{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9999888609587744, "eval_steps": 500, "global_step": 53864, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 3.713013741863859e-05, "grad_norm": 0.787381649017334, "learning_rate": 1.999999993196498e-05, "loss": 0.683, "step": 2 }, { "epoch": 7.426027483727717e-05, "grad_norm": 0.9172313809394836, "learning_rate": 1.999999972785991e-05, "loss": 0.5783, "step": 4 }, { "epoch": 0.00011139041225591575, "grad_norm": 0.5062880516052246, "learning_rate": 1.99999993876848e-05, "loss": 0.3439, "step": 6 }, { "epoch": 0.00014852054967455435, "grad_norm": 0.5106889009475708, "learning_rate": 1.9999998911439648e-05, "loss": 0.4409, "step": 8 }, { "epoch": 0.00018565068709319293, "grad_norm": 0.41305649280548096, "learning_rate": 1.9999998299124467e-05, "loss": 0.3783, "step": 10 }, { "epoch": 0.0002227808245118315, "grad_norm": 0.4339084327220917, "learning_rate": 1.999999755073926e-05, "loss": 0.3818, "step": 12 }, { "epoch": 0.0002599109619304701, "grad_norm": 0.5772333741188049, "learning_rate": 1.9999996666284042e-05, "loss": 0.481, "step": 14 }, { "epoch": 0.0002970410993491087, "grad_norm": 0.4155276417732239, "learning_rate": 1.9999995645758825e-05, "loss": 0.3249, "step": 16 }, { "epoch": 0.0003341712367677473, "grad_norm": 0.6395503282546997, "learning_rate": 1.9999994489163616e-05, "loss": 0.6011, "step": 18 }, { "epoch": 0.00037130137418638586, "grad_norm": 0.5251273512840271, "learning_rate": 1.999999319649844e-05, "loss": 0.4956, "step": 20 }, { "epoch": 0.00040843151160502446, "grad_norm": 0.43630415201187134, "learning_rate": 1.9999991767763308e-05, "loss": 0.5067, "step": 22 }, { "epoch": 0.000445561649023663, "grad_norm": 0.5417091846466064, "learning_rate": 1.9999990202958243e-05, "loss": 0.3808, "step": 24 }, { "epoch": 0.0004826917864423016, "grad_norm": 0.6985338926315308, "learning_rate": 1.9999988502083257e-05, "loss": 0.4041, "step": 26 }, { "epoch": 0.0005198219238609402, "grad_norm": 0.6815202236175537, "learning_rate": 1.9999986665138392e-05, "loss": 0.651, "step": 28 }, { "epoch": 0.0005569520612795788, "grad_norm": 0.6399803161621094, "learning_rate": 1.9999984692123657e-05, "loss": 0.4616, "step": 30 }, { "epoch": 0.0005940821986982174, "grad_norm": 0.6329267621040344, "learning_rate": 1.9999982583039082e-05, "loss": 0.4948, "step": 32 }, { "epoch": 0.000631212336116856, "grad_norm": 0.40591558814048767, "learning_rate": 1.99999803378847e-05, "loss": 0.3639, "step": 34 }, { "epoch": 0.0006683424735354946, "grad_norm": 0.5665282011032104, "learning_rate": 1.9999977956660537e-05, "loss": 0.3756, "step": 36 }, { "epoch": 0.0007054726109541332, "grad_norm": 0.5512558817863464, "learning_rate": 1.9999975439366632e-05, "loss": 0.3618, "step": 38 }, { "epoch": 0.0007426027483727717, "grad_norm": 0.4901846647262573, "learning_rate": 1.999997278600301e-05, "loss": 0.4045, "step": 40 }, { "epoch": 0.0007797328857914103, "grad_norm": 0.4119759202003479, "learning_rate": 1.999996999656971e-05, "loss": 0.329, "step": 42 }, { "epoch": 0.0008168630232100489, "grad_norm": 0.5291385650634766, "learning_rate": 1.9999967071066777e-05, "loss": 0.3533, "step": 44 }, { "epoch": 0.0008539931606286875, "grad_norm": 0.5106402635574341, "learning_rate": 1.9999964009494243e-05, "loss": 0.3966, "step": 46 }, { "epoch": 0.000891123298047326, "grad_norm": 0.3596688508987427, "learning_rate": 1.999996081185215e-05, "loss": 0.2788, "step": 48 }, { "epoch": 0.0009282534354659647, "grad_norm": 0.4595358073711395, "learning_rate": 1.9999957478140545e-05, "loss": 0.5023, "step": 50 }, { "epoch": 0.0009653835728846033, "grad_norm": 0.5351885557174683, "learning_rate": 1.9999954008359476e-05, "loss": 0.3821, "step": 52 }, { "epoch": 0.0010025137103032418, "grad_norm": 0.49715960025787354, "learning_rate": 1.9999950402508984e-05, "loss": 0.313, "step": 54 }, { "epoch": 0.0010396438477218804, "grad_norm": 0.5796110033988953, "learning_rate": 1.9999946660589117e-05, "loss": 0.3942, "step": 56 }, { "epoch": 0.001076773985140519, "grad_norm": 0.39613473415374756, "learning_rate": 1.9999942782599933e-05, "loss": 0.2985, "step": 58 }, { "epoch": 0.0011139041225591577, "grad_norm": 0.5466755628585815, "learning_rate": 1.999993876854148e-05, "loss": 0.4769, "step": 60 }, { "epoch": 0.0011510342599777962, "grad_norm": 0.3947904109954834, "learning_rate": 1.9999934618413816e-05, "loss": 0.4659, "step": 62 }, { "epoch": 0.0011881643973964348, "grad_norm": 0.41004839539527893, "learning_rate": 1.999993033221699e-05, "loss": 0.4927, "step": 64 }, { "epoch": 0.0012252945348150733, "grad_norm": 0.4754455089569092, "learning_rate": 1.999992590995107e-05, "loss": 0.4159, "step": 66 }, { "epoch": 0.001262424672233712, "grad_norm": 0.7578725218772888, "learning_rate": 1.9999921351616108e-05, "loss": 0.2805, "step": 68 }, { "epoch": 0.0012995548096523504, "grad_norm": 0.6312479972839355, "learning_rate": 1.9999916657212174e-05, "loss": 0.4298, "step": 70 }, { "epoch": 0.0013366849470709892, "grad_norm": 0.3991794288158417, "learning_rate": 1.9999911826739323e-05, "loss": 0.4769, "step": 72 }, { "epoch": 0.0013738150844896278, "grad_norm": 0.6018489003181458, "learning_rate": 1.999990686019763e-05, "loss": 0.5276, "step": 74 }, { "epoch": 0.0014109452219082663, "grad_norm": 0.42345333099365234, "learning_rate": 1.9999901757587153e-05, "loss": 0.6705, "step": 76 }, { "epoch": 0.0014480753593269049, "grad_norm": 0.478071928024292, "learning_rate": 1.999989651890797e-05, "loss": 0.5739, "step": 78 }, { "epoch": 0.0014852054967455434, "grad_norm": 0.5030924677848816, "learning_rate": 1.9999891144160146e-05, "loss": 0.6992, "step": 80 }, { "epoch": 0.001522335634164182, "grad_norm": 0.5798288583755493, "learning_rate": 1.9999885633343757e-05, "loss": 0.4498, "step": 82 }, { "epoch": 0.0015594657715828205, "grad_norm": 0.4327036142349243, "learning_rate": 1.9999879986458877e-05, "loss": 0.1748, "step": 84 }, { "epoch": 0.0015965959090014593, "grad_norm": 0.4869978129863739, "learning_rate": 1.999987420350559e-05, "loss": 0.4885, "step": 86 }, { "epoch": 0.0016337260464200979, "grad_norm": 0.3522797226905823, "learning_rate": 1.999986828448396e-05, "loss": 0.5144, "step": 88 }, { "epoch": 0.0016708561838387364, "grad_norm": 0.34214866161346436, "learning_rate": 1.9999862229394077e-05, "loss": 0.4755, "step": 90 }, { "epoch": 0.001707986321257375, "grad_norm": 0.530851423740387, "learning_rate": 1.9999856038236024e-05, "loss": 0.3345, "step": 92 }, { "epoch": 0.0017451164586760135, "grad_norm": 0.4837367832660675, "learning_rate": 1.9999849711009883e-05, "loss": 0.3591, "step": 94 }, { "epoch": 0.001782246596094652, "grad_norm": 0.7023651599884033, "learning_rate": 1.9999843247715743e-05, "loss": 0.3494, "step": 96 }, { "epoch": 0.0018193767335132906, "grad_norm": 0.39528682827949524, "learning_rate": 1.9999836648353685e-05, "loss": 0.5679, "step": 98 }, { "epoch": 0.0018565068709319294, "grad_norm": 0.4950597882270813, "learning_rate": 1.9999829912923803e-05, "loss": 0.3934, "step": 100 }, { "epoch": 0.001893637008350568, "grad_norm": 0.44712716341018677, "learning_rate": 1.9999823041426193e-05, "loss": 0.4103, "step": 102 }, { "epoch": 0.0019307671457692065, "grad_norm": 0.6989088654518127, "learning_rate": 1.9999816033860943e-05, "loss": 0.337, "step": 104 }, { "epoch": 0.0019678972831878453, "grad_norm": 0.4854273796081543, "learning_rate": 1.999980889022815e-05, "loss": 0.3561, "step": 106 }, { "epoch": 0.0020050274206064836, "grad_norm": 0.39766597747802734, "learning_rate": 1.9999801610527908e-05, "loss": 0.4093, "step": 108 }, { "epoch": 0.0020421575580251224, "grad_norm": 0.7291239500045776, "learning_rate": 1.9999794194760323e-05, "loss": 0.5149, "step": 110 }, { "epoch": 0.0020792876954437607, "grad_norm": 0.6360824704170227, "learning_rate": 1.999978664292549e-05, "loss": 0.3331, "step": 112 }, { "epoch": 0.0021164178328623995, "grad_norm": 0.6387815475463867, "learning_rate": 1.9999778955023513e-05, "loss": 0.5065, "step": 114 }, { "epoch": 0.002153547970281038, "grad_norm": 0.46091097593307495, "learning_rate": 1.9999771131054498e-05, "loss": 0.2113, "step": 116 }, { "epoch": 0.0021906781076996766, "grad_norm": 0.4365173578262329, "learning_rate": 1.999976317101855e-05, "loss": 0.3731, "step": 118 }, { "epoch": 0.0022278082451183154, "grad_norm": 0.5046524405479431, "learning_rate": 1.999975507491578e-05, "loss": 0.3662, "step": 120 }, { "epoch": 0.0022649383825369537, "grad_norm": 0.357047438621521, "learning_rate": 1.999974684274629e-05, "loss": 0.3079, "step": 122 }, { "epoch": 0.0023020685199555925, "grad_norm": 0.6360185146331787, "learning_rate": 1.9999738474510203e-05, "loss": 0.4996, "step": 124 }, { "epoch": 0.002339198657374231, "grad_norm": 0.5160127282142639, "learning_rate": 1.9999729970207632e-05, "loss": 0.4182, "step": 126 }, { "epoch": 0.0023763287947928696, "grad_norm": 0.5332038402557373, "learning_rate": 1.999972132983868e-05, "loss": 0.4211, "step": 128 }, { "epoch": 0.002413458932211508, "grad_norm": 0.34042033553123474, "learning_rate": 1.999971255340348e-05, "loss": 0.3714, "step": 130 }, { "epoch": 0.0024505890696301467, "grad_norm": 0.37444549798965454, "learning_rate": 1.9999703640902142e-05, "loss": 0.2689, "step": 132 }, { "epoch": 0.0024877192070487854, "grad_norm": 0.41683492064476013, "learning_rate": 1.999969459233479e-05, "loss": 0.4776, "step": 134 }, { "epoch": 0.002524849344467424, "grad_norm": 0.4115779995918274, "learning_rate": 1.999968540770155e-05, "loss": 0.3747, "step": 136 }, { "epoch": 0.0025619794818860626, "grad_norm": 0.5225721001625061, "learning_rate": 1.999967608700254e-05, "loss": 0.6104, "step": 138 }, { "epoch": 0.002599109619304701, "grad_norm": 0.3780425190925598, "learning_rate": 1.999966663023789e-05, "loss": 0.5691, "step": 140 }, { "epoch": 0.0026362397567233397, "grad_norm": 0.4317651093006134, "learning_rate": 1.9999657037407733e-05, "loss": 0.4652, "step": 142 }, { "epoch": 0.0026733698941419784, "grad_norm": 0.3654298186302185, "learning_rate": 1.9999647308512197e-05, "loss": 0.3942, "step": 144 }, { "epoch": 0.0027105000315606168, "grad_norm": 0.6619117259979248, "learning_rate": 1.9999637443551414e-05, "loss": 0.3976, "step": 146 }, { "epoch": 0.0027476301689792555, "grad_norm": 0.588629424571991, "learning_rate": 1.9999627442525514e-05, "loss": 0.4946, "step": 148 }, { "epoch": 0.002784760306397894, "grad_norm": 0.4449610114097595, "learning_rate": 1.999961730543464e-05, "loss": 0.4977, "step": 150 }, { "epoch": 0.0028218904438165326, "grad_norm": 0.44638001918792725, "learning_rate": 1.9999607032278925e-05, "loss": 0.3224, "step": 152 }, { "epoch": 0.002859020581235171, "grad_norm": 0.33068954944610596, "learning_rate": 1.9999596623058513e-05, "loss": 0.6474, "step": 154 }, { "epoch": 0.0028961507186538098, "grad_norm": 0.302399605512619, "learning_rate": 1.9999586077773538e-05, "loss": 0.5662, "step": 156 }, { "epoch": 0.0029332808560724485, "grad_norm": 0.448861300945282, "learning_rate": 1.9999575396424154e-05, "loss": 0.3873, "step": 158 }, { "epoch": 0.002970410993491087, "grad_norm": 0.6153700351715088, "learning_rate": 1.99995645790105e-05, "loss": 0.6218, "step": 160 }, { "epoch": 0.0030075411309097256, "grad_norm": 0.8051313161849976, "learning_rate": 1.9999553625532722e-05, "loss": 0.2921, "step": 162 }, { "epoch": 0.003044671268328364, "grad_norm": 0.5313144326210022, "learning_rate": 1.9999542535990972e-05, "loss": 0.4455, "step": 164 }, { "epoch": 0.0030818014057470027, "grad_norm": 0.5131443738937378, "learning_rate": 1.9999531310385403e-05, "loss": 0.3069, "step": 166 }, { "epoch": 0.003118931543165641, "grad_norm": 0.3482806384563446, "learning_rate": 1.9999519948716162e-05, "loss": 0.3313, "step": 168 }, { "epoch": 0.00315606168058428, "grad_norm": 0.6128836870193481, "learning_rate": 1.9999508450983407e-05, "loss": 0.5486, "step": 170 }, { "epoch": 0.0031931918180029186, "grad_norm": 0.5387942790985107, "learning_rate": 1.9999496817187294e-05, "loss": 0.372, "step": 172 }, { "epoch": 0.003230321955421557, "grad_norm": 0.46776172518730164, "learning_rate": 1.9999485047327982e-05, "loss": 0.257, "step": 174 }, { "epoch": 0.0032674520928401957, "grad_norm": 0.45111018419265747, "learning_rate": 1.999947314140563e-05, "loss": 0.347, "step": 176 }, { "epoch": 0.003304582230258834, "grad_norm": 0.46320462226867676, "learning_rate": 1.99994610994204e-05, "loss": 0.4372, "step": 178 }, { "epoch": 0.003341712367677473, "grad_norm": 0.46013370156288147, "learning_rate": 1.9999448921372457e-05, "loss": 0.5137, "step": 180 }, { "epoch": 0.003378842505096111, "grad_norm": 0.3580809533596039, "learning_rate": 1.9999436607261968e-05, "loss": 0.4795, "step": 182 }, { "epoch": 0.00341597264251475, "grad_norm": 0.5853656530380249, "learning_rate": 1.9999424157089093e-05, "loss": 0.2693, "step": 184 }, { "epoch": 0.0034531027799333887, "grad_norm": 0.4293530583381653, "learning_rate": 1.999941157085401e-05, "loss": 0.1988, "step": 186 }, { "epoch": 0.003490232917352027, "grad_norm": 0.48474812507629395, "learning_rate": 1.9999398848556886e-05, "loss": 0.4293, "step": 188 }, { "epoch": 0.003527363054770666, "grad_norm": 0.46240234375, "learning_rate": 1.99993859901979e-05, "loss": 0.3688, "step": 190 }, { "epoch": 0.003564493192189304, "grad_norm": 0.3638309836387634, "learning_rate": 1.999937299577722e-05, "loss": 0.4726, "step": 192 }, { "epoch": 0.003601623329607943, "grad_norm": 0.47168317437171936, "learning_rate": 1.9999359865295023e-05, "loss": 0.5157, "step": 194 }, { "epoch": 0.0036387534670265812, "grad_norm": 0.3542977273464203, "learning_rate": 1.999934659875149e-05, "loss": 0.4035, "step": 196 }, { "epoch": 0.00367588360444522, "grad_norm": 0.41440027952194214, "learning_rate": 1.9999333196146803e-05, "loss": 0.3192, "step": 198 }, { "epoch": 0.003713013741863859, "grad_norm": 0.547886073589325, "learning_rate": 1.999931965748114e-05, "loss": 0.4244, "step": 200 }, { "epoch": 0.003750143879282497, "grad_norm": 0.5255728363990784, "learning_rate": 1.9999305982754694e-05, "loss": 0.4748, "step": 202 }, { "epoch": 0.003787274016701136, "grad_norm": 0.3951583206653595, "learning_rate": 1.9999292171967636e-05, "loss": 0.3381, "step": 204 }, { "epoch": 0.0038244041541197742, "grad_norm": 0.3999170660972595, "learning_rate": 1.9999278225120168e-05, "loss": 0.325, "step": 206 }, { "epoch": 0.003861534291538413, "grad_norm": 0.5085821151733398, "learning_rate": 1.9999264142212476e-05, "loss": 0.4041, "step": 208 }, { "epoch": 0.0038986644289570518, "grad_norm": 0.49167922139167786, "learning_rate": 1.9999249923244747e-05, "loss": 0.2427, "step": 210 }, { "epoch": 0.0039357945663756905, "grad_norm": 0.4795328974723816, "learning_rate": 1.999923556821718e-05, "loss": 0.2821, "step": 212 }, { "epoch": 0.0039729247037943284, "grad_norm": 0.646994411945343, "learning_rate": 1.999922107712997e-05, "loss": 0.3946, "step": 214 }, { "epoch": 0.004010054841212967, "grad_norm": 0.3711841106414795, "learning_rate": 1.999920644998331e-05, "loss": 0.3623, "step": 216 }, { "epoch": 0.004047184978631606, "grad_norm": 0.48085135221481323, "learning_rate": 1.99991916867774e-05, "loss": 0.4661, "step": 218 }, { "epoch": 0.004084315116050245, "grad_norm": 0.34979113936424255, "learning_rate": 1.999917678751244e-05, "loss": 0.495, "step": 220 }, { "epoch": 0.004121445253468883, "grad_norm": 0.49509361386299133, "learning_rate": 1.9999161752188642e-05, "loss": 0.3476, "step": 222 }, { "epoch": 0.004158575390887521, "grad_norm": 0.4042584300041199, "learning_rate": 1.9999146580806198e-05, "loss": 0.2823, "step": 224 }, { "epoch": 0.00419570552830616, "grad_norm": 0.4594835937023163, "learning_rate": 1.9999131273365323e-05, "loss": 0.5605, "step": 226 }, { "epoch": 0.004232835665724799, "grad_norm": 0.47700241208076477, "learning_rate": 1.999911582986622e-05, "loss": 0.5574, "step": 228 }, { "epoch": 0.004269965803143438, "grad_norm": 0.33174213767051697, "learning_rate": 1.99991002503091e-05, "loss": 0.4264, "step": 230 }, { "epoch": 0.004307095940562076, "grad_norm": 0.44007235765457153, "learning_rate": 1.9999084534694182e-05, "loss": 0.5912, "step": 232 }, { "epoch": 0.004344226077980714, "grad_norm": 0.5250499248504639, "learning_rate": 1.999906868302167e-05, "loss": 0.2697, "step": 234 }, { "epoch": 0.004381356215399353, "grad_norm": 0.4792839288711548, "learning_rate": 1.9999052695291784e-05, "loss": 0.3774, "step": 236 }, { "epoch": 0.004418486352817992, "grad_norm": 0.5226247906684875, "learning_rate": 1.9999036571504745e-05, "loss": 0.2578, "step": 238 }, { "epoch": 0.004455616490236631, "grad_norm": 0.5130003690719604, "learning_rate": 1.9999020311660763e-05, "loss": 0.4903, "step": 240 }, { "epoch": 0.004492746627655269, "grad_norm": 0.5420089960098267, "learning_rate": 1.999900391576007e-05, "loss": 0.4147, "step": 242 }, { "epoch": 0.004529876765073907, "grad_norm": 0.5653980374336243, "learning_rate": 1.999898738380288e-05, "loss": 0.4098, "step": 244 }, { "epoch": 0.004567006902492546, "grad_norm": 0.3912901282310486, "learning_rate": 1.9998970715789428e-05, "loss": 0.2877, "step": 246 }, { "epoch": 0.004604137039911185, "grad_norm": 0.32257866859436035, "learning_rate": 1.999895391171993e-05, "loss": 0.4531, "step": 248 }, { "epoch": 0.004641267177329824, "grad_norm": 0.47456029057502747, "learning_rate": 1.9998936971594622e-05, "loss": 0.3763, "step": 250 }, { "epoch": 0.004678397314748462, "grad_norm": 0.4561237096786499, "learning_rate": 1.999891989541373e-05, "loss": 0.4848, "step": 252 }, { "epoch": 0.0047155274521671, "grad_norm": 0.5080367922782898, "learning_rate": 1.9998902683177488e-05, "loss": 0.5258, "step": 254 }, { "epoch": 0.004752657589585739, "grad_norm": 0.3973216116428375, "learning_rate": 1.9998885334886132e-05, "loss": 0.3827, "step": 256 }, { "epoch": 0.004789787727004378, "grad_norm": 0.3797387480735779, "learning_rate": 1.99988678505399e-05, "loss": 0.389, "step": 258 }, { "epoch": 0.004826917864423016, "grad_norm": 0.34787413477897644, "learning_rate": 1.999885023013902e-05, "loss": 0.331, "step": 260 }, { "epoch": 0.004864048001841655, "grad_norm": 0.5755864381790161, "learning_rate": 1.9998832473683745e-05, "loss": 0.5135, "step": 262 }, { "epoch": 0.004901178139260293, "grad_norm": 0.49313580989837646, "learning_rate": 1.9998814581174307e-05, "loss": 0.4115, "step": 264 }, { "epoch": 0.004938308276678932, "grad_norm": 0.34419214725494385, "learning_rate": 1.9998796552610954e-05, "loss": 0.2674, "step": 266 }, { "epoch": 0.004975438414097571, "grad_norm": 0.4715988039970398, "learning_rate": 1.9998778387993923e-05, "loss": 0.2613, "step": 268 }, { "epoch": 0.005012568551516209, "grad_norm": 0.29773658514022827, "learning_rate": 1.9998760087323476e-05, "loss": 0.248, "step": 270 }, { "epoch": 0.005049698688934848, "grad_norm": 0.28970152139663696, "learning_rate": 1.999874165059985e-05, "loss": 0.276, "step": 272 }, { "epoch": 0.005086828826353486, "grad_norm": 0.526797354221344, "learning_rate": 1.9998723077823302e-05, "loss": 0.3526, "step": 274 }, { "epoch": 0.005123958963772125, "grad_norm": 0.34837856888771057, "learning_rate": 1.999870436899408e-05, "loss": 0.2613, "step": 276 }, { "epoch": 0.005161089101190764, "grad_norm": 0.3190463185310364, "learning_rate": 1.9998685524112445e-05, "loss": 0.2662, "step": 278 }, { "epoch": 0.005198219238609402, "grad_norm": 0.5121004581451416, "learning_rate": 1.9998666543178647e-05, "loss": 0.3837, "step": 280 }, { "epoch": 0.0052353493760280406, "grad_norm": 0.46542319655418396, "learning_rate": 1.999864742619295e-05, "loss": 0.1741, "step": 282 }, { "epoch": 0.005272479513446679, "grad_norm": 0.481201708316803, "learning_rate": 1.9998628173155607e-05, "loss": 0.5364, "step": 284 }, { "epoch": 0.005309609650865318, "grad_norm": 0.25467976927757263, "learning_rate": 1.9998608784066883e-05, "loss": 0.2166, "step": 286 }, { "epoch": 0.005346739788283957, "grad_norm": 0.4101756811141968, "learning_rate": 1.9998589258927044e-05, "loss": 0.3244, "step": 288 }, { "epoch": 0.005383869925702595, "grad_norm": 0.47906413674354553, "learning_rate": 1.9998569597736355e-05, "loss": 0.258, "step": 290 }, { "epoch": 0.0054210000631212335, "grad_norm": 0.42529603838920593, "learning_rate": 1.9998549800495083e-05, "loss": 0.3635, "step": 292 }, { "epoch": 0.005458130200539872, "grad_norm": 0.47230392694473267, "learning_rate": 1.9998529867203496e-05, "loss": 0.4149, "step": 294 }, { "epoch": 0.005495260337958511, "grad_norm": 0.39145946502685547, "learning_rate": 1.9998509797861868e-05, "loss": 0.468, "step": 296 }, { "epoch": 0.005532390475377149, "grad_norm": 0.38009825348854065, "learning_rate": 1.999848959247047e-05, "loss": 0.4095, "step": 298 }, { "epoch": 0.005569520612795788, "grad_norm": 0.4906117916107178, "learning_rate": 1.999846925102958e-05, "loss": 0.4982, "step": 300 }, { "epoch": 0.0056066507502144265, "grad_norm": 0.26200997829437256, "learning_rate": 1.999844877353947e-05, "loss": 0.3007, "step": 302 }, { "epoch": 0.005643780887633065, "grad_norm": 0.3799205720424652, "learning_rate": 1.9998428160000422e-05, "loss": 0.4404, "step": 304 }, { "epoch": 0.005680911025051704, "grad_norm": 0.39745423197746277, "learning_rate": 1.9998407410412715e-05, "loss": 0.5053, "step": 306 }, { "epoch": 0.005718041162470342, "grad_norm": 0.342948853969574, "learning_rate": 1.9998386524776633e-05, "loss": 0.3366, "step": 308 }, { "epoch": 0.005755171299888981, "grad_norm": 0.5430181622505188, "learning_rate": 1.9998365503092457e-05, "loss": 0.3058, "step": 310 }, { "epoch": 0.0057923014373076195, "grad_norm": 0.3937097191810608, "learning_rate": 1.9998344345360478e-05, "loss": 0.3755, "step": 312 }, { "epoch": 0.005829431574726258, "grad_norm": 0.46196967363357544, "learning_rate": 1.9998323051580977e-05, "loss": 0.3913, "step": 314 }, { "epoch": 0.005866561712144897, "grad_norm": 0.7649854421615601, "learning_rate": 1.999830162175425e-05, "loss": 0.2668, "step": 316 }, { "epoch": 0.005903691849563535, "grad_norm": 0.5333899855613708, "learning_rate": 1.9998280055880586e-05, "loss": 0.4538, "step": 318 }, { "epoch": 0.005940821986982174, "grad_norm": 0.44176235795021057, "learning_rate": 1.999825835396028e-05, "loss": 0.3859, "step": 320 }, { "epoch": 0.0059779521244008125, "grad_norm": 0.27276358008384705, "learning_rate": 1.9998236515993623e-05, "loss": 0.3317, "step": 322 }, { "epoch": 0.006015082261819451, "grad_norm": 0.40533676743507385, "learning_rate": 1.9998214541980915e-05, "loss": 0.4542, "step": 324 }, { "epoch": 0.006052212399238089, "grad_norm": 0.4885386526584625, "learning_rate": 1.999819243192246e-05, "loss": 0.4974, "step": 326 }, { "epoch": 0.006089342536656728, "grad_norm": 0.4333484172821045, "learning_rate": 1.999817018581855e-05, "loss": 0.207, "step": 328 }, { "epoch": 0.006126472674075367, "grad_norm": 0.4982893466949463, "learning_rate": 1.9998147803669495e-05, "loss": 0.4078, "step": 330 }, { "epoch": 0.0061636028114940055, "grad_norm": 0.5079017281532288, "learning_rate": 1.9998125285475595e-05, "loss": 0.3324, "step": 332 }, { "epoch": 0.006200732948912644, "grad_norm": 0.4985852539539337, "learning_rate": 1.9998102631237155e-05, "loss": 0.3283, "step": 334 }, { "epoch": 0.006237863086331282, "grad_norm": 0.2881741523742676, "learning_rate": 1.999807984095449e-05, "loss": 0.2229, "step": 336 }, { "epoch": 0.006274993223749921, "grad_norm": 0.37681514024734497, "learning_rate": 1.99980569146279e-05, "loss": 0.3103, "step": 338 }, { "epoch": 0.00631212336116856, "grad_norm": 0.40298473834991455, "learning_rate": 1.999803385225771e-05, "loss": 0.4415, "step": 340 }, { "epoch": 0.0063492534985871985, "grad_norm": 0.5242092609405518, "learning_rate": 1.9998010653844222e-05, "loss": 0.618, "step": 342 }, { "epoch": 0.006386383636005837, "grad_norm": 0.36084824800491333, "learning_rate": 1.999798731938776e-05, "loss": 0.298, "step": 344 }, { "epoch": 0.006423513773424475, "grad_norm": 0.43370696902275085, "learning_rate": 1.9997963848888637e-05, "loss": 0.4926, "step": 346 }, { "epoch": 0.006460643910843114, "grad_norm": 0.38683441281318665, "learning_rate": 1.999794024234717e-05, "loss": 0.3756, "step": 348 }, { "epoch": 0.006497774048261753, "grad_norm": 0.5789943933486938, "learning_rate": 1.999791649976369e-05, "loss": 0.3597, "step": 350 }, { "epoch": 0.0065349041856803914, "grad_norm": 0.38878417015075684, "learning_rate": 1.9997892621138512e-05, "loss": 0.2021, "step": 352 }, { "epoch": 0.00657203432309903, "grad_norm": 0.39716699719429016, "learning_rate": 1.9997868606471958e-05, "loss": 0.6617, "step": 354 }, { "epoch": 0.006609164460517668, "grad_norm": 0.2856186628341675, "learning_rate": 1.9997844455764364e-05, "loss": 0.3369, "step": 356 }, { "epoch": 0.006646294597936307, "grad_norm": 0.4489436447620392, "learning_rate": 1.9997820169016048e-05, "loss": 0.386, "step": 358 }, { "epoch": 0.006683424735354946, "grad_norm": 0.37973126769065857, "learning_rate": 1.9997795746227353e-05, "loss": 0.5042, "step": 360 }, { "epoch": 0.006720554872773584, "grad_norm": 0.4062805473804474, "learning_rate": 1.9997771187398604e-05, "loss": 0.2486, "step": 362 }, { "epoch": 0.006757685010192222, "grad_norm": 0.5012133121490479, "learning_rate": 1.9997746492530132e-05, "loss": 0.3411, "step": 364 }, { "epoch": 0.006794815147610861, "grad_norm": 0.39055135846138, "learning_rate": 1.9997721661622278e-05, "loss": 0.2739, "step": 366 }, { "epoch": 0.0068319452850295, "grad_norm": 0.37451842427253723, "learning_rate": 1.9997696694675382e-05, "loss": 0.3286, "step": 368 }, { "epoch": 0.006869075422448139, "grad_norm": 0.4690632224082947, "learning_rate": 1.9997671591689777e-05, "loss": 0.3944, "step": 370 }, { "epoch": 0.006906205559866777, "grad_norm": 0.6265665292739868, "learning_rate": 1.9997646352665812e-05, "loss": 0.573, "step": 372 }, { "epoch": 0.006943335697285415, "grad_norm": 0.42657193541526794, "learning_rate": 1.9997620977603823e-05, "loss": 0.4903, "step": 374 }, { "epoch": 0.006980465834704054, "grad_norm": 0.3993040919303894, "learning_rate": 1.999759546650416e-05, "loss": 0.3696, "step": 376 }, { "epoch": 0.007017595972122693, "grad_norm": 0.5259105563163757, "learning_rate": 1.999756981936717e-05, "loss": 0.3929, "step": 378 }, { "epoch": 0.007054726109541332, "grad_norm": 0.4576737880706787, "learning_rate": 1.99975440361932e-05, "loss": 0.3648, "step": 380 }, { "epoch": 0.00709185624695997, "grad_norm": 0.3197403848171234, "learning_rate": 1.99975181169826e-05, "loss": 0.2748, "step": 382 }, { "epoch": 0.007128986384378608, "grad_norm": 0.38901081681251526, "learning_rate": 1.9997492061735726e-05, "loss": 0.3293, "step": 384 }, { "epoch": 0.007166116521797247, "grad_norm": 0.32115432620048523, "learning_rate": 1.999746587045293e-05, "loss": 0.3153, "step": 386 }, { "epoch": 0.007203246659215886, "grad_norm": 0.4031219482421875, "learning_rate": 1.9997439543134574e-05, "loss": 0.4489, "step": 388 }, { "epoch": 0.007240376796634525, "grad_norm": 0.35943150520324707, "learning_rate": 1.9997413079781008e-05, "loss": 0.3411, "step": 390 }, { "epoch": 0.0072775069340531625, "grad_norm": 0.639771580696106, "learning_rate": 1.9997386480392596e-05, "loss": 0.3086, "step": 392 }, { "epoch": 0.007314637071471801, "grad_norm": 0.41053539514541626, "learning_rate": 1.99973597449697e-05, "loss": 0.3575, "step": 394 }, { "epoch": 0.00735176720889044, "grad_norm": 0.3961491584777832, "learning_rate": 1.999733287351268e-05, "loss": 0.311, "step": 396 }, { "epoch": 0.007388897346309079, "grad_norm": 0.37600892782211304, "learning_rate": 1.999730586602191e-05, "loss": 0.4798, "step": 398 }, { "epoch": 0.007426027483727718, "grad_norm": 0.41316813230514526, "learning_rate": 1.999727872249775e-05, "loss": 0.4722, "step": 400 }, { "epoch": 0.0074631576211463555, "grad_norm": 0.3977676331996918, "learning_rate": 1.9997251442940574e-05, "loss": 0.4144, "step": 402 }, { "epoch": 0.007500287758564994, "grad_norm": 0.422908753156662, "learning_rate": 1.999722402735075e-05, "loss": 0.2458, "step": 404 }, { "epoch": 0.007537417895983633, "grad_norm": 0.45909595489501953, "learning_rate": 1.999719647572865e-05, "loss": 0.5124, "step": 406 }, { "epoch": 0.007574548033402272, "grad_norm": 0.4146987199783325, "learning_rate": 1.9997168788074655e-05, "loss": 0.2605, "step": 408 }, { "epoch": 0.0076116781708209106, "grad_norm": 0.4815383553504944, "learning_rate": 1.9997140964389133e-05, "loss": 0.2757, "step": 410 }, { "epoch": 0.0076488083082395485, "grad_norm": 0.6241264939308167, "learning_rate": 1.9997113004672472e-05, "loss": 0.4455, "step": 412 }, { "epoch": 0.007685938445658187, "grad_norm": 0.5692870020866394, "learning_rate": 1.9997084908925043e-05, "loss": 0.3409, "step": 414 }, { "epoch": 0.007723068583076826, "grad_norm": 0.30715450644493103, "learning_rate": 1.9997056677147237e-05, "loss": 0.2442, "step": 416 }, { "epoch": 0.007760198720495465, "grad_norm": 0.4382787346839905, "learning_rate": 1.9997028309339433e-05, "loss": 0.486, "step": 418 }, { "epoch": 0.0077973288579141035, "grad_norm": 0.5441878437995911, "learning_rate": 1.9996999805502016e-05, "loss": 0.5577, "step": 420 }, { "epoch": 0.007834458995332742, "grad_norm": 0.2951529026031494, "learning_rate": 1.999697116563538e-05, "loss": 0.461, "step": 422 }, { "epoch": 0.007871589132751381, "grad_norm": 0.42165622115135193, "learning_rate": 1.999694238973991e-05, "loss": 0.547, "step": 424 }, { "epoch": 0.007908719270170018, "grad_norm": 0.41160154342651367, "learning_rate": 1.9996913477816e-05, "loss": 0.2792, "step": 426 }, { "epoch": 0.007945849407588657, "grad_norm": 0.4399774372577667, "learning_rate": 1.9996884429864038e-05, "loss": 0.399, "step": 428 }, { "epoch": 0.007982979545007296, "grad_norm": 0.4168318808078766, "learning_rate": 1.9996855245884427e-05, "loss": 0.279, "step": 430 }, { "epoch": 0.008020109682425934, "grad_norm": 0.4693607687950134, "learning_rate": 1.9996825925877555e-05, "loss": 0.4932, "step": 432 }, { "epoch": 0.008057239819844573, "grad_norm": 0.3294755518436432, "learning_rate": 1.999679646984383e-05, "loss": 0.3367, "step": 434 }, { "epoch": 0.008094369957263212, "grad_norm": 0.39903226494789124, "learning_rate": 1.9996766877783647e-05, "loss": 0.409, "step": 436 }, { "epoch": 0.00813150009468185, "grad_norm": 0.3988722264766693, "learning_rate": 1.9996737149697413e-05, "loss": 0.3253, "step": 438 }, { "epoch": 0.00816863023210049, "grad_norm": 0.3188820481300354, "learning_rate": 1.9996707285585527e-05, "loss": 0.2537, "step": 440 }, { "epoch": 0.008205760369519128, "grad_norm": 0.37374868988990784, "learning_rate": 1.99966772854484e-05, "loss": 0.3668, "step": 442 }, { "epoch": 0.008242890506937765, "grad_norm": 0.45591971278190613, "learning_rate": 1.999664714928644e-05, "loss": 0.3725, "step": 444 }, { "epoch": 0.008280020644356404, "grad_norm": 0.3772117495536804, "learning_rate": 1.9996616877100053e-05, "loss": 0.5713, "step": 446 }, { "epoch": 0.008317150781775043, "grad_norm": 0.5080755949020386, "learning_rate": 1.9996586468889653e-05, "loss": 0.2889, "step": 448 }, { "epoch": 0.008354280919193682, "grad_norm": 0.6368932127952576, "learning_rate": 1.9996555924655658e-05, "loss": 0.3513, "step": 450 }, { "epoch": 0.00839141105661232, "grad_norm": 0.3840327560901642, "learning_rate": 1.9996525244398478e-05, "loss": 0.2882, "step": 452 }, { "epoch": 0.00842854119403096, "grad_norm": 0.4199620187282562, "learning_rate": 1.9996494428118533e-05, "loss": 0.1586, "step": 454 }, { "epoch": 0.008465671331449598, "grad_norm": 0.2961635887622833, "learning_rate": 1.9996463475816245e-05, "loss": 0.3291, "step": 456 }, { "epoch": 0.008502801468868237, "grad_norm": 0.49746212363243103, "learning_rate": 1.9996432387492025e-05, "loss": 0.2865, "step": 458 }, { "epoch": 0.008539931606286875, "grad_norm": 0.4187294542789459, "learning_rate": 1.9996401163146308e-05, "loss": 0.5745, "step": 460 }, { "epoch": 0.008577061743705514, "grad_norm": 0.3033667802810669, "learning_rate": 1.999636980277951e-05, "loss": 0.4464, "step": 462 }, { "epoch": 0.008614191881124151, "grad_norm": 0.3640962839126587, "learning_rate": 1.9996338306392065e-05, "loss": 0.284, "step": 464 }, { "epoch": 0.00865132201854279, "grad_norm": 0.4276184141635895, "learning_rate": 1.99963066739844e-05, "loss": 0.3035, "step": 466 }, { "epoch": 0.008688452155961429, "grad_norm": 0.4547252953052521, "learning_rate": 1.999627490555694e-05, "loss": 0.4582, "step": 468 }, { "epoch": 0.008725582293380068, "grad_norm": 0.48601678013801575, "learning_rate": 1.999624300111012e-05, "loss": 0.5454, "step": 470 }, { "epoch": 0.008762712430798706, "grad_norm": 0.39640524983406067, "learning_rate": 1.9996210960644376e-05, "loss": 0.3945, "step": 472 }, { "epoch": 0.008799842568217345, "grad_norm": 0.4193587601184845, "learning_rate": 1.9996178784160144e-05, "loss": 0.4255, "step": 474 }, { "epoch": 0.008836972705635984, "grad_norm": 0.3313017189502716, "learning_rate": 1.999614647165786e-05, "loss": 0.3309, "step": 476 }, { "epoch": 0.008874102843054623, "grad_norm": 0.31618499755859375, "learning_rate": 1.9996114023137967e-05, "loss": 0.3855, "step": 478 }, { "epoch": 0.008911232980473261, "grad_norm": 0.42019137740135193, "learning_rate": 1.9996081438600902e-05, "loss": 0.3635, "step": 480 }, { "epoch": 0.008948363117891898, "grad_norm": 0.43365445733070374, "learning_rate": 1.999604871804711e-05, "loss": 0.4671, "step": 482 }, { "epoch": 0.008985493255310537, "grad_norm": 0.7438284754753113, "learning_rate": 1.9996015861477035e-05, "loss": 0.4199, "step": 484 }, { "epoch": 0.009022623392729176, "grad_norm": 0.42682090401649475, "learning_rate": 1.9995982868891128e-05, "loss": 0.4286, "step": 486 }, { "epoch": 0.009059753530147815, "grad_norm": 0.3651818335056305, "learning_rate": 1.9995949740289835e-05, "loss": 0.5493, "step": 488 }, { "epoch": 0.009096883667566454, "grad_norm": 0.385578453540802, "learning_rate": 1.9995916475673608e-05, "loss": 0.4674, "step": 490 }, { "epoch": 0.009134013804985092, "grad_norm": 0.43144655227661133, "learning_rate": 1.99958830750429e-05, "loss": 0.2013, "step": 492 }, { "epoch": 0.009171143942403731, "grad_norm": 0.5458886623382568, "learning_rate": 1.999584953839816e-05, "loss": 0.3227, "step": 494 }, { "epoch": 0.00920827407982237, "grad_norm": 0.5039573311805725, "learning_rate": 1.9995815865739853e-05, "loss": 0.4867, "step": 496 }, { "epoch": 0.009245404217241009, "grad_norm": 0.3955095112323761, "learning_rate": 1.9995782057068428e-05, "loss": 0.3646, "step": 498 }, { "epoch": 0.009282534354659647, "grad_norm": 0.42155376076698303, "learning_rate": 1.9995748112384355e-05, "loss": 0.4987, "step": 500 }, { "epoch": 0.009319664492078284, "grad_norm": 0.4966467618942261, "learning_rate": 1.9995714031688093e-05, "loss": 0.4172, "step": 502 }, { "epoch": 0.009356794629496923, "grad_norm": 0.4255533814430237, "learning_rate": 1.9995679814980097e-05, "loss": 0.3979, "step": 504 }, { "epoch": 0.009393924766915562, "grad_norm": 0.37801483273506165, "learning_rate": 1.9995645462260843e-05, "loss": 0.313, "step": 506 }, { "epoch": 0.0094310549043342, "grad_norm": 0.36350420117378235, "learning_rate": 1.9995610973530795e-05, "loss": 0.1838, "step": 508 }, { "epoch": 0.00946818504175284, "grad_norm": 0.3894995450973511, "learning_rate": 1.999557634879042e-05, "loss": 0.3882, "step": 510 }, { "epoch": 0.009505315179171478, "grad_norm": 0.34138813614845276, "learning_rate": 1.9995541588040193e-05, "loss": 0.4547, "step": 512 }, { "epoch": 0.009542445316590117, "grad_norm": 0.3640008866786957, "learning_rate": 1.9995506691280585e-05, "loss": 0.2158, "step": 514 }, { "epoch": 0.009579575454008756, "grad_norm": 0.49179133772850037, "learning_rate": 1.999547165851207e-05, "loss": 0.1977, "step": 516 }, { "epoch": 0.009616705591427395, "grad_norm": 0.4022124111652374, "learning_rate": 1.9995436489735124e-05, "loss": 0.3845, "step": 518 }, { "epoch": 0.009653835728846032, "grad_norm": 0.3796842098236084, "learning_rate": 1.9995401184950228e-05, "loss": 0.4438, "step": 520 }, { "epoch": 0.00969096586626467, "grad_norm": 0.4665010869503021, "learning_rate": 1.9995365744157868e-05, "loss": 0.298, "step": 522 }, { "epoch": 0.00972809600368331, "grad_norm": 0.4929797947406769, "learning_rate": 1.999533016735851e-05, "loss": 0.4707, "step": 524 }, { "epoch": 0.009765226141101948, "grad_norm": 0.4474024772644043, "learning_rate": 1.9995294454552654e-05, "loss": 0.339, "step": 526 }, { "epoch": 0.009802356278520587, "grad_norm": 0.6189339756965637, "learning_rate": 1.9995258605740776e-05, "loss": 0.323, "step": 528 }, { "epoch": 0.009839486415939225, "grad_norm": 0.3080368936061859, "learning_rate": 1.999522262092337e-05, "loss": 0.3176, "step": 530 }, { "epoch": 0.009876616553357864, "grad_norm": 0.48607268929481506, "learning_rate": 1.9995186500100923e-05, "loss": 0.4289, "step": 532 }, { "epoch": 0.009913746690776503, "grad_norm": 0.5090469121932983, "learning_rate": 1.999515024327393e-05, "loss": 0.3874, "step": 534 }, { "epoch": 0.009950876828195142, "grad_norm": 0.42586085200309753, "learning_rate": 1.9995113850442878e-05, "loss": 0.3146, "step": 536 }, { "epoch": 0.00998800696561378, "grad_norm": 0.41823825240135193, "learning_rate": 1.9995077321608264e-05, "loss": 0.3602, "step": 538 }, { "epoch": 0.010025137103032418, "grad_norm": 0.4068586528301239, "learning_rate": 1.9995040656770587e-05, "loss": 0.482, "step": 540 }, { "epoch": 0.010062267240451056, "grad_norm": 0.44053971767425537, "learning_rate": 1.999500385593035e-05, "loss": 0.43, "step": 542 }, { "epoch": 0.010099397377869695, "grad_norm": 0.3630414605140686, "learning_rate": 1.9994966919088042e-05, "loss": 0.304, "step": 544 }, { "epoch": 0.010136527515288334, "grad_norm": 0.3131045997142792, "learning_rate": 1.9994929846244177e-05, "loss": 0.4051, "step": 546 }, { "epoch": 0.010173657652706973, "grad_norm": 0.38520804047584534, "learning_rate": 1.9994892637399254e-05, "loss": 0.3213, "step": 548 }, { "epoch": 0.010210787790125611, "grad_norm": 0.444692999124527, "learning_rate": 1.999485529255378e-05, "loss": 0.4698, "step": 550 }, { "epoch": 0.01024791792754425, "grad_norm": 0.34077340364456177, "learning_rate": 1.9994817811708262e-05, "loss": 0.4032, "step": 552 }, { "epoch": 0.010285048064962889, "grad_norm": 0.3990900218486786, "learning_rate": 1.9994780194863215e-05, "loss": 0.4268, "step": 554 }, { "epoch": 0.010322178202381528, "grad_norm": 0.34091925621032715, "learning_rate": 1.9994742442019144e-05, "loss": 0.38, "step": 556 }, { "epoch": 0.010359308339800165, "grad_norm": 0.4409696161746979, "learning_rate": 1.999470455317657e-05, "loss": 0.3509, "step": 558 }, { "epoch": 0.010396438477218804, "grad_norm": 0.4494052529335022, "learning_rate": 1.9994666528336e-05, "loss": 0.2154, "step": 560 }, { "epoch": 0.010433568614637442, "grad_norm": 0.2541842460632324, "learning_rate": 1.9994628367497956e-05, "loss": 0.5204, "step": 562 }, { "epoch": 0.010470698752056081, "grad_norm": 0.3423829674720764, "learning_rate": 1.999459007066296e-05, "loss": 0.2155, "step": 564 }, { "epoch": 0.01050782888947472, "grad_norm": 0.5962779521942139, "learning_rate": 1.9994551637831533e-05, "loss": 0.4892, "step": 566 }, { "epoch": 0.010544959026893359, "grad_norm": 0.2996845543384552, "learning_rate": 1.999451306900419e-05, "loss": 0.2228, "step": 568 }, { "epoch": 0.010582089164311997, "grad_norm": 0.4948548674583435, "learning_rate": 1.9994474364181465e-05, "loss": 0.2742, "step": 570 }, { "epoch": 0.010619219301730636, "grad_norm": 0.24314939975738525, "learning_rate": 1.999443552336388e-05, "loss": 0.3812, "step": 572 }, { "epoch": 0.010656349439149275, "grad_norm": 0.4004712402820587, "learning_rate": 1.9994396546551962e-05, "loss": 0.5465, "step": 574 }, { "epoch": 0.010693479576567914, "grad_norm": 0.30782124400138855, "learning_rate": 1.9994357433746245e-05, "loss": 0.4436, "step": 576 }, { "epoch": 0.01073060971398655, "grad_norm": 0.3293493092060089, "learning_rate": 1.999431818494726e-05, "loss": 0.4877, "step": 578 }, { "epoch": 0.01076773985140519, "grad_norm": 0.4713459014892578, "learning_rate": 1.999427880015554e-05, "loss": 0.4656, "step": 580 }, { "epoch": 0.010804869988823828, "grad_norm": 0.5082594156265259, "learning_rate": 1.9994239279371624e-05, "loss": 0.4909, "step": 582 }, { "epoch": 0.010842000126242467, "grad_norm": 0.34458476305007935, "learning_rate": 1.9994199622596047e-05, "loss": 0.2978, "step": 584 }, { "epoch": 0.010879130263661106, "grad_norm": 0.3392041325569153, "learning_rate": 1.999415982982935e-05, "loss": 0.3605, "step": 586 }, { "epoch": 0.010916260401079745, "grad_norm": 0.39310190081596375, "learning_rate": 1.9994119901072075e-05, "loss": 0.294, "step": 588 }, { "epoch": 0.010953390538498383, "grad_norm": 0.32911011576652527, "learning_rate": 1.9994079836324758e-05, "loss": 0.1853, "step": 590 }, { "epoch": 0.010990520675917022, "grad_norm": 0.43393829464912415, "learning_rate": 1.9994039635587955e-05, "loss": 0.4223, "step": 592 }, { "epoch": 0.011027650813335661, "grad_norm": 0.4050091803073883, "learning_rate": 1.9993999298862205e-05, "loss": 0.1945, "step": 594 }, { "epoch": 0.011064780950754298, "grad_norm": 0.4151243269443512, "learning_rate": 1.9993958826148063e-05, "loss": 0.2674, "step": 596 }, { "epoch": 0.011101911088172937, "grad_norm": 0.4293023645877838, "learning_rate": 1.9993918217446078e-05, "loss": 0.5065, "step": 598 }, { "epoch": 0.011139041225591576, "grad_norm": 0.37561461329460144, "learning_rate": 1.9993877472756798e-05, "loss": 0.4602, "step": 600 }, { "epoch": 0.011176171363010214, "grad_norm": 0.3366253077983856, "learning_rate": 1.999383659208078e-05, "loss": 0.3594, "step": 602 }, { "epoch": 0.011213301500428853, "grad_norm": 0.38236403465270996, "learning_rate": 1.9993795575418583e-05, "loss": 0.3598, "step": 604 }, { "epoch": 0.011250431637847492, "grad_norm": 0.3363437354564667, "learning_rate": 1.9993754422770765e-05, "loss": 0.206, "step": 606 }, { "epoch": 0.01128756177526613, "grad_norm": 0.4060344994068146, "learning_rate": 1.999371313413788e-05, "loss": 0.2939, "step": 608 }, { "epoch": 0.01132469191268477, "grad_norm": 0.40036308765411377, "learning_rate": 1.9993671709520494e-05, "loss": 0.3153, "step": 610 }, { "epoch": 0.011361822050103408, "grad_norm": 0.4371497631072998, "learning_rate": 1.9993630148919173e-05, "loss": 0.4906, "step": 612 }, { "epoch": 0.011398952187522045, "grad_norm": 0.376014769077301, "learning_rate": 1.999358845233448e-05, "loss": 0.4481, "step": 614 }, { "epoch": 0.011436082324940684, "grad_norm": 0.39666667580604553, "learning_rate": 1.9993546619766984e-05, "loss": 0.5355, "step": 616 }, { "epoch": 0.011473212462359323, "grad_norm": 0.5106778740882874, "learning_rate": 1.999350465121725e-05, "loss": 0.4714, "step": 618 }, { "epoch": 0.011510342599777961, "grad_norm": 0.3509008288383484, "learning_rate": 1.9993462546685852e-05, "loss": 0.3091, "step": 620 }, { "epoch": 0.0115474727371966, "grad_norm": 0.3783280849456787, "learning_rate": 1.9993420306173363e-05, "loss": 0.3315, "step": 622 }, { "epoch": 0.011584602874615239, "grad_norm": 0.5052120685577393, "learning_rate": 1.9993377929680355e-05, "loss": 0.4169, "step": 624 }, { "epoch": 0.011621733012033878, "grad_norm": 0.3175913095474243, "learning_rate": 1.999333541720741e-05, "loss": 0.4693, "step": 626 }, { "epoch": 0.011658863149452517, "grad_norm": 0.3224084675312042, "learning_rate": 1.9993292768755104e-05, "loss": 0.3117, "step": 628 }, { "epoch": 0.011695993286871155, "grad_norm": 0.298470675945282, "learning_rate": 1.9993249984324016e-05, "loss": 0.2555, "step": 630 }, { "epoch": 0.011733123424289794, "grad_norm": 0.55222088098526, "learning_rate": 1.9993207063914727e-05, "loss": 0.2993, "step": 632 }, { "epoch": 0.011770253561708431, "grad_norm": 0.3783060312271118, "learning_rate": 1.9993164007527824e-05, "loss": 0.1902, "step": 634 }, { "epoch": 0.01180738369912707, "grad_norm": 0.36905935406684875, "learning_rate": 1.9993120815163893e-05, "loss": 0.4302, "step": 636 }, { "epoch": 0.011844513836545709, "grad_norm": 0.4133588373661041, "learning_rate": 1.999307748682352e-05, "loss": 0.3256, "step": 638 }, { "epoch": 0.011881643973964347, "grad_norm": 0.5520565509796143, "learning_rate": 1.9993034022507295e-05, "loss": 0.2963, "step": 640 }, { "epoch": 0.011918774111382986, "grad_norm": 0.5690224170684814, "learning_rate": 1.9992990422215806e-05, "loss": 0.4899, "step": 642 }, { "epoch": 0.011955904248801625, "grad_norm": 0.5573917031288147, "learning_rate": 1.9992946685949658e-05, "loss": 0.4937, "step": 644 }, { "epoch": 0.011993034386220264, "grad_norm": 0.533745527267456, "learning_rate": 1.9992902813709433e-05, "loss": 0.3984, "step": 646 }, { "epoch": 0.012030164523638903, "grad_norm": 0.29780712723731995, "learning_rate": 1.999285880549573e-05, "loss": 0.5047, "step": 648 }, { "epoch": 0.012067294661057541, "grad_norm": 0.39466410875320435, "learning_rate": 1.9992814661309157e-05, "loss": 0.4383, "step": 650 }, { "epoch": 0.012104424798476178, "grad_norm": 1.1004207134246826, "learning_rate": 1.9992770381150305e-05, "loss": 0.5021, "step": 652 }, { "epoch": 0.012141554935894817, "grad_norm": 1.3368676900863647, "learning_rate": 1.999272596501978e-05, "loss": 0.4131, "step": 654 }, { "epoch": 0.012178685073313456, "grad_norm": 0.4995886981487274, "learning_rate": 1.9992681412918188e-05, "loss": 0.3882, "step": 656 }, { "epoch": 0.012215815210732095, "grad_norm": 0.3659275770187378, "learning_rate": 1.999263672484613e-05, "loss": 0.224, "step": 658 }, { "epoch": 0.012252945348150733, "grad_norm": 0.27424532175064087, "learning_rate": 1.9992591900804223e-05, "loss": 0.3113, "step": 660 }, { "epoch": 0.012290075485569372, "grad_norm": 0.3828636705875397, "learning_rate": 1.9992546940793065e-05, "loss": 0.3334, "step": 662 }, { "epoch": 0.012327205622988011, "grad_norm": 0.4719410240650177, "learning_rate": 1.9992501844813275e-05, "loss": 0.5218, "step": 664 }, { "epoch": 0.01236433576040665, "grad_norm": 0.4182148575782776, "learning_rate": 1.9992456612865473e-05, "loss": 0.3683, "step": 666 }, { "epoch": 0.012401465897825288, "grad_norm": 0.5829437375068665, "learning_rate": 1.999241124495026e-05, "loss": 0.4429, "step": 668 }, { "epoch": 0.012438596035243927, "grad_norm": 0.7482290863990784, "learning_rate": 1.9992365741068264e-05, "loss": 0.4297, "step": 670 }, { "epoch": 0.012475726172662564, "grad_norm": 0.38636359572410583, "learning_rate": 1.99923201012201e-05, "loss": 0.4251, "step": 672 }, { "epoch": 0.012512856310081203, "grad_norm": 0.40750887989997864, "learning_rate": 1.9992274325406393e-05, "loss": 0.2977, "step": 674 }, { "epoch": 0.012549986447499842, "grad_norm": 0.4604863226413727, "learning_rate": 1.9992228413627755e-05, "loss": 0.4084, "step": 676 }, { "epoch": 0.01258711658491848, "grad_norm": 0.4410512447357178, "learning_rate": 1.999218236588483e-05, "loss": 0.3426, "step": 678 }, { "epoch": 0.01262424672233712, "grad_norm": 0.37291786074638367, "learning_rate": 1.999213618217822e-05, "loss": 0.355, "step": 680 }, { "epoch": 0.012661376859755758, "grad_norm": 0.40774786472320557, "learning_rate": 1.9992089862508576e-05, "loss": 0.3963, "step": 682 }, { "epoch": 0.012698506997174397, "grad_norm": 0.5577800869941711, "learning_rate": 1.9992043406876513e-05, "loss": 0.4311, "step": 684 }, { "epoch": 0.012735637134593036, "grad_norm": 0.4383511543273926, "learning_rate": 1.9991996815282673e-05, "loss": 0.3518, "step": 686 }, { "epoch": 0.012772767272011674, "grad_norm": 0.3836650550365448, "learning_rate": 1.9991950087727684e-05, "loss": 0.5306, "step": 688 }, { "epoch": 0.012809897409430311, "grad_norm": 0.38333860039711, "learning_rate": 1.9991903224212185e-05, "loss": 0.442, "step": 690 }, { "epoch": 0.01284702754684895, "grad_norm": 0.5456172227859497, "learning_rate": 1.999185622473681e-05, "loss": 0.3217, "step": 692 }, { "epoch": 0.012884157684267589, "grad_norm": 0.31290164589881897, "learning_rate": 1.9991809089302204e-05, "loss": 0.5287, "step": 694 }, { "epoch": 0.012921287821686228, "grad_norm": 0.28537651896476746, "learning_rate": 1.9991761817909004e-05, "loss": 0.2736, "step": 696 }, { "epoch": 0.012958417959104867, "grad_norm": 0.3639223873615265, "learning_rate": 1.9991714410557854e-05, "loss": 0.3133, "step": 698 }, { "epoch": 0.012995548096523505, "grad_norm": 0.44784969091415405, "learning_rate": 1.99916668672494e-05, "loss": 0.2096, "step": 700 }, { "epoch": 0.013032678233942144, "grad_norm": 0.8881627321243286, "learning_rate": 1.9991619187984287e-05, "loss": 0.4387, "step": 702 }, { "epoch": 0.013069808371360783, "grad_norm": 0.3999682664871216, "learning_rate": 1.9991571372763165e-05, "loss": 0.3796, "step": 704 }, { "epoch": 0.013106938508779422, "grad_norm": 0.43037664890289307, "learning_rate": 1.9991523421586688e-05, "loss": 0.3527, "step": 706 }, { "epoch": 0.01314406864619806, "grad_norm": 0.45817339420318604, "learning_rate": 1.99914753344555e-05, "loss": 0.5195, "step": 708 }, { "epoch": 0.013181198783616697, "grad_norm": 0.402064710855484, "learning_rate": 1.9991427111370264e-05, "loss": 0.3444, "step": 710 }, { "epoch": 0.013218328921035336, "grad_norm": 0.3542466461658478, "learning_rate": 1.9991378752331634e-05, "loss": 0.3361, "step": 712 }, { "epoch": 0.013255459058453975, "grad_norm": 0.5512318015098572, "learning_rate": 1.9991330257340266e-05, "loss": 0.5111, "step": 714 }, { "epoch": 0.013292589195872614, "grad_norm": 0.40329602360725403, "learning_rate": 1.999128162639682e-05, "loss": 0.3864, "step": 716 }, { "epoch": 0.013329719333291253, "grad_norm": 0.44367092847824097, "learning_rate": 1.999123285950196e-05, "loss": 0.3929, "step": 718 }, { "epoch": 0.013366849470709891, "grad_norm": 0.4042806625366211, "learning_rate": 1.9991183956656344e-05, "loss": 0.4039, "step": 720 }, { "epoch": 0.01340397960812853, "grad_norm": 0.3432539999485016, "learning_rate": 1.9991134917860645e-05, "loss": 0.3648, "step": 722 }, { "epoch": 0.013441109745547169, "grad_norm": 0.38274747133255005, "learning_rate": 1.9991085743115525e-05, "loss": 0.4013, "step": 724 }, { "epoch": 0.013478239882965808, "grad_norm": 0.32239046692848206, "learning_rate": 1.9991036432421657e-05, "loss": 0.3329, "step": 726 }, { "epoch": 0.013515370020384445, "grad_norm": 0.36656567454338074, "learning_rate": 1.999098698577971e-05, "loss": 0.436, "step": 728 }, { "epoch": 0.013552500157803083, "grad_norm": 0.2693771421909332, "learning_rate": 1.9990937403190353e-05, "loss": 0.1754, "step": 730 }, { "epoch": 0.013589630295221722, "grad_norm": 0.4604509472846985, "learning_rate": 1.9990887684654264e-05, "loss": 0.3314, "step": 732 }, { "epoch": 0.013626760432640361, "grad_norm": 0.3748721480369568, "learning_rate": 1.9990837830172122e-05, "loss": 0.3602, "step": 734 }, { "epoch": 0.013663890570059, "grad_norm": 0.28685617446899414, "learning_rate": 1.9990787839744604e-05, "loss": 0.3291, "step": 736 }, { "epoch": 0.013701020707477638, "grad_norm": 0.4170685410499573, "learning_rate": 1.9990737713372388e-05, "loss": 0.3891, "step": 738 }, { "epoch": 0.013738150844896277, "grad_norm": 0.4516931474208832, "learning_rate": 1.999068745105616e-05, "loss": 0.298, "step": 740 }, { "epoch": 0.013775280982314916, "grad_norm": 0.34588298201560974, "learning_rate": 1.9990637052796596e-05, "loss": 0.4824, "step": 742 }, { "epoch": 0.013812411119733555, "grad_norm": 0.4612199068069458, "learning_rate": 1.9990586518594394e-05, "loss": 0.3388, "step": 744 }, { "epoch": 0.013849541257152194, "grad_norm": 0.3223876357078552, "learning_rate": 1.999053584845023e-05, "loss": 0.1741, "step": 746 }, { "epoch": 0.01388667139457083, "grad_norm": 0.5699933767318726, "learning_rate": 1.99904850423648e-05, "loss": 0.5256, "step": 748 }, { "epoch": 0.01392380153198947, "grad_norm": 0.33922168612480164, "learning_rate": 1.9990434100338795e-05, "loss": 0.518, "step": 750 }, { "epoch": 0.013960931669408108, "grad_norm": 0.36788812279701233, "learning_rate": 1.9990383022372903e-05, "loss": 0.3338, "step": 752 }, { "epoch": 0.013998061806826747, "grad_norm": 0.3664512038230896, "learning_rate": 1.9990331808467824e-05, "loss": 0.2868, "step": 754 }, { "epoch": 0.014035191944245386, "grad_norm": 0.4419233500957489, "learning_rate": 1.999028045862425e-05, "loss": 0.4018, "step": 756 }, { "epoch": 0.014072322081664024, "grad_norm": 0.41168367862701416, "learning_rate": 1.999022897284289e-05, "loss": 0.4652, "step": 758 }, { "epoch": 0.014109452219082663, "grad_norm": 0.4428732991218567, "learning_rate": 1.9990177351124434e-05, "loss": 0.3778, "step": 760 }, { "epoch": 0.014146582356501302, "grad_norm": 0.29541724920272827, "learning_rate": 1.999012559346959e-05, "loss": 0.2346, "step": 762 }, { "epoch": 0.01418371249391994, "grad_norm": 0.45679134130477905, "learning_rate": 1.999007369987906e-05, "loss": 0.3643, "step": 764 }, { "epoch": 0.014220842631338578, "grad_norm": 0.4593408405780792, "learning_rate": 1.999002167035355e-05, "loss": 0.259, "step": 766 }, { "epoch": 0.014257972768757217, "grad_norm": 0.41827812790870667, "learning_rate": 1.998996950489377e-05, "loss": 0.4825, "step": 768 }, { "epoch": 0.014295102906175855, "grad_norm": 0.3364203870296478, "learning_rate": 1.9989917203500428e-05, "loss": 0.2921, "step": 770 }, { "epoch": 0.014332233043594494, "grad_norm": 0.444349080324173, "learning_rate": 1.9989864766174235e-05, "loss": 0.4103, "step": 772 }, { "epoch": 0.014369363181013133, "grad_norm": 0.3217231035232544, "learning_rate": 1.998981219291591e-05, "loss": 0.3366, "step": 774 }, { "epoch": 0.014406493318431772, "grad_norm": 0.3905393183231354, "learning_rate": 1.9989759483726158e-05, "loss": 0.3954, "step": 776 }, { "epoch": 0.01444362345585041, "grad_norm": 0.3277907967567444, "learning_rate": 1.9989706638605707e-05, "loss": 0.307, "step": 778 }, { "epoch": 0.01448075359326905, "grad_norm": 0.4413081705570221, "learning_rate": 1.998965365755527e-05, "loss": 0.3412, "step": 780 }, { "epoch": 0.014517883730687688, "grad_norm": 0.548041582107544, "learning_rate": 1.9989600540575566e-05, "loss": 0.4014, "step": 782 }, { "epoch": 0.014555013868106325, "grad_norm": 0.4476698338985443, "learning_rate": 1.9989547287667325e-05, "loss": 0.5237, "step": 784 }, { "epoch": 0.014592144005524964, "grad_norm": 0.4426359534263611, "learning_rate": 1.9989493898831265e-05, "loss": 0.4013, "step": 786 }, { "epoch": 0.014629274142943603, "grad_norm": 0.2528916001319885, "learning_rate": 1.9989440374068115e-05, "loss": 0.275, "step": 788 }, { "epoch": 0.014666404280362241, "grad_norm": 0.4305020570755005, "learning_rate": 1.9989386713378606e-05, "loss": 0.3618, "step": 790 }, { "epoch": 0.01470353441778088, "grad_norm": 0.3134215176105499, "learning_rate": 1.9989332916763464e-05, "loss": 0.2907, "step": 792 }, { "epoch": 0.014740664555199519, "grad_norm": 0.31810179352760315, "learning_rate": 1.9989278984223423e-05, "loss": 0.4478, "step": 794 }, { "epoch": 0.014777794692618158, "grad_norm": 0.2873820960521698, "learning_rate": 1.9989224915759212e-05, "loss": 0.3466, "step": 796 }, { "epoch": 0.014814924830036796, "grad_norm": 0.39866873621940613, "learning_rate": 1.9989170711371574e-05, "loss": 0.374, "step": 798 }, { "epoch": 0.014852054967455435, "grad_norm": 0.5070676207542419, "learning_rate": 1.9989116371061246e-05, "loss": 0.2803, "step": 800 }, { "epoch": 0.014889185104874074, "grad_norm": 0.3221116065979004, "learning_rate": 1.998906189482896e-05, "loss": 0.3385, "step": 802 }, { "epoch": 0.014926315242292711, "grad_norm": 0.301285982131958, "learning_rate": 1.9989007282675468e-05, "loss": 0.4443, "step": 804 }, { "epoch": 0.01496344537971135, "grad_norm": 0.9393535256385803, "learning_rate": 1.9988952534601504e-05, "loss": 0.4516, "step": 806 }, { "epoch": 0.015000575517129989, "grad_norm": 0.36717355251312256, "learning_rate": 1.998889765060782e-05, "loss": 0.3675, "step": 808 }, { "epoch": 0.015037705654548627, "grad_norm": 0.42650389671325684, "learning_rate": 1.9988842630695155e-05, "loss": 0.4048, "step": 810 }, { "epoch": 0.015074835791967266, "grad_norm": 0.36395561695098877, "learning_rate": 1.9988787474864263e-05, "loss": 0.2658, "step": 812 }, { "epoch": 0.015111965929385905, "grad_norm": 0.4178607165813446, "learning_rate": 1.9988732183115894e-05, "loss": 0.1977, "step": 814 }, { "epoch": 0.015149096066804544, "grad_norm": 0.3648502230644226, "learning_rate": 1.99886767554508e-05, "loss": 0.5499, "step": 816 }, { "epoch": 0.015186226204223182, "grad_norm": 0.4816143810749054, "learning_rate": 1.9988621191869735e-05, "loss": 0.45, "step": 818 }, { "epoch": 0.015223356341641821, "grad_norm": 0.3553163707256317, "learning_rate": 1.9988565492373454e-05, "loss": 0.3057, "step": 820 }, { "epoch": 0.015260486479060458, "grad_norm": 0.354210764169693, "learning_rate": 1.9988509656962716e-05, "loss": 0.3653, "step": 822 }, { "epoch": 0.015297616616479097, "grad_norm": 0.49755728244781494, "learning_rate": 1.998845368563828e-05, "loss": 0.3403, "step": 824 }, { "epoch": 0.015334746753897736, "grad_norm": 0.39404791593551636, "learning_rate": 1.9988397578400912e-05, "loss": 0.4618, "step": 826 }, { "epoch": 0.015371876891316374, "grad_norm": 0.2892735004425049, "learning_rate": 1.998834133525137e-05, "loss": 0.2484, "step": 828 }, { "epoch": 0.015409007028735013, "grad_norm": 0.3096066415309906, "learning_rate": 1.9988284956190418e-05, "loss": 0.3135, "step": 830 }, { "epoch": 0.015446137166153652, "grad_norm": 0.4149438738822937, "learning_rate": 1.9988228441218828e-05, "loss": 0.3309, "step": 832 }, { "epoch": 0.01548326730357229, "grad_norm": 0.2606120705604553, "learning_rate": 1.9988171790337366e-05, "loss": 0.4085, "step": 834 }, { "epoch": 0.01552039744099093, "grad_norm": 0.3199135363101959, "learning_rate": 1.9988115003546805e-05, "loss": 0.3182, "step": 836 }, { "epoch": 0.015557527578409568, "grad_norm": 0.5812762379646301, "learning_rate": 1.9988058080847917e-05, "loss": 0.5002, "step": 838 }, { "epoch": 0.015594657715828207, "grad_norm": 0.42649170756340027, "learning_rate": 1.9988001022241478e-05, "loss": 0.5042, "step": 840 }, { "epoch": 0.015631787853246846, "grad_norm": 0.409511923789978, "learning_rate": 1.9987943827728256e-05, "loss": 0.257, "step": 842 }, { "epoch": 0.015668917990665485, "grad_norm": 0.3790913224220276, "learning_rate": 1.9987886497309043e-05, "loss": 0.2362, "step": 844 }, { "epoch": 0.015706048128084123, "grad_norm": 0.4556298851966858, "learning_rate": 1.998782903098461e-05, "loss": 0.3452, "step": 846 }, { "epoch": 0.015743178265502762, "grad_norm": 0.48480024933815, "learning_rate": 1.998777142875574e-05, "loss": 0.1826, "step": 848 }, { "epoch": 0.0157803084029214, "grad_norm": 0.47314268350601196, "learning_rate": 1.998771369062322e-05, "loss": 0.4582, "step": 850 }, { "epoch": 0.015817438540340036, "grad_norm": 0.3589678704738617, "learning_rate": 1.998765581658783e-05, "loss": 0.4627, "step": 852 }, { "epoch": 0.015854568677758675, "grad_norm": 0.45375433564186096, "learning_rate": 1.998759780665036e-05, "loss": 0.4603, "step": 854 }, { "epoch": 0.015891698815177314, "grad_norm": 0.49634289741516113, "learning_rate": 1.9987539660811603e-05, "loss": 0.4034, "step": 856 }, { "epoch": 0.015928828952595953, "grad_norm": 0.45370957255363464, "learning_rate": 1.9987481379072346e-05, "loss": 0.3334, "step": 858 }, { "epoch": 0.01596595909001459, "grad_norm": 0.35922300815582275, "learning_rate": 1.9987422961433384e-05, "loss": 0.4046, "step": 860 }, { "epoch": 0.01600308922743323, "grad_norm": 0.360714852809906, "learning_rate": 1.998736440789551e-05, "loss": 0.2845, "step": 862 }, { "epoch": 0.01604021936485187, "grad_norm": 0.3543033003807068, "learning_rate": 1.9987305718459527e-05, "loss": 0.3602, "step": 864 }, { "epoch": 0.016077349502270508, "grad_norm": 0.38280782103538513, "learning_rate": 1.9987246893126224e-05, "loss": 0.2207, "step": 866 }, { "epoch": 0.016114479639689146, "grad_norm": 0.28148868680000305, "learning_rate": 1.9987187931896404e-05, "loss": 0.4137, "step": 868 }, { "epoch": 0.016151609777107785, "grad_norm": 0.40491732954978943, "learning_rate": 1.998712883477088e-05, "loss": 0.4796, "step": 870 }, { "epoch": 0.016188739914526424, "grad_norm": 0.32691875100135803, "learning_rate": 1.998706960175044e-05, "loss": 0.4217, "step": 872 }, { "epoch": 0.016225870051945063, "grad_norm": 0.4398348331451416, "learning_rate": 1.99870102328359e-05, "loss": 0.5123, "step": 874 }, { "epoch": 0.0162630001893637, "grad_norm": 0.2852839529514313, "learning_rate": 1.9986950728028064e-05, "loss": 0.479, "step": 876 }, { "epoch": 0.01630013032678234, "grad_norm": 0.3615904450416565, "learning_rate": 1.9986891087327744e-05, "loss": 0.3495, "step": 878 }, { "epoch": 0.01633726046420098, "grad_norm": 0.3513162434101105, "learning_rate": 1.998683131073575e-05, "loss": 0.3456, "step": 880 }, { "epoch": 0.016374390601619618, "grad_norm": 0.3506811857223511, "learning_rate": 1.9986771398252897e-05, "loss": 0.3244, "step": 882 }, { "epoch": 0.016411520739038257, "grad_norm": 0.3797071874141693, "learning_rate": 1.9986711349879996e-05, "loss": 0.524, "step": 884 }, { "epoch": 0.016448650876456895, "grad_norm": 0.37659594416618347, "learning_rate": 1.998665116561787e-05, "loss": 0.3361, "step": 886 }, { "epoch": 0.01648578101387553, "grad_norm": 0.36968308687210083, "learning_rate": 1.9986590845467334e-05, "loss": 0.428, "step": 888 }, { "epoch": 0.01652291115129417, "grad_norm": 0.5282501578330994, "learning_rate": 1.998653038942921e-05, "loss": 0.4258, "step": 890 }, { "epoch": 0.016560041288712808, "grad_norm": 0.346874862909317, "learning_rate": 1.998646979750432e-05, "loss": 0.4382, "step": 892 }, { "epoch": 0.016597171426131447, "grad_norm": 0.3017529249191284, "learning_rate": 1.998640906969349e-05, "loss": 0.2046, "step": 894 }, { "epoch": 0.016634301563550086, "grad_norm": 0.34062352776527405, "learning_rate": 1.9986348205997545e-05, "loss": 0.379, "step": 896 }, { "epoch": 0.016671431700968724, "grad_norm": 0.7070618271827698, "learning_rate": 1.9986287206417314e-05, "loss": 0.5265, "step": 898 }, { "epoch": 0.016708561838387363, "grad_norm": 0.308414101600647, "learning_rate": 1.998622607095363e-05, "loss": 0.3012, "step": 900 }, { "epoch": 0.016745691975806002, "grad_norm": 0.2565460205078125, "learning_rate": 1.9986164799607315e-05, "loss": 0.2275, "step": 902 }, { "epoch": 0.01678282211322464, "grad_norm": 0.41305750608444214, "learning_rate": 1.998610339237921e-05, "loss": 0.4378, "step": 904 }, { "epoch": 0.01681995225064328, "grad_norm": 0.32478028535842896, "learning_rate": 1.998604184927015e-05, "loss": 0.3894, "step": 906 }, { "epoch": 0.01685708238806192, "grad_norm": 0.3334619700908661, "learning_rate": 1.9985980170280975e-05, "loss": 0.3713, "step": 908 }, { "epoch": 0.016894212525480557, "grad_norm": 0.284366637468338, "learning_rate": 1.9985918355412518e-05, "loss": 0.3807, "step": 910 }, { "epoch": 0.016931342662899196, "grad_norm": 0.30971869826316833, "learning_rate": 1.9985856404665627e-05, "loss": 0.1992, "step": 912 }, { "epoch": 0.016968472800317835, "grad_norm": 0.36958345770835876, "learning_rate": 1.9985794318041137e-05, "loss": 0.5721, "step": 914 }, { "epoch": 0.017005602937736473, "grad_norm": 0.3124692440032959, "learning_rate": 1.9985732095539904e-05, "loss": 0.3025, "step": 916 }, { "epoch": 0.017042733075155112, "grad_norm": 0.3706187307834625, "learning_rate": 1.998566973716276e-05, "loss": 0.6818, "step": 918 }, { "epoch": 0.01707986321257375, "grad_norm": 0.35288944840431213, "learning_rate": 1.9985607242910567e-05, "loss": 0.3121, "step": 920 }, { "epoch": 0.01711699334999239, "grad_norm": 0.27718523144721985, "learning_rate": 1.998554461278417e-05, "loss": 0.3448, "step": 922 }, { "epoch": 0.01715412348741103, "grad_norm": 0.4456179440021515, "learning_rate": 1.9985481846784416e-05, "loss": 0.3475, "step": 924 }, { "epoch": 0.017191253624829664, "grad_norm": 0.4590824544429779, "learning_rate": 1.9985418944912167e-05, "loss": 0.5114, "step": 926 }, { "epoch": 0.017228383762248303, "grad_norm": 0.3306584358215332, "learning_rate": 1.9985355907168275e-05, "loss": 0.2187, "step": 928 }, { "epoch": 0.01726551389966694, "grad_norm": 0.39214852452278137, "learning_rate": 1.9985292733553603e-05, "loss": 0.3698, "step": 930 }, { "epoch": 0.01730264403708558, "grad_norm": 0.40430113673210144, "learning_rate": 1.9985229424069002e-05, "loss": 0.2494, "step": 932 }, { "epoch": 0.01733977417450422, "grad_norm": 0.3753194510936737, "learning_rate": 1.9985165978715336e-05, "loss": 0.4021, "step": 934 }, { "epoch": 0.017376904311922858, "grad_norm": 0.5208768844604492, "learning_rate": 1.9985102397493475e-05, "loss": 0.4051, "step": 936 }, { "epoch": 0.017414034449341496, "grad_norm": 0.35050153732299805, "learning_rate": 1.9985038680404277e-05, "loss": 0.3079, "step": 938 }, { "epoch": 0.017451164586760135, "grad_norm": 0.2716299891471863, "learning_rate": 1.9984974827448612e-05, "loss": 0.3982, "step": 940 }, { "epoch": 0.017488294724178774, "grad_norm": 0.30002743005752563, "learning_rate": 1.9984910838627348e-05, "loss": 0.2825, "step": 942 }, { "epoch": 0.017525424861597413, "grad_norm": 0.3770863115787506, "learning_rate": 1.9984846713941355e-05, "loss": 0.424, "step": 944 }, { "epoch": 0.01756255499901605, "grad_norm": 0.27097564935684204, "learning_rate": 1.9984782453391505e-05, "loss": 0.245, "step": 946 }, { "epoch": 0.01759968513643469, "grad_norm": 0.4151020348072052, "learning_rate": 1.9984718056978674e-05, "loss": 0.3004, "step": 948 }, { "epoch": 0.01763681527385333, "grad_norm": 0.372779905796051, "learning_rate": 1.9984653524703738e-05, "loss": 0.3734, "step": 950 }, { "epoch": 0.017673945411271968, "grad_norm": 0.3685815632343292, "learning_rate": 1.9984588856567576e-05, "loss": 0.4165, "step": 952 }, { "epoch": 0.017711075548690607, "grad_norm": 0.2559392750263214, "learning_rate": 1.998452405257107e-05, "loss": 0.2398, "step": 954 }, { "epoch": 0.017748205686109245, "grad_norm": 0.2744353711605072, "learning_rate": 1.9984459112715097e-05, "loss": 0.4297, "step": 956 }, { "epoch": 0.017785335823527884, "grad_norm": 0.31595975160598755, "learning_rate": 1.998439403700054e-05, "loss": 0.3665, "step": 958 }, { "epoch": 0.017822465960946523, "grad_norm": 0.41052547097206116, "learning_rate": 1.998432882542829e-05, "loss": 0.3007, "step": 960 }, { "epoch": 0.01785959609836516, "grad_norm": 0.28710436820983887, "learning_rate": 1.9984263477999225e-05, "loss": 0.4114, "step": 962 }, { "epoch": 0.017896726235783797, "grad_norm": 0.46606481075286865, "learning_rate": 1.9984197994714245e-05, "loss": 0.4448, "step": 964 }, { "epoch": 0.017933856373202436, "grad_norm": 0.42105162143707275, "learning_rate": 1.9984132375574238e-05, "loss": 0.2621, "step": 966 }, { "epoch": 0.017970986510621074, "grad_norm": 0.36550813913345337, "learning_rate": 1.9984066620580092e-05, "loss": 0.3482, "step": 968 }, { "epoch": 0.018008116648039713, "grad_norm": 0.4748978316783905, "learning_rate": 1.9984000729732708e-05, "loss": 0.4168, "step": 970 }, { "epoch": 0.018045246785458352, "grad_norm": 0.4448954463005066, "learning_rate": 1.9983934703032975e-05, "loss": 0.3936, "step": 972 }, { "epoch": 0.01808237692287699, "grad_norm": 0.38591641187667847, "learning_rate": 1.9983868540481803e-05, "loss": 0.4349, "step": 974 }, { "epoch": 0.01811950706029563, "grad_norm": 0.29241764545440674, "learning_rate": 1.998380224208008e-05, "loss": 0.4832, "step": 976 }, { "epoch": 0.01815663719771427, "grad_norm": 0.38096871972084045, "learning_rate": 1.9983735807828714e-05, "loss": 0.3898, "step": 978 }, { "epoch": 0.018193767335132907, "grad_norm": 0.2944658100605011, "learning_rate": 1.9983669237728607e-05, "loss": 0.3274, "step": 980 }, { "epoch": 0.018230897472551546, "grad_norm": 0.4047447443008423, "learning_rate": 1.998360253178067e-05, "loss": 0.3351, "step": 982 }, { "epoch": 0.018268027609970185, "grad_norm": 0.3475852906703949, "learning_rate": 1.9983535689985807e-05, "loss": 0.4889, "step": 984 }, { "epoch": 0.018305157747388823, "grad_norm": 0.3606351613998413, "learning_rate": 1.9983468712344926e-05, "loss": 0.256, "step": 986 }, { "epoch": 0.018342287884807462, "grad_norm": 0.35583171248435974, "learning_rate": 1.998340159885894e-05, "loss": 0.4103, "step": 988 }, { "epoch": 0.0183794180222261, "grad_norm": 0.343099981546402, "learning_rate": 1.998333434952876e-05, "loss": 0.4615, "step": 990 }, { "epoch": 0.01841654815964474, "grad_norm": 0.41156670451164246, "learning_rate": 1.9983266964355304e-05, "loss": 0.434, "step": 992 }, { "epoch": 0.01845367829706338, "grad_norm": 0.40091824531555176, "learning_rate": 1.998319944333949e-05, "loss": 0.4066, "step": 994 }, { "epoch": 0.018490808434482017, "grad_norm": 0.3511546552181244, "learning_rate": 1.998313178648223e-05, "loss": 0.3634, "step": 996 }, { "epoch": 0.018527938571900656, "grad_norm": 0.5660949945449829, "learning_rate": 1.9983063993784455e-05, "loss": 0.4431, "step": 998 }, { "epoch": 0.018565068709319295, "grad_norm": 0.4245556890964508, "learning_rate": 1.9982996065247077e-05, "loss": 0.243, "step": 1000 }, { "epoch": 0.01860219884673793, "grad_norm": 0.47335347533226013, "learning_rate": 1.998292800087103e-05, "loss": 0.3385, "step": 1002 }, { "epoch": 0.01863932898415657, "grad_norm": 0.23368427157402039, "learning_rate": 1.9982859800657232e-05, "loss": 0.2892, "step": 1004 }, { "epoch": 0.018676459121575208, "grad_norm": 0.3205662667751312, "learning_rate": 1.9982791464606614e-05, "loss": 0.4385, "step": 1006 }, { "epoch": 0.018713589258993846, "grad_norm": 0.3772878050804138, "learning_rate": 1.9982722992720108e-05, "loss": 0.2819, "step": 1008 }, { "epoch": 0.018750719396412485, "grad_norm": 0.45986175537109375, "learning_rate": 1.998265438499864e-05, "loss": 0.4632, "step": 1010 }, { "epoch": 0.018787849533831124, "grad_norm": 0.32547804713249207, "learning_rate": 1.9982585641443153e-05, "loss": 0.3966, "step": 1012 }, { "epoch": 0.018824979671249763, "grad_norm": 0.4675557315349579, "learning_rate": 1.9982516762054574e-05, "loss": 0.2226, "step": 1014 }, { "epoch": 0.0188621098086684, "grad_norm": 0.5159827470779419, "learning_rate": 1.9982447746833844e-05, "loss": 0.4455, "step": 1016 }, { "epoch": 0.01889923994608704, "grad_norm": 0.3231106400489807, "learning_rate": 1.9982378595781898e-05, "loss": 0.6755, "step": 1018 }, { "epoch": 0.01893637008350568, "grad_norm": 0.440939724445343, "learning_rate": 1.9982309308899683e-05, "loss": 0.3858, "step": 1020 }, { "epoch": 0.018973500220924318, "grad_norm": 0.3986174166202545, "learning_rate": 1.9982239886188136e-05, "loss": 0.5566, "step": 1022 }, { "epoch": 0.019010630358342957, "grad_norm": 0.431723415851593, "learning_rate": 1.998217032764821e-05, "loss": 0.4052, "step": 1024 }, { "epoch": 0.019047760495761595, "grad_norm": 0.3694363832473755, "learning_rate": 1.998210063328084e-05, "loss": 0.4682, "step": 1026 }, { "epoch": 0.019084890633180234, "grad_norm": 0.3512609004974365, "learning_rate": 1.9982030803086982e-05, "loss": 0.348, "step": 1028 }, { "epoch": 0.019122020770598873, "grad_norm": 0.31077277660369873, "learning_rate": 1.9981960837067584e-05, "loss": 0.4706, "step": 1030 }, { "epoch": 0.01915915090801751, "grad_norm": 0.3602035343647003, "learning_rate": 1.99818907352236e-05, "loss": 0.4031, "step": 1032 }, { "epoch": 0.01919628104543615, "grad_norm": 0.49307844042778015, "learning_rate": 1.998182049755598e-05, "loss": 0.3982, "step": 1034 }, { "epoch": 0.01923341118285479, "grad_norm": 0.37214940786361694, "learning_rate": 1.9981750124065684e-05, "loss": 0.3833, "step": 1036 }, { "epoch": 0.019270541320273428, "grad_norm": 0.3503119945526123, "learning_rate": 1.9981679614753665e-05, "loss": 0.3887, "step": 1038 }, { "epoch": 0.019307671457692063, "grad_norm": 0.5146250128746033, "learning_rate": 1.998160896962089e-05, "loss": 0.4304, "step": 1040 }, { "epoch": 0.019344801595110702, "grad_norm": 0.413052499294281, "learning_rate": 1.998153818866831e-05, "loss": 0.402, "step": 1042 }, { "epoch": 0.01938193173252934, "grad_norm": 0.516735315322876, "learning_rate": 1.9981467271896897e-05, "loss": 0.2478, "step": 1044 }, { "epoch": 0.01941906186994798, "grad_norm": 0.4669174551963806, "learning_rate": 1.998139621930761e-05, "loss": 0.4645, "step": 1046 }, { "epoch": 0.01945619200736662, "grad_norm": 0.3801192343235016, "learning_rate": 1.9981325030901422e-05, "loss": 0.2726, "step": 1048 }, { "epoch": 0.019493322144785257, "grad_norm": 0.344904363155365, "learning_rate": 1.9981253706679292e-05, "loss": 0.5221, "step": 1050 }, { "epoch": 0.019530452282203896, "grad_norm": 0.3043608069419861, "learning_rate": 1.99811822466422e-05, "loss": 0.5012, "step": 1052 }, { "epoch": 0.019567582419622535, "grad_norm": 0.3266139328479767, "learning_rate": 1.9981110650791116e-05, "loss": 0.539, "step": 1054 }, { "epoch": 0.019604712557041173, "grad_norm": 0.342694491147995, "learning_rate": 1.9981038919127013e-05, "loss": 0.3943, "step": 1056 }, { "epoch": 0.019641842694459812, "grad_norm": 0.28042566776275635, "learning_rate": 1.9980967051650863e-05, "loss": 0.3984, "step": 1058 }, { "epoch": 0.01967897283187845, "grad_norm": 0.5032764077186584, "learning_rate": 1.998089504836365e-05, "loss": 0.5622, "step": 1060 }, { "epoch": 0.01971610296929709, "grad_norm": 0.29308953881263733, "learning_rate": 1.9980822909266352e-05, "loss": 0.3935, "step": 1062 }, { "epoch": 0.01975323310671573, "grad_norm": 0.4919288158416748, "learning_rate": 1.998075063435995e-05, "loss": 0.3721, "step": 1064 }, { "epoch": 0.019790363244134367, "grad_norm": 0.3275351822376251, "learning_rate": 1.9980678223645426e-05, "loss": 0.371, "step": 1066 }, { "epoch": 0.019827493381553006, "grad_norm": 0.31691431999206543, "learning_rate": 1.998060567712377e-05, "loss": 0.1855, "step": 1068 }, { "epoch": 0.019864623518971645, "grad_norm": 0.8908047676086426, "learning_rate": 1.9980532994795965e-05, "loss": 0.4193, "step": 1070 }, { "epoch": 0.019901753656390284, "grad_norm": 0.2907158136367798, "learning_rate": 1.9980460176663002e-05, "loss": 0.4305, "step": 1072 }, { "epoch": 0.019938883793808922, "grad_norm": 0.34081071615219116, "learning_rate": 1.998038722272587e-05, "loss": 0.4845, "step": 1074 }, { "epoch": 0.01997601393122756, "grad_norm": 0.33504238724708557, "learning_rate": 1.9980314132985563e-05, "loss": 0.3663, "step": 1076 }, { "epoch": 0.020013144068646196, "grad_norm": 0.4377474784851074, "learning_rate": 1.9980240907443074e-05, "loss": 0.2243, "step": 1078 }, { "epoch": 0.020050274206064835, "grad_norm": 0.4400773346424103, "learning_rate": 1.99801675460994e-05, "loss": 0.3021, "step": 1080 }, { "epoch": 0.020087404343483474, "grad_norm": 0.48415157198905945, "learning_rate": 1.9980094048955542e-05, "loss": 0.6019, "step": 1082 }, { "epoch": 0.020124534480902113, "grad_norm": 0.4044879376888275, "learning_rate": 1.9980020416012497e-05, "loss": 0.4542, "step": 1084 }, { "epoch": 0.02016166461832075, "grad_norm": 0.2885062098503113, "learning_rate": 1.9979946647271266e-05, "loss": 0.3487, "step": 1086 }, { "epoch": 0.02019879475573939, "grad_norm": 0.5480518937110901, "learning_rate": 1.9979872742732856e-05, "loss": 0.3818, "step": 1088 }, { "epoch": 0.02023592489315803, "grad_norm": 0.6714348196983337, "learning_rate": 1.997979870239827e-05, "loss": 0.374, "step": 1090 }, { "epoch": 0.020273055030576668, "grad_norm": 0.4432184398174286, "learning_rate": 1.997972452626852e-05, "loss": 0.3846, "step": 1092 }, { "epoch": 0.020310185167995307, "grad_norm": 0.33412840962409973, "learning_rate": 1.997965021434461e-05, "loss": 0.295, "step": 1094 }, { "epoch": 0.020347315305413945, "grad_norm": 0.5967075824737549, "learning_rate": 1.997957576662755e-05, "loss": 0.3061, "step": 1096 }, { "epoch": 0.020384445442832584, "grad_norm": 0.43157070875167847, "learning_rate": 1.997950118311836e-05, "loss": 0.3643, "step": 1098 }, { "epoch": 0.020421575580251223, "grad_norm": 0.40424004197120667, "learning_rate": 1.997942646381805e-05, "loss": 0.3498, "step": 1100 }, { "epoch": 0.02045870571766986, "grad_norm": 0.3167525827884674, "learning_rate": 1.9979351608727637e-05, "loss": 0.3574, "step": 1102 }, { "epoch": 0.0204958358550885, "grad_norm": 0.4568251073360443, "learning_rate": 1.997927661784814e-05, "loss": 0.4542, "step": 1104 }, { "epoch": 0.02053296599250714, "grad_norm": 0.41422349214553833, "learning_rate": 1.9979201491180582e-05, "loss": 0.2961, "step": 1106 }, { "epoch": 0.020570096129925778, "grad_norm": 0.3077956736087799, "learning_rate": 1.997912622872598e-05, "loss": 0.5079, "step": 1108 }, { "epoch": 0.020607226267344417, "grad_norm": 0.40603139996528625, "learning_rate": 1.9979050830485362e-05, "loss": 0.4633, "step": 1110 }, { "epoch": 0.020644356404763056, "grad_norm": 0.32469528913497925, "learning_rate": 1.9978975296459754e-05, "loss": 0.3549, "step": 1112 }, { "epoch": 0.020681486542181694, "grad_norm": 0.4310847818851471, "learning_rate": 1.9978899626650184e-05, "loss": 0.4445, "step": 1114 }, { "epoch": 0.02071861667960033, "grad_norm": 0.35542115569114685, "learning_rate": 1.9978823821057674e-05, "loss": 0.3049, "step": 1116 }, { "epoch": 0.02075574681701897, "grad_norm": 0.3398759663105011, "learning_rate": 1.9978747879683266e-05, "loss": 0.3315, "step": 1118 }, { "epoch": 0.020792876954437607, "grad_norm": 0.2974185347557068, "learning_rate": 1.9978671802527993e-05, "loss": 0.2423, "step": 1120 }, { "epoch": 0.020830007091856246, "grad_norm": 0.39581137895584106, "learning_rate": 1.997859558959288e-05, "loss": 0.402, "step": 1122 }, { "epoch": 0.020867137229274885, "grad_norm": 0.42987826466560364, "learning_rate": 1.9978519240878973e-05, "loss": 0.3136, "step": 1124 }, { "epoch": 0.020904267366693523, "grad_norm": 0.2600070536136627, "learning_rate": 1.997844275638731e-05, "loss": 0.2822, "step": 1126 }, { "epoch": 0.020941397504112162, "grad_norm": 0.41146188974380493, "learning_rate": 1.997836613611893e-05, "loss": 0.3714, "step": 1128 }, { "epoch": 0.0209785276415308, "grad_norm": 0.31470268964767456, "learning_rate": 1.9978289380074872e-05, "loss": 0.3559, "step": 1130 }, { "epoch": 0.02101565777894944, "grad_norm": 0.33213889598846436, "learning_rate": 1.9978212488256185e-05, "loss": 0.4429, "step": 1132 }, { "epoch": 0.02105278791636808, "grad_norm": 0.33684155344963074, "learning_rate": 1.9978135460663913e-05, "loss": 0.3902, "step": 1134 }, { "epoch": 0.021089918053786717, "grad_norm": 0.4565243124961853, "learning_rate": 1.9978058297299108e-05, "loss": 0.48, "step": 1136 }, { "epoch": 0.021127048191205356, "grad_norm": 0.3734474182128906, "learning_rate": 1.9977980998162815e-05, "loss": 0.4049, "step": 1138 }, { "epoch": 0.021164178328623995, "grad_norm": 0.3784891366958618, "learning_rate": 1.9977903563256092e-05, "loss": 0.4839, "step": 1140 }, { "epoch": 0.021201308466042634, "grad_norm": 0.31104305386543274, "learning_rate": 1.9977825992579986e-05, "loss": 0.3601, "step": 1142 }, { "epoch": 0.021238438603461272, "grad_norm": 0.37195491790771484, "learning_rate": 1.9977748286135558e-05, "loss": 0.4285, "step": 1144 }, { "epoch": 0.02127556874087991, "grad_norm": 0.3654918372631073, "learning_rate": 1.997767044392386e-05, "loss": 0.4709, "step": 1146 }, { "epoch": 0.02131269887829855, "grad_norm": 0.3603534400463104, "learning_rate": 1.9977592465945955e-05, "loss": 0.4, "step": 1148 }, { "epoch": 0.02134982901571719, "grad_norm": 0.37144672870635986, "learning_rate": 1.99775143522029e-05, "loss": 0.2892, "step": 1150 }, { "epoch": 0.021386959153135827, "grad_norm": 0.34551459550857544, "learning_rate": 1.9977436102695763e-05, "loss": 0.4734, "step": 1152 }, { "epoch": 0.021424089290554463, "grad_norm": 0.3170728087425232, "learning_rate": 1.997735771742561e-05, "loss": 0.3201, "step": 1154 }, { "epoch": 0.0214612194279731, "grad_norm": 0.44156527519226074, "learning_rate": 1.9977279196393498e-05, "loss": 0.3395, "step": 1156 }, { "epoch": 0.02149834956539174, "grad_norm": 0.4250219166278839, "learning_rate": 1.9977200539600502e-05, "loss": 0.4278, "step": 1158 }, { "epoch": 0.02153547970281038, "grad_norm": 0.4389667510986328, "learning_rate": 1.9977121747047694e-05, "loss": 0.2627, "step": 1160 }, { "epoch": 0.021572609840229018, "grad_norm": 0.35008880496025085, "learning_rate": 1.9977042818736145e-05, "loss": 0.4204, "step": 1162 }, { "epoch": 0.021609739977647657, "grad_norm": 0.34787076711654663, "learning_rate": 1.9976963754666926e-05, "loss": 0.2212, "step": 1164 }, { "epoch": 0.021646870115066295, "grad_norm": 0.34879979491233826, "learning_rate": 1.9976884554841117e-05, "loss": 0.4398, "step": 1166 }, { "epoch": 0.021684000252484934, "grad_norm": 0.403594046831131, "learning_rate": 1.9976805219259793e-05, "loss": 0.5654, "step": 1168 }, { "epoch": 0.021721130389903573, "grad_norm": 0.368293434381485, "learning_rate": 1.9976725747924034e-05, "loss": 0.2842, "step": 1170 }, { "epoch": 0.02175826052732221, "grad_norm": 0.2966884970664978, "learning_rate": 1.9976646140834918e-05, "loss": 0.282, "step": 1172 }, { "epoch": 0.02179539066474085, "grad_norm": 0.40858063101768494, "learning_rate": 1.9976566397993533e-05, "loss": 0.3947, "step": 1174 }, { "epoch": 0.02183252080215949, "grad_norm": 0.3218763768672943, "learning_rate": 1.9976486519400965e-05, "loss": 0.3469, "step": 1176 }, { "epoch": 0.021869650939578128, "grad_norm": 0.36078453063964844, "learning_rate": 1.9976406505058296e-05, "loss": 0.5874, "step": 1178 }, { "epoch": 0.021906781076996767, "grad_norm": 0.3969400227069855, "learning_rate": 1.997632635496662e-05, "loss": 0.3521, "step": 1180 }, { "epoch": 0.021943911214415406, "grad_norm": 0.28152936697006226, "learning_rate": 1.9976246069127022e-05, "loss": 0.2369, "step": 1182 }, { "epoch": 0.021981041351834044, "grad_norm": 0.4404222369194031, "learning_rate": 1.99761656475406e-05, "loss": 0.3445, "step": 1184 }, { "epoch": 0.022018171489252683, "grad_norm": 0.3748471140861511, "learning_rate": 1.9976085090208442e-05, "loss": 0.5414, "step": 1186 }, { "epoch": 0.022055301626671322, "grad_norm": 0.4265405237674713, "learning_rate": 1.997600439713165e-05, "loss": 0.5902, "step": 1188 }, { "epoch": 0.02209243176408996, "grad_norm": 0.3155985474586487, "learning_rate": 1.997592356831132e-05, "loss": 0.3268, "step": 1190 }, { "epoch": 0.022129561901508596, "grad_norm": 0.3741665482521057, "learning_rate": 1.997584260374855e-05, "loss": 0.3036, "step": 1192 }, { "epoch": 0.022166692038927235, "grad_norm": 0.3819212317466736, "learning_rate": 1.9975761503444447e-05, "loss": 0.3594, "step": 1194 }, { "epoch": 0.022203822176345873, "grad_norm": 0.2932555377483368, "learning_rate": 1.9975680267400107e-05, "loss": 0.3263, "step": 1196 }, { "epoch": 0.022240952313764512, "grad_norm": 0.34051573276519775, "learning_rate": 1.997559889561664e-05, "loss": 0.5083, "step": 1198 }, { "epoch": 0.02227808245118315, "grad_norm": 0.2579888701438904, "learning_rate": 1.997551738809515e-05, "loss": 0.3431, "step": 1200 }, { "epoch": 0.02231521258860179, "grad_norm": 0.32302817702293396, "learning_rate": 1.9975435744836753e-05, "loss": 0.3618, "step": 1202 }, { "epoch": 0.02235234272602043, "grad_norm": 0.40014100074768066, "learning_rate": 1.9975353965842556e-05, "loss": 0.3728, "step": 1204 }, { "epoch": 0.022389472863439067, "grad_norm": 0.35799989104270935, "learning_rate": 1.9975272051113666e-05, "loss": 0.3406, "step": 1206 }, { "epoch": 0.022426603000857706, "grad_norm": 0.3210586905479431, "learning_rate": 1.9975190000651206e-05, "loss": 0.398, "step": 1208 }, { "epoch": 0.022463733138276345, "grad_norm": 0.44624197483062744, "learning_rate": 1.997510781445629e-05, "loss": 0.3531, "step": 1210 }, { "epoch": 0.022500863275694984, "grad_norm": 0.45051196217536926, "learning_rate": 1.9975025492530034e-05, "loss": 0.2711, "step": 1212 }, { "epoch": 0.022537993413113622, "grad_norm": 0.32351264357566833, "learning_rate": 1.997494303487356e-05, "loss": 0.4831, "step": 1214 }, { "epoch": 0.02257512355053226, "grad_norm": 0.32683709263801575, "learning_rate": 1.997486044148799e-05, "loss": 0.2123, "step": 1216 }, { "epoch": 0.0226122536879509, "grad_norm": 0.5519673824310303, "learning_rate": 1.9974777712374446e-05, "loss": 0.2138, "step": 1218 }, { "epoch": 0.02264938382536954, "grad_norm": 0.4483173191547394, "learning_rate": 1.9974694847534056e-05, "loss": 0.467, "step": 1220 }, { "epoch": 0.022686513962788177, "grad_norm": 0.36775970458984375, "learning_rate": 1.997461184696795e-05, "loss": 0.4628, "step": 1222 }, { "epoch": 0.022723644100206816, "grad_norm": 0.27962982654571533, "learning_rate": 1.997452871067725e-05, "loss": 0.3177, "step": 1224 }, { "epoch": 0.022760774237625455, "grad_norm": 0.3483109772205353, "learning_rate": 1.9974445438663096e-05, "loss": 0.3289, "step": 1226 }, { "epoch": 0.02279790437504409, "grad_norm": 0.40001213550567627, "learning_rate": 1.9974362030926612e-05, "loss": 0.335, "step": 1228 }, { "epoch": 0.02283503451246273, "grad_norm": 0.3237497806549072, "learning_rate": 1.9974278487468938e-05, "loss": 0.2585, "step": 1230 }, { "epoch": 0.022872164649881368, "grad_norm": 0.3753138780593872, "learning_rate": 1.9974194808291215e-05, "loss": 0.6566, "step": 1232 }, { "epoch": 0.022909294787300007, "grad_norm": 0.301988810300827, "learning_rate": 1.9974110993394572e-05, "loss": 0.4063, "step": 1234 }, { "epoch": 0.022946424924718645, "grad_norm": 0.28260454535484314, "learning_rate": 1.9974027042780154e-05, "loss": 0.3565, "step": 1236 }, { "epoch": 0.022983555062137284, "grad_norm": 0.5421139597892761, "learning_rate": 1.9973942956449103e-05, "loss": 0.3696, "step": 1238 }, { "epoch": 0.023020685199555923, "grad_norm": 0.4133604168891907, "learning_rate": 1.9973858734402567e-05, "loss": 0.3346, "step": 1240 }, { "epoch": 0.02305781533697456, "grad_norm": 0.33195871114730835, "learning_rate": 1.9973774376641688e-05, "loss": 0.2657, "step": 1242 }, { "epoch": 0.0230949454743932, "grad_norm": 0.31317609548568726, "learning_rate": 1.997368988316761e-05, "loss": 0.3716, "step": 1244 }, { "epoch": 0.02313207561181184, "grad_norm": 0.36527132987976074, "learning_rate": 1.9973605253981496e-05, "loss": 0.5199, "step": 1246 }, { "epoch": 0.023169205749230478, "grad_norm": 0.32158660888671875, "learning_rate": 1.9973520489084483e-05, "loss": 0.4862, "step": 1248 }, { "epoch": 0.023206335886649117, "grad_norm": 0.41991668939590454, "learning_rate": 1.9973435588477736e-05, "loss": 0.4259, "step": 1250 }, { "epoch": 0.023243466024067756, "grad_norm": 0.39704686403274536, "learning_rate": 1.99733505521624e-05, "loss": 0.3365, "step": 1252 }, { "epoch": 0.023280596161486394, "grad_norm": 0.3496972918510437, "learning_rate": 1.9973265380139633e-05, "loss": 0.3906, "step": 1254 }, { "epoch": 0.023317726298905033, "grad_norm": 0.3918420076370239, "learning_rate": 1.9973180072410603e-05, "loss": 0.3035, "step": 1256 }, { "epoch": 0.023354856436323672, "grad_norm": 0.3882104158401489, "learning_rate": 1.9973094628976465e-05, "loss": 0.4006, "step": 1258 }, { "epoch": 0.02339198657374231, "grad_norm": 0.398441344499588, "learning_rate": 1.997300904983838e-05, "loss": 0.2586, "step": 1260 }, { "epoch": 0.02342911671116095, "grad_norm": 0.8797444105148315, "learning_rate": 1.9972923334997515e-05, "loss": 0.4944, "step": 1262 }, { "epoch": 0.023466246848579588, "grad_norm": 0.3843643069267273, "learning_rate": 1.9972837484455033e-05, "loss": 0.4025, "step": 1264 }, { "epoch": 0.023503376985998223, "grad_norm": 0.51822829246521, "learning_rate": 1.997275149821211e-05, "loss": 0.3721, "step": 1266 }, { "epoch": 0.023540507123416862, "grad_norm": 0.4143613278865814, "learning_rate": 1.9972665376269908e-05, "loss": 0.3916, "step": 1268 }, { "epoch": 0.0235776372608355, "grad_norm": 0.33080142736434937, "learning_rate": 1.9972579118629605e-05, "loss": 0.3852, "step": 1270 }, { "epoch": 0.02361476739825414, "grad_norm": 0.4805343449115753, "learning_rate": 1.9972492725292368e-05, "loss": 0.2985, "step": 1272 }, { "epoch": 0.02365189753567278, "grad_norm": 0.44258785247802734, "learning_rate": 1.997240619625938e-05, "loss": 0.545, "step": 1274 }, { "epoch": 0.023689027673091417, "grad_norm": 0.3181736171245575, "learning_rate": 1.997231953153181e-05, "loss": 0.3476, "step": 1276 }, { "epoch": 0.023726157810510056, "grad_norm": 0.3980746865272522, "learning_rate": 1.9972232731110843e-05, "loss": 0.3606, "step": 1278 }, { "epoch": 0.023763287947928695, "grad_norm": 0.3911648392677307, "learning_rate": 1.9972145794997658e-05, "loss": 0.4655, "step": 1280 }, { "epoch": 0.023800418085347334, "grad_norm": 0.353172242641449, "learning_rate": 1.997205872319344e-05, "loss": 0.5421, "step": 1282 }, { "epoch": 0.023837548222765972, "grad_norm": 0.47451725602149963, "learning_rate": 1.9971971515699376e-05, "loss": 0.4842, "step": 1284 }, { "epoch": 0.02387467836018461, "grad_norm": 0.32680267095565796, "learning_rate": 1.9971884172516644e-05, "loss": 0.4906, "step": 1286 }, { "epoch": 0.02391180849760325, "grad_norm": 0.25843244791030884, "learning_rate": 1.9971796693646437e-05, "loss": 0.266, "step": 1288 }, { "epoch": 0.02394893863502189, "grad_norm": 0.40673211216926575, "learning_rate": 1.997170907908995e-05, "loss": 0.4269, "step": 1290 }, { "epoch": 0.023986068772440527, "grad_norm": 0.39124974608421326, "learning_rate": 1.997162132884837e-05, "loss": 0.4103, "step": 1292 }, { "epoch": 0.024023198909859166, "grad_norm": 0.3710108697414398, "learning_rate": 1.9971533442922893e-05, "loss": 0.219, "step": 1294 }, { "epoch": 0.024060329047277805, "grad_norm": 0.3995617628097534, "learning_rate": 1.9971445421314713e-05, "loss": 0.3433, "step": 1296 }, { "epoch": 0.024097459184696444, "grad_norm": 0.3406704068183899, "learning_rate": 1.9971357264025027e-05, "loss": 0.3873, "step": 1298 }, { "epoch": 0.024134589322115083, "grad_norm": 0.35661450028419495, "learning_rate": 1.997126897105504e-05, "loss": 0.3453, "step": 1300 }, { "epoch": 0.02417171945953372, "grad_norm": 0.8108943104743958, "learning_rate": 1.997118054240595e-05, "loss": 0.3407, "step": 1302 }, { "epoch": 0.024208849596952357, "grad_norm": 0.32169821858406067, "learning_rate": 1.9971091978078957e-05, "loss": 0.3548, "step": 1304 }, { "epoch": 0.024245979734370995, "grad_norm": 0.3153872489929199, "learning_rate": 1.9971003278075266e-05, "loss": 0.34, "step": 1306 }, { "epoch": 0.024283109871789634, "grad_norm": 0.36579766869544983, "learning_rate": 1.9970914442396093e-05, "loss": 0.1801, "step": 1308 }, { "epoch": 0.024320240009208273, "grad_norm": 0.42245689034461975, "learning_rate": 1.9970825471042635e-05, "loss": 0.3233, "step": 1310 }, { "epoch": 0.02435737014662691, "grad_norm": 0.38223937153816223, "learning_rate": 1.9970736364016116e-05, "loss": 0.2972, "step": 1312 }, { "epoch": 0.02439450028404555, "grad_norm": 0.43174970149993896, "learning_rate": 1.9970647121317737e-05, "loss": 0.4011, "step": 1314 }, { "epoch": 0.02443163042146419, "grad_norm": 0.3544157147407532, "learning_rate": 1.9970557742948716e-05, "loss": 0.337, "step": 1316 }, { "epoch": 0.024468760558882828, "grad_norm": 0.2962047755718231, "learning_rate": 1.9970468228910268e-05, "loss": 0.3936, "step": 1318 }, { "epoch": 0.024505890696301467, "grad_norm": 0.5097366571426392, "learning_rate": 1.9970378579203614e-05, "loss": 0.3446, "step": 1320 }, { "epoch": 0.024543020833720106, "grad_norm": 0.3349545896053314, "learning_rate": 1.997028879382997e-05, "loss": 0.4444, "step": 1322 }, { "epoch": 0.024580150971138744, "grad_norm": 0.2875061333179474, "learning_rate": 1.9970198872790564e-05, "loss": 0.4176, "step": 1324 }, { "epoch": 0.024617281108557383, "grad_norm": 0.3119542896747589, "learning_rate": 1.9970108816086614e-05, "loss": 0.4236, "step": 1326 }, { "epoch": 0.024654411245976022, "grad_norm": 0.33009427785873413, "learning_rate": 1.9970018623719344e-05, "loss": 0.2892, "step": 1328 }, { "epoch": 0.02469154138339466, "grad_norm": 0.34331730008125305, "learning_rate": 1.996992829568999e-05, "loss": 0.4261, "step": 1330 }, { "epoch": 0.0247286715208133, "grad_norm": 0.4544834792613983, "learning_rate": 1.9969837831999774e-05, "loss": 0.3532, "step": 1332 }, { "epoch": 0.024765801658231938, "grad_norm": 0.2871183156967163, "learning_rate": 1.9969747232649923e-05, "loss": 0.3155, "step": 1334 }, { "epoch": 0.024802931795650577, "grad_norm": 0.41414642333984375, "learning_rate": 1.9969656497641678e-05, "loss": 0.5466, "step": 1336 }, { "epoch": 0.024840061933069216, "grad_norm": 0.29097655415534973, "learning_rate": 1.996956562697627e-05, "loss": 0.4122, "step": 1338 }, { "epoch": 0.024877192070487854, "grad_norm": 0.3858906030654907, "learning_rate": 1.996947462065494e-05, "loss": 0.2427, "step": 1340 }, { "epoch": 0.02491432220790649, "grad_norm": 0.4520706236362457, "learning_rate": 1.9969383478678917e-05, "loss": 0.5169, "step": 1342 }, { "epoch": 0.02495145234532513, "grad_norm": 0.2730240225791931, "learning_rate": 1.996929220104945e-05, "loss": 0.2457, "step": 1344 }, { "epoch": 0.024988582482743767, "grad_norm": 0.4015446901321411, "learning_rate": 1.996920078776778e-05, "loss": 0.4575, "step": 1346 }, { "epoch": 0.025025712620162406, "grad_norm": 0.31149327754974365, "learning_rate": 1.9969109238835142e-05, "loss": 0.4285, "step": 1348 }, { "epoch": 0.025062842757581045, "grad_norm": 0.336830198764801, "learning_rate": 1.996901755425279e-05, "loss": 0.269, "step": 1350 }, { "epoch": 0.025099972894999684, "grad_norm": 0.3583640456199646, "learning_rate": 1.9968925734021974e-05, "loss": 0.2681, "step": 1352 }, { "epoch": 0.025137103032418322, "grad_norm": 0.4549861550331116, "learning_rate": 1.9968833778143935e-05, "loss": 0.339, "step": 1354 }, { "epoch": 0.02517423316983696, "grad_norm": 0.36295655369758606, "learning_rate": 1.996874168661993e-05, "loss": 0.3372, "step": 1356 }, { "epoch": 0.0252113633072556, "grad_norm": 0.43552401661872864, "learning_rate": 1.996864945945121e-05, "loss": 0.4609, "step": 1358 }, { "epoch": 0.02524849344467424, "grad_norm": 0.40029749274253845, "learning_rate": 1.9968557096639032e-05, "loss": 0.5002, "step": 1360 }, { "epoch": 0.025285623582092877, "grad_norm": 0.37713494896888733, "learning_rate": 1.996846459818465e-05, "loss": 0.319, "step": 1362 }, { "epoch": 0.025322753719511516, "grad_norm": 0.43770843744277954, "learning_rate": 1.9968371964089323e-05, "loss": 0.3566, "step": 1364 }, { "epoch": 0.025359883856930155, "grad_norm": 0.443180114030838, "learning_rate": 1.996827919435431e-05, "loss": 0.49, "step": 1366 }, { "epoch": 0.025397013994348794, "grad_norm": 0.3460487127304077, "learning_rate": 1.9968186288980884e-05, "loss": 0.53, "step": 1368 }, { "epoch": 0.025434144131767433, "grad_norm": 0.34329843521118164, "learning_rate": 1.996809324797029e-05, "loss": 0.4505, "step": 1370 }, { "epoch": 0.02547127426918607, "grad_norm": 0.3537254333496094, "learning_rate": 1.9968000071323816e-05, "loss": 0.5909, "step": 1372 }, { "epoch": 0.02550840440660471, "grad_norm": 0.28023451566696167, "learning_rate": 1.996790675904271e-05, "loss": 0.3167, "step": 1374 }, { "epoch": 0.02554553454402335, "grad_norm": 0.3714352250099182, "learning_rate": 1.9967813311128254e-05, "loss": 0.3826, "step": 1376 }, { "epoch": 0.025582664681441988, "grad_norm": 0.35274237394332886, "learning_rate": 1.9967719727581712e-05, "loss": 0.4447, "step": 1378 }, { "epoch": 0.025619794818860623, "grad_norm": 0.32768285274505615, "learning_rate": 1.9967626008404365e-05, "loss": 0.2885, "step": 1380 }, { "epoch": 0.02565692495627926, "grad_norm": 0.4058002531528473, "learning_rate": 1.9967532153597484e-05, "loss": 0.3783, "step": 1382 }, { "epoch": 0.0256940550936979, "grad_norm": 0.2554856836795807, "learning_rate": 1.9967438163162346e-05, "loss": 0.3025, "step": 1384 }, { "epoch": 0.02573118523111654, "grad_norm": 0.3965078592300415, "learning_rate": 1.9967344037100234e-05, "loss": 0.3931, "step": 1386 }, { "epoch": 0.025768315368535178, "grad_norm": 0.22335362434387207, "learning_rate": 1.9967249775412424e-05, "loss": 0.3573, "step": 1388 }, { "epoch": 0.025805445505953817, "grad_norm": 0.37355759739875793, "learning_rate": 1.9967155378100194e-05, "loss": 0.3631, "step": 1390 }, { "epoch": 0.025842575643372456, "grad_norm": 0.3984421491622925, "learning_rate": 1.996706084516484e-05, "loss": 0.2944, "step": 1392 }, { "epoch": 0.025879705780791094, "grad_norm": 0.34430572390556335, "learning_rate": 1.996696617660764e-05, "loss": 0.1699, "step": 1394 }, { "epoch": 0.025916835918209733, "grad_norm": 0.4926247298717499, "learning_rate": 1.9966871372429888e-05, "loss": 0.3823, "step": 1396 }, { "epoch": 0.025953966055628372, "grad_norm": 0.2837347090244293, "learning_rate": 1.996677643263287e-05, "loss": 0.3125, "step": 1398 }, { "epoch": 0.02599109619304701, "grad_norm": 0.43463966250419617, "learning_rate": 1.9966681357217878e-05, "loss": 0.2929, "step": 1400 }, { "epoch": 0.02602822633046565, "grad_norm": 0.2875401973724365, "learning_rate": 1.9966586146186206e-05, "loss": 0.3651, "step": 1402 }, { "epoch": 0.026065356467884288, "grad_norm": 0.4797331392765045, "learning_rate": 1.9966490799539148e-05, "loss": 0.4355, "step": 1404 }, { "epoch": 0.026102486605302927, "grad_norm": 0.3717973828315735, "learning_rate": 1.9966395317278005e-05, "loss": 0.5791, "step": 1406 }, { "epoch": 0.026139616742721566, "grad_norm": 0.458346962928772, "learning_rate": 1.9966299699404076e-05, "loss": 0.4856, "step": 1408 }, { "epoch": 0.026176746880140205, "grad_norm": 0.20421335101127625, "learning_rate": 1.996620394591866e-05, "loss": 0.3512, "step": 1410 }, { "epoch": 0.026213877017558843, "grad_norm": 0.6964619159698486, "learning_rate": 1.996610805682306e-05, "loss": 0.3468, "step": 1412 }, { "epoch": 0.026251007154977482, "grad_norm": 0.42472800612449646, "learning_rate": 1.9966012032118582e-05, "loss": 0.4975, "step": 1414 }, { "epoch": 0.02628813729239612, "grad_norm": 0.4406464993953705, "learning_rate": 1.9965915871806532e-05, "loss": 0.4985, "step": 1416 }, { "epoch": 0.026325267429814756, "grad_norm": 0.23722697794437408, "learning_rate": 1.9965819575888217e-05, "loss": 0.2273, "step": 1418 }, { "epoch": 0.026362397567233395, "grad_norm": 0.29463279247283936, "learning_rate": 1.996572314436495e-05, "loss": 0.4058, "step": 1420 }, { "epoch": 0.026399527704652034, "grad_norm": 0.32248571515083313, "learning_rate": 1.996562657723804e-05, "loss": 0.4943, "step": 1422 }, { "epoch": 0.026436657842070672, "grad_norm": 0.2820965647697449, "learning_rate": 1.9965529874508805e-05, "loss": 0.5057, "step": 1424 }, { "epoch": 0.02647378797948931, "grad_norm": 0.31063172221183777, "learning_rate": 1.9965433036178557e-05, "loss": 0.207, "step": 1426 }, { "epoch": 0.02651091811690795, "grad_norm": 0.3696473240852356, "learning_rate": 1.9965336062248616e-05, "loss": 0.2089, "step": 1428 }, { "epoch": 0.02654804825432659, "grad_norm": 0.40383201837539673, "learning_rate": 1.99652389527203e-05, "loss": 0.3744, "step": 1430 }, { "epoch": 0.026585178391745228, "grad_norm": 0.4137421250343323, "learning_rate": 1.9965141707594934e-05, "loss": 0.337, "step": 1432 }, { "epoch": 0.026622308529163866, "grad_norm": 0.3258547782897949, "learning_rate": 1.9965044326873833e-05, "loss": 0.4078, "step": 1434 }, { "epoch": 0.026659438666582505, "grad_norm": 0.24489538371562958, "learning_rate": 1.996494681055833e-05, "loss": 0.4465, "step": 1436 }, { "epoch": 0.026696568804001144, "grad_norm": 0.45460736751556396, "learning_rate": 1.9964849158649752e-05, "loss": 0.385, "step": 1438 }, { "epoch": 0.026733698941419783, "grad_norm": 0.2975912094116211, "learning_rate": 1.9964751371149426e-05, "loss": 0.359, "step": 1440 }, { "epoch": 0.02677082907883842, "grad_norm": 0.33785927295684814, "learning_rate": 1.9964653448058678e-05, "loss": 0.4944, "step": 1442 }, { "epoch": 0.02680795921625706, "grad_norm": 0.331898957490921, "learning_rate": 1.9964555389378842e-05, "loss": 0.4063, "step": 1444 }, { "epoch": 0.0268450893536757, "grad_norm": 0.45948436856269836, "learning_rate": 1.996445719511126e-05, "loss": 0.3422, "step": 1446 }, { "epoch": 0.026882219491094338, "grad_norm": 0.44890448451042175, "learning_rate": 1.9964358865257257e-05, "loss": 0.3628, "step": 1448 }, { "epoch": 0.026919349628512976, "grad_norm": 0.322969913482666, "learning_rate": 1.996426039981818e-05, "loss": 0.3265, "step": 1450 }, { "epoch": 0.026956479765931615, "grad_norm": 0.34472888708114624, "learning_rate": 1.9964161798795367e-05, "loss": 0.2761, "step": 1452 }, { "epoch": 0.026993609903350254, "grad_norm": 0.6000632643699646, "learning_rate": 1.9964063062190155e-05, "loss": 0.2997, "step": 1454 }, { "epoch": 0.02703074004076889, "grad_norm": 0.28989025950431824, "learning_rate": 1.9963964190003892e-05, "loss": 0.3571, "step": 1456 }, { "epoch": 0.027067870178187528, "grad_norm": 0.4636005759239197, "learning_rate": 1.9963865182237924e-05, "loss": 0.2713, "step": 1458 }, { "epoch": 0.027105000315606167, "grad_norm": 0.28512948751449585, "learning_rate": 1.9963766038893592e-05, "loss": 0.3763, "step": 1460 }, { "epoch": 0.027142130453024806, "grad_norm": 0.3537326157093048, "learning_rate": 1.9963666759972252e-05, "loss": 0.339, "step": 1462 }, { "epoch": 0.027179260590443444, "grad_norm": 0.7664099931716919, "learning_rate": 1.996356734547525e-05, "loss": 0.5019, "step": 1464 }, { "epoch": 0.027216390727862083, "grad_norm": 0.323015958070755, "learning_rate": 1.996346779540394e-05, "loss": 0.4131, "step": 1466 }, { "epoch": 0.027253520865280722, "grad_norm": 0.2701569199562073, "learning_rate": 1.9963368109759683e-05, "loss": 0.2272, "step": 1468 }, { "epoch": 0.02729065100269936, "grad_norm": 0.46091488003730774, "learning_rate": 1.9963268288543825e-05, "loss": 0.3593, "step": 1470 }, { "epoch": 0.027327781140118, "grad_norm": 0.500520646572113, "learning_rate": 1.9963168331757728e-05, "loss": 0.4335, "step": 1472 }, { "epoch": 0.027364911277536638, "grad_norm": 0.29205963015556335, "learning_rate": 1.9963068239402754e-05, "loss": 0.1835, "step": 1474 }, { "epoch": 0.027402041414955277, "grad_norm": 0.39108943939208984, "learning_rate": 1.9962968011480265e-05, "loss": 0.3862, "step": 1476 }, { "epoch": 0.027439171552373916, "grad_norm": 0.3224082887172699, "learning_rate": 1.9962867647991624e-05, "loss": 0.2572, "step": 1478 }, { "epoch": 0.027476301689792555, "grad_norm": 0.3712817430496216, "learning_rate": 1.99627671489382e-05, "loss": 0.2628, "step": 1480 }, { "epoch": 0.027513431827211193, "grad_norm": 0.3820110559463501, "learning_rate": 1.996266651432135e-05, "loss": 0.3577, "step": 1482 }, { "epoch": 0.027550561964629832, "grad_norm": 0.29978644847869873, "learning_rate": 1.9962565744142454e-05, "loss": 0.3791, "step": 1484 }, { "epoch": 0.02758769210204847, "grad_norm": 0.3531618118286133, "learning_rate": 1.9962464838402883e-05, "loss": 0.2792, "step": 1486 }, { "epoch": 0.02762482223946711, "grad_norm": 0.28927019238471985, "learning_rate": 1.9962363797104e-05, "loss": 0.4817, "step": 1488 }, { "epoch": 0.02766195237688575, "grad_norm": 0.39351004362106323, "learning_rate": 1.996226262024719e-05, "loss": 0.3973, "step": 1490 }, { "epoch": 0.027699082514304387, "grad_norm": 0.3068031072616577, "learning_rate": 1.9962161307833826e-05, "loss": 0.6361, "step": 1492 }, { "epoch": 0.027736212651723022, "grad_norm": 0.2599097788333893, "learning_rate": 1.9962059859865287e-05, "loss": 0.4596, "step": 1494 }, { "epoch": 0.02777334278914166, "grad_norm": 0.34776872396469116, "learning_rate": 1.9961958276342952e-05, "loss": 0.4534, "step": 1496 }, { "epoch": 0.0278104729265603, "grad_norm": 0.29954656958580017, "learning_rate": 1.9961856557268206e-05, "loss": 0.3703, "step": 1498 }, { "epoch": 0.02784760306397894, "grad_norm": 0.27080005407333374, "learning_rate": 1.996175470264243e-05, "loss": 0.2651, "step": 1500 }, { "epoch": 0.027884733201397578, "grad_norm": 0.29802244901657104, "learning_rate": 1.9961652712467013e-05, "loss": 0.5031, "step": 1502 }, { "epoch": 0.027921863338816216, "grad_norm": 0.28868886828422546, "learning_rate": 1.996155058674334e-05, "loss": 0.4347, "step": 1504 }, { "epoch": 0.027958993476234855, "grad_norm": 0.31217366456985474, "learning_rate": 1.99614483254728e-05, "loss": 0.2961, "step": 1506 }, { "epoch": 0.027996123613653494, "grad_norm": 0.37338700890541077, "learning_rate": 1.996134592865679e-05, "loss": 0.4172, "step": 1508 }, { "epoch": 0.028033253751072133, "grad_norm": 0.371517151594162, "learning_rate": 1.9961243396296696e-05, "loss": 0.4035, "step": 1510 }, { "epoch": 0.02807038388849077, "grad_norm": 0.39833423495292664, "learning_rate": 1.996114072839392e-05, "loss": 0.2867, "step": 1512 }, { "epoch": 0.02810751402590941, "grad_norm": 0.37536507844924927, "learning_rate": 1.9961037924949853e-05, "loss": 0.303, "step": 1514 }, { "epoch": 0.02814464416332805, "grad_norm": 0.38057741522789, "learning_rate": 1.9960934985965897e-05, "loss": 0.3201, "step": 1516 }, { "epoch": 0.028181774300746688, "grad_norm": 0.3367200493812561, "learning_rate": 1.9960831911443455e-05, "loss": 0.2645, "step": 1518 }, { "epoch": 0.028218904438165326, "grad_norm": 0.3803890645503998, "learning_rate": 1.9960728701383924e-05, "loss": 0.3295, "step": 1520 }, { "epoch": 0.028256034575583965, "grad_norm": 0.3274245858192444, "learning_rate": 1.9960625355788714e-05, "loss": 0.3953, "step": 1522 }, { "epoch": 0.028293164713002604, "grad_norm": 0.5688687562942505, "learning_rate": 1.9960521874659226e-05, "loss": 0.4073, "step": 1524 }, { "epoch": 0.028330294850421243, "grad_norm": 0.4595238268375397, "learning_rate": 1.996041825799687e-05, "loss": 0.5574, "step": 1526 }, { "epoch": 0.02836742498783988, "grad_norm": 0.5020874738693237, "learning_rate": 1.9960314505803056e-05, "loss": 0.366, "step": 1528 }, { "epoch": 0.02840455512525852, "grad_norm": 0.39065760374069214, "learning_rate": 1.99602106180792e-05, "loss": 0.3016, "step": 1530 }, { "epoch": 0.028441685262677156, "grad_norm": 0.29245397448539734, "learning_rate": 1.996010659482671e-05, "loss": 0.2864, "step": 1532 }, { "epoch": 0.028478815400095794, "grad_norm": 0.41922682523727417, "learning_rate": 1.9960002436047002e-05, "loss": 0.4411, "step": 1534 }, { "epoch": 0.028515945537514433, "grad_norm": 0.3665362000465393, "learning_rate": 1.9959898141741494e-05, "loss": 0.2283, "step": 1536 }, { "epoch": 0.028553075674933072, "grad_norm": 0.3724018931388855, "learning_rate": 1.9959793711911608e-05, "loss": 0.4392, "step": 1538 }, { "epoch": 0.02859020581235171, "grad_norm": 0.409440279006958, "learning_rate": 1.995968914655876e-05, "loss": 0.3358, "step": 1540 }, { "epoch": 0.02862733594977035, "grad_norm": 0.512639045715332, "learning_rate": 1.9959584445684377e-05, "loss": 0.3464, "step": 1542 }, { "epoch": 0.028664466087188988, "grad_norm": 0.3631186783313751, "learning_rate": 1.995947960928988e-05, "loss": 0.436, "step": 1544 }, { "epoch": 0.028701596224607627, "grad_norm": 0.3963833749294281, "learning_rate": 1.9959374637376704e-05, "loss": 0.42, "step": 1546 }, { "epoch": 0.028738726362026266, "grad_norm": 0.27118197083473206, "learning_rate": 1.9959269529946264e-05, "loss": 0.42, "step": 1548 }, { "epoch": 0.028775856499444905, "grad_norm": 0.3797779977321625, "learning_rate": 1.9959164287e-05, "loss": 0.4648, "step": 1550 }, { "epoch": 0.028812986636863543, "grad_norm": 0.37404870986938477, "learning_rate": 1.9959058908539342e-05, "loss": 0.3835, "step": 1552 }, { "epoch": 0.028850116774282182, "grad_norm": 0.27369093894958496, "learning_rate": 1.9958953394565722e-05, "loss": 0.4301, "step": 1554 }, { "epoch": 0.02888724691170082, "grad_norm": 0.37033283710479736, "learning_rate": 1.9958847745080577e-05, "loss": 0.3592, "step": 1556 }, { "epoch": 0.02892437704911946, "grad_norm": 0.3935405910015106, "learning_rate": 1.9958741960085345e-05, "loss": 0.2618, "step": 1558 }, { "epoch": 0.0289615071865381, "grad_norm": 0.2885076701641083, "learning_rate": 1.9958636039581464e-05, "loss": 0.4715, "step": 1560 }, { "epoch": 0.028998637323956737, "grad_norm": 0.24158255755901337, "learning_rate": 1.9958529983570374e-05, "loss": 0.318, "step": 1562 }, { "epoch": 0.029035767461375376, "grad_norm": 0.2989046573638916, "learning_rate": 1.9958423792053524e-05, "loss": 0.4084, "step": 1564 }, { "epoch": 0.029072897598794015, "grad_norm": 0.33272868394851685, "learning_rate": 1.9958317465032352e-05, "loss": 0.4199, "step": 1566 }, { "epoch": 0.02911002773621265, "grad_norm": 0.4965613782405853, "learning_rate": 1.995821100250831e-05, "loss": 0.4254, "step": 1568 }, { "epoch": 0.02914715787363129, "grad_norm": 0.37236645817756653, "learning_rate": 1.9958104404482843e-05, "loss": 0.3838, "step": 1570 }, { "epoch": 0.029184288011049928, "grad_norm": 0.3216698467731476, "learning_rate": 1.9957997670957403e-05, "loss": 0.4545, "step": 1572 }, { "epoch": 0.029221418148468566, "grad_norm": 0.25853726267814636, "learning_rate": 1.9957890801933444e-05, "loss": 0.3244, "step": 1574 }, { "epoch": 0.029258548285887205, "grad_norm": 0.39867860078811646, "learning_rate": 1.9957783797412416e-05, "loss": 0.4424, "step": 1576 }, { "epoch": 0.029295678423305844, "grad_norm": 0.3040996789932251, "learning_rate": 1.995767665739578e-05, "loss": 0.4894, "step": 1578 }, { "epoch": 0.029332808560724483, "grad_norm": 0.3603006601333618, "learning_rate": 1.9957569381884993e-05, "loss": 0.3921, "step": 1580 }, { "epoch": 0.02936993869814312, "grad_norm": 0.40616369247436523, "learning_rate": 1.995746197088151e-05, "loss": 0.5022, "step": 1582 }, { "epoch": 0.02940706883556176, "grad_norm": 0.28409871459007263, "learning_rate": 1.9957354424386797e-05, "loss": 0.5682, "step": 1584 }, { "epoch": 0.0294441989729804, "grad_norm": 0.42425426840782166, "learning_rate": 1.9957246742402313e-05, "loss": 0.4215, "step": 1586 }, { "epoch": 0.029481329110399038, "grad_norm": 0.4165717661380768, "learning_rate": 1.9957138924929532e-05, "loss": 0.5334, "step": 1588 }, { "epoch": 0.029518459247817676, "grad_norm": 0.33867260813713074, "learning_rate": 1.995703097196991e-05, "loss": 0.4195, "step": 1590 }, { "epoch": 0.029555589385236315, "grad_norm": 0.4551469385623932, "learning_rate": 1.9956922883524925e-05, "loss": 0.3001, "step": 1592 }, { "epoch": 0.029592719522654954, "grad_norm": 0.5343561768531799, "learning_rate": 1.9956814659596042e-05, "loss": 0.3407, "step": 1594 }, { "epoch": 0.029629849660073593, "grad_norm": 0.4037441313266754, "learning_rate": 1.9956706300184737e-05, "loss": 0.4728, "step": 1596 }, { "epoch": 0.02966697979749223, "grad_norm": 0.378182053565979, "learning_rate": 1.9956597805292478e-05, "loss": 0.3131, "step": 1598 }, { "epoch": 0.02970410993491087, "grad_norm": 0.35998398065567017, "learning_rate": 1.995648917492075e-05, "loss": 0.4301, "step": 1600 }, { "epoch": 0.02974124007232951, "grad_norm": 0.4198257029056549, "learning_rate": 1.995638040907103e-05, "loss": 0.3985, "step": 1602 }, { "epoch": 0.029778370209748148, "grad_norm": 0.4329060912132263, "learning_rate": 1.9956271507744794e-05, "loss": 0.5794, "step": 1604 }, { "epoch": 0.029815500347166783, "grad_norm": 0.5906333923339844, "learning_rate": 1.9956162470943522e-05, "loss": 0.3322, "step": 1606 }, { "epoch": 0.029852630484585422, "grad_norm": 0.3923175632953644, "learning_rate": 1.9956053298668705e-05, "loss": 0.5322, "step": 1608 }, { "epoch": 0.02988976062200406, "grad_norm": 0.3679203391075134, "learning_rate": 1.9955943990921823e-05, "loss": 0.3602, "step": 1610 }, { "epoch": 0.0299268907594227, "grad_norm": 0.4073547124862671, "learning_rate": 1.9955834547704364e-05, "loss": 0.3927, "step": 1612 }, { "epoch": 0.029964020896841338, "grad_norm": 0.458255797624588, "learning_rate": 1.995572496901782e-05, "loss": 0.4757, "step": 1614 }, { "epoch": 0.030001151034259977, "grad_norm": 0.29495638608932495, "learning_rate": 1.995561525486368e-05, "loss": 0.4078, "step": 1616 }, { "epoch": 0.030038281171678616, "grad_norm": 0.2971830666065216, "learning_rate": 1.9955505405243435e-05, "loss": 0.2955, "step": 1618 }, { "epoch": 0.030075411309097255, "grad_norm": 0.354912668466568, "learning_rate": 1.995539542015858e-05, "loss": 0.4524, "step": 1620 }, { "epoch": 0.030112541446515893, "grad_norm": 0.35809728503227234, "learning_rate": 1.9955285299610615e-05, "loss": 0.3758, "step": 1622 }, { "epoch": 0.030149671583934532, "grad_norm": 0.43015095591545105, "learning_rate": 1.9955175043601034e-05, "loss": 0.5977, "step": 1624 }, { "epoch": 0.03018680172135317, "grad_norm": 0.40109366178512573, "learning_rate": 1.9955064652131347e-05, "loss": 0.4039, "step": 1626 }, { "epoch": 0.03022393185877181, "grad_norm": 0.4162982702255249, "learning_rate": 1.9954954125203042e-05, "loss": 0.515, "step": 1628 }, { "epoch": 0.03026106199619045, "grad_norm": 0.4334047734737396, "learning_rate": 1.9954843462817632e-05, "loss": 0.2991, "step": 1630 }, { "epoch": 0.030298192133609087, "grad_norm": 0.35421228408813477, "learning_rate": 1.9954732664976624e-05, "loss": 0.4193, "step": 1632 }, { "epoch": 0.030335322271027726, "grad_norm": 0.37089452147483826, "learning_rate": 1.9954621731681518e-05, "loss": 0.2015, "step": 1634 }, { "epoch": 0.030372452408446365, "grad_norm": 0.2774275541305542, "learning_rate": 1.995451066293383e-05, "loss": 0.3854, "step": 1636 }, { "epoch": 0.030409582545865003, "grad_norm": 0.5215280652046204, "learning_rate": 1.9954399458735067e-05, "loss": 0.3936, "step": 1638 }, { "epoch": 0.030446712683283642, "grad_norm": 0.34046199917793274, "learning_rate": 1.9954288119086748e-05, "loss": 0.5031, "step": 1640 }, { "epoch": 0.03048384282070228, "grad_norm": 0.36269626021385193, "learning_rate": 1.995417664399038e-05, "loss": 0.3754, "step": 1642 }, { "epoch": 0.030520972958120916, "grad_norm": 0.2609393298625946, "learning_rate": 1.9954065033447488e-05, "loss": 0.3487, "step": 1644 }, { "epoch": 0.030558103095539555, "grad_norm": 0.2629873752593994, "learning_rate": 1.9953953287459584e-05, "loss": 0.5352, "step": 1646 }, { "epoch": 0.030595233232958194, "grad_norm": 0.33361902832984924, "learning_rate": 1.9953841406028192e-05, "loss": 0.2466, "step": 1648 }, { "epoch": 0.030632363370376833, "grad_norm": 0.3738601505756378, "learning_rate": 1.9953729389154835e-05, "loss": 0.3589, "step": 1650 }, { "epoch": 0.03066949350779547, "grad_norm": 0.32117071747779846, "learning_rate": 1.9953617236841035e-05, "loss": 0.254, "step": 1652 }, { "epoch": 0.03070662364521411, "grad_norm": 0.32607585191726685, "learning_rate": 1.9953504949088317e-05, "loss": 0.2791, "step": 1654 }, { "epoch": 0.03074375378263275, "grad_norm": 0.2379506230354309, "learning_rate": 1.9953392525898213e-05, "loss": 0.2839, "step": 1656 }, { "epoch": 0.030780883920051388, "grad_norm": 0.5135214924812317, "learning_rate": 1.995327996727225e-05, "loss": 0.2939, "step": 1658 }, { "epoch": 0.030818014057470026, "grad_norm": 0.43111538887023926, "learning_rate": 1.9953167273211958e-05, "loss": 0.3749, "step": 1660 }, { "epoch": 0.030855144194888665, "grad_norm": 0.41557297110557556, "learning_rate": 1.9953054443718875e-05, "loss": 0.3819, "step": 1662 }, { "epoch": 0.030892274332307304, "grad_norm": 0.30154499411582947, "learning_rate": 1.9952941478794534e-05, "loss": 0.2877, "step": 1664 }, { "epoch": 0.030929404469725943, "grad_norm": 0.37477779388427734, "learning_rate": 1.9952828378440473e-05, "loss": 0.2703, "step": 1666 }, { "epoch": 0.03096653460714458, "grad_norm": 0.35994696617126465, "learning_rate": 1.9952715142658228e-05, "loss": 0.3038, "step": 1668 }, { "epoch": 0.03100366474456322, "grad_norm": 0.374391108751297, "learning_rate": 1.995260177144934e-05, "loss": 0.2479, "step": 1670 }, { "epoch": 0.03104079488198186, "grad_norm": 0.33741456270217896, "learning_rate": 1.9952488264815357e-05, "loss": 0.2619, "step": 1672 }, { "epoch": 0.031077925019400498, "grad_norm": 0.29656046628952026, "learning_rate": 1.9952374622757818e-05, "loss": 0.2605, "step": 1674 }, { "epoch": 0.031115055156819137, "grad_norm": 0.2902997136116028, "learning_rate": 1.9952260845278274e-05, "loss": 0.1991, "step": 1676 }, { "epoch": 0.031152185294237775, "grad_norm": 0.2407100647687912, "learning_rate": 1.9952146932378267e-05, "loss": 0.2439, "step": 1678 }, { "epoch": 0.031189315431656414, "grad_norm": 0.40434888005256653, "learning_rate": 1.995203288405935e-05, "loss": 0.3774, "step": 1680 }, { "epoch": 0.03122644556907505, "grad_norm": 0.358460932970047, "learning_rate": 1.9951918700323077e-05, "loss": 0.4143, "step": 1682 }, { "epoch": 0.03126357570649369, "grad_norm": 0.33627939224243164, "learning_rate": 1.9951804381171e-05, "loss": 0.3964, "step": 1684 }, { "epoch": 0.03130070584391233, "grad_norm": 0.26235583424568176, "learning_rate": 1.9951689926604673e-05, "loss": 0.3224, "step": 1686 }, { "epoch": 0.03133783598133097, "grad_norm": 0.4374541640281677, "learning_rate": 1.9951575336625658e-05, "loss": 0.4657, "step": 1688 }, { "epoch": 0.031374966118749605, "grad_norm": 0.4052223563194275, "learning_rate": 1.995146061123551e-05, "loss": 0.2915, "step": 1690 }, { "epoch": 0.03141209625616825, "grad_norm": 0.4146050810813904, "learning_rate": 1.995134575043579e-05, "loss": 0.3734, "step": 1692 }, { "epoch": 0.03144922639358688, "grad_norm": 0.32710057497024536, "learning_rate": 1.9951230754228062e-05, "loss": 0.3419, "step": 1694 }, { "epoch": 0.031486356531005524, "grad_norm": 0.36174726486206055, "learning_rate": 1.995111562261389e-05, "loss": 0.4012, "step": 1696 }, { "epoch": 0.03152348666842416, "grad_norm": 0.3643866777420044, "learning_rate": 1.9951000355594842e-05, "loss": 0.3863, "step": 1698 }, { "epoch": 0.0315606168058428, "grad_norm": 0.35113590955734253, "learning_rate": 1.9950884953172486e-05, "loss": 0.3159, "step": 1700 }, { "epoch": 0.03159774694326144, "grad_norm": 0.5454980134963989, "learning_rate": 1.9950769415348393e-05, "loss": 0.5577, "step": 1702 }, { "epoch": 0.03163487708068007, "grad_norm": 0.33126652240753174, "learning_rate": 1.9950653742124136e-05, "loss": 0.3469, "step": 1704 }, { "epoch": 0.031672007218098715, "grad_norm": 0.3847205936908722, "learning_rate": 1.9950537933501285e-05, "loss": 0.3718, "step": 1706 }, { "epoch": 0.03170913735551735, "grad_norm": 0.2673433721065521, "learning_rate": 1.9950421989481418e-05, "loss": 0.3175, "step": 1708 }, { "epoch": 0.03174626749293599, "grad_norm": 0.26624780893325806, "learning_rate": 1.995030591006611e-05, "loss": 0.4444, "step": 1710 }, { "epoch": 0.03178339763035463, "grad_norm": 0.3721410632133484, "learning_rate": 1.9950189695256946e-05, "loss": 0.3837, "step": 1712 }, { "epoch": 0.03182052776777327, "grad_norm": 0.40957459807395935, "learning_rate": 1.9950073345055507e-05, "loss": 0.3521, "step": 1714 }, { "epoch": 0.031857657905191905, "grad_norm": 0.2849561870098114, "learning_rate": 1.994995685946337e-05, "loss": 0.2899, "step": 1716 }, { "epoch": 0.03189478804261055, "grad_norm": 0.557837188243866, "learning_rate": 1.9949840238482126e-05, "loss": 0.4348, "step": 1718 }, { "epoch": 0.03193191818002918, "grad_norm": 0.4172285497188568, "learning_rate": 1.9949723482113355e-05, "loss": 0.2374, "step": 1720 }, { "epoch": 0.031969048317447825, "grad_norm": 0.22838103771209717, "learning_rate": 1.9949606590358657e-05, "loss": 0.2863, "step": 1722 }, { "epoch": 0.03200617845486646, "grad_norm": 0.367658793926239, "learning_rate": 1.9949489563219612e-05, "loss": 0.2889, "step": 1724 }, { "epoch": 0.0320433085922851, "grad_norm": 0.427361398935318, "learning_rate": 1.994937240069782e-05, "loss": 0.2958, "step": 1726 }, { "epoch": 0.03208043872970374, "grad_norm": 0.33430930972099304, "learning_rate": 1.9949255102794867e-05, "loss": 0.3483, "step": 1728 }, { "epoch": 0.03211756886712238, "grad_norm": 0.36812496185302734, "learning_rate": 1.9949137669512354e-05, "loss": 0.422, "step": 1730 }, { "epoch": 0.032154699004541015, "grad_norm": 0.24867336452007294, "learning_rate": 1.9949020100851876e-05, "loss": 0.2527, "step": 1732 }, { "epoch": 0.03219182914195966, "grad_norm": 0.5246883034706116, "learning_rate": 1.994890239681504e-05, "loss": 0.5334, "step": 1734 }, { "epoch": 0.03222895927937829, "grad_norm": 0.30807244777679443, "learning_rate": 1.9948784557403442e-05, "loss": 0.3576, "step": 1736 }, { "epoch": 0.03226608941679693, "grad_norm": 0.35629957914352417, "learning_rate": 1.9948666582618685e-05, "loss": 0.2987, "step": 1738 }, { "epoch": 0.03230321955421557, "grad_norm": 0.3506420850753784, "learning_rate": 1.9948548472462376e-05, "loss": 0.1867, "step": 1740 }, { "epoch": 0.032340349691634206, "grad_norm": 0.3471173644065857, "learning_rate": 1.994843022693612e-05, "loss": 0.4119, "step": 1742 }, { "epoch": 0.03237747982905285, "grad_norm": 0.3845759928226471, "learning_rate": 1.994831184604153e-05, "loss": 0.3232, "step": 1744 }, { "epoch": 0.03241460996647148, "grad_norm": 0.25755491852760315, "learning_rate": 1.9948193329780214e-05, "loss": 0.2737, "step": 1746 }, { "epoch": 0.032451740103890125, "grad_norm": 0.3594185411930084, "learning_rate": 1.9948074678153783e-05, "loss": 0.334, "step": 1748 }, { "epoch": 0.03248887024130876, "grad_norm": 0.4289058744907379, "learning_rate": 1.9947955891163858e-05, "loss": 0.3937, "step": 1750 }, { "epoch": 0.0325260003787274, "grad_norm": 0.3494783341884613, "learning_rate": 1.9947836968812047e-05, "loss": 0.3211, "step": 1752 }, { "epoch": 0.03256313051614604, "grad_norm": 0.3607964515686035, "learning_rate": 1.9947717911099973e-05, "loss": 0.2496, "step": 1754 }, { "epoch": 0.03260026065356468, "grad_norm": 0.36975347995758057, "learning_rate": 1.9947598718029257e-05, "loss": 0.4069, "step": 1756 }, { "epoch": 0.032637390790983316, "grad_norm": 0.28233110904693604, "learning_rate": 1.9947479389601516e-05, "loss": 0.3403, "step": 1758 }, { "epoch": 0.03267452092840196, "grad_norm": 0.36703306436538696, "learning_rate": 1.9947359925818378e-05, "loss": 0.293, "step": 1760 }, { "epoch": 0.03271165106582059, "grad_norm": 0.3206377923488617, "learning_rate": 1.994724032668147e-05, "loss": 0.3352, "step": 1762 }, { "epoch": 0.032748781203239236, "grad_norm": 0.2851402163505554, "learning_rate": 1.994712059219241e-05, "loss": 0.5385, "step": 1764 }, { "epoch": 0.03278591134065787, "grad_norm": 0.43175849318504333, "learning_rate": 1.994700072235284e-05, "loss": 0.3726, "step": 1766 }, { "epoch": 0.03282304147807651, "grad_norm": 0.43918466567993164, "learning_rate": 1.994688071716438e-05, "loss": 0.5355, "step": 1768 }, { "epoch": 0.03286017161549515, "grad_norm": 0.5312216877937317, "learning_rate": 1.994676057662867e-05, "loss": 0.4615, "step": 1770 }, { "epoch": 0.03289730175291379, "grad_norm": 0.514531672000885, "learning_rate": 1.994664030074734e-05, "loss": 0.3627, "step": 1772 }, { "epoch": 0.032934431890332426, "grad_norm": 0.3006580173969269, "learning_rate": 1.9946519889522036e-05, "loss": 0.3508, "step": 1774 }, { "epoch": 0.03297156202775106, "grad_norm": 0.44068196415901184, "learning_rate": 1.9946399342954384e-05, "loss": 0.5224, "step": 1776 }, { "epoch": 0.033008692165169704, "grad_norm": 0.4029116928577423, "learning_rate": 1.9946278661046032e-05, "loss": 0.2598, "step": 1778 }, { "epoch": 0.03304582230258834, "grad_norm": 0.2721407413482666, "learning_rate": 1.9946157843798618e-05, "loss": 0.3648, "step": 1780 }, { "epoch": 0.03308295244000698, "grad_norm": 0.3865302801132202, "learning_rate": 1.994603689121379e-05, "loss": 0.5265, "step": 1782 }, { "epoch": 0.033120082577425616, "grad_norm": 0.4034496247768402, "learning_rate": 1.9945915803293192e-05, "loss": 0.5124, "step": 1784 }, { "epoch": 0.03315721271484426, "grad_norm": 0.34259992837905884, "learning_rate": 1.994579458003847e-05, "loss": 0.3764, "step": 1786 }, { "epoch": 0.033194342852262894, "grad_norm": 0.30749520659446716, "learning_rate": 1.9945673221451276e-05, "loss": 0.3621, "step": 1788 }, { "epoch": 0.033231472989681536, "grad_norm": 0.4914582371711731, "learning_rate": 1.9945551727533258e-05, "loss": 0.3662, "step": 1790 }, { "epoch": 0.03326860312710017, "grad_norm": 0.3235917091369629, "learning_rate": 1.9945430098286074e-05, "loss": 0.5538, "step": 1792 }, { "epoch": 0.033305733264518814, "grad_norm": 0.4097521901130676, "learning_rate": 1.9945308333711376e-05, "loss": 0.254, "step": 1794 }, { "epoch": 0.03334286340193745, "grad_norm": 0.3567485511302948, "learning_rate": 1.994518643381082e-05, "loss": 0.4842, "step": 1796 }, { "epoch": 0.03337999353935609, "grad_norm": 0.36090123653411865, "learning_rate": 1.9945064398586066e-05, "loss": 0.2326, "step": 1798 }, { "epoch": 0.033417123676774727, "grad_norm": 0.3272109925746918, "learning_rate": 1.9944942228038773e-05, "loss": 0.5286, "step": 1800 }, { "epoch": 0.03345425381419337, "grad_norm": 0.3510742783546448, "learning_rate": 1.9944819922170605e-05, "loss": 0.412, "step": 1802 }, { "epoch": 0.033491383951612004, "grad_norm": 0.5229774117469788, "learning_rate": 1.994469748098323e-05, "loss": 0.3688, "step": 1804 }, { "epoch": 0.033528514089030646, "grad_norm": 0.29095184803009033, "learning_rate": 1.9944574904478306e-05, "loss": 0.3508, "step": 1806 }, { "epoch": 0.03356564422644928, "grad_norm": 0.3484596312046051, "learning_rate": 1.994445219265751e-05, "loss": 0.4964, "step": 1808 }, { "epoch": 0.033602774363867924, "grad_norm": 0.385435551404953, "learning_rate": 1.99443293455225e-05, "loss": 0.4396, "step": 1810 }, { "epoch": 0.03363990450128656, "grad_norm": 0.3210596442222595, "learning_rate": 1.994420636307496e-05, "loss": 0.4621, "step": 1812 }, { "epoch": 0.033677034638705194, "grad_norm": 0.3639221489429474, "learning_rate": 1.9944083245316555e-05, "loss": 0.3999, "step": 1814 }, { "epoch": 0.03371416477612384, "grad_norm": 0.4703008532524109, "learning_rate": 1.9943959992248966e-05, "loss": 0.2932, "step": 1816 }, { "epoch": 0.03375129491354247, "grad_norm": 0.5173124670982361, "learning_rate": 1.9943836603873863e-05, "loss": 0.458, "step": 1818 }, { "epoch": 0.033788425050961114, "grad_norm": 0.3299204111099243, "learning_rate": 1.994371308019293e-05, "loss": 0.3484, "step": 1820 }, { "epoch": 0.03382555518837975, "grad_norm": 0.3522112965583801, "learning_rate": 1.9943589421207848e-05, "loss": 0.3915, "step": 1822 }, { "epoch": 0.03386268532579839, "grad_norm": 0.3242979347705841, "learning_rate": 1.9943465626920296e-05, "loss": 0.5231, "step": 1824 }, { "epoch": 0.03389981546321703, "grad_norm": 0.30497244000434875, "learning_rate": 1.9943341697331964e-05, "loss": 0.3446, "step": 1826 }, { "epoch": 0.03393694560063567, "grad_norm": 0.4302389323711395, "learning_rate": 1.9943217632444535e-05, "loss": 0.5631, "step": 1828 }, { "epoch": 0.033974075738054305, "grad_norm": 0.3600137531757355, "learning_rate": 1.9943093432259696e-05, "loss": 0.5228, "step": 1830 }, { "epoch": 0.03401120587547295, "grad_norm": 0.4871399700641632, "learning_rate": 1.994296909677914e-05, "loss": 0.3173, "step": 1832 }, { "epoch": 0.03404833601289158, "grad_norm": 0.4235627055168152, "learning_rate": 1.9942844626004557e-05, "loss": 0.3799, "step": 1834 }, { "epoch": 0.034085466150310224, "grad_norm": 0.5523370504379272, "learning_rate": 1.994272001993764e-05, "loss": 0.4066, "step": 1836 }, { "epoch": 0.03412259628772886, "grad_norm": 0.3631313145160675, "learning_rate": 1.9942595278580087e-05, "loss": 0.5382, "step": 1838 }, { "epoch": 0.0341597264251475, "grad_norm": 0.25497767329216003, "learning_rate": 1.9942470401933592e-05, "loss": 0.3474, "step": 1840 }, { "epoch": 0.03419685656256614, "grad_norm": 0.2758070230484009, "learning_rate": 1.9942345389999854e-05, "loss": 0.2967, "step": 1842 }, { "epoch": 0.03423398669998478, "grad_norm": 0.305310994386673, "learning_rate": 1.994222024278058e-05, "loss": 0.478, "step": 1844 }, { "epoch": 0.034271116837403415, "grad_norm": 0.27458810806274414, "learning_rate": 1.9942094960277468e-05, "loss": 0.4244, "step": 1846 }, { "epoch": 0.03430824697482206, "grad_norm": 0.2730516791343689, "learning_rate": 1.994196954249222e-05, "loss": 0.4479, "step": 1848 }, { "epoch": 0.03434537711224069, "grad_norm": 0.5396698713302612, "learning_rate": 1.9941843989426552e-05, "loss": 0.4003, "step": 1850 }, { "epoch": 0.03438250724965933, "grad_norm": 0.3073475956916809, "learning_rate": 1.9941718301082162e-05, "loss": 0.4318, "step": 1852 }, { "epoch": 0.03441963738707797, "grad_norm": 0.27815282344818115, "learning_rate": 1.9941592477460767e-05, "loss": 0.4288, "step": 1854 }, { "epoch": 0.034456767524496605, "grad_norm": 0.39860472083091736, "learning_rate": 1.9941466518564076e-05, "loss": 0.375, "step": 1856 }, { "epoch": 0.03449389766191525, "grad_norm": 0.33730626106262207, "learning_rate": 1.9941340424393804e-05, "loss": 0.1817, "step": 1858 }, { "epoch": 0.03453102779933388, "grad_norm": 0.4199099540710449, "learning_rate": 1.9941214194951666e-05, "loss": 0.472, "step": 1860 }, { "epoch": 0.034568157936752525, "grad_norm": 0.35788774490356445, "learning_rate": 1.994108783023938e-05, "loss": 0.3755, "step": 1862 }, { "epoch": 0.03460528807417116, "grad_norm": 0.3827565312385559, "learning_rate": 1.994096133025867e-05, "loss": 0.3213, "step": 1864 }, { "epoch": 0.0346424182115898, "grad_norm": 0.4929308295249939, "learning_rate": 1.994083469501125e-05, "loss": 0.4381, "step": 1866 }, { "epoch": 0.03467954834900844, "grad_norm": 0.25834909081459045, "learning_rate": 1.9940707924498843e-05, "loss": 0.3474, "step": 1868 }, { "epoch": 0.03471667848642708, "grad_norm": 0.4430135190486908, "learning_rate": 1.994058101872318e-05, "loss": 0.3914, "step": 1870 }, { "epoch": 0.034753808623845715, "grad_norm": 0.3168889284133911, "learning_rate": 1.9940453977685985e-05, "loss": 0.3955, "step": 1872 }, { "epoch": 0.03479093876126436, "grad_norm": 0.3557870388031006, "learning_rate": 1.9940326801388985e-05, "loss": 0.3993, "step": 1874 }, { "epoch": 0.03482806889868299, "grad_norm": 0.4322146773338318, "learning_rate": 1.9940199489833912e-05, "loss": 0.35, "step": 1876 }, { "epoch": 0.034865199036101635, "grad_norm": 0.3354968726634979, "learning_rate": 1.99400720430225e-05, "loss": 0.4373, "step": 1878 }, { "epoch": 0.03490232917352027, "grad_norm": 0.42228370904922485, "learning_rate": 1.993994446095648e-05, "loss": 0.4806, "step": 1880 }, { "epoch": 0.03493945931093891, "grad_norm": 0.316290020942688, "learning_rate": 1.993981674363759e-05, "loss": 0.5521, "step": 1882 }, { "epoch": 0.03497658944835755, "grad_norm": 0.3309059739112854, "learning_rate": 1.993968889106757e-05, "loss": 0.357, "step": 1884 }, { "epoch": 0.03501371958577619, "grad_norm": 0.3444094657897949, "learning_rate": 1.993956090324815e-05, "loss": 0.2369, "step": 1886 }, { "epoch": 0.035050849723194825, "grad_norm": 0.26614245772361755, "learning_rate": 1.9939432780181084e-05, "loss": 0.4925, "step": 1888 }, { "epoch": 0.03508797986061346, "grad_norm": 0.370993435382843, "learning_rate": 1.993930452186811e-05, "loss": 0.3997, "step": 1890 }, { "epoch": 0.0351251099980321, "grad_norm": 0.44502395391464233, "learning_rate": 1.993917612831097e-05, "loss": 0.3391, "step": 1892 }, { "epoch": 0.03516224013545074, "grad_norm": 0.297002375125885, "learning_rate": 1.993904759951142e-05, "loss": 0.5913, "step": 1894 }, { "epoch": 0.03519937027286938, "grad_norm": 0.3435524106025696, "learning_rate": 1.9938918935471196e-05, "loss": 0.5202, "step": 1896 }, { "epoch": 0.035236500410288016, "grad_norm": 0.5185344815254211, "learning_rate": 1.993879013619206e-05, "loss": 0.2987, "step": 1898 }, { "epoch": 0.03527363054770666, "grad_norm": 0.4222150444984436, "learning_rate": 1.993866120167576e-05, "loss": 0.3449, "step": 1900 }, { "epoch": 0.03531076068512529, "grad_norm": 0.3053376376628876, "learning_rate": 1.993853213192405e-05, "loss": 0.3897, "step": 1902 }, { "epoch": 0.035347890822543936, "grad_norm": 0.33486446738243103, "learning_rate": 1.9938402926938686e-05, "loss": 0.4871, "step": 1904 }, { "epoch": 0.03538502095996257, "grad_norm": 0.3348642885684967, "learning_rate": 1.9938273586721428e-05, "loss": 0.3193, "step": 1906 }, { "epoch": 0.03542215109738121, "grad_norm": 0.38619720935821533, "learning_rate": 1.993814411127404e-05, "loss": 0.3135, "step": 1908 }, { "epoch": 0.03545928123479985, "grad_norm": 0.3484084904193878, "learning_rate": 1.9938014500598274e-05, "loss": 0.3072, "step": 1910 }, { "epoch": 0.03549641137221849, "grad_norm": 0.33404597640037537, "learning_rate": 1.9937884754695894e-05, "loss": 0.3448, "step": 1912 }, { "epoch": 0.035533541509637126, "grad_norm": 0.34234708547592163, "learning_rate": 1.993775487356868e-05, "loss": 0.3552, "step": 1914 }, { "epoch": 0.03557067164705577, "grad_norm": 0.2922877371311188, "learning_rate": 1.9937624857218382e-05, "loss": 0.4182, "step": 1916 }, { "epoch": 0.035607801784474404, "grad_norm": 0.33263513445854187, "learning_rate": 1.9937494705646777e-05, "loss": 0.2987, "step": 1918 }, { "epoch": 0.035644931921893046, "grad_norm": 0.4626135528087616, "learning_rate": 1.9937364418855636e-05, "loss": 0.3211, "step": 1920 }, { "epoch": 0.03568206205931168, "grad_norm": 0.28774896264076233, "learning_rate": 1.9937233996846733e-05, "loss": 0.266, "step": 1922 }, { "epoch": 0.03571919219673032, "grad_norm": 0.2679785490036011, "learning_rate": 1.993710343962184e-05, "loss": 0.5221, "step": 1924 }, { "epoch": 0.03575632233414896, "grad_norm": 0.34216928482055664, "learning_rate": 1.993697274718273e-05, "loss": 0.39, "step": 1926 }, { "epoch": 0.035793452471567594, "grad_norm": 0.421111524105072, "learning_rate": 1.993684191953119e-05, "loss": 0.3922, "step": 1928 }, { "epoch": 0.035830582608986236, "grad_norm": 0.6337347030639648, "learning_rate": 1.9936710956668995e-05, "loss": 0.4566, "step": 1930 }, { "epoch": 0.03586771274640487, "grad_norm": 0.3697822690010071, "learning_rate": 1.993657985859793e-05, "loss": 0.4265, "step": 1932 }, { "epoch": 0.035904842883823514, "grad_norm": 0.3487740457057953, "learning_rate": 1.9936448625319772e-05, "loss": 0.4625, "step": 1934 }, { "epoch": 0.03594197302124215, "grad_norm": 0.34794843196868896, "learning_rate": 1.993631725683631e-05, "loss": 0.5031, "step": 1936 }, { "epoch": 0.03597910315866079, "grad_norm": 0.36677077412605286, "learning_rate": 1.993618575314934e-05, "loss": 0.4152, "step": 1938 }, { "epoch": 0.036016233296079427, "grad_norm": 0.3095185160636902, "learning_rate": 1.9936054114260643e-05, "loss": 0.4033, "step": 1940 }, { "epoch": 0.03605336343349807, "grad_norm": 0.32272282242774963, "learning_rate": 1.9935922340172006e-05, "loss": 0.2433, "step": 1942 }, { "epoch": 0.036090493570916704, "grad_norm": 0.36465156078338623, "learning_rate": 1.9935790430885232e-05, "loss": 0.2754, "step": 1944 }, { "epoch": 0.036127623708335346, "grad_norm": 0.4902269244194031, "learning_rate": 1.9935658386402107e-05, "loss": 0.2743, "step": 1946 }, { "epoch": 0.03616475384575398, "grad_norm": 0.37371352314949036, "learning_rate": 1.9935526206724435e-05, "loss": 0.5222, "step": 1948 }, { "epoch": 0.036201883983172624, "grad_norm": 0.28851598501205444, "learning_rate": 1.9935393891854014e-05, "loss": 0.2028, "step": 1950 }, { "epoch": 0.03623901412059126, "grad_norm": 0.37370339035987854, "learning_rate": 1.9935261441792638e-05, "loss": 0.2682, "step": 1952 }, { "epoch": 0.0362761442580099, "grad_norm": 0.3711865246295929, "learning_rate": 1.9935128856542117e-05, "loss": 0.1995, "step": 1954 }, { "epoch": 0.03631327439542854, "grad_norm": 0.33272868394851685, "learning_rate": 1.9934996136104246e-05, "loss": 0.3704, "step": 1956 }, { "epoch": 0.03635040453284718, "grad_norm": 0.2833925485610962, "learning_rate": 1.9934863280480842e-05, "loss": 0.4734, "step": 1958 }, { "epoch": 0.036387534670265814, "grad_norm": 0.3788817226886749, "learning_rate": 1.9934730289673704e-05, "loss": 0.4221, "step": 1960 }, { "epoch": 0.036424664807684456, "grad_norm": 0.2599087655544281, "learning_rate": 1.9934597163684647e-05, "loss": 0.3282, "step": 1962 }, { "epoch": 0.03646179494510309, "grad_norm": 0.3443366587162018, "learning_rate": 1.9934463902515478e-05, "loss": 0.367, "step": 1964 }, { "epoch": 0.03649892508252173, "grad_norm": 0.3527637720108032, "learning_rate": 1.9934330506168016e-05, "loss": 0.5122, "step": 1966 }, { "epoch": 0.03653605521994037, "grad_norm": 0.4264635741710663, "learning_rate": 1.9934196974644067e-05, "loss": 0.3896, "step": 1968 }, { "epoch": 0.036573185357359005, "grad_norm": 0.32263895869255066, "learning_rate": 1.993406330794546e-05, "loss": 0.4964, "step": 1970 }, { "epoch": 0.03661031549477765, "grad_norm": 0.5727680921554565, "learning_rate": 1.9933929506074002e-05, "loss": 0.5659, "step": 1972 }, { "epoch": 0.03664744563219628, "grad_norm": 0.2408292442560196, "learning_rate": 1.993379556903152e-05, "loss": 0.2288, "step": 1974 }, { "epoch": 0.036684575769614924, "grad_norm": 0.5993266105651855, "learning_rate": 1.9933661496819835e-05, "loss": 0.5051, "step": 1976 }, { "epoch": 0.03672170590703356, "grad_norm": 0.2792007327079773, "learning_rate": 1.9933527289440776e-05, "loss": 0.2766, "step": 1978 }, { "epoch": 0.0367588360444522, "grad_norm": 0.3402690589427948, "learning_rate": 1.9933392946896163e-05, "loss": 0.3342, "step": 1980 }, { "epoch": 0.03679596618187084, "grad_norm": 0.3172428607940674, "learning_rate": 1.9933258469187826e-05, "loss": 0.3284, "step": 1982 }, { "epoch": 0.03683309631928948, "grad_norm": 0.42654192447662354, "learning_rate": 1.993312385631759e-05, "loss": 0.3755, "step": 1984 }, { "epoch": 0.036870226456708115, "grad_norm": 0.41326162219047546, "learning_rate": 1.9932989108287294e-05, "loss": 0.467, "step": 1986 }, { "epoch": 0.03690735659412676, "grad_norm": 0.41839542984962463, "learning_rate": 1.9932854225098773e-05, "loss": 0.363, "step": 1988 }, { "epoch": 0.03694448673154539, "grad_norm": 0.23728081583976746, "learning_rate": 1.9932719206753853e-05, "loss": 0.1745, "step": 1990 }, { "epoch": 0.036981616868964035, "grad_norm": 0.29624655842781067, "learning_rate": 1.993258405325438e-05, "loss": 0.3023, "step": 1992 }, { "epoch": 0.03701874700638267, "grad_norm": 0.36446744203567505, "learning_rate": 1.9932448764602188e-05, "loss": 0.4905, "step": 1994 }, { "epoch": 0.03705587714380131, "grad_norm": 0.2961815595626831, "learning_rate": 1.993231334079912e-05, "loss": 0.2249, "step": 1996 }, { "epoch": 0.03709300728121995, "grad_norm": 0.8085873126983643, "learning_rate": 1.993217778184702e-05, "loss": 0.5056, "step": 1998 }, { "epoch": 0.03713013741863859, "grad_norm": 0.46194371581077576, "learning_rate": 1.9932042087747727e-05, "loss": 0.4027, "step": 2000 }, { "epoch": 0.037167267556057225, "grad_norm": 0.2922123968601227, "learning_rate": 1.9931906258503093e-05, "loss": 0.4124, "step": 2002 }, { "epoch": 0.03720439769347586, "grad_norm": 0.43618541955947876, "learning_rate": 1.9931770294114965e-05, "loss": 0.3928, "step": 2004 }, { "epoch": 0.0372415278308945, "grad_norm": 0.37184783816337585, "learning_rate": 1.9931634194585193e-05, "loss": 0.2516, "step": 2006 }, { "epoch": 0.03727865796831314, "grad_norm": 0.4159109890460968, "learning_rate": 1.9931497959915624e-05, "loss": 0.2019, "step": 2008 }, { "epoch": 0.03731578810573178, "grad_norm": 0.3476172089576721, "learning_rate": 1.993136159010812e-05, "loss": 0.4736, "step": 2010 }, { "epoch": 0.037352918243150415, "grad_norm": 0.3119805157184601, "learning_rate": 1.9931225085164533e-05, "loss": 0.3696, "step": 2012 }, { "epoch": 0.03739004838056906, "grad_norm": 0.37937813997268677, "learning_rate": 1.993108844508672e-05, "loss": 0.2255, "step": 2014 }, { "epoch": 0.03742717851798769, "grad_norm": 0.38822057843208313, "learning_rate": 1.9930951669876537e-05, "loss": 0.3128, "step": 2016 }, { "epoch": 0.037464308655406335, "grad_norm": 0.31536105275154114, "learning_rate": 1.993081475953585e-05, "loss": 0.3901, "step": 2018 }, { "epoch": 0.03750143879282497, "grad_norm": 0.38921961188316345, "learning_rate": 1.993067771406652e-05, "loss": 0.2179, "step": 2020 }, { "epoch": 0.03753856893024361, "grad_norm": 0.46892279386520386, "learning_rate": 1.9930540533470415e-05, "loss": 0.3632, "step": 2022 }, { "epoch": 0.03757569906766225, "grad_norm": 0.3340199887752533, "learning_rate": 1.9930403217749395e-05, "loss": 0.4507, "step": 2024 }, { "epoch": 0.03761282920508089, "grad_norm": 0.31743505597114563, "learning_rate": 1.9930265766905337e-05, "loss": 0.3638, "step": 2026 }, { "epoch": 0.037649959342499525, "grad_norm": 0.4792838990688324, "learning_rate": 1.9930128180940103e-05, "loss": 0.5273, "step": 2028 }, { "epoch": 0.03768708947991817, "grad_norm": 0.29156723618507385, "learning_rate": 1.992999045985557e-05, "loss": 0.3429, "step": 2030 }, { "epoch": 0.0377242196173368, "grad_norm": 0.33172696828842163, "learning_rate": 1.9929852603653608e-05, "loss": 0.4972, "step": 2032 }, { "epoch": 0.037761349754755445, "grad_norm": 0.23541422188282013, "learning_rate": 1.9929714612336094e-05, "loss": 0.1105, "step": 2034 }, { "epoch": 0.03779847989217408, "grad_norm": 0.3401322662830353, "learning_rate": 1.9929576485904913e-05, "loss": 0.5482, "step": 2036 }, { "epoch": 0.03783561002959272, "grad_norm": 0.267124205827713, "learning_rate": 1.9929438224361935e-05, "loss": 0.1828, "step": 2038 }, { "epoch": 0.03787274016701136, "grad_norm": 0.4598195552825928, "learning_rate": 1.9929299827709046e-05, "loss": 0.2551, "step": 2040 }, { "epoch": 0.03790987030442999, "grad_norm": 0.34230053424835205, "learning_rate": 1.9929161295948127e-05, "loss": 0.3868, "step": 2042 }, { "epoch": 0.037947000441848636, "grad_norm": 0.32452279329299927, "learning_rate": 1.9929022629081065e-05, "loss": 0.3717, "step": 2044 }, { "epoch": 0.03798413057926727, "grad_norm": 0.28409162163734436, "learning_rate": 1.9928883827109745e-05, "loss": 0.3212, "step": 2046 }, { "epoch": 0.03802126071668591, "grad_norm": 0.3782145380973816, "learning_rate": 1.992874489003606e-05, "loss": 0.352, "step": 2048 }, { "epoch": 0.03805839085410455, "grad_norm": 0.3633666932582855, "learning_rate": 1.9928605817861893e-05, "loss": 0.3211, "step": 2050 }, { "epoch": 0.03809552099152319, "grad_norm": 0.29387167096138, "learning_rate": 1.9928466610589142e-05, "loss": 0.37, "step": 2052 }, { "epoch": 0.038132651128941826, "grad_norm": 0.39630770683288574, "learning_rate": 1.99283272682197e-05, "loss": 0.3872, "step": 2054 }, { "epoch": 0.03816978126636047, "grad_norm": 0.3808368444442749, "learning_rate": 1.992818779075546e-05, "loss": 0.396, "step": 2056 }, { "epoch": 0.038206911403779104, "grad_norm": 0.2896334230899811, "learning_rate": 1.9928048178198325e-05, "loss": 0.2733, "step": 2058 }, { "epoch": 0.038244041541197746, "grad_norm": 0.3005021810531616, "learning_rate": 1.9927908430550192e-05, "loss": 0.3716, "step": 2060 }, { "epoch": 0.03828117167861638, "grad_norm": 0.4141136407852173, "learning_rate": 1.9927768547812963e-05, "loss": 0.3235, "step": 2062 }, { "epoch": 0.03831830181603502, "grad_norm": 0.3960406184196472, "learning_rate": 1.9927628529988537e-05, "loss": 0.4165, "step": 2064 }, { "epoch": 0.03835543195345366, "grad_norm": 0.4837163984775543, "learning_rate": 1.992748837707883e-05, "loss": 0.4182, "step": 2066 }, { "epoch": 0.0383925620908723, "grad_norm": 0.312007337808609, "learning_rate": 1.9927348089085738e-05, "loss": 0.2246, "step": 2068 }, { "epoch": 0.038429692228290936, "grad_norm": 0.3793455958366394, "learning_rate": 1.992720766601118e-05, "loss": 0.5252, "step": 2070 }, { "epoch": 0.03846682236570958, "grad_norm": 0.30731791257858276, "learning_rate": 1.9927067107857053e-05, "loss": 0.25, "step": 2072 }, { "epoch": 0.038503952503128214, "grad_norm": 0.3175412118434906, "learning_rate": 1.9926926414625282e-05, "loss": 0.5231, "step": 2074 }, { "epoch": 0.038541082640546856, "grad_norm": 0.3225423991680145, "learning_rate": 1.9926785586317778e-05, "loss": 0.2602, "step": 2076 }, { "epoch": 0.03857821277796549, "grad_norm": 0.4689269959926605, "learning_rate": 1.9926644622936453e-05, "loss": 0.5089, "step": 2078 }, { "epoch": 0.03861534291538413, "grad_norm": 0.3424060046672821, "learning_rate": 1.9926503524483233e-05, "loss": 0.5296, "step": 2080 }, { "epoch": 0.03865247305280277, "grad_norm": 0.3351812958717346, "learning_rate": 1.9926362290960028e-05, "loss": 0.3798, "step": 2082 }, { "epoch": 0.038689603190221404, "grad_norm": 0.4459707736968994, "learning_rate": 1.992622092236877e-05, "loss": 0.3672, "step": 2084 }, { "epoch": 0.038726733327640046, "grad_norm": 0.26623523235321045, "learning_rate": 1.9926079418711376e-05, "loss": 0.4286, "step": 2086 }, { "epoch": 0.03876386346505868, "grad_norm": 0.3457959294319153, "learning_rate": 1.992593777998977e-05, "loss": 0.4096, "step": 2088 }, { "epoch": 0.038800993602477324, "grad_norm": 0.3527875244617462, "learning_rate": 1.9925796006205886e-05, "loss": 0.2598, "step": 2090 }, { "epoch": 0.03883812373989596, "grad_norm": 0.30972811579704285, "learning_rate": 1.9925654097361652e-05, "loss": 0.5152, "step": 2092 }, { "epoch": 0.0388752538773146, "grad_norm": 0.32219821214675903, "learning_rate": 1.9925512053458994e-05, "loss": 0.4621, "step": 2094 }, { "epoch": 0.03891238401473324, "grad_norm": 0.2656967043876648, "learning_rate": 1.9925369874499843e-05, "loss": 0.3362, "step": 2096 }, { "epoch": 0.03894951415215188, "grad_norm": 0.24639476835727692, "learning_rate": 1.9925227560486144e-05, "loss": 0.4291, "step": 2098 }, { "epoch": 0.038986644289570514, "grad_norm": 0.35264506936073303, "learning_rate": 1.9925085111419824e-05, "loss": 0.2616, "step": 2100 }, { "epoch": 0.039023774426989156, "grad_norm": 0.3753797709941864, "learning_rate": 1.9924942527302826e-05, "loss": 0.3108, "step": 2102 }, { "epoch": 0.03906090456440779, "grad_norm": 0.4267929196357727, "learning_rate": 1.992479980813709e-05, "loss": 0.3397, "step": 2104 }, { "epoch": 0.039098034701826434, "grad_norm": 0.3270409107208252, "learning_rate": 1.9924656953924553e-05, "loss": 0.2825, "step": 2106 }, { "epoch": 0.03913516483924507, "grad_norm": 0.37892788648605347, "learning_rate": 1.9924513964667166e-05, "loss": 0.4369, "step": 2108 }, { "epoch": 0.03917229497666371, "grad_norm": 0.2995428144931793, "learning_rate": 1.992437084036687e-05, "loss": 0.481, "step": 2110 }, { "epoch": 0.03920942511408235, "grad_norm": 0.33561640977859497, "learning_rate": 1.9924227581025613e-05, "loss": 0.3415, "step": 2112 }, { "epoch": 0.03924655525150099, "grad_norm": 0.46927258372306824, "learning_rate": 1.9924084186645347e-05, "loss": 0.3322, "step": 2114 }, { "epoch": 0.039283685388919624, "grad_norm": 0.5767914652824402, "learning_rate": 1.992394065722802e-05, "loss": 0.4341, "step": 2116 }, { "epoch": 0.03932081552633826, "grad_norm": 0.3032285273075104, "learning_rate": 1.9923796992775587e-05, "loss": 0.3981, "step": 2118 }, { "epoch": 0.0393579456637569, "grad_norm": 0.28246644139289856, "learning_rate": 1.9923653193290003e-05, "loss": 0.3932, "step": 2120 }, { "epoch": 0.03939507580117554, "grad_norm": 0.42464685440063477, "learning_rate": 1.9923509258773222e-05, "loss": 0.5505, "step": 2122 }, { "epoch": 0.03943220593859418, "grad_norm": 0.3257697522640228, "learning_rate": 1.9923365189227205e-05, "loss": 0.3472, "step": 2124 }, { "epoch": 0.039469336076012815, "grad_norm": 0.2895216643810272, "learning_rate": 1.992322098465391e-05, "loss": 0.1977, "step": 2126 }, { "epoch": 0.03950646621343146, "grad_norm": 0.3868873119354248, "learning_rate": 1.9923076645055305e-05, "loss": 0.3542, "step": 2128 }, { "epoch": 0.03954359635085009, "grad_norm": 0.2838451564311981, "learning_rate": 1.9922932170433345e-05, "loss": 0.3035, "step": 2130 }, { "epoch": 0.039580726488268735, "grad_norm": 0.3076413571834564, "learning_rate": 1.9922787560790007e-05, "loss": 0.4281, "step": 2132 }, { "epoch": 0.03961785662568737, "grad_norm": 0.40551063418388367, "learning_rate": 1.992264281612725e-05, "loss": 0.2346, "step": 2134 }, { "epoch": 0.03965498676310601, "grad_norm": 0.4483836591243744, "learning_rate": 1.9922497936447042e-05, "loss": 0.3487, "step": 2136 }, { "epoch": 0.03969211690052465, "grad_norm": 0.33309370279312134, "learning_rate": 1.9922352921751363e-05, "loss": 0.2908, "step": 2138 }, { "epoch": 0.03972924703794329, "grad_norm": 0.32426717877388, "learning_rate": 1.992220777204218e-05, "loss": 0.549, "step": 2140 }, { "epoch": 0.039766377175361925, "grad_norm": 0.348467081785202, "learning_rate": 1.992206248732147e-05, "loss": 0.2715, "step": 2142 }, { "epoch": 0.03980350731278057, "grad_norm": 0.38909488916397095, "learning_rate": 1.9921917067591206e-05, "loss": 0.335, "step": 2144 }, { "epoch": 0.0398406374501992, "grad_norm": 0.6617769002914429, "learning_rate": 1.9921771512853378e-05, "loss": 0.326, "step": 2146 }, { "epoch": 0.039877767587617845, "grad_norm": 0.3197513818740845, "learning_rate": 1.9921625823109953e-05, "loss": 0.2736, "step": 2148 }, { "epoch": 0.03991489772503648, "grad_norm": 0.2721817195415497, "learning_rate": 1.9921479998362917e-05, "loss": 0.4214, "step": 2150 }, { "epoch": 0.03995202786245512, "grad_norm": 0.4922572672367096, "learning_rate": 1.9921334038614262e-05, "loss": 0.4888, "step": 2152 }, { "epoch": 0.03998915799987376, "grad_norm": 0.3573835790157318, "learning_rate": 1.9921187943865967e-05, "loss": 0.4041, "step": 2154 }, { "epoch": 0.04002628813729239, "grad_norm": 0.35106274485588074, "learning_rate": 1.992104171412002e-05, "loss": 0.3319, "step": 2156 }, { "epoch": 0.040063418274711035, "grad_norm": 0.3410296142101288, "learning_rate": 1.9920895349378415e-05, "loss": 0.2539, "step": 2158 }, { "epoch": 0.04010054841212967, "grad_norm": 0.3853680491447449, "learning_rate": 1.992074884964314e-05, "loss": 0.275, "step": 2160 }, { "epoch": 0.04013767854954831, "grad_norm": 0.519813597202301, "learning_rate": 1.9920602214916188e-05, "loss": 0.4718, "step": 2162 }, { "epoch": 0.04017480868696695, "grad_norm": 0.383322536945343, "learning_rate": 1.9920455445199555e-05, "loss": 0.2536, "step": 2164 }, { "epoch": 0.04021193882438559, "grad_norm": 0.3393462598323822, "learning_rate": 1.992030854049524e-05, "loss": 0.3868, "step": 2166 }, { "epoch": 0.040249068961804225, "grad_norm": 0.35493627190589905, "learning_rate": 1.992016150080524e-05, "loss": 0.3806, "step": 2168 }, { "epoch": 0.04028619909922287, "grad_norm": 0.336121529340744, "learning_rate": 1.9920014326131555e-05, "loss": 0.4996, "step": 2170 }, { "epoch": 0.0403233292366415, "grad_norm": 0.41170138120651245, "learning_rate": 1.9919867016476192e-05, "loss": 0.2911, "step": 2172 }, { "epoch": 0.040360459374060145, "grad_norm": 0.497999370098114, "learning_rate": 1.9919719571841152e-05, "loss": 0.5393, "step": 2174 }, { "epoch": 0.04039758951147878, "grad_norm": 0.33208751678466797, "learning_rate": 1.991957199222844e-05, "loss": 0.4649, "step": 2176 }, { "epoch": 0.04043471964889742, "grad_norm": 0.3563458025455475, "learning_rate": 1.9919424277640066e-05, "loss": 0.5845, "step": 2178 }, { "epoch": 0.04047184978631606, "grad_norm": 0.6497002243995667, "learning_rate": 1.991927642807804e-05, "loss": 0.477, "step": 2180 }, { "epoch": 0.0405089799237347, "grad_norm": 0.2925940752029419, "learning_rate": 1.991912844354437e-05, "loss": 0.5139, "step": 2182 }, { "epoch": 0.040546110061153336, "grad_norm": 0.33827826380729675, "learning_rate": 1.991898032404108e-05, "loss": 0.2976, "step": 2184 }, { "epoch": 0.04058324019857198, "grad_norm": 0.3400142788887024, "learning_rate": 1.991883206957017e-05, "loss": 0.3475, "step": 2186 }, { "epoch": 0.04062037033599061, "grad_norm": 0.4645389914512634, "learning_rate": 1.9918683680133673e-05, "loss": 0.2604, "step": 2188 }, { "epoch": 0.040657500473409255, "grad_norm": 0.46448948979377747, "learning_rate": 1.99185351557336e-05, "loss": 0.3129, "step": 2190 }, { "epoch": 0.04069463061082789, "grad_norm": 0.3606633245944977, "learning_rate": 1.991838649637197e-05, "loss": 0.388, "step": 2192 }, { "epoch": 0.040731760748246526, "grad_norm": 0.35943177342414856, "learning_rate": 1.991823770205081e-05, "loss": 0.4715, "step": 2194 }, { "epoch": 0.04076889088566517, "grad_norm": 0.47583284974098206, "learning_rate": 1.9918088772772145e-05, "loss": 0.4741, "step": 2196 }, { "epoch": 0.040806021023083804, "grad_norm": 0.3376258313655853, "learning_rate": 1.9917939708538e-05, "loss": 0.2445, "step": 2198 }, { "epoch": 0.040843151160502446, "grad_norm": 0.3509151041507721, "learning_rate": 1.9917790509350402e-05, "loss": 0.24, "step": 2200 }, { "epoch": 0.04088028129792108, "grad_norm": 0.370469868183136, "learning_rate": 1.9917641175211383e-05, "loss": 0.345, "step": 2202 }, { "epoch": 0.04091741143533972, "grad_norm": 0.35861000418663025, "learning_rate": 1.9917491706122974e-05, "loss": 0.4812, "step": 2204 }, { "epoch": 0.04095454157275836, "grad_norm": 0.4066939353942871, "learning_rate": 1.9917342102087207e-05, "loss": 0.3281, "step": 2206 }, { "epoch": 0.040991671710177, "grad_norm": 0.49802619218826294, "learning_rate": 1.9917192363106122e-05, "loss": 0.4234, "step": 2208 }, { "epoch": 0.041028801847595636, "grad_norm": 0.30297183990478516, "learning_rate": 1.9917042489181755e-05, "loss": 0.4447, "step": 2210 }, { "epoch": 0.04106593198501428, "grad_norm": 0.375549852848053, "learning_rate": 1.9916892480316142e-05, "loss": 0.3326, "step": 2212 }, { "epoch": 0.041103062122432914, "grad_norm": 0.460097074508667, "learning_rate": 1.991674233651133e-05, "loss": 0.2396, "step": 2214 }, { "epoch": 0.041140192259851556, "grad_norm": 0.40191009640693665, "learning_rate": 1.9916592057769358e-05, "loss": 0.5136, "step": 2216 }, { "epoch": 0.04117732239727019, "grad_norm": 0.36045041680336, "learning_rate": 1.9916441644092273e-05, "loss": 0.254, "step": 2218 }, { "epoch": 0.041214452534688834, "grad_norm": 0.44894397258758545, "learning_rate": 1.9916291095482117e-05, "loss": 0.2642, "step": 2220 }, { "epoch": 0.04125158267210747, "grad_norm": 0.2763565182685852, "learning_rate": 1.9916140411940945e-05, "loss": 0.3243, "step": 2222 }, { "epoch": 0.04128871280952611, "grad_norm": 0.4381501376628876, "learning_rate": 1.9915989593470802e-05, "loss": 0.4769, "step": 2224 }, { "epoch": 0.041325842946944746, "grad_norm": 0.36060407757759094, "learning_rate": 1.9915838640073746e-05, "loss": 0.1162, "step": 2226 }, { "epoch": 0.04136297308436339, "grad_norm": 0.3952736258506775, "learning_rate": 1.9915687551751825e-05, "loss": 0.2006, "step": 2228 }, { "epoch": 0.041400103221782024, "grad_norm": 0.29216158390045166, "learning_rate": 1.9915536328507093e-05, "loss": 0.4255, "step": 2230 }, { "epoch": 0.04143723335920066, "grad_norm": 0.27519091963768005, "learning_rate": 1.9915384970341617e-05, "loss": 0.2669, "step": 2232 }, { "epoch": 0.0414743634966193, "grad_norm": 0.27323049306869507, "learning_rate": 1.991523347725745e-05, "loss": 0.2183, "step": 2234 }, { "epoch": 0.04151149363403794, "grad_norm": 0.454260915517807, "learning_rate": 1.991508184925666e-05, "loss": 0.1625, "step": 2236 }, { "epoch": 0.04154862377145658, "grad_norm": 0.3792349398136139, "learning_rate": 1.99149300863413e-05, "loss": 0.456, "step": 2238 }, { "epoch": 0.041585753908875214, "grad_norm": 0.4887813925743103, "learning_rate": 1.991477818851344e-05, "loss": 0.5328, "step": 2240 }, { "epoch": 0.041622884046293857, "grad_norm": 0.3308490216732025, "learning_rate": 1.991462615577515e-05, "loss": 0.2673, "step": 2242 }, { "epoch": 0.04166001418371249, "grad_norm": 0.298992782831192, "learning_rate": 1.9914473988128494e-05, "loss": 0.4752, "step": 2244 }, { "epoch": 0.041697144321131134, "grad_norm": 0.2960985600948334, "learning_rate": 1.9914321685575543e-05, "loss": 0.2969, "step": 2246 }, { "epoch": 0.04173427445854977, "grad_norm": 0.3824763596057892, "learning_rate": 1.9914169248118375e-05, "loss": 0.3026, "step": 2248 }, { "epoch": 0.04177140459596841, "grad_norm": 0.3746114671230316, "learning_rate": 1.9914016675759057e-05, "loss": 0.3488, "step": 2250 }, { "epoch": 0.04180853473338705, "grad_norm": 0.33106729388237, "learning_rate": 1.991386396849967e-05, "loss": 0.4569, "step": 2252 }, { "epoch": 0.04184566487080569, "grad_norm": 0.38767120242118835, "learning_rate": 1.9913711126342285e-05, "loss": 0.356, "step": 2254 }, { "epoch": 0.041882795008224324, "grad_norm": 0.44010499119758606, "learning_rate": 1.9913558149288988e-05, "loss": 0.4348, "step": 2256 }, { "epoch": 0.04191992514564297, "grad_norm": 0.30764085054397583, "learning_rate": 1.9913405037341863e-05, "loss": 0.4257, "step": 2258 }, { "epoch": 0.0419570552830616, "grad_norm": 0.34466466307640076, "learning_rate": 1.9913251790502985e-05, "loss": 0.4376, "step": 2260 }, { "epoch": 0.041994185420480244, "grad_norm": 0.36967703700065613, "learning_rate": 1.9913098408774447e-05, "loss": 0.3009, "step": 2262 }, { "epoch": 0.04203131555789888, "grad_norm": 0.357180118560791, "learning_rate": 1.991294489215833e-05, "loss": 0.4859, "step": 2264 }, { "epoch": 0.04206844569531752, "grad_norm": 0.432104229927063, "learning_rate": 1.9912791240656726e-05, "loss": 0.4087, "step": 2266 }, { "epoch": 0.04210557583273616, "grad_norm": 0.379543274641037, "learning_rate": 1.991263745427173e-05, "loss": 0.4416, "step": 2268 }, { "epoch": 0.04214270597015479, "grad_norm": 0.3863603472709656, "learning_rate": 1.9912483533005426e-05, "loss": 0.4804, "step": 2270 }, { "epoch": 0.042179836107573435, "grad_norm": 0.3551326394081116, "learning_rate": 1.9912329476859913e-05, "loss": 0.4418, "step": 2272 }, { "epoch": 0.04221696624499207, "grad_norm": 0.3660787045955658, "learning_rate": 1.9912175285837286e-05, "loss": 0.4397, "step": 2274 }, { "epoch": 0.04225409638241071, "grad_norm": 0.392020583152771, "learning_rate": 1.9912020959939644e-05, "loss": 0.2551, "step": 2276 }, { "epoch": 0.04229122651982935, "grad_norm": 0.4010932445526123, "learning_rate": 1.9911866499169088e-05, "loss": 0.363, "step": 2278 }, { "epoch": 0.04232835665724799, "grad_norm": 0.4786839187145233, "learning_rate": 1.991171190352772e-05, "loss": 0.5113, "step": 2280 }, { "epoch": 0.042365486794666625, "grad_norm": 0.3111361563205719, "learning_rate": 1.991155717301764e-05, "loss": 0.373, "step": 2282 }, { "epoch": 0.04240261693208527, "grad_norm": 0.33212417364120483, "learning_rate": 1.9911402307640953e-05, "loss": 0.3908, "step": 2284 }, { "epoch": 0.0424397470695039, "grad_norm": 0.2925907075405121, "learning_rate": 1.991124730739977e-05, "loss": 0.5159, "step": 2286 }, { "epoch": 0.042476877206922545, "grad_norm": 0.43963900208473206, "learning_rate": 1.99110921722962e-05, "loss": 0.3137, "step": 2288 }, { "epoch": 0.04251400734434118, "grad_norm": 0.5904701352119446, "learning_rate": 1.9910936902332355e-05, "loss": 0.3314, "step": 2290 }, { "epoch": 0.04255113748175982, "grad_norm": 0.26255282759666443, "learning_rate": 1.9910781497510342e-05, "loss": 0.3621, "step": 2292 }, { "epoch": 0.04258826761917846, "grad_norm": 0.22866976261138916, "learning_rate": 1.991062595783228e-05, "loss": 0.4155, "step": 2294 }, { "epoch": 0.0426253977565971, "grad_norm": 0.35680580139160156, "learning_rate": 1.9910470283300283e-05, "loss": 0.3972, "step": 2296 }, { "epoch": 0.042662527894015735, "grad_norm": 0.35472118854522705, "learning_rate": 1.9910314473916475e-05, "loss": 0.4141, "step": 2298 }, { "epoch": 0.04269965803143438, "grad_norm": 0.33780795335769653, "learning_rate": 1.9910158529682966e-05, "loss": 0.255, "step": 2300 }, { "epoch": 0.04273678816885301, "grad_norm": 0.39178887009620667, "learning_rate": 1.991000245060189e-05, "loss": 0.4114, "step": 2302 }, { "epoch": 0.042773918306271655, "grad_norm": 0.29477831721305847, "learning_rate": 1.9909846236675357e-05, "loss": 0.3544, "step": 2304 }, { "epoch": 0.04281104844369029, "grad_norm": 0.36197277903556824, "learning_rate": 1.9909689887905506e-05, "loss": 0.3376, "step": 2306 }, { "epoch": 0.042848178581108926, "grad_norm": 0.5479002594947815, "learning_rate": 1.990953340429446e-05, "loss": 0.3933, "step": 2308 }, { "epoch": 0.04288530871852757, "grad_norm": 0.2806760370731354, "learning_rate": 1.990937678584434e-05, "loss": 0.4755, "step": 2310 }, { "epoch": 0.0429224388559462, "grad_norm": 0.5285623669624329, "learning_rate": 1.990922003255729e-05, "loss": 0.4073, "step": 2312 }, { "epoch": 0.042959568993364845, "grad_norm": 0.2632940411567688, "learning_rate": 1.9909063144435432e-05, "loss": 0.5134, "step": 2314 }, { "epoch": 0.04299669913078348, "grad_norm": 0.26469793915748596, "learning_rate": 1.9908906121480908e-05, "loss": 0.3295, "step": 2316 }, { "epoch": 0.04303382926820212, "grad_norm": 0.44179654121398926, "learning_rate": 1.9908748963695855e-05, "loss": 0.4447, "step": 2318 }, { "epoch": 0.04307095940562076, "grad_norm": 0.4057632088661194, "learning_rate": 1.9908591671082408e-05, "loss": 0.1573, "step": 2320 }, { "epoch": 0.0431080895430394, "grad_norm": 0.30882033705711365, "learning_rate": 1.9908434243642705e-05, "loss": 0.3017, "step": 2322 }, { "epoch": 0.043145219680458036, "grad_norm": 0.3255697786808014, "learning_rate": 1.990827668137889e-05, "loss": 0.544, "step": 2324 }, { "epoch": 0.04318234981787668, "grad_norm": 0.3034602105617523, "learning_rate": 1.9908118984293114e-05, "loss": 0.4561, "step": 2326 }, { "epoch": 0.04321947995529531, "grad_norm": 0.31836676597595215, "learning_rate": 1.9907961152387512e-05, "loss": 0.4676, "step": 2328 }, { "epoch": 0.043256610092713955, "grad_norm": 0.29657238721847534, "learning_rate": 1.990780318566424e-05, "loss": 0.5265, "step": 2330 }, { "epoch": 0.04329374023013259, "grad_norm": 0.43714040517807007, "learning_rate": 1.990764508412544e-05, "loss": 0.37, "step": 2332 }, { "epoch": 0.04333087036755123, "grad_norm": 0.436774879693985, "learning_rate": 1.9907486847773268e-05, "loss": 0.363, "step": 2334 }, { "epoch": 0.04336800050496987, "grad_norm": 0.34877175092697144, "learning_rate": 1.990732847660988e-05, "loss": 0.296, "step": 2336 }, { "epoch": 0.04340513064238851, "grad_norm": 0.34823158383369446, "learning_rate": 1.9907169970637423e-05, "loss": 0.5373, "step": 2338 }, { "epoch": 0.043442260779807146, "grad_norm": 0.39081087708473206, "learning_rate": 1.9907011329858063e-05, "loss": 0.2426, "step": 2340 }, { "epoch": 0.04347939091722579, "grad_norm": 0.3765751123428345, "learning_rate": 1.990685255427395e-05, "loss": 0.181, "step": 2342 }, { "epoch": 0.04351652105464442, "grad_norm": 0.5395621657371521, "learning_rate": 1.9906693643887248e-05, "loss": 0.5283, "step": 2344 }, { "epoch": 0.04355365119206306, "grad_norm": 0.44549351930618286, "learning_rate": 1.990653459870012e-05, "loss": 0.1863, "step": 2346 }, { "epoch": 0.0435907813294817, "grad_norm": 0.3754114508628845, "learning_rate": 1.9906375418714733e-05, "loss": 0.4069, "step": 2348 }, { "epoch": 0.043627911466900336, "grad_norm": 0.42772772908210754, "learning_rate": 1.9906216103933246e-05, "loss": 0.3083, "step": 2350 }, { "epoch": 0.04366504160431898, "grad_norm": 0.344629168510437, "learning_rate": 1.990605665435783e-05, "loss": 0.3885, "step": 2352 }, { "epoch": 0.043702171741737614, "grad_norm": 0.24962933361530304, "learning_rate": 1.9905897069990655e-05, "loss": 0.2465, "step": 2354 }, { "epoch": 0.043739301879156256, "grad_norm": 0.3382076621055603, "learning_rate": 1.9905737350833894e-05, "loss": 0.4076, "step": 2356 }, { "epoch": 0.04377643201657489, "grad_norm": 0.32117506861686707, "learning_rate": 1.990557749688972e-05, "loss": 0.2161, "step": 2358 }, { "epoch": 0.043813562153993534, "grad_norm": 0.3750784993171692, "learning_rate": 1.9905417508160304e-05, "loss": 0.3565, "step": 2360 }, { "epoch": 0.04385069229141217, "grad_norm": 0.20360666513442993, "learning_rate": 1.9905257384647825e-05, "loss": 0.2999, "step": 2362 }, { "epoch": 0.04388782242883081, "grad_norm": 0.24929066002368927, "learning_rate": 1.9905097126354468e-05, "loss": 0.3624, "step": 2364 }, { "epoch": 0.043924952566249446, "grad_norm": 0.24092373251914978, "learning_rate": 1.9904936733282404e-05, "loss": 0.3969, "step": 2366 }, { "epoch": 0.04396208270366809, "grad_norm": 0.3946561813354492, "learning_rate": 1.990477620543382e-05, "loss": 0.385, "step": 2368 }, { "epoch": 0.043999212841086724, "grad_norm": 0.4159776568412781, "learning_rate": 1.99046155428109e-05, "loss": 0.2834, "step": 2370 }, { "epoch": 0.044036342978505366, "grad_norm": 0.3017308712005615, "learning_rate": 1.9904454745415834e-05, "loss": 0.2876, "step": 2372 }, { "epoch": 0.044073473115924, "grad_norm": 0.3111267387866974, "learning_rate": 1.9904293813250803e-05, "loss": 0.4372, "step": 2374 }, { "epoch": 0.044110603253342644, "grad_norm": 0.348039448261261, "learning_rate": 1.9904132746317998e-05, "loss": 0.3744, "step": 2376 }, { "epoch": 0.04414773339076128, "grad_norm": 0.28794625401496887, "learning_rate": 1.9903971544619615e-05, "loss": 0.3805, "step": 2378 }, { "epoch": 0.04418486352817992, "grad_norm": 0.3251354992389679, "learning_rate": 1.9903810208157847e-05, "loss": 0.3764, "step": 2380 }, { "epoch": 0.04422199366559856, "grad_norm": 0.34317511320114136, "learning_rate": 1.9903648736934885e-05, "loss": 0.3482, "step": 2382 }, { "epoch": 0.04425912380301719, "grad_norm": 0.1962006539106369, "learning_rate": 1.9903487130952928e-05, "loss": 0.1744, "step": 2384 }, { "epoch": 0.044296253940435834, "grad_norm": 0.36925792694091797, "learning_rate": 1.990332539021418e-05, "loss": 0.4086, "step": 2386 }, { "epoch": 0.04433338407785447, "grad_norm": 0.3420776426792145, "learning_rate": 1.9903163514720833e-05, "loss": 0.4173, "step": 2388 }, { "epoch": 0.04437051421527311, "grad_norm": 0.2605527341365814, "learning_rate": 1.990300150447509e-05, "loss": 0.3825, "step": 2390 }, { "epoch": 0.04440764435269175, "grad_norm": 0.33080989122390747, "learning_rate": 1.990283935947917e-05, "loss": 0.4515, "step": 2392 }, { "epoch": 0.04444477449011039, "grad_norm": 0.41104617714881897, "learning_rate": 1.990267707973526e-05, "loss": 0.5782, "step": 2394 }, { "epoch": 0.044481904627529024, "grad_norm": 0.2557139992713928, "learning_rate": 1.9902514665245582e-05, "loss": 0.2922, "step": 2396 }, { "epoch": 0.04451903476494767, "grad_norm": 0.3282010853290558, "learning_rate": 1.990235211601234e-05, "loss": 0.3979, "step": 2398 }, { "epoch": 0.0445561649023663, "grad_norm": 0.30607494711875916, "learning_rate": 1.9902189432037744e-05, "loss": 0.1829, "step": 2400 }, { "epoch": 0.044593295039784944, "grad_norm": 0.40720054507255554, "learning_rate": 1.990202661332401e-05, "loss": 0.3416, "step": 2402 }, { "epoch": 0.04463042517720358, "grad_norm": 0.3787792921066284, "learning_rate": 1.9901863659873356e-05, "loss": 0.465, "step": 2404 }, { "epoch": 0.04466755531462222, "grad_norm": 0.5011675953865051, "learning_rate": 1.9901700571687997e-05, "loss": 0.3458, "step": 2406 }, { "epoch": 0.04470468545204086, "grad_norm": 0.33339226245880127, "learning_rate": 1.9901537348770153e-05, "loss": 0.3946, "step": 2408 }, { "epoch": 0.0447418155894595, "grad_norm": 0.33788949251174927, "learning_rate": 1.9901373991122042e-05, "loss": 0.2818, "step": 2410 }, { "epoch": 0.044778945726878135, "grad_norm": 0.37276116013526917, "learning_rate": 1.990121049874589e-05, "loss": 0.3801, "step": 2412 }, { "epoch": 0.04481607586429678, "grad_norm": 0.27786198258399963, "learning_rate": 1.9901046871643924e-05, "loss": 0.3944, "step": 2414 }, { "epoch": 0.04485320600171541, "grad_norm": 0.9589679837226868, "learning_rate": 1.990088310981836e-05, "loss": 0.3325, "step": 2416 }, { "epoch": 0.04489033613913405, "grad_norm": 0.30228763818740845, "learning_rate": 1.990071921327144e-05, "loss": 0.3787, "step": 2418 }, { "epoch": 0.04492746627655269, "grad_norm": 0.4011729955673218, "learning_rate": 1.9900555182005385e-05, "loss": 0.4329, "step": 2420 }, { "epoch": 0.044964596413971325, "grad_norm": 0.2549164891242981, "learning_rate": 1.990039101602243e-05, "loss": 0.2352, "step": 2422 }, { "epoch": 0.04500172655138997, "grad_norm": 0.3323017954826355, "learning_rate": 1.9900226715324807e-05, "loss": 0.505, "step": 2424 }, { "epoch": 0.0450388566888086, "grad_norm": 0.3643483817577362, "learning_rate": 1.9900062279914755e-05, "loss": 0.3692, "step": 2426 }, { "epoch": 0.045075986826227245, "grad_norm": 0.25162139534950256, "learning_rate": 1.989989770979451e-05, "loss": 0.2858, "step": 2428 }, { "epoch": 0.04511311696364588, "grad_norm": 0.24252890050411224, "learning_rate": 1.989973300496631e-05, "loss": 0.2267, "step": 2430 }, { "epoch": 0.04515024710106452, "grad_norm": 0.3379379212856293, "learning_rate": 1.9899568165432393e-05, "loss": 0.4047, "step": 2432 }, { "epoch": 0.04518737723848316, "grad_norm": 0.44211095571517944, "learning_rate": 1.989940319119501e-05, "loss": 0.4375, "step": 2434 }, { "epoch": 0.0452245073759018, "grad_norm": 0.2861168682575226, "learning_rate": 1.98992380822564e-05, "loss": 0.3605, "step": 2436 }, { "epoch": 0.045261637513320435, "grad_norm": 0.3657349944114685, "learning_rate": 1.9899072838618814e-05, "loss": 0.4211, "step": 2438 }, { "epoch": 0.04529876765073908, "grad_norm": 0.3238188922405243, "learning_rate": 1.9898907460284493e-05, "loss": 0.3364, "step": 2440 }, { "epoch": 0.04533589778815771, "grad_norm": 0.30089622735977173, "learning_rate": 1.9898741947255697e-05, "loss": 0.3663, "step": 2442 }, { "epoch": 0.045373027925576355, "grad_norm": 0.37084662914276123, "learning_rate": 1.989857629953467e-05, "loss": 0.4073, "step": 2444 }, { "epoch": 0.04541015806299499, "grad_norm": 0.36076033115386963, "learning_rate": 1.9898410517123673e-05, "loss": 0.42, "step": 2446 }, { "epoch": 0.04544728820041363, "grad_norm": 0.5651138424873352, "learning_rate": 1.9898244600024956e-05, "loss": 0.2326, "step": 2448 }, { "epoch": 0.04548441833783227, "grad_norm": 0.4224671721458435, "learning_rate": 1.989807854824078e-05, "loss": 0.4197, "step": 2450 }, { "epoch": 0.04552154847525091, "grad_norm": 0.391889750957489, "learning_rate": 1.98979123617734e-05, "loss": 0.2539, "step": 2452 }, { "epoch": 0.045558678612669545, "grad_norm": 0.4197494387626648, "learning_rate": 1.9897746040625083e-05, "loss": 0.4475, "step": 2454 }, { "epoch": 0.04559580875008818, "grad_norm": 0.2940739691257477, "learning_rate": 1.9897579584798086e-05, "loss": 0.3785, "step": 2456 }, { "epoch": 0.04563293888750682, "grad_norm": 0.20279686152935028, "learning_rate": 1.989741299429468e-05, "loss": 0.1485, "step": 2458 }, { "epoch": 0.04567006902492546, "grad_norm": 0.36077946424484253, "learning_rate": 1.989724626911713e-05, "loss": 0.3476, "step": 2460 }, { "epoch": 0.0457071991623441, "grad_norm": 0.29293885827064514, "learning_rate": 1.9897079409267705e-05, "loss": 0.2808, "step": 2462 }, { "epoch": 0.045744329299762736, "grad_norm": 0.4073811173439026, "learning_rate": 1.989691241474867e-05, "loss": 0.4207, "step": 2464 }, { "epoch": 0.04578145943718138, "grad_norm": 0.34943458437919617, "learning_rate": 1.9896745285562303e-05, "loss": 0.3376, "step": 2466 }, { "epoch": 0.04581858957460001, "grad_norm": 0.46569836139678955, "learning_rate": 1.9896578021710883e-05, "loss": 0.3053, "step": 2468 }, { "epoch": 0.045855719712018655, "grad_norm": 0.39764001965522766, "learning_rate": 1.9896410623196673e-05, "loss": 0.2863, "step": 2470 }, { "epoch": 0.04589284984943729, "grad_norm": 0.32603809237480164, "learning_rate": 1.989624309002196e-05, "loss": 0.3841, "step": 2472 }, { "epoch": 0.04592997998685593, "grad_norm": 0.36866292357444763, "learning_rate": 1.9896075422189023e-05, "loss": 0.2175, "step": 2474 }, { "epoch": 0.04596711012427457, "grad_norm": 0.3144623041152954, "learning_rate": 1.9895907619700136e-05, "loss": 0.5212, "step": 2476 }, { "epoch": 0.04600424026169321, "grad_norm": 0.27605611085891724, "learning_rate": 1.9895739682557594e-05, "loss": 0.4272, "step": 2478 }, { "epoch": 0.046041370399111846, "grad_norm": 0.427211195230484, "learning_rate": 1.9895571610763675e-05, "loss": 0.4338, "step": 2480 }, { "epoch": 0.04607850053653049, "grad_norm": 0.7452526688575745, "learning_rate": 1.9895403404320665e-05, "loss": 0.5646, "step": 2482 }, { "epoch": 0.04611563067394912, "grad_norm": 0.2777949869632721, "learning_rate": 1.9895235063230855e-05, "loss": 0.474, "step": 2484 }, { "epoch": 0.046152760811367766, "grad_norm": 0.4482880234718323, "learning_rate": 1.9895066587496535e-05, "loss": 0.3579, "step": 2486 }, { "epoch": 0.0461898909487864, "grad_norm": 0.384441077709198, "learning_rate": 1.989489797712e-05, "loss": 0.3721, "step": 2488 }, { "epoch": 0.04622702108620504, "grad_norm": 0.3040623068809509, "learning_rate": 1.9894729232103542e-05, "loss": 0.2381, "step": 2490 }, { "epoch": 0.04626415122362368, "grad_norm": 0.28086578845977783, "learning_rate": 1.9894560352449455e-05, "loss": 0.3732, "step": 2492 }, { "epoch": 0.046301281361042314, "grad_norm": 0.38532325625419617, "learning_rate": 1.989439133816004e-05, "loss": 0.5281, "step": 2494 }, { "epoch": 0.046338411498460956, "grad_norm": 0.47509413957595825, "learning_rate": 1.98942221892376e-05, "loss": 0.2426, "step": 2496 }, { "epoch": 0.04637554163587959, "grad_norm": 0.37172621488571167, "learning_rate": 1.9894052905684428e-05, "loss": 0.4302, "step": 2498 }, { "epoch": 0.046412671773298234, "grad_norm": 0.2374938279390335, "learning_rate": 1.9893883487502833e-05, "loss": 0.5086, "step": 2500 }, { "epoch": 0.04644980191071687, "grad_norm": 0.2940489649772644, "learning_rate": 1.989371393469512e-05, "loss": 0.1727, "step": 2502 }, { "epoch": 0.04648693204813551, "grad_norm": 0.3569496273994446, "learning_rate": 1.9893544247263597e-05, "loss": 0.2745, "step": 2504 }, { "epoch": 0.046524062185554146, "grad_norm": 0.4116990864276886, "learning_rate": 1.9893374425210567e-05, "loss": 0.4684, "step": 2506 }, { "epoch": 0.04656119232297279, "grad_norm": 0.41885608434677124, "learning_rate": 1.9893204468538347e-05, "loss": 0.6169, "step": 2508 }, { "epoch": 0.046598322460391424, "grad_norm": 0.596121072769165, "learning_rate": 1.989303437724925e-05, "loss": 0.3405, "step": 2510 }, { "epoch": 0.046635452597810066, "grad_norm": 0.36439183354377747, "learning_rate": 1.989286415134559e-05, "loss": 0.4145, "step": 2512 }, { "epoch": 0.0466725827352287, "grad_norm": 0.2912504971027374, "learning_rate": 1.9892693790829676e-05, "loss": 0.4611, "step": 2514 }, { "epoch": 0.046709712872647344, "grad_norm": 0.3208148181438446, "learning_rate": 1.9892523295703833e-05, "loss": 0.4708, "step": 2516 }, { "epoch": 0.04674684301006598, "grad_norm": 0.3281371295452118, "learning_rate": 1.989235266597038e-05, "loss": 0.3544, "step": 2518 }, { "epoch": 0.04678397314748462, "grad_norm": 0.4005473554134369, "learning_rate": 1.9892181901631638e-05, "loss": 0.379, "step": 2520 }, { "epoch": 0.04682110328490326, "grad_norm": 0.3310092091560364, "learning_rate": 1.989201100268993e-05, "loss": 0.4445, "step": 2522 }, { "epoch": 0.0468582334223219, "grad_norm": 0.6983801126480103, "learning_rate": 1.9891839969147585e-05, "loss": 0.3418, "step": 2524 }, { "epoch": 0.046895363559740534, "grad_norm": 0.34990838170051575, "learning_rate": 1.9891668801006926e-05, "loss": 0.278, "step": 2526 }, { "epoch": 0.046932493697159176, "grad_norm": 0.26808440685272217, "learning_rate": 1.9891497498270285e-05, "loss": 0.2691, "step": 2528 }, { "epoch": 0.04696962383457781, "grad_norm": 0.3087778687477112, "learning_rate": 1.9891326060939987e-05, "loss": 0.4235, "step": 2530 }, { "epoch": 0.04700675397199645, "grad_norm": 0.22401942312717438, "learning_rate": 1.9891154489018376e-05, "loss": 0.3899, "step": 2532 }, { "epoch": 0.04704388410941509, "grad_norm": 0.4075601100921631, "learning_rate": 1.9890982782507774e-05, "loss": 0.2954, "step": 2534 }, { "epoch": 0.047081014246833724, "grad_norm": 0.47288867831230164, "learning_rate": 1.9890810941410524e-05, "loss": 0.2121, "step": 2536 }, { "epoch": 0.04711814438425237, "grad_norm": 0.3600998818874359, "learning_rate": 1.9890638965728968e-05, "loss": 0.1265, "step": 2538 }, { "epoch": 0.047155274521671, "grad_norm": 0.29128292202949524, "learning_rate": 1.9890466855465437e-05, "loss": 0.5616, "step": 2540 }, { "epoch": 0.047192404659089644, "grad_norm": 0.343300461769104, "learning_rate": 1.989029461062228e-05, "loss": 0.3112, "step": 2542 }, { "epoch": 0.04722953479650828, "grad_norm": 0.38288718461990356, "learning_rate": 1.9890122231201835e-05, "loss": 0.3432, "step": 2544 }, { "epoch": 0.04726666493392692, "grad_norm": 0.2736768126487732, "learning_rate": 1.9889949717206457e-05, "loss": 0.3483, "step": 2546 }, { "epoch": 0.04730379507134556, "grad_norm": 0.5975361466407776, "learning_rate": 1.9889777068638482e-05, "loss": 0.5866, "step": 2548 }, { "epoch": 0.0473409252087642, "grad_norm": 0.24607698619365692, "learning_rate": 1.9889604285500266e-05, "loss": 0.2626, "step": 2550 }, { "epoch": 0.047378055346182835, "grad_norm": 0.429671049118042, "learning_rate": 1.9889431367794158e-05, "loss": 0.373, "step": 2552 }, { "epoch": 0.04741518548360148, "grad_norm": 0.3735169768333435, "learning_rate": 1.9889258315522512e-05, "loss": 0.3311, "step": 2554 }, { "epoch": 0.04745231562102011, "grad_norm": 0.3237277567386627, "learning_rate": 1.988908512868768e-05, "loss": 0.5051, "step": 2556 }, { "epoch": 0.047489445758438754, "grad_norm": 0.3530696630477905, "learning_rate": 1.988891180729202e-05, "loss": 0.3225, "step": 2558 }, { "epoch": 0.04752657589585739, "grad_norm": 0.35617583990097046, "learning_rate": 1.9888738351337898e-05, "loss": 0.2787, "step": 2560 }, { "epoch": 0.04756370603327603, "grad_norm": 0.450501024723053, "learning_rate": 1.988856476082766e-05, "loss": 0.391, "step": 2562 }, { "epoch": 0.04760083617069467, "grad_norm": 0.27211397886276245, "learning_rate": 1.988839103576368e-05, "loss": 0.3339, "step": 2564 }, { "epoch": 0.04763796630811331, "grad_norm": 0.32609447836875916, "learning_rate": 1.9888217176148315e-05, "loss": 0.3408, "step": 2566 }, { "epoch": 0.047675096445531945, "grad_norm": 0.4512081742286682, "learning_rate": 1.9888043181983932e-05, "loss": 0.2967, "step": 2568 }, { "epoch": 0.04771222658295058, "grad_norm": 0.5087811946868896, "learning_rate": 1.98878690532729e-05, "loss": 0.5014, "step": 2570 }, { "epoch": 0.04774935672036922, "grad_norm": 0.36853674054145813, "learning_rate": 1.9887694790017587e-05, "loss": 0.4413, "step": 2572 }, { "epoch": 0.04778648685778786, "grad_norm": 0.3287915885448456, "learning_rate": 1.988752039222037e-05, "loss": 0.4166, "step": 2574 }, { "epoch": 0.0478236169952065, "grad_norm": 0.30874282121658325, "learning_rate": 1.988734585988361e-05, "loss": 0.369, "step": 2576 }, { "epoch": 0.047860747132625135, "grad_norm": 0.4606678783893585, "learning_rate": 1.988717119300969e-05, "loss": 0.3383, "step": 2578 }, { "epoch": 0.04789787727004378, "grad_norm": 0.2779238522052765, "learning_rate": 1.9886996391600987e-05, "loss": 0.5059, "step": 2580 }, { "epoch": 0.04793500740746241, "grad_norm": 0.30820155143737793, "learning_rate": 1.988682145565988e-05, "loss": 0.3367, "step": 2582 }, { "epoch": 0.047972137544881055, "grad_norm": 0.6014087796211243, "learning_rate": 1.9886646385188747e-05, "loss": 0.4306, "step": 2584 }, { "epoch": 0.04800926768229969, "grad_norm": 0.3724692761898041, "learning_rate": 1.9886471180189967e-05, "loss": 0.3303, "step": 2586 }, { "epoch": 0.04804639781971833, "grad_norm": 0.5611693263053894, "learning_rate": 1.9886295840665932e-05, "loss": 0.4383, "step": 2588 }, { "epoch": 0.04808352795713697, "grad_norm": 0.3220396339893341, "learning_rate": 1.988612036661902e-05, "loss": 0.3205, "step": 2590 }, { "epoch": 0.04812065809455561, "grad_norm": 0.31981250643730164, "learning_rate": 1.9885944758051624e-05, "loss": 0.4138, "step": 2592 }, { "epoch": 0.048157788231974245, "grad_norm": 0.3703214228153229, "learning_rate": 1.988576901496613e-05, "loss": 0.4337, "step": 2594 }, { "epoch": 0.04819491836939289, "grad_norm": 0.5945144891738892, "learning_rate": 1.9885593137364935e-05, "loss": 0.3245, "step": 2596 }, { "epoch": 0.04823204850681152, "grad_norm": 0.5193402767181396, "learning_rate": 1.9885417125250427e-05, "loss": 0.3577, "step": 2598 }, { "epoch": 0.048269178644230165, "grad_norm": 0.3410298228263855, "learning_rate": 1.9885240978625e-05, "loss": 0.2847, "step": 2600 }, { "epoch": 0.0483063087816488, "grad_norm": 0.3336454927921295, "learning_rate": 1.9885064697491054e-05, "loss": 0.3829, "step": 2602 }, { "epoch": 0.04834343891906744, "grad_norm": 0.306267112493515, "learning_rate": 1.9884888281850986e-05, "loss": 0.4026, "step": 2604 }, { "epoch": 0.04838056905648608, "grad_norm": 0.3643156588077545, "learning_rate": 1.98847117317072e-05, "loss": 0.3708, "step": 2606 }, { "epoch": 0.04841769919390471, "grad_norm": 0.3960159122943878, "learning_rate": 1.9884535047062094e-05, "loss": 0.2696, "step": 2608 }, { "epoch": 0.048454829331323356, "grad_norm": 0.29586654901504517, "learning_rate": 1.988435822791807e-05, "loss": 0.519, "step": 2610 }, { "epoch": 0.04849195946874199, "grad_norm": 0.44784021377563477, "learning_rate": 1.9884181274277542e-05, "loss": 0.5057, "step": 2612 }, { "epoch": 0.04852908960616063, "grad_norm": 0.31597235798835754, "learning_rate": 1.9884004186142913e-05, "loss": 0.2511, "step": 2614 }, { "epoch": 0.04856621974357927, "grad_norm": 0.23702697455883026, "learning_rate": 1.988382696351659e-05, "loss": 0.4544, "step": 2616 }, { "epoch": 0.04860334988099791, "grad_norm": 0.4404929280281067, "learning_rate": 1.9883649606400988e-05, "loss": 0.2467, "step": 2618 }, { "epoch": 0.048640480018416546, "grad_norm": 0.3470233082771301, "learning_rate": 1.9883472114798525e-05, "loss": 0.5055, "step": 2620 }, { "epoch": 0.04867761015583519, "grad_norm": 0.39667776226997375, "learning_rate": 1.9883294488711607e-05, "loss": 0.2259, "step": 2622 }, { "epoch": 0.04871474029325382, "grad_norm": 0.4350927770137787, "learning_rate": 1.9883116728142654e-05, "loss": 0.3607, "step": 2624 }, { "epoch": 0.048751870430672466, "grad_norm": 0.22223787009716034, "learning_rate": 1.9882938833094085e-05, "loss": 0.3478, "step": 2626 }, { "epoch": 0.0487890005680911, "grad_norm": 0.5158822536468506, "learning_rate": 1.9882760803568325e-05, "loss": 0.4291, "step": 2628 }, { "epoch": 0.04882613070550974, "grad_norm": 0.4217866063117981, "learning_rate": 1.9882582639567788e-05, "loss": 0.4478, "step": 2630 }, { "epoch": 0.04886326084292838, "grad_norm": 0.2986488938331604, "learning_rate": 1.988240434109491e-05, "loss": 0.3516, "step": 2632 }, { "epoch": 0.04890039098034702, "grad_norm": 0.272651731967926, "learning_rate": 1.9882225908152103e-05, "loss": 0.3126, "step": 2634 }, { "epoch": 0.048937521117765656, "grad_norm": 0.4954606890678406, "learning_rate": 1.9882047340741807e-05, "loss": 0.2777, "step": 2636 }, { "epoch": 0.0489746512551843, "grad_norm": 0.4016689956188202, "learning_rate": 1.9881868638866445e-05, "loss": 0.3718, "step": 2638 }, { "epoch": 0.049011781392602934, "grad_norm": 0.30067822337150574, "learning_rate": 1.988168980252845e-05, "loss": 0.4482, "step": 2640 }, { "epoch": 0.049048911530021576, "grad_norm": 0.5616180896759033, "learning_rate": 1.9881510831730257e-05, "loss": 0.3068, "step": 2642 }, { "epoch": 0.04908604166744021, "grad_norm": 0.3896440863609314, "learning_rate": 1.9881331726474298e-05, "loss": 0.4718, "step": 2644 }, { "epoch": 0.049123171804858846, "grad_norm": 0.30395743250846863, "learning_rate": 1.9881152486763015e-05, "loss": 0.2977, "step": 2646 }, { "epoch": 0.04916030194227749, "grad_norm": 0.29402920603752136, "learning_rate": 1.9880973112598842e-05, "loss": 0.3432, "step": 2648 }, { "epoch": 0.049197432079696124, "grad_norm": 0.38253194093704224, "learning_rate": 1.9880793603984224e-05, "loss": 0.36, "step": 2650 }, { "epoch": 0.049234562217114766, "grad_norm": 0.3870534896850586, "learning_rate": 1.98806139609216e-05, "loss": 0.4471, "step": 2652 }, { "epoch": 0.0492716923545334, "grad_norm": 0.46986493468284607, "learning_rate": 1.9880434183413414e-05, "loss": 0.3136, "step": 2654 }, { "epoch": 0.049308822491952044, "grad_norm": 0.33612990379333496, "learning_rate": 1.9880254271462117e-05, "loss": 0.2595, "step": 2656 }, { "epoch": 0.04934595262937068, "grad_norm": 0.32249048352241516, "learning_rate": 1.9880074225070154e-05, "loss": 0.3207, "step": 2658 }, { "epoch": 0.04938308276678932, "grad_norm": 0.24167020618915558, "learning_rate": 1.9879894044239975e-05, "loss": 0.2321, "step": 2660 }, { "epoch": 0.04942021290420796, "grad_norm": 0.24824786186218262, "learning_rate": 1.9879713728974028e-05, "loss": 0.3656, "step": 2662 }, { "epoch": 0.0494573430416266, "grad_norm": 0.3819003701210022, "learning_rate": 1.9879533279274774e-05, "loss": 0.4395, "step": 2664 }, { "epoch": 0.049494473179045234, "grad_norm": 0.29306739568710327, "learning_rate": 1.9879352695144666e-05, "loss": 0.5028, "step": 2666 }, { "epoch": 0.049531603316463876, "grad_norm": 0.40407779812812805, "learning_rate": 1.9879171976586157e-05, "loss": 0.3873, "step": 2668 }, { "epoch": 0.04956873345388251, "grad_norm": 0.3330526351928711, "learning_rate": 1.987899112360171e-05, "loss": 0.3294, "step": 2670 }, { "epoch": 0.049605863591301154, "grad_norm": 0.34792229533195496, "learning_rate": 1.9878810136193785e-05, "loss": 0.3261, "step": 2672 }, { "epoch": 0.04964299372871979, "grad_norm": 0.458290159702301, "learning_rate": 1.9878629014364844e-05, "loss": 0.4611, "step": 2674 }, { "epoch": 0.04968012386613843, "grad_norm": 0.31894224882125854, "learning_rate": 1.9878447758117352e-05, "loss": 0.4871, "step": 2676 }, { "epoch": 0.04971725400355707, "grad_norm": 0.3224238455295563, "learning_rate": 1.9878266367453775e-05, "loss": 0.38, "step": 2678 }, { "epoch": 0.04975438414097571, "grad_norm": 0.3004835844039917, "learning_rate": 1.9878084842376585e-05, "loss": 0.2483, "step": 2680 }, { "epoch": 0.049791514278394344, "grad_norm": 0.4765733778476715, "learning_rate": 1.9877903182888243e-05, "loss": 0.409, "step": 2682 }, { "epoch": 0.04982864441581298, "grad_norm": 0.37976738810539246, "learning_rate": 1.987772138899123e-05, "loss": 0.3213, "step": 2684 }, { "epoch": 0.04986577455323162, "grad_norm": 0.4271876811981201, "learning_rate": 1.9877539460688017e-05, "loss": 0.3077, "step": 2686 }, { "epoch": 0.04990290469065026, "grad_norm": 0.5363243818283081, "learning_rate": 1.9877357397981076e-05, "loss": 0.5745, "step": 2688 }, { "epoch": 0.0499400348280689, "grad_norm": 0.40662485361099243, "learning_rate": 1.9877175200872886e-05, "loss": 0.2489, "step": 2690 }, { "epoch": 0.049977164965487535, "grad_norm": 0.6903659105300903, "learning_rate": 1.987699286936593e-05, "loss": 0.3617, "step": 2692 }, { "epoch": 0.05001429510290618, "grad_norm": 0.396749883890152, "learning_rate": 1.9876810403462684e-05, "loss": 0.2711, "step": 2694 }, { "epoch": 0.05005142524032481, "grad_norm": 0.3562004268169403, "learning_rate": 1.9876627803165632e-05, "loss": 0.5901, "step": 2696 }, { "epoch": 0.050088555377743454, "grad_norm": 0.29518672823905945, "learning_rate": 1.9876445068477265e-05, "loss": 0.4468, "step": 2698 }, { "epoch": 0.05012568551516209, "grad_norm": 0.3475431203842163, "learning_rate": 1.987626219940006e-05, "loss": 0.3713, "step": 2700 }, { "epoch": 0.05016281565258073, "grad_norm": 0.35791468620300293, "learning_rate": 1.987607919593651e-05, "loss": 0.5917, "step": 2702 }, { "epoch": 0.05019994578999937, "grad_norm": 0.3438893258571625, "learning_rate": 1.9875896058089102e-05, "loss": 0.3733, "step": 2704 }, { "epoch": 0.05023707592741801, "grad_norm": 0.3380543887615204, "learning_rate": 1.9875712785860336e-05, "loss": 0.4842, "step": 2706 }, { "epoch": 0.050274206064836645, "grad_norm": 0.33952251076698303, "learning_rate": 1.9875529379252696e-05, "loss": 0.3311, "step": 2708 }, { "epoch": 0.05031133620225529, "grad_norm": 0.31405842304229736, "learning_rate": 1.987534583826868e-05, "loss": 0.3296, "step": 2710 }, { "epoch": 0.05034846633967392, "grad_norm": 0.30055299401283264, "learning_rate": 1.987516216291079e-05, "loss": 0.2863, "step": 2712 }, { "epoch": 0.050385596477092565, "grad_norm": 0.5926289558410645, "learning_rate": 1.9874978353181526e-05, "loss": 0.566, "step": 2714 }, { "epoch": 0.0504227266145112, "grad_norm": 0.33233633637428284, "learning_rate": 1.987479440908338e-05, "loss": 0.1768, "step": 2716 }, { "epoch": 0.05045985675192984, "grad_norm": 0.2553347945213318, "learning_rate": 1.987461033061886e-05, "loss": 0.4093, "step": 2718 }, { "epoch": 0.05049698688934848, "grad_norm": 0.29046761989593506, "learning_rate": 1.9874426117790474e-05, "loss": 0.3383, "step": 2720 }, { "epoch": 0.05053411702676711, "grad_norm": 0.33573877811431885, "learning_rate": 1.9874241770600725e-05, "loss": 0.5166, "step": 2722 }, { "epoch": 0.050571247164185755, "grad_norm": 0.3779090344905853, "learning_rate": 1.9874057289052123e-05, "loss": 0.3437, "step": 2724 }, { "epoch": 0.05060837730160439, "grad_norm": 0.509864091873169, "learning_rate": 1.987387267314718e-05, "loss": 0.3331, "step": 2726 }, { "epoch": 0.05064550743902303, "grad_norm": 0.30726784467697144, "learning_rate": 1.98736879228884e-05, "loss": 0.2722, "step": 2728 }, { "epoch": 0.05068263757644167, "grad_norm": 0.31946155428886414, "learning_rate": 1.9873503038278307e-05, "loss": 0.4447, "step": 2730 }, { "epoch": 0.05071976771386031, "grad_norm": 0.5914859771728516, "learning_rate": 1.987331801931941e-05, "loss": 0.4449, "step": 2732 }, { "epoch": 0.050756897851278945, "grad_norm": 0.3914320766925812, "learning_rate": 1.987313286601423e-05, "loss": 0.1657, "step": 2734 }, { "epoch": 0.05079402798869759, "grad_norm": 0.44104456901550293, "learning_rate": 1.9872947578365283e-05, "loss": 0.3239, "step": 2736 }, { "epoch": 0.05083115812611622, "grad_norm": 0.35505062341690063, "learning_rate": 1.9872762156375093e-05, "loss": 0.4602, "step": 2738 }, { "epoch": 0.050868288263534865, "grad_norm": 0.2532171607017517, "learning_rate": 1.9872576600046184e-05, "loss": 0.3587, "step": 2740 }, { "epoch": 0.0509054184009535, "grad_norm": 0.3133249878883362, "learning_rate": 1.987239090938108e-05, "loss": 0.3708, "step": 2742 }, { "epoch": 0.05094254853837214, "grad_norm": 0.3827840983867645, "learning_rate": 1.9872205084382306e-05, "loss": 0.457, "step": 2744 }, { "epoch": 0.05097967867579078, "grad_norm": 0.27355948090553284, "learning_rate": 1.9872019125052388e-05, "loss": 0.1705, "step": 2746 }, { "epoch": 0.05101680881320942, "grad_norm": 0.3710668683052063, "learning_rate": 1.9871833031393865e-05, "loss": 0.3192, "step": 2748 }, { "epoch": 0.051053938950628056, "grad_norm": 0.4130370020866394, "learning_rate": 1.987164680340926e-05, "loss": 0.2568, "step": 2750 }, { "epoch": 0.0510910690880467, "grad_norm": 0.24248231947422028, "learning_rate": 1.9871460441101113e-05, "loss": 0.1913, "step": 2752 }, { "epoch": 0.05112819922546533, "grad_norm": 0.39129525423049927, "learning_rate": 1.9871273944471962e-05, "loss": 0.3885, "step": 2754 }, { "epoch": 0.051165329362883975, "grad_norm": 0.3475819230079651, "learning_rate": 1.9871087313524334e-05, "loss": 0.3656, "step": 2756 }, { "epoch": 0.05120245950030261, "grad_norm": 0.3614087998867035, "learning_rate": 1.9870900548260777e-05, "loss": 0.337, "step": 2758 }, { "epoch": 0.051239589637721246, "grad_norm": 0.3031628727912903, "learning_rate": 1.987071364868383e-05, "loss": 0.388, "step": 2760 }, { "epoch": 0.05127671977513989, "grad_norm": 0.31338509917259216, "learning_rate": 1.9870526614796036e-05, "loss": 0.3311, "step": 2762 }, { "epoch": 0.05131384991255852, "grad_norm": 0.47206684947013855, "learning_rate": 1.9870339446599943e-05, "loss": 0.3052, "step": 2764 }, { "epoch": 0.051350980049977166, "grad_norm": 0.2726995050907135, "learning_rate": 1.9870152144098092e-05, "loss": 0.3617, "step": 2766 }, { "epoch": 0.0513881101873958, "grad_norm": 0.26517996191978455, "learning_rate": 1.9869964707293036e-05, "loss": 0.4083, "step": 2768 }, { "epoch": 0.05142524032481444, "grad_norm": 0.4201814830303192, "learning_rate": 1.9869777136187324e-05, "loss": 0.5227, "step": 2770 }, { "epoch": 0.05146237046223308, "grad_norm": 0.34274962544441223, "learning_rate": 1.9869589430783512e-05, "loss": 0.4098, "step": 2772 }, { "epoch": 0.05149950059965172, "grad_norm": 0.3870795965194702, "learning_rate": 1.9869401591084147e-05, "loss": 0.4049, "step": 2774 }, { "epoch": 0.051536630737070356, "grad_norm": 0.3675890862941742, "learning_rate": 1.9869213617091788e-05, "loss": 0.4725, "step": 2776 }, { "epoch": 0.051573760874489, "grad_norm": 0.4682360291481018, "learning_rate": 1.9869025508808996e-05, "loss": 0.408, "step": 2778 }, { "epoch": 0.051610891011907634, "grad_norm": 0.32862234115600586, "learning_rate": 1.9868837266238325e-05, "loss": 0.3602, "step": 2780 }, { "epoch": 0.051648021149326276, "grad_norm": 0.36561235785484314, "learning_rate": 1.9868648889382344e-05, "loss": 0.2912, "step": 2782 }, { "epoch": 0.05168515128674491, "grad_norm": 0.3661783039569855, "learning_rate": 1.9868460378243608e-05, "loss": 0.3873, "step": 2784 }, { "epoch": 0.05172228142416355, "grad_norm": 0.3470335304737091, "learning_rate": 1.9868271732824688e-05, "loss": 0.4364, "step": 2786 }, { "epoch": 0.05175941156158219, "grad_norm": 0.26784074306488037, "learning_rate": 1.986808295312815e-05, "loss": 0.3562, "step": 2788 }, { "epoch": 0.05179654169900083, "grad_norm": 0.322917103767395, "learning_rate": 1.9867894039156555e-05, "loss": 0.3666, "step": 2790 }, { "epoch": 0.051833671836419466, "grad_norm": 0.3684297800064087, "learning_rate": 1.9867704990912485e-05, "loss": 0.4611, "step": 2792 }, { "epoch": 0.05187080197383811, "grad_norm": 0.3939528465270996, "learning_rate": 1.9867515808398504e-05, "loss": 0.3503, "step": 2794 }, { "epoch": 0.051907932111256744, "grad_norm": 0.332139253616333, "learning_rate": 1.986732649161719e-05, "loss": 0.2579, "step": 2796 }, { "epoch": 0.05194506224867538, "grad_norm": 0.4107329547405243, "learning_rate": 1.9867137040571123e-05, "loss": 0.3221, "step": 2798 }, { "epoch": 0.05198219238609402, "grad_norm": 0.300315260887146, "learning_rate": 1.9866947455262872e-05, "loss": 0.413, "step": 2800 }, { "epoch": 0.05201932252351266, "grad_norm": 0.3489567041397095, "learning_rate": 1.986675773569502e-05, "loss": 0.4896, "step": 2802 }, { "epoch": 0.0520564526609313, "grad_norm": 0.2821277678012848, "learning_rate": 1.9866567881870152e-05, "loss": 0.4117, "step": 2804 }, { "epoch": 0.052093582798349934, "grad_norm": 0.3023563623428345, "learning_rate": 1.986637789379085e-05, "loss": 0.291, "step": 2806 }, { "epoch": 0.052130712935768576, "grad_norm": 0.5156275629997253, "learning_rate": 1.9866187771459696e-05, "loss": 0.3509, "step": 2808 }, { "epoch": 0.05216784307318721, "grad_norm": 0.41604647040367126, "learning_rate": 1.986599751487928e-05, "loss": 0.36, "step": 2810 }, { "epoch": 0.052204973210605854, "grad_norm": 0.2888268232345581, "learning_rate": 1.986580712405219e-05, "loss": 0.321, "step": 2812 }, { "epoch": 0.05224210334802449, "grad_norm": 0.4198257625102997, "learning_rate": 1.9865616598981018e-05, "loss": 0.3661, "step": 2814 }, { "epoch": 0.05227923348544313, "grad_norm": 0.44610345363616943, "learning_rate": 1.9865425939668353e-05, "loss": 0.4834, "step": 2816 }, { "epoch": 0.05231636362286177, "grad_norm": 0.39528095722198486, "learning_rate": 1.986523514611679e-05, "loss": 0.489, "step": 2818 }, { "epoch": 0.05235349376028041, "grad_norm": 0.3856521248817444, "learning_rate": 1.986504421832893e-05, "loss": 0.3758, "step": 2820 }, { "epoch": 0.052390623897699044, "grad_norm": 0.40536215901374817, "learning_rate": 1.9864853156307365e-05, "loss": 0.2779, "step": 2822 }, { "epoch": 0.05242775403511769, "grad_norm": 0.339359849691391, "learning_rate": 1.9864661960054698e-05, "loss": 0.2139, "step": 2824 }, { "epoch": 0.05246488417253632, "grad_norm": 0.31417736411094666, "learning_rate": 1.986447062957353e-05, "loss": 0.3379, "step": 2826 }, { "epoch": 0.052502014309954964, "grad_norm": 0.3061143159866333, "learning_rate": 1.9864279164866464e-05, "loss": 0.4347, "step": 2828 }, { "epoch": 0.0525391444473736, "grad_norm": 0.3430987298488617, "learning_rate": 1.9864087565936105e-05, "loss": 0.5254, "step": 2830 }, { "epoch": 0.05257627458479224, "grad_norm": 0.29536962509155273, "learning_rate": 1.986389583278506e-05, "loss": 0.4528, "step": 2832 }, { "epoch": 0.05261340472221088, "grad_norm": 0.3575668931007385, "learning_rate": 1.9863703965415938e-05, "loss": 0.3813, "step": 2834 }, { "epoch": 0.05265053485962951, "grad_norm": 0.3959839642047882, "learning_rate": 1.986351196383135e-05, "loss": 0.4202, "step": 2836 }, { "epoch": 0.052687664997048154, "grad_norm": 0.2617895007133484, "learning_rate": 1.9863319828033916e-05, "loss": 0.5305, "step": 2838 }, { "epoch": 0.05272479513446679, "grad_norm": 0.26240354776382446, "learning_rate": 1.9863127558026238e-05, "loss": 0.5136, "step": 2840 }, { "epoch": 0.05276192527188543, "grad_norm": 0.3180220127105713, "learning_rate": 1.9862935153810932e-05, "loss": 0.3635, "step": 2842 }, { "epoch": 0.05279905540930407, "grad_norm": 0.4007718563079834, "learning_rate": 1.986274261539063e-05, "loss": 0.264, "step": 2844 }, { "epoch": 0.05283618554672271, "grad_norm": 0.4117051064968109, "learning_rate": 1.986254994276794e-05, "loss": 0.3939, "step": 2846 }, { "epoch": 0.052873315684141345, "grad_norm": 0.29042381048202515, "learning_rate": 1.986235713594549e-05, "loss": 0.4917, "step": 2848 }, { "epoch": 0.05291044582155999, "grad_norm": 0.33027327060699463, "learning_rate": 1.9862164194925896e-05, "loss": 0.1821, "step": 2850 }, { "epoch": 0.05294757595897862, "grad_norm": 0.36862608790397644, "learning_rate": 1.9861971119711788e-05, "loss": 0.2645, "step": 2852 }, { "epoch": 0.052984706096397265, "grad_norm": 0.36101314425468445, "learning_rate": 1.9861777910305794e-05, "loss": 0.3313, "step": 2854 }, { "epoch": 0.0530218362338159, "grad_norm": 0.39052248001098633, "learning_rate": 1.9861584566710544e-05, "loss": 0.4555, "step": 2856 }, { "epoch": 0.05305896637123454, "grad_norm": 0.3746533691883087, "learning_rate": 1.9861391088928666e-05, "loss": 0.413, "step": 2858 }, { "epoch": 0.05309609650865318, "grad_norm": 0.3168638050556183, "learning_rate": 1.9861197476962793e-05, "loss": 0.5128, "step": 2860 }, { "epoch": 0.05313322664607182, "grad_norm": 0.36404016613960266, "learning_rate": 1.986100373081556e-05, "loss": 0.3457, "step": 2862 }, { "epoch": 0.053170356783490455, "grad_norm": 0.3756319284439087, "learning_rate": 1.9860809850489603e-05, "loss": 0.4069, "step": 2864 }, { "epoch": 0.0532074869209091, "grad_norm": 0.3924770653247833, "learning_rate": 1.986061583598756e-05, "loss": 0.4218, "step": 2866 }, { "epoch": 0.05324461705832773, "grad_norm": 0.1839403510093689, "learning_rate": 1.9860421687312072e-05, "loss": 0.364, "step": 2868 }, { "epoch": 0.053281747195746375, "grad_norm": 0.37121400237083435, "learning_rate": 1.986022740446578e-05, "loss": 0.3547, "step": 2870 }, { "epoch": 0.05331887733316501, "grad_norm": 0.3728128969669342, "learning_rate": 1.986003298745133e-05, "loss": 0.3992, "step": 2872 }, { "epoch": 0.053356007470583645, "grad_norm": 0.34892934560775757, "learning_rate": 1.9859838436271363e-05, "loss": 0.417, "step": 2874 }, { "epoch": 0.05339313760800229, "grad_norm": 0.33326664566993713, "learning_rate": 1.985964375092853e-05, "loss": 0.2545, "step": 2876 }, { "epoch": 0.05343026774542092, "grad_norm": 0.5060580372810364, "learning_rate": 1.9859448931425475e-05, "loss": 0.4769, "step": 2878 }, { "epoch": 0.053467397882839565, "grad_norm": 0.4234153628349304, "learning_rate": 1.9859253977764855e-05, "loss": 0.7388, "step": 2880 }, { "epoch": 0.0535045280202582, "grad_norm": 0.4180959463119507, "learning_rate": 1.9859058889949324e-05, "loss": 0.5245, "step": 2882 }, { "epoch": 0.05354165815767684, "grad_norm": 0.30391424894332886, "learning_rate": 1.985886366798153e-05, "loss": 0.3338, "step": 2884 }, { "epoch": 0.05357878829509548, "grad_norm": 0.3675253987312317, "learning_rate": 1.985866831186413e-05, "loss": 0.2802, "step": 2886 }, { "epoch": 0.05361591843251412, "grad_norm": 0.35947632789611816, "learning_rate": 1.9858472821599787e-05, "loss": 0.5354, "step": 2888 }, { "epoch": 0.053653048569932756, "grad_norm": 0.32267966866493225, "learning_rate": 1.9858277197191157e-05, "loss": 0.3534, "step": 2890 }, { "epoch": 0.0536901787073514, "grad_norm": 0.3630273640155792, "learning_rate": 1.9858081438640904e-05, "loss": 0.3977, "step": 2892 }, { "epoch": 0.05372730884477003, "grad_norm": 0.3030064105987549, "learning_rate": 1.985788554595169e-05, "loss": 0.3888, "step": 2894 }, { "epoch": 0.053764438982188675, "grad_norm": 0.42525264620780945, "learning_rate": 1.9857689519126183e-05, "loss": 0.4263, "step": 2896 }, { "epoch": 0.05380156911960731, "grad_norm": 0.31737402081489563, "learning_rate": 1.9857493358167047e-05, "loss": 0.5581, "step": 2898 }, { "epoch": 0.05383869925702595, "grad_norm": 0.2848888635635376, "learning_rate": 1.9857297063076953e-05, "loss": 0.275, "step": 2900 }, { "epoch": 0.05387582939444459, "grad_norm": 0.27533063292503357, "learning_rate": 1.9857100633858576e-05, "loss": 0.4121, "step": 2902 }, { "epoch": 0.05391295953186323, "grad_norm": 0.37541788816452026, "learning_rate": 1.985690407051458e-05, "loss": 0.2733, "step": 2904 }, { "epoch": 0.053950089669281866, "grad_norm": 0.30114927887916565, "learning_rate": 1.9856707373047647e-05, "loss": 0.283, "step": 2906 }, { "epoch": 0.05398721980670051, "grad_norm": 0.46774184703826904, "learning_rate": 1.985651054146045e-05, "loss": 0.3731, "step": 2908 }, { "epoch": 0.05402434994411914, "grad_norm": 0.2963886559009552, "learning_rate": 1.9856313575755667e-05, "loss": 0.5117, "step": 2910 }, { "epoch": 0.05406148008153778, "grad_norm": 0.3226279020309448, "learning_rate": 1.9856116475935983e-05, "loss": 0.1939, "step": 2912 }, { "epoch": 0.05409861021895642, "grad_norm": 0.40098875761032104, "learning_rate": 1.9855919242004075e-05, "loss": 0.1663, "step": 2914 }, { "epoch": 0.054135740356375056, "grad_norm": 0.4085533022880554, "learning_rate": 1.9855721873962626e-05, "loss": 0.3628, "step": 2916 }, { "epoch": 0.0541728704937937, "grad_norm": 0.2947387993335724, "learning_rate": 1.9855524371814323e-05, "loss": 0.4139, "step": 2918 }, { "epoch": 0.054210000631212334, "grad_norm": 0.467885822057724, "learning_rate": 1.9855326735561857e-05, "loss": 0.4517, "step": 2920 }, { "epoch": 0.054247130768630976, "grad_norm": 0.341871976852417, "learning_rate": 1.9855128965207913e-05, "loss": 0.5479, "step": 2922 }, { "epoch": 0.05428426090604961, "grad_norm": 0.2603185176849365, "learning_rate": 1.9854931060755183e-05, "loss": 0.5239, "step": 2924 }, { "epoch": 0.05432139104346825, "grad_norm": 0.40321770310401917, "learning_rate": 1.985473302220636e-05, "loss": 0.2412, "step": 2926 }, { "epoch": 0.05435852118088689, "grad_norm": 0.4545021057128906, "learning_rate": 1.985453484956414e-05, "loss": 0.2157, "step": 2928 }, { "epoch": 0.05439565131830553, "grad_norm": 0.4130193889141083, "learning_rate": 1.9854336542831218e-05, "loss": 0.6308, "step": 2930 }, { "epoch": 0.054432781455724166, "grad_norm": 0.30755752325057983, "learning_rate": 1.9854138102010293e-05, "loss": 0.2082, "step": 2932 }, { "epoch": 0.05446991159314281, "grad_norm": 0.6711914539337158, "learning_rate": 1.9853939527104066e-05, "loss": 0.387, "step": 2934 }, { "epoch": 0.054507041730561444, "grad_norm": 0.24945802986621857, "learning_rate": 1.985374081811523e-05, "loss": 0.3164, "step": 2936 }, { "epoch": 0.054544171867980086, "grad_norm": 0.3281954228878021, "learning_rate": 1.9853541975046505e-05, "loss": 0.3061, "step": 2938 }, { "epoch": 0.05458130200539872, "grad_norm": 0.3453971743583679, "learning_rate": 1.9853342997900588e-05, "loss": 0.4193, "step": 2940 }, { "epoch": 0.054618432142817364, "grad_norm": 0.3509087860584259, "learning_rate": 1.9853143886680187e-05, "loss": 0.2724, "step": 2942 }, { "epoch": 0.054655562280236, "grad_norm": 0.4127955436706543, "learning_rate": 1.985294464138801e-05, "loss": 0.3478, "step": 2944 }, { "epoch": 0.05469269241765464, "grad_norm": 0.2635808289051056, "learning_rate": 1.9852745262026773e-05, "loss": 0.3018, "step": 2946 }, { "epoch": 0.054729822555073276, "grad_norm": 0.39689818024635315, "learning_rate": 1.985254574859918e-05, "loss": 0.4207, "step": 2948 }, { "epoch": 0.05476695269249191, "grad_norm": 0.3357161283493042, "learning_rate": 1.985234610110795e-05, "loss": 0.4418, "step": 2950 }, { "epoch": 0.054804082829910554, "grad_norm": 0.25282159447669983, "learning_rate": 1.9852146319555805e-05, "loss": 0.4123, "step": 2952 }, { "epoch": 0.05484121296732919, "grad_norm": 0.5032823085784912, "learning_rate": 1.985194640394546e-05, "loss": 0.1766, "step": 2954 }, { "epoch": 0.05487834310474783, "grad_norm": 0.32633665204048157, "learning_rate": 1.9851746354279632e-05, "loss": 0.194, "step": 2956 }, { "epoch": 0.05491547324216647, "grad_norm": 0.3969549834728241, "learning_rate": 1.985154617056105e-05, "loss": 0.4088, "step": 2958 }, { "epoch": 0.05495260337958511, "grad_norm": 0.3090079426765442, "learning_rate": 1.9851345852792426e-05, "loss": 0.5859, "step": 2960 }, { "epoch": 0.054989733517003744, "grad_norm": 0.3409533202648163, "learning_rate": 1.9851145400976493e-05, "loss": 0.3124, "step": 2962 }, { "epoch": 0.05502686365442239, "grad_norm": 0.30572324991226196, "learning_rate": 1.9850944815115984e-05, "loss": 0.2787, "step": 2964 }, { "epoch": 0.05506399379184102, "grad_norm": 0.34997761249542236, "learning_rate": 1.985074409521362e-05, "loss": 0.2627, "step": 2966 }, { "epoch": 0.055101123929259664, "grad_norm": 0.25141867995262146, "learning_rate": 1.9850543241272138e-05, "loss": 0.2682, "step": 2968 }, { "epoch": 0.0551382540666783, "grad_norm": 0.3440292179584503, "learning_rate": 1.9850342253294266e-05, "loss": 0.3827, "step": 2970 }, { "epoch": 0.05517538420409694, "grad_norm": 0.3372299075126648, "learning_rate": 1.985014113128274e-05, "loss": 0.2847, "step": 2972 }, { "epoch": 0.05521251434151558, "grad_norm": 0.26456281542778015, "learning_rate": 1.9849939875240304e-05, "loss": 0.361, "step": 2974 }, { "epoch": 0.05524964447893422, "grad_norm": 0.26251885294914246, "learning_rate": 1.9849738485169686e-05, "loss": 0.4045, "step": 2976 }, { "epoch": 0.055286774616352855, "grad_norm": 0.3801189064979553, "learning_rate": 1.984953696107363e-05, "loss": 0.2112, "step": 2978 }, { "epoch": 0.0553239047537715, "grad_norm": 0.38826003670692444, "learning_rate": 1.9849335302954878e-05, "loss": 0.3879, "step": 2980 }, { "epoch": 0.05536103489119013, "grad_norm": 0.3299528658390045, "learning_rate": 1.9849133510816178e-05, "loss": 0.4362, "step": 2982 }, { "epoch": 0.055398165028608774, "grad_norm": 0.39100533723831177, "learning_rate": 1.984893158466027e-05, "loss": 0.5361, "step": 2984 }, { "epoch": 0.05543529516602741, "grad_norm": 0.5104507207870483, "learning_rate": 1.9848729524489904e-05, "loss": 0.4108, "step": 2986 }, { "epoch": 0.055472425303446045, "grad_norm": 0.31418415904045105, "learning_rate": 1.984852733030783e-05, "loss": 0.2392, "step": 2988 }, { "epoch": 0.05550955544086469, "grad_norm": 0.39942795038223267, "learning_rate": 1.98483250021168e-05, "loss": 0.2222, "step": 2990 }, { "epoch": 0.05554668557828332, "grad_norm": 0.5871292948722839, "learning_rate": 1.9848122539919564e-05, "loss": 0.2719, "step": 2992 }, { "epoch": 0.055583815715701965, "grad_norm": 0.33943361043930054, "learning_rate": 1.984791994371888e-05, "loss": 0.2929, "step": 2994 }, { "epoch": 0.0556209458531206, "grad_norm": 0.3780500888824463, "learning_rate": 1.9847717213517503e-05, "loss": 0.4432, "step": 2996 }, { "epoch": 0.05565807599053924, "grad_norm": 0.4042004346847534, "learning_rate": 1.9847514349318194e-05, "loss": 0.5347, "step": 2998 }, { "epoch": 0.05569520612795788, "grad_norm": 0.3743478059768677, "learning_rate": 1.9847311351123707e-05, "loss": 0.383, "step": 3000 }, { "epoch": 0.05573233626537652, "grad_norm": 0.2726301848888397, "learning_rate": 1.984710821893681e-05, "loss": 0.3469, "step": 3002 }, { "epoch": 0.055769466402795155, "grad_norm": 0.38962477445602417, "learning_rate": 1.984690495276027e-05, "loss": 0.3564, "step": 3004 }, { "epoch": 0.0558065965402138, "grad_norm": 0.7716733813285828, "learning_rate": 1.9846701552596845e-05, "loss": 0.3652, "step": 3006 }, { "epoch": 0.05584372667763243, "grad_norm": 0.2975520193576813, "learning_rate": 1.9846498018449308e-05, "loss": 0.4046, "step": 3008 }, { "epoch": 0.055880856815051075, "grad_norm": 0.41402673721313477, "learning_rate": 1.9846294350320423e-05, "loss": 0.3484, "step": 3010 }, { "epoch": 0.05591798695246971, "grad_norm": 0.34582993388175964, "learning_rate": 1.984609054821297e-05, "loss": 0.5227, "step": 3012 }, { "epoch": 0.05595511708988835, "grad_norm": 0.40721312165260315, "learning_rate": 1.9845886612129712e-05, "loss": 0.2801, "step": 3014 }, { "epoch": 0.05599224722730699, "grad_norm": 0.331721693277359, "learning_rate": 1.984568254207343e-05, "loss": 0.384, "step": 3016 }, { "epoch": 0.05602937736472563, "grad_norm": 0.25454097986221313, "learning_rate": 1.98454783380469e-05, "loss": 0.513, "step": 3018 }, { "epoch": 0.056066507502144265, "grad_norm": 0.2617400884628296, "learning_rate": 1.9845274000052902e-05, "loss": 0.4342, "step": 3020 }, { "epoch": 0.05610363763956291, "grad_norm": 0.4297643005847931, "learning_rate": 1.9845069528094213e-05, "loss": 0.3669, "step": 3022 }, { "epoch": 0.05614076777698154, "grad_norm": 0.4833610951900482, "learning_rate": 1.984486492217362e-05, "loss": 0.3824, "step": 3024 }, { "epoch": 0.05617789791440018, "grad_norm": 0.3023262023925781, "learning_rate": 1.9844660182293905e-05, "loss": 0.5026, "step": 3026 }, { "epoch": 0.05621502805181882, "grad_norm": 0.28864169120788574, "learning_rate": 1.984445530845785e-05, "loss": 0.3047, "step": 3028 }, { "epoch": 0.056252158189237456, "grad_norm": 0.2669251263141632, "learning_rate": 1.9844250300668245e-05, "loss": 0.4851, "step": 3030 }, { "epoch": 0.0562892883266561, "grad_norm": 0.3894294500350952, "learning_rate": 1.9844045158927882e-05, "loss": 0.3312, "step": 3032 }, { "epoch": 0.05632641846407473, "grad_norm": 0.24255990982055664, "learning_rate": 1.984383988323955e-05, "loss": 0.4421, "step": 3034 }, { "epoch": 0.056363548601493375, "grad_norm": 0.3418087065219879, "learning_rate": 1.9843634473606045e-05, "loss": 0.4233, "step": 3036 }, { "epoch": 0.05640067873891201, "grad_norm": 0.32762765884399414, "learning_rate": 1.984342893003016e-05, "loss": 0.1734, "step": 3038 }, { "epoch": 0.05643780887633065, "grad_norm": 0.30636054277420044, "learning_rate": 1.984322325251469e-05, "loss": 0.376, "step": 3040 }, { "epoch": 0.05647493901374929, "grad_norm": 0.3261352479457855, "learning_rate": 1.9843017441062432e-05, "loss": 0.4137, "step": 3042 }, { "epoch": 0.05651206915116793, "grad_norm": 0.297899454832077, "learning_rate": 1.9842811495676196e-05, "loss": 0.4331, "step": 3044 }, { "epoch": 0.056549199288586566, "grad_norm": 0.48860272765159607, "learning_rate": 1.9842605416358777e-05, "loss": 0.2606, "step": 3046 }, { "epoch": 0.05658632942600521, "grad_norm": 0.29924604296684265, "learning_rate": 1.9842399203112977e-05, "loss": 0.3108, "step": 3048 }, { "epoch": 0.05662345956342384, "grad_norm": 0.35606759786605835, "learning_rate": 1.984219285594161e-05, "loss": 0.4766, "step": 3050 }, { "epoch": 0.056660589700842486, "grad_norm": 0.25527697801589966, "learning_rate": 1.9841986374847474e-05, "loss": 0.3813, "step": 3052 }, { "epoch": 0.05669771983826112, "grad_norm": 0.40612947940826416, "learning_rate": 1.9841779759833385e-05, "loss": 0.2962, "step": 3054 }, { "epoch": 0.05673484997567976, "grad_norm": 0.3753218948841095, "learning_rate": 1.9841573010902157e-05, "loss": 0.2815, "step": 3056 }, { "epoch": 0.0567719801130984, "grad_norm": 0.34082576632499695, "learning_rate": 1.9841366128056596e-05, "loss": 0.354, "step": 3058 }, { "epoch": 0.05680911025051704, "grad_norm": 0.37212178111076355, "learning_rate": 1.984115911129952e-05, "loss": 0.2811, "step": 3060 }, { "epoch": 0.056846240387935676, "grad_norm": 0.3254123330116272, "learning_rate": 1.9840951960633745e-05, "loss": 0.3702, "step": 3062 }, { "epoch": 0.05688337052535431, "grad_norm": 0.3400002717971802, "learning_rate": 1.9840744676062093e-05, "loss": 0.3765, "step": 3064 }, { "epoch": 0.05692050066277295, "grad_norm": 0.3192213177680969, "learning_rate": 1.9840537257587383e-05, "loss": 0.3905, "step": 3066 }, { "epoch": 0.05695763080019159, "grad_norm": 0.39725351333618164, "learning_rate": 1.9840329705212435e-05, "loss": 0.4894, "step": 3068 }, { "epoch": 0.05699476093761023, "grad_norm": 0.3115709722042084, "learning_rate": 1.9840122018940077e-05, "loss": 0.4796, "step": 3070 }, { "epoch": 0.057031891075028866, "grad_norm": 0.3007223308086395, "learning_rate": 1.9839914198773128e-05, "loss": 0.2796, "step": 3072 }, { "epoch": 0.05706902121244751, "grad_norm": 0.4191291928291321, "learning_rate": 1.9839706244714424e-05, "loss": 0.7142, "step": 3074 }, { "epoch": 0.057106151349866144, "grad_norm": 0.30084994435310364, "learning_rate": 1.983949815676679e-05, "loss": 0.3169, "step": 3076 }, { "epoch": 0.057143281487284786, "grad_norm": 0.41753578186035156, "learning_rate": 1.9839289934933064e-05, "loss": 0.3967, "step": 3078 }, { "epoch": 0.05718041162470342, "grad_norm": 0.30716875195503235, "learning_rate": 1.983908157921607e-05, "loss": 0.3532, "step": 3080 }, { "epoch": 0.057217541762122064, "grad_norm": 0.26224958896636963, "learning_rate": 1.9838873089618647e-05, "loss": 0.266, "step": 3082 }, { "epoch": 0.0572546718995407, "grad_norm": 0.3286738693714142, "learning_rate": 1.9838664466143632e-05, "loss": 0.3695, "step": 3084 }, { "epoch": 0.05729180203695934, "grad_norm": 0.3400304317474365, "learning_rate": 1.9838455708793867e-05, "loss": 0.5041, "step": 3086 }, { "epoch": 0.057328932174377976, "grad_norm": 0.3571081757545471, "learning_rate": 1.9838246817572188e-05, "loss": 0.3704, "step": 3088 }, { "epoch": 0.05736606231179662, "grad_norm": 0.3553451597690582, "learning_rate": 1.9838037792481434e-05, "loss": 0.2902, "step": 3090 }, { "epoch": 0.057403192449215254, "grad_norm": 0.2603435814380646, "learning_rate": 1.9837828633524457e-05, "loss": 0.3933, "step": 3092 }, { "epoch": 0.057440322586633896, "grad_norm": 0.39056339859962463, "learning_rate": 1.9837619340704102e-05, "loss": 0.3759, "step": 3094 }, { "epoch": 0.05747745272405253, "grad_norm": 0.4029666781425476, "learning_rate": 1.9837409914023213e-05, "loss": 0.2642, "step": 3096 }, { "epoch": 0.057514582861471174, "grad_norm": 0.2690047025680542, "learning_rate": 1.9837200353484644e-05, "loss": 0.1703, "step": 3098 }, { "epoch": 0.05755171299888981, "grad_norm": 0.41959962248802185, "learning_rate": 1.9836990659091243e-05, "loss": 0.3807, "step": 3100 }, { "epoch": 0.057588843136308444, "grad_norm": 0.4477682411670685, "learning_rate": 1.983678083084586e-05, "loss": 0.2581, "step": 3102 }, { "epoch": 0.05762597327372709, "grad_norm": 0.32175272703170776, "learning_rate": 1.9836570868751357e-05, "loss": 0.381, "step": 3104 }, { "epoch": 0.05766310341114572, "grad_norm": 0.2631858289241791, "learning_rate": 1.9836360772810594e-05, "loss": 0.3842, "step": 3106 }, { "epoch": 0.057700233548564364, "grad_norm": 0.4008104205131531, "learning_rate": 1.9836150543026417e-05, "loss": 0.3837, "step": 3108 }, { "epoch": 0.057737363685983, "grad_norm": 0.39099541306495667, "learning_rate": 1.9835940179401697e-05, "loss": 0.5785, "step": 3110 }, { "epoch": 0.05777449382340164, "grad_norm": 0.3048926591873169, "learning_rate": 1.983572968193929e-05, "loss": 0.241, "step": 3112 }, { "epoch": 0.05781162396082028, "grad_norm": 0.3737243711948395, "learning_rate": 1.9835519050642066e-05, "loss": 0.4143, "step": 3114 }, { "epoch": 0.05784875409823892, "grad_norm": 0.3285117447376251, "learning_rate": 1.983530828551289e-05, "loss": 0.3916, "step": 3116 }, { "epoch": 0.057885884235657555, "grad_norm": 0.4149942398071289, "learning_rate": 1.9835097386554623e-05, "loss": 0.4231, "step": 3118 }, { "epoch": 0.0579230143730762, "grad_norm": 0.27757516503334045, "learning_rate": 1.9834886353770145e-05, "loss": 0.5977, "step": 3120 }, { "epoch": 0.05796014451049483, "grad_norm": 0.38600555062294006, "learning_rate": 1.9834675187162324e-05, "loss": 0.2501, "step": 3122 }, { "epoch": 0.057997274647913474, "grad_norm": 0.3534892499446869, "learning_rate": 1.983446388673403e-05, "loss": 0.303, "step": 3124 }, { "epoch": 0.05803440478533211, "grad_norm": 0.31189200282096863, "learning_rate": 1.9834252452488137e-05, "loss": 0.2883, "step": 3126 }, { "epoch": 0.05807153492275075, "grad_norm": 0.47020286321640015, "learning_rate": 1.983404088442753e-05, "loss": 0.3679, "step": 3128 }, { "epoch": 0.05810866506016939, "grad_norm": 0.43118447065353394, "learning_rate": 1.983382918255508e-05, "loss": 0.3649, "step": 3130 }, { "epoch": 0.05814579519758803, "grad_norm": 0.31245341897010803, "learning_rate": 1.983361734687367e-05, "loss": 0.2826, "step": 3132 }, { "epoch": 0.058182925335006665, "grad_norm": 0.3751651346683502, "learning_rate": 1.9833405377386184e-05, "loss": 0.2616, "step": 3134 }, { "epoch": 0.0582200554724253, "grad_norm": 0.34476155042648315, "learning_rate": 1.9833193274095507e-05, "loss": 0.247, "step": 3136 }, { "epoch": 0.05825718560984394, "grad_norm": 0.3048694431781769, "learning_rate": 1.983298103700452e-05, "loss": 0.202, "step": 3138 }, { "epoch": 0.05829431574726258, "grad_norm": 0.2760787606239319, "learning_rate": 1.9832768666116115e-05, "loss": 0.4239, "step": 3140 }, { "epoch": 0.05833144588468122, "grad_norm": 0.262808620929718, "learning_rate": 1.9832556161433184e-05, "loss": 0.4628, "step": 3142 }, { "epoch": 0.058368576022099855, "grad_norm": 0.22784578800201416, "learning_rate": 1.9832343522958613e-05, "loss": 0.2611, "step": 3144 }, { "epoch": 0.0584057061595185, "grad_norm": 0.35843998193740845, "learning_rate": 1.98321307506953e-05, "loss": 0.4844, "step": 3146 }, { "epoch": 0.05844283629693713, "grad_norm": 0.4081767201423645, "learning_rate": 1.9831917844646136e-05, "loss": 0.3774, "step": 3148 }, { "epoch": 0.058479966434355775, "grad_norm": 0.41278553009033203, "learning_rate": 1.983170480481402e-05, "loss": 0.2661, "step": 3150 }, { "epoch": 0.05851709657177441, "grad_norm": 0.2524242401123047, "learning_rate": 1.983149163120185e-05, "loss": 0.2968, "step": 3152 }, { "epoch": 0.05855422670919305, "grad_norm": 0.3095013201236725, "learning_rate": 1.983127832381253e-05, "loss": 0.2998, "step": 3154 }, { "epoch": 0.05859135684661169, "grad_norm": 0.3590630888938904, "learning_rate": 1.983106488264896e-05, "loss": 0.4321, "step": 3156 }, { "epoch": 0.05862848698403033, "grad_norm": 0.4180951714515686, "learning_rate": 1.9830851307714045e-05, "loss": 0.3275, "step": 3158 }, { "epoch": 0.058665617121448965, "grad_norm": 0.30163106322288513, "learning_rate": 1.983063759901069e-05, "loss": 0.5381, "step": 3160 }, { "epoch": 0.05870274725886761, "grad_norm": 0.3472830355167389, "learning_rate": 1.98304237565418e-05, "loss": 0.4688, "step": 3162 }, { "epoch": 0.05873987739628624, "grad_norm": 0.27661651372909546, "learning_rate": 1.9830209780310295e-05, "loss": 0.2387, "step": 3164 }, { "epoch": 0.058777007533704885, "grad_norm": 0.3514387309551239, "learning_rate": 1.9829995670319074e-05, "loss": 0.1432, "step": 3166 }, { "epoch": 0.05881413767112352, "grad_norm": 0.4931705892086029, "learning_rate": 1.982978142657106e-05, "loss": 0.3628, "step": 3168 }, { "epoch": 0.05885126780854216, "grad_norm": 0.31983834505081177, "learning_rate": 1.982956704906916e-05, "loss": 0.3613, "step": 3170 }, { "epoch": 0.0588883979459608, "grad_norm": 0.4016130566596985, "learning_rate": 1.98293525378163e-05, "loss": 0.4634, "step": 3172 }, { "epoch": 0.05892552808337943, "grad_norm": 0.3541540205478668, "learning_rate": 1.982913789281539e-05, "loss": 0.2865, "step": 3174 }, { "epoch": 0.058962658220798075, "grad_norm": 0.29048845171928406, "learning_rate": 1.982892311406936e-05, "loss": 0.3115, "step": 3176 }, { "epoch": 0.05899978835821671, "grad_norm": 0.36161670088768005, "learning_rate": 1.9828708201581123e-05, "loss": 0.1945, "step": 3178 }, { "epoch": 0.05903691849563535, "grad_norm": 0.3577595055103302, "learning_rate": 1.982849315535361e-05, "loss": 0.3475, "step": 3180 }, { "epoch": 0.05907404863305399, "grad_norm": 0.4106470048427582, "learning_rate": 1.9828277975389743e-05, "loss": 0.2811, "step": 3182 }, { "epoch": 0.05911117877047263, "grad_norm": 0.35093221068382263, "learning_rate": 1.9828062661692452e-05, "loss": 0.3538, "step": 3184 }, { "epoch": 0.059148308907891266, "grad_norm": 0.2504813075065613, "learning_rate": 1.9827847214264667e-05, "loss": 0.5435, "step": 3186 }, { "epoch": 0.05918543904530991, "grad_norm": 0.307650625705719, "learning_rate": 1.9827631633109323e-05, "loss": 0.2275, "step": 3188 }, { "epoch": 0.05922256918272854, "grad_norm": 0.3227376341819763, "learning_rate": 1.9827415918229346e-05, "loss": 0.2815, "step": 3190 }, { "epoch": 0.059259699320147186, "grad_norm": 0.25418251752853394, "learning_rate": 1.9827200069627676e-05, "loss": 0.1264, "step": 3192 }, { "epoch": 0.05929682945756582, "grad_norm": 0.4106939435005188, "learning_rate": 1.9826984087307245e-05, "loss": 0.3652, "step": 3194 }, { "epoch": 0.05933395959498446, "grad_norm": 0.23905248939990997, "learning_rate": 1.9826767971271e-05, "loss": 0.3297, "step": 3196 }, { "epoch": 0.0593710897324031, "grad_norm": 0.4137873947620392, "learning_rate": 1.982655172152188e-05, "loss": 0.3361, "step": 3198 }, { "epoch": 0.05940821986982174, "grad_norm": 0.9614133238792419, "learning_rate": 1.982633533806282e-05, "loss": 0.3971, "step": 3200 }, { "epoch": 0.059445350007240376, "grad_norm": 0.3563469648361206, "learning_rate": 1.982611882089677e-05, "loss": 0.4287, "step": 3202 }, { "epoch": 0.05948248014465902, "grad_norm": 0.4971561133861542, "learning_rate": 1.9825902170026677e-05, "loss": 0.3355, "step": 3204 }, { "epoch": 0.059519610282077653, "grad_norm": 0.3778611123561859, "learning_rate": 1.9825685385455486e-05, "loss": 0.3412, "step": 3206 }, { "epoch": 0.059556740419496296, "grad_norm": 0.5087078809738159, "learning_rate": 1.982546846718615e-05, "loss": 0.278, "step": 3208 }, { "epoch": 0.05959387055691493, "grad_norm": 0.30923616886138916, "learning_rate": 1.982525141522162e-05, "loss": 0.2881, "step": 3210 }, { "epoch": 0.059631000694333566, "grad_norm": 0.39335906505584717, "learning_rate": 1.9825034229564845e-05, "loss": 0.3954, "step": 3212 }, { "epoch": 0.05966813083175221, "grad_norm": 0.3137960433959961, "learning_rate": 1.9824816910218784e-05, "loss": 0.4129, "step": 3214 }, { "epoch": 0.059705260969170844, "grad_norm": 0.30671659111976624, "learning_rate": 1.9824599457186396e-05, "loss": 0.2662, "step": 3216 }, { "epoch": 0.059742391106589486, "grad_norm": 0.38358739018440247, "learning_rate": 1.9824381870470634e-05, "loss": 0.3645, "step": 3218 }, { "epoch": 0.05977952124400812, "grad_norm": 0.3540777266025543, "learning_rate": 1.9824164150074466e-05, "loss": 0.3505, "step": 3220 }, { "epoch": 0.059816651381426764, "grad_norm": 0.35432547330856323, "learning_rate": 1.9823946296000848e-05, "loss": 0.2511, "step": 3222 }, { "epoch": 0.0598537815188454, "grad_norm": 0.3370950520038605, "learning_rate": 1.9823728308252746e-05, "loss": 0.4815, "step": 3224 }, { "epoch": 0.05989091165626404, "grad_norm": 0.2935311794281006, "learning_rate": 1.982351018683313e-05, "loss": 0.3035, "step": 3226 }, { "epoch": 0.059928041793682676, "grad_norm": 0.26078927516937256, "learning_rate": 1.9823291931744962e-05, "loss": 0.3891, "step": 3228 }, { "epoch": 0.05996517193110132, "grad_norm": 0.3112160265445709, "learning_rate": 1.982307354299122e-05, "loss": 0.1965, "step": 3230 }, { "epoch": 0.060002302068519954, "grad_norm": 0.3444506525993347, "learning_rate": 1.9822855020574864e-05, "loss": 0.2641, "step": 3232 }, { "epoch": 0.060039432205938596, "grad_norm": 0.38369259238243103, "learning_rate": 1.982263636449888e-05, "loss": 0.3712, "step": 3234 }, { "epoch": 0.06007656234335723, "grad_norm": 0.36025455594062805, "learning_rate": 1.9822417574766236e-05, "loss": 0.4428, "step": 3236 }, { "epoch": 0.060113692480775874, "grad_norm": 0.44601327180862427, "learning_rate": 1.982219865137991e-05, "loss": 0.4692, "step": 3238 }, { "epoch": 0.06015082261819451, "grad_norm": 0.24177546799182892, "learning_rate": 1.982197959434288e-05, "loss": 0.4257, "step": 3240 }, { "epoch": 0.06018795275561315, "grad_norm": 0.3033654987812042, "learning_rate": 1.982176040365813e-05, "loss": 0.3768, "step": 3242 }, { "epoch": 0.06022508289303179, "grad_norm": 0.2943459451198578, "learning_rate": 1.9821541079328636e-05, "loss": 0.3928, "step": 3244 }, { "epoch": 0.06026221303045043, "grad_norm": 0.26412588357925415, "learning_rate": 1.982132162135739e-05, "loss": 0.4435, "step": 3246 }, { "epoch": 0.060299343167869064, "grad_norm": 0.27709636092185974, "learning_rate": 1.9821102029747378e-05, "loss": 0.2038, "step": 3248 }, { "epoch": 0.0603364733052877, "grad_norm": 0.3554525077342987, "learning_rate": 1.9820882304501578e-05, "loss": 0.2629, "step": 3250 }, { "epoch": 0.06037360344270634, "grad_norm": 0.4392123818397522, "learning_rate": 1.982066244562299e-05, "loss": 0.2653, "step": 3252 }, { "epoch": 0.06041073358012498, "grad_norm": 0.38870763778686523, "learning_rate": 1.9820442453114602e-05, "loss": 0.2493, "step": 3254 }, { "epoch": 0.06044786371754362, "grad_norm": 0.37764909863471985, "learning_rate": 1.982022232697941e-05, "loss": 0.3315, "step": 3256 }, { "epoch": 0.060484993854962255, "grad_norm": 0.3400952219963074, "learning_rate": 1.9820002067220408e-05, "loss": 0.3909, "step": 3258 }, { "epoch": 0.0605221239923809, "grad_norm": 0.24018500745296478, "learning_rate": 1.981978167384059e-05, "loss": 0.3641, "step": 3260 }, { "epoch": 0.06055925412979953, "grad_norm": 0.23341913521289825, "learning_rate": 1.981956114684296e-05, "loss": 0.1788, "step": 3262 }, { "epoch": 0.060596384267218174, "grad_norm": 0.3269834816455841, "learning_rate": 1.981934048623051e-05, "loss": 0.4447, "step": 3264 }, { "epoch": 0.06063351440463681, "grad_norm": 0.31003937125205994, "learning_rate": 1.981911969200625e-05, "loss": 0.1936, "step": 3266 }, { "epoch": 0.06067064454205545, "grad_norm": 0.4288104176521301, "learning_rate": 1.9818898764173188e-05, "loss": 0.3103, "step": 3268 }, { "epoch": 0.06070777467947409, "grad_norm": 0.394898384809494, "learning_rate": 1.9818677702734316e-05, "loss": 0.2631, "step": 3270 }, { "epoch": 0.06074490481689273, "grad_norm": 0.2896632254123688, "learning_rate": 1.9818456507692656e-05, "loss": 0.3922, "step": 3272 }, { "epoch": 0.060782034954311365, "grad_norm": 0.34399017691612244, "learning_rate": 1.9818235179051217e-05, "loss": 0.2424, "step": 3274 }, { "epoch": 0.06081916509173001, "grad_norm": 0.46302902698516846, "learning_rate": 1.9818013716812997e-05, "loss": 0.3542, "step": 3276 }, { "epoch": 0.06085629522914864, "grad_norm": 0.3556465804576874, "learning_rate": 1.9817792120981025e-05, "loss": 0.381, "step": 3278 }, { "epoch": 0.060893425366567284, "grad_norm": 0.2924623489379883, "learning_rate": 1.9817570391558306e-05, "loss": 0.3708, "step": 3280 }, { "epoch": 0.06093055550398592, "grad_norm": 0.29451170563697815, "learning_rate": 1.9817348528547863e-05, "loss": 0.3773, "step": 3282 }, { "epoch": 0.06096768564140456, "grad_norm": 0.2974057197570801, "learning_rate": 1.981712653195271e-05, "loss": 0.5249, "step": 3284 }, { "epoch": 0.0610048157788232, "grad_norm": 0.3566272258758545, "learning_rate": 1.981690440177588e-05, "loss": 0.1815, "step": 3286 }, { "epoch": 0.06104194591624183, "grad_norm": 0.3027786612510681, "learning_rate": 1.9816682138020377e-05, "loss": 0.3064, "step": 3288 }, { "epoch": 0.061079076053660475, "grad_norm": 0.4292389750480652, "learning_rate": 1.981645974068924e-05, "loss": 0.2159, "step": 3290 }, { "epoch": 0.06111620619107911, "grad_norm": 0.37225788831710815, "learning_rate": 1.9816237209785487e-05, "loss": 0.2118, "step": 3292 }, { "epoch": 0.06115333632849775, "grad_norm": 0.47235774993896484, "learning_rate": 1.9816014545312147e-05, "loss": 0.294, "step": 3294 }, { "epoch": 0.06119046646591639, "grad_norm": 0.2898741662502289, "learning_rate": 1.9815791747272252e-05, "loss": 0.3089, "step": 3296 }, { "epoch": 0.06122759660333503, "grad_norm": 0.2759163975715637, "learning_rate": 1.9815568815668835e-05, "loss": 0.4878, "step": 3298 }, { "epoch": 0.061264726740753665, "grad_norm": 0.41972827911376953, "learning_rate": 1.9815345750504926e-05, "loss": 0.2574, "step": 3300 }, { "epoch": 0.06130185687817231, "grad_norm": 0.45233675837516785, "learning_rate": 1.981512255178356e-05, "loss": 0.3451, "step": 3302 }, { "epoch": 0.06133898701559094, "grad_norm": 0.33466336131095886, "learning_rate": 1.9814899219507778e-05, "loss": 0.1477, "step": 3304 }, { "epoch": 0.061376117153009585, "grad_norm": 0.3279944658279419, "learning_rate": 1.9814675753680616e-05, "loss": 0.4011, "step": 3306 }, { "epoch": 0.06141324729042822, "grad_norm": 0.4179050624370575, "learning_rate": 1.9814452154305117e-05, "loss": 0.413, "step": 3308 }, { "epoch": 0.06145037742784686, "grad_norm": 0.5420733690261841, "learning_rate": 1.981422842138432e-05, "loss": 0.2935, "step": 3310 }, { "epoch": 0.0614875075652655, "grad_norm": 0.37667885422706604, "learning_rate": 1.981400455492127e-05, "loss": 0.3755, "step": 3312 }, { "epoch": 0.06152463770268414, "grad_norm": 0.30398014187812805, "learning_rate": 1.9813780554919018e-05, "loss": 0.201, "step": 3314 }, { "epoch": 0.061561767840102775, "grad_norm": 0.3155660629272461, "learning_rate": 1.9813556421380608e-05, "loss": 0.4483, "step": 3316 }, { "epoch": 0.06159889797752142, "grad_norm": 0.22533252835273743, "learning_rate": 1.9813332154309087e-05, "loss": 0.3155, "step": 3318 }, { "epoch": 0.06163602811494005, "grad_norm": 0.32661017775535583, "learning_rate": 1.981310775370751e-05, "loss": 0.2922, "step": 3320 }, { "epoch": 0.061673158252358695, "grad_norm": 0.3552851378917694, "learning_rate": 1.981288321957893e-05, "loss": 0.347, "step": 3322 }, { "epoch": 0.06171028838977733, "grad_norm": 0.3009335398674011, "learning_rate": 1.9812658551926403e-05, "loss": 0.1936, "step": 3324 }, { "epoch": 0.061747418527195966, "grad_norm": 0.30215367674827576, "learning_rate": 1.981243375075299e-05, "loss": 0.3696, "step": 3326 }, { "epoch": 0.06178454866461461, "grad_norm": 0.4027291238307953, "learning_rate": 1.981220881606174e-05, "loss": 0.2757, "step": 3328 }, { "epoch": 0.06182167880203324, "grad_norm": 0.40032291412353516, "learning_rate": 1.9811983747855722e-05, "loss": 0.335, "step": 3330 }, { "epoch": 0.061858808939451886, "grad_norm": 0.27187496423721313, "learning_rate": 1.9811758546137995e-05, "loss": 0.3502, "step": 3332 }, { "epoch": 0.06189593907687052, "grad_norm": 0.3077321946620941, "learning_rate": 1.9811533210911623e-05, "loss": 0.4143, "step": 3334 }, { "epoch": 0.06193306921428916, "grad_norm": 0.2704857587814331, "learning_rate": 1.981130774217967e-05, "loss": 0.2955, "step": 3336 }, { "epoch": 0.0619701993517078, "grad_norm": 0.3517553508281708, "learning_rate": 1.981108213994521e-05, "loss": 0.3972, "step": 3338 }, { "epoch": 0.06200732948912644, "grad_norm": 0.36262568831443787, "learning_rate": 1.981085640421131e-05, "loss": 0.3097, "step": 3340 }, { "epoch": 0.062044459626545076, "grad_norm": 0.35919928550720215, "learning_rate": 1.981063053498104e-05, "loss": 0.2836, "step": 3342 }, { "epoch": 0.06208158976396372, "grad_norm": 0.43146777153015137, "learning_rate": 1.9810404532257476e-05, "loss": 0.4293, "step": 3344 }, { "epoch": 0.062118719901382353, "grad_norm": 0.3592202961444855, "learning_rate": 1.981017839604369e-05, "loss": 0.4629, "step": 3346 }, { "epoch": 0.062155850038800996, "grad_norm": 0.2898438274860382, "learning_rate": 1.980995212634276e-05, "loss": 0.3921, "step": 3348 }, { "epoch": 0.06219298017621963, "grad_norm": 0.3588927090167999, "learning_rate": 1.980972572315777e-05, "loss": 0.2682, "step": 3350 }, { "epoch": 0.06223011031363827, "grad_norm": 0.3686981499195099, "learning_rate": 1.9809499186491794e-05, "loss": 0.3534, "step": 3352 }, { "epoch": 0.06226724045105691, "grad_norm": 0.274715781211853, "learning_rate": 1.9809272516347917e-05, "loss": 0.324, "step": 3354 }, { "epoch": 0.06230437058847555, "grad_norm": 0.36873549222946167, "learning_rate": 1.9809045712729225e-05, "loss": 0.2721, "step": 3356 }, { "epoch": 0.062341500725894186, "grad_norm": 0.37963634729385376, "learning_rate": 1.9808818775638802e-05, "loss": 0.4641, "step": 3358 }, { "epoch": 0.06237863086331283, "grad_norm": 0.221884623169899, "learning_rate": 1.9808591705079733e-05, "loss": 0.2945, "step": 3360 }, { "epoch": 0.062415761000731464, "grad_norm": 0.4325578808784485, "learning_rate": 1.9808364501055113e-05, "loss": 0.2793, "step": 3362 }, { "epoch": 0.0624528911381501, "grad_norm": 0.38638925552368164, "learning_rate": 1.9808137163568034e-05, "loss": 0.4063, "step": 3364 }, { "epoch": 0.06249002127556874, "grad_norm": 0.35687869787216187, "learning_rate": 1.980790969262158e-05, "loss": 0.3402, "step": 3366 }, { "epoch": 0.06252715141298738, "grad_norm": 0.34990063309669495, "learning_rate": 1.9807682088218862e-05, "loss": 0.6077, "step": 3368 }, { "epoch": 0.06256428155040601, "grad_norm": 0.3275136947631836, "learning_rate": 1.980745435036296e-05, "loss": 0.3089, "step": 3370 }, { "epoch": 0.06260141168782465, "grad_norm": 0.35529887676239014, "learning_rate": 1.9807226479056987e-05, "loss": 0.2594, "step": 3372 }, { "epoch": 0.0626385418252433, "grad_norm": 0.2974911332130432, "learning_rate": 1.9806998474304037e-05, "loss": 0.4894, "step": 3374 }, { "epoch": 0.06267567196266194, "grad_norm": 0.45959633588790894, "learning_rate": 1.980677033610721e-05, "loss": 0.3815, "step": 3376 }, { "epoch": 0.06271280210008057, "grad_norm": 0.484009325504303, "learning_rate": 1.9806542064469615e-05, "loss": 0.3984, "step": 3378 }, { "epoch": 0.06274993223749921, "grad_norm": 0.41314223408699036, "learning_rate": 1.9806313659394356e-05, "loss": 0.3932, "step": 3380 }, { "epoch": 0.06278706237491785, "grad_norm": 0.2912636995315552, "learning_rate": 1.9806085120884543e-05, "loss": 0.2438, "step": 3382 }, { "epoch": 0.0628241925123365, "grad_norm": 0.3224206864833832, "learning_rate": 1.9805856448943284e-05, "loss": 0.4137, "step": 3384 }, { "epoch": 0.06286132264975512, "grad_norm": 0.3114950656890869, "learning_rate": 1.980562764357369e-05, "loss": 0.316, "step": 3386 }, { "epoch": 0.06289845278717376, "grad_norm": 0.31566303968429565, "learning_rate": 1.9805398704778876e-05, "loss": 0.2337, "step": 3388 }, { "epoch": 0.0629355829245924, "grad_norm": 0.5739381313323975, "learning_rate": 1.9805169632561952e-05, "loss": 0.2494, "step": 3390 }, { "epoch": 0.06297271306201105, "grad_norm": 0.6408292055130005, "learning_rate": 1.9804940426926042e-05, "loss": 0.3554, "step": 3392 }, { "epoch": 0.06300984319942968, "grad_norm": 0.4740843176841736, "learning_rate": 1.9804711087874265e-05, "loss": 0.4606, "step": 3394 }, { "epoch": 0.06304697333684832, "grad_norm": 0.23687942326068878, "learning_rate": 1.9804481615409732e-05, "loss": 0.3875, "step": 3396 }, { "epoch": 0.06308410347426696, "grad_norm": 0.2944756746292114, "learning_rate": 1.980425200953558e-05, "loss": 0.2461, "step": 3398 }, { "epoch": 0.0631212336116856, "grad_norm": 0.26801395416259766, "learning_rate": 1.980402227025492e-05, "loss": 0.2823, "step": 3400 }, { "epoch": 0.06315836374910423, "grad_norm": 0.36365678906440735, "learning_rate": 1.9803792397570882e-05, "loss": 0.388, "step": 3402 }, { "epoch": 0.06319549388652287, "grad_norm": 0.3454486131668091, "learning_rate": 1.9803562391486598e-05, "loss": 0.2633, "step": 3404 }, { "epoch": 0.06323262402394152, "grad_norm": 0.3128768503665924, "learning_rate": 1.9803332252005195e-05, "loss": 0.3173, "step": 3406 }, { "epoch": 0.06326975416136014, "grad_norm": 0.335010826587677, "learning_rate": 1.9803101979129805e-05, "loss": 0.4111, "step": 3408 }, { "epoch": 0.06330688429877879, "grad_norm": 0.39339467883110046, "learning_rate": 1.980287157286356e-05, "loss": 0.2494, "step": 3410 }, { "epoch": 0.06334401443619743, "grad_norm": 0.4477849304676056, "learning_rate": 1.9802641033209595e-05, "loss": 0.521, "step": 3412 }, { "epoch": 0.06338114457361607, "grad_norm": 0.427157461643219, "learning_rate": 1.980241036017105e-05, "loss": 0.3358, "step": 3414 }, { "epoch": 0.0634182747110347, "grad_norm": 0.3436141908168793, "learning_rate": 1.980217955375106e-05, "loss": 0.4574, "step": 3416 }, { "epoch": 0.06345540484845334, "grad_norm": 0.29612526297569275, "learning_rate": 1.9801948613952766e-05, "loss": 0.3696, "step": 3418 }, { "epoch": 0.06349253498587198, "grad_norm": 0.313785195350647, "learning_rate": 1.9801717540779312e-05, "loss": 0.3888, "step": 3420 }, { "epoch": 0.06352966512329063, "grad_norm": 0.5631344318389893, "learning_rate": 1.9801486334233843e-05, "loss": 0.431, "step": 3422 }, { "epoch": 0.06356679526070926, "grad_norm": 0.5482927560806274, "learning_rate": 1.9801254994319506e-05, "loss": 0.4207, "step": 3424 }, { "epoch": 0.0636039253981279, "grad_norm": 0.2833101153373718, "learning_rate": 1.9801023521039445e-05, "loss": 0.3317, "step": 3426 }, { "epoch": 0.06364105553554654, "grad_norm": 0.41085535287857056, "learning_rate": 1.980079191439681e-05, "loss": 0.6227, "step": 3428 }, { "epoch": 0.06367818567296518, "grad_norm": 0.33912044763565063, "learning_rate": 1.9800560174394755e-05, "loss": 0.3248, "step": 3430 }, { "epoch": 0.06371531581038381, "grad_norm": 0.5338802933692932, "learning_rate": 1.9800328301036432e-05, "loss": 0.3868, "step": 3432 }, { "epoch": 0.06375244594780245, "grad_norm": 0.39040011167526245, "learning_rate": 1.9800096294324995e-05, "loss": 0.3828, "step": 3434 }, { "epoch": 0.0637895760852211, "grad_norm": 0.2740331292152405, "learning_rate": 1.9799864154263604e-05, "loss": 0.2468, "step": 3436 }, { "epoch": 0.06382670622263974, "grad_norm": 0.29916927218437195, "learning_rate": 1.979963188085541e-05, "loss": 0.4015, "step": 3438 }, { "epoch": 0.06386383636005837, "grad_norm": 0.31425076723098755, "learning_rate": 1.9799399474103588e-05, "loss": 0.3541, "step": 3440 }, { "epoch": 0.06390096649747701, "grad_norm": 0.2851545810699463, "learning_rate": 1.9799166934011287e-05, "loss": 0.2345, "step": 3442 }, { "epoch": 0.06393809663489565, "grad_norm": 0.3720426857471466, "learning_rate": 1.9798934260581677e-05, "loss": 0.2959, "step": 3444 }, { "epoch": 0.06397522677231428, "grad_norm": 0.3075149655342102, "learning_rate": 1.9798701453817922e-05, "loss": 0.2005, "step": 3446 }, { "epoch": 0.06401235690973292, "grad_norm": 0.6257651448249817, "learning_rate": 1.979846851372319e-05, "loss": 0.3182, "step": 3448 }, { "epoch": 0.06404948704715156, "grad_norm": 0.36483991146087646, "learning_rate": 1.9798235440300654e-05, "loss": 0.3384, "step": 3450 }, { "epoch": 0.0640866171845702, "grad_norm": 0.3266531229019165, "learning_rate": 1.979800223355348e-05, "loss": 0.2649, "step": 3452 }, { "epoch": 0.06412374732198883, "grad_norm": 0.3313089907169342, "learning_rate": 1.979776889348485e-05, "loss": 0.3701, "step": 3454 }, { "epoch": 0.06416087745940748, "grad_norm": 0.4297769069671631, "learning_rate": 1.9797535420097927e-05, "loss": 0.4376, "step": 3456 }, { "epoch": 0.06419800759682612, "grad_norm": 0.3002229332923889, "learning_rate": 1.9797301813395897e-05, "loss": 0.5252, "step": 3458 }, { "epoch": 0.06423513773424476, "grad_norm": 0.31100016832351685, "learning_rate": 1.9797068073381933e-05, "loss": 0.2313, "step": 3460 }, { "epoch": 0.06427226787166339, "grad_norm": 0.3578316569328308, "learning_rate": 1.9796834200059217e-05, "loss": 0.3925, "step": 3462 }, { "epoch": 0.06430939800908203, "grad_norm": 0.36674967408180237, "learning_rate": 1.979660019343094e-05, "loss": 0.3483, "step": 3464 }, { "epoch": 0.06434652814650067, "grad_norm": 0.3229820132255554, "learning_rate": 1.979636605350027e-05, "loss": 0.2578, "step": 3466 }, { "epoch": 0.06438365828391932, "grad_norm": 0.2721008062362671, "learning_rate": 1.9796131780270403e-05, "loss": 0.3217, "step": 3468 }, { "epoch": 0.06442078842133794, "grad_norm": 0.40643110871315, "learning_rate": 1.979589737374453e-05, "loss": 0.2806, "step": 3470 }, { "epoch": 0.06445791855875659, "grad_norm": 0.5781317353248596, "learning_rate": 1.9795662833925834e-05, "loss": 0.4425, "step": 3472 }, { "epoch": 0.06449504869617523, "grad_norm": 0.31618189811706543, "learning_rate": 1.9795428160817506e-05, "loss": 0.4773, "step": 3474 }, { "epoch": 0.06453217883359386, "grad_norm": 0.4076223373413086, "learning_rate": 1.9795193354422743e-05, "loss": 0.2774, "step": 3476 }, { "epoch": 0.0645693089710125, "grad_norm": 0.3126170337200165, "learning_rate": 1.9794958414744737e-05, "loss": 0.2315, "step": 3478 }, { "epoch": 0.06460643910843114, "grad_norm": 0.28904321789741516, "learning_rate": 1.9794723341786686e-05, "loss": 0.1381, "step": 3480 }, { "epoch": 0.06464356924584978, "grad_norm": 0.3527877926826477, "learning_rate": 1.9794488135551796e-05, "loss": 0.3644, "step": 3482 }, { "epoch": 0.06468069938326841, "grad_norm": 0.3266255855560303, "learning_rate": 1.979425279604325e-05, "loss": 0.4152, "step": 3484 }, { "epoch": 0.06471782952068705, "grad_norm": 0.4346773624420166, "learning_rate": 1.979401732326427e-05, "loss": 0.2437, "step": 3486 }, { "epoch": 0.0647549596581057, "grad_norm": 0.3893941640853882, "learning_rate": 1.9793781717218045e-05, "loss": 0.296, "step": 3488 }, { "epoch": 0.06479208979552434, "grad_norm": 0.3035922646522522, "learning_rate": 1.9793545977907787e-05, "loss": 0.446, "step": 3490 }, { "epoch": 0.06482921993294297, "grad_norm": 0.39036884903907776, "learning_rate": 1.9793310105336702e-05, "loss": 0.3657, "step": 3492 }, { "epoch": 0.06486635007036161, "grad_norm": 0.31383204460144043, "learning_rate": 1.9793074099508005e-05, "loss": 0.4136, "step": 3494 }, { "epoch": 0.06490348020778025, "grad_norm": 0.3066549003124237, "learning_rate": 1.97928379604249e-05, "loss": 0.3488, "step": 3496 }, { "epoch": 0.0649406103451989, "grad_norm": 0.3661425709724426, "learning_rate": 1.9792601688090603e-05, "loss": 0.2929, "step": 3498 }, { "epoch": 0.06497774048261752, "grad_norm": 0.33869463205337524, "learning_rate": 1.9792365282508332e-05, "loss": 0.4609, "step": 3500 }, { "epoch": 0.06501487062003616, "grad_norm": 0.3715529441833496, "learning_rate": 1.97921287436813e-05, "loss": 0.453, "step": 3502 }, { "epoch": 0.0650520007574548, "grad_norm": 0.37088167667388916, "learning_rate": 1.9791892071612727e-05, "loss": 0.3071, "step": 3504 }, { "epoch": 0.06508913089487345, "grad_norm": 0.2620362639427185, "learning_rate": 1.9791655266305833e-05, "loss": 0.4533, "step": 3506 }, { "epoch": 0.06512626103229208, "grad_norm": 0.5008695125579834, "learning_rate": 1.9791418327763838e-05, "loss": 0.3663, "step": 3508 }, { "epoch": 0.06516339116971072, "grad_norm": 0.6745080947875977, "learning_rate": 1.979118125598997e-05, "loss": 0.4937, "step": 3510 }, { "epoch": 0.06520052130712936, "grad_norm": 0.7969282269477844, "learning_rate": 1.9790944050987454e-05, "loss": 0.4882, "step": 3512 }, { "epoch": 0.06523765144454799, "grad_norm": 0.2834987938404083, "learning_rate": 1.9790706712759516e-05, "loss": 0.4779, "step": 3514 }, { "epoch": 0.06527478158196663, "grad_norm": 0.3522730767726898, "learning_rate": 1.9790469241309385e-05, "loss": 0.427, "step": 3516 }, { "epoch": 0.06531191171938527, "grad_norm": 0.3356669247150421, "learning_rate": 1.9790231636640294e-05, "loss": 0.2438, "step": 3518 }, { "epoch": 0.06534904185680392, "grad_norm": 0.23388394713401794, "learning_rate": 1.9789993898755475e-05, "loss": 0.4224, "step": 3520 }, { "epoch": 0.06538617199422254, "grad_norm": 0.35800936818122864, "learning_rate": 1.9789756027658165e-05, "loss": 0.4319, "step": 3522 }, { "epoch": 0.06542330213164119, "grad_norm": 0.3816884458065033, "learning_rate": 1.9789518023351597e-05, "loss": 0.4366, "step": 3524 }, { "epoch": 0.06546043226905983, "grad_norm": 0.48795273900032043, "learning_rate": 1.9789279885839016e-05, "loss": 0.4556, "step": 3526 }, { "epoch": 0.06549756240647847, "grad_norm": 0.2694101333618164, "learning_rate": 1.9789041615123653e-05, "loss": 0.3967, "step": 3528 }, { "epoch": 0.0655346925438971, "grad_norm": 0.3457162380218506, "learning_rate": 1.9788803211208757e-05, "loss": 0.4285, "step": 3530 }, { "epoch": 0.06557182268131574, "grad_norm": 0.36494818329811096, "learning_rate": 1.978856467409757e-05, "loss": 0.3815, "step": 3532 }, { "epoch": 0.06560895281873438, "grad_norm": 0.3457881808280945, "learning_rate": 1.978832600379334e-05, "loss": 0.3317, "step": 3534 }, { "epoch": 0.06564608295615303, "grad_norm": 0.3842501640319824, "learning_rate": 1.978808720029931e-05, "loss": 0.4328, "step": 3536 }, { "epoch": 0.06568321309357165, "grad_norm": 0.34488967061042786, "learning_rate": 1.9787848263618735e-05, "loss": 0.2453, "step": 3538 }, { "epoch": 0.0657203432309903, "grad_norm": 0.40323707461357117, "learning_rate": 1.978760919375486e-05, "loss": 0.3317, "step": 3540 }, { "epoch": 0.06575747336840894, "grad_norm": 0.364903062582016, "learning_rate": 1.9787369990710943e-05, "loss": 0.4382, "step": 3542 }, { "epoch": 0.06579460350582758, "grad_norm": 0.4243890643119812, "learning_rate": 1.9787130654490237e-05, "loss": 0.4304, "step": 3544 }, { "epoch": 0.06583173364324621, "grad_norm": 0.43857312202453613, "learning_rate": 1.9786891185096e-05, "loss": 0.4892, "step": 3546 }, { "epoch": 0.06586886378066485, "grad_norm": 0.3423400819301605, "learning_rate": 1.9786651582531484e-05, "loss": 0.2354, "step": 3548 }, { "epoch": 0.0659059939180835, "grad_norm": 0.3118945062160492, "learning_rate": 1.9786411846799957e-05, "loss": 0.3271, "step": 3550 }, { "epoch": 0.06594312405550212, "grad_norm": 0.3465314507484436, "learning_rate": 1.978617197790468e-05, "loss": 0.3232, "step": 3552 }, { "epoch": 0.06598025419292076, "grad_norm": 0.3505212068557739, "learning_rate": 1.9785931975848913e-05, "loss": 0.4013, "step": 3554 }, { "epoch": 0.06601738433033941, "grad_norm": 0.40989041328430176, "learning_rate": 1.9785691840635922e-05, "loss": 0.2751, "step": 3556 }, { "epoch": 0.06605451446775805, "grad_norm": 0.24111665785312653, "learning_rate": 1.9785451572268983e-05, "loss": 0.1604, "step": 3558 }, { "epoch": 0.06609164460517668, "grad_norm": 0.3601658046245575, "learning_rate": 1.9785211170751354e-05, "loss": 0.4636, "step": 3560 }, { "epoch": 0.06612877474259532, "grad_norm": 0.31617647409439087, "learning_rate": 1.9784970636086313e-05, "loss": 0.4174, "step": 3562 }, { "epoch": 0.06616590488001396, "grad_norm": 0.4005337655544281, "learning_rate": 1.978472996827713e-05, "loss": 0.3572, "step": 3564 }, { "epoch": 0.0662030350174326, "grad_norm": 0.27472394704818726, "learning_rate": 1.9784489167327083e-05, "loss": 0.3076, "step": 3566 }, { "epoch": 0.06624016515485123, "grad_norm": 0.283316969871521, "learning_rate": 1.9784248233239447e-05, "loss": 0.4069, "step": 3568 }, { "epoch": 0.06627729529226987, "grad_norm": 0.5566747188568115, "learning_rate": 1.9784007166017497e-05, "loss": 0.3595, "step": 3570 }, { "epoch": 0.06631442542968852, "grad_norm": 0.30584490299224854, "learning_rate": 1.9783765965664515e-05, "loss": 0.5073, "step": 3572 }, { "epoch": 0.06635155556710716, "grad_norm": 0.7525100111961365, "learning_rate": 1.9783524632183786e-05, "loss": 0.6261, "step": 3574 }, { "epoch": 0.06638868570452579, "grad_norm": 0.42935389280319214, "learning_rate": 1.9783283165578594e-05, "loss": 0.3267, "step": 3576 }, { "epoch": 0.06642581584194443, "grad_norm": 0.28410154581069946, "learning_rate": 1.9783041565852218e-05, "loss": 0.5157, "step": 3578 }, { "epoch": 0.06646294597936307, "grad_norm": 0.2615748345851898, "learning_rate": 1.9782799833007952e-05, "loss": 0.392, "step": 3580 }, { "epoch": 0.06650007611678171, "grad_norm": 0.34294745326042175, "learning_rate": 1.9782557967049085e-05, "loss": 0.4662, "step": 3582 }, { "epoch": 0.06653720625420034, "grad_norm": 0.3632686734199524, "learning_rate": 1.97823159679789e-05, "loss": 0.5372, "step": 3584 }, { "epoch": 0.06657433639161899, "grad_norm": 0.33623144030570984, "learning_rate": 1.9782073835800705e-05, "loss": 0.2725, "step": 3586 }, { "epoch": 0.06661146652903763, "grad_norm": 0.28490644693374634, "learning_rate": 1.978183157051778e-05, "loss": 0.4684, "step": 3588 }, { "epoch": 0.06664859666645626, "grad_norm": 0.2859022617340088, "learning_rate": 1.978158917213343e-05, "loss": 0.4832, "step": 3590 }, { "epoch": 0.0666857268038749, "grad_norm": 0.42026716470718384, "learning_rate": 1.9781346640650946e-05, "loss": 0.4421, "step": 3592 }, { "epoch": 0.06672285694129354, "grad_norm": 0.32438981533050537, "learning_rate": 1.9781103976073637e-05, "loss": 0.5605, "step": 3594 }, { "epoch": 0.06675998707871218, "grad_norm": 0.3323333263397217, "learning_rate": 1.97808611784048e-05, "loss": 0.3469, "step": 3596 }, { "epoch": 0.06679711721613081, "grad_norm": 0.3498179018497467, "learning_rate": 1.978061824764774e-05, "loss": 0.3391, "step": 3598 }, { "epoch": 0.06683424735354945, "grad_norm": 0.40430712699890137, "learning_rate": 1.978037518380576e-05, "loss": 0.2071, "step": 3600 }, { "epoch": 0.0668713774909681, "grad_norm": 0.3161903917789459, "learning_rate": 1.9780131986882168e-05, "loss": 0.3566, "step": 3602 }, { "epoch": 0.06690850762838674, "grad_norm": 0.5689277052879333, "learning_rate": 1.977988865688028e-05, "loss": 0.4536, "step": 3604 }, { "epoch": 0.06694563776580537, "grad_norm": 0.3722653090953827, "learning_rate": 1.9779645193803394e-05, "loss": 0.3497, "step": 3606 }, { "epoch": 0.06698276790322401, "grad_norm": 0.4058457016944885, "learning_rate": 1.9779401597654835e-05, "loss": 0.2542, "step": 3608 }, { "epoch": 0.06701989804064265, "grad_norm": 0.37038716673851013, "learning_rate": 1.9779157868437915e-05, "loss": 0.3778, "step": 3610 }, { "epoch": 0.06705702817806129, "grad_norm": 0.286577433347702, "learning_rate": 1.9778914006155944e-05, "loss": 0.2709, "step": 3612 }, { "epoch": 0.06709415831547992, "grad_norm": 0.396380752325058, "learning_rate": 1.977867001081225e-05, "loss": 0.4762, "step": 3614 }, { "epoch": 0.06713128845289856, "grad_norm": 0.34840935468673706, "learning_rate": 1.977842588241014e-05, "loss": 0.478, "step": 3616 }, { "epoch": 0.0671684185903172, "grad_norm": 0.4267539381980896, "learning_rate": 1.977818162095295e-05, "loss": 0.3074, "step": 3618 }, { "epoch": 0.06720554872773585, "grad_norm": 0.2859472632408142, "learning_rate": 1.9777937226443996e-05, "loss": 0.2739, "step": 3620 }, { "epoch": 0.06724267886515448, "grad_norm": 0.35712242126464844, "learning_rate": 1.9777692698886604e-05, "loss": 0.3906, "step": 3622 }, { "epoch": 0.06727980900257312, "grad_norm": 0.28851747512817383, "learning_rate": 1.97774480382841e-05, "loss": 0.2724, "step": 3624 }, { "epoch": 0.06731693913999176, "grad_norm": 0.2951601445674896, "learning_rate": 1.9777203244639818e-05, "loss": 0.4262, "step": 3626 }, { "epoch": 0.06735406927741039, "grad_norm": 0.3259916603565216, "learning_rate": 1.9776958317957082e-05, "loss": 0.1188, "step": 3628 }, { "epoch": 0.06739119941482903, "grad_norm": 0.35760271549224854, "learning_rate": 1.977671325823923e-05, "loss": 0.283, "step": 3630 }, { "epoch": 0.06742832955224767, "grad_norm": 0.3593396842479706, "learning_rate": 1.9776468065489598e-05, "loss": 0.5277, "step": 3632 }, { "epoch": 0.06746545968966632, "grad_norm": 0.2551288604736328, "learning_rate": 1.977622273971152e-05, "loss": 0.3124, "step": 3634 }, { "epoch": 0.06750258982708494, "grad_norm": 0.38036009669303894, "learning_rate": 1.9775977280908328e-05, "loss": 0.4735, "step": 3636 }, { "epoch": 0.06753971996450359, "grad_norm": 0.3669552505016327, "learning_rate": 1.9775731689083372e-05, "loss": 0.2819, "step": 3638 }, { "epoch": 0.06757685010192223, "grad_norm": 0.32467395067214966, "learning_rate": 1.9775485964239993e-05, "loss": 0.3542, "step": 3640 }, { "epoch": 0.06761398023934087, "grad_norm": 0.3737581968307495, "learning_rate": 1.9775240106381527e-05, "loss": 0.3569, "step": 3642 }, { "epoch": 0.0676511103767595, "grad_norm": 0.36326664686203003, "learning_rate": 1.977499411551132e-05, "loss": 0.226, "step": 3644 }, { "epoch": 0.06768824051417814, "grad_norm": 0.3367057144641876, "learning_rate": 1.9774747991632724e-05, "loss": 0.2422, "step": 3646 }, { "epoch": 0.06772537065159678, "grad_norm": 0.5563686490058899, "learning_rate": 1.977450173474909e-05, "loss": 0.3869, "step": 3648 }, { "epoch": 0.06776250078901543, "grad_norm": 0.4534398019313812, "learning_rate": 1.9774255344863764e-05, "loss": 0.4673, "step": 3650 }, { "epoch": 0.06779963092643405, "grad_norm": 0.25752100348472595, "learning_rate": 1.97740088219801e-05, "loss": 0.2981, "step": 3652 }, { "epoch": 0.0678367610638527, "grad_norm": 0.3885330259799957, "learning_rate": 1.977376216610145e-05, "loss": 0.3304, "step": 3654 }, { "epoch": 0.06787389120127134, "grad_norm": 0.3164767920970917, "learning_rate": 1.977351537723118e-05, "loss": 0.511, "step": 3656 }, { "epoch": 0.06791102133868998, "grad_norm": 0.3914724886417389, "learning_rate": 1.9773268455372632e-05, "loss": 0.3287, "step": 3658 }, { "epoch": 0.06794815147610861, "grad_norm": 0.44187113642692566, "learning_rate": 1.977302140052918e-05, "loss": 0.4321, "step": 3660 }, { "epoch": 0.06798528161352725, "grad_norm": 0.3751872479915619, "learning_rate": 1.9772774212704176e-05, "loss": 0.4284, "step": 3662 }, { "epoch": 0.0680224117509459, "grad_norm": 0.2755732238292694, "learning_rate": 1.9772526891900993e-05, "loss": 0.4088, "step": 3664 }, { "epoch": 0.06805954188836452, "grad_norm": 0.3075898289680481, "learning_rate": 1.9772279438122987e-05, "loss": 0.1782, "step": 3666 }, { "epoch": 0.06809667202578316, "grad_norm": 0.3583005368709564, "learning_rate": 1.977203185137353e-05, "loss": 0.3419, "step": 3668 }, { "epoch": 0.0681338021632018, "grad_norm": 0.36649009585380554, "learning_rate": 1.9771784131655993e-05, "loss": 0.3716, "step": 3670 }, { "epoch": 0.06817093230062045, "grad_norm": 0.31687599420547485, "learning_rate": 1.977153627897374e-05, "loss": 0.3021, "step": 3672 }, { "epoch": 0.06820806243803908, "grad_norm": 0.3212026357650757, "learning_rate": 1.9771288293330154e-05, "loss": 0.4681, "step": 3674 }, { "epoch": 0.06824519257545772, "grad_norm": 0.27392446994781494, "learning_rate": 1.9771040174728596e-05, "loss": 0.4048, "step": 3676 }, { "epoch": 0.06828232271287636, "grad_norm": 0.306193083524704, "learning_rate": 1.977079192317245e-05, "loss": 0.4329, "step": 3678 }, { "epoch": 0.068319452850295, "grad_norm": 0.32209184765815735, "learning_rate": 1.9770543538665095e-05, "loss": 0.336, "step": 3680 }, { "epoch": 0.06835658298771363, "grad_norm": 0.2807345390319824, "learning_rate": 1.9770295021209907e-05, "loss": 0.3375, "step": 3682 }, { "epoch": 0.06839371312513227, "grad_norm": 0.3570718467235565, "learning_rate": 1.9770046370810267e-05, "loss": 0.3185, "step": 3684 }, { "epoch": 0.06843084326255092, "grad_norm": 0.3253606855869293, "learning_rate": 1.9769797587469567e-05, "loss": 0.5198, "step": 3686 }, { "epoch": 0.06846797339996956, "grad_norm": 0.4012141227722168, "learning_rate": 1.976954867119118e-05, "loss": 0.316, "step": 3688 }, { "epoch": 0.06850510353738819, "grad_norm": 0.3480335474014282, "learning_rate": 1.97692996219785e-05, "loss": 0.3325, "step": 3690 }, { "epoch": 0.06854223367480683, "grad_norm": 0.46913138031959534, "learning_rate": 1.9769050439834916e-05, "loss": 0.4427, "step": 3692 }, { "epoch": 0.06857936381222547, "grad_norm": 0.330547571182251, "learning_rate": 1.9768801124763815e-05, "loss": 0.282, "step": 3694 }, { "epoch": 0.06861649394964411, "grad_norm": 0.3649209439754486, "learning_rate": 1.9768551676768593e-05, "loss": 0.3271, "step": 3696 }, { "epoch": 0.06865362408706274, "grad_norm": 0.30090102553367615, "learning_rate": 1.9768302095852644e-05, "loss": 0.5838, "step": 3698 }, { "epoch": 0.06869075422448138, "grad_norm": 0.2098192274570465, "learning_rate": 1.9768052382019364e-05, "loss": 0.2257, "step": 3700 }, { "epoch": 0.06872788436190003, "grad_norm": 0.340071439743042, "learning_rate": 1.9767802535272143e-05, "loss": 0.313, "step": 3702 }, { "epoch": 0.06876501449931866, "grad_norm": 0.3772391080856323, "learning_rate": 1.9767552555614395e-05, "loss": 0.5547, "step": 3704 }, { "epoch": 0.0688021446367373, "grad_norm": 0.3363734185695648, "learning_rate": 1.976730244304951e-05, "loss": 0.3533, "step": 3706 }, { "epoch": 0.06883927477415594, "grad_norm": 0.3628649115562439, "learning_rate": 1.9767052197580895e-05, "loss": 0.2255, "step": 3708 }, { "epoch": 0.06887640491157458, "grad_norm": 0.3915059268474579, "learning_rate": 1.9766801819211958e-05, "loss": 0.4689, "step": 3710 }, { "epoch": 0.06891353504899321, "grad_norm": 0.3537546694278717, "learning_rate": 1.97665513079461e-05, "loss": 0.2736, "step": 3712 }, { "epoch": 0.06895066518641185, "grad_norm": 0.4346698820590973, "learning_rate": 1.9766300663786735e-05, "loss": 0.3228, "step": 3714 }, { "epoch": 0.0689877953238305, "grad_norm": 0.23483529686927795, "learning_rate": 1.9766049886737272e-05, "loss": 0.3508, "step": 3716 }, { "epoch": 0.06902492546124914, "grad_norm": 0.4852348566055298, "learning_rate": 1.9765798976801123e-05, "loss": 0.3446, "step": 3718 }, { "epoch": 0.06906205559866777, "grad_norm": 0.2716207802295685, "learning_rate": 1.97655479339817e-05, "loss": 0.3474, "step": 3720 }, { "epoch": 0.06909918573608641, "grad_norm": 0.2739524841308594, "learning_rate": 1.9765296758282422e-05, "loss": 0.2856, "step": 3722 }, { "epoch": 0.06913631587350505, "grad_norm": 0.3598240613937378, "learning_rate": 1.9765045449706703e-05, "loss": 0.4551, "step": 3724 }, { "epoch": 0.06917344601092369, "grad_norm": 0.29737603664398193, "learning_rate": 1.976479400825797e-05, "loss": 0.1317, "step": 3726 }, { "epoch": 0.06921057614834232, "grad_norm": 0.384287565946579, "learning_rate": 1.9764542433939633e-05, "loss": 0.3577, "step": 3728 }, { "epoch": 0.06924770628576096, "grad_norm": 0.4090096056461334, "learning_rate": 1.9764290726755125e-05, "loss": 0.3684, "step": 3730 }, { "epoch": 0.0692848364231796, "grad_norm": 0.4387440085411072, "learning_rate": 1.976403888670787e-05, "loss": 0.2105, "step": 3732 }, { "epoch": 0.06932196656059825, "grad_norm": 0.2417214959859848, "learning_rate": 1.9763786913801292e-05, "loss": 0.2489, "step": 3734 }, { "epoch": 0.06935909669801688, "grad_norm": 0.3465513586997986, "learning_rate": 1.9763534808038818e-05, "loss": 0.471, "step": 3736 }, { "epoch": 0.06939622683543552, "grad_norm": 0.28989678621292114, "learning_rate": 1.976328256942388e-05, "loss": 0.3433, "step": 3738 }, { "epoch": 0.06943335697285416, "grad_norm": 0.9069617390632629, "learning_rate": 1.976303019795991e-05, "loss": 0.2912, "step": 3740 }, { "epoch": 0.06947048711027279, "grad_norm": 0.41809213161468506, "learning_rate": 1.9762777693650344e-05, "loss": 0.6237, "step": 3742 }, { "epoch": 0.06950761724769143, "grad_norm": 0.3669723570346832, "learning_rate": 1.976252505649862e-05, "loss": 0.3269, "step": 3744 }, { "epoch": 0.06954474738511007, "grad_norm": 0.28882896900177, "learning_rate": 1.9762272286508165e-05, "loss": 0.2876, "step": 3746 }, { "epoch": 0.06958187752252872, "grad_norm": 0.33825916051864624, "learning_rate": 1.9762019383682432e-05, "loss": 0.4551, "step": 3748 }, { "epoch": 0.06961900765994734, "grad_norm": 0.31834837794303894, "learning_rate": 1.976176634802485e-05, "loss": 0.2877, "step": 3750 }, { "epoch": 0.06965613779736599, "grad_norm": 0.3667811155319214, "learning_rate": 1.9761513179538875e-05, "loss": 0.367, "step": 3752 }, { "epoch": 0.06969326793478463, "grad_norm": 0.412523478269577, "learning_rate": 1.9761259878227942e-05, "loss": 0.1561, "step": 3754 }, { "epoch": 0.06973039807220327, "grad_norm": 0.39357614517211914, "learning_rate": 1.9761006444095497e-05, "loss": 0.3947, "step": 3756 }, { "epoch": 0.0697675282096219, "grad_norm": 0.2894134819507599, "learning_rate": 1.9760752877144993e-05, "loss": 0.2899, "step": 3758 }, { "epoch": 0.06980465834704054, "grad_norm": 0.49150723218917847, "learning_rate": 1.9760499177379882e-05, "loss": 0.3777, "step": 3760 }, { "epoch": 0.06984178848445918, "grad_norm": 0.2904317378997803, "learning_rate": 1.976024534480361e-05, "loss": 0.4219, "step": 3762 }, { "epoch": 0.06987891862187783, "grad_norm": 0.5349287986755371, "learning_rate": 1.9759991379419636e-05, "loss": 0.473, "step": 3764 }, { "epoch": 0.06991604875929645, "grad_norm": 0.3040470778942108, "learning_rate": 1.9759737281231413e-05, "loss": 0.2991, "step": 3766 }, { "epoch": 0.0699531788967151, "grad_norm": 0.3615362346172333, "learning_rate": 1.97594830502424e-05, "loss": 0.3554, "step": 3768 }, { "epoch": 0.06999030903413374, "grad_norm": 0.5429883003234863, "learning_rate": 1.9759228686456055e-05, "loss": 0.5334, "step": 3770 }, { "epoch": 0.07002743917155238, "grad_norm": 0.29389920830726624, "learning_rate": 1.9758974189875843e-05, "loss": 0.4588, "step": 3772 }, { "epoch": 0.07006456930897101, "grad_norm": 0.3935430347919464, "learning_rate": 1.975871956050522e-05, "loss": 0.3704, "step": 3774 }, { "epoch": 0.07010169944638965, "grad_norm": 0.2728888690471649, "learning_rate": 1.9758464798347657e-05, "loss": 0.3344, "step": 3776 }, { "epoch": 0.0701388295838083, "grad_norm": 0.2200375199317932, "learning_rate": 1.9758209903406616e-05, "loss": 0.1535, "step": 3778 }, { "epoch": 0.07017595972122692, "grad_norm": 0.4148258864879608, "learning_rate": 1.975795487568557e-05, "loss": 0.3065, "step": 3780 }, { "epoch": 0.07021308985864556, "grad_norm": 0.37440967559814453, "learning_rate": 1.9757699715187986e-05, "loss": 0.5939, "step": 3782 }, { "epoch": 0.0702502199960642, "grad_norm": 0.29972875118255615, "learning_rate": 1.9757444421917335e-05, "loss": 0.4825, "step": 3784 }, { "epoch": 0.07028735013348285, "grad_norm": 0.3693486452102661, "learning_rate": 1.975718899587709e-05, "loss": 0.4411, "step": 3786 }, { "epoch": 0.07032448027090148, "grad_norm": 0.2998223304748535, "learning_rate": 1.9756933437070733e-05, "loss": 0.3265, "step": 3788 }, { "epoch": 0.07036161040832012, "grad_norm": 0.49485644698143005, "learning_rate": 1.9756677745501734e-05, "loss": 0.5322, "step": 3790 }, { "epoch": 0.07039874054573876, "grad_norm": 0.3595532774925232, "learning_rate": 1.9756421921173582e-05, "loss": 0.5159, "step": 3792 }, { "epoch": 0.0704358706831574, "grad_norm": 0.3375842273235321, "learning_rate": 1.9756165964089746e-05, "loss": 0.5149, "step": 3794 }, { "epoch": 0.07047300082057603, "grad_norm": 0.3298659324645996, "learning_rate": 1.9755909874253716e-05, "loss": 0.2955, "step": 3796 }, { "epoch": 0.07051013095799467, "grad_norm": 0.4905138909816742, "learning_rate": 1.9755653651668975e-05, "loss": 0.2938, "step": 3798 }, { "epoch": 0.07054726109541332, "grad_norm": 0.32392483949661255, "learning_rate": 1.975539729633901e-05, "loss": 0.3268, "step": 3800 }, { "epoch": 0.07058439123283196, "grad_norm": 0.33975857496261597, "learning_rate": 1.9755140808267305e-05, "loss": 0.461, "step": 3802 }, { "epoch": 0.07062152137025059, "grad_norm": 0.301897257566452, "learning_rate": 1.9754884187457355e-05, "loss": 0.2697, "step": 3804 }, { "epoch": 0.07065865150766923, "grad_norm": 0.3485341966152191, "learning_rate": 1.9754627433912654e-05, "loss": 0.5747, "step": 3806 }, { "epoch": 0.07069578164508787, "grad_norm": 0.33531078696250916, "learning_rate": 1.9754370547636687e-05, "loss": 0.5278, "step": 3808 }, { "epoch": 0.07073291178250651, "grad_norm": 0.26730644702911377, "learning_rate": 1.9754113528632957e-05, "loss": 0.3198, "step": 3810 }, { "epoch": 0.07077004191992514, "grad_norm": 0.3827107548713684, "learning_rate": 1.9753856376904962e-05, "loss": 0.4505, "step": 3812 }, { "epoch": 0.07080717205734378, "grad_norm": 0.2702629268169403, "learning_rate": 1.9753599092456195e-05, "loss": 0.2852, "step": 3814 }, { "epoch": 0.07084430219476243, "grad_norm": 0.4730794429779053, "learning_rate": 1.9753341675290158e-05, "loss": 0.4951, "step": 3816 }, { "epoch": 0.07088143233218105, "grad_norm": 0.2305094450712204, "learning_rate": 1.975308412541036e-05, "loss": 0.4, "step": 3818 }, { "epoch": 0.0709185624695997, "grad_norm": 0.2829965651035309, "learning_rate": 1.9752826442820296e-05, "loss": 0.3778, "step": 3820 }, { "epoch": 0.07095569260701834, "grad_norm": 0.2873823940753937, "learning_rate": 1.975256862752348e-05, "loss": 0.3472, "step": 3822 }, { "epoch": 0.07099282274443698, "grad_norm": 0.23630329966545105, "learning_rate": 1.9752310679523418e-05, "loss": 0.2138, "step": 3824 }, { "epoch": 0.07102995288185561, "grad_norm": 0.34078922867774963, "learning_rate": 1.9752052598823615e-05, "loss": 0.3072, "step": 3826 }, { "epoch": 0.07106708301927425, "grad_norm": 0.24479207396507263, "learning_rate": 1.975179438542759e-05, "loss": 0.2948, "step": 3828 }, { "epoch": 0.0711042131566929, "grad_norm": 0.6064154505729675, "learning_rate": 1.9751536039338854e-05, "loss": 0.3746, "step": 3830 }, { "epoch": 0.07114134329411154, "grad_norm": 0.9282618761062622, "learning_rate": 1.975127756056092e-05, "loss": 0.411, "step": 3832 }, { "epoch": 0.07117847343153016, "grad_norm": 0.3278948962688446, "learning_rate": 1.975101894909731e-05, "loss": 0.587, "step": 3834 }, { "epoch": 0.07121560356894881, "grad_norm": 0.38723716139793396, "learning_rate": 1.975076020495154e-05, "loss": 0.2335, "step": 3836 }, { "epoch": 0.07125273370636745, "grad_norm": 0.3710725009441376, "learning_rate": 1.9750501328127126e-05, "loss": 0.328, "step": 3838 }, { "epoch": 0.07128986384378609, "grad_norm": 0.2876928150653839, "learning_rate": 1.9750242318627595e-05, "loss": 0.1937, "step": 3840 }, { "epoch": 0.07132699398120472, "grad_norm": 0.416787713766098, "learning_rate": 1.9749983176456474e-05, "loss": 0.2841, "step": 3842 }, { "epoch": 0.07136412411862336, "grad_norm": 0.2601301372051239, "learning_rate": 1.9749723901617286e-05, "loss": 0.3131, "step": 3844 }, { "epoch": 0.071401254256042, "grad_norm": 0.42258259654045105, "learning_rate": 1.974946449411356e-05, "loss": 0.563, "step": 3846 }, { "epoch": 0.07143838439346065, "grad_norm": 0.42055875062942505, "learning_rate": 1.9749204953948825e-05, "loss": 0.4556, "step": 3848 }, { "epoch": 0.07147551453087927, "grad_norm": 0.3523327708244324, "learning_rate": 1.9748945281126613e-05, "loss": 0.4599, "step": 3850 }, { "epoch": 0.07151264466829792, "grad_norm": 0.24449065327644348, "learning_rate": 1.974868547565046e-05, "loss": 0.308, "step": 3852 }, { "epoch": 0.07154977480571656, "grad_norm": 0.5089482069015503, "learning_rate": 1.9748425537523894e-05, "loss": 0.2091, "step": 3854 }, { "epoch": 0.07158690494313519, "grad_norm": 0.3861032724380493, "learning_rate": 1.9748165466750454e-05, "loss": 0.3799, "step": 3856 }, { "epoch": 0.07162403508055383, "grad_norm": 0.4229990243911743, "learning_rate": 1.9747905263333688e-05, "loss": 0.3585, "step": 3858 }, { "epoch": 0.07166116521797247, "grad_norm": 0.3257265090942383, "learning_rate": 1.9747644927277126e-05, "loss": 0.3603, "step": 3860 }, { "epoch": 0.07169829535539111, "grad_norm": 0.31086355447769165, "learning_rate": 1.974738445858431e-05, "loss": 0.3673, "step": 3862 }, { "epoch": 0.07173542549280974, "grad_norm": 0.28723418712615967, "learning_rate": 1.9747123857258795e-05, "loss": 0.3488, "step": 3864 }, { "epoch": 0.07177255563022839, "grad_norm": 0.24640463292598724, "learning_rate": 1.974686312330412e-05, "loss": 0.318, "step": 3866 }, { "epoch": 0.07180968576764703, "grad_norm": 0.417511910200119, "learning_rate": 1.974660225672383e-05, "loss": 0.4271, "step": 3868 }, { "epoch": 0.07184681590506567, "grad_norm": 0.2622246742248535, "learning_rate": 1.974634125752148e-05, "loss": 0.4102, "step": 3870 }, { "epoch": 0.0718839460424843, "grad_norm": 0.2264738529920578, "learning_rate": 1.9746080125700617e-05, "loss": 0.3768, "step": 3872 }, { "epoch": 0.07192107617990294, "grad_norm": 0.31941741704940796, "learning_rate": 1.9745818861264797e-05, "loss": 0.3741, "step": 3874 }, { "epoch": 0.07195820631732158, "grad_norm": 0.3227205276489258, "learning_rate": 1.9745557464217574e-05, "loss": 0.3722, "step": 3876 }, { "epoch": 0.07199533645474022, "grad_norm": 0.35105079412460327, "learning_rate": 1.9745295934562508e-05, "loss": 0.4296, "step": 3878 }, { "epoch": 0.07203246659215885, "grad_norm": 0.45736071467399597, "learning_rate": 1.974503427230315e-05, "loss": 0.57, "step": 3880 }, { "epoch": 0.0720695967295775, "grad_norm": 0.2562812268733978, "learning_rate": 1.974477247744307e-05, "loss": 0.4084, "step": 3882 }, { "epoch": 0.07210672686699614, "grad_norm": 0.29160887002944946, "learning_rate": 1.9744510549985826e-05, "loss": 0.3286, "step": 3884 }, { "epoch": 0.07214385700441478, "grad_norm": 0.2824247181415558, "learning_rate": 1.9744248489934978e-05, "loss": 0.5074, "step": 3886 }, { "epoch": 0.07218098714183341, "grad_norm": 0.48832982778549194, "learning_rate": 1.9743986297294098e-05, "loss": 0.2862, "step": 3888 }, { "epoch": 0.07221811727925205, "grad_norm": 0.4221515655517578, "learning_rate": 1.9743723972066752e-05, "loss": 0.3733, "step": 3890 }, { "epoch": 0.07225524741667069, "grad_norm": 0.3622238039970398, "learning_rate": 1.9743461514256506e-05, "loss": 0.2423, "step": 3892 }, { "epoch": 0.07229237755408932, "grad_norm": 0.35267168283462524, "learning_rate": 1.9743198923866935e-05, "loss": 0.4312, "step": 3894 }, { "epoch": 0.07232950769150796, "grad_norm": 0.2510712146759033, "learning_rate": 1.974293620090161e-05, "loss": 0.2027, "step": 3896 }, { "epoch": 0.0723666378289266, "grad_norm": 0.2505778968334198, "learning_rate": 1.9742673345364104e-05, "loss": 0.4079, "step": 3898 }, { "epoch": 0.07240376796634525, "grad_norm": 0.4601978659629822, "learning_rate": 1.9742410357258002e-05, "loss": 0.3597, "step": 3900 }, { "epoch": 0.07244089810376388, "grad_norm": 0.276941180229187, "learning_rate": 1.9742147236586874e-05, "loss": 0.2981, "step": 3902 }, { "epoch": 0.07247802824118252, "grad_norm": 0.3207315504550934, "learning_rate": 1.9741883983354303e-05, "loss": 0.3289, "step": 3904 }, { "epoch": 0.07251515837860116, "grad_norm": 0.4371922016143799, "learning_rate": 1.9741620597563874e-05, "loss": 0.3835, "step": 3906 }, { "epoch": 0.0725522885160198, "grad_norm": 0.3222271502017975, "learning_rate": 1.9741357079219166e-05, "loss": 0.3112, "step": 3908 }, { "epoch": 0.07258941865343843, "grad_norm": 0.32740986347198486, "learning_rate": 1.974109342832377e-05, "loss": 0.2781, "step": 3910 }, { "epoch": 0.07262654879085707, "grad_norm": 0.2803072929382324, "learning_rate": 1.9740829644881265e-05, "loss": 0.26, "step": 3912 }, { "epoch": 0.07266367892827572, "grad_norm": 0.33803749084472656, "learning_rate": 1.974056572889525e-05, "loss": 0.456, "step": 3914 }, { "epoch": 0.07270080906569436, "grad_norm": 0.3652576506137848, "learning_rate": 1.9740301680369305e-05, "loss": 0.3352, "step": 3916 }, { "epoch": 0.07273793920311299, "grad_norm": 0.3471367657184601, "learning_rate": 1.974003749930704e-05, "loss": 0.4407, "step": 3918 }, { "epoch": 0.07277506934053163, "grad_norm": 0.37709423899650574, "learning_rate": 1.973977318571203e-05, "loss": 0.4074, "step": 3920 }, { "epoch": 0.07281219947795027, "grad_norm": 0.2453843206167221, "learning_rate": 1.973950873958789e-05, "loss": 0.2616, "step": 3922 }, { "epoch": 0.07284932961536891, "grad_norm": 0.41919073462486267, "learning_rate": 1.97392441609382e-05, "loss": 0.2783, "step": 3924 }, { "epoch": 0.07288645975278754, "grad_norm": 0.26294803619384766, "learning_rate": 1.9738979449766575e-05, "loss": 0.2955, "step": 3926 }, { "epoch": 0.07292358989020618, "grad_norm": 0.24029801785945892, "learning_rate": 1.973871460607661e-05, "loss": 0.1671, "step": 3928 }, { "epoch": 0.07296072002762483, "grad_norm": 0.3198600113391876, "learning_rate": 1.9738449629871912e-05, "loss": 0.5012, "step": 3930 }, { "epoch": 0.07299785016504345, "grad_norm": 0.3556744158267975, "learning_rate": 1.9738184521156082e-05, "loss": 0.3531, "step": 3932 }, { "epoch": 0.0730349803024621, "grad_norm": 0.3938334882259369, "learning_rate": 1.9737919279932732e-05, "loss": 0.4564, "step": 3934 }, { "epoch": 0.07307211043988074, "grad_norm": 0.3393716812133789, "learning_rate": 1.9737653906205467e-05, "loss": 0.4631, "step": 3936 }, { "epoch": 0.07310924057729938, "grad_norm": 0.2686874270439148, "learning_rate": 1.97373883999779e-05, "loss": 0.2978, "step": 3938 }, { "epoch": 0.07314637071471801, "grad_norm": 0.38741400837898254, "learning_rate": 1.9737122761253644e-05, "loss": 0.3423, "step": 3940 }, { "epoch": 0.07318350085213665, "grad_norm": 0.39600151777267456, "learning_rate": 1.9736856990036314e-05, "loss": 0.2621, "step": 3942 }, { "epoch": 0.0732206309895553, "grad_norm": 0.4025113880634308, "learning_rate": 1.9736591086329526e-05, "loss": 0.5492, "step": 3944 }, { "epoch": 0.07325776112697394, "grad_norm": 0.4818654954433441, "learning_rate": 1.9736325050136898e-05, "loss": 0.3101, "step": 3946 }, { "epoch": 0.07329489126439256, "grad_norm": 0.2225620150566101, "learning_rate": 1.973605888146205e-05, "loss": 0.3906, "step": 3948 }, { "epoch": 0.0733320214018112, "grad_norm": 0.32570746541023254, "learning_rate": 1.9735792580308598e-05, "loss": 0.3475, "step": 3950 }, { "epoch": 0.07336915153922985, "grad_norm": 0.2975451648235321, "learning_rate": 1.9735526146680176e-05, "loss": 0.4291, "step": 3952 }, { "epoch": 0.07340628167664849, "grad_norm": 0.3218895196914673, "learning_rate": 1.9735259580580406e-05, "loss": 0.3967, "step": 3954 }, { "epoch": 0.07344341181406712, "grad_norm": 0.4532105028629303, "learning_rate": 1.9734992882012912e-05, "loss": 0.4763, "step": 3956 }, { "epoch": 0.07348054195148576, "grad_norm": 0.3328412175178528, "learning_rate": 1.9734726050981324e-05, "loss": 0.2871, "step": 3958 }, { "epoch": 0.0735176720889044, "grad_norm": 0.41240718960762024, "learning_rate": 1.9734459087489274e-05, "loss": 0.307, "step": 3960 }, { "epoch": 0.07355480222632305, "grad_norm": 0.43801966309547424, "learning_rate": 1.973419199154039e-05, "loss": 0.2506, "step": 3962 }, { "epoch": 0.07359193236374167, "grad_norm": 0.2605685293674469, "learning_rate": 1.9733924763138315e-05, "loss": 0.3686, "step": 3964 }, { "epoch": 0.07362906250116032, "grad_norm": 0.3927134573459625, "learning_rate": 1.973365740228668e-05, "loss": 0.1737, "step": 3966 }, { "epoch": 0.07366619263857896, "grad_norm": 0.29191920161247253, "learning_rate": 1.973338990898912e-05, "loss": 0.4449, "step": 3968 }, { "epoch": 0.07370332277599759, "grad_norm": 0.29910171031951904, "learning_rate": 1.973312228324928e-05, "loss": 0.45, "step": 3970 }, { "epoch": 0.07374045291341623, "grad_norm": 0.43258580565452576, "learning_rate": 1.97328545250708e-05, "loss": 0.2607, "step": 3972 }, { "epoch": 0.07377758305083487, "grad_norm": 0.2679401934146881, "learning_rate": 1.9732586634457327e-05, "loss": 0.225, "step": 3974 }, { "epoch": 0.07381471318825351, "grad_norm": 0.4128734767436981, "learning_rate": 1.9732318611412498e-05, "loss": 0.3121, "step": 3976 }, { "epoch": 0.07385184332567214, "grad_norm": 0.33572301268577576, "learning_rate": 1.9732050455939963e-05, "loss": 0.2784, "step": 3978 }, { "epoch": 0.07388897346309078, "grad_norm": 0.4437161684036255, "learning_rate": 1.9731782168043375e-05, "loss": 0.1874, "step": 3980 }, { "epoch": 0.07392610360050943, "grad_norm": 0.40449434518814087, "learning_rate": 1.9731513747726382e-05, "loss": 0.3478, "step": 3982 }, { "epoch": 0.07396323373792807, "grad_norm": 0.5866910815238953, "learning_rate": 1.9731245194992637e-05, "loss": 0.5451, "step": 3984 }, { "epoch": 0.0740003638753467, "grad_norm": 0.41867196559906006, "learning_rate": 1.9730976509845793e-05, "loss": 0.2274, "step": 3986 }, { "epoch": 0.07403749401276534, "grad_norm": 0.2776102125644684, "learning_rate": 1.9730707692289504e-05, "loss": 0.4524, "step": 3988 }, { "epoch": 0.07407462415018398, "grad_norm": 0.38838690519332886, "learning_rate": 1.9730438742327428e-05, "loss": 0.301, "step": 3990 }, { "epoch": 0.07411175428760262, "grad_norm": 0.36092332005500793, "learning_rate": 1.9730169659963235e-05, "loss": 0.3999, "step": 3992 }, { "epoch": 0.07414888442502125, "grad_norm": 0.29829004406929016, "learning_rate": 1.972990044520057e-05, "loss": 0.4005, "step": 3994 }, { "epoch": 0.0741860145624399, "grad_norm": 0.23319177329540253, "learning_rate": 1.9729631098043108e-05, "loss": 0.3205, "step": 3996 }, { "epoch": 0.07422314469985854, "grad_norm": 0.2570216953754425, "learning_rate": 1.972936161849451e-05, "loss": 0.5175, "step": 3998 }, { "epoch": 0.07426027483727718, "grad_norm": 0.2947014570236206, "learning_rate": 1.9729092006558443e-05, "loss": 0.3778, "step": 4000 }, { "epoch": 0.07429740497469581, "grad_norm": 0.25722771883010864, "learning_rate": 1.9728822262238575e-05, "loss": 0.4128, "step": 4002 }, { "epoch": 0.07433453511211445, "grad_norm": 0.45795461535453796, "learning_rate": 1.972855238553858e-05, "loss": 0.2863, "step": 4004 }, { "epoch": 0.07437166524953309, "grad_norm": 0.41443169116973877, "learning_rate": 1.9728282376462126e-05, "loss": 0.2423, "step": 4006 }, { "epoch": 0.07440879538695172, "grad_norm": 0.34766697883605957, "learning_rate": 1.9728012235012887e-05, "loss": 0.2741, "step": 4008 }, { "epoch": 0.07444592552437036, "grad_norm": 0.33034998178482056, "learning_rate": 1.972774196119454e-05, "loss": 0.3662, "step": 4010 }, { "epoch": 0.074483055661789, "grad_norm": 0.26384037733078003, "learning_rate": 1.9727471555010767e-05, "loss": 0.189, "step": 4012 }, { "epoch": 0.07452018579920765, "grad_norm": 0.38193410634994507, "learning_rate": 1.972720101646524e-05, "loss": 0.2962, "step": 4014 }, { "epoch": 0.07455731593662628, "grad_norm": 0.45072677731513977, "learning_rate": 1.9726930345561644e-05, "loss": 0.2836, "step": 4016 }, { "epoch": 0.07459444607404492, "grad_norm": 0.3227638006210327, "learning_rate": 1.9726659542303663e-05, "loss": 0.4306, "step": 4018 }, { "epoch": 0.07463157621146356, "grad_norm": 0.25836917757987976, "learning_rate": 1.9726388606694974e-05, "loss": 0.1992, "step": 4020 }, { "epoch": 0.0746687063488822, "grad_norm": 0.3983252942562103, "learning_rate": 1.9726117538739274e-05, "loss": 0.3106, "step": 4022 }, { "epoch": 0.07470583648630083, "grad_norm": 0.4189784824848175, "learning_rate": 1.972584633844025e-05, "loss": 0.4363, "step": 4024 }, { "epoch": 0.07474296662371947, "grad_norm": 0.41066834330558777, "learning_rate": 1.972557500580159e-05, "loss": 0.4359, "step": 4026 }, { "epoch": 0.07478009676113812, "grad_norm": 0.4322146773338318, "learning_rate": 1.972530354082698e-05, "loss": 0.4167, "step": 4028 }, { "epoch": 0.07481722689855676, "grad_norm": 0.37955912947654724, "learning_rate": 1.9725031943520127e-05, "loss": 0.3324, "step": 4030 }, { "epoch": 0.07485435703597539, "grad_norm": 0.3345361649990082, "learning_rate": 1.972476021388471e-05, "loss": 0.3535, "step": 4032 }, { "epoch": 0.07489148717339403, "grad_norm": 0.2872040569782257, "learning_rate": 1.9724488351924444e-05, "loss": 0.1472, "step": 4034 }, { "epoch": 0.07492861731081267, "grad_norm": 0.4636293649673462, "learning_rate": 1.9724216357643017e-05, "loss": 0.4432, "step": 4036 }, { "epoch": 0.07496574744823131, "grad_norm": 0.4114382266998291, "learning_rate": 1.972394423104413e-05, "loss": 0.5859, "step": 4038 }, { "epoch": 0.07500287758564994, "grad_norm": 0.3368780016899109, "learning_rate": 1.972367197213149e-05, "loss": 0.3176, "step": 4040 }, { "epoch": 0.07504000772306858, "grad_norm": 0.3746050298213959, "learning_rate": 1.97233995809088e-05, "loss": 0.1935, "step": 4042 }, { "epoch": 0.07507713786048723, "grad_norm": 0.3974166810512543, "learning_rate": 1.972312705737977e-05, "loss": 0.5018, "step": 4044 }, { "epoch": 0.07511426799790585, "grad_norm": 0.28588545322418213, "learning_rate": 1.97228544015481e-05, "loss": 0.1791, "step": 4046 }, { "epoch": 0.0751513981353245, "grad_norm": 0.3535286486148834, "learning_rate": 1.9722581613417508e-05, "loss": 0.3538, "step": 4048 }, { "epoch": 0.07518852827274314, "grad_norm": 0.31911003589630127, "learning_rate": 1.9722308692991703e-05, "loss": 0.3646, "step": 4050 }, { "epoch": 0.07522565841016178, "grad_norm": 0.4504331946372986, "learning_rate": 1.9722035640274395e-05, "loss": 0.2916, "step": 4052 }, { "epoch": 0.07526278854758041, "grad_norm": 0.275995135307312, "learning_rate": 1.9721762455269307e-05, "loss": 0.2143, "step": 4054 }, { "epoch": 0.07529991868499905, "grad_norm": 0.34938135743141174, "learning_rate": 1.972148913798015e-05, "loss": 0.2619, "step": 4056 }, { "epoch": 0.0753370488224177, "grad_norm": 0.2995147109031677, "learning_rate": 1.9721215688410647e-05, "loss": 0.4379, "step": 4058 }, { "epoch": 0.07537417895983634, "grad_norm": 0.325248658657074, "learning_rate": 1.9720942106564515e-05, "loss": 0.3159, "step": 4060 }, { "epoch": 0.07541130909725496, "grad_norm": 0.4562129080295563, "learning_rate": 1.9720668392445477e-05, "loss": 0.3147, "step": 4062 }, { "epoch": 0.0754484392346736, "grad_norm": 0.3024131655693054, "learning_rate": 1.972039454605726e-05, "loss": 0.2656, "step": 4064 }, { "epoch": 0.07548556937209225, "grad_norm": 0.44684678316116333, "learning_rate": 1.9720120567403593e-05, "loss": 0.2539, "step": 4066 }, { "epoch": 0.07552269950951089, "grad_norm": 0.38811197876930237, "learning_rate": 1.9719846456488193e-05, "loss": 0.4876, "step": 4068 }, { "epoch": 0.07555982964692952, "grad_norm": 0.30226844549179077, "learning_rate": 1.9719572213314802e-05, "loss": 0.1709, "step": 4070 }, { "epoch": 0.07559695978434816, "grad_norm": 0.3021453022956848, "learning_rate": 1.9719297837887147e-05, "loss": 0.2621, "step": 4072 }, { "epoch": 0.0756340899217668, "grad_norm": 0.3178151249885559, "learning_rate": 1.971902333020896e-05, "loss": 0.3794, "step": 4074 }, { "epoch": 0.07567122005918545, "grad_norm": 0.2373242825269699, "learning_rate": 1.9718748690283974e-05, "loss": 0.3639, "step": 4076 }, { "epoch": 0.07570835019660407, "grad_norm": 0.34492459893226624, "learning_rate": 1.9718473918115936e-05, "loss": 0.444, "step": 4078 }, { "epoch": 0.07574548033402272, "grad_norm": 0.3752194941043854, "learning_rate": 1.9718199013708572e-05, "loss": 0.3515, "step": 4080 }, { "epoch": 0.07578261047144136, "grad_norm": 0.37196120619773865, "learning_rate": 1.9717923977065633e-05, "loss": 0.4438, "step": 4082 }, { "epoch": 0.07581974060885999, "grad_norm": 0.24771851301193237, "learning_rate": 1.9717648808190856e-05, "loss": 0.2264, "step": 4084 }, { "epoch": 0.07585687074627863, "grad_norm": 0.4009481370449066, "learning_rate": 1.9717373507087988e-05, "loss": 0.4171, "step": 4086 }, { "epoch": 0.07589400088369727, "grad_norm": 0.32479986548423767, "learning_rate": 1.971709807376077e-05, "loss": 0.2447, "step": 4088 }, { "epoch": 0.07593113102111591, "grad_norm": 0.3384368419647217, "learning_rate": 1.9716822508212952e-05, "loss": 0.4226, "step": 4090 }, { "epoch": 0.07596826115853454, "grad_norm": 0.36190101504325867, "learning_rate": 1.9716546810448292e-05, "loss": 0.2607, "step": 4092 }, { "epoch": 0.07600539129595318, "grad_norm": 0.33590051531791687, "learning_rate": 1.9716270980470527e-05, "loss": 0.4025, "step": 4094 }, { "epoch": 0.07604252143337183, "grad_norm": 0.34180957078933716, "learning_rate": 1.971599501828342e-05, "loss": 0.3514, "step": 4096 }, { "epoch": 0.07607965157079047, "grad_norm": 0.27896565198898315, "learning_rate": 1.9715718923890726e-05, "loss": 0.2845, "step": 4098 }, { "epoch": 0.0761167817082091, "grad_norm": 0.4229014813899994, "learning_rate": 1.97154426972962e-05, "loss": 0.3526, "step": 4100 }, { "epoch": 0.07615391184562774, "grad_norm": 0.4036638140678406, "learning_rate": 1.9715166338503596e-05, "loss": 0.342, "step": 4102 }, { "epoch": 0.07619104198304638, "grad_norm": 0.24274615943431854, "learning_rate": 1.9714889847516677e-05, "loss": 0.2019, "step": 4104 }, { "epoch": 0.07622817212046502, "grad_norm": 0.3785533308982849, "learning_rate": 1.9714613224339212e-05, "loss": 0.3266, "step": 4106 }, { "epoch": 0.07626530225788365, "grad_norm": 0.3162519931793213, "learning_rate": 1.971433646897496e-05, "loss": 0.5281, "step": 4108 }, { "epoch": 0.0763024323953023, "grad_norm": 0.34028100967407227, "learning_rate": 1.9714059581427684e-05, "loss": 0.1673, "step": 4110 }, { "epoch": 0.07633956253272094, "grad_norm": 0.4366309940814972, "learning_rate": 1.9713782561701152e-05, "loss": 0.2767, "step": 4112 }, { "epoch": 0.07637669267013958, "grad_norm": 0.4714266359806061, "learning_rate": 1.9713505409799135e-05, "loss": 0.3825, "step": 4114 }, { "epoch": 0.07641382280755821, "grad_norm": 0.259251207113266, "learning_rate": 1.9713228125725408e-05, "loss": 0.3008, "step": 4116 }, { "epoch": 0.07645095294497685, "grad_norm": 0.3195223808288574, "learning_rate": 1.971295070948374e-05, "loss": 0.3289, "step": 4118 }, { "epoch": 0.07648808308239549, "grad_norm": 0.41382530331611633, "learning_rate": 1.971267316107791e-05, "loss": 0.4593, "step": 4120 }, { "epoch": 0.07652521321981412, "grad_norm": 0.272312730550766, "learning_rate": 1.9712395480511685e-05, "loss": 0.3231, "step": 4122 }, { "epoch": 0.07656234335723276, "grad_norm": 0.3412688672542572, "learning_rate": 1.9712117667788853e-05, "loss": 0.315, "step": 4124 }, { "epoch": 0.0765994734946514, "grad_norm": 0.35669073462486267, "learning_rate": 1.9711839722913188e-05, "loss": 0.3358, "step": 4126 }, { "epoch": 0.07663660363207005, "grad_norm": 0.3865607976913452, "learning_rate": 1.9711561645888477e-05, "loss": 0.423, "step": 4128 }, { "epoch": 0.07667373376948868, "grad_norm": 0.48304542899131775, "learning_rate": 1.97112834367185e-05, "loss": 0.3612, "step": 4130 }, { "epoch": 0.07671086390690732, "grad_norm": 0.3380900025367737, "learning_rate": 1.9711005095407045e-05, "loss": 0.4337, "step": 4132 }, { "epoch": 0.07674799404432596, "grad_norm": 0.45825114846229553, "learning_rate": 1.9710726621957898e-05, "loss": 0.4028, "step": 4134 }, { "epoch": 0.0767851241817446, "grad_norm": 0.33455803990364075, "learning_rate": 1.971044801637485e-05, "loss": 0.4201, "step": 4136 }, { "epoch": 0.07682225431916323, "grad_norm": 0.4597374498844147, "learning_rate": 1.971016927866169e-05, "loss": 0.2897, "step": 4138 }, { "epoch": 0.07685938445658187, "grad_norm": 0.31456583738327026, "learning_rate": 1.9709890408822212e-05, "loss": 0.3231, "step": 4140 }, { "epoch": 0.07689651459400051, "grad_norm": 0.31309837102890015, "learning_rate": 1.9709611406860206e-05, "loss": 0.206, "step": 4142 }, { "epoch": 0.07693364473141916, "grad_norm": 0.3602977693080902, "learning_rate": 1.970933227277948e-05, "loss": 0.427, "step": 4144 }, { "epoch": 0.07697077486883779, "grad_norm": 0.6244282126426697, "learning_rate": 1.9709053006583817e-05, "loss": 0.4821, "step": 4146 }, { "epoch": 0.07700790500625643, "grad_norm": 0.2669077515602112, "learning_rate": 1.970877360827703e-05, "loss": 0.2092, "step": 4148 }, { "epoch": 0.07704503514367507, "grad_norm": 0.6031359434127808, "learning_rate": 1.970849407786291e-05, "loss": 0.2872, "step": 4150 }, { "epoch": 0.07708216528109371, "grad_norm": 0.30174508690834045, "learning_rate": 1.970821441534527e-05, "loss": 0.4057, "step": 4152 }, { "epoch": 0.07711929541851234, "grad_norm": 0.2911519706249237, "learning_rate": 1.970793462072791e-05, "loss": 0.4206, "step": 4154 }, { "epoch": 0.07715642555593098, "grad_norm": 0.3112300634384155, "learning_rate": 1.9707654694014638e-05, "loss": 0.4081, "step": 4156 }, { "epoch": 0.07719355569334962, "grad_norm": 0.3850885331630707, "learning_rate": 1.9707374635209265e-05, "loss": 0.5078, "step": 4158 }, { "epoch": 0.07723068583076825, "grad_norm": 0.7911838889122009, "learning_rate": 1.97070944443156e-05, "loss": 0.2587, "step": 4160 }, { "epoch": 0.0772678159681869, "grad_norm": 0.35940849781036377, "learning_rate": 1.9706814121337454e-05, "loss": 0.2604, "step": 4162 }, { "epoch": 0.07730494610560554, "grad_norm": 0.2801651060581207, "learning_rate": 1.9706533666278646e-05, "loss": 0.3961, "step": 4164 }, { "epoch": 0.07734207624302418, "grad_norm": 0.17348363995552063, "learning_rate": 1.9706253079142987e-05, "loss": 0.235, "step": 4166 }, { "epoch": 0.07737920638044281, "grad_norm": 0.2846076786518097, "learning_rate": 1.9705972359934295e-05, "loss": 0.236, "step": 4168 }, { "epoch": 0.07741633651786145, "grad_norm": 0.21596375107765198, "learning_rate": 1.9705691508656396e-05, "loss": 0.3377, "step": 4170 }, { "epoch": 0.07745346665528009, "grad_norm": 0.27789488434791565, "learning_rate": 1.9705410525313103e-05, "loss": 0.4623, "step": 4172 }, { "epoch": 0.07749059679269873, "grad_norm": 0.3166808485984802, "learning_rate": 1.9705129409908247e-05, "loss": 0.2986, "step": 4174 }, { "epoch": 0.07752772693011736, "grad_norm": 0.366046279668808, "learning_rate": 1.9704848162445652e-05, "loss": 0.4917, "step": 4176 }, { "epoch": 0.077564857067536, "grad_norm": 0.3062710464000702, "learning_rate": 1.970456678292914e-05, "loss": 0.3284, "step": 4178 }, { "epoch": 0.07760198720495465, "grad_norm": 0.3104027509689331, "learning_rate": 1.9704285271362545e-05, "loss": 0.2021, "step": 4180 }, { "epoch": 0.07763911734237329, "grad_norm": 0.23581327497959137, "learning_rate": 1.970400362774969e-05, "loss": 0.4941, "step": 4182 }, { "epoch": 0.07767624747979192, "grad_norm": 0.2966723144054413, "learning_rate": 1.970372185209442e-05, "loss": 0.2486, "step": 4184 }, { "epoch": 0.07771337761721056, "grad_norm": 0.47806960344314575, "learning_rate": 1.9703439944400555e-05, "loss": 0.3719, "step": 4186 }, { "epoch": 0.0777505077546292, "grad_norm": 0.33518242835998535, "learning_rate": 1.970315790467194e-05, "loss": 0.3162, "step": 4188 }, { "epoch": 0.07778763789204785, "grad_norm": 0.5183612704277039, "learning_rate": 1.970287573291241e-05, "loss": 0.1895, "step": 4190 }, { "epoch": 0.07782476802946647, "grad_norm": 0.5089662075042725, "learning_rate": 1.9702593429125808e-05, "loss": 0.3401, "step": 4192 }, { "epoch": 0.07786189816688512, "grad_norm": 0.3420623540878296, "learning_rate": 1.9702310993315968e-05, "loss": 0.4496, "step": 4194 }, { "epoch": 0.07789902830430376, "grad_norm": 0.3703691363334656, "learning_rate": 1.9702028425486735e-05, "loss": 0.2048, "step": 4196 }, { "epoch": 0.07793615844172239, "grad_norm": 0.41426002979278564, "learning_rate": 1.9701745725641963e-05, "loss": 0.4109, "step": 4198 }, { "epoch": 0.07797328857914103, "grad_norm": 0.3361063003540039, "learning_rate": 1.9701462893785487e-05, "loss": 0.3102, "step": 4200 }, { "epoch": 0.07801041871655967, "grad_norm": 0.31623128056526184, "learning_rate": 1.970117992992116e-05, "loss": 0.5012, "step": 4202 }, { "epoch": 0.07804754885397831, "grad_norm": 0.2824561595916748, "learning_rate": 1.9700896834052834e-05, "loss": 0.352, "step": 4204 }, { "epoch": 0.07808467899139694, "grad_norm": 0.2287907749414444, "learning_rate": 1.970061360618436e-05, "loss": 0.279, "step": 4206 }, { "epoch": 0.07812180912881558, "grad_norm": 0.46598443388938904, "learning_rate": 1.9700330246319594e-05, "loss": 0.2332, "step": 4208 }, { "epoch": 0.07815893926623423, "grad_norm": 0.342803418636322, "learning_rate": 1.9700046754462384e-05, "loss": 0.2013, "step": 4210 }, { "epoch": 0.07819606940365287, "grad_norm": 0.38624659180641174, "learning_rate": 1.9699763130616593e-05, "loss": 0.4011, "step": 4212 }, { "epoch": 0.0782331995410715, "grad_norm": 0.3567831516265869, "learning_rate": 1.9699479374786085e-05, "loss": 0.2867, "step": 4214 }, { "epoch": 0.07827032967849014, "grad_norm": 0.3960648477077484, "learning_rate": 1.969919548697471e-05, "loss": 0.1516, "step": 4216 }, { "epoch": 0.07830745981590878, "grad_norm": 0.2812891900539398, "learning_rate": 1.969891146718634e-05, "loss": 0.3329, "step": 4218 }, { "epoch": 0.07834458995332742, "grad_norm": 0.3432716429233551, "learning_rate": 1.9698627315424836e-05, "loss": 0.2467, "step": 4220 }, { "epoch": 0.07838172009074605, "grad_norm": 0.29508450627326965, "learning_rate": 1.9698343031694067e-05, "loss": 0.3327, "step": 4222 }, { "epoch": 0.0784188502281647, "grad_norm": 0.39739924669265747, "learning_rate": 1.9698058615997896e-05, "loss": 0.3809, "step": 4224 }, { "epoch": 0.07845598036558334, "grad_norm": 0.3428349494934082, "learning_rate": 1.96977740683402e-05, "loss": 0.2245, "step": 4226 }, { "epoch": 0.07849311050300198, "grad_norm": 0.3312964141368866, "learning_rate": 1.9697489388724845e-05, "loss": 0.4426, "step": 4228 }, { "epoch": 0.0785302406404206, "grad_norm": 0.2318098098039627, "learning_rate": 1.9697204577155707e-05, "loss": 0.2867, "step": 4230 }, { "epoch": 0.07856737077783925, "grad_norm": 0.5615546703338623, "learning_rate": 1.969691963363666e-05, "loss": 0.4904, "step": 4232 }, { "epoch": 0.07860450091525789, "grad_norm": 0.3217709958553314, "learning_rate": 1.969663455817158e-05, "loss": 0.5586, "step": 4234 }, { "epoch": 0.07864163105267652, "grad_norm": 0.29852867126464844, "learning_rate": 1.9696349350764354e-05, "loss": 0.2817, "step": 4236 }, { "epoch": 0.07867876119009516, "grad_norm": 0.3760199248790741, "learning_rate": 1.9696064011418855e-05, "loss": 0.2395, "step": 4238 }, { "epoch": 0.0787158913275138, "grad_norm": 0.3407188951969147, "learning_rate": 1.9695778540138967e-05, "loss": 0.4005, "step": 4240 }, { "epoch": 0.07875302146493245, "grad_norm": 0.35757091641426086, "learning_rate": 1.9695492936928574e-05, "loss": 0.3033, "step": 4242 }, { "epoch": 0.07879015160235107, "grad_norm": 0.2942734360694885, "learning_rate": 1.969520720179157e-05, "loss": 0.1635, "step": 4244 }, { "epoch": 0.07882728173976972, "grad_norm": 0.4190855920314789, "learning_rate": 1.9694921334731833e-05, "loss": 0.3649, "step": 4246 }, { "epoch": 0.07886441187718836, "grad_norm": 0.33898401260375977, "learning_rate": 1.969463533575325e-05, "loss": 0.5075, "step": 4248 }, { "epoch": 0.078901542014607, "grad_norm": 0.4412969946861267, "learning_rate": 1.9694349204859726e-05, "loss": 0.4492, "step": 4250 }, { "epoch": 0.07893867215202563, "grad_norm": 0.3370778560638428, "learning_rate": 1.9694062942055144e-05, "loss": 0.3274, "step": 4252 }, { "epoch": 0.07897580228944427, "grad_norm": 0.34823164343833923, "learning_rate": 1.969377654734341e-05, "loss": 0.3578, "step": 4254 }, { "epoch": 0.07901293242686291, "grad_norm": 0.5024003386497498, "learning_rate": 1.9693490020728404e-05, "loss": 0.3701, "step": 4256 }, { "epoch": 0.07905006256428156, "grad_norm": 0.4456743597984314, "learning_rate": 1.969320336221404e-05, "loss": 0.2405, "step": 4258 }, { "epoch": 0.07908719270170018, "grad_norm": 0.3806392252445221, "learning_rate": 1.9692916571804207e-05, "loss": 0.275, "step": 4260 }, { "epoch": 0.07912432283911883, "grad_norm": 0.3193275034427643, "learning_rate": 1.9692629649502815e-05, "loss": 0.4136, "step": 4262 }, { "epoch": 0.07916145297653747, "grad_norm": 0.3430837094783783, "learning_rate": 1.9692342595313772e-05, "loss": 0.3763, "step": 4264 }, { "epoch": 0.07919858311395611, "grad_norm": 0.3324311673641205, "learning_rate": 1.9692055409240974e-05, "loss": 0.2553, "step": 4266 }, { "epoch": 0.07923571325137474, "grad_norm": 0.4314964711666107, "learning_rate": 1.969176809128833e-05, "loss": 0.2409, "step": 4268 }, { "epoch": 0.07927284338879338, "grad_norm": 0.2835943102836609, "learning_rate": 1.9691480641459753e-05, "loss": 0.2757, "step": 4270 }, { "epoch": 0.07930997352621202, "grad_norm": 0.36165377497673035, "learning_rate": 1.969119305975916e-05, "loss": 0.3112, "step": 4272 }, { "epoch": 0.07934710366363065, "grad_norm": 0.3235855996608734, "learning_rate": 1.9690905346190455e-05, "loss": 0.4472, "step": 4274 }, { "epoch": 0.0793842338010493, "grad_norm": 0.33653008937835693, "learning_rate": 1.9690617500757554e-05, "loss": 0.336, "step": 4276 }, { "epoch": 0.07942136393846794, "grad_norm": 0.3267183005809784, "learning_rate": 1.969032952346438e-05, "loss": 0.3028, "step": 4278 }, { "epoch": 0.07945849407588658, "grad_norm": 0.3111686706542969, "learning_rate": 1.969004141431484e-05, "loss": 0.4869, "step": 4280 }, { "epoch": 0.07949562421330521, "grad_norm": 0.3802286386489868, "learning_rate": 1.9689753173312865e-05, "loss": 0.3497, "step": 4282 }, { "epoch": 0.07953275435072385, "grad_norm": 0.31104668974876404, "learning_rate": 1.9689464800462374e-05, "loss": 0.4965, "step": 4284 }, { "epoch": 0.07956988448814249, "grad_norm": 0.33291587233543396, "learning_rate": 1.968917629576729e-05, "loss": 0.3282, "step": 4286 }, { "epoch": 0.07960701462556113, "grad_norm": 0.2918630838394165, "learning_rate": 1.9688887659231538e-05, "loss": 0.2416, "step": 4288 }, { "epoch": 0.07964414476297976, "grad_norm": 0.4246099293231964, "learning_rate": 1.9688598890859046e-05, "loss": 0.343, "step": 4290 }, { "epoch": 0.0796812749003984, "grad_norm": 0.39599692821502686, "learning_rate": 1.9688309990653747e-05, "loss": 0.2591, "step": 4292 }, { "epoch": 0.07971840503781705, "grad_norm": 0.27274757623672485, "learning_rate": 1.9688020958619567e-05, "loss": 0.2699, "step": 4294 }, { "epoch": 0.07975553517523569, "grad_norm": 0.3805229067802429, "learning_rate": 1.968773179476044e-05, "loss": 0.3804, "step": 4296 }, { "epoch": 0.07979266531265432, "grad_norm": 0.33901098370552063, "learning_rate": 1.9687442499080302e-05, "loss": 0.4679, "step": 4298 }, { "epoch": 0.07982979545007296, "grad_norm": 0.3738965392112732, "learning_rate": 1.968715307158309e-05, "loss": 0.3382, "step": 4300 }, { "epoch": 0.0798669255874916, "grad_norm": 0.3245031237602234, "learning_rate": 1.968686351227274e-05, "loss": 0.4692, "step": 4302 }, { "epoch": 0.07990405572491024, "grad_norm": 0.354218453168869, "learning_rate": 1.9686573821153196e-05, "loss": 0.327, "step": 4304 }, { "epoch": 0.07994118586232887, "grad_norm": 0.2955273985862732, "learning_rate": 1.968628399822839e-05, "loss": 0.4704, "step": 4306 }, { "epoch": 0.07997831599974752, "grad_norm": 0.32826635241508484, "learning_rate": 1.968599404350228e-05, "loss": 0.3188, "step": 4308 }, { "epoch": 0.08001544613716616, "grad_norm": 0.32851389050483704, "learning_rate": 1.9685703956978798e-05, "loss": 0.3172, "step": 4310 }, { "epoch": 0.08005257627458479, "grad_norm": 0.3576257526874542, "learning_rate": 1.9685413738661904e-05, "loss": 0.3116, "step": 4312 }, { "epoch": 0.08008970641200343, "grad_norm": 0.410132497549057, "learning_rate": 1.9685123388555533e-05, "loss": 0.2865, "step": 4314 }, { "epoch": 0.08012683654942207, "grad_norm": 0.438515305519104, "learning_rate": 1.9684832906663646e-05, "loss": 0.3851, "step": 4316 }, { "epoch": 0.08016396668684071, "grad_norm": 0.31009963154792786, "learning_rate": 1.9684542292990193e-05, "loss": 0.3517, "step": 4318 }, { "epoch": 0.08020109682425934, "grad_norm": 0.3820616602897644, "learning_rate": 1.968425154753913e-05, "loss": 0.3659, "step": 4320 }, { "epoch": 0.08023822696167798, "grad_norm": 0.3518998622894287, "learning_rate": 1.9683960670314407e-05, "loss": 0.4084, "step": 4322 }, { "epoch": 0.08027535709909663, "grad_norm": 0.43173128366470337, "learning_rate": 1.9683669661319985e-05, "loss": 0.4391, "step": 4324 }, { "epoch": 0.08031248723651527, "grad_norm": 0.28281405568122864, "learning_rate": 1.968337852055983e-05, "loss": 0.3639, "step": 4326 }, { "epoch": 0.0803496173739339, "grad_norm": 0.3975304663181305, "learning_rate": 1.9683087248037897e-05, "loss": 0.1999, "step": 4328 }, { "epoch": 0.08038674751135254, "grad_norm": 0.34033700823783875, "learning_rate": 1.968279584375815e-05, "loss": 0.2782, "step": 4330 }, { "epoch": 0.08042387764877118, "grad_norm": 0.5377874374389648, "learning_rate": 1.9682504307724553e-05, "loss": 0.3351, "step": 4332 }, { "epoch": 0.08046100778618982, "grad_norm": 0.25888651609420776, "learning_rate": 1.9682212639941078e-05, "loss": 0.1861, "step": 4334 }, { "epoch": 0.08049813792360845, "grad_norm": 0.4718436598777771, "learning_rate": 1.968192084041169e-05, "loss": 0.4838, "step": 4336 }, { "epoch": 0.0805352680610271, "grad_norm": 0.3890031576156616, "learning_rate": 1.968162890914036e-05, "loss": 0.4469, "step": 4338 }, { "epoch": 0.08057239819844574, "grad_norm": 0.3125271797180176, "learning_rate": 1.968133684613106e-05, "loss": 0.4054, "step": 4340 }, { "epoch": 0.08060952833586438, "grad_norm": 0.3753014802932739, "learning_rate": 1.968104465138776e-05, "loss": 0.4793, "step": 4342 }, { "epoch": 0.080646658473283, "grad_norm": 0.2874627709388733, "learning_rate": 1.9680752324914447e-05, "loss": 0.3491, "step": 4344 }, { "epoch": 0.08068378861070165, "grad_norm": 0.4077173173427582, "learning_rate": 1.968045986671509e-05, "loss": 0.3322, "step": 4346 }, { "epoch": 0.08072091874812029, "grad_norm": 0.2548542320728302, "learning_rate": 1.968016727679367e-05, "loss": 0.3945, "step": 4348 }, { "epoch": 0.08075804888553892, "grad_norm": 0.3074374496936798, "learning_rate": 1.967987455515417e-05, "loss": 0.3587, "step": 4350 }, { "epoch": 0.08079517902295756, "grad_norm": 0.3787061870098114, "learning_rate": 1.9679581701800568e-05, "loss": 0.3588, "step": 4352 }, { "epoch": 0.0808323091603762, "grad_norm": 0.3199010193347931, "learning_rate": 1.9679288716736854e-05, "loss": 0.2033, "step": 4354 }, { "epoch": 0.08086943929779485, "grad_norm": 0.3644576370716095, "learning_rate": 1.967899559996702e-05, "loss": 0.3484, "step": 4356 }, { "epoch": 0.08090656943521347, "grad_norm": 0.30899494886398315, "learning_rate": 1.967870235149504e-05, "loss": 0.3544, "step": 4358 }, { "epoch": 0.08094369957263212, "grad_norm": 0.409390389919281, "learning_rate": 1.9678408971324915e-05, "loss": 0.3397, "step": 4360 }, { "epoch": 0.08098082971005076, "grad_norm": 0.3568435609340668, "learning_rate": 1.9678115459460633e-05, "loss": 0.3103, "step": 4362 }, { "epoch": 0.0810179598474694, "grad_norm": 0.3840464651584625, "learning_rate": 1.967782181590619e-05, "loss": 0.4137, "step": 4364 }, { "epoch": 0.08105508998488803, "grad_norm": 0.47364285588264465, "learning_rate": 1.967752804066558e-05, "loss": 0.2413, "step": 4366 }, { "epoch": 0.08109222012230667, "grad_norm": 0.319794237613678, "learning_rate": 1.96772341337428e-05, "loss": 0.2091, "step": 4368 }, { "epoch": 0.08112935025972531, "grad_norm": 0.36961498856544495, "learning_rate": 1.9676940095141855e-05, "loss": 0.5444, "step": 4370 }, { "epoch": 0.08116648039714396, "grad_norm": 0.5251149535179138, "learning_rate": 1.9676645924866737e-05, "loss": 0.4311, "step": 4372 }, { "epoch": 0.08120361053456258, "grad_norm": 0.38542047142982483, "learning_rate": 1.9676351622921453e-05, "loss": 0.3008, "step": 4374 }, { "epoch": 0.08124074067198123, "grad_norm": 0.3419176936149597, "learning_rate": 1.967605718931001e-05, "loss": 0.5594, "step": 4376 }, { "epoch": 0.08127787080939987, "grad_norm": 0.4605537950992584, "learning_rate": 1.9675762624036408e-05, "loss": 0.2084, "step": 4378 }, { "epoch": 0.08131500094681851, "grad_norm": 0.30281922221183777, "learning_rate": 1.967546792710466e-05, "loss": 0.3386, "step": 4380 }, { "epoch": 0.08135213108423714, "grad_norm": 0.44911879301071167, "learning_rate": 1.9675173098518775e-05, "loss": 0.4053, "step": 4382 }, { "epoch": 0.08138926122165578, "grad_norm": 0.3575640022754669, "learning_rate": 1.9674878138282767e-05, "loss": 0.4251, "step": 4384 }, { "epoch": 0.08142639135907442, "grad_norm": 0.3299250304698944, "learning_rate": 1.9674583046400644e-05, "loss": 0.2951, "step": 4386 }, { "epoch": 0.08146352149649305, "grad_norm": 0.32510480284690857, "learning_rate": 1.9674287822876425e-05, "loss": 0.2401, "step": 4388 }, { "epoch": 0.0815006516339117, "grad_norm": 0.32736584544181824, "learning_rate": 1.9673992467714127e-05, "loss": 0.4181, "step": 4390 }, { "epoch": 0.08153778177133034, "grad_norm": 0.3797236382961273, "learning_rate": 1.967369698091777e-05, "loss": 0.1149, "step": 4392 }, { "epoch": 0.08157491190874898, "grad_norm": 0.2579886019229889, "learning_rate": 1.967340136249137e-05, "loss": 0.159, "step": 4394 }, { "epoch": 0.08161204204616761, "grad_norm": 0.18991221487522125, "learning_rate": 1.9673105612438956e-05, "loss": 0.2736, "step": 4396 }, { "epoch": 0.08164917218358625, "grad_norm": 0.34827739000320435, "learning_rate": 1.9672809730764547e-05, "loss": 0.3956, "step": 4398 }, { "epoch": 0.08168630232100489, "grad_norm": 0.25867408514022827, "learning_rate": 1.9672513717472174e-05, "loss": 0.4237, "step": 4400 }, { "epoch": 0.08172343245842353, "grad_norm": 0.4546580910682678, "learning_rate": 1.967221757256586e-05, "loss": 0.1816, "step": 4402 }, { "epoch": 0.08176056259584216, "grad_norm": 0.38588833808898926, "learning_rate": 1.9671921296049634e-05, "loss": 0.2671, "step": 4404 }, { "epoch": 0.0817976927332608, "grad_norm": 0.34596455097198486, "learning_rate": 1.967162488792753e-05, "loss": 0.2353, "step": 4406 }, { "epoch": 0.08183482287067945, "grad_norm": 0.3007392883300781, "learning_rate": 1.9671328348203585e-05, "loss": 0.3836, "step": 4408 }, { "epoch": 0.08187195300809809, "grad_norm": 0.29349493980407715, "learning_rate": 1.967103167688183e-05, "loss": 0.1965, "step": 4410 }, { "epoch": 0.08190908314551672, "grad_norm": 0.2955908477306366, "learning_rate": 1.96707348739663e-05, "loss": 0.4331, "step": 4412 }, { "epoch": 0.08194621328293536, "grad_norm": 0.4445996880531311, "learning_rate": 1.9670437939461032e-05, "loss": 0.5324, "step": 4414 }, { "epoch": 0.081983343420354, "grad_norm": 0.3887993097305298, "learning_rate": 1.9670140873370074e-05, "loss": 0.3722, "step": 4416 }, { "epoch": 0.08202047355777264, "grad_norm": 0.38987305760383606, "learning_rate": 1.9669843675697464e-05, "loss": 0.4525, "step": 4418 }, { "epoch": 0.08205760369519127, "grad_norm": 0.35607287287712097, "learning_rate": 1.9669546346447247e-05, "loss": 0.4329, "step": 4420 }, { "epoch": 0.08209473383260991, "grad_norm": 1.3302582502365112, "learning_rate": 1.9669248885623466e-05, "loss": 0.6361, "step": 4422 }, { "epoch": 0.08213186397002856, "grad_norm": 0.38887298107147217, "learning_rate": 1.966895129323017e-05, "loss": 0.2566, "step": 4424 }, { "epoch": 0.08216899410744719, "grad_norm": 0.33553558588027954, "learning_rate": 1.966865356927141e-05, "loss": 0.4759, "step": 4426 }, { "epoch": 0.08220612424486583, "grad_norm": 0.5017754435539246, "learning_rate": 1.9668355713751235e-05, "loss": 0.3574, "step": 4428 }, { "epoch": 0.08224325438228447, "grad_norm": 0.27615684270858765, "learning_rate": 1.96680577266737e-05, "loss": 0.6518, "step": 4430 }, { "epoch": 0.08228038451970311, "grad_norm": 0.4217571020126343, "learning_rate": 1.9667759608042858e-05, "loss": 0.4291, "step": 4432 }, { "epoch": 0.08231751465712174, "grad_norm": 0.32189127802848816, "learning_rate": 1.9667461357862768e-05, "loss": 0.5517, "step": 4434 }, { "epoch": 0.08235464479454038, "grad_norm": 0.3140683174133301, "learning_rate": 1.9667162976137485e-05, "loss": 0.375, "step": 4436 }, { "epoch": 0.08239177493195902, "grad_norm": 0.372673898935318, "learning_rate": 1.9666864462871066e-05, "loss": 0.3182, "step": 4438 }, { "epoch": 0.08242890506937767, "grad_norm": 0.39809122681617737, "learning_rate": 1.9666565818067585e-05, "loss": 0.2904, "step": 4440 }, { "epoch": 0.0824660352067963, "grad_norm": 0.3632083237171173, "learning_rate": 1.9666267041731092e-05, "loss": 0.3647, "step": 4442 }, { "epoch": 0.08250316534421494, "grad_norm": 0.37740546464920044, "learning_rate": 1.966596813386566e-05, "loss": 0.2874, "step": 4444 }, { "epoch": 0.08254029548163358, "grad_norm": 0.3099859356880188, "learning_rate": 1.9665669094475354e-05, "loss": 0.3626, "step": 4446 }, { "epoch": 0.08257742561905222, "grad_norm": 0.4261600375175476, "learning_rate": 1.966536992356425e-05, "loss": 0.4043, "step": 4448 }, { "epoch": 0.08261455575647085, "grad_norm": 0.33482927083969116, "learning_rate": 1.9665070621136403e-05, "loss": 0.2671, "step": 4450 }, { "epoch": 0.08265168589388949, "grad_norm": 0.32126525044441223, "learning_rate": 1.96647711871959e-05, "loss": 0.3348, "step": 4452 }, { "epoch": 0.08268881603130813, "grad_norm": 0.3109072744846344, "learning_rate": 1.966447162174681e-05, "loss": 0.2859, "step": 4454 }, { "epoch": 0.08272594616872678, "grad_norm": 0.37587040662765503, "learning_rate": 1.966417192479321e-05, "loss": 0.2943, "step": 4456 }, { "epoch": 0.0827630763061454, "grad_norm": 0.3386680483818054, "learning_rate": 1.9663872096339176e-05, "loss": 0.2397, "step": 4458 }, { "epoch": 0.08280020644356405, "grad_norm": 0.4219374656677246, "learning_rate": 1.966357213638879e-05, "loss": 0.4738, "step": 4460 }, { "epoch": 0.08283733658098269, "grad_norm": 0.4060429334640503, "learning_rate": 1.9663272044946135e-05, "loss": 0.3928, "step": 4462 }, { "epoch": 0.08287446671840132, "grad_norm": 0.5281952619552612, "learning_rate": 1.9662971822015292e-05, "loss": 0.2972, "step": 4464 }, { "epoch": 0.08291159685581996, "grad_norm": 0.27590611577033997, "learning_rate": 1.9662671467600342e-05, "loss": 0.3297, "step": 4466 }, { "epoch": 0.0829487269932386, "grad_norm": 0.3817589282989502, "learning_rate": 1.966237098170538e-05, "loss": 0.3604, "step": 4468 }, { "epoch": 0.08298585713065725, "grad_norm": 0.2598402202129364, "learning_rate": 1.9662070364334492e-05, "loss": 0.2951, "step": 4470 }, { "epoch": 0.08302298726807587, "grad_norm": 0.33865490555763245, "learning_rate": 1.9661769615491765e-05, "loss": 0.3759, "step": 4472 }, { "epoch": 0.08306011740549452, "grad_norm": 0.3624274730682373, "learning_rate": 1.96614687351813e-05, "loss": 0.2334, "step": 4474 }, { "epoch": 0.08309724754291316, "grad_norm": 0.33348944783210754, "learning_rate": 1.9661167723407178e-05, "loss": 0.4479, "step": 4476 }, { "epoch": 0.0831343776803318, "grad_norm": 0.3330913782119751, "learning_rate": 1.9660866580173503e-05, "loss": 0.5039, "step": 4478 }, { "epoch": 0.08317150781775043, "grad_norm": 0.37822842597961426, "learning_rate": 1.966056530548437e-05, "loss": 0.3913, "step": 4480 }, { "epoch": 0.08320863795516907, "grad_norm": 0.41285809874534607, "learning_rate": 1.9660263899343884e-05, "loss": 0.4721, "step": 4482 }, { "epoch": 0.08324576809258771, "grad_norm": 0.4298669695854187, "learning_rate": 1.965996236175614e-05, "loss": 0.5382, "step": 4484 }, { "epoch": 0.08328289823000636, "grad_norm": 0.2986083924770355, "learning_rate": 1.9659660692725243e-05, "loss": 0.5577, "step": 4486 }, { "epoch": 0.08332002836742498, "grad_norm": 0.3051527440547943, "learning_rate": 1.96593588922553e-05, "loss": 0.2837, "step": 4488 }, { "epoch": 0.08335715850484363, "grad_norm": 0.35509946942329407, "learning_rate": 1.9659056960350417e-05, "loss": 0.2567, "step": 4490 }, { "epoch": 0.08339428864226227, "grad_norm": 0.42667677998542786, "learning_rate": 1.9658754897014694e-05, "loss": 0.4816, "step": 4492 }, { "epoch": 0.08343141877968091, "grad_norm": 0.3686564564704895, "learning_rate": 1.9658452702252252e-05, "loss": 0.3361, "step": 4494 }, { "epoch": 0.08346854891709954, "grad_norm": 0.3154265880584717, "learning_rate": 1.9658150376067203e-05, "loss": 0.3437, "step": 4496 }, { "epoch": 0.08350567905451818, "grad_norm": 0.4197397232055664, "learning_rate": 1.9657847918463654e-05, "loss": 0.478, "step": 4498 }, { "epoch": 0.08354280919193682, "grad_norm": 0.32215428352355957, "learning_rate": 1.9657545329445722e-05, "loss": 0.4424, "step": 4500 }, { "epoch": 0.08357993932935545, "grad_norm": 0.44540244340896606, "learning_rate": 1.9657242609017526e-05, "loss": 0.3716, "step": 4502 }, { "epoch": 0.0836170694667741, "grad_norm": 0.4637117087841034, "learning_rate": 1.9656939757183187e-05, "loss": 0.2526, "step": 4504 }, { "epoch": 0.08365419960419274, "grad_norm": 0.28792962431907654, "learning_rate": 1.965663677394682e-05, "loss": 0.3533, "step": 4506 }, { "epoch": 0.08369132974161138, "grad_norm": 0.3542967140674591, "learning_rate": 1.9656333659312557e-05, "loss": 0.4268, "step": 4508 }, { "epoch": 0.08372845987903, "grad_norm": 0.3405669927597046, "learning_rate": 1.9656030413284514e-05, "loss": 0.4391, "step": 4510 }, { "epoch": 0.08376559001644865, "grad_norm": 0.5863144397735596, "learning_rate": 1.9655727035866817e-05, "loss": 0.2254, "step": 4512 }, { "epoch": 0.08380272015386729, "grad_norm": 0.36684104800224304, "learning_rate": 1.9655423527063603e-05, "loss": 0.4588, "step": 4514 }, { "epoch": 0.08383985029128593, "grad_norm": 0.32833239436149597, "learning_rate": 1.9655119886878993e-05, "loss": 0.4541, "step": 4516 }, { "epoch": 0.08387698042870456, "grad_norm": 0.34907475113868713, "learning_rate": 1.9654816115317123e-05, "loss": 0.3647, "step": 4518 }, { "epoch": 0.0839141105661232, "grad_norm": 0.33335158228874207, "learning_rate": 1.9654512212382123e-05, "loss": 0.2761, "step": 4520 }, { "epoch": 0.08395124070354185, "grad_norm": 0.2944607138633728, "learning_rate": 1.965420817807813e-05, "loss": 0.3219, "step": 4522 }, { "epoch": 0.08398837084096049, "grad_norm": 0.3625856637954712, "learning_rate": 1.9653904012409283e-05, "loss": 0.3374, "step": 4524 }, { "epoch": 0.08402550097837912, "grad_norm": 0.36513465642929077, "learning_rate": 1.9653599715379722e-05, "loss": 0.2837, "step": 4526 }, { "epoch": 0.08406263111579776, "grad_norm": 0.26217037439346313, "learning_rate": 1.9653295286993584e-05, "loss": 0.5084, "step": 4528 }, { "epoch": 0.0840997612532164, "grad_norm": 0.30461055040359497, "learning_rate": 1.9652990727255007e-05, "loss": 0.4252, "step": 4530 }, { "epoch": 0.08413689139063504, "grad_norm": 0.32422107458114624, "learning_rate": 1.9652686036168145e-05, "loss": 0.1812, "step": 4532 }, { "epoch": 0.08417402152805367, "grad_norm": 0.3543776273727417, "learning_rate": 1.9652381213737136e-05, "loss": 0.25, "step": 4534 }, { "epoch": 0.08421115166547231, "grad_norm": 0.44842255115509033, "learning_rate": 1.9652076259966133e-05, "loss": 0.4179, "step": 4536 }, { "epoch": 0.08424828180289096, "grad_norm": 0.39853161573410034, "learning_rate": 1.9651771174859284e-05, "loss": 0.3174, "step": 4538 }, { "epoch": 0.08428541194030958, "grad_norm": 0.35731151700019836, "learning_rate": 1.965146595842074e-05, "loss": 0.3676, "step": 4540 }, { "epoch": 0.08432254207772823, "grad_norm": 0.3116741478443146, "learning_rate": 1.9651160610654654e-05, "loss": 0.1742, "step": 4542 }, { "epoch": 0.08435967221514687, "grad_norm": 0.31342506408691406, "learning_rate": 1.9650855131565178e-05, "loss": 0.3839, "step": 4544 }, { "epoch": 0.08439680235256551, "grad_norm": 0.378753125667572, "learning_rate": 1.9650549521156474e-05, "loss": 0.4448, "step": 4546 }, { "epoch": 0.08443393248998414, "grad_norm": 0.33333152532577515, "learning_rate": 1.9650243779432692e-05, "loss": 0.443, "step": 4548 }, { "epoch": 0.08447106262740278, "grad_norm": 0.4335893392562866, "learning_rate": 1.9649937906398004e-05, "loss": 0.3738, "step": 4550 }, { "epoch": 0.08450819276482142, "grad_norm": 0.4551970958709717, "learning_rate": 1.9649631902056564e-05, "loss": 0.55, "step": 4552 }, { "epoch": 0.08454532290224007, "grad_norm": 0.27086567878723145, "learning_rate": 1.9649325766412538e-05, "loss": 0.5156, "step": 4554 }, { "epoch": 0.0845824530396587, "grad_norm": 0.31647568941116333, "learning_rate": 1.9649019499470094e-05, "loss": 0.3354, "step": 4556 }, { "epoch": 0.08461958317707734, "grad_norm": 0.3380351662635803, "learning_rate": 1.9648713101233393e-05, "loss": 0.3419, "step": 4558 }, { "epoch": 0.08465671331449598, "grad_norm": 0.39634308218955994, "learning_rate": 1.964840657170661e-05, "loss": 0.405, "step": 4560 }, { "epoch": 0.08469384345191462, "grad_norm": 0.29830124974250793, "learning_rate": 1.9648099910893915e-05, "loss": 0.2347, "step": 4562 }, { "epoch": 0.08473097358933325, "grad_norm": 0.27111971378326416, "learning_rate": 1.9647793118799474e-05, "loss": 0.3104, "step": 4564 }, { "epoch": 0.08476810372675189, "grad_norm": 0.3366991877555847, "learning_rate": 1.964748619542747e-05, "loss": 0.5462, "step": 4566 }, { "epoch": 0.08480523386417053, "grad_norm": 0.2871280014514923, "learning_rate": 1.964717914078208e-05, "loss": 0.4995, "step": 4568 }, { "epoch": 0.08484236400158918, "grad_norm": 0.4819769859313965, "learning_rate": 1.9646871954867476e-05, "loss": 0.3203, "step": 4570 }, { "epoch": 0.0848794941390078, "grad_norm": 0.40006667375564575, "learning_rate": 1.964656463768784e-05, "loss": 0.3776, "step": 4572 }, { "epoch": 0.08491662427642645, "grad_norm": 0.18466511368751526, "learning_rate": 1.9646257189247355e-05, "loss": 0.261, "step": 4574 }, { "epoch": 0.08495375441384509, "grad_norm": 0.5172551274299622, "learning_rate": 1.96459496095502e-05, "loss": 0.36, "step": 4576 }, { "epoch": 0.08499088455126372, "grad_norm": 0.3059728741645813, "learning_rate": 1.964564189860057e-05, "loss": 0.3953, "step": 4578 }, { "epoch": 0.08502801468868236, "grad_norm": 0.392494261264801, "learning_rate": 1.964533405640264e-05, "loss": 0.3833, "step": 4580 }, { "epoch": 0.085065144826101, "grad_norm": 0.43828439712524414, "learning_rate": 1.9645026082960606e-05, "loss": 0.2643, "step": 4582 }, { "epoch": 0.08510227496351964, "grad_norm": 0.40079665184020996, "learning_rate": 1.964471797827866e-05, "loss": 0.1689, "step": 4584 }, { "epoch": 0.08513940510093827, "grad_norm": 0.3955659866333008, "learning_rate": 1.9644409742360992e-05, "loss": 0.3265, "step": 4586 }, { "epoch": 0.08517653523835692, "grad_norm": 0.3505641520023346, "learning_rate": 1.9644101375211794e-05, "loss": 0.2854, "step": 4588 }, { "epoch": 0.08521366537577556, "grad_norm": 0.4161413609981537, "learning_rate": 1.964379287683526e-05, "loss": 0.2986, "step": 4590 }, { "epoch": 0.0852507955131942, "grad_norm": 0.3062976002693176, "learning_rate": 1.9643484247235597e-05, "loss": 0.3982, "step": 4592 }, { "epoch": 0.08528792565061283, "grad_norm": 0.29844093322753906, "learning_rate": 1.9643175486416998e-05, "loss": 0.4544, "step": 4594 }, { "epoch": 0.08532505578803147, "grad_norm": 0.29506438970565796, "learning_rate": 1.964286659438366e-05, "loss": 0.2951, "step": 4596 }, { "epoch": 0.08536218592545011, "grad_norm": 0.3231109380722046, "learning_rate": 1.9642557571139798e-05, "loss": 0.3056, "step": 4598 }, { "epoch": 0.08539931606286875, "grad_norm": 0.24248245358467102, "learning_rate": 1.964224841668961e-05, "loss": 0.381, "step": 4600 }, { "epoch": 0.08543644620028738, "grad_norm": 0.30829092860221863, "learning_rate": 1.96419391310373e-05, "loss": 0.4148, "step": 4602 }, { "epoch": 0.08547357633770603, "grad_norm": 0.6405683755874634, "learning_rate": 1.964162971418708e-05, "loss": 0.3978, "step": 4604 }, { "epoch": 0.08551070647512467, "grad_norm": 0.38810399174690247, "learning_rate": 1.9641320166143157e-05, "loss": 0.3795, "step": 4606 }, { "epoch": 0.08554783661254331, "grad_norm": 0.3913334608078003, "learning_rate": 1.964101048690975e-05, "loss": 0.2631, "step": 4608 }, { "epoch": 0.08558496674996194, "grad_norm": 0.27588650584220886, "learning_rate": 1.9640700676491066e-05, "loss": 0.3818, "step": 4610 }, { "epoch": 0.08562209688738058, "grad_norm": 0.21955938637256622, "learning_rate": 1.9640390734891323e-05, "loss": 0.2611, "step": 4612 }, { "epoch": 0.08565922702479922, "grad_norm": 0.35103267431259155, "learning_rate": 1.964008066211474e-05, "loss": 0.2142, "step": 4614 }, { "epoch": 0.08569635716221785, "grad_norm": 0.35324332118034363, "learning_rate": 1.9639770458165535e-05, "loss": 0.5041, "step": 4616 }, { "epoch": 0.0857334872996365, "grad_norm": 0.28122270107269287, "learning_rate": 1.9639460123047925e-05, "loss": 0.3081, "step": 4618 }, { "epoch": 0.08577061743705514, "grad_norm": 0.3309430480003357, "learning_rate": 1.963914965676614e-05, "loss": 0.4823, "step": 4620 }, { "epoch": 0.08580774757447378, "grad_norm": 0.3529426157474518, "learning_rate": 1.9638839059324398e-05, "loss": 0.6578, "step": 4622 }, { "epoch": 0.0858448777118924, "grad_norm": 0.5730276107788086, "learning_rate": 1.963852833072693e-05, "loss": 0.3286, "step": 4624 }, { "epoch": 0.08588200784931105, "grad_norm": 0.347434401512146, "learning_rate": 1.9638217470977963e-05, "loss": 0.4237, "step": 4626 }, { "epoch": 0.08591913798672969, "grad_norm": 0.3839362561702728, "learning_rate": 1.9637906480081727e-05, "loss": 0.3103, "step": 4628 }, { "epoch": 0.08595626812414833, "grad_norm": 0.5454792976379395, "learning_rate": 1.9637595358042446e-05, "loss": 0.3282, "step": 4630 }, { "epoch": 0.08599339826156696, "grad_norm": 0.28368109464645386, "learning_rate": 1.9637284104864368e-05, "loss": 0.4548, "step": 4632 }, { "epoch": 0.0860305283989856, "grad_norm": 0.3019779622554779, "learning_rate": 1.9636972720551716e-05, "loss": 0.4542, "step": 4634 }, { "epoch": 0.08606765853640425, "grad_norm": 0.29099613428115845, "learning_rate": 1.9636661205108728e-05, "loss": 0.4649, "step": 4636 }, { "epoch": 0.08610478867382289, "grad_norm": 0.4067786633968353, "learning_rate": 1.9636349558539652e-05, "loss": 0.4441, "step": 4638 }, { "epoch": 0.08614191881124152, "grad_norm": 0.420219361782074, "learning_rate": 1.963603778084872e-05, "loss": 0.4623, "step": 4640 }, { "epoch": 0.08617904894866016, "grad_norm": 0.3464505672454834, "learning_rate": 1.963572587204018e-05, "loss": 0.1677, "step": 4642 }, { "epoch": 0.0862161790860788, "grad_norm": 0.3247014582157135, "learning_rate": 1.963541383211827e-05, "loss": 0.3703, "step": 4644 }, { "epoch": 0.08625330922349744, "grad_norm": 0.3485212028026581, "learning_rate": 1.963510166108724e-05, "loss": 0.2499, "step": 4646 }, { "epoch": 0.08629043936091607, "grad_norm": 0.3235088586807251, "learning_rate": 1.9634789358951337e-05, "loss": 0.4659, "step": 4648 }, { "epoch": 0.08632756949833471, "grad_norm": 0.3788321614265442, "learning_rate": 1.963447692571481e-05, "loss": 0.3304, "step": 4650 }, { "epoch": 0.08636469963575336, "grad_norm": 0.4088015854358673, "learning_rate": 1.963416436138191e-05, "loss": 0.4006, "step": 4652 }, { "epoch": 0.08640182977317198, "grad_norm": 0.34424135088920593, "learning_rate": 1.9633851665956894e-05, "loss": 0.4444, "step": 4654 }, { "epoch": 0.08643895991059063, "grad_norm": 0.2938086688518524, "learning_rate": 1.963353883944401e-05, "loss": 0.2668, "step": 4656 }, { "epoch": 0.08647609004800927, "grad_norm": 0.29691851139068604, "learning_rate": 1.963322588184752e-05, "loss": 0.2383, "step": 4658 }, { "epoch": 0.08651322018542791, "grad_norm": 0.32453083992004395, "learning_rate": 1.963291279317168e-05, "loss": 0.2668, "step": 4660 }, { "epoch": 0.08655035032284654, "grad_norm": 0.43395793437957764, "learning_rate": 1.9632599573420753e-05, "loss": 0.3599, "step": 4662 }, { "epoch": 0.08658748046026518, "grad_norm": 0.3512604236602783, "learning_rate": 1.9632286222598998e-05, "loss": 0.4005, "step": 4664 }, { "epoch": 0.08662461059768382, "grad_norm": 0.3002413511276245, "learning_rate": 1.963197274071068e-05, "loss": 0.3406, "step": 4666 }, { "epoch": 0.08666174073510247, "grad_norm": 0.40121015906333923, "learning_rate": 1.963165912776006e-05, "loss": 0.416, "step": 4668 }, { "epoch": 0.0866988708725211, "grad_norm": 0.3829343318939209, "learning_rate": 1.9631345383751413e-05, "loss": 0.2023, "step": 4670 }, { "epoch": 0.08673600100993974, "grad_norm": 0.24035698175430298, "learning_rate": 1.9631031508689e-05, "loss": 0.204, "step": 4672 }, { "epoch": 0.08677313114735838, "grad_norm": 0.30460163950920105, "learning_rate": 1.96307175025771e-05, "loss": 0.4296, "step": 4674 }, { "epoch": 0.08681026128477702, "grad_norm": 0.2587651014328003, "learning_rate": 1.9630403365419984e-05, "loss": 0.3299, "step": 4676 }, { "epoch": 0.08684739142219565, "grad_norm": 0.355174720287323, "learning_rate": 1.9630089097221924e-05, "loss": 0.1926, "step": 4678 }, { "epoch": 0.08688452155961429, "grad_norm": 0.3191797733306885, "learning_rate": 1.9629774697987195e-05, "loss": 0.256, "step": 4680 }, { "epoch": 0.08692165169703293, "grad_norm": 0.3339892327785492, "learning_rate": 1.9629460167720075e-05, "loss": 0.3226, "step": 4682 }, { "epoch": 0.08695878183445158, "grad_norm": 0.36235374212265015, "learning_rate": 1.9629145506424853e-05, "loss": 0.315, "step": 4684 }, { "epoch": 0.0869959119718702, "grad_norm": 0.2823745906352997, "learning_rate": 1.9628830714105796e-05, "loss": 0.3854, "step": 4686 }, { "epoch": 0.08703304210928885, "grad_norm": 0.3788362145423889, "learning_rate": 1.9628515790767196e-05, "loss": 0.313, "step": 4688 }, { "epoch": 0.08707017224670749, "grad_norm": 0.2732026278972626, "learning_rate": 1.9628200736413337e-05, "loss": 0.5059, "step": 4690 }, { "epoch": 0.08710730238412612, "grad_norm": 0.265223890542984, "learning_rate": 1.962788555104851e-05, "loss": 0.2701, "step": 4692 }, { "epoch": 0.08714443252154476, "grad_norm": 0.2825123369693756, "learning_rate": 1.9627570234676993e-05, "loss": 0.3289, "step": 4694 }, { "epoch": 0.0871815626589634, "grad_norm": 0.2788553237915039, "learning_rate": 1.9627254787303086e-05, "loss": 0.4309, "step": 4696 }, { "epoch": 0.08721869279638204, "grad_norm": 0.340683251619339, "learning_rate": 1.9626939208931078e-05, "loss": 0.3215, "step": 4698 }, { "epoch": 0.08725582293380067, "grad_norm": 0.308350145816803, "learning_rate": 1.9626623499565266e-05, "loss": 0.3846, "step": 4700 }, { "epoch": 0.08729295307121931, "grad_norm": 0.379747599363327, "learning_rate": 1.962630765920994e-05, "loss": 0.553, "step": 4702 }, { "epoch": 0.08733008320863796, "grad_norm": 0.42858612537384033, "learning_rate": 1.9625991687869402e-05, "loss": 0.3364, "step": 4704 }, { "epoch": 0.0873672133460566, "grad_norm": 0.38619449734687805, "learning_rate": 1.962567558554795e-05, "loss": 0.2324, "step": 4706 }, { "epoch": 0.08740434348347523, "grad_norm": 0.31159231066703796, "learning_rate": 1.9625359352249888e-05, "loss": 0.4673, "step": 4708 }, { "epoch": 0.08744147362089387, "grad_norm": 0.2789519131183624, "learning_rate": 1.9625042987979512e-05, "loss": 0.3444, "step": 4710 }, { "epoch": 0.08747860375831251, "grad_norm": 0.31110092997550964, "learning_rate": 1.9624726492741132e-05, "loss": 0.3605, "step": 4712 }, { "epoch": 0.08751573389573115, "grad_norm": 0.3585788905620575, "learning_rate": 1.9624409866539058e-05, "loss": 0.5106, "step": 4714 }, { "epoch": 0.08755286403314978, "grad_norm": 0.4569585919380188, "learning_rate": 1.962409310937759e-05, "loss": 0.3315, "step": 4716 }, { "epoch": 0.08758999417056842, "grad_norm": 0.3330109417438507, "learning_rate": 1.9623776221261046e-05, "loss": 0.3405, "step": 4718 }, { "epoch": 0.08762712430798707, "grad_norm": 0.43835577368736267, "learning_rate": 1.962345920219373e-05, "loss": 0.3185, "step": 4720 }, { "epoch": 0.08766425444540571, "grad_norm": 0.34292635321617126, "learning_rate": 1.962314205217996e-05, "loss": 0.4202, "step": 4722 }, { "epoch": 0.08770138458282434, "grad_norm": 0.44344761967658997, "learning_rate": 1.9622824771224058e-05, "loss": 0.3566, "step": 4724 }, { "epoch": 0.08773851472024298, "grad_norm": 0.31321951746940613, "learning_rate": 1.962250735933033e-05, "loss": 0.4234, "step": 4726 }, { "epoch": 0.08777564485766162, "grad_norm": 0.5036094188690186, "learning_rate": 1.9622189816503098e-05, "loss": 0.5105, "step": 4728 }, { "epoch": 0.08781277499508025, "grad_norm": 0.4505239427089691, "learning_rate": 1.9621872142746684e-05, "loss": 0.3808, "step": 4730 }, { "epoch": 0.08784990513249889, "grad_norm": 0.2138471156358719, "learning_rate": 1.9621554338065414e-05, "loss": 0.401, "step": 4732 }, { "epoch": 0.08788703526991754, "grad_norm": 0.4337904453277588, "learning_rate": 1.9621236402463608e-05, "loss": 0.3911, "step": 4734 }, { "epoch": 0.08792416540733618, "grad_norm": 0.42932403087615967, "learning_rate": 1.9620918335945593e-05, "loss": 0.2703, "step": 4736 }, { "epoch": 0.0879612955447548, "grad_norm": 1.9188131093978882, "learning_rate": 1.96206001385157e-05, "loss": 0.3405, "step": 4738 }, { "epoch": 0.08799842568217345, "grad_norm": 0.3674734830856323, "learning_rate": 1.9620281810178253e-05, "loss": 0.2512, "step": 4740 }, { "epoch": 0.08803555581959209, "grad_norm": 0.35753199458122253, "learning_rate": 1.961996335093759e-05, "loss": 0.3655, "step": 4742 }, { "epoch": 0.08807268595701073, "grad_norm": 0.327698677778244, "learning_rate": 1.9619644760798035e-05, "loss": 0.2822, "step": 4744 }, { "epoch": 0.08810981609442936, "grad_norm": 0.44055238366127014, "learning_rate": 1.9619326039763936e-05, "loss": 0.3258, "step": 4746 }, { "epoch": 0.088146946231848, "grad_norm": 0.33755528926849365, "learning_rate": 1.9619007187839618e-05, "loss": 0.1912, "step": 4748 }, { "epoch": 0.08818407636926665, "grad_norm": 0.3712221086025238, "learning_rate": 1.9618688205029427e-05, "loss": 0.1099, "step": 4750 }, { "epoch": 0.08822120650668529, "grad_norm": 0.358967661857605, "learning_rate": 1.9618369091337698e-05, "loss": 0.4273, "step": 4752 }, { "epoch": 0.08825833664410392, "grad_norm": 0.2960997521877289, "learning_rate": 1.961804984676878e-05, "loss": 0.1827, "step": 4754 }, { "epoch": 0.08829546678152256, "grad_norm": 0.28012827038764954, "learning_rate": 1.961773047132701e-05, "loss": 0.4107, "step": 4756 }, { "epoch": 0.0883325969189412, "grad_norm": 0.3843344748020172, "learning_rate": 1.9617410965016736e-05, "loss": 0.2733, "step": 4758 }, { "epoch": 0.08836972705635984, "grad_norm": 0.33418747782707214, "learning_rate": 1.9617091327842308e-05, "loss": 0.4315, "step": 4760 }, { "epoch": 0.08840685719377847, "grad_norm": 0.3174058198928833, "learning_rate": 1.9616771559808075e-05, "loss": 0.3955, "step": 4762 }, { "epoch": 0.08844398733119711, "grad_norm": 0.41320618987083435, "learning_rate": 1.9616451660918382e-05, "loss": 0.4344, "step": 4764 }, { "epoch": 0.08848111746861576, "grad_norm": 0.3358910381793976, "learning_rate": 1.961613163117759e-05, "loss": 0.2313, "step": 4766 }, { "epoch": 0.08851824760603438, "grad_norm": 0.3710285425186157, "learning_rate": 1.9615811470590048e-05, "loss": 0.3123, "step": 4768 }, { "epoch": 0.08855537774345303, "grad_norm": 0.37014228105545044, "learning_rate": 1.9615491179160117e-05, "loss": 0.4508, "step": 4770 }, { "epoch": 0.08859250788087167, "grad_norm": 0.3562534749507904, "learning_rate": 1.961517075689215e-05, "loss": 0.3191, "step": 4772 }, { "epoch": 0.08862963801829031, "grad_norm": 0.4657309949398041, "learning_rate": 1.9614850203790517e-05, "loss": 0.5744, "step": 4774 }, { "epoch": 0.08866676815570894, "grad_norm": 0.30195385217666626, "learning_rate": 1.9614529519859566e-05, "loss": 0.3684, "step": 4776 }, { "epoch": 0.08870389829312758, "grad_norm": 0.39522865414619446, "learning_rate": 1.9614208705103667e-05, "loss": 0.3502, "step": 4778 }, { "epoch": 0.08874102843054622, "grad_norm": 0.22946004569530487, "learning_rate": 1.9613887759527186e-05, "loss": 0.3307, "step": 4780 }, { "epoch": 0.08877815856796487, "grad_norm": 0.5596912503242493, "learning_rate": 1.961356668313449e-05, "loss": 0.3137, "step": 4782 }, { "epoch": 0.0888152887053835, "grad_norm": 0.37164831161499023, "learning_rate": 1.961324547592995e-05, "loss": 0.2118, "step": 4784 }, { "epoch": 0.08885241884280214, "grad_norm": 0.33855870366096497, "learning_rate": 1.9612924137917932e-05, "loss": 0.3222, "step": 4786 }, { "epoch": 0.08888954898022078, "grad_norm": 0.429386168718338, "learning_rate": 1.961260266910281e-05, "loss": 0.4907, "step": 4788 }, { "epoch": 0.08892667911763942, "grad_norm": 0.3675706088542938, "learning_rate": 1.961228106948896e-05, "loss": 0.2544, "step": 4790 }, { "epoch": 0.08896380925505805, "grad_norm": 0.31491485238075256, "learning_rate": 1.9611959339080756e-05, "loss": 0.376, "step": 4792 }, { "epoch": 0.08900093939247669, "grad_norm": 0.4208625555038452, "learning_rate": 1.9611637477882573e-05, "loss": 0.1293, "step": 4794 }, { "epoch": 0.08903806952989533, "grad_norm": 0.2978513836860657, "learning_rate": 1.9611315485898798e-05, "loss": 0.5303, "step": 4796 }, { "epoch": 0.08907519966731398, "grad_norm": 0.460877001285553, "learning_rate": 1.961099336313381e-05, "loss": 0.2822, "step": 4798 }, { "epoch": 0.0891123298047326, "grad_norm": 0.3632570207118988, "learning_rate": 1.9610671109591988e-05, "loss": 0.3158, "step": 4800 }, { "epoch": 0.08914945994215125, "grad_norm": 0.4490357041358948, "learning_rate": 1.961034872527772e-05, "loss": 0.2587, "step": 4802 }, { "epoch": 0.08918659007956989, "grad_norm": 0.3445192873477936, "learning_rate": 1.961002621019539e-05, "loss": 0.4936, "step": 4804 }, { "epoch": 0.08922372021698852, "grad_norm": 0.33543261885643005, "learning_rate": 1.960970356434939e-05, "loss": 0.435, "step": 4806 }, { "epoch": 0.08926085035440716, "grad_norm": 0.2589396834373474, "learning_rate": 1.960938078774411e-05, "loss": 0.2938, "step": 4808 }, { "epoch": 0.0892979804918258, "grad_norm": 0.42650461196899414, "learning_rate": 1.9609057880383942e-05, "loss": 0.196, "step": 4810 }, { "epoch": 0.08933511062924444, "grad_norm": 0.4195377230644226, "learning_rate": 1.9608734842273276e-05, "loss": 0.3861, "step": 4812 }, { "epoch": 0.08937224076666307, "grad_norm": 0.3228859305381775, "learning_rate": 1.9608411673416513e-05, "loss": 0.4203, "step": 4814 }, { "epoch": 0.08940937090408171, "grad_norm": 0.3798494040966034, "learning_rate": 1.9608088373818045e-05, "loss": 0.5612, "step": 4816 }, { "epoch": 0.08944650104150036, "grad_norm": 0.5108641982078552, "learning_rate": 1.960776494348228e-05, "loss": 0.3089, "step": 4818 }, { "epoch": 0.089483631178919, "grad_norm": 0.45247113704681396, "learning_rate": 1.9607441382413604e-05, "loss": 0.2938, "step": 4820 }, { "epoch": 0.08952076131633763, "grad_norm": 0.40164345502853394, "learning_rate": 1.9607117690616432e-05, "loss": 0.4178, "step": 4822 }, { "epoch": 0.08955789145375627, "grad_norm": 0.31786108016967773, "learning_rate": 1.9606793868095164e-05, "loss": 0.2715, "step": 4824 }, { "epoch": 0.08959502159117491, "grad_norm": 0.432865172624588, "learning_rate": 1.960646991485421e-05, "loss": 0.5264, "step": 4826 }, { "epoch": 0.08963215172859355, "grad_norm": 0.312656044960022, "learning_rate": 1.960614583089797e-05, "loss": 0.4556, "step": 4828 }, { "epoch": 0.08966928186601218, "grad_norm": 0.42316365242004395, "learning_rate": 1.9605821616230867e-05, "loss": 0.3504, "step": 4830 }, { "epoch": 0.08970641200343082, "grad_norm": 0.3008486330509186, "learning_rate": 1.96054972708573e-05, "loss": 0.2799, "step": 4832 }, { "epoch": 0.08974354214084947, "grad_norm": 0.38963010907173157, "learning_rate": 1.9605172794781687e-05, "loss": 0.3733, "step": 4834 }, { "epoch": 0.0897806722782681, "grad_norm": 0.3395363390445709, "learning_rate": 1.960484818800844e-05, "loss": 0.3424, "step": 4836 }, { "epoch": 0.08981780241568674, "grad_norm": 0.332244873046875, "learning_rate": 1.9604523450541983e-05, "loss": 0.2607, "step": 4838 }, { "epoch": 0.08985493255310538, "grad_norm": 0.33453601598739624, "learning_rate": 1.960419858238673e-05, "loss": 0.4416, "step": 4840 }, { "epoch": 0.08989206269052402, "grad_norm": 0.38418081402778625, "learning_rate": 1.96038735835471e-05, "loss": 0.3032, "step": 4842 }, { "epoch": 0.08992919282794265, "grad_norm": 0.38488829135894775, "learning_rate": 1.960354845402752e-05, "loss": 0.3229, "step": 4844 }, { "epoch": 0.08996632296536129, "grad_norm": 0.5189816355705261, "learning_rate": 1.960322319383241e-05, "loss": 0.3619, "step": 4846 }, { "epoch": 0.09000345310277993, "grad_norm": 0.4044243395328522, "learning_rate": 1.9602897802966197e-05, "loss": 0.2732, "step": 4848 }, { "epoch": 0.09004058324019858, "grad_norm": 0.4594970643520355, "learning_rate": 1.960257228143331e-05, "loss": 0.4085, "step": 4850 }, { "epoch": 0.0900777133776172, "grad_norm": 0.42997562885284424, "learning_rate": 1.960224662923818e-05, "loss": 0.3644, "step": 4852 }, { "epoch": 0.09011484351503585, "grad_norm": 0.24007850885391235, "learning_rate": 1.9601920846385232e-05, "loss": 0.168, "step": 4854 }, { "epoch": 0.09015197365245449, "grad_norm": 0.286358505487442, "learning_rate": 1.9601594932878902e-05, "loss": 0.2796, "step": 4856 }, { "epoch": 0.09018910378987313, "grad_norm": 0.3698863089084625, "learning_rate": 1.960126888872363e-05, "loss": 0.3305, "step": 4858 }, { "epoch": 0.09022623392729176, "grad_norm": 0.41181281208992004, "learning_rate": 1.960094271392384e-05, "loss": 0.4354, "step": 4860 }, { "epoch": 0.0902633640647104, "grad_norm": 0.22134605050086975, "learning_rate": 1.960061640848398e-05, "loss": 0.3455, "step": 4862 }, { "epoch": 0.09030049420212904, "grad_norm": 0.4314284026622772, "learning_rate": 1.960028997240849e-05, "loss": 0.2922, "step": 4864 }, { "epoch": 0.09033762433954769, "grad_norm": 0.3973320424556732, "learning_rate": 1.959996340570181e-05, "loss": 0.3395, "step": 4866 }, { "epoch": 0.09037475447696632, "grad_norm": 0.40343576669692993, "learning_rate": 1.9599636708368385e-05, "loss": 0.2528, "step": 4868 }, { "epoch": 0.09041188461438496, "grad_norm": 0.24007877707481384, "learning_rate": 1.9599309880412654e-05, "loss": 0.3304, "step": 4870 }, { "epoch": 0.0904490147518036, "grad_norm": 0.3877570331096649, "learning_rate": 1.9598982921839073e-05, "loss": 0.4146, "step": 4872 }, { "epoch": 0.09048614488922223, "grad_norm": 0.37691619992256165, "learning_rate": 1.9598655832652082e-05, "loss": 0.263, "step": 4874 }, { "epoch": 0.09052327502664087, "grad_norm": 0.22564902901649475, "learning_rate": 1.959832861285614e-05, "loss": 0.3041, "step": 4876 }, { "epoch": 0.09056040516405951, "grad_norm": 0.2808085083961487, "learning_rate": 1.9598001262455694e-05, "loss": 0.4411, "step": 4878 }, { "epoch": 0.09059753530147815, "grad_norm": 0.3611622750759125, "learning_rate": 1.9597673781455202e-05, "loss": 0.3413, "step": 4880 }, { "epoch": 0.09063466543889678, "grad_norm": 0.38722723722457886, "learning_rate": 1.9597346169859114e-05, "loss": 0.1954, "step": 4882 }, { "epoch": 0.09067179557631543, "grad_norm": 0.32024624943733215, "learning_rate": 1.95970184276719e-05, "loss": 0.1965, "step": 4884 }, { "epoch": 0.09070892571373407, "grad_norm": 0.3292338252067566, "learning_rate": 1.9596690554898003e-05, "loss": 0.3352, "step": 4886 }, { "epoch": 0.09074605585115271, "grad_norm": 0.3589654564857483, "learning_rate": 1.9596362551541896e-05, "loss": 0.4066, "step": 4888 }, { "epoch": 0.09078318598857134, "grad_norm": 0.4215356707572937, "learning_rate": 1.9596034417608042e-05, "loss": 0.4987, "step": 4890 }, { "epoch": 0.09082031612598998, "grad_norm": 0.4367898106575012, "learning_rate": 1.95957061531009e-05, "loss": 0.399, "step": 4892 }, { "epoch": 0.09085744626340862, "grad_norm": 0.26233628392219543, "learning_rate": 1.959537775802494e-05, "loss": 0.2887, "step": 4894 }, { "epoch": 0.09089457640082726, "grad_norm": 0.32188430428504944, "learning_rate": 1.959504923238463e-05, "loss": 0.3507, "step": 4896 }, { "epoch": 0.0909317065382459, "grad_norm": 0.2501329779624939, "learning_rate": 1.9594720576184444e-05, "loss": 0.3872, "step": 4898 }, { "epoch": 0.09096883667566454, "grad_norm": 0.3407454490661621, "learning_rate": 1.9594391789428847e-05, "loss": 0.4934, "step": 4900 }, { "epoch": 0.09100596681308318, "grad_norm": 0.3168795108795166, "learning_rate": 1.9594062872122316e-05, "loss": 0.2462, "step": 4902 }, { "epoch": 0.09104309695050182, "grad_norm": 0.4565092921257019, "learning_rate": 1.959373382426933e-05, "loss": 0.2282, "step": 4904 }, { "epoch": 0.09108022708792045, "grad_norm": 0.3739783465862274, "learning_rate": 1.9593404645874357e-05, "loss": 0.2029, "step": 4906 }, { "epoch": 0.09111735722533909, "grad_norm": 0.44228729605674744, "learning_rate": 1.959307533694189e-05, "loss": 0.3279, "step": 4908 }, { "epoch": 0.09115448736275773, "grad_norm": 0.3853660523891449, "learning_rate": 1.9592745897476398e-05, "loss": 0.3799, "step": 4910 }, { "epoch": 0.09119161750017636, "grad_norm": 0.33759164810180664, "learning_rate": 1.9592416327482368e-05, "loss": 0.1575, "step": 4912 }, { "epoch": 0.091228747637595, "grad_norm": 0.2535952627658844, "learning_rate": 1.9592086626964284e-05, "loss": 0.2389, "step": 4914 }, { "epoch": 0.09126587777501365, "grad_norm": 0.3438250720500946, "learning_rate": 1.9591756795926636e-05, "loss": 0.4631, "step": 4916 }, { "epoch": 0.09130300791243229, "grad_norm": 0.3743865191936493, "learning_rate": 1.9591426834373906e-05, "loss": 0.3054, "step": 4918 }, { "epoch": 0.09134013804985092, "grad_norm": 0.30598023533821106, "learning_rate": 1.9591096742310583e-05, "loss": 0.3011, "step": 4920 }, { "epoch": 0.09137726818726956, "grad_norm": 0.39038604497909546, "learning_rate": 1.9590766519741167e-05, "loss": 0.2636, "step": 4922 }, { "epoch": 0.0914143983246882, "grad_norm": 0.3603297770023346, "learning_rate": 1.9590436166670148e-05, "loss": 0.1852, "step": 4924 }, { "epoch": 0.09145152846210684, "grad_norm": 0.304485023021698, "learning_rate": 1.9590105683102014e-05, "loss": 0.4916, "step": 4926 }, { "epoch": 0.09148865859952547, "grad_norm": 0.32898202538490295, "learning_rate": 1.958977506904127e-05, "loss": 0.2939, "step": 4928 }, { "epoch": 0.09152578873694411, "grad_norm": 0.3155774474143982, "learning_rate": 1.9589444324492413e-05, "loss": 0.2508, "step": 4930 }, { "epoch": 0.09156291887436276, "grad_norm": 0.41699492931365967, "learning_rate": 1.958911344945994e-05, "loss": 0.2851, "step": 4932 }, { "epoch": 0.0916000490117814, "grad_norm": 0.35700055956840515, "learning_rate": 1.9588782443948357e-05, "loss": 0.5291, "step": 4934 }, { "epoch": 0.09163717914920003, "grad_norm": 0.33645716309547424, "learning_rate": 1.958845130796217e-05, "loss": 0.2381, "step": 4936 }, { "epoch": 0.09167430928661867, "grad_norm": 0.3360303044319153, "learning_rate": 1.9588120041505876e-05, "loss": 0.392, "step": 4938 }, { "epoch": 0.09171143942403731, "grad_norm": 0.3056754171848297, "learning_rate": 1.958778864458399e-05, "loss": 0.4072, "step": 4940 }, { "epoch": 0.09174856956145595, "grad_norm": 0.3132539987564087, "learning_rate": 1.958745711720102e-05, "loss": 0.4425, "step": 4942 }, { "epoch": 0.09178569969887458, "grad_norm": 0.33548447489738464, "learning_rate": 1.9587125459361474e-05, "loss": 0.4424, "step": 4944 }, { "epoch": 0.09182282983629322, "grad_norm": 0.31488487124443054, "learning_rate": 1.958679367106987e-05, "loss": 0.2995, "step": 4946 }, { "epoch": 0.09185995997371187, "grad_norm": 0.5252687335014343, "learning_rate": 1.958646175233072e-05, "loss": 0.4117, "step": 4948 }, { "epoch": 0.0918970901111305, "grad_norm": 0.3897077143192291, "learning_rate": 1.9586129703148536e-05, "loss": 0.3536, "step": 4950 }, { "epoch": 0.09193422024854914, "grad_norm": 0.47567206621170044, "learning_rate": 1.9585797523527846e-05, "loss": 0.3898, "step": 4952 }, { "epoch": 0.09197135038596778, "grad_norm": 0.38732656836509705, "learning_rate": 1.9585465213473162e-05, "loss": 0.4326, "step": 4954 }, { "epoch": 0.09200848052338642, "grad_norm": 0.33776918053627014, "learning_rate": 1.9585132772989007e-05, "loss": 0.4891, "step": 4956 }, { "epoch": 0.09204561066080505, "grad_norm": 0.2822454273700714, "learning_rate": 1.958480020207991e-05, "loss": 0.3671, "step": 4958 }, { "epoch": 0.09208274079822369, "grad_norm": 0.5686154365539551, "learning_rate": 1.958446750075039e-05, "loss": 0.3593, "step": 4960 }, { "epoch": 0.09211987093564233, "grad_norm": 0.3431811034679413, "learning_rate": 1.9584134669004973e-05, "loss": 0.4032, "step": 4962 }, { "epoch": 0.09215700107306098, "grad_norm": 0.40061676502227783, "learning_rate": 1.9583801706848195e-05, "loss": 0.3971, "step": 4964 }, { "epoch": 0.0921941312104796, "grad_norm": 0.49755749106407166, "learning_rate": 1.958346861428458e-05, "loss": 0.4568, "step": 4966 }, { "epoch": 0.09223126134789825, "grad_norm": 0.3119446039199829, "learning_rate": 1.9583135391318666e-05, "loss": 0.37, "step": 4968 }, { "epoch": 0.09226839148531689, "grad_norm": 0.3328152000904083, "learning_rate": 1.958280203795498e-05, "loss": 0.3955, "step": 4970 }, { "epoch": 0.09230552162273553, "grad_norm": 0.33143851161003113, "learning_rate": 1.9582468554198065e-05, "loss": 0.3534, "step": 4972 }, { "epoch": 0.09234265176015416, "grad_norm": 0.34953469038009644, "learning_rate": 1.9582134940052455e-05, "loss": 0.2167, "step": 4974 }, { "epoch": 0.0923797818975728, "grad_norm": 0.3447701036930084, "learning_rate": 1.9581801195522688e-05, "loss": 0.3402, "step": 4976 }, { "epoch": 0.09241691203499144, "grad_norm": 0.3659749925136566, "learning_rate": 1.958146732061331e-05, "loss": 0.5568, "step": 4978 }, { "epoch": 0.09245404217241009, "grad_norm": 0.2932274639606476, "learning_rate": 1.9581133315328863e-05, "loss": 0.2979, "step": 4980 }, { "epoch": 0.09249117230982871, "grad_norm": 0.3699193596839905, "learning_rate": 1.9580799179673887e-05, "loss": 0.3244, "step": 4982 }, { "epoch": 0.09252830244724736, "grad_norm": 0.3554528057575226, "learning_rate": 1.9580464913652934e-05, "loss": 0.2859, "step": 4984 }, { "epoch": 0.092565432584666, "grad_norm": 0.32328295707702637, "learning_rate": 1.958013051727055e-05, "loss": 0.3323, "step": 4986 }, { "epoch": 0.09260256272208463, "grad_norm": 0.24554848670959473, "learning_rate": 1.9579795990531284e-05, "loss": 0.4453, "step": 4988 }, { "epoch": 0.09263969285950327, "grad_norm": 0.3720664381980896, "learning_rate": 1.9579461333439695e-05, "loss": 0.3158, "step": 4990 }, { "epoch": 0.09267682299692191, "grad_norm": 0.40524178743362427, "learning_rate": 1.9579126546000326e-05, "loss": 0.3411, "step": 4992 }, { "epoch": 0.09271395313434055, "grad_norm": 0.37116512656211853, "learning_rate": 1.957879162821774e-05, "loss": 0.488, "step": 4994 }, { "epoch": 0.09275108327175918, "grad_norm": 0.31278982758522034, "learning_rate": 1.957845658009649e-05, "loss": 0.3637, "step": 4996 }, { "epoch": 0.09278821340917782, "grad_norm": 0.37581178545951843, "learning_rate": 1.9578121401641135e-05, "loss": 0.362, "step": 4998 }, { "epoch": 0.09282534354659647, "grad_norm": 0.36685124039649963, "learning_rate": 1.957778609285624e-05, "loss": 0.2361, "step": 5000 }, { "epoch": 0.09286247368401511, "grad_norm": 0.410597026348114, "learning_rate": 1.9577450653746365e-05, "loss": 0.2514, "step": 5002 }, { "epoch": 0.09289960382143374, "grad_norm": 0.2828640043735504, "learning_rate": 1.9577115084316076e-05, "loss": 0.275, "step": 5004 }, { "epoch": 0.09293673395885238, "grad_norm": 0.3487793803215027, "learning_rate": 1.957677938456993e-05, "loss": 0.5032, "step": 5006 }, { "epoch": 0.09297386409627102, "grad_norm": 0.2856448292732239, "learning_rate": 1.957644355451251e-05, "loss": 0.2998, "step": 5008 }, { "epoch": 0.09301099423368966, "grad_norm": 0.48812663555145264, "learning_rate": 1.9576107594148377e-05, "loss": 0.3224, "step": 5010 }, { "epoch": 0.09304812437110829, "grad_norm": 0.23672828078269958, "learning_rate": 1.95757715034821e-05, "loss": 0.5631, "step": 5012 }, { "epoch": 0.09308525450852694, "grad_norm": 0.3710114359855652, "learning_rate": 1.957543528251826e-05, "loss": 0.4558, "step": 5014 }, { "epoch": 0.09312238464594558, "grad_norm": 0.4182606041431427, "learning_rate": 1.9575098931261425e-05, "loss": 0.1844, "step": 5016 }, { "epoch": 0.09315951478336422, "grad_norm": 0.3306743800640106, "learning_rate": 1.9574762449716172e-05, "loss": 0.3151, "step": 5018 }, { "epoch": 0.09319664492078285, "grad_norm": 0.4734524190425873, "learning_rate": 1.9574425837887087e-05, "loss": 0.3834, "step": 5020 }, { "epoch": 0.09323377505820149, "grad_norm": 0.3479258120059967, "learning_rate": 1.957408909577874e-05, "loss": 0.1905, "step": 5022 }, { "epoch": 0.09327090519562013, "grad_norm": 0.33753538131713867, "learning_rate": 1.957375222339572e-05, "loss": 0.3113, "step": 5024 }, { "epoch": 0.09330803533303876, "grad_norm": 0.2758099436759949, "learning_rate": 1.9573415220742613e-05, "loss": 0.316, "step": 5026 }, { "epoch": 0.0933451654704574, "grad_norm": 0.272198349237442, "learning_rate": 1.9573078087823995e-05, "loss": 0.3678, "step": 5028 }, { "epoch": 0.09338229560787605, "grad_norm": 0.26651960611343384, "learning_rate": 1.957274082464446e-05, "loss": 0.2121, "step": 5030 }, { "epoch": 0.09341942574529469, "grad_norm": 0.403515487909317, "learning_rate": 1.95724034312086e-05, "loss": 0.2852, "step": 5032 }, { "epoch": 0.09345655588271332, "grad_norm": 0.22324799001216888, "learning_rate": 1.9572065907520996e-05, "loss": 0.2286, "step": 5034 }, { "epoch": 0.09349368602013196, "grad_norm": 0.28638893365859985, "learning_rate": 1.957172825358625e-05, "loss": 0.3511, "step": 5036 }, { "epoch": 0.0935308161575506, "grad_norm": 0.3121299743652344, "learning_rate": 1.9571390469408956e-05, "loss": 0.1518, "step": 5038 }, { "epoch": 0.09356794629496924, "grad_norm": 0.25477030873298645, "learning_rate": 1.9571052554993702e-05, "loss": 0.4506, "step": 5040 }, { "epoch": 0.09360507643238787, "grad_norm": 0.40355873107910156, "learning_rate": 1.9570714510345093e-05, "loss": 0.3028, "step": 5042 }, { "epoch": 0.09364220656980651, "grad_norm": 0.3837955594062805, "learning_rate": 1.9570376335467727e-05, "loss": 0.2841, "step": 5044 }, { "epoch": 0.09367933670722516, "grad_norm": 0.42483997344970703, "learning_rate": 1.9570038030366207e-05, "loss": 0.395, "step": 5046 }, { "epoch": 0.0937164668446438, "grad_norm": 0.3975624144077301, "learning_rate": 1.9569699595045135e-05, "loss": 0.3393, "step": 5048 }, { "epoch": 0.09375359698206243, "grad_norm": 0.36357682943344116, "learning_rate": 1.9569361029509116e-05, "loss": 0.2822, "step": 5050 }, { "epoch": 0.09379072711948107, "grad_norm": 0.3386733829975128, "learning_rate": 1.9569022333762757e-05, "loss": 0.4156, "step": 5052 }, { "epoch": 0.09382785725689971, "grad_norm": 0.3303660750389099, "learning_rate": 1.9568683507810666e-05, "loss": 0.3708, "step": 5054 }, { "epoch": 0.09386498739431835, "grad_norm": 0.2501733601093292, "learning_rate": 1.956834455165745e-05, "loss": 0.2368, "step": 5056 }, { "epoch": 0.09390211753173698, "grad_norm": 0.2809985876083374, "learning_rate": 1.956800546530773e-05, "loss": 0.3797, "step": 5058 }, { "epoch": 0.09393924766915562, "grad_norm": 0.39119815826416016, "learning_rate": 1.9567666248766115e-05, "loss": 0.3922, "step": 5060 }, { "epoch": 0.09397637780657427, "grad_norm": 0.2788567841053009, "learning_rate": 1.956732690203722e-05, "loss": 0.4525, "step": 5062 }, { "epoch": 0.0940135079439929, "grad_norm": 0.42304113507270813, "learning_rate": 1.9566987425125665e-05, "loss": 0.3268, "step": 5064 }, { "epoch": 0.09405063808141154, "grad_norm": 0.28994688391685486, "learning_rate": 1.9566647818036064e-05, "loss": 0.2162, "step": 5066 }, { "epoch": 0.09408776821883018, "grad_norm": 0.23911580443382263, "learning_rate": 1.9566308080773043e-05, "loss": 0.3033, "step": 5068 }, { "epoch": 0.09412489835624882, "grad_norm": 0.3103496730327606, "learning_rate": 1.9565968213341224e-05, "loss": 0.346, "step": 5070 }, { "epoch": 0.09416202849366745, "grad_norm": 0.40186089277267456, "learning_rate": 1.9565628215745232e-05, "loss": 0.2603, "step": 5072 }, { "epoch": 0.09419915863108609, "grad_norm": 0.3488561511039734, "learning_rate": 1.9565288087989692e-05, "loss": 0.3838, "step": 5074 }, { "epoch": 0.09423628876850473, "grad_norm": 0.39364808797836304, "learning_rate": 1.9564947830079232e-05, "loss": 0.4645, "step": 5076 }, { "epoch": 0.09427341890592338, "grad_norm": 0.3354980945587158, "learning_rate": 1.956460744201848e-05, "loss": 0.3684, "step": 5078 }, { "epoch": 0.094310549043342, "grad_norm": 0.40367186069488525, "learning_rate": 1.956426692381207e-05, "loss": 0.3756, "step": 5080 }, { "epoch": 0.09434767918076065, "grad_norm": 0.40898868441581726, "learning_rate": 1.956392627546464e-05, "loss": 0.2665, "step": 5082 }, { "epoch": 0.09438480931817929, "grad_norm": 0.26182201504707336, "learning_rate": 1.956358549698082e-05, "loss": 0.4749, "step": 5084 }, { "epoch": 0.09442193945559793, "grad_norm": 0.38462671637535095, "learning_rate": 1.9563244588365243e-05, "loss": 0.1524, "step": 5086 }, { "epoch": 0.09445906959301656, "grad_norm": 0.26189687848091125, "learning_rate": 1.956290354962256e-05, "loss": 0.346, "step": 5088 }, { "epoch": 0.0944961997304352, "grad_norm": 0.4743839502334595, "learning_rate": 1.9562562380757397e-05, "loss": 0.4745, "step": 5090 }, { "epoch": 0.09453332986785384, "grad_norm": 0.2681953012943268, "learning_rate": 1.9562221081774405e-05, "loss": 0.4601, "step": 5092 }, { "epoch": 0.09457046000527249, "grad_norm": 0.38592663407325745, "learning_rate": 1.9561879652678225e-05, "loss": 0.467, "step": 5094 }, { "epoch": 0.09460759014269111, "grad_norm": 0.4928671717643738, "learning_rate": 1.9561538093473507e-05, "loss": 0.4582, "step": 5096 }, { "epoch": 0.09464472028010976, "grad_norm": 0.38182348012924194, "learning_rate": 1.9561196404164895e-05, "loss": 0.2429, "step": 5098 }, { "epoch": 0.0946818504175284, "grad_norm": 0.2938053011894226, "learning_rate": 1.9560854584757037e-05, "loss": 0.4313, "step": 5100 }, { "epoch": 0.09471898055494703, "grad_norm": 0.3560974895954132, "learning_rate": 1.9560512635254588e-05, "loss": 0.2333, "step": 5102 }, { "epoch": 0.09475611069236567, "grad_norm": 0.2717159688472748, "learning_rate": 1.95601705556622e-05, "loss": 0.1733, "step": 5104 }, { "epoch": 0.09479324082978431, "grad_norm": 0.4864628314971924, "learning_rate": 1.9559828345984524e-05, "loss": 0.4561, "step": 5106 }, { "epoch": 0.09483037096720295, "grad_norm": 0.4320840537548065, "learning_rate": 1.9559486006226224e-05, "loss": 0.3936, "step": 5108 }, { "epoch": 0.09486750110462158, "grad_norm": 0.3660784363746643, "learning_rate": 1.955914353639195e-05, "loss": 0.4333, "step": 5110 }, { "epoch": 0.09490463124204022, "grad_norm": 0.30558788776397705, "learning_rate": 1.9558800936486365e-05, "loss": 0.302, "step": 5112 }, { "epoch": 0.09494176137945887, "grad_norm": 0.355805903673172, "learning_rate": 1.955845820651413e-05, "loss": 0.4207, "step": 5114 }, { "epoch": 0.09497889151687751, "grad_norm": 0.36431872844696045, "learning_rate": 1.955811534647991e-05, "loss": 0.4426, "step": 5116 }, { "epoch": 0.09501602165429614, "grad_norm": 0.31632253527641296, "learning_rate": 1.9557772356388375e-05, "loss": 0.2737, "step": 5118 }, { "epoch": 0.09505315179171478, "grad_norm": 0.3225926458835602, "learning_rate": 1.9557429236244183e-05, "loss": 0.2434, "step": 5120 }, { "epoch": 0.09509028192913342, "grad_norm": 0.4353872835636139, "learning_rate": 1.9557085986052008e-05, "loss": 0.4066, "step": 5122 }, { "epoch": 0.09512741206655206, "grad_norm": 0.29980993270874023, "learning_rate": 1.9556742605816517e-05, "loss": 0.3311, "step": 5124 }, { "epoch": 0.09516454220397069, "grad_norm": 0.37580832839012146, "learning_rate": 1.9556399095542388e-05, "loss": 0.4116, "step": 5126 }, { "epoch": 0.09520167234138933, "grad_norm": 0.3811756670475006, "learning_rate": 1.955605545523429e-05, "loss": 0.2633, "step": 5128 }, { "epoch": 0.09523880247880798, "grad_norm": 0.5881888270378113, "learning_rate": 1.9555711684896902e-05, "loss": 0.2603, "step": 5130 }, { "epoch": 0.09527593261622662, "grad_norm": 0.3645700216293335, "learning_rate": 1.95553677845349e-05, "loss": 0.3792, "step": 5132 }, { "epoch": 0.09531306275364525, "grad_norm": 0.48785102367401123, "learning_rate": 1.9555023754152964e-05, "loss": 0.3885, "step": 5134 }, { "epoch": 0.09535019289106389, "grad_norm": 0.5230411291122437, "learning_rate": 1.9554679593755778e-05, "loss": 0.2939, "step": 5136 }, { "epoch": 0.09538732302848253, "grad_norm": 0.48963722586631775, "learning_rate": 1.9554335303348017e-05, "loss": 0.3726, "step": 5138 }, { "epoch": 0.09542445316590116, "grad_norm": 0.33693617582321167, "learning_rate": 1.9553990882934377e-05, "loss": 0.3823, "step": 5140 }, { "epoch": 0.0954615833033198, "grad_norm": 0.47429797053337097, "learning_rate": 1.9553646332519536e-05, "loss": 0.2852, "step": 5142 }, { "epoch": 0.09549871344073844, "grad_norm": 0.29600808024406433, "learning_rate": 1.9553301652108187e-05, "loss": 0.3106, "step": 5144 }, { "epoch": 0.09553584357815709, "grad_norm": 0.4525470435619354, "learning_rate": 1.9552956841705012e-05, "loss": 0.4306, "step": 5146 }, { "epoch": 0.09557297371557572, "grad_norm": 0.3995015621185303, "learning_rate": 1.9552611901314715e-05, "loss": 0.1079, "step": 5148 }, { "epoch": 0.09561010385299436, "grad_norm": 0.3839956521987915, "learning_rate": 1.955226683094198e-05, "loss": 0.3808, "step": 5150 }, { "epoch": 0.095647233990413, "grad_norm": 0.32153570652008057, "learning_rate": 1.9551921630591505e-05, "loss": 0.2056, "step": 5152 }, { "epoch": 0.09568436412783164, "grad_norm": 0.4203532338142395, "learning_rate": 1.955157630026799e-05, "loss": 0.4167, "step": 5154 }, { "epoch": 0.09572149426525027, "grad_norm": 0.3859817087650299, "learning_rate": 1.955123083997613e-05, "loss": 0.447, "step": 5156 }, { "epoch": 0.09575862440266891, "grad_norm": 0.34617263078689575, "learning_rate": 1.955088524972063e-05, "loss": 0.4364, "step": 5158 }, { "epoch": 0.09579575454008755, "grad_norm": 0.606687605381012, "learning_rate": 1.955053952950619e-05, "loss": 0.2586, "step": 5160 }, { "epoch": 0.0958328846775062, "grad_norm": 0.34183990955352783, "learning_rate": 1.9550193679337512e-05, "loss": 0.2831, "step": 5162 }, { "epoch": 0.09587001481492483, "grad_norm": 0.419951468706131, "learning_rate": 1.9549847699219304e-05, "loss": 0.3835, "step": 5164 }, { "epoch": 0.09590714495234347, "grad_norm": 0.3562627136707306, "learning_rate": 1.9549501589156276e-05, "loss": 0.3247, "step": 5166 }, { "epoch": 0.09594427508976211, "grad_norm": 0.43377891182899475, "learning_rate": 1.9549155349153136e-05, "loss": 0.2426, "step": 5168 }, { "epoch": 0.09598140522718075, "grad_norm": 0.36018314957618713, "learning_rate": 1.9548808979214594e-05, "loss": 0.293, "step": 5170 }, { "epoch": 0.09601853536459938, "grad_norm": 0.3135817348957062, "learning_rate": 1.9548462479345364e-05, "loss": 0.2077, "step": 5172 }, { "epoch": 0.09605566550201802, "grad_norm": 0.34467533230781555, "learning_rate": 1.9548115849550158e-05, "loss": 0.5506, "step": 5174 }, { "epoch": 0.09609279563943667, "grad_norm": 0.40089449286460876, "learning_rate": 1.9547769089833697e-05, "loss": 0.2338, "step": 5176 }, { "epoch": 0.0961299257768553, "grad_norm": 0.3767624497413635, "learning_rate": 1.9547422200200698e-05, "loss": 0.5357, "step": 5178 }, { "epoch": 0.09616705591427394, "grad_norm": 0.9575973153114319, "learning_rate": 1.9547075180655878e-05, "loss": 0.2131, "step": 5180 }, { "epoch": 0.09620418605169258, "grad_norm": 0.2937701642513275, "learning_rate": 1.9546728031203966e-05, "loss": 0.1602, "step": 5182 }, { "epoch": 0.09624131618911122, "grad_norm": 0.38985559344291687, "learning_rate": 1.9546380751849678e-05, "loss": 0.3263, "step": 5184 }, { "epoch": 0.09627844632652985, "grad_norm": 0.32413989305496216, "learning_rate": 1.9546033342597745e-05, "loss": 0.3023, "step": 5186 }, { "epoch": 0.09631557646394849, "grad_norm": 0.30475321412086487, "learning_rate": 1.954568580345289e-05, "loss": 0.2922, "step": 5188 }, { "epoch": 0.09635270660136713, "grad_norm": 0.36882004141807556, "learning_rate": 1.9545338134419844e-05, "loss": 0.4484, "step": 5190 }, { "epoch": 0.09638983673878578, "grad_norm": 0.29994648694992065, "learning_rate": 1.954499033550334e-05, "loss": 0.2438, "step": 5192 }, { "epoch": 0.0964269668762044, "grad_norm": 0.4560507833957672, "learning_rate": 1.9544642406708105e-05, "loss": 0.2112, "step": 5194 }, { "epoch": 0.09646409701362305, "grad_norm": 0.3699958622455597, "learning_rate": 1.9544294348038878e-05, "loss": 0.2584, "step": 5196 }, { "epoch": 0.09650122715104169, "grad_norm": 0.3390488922595978, "learning_rate": 1.9543946159500394e-05, "loss": 0.1729, "step": 5198 }, { "epoch": 0.09653835728846033, "grad_norm": 0.2970775365829468, "learning_rate": 1.954359784109739e-05, "loss": 0.3008, "step": 5200 }, { "epoch": 0.09657548742587896, "grad_norm": 0.4615436792373657, "learning_rate": 1.9543249392834603e-05, "loss": 0.4401, "step": 5202 }, { "epoch": 0.0966126175632976, "grad_norm": 0.4490794837474823, "learning_rate": 1.954290081471678e-05, "loss": 0.3737, "step": 5204 }, { "epoch": 0.09664974770071624, "grad_norm": 0.3618118464946747, "learning_rate": 1.954255210674866e-05, "loss": 0.3109, "step": 5206 }, { "epoch": 0.09668687783813489, "grad_norm": 0.4270692765712738, "learning_rate": 1.954220326893499e-05, "loss": 0.4952, "step": 5208 }, { "epoch": 0.09672400797555351, "grad_norm": 0.2871691882610321, "learning_rate": 1.9541854301280513e-05, "loss": 0.3625, "step": 5210 }, { "epoch": 0.09676113811297216, "grad_norm": 0.31491750478744507, "learning_rate": 1.954150520378998e-05, "loss": 0.3116, "step": 5212 }, { "epoch": 0.0967982682503908, "grad_norm": 0.5523895025253296, "learning_rate": 1.9541155976468146e-05, "loss": 0.3392, "step": 5214 }, { "epoch": 0.09683539838780943, "grad_norm": 0.45001524686813354, "learning_rate": 1.9540806619319755e-05, "loss": 0.3753, "step": 5216 }, { "epoch": 0.09687252852522807, "grad_norm": 0.48795196413993835, "learning_rate": 1.9540457132349565e-05, "loss": 0.2818, "step": 5218 }, { "epoch": 0.09690965866264671, "grad_norm": 0.34552592039108276, "learning_rate": 1.954010751556233e-05, "loss": 0.1735, "step": 5220 }, { "epoch": 0.09694678880006535, "grad_norm": 0.4129865765571594, "learning_rate": 1.953975776896281e-05, "loss": 0.3015, "step": 5222 }, { "epoch": 0.09698391893748398, "grad_norm": 0.43247342109680176, "learning_rate": 1.9539407892555758e-05, "loss": 0.2361, "step": 5224 }, { "epoch": 0.09702104907490262, "grad_norm": 0.2945445775985718, "learning_rate": 1.953905788634594e-05, "loss": 0.4375, "step": 5226 }, { "epoch": 0.09705817921232127, "grad_norm": 0.45246607065200806, "learning_rate": 1.9538707750338115e-05, "loss": 0.383, "step": 5228 }, { "epoch": 0.09709530934973991, "grad_norm": 0.8424841165542603, "learning_rate": 1.953835748453705e-05, "loss": 0.2129, "step": 5230 }, { "epoch": 0.09713243948715854, "grad_norm": 0.46386075019836426, "learning_rate": 1.9538007088947513e-05, "loss": 0.3822, "step": 5232 }, { "epoch": 0.09716956962457718, "grad_norm": 0.35502320528030396, "learning_rate": 1.9537656563574267e-05, "loss": 0.2684, "step": 5234 }, { "epoch": 0.09720669976199582, "grad_norm": 0.31353959441185, "learning_rate": 1.9537305908422084e-05, "loss": 0.372, "step": 5236 }, { "epoch": 0.09724382989941446, "grad_norm": 0.32439714670181274, "learning_rate": 1.9536955123495736e-05, "loss": 0.5392, "step": 5238 }, { "epoch": 0.09728096003683309, "grad_norm": 0.38243433833122253, "learning_rate": 1.9536604208799994e-05, "loss": 0.4967, "step": 5240 }, { "epoch": 0.09731809017425173, "grad_norm": 0.39996588230133057, "learning_rate": 1.9536253164339634e-05, "loss": 0.3328, "step": 5242 }, { "epoch": 0.09735522031167038, "grad_norm": 0.34999120235443115, "learning_rate": 1.9535901990119434e-05, "loss": 0.2745, "step": 5244 }, { "epoch": 0.09739235044908902, "grad_norm": 0.5402022004127502, "learning_rate": 1.953555068614417e-05, "loss": 0.4193, "step": 5246 }, { "epoch": 0.09742948058650765, "grad_norm": 0.3582979142665863, "learning_rate": 1.9535199252418623e-05, "loss": 0.3934, "step": 5248 }, { "epoch": 0.09746661072392629, "grad_norm": 0.42996945977211, "learning_rate": 1.9534847688947577e-05, "loss": 0.2625, "step": 5250 }, { "epoch": 0.09750374086134493, "grad_norm": 0.3576291501522064, "learning_rate": 1.9534495995735816e-05, "loss": 0.2958, "step": 5252 }, { "epoch": 0.09754087099876356, "grad_norm": 0.3733225166797638, "learning_rate": 1.953414417278812e-05, "loss": 0.3195, "step": 5254 }, { "epoch": 0.0975780011361822, "grad_norm": 0.43775293231010437, "learning_rate": 1.953379222010928e-05, "loss": 0.3097, "step": 5256 }, { "epoch": 0.09761513127360084, "grad_norm": 0.2613062560558319, "learning_rate": 1.953344013770409e-05, "loss": 0.3565, "step": 5258 }, { "epoch": 0.09765226141101949, "grad_norm": 0.31612250208854675, "learning_rate": 1.9533087925577332e-05, "loss": 0.2586, "step": 5260 }, { "epoch": 0.09768939154843811, "grad_norm": 0.3904920220375061, "learning_rate": 1.95327355837338e-05, "loss": 0.263, "step": 5262 }, { "epoch": 0.09772652168585676, "grad_norm": 0.37386247515678406, "learning_rate": 1.9532383112178292e-05, "loss": 0.5239, "step": 5264 }, { "epoch": 0.0977636518232754, "grad_norm": 0.5240536332130432, "learning_rate": 1.9532030510915604e-05, "loss": 0.1838, "step": 5266 }, { "epoch": 0.09780078196069404, "grad_norm": 0.42270922660827637, "learning_rate": 1.953167777995053e-05, "loss": 0.453, "step": 5268 }, { "epoch": 0.09783791209811267, "grad_norm": 0.4509906470775604, "learning_rate": 1.9531324919287876e-05, "loss": 0.3839, "step": 5270 }, { "epoch": 0.09787504223553131, "grad_norm": 0.38611701130867004, "learning_rate": 1.9530971928932438e-05, "loss": 0.5229, "step": 5272 }, { "epoch": 0.09791217237294995, "grad_norm": 0.3556097745895386, "learning_rate": 1.953061880888902e-05, "loss": 0.3473, "step": 5274 }, { "epoch": 0.0979493025103686, "grad_norm": 0.29100754857063293, "learning_rate": 1.9530265559162426e-05, "loss": 0.356, "step": 5276 }, { "epoch": 0.09798643264778722, "grad_norm": 0.4549669921398163, "learning_rate": 1.9529912179757468e-05, "loss": 0.4548, "step": 5278 }, { "epoch": 0.09802356278520587, "grad_norm": 0.47515588998794556, "learning_rate": 1.9529558670678948e-05, "loss": 0.3572, "step": 5280 }, { "epoch": 0.09806069292262451, "grad_norm": 0.31426772475242615, "learning_rate": 1.9529205031931678e-05, "loss": 0.3466, "step": 5282 }, { "epoch": 0.09809782306004315, "grad_norm": 0.3713514804840088, "learning_rate": 1.9528851263520472e-05, "loss": 0.3487, "step": 5284 }, { "epoch": 0.09813495319746178, "grad_norm": 0.40842631459236145, "learning_rate": 1.9528497365450142e-05, "loss": 0.4509, "step": 5286 }, { "epoch": 0.09817208333488042, "grad_norm": 0.3724248707294464, "learning_rate": 1.9528143337725505e-05, "loss": 0.4582, "step": 5288 }, { "epoch": 0.09820921347229906, "grad_norm": 0.27923640608787537, "learning_rate": 1.9527789180351376e-05, "loss": 0.2932, "step": 5290 }, { "epoch": 0.09824634360971769, "grad_norm": 0.3695182800292969, "learning_rate": 1.9527434893332576e-05, "loss": 0.4083, "step": 5292 }, { "epoch": 0.09828347374713634, "grad_norm": 0.3551189601421356, "learning_rate": 1.9527080476673924e-05, "loss": 0.2452, "step": 5294 }, { "epoch": 0.09832060388455498, "grad_norm": 0.3665805160999298, "learning_rate": 1.9526725930380246e-05, "loss": 0.4893, "step": 5296 }, { "epoch": 0.09835773402197362, "grad_norm": 0.41149330139160156, "learning_rate": 1.9526371254456362e-05, "loss": 0.2324, "step": 5298 }, { "epoch": 0.09839486415939225, "grad_norm": 0.46584969758987427, "learning_rate": 1.9526016448907102e-05, "loss": 0.3626, "step": 5300 }, { "epoch": 0.09843199429681089, "grad_norm": 0.3277958035469055, "learning_rate": 1.952566151373729e-05, "loss": 0.1985, "step": 5302 }, { "epoch": 0.09846912443422953, "grad_norm": 0.2779730558395386, "learning_rate": 1.952530644895176e-05, "loss": 0.3503, "step": 5304 }, { "epoch": 0.09850625457164817, "grad_norm": 0.39081552624702454, "learning_rate": 1.9524951254555335e-05, "loss": 0.3922, "step": 5306 }, { "epoch": 0.0985433847090668, "grad_norm": 0.26650571823120117, "learning_rate": 1.9524595930552858e-05, "loss": 0.3063, "step": 5308 }, { "epoch": 0.09858051484648545, "grad_norm": 0.28204792737960815, "learning_rate": 1.952424047694916e-05, "loss": 0.3786, "step": 5310 }, { "epoch": 0.09861764498390409, "grad_norm": 0.3474501967430115, "learning_rate": 1.952388489374908e-05, "loss": 0.2821, "step": 5312 }, { "epoch": 0.09865477512132273, "grad_norm": 0.41910436749458313, "learning_rate": 1.952352918095745e-05, "loss": 0.3478, "step": 5314 }, { "epoch": 0.09869190525874136, "grad_norm": 0.30340978503227234, "learning_rate": 1.9523173338579112e-05, "loss": 0.3482, "step": 5316 }, { "epoch": 0.09872903539616, "grad_norm": 0.28818002343177795, "learning_rate": 1.9522817366618914e-05, "loss": 0.4146, "step": 5318 }, { "epoch": 0.09876616553357864, "grad_norm": 0.40755388140678406, "learning_rate": 1.9522461265081697e-05, "loss": 0.2265, "step": 5320 }, { "epoch": 0.09880329567099728, "grad_norm": 0.2984924912452698, "learning_rate": 1.9522105033972304e-05, "loss": 0.1711, "step": 5322 }, { "epoch": 0.09884042580841591, "grad_norm": 0.32700037956237793, "learning_rate": 1.952174867329558e-05, "loss": 0.3759, "step": 5324 }, { "epoch": 0.09887755594583456, "grad_norm": 0.3570864796638489, "learning_rate": 1.952139218305638e-05, "loss": 0.3235, "step": 5326 }, { "epoch": 0.0989146860832532, "grad_norm": 0.32278236746788025, "learning_rate": 1.9521035563259553e-05, "loss": 0.3323, "step": 5328 }, { "epoch": 0.09895181622067183, "grad_norm": 0.3525971472263336, "learning_rate": 1.952067881390995e-05, "loss": 0.3428, "step": 5330 }, { "epoch": 0.09898894635809047, "grad_norm": 0.37292882800102234, "learning_rate": 1.9520321935012427e-05, "loss": 0.3548, "step": 5332 }, { "epoch": 0.09902607649550911, "grad_norm": 0.4355502426624298, "learning_rate": 1.951996492657184e-05, "loss": 0.4244, "step": 5334 }, { "epoch": 0.09906320663292775, "grad_norm": 1.4056992530822754, "learning_rate": 1.9519607788593042e-05, "loss": 0.4524, "step": 5336 }, { "epoch": 0.09910033677034638, "grad_norm": 0.35588961839675903, "learning_rate": 1.95192505210809e-05, "loss": 0.4264, "step": 5338 }, { "epoch": 0.09913746690776502, "grad_norm": 0.33673223853111267, "learning_rate": 1.9518893124040272e-05, "loss": 0.2656, "step": 5340 }, { "epoch": 0.09917459704518367, "grad_norm": 0.327662855386734, "learning_rate": 1.951853559747602e-05, "loss": 0.185, "step": 5342 }, { "epoch": 0.09921172718260231, "grad_norm": 0.3220953047275543, "learning_rate": 1.951817794139301e-05, "loss": 0.3434, "step": 5344 }, { "epoch": 0.09924885732002094, "grad_norm": 0.30883336067199707, "learning_rate": 1.9517820155796104e-05, "loss": 0.2657, "step": 5346 }, { "epoch": 0.09928598745743958, "grad_norm": 0.30116966366767883, "learning_rate": 1.9517462240690182e-05, "loss": 0.3001, "step": 5348 }, { "epoch": 0.09932311759485822, "grad_norm": 0.33159059286117554, "learning_rate": 1.95171041960801e-05, "loss": 0.2763, "step": 5350 }, { "epoch": 0.09936024773227686, "grad_norm": 0.31253114342689514, "learning_rate": 1.9516746021970742e-05, "loss": 0.4359, "step": 5352 }, { "epoch": 0.09939737786969549, "grad_norm": 0.38211649656295776, "learning_rate": 1.951638771836698e-05, "loss": 0.2949, "step": 5354 }, { "epoch": 0.09943450800711413, "grad_norm": 0.36627936363220215, "learning_rate": 1.951602928527368e-05, "loss": 0.2508, "step": 5356 }, { "epoch": 0.09947163814453278, "grad_norm": 0.33420252799987793, "learning_rate": 1.9515670722695725e-05, "loss": 0.3512, "step": 5358 }, { "epoch": 0.09950876828195142, "grad_norm": 0.29587164521217346, "learning_rate": 1.9515312030637997e-05, "loss": 0.3399, "step": 5360 }, { "epoch": 0.09954589841937005, "grad_norm": 0.2664264142513275, "learning_rate": 1.951495320910537e-05, "loss": 0.3099, "step": 5362 }, { "epoch": 0.09958302855678869, "grad_norm": 0.26353761553764343, "learning_rate": 1.9514594258102735e-05, "loss": 0.2825, "step": 5364 }, { "epoch": 0.09962015869420733, "grad_norm": 0.4898408353328705, "learning_rate": 1.9514235177634972e-05, "loss": 0.3378, "step": 5366 }, { "epoch": 0.09965728883162596, "grad_norm": 0.29862871766090393, "learning_rate": 1.951387596770696e-05, "loss": 0.2809, "step": 5368 }, { "epoch": 0.0996944189690446, "grad_norm": 0.28889936208724976, "learning_rate": 1.95135166283236e-05, "loss": 0.5213, "step": 5370 }, { "epoch": 0.09973154910646324, "grad_norm": 0.3810235559940338, "learning_rate": 1.9513157159489772e-05, "loss": 0.3083, "step": 5372 }, { "epoch": 0.09976867924388189, "grad_norm": 0.4826198220252991, "learning_rate": 1.9512797561210372e-05, "loss": 0.2303, "step": 5374 }, { "epoch": 0.09980580938130051, "grad_norm": 0.2561202645301819, "learning_rate": 1.951243783349029e-05, "loss": 0.3414, "step": 5376 }, { "epoch": 0.09984293951871916, "grad_norm": 0.4537256062030792, "learning_rate": 1.9512077976334424e-05, "loss": 0.4304, "step": 5378 }, { "epoch": 0.0998800696561378, "grad_norm": 0.30643871426582336, "learning_rate": 1.9511717989747666e-05, "loss": 0.3015, "step": 5380 }, { "epoch": 0.09991719979355644, "grad_norm": 0.23798967897891998, "learning_rate": 1.951135787373492e-05, "loss": 0.4162, "step": 5382 }, { "epoch": 0.09995432993097507, "grad_norm": 0.2725050449371338, "learning_rate": 1.9510997628301083e-05, "loss": 0.3167, "step": 5384 }, { "epoch": 0.09999146006839371, "grad_norm": 0.22903631627559662, "learning_rate": 1.9510637253451056e-05, "loss": 0.3809, "step": 5386 }, { "epoch": 0.10002859020581235, "grad_norm": 0.38538771867752075, "learning_rate": 1.9510276749189742e-05, "loss": 0.2494, "step": 5388 }, { "epoch": 0.100065720343231, "grad_norm": 0.5259299874305725, "learning_rate": 1.9509916115522048e-05, "loss": 0.5736, "step": 5390 }, { "epoch": 0.10010285048064962, "grad_norm": 0.3985842168331146, "learning_rate": 1.9509555352452883e-05, "loss": 0.239, "step": 5392 }, { "epoch": 0.10013998061806827, "grad_norm": 0.2761503756046295, "learning_rate": 1.9509194459987154e-05, "loss": 0.4563, "step": 5394 }, { "epoch": 0.10017711075548691, "grad_norm": 0.433281809091568, "learning_rate": 1.950883343812977e-05, "loss": 0.3784, "step": 5396 }, { "epoch": 0.10021424089290555, "grad_norm": 0.326143354177475, "learning_rate": 1.9508472286885646e-05, "loss": 0.28, "step": 5398 }, { "epoch": 0.10025137103032418, "grad_norm": 0.3231518268585205, "learning_rate": 1.95081110062597e-05, "loss": 0.3623, "step": 5400 }, { "epoch": 0.10028850116774282, "grad_norm": 0.24535638093948364, "learning_rate": 1.9507749596256836e-05, "loss": 0.1355, "step": 5402 }, { "epoch": 0.10032563130516146, "grad_norm": 0.33904486894607544, "learning_rate": 1.9507388056881982e-05, "loss": 0.5071, "step": 5404 }, { "epoch": 0.10036276144258009, "grad_norm": 0.2902483642101288, "learning_rate": 1.9507026388140057e-05, "loss": 0.3308, "step": 5406 }, { "epoch": 0.10039989157999873, "grad_norm": 0.4825877547264099, "learning_rate": 1.9506664590035974e-05, "loss": 0.3736, "step": 5408 }, { "epoch": 0.10043702171741738, "grad_norm": 0.25483444333076477, "learning_rate": 1.9506302662574666e-05, "loss": 0.3465, "step": 5410 }, { "epoch": 0.10047415185483602, "grad_norm": 0.40256524085998535, "learning_rate": 1.9505940605761052e-05, "loss": 0.3799, "step": 5412 }, { "epoch": 0.10051128199225465, "grad_norm": 0.34911879897117615, "learning_rate": 1.950557841960006e-05, "loss": 0.4189, "step": 5414 }, { "epoch": 0.10054841212967329, "grad_norm": 0.47168174386024475, "learning_rate": 1.9505216104096617e-05, "loss": 0.3016, "step": 5416 }, { "epoch": 0.10058554226709193, "grad_norm": 0.327131986618042, "learning_rate": 1.9504853659255655e-05, "loss": 0.2868, "step": 5418 }, { "epoch": 0.10062267240451057, "grad_norm": 0.25280463695526123, "learning_rate": 1.950449108508211e-05, "loss": 0.2506, "step": 5420 }, { "epoch": 0.1006598025419292, "grad_norm": 0.2926620543003082, "learning_rate": 1.9504128381580904e-05, "loss": 0.5166, "step": 5422 }, { "epoch": 0.10069693267934784, "grad_norm": 0.470705509185791, "learning_rate": 1.9503765548756982e-05, "loss": 0.2277, "step": 5424 }, { "epoch": 0.10073406281676649, "grad_norm": 0.5013469457626343, "learning_rate": 1.9503402586615276e-05, "loss": 0.3182, "step": 5426 }, { "epoch": 0.10077119295418513, "grad_norm": 0.29224976897239685, "learning_rate": 1.950303949516073e-05, "loss": 0.4388, "step": 5428 }, { "epoch": 0.10080832309160376, "grad_norm": 0.3648039400577545, "learning_rate": 1.9502676274398278e-05, "loss": 0.4566, "step": 5430 }, { "epoch": 0.1008454532290224, "grad_norm": 0.37001094222068787, "learning_rate": 1.9502312924332866e-05, "loss": 0.3175, "step": 5432 }, { "epoch": 0.10088258336644104, "grad_norm": 0.28458839654922485, "learning_rate": 1.950194944496944e-05, "loss": 0.2475, "step": 5434 }, { "epoch": 0.10091971350385968, "grad_norm": 0.30913984775543213, "learning_rate": 1.950158583631294e-05, "loss": 0.3869, "step": 5436 }, { "epoch": 0.10095684364127831, "grad_norm": 0.3252734839916229, "learning_rate": 1.9501222098368324e-05, "loss": 0.3185, "step": 5438 }, { "epoch": 0.10099397377869695, "grad_norm": 0.35855549573898315, "learning_rate": 1.950085823114053e-05, "loss": 0.3551, "step": 5440 }, { "epoch": 0.1010311039161156, "grad_norm": 0.4488309323787689, "learning_rate": 1.9500494234634514e-05, "loss": 0.3972, "step": 5442 }, { "epoch": 0.10106823405353423, "grad_norm": 0.2795882821083069, "learning_rate": 1.9500130108855228e-05, "loss": 0.3448, "step": 5444 }, { "epoch": 0.10110536419095287, "grad_norm": 0.32300645112991333, "learning_rate": 1.9499765853807632e-05, "loss": 0.4746, "step": 5446 }, { "epoch": 0.10114249432837151, "grad_norm": 0.42869728803634644, "learning_rate": 1.949940146949667e-05, "loss": 0.3796, "step": 5448 }, { "epoch": 0.10117962446579015, "grad_norm": 0.3509422540664673, "learning_rate": 1.9499036955927316e-05, "loss": 0.3397, "step": 5450 }, { "epoch": 0.10121675460320878, "grad_norm": 0.33761149644851685, "learning_rate": 1.9498672313104516e-05, "loss": 0.4286, "step": 5452 }, { "epoch": 0.10125388474062742, "grad_norm": 0.35593512654304504, "learning_rate": 1.949830754103324e-05, "loss": 0.1968, "step": 5454 }, { "epoch": 0.10129101487804607, "grad_norm": 0.5173635482788086, "learning_rate": 1.949794263971845e-05, "loss": 0.3539, "step": 5456 }, { "epoch": 0.10132814501546471, "grad_norm": 0.2860984206199646, "learning_rate": 1.9497577609165108e-05, "loss": 0.3764, "step": 5458 }, { "epoch": 0.10136527515288334, "grad_norm": 0.27472686767578125, "learning_rate": 1.9497212449378185e-05, "loss": 0.2928, "step": 5460 }, { "epoch": 0.10140240529030198, "grad_norm": 0.3337462544441223, "learning_rate": 1.9496847160362647e-05, "loss": 0.3737, "step": 5462 }, { "epoch": 0.10143953542772062, "grad_norm": 0.36239540576934814, "learning_rate": 1.9496481742123468e-05, "loss": 0.4175, "step": 5464 }, { "epoch": 0.10147666556513926, "grad_norm": 0.36568373441696167, "learning_rate": 1.9496116194665615e-05, "loss": 0.1936, "step": 5466 }, { "epoch": 0.10151379570255789, "grad_norm": 0.3121700882911682, "learning_rate": 1.9495750517994065e-05, "loss": 0.2305, "step": 5468 }, { "epoch": 0.10155092583997653, "grad_norm": 0.4357566833496094, "learning_rate": 1.9495384712113795e-05, "loss": 0.3119, "step": 5470 }, { "epoch": 0.10158805597739518, "grad_norm": 0.246283158659935, "learning_rate": 1.9495018777029782e-05, "loss": 0.3571, "step": 5472 }, { "epoch": 0.10162518611481382, "grad_norm": 0.36319881677627563, "learning_rate": 1.9494652712747e-05, "loss": 0.2209, "step": 5474 }, { "epoch": 0.10166231625223245, "grad_norm": 0.46618902683258057, "learning_rate": 1.9494286519270437e-05, "loss": 0.3714, "step": 5476 }, { "epoch": 0.10169944638965109, "grad_norm": 0.4208773970603943, "learning_rate": 1.9493920196605073e-05, "loss": 0.346, "step": 5478 }, { "epoch": 0.10173657652706973, "grad_norm": 0.3264392912387848, "learning_rate": 1.9493553744755895e-05, "loss": 0.2895, "step": 5480 }, { "epoch": 0.10177370666448836, "grad_norm": 0.44028425216674805, "learning_rate": 1.9493187163727883e-05, "loss": 0.341, "step": 5482 }, { "epoch": 0.101810836801907, "grad_norm": 0.540948748588562, "learning_rate": 1.949282045352603e-05, "loss": 0.444, "step": 5484 }, { "epoch": 0.10184796693932564, "grad_norm": 0.437313973903656, "learning_rate": 1.9492453614155328e-05, "loss": 0.4575, "step": 5486 }, { "epoch": 0.10188509707674429, "grad_norm": 0.37025347352027893, "learning_rate": 1.949208664562076e-05, "loss": 0.3717, "step": 5488 }, { "epoch": 0.10192222721416291, "grad_norm": 0.38311830163002014, "learning_rate": 1.949171954792733e-05, "loss": 0.3728, "step": 5490 }, { "epoch": 0.10195935735158156, "grad_norm": 0.34568244218826294, "learning_rate": 1.9491352321080026e-05, "loss": 0.3232, "step": 5492 }, { "epoch": 0.1019964874890002, "grad_norm": 0.391882985830307, "learning_rate": 1.9490984965083844e-05, "loss": 0.1835, "step": 5494 }, { "epoch": 0.10203361762641884, "grad_norm": 0.33830374479293823, "learning_rate": 1.949061747994379e-05, "loss": 0.331, "step": 5496 }, { "epoch": 0.10207074776383747, "grad_norm": 0.305108904838562, "learning_rate": 1.949024986566486e-05, "loss": 0.2525, "step": 5498 }, { "epoch": 0.10210787790125611, "grad_norm": 0.29561322927474976, "learning_rate": 1.9489882122252054e-05, "loss": 0.3222, "step": 5500 }, { "epoch": 0.10214500803867475, "grad_norm": 0.31296899914741516, "learning_rate": 1.9489514249710377e-05, "loss": 0.2674, "step": 5502 }, { "epoch": 0.1021821381760934, "grad_norm": 0.41582176089286804, "learning_rate": 1.9489146248044836e-05, "loss": 0.3928, "step": 5504 }, { "epoch": 0.10221926831351202, "grad_norm": 0.31509995460510254, "learning_rate": 1.9488778117260438e-05, "loss": 0.4059, "step": 5506 }, { "epoch": 0.10225639845093067, "grad_norm": 0.4125397503376007, "learning_rate": 1.948840985736219e-05, "loss": 0.4757, "step": 5508 }, { "epoch": 0.10229352858834931, "grad_norm": 0.30273544788360596, "learning_rate": 1.948804146835511e-05, "loss": 0.2823, "step": 5510 }, { "epoch": 0.10233065872576795, "grad_norm": 0.46629342436790466, "learning_rate": 1.9487672950244203e-05, "loss": 0.4176, "step": 5512 }, { "epoch": 0.10236778886318658, "grad_norm": 0.2867383062839508, "learning_rate": 1.9487304303034483e-05, "loss": 0.2607, "step": 5514 }, { "epoch": 0.10240491900060522, "grad_norm": 0.25056639313697815, "learning_rate": 1.9486935526730973e-05, "loss": 0.293, "step": 5516 }, { "epoch": 0.10244204913802386, "grad_norm": 0.386088490486145, "learning_rate": 1.9486566621338685e-05, "loss": 0.1782, "step": 5518 }, { "epoch": 0.10247917927544249, "grad_norm": 0.40196260809898376, "learning_rate": 1.9486197586862642e-05, "loss": 0.2662, "step": 5520 }, { "epoch": 0.10251630941286113, "grad_norm": 0.7502794861793518, "learning_rate": 1.9485828423307865e-05, "loss": 0.4629, "step": 5522 }, { "epoch": 0.10255343955027978, "grad_norm": 0.3743736147880554, "learning_rate": 1.9485459130679375e-05, "loss": 0.2583, "step": 5524 }, { "epoch": 0.10259056968769842, "grad_norm": 0.3248589336872101, "learning_rate": 1.94850897089822e-05, "loss": 0.4207, "step": 5526 }, { "epoch": 0.10262769982511705, "grad_norm": 0.3544054925441742, "learning_rate": 1.9484720158221365e-05, "loss": 0.2415, "step": 5528 }, { "epoch": 0.10266482996253569, "grad_norm": 0.6023651361465454, "learning_rate": 1.9484350478401897e-05, "loss": 0.2938, "step": 5530 }, { "epoch": 0.10270196009995433, "grad_norm": 0.3340618908405304, "learning_rate": 1.948398066952883e-05, "loss": 0.2678, "step": 5532 }, { "epoch": 0.10273909023737297, "grad_norm": 0.3078499138355255, "learning_rate": 1.9483610731607193e-05, "loss": 0.3749, "step": 5534 }, { "epoch": 0.1027762203747916, "grad_norm": 0.3383847177028656, "learning_rate": 1.9483240664642016e-05, "loss": 0.3723, "step": 5536 }, { "epoch": 0.10281335051221024, "grad_norm": 0.3159855306148529, "learning_rate": 1.9482870468638346e-05, "loss": 0.3411, "step": 5538 }, { "epoch": 0.10285048064962889, "grad_norm": 0.49799844622612, "learning_rate": 1.948250014360121e-05, "loss": 0.2778, "step": 5540 }, { "epoch": 0.10288761078704753, "grad_norm": 0.41063013672828674, "learning_rate": 1.9482129689535655e-05, "loss": 0.2612, "step": 5542 }, { "epoch": 0.10292474092446616, "grad_norm": 0.4678843021392822, "learning_rate": 1.948175910644671e-05, "loss": 0.3867, "step": 5544 }, { "epoch": 0.1029618710618848, "grad_norm": 0.49532073736190796, "learning_rate": 1.9481388394339428e-05, "loss": 0.5261, "step": 5546 }, { "epoch": 0.10299900119930344, "grad_norm": 0.5383819341659546, "learning_rate": 1.948101755321885e-05, "loss": 0.2307, "step": 5548 }, { "epoch": 0.10303613133672208, "grad_norm": 0.3777703642845154, "learning_rate": 1.9480646583090024e-05, "loss": 0.3078, "step": 5550 }, { "epoch": 0.10307326147414071, "grad_norm": 0.3909919261932373, "learning_rate": 1.9480275483957992e-05, "loss": 0.3466, "step": 5552 }, { "epoch": 0.10311039161155935, "grad_norm": 0.32706278562545776, "learning_rate": 1.947990425582781e-05, "loss": 0.2415, "step": 5554 }, { "epoch": 0.103147521748978, "grad_norm": 0.44836801290512085, "learning_rate": 1.9479532898704528e-05, "loss": 0.273, "step": 5556 }, { "epoch": 0.10318465188639662, "grad_norm": 0.31327760219573975, "learning_rate": 1.9479161412593196e-05, "loss": 0.2639, "step": 5558 }, { "epoch": 0.10322178202381527, "grad_norm": 0.42859169840812683, "learning_rate": 1.947878979749887e-05, "loss": 0.2693, "step": 5560 }, { "epoch": 0.10325891216123391, "grad_norm": 0.26981452107429504, "learning_rate": 1.9478418053426605e-05, "loss": 0.3979, "step": 5562 }, { "epoch": 0.10329604229865255, "grad_norm": 0.3653850853443146, "learning_rate": 1.9478046180381468e-05, "loss": 0.3371, "step": 5564 }, { "epoch": 0.10333317243607118, "grad_norm": 0.29580119252204895, "learning_rate": 1.947767417836851e-05, "loss": 0.3759, "step": 5566 }, { "epoch": 0.10337030257348982, "grad_norm": 0.34580251574516296, "learning_rate": 1.947730204739279e-05, "loss": 0.3574, "step": 5568 }, { "epoch": 0.10340743271090846, "grad_norm": 0.22512584924697876, "learning_rate": 1.947692978745938e-05, "loss": 0.1099, "step": 5570 }, { "epoch": 0.1034445628483271, "grad_norm": 0.4294097125530243, "learning_rate": 1.9476557398573344e-05, "loss": 0.2895, "step": 5572 }, { "epoch": 0.10348169298574574, "grad_norm": 0.3201062083244324, "learning_rate": 1.9476184880739747e-05, "loss": 0.385, "step": 5574 }, { "epoch": 0.10351882312316438, "grad_norm": 0.4645669162273407, "learning_rate": 1.9475812233963656e-05, "loss": 0.3803, "step": 5576 }, { "epoch": 0.10355595326058302, "grad_norm": 0.38869452476501465, "learning_rate": 1.947543945825015e-05, "loss": 0.4267, "step": 5578 }, { "epoch": 0.10359308339800166, "grad_norm": 0.3133956491947174, "learning_rate": 1.9475066553604288e-05, "loss": 0.2642, "step": 5580 }, { "epoch": 0.10363021353542029, "grad_norm": 0.4013133943080902, "learning_rate": 1.9474693520031158e-05, "loss": 0.3062, "step": 5582 }, { "epoch": 0.10366734367283893, "grad_norm": 0.3490278422832489, "learning_rate": 1.947432035753582e-05, "loss": 0.3235, "step": 5584 }, { "epoch": 0.10370447381025757, "grad_norm": 0.42173048853874207, "learning_rate": 1.9473947066123368e-05, "loss": 0.4087, "step": 5586 }, { "epoch": 0.10374160394767622, "grad_norm": 0.3163757622241974, "learning_rate": 1.947357364579887e-05, "loss": 0.2032, "step": 5588 }, { "epoch": 0.10377873408509485, "grad_norm": 0.3753913938999176, "learning_rate": 1.9473200096567416e-05, "loss": 0.2795, "step": 5590 }, { "epoch": 0.10381586422251349, "grad_norm": 0.3602595329284668, "learning_rate": 1.947282641843408e-05, "loss": 0.1914, "step": 5592 }, { "epoch": 0.10385299435993213, "grad_norm": 0.25141608715057373, "learning_rate": 1.9472452611403952e-05, "loss": 0.3458, "step": 5594 }, { "epoch": 0.10389012449735076, "grad_norm": 0.25714215636253357, "learning_rate": 1.947207867548212e-05, "loss": 0.3501, "step": 5596 }, { "epoch": 0.1039272546347694, "grad_norm": 0.42117518186569214, "learning_rate": 1.9471704610673668e-05, "loss": 0.3917, "step": 5598 }, { "epoch": 0.10396438477218804, "grad_norm": 0.4055294990539551, "learning_rate": 1.9471330416983684e-05, "loss": 0.2976, "step": 5600 }, { "epoch": 0.10400151490960668, "grad_norm": 0.23825065791606903, "learning_rate": 1.9470956094417265e-05, "loss": 0.3418, "step": 5602 }, { "epoch": 0.10403864504702531, "grad_norm": 0.3468947410583496, "learning_rate": 1.9470581642979506e-05, "loss": 0.35, "step": 5604 }, { "epoch": 0.10407577518444396, "grad_norm": 0.3699585795402527, "learning_rate": 1.9470207062675496e-05, "loss": 0.2119, "step": 5606 }, { "epoch": 0.1041129053218626, "grad_norm": 0.3218643367290497, "learning_rate": 1.9469832353510334e-05, "loss": 0.2598, "step": 5608 }, { "epoch": 0.10415003545928124, "grad_norm": 0.25380244851112366, "learning_rate": 1.9469457515489116e-05, "loss": 0.4322, "step": 5610 }, { "epoch": 0.10418716559669987, "grad_norm": 0.31532925367355347, "learning_rate": 1.9469082548616952e-05, "loss": 0.2267, "step": 5612 }, { "epoch": 0.10422429573411851, "grad_norm": 0.3594014644622803, "learning_rate": 1.9468707452898933e-05, "loss": 0.491, "step": 5614 }, { "epoch": 0.10426142587153715, "grad_norm": 0.41942188143730164, "learning_rate": 1.946833222834017e-05, "loss": 0.2912, "step": 5616 }, { "epoch": 0.1042985560089558, "grad_norm": 0.3073533773422241, "learning_rate": 1.9467956874945763e-05, "loss": 0.2299, "step": 5618 }, { "epoch": 0.10433568614637442, "grad_norm": 0.5761581659317017, "learning_rate": 1.9467581392720823e-05, "loss": 0.3474, "step": 5620 }, { "epoch": 0.10437281628379307, "grad_norm": 0.49407994747161865, "learning_rate": 1.9467205781670463e-05, "loss": 0.2762, "step": 5622 }, { "epoch": 0.10440994642121171, "grad_norm": 0.3751986026763916, "learning_rate": 1.9466830041799785e-05, "loss": 0.4026, "step": 5624 }, { "epoch": 0.10444707655863035, "grad_norm": 0.30550137162208557, "learning_rate": 1.9466454173113912e-05, "loss": 0.3968, "step": 5626 }, { "epoch": 0.10448420669604898, "grad_norm": 0.3125455677509308, "learning_rate": 1.946607817561795e-05, "loss": 0.3578, "step": 5628 }, { "epoch": 0.10452133683346762, "grad_norm": 0.45746296644210815, "learning_rate": 1.946570204931702e-05, "loss": 0.4813, "step": 5630 }, { "epoch": 0.10455846697088626, "grad_norm": 0.307971328496933, "learning_rate": 1.9465325794216235e-05, "loss": 0.2687, "step": 5632 }, { "epoch": 0.10459559710830489, "grad_norm": 0.5898893475532532, "learning_rate": 1.9464949410320718e-05, "loss": 0.2711, "step": 5634 }, { "epoch": 0.10463272724572353, "grad_norm": 0.30872514843940735, "learning_rate": 1.9464572897635593e-05, "loss": 0.3561, "step": 5636 }, { "epoch": 0.10466985738314218, "grad_norm": 0.9824311137199402, "learning_rate": 1.946419625616598e-05, "loss": 0.2251, "step": 5638 }, { "epoch": 0.10470698752056082, "grad_norm": 0.42937248945236206, "learning_rate": 1.9463819485917e-05, "loss": 0.3925, "step": 5640 }, { "epoch": 0.10474411765797945, "grad_norm": 0.3466099202632904, "learning_rate": 1.9463442586893793e-05, "loss": 0.2799, "step": 5642 }, { "epoch": 0.10478124779539809, "grad_norm": 0.32875850796699524, "learning_rate": 1.9463065559101472e-05, "loss": 0.4291, "step": 5644 }, { "epoch": 0.10481837793281673, "grad_norm": 0.3984617590904236, "learning_rate": 1.9462688402545174e-05, "loss": 0.2584, "step": 5646 }, { "epoch": 0.10485550807023537, "grad_norm": 0.5372908711433411, "learning_rate": 1.9462311117230036e-05, "loss": 0.4681, "step": 5648 }, { "epoch": 0.104892638207654, "grad_norm": 0.34178024530410767, "learning_rate": 1.946193370316118e-05, "loss": 0.2364, "step": 5650 }, { "epoch": 0.10492976834507264, "grad_norm": 0.3702748417854309, "learning_rate": 1.9461556160343753e-05, "loss": 0.3125, "step": 5652 }, { "epoch": 0.10496689848249129, "grad_norm": 0.440461665391922, "learning_rate": 1.9461178488782882e-05, "loss": 0.1564, "step": 5654 }, { "epoch": 0.10500402861990993, "grad_norm": 0.2766466438770294, "learning_rate": 1.946080068848372e-05, "loss": 0.2424, "step": 5656 }, { "epoch": 0.10504115875732856, "grad_norm": 0.27612540125846863, "learning_rate": 1.946042275945139e-05, "loss": 0.3179, "step": 5658 }, { "epoch": 0.1050782888947472, "grad_norm": 0.44314730167388916, "learning_rate": 1.946004470169105e-05, "loss": 0.3984, "step": 5660 }, { "epoch": 0.10511541903216584, "grad_norm": 0.4054076075553894, "learning_rate": 1.9459666515207834e-05, "loss": 0.3668, "step": 5662 }, { "epoch": 0.10515254916958448, "grad_norm": 0.3379444181919098, "learning_rate": 1.945928820000689e-05, "loss": 0.2956, "step": 5664 }, { "epoch": 0.10518967930700311, "grad_norm": 0.27987322211265564, "learning_rate": 1.9458909756093375e-05, "loss": 0.3556, "step": 5666 }, { "epoch": 0.10522680944442175, "grad_norm": 0.38211536407470703, "learning_rate": 1.9458531183472424e-05, "loss": 0.3858, "step": 5668 }, { "epoch": 0.1052639395818404, "grad_norm": 0.4223131537437439, "learning_rate": 1.9458152482149197e-05, "loss": 0.3542, "step": 5670 }, { "epoch": 0.10530106971925902, "grad_norm": 0.473664790391922, "learning_rate": 1.9457773652128845e-05, "loss": 0.3758, "step": 5672 }, { "epoch": 0.10533819985667767, "grad_norm": 0.3229224681854248, "learning_rate": 1.9457394693416524e-05, "loss": 0.2796, "step": 5674 }, { "epoch": 0.10537532999409631, "grad_norm": 0.34035953879356384, "learning_rate": 1.9457015606017387e-05, "loss": 0.3071, "step": 5676 }, { "epoch": 0.10541246013151495, "grad_norm": 0.26566579937934875, "learning_rate": 1.9456636389936596e-05, "loss": 0.3156, "step": 5678 }, { "epoch": 0.10544959026893358, "grad_norm": 0.2555239796638489, "learning_rate": 1.9456257045179312e-05, "loss": 0.2193, "step": 5680 }, { "epoch": 0.10548672040635222, "grad_norm": 0.2860983908176422, "learning_rate": 1.9455877571750694e-05, "loss": 0.5282, "step": 5682 }, { "epoch": 0.10552385054377086, "grad_norm": 0.35503295063972473, "learning_rate": 1.9455497969655903e-05, "loss": 0.3858, "step": 5684 }, { "epoch": 0.1055609806811895, "grad_norm": 0.3632769286632538, "learning_rate": 1.945511823890011e-05, "loss": 0.4672, "step": 5686 }, { "epoch": 0.10559811081860813, "grad_norm": 0.2467278391122818, "learning_rate": 1.9454738379488476e-05, "loss": 0.4479, "step": 5688 }, { "epoch": 0.10563524095602678, "grad_norm": 0.5296027064323425, "learning_rate": 1.9454358391426173e-05, "loss": 0.4009, "step": 5690 }, { "epoch": 0.10567237109344542, "grad_norm": 0.37853768467903137, "learning_rate": 1.9453978274718372e-05, "loss": 0.3023, "step": 5692 }, { "epoch": 0.10570950123086406, "grad_norm": 0.36298149824142456, "learning_rate": 1.9453598029370248e-05, "loss": 0.3133, "step": 5694 }, { "epoch": 0.10574663136828269, "grad_norm": 0.4778239130973816, "learning_rate": 1.9453217655386968e-05, "loss": 0.2592, "step": 5696 }, { "epoch": 0.10578376150570133, "grad_norm": 0.39348888397216797, "learning_rate": 1.945283715277371e-05, "loss": 0.377, "step": 5698 }, { "epoch": 0.10582089164311997, "grad_norm": 0.3999602198600769, "learning_rate": 1.945245652153566e-05, "loss": 0.3594, "step": 5700 }, { "epoch": 0.10585802178053862, "grad_norm": 0.4513964056968689, "learning_rate": 1.9452075761677984e-05, "loss": 0.3914, "step": 5702 }, { "epoch": 0.10589515191795724, "grad_norm": 0.4504709541797638, "learning_rate": 1.945169487320587e-05, "loss": 0.2521, "step": 5704 }, { "epoch": 0.10593228205537589, "grad_norm": 0.26126736402511597, "learning_rate": 1.94513138561245e-05, "loss": 0.3002, "step": 5706 }, { "epoch": 0.10596941219279453, "grad_norm": 0.2932392358779907, "learning_rate": 1.945093271043906e-05, "loss": 0.24, "step": 5708 }, { "epoch": 0.10600654233021316, "grad_norm": 0.2898675799369812, "learning_rate": 1.9450551436154733e-05, "loss": 0.2744, "step": 5710 }, { "epoch": 0.1060436724676318, "grad_norm": 0.4925738275051117, "learning_rate": 1.9450170033276706e-05, "loss": 0.5037, "step": 5712 }, { "epoch": 0.10608080260505044, "grad_norm": 0.3793063163757324, "learning_rate": 1.9449788501810177e-05, "loss": 0.2955, "step": 5714 }, { "epoch": 0.10611793274246908, "grad_norm": 0.29674798250198364, "learning_rate": 1.9449406841760327e-05, "loss": 0.4152, "step": 5716 }, { "epoch": 0.10615506287988771, "grad_norm": 0.4603341519832611, "learning_rate": 1.944902505313236e-05, "loss": 0.3756, "step": 5718 }, { "epoch": 0.10619219301730635, "grad_norm": 0.3077545166015625, "learning_rate": 1.944864313593146e-05, "loss": 0.3584, "step": 5720 }, { "epoch": 0.106229323154725, "grad_norm": 0.33327987790107727, "learning_rate": 1.9448261090162833e-05, "loss": 0.4767, "step": 5722 }, { "epoch": 0.10626645329214364, "grad_norm": 0.28055399656295776, "learning_rate": 1.9447878915831667e-05, "loss": 0.4711, "step": 5724 }, { "epoch": 0.10630358342956227, "grad_norm": 0.25300803780555725, "learning_rate": 1.9447496612943177e-05, "loss": 0.3375, "step": 5726 }, { "epoch": 0.10634071356698091, "grad_norm": 0.33119189739227295, "learning_rate": 1.944711418150255e-05, "loss": 0.4009, "step": 5728 }, { "epoch": 0.10637784370439955, "grad_norm": 0.32731327414512634, "learning_rate": 1.9446731621515e-05, "loss": 0.3118, "step": 5730 }, { "epoch": 0.1064149738418182, "grad_norm": 0.39733195304870605, "learning_rate": 1.9446348932985722e-05, "loss": 0.3191, "step": 5732 }, { "epoch": 0.10645210397923682, "grad_norm": 0.38298898935317993, "learning_rate": 1.944596611591994e-05, "loss": 0.2386, "step": 5734 }, { "epoch": 0.10648923411665547, "grad_norm": 0.39737269282341003, "learning_rate": 1.9445583170322848e-05, "loss": 0.417, "step": 5736 }, { "epoch": 0.10652636425407411, "grad_norm": 0.3341575264930725, "learning_rate": 1.944520009619966e-05, "loss": 0.2563, "step": 5738 }, { "epoch": 0.10656349439149275, "grad_norm": 0.4088996648788452, "learning_rate": 1.9444816893555596e-05, "loss": 0.236, "step": 5740 }, { "epoch": 0.10660062452891138, "grad_norm": 0.3626585900783539, "learning_rate": 1.944443356239586e-05, "loss": 0.378, "step": 5742 }, { "epoch": 0.10663775466633002, "grad_norm": 0.42454993724823, "learning_rate": 1.9444050102725675e-05, "loss": 0.255, "step": 5744 }, { "epoch": 0.10667488480374866, "grad_norm": 0.23644159734249115, "learning_rate": 1.9443666514550254e-05, "loss": 0.3274, "step": 5746 }, { "epoch": 0.10671201494116729, "grad_norm": 0.36012452840805054, "learning_rate": 1.944328279787482e-05, "loss": 0.3883, "step": 5748 }, { "epoch": 0.10674914507858593, "grad_norm": 0.34978556632995605, "learning_rate": 1.944289895270459e-05, "loss": 0.52, "step": 5750 }, { "epoch": 0.10678627521600458, "grad_norm": 0.3932737708091736, "learning_rate": 1.9442514979044794e-05, "loss": 0.1831, "step": 5752 }, { "epoch": 0.10682340535342322, "grad_norm": 0.3289352357387543, "learning_rate": 1.9442130876900654e-05, "loss": 0.1993, "step": 5754 }, { "epoch": 0.10686053549084185, "grad_norm": 0.28233715891838074, "learning_rate": 1.9441746646277394e-05, "loss": 0.1737, "step": 5756 }, { "epoch": 0.10689766562826049, "grad_norm": 0.43340542912483215, "learning_rate": 1.9441362287180242e-05, "loss": 0.3667, "step": 5758 }, { "epoch": 0.10693479576567913, "grad_norm": 0.38987642526626587, "learning_rate": 1.944097779961443e-05, "loss": 0.3164, "step": 5760 }, { "epoch": 0.10697192590309777, "grad_norm": 0.3755171597003937, "learning_rate": 1.9440593183585188e-05, "loss": 0.3508, "step": 5762 }, { "epoch": 0.1070090560405164, "grad_norm": 0.44960302114486694, "learning_rate": 1.9440208439097752e-05, "loss": 0.4445, "step": 5764 }, { "epoch": 0.10704618617793504, "grad_norm": 0.3175716698169708, "learning_rate": 1.9439823566157353e-05, "loss": 0.3236, "step": 5766 }, { "epoch": 0.10708331631535369, "grad_norm": 0.30718982219696045, "learning_rate": 1.9439438564769236e-05, "loss": 0.2313, "step": 5768 }, { "epoch": 0.10712044645277233, "grad_norm": 0.8927876353263855, "learning_rate": 1.943905343493863e-05, "loss": 0.3348, "step": 5770 }, { "epoch": 0.10715757659019096, "grad_norm": 0.3095676004886627, "learning_rate": 1.9438668176670785e-05, "loss": 0.4022, "step": 5772 }, { "epoch": 0.1071947067276096, "grad_norm": 0.4106636345386505, "learning_rate": 1.9438282789970934e-05, "loss": 0.1842, "step": 5774 }, { "epoch": 0.10723183686502824, "grad_norm": 0.26190951466560364, "learning_rate": 1.9437897274844327e-05, "loss": 0.1287, "step": 5776 }, { "epoch": 0.10726896700244688, "grad_norm": 0.31700852513313293, "learning_rate": 1.9437511631296205e-05, "loss": 0.3923, "step": 5778 }, { "epoch": 0.10730609713986551, "grad_norm": 0.3483114242553711, "learning_rate": 1.9437125859331824e-05, "loss": 0.2641, "step": 5780 }, { "epoch": 0.10734322727728415, "grad_norm": 0.2810349762439728, "learning_rate": 1.943673995895642e-05, "loss": 0.2339, "step": 5782 }, { "epoch": 0.1073803574147028, "grad_norm": 0.4115602970123291, "learning_rate": 1.9436353930175256e-05, "loss": 0.3854, "step": 5784 }, { "epoch": 0.10741748755212142, "grad_norm": 0.4067830741405487, "learning_rate": 1.9435967772993582e-05, "loss": 0.314, "step": 5786 }, { "epoch": 0.10745461768954007, "grad_norm": 0.3596358895301819, "learning_rate": 1.9435581487416645e-05, "loss": 0.3275, "step": 5788 }, { "epoch": 0.10749174782695871, "grad_norm": 0.39568236470222473, "learning_rate": 1.943519507344971e-05, "loss": 0.2978, "step": 5790 }, { "epoch": 0.10752887796437735, "grad_norm": 0.3731325566768646, "learning_rate": 1.9434808531098034e-05, "loss": 0.4138, "step": 5792 }, { "epoch": 0.10756600810179598, "grad_norm": 0.38418689370155334, "learning_rate": 1.943442186036687e-05, "loss": 0.2724, "step": 5794 }, { "epoch": 0.10760313823921462, "grad_norm": 0.3116511404514313, "learning_rate": 1.9434035061261488e-05, "loss": 0.1247, "step": 5796 }, { "epoch": 0.10764026837663326, "grad_norm": 0.4002717137336731, "learning_rate": 1.943364813378714e-05, "loss": 0.4214, "step": 5798 }, { "epoch": 0.1076773985140519, "grad_norm": 0.3120017349720001, "learning_rate": 1.9433261077949107e-05, "loss": 0.3497, "step": 5800 }, { "epoch": 0.10771452865147053, "grad_norm": 0.2992229461669922, "learning_rate": 1.9432873893752643e-05, "loss": 0.2889, "step": 5802 }, { "epoch": 0.10775165878888918, "grad_norm": 0.3299771547317505, "learning_rate": 1.943248658120302e-05, "loss": 0.2584, "step": 5804 }, { "epoch": 0.10778878892630782, "grad_norm": 0.3887860178947449, "learning_rate": 1.9432099140305506e-05, "loss": 0.3226, "step": 5806 }, { "epoch": 0.10782591906372646, "grad_norm": 0.44004178047180176, "learning_rate": 1.9431711571065377e-05, "loss": 0.2345, "step": 5808 }, { "epoch": 0.10786304920114509, "grad_norm": 0.32659563422203064, "learning_rate": 1.9431323873487905e-05, "loss": 0.512, "step": 5810 }, { "epoch": 0.10790017933856373, "grad_norm": 0.44951125979423523, "learning_rate": 1.9430936047578365e-05, "loss": 0.3856, "step": 5812 }, { "epoch": 0.10793730947598237, "grad_norm": 0.28977170586586, "learning_rate": 1.9430548093342036e-05, "loss": 0.2726, "step": 5814 }, { "epoch": 0.10797443961340102, "grad_norm": 0.3876648247241974, "learning_rate": 1.943016001078419e-05, "loss": 0.378, "step": 5816 }, { "epoch": 0.10801156975081964, "grad_norm": 0.36916598677635193, "learning_rate": 1.9429771799910115e-05, "loss": 0.4125, "step": 5818 }, { "epoch": 0.10804869988823829, "grad_norm": 0.3590720593929291, "learning_rate": 1.9429383460725094e-05, "loss": 0.261, "step": 5820 }, { "epoch": 0.10808583002565693, "grad_norm": 0.30345419049263, "learning_rate": 1.942899499323441e-05, "loss": 0.3931, "step": 5822 }, { "epoch": 0.10812296016307556, "grad_norm": 0.33233898878097534, "learning_rate": 1.942860639744334e-05, "loss": 0.2892, "step": 5824 }, { "epoch": 0.1081600903004942, "grad_norm": 0.3375394344329834, "learning_rate": 1.9428217673357184e-05, "loss": 0.2629, "step": 5826 }, { "epoch": 0.10819722043791284, "grad_norm": 0.2185717672109604, "learning_rate": 1.9427828820981226e-05, "loss": 0.1229, "step": 5828 }, { "epoch": 0.10823435057533148, "grad_norm": 0.27578356862068176, "learning_rate": 1.9427439840320756e-05, "loss": 0.4499, "step": 5830 }, { "epoch": 0.10827148071275011, "grad_norm": 0.27450743317604065, "learning_rate": 1.942705073138107e-05, "loss": 0.4816, "step": 5832 }, { "epoch": 0.10830861085016875, "grad_norm": 0.5252142548561096, "learning_rate": 1.942666149416746e-05, "loss": 0.3833, "step": 5834 }, { "epoch": 0.1083457409875874, "grad_norm": 0.32267916202545166, "learning_rate": 1.942627212868522e-05, "loss": 0.2481, "step": 5836 }, { "epoch": 0.10838287112500604, "grad_norm": 0.38306835293769836, "learning_rate": 1.9425882634939655e-05, "loss": 0.3806, "step": 5838 }, { "epoch": 0.10842000126242467, "grad_norm": 0.33959731459617615, "learning_rate": 1.942549301293606e-05, "loss": 0.2882, "step": 5840 }, { "epoch": 0.10845713139984331, "grad_norm": 0.3266756534576416, "learning_rate": 1.9425103262679736e-05, "loss": 0.3852, "step": 5842 }, { "epoch": 0.10849426153726195, "grad_norm": 0.35104936361312866, "learning_rate": 1.9424713384175994e-05, "loss": 0.223, "step": 5844 }, { "epoch": 0.1085313916746806, "grad_norm": 0.32879096269607544, "learning_rate": 1.9424323377430125e-05, "loss": 0.2237, "step": 5846 }, { "epoch": 0.10856852181209922, "grad_norm": 0.4024861454963684, "learning_rate": 1.942393324244745e-05, "loss": 0.1185, "step": 5848 }, { "epoch": 0.10860565194951786, "grad_norm": 0.3981323838233948, "learning_rate": 1.942354297923327e-05, "loss": 0.6142, "step": 5850 }, { "epoch": 0.1086427820869365, "grad_norm": 0.41584548354148865, "learning_rate": 1.9423152587792896e-05, "loss": 0.4369, "step": 5852 }, { "epoch": 0.10867991222435515, "grad_norm": 0.39271169900894165, "learning_rate": 1.9422762068131643e-05, "loss": 0.5093, "step": 5854 }, { "epoch": 0.10871704236177378, "grad_norm": 0.3449486494064331, "learning_rate": 1.9422371420254817e-05, "loss": 0.4341, "step": 5856 }, { "epoch": 0.10875417249919242, "grad_norm": 0.4763622581958771, "learning_rate": 1.9421980644167747e-05, "loss": 0.4724, "step": 5858 }, { "epoch": 0.10879130263661106, "grad_norm": 0.32343971729278564, "learning_rate": 1.9421589739875738e-05, "loss": 0.5369, "step": 5860 }, { "epoch": 0.10882843277402969, "grad_norm": 0.33949631452560425, "learning_rate": 1.9421198707384114e-05, "loss": 0.3341, "step": 5862 }, { "epoch": 0.10886556291144833, "grad_norm": 0.23112685978412628, "learning_rate": 1.9420807546698196e-05, "loss": 0.1913, "step": 5864 }, { "epoch": 0.10890269304886697, "grad_norm": 0.40140068531036377, "learning_rate": 1.9420416257823302e-05, "loss": 0.4568, "step": 5866 }, { "epoch": 0.10893982318628562, "grad_norm": 0.3172307014465332, "learning_rate": 1.9420024840764767e-05, "loss": 0.4682, "step": 5868 }, { "epoch": 0.10897695332370425, "grad_norm": 0.2731354832649231, "learning_rate": 1.9419633295527905e-05, "loss": 0.4819, "step": 5870 }, { "epoch": 0.10901408346112289, "grad_norm": 0.34615558385849, "learning_rate": 1.9419241622118053e-05, "loss": 0.2828, "step": 5872 }, { "epoch": 0.10905121359854153, "grad_norm": 0.5087578296661377, "learning_rate": 1.9418849820540533e-05, "loss": 0.2022, "step": 5874 }, { "epoch": 0.10908834373596017, "grad_norm": 0.43714675307273865, "learning_rate": 1.941845789080068e-05, "loss": 0.2583, "step": 5876 }, { "epoch": 0.1091254738733788, "grad_norm": 0.4163441061973572, "learning_rate": 1.941806583290383e-05, "loss": 0.3424, "step": 5878 }, { "epoch": 0.10916260401079744, "grad_norm": 0.2833601236343384, "learning_rate": 1.941767364685531e-05, "loss": 0.3319, "step": 5880 }, { "epoch": 0.10919973414821608, "grad_norm": 0.4008776843547821, "learning_rate": 1.9417281332660463e-05, "loss": 0.4196, "step": 5882 }, { "epoch": 0.10923686428563473, "grad_norm": 0.3623258173465729, "learning_rate": 1.9416888890324626e-05, "loss": 0.3032, "step": 5884 }, { "epoch": 0.10927399442305336, "grad_norm": 0.3638496994972229, "learning_rate": 1.9416496319853134e-05, "loss": 0.3202, "step": 5886 }, { "epoch": 0.109311124560472, "grad_norm": 0.5115017890930176, "learning_rate": 1.9416103621251338e-05, "loss": 0.4579, "step": 5888 }, { "epoch": 0.10934825469789064, "grad_norm": 0.3284580111503601, "learning_rate": 1.9415710794524574e-05, "loss": 0.4716, "step": 5890 }, { "epoch": 0.10938538483530928, "grad_norm": 0.4640825688838959, "learning_rate": 1.9415317839678187e-05, "loss": 0.419, "step": 5892 }, { "epoch": 0.10942251497272791, "grad_norm": 0.2682100236415863, "learning_rate": 1.941492475671753e-05, "loss": 0.33, "step": 5894 }, { "epoch": 0.10945964511014655, "grad_norm": 0.2789626121520996, "learning_rate": 1.9414531545647945e-05, "loss": 0.2711, "step": 5896 }, { "epoch": 0.1094967752475652, "grad_norm": 0.34911608695983887, "learning_rate": 1.941413820647479e-05, "loss": 0.4164, "step": 5898 }, { "epoch": 0.10953390538498382, "grad_norm": 0.31880897283554077, "learning_rate": 1.9413744739203407e-05, "loss": 0.4297, "step": 5900 }, { "epoch": 0.10957103552240247, "grad_norm": 0.3555947542190552, "learning_rate": 1.941335114383916e-05, "loss": 0.3629, "step": 5902 }, { "epoch": 0.10960816565982111, "grad_norm": 0.29058215022087097, "learning_rate": 1.9412957420387396e-05, "loss": 0.4357, "step": 5904 }, { "epoch": 0.10964529579723975, "grad_norm": 0.4944290816783905, "learning_rate": 1.941256356885348e-05, "loss": 0.3046, "step": 5906 }, { "epoch": 0.10968242593465838, "grad_norm": 0.4301474392414093, "learning_rate": 1.9412169589242768e-05, "loss": 0.69, "step": 5908 }, { "epoch": 0.10971955607207702, "grad_norm": 0.31939250230789185, "learning_rate": 1.9411775481560617e-05, "loss": 0.3955, "step": 5910 }, { "epoch": 0.10975668620949566, "grad_norm": 0.559546947479248, "learning_rate": 1.9411381245812397e-05, "loss": 0.2494, "step": 5912 }, { "epoch": 0.1097938163469143, "grad_norm": 0.2580447196960449, "learning_rate": 1.9410986882003468e-05, "loss": 0.488, "step": 5914 }, { "epoch": 0.10983094648433293, "grad_norm": 0.26733484864234924, "learning_rate": 1.9410592390139196e-05, "loss": 0.2879, "step": 5916 }, { "epoch": 0.10986807662175158, "grad_norm": 0.31146278977394104, "learning_rate": 1.941019777022495e-05, "loss": 0.5469, "step": 5918 }, { "epoch": 0.10990520675917022, "grad_norm": 0.34816449880599976, "learning_rate": 1.9409803022266097e-05, "loss": 0.348, "step": 5920 }, { "epoch": 0.10994233689658886, "grad_norm": 0.3630014657974243, "learning_rate": 1.940940814626801e-05, "loss": 0.3334, "step": 5922 }, { "epoch": 0.10997946703400749, "grad_norm": 0.2611609697341919, "learning_rate": 1.9409013142236065e-05, "loss": 0.3642, "step": 5924 }, { "epoch": 0.11001659717142613, "grad_norm": 0.31617316603660583, "learning_rate": 1.9408618010175636e-05, "loss": 0.2407, "step": 5926 }, { "epoch": 0.11005372730884477, "grad_norm": 0.36851227283477783, "learning_rate": 1.9408222750092095e-05, "loss": 0.413, "step": 5928 }, { "epoch": 0.11009085744626342, "grad_norm": 0.19425345957279205, "learning_rate": 1.9407827361990824e-05, "loss": 0.4462, "step": 5930 }, { "epoch": 0.11012798758368204, "grad_norm": 0.247972771525383, "learning_rate": 1.9407431845877203e-05, "loss": 0.3072, "step": 5932 }, { "epoch": 0.11016511772110069, "grad_norm": 0.37046679854393005, "learning_rate": 1.940703620175661e-05, "loss": 0.2117, "step": 5934 }, { "epoch": 0.11020224785851933, "grad_norm": 0.3156874179840088, "learning_rate": 1.9406640429634434e-05, "loss": 0.4296, "step": 5936 }, { "epoch": 0.11023937799593796, "grad_norm": 0.3625591993331909, "learning_rate": 1.9406244529516058e-05, "loss": 0.2992, "step": 5938 }, { "epoch": 0.1102765081333566, "grad_norm": 0.3214312791824341, "learning_rate": 1.9405848501406873e-05, "loss": 0.325, "step": 5940 }, { "epoch": 0.11031363827077524, "grad_norm": 0.47295668721199036, "learning_rate": 1.940545234531226e-05, "loss": 0.317, "step": 5942 }, { "epoch": 0.11035076840819388, "grad_norm": 0.42984381318092346, "learning_rate": 1.940505606123761e-05, "loss": 0.4246, "step": 5944 }, { "epoch": 0.11038789854561251, "grad_norm": 0.25924059748649597, "learning_rate": 1.9404659649188323e-05, "loss": 0.2936, "step": 5946 }, { "epoch": 0.11042502868303115, "grad_norm": 0.3624178469181061, "learning_rate": 1.9404263109169786e-05, "loss": 0.2656, "step": 5948 }, { "epoch": 0.1104621588204498, "grad_norm": 0.2970726191997528, "learning_rate": 1.9403866441187396e-05, "loss": 0.3149, "step": 5950 }, { "epoch": 0.11049928895786844, "grad_norm": 0.8646212220191956, "learning_rate": 1.940346964524655e-05, "loss": 0.5251, "step": 5952 }, { "epoch": 0.11053641909528707, "grad_norm": 0.2533368170261383, "learning_rate": 1.940307272135266e-05, "loss": 0.3765, "step": 5954 }, { "epoch": 0.11057354923270571, "grad_norm": 0.42803817987442017, "learning_rate": 1.9402675669511106e-05, "loss": 0.3591, "step": 5956 }, { "epoch": 0.11061067937012435, "grad_norm": 0.45353007316589355, "learning_rate": 1.9402278489727303e-05, "loss": 0.2383, "step": 5958 }, { "epoch": 0.110647809507543, "grad_norm": 0.36995524168014526, "learning_rate": 1.940188118200665e-05, "loss": 0.3012, "step": 5960 }, { "epoch": 0.11068493964496162, "grad_norm": 0.29975393414497375, "learning_rate": 1.9401483746354558e-05, "loss": 0.463, "step": 5962 }, { "epoch": 0.11072206978238026, "grad_norm": 0.37595194578170776, "learning_rate": 1.9401086182776433e-05, "loss": 0.3645, "step": 5964 }, { "epoch": 0.1107591999197989, "grad_norm": 0.31594109535217285, "learning_rate": 1.9400688491277682e-05, "loss": 0.4021, "step": 5966 }, { "epoch": 0.11079633005721755, "grad_norm": 0.3603358566761017, "learning_rate": 1.9400290671863726e-05, "loss": 0.3044, "step": 5968 }, { "epoch": 0.11083346019463618, "grad_norm": 0.36992278695106506, "learning_rate": 1.9399892724539967e-05, "loss": 0.3763, "step": 5970 }, { "epoch": 0.11087059033205482, "grad_norm": 0.4233797788619995, "learning_rate": 1.939949464931182e-05, "loss": 0.3312, "step": 5972 }, { "epoch": 0.11090772046947346, "grad_norm": 0.23897391557693481, "learning_rate": 1.939909644618471e-05, "loss": 0.1467, "step": 5974 }, { "epoch": 0.11094485060689209, "grad_norm": 0.31221428513526917, "learning_rate": 1.9398698115164052e-05, "loss": 0.2822, "step": 5976 }, { "epoch": 0.11098198074431073, "grad_norm": 0.36981192231178284, "learning_rate": 1.939829965625526e-05, "loss": 0.3683, "step": 5978 }, { "epoch": 0.11101911088172937, "grad_norm": 0.27857425808906555, "learning_rate": 1.9397901069463768e-05, "loss": 0.4174, "step": 5980 }, { "epoch": 0.11105624101914802, "grad_norm": 0.21214932203292847, "learning_rate": 1.9397502354794987e-05, "loss": 0.1752, "step": 5982 }, { "epoch": 0.11109337115656664, "grad_norm": 0.5696078538894653, "learning_rate": 1.9397103512254348e-05, "loss": 0.5898, "step": 5984 }, { "epoch": 0.11113050129398529, "grad_norm": 0.27658164501190186, "learning_rate": 1.939670454184728e-05, "loss": 0.2478, "step": 5986 }, { "epoch": 0.11116763143140393, "grad_norm": 0.29363882541656494, "learning_rate": 1.9396305443579208e-05, "loss": 0.4665, "step": 5988 }, { "epoch": 0.11120476156882257, "grad_norm": 0.36517244577407837, "learning_rate": 1.9395906217455565e-05, "loss": 0.3164, "step": 5990 }, { "epoch": 0.1112418917062412, "grad_norm": 0.5709163546562195, "learning_rate": 1.9395506863481782e-05, "loss": 0.5044, "step": 5992 }, { "epoch": 0.11127902184365984, "grad_norm": 0.3584378957748413, "learning_rate": 1.9395107381663294e-05, "loss": 0.3409, "step": 5994 }, { "epoch": 0.11131615198107848, "grad_norm": 0.241102933883667, "learning_rate": 1.9394707772005535e-05, "loss": 0.3295, "step": 5996 }, { "epoch": 0.11135328211849713, "grad_norm": 0.27340802550315857, "learning_rate": 1.9394308034513943e-05, "loss": 0.257, "step": 5998 }, { "epoch": 0.11139041225591576, "grad_norm": 0.40493902564048767, "learning_rate": 1.9393908169193956e-05, "loss": 0.3485, "step": 6000 }, { "epoch": 0.1114275423933344, "grad_norm": 0.27664878964424133, "learning_rate": 1.9393508176051023e-05, "loss": 0.136, "step": 6002 }, { "epoch": 0.11146467253075304, "grad_norm": 0.3698323667049408, "learning_rate": 1.9393108055090577e-05, "loss": 0.3324, "step": 6004 }, { "epoch": 0.11150180266817168, "grad_norm": 0.3152863085269928, "learning_rate": 1.9392707806318066e-05, "loss": 0.2976, "step": 6006 }, { "epoch": 0.11153893280559031, "grad_norm": 0.2979016602039337, "learning_rate": 1.9392307429738937e-05, "loss": 0.3829, "step": 6008 }, { "epoch": 0.11157606294300895, "grad_norm": 0.26415732502937317, "learning_rate": 1.9391906925358636e-05, "loss": 0.2993, "step": 6010 }, { "epoch": 0.1116131930804276, "grad_norm": 0.3328465521335602, "learning_rate": 1.9391506293182615e-05, "loss": 0.3772, "step": 6012 }, { "epoch": 0.11165032321784622, "grad_norm": 0.2960035800933838, "learning_rate": 1.9391105533216322e-05, "loss": 0.4613, "step": 6014 }, { "epoch": 0.11168745335526487, "grad_norm": 0.3554299473762512, "learning_rate": 1.9390704645465216e-05, "loss": 0.3894, "step": 6016 }, { "epoch": 0.11172458349268351, "grad_norm": 0.4388827383518219, "learning_rate": 1.9390303629934745e-05, "loss": 0.3903, "step": 6018 }, { "epoch": 0.11176171363010215, "grad_norm": 0.40241241455078125, "learning_rate": 1.938990248663037e-05, "loss": 0.4127, "step": 6020 }, { "epoch": 0.11179884376752078, "grad_norm": 0.40406933426856995, "learning_rate": 1.938950121555755e-05, "loss": 0.353, "step": 6022 }, { "epoch": 0.11183597390493942, "grad_norm": 0.42639797925949097, "learning_rate": 1.9389099816721742e-05, "loss": 0.232, "step": 6024 }, { "epoch": 0.11187310404235806, "grad_norm": 0.5960007309913635, "learning_rate": 1.9388698290128406e-05, "loss": 0.3347, "step": 6026 }, { "epoch": 0.1119102341797767, "grad_norm": 0.451932817697525, "learning_rate": 1.938829663578301e-05, "loss": 0.3026, "step": 6028 }, { "epoch": 0.11194736431719533, "grad_norm": 0.46730172634124756, "learning_rate": 1.938789485369102e-05, "loss": 0.2846, "step": 6030 }, { "epoch": 0.11198449445461398, "grad_norm": 0.3319157361984253, "learning_rate": 1.9387492943857902e-05, "loss": 0.2332, "step": 6032 }, { "epoch": 0.11202162459203262, "grad_norm": 0.4053408205509186, "learning_rate": 1.9387090906289118e-05, "loss": 0.2424, "step": 6034 }, { "epoch": 0.11205875472945126, "grad_norm": 0.31995534896850586, "learning_rate": 1.938668874099015e-05, "loss": 0.3918, "step": 6036 }, { "epoch": 0.11209588486686989, "grad_norm": 0.8785678744316101, "learning_rate": 1.9386286447966465e-05, "loss": 0.2888, "step": 6038 }, { "epoch": 0.11213301500428853, "grad_norm": 0.3702390491962433, "learning_rate": 1.938588402722353e-05, "loss": 0.1873, "step": 6040 }, { "epoch": 0.11217014514170717, "grad_norm": 3.35353422164917, "learning_rate": 1.9385481478766834e-05, "loss": 0.3397, "step": 6042 }, { "epoch": 0.11220727527912581, "grad_norm": 0.3046782612800598, "learning_rate": 1.9385078802601846e-05, "loss": 0.3496, "step": 6044 }, { "epoch": 0.11224440541654444, "grad_norm": 0.5030434727668762, "learning_rate": 1.9384675998734047e-05, "loss": 0.3792, "step": 6046 }, { "epoch": 0.11228153555396309, "grad_norm": 0.3410714566707611, "learning_rate": 1.938427306716892e-05, "loss": 0.3238, "step": 6048 }, { "epoch": 0.11231866569138173, "grad_norm": 0.4026999771595001, "learning_rate": 1.9383870007911942e-05, "loss": 0.4002, "step": 6050 }, { "epoch": 0.11235579582880036, "grad_norm": 0.32048147916793823, "learning_rate": 1.93834668209686e-05, "loss": 0.4739, "step": 6052 }, { "epoch": 0.112392925966219, "grad_norm": 0.27759453654289246, "learning_rate": 1.9383063506344386e-05, "loss": 0.2648, "step": 6054 }, { "epoch": 0.11243005610363764, "grad_norm": 0.6055189371109009, "learning_rate": 1.9382660064044782e-05, "loss": 0.3572, "step": 6056 }, { "epoch": 0.11246718624105628, "grad_norm": 0.40630897879600525, "learning_rate": 1.938225649407528e-05, "loss": 0.4055, "step": 6058 }, { "epoch": 0.11250431637847491, "grad_norm": 0.3931969404220581, "learning_rate": 1.9381852796441367e-05, "loss": 0.2969, "step": 6060 }, { "epoch": 0.11254144651589355, "grad_norm": 0.40135693550109863, "learning_rate": 1.938144897114854e-05, "loss": 0.5507, "step": 6062 }, { "epoch": 0.1125785766533122, "grad_norm": 0.2813001871109009, "learning_rate": 1.9381045018202294e-05, "loss": 0.2489, "step": 6064 }, { "epoch": 0.11261570679073084, "grad_norm": 0.3997330963611603, "learning_rate": 1.9380640937608123e-05, "loss": 0.3273, "step": 6066 }, { "epoch": 0.11265283692814947, "grad_norm": 0.32363638281822205, "learning_rate": 1.9380236729371532e-05, "loss": 0.2368, "step": 6068 }, { "epoch": 0.11268996706556811, "grad_norm": 0.5048516392707825, "learning_rate": 1.937983239349801e-05, "loss": 0.4573, "step": 6070 }, { "epoch": 0.11272709720298675, "grad_norm": 0.38184475898742676, "learning_rate": 1.937942792999307e-05, "loss": 0.1789, "step": 6072 }, { "epoch": 0.11276422734040539, "grad_norm": 0.24894502758979797, "learning_rate": 1.937902333886221e-05, "loss": 0.6135, "step": 6074 }, { "epoch": 0.11280135747782402, "grad_norm": 0.29433685541152954, "learning_rate": 1.937861862011093e-05, "loss": 0.2062, "step": 6076 }, { "epoch": 0.11283848761524266, "grad_norm": 0.34825021028518677, "learning_rate": 1.937821377374475e-05, "loss": 0.4458, "step": 6078 }, { "epoch": 0.1128756177526613, "grad_norm": 0.39847344160079956, "learning_rate": 1.9377808799769167e-05, "loss": 0.2596, "step": 6080 }, { "epoch": 0.11291274789007995, "grad_norm": 0.36373013257980347, "learning_rate": 1.9377403698189695e-05, "loss": 0.4199, "step": 6082 }, { "epoch": 0.11294987802749858, "grad_norm": 0.3375605046749115, "learning_rate": 1.9376998469011852e-05, "loss": 0.4184, "step": 6084 }, { "epoch": 0.11298700816491722, "grad_norm": 0.4313795268535614, "learning_rate": 1.9376593112241143e-05, "loss": 0.211, "step": 6086 }, { "epoch": 0.11302413830233586, "grad_norm": 0.4363456666469574, "learning_rate": 1.937618762788309e-05, "loss": 0.3217, "step": 6088 }, { "epoch": 0.11306126843975449, "grad_norm": 0.24281597137451172, "learning_rate": 1.937578201594321e-05, "loss": 0.3087, "step": 6090 }, { "epoch": 0.11309839857717313, "grad_norm": 0.29096898436546326, "learning_rate": 1.937537627642702e-05, "loss": 0.3114, "step": 6092 }, { "epoch": 0.11313552871459177, "grad_norm": 0.29845109581947327, "learning_rate": 1.937497040934004e-05, "loss": 0.3343, "step": 6094 }, { "epoch": 0.11317265885201042, "grad_norm": 0.3539322316646576, "learning_rate": 1.9374564414687792e-05, "loss": 0.2617, "step": 6096 }, { "epoch": 0.11320978898942904, "grad_norm": 0.3430907428264618, "learning_rate": 1.9374158292475805e-05, "loss": 0.2286, "step": 6098 }, { "epoch": 0.11324691912684769, "grad_norm": 0.41140106320381165, "learning_rate": 1.9373752042709603e-05, "loss": 0.4725, "step": 6100 }, { "epoch": 0.11328404926426633, "grad_norm": 0.3160984218120575, "learning_rate": 1.937334566539471e-05, "loss": 0.2723, "step": 6102 }, { "epoch": 0.11332117940168497, "grad_norm": 0.3963102400302887, "learning_rate": 1.9372939160536663e-05, "loss": 0.3138, "step": 6104 }, { "epoch": 0.1133583095391036, "grad_norm": 0.3327803909778595, "learning_rate": 1.937253252814099e-05, "loss": 0.4306, "step": 6106 }, { "epoch": 0.11339543967652224, "grad_norm": 0.3599388897418976, "learning_rate": 1.9372125768213217e-05, "loss": 0.4068, "step": 6108 }, { "epoch": 0.11343256981394088, "grad_norm": 0.513262152671814, "learning_rate": 1.937171888075889e-05, "loss": 0.2592, "step": 6110 }, { "epoch": 0.11346969995135953, "grad_norm": 0.43570563197135925, "learning_rate": 1.9371311865783538e-05, "loss": 0.2968, "step": 6112 }, { "epoch": 0.11350683008877815, "grad_norm": 0.2661902606487274, "learning_rate": 1.9370904723292705e-05, "loss": 0.4131, "step": 6114 }, { "epoch": 0.1135439602261968, "grad_norm": 0.23212552070617676, "learning_rate": 1.9370497453291923e-05, "loss": 0.3206, "step": 6116 }, { "epoch": 0.11358109036361544, "grad_norm": 0.394327312707901, "learning_rate": 1.9370090055786744e-05, "loss": 0.4353, "step": 6118 }, { "epoch": 0.11361822050103408, "grad_norm": 0.3261694014072418, "learning_rate": 1.9369682530782703e-05, "loss": 0.393, "step": 6120 }, { "epoch": 0.11365535063845271, "grad_norm": 0.4460222125053406, "learning_rate": 1.936927487828535e-05, "loss": 0.3759, "step": 6122 }, { "epoch": 0.11369248077587135, "grad_norm": 0.28853529691696167, "learning_rate": 1.9368867098300227e-05, "loss": 0.255, "step": 6124 }, { "epoch": 0.11372961091329, "grad_norm": 0.2908032238483429, "learning_rate": 1.9368459190832888e-05, "loss": 0.3659, "step": 6126 }, { "epoch": 0.11376674105070862, "grad_norm": 0.37115544080734253, "learning_rate": 1.936805115588888e-05, "loss": 0.2839, "step": 6128 }, { "epoch": 0.11380387118812726, "grad_norm": 0.35863929986953735, "learning_rate": 1.9367642993473755e-05, "loss": 0.2969, "step": 6130 }, { "epoch": 0.1138410013255459, "grad_norm": 0.3698493540287018, "learning_rate": 1.9367234703593074e-05, "loss": 0.2526, "step": 6132 }, { "epoch": 0.11387813146296455, "grad_norm": 0.35082119703292847, "learning_rate": 1.9366826286252382e-05, "loss": 0.3339, "step": 6134 }, { "epoch": 0.11391526160038318, "grad_norm": 0.44319337606430054, "learning_rate": 1.9366417741457244e-05, "loss": 0.3098, "step": 6136 }, { "epoch": 0.11395239173780182, "grad_norm": 0.38933265209198, "learning_rate": 1.9366009069213212e-05, "loss": 0.4016, "step": 6138 }, { "epoch": 0.11398952187522046, "grad_norm": 0.31285473704338074, "learning_rate": 1.9365600269525856e-05, "loss": 0.2897, "step": 6140 }, { "epoch": 0.1140266520126391, "grad_norm": 0.2849639058113098, "learning_rate": 1.936519134240073e-05, "loss": 0.3427, "step": 6142 }, { "epoch": 0.11406378215005773, "grad_norm": 0.3146854639053345, "learning_rate": 1.9364782287843404e-05, "loss": 0.2635, "step": 6144 }, { "epoch": 0.11410091228747637, "grad_norm": 0.30868664383888245, "learning_rate": 1.936437310585944e-05, "loss": 0.3033, "step": 6146 }, { "epoch": 0.11413804242489502, "grad_norm": 0.3137667775154114, "learning_rate": 1.936396379645441e-05, "loss": 0.2959, "step": 6148 }, { "epoch": 0.11417517256231366, "grad_norm": 0.3515711724758148, "learning_rate": 1.9363554359633875e-05, "loss": 0.2523, "step": 6150 }, { "epoch": 0.11421230269973229, "grad_norm": 0.42071786522865295, "learning_rate": 1.936314479540342e-05, "loss": 0.4516, "step": 6152 }, { "epoch": 0.11424943283715093, "grad_norm": 0.43328598141670227, "learning_rate": 1.9362735103768606e-05, "loss": 0.3506, "step": 6154 }, { "epoch": 0.11428656297456957, "grad_norm": 0.35118716955184937, "learning_rate": 1.9362325284735012e-05, "loss": 0.314, "step": 6156 }, { "epoch": 0.11432369311198821, "grad_norm": 0.33372414112091064, "learning_rate": 1.9361915338308215e-05, "loss": 0.4007, "step": 6158 }, { "epoch": 0.11436082324940684, "grad_norm": 0.3123158812522888, "learning_rate": 1.9361505264493794e-05, "loss": 0.2068, "step": 6160 }, { "epoch": 0.11439795338682548, "grad_norm": 0.4641788899898529, "learning_rate": 1.9361095063297325e-05, "loss": 0.408, "step": 6162 }, { "epoch": 0.11443508352424413, "grad_norm": 0.32063692808151245, "learning_rate": 1.9360684734724393e-05, "loss": 0.3507, "step": 6164 }, { "epoch": 0.11447221366166276, "grad_norm": 0.3549119234085083, "learning_rate": 1.936027427878058e-05, "loss": 0.2362, "step": 6166 }, { "epoch": 0.1145093437990814, "grad_norm": 0.2539325952529907, "learning_rate": 1.935986369547147e-05, "loss": 0.3978, "step": 6168 }, { "epoch": 0.11454647393650004, "grad_norm": 0.2352052628993988, "learning_rate": 1.9359452984802655e-05, "loss": 0.3838, "step": 6170 }, { "epoch": 0.11458360407391868, "grad_norm": 0.42627835273742676, "learning_rate": 1.9359042146779714e-05, "loss": 0.4012, "step": 6172 }, { "epoch": 0.11462073421133731, "grad_norm": 0.3156546354293823, "learning_rate": 1.9358631181408246e-05, "loss": 0.4486, "step": 6174 }, { "epoch": 0.11465786434875595, "grad_norm": 0.3225153386592865, "learning_rate": 1.9358220088693838e-05, "loss": 0.3925, "step": 6176 }, { "epoch": 0.1146949944861746, "grad_norm": 0.255907267332077, "learning_rate": 1.935780886864209e-05, "loss": 0.2269, "step": 6178 }, { "epoch": 0.11473212462359324, "grad_norm": 0.3502480089664459, "learning_rate": 1.935739752125859e-05, "loss": 0.564, "step": 6180 }, { "epoch": 0.11476925476101187, "grad_norm": 0.4209260642528534, "learning_rate": 1.9356986046548942e-05, "loss": 0.3796, "step": 6182 }, { "epoch": 0.11480638489843051, "grad_norm": 0.4643157422542572, "learning_rate": 1.9356574444518736e-05, "loss": 0.2852, "step": 6184 }, { "epoch": 0.11484351503584915, "grad_norm": 0.2948068082332611, "learning_rate": 1.9356162715173582e-05, "loss": 0.2803, "step": 6186 }, { "epoch": 0.11488064517326779, "grad_norm": 0.3148733675479889, "learning_rate": 1.935575085851908e-05, "loss": 0.3623, "step": 6188 }, { "epoch": 0.11491777531068642, "grad_norm": 0.24655990302562714, "learning_rate": 1.935533887456083e-05, "loss": 0.4077, "step": 6190 }, { "epoch": 0.11495490544810506, "grad_norm": 0.31325700879096985, "learning_rate": 1.935492676330444e-05, "loss": 0.3261, "step": 6192 }, { "epoch": 0.1149920355855237, "grad_norm": 0.28601422905921936, "learning_rate": 1.9354514524755516e-05, "loss": 0.3074, "step": 6194 }, { "epoch": 0.11502916572294235, "grad_norm": 0.3692812919616699, "learning_rate": 1.935410215891967e-05, "loss": 0.2409, "step": 6196 }, { "epoch": 0.11506629586036098, "grad_norm": 0.47791656851768494, "learning_rate": 1.9353689665802514e-05, "loss": 0.3236, "step": 6198 }, { "epoch": 0.11510342599777962, "grad_norm": 0.37320658564567566, "learning_rate": 1.9353277045409657e-05, "loss": 0.3115, "step": 6200 }, { "epoch": 0.11514055613519826, "grad_norm": 0.4265114367008209, "learning_rate": 1.935286429774672e-05, "loss": 0.5481, "step": 6202 }, { "epoch": 0.11517768627261689, "grad_norm": 0.4482223689556122, "learning_rate": 1.9352451422819312e-05, "loss": 0.3222, "step": 6204 }, { "epoch": 0.11521481641003553, "grad_norm": 0.44008082151412964, "learning_rate": 1.9352038420633056e-05, "loss": 0.3901, "step": 6206 }, { "epoch": 0.11525194654745417, "grad_norm": 0.3360796868801117, "learning_rate": 1.9351625291193564e-05, "loss": 0.418, "step": 6208 }, { "epoch": 0.11528907668487282, "grad_norm": 0.33378854393959045, "learning_rate": 1.935121203450647e-05, "loss": 0.4117, "step": 6210 }, { "epoch": 0.11532620682229144, "grad_norm": 0.4222334027290344, "learning_rate": 1.935079865057739e-05, "loss": 0.5197, "step": 6212 }, { "epoch": 0.11536333695971009, "grad_norm": 0.5520004630088806, "learning_rate": 1.9350385139411946e-05, "loss": 0.3127, "step": 6214 }, { "epoch": 0.11540046709712873, "grad_norm": 0.32125914096832275, "learning_rate": 1.9349971501015767e-05, "loss": 0.4435, "step": 6216 }, { "epoch": 0.11543759723454737, "grad_norm": 0.3034518361091614, "learning_rate": 1.9349557735394483e-05, "loss": 0.1763, "step": 6218 }, { "epoch": 0.115474727371966, "grad_norm": 0.29721304774284363, "learning_rate": 1.9349143842553727e-05, "loss": 0.2028, "step": 6220 }, { "epoch": 0.11551185750938464, "grad_norm": 0.2339998483657837, "learning_rate": 1.9348729822499125e-05, "loss": 0.274, "step": 6222 }, { "epoch": 0.11554898764680328, "grad_norm": 0.32831862568855286, "learning_rate": 1.9348315675236312e-05, "loss": 0.37, "step": 6224 }, { "epoch": 0.11558611778422193, "grad_norm": 0.3146616816520691, "learning_rate": 1.9347901400770923e-05, "loss": 0.2613, "step": 6226 }, { "epoch": 0.11562324792164055, "grad_norm": 0.3267780840396881, "learning_rate": 1.93474869991086e-05, "loss": 0.3033, "step": 6228 }, { "epoch": 0.1156603780590592, "grad_norm": 0.3073938488960266, "learning_rate": 1.9347072470254976e-05, "loss": 0.3981, "step": 6230 }, { "epoch": 0.11569750819647784, "grad_norm": 0.5259581804275513, "learning_rate": 1.9346657814215694e-05, "loss": 0.3399, "step": 6232 }, { "epoch": 0.11573463833389647, "grad_norm": 0.3002197742462158, "learning_rate": 1.9346243030996394e-05, "loss": 0.4149, "step": 6234 }, { "epoch": 0.11577176847131511, "grad_norm": 0.3518536388874054, "learning_rate": 1.934582812060272e-05, "loss": 0.5392, "step": 6236 }, { "epoch": 0.11580889860873375, "grad_norm": 0.4209156036376953, "learning_rate": 1.934541308304032e-05, "loss": 0.311, "step": 6238 }, { "epoch": 0.1158460287461524, "grad_norm": 0.2754109501838684, "learning_rate": 1.9344997918314846e-05, "loss": 0.341, "step": 6240 }, { "epoch": 0.11588315888357102, "grad_norm": 0.30275246500968933, "learning_rate": 1.934458262643194e-05, "loss": 0.2383, "step": 6242 }, { "epoch": 0.11592028902098966, "grad_norm": 0.5595771670341492, "learning_rate": 1.934416720739725e-05, "loss": 0.2676, "step": 6244 }, { "epoch": 0.1159574191584083, "grad_norm": 0.2599327862262726, "learning_rate": 1.9343751661216435e-05, "loss": 0.2845, "step": 6246 }, { "epoch": 0.11599454929582695, "grad_norm": 0.34935829043388367, "learning_rate": 1.934333598789515e-05, "loss": 0.4269, "step": 6248 }, { "epoch": 0.11603167943324558, "grad_norm": 0.35757824778556824, "learning_rate": 1.9342920187439046e-05, "loss": 0.2328, "step": 6250 }, { "epoch": 0.11606880957066422, "grad_norm": 0.34092605113983154, "learning_rate": 1.9342504259853788e-05, "loss": 0.4343, "step": 6252 }, { "epoch": 0.11610593970808286, "grad_norm": 0.3107389509677887, "learning_rate": 1.9342088205145027e-05, "loss": 0.3598, "step": 6254 }, { "epoch": 0.1161430698455015, "grad_norm": 0.387880802154541, "learning_rate": 1.934167202331843e-05, "loss": 0.3883, "step": 6256 }, { "epoch": 0.11618019998292013, "grad_norm": 0.40455198287963867, "learning_rate": 1.9341255714379657e-05, "loss": 0.4243, "step": 6258 }, { "epoch": 0.11621733012033877, "grad_norm": 0.30802977085113525, "learning_rate": 1.9340839278334376e-05, "loss": 0.2851, "step": 6260 }, { "epoch": 0.11625446025775742, "grad_norm": 0.30081719160079956, "learning_rate": 1.934042271518825e-05, "loss": 0.3266, "step": 6262 }, { "epoch": 0.11629159039517606, "grad_norm": 0.4688226878643036, "learning_rate": 1.9340006024946947e-05, "loss": 0.4721, "step": 6264 }, { "epoch": 0.11632872053259469, "grad_norm": 0.34842449426651, "learning_rate": 1.9339589207616144e-05, "loss": 0.4076, "step": 6266 }, { "epoch": 0.11636585067001333, "grad_norm": 0.861624538898468, "learning_rate": 1.93391722632015e-05, "loss": 0.3153, "step": 6268 }, { "epoch": 0.11640298080743197, "grad_norm": 0.35545170307159424, "learning_rate": 1.93387551917087e-05, "loss": 0.2755, "step": 6270 }, { "epoch": 0.1164401109448506, "grad_norm": 0.3339941203594208, "learning_rate": 1.9338337993143415e-05, "loss": 0.1827, "step": 6272 }, { "epoch": 0.11647724108226924, "grad_norm": 0.3039368987083435, "learning_rate": 1.933792066751132e-05, "loss": 0.3806, "step": 6274 }, { "epoch": 0.11651437121968788, "grad_norm": 0.3907169997692108, "learning_rate": 1.9337503214818097e-05, "loss": 0.2715, "step": 6276 }, { "epoch": 0.11655150135710653, "grad_norm": 0.3075578212738037, "learning_rate": 1.933708563506942e-05, "loss": 0.3609, "step": 6278 }, { "epoch": 0.11658863149452516, "grad_norm": 0.39553922414779663, "learning_rate": 1.933666792827098e-05, "loss": 0.3306, "step": 6280 }, { "epoch": 0.1166257616319438, "grad_norm": 0.462171733379364, "learning_rate": 1.9336250094428456e-05, "loss": 0.3146, "step": 6282 }, { "epoch": 0.11666289176936244, "grad_norm": 0.44297927618026733, "learning_rate": 1.9335832133547527e-05, "loss": 0.5232, "step": 6284 }, { "epoch": 0.11670002190678108, "grad_norm": 0.32021188735961914, "learning_rate": 1.9335414045633893e-05, "loss": 0.3727, "step": 6286 }, { "epoch": 0.11673715204419971, "grad_norm": 0.2519427537918091, "learning_rate": 1.9334995830693234e-05, "loss": 0.2275, "step": 6288 }, { "epoch": 0.11677428218161835, "grad_norm": 0.3117574453353882, "learning_rate": 1.9334577488731244e-05, "loss": 0.4021, "step": 6290 }, { "epoch": 0.116811412319037, "grad_norm": 0.33603936433792114, "learning_rate": 1.9334159019753618e-05, "loss": 0.3638, "step": 6292 }, { "epoch": 0.11684854245645564, "grad_norm": 0.43508630990982056, "learning_rate": 1.9333740423766044e-05, "loss": 0.3672, "step": 6294 }, { "epoch": 0.11688567259387427, "grad_norm": 0.49777570366859436, "learning_rate": 1.933332170077422e-05, "loss": 0.2723, "step": 6296 }, { "epoch": 0.11692280273129291, "grad_norm": 0.32379472255706787, "learning_rate": 1.9332902850783844e-05, "loss": 0.4202, "step": 6298 }, { "epoch": 0.11695993286871155, "grad_norm": 0.30513545870780945, "learning_rate": 1.9332483873800613e-05, "loss": 0.4536, "step": 6300 }, { "epoch": 0.11699706300613019, "grad_norm": 0.400575190782547, "learning_rate": 1.9332064769830237e-05, "loss": 0.3342, "step": 6302 }, { "epoch": 0.11703419314354882, "grad_norm": 0.34833985567092896, "learning_rate": 1.9331645538878407e-05, "loss": 0.4048, "step": 6304 }, { "epoch": 0.11707132328096746, "grad_norm": 0.3945656418800354, "learning_rate": 1.9331226180950835e-05, "loss": 0.3248, "step": 6306 }, { "epoch": 0.1171084534183861, "grad_norm": 0.3738468587398529, "learning_rate": 1.9330806696053225e-05, "loss": 0.2076, "step": 6308 }, { "epoch": 0.11714558355580473, "grad_norm": 0.28878864645957947, "learning_rate": 1.9330387084191285e-05, "loss": 0.3897, "step": 6310 }, { "epoch": 0.11718271369322338, "grad_norm": 0.311980277299881, "learning_rate": 1.9329967345370724e-05, "loss": 0.2539, "step": 6312 }, { "epoch": 0.11721984383064202, "grad_norm": 0.30747777223587036, "learning_rate": 1.932954747959725e-05, "loss": 0.2191, "step": 6314 }, { "epoch": 0.11725697396806066, "grad_norm": 0.3596014380455017, "learning_rate": 1.9329127486876586e-05, "loss": 0.3636, "step": 6316 }, { "epoch": 0.11729410410547929, "grad_norm": 0.3481011986732483, "learning_rate": 1.932870736721444e-05, "loss": 0.24, "step": 6318 }, { "epoch": 0.11733123424289793, "grad_norm": 0.28967753052711487, "learning_rate": 1.9328287120616526e-05, "loss": 0.3163, "step": 6320 }, { "epoch": 0.11736836438031657, "grad_norm": 0.33237722516059875, "learning_rate": 1.932786674708857e-05, "loss": 0.3142, "step": 6322 }, { "epoch": 0.11740549451773521, "grad_norm": 0.3374473750591278, "learning_rate": 1.9327446246636285e-05, "loss": 0.3372, "step": 6324 }, { "epoch": 0.11744262465515384, "grad_norm": 0.2985314726829529, "learning_rate": 1.9327025619265396e-05, "loss": 0.261, "step": 6326 }, { "epoch": 0.11747975479257249, "grad_norm": 0.3636707663536072, "learning_rate": 1.9326604864981626e-05, "loss": 0.3909, "step": 6328 }, { "epoch": 0.11751688492999113, "grad_norm": 0.33055102825164795, "learning_rate": 1.93261839837907e-05, "loss": 0.3352, "step": 6330 }, { "epoch": 0.11755401506740977, "grad_norm": 0.31929707527160645, "learning_rate": 1.9325762975698346e-05, "loss": 0.1707, "step": 6332 }, { "epoch": 0.1175911452048284, "grad_norm": 0.3036884665489197, "learning_rate": 1.9325341840710292e-05, "loss": 0.2002, "step": 6334 }, { "epoch": 0.11762827534224704, "grad_norm": 0.3538627624511719, "learning_rate": 1.9324920578832267e-05, "loss": 0.3487, "step": 6336 }, { "epoch": 0.11766540547966568, "grad_norm": 0.31544238328933716, "learning_rate": 1.9324499190070004e-05, "loss": 0.3783, "step": 6338 }, { "epoch": 0.11770253561708433, "grad_norm": 0.35008835792541504, "learning_rate": 1.932407767442924e-05, "loss": 0.3911, "step": 6340 }, { "epoch": 0.11773966575450295, "grad_norm": 0.4592518210411072, "learning_rate": 1.9323656031915706e-05, "loss": 0.3756, "step": 6342 }, { "epoch": 0.1177767958919216, "grad_norm": 0.4852827787399292, "learning_rate": 1.932323426253514e-05, "loss": 0.3631, "step": 6344 }, { "epoch": 0.11781392602934024, "grad_norm": 0.39585623145103455, "learning_rate": 1.932281236629328e-05, "loss": 0.5045, "step": 6346 }, { "epoch": 0.11785105616675887, "grad_norm": 0.2956748604774475, "learning_rate": 1.9322390343195872e-05, "loss": 0.4251, "step": 6348 }, { "epoch": 0.11788818630417751, "grad_norm": 0.3633421063423157, "learning_rate": 1.9321968193248654e-05, "loss": 0.2381, "step": 6350 }, { "epoch": 0.11792531644159615, "grad_norm": 0.4885057210922241, "learning_rate": 1.932154591645737e-05, "loss": 0.2246, "step": 6352 }, { "epoch": 0.11796244657901479, "grad_norm": 0.1861577332019806, "learning_rate": 1.9321123512827773e-05, "loss": 0.1933, "step": 6354 }, { "epoch": 0.11799957671643342, "grad_norm": 0.3718888759613037, "learning_rate": 1.93207009823656e-05, "loss": 0.4148, "step": 6356 }, { "epoch": 0.11803670685385206, "grad_norm": 0.2409990131855011, "learning_rate": 1.9320278325076607e-05, "loss": 0.4167, "step": 6358 }, { "epoch": 0.1180738369912707, "grad_norm": 0.34573447704315186, "learning_rate": 1.9319855540966545e-05, "loss": 0.2, "step": 6360 }, { "epoch": 0.11811096712868935, "grad_norm": 0.21412226557731628, "learning_rate": 1.9319432630041162e-05, "loss": 0.5324, "step": 6362 }, { "epoch": 0.11814809726610798, "grad_norm": 0.3289864659309387, "learning_rate": 1.9319009592306217e-05, "loss": 0.4534, "step": 6364 }, { "epoch": 0.11818522740352662, "grad_norm": 0.32960912585258484, "learning_rate": 1.9318586427767464e-05, "loss": 0.5263, "step": 6366 }, { "epoch": 0.11822235754094526, "grad_norm": 0.35673004388809204, "learning_rate": 1.9318163136430665e-05, "loss": 0.2807, "step": 6368 }, { "epoch": 0.1182594876783639, "grad_norm": 0.33887341618537903, "learning_rate": 1.9317739718301574e-05, "loss": 0.2443, "step": 6370 }, { "epoch": 0.11829661781578253, "grad_norm": 0.39766794443130493, "learning_rate": 1.931731617338596e-05, "loss": 0.4343, "step": 6372 }, { "epoch": 0.11833374795320117, "grad_norm": 0.38862061500549316, "learning_rate": 1.9316892501689577e-05, "loss": 0.4005, "step": 6374 }, { "epoch": 0.11837087809061982, "grad_norm": 0.36087262630462646, "learning_rate": 1.9316468703218194e-05, "loss": 0.2489, "step": 6376 }, { "epoch": 0.11840800822803846, "grad_norm": 0.49925076961517334, "learning_rate": 1.931604477797758e-05, "loss": 0.2607, "step": 6378 }, { "epoch": 0.11844513836545709, "grad_norm": 0.5335688591003418, "learning_rate": 1.93156207259735e-05, "loss": 0.4691, "step": 6380 }, { "epoch": 0.11848226850287573, "grad_norm": 0.5479843020439148, "learning_rate": 1.9315196547211727e-05, "loss": 0.2915, "step": 6382 }, { "epoch": 0.11851939864029437, "grad_norm": 0.32797855138778687, "learning_rate": 1.931477224169803e-05, "loss": 0.4484, "step": 6384 }, { "epoch": 0.118556528777713, "grad_norm": 0.35125523805618286, "learning_rate": 1.9314347809438184e-05, "loss": 0.3748, "step": 6386 }, { "epoch": 0.11859365891513164, "grad_norm": 0.40221527218818665, "learning_rate": 1.9313923250437964e-05, "loss": 0.4263, "step": 6388 }, { "epoch": 0.11863078905255028, "grad_norm": 0.32278522849082947, "learning_rate": 1.931349856470315e-05, "loss": 0.4645, "step": 6390 }, { "epoch": 0.11866791918996893, "grad_norm": 0.29107481241226196, "learning_rate": 1.9313073752239514e-05, "loss": 0.3998, "step": 6392 }, { "epoch": 0.11870504932738755, "grad_norm": 0.3763576149940491, "learning_rate": 1.931264881305284e-05, "loss": 0.3354, "step": 6394 }, { "epoch": 0.1187421794648062, "grad_norm": 0.45447906851768494, "learning_rate": 1.9312223747148912e-05, "loss": 0.22, "step": 6396 }, { "epoch": 0.11877930960222484, "grad_norm": 0.38563793897628784, "learning_rate": 1.9311798554533512e-05, "loss": 0.1965, "step": 6398 }, { "epoch": 0.11881643973964348, "grad_norm": 0.26056474447250366, "learning_rate": 1.9311373235212426e-05, "loss": 0.3718, "step": 6400 }, { "epoch": 0.11885356987706211, "grad_norm": 0.461426705121994, "learning_rate": 1.931094778919144e-05, "loss": 0.4587, "step": 6402 }, { "epoch": 0.11889070001448075, "grad_norm": 0.31230196356773376, "learning_rate": 1.9310522216476344e-05, "loss": 0.174, "step": 6404 }, { "epoch": 0.1189278301518994, "grad_norm": 0.3929494321346283, "learning_rate": 1.931009651707293e-05, "loss": 0.3449, "step": 6406 }, { "epoch": 0.11896496028931804, "grad_norm": 0.5228191018104553, "learning_rate": 1.930967069098699e-05, "loss": 0.282, "step": 6408 }, { "epoch": 0.11900209042673666, "grad_norm": 0.3462566137313843, "learning_rate": 1.9309244738224313e-05, "loss": 0.4301, "step": 6410 }, { "epoch": 0.11903922056415531, "grad_norm": 0.3149477243423462, "learning_rate": 1.9308818658790703e-05, "loss": 0.138, "step": 6412 }, { "epoch": 0.11907635070157395, "grad_norm": 0.31245216727256775, "learning_rate": 1.9308392452691955e-05, "loss": 0.3328, "step": 6414 }, { "epoch": 0.11911348083899259, "grad_norm": 1.0327508449554443, "learning_rate": 1.9307966119933864e-05, "loss": 0.2014, "step": 6416 }, { "epoch": 0.11915061097641122, "grad_norm": 0.3724728226661682, "learning_rate": 1.930753966052224e-05, "loss": 0.3812, "step": 6418 }, { "epoch": 0.11918774111382986, "grad_norm": 0.3237592279911041, "learning_rate": 1.930711307446288e-05, "loss": 0.3758, "step": 6420 }, { "epoch": 0.1192248712512485, "grad_norm": 0.322488933801651, "learning_rate": 1.9306686361761583e-05, "loss": 0.3573, "step": 6422 }, { "epoch": 0.11926200138866713, "grad_norm": 0.3465365767478943, "learning_rate": 1.930625952242417e-05, "loss": 0.2454, "step": 6424 }, { "epoch": 0.11929913152608577, "grad_norm": 0.3229961097240448, "learning_rate": 1.930583255645643e-05, "loss": 0.406, "step": 6426 }, { "epoch": 0.11933626166350442, "grad_norm": 0.42423954606056213, "learning_rate": 1.930540546386419e-05, "loss": 0.2641, "step": 6428 }, { "epoch": 0.11937339180092306, "grad_norm": 0.38974571228027344, "learning_rate": 1.9304978244653255e-05, "loss": 0.3389, "step": 6430 }, { "epoch": 0.11941052193834169, "grad_norm": 0.3108026683330536, "learning_rate": 1.9304550898829433e-05, "loss": 0.3768, "step": 6432 }, { "epoch": 0.11944765207576033, "grad_norm": 0.29700908064842224, "learning_rate": 1.9304123426398547e-05, "loss": 0.2758, "step": 6434 }, { "epoch": 0.11948478221317897, "grad_norm": 0.29016920924186707, "learning_rate": 1.930369582736641e-05, "loss": 0.3749, "step": 6436 }, { "epoch": 0.11952191235059761, "grad_norm": 0.45442160964012146, "learning_rate": 1.9303268101738843e-05, "loss": 0.4117, "step": 6438 }, { "epoch": 0.11955904248801624, "grad_norm": 0.3652421236038208, "learning_rate": 1.9302840249521656e-05, "loss": 0.3141, "step": 6440 }, { "epoch": 0.11959617262543489, "grad_norm": 0.34132668375968933, "learning_rate": 1.9302412270720684e-05, "loss": 0.4183, "step": 6442 }, { "epoch": 0.11963330276285353, "grad_norm": 0.26527708768844604, "learning_rate": 1.9301984165341748e-05, "loss": 0.3891, "step": 6444 }, { "epoch": 0.11967043290027217, "grad_norm": 0.2945473790168762, "learning_rate": 1.9301555933390665e-05, "loss": 0.3966, "step": 6446 }, { "epoch": 0.1197075630376908, "grad_norm": 0.2841329574584961, "learning_rate": 1.930112757487327e-05, "loss": 0.548, "step": 6448 }, { "epoch": 0.11974469317510944, "grad_norm": 0.3590245842933655, "learning_rate": 1.9300699089795386e-05, "loss": 0.2249, "step": 6450 }, { "epoch": 0.11978182331252808, "grad_norm": 0.47244569659233093, "learning_rate": 1.930027047816285e-05, "loss": 0.36, "step": 6452 }, { "epoch": 0.11981895344994672, "grad_norm": 0.3336014449596405, "learning_rate": 1.9299841739981488e-05, "loss": 0.2288, "step": 6454 }, { "epoch": 0.11985608358736535, "grad_norm": 0.2977416217327118, "learning_rate": 1.9299412875257135e-05, "loss": 0.3897, "step": 6456 }, { "epoch": 0.119893213724784, "grad_norm": 0.38169124722480774, "learning_rate": 1.929898388399563e-05, "loss": 0.4117, "step": 6458 }, { "epoch": 0.11993034386220264, "grad_norm": 0.3038351833820343, "learning_rate": 1.9298554766202805e-05, "loss": 0.3731, "step": 6460 }, { "epoch": 0.11996747399962127, "grad_norm": 0.3170458674430847, "learning_rate": 1.9298125521884506e-05, "loss": 0.2526, "step": 6462 }, { "epoch": 0.12000460413703991, "grad_norm": 0.344821035861969, "learning_rate": 1.9297696151046567e-05, "loss": 0.2507, "step": 6464 }, { "epoch": 0.12004173427445855, "grad_norm": 0.322478711605072, "learning_rate": 1.9297266653694834e-05, "loss": 0.4167, "step": 6466 }, { "epoch": 0.12007886441187719, "grad_norm": 0.41737422347068787, "learning_rate": 1.929683702983515e-05, "loss": 0.4625, "step": 6468 }, { "epoch": 0.12011599454929582, "grad_norm": 0.2380305379629135, "learning_rate": 1.929640727947336e-05, "loss": 0.2276, "step": 6470 }, { "epoch": 0.12015312468671446, "grad_norm": 0.3471153974533081, "learning_rate": 1.9295977402615316e-05, "loss": 0.5055, "step": 6472 }, { "epoch": 0.1201902548241331, "grad_norm": 0.32918861508369446, "learning_rate": 1.9295547399266863e-05, "loss": 0.3427, "step": 6474 }, { "epoch": 0.12022738496155175, "grad_norm": 0.3176921010017395, "learning_rate": 1.9295117269433853e-05, "loss": 0.2522, "step": 6476 }, { "epoch": 0.12026451509897038, "grad_norm": 0.3586351275444031, "learning_rate": 1.929468701312214e-05, "loss": 0.4417, "step": 6478 }, { "epoch": 0.12030164523638902, "grad_norm": 0.3228225111961365, "learning_rate": 1.9294256630337574e-05, "loss": 0.2962, "step": 6480 }, { "epoch": 0.12033877537380766, "grad_norm": 0.3039906322956085, "learning_rate": 1.9293826121086022e-05, "loss": 0.2923, "step": 6482 }, { "epoch": 0.1203759055112263, "grad_norm": 0.2316453754901886, "learning_rate": 1.929339548537333e-05, "loss": 0.4776, "step": 6484 }, { "epoch": 0.12041303564864493, "grad_norm": 0.3888653814792633, "learning_rate": 1.929296472320536e-05, "loss": 0.3179, "step": 6486 }, { "epoch": 0.12045016578606357, "grad_norm": 0.26490679383277893, "learning_rate": 1.929253383458798e-05, "loss": 0.4568, "step": 6488 }, { "epoch": 0.12048729592348222, "grad_norm": 0.5689661502838135, "learning_rate": 1.9292102819527046e-05, "loss": 0.4179, "step": 6490 }, { "epoch": 0.12052442606090086, "grad_norm": 0.22351336479187012, "learning_rate": 1.929167167802843e-05, "loss": 0.2891, "step": 6492 }, { "epoch": 0.12056155619831949, "grad_norm": 0.37877222895622253, "learning_rate": 1.9291240410097986e-05, "loss": 0.4748, "step": 6494 }, { "epoch": 0.12059868633573813, "grad_norm": 0.46963584423065186, "learning_rate": 1.92908090157416e-05, "loss": 0.3359, "step": 6496 }, { "epoch": 0.12063581647315677, "grad_norm": 0.4761298894882202, "learning_rate": 1.9290377494965127e-05, "loss": 0.3705, "step": 6498 }, { "epoch": 0.1206729466105754, "grad_norm": 0.3215329349040985, "learning_rate": 1.9289945847774443e-05, "loss": 0.3573, "step": 6500 }, { "epoch": 0.12071007674799404, "grad_norm": 0.2952653765678406, "learning_rate": 1.9289514074175424e-05, "loss": 0.2789, "step": 6502 }, { "epoch": 0.12074720688541268, "grad_norm": 0.35248643159866333, "learning_rate": 1.9289082174173947e-05, "loss": 0.3571, "step": 6504 }, { "epoch": 0.12078433702283133, "grad_norm": 0.3120441138744354, "learning_rate": 1.9288650147775882e-05, "loss": 0.3292, "step": 6506 }, { "epoch": 0.12082146716024995, "grad_norm": 0.21641826629638672, "learning_rate": 1.9288217994987115e-05, "loss": 0.2532, "step": 6508 }, { "epoch": 0.1208585972976686, "grad_norm": 0.5590580701828003, "learning_rate": 1.928778571581352e-05, "loss": 0.2118, "step": 6510 }, { "epoch": 0.12089572743508724, "grad_norm": 0.4010406732559204, "learning_rate": 1.928735331026098e-05, "loss": 0.3141, "step": 6512 }, { "epoch": 0.12093285757250588, "grad_norm": 0.2877483367919922, "learning_rate": 1.928692077833538e-05, "loss": 0.3298, "step": 6514 }, { "epoch": 0.12096998770992451, "grad_norm": 0.29856744408607483, "learning_rate": 1.928648812004261e-05, "loss": 0.296, "step": 6516 }, { "epoch": 0.12100711784734315, "grad_norm": 0.29913046956062317, "learning_rate": 1.9286055335388552e-05, "loss": 0.4295, "step": 6518 }, { "epoch": 0.1210442479847618, "grad_norm": 0.33249711990356445, "learning_rate": 1.9285622424379093e-05, "loss": 0.3854, "step": 6520 }, { "epoch": 0.12108137812218044, "grad_norm": 0.29221323132514954, "learning_rate": 1.9285189387020127e-05, "loss": 0.3249, "step": 6522 }, { "epoch": 0.12111850825959906, "grad_norm": 0.3873700499534607, "learning_rate": 1.9284756223317548e-05, "loss": 0.2973, "step": 6524 }, { "epoch": 0.1211556383970177, "grad_norm": 0.35372382402420044, "learning_rate": 1.928432293327725e-05, "loss": 0.3401, "step": 6526 }, { "epoch": 0.12119276853443635, "grad_norm": 0.40467023849487305, "learning_rate": 1.9283889516905122e-05, "loss": 0.1733, "step": 6528 }, { "epoch": 0.12122989867185499, "grad_norm": 0.4137405753135681, "learning_rate": 1.9283455974207066e-05, "loss": 0.3361, "step": 6530 }, { "epoch": 0.12126702880927362, "grad_norm": 0.46695342659950256, "learning_rate": 1.9283022305188985e-05, "loss": 0.345, "step": 6532 }, { "epoch": 0.12130415894669226, "grad_norm": 0.4510466456413269, "learning_rate": 1.9282588509856773e-05, "loss": 0.3132, "step": 6534 }, { "epoch": 0.1213412890841109, "grad_norm": 0.27598023414611816, "learning_rate": 1.9282154588216335e-05, "loss": 0.3661, "step": 6536 }, { "epoch": 0.12137841922152953, "grad_norm": 0.38803061842918396, "learning_rate": 1.928172054027358e-05, "loss": 0.417, "step": 6538 }, { "epoch": 0.12141554935894817, "grad_norm": 0.24103468656539917, "learning_rate": 1.928128636603441e-05, "loss": 0.4514, "step": 6540 }, { "epoch": 0.12145267949636682, "grad_norm": 0.31650304794311523, "learning_rate": 1.9280852065504733e-05, "loss": 0.2983, "step": 6542 }, { "epoch": 0.12148980963378546, "grad_norm": 0.3881564140319824, "learning_rate": 1.9280417638690455e-05, "loss": 0.4158, "step": 6544 }, { "epoch": 0.12152693977120409, "grad_norm": 0.5013611316680908, "learning_rate": 1.9279983085597492e-05, "loss": 0.3397, "step": 6546 }, { "epoch": 0.12156406990862273, "grad_norm": 0.6283319592475891, "learning_rate": 1.927954840623176e-05, "loss": 0.5536, "step": 6548 }, { "epoch": 0.12160120004604137, "grad_norm": 0.304241806268692, "learning_rate": 1.9279113600599166e-05, "loss": 0.2746, "step": 6550 }, { "epoch": 0.12163833018346001, "grad_norm": 0.5270283818244934, "learning_rate": 1.9278678668705633e-05, "loss": 0.3561, "step": 6552 }, { "epoch": 0.12167546032087864, "grad_norm": 0.3649897575378418, "learning_rate": 1.9278243610557075e-05, "loss": 0.3251, "step": 6554 }, { "epoch": 0.12171259045829728, "grad_norm": 0.3196565806865692, "learning_rate": 1.927780842615941e-05, "loss": 0.3917, "step": 6556 }, { "epoch": 0.12174972059571593, "grad_norm": 0.2557104229927063, "learning_rate": 1.9277373115518566e-05, "loss": 0.3491, "step": 6558 }, { "epoch": 0.12178685073313457, "grad_norm": 0.3482026755809784, "learning_rate": 1.9276937678640462e-05, "loss": 0.4229, "step": 6560 }, { "epoch": 0.1218239808705532, "grad_norm": 0.39679408073425293, "learning_rate": 1.9276502115531025e-05, "loss": 0.2759, "step": 6562 }, { "epoch": 0.12186111100797184, "grad_norm": 0.39061054587364197, "learning_rate": 1.927606642619618e-05, "loss": 0.3665, "step": 6564 }, { "epoch": 0.12189824114539048, "grad_norm": 0.3834904730319977, "learning_rate": 1.9275630610641855e-05, "loss": 0.2948, "step": 6566 }, { "epoch": 0.12193537128280912, "grad_norm": 0.3347572982311249, "learning_rate": 1.927519466887398e-05, "loss": 0.5463, "step": 6568 }, { "epoch": 0.12197250142022775, "grad_norm": 0.3545336425304413, "learning_rate": 1.927475860089849e-05, "loss": 0.4329, "step": 6570 }, { "epoch": 0.1220096315576464, "grad_norm": 0.3152683675289154, "learning_rate": 1.9274322406721318e-05, "loss": 0.4815, "step": 6572 }, { "epoch": 0.12204676169506504, "grad_norm": 0.27598464488983154, "learning_rate": 1.9273886086348396e-05, "loss": 0.2405, "step": 6574 }, { "epoch": 0.12208389183248367, "grad_norm": 0.1969933807849884, "learning_rate": 1.927344963978566e-05, "loss": 0.2258, "step": 6576 }, { "epoch": 0.12212102196990231, "grad_norm": 0.3946920335292816, "learning_rate": 1.9273013067039055e-05, "loss": 0.3941, "step": 6578 }, { "epoch": 0.12215815210732095, "grad_norm": 0.24420249462127686, "learning_rate": 1.927257636811452e-05, "loss": 0.4051, "step": 6580 }, { "epoch": 0.12219528224473959, "grad_norm": 0.3174636662006378, "learning_rate": 1.927213954301799e-05, "loss": 0.2926, "step": 6582 }, { "epoch": 0.12223241238215822, "grad_norm": 0.31497666239738464, "learning_rate": 1.927170259175542e-05, "loss": 0.2266, "step": 6584 }, { "epoch": 0.12226954251957686, "grad_norm": 0.3753055930137634, "learning_rate": 1.9271265514332745e-05, "loss": 0.5234, "step": 6586 }, { "epoch": 0.1223066726569955, "grad_norm": 0.4616703689098358, "learning_rate": 1.927082831075592e-05, "loss": 0.4288, "step": 6588 }, { "epoch": 0.12234380279441415, "grad_norm": 0.37186765670776367, "learning_rate": 1.9270390981030887e-05, "loss": 0.1932, "step": 6590 }, { "epoch": 0.12238093293183278, "grad_norm": 0.31633326411247253, "learning_rate": 1.9269953525163604e-05, "loss": 0.2587, "step": 6592 }, { "epoch": 0.12241806306925142, "grad_norm": 0.4911516308784485, "learning_rate": 1.926951594316002e-05, "loss": 0.4691, "step": 6594 }, { "epoch": 0.12245519320667006, "grad_norm": 0.44741296768188477, "learning_rate": 1.9269078235026088e-05, "loss": 0.2454, "step": 6596 }, { "epoch": 0.1224923233440887, "grad_norm": 0.23290644586086273, "learning_rate": 1.9268640400767767e-05, "loss": 0.2905, "step": 6598 }, { "epoch": 0.12252945348150733, "grad_norm": 0.3089956045150757, "learning_rate": 1.926820244039101e-05, "loss": 0.3888, "step": 6600 }, { "epoch": 0.12256658361892597, "grad_norm": 0.25630542635917664, "learning_rate": 1.9267764353901784e-05, "loss": 0.4193, "step": 6602 }, { "epoch": 0.12260371375634461, "grad_norm": 0.32781144976615906, "learning_rate": 1.9267326141306043e-05, "loss": 0.3036, "step": 6604 }, { "epoch": 0.12264084389376326, "grad_norm": 0.3330346941947937, "learning_rate": 1.926688780260975e-05, "loss": 0.4617, "step": 6606 }, { "epoch": 0.12267797403118189, "grad_norm": 0.3724346458911896, "learning_rate": 1.9266449337818877e-05, "loss": 0.2683, "step": 6608 }, { "epoch": 0.12271510416860053, "grad_norm": 0.31460464000701904, "learning_rate": 1.9266010746939378e-05, "loss": 0.287, "step": 6610 }, { "epoch": 0.12275223430601917, "grad_norm": 0.4137624204158783, "learning_rate": 1.9265572029977232e-05, "loss": 0.4386, "step": 6612 }, { "epoch": 0.1227893644434378, "grad_norm": 0.39235442876815796, "learning_rate": 1.9265133186938403e-05, "loss": 0.5246, "step": 6614 }, { "epoch": 0.12282649458085644, "grad_norm": 0.371348112821579, "learning_rate": 1.9264694217828863e-05, "loss": 0.2307, "step": 6616 }, { "epoch": 0.12286362471827508, "grad_norm": 0.28955206274986267, "learning_rate": 1.9264255122654587e-05, "loss": 0.229, "step": 6618 }, { "epoch": 0.12290075485569373, "grad_norm": 0.3146589398384094, "learning_rate": 1.9263815901421547e-05, "loss": 0.2618, "step": 6620 }, { "epoch": 0.12293788499311235, "grad_norm": 0.34420573711395264, "learning_rate": 1.926337655413572e-05, "loss": 0.3771, "step": 6622 }, { "epoch": 0.122975015130531, "grad_norm": 0.3516557812690735, "learning_rate": 1.926293708080309e-05, "loss": 0.3948, "step": 6624 }, { "epoch": 0.12301214526794964, "grad_norm": 0.36902934312820435, "learning_rate": 1.9262497481429626e-05, "loss": 0.2671, "step": 6626 }, { "epoch": 0.12304927540536828, "grad_norm": 0.34632790088653564, "learning_rate": 1.926205775602132e-05, "loss": 0.4492, "step": 6628 }, { "epoch": 0.12308640554278691, "grad_norm": 0.3344527781009674, "learning_rate": 1.926161790458415e-05, "loss": 0.2571, "step": 6630 }, { "epoch": 0.12312353568020555, "grad_norm": 0.37171030044555664, "learning_rate": 1.9261177927124102e-05, "loss": 0.4355, "step": 6632 }, { "epoch": 0.1231606658176242, "grad_norm": 0.2987327575683594, "learning_rate": 1.9260737823647162e-05, "loss": 0.2807, "step": 6634 }, { "epoch": 0.12319779595504284, "grad_norm": 0.6868311762809753, "learning_rate": 1.9260297594159322e-05, "loss": 0.3338, "step": 6636 }, { "epoch": 0.12323492609246146, "grad_norm": 0.24660225212574005, "learning_rate": 1.9259857238666567e-05, "loss": 0.3914, "step": 6638 }, { "epoch": 0.1232720562298801, "grad_norm": 0.2904195189476013, "learning_rate": 1.9259416757174892e-05, "loss": 0.2212, "step": 6640 }, { "epoch": 0.12330918636729875, "grad_norm": 0.6242729425430298, "learning_rate": 1.9258976149690286e-05, "loss": 0.346, "step": 6642 }, { "epoch": 0.12334631650471739, "grad_norm": 0.2929135262966156, "learning_rate": 1.9258535416218754e-05, "loss": 0.2867, "step": 6644 }, { "epoch": 0.12338344664213602, "grad_norm": 0.36296072602272034, "learning_rate": 1.9258094556766287e-05, "loss": 0.1159, "step": 6646 }, { "epoch": 0.12342057677955466, "grad_norm": 0.40695279836654663, "learning_rate": 1.9257653571338883e-05, "loss": 0.3907, "step": 6648 }, { "epoch": 0.1234577069169733, "grad_norm": 0.3360540568828583, "learning_rate": 1.925721245994254e-05, "loss": 0.3343, "step": 6650 }, { "epoch": 0.12349483705439193, "grad_norm": 0.31927624344825745, "learning_rate": 1.925677122258327e-05, "loss": 0.2943, "step": 6652 }, { "epoch": 0.12353196719181057, "grad_norm": 0.3285635709762573, "learning_rate": 1.925632985926707e-05, "loss": 0.502, "step": 6654 }, { "epoch": 0.12356909732922922, "grad_norm": 0.31849533319473267, "learning_rate": 1.9255888369999946e-05, "loss": 0.4344, "step": 6656 }, { "epoch": 0.12360622746664786, "grad_norm": 0.3950842320919037, "learning_rate": 1.9255446754787906e-05, "loss": 0.2918, "step": 6658 }, { "epoch": 0.12364335760406649, "grad_norm": 0.2804225981235504, "learning_rate": 1.9255005013636958e-05, "loss": 0.3223, "step": 6660 }, { "epoch": 0.12368048774148513, "grad_norm": 0.3054465353488922, "learning_rate": 1.9254563146553114e-05, "loss": 0.3035, "step": 6662 }, { "epoch": 0.12371761787890377, "grad_norm": 0.34982171654701233, "learning_rate": 1.9254121153542384e-05, "loss": 0.3633, "step": 6664 }, { "epoch": 0.12375474801632241, "grad_norm": 0.39346396923065186, "learning_rate": 1.9253679034610787e-05, "loss": 0.4899, "step": 6666 }, { "epoch": 0.12379187815374104, "grad_norm": 0.4542764723300934, "learning_rate": 1.9253236789764337e-05, "loss": 0.3445, "step": 6668 }, { "epoch": 0.12382900829115968, "grad_norm": 0.3651010990142822, "learning_rate": 1.9252794419009052e-05, "loss": 0.28, "step": 6670 }, { "epoch": 0.12386613842857833, "grad_norm": 0.27628299593925476, "learning_rate": 1.9252351922350946e-05, "loss": 0.3314, "step": 6672 }, { "epoch": 0.12390326856599697, "grad_norm": 0.45232954621315, "learning_rate": 1.925190929979605e-05, "loss": 0.4632, "step": 6674 }, { "epoch": 0.1239403987034156, "grad_norm": 0.37899380922317505, "learning_rate": 1.925146655135037e-05, "loss": 0.4119, "step": 6676 }, { "epoch": 0.12397752884083424, "grad_norm": 0.25395578145980835, "learning_rate": 1.9251023677019952e-05, "loss": 0.3148, "step": 6678 }, { "epoch": 0.12401465897825288, "grad_norm": 0.46651968359947205, "learning_rate": 1.925058067681081e-05, "loss": 0.3641, "step": 6680 }, { "epoch": 0.12405178911567152, "grad_norm": 0.3114718198776245, "learning_rate": 1.9250137550728972e-05, "loss": 0.3062, "step": 6682 }, { "epoch": 0.12408891925309015, "grad_norm": 0.31508609652519226, "learning_rate": 1.924969429878047e-05, "loss": 0.2802, "step": 6684 }, { "epoch": 0.1241260493905088, "grad_norm": 0.38300344347953796, "learning_rate": 1.9249250920971336e-05, "loss": 0.3337, "step": 6686 }, { "epoch": 0.12416317952792744, "grad_norm": 0.3081687390804291, "learning_rate": 1.92488074173076e-05, "loss": 0.4789, "step": 6688 }, { "epoch": 0.12420030966534606, "grad_norm": 0.22241467237472534, "learning_rate": 1.9248363787795297e-05, "loss": 0.3204, "step": 6690 }, { "epoch": 0.12423743980276471, "grad_norm": 0.2904149889945984, "learning_rate": 1.9247920032440468e-05, "loss": 0.4287, "step": 6692 }, { "epoch": 0.12427456994018335, "grad_norm": 0.3065168559551239, "learning_rate": 1.9247476151249147e-05, "loss": 0.2763, "step": 6694 }, { "epoch": 0.12431170007760199, "grad_norm": 0.3422154188156128, "learning_rate": 1.9247032144227374e-05, "loss": 0.513, "step": 6696 }, { "epoch": 0.12434883021502062, "grad_norm": 0.4110053777694702, "learning_rate": 1.9246588011381192e-05, "loss": 0.4607, "step": 6698 }, { "epoch": 0.12438596035243926, "grad_norm": 0.3829737603664398, "learning_rate": 1.9246143752716645e-05, "loss": 0.4163, "step": 6700 }, { "epoch": 0.1244230904898579, "grad_norm": 0.32171669602394104, "learning_rate": 1.9245699368239776e-05, "loss": 0.7558, "step": 6702 }, { "epoch": 0.12446022062727655, "grad_norm": 0.3709343373775482, "learning_rate": 1.924525485795663e-05, "loss": 0.4078, "step": 6704 }, { "epoch": 0.12449735076469517, "grad_norm": 0.3441333770751953, "learning_rate": 1.9244810221873264e-05, "loss": 0.2391, "step": 6706 }, { "epoch": 0.12453448090211382, "grad_norm": 0.4153997004032135, "learning_rate": 1.924436545999572e-05, "loss": 0.2241, "step": 6708 }, { "epoch": 0.12457161103953246, "grad_norm": 0.29028961062431335, "learning_rate": 1.9243920572330047e-05, "loss": 0.2125, "step": 6710 }, { "epoch": 0.1246087411769511, "grad_norm": 0.2682378888130188, "learning_rate": 1.924347555888231e-05, "loss": 0.2812, "step": 6712 }, { "epoch": 0.12464587131436973, "grad_norm": 0.3279280662536621, "learning_rate": 1.9243030419658554e-05, "loss": 0.2952, "step": 6714 }, { "epoch": 0.12468300145178837, "grad_norm": 0.36582037806510925, "learning_rate": 1.9242585154664845e-05, "loss": 0.3329, "step": 6716 }, { "epoch": 0.12472013158920701, "grad_norm": 0.37971365451812744, "learning_rate": 1.924213976390723e-05, "loss": 0.2733, "step": 6718 }, { "epoch": 0.12475726172662566, "grad_norm": 0.29044920206069946, "learning_rate": 1.924169424739178e-05, "loss": 0.2522, "step": 6720 }, { "epoch": 0.12479439186404429, "grad_norm": 0.2184426337480545, "learning_rate": 1.9241248605124555e-05, "loss": 0.1542, "step": 6722 }, { "epoch": 0.12483152200146293, "grad_norm": 0.3339032232761383, "learning_rate": 1.924080283711162e-05, "loss": 0.4775, "step": 6724 }, { "epoch": 0.12486865213888157, "grad_norm": 0.36383771896362305, "learning_rate": 1.924035694335903e-05, "loss": 0.4332, "step": 6726 }, { "epoch": 0.1249057822763002, "grad_norm": 0.32313409447669983, "learning_rate": 1.9239910923872865e-05, "loss": 0.3358, "step": 6728 }, { "epoch": 0.12494291241371884, "grad_norm": 0.3498036861419678, "learning_rate": 1.923946477865919e-05, "loss": 0.2788, "step": 6730 }, { "epoch": 0.12498004255113748, "grad_norm": 0.36773255467414856, "learning_rate": 1.9239018507724074e-05, "loss": 0.3664, "step": 6732 }, { "epoch": 0.1250171726885561, "grad_norm": 0.38692253828048706, "learning_rate": 1.9238572111073584e-05, "loss": 0.4895, "step": 6734 }, { "epoch": 0.12505430282597477, "grad_norm": 0.4416910409927368, "learning_rate": 1.9238125588713807e-05, "loss": 0.4031, "step": 6736 }, { "epoch": 0.1250914329633934, "grad_norm": 0.320575475692749, "learning_rate": 1.923767894065081e-05, "loss": 0.273, "step": 6738 }, { "epoch": 0.12512856310081202, "grad_norm": 0.32703378796577454, "learning_rate": 1.923723216689067e-05, "loss": 0.4847, "step": 6740 }, { "epoch": 0.12516569323823068, "grad_norm": 0.43252208828926086, "learning_rate": 1.9236785267439476e-05, "loss": 0.3917, "step": 6742 }, { "epoch": 0.1252028233756493, "grad_norm": 0.39739105105400085, "learning_rate": 1.9236338242303297e-05, "loss": 0.3036, "step": 6744 }, { "epoch": 0.12523995351306796, "grad_norm": 0.4072605073451996, "learning_rate": 1.923589109148822e-05, "loss": 0.2088, "step": 6746 }, { "epoch": 0.1252770836504866, "grad_norm": 0.361385315656662, "learning_rate": 1.9235443815000333e-05, "loss": 0.2747, "step": 6748 }, { "epoch": 0.12531421378790522, "grad_norm": 0.2929092347621918, "learning_rate": 1.9234996412845716e-05, "loss": 0.2932, "step": 6750 }, { "epoch": 0.12535134392532388, "grad_norm": 0.35604676604270935, "learning_rate": 1.923454888503046e-05, "loss": 0.315, "step": 6752 }, { "epoch": 0.1253884740627425, "grad_norm": 0.2764253616333008, "learning_rate": 1.9234101231560656e-05, "loss": 0.3078, "step": 6754 }, { "epoch": 0.12542560420016113, "grad_norm": 0.3609953224658966, "learning_rate": 1.923365345244239e-05, "loss": 0.2709, "step": 6756 }, { "epoch": 0.1254627343375798, "grad_norm": 0.3713049590587616, "learning_rate": 1.9233205547681764e-05, "loss": 0.4512, "step": 6758 }, { "epoch": 0.12549986447499842, "grad_norm": 0.34089988470077515, "learning_rate": 1.9232757517284863e-05, "loss": 0.4119, "step": 6760 }, { "epoch": 0.12553699461241707, "grad_norm": 0.4533332288265228, "learning_rate": 1.923230936125779e-05, "loss": 0.5263, "step": 6762 }, { "epoch": 0.1255741247498357, "grad_norm": 0.37296736240386963, "learning_rate": 1.923186107960664e-05, "loss": 0.2874, "step": 6764 }, { "epoch": 0.12561125488725433, "grad_norm": 0.33240365982055664, "learning_rate": 1.923141267233751e-05, "loss": 0.2954, "step": 6766 }, { "epoch": 0.125648385024673, "grad_norm": 0.2692447304725647, "learning_rate": 1.9230964139456508e-05, "loss": 0.469, "step": 6768 }, { "epoch": 0.12568551516209162, "grad_norm": 0.29615044593811035, "learning_rate": 1.9230515480969735e-05, "loss": 0.4667, "step": 6770 }, { "epoch": 0.12572264529951024, "grad_norm": 0.251888245344162, "learning_rate": 1.9230066696883294e-05, "loss": 0.3877, "step": 6772 }, { "epoch": 0.1257597754369289, "grad_norm": 0.3574179708957672, "learning_rate": 1.9229617787203293e-05, "loss": 0.5043, "step": 6774 }, { "epoch": 0.12579690557434753, "grad_norm": 0.3815130889415741, "learning_rate": 1.9229168751935838e-05, "loss": 0.3192, "step": 6776 }, { "epoch": 0.12583403571176616, "grad_norm": 0.32082876563072205, "learning_rate": 1.922871959108704e-05, "loss": 0.2146, "step": 6778 }, { "epoch": 0.1258711658491848, "grad_norm": 0.30080121755599976, "learning_rate": 1.9228270304663014e-05, "loss": 0.4461, "step": 6780 }, { "epoch": 0.12590829598660344, "grad_norm": 0.28345707058906555, "learning_rate": 1.922782089266987e-05, "loss": 0.3742, "step": 6782 }, { "epoch": 0.1259454261240221, "grad_norm": 0.4946710765361786, "learning_rate": 1.9227371355113727e-05, "loss": 0.2621, "step": 6784 }, { "epoch": 0.12598255626144073, "grad_norm": 0.38305824995040894, "learning_rate": 1.9226921692000698e-05, "loss": 0.2707, "step": 6786 }, { "epoch": 0.12601968639885935, "grad_norm": 0.26083871722221375, "learning_rate": 1.9226471903336898e-05, "loss": 0.3179, "step": 6788 }, { "epoch": 0.126056816536278, "grad_norm": 0.32412534952163696, "learning_rate": 1.9226021989128456e-05, "loss": 0.3843, "step": 6790 }, { "epoch": 0.12609394667369664, "grad_norm": 0.552943229675293, "learning_rate": 1.922557194938149e-05, "loss": 0.3888, "step": 6792 }, { "epoch": 0.12613107681111527, "grad_norm": 0.4379805624485016, "learning_rate": 1.9225121784102124e-05, "loss": 0.4919, "step": 6794 }, { "epoch": 0.12616820694853392, "grad_norm": 0.4324335753917694, "learning_rate": 1.922467149329648e-05, "loss": 0.451, "step": 6796 }, { "epoch": 0.12620533708595255, "grad_norm": 0.46422454714775085, "learning_rate": 1.922422107697069e-05, "loss": 0.3353, "step": 6798 }, { "epoch": 0.1262424672233712, "grad_norm": 0.466890811920166, "learning_rate": 1.9223770535130878e-05, "loss": 0.4307, "step": 6800 }, { "epoch": 0.12627959736078984, "grad_norm": 0.27056217193603516, "learning_rate": 1.9223319867783182e-05, "loss": 0.426, "step": 6802 }, { "epoch": 0.12631672749820846, "grad_norm": 0.28319016098976135, "learning_rate": 1.922286907493373e-05, "loss": 0.3493, "step": 6804 }, { "epoch": 0.12635385763562712, "grad_norm": 0.4310556948184967, "learning_rate": 1.922241815658865e-05, "loss": 0.396, "step": 6806 }, { "epoch": 0.12639098777304575, "grad_norm": 0.33706748485565186, "learning_rate": 1.9221967112754085e-05, "loss": 0.344, "step": 6808 }, { "epoch": 0.12642811791046438, "grad_norm": 0.4150315821170807, "learning_rate": 1.9221515943436172e-05, "loss": 0.4606, "step": 6810 }, { "epoch": 0.12646524804788303, "grad_norm": 0.33215463161468506, "learning_rate": 1.922106464864105e-05, "loss": 0.2838, "step": 6812 }, { "epoch": 0.12650237818530166, "grad_norm": 0.33037644624710083, "learning_rate": 1.9220613228374857e-05, "loss": 0.2915, "step": 6814 }, { "epoch": 0.1265395083227203, "grad_norm": 0.36261504888534546, "learning_rate": 1.9220161682643736e-05, "loss": 0.3475, "step": 6816 }, { "epoch": 0.12657663846013895, "grad_norm": 0.2907375693321228, "learning_rate": 1.9219710011453833e-05, "loss": 0.4063, "step": 6818 }, { "epoch": 0.12661376859755757, "grad_norm": 0.44252482056617737, "learning_rate": 1.9219258214811295e-05, "loss": 0.4219, "step": 6820 }, { "epoch": 0.12665089873497623, "grad_norm": 0.34110334515571594, "learning_rate": 1.9218806292722263e-05, "loss": 0.1555, "step": 6822 }, { "epoch": 0.12668802887239486, "grad_norm": 0.3758030831813812, "learning_rate": 1.9218354245192894e-05, "loss": 0.3817, "step": 6824 }, { "epoch": 0.1267251590098135, "grad_norm": 0.40207135677337646, "learning_rate": 1.9217902072229335e-05, "loss": 0.3788, "step": 6826 }, { "epoch": 0.12676228914723214, "grad_norm": 0.31781160831451416, "learning_rate": 1.921744977383774e-05, "loss": 0.3406, "step": 6828 }, { "epoch": 0.12679941928465077, "grad_norm": 0.3182947337627411, "learning_rate": 1.9216997350024264e-05, "loss": 0.2978, "step": 6830 }, { "epoch": 0.1268365494220694, "grad_norm": 0.34986546635627747, "learning_rate": 1.9216544800795057e-05, "loss": 0.3198, "step": 6832 }, { "epoch": 0.12687367955948806, "grad_norm": 0.2990535795688629, "learning_rate": 1.921609212615629e-05, "loss": 0.2211, "step": 6834 }, { "epoch": 0.12691080969690668, "grad_norm": 0.39816853404045105, "learning_rate": 1.9215639326114108e-05, "loss": 0.3219, "step": 6836 }, { "epoch": 0.12694793983432534, "grad_norm": 0.2099897712469101, "learning_rate": 1.9215186400674682e-05, "loss": 0.3103, "step": 6838 }, { "epoch": 0.12698506997174397, "grad_norm": 0.3141871988773346, "learning_rate": 1.921473334984417e-05, "loss": 0.369, "step": 6840 }, { "epoch": 0.1270222001091626, "grad_norm": 0.4258623719215393, "learning_rate": 1.9214280173628742e-05, "loss": 0.4774, "step": 6842 }, { "epoch": 0.12705933024658125, "grad_norm": 0.39120393991470337, "learning_rate": 1.9213826872034558e-05, "loss": 0.4062, "step": 6844 }, { "epoch": 0.12709646038399988, "grad_norm": 0.3628799617290497, "learning_rate": 1.921337344506779e-05, "loss": 0.2356, "step": 6846 }, { "epoch": 0.1271335905214185, "grad_norm": 0.383894145488739, "learning_rate": 1.9212919892734605e-05, "loss": 0.3235, "step": 6848 }, { "epoch": 0.12717072065883717, "grad_norm": 0.5199154019355774, "learning_rate": 1.9212466215041177e-05, "loss": 0.3098, "step": 6850 }, { "epoch": 0.1272078507962558, "grad_norm": 0.44096851348876953, "learning_rate": 1.921201241199368e-05, "loss": 0.3114, "step": 6852 }, { "epoch": 0.12724498093367442, "grad_norm": 0.40052562952041626, "learning_rate": 1.9211558483598285e-05, "loss": 0.2748, "step": 6854 }, { "epoch": 0.12728211107109308, "grad_norm": 0.37798264622688293, "learning_rate": 1.921110442986117e-05, "loss": 0.4314, "step": 6856 }, { "epoch": 0.1273192412085117, "grad_norm": 0.3125547766685486, "learning_rate": 1.9210650250788518e-05, "loss": 0.455, "step": 6858 }, { "epoch": 0.12735637134593036, "grad_norm": 0.4118659198284149, "learning_rate": 1.9210195946386504e-05, "loss": 0.31, "step": 6860 }, { "epoch": 0.127393501483349, "grad_norm": 0.2762649357318878, "learning_rate": 1.9209741516661308e-05, "loss": 0.3856, "step": 6862 }, { "epoch": 0.12743063162076762, "grad_norm": 0.39093881845474243, "learning_rate": 1.9209286961619118e-05, "loss": 0.3167, "step": 6864 }, { "epoch": 0.12746776175818628, "grad_norm": 0.385725736618042, "learning_rate": 1.920883228126612e-05, "loss": 0.3749, "step": 6866 }, { "epoch": 0.1275048918956049, "grad_norm": 0.29993143677711487, "learning_rate": 1.9208377475608493e-05, "loss": 0.4906, "step": 6868 }, { "epoch": 0.12754202203302353, "grad_norm": 0.24928006529808044, "learning_rate": 1.9207922544652434e-05, "loss": 0.4518, "step": 6870 }, { "epoch": 0.1275791521704422, "grad_norm": 0.33041566610336304, "learning_rate": 1.920746748840413e-05, "loss": 0.2216, "step": 6872 }, { "epoch": 0.12761628230786082, "grad_norm": 0.41411134600639343, "learning_rate": 1.920701230686977e-05, "loss": 0.3536, "step": 6874 }, { "epoch": 0.12765341244527947, "grad_norm": 0.6628223061561584, "learning_rate": 1.9206557000055554e-05, "loss": 0.3325, "step": 6876 }, { "epoch": 0.1276905425826981, "grad_norm": 0.4323306083679199, "learning_rate": 1.9206101567967675e-05, "loss": 0.5036, "step": 6878 }, { "epoch": 0.12772767272011673, "grad_norm": 0.3774072229862213, "learning_rate": 1.9205646010612327e-05, "loss": 0.3099, "step": 6880 }, { "epoch": 0.1277648028575354, "grad_norm": 0.3012276887893677, "learning_rate": 1.9205190327995714e-05, "loss": 0.5046, "step": 6882 }, { "epoch": 0.12780193299495402, "grad_norm": 0.3034154772758484, "learning_rate": 1.920473452012403e-05, "loss": 0.3681, "step": 6884 }, { "epoch": 0.12783906313237264, "grad_norm": 0.2771282494068146, "learning_rate": 1.9204278587003483e-05, "loss": 0.1678, "step": 6886 }, { "epoch": 0.1278761932697913, "grad_norm": 0.3034060299396515, "learning_rate": 1.9203822528640273e-05, "loss": 0.3596, "step": 6888 }, { "epoch": 0.12791332340720993, "grad_norm": 0.5938284397125244, "learning_rate": 1.9203366345040606e-05, "loss": 0.369, "step": 6890 }, { "epoch": 0.12795045354462856, "grad_norm": 0.32618650794029236, "learning_rate": 1.9202910036210692e-05, "loss": 0.196, "step": 6892 }, { "epoch": 0.1279875836820472, "grad_norm": 0.3151942491531372, "learning_rate": 1.9202453602156738e-05, "loss": 0.1502, "step": 6894 }, { "epoch": 0.12802471381946584, "grad_norm": 0.3502785265445709, "learning_rate": 1.9201997042884955e-05, "loss": 0.3537, "step": 6896 }, { "epoch": 0.1280618439568845, "grad_norm": 0.45874011516571045, "learning_rate": 1.9201540358401553e-05, "loss": 0.3195, "step": 6898 }, { "epoch": 0.12809897409430313, "grad_norm": 0.29599541425704956, "learning_rate": 1.9201083548712753e-05, "loss": 0.3321, "step": 6900 }, { "epoch": 0.12813610423172175, "grad_norm": 0.4213782250881195, "learning_rate": 1.920062661382476e-05, "loss": 0.2932, "step": 6902 }, { "epoch": 0.1281732343691404, "grad_norm": 0.37992042303085327, "learning_rate": 1.9200169553743803e-05, "loss": 0.4813, "step": 6904 }, { "epoch": 0.12821036450655904, "grad_norm": 0.3845260441303253, "learning_rate": 1.9199712368476094e-05, "loss": 0.2097, "step": 6906 }, { "epoch": 0.12824749464397767, "grad_norm": 0.2666932940483093, "learning_rate": 1.9199255058027857e-05, "loss": 0.3519, "step": 6908 }, { "epoch": 0.12828462478139632, "grad_norm": 0.41257423162460327, "learning_rate": 1.9198797622405317e-05, "loss": 0.272, "step": 6910 }, { "epoch": 0.12832175491881495, "grad_norm": 0.31530383229255676, "learning_rate": 1.919834006161469e-05, "loss": 0.4329, "step": 6912 }, { "epoch": 0.12835888505623358, "grad_norm": 0.284525603055954, "learning_rate": 1.9197882375662208e-05, "loss": 0.2638, "step": 6914 }, { "epoch": 0.12839601519365224, "grad_norm": 0.3915586769580841, "learning_rate": 1.91974245645541e-05, "loss": 0.4098, "step": 6916 }, { "epoch": 0.12843314533107086, "grad_norm": 0.2885100245475769, "learning_rate": 1.919696662829659e-05, "loss": 0.2487, "step": 6918 }, { "epoch": 0.12847027546848952, "grad_norm": 0.40493106842041016, "learning_rate": 1.9196508566895913e-05, "loss": 0.4005, "step": 6920 }, { "epoch": 0.12850740560590815, "grad_norm": 0.3438049554824829, "learning_rate": 1.91960503803583e-05, "loss": 0.2687, "step": 6922 }, { "epoch": 0.12854453574332678, "grad_norm": 0.6170334219932556, "learning_rate": 1.919559206868999e-05, "loss": 0.3819, "step": 6924 }, { "epoch": 0.12858166588074543, "grad_norm": 0.5131980776786804, "learning_rate": 1.9195133631897213e-05, "loss": 0.2414, "step": 6926 }, { "epoch": 0.12861879601816406, "grad_norm": 0.45458874106407166, "learning_rate": 1.9194675069986212e-05, "loss": 0.442, "step": 6928 }, { "epoch": 0.1286559261555827, "grad_norm": 0.3635667562484741, "learning_rate": 1.9194216382963223e-05, "loss": 0.2415, "step": 6930 }, { "epoch": 0.12869305629300135, "grad_norm": 0.38065195083618164, "learning_rate": 1.9193757570834492e-05, "loss": 0.4177, "step": 6932 }, { "epoch": 0.12873018643041997, "grad_norm": 0.3185134530067444, "learning_rate": 1.9193298633606258e-05, "loss": 0.3858, "step": 6934 }, { "epoch": 0.12876731656783863, "grad_norm": 0.2842084467411041, "learning_rate": 1.9192839571284763e-05, "loss": 0.2413, "step": 6936 }, { "epoch": 0.12880444670525726, "grad_norm": 0.2919422388076782, "learning_rate": 1.9192380383876258e-05, "loss": 0.346, "step": 6938 }, { "epoch": 0.1288415768426759, "grad_norm": 0.30367588996887207, "learning_rate": 1.919192107138699e-05, "loss": 0.4438, "step": 6940 }, { "epoch": 0.12887870698009454, "grad_norm": 0.2364528626203537, "learning_rate": 1.9191461633823215e-05, "loss": 0.2612, "step": 6942 }, { "epoch": 0.12891583711751317, "grad_norm": 0.2886956036090851, "learning_rate": 1.9191002071191173e-05, "loss": 0.2756, "step": 6944 }, { "epoch": 0.1289529672549318, "grad_norm": 0.31795135140419006, "learning_rate": 1.9190542383497125e-05, "loss": 0.3673, "step": 6946 }, { "epoch": 0.12899009739235046, "grad_norm": 0.34532830119132996, "learning_rate": 1.9190082570747322e-05, "loss": 0.4501, "step": 6948 }, { "epoch": 0.12902722752976908, "grad_norm": 0.34968647360801697, "learning_rate": 1.9189622632948026e-05, "loss": 0.3151, "step": 6950 }, { "epoch": 0.1290643576671877, "grad_norm": 0.5368404984474182, "learning_rate": 1.918916257010549e-05, "loss": 0.4925, "step": 6952 }, { "epoch": 0.12910148780460637, "grad_norm": 0.45469728112220764, "learning_rate": 1.918870238222598e-05, "loss": 0.2654, "step": 6954 }, { "epoch": 0.129138617942025, "grad_norm": 0.3833331763744354, "learning_rate": 1.918824206931575e-05, "loss": 0.3778, "step": 6956 }, { "epoch": 0.12917574807944365, "grad_norm": 0.9119570851325989, "learning_rate": 1.9187781631381067e-05, "loss": 0.4553, "step": 6958 }, { "epoch": 0.12921287821686228, "grad_norm": 0.2994924783706665, "learning_rate": 1.91873210684282e-05, "loss": 0.3078, "step": 6960 }, { "epoch": 0.1292500083542809, "grad_norm": 0.35223865509033203, "learning_rate": 1.9186860380463407e-05, "loss": 0.3356, "step": 6962 }, { "epoch": 0.12928713849169957, "grad_norm": 0.6104734539985657, "learning_rate": 1.9186399567492966e-05, "loss": 0.2372, "step": 6964 }, { "epoch": 0.1293242686291182, "grad_norm": 0.24619168043136597, "learning_rate": 1.9185938629523143e-05, "loss": 0.2637, "step": 6966 }, { "epoch": 0.12936139876653682, "grad_norm": 0.44779258966445923, "learning_rate": 1.9185477566560208e-05, "loss": 0.3396, "step": 6968 }, { "epoch": 0.12939852890395548, "grad_norm": 0.3527730703353882, "learning_rate": 1.9185016378610443e-05, "loss": 0.3316, "step": 6970 }, { "epoch": 0.1294356590413741, "grad_norm": 0.5245075821876526, "learning_rate": 1.918455506568011e-05, "loss": 0.367, "step": 6972 }, { "epoch": 0.12947278917879276, "grad_norm": 0.2701316177845001, "learning_rate": 1.9184093627775496e-05, "loss": 0.3796, "step": 6974 }, { "epoch": 0.1295099193162114, "grad_norm": 0.3432866036891937, "learning_rate": 1.918363206490288e-05, "loss": 0.3815, "step": 6976 }, { "epoch": 0.12954704945363002, "grad_norm": 0.2795094847679138, "learning_rate": 1.918317037706854e-05, "loss": 0.3651, "step": 6978 }, { "epoch": 0.12958417959104868, "grad_norm": 0.25656238198280334, "learning_rate": 1.9182708564278754e-05, "loss": 0.2371, "step": 6980 }, { "epoch": 0.1296213097284673, "grad_norm": 0.47116556763648987, "learning_rate": 1.9182246626539812e-05, "loss": 0.5125, "step": 6982 }, { "epoch": 0.12965843986588593, "grad_norm": 0.34104740619659424, "learning_rate": 1.9181784563857998e-05, "loss": 0.5172, "step": 6984 }, { "epoch": 0.1296955700033046, "grad_norm": 0.32182496786117554, "learning_rate": 1.9181322376239596e-05, "loss": 0.3989, "step": 6986 }, { "epoch": 0.12973270014072322, "grad_norm": 0.4881784915924072, "learning_rate": 1.91808600636909e-05, "loss": 0.3813, "step": 6988 }, { "epoch": 0.12976983027814185, "grad_norm": 0.3208380341529846, "learning_rate": 1.91803976262182e-05, "loss": 0.3366, "step": 6990 }, { "epoch": 0.1298069604155605, "grad_norm": 0.47881579399108887, "learning_rate": 1.9179935063827783e-05, "loss": 0.2821, "step": 6992 }, { "epoch": 0.12984409055297913, "grad_norm": 0.3987862169742584, "learning_rate": 1.9179472376525947e-05, "loss": 0.2486, "step": 6994 }, { "epoch": 0.1298812206903978, "grad_norm": 0.3396744132041931, "learning_rate": 1.9179009564318993e-05, "loss": 0.2653, "step": 6996 }, { "epoch": 0.12991835082781641, "grad_norm": 0.2583022713661194, "learning_rate": 1.9178546627213205e-05, "loss": 0.1902, "step": 6998 }, { "epoch": 0.12995548096523504, "grad_norm": 0.37820449471473694, "learning_rate": 1.9178083565214896e-05, "loss": 0.2452, "step": 7000 }, { "epoch": 0.1299926111026537, "grad_norm": 0.3128203749656677, "learning_rate": 1.9177620378330358e-05, "loss": 0.4935, "step": 7002 }, { "epoch": 0.13002974124007233, "grad_norm": 0.23880182206630707, "learning_rate": 1.9177157066565903e-05, "loss": 0.2483, "step": 7004 }, { "epoch": 0.13006687137749096, "grad_norm": 0.3715354800224304, "learning_rate": 1.917669362992782e-05, "loss": 0.2202, "step": 7006 }, { "epoch": 0.1301040015149096, "grad_norm": 0.3140673339366913, "learning_rate": 1.9176230068422434e-05, "loss": 0.4123, "step": 7008 }, { "epoch": 0.13014113165232824, "grad_norm": 0.28320327401161194, "learning_rate": 1.9175766382056034e-05, "loss": 0.2837, "step": 7010 }, { "epoch": 0.1301782617897469, "grad_norm": 0.6192690134048462, "learning_rate": 1.9175302570834942e-05, "loss": 0.4265, "step": 7012 }, { "epoch": 0.13021539192716552, "grad_norm": 0.49688848853111267, "learning_rate": 1.9174838634765466e-05, "loss": 0.2787, "step": 7014 }, { "epoch": 0.13025252206458415, "grad_norm": 0.3195688724517822, "learning_rate": 1.9174374573853915e-05, "loss": 0.2544, "step": 7016 }, { "epoch": 0.1302896522020028, "grad_norm": 0.3402513861656189, "learning_rate": 1.917391038810661e-05, "loss": 0.402, "step": 7018 }, { "epoch": 0.13032678233942144, "grad_norm": 0.31732264161109924, "learning_rate": 1.9173446077529862e-05, "loss": 0.1558, "step": 7020 }, { "epoch": 0.13036391247684007, "grad_norm": 0.39950212836265564, "learning_rate": 1.917298164212999e-05, "loss": 0.3231, "step": 7022 }, { "epoch": 0.13040104261425872, "grad_norm": 0.4340563118457794, "learning_rate": 1.9172517081913317e-05, "loss": 0.3182, "step": 7024 }, { "epoch": 0.13043817275167735, "grad_norm": 0.33968251943588257, "learning_rate": 1.917205239688616e-05, "loss": 0.1995, "step": 7026 }, { "epoch": 0.13047530288909598, "grad_norm": 0.5604590773582458, "learning_rate": 1.917158758705484e-05, "loss": 0.246, "step": 7028 }, { "epoch": 0.13051243302651463, "grad_norm": 0.29320746660232544, "learning_rate": 1.9171122652425688e-05, "loss": 0.4672, "step": 7030 }, { "epoch": 0.13054956316393326, "grad_norm": 0.3579826354980469, "learning_rate": 1.9170657593005027e-05, "loss": 0.4568, "step": 7032 }, { "epoch": 0.13058669330135192, "grad_norm": 0.2845290005207062, "learning_rate": 1.9170192408799184e-05, "loss": 0.213, "step": 7034 }, { "epoch": 0.13062382343877055, "grad_norm": 0.4265182912349701, "learning_rate": 1.9169727099814492e-05, "loss": 0.3631, "step": 7036 }, { "epoch": 0.13066095357618918, "grad_norm": 0.37000492215156555, "learning_rate": 1.9169261666057283e-05, "loss": 0.297, "step": 7038 }, { "epoch": 0.13069808371360783, "grad_norm": 0.4555400013923645, "learning_rate": 1.9168796107533883e-05, "loss": 0.5511, "step": 7040 }, { "epoch": 0.13073521385102646, "grad_norm": 0.3653505742549896, "learning_rate": 1.916833042425063e-05, "loss": 0.1878, "step": 7042 }, { "epoch": 0.1307723439884451, "grad_norm": 0.24913451075553894, "learning_rate": 1.916786461621387e-05, "loss": 0.3973, "step": 7044 }, { "epoch": 0.13080947412586375, "grad_norm": 0.3549131751060486, "learning_rate": 1.916739868342993e-05, "loss": 0.275, "step": 7046 }, { "epoch": 0.13084660426328237, "grad_norm": 0.29103508591651917, "learning_rate": 1.9166932625905152e-05, "loss": 0.3745, "step": 7048 }, { "epoch": 0.13088373440070103, "grad_norm": 0.376519113779068, "learning_rate": 1.916646644364588e-05, "loss": 0.3088, "step": 7050 }, { "epoch": 0.13092086453811966, "grad_norm": 0.5899032354354858, "learning_rate": 1.916600013665846e-05, "loss": 0.3392, "step": 7052 }, { "epoch": 0.1309579946755383, "grad_norm": 0.48108527064323425, "learning_rate": 1.916553370494923e-05, "loss": 0.2358, "step": 7054 }, { "epoch": 0.13099512481295694, "grad_norm": 0.4275568127632141, "learning_rate": 1.9165067148524537e-05, "loss": 0.1322, "step": 7056 }, { "epoch": 0.13103225495037557, "grad_norm": 0.36434200406074524, "learning_rate": 1.9164600467390733e-05, "loss": 0.2768, "step": 7058 }, { "epoch": 0.1310693850877942, "grad_norm": 0.34344184398651123, "learning_rate": 1.9164133661554174e-05, "loss": 0.5205, "step": 7060 }, { "epoch": 0.13110651522521286, "grad_norm": 0.543472170829773, "learning_rate": 1.9163666731021202e-05, "loss": 0.2428, "step": 7062 }, { "epoch": 0.13114364536263148, "grad_norm": 0.324930876493454, "learning_rate": 1.9163199675798173e-05, "loss": 0.4177, "step": 7064 }, { "epoch": 0.1311807755000501, "grad_norm": 0.2902285158634186, "learning_rate": 1.9162732495891447e-05, "loss": 0.309, "step": 7066 }, { "epoch": 0.13121790563746877, "grad_norm": 0.34280896186828613, "learning_rate": 1.9162265191307377e-05, "loss": 0.2668, "step": 7068 }, { "epoch": 0.1312550357748874, "grad_norm": 0.2957268953323364, "learning_rate": 1.916179776205232e-05, "loss": 0.3454, "step": 7070 }, { "epoch": 0.13129216591230605, "grad_norm": 0.4251037836074829, "learning_rate": 1.9161330208132635e-05, "loss": 0.4571, "step": 7072 }, { "epoch": 0.13132929604972468, "grad_norm": 0.3598605692386627, "learning_rate": 1.9160862529554693e-05, "loss": 0.4459, "step": 7074 }, { "epoch": 0.1313664261871433, "grad_norm": 0.29164984822273254, "learning_rate": 1.9160394726324847e-05, "loss": 0.1834, "step": 7076 }, { "epoch": 0.13140355632456197, "grad_norm": 0.3958226144313812, "learning_rate": 1.9159926798449472e-05, "loss": 0.3798, "step": 7078 }, { "epoch": 0.1314406864619806, "grad_norm": 0.359245240688324, "learning_rate": 1.9159458745934927e-05, "loss": 0.3254, "step": 7080 }, { "epoch": 0.13147781659939922, "grad_norm": 0.3832409381866455, "learning_rate": 1.9158990568787588e-05, "loss": 0.4865, "step": 7082 }, { "epoch": 0.13151494673681788, "grad_norm": 0.3965170085430145, "learning_rate": 1.9158522267013818e-05, "loss": 0.3997, "step": 7084 }, { "epoch": 0.1315520768742365, "grad_norm": 0.3076322078704834, "learning_rate": 1.9158053840619993e-05, "loss": 0.4112, "step": 7086 }, { "epoch": 0.13158920701165516, "grad_norm": 0.27528607845306396, "learning_rate": 1.915758528961249e-05, "loss": 0.3343, "step": 7088 }, { "epoch": 0.1316263371490738, "grad_norm": 0.3214447498321533, "learning_rate": 1.915711661399768e-05, "loss": 0.3168, "step": 7090 }, { "epoch": 0.13166346728649242, "grad_norm": 0.3760682940483093, "learning_rate": 1.9156647813781938e-05, "loss": 0.3632, "step": 7092 }, { "epoch": 0.13170059742391108, "grad_norm": 0.31415367126464844, "learning_rate": 1.915617888897165e-05, "loss": 0.3564, "step": 7094 }, { "epoch": 0.1317377275613297, "grad_norm": 0.34857919812202454, "learning_rate": 1.9155709839573194e-05, "loss": 0.3106, "step": 7096 }, { "epoch": 0.13177485769874833, "grad_norm": 0.29351750016212463, "learning_rate": 1.915524066559295e-05, "loss": 0.3813, "step": 7098 }, { "epoch": 0.131811987836167, "grad_norm": 0.39932531118392944, "learning_rate": 1.9154771367037305e-05, "loss": 0.556, "step": 7100 }, { "epoch": 0.13184911797358562, "grad_norm": 0.5048282146453857, "learning_rate": 1.915430194391264e-05, "loss": 0.2324, "step": 7102 }, { "epoch": 0.13188624811100425, "grad_norm": 0.3569514751434326, "learning_rate": 1.915383239622535e-05, "loss": 0.7564, "step": 7104 }, { "epoch": 0.1319233782484229, "grad_norm": 0.4786880910396576, "learning_rate": 1.9153362723981816e-05, "loss": 0.3076, "step": 7106 }, { "epoch": 0.13196050838584153, "grad_norm": 0.3879612386226654, "learning_rate": 1.9152892927188436e-05, "loss": 0.2427, "step": 7108 }, { "epoch": 0.13199763852326019, "grad_norm": 0.21245542168617249, "learning_rate": 1.9152423005851598e-05, "loss": 0.3149, "step": 7110 }, { "epoch": 0.13203476866067881, "grad_norm": 0.41764116287231445, "learning_rate": 1.9151952959977697e-05, "loss": 0.3248, "step": 7112 }, { "epoch": 0.13207189879809744, "grad_norm": 0.3896578252315521, "learning_rate": 1.915148278957313e-05, "loss": 0.2454, "step": 7114 }, { "epoch": 0.1321090289355161, "grad_norm": 0.559226930141449, "learning_rate": 1.9151012494644296e-05, "loss": 0.36, "step": 7116 }, { "epoch": 0.13214615907293473, "grad_norm": 0.32480138540267944, "learning_rate": 1.915054207519759e-05, "loss": 0.3746, "step": 7118 }, { "epoch": 0.13218328921035336, "grad_norm": 0.26667365431785583, "learning_rate": 1.915007153123942e-05, "loss": 0.2805, "step": 7120 }, { "epoch": 0.132220419347772, "grad_norm": 0.5144453048706055, "learning_rate": 1.914960086277618e-05, "loss": 0.3858, "step": 7122 }, { "epoch": 0.13225754948519064, "grad_norm": 0.2771070897579193, "learning_rate": 1.9149130069814276e-05, "loss": 0.4166, "step": 7124 }, { "epoch": 0.1322946796226093, "grad_norm": 0.33683663606643677, "learning_rate": 1.9148659152360122e-05, "loss": 0.4772, "step": 7126 }, { "epoch": 0.13233180976002792, "grad_norm": 0.3622194230556488, "learning_rate": 1.9148188110420118e-05, "loss": 0.2595, "step": 7128 }, { "epoch": 0.13236893989744655, "grad_norm": 0.3051789700984955, "learning_rate": 1.9147716944000673e-05, "loss": 0.3122, "step": 7130 }, { "epoch": 0.1324060700348652, "grad_norm": 0.35281604528427124, "learning_rate": 1.9147245653108205e-05, "loss": 0.5425, "step": 7132 }, { "epoch": 0.13244320017228384, "grad_norm": 0.4206068813800812, "learning_rate": 1.914677423774912e-05, "loss": 0.5501, "step": 7134 }, { "epoch": 0.13248033030970247, "grad_norm": 0.3616102635860443, "learning_rate": 1.9146302697929838e-05, "loss": 0.3116, "step": 7136 }, { "epoch": 0.13251746044712112, "grad_norm": 0.263750821352005, "learning_rate": 1.914583103365677e-05, "loss": 0.1852, "step": 7138 }, { "epoch": 0.13255459058453975, "grad_norm": 0.6301262378692627, "learning_rate": 1.914535924493634e-05, "loss": 0.2776, "step": 7140 }, { "epoch": 0.13259172072195838, "grad_norm": 0.40337684750556946, "learning_rate": 1.9144887331774963e-05, "loss": 0.544, "step": 7142 }, { "epoch": 0.13262885085937703, "grad_norm": 0.3808266520500183, "learning_rate": 1.914441529417906e-05, "loss": 0.3192, "step": 7144 }, { "epoch": 0.13266598099679566, "grad_norm": 0.40872445702552795, "learning_rate": 1.9143943132155055e-05, "loss": 0.2908, "step": 7146 }, { "epoch": 0.13270311113421432, "grad_norm": 0.49685102701187134, "learning_rate": 1.9143470845709375e-05, "loss": 0.5106, "step": 7148 }, { "epoch": 0.13274024127163295, "grad_norm": 0.3251996338367462, "learning_rate": 1.9142998434848447e-05, "loss": 0.2078, "step": 7150 }, { "epoch": 0.13277737140905158, "grad_norm": 0.2899598777294159, "learning_rate": 1.9142525899578694e-05, "loss": 0.3076, "step": 7152 }, { "epoch": 0.13281450154647023, "grad_norm": 0.39834997057914734, "learning_rate": 1.9142053239906547e-05, "loss": 0.2989, "step": 7154 }, { "epoch": 0.13285163168388886, "grad_norm": 0.4305490255355835, "learning_rate": 1.914158045583844e-05, "loss": 0.4965, "step": 7156 }, { "epoch": 0.1328887618213075, "grad_norm": 0.3186148703098297, "learning_rate": 1.9141107547380806e-05, "loss": 0.2909, "step": 7158 }, { "epoch": 0.13292589195872614, "grad_norm": 0.28367260098457336, "learning_rate": 1.9140634514540078e-05, "loss": 0.321, "step": 7160 }, { "epoch": 0.13296302209614477, "grad_norm": 0.3300277590751648, "learning_rate": 1.9140161357322695e-05, "loss": 0.189, "step": 7162 }, { "epoch": 0.13300015223356343, "grad_norm": 0.364374577999115, "learning_rate": 1.9139688075735092e-05, "loss": 0.0786, "step": 7164 }, { "epoch": 0.13303728237098206, "grad_norm": 0.32289332151412964, "learning_rate": 1.9139214669783713e-05, "loss": 0.2853, "step": 7166 }, { "epoch": 0.13307441250840069, "grad_norm": 0.4499182403087616, "learning_rate": 1.9138741139474995e-05, "loss": 0.3471, "step": 7168 }, { "epoch": 0.13311154264581934, "grad_norm": 0.29096126556396484, "learning_rate": 1.9138267484815387e-05, "loss": 0.4668, "step": 7170 }, { "epoch": 0.13314867278323797, "grad_norm": 0.5043051242828369, "learning_rate": 1.913779370581133e-05, "loss": 0.2469, "step": 7172 }, { "epoch": 0.1331858029206566, "grad_norm": 0.3818332552909851, "learning_rate": 1.913731980246927e-05, "loss": 0.3246, "step": 7174 }, { "epoch": 0.13322293305807525, "grad_norm": 0.22566120326519012, "learning_rate": 1.913684577479566e-05, "loss": 0.2937, "step": 7176 }, { "epoch": 0.13326006319549388, "grad_norm": 0.45355477929115295, "learning_rate": 1.9136371622796942e-05, "loss": 0.2392, "step": 7178 }, { "epoch": 0.1332971933329125, "grad_norm": 0.3846004605293274, "learning_rate": 1.913589734647958e-05, "loss": 0.3306, "step": 7180 }, { "epoch": 0.13333432347033117, "grad_norm": 0.37682563066482544, "learning_rate": 1.913542294585001e-05, "loss": 0.4291, "step": 7182 }, { "epoch": 0.1333714536077498, "grad_norm": 0.46033042669296265, "learning_rate": 1.9134948420914704e-05, "loss": 0.2161, "step": 7184 }, { "epoch": 0.13340858374516845, "grad_norm": 0.3213376998901367, "learning_rate": 1.9134473771680114e-05, "loss": 0.4398, "step": 7186 }, { "epoch": 0.13344571388258708, "grad_norm": 0.3502066135406494, "learning_rate": 1.9133998998152693e-05, "loss": 0.2088, "step": 7188 }, { "epoch": 0.1334828440200057, "grad_norm": 0.41070929169654846, "learning_rate": 1.9133524100338908e-05, "loss": 0.4393, "step": 7190 }, { "epoch": 0.13351997415742436, "grad_norm": 0.38879847526550293, "learning_rate": 1.9133049078245216e-05, "loss": 0.1837, "step": 7192 }, { "epoch": 0.133557104294843, "grad_norm": 0.3301045298576355, "learning_rate": 1.9132573931878083e-05, "loss": 0.439, "step": 7194 }, { "epoch": 0.13359423443226162, "grad_norm": 0.27724719047546387, "learning_rate": 1.9132098661243975e-05, "loss": 0.2628, "step": 7196 }, { "epoch": 0.13363136456968028, "grad_norm": 0.41109728813171387, "learning_rate": 1.9131623266349356e-05, "loss": 0.3366, "step": 7198 }, { "epoch": 0.1336684947070989, "grad_norm": 0.3013942837715149, "learning_rate": 1.9131147747200698e-05, "loss": 0.2323, "step": 7200 }, { "epoch": 0.13370562484451756, "grad_norm": 0.2770337164402008, "learning_rate": 1.913067210380447e-05, "loss": 0.4331, "step": 7202 }, { "epoch": 0.1337427549819362, "grad_norm": 0.34681665897369385, "learning_rate": 1.9130196336167147e-05, "loss": 0.3626, "step": 7204 }, { "epoch": 0.13377988511935482, "grad_norm": 0.3675478994846344, "learning_rate": 1.9129720444295197e-05, "loss": 0.383, "step": 7206 }, { "epoch": 0.13381701525677347, "grad_norm": 0.23802945017814636, "learning_rate": 1.9129244428195097e-05, "loss": 0.2759, "step": 7208 }, { "epoch": 0.1338541453941921, "grad_norm": 0.4891115427017212, "learning_rate": 1.912876828787333e-05, "loss": 0.2184, "step": 7210 }, { "epoch": 0.13389127553161073, "grad_norm": 0.3260249197483063, "learning_rate": 1.9128292023336367e-05, "loss": 0.4242, "step": 7212 }, { "epoch": 0.1339284056690294, "grad_norm": 0.5251856446266174, "learning_rate": 1.9127815634590692e-05, "loss": 0.3437, "step": 7214 }, { "epoch": 0.13396553580644802, "grad_norm": 0.2991001605987549, "learning_rate": 1.9127339121642787e-05, "loss": 0.4452, "step": 7216 }, { "epoch": 0.13400266594386664, "grad_norm": 0.3695489168167114, "learning_rate": 1.9126862484499137e-05, "loss": 0.4247, "step": 7218 }, { "epoch": 0.1340397960812853, "grad_norm": 0.2977232038974762, "learning_rate": 1.9126385723166226e-05, "loss": 0.656, "step": 7220 }, { "epoch": 0.13407692621870393, "grad_norm": 0.5024280548095703, "learning_rate": 1.9125908837650544e-05, "loss": 0.2328, "step": 7222 }, { "epoch": 0.13411405635612259, "grad_norm": 0.4551043212413788, "learning_rate": 1.9125431827958575e-05, "loss": 0.2692, "step": 7224 }, { "epoch": 0.1341511864935412, "grad_norm": 0.285351037979126, "learning_rate": 1.9124954694096817e-05, "loss": 0.2059, "step": 7226 }, { "epoch": 0.13418831663095984, "grad_norm": 0.2855362892150879, "learning_rate": 1.9124477436071755e-05, "loss": 0.4807, "step": 7228 }, { "epoch": 0.1342254467683785, "grad_norm": 0.5678279399871826, "learning_rate": 1.9124000053889885e-05, "loss": 0.335, "step": 7230 }, { "epoch": 0.13426257690579713, "grad_norm": 0.39154475927352905, "learning_rate": 1.9123522547557707e-05, "loss": 0.3555, "step": 7232 }, { "epoch": 0.13429970704321575, "grad_norm": 0.31161314249038696, "learning_rate": 1.9123044917081715e-05, "loss": 0.358, "step": 7234 }, { "epoch": 0.1343368371806344, "grad_norm": 0.35746318101882935, "learning_rate": 1.9122567162468406e-05, "loss": 0.2357, "step": 7236 }, { "epoch": 0.13437396731805304, "grad_norm": 0.36686453223228455, "learning_rate": 1.9122089283724285e-05, "loss": 0.2386, "step": 7238 }, { "epoch": 0.1344110974554717, "grad_norm": 0.46335890889167786, "learning_rate": 1.912161128085585e-05, "loss": 0.2572, "step": 7240 }, { "epoch": 0.13444822759289032, "grad_norm": 0.29942893981933594, "learning_rate": 1.912113315386961e-05, "loss": 0.2825, "step": 7242 }, { "epoch": 0.13448535773030895, "grad_norm": 0.5759248733520508, "learning_rate": 1.9120654902772068e-05, "loss": 0.3768, "step": 7244 }, { "epoch": 0.1345224878677276, "grad_norm": 0.5424313545227051, "learning_rate": 1.9120176527569733e-05, "loss": 0.3264, "step": 7246 }, { "epoch": 0.13455961800514624, "grad_norm": 0.35377129912376404, "learning_rate": 1.9119698028269115e-05, "loss": 0.4773, "step": 7248 }, { "epoch": 0.13459674814256486, "grad_norm": 0.2863421142101288, "learning_rate": 1.9119219404876718e-05, "loss": 0.1956, "step": 7250 }, { "epoch": 0.13463387827998352, "grad_norm": 0.3096824884414673, "learning_rate": 1.9118740657399065e-05, "loss": 0.2662, "step": 7252 }, { "epoch": 0.13467100841740215, "grad_norm": 0.391652375459671, "learning_rate": 1.9118261785842667e-05, "loss": 0.2517, "step": 7254 }, { "epoch": 0.13470813855482078, "grad_norm": 0.3129667043685913, "learning_rate": 1.9117782790214034e-05, "loss": 0.3747, "step": 7256 }, { "epoch": 0.13474526869223943, "grad_norm": 0.3863779306411743, "learning_rate": 1.911730367051969e-05, "loss": 0.2615, "step": 7258 }, { "epoch": 0.13478239882965806, "grad_norm": 0.30290815234184265, "learning_rate": 1.911682442676615e-05, "loss": 0.3589, "step": 7260 }, { "epoch": 0.13481952896707672, "grad_norm": 0.41937631368637085, "learning_rate": 1.9116345058959942e-05, "loss": 0.32, "step": 7262 }, { "epoch": 0.13485665910449535, "grad_norm": 0.48997122049331665, "learning_rate": 1.9115865567107585e-05, "loss": 0.2641, "step": 7264 }, { "epoch": 0.13489378924191398, "grad_norm": 0.33411481976509094, "learning_rate": 1.91153859512156e-05, "loss": 0.4147, "step": 7266 }, { "epoch": 0.13493091937933263, "grad_norm": 0.3630472719669342, "learning_rate": 1.9114906211290514e-05, "loss": 0.4086, "step": 7268 }, { "epoch": 0.13496804951675126, "grad_norm": 0.30900058150291443, "learning_rate": 1.911442634733886e-05, "loss": 0.2582, "step": 7270 }, { "epoch": 0.1350051796541699, "grad_norm": 0.4592708349227905, "learning_rate": 1.911394635936716e-05, "loss": 0.4602, "step": 7272 }, { "epoch": 0.13504230979158854, "grad_norm": 0.26999205350875854, "learning_rate": 1.9113466247381955e-05, "loss": 0.3738, "step": 7274 }, { "epoch": 0.13507943992900717, "grad_norm": 0.2519771456718445, "learning_rate": 1.911298601138977e-05, "loss": 0.3134, "step": 7276 }, { "epoch": 0.13511657006642583, "grad_norm": 0.31244611740112305, "learning_rate": 1.9112505651397143e-05, "loss": 0.4766, "step": 7278 }, { "epoch": 0.13515370020384446, "grad_norm": 0.3162122666835785, "learning_rate": 1.9112025167410605e-05, "loss": 0.3996, "step": 7280 }, { "epoch": 0.13519083034126309, "grad_norm": 0.3427612781524658, "learning_rate": 1.91115445594367e-05, "loss": 0.4997, "step": 7282 }, { "epoch": 0.13522796047868174, "grad_norm": 0.30499011278152466, "learning_rate": 1.911106382748197e-05, "loss": 0.204, "step": 7284 }, { "epoch": 0.13526509061610037, "grad_norm": 0.33350473642349243, "learning_rate": 1.911058297155295e-05, "loss": 0.2868, "step": 7286 }, { "epoch": 0.135302220753519, "grad_norm": 0.3825935125350952, "learning_rate": 1.9110101991656184e-05, "loss": 0.3556, "step": 7288 }, { "epoch": 0.13533935089093765, "grad_norm": 0.38847166299819946, "learning_rate": 1.910962088779822e-05, "loss": 0.2625, "step": 7290 }, { "epoch": 0.13537648102835628, "grad_norm": 0.3060508668422699, "learning_rate": 1.91091396599856e-05, "loss": 0.1712, "step": 7292 }, { "epoch": 0.1354136111657749, "grad_norm": 0.3461926281452179, "learning_rate": 1.910865830822487e-05, "loss": 0.396, "step": 7294 }, { "epoch": 0.13545074130319357, "grad_norm": 0.2541234493255615, "learning_rate": 1.910817683252259e-05, "loss": 0.1824, "step": 7296 }, { "epoch": 0.1354878714406122, "grad_norm": 0.3683461844921112, "learning_rate": 1.9107695232885305e-05, "loss": 0.3257, "step": 7298 }, { "epoch": 0.13552500157803085, "grad_norm": 0.29616573452949524, "learning_rate": 1.9107213509319567e-05, "loss": 0.1357, "step": 7300 }, { "epoch": 0.13556213171544948, "grad_norm": 0.23538632690906525, "learning_rate": 1.9106731661831934e-05, "loss": 0.3382, "step": 7302 }, { "epoch": 0.1355992618528681, "grad_norm": 0.33959728479385376, "learning_rate": 1.910624969042896e-05, "loss": 0.2681, "step": 7304 }, { "epoch": 0.13563639199028676, "grad_norm": 0.5154351592063904, "learning_rate": 1.9105767595117203e-05, "loss": 0.3966, "step": 7306 }, { "epoch": 0.1356735221277054, "grad_norm": 0.3378819227218628, "learning_rate": 1.9105285375903224e-05, "loss": 0.1423, "step": 7308 }, { "epoch": 0.13571065226512402, "grad_norm": 0.4130324125289917, "learning_rate": 1.9104803032793587e-05, "loss": 0.6339, "step": 7310 }, { "epoch": 0.13574778240254268, "grad_norm": 0.2770974934101105, "learning_rate": 1.910432056579485e-05, "loss": 0.2532, "step": 7312 }, { "epoch": 0.1357849125399613, "grad_norm": 0.36908838152885437, "learning_rate": 1.9103837974913583e-05, "loss": 0.3783, "step": 7314 }, { "epoch": 0.13582204267737996, "grad_norm": 0.2566445767879486, "learning_rate": 1.9103355260156352e-05, "loss": 0.3144, "step": 7316 }, { "epoch": 0.1358591728147986, "grad_norm": 0.34032490849494934, "learning_rate": 1.910287242152972e-05, "loss": 0.2979, "step": 7318 }, { "epoch": 0.13589630295221722, "grad_norm": 0.25391313433647156, "learning_rate": 1.910238945904026e-05, "loss": 0.4646, "step": 7320 }, { "epoch": 0.13593343308963587, "grad_norm": 0.25708311796188354, "learning_rate": 1.9101906372694548e-05, "loss": 0.2783, "step": 7322 }, { "epoch": 0.1359705632270545, "grad_norm": 0.27837061882019043, "learning_rate": 1.910142316249915e-05, "loss": 0.2793, "step": 7324 }, { "epoch": 0.13600769336447313, "grad_norm": 0.29085224866867065, "learning_rate": 1.9100939828460644e-05, "loss": 0.3763, "step": 7326 }, { "epoch": 0.1360448235018918, "grad_norm": 0.38636770844459534, "learning_rate": 1.910045637058561e-05, "loss": 0.4149, "step": 7328 }, { "epoch": 0.13608195363931042, "grad_norm": 0.3627934753894806, "learning_rate": 1.9099972788880622e-05, "loss": 0.2178, "step": 7330 }, { "epoch": 0.13611908377672904, "grad_norm": 0.6746304631233215, "learning_rate": 1.9099489083352266e-05, "loss": 0.4325, "step": 7332 }, { "epoch": 0.1361562139141477, "grad_norm": 0.5423709154129028, "learning_rate": 1.9099005254007117e-05, "loss": 0.6612, "step": 7334 }, { "epoch": 0.13619334405156633, "grad_norm": 0.32637229561805725, "learning_rate": 1.909852130085176e-05, "loss": 0.37, "step": 7336 }, { "epoch": 0.13623047418898498, "grad_norm": 0.35113540291786194, "learning_rate": 1.9098037223892784e-05, "loss": 0.2181, "step": 7338 }, { "epoch": 0.1362676043264036, "grad_norm": 0.3909337818622589, "learning_rate": 1.909755302313677e-05, "loss": 0.2177, "step": 7340 }, { "epoch": 0.13630473446382224, "grad_norm": 0.3278919458389282, "learning_rate": 1.909706869859031e-05, "loss": 0.1888, "step": 7342 }, { "epoch": 0.1363418646012409, "grad_norm": 0.339663565158844, "learning_rate": 1.9096584250259996e-05, "loss": 0.3061, "step": 7344 }, { "epoch": 0.13637899473865953, "grad_norm": 0.3417191505432129, "learning_rate": 1.909609967815242e-05, "loss": 0.2961, "step": 7346 }, { "epoch": 0.13641612487607815, "grad_norm": 0.4168943464756012, "learning_rate": 1.909561498227417e-05, "loss": 0.3892, "step": 7348 }, { "epoch": 0.1364532550134968, "grad_norm": 0.45449140667915344, "learning_rate": 1.9095130162631842e-05, "loss": 0.5291, "step": 7350 }, { "epoch": 0.13649038515091544, "grad_norm": 0.7133435010910034, "learning_rate": 1.909464521923204e-05, "loss": 0.3539, "step": 7352 }, { "epoch": 0.1365275152883341, "grad_norm": 0.36785033345222473, "learning_rate": 1.9094160152081355e-05, "loss": 0.2265, "step": 7354 }, { "epoch": 0.13656464542575272, "grad_norm": 0.44135814905166626, "learning_rate": 1.9093674961186394e-05, "loss": 0.397, "step": 7356 }, { "epoch": 0.13660177556317135, "grad_norm": 0.34029603004455566, "learning_rate": 1.9093189646553752e-05, "loss": 0.2474, "step": 7358 }, { "epoch": 0.13663890570059, "grad_norm": 0.5090249180793762, "learning_rate": 1.9092704208190038e-05, "loss": 0.3186, "step": 7360 }, { "epoch": 0.13667603583800864, "grad_norm": 0.24662551283836365, "learning_rate": 1.9092218646101854e-05, "loss": 0.2738, "step": 7362 }, { "epoch": 0.13671316597542726, "grad_norm": 0.3393016755580902, "learning_rate": 1.909173296029581e-05, "loss": 0.5807, "step": 7364 }, { "epoch": 0.13675029611284592, "grad_norm": 0.36386582255363464, "learning_rate": 1.9091247150778515e-05, "loss": 0.2103, "step": 7366 }, { "epoch": 0.13678742625026455, "grad_norm": 0.3460341691970825, "learning_rate": 1.9090761217556574e-05, "loss": 0.346, "step": 7368 }, { "epoch": 0.13682455638768318, "grad_norm": 0.2915242910385132, "learning_rate": 1.9090275160636608e-05, "loss": 0.2753, "step": 7370 }, { "epoch": 0.13686168652510183, "grad_norm": 0.3440723717212677, "learning_rate": 1.908978898002522e-05, "loss": 0.3107, "step": 7372 }, { "epoch": 0.13689881666252046, "grad_norm": 0.29323211312294006, "learning_rate": 1.9089302675729034e-05, "loss": 0.464, "step": 7374 }, { "epoch": 0.13693594679993912, "grad_norm": 0.3394409418106079, "learning_rate": 1.9088816247754663e-05, "loss": 0.1878, "step": 7376 }, { "epoch": 0.13697307693735775, "grad_norm": 0.4465654194355011, "learning_rate": 1.908832969610873e-05, "loss": 0.3614, "step": 7378 }, { "epoch": 0.13701020707477637, "grad_norm": 0.41250550746917725, "learning_rate": 1.908784302079785e-05, "loss": 0.5246, "step": 7380 }, { "epoch": 0.13704733721219503, "grad_norm": 0.3276500105857849, "learning_rate": 1.9087356221828645e-05, "loss": 0.2576, "step": 7382 }, { "epoch": 0.13708446734961366, "grad_norm": 0.23176829516887665, "learning_rate": 1.9086869299207745e-05, "loss": 0.2113, "step": 7384 }, { "epoch": 0.1371215974870323, "grad_norm": 0.48694881796836853, "learning_rate": 1.908638225294177e-05, "loss": 0.3808, "step": 7386 }, { "epoch": 0.13715872762445094, "grad_norm": 0.3344497084617615, "learning_rate": 1.9085895083037353e-05, "loss": 0.4387, "step": 7388 }, { "epoch": 0.13719585776186957, "grad_norm": 0.42727771401405334, "learning_rate": 1.9085407789501115e-05, "loss": 0.4894, "step": 7390 }, { "epoch": 0.13723298789928823, "grad_norm": 0.42960286140441895, "learning_rate": 1.9084920372339697e-05, "loss": 0.51, "step": 7392 }, { "epoch": 0.13727011803670686, "grad_norm": 0.3123345971107483, "learning_rate": 1.908443283155972e-05, "loss": 0.2165, "step": 7394 }, { "epoch": 0.13730724817412548, "grad_norm": 0.37721654772758484, "learning_rate": 1.9083945167167822e-05, "loss": 0.4204, "step": 7396 }, { "epoch": 0.13734437831154414, "grad_norm": 0.4325771927833557, "learning_rate": 1.9083457379170647e-05, "loss": 0.3588, "step": 7398 }, { "epoch": 0.13738150844896277, "grad_norm": 0.45864030718803406, "learning_rate": 1.908296946757482e-05, "loss": 0.4754, "step": 7400 }, { "epoch": 0.1374186385863814, "grad_norm": 0.29926881194114685, "learning_rate": 1.9082481432386982e-05, "loss": 0.2923, "step": 7402 }, { "epoch": 0.13745576872380005, "grad_norm": 0.4547715485095978, "learning_rate": 1.9081993273613782e-05, "loss": 0.3827, "step": 7404 }, { "epoch": 0.13749289886121868, "grad_norm": 0.2662661075592041, "learning_rate": 1.908150499126186e-05, "loss": 0.3679, "step": 7406 }, { "epoch": 0.1375300289986373, "grad_norm": 0.2919824719429016, "learning_rate": 1.9081016585337852e-05, "loss": 0.2957, "step": 7408 }, { "epoch": 0.13756715913605597, "grad_norm": 0.3491005301475525, "learning_rate": 1.9080528055848407e-05, "loss": 0.3699, "step": 7410 }, { "epoch": 0.1376042892734746, "grad_norm": 0.407593309879303, "learning_rate": 1.908003940280018e-05, "loss": 0.2733, "step": 7412 }, { "epoch": 0.13764141941089325, "grad_norm": 0.3127366602420807, "learning_rate": 1.9079550626199812e-05, "loss": 0.2969, "step": 7414 }, { "epoch": 0.13767854954831188, "grad_norm": 0.27980494499206543, "learning_rate": 1.9079061726053957e-05, "loss": 0.1842, "step": 7416 }, { "epoch": 0.1377156796857305, "grad_norm": 0.26316460967063904, "learning_rate": 1.9078572702369268e-05, "loss": 0.1766, "step": 7418 }, { "epoch": 0.13775280982314916, "grad_norm": 0.4010971784591675, "learning_rate": 1.9078083555152397e-05, "loss": 0.3542, "step": 7420 }, { "epoch": 0.1377899399605678, "grad_norm": 0.38183778524398804, "learning_rate": 1.9077594284410003e-05, "loss": 0.3271, "step": 7422 }, { "epoch": 0.13782707009798642, "grad_norm": 0.32891348004341125, "learning_rate": 1.9077104890148735e-05, "loss": 0.2374, "step": 7424 }, { "epoch": 0.13786420023540508, "grad_norm": 0.2728039622306824, "learning_rate": 1.907661537237526e-05, "loss": 0.3228, "step": 7426 }, { "epoch": 0.1379013303728237, "grad_norm": 0.3456726372241974, "learning_rate": 1.907612573109624e-05, "loss": 0.316, "step": 7428 }, { "epoch": 0.13793846051024236, "grad_norm": 0.335875540971756, "learning_rate": 1.9075635966318337e-05, "loss": 0.2688, "step": 7430 }, { "epoch": 0.137975590647661, "grad_norm": 0.3106536865234375, "learning_rate": 1.9075146078048213e-05, "loss": 0.3185, "step": 7432 }, { "epoch": 0.13801272078507962, "grad_norm": 0.3902144134044647, "learning_rate": 1.907465606629253e-05, "loss": 0.345, "step": 7434 }, { "epoch": 0.13804985092249827, "grad_norm": 0.29771289229393005, "learning_rate": 1.907416593105796e-05, "loss": 0.257, "step": 7436 }, { "epoch": 0.1380869810599169, "grad_norm": 0.2500526010990143, "learning_rate": 1.9073675672351174e-05, "loss": 0.2903, "step": 7438 }, { "epoch": 0.13812411119733553, "grad_norm": 0.3253353238105774, "learning_rate": 1.9073185290178845e-05, "loss": 0.2841, "step": 7440 }, { "epoch": 0.1381612413347542, "grad_norm": 0.3276876211166382, "learning_rate": 1.9072694784547633e-05, "loss": 0.3302, "step": 7442 }, { "epoch": 0.13819837147217282, "grad_norm": 0.5529167056083679, "learning_rate": 1.907220415546423e-05, "loss": 0.2302, "step": 7444 }, { "epoch": 0.13823550160959144, "grad_norm": 0.46558377146720886, "learning_rate": 1.9071713402935295e-05, "loss": 0.3398, "step": 7446 }, { "epoch": 0.1382726317470101, "grad_norm": 0.4772559404373169, "learning_rate": 1.9071222526967516e-05, "loss": 0.2164, "step": 7448 }, { "epoch": 0.13830976188442873, "grad_norm": 0.3378334641456604, "learning_rate": 1.907073152756757e-05, "loss": 0.3474, "step": 7450 }, { "epoch": 0.13834689202184738, "grad_norm": 0.5119785070419312, "learning_rate": 1.9070240404742136e-05, "loss": 0.4383, "step": 7452 }, { "epoch": 0.138384022159266, "grad_norm": 0.3765888810157776, "learning_rate": 1.9069749158497905e-05, "loss": 0.353, "step": 7454 }, { "epoch": 0.13842115229668464, "grad_norm": 0.40379026532173157, "learning_rate": 1.906925778884155e-05, "loss": 0.3834, "step": 7456 }, { "epoch": 0.1384582824341033, "grad_norm": 0.4134158790111542, "learning_rate": 1.906876629577976e-05, "loss": 0.1882, "step": 7458 }, { "epoch": 0.13849541257152193, "grad_norm": 0.29162201285362244, "learning_rate": 1.9068274679319228e-05, "loss": 0.2168, "step": 7460 }, { "epoch": 0.13853254270894055, "grad_norm": 0.4610622227191925, "learning_rate": 1.9067782939466642e-05, "loss": 0.38, "step": 7462 }, { "epoch": 0.1385696728463592, "grad_norm": 0.3794720768928528, "learning_rate": 1.9067291076228688e-05, "loss": 0.3199, "step": 7464 }, { "epoch": 0.13860680298377784, "grad_norm": 0.28741660714149475, "learning_rate": 1.9066799089612063e-05, "loss": 0.4323, "step": 7466 }, { "epoch": 0.1386439331211965, "grad_norm": 0.2328682839870453, "learning_rate": 1.906630697962346e-05, "loss": 0.2428, "step": 7468 }, { "epoch": 0.13868106325861512, "grad_norm": 0.3834840655326843, "learning_rate": 1.9065814746269577e-05, "loss": 0.3697, "step": 7470 }, { "epoch": 0.13871819339603375, "grad_norm": 0.46875759959220886, "learning_rate": 1.906532238955711e-05, "loss": 0.3185, "step": 7472 }, { "epoch": 0.1387553235334524, "grad_norm": 0.505338191986084, "learning_rate": 1.906482990949276e-05, "loss": 0.5381, "step": 7474 }, { "epoch": 0.13879245367087104, "grad_norm": 0.44858452677726746, "learning_rate": 1.9064337306083227e-05, "loss": 0.377, "step": 7476 }, { "epoch": 0.13882958380828966, "grad_norm": 0.2833554148674011, "learning_rate": 1.906384457933521e-05, "loss": 0.4701, "step": 7478 }, { "epoch": 0.13886671394570832, "grad_norm": 0.32994261384010315, "learning_rate": 1.9063351729255425e-05, "loss": 0.3555, "step": 7480 }, { "epoch": 0.13890384408312695, "grad_norm": 0.3297823965549469, "learning_rate": 1.9062858755850567e-05, "loss": 0.3037, "step": 7482 }, { "epoch": 0.13894097422054558, "grad_norm": 0.4257313013076782, "learning_rate": 1.9062365659127348e-05, "loss": 0.1939, "step": 7484 }, { "epoch": 0.13897810435796423, "grad_norm": 0.21694734692573547, "learning_rate": 1.9061872439092476e-05, "loss": 0.306, "step": 7486 }, { "epoch": 0.13901523449538286, "grad_norm": 0.43100449442863464, "learning_rate": 1.9061379095752666e-05, "loss": 0.5689, "step": 7488 }, { "epoch": 0.13905236463280152, "grad_norm": 0.2504644989967346, "learning_rate": 1.9060885629114624e-05, "loss": 0.3323, "step": 7490 }, { "epoch": 0.13908949477022015, "grad_norm": 0.24499230086803436, "learning_rate": 1.9060392039185075e-05, "loss": 0.3681, "step": 7492 }, { "epoch": 0.13912662490763877, "grad_norm": 0.22481974959373474, "learning_rate": 1.9059898325970724e-05, "loss": 0.2706, "step": 7494 }, { "epoch": 0.13916375504505743, "grad_norm": 0.3223819434642792, "learning_rate": 1.90594044894783e-05, "loss": 0.3544, "step": 7496 }, { "epoch": 0.13920088518247606, "grad_norm": 0.4236181676387787, "learning_rate": 1.9058910529714512e-05, "loss": 0.2414, "step": 7498 }, { "epoch": 0.1392380153198947, "grad_norm": 0.27202969789505005, "learning_rate": 1.9058416446686088e-05, "loss": 0.2601, "step": 7500 }, { "epoch": 0.13927514545731334, "grad_norm": 0.3573920428752899, "learning_rate": 1.9057922240399747e-05, "loss": 0.337, "step": 7502 }, { "epoch": 0.13931227559473197, "grad_norm": 0.39434346556663513, "learning_rate": 1.905742791086222e-05, "loss": 0.7151, "step": 7504 }, { "epoch": 0.13934940573215063, "grad_norm": 0.4281597435474396, "learning_rate": 1.9056933458080226e-05, "loss": 0.2846, "step": 7506 }, { "epoch": 0.13938653586956926, "grad_norm": 0.4525904655456543, "learning_rate": 1.90564388820605e-05, "loss": 0.2701, "step": 7508 }, { "epoch": 0.13942366600698788, "grad_norm": 0.35705578327178955, "learning_rate": 1.9055944182809763e-05, "loss": 0.4215, "step": 7510 }, { "epoch": 0.13946079614440654, "grad_norm": 0.20357581973075867, "learning_rate": 1.9055449360334756e-05, "loss": 0.4587, "step": 7512 }, { "epoch": 0.13949792628182517, "grad_norm": 0.38579848408699036, "learning_rate": 1.9054954414642205e-05, "loss": 0.4127, "step": 7514 }, { "epoch": 0.1395350564192438, "grad_norm": 0.34190812706947327, "learning_rate": 1.905445934573885e-05, "loss": 0.3555, "step": 7516 }, { "epoch": 0.13957218655666245, "grad_norm": 0.2952379286289215, "learning_rate": 1.905396415363142e-05, "loss": 0.2554, "step": 7518 }, { "epoch": 0.13960931669408108, "grad_norm": 0.37091872096061707, "learning_rate": 1.905346883832666e-05, "loss": 0.3732, "step": 7520 }, { "epoch": 0.1396464468314997, "grad_norm": 0.2575719654560089, "learning_rate": 1.9052973399831306e-05, "loss": 0.4769, "step": 7522 }, { "epoch": 0.13968357696891837, "grad_norm": 0.31771451234817505, "learning_rate": 1.9052477838152103e-05, "loss": 0.2886, "step": 7524 }, { "epoch": 0.139720707106337, "grad_norm": 0.6454352140426636, "learning_rate": 1.9051982153295793e-05, "loss": 0.3109, "step": 7526 }, { "epoch": 0.13975783724375565, "grad_norm": 0.2928260564804077, "learning_rate": 1.9051486345269115e-05, "loss": 0.1593, "step": 7528 }, { "epoch": 0.13979496738117428, "grad_norm": 0.37625187635421753, "learning_rate": 1.9050990414078826e-05, "loss": 0.2461, "step": 7530 }, { "epoch": 0.1398320975185929, "grad_norm": 0.5237709283828735, "learning_rate": 1.9050494359731667e-05, "loss": 0.4091, "step": 7532 }, { "epoch": 0.13986922765601156, "grad_norm": 0.23249033093452454, "learning_rate": 1.9049998182234385e-05, "loss": 0.2453, "step": 7534 }, { "epoch": 0.1399063577934302, "grad_norm": 0.3015519976615906, "learning_rate": 1.904950188159374e-05, "loss": 0.3016, "step": 7536 }, { "epoch": 0.13994348793084882, "grad_norm": 0.4378632605075836, "learning_rate": 1.9049005457816477e-05, "loss": 0.3656, "step": 7538 }, { "epoch": 0.13998061806826748, "grad_norm": 0.31358802318573, "learning_rate": 1.9048508910909356e-05, "loss": 0.3226, "step": 7540 }, { "epoch": 0.1400177482056861, "grad_norm": 0.34746092557907104, "learning_rate": 1.9048012240879132e-05, "loss": 0.3952, "step": 7542 }, { "epoch": 0.14005487834310476, "grad_norm": 0.3674270510673523, "learning_rate": 1.9047515447732564e-05, "loss": 0.4364, "step": 7544 }, { "epoch": 0.1400920084805234, "grad_norm": 0.49495619535446167, "learning_rate": 1.9047018531476415e-05, "loss": 0.3908, "step": 7546 }, { "epoch": 0.14012913861794202, "grad_norm": 0.31911754608154297, "learning_rate": 1.9046521492117437e-05, "loss": 0.4131, "step": 7548 }, { "epoch": 0.14016626875536067, "grad_norm": 0.5316864848136902, "learning_rate": 1.90460243296624e-05, "loss": 0.3893, "step": 7550 }, { "epoch": 0.1402033988927793, "grad_norm": 0.2884799540042877, "learning_rate": 1.904552704411807e-05, "loss": 0.2538, "step": 7552 }, { "epoch": 0.14024052903019793, "grad_norm": 0.4590071141719818, "learning_rate": 1.904502963549121e-05, "loss": 0.3412, "step": 7554 }, { "epoch": 0.1402776591676166, "grad_norm": 0.35984379053115845, "learning_rate": 1.9044532103788588e-05, "loss": 0.3171, "step": 7556 }, { "epoch": 0.14031478930503521, "grad_norm": 0.5221495628356934, "learning_rate": 1.9044034449016975e-05, "loss": 0.4594, "step": 7558 }, { "epoch": 0.14035191944245384, "grad_norm": 0.3746601343154907, "learning_rate": 1.904353667118315e-05, "loss": 0.1844, "step": 7560 }, { "epoch": 0.1403890495798725, "grad_norm": 0.3336004316806793, "learning_rate": 1.9043038770293874e-05, "loss": 0.5946, "step": 7562 }, { "epoch": 0.14042617971729113, "grad_norm": 0.4773891866207123, "learning_rate": 1.9042540746355928e-05, "loss": 0.4231, "step": 7564 }, { "epoch": 0.14046330985470978, "grad_norm": 0.3812766671180725, "learning_rate": 1.904204259937609e-05, "loss": 0.4146, "step": 7566 }, { "epoch": 0.1405004399921284, "grad_norm": 0.5302526950836182, "learning_rate": 1.9041544329361136e-05, "loss": 0.3401, "step": 7568 }, { "epoch": 0.14053757012954704, "grad_norm": 0.4042723774909973, "learning_rate": 1.9041045936317842e-05, "loss": 0.3921, "step": 7570 }, { "epoch": 0.1405747002669657, "grad_norm": 0.42343172430992126, "learning_rate": 1.9040547420252998e-05, "loss": 0.1814, "step": 7572 }, { "epoch": 0.14061183040438432, "grad_norm": 0.3352805972099304, "learning_rate": 1.9040048781173383e-05, "loss": 0.4003, "step": 7574 }, { "epoch": 0.14064896054180295, "grad_norm": 0.30000630021095276, "learning_rate": 1.9039550019085776e-05, "loss": 0.3686, "step": 7576 }, { "epoch": 0.1406860906792216, "grad_norm": 0.45026734471321106, "learning_rate": 1.9039051133996978e-05, "loss": 0.387, "step": 7578 }, { "epoch": 0.14072322081664024, "grad_norm": 0.6084704399108887, "learning_rate": 1.9038552125913768e-05, "loss": 0.4046, "step": 7580 }, { "epoch": 0.1407603509540589, "grad_norm": 0.3534768223762512, "learning_rate": 1.9038052994842933e-05, "loss": 0.2335, "step": 7582 }, { "epoch": 0.14079748109147752, "grad_norm": 0.3005528151988983, "learning_rate": 1.9037553740791272e-05, "loss": 0.4198, "step": 7584 }, { "epoch": 0.14083461122889615, "grad_norm": 0.2921445965766907, "learning_rate": 1.9037054363765572e-05, "loss": 0.3079, "step": 7586 }, { "epoch": 0.1408717413663148, "grad_norm": 0.41042840480804443, "learning_rate": 1.9036554863772637e-05, "loss": 0.4258, "step": 7588 }, { "epoch": 0.14090887150373343, "grad_norm": 0.22219650447368622, "learning_rate": 1.9036055240819252e-05, "loss": 0.2717, "step": 7590 }, { "epoch": 0.14094600164115206, "grad_norm": 0.27416983246803284, "learning_rate": 1.9035555494912225e-05, "loss": 0.3806, "step": 7592 }, { "epoch": 0.14098313177857072, "grad_norm": 0.2190663367509842, "learning_rate": 1.903505562605835e-05, "loss": 0.3256, "step": 7594 }, { "epoch": 0.14102026191598935, "grad_norm": 0.381106972694397, "learning_rate": 1.903455563426443e-05, "loss": 0.4424, "step": 7596 }, { "epoch": 0.14105739205340798, "grad_norm": 0.4182669520378113, "learning_rate": 1.9034055519537272e-05, "loss": 0.4043, "step": 7598 }, { "epoch": 0.14109452219082663, "grad_norm": 0.3130188286304474, "learning_rate": 1.903355528188368e-05, "loss": 0.4642, "step": 7600 }, { "epoch": 0.14113165232824526, "grad_norm": 0.33640605211257935, "learning_rate": 1.903305492131046e-05, "loss": 0.2603, "step": 7602 }, { "epoch": 0.14116878246566392, "grad_norm": 0.27698177099227905, "learning_rate": 1.9032554437824414e-05, "loss": 0.3162, "step": 7604 }, { "epoch": 0.14120591260308255, "grad_norm": 0.2489544302225113, "learning_rate": 1.903205383143236e-05, "loss": 0.3388, "step": 7606 }, { "epoch": 0.14124304274050117, "grad_norm": 0.3171347975730896, "learning_rate": 1.903155310214111e-05, "loss": 0.3501, "step": 7608 }, { "epoch": 0.14128017287791983, "grad_norm": 0.4473964273929596, "learning_rate": 1.903105224995747e-05, "loss": 0.2558, "step": 7610 }, { "epoch": 0.14131730301533846, "grad_norm": 0.37853822112083435, "learning_rate": 1.9030551274888266e-05, "loss": 0.376, "step": 7612 }, { "epoch": 0.1413544331527571, "grad_norm": 0.31770774722099304, "learning_rate": 1.9030050176940306e-05, "loss": 0.2787, "step": 7614 }, { "epoch": 0.14139156329017574, "grad_norm": 0.47328582406044006, "learning_rate": 1.902954895612041e-05, "loss": 0.3568, "step": 7616 }, { "epoch": 0.14142869342759437, "grad_norm": 0.3312424123287201, "learning_rate": 1.90290476124354e-05, "loss": 0.3017, "step": 7618 }, { "epoch": 0.14146582356501303, "grad_norm": 0.3369753360748291, "learning_rate": 1.90285461458921e-05, "loss": 0.3316, "step": 7620 }, { "epoch": 0.14150295370243166, "grad_norm": 0.4140273630619049, "learning_rate": 1.9028044556497324e-05, "loss": 0.2932, "step": 7622 }, { "epoch": 0.14154008383985028, "grad_norm": 0.37320852279663086, "learning_rate": 1.902754284425791e-05, "loss": 0.3398, "step": 7624 }, { "epoch": 0.14157721397726894, "grad_norm": 0.30271226167678833, "learning_rate": 1.9027041009180676e-05, "loss": 0.4219, "step": 7626 }, { "epoch": 0.14161434411468757, "grad_norm": 0.48476535081863403, "learning_rate": 1.9026539051272454e-05, "loss": 0.5501, "step": 7628 }, { "epoch": 0.1416514742521062, "grad_norm": 0.24782590568065643, "learning_rate": 1.902603697054007e-05, "loss": 0.2794, "step": 7630 }, { "epoch": 0.14168860438952485, "grad_norm": 0.2944287955760956, "learning_rate": 1.9025534766990362e-05, "loss": 0.3361, "step": 7632 }, { "epoch": 0.14172573452694348, "grad_norm": 0.38036850094795227, "learning_rate": 1.9025032440630164e-05, "loss": 0.3949, "step": 7634 }, { "epoch": 0.1417628646643621, "grad_norm": 0.5582762956619263, "learning_rate": 1.90245299914663e-05, "loss": 0.5108, "step": 7636 }, { "epoch": 0.14179999480178077, "grad_norm": 0.34962642192840576, "learning_rate": 1.902402741950562e-05, "loss": 0.2645, "step": 7638 }, { "epoch": 0.1418371249391994, "grad_norm": 0.3124140799045563, "learning_rate": 1.9023524724754953e-05, "loss": 0.1926, "step": 7640 }, { "epoch": 0.14187425507661805, "grad_norm": 0.3975899815559387, "learning_rate": 1.9023021907221145e-05, "loss": 0.2962, "step": 7642 }, { "epoch": 0.14191138521403668, "grad_norm": 0.34952908754348755, "learning_rate": 1.9022518966911036e-05, "loss": 0.4175, "step": 7644 }, { "epoch": 0.1419485153514553, "grad_norm": 0.30632108449935913, "learning_rate": 1.902201590383147e-05, "loss": 0.3112, "step": 7646 }, { "epoch": 0.14198564548887396, "grad_norm": 0.5652284622192383, "learning_rate": 1.902151271798929e-05, "loss": 0.5193, "step": 7648 }, { "epoch": 0.1420227756262926, "grad_norm": 0.27054890990257263, "learning_rate": 1.9021009409391346e-05, "loss": 0.2578, "step": 7650 }, { "epoch": 0.14205990576371122, "grad_norm": 0.2990904152393341, "learning_rate": 1.9020505978044484e-05, "loss": 0.5334, "step": 7652 }, { "epoch": 0.14209703590112988, "grad_norm": 0.3570253849029541, "learning_rate": 1.9020002423955555e-05, "loss": 0.3386, "step": 7654 }, { "epoch": 0.1421341660385485, "grad_norm": 0.37413713335990906, "learning_rate": 1.9019498747131412e-05, "loss": 0.3042, "step": 7656 }, { "epoch": 0.14217129617596716, "grad_norm": 0.3607179522514343, "learning_rate": 1.9018994947578905e-05, "loss": 0.3253, "step": 7658 }, { "epoch": 0.1422084263133858, "grad_norm": 0.4070660471916199, "learning_rate": 1.9018491025304895e-05, "loss": 0.3294, "step": 7660 }, { "epoch": 0.14224555645080442, "grad_norm": 0.32453879714012146, "learning_rate": 1.9017986980316236e-05, "loss": 0.1705, "step": 7662 }, { "epoch": 0.14228268658822307, "grad_norm": 0.3114584982395172, "learning_rate": 1.9017482812619784e-05, "loss": 0.6314, "step": 7664 }, { "epoch": 0.1423198167256417, "grad_norm": 0.4178317189216614, "learning_rate": 1.9016978522222403e-05, "loss": 0.3268, "step": 7666 }, { "epoch": 0.14235694686306033, "grad_norm": 0.2957448661327362, "learning_rate": 1.9016474109130955e-05, "loss": 0.3073, "step": 7668 }, { "epoch": 0.14239407700047899, "grad_norm": 0.33956751227378845, "learning_rate": 1.9015969573352296e-05, "loss": 0.3471, "step": 7670 }, { "epoch": 0.14243120713789761, "grad_norm": 0.3486991226673126, "learning_rate": 1.90154649148933e-05, "loss": 0.1757, "step": 7672 }, { "epoch": 0.14246833727531624, "grad_norm": 0.32016023993492126, "learning_rate": 1.901496013376083e-05, "loss": 0.3254, "step": 7674 }, { "epoch": 0.1425054674127349, "grad_norm": 0.5558412075042725, "learning_rate": 1.9014455229961757e-05, "loss": 0.4424, "step": 7676 }, { "epoch": 0.14254259755015353, "grad_norm": 0.35902220010757446, "learning_rate": 1.901395020350295e-05, "loss": 0.2228, "step": 7678 }, { "epoch": 0.14257972768757218, "grad_norm": 0.3260502517223358, "learning_rate": 1.901344505439128e-05, "loss": 0.4339, "step": 7680 }, { "epoch": 0.1426168578249908, "grad_norm": 0.3270975649356842, "learning_rate": 1.9012939782633624e-05, "loss": 0.4883, "step": 7682 }, { "epoch": 0.14265398796240944, "grad_norm": 0.33395111560821533, "learning_rate": 1.901243438823685e-05, "loss": 0.3784, "step": 7684 }, { "epoch": 0.1426911180998281, "grad_norm": 0.36998942494392395, "learning_rate": 1.901192887120784e-05, "loss": 0.2678, "step": 7686 }, { "epoch": 0.14272824823724672, "grad_norm": 0.3290286362171173, "learning_rate": 1.9011423231553473e-05, "loss": 0.4979, "step": 7688 }, { "epoch": 0.14276537837466535, "grad_norm": 0.2865745723247528, "learning_rate": 1.9010917469280628e-05, "loss": 0.3269, "step": 7690 }, { "epoch": 0.142802508512084, "grad_norm": 0.4057199954986572, "learning_rate": 1.901041158439619e-05, "loss": 0.297, "step": 7692 }, { "epoch": 0.14283963864950264, "grad_norm": 0.31791216135025024, "learning_rate": 1.9009905576907035e-05, "loss": 0.223, "step": 7694 }, { "epoch": 0.1428767687869213, "grad_norm": 0.3224818706512451, "learning_rate": 1.9009399446820056e-05, "loss": 0.4132, "step": 7696 }, { "epoch": 0.14291389892433992, "grad_norm": 0.3537513017654419, "learning_rate": 1.9008893194142135e-05, "loss": 0.4522, "step": 7698 }, { "epoch": 0.14295102906175855, "grad_norm": 0.7173958420753479, "learning_rate": 1.900838681888016e-05, "loss": 0.3052, "step": 7700 }, { "epoch": 0.1429881591991772, "grad_norm": 0.4466204345226288, "learning_rate": 1.900788032104103e-05, "loss": 0.3026, "step": 7702 }, { "epoch": 0.14302528933659583, "grad_norm": 0.2805536687374115, "learning_rate": 1.9007373700631627e-05, "loss": 0.3117, "step": 7704 }, { "epoch": 0.14306241947401446, "grad_norm": 0.436399906873703, "learning_rate": 1.9006866957658848e-05, "loss": 0.2908, "step": 7706 }, { "epoch": 0.14309954961143312, "grad_norm": 0.4815303683280945, "learning_rate": 1.900636009212959e-05, "loss": 0.3602, "step": 7708 }, { "epoch": 0.14313667974885175, "grad_norm": 0.3500632643699646, "learning_rate": 1.9005853104050745e-05, "loss": 0.2676, "step": 7710 }, { "epoch": 0.14317380988627038, "grad_norm": 0.4023997485637665, "learning_rate": 1.9005345993429218e-05, "loss": 0.3419, "step": 7712 }, { "epoch": 0.14321094002368903, "grad_norm": 0.46251416206359863, "learning_rate": 1.9004838760271903e-05, "loss": 0.324, "step": 7714 }, { "epoch": 0.14324807016110766, "grad_norm": 0.4099741578102112, "learning_rate": 1.900433140458571e-05, "loss": 0.1832, "step": 7716 }, { "epoch": 0.14328520029852632, "grad_norm": 0.3889768421649933, "learning_rate": 1.9003823926377534e-05, "loss": 0.3035, "step": 7718 }, { "epoch": 0.14332233043594494, "grad_norm": 0.33130908012390137, "learning_rate": 1.9003316325654287e-05, "loss": 0.2448, "step": 7720 }, { "epoch": 0.14335946057336357, "grad_norm": 0.3851718306541443, "learning_rate": 1.900280860242287e-05, "loss": 0.3418, "step": 7722 }, { "epoch": 0.14339659071078223, "grad_norm": 0.26829975843429565, "learning_rate": 1.9002300756690195e-05, "loss": 0.3561, "step": 7724 }, { "epoch": 0.14343372084820086, "grad_norm": 0.276117205619812, "learning_rate": 1.900179278846317e-05, "loss": 0.3257, "step": 7726 }, { "epoch": 0.14347085098561949, "grad_norm": 0.37600189447402954, "learning_rate": 1.9001284697748713e-05, "loss": 0.3427, "step": 7728 }, { "epoch": 0.14350798112303814, "grad_norm": 0.3589591979980469, "learning_rate": 1.900077648455373e-05, "loss": 0.2937, "step": 7730 }, { "epoch": 0.14354511126045677, "grad_norm": 0.26378333568573, "learning_rate": 1.900026814888514e-05, "loss": 0.2918, "step": 7732 }, { "epoch": 0.14358224139787543, "grad_norm": 0.3817266523838043, "learning_rate": 1.8999759690749858e-05, "loss": 0.3178, "step": 7734 }, { "epoch": 0.14361937153529405, "grad_norm": 0.34049347043037415, "learning_rate": 1.8999251110154807e-05, "loss": 0.1398, "step": 7736 }, { "epoch": 0.14365650167271268, "grad_norm": 0.34080126881599426, "learning_rate": 1.8998742407106904e-05, "loss": 0.2845, "step": 7738 }, { "epoch": 0.14369363181013134, "grad_norm": 0.8032690286636353, "learning_rate": 1.8998233581613067e-05, "loss": 0.2119, "step": 7740 }, { "epoch": 0.14373076194754997, "grad_norm": 0.44622287154197693, "learning_rate": 1.8997724633680227e-05, "loss": 0.29, "step": 7742 }, { "epoch": 0.1437678920849686, "grad_norm": 0.37246716022491455, "learning_rate": 1.8997215563315307e-05, "loss": 0.3218, "step": 7744 }, { "epoch": 0.14380502222238725, "grad_norm": 0.308791846036911, "learning_rate": 1.8996706370525232e-05, "loss": 0.3083, "step": 7746 }, { "epoch": 0.14384215235980588, "grad_norm": 0.4257645905017853, "learning_rate": 1.899619705531693e-05, "loss": 0.3996, "step": 7748 }, { "epoch": 0.1438792824972245, "grad_norm": 0.3255974352359772, "learning_rate": 1.8995687617697336e-05, "loss": 0.2728, "step": 7750 }, { "epoch": 0.14391641263464316, "grad_norm": 0.6185017824172974, "learning_rate": 1.899517805767338e-05, "loss": 0.2815, "step": 7752 }, { "epoch": 0.1439535427720618, "grad_norm": 0.550037145614624, "learning_rate": 1.8994668375251986e-05, "loss": 0.3242, "step": 7754 }, { "epoch": 0.14399067290948045, "grad_norm": 0.41134992241859436, "learning_rate": 1.8994158570440105e-05, "loss": 0.3645, "step": 7756 }, { "epoch": 0.14402780304689908, "grad_norm": 0.4989023506641388, "learning_rate": 1.8993648643244666e-05, "loss": 0.2693, "step": 7758 }, { "epoch": 0.1440649331843177, "grad_norm": 0.3724779784679413, "learning_rate": 1.8993138593672603e-05, "loss": 0.3641, "step": 7760 }, { "epoch": 0.14410206332173636, "grad_norm": 0.39235255122184753, "learning_rate": 1.899262842173087e-05, "loss": 0.253, "step": 7762 }, { "epoch": 0.144139193459155, "grad_norm": 0.4052632749080658, "learning_rate": 1.899211812742639e-05, "loss": 0.2497, "step": 7764 }, { "epoch": 0.14417632359657362, "grad_norm": 0.38518375158309937, "learning_rate": 1.899160771076612e-05, "loss": 0.4068, "step": 7766 }, { "epoch": 0.14421345373399228, "grad_norm": 0.3408479690551758, "learning_rate": 1.8991097171757008e-05, "loss": 0.3415, "step": 7768 }, { "epoch": 0.1442505838714109, "grad_norm": 0.34612441062927246, "learning_rate": 1.899058651040599e-05, "loss": 0.2504, "step": 7770 }, { "epoch": 0.14428771400882956, "grad_norm": 0.3262399137020111, "learning_rate": 1.899007572672002e-05, "loss": 0.2993, "step": 7772 }, { "epoch": 0.1443248441462482, "grad_norm": 0.29352858662605286, "learning_rate": 1.898956482070605e-05, "loss": 0.2231, "step": 7774 }, { "epoch": 0.14436197428366682, "grad_norm": 0.4054524600505829, "learning_rate": 1.8989053792371023e-05, "loss": 0.2445, "step": 7776 }, { "epoch": 0.14439910442108547, "grad_norm": 0.3525488078594208, "learning_rate": 1.8988542641721906e-05, "loss": 0.5101, "step": 7778 }, { "epoch": 0.1444362345585041, "grad_norm": 0.33733558654785156, "learning_rate": 1.8988031368765646e-05, "loss": 0.1283, "step": 7780 }, { "epoch": 0.14447336469592273, "grad_norm": 0.383801132440567, "learning_rate": 1.89875199735092e-05, "loss": 0.2063, "step": 7782 }, { "epoch": 0.14451049483334139, "grad_norm": 0.31123608350753784, "learning_rate": 1.8987008455959528e-05, "loss": 0.2958, "step": 7784 }, { "epoch": 0.14454762497076, "grad_norm": 0.3273412585258484, "learning_rate": 1.898649681612359e-05, "loss": 0.4372, "step": 7786 }, { "epoch": 0.14458475510817864, "grad_norm": 0.5262473225593567, "learning_rate": 1.898598505400835e-05, "loss": 0.1877, "step": 7788 }, { "epoch": 0.1446218852455973, "grad_norm": 0.5094627141952515, "learning_rate": 1.898547316962077e-05, "loss": 0.513, "step": 7790 }, { "epoch": 0.14465901538301593, "grad_norm": 0.46677839756011963, "learning_rate": 1.898496116296781e-05, "loss": 0.257, "step": 7792 }, { "epoch": 0.14469614552043458, "grad_norm": 0.3048141300678253, "learning_rate": 1.898444903405645e-05, "loss": 0.3845, "step": 7794 }, { "epoch": 0.1447332756578532, "grad_norm": 0.2790718972682953, "learning_rate": 1.898393678289364e-05, "loss": 0.4282, "step": 7796 }, { "epoch": 0.14477040579527184, "grad_norm": 0.38363999128341675, "learning_rate": 1.8983424409486366e-05, "loss": 0.3478, "step": 7798 }, { "epoch": 0.1448075359326905, "grad_norm": 0.32744741439819336, "learning_rate": 1.8982911913841594e-05, "loss": 0.3119, "step": 7800 }, { "epoch": 0.14484466607010912, "grad_norm": 0.2869434654712677, "learning_rate": 1.8982399295966295e-05, "loss": 0.2012, "step": 7802 }, { "epoch": 0.14488179620752775, "grad_norm": 0.46112629771232605, "learning_rate": 1.8981886555867452e-05, "loss": 0.2723, "step": 7804 }, { "epoch": 0.1449189263449464, "grad_norm": 0.36200079321861267, "learning_rate": 1.898137369355203e-05, "loss": 0.4567, "step": 7806 }, { "epoch": 0.14495605648236504, "grad_norm": 0.3622796833515167, "learning_rate": 1.898086070902702e-05, "loss": 0.2471, "step": 7808 }, { "epoch": 0.1449931866197837, "grad_norm": 0.45017170906066895, "learning_rate": 1.8980347602299396e-05, "loss": 0.4459, "step": 7810 }, { "epoch": 0.14503031675720232, "grad_norm": 0.34005579352378845, "learning_rate": 1.8979834373376142e-05, "loss": 0.2367, "step": 7812 }, { "epoch": 0.14506744689462095, "grad_norm": 0.3467685878276825, "learning_rate": 1.8979321022264234e-05, "loss": 0.5057, "step": 7814 }, { "epoch": 0.1451045770320396, "grad_norm": 0.24002090096473694, "learning_rate": 1.8978807548970667e-05, "loss": 0.5069, "step": 7816 }, { "epoch": 0.14514170716945823, "grad_norm": 0.29576870799064636, "learning_rate": 1.8978293953502422e-05, "loss": 0.3225, "step": 7818 }, { "epoch": 0.14517883730687686, "grad_norm": 0.26339441537857056, "learning_rate": 1.8977780235866494e-05, "loss": 0.1784, "step": 7820 }, { "epoch": 0.14521596744429552, "grad_norm": 0.4551510512828827, "learning_rate": 1.897726639606986e-05, "loss": 0.4412, "step": 7822 }, { "epoch": 0.14525309758171415, "grad_norm": 0.41436058282852173, "learning_rate": 1.897675243411953e-05, "loss": 0.4262, "step": 7824 }, { "epoch": 0.14529022771913278, "grad_norm": 0.4043031334877014, "learning_rate": 1.897623835002248e-05, "loss": 0.3014, "step": 7826 }, { "epoch": 0.14532735785655143, "grad_norm": 0.4691769480705261, "learning_rate": 1.8975724143785717e-05, "loss": 0.4732, "step": 7828 }, { "epoch": 0.14536448799397006, "grad_norm": 0.309151291847229, "learning_rate": 1.8975209815416235e-05, "loss": 0.3494, "step": 7830 }, { "epoch": 0.14540161813138872, "grad_norm": 0.3898021876811981, "learning_rate": 1.897469536492103e-05, "loss": 0.3605, "step": 7832 }, { "epoch": 0.14543874826880734, "grad_norm": 0.34558334946632385, "learning_rate": 1.89741807923071e-05, "loss": 0.5045, "step": 7834 }, { "epoch": 0.14547587840622597, "grad_norm": 0.3210196793079376, "learning_rate": 1.8973666097581456e-05, "loss": 0.3742, "step": 7836 }, { "epoch": 0.14551300854364463, "grad_norm": 0.35887858271598816, "learning_rate": 1.8973151280751092e-05, "loss": 0.276, "step": 7838 }, { "epoch": 0.14555013868106326, "grad_norm": 0.298946738243103, "learning_rate": 1.897263634182302e-05, "loss": 0.3235, "step": 7840 }, { "epoch": 0.14558726881848189, "grad_norm": 0.45819589495658875, "learning_rate": 1.8972121280804238e-05, "loss": 0.2479, "step": 7842 }, { "epoch": 0.14562439895590054, "grad_norm": 0.3384646475315094, "learning_rate": 1.8971606097701764e-05, "loss": 0.4018, "step": 7844 }, { "epoch": 0.14566152909331917, "grad_norm": 0.28389620780944824, "learning_rate": 1.8971090792522604e-05, "loss": 0.5037, "step": 7846 }, { "epoch": 0.14569865923073783, "grad_norm": 0.4108288288116455, "learning_rate": 1.897057536527377e-05, "loss": 0.1672, "step": 7848 }, { "epoch": 0.14573578936815645, "grad_norm": 0.33314409852027893, "learning_rate": 1.8970059815962272e-05, "loss": 0.2858, "step": 7850 }, { "epoch": 0.14577291950557508, "grad_norm": 0.4274868071079254, "learning_rate": 1.896954414459513e-05, "loss": 0.2463, "step": 7852 }, { "epoch": 0.14581004964299374, "grad_norm": 0.4678952991962433, "learning_rate": 1.8969028351179363e-05, "loss": 0.3779, "step": 7854 }, { "epoch": 0.14584717978041237, "grad_norm": 0.3344060480594635, "learning_rate": 1.896851243572198e-05, "loss": 0.387, "step": 7856 }, { "epoch": 0.145884309917831, "grad_norm": 0.37126004695892334, "learning_rate": 1.896799639823001e-05, "loss": 0.3157, "step": 7858 }, { "epoch": 0.14592144005524965, "grad_norm": 0.3887842297554016, "learning_rate": 1.896748023871047e-05, "loss": 0.5238, "step": 7860 }, { "epoch": 0.14595857019266828, "grad_norm": 0.3083222508430481, "learning_rate": 1.8966963957170383e-05, "loss": 0.3956, "step": 7862 }, { "epoch": 0.1459957003300869, "grad_norm": 0.3145461082458496, "learning_rate": 1.8966447553616777e-05, "loss": 0.3687, "step": 7864 }, { "epoch": 0.14603283046750556, "grad_norm": 0.5283592343330383, "learning_rate": 1.8965931028056678e-05, "loss": 0.4006, "step": 7866 }, { "epoch": 0.1460699606049242, "grad_norm": 0.3143438696861267, "learning_rate": 1.8965414380497115e-05, "loss": 0.1904, "step": 7868 }, { "epoch": 0.14610709074234285, "grad_norm": 0.41060465574264526, "learning_rate": 1.8964897610945116e-05, "loss": 0.3466, "step": 7870 }, { "epoch": 0.14614422087976148, "grad_norm": 0.38744065165519714, "learning_rate": 1.896438071940771e-05, "loss": 0.4229, "step": 7872 }, { "epoch": 0.1461813510171801, "grad_norm": 0.3486790955066681, "learning_rate": 1.8963863705891937e-05, "loss": 0.3158, "step": 7874 }, { "epoch": 0.14621848115459876, "grad_norm": 0.407792866230011, "learning_rate": 1.896334657040483e-05, "loss": 0.2977, "step": 7876 }, { "epoch": 0.1462556112920174, "grad_norm": 0.2871744632720947, "learning_rate": 1.8962829312953422e-05, "loss": 0.2173, "step": 7878 }, { "epoch": 0.14629274142943602, "grad_norm": 0.3657383918762207, "learning_rate": 1.8962311933544755e-05, "loss": 0.2121, "step": 7880 }, { "epoch": 0.14632987156685467, "grad_norm": 0.35087794065475464, "learning_rate": 1.8961794432185868e-05, "loss": 0.3991, "step": 7882 }, { "epoch": 0.1463670017042733, "grad_norm": 0.47817355394363403, "learning_rate": 1.8961276808883805e-05, "loss": 0.3414, "step": 7884 }, { "epoch": 0.14640413184169196, "grad_norm": 0.2610751986503601, "learning_rate": 1.8960759063645603e-05, "loss": 0.3385, "step": 7886 }, { "epoch": 0.1464412619791106, "grad_norm": 0.31454387307167053, "learning_rate": 1.8960241196478312e-05, "loss": 0.4764, "step": 7888 }, { "epoch": 0.14647839211652922, "grad_norm": 0.2645203471183777, "learning_rate": 1.8959723207388977e-05, "loss": 0.2859, "step": 7890 }, { "epoch": 0.14651552225394787, "grad_norm": 0.3764110803604126, "learning_rate": 1.8959205096384645e-05, "loss": 0.3828, "step": 7892 }, { "epoch": 0.1465526523913665, "grad_norm": 0.4874105751514435, "learning_rate": 1.895868686347237e-05, "loss": 0.3197, "step": 7894 }, { "epoch": 0.14658978252878513, "grad_norm": 0.49774765968322754, "learning_rate": 1.89581685086592e-05, "loss": 0.3244, "step": 7896 }, { "epoch": 0.14662691266620378, "grad_norm": 0.4108216464519501, "learning_rate": 1.895765003195219e-05, "loss": 0.2732, "step": 7898 }, { "epoch": 0.1466640428036224, "grad_norm": 0.353057861328125, "learning_rate": 1.8957131433358397e-05, "loss": 0.3649, "step": 7900 }, { "epoch": 0.14670117294104104, "grad_norm": 0.42483264207839966, "learning_rate": 1.895661271288487e-05, "loss": 0.2648, "step": 7902 }, { "epoch": 0.1467383030784597, "grad_norm": 0.569980800151825, "learning_rate": 1.895609387053867e-05, "loss": 0.1865, "step": 7904 }, { "epoch": 0.14677543321587833, "grad_norm": 0.2782089114189148, "learning_rate": 1.8955574906326867e-05, "loss": 0.1886, "step": 7906 }, { "epoch": 0.14681256335329698, "grad_norm": 0.49411752820014954, "learning_rate": 1.895505582025651e-05, "loss": 0.419, "step": 7908 }, { "epoch": 0.1468496934907156, "grad_norm": 0.5169767141342163, "learning_rate": 1.8954536612334668e-05, "loss": 0.3793, "step": 7910 }, { "epoch": 0.14688682362813424, "grad_norm": 0.2284662127494812, "learning_rate": 1.8954017282568404e-05, "loss": 0.4272, "step": 7912 }, { "epoch": 0.1469239537655529, "grad_norm": 0.3215291202068329, "learning_rate": 1.8953497830964786e-05, "loss": 0.2995, "step": 7914 }, { "epoch": 0.14696108390297152, "grad_norm": 0.40153321623802185, "learning_rate": 1.895297825753088e-05, "loss": 0.4652, "step": 7916 }, { "epoch": 0.14699821404039015, "grad_norm": 0.2182285189628601, "learning_rate": 1.895245856227376e-05, "loss": 0.2078, "step": 7918 }, { "epoch": 0.1470353441778088, "grad_norm": 0.5507566928863525, "learning_rate": 1.8951938745200493e-05, "loss": 0.2655, "step": 7920 }, { "epoch": 0.14707247431522744, "grad_norm": 0.3515758514404297, "learning_rate": 1.8951418806318153e-05, "loss": 0.3281, "step": 7922 }, { "epoch": 0.1471096044526461, "grad_norm": 0.26673653721809387, "learning_rate": 1.8950898745633813e-05, "loss": 0.2365, "step": 7924 }, { "epoch": 0.14714673459006472, "grad_norm": 0.327364057302475, "learning_rate": 1.8950378563154558e-05, "loss": 0.2789, "step": 7926 }, { "epoch": 0.14718386472748335, "grad_norm": 0.3492949903011322, "learning_rate": 1.8949858258887456e-05, "loss": 0.2403, "step": 7928 }, { "epoch": 0.147220994864902, "grad_norm": 0.5095848441123962, "learning_rate": 1.8949337832839592e-05, "loss": 0.3239, "step": 7930 }, { "epoch": 0.14725812500232063, "grad_norm": 0.35220614075660706, "learning_rate": 1.8948817285018046e-05, "loss": 0.407, "step": 7932 }, { "epoch": 0.14729525513973926, "grad_norm": 0.5153085589408875, "learning_rate": 1.8948296615429903e-05, "loss": 0.4116, "step": 7934 }, { "epoch": 0.14733238527715792, "grad_norm": 0.4465144872665405, "learning_rate": 1.8947775824082247e-05, "loss": 0.3227, "step": 7936 }, { "epoch": 0.14736951541457655, "grad_norm": 0.3701249361038208, "learning_rate": 1.894725491098216e-05, "loss": 0.4431, "step": 7938 }, { "epoch": 0.14740664555199517, "grad_norm": 0.3109250068664551, "learning_rate": 1.8946733876136738e-05, "loss": 0.2046, "step": 7940 }, { "epoch": 0.14744377568941383, "grad_norm": 0.3355062007904053, "learning_rate": 1.8946212719553067e-05, "loss": 0.2802, "step": 7942 }, { "epoch": 0.14748090582683246, "grad_norm": 0.29765698313713074, "learning_rate": 1.8945691441238235e-05, "loss": 0.5689, "step": 7944 }, { "epoch": 0.14751803596425112, "grad_norm": 0.5657996535301208, "learning_rate": 1.8945170041199338e-05, "loss": 0.4415, "step": 7946 }, { "epoch": 0.14755516610166974, "grad_norm": 0.48015791177749634, "learning_rate": 1.8944648519443473e-05, "loss": 0.3703, "step": 7948 }, { "epoch": 0.14759229623908837, "grad_norm": 0.36339807510375977, "learning_rate": 1.894412687597773e-05, "loss": 0.4124, "step": 7950 }, { "epoch": 0.14762942637650703, "grad_norm": 0.33174842596054077, "learning_rate": 1.8943605110809216e-05, "loss": 0.4065, "step": 7952 }, { "epoch": 0.14766655651392566, "grad_norm": 0.4281650185585022, "learning_rate": 1.894308322394502e-05, "loss": 0.4237, "step": 7954 }, { "epoch": 0.14770368665134428, "grad_norm": 0.2378285676240921, "learning_rate": 1.8942561215392253e-05, "loss": 0.3911, "step": 7956 }, { "epoch": 0.14774081678876294, "grad_norm": 0.427030473947525, "learning_rate": 1.8942039085158012e-05, "loss": 0.2956, "step": 7958 }, { "epoch": 0.14777794692618157, "grad_norm": 0.2923468053340912, "learning_rate": 1.8941516833249406e-05, "loss": 0.238, "step": 7960 }, { "epoch": 0.14781507706360023, "grad_norm": 0.3950183689594269, "learning_rate": 1.8940994459673536e-05, "loss": 0.2453, "step": 7962 }, { "epoch": 0.14785220720101885, "grad_norm": 0.541778028011322, "learning_rate": 1.8940471964437514e-05, "loss": 0.3373, "step": 7964 }, { "epoch": 0.14788933733843748, "grad_norm": 0.5443995594978333, "learning_rate": 1.893994934754845e-05, "loss": 0.2392, "step": 7966 }, { "epoch": 0.14792646747585614, "grad_norm": 0.24573831260204315, "learning_rate": 1.8939426609013448e-05, "loss": 0.2077, "step": 7968 }, { "epoch": 0.14796359761327477, "grad_norm": 0.25011658668518066, "learning_rate": 1.8938903748839634e-05, "loss": 0.4232, "step": 7970 }, { "epoch": 0.1480007277506934, "grad_norm": 0.35290372371673584, "learning_rate": 1.893838076703411e-05, "loss": 0.4335, "step": 7972 }, { "epoch": 0.14803785788811205, "grad_norm": 0.42270928621292114, "learning_rate": 1.8937857663604e-05, "loss": 0.2595, "step": 7974 }, { "epoch": 0.14807498802553068, "grad_norm": 0.3626469075679779, "learning_rate": 1.893733443855642e-05, "loss": 0.3947, "step": 7976 }, { "epoch": 0.1481121181629493, "grad_norm": 0.2349478304386139, "learning_rate": 1.8936811091898485e-05, "loss": 0.3459, "step": 7978 }, { "epoch": 0.14814924830036796, "grad_norm": 0.3554820716381073, "learning_rate": 1.8936287623637323e-05, "loss": 0.2615, "step": 7980 }, { "epoch": 0.1481863784377866, "grad_norm": 0.36123916506767273, "learning_rate": 1.893576403378005e-05, "loss": 0.1804, "step": 7982 }, { "epoch": 0.14822350857520525, "grad_norm": 0.32091060280799866, "learning_rate": 1.8935240322333798e-05, "loss": 0.4323, "step": 7984 }, { "epoch": 0.14826063871262388, "grad_norm": 0.26613056659698486, "learning_rate": 1.893471648930569e-05, "loss": 0.4224, "step": 7986 }, { "epoch": 0.1482977688500425, "grad_norm": 0.4161258637905121, "learning_rate": 1.893419253470285e-05, "loss": 0.1924, "step": 7988 }, { "epoch": 0.14833489898746116, "grad_norm": 0.32795459032058716, "learning_rate": 1.893366845853241e-05, "loss": 0.4959, "step": 7990 }, { "epoch": 0.1483720291248798, "grad_norm": 0.6610921025276184, "learning_rate": 1.8933144260801502e-05, "loss": 0.2338, "step": 7992 }, { "epoch": 0.14840915926229842, "grad_norm": 0.22595588862895966, "learning_rate": 1.8932619941517264e-05, "loss": 0.36, "step": 7994 }, { "epoch": 0.14844628939971707, "grad_norm": 0.4424133002758026, "learning_rate": 1.893209550068682e-05, "loss": 0.2464, "step": 7996 }, { "epoch": 0.1484834195371357, "grad_norm": 0.343784362077713, "learning_rate": 1.893157093831731e-05, "loss": 0.2872, "step": 7998 }, { "epoch": 0.14852054967455436, "grad_norm": 0.3268868923187256, "learning_rate": 1.8931046254415875e-05, "loss": 0.2923, "step": 8000 }, { "epoch": 0.148557679811973, "grad_norm": 0.3321167528629303, "learning_rate": 1.8930521448989653e-05, "loss": 0.3032, "step": 8002 }, { "epoch": 0.14859480994939162, "grad_norm": 0.24509146809577942, "learning_rate": 1.892999652204578e-05, "loss": 0.2059, "step": 8004 }, { "epoch": 0.14863194008681027, "grad_norm": 0.25860974192619324, "learning_rate": 1.8929471473591404e-05, "loss": 0.2338, "step": 8006 }, { "epoch": 0.1486690702242289, "grad_norm": 0.472266286611557, "learning_rate": 1.892894630363367e-05, "loss": 0.33, "step": 8008 }, { "epoch": 0.14870620036164753, "grad_norm": 0.22727616131305695, "learning_rate": 1.8928421012179725e-05, "loss": 0.1486, "step": 8010 }, { "epoch": 0.14874333049906618, "grad_norm": 0.3413729667663574, "learning_rate": 1.8927895599236706e-05, "loss": 0.2891, "step": 8012 }, { "epoch": 0.1487804606364848, "grad_norm": 0.3149247169494629, "learning_rate": 1.892737006481178e-05, "loss": 0.3343, "step": 8014 }, { "epoch": 0.14881759077390344, "grad_norm": 0.5944235920906067, "learning_rate": 1.892684440891208e-05, "loss": 0.3546, "step": 8016 }, { "epoch": 0.1488547209113221, "grad_norm": 0.313130259513855, "learning_rate": 1.8926318631544768e-05, "loss": 0.4429, "step": 8018 }, { "epoch": 0.14889185104874073, "grad_norm": 0.30029743909835815, "learning_rate": 1.8925792732717e-05, "loss": 0.2523, "step": 8020 }, { "epoch": 0.14892898118615938, "grad_norm": 0.41819971799850464, "learning_rate": 1.8925266712435926e-05, "loss": 0.2928, "step": 8022 }, { "epoch": 0.148966111323578, "grad_norm": 0.3444328308105469, "learning_rate": 1.8924740570708707e-05, "loss": 0.4183, "step": 8024 }, { "epoch": 0.14900324146099664, "grad_norm": 0.2936622202396393, "learning_rate": 1.89242143075425e-05, "loss": 0.5569, "step": 8026 }, { "epoch": 0.1490403715984153, "grad_norm": 0.3413342535495758, "learning_rate": 1.8923687922944468e-05, "loss": 0.3514, "step": 8028 }, { "epoch": 0.14907750173583392, "grad_norm": 0.30846622586250305, "learning_rate": 1.8923161416921775e-05, "loss": 0.2658, "step": 8030 }, { "epoch": 0.14911463187325255, "grad_norm": 0.3375438153743744, "learning_rate": 1.8922634789481582e-05, "loss": 0.2853, "step": 8032 }, { "epoch": 0.1491517620106712, "grad_norm": 0.42673662304878235, "learning_rate": 1.8922108040631054e-05, "loss": 0.4383, "step": 8034 }, { "epoch": 0.14918889214808984, "grad_norm": 0.43633684515953064, "learning_rate": 1.8921581170377364e-05, "loss": 0.3557, "step": 8036 }, { "epoch": 0.1492260222855085, "grad_norm": 0.5316792130470276, "learning_rate": 1.8921054178727677e-05, "loss": 0.3184, "step": 8038 }, { "epoch": 0.14926315242292712, "grad_norm": 0.4390234649181366, "learning_rate": 1.892052706568916e-05, "loss": 0.3402, "step": 8040 }, { "epoch": 0.14930028256034575, "grad_norm": 0.3417345881462097, "learning_rate": 1.8919999831268992e-05, "loss": 0.3539, "step": 8042 }, { "epoch": 0.1493374126977644, "grad_norm": 0.39099442958831787, "learning_rate": 1.8919472475474346e-05, "loss": 0.3704, "step": 8044 }, { "epoch": 0.14937454283518303, "grad_norm": 0.6233531832695007, "learning_rate": 1.8918944998312398e-05, "loss": 0.4361, "step": 8046 }, { "epoch": 0.14941167297260166, "grad_norm": 0.36886996030807495, "learning_rate": 1.891841739979032e-05, "loss": 0.3245, "step": 8048 }, { "epoch": 0.14944880311002032, "grad_norm": 0.28392186760902405, "learning_rate": 1.89178896799153e-05, "loss": 0.2779, "step": 8050 }, { "epoch": 0.14948593324743895, "grad_norm": 0.2642804682254791, "learning_rate": 1.8917361838694507e-05, "loss": 0.2492, "step": 8052 }, { "epoch": 0.14952306338485757, "grad_norm": 0.39111533761024475, "learning_rate": 1.8916833876135134e-05, "loss": 0.2636, "step": 8054 }, { "epoch": 0.14956019352227623, "grad_norm": 0.3611973524093628, "learning_rate": 1.8916305792244357e-05, "loss": 0.2987, "step": 8056 }, { "epoch": 0.14959732365969486, "grad_norm": 0.47690239548683167, "learning_rate": 1.891577758702937e-05, "loss": 0.3012, "step": 8058 }, { "epoch": 0.14963445379711351, "grad_norm": 0.2648635804653168, "learning_rate": 1.891524926049736e-05, "loss": 0.2043, "step": 8060 }, { "epoch": 0.14967158393453214, "grad_norm": 0.4561508297920227, "learning_rate": 1.8914720812655504e-05, "loss": 0.4975, "step": 8062 }, { "epoch": 0.14970871407195077, "grad_norm": 0.3444630801677704, "learning_rate": 1.8914192243511e-05, "loss": 0.2683, "step": 8064 }, { "epoch": 0.14974584420936943, "grad_norm": 0.24170279502868652, "learning_rate": 1.8913663553071044e-05, "loss": 0.2297, "step": 8066 }, { "epoch": 0.14978297434678806, "grad_norm": 0.3343278765678406, "learning_rate": 1.891313474134283e-05, "loss": 0.4293, "step": 8068 }, { "epoch": 0.14982010448420668, "grad_norm": 0.3095451593399048, "learning_rate": 1.8912605808333543e-05, "loss": 0.2463, "step": 8070 }, { "epoch": 0.14985723462162534, "grad_norm": 0.38149741291999817, "learning_rate": 1.8912076754050392e-05, "loss": 0.2944, "step": 8072 }, { "epoch": 0.14989436475904397, "grad_norm": 0.39498743414878845, "learning_rate": 1.8911547578500567e-05, "loss": 0.478, "step": 8074 }, { "epoch": 0.14993149489646262, "grad_norm": 0.31849828362464905, "learning_rate": 1.8911018281691278e-05, "loss": 0.3367, "step": 8076 }, { "epoch": 0.14996862503388125, "grad_norm": 0.3129948377609253, "learning_rate": 1.891048886362972e-05, "loss": 0.3156, "step": 8078 }, { "epoch": 0.15000575517129988, "grad_norm": 0.24439303576946259, "learning_rate": 1.8909959324323096e-05, "loss": 0.2103, "step": 8080 }, { "epoch": 0.15004288530871854, "grad_norm": 0.33535176515579224, "learning_rate": 1.8909429663778618e-05, "loss": 0.1637, "step": 8082 }, { "epoch": 0.15008001544613717, "grad_norm": 0.3443775177001953, "learning_rate": 1.8908899882003484e-05, "loss": 0.1485, "step": 8084 }, { "epoch": 0.1501171455835558, "grad_norm": 0.44087567925453186, "learning_rate": 1.890836997900491e-05, "loss": 0.3482, "step": 8086 }, { "epoch": 0.15015427572097445, "grad_norm": 0.37624138593673706, "learning_rate": 1.890783995479011e-05, "loss": 0.4073, "step": 8088 }, { "epoch": 0.15019140585839308, "grad_norm": 0.42156079411506653, "learning_rate": 1.8907309809366282e-05, "loss": 0.3459, "step": 8090 }, { "epoch": 0.1502285359958117, "grad_norm": 0.4333178997039795, "learning_rate": 1.890677954274065e-05, "loss": 0.3595, "step": 8092 }, { "epoch": 0.15026566613323036, "grad_norm": 0.3249876797199249, "learning_rate": 1.890624915492043e-05, "loss": 0.1614, "step": 8094 }, { "epoch": 0.150302796270649, "grad_norm": 0.33350321650505066, "learning_rate": 1.8905718645912835e-05, "loss": 0.2572, "step": 8096 }, { "epoch": 0.15033992640806765, "grad_norm": 0.42675504088401794, "learning_rate": 1.8905188015725086e-05, "loss": 0.4396, "step": 8098 }, { "epoch": 0.15037705654548628, "grad_norm": 0.3597617745399475, "learning_rate": 1.8904657264364398e-05, "loss": 0.4652, "step": 8100 }, { "epoch": 0.1504141866829049, "grad_norm": 0.34647828340530396, "learning_rate": 1.8904126391838002e-05, "loss": 0.3756, "step": 8102 }, { "epoch": 0.15045131682032356, "grad_norm": 0.39308691024780273, "learning_rate": 1.8903595398153115e-05, "loss": 0.419, "step": 8104 }, { "epoch": 0.1504884469577422, "grad_norm": 0.6328529715538025, "learning_rate": 1.8903064283316963e-05, "loss": 0.3136, "step": 8106 }, { "epoch": 0.15052557709516082, "grad_norm": 0.26503661274909973, "learning_rate": 1.8902533047336776e-05, "loss": 0.4365, "step": 8108 }, { "epoch": 0.15056270723257947, "grad_norm": 0.3519168198108673, "learning_rate": 1.890200169021978e-05, "loss": 0.4202, "step": 8110 }, { "epoch": 0.1505998373699981, "grad_norm": 0.6732722520828247, "learning_rate": 1.8901470211973203e-05, "loss": 0.4758, "step": 8112 }, { "epoch": 0.15063696750741676, "grad_norm": 0.32329288125038147, "learning_rate": 1.890093861260428e-05, "loss": 0.2723, "step": 8114 }, { "epoch": 0.1506740976448354, "grad_norm": 1.740921974182129, "learning_rate": 1.8900406892120246e-05, "loss": 0.3484, "step": 8116 }, { "epoch": 0.15071122778225401, "grad_norm": 0.35415127873420715, "learning_rate": 1.889987505052833e-05, "loss": 0.3531, "step": 8118 }, { "epoch": 0.15074835791967267, "grad_norm": 0.5177008509635925, "learning_rate": 1.8899343087835776e-05, "loss": 0.3243, "step": 8120 }, { "epoch": 0.1507854880570913, "grad_norm": 0.5174015760421753, "learning_rate": 1.8898811004049823e-05, "loss": 0.6162, "step": 8122 }, { "epoch": 0.15082261819450993, "grad_norm": 0.356586754322052, "learning_rate": 1.88982787991777e-05, "loss": 0.2525, "step": 8124 }, { "epoch": 0.15085974833192858, "grad_norm": 0.5085174441337585, "learning_rate": 1.889774647322666e-05, "loss": 0.4053, "step": 8126 }, { "epoch": 0.1508968784693472, "grad_norm": 0.2928535044193268, "learning_rate": 1.8897214026203944e-05, "loss": 0.3478, "step": 8128 }, { "epoch": 0.15093400860676584, "grad_norm": 0.3939286470413208, "learning_rate": 1.8896681458116793e-05, "loss": 0.2601, "step": 8130 }, { "epoch": 0.1509711387441845, "grad_norm": 0.3399118185043335, "learning_rate": 1.8896148768972456e-05, "loss": 0.4998, "step": 8132 }, { "epoch": 0.15100826888160312, "grad_norm": 0.288313627243042, "learning_rate": 1.889561595877818e-05, "loss": 0.2168, "step": 8134 }, { "epoch": 0.15104539901902178, "grad_norm": 0.354743093252182, "learning_rate": 1.8895083027541217e-05, "loss": 0.1563, "step": 8136 }, { "epoch": 0.1510825291564404, "grad_norm": 0.3141016662120819, "learning_rate": 1.889454997526882e-05, "loss": 0.5022, "step": 8138 }, { "epoch": 0.15111965929385904, "grad_norm": 0.33479002118110657, "learning_rate": 1.889401680196824e-05, "loss": 0.1907, "step": 8140 }, { "epoch": 0.1511567894312777, "grad_norm": 0.40596503019332886, "learning_rate": 1.8893483507646725e-05, "loss": 0.1372, "step": 8142 }, { "epoch": 0.15119391956869632, "grad_norm": 0.3666452467441559, "learning_rate": 1.8892950092311545e-05, "loss": 0.3383, "step": 8144 }, { "epoch": 0.15123104970611495, "grad_norm": 0.3360214829444885, "learning_rate": 1.8892416555969952e-05, "loss": 0.2423, "step": 8146 }, { "epoch": 0.1512681798435336, "grad_norm": 0.35804012417793274, "learning_rate": 1.8891882898629202e-05, "loss": 0.1988, "step": 8148 }, { "epoch": 0.15130530998095224, "grad_norm": 0.5613224506378174, "learning_rate": 1.8891349120296565e-05, "loss": 0.2715, "step": 8150 }, { "epoch": 0.1513424401183709, "grad_norm": 0.4466422498226166, "learning_rate": 1.8890815220979295e-05, "loss": 0.2171, "step": 8152 }, { "epoch": 0.15137957025578952, "grad_norm": 0.2291431874036789, "learning_rate": 1.8890281200684662e-05, "loss": 0.4429, "step": 8154 }, { "epoch": 0.15141670039320815, "grad_norm": 0.3232664167881012, "learning_rate": 1.8889747059419932e-05, "loss": 0.2818, "step": 8156 }, { "epoch": 0.1514538305306268, "grad_norm": 0.6342013478279114, "learning_rate": 1.8889212797192375e-05, "loss": 0.254, "step": 8158 }, { "epoch": 0.15149096066804543, "grad_norm": 0.36274418234825134, "learning_rate": 1.8888678414009255e-05, "loss": 0.3168, "step": 8160 }, { "epoch": 0.15152809080546406, "grad_norm": 0.3029654920101166, "learning_rate": 1.8888143909877846e-05, "loss": 0.1745, "step": 8162 }, { "epoch": 0.15156522094288272, "grad_norm": 0.3898391127586365, "learning_rate": 1.888760928480542e-05, "loss": 0.3324, "step": 8164 }, { "epoch": 0.15160235108030135, "grad_norm": 0.2669164836406708, "learning_rate": 1.8887074538799253e-05, "loss": 0.3229, "step": 8166 }, { "epoch": 0.15163948121771997, "grad_norm": 0.4950024485588074, "learning_rate": 1.8886539671866625e-05, "loss": 0.3934, "step": 8168 }, { "epoch": 0.15167661135513863, "grad_norm": 0.25776857137680054, "learning_rate": 1.8886004684014812e-05, "loss": 0.4377, "step": 8170 }, { "epoch": 0.15171374149255726, "grad_norm": 0.4564487338066101, "learning_rate": 1.888546957525109e-05, "loss": 0.3171, "step": 8172 }, { "epoch": 0.15175087162997591, "grad_norm": 0.31179043650627136, "learning_rate": 1.888493434558274e-05, "loss": 0.2832, "step": 8174 }, { "epoch": 0.15178800176739454, "grad_norm": 0.27408334612846375, "learning_rate": 1.8884398995017046e-05, "loss": 0.2731, "step": 8176 }, { "epoch": 0.15182513190481317, "grad_norm": 0.3307841420173645, "learning_rate": 1.8883863523561293e-05, "loss": 0.3481, "step": 8178 }, { "epoch": 0.15186226204223183, "grad_norm": 0.3532922565937042, "learning_rate": 1.8883327931222774e-05, "loss": 0.356, "step": 8180 }, { "epoch": 0.15189939217965046, "grad_norm": 0.27513787150382996, "learning_rate": 1.8882792218008764e-05, "loss": 0.3857, "step": 8182 }, { "epoch": 0.15193652231706908, "grad_norm": 0.44716960191726685, "learning_rate": 1.8882256383926564e-05, "loss": 0.2656, "step": 8184 }, { "epoch": 0.15197365245448774, "grad_norm": 0.33391496539115906, "learning_rate": 1.8881720428983455e-05, "loss": 0.2694, "step": 8186 }, { "epoch": 0.15201078259190637, "grad_norm": 0.4720102846622467, "learning_rate": 1.888118435318674e-05, "loss": 0.3618, "step": 8188 }, { "epoch": 0.15204791272932502, "grad_norm": 0.2996935546398163, "learning_rate": 1.8880648156543704e-05, "loss": 0.5037, "step": 8190 }, { "epoch": 0.15208504286674365, "grad_norm": 0.4064074754714966, "learning_rate": 1.888011183906165e-05, "loss": 0.3693, "step": 8192 }, { "epoch": 0.15212217300416228, "grad_norm": 0.3001393973827362, "learning_rate": 1.8879575400747872e-05, "loss": 0.5317, "step": 8194 }, { "epoch": 0.15215930314158094, "grad_norm": 0.4057982265949249, "learning_rate": 1.887903884160967e-05, "loss": 0.246, "step": 8196 }, { "epoch": 0.15219643327899957, "grad_norm": 0.2689710557460785, "learning_rate": 1.8878502161654345e-05, "loss": 0.2674, "step": 8198 }, { "epoch": 0.1522335634164182, "grad_norm": 0.34354719519615173, "learning_rate": 1.8877965360889197e-05, "loss": 0.2977, "step": 8200 }, { "epoch": 0.15227069355383685, "grad_norm": 0.4297869801521301, "learning_rate": 1.887742843932154e-05, "loss": 0.1582, "step": 8202 }, { "epoch": 0.15230782369125548, "grad_norm": 0.3412962555885315, "learning_rate": 1.8876891396958668e-05, "loss": 0.3881, "step": 8204 }, { "epoch": 0.1523449538286741, "grad_norm": 0.3971596360206604, "learning_rate": 1.8876354233807892e-05, "loss": 0.4237, "step": 8206 }, { "epoch": 0.15238208396609276, "grad_norm": 0.30143117904663086, "learning_rate": 1.8875816949876527e-05, "loss": 0.3968, "step": 8208 }, { "epoch": 0.1524192141035114, "grad_norm": 0.9673166871070862, "learning_rate": 1.8875279545171877e-05, "loss": 0.3169, "step": 8210 }, { "epoch": 0.15245634424093005, "grad_norm": 0.41814717650413513, "learning_rate": 1.8874742019701257e-05, "loss": 0.2595, "step": 8212 }, { "epoch": 0.15249347437834868, "grad_norm": 0.3168085515499115, "learning_rate": 1.8874204373471982e-05, "loss": 0.2242, "step": 8214 }, { "epoch": 0.1525306045157673, "grad_norm": 0.4080829322338104, "learning_rate": 1.8873666606491364e-05, "loss": 0.4444, "step": 8216 }, { "epoch": 0.15256773465318596, "grad_norm": 0.30472931265830994, "learning_rate": 1.8873128718766728e-05, "loss": 0.4663, "step": 8218 }, { "epoch": 0.1526048647906046, "grad_norm": 0.3709179759025574, "learning_rate": 1.8872590710305386e-05, "loss": 0.3474, "step": 8220 }, { "epoch": 0.15264199492802322, "grad_norm": 0.358796089887619, "learning_rate": 1.887205258111466e-05, "loss": 0.4593, "step": 8222 }, { "epoch": 0.15267912506544187, "grad_norm": 0.31223952770233154, "learning_rate": 1.8871514331201872e-05, "loss": 0.277, "step": 8224 }, { "epoch": 0.1527162552028605, "grad_norm": 0.3244241774082184, "learning_rate": 1.887097596057435e-05, "loss": 0.6544, "step": 8226 }, { "epoch": 0.15275338534027916, "grad_norm": 0.3357715904712677, "learning_rate": 1.8870437469239416e-05, "loss": 0.2969, "step": 8228 }, { "epoch": 0.15279051547769779, "grad_norm": 0.642245352268219, "learning_rate": 1.8869898857204395e-05, "loss": 0.312, "step": 8230 }, { "epoch": 0.15282764561511641, "grad_norm": 0.44083335995674133, "learning_rate": 1.8869360124476624e-05, "loss": 0.2675, "step": 8232 }, { "epoch": 0.15286477575253507, "grad_norm": 0.2946237623691559, "learning_rate": 1.8868821271063427e-05, "loss": 0.3552, "step": 8234 }, { "epoch": 0.1529019058899537, "grad_norm": 0.39548438787460327, "learning_rate": 1.8868282296972138e-05, "loss": 0.3445, "step": 8236 }, { "epoch": 0.15293903602737233, "grad_norm": 0.376122385263443, "learning_rate": 1.886774320221009e-05, "loss": 0.3051, "step": 8238 }, { "epoch": 0.15297616616479098, "grad_norm": 0.357554167509079, "learning_rate": 1.8867203986784618e-05, "loss": 0.3374, "step": 8240 }, { "epoch": 0.1530132963022096, "grad_norm": 0.4153965413570404, "learning_rate": 1.8866664650703058e-05, "loss": 0.1452, "step": 8242 }, { "epoch": 0.15305042643962824, "grad_norm": 0.327599436044693, "learning_rate": 1.8866125193972755e-05, "loss": 0.2772, "step": 8244 }, { "epoch": 0.1530875565770469, "grad_norm": 0.2960450053215027, "learning_rate": 1.8865585616601042e-05, "loss": 0.3275, "step": 8246 }, { "epoch": 0.15312468671446552, "grad_norm": 0.28462567925453186, "learning_rate": 1.8865045918595267e-05, "loss": 0.1539, "step": 8248 }, { "epoch": 0.15316181685188418, "grad_norm": 0.35535839200019836, "learning_rate": 1.886450609996277e-05, "loss": 0.3838, "step": 8250 }, { "epoch": 0.1531989469893028, "grad_norm": 0.3786665201187134, "learning_rate": 1.88639661607109e-05, "loss": 0.4171, "step": 8252 }, { "epoch": 0.15323607712672144, "grad_norm": 0.275909960269928, "learning_rate": 1.8863426100846998e-05, "loss": 0.3196, "step": 8254 }, { "epoch": 0.1532732072641401, "grad_norm": 0.32498323917388916, "learning_rate": 1.8862885920378415e-05, "loss": 0.3672, "step": 8256 }, { "epoch": 0.15331033740155872, "grad_norm": 0.275538831949234, "learning_rate": 1.8862345619312504e-05, "loss": 0.4554, "step": 8258 }, { "epoch": 0.15334746753897735, "grad_norm": 0.3281930088996887, "learning_rate": 1.8861805197656612e-05, "loss": 0.4325, "step": 8260 }, { "epoch": 0.153384597676396, "grad_norm": 0.2855262756347656, "learning_rate": 1.88612646554181e-05, "loss": 0.2941, "step": 8262 }, { "epoch": 0.15342172781381463, "grad_norm": 0.48509934544563293, "learning_rate": 1.8860723992604314e-05, "loss": 0.2736, "step": 8264 }, { "epoch": 0.1534588579512333, "grad_norm": 0.6197798848152161, "learning_rate": 1.8860183209222616e-05, "loss": 0.3455, "step": 8266 }, { "epoch": 0.15349598808865192, "grad_norm": 0.3569723069667816, "learning_rate": 1.8859642305280367e-05, "loss": 0.3243, "step": 8268 }, { "epoch": 0.15353311822607055, "grad_norm": 0.29191651940345764, "learning_rate": 1.8859101280784923e-05, "loss": 0.4646, "step": 8270 }, { "epoch": 0.1535702483634892, "grad_norm": 0.4912727475166321, "learning_rate": 1.8858560135743647e-05, "loss": 0.3712, "step": 8272 }, { "epoch": 0.15360737850090783, "grad_norm": 0.3839099407196045, "learning_rate": 1.88580188701639e-05, "loss": 0.2462, "step": 8274 }, { "epoch": 0.15364450863832646, "grad_norm": 0.2939319312572479, "learning_rate": 1.8857477484053052e-05, "loss": 0.437, "step": 8276 }, { "epoch": 0.15368163877574512, "grad_norm": 0.3470343053340912, "learning_rate": 1.885693597741846e-05, "loss": 0.4802, "step": 8278 }, { "epoch": 0.15371876891316374, "grad_norm": 0.3169183135032654, "learning_rate": 1.8856394350267505e-05, "loss": 0.2843, "step": 8280 }, { "epoch": 0.15375589905058237, "grad_norm": 0.3366777300834656, "learning_rate": 1.8855852602607552e-05, "loss": 0.3752, "step": 8282 }, { "epoch": 0.15379302918800103, "grad_norm": 0.28098663687705994, "learning_rate": 1.8855310734445968e-05, "loss": 0.2157, "step": 8284 }, { "epoch": 0.15383015932541966, "grad_norm": 0.47254812717437744, "learning_rate": 1.8854768745790132e-05, "loss": 0.4557, "step": 8286 }, { "epoch": 0.1538672894628383, "grad_norm": 0.5211777091026306, "learning_rate": 1.8854226636647413e-05, "loss": 0.4324, "step": 8288 }, { "epoch": 0.15390441960025694, "grad_norm": 0.7771388292312622, "learning_rate": 1.885368440702519e-05, "loss": 0.4232, "step": 8290 }, { "epoch": 0.15394154973767557, "grad_norm": 0.3036107122898102, "learning_rate": 1.885314205693085e-05, "loss": 0.5038, "step": 8292 }, { "epoch": 0.15397867987509423, "grad_norm": 0.32725098729133606, "learning_rate": 1.8852599586371758e-05, "loss": 0.3583, "step": 8294 }, { "epoch": 0.15401581001251285, "grad_norm": 0.41499611735343933, "learning_rate": 1.8852056995355306e-05, "loss": 0.4218, "step": 8296 }, { "epoch": 0.15405294014993148, "grad_norm": 0.37212952971458435, "learning_rate": 1.885151428388887e-05, "loss": 0.5429, "step": 8298 }, { "epoch": 0.15409007028735014, "grad_norm": 0.33431586623191833, "learning_rate": 1.885097145197984e-05, "loss": 0.3511, "step": 8300 }, { "epoch": 0.15412720042476877, "grad_norm": 0.24516381323337555, "learning_rate": 1.88504284996356e-05, "loss": 0.3869, "step": 8302 }, { "epoch": 0.15416433056218742, "grad_norm": 0.3198567032814026, "learning_rate": 1.8849885426863537e-05, "loss": 0.2599, "step": 8304 }, { "epoch": 0.15420146069960605, "grad_norm": 0.35716384649276733, "learning_rate": 1.8849342233671044e-05, "loss": 0.315, "step": 8306 }, { "epoch": 0.15423859083702468, "grad_norm": 0.38285890221595764, "learning_rate": 1.8848798920065506e-05, "loss": 0.1891, "step": 8308 }, { "epoch": 0.15427572097444334, "grad_norm": 0.3787875175476074, "learning_rate": 1.8848255486054326e-05, "loss": 0.4427, "step": 8310 }, { "epoch": 0.15431285111186197, "grad_norm": 0.45547017455101013, "learning_rate": 1.8847711931644887e-05, "loss": 0.4486, "step": 8312 }, { "epoch": 0.1543499812492806, "grad_norm": 0.3472912907600403, "learning_rate": 1.884716825684459e-05, "loss": 0.4075, "step": 8314 }, { "epoch": 0.15438711138669925, "grad_norm": 0.38502001762390137, "learning_rate": 1.884662446166084e-05, "loss": 0.2494, "step": 8316 }, { "epoch": 0.15442424152411788, "grad_norm": 0.23485706746578217, "learning_rate": 1.8846080546101024e-05, "loss": 0.2826, "step": 8318 }, { "epoch": 0.1544613716615365, "grad_norm": 0.3629828095436096, "learning_rate": 1.884553651017255e-05, "loss": 0.4393, "step": 8320 }, { "epoch": 0.15449850179895516, "grad_norm": 0.28902557492256165, "learning_rate": 1.884499235388282e-05, "loss": 0.4056, "step": 8322 }, { "epoch": 0.1545356319363738, "grad_norm": 0.33223941922187805, "learning_rate": 1.884444807723924e-05, "loss": 0.2088, "step": 8324 }, { "epoch": 0.15457276207379245, "grad_norm": 0.42679888010025024, "learning_rate": 1.884390368024921e-05, "loss": 0.1811, "step": 8326 }, { "epoch": 0.15460989221121108, "grad_norm": 0.30499541759490967, "learning_rate": 1.8843359162920144e-05, "loss": 0.4709, "step": 8328 }, { "epoch": 0.1546470223486297, "grad_norm": 0.30179184675216675, "learning_rate": 1.884281452525945e-05, "loss": 0.3519, "step": 8330 }, { "epoch": 0.15468415248604836, "grad_norm": 0.5158125162124634, "learning_rate": 1.8842269767274535e-05, "loss": 0.3181, "step": 8332 }, { "epoch": 0.154721282623467, "grad_norm": 0.31337445974349976, "learning_rate": 1.8841724888972813e-05, "loss": 0.343, "step": 8334 }, { "epoch": 0.15475841276088562, "grad_norm": 0.30121591687202454, "learning_rate": 1.8841179890361702e-05, "loss": 0.193, "step": 8336 }, { "epoch": 0.15479554289830427, "grad_norm": 0.38722893595695496, "learning_rate": 1.8840634771448613e-05, "loss": 0.4006, "step": 8338 }, { "epoch": 0.1548326730357229, "grad_norm": 0.4912799894809723, "learning_rate": 1.8840089532240968e-05, "loss": 0.2056, "step": 8340 }, { "epoch": 0.15486980317314156, "grad_norm": 0.29853376746177673, "learning_rate": 1.883954417274618e-05, "loss": 0.2034, "step": 8342 }, { "epoch": 0.15490693331056019, "grad_norm": 0.38545364141464233, "learning_rate": 1.8838998692971676e-05, "loss": 0.3032, "step": 8344 }, { "epoch": 0.1549440634479788, "grad_norm": 0.34697243571281433, "learning_rate": 1.883845309292488e-05, "loss": 0.2253, "step": 8346 }, { "epoch": 0.15498119358539747, "grad_norm": 0.2644882798194885, "learning_rate": 1.8837907372613206e-05, "loss": 0.493, "step": 8348 }, { "epoch": 0.1550183237228161, "grad_norm": 0.39763158559799194, "learning_rate": 1.8837361532044085e-05, "loss": 0.3732, "step": 8350 }, { "epoch": 0.15505545386023473, "grad_norm": 0.35202133655548096, "learning_rate": 1.8836815571224945e-05, "loss": 0.367, "step": 8352 }, { "epoch": 0.15509258399765338, "grad_norm": 0.3993526101112366, "learning_rate": 1.8836269490163218e-05, "loss": 0.6087, "step": 8354 }, { "epoch": 0.155129714135072, "grad_norm": 0.3769938349723816, "learning_rate": 1.883572328886633e-05, "loss": 0.4694, "step": 8356 }, { "epoch": 0.15516684427249064, "grad_norm": 0.31535157561302185, "learning_rate": 1.883517696734171e-05, "loss": 0.2156, "step": 8358 }, { "epoch": 0.1552039744099093, "grad_norm": 0.32255110144615173, "learning_rate": 1.8834630525596798e-05, "loss": 0.2437, "step": 8360 }, { "epoch": 0.15524110454732792, "grad_norm": 0.3419147729873657, "learning_rate": 1.8834083963639026e-05, "loss": 0.4095, "step": 8362 }, { "epoch": 0.15527823468474658, "grad_norm": 0.28262636065483093, "learning_rate": 1.8833537281475835e-05, "loss": 0.3426, "step": 8364 }, { "epoch": 0.1553153648221652, "grad_norm": 0.4587547481060028, "learning_rate": 1.8832990479114656e-05, "loss": 0.4772, "step": 8366 }, { "epoch": 0.15535249495958384, "grad_norm": 0.36408957839012146, "learning_rate": 1.883244355656294e-05, "loss": 0.3623, "step": 8368 }, { "epoch": 0.1553896250970025, "grad_norm": 0.4597477912902832, "learning_rate": 1.8831896513828123e-05, "loss": 0.3255, "step": 8370 }, { "epoch": 0.15542675523442112, "grad_norm": 0.25715145468711853, "learning_rate": 1.8831349350917647e-05, "loss": 0.451, "step": 8372 }, { "epoch": 0.15546388537183975, "grad_norm": 0.3800475001335144, "learning_rate": 1.8830802067838962e-05, "loss": 0.3383, "step": 8374 }, { "epoch": 0.1555010155092584, "grad_norm": 0.6488123536109924, "learning_rate": 1.883025466459951e-05, "loss": 0.3889, "step": 8376 }, { "epoch": 0.15553814564667703, "grad_norm": 0.30191728472709656, "learning_rate": 1.882970714120674e-05, "loss": 0.3073, "step": 8378 }, { "epoch": 0.1555752757840957, "grad_norm": 0.3120136260986328, "learning_rate": 1.8829159497668107e-05, "loss": 0.4175, "step": 8380 }, { "epoch": 0.15561240592151432, "grad_norm": 0.5108128190040588, "learning_rate": 1.882861173399106e-05, "loss": 0.2471, "step": 8382 }, { "epoch": 0.15564953605893295, "grad_norm": 0.28883299231529236, "learning_rate": 1.8828063850183048e-05, "loss": 0.212, "step": 8384 }, { "epoch": 0.1556866661963516, "grad_norm": 0.5487533211708069, "learning_rate": 1.8827515846251532e-05, "loss": 0.348, "step": 8386 }, { "epoch": 0.15572379633377023, "grad_norm": 0.4988667964935303, "learning_rate": 1.8826967722203968e-05, "loss": 0.1381, "step": 8388 }, { "epoch": 0.15576092647118886, "grad_norm": 0.2011573165655136, "learning_rate": 1.8826419478047813e-05, "loss": 0.2634, "step": 8390 }, { "epoch": 0.15579805660860752, "grad_norm": 0.41278955340385437, "learning_rate": 1.8825871113790528e-05, "loss": 0.2462, "step": 8392 }, { "epoch": 0.15583518674602614, "grad_norm": 0.4164373278617859, "learning_rate": 1.882532262943957e-05, "loss": 0.24, "step": 8394 }, { "epoch": 0.15587231688344477, "grad_norm": 0.543787956237793, "learning_rate": 1.882477402500241e-05, "loss": 0.4038, "step": 8396 }, { "epoch": 0.15590944702086343, "grad_norm": 0.2890948951244354, "learning_rate": 1.8824225300486504e-05, "loss": 0.3602, "step": 8398 }, { "epoch": 0.15594657715828206, "grad_norm": 0.4376918077468872, "learning_rate": 1.8823676455899328e-05, "loss": 0.384, "step": 8400 }, { "epoch": 0.1559837072957007, "grad_norm": 0.38471558690071106, "learning_rate": 1.8823127491248345e-05, "loss": 0.1924, "step": 8402 }, { "epoch": 0.15602083743311934, "grad_norm": 0.28494715690612793, "learning_rate": 1.882257840654102e-05, "loss": 0.2271, "step": 8404 }, { "epoch": 0.15605796757053797, "grad_norm": 0.3399980366230011, "learning_rate": 1.8822029201784837e-05, "loss": 0.2603, "step": 8406 }, { "epoch": 0.15609509770795663, "grad_norm": 0.3538065254688263, "learning_rate": 1.8821479876987256e-05, "loss": 0.2999, "step": 8408 }, { "epoch": 0.15613222784537525, "grad_norm": 0.4185173213481903, "learning_rate": 1.882093043215576e-05, "loss": 0.4075, "step": 8410 }, { "epoch": 0.15616935798279388, "grad_norm": 0.2962680757045746, "learning_rate": 1.8820380867297823e-05, "loss": 0.3695, "step": 8412 }, { "epoch": 0.15620648812021254, "grad_norm": 0.3709704875946045, "learning_rate": 1.881983118242092e-05, "loss": 0.4406, "step": 8414 }, { "epoch": 0.15624361825763117, "grad_norm": 0.357825368642807, "learning_rate": 1.8819281377532538e-05, "loss": 0.3227, "step": 8416 }, { "epoch": 0.15628074839504982, "grad_norm": 0.40552014112472534, "learning_rate": 1.8818731452640148e-05, "loss": 0.4656, "step": 8418 }, { "epoch": 0.15631787853246845, "grad_norm": 0.328622967004776, "learning_rate": 1.8818181407751236e-05, "loss": 0.2863, "step": 8420 }, { "epoch": 0.15635500866988708, "grad_norm": 0.44142547249794006, "learning_rate": 1.8817631242873293e-05, "loss": 0.388, "step": 8422 }, { "epoch": 0.15639213880730574, "grad_norm": 0.48155131936073303, "learning_rate": 1.88170809580138e-05, "loss": 0.2765, "step": 8424 }, { "epoch": 0.15642926894472436, "grad_norm": 0.3157564699649811, "learning_rate": 1.8816530553180244e-05, "loss": 0.2506, "step": 8426 }, { "epoch": 0.156466399082143, "grad_norm": 0.28496500849723816, "learning_rate": 1.881598002838012e-05, "loss": 0.2898, "step": 8428 }, { "epoch": 0.15650352921956165, "grad_norm": 0.31027498841285706, "learning_rate": 1.8815429383620912e-05, "loss": 0.2977, "step": 8430 }, { "epoch": 0.15654065935698028, "grad_norm": 0.2794686555862427, "learning_rate": 1.8814878618910117e-05, "loss": 0.4871, "step": 8432 }, { "epoch": 0.1565777894943989, "grad_norm": 0.29024404287338257, "learning_rate": 1.8814327734255224e-05, "loss": 0.3747, "step": 8434 }, { "epoch": 0.15661491963181756, "grad_norm": 0.21158207952976227, "learning_rate": 1.8813776729663736e-05, "loss": 0.2859, "step": 8436 }, { "epoch": 0.1566520497692362, "grad_norm": 0.2505621016025543, "learning_rate": 1.8813225605143143e-05, "loss": 0.452, "step": 8438 }, { "epoch": 0.15668917990665485, "grad_norm": 0.41810840368270874, "learning_rate": 1.881267436070095e-05, "loss": 0.5954, "step": 8440 }, { "epoch": 0.15672631004407347, "grad_norm": 0.4492923617362976, "learning_rate": 1.881212299634466e-05, "loss": 0.1924, "step": 8442 }, { "epoch": 0.1567634401814921, "grad_norm": 0.3148758113384247, "learning_rate": 1.8811571512081766e-05, "loss": 0.1384, "step": 8444 }, { "epoch": 0.15680057031891076, "grad_norm": 0.40080761909484863, "learning_rate": 1.8811019907919775e-05, "loss": 0.1698, "step": 8446 }, { "epoch": 0.1568377004563294, "grad_norm": 0.3242115080356598, "learning_rate": 1.8810468183866206e-05, "loss": 0.2909, "step": 8448 }, { "epoch": 0.15687483059374802, "grad_norm": 0.29555776715278625, "learning_rate": 1.8809916339928547e-05, "loss": 0.2006, "step": 8450 }, { "epoch": 0.15691196073116667, "grad_norm": 0.48480233550071716, "learning_rate": 1.8809364376114312e-05, "loss": 0.3939, "step": 8452 }, { "epoch": 0.1569490908685853, "grad_norm": 0.3605828583240509, "learning_rate": 1.8808812292431025e-05, "loss": 0.2524, "step": 8454 }, { "epoch": 0.15698622100600396, "grad_norm": 0.4065260589122772, "learning_rate": 1.8808260088886182e-05, "loss": 0.4643, "step": 8456 }, { "epoch": 0.15702335114342258, "grad_norm": 0.3973235487937927, "learning_rate": 1.8807707765487305e-05, "loss": 0.2947, "step": 8458 }, { "epoch": 0.1570604812808412, "grad_norm": 0.2911217212677002, "learning_rate": 1.8807155322241906e-05, "loss": 0.2751, "step": 8460 }, { "epoch": 0.15709761141825987, "grad_norm": 0.25099101662635803, "learning_rate": 1.8806602759157503e-05, "loss": 0.3877, "step": 8462 }, { "epoch": 0.1571347415556785, "grad_norm": 0.2811022996902466, "learning_rate": 1.8806050076241618e-05, "loss": 0.285, "step": 8464 }, { "epoch": 0.15717187169309713, "grad_norm": 0.43172702193260193, "learning_rate": 1.8805497273501768e-05, "loss": 0.2786, "step": 8466 }, { "epoch": 0.15720900183051578, "grad_norm": 0.3037837743759155, "learning_rate": 1.8804944350945473e-05, "loss": 0.2659, "step": 8468 }, { "epoch": 0.1572461319679344, "grad_norm": 0.43164506554603577, "learning_rate": 1.8804391308580263e-05, "loss": 0.3052, "step": 8470 }, { "epoch": 0.15728326210535304, "grad_norm": 0.42627617716789246, "learning_rate": 1.8803838146413658e-05, "loss": 0.2732, "step": 8472 }, { "epoch": 0.1573203922427717, "grad_norm": 0.3359403908252716, "learning_rate": 1.8803284864453186e-05, "loss": 0.3816, "step": 8474 }, { "epoch": 0.15735752238019032, "grad_norm": 0.34421515464782715, "learning_rate": 1.8802731462706374e-05, "loss": 0.4073, "step": 8476 }, { "epoch": 0.15739465251760898, "grad_norm": 0.3191753625869751, "learning_rate": 1.8802177941180756e-05, "loss": 0.1902, "step": 8478 }, { "epoch": 0.1574317826550276, "grad_norm": 0.2970689833164215, "learning_rate": 1.8801624299883862e-05, "loss": 0.3047, "step": 8480 }, { "epoch": 0.15746891279244624, "grad_norm": 0.41219812631607056, "learning_rate": 1.8801070538823224e-05, "loss": 0.3404, "step": 8482 }, { "epoch": 0.1575060429298649, "grad_norm": 0.4366006553173065, "learning_rate": 1.880051665800638e-05, "loss": 0.3478, "step": 8484 }, { "epoch": 0.15754317306728352, "grad_norm": 0.44011735916137695, "learning_rate": 1.8799962657440864e-05, "loss": 0.2409, "step": 8486 }, { "epoch": 0.15758030320470215, "grad_norm": 0.34648698568344116, "learning_rate": 1.8799408537134214e-05, "loss": 0.3225, "step": 8488 }, { "epoch": 0.1576174333421208, "grad_norm": 0.4016639292240143, "learning_rate": 1.8798854297093975e-05, "loss": 0.2632, "step": 8490 }, { "epoch": 0.15765456347953943, "grad_norm": 0.2704181969165802, "learning_rate": 1.879829993732768e-05, "loss": 0.2534, "step": 8492 }, { "epoch": 0.1576916936169581, "grad_norm": 0.354867160320282, "learning_rate": 1.8797745457842878e-05, "loss": 0.3467, "step": 8494 }, { "epoch": 0.15772882375437672, "grad_norm": 0.7613488435745239, "learning_rate": 1.8797190858647114e-05, "loss": 0.1332, "step": 8496 }, { "epoch": 0.15776595389179535, "grad_norm": 0.21838530898094177, "learning_rate": 1.879663613974793e-05, "loss": 0.2148, "step": 8498 }, { "epoch": 0.157803084029214, "grad_norm": 0.27511149644851685, "learning_rate": 1.879608130115288e-05, "loss": 0.3795, "step": 8500 }, { "epoch": 0.15784021416663263, "grad_norm": 0.5101073980331421, "learning_rate": 1.879552634286951e-05, "loss": 0.1914, "step": 8502 }, { "epoch": 0.15787734430405126, "grad_norm": 0.35078731179237366, "learning_rate": 1.8794971264905373e-05, "loss": 0.3513, "step": 8504 }, { "epoch": 0.15791447444146992, "grad_norm": 0.33056655526161194, "learning_rate": 1.879441606726802e-05, "loss": 0.4958, "step": 8506 }, { "epoch": 0.15795160457888854, "grad_norm": 0.482127845287323, "learning_rate": 1.8793860749965008e-05, "loss": 0.3168, "step": 8508 }, { "epoch": 0.15798873471630717, "grad_norm": 0.46538054943084717, "learning_rate": 1.879330531300389e-05, "loss": 0.6215, "step": 8510 }, { "epoch": 0.15802586485372583, "grad_norm": 0.36674946546554565, "learning_rate": 1.8792749756392227e-05, "loss": 0.2752, "step": 8512 }, { "epoch": 0.15806299499114446, "grad_norm": 0.3260144889354706, "learning_rate": 1.8792194080137577e-05, "loss": 0.2367, "step": 8514 }, { "epoch": 0.1581001251285631, "grad_norm": 0.4890153408050537, "learning_rate": 1.8791638284247498e-05, "loss": 0.3441, "step": 8516 }, { "epoch": 0.15813725526598174, "grad_norm": 0.4065719544887543, "learning_rate": 1.879108236872956e-05, "loss": 0.3227, "step": 8518 }, { "epoch": 0.15817438540340037, "grad_norm": 0.4680102467536926, "learning_rate": 1.8790526333591326e-05, "loss": 0.2918, "step": 8520 }, { "epoch": 0.15821151554081903, "grad_norm": 0.3054845631122589, "learning_rate": 1.8789970178840354e-05, "loss": 0.1613, "step": 8522 }, { "epoch": 0.15824864567823765, "grad_norm": 0.3788398504257202, "learning_rate": 1.878941390448422e-05, "loss": 0.3803, "step": 8524 }, { "epoch": 0.15828577581565628, "grad_norm": 0.3817039430141449, "learning_rate": 1.8788857510530486e-05, "loss": 0.367, "step": 8526 }, { "epoch": 0.15832290595307494, "grad_norm": 0.558929979801178, "learning_rate": 1.8788300996986728e-05, "loss": 0.3611, "step": 8528 }, { "epoch": 0.15836003609049357, "grad_norm": 0.24036526679992676, "learning_rate": 1.878774436386052e-05, "loss": 0.5088, "step": 8530 }, { "epoch": 0.15839716622791222, "grad_norm": 0.3546796143054962, "learning_rate": 1.8787187611159435e-05, "loss": 0.3854, "step": 8532 }, { "epoch": 0.15843429636533085, "grad_norm": 0.5018741488456726, "learning_rate": 1.8786630738891044e-05, "loss": 0.4239, "step": 8534 }, { "epoch": 0.15847142650274948, "grad_norm": 0.3488694429397583, "learning_rate": 1.8786073747062926e-05, "loss": 0.3024, "step": 8536 }, { "epoch": 0.15850855664016814, "grad_norm": 0.3052525520324707, "learning_rate": 1.8785516635682667e-05, "loss": 0.2005, "step": 8538 }, { "epoch": 0.15854568677758676, "grad_norm": 0.3745570778846741, "learning_rate": 1.8784959404757836e-05, "loss": 0.3843, "step": 8540 }, { "epoch": 0.1585828169150054, "grad_norm": 0.40408721566200256, "learning_rate": 1.8784402054296026e-05, "loss": 0.3659, "step": 8542 }, { "epoch": 0.15861994705242405, "grad_norm": 0.27932727336883545, "learning_rate": 1.8783844584304816e-05, "loss": 0.3886, "step": 8544 }, { "epoch": 0.15865707718984268, "grad_norm": 0.43143123388290405, "learning_rate": 1.878328699479179e-05, "loss": 0.3531, "step": 8546 }, { "epoch": 0.1586942073272613, "grad_norm": 0.32952263951301575, "learning_rate": 1.878272928576454e-05, "loss": 0.2384, "step": 8548 }, { "epoch": 0.15873133746467996, "grad_norm": 0.225346639752388, "learning_rate": 1.8782171457230647e-05, "loss": 0.2715, "step": 8550 }, { "epoch": 0.1587684676020986, "grad_norm": 0.3140149414539337, "learning_rate": 1.878161350919771e-05, "loss": 0.4038, "step": 8552 }, { "epoch": 0.15880559773951725, "grad_norm": 0.3829936981201172, "learning_rate": 1.8781055441673315e-05, "loss": 0.4015, "step": 8554 }, { "epoch": 0.15884272787693587, "grad_norm": 0.3526516854763031, "learning_rate": 1.878049725466506e-05, "loss": 0.4421, "step": 8556 }, { "epoch": 0.1588798580143545, "grad_norm": 0.5073131918907166, "learning_rate": 1.8779938948180536e-05, "loss": 0.464, "step": 8558 }, { "epoch": 0.15891698815177316, "grad_norm": 0.31822317838668823, "learning_rate": 1.8779380522227342e-05, "loss": 0.4428, "step": 8560 }, { "epoch": 0.1589541182891918, "grad_norm": 0.3263387382030487, "learning_rate": 1.877882197681308e-05, "loss": 0.2352, "step": 8562 }, { "epoch": 0.15899124842661042, "grad_norm": 0.3192360997200012, "learning_rate": 1.8778263311945343e-05, "loss": 0.385, "step": 8564 }, { "epoch": 0.15902837856402907, "grad_norm": 0.4619692265987396, "learning_rate": 1.8777704527631736e-05, "loss": 0.4057, "step": 8566 }, { "epoch": 0.1590655087014477, "grad_norm": 0.37241238355636597, "learning_rate": 1.877714562387986e-05, "loss": 0.374, "step": 8568 }, { "epoch": 0.15910263883886636, "grad_norm": 0.29747694730758667, "learning_rate": 1.8776586600697332e-05, "loss": 0.0751, "step": 8570 }, { "epoch": 0.15913976897628498, "grad_norm": 0.3293072283267975, "learning_rate": 1.877602745809174e-05, "loss": 0.463, "step": 8572 }, { "epoch": 0.1591768991137036, "grad_norm": 0.31524768471717834, "learning_rate": 1.8775468196070705e-05, "loss": 0.1662, "step": 8574 }, { "epoch": 0.15921402925112227, "grad_norm": 0.3459037244319916, "learning_rate": 1.8774908814641835e-05, "loss": 0.3044, "step": 8576 }, { "epoch": 0.1592511593885409, "grad_norm": 0.3256380259990692, "learning_rate": 1.8774349313812735e-05, "loss": 0.3346, "step": 8578 }, { "epoch": 0.15928828952595953, "grad_norm": 0.4057963192462921, "learning_rate": 1.8773789693591028e-05, "loss": 0.383, "step": 8580 }, { "epoch": 0.15932541966337818, "grad_norm": 0.4462374448776245, "learning_rate": 1.8773229953984322e-05, "loss": 0.3459, "step": 8582 }, { "epoch": 0.1593625498007968, "grad_norm": 0.3705790936946869, "learning_rate": 1.8772670095000232e-05, "loss": 0.2999, "step": 8584 }, { "epoch": 0.15939967993821544, "grad_norm": 0.6001185178756714, "learning_rate": 1.8772110116646388e-05, "loss": 0.3258, "step": 8586 }, { "epoch": 0.1594368100756341, "grad_norm": 0.32519131898880005, "learning_rate": 1.8771550018930392e-05, "loss": 0.4422, "step": 8588 }, { "epoch": 0.15947394021305272, "grad_norm": 0.3848697245121002, "learning_rate": 1.8770989801859877e-05, "loss": 0.316, "step": 8590 }, { "epoch": 0.15951107035047138, "grad_norm": 0.29297640919685364, "learning_rate": 1.8770429465442465e-05, "loss": 0.3697, "step": 8592 }, { "epoch": 0.15954820048789, "grad_norm": 0.2892846167087555, "learning_rate": 1.8769869009685778e-05, "loss": 0.3376, "step": 8594 }, { "epoch": 0.15958533062530864, "grad_norm": 0.44709864258766174, "learning_rate": 1.876930843459744e-05, "loss": 0.387, "step": 8596 }, { "epoch": 0.1596224607627273, "grad_norm": 0.38853031396865845, "learning_rate": 1.8768747740185085e-05, "loss": 0.698, "step": 8598 }, { "epoch": 0.15965959090014592, "grad_norm": 0.4344070553779602, "learning_rate": 1.8768186926456336e-05, "loss": 0.4328, "step": 8600 }, { "epoch": 0.15969672103756455, "grad_norm": 0.4778657555580139, "learning_rate": 1.876762599341883e-05, "loss": 0.3448, "step": 8602 }, { "epoch": 0.1597338511749832, "grad_norm": 0.31848129630088806, "learning_rate": 1.876706494108019e-05, "loss": 0.4954, "step": 8604 }, { "epoch": 0.15977098131240183, "grad_norm": 0.3195416331291199, "learning_rate": 1.876650376944806e-05, "loss": 0.2679, "step": 8606 }, { "epoch": 0.1598081114498205, "grad_norm": 0.44452306628227234, "learning_rate": 1.8765942478530072e-05, "loss": 0.3155, "step": 8608 }, { "epoch": 0.15984524158723912, "grad_norm": 0.35568225383758545, "learning_rate": 1.8765381068333864e-05, "loss": 0.2622, "step": 8610 }, { "epoch": 0.15988237172465775, "grad_norm": 0.39536669850349426, "learning_rate": 1.8764819538867076e-05, "loss": 0.4116, "step": 8612 }, { "epoch": 0.1599195018620764, "grad_norm": 0.4562740921974182, "learning_rate": 1.8764257890137348e-05, "loss": 0.3428, "step": 8614 }, { "epoch": 0.15995663199949503, "grad_norm": 0.2941872477531433, "learning_rate": 1.876369612215232e-05, "loss": 0.3196, "step": 8616 }, { "epoch": 0.15999376213691366, "grad_norm": 0.35525792837142944, "learning_rate": 1.876313423491964e-05, "loss": 0.2997, "step": 8618 }, { "epoch": 0.16003089227433231, "grad_norm": 0.3040013015270233, "learning_rate": 1.876257222844695e-05, "loss": 0.3572, "step": 8620 }, { "epoch": 0.16006802241175094, "grad_norm": 0.3851323425769806, "learning_rate": 1.87620101027419e-05, "loss": 0.4544, "step": 8622 }, { "epoch": 0.16010515254916957, "grad_norm": 0.31959977746009827, "learning_rate": 1.8761447857812138e-05, "loss": 0.5239, "step": 8624 }, { "epoch": 0.16014228268658823, "grad_norm": 0.3255581259727478, "learning_rate": 1.8760885493665314e-05, "loss": 0.3746, "step": 8626 }, { "epoch": 0.16017941282400686, "grad_norm": 0.345718115568161, "learning_rate": 1.876032301030908e-05, "loss": 0.2424, "step": 8628 }, { "epoch": 0.1602165429614255, "grad_norm": 0.4276328682899475, "learning_rate": 1.875976040775109e-05, "loss": 0.2984, "step": 8630 }, { "epoch": 0.16025367309884414, "grad_norm": 0.4385695457458496, "learning_rate": 1.8759197685999e-05, "loss": 0.1855, "step": 8632 }, { "epoch": 0.16029080323626277, "grad_norm": 0.3090505599975586, "learning_rate": 1.8758634845060465e-05, "loss": 0.3593, "step": 8634 }, { "epoch": 0.16032793337368142, "grad_norm": 0.7547240853309631, "learning_rate": 1.8758071884943147e-05, "loss": 0.2509, "step": 8636 }, { "epoch": 0.16036506351110005, "grad_norm": 0.4626752734184265, "learning_rate": 1.87575088056547e-05, "loss": 0.4202, "step": 8638 }, { "epoch": 0.16040219364851868, "grad_norm": 0.4105139672756195, "learning_rate": 1.8756945607202795e-05, "loss": 0.2204, "step": 8640 }, { "epoch": 0.16043932378593734, "grad_norm": 0.3552491366863251, "learning_rate": 1.875638228959509e-05, "loss": 0.4521, "step": 8642 }, { "epoch": 0.16047645392335597, "grad_norm": 0.35555294156074524, "learning_rate": 1.8755818852839246e-05, "loss": 0.2935, "step": 8644 }, { "epoch": 0.16051358406077462, "grad_norm": 0.40929824113845825, "learning_rate": 1.8755255296942934e-05, "loss": 0.492, "step": 8646 }, { "epoch": 0.16055071419819325, "grad_norm": 0.33090144395828247, "learning_rate": 1.8754691621913827e-05, "loss": 0.2716, "step": 8648 }, { "epoch": 0.16058784433561188, "grad_norm": 0.5216090083122253, "learning_rate": 1.875412782775959e-05, "loss": 0.3294, "step": 8650 }, { "epoch": 0.16062497447303054, "grad_norm": 0.3737295866012573, "learning_rate": 1.8753563914487895e-05, "loss": 0.335, "step": 8652 }, { "epoch": 0.16066210461044916, "grad_norm": 0.3304767310619354, "learning_rate": 1.8752999882106412e-05, "loss": 0.359, "step": 8654 }, { "epoch": 0.1606992347478678, "grad_norm": 0.3724282681941986, "learning_rate": 1.875243573062282e-05, "loss": 0.3107, "step": 8656 }, { "epoch": 0.16073636488528645, "grad_norm": 0.2931241989135742, "learning_rate": 1.8751871460044796e-05, "loss": 0.5784, "step": 8658 }, { "epoch": 0.16077349502270508, "grad_norm": 0.4210611879825592, "learning_rate": 1.8751307070380015e-05, "loss": 0.5042, "step": 8660 }, { "epoch": 0.1608106251601237, "grad_norm": 0.45726799964904785, "learning_rate": 1.8750742561636158e-05, "loss": 0.3166, "step": 8662 }, { "epoch": 0.16084775529754236, "grad_norm": 0.42124277353286743, "learning_rate": 1.8750177933820904e-05, "loss": 0.2216, "step": 8664 }, { "epoch": 0.160884885434961, "grad_norm": 0.4111817181110382, "learning_rate": 1.8749613186941943e-05, "loss": 0.3943, "step": 8666 }, { "epoch": 0.16092201557237965, "grad_norm": 0.406029611825943, "learning_rate": 1.874904832100695e-05, "loss": 0.3875, "step": 8668 }, { "epoch": 0.16095914570979827, "grad_norm": 0.36605018377304077, "learning_rate": 1.874848333602362e-05, "loss": 0.2387, "step": 8670 }, { "epoch": 0.1609962758472169, "grad_norm": 0.3154708743095398, "learning_rate": 1.8747918231999634e-05, "loss": 0.271, "step": 8672 }, { "epoch": 0.16103340598463556, "grad_norm": 0.32795828580856323, "learning_rate": 1.8747353008942683e-05, "loss": 0.2265, "step": 8674 }, { "epoch": 0.1610705361220542, "grad_norm": 0.46512681245803833, "learning_rate": 1.874678766686046e-05, "loss": 0.2286, "step": 8676 }, { "epoch": 0.16110766625947281, "grad_norm": 0.35453662276268005, "learning_rate": 1.8746222205760654e-05, "loss": 0.259, "step": 8678 }, { "epoch": 0.16114479639689147, "grad_norm": 0.3772389888763428, "learning_rate": 1.8745656625650968e-05, "loss": 0.2606, "step": 8680 }, { "epoch": 0.1611819265343101, "grad_norm": 0.39723682403564453, "learning_rate": 1.874509092653909e-05, "loss": 0.3513, "step": 8682 }, { "epoch": 0.16121905667172876, "grad_norm": 0.5562801957130432, "learning_rate": 1.8744525108432718e-05, "loss": 0.2881, "step": 8684 }, { "epoch": 0.16125618680914738, "grad_norm": 0.24145297706127167, "learning_rate": 1.874395917133955e-05, "loss": 0.4559, "step": 8686 }, { "epoch": 0.161293316946566, "grad_norm": 0.34071019291877747, "learning_rate": 1.8743393115267288e-05, "loss": 0.244, "step": 8688 }, { "epoch": 0.16133044708398467, "grad_norm": 0.34396877884864807, "learning_rate": 1.8742826940223638e-05, "loss": 0.3514, "step": 8690 }, { "epoch": 0.1613675772214033, "grad_norm": 0.4700627624988556, "learning_rate": 1.87422606462163e-05, "loss": 0.454, "step": 8692 }, { "epoch": 0.16140470735882192, "grad_norm": 0.27930131554603577, "learning_rate": 1.8741694233252983e-05, "loss": 0.2178, "step": 8694 }, { "epoch": 0.16144183749624058, "grad_norm": 0.39671099185943604, "learning_rate": 1.874112770134139e-05, "loss": 0.3771, "step": 8696 }, { "epoch": 0.1614789676336592, "grad_norm": 0.515316903591156, "learning_rate": 1.874056105048923e-05, "loss": 0.4473, "step": 8698 }, { "epoch": 0.16151609777107784, "grad_norm": 0.25630247592926025, "learning_rate": 1.8739994280704215e-05, "loss": 0.2924, "step": 8700 }, { "epoch": 0.1615532279084965, "grad_norm": 0.32032832503318787, "learning_rate": 1.873942739199406e-05, "loss": 0.4812, "step": 8702 }, { "epoch": 0.16159035804591512, "grad_norm": 0.34187644720077515, "learning_rate": 1.8738860384366474e-05, "loss": 0.2431, "step": 8704 }, { "epoch": 0.16162748818333378, "grad_norm": 0.3880484998226166, "learning_rate": 1.8738293257829176e-05, "loss": 0.3024, "step": 8706 }, { "epoch": 0.1616646183207524, "grad_norm": 0.3767837882041931, "learning_rate": 1.873772601238988e-05, "loss": 0.3059, "step": 8708 }, { "epoch": 0.16170174845817104, "grad_norm": 0.3146280348300934, "learning_rate": 1.8737158648056305e-05, "loss": 0.3483, "step": 8710 }, { "epoch": 0.1617388785955897, "grad_norm": 0.44261911511421204, "learning_rate": 1.8736591164836174e-05, "loss": 0.2981, "step": 8712 }, { "epoch": 0.16177600873300832, "grad_norm": 0.32656797766685486, "learning_rate": 1.8736023562737205e-05, "loss": 0.3056, "step": 8714 }, { "epoch": 0.16181313887042695, "grad_norm": 0.3982129991054535, "learning_rate": 1.873545584176712e-05, "loss": 0.3988, "step": 8716 }, { "epoch": 0.1618502690078456, "grad_norm": 0.3760051429271698, "learning_rate": 1.873488800193365e-05, "loss": 0.3991, "step": 8718 }, { "epoch": 0.16188739914526423, "grad_norm": 0.27282649278640747, "learning_rate": 1.8734320043244518e-05, "loss": 0.3566, "step": 8720 }, { "epoch": 0.1619245292826829, "grad_norm": 0.4922017455101013, "learning_rate": 1.873375196570745e-05, "loss": 0.5588, "step": 8722 }, { "epoch": 0.16196165942010152, "grad_norm": 0.3886096179485321, "learning_rate": 1.8733183769330182e-05, "loss": 0.5044, "step": 8724 }, { "epoch": 0.16199878955752015, "grad_norm": 0.38123759627342224, "learning_rate": 1.873261545412044e-05, "loss": 0.5065, "step": 8726 }, { "epoch": 0.1620359196949388, "grad_norm": 0.39078447222709656, "learning_rate": 1.8732047020085958e-05, "loss": 0.3562, "step": 8728 }, { "epoch": 0.16207304983235743, "grad_norm": 0.22509169578552246, "learning_rate": 1.873147846723447e-05, "loss": 0.3349, "step": 8730 }, { "epoch": 0.16211017996977606, "grad_norm": 0.48230093717575073, "learning_rate": 1.873090979557372e-05, "loss": 0.2265, "step": 8732 }, { "epoch": 0.16214731010719471, "grad_norm": 0.3128286898136139, "learning_rate": 1.8730341005111432e-05, "loss": 0.458, "step": 8734 }, { "epoch": 0.16218444024461334, "grad_norm": 0.28908175230026245, "learning_rate": 1.872977209585536e-05, "loss": 0.456, "step": 8736 }, { "epoch": 0.16222157038203197, "grad_norm": 0.3584655821323395, "learning_rate": 1.8729203067813233e-05, "loss": 0.2529, "step": 8738 }, { "epoch": 0.16225870051945063, "grad_norm": 0.29987919330596924, "learning_rate": 1.8728633920992804e-05, "loss": 0.4907, "step": 8740 }, { "epoch": 0.16229583065686926, "grad_norm": 0.5201492309570312, "learning_rate": 1.872806465540181e-05, "loss": 0.5543, "step": 8742 }, { "epoch": 0.1623329607942879, "grad_norm": 0.44429242610931396, "learning_rate": 1.8727495271048e-05, "loss": 0.3141, "step": 8744 }, { "epoch": 0.16237009093170654, "grad_norm": 0.32017919421195984, "learning_rate": 1.8726925767939122e-05, "loss": 0.4184, "step": 8746 }, { "epoch": 0.16240722106912517, "grad_norm": 0.38378721475601196, "learning_rate": 1.872635614608292e-05, "loss": 0.3663, "step": 8748 }, { "epoch": 0.16244435120654382, "grad_norm": 0.49947452545166016, "learning_rate": 1.872578640548716e-05, "loss": 0.3633, "step": 8750 }, { "epoch": 0.16248148134396245, "grad_norm": 0.4004424512386322, "learning_rate": 1.8725216546159575e-05, "loss": 0.2664, "step": 8752 }, { "epoch": 0.16251861148138108, "grad_norm": 0.3957572877407074, "learning_rate": 1.8724646568107928e-05, "loss": 0.2754, "step": 8754 }, { "epoch": 0.16255574161879974, "grad_norm": 0.2752881646156311, "learning_rate": 1.8724076471339978e-05, "loss": 0.3604, "step": 8756 }, { "epoch": 0.16259287175621837, "grad_norm": 0.31430885195732117, "learning_rate": 1.8723506255863477e-05, "loss": 0.1757, "step": 8758 }, { "epoch": 0.16263000189363702, "grad_norm": 0.2804718613624573, "learning_rate": 1.8722935921686188e-05, "loss": 0.32, "step": 8760 }, { "epoch": 0.16266713203105565, "grad_norm": 0.28155380487442017, "learning_rate": 1.8722365468815863e-05, "loss": 0.2424, "step": 8762 }, { "epoch": 0.16270426216847428, "grad_norm": 0.30976906418800354, "learning_rate": 1.8721794897260278e-05, "loss": 0.3799, "step": 8764 }, { "epoch": 0.16274139230589293, "grad_norm": 0.5113062262535095, "learning_rate": 1.8721224207027187e-05, "loss": 0.4051, "step": 8766 }, { "epoch": 0.16277852244331156, "grad_norm": 0.3286890685558319, "learning_rate": 1.8720653398124357e-05, "loss": 0.4777, "step": 8768 }, { "epoch": 0.1628156525807302, "grad_norm": 0.3769436776638031, "learning_rate": 1.8720082470559553e-05, "loss": 0.3091, "step": 8770 }, { "epoch": 0.16285278271814885, "grad_norm": 0.3779613971710205, "learning_rate": 1.871951142434055e-05, "loss": 0.4016, "step": 8772 }, { "epoch": 0.16288991285556748, "grad_norm": 0.4506412446498871, "learning_rate": 1.871894025947511e-05, "loss": 0.2935, "step": 8774 }, { "epoch": 0.1629270429929861, "grad_norm": 0.30236580967903137, "learning_rate": 1.8718368975971012e-05, "loss": 0.2349, "step": 8776 }, { "epoch": 0.16296417313040476, "grad_norm": 0.31908392906188965, "learning_rate": 1.871779757383603e-05, "loss": 0.4673, "step": 8778 }, { "epoch": 0.1630013032678234, "grad_norm": 0.3405972421169281, "learning_rate": 1.8717226053077933e-05, "loss": 0.536, "step": 8780 }, { "epoch": 0.16303843340524204, "grad_norm": 0.3148418068885803, "learning_rate": 1.87166544137045e-05, "loss": 0.4419, "step": 8782 }, { "epoch": 0.16307556354266067, "grad_norm": 0.31018543243408203, "learning_rate": 1.871608265572351e-05, "loss": 0.3121, "step": 8784 }, { "epoch": 0.1631126936800793, "grad_norm": 0.41335955262184143, "learning_rate": 1.8715510779142746e-05, "loss": 0.261, "step": 8786 }, { "epoch": 0.16314982381749796, "grad_norm": 0.4860115051269531, "learning_rate": 1.871493878396998e-05, "loss": 0.4225, "step": 8788 }, { "epoch": 0.16318695395491659, "grad_norm": 0.32487449049949646, "learning_rate": 1.8714366670213005e-05, "loss": 0.4146, "step": 8790 }, { "epoch": 0.16322408409233521, "grad_norm": 0.37052568793296814, "learning_rate": 1.8713794437879605e-05, "loss": 0.286, "step": 8792 }, { "epoch": 0.16326121422975387, "grad_norm": 0.34378519654273987, "learning_rate": 1.8713222086977557e-05, "loss": 0.3397, "step": 8794 }, { "epoch": 0.1632983443671725, "grad_norm": 0.3928300142288208, "learning_rate": 1.871264961751466e-05, "loss": 0.4875, "step": 8796 }, { "epoch": 0.16333547450459115, "grad_norm": 0.26039910316467285, "learning_rate": 1.8712077029498702e-05, "loss": 0.362, "step": 8798 }, { "epoch": 0.16337260464200978, "grad_norm": 0.4242274761199951, "learning_rate": 1.8711504322937464e-05, "loss": 0.393, "step": 8800 }, { "epoch": 0.1634097347794284, "grad_norm": 0.3434769809246063, "learning_rate": 1.8710931497838748e-05, "loss": 0.377, "step": 8802 }, { "epoch": 0.16344686491684707, "grad_norm": 0.32703617215156555, "learning_rate": 1.8710358554210352e-05, "loss": 0.2256, "step": 8804 }, { "epoch": 0.1634839950542657, "grad_norm": 0.3237664997577667, "learning_rate": 1.870978549206006e-05, "loss": 0.3185, "step": 8806 }, { "epoch": 0.16352112519168432, "grad_norm": 0.40504515171051025, "learning_rate": 1.870921231139568e-05, "loss": 0.3962, "step": 8808 }, { "epoch": 0.16355825532910298, "grad_norm": 0.3916057050228119, "learning_rate": 1.870863901222501e-05, "loss": 0.3832, "step": 8810 }, { "epoch": 0.1635953854665216, "grad_norm": 0.4852084517478943, "learning_rate": 1.8708065594555843e-05, "loss": 0.4308, "step": 8812 }, { "epoch": 0.16363251560394024, "grad_norm": 0.3387133777141571, "learning_rate": 1.8707492058395988e-05, "loss": 0.2143, "step": 8814 }, { "epoch": 0.1636696457413589, "grad_norm": 0.3047282099723816, "learning_rate": 1.8706918403753248e-05, "loss": 0.2391, "step": 8816 }, { "epoch": 0.16370677587877752, "grad_norm": 0.3141844570636749, "learning_rate": 1.870634463063543e-05, "loss": 0.3684, "step": 8818 }, { "epoch": 0.16374390601619618, "grad_norm": 0.5523303747177124, "learning_rate": 1.8705770739050342e-05, "loss": 0.452, "step": 8820 }, { "epoch": 0.1637810361536148, "grad_norm": 0.3234608769416809, "learning_rate": 1.8705196729005788e-05, "loss": 0.3602, "step": 8822 }, { "epoch": 0.16381816629103343, "grad_norm": 0.33608299493789673, "learning_rate": 1.8704622600509582e-05, "loss": 0.2711, "step": 8824 }, { "epoch": 0.1638552964284521, "grad_norm": 0.32217055559158325, "learning_rate": 1.8704048353569537e-05, "loss": 0.3961, "step": 8826 }, { "epoch": 0.16389242656587072, "grad_norm": 0.3437250554561615, "learning_rate": 1.8703473988193466e-05, "loss": 0.2902, "step": 8828 }, { "epoch": 0.16392955670328935, "grad_norm": 0.36695992946624756, "learning_rate": 1.870289950438918e-05, "loss": 0.312, "step": 8830 }, { "epoch": 0.163966686840708, "grad_norm": 0.3539976179599762, "learning_rate": 1.8702324902164507e-05, "loss": 0.2968, "step": 8832 }, { "epoch": 0.16400381697812663, "grad_norm": 0.38439294695854187, "learning_rate": 1.8701750181527252e-05, "loss": 0.2262, "step": 8834 }, { "epoch": 0.1640409471155453, "grad_norm": 0.39139512181282043, "learning_rate": 1.8701175342485244e-05, "loss": 0.1911, "step": 8836 }, { "epoch": 0.16407807725296392, "grad_norm": 0.3429873585700989, "learning_rate": 1.8700600385046302e-05, "loss": 0.2391, "step": 8838 }, { "epoch": 0.16411520739038254, "grad_norm": 0.42418980598449707, "learning_rate": 1.8700025309218253e-05, "loss": 0.2896, "step": 8840 }, { "epoch": 0.1641523375278012, "grad_norm": 0.295303612947464, "learning_rate": 1.8699450115008917e-05, "loss": 0.3443, "step": 8842 }, { "epoch": 0.16418946766521983, "grad_norm": 0.30014926195144653, "learning_rate": 1.8698874802426122e-05, "loss": 0.2854, "step": 8844 }, { "epoch": 0.16422659780263846, "grad_norm": 0.270679235458374, "learning_rate": 1.86982993714777e-05, "loss": 0.3117, "step": 8846 }, { "epoch": 0.1642637279400571, "grad_norm": 0.37993142008781433, "learning_rate": 1.8697723822171475e-05, "loss": 0.4493, "step": 8848 }, { "epoch": 0.16430085807747574, "grad_norm": 0.5098891258239746, "learning_rate": 1.869714815451528e-05, "loss": 0.415, "step": 8850 }, { "epoch": 0.16433798821489437, "grad_norm": 0.2503422796726227, "learning_rate": 1.8696572368516954e-05, "loss": 0.368, "step": 8852 }, { "epoch": 0.16437511835231303, "grad_norm": 0.3472348153591156, "learning_rate": 1.8695996464184325e-05, "loss": 0.2042, "step": 8854 }, { "epoch": 0.16441224848973165, "grad_norm": 0.36320799589157104, "learning_rate": 1.8695420441525233e-05, "loss": 0.3655, "step": 8856 }, { "epoch": 0.1644493786271503, "grad_norm": 0.3802759349346161, "learning_rate": 1.8694844300547513e-05, "loss": 0.2609, "step": 8858 }, { "epoch": 0.16448650876456894, "grad_norm": 0.4180498719215393, "learning_rate": 1.8694268041259006e-05, "loss": 0.2869, "step": 8860 }, { "epoch": 0.16452363890198757, "grad_norm": 0.30235522985458374, "learning_rate": 1.8693691663667555e-05, "loss": 0.2089, "step": 8862 }, { "epoch": 0.16456076903940622, "grad_norm": 0.39410197734832764, "learning_rate": 1.8693115167780998e-05, "loss": 0.1876, "step": 8864 }, { "epoch": 0.16459789917682485, "grad_norm": 0.3708670735359192, "learning_rate": 1.8692538553607183e-05, "loss": 0.2305, "step": 8866 }, { "epoch": 0.16463502931424348, "grad_norm": 0.5014273524284363, "learning_rate": 1.869196182115396e-05, "loss": 0.3745, "step": 8868 }, { "epoch": 0.16467215945166214, "grad_norm": 0.39320728182792664, "learning_rate": 1.8691384970429166e-05, "loss": 0.3298, "step": 8870 }, { "epoch": 0.16470928958908077, "grad_norm": 0.5245968103408813, "learning_rate": 1.869080800144066e-05, "loss": 0.3161, "step": 8872 }, { "epoch": 0.16474641972649942, "grad_norm": 0.3268929123878479, "learning_rate": 1.8690230914196292e-05, "loss": 0.2662, "step": 8874 }, { "epoch": 0.16478354986391805, "grad_norm": 0.3765755295753479, "learning_rate": 1.8689653708703905e-05, "loss": 0.2974, "step": 8876 }, { "epoch": 0.16482068000133668, "grad_norm": 0.4465888738632202, "learning_rate": 1.8689076384971363e-05, "loss": 0.3252, "step": 8878 }, { "epoch": 0.16485781013875533, "grad_norm": 0.3358520567417145, "learning_rate": 1.868849894300652e-05, "loss": 0.3511, "step": 8880 }, { "epoch": 0.16489494027617396, "grad_norm": 0.3509506583213806, "learning_rate": 1.8687921382817227e-05, "loss": 0.6462, "step": 8882 }, { "epoch": 0.1649320704135926, "grad_norm": 0.35972586274147034, "learning_rate": 1.868734370441135e-05, "loss": 0.2916, "step": 8884 }, { "epoch": 0.16496920055101125, "grad_norm": 0.32858628034591675, "learning_rate": 1.8686765907796746e-05, "loss": 0.2517, "step": 8886 }, { "epoch": 0.16500633068842988, "grad_norm": 0.9542116522789001, "learning_rate": 1.8686187992981276e-05, "loss": 0.4535, "step": 8888 }, { "epoch": 0.1650434608258485, "grad_norm": 0.40352657437324524, "learning_rate": 1.8685609959972808e-05, "loss": 0.2336, "step": 8890 }, { "epoch": 0.16508059096326716, "grad_norm": 0.4392412602901459, "learning_rate": 1.8685031808779204e-05, "loss": 0.3026, "step": 8892 }, { "epoch": 0.1651177211006858, "grad_norm": 0.38800475001335144, "learning_rate": 1.8684453539408333e-05, "loss": 0.3553, "step": 8894 }, { "epoch": 0.16515485123810444, "grad_norm": 0.35486483573913574, "learning_rate": 1.868387515186806e-05, "loss": 0.2214, "step": 8896 }, { "epoch": 0.16519198137552307, "grad_norm": 0.397891104221344, "learning_rate": 1.868329664616626e-05, "loss": 0.2955, "step": 8898 }, { "epoch": 0.1652291115129417, "grad_norm": 0.29122766852378845, "learning_rate": 1.8682718022310803e-05, "loss": 0.5533, "step": 8900 }, { "epoch": 0.16526624165036036, "grad_norm": 0.3675747215747833, "learning_rate": 1.8682139280309557e-05, "loss": 0.2951, "step": 8902 }, { "epoch": 0.16530337178777899, "grad_norm": 0.4462049901485443, "learning_rate": 1.8681560420170407e-05, "loss": 0.372, "step": 8904 }, { "epoch": 0.1653405019251976, "grad_norm": 0.3874303698539734, "learning_rate": 1.868098144190122e-05, "loss": 0.2961, "step": 8906 }, { "epoch": 0.16537763206261627, "grad_norm": 0.3423820436000824, "learning_rate": 1.868040234550988e-05, "loss": 0.4155, "step": 8908 }, { "epoch": 0.1654147622000349, "grad_norm": 0.5025599002838135, "learning_rate": 1.8679823131004264e-05, "loss": 0.3226, "step": 8910 }, { "epoch": 0.16545189233745355, "grad_norm": 0.48700037598609924, "learning_rate": 1.8679243798392253e-05, "loss": 0.4387, "step": 8912 }, { "epoch": 0.16548902247487218, "grad_norm": 0.4817005395889282, "learning_rate": 1.8678664347681732e-05, "loss": 0.3622, "step": 8914 }, { "epoch": 0.1655261526122908, "grad_norm": 0.5320210456848145, "learning_rate": 1.8678084778880584e-05, "loss": 0.3755, "step": 8916 }, { "epoch": 0.16556328274970947, "grad_norm": 0.4524260461330414, "learning_rate": 1.8677505091996698e-05, "loss": 0.2338, "step": 8918 }, { "epoch": 0.1656004128871281, "grad_norm": 0.3629036843776703, "learning_rate": 1.867692528703796e-05, "loss": 0.1626, "step": 8920 }, { "epoch": 0.16563754302454672, "grad_norm": 0.36559760570526123, "learning_rate": 1.8676345364012256e-05, "loss": 0.2821, "step": 8922 }, { "epoch": 0.16567467316196538, "grad_norm": 0.35414788126945496, "learning_rate": 1.8675765322927485e-05, "loss": 0.3282, "step": 8924 }, { "epoch": 0.165711803299384, "grad_norm": 0.23960210382938385, "learning_rate": 1.8675185163791533e-05, "loss": 0.2255, "step": 8926 }, { "epoch": 0.16574893343680264, "grad_norm": 0.4486822783946991, "learning_rate": 1.8674604886612293e-05, "loss": 0.5776, "step": 8928 }, { "epoch": 0.1657860635742213, "grad_norm": 0.35357430577278137, "learning_rate": 1.8674024491397667e-05, "loss": 0.3932, "step": 8930 }, { "epoch": 0.16582319371163992, "grad_norm": 0.4018774628639221, "learning_rate": 1.867344397815555e-05, "loss": 0.3245, "step": 8932 }, { "epoch": 0.16586032384905858, "grad_norm": 0.29945969581604004, "learning_rate": 1.867286334689384e-05, "loss": 0.4212, "step": 8934 }, { "epoch": 0.1658974539864772, "grad_norm": 0.47677671909332275, "learning_rate": 1.8672282597620438e-05, "loss": 0.2737, "step": 8936 }, { "epoch": 0.16593458412389583, "grad_norm": 0.40573689341545105, "learning_rate": 1.8671701730343244e-05, "loss": 0.3739, "step": 8938 }, { "epoch": 0.1659717142613145, "grad_norm": 0.3261779546737671, "learning_rate": 1.8671120745070167e-05, "loss": 0.3029, "step": 8940 }, { "epoch": 0.16600884439873312, "grad_norm": 0.3106573224067688, "learning_rate": 1.867053964180911e-05, "loss": 0.4091, "step": 8942 }, { "epoch": 0.16604597453615175, "grad_norm": 0.2213493138551712, "learning_rate": 1.8669958420567977e-05, "loss": 0.1955, "step": 8944 }, { "epoch": 0.1660831046735704, "grad_norm": 0.423654168844223, "learning_rate": 1.866937708135468e-05, "loss": 0.3466, "step": 8946 }, { "epoch": 0.16612023481098903, "grad_norm": 0.3835442066192627, "learning_rate": 1.8668795624177128e-05, "loss": 0.2034, "step": 8948 }, { "epoch": 0.1661573649484077, "grad_norm": 0.3940131366252899, "learning_rate": 1.866821404904324e-05, "loss": 0.2347, "step": 8950 }, { "epoch": 0.16619449508582632, "grad_norm": 0.3909975290298462, "learning_rate": 1.8667632355960915e-05, "loss": 0.1534, "step": 8952 }, { "epoch": 0.16623162522324494, "grad_norm": 0.5471821427345276, "learning_rate": 1.866705054493808e-05, "loss": 0.1818, "step": 8954 }, { "epoch": 0.1662687553606636, "grad_norm": 0.38158008456230164, "learning_rate": 1.8666468615982648e-05, "loss": 0.2384, "step": 8956 }, { "epoch": 0.16630588549808223, "grad_norm": 0.3104810118675232, "learning_rate": 1.8665886569102537e-05, "loss": 0.3025, "step": 8958 }, { "epoch": 0.16634301563550086, "grad_norm": 0.35220158100128174, "learning_rate": 1.8665304404305665e-05, "loss": 0.293, "step": 8960 }, { "epoch": 0.1663801457729195, "grad_norm": 0.3959876298904419, "learning_rate": 1.866472212159996e-05, "loss": 0.3165, "step": 8962 }, { "epoch": 0.16641727591033814, "grad_norm": 0.2942659258842468, "learning_rate": 1.8664139720993337e-05, "loss": 0.4532, "step": 8964 }, { "epoch": 0.16645440604775677, "grad_norm": 0.417222261428833, "learning_rate": 1.8663557202493724e-05, "loss": 0.4038, "step": 8966 }, { "epoch": 0.16649153618517543, "grad_norm": 0.44982826709747314, "learning_rate": 1.8662974566109052e-05, "loss": 0.3952, "step": 8968 }, { "epoch": 0.16652866632259405, "grad_norm": 0.2330973595380783, "learning_rate": 1.866239181184724e-05, "loss": 0.2441, "step": 8970 }, { "epoch": 0.1665657964600127, "grad_norm": 0.2412310093641281, "learning_rate": 1.8661808939716227e-05, "loss": 0.3406, "step": 8972 }, { "epoch": 0.16660292659743134, "grad_norm": 0.5149389505386353, "learning_rate": 1.8661225949723937e-05, "loss": 0.3873, "step": 8974 }, { "epoch": 0.16664005673484997, "grad_norm": 0.5088759660720825, "learning_rate": 1.8660642841878305e-05, "loss": 0.4967, "step": 8976 }, { "epoch": 0.16667718687226862, "grad_norm": 0.4076996445655823, "learning_rate": 1.8660059616187266e-05, "loss": 0.3042, "step": 8978 }, { "epoch": 0.16671431700968725, "grad_norm": 0.294471800327301, "learning_rate": 1.865947627265875e-05, "loss": 0.5449, "step": 8980 }, { "epoch": 0.16675144714710588, "grad_norm": 0.3518579304218292, "learning_rate": 1.865889281130071e-05, "loss": 0.2504, "step": 8982 }, { "epoch": 0.16678857728452454, "grad_norm": 0.4149520993232727, "learning_rate": 1.8658309232121067e-05, "loss": 0.466, "step": 8984 }, { "epoch": 0.16682570742194316, "grad_norm": 0.3717808425426483, "learning_rate": 1.8657725535127777e-05, "loss": 0.298, "step": 8986 }, { "epoch": 0.16686283755936182, "grad_norm": 0.587266743183136, "learning_rate": 1.8657141720328772e-05, "loss": 0.3896, "step": 8988 }, { "epoch": 0.16689996769678045, "grad_norm": 0.5814943313598633, "learning_rate": 1.8656557787731997e-05, "loss": 0.6717, "step": 8990 }, { "epoch": 0.16693709783419908, "grad_norm": 0.3730618953704834, "learning_rate": 1.8655973737345404e-05, "loss": 0.293, "step": 8992 }, { "epoch": 0.16697422797161773, "grad_norm": 0.43233779072761536, "learning_rate": 1.8655389569176932e-05, "loss": 0.2715, "step": 8994 }, { "epoch": 0.16701135810903636, "grad_norm": 0.6507939696311951, "learning_rate": 1.865480528323454e-05, "loss": 0.3389, "step": 8996 }, { "epoch": 0.167048488246455, "grad_norm": 0.29440972208976746, "learning_rate": 1.8654220879526164e-05, "loss": 0.1907, "step": 8998 }, { "epoch": 0.16708561838387365, "grad_norm": 0.2581784129142761, "learning_rate": 1.865363635805977e-05, "loss": 0.3709, "step": 9000 }, { "epoch": 0.16712274852129227, "grad_norm": 0.2679079473018646, "learning_rate": 1.8653051718843305e-05, "loss": 0.2634, "step": 9002 }, { "epoch": 0.1671598786587109, "grad_norm": 0.390916109085083, "learning_rate": 1.8652466961884725e-05, "loss": 0.3326, "step": 9004 }, { "epoch": 0.16719700879612956, "grad_norm": 0.379873663187027, "learning_rate": 1.8651882087191983e-05, "loss": 0.4046, "step": 9006 }, { "epoch": 0.1672341389335482, "grad_norm": 0.28242987394332886, "learning_rate": 1.8651297094773047e-05, "loss": 0.2639, "step": 9008 }, { "epoch": 0.16727126907096684, "grad_norm": 0.2469072788953781, "learning_rate": 1.8650711984635866e-05, "loss": 0.2049, "step": 9010 }, { "epoch": 0.16730839920838547, "grad_norm": 0.335746169090271, "learning_rate": 1.865012675678841e-05, "loss": 0.3211, "step": 9012 }, { "epoch": 0.1673455293458041, "grad_norm": 0.3432886302471161, "learning_rate": 1.864954141123864e-05, "loss": 0.2362, "step": 9014 }, { "epoch": 0.16738265948322276, "grad_norm": 0.23973342776298523, "learning_rate": 1.864895594799452e-05, "loss": 0.3048, "step": 9016 }, { "epoch": 0.16741978962064138, "grad_norm": 0.2192869335412979, "learning_rate": 1.8648370367064012e-05, "loss": 0.3243, "step": 9018 }, { "epoch": 0.16745691975806, "grad_norm": 0.27877163887023926, "learning_rate": 1.864778466845509e-05, "loss": 0.1152, "step": 9020 }, { "epoch": 0.16749404989547867, "grad_norm": 0.3916303217411041, "learning_rate": 1.8647198852175723e-05, "loss": 0.2086, "step": 9022 }, { "epoch": 0.1675311800328973, "grad_norm": 0.2963021993637085, "learning_rate": 1.8646612918233878e-05, "loss": 0.2917, "step": 9024 }, { "epoch": 0.16756831017031595, "grad_norm": 0.20817649364471436, "learning_rate": 1.8646026866637533e-05, "loss": 0.3202, "step": 9026 }, { "epoch": 0.16760544030773458, "grad_norm": 0.35939204692840576, "learning_rate": 1.864544069739466e-05, "loss": 0.4557, "step": 9028 }, { "epoch": 0.1676425704451532, "grad_norm": 0.34954309463500977, "learning_rate": 1.8644854410513236e-05, "loss": 0.3979, "step": 9030 }, { "epoch": 0.16767970058257187, "grad_norm": 0.31985536217689514, "learning_rate": 1.8644268006001235e-05, "loss": 0.2936, "step": 9032 }, { "epoch": 0.1677168307199905, "grad_norm": 0.3944746255874634, "learning_rate": 1.8643681483866643e-05, "loss": 0.4014, "step": 9034 }, { "epoch": 0.16775396085740912, "grad_norm": 0.42706286907196045, "learning_rate": 1.8643094844117434e-05, "loss": 0.291, "step": 9036 }, { "epoch": 0.16779109099482778, "grad_norm": 0.7891225218772888, "learning_rate": 1.8642508086761593e-05, "loss": 0.1962, "step": 9038 }, { "epoch": 0.1678282211322464, "grad_norm": 0.41168656945228577, "learning_rate": 1.8641921211807107e-05, "loss": 0.2771, "step": 9040 }, { "epoch": 0.16786535126966504, "grad_norm": 0.4221994876861572, "learning_rate": 1.8641334219261956e-05, "loss": 0.2993, "step": 9042 }, { "epoch": 0.1679024814070837, "grad_norm": 0.39930611848831177, "learning_rate": 1.864074710913413e-05, "loss": 0.1684, "step": 9044 }, { "epoch": 0.16793961154450232, "grad_norm": 0.3187035620212555, "learning_rate": 1.8640159881431622e-05, "loss": 0.3943, "step": 9046 }, { "epoch": 0.16797674168192098, "grad_norm": 0.4324028193950653, "learning_rate": 1.8639572536162417e-05, "loss": 0.3532, "step": 9048 }, { "epoch": 0.1680138718193396, "grad_norm": 0.3073504865169525, "learning_rate": 1.8638985073334504e-05, "loss": 0.2467, "step": 9050 }, { "epoch": 0.16805100195675823, "grad_norm": 0.3276194930076599, "learning_rate": 1.8638397492955885e-05, "loss": 0.1607, "step": 9052 }, { "epoch": 0.1680881320941769, "grad_norm": 0.39510953426361084, "learning_rate": 1.8637809795034548e-05, "loss": 0.2899, "step": 9054 }, { "epoch": 0.16812526223159552, "grad_norm": 0.3415541648864746, "learning_rate": 1.8637221979578495e-05, "loss": 0.2519, "step": 9056 }, { "epoch": 0.16816239236901415, "grad_norm": 0.36961629986763, "learning_rate": 1.8636634046595723e-05, "loss": 0.3486, "step": 9058 }, { "epoch": 0.1681995225064328, "grad_norm": 0.3302825093269348, "learning_rate": 1.863604599609423e-05, "loss": 0.1825, "step": 9060 }, { "epoch": 0.16823665264385143, "grad_norm": 0.4261574447154999, "learning_rate": 1.863545782808202e-05, "loss": 0.3456, "step": 9062 }, { "epoch": 0.1682737827812701, "grad_norm": 0.3397577106952667, "learning_rate": 1.8634869542567092e-05, "loss": 0.2923, "step": 9064 }, { "epoch": 0.16831091291868872, "grad_norm": 0.32736897468566895, "learning_rate": 1.863428113955746e-05, "loss": 0.5217, "step": 9066 }, { "epoch": 0.16834804305610734, "grad_norm": 0.28635403513908386, "learning_rate": 1.863369261906112e-05, "loss": 0.3189, "step": 9068 }, { "epoch": 0.168385173193526, "grad_norm": 0.3244496285915375, "learning_rate": 1.8633103981086083e-05, "loss": 0.2182, "step": 9070 }, { "epoch": 0.16842230333094463, "grad_norm": 0.3476366698741913, "learning_rate": 1.8632515225640364e-05, "loss": 0.3319, "step": 9072 }, { "epoch": 0.16845943346836326, "grad_norm": 0.34608879685401917, "learning_rate": 1.8631926352731968e-05, "loss": 0.3374, "step": 9074 }, { "epoch": 0.1684965636057819, "grad_norm": 0.3689146935939789, "learning_rate": 1.863133736236891e-05, "loss": 0.447, "step": 9076 }, { "epoch": 0.16853369374320054, "grad_norm": 0.37709754705429077, "learning_rate": 1.8630748254559207e-05, "loss": 0.5672, "step": 9078 }, { "epoch": 0.16857082388061917, "grad_norm": 0.3143155574798584, "learning_rate": 1.8630159029310868e-05, "loss": 0.1586, "step": 9080 }, { "epoch": 0.16860795401803783, "grad_norm": 0.3054594099521637, "learning_rate": 1.8629569686631918e-05, "loss": 0.381, "step": 9082 }, { "epoch": 0.16864508415545645, "grad_norm": 0.31075263023376465, "learning_rate": 1.8628980226530372e-05, "loss": 0.3457, "step": 9084 }, { "epoch": 0.1686822142928751, "grad_norm": 0.2756892442703247, "learning_rate": 1.862839064901425e-05, "loss": 0.232, "step": 9086 }, { "epoch": 0.16871934443029374, "grad_norm": 0.3617435097694397, "learning_rate": 1.862780095409158e-05, "loss": 0.3282, "step": 9088 }, { "epoch": 0.16875647456771237, "grad_norm": 0.3648868501186371, "learning_rate": 1.862721114177038e-05, "loss": 0.3284, "step": 9090 }, { "epoch": 0.16879360470513102, "grad_norm": 0.23681136965751648, "learning_rate": 1.862662121205868e-05, "loss": 0.1978, "step": 9092 }, { "epoch": 0.16883073484254965, "grad_norm": 0.40135663747787476, "learning_rate": 1.86260311649645e-05, "loss": 0.3602, "step": 9094 }, { "epoch": 0.16886786497996828, "grad_norm": 0.38004037737846375, "learning_rate": 1.862544100049588e-05, "loss": 0.3164, "step": 9096 }, { "epoch": 0.16890499511738694, "grad_norm": 0.39326241612434387, "learning_rate": 1.8624850718660837e-05, "loss": 0.2491, "step": 9098 }, { "epoch": 0.16894212525480556, "grad_norm": 0.4096834361553192, "learning_rate": 1.8624260319467415e-05, "loss": 0.2213, "step": 9100 }, { "epoch": 0.16897925539222422, "grad_norm": 0.372112900018692, "learning_rate": 1.8623669802923647e-05, "loss": 0.372, "step": 9102 }, { "epoch": 0.16901638552964285, "grad_norm": 0.30282333493232727, "learning_rate": 1.8623079169037558e-05, "loss": 0.3609, "step": 9104 }, { "epoch": 0.16905351566706148, "grad_norm": 0.3284953534603119, "learning_rate": 1.862248841781719e-05, "loss": 0.3442, "step": 9106 }, { "epoch": 0.16909064580448013, "grad_norm": 0.28249475359916687, "learning_rate": 1.8621897549270584e-05, "loss": 0.2931, "step": 9108 }, { "epoch": 0.16912777594189876, "grad_norm": 0.4184150695800781, "learning_rate": 1.8621306563405778e-05, "loss": 0.2894, "step": 9110 }, { "epoch": 0.1691649060793174, "grad_norm": 0.30237600207328796, "learning_rate": 1.8620715460230812e-05, "loss": 0.255, "step": 9112 }, { "epoch": 0.16920203621673605, "grad_norm": 0.35291510820388794, "learning_rate": 1.8620124239753733e-05, "loss": 0.3163, "step": 9114 }, { "epoch": 0.16923916635415467, "grad_norm": 0.3508572280406952, "learning_rate": 1.861953290198258e-05, "loss": 0.3648, "step": 9116 }, { "epoch": 0.1692762964915733, "grad_norm": 0.2748846709728241, "learning_rate": 1.8618941446925406e-05, "loss": 0.1888, "step": 9118 }, { "epoch": 0.16931342662899196, "grad_norm": 0.43892136216163635, "learning_rate": 1.8618349874590254e-05, "loss": 0.2888, "step": 9120 }, { "epoch": 0.1693505567664106, "grad_norm": 0.2610081732273102, "learning_rate": 1.8617758184985174e-05, "loss": 0.3631, "step": 9122 }, { "epoch": 0.16938768690382924, "grad_norm": 0.44517678022384644, "learning_rate": 1.861716637811822e-05, "loss": 0.3621, "step": 9124 }, { "epoch": 0.16942481704124787, "grad_norm": 0.3575249910354614, "learning_rate": 1.8616574453997445e-05, "loss": 0.2717, "step": 9126 }, { "epoch": 0.1694619471786665, "grad_norm": 0.35729482769966125, "learning_rate": 1.8615982412630898e-05, "loss": 0.263, "step": 9128 }, { "epoch": 0.16949907731608516, "grad_norm": 0.3210984766483307, "learning_rate": 1.861539025402664e-05, "loss": 0.2798, "step": 9130 }, { "epoch": 0.16953620745350378, "grad_norm": 0.32401925325393677, "learning_rate": 1.8614797978192725e-05, "loss": 0.1603, "step": 9132 }, { "epoch": 0.1695733375909224, "grad_norm": 0.488510400056839, "learning_rate": 1.8614205585137217e-05, "loss": 0.3016, "step": 9134 }, { "epoch": 0.16961046772834107, "grad_norm": 0.42595547437667847, "learning_rate": 1.8613613074868168e-05, "loss": 0.4085, "step": 9136 }, { "epoch": 0.1696475978657597, "grad_norm": 0.6518195271492004, "learning_rate": 1.861302044739365e-05, "loss": 0.2394, "step": 9138 }, { "epoch": 0.16968472800317835, "grad_norm": 0.27355557680130005, "learning_rate": 1.8612427702721724e-05, "loss": 0.2291, "step": 9140 }, { "epoch": 0.16972185814059698, "grad_norm": 0.3529863953590393, "learning_rate": 1.861183484086045e-05, "loss": 0.2818, "step": 9142 }, { "epoch": 0.1697589882780156, "grad_norm": 0.48271316289901733, "learning_rate": 1.8611241861817902e-05, "loss": 0.3834, "step": 9144 }, { "epoch": 0.16979611841543427, "grad_norm": 0.2798055112361908, "learning_rate": 1.8610648765602143e-05, "loss": 0.2391, "step": 9146 }, { "epoch": 0.1698332485528529, "grad_norm": 0.45579269528388977, "learning_rate": 1.861005555222125e-05, "loss": 0.4511, "step": 9148 }, { "epoch": 0.16987037869027152, "grad_norm": 0.5225679874420166, "learning_rate": 1.860946222168329e-05, "loss": 0.4077, "step": 9150 }, { "epoch": 0.16990750882769018, "grad_norm": 0.4053509831428528, "learning_rate": 1.8608868773996335e-05, "loss": 0.3867, "step": 9152 }, { "epoch": 0.1699446389651088, "grad_norm": 0.37855228781700134, "learning_rate": 1.8608275209168465e-05, "loss": 0.276, "step": 9154 }, { "epoch": 0.16998176910252744, "grad_norm": 0.3076510429382324, "learning_rate": 1.8607681527207756e-05, "loss": 0.3981, "step": 9156 }, { "epoch": 0.1700188992399461, "grad_norm": 0.41040322184562683, "learning_rate": 1.860708772812228e-05, "loss": 0.5165, "step": 9158 }, { "epoch": 0.17005602937736472, "grad_norm": 0.21080558001995087, "learning_rate": 1.8606493811920124e-05, "loss": 0.3915, "step": 9160 }, { "epoch": 0.17009315951478338, "grad_norm": 0.3336891829967499, "learning_rate": 1.8605899778609363e-05, "loss": 0.3784, "step": 9162 }, { "epoch": 0.170130289652202, "grad_norm": 0.3784641623497009, "learning_rate": 1.860530562819809e-05, "loss": 0.1508, "step": 9164 }, { "epoch": 0.17016741978962063, "grad_norm": 0.35133096575737, "learning_rate": 1.860471136069438e-05, "loss": 0.5316, "step": 9166 }, { "epoch": 0.1702045499270393, "grad_norm": 0.3064520061016083, "learning_rate": 1.8604116976106322e-05, "loss": 0.3887, "step": 9168 }, { "epoch": 0.17024168006445792, "grad_norm": 0.2978232502937317, "learning_rate": 1.8603522474442e-05, "loss": 0.3191, "step": 9170 }, { "epoch": 0.17027881020187655, "grad_norm": 0.32241135835647583, "learning_rate": 1.8602927855709514e-05, "loss": 0.3096, "step": 9172 }, { "epoch": 0.1703159403392952, "grad_norm": 0.3563089966773987, "learning_rate": 1.8602333119916948e-05, "loss": 0.3661, "step": 9174 }, { "epoch": 0.17035307047671383, "grad_norm": 0.3349653482437134, "learning_rate": 1.8601738267072394e-05, "loss": 0.2875, "step": 9176 }, { "epoch": 0.1703902006141325, "grad_norm": 0.25343212485313416, "learning_rate": 1.8601143297183947e-05, "loss": 0.2169, "step": 9178 }, { "epoch": 0.17042733075155111, "grad_norm": 0.3305390179157257, "learning_rate": 1.8600548210259704e-05, "loss": 0.2846, "step": 9180 }, { "epoch": 0.17046446088896974, "grad_norm": 0.34964269399642944, "learning_rate": 1.859995300630776e-05, "loss": 0.3253, "step": 9182 }, { "epoch": 0.1705015910263884, "grad_norm": 0.29429739713668823, "learning_rate": 1.859935768533622e-05, "loss": 0.3509, "step": 9184 }, { "epoch": 0.17053872116380703, "grad_norm": 0.36354726552963257, "learning_rate": 1.8598762247353175e-05, "loss": 0.263, "step": 9186 }, { "epoch": 0.17057585130122566, "grad_norm": 0.42745673656463623, "learning_rate": 1.859816669236673e-05, "loss": 0.4561, "step": 9188 }, { "epoch": 0.1706129814386443, "grad_norm": 0.3214625120162964, "learning_rate": 1.8597571020384997e-05, "loss": 0.2996, "step": 9190 }, { "epoch": 0.17065011157606294, "grad_norm": 0.39977049827575684, "learning_rate": 1.8596975231416072e-05, "loss": 0.1782, "step": 9192 }, { "epoch": 0.17068724171348157, "grad_norm": 0.25968509912490845, "learning_rate": 1.8596379325468066e-05, "loss": 0.2999, "step": 9194 }, { "epoch": 0.17072437185090023, "grad_norm": 0.29702845215797424, "learning_rate": 1.8595783302549085e-05, "loss": 0.2405, "step": 9196 }, { "epoch": 0.17076150198831885, "grad_norm": 0.24404948949813843, "learning_rate": 1.8595187162667242e-05, "loss": 0.203, "step": 9198 }, { "epoch": 0.1707986321257375, "grad_norm": 0.2870927155017853, "learning_rate": 1.8594590905830646e-05, "loss": 0.1675, "step": 9200 }, { "epoch": 0.17083576226315614, "grad_norm": 0.3394530117511749, "learning_rate": 1.8593994532047414e-05, "loss": 0.3978, "step": 9202 }, { "epoch": 0.17087289240057477, "grad_norm": 0.39679962396621704, "learning_rate": 1.8593398041325655e-05, "loss": 0.3616, "step": 9204 }, { "epoch": 0.17091002253799342, "grad_norm": 0.3744213581085205, "learning_rate": 1.859280143367349e-05, "loss": 0.3222, "step": 9206 }, { "epoch": 0.17094715267541205, "grad_norm": 0.3758876621723175, "learning_rate": 1.8592204709099038e-05, "loss": 0.4096, "step": 9208 }, { "epoch": 0.17098428281283068, "grad_norm": 0.31115368008613586, "learning_rate": 1.8591607867610416e-05, "loss": 0.2859, "step": 9210 }, { "epoch": 0.17102141295024934, "grad_norm": 0.40578708052635193, "learning_rate": 1.8591010909215743e-05, "loss": 0.2259, "step": 9212 }, { "epoch": 0.17105854308766796, "grad_norm": 0.4514785408973694, "learning_rate": 1.859041383392315e-05, "loss": 0.3643, "step": 9214 }, { "epoch": 0.17109567322508662, "grad_norm": 0.3591819405555725, "learning_rate": 1.858981664174075e-05, "loss": 0.4375, "step": 9216 }, { "epoch": 0.17113280336250525, "grad_norm": 0.3094094693660736, "learning_rate": 1.858921933267668e-05, "loss": 0.3128, "step": 9218 }, { "epoch": 0.17116993349992388, "grad_norm": 0.3282987177371979, "learning_rate": 1.858862190673906e-05, "loss": 0.2824, "step": 9220 }, { "epoch": 0.17120706363734253, "grad_norm": 0.3291833698749542, "learning_rate": 1.8588024363936018e-05, "loss": 0.4219, "step": 9222 }, { "epoch": 0.17124419377476116, "grad_norm": 0.3618098497390747, "learning_rate": 1.8587426704275694e-05, "loss": 0.2411, "step": 9224 }, { "epoch": 0.1712813239121798, "grad_norm": 0.44711795449256897, "learning_rate": 1.858682892776621e-05, "loss": 0.4277, "step": 9226 }, { "epoch": 0.17131845404959845, "grad_norm": 0.2986961901187897, "learning_rate": 1.858623103441571e-05, "loss": 0.4113, "step": 9228 }, { "epoch": 0.17135558418701707, "grad_norm": 0.3988238275051117, "learning_rate": 1.8585633024232322e-05, "loss": 0.2077, "step": 9230 }, { "epoch": 0.1713927143244357, "grad_norm": 0.3558047413825989, "learning_rate": 1.8585034897224185e-05, "loss": 0.486, "step": 9232 }, { "epoch": 0.17142984446185436, "grad_norm": 0.44968381524086, "learning_rate": 1.858443665339944e-05, "loss": 0.2421, "step": 9234 }, { "epoch": 0.171466974599273, "grad_norm": 0.3278856575489044, "learning_rate": 1.8583838292766225e-05, "loss": 0.349, "step": 9236 }, { "epoch": 0.17150410473669164, "grad_norm": 0.29003486037254333, "learning_rate": 1.8583239815332684e-05, "loss": 0.4458, "step": 9238 }, { "epoch": 0.17154123487411027, "grad_norm": 0.25758254528045654, "learning_rate": 1.8582641221106956e-05, "loss": 0.392, "step": 9240 }, { "epoch": 0.1715783650115289, "grad_norm": 0.328154981136322, "learning_rate": 1.858204251009719e-05, "loss": 0.2883, "step": 9242 }, { "epoch": 0.17161549514894756, "grad_norm": 0.32784155011177063, "learning_rate": 1.858144368231153e-05, "loss": 0.3765, "step": 9244 }, { "epoch": 0.17165262528636618, "grad_norm": 0.3521912395954132, "learning_rate": 1.858084473775813e-05, "loss": 0.2789, "step": 9246 }, { "epoch": 0.1716897554237848, "grad_norm": 0.2367892861366272, "learning_rate": 1.858024567644513e-05, "loss": 0.419, "step": 9248 }, { "epoch": 0.17172688556120347, "grad_norm": 0.41230475902557373, "learning_rate": 1.857964649838069e-05, "loss": 0.2091, "step": 9250 }, { "epoch": 0.1717640156986221, "grad_norm": 0.342324435710907, "learning_rate": 1.8579047203572962e-05, "loss": 0.2902, "step": 9252 }, { "epoch": 0.17180114583604075, "grad_norm": 0.4646769165992737, "learning_rate": 1.8578447792030097e-05, "loss": 0.3464, "step": 9254 }, { "epoch": 0.17183827597345938, "grad_norm": 0.3777949810028076, "learning_rate": 1.857784826376025e-05, "loss": 0.2406, "step": 9256 }, { "epoch": 0.171875406110878, "grad_norm": 0.43188735842704773, "learning_rate": 1.857724861877159e-05, "loss": 0.1858, "step": 9258 }, { "epoch": 0.17191253624829667, "grad_norm": 0.5867998003959656, "learning_rate": 1.8576648857072262e-05, "loss": 0.3568, "step": 9260 }, { "epoch": 0.1719496663857153, "grad_norm": 0.7872071862220764, "learning_rate": 1.8576048978670433e-05, "loss": 0.3128, "step": 9262 }, { "epoch": 0.17198679652313392, "grad_norm": 0.24838481843471527, "learning_rate": 1.857544898357427e-05, "loss": 0.1923, "step": 9264 }, { "epoch": 0.17202392666055258, "grad_norm": 0.40683993697166443, "learning_rate": 1.857484887179193e-05, "loss": 0.3205, "step": 9266 }, { "epoch": 0.1720610567979712, "grad_norm": 0.3763538897037506, "learning_rate": 1.857424864333158e-05, "loss": 0.2685, "step": 9268 }, { "epoch": 0.17209818693538984, "grad_norm": 0.43546822667121887, "learning_rate": 1.857364829820139e-05, "loss": 0.2427, "step": 9270 }, { "epoch": 0.1721353170728085, "grad_norm": 0.5783769488334656, "learning_rate": 1.8573047836409526e-05, "loss": 0.5151, "step": 9272 }, { "epoch": 0.17217244721022712, "grad_norm": 0.4253793954849243, "learning_rate": 1.8572447257964162e-05, "loss": 0.3271, "step": 9274 }, { "epoch": 0.17220957734764578, "grad_norm": 0.3916061818599701, "learning_rate": 1.8571846562873468e-05, "loss": 0.3526, "step": 9276 }, { "epoch": 0.1722467074850644, "grad_norm": 0.5011011362075806, "learning_rate": 1.857124575114562e-05, "loss": 0.4339, "step": 9278 }, { "epoch": 0.17228383762248303, "grad_norm": 0.38932064175605774, "learning_rate": 1.8570644822788786e-05, "loss": 0.3874, "step": 9280 }, { "epoch": 0.1723209677599017, "grad_norm": 0.47515982389450073, "learning_rate": 1.8570043777811153e-05, "loss": 0.2489, "step": 9282 }, { "epoch": 0.17235809789732032, "grad_norm": 0.575239360332489, "learning_rate": 1.856944261622089e-05, "loss": 0.5481, "step": 9284 }, { "epoch": 0.17239522803473895, "grad_norm": 0.43876922130584717, "learning_rate": 1.8568841338026183e-05, "loss": 0.3287, "step": 9286 }, { "epoch": 0.1724323581721576, "grad_norm": 0.30297747254371643, "learning_rate": 1.8568239943235215e-05, "loss": 0.3305, "step": 9288 }, { "epoch": 0.17246948830957623, "grad_norm": 0.4020748436450958, "learning_rate": 1.8567638431856166e-05, "loss": 0.3738, "step": 9290 }, { "epoch": 0.17250661844699489, "grad_norm": 0.5123699903488159, "learning_rate": 1.8567036803897217e-05, "loss": 0.2138, "step": 9292 }, { "epoch": 0.17254374858441351, "grad_norm": 0.34203192591667175, "learning_rate": 1.8566435059366562e-05, "loss": 0.3698, "step": 9294 }, { "epoch": 0.17258087872183214, "grad_norm": 0.31366607546806335, "learning_rate": 1.8565833198272383e-05, "loss": 0.2416, "step": 9296 }, { "epoch": 0.1726180088592508, "grad_norm": 0.3314725458621979, "learning_rate": 1.856523122062287e-05, "loss": 0.399, "step": 9298 }, { "epoch": 0.17265513899666943, "grad_norm": 0.37554433941841125, "learning_rate": 1.856462912642622e-05, "loss": 0.3267, "step": 9300 }, { "epoch": 0.17269226913408806, "grad_norm": 0.311726450920105, "learning_rate": 1.8564026915690624e-05, "loss": 0.2463, "step": 9302 }, { "epoch": 0.1727293992715067, "grad_norm": 0.43950992822647095, "learning_rate": 1.856342458842427e-05, "loss": 0.5294, "step": 9304 }, { "epoch": 0.17276652940892534, "grad_norm": 0.3451043963432312, "learning_rate": 1.8562822144635356e-05, "loss": 0.4019, "step": 9306 }, { "epoch": 0.17280365954634397, "grad_norm": 0.291781485080719, "learning_rate": 1.8562219584332084e-05, "loss": 0.3192, "step": 9308 }, { "epoch": 0.17284078968376262, "grad_norm": 0.3367330729961395, "learning_rate": 1.856161690752265e-05, "loss": 0.3019, "step": 9310 }, { "epoch": 0.17287791982118125, "grad_norm": 0.25980302691459656, "learning_rate": 1.8561014114215253e-05, "loss": 0.2993, "step": 9312 }, { "epoch": 0.1729150499585999, "grad_norm": 0.34610578417778015, "learning_rate": 1.85604112044181e-05, "loss": 0.2862, "step": 9314 }, { "epoch": 0.17295218009601854, "grad_norm": 0.44273290038108826, "learning_rate": 1.855980817813939e-05, "loss": 0.4501, "step": 9316 }, { "epoch": 0.17298931023343717, "grad_norm": 0.3156743347644806, "learning_rate": 1.855920503538733e-05, "loss": 0.4705, "step": 9318 }, { "epoch": 0.17302644037085582, "grad_norm": 0.2643009424209595, "learning_rate": 1.855860177617013e-05, "loss": 0.2435, "step": 9320 }, { "epoch": 0.17306357050827445, "grad_norm": 0.35216060280799866, "learning_rate": 1.855799840049599e-05, "loss": 0.2701, "step": 9322 }, { "epoch": 0.17310070064569308, "grad_norm": 0.45826292037963867, "learning_rate": 1.8557394908373132e-05, "loss": 0.2661, "step": 9324 }, { "epoch": 0.17313783078311173, "grad_norm": 0.3726547062397003, "learning_rate": 1.8556791299809758e-05, "loss": 0.2983, "step": 9326 }, { "epoch": 0.17317496092053036, "grad_norm": 0.32667383551597595, "learning_rate": 1.855618757481409e-05, "loss": 0.4411, "step": 9328 }, { "epoch": 0.17321209105794902, "grad_norm": 0.32809633016586304, "learning_rate": 1.8555583733394332e-05, "loss": 0.2844, "step": 9330 }, { "epoch": 0.17324922119536765, "grad_norm": 0.3578638434410095, "learning_rate": 1.8554979775558708e-05, "loss": 0.5059, "step": 9332 }, { "epoch": 0.17328635133278628, "grad_norm": 0.3978993892669678, "learning_rate": 1.8554375701315438e-05, "loss": 0.6142, "step": 9334 }, { "epoch": 0.17332348147020493, "grad_norm": 0.26432088017463684, "learning_rate": 1.8553771510672734e-05, "loss": 0.2792, "step": 9336 }, { "epoch": 0.17336061160762356, "grad_norm": 0.24240751564502716, "learning_rate": 1.855316720363882e-05, "loss": 0.2333, "step": 9338 }, { "epoch": 0.1733977417450422, "grad_norm": 0.31753164529800415, "learning_rate": 1.8552562780221924e-05, "loss": 0.2924, "step": 9340 }, { "epoch": 0.17343487188246084, "grad_norm": 0.4367333650588989, "learning_rate": 1.8551958240430264e-05, "loss": 0.4729, "step": 9342 }, { "epoch": 0.17347200201987947, "grad_norm": 0.3188006579875946, "learning_rate": 1.855135358427207e-05, "loss": 0.3794, "step": 9344 }, { "epoch": 0.1735091321572981, "grad_norm": 0.38418668508529663, "learning_rate": 1.8550748811755566e-05, "loss": 0.4431, "step": 9346 }, { "epoch": 0.17354626229471676, "grad_norm": 0.35711127519607544, "learning_rate": 1.8550143922888984e-05, "loss": 0.4132, "step": 9348 }, { "epoch": 0.17358339243213539, "grad_norm": 0.4196261763572693, "learning_rate": 1.8549538917680553e-05, "loss": 0.3282, "step": 9350 }, { "epoch": 0.17362052256955404, "grad_norm": 0.4077802002429962, "learning_rate": 1.854893379613851e-05, "loss": 0.2437, "step": 9352 }, { "epoch": 0.17365765270697267, "grad_norm": 0.3750348389148712, "learning_rate": 1.854832855827108e-05, "loss": 0.3686, "step": 9354 }, { "epoch": 0.1736947828443913, "grad_norm": 0.3244530260562897, "learning_rate": 1.854772320408651e-05, "loss": 0.3124, "step": 9356 }, { "epoch": 0.17373191298180996, "grad_norm": 0.28786423802375793, "learning_rate": 1.8547117733593024e-05, "loss": 0.2356, "step": 9358 }, { "epoch": 0.17376904311922858, "grad_norm": 0.3978523015975952, "learning_rate": 1.854651214679887e-05, "loss": 0.3694, "step": 9360 }, { "epoch": 0.1738061732566472, "grad_norm": 0.32264500856399536, "learning_rate": 1.8545906443712285e-05, "loss": 0.2208, "step": 9362 }, { "epoch": 0.17384330339406587, "grad_norm": 0.41607165336608887, "learning_rate": 1.8545300624341507e-05, "loss": 0.2328, "step": 9364 }, { "epoch": 0.1738804335314845, "grad_norm": 0.5304431915283203, "learning_rate": 1.854469468869479e-05, "loss": 0.3716, "step": 9366 }, { "epoch": 0.17391756366890315, "grad_norm": 0.4470873773097992, "learning_rate": 1.854408863678037e-05, "loss": 0.2912, "step": 9368 }, { "epoch": 0.17395469380632178, "grad_norm": 0.4217226505279541, "learning_rate": 1.8543482468606498e-05, "loss": 0.3014, "step": 9370 }, { "epoch": 0.1739918239437404, "grad_norm": 0.3370877504348755, "learning_rate": 1.854287618418142e-05, "loss": 0.2875, "step": 9372 }, { "epoch": 0.17402895408115907, "grad_norm": 0.3335394263267517, "learning_rate": 1.8542269783513386e-05, "loss": 0.4537, "step": 9374 }, { "epoch": 0.1740660842185777, "grad_norm": 0.32001960277557373, "learning_rate": 1.8541663266610645e-05, "loss": 0.3942, "step": 9376 }, { "epoch": 0.17410321435599632, "grad_norm": 0.432697057723999, "learning_rate": 1.8541056633481454e-05, "loss": 0.2389, "step": 9378 }, { "epoch": 0.17414034449341498, "grad_norm": 0.35600340366363525, "learning_rate": 1.8540449884134065e-05, "loss": 0.3917, "step": 9380 }, { "epoch": 0.1741774746308336, "grad_norm": 0.47467970848083496, "learning_rate": 1.8539843018576736e-05, "loss": 0.3246, "step": 9382 }, { "epoch": 0.17421460476825223, "grad_norm": 0.3445054590702057, "learning_rate": 1.8539236036817722e-05, "loss": 0.3018, "step": 9384 }, { "epoch": 0.1742517349056709, "grad_norm": 0.3301033079624176, "learning_rate": 1.8538628938865284e-05, "loss": 0.3655, "step": 9386 }, { "epoch": 0.17428886504308952, "grad_norm": 0.32374081015586853, "learning_rate": 1.8538021724727683e-05, "loss": 0.4083, "step": 9388 }, { "epoch": 0.17432599518050818, "grad_norm": 0.4189296364784241, "learning_rate": 1.853741439441318e-05, "loss": 0.2392, "step": 9390 }, { "epoch": 0.1743631253179268, "grad_norm": 0.3894954323768616, "learning_rate": 1.8536806947930036e-05, "loss": 0.3614, "step": 9392 }, { "epoch": 0.17440025545534543, "grad_norm": 0.3289856016635895, "learning_rate": 1.8536199385286524e-05, "loss": 0.2692, "step": 9394 }, { "epoch": 0.1744373855927641, "grad_norm": 0.4606802463531494, "learning_rate": 1.8535591706490907e-05, "loss": 0.4084, "step": 9396 }, { "epoch": 0.17447451573018272, "grad_norm": 0.5313446521759033, "learning_rate": 1.8534983911551455e-05, "loss": 0.4132, "step": 9398 }, { "epoch": 0.17451164586760134, "grad_norm": 0.3066141903400421, "learning_rate": 1.8534376000476437e-05, "loss": 0.3225, "step": 9400 }, { "epoch": 0.17454877600502, "grad_norm": 0.34293287992477417, "learning_rate": 1.8533767973274123e-05, "loss": 0.3153, "step": 9402 }, { "epoch": 0.17458590614243863, "grad_norm": 0.398378849029541, "learning_rate": 1.853315982995279e-05, "loss": 0.174, "step": 9404 }, { "epoch": 0.17462303627985729, "grad_norm": 0.3824893534183502, "learning_rate": 1.853255157052071e-05, "loss": 0.3311, "step": 9406 }, { "epoch": 0.1746601664172759, "grad_norm": 0.4293367862701416, "learning_rate": 1.853194319498616e-05, "loss": 0.2168, "step": 9408 }, { "epoch": 0.17469729655469454, "grad_norm": 0.4046095013618469, "learning_rate": 1.8531334703357423e-05, "loss": 0.435, "step": 9410 }, { "epoch": 0.1747344266921132, "grad_norm": 0.21468330919742584, "learning_rate": 1.8530726095642772e-05, "loss": 0.2694, "step": 9412 }, { "epoch": 0.17477155682953183, "grad_norm": 0.3662661910057068, "learning_rate": 1.8530117371850493e-05, "loss": 0.2405, "step": 9414 }, { "epoch": 0.17480868696695046, "grad_norm": 0.6266947984695435, "learning_rate": 1.8529508531988866e-05, "loss": 0.2137, "step": 9416 }, { "epoch": 0.1748458171043691, "grad_norm": 0.26765987277030945, "learning_rate": 1.8528899576066178e-05, "loss": 0.3102, "step": 9418 }, { "epoch": 0.17488294724178774, "grad_norm": 0.5364091396331787, "learning_rate": 1.8528290504090713e-05, "loss": 0.3093, "step": 9420 }, { "epoch": 0.17492007737920637, "grad_norm": 0.3082817494869232, "learning_rate": 1.8527681316070758e-05, "loss": 0.3934, "step": 9422 }, { "epoch": 0.17495720751662502, "grad_norm": 0.3861907720565796, "learning_rate": 1.8527072012014608e-05, "loss": 0.3434, "step": 9424 }, { "epoch": 0.17499433765404365, "grad_norm": 0.3344472050666809, "learning_rate": 1.8526462591930546e-05, "loss": 0.2315, "step": 9426 }, { "epoch": 0.1750314677914623, "grad_norm": 0.37833088636398315, "learning_rate": 1.8525853055826867e-05, "loss": 0.221, "step": 9428 }, { "epoch": 0.17506859792888094, "grad_norm": 0.2597125768661499, "learning_rate": 1.852524340371187e-05, "loss": 0.2226, "step": 9430 }, { "epoch": 0.17510572806629957, "grad_norm": 0.3074120581150055, "learning_rate": 1.8524633635593844e-05, "loss": 0.215, "step": 9432 }, { "epoch": 0.17514285820371822, "grad_norm": 0.26205897331237793, "learning_rate": 1.8524023751481085e-05, "loss": 0.4043, "step": 9434 }, { "epoch": 0.17517998834113685, "grad_norm": 0.40058374404907227, "learning_rate": 1.85234137513819e-05, "loss": 0.3174, "step": 9436 }, { "epoch": 0.17521711847855548, "grad_norm": 0.31757161021232605, "learning_rate": 1.8522803635304583e-05, "loss": 0.5511, "step": 9438 }, { "epoch": 0.17525424861597413, "grad_norm": 0.37255167961120605, "learning_rate": 1.8522193403257436e-05, "loss": 0.1876, "step": 9440 }, { "epoch": 0.17529137875339276, "grad_norm": 0.4025558531284332, "learning_rate": 1.8521583055248763e-05, "loss": 0.3859, "step": 9442 }, { "epoch": 0.17532850889081142, "grad_norm": 0.2851197123527527, "learning_rate": 1.852097259128687e-05, "loss": 0.3216, "step": 9444 }, { "epoch": 0.17536563902823005, "grad_norm": 0.3579910099506378, "learning_rate": 1.852036201138007e-05, "loss": 0.3721, "step": 9446 }, { "epoch": 0.17540276916564868, "grad_norm": 0.4212762713432312, "learning_rate": 1.8519751315536657e-05, "loss": 0.4328, "step": 9448 }, { "epoch": 0.17543989930306733, "grad_norm": 0.44826266169548035, "learning_rate": 1.851914050376495e-05, "loss": 0.3397, "step": 9450 }, { "epoch": 0.17547702944048596, "grad_norm": 0.5340867042541504, "learning_rate": 1.8518529576073262e-05, "loss": 0.2108, "step": 9452 }, { "epoch": 0.1755141595779046, "grad_norm": 0.38614746928215027, "learning_rate": 1.8517918532469895e-05, "loss": 0.3878, "step": 9454 }, { "epoch": 0.17555128971532324, "grad_norm": 0.4035106599330902, "learning_rate": 1.8517307372963178e-05, "loss": 0.3643, "step": 9456 }, { "epoch": 0.17558841985274187, "grad_norm": 0.29411834478378296, "learning_rate": 1.8516696097561415e-05, "loss": 0.2756, "step": 9458 }, { "epoch": 0.1756255499901605, "grad_norm": 0.31375500559806824, "learning_rate": 1.851608470627293e-05, "loss": 0.2443, "step": 9460 }, { "epoch": 0.17566268012757916, "grad_norm": 0.36916181445121765, "learning_rate": 1.8515473199106043e-05, "loss": 0.4535, "step": 9462 }, { "epoch": 0.17569981026499779, "grad_norm": 0.3658238351345062, "learning_rate": 1.851486157606907e-05, "loss": 0.25, "step": 9464 }, { "epoch": 0.17573694040241644, "grad_norm": 0.3895134925842285, "learning_rate": 1.851424983717034e-05, "loss": 0.2615, "step": 9466 }, { "epoch": 0.17577407053983507, "grad_norm": 0.3768514096736908, "learning_rate": 1.851363798241817e-05, "loss": 0.4519, "step": 9468 }, { "epoch": 0.1758112006772537, "grad_norm": 0.4980388283729553, "learning_rate": 1.851302601182089e-05, "loss": 0.3016, "step": 9470 }, { "epoch": 0.17584833081467235, "grad_norm": 0.43198224902153015, "learning_rate": 1.851241392538682e-05, "loss": 0.2536, "step": 9472 }, { "epoch": 0.17588546095209098, "grad_norm": 0.3454064726829529, "learning_rate": 1.85118017231243e-05, "loss": 0.244, "step": 9474 }, { "epoch": 0.1759225910895096, "grad_norm": 0.27406659722328186, "learning_rate": 1.8511189405041648e-05, "loss": 0.5167, "step": 9476 }, { "epoch": 0.17595972122692827, "grad_norm": 0.314929723739624, "learning_rate": 1.851057697114721e-05, "loss": 0.2578, "step": 9478 }, { "epoch": 0.1759968513643469, "grad_norm": 0.26748126745224, "learning_rate": 1.8509964421449305e-05, "loss": 0.3624, "step": 9480 }, { "epoch": 0.17603398150176555, "grad_norm": 0.427721232175827, "learning_rate": 1.850935175595628e-05, "loss": 0.2797, "step": 9482 }, { "epoch": 0.17607111163918418, "grad_norm": 0.2944505214691162, "learning_rate": 1.850873897467646e-05, "loss": 0.2098, "step": 9484 }, { "epoch": 0.1761082417766028, "grad_norm": 0.5448423027992249, "learning_rate": 1.8508126077618197e-05, "loss": 0.4087, "step": 9486 }, { "epoch": 0.17614537191402146, "grad_norm": 0.3299909830093384, "learning_rate": 1.850751306478982e-05, "loss": 0.3594, "step": 9488 }, { "epoch": 0.1761825020514401, "grad_norm": 0.46255943179130554, "learning_rate": 1.850689993619967e-05, "loss": 0.377, "step": 9490 }, { "epoch": 0.17621963218885872, "grad_norm": 0.38724610209465027, "learning_rate": 1.8506286691856092e-05, "loss": 0.3468, "step": 9492 }, { "epoch": 0.17625676232627738, "grad_norm": 0.3927869498729706, "learning_rate": 1.8505673331767434e-05, "loss": 0.45, "step": 9494 }, { "epoch": 0.176293892463696, "grad_norm": 0.4074196219444275, "learning_rate": 1.850505985594204e-05, "loss": 0.2707, "step": 9496 }, { "epoch": 0.17633102260111463, "grad_norm": 0.3120250701904297, "learning_rate": 1.8504446264388257e-05, "loss": 0.5151, "step": 9498 }, { "epoch": 0.1763681527385333, "grad_norm": 0.42595574259757996, "learning_rate": 1.850383255711443e-05, "loss": 0.5885, "step": 9500 }, { "epoch": 0.17640528287595192, "grad_norm": 0.5752061605453491, "learning_rate": 1.850321873412892e-05, "loss": 0.3706, "step": 9502 }, { "epoch": 0.17644241301337057, "grad_norm": 0.3946669101715088, "learning_rate": 1.8502604795440068e-05, "loss": 0.3707, "step": 9504 }, { "epoch": 0.1764795431507892, "grad_norm": 0.44369637966156006, "learning_rate": 1.8501990741056236e-05, "loss": 0.2307, "step": 9506 }, { "epoch": 0.17651667328820783, "grad_norm": 0.40915265679359436, "learning_rate": 1.8501376570985777e-05, "loss": 0.2256, "step": 9508 }, { "epoch": 0.1765538034256265, "grad_norm": 0.43708500266075134, "learning_rate": 1.8500762285237048e-05, "loss": 0.2529, "step": 9510 }, { "epoch": 0.17659093356304512, "grad_norm": 0.3137120008468628, "learning_rate": 1.8500147883818404e-05, "loss": 0.3213, "step": 9512 }, { "epoch": 0.17662806370046374, "grad_norm": 0.2800552248954773, "learning_rate": 1.849953336673821e-05, "loss": 0.2088, "step": 9514 }, { "epoch": 0.1766651938378824, "grad_norm": 0.20511212944984436, "learning_rate": 1.8498918734004826e-05, "loss": 0.2332, "step": 9516 }, { "epoch": 0.17670232397530103, "grad_norm": 0.328767329454422, "learning_rate": 1.8498303985626613e-05, "loss": 0.2591, "step": 9518 }, { "epoch": 0.17673945411271968, "grad_norm": 0.4095207750797272, "learning_rate": 1.849768912161194e-05, "loss": 0.4157, "step": 9520 }, { "epoch": 0.1767765842501383, "grad_norm": 0.2723335325717926, "learning_rate": 1.849707414196917e-05, "loss": 0.4392, "step": 9522 }, { "epoch": 0.17681371438755694, "grad_norm": 0.5100218653678894, "learning_rate": 1.8496459046706677e-05, "loss": 0.3481, "step": 9524 }, { "epoch": 0.1768508445249756, "grad_norm": 0.23778165876865387, "learning_rate": 1.849584383583282e-05, "loss": 0.2599, "step": 9526 }, { "epoch": 0.17688797466239423, "grad_norm": 0.2638942003250122, "learning_rate": 1.849522850935598e-05, "loss": 0.1997, "step": 9528 }, { "epoch": 0.17692510479981285, "grad_norm": 0.2838019132614136, "learning_rate": 1.849461306728453e-05, "loss": 0.4762, "step": 9530 }, { "epoch": 0.1769622349372315, "grad_norm": 0.21618467569351196, "learning_rate": 1.8493997509626834e-05, "loss": 0.2855, "step": 9532 }, { "epoch": 0.17699936507465014, "grad_norm": 0.32158729434013367, "learning_rate": 1.8493381836391275e-05, "loss": 0.3031, "step": 9534 }, { "epoch": 0.17703649521206877, "grad_norm": 0.3213442265987396, "learning_rate": 1.849276604758623e-05, "loss": 0.1866, "step": 9536 }, { "epoch": 0.17707362534948742, "grad_norm": 0.5821731090545654, "learning_rate": 1.8492150143220084e-05, "loss": 0.4691, "step": 9538 }, { "epoch": 0.17711075548690605, "grad_norm": 0.3224603533744812, "learning_rate": 1.84915341233012e-05, "loss": 0.3409, "step": 9540 }, { "epoch": 0.1771478856243247, "grad_norm": 0.32279685139656067, "learning_rate": 1.849091798783798e-05, "loss": 0.3744, "step": 9542 }, { "epoch": 0.17718501576174334, "grad_norm": 0.5420823097229004, "learning_rate": 1.8490301736838797e-05, "loss": 0.2885, "step": 9544 }, { "epoch": 0.17722214589916196, "grad_norm": 0.3984422981739044, "learning_rate": 1.8489685370312043e-05, "loss": 0.3111, "step": 9546 }, { "epoch": 0.17725927603658062, "grad_norm": 0.38931065797805786, "learning_rate": 1.8489068888266096e-05, "loss": 0.1416, "step": 9548 }, { "epoch": 0.17729640617399925, "grad_norm": 0.3846571147441864, "learning_rate": 1.848845229070935e-05, "loss": 0.322, "step": 9550 }, { "epoch": 0.17733353631141788, "grad_norm": 0.3965075612068176, "learning_rate": 1.8487835577650194e-05, "loss": 0.3832, "step": 9552 }, { "epoch": 0.17737066644883653, "grad_norm": 0.38376057147979736, "learning_rate": 1.848721874909702e-05, "loss": 0.4807, "step": 9554 }, { "epoch": 0.17740779658625516, "grad_norm": 0.408200740814209, "learning_rate": 1.8486601805058222e-05, "loss": 0.4089, "step": 9556 }, { "epoch": 0.17744492672367382, "grad_norm": 0.38661542534828186, "learning_rate": 1.8485984745542193e-05, "loss": 0.3836, "step": 9558 }, { "epoch": 0.17748205686109245, "grad_norm": 0.3075103759765625, "learning_rate": 1.848536757055733e-05, "loss": 0.4346, "step": 9560 }, { "epoch": 0.17751918699851107, "grad_norm": 0.5638620257377625, "learning_rate": 1.848475028011203e-05, "loss": 0.4128, "step": 9562 }, { "epoch": 0.17755631713592973, "grad_norm": 0.4374730885028839, "learning_rate": 1.8484132874214698e-05, "loss": 0.4658, "step": 9564 }, { "epoch": 0.17759344727334836, "grad_norm": 0.2996465563774109, "learning_rate": 1.8483515352873724e-05, "loss": 0.3067, "step": 9566 }, { "epoch": 0.177630577410767, "grad_norm": 0.4137851297855377, "learning_rate": 1.848289771609752e-05, "loss": 0.4094, "step": 9568 }, { "epoch": 0.17766770754818564, "grad_norm": 0.6291019320487976, "learning_rate": 1.8482279963894488e-05, "loss": 0.2141, "step": 9570 }, { "epoch": 0.17770483768560427, "grad_norm": 0.5233544111251831, "learning_rate": 1.8481662096273035e-05, "loss": 0.3076, "step": 9572 }, { "epoch": 0.1777419678230229, "grad_norm": 0.35800623893737793, "learning_rate": 1.8481044113241564e-05, "loss": 0.4914, "step": 9574 }, { "epoch": 0.17777909796044156, "grad_norm": 0.2738491892814636, "learning_rate": 1.8480426014808485e-05, "loss": 0.3562, "step": 9576 }, { "epoch": 0.17781622809786019, "grad_norm": 0.6201164126396179, "learning_rate": 1.847980780098221e-05, "loss": 0.2276, "step": 9578 }, { "epoch": 0.17785335823527884, "grad_norm": 0.24048741161823273, "learning_rate": 1.8479189471771152e-05, "loss": 0.1767, "step": 9580 }, { "epoch": 0.17789048837269747, "grad_norm": 0.41604742407798767, "learning_rate": 1.8478571027183724e-05, "loss": 0.1528, "step": 9582 }, { "epoch": 0.1779276185101161, "grad_norm": 0.36952152848243713, "learning_rate": 1.847795246722834e-05, "loss": 0.2571, "step": 9584 }, { "epoch": 0.17796474864753475, "grad_norm": 0.3412088453769684, "learning_rate": 1.8477333791913417e-05, "loss": 0.336, "step": 9586 }, { "epoch": 0.17800187878495338, "grad_norm": 0.4281190037727356, "learning_rate": 1.8476715001247374e-05, "loss": 0.5419, "step": 9588 }, { "epoch": 0.178039008922372, "grad_norm": 0.3360917270183563, "learning_rate": 1.847609609523863e-05, "loss": 0.3863, "step": 9590 }, { "epoch": 0.17807613905979067, "grad_norm": 0.3602268099784851, "learning_rate": 1.8475477073895608e-05, "loss": 0.2263, "step": 9592 }, { "epoch": 0.1781132691972093, "grad_norm": 0.3859279453754425, "learning_rate": 1.847485793722673e-05, "loss": 0.451, "step": 9594 }, { "epoch": 0.17815039933462795, "grad_norm": 0.3367448151111603, "learning_rate": 1.847423868524042e-05, "loss": 0.5692, "step": 9596 }, { "epoch": 0.17818752947204658, "grad_norm": 0.3100263476371765, "learning_rate": 1.8473619317945103e-05, "loss": 0.2304, "step": 9598 }, { "epoch": 0.1782246596094652, "grad_norm": 0.3648507595062256, "learning_rate": 1.8472999835349213e-05, "loss": 0.322, "step": 9600 }, { "epoch": 0.17826178974688386, "grad_norm": 0.8895721435546875, "learning_rate": 1.847238023746117e-05, "loss": 0.4908, "step": 9602 }, { "epoch": 0.1782989198843025, "grad_norm": 0.3271710276603699, "learning_rate": 1.8471760524289412e-05, "loss": 0.167, "step": 9604 }, { "epoch": 0.17833605002172112, "grad_norm": 0.26065754890441895, "learning_rate": 1.8471140695842367e-05, "loss": 0.2159, "step": 9606 }, { "epoch": 0.17837318015913978, "grad_norm": 0.4074278175830841, "learning_rate": 1.8470520752128472e-05, "loss": 0.3071, "step": 9608 }, { "epoch": 0.1784103102965584, "grad_norm": 0.4186624586582184, "learning_rate": 1.8469900693156166e-05, "loss": 0.355, "step": 9610 }, { "epoch": 0.17844744043397703, "grad_norm": 0.314352810382843, "learning_rate": 1.8469280518933876e-05, "loss": 0.2123, "step": 9612 }, { "epoch": 0.1784845705713957, "grad_norm": 0.3136752247810364, "learning_rate": 1.846866022947005e-05, "loss": 0.4403, "step": 9614 }, { "epoch": 0.17852170070881432, "grad_norm": 0.3026633858680725, "learning_rate": 1.8468039824773125e-05, "loss": 0.3442, "step": 9616 }, { "epoch": 0.17855883084623297, "grad_norm": 0.297133207321167, "learning_rate": 1.8467419304851543e-05, "loss": 0.4821, "step": 9618 }, { "epoch": 0.1785959609836516, "grad_norm": 0.3496973216533661, "learning_rate": 1.8466798669713744e-05, "loss": 0.337, "step": 9620 }, { "epoch": 0.17863309112107023, "grad_norm": 0.404927521944046, "learning_rate": 1.846617791936818e-05, "loss": 0.3329, "step": 9622 }, { "epoch": 0.1786702212584889, "grad_norm": 0.3777688145637512, "learning_rate": 1.846555705382329e-05, "loss": 0.2465, "step": 9624 }, { "epoch": 0.17870735139590752, "grad_norm": 0.5088377594947815, "learning_rate": 1.8464936073087527e-05, "loss": 0.3038, "step": 9626 }, { "epoch": 0.17874448153332614, "grad_norm": 0.39530888199806213, "learning_rate": 1.846431497716934e-05, "loss": 0.2303, "step": 9628 }, { "epoch": 0.1787816116707448, "grad_norm": 0.3287244141101837, "learning_rate": 1.846369376607718e-05, "loss": 0.2175, "step": 9630 }, { "epoch": 0.17881874180816343, "grad_norm": 0.3361397683620453, "learning_rate": 1.84630724398195e-05, "loss": 0.2259, "step": 9632 }, { "epoch": 0.17885587194558208, "grad_norm": 0.3792361319065094, "learning_rate": 1.846245099840475e-05, "loss": 0.3851, "step": 9634 }, { "epoch": 0.1788930020830007, "grad_norm": 0.3108764588832855, "learning_rate": 1.8461829441841394e-05, "loss": 0.4233, "step": 9636 }, { "epoch": 0.17893013222041934, "grad_norm": 0.3521835505962372, "learning_rate": 1.8461207770137885e-05, "loss": 0.337, "step": 9638 }, { "epoch": 0.178967262357838, "grad_norm": 0.36800017952919006, "learning_rate": 1.846058598330268e-05, "loss": 0.3447, "step": 9640 }, { "epoch": 0.17900439249525663, "grad_norm": 0.29860007762908936, "learning_rate": 1.8459964081344248e-05, "loss": 0.2888, "step": 9642 }, { "epoch": 0.17904152263267525, "grad_norm": 0.545779287815094, "learning_rate": 1.8459342064271037e-05, "loss": 0.4612, "step": 9644 }, { "epoch": 0.1790786527700939, "grad_norm": 0.37817618250846863, "learning_rate": 1.8458719932091523e-05, "loss": 0.3164, "step": 9646 }, { "epoch": 0.17911578290751254, "grad_norm": 0.23531462252140045, "learning_rate": 1.8458097684814168e-05, "loss": 0.2905, "step": 9648 }, { "epoch": 0.17915291304493117, "grad_norm": 0.31386569142341614, "learning_rate": 1.8457475322447437e-05, "loss": 0.1614, "step": 9650 }, { "epoch": 0.17919004318234982, "grad_norm": 0.2847346067428589, "learning_rate": 1.8456852844999805e-05, "loss": 0.4167, "step": 9652 }, { "epoch": 0.17922717331976845, "grad_norm": 0.38693252205848694, "learning_rate": 1.8456230252479732e-05, "loss": 0.1707, "step": 9654 }, { "epoch": 0.1792643034571871, "grad_norm": 0.35181039571762085, "learning_rate": 1.8455607544895694e-05, "loss": 0.4361, "step": 9656 }, { "epoch": 0.17930143359460574, "grad_norm": 0.30536600947380066, "learning_rate": 1.845498472225617e-05, "loss": 0.3256, "step": 9658 }, { "epoch": 0.17933856373202436, "grad_norm": 0.4238933324813843, "learning_rate": 1.8454361784569625e-05, "loss": 0.3954, "step": 9660 }, { "epoch": 0.17937569386944302, "grad_norm": 0.2870017886161804, "learning_rate": 1.845373873184454e-05, "loss": 0.1442, "step": 9662 }, { "epoch": 0.17941282400686165, "grad_norm": 0.3694000244140625, "learning_rate": 1.8453115564089396e-05, "loss": 0.5079, "step": 9664 }, { "epoch": 0.17944995414428028, "grad_norm": 0.36335745453834534, "learning_rate": 1.8452492281312667e-05, "loss": 0.2248, "step": 9666 }, { "epoch": 0.17948708428169893, "grad_norm": 0.33692434430122375, "learning_rate": 1.8451868883522833e-05, "loss": 0.2997, "step": 9668 }, { "epoch": 0.17952421441911756, "grad_norm": 0.2719217836856842, "learning_rate": 1.8451245370728384e-05, "loss": 0.3224, "step": 9670 }, { "epoch": 0.1795613445565362, "grad_norm": 0.2950933575630188, "learning_rate": 1.84506217429378e-05, "loss": 0.258, "step": 9672 }, { "epoch": 0.17959847469395485, "grad_norm": 0.8864665031433105, "learning_rate": 1.8449998000159567e-05, "loss": 0.3255, "step": 9674 }, { "epoch": 0.17963560483137347, "grad_norm": 0.5421977043151855, "learning_rate": 1.844937414240217e-05, "loss": 0.3919, "step": 9676 }, { "epoch": 0.17967273496879213, "grad_norm": 0.4338848888874054, "learning_rate": 1.8448750169674098e-05, "loss": 0.3483, "step": 9678 }, { "epoch": 0.17970986510621076, "grad_norm": 0.2878280282020569, "learning_rate": 1.844812608198385e-05, "loss": 0.3473, "step": 9680 }, { "epoch": 0.1797469952436294, "grad_norm": 0.37720227241516113, "learning_rate": 1.8447501879339904e-05, "loss": 0.2399, "step": 9682 }, { "epoch": 0.17978412538104804, "grad_norm": 0.3899800181388855, "learning_rate": 1.844687756175076e-05, "loss": 0.2252, "step": 9684 }, { "epoch": 0.17982125551846667, "grad_norm": 0.36731138825416565, "learning_rate": 1.8446253129224916e-05, "loss": 0.3274, "step": 9686 }, { "epoch": 0.1798583856558853, "grad_norm": 0.3342880308628082, "learning_rate": 1.844562858177087e-05, "loss": 0.3391, "step": 9688 }, { "epoch": 0.17989551579330396, "grad_norm": 0.39947667717933655, "learning_rate": 1.8445003919397115e-05, "loss": 0.2343, "step": 9690 }, { "epoch": 0.17993264593072258, "grad_norm": 0.41179266571998596, "learning_rate": 1.844437914211215e-05, "loss": 0.3604, "step": 9692 }, { "epoch": 0.17996977606814124, "grad_norm": 0.42948979139328003, "learning_rate": 1.844375424992448e-05, "loss": 0.4247, "step": 9694 }, { "epoch": 0.18000690620555987, "grad_norm": 0.4198618531227112, "learning_rate": 1.8443129242842607e-05, "loss": 0.43, "step": 9696 }, { "epoch": 0.1800440363429785, "grad_norm": 0.4670703411102295, "learning_rate": 1.8442504120875036e-05, "loss": 0.4865, "step": 9698 }, { "epoch": 0.18008116648039715, "grad_norm": 0.38360267877578735, "learning_rate": 1.844187888403027e-05, "loss": 0.3198, "step": 9700 }, { "epoch": 0.18011829661781578, "grad_norm": 0.329893559217453, "learning_rate": 1.844125353231682e-05, "loss": 0.2644, "step": 9702 }, { "epoch": 0.1801554267552344, "grad_norm": 0.4611571431159973, "learning_rate": 1.844062806574319e-05, "loss": 0.3441, "step": 9704 }, { "epoch": 0.18019255689265307, "grad_norm": 0.29672861099243164, "learning_rate": 1.84400024843179e-05, "loss": 0.2753, "step": 9706 }, { "epoch": 0.1802296870300717, "grad_norm": 0.28903207182884216, "learning_rate": 1.8439376788049455e-05, "loss": 0.3827, "step": 9708 }, { "epoch": 0.18026681716749032, "grad_norm": 0.3726979196071625, "learning_rate": 1.8438750976946372e-05, "loss": 0.3596, "step": 9710 }, { "epoch": 0.18030394730490898, "grad_norm": 0.6121364235877991, "learning_rate": 1.843812505101716e-05, "loss": 0.532, "step": 9712 }, { "epoch": 0.1803410774423276, "grad_norm": 0.43849310278892517, "learning_rate": 1.8437499010270347e-05, "loss": 0.3487, "step": 9714 }, { "epoch": 0.18037820757974626, "grad_norm": 0.35089248418807983, "learning_rate": 1.8436872854714445e-05, "loss": 0.3169, "step": 9716 }, { "epoch": 0.1804153377171649, "grad_norm": 0.288174569606781, "learning_rate": 1.843624658435797e-05, "loss": 0.0789, "step": 9718 }, { "epoch": 0.18045246785458352, "grad_norm": 0.22463294863700867, "learning_rate": 1.8435620199209455e-05, "loss": 0.2058, "step": 9720 }, { "epoch": 0.18048959799200218, "grad_norm": 0.3337746858596802, "learning_rate": 1.8434993699277414e-05, "loss": 0.2351, "step": 9722 }, { "epoch": 0.1805267281294208, "grad_norm": 0.3516903817653656, "learning_rate": 1.8434367084570372e-05, "loss": 0.4076, "step": 9724 }, { "epoch": 0.18056385826683943, "grad_norm": 0.29438966512680054, "learning_rate": 1.843374035509686e-05, "loss": 0.1668, "step": 9726 }, { "epoch": 0.1806009884042581, "grad_norm": 0.31906697154045105, "learning_rate": 1.8433113510865405e-05, "loss": 0.3355, "step": 9728 }, { "epoch": 0.18063811854167672, "grad_norm": 0.2787906229496002, "learning_rate": 1.8432486551884535e-05, "loss": 0.218, "step": 9730 }, { "epoch": 0.18067524867909537, "grad_norm": 0.2658388912677765, "learning_rate": 1.843185947816278e-05, "loss": 0.2716, "step": 9732 }, { "epoch": 0.180712378816514, "grad_norm": 0.398564875125885, "learning_rate": 1.843123228970867e-05, "loss": 0.2116, "step": 9734 }, { "epoch": 0.18074950895393263, "grad_norm": 0.2886311113834381, "learning_rate": 1.843060498653075e-05, "loss": 0.2342, "step": 9736 }, { "epoch": 0.1807866390913513, "grad_norm": 0.2524523138999939, "learning_rate": 1.8429977568637546e-05, "loss": 0.3968, "step": 9738 }, { "epoch": 0.18082376922876991, "grad_norm": 0.27005019783973694, "learning_rate": 1.84293500360376e-05, "loss": 0.4382, "step": 9740 }, { "epoch": 0.18086089936618854, "grad_norm": 0.37713804841041565, "learning_rate": 1.8428722388739444e-05, "loss": 0.3396, "step": 9742 }, { "epoch": 0.1808980295036072, "grad_norm": 0.35463401675224304, "learning_rate": 1.842809462675163e-05, "loss": 0.4273, "step": 9744 }, { "epoch": 0.18093515964102583, "grad_norm": 0.28021538257598877, "learning_rate": 1.8427466750082684e-05, "loss": 0.3907, "step": 9746 }, { "epoch": 0.18097228977844446, "grad_norm": 0.27935490012168884, "learning_rate": 1.8426838758741165e-05, "loss": 0.3083, "step": 9748 }, { "epoch": 0.1810094199158631, "grad_norm": 0.3651592433452606, "learning_rate": 1.842621065273561e-05, "loss": 0.226, "step": 9750 }, { "epoch": 0.18104655005328174, "grad_norm": 0.3888011574745178, "learning_rate": 1.842558243207457e-05, "loss": 0.3569, "step": 9752 }, { "epoch": 0.1810836801907004, "grad_norm": 0.3161230981349945, "learning_rate": 1.8424954096766584e-05, "loss": 0.3683, "step": 9754 }, { "epoch": 0.18112081032811903, "grad_norm": 0.40960893034935, "learning_rate": 1.8424325646820214e-05, "loss": 0.2456, "step": 9756 }, { "epoch": 0.18115794046553765, "grad_norm": 0.378567099571228, "learning_rate": 1.842369708224401e-05, "loss": 0.3213, "step": 9758 }, { "epoch": 0.1811950706029563, "grad_norm": 0.5387058258056641, "learning_rate": 1.8423068403046512e-05, "loss": 0.2883, "step": 9760 }, { "epoch": 0.18123220074037494, "grad_norm": 0.3814791738986969, "learning_rate": 1.8422439609236286e-05, "loss": 0.2495, "step": 9762 }, { "epoch": 0.18126933087779357, "grad_norm": 0.4276221692562103, "learning_rate": 1.842181070082189e-05, "loss": 0.2367, "step": 9764 }, { "epoch": 0.18130646101521222, "grad_norm": 0.523210883140564, "learning_rate": 1.842118167781187e-05, "loss": 0.2761, "step": 9766 }, { "epoch": 0.18134359115263085, "grad_norm": 0.5031771659851074, "learning_rate": 1.842055254021479e-05, "loss": 0.3539, "step": 9768 }, { "epoch": 0.1813807212900495, "grad_norm": 0.4519246220588684, "learning_rate": 1.8419923288039218e-05, "loss": 0.301, "step": 9770 }, { "epoch": 0.18141785142746814, "grad_norm": 0.42654770612716675, "learning_rate": 1.8419293921293707e-05, "loss": 0.3605, "step": 9772 }, { "epoch": 0.18145498156488676, "grad_norm": 0.4720914363861084, "learning_rate": 1.8418664439986828e-05, "loss": 0.2795, "step": 9774 }, { "epoch": 0.18149211170230542, "grad_norm": 0.33781060576438904, "learning_rate": 1.841803484412714e-05, "loss": 0.2397, "step": 9776 }, { "epoch": 0.18152924183972405, "grad_norm": 0.34768402576446533, "learning_rate": 1.8417405133723214e-05, "loss": 0.6441, "step": 9778 }, { "epoch": 0.18156637197714268, "grad_norm": 0.3594615161418915, "learning_rate": 1.8416775308783616e-05, "loss": 0.3614, "step": 9780 }, { "epoch": 0.18160350211456133, "grad_norm": 0.35014021396636963, "learning_rate": 1.841614536931692e-05, "loss": 0.2191, "step": 9782 }, { "epoch": 0.18164063225197996, "grad_norm": 0.3346107602119446, "learning_rate": 1.8415515315331692e-05, "loss": 0.3816, "step": 9784 }, { "epoch": 0.1816777623893986, "grad_norm": 0.3111545741558075, "learning_rate": 1.841488514683651e-05, "loss": 0.2995, "step": 9786 }, { "epoch": 0.18171489252681725, "grad_norm": 0.2627500593662262, "learning_rate": 1.8414254863839946e-05, "loss": 0.3501, "step": 9788 }, { "epoch": 0.18175202266423587, "grad_norm": 0.35551393032073975, "learning_rate": 1.8413624466350576e-05, "loss": 0.3958, "step": 9790 }, { "epoch": 0.18178915280165453, "grad_norm": 0.3552328050136566, "learning_rate": 1.8412993954376978e-05, "loss": 0.3843, "step": 9792 }, { "epoch": 0.18182628293907316, "grad_norm": 0.38312020897865295, "learning_rate": 1.8412363327927734e-05, "loss": 0.3268, "step": 9794 }, { "epoch": 0.1818634130764918, "grad_norm": 0.22952710092067719, "learning_rate": 1.8411732587011423e-05, "loss": 0.3035, "step": 9796 }, { "epoch": 0.18190054321391044, "grad_norm": 0.35503703355789185, "learning_rate": 1.8411101731636628e-05, "loss": 0.2711, "step": 9798 }, { "epoch": 0.18193767335132907, "grad_norm": 0.33516064286231995, "learning_rate": 1.8410470761811933e-05, "loss": 0.4079, "step": 9800 }, { "epoch": 0.1819748034887477, "grad_norm": 0.3932841420173645, "learning_rate": 1.8409839677545918e-05, "loss": 0.3557, "step": 9802 }, { "epoch": 0.18201193362616636, "grad_norm": 0.3025108277797699, "learning_rate": 1.8409208478847178e-05, "loss": 0.3723, "step": 9804 }, { "epoch": 0.18204906376358498, "grad_norm": 0.34415170550346375, "learning_rate": 1.8408577165724302e-05, "loss": 0.5636, "step": 9806 }, { "epoch": 0.18208619390100364, "grad_norm": 0.2993118464946747, "learning_rate": 1.8407945738185876e-05, "loss": 0.1903, "step": 9808 }, { "epoch": 0.18212332403842227, "grad_norm": 0.3082931339740753, "learning_rate": 1.8407314196240492e-05, "loss": 0.4864, "step": 9810 }, { "epoch": 0.1821604541758409, "grad_norm": 0.4172440469264984, "learning_rate": 1.8406682539896746e-05, "loss": 0.3378, "step": 9812 }, { "epoch": 0.18219758431325955, "grad_norm": 0.2985750138759613, "learning_rate": 1.840605076916323e-05, "loss": 0.5089, "step": 9814 }, { "epoch": 0.18223471445067818, "grad_norm": 0.42213085293769836, "learning_rate": 1.8405418884048542e-05, "loss": 0.3455, "step": 9816 }, { "epoch": 0.1822718445880968, "grad_norm": 0.48405712842941284, "learning_rate": 1.8404786884561283e-05, "loss": 0.2788, "step": 9818 }, { "epoch": 0.18230897472551547, "grad_norm": 0.3284052908420563, "learning_rate": 1.8404154770710047e-05, "loss": 0.3127, "step": 9820 }, { "epoch": 0.1823461048629341, "grad_norm": 0.24915306270122528, "learning_rate": 1.840352254250344e-05, "loss": 0.2529, "step": 9822 }, { "epoch": 0.18238323500035272, "grad_norm": 0.3323463201522827, "learning_rate": 1.840289019995006e-05, "loss": 0.3785, "step": 9824 }, { "epoch": 0.18242036513777138, "grad_norm": 0.4031302034854889, "learning_rate": 1.8402257743058515e-05, "loss": 0.379, "step": 9826 }, { "epoch": 0.18245749527519, "grad_norm": 0.4071623980998993, "learning_rate": 1.8401625171837413e-05, "loss": 0.2915, "step": 9828 }, { "epoch": 0.18249462541260866, "grad_norm": 0.4241139590740204, "learning_rate": 1.8400992486295354e-05, "loss": 0.4581, "step": 9830 }, { "epoch": 0.1825317555500273, "grad_norm": 0.3751186430454254, "learning_rate": 1.840035968644095e-05, "loss": 0.4105, "step": 9832 }, { "epoch": 0.18256888568744592, "grad_norm": 0.32899799942970276, "learning_rate": 1.839972677228282e-05, "loss": 0.2431, "step": 9834 }, { "epoch": 0.18260601582486458, "grad_norm": 0.32996708154678345, "learning_rate": 1.839909374382956e-05, "loss": 0.346, "step": 9836 }, { "epoch": 0.1826431459622832, "grad_norm": 0.2801746428012848, "learning_rate": 1.83984606010898e-05, "loss": 0.2736, "step": 9838 }, { "epoch": 0.18268027609970183, "grad_norm": 0.3736366331577301, "learning_rate": 1.8397827344072145e-05, "loss": 0.3387, "step": 9840 }, { "epoch": 0.1827174062371205, "grad_norm": 0.35635530948638916, "learning_rate": 1.839719397278521e-05, "loss": 0.2634, "step": 9842 }, { "epoch": 0.18275453637453912, "grad_norm": 0.2773335874080658, "learning_rate": 1.8396560487237624e-05, "loss": 0.3752, "step": 9844 }, { "epoch": 0.18279166651195777, "grad_norm": 0.302762895822525, "learning_rate": 1.8395926887437993e-05, "loss": 0.5539, "step": 9846 }, { "epoch": 0.1828287966493764, "grad_norm": 0.5084289312362671, "learning_rate": 1.839529317339495e-05, "loss": 0.2642, "step": 9848 }, { "epoch": 0.18286592678679503, "grad_norm": 0.3113343417644501, "learning_rate": 1.8394659345117113e-05, "loss": 0.4062, "step": 9850 }, { "epoch": 0.18290305692421369, "grad_norm": 0.2898481786251068, "learning_rate": 1.839402540261311e-05, "loss": 0.4576, "step": 9852 }, { "epoch": 0.18294018706163231, "grad_norm": 0.41298696398735046, "learning_rate": 1.8393391345891563e-05, "loss": 0.5739, "step": 9854 }, { "epoch": 0.18297731719905094, "grad_norm": 0.42617517709732056, "learning_rate": 1.8392757174961096e-05, "loss": 0.2863, "step": 9856 }, { "epoch": 0.1830144473364696, "grad_norm": 0.45977580547332764, "learning_rate": 1.8392122889830347e-05, "loss": 0.398, "step": 9858 }, { "epoch": 0.18305157747388823, "grad_norm": 0.30564093589782715, "learning_rate": 1.8391488490507946e-05, "loss": 0.3398, "step": 9860 }, { "epoch": 0.18308870761130686, "grad_norm": 0.35091835260391235, "learning_rate": 1.8390853977002518e-05, "loss": 0.3235, "step": 9862 }, { "epoch": 0.1831258377487255, "grad_norm": 0.6922197341918945, "learning_rate": 1.8390219349322704e-05, "loss": 0.397, "step": 9864 }, { "epoch": 0.18316296788614414, "grad_norm": 0.37593111395835876, "learning_rate": 1.8389584607477133e-05, "loss": 0.4523, "step": 9866 }, { "epoch": 0.1832000980235628, "grad_norm": 0.3268374502658844, "learning_rate": 1.8388949751474444e-05, "loss": 0.2368, "step": 9868 }, { "epoch": 0.18323722816098142, "grad_norm": 0.6514426469802856, "learning_rate": 1.838831478132328e-05, "loss": 0.2353, "step": 9870 }, { "epoch": 0.18327435829840005, "grad_norm": 0.38410860300064087, "learning_rate": 1.8387679697032275e-05, "loss": 0.3626, "step": 9872 }, { "epoch": 0.1833114884358187, "grad_norm": 0.5829281806945801, "learning_rate": 1.8387044498610077e-05, "loss": 0.3657, "step": 9874 }, { "epoch": 0.18334861857323734, "grad_norm": 0.33428290486335754, "learning_rate": 1.838640918606532e-05, "loss": 0.286, "step": 9876 }, { "epoch": 0.18338574871065597, "grad_norm": 0.30670416355133057, "learning_rate": 1.838577375940666e-05, "loss": 0.5656, "step": 9878 }, { "epoch": 0.18342287884807462, "grad_norm": 0.518703281879425, "learning_rate": 1.8385138218642736e-05, "loss": 0.1887, "step": 9880 }, { "epoch": 0.18346000898549325, "grad_norm": 0.38538694381713867, "learning_rate": 1.8384502563782198e-05, "loss": 0.0978, "step": 9882 }, { "epoch": 0.1834971391229119, "grad_norm": 0.34368419647216797, "learning_rate": 1.838386679483369e-05, "loss": 0.279, "step": 9884 }, { "epoch": 0.18353426926033053, "grad_norm": 0.38312453031539917, "learning_rate": 1.838323091180587e-05, "loss": 0.3464, "step": 9886 }, { "epoch": 0.18357139939774916, "grad_norm": 0.2549024224281311, "learning_rate": 1.838259491470739e-05, "loss": 0.3489, "step": 9888 }, { "epoch": 0.18360852953516782, "grad_norm": 0.29596665501594543, "learning_rate": 1.83819588035469e-05, "loss": 0.3395, "step": 9890 }, { "epoch": 0.18364565967258645, "grad_norm": 0.35860276222229004, "learning_rate": 1.838132257833306e-05, "loss": 0.4298, "step": 9892 }, { "epoch": 0.18368278981000508, "grad_norm": 0.49010443687438965, "learning_rate": 1.8380686239074518e-05, "loss": 0.3149, "step": 9894 }, { "epoch": 0.18371991994742373, "grad_norm": 0.24262480437755585, "learning_rate": 1.8380049785779947e-05, "loss": 0.3188, "step": 9896 }, { "epoch": 0.18375705008484236, "grad_norm": 0.23179686069488525, "learning_rate": 1.8379413218457994e-05, "loss": 0.3206, "step": 9898 }, { "epoch": 0.183794180222261, "grad_norm": 0.3632320463657379, "learning_rate": 1.837877653711733e-05, "loss": 0.3353, "step": 9900 }, { "epoch": 0.18383131035967964, "grad_norm": 0.47408539056777954, "learning_rate": 1.8378139741766613e-05, "loss": 0.3434, "step": 9902 }, { "epoch": 0.18386844049709827, "grad_norm": 0.30511316657066345, "learning_rate": 1.837750283241451e-05, "loss": 0.3216, "step": 9904 }, { "epoch": 0.18390557063451693, "grad_norm": 0.4280533194541931, "learning_rate": 1.8376865809069687e-05, "loss": 0.4023, "step": 9906 }, { "epoch": 0.18394270077193556, "grad_norm": 0.2546878457069397, "learning_rate": 1.8376228671740812e-05, "loss": 0.4436, "step": 9908 }, { "epoch": 0.1839798309093542, "grad_norm": 0.34061458706855774, "learning_rate": 1.8375591420436556e-05, "loss": 0.2533, "step": 9910 }, { "epoch": 0.18401696104677284, "grad_norm": 0.42443713545799255, "learning_rate": 1.837495405516559e-05, "loss": 0.4974, "step": 9912 }, { "epoch": 0.18405409118419147, "grad_norm": 0.5118205547332764, "learning_rate": 1.8374316575936578e-05, "loss": 0.4737, "step": 9914 }, { "epoch": 0.1840912213216101, "grad_norm": 0.37152597308158875, "learning_rate": 1.8373678982758206e-05, "loss": 0.3769, "step": 9916 }, { "epoch": 0.18412835145902876, "grad_norm": 0.2654774487018585, "learning_rate": 1.8373041275639145e-05, "loss": 0.3671, "step": 9918 }, { "epoch": 0.18416548159644738, "grad_norm": 0.24857978522777557, "learning_rate": 1.8372403454588073e-05, "loss": 0.3383, "step": 9920 }, { "epoch": 0.18420261173386604, "grad_norm": 0.3159993588924408, "learning_rate": 1.8371765519613666e-05, "loss": 0.48, "step": 9922 }, { "epoch": 0.18423974187128467, "grad_norm": 0.3612730801105499, "learning_rate": 1.8371127470724606e-05, "loss": 0.3143, "step": 9924 }, { "epoch": 0.1842768720087033, "grad_norm": 0.5210133194923401, "learning_rate": 1.837048930792958e-05, "loss": 0.3155, "step": 9926 }, { "epoch": 0.18431400214612195, "grad_norm": 0.3854641020298004, "learning_rate": 1.8369851031237265e-05, "loss": 0.2234, "step": 9928 }, { "epoch": 0.18435113228354058, "grad_norm": 0.2614252269268036, "learning_rate": 1.8369212640656348e-05, "loss": 0.2337, "step": 9930 }, { "epoch": 0.1843882624209592, "grad_norm": 0.3065553903579712, "learning_rate": 1.8368574136195513e-05, "loss": 0.2027, "step": 9932 }, { "epoch": 0.18442539255837787, "grad_norm": 0.44947549700737, "learning_rate": 1.8367935517863455e-05, "loss": 0.3199, "step": 9934 }, { "epoch": 0.1844625226957965, "grad_norm": 0.8503955006599426, "learning_rate": 1.8367296785668858e-05, "loss": 0.5328, "step": 9936 }, { "epoch": 0.18449965283321512, "grad_norm": 0.41352495551109314, "learning_rate": 1.8366657939620415e-05, "loss": 0.4103, "step": 9938 }, { "epoch": 0.18453678297063378, "grad_norm": 0.31638580560684204, "learning_rate": 1.8366018979726817e-05, "loss": 0.3238, "step": 9940 }, { "epoch": 0.1845739131080524, "grad_norm": 0.5232476592063904, "learning_rate": 1.8365379905996762e-05, "loss": 0.3791, "step": 9942 }, { "epoch": 0.18461104324547106, "grad_norm": 0.418901264667511, "learning_rate": 1.836474071843894e-05, "loss": 0.4642, "step": 9944 }, { "epoch": 0.1846481733828897, "grad_norm": 0.29190734028816223, "learning_rate": 1.8364101417062054e-05, "loss": 0.4276, "step": 9946 }, { "epoch": 0.18468530352030832, "grad_norm": 0.3254503309726715, "learning_rate": 1.8363462001874803e-05, "loss": 0.257, "step": 9948 }, { "epoch": 0.18472243365772698, "grad_norm": 0.36586275696754456, "learning_rate": 1.8362822472885887e-05, "loss": 0.2655, "step": 9950 }, { "epoch": 0.1847595637951456, "grad_norm": 0.4781222343444824, "learning_rate": 1.8362182830104e-05, "loss": 0.4539, "step": 9952 }, { "epoch": 0.18479669393256423, "grad_norm": 0.396098256111145, "learning_rate": 1.8361543073537857e-05, "loss": 0.2428, "step": 9954 }, { "epoch": 0.1848338240699829, "grad_norm": 0.31001099944114685, "learning_rate": 1.8360903203196157e-05, "loss": 0.3677, "step": 9956 }, { "epoch": 0.18487095420740152, "grad_norm": 0.26083776354789734, "learning_rate": 1.836026321908761e-05, "loss": 0.3047, "step": 9958 }, { "epoch": 0.18490808434482017, "grad_norm": 0.35777002573013306, "learning_rate": 1.835962312122092e-05, "loss": 0.3121, "step": 9960 }, { "epoch": 0.1849452144822388, "grad_norm": 0.399875670671463, "learning_rate": 1.83589829096048e-05, "loss": 0.4987, "step": 9962 }, { "epoch": 0.18498234461965743, "grad_norm": 0.3539074957370758, "learning_rate": 1.835834258424796e-05, "loss": 0.2322, "step": 9964 }, { "epoch": 0.18501947475707609, "grad_norm": 0.45889416337013245, "learning_rate": 1.835770214515912e-05, "loss": 0.3029, "step": 9966 }, { "epoch": 0.18505660489449471, "grad_norm": 0.32760050892829895, "learning_rate": 1.835706159234698e-05, "loss": 0.3015, "step": 9968 }, { "epoch": 0.18509373503191334, "grad_norm": 0.33777618408203125, "learning_rate": 1.835642092582027e-05, "loss": 0.2423, "step": 9970 }, { "epoch": 0.185130865169332, "grad_norm": 0.4052596986293793, "learning_rate": 1.8355780145587698e-05, "loss": 0.2568, "step": 9972 }, { "epoch": 0.18516799530675063, "grad_norm": 0.379072904586792, "learning_rate": 1.8355139251657985e-05, "loss": 0.4143, "step": 9974 }, { "epoch": 0.18520512544416926, "grad_norm": 0.4473394453525543, "learning_rate": 1.8354498244039858e-05, "loss": 0.1469, "step": 9976 }, { "epoch": 0.1852422555815879, "grad_norm": 0.4131547212600708, "learning_rate": 1.835385712274203e-05, "loss": 0.3964, "step": 9978 }, { "epoch": 0.18527938571900654, "grad_norm": 0.4453961253166199, "learning_rate": 1.835321588777323e-05, "loss": 0.4611, "step": 9980 }, { "epoch": 0.1853165158564252, "grad_norm": 0.35863277316093445, "learning_rate": 1.8352574539142187e-05, "loss": 0.2339, "step": 9982 }, { "epoch": 0.18535364599384382, "grad_norm": 0.41370517015457153, "learning_rate": 1.8351933076857618e-05, "loss": 0.2234, "step": 9984 }, { "epoch": 0.18539077613126245, "grad_norm": 0.3429970443248749, "learning_rate": 1.835129150092826e-05, "loss": 0.2499, "step": 9986 }, { "epoch": 0.1854279062686811, "grad_norm": 0.2927241921424866, "learning_rate": 1.835064981136284e-05, "loss": 0.1489, "step": 9988 }, { "epoch": 0.18546503640609974, "grad_norm": 0.2844732403755188, "learning_rate": 1.8350008008170084e-05, "loss": 0.1451, "step": 9990 }, { "epoch": 0.18550216654351837, "grad_norm": 0.3491942286491394, "learning_rate": 1.8349366091358735e-05, "loss": 0.2921, "step": 9992 }, { "epoch": 0.18553929668093702, "grad_norm": 0.3390982747077942, "learning_rate": 1.8348724060937524e-05, "loss": 0.3201, "step": 9994 }, { "epoch": 0.18557642681835565, "grad_norm": 0.5280259847640991, "learning_rate": 1.834808191691518e-05, "loss": 0.2777, "step": 9996 }, { "epoch": 0.1856135569557743, "grad_norm": 0.3064058721065521, "learning_rate": 1.834743965930045e-05, "loss": 0.4065, "step": 9998 }, { "epoch": 0.18565068709319293, "grad_norm": 0.33439329266548157, "learning_rate": 1.8346797288102072e-05, "loss": 0.2729, "step": 10000 }, { "epoch": 0.18568781723061156, "grad_norm": 0.28253093361854553, "learning_rate": 1.8346154803328783e-05, "loss": 0.2426, "step": 10002 }, { "epoch": 0.18572494736803022, "grad_norm": 0.44935092329978943, "learning_rate": 1.8345512204989324e-05, "loss": 0.2209, "step": 10004 }, { "epoch": 0.18576207750544885, "grad_norm": 0.283279150724411, "learning_rate": 1.8344869493092444e-05, "loss": 0.3066, "step": 10006 }, { "epoch": 0.18579920764286748, "grad_norm": 0.38343068957328796, "learning_rate": 1.8344226667646884e-05, "loss": 0.3254, "step": 10008 }, { "epoch": 0.18583633778028613, "grad_norm": 0.4531508982181549, "learning_rate": 1.83435837286614e-05, "loss": 0.3926, "step": 10010 }, { "epoch": 0.18587346791770476, "grad_norm": 0.3899098038673401, "learning_rate": 1.8342940676144724e-05, "loss": 0.2624, "step": 10012 }, { "epoch": 0.1859105980551234, "grad_norm": 0.385444313287735, "learning_rate": 1.834229751010562e-05, "loss": 0.243, "step": 10014 }, { "epoch": 0.18594772819254204, "grad_norm": 0.3875812292098999, "learning_rate": 1.8341654230552836e-05, "loss": 0.2728, "step": 10016 }, { "epoch": 0.18598485832996067, "grad_norm": 0.45712265372276306, "learning_rate": 1.834101083749512e-05, "loss": 0.5489, "step": 10018 }, { "epoch": 0.18602198846737933, "grad_norm": 0.3582898676395416, "learning_rate": 1.8340367330941232e-05, "loss": 0.4963, "step": 10020 }, { "epoch": 0.18605911860479796, "grad_norm": 0.3622925877571106, "learning_rate": 1.833972371089993e-05, "loss": 0.3348, "step": 10022 }, { "epoch": 0.18609624874221659, "grad_norm": 0.331865131855011, "learning_rate": 1.8339079977379965e-05, "loss": 0.3835, "step": 10024 }, { "epoch": 0.18613337887963524, "grad_norm": 0.40517669916152954, "learning_rate": 1.8338436130390103e-05, "loss": 0.4138, "step": 10026 }, { "epoch": 0.18617050901705387, "grad_norm": 0.3718627393245697, "learning_rate": 1.83377921699391e-05, "loss": 0.3598, "step": 10028 }, { "epoch": 0.1862076391544725, "grad_norm": 0.45984190702438354, "learning_rate": 1.833714809603572e-05, "loss": 0.2331, "step": 10030 }, { "epoch": 0.18624476929189115, "grad_norm": 0.6453419923782349, "learning_rate": 1.8336503908688727e-05, "loss": 0.2367, "step": 10032 }, { "epoch": 0.18628189942930978, "grad_norm": 0.378261536359787, "learning_rate": 1.8335859607906886e-05, "loss": 0.2027, "step": 10034 }, { "epoch": 0.18631902956672844, "grad_norm": 0.3476535677909851, "learning_rate": 1.8335215193698967e-05, "loss": 0.2969, "step": 10036 }, { "epoch": 0.18635615970414707, "grad_norm": 0.607456386089325, "learning_rate": 1.8334570666073733e-05, "loss": 0.412, "step": 10038 }, { "epoch": 0.1863932898415657, "grad_norm": 0.5959854125976562, "learning_rate": 1.833392602503996e-05, "loss": 0.4672, "step": 10040 }, { "epoch": 0.18643041997898435, "grad_norm": 0.3668532073497772, "learning_rate": 1.8333281270606414e-05, "loss": 0.2478, "step": 10042 }, { "epoch": 0.18646755011640298, "grad_norm": 0.44977787137031555, "learning_rate": 1.8332636402781876e-05, "loss": 0.3463, "step": 10044 }, { "epoch": 0.1865046802538216, "grad_norm": 0.42468881607055664, "learning_rate": 1.833199142157511e-05, "loss": 0.4086, "step": 10046 }, { "epoch": 0.18654181039124026, "grad_norm": 0.36195921897888184, "learning_rate": 1.8331346326994902e-05, "loss": 0.2674, "step": 10048 }, { "epoch": 0.1865789405286589, "grad_norm": 0.3577727973461151, "learning_rate": 1.8330701119050023e-05, "loss": 0.2756, "step": 10050 }, { "epoch": 0.18661607066607752, "grad_norm": 0.37022027373313904, "learning_rate": 1.8330055797749256e-05, "loss": 0.2044, "step": 10052 }, { "epoch": 0.18665320080349618, "grad_norm": 0.3330825865268707, "learning_rate": 1.832941036310138e-05, "loss": 0.4059, "step": 10054 }, { "epoch": 0.1866903309409148, "grad_norm": 0.6666103601455688, "learning_rate": 1.8328764815115182e-05, "loss": 0.2432, "step": 10056 }, { "epoch": 0.18672746107833346, "grad_norm": 0.3342370390892029, "learning_rate": 1.832811915379944e-05, "loss": 0.3715, "step": 10058 }, { "epoch": 0.1867645912157521, "grad_norm": 0.34751439094543457, "learning_rate": 1.832747337916294e-05, "loss": 0.3832, "step": 10060 }, { "epoch": 0.18680172135317072, "grad_norm": 0.30360522866249084, "learning_rate": 1.8326827491214473e-05, "loss": 0.4883, "step": 10062 }, { "epoch": 0.18683885149058937, "grad_norm": 0.40708357095718384, "learning_rate": 1.8326181489962826e-05, "loss": 0.4136, "step": 10064 }, { "epoch": 0.186875981628008, "grad_norm": 0.418451726436615, "learning_rate": 1.8325535375416788e-05, "loss": 0.1855, "step": 10066 }, { "epoch": 0.18691311176542663, "grad_norm": 0.3928448259830475, "learning_rate": 1.8324889147585152e-05, "loss": 0.2512, "step": 10068 }, { "epoch": 0.1869502419028453, "grad_norm": 0.24479812383651733, "learning_rate": 1.832424280647671e-05, "loss": 0.5165, "step": 10070 }, { "epoch": 0.18698737204026392, "grad_norm": 0.3074891269207001, "learning_rate": 1.8323596352100257e-05, "loss": 0.2978, "step": 10072 }, { "epoch": 0.18702450217768257, "grad_norm": 0.450447678565979, "learning_rate": 1.8322949784464593e-05, "loss": 0.3733, "step": 10074 }, { "epoch": 0.1870616323151012, "grad_norm": 0.5366356372833252, "learning_rate": 1.8322303103578506e-05, "loss": 0.2737, "step": 10076 }, { "epoch": 0.18709876245251983, "grad_norm": 0.5472540855407715, "learning_rate": 1.8321656309450806e-05, "loss": 0.2986, "step": 10078 }, { "epoch": 0.18713589258993849, "grad_norm": 0.3829568326473236, "learning_rate": 1.832100940209029e-05, "loss": 0.4872, "step": 10080 }, { "epoch": 0.1871730227273571, "grad_norm": 0.49750491976737976, "learning_rate": 1.832036238150576e-05, "loss": 0.2229, "step": 10082 }, { "epoch": 0.18721015286477574, "grad_norm": 0.5529249906539917, "learning_rate": 1.831971524770602e-05, "loss": 0.2392, "step": 10084 }, { "epoch": 0.1872472830021944, "grad_norm": 0.33275121450424194, "learning_rate": 1.8319068000699878e-05, "loss": 0.2293, "step": 10086 }, { "epoch": 0.18728441313961303, "grad_norm": 0.36916640400886536, "learning_rate": 1.831842064049614e-05, "loss": 0.3386, "step": 10088 }, { "epoch": 0.18732154327703165, "grad_norm": 0.5965574979782104, "learning_rate": 1.8317773167103607e-05, "loss": 0.2376, "step": 10090 }, { "epoch": 0.1873586734144503, "grad_norm": 0.4161369502544403, "learning_rate": 1.83171255805311e-05, "loss": 0.3249, "step": 10092 }, { "epoch": 0.18739580355186894, "grad_norm": 0.34742462635040283, "learning_rate": 1.8316477880787427e-05, "loss": 0.443, "step": 10094 }, { "epoch": 0.1874329336892876, "grad_norm": 0.3939172625541687, "learning_rate": 1.83158300678814e-05, "loss": 0.2988, "step": 10096 }, { "epoch": 0.18747006382670622, "grad_norm": 0.4368314743041992, "learning_rate": 1.8315182141821834e-05, "loss": 0.475, "step": 10098 }, { "epoch": 0.18750719396412485, "grad_norm": 0.3249127268791199, "learning_rate": 1.831453410261755e-05, "loss": 0.5569, "step": 10100 }, { "epoch": 0.1875443241015435, "grad_norm": 0.3012632727622986, "learning_rate": 1.8313885950277356e-05, "loss": 0.278, "step": 10102 }, { "epoch": 0.18758145423896214, "grad_norm": 0.36531540751457214, "learning_rate": 1.8313237684810075e-05, "loss": 0.448, "step": 10104 }, { "epoch": 0.18761858437638076, "grad_norm": 0.41080960631370544, "learning_rate": 1.8312589306224536e-05, "loss": 0.3589, "step": 10106 }, { "epoch": 0.18765571451379942, "grad_norm": 0.37441620230674744, "learning_rate": 1.8311940814529555e-05, "loss": 0.3877, "step": 10108 }, { "epoch": 0.18769284465121805, "grad_norm": 0.3083757758140564, "learning_rate": 1.8311292209733957e-05, "loss": 0.4681, "step": 10110 }, { "epoch": 0.1877299747886367, "grad_norm": 0.3810519874095917, "learning_rate": 1.8310643491846565e-05, "loss": 0.3453, "step": 10112 }, { "epoch": 0.18776710492605533, "grad_norm": 0.3464382290840149, "learning_rate": 1.8309994660876213e-05, "loss": 0.3911, "step": 10114 }, { "epoch": 0.18780423506347396, "grad_norm": 0.40538454055786133, "learning_rate": 1.830934571683172e-05, "loss": 0.2661, "step": 10116 }, { "epoch": 0.18784136520089262, "grad_norm": 0.3924916386604309, "learning_rate": 1.830869665972192e-05, "loss": 0.2532, "step": 10118 }, { "epoch": 0.18787849533831125, "grad_norm": 0.24053512513637543, "learning_rate": 1.8308047489555648e-05, "loss": 0.3108, "step": 10120 }, { "epoch": 0.18791562547572987, "grad_norm": 0.32990771532058716, "learning_rate": 1.8307398206341737e-05, "loss": 0.3901, "step": 10122 }, { "epoch": 0.18795275561314853, "grad_norm": 0.6292337775230408, "learning_rate": 1.8306748810089023e-05, "loss": 0.2062, "step": 10124 }, { "epoch": 0.18798988575056716, "grad_norm": 0.37493157386779785, "learning_rate": 1.830609930080633e-05, "loss": 0.3672, "step": 10126 }, { "epoch": 0.1880270158879858, "grad_norm": 0.3033231496810913, "learning_rate": 1.8305449678502512e-05, "loss": 0.2681, "step": 10128 }, { "epoch": 0.18806414602540444, "grad_norm": 0.28835123777389526, "learning_rate": 1.83047999431864e-05, "loss": 0.2672, "step": 10130 }, { "epoch": 0.18810127616282307, "grad_norm": 0.3232995569705963, "learning_rate": 1.8304150094866836e-05, "loss": 0.3445, "step": 10132 }, { "epoch": 0.18813840630024173, "grad_norm": 0.320734441280365, "learning_rate": 1.8303500133552662e-05, "loss": 0.1707, "step": 10134 }, { "epoch": 0.18817553643766036, "grad_norm": 0.343945175409317, "learning_rate": 1.8302850059252723e-05, "loss": 0.1589, "step": 10136 }, { "epoch": 0.18821266657507899, "grad_norm": 0.45969587564468384, "learning_rate": 1.8302199871975865e-05, "loss": 0.1531, "step": 10138 }, { "epoch": 0.18824979671249764, "grad_norm": 0.30570536851882935, "learning_rate": 1.8301549571730937e-05, "loss": 0.4095, "step": 10140 }, { "epoch": 0.18828692684991627, "grad_norm": 0.28152766823768616, "learning_rate": 1.8300899158526783e-05, "loss": 0.5259, "step": 10142 }, { "epoch": 0.1883240569873349, "grad_norm": 0.3860793113708496, "learning_rate": 1.8300248632372257e-05, "loss": 0.2524, "step": 10144 }, { "epoch": 0.18836118712475355, "grad_norm": 0.25214534997940063, "learning_rate": 1.829959799327621e-05, "loss": 0.3168, "step": 10146 }, { "epoch": 0.18839831726217218, "grad_norm": 0.3945486843585968, "learning_rate": 1.829894724124749e-05, "loss": 0.4294, "step": 10148 }, { "epoch": 0.18843544739959084, "grad_norm": 0.4058411419391632, "learning_rate": 1.8298296376294962e-05, "loss": 0.4185, "step": 10150 }, { "epoch": 0.18847257753700947, "grad_norm": 0.29185596108436584, "learning_rate": 1.8297645398427475e-05, "loss": 0.3392, "step": 10152 }, { "epoch": 0.1885097076744281, "grad_norm": 0.3090771734714508, "learning_rate": 1.8296994307653888e-05, "loss": 0.1249, "step": 10154 }, { "epoch": 0.18854683781184675, "grad_norm": 0.40176692605018616, "learning_rate": 1.829634310398306e-05, "loss": 0.2969, "step": 10156 }, { "epoch": 0.18858396794926538, "grad_norm": 0.35776567459106445, "learning_rate": 1.829569178742385e-05, "loss": 0.3368, "step": 10158 }, { "epoch": 0.188621098086684, "grad_norm": 0.3186683654785156, "learning_rate": 1.829504035798513e-05, "loss": 0.221, "step": 10160 }, { "epoch": 0.18865822822410266, "grad_norm": 0.4903828799724579, "learning_rate": 1.8294388815675753e-05, "loss": 0.1931, "step": 10162 }, { "epoch": 0.1886953583615213, "grad_norm": 0.35308709740638733, "learning_rate": 1.8293737160504587e-05, "loss": 0.1733, "step": 10164 }, { "epoch": 0.18873248849893992, "grad_norm": 0.34929269552230835, "learning_rate": 1.8293085392480504e-05, "loss": 0.3794, "step": 10166 }, { "epoch": 0.18876961863635858, "grad_norm": 0.34631383419036865, "learning_rate": 1.829243351161237e-05, "loss": 0.308, "step": 10168 }, { "epoch": 0.1888067487737772, "grad_norm": 0.3294782042503357, "learning_rate": 1.829178151790905e-05, "loss": 0.2645, "step": 10170 }, { "epoch": 0.18884387891119586, "grad_norm": 0.5012518167495728, "learning_rate": 1.8291129411379428e-05, "loss": 0.325, "step": 10172 }, { "epoch": 0.1888810090486145, "grad_norm": 0.3634997606277466, "learning_rate": 1.829047719203236e-05, "loss": 0.5125, "step": 10174 }, { "epoch": 0.18891813918603312, "grad_norm": 0.3600502610206604, "learning_rate": 1.8289824859876736e-05, "loss": 0.314, "step": 10176 }, { "epoch": 0.18895526932345177, "grad_norm": 0.36589720845222473, "learning_rate": 1.828917241492143e-05, "loss": 0.2664, "step": 10178 }, { "epoch": 0.1889923994608704, "grad_norm": 0.2958115041255951, "learning_rate": 1.828851985717531e-05, "loss": 0.3417, "step": 10180 }, { "epoch": 0.18902952959828903, "grad_norm": 0.3177456855773926, "learning_rate": 1.8287867186647265e-05, "loss": 0.3357, "step": 10182 }, { "epoch": 0.1890666597357077, "grad_norm": 0.35406264662742615, "learning_rate": 1.828721440334617e-05, "loss": 0.2551, "step": 10184 }, { "epoch": 0.18910378987312632, "grad_norm": 0.3648502230644226, "learning_rate": 1.8286561507280912e-05, "loss": 0.4824, "step": 10186 }, { "epoch": 0.18914092001054497, "grad_norm": 0.3534731864929199, "learning_rate": 1.8285908498460372e-05, "loss": 0.2023, "step": 10188 }, { "epoch": 0.1891780501479636, "grad_norm": 0.2903887629508972, "learning_rate": 1.828525537689344e-05, "loss": 0.175, "step": 10190 }, { "epoch": 0.18921518028538223, "grad_norm": 0.4702399969100952, "learning_rate": 1.8284602142588998e-05, "loss": 0.1905, "step": 10192 }, { "epoch": 0.18925231042280088, "grad_norm": 0.2802911400794983, "learning_rate": 1.8283948795555934e-05, "loss": 0.2727, "step": 10194 }, { "epoch": 0.1892894405602195, "grad_norm": 0.34536829590797424, "learning_rate": 1.828329533580314e-05, "loss": 0.5455, "step": 10196 }, { "epoch": 0.18932657069763814, "grad_norm": 0.24384969472885132, "learning_rate": 1.828264176333951e-05, "loss": 0.3243, "step": 10198 }, { "epoch": 0.1893637008350568, "grad_norm": 0.29854816198349, "learning_rate": 1.8281988078173934e-05, "loss": 0.4068, "step": 10200 }, { "epoch": 0.18940083097247543, "grad_norm": 0.28660720586776733, "learning_rate": 1.8281334280315305e-05, "loss": 0.104, "step": 10202 }, { "epoch": 0.18943796110989405, "grad_norm": 0.37110835313796997, "learning_rate": 1.8280680369772526e-05, "loss": 0.3304, "step": 10204 }, { "epoch": 0.1894750912473127, "grad_norm": 0.3357993960380554, "learning_rate": 1.828002634655449e-05, "loss": 0.0829, "step": 10206 }, { "epoch": 0.18951222138473134, "grad_norm": 0.3750956356525421, "learning_rate": 1.8279372210670098e-05, "loss": 0.162, "step": 10208 }, { "epoch": 0.18954935152215, "grad_norm": 0.3358430862426758, "learning_rate": 1.8278717962128246e-05, "loss": 0.2304, "step": 10210 }, { "epoch": 0.18958648165956862, "grad_norm": 0.3922125995159149, "learning_rate": 1.827806360093784e-05, "loss": 0.4568, "step": 10212 }, { "epoch": 0.18962361179698725, "grad_norm": 0.3021722733974457, "learning_rate": 1.8277409127107787e-05, "loss": 0.2651, "step": 10214 }, { "epoch": 0.1896607419344059, "grad_norm": 0.3054031431674957, "learning_rate": 1.827675454064699e-05, "loss": 0.3145, "step": 10216 }, { "epoch": 0.18969787207182454, "grad_norm": 0.36788761615753174, "learning_rate": 1.8276099841564353e-05, "loss": 0.2488, "step": 10218 }, { "epoch": 0.18973500220924316, "grad_norm": 0.33292147517204285, "learning_rate": 1.8275445029868788e-05, "loss": 0.1316, "step": 10220 }, { "epoch": 0.18977213234666182, "grad_norm": 0.4965655207633972, "learning_rate": 1.8274790105569205e-05, "loss": 0.4831, "step": 10222 }, { "epoch": 0.18980926248408045, "grad_norm": 0.3368760049343109, "learning_rate": 1.827413506867451e-05, "loss": 0.2015, "step": 10224 }, { "epoch": 0.1898463926214991, "grad_norm": 0.37184277176856995, "learning_rate": 1.827347991919363e-05, "loss": 0.388, "step": 10226 }, { "epoch": 0.18988352275891773, "grad_norm": 0.2848564088344574, "learning_rate": 1.8272824657135463e-05, "loss": 0.2419, "step": 10228 }, { "epoch": 0.18992065289633636, "grad_norm": 0.4152754545211792, "learning_rate": 1.827216928250893e-05, "loss": 0.3988, "step": 10230 }, { "epoch": 0.18995778303375502, "grad_norm": 0.3655049204826355, "learning_rate": 1.8271513795322956e-05, "loss": 0.2914, "step": 10232 }, { "epoch": 0.18999491317117365, "grad_norm": 0.3804266154766083, "learning_rate": 1.8270858195586453e-05, "loss": 0.4918, "step": 10234 }, { "epoch": 0.19003204330859227, "grad_norm": 0.44650667905807495, "learning_rate": 1.827020248330835e-05, "loss": 0.5099, "step": 10236 }, { "epoch": 0.19006917344601093, "grad_norm": 0.5515437722206116, "learning_rate": 1.8269546658497556e-05, "loss": 0.2801, "step": 10238 }, { "epoch": 0.19010630358342956, "grad_norm": 0.4348154067993164, "learning_rate": 1.8268890721163007e-05, "loss": 0.3094, "step": 10240 }, { "epoch": 0.1901434337208482, "grad_norm": 0.4531700909137726, "learning_rate": 1.826823467131362e-05, "loss": 0.3214, "step": 10242 }, { "epoch": 0.19018056385826684, "grad_norm": 0.6516050100326538, "learning_rate": 1.8267578508958324e-05, "loss": 0.3586, "step": 10244 }, { "epoch": 0.19021769399568547, "grad_norm": 0.47056853771209717, "learning_rate": 1.8266922234106052e-05, "loss": 0.2819, "step": 10246 }, { "epoch": 0.19025482413310413, "grad_norm": 0.42877843976020813, "learning_rate": 1.826626584676573e-05, "loss": 0.3191, "step": 10248 }, { "epoch": 0.19029195427052276, "grad_norm": 0.35493627190589905, "learning_rate": 1.8265609346946292e-05, "loss": 0.2854, "step": 10250 }, { "epoch": 0.19032908440794138, "grad_norm": 0.38031259179115295, "learning_rate": 1.8264952734656667e-05, "loss": 0.3722, "step": 10252 }, { "epoch": 0.19036621454536004, "grad_norm": 0.37702032923698425, "learning_rate": 1.8264296009905792e-05, "loss": 0.3663, "step": 10254 }, { "epoch": 0.19040334468277867, "grad_norm": 0.31660595536231995, "learning_rate": 1.82636391727026e-05, "loss": 0.2031, "step": 10256 }, { "epoch": 0.1904404748201973, "grad_norm": 0.4001532196998596, "learning_rate": 1.8262982223056035e-05, "loss": 0.297, "step": 10258 }, { "epoch": 0.19047760495761595, "grad_norm": 0.2738375961780548, "learning_rate": 1.8262325160975032e-05, "loss": 0.2355, "step": 10260 }, { "epoch": 0.19051473509503458, "grad_norm": 0.3207013010978699, "learning_rate": 1.8261667986468534e-05, "loss": 0.3822, "step": 10262 }, { "epoch": 0.19055186523245324, "grad_norm": 0.3424219787120819, "learning_rate": 1.8261010699545477e-05, "loss": 0.4224, "step": 10264 }, { "epoch": 0.19058899536987187, "grad_norm": 0.5419808626174927, "learning_rate": 1.826035330021481e-05, "loss": 0.3046, "step": 10266 }, { "epoch": 0.1906261255072905, "grad_norm": 0.4402081370353699, "learning_rate": 1.8259695788485478e-05, "loss": 0.183, "step": 10268 }, { "epoch": 0.19066325564470915, "grad_norm": 0.42075181007385254, "learning_rate": 1.8259038164366428e-05, "loss": 0.3375, "step": 10270 }, { "epoch": 0.19070038578212778, "grad_norm": 0.3891755938529968, "learning_rate": 1.8258380427866608e-05, "loss": 0.3046, "step": 10272 }, { "epoch": 0.1907375159195464, "grad_norm": 0.29503315687179565, "learning_rate": 1.825772257899496e-05, "loss": 0.2603, "step": 10274 }, { "epoch": 0.19077464605696506, "grad_norm": 0.5955208539962769, "learning_rate": 1.825706461776045e-05, "loss": 0.2174, "step": 10276 }, { "epoch": 0.1908117761943837, "grad_norm": 0.5589277744293213, "learning_rate": 1.8256406544172024e-05, "loss": 0.2449, "step": 10278 }, { "epoch": 0.19084890633180232, "grad_norm": 0.35382336378097534, "learning_rate": 1.8255748358238633e-05, "loss": 0.3601, "step": 10280 }, { "epoch": 0.19088603646922098, "grad_norm": 0.4089867174625397, "learning_rate": 1.8255090059969235e-05, "loss": 0.324, "step": 10282 }, { "epoch": 0.1909231666066396, "grad_norm": 0.2896486520767212, "learning_rate": 1.825443164937279e-05, "loss": 0.2961, "step": 10284 }, { "epoch": 0.19096029674405826, "grad_norm": 0.5487931966781616, "learning_rate": 1.825377312645825e-05, "loss": 0.1843, "step": 10286 }, { "epoch": 0.1909974268814769, "grad_norm": 0.3776894807815552, "learning_rate": 1.8253114491234587e-05, "loss": 0.2858, "step": 10288 }, { "epoch": 0.19103455701889552, "grad_norm": 0.5502543449401855, "learning_rate": 1.8252455743710752e-05, "loss": 0.4484, "step": 10290 }, { "epoch": 0.19107168715631417, "grad_norm": 0.3481937348842621, "learning_rate": 1.8251796883895715e-05, "loss": 0.2355, "step": 10292 }, { "epoch": 0.1911088172937328, "grad_norm": 0.3464677929878235, "learning_rate": 1.825113791179844e-05, "loss": 0.4775, "step": 10294 }, { "epoch": 0.19114594743115143, "grad_norm": 0.3349195122718811, "learning_rate": 1.8250478827427893e-05, "loss": 0.3036, "step": 10296 }, { "epoch": 0.1911830775685701, "grad_norm": 0.3453654646873474, "learning_rate": 1.8249819630793044e-05, "loss": 0.2553, "step": 10298 }, { "epoch": 0.19122020770598872, "grad_norm": 0.40591198205947876, "learning_rate": 1.824916032190286e-05, "loss": 0.3128, "step": 10300 }, { "epoch": 0.19125733784340737, "grad_norm": 0.521061360836029, "learning_rate": 1.8248500900766312e-05, "loss": 0.3342, "step": 10302 }, { "epoch": 0.191294467980826, "grad_norm": 0.5649065375328064, "learning_rate": 1.8247841367392373e-05, "loss": 0.3777, "step": 10304 }, { "epoch": 0.19133159811824463, "grad_norm": 0.4497404396533966, "learning_rate": 1.824718172179002e-05, "loss": 0.3101, "step": 10306 }, { "epoch": 0.19136872825566328, "grad_norm": 0.4043754041194916, "learning_rate": 1.8246521963968224e-05, "loss": 0.2547, "step": 10308 }, { "epoch": 0.1914058583930819, "grad_norm": 0.318892240524292, "learning_rate": 1.8245862093935968e-05, "loss": 0.3594, "step": 10310 }, { "epoch": 0.19144298853050054, "grad_norm": 0.28512245416641235, "learning_rate": 1.8245202111702228e-05, "loss": 0.3597, "step": 10312 }, { "epoch": 0.1914801186679192, "grad_norm": 0.3797457218170166, "learning_rate": 1.8244542017275985e-05, "loss": 0.3199, "step": 10314 }, { "epoch": 0.19151724880533783, "grad_norm": 0.23189327120780945, "learning_rate": 1.8243881810666216e-05, "loss": 0.3123, "step": 10316 }, { "epoch": 0.19155437894275645, "grad_norm": 0.2917740046977997, "learning_rate": 1.824322149188191e-05, "loss": 0.3349, "step": 10318 }, { "epoch": 0.1915915090801751, "grad_norm": 0.4230726957321167, "learning_rate": 1.8242561060932054e-05, "loss": 0.397, "step": 10320 }, { "epoch": 0.19162863921759374, "grad_norm": 0.22096633911132812, "learning_rate": 1.8241900517825636e-05, "loss": 0.3303, "step": 10322 }, { "epoch": 0.1916657693550124, "grad_norm": 0.3386573791503906, "learning_rate": 1.824123986257163e-05, "loss": 0.3678, "step": 10324 }, { "epoch": 0.19170289949243102, "grad_norm": 0.3953254520893097, "learning_rate": 1.824057909517904e-05, "loss": 0.2924, "step": 10326 }, { "epoch": 0.19174002962984965, "grad_norm": 0.1699492484331131, "learning_rate": 1.823991821565685e-05, "loss": 0.2123, "step": 10328 }, { "epoch": 0.1917771597672683, "grad_norm": 0.38965439796447754, "learning_rate": 1.8239257224014054e-05, "loss": 0.364, "step": 10330 }, { "epoch": 0.19181428990468694, "grad_norm": 0.42472290992736816, "learning_rate": 1.8238596120259648e-05, "loss": 0.4063, "step": 10332 }, { "epoch": 0.19185142004210556, "grad_norm": 0.34790465235710144, "learning_rate": 1.8237934904402624e-05, "loss": 0.4023, "step": 10334 }, { "epoch": 0.19188855017952422, "grad_norm": 0.3114390969276428, "learning_rate": 1.8237273576451984e-05, "loss": 0.6155, "step": 10336 }, { "epoch": 0.19192568031694285, "grad_norm": 0.29550909996032715, "learning_rate": 1.8236612136416723e-05, "loss": 0.562, "step": 10338 }, { "epoch": 0.1919628104543615, "grad_norm": 0.46728673577308655, "learning_rate": 1.8235950584305844e-05, "loss": 0.4387, "step": 10340 }, { "epoch": 0.19199994059178013, "grad_norm": 0.4299033284187317, "learning_rate": 1.8235288920128345e-05, "loss": 0.3679, "step": 10342 }, { "epoch": 0.19203707072919876, "grad_norm": 0.42623013257980347, "learning_rate": 1.8234627143893232e-05, "loss": 0.3048, "step": 10344 }, { "epoch": 0.19207420086661742, "grad_norm": 0.31752580404281616, "learning_rate": 1.8233965255609508e-05, "loss": 0.3704, "step": 10346 }, { "epoch": 0.19211133100403605, "grad_norm": 0.36908331513404846, "learning_rate": 1.8233303255286185e-05, "loss": 0.2221, "step": 10348 }, { "epoch": 0.19214846114145467, "grad_norm": 0.4320656955242157, "learning_rate": 1.823264114293226e-05, "loss": 0.2494, "step": 10350 }, { "epoch": 0.19218559127887333, "grad_norm": 0.515095055103302, "learning_rate": 1.8231978918556752e-05, "loss": 0.2313, "step": 10352 }, { "epoch": 0.19222272141629196, "grad_norm": 0.41074809432029724, "learning_rate": 1.823131658216867e-05, "loss": 0.3836, "step": 10354 }, { "epoch": 0.1922598515537106, "grad_norm": 0.4301954507827759, "learning_rate": 1.823065413377702e-05, "loss": 0.4249, "step": 10356 }, { "epoch": 0.19229698169112924, "grad_norm": 0.4090089201927185, "learning_rate": 1.8229991573390828e-05, "loss": 0.2946, "step": 10358 }, { "epoch": 0.19233411182854787, "grad_norm": 0.40900731086730957, "learning_rate": 1.8229328901019095e-05, "loss": 0.3216, "step": 10360 }, { "epoch": 0.19237124196596653, "grad_norm": 0.3969988524913788, "learning_rate": 1.822866611667085e-05, "loss": 0.4612, "step": 10362 }, { "epoch": 0.19240837210338516, "grad_norm": 0.37536633014678955, "learning_rate": 1.8228003220355103e-05, "loss": 0.3757, "step": 10364 }, { "epoch": 0.19244550224080378, "grad_norm": 0.22825933992862701, "learning_rate": 1.8227340212080885e-05, "loss": 0.3799, "step": 10366 }, { "epoch": 0.19248263237822244, "grad_norm": 0.4094143211841583, "learning_rate": 1.8226677091857203e-05, "loss": 0.1981, "step": 10368 }, { "epoch": 0.19251976251564107, "grad_norm": 0.32274430990219116, "learning_rate": 1.822601385969309e-05, "loss": 0.3365, "step": 10370 }, { "epoch": 0.1925568926530597, "grad_norm": 0.36306002736091614, "learning_rate": 1.8225350515597568e-05, "loss": 0.3687, "step": 10372 }, { "epoch": 0.19259402279047835, "grad_norm": 0.37292805314064026, "learning_rate": 1.8224687059579667e-05, "loss": 0.2193, "step": 10374 }, { "epoch": 0.19263115292789698, "grad_norm": 0.3921237885951996, "learning_rate": 1.8224023491648406e-05, "loss": 0.3511, "step": 10376 }, { "epoch": 0.19266828306531564, "grad_norm": 0.31636109948158264, "learning_rate": 1.822335981181282e-05, "loss": 0.1889, "step": 10378 }, { "epoch": 0.19270541320273427, "grad_norm": 0.40478530526161194, "learning_rate": 1.822269602008194e-05, "loss": 0.3274, "step": 10380 }, { "epoch": 0.1927425433401529, "grad_norm": 0.43385058641433716, "learning_rate": 1.8222032116464798e-05, "loss": 0.2702, "step": 10382 }, { "epoch": 0.19277967347757155, "grad_norm": 0.47108379006385803, "learning_rate": 1.8221368100970424e-05, "loss": 0.2186, "step": 10384 }, { "epoch": 0.19281680361499018, "grad_norm": 0.35603728890419006, "learning_rate": 1.8220703973607857e-05, "loss": 0.3223, "step": 10386 }, { "epoch": 0.1928539337524088, "grad_norm": 0.22544145584106445, "learning_rate": 1.8220039734386136e-05, "loss": 0.3447, "step": 10388 }, { "epoch": 0.19289106388982746, "grad_norm": 0.5928962826728821, "learning_rate": 1.821937538331429e-05, "loss": 0.2882, "step": 10390 }, { "epoch": 0.1929281940272461, "grad_norm": 0.3926587998867035, "learning_rate": 1.821871092040137e-05, "loss": 0.2627, "step": 10392 }, { "epoch": 0.19296532416466472, "grad_norm": 0.3659384846687317, "learning_rate": 1.821804634565641e-05, "loss": 0.2562, "step": 10394 }, { "epoch": 0.19300245430208338, "grad_norm": 0.26231399178504944, "learning_rate": 1.8217381659088453e-05, "loss": 0.3167, "step": 10396 }, { "epoch": 0.193039584439502, "grad_norm": 0.3123832046985626, "learning_rate": 1.8216716860706548e-05, "loss": 0.2726, "step": 10398 }, { "epoch": 0.19307671457692066, "grad_norm": 0.4036448299884796, "learning_rate": 1.821605195051974e-05, "loss": 0.2602, "step": 10400 }, { "epoch": 0.1931138447143393, "grad_norm": 0.3967619836330414, "learning_rate": 1.8215386928537073e-05, "loss": 0.2901, "step": 10402 }, { "epoch": 0.19315097485175792, "grad_norm": 0.29481208324432373, "learning_rate": 1.8214721794767597e-05, "loss": 0.3315, "step": 10404 }, { "epoch": 0.19318810498917657, "grad_norm": 0.3918724060058594, "learning_rate": 1.8214056549220366e-05, "loss": 0.3605, "step": 10406 }, { "epoch": 0.1932252351265952, "grad_norm": 0.4407624900341034, "learning_rate": 1.8213391191904424e-05, "loss": 0.37, "step": 10408 }, { "epoch": 0.19326236526401383, "grad_norm": 0.17345456779003143, "learning_rate": 1.8212725722828838e-05, "loss": 0.1097, "step": 10410 }, { "epoch": 0.1932994954014325, "grad_norm": 0.3316115140914917, "learning_rate": 1.821206014200265e-05, "loss": 0.2392, "step": 10412 }, { "epoch": 0.19333662553885111, "grad_norm": 0.39465439319610596, "learning_rate": 1.821139444943492e-05, "loss": 0.2555, "step": 10414 }, { "epoch": 0.19337375567626977, "grad_norm": 0.4078032374382019, "learning_rate": 1.821072864513471e-05, "loss": 0.4078, "step": 10416 }, { "epoch": 0.1934108858136884, "grad_norm": 0.340839684009552, "learning_rate": 1.8210062729111077e-05, "loss": 0.1896, "step": 10418 }, { "epoch": 0.19344801595110703, "grad_norm": 0.2955494225025177, "learning_rate": 1.820939670137308e-05, "loss": 0.2723, "step": 10420 }, { "epoch": 0.19348514608852568, "grad_norm": 0.39263802766799927, "learning_rate": 1.8208730561929788e-05, "loss": 0.295, "step": 10422 }, { "epoch": 0.1935222762259443, "grad_norm": 0.34953704476356506, "learning_rate": 1.820806431079026e-05, "loss": 0.36, "step": 10424 }, { "epoch": 0.19355940636336294, "grad_norm": 0.44238829612731934, "learning_rate": 1.820739794796356e-05, "loss": 0.2015, "step": 10426 }, { "epoch": 0.1935965365007816, "grad_norm": 0.34842196106910706, "learning_rate": 1.8206731473458757e-05, "loss": 0.2409, "step": 10428 }, { "epoch": 0.19363366663820022, "grad_norm": 0.40157032012939453, "learning_rate": 1.8206064887284925e-05, "loss": 0.3887, "step": 10430 }, { "epoch": 0.19367079677561885, "grad_norm": 0.3731132447719574, "learning_rate": 1.8205398189451128e-05, "loss": 0.289, "step": 10432 }, { "epoch": 0.1937079269130375, "grad_norm": 0.35805171728134155, "learning_rate": 1.8204731379966437e-05, "loss": 0.4443, "step": 10434 }, { "epoch": 0.19374505705045614, "grad_norm": 0.4544253945350647, "learning_rate": 1.8204064458839932e-05, "loss": 0.4354, "step": 10436 }, { "epoch": 0.1937821871878748, "grad_norm": 0.4127708971500397, "learning_rate": 1.8203397426080684e-05, "loss": 0.3682, "step": 10438 }, { "epoch": 0.19381931732529342, "grad_norm": 0.3067456781864166, "learning_rate": 1.8202730281697767e-05, "loss": 0.2851, "step": 10440 }, { "epoch": 0.19385644746271205, "grad_norm": 0.4010867774486542, "learning_rate": 1.8202063025700262e-05, "loss": 0.3113, "step": 10442 }, { "epoch": 0.1938935776001307, "grad_norm": 0.35063600540161133, "learning_rate": 1.8201395658097245e-05, "loss": 0.236, "step": 10444 }, { "epoch": 0.19393070773754933, "grad_norm": 0.32037413120269775, "learning_rate": 1.82007281788978e-05, "loss": 0.2814, "step": 10446 }, { "epoch": 0.19396783787496796, "grad_norm": 0.3340356647968292, "learning_rate": 1.820006058811101e-05, "loss": 0.5214, "step": 10448 }, { "epoch": 0.19400496801238662, "grad_norm": 0.2888498306274414, "learning_rate": 1.8199392885745957e-05, "loss": 0.2936, "step": 10450 }, { "epoch": 0.19404209814980525, "grad_norm": 0.3157704770565033, "learning_rate": 1.8198725071811725e-05, "loss": 0.1533, "step": 10452 }, { "epoch": 0.1940792282872239, "grad_norm": 0.31509166955947876, "learning_rate": 1.8198057146317407e-05, "loss": 0.3485, "step": 10454 }, { "epoch": 0.19411635842464253, "grad_norm": 0.42326149344444275, "learning_rate": 1.819738910927208e-05, "loss": 0.3972, "step": 10456 }, { "epoch": 0.19415348856206116, "grad_norm": 0.3669712245464325, "learning_rate": 1.819672096068485e-05, "loss": 0.2087, "step": 10458 }, { "epoch": 0.19419061869947982, "grad_norm": 0.3648996651172638, "learning_rate": 1.819605270056479e-05, "loss": 0.2442, "step": 10460 }, { "epoch": 0.19422774883689845, "grad_norm": 0.27890896797180176, "learning_rate": 1.819538432892101e-05, "loss": 0.1599, "step": 10462 }, { "epoch": 0.19426487897431707, "grad_norm": 0.3570995032787323, "learning_rate": 1.8194715845762596e-05, "loss": 0.4027, "step": 10464 }, { "epoch": 0.19430200911173573, "grad_norm": 0.2493124157190323, "learning_rate": 1.8194047251098646e-05, "loss": 0.3901, "step": 10466 }, { "epoch": 0.19433913924915436, "grad_norm": 0.43931853771209717, "learning_rate": 1.8193378544938254e-05, "loss": 0.1128, "step": 10468 }, { "epoch": 0.194376269386573, "grad_norm": 0.39804863929748535, "learning_rate": 1.8192709727290526e-05, "loss": 0.2803, "step": 10470 }, { "epoch": 0.19441339952399164, "grad_norm": 0.3732399642467499, "learning_rate": 1.8192040798164554e-05, "loss": 0.2847, "step": 10472 }, { "epoch": 0.19445052966141027, "grad_norm": 0.2559812366962433, "learning_rate": 1.819137175756945e-05, "loss": 0.3249, "step": 10474 }, { "epoch": 0.19448765979882893, "grad_norm": 0.2886560261249542, "learning_rate": 1.8190702605514307e-05, "loss": 0.3991, "step": 10476 }, { "epoch": 0.19452478993624756, "grad_norm": 0.32769614458084106, "learning_rate": 1.819003334200824e-05, "loss": 0.2013, "step": 10478 }, { "epoch": 0.19456192007366618, "grad_norm": 0.42437949776649475, "learning_rate": 1.8189363967060346e-05, "loss": 0.3614, "step": 10480 }, { "epoch": 0.19459905021108484, "grad_norm": 0.3976858854293823, "learning_rate": 1.8188694480679743e-05, "loss": 0.44, "step": 10482 }, { "epoch": 0.19463618034850347, "grad_norm": 0.4024472236633301, "learning_rate": 1.8188024882875534e-05, "loss": 0.3528, "step": 10484 }, { "epoch": 0.1946733104859221, "grad_norm": 0.4627920389175415, "learning_rate": 1.8187355173656836e-05, "loss": 0.1862, "step": 10486 }, { "epoch": 0.19471044062334075, "grad_norm": 0.413394570350647, "learning_rate": 1.8186685353032753e-05, "loss": 0.3557, "step": 10488 }, { "epoch": 0.19474757076075938, "grad_norm": 0.4072570502758026, "learning_rate": 1.8186015421012406e-05, "loss": 0.2602, "step": 10490 }, { "epoch": 0.19478470089817804, "grad_norm": 0.42197901010513306, "learning_rate": 1.818534537760491e-05, "loss": 0.3215, "step": 10492 }, { "epoch": 0.19482183103559667, "grad_norm": 0.3577510714530945, "learning_rate": 1.818467522281938e-05, "loss": 0.2728, "step": 10494 }, { "epoch": 0.1948589611730153, "grad_norm": 0.344599187374115, "learning_rate": 1.8184004956664938e-05, "loss": 0.4647, "step": 10496 }, { "epoch": 0.19489609131043395, "grad_norm": 0.4934121370315552, "learning_rate": 1.8183334579150703e-05, "loss": 0.3121, "step": 10498 }, { "epoch": 0.19493322144785258, "grad_norm": 0.24618367850780487, "learning_rate": 1.8182664090285797e-05, "loss": 0.4257, "step": 10500 }, { "epoch": 0.1949703515852712, "grad_norm": 0.20650441944599152, "learning_rate": 1.818199349007934e-05, "loss": 0.1588, "step": 10502 }, { "epoch": 0.19500748172268986, "grad_norm": 0.3634934723377228, "learning_rate": 1.8181322778540455e-05, "loss": 0.3875, "step": 10504 }, { "epoch": 0.1950446118601085, "grad_norm": 0.3191303312778473, "learning_rate": 1.818065195567828e-05, "loss": 0.1349, "step": 10506 }, { "epoch": 0.19508174199752712, "grad_norm": 0.46026158332824707, "learning_rate": 1.8179981021501935e-05, "loss": 0.3438, "step": 10508 }, { "epoch": 0.19511887213494578, "grad_norm": 0.3885361850261688, "learning_rate": 1.8179309976020546e-05, "loss": 0.4297, "step": 10510 }, { "epoch": 0.1951560022723644, "grad_norm": 0.32798442244529724, "learning_rate": 1.8178638819243253e-05, "loss": 0.3166, "step": 10512 }, { "epoch": 0.19519313240978306, "grad_norm": 0.31509965658187866, "learning_rate": 1.817796755117918e-05, "loss": 0.2445, "step": 10514 }, { "epoch": 0.1952302625472017, "grad_norm": 0.31694459915161133, "learning_rate": 1.8177296171837468e-05, "loss": 0.3619, "step": 10516 }, { "epoch": 0.19526739268462032, "grad_norm": 0.618583619594574, "learning_rate": 1.8176624681227248e-05, "loss": 0.3338, "step": 10518 }, { "epoch": 0.19530452282203897, "grad_norm": 0.3839055001735687, "learning_rate": 1.8175953079357654e-05, "loss": 0.225, "step": 10520 }, { "epoch": 0.1953416529594576, "grad_norm": 0.4258374571800232, "learning_rate": 1.8175281366237832e-05, "loss": 0.3165, "step": 10522 }, { "epoch": 0.19537878309687623, "grad_norm": 0.23897075653076172, "learning_rate": 1.817460954187692e-05, "loss": 0.3364, "step": 10524 }, { "epoch": 0.19541591323429489, "grad_norm": 0.3382510840892792, "learning_rate": 1.8173937606284053e-05, "loss": 0.1694, "step": 10526 }, { "epoch": 0.19545304337171351, "grad_norm": 0.6898068189620972, "learning_rate": 1.817326555946838e-05, "loss": 0.3891, "step": 10528 }, { "epoch": 0.19549017350913217, "grad_norm": 0.3281741440296173, "learning_rate": 1.8172593401439048e-05, "loss": 0.342, "step": 10530 }, { "epoch": 0.1955273036465508, "grad_norm": 0.4617529511451721, "learning_rate": 1.8171921132205196e-05, "loss": 0.3762, "step": 10532 }, { "epoch": 0.19556443378396943, "grad_norm": 0.3313845098018646, "learning_rate": 1.8171248751775975e-05, "loss": 0.3025, "step": 10534 }, { "epoch": 0.19560156392138808, "grad_norm": 0.3868240416049957, "learning_rate": 1.8170576260160535e-05, "loss": 0.3407, "step": 10536 }, { "epoch": 0.1956386940588067, "grad_norm": 0.3786194622516632, "learning_rate": 1.816990365736803e-05, "loss": 0.4106, "step": 10538 }, { "epoch": 0.19567582419622534, "grad_norm": 0.295289546251297, "learning_rate": 1.8169230943407605e-05, "loss": 0.4138, "step": 10540 }, { "epoch": 0.195712954333644, "grad_norm": 0.38305091857910156, "learning_rate": 1.8168558118288415e-05, "loss": 0.1935, "step": 10542 }, { "epoch": 0.19575008447106262, "grad_norm": 0.27974823117256165, "learning_rate": 1.8167885182019616e-05, "loss": 0.3582, "step": 10544 }, { "epoch": 0.19578721460848125, "grad_norm": 0.33655616641044617, "learning_rate": 1.8167212134610368e-05, "loss": 0.321, "step": 10546 }, { "epoch": 0.1958243447458999, "grad_norm": 0.35231056809425354, "learning_rate": 1.8166538976069825e-05, "loss": 0.3232, "step": 10548 }, { "epoch": 0.19586147488331854, "grad_norm": 0.3734198808670044, "learning_rate": 1.8165865706407147e-05, "loss": 0.3575, "step": 10550 }, { "epoch": 0.1958986050207372, "grad_norm": 0.32524755597114563, "learning_rate": 1.81651923256315e-05, "loss": 0.2206, "step": 10552 }, { "epoch": 0.19593573515815582, "grad_norm": 0.3715626299381256, "learning_rate": 1.816451883375204e-05, "loss": 0.3282, "step": 10554 }, { "epoch": 0.19597286529557445, "grad_norm": 0.4585845470428467, "learning_rate": 1.816384523077794e-05, "loss": 0.3723, "step": 10556 }, { "epoch": 0.1960099954329931, "grad_norm": 0.4712212085723877, "learning_rate": 1.816317151671835e-05, "loss": 0.3035, "step": 10558 }, { "epoch": 0.19604712557041173, "grad_norm": 0.41906657814979553, "learning_rate": 1.8162497691582455e-05, "loss": 0.3311, "step": 10560 }, { "epoch": 0.19608425570783036, "grad_norm": 0.34197336435317993, "learning_rate": 1.8161823755379414e-05, "loss": 0.4088, "step": 10562 }, { "epoch": 0.19612138584524902, "grad_norm": 0.4386694133281708, "learning_rate": 1.8161149708118397e-05, "loss": 0.2722, "step": 10564 }, { "epoch": 0.19615851598266765, "grad_norm": 0.2498355656862259, "learning_rate": 1.816047554980858e-05, "loss": 0.2435, "step": 10566 }, { "epoch": 0.1961956461200863, "grad_norm": 0.29070866107940674, "learning_rate": 1.8159801280459132e-05, "loss": 0.4648, "step": 10568 }, { "epoch": 0.19623277625750493, "grad_norm": 0.32423922419548035, "learning_rate": 1.8159126900079234e-05, "loss": 0.4451, "step": 10570 }, { "epoch": 0.19626990639492356, "grad_norm": 0.3282385766506195, "learning_rate": 1.8158452408678057e-05, "loss": 0.2586, "step": 10572 }, { "epoch": 0.19630703653234222, "grad_norm": 0.5702826380729675, "learning_rate": 1.8157777806264775e-05, "loss": 0.2844, "step": 10574 }, { "epoch": 0.19634416666976084, "grad_norm": 0.31377604603767395, "learning_rate": 1.815710309284858e-05, "loss": 0.4656, "step": 10576 }, { "epoch": 0.19638129680717947, "grad_norm": 0.4903489649295807, "learning_rate": 1.815642826843864e-05, "loss": 0.2663, "step": 10578 }, { "epoch": 0.19641842694459813, "grad_norm": 0.4795646369457245, "learning_rate": 1.8155753333044144e-05, "loss": 0.4595, "step": 10580 }, { "epoch": 0.19645555708201676, "grad_norm": 0.26642128825187683, "learning_rate": 1.8155078286674274e-05, "loss": 0.3076, "step": 10582 }, { "epoch": 0.19649268721943539, "grad_norm": 0.2655535936355591, "learning_rate": 1.815440312933822e-05, "loss": 0.1803, "step": 10584 }, { "epoch": 0.19652981735685404, "grad_norm": 0.39043155312538147, "learning_rate": 1.815372786104516e-05, "loss": 0.3942, "step": 10586 }, { "epoch": 0.19656694749427267, "grad_norm": 0.3451046049594879, "learning_rate": 1.8153052481804286e-05, "loss": 0.1798, "step": 10588 }, { "epoch": 0.19660407763169133, "grad_norm": 0.27846482396125793, "learning_rate": 1.8152376991624795e-05, "loss": 0.4045, "step": 10590 }, { "epoch": 0.19664120776910995, "grad_norm": 0.310319721698761, "learning_rate": 1.8151701390515867e-05, "loss": 0.3243, "step": 10592 }, { "epoch": 0.19667833790652858, "grad_norm": 0.3808184564113617, "learning_rate": 1.81510256784867e-05, "loss": 0.3467, "step": 10594 }, { "epoch": 0.19671546804394724, "grad_norm": 0.32306984066963196, "learning_rate": 1.8150349855546494e-05, "loss": 0.4589, "step": 10596 }, { "epoch": 0.19675259818136587, "grad_norm": 0.6907929182052612, "learning_rate": 1.8149673921704437e-05, "loss": 0.3387, "step": 10598 }, { "epoch": 0.1967897283187845, "grad_norm": 0.41490477323532104, "learning_rate": 1.8148997876969728e-05, "loss": 0.2975, "step": 10600 }, { "epoch": 0.19682685845620315, "grad_norm": 0.354691743850708, "learning_rate": 1.8148321721351565e-05, "loss": 0.2463, "step": 10602 }, { "epoch": 0.19686398859362178, "grad_norm": 0.326217919588089, "learning_rate": 1.8147645454859154e-05, "loss": 0.3271, "step": 10604 }, { "epoch": 0.19690111873104044, "grad_norm": 0.4011254906654358, "learning_rate": 1.814696907750169e-05, "loss": 0.2554, "step": 10606 }, { "epoch": 0.19693824886845906, "grad_norm": 0.49285557866096497, "learning_rate": 1.814629258928838e-05, "loss": 0.487, "step": 10608 }, { "epoch": 0.1969753790058777, "grad_norm": 0.4032011926174164, "learning_rate": 1.8145615990228433e-05, "loss": 0.3717, "step": 10610 }, { "epoch": 0.19701250914329635, "grad_norm": 0.2815262973308563, "learning_rate": 1.814493928033105e-05, "loss": 0.2501, "step": 10612 }, { "epoch": 0.19704963928071498, "grad_norm": 0.4225890338420868, "learning_rate": 1.814426245960544e-05, "loss": 0.3522, "step": 10614 }, { "epoch": 0.1970867694181336, "grad_norm": 0.3081131875514984, "learning_rate": 1.814358552806081e-05, "loss": 0.3521, "step": 10616 }, { "epoch": 0.19712389955555226, "grad_norm": 0.25342971086502075, "learning_rate": 1.8142908485706375e-05, "loss": 0.2319, "step": 10618 }, { "epoch": 0.1971610296929709, "grad_norm": 0.40041080117225647, "learning_rate": 1.8142231332551348e-05, "loss": 0.4048, "step": 10620 }, { "epoch": 0.19719815983038952, "grad_norm": 0.42423558235168457, "learning_rate": 1.8141554068604942e-05, "loss": 0.2455, "step": 10622 }, { "epoch": 0.19723528996780818, "grad_norm": 0.2633681297302246, "learning_rate": 1.8140876693876372e-05, "loss": 0.23, "step": 10624 }, { "epoch": 0.1972724201052268, "grad_norm": 0.3154236078262329, "learning_rate": 1.8140199208374852e-05, "loss": 0.2243, "step": 10626 }, { "epoch": 0.19730955024264546, "grad_norm": 0.4470406174659729, "learning_rate": 1.8139521612109605e-05, "loss": 0.2863, "step": 10628 }, { "epoch": 0.1973466803800641, "grad_norm": 0.3068430423736572, "learning_rate": 1.813884390508985e-05, "loss": 0.274, "step": 10630 }, { "epoch": 0.19738381051748272, "grad_norm": 0.2704326808452606, "learning_rate": 1.8138166087324806e-05, "loss": 0.2433, "step": 10632 }, { "epoch": 0.19742094065490137, "grad_norm": 0.2957181930541992, "learning_rate": 1.8137488158823703e-05, "loss": 0.2194, "step": 10634 }, { "epoch": 0.19745807079232, "grad_norm": 0.41069671511650085, "learning_rate": 1.8136810119595756e-05, "loss": 0.525, "step": 10636 }, { "epoch": 0.19749520092973863, "grad_norm": 0.3662183880805969, "learning_rate": 1.8136131969650198e-05, "loss": 0.3967, "step": 10638 }, { "epoch": 0.19753233106715729, "grad_norm": 0.9215697646141052, "learning_rate": 1.8135453708996254e-05, "loss": 0.5995, "step": 10640 }, { "epoch": 0.1975694612045759, "grad_norm": 0.37060633301734924, "learning_rate": 1.8134775337643155e-05, "loss": 0.4379, "step": 10642 }, { "epoch": 0.19760659134199457, "grad_norm": 0.2897200286388397, "learning_rate": 1.8134096855600125e-05, "loss": 0.2779, "step": 10644 }, { "epoch": 0.1976437214794132, "grad_norm": 0.3277202546596527, "learning_rate": 1.8133418262876405e-05, "loss": 0.4352, "step": 10646 }, { "epoch": 0.19768085161683183, "grad_norm": 0.31786489486694336, "learning_rate": 1.8132739559481227e-05, "loss": 0.2168, "step": 10648 }, { "epoch": 0.19771798175425048, "grad_norm": 0.3507799208164215, "learning_rate": 1.813206074542382e-05, "loss": 0.3223, "step": 10650 }, { "epoch": 0.1977551118916691, "grad_norm": 0.2890516519546509, "learning_rate": 1.8131381820713426e-05, "loss": 0.2937, "step": 10652 }, { "epoch": 0.19779224202908774, "grad_norm": 0.40991395711898804, "learning_rate": 1.8130702785359284e-05, "loss": 0.3344, "step": 10654 }, { "epoch": 0.1978293721665064, "grad_norm": 0.4502514600753784, "learning_rate": 1.813002363937063e-05, "loss": 0.3834, "step": 10656 }, { "epoch": 0.19786650230392502, "grad_norm": 0.5486236214637756, "learning_rate": 1.8129344382756702e-05, "loss": 0.3912, "step": 10658 }, { "epoch": 0.19790363244134365, "grad_norm": 0.4866623282432556, "learning_rate": 1.8128665015526753e-05, "loss": 0.2315, "step": 10660 }, { "epoch": 0.1979407625787623, "grad_norm": 0.37923917174339294, "learning_rate": 1.812798553769002e-05, "loss": 0.1825, "step": 10662 }, { "epoch": 0.19797789271618094, "grad_norm": 0.35919103026390076, "learning_rate": 1.812730594925575e-05, "loss": 0.4337, "step": 10664 }, { "epoch": 0.1980150228535996, "grad_norm": 0.3857702612876892, "learning_rate": 1.8126626250233185e-05, "loss": 0.3478, "step": 10666 }, { "epoch": 0.19805215299101822, "grad_norm": 0.44642049074172974, "learning_rate": 1.8125946440631582e-05, "loss": 0.355, "step": 10668 }, { "epoch": 0.19808928312843685, "grad_norm": 0.3609551787376404, "learning_rate": 1.8125266520460192e-05, "loss": 0.3635, "step": 10670 }, { "epoch": 0.1981264132658555, "grad_norm": 0.4047122001647949, "learning_rate": 1.812458648972826e-05, "loss": 0.4582, "step": 10672 }, { "epoch": 0.19816354340327413, "grad_norm": 0.4076749384403229, "learning_rate": 1.812390634844504e-05, "loss": 0.2525, "step": 10674 }, { "epoch": 0.19820067354069276, "grad_norm": 0.3619585335254669, "learning_rate": 1.8123226096619792e-05, "loss": 0.3451, "step": 10676 }, { "epoch": 0.19823780367811142, "grad_norm": 0.5135860443115234, "learning_rate": 1.8122545734261767e-05, "loss": 0.4424, "step": 10678 }, { "epoch": 0.19827493381553005, "grad_norm": 0.36556708812713623, "learning_rate": 1.8121865261380223e-05, "loss": 0.465, "step": 10680 }, { "epoch": 0.1983120639529487, "grad_norm": 0.3625973165035248, "learning_rate": 1.8121184677984424e-05, "loss": 0.3302, "step": 10682 }, { "epoch": 0.19834919409036733, "grad_norm": 0.43512579798698425, "learning_rate": 1.8120503984083622e-05, "loss": 0.2786, "step": 10684 }, { "epoch": 0.19838632422778596, "grad_norm": 0.6174240708351135, "learning_rate": 1.8119823179687088e-05, "loss": 0.2174, "step": 10686 }, { "epoch": 0.19842345436520462, "grad_norm": 0.42891427874565125, "learning_rate": 1.811914226480408e-05, "loss": 0.4079, "step": 10688 }, { "epoch": 0.19846058450262324, "grad_norm": 0.3629204332828522, "learning_rate": 1.811846123944387e-05, "loss": 0.1746, "step": 10690 }, { "epoch": 0.19849771464004187, "grad_norm": 0.3608040511608124, "learning_rate": 1.8117780103615717e-05, "loss": 0.2701, "step": 10692 }, { "epoch": 0.19853484477746053, "grad_norm": 0.4338761270046234, "learning_rate": 1.8117098857328895e-05, "loss": 0.308, "step": 10694 }, { "epoch": 0.19857197491487916, "grad_norm": 0.4181887209415436, "learning_rate": 1.811641750059267e-05, "loss": 0.4115, "step": 10696 }, { "epoch": 0.19860910505229779, "grad_norm": 0.4565739333629608, "learning_rate": 1.8115736033416312e-05, "loss": 0.4606, "step": 10698 }, { "epoch": 0.19864623518971644, "grad_norm": 0.3283848464488983, "learning_rate": 1.8115054455809096e-05, "loss": 0.2638, "step": 10700 }, { "epoch": 0.19868336532713507, "grad_norm": 0.27011847496032715, "learning_rate": 1.8114372767780297e-05, "loss": 0.3762, "step": 10702 }, { "epoch": 0.19872049546455373, "grad_norm": 0.272344172000885, "learning_rate": 1.811369096933919e-05, "loss": 0.4051, "step": 10704 }, { "epoch": 0.19875762560197235, "grad_norm": 0.2958928942680359, "learning_rate": 1.811300906049505e-05, "loss": 0.2722, "step": 10706 }, { "epoch": 0.19879475573939098, "grad_norm": 0.33428341150283813, "learning_rate": 1.8112327041257162e-05, "loss": 0.3913, "step": 10708 }, { "epoch": 0.19883188587680964, "grad_norm": 0.3786318898200989, "learning_rate": 1.8111644911634803e-05, "loss": 0.2783, "step": 10710 }, { "epoch": 0.19886901601422827, "grad_norm": 0.3685378134250641, "learning_rate": 1.811096267163725e-05, "loss": 0.4935, "step": 10712 }, { "epoch": 0.1989061461516469, "grad_norm": 0.30812156200408936, "learning_rate": 1.8110280321273793e-05, "loss": 0.3269, "step": 10714 }, { "epoch": 0.19894327628906555, "grad_norm": 0.9871928095817566, "learning_rate": 1.8109597860553713e-05, "loss": 0.3227, "step": 10716 }, { "epoch": 0.19898040642648418, "grad_norm": 0.3444797098636627, "learning_rate": 1.8108915289486296e-05, "loss": 0.3791, "step": 10718 }, { "epoch": 0.19901753656390284, "grad_norm": 0.3381890654563904, "learning_rate": 1.8108232608080834e-05, "loss": 0.431, "step": 10720 }, { "epoch": 0.19905466670132146, "grad_norm": 0.25154224038124084, "learning_rate": 1.810754981634661e-05, "loss": 0.3114, "step": 10722 }, { "epoch": 0.1990917968387401, "grad_norm": 0.33820077776908875, "learning_rate": 1.8106866914292918e-05, "loss": 0.3455, "step": 10724 }, { "epoch": 0.19912892697615875, "grad_norm": 0.252514123916626, "learning_rate": 1.810618390192905e-05, "loss": 0.3195, "step": 10726 }, { "epoch": 0.19916605711357738, "grad_norm": 0.5573863983154297, "learning_rate": 1.8105500779264302e-05, "loss": 0.1973, "step": 10728 }, { "epoch": 0.199203187250996, "grad_norm": 0.3413804769515991, "learning_rate": 1.8104817546307967e-05, "loss": 0.3683, "step": 10730 }, { "epoch": 0.19924031738841466, "grad_norm": 0.4128609895706177, "learning_rate": 1.810413420306934e-05, "loss": 0.296, "step": 10732 }, { "epoch": 0.1992774475258333, "grad_norm": 0.43613719940185547, "learning_rate": 1.8103450749557724e-05, "loss": 0.2528, "step": 10734 }, { "epoch": 0.19931457766325192, "grad_norm": 0.26584532856941223, "learning_rate": 1.8102767185782415e-05, "loss": 0.3336, "step": 10736 }, { "epoch": 0.19935170780067057, "grad_norm": 0.441381573677063, "learning_rate": 1.8102083511752717e-05, "loss": 0.3325, "step": 10738 }, { "epoch": 0.1993888379380892, "grad_norm": 0.32035815715789795, "learning_rate": 1.8101399727477926e-05, "loss": 0.2888, "step": 10740 }, { "epoch": 0.19942596807550786, "grad_norm": 0.42634662985801697, "learning_rate": 1.8100715832967354e-05, "loss": 0.3069, "step": 10742 }, { "epoch": 0.1994630982129265, "grad_norm": 0.3308328688144684, "learning_rate": 1.81000318282303e-05, "loss": 0.3464, "step": 10744 }, { "epoch": 0.19950022835034512, "grad_norm": 0.3660121262073517, "learning_rate": 1.8099347713276084e-05, "loss": 0.3299, "step": 10746 }, { "epoch": 0.19953735848776377, "grad_norm": 0.35178038477897644, "learning_rate": 1.8098663488114e-05, "loss": 0.4313, "step": 10748 }, { "epoch": 0.1995744886251824, "grad_norm": 0.5728922486305237, "learning_rate": 1.8097979152753364e-05, "loss": 0.4727, "step": 10750 }, { "epoch": 0.19961161876260103, "grad_norm": 0.4055405259132385, "learning_rate": 1.809729470720349e-05, "loss": 0.3252, "step": 10752 }, { "epoch": 0.19964874890001968, "grad_norm": 0.34874027967453003, "learning_rate": 1.8096610151473685e-05, "loss": 0.2923, "step": 10754 }, { "epoch": 0.1996858790374383, "grad_norm": 0.48665115237236023, "learning_rate": 1.8095925485573274e-05, "loss": 0.3447, "step": 10756 }, { "epoch": 0.19972300917485697, "grad_norm": 0.3688524663448334, "learning_rate": 1.8095240709511563e-05, "loss": 0.4582, "step": 10758 }, { "epoch": 0.1997601393122756, "grad_norm": 0.3206796646118164, "learning_rate": 1.8094555823297876e-05, "loss": 0.3614, "step": 10760 }, { "epoch": 0.19979726944969423, "grad_norm": 0.3029753565788269, "learning_rate": 1.809387082694153e-05, "loss": 0.4177, "step": 10762 }, { "epoch": 0.19983439958711288, "grad_norm": 0.303425133228302, "learning_rate": 1.8093185720451846e-05, "loss": 0.2802, "step": 10764 }, { "epoch": 0.1998715297245315, "grad_norm": 0.3895516097545624, "learning_rate": 1.8092500503838145e-05, "loss": 0.1207, "step": 10766 }, { "epoch": 0.19990865986195014, "grad_norm": 0.24686263501644135, "learning_rate": 1.8091815177109754e-05, "loss": 0.5278, "step": 10768 }, { "epoch": 0.1999457899993688, "grad_norm": 0.35627856850624084, "learning_rate": 1.8091129740275994e-05, "loss": 0.3638, "step": 10770 }, { "epoch": 0.19998292013678742, "grad_norm": 0.2892895042896271, "learning_rate": 1.8090444193346196e-05, "loss": 0.2866, "step": 10772 }, { "epoch": 0.20002005027420605, "grad_norm": 0.4437684714794159, "learning_rate": 1.8089758536329688e-05, "loss": 0.174, "step": 10774 }, { "epoch": 0.2000571804116247, "grad_norm": 0.48527154326438904, "learning_rate": 1.8089072769235793e-05, "loss": 0.3254, "step": 10776 }, { "epoch": 0.20009431054904334, "grad_norm": 0.45915573835372925, "learning_rate": 1.808838689207385e-05, "loss": 0.2423, "step": 10778 }, { "epoch": 0.200131440686462, "grad_norm": 0.5010533928871155, "learning_rate": 1.8087700904853188e-05, "loss": 0.2444, "step": 10780 }, { "epoch": 0.20016857082388062, "grad_norm": 0.329974502325058, "learning_rate": 1.8087014807583143e-05, "loss": 0.2029, "step": 10782 }, { "epoch": 0.20020570096129925, "grad_norm": 0.418027400970459, "learning_rate": 1.808632860027305e-05, "loss": 0.1309, "step": 10784 }, { "epoch": 0.2002428310987179, "grad_norm": 0.3399648666381836, "learning_rate": 1.8085642282932247e-05, "loss": 0.3519, "step": 10786 }, { "epoch": 0.20027996123613653, "grad_norm": 0.642388641834259, "learning_rate": 1.808495585557007e-05, "loss": 0.5265, "step": 10788 }, { "epoch": 0.20031709137355516, "grad_norm": 0.4723513722419739, "learning_rate": 1.808426931819586e-05, "loss": 0.2839, "step": 10790 }, { "epoch": 0.20035422151097382, "grad_norm": 0.3475300967693329, "learning_rate": 1.8083582670818966e-05, "loss": 0.2425, "step": 10792 }, { "epoch": 0.20039135164839245, "grad_norm": 0.41961824893951416, "learning_rate": 1.8082895913448718e-05, "loss": 0.2641, "step": 10794 }, { "epoch": 0.2004284817858111, "grad_norm": 0.4302493929862976, "learning_rate": 1.808220904609447e-05, "loss": 0.2446, "step": 10796 }, { "epoch": 0.20046561192322973, "grad_norm": 0.34174981713294983, "learning_rate": 1.8081522068765567e-05, "loss": 0.2751, "step": 10798 }, { "epoch": 0.20050274206064836, "grad_norm": 0.33729425072669983, "learning_rate": 1.808083498147135e-05, "loss": 0.3074, "step": 10800 }, { "epoch": 0.20053987219806702, "grad_norm": 0.44991937279701233, "learning_rate": 1.808014778422118e-05, "loss": 0.3619, "step": 10802 }, { "epoch": 0.20057700233548564, "grad_norm": 0.3150615990161896, "learning_rate": 1.80794604770244e-05, "loss": 0.1592, "step": 10804 }, { "epoch": 0.20061413247290427, "grad_norm": 0.5023438930511475, "learning_rate": 1.8078773059890366e-05, "loss": 0.1777, "step": 10806 }, { "epoch": 0.20065126261032293, "grad_norm": 0.39736321568489075, "learning_rate": 1.8078085532828425e-05, "loss": 0.3187, "step": 10808 }, { "epoch": 0.20068839274774156, "grad_norm": 0.6400244235992432, "learning_rate": 1.807739789584794e-05, "loss": 0.3765, "step": 10810 }, { "epoch": 0.20072552288516018, "grad_norm": 0.32258108258247375, "learning_rate": 1.8076710148958262e-05, "loss": 0.1767, "step": 10812 }, { "epoch": 0.20076265302257884, "grad_norm": 0.40697264671325684, "learning_rate": 1.807602229216875e-05, "loss": 0.2256, "step": 10814 }, { "epoch": 0.20079978315999747, "grad_norm": 0.40953925251960754, "learning_rate": 1.8075334325488767e-05, "loss": 0.3952, "step": 10816 }, { "epoch": 0.20083691329741613, "grad_norm": 0.4370473325252533, "learning_rate": 1.8074646248927673e-05, "loss": 0.2824, "step": 10818 }, { "epoch": 0.20087404343483475, "grad_norm": 0.3332976996898651, "learning_rate": 1.8073958062494828e-05, "loss": 0.4775, "step": 10820 }, { "epoch": 0.20091117357225338, "grad_norm": 0.3931434750556946, "learning_rate": 1.80732697661996e-05, "loss": 0.3925, "step": 10822 }, { "epoch": 0.20094830370967204, "grad_norm": 0.42469334602355957, "learning_rate": 1.8072581360051353e-05, "loss": 0.3629, "step": 10824 }, { "epoch": 0.20098543384709067, "grad_norm": 0.27338218688964844, "learning_rate": 1.8071892844059452e-05, "loss": 0.2915, "step": 10826 }, { "epoch": 0.2010225639845093, "grad_norm": 0.3425963819026947, "learning_rate": 1.807120421823327e-05, "loss": 0.1143, "step": 10828 }, { "epoch": 0.20105969412192795, "grad_norm": 0.4951414167881012, "learning_rate": 1.8070515482582175e-05, "loss": 0.3085, "step": 10830 }, { "epoch": 0.20109682425934658, "grad_norm": 0.3356459438800812, "learning_rate": 1.8069826637115535e-05, "loss": 0.4145, "step": 10832 }, { "epoch": 0.20113395439676524, "grad_norm": 0.555823802947998, "learning_rate": 1.806913768184273e-05, "loss": 0.2103, "step": 10834 }, { "epoch": 0.20117108453418386, "grad_norm": 0.47968658804893494, "learning_rate": 1.8068448616773125e-05, "loss": 0.3189, "step": 10836 }, { "epoch": 0.2012082146716025, "grad_norm": 0.3954299986362457, "learning_rate": 1.8067759441916107e-05, "loss": 0.3065, "step": 10838 }, { "epoch": 0.20124534480902115, "grad_norm": 0.29290133714675903, "learning_rate": 1.8067070157281052e-05, "loss": 0.3637, "step": 10840 }, { "epoch": 0.20128247494643978, "grad_norm": 0.3015665113925934, "learning_rate": 1.806638076287733e-05, "loss": 0.3295, "step": 10842 }, { "epoch": 0.2013196050838584, "grad_norm": 0.3737410008907318, "learning_rate": 1.8065691258714326e-05, "loss": 0.3986, "step": 10844 }, { "epoch": 0.20135673522127706, "grad_norm": 0.3320861756801605, "learning_rate": 1.806500164480143e-05, "loss": 0.3409, "step": 10846 }, { "epoch": 0.2013938653586957, "grad_norm": 0.43762192130088806, "learning_rate": 1.8064311921148014e-05, "loss": 0.2743, "step": 10848 }, { "epoch": 0.20143099549611432, "grad_norm": 0.4020223617553711, "learning_rate": 1.806362208776347e-05, "loss": 0.2362, "step": 10850 }, { "epoch": 0.20146812563353297, "grad_norm": 0.4083857238292694, "learning_rate": 1.8062932144657185e-05, "loss": 0.4352, "step": 10852 }, { "epoch": 0.2015052557709516, "grad_norm": 0.49567094445228577, "learning_rate": 1.806224209183854e-05, "loss": 0.2241, "step": 10854 }, { "epoch": 0.20154238590837026, "grad_norm": 0.3750990927219391, "learning_rate": 1.8061551929316932e-05, "loss": 0.2615, "step": 10856 }, { "epoch": 0.2015795160457889, "grad_norm": 0.45427751541137695, "learning_rate": 1.806086165710175e-05, "loss": 0.3357, "step": 10858 }, { "epoch": 0.20161664618320752, "grad_norm": 0.36389946937561035, "learning_rate": 1.806017127520238e-05, "loss": 0.394, "step": 10860 }, { "epoch": 0.20165377632062617, "grad_norm": 0.49820196628570557, "learning_rate": 1.8059480783628232e-05, "loss": 0.2714, "step": 10862 }, { "epoch": 0.2016909064580448, "grad_norm": 0.39231058955192566, "learning_rate": 1.8058790182388687e-05, "loss": 0.2723, "step": 10864 }, { "epoch": 0.20172803659546343, "grad_norm": 0.2937910556793213, "learning_rate": 1.8058099471493145e-05, "loss": 0.2701, "step": 10866 }, { "epoch": 0.20176516673288208, "grad_norm": 0.33409932255744934, "learning_rate": 1.8057408650951006e-05, "loss": 0.3141, "step": 10868 }, { "epoch": 0.2018022968703007, "grad_norm": 0.31985020637512207, "learning_rate": 1.805671772077167e-05, "loss": 0.4062, "step": 10870 }, { "epoch": 0.20183942700771937, "grad_norm": 0.49771395325660706, "learning_rate": 1.805602668096454e-05, "loss": 0.2688, "step": 10872 }, { "epoch": 0.201876557145138, "grad_norm": 0.3047142028808594, "learning_rate": 1.8055335531539018e-05, "loss": 0.2343, "step": 10874 }, { "epoch": 0.20191368728255663, "grad_norm": 0.5055795907974243, "learning_rate": 1.8054644272504504e-05, "loss": 0.5337, "step": 10876 }, { "epoch": 0.20195081741997528, "grad_norm": 0.3464277684688568, "learning_rate": 1.8053952903870412e-05, "loss": 0.3591, "step": 10878 }, { "epoch": 0.2019879475573939, "grad_norm": 0.6054584383964539, "learning_rate": 1.8053261425646144e-05, "loss": 0.2056, "step": 10880 }, { "epoch": 0.20202507769481254, "grad_norm": 0.3776136338710785, "learning_rate": 1.805256983784111e-05, "loss": 0.2072, "step": 10882 }, { "epoch": 0.2020622078322312, "grad_norm": 0.4827113151550293, "learning_rate": 1.8051878140464723e-05, "loss": 0.2769, "step": 10884 }, { "epoch": 0.20209933796964982, "grad_norm": 0.2429111748933792, "learning_rate": 1.805118633352639e-05, "loss": 0.3155, "step": 10886 }, { "epoch": 0.20213646810706845, "grad_norm": 0.3328652083873749, "learning_rate": 1.805049441703553e-05, "loss": 0.3116, "step": 10888 }, { "epoch": 0.2021735982444871, "grad_norm": 0.3258818984031677, "learning_rate": 1.804980239100155e-05, "loss": 0.1621, "step": 10890 }, { "epoch": 0.20221072838190574, "grad_norm": 0.5889344811439514, "learning_rate": 1.8049110255433875e-05, "loss": 0.4156, "step": 10892 }, { "epoch": 0.2022478585193244, "grad_norm": 0.34815457463264465, "learning_rate": 1.8048418010341917e-05, "loss": 0.5055, "step": 10894 }, { "epoch": 0.20228498865674302, "grad_norm": 0.3929527997970581, "learning_rate": 1.80477256557351e-05, "loss": 0.1796, "step": 10896 }, { "epoch": 0.20232211879416165, "grad_norm": 0.2645559310913086, "learning_rate": 1.8047033191622843e-05, "loss": 0.1441, "step": 10898 }, { "epoch": 0.2023592489315803, "grad_norm": 0.43833860754966736, "learning_rate": 1.8046340618014567e-05, "loss": 0.2069, "step": 10900 }, { "epoch": 0.20239637906899893, "grad_norm": 0.38086459040641785, "learning_rate": 1.8045647934919697e-05, "loss": 0.4202, "step": 10902 }, { "epoch": 0.20243350920641756, "grad_norm": 0.5325655341148376, "learning_rate": 1.804495514234766e-05, "loss": 0.4256, "step": 10904 }, { "epoch": 0.20247063934383622, "grad_norm": 0.5525509119033813, "learning_rate": 1.8044262240307874e-05, "loss": 0.254, "step": 10906 }, { "epoch": 0.20250776948125485, "grad_norm": 0.37684616446495056, "learning_rate": 1.804356922880978e-05, "loss": 0.3692, "step": 10908 }, { "epoch": 0.2025448996186735, "grad_norm": 0.3446234464645386, "learning_rate": 1.8042876107862802e-05, "loss": 0.3716, "step": 10910 }, { "epoch": 0.20258202975609213, "grad_norm": 0.46539050340652466, "learning_rate": 1.8042182877476367e-05, "loss": 0.2808, "step": 10912 }, { "epoch": 0.20261915989351076, "grad_norm": 0.26811131834983826, "learning_rate": 1.8041489537659916e-05, "loss": 0.3519, "step": 10914 }, { "epoch": 0.20265629003092941, "grad_norm": 0.2790672779083252, "learning_rate": 1.804079608842288e-05, "loss": 0.3262, "step": 10916 }, { "epoch": 0.20269342016834804, "grad_norm": 0.26005733013153076, "learning_rate": 1.8040102529774693e-05, "loss": 0.2662, "step": 10918 }, { "epoch": 0.20273055030576667, "grad_norm": 0.30791449546813965, "learning_rate": 1.8039408861724795e-05, "loss": 0.2685, "step": 10920 }, { "epoch": 0.20276768044318533, "grad_norm": 0.4886121451854706, "learning_rate": 1.803871508428262e-05, "loss": 0.5012, "step": 10922 }, { "epoch": 0.20280481058060396, "grad_norm": 0.2542363703250885, "learning_rate": 1.8038021197457613e-05, "loss": 0.3042, "step": 10924 }, { "epoch": 0.20284194071802258, "grad_norm": 0.38627511262893677, "learning_rate": 1.8037327201259214e-05, "loss": 0.3797, "step": 10926 }, { "epoch": 0.20287907085544124, "grad_norm": 0.34510016441345215, "learning_rate": 1.8036633095696867e-05, "loss": 0.2813, "step": 10928 }, { "epoch": 0.20291620099285987, "grad_norm": 0.3160102665424347, "learning_rate": 1.8035938880780016e-05, "loss": 0.2398, "step": 10930 }, { "epoch": 0.20295333113027852, "grad_norm": 0.3049541115760803, "learning_rate": 1.803524455651811e-05, "loss": 0.3977, "step": 10932 }, { "epoch": 0.20299046126769715, "grad_norm": 0.3099616467952728, "learning_rate": 1.803455012292059e-05, "loss": 0.3029, "step": 10934 }, { "epoch": 0.20302759140511578, "grad_norm": 0.29621782898902893, "learning_rate": 1.8033855579996907e-05, "loss": 0.1594, "step": 10936 }, { "epoch": 0.20306472154253444, "grad_norm": 0.39212656021118164, "learning_rate": 1.8033160927756518e-05, "loss": 0.2957, "step": 10938 }, { "epoch": 0.20310185167995307, "grad_norm": 0.37854981422424316, "learning_rate": 1.803246616620887e-05, "loss": 0.5551, "step": 10940 }, { "epoch": 0.2031389818173717, "grad_norm": 0.3617917001247406, "learning_rate": 1.8031771295363417e-05, "loss": 0.284, "step": 10942 }, { "epoch": 0.20317611195479035, "grad_norm": 0.28693732619285583, "learning_rate": 1.8031076315229615e-05, "loss": 0.3541, "step": 10944 }, { "epoch": 0.20321324209220898, "grad_norm": 0.3539438247680664, "learning_rate": 1.803038122581692e-05, "loss": 0.1728, "step": 10946 }, { "epoch": 0.20325037222962763, "grad_norm": 0.4100123345851898, "learning_rate": 1.802968602713479e-05, "loss": 0.1783, "step": 10948 }, { "epoch": 0.20328750236704626, "grad_norm": 0.26993250846862793, "learning_rate": 1.8028990719192686e-05, "loss": 0.3275, "step": 10950 }, { "epoch": 0.2033246325044649, "grad_norm": 0.564293622970581, "learning_rate": 1.8028295302000068e-05, "loss": 0.3285, "step": 10952 }, { "epoch": 0.20336176264188355, "grad_norm": 0.3827095925807953, "learning_rate": 1.8027599775566396e-05, "loss": 0.2651, "step": 10954 }, { "epoch": 0.20339889277930218, "grad_norm": 0.4387151896953583, "learning_rate": 1.802690413990114e-05, "loss": 0.3062, "step": 10956 }, { "epoch": 0.2034360229167208, "grad_norm": 0.39288821816444397, "learning_rate": 1.8026208395013756e-05, "loss": 0.3374, "step": 10958 }, { "epoch": 0.20347315305413946, "grad_norm": 0.36687755584716797, "learning_rate": 1.8025512540913724e-05, "loss": 0.1945, "step": 10960 }, { "epoch": 0.2035102831915581, "grad_norm": 0.23342613875865936, "learning_rate": 1.8024816577610504e-05, "loss": 0.1826, "step": 10962 }, { "epoch": 0.20354741332897672, "grad_norm": 0.23447753489017487, "learning_rate": 1.8024120505113563e-05, "loss": 0.3601, "step": 10964 }, { "epoch": 0.20358454346639537, "grad_norm": 0.40237846970558167, "learning_rate": 1.8023424323432382e-05, "loss": 0.2262, "step": 10966 }, { "epoch": 0.203621673603814, "grad_norm": 0.4993864893913269, "learning_rate": 1.8022728032576426e-05, "loss": 0.2022, "step": 10968 }, { "epoch": 0.20365880374123266, "grad_norm": 0.2714662551879883, "learning_rate": 1.802203163255517e-05, "loss": 0.3509, "step": 10970 }, { "epoch": 0.2036959338786513, "grad_norm": 0.3502453565597534, "learning_rate": 1.80213351233781e-05, "loss": 0.2396, "step": 10972 }, { "epoch": 0.20373306401606991, "grad_norm": 0.38922733068466187, "learning_rate": 1.802063850505468e-05, "loss": 0.211, "step": 10974 }, { "epoch": 0.20377019415348857, "grad_norm": 0.4161888659000397, "learning_rate": 1.8019941777594395e-05, "loss": 0.4479, "step": 10976 }, { "epoch": 0.2038073242909072, "grad_norm": 0.43013548851013184, "learning_rate": 1.8019244941006725e-05, "loss": 0.3909, "step": 10978 }, { "epoch": 0.20384445442832583, "grad_norm": 0.42400020360946655, "learning_rate": 1.8018547995301154e-05, "loss": 0.4307, "step": 10980 }, { "epoch": 0.20388158456574448, "grad_norm": 0.3903316855430603, "learning_rate": 1.8017850940487162e-05, "loss": 0.1936, "step": 10982 }, { "epoch": 0.2039187147031631, "grad_norm": 0.34987080097198486, "learning_rate": 1.8017153776574235e-05, "loss": 0.3128, "step": 10984 }, { "epoch": 0.20395584484058177, "grad_norm": 0.37024426460266113, "learning_rate": 1.8016456503571857e-05, "loss": 0.1905, "step": 10986 }, { "epoch": 0.2039929749780004, "grad_norm": 0.3467206358909607, "learning_rate": 1.801575912148952e-05, "loss": 0.4905, "step": 10988 }, { "epoch": 0.20403010511541902, "grad_norm": 0.2291179895401001, "learning_rate": 1.801506163033671e-05, "loss": 0.3076, "step": 10990 }, { "epoch": 0.20406723525283768, "grad_norm": 0.3153413236141205, "learning_rate": 1.801436403012292e-05, "loss": 0.3613, "step": 10992 }, { "epoch": 0.2041043653902563, "grad_norm": 0.3223901689052582, "learning_rate": 1.8013666320857646e-05, "loss": 0.172, "step": 10994 }, { "epoch": 0.20414149552767494, "grad_norm": 0.36323311924934387, "learning_rate": 1.801296850255037e-05, "loss": 0.3457, "step": 10996 }, { "epoch": 0.2041786256650936, "grad_norm": 0.3450501561164856, "learning_rate": 1.8012270575210596e-05, "loss": 0.441, "step": 10998 }, { "epoch": 0.20421575580251222, "grad_norm": 0.48479440808296204, "learning_rate": 1.801157253884782e-05, "loss": 0.1534, "step": 11000 }, { "epoch": 0.20425288593993085, "grad_norm": 0.42609670758247375, "learning_rate": 1.801087439347154e-05, "loss": 0.4914, "step": 11002 }, { "epoch": 0.2042900160773495, "grad_norm": 0.3819350600242615, "learning_rate": 1.8010176139091257e-05, "loss": 0.2517, "step": 11004 }, { "epoch": 0.20432714621476813, "grad_norm": 0.25776270031929016, "learning_rate": 1.8009477775716468e-05, "loss": 0.2487, "step": 11006 }, { "epoch": 0.2043642763521868, "grad_norm": 0.365556538105011, "learning_rate": 1.8008779303356673e-05, "loss": 0.3783, "step": 11008 }, { "epoch": 0.20440140648960542, "grad_norm": 0.871935248374939, "learning_rate": 1.8008080722021387e-05, "loss": 0.218, "step": 11010 }, { "epoch": 0.20443853662702405, "grad_norm": 0.2795582711696625, "learning_rate": 1.8007382031720104e-05, "loss": 0.313, "step": 11012 }, { "epoch": 0.2044756667644427, "grad_norm": 0.7396692037582397, "learning_rate": 1.800668323246234e-05, "loss": 0.3901, "step": 11014 }, { "epoch": 0.20451279690186133, "grad_norm": 0.3855896592140198, "learning_rate": 1.80059843242576e-05, "loss": 0.3641, "step": 11016 }, { "epoch": 0.20454992703927996, "grad_norm": 0.6135772466659546, "learning_rate": 1.800528530711539e-05, "loss": 0.2708, "step": 11018 }, { "epoch": 0.20458705717669862, "grad_norm": 0.2560223340988159, "learning_rate": 1.800458618104523e-05, "loss": 0.3619, "step": 11020 }, { "epoch": 0.20462418731411725, "grad_norm": 0.3569188117980957, "learning_rate": 1.800388694605663e-05, "loss": 0.3182, "step": 11022 }, { "epoch": 0.2046613174515359, "grad_norm": 0.3389948010444641, "learning_rate": 1.80031876021591e-05, "loss": 0.2331, "step": 11024 }, { "epoch": 0.20469844758895453, "grad_norm": 0.36397939920425415, "learning_rate": 1.8002488149362158e-05, "loss": 0.2084, "step": 11026 }, { "epoch": 0.20473557772637316, "grad_norm": 0.21178899705410004, "learning_rate": 1.8001788587675323e-05, "loss": 0.1995, "step": 11028 }, { "epoch": 0.20477270786379181, "grad_norm": 0.33609259128570557, "learning_rate": 1.8001088917108115e-05, "loss": 0.2748, "step": 11030 }, { "epoch": 0.20480983800121044, "grad_norm": 0.4405866265296936, "learning_rate": 1.8000389137670055e-05, "loss": 0.3556, "step": 11032 }, { "epoch": 0.20484696813862907, "grad_norm": 0.3714745342731476, "learning_rate": 1.7999689249370657e-05, "loss": 0.3512, "step": 11034 }, { "epoch": 0.20488409827604773, "grad_norm": 0.2890619933605194, "learning_rate": 1.7998989252219454e-05, "loss": 0.4128, "step": 11036 }, { "epoch": 0.20492122841346636, "grad_norm": 0.36238807439804077, "learning_rate": 1.7998289146225968e-05, "loss": 0.2808, "step": 11038 }, { "epoch": 0.20495835855088498, "grad_norm": 0.3752381205558777, "learning_rate": 1.799758893139972e-05, "loss": 0.3256, "step": 11040 }, { "epoch": 0.20499548868830364, "grad_norm": 0.39415037631988525, "learning_rate": 1.7996888607750244e-05, "loss": 0.2595, "step": 11042 }, { "epoch": 0.20503261882572227, "grad_norm": 0.30676236748695374, "learning_rate": 1.799618817528707e-05, "loss": 0.2643, "step": 11044 }, { "epoch": 0.20506974896314092, "grad_norm": 0.4020739793777466, "learning_rate": 1.7995487634019725e-05, "loss": 0.1626, "step": 11046 }, { "epoch": 0.20510687910055955, "grad_norm": 0.39993715286254883, "learning_rate": 1.7994786983957745e-05, "loss": 0.407, "step": 11048 }, { "epoch": 0.20514400923797818, "grad_norm": 0.3869505822658539, "learning_rate": 1.7994086225110657e-05, "loss": 0.4255, "step": 11050 }, { "epoch": 0.20518113937539684, "grad_norm": 0.3084717392921448, "learning_rate": 1.7993385357488e-05, "loss": 0.3249, "step": 11052 }, { "epoch": 0.20521826951281547, "grad_norm": 0.35271546244621277, "learning_rate": 1.7992684381099314e-05, "loss": 0.3688, "step": 11054 }, { "epoch": 0.2052553996502341, "grad_norm": 0.6537727117538452, "learning_rate": 1.7991983295954135e-05, "loss": 0.2483, "step": 11056 }, { "epoch": 0.20529252978765275, "grad_norm": 0.35688623785972595, "learning_rate": 1.7991282102062e-05, "loss": 0.3329, "step": 11058 }, { "epoch": 0.20532965992507138, "grad_norm": 0.33994412422180176, "learning_rate": 1.7990580799432452e-05, "loss": 0.2438, "step": 11060 }, { "epoch": 0.20536679006249003, "grad_norm": 0.3979966342449188, "learning_rate": 1.7989879388075037e-05, "loss": 0.3546, "step": 11062 }, { "epoch": 0.20540392019990866, "grad_norm": 0.23355227708816528, "learning_rate": 1.7989177867999294e-05, "loss": 0.1189, "step": 11064 }, { "epoch": 0.2054410503373273, "grad_norm": 0.45612430572509766, "learning_rate": 1.798847623921477e-05, "loss": 0.362, "step": 11066 }, { "epoch": 0.20547818047474595, "grad_norm": 0.31561651825904846, "learning_rate": 1.798777450173101e-05, "loss": 0.2421, "step": 11068 }, { "epoch": 0.20551531061216458, "grad_norm": 0.31964311003685, "learning_rate": 1.798707265555757e-05, "loss": 0.1353, "step": 11070 }, { "epoch": 0.2055524407495832, "grad_norm": 0.28680485486984253, "learning_rate": 1.7986370700703993e-05, "loss": 0.4415, "step": 11072 }, { "epoch": 0.20558957088700186, "grad_norm": 0.31005164980888367, "learning_rate": 1.7985668637179835e-05, "loss": 0.2767, "step": 11074 }, { "epoch": 0.2056267010244205, "grad_norm": 0.2682952880859375, "learning_rate": 1.7984966464994645e-05, "loss": 0.303, "step": 11076 }, { "epoch": 0.20566383116183912, "grad_norm": 0.48930829763412476, "learning_rate": 1.7984264184157978e-05, "loss": 0.2874, "step": 11078 }, { "epoch": 0.20570096129925777, "grad_norm": 0.46756598353385925, "learning_rate": 1.7983561794679394e-05, "loss": 0.2312, "step": 11080 }, { "epoch": 0.2057380914366764, "grad_norm": 0.35370391607284546, "learning_rate": 1.7982859296568445e-05, "loss": 0.1852, "step": 11082 }, { "epoch": 0.20577522157409506, "grad_norm": 0.45003122091293335, "learning_rate": 1.7982156689834693e-05, "loss": 0.1659, "step": 11084 }, { "epoch": 0.20581235171151369, "grad_norm": 0.24225486814975739, "learning_rate": 1.7981453974487698e-05, "loss": 0.2772, "step": 11086 }, { "epoch": 0.20584948184893231, "grad_norm": 0.4070354700088501, "learning_rate": 1.798075115053702e-05, "loss": 0.2216, "step": 11088 }, { "epoch": 0.20588661198635097, "grad_norm": 0.27683866024017334, "learning_rate": 1.798004821799223e-05, "loss": 0.3173, "step": 11090 }, { "epoch": 0.2059237421237696, "grad_norm": 0.4069429636001587, "learning_rate": 1.797934517686288e-05, "loss": 0.4082, "step": 11092 }, { "epoch": 0.20596087226118823, "grad_norm": 0.331180602312088, "learning_rate": 1.7978642027158545e-05, "loss": 0.2893, "step": 11094 }, { "epoch": 0.20599800239860688, "grad_norm": 0.2940187454223633, "learning_rate": 1.797793876888879e-05, "loss": 0.3266, "step": 11096 }, { "epoch": 0.2060351325360255, "grad_norm": 0.6095760464668274, "learning_rate": 1.797723540206319e-05, "loss": 0.2674, "step": 11098 }, { "epoch": 0.20607226267344417, "grad_norm": 0.5362911820411682, "learning_rate": 1.7976531926691305e-05, "loss": 0.4348, "step": 11100 }, { "epoch": 0.2061093928108628, "grad_norm": 0.47066736221313477, "learning_rate": 1.7975828342782712e-05, "loss": 0.4524, "step": 11102 }, { "epoch": 0.20614652294828142, "grad_norm": 0.3122949004173279, "learning_rate": 1.797512465034699e-05, "loss": 0.323, "step": 11104 }, { "epoch": 0.20618365308570008, "grad_norm": 0.31107062101364136, "learning_rate": 1.797442084939371e-05, "loss": 0.2409, "step": 11106 }, { "epoch": 0.2062207832231187, "grad_norm": 0.8616113662719727, "learning_rate": 1.7973716939932445e-05, "loss": 0.4116, "step": 11108 }, { "epoch": 0.20625791336053734, "grad_norm": 0.23543886840343475, "learning_rate": 1.797301292197278e-05, "loss": 0.3517, "step": 11110 }, { "epoch": 0.206295043497956, "grad_norm": 0.45217669010162354, "learning_rate": 1.7972308795524287e-05, "loss": 0.2879, "step": 11112 }, { "epoch": 0.20633217363537462, "grad_norm": 0.32104748487472534, "learning_rate": 1.7971604560596552e-05, "loss": 0.3071, "step": 11114 }, { "epoch": 0.20636930377279325, "grad_norm": 0.37868815660476685, "learning_rate": 1.797090021719916e-05, "loss": 0.3469, "step": 11116 }, { "epoch": 0.2064064339102119, "grad_norm": 0.4879947006702423, "learning_rate": 1.797019576534169e-05, "loss": 0.2358, "step": 11118 }, { "epoch": 0.20644356404763053, "grad_norm": 0.430711954832077, "learning_rate": 1.796949120503373e-05, "loss": 0.3454, "step": 11120 }, { "epoch": 0.2064806941850492, "grad_norm": 0.2504064738750458, "learning_rate": 1.7968786536284865e-05, "loss": 0.4941, "step": 11122 }, { "epoch": 0.20651782432246782, "grad_norm": 0.3445931375026703, "learning_rate": 1.7968081759104686e-05, "loss": 0.358, "step": 11124 }, { "epoch": 0.20655495445988645, "grad_norm": 0.5050252079963684, "learning_rate": 1.796737687350278e-05, "loss": 0.286, "step": 11126 }, { "epoch": 0.2065920845973051, "grad_norm": 0.297049343585968, "learning_rate": 1.796667187948874e-05, "loss": 0.3362, "step": 11128 }, { "epoch": 0.20662921473472373, "grad_norm": 0.32343947887420654, "learning_rate": 1.7965966777072153e-05, "loss": 0.3937, "step": 11130 }, { "epoch": 0.20666634487214236, "grad_norm": 0.41604095697402954, "learning_rate": 1.796526156626263e-05, "loss": 0.2948, "step": 11132 }, { "epoch": 0.20670347500956102, "grad_norm": 0.47810590267181396, "learning_rate": 1.7964556247069747e-05, "loss": 0.1511, "step": 11134 }, { "epoch": 0.20674060514697964, "grad_norm": 0.33088523149490356, "learning_rate": 1.7963850819503115e-05, "loss": 0.1819, "step": 11136 }, { "epoch": 0.2067777352843983, "grad_norm": 0.28317898511886597, "learning_rate": 1.7963145283572327e-05, "loss": 0.339, "step": 11138 }, { "epoch": 0.20681486542181693, "grad_norm": 0.26996004581451416, "learning_rate": 1.796243963928698e-05, "loss": 0.2007, "step": 11140 }, { "epoch": 0.20685199555923556, "grad_norm": 0.4394034445285797, "learning_rate": 1.7961733886656682e-05, "loss": 0.3707, "step": 11142 }, { "epoch": 0.2068891256966542, "grad_norm": 0.34104758501052856, "learning_rate": 1.7961028025691037e-05, "loss": 0.2973, "step": 11144 }, { "epoch": 0.20692625583407284, "grad_norm": 0.41780200600624084, "learning_rate": 1.7960322056399643e-05, "loss": 0.2402, "step": 11146 }, { "epoch": 0.20696338597149147, "grad_norm": 0.2939709722995758, "learning_rate": 1.7959615978792115e-05, "loss": 0.293, "step": 11148 }, { "epoch": 0.20700051610891013, "grad_norm": 0.23490868508815765, "learning_rate": 1.7958909792878048e-05, "loss": 0.2134, "step": 11150 }, { "epoch": 0.20703764624632875, "grad_norm": 0.34482479095458984, "learning_rate": 1.7958203498667065e-05, "loss": 0.3631, "step": 11152 }, { "epoch": 0.20707477638374738, "grad_norm": 0.3382614254951477, "learning_rate": 1.7957497096168767e-05, "loss": 0.4784, "step": 11154 }, { "epoch": 0.20711190652116604, "grad_norm": 0.34633511304855347, "learning_rate": 1.7956790585392764e-05, "loss": 0.3552, "step": 11156 }, { "epoch": 0.20714903665858467, "grad_norm": 0.394165575504303, "learning_rate": 1.7956083966348684e-05, "loss": 0.364, "step": 11158 }, { "epoch": 0.20718616679600332, "grad_norm": 0.2904904782772064, "learning_rate": 1.7955377239046126e-05, "loss": 0.2963, "step": 11160 }, { "epoch": 0.20722329693342195, "grad_norm": 0.26285892724990845, "learning_rate": 1.7954670403494713e-05, "loss": 0.2799, "step": 11162 }, { "epoch": 0.20726042707084058, "grad_norm": 0.34330615401268005, "learning_rate": 1.7953963459704066e-05, "loss": 0.2612, "step": 11164 }, { "epoch": 0.20729755720825924, "grad_norm": 0.5931466221809387, "learning_rate": 1.7953256407683797e-05, "loss": 0.2651, "step": 11166 }, { "epoch": 0.20733468734567786, "grad_norm": 0.34845098853111267, "learning_rate": 1.7952549247443537e-05, "loss": 0.3494, "step": 11168 }, { "epoch": 0.2073718174830965, "grad_norm": 0.48305806517601013, "learning_rate": 1.79518419789929e-05, "loss": 0.45, "step": 11170 }, { "epoch": 0.20740894762051515, "grad_norm": 0.3579391837120056, "learning_rate": 1.7951134602341512e-05, "loss": 0.4734, "step": 11172 }, { "epoch": 0.20744607775793378, "grad_norm": 0.24750709533691406, "learning_rate": 1.7950427117498995e-05, "loss": 0.3436, "step": 11174 }, { "epoch": 0.20748320789535243, "grad_norm": 1.1222401857376099, "learning_rate": 1.7949719524474982e-05, "loss": 0.3056, "step": 11176 }, { "epoch": 0.20752033803277106, "grad_norm": 0.27495741844177246, "learning_rate": 1.7949011823279098e-05, "loss": 0.2536, "step": 11178 }, { "epoch": 0.2075574681701897, "grad_norm": 0.40407535433769226, "learning_rate": 1.7948304013920976e-05, "loss": 0.2484, "step": 11180 }, { "epoch": 0.20759459830760835, "grad_norm": 0.4311932921409607, "learning_rate": 1.7947596096410242e-05, "loss": 0.4077, "step": 11182 }, { "epoch": 0.20763172844502698, "grad_norm": 0.32951274514198303, "learning_rate": 1.794688807075653e-05, "loss": 0.297, "step": 11184 }, { "epoch": 0.2076688585824456, "grad_norm": 0.4299672245979309, "learning_rate": 1.7946179936969476e-05, "loss": 0.2706, "step": 11186 }, { "epoch": 0.20770598871986426, "grad_norm": 0.47778764367103577, "learning_rate": 1.7945471695058713e-05, "loss": 0.3769, "step": 11188 }, { "epoch": 0.2077431188572829, "grad_norm": 0.4164954721927643, "learning_rate": 1.794476334503388e-05, "loss": 0.4864, "step": 11190 }, { "epoch": 0.20778024899470152, "grad_norm": 0.4768127202987671, "learning_rate": 1.794405488690462e-05, "loss": 0.3166, "step": 11192 }, { "epoch": 0.20781737913212017, "grad_norm": 0.46884313225746155, "learning_rate": 1.7943346320680562e-05, "loss": 0.3376, "step": 11194 }, { "epoch": 0.2078545092695388, "grad_norm": 0.5495001673698425, "learning_rate": 1.7942637646371358e-05, "loss": 0.4091, "step": 11196 }, { "epoch": 0.20789163940695746, "grad_norm": 0.2996853291988373, "learning_rate": 1.7941928863986645e-05, "loss": 0.3333, "step": 11198 }, { "epoch": 0.20792876954437609, "grad_norm": 0.42002248764038086, "learning_rate": 1.794121997353607e-05, "loss": 0.2899, "step": 11200 }, { "epoch": 0.2079658996817947, "grad_norm": 0.39984187483787537, "learning_rate": 1.7940510975029278e-05, "loss": 0.3451, "step": 11202 }, { "epoch": 0.20800302981921337, "grad_norm": 0.435496985912323, "learning_rate": 1.793980186847591e-05, "loss": 0.2027, "step": 11204 }, { "epoch": 0.208040159956632, "grad_norm": 0.4816204607486725, "learning_rate": 1.7939092653885627e-05, "loss": 0.527, "step": 11206 }, { "epoch": 0.20807729009405063, "grad_norm": 0.38474929332733154, "learning_rate": 1.793838333126807e-05, "loss": 0.1692, "step": 11208 }, { "epoch": 0.20811442023146928, "grad_norm": 0.39252087473869324, "learning_rate": 1.7937673900632903e-05, "loss": 0.3937, "step": 11210 }, { "epoch": 0.2081515503688879, "grad_norm": 0.35301506519317627, "learning_rate": 1.7936964361989758e-05, "loss": 0.2033, "step": 11212 }, { "epoch": 0.20818868050630657, "grad_norm": 0.2924462556838989, "learning_rate": 1.793625471534831e-05, "loss": 0.3416, "step": 11214 }, { "epoch": 0.2082258106437252, "grad_norm": 0.37668949365615845, "learning_rate": 1.7935544960718204e-05, "loss": 0.1794, "step": 11216 }, { "epoch": 0.20826294078114382, "grad_norm": 0.2842382490634918, "learning_rate": 1.79348350981091e-05, "loss": 0.3269, "step": 11218 }, { "epoch": 0.20830007091856248, "grad_norm": 0.27826449275016785, "learning_rate": 1.7934125127530663e-05, "loss": 0.3411, "step": 11220 }, { "epoch": 0.2083372010559811, "grad_norm": 0.4437578320503235, "learning_rate": 1.7933415048992543e-05, "loss": 0.3274, "step": 11222 }, { "epoch": 0.20837433119339974, "grad_norm": 0.395443856716156, "learning_rate": 1.793270486250441e-05, "loss": 0.3947, "step": 11224 }, { "epoch": 0.2084114613308184, "grad_norm": 0.5515168905258179, "learning_rate": 1.7931994568075925e-05, "loss": 0.1667, "step": 11226 }, { "epoch": 0.20844859146823702, "grad_norm": 0.2250809669494629, "learning_rate": 1.7931284165716753e-05, "loss": 0.1925, "step": 11228 }, { "epoch": 0.20848572160565565, "grad_norm": 0.46520328521728516, "learning_rate": 1.793057365543656e-05, "loss": 0.2073, "step": 11230 }, { "epoch": 0.2085228517430743, "grad_norm": 0.22988620400428772, "learning_rate": 1.7929863037245012e-05, "loss": 0.295, "step": 11232 }, { "epoch": 0.20855998188049293, "grad_norm": 0.27971047163009644, "learning_rate": 1.7929152311151786e-05, "loss": 0.406, "step": 11234 }, { "epoch": 0.2085971120179116, "grad_norm": 0.34872183203697205, "learning_rate": 1.7928441477166543e-05, "loss": 0.3845, "step": 11236 }, { "epoch": 0.20863424215533022, "grad_norm": 0.37287500500679016, "learning_rate": 1.792773053529896e-05, "loss": 0.2862, "step": 11238 }, { "epoch": 0.20867137229274885, "grad_norm": 0.2915292978286743, "learning_rate": 1.7927019485558715e-05, "loss": 0.2374, "step": 11240 }, { "epoch": 0.2087085024301675, "grad_norm": 0.4057576358318329, "learning_rate": 1.7926308327955474e-05, "loss": 0.3982, "step": 11242 }, { "epoch": 0.20874563256758613, "grad_norm": 0.49636590480804443, "learning_rate": 1.7925597062498924e-05, "loss": 0.3781, "step": 11244 }, { "epoch": 0.20878276270500476, "grad_norm": 0.324169397354126, "learning_rate": 1.7924885689198733e-05, "loss": 0.3575, "step": 11246 }, { "epoch": 0.20881989284242342, "grad_norm": 0.40219929814338684, "learning_rate": 1.7924174208064586e-05, "loss": 0.2937, "step": 11248 }, { "epoch": 0.20885702297984204, "grad_norm": 0.3900527060031891, "learning_rate": 1.7923462619106168e-05, "loss": 0.3158, "step": 11250 }, { "epoch": 0.2088941531172607, "grad_norm": 0.2828623354434967, "learning_rate": 1.7922750922333154e-05, "loss": 0.3131, "step": 11252 }, { "epoch": 0.20893128325467933, "grad_norm": 0.30541741847991943, "learning_rate": 1.792203911775523e-05, "loss": 0.2422, "step": 11254 }, { "epoch": 0.20896841339209796, "grad_norm": 0.3387317657470703, "learning_rate": 1.7921327205382086e-05, "loss": 0.2074, "step": 11256 }, { "epoch": 0.2090055435295166, "grad_norm": 0.4591515064239502, "learning_rate": 1.7920615185223402e-05, "loss": 0.4596, "step": 11258 }, { "epoch": 0.20904267366693524, "grad_norm": 0.368550181388855, "learning_rate": 1.7919903057288873e-05, "loss": 0.2687, "step": 11260 }, { "epoch": 0.20907980380435387, "grad_norm": 0.4759654402732849, "learning_rate": 1.7919190821588185e-05, "loss": 0.516, "step": 11262 }, { "epoch": 0.20911693394177253, "grad_norm": 0.3504283130168915, "learning_rate": 1.7918478478131035e-05, "loss": 0.1249, "step": 11264 }, { "epoch": 0.20915406407919115, "grad_norm": 0.30504560470581055, "learning_rate": 1.7917766026927106e-05, "loss": 0.1936, "step": 11266 }, { "epoch": 0.20919119421660978, "grad_norm": 0.4589231312274933, "learning_rate": 1.79170534679861e-05, "loss": 0.3473, "step": 11268 }, { "epoch": 0.20922832435402844, "grad_norm": 0.29722461104393005, "learning_rate": 1.791634080131771e-05, "loss": 0.3495, "step": 11270 }, { "epoch": 0.20926545449144707, "grad_norm": 0.316755086183548, "learning_rate": 1.7915628026931634e-05, "loss": 0.2927, "step": 11272 }, { "epoch": 0.20930258462886572, "grad_norm": 0.32517385482788086, "learning_rate": 1.7914915144837573e-05, "loss": 0.3272, "step": 11274 }, { "epoch": 0.20933971476628435, "grad_norm": 0.24355337023735046, "learning_rate": 1.7914202155045225e-05, "loss": 0.344, "step": 11276 }, { "epoch": 0.20937684490370298, "grad_norm": 0.25505974888801575, "learning_rate": 1.791348905756429e-05, "loss": 0.381, "step": 11278 }, { "epoch": 0.20941397504112164, "grad_norm": 0.3841293156147003, "learning_rate": 1.7912775852404468e-05, "loss": 0.4498, "step": 11280 }, { "epoch": 0.20945110517854026, "grad_norm": 0.4953913390636444, "learning_rate": 1.7912062539575475e-05, "loss": 0.3866, "step": 11282 }, { "epoch": 0.2094882353159589, "grad_norm": 0.40921443700790405, "learning_rate": 1.7911349119087005e-05, "loss": 0.257, "step": 11284 }, { "epoch": 0.20952536545337755, "grad_norm": 0.30752265453338623, "learning_rate": 1.791063559094877e-05, "loss": 0.2584, "step": 11286 }, { "epoch": 0.20956249559079618, "grad_norm": 0.43422332406044006, "learning_rate": 1.7909921955170486e-05, "loss": 0.4647, "step": 11288 }, { "epoch": 0.20959962572821483, "grad_norm": 0.31949397921562195, "learning_rate": 1.7909208211761852e-05, "loss": 0.3347, "step": 11290 }, { "epoch": 0.20963675586563346, "grad_norm": 0.3028753697872162, "learning_rate": 1.7908494360732586e-05, "loss": 0.2717, "step": 11292 }, { "epoch": 0.2096738860030521, "grad_norm": 0.3990941047668457, "learning_rate": 1.7907780402092405e-05, "loss": 0.3538, "step": 11294 }, { "epoch": 0.20971101614047075, "grad_norm": 0.5162336826324463, "learning_rate": 1.7907066335851017e-05, "loss": 0.1953, "step": 11296 }, { "epoch": 0.20974814627788937, "grad_norm": 0.2976301908493042, "learning_rate": 1.790635216201814e-05, "loss": 0.234, "step": 11298 }, { "epoch": 0.209785276415308, "grad_norm": 0.2512798011302948, "learning_rate": 1.7905637880603487e-05, "loss": 0.4178, "step": 11300 }, { "epoch": 0.20982240655272666, "grad_norm": 0.3172019422054291, "learning_rate": 1.790492349161679e-05, "loss": 0.4056, "step": 11302 }, { "epoch": 0.2098595366901453, "grad_norm": 0.3491508960723877, "learning_rate": 1.7904208995067756e-05, "loss": 0.2989, "step": 11304 }, { "epoch": 0.20989666682756392, "grad_norm": 0.3441779315471649, "learning_rate": 1.7903494390966118e-05, "loss": 0.2475, "step": 11306 }, { "epoch": 0.20993379696498257, "grad_norm": 0.3297508656978607, "learning_rate": 1.7902779679321593e-05, "loss": 0.2369, "step": 11308 }, { "epoch": 0.2099709271024012, "grad_norm": 0.31899791955947876, "learning_rate": 1.790206486014391e-05, "loss": 0.3274, "step": 11310 }, { "epoch": 0.21000805723981986, "grad_norm": 0.38884031772613525, "learning_rate": 1.7901349933442795e-05, "loss": 0.3682, "step": 11312 }, { "epoch": 0.21004518737723848, "grad_norm": 0.2976062595844269, "learning_rate": 1.790063489922797e-05, "loss": 0.2472, "step": 11314 }, { "epoch": 0.2100823175146571, "grad_norm": 0.4382602274417877, "learning_rate": 1.789991975750917e-05, "loss": 0.3804, "step": 11316 }, { "epoch": 0.21011944765207577, "grad_norm": 0.40436607599258423, "learning_rate": 1.7899204508296127e-05, "loss": 0.4625, "step": 11318 }, { "epoch": 0.2101565777894944, "grad_norm": 0.39966943860054016, "learning_rate": 1.789848915159857e-05, "loss": 0.365, "step": 11320 }, { "epoch": 0.21019370792691303, "grad_norm": 0.2898668348789215, "learning_rate": 1.7897773687426237e-05, "loss": 0.3255, "step": 11322 }, { "epoch": 0.21023083806433168, "grad_norm": 0.46234452724456787, "learning_rate": 1.789705811578886e-05, "loss": 0.3227, "step": 11324 }, { "epoch": 0.2102679682017503, "grad_norm": 0.3670711815357208, "learning_rate": 1.7896342436696176e-05, "loss": 0.4924, "step": 11326 }, { "epoch": 0.21030509833916897, "grad_norm": 0.3221372663974762, "learning_rate": 1.7895626650157923e-05, "loss": 0.3204, "step": 11328 }, { "epoch": 0.2103422284765876, "grad_norm": 0.3623506724834442, "learning_rate": 1.789491075618384e-05, "loss": 0.4174, "step": 11330 }, { "epoch": 0.21037935861400622, "grad_norm": 0.28841155767440796, "learning_rate": 1.7894194754783672e-05, "loss": 0.4641, "step": 11332 }, { "epoch": 0.21041648875142488, "grad_norm": 0.4555869996547699, "learning_rate": 1.789347864596716e-05, "loss": 0.3762, "step": 11334 }, { "epoch": 0.2104536188888435, "grad_norm": 0.30810022354125977, "learning_rate": 1.7892762429744045e-05, "loss": 0.1977, "step": 11336 }, { "epoch": 0.21049074902626214, "grad_norm": 0.3749142289161682, "learning_rate": 1.7892046106124077e-05, "loss": 0.3887, "step": 11338 }, { "epoch": 0.2105278791636808, "grad_norm": 0.3263684809207916, "learning_rate": 1.7891329675116998e-05, "loss": 0.4671, "step": 11340 }, { "epoch": 0.21056500930109942, "grad_norm": 0.3980671465396881, "learning_rate": 1.789061313673256e-05, "loss": 0.3209, "step": 11342 }, { "epoch": 0.21060213943851805, "grad_norm": 0.37517815828323364, "learning_rate": 1.7889896490980515e-05, "loss": 0.3681, "step": 11344 }, { "epoch": 0.2106392695759367, "grad_norm": 0.30738428235054016, "learning_rate": 1.788917973787061e-05, "loss": 0.2078, "step": 11346 }, { "epoch": 0.21067639971335533, "grad_norm": 0.3333374261856079, "learning_rate": 1.78884628774126e-05, "loss": 0.2817, "step": 11348 }, { "epoch": 0.210713529850774, "grad_norm": 0.2693551480770111, "learning_rate": 1.788774590961624e-05, "loss": 0.2098, "step": 11350 }, { "epoch": 0.21075065998819262, "grad_norm": 0.27048808336257935, "learning_rate": 1.788702883449128e-05, "loss": 0.379, "step": 11352 }, { "epoch": 0.21078779012561125, "grad_norm": 0.2991126477718353, "learning_rate": 1.788631165204749e-05, "loss": 0.4559, "step": 11354 }, { "epoch": 0.2108249202630299, "grad_norm": 0.3492969572544098, "learning_rate": 1.7885594362294613e-05, "loss": 0.2681, "step": 11356 }, { "epoch": 0.21086205040044853, "grad_norm": 0.36768993735313416, "learning_rate": 1.788487696524242e-05, "loss": 0.5137, "step": 11358 }, { "epoch": 0.21089918053786716, "grad_norm": 0.4367530643939972, "learning_rate": 1.788415946090067e-05, "loss": 0.3998, "step": 11360 }, { "epoch": 0.21093631067528582, "grad_norm": 0.3286070227622986, "learning_rate": 1.7883441849279124e-05, "loss": 0.299, "step": 11362 }, { "epoch": 0.21097344081270444, "grad_norm": 0.34484922885894775, "learning_rate": 1.788272413038755e-05, "loss": 0.1799, "step": 11364 }, { "epoch": 0.2110105709501231, "grad_norm": 0.3885658085346222, "learning_rate": 1.788200630423571e-05, "loss": 0.3552, "step": 11366 }, { "epoch": 0.21104770108754173, "grad_norm": 0.447185754776001, "learning_rate": 1.7881288370833374e-05, "loss": 0.3397, "step": 11368 }, { "epoch": 0.21108483122496036, "grad_norm": 0.5488476753234863, "learning_rate": 1.788057033019031e-05, "loss": 0.3098, "step": 11370 }, { "epoch": 0.211121961362379, "grad_norm": 0.20774076879024506, "learning_rate": 1.787985218231629e-05, "loss": 0.2344, "step": 11372 }, { "epoch": 0.21115909149979764, "grad_norm": 0.3104729950428009, "learning_rate": 1.7879133927221085e-05, "loss": 0.3938, "step": 11374 }, { "epoch": 0.21119622163721627, "grad_norm": 0.33838367462158203, "learning_rate": 1.7878415564914468e-05, "loss": 0.2054, "step": 11376 }, { "epoch": 0.21123335177463493, "grad_norm": 0.36007073521614075, "learning_rate": 1.7877697095406215e-05, "loss": 0.3108, "step": 11378 }, { "epoch": 0.21127048191205355, "grad_norm": 0.2878221869468689, "learning_rate": 1.78769785187061e-05, "loss": 0.2944, "step": 11380 }, { "epoch": 0.21130761204947218, "grad_norm": 0.5970224142074585, "learning_rate": 1.7876259834823897e-05, "loss": 0.4931, "step": 11382 }, { "epoch": 0.21134474218689084, "grad_norm": 0.3691718280315399, "learning_rate": 1.7875541043769395e-05, "loss": 0.3745, "step": 11384 }, { "epoch": 0.21138187232430947, "grad_norm": 0.42721423506736755, "learning_rate": 1.7874822145552367e-05, "loss": 0.1797, "step": 11386 }, { "epoch": 0.21141900246172812, "grad_norm": 0.3286542594432831, "learning_rate": 1.7874103140182598e-05, "loss": 0.22, "step": 11388 }, { "epoch": 0.21145613259914675, "grad_norm": 0.3122023046016693, "learning_rate": 1.787338402766987e-05, "loss": 0.2754, "step": 11390 }, { "epoch": 0.21149326273656538, "grad_norm": 0.2577419579029083, "learning_rate": 1.7872664808023974e-05, "loss": 0.4035, "step": 11392 }, { "epoch": 0.21153039287398404, "grad_norm": 0.48411402106285095, "learning_rate": 1.7871945481254685e-05, "loss": 0.2051, "step": 11394 }, { "epoch": 0.21156752301140266, "grad_norm": 0.3355330228805542, "learning_rate": 1.7871226047371802e-05, "loss": 0.3139, "step": 11396 }, { "epoch": 0.2116046531488213, "grad_norm": 0.33480045199394226, "learning_rate": 1.7870506506385106e-05, "loss": 0.3558, "step": 11398 }, { "epoch": 0.21164178328623995, "grad_norm": 0.36884260177612305, "learning_rate": 1.7869786858304392e-05, "loss": 0.2811, "step": 11400 }, { "epoch": 0.21167891342365858, "grad_norm": 0.42031824588775635, "learning_rate": 1.7869067103139452e-05, "loss": 0.369, "step": 11402 }, { "epoch": 0.21171604356107723, "grad_norm": 0.4800170063972473, "learning_rate": 1.786834724090008e-05, "loss": 0.4371, "step": 11404 }, { "epoch": 0.21175317369849586, "grad_norm": 0.41438165307044983, "learning_rate": 1.786762727159607e-05, "loss": 0.4753, "step": 11406 }, { "epoch": 0.2117903038359145, "grad_norm": 0.37987223267555237, "learning_rate": 1.786690719523722e-05, "loss": 0.2602, "step": 11408 }, { "epoch": 0.21182743397333315, "grad_norm": 0.2816537618637085, "learning_rate": 1.7866187011833328e-05, "loss": 0.2211, "step": 11410 }, { "epoch": 0.21186456411075177, "grad_norm": 0.4368533790111542, "learning_rate": 1.786546672139419e-05, "loss": 0.1751, "step": 11412 }, { "epoch": 0.2119016942481704, "grad_norm": 0.274362176656723, "learning_rate": 1.786474632392961e-05, "loss": 0.4659, "step": 11414 }, { "epoch": 0.21193882438558906, "grad_norm": 0.2870135009288788, "learning_rate": 1.7864025819449393e-05, "loss": 0.1647, "step": 11416 }, { "epoch": 0.2119759545230077, "grad_norm": 0.3261653184890747, "learning_rate": 1.786330520796334e-05, "loss": 0.1533, "step": 11418 }, { "epoch": 0.21201308466042632, "grad_norm": 0.5299916863441467, "learning_rate": 1.7862584489481252e-05, "loss": 0.379, "step": 11420 }, { "epoch": 0.21205021479784497, "grad_norm": 0.26870614290237427, "learning_rate": 1.786186366401294e-05, "loss": 0.5631, "step": 11422 }, { "epoch": 0.2120873449352636, "grad_norm": 0.25638487935066223, "learning_rate": 1.7861142731568217e-05, "loss": 0.2603, "step": 11424 }, { "epoch": 0.21212447507268226, "grad_norm": 0.3180806636810303, "learning_rate": 1.7860421692156883e-05, "loss": 0.3897, "step": 11426 }, { "epoch": 0.21216160521010088, "grad_norm": 0.3051069974899292, "learning_rate": 1.785970054578876e-05, "loss": 0.3624, "step": 11428 }, { "epoch": 0.2121987353475195, "grad_norm": 0.317199170589447, "learning_rate": 1.7858979292473652e-05, "loss": 0.3877, "step": 11430 }, { "epoch": 0.21223586548493817, "grad_norm": 0.4193689823150635, "learning_rate": 1.785825793222138e-05, "loss": 0.3963, "step": 11432 }, { "epoch": 0.2122729956223568, "grad_norm": 0.397788405418396, "learning_rate": 1.7857536465041754e-05, "loss": 0.1269, "step": 11434 }, { "epoch": 0.21231012575977543, "grad_norm": 0.420858234167099, "learning_rate": 1.7856814890944592e-05, "loss": 0.3157, "step": 11436 }, { "epoch": 0.21234725589719408, "grad_norm": 0.4014202356338501, "learning_rate": 1.7856093209939712e-05, "loss": 0.5307, "step": 11438 }, { "epoch": 0.2123843860346127, "grad_norm": 0.4186968207359314, "learning_rate": 1.7855371422036937e-05, "loss": 0.3368, "step": 11440 }, { "epoch": 0.21242151617203137, "grad_norm": 0.4789351224899292, "learning_rate": 1.7854649527246084e-05, "loss": 0.2259, "step": 11442 }, { "epoch": 0.21245864630945, "grad_norm": 0.44176778197288513, "learning_rate": 1.785392752557698e-05, "loss": 0.402, "step": 11444 }, { "epoch": 0.21249577644686862, "grad_norm": 0.3761562705039978, "learning_rate": 1.7853205417039447e-05, "loss": 0.1352, "step": 11446 }, { "epoch": 0.21253290658428728, "grad_norm": 0.3794058561325073, "learning_rate": 1.7852483201643314e-05, "loss": 0.195, "step": 11448 }, { "epoch": 0.2125700367217059, "grad_norm": 0.28392860293388367, "learning_rate": 1.7851760879398403e-05, "loss": 0.3234, "step": 11450 }, { "epoch": 0.21260716685912454, "grad_norm": 0.4184112548828125, "learning_rate": 1.7851038450314546e-05, "loss": 0.2738, "step": 11452 }, { "epoch": 0.2126442969965432, "grad_norm": 0.36601537466049194, "learning_rate": 1.785031591440157e-05, "loss": 0.2953, "step": 11454 }, { "epoch": 0.21268142713396182, "grad_norm": 0.30151498317718506, "learning_rate": 1.784959327166931e-05, "loss": 0.1882, "step": 11456 }, { "epoch": 0.21271855727138045, "grad_norm": 0.29366907477378845, "learning_rate": 1.7848870522127598e-05, "loss": 0.2897, "step": 11458 }, { "epoch": 0.2127556874087991, "grad_norm": 0.5849746465682983, "learning_rate": 1.7848147665786272e-05, "loss": 0.343, "step": 11460 }, { "epoch": 0.21279281754621773, "grad_norm": 0.3080577552318573, "learning_rate": 1.7847424702655162e-05, "loss": 0.3845, "step": 11462 }, { "epoch": 0.2128299476836364, "grad_norm": 0.5569397211074829, "learning_rate": 1.7846701632744104e-05, "loss": 0.5116, "step": 11464 }, { "epoch": 0.21286707782105502, "grad_norm": 0.35806289315223694, "learning_rate": 1.7845978456062944e-05, "loss": 0.3764, "step": 11466 }, { "epoch": 0.21290420795847365, "grad_norm": 0.33247724175453186, "learning_rate": 1.7845255172621517e-05, "loss": 0.2466, "step": 11468 }, { "epoch": 0.2129413380958923, "grad_norm": 0.4600261449813843, "learning_rate": 1.784453178242967e-05, "loss": 0.4468, "step": 11470 }, { "epoch": 0.21297846823331093, "grad_norm": 0.49692943692207336, "learning_rate": 1.7843808285497238e-05, "loss": 0.2575, "step": 11472 }, { "epoch": 0.21301559837072956, "grad_norm": 0.3204275369644165, "learning_rate": 1.784308468183407e-05, "loss": 0.2747, "step": 11474 }, { "epoch": 0.21305272850814821, "grad_norm": 0.35899871587753296, "learning_rate": 1.7842360971450013e-05, "loss": 0.2242, "step": 11476 }, { "epoch": 0.21308985864556684, "grad_norm": 0.3258490562438965, "learning_rate": 1.7841637154354916e-05, "loss": 0.3945, "step": 11478 }, { "epoch": 0.2131269887829855, "grad_norm": 0.40194201469421387, "learning_rate": 1.7840913230558624e-05, "loss": 0.2938, "step": 11480 }, { "epoch": 0.21316411892040413, "grad_norm": 0.48156869411468506, "learning_rate": 1.784018920007099e-05, "loss": 0.3341, "step": 11482 }, { "epoch": 0.21320124905782276, "grad_norm": 0.48903074860572815, "learning_rate": 1.7839465062901865e-05, "loss": 0.4978, "step": 11484 }, { "epoch": 0.2132383791952414, "grad_norm": 0.46952056884765625, "learning_rate": 1.7838740819061102e-05, "loss": 0.1329, "step": 11486 }, { "epoch": 0.21327550933266004, "grad_norm": 0.4599023461341858, "learning_rate": 1.7838016468558557e-05, "loss": 0.297, "step": 11488 }, { "epoch": 0.21331263947007867, "grad_norm": 0.40903759002685547, "learning_rate": 1.7837292011404083e-05, "loss": 0.2725, "step": 11490 }, { "epoch": 0.21334976960749732, "grad_norm": 0.391618937253952, "learning_rate": 1.7836567447607542e-05, "loss": 0.3916, "step": 11492 }, { "epoch": 0.21338689974491595, "grad_norm": 2.1403417587280273, "learning_rate": 1.7835842777178792e-05, "loss": 0.3439, "step": 11494 }, { "epoch": 0.21342402988233458, "grad_norm": 0.2944311499595642, "learning_rate": 1.7835118000127693e-05, "loss": 0.4008, "step": 11496 }, { "epoch": 0.21346116001975324, "grad_norm": 0.42583248019218445, "learning_rate": 1.7834393116464107e-05, "loss": 0.2525, "step": 11498 }, { "epoch": 0.21349829015717187, "grad_norm": 0.43102923035621643, "learning_rate": 1.7833668126197895e-05, "loss": 0.2459, "step": 11500 }, { "epoch": 0.21353542029459052, "grad_norm": 0.31348541378974915, "learning_rate": 1.7832943029338925e-05, "loss": 0.1969, "step": 11502 }, { "epoch": 0.21357255043200915, "grad_norm": 0.4670495092868805, "learning_rate": 1.7832217825897065e-05, "loss": 0.2661, "step": 11504 }, { "epoch": 0.21360968056942778, "grad_norm": 0.44927075505256653, "learning_rate": 1.783149251588218e-05, "loss": 0.4168, "step": 11506 }, { "epoch": 0.21364681070684644, "grad_norm": 0.3722849488258362, "learning_rate": 1.7830767099304135e-05, "loss": 0.3584, "step": 11508 }, { "epoch": 0.21368394084426506, "grad_norm": 0.28457188606262207, "learning_rate": 1.7830041576172813e-05, "loss": 0.1731, "step": 11510 }, { "epoch": 0.2137210709816837, "grad_norm": 0.31138989329338074, "learning_rate": 1.7829315946498075e-05, "loss": 0.4616, "step": 11512 }, { "epoch": 0.21375820111910235, "grad_norm": 0.3556770980358124, "learning_rate": 1.7828590210289797e-05, "loss": 0.3717, "step": 11514 }, { "epoch": 0.21379533125652098, "grad_norm": 0.35543492436408997, "learning_rate": 1.7827864367557856e-05, "loss": 0.2778, "step": 11516 }, { "epoch": 0.21383246139393963, "grad_norm": 0.6743049621582031, "learning_rate": 1.7827138418312132e-05, "loss": 0.3329, "step": 11518 }, { "epoch": 0.21386959153135826, "grad_norm": 0.2347886711359024, "learning_rate": 1.7826412362562497e-05, "loss": 0.3199, "step": 11520 }, { "epoch": 0.2139067216687769, "grad_norm": 0.3072112798690796, "learning_rate": 1.7825686200318833e-05, "loss": 0.4713, "step": 11522 }, { "epoch": 0.21394385180619555, "grad_norm": 0.26356202363967896, "learning_rate": 1.782495993159102e-05, "loss": 0.3933, "step": 11524 }, { "epoch": 0.21398098194361417, "grad_norm": 0.30975306034088135, "learning_rate": 1.782423355638894e-05, "loss": 0.3039, "step": 11526 }, { "epoch": 0.2140181120810328, "grad_norm": 0.5246520042419434, "learning_rate": 1.7823507074722477e-05, "loss": 0.3466, "step": 11528 }, { "epoch": 0.21405524221845146, "grad_norm": 0.245462104678154, "learning_rate": 1.782278048660152e-05, "loss": 0.1666, "step": 11530 }, { "epoch": 0.2140923723558701, "grad_norm": 0.5315453410148621, "learning_rate": 1.782205379203595e-05, "loss": 0.3585, "step": 11532 }, { "epoch": 0.21412950249328871, "grad_norm": 0.5651087760925293, "learning_rate": 1.7821326991035656e-05, "loss": 0.2643, "step": 11534 }, { "epoch": 0.21416663263070737, "grad_norm": 0.4269042909145355, "learning_rate": 1.7820600083610534e-05, "loss": 0.6047, "step": 11536 }, { "epoch": 0.214203762768126, "grad_norm": 0.29089438915252686, "learning_rate": 1.7819873069770464e-05, "loss": 0.3744, "step": 11538 }, { "epoch": 0.21424089290554466, "grad_norm": 0.5004473924636841, "learning_rate": 1.781914594952535e-05, "loss": 0.433, "step": 11540 }, { "epoch": 0.21427802304296328, "grad_norm": 0.24751044809818268, "learning_rate": 1.781841872288508e-05, "loss": 0.2993, "step": 11542 }, { "epoch": 0.2143151531803819, "grad_norm": 0.3154331147670746, "learning_rate": 1.781769138985955e-05, "loss": 0.2875, "step": 11544 }, { "epoch": 0.21435228331780057, "grad_norm": 0.43083828687667847, "learning_rate": 1.7816963950458656e-05, "loss": 0.248, "step": 11546 }, { "epoch": 0.2143894134552192, "grad_norm": 0.4418305456638336, "learning_rate": 1.7816236404692294e-05, "loss": 0.335, "step": 11548 }, { "epoch": 0.21442654359263782, "grad_norm": 0.45585572719573975, "learning_rate": 1.7815508752570374e-05, "loss": 0.2524, "step": 11550 }, { "epoch": 0.21446367373005648, "grad_norm": 0.4467269480228424, "learning_rate": 1.7814780994102786e-05, "loss": 0.2737, "step": 11552 }, { "epoch": 0.2145008038674751, "grad_norm": 0.45708996057510376, "learning_rate": 1.7814053129299435e-05, "loss": 0.481, "step": 11554 }, { "epoch": 0.21453793400489377, "grad_norm": 0.3335057497024536, "learning_rate": 1.781332515817023e-05, "loss": 0.4306, "step": 11556 }, { "epoch": 0.2145750641423124, "grad_norm": 0.4115939140319824, "learning_rate": 1.781259708072507e-05, "loss": 0.2511, "step": 11558 }, { "epoch": 0.21461219427973102, "grad_norm": 0.5011863708496094, "learning_rate": 1.7811868896973868e-05, "loss": 0.3784, "step": 11560 }, { "epoch": 0.21464932441714968, "grad_norm": 0.37148159742355347, "learning_rate": 1.781114060692653e-05, "loss": 0.1917, "step": 11562 }, { "epoch": 0.2146864545545683, "grad_norm": 0.44088369607925415, "learning_rate": 1.781041221059296e-05, "loss": 0.174, "step": 11564 }, { "epoch": 0.21472358469198694, "grad_norm": 0.3805510103702545, "learning_rate": 1.780968370798308e-05, "loss": 0.4689, "step": 11566 }, { "epoch": 0.2147607148294056, "grad_norm": 0.3723870515823364, "learning_rate": 1.7808955099106797e-05, "loss": 0.1415, "step": 11568 }, { "epoch": 0.21479784496682422, "grad_norm": 0.3652951717376709, "learning_rate": 1.7808226383974022e-05, "loss": 0.3654, "step": 11570 }, { "epoch": 0.21483497510424285, "grad_norm": 0.42163509130477905, "learning_rate": 1.780749756259468e-05, "loss": 0.3133, "step": 11572 }, { "epoch": 0.2148721052416615, "grad_norm": 0.3045058250427246, "learning_rate": 1.780676863497868e-05, "loss": 0.2281, "step": 11574 }, { "epoch": 0.21490923537908013, "grad_norm": 0.3182167112827301, "learning_rate": 1.780603960113594e-05, "loss": 0.3289, "step": 11576 }, { "epoch": 0.2149463655164988, "grad_norm": 0.38915860652923584, "learning_rate": 1.7805310461076386e-05, "loss": 0.449, "step": 11578 }, { "epoch": 0.21498349565391742, "grad_norm": 0.5140718817710876, "learning_rate": 1.7804581214809934e-05, "loss": 0.4113, "step": 11580 }, { "epoch": 0.21502062579133605, "grad_norm": 0.3171132802963257, "learning_rate": 1.780385186234651e-05, "loss": 0.2031, "step": 11582 }, { "epoch": 0.2150577559287547, "grad_norm": 0.32669177651405334, "learning_rate": 1.7803122403696037e-05, "loss": 0.4019, "step": 11584 }, { "epoch": 0.21509488606617333, "grad_norm": 0.36177048087120056, "learning_rate": 1.7802392838868443e-05, "loss": 0.2707, "step": 11586 }, { "epoch": 0.21513201620359196, "grad_norm": 0.47265127301216125, "learning_rate": 1.7801663167873654e-05, "loss": 0.4945, "step": 11588 }, { "epoch": 0.21516914634101061, "grad_norm": 0.48518139123916626, "learning_rate": 1.7800933390721592e-05, "loss": 0.4166, "step": 11590 }, { "epoch": 0.21520627647842924, "grad_norm": 0.34591609239578247, "learning_rate": 1.7800203507422196e-05, "loss": 0.2916, "step": 11592 }, { "epoch": 0.2152434066158479, "grad_norm": 0.3669845461845398, "learning_rate": 1.7799473517985398e-05, "loss": 0.4412, "step": 11594 }, { "epoch": 0.21528053675326653, "grad_norm": 0.32830438017845154, "learning_rate": 1.7798743422421125e-05, "loss": 0.2918, "step": 11596 }, { "epoch": 0.21531766689068516, "grad_norm": 0.3107183873653412, "learning_rate": 1.7798013220739317e-05, "loss": 0.3959, "step": 11598 }, { "epoch": 0.2153547970281038, "grad_norm": 0.7919156551361084, "learning_rate": 1.7797282912949902e-05, "loss": 0.236, "step": 11600 }, { "epoch": 0.21539192716552244, "grad_norm": 0.3440660834312439, "learning_rate": 1.7796552499062824e-05, "loss": 0.4059, "step": 11602 }, { "epoch": 0.21542905730294107, "grad_norm": 0.3416938781738281, "learning_rate": 1.779582197908802e-05, "loss": 0.151, "step": 11604 }, { "epoch": 0.21546618744035972, "grad_norm": 0.3433080017566681, "learning_rate": 1.7795091353035432e-05, "loss": 0.2142, "step": 11606 }, { "epoch": 0.21550331757777835, "grad_norm": 0.3219144642353058, "learning_rate": 1.7794360620914996e-05, "loss": 0.2532, "step": 11608 }, { "epoch": 0.21554044771519698, "grad_norm": 0.3427276611328125, "learning_rate": 1.779362978273666e-05, "loss": 0.3072, "step": 11610 }, { "epoch": 0.21557757785261564, "grad_norm": 0.29735422134399414, "learning_rate": 1.7792898838510368e-05, "loss": 0.3904, "step": 11612 }, { "epoch": 0.21561470799003427, "grad_norm": 0.3915838599205017, "learning_rate": 1.7792167788246067e-05, "loss": 0.3779, "step": 11614 }, { "epoch": 0.21565183812745292, "grad_norm": 0.434965580701828, "learning_rate": 1.7791436631953696e-05, "loss": 0.4514, "step": 11616 }, { "epoch": 0.21568896826487155, "grad_norm": 0.43974030017852783, "learning_rate": 1.779070536964322e-05, "loss": 0.2888, "step": 11618 }, { "epoch": 0.21572609840229018, "grad_norm": 0.3089367747306824, "learning_rate": 1.7789974001324576e-05, "loss": 0.2847, "step": 11620 }, { "epoch": 0.21576322853970883, "grad_norm": 0.43584901094436646, "learning_rate": 1.7789242527007715e-05, "loss": 0.3013, "step": 11622 }, { "epoch": 0.21580035867712746, "grad_norm": 0.3898523449897766, "learning_rate": 1.77885109467026e-05, "loss": 0.4838, "step": 11624 }, { "epoch": 0.2158374888145461, "grad_norm": 0.4706246852874756, "learning_rate": 1.7787779260419177e-05, "loss": 0.3107, "step": 11626 }, { "epoch": 0.21587461895196475, "grad_norm": 0.2623288929462433, "learning_rate": 1.778704746816741e-05, "loss": 0.4537, "step": 11628 }, { "epoch": 0.21591174908938338, "grad_norm": 0.3656718134880066, "learning_rate": 1.7786315569957246e-05, "loss": 0.3526, "step": 11630 }, { "epoch": 0.21594887922680203, "grad_norm": 0.3711652457714081, "learning_rate": 1.778558356579865e-05, "loss": 0.2342, "step": 11632 }, { "epoch": 0.21598600936422066, "grad_norm": 0.31929171085357666, "learning_rate": 1.7784851455701587e-05, "loss": 0.4484, "step": 11634 }, { "epoch": 0.2160231395016393, "grad_norm": 0.5670583844184875, "learning_rate": 1.7784119239676012e-05, "loss": 0.2593, "step": 11636 }, { "epoch": 0.21606026963905794, "grad_norm": 0.6417336463928223, "learning_rate": 1.7783386917731892e-05, "loss": 0.4084, "step": 11638 }, { "epoch": 0.21609739977647657, "grad_norm": 0.34550225734710693, "learning_rate": 1.7782654489879187e-05, "loss": 0.1716, "step": 11640 }, { "epoch": 0.2161345299138952, "grad_norm": 0.33153343200683594, "learning_rate": 1.7781921956127868e-05, "loss": 0.2753, "step": 11642 }, { "epoch": 0.21617166005131386, "grad_norm": 0.38554567098617554, "learning_rate": 1.77811893164879e-05, "loss": 0.2786, "step": 11644 }, { "epoch": 0.21620879018873249, "grad_norm": 0.45013391971588135, "learning_rate": 1.778045657096925e-05, "loss": 0.5178, "step": 11646 }, { "epoch": 0.21624592032615111, "grad_norm": 0.3294655382633209, "learning_rate": 1.7779723719581893e-05, "loss": 0.2283, "step": 11648 }, { "epoch": 0.21628305046356977, "grad_norm": 0.31702443957328796, "learning_rate": 1.77789907623358e-05, "loss": 0.4256, "step": 11650 }, { "epoch": 0.2163201806009884, "grad_norm": 0.4021613597869873, "learning_rate": 1.7778257699240946e-05, "loss": 0.5154, "step": 11652 }, { "epoch": 0.21635731073840705, "grad_norm": 0.30701741576194763, "learning_rate": 1.77775245303073e-05, "loss": 0.1619, "step": 11654 }, { "epoch": 0.21639444087582568, "grad_norm": 0.27280837297439575, "learning_rate": 1.7776791255544844e-05, "loss": 0.3622, "step": 11656 }, { "epoch": 0.2164315710132443, "grad_norm": 1.4822739362716675, "learning_rate": 1.777605787496355e-05, "loss": 0.3385, "step": 11658 }, { "epoch": 0.21646870115066297, "grad_norm": 0.3658817410469055, "learning_rate": 1.77753243885734e-05, "loss": 0.3216, "step": 11660 }, { "epoch": 0.2165058312880816, "grad_norm": 0.39698219299316406, "learning_rate": 1.7774590796384382e-05, "loss": 0.4169, "step": 11662 }, { "epoch": 0.21654296142550022, "grad_norm": 0.2685529887676239, "learning_rate": 1.7773857098406463e-05, "loss": 0.3702, "step": 11664 }, { "epoch": 0.21658009156291888, "grad_norm": 0.282993346452713, "learning_rate": 1.7773123294649637e-05, "loss": 0.3329, "step": 11666 }, { "epoch": 0.2166172217003375, "grad_norm": 0.30182579159736633, "learning_rate": 1.7772389385123885e-05, "loss": 0.2155, "step": 11668 }, { "epoch": 0.21665435183775617, "grad_norm": 0.3884281516075134, "learning_rate": 1.7771655369839194e-05, "loss": 0.2903, "step": 11670 }, { "epoch": 0.2166914819751748, "grad_norm": 0.3103363513946533, "learning_rate": 1.7770921248805554e-05, "loss": 0.3722, "step": 11672 }, { "epoch": 0.21672861211259342, "grad_norm": 0.6926793456077576, "learning_rate": 1.7770187022032952e-05, "loss": 0.2255, "step": 11674 }, { "epoch": 0.21676574225001208, "grad_norm": 0.39647847414016724, "learning_rate": 1.776945268953138e-05, "loss": 0.3133, "step": 11676 }, { "epoch": 0.2168028723874307, "grad_norm": 0.29309317469596863, "learning_rate": 1.776871825131083e-05, "loss": 0.394, "step": 11678 }, { "epoch": 0.21684000252484933, "grad_norm": 0.4459497332572937, "learning_rate": 1.776798370738129e-05, "loss": 0.4171, "step": 11680 }, { "epoch": 0.216877132662268, "grad_norm": 0.3338577151298523, "learning_rate": 1.7767249057752765e-05, "loss": 0.2979, "step": 11682 }, { "epoch": 0.21691426279968662, "grad_norm": 0.44793543219566345, "learning_rate": 1.7766514302435243e-05, "loss": 0.3787, "step": 11684 }, { "epoch": 0.21695139293710525, "grad_norm": 0.30891355872154236, "learning_rate": 1.7765779441438726e-05, "loss": 0.1643, "step": 11686 }, { "epoch": 0.2169885230745239, "grad_norm": 0.5312884449958801, "learning_rate": 1.7765044474773213e-05, "loss": 0.2262, "step": 11688 }, { "epoch": 0.21702565321194253, "grad_norm": 0.3190726637840271, "learning_rate": 1.77643094024487e-05, "loss": 0.2582, "step": 11690 }, { "epoch": 0.2170627833493612, "grad_norm": 0.4413638114929199, "learning_rate": 1.7763574224475192e-05, "loss": 0.2651, "step": 11692 }, { "epoch": 0.21709991348677982, "grad_norm": 0.32466989755630493, "learning_rate": 1.77628389408627e-05, "loss": 0.2543, "step": 11694 }, { "epoch": 0.21713704362419844, "grad_norm": 0.3333269953727722, "learning_rate": 1.7762103551621214e-05, "loss": 0.5586, "step": 11696 }, { "epoch": 0.2171741737616171, "grad_norm": 0.33949244022369385, "learning_rate": 1.7761368056760753e-05, "loss": 0.4606, "step": 11698 }, { "epoch": 0.21721130389903573, "grad_norm": 0.3035808801651001, "learning_rate": 1.7760632456291324e-05, "loss": 0.4636, "step": 11700 }, { "epoch": 0.21724843403645436, "grad_norm": 0.2670368552207947, "learning_rate": 1.7759896750222927e-05, "loss": 0.1996, "step": 11702 }, { "epoch": 0.217285564173873, "grad_norm": 0.33414679765701294, "learning_rate": 1.7759160938565586e-05, "loss": 0.4058, "step": 11704 }, { "epoch": 0.21732269431129164, "grad_norm": 0.32663220167160034, "learning_rate": 1.77584250213293e-05, "loss": 0.3148, "step": 11706 }, { "epoch": 0.2173598244487103, "grad_norm": 0.28434255719184875, "learning_rate": 1.775768899852409e-05, "loss": 0.2994, "step": 11708 }, { "epoch": 0.21739695458612893, "grad_norm": 0.43545177578926086, "learning_rate": 1.775695287015997e-05, "loss": 0.3458, "step": 11710 }, { "epoch": 0.21743408472354755, "grad_norm": 0.5098344087600708, "learning_rate": 1.775621663624696e-05, "loss": 0.445, "step": 11712 }, { "epoch": 0.2174712148609662, "grad_norm": 0.3793988525867462, "learning_rate": 1.7755480296795068e-05, "loss": 0.3367, "step": 11714 }, { "epoch": 0.21750834499838484, "grad_norm": 0.24274712800979614, "learning_rate": 1.7754743851814322e-05, "loss": 0.345, "step": 11716 }, { "epoch": 0.21754547513580347, "grad_norm": 0.28071388602256775, "learning_rate": 1.775400730131474e-05, "loss": 0.3141, "step": 11718 }, { "epoch": 0.21758260527322212, "grad_norm": 0.3397105932235718, "learning_rate": 1.7753270645306345e-05, "loss": 0.3611, "step": 11720 }, { "epoch": 0.21761973541064075, "grad_norm": 0.3186165988445282, "learning_rate": 1.775253388379916e-05, "loss": 0.1774, "step": 11722 }, { "epoch": 0.21765686554805938, "grad_norm": 0.42826539278030396, "learning_rate": 1.7751797016803213e-05, "loss": 0.3379, "step": 11724 }, { "epoch": 0.21769399568547804, "grad_norm": 0.4246998727321625, "learning_rate": 1.7751060044328525e-05, "loss": 0.5351, "step": 11726 }, { "epoch": 0.21773112582289667, "grad_norm": 0.32764923572540283, "learning_rate": 1.7750322966385126e-05, "loss": 0.222, "step": 11728 }, { "epoch": 0.21776825596031532, "grad_norm": 0.5281040072441101, "learning_rate": 1.774958578298305e-05, "loss": 0.511, "step": 11730 }, { "epoch": 0.21780538609773395, "grad_norm": 0.3740936517715454, "learning_rate": 1.774884849413232e-05, "loss": 0.4096, "step": 11732 }, { "epoch": 0.21784251623515258, "grad_norm": 0.8390377759933472, "learning_rate": 1.7748111099842976e-05, "loss": 0.3183, "step": 11734 }, { "epoch": 0.21787964637257123, "grad_norm": 0.2964892089366913, "learning_rate": 1.7747373600125044e-05, "loss": 0.3896, "step": 11736 }, { "epoch": 0.21791677650998986, "grad_norm": 0.3244796097278595, "learning_rate": 1.774663599498857e-05, "loss": 0.2406, "step": 11738 }, { "epoch": 0.2179539066474085, "grad_norm": 0.36529427766799927, "learning_rate": 1.7745898284443578e-05, "loss": 0.3456, "step": 11740 }, { "epoch": 0.21799103678482715, "grad_norm": 0.39138689637184143, "learning_rate": 1.7745160468500115e-05, "loss": 0.2657, "step": 11742 }, { "epoch": 0.21802816692224578, "grad_norm": 0.361465722322464, "learning_rate": 1.7744422547168216e-05, "loss": 0.1946, "step": 11744 }, { "epoch": 0.21806529705966443, "grad_norm": 0.3145333528518677, "learning_rate": 1.774368452045792e-05, "loss": 0.3187, "step": 11746 }, { "epoch": 0.21810242719708306, "grad_norm": 0.3784826099872589, "learning_rate": 1.774294638837928e-05, "loss": 0.4318, "step": 11748 }, { "epoch": 0.2181395573345017, "grad_norm": 0.3336193859577179, "learning_rate": 1.7742208150942328e-05, "loss": 0.1942, "step": 11750 }, { "epoch": 0.21817668747192034, "grad_norm": 0.2890664041042328, "learning_rate": 1.7741469808157114e-05, "loss": 0.2165, "step": 11752 }, { "epoch": 0.21821381760933897, "grad_norm": 0.5425647497177124, "learning_rate": 1.7740731360033688e-05, "loss": 0.2951, "step": 11754 }, { "epoch": 0.2182509477467576, "grad_norm": 0.4762794077396393, "learning_rate": 1.773999280658209e-05, "loss": 0.384, "step": 11756 }, { "epoch": 0.21828807788417626, "grad_norm": 0.298922061920166, "learning_rate": 1.773925414781237e-05, "loss": 0.4055, "step": 11758 }, { "epoch": 0.21832520802159489, "grad_norm": 0.41174009442329407, "learning_rate": 1.7738515383734593e-05, "loss": 0.2703, "step": 11760 }, { "epoch": 0.2183623381590135, "grad_norm": 0.3122740387916565, "learning_rate": 1.7737776514358795e-05, "loss": 0.2366, "step": 11762 }, { "epoch": 0.21839946829643217, "grad_norm": 0.5369684100151062, "learning_rate": 1.7737037539695036e-05, "loss": 0.2402, "step": 11764 }, { "epoch": 0.2184365984338508, "grad_norm": 0.36874696612358093, "learning_rate": 1.7736298459753373e-05, "loss": 0.3595, "step": 11766 }, { "epoch": 0.21847372857126945, "grad_norm": 0.3430725634098053, "learning_rate": 1.773555927454386e-05, "loss": 0.4032, "step": 11768 }, { "epoch": 0.21851085870868808, "grad_norm": 0.33281052112579346, "learning_rate": 1.7734819984076556e-05, "loss": 0.2263, "step": 11770 }, { "epoch": 0.2185479888461067, "grad_norm": 0.32666975259780884, "learning_rate": 1.7734080588361522e-05, "loss": 0.3449, "step": 11772 }, { "epoch": 0.21858511898352537, "grad_norm": 0.31406232714653015, "learning_rate": 1.7733341087408817e-05, "loss": 0.1904, "step": 11774 }, { "epoch": 0.218622249120944, "grad_norm": 0.3264770805835724, "learning_rate": 1.77326014812285e-05, "loss": 0.3706, "step": 11776 }, { "epoch": 0.21865937925836262, "grad_norm": 0.3386581838130951, "learning_rate": 1.7731861769830643e-05, "loss": 0.3241, "step": 11778 }, { "epoch": 0.21869650939578128, "grad_norm": 0.44383057951927185, "learning_rate": 1.7731121953225306e-05, "loss": 0.2901, "step": 11780 }, { "epoch": 0.2187336395331999, "grad_norm": 0.28619638085365295, "learning_rate": 1.7730382031422554e-05, "loss": 0.3034, "step": 11782 }, { "epoch": 0.21877076967061856, "grad_norm": 0.479587197303772, "learning_rate": 1.772964200443246e-05, "loss": 0.1831, "step": 11784 }, { "epoch": 0.2188078998080372, "grad_norm": 0.4370276629924774, "learning_rate": 1.772890187226509e-05, "loss": 0.538, "step": 11786 }, { "epoch": 0.21884502994545582, "grad_norm": 0.3278469443321228, "learning_rate": 1.7728161634930518e-05, "loss": 0.3296, "step": 11788 }, { "epoch": 0.21888216008287448, "grad_norm": 0.6768957376480103, "learning_rate": 1.7727421292438816e-05, "loss": 0.4059, "step": 11790 }, { "epoch": 0.2189192902202931, "grad_norm": 0.49905920028686523, "learning_rate": 1.7726680844800053e-05, "loss": 0.5475, "step": 11792 }, { "epoch": 0.21895642035771173, "grad_norm": 0.2606540024280548, "learning_rate": 1.772594029202431e-05, "loss": 0.2928, "step": 11794 }, { "epoch": 0.2189935504951304, "grad_norm": 0.315047949552536, "learning_rate": 1.7725199634121663e-05, "loss": 0.2818, "step": 11796 }, { "epoch": 0.21903068063254902, "grad_norm": 0.43197986483573914, "learning_rate": 1.7724458871102186e-05, "loss": 0.3067, "step": 11798 }, { "epoch": 0.21906781076996765, "grad_norm": 0.38735368847846985, "learning_rate": 1.772371800297596e-05, "loss": 0.288, "step": 11800 }, { "epoch": 0.2191049409073863, "grad_norm": 0.31600499153137207, "learning_rate": 1.772297702975307e-05, "loss": 0.3397, "step": 11802 }, { "epoch": 0.21914207104480493, "grad_norm": 0.39824894070625305, "learning_rate": 1.7722235951443595e-05, "loss": 0.2036, "step": 11804 }, { "epoch": 0.2191792011822236, "grad_norm": 0.33607062697410583, "learning_rate": 1.7721494768057618e-05, "loss": 0.2655, "step": 11806 }, { "epoch": 0.21921633131964222, "grad_norm": 0.29336264729499817, "learning_rate": 1.7720753479605226e-05, "loss": 0.1517, "step": 11808 }, { "epoch": 0.21925346145706084, "grad_norm": 0.28137242794036865, "learning_rate": 1.7720012086096507e-05, "loss": 0.309, "step": 11810 }, { "epoch": 0.2192905915944795, "grad_norm": 0.5039309859275818, "learning_rate": 1.7719270587541547e-05, "loss": 0.2358, "step": 11812 }, { "epoch": 0.21932772173189813, "grad_norm": 0.2509121596813202, "learning_rate": 1.7718528983950432e-05, "loss": 0.2434, "step": 11814 }, { "epoch": 0.21936485186931676, "grad_norm": 0.544304370880127, "learning_rate": 1.7717787275333263e-05, "loss": 0.2158, "step": 11816 }, { "epoch": 0.2194019820067354, "grad_norm": 0.2899669408798218, "learning_rate": 1.7717045461700126e-05, "loss": 0.4096, "step": 11818 }, { "epoch": 0.21943911214415404, "grad_norm": 0.4426668584346771, "learning_rate": 1.7716303543061113e-05, "loss": 0.3397, "step": 11820 }, { "epoch": 0.2194762422815727, "grad_norm": 0.36808159947395325, "learning_rate": 1.7715561519426323e-05, "loss": 0.3998, "step": 11822 }, { "epoch": 0.21951337241899133, "grad_norm": 0.4388499855995178, "learning_rate": 1.7714819390805854e-05, "loss": 0.1492, "step": 11824 }, { "epoch": 0.21955050255640995, "grad_norm": 0.31198567152023315, "learning_rate": 1.77140771572098e-05, "loss": 0.3563, "step": 11826 }, { "epoch": 0.2195876326938286, "grad_norm": 0.3879901170730591, "learning_rate": 1.771333481864826e-05, "loss": 0.3429, "step": 11828 }, { "epoch": 0.21962476283124724, "grad_norm": 0.3279518187046051, "learning_rate": 1.7712592375131338e-05, "loss": 0.3222, "step": 11830 }, { "epoch": 0.21966189296866587, "grad_norm": 0.2979283332824707, "learning_rate": 1.7711849826669136e-05, "loss": 0.482, "step": 11832 }, { "epoch": 0.21969902310608452, "grad_norm": 0.2699752151966095, "learning_rate": 1.7711107173271756e-05, "loss": 0.266, "step": 11834 }, { "epoch": 0.21973615324350315, "grad_norm": 0.34165728092193604, "learning_rate": 1.7710364414949312e-05, "loss": 0.2764, "step": 11836 }, { "epoch": 0.21977328338092178, "grad_norm": 0.4444302022457123, "learning_rate": 1.7709621551711896e-05, "loss": 0.3999, "step": 11838 }, { "epoch": 0.21981041351834044, "grad_norm": 0.3308092951774597, "learning_rate": 1.7708878583569626e-05, "loss": 0.5882, "step": 11840 }, { "epoch": 0.21984754365575906, "grad_norm": 0.36871659755706787, "learning_rate": 1.770813551053261e-05, "loss": 0.3896, "step": 11842 }, { "epoch": 0.21988467379317772, "grad_norm": 0.3203166723251343, "learning_rate": 1.7707392332610957e-05, "loss": 0.3394, "step": 11844 }, { "epoch": 0.21992180393059635, "grad_norm": 0.3662722408771515, "learning_rate": 1.770664904981478e-05, "loss": 0.4338, "step": 11846 }, { "epoch": 0.21995893406801498, "grad_norm": 0.433752179145813, "learning_rate": 1.7705905662154196e-05, "loss": 0.2038, "step": 11848 }, { "epoch": 0.21999606420543363, "grad_norm": 0.510762095451355, "learning_rate": 1.7705162169639317e-05, "loss": 0.3264, "step": 11850 }, { "epoch": 0.22003319434285226, "grad_norm": 0.5013280510902405, "learning_rate": 1.770441857228026e-05, "loss": 0.2388, "step": 11852 }, { "epoch": 0.2200703244802709, "grad_norm": 0.2385890930891037, "learning_rate": 1.770367487008714e-05, "loss": 0.3615, "step": 11854 }, { "epoch": 0.22010745461768955, "grad_norm": 0.3456043303012848, "learning_rate": 1.7702931063070084e-05, "loss": 0.4043, "step": 11856 }, { "epoch": 0.22014458475510817, "grad_norm": 0.3471605181694031, "learning_rate": 1.770218715123921e-05, "loss": 0.3438, "step": 11858 }, { "epoch": 0.22018171489252683, "grad_norm": 0.4312245845794678, "learning_rate": 1.770144313460464e-05, "loss": 0.4343, "step": 11860 }, { "epoch": 0.22021884502994546, "grad_norm": 1.200728416442871, "learning_rate": 1.7700699013176494e-05, "loss": 0.3087, "step": 11862 }, { "epoch": 0.2202559751673641, "grad_norm": 0.35962337255477905, "learning_rate": 1.7699954786964902e-05, "loss": 0.0891, "step": 11864 }, { "epoch": 0.22029310530478274, "grad_norm": 0.34709036350250244, "learning_rate": 1.769921045597999e-05, "loss": 0.2664, "step": 11866 }, { "epoch": 0.22033023544220137, "grad_norm": 0.3900023400783539, "learning_rate": 1.7698466020231887e-05, "loss": 0.2083, "step": 11868 }, { "epoch": 0.22036736557962, "grad_norm": 0.33800074458122253, "learning_rate": 1.769772147973072e-05, "loss": 0.5163, "step": 11870 }, { "epoch": 0.22040449571703866, "grad_norm": 0.33367934823036194, "learning_rate": 1.769697683448662e-05, "loss": 0.5203, "step": 11872 }, { "epoch": 0.22044162585445728, "grad_norm": 0.3235739767551422, "learning_rate": 1.769623208450972e-05, "loss": 0.2921, "step": 11874 }, { "epoch": 0.2204787559918759, "grad_norm": 0.410702109336853, "learning_rate": 1.7695487229810157e-05, "loss": 0.2929, "step": 11876 }, { "epoch": 0.22051588612929457, "grad_norm": 0.477309912443161, "learning_rate": 1.769474227039806e-05, "loss": 0.306, "step": 11878 }, { "epoch": 0.2205530162667132, "grad_norm": 0.33906829357147217, "learning_rate": 1.769399720628357e-05, "loss": 0.3882, "step": 11880 }, { "epoch": 0.22059014640413185, "grad_norm": 0.4200831651687622, "learning_rate": 1.7693252037476828e-05, "loss": 0.4112, "step": 11882 }, { "epoch": 0.22062727654155048, "grad_norm": 0.2531754970550537, "learning_rate": 1.7692506763987967e-05, "loss": 0.3926, "step": 11884 }, { "epoch": 0.2206644066789691, "grad_norm": 0.31104719638824463, "learning_rate": 1.7691761385827128e-05, "loss": 0.3234, "step": 11886 }, { "epoch": 0.22070153681638777, "grad_norm": 0.3835548162460327, "learning_rate": 1.769101590300446e-05, "loss": 0.2685, "step": 11888 }, { "epoch": 0.2207386669538064, "grad_norm": 0.3172190487384796, "learning_rate": 1.76902703155301e-05, "loss": 0.2448, "step": 11890 }, { "epoch": 0.22077579709122502, "grad_norm": 0.5205923914909363, "learning_rate": 1.7689524623414196e-05, "loss": 0.5127, "step": 11892 }, { "epoch": 0.22081292722864368, "grad_norm": 0.34658876061439514, "learning_rate": 1.7688778826666896e-05, "loss": 0.2965, "step": 11894 }, { "epoch": 0.2208500573660623, "grad_norm": 0.4719263017177582, "learning_rate": 1.768803292529835e-05, "loss": 0.4143, "step": 11896 }, { "epoch": 0.22088718750348096, "grad_norm": 0.31216877698898315, "learning_rate": 1.76872869193187e-05, "loss": 0.3368, "step": 11898 }, { "epoch": 0.2209243176408996, "grad_norm": 0.32623419165611267, "learning_rate": 1.7686540808738103e-05, "loss": 0.3787, "step": 11900 }, { "epoch": 0.22096144777831822, "grad_norm": 0.5687903165817261, "learning_rate": 1.7685794593566706e-05, "loss": 0.3338, "step": 11902 }, { "epoch": 0.22099857791573688, "grad_norm": 0.34359288215637207, "learning_rate": 1.7685048273814668e-05, "loss": 0.3974, "step": 11904 }, { "epoch": 0.2210357080531555, "grad_norm": 0.5070930123329163, "learning_rate": 1.7684301849492144e-05, "loss": 0.3566, "step": 11906 }, { "epoch": 0.22107283819057413, "grad_norm": 0.3738914728164673, "learning_rate": 1.7683555320609287e-05, "loss": 0.4272, "step": 11908 }, { "epoch": 0.2211099683279928, "grad_norm": 0.5204195976257324, "learning_rate": 1.768280868717626e-05, "loss": 0.3264, "step": 11910 }, { "epoch": 0.22114709846541142, "grad_norm": 0.35910990834236145, "learning_rate": 1.7682061949203215e-05, "loss": 0.2156, "step": 11912 }, { "epoch": 0.22118422860283005, "grad_norm": 0.8209647536277771, "learning_rate": 1.768131510670032e-05, "loss": 0.5776, "step": 11914 }, { "epoch": 0.2212213587402487, "grad_norm": 0.32603925466537476, "learning_rate": 1.7680568159677736e-05, "loss": 0.1758, "step": 11916 }, { "epoch": 0.22125848887766733, "grad_norm": 0.6392351984977722, "learning_rate": 1.767982110814562e-05, "loss": 0.4334, "step": 11918 }, { "epoch": 0.221295619015086, "grad_norm": 0.4216277003288269, "learning_rate": 1.7679073952114148e-05, "loss": 0.2834, "step": 11920 }, { "epoch": 0.22133274915250462, "grad_norm": 0.3311893045902252, "learning_rate": 1.7678326691593478e-05, "loss": 0.2945, "step": 11922 }, { "epoch": 0.22136987928992324, "grad_norm": 0.40621522068977356, "learning_rate": 1.7677579326593784e-05, "loss": 0.3203, "step": 11924 }, { "epoch": 0.2214070094273419, "grad_norm": 0.4474644958972931, "learning_rate": 1.767683185712523e-05, "loss": 0.2587, "step": 11926 }, { "epoch": 0.22144413956476053, "grad_norm": 0.3213600516319275, "learning_rate": 1.767608428319799e-05, "loss": 0.4003, "step": 11928 }, { "epoch": 0.22148126970217916, "grad_norm": 0.5836672186851501, "learning_rate": 1.7675336604822235e-05, "loss": 0.154, "step": 11930 }, { "epoch": 0.2215183998395978, "grad_norm": 0.3745591938495636, "learning_rate": 1.7674588822008137e-05, "loss": 0.2864, "step": 11932 }, { "epoch": 0.22155552997701644, "grad_norm": 0.43277275562286377, "learning_rate": 1.767384093476588e-05, "loss": 0.2934, "step": 11934 }, { "epoch": 0.2215926601144351, "grad_norm": 0.38131460547447205, "learning_rate": 1.7673092943105626e-05, "loss": 0.2992, "step": 11936 }, { "epoch": 0.22162979025185373, "grad_norm": 0.37792280316352844, "learning_rate": 1.7672344847037562e-05, "loss": 0.4105, "step": 11938 }, { "epoch": 0.22166692038927235, "grad_norm": 0.3829842507839203, "learning_rate": 1.7671596646571868e-05, "loss": 0.4318, "step": 11940 }, { "epoch": 0.221704050526691, "grad_norm": 0.465193510055542, "learning_rate": 1.7670848341718724e-05, "loss": 0.3345, "step": 11942 }, { "epoch": 0.22174118066410964, "grad_norm": 0.6588819622993469, "learning_rate": 1.767009993248831e-05, "loss": 0.3363, "step": 11944 }, { "epoch": 0.22177831080152827, "grad_norm": 0.5793259739875793, "learning_rate": 1.7669351418890806e-05, "loss": 0.2801, "step": 11946 }, { "epoch": 0.22181544093894692, "grad_norm": 0.3190169334411621, "learning_rate": 1.766860280093641e-05, "loss": 0.3801, "step": 11948 }, { "epoch": 0.22185257107636555, "grad_norm": 0.46417924761772156, "learning_rate": 1.7667854078635295e-05, "loss": 0.3216, "step": 11950 }, { "epoch": 0.22188970121378418, "grad_norm": 0.3605559170246124, "learning_rate": 1.7667105251997654e-05, "loss": 0.473, "step": 11952 }, { "epoch": 0.22192683135120284, "grad_norm": 0.2942885756492615, "learning_rate": 1.7666356321033677e-05, "loss": 0.4502, "step": 11954 }, { "epoch": 0.22196396148862146, "grad_norm": 0.3319733142852783, "learning_rate": 1.766560728575355e-05, "loss": 0.2812, "step": 11956 }, { "epoch": 0.22200109162604012, "grad_norm": 0.32812047004699707, "learning_rate": 1.7664858146167474e-05, "loss": 0.3876, "step": 11958 }, { "epoch": 0.22203822176345875, "grad_norm": 0.22675660252571106, "learning_rate": 1.7664108902285636e-05, "loss": 0.0913, "step": 11960 }, { "epoch": 0.22207535190087738, "grad_norm": 0.3907155990600586, "learning_rate": 1.7663359554118233e-05, "loss": 0.2604, "step": 11962 }, { "epoch": 0.22211248203829603, "grad_norm": 0.321621298789978, "learning_rate": 1.766261010167546e-05, "loss": 0.1823, "step": 11964 }, { "epoch": 0.22214961217571466, "grad_norm": 0.33325880765914917, "learning_rate": 1.7661860544967515e-05, "loss": 0.2361, "step": 11966 }, { "epoch": 0.2221867423131333, "grad_norm": 0.285675972700119, "learning_rate": 1.76611108840046e-05, "loss": 0.3636, "step": 11968 }, { "epoch": 0.22222387245055195, "grad_norm": 0.3085360527038574, "learning_rate": 1.766036111879691e-05, "loss": 0.2841, "step": 11970 }, { "epoch": 0.22226100258797057, "grad_norm": 0.3567916750907898, "learning_rate": 1.7659611249354654e-05, "loss": 0.2461, "step": 11972 }, { "epoch": 0.22229813272538923, "grad_norm": 0.373532235622406, "learning_rate": 1.765886127568803e-05, "loss": 0.3636, "step": 11974 }, { "epoch": 0.22233526286280786, "grad_norm": 0.6448768973350525, "learning_rate": 1.7658111197807245e-05, "loss": 0.327, "step": 11976 }, { "epoch": 0.2223723930002265, "grad_norm": 0.4483274221420288, "learning_rate": 1.765736101572251e-05, "loss": 0.3557, "step": 11978 }, { "epoch": 0.22240952313764514, "grad_norm": 0.327824205160141, "learning_rate": 1.7656610729444022e-05, "loss": 0.4091, "step": 11980 }, { "epoch": 0.22244665327506377, "grad_norm": 0.3396904170513153, "learning_rate": 1.7655860338981998e-05, "loss": 0.2439, "step": 11982 }, { "epoch": 0.2224837834124824, "grad_norm": 0.30622032284736633, "learning_rate": 1.7655109844346648e-05, "loss": 0.4751, "step": 11984 }, { "epoch": 0.22252091354990106, "grad_norm": 0.42210638523101807, "learning_rate": 1.7654359245548183e-05, "loss": 0.3772, "step": 11986 }, { "epoch": 0.22255804368731968, "grad_norm": 0.2235272228717804, "learning_rate": 1.7653608542596812e-05, "loss": 0.3202, "step": 11988 }, { "epoch": 0.2225951738247383, "grad_norm": 0.6257721781730652, "learning_rate": 1.765285773550276e-05, "loss": 0.4758, "step": 11990 }, { "epoch": 0.22263230396215697, "grad_norm": 0.3404028117656708, "learning_rate": 1.765210682427623e-05, "loss": 0.4208, "step": 11992 }, { "epoch": 0.2226694340995756, "grad_norm": 0.4042735695838928, "learning_rate": 1.7651355808927454e-05, "loss": 0.3651, "step": 11994 }, { "epoch": 0.22270656423699425, "grad_norm": 0.39030569791793823, "learning_rate": 1.7650604689466643e-05, "loss": 0.3205, "step": 11996 }, { "epoch": 0.22274369437441288, "grad_norm": 0.5382828712463379, "learning_rate": 1.7649853465904015e-05, "loss": 0.2219, "step": 11998 }, { "epoch": 0.2227808245118315, "grad_norm": 0.3254512846469879, "learning_rate": 1.7649102138249796e-05, "loss": 0.2656, "step": 12000 }, { "epoch": 0.22281795464925017, "grad_norm": 0.4989635646343231, "learning_rate": 1.764835070651421e-05, "loss": 0.1574, "step": 12002 }, { "epoch": 0.2228550847866688, "grad_norm": 0.4164736866950989, "learning_rate": 1.764759917070748e-05, "loss": 0.5466, "step": 12004 }, { "epoch": 0.22289221492408742, "grad_norm": 0.26724839210510254, "learning_rate": 1.764684753083983e-05, "loss": 0.3191, "step": 12006 }, { "epoch": 0.22292934506150608, "grad_norm": 0.330612450838089, "learning_rate": 1.7646095786921492e-05, "loss": 0.4574, "step": 12008 }, { "epoch": 0.2229664751989247, "grad_norm": 0.4043966829776764, "learning_rate": 1.7645343938962693e-05, "loss": 0.3978, "step": 12010 }, { "epoch": 0.22300360533634336, "grad_norm": 0.3628135025501251, "learning_rate": 1.764459198697367e-05, "loss": 0.2679, "step": 12012 }, { "epoch": 0.223040735473762, "grad_norm": 0.2798672020435333, "learning_rate": 1.7643839930964638e-05, "loss": 0.2558, "step": 12014 }, { "epoch": 0.22307786561118062, "grad_norm": 0.34981054067611694, "learning_rate": 1.7643087770945843e-05, "loss": 0.3094, "step": 12016 }, { "epoch": 0.22311499574859928, "grad_norm": 0.2877528965473175, "learning_rate": 1.764233550692752e-05, "loss": 0.3695, "step": 12018 }, { "epoch": 0.2231521258860179, "grad_norm": 0.3457721173763275, "learning_rate": 1.76415831389199e-05, "loss": 0.4331, "step": 12020 }, { "epoch": 0.22318925602343653, "grad_norm": 0.34324902296066284, "learning_rate": 1.7640830666933225e-05, "loss": 0.5258, "step": 12022 }, { "epoch": 0.2232263861608552, "grad_norm": 0.5169813632965088, "learning_rate": 1.764007809097773e-05, "loss": 0.2426, "step": 12024 }, { "epoch": 0.22326351629827382, "grad_norm": 0.33846956491470337, "learning_rate": 1.7639325411063657e-05, "loss": 0.4613, "step": 12026 }, { "epoch": 0.22330064643569245, "grad_norm": 0.2937115728855133, "learning_rate": 1.7638572627201247e-05, "loss": 0.4964, "step": 12028 }, { "epoch": 0.2233377765731111, "grad_norm": 0.6198121905326843, "learning_rate": 1.7637819739400743e-05, "loss": 0.2469, "step": 12030 }, { "epoch": 0.22337490671052973, "grad_norm": 0.2619032859802246, "learning_rate": 1.7637066747672392e-05, "loss": 0.4192, "step": 12032 }, { "epoch": 0.2234120368479484, "grad_norm": 0.39697787165641785, "learning_rate": 1.763631365202644e-05, "loss": 0.3014, "step": 12034 }, { "epoch": 0.22344916698536701, "grad_norm": 0.24017538130283356, "learning_rate": 1.7635560452473127e-05, "loss": 0.2293, "step": 12036 }, { "epoch": 0.22348629712278564, "grad_norm": 0.36495137214660645, "learning_rate": 1.7634807149022713e-05, "loss": 0.5313, "step": 12038 }, { "epoch": 0.2235234272602043, "grad_norm": 0.3485358953475952, "learning_rate": 1.763405374168544e-05, "loss": 0.1923, "step": 12040 }, { "epoch": 0.22356055739762293, "grad_norm": 0.3236474096775055, "learning_rate": 1.7633300230471562e-05, "loss": 0.3568, "step": 12042 }, { "epoch": 0.22359768753504156, "grad_norm": 0.2673585116863251, "learning_rate": 1.7632546615391334e-05, "loss": 0.519, "step": 12044 }, { "epoch": 0.2236348176724602, "grad_norm": 0.3688596487045288, "learning_rate": 1.7631792896455007e-05, "loss": 0.1387, "step": 12046 }, { "epoch": 0.22367194780987884, "grad_norm": 0.28437888622283936, "learning_rate": 1.7631039073672836e-05, "loss": 0.3222, "step": 12048 }, { "epoch": 0.2237090779472975, "grad_norm": 0.3170979619026184, "learning_rate": 1.7630285147055083e-05, "loss": 0.4551, "step": 12050 }, { "epoch": 0.22374620808471612, "grad_norm": 0.3250986635684967, "learning_rate": 1.7629531116612008e-05, "loss": 0.3673, "step": 12052 }, { "epoch": 0.22378333822213475, "grad_norm": 0.34939953684806824, "learning_rate": 1.7628776982353864e-05, "loss": 0.4111, "step": 12054 }, { "epoch": 0.2238204683595534, "grad_norm": 0.38850826025009155, "learning_rate": 1.762802274429092e-05, "loss": 0.3636, "step": 12056 }, { "epoch": 0.22385759849697204, "grad_norm": 0.22319717705249786, "learning_rate": 1.762726840243343e-05, "loss": 0.296, "step": 12058 }, { "epoch": 0.22389472863439067, "grad_norm": 0.4979951083660126, "learning_rate": 1.7626513956791663e-05, "loss": 0.2192, "step": 12060 }, { "epoch": 0.22393185877180932, "grad_norm": 0.2625686526298523, "learning_rate": 1.762575940737589e-05, "loss": 0.4437, "step": 12062 }, { "epoch": 0.22396898890922795, "grad_norm": 0.41948339343070984, "learning_rate": 1.762500475419637e-05, "loss": 0.4163, "step": 12064 }, { "epoch": 0.22400611904664658, "grad_norm": 0.3573482930660248, "learning_rate": 1.7624249997263375e-05, "loss": 0.2905, "step": 12066 }, { "epoch": 0.22404324918406524, "grad_norm": 0.3806382119655609, "learning_rate": 1.7623495136587174e-05, "loss": 0.2995, "step": 12068 }, { "epoch": 0.22408037932148386, "grad_norm": 0.2932985723018646, "learning_rate": 1.7622740172178043e-05, "loss": 0.2907, "step": 12070 }, { "epoch": 0.22411750945890252, "grad_norm": 0.5836262702941895, "learning_rate": 1.7621985104046245e-05, "loss": 0.3332, "step": 12072 }, { "epoch": 0.22415463959632115, "grad_norm": 0.3987974226474762, "learning_rate": 1.7621229932202065e-05, "loss": 0.3599, "step": 12074 }, { "epoch": 0.22419176973373978, "grad_norm": 0.3861166536808014, "learning_rate": 1.7620474656655774e-05, "loss": 0.4077, "step": 12076 }, { "epoch": 0.22422889987115843, "grad_norm": 0.4544990658760071, "learning_rate": 1.7619719277417648e-05, "loss": 0.2665, "step": 12078 }, { "epoch": 0.22426603000857706, "grad_norm": 0.4183366596698761, "learning_rate": 1.7618963794497966e-05, "loss": 0.2245, "step": 12080 }, { "epoch": 0.2243031601459957, "grad_norm": 0.4875152111053467, "learning_rate": 1.7618208207907007e-05, "loss": 0.4212, "step": 12082 }, { "epoch": 0.22434029028341435, "grad_norm": 0.32507941126823425, "learning_rate": 1.7617452517655055e-05, "loss": 0.2196, "step": 12084 }, { "epoch": 0.22437742042083297, "grad_norm": 0.5160993337631226, "learning_rate": 1.761669672375239e-05, "loss": 0.2918, "step": 12086 }, { "epoch": 0.22441455055825163, "grad_norm": 0.30765220522880554, "learning_rate": 1.7615940826209297e-05, "loss": 0.3645, "step": 12088 }, { "epoch": 0.22445168069567026, "grad_norm": 0.30236923694610596, "learning_rate": 1.7615184825036064e-05, "loss": 0.2412, "step": 12090 }, { "epoch": 0.2244888108330889, "grad_norm": 0.4530021548271179, "learning_rate": 1.7614428720242976e-05, "loss": 0.3649, "step": 12092 }, { "epoch": 0.22452594097050754, "grad_norm": 0.3599660098552704, "learning_rate": 1.761367251184032e-05, "loss": 0.3872, "step": 12094 }, { "epoch": 0.22456307110792617, "grad_norm": 0.4225991368293762, "learning_rate": 1.7612916199838385e-05, "loss": 0.2793, "step": 12096 }, { "epoch": 0.2246002012453448, "grad_norm": 0.28973588347435, "learning_rate": 1.7612159784247462e-05, "loss": 0.4806, "step": 12098 }, { "epoch": 0.22463733138276346, "grad_norm": 0.47195345163345337, "learning_rate": 1.761140326507785e-05, "loss": 0.3048, "step": 12100 }, { "epoch": 0.22467446152018208, "grad_norm": 0.31633633375167847, "learning_rate": 1.7610646642339836e-05, "loss": 0.3259, "step": 12102 }, { "epoch": 0.2247115916576007, "grad_norm": 0.25203317403793335, "learning_rate": 1.760988991604372e-05, "loss": 0.3407, "step": 12104 }, { "epoch": 0.22474872179501937, "grad_norm": 0.32096803188323975, "learning_rate": 1.7609133086199796e-05, "loss": 0.407, "step": 12106 }, { "epoch": 0.224785851932438, "grad_norm": 0.3544452488422394, "learning_rate": 1.7608376152818357e-05, "loss": 0.3093, "step": 12108 }, { "epoch": 0.22482298206985665, "grad_norm": 0.5119296312332153, "learning_rate": 1.7607619115909712e-05, "loss": 0.5716, "step": 12110 }, { "epoch": 0.22486011220727528, "grad_norm": 0.34867793321609497, "learning_rate": 1.760686197548416e-05, "loss": 0.3431, "step": 12112 }, { "epoch": 0.2248972423446939, "grad_norm": 0.3729167580604553, "learning_rate": 1.7606104731552004e-05, "loss": 0.1598, "step": 12114 }, { "epoch": 0.22493437248211257, "grad_norm": 0.32883015275001526, "learning_rate": 1.760534738412354e-05, "loss": 0.1359, "step": 12116 }, { "epoch": 0.2249715026195312, "grad_norm": 0.6238548755645752, "learning_rate": 1.760458993320908e-05, "loss": 0.4753, "step": 12118 }, { "epoch": 0.22500863275694982, "grad_norm": 0.4662645757198334, "learning_rate": 1.760383237881893e-05, "loss": 0.274, "step": 12120 }, { "epoch": 0.22504576289436848, "grad_norm": 0.3792097270488739, "learning_rate": 1.76030747209634e-05, "loss": 0.1781, "step": 12122 }, { "epoch": 0.2250828930317871, "grad_norm": 0.34540051221847534, "learning_rate": 1.7602316959652794e-05, "loss": 0.3906, "step": 12124 }, { "epoch": 0.22512002316920576, "grad_norm": 0.3337765634059906, "learning_rate": 1.7601559094897427e-05, "loss": 0.3008, "step": 12126 }, { "epoch": 0.2251571533066244, "grad_norm": 0.2720305621623993, "learning_rate": 1.7600801126707613e-05, "loss": 0.2594, "step": 12128 }, { "epoch": 0.22519428344404302, "grad_norm": 0.4514831006526947, "learning_rate": 1.7600043055093654e-05, "loss": 0.237, "step": 12130 }, { "epoch": 0.22523141358146168, "grad_norm": 0.293328195810318, "learning_rate": 1.7599284880065883e-05, "loss": 0.3785, "step": 12132 }, { "epoch": 0.2252685437188803, "grad_norm": 0.40314000844955444, "learning_rate": 1.75985266016346e-05, "loss": 0.2489, "step": 12134 }, { "epoch": 0.22530567385629893, "grad_norm": 0.23729155957698822, "learning_rate": 1.7597768219810134e-05, "loss": 0.4259, "step": 12136 }, { "epoch": 0.2253428039937176, "grad_norm": 0.41475462913513184, "learning_rate": 1.75970097346028e-05, "loss": 0.3213, "step": 12138 }, { "epoch": 0.22537993413113622, "grad_norm": 0.5343440175056458, "learning_rate": 1.759625114602292e-05, "loss": 0.3509, "step": 12140 }, { "epoch": 0.22541706426855485, "grad_norm": 0.3142673075199127, "learning_rate": 1.7595492454080813e-05, "loss": 0.1057, "step": 12142 }, { "epoch": 0.2254541944059735, "grad_norm": 0.36299678683280945, "learning_rate": 1.7594733658786807e-05, "loss": 0.2515, "step": 12144 }, { "epoch": 0.22549132454339213, "grad_norm": 0.3457544445991516, "learning_rate": 1.7593974760151223e-05, "loss": 0.3595, "step": 12146 }, { "epoch": 0.22552845468081079, "grad_norm": 0.31571483612060547, "learning_rate": 1.7593215758184392e-05, "loss": 0.3146, "step": 12148 }, { "epoch": 0.22556558481822941, "grad_norm": 0.38800618052482605, "learning_rate": 1.7592456652896636e-05, "loss": 0.25, "step": 12150 }, { "epoch": 0.22560271495564804, "grad_norm": 0.31788524985313416, "learning_rate": 1.759169744429829e-05, "loss": 0.309, "step": 12152 }, { "epoch": 0.2256398450930667, "grad_norm": 0.35473138093948364, "learning_rate": 1.7590938132399678e-05, "loss": 0.4165, "step": 12154 }, { "epoch": 0.22567697523048533, "grad_norm": 0.2991270124912262, "learning_rate": 1.7590178717211138e-05, "loss": 0.2397, "step": 12156 }, { "epoch": 0.22571410536790396, "grad_norm": 0.38730674982070923, "learning_rate": 1.7589419198743e-05, "loss": 0.4355, "step": 12158 }, { "epoch": 0.2257512355053226, "grad_norm": 0.270922988653183, "learning_rate": 1.75886595770056e-05, "loss": 0.3376, "step": 12160 }, { "epoch": 0.22578836564274124, "grad_norm": 0.4049718379974365, "learning_rate": 1.758789985200927e-05, "loss": 0.2612, "step": 12162 }, { "epoch": 0.2258254957801599, "grad_norm": 0.3952605426311493, "learning_rate": 1.7587140023764355e-05, "loss": 0.3824, "step": 12164 }, { "epoch": 0.22586262591757852, "grad_norm": 0.34663283824920654, "learning_rate": 1.7586380092281194e-05, "loss": 0.4979, "step": 12166 }, { "epoch": 0.22589975605499715, "grad_norm": 0.3986808657646179, "learning_rate": 1.758562005757012e-05, "loss": 0.3647, "step": 12168 }, { "epoch": 0.2259368861924158, "grad_norm": 0.5012375712394714, "learning_rate": 1.758485991964148e-05, "loss": 0.3665, "step": 12170 }, { "epoch": 0.22597401632983444, "grad_norm": 0.27836892008781433, "learning_rate": 1.758409967850561e-05, "loss": 0.3261, "step": 12172 }, { "epoch": 0.22601114646725307, "grad_norm": 0.7115713953971863, "learning_rate": 1.7583339334172866e-05, "loss": 0.3473, "step": 12174 }, { "epoch": 0.22604827660467172, "grad_norm": 0.3703259527683258, "learning_rate": 1.7582578886653587e-05, "loss": 0.2575, "step": 12176 }, { "epoch": 0.22608540674209035, "grad_norm": 0.46376755833625793, "learning_rate": 1.758181833595812e-05, "loss": 0.2019, "step": 12178 }, { "epoch": 0.22612253687950898, "grad_norm": 0.40719014406204224, "learning_rate": 1.7581057682096817e-05, "loss": 0.2252, "step": 12180 }, { "epoch": 0.22615966701692763, "grad_norm": 0.5799514055252075, "learning_rate": 1.758029692508003e-05, "loss": 0.3436, "step": 12182 }, { "epoch": 0.22619679715434626, "grad_norm": 0.2772023677825928, "learning_rate": 1.7579536064918104e-05, "loss": 0.2202, "step": 12184 }, { "epoch": 0.22623392729176492, "grad_norm": 0.2913564443588257, "learning_rate": 1.7578775101621396e-05, "loss": 0.2913, "step": 12186 }, { "epoch": 0.22627105742918355, "grad_norm": 0.31022971868515015, "learning_rate": 1.7578014035200262e-05, "loss": 0.2204, "step": 12188 }, { "epoch": 0.22630818756660218, "grad_norm": 0.4769797623157501, "learning_rate": 1.757725286566505e-05, "loss": 0.2824, "step": 12190 }, { "epoch": 0.22634531770402083, "grad_norm": 0.3974776268005371, "learning_rate": 1.757649159302613e-05, "loss": 0.2972, "step": 12192 }, { "epoch": 0.22638244784143946, "grad_norm": 0.31579384207725525, "learning_rate": 1.757573021729385e-05, "loss": 0.4782, "step": 12194 }, { "epoch": 0.2264195779788581, "grad_norm": 0.41150814294815063, "learning_rate": 1.7574968738478576e-05, "loss": 0.2063, "step": 12196 }, { "epoch": 0.22645670811627674, "grad_norm": 0.37733304500579834, "learning_rate": 1.7574207156590667e-05, "loss": 0.4448, "step": 12198 }, { "epoch": 0.22649383825369537, "grad_norm": 0.3141343891620636, "learning_rate": 1.757344547164048e-05, "loss": 0.2598, "step": 12200 }, { "epoch": 0.22653096839111403, "grad_norm": 0.6399528384208679, "learning_rate": 1.7572683683638393e-05, "loss": 0.4158, "step": 12202 }, { "epoch": 0.22656809852853266, "grad_norm": 0.29067307710647583, "learning_rate": 1.757192179259476e-05, "loss": 0.1872, "step": 12204 }, { "epoch": 0.22660522866595129, "grad_norm": 0.4155784547328949, "learning_rate": 1.757115979851995e-05, "loss": 0.4163, "step": 12206 }, { "epoch": 0.22664235880336994, "grad_norm": 0.34444814920425415, "learning_rate": 1.7570397701424337e-05, "loss": 0.3538, "step": 12208 }, { "epoch": 0.22667948894078857, "grad_norm": 0.3122224807739258, "learning_rate": 1.7569635501318287e-05, "loss": 0.2395, "step": 12210 }, { "epoch": 0.2267166190782072, "grad_norm": 0.3670322895050049, "learning_rate": 1.756887319821217e-05, "loss": 0.2709, "step": 12212 }, { "epoch": 0.22675374921562585, "grad_norm": 0.3097686469554901, "learning_rate": 1.7568110792116362e-05, "loss": 0.351, "step": 12214 }, { "epoch": 0.22679087935304448, "grad_norm": 0.4320831298828125, "learning_rate": 1.756734828304123e-05, "loss": 0.4112, "step": 12216 }, { "epoch": 0.2268280094904631, "grad_norm": 0.4121559262275696, "learning_rate": 1.7566585670997164e-05, "loss": 0.3443, "step": 12218 }, { "epoch": 0.22686513962788177, "grad_norm": 0.40930017828941345, "learning_rate": 1.7565822955994524e-05, "loss": 0.3014, "step": 12220 }, { "epoch": 0.2269022697653004, "grad_norm": 0.4146338701248169, "learning_rate": 1.7565060138043697e-05, "loss": 0.2149, "step": 12222 }, { "epoch": 0.22693939990271905, "grad_norm": 0.3705211877822876, "learning_rate": 1.7564297217155066e-05, "loss": 0.2736, "step": 12224 }, { "epoch": 0.22697653004013768, "grad_norm": 0.3298647403717041, "learning_rate": 1.7563534193339002e-05, "loss": 0.3522, "step": 12226 }, { "epoch": 0.2270136601775563, "grad_norm": 0.3647691309452057, "learning_rate": 1.75627710666059e-05, "loss": 0.3509, "step": 12228 }, { "epoch": 0.22705079031497497, "grad_norm": 0.37193411588668823, "learning_rate": 1.7562007836966128e-05, "loss": 0.4075, "step": 12230 }, { "epoch": 0.2270879204523936, "grad_norm": 0.36932727694511414, "learning_rate": 1.7561244504430083e-05, "loss": 0.5432, "step": 12232 }, { "epoch": 0.22712505058981222, "grad_norm": 0.3440976142883301, "learning_rate": 1.7560481069008154e-05, "loss": 0.3834, "step": 12234 }, { "epoch": 0.22716218072723088, "grad_norm": 0.36136186122894287, "learning_rate": 1.7559717530710716e-05, "loss": 0.3166, "step": 12236 }, { "epoch": 0.2271993108646495, "grad_norm": 0.5251002907752991, "learning_rate": 1.755895388954817e-05, "loss": 0.3668, "step": 12238 }, { "epoch": 0.22723644100206816, "grad_norm": 0.4312556982040405, "learning_rate": 1.7558190145530906e-05, "loss": 0.3254, "step": 12240 }, { "epoch": 0.2272735711394868, "grad_norm": 0.3840956687927246, "learning_rate": 1.755742629866931e-05, "loss": 0.5117, "step": 12242 }, { "epoch": 0.22731070127690542, "grad_norm": 0.43443334102630615, "learning_rate": 1.755666234897378e-05, "loss": 0.2533, "step": 12244 }, { "epoch": 0.22734783141432408, "grad_norm": 0.3611725866794586, "learning_rate": 1.755589829645471e-05, "loss": 0.3495, "step": 12246 }, { "epoch": 0.2273849615517427, "grad_norm": 0.34282463788986206, "learning_rate": 1.75551341411225e-05, "loss": 0.3618, "step": 12248 }, { "epoch": 0.22742209168916133, "grad_norm": 0.3647899925708771, "learning_rate": 1.7554369882987542e-05, "loss": 0.4186, "step": 12250 }, { "epoch": 0.22745922182658, "grad_norm": 0.6680319309234619, "learning_rate": 1.7553605522060237e-05, "loss": 0.3006, "step": 12252 }, { "epoch": 0.22749635196399862, "grad_norm": 0.3172919750213623, "learning_rate": 1.7552841058350986e-05, "loss": 0.3028, "step": 12254 }, { "epoch": 0.22753348210141724, "grad_norm": 0.28211918473243713, "learning_rate": 1.7552076491870195e-05, "loss": 0.3686, "step": 12256 }, { "epoch": 0.2275706122388359, "grad_norm": 0.39960405230522156, "learning_rate": 1.7551311822628264e-05, "loss": 0.3279, "step": 12258 }, { "epoch": 0.22760774237625453, "grad_norm": 0.324870228767395, "learning_rate": 1.7550547050635595e-05, "loss": 0.4096, "step": 12260 }, { "epoch": 0.22764487251367319, "grad_norm": 0.3878096342086792, "learning_rate": 1.7549782175902597e-05, "loss": 0.0775, "step": 12262 }, { "epoch": 0.2276820026510918, "grad_norm": 0.42712169885635376, "learning_rate": 1.754901719843968e-05, "loss": 0.2589, "step": 12264 }, { "epoch": 0.22771913278851044, "grad_norm": 0.3460284471511841, "learning_rate": 1.7548252118257246e-05, "loss": 0.219, "step": 12266 }, { "epoch": 0.2277562629259291, "grad_norm": 0.321351021528244, "learning_rate": 1.7547486935365716e-05, "loss": 0.2219, "step": 12268 }, { "epoch": 0.22779339306334773, "grad_norm": 0.443685919046402, "learning_rate": 1.7546721649775494e-05, "loss": 0.4945, "step": 12270 }, { "epoch": 0.22783052320076635, "grad_norm": 0.3065042495727539, "learning_rate": 1.7545956261496995e-05, "loss": 0.2671, "step": 12272 }, { "epoch": 0.227867653338185, "grad_norm": 0.3917382061481476, "learning_rate": 1.7545190770540633e-05, "loss": 0.3305, "step": 12274 }, { "epoch": 0.22790478347560364, "grad_norm": 0.33366790413856506, "learning_rate": 1.7544425176916827e-05, "loss": 0.2396, "step": 12276 }, { "epoch": 0.2279419136130223, "grad_norm": 0.5161924362182617, "learning_rate": 1.7543659480635992e-05, "loss": 0.2298, "step": 12278 }, { "epoch": 0.22797904375044092, "grad_norm": 0.3733651041984558, "learning_rate": 1.7542893681708547e-05, "loss": 0.5555, "step": 12280 }, { "epoch": 0.22801617388785955, "grad_norm": 0.20911754667758942, "learning_rate": 1.7542127780144917e-05, "loss": 0.2317, "step": 12282 }, { "epoch": 0.2280533040252782, "grad_norm": 0.38140398263931274, "learning_rate": 1.7541361775955514e-05, "loss": 0.4778, "step": 12284 }, { "epoch": 0.22809043416269684, "grad_norm": 0.255867600440979, "learning_rate": 1.7540595669150766e-05, "loss": 0.355, "step": 12286 }, { "epoch": 0.22812756430011547, "grad_norm": 0.43670031428337097, "learning_rate": 1.7539829459741102e-05, "loss": 0.4306, "step": 12288 }, { "epoch": 0.22816469443753412, "grad_norm": 0.5742508769035339, "learning_rate": 1.753906314773694e-05, "loss": 0.4602, "step": 12290 }, { "epoch": 0.22820182457495275, "grad_norm": 0.44967716932296753, "learning_rate": 1.753829673314871e-05, "loss": 0.3507, "step": 12292 }, { "epoch": 0.22823895471237138, "grad_norm": 0.42827701568603516, "learning_rate": 1.7537530215986844e-05, "loss": 0.3715, "step": 12294 }, { "epoch": 0.22827608484979003, "grad_norm": 0.39758598804473877, "learning_rate": 1.7536763596261765e-05, "loss": 0.4169, "step": 12296 }, { "epoch": 0.22831321498720866, "grad_norm": 0.2817027270793915, "learning_rate": 1.753599687398391e-05, "loss": 0.4116, "step": 12298 }, { "epoch": 0.22835034512462732, "grad_norm": 0.3187933564186096, "learning_rate": 1.7535230049163713e-05, "loss": 0.4137, "step": 12300 }, { "epoch": 0.22838747526204595, "grad_norm": 0.42051562666893005, "learning_rate": 1.7534463121811603e-05, "loss": 0.4312, "step": 12302 }, { "epoch": 0.22842460539946458, "grad_norm": 0.3544573485851288, "learning_rate": 1.7533696091938024e-05, "loss": 0.3789, "step": 12304 }, { "epoch": 0.22846173553688323, "grad_norm": 0.4475204646587372, "learning_rate": 1.7532928959553403e-05, "loss": 0.2393, "step": 12306 }, { "epoch": 0.22849886567430186, "grad_norm": 0.37221747636795044, "learning_rate": 1.753216172466818e-05, "loss": 0.3988, "step": 12308 }, { "epoch": 0.2285359958117205, "grad_norm": 0.32714807987213135, "learning_rate": 1.75313943872928e-05, "loss": 0.5212, "step": 12310 }, { "epoch": 0.22857312594913914, "grad_norm": 0.4023895561695099, "learning_rate": 1.7530626947437704e-05, "loss": 0.295, "step": 12312 }, { "epoch": 0.22861025608655777, "grad_norm": 0.39455997943878174, "learning_rate": 1.752985940511333e-05, "loss": 0.3178, "step": 12314 }, { "epoch": 0.22864738622397643, "grad_norm": 0.30868273973464966, "learning_rate": 1.7529091760330123e-05, "loss": 0.2897, "step": 12316 }, { "epoch": 0.22868451636139506, "grad_norm": 0.5626739263534546, "learning_rate": 1.752832401309853e-05, "loss": 0.537, "step": 12318 }, { "epoch": 0.22872164649881369, "grad_norm": 0.2995493412017822, "learning_rate": 1.7527556163428994e-05, "loss": 0.3179, "step": 12320 }, { "epoch": 0.22875877663623234, "grad_norm": 0.3250625729560852, "learning_rate": 1.752678821133197e-05, "loss": 0.2105, "step": 12322 }, { "epoch": 0.22879590677365097, "grad_norm": 0.2906014621257782, "learning_rate": 1.7526020156817907e-05, "loss": 0.2625, "step": 12324 }, { "epoch": 0.2288330369110696, "grad_norm": 0.4179259240627289, "learning_rate": 1.7525251999897247e-05, "loss": 0.2694, "step": 12326 }, { "epoch": 0.22887016704848825, "grad_norm": 0.30020153522491455, "learning_rate": 1.752448374058045e-05, "loss": 0.3193, "step": 12328 }, { "epoch": 0.22890729718590688, "grad_norm": 0.6141716837882996, "learning_rate": 1.7523715378877967e-05, "loss": 0.1568, "step": 12330 }, { "epoch": 0.2289444273233255, "grad_norm": 0.29577603936195374, "learning_rate": 1.7522946914800256e-05, "loss": 0.4573, "step": 12332 }, { "epoch": 0.22898155746074417, "grad_norm": 0.35698315501213074, "learning_rate": 1.752217834835777e-05, "loss": 0.4283, "step": 12334 }, { "epoch": 0.2290186875981628, "grad_norm": 0.36258038878440857, "learning_rate": 1.7521409679560967e-05, "loss": 0.2888, "step": 12336 }, { "epoch": 0.22905581773558145, "grad_norm": 0.2752467393875122, "learning_rate": 1.7520640908420307e-05, "loss": 0.2378, "step": 12338 }, { "epoch": 0.22909294787300008, "grad_norm": 0.34941089153289795, "learning_rate": 1.7519872034946253e-05, "loss": 0.2176, "step": 12340 }, { "epoch": 0.2291300780104187, "grad_norm": 0.41091299057006836, "learning_rate": 1.7519103059149264e-05, "loss": 0.2544, "step": 12342 }, { "epoch": 0.22916720814783736, "grad_norm": 0.278972864151001, "learning_rate": 1.751833398103981e-05, "loss": 0.383, "step": 12344 }, { "epoch": 0.229204338285256, "grad_norm": 0.506033182144165, "learning_rate": 1.7517564800628343e-05, "loss": 0.4384, "step": 12346 }, { "epoch": 0.22924146842267462, "grad_norm": 0.48699185252189636, "learning_rate": 1.7516795517925344e-05, "loss": 0.3769, "step": 12348 }, { "epoch": 0.22927859856009328, "grad_norm": 0.366421103477478, "learning_rate": 1.7516026132941266e-05, "loss": 0.2798, "step": 12350 }, { "epoch": 0.2293157286975119, "grad_norm": 0.4105135202407837, "learning_rate": 1.751525664568659e-05, "loss": 0.3215, "step": 12352 }, { "epoch": 0.22935285883493056, "grad_norm": 0.3275894522666931, "learning_rate": 1.751448705617178e-05, "loss": 0.2186, "step": 12354 }, { "epoch": 0.2293899889723492, "grad_norm": 0.3845275938510895, "learning_rate": 1.751371736440731e-05, "loss": 0.2763, "step": 12356 }, { "epoch": 0.22942711910976782, "grad_norm": 0.3953634798526764, "learning_rate": 1.7512947570403654e-05, "loss": 0.3233, "step": 12358 }, { "epoch": 0.22946424924718647, "grad_norm": 0.4476756155490875, "learning_rate": 1.7512177674171282e-05, "loss": 0.4203, "step": 12360 }, { "epoch": 0.2295013793846051, "grad_norm": 0.334692120552063, "learning_rate": 1.7511407675720678e-05, "loss": 0.2407, "step": 12362 }, { "epoch": 0.22953850952202373, "grad_norm": 0.6168070435523987, "learning_rate": 1.7510637575062312e-05, "loss": 0.2661, "step": 12364 }, { "epoch": 0.2295756396594424, "grad_norm": 0.28880152106285095, "learning_rate": 1.7509867372206666e-05, "loss": 0.2695, "step": 12366 }, { "epoch": 0.22961276979686102, "grad_norm": 0.30871060490608215, "learning_rate": 1.750909706716422e-05, "loss": 0.143, "step": 12368 }, { "epoch": 0.22964989993427964, "grad_norm": 0.33034607768058777, "learning_rate": 1.7508326659945457e-05, "loss": 0.4469, "step": 12370 }, { "epoch": 0.2296870300716983, "grad_norm": 0.6231948137283325, "learning_rate": 1.7507556150560856e-05, "loss": 0.3858, "step": 12372 }, { "epoch": 0.22972416020911693, "grad_norm": 0.4673445522785187, "learning_rate": 1.7506785539020904e-05, "loss": 0.3185, "step": 12374 }, { "epoch": 0.22976129034653558, "grad_norm": 0.28963035345077515, "learning_rate": 1.7506014825336086e-05, "loss": 0.3862, "step": 12376 }, { "epoch": 0.2297984204839542, "grad_norm": 0.33484646677970886, "learning_rate": 1.750524400951689e-05, "loss": 0.2071, "step": 12378 }, { "epoch": 0.22983555062137284, "grad_norm": 0.33448272943496704, "learning_rate": 1.7504473091573804e-05, "loss": 0.3293, "step": 12380 }, { "epoch": 0.2298726807587915, "grad_norm": 0.4470660090446472, "learning_rate": 1.7503702071517318e-05, "loss": 0.2937, "step": 12382 }, { "epoch": 0.22990981089621013, "grad_norm": 0.5206077694892883, "learning_rate": 1.7502930949357922e-05, "loss": 0.5043, "step": 12384 }, { "epoch": 0.22994694103362875, "grad_norm": 0.34875521063804626, "learning_rate": 1.750215972510611e-05, "loss": 0.467, "step": 12386 }, { "epoch": 0.2299840711710474, "grad_norm": 0.6374222636222839, "learning_rate": 1.7501388398772373e-05, "loss": 0.1336, "step": 12388 }, { "epoch": 0.23002120130846604, "grad_norm": 0.3302450478076935, "learning_rate": 1.7500616970367216e-05, "loss": 0.2838, "step": 12390 }, { "epoch": 0.2300583314458847, "grad_norm": 0.3730352222919464, "learning_rate": 1.749984543990112e-05, "loss": 0.1782, "step": 12392 }, { "epoch": 0.23009546158330332, "grad_norm": 0.2858547866344452, "learning_rate": 1.7499073807384598e-05, "loss": 0.2981, "step": 12394 }, { "epoch": 0.23013259172072195, "grad_norm": 0.34633609652519226, "learning_rate": 1.7498302072828144e-05, "loss": 0.4711, "step": 12396 }, { "epoch": 0.2301697218581406, "grad_norm": 0.34397944808006287, "learning_rate": 1.7497530236242258e-05, "loss": 0.3962, "step": 12398 }, { "epoch": 0.23020685199555924, "grad_norm": 0.39055126905441284, "learning_rate": 1.7496758297637444e-05, "loss": 0.279, "step": 12400 }, { "epoch": 0.23024398213297786, "grad_norm": 0.4564305245876312, "learning_rate": 1.74959862570242e-05, "loss": 0.38, "step": 12402 }, { "epoch": 0.23028111227039652, "grad_norm": 0.3358359932899475, "learning_rate": 1.7495214114413043e-05, "loss": 0.148, "step": 12404 }, { "epoch": 0.23031824240781515, "grad_norm": 0.3154211640357971, "learning_rate": 1.749444186981447e-05, "loss": 0.4049, "step": 12406 }, { "epoch": 0.23035537254523378, "grad_norm": 0.35937538743019104, "learning_rate": 1.749366952323899e-05, "loss": 0.2327, "step": 12408 }, { "epoch": 0.23039250268265243, "grad_norm": 0.3170214593410492, "learning_rate": 1.7492897074697117e-05, "loss": 0.2995, "step": 12410 }, { "epoch": 0.23042963282007106, "grad_norm": 0.3615940809249878, "learning_rate": 1.7492124524199354e-05, "loss": 0.4211, "step": 12412 }, { "epoch": 0.23046676295748972, "grad_norm": 0.27584195137023926, "learning_rate": 1.7491351871756222e-05, "loss": 0.234, "step": 12414 }, { "epoch": 0.23050389309490835, "grad_norm": 0.3777814507484436, "learning_rate": 1.749057911737823e-05, "loss": 0.3478, "step": 12416 }, { "epoch": 0.23054102323232697, "grad_norm": 0.5194113254547119, "learning_rate": 1.748980626107589e-05, "loss": 0.247, "step": 12418 }, { "epoch": 0.23057815336974563, "grad_norm": 0.34694328904151917, "learning_rate": 1.7489033302859722e-05, "loss": 0.2752, "step": 12420 }, { "epoch": 0.23061528350716426, "grad_norm": 0.35486072301864624, "learning_rate": 1.7488260242740246e-05, "loss": 0.439, "step": 12422 }, { "epoch": 0.2306524136445829, "grad_norm": 0.3262186050415039, "learning_rate": 1.7487487080727975e-05, "loss": 0.2797, "step": 12424 }, { "epoch": 0.23068954378200154, "grad_norm": 0.4208851158618927, "learning_rate": 1.7486713816833433e-05, "loss": 0.312, "step": 12426 }, { "epoch": 0.23072667391942017, "grad_norm": 0.32234078645706177, "learning_rate": 1.7485940451067143e-05, "loss": 0.3958, "step": 12428 }, { "epoch": 0.23076380405683883, "grad_norm": 0.36034539341926575, "learning_rate": 1.7485166983439625e-05, "loss": 0.3528, "step": 12430 }, { "epoch": 0.23080093419425746, "grad_norm": 0.7856832146644592, "learning_rate": 1.7484393413961406e-05, "loss": 0.2883, "step": 12432 }, { "epoch": 0.23083806433167608, "grad_norm": 0.40456244349479675, "learning_rate": 1.748361974264301e-05, "loss": 0.3403, "step": 12434 }, { "epoch": 0.23087519446909474, "grad_norm": 0.42169487476348877, "learning_rate": 1.7482845969494963e-05, "loss": 0.372, "step": 12436 }, { "epoch": 0.23091232460651337, "grad_norm": 0.3566226363182068, "learning_rate": 1.74820720945278e-05, "loss": 0.4594, "step": 12438 }, { "epoch": 0.230949454743932, "grad_norm": 0.673967719078064, "learning_rate": 1.7481298117752046e-05, "loss": 0.3753, "step": 12440 }, { "epoch": 0.23098658488135065, "grad_norm": 0.27730801701545715, "learning_rate": 1.7480524039178234e-05, "loss": 0.303, "step": 12442 }, { "epoch": 0.23102371501876928, "grad_norm": 0.38060349225997925, "learning_rate": 1.7479749858816895e-05, "loss": 0.2556, "step": 12444 }, { "epoch": 0.2310608451561879, "grad_norm": 0.48266640305519104, "learning_rate": 1.7478975576678565e-05, "loss": 0.2467, "step": 12446 }, { "epoch": 0.23109797529360657, "grad_norm": 0.3496796786785126, "learning_rate": 1.7478201192773783e-05, "loss": 0.1936, "step": 12448 }, { "epoch": 0.2311351054310252, "grad_norm": 0.4143736660480499, "learning_rate": 1.747742670711308e-05, "loss": 0.4739, "step": 12450 }, { "epoch": 0.23117223556844385, "grad_norm": 0.34172284603118896, "learning_rate": 1.7476652119706996e-05, "loss": 0.5804, "step": 12452 }, { "epoch": 0.23120936570586248, "grad_norm": 0.41916823387145996, "learning_rate": 1.747587743056607e-05, "loss": 0.408, "step": 12454 }, { "epoch": 0.2312464958432811, "grad_norm": 0.533547580242157, "learning_rate": 1.747510263970085e-05, "loss": 0.1849, "step": 12456 }, { "epoch": 0.23128362598069976, "grad_norm": 0.32442694902420044, "learning_rate": 1.7474327747121874e-05, "loss": 0.2626, "step": 12458 }, { "epoch": 0.2313207561181184, "grad_norm": 0.35257571935653687, "learning_rate": 1.7473552752839682e-05, "loss": 0.3978, "step": 12460 }, { "epoch": 0.23135788625553702, "grad_norm": 0.35884883999824524, "learning_rate": 1.7472777656864825e-05, "loss": 0.3507, "step": 12462 }, { "epoch": 0.23139501639295568, "grad_norm": 0.381747841835022, "learning_rate": 1.7472002459207847e-05, "loss": 0.1963, "step": 12464 }, { "epoch": 0.2314321465303743, "grad_norm": 0.2982572615146637, "learning_rate": 1.7471227159879295e-05, "loss": 0.2763, "step": 12466 }, { "epoch": 0.23146927666779293, "grad_norm": 0.333759069442749, "learning_rate": 1.747045175888972e-05, "loss": 0.3619, "step": 12468 }, { "epoch": 0.2315064068052116, "grad_norm": 0.3700467348098755, "learning_rate": 1.7469676256249677e-05, "loss": 0.2686, "step": 12470 }, { "epoch": 0.23154353694263022, "grad_norm": 0.35166335105895996, "learning_rate": 1.746890065196971e-05, "loss": 0.2595, "step": 12472 }, { "epoch": 0.23158066708004887, "grad_norm": 0.40320897102355957, "learning_rate": 1.7468124946060384e-05, "loss": 0.2864, "step": 12474 }, { "epoch": 0.2316177972174675, "grad_norm": 0.3219544589519501, "learning_rate": 1.746734913853224e-05, "loss": 0.2817, "step": 12476 }, { "epoch": 0.23165492735488613, "grad_norm": 0.39119938015937805, "learning_rate": 1.7466573229395844e-05, "loss": 0.2267, "step": 12478 }, { "epoch": 0.2316920574923048, "grad_norm": 0.30432385206222534, "learning_rate": 1.746579721866175e-05, "loss": 0.3832, "step": 12480 }, { "epoch": 0.23172918762972342, "grad_norm": 0.4062613844871521, "learning_rate": 1.746502110634052e-05, "loss": 0.3976, "step": 12482 }, { "epoch": 0.23176631776714204, "grad_norm": 0.29385146498680115, "learning_rate": 1.7464244892442714e-05, "loss": 0.1382, "step": 12484 }, { "epoch": 0.2318034479045607, "grad_norm": 0.2695964276790619, "learning_rate": 1.7463468576978893e-05, "loss": 0.5599, "step": 12486 }, { "epoch": 0.23184057804197933, "grad_norm": 0.7436281442642212, "learning_rate": 1.746269215995962e-05, "loss": 0.4025, "step": 12488 }, { "epoch": 0.23187770817939798, "grad_norm": 0.35808977484703064, "learning_rate": 1.746191564139546e-05, "loss": 0.3018, "step": 12490 }, { "epoch": 0.2319148383168166, "grad_norm": 0.2579288184642792, "learning_rate": 1.7461139021296974e-05, "loss": 0.2899, "step": 12492 }, { "epoch": 0.23195196845423524, "grad_norm": 0.31625860929489136, "learning_rate": 1.7460362299674743e-05, "loss": 0.1102, "step": 12494 }, { "epoch": 0.2319890985916539, "grad_norm": 0.3438818156719208, "learning_rate": 1.7459585476539324e-05, "loss": 0.3074, "step": 12496 }, { "epoch": 0.23202622872907253, "grad_norm": 0.4542659521102905, "learning_rate": 1.7458808551901286e-05, "loss": 0.3865, "step": 12498 }, { "epoch": 0.23206335886649115, "grad_norm": 0.2878901958465576, "learning_rate": 1.7458031525771212e-05, "loss": 0.1274, "step": 12500 }, { "epoch": 0.2321004890039098, "grad_norm": 0.3579825162887573, "learning_rate": 1.7457254398159665e-05, "loss": 0.2869, "step": 12502 }, { "epoch": 0.23213761914132844, "grad_norm": 0.4130638539791107, "learning_rate": 1.7456477169077228e-05, "loss": 0.3883, "step": 12504 }, { "epoch": 0.23217474927874707, "grad_norm": 0.37749090790748596, "learning_rate": 1.7455699838534463e-05, "loss": 0.4673, "step": 12506 }, { "epoch": 0.23221187941616572, "grad_norm": 0.33898088335990906, "learning_rate": 1.745492240654196e-05, "loss": 0.2841, "step": 12508 }, { "epoch": 0.23224900955358435, "grad_norm": 0.34774860739707947, "learning_rate": 1.7454144873110293e-05, "loss": 0.1943, "step": 12510 }, { "epoch": 0.232286139691003, "grad_norm": 0.28457504510879517, "learning_rate": 1.745336723825004e-05, "loss": 0.3138, "step": 12512 }, { "epoch": 0.23232326982842164, "grad_norm": 0.3134916126728058, "learning_rate": 1.745258950197179e-05, "loss": 0.3829, "step": 12514 }, { "epoch": 0.23236039996584026, "grad_norm": 0.36652836203575134, "learning_rate": 1.7451811664286115e-05, "loss": 0.2887, "step": 12516 }, { "epoch": 0.23239753010325892, "grad_norm": 0.45158714056015015, "learning_rate": 1.7451033725203602e-05, "loss": 0.3936, "step": 12518 }, { "epoch": 0.23243466024067755, "grad_norm": 0.3174259662628174, "learning_rate": 1.7450255684734846e-05, "loss": 0.1917, "step": 12520 }, { "epoch": 0.23247179037809618, "grad_norm": 0.47589996457099915, "learning_rate": 1.744947754289042e-05, "loss": 0.3594, "step": 12522 }, { "epoch": 0.23250892051551483, "grad_norm": 0.4630495309829712, "learning_rate": 1.744869929968092e-05, "loss": 0.1076, "step": 12524 }, { "epoch": 0.23254605065293346, "grad_norm": 0.30611947178840637, "learning_rate": 1.7447920955116934e-05, "loss": 0.3806, "step": 12526 }, { "epoch": 0.23258318079035212, "grad_norm": 0.4588335156440735, "learning_rate": 1.744714250920905e-05, "loss": 0.2879, "step": 12528 }, { "epoch": 0.23262031092777075, "grad_norm": 0.4138917624950409, "learning_rate": 1.744636396196787e-05, "loss": 0.3003, "step": 12530 }, { "epoch": 0.23265744106518937, "grad_norm": 0.5048940777778625, "learning_rate": 1.7445585313403976e-05, "loss": 0.3434, "step": 12532 }, { "epoch": 0.23269457120260803, "grad_norm": 0.22947223484516144, "learning_rate": 1.744480656352797e-05, "loss": 0.2576, "step": 12534 }, { "epoch": 0.23273170134002666, "grad_norm": 0.4218013882637024, "learning_rate": 1.7444027712350448e-05, "loss": 0.2409, "step": 12536 }, { "epoch": 0.2327688314774453, "grad_norm": 0.39741289615631104, "learning_rate": 1.7443248759882002e-05, "loss": 0.4571, "step": 12538 }, { "epoch": 0.23280596161486394, "grad_norm": 0.33087971806526184, "learning_rate": 1.7442469706133234e-05, "loss": 0.4341, "step": 12540 }, { "epoch": 0.23284309175228257, "grad_norm": 0.30327117443084717, "learning_rate": 1.7441690551114752e-05, "loss": 0.3528, "step": 12542 }, { "epoch": 0.2328802218897012, "grad_norm": 0.4206446409225464, "learning_rate": 1.744091129483715e-05, "loss": 0.322, "step": 12544 }, { "epoch": 0.23291735202711986, "grad_norm": 0.38756147027015686, "learning_rate": 1.7440131937311034e-05, "loss": 0.4019, "step": 12546 }, { "epoch": 0.23295448216453848, "grad_norm": 0.3450041711330414, "learning_rate": 1.7439352478547008e-05, "loss": 0.3146, "step": 12548 }, { "epoch": 0.23299161230195714, "grad_norm": 0.34918153285980225, "learning_rate": 1.7438572918555677e-05, "loss": 0.3231, "step": 12550 }, { "epoch": 0.23302874243937577, "grad_norm": 0.29507169127464294, "learning_rate": 1.743779325734765e-05, "loss": 0.4331, "step": 12552 }, { "epoch": 0.2330658725767944, "grad_norm": 0.5163176655769348, "learning_rate": 1.7437013494933533e-05, "loss": 0.3267, "step": 12554 }, { "epoch": 0.23310300271421305, "grad_norm": 0.40732666850090027, "learning_rate": 1.7436233631323943e-05, "loss": 0.1744, "step": 12556 }, { "epoch": 0.23314013285163168, "grad_norm": 0.3646830916404724, "learning_rate": 1.743545366652949e-05, "loss": 0.3587, "step": 12558 }, { "epoch": 0.2331772629890503, "grad_norm": 0.4157034456729889, "learning_rate": 1.7434673600560773e-05, "loss": 0.3563, "step": 12560 }, { "epoch": 0.23321439312646897, "grad_norm": 0.32352957129478455, "learning_rate": 1.743389343342843e-05, "loss": 0.1657, "step": 12562 }, { "epoch": 0.2332515232638876, "grad_norm": 0.35385289788246155, "learning_rate": 1.7433113165143056e-05, "loss": 0.2354, "step": 12564 }, { "epoch": 0.23328865340130625, "grad_norm": 0.26118889451026917, "learning_rate": 1.743233279571528e-05, "loss": 0.4764, "step": 12566 }, { "epoch": 0.23332578353872488, "grad_norm": 0.34386008977890015, "learning_rate": 1.7431552325155718e-05, "loss": 0.3517, "step": 12568 }, { "epoch": 0.2333629136761435, "grad_norm": 0.3405663073062897, "learning_rate": 1.7430771753474987e-05, "loss": 0.3109, "step": 12570 }, { "epoch": 0.23340004381356216, "grad_norm": 0.2750672698020935, "learning_rate": 1.742999108068371e-05, "loss": 0.2927, "step": 12572 }, { "epoch": 0.2334371739509808, "grad_norm": 0.4655449688434601, "learning_rate": 1.742921030679251e-05, "loss": 0.1777, "step": 12574 }, { "epoch": 0.23347430408839942, "grad_norm": 0.28576651215553284, "learning_rate": 1.7428429431812013e-05, "loss": 0.3276, "step": 12576 }, { "epoch": 0.23351143422581808, "grad_norm": 0.5428327918052673, "learning_rate": 1.742764845575284e-05, "loss": 0.2782, "step": 12578 }, { "epoch": 0.2335485643632367, "grad_norm": 0.4101560413837433, "learning_rate": 1.7426867378625622e-05, "loss": 0.293, "step": 12580 }, { "epoch": 0.23358569450065533, "grad_norm": 0.41373971104621887, "learning_rate": 1.7426086200440983e-05, "loss": 0.2911, "step": 12582 }, { "epoch": 0.233622824638074, "grad_norm": 0.4218546152114868, "learning_rate": 1.7425304921209555e-05, "loss": 0.6767, "step": 12584 }, { "epoch": 0.23365995477549262, "grad_norm": 0.3192867040634155, "learning_rate": 1.7424523540941968e-05, "loss": 0.3068, "step": 12586 }, { "epoch": 0.23369708491291127, "grad_norm": 0.27396726608276367, "learning_rate": 1.7423742059648855e-05, "loss": 0.2926, "step": 12588 }, { "epoch": 0.2337342150503299, "grad_norm": 0.30776506662368774, "learning_rate": 1.742296047734085e-05, "loss": 0.1969, "step": 12590 }, { "epoch": 0.23377134518774853, "grad_norm": 1.1420944929122925, "learning_rate": 1.7422178794028587e-05, "loss": 0.4351, "step": 12592 }, { "epoch": 0.2338084753251672, "grad_norm": 0.34347182512283325, "learning_rate": 1.74213970097227e-05, "loss": 0.2518, "step": 12594 }, { "epoch": 0.23384560546258581, "grad_norm": 0.37608620524406433, "learning_rate": 1.7420615124433834e-05, "loss": 0.4885, "step": 12596 }, { "epoch": 0.23388273560000444, "grad_norm": 0.3722425103187561, "learning_rate": 1.7419833138172618e-05, "loss": 0.2205, "step": 12598 }, { "epoch": 0.2339198657374231, "grad_norm": 0.3573843836784363, "learning_rate": 1.7419051050949703e-05, "loss": 0.1322, "step": 12600 }, { "epoch": 0.23395699587484173, "grad_norm": 0.3446076214313507, "learning_rate": 1.7418268862775723e-05, "loss": 0.3544, "step": 12602 }, { "epoch": 0.23399412601226038, "grad_norm": 0.4843568801879883, "learning_rate": 1.7417486573661326e-05, "loss": 0.3412, "step": 12604 }, { "epoch": 0.234031256149679, "grad_norm": 0.30958664417266846, "learning_rate": 1.7416704183617153e-05, "loss": 0.5088, "step": 12606 }, { "epoch": 0.23406838628709764, "grad_norm": 0.34208205342292786, "learning_rate": 1.7415921692653852e-05, "loss": 0.1449, "step": 12608 }, { "epoch": 0.2341055164245163, "grad_norm": 0.4737250506877899, "learning_rate": 1.741513910078207e-05, "loss": 0.3077, "step": 12610 }, { "epoch": 0.23414264656193493, "grad_norm": 0.3565233647823334, "learning_rate": 1.7414356408012457e-05, "loss": 0.1516, "step": 12612 }, { "epoch": 0.23417977669935355, "grad_norm": 0.5153803825378418, "learning_rate": 1.741357361435566e-05, "loss": 0.3821, "step": 12614 }, { "epoch": 0.2342169068367722, "grad_norm": 0.2750087380409241, "learning_rate": 1.7412790719822334e-05, "loss": 0.3858, "step": 12616 }, { "epoch": 0.23425403697419084, "grad_norm": 0.47064000368118286, "learning_rate": 1.741200772442313e-05, "loss": 0.2999, "step": 12618 }, { "epoch": 0.23429116711160947, "grad_norm": 0.4685410261154175, "learning_rate": 1.7411224628168703e-05, "loss": 0.3678, "step": 12620 }, { "epoch": 0.23432829724902812, "grad_norm": 0.6276955604553223, "learning_rate": 1.7410441431069704e-05, "loss": 0.4268, "step": 12622 }, { "epoch": 0.23436542738644675, "grad_norm": 0.28432828187942505, "learning_rate": 1.7409658133136797e-05, "loss": 0.2987, "step": 12624 }, { "epoch": 0.2344025575238654, "grad_norm": 0.31281784176826477, "learning_rate": 1.740887473438064e-05, "loss": 0.3165, "step": 12626 }, { "epoch": 0.23443968766128404, "grad_norm": 0.3922826945781708, "learning_rate": 1.7408091234811887e-05, "loss": 0.2571, "step": 12628 }, { "epoch": 0.23447681779870266, "grad_norm": 0.32701578736305237, "learning_rate": 1.74073076344412e-05, "loss": 0.2985, "step": 12630 }, { "epoch": 0.23451394793612132, "grad_norm": 0.4044848084449768, "learning_rate": 1.7406523933279244e-05, "loss": 0.2234, "step": 12632 }, { "epoch": 0.23455107807353995, "grad_norm": 0.33668404817581177, "learning_rate": 1.7405740131336685e-05, "loss": 0.3202, "step": 12634 }, { "epoch": 0.23458820821095858, "grad_norm": 0.32177063822746277, "learning_rate": 1.7404956228624187e-05, "loss": 0.308, "step": 12636 }, { "epoch": 0.23462533834837723, "grad_norm": 0.3719465732574463, "learning_rate": 1.740417222515241e-05, "loss": 0.3683, "step": 12638 }, { "epoch": 0.23466246848579586, "grad_norm": 0.41589921712875366, "learning_rate": 1.740338812093203e-05, "loss": 0.3301, "step": 12640 }, { "epoch": 0.23469959862321452, "grad_norm": 0.9213447570800781, "learning_rate": 1.7402603915973714e-05, "loss": 0.5306, "step": 12642 }, { "epoch": 0.23473672876063315, "grad_norm": 0.5603246092796326, "learning_rate": 1.740181961028813e-05, "loss": 0.2628, "step": 12644 }, { "epoch": 0.23477385889805177, "grad_norm": 0.42980051040649414, "learning_rate": 1.7401035203885954e-05, "loss": 0.4079, "step": 12646 }, { "epoch": 0.23481098903547043, "grad_norm": 0.3186267018318176, "learning_rate": 1.7400250696777857e-05, "loss": 0.36, "step": 12648 }, { "epoch": 0.23484811917288906, "grad_norm": 0.35135483741760254, "learning_rate": 1.7399466088974515e-05, "loss": 0.136, "step": 12650 }, { "epoch": 0.2348852493103077, "grad_norm": 0.3205353617668152, "learning_rate": 1.73986813804866e-05, "loss": 0.2567, "step": 12652 }, { "epoch": 0.23492237944772634, "grad_norm": 0.3409160077571869, "learning_rate": 1.73978965713248e-05, "loss": 0.2062, "step": 12654 }, { "epoch": 0.23495950958514497, "grad_norm": 0.27931809425354004, "learning_rate": 1.7397111661499782e-05, "loss": 0.4996, "step": 12656 }, { "epoch": 0.2349966397225636, "grad_norm": 0.38363218307495117, "learning_rate": 1.739632665102223e-05, "loss": 0.2805, "step": 12658 }, { "epoch": 0.23503376985998226, "grad_norm": 0.3391798436641693, "learning_rate": 1.739554153990283e-05, "loss": 0.2609, "step": 12660 }, { "epoch": 0.23507089999740088, "grad_norm": 0.2957044839859009, "learning_rate": 1.739475632815226e-05, "loss": 0.1946, "step": 12662 }, { "epoch": 0.23510803013481954, "grad_norm": 0.4695781171321869, "learning_rate": 1.739397101578121e-05, "loss": 0.4279, "step": 12664 }, { "epoch": 0.23514516027223817, "grad_norm": 0.2800712585449219, "learning_rate": 1.739318560280036e-05, "loss": 0.285, "step": 12666 }, { "epoch": 0.2351822904096568, "grad_norm": 0.23228482902050018, "learning_rate": 1.7392400089220392e-05, "loss": 0.3475, "step": 12668 }, { "epoch": 0.23521942054707545, "grad_norm": 0.42808568477630615, "learning_rate": 1.739161447505201e-05, "loss": 0.2079, "step": 12670 }, { "epoch": 0.23525655068449408, "grad_norm": 0.48333513736724854, "learning_rate": 1.7390828760305894e-05, "loss": 0.4686, "step": 12672 }, { "epoch": 0.2352936808219127, "grad_norm": 0.38416048884391785, "learning_rate": 1.7390042944992734e-05, "loss": 0.1864, "step": 12674 }, { "epoch": 0.23533081095933137, "grad_norm": 0.2735489308834076, "learning_rate": 1.7389257029123228e-05, "loss": 0.4269, "step": 12676 }, { "epoch": 0.23536794109675, "grad_norm": 0.4203188419342041, "learning_rate": 1.7388471012708068e-05, "loss": 0.3147, "step": 12678 }, { "epoch": 0.23540507123416865, "grad_norm": 0.3212920129299164, "learning_rate": 1.7387684895757947e-05, "loss": 0.2707, "step": 12680 }, { "epoch": 0.23544220137158728, "grad_norm": 0.3601970076560974, "learning_rate": 1.7386898678283563e-05, "loss": 0.2869, "step": 12682 }, { "epoch": 0.2354793315090059, "grad_norm": 0.2809280753135681, "learning_rate": 1.7386112360295614e-05, "loss": 0.1522, "step": 12684 }, { "epoch": 0.23551646164642456, "grad_norm": 0.2453979104757309, "learning_rate": 1.7385325941804797e-05, "loss": 0.3479, "step": 12686 }, { "epoch": 0.2355535917838432, "grad_norm": 0.3738706707954407, "learning_rate": 1.738453942282182e-05, "loss": 0.4889, "step": 12688 }, { "epoch": 0.23559072192126182, "grad_norm": 0.4311172664165497, "learning_rate": 1.738375280335738e-05, "loss": 0.3496, "step": 12690 }, { "epoch": 0.23562785205868048, "grad_norm": 0.30339083075523376, "learning_rate": 1.738296608342218e-05, "loss": 0.4413, "step": 12692 }, { "epoch": 0.2356649821960991, "grad_norm": 0.4342459738254547, "learning_rate": 1.7382179263026926e-05, "loss": 0.2265, "step": 12694 }, { "epoch": 0.23570211233351773, "grad_norm": 0.4688303768634796, "learning_rate": 1.7381392342182328e-05, "loss": 0.3414, "step": 12696 }, { "epoch": 0.2357392424709364, "grad_norm": 0.25577959418296814, "learning_rate": 1.7380605320899087e-05, "loss": 0.5067, "step": 12698 }, { "epoch": 0.23577637260835502, "grad_norm": 0.43003860116004944, "learning_rate": 1.7379818199187914e-05, "loss": 0.2793, "step": 12700 }, { "epoch": 0.23581350274577367, "grad_norm": 0.3236466944217682, "learning_rate": 1.737903097705952e-05, "loss": 0.2849, "step": 12702 }, { "epoch": 0.2358506328831923, "grad_norm": 0.5448622107505798, "learning_rate": 1.7378243654524622e-05, "loss": 0.4428, "step": 12704 }, { "epoch": 0.23588776302061093, "grad_norm": 0.36351892352104187, "learning_rate": 1.7377456231593923e-05, "loss": 0.37, "step": 12706 }, { "epoch": 0.23592489315802959, "grad_norm": 0.34030577540397644, "learning_rate": 1.7376668708278144e-05, "loss": 0.2143, "step": 12708 }, { "epoch": 0.23596202329544821, "grad_norm": 0.4404785633087158, "learning_rate": 1.7375881084588e-05, "loss": 0.4456, "step": 12710 }, { "epoch": 0.23599915343286684, "grad_norm": 0.37547069787979126, "learning_rate": 1.737509336053421e-05, "loss": 0.2474, "step": 12712 }, { "epoch": 0.2360362835702855, "grad_norm": 0.4028439521789551, "learning_rate": 1.7374305536127486e-05, "loss": 0.3614, "step": 12714 }, { "epoch": 0.23607341370770413, "grad_norm": 0.4768480658531189, "learning_rate": 1.7373517611378558e-05, "loss": 0.2723, "step": 12716 }, { "epoch": 0.23611054384512278, "grad_norm": 0.3648746609687805, "learning_rate": 1.7372729586298137e-05, "loss": 0.2954, "step": 12718 }, { "epoch": 0.2361476739825414, "grad_norm": 0.5499078631401062, "learning_rate": 1.737194146089695e-05, "loss": 0.285, "step": 12720 }, { "epoch": 0.23618480411996004, "grad_norm": 0.3659633696079254, "learning_rate": 1.7371153235185724e-05, "loss": 0.2028, "step": 12722 }, { "epoch": 0.2362219342573787, "grad_norm": 0.42026105523109436, "learning_rate": 1.737036490917518e-05, "loss": 0.358, "step": 12724 }, { "epoch": 0.23625906439479732, "grad_norm": 0.4046187400817871, "learning_rate": 1.7369576482876046e-05, "loss": 0.4486, "step": 12726 }, { "epoch": 0.23629619453221595, "grad_norm": 0.4823373854160309, "learning_rate": 1.7368787956299052e-05, "loss": 0.1377, "step": 12728 }, { "epoch": 0.2363333246696346, "grad_norm": 0.29686641693115234, "learning_rate": 1.7367999329454926e-05, "loss": 0.2842, "step": 12730 }, { "epoch": 0.23637045480705324, "grad_norm": 0.41739293932914734, "learning_rate": 1.7367210602354395e-05, "loss": 0.3491, "step": 12732 }, { "epoch": 0.23640758494447187, "grad_norm": 0.38745439052581787, "learning_rate": 1.7366421775008202e-05, "loss": 0.459, "step": 12734 }, { "epoch": 0.23644471508189052, "grad_norm": 0.4241142272949219, "learning_rate": 1.7365632847427068e-05, "loss": 0.2465, "step": 12736 }, { "epoch": 0.23648184521930915, "grad_norm": 0.4007609486579895, "learning_rate": 1.7364843819621736e-05, "loss": 0.2899, "step": 12738 }, { "epoch": 0.2365189753567278, "grad_norm": 0.5675888657569885, "learning_rate": 1.736405469160294e-05, "loss": 0.3971, "step": 12740 }, { "epoch": 0.23655610549414643, "grad_norm": 0.3920831084251404, "learning_rate": 1.7363265463381416e-05, "loss": 0.3328, "step": 12742 }, { "epoch": 0.23659323563156506, "grad_norm": 0.37745344638824463, "learning_rate": 1.7362476134967906e-05, "loss": 0.4852, "step": 12744 }, { "epoch": 0.23663036576898372, "grad_norm": 0.3095090389251709, "learning_rate": 1.7361686706373147e-05, "loss": 0.3429, "step": 12746 }, { "epoch": 0.23666749590640235, "grad_norm": 0.696565568447113, "learning_rate": 1.7360897177607885e-05, "loss": 0.2833, "step": 12748 }, { "epoch": 0.23670462604382098, "grad_norm": 0.38258102536201477, "learning_rate": 1.736010754868286e-05, "loss": 0.3988, "step": 12750 }, { "epoch": 0.23674175618123963, "grad_norm": 0.4913133680820465, "learning_rate": 1.7359317819608814e-05, "loss": 0.283, "step": 12752 }, { "epoch": 0.23677888631865826, "grad_norm": 0.40903839468955994, "learning_rate": 1.73585279903965e-05, "loss": 0.5389, "step": 12754 }, { "epoch": 0.23681601645607692, "grad_norm": 0.36749300360679626, "learning_rate": 1.7357738061056665e-05, "loss": 0.5194, "step": 12756 }, { "epoch": 0.23685314659349554, "grad_norm": 0.4181799590587616, "learning_rate": 1.7356948031600046e-05, "loss": 0.3729, "step": 12758 }, { "epoch": 0.23689027673091417, "grad_norm": 0.4059695601463318, "learning_rate": 1.7356157902037404e-05, "loss": 0.4474, "step": 12760 }, { "epoch": 0.23692740686833283, "grad_norm": 0.21081207692623138, "learning_rate": 1.735536767237949e-05, "loss": 0.1456, "step": 12762 }, { "epoch": 0.23696453700575146, "grad_norm": 0.4715377688407898, "learning_rate": 1.735457734263705e-05, "loss": 0.2404, "step": 12764 }, { "epoch": 0.23700166714317009, "grad_norm": 0.3792320489883423, "learning_rate": 1.7353786912820846e-05, "loss": 0.1972, "step": 12766 }, { "epoch": 0.23703879728058874, "grad_norm": 0.333168089389801, "learning_rate": 1.7352996382941624e-05, "loss": 0.229, "step": 12768 }, { "epoch": 0.23707592741800737, "grad_norm": 0.5305964350700378, "learning_rate": 1.735220575301015e-05, "loss": 0.5028, "step": 12770 }, { "epoch": 0.237113057555426, "grad_norm": 0.2878890335559845, "learning_rate": 1.7351415023037178e-05, "loss": 0.2405, "step": 12772 }, { "epoch": 0.23715018769284466, "grad_norm": 0.2512670159339905, "learning_rate": 1.7350624193033464e-05, "loss": 0.3051, "step": 12774 }, { "epoch": 0.23718731783026328, "grad_norm": 0.35368621349334717, "learning_rate": 1.7349833263009776e-05, "loss": 0.2299, "step": 12776 }, { "epoch": 0.23722444796768194, "grad_norm": 0.8200502395629883, "learning_rate": 1.734904223297687e-05, "loss": 0.2192, "step": 12778 }, { "epoch": 0.23726157810510057, "grad_norm": 0.367302805185318, "learning_rate": 1.734825110294552e-05, "loss": 0.4873, "step": 12780 }, { "epoch": 0.2372987082425192, "grad_norm": 0.3810739815235138, "learning_rate": 1.734745987292647e-05, "loss": 0.2621, "step": 12782 }, { "epoch": 0.23733583837993785, "grad_norm": 0.37058472633361816, "learning_rate": 1.7346668542930508e-05, "loss": 0.2029, "step": 12784 }, { "epoch": 0.23737296851735648, "grad_norm": 0.3579244017601013, "learning_rate": 1.734587711296839e-05, "loss": 0.2997, "step": 12786 }, { "epoch": 0.2374100986547751, "grad_norm": 0.37096911668777466, "learning_rate": 1.734508558305089e-05, "loss": 0.3205, "step": 12788 }, { "epoch": 0.23744722879219377, "grad_norm": 0.30552271008491516, "learning_rate": 1.7344293953188775e-05, "loss": 0.4221, "step": 12790 }, { "epoch": 0.2374843589296124, "grad_norm": 0.4109688699245453, "learning_rate": 1.734350222339282e-05, "loss": 0.4132, "step": 12792 }, { "epoch": 0.23752148906703105, "grad_norm": 0.6047911643981934, "learning_rate": 1.7342710393673795e-05, "loss": 0.4507, "step": 12794 }, { "epoch": 0.23755861920444968, "grad_norm": 0.29877474904060364, "learning_rate": 1.7341918464042474e-05, "loss": 0.4751, "step": 12796 }, { "epoch": 0.2375957493418683, "grad_norm": 0.44438010454177856, "learning_rate": 1.7341126434509633e-05, "loss": 0.366, "step": 12798 }, { "epoch": 0.23763287947928696, "grad_norm": 0.43472474813461304, "learning_rate": 1.7340334305086052e-05, "loss": 0.353, "step": 12800 }, { "epoch": 0.2376700096167056, "grad_norm": 0.28846633434295654, "learning_rate": 1.733954207578251e-05, "loss": 0.3997, "step": 12802 }, { "epoch": 0.23770713975412422, "grad_norm": 0.3043912351131439, "learning_rate": 1.7338749746609783e-05, "loss": 0.4526, "step": 12804 }, { "epoch": 0.23774426989154288, "grad_norm": 0.41464704275131226, "learning_rate": 1.7337957317578654e-05, "loss": 0.1918, "step": 12806 }, { "epoch": 0.2377814000289615, "grad_norm": 0.30406907200813293, "learning_rate": 1.7337164788699908e-05, "loss": 0.2603, "step": 12808 }, { "epoch": 0.23781853016638013, "grad_norm": 0.33571094274520874, "learning_rate": 1.7336372159984323e-05, "loss": 0.4518, "step": 12810 }, { "epoch": 0.2378556603037988, "grad_norm": 0.3648044168949127, "learning_rate": 1.733557943144269e-05, "loss": 0.3453, "step": 12812 }, { "epoch": 0.23789279044121742, "grad_norm": 0.2715219557285309, "learning_rate": 1.7334786603085792e-05, "loss": 0.4085, "step": 12814 }, { "epoch": 0.23792992057863607, "grad_norm": 0.24025827646255493, "learning_rate": 1.733399367492442e-05, "loss": 0.2742, "step": 12816 }, { "epoch": 0.2379670507160547, "grad_norm": 0.3469150960445404, "learning_rate": 1.7333200646969358e-05, "loss": 0.2603, "step": 12818 }, { "epoch": 0.23800418085347333, "grad_norm": 0.364886611700058, "learning_rate": 1.7332407519231406e-05, "loss": 0.4342, "step": 12820 }, { "epoch": 0.23804131099089199, "grad_norm": 0.3781435191631317, "learning_rate": 1.733161429172135e-05, "loss": 0.3451, "step": 12822 }, { "epoch": 0.23807844112831061, "grad_norm": 0.24098388850688934, "learning_rate": 1.7330820964449984e-05, "loss": 0.2947, "step": 12824 }, { "epoch": 0.23811557126572924, "grad_norm": 0.3040056526660919, "learning_rate": 1.73300275374281e-05, "loss": 0.4029, "step": 12826 }, { "epoch": 0.2381527014031479, "grad_norm": 0.457302451133728, "learning_rate": 1.7329234010666503e-05, "loss": 0.1853, "step": 12828 }, { "epoch": 0.23818983154056653, "grad_norm": 0.38976603746414185, "learning_rate": 1.732844038417598e-05, "loss": 0.324, "step": 12830 }, { "epoch": 0.23822696167798518, "grad_norm": 0.38723477721214294, "learning_rate": 1.7327646657967335e-05, "loss": 0.4528, "step": 12832 }, { "epoch": 0.2382640918154038, "grad_norm": 0.38451531529426575, "learning_rate": 1.7326852832051368e-05, "loss": 0.2307, "step": 12834 }, { "epoch": 0.23830122195282244, "grad_norm": 0.41715332865715027, "learning_rate": 1.732605890643888e-05, "loss": 0.2744, "step": 12836 }, { "epoch": 0.2383383520902411, "grad_norm": 0.5411593317985535, "learning_rate": 1.7325264881140677e-05, "loss": 0.2953, "step": 12838 }, { "epoch": 0.23837548222765972, "grad_norm": 0.4014984369277954, "learning_rate": 1.7324470756167557e-05, "loss": 0.2533, "step": 12840 }, { "epoch": 0.23841261236507835, "grad_norm": 0.42186856269836426, "learning_rate": 1.732367653153033e-05, "loss": 0.2571, "step": 12842 }, { "epoch": 0.238449742502497, "grad_norm": 0.39245909452438354, "learning_rate": 1.7322882207239808e-05, "loss": 0.3315, "step": 12844 }, { "epoch": 0.23848687263991564, "grad_norm": 0.3485632836818695, "learning_rate": 1.732208778330679e-05, "loss": 0.4277, "step": 12846 }, { "epoch": 0.23852400277733427, "grad_norm": 0.4353030025959015, "learning_rate": 1.7321293259742088e-05, "loss": 0.2823, "step": 12848 }, { "epoch": 0.23856113291475292, "grad_norm": 0.3382247984409332, "learning_rate": 1.7320498636556514e-05, "loss": 0.4936, "step": 12850 }, { "epoch": 0.23859826305217155, "grad_norm": 0.36883148550987244, "learning_rate": 1.7319703913760883e-05, "loss": 0.1888, "step": 12852 }, { "epoch": 0.2386353931895902, "grad_norm": 0.4192979633808136, "learning_rate": 1.7318909091366004e-05, "loss": 0.2758, "step": 12854 }, { "epoch": 0.23867252332700883, "grad_norm": 0.3251652121543884, "learning_rate": 1.7318114169382697e-05, "loss": 0.3805, "step": 12856 }, { "epoch": 0.23870965346442746, "grad_norm": 0.38844212889671326, "learning_rate": 1.7317319147821777e-05, "loss": 0.4159, "step": 12858 }, { "epoch": 0.23874678360184612, "grad_norm": 0.30372580885887146, "learning_rate": 1.7316524026694062e-05, "loss": 0.2436, "step": 12860 }, { "epoch": 0.23878391373926475, "grad_norm": 0.3229014575481415, "learning_rate": 1.731572880601037e-05, "loss": 0.5588, "step": 12862 }, { "epoch": 0.23882104387668338, "grad_norm": 0.2849932312965393, "learning_rate": 1.731493348578152e-05, "loss": 0.193, "step": 12864 }, { "epoch": 0.23885817401410203, "grad_norm": 0.5650273561477661, "learning_rate": 1.731413806601834e-05, "loss": 0.3707, "step": 12866 }, { "epoch": 0.23889530415152066, "grad_norm": 0.3212253451347351, "learning_rate": 1.7313342546731643e-05, "loss": 0.3246, "step": 12868 }, { "epoch": 0.23893243428893932, "grad_norm": 0.2947758138179779, "learning_rate": 1.7312546927932264e-05, "loss": 0.2349, "step": 12870 }, { "epoch": 0.23896956442635794, "grad_norm": 0.43986088037490845, "learning_rate": 1.7311751209631027e-05, "loss": 0.4349, "step": 12872 }, { "epoch": 0.23900669456377657, "grad_norm": 0.2511743903160095, "learning_rate": 1.7310955391838754e-05, "loss": 0.4066, "step": 12874 }, { "epoch": 0.23904382470119523, "grad_norm": 0.36121439933776855, "learning_rate": 1.7310159474566275e-05, "loss": 0.2057, "step": 12876 }, { "epoch": 0.23908095483861386, "grad_norm": 0.37918469309806824, "learning_rate": 1.7309363457824428e-05, "loss": 0.2967, "step": 12878 }, { "epoch": 0.23911808497603249, "grad_norm": 0.48838090896606445, "learning_rate": 1.7308567341624033e-05, "loss": 0.3519, "step": 12880 }, { "epoch": 0.23915521511345114, "grad_norm": 0.2849925756454468, "learning_rate": 1.7307771125975935e-05, "loss": 0.3813, "step": 12882 }, { "epoch": 0.23919234525086977, "grad_norm": 0.3952077627182007, "learning_rate": 1.7306974810890954e-05, "loss": 0.4064, "step": 12884 }, { "epoch": 0.2392294753882884, "grad_norm": 0.5538647174835205, "learning_rate": 1.7306178396379937e-05, "loss": 0.151, "step": 12886 }, { "epoch": 0.23926660552570705, "grad_norm": 0.3408423662185669, "learning_rate": 1.7305381882453717e-05, "loss": 0.3177, "step": 12888 }, { "epoch": 0.23930373566312568, "grad_norm": 0.20129266381263733, "learning_rate": 1.7304585269123126e-05, "loss": 0.3393, "step": 12890 }, { "epoch": 0.23934086580054434, "grad_norm": 0.4235449433326721, "learning_rate": 1.730378855639901e-05, "loss": 0.4103, "step": 12892 }, { "epoch": 0.23937799593796297, "grad_norm": 0.4500958323478699, "learning_rate": 1.7302991744292214e-05, "loss": 0.3064, "step": 12894 }, { "epoch": 0.2394151260753816, "grad_norm": 0.2916750907897949, "learning_rate": 1.7302194832813577e-05, "loss": 0.4196, "step": 12896 }, { "epoch": 0.23945225621280025, "grad_norm": 0.2420589029788971, "learning_rate": 1.7301397821973937e-05, "loss": 0.37, "step": 12898 }, { "epoch": 0.23948938635021888, "grad_norm": 0.5080774426460266, "learning_rate": 1.7300600711784142e-05, "loss": 0.4509, "step": 12900 }, { "epoch": 0.2395265164876375, "grad_norm": 0.35668542981147766, "learning_rate": 1.7299803502255042e-05, "loss": 0.407, "step": 12902 }, { "epoch": 0.23956364662505616, "grad_norm": 0.2377709001302719, "learning_rate": 1.7299006193397478e-05, "loss": 0.0846, "step": 12904 }, { "epoch": 0.2396007767624748, "grad_norm": 0.30553528666496277, "learning_rate": 1.7298208785222305e-05, "loss": 0.1847, "step": 12906 }, { "epoch": 0.23963790689989345, "grad_norm": 2.6624503135681152, "learning_rate": 1.7297411277740374e-05, "loss": 0.433, "step": 12908 }, { "epoch": 0.23967503703731208, "grad_norm": 0.5066721439361572, "learning_rate": 1.7296613670962528e-05, "loss": 0.235, "step": 12910 }, { "epoch": 0.2397121671747307, "grad_norm": 0.3370498716831207, "learning_rate": 1.729581596489963e-05, "loss": 0.3136, "step": 12912 }, { "epoch": 0.23974929731214936, "grad_norm": 0.3181498348712921, "learning_rate": 1.7295018159562527e-05, "loss": 0.4081, "step": 12914 }, { "epoch": 0.239786427449568, "grad_norm": 0.2863244414329529, "learning_rate": 1.7294220254962083e-05, "loss": 0.3422, "step": 12916 }, { "epoch": 0.23982355758698662, "grad_norm": 0.5080907940864563, "learning_rate": 1.7293422251109144e-05, "loss": 0.4009, "step": 12918 }, { "epoch": 0.23986068772440527, "grad_norm": 0.2951410710811615, "learning_rate": 1.729262414801458e-05, "loss": 0.2905, "step": 12920 }, { "epoch": 0.2398978178618239, "grad_norm": 0.4922964870929718, "learning_rate": 1.7291825945689244e-05, "loss": 0.2797, "step": 12922 }, { "epoch": 0.23993494799924253, "grad_norm": 0.3180672824382782, "learning_rate": 1.7291027644143997e-05, "loss": 0.3735, "step": 12924 }, { "epoch": 0.2399720781366612, "grad_norm": 0.3576677143573761, "learning_rate": 1.72902292433897e-05, "loss": 0.4923, "step": 12926 }, { "epoch": 0.24000920827407982, "grad_norm": 0.35889574885368347, "learning_rate": 1.7289430743437226e-05, "loss": 0.2224, "step": 12928 }, { "epoch": 0.24004633841149847, "grad_norm": 0.37475645542144775, "learning_rate": 1.728863214429743e-05, "loss": 0.4123, "step": 12930 }, { "epoch": 0.2400834685489171, "grad_norm": 0.4415918290615082, "learning_rate": 1.728783344598118e-05, "loss": 0.3953, "step": 12932 }, { "epoch": 0.24012059868633573, "grad_norm": 0.44548550248146057, "learning_rate": 1.728703464849935e-05, "loss": 0.4443, "step": 12934 }, { "epoch": 0.24015772882375439, "grad_norm": 0.3238242566585541, "learning_rate": 1.728623575186281e-05, "loss": 0.2713, "step": 12936 }, { "epoch": 0.240194858961173, "grad_norm": 0.40348416566848755, "learning_rate": 1.728543675608242e-05, "loss": 0.3249, "step": 12938 }, { "epoch": 0.24023198909859164, "grad_norm": 0.46820053458213806, "learning_rate": 1.7284637661169058e-05, "loss": 0.2933, "step": 12940 }, { "epoch": 0.2402691192360103, "grad_norm": 0.37959709763526917, "learning_rate": 1.72838384671336e-05, "loss": 0.4103, "step": 12942 }, { "epoch": 0.24030624937342893, "grad_norm": 0.44922611117362976, "learning_rate": 1.7283039173986917e-05, "loss": 0.1696, "step": 12944 }, { "epoch": 0.24034337951084758, "grad_norm": 0.33543509244918823, "learning_rate": 1.7282239781739885e-05, "loss": 0.2149, "step": 12946 }, { "epoch": 0.2403805096482662, "grad_norm": 0.3309846520423889, "learning_rate": 1.7281440290403386e-05, "loss": 0.2548, "step": 12948 }, { "epoch": 0.24041763978568484, "grad_norm": 0.41331303119659424, "learning_rate": 1.728064069998829e-05, "loss": 0.3792, "step": 12950 }, { "epoch": 0.2404547699231035, "grad_norm": 0.3837539851665497, "learning_rate": 1.7279841010505488e-05, "loss": 0.3256, "step": 12952 }, { "epoch": 0.24049190006052212, "grad_norm": 0.4244285821914673, "learning_rate": 1.7279041221965852e-05, "loss": 0.243, "step": 12954 }, { "epoch": 0.24052903019794075, "grad_norm": 0.30095329880714417, "learning_rate": 1.7278241334380265e-05, "loss": 0.4204, "step": 12956 }, { "epoch": 0.2405661603353594, "grad_norm": 0.4282054007053375, "learning_rate": 1.727744134775962e-05, "loss": 0.222, "step": 12958 }, { "epoch": 0.24060329047277804, "grad_norm": 0.31342217326164246, "learning_rate": 1.7276641262114795e-05, "loss": 0.2665, "step": 12960 }, { "epoch": 0.24064042061019666, "grad_norm": 0.5499395132064819, "learning_rate": 1.7275841077456677e-05, "loss": 0.3654, "step": 12962 }, { "epoch": 0.24067755074761532, "grad_norm": 0.3552193343639374, "learning_rate": 1.7275040793796157e-05, "loss": 0.5202, "step": 12964 }, { "epoch": 0.24071468088503395, "grad_norm": 0.3272906541824341, "learning_rate": 1.727424041114412e-05, "loss": 0.271, "step": 12966 }, { "epoch": 0.2407518110224526, "grad_norm": 0.49146223068237305, "learning_rate": 1.7273439929511466e-05, "loss": 0.3732, "step": 12968 }, { "epoch": 0.24078894115987123, "grad_norm": 0.5061178803443909, "learning_rate": 1.7272639348909078e-05, "loss": 0.4085, "step": 12970 }, { "epoch": 0.24082607129728986, "grad_norm": 0.39546099305152893, "learning_rate": 1.727183866934785e-05, "loss": 0.3143, "step": 12972 }, { "epoch": 0.24086320143470852, "grad_norm": 0.5701996088027954, "learning_rate": 1.727103789083868e-05, "loss": 0.341, "step": 12974 }, { "epoch": 0.24090033157212715, "grad_norm": 0.3677946627140045, "learning_rate": 1.727023701339247e-05, "loss": 0.2923, "step": 12976 }, { "epoch": 0.24093746170954577, "grad_norm": 0.3409651815891266, "learning_rate": 1.7269436037020106e-05, "loss": 0.3411, "step": 12978 }, { "epoch": 0.24097459184696443, "grad_norm": 0.29803889989852905, "learning_rate": 1.726863496173249e-05, "loss": 0.4735, "step": 12980 }, { "epoch": 0.24101172198438306, "grad_norm": 0.2405932992696762, "learning_rate": 1.726783378754053e-05, "loss": 0.3239, "step": 12982 }, { "epoch": 0.24104885212180172, "grad_norm": 0.3031086027622223, "learning_rate": 1.7267032514455117e-05, "loss": 0.4129, "step": 12984 }, { "epoch": 0.24108598225922034, "grad_norm": 0.3151809871196747, "learning_rate": 1.726623114248716e-05, "loss": 0.4174, "step": 12986 }, { "epoch": 0.24112311239663897, "grad_norm": 0.48531627655029297, "learning_rate": 1.726542967164756e-05, "loss": 0.3294, "step": 12988 }, { "epoch": 0.24116024253405763, "grad_norm": 0.4126521646976471, "learning_rate": 1.7264628101947226e-05, "loss": 0.3746, "step": 12990 }, { "epoch": 0.24119737267147626, "grad_norm": 0.3163250684738159, "learning_rate": 1.7263826433397066e-05, "loss": 0.2242, "step": 12992 }, { "epoch": 0.24123450280889489, "grad_norm": 0.3708619475364685, "learning_rate": 1.7263024666007986e-05, "loss": 0.225, "step": 12994 }, { "epoch": 0.24127163294631354, "grad_norm": 0.30232563614845276, "learning_rate": 1.7262222799790893e-05, "loss": 0.1594, "step": 12996 }, { "epoch": 0.24130876308373217, "grad_norm": 0.3360409438610077, "learning_rate": 1.7261420834756704e-05, "loss": 0.2606, "step": 12998 }, { "epoch": 0.2413458932211508, "grad_norm": 0.4139024019241333, "learning_rate": 1.7260618770916325e-05, "loss": 0.3651, "step": 13000 }, { "epoch": 0.24138302335856945, "grad_norm": 0.34493640065193176, "learning_rate": 1.725981660828067e-05, "loss": 0.2583, "step": 13002 }, { "epoch": 0.24142015349598808, "grad_norm": 0.4492935240268707, "learning_rate": 1.725901434686066e-05, "loss": 0.2989, "step": 13004 }, { "epoch": 0.24145728363340674, "grad_norm": 0.25691118836402893, "learning_rate": 1.725821198666721e-05, "loss": 0.2701, "step": 13006 }, { "epoch": 0.24149441377082537, "grad_norm": 0.33022820949554443, "learning_rate": 1.7257409527711233e-05, "loss": 0.3715, "step": 13008 }, { "epoch": 0.241531543908244, "grad_norm": 0.29889681935310364, "learning_rate": 1.7256606970003652e-05, "loss": 0.3527, "step": 13010 }, { "epoch": 0.24156867404566265, "grad_norm": 0.33318424224853516, "learning_rate": 1.7255804313555384e-05, "loss": 0.4145, "step": 13012 }, { "epoch": 0.24160580418308128, "grad_norm": 0.29783084988594055, "learning_rate": 1.7255001558377356e-05, "loss": 0.238, "step": 13014 }, { "epoch": 0.2416429343204999, "grad_norm": 0.28089606761932373, "learning_rate": 1.7254198704480487e-05, "loss": 0.2178, "step": 13016 }, { "epoch": 0.24168006445791856, "grad_norm": 0.35583245754241943, "learning_rate": 1.72533957518757e-05, "loss": 0.3795, "step": 13018 }, { "epoch": 0.2417171945953372, "grad_norm": 0.40556272864341736, "learning_rate": 1.7252592700573923e-05, "loss": 0.3324, "step": 13020 }, { "epoch": 0.24175432473275585, "grad_norm": 0.35644981265068054, "learning_rate": 1.7251789550586086e-05, "loss": 0.2773, "step": 13022 }, { "epoch": 0.24179145487017448, "grad_norm": 0.35568147897720337, "learning_rate": 1.7250986301923115e-05, "loss": 0.2557, "step": 13024 }, { "epoch": 0.2418285850075931, "grad_norm": 0.3319592773914337, "learning_rate": 1.725018295459594e-05, "loss": 0.2168, "step": 13026 }, { "epoch": 0.24186571514501176, "grad_norm": 0.29799574613571167, "learning_rate": 1.724937950861549e-05, "loss": 0.3508, "step": 13028 }, { "epoch": 0.2419028452824304, "grad_norm": 0.339399129152298, "learning_rate": 1.72485759639927e-05, "loss": 0.2623, "step": 13030 }, { "epoch": 0.24193997541984902, "grad_norm": 0.41107305884361267, "learning_rate": 1.7247772320738503e-05, "loss": 0.2632, "step": 13032 }, { "epoch": 0.24197710555726767, "grad_norm": 0.3391827344894409, "learning_rate": 1.7246968578863836e-05, "loss": 0.4389, "step": 13034 }, { "epoch": 0.2420142356946863, "grad_norm": 0.45916324853897095, "learning_rate": 1.7246164738379632e-05, "loss": 0.3764, "step": 13036 }, { "epoch": 0.24205136583210493, "grad_norm": 0.35283342003822327, "learning_rate": 1.724536079929683e-05, "loss": 0.1874, "step": 13038 }, { "epoch": 0.2420884959695236, "grad_norm": 0.4572621285915375, "learning_rate": 1.7244556761626372e-05, "loss": 0.4463, "step": 13040 }, { "epoch": 0.24212562610694222, "grad_norm": 0.4193453788757324, "learning_rate": 1.724375262537919e-05, "loss": 0.3986, "step": 13042 }, { "epoch": 0.24216275624436087, "grad_norm": 0.3569111227989197, "learning_rate": 1.7242948390566243e-05, "loss": 0.388, "step": 13044 }, { "epoch": 0.2421998863817795, "grad_norm": 0.2860376536846161, "learning_rate": 1.7242144057198457e-05, "loss": 0.2487, "step": 13046 }, { "epoch": 0.24223701651919813, "grad_norm": 0.6685859560966492, "learning_rate": 1.7241339625286783e-05, "loss": 0.3617, "step": 13048 }, { "epoch": 0.24227414665661678, "grad_norm": 0.25891509652137756, "learning_rate": 1.7240535094842172e-05, "loss": 0.31, "step": 13050 }, { "epoch": 0.2423112767940354, "grad_norm": 0.2941647469997406, "learning_rate": 1.723973046587556e-05, "loss": 0.2279, "step": 13052 }, { "epoch": 0.24234840693145404, "grad_norm": 0.42375341057777405, "learning_rate": 1.7238925738397906e-05, "loss": 0.5482, "step": 13054 }, { "epoch": 0.2423855370688727, "grad_norm": 0.3624899983406067, "learning_rate": 1.7238120912420155e-05, "loss": 0.3964, "step": 13056 }, { "epoch": 0.24242266720629133, "grad_norm": 0.3396706283092499, "learning_rate": 1.723731598795326e-05, "loss": 0.402, "step": 13058 }, { "epoch": 0.24245979734370998, "grad_norm": 0.3352181017398834, "learning_rate": 1.7236510965008168e-05, "loss": 0.3338, "step": 13060 }, { "epoch": 0.2424969274811286, "grad_norm": 0.2515247166156769, "learning_rate": 1.723570584359584e-05, "loss": 0.2738, "step": 13062 }, { "epoch": 0.24253405761854724, "grad_norm": 0.293032705783844, "learning_rate": 1.7234900623727232e-05, "loss": 0.2411, "step": 13064 }, { "epoch": 0.2425711877559659, "grad_norm": 0.3973437547683716, "learning_rate": 1.7234095305413295e-05, "loss": 0.4046, "step": 13066 }, { "epoch": 0.24260831789338452, "grad_norm": 0.321879506111145, "learning_rate": 1.723328988866499e-05, "loss": 0.3887, "step": 13068 }, { "epoch": 0.24264544803080315, "grad_norm": 0.4179864227771759, "learning_rate": 1.7232484373493274e-05, "loss": 0.3397, "step": 13070 }, { "epoch": 0.2426825781682218, "grad_norm": 0.29522237181663513, "learning_rate": 1.7231678759909113e-05, "loss": 0.4788, "step": 13072 }, { "epoch": 0.24271970830564044, "grad_norm": 0.33710727095603943, "learning_rate": 1.723087304792346e-05, "loss": 0.3504, "step": 13074 }, { "epoch": 0.24275683844305906, "grad_norm": 0.251616507768631, "learning_rate": 1.723006723754729e-05, "loss": 0.2514, "step": 13076 }, { "epoch": 0.24279396858047772, "grad_norm": 0.4470879137516022, "learning_rate": 1.7229261328791557e-05, "loss": 0.4094, "step": 13078 }, { "epoch": 0.24283109871789635, "grad_norm": 0.29195520281791687, "learning_rate": 1.7228455321667233e-05, "loss": 0.506, "step": 13080 }, { "epoch": 0.242868228855315, "grad_norm": 0.34468114376068115, "learning_rate": 1.7227649216185283e-05, "loss": 0.2528, "step": 13082 }, { "epoch": 0.24290535899273363, "grad_norm": 1.06187903881073, "learning_rate": 1.722684301235668e-05, "loss": 0.4135, "step": 13084 }, { "epoch": 0.24294248913015226, "grad_norm": 0.3771825432777405, "learning_rate": 1.7226036710192385e-05, "loss": 0.2728, "step": 13086 }, { "epoch": 0.24297961926757092, "grad_norm": 0.3228921890258789, "learning_rate": 1.7225230309703378e-05, "loss": 0.5064, "step": 13088 }, { "epoch": 0.24301674940498955, "grad_norm": 0.32422319054603577, "learning_rate": 1.722442381090063e-05, "loss": 0.5009, "step": 13090 }, { "epoch": 0.24305387954240817, "grad_norm": 0.5176020264625549, "learning_rate": 1.7223617213795113e-05, "loss": 0.3618, "step": 13092 }, { "epoch": 0.24309100967982683, "grad_norm": 0.47559887170791626, "learning_rate": 1.72228105183978e-05, "loss": 0.435, "step": 13094 }, { "epoch": 0.24312813981724546, "grad_norm": 0.38654300570487976, "learning_rate": 1.7222003724719672e-05, "loss": 0.3917, "step": 13096 }, { "epoch": 0.24316526995466411, "grad_norm": 0.2886367738246918, "learning_rate": 1.7221196832771707e-05, "loss": 0.2535, "step": 13098 }, { "epoch": 0.24320240009208274, "grad_norm": 0.3581579029560089, "learning_rate": 1.722038984256488e-05, "loss": 0.2081, "step": 13100 }, { "epoch": 0.24323953022950137, "grad_norm": 0.3354935050010681, "learning_rate": 1.721958275411018e-05, "loss": 0.2213, "step": 13102 }, { "epoch": 0.24327666036692003, "grad_norm": 0.44140952825546265, "learning_rate": 1.721877556741858e-05, "loss": 0.1751, "step": 13104 }, { "epoch": 0.24331379050433866, "grad_norm": 0.32853755354881287, "learning_rate": 1.7217968282501066e-05, "loss": 0.4917, "step": 13106 }, { "epoch": 0.24335092064175728, "grad_norm": 0.355752170085907, "learning_rate": 1.721716089936863e-05, "loss": 0.3928, "step": 13108 }, { "epoch": 0.24338805077917594, "grad_norm": 0.4365628957748413, "learning_rate": 1.7216353418032245e-05, "loss": 0.198, "step": 13110 }, { "epoch": 0.24342518091659457, "grad_norm": 0.4583829343318939, "learning_rate": 1.7215545838502913e-05, "loss": 0.3316, "step": 13112 }, { "epoch": 0.2434623110540132, "grad_norm": 0.3248903751373291, "learning_rate": 1.7214738160791607e-05, "loss": 0.3134, "step": 13114 }, { "epoch": 0.24349944119143185, "grad_norm": 0.33115360140800476, "learning_rate": 1.7213930384909332e-05, "loss": 0.3954, "step": 13116 }, { "epoch": 0.24353657132885048, "grad_norm": 0.2920989692211151, "learning_rate": 1.7213122510867068e-05, "loss": 0.1111, "step": 13118 }, { "epoch": 0.24357370146626914, "grad_norm": 0.41305750608444214, "learning_rate": 1.7212314538675813e-05, "loss": 0.3176, "step": 13120 }, { "epoch": 0.24361083160368777, "grad_norm": 0.44872161746025085, "learning_rate": 1.7211506468346562e-05, "loss": 0.3408, "step": 13122 }, { "epoch": 0.2436479617411064, "grad_norm": 0.25415483117103577, "learning_rate": 1.721069829989031e-05, "loss": 0.1128, "step": 13124 }, { "epoch": 0.24368509187852505, "grad_norm": 0.3859167695045471, "learning_rate": 1.720989003331805e-05, "loss": 0.3534, "step": 13126 }, { "epoch": 0.24372222201594368, "grad_norm": 0.41660287976264954, "learning_rate": 1.7209081668640787e-05, "loss": 0.3616, "step": 13128 }, { "epoch": 0.2437593521533623, "grad_norm": 0.322981595993042, "learning_rate": 1.7208273205869513e-05, "loss": 0.4666, "step": 13130 }, { "epoch": 0.24379648229078096, "grad_norm": 0.4274020493030548, "learning_rate": 1.7207464645015233e-05, "loss": 0.3299, "step": 13132 }, { "epoch": 0.2438336124281996, "grad_norm": 0.35858261585235596, "learning_rate": 1.7206655986088946e-05, "loss": 0.3434, "step": 13134 }, { "epoch": 0.24387074256561825, "grad_norm": 0.3371645212173462, "learning_rate": 1.720584722910166e-05, "loss": 0.2476, "step": 13136 }, { "epoch": 0.24390787270303688, "grad_norm": 0.33661943674087524, "learning_rate": 1.7205038374064375e-05, "loss": 0.2343, "step": 13138 }, { "epoch": 0.2439450028404555, "grad_norm": 0.37192678451538086, "learning_rate": 1.72042294209881e-05, "loss": 0.5577, "step": 13140 }, { "epoch": 0.24398213297787416, "grad_norm": 0.3558404743671417, "learning_rate": 1.720342036988384e-05, "loss": 0.2811, "step": 13142 }, { "epoch": 0.2440192631152928, "grad_norm": 0.27632248401641846, "learning_rate": 1.720261122076261e-05, "loss": 0.3152, "step": 13144 }, { "epoch": 0.24405639325271142, "grad_norm": 0.5439814925193787, "learning_rate": 1.7201801973635416e-05, "loss": 0.2343, "step": 13146 }, { "epoch": 0.24409352339013007, "grad_norm": 0.3401106595993042, "learning_rate": 1.7200992628513265e-05, "loss": 0.313, "step": 13148 }, { "epoch": 0.2441306535275487, "grad_norm": 0.4511454105377197, "learning_rate": 1.7200183185407176e-05, "loss": 0.2871, "step": 13150 }, { "epoch": 0.24416778366496733, "grad_norm": 0.2866542935371399, "learning_rate": 1.719937364432816e-05, "loss": 0.3224, "step": 13152 }, { "epoch": 0.244204913802386, "grad_norm": 0.38121873140335083, "learning_rate": 1.7198564005287234e-05, "loss": 0.4019, "step": 13154 }, { "epoch": 0.24424204393980462, "grad_norm": 0.4640549421310425, "learning_rate": 1.719775426829541e-05, "loss": 0.2572, "step": 13156 }, { "epoch": 0.24427917407722327, "grad_norm": 0.3459872603416443, "learning_rate": 1.7196944433363714e-05, "loss": 0.3596, "step": 13158 }, { "epoch": 0.2443163042146419, "grad_norm": 0.3143463134765625, "learning_rate": 1.7196134500503162e-05, "loss": 0.3612, "step": 13160 }, { "epoch": 0.24435343435206053, "grad_norm": 0.36931899189949036, "learning_rate": 1.7195324469724774e-05, "loss": 0.276, "step": 13162 }, { "epoch": 0.24439056448947918, "grad_norm": 0.4110381603240967, "learning_rate": 1.7194514341039572e-05, "loss": 0.5731, "step": 13164 }, { "epoch": 0.2444276946268978, "grad_norm": 0.3556995391845703, "learning_rate": 1.719370411445858e-05, "loss": 0.3167, "step": 13166 }, { "epoch": 0.24446482476431644, "grad_norm": 0.30289989709854126, "learning_rate": 1.719289378999282e-05, "loss": 0.2691, "step": 13168 }, { "epoch": 0.2445019549017351, "grad_norm": 0.3584682047367096, "learning_rate": 1.7192083367653324e-05, "loss": 0.2148, "step": 13170 }, { "epoch": 0.24453908503915373, "grad_norm": 0.3848544657230377, "learning_rate": 1.7191272847451117e-05, "loss": 0.2792, "step": 13172 }, { "epoch": 0.24457621517657238, "grad_norm": 0.42471927404403687, "learning_rate": 1.7190462229397224e-05, "loss": 0.2176, "step": 13174 }, { "epoch": 0.244613345313991, "grad_norm": 0.3466026782989502, "learning_rate": 1.718965151350268e-05, "loss": 0.3751, "step": 13176 }, { "epoch": 0.24465047545140964, "grad_norm": 0.41115647554397583, "learning_rate": 1.7188840699778516e-05, "loss": 0.3462, "step": 13178 }, { "epoch": 0.2446876055888283, "grad_norm": 0.41987869143486023, "learning_rate": 1.7188029788235757e-05, "loss": 0.3363, "step": 13180 }, { "epoch": 0.24472473572624692, "grad_norm": 0.3660711944103241, "learning_rate": 1.718721877888545e-05, "loss": 0.2393, "step": 13182 }, { "epoch": 0.24476186586366555, "grad_norm": 0.49280649423599243, "learning_rate": 1.718640767173862e-05, "loss": 0.3269, "step": 13184 }, { "epoch": 0.2447989960010842, "grad_norm": 0.3349471390247345, "learning_rate": 1.7185596466806308e-05, "loss": 0.2278, "step": 13186 }, { "epoch": 0.24483612613850284, "grad_norm": 1.1708298921585083, "learning_rate": 1.7184785164099553e-05, "loss": 0.3443, "step": 13188 }, { "epoch": 0.24487325627592146, "grad_norm": 0.35315534472465515, "learning_rate": 1.718397376362939e-05, "loss": 0.2783, "step": 13190 }, { "epoch": 0.24491038641334012, "grad_norm": 0.34423086047172546, "learning_rate": 1.7183162265406865e-05, "loss": 0.2307, "step": 13192 }, { "epoch": 0.24494751655075875, "grad_norm": 0.39267128705978394, "learning_rate": 1.7182350669443017e-05, "loss": 0.2213, "step": 13194 }, { "epoch": 0.2449846466881774, "grad_norm": 0.422139972448349, "learning_rate": 1.718153897574889e-05, "loss": 0.3307, "step": 13196 }, { "epoch": 0.24502177682559603, "grad_norm": 0.44690245389938354, "learning_rate": 1.7180727184335525e-05, "loss": 0.4274, "step": 13198 }, { "epoch": 0.24505890696301466, "grad_norm": 0.41924849152565, "learning_rate": 1.7179915295213976e-05, "loss": 0.2368, "step": 13200 }, { "epoch": 0.24509603710043332, "grad_norm": 0.3475898802280426, "learning_rate": 1.7179103308395285e-05, "loss": 0.1549, "step": 13202 }, { "epoch": 0.24513316723785195, "grad_norm": 0.3980828821659088, "learning_rate": 1.7178291223890503e-05, "loss": 0.3345, "step": 13204 }, { "epoch": 0.24517029737527057, "grad_norm": 0.3410588800907135, "learning_rate": 1.717747904171068e-05, "loss": 0.3222, "step": 13206 }, { "epoch": 0.24520742751268923, "grad_norm": 0.30045706033706665, "learning_rate": 1.7176666761866862e-05, "loss": 0.4054, "step": 13208 }, { "epoch": 0.24524455765010786, "grad_norm": 0.3879125714302063, "learning_rate": 1.717585438437011e-05, "loss": 0.3389, "step": 13210 }, { "epoch": 0.24528168778752651, "grad_norm": 0.347405344247818, "learning_rate": 1.717504190923147e-05, "loss": 0.2948, "step": 13212 }, { "epoch": 0.24531881792494514, "grad_norm": 0.32417306303977966, "learning_rate": 1.7174229336462003e-05, "loss": 0.3303, "step": 13214 }, { "epoch": 0.24535594806236377, "grad_norm": 0.4360882043838501, "learning_rate": 1.717341666607277e-05, "loss": 0.3815, "step": 13216 }, { "epoch": 0.24539307819978243, "grad_norm": 0.2860407531261444, "learning_rate": 1.7172603898074814e-05, "loss": 0.5392, "step": 13218 }, { "epoch": 0.24543020833720106, "grad_norm": 0.4340052008628845, "learning_rate": 1.7171791032479206e-05, "loss": 0.2883, "step": 13220 }, { "epoch": 0.24546733847461968, "grad_norm": 0.37193435430526733, "learning_rate": 1.7170978069297007e-05, "loss": 0.1349, "step": 13222 }, { "epoch": 0.24550446861203834, "grad_norm": 0.40919381380081177, "learning_rate": 1.7170165008539276e-05, "loss": 0.3871, "step": 13224 }, { "epoch": 0.24554159874945697, "grad_norm": 0.3199876844882965, "learning_rate": 1.7169351850217074e-05, "loss": 0.3669, "step": 13226 }, { "epoch": 0.2455787288868756, "grad_norm": 0.45857229828834534, "learning_rate": 1.7168538594341468e-05, "loss": 0.4147, "step": 13228 }, { "epoch": 0.24561585902429425, "grad_norm": 0.22806601226329803, "learning_rate": 1.7167725240923526e-05, "loss": 0.3579, "step": 13230 }, { "epoch": 0.24565298916171288, "grad_norm": 0.45223361253738403, "learning_rate": 1.716691178997431e-05, "loss": 0.1652, "step": 13232 }, { "epoch": 0.24569011929913154, "grad_norm": 0.23677855730056763, "learning_rate": 1.7166098241504895e-05, "loss": 0.2829, "step": 13234 }, { "epoch": 0.24572724943655017, "grad_norm": 0.3565402626991272, "learning_rate": 1.7165284595526347e-05, "loss": 0.491, "step": 13236 }, { "epoch": 0.2457643795739688, "grad_norm": 0.3340590000152588, "learning_rate": 1.7164470852049736e-05, "loss": 0.2391, "step": 13238 }, { "epoch": 0.24580150971138745, "grad_norm": 0.39384719729423523, "learning_rate": 1.716365701108614e-05, "loss": 0.2928, "step": 13240 }, { "epoch": 0.24583863984880608, "grad_norm": 0.436230331659317, "learning_rate": 1.7162843072646626e-05, "loss": 0.3541, "step": 13242 }, { "epoch": 0.2458757699862247, "grad_norm": 0.3410209119319916, "learning_rate": 1.7162029036742275e-05, "loss": 0.3494, "step": 13244 }, { "epoch": 0.24591290012364336, "grad_norm": 0.49817532300949097, "learning_rate": 1.7161214903384165e-05, "loss": 0.2482, "step": 13246 }, { "epoch": 0.245950030261062, "grad_norm": 0.3439389169216156, "learning_rate": 1.7160400672583367e-05, "loss": 0.3116, "step": 13248 }, { "epoch": 0.24598716039848065, "grad_norm": 0.28430119156837463, "learning_rate": 1.715958634435096e-05, "loss": 0.296, "step": 13250 }, { "epoch": 0.24602429053589928, "grad_norm": 0.3383987545967102, "learning_rate": 1.7158771918698036e-05, "loss": 0.4191, "step": 13252 }, { "epoch": 0.2460614206733179, "grad_norm": 0.29728689789772034, "learning_rate": 1.7157957395635666e-05, "loss": 0.4433, "step": 13254 }, { "epoch": 0.24609855081073656, "grad_norm": 0.2834780216217041, "learning_rate": 1.7157142775174935e-05, "loss": 0.4482, "step": 13256 }, { "epoch": 0.2461356809481552, "grad_norm": 0.36353743076324463, "learning_rate": 1.715632805732693e-05, "loss": 0.2665, "step": 13258 }, { "epoch": 0.24617281108557382, "grad_norm": 0.2620483636856079, "learning_rate": 1.7155513242102736e-05, "loss": 0.3502, "step": 13260 }, { "epoch": 0.24620994122299247, "grad_norm": 0.31910502910614014, "learning_rate": 1.7154698329513443e-05, "loss": 0.3247, "step": 13262 }, { "epoch": 0.2462470713604111, "grad_norm": 0.29031166434288025, "learning_rate": 1.715388331957013e-05, "loss": 0.2791, "step": 13264 }, { "epoch": 0.24628420149782973, "grad_norm": 0.2690592408180237, "learning_rate": 1.7153068212283898e-05, "loss": 0.2403, "step": 13266 }, { "epoch": 0.2463213316352484, "grad_norm": 0.34039804339408875, "learning_rate": 1.715225300766583e-05, "loss": 0.2414, "step": 13268 }, { "epoch": 0.24635846177266701, "grad_norm": 0.6392877697944641, "learning_rate": 1.7151437705727026e-05, "loss": 0.3618, "step": 13270 }, { "epoch": 0.24639559191008567, "grad_norm": 0.8884836435317993, "learning_rate": 1.7150622306478575e-05, "loss": 0.3704, "step": 13272 }, { "epoch": 0.2464327220475043, "grad_norm": 0.23477886617183685, "learning_rate": 1.7149806809931572e-05, "loss": 0.1839, "step": 13274 }, { "epoch": 0.24646985218492293, "grad_norm": 0.38277870416641235, "learning_rate": 1.7148991216097112e-05, "loss": 0.2553, "step": 13276 }, { "epoch": 0.24650698232234158, "grad_norm": 0.3473656475543976, "learning_rate": 1.7148175524986302e-05, "loss": 0.3704, "step": 13278 }, { "epoch": 0.2465441124597602, "grad_norm": 0.37068596482276917, "learning_rate": 1.7147359736610228e-05, "loss": 0.3807, "step": 13280 }, { "epoch": 0.24658124259717884, "grad_norm": 0.3488757610321045, "learning_rate": 1.714654385098e-05, "loss": 0.2192, "step": 13282 }, { "epoch": 0.2466183727345975, "grad_norm": 0.33016151189804077, "learning_rate": 1.7145727868106715e-05, "loss": 0.3596, "step": 13284 }, { "epoch": 0.24665550287201612, "grad_norm": 0.3586462438106537, "learning_rate": 1.714491178800148e-05, "loss": 0.2789, "step": 13286 }, { "epoch": 0.24669263300943478, "grad_norm": 0.4576156437397003, "learning_rate": 1.7144095610675397e-05, "loss": 0.2144, "step": 13288 }, { "epoch": 0.2467297631468534, "grad_norm": 0.37462636828422546, "learning_rate": 1.7143279336139565e-05, "loss": 0.268, "step": 13290 }, { "epoch": 0.24676689328427204, "grad_norm": 0.3459118902683258, "learning_rate": 1.7142462964405107e-05, "loss": 0.2834, "step": 13292 }, { "epoch": 0.2468040234216907, "grad_norm": 0.44189175963401794, "learning_rate": 1.7141646495483115e-05, "loss": 0.3729, "step": 13294 }, { "epoch": 0.24684115355910932, "grad_norm": 0.3049304485321045, "learning_rate": 1.714082992938471e-05, "loss": 0.3827, "step": 13296 }, { "epoch": 0.24687828369652795, "grad_norm": 0.260957807302475, "learning_rate": 1.7140013266121e-05, "loss": 0.245, "step": 13298 }, { "epoch": 0.2469154138339466, "grad_norm": 0.4045257866382599, "learning_rate": 1.7139196505703094e-05, "loss": 0.3808, "step": 13300 }, { "epoch": 0.24695254397136523, "grad_norm": 0.3291529417037964, "learning_rate": 1.7138379648142108e-05, "loss": 0.3517, "step": 13302 }, { "epoch": 0.24698967410878386, "grad_norm": 0.26678377389907837, "learning_rate": 1.7137562693449153e-05, "loss": 0.338, "step": 13304 }, { "epoch": 0.24702680424620252, "grad_norm": 0.5196218490600586, "learning_rate": 1.7136745641635356e-05, "loss": 0.282, "step": 13306 }, { "epoch": 0.24706393438362115, "grad_norm": 0.41561537981033325, "learning_rate": 1.7135928492711828e-05, "loss": 0.4068, "step": 13308 }, { "epoch": 0.2471010645210398, "grad_norm": 0.4688931107521057, "learning_rate": 1.713511124668968e-05, "loss": 0.2828, "step": 13310 }, { "epoch": 0.24713819465845843, "grad_norm": 0.37110626697540283, "learning_rate": 1.7134293903580047e-05, "loss": 0.3732, "step": 13312 }, { "epoch": 0.24717532479587706, "grad_norm": 0.4646414816379547, "learning_rate": 1.713347646339404e-05, "loss": 0.4901, "step": 13314 }, { "epoch": 0.24721245493329572, "grad_norm": 0.2735046446323395, "learning_rate": 1.713265892614279e-05, "loss": 0.5157, "step": 13316 }, { "epoch": 0.24724958507071434, "grad_norm": 0.3327465355396271, "learning_rate": 1.7131841291837416e-05, "loss": 0.2956, "step": 13318 }, { "epoch": 0.24728671520813297, "grad_norm": 0.3320567011833191, "learning_rate": 1.713102356048904e-05, "loss": 0.2655, "step": 13320 }, { "epoch": 0.24732384534555163, "grad_norm": 0.4400215148925781, "learning_rate": 1.7130205732108795e-05, "loss": 0.2801, "step": 13322 }, { "epoch": 0.24736097548297026, "grad_norm": 0.3597651422023773, "learning_rate": 1.712938780670781e-05, "loss": 0.2907, "step": 13324 }, { "epoch": 0.24739810562038891, "grad_norm": 0.3958512246608734, "learning_rate": 1.7128569784297207e-05, "loss": 0.4731, "step": 13326 }, { "epoch": 0.24743523575780754, "grad_norm": 0.37480098009109497, "learning_rate": 1.712775166488813e-05, "loss": 0.3264, "step": 13328 }, { "epoch": 0.24747236589522617, "grad_norm": 0.3136560916900635, "learning_rate": 1.7126933448491695e-05, "loss": 0.1936, "step": 13330 }, { "epoch": 0.24750949603264483, "grad_norm": 0.37380507588386536, "learning_rate": 1.7126115135119044e-05, "loss": 0.2505, "step": 13332 }, { "epoch": 0.24754662617006346, "grad_norm": 0.5548191070556641, "learning_rate": 1.7125296724781315e-05, "loss": 0.3475, "step": 13334 }, { "epoch": 0.24758375630748208, "grad_norm": 0.4348328411579132, "learning_rate": 1.7124478217489636e-05, "loss": 0.4385, "step": 13336 }, { "epoch": 0.24762088644490074, "grad_norm": 0.48877087235450745, "learning_rate": 1.7123659613255155e-05, "loss": 0.5022, "step": 13338 }, { "epoch": 0.24765801658231937, "grad_norm": 0.409216046333313, "learning_rate": 1.7122840912089002e-05, "loss": 0.3165, "step": 13340 }, { "epoch": 0.247695146719738, "grad_norm": 0.3926894962787628, "learning_rate": 1.7122022114002317e-05, "loss": 0.2234, "step": 13342 }, { "epoch": 0.24773227685715665, "grad_norm": 0.5280458331108093, "learning_rate": 1.7121203219006246e-05, "loss": 0.2775, "step": 13344 }, { "epoch": 0.24776940699457528, "grad_norm": 0.35759904980659485, "learning_rate": 1.7120384227111928e-05, "loss": 0.3918, "step": 13346 }, { "epoch": 0.24780653713199394, "grad_norm": 0.35483217239379883, "learning_rate": 1.711956513833051e-05, "loss": 0.1831, "step": 13348 }, { "epoch": 0.24784366726941257, "grad_norm": 0.2846633195877075, "learning_rate": 1.7118745952673137e-05, "loss": 0.3612, "step": 13350 }, { "epoch": 0.2478807974068312, "grad_norm": 0.4046551287174225, "learning_rate": 1.7117926670150952e-05, "loss": 0.3278, "step": 13352 }, { "epoch": 0.24791792754424985, "grad_norm": 0.3206670582294464, "learning_rate": 1.7117107290775106e-05, "loss": 0.3482, "step": 13354 }, { "epoch": 0.24795505768166848, "grad_norm": 0.26846450567245483, "learning_rate": 1.711628781455675e-05, "loss": 0.2611, "step": 13356 }, { "epoch": 0.2479921878190871, "grad_norm": 0.33748847246170044, "learning_rate": 1.711546824150703e-05, "loss": 0.6318, "step": 13358 }, { "epoch": 0.24802931795650576, "grad_norm": 0.3074412941932678, "learning_rate": 1.7114648571637103e-05, "loss": 0.3257, "step": 13360 }, { "epoch": 0.2480664480939244, "grad_norm": 0.4141692817211151, "learning_rate": 1.711382880495812e-05, "loss": 0.3644, "step": 13362 }, { "epoch": 0.24810357823134305, "grad_norm": 0.40177881717681885, "learning_rate": 1.7113008941481236e-05, "loss": 0.4129, "step": 13364 }, { "epoch": 0.24814070836876168, "grad_norm": 0.3892151415348053, "learning_rate": 1.7112188981217602e-05, "loss": 0.5622, "step": 13366 }, { "epoch": 0.2481778385061803, "grad_norm": 1.0396039485931396, "learning_rate": 1.7111368924178383e-05, "loss": 0.3238, "step": 13368 }, { "epoch": 0.24821496864359896, "grad_norm": 0.4415968954563141, "learning_rate": 1.7110548770374734e-05, "loss": 0.2282, "step": 13370 }, { "epoch": 0.2482520987810176, "grad_norm": 0.33203616738319397, "learning_rate": 1.710972851981781e-05, "loss": 0.3122, "step": 13372 }, { "epoch": 0.24828922891843622, "grad_norm": 0.2803024649620056, "learning_rate": 1.7108908172518785e-05, "loss": 0.2342, "step": 13374 }, { "epoch": 0.24832635905585487, "grad_norm": 0.2605185806751251, "learning_rate": 1.7108087728488807e-05, "loss": 0.3215, "step": 13376 }, { "epoch": 0.2483634891932735, "grad_norm": 0.352474182844162, "learning_rate": 1.710726718773905e-05, "loss": 0.2411, "step": 13378 }, { "epoch": 0.24840061933069213, "grad_norm": 0.4577985107898712, "learning_rate": 1.7106446550280672e-05, "loss": 0.3917, "step": 13380 }, { "epoch": 0.24843774946811079, "grad_norm": 0.2931303083896637, "learning_rate": 1.7105625816124843e-05, "loss": 0.2189, "step": 13382 }, { "epoch": 0.24847487960552941, "grad_norm": 0.32447829842567444, "learning_rate": 1.7104804985282734e-05, "loss": 0.3135, "step": 13384 }, { "epoch": 0.24851200974294807, "grad_norm": 0.4484362304210663, "learning_rate": 1.710398405776551e-05, "loss": 0.3342, "step": 13386 }, { "epoch": 0.2485491398803667, "grad_norm": 0.4414674937725067, "learning_rate": 1.710316303358434e-05, "loss": 0.3514, "step": 13388 }, { "epoch": 0.24858627001778533, "grad_norm": 0.2533408999443054, "learning_rate": 1.7102341912750398e-05, "loss": 0.2756, "step": 13390 }, { "epoch": 0.24862340015520398, "grad_norm": 0.3433852195739746, "learning_rate": 1.7101520695274855e-05, "loss": 0.4985, "step": 13392 }, { "epoch": 0.2486605302926226, "grad_norm": 0.2968577742576599, "learning_rate": 1.710069938116889e-05, "loss": 0.2443, "step": 13394 }, { "epoch": 0.24869766043004124, "grad_norm": 0.3259652554988861, "learning_rate": 1.7099877970443675e-05, "loss": 0.387, "step": 13396 }, { "epoch": 0.2487347905674599, "grad_norm": 0.2970534861087799, "learning_rate": 1.709905646311038e-05, "loss": 0.4225, "step": 13398 }, { "epoch": 0.24877192070487852, "grad_norm": 0.41418832540512085, "learning_rate": 1.70982348591802e-05, "loss": 0.4134, "step": 13400 }, { "epoch": 0.24880905084229718, "grad_norm": 0.45218539237976074, "learning_rate": 1.7097413158664303e-05, "loss": 0.1988, "step": 13402 }, { "epoch": 0.2488461809797158, "grad_norm": 0.38420361280441284, "learning_rate": 1.7096591361573873e-05, "loss": 0.5638, "step": 13404 }, { "epoch": 0.24888331111713444, "grad_norm": 0.29785534739494324, "learning_rate": 1.7095769467920087e-05, "loss": 0.2869, "step": 13406 }, { "epoch": 0.2489204412545531, "grad_norm": 0.4749932289123535, "learning_rate": 1.7094947477714137e-05, "loss": 0.3811, "step": 13408 }, { "epoch": 0.24895757139197172, "grad_norm": 0.6014745831489563, "learning_rate": 1.70941253909672e-05, "loss": 0.3423, "step": 13410 }, { "epoch": 0.24899470152939035, "grad_norm": 0.31092795729637146, "learning_rate": 1.709330320769047e-05, "loss": 0.1398, "step": 13412 }, { "epoch": 0.249031831666809, "grad_norm": 0.3357078433036804, "learning_rate": 1.709248092789513e-05, "loss": 0.1135, "step": 13414 }, { "epoch": 0.24906896180422763, "grad_norm": 0.34562963247299194, "learning_rate": 1.7091658551592364e-05, "loss": 0.3422, "step": 13416 }, { "epoch": 0.24910609194164626, "grad_norm": 0.47185638546943665, "learning_rate": 1.709083607879337e-05, "loss": 0.404, "step": 13418 }, { "epoch": 0.24914322207906492, "grad_norm": 0.3098020553588867, "learning_rate": 1.7090013509509335e-05, "loss": 0.3513, "step": 13420 }, { "epoch": 0.24918035221648355, "grad_norm": 0.4504270851612091, "learning_rate": 1.7089190843751455e-05, "loss": 0.4556, "step": 13422 }, { "epoch": 0.2492174823539022, "grad_norm": 0.2668244540691376, "learning_rate": 1.7088368081530922e-05, "loss": 0.5071, "step": 13424 }, { "epoch": 0.24925461249132083, "grad_norm": 0.39171335101127625, "learning_rate": 1.708754522285893e-05, "loss": 0.3375, "step": 13426 }, { "epoch": 0.24929174262873946, "grad_norm": 0.4340653121471405, "learning_rate": 1.708672226774668e-05, "loss": 0.4444, "step": 13428 }, { "epoch": 0.24932887276615812, "grad_norm": 0.3993779718875885, "learning_rate": 1.708589921620537e-05, "loss": 0.2589, "step": 13430 }, { "epoch": 0.24936600290357674, "grad_norm": 0.2112313210964203, "learning_rate": 1.7085076068246188e-05, "loss": 0.2493, "step": 13432 }, { "epoch": 0.24940313304099537, "grad_norm": 0.33994027972221375, "learning_rate": 1.708425282388035e-05, "loss": 0.2105, "step": 13434 }, { "epoch": 0.24944026317841403, "grad_norm": 0.401529461145401, "learning_rate": 1.7083429483119044e-05, "loss": 0.4486, "step": 13436 }, { "epoch": 0.24947739331583266, "grad_norm": 0.39311596751213074, "learning_rate": 1.7082606045973487e-05, "loss": 0.2584, "step": 13438 }, { "epoch": 0.2495145234532513, "grad_norm": 0.2795025110244751, "learning_rate": 1.7081782512454873e-05, "loss": 0.4173, "step": 13440 }, { "epoch": 0.24955165359066994, "grad_norm": 0.27866944670677185, "learning_rate": 1.7080958882574412e-05, "loss": 0.4654, "step": 13442 }, { "epoch": 0.24958878372808857, "grad_norm": 0.2017209380865097, "learning_rate": 1.708013515634331e-05, "loss": 0.1865, "step": 13444 }, { "epoch": 0.24962591386550723, "grad_norm": 0.4624181389808655, "learning_rate": 1.7079311333772778e-05, "loss": 0.4687, "step": 13446 }, { "epoch": 0.24966304400292585, "grad_norm": 0.5237762331962585, "learning_rate": 1.707848741487402e-05, "loss": 0.189, "step": 13448 }, { "epoch": 0.24970017414034448, "grad_norm": 0.4490068554878235, "learning_rate": 1.7077663399658252e-05, "loss": 0.5101, "step": 13450 }, { "epoch": 0.24973730427776314, "grad_norm": 0.3285551965236664, "learning_rate": 1.7076839288136687e-05, "loss": 0.3384, "step": 13452 }, { "epoch": 0.24977443441518177, "grad_norm": 0.25772300362586975, "learning_rate": 1.7076015080320538e-05, "loss": 0.2376, "step": 13454 }, { "epoch": 0.2498115645526004, "grad_norm": 0.34867772459983826, "learning_rate": 1.7075190776221018e-05, "loss": 0.1967, "step": 13456 }, { "epoch": 0.24984869469001905, "grad_norm": 0.5373649597167969, "learning_rate": 1.7074366375849343e-05, "loss": 0.313, "step": 13458 }, { "epoch": 0.24988582482743768, "grad_norm": 0.4595335125923157, "learning_rate": 1.707354187921673e-05, "loss": 0.2878, "step": 13460 }, { "epoch": 0.24992295496485634, "grad_norm": 0.4692481756210327, "learning_rate": 1.7072717286334402e-05, "loss": 0.2593, "step": 13462 }, { "epoch": 0.24996008510227496, "grad_norm": 0.3877066373825073, "learning_rate": 1.7071892597213576e-05, "loss": 0.3189, "step": 13464 }, { "epoch": 0.2499972152396936, "grad_norm": 0.5486915707588196, "learning_rate": 1.7071067811865477e-05, "loss": 0.3509, "step": 13466 }, { "epoch": 0.2500343453771122, "grad_norm": 0.5739935636520386, "learning_rate": 1.7070242930301324e-05, "loss": 0.3001, "step": 13468 }, { "epoch": 0.2500714755145309, "grad_norm": 0.5411041378974915, "learning_rate": 1.706941795253234e-05, "loss": 0.3059, "step": 13470 }, { "epoch": 0.25010860565194953, "grad_norm": 0.27244988083839417, "learning_rate": 1.706859287856976e-05, "loss": 0.1619, "step": 13472 }, { "epoch": 0.25014573578936816, "grad_norm": 0.38710346817970276, "learning_rate": 1.7067767708424797e-05, "loss": 0.1943, "step": 13474 }, { "epoch": 0.2501828659267868, "grad_norm": 0.4377591013908386, "learning_rate": 1.7066942442108687e-05, "loss": 0.3846, "step": 13476 }, { "epoch": 0.2502199960642054, "grad_norm": 0.4194667339324951, "learning_rate": 1.706611707963266e-05, "loss": 0.3448, "step": 13478 }, { "epoch": 0.25025712620162405, "grad_norm": 0.4690960943698883, "learning_rate": 1.7065291621007944e-05, "loss": 0.3351, "step": 13480 }, { "epoch": 0.25029425633904273, "grad_norm": 0.39001163840293884, "learning_rate": 1.7064466066245772e-05, "loss": 0.3453, "step": 13482 }, { "epoch": 0.25033138647646136, "grad_norm": 0.4255661964416504, "learning_rate": 1.7063640415357378e-05, "loss": 0.4786, "step": 13484 }, { "epoch": 0.25036851661388, "grad_norm": 0.40822985768318176, "learning_rate": 1.7062814668353997e-05, "loss": 0.3558, "step": 13486 }, { "epoch": 0.2504056467512986, "grad_norm": 0.3241831958293915, "learning_rate": 1.706198882524686e-05, "loss": 0.2258, "step": 13488 }, { "epoch": 0.25044277688871724, "grad_norm": 0.23742733895778656, "learning_rate": 1.706116288604721e-05, "loss": 0.2094, "step": 13490 }, { "epoch": 0.25047990702613593, "grad_norm": 0.4142918884754181, "learning_rate": 1.7060336850766287e-05, "loss": 0.2303, "step": 13492 }, { "epoch": 0.25051703716355456, "grad_norm": 0.2777521312236786, "learning_rate": 1.7059510719415323e-05, "loss": 0.1075, "step": 13494 }, { "epoch": 0.2505541673009732, "grad_norm": 0.44745078682899475, "learning_rate": 1.7058684492005568e-05, "loss": 0.4177, "step": 13496 }, { "epoch": 0.2505912974383918, "grad_norm": 0.3736150562763214, "learning_rate": 1.7057858168548257e-05, "loss": 0.3703, "step": 13498 }, { "epoch": 0.25062842757581044, "grad_norm": 0.3684479892253876, "learning_rate": 1.7057031749054638e-05, "loss": 0.3459, "step": 13500 }, { "epoch": 0.25066555771322907, "grad_norm": 0.28411492705345154, "learning_rate": 1.7056205233535957e-05, "loss": 0.2371, "step": 13502 }, { "epoch": 0.25070268785064775, "grad_norm": 0.3408103287220001, "learning_rate": 1.7055378622003455e-05, "loss": 0.2489, "step": 13504 }, { "epoch": 0.2507398179880664, "grad_norm": 0.4849068522453308, "learning_rate": 1.7054551914468388e-05, "loss": 0.3235, "step": 13506 }, { "epoch": 0.250776948125485, "grad_norm": 0.39432206749916077, "learning_rate": 1.7053725110941998e-05, "loss": 0.4644, "step": 13508 }, { "epoch": 0.25081407826290364, "grad_norm": 0.32414188981056213, "learning_rate": 1.7052898211435533e-05, "loss": 0.2818, "step": 13510 }, { "epoch": 0.25085120840032227, "grad_norm": 0.6440356373786926, "learning_rate": 1.7052071215960255e-05, "loss": 0.2385, "step": 13512 }, { "epoch": 0.25088833853774095, "grad_norm": 0.28638899326324463, "learning_rate": 1.705124412452741e-05, "loss": 0.4424, "step": 13514 }, { "epoch": 0.2509254686751596, "grad_norm": 0.30699872970581055, "learning_rate": 1.7050416937148253e-05, "loss": 0.2969, "step": 13516 }, { "epoch": 0.2509625988125782, "grad_norm": 0.3447207510471344, "learning_rate": 1.704958965383404e-05, "loss": 0.3657, "step": 13518 }, { "epoch": 0.25099972894999684, "grad_norm": 0.2970324754714966, "learning_rate": 1.7048762274596028e-05, "loss": 0.2656, "step": 13520 }, { "epoch": 0.25103685908741546, "grad_norm": 0.3424597382545471, "learning_rate": 1.7047934799445474e-05, "loss": 0.1664, "step": 13522 }, { "epoch": 0.25107398922483415, "grad_norm": 0.34687936305999756, "learning_rate": 1.704710722839364e-05, "loss": 0.3276, "step": 13524 }, { "epoch": 0.2511111193622528, "grad_norm": 0.44370514154434204, "learning_rate": 1.7046279561451783e-05, "loss": 0.4512, "step": 13526 }, { "epoch": 0.2511482494996714, "grad_norm": 0.3351060450077057, "learning_rate": 1.704545179863117e-05, "loss": 0.282, "step": 13528 }, { "epoch": 0.25118537963709003, "grad_norm": 0.3558134138584137, "learning_rate": 1.704462393994306e-05, "loss": 0.2581, "step": 13530 }, { "epoch": 0.25122250977450866, "grad_norm": 0.39743366837501526, "learning_rate": 1.7043795985398717e-05, "loss": 0.5565, "step": 13532 }, { "epoch": 0.2512596399119273, "grad_norm": 0.3620244264602661, "learning_rate": 1.7042967935009413e-05, "loss": 0.4073, "step": 13534 }, { "epoch": 0.251296770049346, "grad_norm": 0.5346766710281372, "learning_rate": 1.7042139788786405e-05, "loss": 0.1641, "step": 13536 }, { "epoch": 0.2513339001867646, "grad_norm": 0.2945798933506012, "learning_rate": 1.7041311546740976e-05, "loss": 0.4137, "step": 13538 }, { "epoch": 0.25137103032418323, "grad_norm": 0.5081639289855957, "learning_rate": 1.7040483208884387e-05, "loss": 0.2896, "step": 13540 }, { "epoch": 0.25140816046160186, "grad_norm": 0.42044633626937866, "learning_rate": 1.7039654775227904e-05, "loss": 0.6324, "step": 13542 }, { "epoch": 0.2514452905990205, "grad_norm": 0.2637290358543396, "learning_rate": 1.7038826245782813e-05, "loss": 0.2475, "step": 13544 }, { "epoch": 0.25148242073643917, "grad_norm": 0.4096880555152893, "learning_rate": 1.703799762056038e-05, "loss": 0.3392, "step": 13546 }, { "epoch": 0.2515195508738578, "grad_norm": 0.26376697421073914, "learning_rate": 1.7037168899571876e-05, "loss": 0.3018, "step": 13548 }, { "epoch": 0.25155668101127643, "grad_norm": 0.5098007321357727, "learning_rate": 1.703634008282858e-05, "loss": 0.411, "step": 13550 }, { "epoch": 0.25159381114869506, "grad_norm": 0.2604253590106964, "learning_rate": 1.703551117034178e-05, "loss": 0.2732, "step": 13552 }, { "epoch": 0.2516309412861137, "grad_norm": 0.45323461294174194, "learning_rate": 1.7034682162122745e-05, "loss": 0.2711, "step": 13554 }, { "epoch": 0.2516680714235323, "grad_norm": 0.23245488107204437, "learning_rate": 1.7033853058182752e-05, "loss": 0.2193, "step": 13556 }, { "epoch": 0.251705201560951, "grad_norm": 0.6137582659721375, "learning_rate": 1.703302385853309e-05, "loss": 0.3564, "step": 13558 }, { "epoch": 0.2517423316983696, "grad_norm": 0.42788252234458923, "learning_rate": 1.7032194563185044e-05, "loss": 0.6079, "step": 13560 }, { "epoch": 0.25177946183578825, "grad_norm": 0.2858025133609772, "learning_rate": 1.7031365172149884e-05, "loss": 0.3965, "step": 13562 }, { "epoch": 0.2518165919732069, "grad_norm": 0.4628661870956421, "learning_rate": 1.7030535685438913e-05, "loss": 0.3049, "step": 13564 }, { "epoch": 0.2518537221106255, "grad_norm": 0.284263551235199, "learning_rate": 1.7029706103063412e-05, "loss": 0.323, "step": 13566 }, { "epoch": 0.2518908522480442, "grad_norm": 0.48040521144866943, "learning_rate": 1.702887642503466e-05, "loss": 0.2782, "step": 13568 }, { "epoch": 0.2519279823854628, "grad_norm": 0.45553284883499146, "learning_rate": 1.702804665136396e-05, "loss": 0.3985, "step": 13570 }, { "epoch": 0.25196511252288145, "grad_norm": 0.2838556468486786, "learning_rate": 1.7027216782062592e-05, "loss": 0.3218, "step": 13572 }, { "epoch": 0.2520022426603001, "grad_norm": 0.32408303022384644, "learning_rate": 1.7026386817141854e-05, "loss": 0.2301, "step": 13574 }, { "epoch": 0.2520393727977187, "grad_norm": 0.751385509967804, "learning_rate": 1.7025556756613042e-05, "loss": 0.3233, "step": 13576 }, { "epoch": 0.25207650293513734, "grad_norm": 0.4387381672859192, "learning_rate": 1.702472660048744e-05, "loss": 0.351, "step": 13578 }, { "epoch": 0.252113633072556, "grad_norm": 0.37481051683425903, "learning_rate": 1.7023896348776357e-05, "loss": 0.4684, "step": 13580 }, { "epoch": 0.25215076320997465, "grad_norm": 0.23695437610149384, "learning_rate": 1.702306600149108e-05, "loss": 0.3087, "step": 13582 }, { "epoch": 0.2521878933473933, "grad_norm": 0.5026005506515503, "learning_rate": 1.702223555864291e-05, "loss": 0.3798, "step": 13584 }, { "epoch": 0.2522250234848119, "grad_norm": 0.6652505397796631, "learning_rate": 1.7021405020243152e-05, "loss": 0.3381, "step": 13586 }, { "epoch": 0.25226215362223053, "grad_norm": 0.378642737865448, "learning_rate": 1.70205743863031e-05, "loss": 0.5165, "step": 13588 }, { "epoch": 0.2522992837596492, "grad_norm": 0.36895811557769775, "learning_rate": 1.701974365683406e-05, "loss": 0.4132, "step": 13590 }, { "epoch": 0.25233641389706785, "grad_norm": 0.2597840428352356, "learning_rate": 1.7018912831847337e-05, "loss": 0.4467, "step": 13592 }, { "epoch": 0.2523735440344865, "grad_norm": 0.3972187340259552, "learning_rate": 1.701808191135423e-05, "loss": 0.2537, "step": 13594 }, { "epoch": 0.2524106741719051, "grad_norm": 0.4723551869392395, "learning_rate": 1.7017250895366054e-05, "loss": 0.2146, "step": 13596 }, { "epoch": 0.25244780430932373, "grad_norm": 0.4637466371059418, "learning_rate": 1.7016419783894112e-05, "loss": 0.5138, "step": 13598 }, { "epoch": 0.2524849344467424, "grad_norm": 0.42479127645492554, "learning_rate": 1.7015588576949713e-05, "loss": 0.3211, "step": 13600 }, { "epoch": 0.25252206458416104, "grad_norm": 0.5597760677337646, "learning_rate": 1.7014757274544165e-05, "loss": 0.3117, "step": 13602 }, { "epoch": 0.25255919472157967, "grad_norm": 0.39463362097740173, "learning_rate": 1.701392587668879e-05, "loss": 0.4728, "step": 13604 }, { "epoch": 0.2525963248589983, "grad_norm": 0.369480162858963, "learning_rate": 1.7013094383394886e-05, "loss": 0.2383, "step": 13606 }, { "epoch": 0.25263345499641693, "grad_norm": 0.4247845411300659, "learning_rate": 1.7012262794673774e-05, "loss": 0.3228, "step": 13608 }, { "epoch": 0.25267058513383556, "grad_norm": 0.27499979734420776, "learning_rate": 1.7011431110536772e-05, "loss": 0.1294, "step": 13610 }, { "epoch": 0.25270771527125424, "grad_norm": 0.35639554262161255, "learning_rate": 1.7010599330995194e-05, "loss": 0.2921, "step": 13612 }, { "epoch": 0.25274484540867287, "grad_norm": 0.45653897523880005, "learning_rate": 1.7009767456060354e-05, "loss": 0.3373, "step": 13614 }, { "epoch": 0.2527819755460915, "grad_norm": 0.42157605290412903, "learning_rate": 1.7008935485743585e-05, "loss": 0.4752, "step": 13616 }, { "epoch": 0.2528191056835101, "grad_norm": 0.3991600573062897, "learning_rate": 1.700810342005619e-05, "loss": 0.3342, "step": 13618 }, { "epoch": 0.25285623582092875, "grad_norm": 0.3936379849910736, "learning_rate": 1.7007271259009502e-05, "loss": 0.2549, "step": 13620 }, { "epoch": 0.25289336595834744, "grad_norm": 0.3804357647895813, "learning_rate": 1.7006439002614842e-05, "loss": 0.3514, "step": 13622 }, { "epoch": 0.25293049609576607, "grad_norm": 0.5249435901641846, "learning_rate": 1.7005606650883534e-05, "loss": 0.2186, "step": 13624 }, { "epoch": 0.2529676262331847, "grad_norm": 0.26637545228004456, "learning_rate": 1.70047742038269e-05, "loss": 0.2712, "step": 13626 }, { "epoch": 0.2530047563706033, "grad_norm": 0.31128400564193726, "learning_rate": 1.7003941661456272e-05, "loss": 0.3893, "step": 13628 }, { "epoch": 0.25304188650802195, "grad_norm": 0.3280087113380432, "learning_rate": 1.700310902378298e-05, "loss": 0.2651, "step": 13630 }, { "epoch": 0.2530790166454406, "grad_norm": 0.24037370085716248, "learning_rate": 1.700227629081835e-05, "loss": 0.2972, "step": 13632 }, { "epoch": 0.25311614678285926, "grad_norm": 0.5142262578010559, "learning_rate": 1.7001443462573716e-05, "loss": 0.4333, "step": 13634 }, { "epoch": 0.2531532769202779, "grad_norm": 0.2996436059474945, "learning_rate": 1.7000610539060406e-05, "loss": 0.2376, "step": 13636 }, { "epoch": 0.2531904070576965, "grad_norm": 0.5064845085144043, "learning_rate": 1.6999777520289754e-05, "loss": 0.2596, "step": 13638 }, { "epoch": 0.25322753719511515, "grad_norm": 0.31223976612091064, "learning_rate": 1.69989444062731e-05, "loss": 0.2104, "step": 13640 }, { "epoch": 0.2532646673325338, "grad_norm": 0.46125105023384094, "learning_rate": 1.699811119702177e-05, "loss": 0.5057, "step": 13642 }, { "epoch": 0.25330179746995246, "grad_norm": 0.38701242208480835, "learning_rate": 1.699727789254712e-05, "loss": 0.3401, "step": 13644 }, { "epoch": 0.2533389276073711, "grad_norm": 0.27538976073265076, "learning_rate": 1.6996444492860468e-05, "loss": 0.3991, "step": 13646 }, { "epoch": 0.2533760577447897, "grad_norm": 0.30591198801994324, "learning_rate": 1.6995610997973166e-05, "loss": 0.3411, "step": 13648 }, { "epoch": 0.25341318788220835, "grad_norm": 0.39603808522224426, "learning_rate": 1.699477740789655e-05, "loss": 0.2415, "step": 13650 }, { "epoch": 0.253450318019627, "grad_norm": 0.3029903173446655, "learning_rate": 1.699394372264197e-05, "loss": 0.2939, "step": 13652 }, { "epoch": 0.2534874481570456, "grad_norm": 0.32516413927078247, "learning_rate": 1.699310994222076e-05, "loss": 0.1726, "step": 13654 }, { "epoch": 0.2535245782944643, "grad_norm": 1.33535635471344, "learning_rate": 1.699227606664428e-05, "loss": 0.2552, "step": 13656 }, { "epoch": 0.2535617084318829, "grad_norm": 0.3027133047580719, "learning_rate": 1.6991442095923855e-05, "loss": 0.2955, "step": 13658 }, { "epoch": 0.25359883856930154, "grad_norm": 0.3311522603034973, "learning_rate": 1.6990608030070854e-05, "loss": 0.3306, "step": 13660 }, { "epoch": 0.25363596870672017, "grad_norm": 0.3005430996417999, "learning_rate": 1.698977386909661e-05, "loss": 0.4855, "step": 13662 }, { "epoch": 0.2536730988441388, "grad_norm": 0.3486959636211395, "learning_rate": 1.6988939613012485e-05, "loss": 0.3124, "step": 13664 }, { "epoch": 0.2537102289815575, "grad_norm": 0.42648744583129883, "learning_rate": 1.6988105261829825e-05, "loss": 0.3011, "step": 13666 }, { "epoch": 0.2537473591189761, "grad_norm": 0.46215444803237915, "learning_rate": 1.6987270815559985e-05, "loss": 0.349, "step": 13668 }, { "epoch": 0.25378448925639474, "grad_norm": 0.3062231242656708, "learning_rate": 1.698643627421432e-05, "loss": 0.413, "step": 13670 }, { "epoch": 0.25382161939381337, "grad_norm": 0.4943634569644928, "learning_rate": 1.698560163780418e-05, "loss": 0.2292, "step": 13672 }, { "epoch": 0.253858749531232, "grad_norm": 0.4232478141784668, "learning_rate": 1.6984766906340924e-05, "loss": 0.4758, "step": 13674 }, { "epoch": 0.2538958796686507, "grad_norm": 0.4092228412628174, "learning_rate": 1.6983932079835917e-05, "loss": 0.1964, "step": 13676 }, { "epoch": 0.2539330098060693, "grad_norm": 0.34870395064353943, "learning_rate": 1.6983097158300516e-05, "loss": 0.1751, "step": 13678 }, { "epoch": 0.25397013994348794, "grad_norm": 0.8198285698890686, "learning_rate": 1.6982262141746076e-05, "loss": 0.2075, "step": 13680 }, { "epoch": 0.25400727008090657, "grad_norm": 0.4108404219150543, "learning_rate": 1.6981427030183962e-05, "loss": 0.2724, "step": 13682 }, { "epoch": 0.2540444002183252, "grad_norm": 0.4889931082725525, "learning_rate": 1.6980591823625537e-05, "loss": 0.3884, "step": 13684 }, { "epoch": 0.2540815303557438, "grad_norm": 0.5948621034622192, "learning_rate": 1.697975652208217e-05, "loss": 0.351, "step": 13686 }, { "epoch": 0.2541186604931625, "grad_norm": 0.34527528285980225, "learning_rate": 1.697892112556522e-05, "loss": 0.4693, "step": 13688 }, { "epoch": 0.25415579063058114, "grad_norm": 0.5897166132926941, "learning_rate": 1.697808563408606e-05, "loss": 0.2551, "step": 13690 }, { "epoch": 0.25419292076799976, "grad_norm": 0.3086923360824585, "learning_rate": 1.6977250047656056e-05, "loss": 0.2136, "step": 13692 }, { "epoch": 0.2542300509054184, "grad_norm": 0.4336811304092407, "learning_rate": 1.6976414366286578e-05, "loss": 0.2543, "step": 13694 }, { "epoch": 0.254267181042837, "grad_norm": 0.3661821782588959, "learning_rate": 1.6975578589989e-05, "loss": 0.4478, "step": 13696 }, { "epoch": 0.2543043111802557, "grad_norm": 0.2914643883705139, "learning_rate": 1.6974742718774687e-05, "loss": 0.2323, "step": 13698 }, { "epoch": 0.25434144131767433, "grad_norm": 0.4297862946987152, "learning_rate": 1.697390675265502e-05, "loss": 0.2568, "step": 13700 }, { "epoch": 0.25437857145509296, "grad_norm": 0.3211735785007477, "learning_rate": 1.697307069164137e-05, "loss": 0.3877, "step": 13702 }, { "epoch": 0.2544157015925116, "grad_norm": 0.45898211002349854, "learning_rate": 1.6972234535745117e-05, "loss": 0.1843, "step": 13704 }, { "epoch": 0.2544528317299302, "grad_norm": 0.6417859196662903, "learning_rate": 1.6971398284977634e-05, "loss": 0.3583, "step": 13706 }, { "epoch": 0.25448996186734885, "grad_norm": 0.42082932591438293, "learning_rate": 1.6970561939350302e-05, "loss": 0.3105, "step": 13708 }, { "epoch": 0.25452709200476753, "grad_norm": 0.2992493212223053, "learning_rate": 1.69697254988745e-05, "loss": 0.2017, "step": 13710 }, { "epoch": 0.25456422214218616, "grad_norm": 0.32489416003227234, "learning_rate": 1.6968888963561614e-05, "loss": 0.1897, "step": 13712 }, { "epoch": 0.2546013522796048, "grad_norm": 0.43848279118537903, "learning_rate": 1.6968052333423023e-05, "loss": 0.5144, "step": 13714 }, { "epoch": 0.2546384824170234, "grad_norm": 0.33223775029182434, "learning_rate": 1.696721560847011e-05, "loss": 0.3641, "step": 13716 }, { "epoch": 0.25467561255444204, "grad_norm": 0.2649035155773163, "learning_rate": 1.6966378788714262e-05, "loss": 0.2365, "step": 13718 }, { "epoch": 0.2547127426918607, "grad_norm": 0.36433765292167664, "learning_rate": 1.6965541874166866e-05, "loss": 0.2299, "step": 13720 }, { "epoch": 0.25474987282927936, "grad_norm": 0.3439432680606842, "learning_rate": 1.696470486483931e-05, "loss": 0.2747, "step": 13722 }, { "epoch": 0.254787002966698, "grad_norm": 0.37267714738845825, "learning_rate": 1.696386776074298e-05, "loss": 0.3405, "step": 13724 }, { "epoch": 0.2548241331041166, "grad_norm": 0.33308616280555725, "learning_rate": 1.696303056188927e-05, "loss": 0.3022, "step": 13726 }, { "epoch": 0.25486126324153524, "grad_norm": 0.38547685742378235, "learning_rate": 1.696219326828957e-05, "loss": 0.5033, "step": 13728 }, { "epoch": 0.25489839337895387, "grad_norm": 0.2877867817878723, "learning_rate": 1.696135587995528e-05, "loss": 0.1859, "step": 13730 }, { "epoch": 0.25493552351637255, "grad_norm": 0.5539423823356628, "learning_rate": 1.696051839689778e-05, "loss": 0.1841, "step": 13732 }, { "epoch": 0.2549726536537912, "grad_norm": 0.3133772611618042, "learning_rate": 1.695968081912848e-05, "loss": 0.3078, "step": 13734 }, { "epoch": 0.2550097837912098, "grad_norm": 0.40221840143203735, "learning_rate": 1.6958843146658766e-05, "loss": 0.299, "step": 13736 }, { "epoch": 0.25504691392862844, "grad_norm": 0.35547107458114624, "learning_rate": 1.695800537950004e-05, "loss": 0.2233, "step": 13738 }, { "epoch": 0.25508404406604707, "grad_norm": 0.3636387586593628, "learning_rate": 1.6957167517663705e-05, "loss": 0.365, "step": 13740 }, { "epoch": 0.25512117420346575, "grad_norm": 0.3263877034187317, "learning_rate": 1.695632956116116e-05, "loss": 0.4536, "step": 13742 }, { "epoch": 0.2551583043408844, "grad_norm": 0.257674902677536, "learning_rate": 1.6955491510003805e-05, "loss": 0.281, "step": 13744 }, { "epoch": 0.255195434478303, "grad_norm": 0.38836032152175903, "learning_rate": 1.6954653364203046e-05, "loss": 0.3071, "step": 13746 }, { "epoch": 0.25523256461572164, "grad_norm": 0.5239485502243042, "learning_rate": 1.6953815123770285e-05, "loss": 0.3251, "step": 13748 }, { "epoch": 0.25526969475314026, "grad_norm": 0.34689855575561523, "learning_rate": 1.695297678871693e-05, "loss": 0.5234, "step": 13750 }, { "epoch": 0.25530682489055895, "grad_norm": 0.30790990591049194, "learning_rate": 1.6952138359054387e-05, "loss": 0.2019, "step": 13752 }, { "epoch": 0.2553439550279776, "grad_norm": 0.8268873691558838, "learning_rate": 1.6951299834794065e-05, "loss": 0.2557, "step": 13754 }, { "epoch": 0.2553810851653962, "grad_norm": 0.3010076880455017, "learning_rate": 1.6950461215947377e-05, "loss": 0.2623, "step": 13756 }, { "epoch": 0.25541821530281483, "grad_norm": 0.27561119198799133, "learning_rate": 1.6949622502525723e-05, "loss": 0.286, "step": 13758 }, { "epoch": 0.25545534544023346, "grad_norm": 0.4324747323989868, "learning_rate": 1.6948783694540528e-05, "loss": 0.5414, "step": 13760 }, { "epoch": 0.2554924755776521, "grad_norm": 0.5337046980857849, "learning_rate": 1.6947944792003203e-05, "loss": 0.2074, "step": 13762 }, { "epoch": 0.2555296057150708, "grad_norm": 0.30530399084091187, "learning_rate": 1.694710579492516e-05, "loss": 0.4649, "step": 13764 }, { "epoch": 0.2555667358524894, "grad_norm": 0.3715148866176605, "learning_rate": 1.6946266703317817e-05, "loss": 0.2815, "step": 13766 }, { "epoch": 0.25560386598990803, "grad_norm": 0.3819938898086548, "learning_rate": 1.6945427517192588e-05, "loss": 0.3095, "step": 13768 }, { "epoch": 0.25564099612732666, "grad_norm": 0.38834378123283386, "learning_rate": 1.6944588236560894e-05, "loss": 0.1813, "step": 13770 }, { "epoch": 0.2556781262647453, "grad_norm": 0.40062519907951355, "learning_rate": 1.6943748861434157e-05, "loss": 0.4839, "step": 13772 }, { "epoch": 0.25571525640216397, "grad_norm": 0.4693989157676697, "learning_rate": 1.6942909391823795e-05, "loss": 0.4739, "step": 13774 }, { "epoch": 0.2557523865395826, "grad_norm": 0.31688180565834045, "learning_rate": 1.6942069827741233e-05, "loss": 0.2734, "step": 13776 }, { "epoch": 0.2557895166770012, "grad_norm": 0.32606568932533264, "learning_rate": 1.6941230169197896e-05, "loss": 0.4397, "step": 13778 }, { "epoch": 0.25582664681441986, "grad_norm": 0.29052188992500305, "learning_rate": 1.694039041620521e-05, "loss": 0.3016, "step": 13780 }, { "epoch": 0.2558637769518385, "grad_norm": 0.3872697949409485, "learning_rate": 1.6939550568774597e-05, "loss": 0.2658, "step": 13782 }, { "epoch": 0.2559009070892571, "grad_norm": 0.2635507583618164, "learning_rate": 1.6938710626917485e-05, "loss": 0.2144, "step": 13784 }, { "epoch": 0.2559380372266758, "grad_norm": 0.31077760457992554, "learning_rate": 1.693787059064531e-05, "loss": 0.2891, "step": 13786 }, { "epoch": 0.2559751673640944, "grad_norm": 0.32194823026657104, "learning_rate": 1.6937030459969495e-05, "loss": 0.3408, "step": 13788 }, { "epoch": 0.25601229750151305, "grad_norm": 0.36904555559158325, "learning_rate": 1.6936190234901472e-05, "loss": 0.4703, "step": 13790 }, { "epoch": 0.2560494276389317, "grad_norm": 0.2540688216686249, "learning_rate": 1.693534991545268e-05, "loss": 0.413, "step": 13792 }, { "epoch": 0.2560865577763503, "grad_norm": 0.35131198167800903, "learning_rate": 1.6934509501634548e-05, "loss": 0.2003, "step": 13794 }, { "epoch": 0.256123687913769, "grad_norm": 0.3865675628185272, "learning_rate": 1.6933668993458512e-05, "loss": 0.3688, "step": 13796 }, { "epoch": 0.2561608180511876, "grad_norm": 0.2915065586566925, "learning_rate": 1.693282839093601e-05, "loss": 0.4057, "step": 13798 }, { "epoch": 0.25619794818860625, "grad_norm": 0.3600832223892212, "learning_rate": 1.693198769407848e-05, "loss": 0.533, "step": 13800 }, { "epoch": 0.2562350783260249, "grad_norm": 0.42910027503967285, "learning_rate": 1.6931146902897362e-05, "loss": 0.1325, "step": 13802 }, { "epoch": 0.2562722084634435, "grad_norm": 0.32095006108283997, "learning_rate": 1.6930306017404097e-05, "loss": 0.3628, "step": 13804 }, { "epoch": 0.25630933860086214, "grad_norm": 0.3400716185569763, "learning_rate": 1.692946503761012e-05, "loss": 0.1498, "step": 13806 }, { "epoch": 0.2563464687382808, "grad_norm": 0.37268680334091187, "learning_rate": 1.6928623963526888e-05, "loss": 0.3984, "step": 13808 }, { "epoch": 0.25638359887569945, "grad_norm": 0.3930261433124542, "learning_rate": 1.6927782795165835e-05, "loss": 0.2939, "step": 13810 }, { "epoch": 0.2564207290131181, "grad_norm": 0.3283863961696625, "learning_rate": 1.6926941532538407e-05, "loss": 0.1214, "step": 13812 }, { "epoch": 0.2564578591505367, "grad_norm": 0.3679408133029938, "learning_rate": 1.6926100175656053e-05, "loss": 0.4315, "step": 13814 }, { "epoch": 0.25649498928795533, "grad_norm": 0.4393298029899597, "learning_rate": 1.692525872453022e-05, "loss": 0.2682, "step": 13816 }, { "epoch": 0.256532119425374, "grad_norm": 0.4153956174850464, "learning_rate": 1.6924417179172364e-05, "loss": 0.2776, "step": 13818 }, { "epoch": 0.25656924956279265, "grad_norm": 0.3554937541484833, "learning_rate": 1.692357553959393e-05, "loss": 0.3864, "step": 13820 }, { "epoch": 0.2566063797002113, "grad_norm": 0.3652535080909729, "learning_rate": 1.692273380580637e-05, "loss": 0.2932, "step": 13822 }, { "epoch": 0.2566435098376299, "grad_norm": 0.4037335216999054, "learning_rate": 1.692189197782114e-05, "loss": 0.5484, "step": 13824 }, { "epoch": 0.25668063997504853, "grad_norm": 0.25311097502708435, "learning_rate": 1.692105005564969e-05, "loss": 0.2388, "step": 13826 }, { "epoch": 0.25671777011246716, "grad_norm": 0.2720162868499756, "learning_rate": 1.6920208039303484e-05, "loss": 0.1609, "step": 13828 }, { "epoch": 0.25675490024988584, "grad_norm": 0.28366848826408386, "learning_rate": 1.691936592879398e-05, "loss": 0.3137, "step": 13830 }, { "epoch": 0.25679203038730447, "grad_norm": 0.5329599976539612, "learning_rate": 1.691852372413262e-05, "loss": 0.3553, "step": 13832 }, { "epoch": 0.2568291605247231, "grad_norm": 0.4084221422672272, "learning_rate": 1.6917681425330883e-05, "loss": 0.2966, "step": 13834 }, { "epoch": 0.2568662906621417, "grad_norm": 0.38366132974624634, "learning_rate": 1.6916839032400224e-05, "loss": 0.34, "step": 13836 }, { "epoch": 0.25690342079956036, "grad_norm": 0.25355449318885803, "learning_rate": 1.6915996545352098e-05, "loss": 0.4101, "step": 13838 }, { "epoch": 0.25694055093697904, "grad_norm": 0.4541107416152954, "learning_rate": 1.691515396419798e-05, "loss": 0.4866, "step": 13840 }, { "epoch": 0.25697768107439767, "grad_norm": 0.46720975637435913, "learning_rate": 1.6914311288949327e-05, "loss": 0.3382, "step": 13842 }, { "epoch": 0.2570148112118163, "grad_norm": 0.36868152022361755, "learning_rate": 1.6913468519617605e-05, "loss": 0.405, "step": 13844 }, { "epoch": 0.2570519413492349, "grad_norm": 0.32116058468818665, "learning_rate": 1.691262565621429e-05, "loss": 0.3789, "step": 13846 }, { "epoch": 0.25708907148665355, "grad_norm": 0.3383767604827881, "learning_rate": 1.6911782698750845e-05, "loss": 0.2281, "step": 13848 }, { "epoch": 0.25712620162407224, "grad_norm": 0.38744455575942993, "learning_rate": 1.6910939647238737e-05, "loss": 0.2009, "step": 13850 }, { "epoch": 0.25716333176149087, "grad_norm": 0.3801097869873047, "learning_rate": 1.6910096501689444e-05, "loss": 0.4077, "step": 13852 }, { "epoch": 0.2572004618989095, "grad_norm": 0.21515339612960815, "learning_rate": 1.690925326211444e-05, "loss": 0.3105, "step": 13854 }, { "epoch": 0.2572375920363281, "grad_norm": 0.35749539732933044, "learning_rate": 1.6908409928525188e-05, "loss": 0.3635, "step": 13856 }, { "epoch": 0.25727472217374675, "grad_norm": 0.4581398665904999, "learning_rate": 1.690756650093317e-05, "loss": 0.4339, "step": 13858 }, { "epoch": 0.2573118523111654, "grad_norm": 0.6094984412193298, "learning_rate": 1.6906722979349863e-05, "loss": 0.2473, "step": 13860 }, { "epoch": 0.25734898244858406, "grad_norm": 0.46877989172935486, "learning_rate": 1.690587936378675e-05, "loss": 0.3005, "step": 13862 }, { "epoch": 0.2573861125860027, "grad_norm": 0.36369916796684265, "learning_rate": 1.6905035654255296e-05, "loss": 0.3536, "step": 13864 }, { "epoch": 0.2574232427234213, "grad_norm": 0.40528199076652527, "learning_rate": 1.6904191850766996e-05, "loss": 0.2358, "step": 13866 }, { "epoch": 0.25746037286083995, "grad_norm": 0.2735990881919861, "learning_rate": 1.6903347953333322e-05, "loss": 0.2734, "step": 13868 }, { "epoch": 0.2574975029982586, "grad_norm": 0.5643244385719299, "learning_rate": 1.6902503961965765e-05, "loss": 0.3127, "step": 13870 }, { "epoch": 0.25753463313567726, "grad_norm": 0.4487912356853485, "learning_rate": 1.69016598766758e-05, "loss": 0.2847, "step": 13872 }, { "epoch": 0.2575717632730959, "grad_norm": 0.466102659702301, "learning_rate": 1.690081569747492e-05, "loss": 0.2779, "step": 13874 }, { "epoch": 0.2576088934105145, "grad_norm": 0.28952568769454956, "learning_rate": 1.68999714243746e-05, "loss": 0.3667, "step": 13876 }, { "epoch": 0.25764602354793315, "grad_norm": 0.4669014811515808, "learning_rate": 1.6899127057386346e-05, "loss": 0.281, "step": 13878 }, { "epoch": 0.2576831536853518, "grad_norm": 0.45572248101234436, "learning_rate": 1.6898282596521636e-05, "loss": 0.248, "step": 13880 }, { "epoch": 0.2577202838227704, "grad_norm": 0.37069517374038696, "learning_rate": 1.689743804179196e-05, "loss": 0.3705, "step": 13882 }, { "epoch": 0.2577574139601891, "grad_norm": 0.4735618531703949, "learning_rate": 1.6896593393208816e-05, "loss": 0.3593, "step": 13884 }, { "epoch": 0.2577945440976077, "grad_norm": 0.6046683192253113, "learning_rate": 1.6895748650783693e-05, "loss": 0.4249, "step": 13886 }, { "epoch": 0.25783167423502634, "grad_norm": 0.3490101099014282, "learning_rate": 1.6894903814528083e-05, "loss": 0.2407, "step": 13888 }, { "epoch": 0.25786880437244497, "grad_norm": 0.32474836707115173, "learning_rate": 1.6894058884453487e-05, "loss": 0.4068, "step": 13890 }, { "epoch": 0.2579059345098636, "grad_norm": 0.27498355507850647, "learning_rate": 1.6893213860571402e-05, "loss": 0.3269, "step": 13892 }, { "epoch": 0.2579430646472823, "grad_norm": 0.34867867827415466, "learning_rate": 1.689236874289332e-05, "loss": 0.1692, "step": 13894 }, { "epoch": 0.2579801947847009, "grad_norm": 0.37561291456222534, "learning_rate": 1.6891523531430743e-05, "loss": 0.2597, "step": 13896 }, { "epoch": 0.25801732492211954, "grad_norm": 0.293508380651474, "learning_rate": 1.689067822619518e-05, "loss": 0.2276, "step": 13898 }, { "epoch": 0.25805445505953817, "grad_norm": 0.39469292759895325, "learning_rate": 1.6889832827198122e-05, "loss": 0.3717, "step": 13900 }, { "epoch": 0.2580915851969568, "grad_norm": 0.33319106698036194, "learning_rate": 1.6888987334451076e-05, "loss": 0.3414, "step": 13902 }, { "epoch": 0.2581287153343754, "grad_norm": 0.270370751619339, "learning_rate": 1.6888141747965546e-05, "loss": 0.3507, "step": 13904 }, { "epoch": 0.2581658454717941, "grad_norm": 0.46120569109916687, "learning_rate": 1.6887296067753043e-05, "loss": 0.3988, "step": 13906 }, { "epoch": 0.25820297560921274, "grad_norm": 0.26902490854263306, "learning_rate": 1.688645029382507e-05, "loss": 0.2644, "step": 13908 }, { "epoch": 0.25824010574663137, "grad_norm": 0.48291024565696716, "learning_rate": 1.6885604426193132e-05, "loss": 0.5962, "step": 13910 }, { "epoch": 0.25827723588405, "grad_norm": 0.44153958559036255, "learning_rate": 1.688475846486875e-05, "loss": 0.2469, "step": 13912 }, { "epoch": 0.2583143660214686, "grad_norm": 0.30487239360809326, "learning_rate": 1.688391240986342e-05, "loss": 0.4363, "step": 13914 }, { "epoch": 0.2583514961588873, "grad_norm": 0.29749616980552673, "learning_rate": 1.6883066261188665e-05, "loss": 0.4633, "step": 13916 }, { "epoch": 0.25838862629630593, "grad_norm": 0.4365514814853668, "learning_rate": 1.6882220018855994e-05, "loss": 0.3762, "step": 13918 }, { "epoch": 0.25842575643372456, "grad_norm": 0.3300270736217499, "learning_rate": 1.6881373682876925e-05, "loss": 0.2996, "step": 13920 }, { "epoch": 0.2584628865711432, "grad_norm": 0.4362311065196991, "learning_rate": 1.688052725326297e-05, "loss": 0.3393, "step": 13922 }, { "epoch": 0.2585000167085618, "grad_norm": 0.28232109546661377, "learning_rate": 1.687968073002565e-05, "loss": 0.2331, "step": 13924 }, { "epoch": 0.2585371468459805, "grad_norm": 0.23232927918434143, "learning_rate": 1.6878834113176485e-05, "loss": 0.2472, "step": 13926 }, { "epoch": 0.25857427698339913, "grad_norm": 0.35865363478660583, "learning_rate": 1.6877987402726986e-05, "loss": 0.2303, "step": 13928 }, { "epoch": 0.25861140712081776, "grad_norm": 0.36527344584465027, "learning_rate": 1.6877140598688685e-05, "loss": 0.3992, "step": 13930 }, { "epoch": 0.2586485372582364, "grad_norm": 0.44925445318222046, "learning_rate": 1.68762937010731e-05, "loss": 0.4077, "step": 13932 }, { "epoch": 0.258685667395655, "grad_norm": 0.8289952874183655, "learning_rate": 1.687544670989175e-05, "loss": 0.4337, "step": 13934 }, { "epoch": 0.25872279753307365, "grad_norm": 0.25468018651008606, "learning_rate": 1.687459962515617e-05, "loss": 0.5259, "step": 13936 }, { "epoch": 0.25875992767049233, "grad_norm": 0.490289568901062, "learning_rate": 1.687375244687788e-05, "loss": 0.375, "step": 13938 }, { "epoch": 0.25879705780791096, "grad_norm": 0.5866946578025818, "learning_rate": 1.6872905175068404e-05, "loss": 0.3447, "step": 13940 }, { "epoch": 0.2588341879453296, "grad_norm": 0.32367733120918274, "learning_rate": 1.6872057809739286e-05, "loss": 0.5514, "step": 13942 }, { "epoch": 0.2588713180827482, "grad_norm": 0.3692754805088043, "learning_rate": 1.6871210350902036e-05, "loss": 0.3264, "step": 13944 }, { "epoch": 0.25890844822016684, "grad_norm": 0.38057973980903625, "learning_rate": 1.6870362798568195e-05, "loss": 0.3451, "step": 13946 }, { "epoch": 0.2589455783575855, "grad_norm": 0.3000034689903259, "learning_rate": 1.6869515152749296e-05, "loss": 0.4575, "step": 13948 }, { "epoch": 0.25898270849500415, "grad_norm": 0.49746036529541016, "learning_rate": 1.6868667413456873e-05, "loss": 0.3326, "step": 13950 }, { "epoch": 0.2590198386324228, "grad_norm": 0.42583170533180237, "learning_rate": 1.686781958070246e-05, "loss": 0.3672, "step": 13952 }, { "epoch": 0.2590569687698414, "grad_norm": 0.35490143299102783, "learning_rate": 1.68669716544976e-05, "loss": 0.3543, "step": 13954 }, { "epoch": 0.25909409890726004, "grad_norm": 0.3476410508155823, "learning_rate": 1.6866123634853817e-05, "loss": 0.3369, "step": 13956 }, { "epoch": 0.25913122904467867, "grad_norm": 0.2687476873397827, "learning_rate": 1.6865275521782664e-05, "loss": 0.3628, "step": 13958 }, { "epoch": 0.25916835918209735, "grad_norm": 0.39296820759773254, "learning_rate": 1.686442731529567e-05, "loss": 0.2117, "step": 13960 }, { "epoch": 0.259205489319516, "grad_norm": 0.41421154141426086, "learning_rate": 1.686357901540438e-05, "loss": 0.2982, "step": 13962 }, { "epoch": 0.2592426194569346, "grad_norm": 0.4272485077381134, "learning_rate": 1.6862730622120345e-05, "loss": 0.4008, "step": 13964 }, { "epoch": 0.25927974959435324, "grad_norm": 0.5024080276489258, "learning_rate": 1.68618821354551e-05, "loss": 0.3085, "step": 13966 }, { "epoch": 0.25931687973177187, "grad_norm": 0.2880585193634033, "learning_rate": 1.6861033555420192e-05, "loss": 0.3054, "step": 13968 }, { "epoch": 0.25935400986919055, "grad_norm": 0.6252123713493347, "learning_rate": 1.686018488202717e-05, "loss": 0.3742, "step": 13970 }, { "epoch": 0.2593911400066092, "grad_norm": 0.2947867214679718, "learning_rate": 1.6859336115287576e-05, "loss": 0.2913, "step": 13972 }, { "epoch": 0.2594282701440278, "grad_norm": 0.45847341418266296, "learning_rate": 1.6858487255212967e-05, "loss": 0.4885, "step": 13974 }, { "epoch": 0.25946540028144643, "grad_norm": 0.7742215394973755, "learning_rate": 1.6857638301814888e-05, "loss": 0.2056, "step": 13976 }, { "epoch": 0.25950253041886506, "grad_norm": 0.4215402901172638, "learning_rate": 1.6856789255104895e-05, "loss": 0.3459, "step": 13978 }, { "epoch": 0.2595396605562837, "grad_norm": 0.40069687366485596, "learning_rate": 1.685594011509454e-05, "loss": 0.2383, "step": 13980 }, { "epoch": 0.2595767906937024, "grad_norm": 0.3303038775920868, "learning_rate": 1.6855090881795372e-05, "loss": 0.2049, "step": 13982 }, { "epoch": 0.259613920831121, "grad_norm": 0.37788519263267517, "learning_rate": 1.6854241555218954e-05, "loss": 0.3644, "step": 13984 }, { "epoch": 0.25965105096853963, "grad_norm": 0.2660425901412964, "learning_rate": 1.6853392135376836e-05, "loss": 0.1964, "step": 13986 }, { "epoch": 0.25968818110595826, "grad_norm": 0.5146507024765015, "learning_rate": 1.685254262228058e-05, "loss": 0.3741, "step": 13988 }, { "epoch": 0.2597253112433769, "grad_norm": 0.43032974004745483, "learning_rate": 1.6851693015941747e-05, "loss": 0.297, "step": 13990 }, { "epoch": 0.2597624413807956, "grad_norm": 0.3363281786441803, "learning_rate": 1.6850843316371894e-05, "loss": 0.3052, "step": 13992 }, { "epoch": 0.2597995715182142, "grad_norm": 0.4862024188041687, "learning_rate": 1.684999352358258e-05, "loss": 0.5986, "step": 13994 }, { "epoch": 0.25983670165563283, "grad_norm": 0.4416103661060333, "learning_rate": 1.6849143637585378e-05, "loss": 0.2835, "step": 13996 }, { "epoch": 0.25987383179305146, "grad_norm": 0.3888454735279083, "learning_rate": 1.6848293658391846e-05, "loss": 0.3235, "step": 13998 }, { "epoch": 0.2599109619304701, "grad_norm": 0.3635510802268982, "learning_rate": 1.684744358601355e-05, "loss": 0.3073, "step": 14000 }, { "epoch": 0.25994809206788877, "grad_norm": 0.6778346300125122, "learning_rate": 1.6846593420462056e-05, "loss": 0.5485, "step": 14002 }, { "epoch": 0.2599852222053074, "grad_norm": 0.37570440769195557, "learning_rate": 1.6845743161748935e-05, "loss": 0.2946, "step": 14004 }, { "epoch": 0.260022352342726, "grad_norm": 0.21032491326332092, "learning_rate": 1.6844892809885755e-05, "loss": 0.1352, "step": 14006 }, { "epoch": 0.26005948248014465, "grad_norm": 0.7216085195541382, "learning_rate": 1.6844042364884082e-05, "loss": 0.4938, "step": 14008 }, { "epoch": 0.2600966126175633, "grad_norm": 0.34204918146133423, "learning_rate": 1.6843191826755497e-05, "loss": 0.2725, "step": 14010 }, { "epoch": 0.2601337427549819, "grad_norm": 0.3548336625099182, "learning_rate": 1.6842341195511567e-05, "loss": 0.201, "step": 14012 }, { "epoch": 0.2601708728924006, "grad_norm": 0.28081128001213074, "learning_rate": 1.684149047116387e-05, "loss": 0.293, "step": 14014 }, { "epoch": 0.2602080030298192, "grad_norm": 0.3139168620109558, "learning_rate": 1.6840639653723982e-05, "loss": 0.3792, "step": 14016 }, { "epoch": 0.26024513316723785, "grad_norm": 0.42086249589920044, "learning_rate": 1.6839788743203477e-05, "loss": 0.3359, "step": 14018 }, { "epoch": 0.2602822633046565, "grad_norm": 0.4165078103542328, "learning_rate": 1.6838937739613933e-05, "loss": 0.1393, "step": 14020 }, { "epoch": 0.2603193934420751, "grad_norm": 0.37590858340263367, "learning_rate": 1.6838086642966934e-05, "loss": 0.2781, "step": 14022 }, { "epoch": 0.2603565235794938, "grad_norm": 0.3450084626674652, "learning_rate": 1.6837235453274057e-05, "loss": 0.3789, "step": 14024 }, { "epoch": 0.2603936537169124, "grad_norm": 0.36544740200042725, "learning_rate": 1.6836384170546885e-05, "loss": 0.2555, "step": 14026 }, { "epoch": 0.26043078385433105, "grad_norm": 0.4790702760219574, "learning_rate": 1.6835532794797e-05, "loss": 0.2544, "step": 14028 }, { "epoch": 0.2604679139917497, "grad_norm": 0.4462150037288666, "learning_rate": 1.683468132603599e-05, "loss": 0.3495, "step": 14030 }, { "epoch": 0.2605050441291683, "grad_norm": 0.30679255723953247, "learning_rate": 1.6833829764275444e-05, "loss": 0.4361, "step": 14032 }, { "epoch": 0.26054217426658693, "grad_norm": 0.29472529888153076, "learning_rate": 1.6832978109526936e-05, "loss": 0.2071, "step": 14034 }, { "epoch": 0.2605793044040056, "grad_norm": 0.3187980055809021, "learning_rate": 1.683212636180207e-05, "loss": 0.358, "step": 14036 }, { "epoch": 0.26061643454142425, "grad_norm": 0.38976263999938965, "learning_rate": 1.6831274521112428e-05, "loss": 0.2956, "step": 14038 }, { "epoch": 0.2606535646788429, "grad_norm": 0.40875038504600525, "learning_rate": 1.6830422587469595e-05, "loss": 0.3406, "step": 14040 }, { "epoch": 0.2606906948162615, "grad_norm": 0.5932621359825134, "learning_rate": 1.6829570560885177e-05, "loss": 0.4226, "step": 14042 }, { "epoch": 0.26072782495368013, "grad_norm": 0.38781848549842834, "learning_rate": 1.682871844137076e-05, "loss": 0.2172, "step": 14044 }, { "epoch": 0.2607649550910988, "grad_norm": 0.35778090357780457, "learning_rate": 1.6827866228937938e-05, "loss": 0.2958, "step": 14046 }, { "epoch": 0.26080208522851744, "grad_norm": 0.46684181690216064, "learning_rate": 1.6827013923598307e-05, "loss": 0.3022, "step": 14048 }, { "epoch": 0.2608392153659361, "grad_norm": 0.2739340364933014, "learning_rate": 1.6826161525363466e-05, "loss": 0.1812, "step": 14050 }, { "epoch": 0.2608763455033547, "grad_norm": 0.2747492790222168, "learning_rate": 1.6825309034245018e-05, "loss": 0.2855, "step": 14052 }, { "epoch": 0.26091347564077333, "grad_norm": 0.38275817036628723, "learning_rate": 1.6824456450254554e-05, "loss": 0.3022, "step": 14054 }, { "epoch": 0.26095060577819196, "grad_norm": 0.2889985740184784, "learning_rate": 1.682360377340368e-05, "loss": 0.4276, "step": 14056 }, { "epoch": 0.26098773591561064, "grad_norm": 0.37787890434265137, "learning_rate": 1.6822751003703997e-05, "loss": 0.2897, "step": 14058 }, { "epoch": 0.26102486605302927, "grad_norm": 0.2764505445957184, "learning_rate": 1.682189814116711e-05, "loss": 0.2386, "step": 14060 }, { "epoch": 0.2610619961904479, "grad_norm": 0.4211701452732086, "learning_rate": 1.6821045185804626e-05, "loss": 0.3558, "step": 14062 }, { "epoch": 0.2610991263278665, "grad_norm": 0.31557416915893555, "learning_rate": 1.6820192137628143e-05, "loss": 0.3684, "step": 14064 }, { "epoch": 0.26113625646528515, "grad_norm": 0.4372217655181885, "learning_rate": 1.6819338996649277e-05, "loss": 0.4507, "step": 14066 }, { "epoch": 0.26117338660270384, "grad_norm": 0.3574519753456116, "learning_rate": 1.681848576287963e-05, "loss": 0.2681, "step": 14068 }, { "epoch": 0.26121051674012247, "grad_norm": 0.36359620094299316, "learning_rate": 1.6817632436330826e-05, "loss": 0.4094, "step": 14070 }, { "epoch": 0.2612476468775411, "grad_norm": 0.37492144107818604, "learning_rate": 1.6816779017014456e-05, "loss": 0.1873, "step": 14072 }, { "epoch": 0.2612847770149597, "grad_norm": 0.33945581316947937, "learning_rate": 1.681592550494214e-05, "loss": 0.3307, "step": 14074 }, { "epoch": 0.26132190715237835, "grad_norm": 0.5761824250221252, "learning_rate": 1.6815071900125503e-05, "loss": 0.3925, "step": 14076 }, { "epoch": 0.26135903728979704, "grad_norm": 0.3409815728664398, "learning_rate": 1.6814218202576147e-05, "loss": 0.3691, "step": 14078 }, { "epoch": 0.26139616742721566, "grad_norm": 0.4060782194137573, "learning_rate": 1.681336441230569e-05, "loss": 0.3262, "step": 14080 }, { "epoch": 0.2614332975646343, "grad_norm": 0.2681616246700287, "learning_rate": 1.6812510529325755e-05, "loss": 0.3355, "step": 14082 }, { "epoch": 0.2614704277020529, "grad_norm": 0.39515119791030884, "learning_rate": 1.6811656553647955e-05, "loss": 0.3281, "step": 14084 }, { "epoch": 0.26150755783947155, "grad_norm": 0.2709459364414215, "learning_rate": 1.6810802485283916e-05, "loss": 0.184, "step": 14086 }, { "epoch": 0.2615446879768902, "grad_norm": 0.28708288073539734, "learning_rate": 1.680994832424525e-05, "loss": 0.3242, "step": 14088 }, { "epoch": 0.26158181811430886, "grad_norm": 0.3695259988307953, "learning_rate": 1.680909407054359e-05, "loss": 0.2836, "step": 14090 }, { "epoch": 0.2616189482517275, "grad_norm": 0.30495524406433105, "learning_rate": 1.6808239724190554e-05, "loss": 0.1805, "step": 14092 }, { "epoch": 0.2616560783891461, "grad_norm": 0.3366011381149292, "learning_rate": 1.680738528519777e-05, "loss": 0.1888, "step": 14094 }, { "epoch": 0.26169320852656475, "grad_norm": 0.3887014091014862, "learning_rate": 1.6806530753576862e-05, "loss": 0.3566, "step": 14096 }, { "epoch": 0.2617303386639834, "grad_norm": 0.41419389843940735, "learning_rate": 1.6805676129339456e-05, "loss": 0.3136, "step": 14098 }, { "epoch": 0.26176746880140206, "grad_norm": 0.48114123940467834, "learning_rate": 1.6804821412497188e-05, "loss": 0.36, "step": 14100 }, { "epoch": 0.2618045989388207, "grad_norm": 0.5495288968086243, "learning_rate": 1.680396660306168e-05, "loss": 0.3925, "step": 14102 }, { "epoch": 0.2618417290762393, "grad_norm": 0.3097253441810608, "learning_rate": 1.6803111701044564e-05, "loss": 0.2634, "step": 14104 }, { "epoch": 0.26187885921365794, "grad_norm": 0.3248990774154663, "learning_rate": 1.6802256706457482e-05, "loss": 0.2875, "step": 14106 }, { "epoch": 0.2619159893510766, "grad_norm": 0.35800638794898987, "learning_rate": 1.6801401619312057e-05, "loss": 0.4922, "step": 14108 }, { "epoch": 0.2619531194884952, "grad_norm": 0.2511877417564392, "learning_rate": 1.680054643961993e-05, "loss": 0.2732, "step": 14110 }, { "epoch": 0.2619902496259139, "grad_norm": 0.3973028063774109, "learning_rate": 1.6799691167392735e-05, "loss": 0.3757, "step": 14112 }, { "epoch": 0.2620273797633325, "grad_norm": 0.31965377926826477, "learning_rate": 1.679883580264211e-05, "loss": 0.1373, "step": 14114 }, { "epoch": 0.26206450990075114, "grad_norm": 0.37503311038017273, "learning_rate": 1.6797980345379696e-05, "loss": 0.1595, "step": 14116 }, { "epoch": 0.26210164003816977, "grad_norm": 0.3078148365020752, "learning_rate": 1.679712479561713e-05, "loss": 0.338, "step": 14118 }, { "epoch": 0.2621387701755884, "grad_norm": 0.4150207042694092, "learning_rate": 1.6796269153366058e-05, "loss": 0.2663, "step": 14120 }, { "epoch": 0.2621759003130071, "grad_norm": 0.3178233802318573, "learning_rate": 1.679541341863812e-05, "loss": 0.1775, "step": 14122 }, { "epoch": 0.2622130304504257, "grad_norm": 0.3457436263561249, "learning_rate": 1.679455759144496e-05, "loss": 0.2439, "step": 14124 }, { "epoch": 0.26225016058784434, "grad_norm": 0.34838226437568665, "learning_rate": 1.6793701671798223e-05, "loss": 0.3207, "step": 14126 }, { "epoch": 0.26228729072526297, "grad_norm": 0.24280937016010284, "learning_rate": 1.679284565970955e-05, "loss": 0.2971, "step": 14128 }, { "epoch": 0.2623244208626816, "grad_norm": 0.23812375962734222, "learning_rate": 1.67919895551906e-05, "loss": 0.2827, "step": 14130 }, { "epoch": 0.2623615510001002, "grad_norm": 0.35587090253829956, "learning_rate": 1.6791133358253015e-05, "loss": 0.4991, "step": 14132 }, { "epoch": 0.2623986811375189, "grad_norm": 0.7513763904571533, "learning_rate": 1.679027706890845e-05, "loss": 0.3813, "step": 14134 }, { "epoch": 0.26243581127493754, "grad_norm": 0.5282157063484192, "learning_rate": 1.678942068716855e-05, "loss": 0.4646, "step": 14136 }, { "epoch": 0.26247294141235616, "grad_norm": 0.37438374757766724, "learning_rate": 1.6788564213044973e-05, "loss": 0.2097, "step": 14138 }, { "epoch": 0.2625100715497748, "grad_norm": 0.4786970913410187, "learning_rate": 1.678770764654937e-05, "loss": 0.285, "step": 14140 }, { "epoch": 0.2625472016871934, "grad_norm": 0.42811256647109985, "learning_rate": 1.6786850987693397e-05, "loss": 0.2298, "step": 14142 }, { "epoch": 0.2625843318246121, "grad_norm": 0.3187107443809509, "learning_rate": 1.6785994236488714e-05, "loss": 0.4095, "step": 14144 }, { "epoch": 0.26262146196203073, "grad_norm": 0.3378993272781372, "learning_rate": 1.678513739294697e-05, "loss": 0.1566, "step": 14146 }, { "epoch": 0.26265859209944936, "grad_norm": 0.6031299829483032, "learning_rate": 1.6784280457079836e-05, "loss": 0.2282, "step": 14148 }, { "epoch": 0.262695722236868, "grad_norm": 0.34305110573768616, "learning_rate": 1.678342342889896e-05, "loss": 0.1947, "step": 14150 }, { "epoch": 0.2627328523742866, "grad_norm": 0.5034898519515991, "learning_rate": 1.6782566308416014e-05, "loss": 0.2314, "step": 14152 }, { "epoch": 0.2627699825117053, "grad_norm": 0.30161580443382263, "learning_rate": 1.6781709095642658e-05, "loss": 0.276, "step": 14154 }, { "epoch": 0.26280711264912393, "grad_norm": 0.32590973377227783, "learning_rate": 1.678085179059055e-05, "loss": 0.297, "step": 14156 }, { "epoch": 0.26284424278654256, "grad_norm": 0.4782107174396515, "learning_rate": 1.6779994393271365e-05, "loss": 0.2587, "step": 14158 }, { "epoch": 0.2628813729239612, "grad_norm": 0.2745949327945709, "learning_rate": 1.6779136903696763e-05, "loss": 0.4622, "step": 14160 }, { "epoch": 0.2629185030613798, "grad_norm": 0.3983341157436371, "learning_rate": 1.6778279321878416e-05, "loss": 0.3024, "step": 14162 }, { "epoch": 0.26295563319879844, "grad_norm": 0.5545642375946045, "learning_rate": 1.6777421647827987e-05, "loss": 0.2972, "step": 14164 }, { "epoch": 0.26299276333621713, "grad_norm": 0.5028650760650635, "learning_rate": 1.677656388155715e-05, "loss": 0.1692, "step": 14166 }, { "epoch": 0.26302989347363576, "grad_norm": 0.28886497020721436, "learning_rate": 1.6775706023077578e-05, "loss": 0.2239, "step": 14168 }, { "epoch": 0.2630670236110544, "grad_norm": 0.48967164754867554, "learning_rate": 1.6774848072400943e-05, "loss": 0.2816, "step": 14170 }, { "epoch": 0.263104153748473, "grad_norm": 0.34067201614379883, "learning_rate": 1.6773990029538917e-05, "loss": 0.438, "step": 14172 }, { "epoch": 0.26314128388589164, "grad_norm": 0.4151076674461365, "learning_rate": 1.677313189450318e-05, "loss": 0.2797, "step": 14174 }, { "epoch": 0.2631784140233103, "grad_norm": 0.37378132343292236, "learning_rate": 1.6772273667305405e-05, "loss": 0.247, "step": 14176 }, { "epoch": 0.26321554416072895, "grad_norm": 0.3564856946468353, "learning_rate": 1.677141534795727e-05, "loss": 0.5565, "step": 14178 }, { "epoch": 0.2632526742981476, "grad_norm": 0.4268989861011505, "learning_rate": 1.6770556936470456e-05, "loss": 0.2721, "step": 14180 }, { "epoch": 0.2632898044355662, "grad_norm": 0.3650158643722534, "learning_rate": 1.676969843285664e-05, "loss": 0.4985, "step": 14182 }, { "epoch": 0.26332693457298484, "grad_norm": 0.4041502773761749, "learning_rate": 1.6768839837127508e-05, "loss": 0.2943, "step": 14184 }, { "epoch": 0.26336406471040347, "grad_norm": 0.39799535274505615, "learning_rate": 1.6767981149294738e-05, "loss": 0.4508, "step": 14186 }, { "epoch": 0.26340119484782215, "grad_norm": 0.33557912707328796, "learning_rate": 1.676712236937002e-05, "loss": 0.245, "step": 14188 }, { "epoch": 0.2634383249852408, "grad_norm": 0.39507460594177246, "learning_rate": 1.6766263497365036e-05, "loss": 0.3271, "step": 14190 }, { "epoch": 0.2634754551226594, "grad_norm": 0.46698614954948425, "learning_rate": 1.676540453329147e-05, "loss": 0.37, "step": 14192 }, { "epoch": 0.26351258526007804, "grad_norm": 0.45323115587234497, "learning_rate": 1.6764545477161017e-05, "loss": 0.4808, "step": 14194 }, { "epoch": 0.26354971539749666, "grad_norm": 0.3934701383113861, "learning_rate": 1.676368632898536e-05, "loss": 0.2344, "step": 14196 }, { "epoch": 0.26358684553491535, "grad_norm": 0.21577338874340057, "learning_rate": 1.6762827088776194e-05, "loss": 0.3857, "step": 14198 }, { "epoch": 0.263623975672334, "grad_norm": 0.42878860235214233, "learning_rate": 1.6761967756545206e-05, "loss": 0.3362, "step": 14200 }, { "epoch": 0.2636611058097526, "grad_norm": 0.334805965423584, "learning_rate": 1.6761108332304093e-05, "loss": 0.1876, "step": 14202 }, { "epoch": 0.26369823594717123, "grad_norm": 0.6080650091171265, "learning_rate": 1.6760248816064546e-05, "loss": 0.2604, "step": 14204 }, { "epoch": 0.26373536608458986, "grad_norm": 0.3659916818141937, "learning_rate": 1.675938920783826e-05, "loss": 0.2707, "step": 14206 }, { "epoch": 0.2637724962220085, "grad_norm": 0.2813114821910858, "learning_rate": 1.6758529507636937e-05, "loss": 0.2437, "step": 14208 }, { "epoch": 0.2638096263594272, "grad_norm": 0.3511956036090851, "learning_rate": 1.675766971547227e-05, "loss": 0.2173, "step": 14210 }, { "epoch": 0.2638467564968458, "grad_norm": 0.35966426134109497, "learning_rate": 1.675680983135596e-05, "loss": 0.2128, "step": 14212 }, { "epoch": 0.26388388663426443, "grad_norm": 0.38586369156837463, "learning_rate": 1.6755949855299708e-05, "loss": 0.4097, "step": 14214 }, { "epoch": 0.26392101677168306, "grad_norm": 0.29686152935028076, "learning_rate": 1.6755089787315215e-05, "loss": 0.0991, "step": 14216 }, { "epoch": 0.2639581469091017, "grad_norm": 0.4054957926273346, "learning_rate": 1.6754229627414183e-05, "loss": 0.3325, "step": 14218 }, { "epoch": 0.26399527704652037, "grad_norm": 0.34105566143989563, "learning_rate": 1.6753369375608317e-05, "loss": 0.139, "step": 14220 }, { "epoch": 0.264032407183939, "grad_norm": 0.3767379820346832, "learning_rate": 1.6752509031909324e-05, "loss": 0.3132, "step": 14222 }, { "epoch": 0.26406953732135763, "grad_norm": 0.39031022787094116, "learning_rate": 1.6751648596328903e-05, "loss": 0.2391, "step": 14224 }, { "epoch": 0.26410666745877626, "grad_norm": 0.4413573741912842, "learning_rate": 1.6750788068878777e-05, "loss": 0.3748, "step": 14226 }, { "epoch": 0.2641437975961949, "grad_norm": 0.26224058866500854, "learning_rate": 1.6749927449570642e-05, "loss": 0.2944, "step": 14228 }, { "epoch": 0.26418092773361357, "grad_norm": 0.2730865776538849, "learning_rate": 1.6749066738416214e-05, "loss": 0.29, "step": 14230 }, { "epoch": 0.2642180578710322, "grad_norm": 0.25818562507629395, "learning_rate": 1.6748205935427202e-05, "loss": 0.3834, "step": 14232 }, { "epoch": 0.2642551880084508, "grad_norm": 0.2991827130317688, "learning_rate": 1.6747345040615322e-05, "loss": 0.1989, "step": 14234 }, { "epoch": 0.26429231814586945, "grad_norm": 0.4195464551448822, "learning_rate": 1.6746484053992285e-05, "loss": 0.4589, "step": 14236 }, { "epoch": 0.2643294482832881, "grad_norm": 0.4545131027698517, "learning_rate": 1.674562297556981e-05, "loss": 0.3973, "step": 14238 }, { "epoch": 0.2643665784207067, "grad_norm": 0.4013481140136719, "learning_rate": 1.674476180535961e-05, "loss": 0.2932, "step": 14240 }, { "epoch": 0.2644037085581254, "grad_norm": 0.3643893897533417, "learning_rate": 1.6743900543373405e-05, "loss": 0.5315, "step": 14242 }, { "epoch": 0.264440838695544, "grad_norm": 0.42725899815559387, "learning_rate": 1.6743039189622913e-05, "loss": 0.5253, "step": 14244 }, { "epoch": 0.26447796883296265, "grad_norm": 0.22592240571975708, "learning_rate": 1.6742177744119858e-05, "loss": 0.2865, "step": 14246 }, { "epoch": 0.2645150989703813, "grad_norm": 0.3828607201576233, "learning_rate": 1.6741316206875955e-05, "loss": 0.3774, "step": 14248 }, { "epoch": 0.2645522291077999, "grad_norm": 0.44129350781440735, "learning_rate": 1.6740454577902934e-05, "loss": 0.3331, "step": 14250 }, { "epoch": 0.2645893592452186, "grad_norm": 0.23848488926887512, "learning_rate": 1.6739592857212518e-05, "loss": 0.198, "step": 14252 }, { "epoch": 0.2646264893826372, "grad_norm": 0.34743815660476685, "learning_rate": 1.6738731044816426e-05, "loss": 0.1552, "step": 14254 }, { "epoch": 0.26466361952005585, "grad_norm": 0.46097663044929504, "learning_rate": 1.673786914072639e-05, "loss": 0.3398, "step": 14256 }, { "epoch": 0.2647007496574745, "grad_norm": 0.4356861412525177, "learning_rate": 1.673700714495414e-05, "loss": 0.2615, "step": 14258 }, { "epoch": 0.2647378797948931, "grad_norm": 0.24662169814109802, "learning_rate": 1.6736145057511403e-05, "loss": 0.1473, "step": 14260 }, { "epoch": 0.26477500993231173, "grad_norm": 0.45927730202674866, "learning_rate": 1.6735282878409903e-05, "loss": 0.2772, "step": 14262 }, { "epoch": 0.2648121400697304, "grad_norm": 0.40104055404663086, "learning_rate": 1.6734420607661387e-05, "loss": 0.3106, "step": 14264 }, { "epoch": 0.26484927020714905, "grad_norm": 0.37815555930137634, "learning_rate": 1.6733558245277574e-05, "loss": 0.3442, "step": 14266 }, { "epoch": 0.2648864003445677, "grad_norm": 0.3576014041900635, "learning_rate": 1.6732695791270196e-05, "loss": 0.48, "step": 14268 }, { "epoch": 0.2649235304819863, "grad_norm": 0.3668026924133301, "learning_rate": 1.6731833245651005e-05, "loss": 0.3261, "step": 14270 }, { "epoch": 0.26496066061940493, "grad_norm": 0.28072062134742737, "learning_rate": 1.6730970608431722e-05, "loss": 0.2143, "step": 14272 }, { "epoch": 0.2649977907568236, "grad_norm": 0.31429967284202576, "learning_rate": 1.6730107879624092e-05, "loss": 0.1703, "step": 14274 }, { "epoch": 0.26503492089424224, "grad_norm": 0.6020709872245789, "learning_rate": 1.672924505923985e-05, "loss": 0.1655, "step": 14276 }, { "epoch": 0.26507205103166087, "grad_norm": 0.24876609444618225, "learning_rate": 1.6728382147290747e-05, "loss": 0.3435, "step": 14278 }, { "epoch": 0.2651091811690795, "grad_norm": 0.40927767753601074, "learning_rate": 1.6727519143788512e-05, "loss": 0.498, "step": 14280 }, { "epoch": 0.26514631130649813, "grad_norm": 0.3224967122077942, "learning_rate": 1.6726656048744893e-05, "loss": 0.2814, "step": 14282 }, { "epoch": 0.26518344144391676, "grad_norm": 0.2693054676055908, "learning_rate": 1.672579286217163e-05, "loss": 0.1502, "step": 14284 }, { "epoch": 0.26522057158133544, "grad_norm": 0.39799273014068604, "learning_rate": 1.672492958408048e-05, "loss": 0.499, "step": 14286 }, { "epoch": 0.26525770171875407, "grad_norm": 0.34101539850234985, "learning_rate": 1.6724066214483174e-05, "loss": 0.2973, "step": 14288 }, { "epoch": 0.2652948318561727, "grad_norm": 0.35910147428512573, "learning_rate": 1.672320275339147e-05, "loss": 0.3929, "step": 14290 }, { "epoch": 0.2653319619935913, "grad_norm": 0.27279797196388245, "learning_rate": 1.6722339200817116e-05, "loss": 0.3782, "step": 14292 }, { "epoch": 0.26536909213100995, "grad_norm": 0.34886181354522705, "learning_rate": 1.6721475556771864e-05, "loss": 0.2055, "step": 14294 }, { "epoch": 0.26540622226842864, "grad_norm": 0.451325386762619, "learning_rate": 1.672061182126746e-05, "loss": 0.3413, "step": 14296 }, { "epoch": 0.26544335240584727, "grad_norm": 0.2522677481174469, "learning_rate": 1.671974799431566e-05, "loss": 0.4009, "step": 14298 }, { "epoch": 0.2654804825432659, "grad_norm": 0.3332119286060333, "learning_rate": 1.6718884075928216e-05, "loss": 0.371, "step": 14300 }, { "epoch": 0.2655176126806845, "grad_norm": 0.4892105758190155, "learning_rate": 1.6718020066116888e-05, "loss": 0.4023, "step": 14302 }, { "epoch": 0.26555474281810315, "grad_norm": 0.30999088287353516, "learning_rate": 1.6717155964893428e-05, "loss": 0.3294, "step": 14304 }, { "epoch": 0.26559187295552183, "grad_norm": 0.5551329255104065, "learning_rate": 1.6716291772269598e-05, "loss": 0.3277, "step": 14306 }, { "epoch": 0.26562900309294046, "grad_norm": 0.3143518269062042, "learning_rate": 1.6715427488257153e-05, "loss": 0.3248, "step": 14308 }, { "epoch": 0.2656661332303591, "grad_norm": 0.3433229327201843, "learning_rate": 1.6714563112867855e-05, "loss": 0.2351, "step": 14310 }, { "epoch": 0.2657032633677777, "grad_norm": 0.3209666907787323, "learning_rate": 1.6713698646113463e-05, "loss": 0.3536, "step": 14312 }, { "epoch": 0.26574039350519635, "grad_norm": 0.3675312101840973, "learning_rate": 1.6712834088005744e-05, "loss": 0.3228, "step": 14314 }, { "epoch": 0.265777523642615, "grad_norm": 0.6456232070922852, "learning_rate": 1.671196943855646e-05, "loss": 0.1725, "step": 14316 }, { "epoch": 0.26581465378003366, "grad_norm": 0.5283076167106628, "learning_rate": 1.6711104697777378e-05, "loss": 0.3528, "step": 14318 }, { "epoch": 0.2658517839174523, "grad_norm": 0.5134596228599548, "learning_rate": 1.671023986568026e-05, "loss": 0.3215, "step": 14320 }, { "epoch": 0.2658889140548709, "grad_norm": 0.4296994209289551, "learning_rate": 1.670937494227688e-05, "loss": 0.4656, "step": 14322 }, { "epoch": 0.26592604419228955, "grad_norm": 0.45185139775276184, "learning_rate": 1.6708509927579003e-05, "loss": 0.1717, "step": 14324 }, { "epoch": 0.2659631743297082, "grad_norm": 0.4098852872848511, "learning_rate": 1.67076448215984e-05, "loss": 0.4624, "step": 14326 }, { "epoch": 0.26600030446712686, "grad_norm": 0.3968254327774048, "learning_rate": 1.6706779624346844e-05, "loss": 0.1497, "step": 14328 }, { "epoch": 0.2660374346045455, "grad_norm": 0.5006260871887207, "learning_rate": 1.6705914335836106e-05, "loss": 0.3648, "step": 14330 }, { "epoch": 0.2660745647419641, "grad_norm": 0.33969223499298096, "learning_rate": 1.670504895607796e-05, "loss": 0.2362, "step": 14332 }, { "epoch": 0.26611169487938274, "grad_norm": 0.32858091592788696, "learning_rate": 1.6704183485084183e-05, "loss": 0.4018, "step": 14334 }, { "epoch": 0.26614882501680137, "grad_norm": 0.6745291352272034, "learning_rate": 1.6703317922866546e-05, "loss": 0.4107, "step": 14336 }, { "epoch": 0.26618595515422, "grad_norm": 0.5537766218185425, "learning_rate": 1.6702452269436834e-05, "loss": 0.2483, "step": 14338 }, { "epoch": 0.2662230852916387, "grad_norm": 0.3518688976764679, "learning_rate": 1.6701586524806826e-05, "loss": 0.2292, "step": 14340 }, { "epoch": 0.2662602154290573, "grad_norm": 0.41889575123786926, "learning_rate": 1.670072068898829e-05, "loss": 0.5489, "step": 14342 }, { "epoch": 0.26629734556647594, "grad_norm": 0.3126862943172455, "learning_rate": 1.6699854761993025e-05, "loss": 0.2945, "step": 14344 }, { "epoch": 0.26633447570389457, "grad_norm": 0.3870278596878052, "learning_rate": 1.6698988743832802e-05, "loss": 0.2315, "step": 14346 }, { "epoch": 0.2663716058413132, "grad_norm": 0.3100411295890808, "learning_rate": 1.6698122634519407e-05, "loss": 0.4566, "step": 14348 }, { "epoch": 0.2664087359787319, "grad_norm": 0.3697691261768341, "learning_rate": 1.669725643406463e-05, "loss": 0.2797, "step": 14350 }, { "epoch": 0.2664458661161505, "grad_norm": 0.2827511429786682, "learning_rate": 1.6696390142480246e-05, "loss": 0.2803, "step": 14352 }, { "epoch": 0.26648299625356914, "grad_norm": 0.3630894422531128, "learning_rate": 1.669552375977806e-05, "loss": 0.2501, "step": 14354 }, { "epoch": 0.26652012639098777, "grad_norm": 0.30100512504577637, "learning_rate": 1.6694657285969843e-05, "loss": 0.1759, "step": 14356 }, { "epoch": 0.2665572565284064, "grad_norm": 0.44136178493499756, "learning_rate": 1.6693790721067397e-05, "loss": 0.1432, "step": 14358 }, { "epoch": 0.266594386665825, "grad_norm": 0.48275068402290344, "learning_rate": 1.6692924065082508e-05, "loss": 0.3125, "step": 14360 }, { "epoch": 0.2666315168032437, "grad_norm": 0.32505711913108826, "learning_rate": 1.669205731802697e-05, "loss": 0.1992, "step": 14362 }, { "epoch": 0.26666864694066233, "grad_norm": 0.28460440039634705, "learning_rate": 1.6691190479912574e-05, "loss": 0.3663, "step": 14364 }, { "epoch": 0.26670577707808096, "grad_norm": 0.4201560318470001, "learning_rate": 1.6690323550751127e-05, "loss": 0.4463, "step": 14366 }, { "epoch": 0.2667429072154996, "grad_norm": 0.3393763601779938, "learning_rate": 1.6689456530554407e-05, "loss": 0.3195, "step": 14368 }, { "epoch": 0.2667800373529182, "grad_norm": 0.3699619472026825, "learning_rate": 1.6688589419334226e-05, "loss": 0.2872, "step": 14370 }, { "epoch": 0.2668171674903369, "grad_norm": 0.283236563205719, "learning_rate": 1.6687722217102374e-05, "loss": 0.3006, "step": 14372 }, { "epoch": 0.26685429762775553, "grad_norm": 0.4910120964050293, "learning_rate": 1.668685492387066e-05, "loss": 0.4428, "step": 14374 }, { "epoch": 0.26689142776517416, "grad_norm": 0.3207995295524597, "learning_rate": 1.6685987539650878e-05, "loss": 0.3421, "step": 14376 }, { "epoch": 0.2669285579025928, "grad_norm": 0.41593748331069946, "learning_rate": 1.668512006445483e-05, "loss": 0.6078, "step": 14378 }, { "epoch": 0.2669656880400114, "grad_norm": 0.2827526330947876, "learning_rate": 1.6684252498294322e-05, "loss": 0.2529, "step": 14380 }, { "epoch": 0.2670028181774301, "grad_norm": 0.35570284724235535, "learning_rate": 1.6683384841181162e-05, "loss": 0.1784, "step": 14382 }, { "epoch": 0.26703994831484873, "grad_norm": 0.3963766396045685, "learning_rate": 1.668251709312715e-05, "loss": 0.2537, "step": 14384 }, { "epoch": 0.26707707845226736, "grad_norm": 0.3778403401374817, "learning_rate": 1.66816492541441e-05, "loss": 0.1463, "step": 14386 }, { "epoch": 0.267114208589686, "grad_norm": 0.28598928451538086, "learning_rate": 1.668078132424382e-05, "loss": 0.2729, "step": 14388 }, { "epoch": 0.2671513387271046, "grad_norm": 0.35990801453590393, "learning_rate": 1.667991330343811e-05, "loss": 0.2098, "step": 14390 }, { "epoch": 0.26718846886452324, "grad_norm": 0.31900423765182495, "learning_rate": 1.6679045191738793e-05, "loss": 0.4023, "step": 14392 }, { "epoch": 0.2672255990019419, "grad_norm": 0.35485732555389404, "learning_rate": 1.6678176989157676e-05, "loss": 0.304, "step": 14394 }, { "epoch": 0.26726272913936056, "grad_norm": 0.37659889459609985, "learning_rate": 1.6677308695706577e-05, "loss": 0.4337, "step": 14396 }, { "epoch": 0.2672998592767792, "grad_norm": 0.41286689043045044, "learning_rate": 1.6676440311397302e-05, "loss": 0.4646, "step": 14398 }, { "epoch": 0.2673369894141978, "grad_norm": 0.28072118759155273, "learning_rate": 1.6675571836241674e-05, "loss": 0.2176, "step": 14400 }, { "epoch": 0.26737411955161644, "grad_norm": 0.31832537055015564, "learning_rate": 1.667470327025151e-05, "loss": 0.3886, "step": 14402 }, { "epoch": 0.2674112496890351, "grad_norm": 0.29668357968330383, "learning_rate": 1.6673834613438627e-05, "loss": 0.5288, "step": 14404 }, { "epoch": 0.26744837982645375, "grad_norm": 0.19775919616222382, "learning_rate": 1.6672965865814848e-05, "loss": 0.325, "step": 14406 }, { "epoch": 0.2674855099638724, "grad_norm": 0.41689276695251465, "learning_rate": 1.6672097027391986e-05, "loss": 0.2146, "step": 14408 }, { "epoch": 0.267522640101291, "grad_norm": 0.44212502241134644, "learning_rate": 1.6671228098181876e-05, "loss": 0.4554, "step": 14410 }, { "epoch": 0.26755977023870964, "grad_norm": 0.3626474440097809, "learning_rate": 1.6670359078196327e-05, "loss": 0.3896, "step": 14412 }, { "epoch": 0.26759690037612827, "grad_norm": 0.6020227670669556, "learning_rate": 1.6669489967447178e-05, "loss": 0.3662, "step": 14414 }, { "epoch": 0.26763403051354695, "grad_norm": 0.3484719693660736, "learning_rate": 1.6668620765946242e-05, "loss": 0.2158, "step": 14416 }, { "epoch": 0.2676711606509656, "grad_norm": 0.24232150614261627, "learning_rate": 1.6667751473705357e-05, "loss": 0.255, "step": 14418 }, { "epoch": 0.2677082907883842, "grad_norm": 0.6532554030418396, "learning_rate": 1.6666882090736343e-05, "loss": 0.3411, "step": 14420 }, { "epoch": 0.26774542092580284, "grad_norm": 0.494770884513855, "learning_rate": 1.6666012617051036e-05, "loss": 0.3573, "step": 14422 }, { "epoch": 0.26778255106322146, "grad_norm": 0.474211722612381, "learning_rate": 1.666514305266126e-05, "loss": 0.3398, "step": 14424 }, { "epoch": 0.26781968120064015, "grad_norm": 0.3734486699104309, "learning_rate": 1.6664273397578853e-05, "loss": 0.3873, "step": 14426 }, { "epoch": 0.2678568113380588, "grad_norm": 0.3601103127002716, "learning_rate": 1.666340365181565e-05, "loss": 0.3415, "step": 14428 }, { "epoch": 0.2678939414754774, "grad_norm": 0.4700402319431305, "learning_rate": 1.6662533815383482e-05, "loss": 0.2047, "step": 14430 }, { "epoch": 0.26793107161289603, "grad_norm": 0.3722496032714844, "learning_rate": 1.666166388829418e-05, "loss": 0.3805, "step": 14432 }, { "epoch": 0.26796820175031466, "grad_norm": 0.16663134098052979, "learning_rate": 1.666079387055959e-05, "loss": 0.2927, "step": 14434 }, { "epoch": 0.2680053318877333, "grad_norm": 0.36628684401512146, "learning_rate": 1.665992376219155e-05, "loss": 0.1648, "step": 14436 }, { "epoch": 0.268042462025152, "grad_norm": 0.26776444911956787, "learning_rate": 1.6659053563201896e-05, "loss": 0.2641, "step": 14438 }, { "epoch": 0.2680795921625706, "grad_norm": 0.31087130308151245, "learning_rate": 1.6658183273602463e-05, "loss": 0.398, "step": 14440 }, { "epoch": 0.26811672229998923, "grad_norm": 0.3097818195819855, "learning_rate": 1.6657312893405104e-05, "loss": 0.3958, "step": 14442 }, { "epoch": 0.26815385243740786, "grad_norm": 0.37057098746299744, "learning_rate": 1.6656442422621658e-05, "loss": 0.343, "step": 14444 }, { "epoch": 0.2681909825748265, "grad_norm": 0.3174958825111389, "learning_rate": 1.665557186126397e-05, "loss": 0.3892, "step": 14446 }, { "epoch": 0.26822811271224517, "grad_norm": 0.3206169903278351, "learning_rate": 1.665470120934388e-05, "loss": 0.2514, "step": 14448 }, { "epoch": 0.2682652428496638, "grad_norm": 0.3004591166973114, "learning_rate": 1.6653830466873243e-05, "loss": 0.3424, "step": 14450 }, { "epoch": 0.2683023729870824, "grad_norm": 0.36004695296287537, "learning_rate": 1.66529596338639e-05, "loss": 0.3814, "step": 14452 }, { "epoch": 0.26833950312450106, "grad_norm": 0.3942979872226715, "learning_rate": 1.6652088710327708e-05, "loss": 0.3992, "step": 14454 }, { "epoch": 0.2683766332619197, "grad_norm": 0.29094555974006653, "learning_rate": 1.6651217696276513e-05, "loss": 0.3038, "step": 14456 }, { "epoch": 0.26841376339933837, "grad_norm": 0.34084823727607727, "learning_rate": 1.6650346591722168e-05, "loss": 0.393, "step": 14458 }, { "epoch": 0.268450893536757, "grad_norm": 0.3628236651420593, "learning_rate": 1.6649475396676526e-05, "loss": 0.2301, "step": 14460 }, { "epoch": 0.2684880236741756, "grad_norm": 0.4235619008541107, "learning_rate": 1.6648604111151444e-05, "loss": 0.5134, "step": 14462 }, { "epoch": 0.26852515381159425, "grad_norm": 0.4813389778137207, "learning_rate": 1.664773273515877e-05, "loss": 0.2371, "step": 14464 }, { "epoch": 0.2685622839490129, "grad_norm": 0.3011683523654938, "learning_rate": 1.664686126871037e-05, "loss": 0.2172, "step": 14466 }, { "epoch": 0.2685994140864315, "grad_norm": 0.5195376873016357, "learning_rate": 1.6645989711818092e-05, "loss": 0.3891, "step": 14468 }, { "epoch": 0.2686365442238502, "grad_norm": 0.3900006115436554, "learning_rate": 1.6645118064493807e-05, "loss": 0.4188, "step": 14470 }, { "epoch": 0.2686736743612688, "grad_norm": 0.39062610268592834, "learning_rate": 1.6644246326749368e-05, "loss": 0.1722, "step": 14472 }, { "epoch": 0.26871080449868745, "grad_norm": 0.40507540106773376, "learning_rate": 1.6643374498596637e-05, "loss": 0.3746, "step": 14474 }, { "epoch": 0.2687479346361061, "grad_norm": 0.359060674905777, "learning_rate": 1.664250258004748e-05, "loss": 0.2503, "step": 14476 }, { "epoch": 0.2687850647735247, "grad_norm": 0.2542099952697754, "learning_rate": 1.664163057111376e-05, "loss": 0.3822, "step": 14478 }, { "epoch": 0.2688221949109434, "grad_norm": 0.38323724269866943, "learning_rate": 1.6640758471807337e-05, "loss": 0.4657, "step": 14480 }, { "epoch": 0.268859325048362, "grad_norm": 0.2707110643386841, "learning_rate": 1.6639886282140086e-05, "loss": 0.4027, "step": 14482 }, { "epoch": 0.26889645518578065, "grad_norm": 0.31322354078292847, "learning_rate": 1.663901400212387e-05, "loss": 0.2866, "step": 14484 }, { "epoch": 0.2689335853231993, "grad_norm": 0.4387029707431793, "learning_rate": 1.6638141631770564e-05, "loss": 0.2521, "step": 14486 }, { "epoch": 0.2689707154606179, "grad_norm": 0.33834144473075867, "learning_rate": 1.663726917109203e-05, "loss": 0.3537, "step": 14488 }, { "epoch": 0.26900784559803653, "grad_norm": 0.432029664516449, "learning_rate": 1.6636396620100144e-05, "loss": 0.2071, "step": 14490 }, { "epoch": 0.2690449757354552, "grad_norm": 0.33301714062690735, "learning_rate": 1.663552397880678e-05, "loss": 0.1605, "step": 14492 }, { "epoch": 0.26908210587287384, "grad_norm": 0.3962661325931549, "learning_rate": 1.663465124722381e-05, "loss": 0.3266, "step": 14494 }, { "epoch": 0.2691192360102925, "grad_norm": 0.41214829683303833, "learning_rate": 1.6633778425363107e-05, "loss": 0.3467, "step": 14496 }, { "epoch": 0.2691563661477111, "grad_norm": 1.6523618698120117, "learning_rate": 1.6632905513236553e-05, "loss": 0.2335, "step": 14498 }, { "epoch": 0.26919349628512973, "grad_norm": 0.41188904643058777, "learning_rate": 1.663203251085602e-05, "loss": 0.4452, "step": 14500 }, { "epoch": 0.2692306264225484, "grad_norm": 0.39941343665122986, "learning_rate": 1.6631159418233392e-05, "loss": 0.2934, "step": 14502 }, { "epoch": 0.26926775655996704, "grad_norm": 0.3252164423465729, "learning_rate": 1.6630286235380546e-05, "loss": 0.4299, "step": 14504 }, { "epoch": 0.26930488669738567, "grad_norm": 0.38180050253868103, "learning_rate": 1.6629412962309364e-05, "loss": 0.322, "step": 14506 }, { "epoch": 0.2693420168348043, "grad_norm": 0.4627339243888855, "learning_rate": 1.662853959903173e-05, "loss": 0.3768, "step": 14508 }, { "epoch": 0.2693791469722229, "grad_norm": 0.2700619101524353, "learning_rate": 1.6627666145559526e-05, "loss": 0.3165, "step": 14510 }, { "epoch": 0.26941627710964156, "grad_norm": 0.3088364005088806, "learning_rate": 1.6626792601904637e-05, "loss": 0.2917, "step": 14512 }, { "epoch": 0.26945340724706024, "grad_norm": 0.45101645588874817, "learning_rate": 1.662591896807895e-05, "loss": 0.4708, "step": 14514 }, { "epoch": 0.26949053738447887, "grad_norm": 0.28268539905548096, "learning_rate": 1.6625045244094357e-05, "loss": 0.2777, "step": 14516 }, { "epoch": 0.2695276675218975, "grad_norm": 0.2866145074367523, "learning_rate": 1.662417142996274e-05, "loss": 0.1991, "step": 14518 }, { "epoch": 0.2695647976593161, "grad_norm": 0.28510549664497375, "learning_rate": 1.662329752569599e-05, "loss": 0.3238, "step": 14520 }, { "epoch": 0.26960192779673475, "grad_norm": 0.35713887214660645, "learning_rate": 1.6622423531306005e-05, "loss": 0.3528, "step": 14522 }, { "epoch": 0.26963905793415344, "grad_norm": 0.43582355976104736, "learning_rate": 1.6621549446804666e-05, "loss": 0.3047, "step": 14524 }, { "epoch": 0.26967618807157206, "grad_norm": 0.36458003520965576, "learning_rate": 1.6620675272203873e-05, "loss": 0.3094, "step": 14526 }, { "epoch": 0.2697133182089907, "grad_norm": 0.6691232323646545, "learning_rate": 1.6619801007515523e-05, "loss": 0.4771, "step": 14528 }, { "epoch": 0.2697504483464093, "grad_norm": 0.36070874333381653, "learning_rate": 1.6618926652751513e-05, "loss": 0.3995, "step": 14530 }, { "epoch": 0.26978757848382795, "grad_norm": 0.41977420449256897, "learning_rate": 1.661805220792373e-05, "loss": 0.3016, "step": 14532 }, { "epoch": 0.26982470862124663, "grad_norm": 0.42717525362968445, "learning_rate": 1.6617177673044087e-05, "loss": 0.1057, "step": 14534 }, { "epoch": 0.26986183875866526, "grad_norm": 0.3802667260169983, "learning_rate": 1.6616303048124475e-05, "loss": 0.2439, "step": 14536 }, { "epoch": 0.2698989688960839, "grad_norm": 0.2649378180503845, "learning_rate": 1.661542833317679e-05, "loss": 0.3944, "step": 14538 }, { "epoch": 0.2699360990335025, "grad_norm": 0.3610313832759857, "learning_rate": 1.661455352821295e-05, "loss": 0.2261, "step": 14540 }, { "epoch": 0.26997322917092115, "grad_norm": 0.4174010753631592, "learning_rate": 1.6613678633244846e-05, "loss": 0.291, "step": 14542 }, { "epoch": 0.2700103593083398, "grad_norm": 0.3797670900821686, "learning_rate": 1.6612803648284384e-05, "loss": 0.2449, "step": 14544 }, { "epoch": 0.27004748944575846, "grad_norm": 0.28193598985671997, "learning_rate": 1.6611928573343476e-05, "loss": 0.3682, "step": 14546 }, { "epoch": 0.2700846195831771, "grad_norm": 0.3032357692718506, "learning_rate": 1.661105340843402e-05, "loss": 0.3452, "step": 14548 }, { "epoch": 0.2701217497205957, "grad_norm": 0.38184359669685364, "learning_rate": 1.6610178153567933e-05, "loss": 0.4803, "step": 14550 }, { "epoch": 0.27015887985801434, "grad_norm": 0.44204840064048767, "learning_rate": 1.660930280875712e-05, "loss": 0.3449, "step": 14552 }, { "epoch": 0.270196009995433, "grad_norm": 0.3766881227493286, "learning_rate": 1.6608427374013495e-05, "loss": 0.321, "step": 14554 }, { "epoch": 0.27023314013285166, "grad_norm": 0.3881608843803406, "learning_rate": 1.6607551849348965e-05, "loss": 0.3521, "step": 14556 }, { "epoch": 0.2702702702702703, "grad_norm": 0.27986642718315125, "learning_rate": 1.6606676234775445e-05, "loss": 0.3576, "step": 14558 }, { "epoch": 0.2703074004076889, "grad_norm": 0.3293085992336273, "learning_rate": 1.6605800530304854e-05, "loss": 0.3608, "step": 14560 }, { "epoch": 0.27034453054510754, "grad_norm": 0.36550790071487427, "learning_rate": 1.6604924735949106e-05, "loss": 0.2943, "step": 14562 }, { "epoch": 0.27038166068252617, "grad_norm": 0.39974626898765564, "learning_rate": 1.6604048851720113e-05, "loss": 0.4125, "step": 14564 }, { "epoch": 0.2704187908199448, "grad_norm": 0.6848732233047485, "learning_rate": 1.6603172877629797e-05, "loss": 0.3885, "step": 14566 }, { "epoch": 0.2704559209573635, "grad_norm": 0.26436659693717957, "learning_rate": 1.660229681369008e-05, "loss": 0.2739, "step": 14568 }, { "epoch": 0.2704930510947821, "grad_norm": 0.6374115347862244, "learning_rate": 1.660142065991287e-05, "loss": 0.3012, "step": 14570 }, { "epoch": 0.27053018123220074, "grad_norm": 0.32974353432655334, "learning_rate": 1.660054441631011e-05, "loss": 0.3084, "step": 14572 }, { "epoch": 0.27056731136961937, "grad_norm": 1.8442188501358032, "learning_rate": 1.6599668082893706e-05, "loss": 0.3309, "step": 14574 }, { "epoch": 0.270604441507038, "grad_norm": 0.41182005405426025, "learning_rate": 1.6598791659675588e-05, "loss": 0.3407, "step": 14576 }, { "epoch": 0.2706415716444567, "grad_norm": 0.31686636805534363, "learning_rate": 1.6597915146667682e-05, "loss": 0.3996, "step": 14578 }, { "epoch": 0.2706787017818753, "grad_norm": 0.38003748655319214, "learning_rate": 1.6597038543881913e-05, "loss": 0.2377, "step": 14580 }, { "epoch": 0.27071583191929394, "grad_norm": 0.45389607548713684, "learning_rate": 1.6596161851330212e-05, "loss": 0.4114, "step": 14582 }, { "epoch": 0.27075296205671256, "grad_norm": 0.32193097472190857, "learning_rate": 1.6595285069024504e-05, "loss": 0.2405, "step": 14584 }, { "epoch": 0.2707900921941312, "grad_norm": 0.18964552879333496, "learning_rate": 1.6594408196976723e-05, "loss": 0.3091, "step": 14586 }, { "epoch": 0.2708272223315498, "grad_norm": 0.40375423431396484, "learning_rate": 1.6593531235198797e-05, "loss": 0.34, "step": 14588 }, { "epoch": 0.2708643524689685, "grad_norm": 0.5508294105529785, "learning_rate": 1.659265418370266e-05, "loss": 0.2781, "step": 14590 }, { "epoch": 0.27090148260638713, "grad_norm": 0.39986830949783325, "learning_rate": 1.6591777042500247e-05, "loss": 0.4121, "step": 14592 }, { "epoch": 0.27093861274380576, "grad_norm": 0.3179416060447693, "learning_rate": 1.6590899811603495e-05, "loss": 0.2055, "step": 14594 }, { "epoch": 0.2709757428812244, "grad_norm": 0.3477843105792999, "learning_rate": 1.6590022491024338e-05, "loss": 0.314, "step": 14596 }, { "epoch": 0.271012873018643, "grad_norm": 0.2407577633857727, "learning_rate": 1.6589145080774715e-05, "loss": 0.0664, "step": 14598 }, { "epoch": 0.2710500031560617, "grad_norm": 0.2912905514240265, "learning_rate": 1.6588267580866564e-05, "loss": 0.3466, "step": 14600 }, { "epoch": 0.27108713329348033, "grad_norm": 0.3874660134315491, "learning_rate": 1.6587389991311823e-05, "loss": 0.2334, "step": 14602 }, { "epoch": 0.27112426343089896, "grad_norm": 0.28356364369392395, "learning_rate": 1.658651231212244e-05, "loss": 0.2841, "step": 14604 }, { "epoch": 0.2711613935683176, "grad_norm": 0.3955436050891876, "learning_rate": 1.658563454331035e-05, "loss": 0.3338, "step": 14606 }, { "epoch": 0.2711985237057362, "grad_norm": 0.5678128600120544, "learning_rate": 1.65847566848875e-05, "loss": 0.2635, "step": 14608 }, { "epoch": 0.2712356538431549, "grad_norm": 0.36759284138679504, "learning_rate": 1.658387873686584e-05, "loss": 0.4514, "step": 14610 }, { "epoch": 0.27127278398057353, "grad_norm": 0.19377611577510834, "learning_rate": 1.6583000699257306e-05, "loss": 0.3425, "step": 14612 }, { "epoch": 0.27130991411799216, "grad_norm": 0.23808307945728302, "learning_rate": 1.658212257207385e-05, "loss": 0.2833, "step": 14614 }, { "epoch": 0.2713470442554108, "grad_norm": 0.4442470669746399, "learning_rate": 1.6581244355327425e-05, "loss": 0.2863, "step": 14616 }, { "epoch": 0.2713841743928294, "grad_norm": 0.7035424709320068, "learning_rate": 1.6580366049029975e-05, "loss": 0.4233, "step": 14618 }, { "epoch": 0.27142130453024804, "grad_norm": 0.794154703617096, "learning_rate": 1.6579487653193456e-05, "loss": 0.3395, "step": 14620 }, { "epoch": 0.2714584346676667, "grad_norm": 0.23039385676383972, "learning_rate": 1.657860916782982e-05, "loss": 0.1868, "step": 14622 }, { "epoch": 0.27149556480508535, "grad_norm": 0.4515477418899536, "learning_rate": 1.6577730592951013e-05, "loss": 0.189, "step": 14624 }, { "epoch": 0.271532694942504, "grad_norm": 0.37406814098358154, "learning_rate": 1.6576851928569e-05, "loss": 0.1874, "step": 14626 }, { "epoch": 0.2715698250799226, "grad_norm": 0.3430345058441162, "learning_rate": 1.6575973174695725e-05, "loss": 0.4586, "step": 14628 }, { "epoch": 0.27160695521734124, "grad_norm": 0.4775097966194153, "learning_rate": 1.657509433134316e-05, "loss": 0.4951, "step": 14630 }, { "epoch": 0.2716440853547599, "grad_norm": 0.31621044874191284, "learning_rate": 1.657421539852325e-05, "loss": 0.3502, "step": 14632 }, { "epoch": 0.27168121549217855, "grad_norm": 0.4372848570346832, "learning_rate": 1.6573336376247967e-05, "loss": 0.2372, "step": 14634 }, { "epoch": 0.2717183456295972, "grad_norm": 0.5204339623451233, "learning_rate": 1.657245726452926e-05, "loss": 0.3773, "step": 14636 }, { "epoch": 0.2717554757670158, "grad_norm": 0.5138462781906128, "learning_rate": 1.65715780633791e-05, "loss": 0.3449, "step": 14638 }, { "epoch": 0.27179260590443444, "grad_norm": 0.38314974308013916, "learning_rate": 1.6570698772809444e-05, "loss": 0.2474, "step": 14640 }, { "epoch": 0.27182973604185307, "grad_norm": 0.47644051909446716, "learning_rate": 1.6569819392832262e-05, "loss": 0.1593, "step": 14642 }, { "epoch": 0.27186686617927175, "grad_norm": 0.4106592535972595, "learning_rate": 1.6568939923459514e-05, "loss": 0.3589, "step": 14644 }, { "epoch": 0.2719039963166904, "grad_norm": 0.416670560836792, "learning_rate": 1.656806036470317e-05, "loss": 0.1903, "step": 14646 }, { "epoch": 0.271941126454109, "grad_norm": 0.5414636135101318, "learning_rate": 1.6567180716575198e-05, "loss": 0.3285, "step": 14648 }, { "epoch": 0.27197825659152763, "grad_norm": 0.29910486936569214, "learning_rate": 1.656630097908757e-05, "loss": 0.5219, "step": 14650 }, { "epoch": 0.27201538672894626, "grad_norm": 0.36363092064857483, "learning_rate": 1.656542115225225e-05, "loss": 0.3364, "step": 14652 }, { "epoch": 0.27205251686636495, "grad_norm": 0.6942903399467468, "learning_rate": 1.6564541236081217e-05, "loss": 0.2863, "step": 14654 }, { "epoch": 0.2720896470037836, "grad_norm": 0.5779421329498291, "learning_rate": 1.656366123058644e-05, "loss": 0.3508, "step": 14656 }, { "epoch": 0.2721267771412022, "grad_norm": 0.45575886964797974, "learning_rate": 1.6562781135779898e-05, "loss": 0.2758, "step": 14658 }, { "epoch": 0.27216390727862083, "grad_norm": 0.3178647756576538, "learning_rate": 1.6561900951673556e-05, "loss": 0.1936, "step": 14660 }, { "epoch": 0.27220103741603946, "grad_norm": 0.3453634977340698, "learning_rate": 1.65610206782794e-05, "loss": 0.306, "step": 14662 }, { "epoch": 0.2722381675534581, "grad_norm": 0.3277454078197479, "learning_rate": 1.6560140315609406e-05, "loss": 0.3499, "step": 14664 }, { "epoch": 0.27227529769087677, "grad_norm": 0.3271265923976898, "learning_rate": 1.6559259863675553e-05, "loss": 0.1561, "step": 14666 }, { "epoch": 0.2723124278282954, "grad_norm": 0.3544183671474457, "learning_rate": 1.6558379322489817e-05, "loss": 0.1541, "step": 14668 }, { "epoch": 0.27234955796571403, "grad_norm": 0.260212779045105, "learning_rate": 1.6557498692064187e-05, "loss": 0.501, "step": 14670 }, { "epoch": 0.27238668810313266, "grad_norm": 0.3945387005805969, "learning_rate": 1.655661797241064e-05, "loss": 0.4064, "step": 14672 }, { "epoch": 0.2724238182405513, "grad_norm": 0.3963828980922699, "learning_rate": 1.655573716354116e-05, "loss": 0.2909, "step": 14674 }, { "epoch": 0.27246094837796997, "grad_norm": 0.3975825309753418, "learning_rate": 1.6554856265467735e-05, "loss": 0.4676, "step": 14676 }, { "epoch": 0.2724980785153886, "grad_norm": 0.37554121017456055, "learning_rate": 1.655397527820235e-05, "loss": 0.2851, "step": 14678 }, { "epoch": 0.2725352086528072, "grad_norm": 0.42874693870544434, "learning_rate": 1.6553094201756996e-05, "loss": 0.4541, "step": 14680 }, { "epoch": 0.27257233879022585, "grad_norm": 0.3809939920902252, "learning_rate": 1.6552213036143654e-05, "loss": 0.3199, "step": 14682 }, { "epoch": 0.2726094689276445, "grad_norm": 0.3553813695907593, "learning_rate": 1.655133178137432e-05, "loss": 0.4257, "step": 14684 }, { "epoch": 0.27264659906506317, "grad_norm": 0.2711872160434723, "learning_rate": 1.6550450437460987e-05, "loss": 0.3231, "step": 14686 }, { "epoch": 0.2726837292024818, "grad_norm": 0.27073177695274353, "learning_rate": 1.654956900441564e-05, "loss": 0.2255, "step": 14688 }, { "epoch": 0.2727208593399004, "grad_norm": 0.35050705075263977, "learning_rate": 1.654868748225028e-05, "loss": 0.4098, "step": 14690 }, { "epoch": 0.27275798947731905, "grad_norm": 0.33652275800704956, "learning_rate": 1.65478058709769e-05, "loss": 0.4278, "step": 14692 }, { "epoch": 0.2727951196147377, "grad_norm": 0.4140445291996002, "learning_rate": 1.6546924170607494e-05, "loss": 0.3922, "step": 14694 }, { "epoch": 0.2728322497521563, "grad_norm": 0.3835141360759735, "learning_rate": 1.6546042381154057e-05, "loss": 0.3385, "step": 14696 }, { "epoch": 0.272869379889575, "grad_norm": 0.46845513582229614, "learning_rate": 1.6545160502628595e-05, "loss": 0.3181, "step": 14698 }, { "epoch": 0.2729065100269936, "grad_norm": 0.38605761528015137, "learning_rate": 1.6544278535043103e-05, "loss": 0.2867, "step": 14700 }, { "epoch": 0.27294364016441225, "grad_norm": 0.5140795707702637, "learning_rate": 1.6543396478409583e-05, "loss": 0.1996, "step": 14702 }, { "epoch": 0.2729807703018309, "grad_norm": 0.2288786917924881, "learning_rate": 1.6542514332740035e-05, "loss": 0.0983, "step": 14704 }, { "epoch": 0.2730179004392495, "grad_norm": 0.3231593370437622, "learning_rate": 1.6541632098046464e-05, "loss": 0.4028, "step": 14706 }, { "epoch": 0.2730550305766682, "grad_norm": 0.24441130459308624, "learning_rate": 1.6540749774340874e-05, "loss": 0.3805, "step": 14708 }, { "epoch": 0.2730921607140868, "grad_norm": 0.27992236614227295, "learning_rate": 1.6539867361635272e-05, "loss": 0.3422, "step": 14710 }, { "epoch": 0.27312929085150545, "grad_norm": 0.45079299807548523, "learning_rate": 1.6538984859941667e-05, "loss": 0.3394, "step": 14712 }, { "epoch": 0.2731664209889241, "grad_norm": 0.3355009853839874, "learning_rate": 1.653810226927206e-05, "loss": 0.1901, "step": 14714 }, { "epoch": 0.2732035511263427, "grad_norm": 0.3348577618598938, "learning_rate": 1.6537219589638466e-05, "loss": 0.2437, "step": 14716 }, { "epoch": 0.27324068126376133, "grad_norm": 0.5790876150131226, "learning_rate": 1.65363368210529e-05, "loss": 0.3334, "step": 14718 }, { "epoch": 0.27327781140118, "grad_norm": 0.3333645761013031, "learning_rate": 1.6535453963527363e-05, "loss": 0.2668, "step": 14720 }, { "epoch": 0.27331494153859864, "grad_norm": 0.34725311398506165, "learning_rate": 1.6534571017073873e-05, "loss": 0.3187, "step": 14722 }, { "epoch": 0.27335207167601727, "grad_norm": 0.2609018385410309, "learning_rate": 1.653368798170445e-05, "loss": 0.3864, "step": 14724 }, { "epoch": 0.2733892018134359, "grad_norm": 0.8517078161239624, "learning_rate": 1.65328048574311e-05, "loss": 0.4865, "step": 14726 }, { "epoch": 0.27342633195085453, "grad_norm": 0.29631832242012024, "learning_rate": 1.6531921644265844e-05, "loss": 0.319, "step": 14728 }, { "epoch": 0.2734634620882732, "grad_norm": 0.45585134625434875, "learning_rate": 1.65310383422207e-05, "loss": 0.3505, "step": 14730 }, { "epoch": 0.27350059222569184, "grad_norm": 0.2846459150314331, "learning_rate": 1.6530154951307688e-05, "loss": 0.2223, "step": 14732 }, { "epoch": 0.27353772236311047, "grad_norm": 0.2941604256629944, "learning_rate": 1.6529271471538825e-05, "loss": 0.2407, "step": 14734 }, { "epoch": 0.2735748525005291, "grad_norm": 0.4237828254699707, "learning_rate": 1.6528387902926138e-05, "loss": 0.4774, "step": 14736 }, { "epoch": 0.2736119826379477, "grad_norm": 0.34062060713768005, "learning_rate": 1.6527504245481646e-05, "loss": 0.3844, "step": 14738 }, { "epoch": 0.27364911277536635, "grad_norm": 0.33982667326927185, "learning_rate": 1.6526620499217373e-05, "loss": 0.3503, "step": 14740 }, { "epoch": 0.27368624291278504, "grad_norm": 0.34856364130973816, "learning_rate": 1.6525736664145342e-05, "loss": 0.2474, "step": 14742 }, { "epoch": 0.27372337305020367, "grad_norm": 0.34855902194976807, "learning_rate": 1.6524852740277584e-05, "loss": 0.2024, "step": 14744 }, { "epoch": 0.2737605031876223, "grad_norm": 0.35187119245529175, "learning_rate": 1.6523968727626125e-05, "loss": 0.2365, "step": 14746 }, { "epoch": 0.2737976333250409, "grad_norm": 0.39861640334129333, "learning_rate": 1.652308462620299e-05, "loss": 0.3052, "step": 14748 }, { "epoch": 0.27383476346245955, "grad_norm": 0.31246644258499146, "learning_rate": 1.6522200436020214e-05, "loss": 0.308, "step": 14750 }, { "epoch": 0.27387189359987824, "grad_norm": 0.29504647850990295, "learning_rate": 1.6521316157089827e-05, "loss": 0.3676, "step": 14752 }, { "epoch": 0.27390902373729686, "grad_norm": 0.4543309807777405, "learning_rate": 1.652043178942386e-05, "loss": 0.2578, "step": 14754 }, { "epoch": 0.2739461538747155, "grad_norm": 0.6466224193572998, "learning_rate": 1.651954733303435e-05, "loss": 0.1504, "step": 14756 }, { "epoch": 0.2739832840121341, "grad_norm": 0.30616524815559387, "learning_rate": 1.6518662787933325e-05, "loss": 0.5037, "step": 14758 }, { "epoch": 0.27402041414955275, "grad_norm": 0.27346140146255493, "learning_rate": 1.651777815413283e-05, "loss": 0.4006, "step": 14760 }, { "epoch": 0.27405754428697143, "grad_norm": 0.3785805106163025, "learning_rate": 1.6516893431644892e-05, "loss": 0.2291, "step": 14762 }, { "epoch": 0.27409467442439006, "grad_norm": 0.2849050462245941, "learning_rate": 1.651600862048156e-05, "loss": 0.2911, "step": 14764 }, { "epoch": 0.2741318045618087, "grad_norm": 0.326878160238266, "learning_rate": 1.6515123720654862e-05, "loss": 0.2718, "step": 14766 }, { "epoch": 0.2741689346992273, "grad_norm": 0.334862619638443, "learning_rate": 1.6514238732176847e-05, "loss": 0.1525, "step": 14768 }, { "epoch": 0.27420606483664595, "grad_norm": 0.3678429126739502, "learning_rate": 1.651335365505956e-05, "loss": 0.3416, "step": 14770 }, { "epoch": 0.2742431949740646, "grad_norm": 0.5865926742553711, "learning_rate": 1.6512468489315034e-05, "loss": 0.2622, "step": 14772 }, { "epoch": 0.27428032511148326, "grad_norm": 0.4681185483932495, "learning_rate": 1.651158323495532e-05, "loss": 0.3453, "step": 14774 }, { "epoch": 0.2743174552489019, "grad_norm": 0.38823240995407104, "learning_rate": 1.6510697891992466e-05, "loss": 0.4259, "step": 14776 }, { "epoch": 0.2743545853863205, "grad_norm": 0.23671135306358337, "learning_rate": 1.6509812460438513e-05, "loss": 0.3245, "step": 14778 }, { "epoch": 0.27439171552373914, "grad_norm": 0.36960306763648987, "learning_rate": 1.6508926940305513e-05, "loss": 0.2836, "step": 14780 }, { "epoch": 0.27442884566115777, "grad_norm": 0.349090576171875, "learning_rate": 1.6508041331605512e-05, "loss": 0.336, "step": 14782 }, { "epoch": 0.27446597579857646, "grad_norm": 0.28353485465049744, "learning_rate": 1.6507155634350564e-05, "loss": 0.2208, "step": 14784 }, { "epoch": 0.2745031059359951, "grad_norm": 0.5452004075050354, "learning_rate": 1.6506269848552718e-05, "loss": 0.3043, "step": 14786 }, { "epoch": 0.2745402360734137, "grad_norm": 0.2384907752275467, "learning_rate": 1.6505383974224028e-05, "loss": 0.3658, "step": 14788 }, { "epoch": 0.27457736621083234, "grad_norm": 0.31997108459472656, "learning_rate": 1.6504498011376546e-05, "loss": 0.3844, "step": 14790 }, { "epoch": 0.27461449634825097, "grad_norm": 0.32168155908584595, "learning_rate": 1.6503611960022334e-05, "loss": 0.4847, "step": 14792 }, { "epoch": 0.2746516264856696, "grad_norm": 0.5161004066467285, "learning_rate": 1.650272582017344e-05, "loss": 0.359, "step": 14794 }, { "epoch": 0.2746887566230883, "grad_norm": 0.33702290058135986, "learning_rate": 1.6501839591841926e-05, "loss": 0.3595, "step": 14796 }, { "epoch": 0.2747258867605069, "grad_norm": 0.3972940742969513, "learning_rate": 1.650095327503985e-05, "loss": 0.3269, "step": 14798 }, { "epoch": 0.27476301689792554, "grad_norm": 0.2500225603580475, "learning_rate": 1.6500066869779273e-05, "loss": 0.2834, "step": 14800 }, { "epoch": 0.27480014703534417, "grad_norm": 0.3220600485801697, "learning_rate": 1.649918037607226e-05, "loss": 0.2903, "step": 14802 }, { "epoch": 0.2748372771727628, "grad_norm": 0.3312821090221405, "learning_rate": 1.6498293793930865e-05, "loss": 0.173, "step": 14804 }, { "epoch": 0.2748744073101815, "grad_norm": 0.43873366713523865, "learning_rate": 1.6497407123367155e-05, "loss": 0.2093, "step": 14806 }, { "epoch": 0.2749115374476001, "grad_norm": 0.4325542151927948, "learning_rate": 1.64965203643932e-05, "loss": 0.3874, "step": 14808 }, { "epoch": 0.27494866758501874, "grad_norm": 0.2475588470697403, "learning_rate": 1.649563351702106e-05, "loss": 0.4355, "step": 14810 }, { "epoch": 0.27498579772243736, "grad_norm": 0.48887360095977783, "learning_rate": 1.6494746581262803e-05, "loss": 0.2994, "step": 14812 }, { "epoch": 0.275022927859856, "grad_norm": 0.36854198575019836, "learning_rate": 1.64938595571305e-05, "loss": 0.4927, "step": 14814 }, { "epoch": 0.2750600579972746, "grad_norm": 0.3896780014038086, "learning_rate": 1.6492972444636223e-05, "loss": 0.2577, "step": 14816 }, { "epoch": 0.2750971881346933, "grad_norm": 0.2508016526699066, "learning_rate": 1.6492085243792037e-05, "loss": 0.1243, "step": 14818 }, { "epoch": 0.27513431827211193, "grad_norm": 0.4492914378643036, "learning_rate": 1.6491197954610015e-05, "loss": 0.1922, "step": 14820 }, { "epoch": 0.27517144840953056, "grad_norm": 0.2753238379955292, "learning_rate": 1.6490310577102234e-05, "loss": 0.3277, "step": 14822 }, { "epoch": 0.2752085785469492, "grad_norm": 0.20805779099464417, "learning_rate": 1.6489423111280767e-05, "loss": 0.267, "step": 14824 }, { "epoch": 0.2752457086843678, "grad_norm": 0.4545464813709259, "learning_rate": 1.648853555715769e-05, "loss": 0.2475, "step": 14826 }, { "epoch": 0.2752828388217865, "grad_norm": 0.4415128231048584, "learning_rate": 1.648764791474508e-05, "loss": 0.4062, "step": 14828 }, { "epoch": 0.27531996895920513, "grad_norm": 0.4637845754623413, "learning_rate": 1.648676018405501e-05, "loss": 0.2359, "step": 14830 }, { "epoch": 0.27535709909662376, "grad_norm": 0.37481415271759033, "learning_rate": 1.648587236509957e-05, "loss": 0.2189, "step": 14832 }, { "epoch": 0.2753942292340424, "grad_norm": 0.5689775347709656, "learning_rate": 1.648498445789083e-05, "loss": 0.3498, "step": 14834 }, { "epoch": 0.275431359371461, "grad_norm": 0.45561549067497253, "learning_rate": 1.6484096462440883e-05, "loss": 0.1395, "step": 14836 }, { "epoch": 0.2754684895088797, "grad_norm": 0.23857736587524414, "learning_rate": 1.64832083787618e-05, "loss": 0.2851, "step": 14838 }, { "epoch": 0.2755056196462983, "grad_norm": 0.3643311858177185, "learning_rate": 1.648232020686567e-05, "loss": 0.4135, "step": 14840 }, { "epoch": 0.27554274978371696, "grad_norm": 0.28163841366767883, "learning_rate": 1.6481431946764578e-05, "loss": 0.2765, "step": 14842 }, { "epoch": 0.2755798799211356, "grad_norm": 0.4603663682937622, "learning_rate": 1.6480543598470617e-05, "loss": 0.3075, "step": 14844 }, { "epoch": 0.2756170100585542, "grad_norm": 0.32451507449150085, "learning_rate": 1.6479655161995865e-05, "loss": 0.2501, "step": 14846 }, { "epoch": 0.27565414019597284, "grad_norm": 0.39576223492622375, "learning_rate": 1.6478766637352414e-05, "loss": 0.3298, "step": 14848 }, { "epoch": 0.2756912703333915, "grad_norm": 0.32359379529953003, "learning_rate": 1.6477878024552358e-05, "loss": 0.3423, "step": 14850 }, { "epoch": 0.27572840047081015, "grad_norm": 0.4463805854320526, "learning_rate": 1.6476989323607784e-05, "loss": 0.3104, "step": 14852 }, { "epoch": 0.2757655306082288, "grad_norm": 0.25558438897132874, "learning_rate": 1.6476100534530787e-05, "loss": 0.346, "step": 14854 }, { "epoch": 0.2758026607456474, "grad_norm": 0.3127457797527313, "learning_rate": 1.6475211657333455e-05, "loss": 0.2887, "step": 14856 }, { "epoch": 0.27583979088306604, "grad_norm": 0.2876504063606262, "learning_rate": 1.6474322692027896e-05, "loss": 0.3465, "step": 14858 }, { "epoch": 0.2758769210204847, "grad_norm": 0.3852679431438446, "learning_rate": 1.6473433638626194e-05, "loss": 0.4883, "step": 14860 }, { "epoch": 0.27591405115790335, "grad_norm": 0.4314744174480438, "learning_rate": 1.647254449714045e-05, "loss": 0.335, "step": 14862 }, { "epoch": 0.275951181295322, "grad_norm": 0.25188130140304565, "learning_rate": 1.6471655267582764e-05, "loss": 0.2775, "step": 14864 }, { "epoch": 0.2759883114327406, "grad_norm": 0.4336928129196167, "learning_rate": 1.6470765949965234e-05, "loss": 0.3647, "step": 14866 }, { "epoch": 0.27602544157015924, "grad_norm": 0.34858429431915283, "learning_rate": 1.646987654429996e-05, "loss": 0.4555, "step": 14868 }, { "epoch": 0.27606257170757786, "grad_norm": 0.6125998497009277, "learning_rate": 1.646898705059905e-05, "loss": 0.1871, "step": 14870 }, { "epoch": 0.27609970184499655, "grad_norm": 0.5572607517242432, "learning_rate": 1.6468097468874598e-05, "loss": 0.414, "step": 14872 }, { "epoch": 0.2761368319824152, "grad_norm": 0.38874924182891846, "learning_rate": 1.6467207799138716e-05, "loss": 0.4656, "step": 14874 }, { "epoch": 0.2761739621198338, "grad_norm": 0.48564234375953674, "learning_rate": 1.646631804140351e-05, "loss": 0.1954, "step": 14876 }, { "epoch": 0.27621109225725243, "grad_norm": 0.39803749322891235, "learning_rate": 1.646542819568108e-05, "loss": 0.2265, "step": 14878 }, { "epoch": 0.27624822239467106, "grad_norm": 0.35922297835350037, "learning_rate": 1.6464538261983542e-05, "loss": 0.3295, "step": 14880 }, { "epoch": 0.27628535253208975, "grad_norm": 0.2193242311477661, "learning_rate": 1.6463648240323e-05, "loss": 0.4425, "step": 14882 }, { "epoch": 0.2763224826695084, "grad_norm": 0.39045900106430054, "learning_rate": 1.6462758130711566e-05, "loss": 0.2133, "step": 14884 }, { "epoch": 0.276359612806927, "grad_norm": 0.4693489968776703, "learning_rate": 1.6461867933161353e-05, "loss": 0.3328, "step": 14886 }, { "epoch": 0.27639674294434563, "grad_norm": 0.48045212030410767, "learning_rate": 1.646097764768447e-05, "loss": 0.4166, "step": 14888 }, { "epoch": 0.27643387308176426, "grad_norm": 0.46798980236053467, "learning_rate": 1.6460087274293036e-05, "loss": 0.3705, "step": 14890 }, { "epoch": 0.2764710032191829, "grad_norm": 0.35942989587783813, "learning_rate": 1.645919681299917e-05, "loss": 0.2909, "step": 14892 }, { "epoch": 0.27650813335660157, "grad_norm": 0.4933192729949951, "learning_rate": 1.6458306263814975e-05, "loss": 0.3218, "step": 14894 }, { "epoch": 0.2765452634940202, "grad_norm": 0.2826973497867584, "learning_rate": 1.645741562675258e-05, "loss": 0.1868, "step": 14896 }, { "epoch": 0.2765823936314388, "grad_norm": 0.4001150131225586, "learning_rate": 1.64565249018241e-05, "loss": 0.3571, "step": 14898 }, { "epoch": 0.27661952376885746, "grad_norm": 0.35059475898742676, "learning_rate": 1.6455634089041654e-05, "loss": 0.3113, "step": 14900 }, { "epoch": 0.2766566539062761, "grad_norm": 0.4711218774318695, "learning_rate": 1.6454743188417368e-05, "loss": 0.271, "step": 14902 }, { "epoch": 0.27669378404369477, "grad_norm": 0.4374128580093384, "learning_rate": 1.645385219996336e-05, "loss": 0.3295, "step": 14904 }, { "epoch": 0.2767309141811134, "grad_norm": 0.3825603723526001, "learning_rate": 1.6452961123691754e-05, "loss": 0.3543, "step": 14906 }, { "epoch": 0.276768044318532, "grad_norm": 0.3439706861972809, "learning_rate": 1.6452069959614678e-05, "loss": 0.2064, "step": 14908 }, { "epoch": 0.27680517445595065, "grad_norm": 0.31731534004211426, "learning_rate": 1.6451178707744257e-05, "loss": 0.2999, "step": 14910 }, { "epoch": 0.2768423045933693, "grad_norm": 0.2681587338447571, "learning_rate": 1.6450287368092613e-05, "loss": 0.3907, "step": 14912 }, { "epoch": 0.27687943473078797, "grad_norm": 0.32862699031829834, "learning_rate": 1.6449395940671882e-05, "loss": 0.3119, "step": 14914 }, { "epoch": 0.2769165648682066, "grad_norm": 0.3737541139125824, "learning_rate": 1.6448504425494188e-05, "loss": 0.331, "step": 14916 }, { "epoch": 0.2769536950056252, "grad_norm": 0.364833801984787, "learning_rate": 1.6447612822571667e-05, "loss": 0.3499, "step": 14918 }, { "epoch": 0.27699082514304385, "grad_norm": 0.4749397039413452, "learning_rate": 1.6446721131916444e-05, "loss": 0.2455, "step": 14920 }, { "epoch": 0.2770279552804625, "grad_norm": 0.47684356570243835, "learning_rate": 1.644582935354066e-05, "loss": 0.2661, "step": 14922 }, { "epoch": 0.2770650854178811, "grad_norm": 0.2792404890060425, "learning_rate": 1.6444937487456447e-05, "loss": 0.4977, "step": 14924 }, { "epoch": 0.2771022155552998, "grad_norm": 0.27260011434555054, "learning_rate": 1.6444045533675938e-05, "loss": 0.1396, "step": 14926 }, { "epoch": 0.2771393456927184, "grad_norm": 0.3562160134315491, "learning_rate": 1.6443153492211276e-05, "loss": 0.186, "step": 14928 }, { "epoch": 0.27717647583013705, "grad_norm": 0.38558229804039, "learning_rate": 1.6442261363074584e-05, "loss": 0.5876, "step": 14930 }, { "epoch": 0.2772136059675557, "grad_norm": 0.4793417751789093, "learning_rate": 1.6441369146278023e-05, "loss": 0.356, "step": 14932 }, { "epoch": 0.2772507361049743, "grad_norm": 0.43960198760032654, "learning_rate": 1.6440476841833714e-05, "loss": 0.1908, "step": 14934 }, { "epoch": 0.277287866242393, "grad_norm": 0.4275606870651245, "learning_rate": 1.643958444975381e-05, "loss": 0.2949, "step": 14936 }, { "epoch": 0.2773249963798116, "grad_norm": 0.24383433163166046, "learning_rate": 1.643869197005045e-05, "loss": 0.3725, "step": 14938 }, { "epoch": 0.27736212651723025, "grad_norm": 0.3487373888492584, "learning_rate": 1.6437799402735778e-05, "loss": 0.3811, "step": 14940 }, { "epoch": 0.2773992566546489, "grad_norm": 0.41456958651542664, "learning_rate": 1.6436906747821938e-05, "loss": 0.365, "step": 14942 }, { "epoch": 0.2774363867920675, "grad_norm": 0.30235451459884644, "learning_rate": 1.6436014005321083e-05, "loss": 0.3012, "step": 14944 }, { "epoch": 0.27747351692948613, "grad_norm": 0.34595608711242676, "learning_rate": 1.643512117524535e-05, "loss": 0.295, "step": 14946 }, { "epoch": 0.2775106470669048, "grad_norm": 0.272754043340683, "learning_rate": 1.6434228257606896e-05, "loss": 0.2225, "step": 14948 }, { "epoch": 0.27754777720432344, "grad_norm": 0.43997758626937866, "learning_rate": 1.6433335252417868e-05, "loss": 0.3592, "step": 14950 }, { "epoch": 0.27758490734174207, "grad_norm": 0.25300106406211853, "learning_rate": 1.6432442159690417e-05, "loss": 0.333, "step": 14952 }, { "epoch": 0.2776220374791607, "grad_norm": 0.4052315056324005, "learning_rate": 1.6431548979436697e-05, "loss": 0.1619, "step": 14954 }, { "epoch": 0.2776591676165793, "grad_norm": 0.7130093574523926, "learning_rate": 1.6430655711668858e-05, "loss": 0.2339, "step": 14956 }, { "epoch": 0.277696297753998, "grad_norm": 0.46284353733062744, "learning_rate": 1.6429762356399056e-05, "loss": 0.3146, "step": 14958 }, { "epoch": 0.27773342789141664, "grad_norm": 0.2876720428466797, "learning_rate": 1.642886891363945e-05, "loss": 0.4323, "step": 14960 }, { "epoch": 0.27777055802883527, "grad_norm": 0.4155559241771698, "learning_rate": 1.6427975383402194e-05, "loss": 0.0991, "step": 14962 }, { "epoch": 0.2778076881662539, "grad_norm": 0.41126206517219543, "learning_rate": 1.642708176569945e-05, "loss": 0.2999, "step": 14964 }, { "epoch": 0.2778448183036725, "grad_norm": 0.4702550172805786, "learning_rate": 1.642618806054337e-05, "loss": 0.2222, "step": 14966 }, { "epoch": 0.27788194844109115, "grad_norm": 0.47663938999176025, "learning_rate": 1.642529426794612e-05, "loss": 0.3905, "step": 14968 }, { "epoch": 0.27791907857850984, "grad_norm": 0.2766834795475006, "learning_rate": 1.6424400387919865e-05, "loss": 0.1392, "step": 14970 }, { "epoch": 0.27795620871592847, "grad_norm": 0.39705637097358704, "learning_rate": 1.6423506420476756e-05, "loss": 0.2438, "step": 14972 }, { "epoch": 0.2779933388533471, "grad_norm": 0.4040052592754364, "learning_rate": 1.6422612365628972e-05, "loss": 0.3141, "step": 14974 }, { "epoch": 0.2780304689907657, "grad_norm": 0.4049037992954254, "learning_rate": 1.6421718223388668e-05, "loss": 0.2965, "step": 14976 }, { "epoch": 0.27806759912818435, "grad_norm": 0.3851534426212311, "learning_rate": 1.642082399376802e-05, "loss": 0.1841, "step": 14978 }, { "epoch": 0.27810472926560303, "grad_norm": 0.37929391860961914, "learning_rate": 1.6419929676779184e-05, "loss": 0.3155, "step": 14980 }, { "epoch": 0.27814185940302166, "grad_norm": 0.4493348300457001, "learning_rate": 1.6419035272434336e-05, "loss": 0.4621, "step": 14982 }, { "epoch": 0.2781789895404403, "grad_norm": 0.47922950983047485, "learning_rate": 1.6418140780745647e-05, "loss": 0.5089, "step": 14984 }, { "epoch": 0.2782161196778589, "grad_norm": 0.22817960381507874, "learning_rate": 1.6417246201725286e-05, "loss": 0.2825, "step": 14986 }, { "epoch": 0.27825324981527755, "grad_norm": 0.2597392201423645, "learning_rate": 1.6416351535385423e-05, "loss": 0.3174, "step": 14988 }, { "epoch": 0.27829037995269623, "grad_norm": 0.3169132173061371, "learning_rate": 1.6415456781738235e-05, "loss": 0.3405, "step": 14990 }, { "epoch": 0.27832751009011486, "grad_norm": 0.29082944989204407, "learning_rate": 1.64145619407959e-05, "loss": 0.2831, "step": 14992 }, { "epoch": 0.2783646402275335, "grad_norm": 0.30459529161453247, "learning_rate": 1.6413667012570594e-05, "loss": 0.2131, "step": 14994 }, { "epoch": 0.2784017703649521, "grad_norm": 0.7897620797157288, "learning_rate": 1.6412771997074487e-05, "loss": 0.3588, "step": 14996 }, { "epoch": 0.27843890050237075, "grad_norm": 0.38560277223587036, "learning_rate": 1.641187689431976e-05, "loss": 0.2154, "step": 14998 }, { "epoch": 0.2784760306397894, "grad_norm": 0.3687324523925781, "learning_rate": 1.6410981704318596e-05, "loss": 0.2144, "step": 15000 }, { "epoch": 0.27851316077720806, "grad_norm": 0.31290608644485474, "learning_rate": 1.6410086427083176e-05, "loss": 0.3951, "step": 15002 }, { "epoch": 0.2785502909146267, "grad_norm": 0.3430293798446655, "learning_rate": 1.6409191062625676e-05, "loss": 0.3614, "step": 15004 }, { "epoch": 0.2785874210520453, "grad_norm": 0.4427013695240021, "learning_rate": 1.6408295610958288e-05, "loss": 0.3289, "step": 15006 }, { "epoch": 0.27862455118946394, "grad_norm": 0.26438021659851074, "learning_rate": 1.6407400072093188e-05, "loss": 0.2451, "step": 15008 }, { "epoch": 0.27866168132688257, "grad_norm": 0.28633636236190796, "learning_rate": 1.6406504446042567e-05, "loss": 0.166, "step": 15010 }, { "epoch": 0.27869881146430125, "grad_norm": 0.33371731638908386, "learning_rate": 1.640560873281861e-05, "loss": 0.4639, "step": 15012 }, { "epoch": 0.2787359416017199, "grad_norm": 0.32669776678085327, "learning_rate": 1.6404712932433508e-05, "loss": 0.1534, "step": 15014 }, { "epoch": 0.2787730717391385, "grad_norm": 0.47225221991539, "learning_rate": 1.6403817044899445e-05, "loss": 0.34, "step": 15016 }, { "epoch": 0.27881020187655714, "grad_norm": 0.4173768162727356, "learning_rate": 1.640292107022861e-05, "loss": 0.4621, "step": 15018 }, { "epoch": 0.27884733201397577, "grad_norm": 0.5146902203559875, "learning_rate": 1.6402025008433204e-05, "loss": 0.5664, "step": 15020 }, { "epoch": 0.2788844621513944, "grad_norm": 0.5303643941879272, "learning_rate": 1.640112885952541e-05, "loss": 0.2577, "step": 15022 }, { "epoch": 0.2789215922888131, "grad_norm": 0.26057448983192444, "learning_rate": 1.6400232623517426e-05, "loss": 0.2277, "step": 15024 }, { "epoch": 0.2789587224262317, "grad_norm": 0.4882340431213379, "learning_rate": 1.639933630042145e-05, "loss": 0.3506, "step": 15026 }, { "epoch": 0.27899585256365034, "grad_norm": 0.261616975069046, "learning_rate": 1.6398439890249675e-05, "loss": 0.4672, "step": 15028 }, { "epoch": 0.27903298270106897, "grad_norm": 0.5336483716964722, "learning_rate": 1.6397543393014293e-05, "loss": 0.2873, "step": 15030 }, { "epoch": 0.2790701128384876, "grad_norm": 0.3441188335418701, "learning_rate": 1.6396646808727516e-05, "loss": 0.1481, "step": 15032 }, { "epoch": 0.2791072429759063, "grad_norm": 0.4678948223590851, "learning_rate": 1.639575013740153e-05, "loss": 0.418, "step": 15034 }, { "epoch": 0.2791443731133249, "grad_norm": 0.5374354720115662, "learning_rate": 1.6394853379048544e-05, "loss": 0.2509, "step": 15036 }, { "epoch": 0.27918150325074353, "grad_norm": 0.3136776387691498, "learning_rate": 1.6393956533680758e-05, "loss": 0.3832, "step": 15038 }, { "epoch": 0.27921863338816216, "grad_norm": 0.3334224820137024, "learning_rate": 1.639305960131038e-05, "loss": 0.2516, "step": 15040 }, { "epoch": 0.2792557635255808, "grad_norm": 0.47994497418403625, "learning_rate": 1.6392162581949604e-05, "loss": 0.2265, "step": 15042 }, { "epoch": 0.2792928936629994, "grad_norm": 0.31730639934539795, "learning_rate": 1.6391265475610644e-05, "loss": 0.2638, "step": 15044 }, { "epoch": 0.2793300238004181, "grad_norm": 0.3441244661808014, "learning_rate": 1.6390368282305707e-05, "loss": 0.2086, "step": 15046 }, { "epoch": 0.27936715393783673, "grad_norm": 0.38913920521736145, "learning_rate": 1.6389471002046998e-05, "loss": 0.3959, "step": 15048 }, { "epoch": 0.27940428407525536, "grad_norm": 0.43224868178367615, "learning_rate": 1.6388573634846725e-05, "loss": 0.3332, "step": 15050 }, { "epoch": 0.279441414212674, "grad_norm": 0.47961071133613586, "learning_rate": 1.6387676180717105e-05, "loss": 0.3112, "step": 15052 }, { "epoch": 0.2794785443500926, "grad_norm": 0.45057591795921326, "learning_rate": 1.638677863967034e-05, "loss": 0.4359, "step": 15054 }, { "epoch": 0.2795156744875113, "grad_norm": 0.3402251601219177, "learning_rate": 1.6385881011718653e-05, "loss": 0.2692, "step": 15056 }, { "epoch": 0.27955280462492993, "grad_norm": 0.3727535307407379, "learning_rate": 1.6384983296874253e-05, "loss": 0.2467, "step": 15058 }, { "epoch": 0.27958993476234856, "grad_norm": 0.48922502994537354, "learning_rate": 1.6384085495149356e-05, "loss": 0.2591, "step": 15060 }, { "epoch": 0.2796270648997672, "grad_norm": 0.4106374979019165, "learning_rate": 1.6383187606556174e-05, "loss": 0.4223, "step": 15062 }, { "epoch": 0.2796641950371858, "grad_norm": 0.39098769426345825, "learning_rate": 1.6382289631106933e-05, "loss": 0.2028, "step": 15064 }, { "epoch": 0.2797013251746045, "grad_norm": 0.3745861053466797, "learning_rate": 1.6381391568813843e-05, "loss": 0.3108, "step": 15066 }, { "epoch": 0.2797384553120231, "grad_norm": 0.3746451139450073, "learning_rate": 1.6380493419689133e-05, "loss": 0.148, "step": 15068 }, { "epoch": 0.27977558544944175, "grad_norm": 0.3354095220565796, "learning_rate": 1.6379595183745015e-05, "loss": 0.3559, "step": 15070 }, { "epoch": 0.2798127155868604, "grad_norm": 0.35680198669433594, "learning_rate": 1.637869686099372e-05, "loss": 0.2749, "step": 15072 }, { "epoch": 0.279849845724279, "grad_norm": 0.46964308619499207, "learning_rate": 1.6377798451447465e-05, "loss": 0.4228, "step": 15074 }, { "epoch": 0.27988697586169764, "grad_norm": 0.5090184807777405, "learning_rate": 1.6376899955118474e-05, "loss": 0.2076, "step": 15076 }, { "epoch": 0.2799241059991163, "grad_norm": 0.23075906932353973, "learning_rate": 1.6376001372018978e-05, "loss": 0.247, "step": 15078 }, { "epoch": 0.27996123613653495, "grad_norm": 0.424236923456192, "learning_rate": 1.6375102702161203e-05, "loss": 0.2249, "step": 15080 }, { "epoch": 0.2799983662739536, "grad_norm": 0.38155269622802734, "learning_rate": 1.6374203945557375e-05, "loss": 0.3799, "step": 15082 }, { "epoch": 0.2800354964113722, "grad_norm": 0.36049073934555054, "learning_rate": 1.6373305102219724e-05, "loss": 0.3232, "step": 15084 }, { "epoch": 0.28007262654879084, "grad_norm": 0.32451942563056946, "learning_rate": 1.637240617216048e-05, "loss": 0.3627, "step": 15086 }, { "epoch": 0.2801097566862095, "grad_norm": 0.4158051609992981, "learning_rate": 1.6371507155391877e-05, "loss": 0.385, "step": 15088 }, { "epoch": 0.28014688682362815, "grad_norm": 0.43560266494750977, "learning_rate": 1.6370608051926146e-05, "loss": 0.386, "step": 15090 }, { "epoch": 0.2801840169610468, "grad_norm": 0.35539305210113525, "learning_rate": 1.636970886177552e-05, "loss": 0.356, "step": 15092 }, { "epoch": 0.2802211470984654, "grad_norm": 0.26674985885620117, "learning_rate": 1.636880958495224e-05, "loss": 0.3461, "step": 15094 }, { "epoch": 0.28025827723588403, "grad_norm": 0.23910953104496002, "learning_rate": 1.6367910221468535e-05, "loss": 0.3154, "step": 15096 }, { "epoch": 0.28029540737330266, "grad_norm": 0.29694047570228577, "learning_rate": 1.6367010771336647e-05, "loss": 0.3483, "step": 15098 }, { "epoch": 0.28033253751072135, "grad_norm": 0.3674033284187317, "learning_rate": 1.636611123456881e-05, "loss": 0.4217, "step": 15100 }, { "epoch": 0.28036966764814, "grad_norm": 0.4674500524997711, "learning_rate": 1.6365211611177274e-05, "loss": 0.3562, "step": 15102 }, { "epoch": 0.2804067977855586, "grad_norm": 0.3588138520717621, "learning_rate": 1.6364311901174272e-05, "loss": 0.352, "step": 15104 }, { "epoch": 0.28044392792297723, "grad_norm": 0.4785209596157074, "learning_rate": 1.6363412104572044e-05, "loss": 0.2462, "step": 15106 }, { "epoch": 0.28048105806039586, "grad_norm": 0.36932075023651123, "learning_rate": 1.6362512221382846e-05, "loss": 0.3516, "step": 15108 }, { "epoch": 0.28051818819781454, "grad_norm": 0.2706802785396576, "learning_rate": 1.6361612251618908e-05, "loss": 0.3183, "step": 15110 }, { "epoch": 0.2805553183352332, "grad_norm": 0.3177291750907898, "learning_rate": 1.6360712195292486e-05, "loss": 0.2849, "step": 15112 }, { "epoch": 0.2805924484726518, "grad_norm": 0.5077996253967285, "learning_rate": 1.635981205241582e-05, "loss": 0.2259, "step": 15114 }, { "epoch": 0.28062957861007043, "grad_norm": 0.4048846662044525, "learning_rate": 1.6358911823001166e-05, "loss": 0.248, "step": 15116 }, { "epoch": 0.28066670874748906, "grad_norm": 0.538139820098877, "learning_rate": 1.6358011507060765e-05, "loss": 0.314, "step": 15118 }, { "epoch": 0.2807038388849077, "grad_norm": 0.3161731958389282, "learning_rate": 1.6357111104606875e-05, "loss": 0.5129, "step": 15120 }, { "epoch": 0.28074096902232637, "grad_norm": 0.4685649573802948, "learning_rate": 1.6356210615651744e-05, "loss": 0.1855, "step": 15122 }, { "epoch": 0.280778099159745, "grad_norm": 0.3559631109237671, "learning_rate": 1.6355310040207624e-05, "loss": 0.2189, "step": 15124 }, { "epoch": 0.2808152292971636, "grad_norm": 0.3861045241355896, "learning_rate": 1.635440937828677e-05, "loss": 0.3423, "step": 15126 }, { "epoch": 0.28085235943458225, "grad_norm": 0.44704627990722656, "learning_rate": 1.6353508629901442e-05, "loss": 0.3762, "step": 15128 }, { "epoch": 0.2808894895720009, "grad_norm": 0.35397785902023315, "learning_rate": 1.635260779506389e-05, "loss": 0.2989, "step": 15130 }, { "epoch": 0.28092661970941957, "grad_norm": 0.3193988800048828, "learning_rate": 1.6351706873786376e-05, "loss": 0.2216, "step": 15132 }, { "epoch": 0.2809637498468382, "grad_norm": 0.4408256411552429, "learning_rate": 1.6350805866081153e-05, "loss": 0.3688, "step": 15134 }, { "epoch": 0.2810008799842568, "grad_norm": 0.33592477440834045, "learning_rate": 1.634990477196049e-05, "loss": 0.1755, "step": 15136 }, { "epoch": 0.28103801012167545, "grad_norm": 0.5282504558563232, "learning_rate": 1.634900359143664e-05, "loss": 0.2946, "step": 15138 }, { "epoch": 0.2810751402590941, "grad_norm": 0.3795854449272156, "learning_rate": 1.6348102324521872e-05, "loss": 0.3686, "step": 15140 }, { "epoch": 0.28111227039651276, "grad_norm": 0.38313087821006775, "learning_rate": 1.6347200971228443e-05, "loss": 0.2266, "step": 15142 }, { "epoch": 0.2811494005339314, "grad_norm": 0.4143899381160736, "learning_rate": 1.6346299531568618e-05, "loss": 0.243, "step": 15144 }, { "epoch": 0.28118653067135, "grad_norm": 0.25574710965156555, "learning_rate": 1.634539800555467e-05, "loss": 0.1704, "step": 15146 }, { "epoch": 0.28122366080876865, "grad_norm": 0.23985464870929718, "learning_rate": 1.6344496393198862e-05, "loss": 0.3421, "step": 15148 }, { "epoch": 0.2812607909461873, "grad_norm": 0.5032542943954468, "learning_rate": 1.634359469451346e-05, "loss": 0.3088, "step": 15150 }, { "epoch": 0.2812979210836059, "grad_norm": 0.34067878127098083, "learning_rate": 1.6342692909510738e-05, "loss": 0.2392, "step": 15152 }, { "epoch": 0.2813350512210246, "grad_norm": 0.44177791476249695, "learning_rate": 1.634179103820296e-05, "loss": 0.4731, "step": 15154 }, { "epoch": 0.2813721813584432, "grad_norm": 0.48239821195602417, "learning_rate": 1.6340889080602406e-05, "loss": 0.2198, "step": 15156 }, { "epoch": 0.28140931149586185, "grad_norm": 0.2975512146949768, "learning_rate": 1.6339987036721342e-05, "loss": 0.2864, "step": 15158 }, { "epoch": 0.2814464416332805, "grad_norm": 0.27439776062965393, "learning_rate": 1.6339084906572045e-05, "loss": 0.3156, "step": 15160 }, { "epoch": 0.2814835717706991, "grad_norm": 0.28306058049201965, "learning_rate": 1.6338182690166795e-05, "loss": 0.3097, "step": 15162 }, { "epoch": 0.2815207019081178, "grad_norm": 0.5881744027137756, "learning_rate": 1.6337280387517857e-05, "loss": 0.1904, "step": 15164 }, { "epoch": 0.2815578320455364, "grad_norm": 0.33809372782707214, "learning_rate": 1.6336377998637522e-05, "loss": 0.312, "step": 15166 }, { "epoch": 0.28159496218295504, "grad_norm": 0.392665296792984, "learning_rate": 1.6335475523538056e-05, "loss": 0.2473, "step": 15168 }, { "epoch": 0.2816320923203737, "grad_norm": 0.27196863293647766, "learning_rate": 1.6334572962231747e-05, "loss": 0.2773, "step": 15170 }, { "epoch": 0.2816692224577923, "grad_norm": 0.4898926317691803, "learning_rate": 1.6333670314730877e-05, "loss": 0.3732, "step": 15172 }, { "epoch": 0.28170635259521093, "grad_norm": 0.28694918751716614, "learning_rate": 1.6332767581047722e-05, "loss": 0.2583, "step": 15174 }, { "epoch": 0.2817434827326296, "grad_norm": 0.35229045152664185, "learning_rate": 1.6331864761194568e-05, "loss": 0.271, "step": 15176 }, { "epoch": 0.28178061287004824, "grad_norm": 0.47089067101478577, "learning_rate": 1.6330961855183708e-05, "loss": 0.4246, "step": 15178 }, { "epoch": 0.28181774300746687, "grad_norm": 0.4692615270614624, "learning_rate": 1.6330058863027413e-05, "loss": 0.255, "step": 15180 }, { "epoch": 0.2818548731448855, "grad_norm": 0.27859732508659363, "learning_rate": 1.632915578473798e-05, "loss": 0.1934, "step": 15182 }, { "epoch": 0.2818920032823041, "grad_norm": 0.5272289514541626, "learning_rate": 1.6328252620327692e-05, "loss": 0.397, "step": 15184 }, { "epoch": 0.2819291334197228, "grad_norm": 0.4672413468360901, "learning_rate": 1.6327349369808848e-05, "loss": 0.1945, "step": 15186 }, { "epoch": 0.28196626355714144, "grad_norm": 0.5078940987586975, "learning_rate": 1.6326446033193726e-05, "loss": 0.4013, "step": 15188 }, { "epoch": 0.28200339369456007, "grad_norm": 0.2850618064403534, "learning_rate": 1.6325542610494626e-05, "loss": 0.2901, "step": 15190 }, { "epoch": 0.2820405238319787, "grad_norm": 0.29544752836227417, "learning_rate": 1.6324639101723835e-05, "loss": 0.5343, "step": 15192 }, { "epoch": 0.2820776539693973, "grad_norm": 0.32594382762908936, "learning_rate": 1.6323735506893654e-05, "loss": 0.3613, "step": 15194 }, { "epoch": 0.28211478410681595, "grad_norm": 0.39919862151145935, "learning_rate": 1.6322831826016373e-05, "loss": 0.2641, "step": 15196 }, { "epoch": 0.28215191424423464, "grad_norm": 0.38633760809898376, "learning_rate": 1.632192805910429e-05, "loss": 0.2317, "step": 15198 }, { "epoch": 0.28218904438165326, "grad_norm": 0.29906320571899414, "learning_rate": 1.63210242061697e-05, "loss": 0.3474, "step": 15200 }, { "epoch": 0.2822261745190719, "grad_norm": 0.6725844144821167, "learning_rate": 1.632012026722491e-05, "loss": 0.3482, "step": 15202 }, { "epoch": 0.2822633046564905, "grad_norm": 0.42531195282936096, "learning_rate": 1.631921624228221e-05, "loss": 0.2521, "step": 15204 }, { "epoch": 0.28230043479390915, "grad_norm": 0.3144271671772003, "learning_rate": 1.6318312131353907e-05, "loss": 0.2728, "step": 15206 }, { "epoch": 0.28233756493132783, "grad_norm": 1.1425817012786865, "learning_rate": 1.6317407934452297e-05, "loss": 0.4547, "step": 15208 }, { "epoch": 0.28237469506874646, "grad_norm": 0.43557244539260864, "learning_rate": 1.6316503651589694e-05, "loss": 0.2263, "step": 15210 }, { "epoch": 0.2824118252061651, "grad_norm": 0.43249669671058655, "learning_rate": 1.6315599282778393e-05, "loss": 0.3461, "step": 15212 }, { "epoch": 0.2824489553435837, "grad_norm": 0.6911650896072388, "learning_rate": 1.6314694828030707e-05, "loss": 0.5044, "step": 15214 }, { "epoch": 0.28248608548100235, "grad_norm": 0.356902152299881, "learning_rate": 1.6313790287358935e-05, "loss": 0.2319, "step": 15216 }, { "epoch": 0.28252321561842103, "grad_norm": 0.3941543996334076, "learning_rate": 1.631288566077539e-05, "loss": 0.3122, "step": 15218 }, { "epoch": 0.28256034575583966, "grad_norm": 0.25098317861557007, "learning_rate": 1.631198094829238e-05, "loss": 0.3425, "step": 15220 }, { "epoch": 0.2825974758932583, "grad_norm": 0.3847087323665619, "learning_rate": 1.6311076149922217e-05, "loss": 0.2571, "step": 15222 }, { "epoch": 0.2826346060306769, "grad_norm": 0.38837409019470215, "learning_rate": 1.6310171265677213e-05, "loss": 0.2483, "step": 15224 }, { "epoch": 0.28267173616809554, "grad_norm": 0.27800941467285156, "learning_rate": 1.6309266295569674e-05, "loss": 0.409, "step": 15226 }, { "epoch": 0.2827088663055142, "grad_norm": 0.3290328085422516, "learning_rate": 1.6308361239611924e-05, "loss": 0.1897, "step": 15228 }, { "epoch": 0.28274599644293286, "grad_norm": 0.4303019940853119, "learning_rate": 1.6307456097816272e-05, "loss": 0.1812, "step": 15230 }, { "epoch": 0.2827831265803515, "grad_norm": 0.3664807677268982, "learning_rate": 1.6306550870195033e-05, "loss": 0.3847, "step": 15232 }, { "epoch": 0.2828202567177701, "grad_norm": 0.3614775538444519, "learning_rate": 1.630564555676053e-05, "loss": 0.2171, "step": 15234 }, { "epoch": 0.28285738685518874, "grad_norm": 0.4108017385005951, "learning_rate": 1.6304740157525078e-05, "loss": 0.235, "step": 15236 }, { "epoch": 0.28289451699260737, "grad_norm": 0.373841255903244, "learning_rate": 1.6303834672500996e-05, "loss": 0.4997, "step": 15238 }, { "epoch": 0.28293164713002605, "grad_norm": 0.2432202696800232, "learning_rate": 1.630292910170061e-05, "loss": 0.2127, "step": 15240 }, { "epoch": 0.2829687772674447, "grad_norm": 0.2322385460138321, "learning_rate": 1.6302023445136234e-05, "loss": 0.1921, "step": 15242 }, { "epoch": 0.2830059074048633, "grad_norm": 0.5921317934989929, "learning_rate": 1.6301117702820196e-05, "loss": 0.4181, "step": 15244 }, { "epoch": 0.28304303754228194, "grad_norm": 0.42867523431777954, "learning_rate": 1.6300211874764823e-05, "loss": 0.4547, "step": 15246 }, { "epoch": 0.28308016767970057, "grad_norm": 0.4820225238800049, "learning_rate": 1.629930596098244e-05, "loss": 0.4175, "step": 15248 }, { "epoch": 0.2831172978171192, "grad_norm": 0.39568570256233215, "learning_rate": 1.6298399961485364e-05, "loss": 0.3123, "step": 15250 }, { "epoch": 0.2831544279545379, "grad_norm": 0.3059687316417694, "learning_rate": 1.6297493876285933e-05, "loss": 0.2979, "step": 15252 }, { "epoch": 0.2831915580919565, "grad_norm": 0.4182986617088318, "learning_rate": 1.6296587705396476e-05, "loss": 0.1913, "step": 15254 }, { "epoch": 0.28322868822937514, "grad_norm": 0.2948521077632904, "learning_rate": 1.629568144882932e-05, "loss": 0.5376, "step": 15256 }, { "epoch": 0.28326581836679376, "grad_norm": 0.32156965136528015, "learning_rate": 1.6294775106596796e-05, "loss": 0.5023, "step": 15258 }, { "epoch": 0.2833029485042124, "grad_norm": 0.2610750198364258, "learning_rate": 1.6293868678711237e-05, "loss": 0.3343, "step": 15260 }, { "epoch": 0.2833400786416311, "grad_norm": 0.6007379293441772, "learning_rate": 1.629296216518498e-05, "loss": 0.3914, "step": 15262 }, { "epoch": 0.2833772087790497, "grad_norm": 0.30240124464035034, "learning_rate": 1.6292055566030356e-05, "loss": 0.2935, "step": 15264 }, { "epoch": 0.28341433891646833, "grad_norm": 0.38106393814086914, "learning_rate": 1.6291148881259704e-05, "loss": 0.366, "step": 15266 }, { "epoch": 0.28345146905388696, "grad_norm": 0.4438855051994324, "learning_rate": 1.6290242110885358e-05, "loss": 0.3173, "step": 15268 }, { "epoch": 0.2834885991913056, "grad_norm": 0.46781694889068604, "learning_rate": 1.628933525491966e-05, "loss": 0.3647, "step": 15270 }, { "epoch": 0.2835257293287242, "grad_norm": 0.4572279155254364, "learning_rate": 1.6288428313374948e-05, "loss": 0.4107, "step": 15272 }, { "epoch": 0.2835628594661429, "grad_norm": 0.366137832403183, "learning_rate": 1.6287521286263563e-05, "loss": 0.3605, "step": 15274 }, { "epoch": 0.28359998960356153, "grad_norm": 0.2983122766017914, "learning_rate": 1.6286614173597843e-05, "loss": 0.5297, "step": 15276 }, { "epoch": 0.28363711974098016, "grad_norm": 0.2641507387161255, "learning_rate": 1.628570697539014e-05, "loss": 0.2767, "step": 15278 }, { "epoch": 0.2836742498783988, "grad_norm": 0.5122215151786804, "learning_rate": 1.6284799691652787e-05, "loss": 0.2294, "step": 15280 }, { "epoch": 0.2837113800158174, "grad_norm": 0.3430140018463135, "learning_rate": 1.628389232239814e-05, "loss": 0.1308, "step": 15282 }, { "epoch": 0.2837485101532361, "grad_norm": 0.3650031089782715, "learning_rate": 1.6282984867638538e-05, "loss": 0.2422, "step": 15284 }, { "epoch": 0.28378564029065473, "grad_norm": 0.45217829942703247, "learning_rate": 1.6282077327386337e-05, "loss": 0.3221, "step": 15286 }, { "epoch": 0.28382277042807336, "grad_norm": 0.32803067564964294, "learning_rate": 1.6281169701653875e-05, "loss": 0.2795, "step": 15288 }, { "epoch": 0.283859900565492, "grad_norm": 0.47347021102905273, "learning_rate": 1.628026199045351e-05, "loss": 0.3021, "step": 15290 }, { "epoch": 0.2838970307029106, "grad_norm": 0.46110835671424866, "learning_rate": 1.627935419379759e-05, "loss": 0.3758, "step": 15292 }, { "epoch": 0.2839341608403293, "grad_norm": 0.24924182891845703, "learning_rate": 1.6278446311698467e-05, "loss": 0.3331, "step": 15294 }, { "epoch": 0.2839712909777479, "grad_norm": 0.40695130825042725, "learning_rate": 1.62775383441685e-05, "loss": 0.4265, "step": 15296 }, { "epoch": 0.28400842111516655, "grad_norm": 0.3572111129760742, "learning_rate": 1.6276630291220035e-05, "loss": 0.3569, "step": 15298 }, { "epoch": 0.2840455512525852, "grad_norm": 0.44562914967536926, "learning_rate": 1.6275722152865436e-05, "loss": 0.4146, "step": 15300 }, { "epoch": 0.2840826813900038, "grad_norm": 0.5651805996894836, "learning_rate": 1.6274813929117054e-05, "loss": 0.3408, "step": 15302 }, { "epoch": 0.28411981152742244, "grad_norm": 0.38320788741111755, "learning_rate": 1.627390561998725e-05, "loss": 0.4149, "step": 15304 }, { "epoch": 0.2841569416648411, "grad_norm": 0.4090460538864136, "learning_rate": 1.6272997225488383e-05, "loss": 0.2655, "step": 15306 }, { "epoch": 0.28419407180225975, "grad_norm": 0.4915003776550293, "learning_rate": 1.6272088745632814e-05, "loss": 0.282, "step": 15308 }, { "epoch": 0.2842312019396784, "grad_norm": 0.3459673225879669, "learning_rate": 1.6271180180432905e-05, "loss": 0.2553, "step": 15310 }, { "epoch": 0.284268332077097, "grad_norm": 0.36616796255111694, "learning_rate": 1.627027152990102e-05, "loss": 0.4398, "step": 15312 }, { "epoch": 0.28430546221451564, "grad_norm": 0.43059292435646057, "learning_rate": 1.626936279404952e-05, "loss": 0.2593, "step": 15314 }, { "epoch": 0.2843425923519343, "grad_norm": 0.40349170565605164, "learning_rate": 1.6268453972890764e-05, "loss": 0.4947, "step": 15316 }, { "epoch": 0.28437972248935295, "grad_norm": 0.3066299259662628, "learning_rate": 1.6267545066437134e-05, "loss": 0.2513, "step": 15318 }, { "epoch": 0.2844168526267716, "grad_norm": 0.5004720091819763, "learning_rate": 1.6266636074700987e-05, "loss": 0.143, "step": 15320 }, { "epoch": 0.2844539827641902, "grad_norm": 0.26795145869255066, "learning_rate": 1.626572699769469e-05, "loss": 0.3567, "step": 15322 }, { "epoch": 0.28449111290160883, "grad_norm": 0.35458195209503174, "learning_rate": 1.6264817835430622e-05, "loss": 0.2723, "step": 15324 }, { "epoch": 0.28452824303902746, "grad_norm": 0.33366137742996216, "learning_rate": 1.6263908587921148e-05, "loss": 0.2204, "step": 15326 }, { "epoch": 0.28456537317644615, "grad_norm": 0.33586347103118896, "learning_rate": 1.6262999255178637e-05, "loss": 0.295, "step": 15328 }, { "epoch": 0.2846025033138648, "grad_norm": 0.2718474864959717, "learning_rate": 1.626208983721547e-05, "loss": 0.2439, "step": 15330 }, { "epoch": 0.2846396334512834, "grad_norm": 0.33899179100990295, "learning_rate": 1.6261180334044016e-05, "loss": 0.2297, "step": 15332 }, { "epoch": 0.28467676358870203, "grad_norm": 0.24895575642585754, "learning_rate": 1.626027074567665e-05, "loss": 0.3484, "step": 15334 }, { "epoch": 0.28471389372612066, "grad_norm": 0.3588293194770813, "learning_rate": 1.6259361072125756e-05, "loss": 0.343, "step": 15336 }, { "epoch": 0.28475102386353934, "grad_norm": 0.2956729233264923, "learning_rate": 1.62584513134037e-05, "loss": 0.3685, "step": 15338 }, { "epoch": 0.28478815400095797, "grad_norm": 0.44835031032562256, "learning_rate": 1.6257541469522872e-05, "loss": 0.4142, "step": 15340 }, { "epoch": 0.2848252841383766, "grad_norm": 0.4319595992565155, "learning_rate": 1.6256631540495648e-05, "loss": 0.3616, "step": 15342 }, { "epoch": 0.28486241427579523, "grad_norm": 0.4029219150543213, "learning_rate": 1.6255721526334407e-05, "loss": 0.4293, "step": 15344 }, { "epoch": 0.28489954441321386, "grad_norm": 0.38367873430252075, "learning_rate": 1.6254811427051538e-05, "loss": 0.3847, "step": 15346 }, { "epoch": 0.2849366745506325, "grad_norm": 0.38078564405441284, "learning_rate": 1.625390124265942e-05, "loss": 0.2318, "step": 15348 }, { "epoch": 0.28497380468805117, "grad_norm": 0.4418644607067108, "learning_rate": 1.6252990973170435e-05, "loss": 0.2504, "step": 15350 }, { "epoch": 0.2850109348254698, "grad_norm": 0.3804689943790436, "learning_rate": 1.6252080618596976e-05, "loss": 0.3188, "step": 15352 }, { "epoch": 0.2850480649628884, "grad_norm": 0.37130653858184814, "learning_rate": 1.6251170178951423e-05, "loss": 0.321, "step": 15354 }, { "epoch": 0.28508519510030705, "grad_norm": 0.41873249411582947, "learning_rate": 1.6250259654246172e-05, "loss": 0.4018, "step": 15356 }, { "epoch": 0.2851223252377257, "grad_norm": 0.33758434653282166, "learning_rate": 1.624934904449361e-05, "loss": 0.1856, "step": 15358 }, { "epoch": 0.28515945537514437, "grad_norm": 0.517618715763092, "learning_rate": 1.624843834970612e-05, "loss": 0.2499, "step": 15360 }, { "epoch": 0.285196585512563, "grad_norm": 0.4106998145580292, "learning_rate": 1.624752756989611e-05, "loss": 0.3723, "step": 15362 }, { "epoch": 0.2852337156499816, "grad_norm": 0.5599865317344666, "learning_rate": 1.6246616705075956e-05, "loss": 0.3468, "step": 15364 }, { "epoch": 0.28527084578740025, "grad_norm": 0.49578964710235596, "learning_rate": 1.6245705755258062e-05, "loss": 0.164, "step": 15366 }, { "epoch": 0.2853079759248189, "grad_norm": 0.6233541369438171, "learning_rate": 1.624479472045482e-05, "loss": 0.314, "step": 15368 }, { "epoch": 0.28534510606223756, "grad_norm": 0.544994592666626, "learning_rate": 1.6243883600678628e-05, "loss": 0.3274, "step": 15370 }, { "epoch": 0.2853822361996562, "grad_norm": 0.4158276319503784, "learning_rate": 1.6242972395941882e-05, "loss": 0.2367, "step": 15372 }, { "epoch": 0.2854193663370748, "grad_norm": 0.4828920066356659, "learning_rate": 1.6242061106256985e-05, "loss": 0.2873, "step": 15374 }, { "epoch": 0.28545649647449345, "grad_norm": 0.43755224347114563, "learning_rate": 1.6241149731636326e-05, "loss": 0.2263, "step": 15376 }, { "epoch": 0.2854936266119121, "grad_norm": 0.4212903380393982, "learning_rate": 1.624023827209232e-05, "loss": 0.4343, "step": 15378 }, { "epoch": 0.2855307567493307, "grad_norm": 0.4520227313041687, "learning_rate": 1.623932672763736e-05, "loss": 0.2744, "step": 15380 }, { "epoch": 0.2855678868867494, "grad_norm": 0.35532331466674805, "learning_rate": 1.6238415098283853e-05, "loss": 0.1519, "step": 15382 }, { "epoch": 0.285605017024168, "grad_norm": 0.337658554315567, "learning_rate": 1.6237503384044203e-05, "loss": 0.3772, "step": 15384 }, { "epoch": 0.28564214716158665, "grad_norm": 0.3435303866863251, "learning_rate": 1.6236591584930814e-05, "loss": 0.2648, "step": 15386 }, { "epoch": 0.2856792772990053, "grad_norm": 0.30965641140937805, "learning_rate": 1.62356797009561e-05, "loss": 0.1044, "step": 15388 }, { "epoch": 0.2857164074364239, "grad_norm": 0.3630635738372803, "learning_rate": 1.6234767732132456e-05, "loss": 0.2765, "step": 15390 }, { "epoch": 0.2857535375738426, "grad_norm": 0.3090052902698517, "learning_rate": 1.62338556784723e-05, "loss": 0.184, "step": 15392 }, { "epoch": 0.2857906677112612, "grad_norm": 0.3754655420780182, "learning_rate": 1.6232943539988043e-05, "loss": 0.2981, "step": 15394 }, { "epoch": 0.28582779784867984, "grad_norm": 0.30027732253074646, "learning_rate": 1.623203131669209e-05, "loss": 0.219, "step": 15396 }, { "epoch": 0.28586492798609847, "grad_norm": 0.3603162467479706, "learning_rate": 1.6231119008596862e-05, "loss": 0.3673, "step": 15398 }, { "epoch": 0.2859020581235171, "grad_norm": 0.35720306634902954, "learning_rate": 1.6230206615714767e-05, "loss": 0.2093, "step": 15400 }, { "epoch": 0.28593918826093573, "grad_norm": 0.26947757601737976, "learning_rate": 1.6229294138058223e-05, "loss": 0.2292, "step": 15402 }, { "epoch": 0.2859763183983544, "grad_norm": 0.5669321417808533, "learning_rate": 1.6228381575639644e-05, "loss": 0.2901, "step": 15404 }, { "epoch": 0.28601344853577304, "grad_norm": 0.5515179634094238, "learning_rate": 1.622746892847145e-05, "loss": 0.2241, "step": 15406 }, { "epoch": 0.28605057867319167, "grad_norm": 0.3312181830406189, "learning_rate": 1.622655619656605e-05, "loss": 0.4145, "step": 15408 }, { "epoch": 0.2860877088106103, "grad_norm": 0.3909747898578644, "learning_rate": 1.622564337993588e-05, "loss": 0.4107, "step": 15410 }, { "epoch": 0.2861248389480289, "grad_norm": 0.4423218369483948, "learning_rate": 1.6224730478593347e-05, "loss": 0.4942, "step": 15412 }, { "epoch": 0.2861619690854476, "grad_norm": 0.4754064381122589, "learning_rate": 1.6223817492550877e-05, "loss": 0.3382, "step": 15414 }, { "epoch": 0.28619909922286624, "grad_norm": 0.37255749106407166, "learning_rate": 1.6222904421820894e-05, "loss": 0.4644, "step": 15416 }, { "epoch": 0.28623622936028487, "grad_norm": 0.3786499798297882, "learning_rate": 1.6221991266415823e-05, "loss": 0.4033, "step": 15418 }, { "epoch": 0.2862733594977035, "grad_norm": 0.36177828907966614, "learning_rate": 1.6221078026348087e-05, "loss": 0.3115, "step": 15420 }, { "epoch": 0.2863104896351221, "grad_norm": 0.4462338089942932, "learning_rate": 1.6220164701630112e-05, "loss": 0.1892, "step": 15422 }, { "epoch": 0.28634761977254075, "grad_norm": 0.3418540060520172, "learning_rate": 1.621925129227433e-05, "loss": 0.1989, "step": 15424 }, { "epoch": 0.28638474990995944, "grad_norm": 0.5730937123298645, "learning_rate": 1.6218337798293165e-05, "loss": 0.3572, "step": 15426 }, { "epoch": 0.28642188004737806, "grad_norm": 0.44200706481933594, "learning_rate": 1.6217424219699046e-05, "loss": 0.2889, "step": 15428 }, { "epoch": 0.2864590101847967, "grad_norm": 0.46696531772613525, "learning_rate": 1.621651055650441e-05, "loss": 0.3177, "step": 15430 }, { "epoch": 0.2864961403222153, "grad_norm": 0.4467834234237671, "learning_rate": 1.6215596808721684e-05, "loss": 0.3039, "step": 15432 }, { "epoch": 0.28653327045963395, "grad_norm": 0.2603677809238434, "learning_rate": 1.6214682976363304e-05, "loss": 0.3223, "step": 15434 }, { "epoch": 0.28657040059705263, "grad_norm": 0.3614133596420288, "learning_rate": 1.6213769059441705e-05, "loss": 0.1018, "step": 15436 }, { "epoch": 0.28660753073447126, "grad_norm": 0.3147931396961212, "learning_rate": 1.6212855057969324e-05, "loss": 0.4178, "step": 15438 }, { "epoch": 0.2866446608718899, "grad_norm": 0.422879695892334, "learning_rate": 1.621194097195859e-05, "loss": 0.2894, "step": 15440 }, { "epoch": 0.2866817910093085, "grad_norm": 0.4944377839565277, "learning_rate": 1.6211026801421947e-05, "loss": 0.2128, "step": 15442 }, { "epoch": 0.28671892114672715, "grad_norm": 0.34172323346138, "learning_rate": 1.6210112546371835e-05, "loss": 0.2334, "step": 15444 }, { "epoch": 0.28675605128414583, "grad_norm": 0.3031104803085327, "learning_rate": 1.6209198206820694e-05, "loss": 0.1632, "step": 15446 }, { "epoch": 0.28679318142156446, "grad_norm": 0.42268919944763184, "learning_rate": 1.6208283782780964e-05, "loss": 0.2931, "step": 15448 }, { "epoch": 0.2868303115589831, "grad_norm": 0.5924049019813538, "learning_rate": 1.6207369274265086e-05, "loss": 0.3113, "step": 15450 }, { "epoch": 0.2868674416964017, "grad_norm": 0.35454505681991577, "learning_rate": 1.6206454681285505e-05, "loss": 0.3643, "step": 15452 }, { "epoch": 0.28690457183382034, "grad_norm": 0.37803372740745544, "learning_rate": 1.6205540003854666e-05, "loss": 0.23, "step": 15454 }, { "epoch": 0.28694170197123897, "grad_norm": 0.44532960653305054, "learning_rate": 1.6204625241985017e-05, "loss": 0.3562, "step": 15456 }, { "epoch": 0.28697883210865766, "grad_norm": 0.4188586473464966, "learning_rate": 1.6203710395689005e-05, "loss": 0.3087, "step": 15458 }, { "epoch": 0.2870159622460763, "grad_norm": 0.3082582652568817, "learning_rate": 1.620279546497907e-05, "loss": 0.2921, "step": 15460 }, { "epoch": 0.2870530923834949, "grad_norm": 0.200359508395195, "learning_rate": 1.6201880449867676e-05, "loss": 0.2381, "step": 15462 }, { "epoch": 0.28709022252091354, "grad_norm": 0.2680836319923401, "learning_rate": 1.620096535036726e-05, "loss": 0.2656, "step": 15464 }, { "epoch": 0.28712735265833217, "grad_norm": 0.43087753653526306, "learning_rate": 1.6200050166490283e-05, "loss": 0.197, "step": 15466 }, { "epoch": 0.28716448279575085, "grad_norm": 0.49592265486717224, "learning_rate": 1.6199134898249193e-05, "loss": 0.1827, "step": 15468 }, { "epoch": 0.2872016129331695, "grad_norm": 0.4730733036994934, "learning_rate": 1.6198219545656448e-05, "loss": 0.3691, "step": 15470 }, { "epoch": 0.2872387430705881, "grad_norm": 0.37694552540779114, "learning_rate": 1.6197304108724498e-05, "loss": 0.3427, "step": 15472 }, { "epoch": 0.28727587320800674, "grad_norm": 0.4769977033138275, "learning_rate": 1.6196388587465803e-05, "loss": 0.2563, "step": 15474 }, { "epoch": 0.28731300334542537, "grad_norm": 0.3011767566204071, "learning_rate": 1.619547298189282e-05, "loss": 0.3026, "step": 15476 }, { "epoch": 0.287350133482844, "grad_norm": 0.31554466485977173, "learning_rate": 1.619455729201801e-05, "loss": 0.3056, "step": 15478 }, { "epoch": 0.2873872636202627, "grad_norm": 0.2843112647533417, "learning_rate": 1.6193641517853825e-05, "loss": 0.2372, "step": 15480 }, { "epoch": 0.2874243937576813, "grad_norm": 0.35560521483421326, "learning_rate": 1.6192725659412734e-05, "loss": 0.6194, "step": 15482 }, { "epoch": 0.28746152389509994, "grad_norm": 0.306471586227417, "learning_rate": 1.6191809716707195e-05, "loss": 0.2463, "step": 15484 }, { "epoch": 0.28749865403251856, "grad_norm": 0.24490579962730408, "learning_rate": 1.6190893689749675e-05, "loss": 0.3032, "step": 15486 }, { "epoch": 0.2875357841699372, "grad_norm": 0.3124704658985138, "learning_rate": 1.618997757855263e-05, "loss": 0.3571, "step": 15488 }, { "epoch": 0.2875729143073559, "grad_norm": 0.34375661611557007, "learning_rate": 1.6189061383128537e-05, "loss": 0.2745, "step": 15490 }, { "epoch": 0.2876100444447745, "grad_norm": 0.34555792808532715, "learning_rate": 1.6188145103489853e-05, "loss": 0.2492, "step": 15492 }, { "epoch": 0.28764717458219313, "grad_norm": 0.36836835741996765, "learning_rate": 1.6187228739649054e-05, "loss": 0.3939, "step": 15494 }, { "epoch": 0.28768430471961176, "grad_norm": 0.36780625581741333, "learning_rate": 1.61863122916186e-05, "loss": 0.1897, "step": 15496 }, { "epoch": 0.2877214348570304, "grad_norm": 0.41419675946235657, "learning_rate": 1.618539575941097e-05, "loss": 0.2899, "step": 15498 }, { "epoch": 0.287758564994449, "grad_norm": 0.3153773248195648, "learning_rate": 1.6184479143038633e-05, "loss": 0.3534, "step": 15500 }, { "epoch": 0.2877956951318677, "grad_norm": 0.3480892777442932, "learning_rate": 1.6183562442514055e-05, "loss": 0.2988, "step": 15502 }, { "epoch": 0.28783282526928633, "grad_norm": 0.31886228919029236, "learning_rate": 1.6182645657849714e-05, "loss": 0.3502, "step": 15504 }, { "epoch": 0.28786995540670496, "grad_norm": 0.4016492962837219, "learning_rate": 1.6181728789058087e-05, "loss": 0.3568, "step": 15506 }, { "epoch": 0.2879070855441236, "grad_norm": 0.49086523056030273, "learning_rate": 1.6180811836151643e-05, "loss": 0.1567, "step": 15508 }, { "epoch": 0.2879442156815422, "grad_norm": 0.41445788741111755, "learning_rate": 1.6179894799142868e-05, "loss": 0.3235, "step": 15510 }, { "epoch": 0.2879813458189609, "grad_norm": 0.25982508063316345, "learning_rate": 1.6178977678044236e-05, "loss": 0.2999, "step": 15512 }, { "epoch": 0.2880184759563795, "grad_norm": 0.2904912829399109, "learning_rate": 1.6178060472868222e-05, "loss": 0.2873, "step": 15514 }, { "epoch": 0.28805560609379816, "grad_norm": 0.2471446692943573, "learning_rate": 1.6177143183627313e-05, "loss": 0.3689, "step": 15516 }, { "epoch": 0.2880927362312168, "grad_norm": 0.23567642271518707, "learning_rate": 1.617622581033399e-05, "loss": 0.4435, "step": 15518 }, { "epoch": 0.2881298663686354, "grad_norm": 0.285057008266449, "learning_rate": 1.6175308353000733e-05, "loss": 0.1573, "step": 15520 }, { "epoch": 0.2881669965060541, "grad_norm": 0.43118301033973694, "learning_rate": 1.6174390811640023e-05, "loss": 0.4376, "step": 15522 }, { "epoch": 0.2882041266434727, "grad_norm": 0.3691777288913727, "learning_rate": 1.6173473186264347e-05, "loss": 0.372, "step": 15524 }, { "epoch": 0.28824125678089135, "grad_norm": 0.4879799485206604, "learning_rate": 1.61725554768862e-05, "loss": 0.4235, "step": 15526 }, { "epoch": 0.28827838691831, "grad_norm": 0.21953199803829193, "learning_rate": 1.617163768351806e-05, "loss": 0.3363, "step": 15528 }, { "epoch": 0.2883155170557286, "grad_norm": 0.3726438879966736, "learning_rate": 1.6170719806172413e-05, "loss": 0.562, "step": 15530 }, { "epoch": 0.28835264719314724, "grad_norm": 0.39426764845848083, "learning_rate": 1.6169801844861756e-05, "loss": 0.2275, "step": 15532 }, { "epoch": 0.2883897773305659, "grad_norm": 0.5270881652832031, "learning_rate": 1.6168883799598578e-05, "loss": 0.4719, "step": 15534 }, { "epoch": 0.28842690746798455, "grad_norm": 0.5495846271514893, "learning_rate": 1.6167965670395366e-05, "loss": 0.177, "step": 15536 }, { "epoch": 0.2884640376054032, "grad_norm": 0.3154410719871521, "learning_rate": 1.616704745726462e-05, "loss": 0.2957, "step": 15538 }, { "epoch": 0.2885011677428218, "grad_norm": 0.36117202043533325, "learning_rate": 1.6166129160218823e-05, "loss": 0.2326, "step": 15540 }, { "epoch": 0.28853829788024044, "grad_norm": 0.7620652914047241, "learning_rate": 1.6165210779270487e-05, "loss": 0.3344, "step": 15542 }, { "epoch": 0.2885754280176591, "grad_norm": 0.23021607100963593, "learning_rate": 1.6164292314432096e-05, "loss": 0.0988, "step": 15544 }, { "epoch": 0.28861255815507775, "grad_norm": 0.40819084644317627, "learning_rate": 1.616337376571615e-05, "loss": 0.1714, "step": 15546 }, { "epoch": 0.2886496882924964, "grad_norm": 0.3554391860961914, "learning_rate": 1.6162455133135148e-05, "loss": 0.3991, "step": 15548 }, { "epoch": 0.288686818429915, "grad_norm": 0.24647042155265808, "learning_rate": 1.616153641670159e-05, "loss": 0.3449, "step": 15550 }, { "epoch": 0.28872394856733363, "grad_norm": 0.797376811504364, "learning_rate": 1.616061761642798e-05, "loss": 0.3413, "step": 15552 }, { "epoch": 0.28876107870475226, "grad_norm": 0.4246409237384796, "learning_rate": 1.6159698732326815e-05, "loss": 0.3481, "step": 15554 }, { "epoch": 0.28879820884217094, "grad_norm": 0.3307946026325226, "learning_rate": 1.61587797644106e-05, "loss": 0.3762, "step": 15556 }, { "epoch": 0.2888353389795896, "grad_norm": 0.32959240674972534, "learning_rate": 1.6157860712691845e-05, "loss": 0.4313, "step": 15558 }, { "epoch": 0.2888724691170082, "grad_norm": 0.45044249296188354, "learning_rate": 1.6156941577183043e-05, "loss": 0.2394, "step": 15560 }, { "epoch": 0.28890959925442683, "grad_norm": 0.3602319061756134, "learning_rate": 1.6156022357896715e-05, "loss": 0.2119, "step": 15562 }, { "epoch": 0.28894672939184546, "grad_norm": 0.30920371413230896, "learning_rate": 1.6155103054845362e-05, "loss": 0.1847, "step": 15564 }, { "epoch": 0.28898385952926414, "grad_norm": 0.39376068115234375, "learning_rate": 1.6154183668041484e-05, "loss": 0.2678, "step": 15566 }, { "epoch": 0.28902098966668277, "grad_norm": 0.3980129361152649, "learning_rate": 1.615326419749761e-05, "loss": 0.2494, "step": 15568 }, { "epoch": 0.2890581198041014, "grad_norm": 0.3452844023704529, "learning_rate": 1.6152344643226237e-05, "loss": 0.0795, "step": 15570 }, { "epoch": 0.28909524994152, "grad_norm": 0.45482882857322693, "learning_rate": 1.6151425005239886e-05, "loss": 0.2772, "step": 15572 }, { "epoch": 0.28913238007893866, "grad_norm": 0.336314857006073, "learning_rate": 1.615050528355106e-05, "loss": 0.2172, "step": 15574 }, { "epoch": 0.2891695102163573, "grad_norm": 0.3411087691783905, "learning_rate": 1.614958547817229e-05, "loss": 0.3079, "step": 15576 }, { "epoch": 0.28920664035377597, "grad_norm": 0.2088017612695694, "learning_rate": 1.614866558911607e-05, "loss": 0.4252, "step": 15578 }, { "epoch": 0.2892437704911946, "grad_norm": 0.4121551215648651, "learning_rate": 1.6147745616394933e-05, "loss": 0.2619, "step": 15580 }, { "epoch": 0.2892809006286132, "grad_norm": 0.42888790369033813, "learning_rate": 1.6146825560021397e-05, "loss": 0.2323, "step": 15582 }, { "epoch": 0.28931803076603185, "grad_norm": 0.4210417866706848, "learning_rate": 1.6145905420007973e-05, "loss": 0.3908, "step": 15584 }, { "epoch": 0.2893551609034505, "grad_norm": 0.34588950872421265, "learning_rate": 1.6144985196367185e-05, "loss": 0.3081, "step": 15586 }, { "epoch": 0.28939229104086917, "grad_norm": 0.37002184987068176, "learning_rate": 1.6144064889111557e-05, "loss": 0.2208, "step": 15588 }, { "epoch": 0.2894294211782878, "grad_norm": 0.33495160937309265, "learning_rate": 1.6143144498253608e-05, "loss": 0.0921, "step": 15590 }, { "epoch": 0.2894665513157064, "grad_norm": 0.400707870721817, "learning_rate": 1.6142224023805863e-05, "loss": 0.3751, "step": 15592 }, { "epoch": 0.28950368145312505, "grad_norm": 0.43633151054382324, "learning_rate": 1.614130346578085e-05, "loss": 0.3835, "step": 15594 }, { "epoch": 0.2895408115905437, "grad_norm": 0.5015726089477539, "learning_rate": 1.614038282419109e-05, "loss": 0.4262, "step": 15596 }, { "epoch": 0.28957794172796236, "grad_norm": 0.45344147086143494, "learning_rate": 1.6139462099049112e-05, "loss": 0.3551, "step": 15598 }, { "epoch": 0.289615071865381, "grad_norm": 0.3031162619590759, "learning_rate": 1.6138541290367445e-05, "loss": 0.4266, "step": 15600 }, { "epoch": 0.2896522020027996, "grad_norm": 0.3173650801181793, "learning_rate": 1.613762039815862e-05, "loss": 0.3103, "step": 15602 }, { "epoch": 0.28968933214021825, "grad_norm": 0.2842457592487335, "learning_rate": 1.613669942243516e-05, "loss": 0.1897, "step": 15604 }, { "epoch": 0.2897264622776369, "grad_norm": 2.010040760040283, "learning_rate": 1.6135778363209607e-05, "loss": 0.2242, "step": 15606 }, { "epoch": 0.2897635924150555, "grad_norm": 0.4785624146461487, "learning_rate": 1.613485722049449e-05, "loss": 0.2185, "step": 15608 }, { "epoch": 0.2898007225524742, "grad_norm": 0.36233460903167725, "learning_rate": 1.6133935994302338e-05, "loss": 0.2856, "step": 15610 }, { "epoch": 0.2898378526898928, "grad_norm": 0.385520339012146, "learning_rate": 1.6133014684645694e-05, "loss": 0.4, "step": 15612 }, { "epoch": 0.28987498282731144, "grad_norm": 0.25901177525520325, "learning_rate": 1.613209329153709e-05, "loss": 0.2829, "step": 15614 }, { "epoch": 0.2899121129647301, "grad_norm": 0.4623892307281494, "learning_rate": 1.6131171814989064e-05, "loss": 0.4186, "step": 15616 }, { "epoch": 0.2899492431021487, "grad_norm": 0.2792411148548126, "learning_rate": 1.6130250255014153e-05, "loss": 0.3546, "step": 15618 }, { "epoch": 0.2899863732395674, "grad_norm": 0.3469361662864685, "learning_rate": 1.61293286116249e-05, "loss": 0.194, "step": 15620 }, { "epoch": 0.290023503376986, "grad_norm": 0.4245806932449341, "learning_rate": 1.6128406884833845e-05, "loss": 0.332, "step": 15622 }, { "epoch": 0.29006063351440464, "grad_norm": 0.29889777302742004, "learning_rate": 1.6127485074653527e-05, "loss": 0.3547, "step": 15624 }, { "epoch": 0.29009776365182327, "grad_norm": 0.36889442801475525, "learning_rate": 1.612656318109649e-05, "loss": 0.2949, "step": 15626 }, { "epoch": 0.2901348937892419, "grad_norm": 0.49445563554763794, "learning_rate": 1.6125641204175282e-05, "loss": 0.3758, "step": 15628 }, { "epoch": 0.2901720239266605, "grad_norm": 0.38085517287254333, "learning_rate": 1.6124719143902445e-05, "loss": 0.5218, "step": 15630 }, { "epoch": 0.2902091540640792, "grad_norm": 0.34607717394828796, "learning_rate": 1.6123797000290524e-05, "loss": 0.3529, "step": 15632 }, { "epoch": 0.29024628420149784, "grad_norm": 0.36897605657577515, "learning_rate": 1.6122874773352072e-05, "loss": 0.3134, "step": 15634 }, { "epoch": 0.29028341433891647, "grad_norm": 0.2585378885269165, "learning_rate": 1.612195246309963e-05, "loss": 0.3037, "step": 15636 }, { "epoch": 0.2903205444763351, "grad_norm": 0.45234742760658264, "learning_rate": 1.6121030069545758e-05, "loss": 0.2496, "step": 15638 }, { "epoch": 0.2903576746137537, "grad_norm": 0.37002167105674744, "learning_rate": 1.6120107592702998e-05, "loss": 0.3083, "step": 15640 }, { "epoch": 0.2903948047511724, "grad_norm": 0.6520326733589172, "learning_rate": 1.611918503258391e-05, "loss": 0.2581, "step": 15642 }, { "epoch": 0.29043193488859104, "grad_norm": 0.3902058005332947, "learning_rate": 1.6118262389201035e-05, "loss": 0.0838, "step": 15644 }, { "epoch": 0.29046906502600967, "grad_norm": 0.48861244320869446, "learning_rate": 1.611733966256694e-05, "loss": 0.3132, "step": 15646 }, { "epoch": 0.2905061951634283, "grad_norm": 0.37170565128326416, "learning_rate": 1.6116416852694174e-05, "loss": 0.187, "step": 15648 }, { "epoch": 0.2905433253008469, "grad_norm": 0.5379403829574585, "learning_rate": 1.6115493959595297e-05, "loss": 0.4199, "step": 15650 }, { "epoch": 0.29058045543826555, "grad_norm": 0.4162154197692871, "learning_rate": 1.6114570983282863e-05, "loss": 0.25, "step": 15652 }, { "epoch": 0.29061758557568423, "grad_norm": 0.3742355406284332, "learning_rate": 1.611364792376944e-05, "loss": 0.3921, "step": 15654 }, { "epoch": 0.29065471571310286, "grad_norm": 0.46915262937545776, "learning_rate": 1.611272478106757e-05, "loss": 0.3619, "step": 15656 }, { "epoch": 0.2906918458505215, "grad_norm": 0.5767585039138794, "learning_rate": 1.6111801555189832e-05, "loss": 0.3113, "step": 15658 }, { "epoch": 0.2907289759879401, "grad_norm": 0.3501065969467163, "learning_rate": 1.6110878246148782e-05, "loss": 0.2739, "step": 15660 }, { "epoch": 0.29076610612535875, "grad_norm": 0.4263308644294739, "learning_rate": 1.6109954853956985e-05, "loss": 0.3078, "step": 15662 }, { "epoch": 0.29080323626277743, "grad_norm": 0.5071778893470764, "learning_rate": 1.6109031378627e-05, "loss": 0.2069, "step": 15664 }, { "epoch": 0.29084036640019606, "grad_norm": 0.4627779424190521, "learning_rate": 1.6108107820171396e-05, "loss": 0.459, "step": 15666 }, { "epoch": 0.2908774965376147, "grad_norm": 0.4224827289581299, "learning_rate": 1.6107184178602745e-05, "loss": 0.1784, "step": 15668 }, { "epoch": 0.2909146266750333, "grad_norm": 0.3111225664615631, "learning_rate": 1.6106260453933606e-05, "loss": 0.4632, "step": 15670 }, { "epoch": 0.29095175681245194, "grad_norm": 0.33872827887535095, "learning_rate": 1.6105336646176555e-05, "loss": 0.2122, "step": 15672 }, { "epoch": 0.29098888694987063, "grad_norm": 0.34843072295188904, "learning_rate": 1.6104412755344162e-05, "loss": 0.2098, "step": 15674 }, { "epoch": 0.29102601708728926, "grad_norm": 0.8168874382972717, "learning_rate": 1.6103488781448992e-05, "loss": 0.2519, "step": 15676 }, { "epoch": 0.2910631472247079, "grad_norm": 0.3109864592552185, "learning_rate": 1.6102564724503623e-05, "loss": 0.3273, "step": 15678 }, { "epoch": 0.2911002773621265, "grad_norm": 0.4328574240207672, "learning_rate": 1.6101640584520626e-05, "loss": 0.2864, "step": 15680 }, { "epoch": 0.29113740749954514, "grad_norm": 0.352992445230484, "learning_rate": 1.610071636151258e-05, "loss": 0.2614, "step": 15682 }, { "epoch": 0.29117453763696377, "grad_norm": 0.7581146359443665, "learning_rate": 1.609979205549206e-05, "loss": 0.3957, "step": 15684 }, { "epoch": 0.29121166777438245, "grad_norm": 0.36366379261016846, "learning_rate": 1.6098867666471634e-05, "loss": 0.3672, "step": 15686 }, { "epoch": 0.2912487979118011, "grad_norm": 0.4659481942653656, "learning_rate": 1.6097943194463892e-05, "loss": 0.3018, "step": 15688 }, { "epoch": 0.2912859280492197, "grad_norm": 0.4258236885070801, "learning_rate": 1.609701863948141e-05, "loss": 0.439, "step": 15690 }, { "epoch": 0.29132305818663834, "grad_norm": 0.41472896933555603, "learning_rate": 1.6096094001536765e-05, "loss": 0.2013, "step": 15692 }, { "epoch": 0.29136018832405697, "grad_norm": 0.33130306005477905, "learning_rate": 1.6095169280642537e-05, "loss": 0.2435, "step": 15694 }, { "epoch": 0.29139731846147565, "grad_norm": 0.39732077717781067, "learning_rate": 1.609424447681132e-05, "loss": 0.2237, "step": 15696 }, { "epoch": 0.2914344485988943, "grad_norm": 0.3854469954967499, "learning_rate": 1.6093319590055686e-05, "loss": 0.2369, "step": 15698 }, { "epoch": 0.2914715787363129, "grad_norm": 0.24874833226203918, "learning_rate": 1.609239462038822e-05, "loss": 0.2831, "step": 15700 }, { "epoch": 0.29150870887373154, "grad_norm": 0.2649223208427429, "learning_rate": 1.6091469567821518e-05, "loss": 0.2141, "step": 15702 }, { "epoch": 0.29154583901115017, "grad_norm": 0.39283373951911926, "learning_rate": 1.609054443236816e-05, "loss": 0.4789, "step": 15704 }, { "epoch": 0.2915829691485688, "grad_norm": 0.38399532437324524, "learning_rate": 1.608961921404073e-05, "loss": 0.2576, "step": 15706 }, { "epoch": 0.2916200992859875, "grad_norm": 0.3801678717136383, "learning_rate": 1.608869391285183e-05, "loss": 0.3431, "step": 15708 }, { "epoch": 0.2916572294234061, "grad_norm": 0.5046018958091736, "learning_rate": 1.608776852881404e-05, "loss": 0.2838, "step": 15710 }, { "epoch": 0.29169435956082473, "grad_norm": 0.36326077580451965, "learning_rate": 1.608684306193996e-05, "loss": 0.3495, "step": 15712 }, { "epoch": 0.29173148969824336, "grad_norm": 0.4390855133533478, "learning_rate": 1.6085917512242173e-05, "loss": 0.3133, "step": 15714 }, { "epoch": 0.291768619835662, "grad_norm": 0.23382356762886047, "learning_rate": 1.608499187973328e-05, "loss": 0.3046, "step": 15716 }, { "epoch": 0.2918057499730807, "grad_norm": 0.3334776759147644, "learning_rate": 1.6084066164425874e-05, "loss": 0.2719, "step": 15718 }, { "epoch": 0.2918428801104993, "grad_norm": 0.2548871338367462, "learning_rate": 1.6083140366332552e-05, "loss": 0.2798, "step": 15720 }, { "epoch": 0.29188001024791793, "grad_norm": 0.42713379859924316, "learning_rate": 1.608221448546591e-05, "loss": 0.2003, "step": 15722 }, { "epoch": 0.29191714038533656, "grad_norm": 0.3050050139427185, "learning_rate": 1.6081288521838546e-05, "loss": 0.2153, "step": 15724 }, { "epoch": 0.2919542705227552, "grad_norm": 0.4136345088481903, "learning_rate": 1.608036247546306e-05, "loss": 0.3352, "step": 15726 }, { "epoch": 0.2919914006601738, "grad_norm": 0.3212653398513794, "learning_rate": 1.607943634635206e-05, "loss": 0.4001, "step": 15728 }, { "epoch": 0.2920285307975925, "grad_norm": 0.5238571166992188, "learning_rate": 1.6078510134518132e-05, "loss": 0.3439, "step": 15730 }, { "epoch": 0.29206566093501113, "grad_norm": 0.2564865052700043, "learning_rate": 1.6077583839973897e-05, "loss": 0.1781, "step": 15732 }, { "epoch": 0.29210279107242976, "grad_norm": 0.41845089197158813, "learning_rate": 1.6076657462731945e-05, "loss": 0.2731, "step": 15734 }, { "epoch": 0.2921399212098484, "grad_norm": 0.42702433466911316, "learning_rate": 1.607573100280489e-05, "loss": 0.4761, "step": 15736 }, { "epoch": 0.292177051347267, "grad_norm": 0.25510188937187195, "learning_rate": 1.6074804460205335e-05, "loss": 0.2431, "step": 15738 }, { "epoch": 0.2922141814846857, "grad_norm": 0.3392500877380371, "learning_rate": 1.6073877834945884e-05, "loss": 0.172, "step": 15740 }, { "epoch": 0.2922513116221043, "grad_norm": 0.30395883321762085, "learning_rate": 1.607295112703915e-05, "loss": 0.1531, "step": 15742 }, { "epoch": 0.29228844175952295, "grad_norm": 0.3760819435119629, "learning_rate": 1.607202433649774e-05, "loss": 0.3712, "step": 15744 }, { "epoch": 0.2923255718969416, "grad_norm": 0.3539535701274872, "learning_rate": 1.607109746333427e-05, "loss": 0.4087, "step": 15746 }, { "epoch": 0.2923627020343602, "grad_norm": 0.5151153206825256, "learning_rate": 1.6070170507561348e-05, "loss": 0.3473, "step": 15748 }, { "epoch": 0.2923998321717789, "grad_norm": 0.40087592601776123, "learning_rate": 1.6069243469191586e-05, "loss": 0.2614, "step": 15750 }, { "epoch": 0.2924369623091975, "grad_norm": 0.45849931240081787, "learning_rate": 1.6068316348237603e-05, "loss": 0.3205, "step": 15752 }, { "epoch": 0.29247409244661615, "grad_norm": 0.33599889278411865, "learning_rate": 1.6067389144712013e-05, "loss": 0.248, "step": 15754 }, { "epoch": 0.2925112225840348, "grad_norm": 0.25124552845954895, "learning_rate": 1.606646185862742e-05, "loss": 0.129, "step": 15756 }, { "epoch": 0.2925483527214534, "grad_norm": 0.3443413972854614, "learning_rate": 1.6065534489996465e-05, "loss": 0.2186, "step": 15758 }, { "epoch": 0.29258548285887204, "grad_norm": 0.4380885362625122, "learning_rate": 1.606460703883175e-05, "loss": 0.4973, "step": 15760 }, { "epoch": 0.2926226129962907, "grad_norm": 0.3385966122150421, "learning_rate": 1.6063679505145892e-05, "loss": 0.1657, "step": 15762 }, { "epoch": 0.29265974313370935, "grad_norm": 0.4208071529865265, "learning_rate": 1.6062751888951528e-05, "loss": 0.2736, "step": 15764 }, { "epoch": 0.292696873271128, "grad_norm": 0.38226985931396484, "learning_rate": 1.6061824190261264e-05, "loss": 0.4101, "step": 15766 }, { "epoch": 0.2927340034085466, "grad_norm": 0.45783621072769165, "learning_rate": 1.606089640908773e-05, "loss": 0.3603, "step": 15768 }, { "epoch": 0.29277113354596523, "grad_norm": 0.5467211008071899, "learning_rate": 1.6059968545443557e-05, "loss": 0.3058, "step": 15770 }, { "epoch": 0.2928082636833839, "grad_norm": 0.2641834616661072, "learning_rate": 1.6059040599341356e-05, "loss": 0.3768, "step": 15772 }, { "epoch": 0.29284539382080255, "grad_norm": 0.47348564863204956, "learning_rate": 1.6058112570793766e-05, "loss": 0.4562, "step": 15774 }, { "epoch": 0.2928825239582212, "grad_norm": 0.315066933631897, "learning_rate": 1.605718445981341e-05, "loss": 0.2777, "step": 15776 }, { "epoch": 0.2929196540956398, "grad_norm": 0.47561630606651306, "learning_rate": 1.6056256266412916e-05, "loss": 0.1049, "step": 15778 }, { "epoch": 0.29295678423305843, "grad_norm": 0.5746839642524719, "learning_rate": 1.6055327990604916e-05, "loss": 0.3523, "step": 15780 }, { "epoch": 0.29299391437047706, "grad_norm": 0.42559921741485596, "learning_rate": 1.605439963240204e-05, "loss": 0.2788, "step": 15782 }, { "epoch": 0.29303104450789574, "grad_norm": 0.4412107765674591, "learning_rate": 1.6053471191816918e-05, "loss": 0.4538, "step": 15784 }, { "epoch": 0.29306817464531437, "grad_norm": 1.3458364009857178, "learning_rate": 1.6052542668862187e-05, "loss": 0.2097, "step": 15786 }, { "epoch": 0.293105304782733, "grad_norm": 0.5330139994621277, "learning_rate": 1.605161406355048e-05, "loss": 0.2756, "step": 15788 }, { "epoch": 0.29314243492015163, "grad_norm": 0.322184294462204, "learning_rate": 1.605068537589443e-05, "loss": 0.3241, "step": 15790 }, { "epoch": 0.29317956505757026, "grad_norm": 0.23872196674346924, "learning_rate": 1.6049756605906675e-05, "loss": 0.339, "step": 15792 }, { "epoch": 0.29321669519498894, "grad_norm": 0.4767598509788513, "learning_rate": 1.6048827753599855e-05, "loss": 0.2008, "step": 15794 }, { "epoch": 0.29325382533240757, "grad_norm": 0.45141828060150146, "learning_rate": 1.6047898818986613e-05, "loss": 0.3625, "step": 15796 }, { "epoch": 0.2932909554698262, "grad_norm": 0.3824867904186249, "learning_rate": 1.6046969802079576e-05, "loss": 0.3142, "step": 15798 }, { "epoch": 0.2933280856072448, "grad_norm": 0.4334510266780853, "learning_rate": 1.6046040702891397e-05, "loss": 0.4441, "step": 15800 }, { "epoch": 0.29336521574466345, "grad_norm": 0.27167803049087524, "learning_rate": 1.6045111521434712e-05, "loss": 0.4491, "step": 15802 }, { "epoch": 0.2934023458820821, "grad_norm": 0.28596147894859314, "learning_rate": 1.6044182257722167e-05, "loss": 0.4787, "step": 15804 }, { "epoch": 0.29343947601950077, "grad_norm": 0.5326327681541443, "learning_rate": 1.6043252911766405e-05, "loss": 0.3881, "step": 15806 }, { "epoch": 0.2934766061569194, "grad_norm": 0.4218844473361969, "learning_rate": 1.6042323483580075e-05, "loss": 0.1486, "step": 15808 }, { "epoch": 0.293513736294338, "grad_norm": 0.6821032762527466, "learning_rate": 1.604139397317582e-05, "loss": 0.1409, "step": 15810 }, { "epoch": 0.29355086643175665, "grad_norm": 0.28542548418045044, "learning_rate": 1.604046438056629e-05, "loss": 0.4346, "step": 15812 }, { "epoch": 0.2935879965691753, "grad_norm": 0.43990278244018555, "learning_rate": 1.603953470576413e-05, "loss": 0.3321, "step": 15814 }, { "epoch": 0.29362512670659396, "grad_norm": 0.38376545906066895, "learning_rate": 1.6038604948781994e-05, "loss": 0.409, "step": 15816 }, { "epoch": 0.2936622568440126, "grad_norm": 0.4855653643608093, "learning_rate": 1.6037675109632537e-05, "loss": 0.2908, "step": 15818 }, { "epoch": 0.2936993869814312, "grad_norm": 0.3898667097091675, "learning_rate": 1.6036745188328402e-05, "loss": 0.4082, "step": 15820 }, { "epoch": 0.29373651711884985, "grad_norm": 0.2707900106906891, "learning_rate": 1.6035815184882248e-05, "loss": 0.3521, "step": 15822 }, { "epoch": 0.2937736472562685, "grad_norm": 0.3762018084526062, "learning_rate": 1.603488509930673e-05, "loss": 0.2577, "step": 15824 }, { "epoch": 0.29381077739368716, "grad_norm": 0.2909925878047943, "learning_rate": 1.60339549316145e-05, "loss": 0.2367, "step": 15826 }, { "epoch": 0.2938479075311058, "grad_norm": 0.41317829489707947, "learning_rate": 1.603302468181822e-05, "loss": 0.2127, "step": 15828 }, { "epoch": 0.2938850376685244, "grad_norm": 0.26684844493865967, "learning_rate": 1.6032094349930543e-05, "loss": 0.3748, "step": 15830 }, { "epoch": 0.29392216780594305, "grad_norm": 0.6406076550483704, "learning_rate": 1.6031163935964128e-05, "loss": 0.262, "step": 15832 }, { "epoch": 0.2939592979433617, "grad_norm": 0.4238293170928955, "learning_rate": 1.6030233439931645e-05, "loss": 0.353, "step": 15834 }, { "epoch": 0.2939964280807803, "grad_norm": 0.3279445171356201, "learning_rate": 1.6029302861845742e-05, "loss": 0.1309, "step": 15836 }, { "epoch": 0.294033558218199, "grad_norm": 0.46401703357696533, "learning_rate": 1.6028372201719088e-05, "loss": 0.3902, "step": 15838 }, { "epoch": 0.2940706883556176, "grad_norm": 0.5426220893859863, "learning_rate": 1.6027441459564347e-05, "loss": 0.4946, "step": 15840 }, { "epoch": 0.29410781849303624, "grad_norm": 0.40311840176582336, "learning_rate": 1.6026510635394176e-05, "loss": 0.3499, "step": 15842 }, { "epoch": 0.29414494863045487, "grad_norm": 0.4529612362384796, "learning_rate": 1.6025579729221253e-05, "loss": 0.2847, "step": 15844 }, { "epoch": 0.2941820787678735, "grad_norm": 0.43816739320755005, "learning_rate": 1.6024648741058236e-05, "loss": 0.238, "step": 15846 }, { "epoch": 0.2942192089052922, "grad_norm": 0.33676931262016296, "learning_rate": 1.6023717670917793e-05, "loss": 0.305, "step": 15848 }, { "epoch": 0.2942563390427108, "grad_norm": 0.40976738929748535, "learning_rate": 1.60227865188126e-05, "loss": 0.371, "step": 15850 }, { "epoch": 0.29429346918012944, "grad_norm": 0.22584185004234314, "learning_rate": 1.602185528475532e-05, "loss": 0.3181, "step": 15852 }, { "epoch": 0.29433059931754807, "grad_norm": 0.5257530212402344, "learning_rate": 1.602092396875863e-05, "loss": 0.3127, "step": 15854 }, { "epoch": 0.2943677294549667, "grad_norm": 0.32537636160850525, "learning_rate": 1.6019992570835196e-05, "loss": 0.3968, "step": 15856 }, { "epoch": 0.2944048595923853, "grad_norm": 0.30383968353271484, "learning_rate": 1.60190610909977e-05, "loss": 0.2705, "step": 15858 }, { "epoch": 0.294441989729804, "grad_norm": 0.29775476455688477, "learning_rate": 1.6018129529258807e-05, "loss": 0.2262, "step": 15860 }, { "epoch": 0.29447911986722264, "grad_norm": 0.48876088857650757, "learning_rate": 1.60171978856312e-05, "loss": 0.1981, "step": 15862 }, { "epoch": 0.29451625000464127, "grad_norm": 0.30494779348373413, "learning_rate": 1.601626616012755e-05, "loss": 0.1458, "step": 15864 }, { "epoch": 0.2945533801420599, "grad_norm": 0.34384116530418396, "learning_rate": 1.6015334352760544e-05, "loss": 0.4137, "step": 15866 }, { "epoch": 0.2945905102794785, "grad_norm": 0.33940520882606506, "learning_rate": 1.6014402463542852e-05, "loss": 0.3839, "step": 15868 }, { "epoch": 0.2946276404168972, "grad_norm": 0.36387044191360474, "learning_rate": 1.6013470492487158e-05, "loss": 0.4268, "step": 15870 }, { "epoch": 0.29466477055431584, "grad_norm": 0.3652462065219879, "learning_rate": 1.6012538439606142e-05, "loss": 0.2652, "step": 15872 }, { "epoch": 0.29470190069173446, "grad_norm": 0.25367286801338196, "learning_rate": 1.601160630491249e-05, "loss": 0.0947, "step": 15874 }, { "epoch": 0.2947390308291531, "grad_norm": 0.275566965341568, "learning_rate": 1.601067408841888e-05, "loss": 0.5275, "step": 15876 }, { "epoch": 0.2947761609665717, "grad_norm": 0.3932128846645355, "learning_rate": 1.6009741790138004e-05, "loss": 0.3335, "step": 15878 }, { "epoch": 0.29481329110399035, "grad_norm": 0.19389225542545319, "learning_rate": 1.6008809410082537e-05, "loss": 0.2799, "step": 15880 }, { "epoch": 0.29485042124140903, "grad_norm": 0.7034175395965576, "learning_rate": 1.6007876948265176e-05, "loss": 0.385, "step": 15882 }, { "epoch": 0.29488755137882766, "grad_norm": 0.39741891622543335, "learning_rate": 1.600694440469861e-05, "loss": 0.4252, "step": 15884 }, { "epoch": 0.2949246815162463, "grad_norm": 0.25038355588912964, "learning_rate": 1.6006011779395515e-05, "loss": 0.2614, "step": 15886 }, { "epoch": 0.2949618116536649, "grad_norm": 0.41598060727119446, "learning_rate": 1.6005079072368594e-05, "loss": 0.1831, "step": 15888 }, { "epoch": 0.29499894179108355, "grad_norm": 0.5460042953491211, "learning_rate": 1.6004146283630535e-05, "loss": 0.4931, "step": 15890 }, { "epoch": 0.29503607192850223, "grad_norm": 0.5921924114227295, "learning_rate": 1.600321341319403e-05, "loss": 0.2979, "step": 15892 }, { "epoch": 0.29507320206592086, "grad_norm": 0.30093103647232056, "learning_rate": 1.600228046107177e-05, "loss": 0.3029, "step": 15894 }, { "epoch": 0.2951103322033395, "grad_norm": 0.3172679543495178, "learning_rate": 1.6001347427276454e-05, "loss": 0.3465, "step": 15896 }, { "epoch": 0.2951474623407581, "grad_norm": 0.49164605140686035, "learning_rate": 1.6000414311820775e-05, "loss": 0.3912, "step": 15898 }, { "epoch": 0.29518459247817674, "grad_norm": 0.5129261612892151, "learning_rate": 1.599948111471743e-05, "loss": 0.2846, "step": 15900 }, { "epoch": 0.2952217226155954, "grad_norm": 0.40990951657295227, "learning_rate": 1.5998547835979114e-05, "loss": 0.4367, "step": 15902 }, { "epoch": 0.29525885275301406, "grad_norm": 0.3677123188972473, "learning_rate": 1.5997614475618533e-05, "loss": 0.3194, "step": 15904 }, { "epoch": 0.2952959828904327, "grad_norm": 0.3080374002456665, "learning_rate": 1.5996681033648384e-05, "loss": 0.2761, "step": 15906 }, { "epoch": 0.2953331130278513, "grad_norm": 0.32889053225517273, "learning_rate": 1.599574751008137e-05, "loss": 0.137, "step": 15908 }, { "epoch": 0.29537024316526994, "grad_norm": 0.3005254864692688, "learning_rate": 1.5994813904930187e-05, "loss": 0.3357, "step": 15910 }, { "epoch": 0.29540737330268857, "grad_norm": 0.39450713992118835, "learning_rate": 1.599388021820755e-05, "loss": 0.1212, "step": 15912 }, { "epoch": 0.29544450344010725, "grad_norm": 0.391381174325943, "learning_rate": 1.599294644992615e-05, "loss": 0.1835, "step": 15914 }, { "epoch": 0.2954816335775259, "grad_norm": 0.41406121850013733, "learning_rate": 1.5992012600098704e-05, "loss": 0.265, "step": 15916 }, { "epoch": 0.2955187637149445, "grad_norm": 0.4220198690891266, "learning_rate": 1.5991078668737916e-05, "loss": 0.26, "step": 15918 }, { "epoch": 0.29555589385236314, "grad_norm": 0.3121607303619385, "learning_rate": 1.5990144655856488e-05, "loss": 0.3302, "step": 15920 }, { "epoch": 0.29559302398978177, "grad_norm": 0.35688114166259766, "learning_rate": 1.598921056146714e-05, "loss": 0.2773, "step": 15922 }, { "epoch": 0.29563015412720045, "grad_norm": 0.5068998336791992, "learning_rate": 1.598827638558257e-05, "loss": 0.3802, "step": 15924 }, { "epoch": 0.2956672842646191, "grad_norm": 0.24365411698818207, "learning_rate": 1.5987342128215496e-05, "loss": 0.385, "step": 15926 }, { "epoch": 0.2957044144020377, "grad_norm": 0.33499500155448914, "learning_rate": 1.5986407789378632e-05, "loss": 0.308, "step": 15928 }, { "epoch": 0.29574154453945634, "grad_norm": 0.3394600749015808, "learning_rate": 1.598547336908469e-05, "loss": 0.3867, "step": 15930 }, { "epoch": 0.29577867467687496, "grad_norm": 0.32194146513938904, "learning_rate": 1.5984538867346383e-05, "loss": 0.3396, "step": 15932 }, { "epoch": 0.2958158048142936, "grad_norm": 0.6228675246238708, "learning_rate": 1.5983604284176425e-05, "loss": 0.3322, "step": 15934 }, { "epoch": 0.2958529349517123, "grad_norm": 0.37650227546691895, "learning_rate": 1.598266961958754e-05, "loss": 0.2935, "step": 15936 }, { "epoch": 0.2958900650891309, "grad_norm": 0.2601744532585144, "learning_rate": 1.5981734873592437e-05, "loss": 0.3826, "step": 15938 }, { "epoch": 0.29592719522654953, "grad_norm": 0.5472261905670166, "learning_rate": 1.5980800046203846e-05, "loss": 0.4356, "step": 15940 }, { "epoch": 0.29596432536396816, "grad_norm": 0.2757434546947479, "learning_rate": 1.5979865137434477e-05, "loss": 0.2561, "step": 15942 }, { "epoch": 0.2960014555013868, "grad_norm": 0.3484655022621155, "learning_rate": 1.5978930147297057e-05, "loss": 0.3614, "step": 15944 }, { "epoch": 0.2960385856388055, "grad_norm": 0.2160046100616455, "learning_rate": 1.5977995075804304e-05, "loss": 0.1595, "step": 15946 }, { "epoch": 0.2960757157762241, "grad_norm": 0.34649547934532166, "learning_rate": 1.5977059922968944e-05, "loss": 0.1787, "step": 15948 }, { "epoch": 0.29611284591364273, "grad_norm": 0.3408724367618561, "learning_rate": 1.5976124688803706e-05, "loss": 0.4858, "step": 15950 }, { "epoch": 0.29614997605106136, "grad_norm": 0.2644693851470947, "learning_rate": 1.5975189373321307e-05, "loss": 0.1441, "step": 15952 }, { "epoch": 0.29618710618848, "grad_norm": 0.30196136236190796, "learning_rate": 1.597425397653448e-05, "loss": 0.2903, "step": 15954 }, { "epoch": 0.2962242363258986, "grad_norm": 0.362359881401062, "learning_rate": 1.5973318498455953e-05, "loss": 0.3668, "step": 15956 }, { "epoch": 0.2962613664633173, "grad_norm": 0.4532642662525177, "learning_rate": 1.597238293909845e-05, "loss": 0.3152, "step": 15958 }, { "epoch": 0.29629849660073593, "grad_norm": 0.5208582878112793, "learning_rate": 1.5971447298474708e-05, "loss": 0.3827, "step": 15960 }, { "epoch": 0.29633562673815456, "grad_norm": 0.3784242868423462, "learning_rate": 1.5970511576597454e-05, "loss": 0.2326, "step": 15962 }, { "epoch": 0.2963727568755732, "grad_norm": 0.3939746022224426, "learning_rate": 1.596957577347942e-05, "loss": 0.2906, "step": 15964 }, { "epoch": 0.2964098870129918, "grad_norm": 0.2783624827861786, "learning_rate": 1.5968639889133343e-05, "loss": 0.4099, "step": 15966 }, { "epoch": 0.2964470171504105, "grad_norm": 0.32852616906166077, "learning_rate": 1.596770392357195e-05, "loss": 0.365, "step": 15968 }, { "epoch": 0.2964841472878291, "grad_norm": 0.5242838859558105, "learning_rate": 1.5966767876807986e-05, "loss": 0.3891, "step": 15970 }, { "epoch": 0.29652127742524775, "grad_norm": 0.3619416356086731, "learning_rate": 1.5965831748854185e-05, "loss": 0.2109, "step": 15972 }, { "epoch": 0.2965584075626664, "grad_norm": 0.39686986804008484, "learning_rate": 1.596489553972328e-05, "loss": 0.3167, "step": 15974 }, { "epoch": 0.296595537700085, "grad_norm": 0.4223601520061493, "learning_rate": 1.5963959249428017e-05, "loss": 0.2763, "step": 15976 }, { "epoch": 0.2966326678375037, "grad_norm": 0.5149276852607727, "learning_rate": 1.5963022877981132e-05, "loss": 0.5316, "step": 15978 }, { "epoch": 0.2966697979749223, "grad_norm": 0.40934059023857117, "learning_rate": 1.5962086425395368e-05, "loss": 0.3323, "step": 15980 }, { "epoch": 0.29670692811234095, "grad_norm": 0.5370131134986877, "learning_rate": 1.5961149891683462e-05, "loss": 0.2522, "step": 15982 }, { "epoch": 0.2967440582497596, "grad_norm": 0.3608935475349426, "learning_rate": 1.5960213276858163e-05, "loss": 0.4717, "step": 15984 }, { "epoch": 0.2967811883871782, "grad_norm": 1.8925145864486694, "learning_rate": 1.5959276580932216e-05, "loss": 0.3949, "step": 15986 }, { "epoch": 0.29681831852459684, "grad_norm": 0.3202454149723053, "learning_rate": 1.5958339803918366e-05, "loss": 0.4012, "step": 15988 }, { "epoch": 0.2968554486620155, "grad_norm": 0.3580626845359802, "learning_rate": 1.5957402945829355e-05, "loss": 0.2624, "step": 15990 }, { "epoch": 0.29689257879943415, "grad_norm": 0.3558816909790039, "learning_rate": 1.595646600667794e-05, "loss": 0.2249, "step": 15992 }, { "epoch": 0.2969297089368528, "grad_norm": 0.38526681065559387, "learning_rate": 1.5955528986476857e-05, "loss": 0.3184, "step": 15994 }, { "epoch": 0.2969668390742714, "grad_norm": 0.37881582975387573, "learning_rate": 1.5954591885238866e-05, "loss": 0.3373, "step": 15996 }, { "epoch": 0.29700396921169003, "grad_norm": 0.27788904309272766, "learning_rate": 1.5953654702976716e-05, "loss": 0.2519, "step": 15998 }, { "epoch": 0.2970410993491087, "grad_norm": 0.39048248529434204, "learning_rate": 1.5952717439703162e-05, "loss": 0.3716, "step": 16000 }, { "epoch": 0.29707822948652735, "grad_norm": 0.3973650336265564, "learning_rate": 1.595178009543095e-05, "loss": 0.2755, "step": 16002 }, { "epoch": 0.297115359623946, "grad_norm": 0.281314879655838, "learning_rate": 1.5950842670172842e-05, "loss": 0.3085, "step": 16004 }, { "epoch": 0.2971524897613646, "grad_norm": 0.6539525985717773, "learning_rate": 1.5949905163941585e-05, "loss": 0.179, "step": 16006 }, { "epoch": 0.29718961989878323, "grad_norm": 0.4613947570323944, "learning_rate": 1.5948967576749944e-05, "loss": 0.4279, "step": 16008 }, { "epoch": 0.29722675003620186, "grad_norm": 0.2610434889793396, "learning_rate": 1.5948029908610677e-05, "loss": 0.3779, "step": 16010 }, { "epoch": 0.29726388017362054, "grad_norm": 0.3161751329898834, "learning_rate": 1.594709215953653e-05, "loss": 0.2509, "step": 16012 }, { "epoch": 0.29730101031103917, "grad_norm": 0.3024757504463196, "learning_rate": 1.594615432954028e-05, "loss": 0.2228, "step": 16014 }, { "epoch": 0.2973381404484578, "grad_norm": 0.37941187620162964, "learning_rate": 1.5945216418634675e-05, "loss": 0.4287, "step": 16016 }, { "epoch": 0.29737527058587643, "grad_norm": 0.3800159692764282, "learning_rate": 1.5944278426832488e-05, "loss": 0.1879, "step": 16018 }, { "epoch": 0.29741240072329506, "grad_norm": 0.3999338448047638, "learning_rate": 1.5943340354146477e-05, "loss": 0.2752, "step": 16020 }, { "epoch": 0.29744953086071374, "grad_norm": 0.5095757842063904, "learning_rate": 1.5942402200589403e-05, "loss": 0.4635, "step": 16022 }, { "epoch": 0.29748666099813237, "grad_norm": 0.31285855174064636, "learning_rate": 1.5941463966174032e-05, "loss": 0.1266, "step": 16024 }, { "epoch": 0.297523791135551, "grad_norm": 0.359053373336792, "learning_rate": 1.594052565091314e-05, "loss": 0.4553, "step": 16026 }, { "epoch": 0.2975609212729696, "grad_norm": 1.1174899339675903, "learning_rate": 1.593958725481948e-05, "loss": 0.3598, "step": 16028 }, { "epoch": 0.29759805141038825, "grad_norm": 0.33334434032440186, "learning_rate": 1.5938648777905838e-05, "loss": 0.4043, "step": 16030 }, { "epoch": 0.2976351815478069, "grad_norm": 0.22132715582847595, "learning_rate": 1.5937710220184967e-05, "loss": 0.4288, "step": 16032 }, { "epoch": 0.29767231168522557, "grad_norm": 0.47149693965911865, "learning_rate": 1.5936771581669648e-05, "loss": 0.6128, "step": 16034 }, { "epoch": 0.2977094418226442, "grad_norm": 0.34123945236206055, "learning_rate": 1.593583286237265e-05, "loss": 0.3032, "step": 16036 }, { "epoch": 0.2977465719600628, "grad_norm": 0.2863142490386963, "learning_rate": 1.5934894062306747e-05, "loss": 0.4077, "step": 16038 }, { "epoch": 0.29778370209748145, "grad_norm": 0.46670910716056824, "learning_rate": 1.5933955181484714e-05, "loss": 0.4023, "step": 16040 }, { "epoch": 0.2978208322349001, "grad_norm": 0.3590395748615265, "learning_rate": 1.5933016219919323e-05, "loss": 0.4234, "step": 16042 }, { "epoch": 0.29785796237231876, "grad_norm": 0.3236614167690277, "learning_rate": 1.5932077177623354e-05, "loss": 0.3969, "step": 16044 }, { "epoch": 0.2978950925097374, "grad_norm": 0.27161961793899536, "learning_rate": 1.5931138054609584e-05, "loss": 0.3299, "step": 16046 }, { "epoch": 0.297932222647156, "grad_norm": 0.32223424315452576, "learning_rate": 1.593019885089079e-05, "loss": 0.4028, "step": 16048 }, { "epoch": 0.29796935278457465, "grad_norm": 0.23701806366443634, "learning_rate": 1.592925956647975e-05, "loss": 0.2521, "step": 16050 }, { "epoch": 0.2980064829219933, "grad_norm": 0.3584916591644287, "learning_rate": 1.592832020138925e-05, "loss": 0.2643, "step": 16052 }, { "epoch": 0.29804361305941196, "grad_norm": 0.3109648823738098, "learning_rate": 1.592738075563207e-05, "loss": 0.207, "step": 16054 }, { "epoch": 0.2980807431968306, "grad_norm": 0.5483985543251038, "learning_rate": 1.592644122922099e-05, "loss": 0.3383, "step": 16056 }, { "epoch": 0.2981178733342492, "grad_norm": 0.3502201735973358, "learning_rate": 1.5925501622168798e-05, "loss": 0.4377, "step": 16058 }, { "epoch": 0.29815500347166785, "grad_norm": 0.39416614174842834, "learning_rate": 1.5924561934488276e-05, "loss": 0.4753, "step": 16060 }, { "epoch": 0.2981921336090865, "grad_norm": 0.3235785961151123, "learning_rate": 1.592362216619222e-05, "loss": 0.492, "step": 16062 }, { "epoch": 0.2982292637465051, "grad_norm": 0.30711907148361206, "learning_rate": 1.5922682317293402e-05, "loss": 0.5145, "step": 16064 }, { "epoch": 0.2982663938839238, "grad_norm": 0.2883948087692261, "learning_rate": 1.592174238780462e-05, "loss": 0.4891, "step": 16066 }, { "epoch": 0.2983035240213424, "grad_norm": 0.2432112842798233, "learning_rate": 1.5920802377738663e-05, "loss": 0.3993, "step": 16068 }, { "epoch": 0.29834065415876104, "grad_norm": 0.4724024832248688, "learning_rate": 1.5919862287108315e-05, "loss": 0.4229, "step": 16070 }, { "epoch": 0.29837778429617967, "grad_norm": 0.2420969307422638, "learning_rate": 1.591892211592638e-05, "loss": 0.4508, "step": 16072 }, { "epoch": 0.2984149144335983, "grad_norm": 0.3737691044807434, "learning_rate": 1.5917981864205643e-05, "loss": 0.1867, "step": 16074 }, { "epoch": 0.298452044571017, "grad_norm": 0.849558413028717, "learning_rate": 1.59170415319589e-05, "loss": 0.2895, "step": 16076 }, { "epoch": 0.2984891747084356, "grad_norm": 0.7339929342269897, "learning_rate": 1.591610111919894e-05, "loss": 0.3812, "step": 16078 }, { "epoch": 0.29852630484585424, "grad_norm": 0.37790122628211975, "learning_rate": 1.5915160625938568e-05, "loss": 0.3223, "step": 16080 }, { "epoch": 0.29856343498327287, "grad_norm": 0.25142228603363037, "learning_rate": 1.5914220052190578e-05, "loss": 0.1882, "step": 16082 }, { "epoch": 0.2986005651206915, "grad_norm": 0.4242027699947357, "learning_rate": 1.591327939796777e-05, "loss": 0.2595, "step": 16084 }, { "epoch": 0.2986376952581101, "grad_norm": 0.5102601051330566, "learning_rate": 1.591233866328294e-05, "loss": 0.4069, "step": 16086 }, { "epoch": 0.2986748253955288, "grad_norm": 0.27951785922050476, "learning_rate": 1.591139784814889e-05, "loss": 0.3433, "step": 16088 }, { "epoch": 0.29871195553294744, "grad_norm": 0.2540474236011505, "learning_rate": 1.5910456952578422e-05, "loss": 0.2422, "step": 16090 }, { "epoch": 0.29874908567036607, "grad_norm": 0.4229940176010132, "learning_rate": 1.590951597658434e-05, "loss": 0.3142, "step": 16092 }, { "epoch": 0.2987862158077847, "grad_norm": 0.28358596563339233, "learning_rate": 1.5908574920179444e-05, "loss": 0.2704, "step": 16094 }, { "epoch": 0.2988233459452033, "grad_norm": 0.21484948694705963, "learning_rate": 1.590763378337654e-05, "loss": 0.2397, "step": 16096 }, { "epoch": 0.298860476082622, "grad_norm": 0.8725574612617493, "learning_rate": 1.5906692566188443e-05, "loss": 0.403, "step": 16098 }, { "epoch": 0.29889760622004063, "grad_norm": 0.25622326135635376, "learning_rate": 1.590575126862795e-05, "loss": 0.2329, "step": 16100 }, { "epoch": 0.29893473635745926, "grad_norm": 0.38767457008361816, "learning_rate": 1.5904809890707865e-05, "loss": 0.3103, "step": 16102 }, { "epoch": 0.2989718664948779, "grad_norm": 0.2989526391029358, "learning_rate": 1.590386843244101e-05, "loss": 0.4354, "step": 16104 }, { "epoch": 0.2990089966322965, "grad_norm": 0.2956234812736511, "learning_rate": 1.5902926893840192e-05, "loss": 0.4601, "step": 16106 }, { "epoch": 0.29904612676971515, "grad_norm": 0.3971045911312103, "learning_rate": 1.5901985274918218e-05, "loss": 0.4183, "step": 16108 }, { "epoch": 0.29908325690713383, "grad_norm": 0.381245493888855, "learning_rate": 1.59010435756879e-05, "loss": 0.3589, "step": 16110 }, { "epoch": 0.29912038704455246, "grad_norm": 1.8292866945266724, "learning_rate": 1.590010179616206e-05, "loss": 0.2834, "step": 16112 }, { "epoch": 0.2991575171819711, "grad_norm": 0.19837979972362518, "learning_rate": 1.5899159936353506e-05, "loss": 0.2525, "step": 16114 }, { "epoch": 0.2991946473193897, "grad_norm": 0.4586329758167267, "learning_rate": 1.5898217996275053e-05, "loss": 0.2402, "step": 16116 }, { "epoch": 0.29923177745680835, "grad_norm": 0.42977920174598694, "learning_rate": 1.589727597593952e-05, "loss": 0.2989, "step": 16118 }, { "epoch": 0.29926890759422703, "grad_norm": 0.4592747092247009, "learning_rate": 1.5896333875359727e-05, "loss": 0.2732, "step": 16120 }, { "epoch": 0.29930603773164566, "grad_norm": 0.48158320784568787, "learning_rate": 1.5895391694548493e-05, "loss": 0.3802, "step": 16122 }, { "epoch": 0.2993431678690643, "grad_norm": 0.3117944896221161, "learning_rate": 1.5894449433518634e-05, "loss": 0.2182, "step": 16124 }, { "epoch": 0.2993802980064829, "grad_norm": 0.2854127287864685, "learning_rate": 1.5893507092282974e-05, "loss": 0.4905, "step": 16126 }, { "epoch": 0.29941742814390154, "grad_norm": 0.38067081570625305, "learning_rate": 1.589256467085434e-05, "loss": 0.2131, "step": 16128 }, { "epoch": 0.2994545582813202, "grad_norm": 0.30732300877571106, "learning_rate": 1.5891622169245544e-05, "loss": 0.4093, "step": 16130 }, { "epoch": 0.29949168841873886, "grad_norm": 0.3696860074996948, "learning_rate": 1.5890679587469424e-05, "loss": 0.3965, "step": 16132 }, { "epoch": 0.2995288185561575, "grad_norm": 0.5029286742210388, "learning_rate": 1.5889736925538793e-05, "loss": 0.2333, "step": 16134 }, { "epoch": 0.2995659486935761, "grad_norm": 0.4822166860103607, "learning_rate": 1.5888794183466492e-05, "loss": 0.453, "step": 16136 }, { "epoch": 0.29960307883099474, "grad_norm": 0.5186971426010132, "learning_rate": 1.5887851361265336e-05, "loss": 0.2685, "step": 16138 }, { "epoch": 0.29964020896841337, "grad_norm": 0.5340501666069031, "learning_rate": 1.588690845894816e-05, "loss": 0.2185, "step": 16140 }, { "epoch": 0.29967733910583205, "grad_norm": 0.3281538188457489, "learning_rate": 1.5885965476527794e-05, "loss": 0.3392, "step": 16142 }, { "epoch": 0.2997144692432507, "grad_norm": 0.3609340786933899, "learning_rate": 1.5885022414017068e-05, "loss": 0.3292, "step": 16144 }, { "epoch": 0.2997515993806693, "grad_norm": 0.3774430751800537, "learning_rate": 1.5884079271428816e-05, "loss": 0.2892, "step": 16146 }, { "epoch": 0.29978872951808794, "grad_norm": 0.2638816237449646, "learning_rate": 1.5883136048775866e-05, "loss": 0.2368, "step": 16148 }, { "epoch": 0.29982585965550657, "grad_norm": 0.4817085862159729, "learning_rate": 1.5882192746071062e-05, "loss": 0.2575, "step": 16150 }, { "epoch": 0.29986298979292525, "grad_norm": 0.4615142047405243, "learning_rate": 1.588124936332723e-05, "loss": 0.1181, "step": 16152 }, { "epoch": 0.2999001199303439, "grad_norm": 0.3382371664047241, "learning_rate": 1.588030590055721e-05, "loss": 0.1331, "step": 16154 }, { "epoch": 0.2999372500677625, "grad_norm": 0.6113383173942566, "learning_rate": 1.5879362357773844e-05, "loss": 0.2208, "step": 16156 }, { "epoch": 0.29997438020518113, "grad_norm": 0.5485912561416626, "learning_rate": 1.5878418734989963e-05, "loss": 0.417, "step": 16158 }, { "epoch": 0.30001151034259976, "grad_norm": 0.6068719029426575, "learning_rate": 1.5877475032218416e-05, "loss": 0.2609, "step": 16160 }, { "epoch": 0.3000486404800184, "grad_norm": 0.5113223195075989, "learning_rate": 1.5876531249472034e-05, "loss": 0.2562, "step": 16162 }, { "epoch": 0.3000857706174371, "grad_norm": 0.28108909726142883, "learning_rate": 1.5875587386763665e-05, "loss": 0.2683, "step": 16164 }, { "epoch": 0.3001229007548557, "grad_norm": 0.35743606090545654, "learning_rate": 1.587464344410615e-05, "loss": 0.1832, "step": 16166 }, { "epoch": 0.30016003089227433, "grad_norm": 0.37228259444236755, "learning_rate": 1.587369942151234e-05, "loss": 0.3436, "step": 16168 }, { "epoch": 0.30019716102969296, "grad_norm": 0.4015525281429291, "learning_rate": 1.5872755318995066e-05, "loss": 0.2902, "step": 16170 }, { "epoch": 0.3002342911671116, "grad_norm": 0.43857961893081665, "learning_rate": 1.587181113656719e-05, "loss": 0.2085, "step": 16172 }, { "epoch": 0.3002714213045303, "grad_norm": 0.3534403443336487, "learning_rate": 1.5870866874241554e-05, "loss": 0.3567, "step": 16174 }, { "epoch": 0.3003085514419489, "grad_norm": 0.4879935085773468, "learning_rate": 1.5869922532031e-05, "loss": 0.2511, "step": 16176 }, { "epoch": 0.30034568157936753, "grad_norm": 0.3121826946735382, "learning_rate": 1.586897810994838e-05, "loss": 0.2975, "step": 16178 }, { "epoch": 0.30038281171678616, "grad_norm": 0.3071901798248291, "learning_rate": 1.5868033608006553e-05, "loss": 0.3155, "step": 16180 }, { "epoch": 0.3004199418542048, "grad_norm": 0.3504277169704437, "learning_rate": 1.5867089026218366e-05, "loss": 0.2508, "step": 16182 }, { "epoch": 0.3004570719916234, "grad_norm": 0.45316821336746216, "learning_rate": 1.5866144364596666e-05, "loss": 0.1407, "step": 16184 }, { "epoch": 0.3004942021290421, "grad_norm": 0.5187512040138245, "learning_rate": 1.586519962315432e-05, "loss": 0.5181, "step": 16186 }, { "epoch": 0.3005313322664607, "grad_norm": 1.0387310981750488, "learning_rate": 1.5864254801904168e-05, "loss": 0.2628, "step": 16188 }, { "epoch": 0.30056846240387936, "grad_norm": 0.43464142084121704, "learning_rate": 1.5863309900859078e-05, "loss": 0.3263, "step": 16190 }, { "epoch": 0.300605592541298, "grad_norm": 0.6859362125396729, "learning_rate": 1.58623649200319e-05, "loss": 0.3203, "step": 16192 }, { "epoch": 0.3006427226787166, "grad_norm": 0.46165481209754944, "learning_rate": 1.5861419859435497e-05, "loss": 0.3308, "step": 16194 }, { "epoch": 0.3006798528161353, "grad_norm": 0.446772038936615, "learning_rate": 1.586047471908273e-05, "loss": 0.362, "step": 16196 }, { "epoch": 0.3007169829535539, "grad_norm": 0.2324133962392807, "learning_rate": 1.585952949898645e-05, "loss": 0.2184, "step": 16198 }, { "epoch": 0.30075411309097255, "grad_norm": 0.9389732480049133, "learning_rate": 1.585858419915953e-05, "loss": 0.3131, "step": 16200 }, { "epoch": 0.3007912432283912, "grad_norm": 0.48875007033348083, "learning_rate": 1.5857638819614826e-05, "loss": 0.318, "step": 16202 }, { "epoch": 0.3008283733658098, "grad_norm": 0.47570088505744934, "learning_rate": 1.58566933603652e-05, "loss": 0.2984, "step": 16204 }, { "epoch": 0.3008655035032285, "grad_norm": 0.2943958342075348, "learning_rate": 1.5855747821423523e-05, "loss": 0.3832, "step": 16206 }, { "epoch": 0.3009026336406471, "grad_norm": 0.25589779019355774, "learning_rate": 1.5854802202802657e-05, "loss": 0.151, "step": 16208 }, { "epoch": 0.30093976377806575, "grad_norm": 0.4360898733139038, "learning_rate": 1.5853856504515473e-05, "loss": 0.165, "step": 16210 }, { "epoch": 0.3009768939154844, "grad_norm": 0.4053947627544403, "learning_rate": 1.5852910726574836e-05, "loss": 0.3924, "step": 16212 }, { "epoch": 0.301014024052903, "grad_norm": 1.1675968170166016, "learning_rate": 1.5851964868993613e-05, "loss": 0.3263, "step": 16214 }, { "epoch": 0.30105115419032163, "grad_norm": 0.21326427161693573, "learning_rate": 1.5851018931784676e-05, "loss": 0.2199, "step": 16216 }, { "epoch": 0.3010882843277403, "grad_norm": 0.482308566570282, "learning_rate": 1.5850072914960903e-05, "loss": 0.261, "step": 16218 }, { "epoch": 0.30112541446515895, "grad_norm": 0.3268890082836151, "learning_rate": 1.5849126818535152e-05, "loss": 0.142, "step": 16220 }, { "epoch": 0.3011625446025776, "grad_norm": 0.4247482120990753, "learning_rate": 1.584818064252031e-05, "loss": 0.2335, "step": 16222 }, { "epoch": 0.3011996747399962, "grad_norm": 0.417898952960968, "learning_rate": 1.5847234386929247e-05, "loss": 0.3243, "step": 16224 }, { "epoch": 0.30123680487741483, "grad_norm": 0.3287702202796936, "learning_rate": 1.584628805177484e-05, "loss": 0.1683, "step": 16226 }, { "epoch": 0.3012739350148335, "grad_norm": 0.38879936933517456, "learning_rate": 1.584534163706996e-05, "loss": 0.231, "step": 16228 }, { "epoch": 0.30131106515225214, "grad_norm": 0.4904578626155853, "learning_rate": 1.5844395142827492e-05, "loss": 0.3104, "step": 16230 }, { "epoch": 0.3013481952896708, "grad_norm": 0.30873847007751465, "learning_rate": 1.584344856906031e-05, "loss": 0.343, "step": 16232 }, { "epoch": 0.3013853254270894, "grad_norm": 0.5864978432655334, "learning_rate": 1.5842501915781298e-05, "loss": 0.4186, "step": 16234 }, { "epoch": 0.30142245556450803, "grad_norm": 0.34228113293647766, "learning_rate": 1.5841555183003333e-05, "loss": 0.2788, "step": 16236 }, { "epoch": 0.30145958570192666, "grad_norm": 0.7428646683692932, "learning_rate": 1.5840608370739305e-05, "loss": 0.2834, "step": 16238 }, { "epoch": 0.30149671583934534, "grad_norm": 0.3483157753944397, "learning_rate": 1.5839661479002085e-05, "loss": 0.1976, "step": 16240 }, { "epoch": 0.30153384597676397, "grad_norm": 0.373030424118042, "learning_rate": 1.583871450780457e-05, "loss": 0.3706, "step": 16242 }, { "epoch": 0.3015709761141826, "grad_norm": 0.3886907696723938, "learning_rate": 1.5837767457159635e-05, "loss": 0.1152, "step": 16244 }, { "epoch": 0.3016081062516012, "grad_norm": 0.48107388615608215, "learning_rate": 1.5836820327080172e-05, "loss": 0.2688, "step": 16246 }, { "epoch": 0.30164523638901986, "grad_norm": 0.4616096317768097, "learning_rate": 1.5835873117579067e-05, "loss": 0.1599, "step": 16248 }, { "epoch": 0.30168236652643854, "grad_norm": 0.4929538071155548, "learning_rate": 1.5834925828669214e-05, "loss": 0.1403, "step": 16250 }, { "epoch": 0.30171949666385717, "grad_norm": 0.4717556834220886, "learning_rate": 1.5833978460363492e-05, "loss": 0.2582, "step": 16252 }, { "epoch": 0.3017566268012758, "grad_norm": 0.33128252625465393, "learning_rate": 1.58330310126748e-05, "loss": 0.184, "step": 16254 }, { "epoch": 0.3017937569386944, "grad_norm": 0.6636273264884949, "learning_rate": 1.583208348561603e-05, "loss": 0.2302, "step": 16256 }, { "epoch": 0.30183088707611305, "grad_norm": 0.34337711334228516, "learning_rate": 1.583113587920007e-05, "loss": 0.4143, "step": 16258 }, { "epoch": 0.3018680172135317, "grad_norm": 0.3692743182182312, "learning_rate": 1.583018819343982e-05, "loss": 0.099, "step": 16260 }, { "epoch": 0.30190514735095036, "grad_norm": 0.2798841893672943, "learning_rate": 1.582924042834817e-05, "loss": 0.2229, "step": 16262 }, { "epoch": 0.301942277488369, "grad_norm": 0.39461827278137207, "learning_rate": 1.582829258393802e-05, "loss": 0.4144, "step": 16264 }, { "epoch": 0.3019794076257876, "grad_norm": 0.38346654176712036, "learning_rate": 1.5827344660222264e-05, "loss": 0.4, "step": 16266 }, { "epoch": 0.30201653776320625, "grad_norm": 0.4233570098876953, "learning_rate": 1.5826396657213803e-05, "loss": 0.2624, "step": 16268 }, { "epoch": 0.3020536679006249, "grad_norm": 0.37801480293273926, "learning_rate": 1.5825448574925535e-05, "loss": 0.4319, "step": 16270 }, { "epoch": 0.30209079803804356, "grad_norm": 0.5160408020019531, "learning_rate": 1.582450041337036e-05, "loss": 0.3099, "step": 16272 }, { "epoch": 0.3021279281754622, "grad_norm": 0.4123377799987793, "learning_rate": 1.582355217256118e-05, "loss": 0.3165, "step": 16274 }, { "epoch": 0.3021650583128808, "grad_norm": 0.31174302101135254, "learning_rate": 1.5822603852510904e-05, "loss": 0.3122, "step": 16276 }, { "epoch": 0.30220218845029945, "grad_norm": 0.3060372769832611, "learning_rate": 1.5821655453232427e-05, "loss": 0.3226, "step": 16278 }, { "epoch": 0.3022393185877181, "grad_norm": 0.5194147825241089, "learning_rate": 1.5820706974738657e-05, "loss": 0.4578, "step": 16280 }, { "epoch": 0.30227644872513676, "grad_norm": 0.33621835708618164, "learning_rate": 1.58197584170425e-05, "loss": 0.4504, "step": 16282 }, { "epoch": 0.3023135788625554, "grad_norm": 0.35131993889808655, "learning_rate": 1.5818809780156863e-05, "loss": 0.3386, "step": 16284 }, { "epoch": 0.302350708999974, "grad_norm": 0.27675920724868774, "learning_rate": 1.5817861064094654e-05, "loss": 0.3684, "step": 16286 }, { "epoch": 0.30238783913739264, "grad_norm": 0.42793887853622437, "learning_rate": 1.5816912268868783e-05, "loss": 0.2511, "step": 16288 }, { "epoch": 0.3024249692748113, "grad_norm": 0.3586810231208801, "learning_rate": 1.5815963394492162e-05, "loss": 0.2102, "step": 16290 }, { "epoch": 0.3024620994122299, "grad_norm": 0.26683351397514343, "learning_rate": 1.5815014440977697e-05, "loss": 0.3612, "step": 16292 }, { "epoch": 0.3024992295496486, "grad_norm": 0.36184874176979065, "learning_rate": 1.5814065408338303e-05, "loss": 0.4777, "step": 16294 }, { "epoch": 0.3025363596870672, "grad_norm": 0.39173153042793274, "learning_rate": 1.5813116296586896e-05, "loss": 0.2786, "step": 16296 }, { "epoch": 0.30257348982448584, "grad_norm": 0.46049219369888306, "learning_rate": 1.5812167105736387e-05, "loss": 0.3843, "step": 16298 }, { "epoch": 0.30261061996190447, "grad_norm": 0.3502812385559082, "learning_rate": 1.5811217835799696e-05, "loss": 0.2393, "step": 16300 }, { "epoch": 0.3026477500993231, "grad_norm": 0.5710545778274536, "learning_rate": 1.5810268486789732e-05, "loss": 0.1627, "step": 16302 }, { "epoch": 0.3026848802367418, "grad_norm": 0.5966254472732544, "learning_rate": 1.580931905871942e-05, "loss": 0.3489, "step": 16304 }, { "epoch": 0.3027220103741604, "grad_norm": 0.2895124554634094, "learning_rate": 1.5808369551601677e-05, "loss": 0.3314, "step": 16306 }, { "epoch": 0.30275914051157904, "grad_norm": 0.3414720892906189, "learning_rate": 1.5807419965449423e-05, "loss": 0.2939, "step": 16308 }, { "epoch": 0.30279627064899767, "grad_norm": 0.2914572060108185, "learning_rate": 1.580647030027558e-05, "loss": 0.522, "step": 16310 }, { "epoch": 0.3028334007864163, "grad_norm": 0.2794126272201538, "learning_rate": 1.5805520556093062e-05, "loss": 0.3364, "step": 16312 }, { "epoch": 0.3028705309238349, "grad_norm": 0.5639039874076843, "learning_rate": 1.5804570732914806e-05, "loss": 0.2648, "step": 16314 }, { "epoch": 0.3029076610612536, "grad_norm": 0.5402952432632446, "learning_rate": 1.5803620830753726e-05, "loss": 0.416, "step": 16316 }, { "epoch": 0.30294479119867224, "grad_norm": 0.3365645110607147, "learning_rate": 1.580267084962275e-05, "loss": 0.2107, "step": 16318 }, { "epoch": 0.30298192133609086, "grad_norm": 0.44152238965034485, "learning_rate": 1.5801720789534805e-05, "loss": 0.3615, "step": 16320 }, { "epoch": 0.3030190514735095, "grad_norm": 0.48954296112060547, "learning_rate": 1.580077065050282e-05, "loss": 0.3276, "step": 16322 }, { "epoch": 0.3030561816109281, "grad_norm": 0.30221888422966003, "learning_rate": 1.5799820432539718e-05, "loss": 0.2612, "step": 16324 }, { "epoch": 0.3030933117483468, "grad_norm": 0.2957903742790222, "learning_rate": 1.5798870135658435e-05, "loss": 0.3312, "step": 16326 }, { "epoch": 0.30313044188576543, "grad_norm": 0.3304097354412079, "learning_rate": 1.57979197598719e-05, "loss": 0.369, "step": 16328 }, { "epoch": 0.30316757202318406, "grad_norm": 0.4203992784023285, "learning_rate": 1.5796969305193042e-05, "loss": 0.2827, "step": 16330 }, { "epoch": 0.3032047021606027, "grad_norm": 0.5688360333442688, "learning_rate": 1.5796018771634796e-05, "loss": 0.2526, "step": 16332 }, { "epoch": 0.3032418322980213, "grad_norm": 0.38786038756370544, "learning_rate": 1.5795068159210095e-05, "loss": 0.3511, "step": 16334 }, { "epoch": 0.30327896243543995, "grad_norm": 0.3857662081718445, "learning_rate": 1.5794117467931878e-05, "loss": 0.1532, "step": 16336 }, { "epoch": 0.30331609257285863, "grad_norm": 0.2677517235279083, "learning_rate": 1.5793166697813074e-05, "loss": 0.2753, "step": 16338 }, { "epoch": 0.30335322271027726, "grad_norm": 0.4150560200214386, "learning_rate": 1.5792215848866627e-05, "loss": 0.4178, "step": 16340 }, { "epoch": 0.3033903528476959, "grad_norm": 0.35351672768592834, "learning_rate": 1.5791264921105467e-05, "loss": 0.2274, "step": 16342 }, { "epoch": 0.3034274829851145, "grad_norm": 0.5662781000137329, "learning_rate": 1.5790313914542543e-05, "loss": 0.3439, "step": 16344 }, { "epoch": 0.30346461312253314, "grad_norm": 0.33523303270339966, "learning_rate": 1.578936282919079e-05, "loss": 0.2497, "step": 16346 }, { "epoch": 0.30350174325995183, "grad_norm": 0.42839181423187256, "learning_rate": 1.5788411665063148e-05, "loss": 0.3044, "step": 16348 }, { "epoch": 0.30353887339737046, "grad_norm": 0.44385963678359985, "learning_rate": 1.578746042217256e-05, "loss": 0.4338, "step": 16350 }, { "epoch": 0.3035760035347891, "grad_norm": 0.41629549860954285, "learning_rate": 1.5786509100531976e-05, "loss": 0.1757, "step": 16352 }, { "epoch": 0.3036131336722077, "grad_norm": 0.4542321562767029, "learning_rate": 1.5785557700154333e-05, "loss": 0.2535, "step": 16354 }, { "epoch": 0.30365026380962634, "grad_norm": 0.3555143177509308, "learning_rate": 1.5784606221052578e-05, "loss": 0.3635, "step": 16356 }, { "epoch": 0.303687393947045, "grad_norm": 0.5755445957183838, "learning_rate": 1.578365466323966e-05, "loss": 0.2618, "step": 16358 }, { "epoch": 0.30372452408446365, "grad_norm": 0.4481756091117859, "learning_rate": 1.578270302672853e-05, "loss": 0.3444, "step": 16360 }, { "epoch": 0.3037616542218823, "grad_norm": 0.3632296025753021, "learning_rate": 1.5781751311532126e-05, "loss": 0.2418, "step": 16362 }, { "epoch": 0.3037987843593009, "grad_norm": 0.29582175612449646, "learning_rate": 1.5780799517663414e-05, "loss": 0.2556, "step": 16364 }, { "epoch": 0.30383591449671954, "grad_norm": 0.5160143971443176, "learning_rate": 1.577984764513533e-05, "loss": 0.3634, "step": 16366 }, { "epoch": 0.30387304463413817, "grad_norm": 0.6076065301895142, "learning_rate": 1.577889569396083e-05, "loss": 0.4017, "step": 16368 }, { "epoch": 0.30391017477155685, "grad_norm": 0.4002518951892853, "learning_rate": 1.5777943664152874e-05, "loss": 0.3762, "step": 16370 }, { "epoch": 0.3039473049089755, "grad_norm": 0.2705652415752411, "learning_rate": 1.5776991555724412e-05, "loss": 0.3525, "step": 16372 }, { "epoch": 0.3039844350463941, "grad_norm": 0.29727306962013245, "learning_rate": 1.5776039368688396e-05, "loss": 0.4308, "step": 16374 }, { "epoch": 0.30402156518381274, "grad_norm": 0.2842528820037842, "learning_rate": 1.5775087103057786e-05, "loss": 0.3998, "step": 16376 }, { "epoch": 0.30405869532123136, "grad_norm": 0.3011460602283478, "learning_rate": 1.577413475884554e-05, "loss": 0.434, "step": 16378 }, { "epoch": 0.30409582545865005, "grad_norm": 0.28696584701538086, "learning_rate": 1.5773182336064615e-05, "loss": 0.3244, "step": 16380 }, { "epoch": 0.3041329555960687, "grad_norm": 0.35604965686798096, "learning_rate": 1.5772229834727975e-05, "loss": 0.3521, "step": 16382 }, { "epoch": 0.3041700857334873, "grad_norm": 0.45947733521461487, "learning_rate": 1.577127725484857e-05, "loss": 0.2479, "step": 16384 }, { "epoch": 0.30420721587090593, "grad_norm": 0.5401590466499329, "learning_rate": 1.5770324596439374e-05, "loss": 0.4565, "step": 16386 }, { "epoch": 0.30424434600832456, "grad_norm": 0.45186594128608704, "learning_rate": 1.576937185951334e-05, "loss": 0.2811, "step": 16388 }, { "epoch": 0.3042814761457432, "grad_norm": 0.3366377651691437, "learning_rate": 1.576841904408344e-05, "loss": 0.385, "step": 16390 }, { "epoch": 0.3043186062831619, "grad_norm": 0.3845144212245941, "learning_rate": 1.5767466150162632e-05, "loss": 0.3989, "step": 16392 }, { "epoch": 0.3043557364205805, "grad_norm": 0.2760485112667084, "learning_rate": 1.5766513177763885e-05, "loss": 0.3178, "step": 16394 }, { "epoch": 0.30439286655799913, "grad_norm": 0.19351300597190857, "learning_rate": 1.576556012690017e-05, "loss": 0.2492, "step": 16396 }, { "epoch": 0.30442999669541776, "grad_norm": 0.30799242854118347, "learning_rate": 1.576460699758445e-05, "loss": 0.3962, "step": 16398 }, { "epoch": 0.3044671268328364, "grad_norm": 0.30644455552101135, "learning_rate": 1.5763653789829693e-05, "loss": 0.3345, "step": 16400 }, { "epoch": 0.30450425697025507, "grad_norm": 0.34072408080101013, "learning_rate": 1.5762700503648874e-05, "loss": 0.3826, "step": 16402 }, { "epoch": 0.3045413871076737, "grad_norm": 0.24619989097118378, "learning_rate": 1.5761747139054967e-05, "loss": 0.3262, "step": 16404 }, { "epoch": 0.30457851724509233, "grad_norm": 0.38269028067588806, "learning_rate": 1.5760793696060933e-05, "loss": 0.3002, "step": 16406 }, { "epoch": 0.30461564738251096, "grad_norm": 0.3844844102859497, "learning_rate": 1.5759840174679756e-05, "loss": 0.1433, "step": 16408 }, { "epoch": 0.3046527775199296, "grad_norm": 0.48180678486824036, "learning_rate": 1.5758886574924407e-05, "loss": 0.218, "step": 16410 }, { "epoch": 0.3046899076573482, "grad_norm": 0.37486982345581055, "learning_rate": 1.575793289680786e-05, "loss": 0.3982, "step": 16412 }, { "epoch": 0.3047270377947669, "grad_norm": 0.580539345741272, "learning_rate": 1.5756979140343094e-05, "loss": 0.3191, "step": 16414 }, { "epoch": 0.3047641679321855, "grad_norm": 0.37441331148147583, "learning_rate": 1.5756025305543085e-05, "loss": 0.3382, "step": 16416 }, { "epoch": 0.30480129806960415, "grad_norm": 0.308307409286499, "learning_rate": 1.575507139242081e-05, "loss": 0.5857, "step": 16418 }, { "epoch": 0.3048384282070228, "grad_norm": 0.27833014726638794, "learning_rate": 1.575411740098926e-05, "loss": 0.1347, "step": 16420 }, { "epoch": 0.3048755583444414, "grad_norm": 0.25873076915740967, "learning_rate": 1.57531633312614e-05, "loss": 0.2473, "step": 16422 }, { "epoch": 0.3049126884818601, "grad_norm": 0.3725907802581787, "learning_rate": 1.5752209183250228e-05, "loss": 0.4214, "step": 16424 }, { "epoch": 0.3049498186192787, "grad_norm": 0.47309261560440063, "learning_rate": 1.575125495696871e-05, "loss": 0.2003, "step": 16426 }, { "epoch": 0.30498694875669735, "grad_norm": 0.2646389603614807, "learning_rate": 1.5750300652429846e-05, "loss": 0.4743, "step": 16428 }, { "epoch": 0.305024078894116, "grad_norm": 0.5863294005393982, "learning_rate": 1.574934626964661e-05, "loss": 0.4601, "step": 16430 }, { "epoch": 0.3050612090315346, "grad_norm": 0.29395779967308044, "learning_rate": 1.5748391808631992e-05, "loss": 0.1264, "step": 16432 }, { "epoch": 0.3050983391689533, "grad_norm": 0.2814014256000519, "learning_rate": 1.5747437269398984e-05, "loss": 0.548, "step": 16434 }, { "epoch": 0.3051354693063719, "grad_norm": 0.2762012183666229, "learning_rate": 1.5746482651960565e-05, "loss": 0.2469, "step": 16436 }, { "epoch": 0.30517259944379055, "grad_norm": 0.3044281601905823, "learning_rate": 1.5745527956329736e-05, "loss": 0.3498, "step": 16438 }, { "epoch": 0.3052097295812092, "grad_norm": 0.209794819355011, "learning_rate": 1.5744573182519477e-05, "loss": 0.3118, "step": 16440 }, { "epoch": 0.3052468597186278, "grad_norm": 0.5362929701805115, "learning_rate": 1.574361833054279e-05, "loss": 0.3381, "step": 16442 }, { "epoch": 0.30528398985604643, "grad_norm": 0.6461273431777954, "learning_rate": 1.5742663400412654e-05, "loss": 0.3473, "step": 16444 }, { "epoch": 0.3053211199934651, "grad_norm": 0.32658568024635315, "learning_rate": 1.5741708392142072e-05, "loss": 0.407, "step": 16446 }, { "epoch": 0.30535825013088375, "grad_norm": 0.2680683732032776, "learning_rate": 1.574075330574404e-05, "loss": 0.1845, "step": 16448 }, { "epoch": 0.3053953802683024, "grad_norm": 0.3414680063724518, "learning_rate": 1.573979814123155e-05, "loss": 0.3635, "step": 16450 }, { "epoch": 0.305432510405721, "grad_norm": 0.3205528259277344, "learning_rate": 1.57388428986176e-05, "loss": 0.5048, "step": 16452 }, { "epoch": 0.30546964054313963, "grad_norm": 0.42299753427505493, "learning_rate": 1.5737887577915186e-05, "loss": 0.2803, "step": 16454 }, { "epoch": 0.3055067706805583, "grad_norm": 0.4396992027759552, "learning_rate": 1.573693217913731e-05, "loss": 0.3947, "step": 16456 }, { "epoch": 0.30554390081797694, "grad_norm": 0.2916158437728882, "learning_rate": 1.5735976702296972e-05, "loss": 0.1199, "step": 16458 }, { "epoch": 0.30558103095539557, "grad_norm": 0.36378878355026245, "learning_rate": 1.5735021147407174e-05, "loss": 0.3627, "step": 16460 }, { "epoch": 0.3056181610928142, "grad_norm": 0.2865287661552429, "learning_rate": 1.573406551448091e-05, "loss": 0.2512, "step": 16462 }, { "epoch": 0.30565529123023283, "grad_norm": 0.3946283161640167, "learning_rate": 1.5733109803531197e-05, "loss": 0.5993, "step": 16464 }, { "epoch": 0.30569242136765146, "grad_norm": 0.38869160413742065, "learning_rate": 1.573215401457103e-05, "loss": 0.4061, "step": 16466 }, { "epoch": 0.30572955150507014, "grad_norm": 0.4151996672153473, "learning_rate": 1.5731198147613415e-05, "loss": 0.3125, "step": 16468 }, { "epoch": 0.30576668164248877, "grad_norm": 0.3945901393890381, "learning_rate": 1.573024220267136e-05, "loss": 0.4078, "step": 16470 }, { "epoch": 0.3058038117799074, "grad_norm": 0.39663851261138916, "learning_rate": 1.5729286179757878e-05, "loss": 0.2258, "step": 16472 }, { "epoch": 0.305840941917326, "grad_norm": 0.33139923214912415, "learning_rate": 1.5728330078885966e-05, "loss": 0.3279, "step": 16474 }, { "epoch": 0.30587807205474465, "grad_norm": 0.43833813071250916, "learning_rate": 1.5727373900068642e-05, "loss": 0.364, "step": 16476 }, { "epoch": 0.30591520219216334, "grad_norm": 0.4080651104450226, "learning_rate": 1.5726417643318917e-05, "loss": 0.1984, "step": 16478 }, { "epoch": 0.30595233232958197, "grad_norm": 0.19043704867362976, "learning_rate": 1.5725461308649797e-05, "loss": 0.2146, "step": 16480 }, { "epoch": 0.3059894624670006, "grad_norm": 0.2585740089416504, "learning_rate": 1.57245048960743e-05, "loss": 0.2808, "step": 16482 }, { "epoch": 0.3060265926044192, "grad_norm": 0.3631007969379425, "learning_rate": 1.5723548405605438e-05, "loss": 0.3558, "step": 16484 }, { "epoch": 0.30606372274183785, "grad_norm": 0.24084387719631195, "learning_rate": 1.5722591837256228e-05, "loss": 0.2598, "step": 16486 }, { "epoch": 0.3061008528792565, "grad_norm": 0.36494797468185425, "learning_rate": 1.572163519103968e-05, "loss": 0.3286, "step": 16488 }, { "epoch": 0.30613798301667516, "grad_norm": 0.3660631775856018, "learning_rate": 1.572067846696882e-05, "loss": 0.3426, "step": 16490 }, { "epoch": 0.3061751131540938, "grad_norm": 0.6746957302093506, "learning_rate": 1.571972166505666e-05, "loss": 0.3402, "step": 16492 }, { "epoch": 0.3062122432915124, "grad_norm": 0.34617069363594055, "learning_rate": 1.571876478531622e-05, "loss": 0.3434, "step": 16494 }, { "epoch": 0.30624937342893105, "grad_norm": 0.2727055847644806, "learning_rate": 1.571780782776052e-05, "loss": 0.1097, "step": 16496 }, { "epoch": 0.3062865035663497, "grad_norm": 0.4270043671131134, "learning_rate": 1.5716850792402583e-05, "loss": 0.2976, "step": 16498 }, { "epoch": 0.30632363370376836, "grad_norm": 0.3759697675704956, "learning_rate": 1.571589367925543e-05, "loss": 0.3867, "step": 16500 }, { "epoch": 0.306360763841187, "grad_norm": 0.2748478651046753, "learning_rate": 1.5714936488332088e-05, "loss": 0.4845, "step": 16502 }, { "epoch": 0.3063978939786056, "grad_norm": 0.3093646466732025, "learning_rate": 1.5713979219645575e-05, "loss": 0.3446, "step": 16504 }, { "epoch": 0.30643502411602425, "grad_norm": 0.7534693479537964, "learning_rate": 1.5713021873208926e-05, "loss": 0.2728, "step": 16506 }, { "epoch": 0.3064721542534429, "grad_norm": 0.31008830666542053, "learning_rate": 1.5712064449035157e-05, "loss": 0.3914, "step": 16508 }, { "epoch": 0.30650928439086156, "grad_norm": 0.49009695649147034, "learning_rate": 1.57111069471373e-05, "loss": 0.3029, "step": 16510 }, { "epoch": 0.3065464145282802, "grad_norm": 0.29887720942497253, "learning_rate": 1.5710149367528383e-05, "loss": 0.3723, "step": 16512 }, { "epoch": 0.3065835446656988, "grad_norm": 0.26778683066368103, "learning_rate": 1.570919171022144e-05, "loss": 0.2706, "step": 16514 }, { "epoch": 0.30662067480311744, "grad_norm": 0.31673118472099304, "learning_rate": 1.57082339752295e-05, "loss": 0.2003, "step": 16516 }, { "epoch": 0.30665780494053607, "grad_norm": 0.36437028646469116, "learning_rate": 1.570727616256559e-05, "loss": 0.2945, "step": 16518 }, { "epoch": 0.3066949350779547, "grad_norm": 0.47071221470832825, "learning_rate": 1.5706318272242747e-05, "loss": 0.2912, "step": 16520 }, { "epoch": 0.3067320652153734, "grad_norm": 0.30297818779945374, "learning_rate": 1.570536030427401e-05, "loss": 0.2929, "step": 16522 }, { "epoch": 0.306769195352792, "grad_norm": 0.3967669606208801, "learning_rate": 1.57044022586724e-05, "loss": 0.3168, "step": 16524 }, { "epoch": 0.30680632549021064, "grad_norm": 0.2511221766471863, "learning_rate": 1.570344413545097e-05, "loss": 0.3666, "step": 16526 }, { "epoch": 0.30684345562762927, "grad_norm": 0.27303099632263184, "learning_rate": 1.5702485934622747e-05, "loss": 0.4436, "step": 16528 }, { "epoch": 0.3068805857650479, "grad_norm": 0.5541063547134399, "learning_rate": 1.570152765620077e-05, "loss": 0.191, "step": 16530 }, { "epoch": 0.3069177159024666, "grad_norm": 0.27020296454429626, "learning_rate": 1.5700569300198077e-05, "loss": 0.3549, "step": 16532 }, { "epoch": 0.3069548460398852, "grad_norm": 0.44569888710975647, "learning_rate": 1.5699610866627718e-05, "loss": 0.2356, "step": 16534 }, { "epoch": 0.30699197617730384, "grad_norm": 0.3283940553665161, "learning_rate": 1.5698652355502724e-05, "loss": 0.3459, "step": 16536 }, { "epoch": 0.30702910631472247, "grad_norm": 0.44204750657081604, "learning_rate": 1.5697693766836137e-05, "loss": 0.2603, "step": 16538 }, { "epoch": 0.3070662364521411, "grad_norm": 0.37685173749923706, "learning_rate": 1.5696735100641013e-05, "loss": 0.4798, "step": 16540 }, { "epoch": 0.3071033665895597, "grad_norm": 0.4294675886631012, "learning_rate": 1.5695776356930378e-05, "loss": 0.2816, "step": 16542 }, { "epoch": 0.3071404967269784, "grad_norm": 0.3429599106311798, "learning_rate": 1.569481753571729e-05, "loss": 0.3775, "step": 16544 }, { "epoch": 0.30717762686439704, "grad_norm": 0.36742743849754333, "learning_rate": 1.56938586370148e-05, "loss": 0.3747, "step": 16546 }, { "epoch": 0.30721475700181566, "grad_norm": 0.44516023993492126, "learning_rate": 1.5692899660835946e-05, "loss": 0.3403, "step": 16548 }, { "epoch": 0.3072518871392343, "grad_norm": 0.4365513026714325, "learning_rate": 1.5691940607193777e-05, "loss": 0.157, "step": 16550 }, { "epoch": 0.3072890172766529, "grad_norm": 0.4185921251773834, "learning_rate": 1.5690981476101347e-05, "loss": 0.329, "step": 16552 }, { "epoch": 0.3073261474140716, "grad_norm": 0.22240328788757324, "learning_rate": 1.569002226757171e-05, "loss": 0.242, "step": 16554 }, { "epoch": 0.30736327755149023, "grad_norm": 0.2623108923435211, "learning_rate": 1.5689062981617905e-05, "loss": 0.3067, "step": 16556 }, { "epoch": 0.30740040768890886, "grad_norm": 0.33413684368133545, "learning_rate": 1.5688103618252995e-05, "loss": 0.4104, "step": 16558 }, { "epoch": 0.3074375378263275, "grad_norm": 0.3714750111103058, "learning_rate": 1.568714417749004e-05, "loss": 0.2817, "step": 16560 }, { "epoch": 0.3074746679637461, "grad_norm": 0.38130223751068115, "learning_rate": 1.568618465934208e-05, "loss": 0.2496, "step": 16562 }, { "epoch": 0.30751179810116475, "grad_norm": 0.3975653648376465, "learning_rate": 1.5685225063822183e-05, "loss": 0.3612, "step": 16564 }, { "epoch": 0.30754892823858343, "grad_norm": 0.37250664830207825, "learning_rate": 1.5684265390943404e-05, "loss": 0.3116, "step": 16566 }, { "epoch": 0.30758605837600206, "grad_norm": 0.5297329425811768, "learning_rate": 1.5683305640718794e-05, "loss": 0.1213, "step": 16568 }, { "epoch": 0.3076231885134207, "grad_norm": 0.3852524161338806, "learning_rate": 1.5682345813161418e-05, "loss": 0.325, "step": 16570 }, { "epoch": 0.3076603186508393, "grad_norm": 0.39035290479660034, "learning_rate": 1.568138590828434e-05, "loss": 0.3929, "step": 16572 }, { "epoch": 0.30769744878825794, "grad_norm": 0.6014788150787354, "learning_rate": 1.5680425926100618e-05, "loss": 0.2191, "step": 16574 }, { "epoch": 0.3077345789256766, "grad_norm": 0.2949322760105133, "learning_rate": 1.567946586662331e-05, "loss": 0.2513, "step": 16576 }, { "epoch": 0.30777170906309526, "grad_norm": 0.45096710324287415, "learning_rate": 1.5678505729865485e-05, "loss": 0.3135, "step": 16578 }, { "epoch": 0.3078088392005139, "grad_norm": 0.40488123893737793, "learning_rate": 1.5677545515840205e-05, "loss": 0.3203, "step": 16580 }, { "epoch": 0.3078459693379325, "grad_norm": 0.4338582158088684, "learning_rate": 1.5676585224560537e-05, "loss": 0.1121, "step": 16582 }, { "epoch": 0.30788309947535114, "grad_norm": 0.28338631987571716, "learning_rate": 1.5675624856039552e-05, "loss": 0.3647, "step": 16584 }, { "epoch": 0.30792022961276977, "grad_norm": 0.3651292026042938, "learning_rate": 1.567466441029031e-05, "loss": 0.2897, "step": 16586 }, { "epoch": 0.30795735975018845, "grad_norm": 0.43862706422805786, "learning_rate": 1.5673703887325885e-05, "loss": 0.3559, "step": 16588 }, { "epoch": 0.3079944898876071, "grad_norm": 0.899421215057373, "learning_rate": 1.567274328715934e-05, "loss": 0.3258, "step": 16590 }, { "epoch": 0.3080316200250257, "grad_norm": 0.2669694125652313, "learning_rate": 1.5671782609803754e-05, "loss": 0.1478, "step": 16592 }, { "epoch": 0.30806875016244434, "grad_norm": 0.35610005259513855, "learning_rate": 1.5670821855272192e-05, "loss": 0.2901, "step": 16594 }, { "epoch": 0.30810588029986297, "grad_norm": 0.5791921615600586, "learning_rate": 1.5669861023577735e-05, "loss": 0.3681, "step": 16596 }, { "epoch": 0.30814301043728165, "grad_norm": 0.3730636239051819, "learning_rate": 1.5668900114733454e-05, "loss": 0.1845, "step": 16598 }, { "epoch": 0.3081801405747003, "grad_norm": 0.41708168387413025, "learning_rate": 1.566793912875242e-05, "loss": 0.3099, "step": 16600 }, { "epoch": 0.3082172707121189, "grad_norm": 0.29422029852867126, "learning_rate": 1.566697806564771e-05, "loss": 0.3004, "step": 16602 }, { "epoch": 0.30825440084953754, "grad_norm": 0.250308632850647, "learning_rate": 1.5666016925432405e-05, "loss": 0.158, "step": 16604 }, { "epoch": 0.30829153098695616, "grad_norm": 0.39087826013565063, "learning_rate": 1.566505570811958e-05, "loss": 0.2636, "step": 16606 }, { "epoch": 0.30832866112437485, "grad_norm": 0.32174766063690186, "learning_rate": 1.5664094413722318e-05, "loss": 0.3822, "step": 16608 }, { "epoch": 0.3083657912617935, "grad_norm": 0.5919029712677002, "learning_rate": 1.5663133042253696e-05, "loss": 0.1749, "step": 16610 }, { "epoch": 0.3084029213992121, "grad_norm": 0.6407782435417175, "learning_rate": 1.5662171593726797e-05, "loss": 0.4165, "step": 16612 }, { "epoch": 0.30844005153663073, "grad_norm": 0.30908292531967163, "learning_rate": 1.56612100681547e-05, "loss": 0.3558, "step": 16614 }, { "epoch": 0.30847718167404936, "grad_norm": 0.3452662229537964, "learning_rate": 1.5660248465550495e-05, "loss": 0.4225, "step": 16616 }, { "epoch": 0.308514311811468, "grad_norm": 0.2847616374492645, "learning_rate": 1.565928678592726e-05, "loss": 0.2013, "step": 16618 }, { "epoch": 0.3085514419488867, "grad_norm": 0.205544114112854, "learning_rate": 1.5658325029298084e-05, "loss": 0.3006, "step": 16620 }, { "epoch": 0.3085885720863053, "grad_norm": 0.40073904395103455, "learning_rate": 1.5657363195676055e-05, "loss": 0.3951, "step": 16622 }, { "epoch": 0.30862570222372393, "grad_norm": 0.37328073382377625, "learning_rate": 1.5656401285074255e-05, "loss": 0.3626, "step": 16624 }, { "epoch": 0.30866283236114256, "grad_norm": 0.38561737537384033, "learning_rate": 1.565543929750578e-05, "loss": 0.4373, "step": 16626 }, { "epoch": 0.3086999624985612, "grad_norm": 0.37617290019989014, "learning_rate": 1.5654477232983715e-05, "loss": 0.2897, "step": 16628 }, { "epoch": 0.30873709263597987, "grad_norm": 0.32271674275398254, "learning_rate": 1.565351509152115e-05, "loss": 0.2295, "step": 16630 }, { "epoch": 0.3087742227733985, "grad_norm": 0.2987178862094879, "learning_rate": 1.565255287313118e-05, "loss": 0.3438, "step": 16632 }, { "epoch": 0.3088113529108171, "grad_norm": 0.3464885652065277, "learning_rate": 1.56515905778269e-05, "loss": 0.2258, "step": 16634 }, { "epoch": 0.30884848304823576, "grad_norm": 0.24199305474758148, "learning_rate": 1.5650628205621397e-05, "loss": 0.0808, "step": 16636 }, { "epoch": 0.3088856131856544, "grad_norm": 0.44331419467926025, "learning_rate": 1.564966575652777e-05, "loss": 0.3605, "step": 16638 }, { "epoch": 0.308922743323073, "grad_norm": 0.33524084091186523, "learning_rate": 1.5648703230559115e-05, "loss": 0.1736, "step": 16640 }, { "epoch": 0.3089598734604917, "grad_norm": 0.19516101479530334, "learning_rate": 1.564774062772853e-05, "loss": 0.3516, "step": 16642 }, { "epoch": 0.3089970035979103, "grad_norm": 0.34480419754981995, "learning_rate": 1.564677794804911e-05, "loss": 0.3492, "step": 16644 }, { "epoch": 0.30903413373532895, "grad_norm": 1.7767935991287231, "learning_rate": 1.5645815191533956e-05, "loss": 0.3228, "step": 16646 }, { "epoch": 0.3090712638727476, "grad_norm": 0.4164237678050995, "learning_rate": 1.564485235819617e-05, "loss": 0.3073, "step": 16648 }, { "epoch": 0.3091083940101662, "grad_norm": 0.49734604358673096, "learning_rate": 1.5643889448048853e-05, "loss": 0.3002, "step": 16650 }, { "epoch": 0.3091455241475849, "grad_norm": 0.380039244890213, "learning_rate": 1.5642926461105106e-05, "loss": 0.2654, "step": 16652 }, { "epoch": 0.3091826542850035, "grad_norm": 0.44405633211135864, "learning_rate": 1.5641963397378032e-05, "loss": 0.2832, "step": 16654 }, { "epoch": 0.30921978442242215, "grad_norm": 0.33024150133132935, "learning_rate": 1.5641000256880734e-05, "loss": 0.4254, "step": 16656 }, { "epoch": 0.3092569145598408, "grad_norm": 0.30719926953315735, "learning_rate": 1.564003703962632e-05, "loss": 0.3623, "step": 16658 }, { "epoch": 0.3092940446972594, "grad_norm": 0.4838407635688782, "learning_rate": 1.5639073745627894e-05, "loss": 0.316, "step": 16660 }, { "epoch": 0.30933117483467804, "grad_norm": 0.3967163562774658, "learning_rate": 1.5638110374898567e-05, "loss": 0.3132, "step": 16662 }, { "epoch": 0.3093683049720967, "grad_norm": 0.23284919559955597, "learning_rate": 1.5637146927451447e-05, "loss": 0.2195, "step": 16664 }, { "epoch": 0.30940543510951535, "grad_norm": 0.3349030315876007, "learning_rate": 1.563618340329964e-05, "loss": 0.2879, "step": 16666 }, { "epoch": 0.309442565246934, "grad_norm": 0.4190990626811981, "learning_rate": 1.5635219802456264e-05, "loss": 0.366, "step": 16668 }, { "epoch": 0.3094796953843526, "grad_norm": 0.3902192711830139, "learning_rate": 1.563425612493442e-05, "loss": 0.2041, "step": 16670 }, { "epoch": 0.30951682552177123, "grad_norm": 0.3882218897342682, "learning_rate": 1.5633292370747232e-05, "loss": 0.2131, "step": 16672 }, { "epoch": 0.3095539556591899, "grad_norm": 0.6261725425720215, "learning_rate": 1.5632328539907806e-05, "loss": 0.1961, "step": 16674 }, { "epoch": 0.30959108579660854, "grad_norm": 0.3968117833137512, "learning_rate": 1.5631364632429255e-05, "loss": 0.2987, "step": 16676 }, { "epoch": 0.3096282159340272, "grad_norm": 0.32582974433898926, "learning_rate": 1.5630400648324705e-05, "loss": 0.303, "step": 16678 }, { "epoch": 0.3096653460714458, "grad_norm": 0.34196314215660095, "learning_rate": 1.5629436587607266e-05, "loss": 0.3422, "step": 16680 }, { "epoch": 0.30970247620886443, "grad_norm": 0.3452033996582031, "learning_rate": 1.5628472450290058e-05, "loss": 0.2331, "step": 16682 }, { "epoch": 0.3097396063462831, "grad_norm": 0.3539777994155884, "learning_rate": 1.5627508236386195e-05, "loss": 0.2744, "step": 16684 }, { "epoch": 0.30977673648370174, "grad_norm": 0.3950103521347046, "learning_rate": 1.5626543945908805e-05, "loss": 0.3383, "step": 16686 }, { "epoch": 0.30981386662112037, "grad_norm": 0.3352287709712982, "learning_rate": 1.5625579578871004e-05, "loss": 0.3235, "step": 16688 }, { "epoch": 0.309850996758539, "grad_norm": 0.6298438906669617, "learning_rate": 1.5624615135285917e-05, "loss": 0.2235, "step": 16690 }, { "epoch": 0.3098881268959576, "grad_norm": 0.45068278908729553, "learning_rate": 1.5623650615166664e-05, "loss": 0.4382, "step": 16692 }, { "epoch": 0.30992525703337626, "grad_norm": 0.3961672782897949, "learning_rate": 1.5622686018526373e-05, "loss": 0.1926, "step": 16694 }, { "epoch": 0.30996238717079494, "grad_norm": 0.5014447569847107, "learning_rate": 1.5621721345378166e-05, "loss": 0.3701, "step": 16696 }, { "epoch": 0.30999951730821357, "grad_norm": 0.42759397625923157, "learning_rate": 1.562075659573517e-05, "loss": 0.3959, "step": 16698 }, { "epoch": 0.3100366474456322, "grad_norm": 0.43737176060676575, "learning_rate": 1.5619791769610512e-05, "loss": 0.3423, "step": 16700 }, { "epoch": 0.3100737775830508, "grad_norm": 0.29817995429039, "learning_rate": 1.5618826867017322e-05, "loss": 0.3211, "step": 16702 }, { "epoch": 0.31011090772046945, "grad_norm": 0.3803866505622864, "learning_rate": 1.561786188796873e-05, "loss": 0.2431, "step": 16704 }, { "epoch": 0.31014803785788814, "grad_norm": 0.551832914352417, "learning_rate": 1.5616896832477864e-05, "loss": 0.4096, "step": 16706 }, { "epoch": 0.31018516799530677, "grad_norm": 0.388785719871521, "learning_rate": 1.5615931700557856e-05, "loss": 0.295, "step": 16708 }, { "epoch": 0.3102222981327254, "grad_norm": 0.4111138582229614, "learning_rate": 1.5614966492221843e-05, "loss": 0.375, "step": 16710 }, { "epoch": 0.310259428270144, "grad_norm": 0.384615421295166, "learning_rate": 1.561400120748295e-05, "loss": 0.2436, "step": 16712 }, { "epoch": 0.31029655840756265, "grad_norm": 0.5951463580131531, "learning_rate": 1.5613035846354318e-05, "loss": 0.3952, "step": 16714 }, { "epoch": 0.3103336885449813, "grad_norm": 0.3531409502029419, "learning_rate": 1.561207040884908e-05, "loss": 0.2586, "step": 16716 }, { "epoch": 0.31037081868239996, "grad_norm": 0.664386510848999, "learning_rate": 1.5611104894980377e-05, "loss": 0.2643, "step": 16718 }, { "epoch": 0.3104079488198186, "grad_norm": 0.20239783823490143, "learning_rate": 1.561013930476134e-05, "loss": 0.1795, "step": 16720 }, { "epoch": 0.3104450789572372, "grad_norm": 0.4996491074562073, "learning_rate": 1.560917363820511e-05, "loss": 0.1277, "step": 16722 }, { "epoch": 0.31048220909465585, "grad_norm": 0.3859277069568634, "learning_rate": 1.5608207895324837e-05, "loss": 0.1541, "step": 16724 }, { "epoch": 0.3105193392320745, "grad_norm": 0.4379408359527588, "learning_rate": 1.5607242076133646e-05, "loss": 0.4843, "step": 16726 }, { "epoch": 0.31055646936949316, "grad_norm": 0.5015046000480652, "learning_rate": 1.5606276180644685e-05, "loss": 0.2584, "step": 16728 }, { "epoch": 0.3105935995069118, "grad_norm": 0.2655382454395294, "learning_rate": 1.5605310208871107e-05, "loss": 0.4608, "step": 16730 }, { "epoch": 0.3106307296443304, "grad_norm": 0.3892948031425476, "learning_rate": 1.5604344160826033e-05, "loss": 0.1443, "step": 16732 }, { "epoch": 0.31066785978174905, "grad_norm": 0.38978904485702515, "learning_rate": 1.560337803652263e-05, "loss": 0.3282, "step": 16734 }, { "epoch": 0.3107049899191677, "grad_norm": 0.3597666323184967, "learning_rate": 1.560241183597404e-05, "loss": 0.2989, "step": 16736 }, { "epoch": 0.3107421200565863, "grad_norm": 0.315565288066864, "learning_rate": 1.5601445559193397e-05, "loss": 0.2377, "step": 16738 }, { "epoch": 0.310779250194005, "grad_norm": 0.30008500814437866, "learning_rate": 1.5600479206193867e-05, "loss": 0.3912, "step": 16740 }, { "epoch": 0.3108163803314236, "grad_norm": 0.533191978931427, "learning_rate": 1.5599512776988586e-05, "loss": 0.3127, "step": 16742 }, { "epoch": 0.31085351046884224, "grad_norm": 0.3123820424079895, "learning_rate": 1.5598546271590705e-05, "loss": 0.1398, "step": 16744 }, { "epoch": 0.31089064060626087, "grad_norm": 0.3105533719062805, "learning_rate": 1.559757969001338e-05, "loss": 0.1833, "step": 16746 }, { "epoch": 0.3109277707436795, "grad_norm": 0.36196911334991455, "learning_rate": 1.5596613032269768e-05, "loss": 0.1458, "step": 16748 }, { "epoch": 0.3109649008810982, "grad_norm": 0.40094897150993347, "learning_rate": 1.559564629837301e-05, "loss": 0.2116, "step": 16750 }, { "epoch": 0.3110020310185168, "grad_norm": 0.3503999710083008, "learning_rate": 1.559467948833627e-05, "loss": 0.4452, "step": 16752 }, { "epoch": 0.31103916115593544, "grad_norm": 0.29853150248527527, "learning_rate": 1.5593712602172702e-05, "loss": 0.2372, "step": 16754 }, { "epoch": 0.31107629129335407, "grad_norm": 0.38461723923683167, "learning_rate": 1.559274563989546e-05, "loss": 0.2793, "step": 16756 }, { "epoch": 0.3111134214307727, "grad_norm": 0.46388331055641174, "learning_rate": 1.5591778601517698e-05, "loss": 0.2289, "step": 16758 }, { "epoch": 0.3111505515681914, "grad_norm": 0.3509833514690399, "learning_rate": 1.559081148705258e-05, "loss": 0.3896, "step": 16760 }, { "epoch": 0.31118768170561, "grad_norm": 0.4400383234024048, "learning_rate": 1.5589844296513266e-05, "loss": 0.3793, "step": 16762 }, { "epoch": 0.31122481184302864, "grad_norm": 0.3094305694103241, "learning_rate": 1.5588877029912914e-05, "loss": 0.3506, "step": 16764 }, { "epoch": 0.31126194198044727, "grad_norm": 0.41437506675720215, "learning_rate": 1.5587909687264686e-05, "loss": 0.3179, "step": 16766 }, { "epoch": 0.3112990721178659, "grad_norm": 0.45820292830467224, "learning_rate": 1.5586942268581747e-05, "loss": 0.3012, "step": 16768 }, { "epoch": 0.3113362022552845, "grad_norm": 0.38028040528297424, "learning_rate": 1.558597477387725e-05, "loss": 0.3794, "step": 16770 }, { "epoch": 0.3113733323927032, "grad_norm": 0.48959627747535706, "learning_rate": 1.5585007203164376e-05, "loss": 0.2376, "step": 16772 }, { "epoch": 0.31141046253012183, "grad_norm": 0.43235957622528076, "learning_rate": 1.5584039556456284e-05, "loss": 0.2732, "step": 16774 }, { "epoch": 0.31144759266754046, "grad_norm": 0.33892425894737244, "learning_rate": 1.558307183376614e-05, "loss": 0.4299, "step": 16776 }, { "epoch": 0.3114847228049591, "grad_norm": 0.3762683570384979, "learning_rate": 1.5582104035107105e-05, "loss": 0.2682, "step": 16778 }, { "epoch": 0.3115218529423777, "grad_norm": 0.43319469690322876, "learning_rate": 1.558113616049236e-05, "loss": 0.293, "step": 16780 }, { "epoch": 0.3115589830797964, "grad_norm": 0.429830938577652, "learning_rate": 1.558016820993507e-05, "loss": 0.2874, "step": 16782 }, { "epoch": 0.31159611321721503, "grad_norm": 0.4593583941459656, "learning_rate": 1.55792001834484e-05, "loss": 0.3934, "step": 16784 }, { "epoch": 0.31163324335463366, "grad_norm": 0.3336837887763977, "learning_rate": 1.5578232081045532e-05, "loss": 0.2438, "step": 16786 }, { "epoch": 0.3116703734920523, "grad_norm": 0.28354746103286743, "learning_rate": 1.557726390273963e-05, "loss": 0.2287, "step": 16788 }, { "epoch": 0.3117075036294709, "grad_norm": 0.33228740096092224, "learning_rate": 1.5576295648543876e-05, "loss": 0.2207, "step": 16790 }, { "epoch": 0.31174463376688955, "grad_norm": 0.32588300108909607, "learning_rate": 1.557532731847144e-05, "loss": 0.2904, "step": 16792 }, { "epoch": 0.31178176390430823, "grad_norm": 0.3487551212310791, "learning_rate": 1.55743589125355e-05, "loss": 0.1579, "step": 16794 }, { "epoch": 0.31181889404172686, "grad_norm": 0.2647467255592346, "learning_rate": 1.557339043074923e-05, "loss": 0.2663, "step": 16796 }, { "epoch": 0.3118560241791455, "grad_norm": 0.34950193762779236, "learning_rate": 1.5572421873125812e-05, "loss": 0.1523, "step": 16798 }, { "epoch": 0.3118931543165641, "grad_norm": 0.33119437098503113, "learning_rate": 1.557145323967842e-05, "loss": 0.2687, "step": 16800 }, { "epoch": 0.31193028445398274, "grad_norm": 0.643551230430603, "learning_rate": 1.5570484530420243e-05, "loss": 0.2939, "step": 16802 }, { "epoch": 0.3119674145914014, "grad_norm": 0.3724636435508728, "learning_rate": 1.5569515745364455e-05, "loss": 0.3898, "step": 16804 }, { "epoch": 0.31200454472882005, "grad_norm": 0.4132455289363861, "learning_rate": 1.556854688452424e-05, "loss": 0.3013, "step": 16806 }, { "epoch": 0.3120416748662387, "grad_norm": 0.37189170718193054, "learning_rate": 1.5567577947912783e-05, "loss": 0.2683, "step": 16808 }, { "epoch": 0.3120788050036573, "grad_norm": 0.6936743259429932, "learning_rate": 1.5566608935543265e-05, "loss": 0.313, "step": 16810 }, { "epoch": 0.31211593514107594, "grad_norm": 0.3180742561817169, "learning_rate": 1.5565639847428876e-05, "loss": 0.4826, "step": 16812 }, { "epoch": 0.31215306527849457, "grad_norm": 0.38176193833351135, "learning_rate": 1.5564670683582797e-05, "loss": 0.0578, "step": 16814 }, { "epoch": 0.31219019541591325, "grad_norm": 0.3374461531639099, "learning_rate": 1.556370144401822e-05, "loss": 0.2773, "step": 16816 }, { "epoch": 0.3122273255533319, "grad_norm": 0.5728803277015686, "learning_rate": 1.556273212874833e-05, "loss": 0.0859, "step": 16818 }, { "epoch": 0.3122644556907505, "grad_norm": 0.3416641354560852, "learning_rate": 1.5561762737786317e-05, "loss": 0.4054, "step": 16820 }, { "epoch": 0.31230158582816914, "grad_norm": 0.5831412076950073, "learning_rate": 1.5560793271145373e-05, "loss": 0.1905, "step": 16822 }, { "epoch": 0.31233871596558777, "grad_norm": 0.414559006690979, "learning_rate": 1.5559823728838694e-05, "loss": 0.5655, "step": 16824 }, { "epoch": 0.31237584610300645, "grad_norm": 0.42011094093322754, "learning_rate": 1.5558854110879464e-05, "loss": 0.2772, "step": 16826 }, { "epoch": 0.3124129762404251, "grad_norm": 0.4071503281593323, "learning_rate": 1.5557884417280876e-05, "loss": 0.3225, "step": 16828 }, { "epoch": 0.3124501063778437, "grad_norm": 0.22644047439098358, "learning_rate": 1.5556914648056138e-05, "loss": 0.1915, "step": 16830 }, { "epoch": 0.31248723651526233, "grad_norm": 0.3112654387950897, "learning_rate": 1.555594480321843e-05, "loss": 0.2623, "step": 16832 }, { "epoch": 0.31252436665268096, "grad_norm": 0.4038960337638855, "learning_rate": 1.5554974882780955e-05, "loss": 0.2931, "step": 16834 }, { "epoch": 0.31256149679009965, "grad_norm": 0.2340363711118698, "learning_rate": 1.5554004886756914e-05, "loss": 0.3117, "step": 16836 }, { "epoch": 0.3125986269275183, "grad_norm": 0.19384273886680603, "learning_rate": 1.5553034815159502e-05, "loss": 0.1922, "step": 16838 }, { "epoch": 0.3126357570649369, "grad_norm": 0.3948344886302948, "learning_rate": 1.5552064668001917e-05, "loss": 0.324, "step": 16840 }, { "epoch": 0.31267288720235553, "grad_norm": 0.35939788818359375, "learning_rate": 1.5551094445297365e-05, "loss": 0.3553, "step": 16842 }, { "epoch": 0.31271001733977416, "grad_norm": 0.3781391680240631, "learning_rate": 1.5550124147059046e-05, "loss": 0.1575, "step": 16844 }, { "epoch": 0.3127471474771928, "grad_norm": 0.2912297248840332, "learning_rate": 1.5549153773300157e-05, "loss": 0.2537, "step": 16846 }, { "epoch": 0.3127842776146115, "grad_norm": 0.3970768451690674, "learning_rate": 1.5548183324033912e-05, "loss": 0.4899, "step": 16848 }, { "epoch": 0.3128214077520301, "grad_norm": 0.5380943417549133, "learning_rate": 1.5547212799273508e-05, "loss": 0.2991, "step": 16850 }, { "epoch": 0.31285853788944873, "grad_norm": 0.41640540957450867, "learning_rate": 1.5546242199032155e-05, "loss": 0.5422, "step": 16852 }, { "epoch": 0.31289566802686736, "grad_norm": 0.4586479365825653, "learning_rate": 1.554527152332306e-05, "loss": 0.3226, "step": 16854 }, { "epoch": 0.312932798164286, "grad_norm": 0.5198866724967957, "learning_rate": 1.5544300772159425e-05, "loss": 0.4153, "step": 16856 }, { "epoch": 0.31296992830170467, "grad_norm": 0.34491318464279175, "learning_rate": 1.5543329945554467e-05, "loss": 0.2755, "step": 16858 }, { "epoch": 0.3130070584391233, "grad_norm": 0.33182820677757263, "learning_rate": 1.554235904352139e-05, "loss": 0.3102, "step": 16860 }, { "epoch": 0.3130441885765419, "grad_norm": 0.32501810789108276, "learning_rate": 1.5541388066073412e-05, "loss": 0.2889, "step": 16862 }, { "epoch": 0.31308131871396055, "grad_norm": 0.3546353876590729, "learning_rate": 1.5540417013223734e-05, "loss": 0.425, "step": 16864 }, { "epoch": 0.3131184488513792, "grad_norm": 0.3657384514808655, "learning_rate": 1.553944588498558e-05, "loss": 0.2205, "step": 16866 }, { "epoch": 0.3131555789887978, "grad_norm": 0.39293238520622253, "learning_rate": 1.5538474681372164e-05, "loss": 0.3824, "step": 16868 }, { "epoch": 0.3131927091262165, "grad_norm": 0.303480863571167, "learning_rate": 1.553750340239669e-05, "loss": 0.2991, "step": 16870 }, { "epoch": 0.3132298392636351, "grad_norm": 0.3296826481819153, "learning_rate": 1.5536532048072386e-05, "loss": 0.3682, "step": 16872 }, { "epoch": 0.31326696940105375, "grad_norm": 0.34540224075317383, "learning_rate": 1.5535560618412464e-05, "loss": 0.4344, "step": 16874 }, { "epoch": 0.3133040995384724, "grad_norm": 0.33837801218032837, "learning_rate": 1.553458911343014e-05, "loss": 0.2586, "step": 16876 }, { "epoch": 0.313341229675891, "grad_norm": 0.3564090132713318, "learning_rate": 1.553361753313864e-05, "loss": 0.3548, "step": 16878 }, { "epoch": 0.3133783598133097, "grad_norm": 0.5724231600761414, "learning_rate": 1.553264587755118e-05, "loss": 0.2972, "step": 16880 }, { "epoch": 0.3134154899507283, "grad_norm": 0.3740960955619812, "learning_rate": 1.5531674146680978e-05, "loss": 0.2756, "step": 16882 }, { "epoch": 0.31345262008814695, "grad_norm": 0.2462249994277954, "learning_rate": 1.5530702340541264e-05, "loss": 0.261, "step": 16884 }, { "epoch": 0.3134897502255656, "grad_norm": 0.503528892993927, "learning_rate": 1.5529730459145255e-05, "loss": 0.2253, "step": 16886 }, { "epoch": 0.3135268803629842, "grad_norm": 0.408740758895874, "learning_rate": 1.552875850250618e-05, "loss": 0.2096, "step": 16888 }, { "epoch": 0.31356401050040283, "grad_norm": 0.34150925278663635, "learning_rate": 1.552778647063726e-05, "loss": 0.4244, "step": 16890 }, { "epoch": 0.3136011406378215, "grad_norm": 0.5568698644638062, "learning_rate": 1.5526814363551728e-05, "loss": 0.3533, "step": 16892 }, { "epoch": 0.31363827077524015, "grad_norm": 0.32314637303352356, "learning_rate": 1.5525842181262806e-05, "loss": 0.38, "step": 16894 }, { "epoch": 0.3136754009126588, "grad_norm": 0.5012269020080566, "learning_rate": 1.552486992378372e-05, "loss": 0.3402, "step": 16896 }, { "epoch": 0.3137125310500774, "grad_norm": 0.37399527430534363, "learning_rate": 1.5523897591127706e-05, "loss": 0.2211, "step": 16898 }, { "epoch": 0.31374966118749603, "grad_norm": 0.41073545813560486, "learning_rate": 1.5522925183307994e-05, "loss": 0.2781, "step": 16900 }, { "epoch": 0.3137867913249147, "grad_norm": 0.426268994808197, "learning_rate": 1.552195270033781e-05, "loss": 0.3287, "step": 16902 }, { "epoch": 0.31382392146233334, "grad_norm": 0.3344692587852478, "learning_rate": 1.552098014223039e-05, "loss": 0.3983, "step": 16904 }, { "epoch": 0.313861051599752, "grad_norm": 0.4050685167312622, "learning_rate": 1.5520007508998967e-05, "loss": 0.222, "step": 16906 }, { "epoch": 0.3138981817371706, "grad_norm": 0.4483424425125122, "learning_rate": 1.5519034800656777e-05, "loss": 0.2489, "step": 16908 }, { "epoch": 0.31393531187458923, "grad_norm": 0.28396525979042053, "learning_rate": 1.551806201721706e-05, "loss": 0.3253, "step": 16910 }, { "epoch": 0.3139724420120079, "grad_norm": 0.3871673047542572, "learning_rate": 1.5517089158693036e-05, "loss": 0.361, "step": 16912 }, { "epoch": 0.31400957214942654, "grad_norm": 0.42115357518196106, "learning_rate": 1.551611622509796e-05, "loss": 0.3653, "step": 16914 }, { "epoch": 0.31404670228684517, "grad_norm": 0.37125369906425476, "learning_rate": 1.5515143216445068e-05, "loss": 0.2533, "step": 16916 }, { "epoch": 0.3140838324242638, "grad_norm": 0.58906489610672, "learning_rate": 1.5514170132747596e-05, "loss": 0.1792, "step": 16918 }, { "epoch": 0.3141209625616824, "grad_norm": 0.41949182748794556, "learning_rate": 1.551319697401878e-05, "loss": 0.2152, "step": 16920 }, { "epoch": 0.31415809269910105, "grad_norm": 0.46473515033721924, "learning_rate": 1.551222374027187e-05, "loss": 0.2991, "step": 16922 }, { "epoch": 0.31419522283651974, "grad_norm": 0.4395226538181305, "learning_rate": 1.551125043152011e-05, "loss": 0.4531, "step": 16924 }, { "epoch": 0.31423235297393837, "grad_norm": 0.41832226514816284, "learning_rate": 1.5510277047776738e-05, "loss": 0.224, "step": 16926 }, { "epoch": 0.314269483111357, "grad_norm": 0.3494199514389038, "learning_rate": 1.5509303589054995e-05, "loss": 0.6301, "step": 16928 }, { "epoch": 0.3143066132487756, "grad_norm": 0.3696920573711395, "learning_rate": 1.5508330055368135e-05, "loss": 0.3947, "step": 16930 }, { "epoch": 0.31434374338619425, "grad_norm": 0.3260973393917084, "learning_rate": 1.5507356446729408e-05, "loss": 0.373, "step": 16932 }, { "epoch": 0.31438087352361294, "grad_norm": 0.3837592303752899, "learning_rate": 1.550638276315205e-05, "loss": 0.4387, "step": 16934 }, { "epoch": 0.31441800366103156, "grad_norm": 0.42772239446640015, "learning_rate": 1.550540900464932e-05, "loss": 0.1604, "step": 16936 }, { "epoch": 0.3144551337984502, "grad_norm": 0.3555832505226135, "learning_rate": 1.550443517123446e-05, "loss": 0.2962, "step": 16938 }, { "epoch": 0.3144922639358688, "grad_norm": 0.4194395840167999, "learning_rate": 1.5503461262920726e-05, "loss": 0.2955, "step": 16940 }, { "epoch": 0.31452939407328745, "grad_norm": 0.36514461040496826, "learning_rate": 1.550248727972137e-05, "loss": 0.1946, "step": 16942 }, { "epoch": 0.3145665242107061, "grad_norm": 0.2874706983566284, "learning_rate": 1.5501513221649643e-05, "loss": 0.2431, "step": 16944 }, { "epoch": 0.31460365434812476, "grad_norm": 0.33884042501449585, "learning_rate": 1.5500539088718802e-05, "loss": 0.3121, "step": 16946 }, { "epoch": 0.3146407844855434, "grad_norm": 0.3478817641735077, "learning_rate": 1.54995648809421e-05, "loss": 0.3009, "step": 16948 }, { "epoch": 0.314677914622962, "grad_norm": 0.3149224817752838, "learning_rate": 1.549859059833279e-05, "loss": 0.1553, "step": 16950 }, { "epoch": 0.31471504476038065, "grad_norm": 0.3089819848537445, "learning_rate": 1.5497616240904135e-05, "loss": 0.3595, "step": 16952 }, { "epoch": 0.3147521748977993, "grad_norm": 0.323050320148468, "learning_rate": 1.5496641808669387e-05, "loss": 0.3407, "step": 16954 }, { "epoch": 0.31478930503521796, "grad_norm": 0.3516741394996643, "learning_rate": 1.5495667301641813e-05, "loss": 0.6208, "step": 16956 }, { "epoch": 0.3148264351726366, "grad_norm": 0.312967449426651, "learning_rate": 1.5494692719834668e-05, "loss": 0.2443, "step": 16958 }, { "epoch": 0.3148635653100552, "grad_norm": 0.3278021216392517, "learning_rate": 1.549371806326121e-05, "loss": 0.3215, "step": 16960 }, { "epoch": 0.31490069544747384, "grad_norm": 0.3084736764431, "learning_rate": 1.5492743331934708e-05, "loss": 0.4914, "step": 16962 }, { "epoch": 0.3149378255848925, "grad_norm": 0.4344116151332855, "learning_rate": 1.5491768525868417e-05, "loss": 0.4472, "step": 16964 }, { "epoch": 0.3149749557223111, "grad_norm": 0.2994072139263153, "learning_rate": 1.5490793645075612e-05, "loss": 0.2869, "step": 16966 }, { "epoch": 0.3150120858597298, "grad_norm": 0.4782739579677582, "learning_rate": 1.548981868956955e-05, "loss": 0.1901, "step": 16968 }, { "epoch": 0.3150492159971484, "grad_norm": 0.26447317004203796, "learning_rate": 1.5488843659363498e-05, "loss": 0.3519, "step": 16970 }, { "epoch": 0.31508634613456704, "grad_norm": 0.41366568207740784, "learning_rate": 1.5487868554470725e-05, "loss": 0.3481, "step": 16972 }, { "epoch": 0.31512347627198567, "grad_norm": 0.35360974073410034, "learning_rate": 1.54868933749045e-05, "loss": 0.369, "step": 16974 }, { "epoch": 0.3151606064094043, "grad_norm": 0.2950894832611084, "learning_rate": 1.5485918120678095e-05, "loss": 0.1567, "step": 16976 }, { "epoch": 0.315197736546823, "grad_norm": 0.48664331436157227, "learning_rate": 1.5484942791804772e-05, "loss": 0.2836, "step": 16978 }, { "epoch": 0.3152348666842416, "grad_norm": 0.3096611797809601, "learning_rate": 1.548396738829781e-05, "loss": 0.3211, "step": 16980 }, { "epoch": 0.31527199682166024, "grad_norm": 0.287286639213562, "learning_rate": 1.5482991910170474e-05, "loss": 0.2503, "step": 16982 }, { "epoch": 0.31530912695907887, "grad_norm": 0.36283108592033386, "learning_rate": 1.5482016357436044e-05, "loss": 0.2339, "step": 16984 }, { "epoch": 0.3153462570964975, "grad_norm": 0.4548643231391907, "learning_rate": 1.5481040730107793e-05, "loss": 0.2347, "step": 16986 }, { "epoch": 0.3153833872339162, "grad_norm": 0.23737511038780212, "learning_rate": 1.5480065028198993e-05, "loss": 0.3196, "step": 16988 }, { "epoch": 0.3154205173713348, "grad_norm": 0.39252200722694397, "learning_rate": 1.5479089251722924e-05, "loss": 0.1878, "step": 16990 }, { "epoch": 0.31545764750875344, "grad_norm": 0.5081819295883179, "learning_rate": 1.547811340069286e-05, "loss": 0.3945, "step": 16992 }, { "epoch": 0.31549477764617206, "grad_norm": 0.47546496987342834, "learning_rate": 1.5477137475122087e-05, "loss": 0.2928, "step": 16994 }, { "epoch": 0.3155319077835907, "grad_norm": 0.38005730509757996, "learning_rate": 1.5476161475023876e-05, "loss": 0.2861, "step": 16996 }, { "epoch": 0.3155690379210093, "grad_norm": 0.4586920142173767, "learning_rate": 1.5475185400411505e-05, "loss": 0.2932, "step": 16998 }, { "epoch": 0.315606168058428, "grad_norm": 0.3637450635433197, "learning_rate": 1.5474209251298266e-05, "loss": 0.2526, "step": 17000 }, { "epoch": 0.31564329819584663, "grad_norm": 0.44537630677223206, "learning_rate": 1.5473233027697435e-05, "loss": 0.1664, "step": 17002 }, { "epoch": 0.31568042833326526, "grad_norm": 0.5144463777542114, "learning_rate": 1.5472256729622296e-05, "loss": 0.3907, "step": 17004 }, { "epoch": 0.3157175584706839, "grad_norm": 0.3204866051673889, "learning_rate": 1.5471280357086137e-05, "loss": 0.1882, "step": 17006 }, { "epoch": 0.3157546886081025, "grad_norm": 0.6256248950958252, "learning_rate": 1.547030391010224e-05, "loss": 0.488, "step": 17008 }, { "epoch": 0.3157918187455212, "grad_norm": 0.44798097014427185, "learning_rate": 1.5469327388683892e-05, "loss": 0.254, "step": 17010 }, { "epoch": 0.31582894888293983, "grad_norm": 0.45150575041770935, "learning_rate": 1.546835079284438e-05, "loss": 0.2003, "step": 17012 }, { "epoch": 0.31586607902035846, "grad_norm": 0.2642674744129181, "learning_rate": 1.5467374122596993e-05, "loss": 0.2592, "step": 17014 }, { "epoch": 0.3159032091577771, "grad_norm": 0.45391687750816345, "learning_rate": 1.546639737795502e-05, "loss": 0.3936, "step": 17016 }, { "epoch": 0.3159403392951957, "grad_norm": 0.40966805815696716, "learning_rate": 1.5465420558931754e-05, "loss": 0.2343, "step": 17018 }, { "epoch": 0.31597746943261434, "grad_norm": 0.38056161999702454, "learning_rate": 1.5464443665540487e-05, "loss": 0.2579, "step": 17020 }, { "epoch": 0.31601459957003303, "grad_norm": 0.24162164330482483, "learning_rate": 1.5463466697794506e-05, "loss": 0.1599, "step": 17022 }, { "epoch": 0.31605172970745166, "grad_norm": 0.2503904104232788, "learning_rate": 1.546248965570711e-05, "loss": 0.3272, "step": 17024 }, { "epoch": 0.3160888598448703, "grad_norm": 0.2907984256744385, "learning_rate": 1.5461512539291594e-05, "loss": 0.3525, "step": 17026 }, { "epoch": 0.3161259899822889, "grad_norm": 0.24068839848041534, "learning_rate": 1.5460535348561245e-05, "loss": 0.4553, "step": 17028 }, { "epoch": 0.31616312011970754, "grad_norm": 0.3070763349533081, "learning_rate": 1.545955808352937e-05, "loss": 0.1747, "step": 17030 }, { "epoch": 0.3162002502571262, "grad_norm": 0.6431100368499756, "learning_rate": 1.5458580744209263e-05, "loss": 0.253, "step": 17032 }, { "epoch": 0.31623738039454485, "grad_norm": 0.3573244512081146, "learning_rate": 1.5457603330614223e-05, "loss": 0.378, "step": 17034 }, { "epoch": 0.3162745105319635, "grad_norm": 0.3873637914657593, "learning_rate": 1.5456625842757547e-05, "loss": 0.171, "step": 17036 }, { "epoch": 0.3163116406693821, "grad_norm": 0.4575580954551697, "learning_rate": 1.5455648280652542e-05, "loss": 0.3128, "step": 17038 }, { "epoch": 0.31634877080680074, "grad_norm": 0.3653605282306671, "learning_rate": 1.5454670644312502e-05, "loss": 0.3308, "step": 17040 }, { "epoch": 0.31638590094421937, "grad_norm": 0.29542601108551025, "learning_rate": 1.5453692933750733e-05, "loss": 0.0634, "step": 17042 }, { "epoch": 0.31642303108163805, "grad_norm": 0.29997730255126953, "learning_rate": 1.5452715148980542e-05, "loss": 0.2642, "step": 17044 }, { "epoch": 0.3164601612190567, "grad_norm": 0.6037067770957947, "learning_rate": 1.5451737290015226e-05, "loss": 0.1598, "step": 17046 }, { "epoch": 0.3164972913564753, "grad_norm": 0.3124706745147705, "learning_rate": 1.54507593568681e-05, "loss": 0.5156, "step": 17048 }, { "epoch": 0.31653442149389394, "grad_norm": 0.30188944935798645, "learning_rate": 1.5449781349552463e-05, "loss": 0.196, "step": 17050 }, { "epoch": 0.31657155163131256, "grad_norm": 0.2679090201854706, "learning_rate": 1.5448803268081623e-05, "loss": 0.174, "step": 17052 }, { "epoch": 0.31660868176873125, "grad_norm": 0.2522358000278473, "learning_rate": 1.5447825112468896e-05, "loss": 0.2339, "step": 17054 }, { "epoch": 0.3166458119061499, "grad_norm": 0.3371419608592987, "learning_rate": 1.5446846882727585e-05, "loss": 0.3042, "step": 17056 }, { "epoch": 0.3166829420435685, "grad_norm": 0.3254573941230774, "learning_rate": 1.5445868578871003e-05, "loss": 0.3273, "step": 17058 }, { "epoch": 0.31672007218098713, "grad_norm": 0.4566475450992584, "learning_rate": 1.5444890200912466e-05, "loss": 0.406, "step": 17060 }, { "epoch": 0.31675720231840576, "grad_norm": 0.32688191533088684, "learning_rate": 1.5443911748865278e-05, "loss": 0.3158, "step": 17062 }, { "epoch": 0.31679433245582445, "grad_norm": 0.3352530300617218, "learning_rate": 1.544293322274276e-05, "loss": 0.3865, "step": 17064 }, { "epoch": 0.3168314625932431, "grad_norm": 0.2378959059715271, "learning_rate": 1.5441954622558225e-05, "loss": 0.1334, "step": 17066 }, { "epoch": 0.3168685927306617, "grad_norm": 0.5573373436927795, "learning_rate": 1.5440975948324988e-05, "loss": 0.4126, "step": 17068 }, { "epoch": 0.31690572286808033, "grad_norm": 0.3591885268688202, "learning_rate": 1.5439997200056365e-05, "loss": 0.4827, "step": 17070 }, { "epoch": 0.31694285300549896, "grad_norm": 0.40282365679740906, "learning_rate": 1.5439018377765675e-05, "loss": 0.1898, "step": 17072 }, { "epoch": 0.3169799831429176, "grad_norm": 0.47648149728775024, "learning_rate": 1.5438039481466236e-05, "loss": 0.2402, "step": 17074 }, { "epoch": 0.31701711328033627, "grad_norm": 0.44104674458503723, "learning_rate": 1.543706051117137e-05, "loss": 0.2882, "step": 17076 }, { "epoch": 0.3170542434177549, "grad_norm": 0.341251015663147, "learning_rate": 1.5436081466894394e-05, "loss": 0.4102, "step": 17078 }, { "epoch": 0.31709137355517353, "grad_norm": 0.41686397790908813, "learning_rate": 1.543510234864863e-05, "loss": 0.3669, "step": 17080 }, { "epoch": 0.31712850369259216, "grad_norm": 0.4234611392021179, "learning_rate": 1.543412315644741e-05, "loss": 0.4593, "step": 17082 }, { "epoch": 0.3171656338300108, "grad_norm": 0.40406325459480286, "learning_rate": 1.5433143890304046e-05, "loss": 0.3915, "step": 17084 }, { "epoch": 0.31720276396742947, "grad_norm": 0.29293304681777954, "learning_rate": 1.543216455023187e-05, "loss": 0.2941, "step": 17086 }, { "epoch": 0.3172398941048481, "grad_norm": 0.3940310776233673, "learning_rate": 1.5431185136244204e-05, "loss": 0.4023, "step": 17088 }, { "epoch": 0.3172770242422667, "grad_norm": 0.42459636926651, "learning_rate": 1.543020564835438e-05, "loss": 0.3453, "step": 17090 }, { "epoch": 0.31731415437968535, "grad_norm": 0.3892497420310974, "learning_rate": 1.5429226086575718e-05, "loss": 0.3783, "step": 17092 }, { "epoch": 0.317351284517104, "grad_norm": 0.7685954570770264, "learning_rate": 1.5428246450921552e-05, "loss": 0.2637, "step": 17094 }, { "epoch": 0.3173884146545226, "grad_norm": 0.3846810758113861, "learning_rate": 1.5427266741405212e-05, "loss": 0.4204, "step": 17096 }, { "epoch": 0.3174255447919413, "grad_norm": 0.3611105680465698, "learning_rate": 1.542628695804003e-05, "loss": 0.3729, "step": 17098 }, { "epoch": 0.3174626749293599, "grad_norm": 0.42518800497055054, "learning_rate": 1.5425307100839332e-05, "loss": 0.2504, "step": 17100 }, { "epoch": 0.31749980506677855, "grad_norm": 0.27884796261787415, "learning_rate": 1.542432716981646e-05, "loss": 0.2942, "step": 17102 }, { "epoch": 0.3175369352041972, "grad_norm": 0.3047977387905121, "learning_rate": 1.542334716498474e-05, "loss": 0.3466, "step": 17104 }, { "epoch": 0.3175740653416158, "grad_norm": 0.21829275786876678, "learning_rate": 1.542236708635751e-05, "loss": 0.2413, "step": 17106 }, { "epoch": 0.3176111954790345, "grad_norm": 0.6674472689628601, "learning_rate": 1.542138693394811e-05, "loss": 0.1465, "step": 17108 }, { "epoch": 0.3176483256164531, "grad_norm": 0.40542134642601013, "learning_rate": 1.5420406707769867e-05, "loss": 0.4684, "step": 17110 }, { "epoch": 0.31768545575387175, "grad_norm": 0.4353960156440735, "learning_rate": 1.5419426407836126e-05, "loss": 0.207, "step": 17112 }, { "epoch": 0.3177225858912904, "grad_norm": 0.4035026431083679, "learning_rate": 1.541844603416023e-05, "loss": 0.2406, "step": 17114 }, { "epoch": 0.317759716028709, "grad_norm": 0.522284984588623, "learning_rate": 1.5417465586755508e-05, "loss": 0.2644, "step": 17116 }, { "epoch": 0.31779684616612763, "grad_norm": 0.46294915676116943, "learning_rate": 1.5416485065635305e-05, "loss": 0.2445, "step": 17118 }, { "epoch": 0.3178339763035463, "grad_norm": 0.3212882876396179, "learning_rate": 1.541550447081297e-05, "loss": 0.4095, "step": 17120 }, { "epoch": 0.31787110644096495, "grad_norm": 0.27929168939590454, "learning_rate": 1.5414523802301834e-05, "loss": 0.4618, "step": 17122 }, { "epoch": 0.3179082365783836, "grad_norm": 0.4637104570865631, "learning_rate": 1.541354306011525e-05, "loss": 0.1734, "step": 17124 }, { "epoch": 0.3179453667158022, "grad_norm": 0.2908938527107239, "learning_rate": 1.5412562244266563e-05, "loss": 0.3919, "step": 17126 }, { "epoch": 0.31798249685322083, "grad_norm": 0.49456197023391724, "learning_rate": 1.541158135476912e-05, "loss": 0.353, "step": 17128 }, { "epoch": 0.3180196269906395, "grad_norm": 0.4210249185562134, "learning_rate": 1.5410600391636255e-05, "loss": 0.5233, "step": 17130 }, { "epoch": 0.31805675712805814, "grad_norm": 0.3941677212715149, "learning_rate": 1.540961935488133e-05, "loss": 0.4472, "step": 17132 }, { "epoch": 0.31809388726547677, "grad_norm": 0.42962098121643066, "learning_rate": 1.540863824451769e-05, "loss": 0.317, "step": 17134 }, { "epoch": 0.3181310174028954, "grad_norm": 0.2861193120479584, "learning_rate": 1.5407657060558682e-05, "loss": 0.159, "step": 17136 }, { "epoch": 0.31816814754031403, "grad_norm": 0.39070311188697815, "learning_rate": 1.540667580301766e-05, "loss": 0.3073, "step": 17138 }, { "epoch": 0.3182052776777327, "grad_norm": 0.35487428307533264, "learning_rate": 1.5405694471907975e-05, "loss": 0.3436, "step": 17140 }, { "epoch": 0.31824240781515134, "grad_norm": 0.4509940445423126, "learning_rate": 1.5404713067242982e-05, "loss": 0.4102, "step": 17142 }, { "epoch": 0.31827953795256997, "grad_norm": 0.31767669320106506, "learning_rate": 1.540373158903603e-05, "loss": 0.1784, "step": 17144 }, { "epoch": 0.3183166680899886, "grad_norm": 0.6901447772979736, "learning_rate": 1.540275003730048e-05, "loss": 0.334, "step": 17146 }, { "epoch": 0.3183537982274072, "grad_norm": 0.3304341733455658, "learning_rate": 1.5401768412049682e-05, "loss": 0.3113, "step": 17148 }, { "epoch": 0.31839092836482585, "grad_norm": 0.46414175629615784, "learning_rate": 1.5400786713297e-05, "loss": 0.4127, "step": 17150 }, { "epoch": 0.31842805850224454, "grad_norm": 0.3294484615325928, "learning_rate": 1.539980494105579e-05, "loss": 0.2479, "step": 17152 }, { "epoch": 0.31846518863966317, "grad_norm": 0.7244925498962402, "learning_rate": 1.53988230953394e-05, "loss": 0.281, "step": 17154 }, { "epoch": 0.3185023187770818, "grad_norm": 0.4349120855331421, "learning_rate": 1.5397841176161205e-05, "loss": 0.388, "step": 17156 }, { "epoch": 0.3185394489145004, "grad_norm": 0.46940499544143677, "learning_rate": 1.539685918353456e-05, "loss": 0.3085, "step": 17158 }, { "epoch": 0.31857657905191905, "grad_norm": 0.33632388710975647, "learning_rate": 1.5395877117472828e-05, "loss": 0.3303, "step": 17160 }, { "epoch": 0.31861370918933773, "grad_norm": 0.5118480920791626, "learning_rate": 1.5394894977989368e-05, "loss": 0.2932, "step": 17162 }, { "epoch": 0.31865083932675636, "grad_norm": 0.35144031047821045, "learning_rate": 1.539391276509755e-05, "loss": 0.3017, "step": 17164 }, { "epoch": 0.318687969464175, "grad_norm": 0.29943007230758667, "learning_rate": 1.5392930478810732e-05, "loss": 0.4069, "step": 17166 }, { "epoch": 0.3187250996015936, "grad_norm": 0.738653838634491, "learning_rate": 1.5391948119142284e-05, "loss": 0.3162, "step": 17168 }, { "epoch": 0.31876222973901225, "grad_norm": 0.33713826537132263, "learning_rate": 1.5390965686105578e-05, "loss": 0.2667, "step": 17170 }, { "epoch": 0.3187993598764309, "grad_norm": 0.3678927421569824, "learning_rate": 1.538998317971397e-05, "loss": 0.4504, "step": 17172 }, { "epoch": 0.31883649001384956, "grad_norm": 0.24662251770496368, "learning_rate": 1.5389000599980838e-05, "loss": 0.2912, "step": 17174 }, { "epoch": 0.3188736201512682, "grad_norm": 0.4291885495185852, "learning_rate": 1.5388017946919554e-05, "loss": 0.3903, "step": 17176 }, { "epoch": 0.3189107502886868, "grad_norm": 0.286139577627182, "learning_rate": 1.5387035220543482e-05, "loss": 0.4978, "step": 17178 }, { "epoch": 0.31894788042610545, "grad_norm": 0.3912820518016815, "learning_rate": 1.5386052420865993e-05, "loss": 0.3459, "step": 17180 }, { "epoch": 0.3189850105635241, "grad_norm": 0.36913177371025085, "learning_rate": 1.538506954790047e-05, "loss": 0.2298, "step": 17182 }, { "epoch": 0.31902214070094276, "grad_norm": 0.4968512952327728, "learning_rate": 1.5384086601660278e-05, "loss": 0.2571, "step": 17184 }, { "epoch": 0.3190592708383614, "grad_norm": 0.37922948598861694, "learning_rate": 1.5383103582158792e-05, "loss": 0.3074, "step": 17186 }, { "epoch": 0.31909640097578, "grad_norm": 0.42133092880249023, "learning_rate": 1.5382120489409394e-05, "loss": 0.3144, "step": 17188 }, { "epoch": 0.31913353111319864, "grad_norm": 0.28329595923423767, "learning_rate": 1.5381137323425455e-05, "loss": 0.3173, "step": 17190 }, { "epoch": 0.31917066125061727, "grad_norm": 0.3245623707771301, "learning_rate": 1.5380154084220357e-05, "loss": 0.2919, "step": 17192 }, { "epoch": 0.3192077913880359, "grad_norm": 0.4003051519393921, "learning_rate": 1.5379170771807478e-05, "loss": 0.4647, "step": 17194 }, { "epoch": 0.3192449215254546, "grad_norm": 0.3576517701148987, "learning_rate": 1.5378187386200196e-05, "loss": 0.1311, "step": 17196 }, { "epoch": 0.3192820516628732, "grad_norm": 0.32612344622612, "learning_rate": 1.5377203927411895e-05, "loss": 0.3704, "step": 17198 }, { "epoch": 0.31931918180029184, "grad_norm": 0.21062368154525757, "learning_rate": 1.537622039545595e-05, "loss": 0.3815, "step": 17200 }, { "epoch": 0.31935631193771047, "grad_norm": 0.4796724021434784, "learning_rate": 1.5375236790345755e-05, "loss": 0.6317, "step": 17202 }, { "epoch": 0.3193934420751291, "grad_norm": 0.3687177896499634, "learning_rate": 1.5374253112094688e-05, "loss": 0.1888, "step": 17204 }, { "epoch": 0.3194305722125478, "grad_norm": 0.3645276129245758, "learning_rate": 1.537326936071613e-05, "loss": 0.1396, "step": 17206 }, { "epoch": 0.3194677023499664, "grad_norm": 0.34646156430244446, "learning_rate": 1.537228553622347e-05, "loss": 0.3101, "step": 17208 }, { "epoch": 0.31950483248738504, "grad_norm": 0.3313872218132019, "learning_rate": 1.53713016386301e-05, "loss": 0.1897, "step": 17210 }, { "epoch": 0.31954196262480367, "grad_norm": 0.5552208423614502, "learning_rate": 1.53703176679494e-05, "loss": 0.4732, "step": 17212 }, { "epoch": 0.3195790927622223, "grad_norm": 0.3241727948188782, "learning_rate": 1.5369333624194766e-05, "loss": 0.1679, "step": 17214 }, { "epoch": 0.319616222899641, "grad_norm": 0.3199171721935272, "learning_rate": 1.5368349507379583e-05, "loss": 0.2744, "step": 17216 }, { "epoch": 0.3196533530370596, "grad_norm": 0.28099092841148376, "learning_rate": 1.536736531751724e-05, "loss": 0.258, "step": 17218 }, { "epoch": 0.31969048317447823, "grad_norm": 0.2466171681880951, "learning_rate": 1.5366381054621137e-05, "loss": 0.3925, "step": 17220 }, { "epoch": 0.31972761331189686, "grad_norm": 0.33610379695892334, "learning_rate": 1.536539671870466e-05, "loss": 0.3924, "step": 17222 }, { "epoch": 0.3197647434493155, "grad_norm": 0.47139471769332886, "learning_rate": 1.5364412309781204e-05, "loss": 0.2039, "step": 17224 }, { "epoch": 0.3198018735867341, "grad_norm": 0.3382294476032257, "learning_rate": 1.5363427827864165e-05, "loss": 0.3758, "step": 17226 }, { "epoch": 0.3198390037241528, "grad_norm": 0.3508336544036865, "learning_rate": 1.5362443272966943e-05, "loss": 0.3285, "step": 17228 }, { "epoch": 0.31987613386157143, "grad_norm": 0.5476653575897217, "learning_rate": 1.5361458645102926e-05, "loss": 0.2605, "step": 17230 }, { "epoch": 0.31991326399899006, "grad_norm": 0.31968954205513, "learning_rate": 1.5360473944285515e-05, "loss": 0.2254, "step": 17232 }, { "epoch": 0.3199503941364087, "grad_norm": 0.8002970814704895, "learning_rate": 1.5359489170528113e-05, "loss": 0.3504, "step": 17234 }, { "epoch": 0.3199875242738273, "grad_norm": 0.4641616642475128, "learning_rate": 1.5358504323844118e-05, "loss": 0.4218, "step": 17236 }, { "epoch": 0.320024654411246, "grad_norm": 0.20135943591594696, "learning_rate": 1.5357519404246923e-05, "loss": 0.1754, "step": 17238 }, { "epoch": 0.32006178454866463, "grad_norm": 0.41504454612731934, "learning_rate": 1.5356534411749943e-05, "loss": 0.2351, "step": 17240 }, { "epoch": 0.32009891468608326, "grad_norm": 0.41960692405700684, "learning_rate": 1.5355549346366575e-05, "loss": 0.1294, "step": 17242 }, { "epoch": 0.3201360448235019, "grad_norm": 0.26389211416244507, "learning_rate": 1.5354564208110217e-05, "loss": 0.2593, "step": 17244 }, { "epoch": 0.3201731749609205, "grad_norm": 0.4203268587589264, "learning_rate": 1.5353578996994284e-05, "loss": 0.3053, "step": 17246 }, { "epoch": 0.32021030509833914, "grad_norm": 0.36045747995376587, "learning_rate": 1.5352593713032173e-05, "loss": 0.3535, "step": 17248 }, { "epoch": 0.3202474352357578, "grad_norm": 0.3000342845916748, "learning_rate": 1.5351608356237296e-05, "loss": 0.2659, "step": 17250 }, { "epoch": 0.32028456537317646, "grad_norm": 0.36360421776771545, "learning_rate": 1.535062292662306e-05, "loss": 0.2136, "step": 17252 }, { "epoch": 0.3203216955105951, "grad_norm": 0.33014458417892456, "learning_rate": 1.534963742420287e-05, "loss": 0.3287, "step": 17254 }, { "epoch": 0.3203588256480137, "grad_norm": 0.39336514472961426, "learning_rate": 1.534865184899014e-05, "loss": 0.3898, "step": 17256 }, { "epoch": 0.32039595578543234, "grad_norm": 0.46903207898139954, "learning_rate": 1.5347666200998278e-05, "loss": 0.4361, "step": 17258 }, { "epoch": 0.320433085922851, "grad_norm": 0.300927996635437, "learning_rate": 1.53466804802407e-05, "loss": 0.304, "step": 17260 }, { "epoch": 0.32047021606026965, "grad_norm": 0.4261765778064728, "learning_rate": 1.5345694686730818e-05, "loss": 0.2999, "step": 17262 }, { "epoch": 0.3205073461976883, "grad_norm": 0.3545638620853424, "learning_rate": 1.5344708820482036e-05, "loss": 0.2746, "step": 17264 }, { "epoch": 0.3205444763351069, "grad_norm": 0.2779943346977234, "learning_rate": 1.5343722881507785e-05, "loss": 0.1993, "step": 17266 }, { "epoch": 0.32058160647252554, "grad_norm": 0.375210702419281, "learning_rate": 1.5342736869821467e-05, "loss": 0.2526, "step": 17268 }, { "epoch": 0.32061873660994417, "grad_norm": 0.434173047542572, "learning_rate": 1.53417507854365e-05, "loss": 0.313, "step": 17270 }, { "epoch": 0.32065586674736285, "grad_norm": 0.46127668023109436, "learning_rate": 1.5340764628366312e-05, "loss": 0.4468, "step": 17272 }, { "epoch": 0.3206929968847815, "grad_norm": 0.49569782614707947, "learning_rate": 1.533977839862431e-05, "loss": 0.4376, "step": 17274 }, { "epoch": 0.3207301270222001, "grad_norm": 0.4674670696258545, "learning_rate": 1.5338792096223923e-05, "loss": 0.2679, "step": 17276 }, { "epoch": 0.32076725715961873, "grad_norm": 0.3406663239002228, "learning_rate": 1.5337805721178564e-05, "loss": 0.3752, "step": 17278 }, { "epoch": 0.32080438729703736, "grad_norm": 0.35662025213241577, "learning_rate": 1.5336819273501657e-05, "loss": 0.3691, "step": 17280 }, { "epoch": 0.32084151743445605, "grad_norm": 0.28006112575531006, "learning_rate": 1.5335832753206627e-05, "loss": 0.3284, "step": 17282 }, { "epoch": 0.3208786475718747, "grad_norm": 0.2908593714237213, "learning_rate": 1.5334846160306898e-05, "loss": 0.4365, "step": 17284 }, { "epoch": 0.3209157777092933, "grad_norm": 0.3357962369918823, "learning_rate": 1.533385949481589e-05, "loss": 0.3913, "step": 17286 }, { "epoch": 0.32095290784671193, "grad_norm": 0.4044245183467865, "learning_rate": 1.533287275674703e-05, "loss": 0.2279, "step": 17288 }, { "epoch": 0.32099003798413056, "grad_norm": 0.30236268043518066, "learning_rate": 1.533188594611375e-05, "loss": 0.3203, "step": 17290 }, { "epoch": 0.32102716812154924, "grad_norm": 0.31960272789001465, "learning_rate": 1.5330899062929466e-05, "loss": 0.5171, "step": 17292 }, { "epoch": 0.3210642982589679, "grad_norm": 0.41443485021591187, "learning_rate": 1.532991210720762e-05, "loss": 0.317, "step": 17294 }, { "epoch": 0.3211014283963865, "grad_norm": 0.35447803139686584, "learning_rate": 1.532892507896163e-05, "loss": 0.2812, "step": 17296 }, { "epoch": 0.32113855853380513, "grad_norm": 0.25095534324645996, "learning_rate": 1.5327937978204934e-05, "loss": 0.2661, "step": 17298 }, { "epoch": 0.32117568867122376, "grad_norm": 0.4252188801765442, "learning_rate": 1.5326950804950962e-05, "loss": 0.3085, "step": 17300 }, { "epoch": 0.3212128188086424, "grad_norm": 0.44707366824150085, "learning_rate": 1.5325963559213145e-05, "loss": 0.3979, "step": 17302 }, { "epoch": 0.32124994894606107, "grad_norm": 0.48115578293800354, "learning_rate": 1.5324976241004917e-05, "loss": 0.0781, "step": 17304 }, { "epoch": 0.3212870790834797, "grad_norm": 0.37249040603637695, "learning_rate": 1.532398885033971e-05, "loss": 0.2739, "step": 17306 }, { "epoch": 0.3213242092208983, "grad_norm": 0.3208727240562439, "learning_rate": 1.5323001387230966e-05, "loss": 0.4704, "step": 17308 }, { "epoch": 0.32136133935831696, "grad_norm": 0.5011731386184692, "learning_rate": 1.5322013851692115e-05, "loss": 0.1942, "step": 17310 }, { "epoch": 0.3213984694957356, "grad_norm": 0.3763359785079956, "learning_rate": 1.53210262437366e-05, "loss": 0.2026, "step": 17312 }, { "epoch": 0.32143559963315427, "grad_norm": 0.22363798320293427, "learning_rate": 1.5320038563377852e-05, "loss": 0.2756, "step": 17314 }, { "epoch": 0.3214727297705729, "grad_norm": 0.48202216625213623, "learning_rate": 1.5319050810629318e-05, "loss": 0.3317, "step": 17316 }, { "epoch": 0.3215098599079915, "grad_norm": 0.6128024458885193, "learning_rate": 1.531806298550443e-05, "loss": 0.474, "step": 17318 }, { "epoch": 0.32154699004541015, "grad_norm": 0.42750561237335205, "learning_rate": 1.531707508801664e-05, "loss": 0.1523, "step": 17320 }, { "epoch": 0.3215841201828288, "grad_norm": 0.2953563928604126, "learning_rate": 1.5316087118179384e-05, "loss": 0.2984, "step": 17322 }, { "epoch": 0.3216212503202474, "grad_norm": 0.252926766872406, "learning_rate": 1.5315099076006102e-05, "loss": 0.2459, "step": 17324 }, { "epoch": 0.3216583804576661, "grad_norm": 0.45459315180778503, "learning_rate": 1.5314110961510243e-05, "loss": 0.367, "step": 17326 }, { "epoch": 0.3216955105950847, "grad_norm": 0.42746007442474365, "learning_rate": 1.5313122774705252e-05, "loss": 0.4985, "step": 17328 }, { "epoch": 0.32173264073250335, "grad_norm": 0.46953216195106506, "learning_rate": 1.5312134515604576e-05, "loss": 0.3558, "step": 17330 }, { "epoch": 0.321769770869922, "grad_norm": 0.5032084584236145, "learning_rate": 1.5311146184221662e-05, "loss": 0.4311, "step": 17332 }, { "epoch": 0.3218069010073406, "grad_norm": 0.23475800454616547, "learning_rate": 1.5310157780569955e-05, "loss": 0.227, "step": 17334 }, { "epoch": 0.3218440311447593, "grad_norm": 0.39614832401275635, "learning_rate": 1.5309169304662902e-05, "loss": 0.3198, "step": 17336 }, { "epoch": 0.3218811612821779, "grad_norm": 0.5124629139900208, "learning_rate": 1.5308180756513964e-05, "loss": 0.4336, "step": 17338 }, { "epoch": 0.32191829141959655, "grad_norm": 0.44774746894836426, "learning_rate": 1.5307192136136585e-05, "loss": 0.2818, "step": 17340 }, { "epoch": 0.3219554215570152, "grad_norm": 0.3130606412887573, "learning_rate": 1.5306203443544216e-05, "loss": 0.1607, "step": 17342 }, { "epoch": 0.3219925516944338, "grad_norm": 0.29858627915382385, "learning_rate": 1.530521467875031e-05, "loss": 0.4292, "step": 17344 }, { "epoch": 0.32202968183185243, "grad_norm": 0.4775102734565735, "learning_rate": 1.530422584176833e-05, "loss": 0.3906, "step": 17346 }, { "epoch": 0.3220668119692711, "grad_norm": 0.3680359125137329, "learning_rate": 1.530323693261172e-05, "loss": 0.3493, "step": 17348 }, { "epoch": 0.32210394210668974, "grad_norm": 0.3718049228191376, "learning_rate": 1.5302247951293937e-05, "loss": 0.3705, "step": 17350 }, { "epoch": 0.3221410722441084, "grad_norm": 0.4301467835903168, "learning_rate": 1.530125889782845e-05, "loss": 0.3691, "step": 17352 }, { "epoch": 0.322178202381527, "grad_norm": 0.4566783905029297, "learning_rate": 1.53002697722287e-05, "loss": 0.2242, "step": 17354 }, { "epoch": 0.32221533251894563, "grad_norm": 0.40902405977249146, "learning_rate": 1.5299280574508156e-05, "loss": 0.3981, "step": 17356 }, { "epoch": 0.3222524626563643, "grad_norm": 0.30622631311416626, "learning_rate": 1.529829130468028e-05, "loss": 0.3508, "step": 17358 }, { "epoch": 0.32228959279378294, "grad_norm": 0.4049510955810547, "learning_rate": 1.529730196275853e-05, "loss": 0.3525, "step": 17360 }, { "epoch": 0.32232672293120157, "grad_norm": 0.3212869465351105, "learning_rate": 1.5296312548756364e-05, "loss": 0.317, "step": 17362 }, { "epoch": 0.3223638530686202, "grad_norm": 0.41256698966026306, "learning_rate": 1.5295323062687254e-05, "loss": 0.4926, "step": 17364 }, { "epoch": 0.3224009832060388, "grad_norm": 0.38584163784980774, "learning_rate": 1.5294333504564652e-05, "loss": 0.4525, "step": 17366 }, { "epoch": 0.3224381133434575, "grad_norm": 0.4642098844051361, "learning_rate": 1.529334387440203e-05, "loss": 0.2633, "step": 17368 }, { "epoch": 0.32247524348087614, "grad_norm": 0.5035676956176758, "learning_rate": 1.5292354172212854e-05, "loss": 0.215, "step": 17370 }, { "epoch": 0.32251237361829477, "grad_norm": 0.36337435245513916, "learning_rate": 1.5291364398010596e-05, "loss": 0.2318, "step": 17372 }, { "epoch": 0.3225495037557134, "grad_norm": 0.3551802635192871, "learning_rate": 1.5290374551808715e-05, "loss": 0.2171, "step": 17374 }, { "epoch": 0.322586633893132, "grad_norm": 0.30073145031929016, "learning_rate": 1.5289384633620678e-05, "loss": 0.2128, "step": 17376 }, { "epoch": 0.32262376403055065, "grad_norm": 0.23783640563488007, "learning_rate": 1.5288394643459964e-05, "loss": 0.3694, "step": 17378 }, { "epoch": 0.32266089416796934, "grad_norm": 0.27272284030914307, "learning_rate": 1.5287404581340036e-05, "loss": 0.5207, "step": 17380 }, { "epoch": 0.32269802430538796, "grad_norm": 0.4207136034965515, "learning_rate": 1.528641444727437e-05, "loss": 0.4435, "step": 17382 }, { "epoch": 0.3227351544428066, "grad_norm": 0.37411409616470337, "learning_rate": 1.5285424241276444e-05, "loss": 0.2533, "step": 17384 }, { "epoch": 0.3227722845802252, "grad_norm": 0.41730114817619324, "learning_rate": 1.528443396335972e-05, "loss": 0.1833, "step": 17386 }, { "epoch": 0.32280941471764385, "grad_norm": 0.2243090122938156, "learning_rate": 1.528344361353768e-05, "loss": 0.2127, "step": 17388 }, { "epoch": 0.32284654485506253, "grad_norm": 0.3316304385662079, "learning_rate": 1.5282453191823797e-05, "loss": 0.3668, "step": 17390 }, { "epoch": 0.32288367499248116, "grad_norm": 0.29832443594932556, "learning_rate": 1.528146269823155e-05, "loss": 0.372, "step": 17392 }, { "epoch": 0.3229208051298998, "grad_norm": 0.44492384791374207, "learning_rate": 1.5280472132774414e-05, "loss": 0.3083, "step": 17394 }, { "epoch": 0.3229579352673184, "grad_norm": 0.4695076048374176, "learning_rate": 1.5279481495465874e-05, "loss": 0.275, "step": 17396 }, { "epoch": 0.32299506540473705, "grad_norm": 0.5234975814819336, "learning_rate": 1.5278490786319403e-05, "loss": 0.2488, "step": 17398 }, { "epoch": 0.3230321955421557, "grad_norm": 0.2769046127796173, "learning_rate": 1.527750000534848e-05, "loss": 0.3021, "step": 17400 }, { "epoch": 0.32306932567957436, "grad_norm": 0.3211325407028198, "learning_rate": 1.527650915256659e-05, "loss": 0.2848, "step": 17402 }, { "epoch": 0.323106455816993, "grad_norm": 0.3746379315853119, "learning_rate": 1.5275518227987218e-05, "loss": 0.4673, "step": 17404 }, { "epoch": 0.3231435859544116, "grad_norm": 0.5678956508636475, "learning_rate": 1.5274527231623844e-05, "loss": 0.2224, "step": 17406 }, { "epoch": 0.32318071609183024, "grad_norm": 0.3615468740463257, "learning_rate": 1.5273536163489953e-05, "loss": 0.4327, "step": 17408 }, { "epoch": 0.3232178462292489, "grad_norm": 0.37712574005126953, "learning_rate": 1.5272545023599032e-05, "loss": 0.3222, "step": 17410 }, { "epoch": 0.32325497636666756, "grad_norm": 0.24174006283283234, "learning_rate": 1.5271553811964566e-05, "loss": 0.2971, "step": 17412 }, { "epoch": 0.3232921065040862, "grad_norm": 0.38790497183799744, "learning_rate": 1.527056252860004e-05, "loss": 0.253, "step": 17414 }, { "epoch": 0.3233292366415048, "grad_norm": 0.28069284558296204, "learning_rate": 1.526957117351895e-05, "loss": 0.4008, "step": 17416 }, { "epoch": 0.32336636677892344, "grad_norm": 0.37146157026290894, "learning_rate": 1.5268579746734777e-05, "loss": 0.373, "step": 17418 }, { "epoch": 0.32340349691634207, "grad_norm": 0.4824257493019104, "learning_rate": 1.5267588248261016e-05, "loss": 0.3018, "step": 17420 }, { "epoch": 0.3234406270537607, "grad_norm": 0.47460636496543884, "learning_rate": 1.526659667811116e-05, "loss": 0.303, "step": 17422 }, { "epoch": 0.3234777571911794, "grad_norm": 0.30944719910621643, "learning_rate": 1.526560503629869e-05, "loss": 0.2781, "step": 17424 }, { "epoch": 0.323514887328598, "grad_norm": 0.29679232835769653, "learning_rate": 1.5264613322837116e-05, "loss": 0.2733, "step": 17426 }, { "epoch": 0.32355201746601664, "grad_norm": 0.3553576171398163, "learning_rate": 1.5263621537739922e-05, "loss": 0.4171, "step": 17428 }, { "epoch": 0.32358914760343527, "grad_norm": 0.5936649441719055, "learning_rate": 1.5262629681020603e-05, "loss": 0.4375, "step": 17430 }, { "epoch": 0.3236262777408539, "grad_norm": 0.28599485754966736, "learning_rate": 1.5261637752692658e-05, "loss": 0.3791, "step": 17432 }, { "epoch": 0.3236634078782726, "grad_norm": 0.3888452351093292, "learning_rate": 1.5260645752769585e-05, "loss": 0.4027, "step": 17434 }, { "epoch": 0.3237005380156912, "grad_norm": 0.4386485815048218, "learning_rate": 1.525965368126488e-05, "loss": 0.4206, "step": 17436 }, { "epoch": 0.32373766815310984, "grad_norm": 0.5147672295570374, "learning_rate": 1.5258661538192043e-05, "loss": 0.2118, "step": 17438 }, { "epoch": 0.32377479829052846, "grad_norm": 0.3790130019187927, "learning_rate": 1.5257669323564575e-05, "loss": 0.3404, "step": 17440 }, { "epoch": 0.3238119284279471, "grad_norm": 0.3347530961036682, "learning_rate": 1.5256677037395977e-05, "loss": 0.35, "step": 17442 }, { "epoch": 0.3238490585653658, "grad_norm": 0.5212411284446716, "learning_rate": 1.5255684679699747e-05, "loss": 0.4494, "step": 17444 }, { "epoch": 0.3238861887027844, "grad_norm": 0.3576100468635559, "learning_rate": 1.525469225048939e-05, "loss": 0.2731, "step": 17446 }, { "epoch": 0.32392331884020303, "grad_norm": 0.33504900336265564, "learning_rate": 1.5253699749778417e-05, "loss": 0.2819, "step": 17448 }, { "epoch": 0.32396044897762166, "grad_norm": 0.3056875467300415, "learning_rate": 1.5252707177580322e-05, "loss": 0.4526, "step": 17450 }, { "epoch": 0.3239975791150403, "grad_norm": 0.5367453694343567, "learning_rate": 1.5251714533908617e-05, "loss": 0.1851, "step": 17452 }, { "epoch": 0.3240347092524589, "grad_norm": 0.41884738206863403, "learning_rate": 1.5250721818776811e-05, "loss": 0.3973, "step": 17454 }, { "epoch": 0.3240718393898776, "grad_norm": 0.3681398034095764, "learning_rate": 1.5249729032198404e-05, "loss": 0.2082, "step": 17456 }, { "epoch": 0.32410896952729623, "grad_norm": 0.41263511776924133, "learning_rate": 1.5248736174186913e-05, "loss": 0.294, "step": 17458 }, { "epoch": 0.32414609966471486, "grad_norm": 0.366334468126297, "learning_rate": 1.5247743244755846e-05, "loss": 0.5154, "step": 17460 }, { "epoch": 0.3241832298021335, "grad_norm": 0.5292330384254456, "learning_rate": 1.5246750243918707e-05, "loss": 0.2717, "step": 17462 }, { "epoch": 0.3242203599395521, "grad_norm": 0.35583099722862244, "learning_rate": 1.5245757171689016e-05, "loss": 0.4378, "step": 17464 }, { "epoch": 0.3242574900769708, "grad_norm": 0.28098538517951965, "learning_rate": 1.5244764028080279e-05, "loss": 0.3424, "step": 17466 }, { "epoch": 0.32429462021438943, "grad_norm": 0.49299970269203186, "learning_rate": 1.5243770813106018e-05, "loss": 0.4128, "step": 17468 }, { "epoch": 0.32433175035180806, "grad_norm": 0.3476695418357849, "learning_rate": 1.5242777526779744e-05, "loss": 0.2381, "step": 17470 }, { "epoch": 0.3243688804892267, "grad_norm": 0.4195510745048523, "learning_rate": 1.5241784169114968e-05, "loss": 0.3695, "step": 17472 }, { "epoch": 0.3244060106266453, "grad_norm": 0.26670563220977783, "learning_rate": 1.5240790740125215e-05, "loss": 0.3813, "step": 17474 }, { "epoch": 0.32444314076406394, "grad_norm": 0.2688024938106537, "learning_rate": 1.5239797239823992e-05, "loss": 0.2917, "step": 17476 }, { "epoch": 0.3244802709014826, "grad_norm": 0.29855844378471375, "learning_rate": 1.5238803668224826e-05, "loss": 0.1792, "step": 17478 }, { "epoch": 0.32451740103890125, "grad_norm": 0.4138330817222595, "learning_rate": 1.5237810025341238e-05, "loss": 0.3354, "step": 17480 }, { "epoch": 0.3245545311763199, "grad_norm": 0.3219321072101593, "learning_rate": 1.5236816311186742e-05, "loss": 0.6215, "step": 17482 }, { "epoch": 0.3245916613137385, "grad_norm": 0.31449997425079346, "learning_rate": 1.5235822525774859e-05, "loss": 0.264, "step": 17484 }, { "epoch": 0.32462879145115714, "grad_norm": 0.29745444655418396, "learning_rate": 1.5234828669119116e-05, "loss": 0.3373, "step": 17486 }, { "epoch": 0.3246659215885758, "grad_norm": 0.2264559417963028, "learning_rate": 1.5233834741233038e-05, "loss": 0.2489, "step": 17488 }, { "epoch": 0.32470305172599445, "grad_norm": 0.43536579608917236, "learning_rate": 1.5232840742130143e-05, "loss": 0.1644, "step": 17490 }, { "epoch": 0.3247401818634131, "grad_norm": 0.18491986393928528, "learning_rate": 1.5231846671823965e-05, "loss": 0.3477, "step": 17492 }, { "epoch": 0.3247773120008317, "grad_norm": 0.39093300700187683, "learning_rate": 1.523085253032802e-05, "loss": 0.4341, "step": 17494 }, { "epoch": 0.32481444213825034, "grad_norm": 0.37062758207321167, "learning_rate": 1.5229858317655839e-05, "loss": 0.2965, "step": 17496 }, { "epoch": 0.32485157227566896, "grad_norm": 0.2674814760684967, "learning_rate": 1.5228864033820956e-05, "loss": 0.2219, "step": 17498 }, { "epoch": 0.32488870241308765, "grad_norm": 0.3341875970363617, "learning_rate": 1.5227869678836892e-05, "loss": 0.2747, "step": 17500 }, { "epoch": 0.3249258325505063, "grad_norm": 0.2975541651248932, "learning_rate": 1.5226875252717184e-05, "loss": 0.4577, "step": 17502 }, { "epoch": 0.3249629626879249, "grad_norm": 0.9835726618766785, "learning_rate": 1.522588075547536e-05, "loss": 0.2427, "step": 17504 }, { "epoch": 0.32500009282534353, "grad_norm": 0.39639636874198914, "learning_rate": 1.5224886187124947e-05, "loss": 0.2435, "step": 17506 }, { "epoch": 0.32503722296276216, "grad_norm": 0.2899585962295532, "learning_rate": 1.5223891547679488e-05, "loss": 0.2769, "step": 17508 }, { "epoch": 0.32507435310018085, "grad_norm": 0.25025486946105957, "learning_rate": 1.522289683715251e-05, "loss": 0.2264, "step": 17510 }, { "epoch": 0.3251114832375995, "grad_norm": 0.662032425403595, "learning_rate": 1.5221902055557556e-05, "loss": 0.1958, "step": 17512 }, { "epoch": 0.3251486133750181, "grad_norm": 0.5503290295600891, "learning_rate": 1.5220907202908151e-05, "loss": 0.1985, "step": 17514 }, { "epoch": 0.32518574351243673, "grad_norm": 0.38150089979171753, "learning_rate": 1.5219912279217839e-05, "loss": 0.4144, "step": 17516 }, { "epoch": 0.32522287364985536, "grad_norm": 0.4446289837360382, "learning_rate": 1.5218917284500157e-05, "loss": 0.2535, "step": 17518 }, { "epoch": 0.32526000378727404, "grad_norm": 0.5262390375137329, "learning_rate": 1.5217922218768641e-05, "loss": 0.3208, "step": 17520 }, { "epoch": 0.32529713392469267, "grad_norm": 0.4500311613082886, "learning_rate": 1.5216927082036837e-05, "loss": 0.2856, "step": 17522 }, { "epoch": 0.3253342640621113, "grad_norm": 0.213739812374115, "learning_rate": 1.521593187431828e-05, "loss": 0.2688, "step": 17524 }, { "epoch": 0.32537139419952993, "grad_norm": 0.38909661769866943, "learning_rate": 1.5214936595626514e-05, "loss": 0.429, "step": 17526 }, { "epoch": 0.32540852433694856, "grad_norm": 0.2122373729944229, "learning_rate": 1.5213941245975077e-05, "loss": 0.2699, "step": 17528 }, { "epoch": 0.3254456544743672, "grad_norm": 0.2740071713924408, "learning_rate": 1.5212945825377521e-05, "loss": 0.3057, "step": 17530 }, { "epoch": 0.32548278461178587, "grad_norm": 0.2417953759431839, "learning_rate": 1.5211950333847387e-05, "loss": 0.1686, "step": 17532 }, { "epoch": 0.3255199147492045, "grad_norm": 0.46877309679985046, "learning_rate": 1.521095477139822e-05, "loss": 0.3598, "step": 17534 }, { "epoch": 0.3255570448866231, "grad_norm": 0.3852144777774811, "learning_rate": 1.5209959138043568e-05, "loss": 0.3551, "step": 17536 }, { "epoch": 0.32559417502404175, "grad_norm": 0.27003175020217896, "learning_rate": 1.5208963433796977e-05, "loss": 0.171, "step": 17538 }, { "epoch": 0.3256313051614604, "grad_norm": 0.373787522315979, "learning_rate": 1.5207967658671995e-05, "loss": 0.3101, "step": 17540 }, { "epoch": 0.32566843529887907, "grad_norm": 0.48493140935897827, "learning_rate": 1.5206971812682176e-05, "loss": 0.2358, "step": 17542 }, { "epoch": 0.3257055654362977, "grad_norm": 0.3519165515899658, "learning_rate": 1.5205975895841067e-05, "loss": 0.2814, "step": 17544 }, { "epoch": 0.3257426955737163, "grad_norm": 0.3446064889431, "learning_rate": 1.520497990816222e-05, "loss": 0.1971, "step": 17546 }, { "epoch": 0.32577982571113495, "grad_norm": 0.43002253770828247, "learning_rate": 1.5203983849659184e-05, "loss": 0.2282, "step": 17548 }, { "epoch": 0.3258169558485536, "grad_norm": 0.3817003667354584, "learning_rate": 1.5202987720345517e-05, "loss": 0.2598, "step": 17550 }, { "epoch": 0.3258540859859722, "grad_norm": 0.7365832924842834, "learning_rate": 1.5201991520234771e-05, "loss": 0.2996, "step": 17552 }, { "epoch": 0.3258912161233909, "grad_norm": 0.3756067454814911, "learning_rate": 1.5200995249340505e-05, "loss": 0.4609, "step": 17554 }, { "epoch": 0.3259283462608095, "grad_norm": 0.21131837368011475, "learning_rate": 1.5199998907676272e-05, "loss": 0.3541, "step": 17556 }, { "epoch": 0.32596547639822815, "grad_norm": 0.36641743779182434, "learning_rate": 1.5199002495255626e-05, "loss": 0.4212, "step": 17558 }, { "epoch": 0.3260026065356468, "grad_norm": 0.2675943076610565, "learning_rate": 1.519800601209213e-05, "loss": 0.4573, "step": 17560 }, { "epoch": 0.3260397366730654, "grad_norm": 0.36896276473999023, "learning_rate": 1.5197009458199344e-05, "loss": 0.3257, "step": 17562 }, { "epoch": 0.3260768668104841, "grad_norm": 0.3580615818500519, "learning_rate": 1.5196012833590825e-05, "loss": 0.3963, "step": 17564 }, { "epoch": 0.3261139969479027, "grad_norm": 0.45092225074768066, "learning_rate": 1.5195016138280136e-05, "loss": 0.3073, "step": 17566 }, { "epoch": 0.32615112708532135, "grad_norm": 0.3870398700237274, "learning_rate": 1.5194019372280839e-05, "loss": 0.4073, "step": 17568 }, { "epoch": 0.32618825722274, "grad_norm": 0.32575640082359314, "learning_rate": 1.5193022535606494e-05, "loss": 0.1979, "step": 17570 }, { "epoch": 0.3262253873601586, "grad_norm": 0.33010345697402954, "learning_rate": 1.5192025628270667e-05, "loss": 0.3164, "step": 17572 }, { "epoch": 0.32626251749757723, "grad_norm": 0.43834686279296875, "learning_rate": 1.5191028650286925e-05, "loss": 0.4459, "step": 17574 }, { "epoch": 0.3262996476349959, "grad_norm": 0.347760945558548, "learning_rate": 1.5190031601668832e-05, "loss": 0.2396, "step": 17576 }, { "epoch": 0.32633677777241454, "grad_norm": 0.41803792119026184, "learning_rate": 1.5189034482429954e-05, "loss": 0.2408, "step": 17578 }, { "epoch": 0.32637390790983317, "grad_norm": 0.27051258087158203, "learning_rate": 1.5188037292583861e-05, "loss": 0.2564, "step": 17580 }, { "epoch": 0.3264110380472518, "grad_norm": 0.3627765476703644, "learning_rate": 1.5187040032144121e-05, "loss": 0.4411, "step": 17582 }, { "epoch": 0.32644816818467043, "grad_norm": 0.36485153436660767, "learning_rate": 1.5186042701124297e-05, "loss": 0.1505, "step": 17584 }, { "epoch": 0.3264852983220891, "grad_norm": 0.3833666741847992, "learning_rate": 1.5185045299537973e-05, "loss": 0.5058, "step": 17586 }, { "epoch": 0.32652242845950774, "grad_norm": 0.32322031259536743, "learning_rate": 1.5184047827398711e-05, "loss": 0.3719, "step": 17588 }, { "epoch": 0.32655955859692637, "grad_norm": 0.4045676589012146, "learning_rate": 1.5183050284720085e-05, "loss": 0.3108, "step": 17590 }, { "epoch": 0.326596688734345, "grad_norm": 0.3353482186794281, "learning_rate": 1.5182052671515668e-05, "loss": 0.3088, "step": 17592 }, { "epoch": 0.3266338188717636, "grad_norm": 0.36045610904693604, "learning_rate": 1.518105498779904e-05, "loss": 0.3742, "step": 17594 }, { "epoch": 0.3266709490091823, "grad_norm": 0.32726573944091797, "learning_rate": 1.5180057233583773e-05, "loss": 0.2729, "step": 17596 }, { "epoch": 0.32670807914660094, "grad_norm": 0.3097217381000519, "learning_rate": 1.517905940888344e-05, "loss": 0.2189, "step": 17598 }, { "epoch": 0.32674520928401957, "grad_norm": 0.2941550314426422, "learning_rate": 1.5178061513711626e-05, "loss": 0.2108, "step": 17600 }, { "epoch": 0.3267823394214382, "grad_norm": 0.2941129803657532, "learning_rate": 1.51770635480819e-05, "loss": 0.2544, "step": 17602 }, { "epoch": 0.3268194695588568, "grad_norm": 0.3898106813430786, "learning_rate": 1.5176065512007845e-05, "loss": 0.3461, "step": 17604 }, { "epoch": 0.32685659969627545, "grad_norm": 0.44670993089675903, "learning_rate": 1.5175067405503049e-05, "loss": 0.3665, "step": 17606 }, { "epoch": 0.32689372983369414, "grad_norm": 0.4735059142112732, "learning_rate": 1.5174069228581083e-05, "loss": 0.1969, "step": 17608 }, { "epoch": 0.32693085997111276, "grad_norm": 0.4374541938304901, "learning_rate": 1.5173070981255533e-05, "loss": 0.244, "step": 17610 }, { "epoch": 0.3269679901085314, "grad_norm": 0.615386426448822, "learning_rate": 1.5172072663539983e-05, "loss": 0.2606, "step": 17612 }, { "epoch": 0.32700512024595, "grad_norm": 0.5306576490402222, "learning_rate": 1.5171074275448015e-05, "loss": 0.2445, "step": 17614 }, { "epoch": 0.32704225038336865, "grad_norm": 0.31964847445487976, "learning_rate": 1.5170075816993212e-05, "loss": 0.2725, "step": 17616 }, { "epoch": 0.32707938052078733, "grad_norm": 0.3527660667896271, "learning_rate": 1.5169077288189171e-05, "loss": 0.3411, "step": 17618 }, { "epoch": 0.32711651065820596, "grad_norm": 0.32894060015678406, "learning_rate": 1.5168078689049467e-05, "loss": 0.3256, "step": 17620 }, { "epoch": 0.3271536407956246, "grad_norm": 0.4134563207626343, "learning_rate": 1.5167080019587692e-05, "loss": 0.2392, "step": 17622 }, { "epoch": 0.3271907709330432, "grad_norm": 0.2852741479873657, "learning_rate": 1.5166081279817438e-05, "loss": 0.1862, "step": 17624 }, { "epoch": 0.32722790107046185, "grad_norm": 0.35628023743629456, "learning_rate": 1.5165082469752288e-05, "loss": 0.4758, "step": 17626 }, { "epoch": 0.3272650312078805, "grad_norm": 0.3674044609069824, "learning_rate": 1.5164083589405838e-05, "loss": 0.3559, "step": 17628 }, { "epoch": 0.32730216134529916, "grad_norm": 0.30254948139190674, "learning_rate": 1.5163084638791681e-05, "loss": 0.1966, "step": 17630 }, { "epoch": 0.3273392914827178, "grad_norm": 0.3422847092151642, "learning_rate": 1.5162085617923406e-05, "loss": 0.3276, "step": 17632 }, { "epoch": 0.3273764216201364, "grad_norm": 0.29455673694610596, "learning_rate": 1.5161086526814606e-05, "loss": 0.2337, "step": 17634 }, { "epoch": 0.32741355175755504, "grad_norm": 0.3275715708732605, "learning_rate": 1.5160087365478877e-05, "loss": 0.3023, "step": 17636 }, { "epoch": 0.32745068189497367, "grad_norm": 0.3403056263923645, "learning_rate": 1.5159088133929818e-05, "loss": 0.2749, "step": 17638 }, { "epoch": 0.32748781203239236, "grad_norm": 0.8026832342147827, "learning_rate": 1.5158088832181021e-05, "loss": 0.3454, "step": 17640 }, { "epoch": 0.327524942169811, "grad_norm": 0.41045746207237244, "learning_rate": 1.5157089460246086e-05, "loss": 0.2972, "step": 17642 }, { "epoch": 0.3275620723072296, "grad_norm": 0.373545378446579, "learning_rate": 1.5156090018138612e-05, "loss": 0.2673, "step": 17644 }, { "epoch": 0.32759920244464824, "grad_norm": 0.42723414301872253, "learning_rate": 1.5155090505872197e-05, "loss": 0.2114, "step": 17646 }, { "epoch": 0.32763633258206687, "grad_norm": 0.3335084617137909, "learning_rate": 1.5154090923460437e-05, "loss": 0.1781, "step": 17648 }, { "epoch": 0.3276734627194855, "grad_norm": 0.3104463815689087, "learning_rate": 1.5153091270916945e-05, "loss": 0.4006, "step": 17650 }, { "epoch": 0.3277105928569042, "grad_norm": 0.3768613338470459, "learning_rate": 1.5152091548255311e-05, "loss": 0.4186, "step": 17652 }, { "epoch": 0.3277477229943228, "grad_norm": 0.312747061252594, "learning_rate": 1.5151091755489145e-05, "loss": 0.2124, "step": 17654 }, { "epoch": 0.32778485313174144, "grad_norm": 0.33740243315696716, "learning_rate": 1.515009189263205e-05, "loss": 0.3759, "step": 17656 }, { "epoch": 0.32782198326916007, "grad_norm": 0.33628979325294495, "learning_rate": 1.514909195969763e-05, "loss": 0.3303, "step": 17658 }, { "epoch": 0.3278591134065787, "grad_norm": 0.30011647939682007, "learning_rate": 1.5148091956699494e-05, "loss": 0.1345, "step": 17660 }, { "epoch": 0.3278962435439974, "grad_norm": 0.29070523381233215, "learning_rate": 1.5147091883651243e-05, "loss": 0.4343, "step": 17662 }, { "epoch": 0.327933373681416, "grad_norm": 0.26595136523246765, "learning_rate": 1.5146091740566489e-05, "loss": 0.2523, "step": 17664 }, { "epoch": 0.32797050381883464, "grad_norm": 0.42118337750434875, "learning_rate": 1.5145091527458841e-05, "loss": 0.3005, "step": 17666 }, { "epoch": 0.32800763395625326, "grad_norm": 0.3476470112800598, "learning_rate": 1.5144091244341912e-05, "loss": 0.2996, "step": 17668 }, { "epoch": 0.3280447640936719, "grad_norm": 0.35275891423225403, "learning_rate": 1.5143090891229304e-05, "loss": 0.3488, "step": 17670 }, { "epoch": 0.3280818942310906, "grad_norm": 0.3063492476940155, "learning_rate": 1.514209046813464e-05, "loss": 0.1288, "step": 17672 }, { "epoch": 0.3281190243685092, "grad_norm": 0.3863551914691925, "learning_rate": 1.5141089975071523e-05, "loss": 0.4002, "step": 17674 }, { "epoch": 0.32815615450592783, "grad_norm": 0.320879727602005, "learning_rate": 1.5140089412053574e-05, "loss": 0.2943, "step": 17676 }, { "epoch": 0.32819328464334646, "grad_norm": 0.2639245092868805, "learning_rate": 1.5139088779094402e-05, "loss": 0.3584, "step": 17678 }, { "epoch": 0.3282304147807651, "grad_norm": 0.309308260679245, "learning_rate": 1.5138088076207626e-05, "loss": 0.1263, "step": 17680 }, { "epoch": 0.3282675449181837, "grad_norm": 0.31055134534835815, "learning_rate": 1.5137087303406862e-05, "loss": 0.2806, "step": 17682 }, { "epoch": 0.3283046750556024, "grad_norm": 0.36841925978660583, "learning_rate": 1.5136086460705729e-05, "loss": 0.3882, "step": 17684 }, { "epoch": 0.32834180519302103, "grad_norm": 0.5002008080482483, "learning_rate": 1.5135085548117837e-05, "loss": 0.1106, "step": 17686 }, { "epoch": 0.32837893533043966, "grad_norm": 0.8487111330032349, "learning_rate": 1.513408456565682e-05, "loss": 0.401, "step": 17688 }, { "epoch": 0.3284160654678583, "grad_norm": 0.5213286876678467, "learning_rate": 1.5133083513336284e-05, "loss": 0.195, "step": 17690 }, { "epoch": 0.3284531956052769, "grad_norm": 0.4582628011703491, "learning_rate": 1.513208239116986e-05, "loss": 0.3187, "step": 17692 }, { "epoch": 0.3284903257426956, "grad_norm": 0.3811066746711731, "learning_rate": 1.5131081199171168e-05, "loss": 0.2224, "step": 17694 }, { "epoch": 0.3285274558801142, "grad_norm": 0.3852074444293976, "learning_rate": 1.5130079937353827e-05, "loss": 0.3576, "step": 17696 }, { "epoch": 0.32856458601753286, "grad_norm": 0.4354002773761749, "learning_rate": 1.5129078605731464e-05, "loss": 0.3119, "step": 17698 }, { "epoch": 0.3286017161549515, "grad_norm": 0.38786378502845764, "learning_rate": 1.5128077204317708e-05, "loss": 0.3141, "step": 17700 }, { "epoch": 0.3286388462923701, "grad_norm": 0.39947086572647095, "learning_rate": 1.5127075733126182e-05, "loss": 0.3908, "step": 17702 }, { "epoch": 0.32867597642978874, "grad_norm": 0.682485044002533, "learning_rate": 1.512607419217051e-05, "loss": 0.392, "step": 17704 }, { "epoch": 0.3287131065672074, "grad_norm": 0.4146837890148163, "learning_rate": 1.5125072581464325e-05, "loss": 0.1713, "step": 17706 }, { "epoch": 0.32875023670462605, "grad_norm": 0.3243124186992645, "learning_rate": 1.5124070901021251e-05, "loss": 0.379, "step": 17708 }, { "epoch": 0.3287873668420447, "grad_norm": 0.5397246479988098, "learning_rate": 1.512306915085492e-05, "loss": 0.3793, "step": 17710 }, { "epoch": 0.3288244969794633, "grad_norm": 0.8581190705299377, "learning_rate": 1.5122067330978966e-05, "loss": 0.2368, "step": 17712 }, { "epoch": 0.32886162711688194, "grad_norm": 0.5463131070137024, "learning_rate": 1.5121065441407017e-05, "loss": 0.1634, "step": 17714 }, { "epoch": 0.3288987572543006, "grad_norm": 0.313841313123703, "learning_rate": 1.5120063482152707e-05, "loss": 0.3584, "step": 17716 }, { "epoch": 0.32893588739171925, "grad_norm": 0.3885699510574341, "learning_rate": 1.5119061453229668e-05, "loss": 0.407, "step": 17718 }, { "epoch": 0.3289730175291379, "grad_norm": 0.38246700167655945, "learning_rate": 1.5118059354651538e-05, "loss": 0.2082, "step": 17720 }, { "epoch": 0.3290101476665565, "grad_norm": 0.318854421377182, "learning_rate": 1.5117057186431949e-05, "loss": 0.3448, "step": 17722 }, { "epoch": 0.32904727780397514, "grad_norm": 0.33163121342658997, "learning_rate": 1.511605494858454e-05, "loss": 0.4712, "step": 17724 }, { "epoch": 0.32908440794139376, "grad_norm": 0.5115985870361328, "learning_rate": 1.5115052641122947e-05, "loss": 0.3477, "step": 17726 }, { "epoch": 0.32912153807881245, "grad_norm": 0.3364209830760956, "learning_rate": 1.5114050264060808e-05, "loss": 0.1625, "step": 17728 }, { "epoch": 0.3291586682162311, "grad_norm": 0.4749687612056732, "learning_rate": 1.5113047817411764e-05, "loss": 0.2879, "step": 17730 }, { "epoch": 0.3291957983536497, "grad_norm": 0.449405699968338, "learning_rate": 1.5112045301189454e-05, "loss": 0.1957, "step": 17732 }, { "epoch": 0.32923292849106833, "grad_norm": 0.3973706364631653, "learning_rate": 1.5111042715407522e-05, "loss": 0.2427, "step": 17734 }, { "epoch": 0.32927005862848696, "grad_norm": 0.42291906476020813, "learning_rate": 1.5110040060079607e-05, "loss": 0.4602, "step": 17736 }, { "epoch": 0.32930718876590565, "grad_norm": 0.31715869903564453, "learning_rate": 1.5109037335219352e-05, "loss": 0.2928, "step": 17738 }, { "epoch": 0.3293443189033243, "grad_norm": 0.39417240023612976, "learning_rate": 1.5108034540840404e-05, "loss": 0.5245, "step": 17740 }, { "epoch": 0.3293814490407429, "grad_norm": 0.22487516701221466, "learning_rate": 1.5107031676956403e-05, "loss": 0.1961, "step": 17742 }, { "epoch": 0.32941857917816153, "grad_norm": 0.24678479135036469, "learning_rate": 1.5106028743581e-05, "loss": 0.2691, "step": 17744 }, { "epoch": 0.32945570931558016, "grad_norm": 0.35239556431770325, "learning_rate": 1.5105025740727843e-05, "loss": 0.2633, "step": 17746 }, { "epoch": 0.32949283945299884, "grad_norm": 0.5946488976478577, "learning_rate": 1.5104022668410571e-05, "loss": 0.4743, "step": 17748 }, { "epoch": 0.32952996959041747, "grad_norm": 0.33725619316101074, "learning_rate": 1.5103019526642846e-05, "loss": 0.2039, "step": 17750 }, { "epoch": 0.3295670997278361, "grad_norm": 0.3687255382537842, "learning_rate": 1.5102016315438304e-05, "loss": 0.2938, "step": 17752 }, { "epoch": 0.3296042298652547, "grad_norm": 0.547210693359375, "learning_rate": 1.5101013034810605e-05, "loss": 0.2901, "step": 17754 }, { "epoch": 0.32964136000267336, "grad_norm": 0.7228071093559265, "learning_rate": 1.5100009684773397e-05, "loss": 0.2829, "step": 17756 }, { "epoch": 0.329678490140092, "grad_norm": 0.38952481746673584, "learning_rate": 1.5099006265340337e-05, "loss": 0.1534, "step": 17758 }, { "epoch": 0.32971562027751067, "grad_norm": 0.6288354992866516, "learning_rate": 1.5098002776525072e-05, "loss": 0.2562, "step": 17760 }, { "epoch": 0.3297527504149293, "grad_norm": 0.3054794669151306, "learning_rate": 1.5096999218341257e-05, "loss": 0.3383, "step": 17762 }, { "epoch": 0.3297898805523479, "grad_norm": 0.582605242729187, "learning_rate": 1.5095995590802554e-05, "loss": 0.252, "step": 17764 }, { "epoch": 0.32982701068976655, "grad_norm": 0.3173079788684845, "learning_rate": 1.5094991893922615e-05, "loss": 0.4706, "step": 17766 }, { "epoch": 0.3298641408271852, "grad_norm": 0.40634965896606445, "learning_rate": 1.5093988127715096e-05, "loss": 0.2742, "step": 17768 }, { "epoch": 0.32990127096460387, "grad_norm": 4.459052562713623, "learning_rate": 1.5092984292193658e-05, "loss": 0.2365, "step": 17770 }, { "epoch": 0.3299384011020225, "grad_norm": 0.276470422744751, "learning_rate": 1.5091980387371959e-05, "loss": 0.259, "step": 17772 }, { "epoch": 0.3299755312394411, "grad_norm": 0.3766598403453827, "learning_rate": 1.5090976413263656e-05, "loss": 0.1958, "step": 17774 }, { "epoch": 0.33001266137685975, "grad_norm": 0.44429340958595276, "learning_rate": 1.5089972369882418e-05, "loss": 0.2262, "step": 17776 }, { "epoch": 0.3300497915142784, "grad_norm": 0.44059309363365173, "learning_rate": 1.50889682572419e-05, "loss": 0.1919, "step": 17778 }, { "epoch": 0.330086921651697, "grad_norm": 0.28233402967453003, "learning_rate": 1.5087964075355768e-05, "loss": 0.3583, "step": 17780 }, { "epoch": 0.3301240517891157, "grad_norm": 0.36759957671165466, "learning_rate": 1.5086959824237686e-05, "loss": 0.2375, "step": 17782 }, { "epoch": 0.3301611819265343, "grad_norm": 0.4023893475532532, "learning_rate": 1.5085955503901314e-05, "loss": 0.3377, "step": 17784 }, { "epoch": 0.33019831206395295, "grad_norm": 0.37473300099372864, "learning_rate": 1.5084951114360325e-05, "loss": 0.3836, "step": 17786 }, { "epoch": 0.3302354422013716, "grad_norm": 0.37250426411628723, "learning_rate": 1.5083946655628382e-05, "loss": 0.413, "step": 17788 }, { "epoch": 0.3302725723387902, "grad_norm": 0.41275903582572937, "learning_rate": 1.5082942127719156e-05, "loss": 0.3508, "step": 17790 }, { "epoch": 0.3303097024762089, "grad_norm": 0.43645885586738586, "learning_rate": 1.5081937530646307e-05, "loss": 0.2888, "step": 17792 }, { "epoch": 0.3303468326136275, "grad_norm": 0.44883567094802856, "learning_rate": 1.5080932864423514e-05, "loss": 0.3066, "step": 17794 }, { "epoch": 0.33038396275104615, "grad_norm": 0.44382384419441223, "learning_rate": 1.5079928129064442e-05, "loss": 0.5077, "step": 17796 }, { "epoch": 0.3304210928884648, "grad_norm": 0.5066351294517517, "learning_rate": 1.5078923324582766e-05, "loss": 0.2077, "step": 17798 }, { "epoch": 0.3304582230258834, "grad_norm": 0.4591412842273712, "learning_rate": 1.5077918450992157e-05, "loss": 0.2826, "step": 17800 }, { "epoch": 0.33049535316330203, "grad_norm": 0.3614860773086548, "learning_rate": 1.5076913508306288e-05, "loss": 0.2676, "step": 17802 }, { "epoch": 0.3305324833007207, "grad_norm": 0.34059938788414, "learning_rate": 1.5075908496538831e-05, "loss": 0.4131, "step": 17804 }, { "epoch": 0.33056961343813934, "grad_norm": 0.4687926769256592, "learning_rate": 1.5074903415703466e-05, "loss": 0.1256, "step": 17806 }, { "epoch": 0.33060674357555797, "grad_norm": 0.47998255491256714, "learning_rate": 1.5073898265813865e-05, "loss": 0.3518, "step": 17808 }, { "epoch": 0.3306438737129766, "grad_norm": 0.5340453386306763, "learning_rate": 1.5072893046883707e-05, "loss": 0.2348, "step": 17810 }, { "epoch": 0.3306810038503952, "grad_norm": 0.48077383637428284, "learning_rate": 1.5071887758926669e-05, "loss": 0.3362, "step": 17812 }, { "epoch": 0.3307181339878139, "grad_norm": 0.37552666664123535, "learning_rate": 1.5070882401956435e-05, "loss": 0.2883, "step": 17814 }, { "epoch": 0.33075526412523254, "grad_norm": 0.6973111629486084, "learning_rate": 1.5069876975986675e-05, "loss": 0.383, "step": 17816 }, { "epoch": 0.33079239426265117, "grad_norm": 0.27123117446899414, "learning_rate": 1.506887148103108e-05, "loss": 0.3437, "step": 17818 }, { "epoch": 0.3308295244000698, "grad_norm": 0.6203113794326782, "learning_rate": 1.5067865917103324e-05, "loss": 0.3698, "step": 17820 }, { "epoch": 0.3308666545374884, "grad_norm": 0.3681880831718445, "learning_rate": 1.5066860284217093e-05, "loss": 0.377, "step": 17822 }, { "epoch": 0.3309037846749071, "grad_norm": 0.35213109850883484, "learning_rate": 1.506585458238607e-05, "loss": 0.3372, "step": 17824 }, { "epoch": 0.33094091481232574, "grad_norm": 0.5624686479568481, "learning_rate": 1.5064848811623943e-05, "loss": 0.3828, "step": 17826 }, { "epoch": 0.33097804494974437, "grad_norm": 0.4062316119670868, "learning_rate": 1.506384297194439e-05, "loss": 0.2378, "step": 17828 }, { "epoch": 0.331015175087163, "grad_norm": 0.4410496652126312, "learning_rate": 1.5062837063361109e-05, "loss": 0.4173, "step": 17830 }, { "epoch": 0.3310523052245816, "grad_norm": 0.32789239287376404, "learning_rate": 1.5061831085887777e-05, "loss": 0.1051, "step": 17832 }, { "epoch": 0.33108943536200025, "grad_norm": 0.32628872990608215, "learning_rate": 1.5060825039538083e-05, "loss": 0.4203, "step": 17834 }, { "epoch": 0.33112656549941893, "grad_norm": 0.5331152677536011, "learning_rate": 1.5059818924325722e-05, "loss": 0.2779, "step": 17836 }, { "epoch": 0.33116369563683756, "grad_norm": 0.30990102887153625, "learning_rate": 1.505881274026438e-05, "loss": 0.2681, "step": 17838 }, { "epoch": 0.3312008257742562, "grad_norm": 0.3304871618747711, "learning_rate": 1.505780648736775e-05, "loss": 0.4464, "step": 17840 }, { "epoch": 0.3312379559116748, "grad_norm": 0.4052425026893616, "learning_rate": 1.5056800165649525e-05, "loss": 0.4468, "step": 17842 }, { "epoch": 0.33127508604909345, "grad_norm": 0.6457363963127136, "learning_rate": 1.5055793775123396e-05, "loss": 0.3015, "step": 17844 }, { "epoch": 0.33131221618651213, "grad_norm": 0.330112487077713, "learning_rate": 1.505478731580306e-05, "loss": 0.215, "step": 17846 }, { "epoch": 0.33134934632393076, "grad_norm": 0.25201767683029175, "learning_rate": 1.5053780787702204e-05, "loss": 0.3117, "step": 17848 }, { "epoch": 0.3313864764613494, "grad_norm": 0.3351127505302429, "learning_rate": 1.5052774190834532e-05, "loss": 0.1682, "step": 17850 }, { "epoch": 0.331423606598768, "grad_norm": 0.7798597812652588, "learning_rate": 1.5051767525213738e-05, "loss": 0.34, "step": 17852 }, { "epoch": 0.33146073673618665, "grad_norm": 0.3377073407173157, "learning_rate": 1.505076079085352e-05, "loss": 0.3356, "step": 17854 }, { "epoch": 0.3314978668736053, "grad_norm": 0.4009382426738739, "learning_rate": 1.5049753987767574e-05, "loss": 0.2366, "step": 17856 }, { "epoch": 0.33153499701102396, "grad_norm": 0.5234353542327881, "learning_rate": 1.5048747115969606e-05, "loss": 0.33, "step": 17858 }, { "epoch": 0.3315721271484426, "grad_norm": 0.5827322602272034, "learning_rate": 1.5047740175473312e-05, "loss": 0.435, "step": 17860 }, { "epoch": 0.3316092572858612, "grad_norm": 0.4959309697151184, "learning_rate": 1.5046733166292393e-05, "loss": 0.3188, "step": 17862 }, { "epoch": 0.33164638742327984, "grad_norm": 0.38424575328826904, "learning_rate": 1.5045726088440553e-05, "loss": 0.2, "step": 17864 }, { "epoch": 0.33168351756069847, "grad_norm": 0.48506999015808105, "learning_rate": 1.504471894193149e-05, "loss": 0.4822, "step": 17866 }, { "epoch": 0.33172064769811715, "grad_norm": 0.3128223717212677, "learning_rate": 1.5043711726778915e-05, "loss": 0.3039, "step": 17868 }, { "epoch": 0.3317577778355358, "grad_norm": 0.3348535895347595, "learning_rate": 1.5042704442996536e-05, "loss": 0.3208, "step": 17870 }, { "epoch": 0.3317949079729544, "grad_norm": 0.3610891103744507, "learning_rate": 1.5041697090598053e-05, "loss": 0.4105, "step": 17872 }, { "epoch": 0.33183203811037304, "grad_norm": 0.3912738859653473, "learning_rate": 1.5040689669597171e-05, "loss": 0.2652, "step": 17874 }, { "epoch": 0.33186916824779167, "grad_norm": 0.25603529810905457, "learning_rate": 1.5039682180007602e-05, "loss": 0.4298, "step": 17876 }, { "epoch": 0.3319062983852103, "grad_norm": 0.5491587519645691, "learning_rate": 1.5038674621843053e-05, "loss": 0.3159, "step": 17878 }, { "epoch": 0.331943428522629, "grad_norm": 0.35179197788238525, "learning_rate": 1.5037666995117234e-05, "loss": 0.4204, "step": 17880 }, { "epoch": 0.3319805586600476, "grad_norm": 0.42932820320129395, "learning_rate": 1.5036659299843864e-05, "loss": 0.3302, "step": 17882 }, { "epoch": 0.33201768879746624, "grad_norm": 0.42902302742004395, "learning_rate": 1.5035651536036642e-05, "loss": 0.3491, "step": 17884 }, { "epoch": 0.33205481893488487, "grad_norm": 0.30484434962272644, "learning_rate": 1.5034643703709285e-05, "loss": 0.1665, "step": 17886 }, { "epoch": 0.3320919490723035, "grad_norm": 0.4538746774196625, "learning_rate": 1.5033635802875507e-05, "loss": 0.2712, "step": 17888 }, { "epoch": 0.3321290792097222, "grad_norm": 0.33844348788261414, "learning_rate": 1.5032627833549028e-05, "loss": 0.4158, "step": 17890 }, { "epoch": 0.3321662093471408, "grad_norm": 0.5058414340019226, "learning_rate": 1.5031619795743554e-05, "loss": 0.2046, "step": 17892 }, { "epoch": 0.33220333948455943, "grad_norm": 0.5693158507347107, "learning_rate": 1.503061168947281e-05, "loss": 0.228, "step": 17894 }, { "epoch": 0.33224046962197806, "grad_norm": 0.4657512605190277, "learning_rate": 1.5029603514750508e-05, "loss": 0.4079, "step": 17896 }, { "epoch": 0.3322775997593967, "grad_norm": 0.4146985709667206, "learning_rate": 1.5028595271590367e-05, "loss": 0.3487, "step": 17898 }, { "epoch": 0.3323147298968154, "grad_norm": 0.2547996938228607, "learning_rate": 1.5027586960006107e-05, "loss": 0.0888, "step": 17900 }, { "epoch": 0.332351860034234, "grad_norm": 0.28766632080078125, "learning_rate": 1.5026578580011446e-05, "loss": 0.4945, "step": 17902 }, { "epoch": 0.33238899017165263, "grad_norm": 0.5285801291465759, "learning_rate": 1.5025570131620111e-05, "loss": 0.1636, "step": 17904 }, { "epoch": 0.33242612030907126, "grad_norm": 0.3439742624759674, "learning_rate": 1.5024561614845819e-05, "loss": 0.2901, "step": 17906 }, { "epoch": 0.3324632504464899, "grad_norm": 0.33359089493751526, "learning_rate": 1.5023553029702292e-05, "loss": 0.2764, "step": 17908 }, { "epoch": 0.3325003805839085, "grad_norm": 0.4356013238430023, "learning_rate": 1.5022544376203254e-05, "loss": 0.1527, "step": 17910 }, { "epoch": 0.3325375107213272, "grad_norm": 0.6118255853652954, "learning_rate": 1.5021535654362431e-05, "loss": 0.1973, "step": 17912 }, { "epoch": 0.33257464085874583, "grad_norm": 0.3409269452095032, "learning_rate": 1.5020526864193553e-05, "loss": 0.3395, "step": 17914 }, { "epoch": 0.33261177099616446, "grad_norm": 0.3647063970565796, "learning_rate": 1.501951800571034e-05, "loss": 0.2978, "step": 17916 }, { "epoch": 0.3326489011335831, "grad_norm": 0.4006160795688629, "learning_rate": 1.5018509078926525e-05, "loss": 0.2305, "step": 17918 }, { "epoch": 0.3326860312710017, "grad_norm": 0.43046560883522034, "learning_rate": 1.501750008385583e-05, "loss": 0.4847, "step": 17920 }, { "epoch": 0.3327231614084204, "grad_norm": 0.26167067885398865, "learning_rate": 1.5016491020511987e-05, "loss": 0.2669, "step": 17922 }, { "epoch": 0.332760291545839, "grad_norm": 0.32392409443855286, "learning_rate": 1.501548188890873e-05, "loss": 0.3135, "step": 17924 }, { "epoch": 0.33279742168325765, "grad_norm": 0.49963998794555664, "learning_rate": 1.501447268905979e-05, "loss": 0.2329, "step": 17926 }, { "epoch": 0.3328345518206763, "grad_norm": 0.3100620210170746, "learning_rate": 1.5013463420978895e-05, "loss": 0.2812, "step": 17928 }, { "epoch": 0.3328716819580949, "grad_norm": 0.4499354660511017, "learning_rate": 1.5012454084679778e-05, "loss": 0.5106, "step": 17930 }, { "epoch": 0.33290881209551354, "grad_norm": 0.5090258717536926, "learning_rate": 1.5011444680176173e-05, "loss": 0.3133, "step": 17932 }, { "epoch": 0.3329459422329322, "grad_norm": 0.3382207751274109, "learning_rate": 1.5010435207481822e-05, "loss": 0.3882, "step": 17934 }, { "epoch": 0.33298307237035085, "grad_norm": 0.45492812991142273, "learning_rate": 1.5009425666610456e-05, "loss": 0.4252, "step": 17936 }, { "epoch": 0.3330202025077695, "grad_norm": 0.5794858932495117, "learning_rate": 1.5008416057575805e-05, "loss": 0.2892, "step": 17938 }, { "epoch": 0.3330573326451881, "grad_norm": 0.3883581757545471, "learning_rate": 1.500740638039162e-05, "loss": 0.2041, "step": 17940 }, { "epoch": 0.33309446278260674, "grad_norm": 0.4709897041320801, "learning_rate": 1.5006396635071633e-05, "loss": 0.2788, "step": 17942 }, { "epoch": 0.3331315929200254, "grad_norm": 0.25976231694221497, "learning_rate": 1.5005386821629577e-05, "loss": 0.2902, "step": 17944 }, { "epoch": 0.33316872305744405, "grad_norm": 0.4804779291152954, "learning_rate": 1.5004376940079204e-05, "loss": 0.3874, "step": 17946 }, { "epoch": 0.3332058531948627, "grad_norm": 0.3953636586666107, "learning_rate": 1.5003366990434254e-05, "loss": 0.306, "step": 17948 }, { "epoch": 0.3332429833322813, "grad_norm": 0.48406878113746643, "learning_rate": 1.500235697270846e-05, "loss": 0.3845, "step": 17950 }, { "epoch": 0.33328011346969993, "grad_norm": 0.4482232928276062, "learning_rate": 1.5001346886915576e-05, "loss": 0.3902, "step": 17952 }, { "epoch": 0.33331724360711856, "grad_norm": 0.3405599594116211, "learning_rate": 1.5000336733069338e-05, "loss": 0.28, "step": 17954 }, { "epoch": 0.33335437374453725, "grad_norm": 0.4166962504386902, "learning_rate": 1.4999326511183498e-05, "loss": 0.2619, "step": 17956 }, { "epoch": 0.3333915038819559, "grad_norm": 0.36426955461502075, "learning_rate": 1.4998316221271798e-05, "loss": 0.4352, "step": 17958 }, { "epoch": 0.3334286340193745, "grad_norm": 0.4869938790798187, "learning_rate": 1.4997305863347984e-05, "loss": 0.159, "step": 17960 }, { "epoch": 0.33346576415679313, "grad_norm": 0.3511381447315216, "learning_rate": 1.4996295437425805e-05, "loss": 0.4285, "step": 17962 }, { "epoch": 0.33350289429421176, "grad_norm": 0.9059442281723022, "learning_rate": 1.4995284943519012e-05, "loss": 0.2069, "step": 17964 }, { "epoch": 0.33354002443163044, "grad_norm": 0.40236422419548035, "learning_rate": 1.4994274381641355e-05, "loss": 0.3119, "step": 17966 }, { "epoch": 0.3335771545690491, "grad_norm": 0.3907240927219391, "learning_rate": 1.4993263751806584e-05, "loss": 0.3197, "step": 17968 }, { "epoch": 0.3336142847064677, "grad_norm": 0.4376314878463745, "learning_rate": 1.4992253054028447e-05, "loss": 0.2833, "step": 17970 }, { "epoch": 0.33365141484388633, "grad_norm": 0.3018713891506195, "learning_rate": 1.4991242288320703e-05, "loss": 0.2281, "step": 17972 }, { "epoch": 0.33368854498130496, "grad_norm": 1.0719242095947266, "learning_rate": 1.4990231454697099e-05, "loss": 0.2917, "step": 17974 }, { "epoch": 0.33372567511872364, "grad_norm": 0.379462867975235, "learning_rate": 1.498922055317139e-05, "loss": 0.6311, "step": 17976 }, { "epoch": 0.33376280525614227, "grad_norm": 0.3152543902397156, "learning_rate": 1.4988209583757338e-05, "loss": 0.324, "step": 17978 }, { "epoch": 0.3337999353935609, "grad_norm": 0.42757052183151245, "learning_rate": 1.4987198546468697e-05, "loss": 0.5221, "step": 17980 }, { "epoch": 0.3338370655309795, "grad_norm": 0.3611413538455963, "learning_rate": 1.4986187441319217e-05, "loss": 0.5084, "step": 17982 }, { "epoch": 0.33387419566839815, "grad_norm": 0.5011786222457886, "learning_rate": 1.4985176268322666e-05, "loss": 0.4575, "step": 17984 }, { "epoch": 0.3339113258058168, "grad_norm": 0.4002304673194885, "learning_rate": 1.4984165027492794e-05, "loss": 0.2608, "step": 17986 }, { "epoch": 0.33394845594323547, "grad_norm": 0.3544389307498932, "learning_rate": 1.4983153718843366e-05, "loss": 0.4668, "step": 17988 }, { "epoch": 0.3339855860806541, "grad_norm": 0.3432004153728485, "learning_rate": 1.4982142342388146e-05, "loss": 0.2618, "step": 17990 }, { "epoch": 0.3340227162180727, "grad_norm": 0.4127478003501892, "learning_rate": 1.4981130898140887e-05, "loss": 0.2288, "step": 17992 }, { "epoch": 0.33405984635549135, "grad_norm": 0.3847413957118988, "learning_rate": 1.4980119386115357e-05, "loss": 0.3089, "step": 17994 }, { "epoch": 0.33409697649291, "grad_norm": 0.3328947424888611, "learning_rate": 1.4979107806325324e-05, "loss": 0.3101, "step": 17996 }, { "epoch": 0.33413410663032866, "grad_norm": 0.34489721059799194, "learning_rate": 1.4978096158784543e-05, "loss": 0.5336, "step": 17998 }, { "epoch": 0.3341712367677473, "grad_norm": 0.3683588206768036, "learning_rate": 1.4977084443506789e-05, "loss": 0.2934, "step": 18000 }, { "epoch": 0.3342083669051659, "grad_norm": 0.4330499768257141, "learning_rate": 1.497607266050582e-05, "loss": 0.1523, "step": 18002 }, { "epoch": 0.33424549704258455, "grad_norm": 0.4272777736186981, "learning_rate": 1.497506080979541e-05, "loss": 0.4294, "step": 18004 }, { "epoch": 0.3342826271800032, "grad_norm": 0.639563798904419, "learning_rate": 1.497404889138932e-05, "loss": 0.4541, "step": 18006 }, { "epoch": 0.3343197573174218, "grad_norm": 0.4317440092563629, "learning_rate": 1.4973036905301325e-05, "loss": 0.3376, "step": 18008 }, { "epoch": 0.3343568874548405, "grad_norm": 0.46777164936065674, "learning_rate": 1.4972024851545199e-05, "loss": 0.3037, "step": 18010 }, { "epoch": 0.3343940175922591, "grad_norm": 0.5522424578666687, "learning_rate": 1.4971012730134703e-05, "loss": 0.382, "step": 18012 }, { "epoch": 0.33443114772967775, "grad_norm": 0.5065568089485168, "learning_rate": 1.4970000541083614e-05, "loss": 0.2006, "step": 18014 }, { "epoch": 0.3344682778670964, "grad_norm": 0.5036908388137817, "learning_rate": 1.4968988284405706e-05, "loss": 0.3243, "step": 18016 }, { "epoch": 0.334505408004515, "grad_norm": 0.28333693742752075, "learning_rate": 1.4967975960114749e-05, "loss": 0.1869, "step": 18018 }, { "epoch": 0.3345425381419337, "grad_norm": 0.5294538736343384, "learning_rate": 1.4966963568224522e-05, "loss": 0.2584, "step": 18020 }, { "epoch": 0.3345796682793523, "grad_norm": 0.4187543988227844, "learning_rate": 1.4965951108748798e-05, "loss": 0.2598, "step": 18022 }, { "epoch": 0.33461679841677094, "grad_norm": 0.36463862657546997, "learning_rate": 1.4964938581701354e-05, "loss": 0.3729, "step": 18024 }, { "epoch": 0.3346539285541896, "grad_norm": 0.3892824649810791, "learning_rate": 1.4963925987095967e-05, "loss": 0.2548, "step": 18026 }, { "epoch": 0.3346910586916082, "grad_norm": 0.2688679099082947, "learning_rate": 1.4962913324946417e-05, "loss": 0.3552, "step": 18028 }, { "epoch": 0.33472818882902683, "grad_norm": 0.35725778341293335, "learning_rate": 1.4961900595266481e-05, "loss": 0.3145, "step": 18030 }, { "epoch": 0.3347653189664455, "grad_norm": 0.2737744450569153, "learning_rate": 1.4960887798069942e-05, "loss": 0.4351, "step": 18032 }, { "epoch": 0.33480244910386414, "grad_norm": 0.47076961398124695, "learning_rate": 1.495987493337058e-05, "loss": 0.4078, "step": 18034 }, { "epoch": 0.33483957924128277, "grad_norm": 0.5594877004623413, "learning_rate": 1.4958862001182175e-05, "loss": 0.3629, "step": 18036 }, { "epoch": 0.3348767093787014, "grad_norm": 0.44012850522994995, "learning_rate": 1.4957849001518512e-05, "loss": 0.3209, "step": 18038 }, { "epoch": 0.33491383951612, "grad_norm": 0.7808220386505127, "learning_rate": 1.4956835934393374e-05, "loss": 0.4849, "step": 18040 }, { "epoch": 0.3349509696535387, "grad_norm": 0.33568307757377625, "learning_rate": 1.4955822799820549e-05, "loss": 0.2029, "step": 18042 }, { "epoch": 0.33498809979095734, "grad_norm": 0.34144726395606995, "learning_rate": 1.4954809597813817e-05, "loss": 0.2223, "step": 18044 }, { "epoch": 0.33502522992837597, "grad_norm": 0.3721770644187927, "learning_rate": 1.4953796328386969e-05, "loss": 0.3967, "step": 18046 }, { "epoch": 0.3350623600657946, "grad_norm": 0.3758309781551361, "learning_rate": 1.4952782991553794e-05, "loss": 0.3822, "step": 18048 }, { "epoch": 0.3350994902032132, "grad_norm": 0.3911252021789551, "learning_rate": 1.4951769587328073e-05, "loss": 0.3925, "step": 18050 }, { "epoch": 0.3351366203406319, "grad_norm": 0.34705039858818054, "learning_rate": 1.4950756115723604e-05, "loss": 0.3055, "step": 18052 }, { "epoch": 0.33517375047805054, "grad_norm": 0.21749524772167206, "learning_rate": 1.4949742576754173e-05, "loss": 0.1382, "step": 18054 }, { "epoch": 0.33521088061546916, "grad_norm": 0.4963466227054596, "learning_rate": 1.494872897043357e-05, "loss": 0.2788, "step": 18056 }, { "epoch": 0.3352480107528878, "grad_norm": 0.4473222494125366, "learning_rate": 1.494771529677559e-05, "loss": 0.3865, "step": 18058 }, { "epoch": 0.3352851408903064, "grad_norm": 0.4760928452014923, "learning_rate": 1.4946701555794026e-05, "loss": 0.3801, "step": 18060 }, { "epoch": 0.33532227102772505, "grad_norm": 0.47154590487480164, "learning_rate": 1.4945687747502667e-05, "loss": 0.2973, "step": 18062 }, { "epoch": 0.33535940116514373, "grad_norm": 0.27352288365364075, "learning_rate": 1.4944673871915317e-05, "loss": 0.3828, "step": 18064 }, { "epoch": 0.33539653130256236, "grad_norm": 0.3778846561908722, "learning_rate": 1.4943659929045762e-05, "loss": 0.2882, "step": 18066 }, { "epoch": 0.335433661439981, "grad_norm": 0.20338238775730133, "learning_rate": 1.4942645918907806e-05, "loss": 0.1716, "step": 18068 }, { "epoch": 0.3354707915773996, "grad_norm": 0.5180934071540833, "learning_rate": 1.4941631841515243e-05, "loss": 0.3009, "step": 18070 }, { "epoch": 0.33550792171481825, "grad_norm": 0.2743087708950043, "learning_rate": 1.4940617696881872e-05, "loss": 0.4032, "step": 18072 }, { "epoch": 0.33554505185223693, "grad_norm": 0.43645066022872925, "learning_rate": 1.4939603485021494e-05, "loss": 0.1106, "step": 18074 }, { "epoch": 0.33558218198965556, "grad_norm": 0.4472203850746155, "learning_rate": 1.4938589205947909e-05, "loss": 0.4987, "step": 18076 }, { "epoch": 0.3356193121270742, "grad_norm": 0.3432043492794037, "learning_rate": 1.4937574859674917e-05, "loss": 0.3349, "step": 18078 }, { "epoch": 0.3356564422644928, "grad_norm": 0.21766094863414764, "learning_rate": 1.4936560446216319e-05, "loss": 0.2139, "step": 18080 }, { "epoch": 0.33569357240191144, "grad_norm": 0.33876872062683105, "learning_rate": 1.493554596558592e-05, "loss": 0.1748, "step": 18082 }, { "epoch": 0.3357307025393301, "grad_norm": 0.26665404438972473, "learning_rate": 1.4934531417797528e-05, "loss": 0.4438, "step": 18084 }, { "epoch": 0.33576783267674876, "grad_norm": 0.48850172758102417, "learning_rate": 1.4933516802864945e-05, "loss": 0.272, "step": 18086 }, { "epoch": 0.3358049628141674, "grad_norm": 0.5862286686897278, "learning_rate": 1.493250212080197e-05, "loss": 0.3842, "step": 18088 }, { "epoch": 0.335842092951586, "grad_norm": 0.5566452145576477, "learning_rate": 1.4931487371622418e-05, "loss": 0.4226, "step": 18090 }, { "epoch": 0.33587922308900464, "grad_norm": 0.4099358916282654, "learning_rate": 1.4930472555340097e-05, "loss": 0.4184, "step": 18092 }, { "epoch": 0.33591635322642327, "grad_norm": 0.39967721700668335, "learning_rate": 1.4929457671968809e-05, "loss": 0.387, "step": 18094 }, { "epoch": 0.33595348336384195, "grad_norm": 0.46006521582603455, "learning_rate": 1.4928442721522369e-05, "loss": 0.4534, "step": 18096 }, { "epoch": 0.3359906135012606, "grad_norm": 0.3420569896697998, "learning_rate": 1.4927427704014588e-05, "loss": 0.3575, "step": 18098 }, { "epoch": 0.3360277436386792, "grad_norm": 0.2239494025707245, "learning_rate": 1.4926412619459272e-05, "loss": 0.3339, "step": 18100 }, { "epoch": 0.33606487377609784, "grad_norm": 0.31252405047416687, "learning_rate": 1.4925397467870237e-05, "loss": 0.1748, "step": 18102 }, { "epoch": 0.33610200391351647, "grad_norm": 0.30184435844421387, "learning_rate": 1.49243822492613e-05, "loss": 0.2698, "step": 18104 }, { "epoch": 0.3361391340509351, "grad_norm": 0.5092756152153015, "learning_rate": 1.4923366963646269e-05, "loss": 0.1324, "step": 18106 }, { "epoch": 0.3361762641883538, "grad_norm": 0.324421763420105, "learning_rate": 1.4922351611038957e-05, "loss": 0.4726, "step": 18108 }, { "epoch": 0.3362133943257724, "grad_norm": 0.5144684910774231, "learning_rate": 1.492133619145319e-05, "loss": 0.2619, "step": 18110 }, { "epoch": 0.33625052446319104, "grad_norm": 0.31315067410469055, "learning_rate": 1.4920320704902773e-05, "loss": 0.2529, "step": 18112 }, { "epoch": 0.33628765460060966, "grad_norm": 0.4374421536922455, "learning_rate": 1.4919305151401529e-05, "loss": 0.2697, "step": 18114 }, { "epoch": 0.3363247847380283, "grad_norm": 0.3299446403980255, "learning_rate": 1.4918289530963281e-05, "loss": 0.3127, "step": 18116 }, { "epoch": 0.336361914875447, "grad_norm": 0.446545273065567, "learning_rate": 1.4917273843601844e-05, "loss": 0.2267, "step": 18118 }, { "epoch": 0.3363990450128656, "grad_norm": 0.4188675582408905, "learning_rate": 1.4916258089331035e-05, "loss": 0.2963, "step": 18120 }, { "epoch": 0.33643617515028423, "grad_norm": 0.33802530169487, "learning_rate": 1.4915242268164682e-05, "loss": 0.5353, "step": 18122 }, { "epoch": 0.33647330528770286, "grad_norm": 0.2887125015258789, "learning_rate": 1.4914226380116606e-05, "loss": 0.2303, "step": 18124 }, { "epoch": 0.3365104354251215, "grad_norm": 0.2958166301250458, "learning_rate": 1.4913210425200626e-05, "loss": 0.2891, "step": 18126 }, { "epoch": 0.3365475655625402, "grad_norm": 0.23940473794937134, "learning_rate": 1.4912194403430569e-05, "loss": 0.2643, "step": 18128 }, { "epoch": 0.3365846956999588, "grad_norm": 0.3542059063911438, "learning_rate": 1.4911178314820263e-05, "loss": 0.279, "step": 18130 }, { "epoch": 0.33662182583737743, "grad_norm": 0.4075571894645691, "learning_rate": 1.4910162159383528e-05, "loss": 0.3025, "step": 18132 }, { "epoch": 0.33665895597479606, "grad_norm": 0.2779483497142792, "learning_rate": 1.4909145937134192e-05, "loss": 0.2808, "step": 18134 }, { "epoch": 0.3366960861122147, "grad_norm": 0.26593664288520813, "learning_rate": 1.4908129648086087e-05, "loss": 0.4225, "step": 18136 }, { "epoch": 0.3367332162496333, "grad_norm": 0.27906349301338196, "learning_rate": 1.4907113292253042e-05, "loss": 0.1161, "step": 18138 }, { "epoch": 0.336770346387052, "grad_norm": 0.23779112100601196, "learning_rate": 1.4906096869648878e-05, "loss": 0.294, "step": 18140 }, { "epoch": 0.33680747652447063, "grad_norm": 0.37250158190727234, "learning_rate": 1.4905080380287435e-05, "loss": 0.2972, "step": 18142 }, { "epoch": 0.33684460666188926, "grad_norm": 0.42775219678878784, "learning_rate": 1.4904063824182537e-05, "loss": 0.3813, "step": 18144 }, { "epoch": 0.3368817367993079, "grad_norm": 0.2107010930776596, "learning_rate": 1.4903047201348022e-05, "loss": 0.2266, "step": 18146 }, { "epoch": 0.3369188669367265, "grad_norm": 0.3748937249183655, "learning_rate": 1.490203051179772e-05, "loss": 0.3494, "step": 18148 }, { "epoch": 0.3369559970741452, "grad_norm": 0.5349082946777344, "learning_rate": 1.4901013755545468e-05, "loss": 0.4264, "step": 18150 }, { "epoch": 0.3369931272115638, "grad_norm": 0.34124860167503357, "learning_rate": 1.4899996932605097e-05, "loss": 0.1921, "step": 18152 }, { "epoch": 0.33703025734898245, "grad_norm": 0.3762378990650177, "learning_rate": 1.4898980042990447e-05, "loss": 0.2237, "step": 18154 }, { "epoch": 0.3370673874864011, "grad_norm": 0.22387202084064484, "learning_rate": 1.489796308671535e-05, "loss": 0.2175, "step": 18156 }, { "epoch": 0.3371045176238197, "grad_norm": 0.37569281458854675, "learning_rate": 1.489694606379365e-05, "loss": 0.1186, "step": 18158 }, { "epoch": 0.33714164776123834, "grad_norm": 0.35135531425476074, "learning_rate": 1.489592897423918e-05, "loss": 0.3264, "step": 18160 }, { "epoch": 0.337178777898657, "grad_norm": 0.4404272735118866, "learning_rate": 1.489491181806578e-05, "loss": 0.4489, "step": 18162 }, { "epoch": 0.33721590803607565, "grad_norm": 0.38416457176208496, "learning_rate": 1.4893894595287293e-05, "loss": 0.3701, "step": 18164 }, { "epoch": 0.3372530381734943, "grad_norm": 0.336603581905365, "learning_rate": 1.4892877305917561e-05, "loss": 0.391, "step": 18166 }, { "epoch": 0.3372901683109129, "grad_norm": 0.47559013962745667, "learning_rate": 1.4891859949970422e-05, "loss": 0.3299, "step": 18168 }, { "epoch": 0.33732729844833154, "grad_norm": 0.4884147644042969, "learning_rate": 1.4890842527459725e-05, "loss": 0.4522, "step": 18170 }, { "epoch": 0.3373644285857502, "grad_norm": 1.8078135251998901, "learning_rate": 1.4889825038399308e-05, "loss": 0.5159, "step": 18172 }, { "epoch": 0.33740155872316885, "grad_norm": 0.3391738533973694, "learning_rate": 1.4888807482803023e-05, "loss": 0.4232, "step": 18174 }, { "epoch": 0.3374386888605875, "grad_norm": 0.27409452199935913, "learning_rate": 1.488778986068471e-05, "loss": 0.3525, "step": 18176 }, { "epoch": 0.3374758189980061, "grad_norm": 0.5105887055397034, "learning_rate": 1.4886772172058216e-05, "loss": 0.3551, "step": 18178 }, { "epoch": 0.33751294913542473, "grad_norm": 0.4102279841899872, "learning_rate": 1.4885754416937392e-05, "loss": 0.3401, "step": 18180 }, { "epoch": 0.33755007927284336, "grad_norm": 0.5282325148582458, "learning_rate": 1.4884736595336084e-05, "loss": 0.1652, "step": 18182 }, { "epoch": 0.33758720941026205, "grad_norm": 0.3346143066883087, "learning_rate": 1.4883718707268142e-05, "loss": 0.4516, "step": 18184 }, { "epoch": 0.3376243395476807, "grad_norm": 0.4011649191379547, "learning_rate": 1.4882700752747418e-05, "loss": 0.4263, "step": 18186 }, { "epoch": 0.3376614696850993, "grad_norm": 0.6434336304664612, "learning_rate": 1.4881682731787761e-05, "loss": 0.1938, "step": 18188 }, { "epoch": 0.33769859982251793, "grad_norm": 0.4085772931575775, "learning_rate": 1.4880664644403026e-05, "loss": 0.291, "step": 18190 }, { "epoch": 0.33773572995993656, "grad_norm": 0.3318369388580322, "learning_rate": 1.4879646490607065e-05, "loss": 0.1883, "step": 18192 }, { "epoch": 0.33777286009735524, "grad_norm": 0.2531749904155731, "learning_rate": 1.487862827041373e-05, "loss": 0.3559, "step": 18194 }, { "epoch": 0.33780999023477387, "grad_norm": 0.2857612669467926, "learning_rate": 1.4877609983836875e-05, "loss": 0.4578, "step": 18196 }, { "epoch": 0.3378471203721925, "grad_norm": 0.41911283135414124, "learning_rate": 1.4876591630890362e-05, "loss": 0.3251, "step": 18198 }, { "epoch": 0.33788425050961113, "grad_norm": 0.39310166239738464, "learning_rate": 1.4875573211588044e-05, "loss": 0.2964, "step": 18200 }, { "epoch": 0.33792138064702976, "grad_norm": 0.3363457918167114, "learning_rate": 1.4874554725943777e-05, "loss": 0.5485, "step": 18202 }, { "epoch": 0.33795851078444844, "grad_norm": 0.516950249671936, "learning_rate": 1.4873536173971422e-05, "loss": 0.2587, "step": 18204 }, { "epoch": 0.33799564092186707, "grad_norm": 0.4866335690021515, "learning_rate": 1.4872517555684835e-05, "loss": 0.2183, "step": 18206 }, { "epoch": 0.3380327710592857, "grad_norm": 0.568638265132904, "learning_rate": 1.4871498871097883e-05, "loss": 0.3719, "step": 18208 }, { "epoch": 0.3380699011967043, "grad_norm": 0.5424169301986694, "learning_rate": 1.487048012022442e-05, "loss": 0.1711, "step": 18210 }, { "epoch": 0.33810703133412295, "grad_norm": 0.9764118790626526, "learning_rate": 1.4869461303078315e-05, "loss": 0.3511, "step": 18212 }, { "epoch": 0.3381441614715416, "grad_norm": 0.3772079646587372, "learning_rate": 1.4868442419673424e-05, "loss": 0.0629, "step": 18214 }, { "epoch": 0.33818129160896027, "grad_norm": 0.3446721136569977, "learning_rate": 1.4867423470023614e-05, "loss": 0.4339, "step": 18216 }, { "epoch": 0.3382184217463789, "grad_norm": 0.4032352566719055, "learning_rate": 1.4866404454142754e-05, "loss": 0.2481, "step": 18218 }, { "epoch": 0.3382555518837975, "grad_norm": 0.2930324673652649, "learning_rate": 1.4865385372044701e-05, "loss": 0.394, "step": 18220 }, { "epoch": 0.33829268202121615, "grad_norm": 0.383143812417984, "learning_rate": 1.486436622374333e-05, "loss": 0.3317, "step": 18222 }, { "epoch": 0.3383298121586348, "grad_norm": 0.3990165889263153, "learning_rate": 1.4863347009252504e-05, "loss": 0.137, "step": 18224 }, { "epoch": 0.33836694229605346, "grad_norm": 0.2508726418018341, "learning_rate": 1.4862327728586095e-05, "loss": 0.2739, "step": 18226 }, { "epoch": 0.3384040724334721, "grad_norm": 0.39910128712654114, "learning_rate": 1.4861308381757966e-05, "loss": 0.3002, "step": 18228 }, { "epoch": 0.3384412025708907, "grad_norm": 0.5039410591125488, "learning_rate": 1.4860288968781993e-05, "loss": 0.3888, "step": 18230 }, { "epoch": 0.33847833270830935, "grad_norm": 0.37737831473350525, "learning_rate": 1.4859269489672046e-05, "loss": 0.3185, "step": 18232 }, { "epoch": 0.338515462845728, "grad_norm": 0.483288049697876, "learning_rate": 1.4858249944441996e-05, "loss": 0.4087, "step": 18234 }, { "epoch": 0.3385525929831466, "grad_norm": 0.32364943623542786, "learning_rate": 1.4857230333105716e-05, "loss": 0.262, "step": 18236 }, { "epoch": 0.3385897231205653, "grad_norm": 0.33189359307289124, "learning_rate": 1.4856210655677079e-05, "loss": 0.2402, "step": 18238 }, { "epoch": 0.3386268532579839, "grad_norm": 0.3368898630142212, "learning_rate": 1.4855190912169963e-05, "loss": 0.3653, "step": 18240 }, { "epoch": 0.33866398339540255, "grad_norm": 0.44581544399261475, "learning_rate": 1.4854171102598243e-05, "loss": 0.3465, "step": 18242 }, { "epoch": 0.3387011135328212, "grad_norm": 0.32312047481536865, "learning_rate": 1.4853151226975793e-05, "loss": 0.2754, "step": 18244 }, { "epoch": 0.3387382436702398, "grad_norm": 0.41122275590896606, "learning_rate": 1.4852131285316488e-05, "loss": 0.289, "step": 18246 }, { "epoch": 0.3387753738076585, "grad_norm": 0.42758405208587646, "learning_rate": 1.4851111277634216e-05, "loss": 0.3875, "step": 18248 }, { "epoch": 0.3388125039450771, "grad_norm": 0.5334575772285461, "learning_rate": 1.4850091203942847e-05, "loss": 0.2982, "step": 18250 }, { "epoch": 0.33884963408249574, "grad_norm": 0.41673538088798523, "learning_rate": 1.4849071064256264e-05, "loss": 0.3638, "step": 18252 }, { "epoch": 0.33888676421991437, "grad_norm": 0.3653479218482971, "learning_rate": 1.484805085858835e-05, "loss": 0.4835, "step": 18254 }, { "epoch": 0.338923894357333, "grad_norm": 0.3188316822052002, "learning_rate": 1.4847030586952986e-05, "loss": 0.2774, "step": 18256 }, { "epoch": 0.33896102449475163, "grad_norm": 0.5281440019607544, "learning_rate": 1.484601024936405e-05, "loss": 0.261, "step": 18258 }, { "epoch": 0.3389981546321703, "grad_norm": 0.35306859016418457, "learning_rate": 1.4844989845835433e-05, "loss": 0.2948, "step": 18260 }, { "epoch": 0.33903528476958894, "grad_norm": 0.25086507201194763, "learning_rate": 1.4843969376381017e-05, "loss": 0.4044, "step": 18262 }, { "epoch": 0.33907241490700757, "grad_norm": 0.3014141917228699, "learning_rate": 1.4842948841014688e-05, "loss": 0.2049, "step": 18264 }, { "epoch": 0.3391095450444262, "grad_norm": 0.37135809659957886, "learning_rate": 1.4841928239750329e-05, "loss": 0.1638, "step": 18266 }, { "epoch": 0.3391466751818448, "grad_norm": 0.35058286786079407, "learning_rate": 1.4840907572601833e-05, "loss": 0.2769, "step": 18268 }, { "epoch": 0.3391838053192635, "grad_norm": 0.38817042112350464, "learning_rate": 1.4839886839583082e-05, "loss": 0.2606, "step": 18270 }, { "epoch": 0.33922093545668214, "grad_norm": 0.29744139313697815, "learning_rate": 1.483886604070797e-05, "loss": 0.274, "step": 18272 }, { "epoch": 0.33925806559410077, "grad_norm": 0.6697937846183777, "learning_rate": 1.4837845175990384e-05, "loss": 0.4953, "step": 18274 }, { "epoch": 0.3392951957315194, "grad_norm": 0.3933962285518646, "learning_rate": 1.483682424544422e-05, "loss": 0.338, "step": 18276 }, { "epoch": 0.339332325868938, "grad_norm": 0.2833634614944458, "learning_rate": 1.483580324908336e-05, "loss": 0.3862, "step": 18278 }, { "epoch": 0.3393694560063567, "grad_norm": 0.44437509775161743, "learning_rate": 1.4834782186921708e-05, "loss": 0.3438, "step": 18280 }, { "epoch": 0.33940658614377534, "grad_norm": 0.38834190368652344, "learning_rate": 1.4833761058973148e-05, "loss": 0.1916, "step": 18282 }, { "epoch": 0.33944371628119396, "grad_norm": 0.3366439640522003, "learning_rate": 1.483273986525158e-05, "loss": 0.178, "step": 18284 }, { "epoch": 0.3394808464186126, "grad_norm": 0.3393609821796417, "learning_rate": 1.48317186057709e-05, "loss": 0.3019, "step": 18286 }, { "epoch": 0.3395179765560312, "grad_norm": 0.2888309061527252, "learning_rate": 1.4830697280545003e-05, "loss": 0.3157, "step": 18288 }, { "epoch": 0.33955510669344985, "grad_norm": 0.3677518963813782, "learning_rate": 1.4829675889587782e-05, "loss": 0.247, "step": 18290 }, { "epoch": 0.33959223683086853, "grad_norm": 0.5376136302947998, "learning_rate": 1.4828654432913144e-05, "loss": 0.3058, "step": 18292 }, { "epoch": 0.33962936696828716, "grad_norm": 0.391912579536438, "learning_rate": 1.4827632910534978e-05, "loss": 0.4533, "step": 18294 }, { "epoch": 0.3396664971057058, "grad_norm": 0.38033872842788696, "learning_rate": 1.4826611322467188e-05, "loss": 0.3379, "step": 18296 }, { "epoch": 0.3397036272431244, "grad_norm": 0.48980510234832764, "learning_rate": 1.4825589668723676e-05, "loss": 0.293, "step": 18298 }, { "epoch": 0.33974075738054305, "grad_norm": 0.5520845651626587, "learning_rate": 1.4824567949318347e-05, "loss": 0.3664, "step": 18300 }, { "epoch": 0.33977788751796173, "grad_norm": 0.5400775074958801, "learning_rate": 1.4823546164265095e-05, "loss": 0.192, "step": 18302 }, { "epoch": 0.33981501765538036, "grad_norm": 0.6307773590087891, "learning_rate": 1.482252431357783e-05, "loss": 0.5288, "step": 18304 }, { "epoch": 0.339852147792799, "grad_norm": 0.35385769605636597, "learning_rate": 1.4821502397270454e-05, "loss": 0.2402, "step": 18306 }, { "epoch": 0.3398892779302176, "grad_norm": 0.32030171155929565, "learning_rate": 1.4820480415356873e-05, "loss": 0.2521, "step": 18308 }, { "epoch": 0.33992640806763624, "grad_norm": 0.4245765209197998, "learning_rate": 1.4819458367850989e-05, "loss": 0.3722, "step": 18310 }, { "epoch": 0.33996353820505487, "grad_norm": 0.6182373762130737, "learning_rate": 1.4818436254766716e-05, "loss": 0.2469, "step": 18312 }, { "epoch": 0.34000066834247356, "grad_norm": 0.3729632794857025, "learning_rate": 1.481741407611796e-05, "loss": 0.3412, "step": 18314 }, { "epoch": 0.3400377984798922, "grad_norm": 0.3928276300430298, "learning_rate": 1.4816391831918623e-05, "loss": 0.3475, "step": 18316 }, { "epoch": 0.3400749286173108, "grad_norm": 0.37950918078422546, "learning_rate": 1.4815369522182626e-05, "loss": 0.2787, "step": 18318 }, { "epoch": 0.34011205875472944, "grad_norm": 0.33199527859687805, "learning_rate": 1.4814347146923868e-05, "loss": 0.2408, "step": 18320 }, { "epoch": 0.34014918889214807, "grad_norm": 0.3416058123111725, "learning_rate": 1.4813324706156269e-05, "loss": 0.3192, "step": 18322 }, { "epoch": 0.34018631902956675, "grad_norm": 0.4705767333507538, "learning_rate": 1.481230219989374e-05, "loss": 0.3766, "step": 18324 }, { "epoch": 0.3402234491669854, "grad_norm": 0.3312683701515198, "learning_rate": 1.4811279628150188e-05, "loss": 0.2684, "step": 18326 }, { "epoch": 0.340260579304404, "grad_norm": 0.41331741213798523, "learning_rate": 1.4810256990939536e-05, "loss": 0.1745, "step": 18328 }, { "epoch": 0.34029770944182264, "grad_norm": 0.5942723155021667, "learning_rate": 1.4809234288275693e-05, "loss": 0.375, "step": 18330 }, { "epoch": 0.34033483957924127, "grad_norm": 0.24649269878864288, "learning_rate": 1.4808211520172576e-05, "loss": 0.3339, "step": 18332 }, { "epoch": 0.3403719697166599, "grad_norm": 0.44161251187324524, "learning_rate": 1.4807188686644101e-05, "loss": 0.1539, "step": 18334 }, { "epoch": 0.3404090998540786, "grad_norm": 0.3539154827594757, "learning_rate": 1.4806165787704188e-05, "loss": 0.1946, "step": 18336 }, { "epoch": 0.3404462299914972, "grad_norm": 0.7322278618812561, "learning_rate": 1.4805142823366757e-05, "loss": 0.3892, "step": 18338 }, { "epoch": 0.34048336012891584, "grad_norm": 0.35283616185188293, "learning_rate": 1.4804119793645724e-05, "loss": 0.266, "step": 18340 }, { "epoch": 0.34052049026633446, "grad_norm": 0.2875336706638336, "learning_rate": 1.480309669855501e-05, "loss": 0.1382, "step": 18342 }, { "epoch": 0.3405576204037531, "grad_norm": 0.4004838466644287, "learning_rate": 1.4802073538108538e-05, "loss": 0.2007, "step": 18344 }, { "epoch": 0.3405947505411718, "grad_norm": 0.36307767033576965, "learning_rate": 1.4801050312320229e-05, "loss": 0.2224, "step": 18346 }, { "epoch": 0.3406318806785904, "grad_norm": 0.4122692048549652, "learning_rate": 1.4800027021204007e-05, "loss": 0.4446, "step": 18348 }, { "epoch": 0.34066901081600903, "grad_norm": 0.27654966711997986, "learning_rate": 1.4799003664773795e-05, "loss": 0.3937, "step": 18350 }, { "epoch": 0.34070614095342766, "grad_norm": 0.573334813117981, "learning_rate": 1.4797980243043512e-05, "loss": 0.5729, "step": 18352 }, { "epoch": 0.3407432710908463, "grad_norm": 0.5605304837226868, "learning_rate": 1.4796956756027094e-05, "loss": 0.302, "step": 18354 }, { "epoch": 0.340780401228265, "grad_norm": 0.27380573749542236, "learning_rate": 1.4795933203738467e-05, "loss": 0.2565, "step": 18356 }, { "epoch": 0.3408175313656836, "grad_norm": 0.40755441784858704, "learning_rate": 1.4794909586191547e-05, "loss": 0.278, "step": 18358 }, { "epoch": 0.34085466150310223, "grad_norm": 0.40119150280952454, "learning_rate": 1.4793885903400276e-05, "loss": 0.1833, "step": 18360 }, { "epoch": 0.34089179164052086, "grad_norm": 0.36560434103012085, "learning_rate": 1.4792862155378575e-05, "loss": 0.38, "step": 18362 }, { "epoch": 0.3409289217779395, "grad_norm": 0.6416745781898499, "learning_rate": 1.4791838342140374e-05, "loss": 0.3211, "step": 18364 }, { "epoch": 0.3409660519153581, "grad_norm": 0.46165522933006287, "learning_rate": 1.4790814463699607e-05, "loss": 0.4203, "step": 18366 }, { "epoch": 0.3410031820527768, "grad_norm": 0.28949829936027527, "learning_rate": 1.4789790520070208e-05, "loss": 0.3578, "step": 18368 }, { "epoch": 0.3410403121901954, "grad_norm": 0.4012550711631775, "learning_rate": 1.4788766511266105e-05, "loss": 0.3998, "step": 18370 }, { "epoch": 0.34107744232761406, "grad_norm": 0.5021101832389832, "learning_rate": 1.4787742437301238e-05, "loss": 0.2776, "step": 18372 }, { "epoch": 0.3411145724650327, "grad_norm": 0.31587058305740356, "learning_rate": 1.4786718298189532e-05, "loss": 0.3816, "step": 18374 }, { "epoch": 0.3411517026024513, "grad_norm": 0.4440610408782959, "learning_rate": 1.478569409394493e-05, "loss": 0.4557, "step": 18376 }, { "epoch": 0.34118883273987, "grad_norm": 0.3337494730949402, "learning_rate": 1.4784669824581366e-05, "loss": 0.3086, "step": 18378 }, { "epoch": 0.3412259628772886, "grad_norm": 0.38442182540893555, "learning_rate": 1.4783645490112779e-05, "loss": 0.3325, "step": 18380 }, { "epoch": 0.34126309301470725, "grad_norm": 0.2954692542552948, "learning_rate": 1.4782621090553105e-05, "loss": 0.2121, "step": 18382 }, { "epoch": 0.3413002231521259, "grad_norm": 0.3860663175582886, "learning_rate": 1.4781596625916282e-05, "loss": 0.2644, "step": 18384 }, { "epoch": 0.3413373532895445, "grad_norm": 0.3253702223300934, "learning_rate": 1.478057209621625e-05, "loss": 0.2706, "step": 18386 }, { "epoch": 0.34137448342696314, "grad_norm": 1.248823642730713, "learning_rate": 1.4779547501466956e-05, "loss": 0.2269, "step": 18388 }, { "epoch": 0.3414116135643818, "grad_norm": 0.6219111680984497, "learning_rate": 1.4778522841682333e-05, "loss": 0.3911, "step": 18390 }, { "epoch": 0.34144874370180045, "grad_norm": 0.47504132986068726, "learning_rate": 1.4777498116876328e-05, "loss": 0.513, "step": 18392 }, { "epoch": 0.3414858738392191, "grad_norm": 0.3708641529083252, "learning_rate": 1.4776473327062886e-05, "loss": 0.2627, "step": 18394 }, { "epoch": 0.3415230039766377, "grad_norm": 0.44879040122032166, "learning_rate": 1.4775448472255946e-05, "loss": 0.3754, "step": 18396 }, { "epoch": 0.34156013411405634, "grad_norm": 0.46180999279022217, "learning_rate": 1.4774423552469457e-05, "loss": 0.3226, "step": 18398 }, { "epoch": 0.341597264251475, "grad_norm": 0.46589720249176025, "learning_rate": 1.4773398567717368e-05, "loss": 0.411, "step": 18400 }, { "epoch": 0.34163439438889365, "grad_norm": 0.6094220876693726, "learning_rate": 1.4772373518013621e-05, "loss": 0.1175, "step": 18402 }, { "epoch": 0.3416715245263123, "grad_norm": 0.39103585481643677, "learning_rate": 1.4771348403372163e-05, "loss": 0.214, "step": 18404 }, { "epoch": 0.3417086546637309, "grad_norm": 0.6076875925064087, "learning_rate": 1.4770323223806948e-05, "loss": 0.5578, "step": 18406 }, { "epoch": 0.34174578480114953, "grad_norm": 0.4555501639842987, "learning_rate": 1.476929797933192e-05, "loss": 0.3345, "step": 18408 }, { "epoch": 0.34178291493856816, "grad_norm": 0.3708654046058655, "learning_rate": 1.4768272669961032e-05, "loss": 0.265, "step": 18410 }, { "epoch": 0.34182004507598684, "grad_norm": 0.3246888816356659, "learning_rate": 1.4767247295708238e-05, "loss": 0.1496, "step": 18412 }, { "epoch": 0.3418571752134055, "grad_norm": 0.4212571382522583, "learning_rate": 1.4766221856587487e-05, "loss": 0.2489, "step": 18414 }, { "epoch": 0.3418943053508241, "grad_norm": 0.2987867593765259, "learning_rate": 1.4765196352612734e-05, "loss": 0.3318, "step": 18416 }, { "epoch": 0.34193143548824273, "grad_norm": 0.30457404255867004, "learning_rate": 1.4764170783797931e-05, "loss": 0.2203, "step": 18418 }, { "epoch": 0.34196856562566136, "grad_norm": 0.35973840951919556, "learning_rate": 1.4763145150157035e-05, "loss": 0.3911, "step": 18420 }, { "epoch": 0.34200569576308004, "grad_norm": 0.27560552954673767, "learning_rate": 1.4762119451703998e-05, "loss": 0.2638, "step": 18422 }, { "epoch": 0.34204282590049867, "grad_norm": 0.3037315309047699, "learning_rate": 1.4761093688452781e-05, "loss": 0.2871, "step": 18424 }, { "epoch": 0.3420799560379173, "grad_norm": 0.3087960183620453, "learning_rate": 1.4760067860417343e-05, "loss": 0.3604, "step": 18426 }, { "epoch": 0.3421170861753359, "grad_norm": 0.4646759331226349, "learning_rate": 1.4759041967611636e-05, "loss": 0.25, "step": 18428 }, { "epoch": 0.34215421631275456, "grad_norm": 0.4942202866077423, "learning_rate": 1.4758016010049621e-05, "loss": 0.2276, "step": 18430 }, { "epoch": 0.34219134645017324, "grad_norm": 0.541326105594635, "learning_rate": 1.4756989987745265e-05, "loss": 0.3129, "step": 18432 }, { "epoch": 0.34222847658759187, "grad_norm": 0.2703700363636017, "learning_rate": 1.4755963900712521e-05, "loss": 0.145, "step": 18434 }, { "epoch": 0.3422656067250105, "grad_norm": 0.315107136964798, "learning_rate": 1.4754937748965354e-05, "loss": 0.4845, "step": 18436 }, { "epoch": 0.3423027368624291, "grad_norm": 0.3075566291809082, "learning_rate": 1.4753911532517728e-05, "loss": 0.307, "step": 18438 }, { "epoch": 0.34233986699984775, "grad_norm": 0.42130833864212036, "learning_rate": 1.4752885251383607e-05, "loss": 0.2895, "step": 18440 }, { "epoch": 0.3423769971372664, "grad_norm": 0.40836820006370544, "learning_rate": 1.475185890557695e-05, "loss": 0.3714, "step": 18442 }, { "epoch": 0.34241412727468507, "grad_norm": 0.4758872389793396, "learning_rate": 1.475083249511173e-05, "loss": 0.5032, "step": 18444 }, { "epoch": 0.3424512574121037, "grad_norm": 0.4957320988178253, "learning_rate": 1.4749806020001911e-05, "loss": 0.2063, "step": 18446 }, { "epoch": 0.3424883875495223, "grad_norm": 0.4166910946369171, "learning_rate": 1.4748779480261456e-05, "loss": 0.4267, "step": 18448 }, { "epoch": 0.34252551768694095, "grad_norm": 0.44949257373809814, "learning_rate": 1.474775287590434e-05, "loss": 0.2754, "step": 18450 }, { "epoch": 0.3425626478243596, "grad_norm": 0.5518895983695984, "learning_rate": 1.4746726206944527e-05, "loss": 0.4205, "step": 18452 }, { "epoch": 0.34259977796177826, "grad_norm": 0.37484726309776306, "learning_rate": 1.4745699473395986e-05, "loss": 0.2487, "step": 18454 }, { "epoch": 0.3426369080991969, "grad_norm": 0.39550405740737915, "learning_rate": 1.4744672675272695e-05, "loss": 0.4005, "step": 18456 }, { "epoch": 0.3426740382366155, "grad_norm": 0.3381112813949585, "learning_rate": 1.4743645812588619e-05, "loss": 0.2921, "step": 18458 }, { "epoch": 0.34271116837403415, "grad_norm": 0.38555794954299927, "learning_rate": 1.4742618885357728e-05, "loss": 0.2701, "step": 18460 }, { "epoch": 0.3427482985114528, "grad_norm": 0.4074064791202545, "learning_rate": 1.4741591893594004e-05, "loss": 0.2891, "step": 18462 }, { "epoch": 0.3427854286488714, "grad_norm": 0.3551812469959259, "learning_rate": 1.4740564837311417e-05, "loss": 0.3131, "step": 18464 }, { "epoch": 0.3428225587862901, "grad_norm": 0.4106914699077606, "learning_rate": 1.4739537716523943e-05, "loss": 0.2862, "step": 18466 }, { "epoch": 0.3428596889237087, "grad_norm": 0.25194311141967773, "learning_rate": 1.4738510531245556e-05, "loss": 0.1716, "step": 18468 }, { "epoch": 0.34289681906112734, "grad_norm": 0.3113246262073517, "learning_rate": 1.4737483281490235e-05, "loss": 0.3441, "step": 18470 }, { "epoch": 0.342933949198546, "grad_norm": 0.5459404587745667, "learning_rate": 1.4736455967271955e-05, "loss": 0.3171, "step": 18472 }, { "epoch": 0.3429710793359646, "grad_norm": 0.3636152446269989, "learning_rate": 1.4735428588604697e-05, "loss": 0.4642, "step": 18474 }, { "epoch": 0.3430082094733833, "grad_norm": 0.5082035660743713, "learning_rate": 1.4734401145502442e-05, "loss": 0.2514, "step": 18476 }, { "epoch": 0.3430453396108019, "grad_norm": 0.38087666034698486, "learning_rate": 1.4733373637979168e-05, "loss": 0.3558, "step": 18478 }, { "epoch": 0.34308246974822054, "grad_norm": 0.4863491356372833, "learning_rate": 1.4732346066048855e-05, "loss": 0.4232, "step": 18480 }, { "epoch": 0.34311959988563917, "grad_norm": 0.30016979575157166, "learning_rate": 1.473131842972549e-05, "loss": 0.4608, "step": 18482 }, { "epoch": 0.3431567300230578, "grad_norm": 0.35095369815826416, "learning_rate": 1.473029072902305e-05, "loss": 0.4489, "step": 18484 }, { "epoch": 0.3431938601604764, "grad_norm": 0.3252183496952057, "learning_rate": 1.4729262963955524e-05, "loss": 0.2471, "step": 18486 }, { "epoch": 0.3432309902978951, "grad_norm": 0.34476903080940247, "learning_rate": 1.4728235134536894e-05, "loss": 0.439, "step": 18488 }, { "epoch": 0.34326812043531374, "grad_norm": 0.3564518392086029, "learning_rate": 1.4727207240781148e-05, "loss": 0.4229, "step": 18490 }, { "epoch": 0.34330525057273237, "grad_norm": 0.365873247385025, "learning_rate": 1.4726179282702268e-05, "loss": 0.2776, "step": 18492 }, { "epoch": 0.343342380710151, "grad_norm": 4.668469429016113, "learning_rate": 1.4725151260314248e-05, "loss": 0.3878, "step": 18494 }, { "epoch": 0.3433795108475696, "grad_norm": 0.5209546685218811, "learning_rate": 1.4724123173631071e-05, "loss": 0.3809, "step": 18496 }, { "epoch": 0.3434166409849883, "grad_norm": 0.3985597491264343, "learning_rate": 1.472309502266673e-05, "loss": 0.3819, "step": 18498 }, { "epoch": 0.34345377112240694, "grad_norm": 0.41030529141426086, "learning_rate": 1.4722066807435212e-05, "loss": 0.2816, "step": 18500 }, { "epoch": 0.34349090125982557, "grad_norm": 0.41002827882766724, "learning_rate": 1.4721038527950507e-05, "loss": 0.3486, "step": 18502 }, { "epoch": 0.3435280313972442, "grad_norm": 0.4368060529232025, "learning_rate": 1.4720010184226607e-05, "loss": 0.4878, "step": 18504 }, { "epoch": 0.3435651615346628, "grad_norm": 0.33190497756004333, "learning_rate": 1.471898177627751e-05, "loss": 0.441, "step": 18506 }, { "epoch": 0.3436022916720815, "grad_norm": 0.37431544065475464, "learning_rate": 1.4717953304117205e-05, "loss": 0.3051, "step": 18508 }, { "epoch": 0.34363942180950013, "grad_norm": 0.24775822460651398, "learning_rate": 1.471692476775969e-05, "loss": 0.2774, "step": 18510 }, { "epoch": 0.34367655194691876, "grad_norm": 0.3449932038784027, "learning_rate": 1.4715896167218956e-05, "loss": 0.3105, "step": 18512 }, { "epoch": 0.3437136820843374, "grad_norm": 0.32338500022888184, "learning_rate": 1.4714867502509001e-05, "loss": 0.3576, "step": 18514 }, { "epoch": 0.343750812221756, "grad_norm": 0.41777700185775757, "learning_rate": 1.471383877364382e-05, "loss": 0.2771, "step": 18516 }, { "epoch": 0.34378794235917465, "grad_norm": 0.9013303518295288, "learning_rate": 1.4712809980637414e-05, "loss": 0.2851, "step": 18518 }, { "epoch": 0.34382507249659333, "grad_norm": 0.5362027883529663, "learning_rate": 1.4711781123503784e-05, "loss": 0.3007, "step": 18520 }, { "epoch": 0.34386220263401196, "grad_norm": 0.32192492485046387, "learning_rate": 1.4710752202256921e-05, "loss": 0.3793, "step": 18522 }, { "epoch": 0.3438993327714306, "grad_norm": 0.377270370721817, "learning_rate": 1.4709723216910833e-05, "loss": 0.1943, "step": 18524 }, { "epoch": 0.3439364629088492, "grad_norm": 0.3293372392654419, "learning_rate": 1.470869416747952e-05, "loss": 0.3127, "step": 18526 }, { "epoch": 0.34397359304626784, "grad_norm": 0.42359501123428345, "learning_rate": 1.4707665053976982e-05, "loss": 0.34, "step": 18528 }, { "epoch": 0.34401072318368653, "grad_norm": 0.38239607214927673, "learning_rate": 1.4706635876417226e-05, "loss": 0.2964, "step": 18530 }, { "epoch": 0.34404785332110516, "grad_norm": 0.3367011547088623, "learning_rate": 1.4705606634814253e-05, "loss": 0.3975, "step": 18532 }, { "epoch": 0.3440849834585238, "grad_norm": 0.2904767394065857, "learning_rate": 1.4704577329182069e-05, "loss": 0.1761, "step": 18534 }, { "epoch": 0.3441221135959424, "grad_norm": 0.38558465242385864, "learning_rate": 1.4703547959534677e-05, "loss": 0.2191, "step": 18536 }, { "epoch": 0.34415924373336104, "grad_norm": 0.34732577204704285, "learning_rate": 1.4702518525886088e-05, "loss": 0.3606, "step": 18538 }, { "epoch": 0.34419637387077967, "grad_norm": 0.45775535702705383, "learning_rate": 1.4701489028250309e-05, "loss": 0.2958, "step": 18540 }, { "epoch": 0.34423350400819835, "grad_norm": 0.48883628845214844, "learning_rate": 1.4700459466641344e-05, "loss": 0.2339, "step": 18542 }, { "epoch": 0.344270634145617, "grad_norm": 0.19950418174266815, "learning_rate": 1.4699429841073204e-05, "loss": 0.2887, "step": 18544 }, { "epoch": 0.3443077642830356, "grad_norm": 0.45074546337127686, "learning_rate": 1.4698400151559903e-05, "loss": 0.3845, "step": 18546 }, { "epoch": 0.34434489442045424, "grad_norm": 0.41176578402519226, "learning_rate": 1.4697370398115449e-05, "loss": 0.2371, "step": 18548 }, { "epoch": 0.34438202455787287, "grad_norm": 0.29774734377861023, "learning_rate": 1.4696340580753854e-05, "loss": 0.2491, "step": 18550 }, { "epoch": 0.34441915469529155, "grad_norm": 0.37725627422332764, "learning_rate": 1.4695310699489134e-05, "loss": 0.338, "step": 18552 }, { "epoch": 0.3444562848327102, "grad_norm": 0.41958385705947876, "learning_rate": 1.4694280754335296e-05, "loss": 0.2931, "step": 18554 }, { "epoch": 0.3444934149701288, "grad_norm": 0.315873920917511, "learning_rate": 1.4693250745306356e-05, "loss": 0.2888, "step": 18556 }, { "epoch": 0.34453054510754744, "grad_norm": 0.42969194054603577, "learning_rate": 1.4692220672416334e-05, "loss": 0.3944, "step": 18558 }, { "epoch": 0.34456767524496607, "grad_norm": 0.22775757312774658, "learning_rate": 1.469119053567924e-05, "loss": 0.1546, "step": 18560 }, { "epoch": 0.3446048053823847, "grad_norm": 0.37798747420310974, "learning_rate": 1.46901603351091e-05, "loss": 0.2355, "step": 18562 }, { "epoch": 0.3446419355198034, "grad_norm": 0.4860764443874359, "learning_rate": 1.4689130070719926e-05, "loss": 0.358, "step": 18564 }, { "epoch": 0.344679065657222, "grad_norm": 0.3808879852294922, "learning_rate": 1.4688099742525732e-05, "loss": 0.3541, "step": 18566 }, { "epoch": 0.34471619579464063, "grad_norm": 0.2747688889503479, "learning_rate": 1.4687069350540545e-05, "loss": 0.2521, "step": 18568 }, { "epoch": 0.34475332593205926, "grad_norm": 0.4016200602054596, "learning_rate": 1.4686038894778387e-05, "loss": 0.2727, "step": 18570 }, { "epoch": 0.3447904560694779, "grad_norm": 0.691413402557373, "learning_rate": 1.4685008375253276e-05, "loss": 0.2286, "step": 18572 }, { "epoch": 0.3448275862068966, "grad_norm": 0.5206535458564758, "learning_rate": 1.4683977791979228e-05, "loss": 0.3718, "step": 18574 }, { "epoch": 0.3448647163443152, "grad_norm": 0.23580624163150787, "learning_rate": 1.4682947144970278e-05, "loss": 0.3449, "step": 18576 }, { "epoch": 0.34490184648173383, "grad_norm": 0.5057315230369568, "learning_rate": 1.4681916434240442e-05, "loss": 0.2601, "step": 18578 }, { "epoch": 0.34493897661915246, "grad_norm": 0.4236955940723419, "learning_rate": 1.4680885659803747e-05, "loss": 0.4124, "step": 18580 }, { "epoch": 0.3449761067565711, "grad_norm": 0.578579306602478, "learning_rate": 1.4679854821674221e-05, "loss": 0.27, "step": 18582 }, { "epoch": 0.34501323689398977, "grad_norm": 0.5346922874450684, "learning_rate": 1.467882391986589e-05, "loss": 0.3457, "step": 18584 }, { "epoch": 0.3450503670314084, "grad_norm": 0.5471673607826233, "learning_rate": 1.4677792954392778e-05, "loss": 0.4815, "step": 18586 }, { "epoch": 0.34508749716882703, "grad_norm": 0.3296218514442444, "learning_rate": 1.4676761925268915e-05, "loss": 0.2711, "step": 18588 }, { "epoch": 0.34512462730624566, "grad_norm": 0.4686400592327118, "learning_rate": 1.467573083250833e-05, "loss": 0.3736, "step": 18590 }, { "epoch": 0.3451617574436643, "grad_norm": 0.38642826676368713, "learning_rate": 1.4674699676125055e-05, "loss": 0.2918, "step": 18592 }, { "epoch": 0.3451988875810829, "grad_norm": 0.40722349286079407, "learning_rate": 1.4673668456133121e-05, "loss": 0.4454, "step": 18594 }, { "epoch": 0.3452360177185016, "grad_norm": 0.20134998857975006, "learning_rate": 1.4672637172546558e-05, "loss": 0.3673, "step": 18596 }, { "epoch": 0.3452731478559202, "grad_norm": 0.3522738516330719, "learning_rate": 1.4671605825379396e-05, "loss": 0.3988, "step": 18598 }, { "epoch": 0.34531027799333885, "grad_norm": 0.3568783104419708, "learning_rate": 1.4670574414645672e-05, "loss": 0.2853, "step": 18600 }, { "epoch": 0.3453474081307575, "grad_norm": 0.46944695711135864, "learning_rate": 1.4669542940359424e-05, "loss": 0.285, "step": 18602 }, { "epoch": 0.3453845382681761, "grad_norm": 0.5164563655853271, "learning_rate": 1.4668511402534684e-05, "loss": 0.2112, "step": 18604 }, { "epoch": 0.3454216684055948, "grad_norm": 0.408833771944046, "learning_rate": 1.4667479801185485e-05, "loss": 0.3871, "step": 18606 }, { "epoch": 0.3454587985430134, "grad_norm": 3.7545087337493896, "learning_rate": 1.4666448136325869e-05, "loss": 0.2925, "step": 18608 }, { "epoch": 0.34549592868043205, "grad_norm": 0.3766627907752991, "learning_rate": 1.4665416407969867e-05, "loss": 0.2477, "step": 18610 }, { "epoch": 0.3455330588178507, "grad_norm": 0.3098503649234772, "learning_rate": 1.4664384616131525e-05, "loss": 0.3116, "step": 18612 }, { "epoch": 0.3455701889552693, "grad_norm": 0.36140838265419006, "learning_rate": 1.4663352760824881e-05, "loss": 0.0721, "step": 18614 }, { "epoch": 0.34560731909268794, "grad_norm": 0.3446345925331116, "learning_rate": 1.4662320842063972e-05, "loss": 0.4233, "step": 18616 }, { "epoch": 0.3456444492301066, "grad_norm": 0.4399997293949127, "learning_rate": 1.4661288859862844e-05, "loss": 0.3954, "step": 18618 }, { "epoch": 0.34568157936752525, "grad_norm": 0.33791542053222656, "learning_rate": 1.4660256814235536e-05, "loss": 0.3203, "step": 18620 }, { "epoch": 0.3457187095049439, "grad_norm": 0.44381722807884216, "learning_rate": 1.4659224705196092e-05, "loss": 0.3669, "step": 18622 }, { "epoch": 0.3457558396423625, "grad_norm": 0.4761871099472046, "learning_rate": 1.4658192532758553e-05, "loss": 0.1948, "step": 18624 }, { "epoch": 0.34579296977978113, "grad_norm": 0.617220401763916, "learning_rate": 1.465716029693697e-05, "loss": 0.293, "step": 18626 }, { "epoch": 0.3458300999171998, "grad_norm": 0.4558509886264801, "learning_rate": 1.4656127997745385e-05, "loss": 0.3223, "step": 18628 }, { "epoch": 0.34586723005461845, "grad_norm": 0.2962754964828491, "learning_rate": 1.4655095635197843e-05, "loss": 0.1827, "step": 18630 }, { "epoch": 0.3459043601920371, "grad_norm": 0.3096504807472229, "learning_rate": 1.4654063209308392e-05, "loss": 0.3243, "step": 18632 }, { "epoch": 0.3459414903294557, "grad_norm": 0.42045292258262634, "learning_rate": 1.4653030720091081e-05, "loss": 0.2591, "step": 18634 }, { "epoch": 0.34597862046687433, "grad_norm": 0.4099344313144684, "learning_rate": 1.4651998167559964e-05, "loss": 0.2409, "step": 18636 }, { "epoch": 0.34601575060429296, "grad_norm": 0.4277016520500183, "learning_rate": 1.4650965551729081e-05, "loss": 0.2968, "step": 18638 }, { "epoch": 0.34605288074171164, "grad_norm": 0.43297651410102844, "learning_rate": 1.4649932872612493e-05, "loss": 0.35, "step": 18640 }, { "epoch": 0.34609001087913027, "grad_norm": 0.38874542713165283, "learning_rate": 1.4648900130224243e-05, "loss": 0.3985, "step": 18642 }, { "epoch": 0.3461271410165489, "grad_norm": 0.43265971541404724, "learning_rate": 1.4647867324578386e-05, "loss": 0.195, "step": 18644 }, { "epoch": 0.34616427115396753, "grad_norm": 0.331837922334671, "learning_rate": 1.4646834455688984e-05, "loss": 0.2876, "step": 18646 }, { "epoch": 0.34620140129138616, "grad_norm": 0.5791233777999878, "learning_rate": 1.4645801523570078e-05, "loss": 0.3325, "step": 18648 }, { "epoch": 0.34623853142880484, "grad_norm": 0.3405318558216095, "learning_rate": 1.464476852823573e-05, "loss": 0.2062, "step": 18650 }, { "epoch": 0.34627566156622347, "grad_norm": 0.26828306913375854, "learning_rate": 1.4643735469699996e-05, "loss": 0.3651, "step": 18652 }, { "epoch": 0.3463127917036421, "grad_norm": 0.24720120429992676, "learning_rate": 1.4642702347976929e-05, "loss": 0.1551, "step": 18654 }, { "epoch": 0.3463499218410607, "grad_norm": 0.4631330072879791, "learning_rate": 1.4641669163080594e-05, "loss": 0.1814, "step": 18656 }, { "epoch": 0.34638705197847935, "grad_norm": 0.2955108880996704, "learning_rate": 1.4640635915025043e-05, "loss": 0.367, "step": 18658 }, { "epoch": 0.34642418211589804, "grad_norm": 0.3483012020587921, "learning_rate": 1.4639602603824336e-05, "loss": 0.1411, "step": 18660 }, { "epoch": 0.34646131225331667, "grad_norm": 0.31474199891090393, "learning_rate": 1.4638569229492534e-05, "loss": 0.2085, "step": 18662 }, { "epoch": 0.3464984423907353, "grad_norm": 0.29732224345207214, "learning_rate": 1.4637535792043703e-05, "loss": 0.202, "step": 18664 }, { "epoch": 0.3465355725281539, "grad_norm": 0.6565135717391968, "learning_rate": 1.4636502291491898e-05, "loss": 0.4816, "step": 18666 }, { "epoch": 0.34657270266557255, "grad_norm": 0.2887673079967499, "learning_rate": 1.4635468727851185e-05, "loss": 0.2504, "step": 18668 }, { "epoch": 0.3466098328029912, "grad_norm": 0.4279642403125763, "learning_rate": 1.463443510113563e-05, "loss": 0.2958, "step": 18670 }, { "epoch": 0.34664696294040986, "grad_norm": 0.3540893793106079, "learning_rate": 1.4633401411359293e-05, "loss": 0.1684, "step": 18672 }, { "epoch": 0.3466840930778285, "grad_norm": 0.46122291684150696, "learning_rate": 1.463236765853624e-05, "loss": 0.1376, "step": 18674 }, { "epoch": 0.3467212232152471, "grad_norm": 0.4325233995914459, "learning_rate": 1.463133384268054e-05, "loss": 0.2256, "step": 18676 }, { "epoch": 0.34675835335266575, "grad_norm": 0.48237791657447815, "learning_rate": 1.4630299963806258e-05, "loss": 0.3086, "step": 18678 }, { "epoch": 0.3467954834900844, "grad_norm": 0.3149224519729614, "learning_rate": 1.4629266021927466e-05, "loss": 0.343, "step": 18680 }, { "epoch": 0.34683261362750306, "grad_norm": 0.35750582814216614, "learning_rate": 1.4628232017058226e-05, "loss": 0.3399, "step": 18682 }, { "epoch": 0.3468697437649217, "grad_norm": 0.670259952545166, "learning_rate": 1.4627197949212615e-05, "loss": 0.2727, "step": 18684 }, { "epoch": 0.3469068739023403, "grad_norm": 0.39398065209388733, "learning_rate": 1.4626163818404697e-05, "loss": 0.2913, "step": 18686 }, { "epoch": 0.34694400403975895, "grad_norm": 0.5637697577476501, "learning_rate": 1.462512962464855e-05, "loss": 0.1917, "step": 18688 }, { "epoch": 0.3469811341771776, "grad_norm": 0.3628010153770447, "learning_rate": 1.462409536795824e-05, "loss": 0.2279, "step": 18690 }, { "epoch": 0.3470182643145962, "grad_norm": 0.41548043489456177, "learning_rate": 1.462306104834784e-05, "loss": 0.5091, "step": 18692 }, { "epoch": 0.3470553944520149, "grad_norm": 0.32653701305389404, "learning_rate": 1.462202666583143e-05, "loss": 0.2071, "step": 18694 }, { "epoch": 0.3470925245894335, "grad_norm": 0.39029526710510254, "learning_rate": 1.4620992220423083e-05, "loss": 0.2316, "step": 18696 }, { "epoch": 0.34712965472685214, "grad_norm": 0.40262460708618164, "learning_rate": 1.461995771213687e-05, "loss": 0.4467, "step": 18698 }, { "epoch": 0.34716678486427077, "grad_norm": 0.6048769354820251, "learning_rate": 1.4618923140986873e-05, "loss": 0.2478, "step": 18700 }, { "epoch": 0.3472039150016894, "grad_norm": 0.29295116662979126, "learning_rate": 1.4617888506987168e-05, "loss": 0.3061, "step": 18702 }, { "epoch": 0.3472410451391081, "grad_norm": 0.35957178473472595, "learning_rate": 1.461685381015183e-05, "loss": 0.1717, "step": 18704 }, { "epoch": 0.3472781752765267, "grad_norm": 0.34165510535240173, "learning_rate": 1.4615819050494941e-05, "loss": 0.3767, "step": 18706 }, { "epoch": 0.34731530541394534, "grad_norm": 0.29587599635124207, "learning_rate": 1.461478422803058e-05, "loss": 0.2332, "step": 18708 }, { "epoch": 0.34735243555136397, "grad_norm": 0.5329194664955139, "learning_rate": 1.4613749342772834e-05, "loss": 0.4454, "step": 18710 }, { "epoch": 0.3473895656887826, "grad_norm": 0.46350687742233276, "learning_rate": 1.4612714394735776e-05, "loss": 0.3278, "step": 18712 }, { "epoch": 0.3474266958262012, "grad_norm": 0.36020445823669434, "learning_rate": 1.4611679383933491e-05, "loss": 0.1883, "step": 18714 }, { "epoch": 0.3474638259636199, "grad_norm": 0.40545451641082764, "learning_rate": 1.4610644310380064e-05, "loss": 0.1758, "step": 18716 }, { "epoch": 0.34750095610103854, "grad_norm": 0.5108841061592102, "learning_rate": 1.4609609174089577e-05, "loss": 0.3112, "step": 18718 }, { "epoch": 0.34753808623845717, "grad_norm": 0.3191016614437103, "learning_rate": 1.460857397507612e-05, "loss": 0.2535, "step": 18720 }, { "epoch": 0.3475752163758758, "grad_norm": 0.3361862897872925, "learning_rate": 1.4607538713353773e-05, "loss": 0.5364, "step": 18722 }, { "epoch": 0.3476123465132944, "grad_norm": 0.4213963747024536, "learning_rate": 1.4606503388936624e-05, "loss": 0.3165, "step": 18724 }, { "epoch": 0.3476494766507131, "grad_norm": 0.46971482038497925, "learning_rate": 1.4605468001838761e-05, "loss": 0.2962, "step": 18726 }, { "epoch": 0.34768660678813174, "grad_norm": 0.20069344341754913, "learning_rate": 1.4604432552074279e-05, "loss": 0.2469, "step": 18728 }, { "epoch": 0.34772373692555036, "grad_norm": 0.3534761965274811, "learning_rate": 1.4603397039657256e-05, "loss": 0.3857, "step": 18730 }, { "epoch": 0.347760867062969, "grad_norm": 0.3173171579837799, "learning_rate": 1.4602361464601792e-05, "loss": 0.3651, "step": 18732 }, { "epoch": 0.3477979972003876, "grad_norm": 0.2487792819738388, "learning_rate": 1.4601325826921975e-05, "loss": 0.3731, "step": 18734 }, { "epoch": 0.3478351273378063, "grad_norm": 0.28780779242515564, "learning_rate": 1.460029012663189e-05, "loss": 0.5058, "step": 18736 }, { "epoch": 0.34787225747522493, "grad_norm": 0.44666510820388794, "learning_rate": 1.4599254363745641e-05, "loss": 0.3549, "step": 18738 }, { "epoch": 0.34790938761264356, "grad_norm": 0.3653266429901123, "learning_rate": 1.4598218538277316e-05, "loss": 0.1752, "step": 18740 }, { "epoch": 0.3479465177500622, "grad_norm": 0.3699641227722168, "learning_rate": 1.459718265024101e-05, "loss": 0.1784, "step": 18742 }, { "epoch": 0.3479836478874808, "grad_norm": 0.3862158954143524, "learning_rate": 1.4596146699650815e-05, "loss": 0.5148, "step": 18744 }, { "epoch": 0.34802077802489945, "grad_norm": 0.3720279037952423, "learning_rate": 1.4595110686520835e-05, "loss": 0.2753, "step": 18746 }, { "epoch": 0.34805790816231813, "grad_norm": 0.5562987327575684, "learning_rate": 1.459407461086516e-05, "loss": 0.3424, "step": 18748 }, { "epoch": 0.34809503829973676, "grad_norm": 0.3503073751926422, "learning_rate": 1.459303847269789e-05, "loss": 0.3055, "step": 18750 }, { "epoch": 0.3481321684371554, "grad_norm": 0.3184279501438141, "learning_rate": 1.4592002272033124e-05, "loss": 0.3122, "step": 18752 }, { "epoch": 0.348169298574574, "grad_norm": 0.31101447343826294, "learning_rate": 1.4590966008884964e-05, "loss": 0.2606, "step": 18754 }, { "epoch": 0.34820642871199264, "grad_norm": 0.27976590394973755, "learning_rate": 1.4589929683267506e-05, "loss": 0.2799, "step": 18756 }, { "epoch": 0.3482435588494113, "grad_norm": 0.28993380069732666, "learning_rate": 1.4588893295194852e-05, "loss": 0.084, "step": 18758 }, { "epoch": 0.34828068898682996, "grad_norm": 0.30710121989250183, "learning_rate": 1.458785684468111e-05, "loss": 0.1164, "step": 18760 }, { "epoch": 0.3483178191242486, "grad_norm": 0.36882343888282776, "learning_rate": 1.4586820331740375e-05, "loss": 0.2058, "step": 18762 }, { "epoch": 0.3483549492616672, "grad_norm": 0.3641347587108612, "learning_rate": 1.4585783756386758e-05, "loss": 0.3212, "step": 18764 }, { "epoch": 0.34839207939908584, "grad_norm": 0.20711177587509155, "learning_rate": 1.4584747118634357e-05, "loss": 0.2762, "step": 18766 }, { "epoch": 0.34842920953650447, "grad_norm": 0.2762656509876251, "learning_rate": 1.4583710418497281e-05, "loss": 0.2832, "step": 18768 }, { "epoch": 0.34846633967392315, "grad_norm": 0.38318726420402527, "learning_rate": 1.4582673655989636e-05, "loss": 0.1701, "step": 18770 }, { "epoch": 0.3485034698113418, "grad_norm": 0.24246035516262054, "learning_rate": 1.4581636831125533e-05, "loss": 0.349, "step": 18772 }, { "epoch": 0.3485405999487604, "grad_norm": 0.47852542996406555, "learning_rate": 1.4580599943919072e-05, "loss": 0.2527, "step": 18774 }, { "epoch": 0.34857773008617904, "grad_norm": 0.410382479429245, "learning_rate": 1.4579562994384367e-05, "loss": 0.3348, "step": 18776 }, { "epoch": 0.34861486022359767, "grad_norm": 0.35311540961265564, "learning_rate": 1.4578525982535528e-05, "loss": 0.2678, "step": 18778 }, { "epoch": 0.34865199036101635, "grad_norm": 0.39281854033470154, "learning_rate": 1.4577488908386662e-05, "loss": 0.2425, "step": 18780 }, { "epoch": 0.348689120498435, "grad_norm": 0.35671600699424744, "learning_rate": 1.4576451771951884e-05, "loss": 0.3336, "step": 18782 }, { "epoch": 0.3487262506358536, "grad_norm": 0.4285375773906708, "learning_rate": 1.4575414573245308e-05, "loss": 0.2162, "step": 18784 }, { "epoch": 0.34876338077327224, "grad_norm": 0.31513622403144836, "learning_rate": 1.4574377312281045e-05, "loss": 0.2992, "step": 18786 }, { "epoch": 0.34880051091069086, "grad_norm": 0.3937511444091797, "learning_rate": 1.4573339989073205e-05, "loss": 0.3872, "step": 18788 }, { "epoch": 0.3488376410481095, "grad_norm": 0.3312351703643799, "learning_rate": 1.4572302603635909e-05, "loss": 0.3135, "step": 18790 }, { "epoch": 0.3488747711855282, "grad_norm": 0.26028090715408325, "learning_rate": 1.4571265155983267e-05, "loss": 0.3707, "step": 18792 }, { "epoch": 0.3489119013229468, "grad_norm": 2.04455304145813, "learning_rate": 1.4570227646129404e-05, "loss": 0.4414, "step": 18794 }, { "epoch": 0.34894903146036543, "grad_norm": 0.44057872891426086, "learning_rate": 1.4569190074088429e-05, "loss": 0.302, "step": 18796 }, { "epoch": 0.34898616159778406, "grad_norm": 0.5087441802024841, "learning_rate": 1.4568152439874463e-05, "loss": 0.2661, "step": 18798 }, { "epoch": 0.3490232917352027, "grad_norm": 0.35400882363319397, "learning_rate": 1.4567114743501626e-05, "loss": 0.3644, "step": 18800 }, { "epoch": 0.3490604218726214, "grad_norm": 0.3413498103618622, "learning_rate": 1.4566076984984037e-05, "loss": 0.3869, "step": 18802 }, { "epoch": 0.34909755201004, "grad_norm": 0.46016883850097656, "learning_rate": 1.4565039164335817e-05, "loss": 0.5021, "step": 18804 }, { "epoch": 0.34913468214745863, "grad_norm": 0.3496893346309662, "learning_rate": 1.456400128157109e-05, "loss": 0.624, "step": 18806 }, { "epoch": 0.34917181228487726, "grad_norm": 0.30721890926361084, "learning_rate": 1.4562963336703975e-05, "loss": 0.3317, "step": 18808 }, { "epoch": 0.3492089424222959, "grad_norm": 0.3969864547252655, "learning_rate": 1.4561925329748599e-05, "loss": 0.3714, "step": 18810 }, { "epoch": 0.34924607255971457, "grad_norm": 0.3184185028076172, "learning_rate": 1.4560887260719078e-05, "loss": 0.2846, "step": 18812 }, { "epoch": 0.3492832026971332, "grad_norm": 0.3169567286968231, "learning_rate": 1.4559849129629546e-05, "loss": 0.3262, "step": 18814 }, { "epoch": 0.3493203328345518, "grad_norm": 0.3786376714706421, "learning_rate": 1.4558810936494127e-05, "loss": 0.4997, "step": 18816 }, { "epoch": 0.34935746297197046, "grad_norm": 0.41073447465896606, "learning_rate": 1.4557772681326946e-05, "loss": 0.3819, "step": 18818 }, { "epoch": 0.3493945931093891, "grad_norm": 0.26496219635009766, "learning_rate": 1.4556734364142131e-05, "loss": 0.1437, "step": 18820 }, { "epoch": 0.3494317232468077, "grad_norm": 0.4841495752334595, "learning_rate": 1.455569598495381e-05, "loss": 0.2011, "step": 18822 }, { "epoch": 0.3494688533842264, "grad_norm": 0.26623275876045227, "learning_rate": 1.4554657543776111e-05, "loss": 0.1131, "step": 18824 }, { "epoch": 0.349505983521645, "grad_norm": 0.47117680311203003, "learning_rate": 1.4553619040623168e-05, "loss": 0.1915, "step": 18826 }, { "epoch": 0.34954311365906365, "grad_norm": 0.4823889136314392, "learning_rate": 1.4552580475509108e-05, "loss": 0.3308, "step": 18828 }, { "epoch": 0.3495802437964823, "grad_norm": 0.35075661540031433, "learning_rate": 1.4551541848448065e-05, "loss": 0.304, "step": 18830 }, { "epoch": 0.3496173739339009, "grad_norm": 0.35729700326919556, "learning_rate": 1.455050315945417e-05, "loss": 0.3433, "step": 18832 }, { "epoch": 0.3496545040713196, "grad_norm": 0.4022490382194519, "learning_rate": 1.4549464408541556e-05, "loss": 0.1271, "step": 18834 }, { "epoch": 0.3496916342087382, "grad_norm": 0.29640525579452515, "learning_rate": 1.4548425595724363e-05, "loss": 0.304, "step": 18836 }, { "epoch": 0.34972876434615685, "grad_norm": 0.3635883927345276, "learning_rate": 1.4547386721016719e-05, "loss": 0.4537, "step": 18838 }, { "epoch": 0.3497658944835755, "grad_norm": 0.35302531719207764, "learning_rate": 1.4546347784432762e-05, "loss": 0.2853, "step": 18840 }, { "epoch": 0.3498030246209941, "grad_norm": 0.4265187382698059, "learning_rate": 1.4545308785986629e-05, "loss": 0.2285, "step": 18842 }, { "epoch": 0.34984015475841274, "grad_norm": 0.27550208568573, "learning_rate": 1.454426972569246e-05, "loss": 0.5115, "step": 18844 }, { "epoch": 0.3498772848958314, "grad_norm": 0.37839362025260925, "learning_rate": 1.4543230603564391e-05, "loss": 0.3212, "step": 18846 }, { "epoch": 0.34991441503325005, "grad_norm": 0.334229439496994, "learning_rate": 1.4542191419616564e-05, "loss": 0.2941, "step": 18848 }, { "epoch": 0.3499515451706687, "grad_norm": 0.40643075108528137, "learning_rate": 1.4541152173863115e-05, "loss": 0.0503, "step": 18850 }, { "epoch": 0.3499886753080873, "grad_norm": 0.428376168012619, "learning_rate": 1.4540112866318188e-05, "loss": 0.533, "step": 18852 }, { "epoch": 0.35002580544550593, "grad_norm": 0.3517612814903259, "learning_rate": 1.4539073496995923e-05, "loss": 0.3046, "step": 18854 }, { "epoch": 0.3500629355829246, "grad_norm": 0.36351194977760315, "learning_rate": 1.4538034065910464e-05, "loss": 0.2042, "step": 18856 }, { "epoch": 0.35010006572034325, "grad_norm": 0.30827611684799194, "learning_rate": 1.4536994573075958e-05, "loss": 0.2718, "step": 18858 }, { "epoch": 0.3501371958577619, "grad_norm": 0.5288692116737366, "learning_rate": 1.4535955018506543e-05, "loss": 0.2539, "step": 18860 }, { "epoch": 0.3501743259951805, "grad_norm": 0.25314849615097046, "learning_rate": 1.4534915402216365e-05, "loss": 0.2002, "step": 18862 }, { "epoch": 0.35021145613259913, "grad_norm": 0.29567253589630127, "learning_rate": 1.4533875724219573e-05, "loss": 0.2256, "step": 18864 }, { "epoch": 0.35024858627001776, "grad_norm": 0.43860554695129395, "learning_rate": 1.4532835984530316e-05, "loss": 0.2226, "step": 18866 }, { "epoch": 0.35028571640743644, "grad_norm": 0.2843737006187439, "learning_rate": 1.4531796183162734e-05, "loss": 0.2969, "step": 18868 }, { "epoch": 0.35032284654485507, "grad_norm": 0.34850290417671204, "learning_rate": 1.4530756320130986e-05, "loss": 0.0991, "step": 18870 }, { "epoch": 0.3503599766822737, "grad_norm": 0.5070458650588989, "learning_rate": 1.4529716395449211e-05, "loss": 0.3592, "step": 18872 }, { "epoch": 0.3503971068196923, "grad_norm": 0.32836413383483887, "learning_rate": 1.4528676409131563e-05, "loss": 0.2995, "step": 18874 }, { "epoch": 0.35043423695711096, "grad_norm": 0.5014967322349548, "learning_rate": 1.4527636361192197e-05, "loss": 0.194, "step": 18876 }, { "epoch": 0.35047136709452964, "grad_norm": 0.2500004172325134, "learning_rate": 1.4526596251645262e-05, "loss": 0.3754, "step": 18878 }, { "epoch": 0.35050849723194827, "grad_norm": 0.48153412342071533, "learning_rate": 1.4525556080504911e-05, "loss": 0.3056, "step": 18880 }, { "epoch": 0.3505456273693669, "grad_norm": 0.39096927642822266, "learning_rate": 1.4524515847785296e-05, "loss": 0.4848, "step": 18882 }, { "epoch": 0.3505827575067855, "grad_norm": 0.27435189485549927, "learning_rate": 1.4523475553500572e-05, "loss": 0.1736, "step": 18884 }, { "epoch": 0.35061988764420415, "grad_norm": 0.4454135000705719, "learning_rate": 1.4522435197664897e-05, "loss": 0.387, "step": 18886 }, { "epoch": 0.35065701778162284, "grad_norm": 0.25808340311050415, "learning_rate": 1.4521394780292424e-05, "loss": 0.2177, "step": 18888 }, { "epoch": 0.35069414791904147, "grad_norm": 0.30163413286209106, "learning_rate": 1.4520354301397311e-05, "loss": 0.4891, "step": 18890 }, { "epoch": 0.3507312780564601, "grad_norm": 0.3731898367404938, "learning_rate": 1.4519313760993718e-05, "loss": 0.2501, "step": 18892 }, { "epoch": 0.3507684081938787, "grad_norm": 0.33895304799079895, "learning_rate": 1.4518273159095799e-05, "loss": 0.3835, "step": 18894 }, { "epoch": 0.35080553833129735, "grad_norm": 0.4123552143573761, "learning_rate": 1.4517232495717718e-05, "loss": 0.2735, "step": 18896 }, { "epoch": 0.350842668468716, "grad_norm": 0.3857586681842804, "learning_rate": 1.4516191770873634e-05, "loss": 0.3535, "step": 18898 }, { "epoch": 0.35087979860613466, "grad_norm": 1.3258209228515625, "learning_rate": 1.4515150984577704e-05, "loss": 0.1833, "step": 18900 }, { "epoch": 0.3509169287435533, "grad_norm": 0.4324641823768616, "learning_rate": 1.4514110136844098e-05, "loss": 0.5614, "step": 18902 }, { "epoch": 0.3509540588809719, "grad_norm": 0.57988440990448, "learning_rate": 1.4513069227686971e-05, "loss": 0.2968, "step": 18904 }, { "epoch": 0.35099118901839055, "grad_norm": 0.3326569199562073, "learning_rate": 1.4512028257120493e-05, "loss": 0.4654, "step": 18906 }, { "epoch": 0.3510283191558092, "grad_norm": 0.5209187865257263, "learning_rate": 1.4510987225158821e-05, "loss": 0.4054, "step": 18908 }, { "epoch": 0.35106544929322786, "grad_norm": 0.4271082282066345, "learning_rate": 1.4509946131816128e-05, "loss": 0.2713, "step": 18910 }, { "epoch": 0.3511025794306465, "grad_norm": 0.4181838035583496, "learning_rate": 1.4508904977106578e-05, "loss": 0.3248, "step": 18912 }, { "epoch": 0.3511397095680651, "grad_norm": 0.37019485235214233, "learning_rate": 1.4507863761044333e-05, "loss": 0.2776, "step": 18914 }, { "epoch": 0.35117683970548375, "grad_norm": 0.6449816823005676, "learning_rate": 1.4506822483643568e-05, "loss": 0.2889, "step": 18916 }, { "epoch": 0.3512139698429024, "grad_norm": 0.40885910391807556, "learning_rate": 1.4505781144918449e-05, "loss": 0.4017, "step": 18918 }, { "epoch": 0.351251099980321, "grad_norm": 0.4887562394142151, "learning_rate": 1.4504739744883142e-05, "loss": 0.3366, "step": 18920 }, { "epoch": 0.3512882301177397, "grad_norm": 0.2767049968242645, "learning_rate": 1.4503698283551824e-05, "loss": 0.1118, "step": 18922 }, { "epoch": 0.3513253602551583, "grad_norm": 0.40932509303092957, "learning_rate": 1.4502656760938662e-05, "loss": 0.3261, "step": 18924 }, { "epoch": 0.35136249039257694, "grad_norm": 0.3227868974208832, "learning_rate": 1.4501615177057826e-05, "loss": 0.2119, "step": 18926 }, { "epoch": 0.35139962052999557, "grad_norm": 0.5095918774604797, "learning_rate": 1.4500573531923492e-05, "loss": 0.3011, "step": 18928 }, { "epoch": 0.3514367506674142, "grad_norm": 0.5764751434326172, "learning_rate": 1.4499531825549832e-05, "loss": 0.3115, "step": 18930 }, { "epoch": 0.3514738808048329, "grad_norm": 0.4265660345554352, "learning_rate": 1.4498490057951027e-05, "loss": 0.3725, "step": 18932 }, { "epoch": 0.3515110109422515, "grad_norm": 0.39393746852874756, "learning_rate": 1.449744822914124e-05, "loss": 0.3337, "step": 18934 }, { "epoch": 0.35154814107967014, "grad_norm": 0.3743571639060974, "learning_rate": 1.4496406339134659e-05, "loss": 0.3893, "step": 18936 }, { "epoch": 0.35158527121708877, "grad_norm": 0.3582865297794342, "learning_rate": 1.4495364387945452e-05, "loss": 0.443, "step": 18938 }, { "epoch": 0.3516224013545074, "grad_norm": 0.6780370473861694, "learning_rate": 1.4494322375587802e-05, "loss": 0.2762, "step": 18940 }, { "epoch": 0.351659531491926, "grad_norm": 0.23048317432403564, "learning_rate": 1.4493280302075889e-05, "loss": 0.3221, "step": 18942 }, { "epoch": 0.3516966616293447, "grad_norm": 0.23651336133480072, "learning_rate": 1.449223816742389e-05, "loss": 0.2192, "step": 18944 }, { "epoch": 0.35173379176676334, "grad_norm": 0.22723370790481567, "learning_rate": 1.4491195971645982e-05, "loss": 0.1957, "step": 18946 }, { "epoch": 0.35177092190418197, "grad_norm": 0.3477269411087036, "learning_rate": 1.4490153714756352e-05, "loss": 0.1932, "step": 18948 }, { "epoch": 0.3518080520416006, "grad_norm": 0.3378618657588959, "learning_rate": 1.4489111396769177e-05, "loss": 0.2347, "step": 18950 }, { "epoch": 0.3518451821790192, "grad_norm": 0.28808775544166565, "learning_rate": 1.4488069017698645e-05, "loss": 0.3441, "step": 18952 }, { "epoch": 0.3518823123164379, "grad_norm": 0.3887220323085785, "learning_rate": 1.4487026577558936e-05, "loss": 0.3022, "step": 18954 }, { "epoch": 0.35191944245385653, "grad_norm": 0.31523799896240234, "learning_rate": 1.4485984076364236e-05, "loss": 0.3654, "step": 18956 }, { "epoch": 0.35195657259127516, "grad_norm": 0.47469210624694824, "learning_rate": 1.4484941514128727e-05, "loss": 0.411, "step": 18958 }, { "epoch": 0.3519937027286938, "grad_norm": 0.3684457838535309, "learning_rate": 1.44838988908666e-05, "loss": 0.2563, "step": 18960 }, { "epoch": 0.3520308328661124, "grad_norm": 0.464172899723053, "learning_rate": 1.4482856206592042e-05, "loss": 0.3372, "step": 18962 }, { "epoch": 0.3520679630035311, "grad_norm": 0.4620152711868286, "learning_rate": 1.4481813461319239e-05, "loss": 0.5468, "step": 18964 }, { "epoch": 0.35210509314094973, "grad_norm": 0.37961575388908386, "learning_rate": 1.4480770655062376e-05, "loss": 0.2963, "step": 18966 }, { "epoch": 0.35214222327836836, "grad_norm": 0.31157928705215454, "learning_rate": 1.447972778783565e-05, "loss": 0.2724, "step": 18968 }, { "epoch": 0.352179353415787, "grad_norm": 0.2580147385597229, "learning_rate": 1.447868485965324e-05, "loss": 0.5055, "step": 18970 }, { "epoch": 0.3522164835532056, "grad_norm": 0.294188916683197, "learning_rate": 1.4477641870529349e-05, "loss": 0.2431, "step": 18972 }, { "epoch": 0.35225361369062425, "grad_norm": 0.3997357487678528, "learning_rate": 1.4476598820478165e-05, "loss": 0.3866, "step": 18974 }, { "epoch": 0.35229074382804293, "grad_norm": 0.3046589493751526, "learning_rate": 1.447555570951388e-05, "loss": 0.3742, "step": 18976 }, { "epoch": 0.35232787396546156, "grad_norm": 0.2687056064605713, "learning_rate": 1.4474512537650685e-05, "loss": 0.195, "step": 18978 }, { "epoch": 0.3523650041028802, "grad_norm": 0.25810807943344116, "learning_rate": 1.4473469304902779e-05, "loss": 0.0964, "step": 18980 }, { "epoch": 0.3524021342402988, "grad_norm": 0.3868236541748047, "learning_rate": 1.4472426011284352e-05, "loss": 0.4079, "step": 18982 }, { "epoch": 0.35243926437771744, "grad_norm": 0.3207060694694519, "learning_rate": 1.4471382656809607e-05, "loss": 0.1489, "step": 18984 }, { "epoch": 0.3524763945151361, "grad_norm": 0.6569737195968628, "learning_rate": 1.4470339241492738e-05, "loss": 0.3572, "step": 18986 }, { "epoch": 0.35251352465255475, "grad_norm": 0.29283884167671204, "learning_rate": 1.4469295765347941e-05, "loss": 0.364, "step": 18988 }, { "epoch": 0.3525506547899734, "grad_norm": 0.37402501702308655, "learning_rate": 1.4468252228389417e-05, "loss": 0.1169, "step": 18990 }, { "epoch": 0.352587784927392, "grad_norm": 0.4235284626483917, "learning_rate": 1.4467208630631362e-05, "loss": 0.3852, "step": 18992 }, { "epoch": 0.35262491506481064, "grad_norm": 0.39247334003448486, "learning_rate": 1.4466164972087979e-05, "loss": 0.2522, "step": 18994 }, { "epoch": 0.35266204520222927, "grad_norm": 0.3064703643321991, "learning_rate": 1.4465121252773468e-05, "loss": 0.2032, "step": 18996 }, { "epoch": 0.35269917533964795, "grad_norm": 0.3349137604236603, "learning_rate": 1.4464077472702032e-05, "loss": 0.3044, "step": 18998 }, { "epoch": 0.3527363054770666, "grad_norm": 0.40613552927970886, "learning_rate": 1.4463033631887875e-05, "loss": 0.3493, "step": 19000 }, { "epoch": 0.3527734356144852, "grad_norm": 0.4191027879714966, "learning_rate": 1.4461989730345193e-05, "loss": 0.2418, "step": 19002 }, { "epoch": 0.35281056575190384, "grad_norm": 0.4051223397254944, "learning_rate": 1.44609457680882e-05, "loss": 0.2732, "step": 19004 }, { "epoch": 0.35284769588932247, "grad_norm": 0.5641425251960754, "learning_rate": 1.4459901745131097e-05, "loss": 0.266, "step": 19006 }, { "epoch": 0.35288482602674115, "grad_norm": 0.3093421161174774, "learning_rate": 1.4458857661488092e-05, "loss": 0.2755, "step": 19008 }, { "epoch": 0.3529219561641598, "grad_norm": 0.4044405519962311, "learning_rate": 1.4457813517173387e-05, "loss": 0.2002, "step": 19010 }, { "epoch": 0.3529590863015784, "grad_norm": 0.3390043079853058, "learning_rate": 1.4456769312201195e-05, "loss": 0.2729, "step": 19012 }, { "epoch": 0.35299621643899703, "grad_norm": 0.47962307929992676, "learning_rate": 1.445572504658572e-05, "loss": 0.4253, "step": 19014 }, { "epoch": 0.35303334657641566, "grad_norm": 0.3191542625427246, "learning_rate": 1.4454680720341175e-05, "loss": 0.3936, "step": 19016 }, { "epoch": 0.3530704767138343, "grad_norm": 0.39377671480178833, "learning_rate": 1.4453636333481769e-05, "loss": 0.2508, "step": 19018 }, { "epoch": 0.353107606851253, "grad_norm": 0.3223792016506195, "learning_rate": 1.4452591886021711e-05, "loss": 0.3646, "step": 19020 }, { "epoch": 0.3531447369886716, "grad_norm": 0.32481256127357483, "learning_rate": 1.4451547377975214e-05, "loss": 0.4295, "step": 19022 }, { "epoch": 0.35318186712609023, "grad_norm": 0.44496139883995056, "learning_rate": 1.4450502809356492e-05, "loss": 0.3073, "step": 19024 }, { "epoch": 0.35321899726350886, "grad_norm": 0.5025999546051025, "learning_rate": 1.4449458180179759e-05, "loss": 0.4304, "step": 19026 }, { "epoch": 0.3532561274009275, "grad_norm": 0.39571231603622437, "learning_rate": 1.4448413490459228e-05, "loss": 0.4454, "step": 19028 }, { "epoch": 0.3532932575383462, "grad_norm": 0.3054034113883972, "learning_rate": 1.4447368740209113e-05, "loss": 0.2294, "step": 19030 }, { "epoch": 0.3533303876757648, "grad_norm": 0.3918306529521942, "learning_rate": 1.4446323929443631e-05, "loss": 0.46, "step": 19032 }, { "epoch": 0.35336751781318343, "grad_norm": 0.46022093296051025, "learning_rate": 1.4445279058176996e-05, "loss": 0.3588, "step": 19034 }, { "epoch": 0.35340464795060206, "grad_norm": 0.2909444570541382, "learning_rate": 1.4444234126423431e-05, "loss": 0.361, "step": 19036 }, { "epoch": 0.3534417780880207, "grad_norm": 0.29414787888526917, "learning_rate": 1.444318913419715e-05, "loss": 0.3962, "step": 19038 }, { "epoch": 0.35347890822543937, "grad_norm": 0.5349095463752747, "learning_rate": 1.4442144081512376e-05, "loss": 0.4621, "step": 19040 }, { "epoch": 0.353516038362858, "grad_norm": 0.5301125049591064, "learning_rate": 1.4441098968383325e-05, "loss": 0.4063, "step": 19042 }, { "epoch": 0.3535531685002766, "grad_norm": 0.37551823258399963, "learning_rate": 1.4440053794824222e-05, "loss": 0.468, "step": 19044 }, { "epoch": 0.35359029863769526, "grad_norm": 0.4179868996143341, "learning_rate": 1.4439008560849283e-05, "loss": 0.1533, "step": 19046 }, { "epoch": 0.3536274287751139, "grad_norm": 0.3283635675907135, "learning_rate": 1.4437963266472737e-05, "loss": 0.196, "step": 19048 }, { "epoch": 0.3536645589125325, "grad_norm": 0.28628742694854736, "learning_rate": 1.4436917911708804e-05, "loss": 0.3198, "step": 19050 }, { "epoch": 0.3537016890499512, "grad_norm": 0.38309386372566223, "learning_rate": 1.4435872496571705e-05, "loss": 0.3602, "step": 19052 }, { "epoch": 0.3537388191873698, "grad_norm": 0.34142830967903137, "learning_rate": 1.443482702107567e-05, "loss": 0.1889, "step": 19054 }, { "epoch": 0.35377594932478845, "grad_norm": 0.5507894158363342, "learning_rate": 1.4433781485234923e-05, "loss": 0.2195, "step": 19056 }, { "epoch": 0.3538130794622071, "grad_norm": 0.32069459557533264, "learning_rate": 1.4432735889063692e-05, "loss": 0.366, "step": 19058 }, { "epoch": 0.3538502095996257, "grad_norm": 0.30436572432518005, "learning_rate": 1.4431690232576203e-05, "loss": 0.314, "step": 19060 }, { "epoch": 0.3538873397370444, "grad_norm": 0.5098092555999756, "learning_rate": 1.4430644515786685e-05, "loss": 0.2964, "step": 19062 }, { "epoch": 0.353924469874463, "grad_norm": 0.20647500455379486, "learning_rate": 1.4429598738709362e-05, "loss": 0.2752, "step": 19064 }, { "epoch": 0.35396160001188165, "grad_norm": 0.5044084787368774, "learning_rate": 1.4428552901358472e-05, "loss": 0.2656, "step": 19066 }, { "epoch": 0.3539987301493003, "grad_norm": 0.3877022862434387, "learning_rate": 1.4427507003748242e-05, "loss": 0.0961, "step": 19068 }, { "epoch": 0.3540358602867189, "grad_norm": 0.40637773275375366, "learning_rate": 1.4426461045892904e-05, "loss": 0.2014, "step": 19070 }, { "epoch": 0.35407299042413753, "grad_norm": 0.34401699900627136, "learning_rate": 1.442541502780669e-05, "loss": 0.1265, "step": 19072 }, { "epoch": 0.3541101205615562, "grad_norm": 0.4048682451248169, "learning_rate": 1.4424368949503832e-05, "loss": 0.0969, "step": 19074 }, { "epoch": 0.35414725069897485, "grad_norm": 0.3697626292705536, "learning_rate": 1.4423322810998563e-05, "loss": 0.2951, "step": 19076 }, { "epoch": 0.3541843808363935, "grad_norm": 0.363372266292572, "learning_rate": 1.4422276612305122e-05, "loss": 0.1346, "step": 19078 }, { "epoch": 0.3542215109738121, "grad_norm": 0.31801411509513855, "learning_rate": 1.4421230353437744e-05, "loss": 0.4545, "step": 19080 }, { "epoch": 0.35425864111123073, "grad_norm": 0.4249822497367859, "learning_rate": 1.4420184034410662e-05, "loss": 0.2492, "step": 19082 }, { "epoch": 0.3542957712486494, "grad_norm": 0.30824634432792664, "learning_rate": 1.4419137655238115e-05, "loss": 0.1225, "step": 19084 }, { "epoch": 0.35433290138606804, "grad_norm": 0.4208660423755646, "learning_rate": 1.4418091215934342e-05, "loss": 0.4945, "step": 19086 }, { "epoch": 0.3543700315234867, "grad_norm": 0.6723758578300476, "learning_rate": 1.4417044716513583e-05, "loss": 0.4782, "step": 19088 }, { "epoch": 0.3544071616609053, "grad_norm": 0.44197720289230347, "learning_rate": 1.4415998156990074e-05, "loss": 0.3774, "step": 19090 }, { "epoch": 0.35444429179832393, "grad_norm": 0.28031468391418457, "learning_rate": 1.4414951537378059e-05, "loss": 0.2776, "step": 19092 }, { "epoch": 0.35448142193574256, "grad_norm": 0.3647652268409729, "learning_rate": 1.4413904857691778e-05, "loss": 0.1988, "step": 19094 }, { "epoch": 0.35451855207316124, "grad_norm": 0.29621008038520813, "learning_rate": 1.4412858117945473e-05, "loss": 0.4334, "step": 19096 }, { "epoch": 0.35455568221057987, "grad_norm": 0.29490378499031067, "learning_rate": 1.4411811318153383e-05, "loss": 0.4856, "step": 19098 }, { "epoch": 0.3545928123479985, "grad_norm": 0.3255443871021271, "learning_rate": 1.441076445832976e-05, "loss": 0.2549, "step": 19100 }, { "epoch": 0.3546299424854171, "grad_norm": 0.4909319579601288, "learning_rate": 1.4409717538488845e-05, "loss": 0.256, "step": 19102 }, { "epoch": 0.35466707262283576, "grad_norm": 0.40481331944465637, "learning_rate": 1.440867055864488e-05, "loss": 0.2274, "step": 19104 }, { "epoch": 0.35470420276025444, "grad_norm": 0.46618103981018066, "learning_rate": 1.4407623518812117e-05, "loss": 0.1934, "step": 19106 }, { "epoch": 0.35474133289767307, "grad_norm": 0.38732388615608215, "learning_rate": 1.4406576419004798e-05, "loss": 0.3747, "step": 19108 }, { "epoch": 0.3547784630350917, "grad_norm": 0.4707793593406677, "learning_rate": 1.4405529259237172e-05, "loss": 0.3053, "step": 19110 }, { "epoch": 0.3548155931725103, "grad_norm": 0.4835131764411926, "learning_rate": 1.4404482039523492e-05, "loss": 0.239, "step": 19112 }, { "epoch": 0.35485272330992895, "grad_norm": 0.41828829050064087, "learning_rate": 1.4403434759878005e-05, "loss": 0.2935, "step": 19114 }, { "epoch": 0.35488985344734764, "grad_norm": 0.44106265902519226, "learning_rate": 1.4402387420314959e-05, "loss": 0.2116, "step": 19116 }, { "epoch": 0.35492698358476626, "grad_norm": 0.4473205804824829, "learning_rate": 1.4401340020848608e-05, "loss": 0.3158, "step": 19118 }, { "epoch": 0.3549641137221849, "grad_norm": 0.4244312345981598, "learning_rate": 1.44002925614932e-05, "loss": 0.6686, "step": 19120 }, { "epoch": 0.3550012438596035, "grad_norm": 0.4721492528915405, "learning_rate": 1.4399245042262991e-05, "loss": 0.405, "step": 19122 }, { "epoch": 0.35503837399702215, "grad_norm": 0.4620789885520935, "learning_rate": 1.4398197463172236e-05, "loss": 0.3029, "step": 19124 }, { "epoch": 0.3550755041344408, "grad_norm": 0.30846601724624634, "learning_rate": 1.439714982423519e-05, "loss": 0.3605, "step": 19126 }, { "epoch": 0.35511263427185946, "grad_norm": 0.34849846363067627, "learning_rate": 1.4396102125466103e-05, "loss": 0.2225, "step": 19128 }, { "epoch": 0.3551497644092781, "grad_norm": 0.38298845291137695, "learning_rate": 1.4395054366879232e-05, "loss": 0.3043, "step": 19130 }, { "epoch": 0.3551868945466967, "grad_norm": 0.360188752412796, "learning_rate": 1.4394006548488839e-05, "loss": 0.6272, "step": 19132 }, { "epoch": 0.35522402468411535, "grad_norm": 0.504703164100647, "learning_rate": 1.4392958670309178e-05, "loss": 0.2917, "step": 19134 }, { "epoch": 0.355261154821534, "grad_norm": 0.5333461165428162, "learning_rate": 1.4391910732354508e-05, "loss": 0.4409, "step": 19136 }, { "epoch": 0.35529828495895266, "grad_norm": 0.33476823568344116, "learning_rate": 1.4390862734639086e-05, "loss": 0.3322, "step": 19138 }, { "epoch": 0.3553354150963713, "grad_norm": 0.4086160957813263, "learning_rate": 1.4389814677177177e-05, "loss": 0.4502, "step": 19140 }, { "epoch": 0.3553725452337899, "grad_norm": 0.30259355902671814, "learning_rate": 1.4388766559983036e-05, "loss": 0.2849, "step": 19142 }, { "epoch": 0.35540967537120854, "grad_norm": 0.3576435148715973, "learning_rate": 1.4387718383070932e-05, "loss": 0.3169, "step": 19144 }, { "epoch": 0.3554468055086272, "grad_norm": 0.40742558240890503, "learning_rate": 1.4386670146455122e-05, "loss": 0.2034, "step": 19146 }, { "epoch": 0.3554839356460458, "grad_norm": 0.3722027540206909, "learning_rate": 1.4385621850149872e-05, "loss": 0.1926, "step": 19148 }, { "epoch": 0.3555210657834645, "grad_norm": 0.2623305916786194, "learning_rate": 1.4384573494169442e-05, "loss": 0.4584, "step": 19150 }, { "epoch": 0.3555581959208831, "grad_norm": 0.4020449221134186, "learning_rate": 1.4383525078528101e-05, "loss": 0.3635, "step": 19152 }, { "epoch": 0.35559532605830174, "grad_norm": 0.36772334575653076, "learning_rate": 1.4382476603240113e-05, "loss": 0.3094, "step": 19154 }, { "epoch": 0.35563245619572037, "grad_norm": 0.8524356484413147, "learning_rate": 1.4381428068319752e-05, "loss": 0.2937, "step": 19156 }, { "epoch": 0.355669586333139, "grad_norm": 0.3645341992378235, "learning_rate": 1.4380379473781272e-05, "loss": 0.3304, "step": 19158 }, { "epoch": 0.3557067164705577, "grad_norm": 0.5001469850540161, "learning_rate": 1.437933081963895e-05, "loss": 0.2521, "step": 19160 }, { "epoch": 0.3557438466079763, "grad_norm": 0.3354986011981964, "learning_rate": 1.4378282105907054e-05, "loss": 0.3293, "step": 19162 }, { "epoch": 0.35578097674539494, "grad_norm": 0.4423581659793854, "learning_rate": 1.4377233332599852e-05, "loss": 0.2792, "step": 19164 }, { "epoch": 0.35581810688281357, "grad_norm": 0.2927844822406769, "learning_rate": 1.4376184499731617e-05, "loss": 0.3956, "step": 19166 }, { "epoch": 0.3558552370202322, "grad_norm": 0.4370005130767822, "learning_rate": 1.4375135607316614e-05, "loss": 0.2585, "step": 19168 }, { "epoch": 0.3558923671576508, "grad_norm": 0.7163337469100952, "learning_rate": 1.4374086655369129e-05, "loss": 0.349, "step": 19170 }, { "epoch": 0.3559294972950695, "grad_norm": 0.457614928483963, "learning_rate": 1.4373037643903419e-05, "loss": 0.2962, "step": 19172 }, { "epoch": 0.35596662743248814, "grad_norm": 0.3752543032169342, "learning_rate": 1.4371988572933768e-05, "loss": 0.3875, "step": 19174 }, { "epoch": 0.35600375756990676, "grad_norm": 0.4762692451477051, "learning_rate": 1.437093944247445e-05, "loss": 0.3106, "step": 19176 }, { "epoch": 0.3560408877073254, "grad_norm": 0.3628615438938141, "learning_rate": 1.4369890252539738e-05, "loss": 0.4215, "step": 19178 }, { "epoch": 0.356078017844744, "grad_norm": 0.5029466152191162, "learning_rate": 1.4368841003143908e-05, "loss": 0.1494, "step": 19180 }, { "epoch": 0.3561151479821627, "grad_norm": 0.37303322553634644, "learning_rate": 1.4367791694301238e-05, "loss": 0.3318, "step": 19182 }, { "epoch": 0.35615227811958133, "grad_norm": 0.38239043951034546, "learning_rate": 1.4366742326026004e-05, "loss": 0.2978, "step": 19184 }, { "epoch": 0.35618940825699996, "grad_norm": 0.3466437757015228, "learning_rate": 1.436569289833249e-05, "loss": 0.4416, "step": 19186 }, { "epoch": 0.3562265383944186, "grad_norm": 0.39457646012306213, "learning_rate": 1.4364643411234976e-05, "loss": 0.3742, "step": 19188 }, { "epoch": 0.3562636685318372, "grad_norm": 0.449174702167511, "learning_rate": 1.4363593864747732e-05, "loss": 0.3012, "step": 19190 }, { "epoch": 0.3563007986692559, "grad_norm": 0.4582138955593109, "learning_rate": 1.4362544258885046e-05, "loss": 0.2818, "step": 19192 }, { "epoch": 0.35633792880667453, "grad_norm": 0.348893404006958, "learning_rate": 1.4361494593661205e-05, "loss": 0.3953, "step": 19194 }, { "epoch": 0.35637505894409316, "grad_norm": 0.2620885372161865, "learning_rate": 1.4360444869090484e-05, "loss": 0.177, "step": 19196 }, { "epoch": 0.3564121890815118, "grad_norm": 0.3366757035255432, "learning_rate": 1.4359395085187169e-05, "loss": 0.4193, "step": 19198 }, { "epoch": 0.3564493192189304, "grad_norm": 0.46537330746650696, "learning_rate": 1.4358345241965548e-05, "loss": 0.5712, "step": 19200 }, { "epoch": 0.35648644935634904, "grad_norm": 0.42223677039146423, "learning_rate": 1.4357295339439899e-05, "loss": 0.3058, "step": 19202 }, { "epoch": 0.35652357949376773, "grad_norm": 0.4203982949256897, "learning_rate": 1.4356245377624514e-05, "loss": 0.1256, "step": 19204 }, { "epoch": 0.35656070963118636, "grad_norm": 0.30981892347335815, "learning_rate": 1.4355195356533675e-05, "loss": 0.2917, "step": 19206 }, { "epoch": 0.356597839768605, "grad_norm": 0.29343441128730774, "learning_rate": 1.4354145276181677e-05, "loss": 0.2114, "step": 19208 }, { "epoch": 0.3566349699060236, "grad_norm": 0.6155351400375366, "learning_rate": 1.4353095136582802e-05, "loss": 0.2353, "step": 19210 }, { "epoch": 0.35667210004344224, "grad_norm": 0.3946570158004761, "learning_rate": 1.435204493775134e-05, "loss": 0.1984, "step": 19212 }, { "epoch": 0.3567092301808609, "grad_norm": 0.5555511713027954, "learning_rate": 1.4350994679701584e-05, "loss": 0.4287, "step": 19214 }, { "epoch": 0.35674636031827955, "grad_norm": 1.17799711227417, "learning_rate": 1.434994436244782e-05, "loss": 0.2128, "step": 19216 }, { "epoch": 0.3567834904556982, "grad_norm": 0.29123303294181824, "learning_rate": 1.4348893986004349e-05, "loss": 0.399, "step": 19218 }, { "epoch": 0.3568206205931168, "grad_norm": 0.5263556241989136, "learning_rate": 1.4347843550385454e-05, "loss": 0.1962, "step": 19220 }, { "epoch": 0.35685775073053544, "grad_norm": 0.23480884730815887, "learning_rate": 1.434679305560543e-05, "loss": 0.327, "step": 19222 }, { "epoch": 0.35689488086795407, "grad_norm": 0.32617440819740295, "learning_rate": 1.4345742501678572e-05, "loss": 0.3877, "step": 19224 }, { "epoch": 0.35693201100537275, "grad_norm": 0.35131189227104187, "learning_rate": 1.4344691888619177e-05, "loss": 0.2187, "step": 19226 }, { "epoch": 0.3569691411427914, "grad_norm": 0.5032466650009155, "learning_rate": 1.434364121644154e-05, "loss": 0.3571, "step": 19228 }, { "epoch": 0.35700627128021, "grad_norm": 0.2917448878288269, "learning_rate": 1.4342590485159957e-05, "loss": 0.2341, "step": 19230 }, { "epoch": 0.35704340141762864, "grad_norm": 0.44347652792930603, "learning_rate": 1.4341539694788726e-05, "loss": 0.3934, "step": 19232 }, { "epoch": 0.35708053155504726, "grad_norm": 0.35745173692703247, "learning_rate": 1.4340488845342142e-05, "loss": 0.3543, "step": 19234 }, { "epoch": 0.35711766169246595, "grad_norm": 0.4164411425590515, "learning_rate": 1.4339437936834506e-05, "loss": 0.5095, "step": 19236 }, { "epoch": 0.3571547918298846, "grad_norm": 0.3867277204990387, "learning_rate": 1.433838696928012e-05, "loss": 0.3299, "step": 19238 }, { "epoch": 0.3571919219673032, "grad_norm": 0.9293546676635742, "learning_rate": 1.4337335942693282e-05, "loss": 0.3052, "step": 19240 }, { "epoch": 0.35722905210472183, "grad_norm": 0.3814060389995575, "learning_rate": 1.433628485708829e-05, "loss": 0.32, "step": 19242 }, { "epoch": 0.35726618224214046, "grad_norm": 0.34118515253067017, "learning_rate": 1.4335233712479454e-05, "loss": 0.3605, "step": 19244 }, { "epoch": 0.3573033123795591, "grad_norm": 0.42078039050102234, "learning_rate": 1.4334182508881069e-05, "loss": 0.2718, "step": 19246 }, { "epoch": 0.3573404425169778, "grad_norm": 0.31590035557746887, "learning_rate": 1.4333131246307445e-05, "loss": 0.1826, "step": 19248 }, { "epoch": 0.3573775726543964, "grad_norm": 0.3172628879547119, "learning_rate": 1.4332079924772885e-05, "loss": 0.1794, "step": 19250 }, { "epoch": 0.35741470279181503, "grad_norm": 0.49243366718292236, "learning_rate": 1.4331028544291692e-05, "loss": 0.2191, "step": 19252 }, { "epoch": 0.35745183292923366, "grad_norm": 0.42085498571395874, "learning_rate": 1.4329977104878172e-05, "loss": 0.1693, "step": 19254 }, { "epoch": 0.3574889630666523, "grad_norm": 0.3401266038417816, "learning_rate": 1.4328925606546634e-05, "loss": 0.1716, "step": 19256 }, { "epoch": 0.35752609320407097, "grad_norm": 0.2822279930114746, "learning_rate": 1.4327874049311386e-05, "loss": 0.2452, "step": 19258 }, { "epoch": 0.3575632233414896, "grad_norm": 0.28478866815567017, "learning_rate": 1.4326822433186735e-05, "loss": 0.2706, "step": 19260 }, { "epoch": 0.35760035347890823, "grad_norm": 0.3893972635269165, "learning_rate": 1.4325770758186993e-05, "loss": 0.3115, "step": 19262 }, { "epoch": 0.35763748361632686, "grad_norm": 0.34105637669563293, "learning_rate": 1.4324719024326465e-05, "loss": 0.2059, "step": 19264 }, { "epoch": 0.3576746137537455, "grad_norm": 0.598537802696228, "learning_rate": 1.4323667231619467e-05, "loss": 0.3632, "step": 19266 }, { "epoch": 0.35771174389116417, "grad_norm": 0.4352877140045166, "learning_rate": 1.4322615380080307e-05, "loss": 0.247, "step": 19268 }, { "epoch": 0.3577488740285828, "grad_norm": 0.5066704750061035, "learning_rate": 1.4321563469723302e-05, "loss": 0.323, "step": 19270 }, { "epoch": 0.3577860041660014, "grad_norm": 0.3929022252559662, "learning_rate": 1.4320511500562763e-05, "loss": 0.3433, "step": 19272 }, { "epoch": 0.35782313430342005, "grad_norm": 0.3574424088001251, "learning_rate": 1.4319459472613002e-05, "loss": 0.2943, "step": 19274 }, { "epoch": 0.3578602644408387, "grad_norm": 0.3119262456893921, "learning_rate": 1.4318407385888339e-05, "loss": 0.3586, "step": 19276 }, { "epoch": 0.3578973945782573, "grad_norm": 0.2621961236000061, "learning_rate": 1.4317355240403082e-05, "loss": 0.2365, "step": 19278 }, { "epoch": 0.357934524715676, "grad_norm": 0.45818644762039185, "learning_rate": 1.4316303036171553e-05, "loss": 0.299, "step": 19280 }, { "epoch": 0.3579716548530946, "grad_norm": 0.4423239529132843, "learning_rate": 1.431525077320807e-05, "loss": 0.3084, "step": 19282 }, { "epoch": 0.35800878499051325, "grad_norm": 0.38348856568336487, "learning_rate": 1.431419845152695e-05, "loss": 0.3641, "step": 19284 }, { "epoch": 0.3580459151279319, "grad_norm": 0.21954844892024994, "learning_rate": 1.431314607114251e-05, "loss": 0.3943, "step": 19286 }, { "epoch": 0.3580830452653505, "grad_norm": 0.3987937569618225, "learning_rate": 1.431209363206907e-05, "loss": 0.2648, "step": 19288 }, { "epoch": 0.3581201754027692, "grad_norm": 0.2781100571155548, "learning_rate": 1.4311041134320958e-05, "loss": 0.3875, "step": 19290 }, { "epoch": 0.3581573055401878, "grad_norm": 0.3760882019996643, "learning_rate": 1.4309988577912481e-05, "loss": 0.3152, "step": 19292 }, { "epoch": 0.35819443567760645, "grad_norm": 0.25897979736328125, "learning_rate": 1.4308935962857978e-05, "loss": 0.2846, "step": 19294 }, { "epoch": 0.3582315658150251, "grad_norm": 0.47104737162590027, "learning_rate": 1.4307883289171758e-05, "loss": 0.3588, "step": 19296 }, { "epoch": 0.3582686959524437, "grad_norm": 0.3214458227157593, "learning_rate": 1.430683055686815e-05, "loss": 0.3748, "step": 19298 }, { "epoch": 0.35830582608986233, "grad_norm": 0.5017143487930298, "learning_rate": 1.4305777765961479e-05, "loss": 0.4127, "step": 19300 }, { "epoch": 0.358342956227281, "grad_norm": 0.2939670979976654, "learning_rate": 1.430472491646607e-05, "loss": 0.2129, "step": 19302 }, { "epoch": 0.35838008636469965, "grad_norm": 0.41886237263679504, "learning_rate": 1.4303672008396251e-05, "loss": 0.3259, "step": 19304 }, { "epoch": 0.3584172165021183, "grad_norm": 0.41379138827323914, "learning_rate": 1.4302619041766345e-05, "loss": 0.285, "step": 19306 }, { "epoch": 0.3584543466395369, "grad_norm": 0.29987138509750366, "learning_rate": 1.4301566016590684e-05, "loss": 0.3014, "step": 19308 }, { "epoch": 0.35849147677695553, "grad_norm": 0.5723175406455994, "learning_rate": 1.4300512932883593e-05, "loss": 0.3322, "step": 19310 }, { "epoch": 0.3585286069143742, "grad_norm": 0.36991435289382935, "learning_rate": 1.42994597906594e-05, "loss": 0.3981, "step": 19312 }, { "epoch": 0.35856573705179284, "grad_norm": 0.4879956841468811, "learning_rate": 1.429840658993244e-05, "loss": 0.2317, "step": 19314 }, { "epoch": 0.35860286718921147, "grad_norm": 0.2696889638900757, "learning_rate": 1.4297353330717041e-05, "loss": 0.2703, "step": 19316 }, { "epoch": 0.3586399973266301, "grad_norm": 0.4427299499511719, "learning_rate": 1.4296300013027535e-05, "loss": 0.2024, "step": 19318 }, { "epoch": 0.35867712746404873, "grad_norm": 0.35004928708076477, "learning_rate": 1.4295246636878256e-05, "loss": 0.4206, "step": 19320 }, { "epoch": 0.35871425760146736, "grad_norm": 0.310921311378479, "learning_rate": 1.4294193202283535e-05, "loss": 0.2024, "step": 19322 }, { "epoch": 0.35875138773888604, "grad_norm": 0.4394582509994507, "learning_rate": 1.4293139709257708e-05, "loss": 0.3806, "step": 19324 }, { "epoch": 0.35878851787630467, "grad_norm": 0.31655433773994446, "learning_rate": 1.4292086157815108e-05, "loss": 0.3274, "step": 19326 }, { "epoch": 0.3588256480137233, "grad_norm": 0.427811861038208, "learning_rate": 1.4291032547970073e-05, "loss": 0.3271, "step": 19328 }, { "epoch": 0.3588627781511419, "grad_norm": 0.383688747882843, "learning_rate": 1.4289978879736934e-05, "loss": 0.1742, "step": 19330 }, { "epoch": 0.35889990828856055, "grad_norm": 0.3519056737422943, "learning_rate": 1.4288925153130036e-05, "loss": 0.401, "step": 19332 }, { "epoch": 0.35893703842597924, "grad_norm": 0.4023473560810089, "learning_rate": 1.4287871368163712e-05, "loss": 0.2115, "step": 19334 }, { "epoch": 0.35897416856339787, "grad_norm": 0.4246709644794464, "learning_rate": 1.4286817524852306e-05, "loss": 0.4124, "step": 19336 }, { "epoch": 0.3590112987008165, "grad_norm": 0.42302870750427246, "learning_rate": 1.428576362321015e-05, "loss": 0.3036, "step": 19338 }, { "epoch": 0.3590484288382351, "grad_norm": 0.44748467206954956, "learning_rate": 1.428470966325159e-05, "loss": 0.4582, "step": 19340 }, { "epoch": 0.35908555897565375, "grad_norm": 0.3671167492866516, "learning_rate": 1.4283655644990963e-05, "loss": 0.1495, "step": 19342 }, { "epoch": 0.3591226891130724, "grad_norm": 0.55576092004776, "learning_rate": 1.4282601568442615e-05, "loss": 0.2972, "step": 19344 }, { "epoch": 0.35915981925049106, "grad_norm": 0.3427821695804596, "learning_rate": 1.428154743362089e-05, "loss": 0.2152, "step": 19346 }, { "epoch": 0.3591969493879097, "grad_norm": 0.3411884307861328, "learning_rate": 1.4280493240540129e-05, "loss": 0.3299, "step": 19348 }, { "epoch": 0.3592340795253283, "grad_norm": 0.5980251431465149, "learning_rate": 1.4279438989214676e-05, "loss": 0.2164, "step": 19350 }, { "epoch": 0.35927120966274695, "grad_norm": 0.7066493034362793, "learning_rate": 1.4278384679658876e-05, "loss": 0.4806, "step": 19352 }, { "epoch": 0.3593083398001656, "grad_norm": 0.49866965413093567, "learning_rate": 1.4277330311887077e-05, "loss": 0.3272, "step": 19354 }, { "epoch": 0.35934546993758426, "grad_norm": 0.2560674846172333, "learning_rate": 1.4276275885913622e-05, "loss": 0.3135, "step": 19356 }, { "epoch": 0.3593826000750029, "grad_norm": 0.43878498673439026, "learning_rate": 1.4275221401752864e-05, "loss": 0.1221, "step": 19358 }, { "epoch": 0.3594197302124215, "grad_norm": 0.3382638990879059, "learning_rate": 1.4274166859419146e-05, "loss": 0.3064, "step": 19360 }, { "epoch": 0.35945686034984015, "grad_norm": 0.29951754212379456, "learning_rate": 1.427311225892682e-05, "loss": 0.2599, "step": 19362 }, { "epoch": 0.3594939904872588, "grad_norm": 0.37848109006881714, "learning_rate": 1.4272057600290236e-05, "loss": 0.3596, "step": 19364 }, { "epoch": 0.35953112062467746, "grad_norm": 0.527595043182373, "learning_rate": 1.4271002883523743e-05, "loss": 0.5082, "step": 19366 }, { "epoch": 0.3595682507620961, "grad_norm": 0.47584301233291626, "learning_rate": 1.4269948108641698e-05, "loss": 0.2329, "step": 19368 }, { "epoch": 0.3596053808995147, "grad_norm": 0.27808305621147156, "learning_rate": 1.4268893275658449e-05, "loss": 0.29, "step": 19370 }, { "epoch": 0.35964251103693334, "grad_norm": 0.41396379470825195, "learning_rate": 1.4267838384588343e-05, "loss": 0.14, "step": 19372 }, { "epoch": 0.35967964117435197, "grad_norm": 0.3349596858024597, "learning_rate": 1.4266783435445746e-05, "loss": 0.4614, "step": 19374 }, { "epoch": 0.3597167713117706, "grad_norm": 0.4447016417980194, "learning_rate": 1.4265728428245002e-05, "loss": 0.3802, "step": 19376 }, { "epoch": 0.3597539014491893, "grad_norm": 0.485930860042572, "learning_rate": 1.4264673363000478e-05, "loss": 0.3623, "step": 19378 }, { "epoch": 0.3597910315866079, "grad_norm": 0.6662027835845947, "learning_rate": 1.426361823972652e-05, "loss": 0.211, "step": 19380 }, { "epoch": 0.35982816172402654, "grad_norm": 0.49511489272117615, "learning_rate": 1.426256305843749e-05, "loss": 0.2623, "step": 19382 }, { "epoch": 0.35986529186144517, "grad_norm": 0.29400357604026794, "learning_rate": 1.4261507819147741e-05, "loss": 0.3062, "step": 19384 }, { "epoch": 0.3599024219988638, "grad_norm": 0.38039326667785645, "learning_rate": 1.426045252187164e-05, "loss": 0.3109, "step": 19386 }, { "epoch": 0.3599395521362825, "grad_norm": 0.2881231904029846, "learning_rate": 1.425939716662354e-05, "loss": 0.4163, "step": 19388 }, { "epoch": 0.3599766822737011, "grad_norm": 0.4104432761669159, "learning_rate": 1.4258341753417803e-05, "loss": 0.4175, "step": 19390 }, { "epoch": 0.36001381241111974, "grad_norm": 0.25922685861587524, "learning_rate": 1.425728628226879e-05, "loss": 0.3576, "step": 19392 }, { "epoch": 0.36005094254853837, "grad_norm": 0.27954208850860596, "learning_rate": 1.4256230753190861e-05, "loss": 0.3717, "step": 19394 }, { "epoch": 0.360088072685957, "grad_norm": 0.4536297619342804, "learning_rate": 1.4255175166198383e-05, "loss": 0.2959, "step": 19396 }, { "epoch": 0.3601252028233756, "grad_norm": 0.3709540367126465, "learning_rate": 1.4254119521305714e-05, "loss": 0.272, "step": 19398 }, { "epoch": 0.3601623329607943, "grad_norm": 0.3171406686306, "learning_rate": 1.4253063818527224e-05, "loss": 0.2997, "step": 19400 }, { "epoch": 0.36019946309821294, "grad_norm": 0.2949376404285431, "learning_rate": 1.4252008057877274e-05, "loss": 0.4016, "step": 19402 }, { "epoch": 0.36023659323563156, "grad_norm": 1.0760380029678345, "learning_rate": 1.4250952239370228e-05, "loss": 0.2733, "step": 19404 }, { "epoch": 0.3602737233730502, "grad_norm": 0.350651353597641, "learning_rate": 1.4249896363020455e-05, "loss": 0.3678, "step": 19406 }, { "epoch": 0.3603108535104688, "grad_norm": 0.32489749789237976, "learning_rate": 1.4248840428842325e-05, "loss": 0.3478, "step": 19408 }, { "epoch": 0.3603479836478875, "grad_norm": 0.415242999792099, "learning_rate": 1.4247784436850204e-05, "loss": 0.3702, "step": 19410 }, { "epoch": 0.36038511378530613, "grad_norm": 0.3564092218875885, "learning_rate": 1.424672838705846e-05, "loss": 0.2973, "step": 19412 }, { "epoch": 0.36042224392272476, "grad_norm": 0.33056971430778503, "learning_rate": 1.424567227948146e-05, "loss": 0.3879, "step": 19414 }, { "epoch": 0.3604593740601434, "grad_norm": 0.37101346254348755, "learning_rate": 1.4244616114133581e-05, "loss": 0.2339, "step": 19416 }, { "epoch": 0.360496504197562, "grad_norm": 0.21719877421855927, "learning_rate": 1.4243559891029189e-05, "loss": 0.2379, "step": 19418 }, { "epoch": 0.36053363433498065, "grad_norm": 0.25728940963745117, "learning_rate": 1.4242503610182658e-05, "loss": 0.3683, "step": 19420 }, { "epoch": 0.36057076447239933, "grad_norm": 0.5536287426948547, "learning_rate": 1.4241447271608363e-05, "loss": 0.4299, "step": 19422 }, { "epoch": 0.36060789460981796, "grad_norm": 0.28365758061408997, "learning_rate": 1.424039087532067e-05, "loss": 0.3239, "step": 19424 }, { "epoch": 0.3606450247472366, "grad_norm": 0.39073842763900757, "learning_rate": 1.4239334421333962e-05, "loss": 0.6209, "step": 19426 }, { "epoch": 0.3606821548846552, "grad_norm": 0.3459383249282837, "learning_rate": 1.4238277909662612e-05, "loss": 0.3424, "step": 19428 }, { "epoch": 0.36071928502207384, "grad_norm": 0.8320008516311646, "learning_rate": 1.4237221340320993e-05, "loss": 0.1513, "step": 19430 }, { "epoch": 0.3607564151594925, "grad_norm": 0.3445095717906952, "learning_rate": 1.4236164713323483e-05, "loss": 0.3787, "step": 19432 }, { "epoch": 0.36079354529691116, "grad_norm": 0.4179249703884125, "learning_rate": 1.4235108028684463e-05, "loss": 0.3335, "step": 19434 }, { "epoch": 0.3608306754343298, "grad_norm": 0.3133543133735657, "learning_rate": 1.4234051286418305e-05, "loss": 0.2513, "step": 19436 }, { "epoch": 0.3608678055717484, "grad_norm": 0.2690754234790802, "learning_rate": 1.423299448653939e-05, "loss": 0.2418, "step": 19438 }, { "epoch": 0.36090493570916704, "grad_norm": 0.464995801448822, "learning_rate": 1.4231937629062105e-05, "loss": 0.252, "step": 19440 }, { "epoch": 0.3609420658465857, "grad_norm": 0.3129501938819885, "learning_rate": 1.4230880714000824e-05, "loss": 0.2725, "step": 19442 }, { "epoch": 0.36097919598400435, "grad_norm": 0.3478209972381592, "learning_rate": 1.4229823741369926e-05, "loss": 0.1372, "step": 19444 }, { "epoch": 0.361016326121423, "grad_norm": 0.40127572417259216, "learning_rate": 1.42287667111838e-05, "loss": 0.3705, "step": 19446 }, { "epoch": 0.3610534562588416, "grad_norm": 0.3084023594856262, "learning_rate": 1.4227709623456823e-05, "loss": 0.4258, "step": 19448 }, { "epoch": 0.36109058639626024, "grad_norm": 0.5917215347290039, "learning_rate": 1.422665247820338e-05, "loss": 0.2639, "step": 19450 }, { "epoch": 0.36112771653367887, "grad_norm": 0.32714250683784485, "learning_rate": 1.422559527543786e-05, "loss": 0.3106, "step": 19452 }, { "epoch": 0.36116484667109755, "grad_norm": 0.19050537049770355, "learning_rate": 1.4224538015174647e-05, "loss": 0.1674, "step": 19454 }, { "epoch": 0.3612019768085162, "grad_norm": 0.4742777347564697, "learning_rate": 1.422348069742812e-05, "loss": 0.383, "step": 19456 }, { "epoch": 0.3612391069459348, "grad_norm": 0.3823901116847992, "learning_rate": 1.4222423322212676e-05, "loss": 0.2073, "step": 19458 }, { "epoch": 0.36127623708335344, "grad_norm": 0.4636363387107849, "learning_rate": 1.4221365889542698e-05, "loss": 0.4066, "step": 19460 }, { "epoch": 0.36131336722077206, "grad_norm": 0.3310929536819458, "learning_rate": 1.4220308399432574e-05, "loss": 0.2447, "step": 19462 }, { "epoch": 0.36135049735819075, "grad_norm": 0.34844252467155457, "learning_rate": 1.4219250851896693e-05, "loss": 0.2753, "step": 19464 }, { "epoch": 0.3613876274956094, "grad_norm": 0.3810291290283203, "learning_rate": 1.4218193246949448e-05, "loss": 0.2264, "step": 19466 }, { "epoch": 0.361424757633028, "grad_norm": 0.3767564594745636, "learning_rate": 1.4217135584605225e-05, "loss": 0.1662, "step": 19468 }, { "epoch": 0.36146188777044663, "grad_norm": 0.4006989300251007, "learning_rate": 1.421607786487842e-05, "loss": 0.2757, "step": 19470 }, { "epoch": 0.36149901790786526, "grad_norm": 0.340705543756485, "learning_rate": 1.4215020087783425e-05, "loss": 0.209, "step": 19472 }, { "epoch": 0.3615361480452839, "grad_norm": 0.4438702166080475, "learning_rate": 1.4213962253334633e-05, "loss": 0.2898, "step": 19474 }, { "epoch": 0.3615732781827026, "grad_norm": 0.2826271653175354, "learning_rate": 1.4212904361546432e-05, "loss": 0.1184, "step": 19476 }, { "epoch": 0.3616104083201212, "grad_norm": 0.316656231880188, "learning_rate": 1.4211846412433226e-05, "loss": 0.2498, "step": 19478 }, { "epoch": 0.36164753845753983, "grad_norm": 0.530942976474762, "learning_rate": 1.4210788406009404e-05, "loss": 0.3692, "step": 19480 }, { "epoch": 0.36168466859495846, "grad_norm": 0.404958039522171, "learning_rate": 1.4209730342289366e-05, "loss": 0.3864, "step": 19482 }, { "epoch": 0.3617217987323771, "grad_norm": 0.3390571177005768, "learning_rate": 1.4208672221287507e-05, "loss": 0.2055, "step": 19484 }, { "epoch": 0.36175892886979577, "grad_norm": 0.4394838213920593, "learning_rate": 1.4207614043018228e-05, "loss": 0.4327, "step": 19486 }, { "epoch": 0.3617960590072144, "grad_norm": 0.351423978805542, "learning_rate": 1.4206555807495922e-05, "loss": 0.3163, "step": 19488 }, { "epoch": 0.361833189144633, "grad_norm": 0.457510769367218, "learning_rate": 1.4205497514734993e-05, "loss": 0.2466, "step": 19490 }, { "epoch": 0.36187031928205166, "grad_norm": 0.35586559772491455, "learning_rate": 1.4204439164749837e-05, "loss": 0.2381, "step": 19492 }, { "epoch": 0.3619074494194703, "grad_norm": 0.5308236479759216, "learning_rate": 1.420338075755486e-05, "loss": 0.3012, "step": 19494 }, { "epoch": 0.3619445795568889, "grad_norm": 0.6988393068313599, "learning_rate": 1.4202322293164464e-05, "loss": 0.3331, "step": 19496 }, { "epoch": 0.3619817096943076, "grad_norm": 1.3446675539016724, "learning_rate": 1.4201263771593045e-05, "loss": 0.2747, "step": 19498 }, { "epoch": 0.3620188398317262, "grad_norm": 0.38556936383247375, "learning_rate": 1.420020519285501e-05, "loss": 0.3301, "step": 19500 }, { "epoch": 0.36205596996914485, "grad_norm": 0.35705500841140747, "learning_rate": 1.4199146556964765e-05, "loss": 0.325, "step": 19502 }, { "epoch": 0.3620931001065635, "grad_norm": 0.46387165784835815, "learning_rate": 1.4198087863936714e-05, "loss": 0.1574, "step": 19504 }, { "epoch": 0.3621302302439821, "grad_norm": 0.4282967746257782, "learning_rate": 1.4197029113785264e-05, "loss": 0.196, "step": 19506 }, { "epoch": 0.3621673603814008, "grad_norm": 0.3932381272315979, "learning_rate": 1.4195970306524815e-05, "loss": 0.3732, "step": 19508 }, { "epoch": 0.3622044905188194, "grad_norm": 0.36688801646232605, "learning_rate": 1.4194911442169781e-05, "loss": 0.3949, "step": 19510 }, { "epoch": 0.36224162065623805, "grad_norm": 0.5592859387397766, "learning_rate": 1.4193852520734565e-05, "loss": 0.2494, "step": 19512 }, { "epoch": 0.3622787507936567, "grad_norm": 0.45428985357284546, "learning_rate": 1.4192793542233579e-05, "loss": 0.421, "step": 19514 }, { "epoch": 0.3623158809310753, "grad_norm": 0.4370211362838745, "learning_rate": 1.4191734506681236e-05, "loss": 0.2709, "step": 19516 }, { "epoch": 0.362353011068494, "grad_norm": 0.4268166124820709, "learning_rate": 1.4190675414091937e-05, "loss": 0.1489, "step": 19518 }, { "epoch": 0.3623901412059126, "grad_norm": 0.4415932893753052, "learning_rate": 1.41896162644801e-05, "loss": 0.4014, "step": 19520 }, { "epoch": 0.36242727134333125, "grad_norm": 0.2947825789451599, "learning_rate": 1.4188557057860135e-05, "loss": 0.3449, "step": 19522 }, { "epoch": 0.3624644014807499, "grad_norm": 0.453798770904541, "learning_rate": 1.4187497794246455e-05, "loss": 0.4646, "step": 19524 }, { "epoch": 0.3625015316181685, "grad_norm": 3.982621669769287, "learning_rate": 1.4186438473653473e-05, "loss": 0.3369, "step": 19526 }, { "epoch": 0.36253866175558713, "grad_norm": 0.5682688355445862, "learning_rate": 1.4185379096095602e-05, "loss": 0.1894, "step": 19528 }, { "epoch": 0.3625757918930058, "grad_norm": 0.28124478459358215, "learning_rate": 1.4184319661587258e-05, "loss": 0.4234, "step": 19530 }, { "epoch": 0.36261292203042444, "grad_norm": 0.6364871859550476, "learning_rate": 1.4183260170142857e-05, "loss": 0.2529, "step": 19532 }, { "epoch": 0.3626500521678431, "grad_norm": 0.35344138741493225, "learning_rate": 1.4182200621776816e-05, "loss": 0.4488, "step": 19534 }, { "epoch": 0.3626871823052617, "grad_norm": 0.7011620998382568, "learning_rate": 1.4181141016503552e-05, "loss": 0.435, "step": 19536 }, { "epoch": 0.36272431244268033, "grad_norm": 0.4352754056453705, "learning_rate": 1.4180081354337483e-05, "loss": 0.4362, "step": 19538 }, { "epoch": 0.362761442580099, "grad_norm": 0.570061206817627, "learning_rate": 1.4179021635293027e-05, "loss": 0.4287, "step": 19540 }, { "epoch": 0.36279857271751764, "grad_norm": 0.46416133642196655, "learning_rate": 1.4177961859384604e-05, "loss": 0.2883, "step": 19542 }, { "epoch": 0.36283570285493627, "grad_norm": 0.4613078236579895, "learning_rate": 1.4176902026626635e-05, "loss": 0.2745, "step": 19544 }, { "epoch": 0.3628728329923549, "grad_norm": 0.31681764125823975, "learning_rate": 1.417584213703354e-05, "loss": 0.2747, "step": 19546 }, { "epoch": 0.3629099631297735, "grad_norm": 0.4063815474510193, "learning_rate": 1.4174782190619745e-05, "loss": 0.4011, "step": 19548 }, { "epoch": 0.36294709326719216, "grad_norm": 0.4370083808898926, "learning_rate": 1.4173722187399666e-05, "loss": 0.2299, "step": 19550 }, { "epoch": 0.36298422340461084, "grad_norm": 0.26459628343582153, "learning_rate": 1.4172662127387726e-05, "loss": 0.2999, "step": 19552 }, { "epoch": 0.36302135354202947, "grad_norm": 0.24206113815307617, "learning_rate": 1.417160201059836e-05, "loss": 0.31, "step": 19554 }, { "epoch": 0.3630584836794481, "grad_norm": 0.40039780735969543, "learning_rate": 1.4170541837045981e-05, "loss": 0.4261, "step": 19556 }, { "epoch": 0.3630956138168667, "grad_norm": 0.4162488877773285, "learning_rate": 1.4169481606745023e-05, "loss": 0.3216, "step": 19558 }, { "epoch": 0.36313274395428535, "grad_norm": 0.3435562551021576, "learning_rate": 1.4168421319709912e-05, "loss": 0.3266, "step": 19560 }, { "epoch": 0.36316987409170404, "grad_norm": 0.29202988743782043, "learning_rate": 1.4167360975955067e-05, "loss": 0.1948, "step": 19562 }, { "epoch": 0.36320700422912267, "grad_norm": 0.37673017382621765, "learning_rate": 1.4166300575494922e-05, "loss": 0.4237, "step": 19564 }, { "epoch": 0.3632441343665413, "grad_norm": 0.4063500165939331, "learning_rate": 1.4165240118343908e-05, "loss": 0.252, "step": 19566 }, { "epoch": 0.3632812645039599, "grad_norm": 0.4879564046859741, "learning_rate": 1.416417960451645e-05, "loss": 0.4091, "step": 19568 }, { "epoch": 0.36331839464137855, "grad_norm": 0.4541158974170685, "learning_rate": 1.4163119034026984e-05, "loss": 0.3571, "step": 19570 }, { "epoch": 0.3633555247787972, "grad_norm": 0.3003189265727997, "learning_rate": 1.4162058406889938e-05, "loss": 0.5226, "step": 19572 }, { "epoch": 0.36339265491621586, "grad_norm": 0.3884391784667969, "learning_rate": 1.4160997723119742e-05, "loss": 0.3981, "step": 19574 }, { "epoch": 0.3634297850536345, "grad_norm": 0.37985676527023315, "learning_rate": 1.415993698273083e-05, "loss": 0.2039, "step": 19576 }, { "epoch": 0.3634669151910531, "grad_norm": 0.37474411725997925, "learning_rate": 1.415887618573764e-05, "loss": 0.1386, "step": 19578 }, { "epoch": 0.36350404532847175, "grad_norm": 0.3575778901576996, "learning_rate": 1.4157815332154598e-05, "loss": 0.2139, "step": 19580 }, { "epoch": 0.3635411754658904, "grad_norm": 0.35885781049728394, "learning_rate": 1.4156754421996147e-05, "loss": 0.4081, "step": 19582 }, { "epoch": 0.36357830560330906, "grad_norm": 0.3839130699634552, "learning_rate": 1.4155693455276716e-05, "loss": 0.4175, "step": 19584 }, { "epoch": 0.3636154357407277, "grad_norm": 0.30613404512405396, "learning_rate": 1.4154632432010746e-05, "loss": 0.3266, "step": 19586 }, { "epoch": 0.3636525658781463, "grad_norm": 0.3216108977794647, "learning_rate": 1.4153571352212671e-05, "loss": 0.3034, "step": 19588 }, { "epoch": 0.36368969601556494, "grad_norm": 0.2818622589111328, "learning_rate": 1.4152510215896936e-05, "loss": 0.1936, "step": 19590 }, { "epoch": 0.3637268261529836, "grad_norm": 0.2734121084213257, "learning_rate": 1.4151449023077972e-05, "loss": 0.1365, "step": 19592 }, { "epoch": 0.36376395629040226, "grad_norm": 0.39637458324432373, "learning_rate": 1.4150387773770222e-05, "loss": 0.2345, "step": 19594 }, { "epoch": 0.3638010864278209, "grad_norm": 0.32374125719070435, "learning_rate": 1.4149326467988127e-05, "loss": 0.381, "step": 19596 }, { "epoch": 0.3638382165652395, "grad_norm": 0.3764549791812897, "learning_rate": 1.4148265105746128e-05, "loss": 0.3107, "step": 19598 }, { "epoch": 0.36387534670265814, "grad_norm": 0.4097733199596405, "learning_rate": 1.4147203687058667e-05, "loss": 0.2284, "step": 19600 }, { "epoch": 0.36391247684007677, "grad_norm": 0.3480445146560669, "learning_rate": 1.4146142211940184e-05, "loss": 0.3732, "step": 19602 }, { "epoch": 0.3639496069774954, "grad_norm": 0.32401493191719055, "learning_rate": 1.4145080680405125e-05, "loss": 0.1009, "step": 19604 }, { "epoch": 0.3639867371149141, "grad_norm": 0.3910563886165619, "learning_rate": 1.4144019092467933e-05, "loss": 0.2834, "step": 19606 }, { "epoch": 0.3640238672523327, "grad_norm": 0.3129458725452423, "learning_rate": 1.4142957448143056e-05, "loss": 0.3255, "step": 19608 }, { "epoch": 0.36406099738975134, "grad_norm": 0.4297815263271332, "learning_rate": 1.4141895747444938e-05, "loss": 0.1376, "step": 19610 }, { "epoch": 0.36409812752716997, "grad_norm": 0.38689860701560974, "learning_rate": 1.4140833990388022e-05, "loss": 0.4161, "step": 19612 }, { "epoch": 0.3641352576645886, "grad_norm": 0.4048779010772705, "learning_rate": 1.4139772176986762e-05, "loss": 0.3856, "step": 19614 }, { "epoch": 0.3641723878020073, "grad_norm": 0.28877386450767517, "learning_rate": 1.4138710307255602e-05, "loss": 0.4035, "step": 19616 }, { "epoch": 0.3642095179394259, "grad_norm": 0.31135332584381104, "learning_rate": 1.413764838120899e-05, "loss": 0.4439, "step": 19618 }, { "epoch": 0.36424664807684454, "grad_norm": 0.48642754554748535, "learning_rate": 1.4136586398861379e-05, "loss": 0.2254, "step": 19620 }, { "epoch": 0.36428377821426317, "grad_norm": 0.48098546266555786, "learning_rate": 1.4135524360227218e-05, "loss": 0.2268, "step": 19622 }, { "epoch": 0.3643209083516818, "grad_norm": 0.338257759809494, "learning_rate": 1.413446226532096e-05, "loss": 0.4359, "step": 19624 }, { "epoch": 0.3643580384891004, "grad_norm": 0.4198701083660126, "learning_rate": 1.4133400114157047e-05, "loss": 0.2899, "step": 19626 }, { "epoch": 0.3643951686265191, "grad_norm": 0.32552823424339294, "learning_rate": 1.4132337906749944e-05, "loss": 0.3084, "step": 19628 }, { "epoch": 0.36443229876393773, "grad_norm": 0.41945046186447144, "learning_rate": 1.41312756431141e-05, "loss": 0.2714, "step": 19630 }, { "epoch": 0.36446942890135636, "grad_norm": 0.3319258689880371, "learning_rate": 1.4130213323263971e-05, "loss": 0.3977, "step": 19632 }, { "epoch": 0.364506559038775, "grad_norm": 0.29167088866233826, "learning_rate": 1.4129150947214006e-05, "loss": 0.3782, "step": 19634 }, { "epoch": 0.3645436891761936, "grad_norm": 0.4393881857395172, "learning_rate": 1.4128088514978668e-05, "loss": 0.3559, "step": 19636 }, { "epoch": 0.3645808193136123, "grad_norm": 0.6081888675689697, "learning_rate": 1.4127026026572408e-05, "loss": 0.385, "step": 19638 }, { "epoch": 0.36461794945103093, "grad_norm": 0.5613787174224854, "learning_rate": 1.4125963482009686e-05, "loss": 0.2547, "step": 19640 }, { "epoch": 0.36465507958844956, "grad_norm": 0.37745073437690735, "learning_rate": 1.4124900881304962e-05, "loss": 0.2323, "step": 19642 }, { "epoch": 0.3646922097258682, "grad_norm": 0.3574422597885132, "learning_rate": 1.4123838224472692e-05, "loss": 0.2231, "step": 19644 }, { "epoch": 0.3647293398632868, "grad_norm": 0.3520275950431824, "learning_rate": 1.4122775511527333e-05, "loss": 0.3635, "step": 19646 }, { "epoch": 0.36476647000070545, "grad_norm": 0.2459086775779724, "learning_rate": 1.4121712742483354e-05, "loss": 0.321, "step": 19648 }, { "epoch": 0.36480360013812413, "grad_norm": 0.43133100867271423, "learning_rate": 1.4120649917355205e-05, "loss": 0.2612, "step": 19650 }, { "epoch": 0.36484073027554276, "grad_norm": 0.7307223677635193, "learning_rate": 1.4119587036157354e-05, "loss": 0.3203, "step": 19652 }, { "epoch": 0.3648778604129614, "grad_norm": 0.29405370354652405, "learning_rate": 1.4118524098904267e-05, "loss": 0.28, "step": 19654 }, { "epoch": 0.36491499055038, "grad_norm": 0.3256421387195587, "learning_rate": 1.4117461105610402e-05, "loss": 0.2807, "step": 19656 }, { "epoch": 0.36495212068779864, "grad_norm": 0.35624784231185913, "learning_rate": 1.4116398056290222e-05, "loss": 0.1957, "step": 19658 }, { "epoch": 0.3649892508252173, "grad_norm": 0.2632369101047516, "learning_rate": 1.4115334950958198e-05, "loss": 0.2977, "step": 19660 }, { "epoch": 0.36502638096263595, "grad_norm": 0.24811019003391266, "learning_rate": 1.4114271789628788e-05, "loss": 0.2749, "step": 19662 }, { "epoch": 0.3650635111000546, "grad_norm": 1.0779023170471191, "learning_rate": 1.4113208572316465e-05, "loss": 0.2522, "step": 19664 }, { "epoch": 0.3651006412374732, "grad_norm": 0.32825222611427307, "learning_rate": 1.4112145299035693e-05, "loss": 0.4234, "step": 19666 }, { "epoch": 0.36513777137489184, "grad_norm": 0.38045060634613037, "learning_rate": 1.4111081969800941e-05, "loss": 0.3656, "step": 19668 }, { "epoch": 0.3651749015123105, "grad_norm": 0.36870869994163513, "learning_rate": 1.4110018584626678e-05, "loss": 0.354, "step": 19670 }, { "epoch": 0.36521203164972915, "grad_norm": 0.4029601812362671, "learning_rate": 1.4108955143527372e-05, "loss": 0.3685, "step": 19672 }, { "epoch": 0.3652491617871478, "grad_norm": 0.34298762679100037, "learning_rate": 1.4107891646517497e-05, "loss": 0.1645, "step": 19674 }, { "epoch": 0.3652862919245664, "grad_norm": 0.35761430859565735, "learning_rate": 1.410682809361152e-05, "loss": 0.2785, "step": 19676 }, { "epoch": 0.36532342206198504, "grad_norm": 0.21414943039417267, "learning_rate": 1.4105764484823912e-05, "loss": 0.2213, "step": 19678 }, { "epoch": 0.36536055219940367, "grad_norm": 0.2507987916469574, "learning_rate": 1.410470082016915e-05, "loss": 0.2802, "step": 19680 }, { "epoch": 0.36539768233682235, "grad_norm": 0.3661417067050934, "learning_rate": 1.4103637099661703e-05, "loss": 0.0958, "step": 19682 }, { "epoch": 0.365434812474241, "grad_norm": 0.24103400111198425, "learning_rate": 1.4102573323316046e-05, "loss": 0.2302, "step": 19684 }, { "epoch": 0.3654719426116596, "grad_norm": 0.47021281719207764, "learning_rate": 1.410150949114666e-05, "loss": 0.4059, "step": 19686 }, { "epoch": 0.36550907274907823, "grad_norm": 3.089322805404663, "learning_rate": 1.410044560316801e-05, "loss": 0.4785, "step": 19688 }, { "epoch": 0.36554620288649686, "grad_norm": 0.2955191731452942, "learning_rate": 1.4099381659394579e-05, "loss": 0.3642, "step": 19690 }, { "epoch": 0.36558333302391555, "grad_norm": 0.26204001903533936, "learning_rate": 1.4098317659840846e-05, "loss": 0.249, "step": 19692 }, { "epoch": 0.3656204631613342, "grad_norm": 0.44068658351898193, "learning_rate": 1.4097253604521281e-05, "loss": 0.3962, "step": 19694 }, { "epoch": 0.3656575932987528, "grad_norm": 0.34314167499542236, "learning_rate": 1.4096189493450369e-05, "loss": 0.2091, "step": 19696 }, { "epoch": 0.36569472343617143, "grad_norm": 0.40914028882980347, "learning_rate": 1.409512532664259e-05, "loss": 0.3216, "step": 19698 }, { "epoch": 0.36573185357359006, "grad_norm": 0.3474060297012329, "learning_rate": 1.4094061104112415e-05, "loss": 0.3361, "step": 19700 }, { "epoch": 0.3657689837110087, "grad_norm": 0.4032563865184784, "learning_rate": 1.4092996825874335e-05, "loss": 0.1635, "step": 19702 }, { "epoch": 0.36580611384842737, "grad_norm": 0.40958371758461, "learning_rate": 1.4091932491942828e-05, "loss": 0.1466, "step": 19704 }, { "epoch": 0.365843243985846, "grad_norm": 0.32558050751686096, "learning_rate": 1.4090868102332378e-05, "loss": 0.2585, "step": 19706 }, { "epoch": 0.36588037412326463, "grad_norm": 0.3151330053806305, "learning_rate": 1.4089803657057465e-05, "loss": 0.2442, "step": 19708 }, { "epoch": 0.36591750426068326, "grad_norm": 0.3123070299625397, "learning_rate": 1.4088739156132576e-05, "loss": 0.2547, "step": 19710 }, { "epoch": 0.3659546343981019, "grad_norm": 0.4219985604286194, "learning_rate": 1.4087674599572195e-05, "loss": 0.3546, "step": 19712 }, { "epoch": 0.36599176453552057, "grad_norm": 0.3112752437591553, "learning_rate": 1.4086609987390802e-05, "loss": 0.2208, "step": 19714 }, { "epoch": 0.3660288946729392, "grad_norm": 0.38033539056777954, "learning_rate": 1.4085545319602894e-05, "loss": 0.2005, "step": 19716 }, { "epoch": 0.3660660248103578, "grad_norm": 0.5819393992424011, "learning_rate": 1.408448059622295e-05, "loss": 0.2716, "step": 19718 }, { "epoch": 0.36610315494777645, "grad_norm": 0.41848811507225037, "learning_rate": 1.4083415817265457e-05, "loss": 0.2395, "step": 19720 }, { "epoch": 0.3661402850851951, "grad_norm": 0.3015039563179016, "learning_rate": 1.4082350982744906e-05, "loss": 0.1003, "step": 19722 }, { "epoch": 0.3661774152226137, "grad_norm": 0.3398449420928955, "learning_rate": 1.4081286092675788e-05, "loss": 0.2636, "step": 19724 }, { "epoch": 0.3662145453600324, "grad_norm": 0.49297478795051575, "learning_rate": 1.408022114707259e-05, "loss": 0.426, "step": 19726 }, { "epoch": 0.366251675497451, "grad_norm": 0.4017919898033142, "learning_rate": 1.4079156145949806e-05, "loss": 0.2537, "step": 19728 }, { "epoch": 0.36628880563486965, "grad_norm": 0.5466989874839783, "learning_rate": 1.4078091089321925e-05, "loss": 0.1877, "step": 19730 }, { "epoch": 0.3663259357722883, "grad_norm": 0.275598406791687, "learning_rate": 1.4077025977203438e-05, "loss": 0.2078, "step": 19732 }, { "epoch": 0.3663630659097069, "grad_norm": 0.24837088584899902, "learning_rate": 1.4075960809608839e-05, "loss": 0.2299, "step": 19734 }, { "epoch": 0.3664001960471256, "grad_norm": 0.36476150155067444, "learning_rate": 1.4074895586552626e-05, "loss": 0.3551, "step": 19736 }, { "epoch": 0.3664373261845442, "grad_norm": 0.3732600510120392, "learning_rate": 1.4073830308049286e-05, "loss": 0.2971, "step": 19738 }, { "epoch": 0.36647445632196285, "grad_norm": 0.49440932273864746, "learning_rate": 1.407276497411332e-05, "loss": 0.2615, "step": 19740 }, { "epoch": 0.3665115864593815, "grad_norm": 0.4376753866672516, "learning_rate": 1.4071699584759222e-05, "loss": 0.169, "step": 19742 }, { "epoch": 0.3665487165968001, "grad_norm": 0.4948379695415497, "learning_rate": 1.4070634140001487e-05, "loss": 0.217, "step": 19744 }, { "epoch": 0.3665858467342188, "grad_norm": 0.4794681668281555, "learning_rate": 1.4069568639854613e-05, "loss": 0.4288, "step": 19746 }, { "epoch": 0.3666229768716374, "grad_norm": 0.37141862511634827, "learning_rate": 1.4068503084333105e-05, "loss": 0.2167, "step": 19748 }, { "epoch": 0.36666010700905605, "grad_norm": 0.35490235686302185, "learning_rate": 1.4067437473451453e-05, "loss": 0.3415, "step": 19750 }, { "epoch": 0.3666972371464747, "grad_norm": 4.577044486999512, "learning_rate": 1.4066371807224161e-05, "loss": 0.4908, "step": 19752 }, { "epoch": 0.3667343672838933, "grad_norm": 0.6670210361480713, "learning_rate": 1.4065306085665728e-05, "loss": 0.2843, "step": 19754 }, { "epoch": 0.36677149742131193, "grad_norm": 0.5045584440231323, "learning_rate": 1.4064240308790658e-05, "loss": 0.1779, "step": 19756 }, { "epoch": 0.3668086275587306, "grad_norm": 0.5253371000289917, "learning_rate": 1.4063174476613449e-05, "loss": 0.3026, "step": 19758 }, { "epoch": 0.36684575769614924, "grad_norm": 0.3712799549102783, "learning_rate": 1.4062108589148609e-05, "loss": 0.3911, "step": 19760 }, { "epoch": 0.3668828878335679, "grad_norm": 0.49503302574157715, "learning_rate": 1.4061042646410637e-05, "loss": 0.2465, "step": 19762 }, { "epoch": 0.3669200179709865, "grad_norm": 0.4100792706012726, "learning_rate": 1.4059976648414038e-05, "loss": 0.499, "step": 19764 }, { "epoch": 0.36695714810840513, "grad_norm": 0.566878080368042, "learning_rate": 1.4058910595173315e-05, "loss": 0.2255, "step": 19766 }, { "epoch": 0.3669942782458238, "grad_norm": 0.3531090021133423, "learning_rate": 1.4057844486702983e-05, "loss": 0.4532, "step": 19768 }, { "epoch": 0.36703140838324244, "grad_norm": 0.3961486518383026, "learning_rate": 1.4056778323017541e-05, "loss": 0.3222, "step": 19770 }, { "epoch": 0.36706853852066107, "grad_norm": 0.30226781964302063, "learning_rate": 1.4055712104131494e-05, "loss": 0.3407, "step": 19772 }, { "epoch": 0.3671056686580797, "grad_norm": 0.40176841616630554, "learning_rate": 1.4054645830059356e-05, "loss": 0.2552, "step": 19774 }, { "epoch": 0.3671427987954983, "grad_norm": 0.3255210816860199, "learning_rate": 1.4053579500815633e-05, "loss": 0.38, "step": 19776 }, { "epoch": 0.36717992893291695, "grad_norm": 0.3860858082771301, "learning_rate": 1.4052513116414831e-05, "loss": 0.2448, "step": 19778 }, { "epoch": 0.36721705907033564, "grad_norm": 0.5881112813949585, "learning_rate": 1.405144667687147e-05, "loss": 0.2097, "step": 19780 }, { "epoch": 0.36725418920775427, "grad_norm": 0.5123052000999451, "learning_rate": 1.4050380182200054e-05, "loss": 0.2412, "step": 19782 }, { "epoch": 0.3672913193451729, "grad_norm": 0.41227757930755615, "learning_rate": 1.4049313632415093e-05, "loss": 0.3831, "step": 19784 }, { "epoch": 0.3673284494825915, "grad_norm": 0.39411109685897827, "learning_rate": 1.4048247027531104e-05, "loss": 0.2493, "step": 19786 }, { "epoch": 0.36736557962001015, "grad_norm": 0.3688342273235321, "learning_rate": 1.40471803675626e-05, "loss": 0.3052, "step": 19788 }, { "epoch": 0.36740270975742884, "grad_norm": 0.5016171932220459, "learning_rate": 1.4046113652524094e-05, "loss": 0.3513, "step": 19790 }, { "epoch": 0.36743983989484746, "grad_norm": 0.5210905075073242, "learning_rate": 1.40450468824301e-05, "loss": 0.3102, "step": 19792 }, { "epoch": 0.3674769700322661, "grad_norm": 0.27672454714775085, "learning_rate": 1.4043980057295135e-05, "loss": 0.3127, "step": 19794 }, { "epoch": 0.3675141001696847, "grad_norm": 0.4603653848171234, "learning_rate": 1.4042913177133713e-05, "loss": 0.3356, "step": 19796 }, { "epoch": 0.36755123030710335, "grad_norm": 0.45041635632514954, "learning_rate": 1.4041846241960353e-05, "loss": 0.2722, "step": 19798 }, { "epoch": 0.367588360444522, "grad_norm": 0.2742089629173279, "learning_rate": 1.4040779251789572e-05, "loss": 0.2325, "step": 19800 }, { "epoch": 0.36762549058194066, "grad_norm": 0.4382975697517395, "learning_rate": 1.4039712206635891e-05, "loss": 0.2816, "step": 19802 }, { "epoch": 0.3676626207193593, "grad_norm": 0.530083954334259, "learning_rate": 1.4038645106513823e-05, "loss": 0.2455, "step": 19804 }, { "epoch": 0.3676997508567779, "grad_norm": 0.29423898458480835, "learning_rate": 1.4037577951437898e-05, "loss": 0.2766, "step": 19806 }, { "epoch": 0.36773688099419655, "grad_norm": 0.49707090854644775, "learning_rate": 1.4036510741422627e-05, "loss": 0.2937, "step": 19808 }, { "epoch": 0.3677740111316152, "grad_norm": 0.42107880115509033, "learning_rate": 1.4035443476482535e-05, "loss": 0.3112, "step": 19810 }, { "epoch": 0.36781114126903386, "grad_norm": 0.28626957535743713, "learning_rate": 1.4034376156632148e-05, "loss": 0.2299, "step": 19812 }, { "epoch": 0.3678482714064525, "grad_norm": 0.3611034154891968, "learning_rate": 1.4033308781885987e-05, "loss": 0.2723, "step": 19814 }, { "epoch": 0.3678854015438711, "grad_norm": 0.37940099835395813, "learning_rate": 1.4032241352258568e-05, "loss": 0.2344, "step": 19816 }, { "epoch": 0.36792253168128974, "grad_norm": 0.3809797167778015, "learning_rate": 1.403117386776443e-05, "loss": 0.4395, "step": 19818 }, { "epoch": 0.3679596618187084, "grad_norm": 0.30382221937179565, "learning_rate": 1.4030106328418085e-05, "loss": 0.1861, "step": 19820 }, { "epoch": 0.36799679195612706, "grad_norm": 0.3881332576274872, "learning_rate": 1.4029038734234064e-05, "loss": 0.2843, "step": 19822 }, { "epoch": 0.3680339220935457, "grad_norm": 0.4450070559978485, "learning_rate": 1.40279710852269e-05, "loss": 0.3987, "step": 19824 }, { "epoch": 0.3680710522309643, "grad_norm": 0.3255630433559418, "learning_rate": 1.402690338141111e-05, "loss": 0.3101, "step": 19826 }, { "epoch": 0.36810818236838294, "grad_norm": 0.31240347027778625, "learning_rate": 1.4025835622801225e-05, "loss": 0.2112, "step": 19828 }, { "epoch": 0.36814531250580157, "grad_norm": 0.46936532855033875, "learning_rate": 1.4024767809411779e-05, "loss": 0.2729, "step": 19830 }, { "epoch": 0.3681824426432202, "grad_norm": 0.4618358314037323, "learning_rate": 1.4023699941257299e-05, "loss": 0.3405, "step": 19832 }, { "epoch": 0.3682195727806389, "grad_norm": 0.3514604866504669, "learning_rate": 1.4022632018352315e-05, "loss": 0.1945, "step": 19834 }, { "epoch": 0.3682567029180575, "grad_norm": 0.44015613198280334, "learning_rate": 1.4021564040711357e-05, "loss": 0.275, "step": 19836 }, { "epoch": 0.36829383305547614, "grad_norm": 0.5452552437782288, "learning_rate": 1.4020496008348961e-05, "loss": 0.3467, "step": 19838 }, { "epoch": 0.36833096319289477, "grad_norm": 0.5117089748382568, "learning_rate": 1.4019427921279653e-05, "loss": 0.4378, "step": 19840 }, { "epoch": 0.3683680933303134, "grad_norm": 0.3432047963142395, "learning_rate": 1.4018359779517974e-05, "loss": 0.4241, "step": 19842 }, { "epoch": 0.3684052234677321, "grad_norm": 0.3727293312549591, "learning_rate": 1.4017291583078454e-05, "loss": 0.352, "step": 19844 }, { "epoch": 0.3684423536051507, "grad_norm": 0.3713934123516083, "learning_rate": 1.4016223331975628e-05, "loss": 0.1653, "step": 19846 }, { "epoch": 0.36847948374256934, "grad_norm": 0.3766219913959503, "learning_rate": 1.4015155026224034e-05, "loss": 0.2882, "step": 19848 }, { "epoch": 0.36851661387998796, "grad_norm": 0.35271942615509033, "learning_rate": 1.4014086665838207e-05, "loss": 0.4556, "step": 19850 }, { "epoch": 0.3685537440174066, "grad_norm": 0.2975536286830902, "learning_rate": 1.4013018250832682e-05, "loss": 0.1583, "step": 19852 }, { "epoch": 0.3685908741548252, "grad_norm": 0.26816362142562866, "learning_rate": 1.4011949781222e-05, "loss": 0.3755, "step": 19854 }, { "epoch": 0.3686280042922439, "grad_norm": 0.5107616186141968, "learning_rate": 1.40108812570207e-05, "loss": 0.2508, "step": 19856 }, { "epoch": 0.36866513442966253, "grad_norm": 0.3093423843383789, "learning_rate": 1.4009812678243317e-05, "loss": 0.3939, "step": 19858 }, { "epoch": 0.36870226456708116, "grad_norm": 0.3917011618614197, "learning_rate": 1.4008744044904397e-05, "loss": 0.4131, "step": 19860 }, { "epoch": 0.3687393947044998, "grad_norm": 0.4304105341434479, "learning_rate": 1.4007675357018478e-05, "loss": 0.3959, "step": 19862 }, { "epoch": 0.3687765248419184, "grad_norm": 0.2791980803012848, "learning_rate": 1.4006606614600099e-05, "loss": 0.361, "step": 19864 }, { "epoch": 0.3688136549793371, "grad_norm": 0.362958699464798, "learning_rate": 1.4005537817663807e-05, "loss": 0.1953, "step": 19866 }, { "epoch": 0.36885078511675573, "grad_norm": 0.29998910427093506, "learning_rate": 1.4004468966224142e-05, "loss": 0.268, "step": 19868 }, { "epoch": 0.36888791525417436, "grad_norm": 0.4440038502216339, "learning_rate": 1.4003400060295653e-05, "loss": 0.3163, "step": 19870 }, { "epoch": 0.368925045391593, "grad_norm": 0.583439826965332, "learning_rate": 1.4002331099892877e-05, "loss": 0.2807, "step": 19872 }, { "epoch": 0.3689621755290116, "grad_norm": 0.29887261986732483, "learning_rate": 1.4001262085030363e-05, "loss": 0.1539, "step": 19874 }, { "epoch": 0.36899930566643024, "grad_norm": 0.333649605512619, "learning_rate": 1.4000193015722659e-05, "loss": 0.2771, "step": 19876 }, { "epoch": 0.36903643580384893, "grad_norm": 0.375797301530838, "learning_rate": 1.3999123891984309e-05, "loss": 0.2906, "step": 19878 }, { "epoch": 0.36907356594126756, "grad_norm": 0.30055153369903564, "learning_rate": 1.3998054713829862e-05, "loss": 0.421, "step": 19880 }, { "epoch": 0.3691106960786862, "grad_norm": 0.24014157056808472, "learning_rate": 1.3996985481273869e-05, "loss": 0.431, "step": 19882 }, { "epoch": 0.3691478262161048, "grad_norm": 0.3399409353733063, "learning_rate": 1.3995916194330872e-05, "loss": 0.1793, "step": 19884 }, { "epoch": 0.36918495635352344, "grad_norm": 0.4142495095729828, "learning_rate": 1.3994846853015425e-05, "loss": 0.3761, "step": 19886 }, { "epoch": 0.3692220864909421, "grad_norm": 0.4311040937900543, "learning_rate": 1.399377745734208e-05, "loss": 0.3731, "step": 19888 }, { "epoch": 0.36925921662836075, "grad_norm": 0.4256608486175537, "learning_rate": 1.3992708007325384e-05, "loss": 0.3338, "step": 19890 }, { "epoch": 0.3692963467657794, "grad_norm": 0.28841111063957214, "learning_rate": 1.3991638502979891e-05, "loss": 0.1511, "step": 19892 }, { "epoch": 0.369333476903198, "grad_norm": 0.4195404648780823, "learning_rate": 1.399056894432016e-05, "loss": 0.2248, "step": 19894 }, { "epoch": 0.36937060704061664, "grad_norm": 0.5865377187728882, "learning_rate": 1.3989499331360733e-05, "loss": 0.396, "step": 19896 }, { "epoch": 0.3694077371780353, "grad_norm": 0.7562296390533447, "learning_rate": 1.3988429664116175e-05, "loss": 0.2078, "step": 19898 }, { "epoch": 0.36944486731545395, "grad_norm": 0.3007420301437378, "learning_rate": 1.3987359942601032e-05, "loss": 0.3334, "step": 19900 }, { "epoch": 0.3694819974528726, "grad_norm": 0.5938670635223389, "learning_rate": 1.3986290166829866e-05, "loss": 0.3465, "step": 19902 }, { "epoch": 0.3695191275902912, "grad_norm": 0.31614843010902405, "learning_rate": 1.3985220336817226e-05, "loss": 0.495, "step": 19904 }, { "epoch": 0.36955625772770984, "grad_norm": 0.3564547896385193, "learning_rate": 1.398415045257768e-05, "loss": 0.2107, "step": 19906 }, { "epoch": 0.36959338786512846, "grad_norm": 0.42998653650283813, "learning_rate": 1.398308051412578e-05, "loss": 0.3464, "step": 19908 }, { "epoch": 0.36963051800254715, "grad_norm": 0.5240596532821655, "learning_rate": 1.3982010521476083e-05, "loss": 0.5494, "step": 19910 }, { "epoch": 0.3696676481399658, "grad_norm": 0.35242682695388794, "learning_rate": 1.398094047464315e-05, "loss": 0.2738, "step": 19912 }, { "epoch": 0.3697047782773844, "grad_norm": 0.3632034957408905, "learning_rate": 1.3979870373641543e-05, "loss": 0.3553, "step": 19914 }, { "epoch": 0.36974190841480303, "grad_norm": 0.29925113916397095, "learning_rate": 1.3978800218485819e-05, "loss": 0.2721, "step": 19916 }, { "epoch": 0.36977903855222166, "grad_norm": 0.42727649211883545, "learning_rate": 1.3977730009190547e-05, "loss": 0.2008, "step": 19918 }, { "epoch": 0.36981616868964035, "grad_norm": 0.38272160291671753, "learning_rate": 1.397665974577028e-05, "loss": 0.4324, "step": 19920 }, { "epoch": 0.369853298827059, "grad_norm": 0.37463170289993286, "learning_rate": 1.3975589428239587e-05, "loss": 0.3564, "step": 19922 }, { "epoch": 0.3698904289644776, "grad_norm": 0.34132835268974304, "learning_rate": 1.3974519056613028e-05, "loss": 0.5391, "step": 19924 }, { "epoch": 0.36992755910189623, "grad_norm": 0.49981725215911865, "learning_rate": 1.3973448630905171e-05, "loss": 0.4044, "step": 19926 }, { "epoch": 0.36996468923931486, "grad_norm": 0.32303696870803833, "learning_rate": 1.397237815113058e-05, "loss": 0.1147, "step": 19928 }, { "epoch": 0.3700018193767335, "grad_norm": 0.37099358439445496, "learning_rate": 1.3971307617303823e-05, "loss": 0.3839, "step": 19930 }, { "epoch": 0.37003894951415217, "grad_norm": 0.4158245921134949, "learning_rate": 1.3970237029439462e-05, "loss": 0.4518, "step": 19932 }, { "epoch": 0.3700760796515708, "grad_norm": 0.3985375761985779, "learning_rate": 1.3969166387552067e-05, "loss": 0.1662, "step": 19934 }, { "epoch": 0.37011320978898943, "grad_norm": 0.38583165407180786, "learning_rate": 1.3968095691656207e-05, "loss": 0.2297, "step": 19936 }, { "epoch": 0.37015033992640806, "grad_norm": 0.46508556604385376, "learning_rate": 1.3967024941766451e-05, "loss": 0.3123, "step": 19938 }, { "epoch": 0.3701874700638267, "grad_norm": 0.34358328580856323, "learning_rate": 1.3965954137897368e-05, "loss": 0.4212, "step": 19940 }, { "epoch": 0.37022460020124537, "grad_norm": 0.40049877762794495, "learning_rate": 1.396488328006353e-05, "loss": 0.2406, "step": 19942 }, { "epoch": 0.370261730338664, "grad_norm": 0.6300816535949707, "learning_rate": 1.3963812368279506e-05, "loss": 0.393, "step": 19944 }, { "epoch": 0.3702988604760826, "grad_norm": 0.3964472711086273, "learning_rate": 1.3962741402559867e-05, "loss": 0.2183, "step": 19946 }, { "epoch": 0.37033599061350125, "grad_norm": 0.3769698143005371, "learning_rate": 1.3961670382919187e-05, "loss": 0.1766, "step": 19948 }, { "epoch": 0.3703731207509199, "grad_norm": 0.4179772436618805, "learning_rate": 1.3960599309372042e-05, "loss": 0.3964, "step": 19950 }, { "epoch": 0.3704102508883385, "grad_norm": 0.23776397109031677, "learning_rate": 1.3959528181933005e-05, "loss": 0.1912, "step": 19952 }, { "epoch": 0.3704473810257572, "grad_norm": 0.39710840582847595, "learning_rate": 1.3958457000616646e-05, "loss": 0.2873, "step": 19954 }, { "epoch": 0.3704845111631758, "grad_norm": 0.3445592522621155, "learning_rate": 1.3957385765437544e-05, "loss": 0.3484, "step": 19956 }, { "epoch": 0.37052164130059445, "grad_norm": 0.3431375026702881, "learning_rate": 1.3956314476410278e-05, "loss": 0.403, "step": 19958 }, { "epoch": 0.3705587714380131, "grad_norm": 0.3629648983478546, "learning_rate": 1.395524313354942e-05, "loss": 0.5092, "step": 19960 }, { "epoch": 0.3705959015754317, "grad_norm": 0.4141274094581604, "learning_rate": 1.3954171736869553e-05, "loss": 0.1268, "step": 19962 }, { "epoch": 0.3706330317128504, "grad_norm": 0.22249390184879303, "learning_rate": 1.3953100286385253e-05, "loss": 0.2202, "step": 19964 }, { "epoch": 0.370670161850269, "grad_norm": 0.3651902377605438, "learning_rate": 1.3952028782111099e-05, "loss": 0.2827, "step": 19966 }, { "epoch": 0.37070729198768765, "grad_norm": 0.2630471885204315, "learning_rate": 1.3950957224061668e-05, "loss": 0.2429, "step": 19968 }, { "epoch": 0.3707444221251063, "grad_norm": 0.43547508120536804, "learning_rate": 1.3949885612251546e-05, "loss": 0.1774, "step": 19970 }, { "epoch": 0.3707815522625249, "grad_norm": 0.44554635882377625, "learning_rate": 1.3948813946695311e-05, "loss": 0.2937, "step": 19972 }, { "epoch": 0.3708186823999436, "grad_norm": 0.49214935302734375, "learning_rate": 1.3947742227407545e-05, "loss": 0.2204, "step": 19974 }, { "epoch": 0.3708558125373622, "grad_norm": 0.6007041335105896, "learning_rate": 1.3946670454402838e-05, "loss": 0.2867, "step": 19976 }, { "epoch": 0.37089294267478085, "grad_norm": 0.31248021125793457, "learning_rate": 1.394559862769576e-05, "loss": 0.3555, "step": 19978 }, { "epoch": 0.3709300728121995, "grad_norm": 0.5294929146766663, "learning_rate": 1.3944526747300906e-05, "loss": 0.3467, "step": 19980 }, { "epoch": 0.3709672029496181, "grad_norm": 0.27503687143325806, "learning_rate": 1.3943454813232862e-05, "loss": 0.204, "step": 19982 }, { "epoch": 0.37100433308703673, "grad_norm": 0.2617643475532532, "learning_rate": 1.3942382825506206e-05, "loss": 0.187, "step": 19984 }, { "epoch": 0.3710414632244554, "grad_norm": 0.4061446189880371, "learning_rate": 1.3941310784135529e-05, "loss": 0.2201, "step": 19986 }, { "epoch": 0.37107859336187404, "grad_norm": 0.5333889126777649, "learning_rate": 1.394023868913542e-05, "loss": 0.3019, "step": 19988 }, { "epoch": 0.37111572349929267, "grad_norm": 0.3337761461734772, "learning_rate": 1.393916654052046e-05, "loss": 0.158, "step": 19990 }, { "epoch": 0.3711528536367113, "grad_norm": 0.45581570267677307, "learning_rate": 1.3938094338305249e-05, "loss": 0.2493, "step": 19992 }, { "epoch": 0.37118998377412993, "grad_norm": 0.3279041051864624, "learning_rate": 1.3937022082504364e-05, "loss": 0.4578, "step": 19994 }, { "epoch": 0.3712271139115486, "grad_norm": 0.3062148094177246, "learning_rate": 1.3935949773132405e-05, "loss": 0.3448, "step": 19996 }, { "epoch": 0.37126424404896724, "grad_norm": 0.39469510316848755, "learning_rate": 1.3934877410203958e-05, "loss": 0.357, "step": 19998 }, { "epoch": 0.37130137418638587, "grad_norm": 0.321353018283844, "learning_rate": 1.3933804993733615e-05, "loss": 0.3805, "step": 20000 }, { "epoch": 0.3713385043238045, "grad_norm": 0.35121700167655945, "learning_rate": 1.393273252373597e-05, "loss": 0.2556, "step": 20002 }, { "epoch": 0.3713756344612231, "grad_norm": 0.43175145983695984, "learning_rate": 1.3931660000225615e-05, "loss": 0.2147, "step": 20004 }, { "epoch": 0.37141276459864175, "grad_norm": 0.35854315757751465, "learning_rate": 1.3930587423217144e-05, "loss": 0.2244, "step": 20006 }, { "epoch": 0.37144989473606044, "grad_norm": 0.3761281967163086, "learning_rate": 1.3929514792725155e-05, "loss": 0.2454, "step": 20008 }, { "epoch": 0.37148702487347907, "grad_norm": 0.2423001378774643, "learning_rate": 1.3928442108764233e-05, "loss": 0.4407, "step": 20010 }, { "epoch": 0.3715241550108977, "grad_norm": 0.43786919116973877, "learning_rate": 1.3927369371348985e-05, "loss": 0.3069, "step": 20012 }, { "epoch": 0.3715612851483163, "grad_norm": 0.28782787919044495, "learning_rate": 1.3926296580494003e-05, "loss": 0.2496, "step": 20014 }, { "epoch": 0.37159841528573495, "grad_norm": 0.46189549565315247, "learning_rate": 1.3925223736213888e-05, "loss": 0.2131, "step": 20016 }, { "epoch": 0.37163554542315363, "grad_norm": 0.6770983338356018, "learning_rate": 1.3924150838523232e-05, "loss": 0.4071, "step": 20018 }, { "epoch": 0.37167267556057226, "grad_norm": 0.3633163571357727, "learning_rate": 1.392307788743664e-05, "loss": 0.3694, "step": 20020 }, { "epoch": 0.3717098056979909, "grad_norm": 0.3335234522819519, "learning_rate": 1.3922004882968705e-05, "loss": 0.2469, "step": 20022 }, { "epoch": 0.3717469358354095, "grad_norm": 0.3177517056465149, "learning_rate": 1.3920931825134034e-05, "loss": 0.3049, "step": 20024 }, { "epoch": 0.37178406597282815, "grad_norm": 0.3574046492576599, "learning_rate": 1.3919858713947228e-05, "loss": 0.235, "step": 20026 }, { "epoch": 0.3718211961102468, "grad_norm": 0.3220933973789215, "learning_rate": 1.391878554942288e-05, "loss": 0.2809, "step": 20028 }, { "epoch": 0.37185832624766546, "grad_norm": 0.5346874594688416, "learning_rate": 1.3917712331575601e-05, "loss": 0.2479, "step": 20030 }, { "epoch": 0.3718954563850841, "grad_norm": 0.37695932388305664, "learning_rate": 1.3916639060419993e-05, "loss": 0.3018, "step": 20032 }, { "epoch": 0.3719325865225027, "grad_norm": 0.5514830946922302, "learning_rate": 1.391556573597066e-05, "loss": 0.4107, "step": 20034 }, { "epoch": 0.37196971665992135, "grad_norm": 0.3860941231250763, "learning_rate": 1.3914492358242206e-05, "loss": 0.2678, "step": 20036 }, { "epoch": 0.37200684679734, "grad_norm": 0.40493252873420715, "learning_rate": 1.3913418927249233e-05, "loss": 0.2871, "step": 20038 }, { "epoch": 0.37204397693475866, "grad_norm": 0.2914874255657196, "learning_rate": 1.3912345443006355e-05, "loss": 0.2694, "step": 20040 }, { "epoch": 0.3720811070721773, "grad_norm": 0.40273770689964294, "learning_rate": 1.391127190552817e-05, "loss": 0.1674, "step": 20042 }, { "epoch": 0.3721182372095959, "grad_norm": 0.2518525719642639, "learning_rate": 1.391019831482929e-05, "loss": 0.2432, "step": 20044 }, { "epoch": 0.37215536734701454, "grad_norm": 0.35817602276802063, "learning_rate": 1.390912467092433e-05, "loss": 0.3608, "step": 20046 }, { "epoch": 0.37219249748443317, "grad_norm": 0.682751476764679, "learning_rate": 1.3908050973827887e-05, "loss": 0.182, "step": 20048 }, { "epoch": 0.37222962762185186, "grad_norm": 0.526157557964325, "learning_rate": 1.3906977223554576e-05, "loss": 0.2158, "step": 20050 }, { "epoch": 0.3722667577592705, "grad_norm": 0.29240548610687256, "learning_rate": 1.390590342011901e-05, "loss": 0.666, "step": 20052 }, { "epoch": 0.3723038878966891, "grad_norm": 0.5400998592376709, "learning_rate": 1.3904829563535796e-05, "loss": 0.2926, "step": 20054 }, { "epoch": 0.37234101803410774, "grad_norm": 0.4679791033267975, "learning_rate": 1.3903755653819549e-05, "loss": 0.2421, "step": 20056 }, { "epoch": 0.37237814817152637, "grad_norm": 0.32286354899406433, "learning_rate": 1.390268169098488e-05, "loss": 0.2445, "step": 20058 }, { "epoch": 0.372415278308945, "grad_norm": 0.3165193498134613, "learning_rate": 1.3901607675046402e-05, "loss": 0.1307, "step": 20060 }, { "epoch": 0.3724524084463637, "grad_norm": 0.6171788573265076, "learning_rate": 1.3900533606018732e-05, "loss": 0.3297, "step": 20062 }, { "epoch": 0.3724895385837823, "grad_norm": 0.4425636827945709, "learning_rate": 1.3899459483916481e-05, "loss": 0.3031, "step": 20064 }, { "epoch": 0.37252666872120094, "grad_norm": 0.4312133193016052, "learning_rate": 1.389838530875427e-05, "loss": 0.2747, "step": 20066 }, { "epoch": 0.37256379885861957, "grad_norm": 0.3306850790977478, "learning_rate": 1.389731108054671e-05, "loss": 0.1685, "step": 20068 }, { "epoch": 0.3726009289960382, "grad_norm": 0.3475620448589325, "learning_rate": 1.3896236799308422e-05, "loss": 0.3834, "step": 20070 }, { "epoch": 0.3726380591334569, "grad_norm": 0.30715104937553406, "learning_rate": 1.3895162465054019e-05, "loss": 0.1464, "step": 20072 }, { "epoch": 0.3726751892708755, "grad_norm": 0.40902286767959595, "learning_rate": 1.389408807779812e-05, "loss": 0.1452, "step": 20074 }, { "epoch": 0.37271231940829413, "grad_norm": 0.5330752730369568, "learning_rate": 1.3893013637555353e-05, "loss": 0.5111, "step": 20076 }, { "epoch": 0.37274944954571276, "grad_norm": 0.2899560332298279, "learning_rate": 1.3891939144340328e-05, "loss": 0.1988, "step": 20078 }, { "epoch": 0.3727865796831314, "grad_norm": 0.7345959544181824, "learning_rate": 1.3890864598167668e-05, "loss": 0.4382, "step": 20080 }, { "epoch": 0.37282370982055, "grad_norm": 0.31923699378967285, "learning_rate": 1.3889789999051995e-05, "loss": 0.1323, "step": 20082 }, { "epoch": 0.3728608399579687, "grad_norm": 0.3731330633163452, "learning_rate": 1.3888715347007932e-05, "loss": 0.2733, "step": 20084 }, { "epoch": 0.37289797009538733, "grad_norm": 0.2001037448644638, "learning_rate": 1.3887640642050102e-05, "loss": 0.189, "step": 20086 }, { "epoch": 0.37293510023280596, "grad_norm": 0.3716852366924286, "learning_rate": 1.3886565884193129e-05, "loss": 0.4738, "step": 20088 }, { "epoch": 0.3729722303702246, "grad_norm": 0.3844122886657715, "learning_rate": 1.3885491073451634e-05, "loss": 0.3485, "step": 20090 }, { "epoch": 0.3730093605076432, "grad_norm": 0.29949280619621277, "learning_rate": 1.3884416209840243e-05, "loss": 0.215, "step": 20092 }, { "epoch": 0.3730464906450619, "grad_norm": 0.35048598051071167, "learning_rate": 1.3883341293373582e-05, "loss": 0.3716, "step": 20094 }, { "epoch": 0.37308362078248053, "grad_norm": 0.4095684289932251, "learning_rate": 1.3882266324066282e-05, "loss": 0.2034, "step": 20096 }, { "epoch": 0.37312075091989916, "grad_norm": 0.3948003351688385, "learning_rate": 1.3881191301932963e-05, "loss": 0.3831, "step": 20098 }, { "epoch": 0.3731578810573178, "grad_norm": 0.38309329748153687, "learning_rate": 1.3880116226988258e-05, "loss": 0.5074, "step": 20100 }, { "epoch": 0.3731950111947364, "grad_norm": 0.3737712502479553, "learning_rate": 1.3879041099246792e-05, "loss": 0.4418, "step": 20102 }, { "epoch": 0.37323214133215504, "grad_norm": 0.5642034411430359, "learning_rate": 1.3877965918723194e-05, "loss": 0.3944, "step": 20104 }, { "epoch": 0.3732692714695737, "grad_norm": 0.3231869637966156, "learning_rate": 1.3876890685432099e-05, "loss": 0.3836, "step": 20106 }, { "epoch": 0.37330640160699236, "grad_norm": 0.39459705352783203, "learning_rate": 1.3875815399388133e-05, "loss": 0.2053, "step": 20108 }, { "epoch": 0.373343531744411, "grad_norm": 0.322160929441452, "learning_rate": 1.3874740060605931e-05, "loss": 0.2356, "step": 20110 }, { "epoch": 0.3733806618818296, "grad_norm": 0.3258999288082123, "learning_rate": 1.3873664669100118e-05, "loss": 0.2407, "step": 20112 }, { "epoch": 0.37341779201924824, "grad_norm": 0.5043696165084839, "learning_rate": 1.3872589224885335e-05, "loss": 0.4399, "step": 20114 }, { "epoch": 0.3734549221566669, "grad_norm": 0.4689836800098419, "learning_rate": 1.3871513727976212e-05, "loss": 0.3366, "step": 20116 }, { "epoch": 0.37349205229408555, "grad_norm": 0.4764638841152191, "learning_rate": 1.3870438178387382e-05, "loss": 0.1361, "step": 20118 }, { "epoch": 0.3735291824315042, "grad_norm": 0.305999219417572, "learning_rate": 1.3869362576133485e-05, "loss": 0.1727, "step": 20120 }, { "epoch": 0.3735663125689228, "grad_norm": 0.3126106262207031, "learning_rate": 1.3868286921229153e-05, "loss": 0.3255, "step": 20122 }, { "epoch": 0.37360344270634144, "grad_norm": 0.3261148929595947, "learning_rate": 1.386721121368902e-05, "loss": 0.4469, "step": 20124 }, { "epoch": 0.3736405728437601, "grad_norm": 0.30270540714263916, "learning_rate": 1.3866135453527727e-05, "loss": 0.3831, "step": 20126 }, { "epoch": 0.37367770298117875, "grad_norm": 0.42626285552978516, "learning_rate": 1.3865059640759913e-05, "loss": 0.324, "step": 20128 }, { "epoch": 0.3737148331185974, "grad_norm": 0.4067237377166748, "learning_rate": 1.3863983775400214e-05, "loss": 0.1985, "step": 20130 }, { "epoch": 0.373751963256016, "grad_norm": 0.39006122946739197, "learning_rate": 1.3862907857463268e-05, "loss": 0.4717, "step": 20132 }, { "epoch": 0.37378909339343463, "grad_norm": 0.5019491314888, "learning_rate": 1.3861831886963718e-05, "loss": 0.4423, "step": 20134 }, { "epoch": 0.37382622353085326, "grad_norm": 0.4504082500934601, "learning_rate": 1.3860755863916203e-05, "loss": 0.1617, "step": 20136 }, { "epoch": 0.37386335366827195, "grad_norm": 0.4010758697986603, "learning_rate": 1.3859679788335363e-05, "loss": 0.3667, "step": 20138 }, { "epoch": 0.3739004838056906, "grad_norm": 0.4239271879196167, "learning_rate": 1.3858603660235846e-05, "loss": 0.2497, "step": 20140 }, { "epoch": 0.3739376139431092, "grad_norm": 0.3606035113334656, "learning_rate": 1.3857527479632289e-05, "loss": 0.4436, "step": 20142 }, { "epoch": 0.37397474408052783, "grad_norm": 0.5415440797805786, "learning_rate": 1.3856451246539337e-05, "loss": 0.2846, "step": 20144 }, { "epoch": 0.37401187421794646, "grad_norm": 0.5108171701431274, "learning_rate": 1.3855374960971637e-05, "loss": 0.3918, "step": 20146 }, { "epoch": 0.37404900435536514, "grad_norm": 0.387134850025177, "learning_rate": 1.3854298622943832e-05, "loss": 0.3604, "step": 20148 }, { "epoch": 0.3740861344927838, "grad_norm": 0.3296907842159271, "learning_rate": 1.3853222232470564e-05, "loss": 0.3141, "step": 20150 }, { "epoch": 0.3741232646302024, "grad_norm": 0.3946489691734314, "learning_rate": 1.3852145789566487e-05, "loss": 0.3296, "step": 20152 }, { "epoch": 0.37416039476762103, "grad_norm": 0.269846647977829, "learning_rate": 1.3851069294246244e-05, "loss": 0.1606, "step": 20154 }, { "epoch": 0.37419752490503966, "grad_norm": 0.3285703957080841, "learning_rate": 1.3849992746524481e-05, "loss": 0.201, "step": 20156 }, { "epoch": 0.3742346550424583, "grad_norm": 0.40791597962379456, "learning_rate": 1.384891614641585e-05, "loss": 0.3791, "step": 20158 }, { "epoch": 0.37427178517987697, "grad_norm": 0.38663923740386963, "learning_rate": 1.3847839493935e-05, "loss": 0.1598, "step": 20160 }, { "epoch": 0.3743089153172956, "grad_norm": 0.2794301211833954, "learning_rate": 1.384676278909658e-05, "loss": 0.4217, "step": 20162 }, { "epoch": 0.3743460454547142, "grad_norm": 0.40352746844291687, "learning_rate": 1.3845686031915238e-05, "loss": 0.2545, "step": 20164 }, { "epoch": 0.37438317559213286, "grad_norm": 0.514489471912384, "learning_rate": 1.3844609222405632e-05, "loss": 0.1664, "step": 20166 }, { "epoch": 0.3744203057295515, "grad_norm": 0.6553933620452881, "learning_rate": 1.3843532360582408e-05, "loss": 0.2509, "step": 20168 }, { "epoch": 0.37445743586697017, "grad_norm": 0.40633782744407654, "learning_rate": 1.3842455446460221e-05, "loss": 0.5374, "step": 20170 }, { "epoch": 0.3744945660043888, "grad_norm": 0.49291539192199707, "learning_rate": 1.3841378480053726e-05, "loss": 0.2023, "step": 20172 }, { "epoch": 0.3745316961418074, "grad_norm": 0.3370891511440277, "learning_rate": 1.3840301461377577e-05, "loss": 0.3819, "step": 20174 }, { "epoch": 0.37456882627922605, "grad_norm": 0.4726948142051697, "learning_rate": 1.3839224390446425e-05, "loss": 0.3378, "step": 20176 }, { "epoch": 0.3746059564166447, "grad_norm": 0.33347535133361816, "learning_rate": 1.3838147267274935e-05, "loss": 0.2956, "step": 20178 }, { "epoch": 0.3746430865540633, "grad_norm": 0.4049592912197113, "learning_rate": 1.3837070091877752e-05, "loss": 0.3076, "step": 20180 }, { "epoch": 0.374680216691482, "grad_norm": 0.23513221740722656, "learning_rate": 1.3835992864269538e-05, "loss": 0.4452, "step": 20182 }, { "epoch": 0.3747173468289006, "grad_norm": 0.49878957867622375, "learning_rate": 1.3834915584464956e-05, "loss": 0.3168, "step": 20184 }, { "epoch": 0.37475447696631925, "grad_norm": 0.27951204776763916, "learning_rate": 1.3833838252478655e-05, "loss": 0.2515, "step": 20186 }, { "epoch": 0.3747916071037379, "grad_norm": 0.4702208638191223, "learning_rate": 1.3832760868325302e-05, "loss": 0.2061, "step": 20188 }, { "epoch": 0.3748287372411565, "grad_norm": 0.27690836787223816, "learning_rate": 1.3831683432019553e-05, "loss": 0.108, "step": 20190 }, { "epoch": 0.3748658673785752, "grad_norm": 0.3577194809913635, "learning_rate": 1.3830605943576068e-05, "loss": 0.2822, "step": 20192 }, { "epoch": 0.3749029975159938, "grad_norm": 0.46531084179878235, "learning_rate": 1.3829528403009513e-05, "loss": 0.301, "step": 20194 }, { "epoch": 0.37494012765341245, "grad_norm": 0.31811726093292236, "learning_rate": 1.3828450810334547e-05, "loss": 0.3618, "step": 20196 }, { "epoch": 0.3749772577908311, "grad_norm": 0.34373581409454346, "learning_rate": 1.3827373165565829e-05, "loss": 0.388, "step": 20198 }, { "epoch": 0.3750143879282497, "grad_norm": 0.399286687374115, "learning_rate": 1.3826295468718029e-05, "loss": 0.1814, "step": 20200 }, { "epoch": 0.3750515180656684, "grad_norm": 0.42931726574897766, "learning_rate": 1.382521771980581e-05, "loss": 0.2449, "step": 20202 }, { "epoch": 0.375088648203087, "grad_norm": 0.2964993417263031, "learning_rate": 1.3824139918843835e-05, "loss": 0.2523, "step": 20204 }, { "epoch": 0.37512577834050564, "grad_norm": 0.3761836290359497, "learning_rate": 1.382306206584677e-05, "loss": 0.4636, "step": 20206 }, { "epoch": 0.3751629084779243, "grad_norm": 0.47489526867866516, "learning_rate": 1.3821984160829282e-05, "loss": 0.3403, "step": 20208 }, { "epoch": 0.3752000386153429, "grad_norm": 0.42662665247917175, "learning_rate": 1.3820906203806039e-05, "loss": 0.1792, "step": 20210 }, { "epoch": 0.37523716875276153, "grad_norm": 0.5387255549430847, "learning_rate": 1.3819828194791705e-05, "loss": 0.3561, "step": 20212 }, { "epoch": 0.3752742988901802, "grad_norm": 0.36297503113746643, "learning_rate": 1.3818750133800952e-05, "loss": 0.2866, "step": 20214 }, { "epoch": 0.37531142902759884, "grad_norm": 0.26776930689811707, "learning_rate": 1.3817672020848449e-05, "loss": 0.5039, "step": 20216 }, { "epoch": 0.37534855916501747, "grad_norm": 0.3598172664642334, "learning_rate": 1.3816593855948863e-05, "loss": 0.2748, "step": 20218 }, { "epoch": 0.3753856893024361, "grad_norm": 0.40361225605010986, "learning_rate": 1.3815515639116868e-05, "loss": 0.1978, "step": 20220 }, { "epoch": 0.3754228194398547, "grad_norm": 0.49906542897224426, "learning_rate": 1.3814437370367135e-05, "loss": 0.2688, "step": 20222 }, { "epoch": 0.3754599495772734, "grad_norm": 0.19722506403923035, "learning_rate": 1.3813359049714332e-05, "loss": 0.3045, "step": 20224 }, { "epoch": 0.37549707971469204, "grad_norm": 0.21416231989860535, "learning_rate": 1.3812280677173138e-05, "loss": 0.1203, "step": 20226 }, { "epoch": 0.37553420985211067, "grad_norm": 0.29404380917549133, "learning_rate": 1.3811202252758223e-05, "loss": 0.382, "step": 20228 }, { "epoch": 0.3755713399895293, "grad_norm": 0.4771401286125183, "learning_rate": 1.3810123776484259e-05, "loss": 0.3273, "step": 20230 }, { "epoch": 0.3756084701269479, "grad_norm": 0.43984588980674744, "learning_rate": 1.3809045248365923e-05, "loss": 0.1622, "step": 20232 }, { "epoch": 0.37564560026436655, "grad_norm": 0.404715359210968, "learning_rate": 1.3807966668417891e-05, "loss": 0.2863, "step": 20234 }, { "epoch": 0.37568273040178524, "grad_norm": 0.4546372592449188, "learning_rate": 1.380688803665484e-05, "loss": 0.2462, "step": 20236 }, { "epoch": 0.37571986053920386, "grad_norm": 0.36051028966903687, "learning_rate": 1.3805809353091446e-05, "loss": 0.2272, "step": 20238 }, { "epoch": 0.3757569906766225, "grad_norm": 0.3537476658821106, "learning_rate": 1.3804730617742386e-05, "loss": 0.1723, "step": 20240 }, { "epoch": 0.3757941208140411, "grad_norm": 0.36529824137687683, "learning_rate": 1.3803651830622338e-05, "loss": 0.5659, "step": 20242 }, { "epoch": 0.37583125095145975, "grad_norm": 0.40430253744125366, "learning_rate": 1.3802572991745979e-05, "loss": 0.3682, "step": 20244 }, { "epoch": 0.37586838108887843, "grad_norm": 0.39774322509765625, "learning_rate": 1.3801494101127997e-05, "loss": 0.2397, "step": 20246 }, { "epoch": 0.37590551122629706, "grad_norm": 0.3177061378955841, "learning_rate": 1.3800415158783065e-05, "loss": 0.3003, "step": 20248 }, { "epoch": 0.3759426413637157, "grad_norm": 0.35512226819992065, "learning_rate": 1.3799336164725865e-05, "loss": 0.3467, "step": 20250 }, { "epoch": 0.3759797715011343, "grad_norm": 0.470333069562912, "learning_rate": 1.3798257118971079e-05, "loss": 0.2541, "step": 20252 }, { "epoch": 0.37601690163855295, "grad_norm": 0.433196485042572, "learning_rate": 1.3797178021533396e-05, "loss": 0.3809, "step": 20254 }, { "epoch": 0.3760540317759716, "grad_norm": 0.3638652563095093, "learning_rate": 1.3796098872427487e-05, "loss": 0.3122, "step": 20256 }, { "epoch": 0.37609116191339026, "grad_norm": 0.2668880522251129, "learning_rate": 1.3795019671668048e-05, "loss": 0.1882, "step": 20258 }, { "epoch": 0.3761282920508089, "grad_norm": 0.29094910621643066, "learning_rate": 1.379394041926976e-05, "loss": 0.2471, "step": 20260 }, { "epoch": 0.3761654221882275, "grad_norm": 0.3126886785030365, "learning_rate": 1.3792861115247304e-05, "loss": 0.2921, "step": 20262 }, { "epoch": 0.37620255232564614, "grad_norm": 0.4343261122703552, "learning_rate": 1.3791781759615368e-05, "loss": 0.4161, "step": 20264 }, { "epoch": 0.3762396824630648, "grad_norm": 0.3110836446285248, "learning_rate": 1.3790702352388642e-05, "loss": 0.3998, "step": 20266 }, { "epoch": 0.37627681260048346, "grad_norm": 0.38669630885124207, "learning_rate": 1.3789622893581812e-05, "loss": 0.2528, "step": 20268 }, { "epoch": 0.3763139427379021, "grad_norm": 0.25040751695632935, "learning_rate": 1.3788543383209565e-05, "loss": 0.3041, "step": 20270 }, { "epoch": 0.3763510728753207, "grad_norm": 0.43754398822784424, "learning_rate": 1.3787463821286592e-05, "loss": 0.2961, "step": 20272 }, { "epoch": 0.37638820301273934, "grad_norm": 0.3090064525604248, "learning_rate": 1.3786384207827577e-05, "loss": 0.3027, "step": 20274 }, { "epoch": 0.37642533315015797, "grad_norm": 0.4928251802921295, "learning_rate": 1.3785304542847215e-05, "loss": 0.3496, "step": 20276 }, { "epoch": 0.37646246328757665, "grad_norm": 0.5487385988235474, "learning_rate": 1.3784224826360202e-05, "loss": 0.3784, "step": 20278 }, { "epoch": 0.3764995934249953, "grad_norm": 0.49109455943107605, "learning_rate": 1.3783145058381219e-05, "loss": 0.2521, "step": 20280 }, { "epoch": 0.3765367235624139, "grad_norm": 0.37386277318000793, "learning_rate": 1.3782065238924966e-05, "loss": 0.2755, "step": 20282 }, { "epoch": 0.37657385369983254, "grad_norm": 0.4417116641998291, "learning_rate": 1.378098536800613e-05, "loss": 0.2784, "step": 20284 }, { "epoch": 0.37661098383725117, "grad_norm": 0.3929435610771179, "learning_rate": 1.3779905445639414e-05, "loss": 0.1653, "step": 20286 }, { "epoch": 0.3766481139746698, "grad_norm": 0.44289231300354004, "learning_rate": 1.3778825471839503e-05, "loss": 0.4669, "step": 20288 }, { "epoch": 0.3766852441120885, "grad_norm": 0.4003215730190277, "learning_rate": 1.3777745446621098e-05, "loss": 0.3566, "step": 20290 }, { "epoch": 0.3767223742495071, "grad_norm": 0.31513309478759766, "learning_rate": 1.3776665369998895e-05, "loss": 0.3176, "step": 20292 }, { "epoch": 0.37675950438692574, "grad_norm": 0.3404492437839508, "learning_rate": 1.3775585241987584e-05, "loss": 0.2161, "step": 20294 }, { "epoch": 0.37679663452434436, "grad_norm": 0.31017041206359863, "learning_rate": 1.3774505062601868e-05, "loss": 0.3265, "step": 20296 }, { "epoch": 0.376833764661763, "grad_norm": 0.39869487285614014, "learning_rate": 1.3773424831856448e-05, "loss": 0.329, "step": 20298 }, { "epoch": 0.3768708947991817, "grad_norm": 0.3715631663799286, "learning_rate": 1.3772344549766017e-05, "loss": 0.3852, "step": 20300 }, { "epoch": 0.3769080249366003, "grad_norm": 0.3697710931301117, "learning_rate": 1.3771264216345276e-05, "loss": 0.2902, "step": 20302 }, { "epoch": 0.37694515507401893, "grad_norm": 0.4216092526912689, "learning_rate": 1.3770183831608925e-05, "loss": 0.1894, "step": 20304 }, { "epoch": 0.37698228521143756, "grad_norm": 0.5225966572761536, "learning_rate": 1.3769103395571664e-05, "loss": 0.2304, "step": 20306 }, { "epoch": 0.3770194153488562, "grad_norm": 0.2896948754787445, "learning_rate": 1.3768022908248196e-05, "loss": 0.2495, "step": 20308 }, { "epoch": 0.3770565454862748, "grad_norm": 0.49638232588768005, "learning_rate": 1.3766942369653222e-05, "loss": 0.2503, "step": 20310 }, { "epoch": 0.3770936756236935, "grad_norm": 0.23471103608608246, "learning_rate": 1.3765861779801448e-05, "loss": 0.3026, "step": 20312 }, { "epoch": 0.37713080576111213, "grad_norm": 0.36043858528137207, "learning_rate": 1.3764781138707573e-05, "loss": 0.2585, "step": 20314 }, { "epoch": 0.37716793589853076, "grad_norm": 0.6257306337356567, "learning_rate": 1.3763700446386306e-05, "loss": 0.1778, "step": 20316 }, { "epoch": 0.3772050660359494, "grad_norm": 0.253715842962265, "learning_rate": 1.3762619702852347e-05, "loss": 0.2372, "step": 20318 }, { "epoch": 0.377242196173368, "grad_norm": 0.38302934169769287, "learning_rate": 1.3761538908120404e-05, "loss": 0.3603, "step": 20320 }, { "epoch": 0.3772793263107867, "grad_norm": 0.3651839792728424, "learning_rate": 1.3760458062205185e-05, "loss": 0.4339, "step": 20322 }, { "epoch": 0.37731645644820533, "grad_norm": 0.3702917695045471, "learning_rate": 1.3759377165121397e-05, "loss": 0.2648, "step": 20324 }, { "epoch": 0.37735358658562396, "grad_norm": 0.36105290055274963, "learning_rate": 1.3758296216883744e-05, "loss": 0.4399, "step": 20326 }, { "epoch": 0.3773907167230426, "grad_norm": 0.3261259198188782, "learning_rate": 1.3757215217506936e-05, "loss": 0.2304, "step": 20328 }, { "epoch": 0.3774278468604612, "grad_norm": 0.3453252911567688, "learning_rate": 1.3756134167005686e-05, "loss": 0.2952, "step": 20330 }, { "epoch": 0.37746497699787984, "grad_norm": 0.3901367485523224, "learning_rate": 1.37550530653947e-05, "loss": 0.4554, "step": 20332 }, { "epoch": 0.3775021071352985, "grad_norm": 0.29137834906578064, "learning_rate": 1.3753971912688688e-05, "loss": 0.3514, "step": 20334 }, { "epoch": 0.37753923727271715, "grad_norm": 0.3398778438568115, "learning_rate": 1.3752890708902366e-05, "loss": 0.3795, "step": 20336 }, { "epoch": 0.3775763674101358, "grad_norm": 0.39813128113746643, "learning_rate": 1.375180945405044e-05, "loss": 0.4359, "step": 20338 }, { "epoch": 0.3776134975475544, "grad_norm": 0.46451589465141296, "learning_rate": 1.3750728148147625e-05, "loss": 0.3998, "step": 20340 }, { "epoch": 0.37765062768497304, "grad_norm": 0.5691365003585815, "learning_rate": 1.3749646791208635e-05, "loss": 0.2756, "step": 20342 }, { "epoch": 0.3776877578223917, "grad_norm": 0.30361229181289673, "learning_rate": 1.3748565383248187e-05, "loss": 0.2064, "step": 20344 }, { "epoch": 0.37772488795981035, "grad_norm": 0.43304821848869324, "learning_rate": 1.3747483924280989e-05, "loss": 0.3243, "step": 20346 }, { "epoch": 0.377762018097229, "grad_norm": 0.49435749650001526, "learning_rate": 1.374640241432176e-05, "loss": 0.2634, "step": 20348 }, { "epoch": 0.3777991482346476, "grad_norm": 0.4735288918018341, "learning_rate": 1.3745320853385215e-05, "loss": 0.2771, "step": 20350 }, { "epoch": 0.37783627837206624, "grad_norm": 0.4788779616355896, "learning_rate": 1.3744239241486072e-05, "loss": 0.3688, "step": 20352 }, { "epoch": 0.3778734085094849, "grad_norm": 0.40775758028030396, "learning_rate": 1.3743157578639053e-05, "loss": 0.4317, "step": 20354 }, { "epoch": 0.37791053864690355, "grad_norm": 0.3843857944011688, "learning_rate": 1.3742075864858866e-05, "loss": 0.3096, "step": 20356 }, { "epoch": 0.3779476687843222, "grad_norm": 0.3461295962333679, "learning_rate": 1.3740994100160236e-05, "loss": 0.3152, "step": 20358 }, { "epoch": 0.3779847989217408, "grad_norm": 0.35579144954681396, "learning_rate": 1.3739912284557884e-05, "loss": 0.2204, "step": 20360 }, { "epoch": 0.37802192905915943, "grad_norm": 0.3175497353076935, "learning_rate": 1.3738830418066526e-05, "loss": 0.3939, "step": 20362 }, { "epoch": 0.37805905919657806, "grad_norm": 0.4204776883125305, "learning_rate": 1.3737748500700888e-05, "loss": 0.2641, "step": 20364 }, { "epoch": 0.37809618933399675, "grad_norm": 0.3607107400894165, "learning_rate": 1.3736666532475684e-05, "loss": 0.3211, "step": 20366 }, { "epoch": 0.3781333194714154, "grad_norm": 0.4271625578403473, "learning_rate": 1.3735584513405647e-05, "loss": 0.1706, "step": 20368 }, { "epoch": 0.378170449608834, "grad_norm": 0.18619593977928162, "learning_rate": 1.3734502443505488e-05, "loss": 0.3222, "step": 20370 }, { "epoch": 0.37820757974625263, "grad_norm": 0.4362223744392395, "learning_rate": 1.3733420322789939e-05, "loss": 0.284, "step": 20372 }, { "epoch": 0.37824470988367126, "grad_norm": 0.4553815424442291, "learning_rate": 1.3732338151273723e-05, "loss": 0.3589, "step": 20374 }, { "epoch": 0.37828184002108994, "grad_norm": 0.4997227191925049, "learning_rate": 1.3731255928971567e-05, "loss": 0.2397, "step": 20376 }, { "epoch": 0.37831897015850857, "grad_norm": 0.3272298276424408, "learning_rate": 1.3730173655898191e-05, "loss": 0.3295, "step": 20378 }, { "epoch": 0.3783561002959272, "grad_norm": 0.3228127360343933, "learning_rate": 1.3729091332068327e-05, "loss": 0.3993, "step": 20380 }, { "epoch": 0.37839323043334583, "grad_norm": 0.35237592458724976, "learning_rate": 1.3728008957496697e-05, "loss": 0.3585, "step": 20382 }, { "epoch": 0.37843036057076446, "grad_norm": 0.5042340755462646, "learning_rate": 1.3726926532198035e-05, "loss": 0.2242, "step": 20384 }, { "epoch": 0.3784674907081831, "grad_norm": 0.4173370897769928, "learning_rate": 1.3725844056187065e-05, "loss": 0.3806, "step": 20386 }, { "epoch": 0.37850462084560177, "grad_norm": 0.49067965149879456, "learning_rate": 1.3724761529478516e-05, "loss": 0.2187, "step": 20388 }, { "epoch": 0.3785417509830204, "grad_norm": 0.3630199432373047, "learning_rate": 1.3723678952087118e-05, "loss": 0.1061, "step": 20390 }, { "epoch": 0.378578881120439, "grad_norm": 0.42760494351387024, "learning_rate": 1.3722596324027608e-05, "loss": 0.5486, "step": 20392 }, { "epoch": 0.37861601125785765, "grad_norm": 0.9868090152740479, "learning_rate": 1.372151364531471e-05, "loss": 0.295, "step": 20394 }, { "epoch": 0.3786531413952763, "grad_norm": 0.3924976885318756, "learning_rate": 1.3720430915963159e-05, "loss": 0.2392, "step": 20396 }, { "epoch": 0.37869027153269497, "grad_norm": 0.6594648361206055, "learning_rate": 1.371934813598769e-05, "loss": 0.2606, "step": 20398 }, { "epoch": 0.3787274016701136, "grad_norm": 0.3596559762954712, "learning_rate": 1.371826530540303e-05, "loss": 0.1531, "step": 20400 }, { "epoch": 0.3787645318075322, "grad_norm": 0.4754466116428375, "learning_rate": 1.3717182424223914e-05, "loss": 0.2637, "step": 20402 }, { "epoch": 0.37880166194495085, "grad_norm": 0.5291134119033813, "learning_rate": 1.3716099492465086e-05, "loss": 0.2715, "step": 20404 }, { "epoch": 0.3788387920823695, "grad_norm": 0.3805255591869354, "learning_rate": 1.3715016510141268e-05, "loss": 0.214, "step": 20406 }, { "epoch": 0.3788759222197881, "grad_norm": 0.2791435420513153, "learning_rate": 1.3713933477267207e-05, "loss": 0.384, "step": 20408 }, { "epoch": 0.3789130523572068, "grad_norm": 0.3975648880004883, "learning_rate": 1.3712850393857636e-05, "loss": 0.3435, "step": 20410 }, { "epoch": 0.3789501824946254, "grad_norm": 0.49346694350242615, "learning_rate": 1.3711767259927294e-05, "loss": 0.3415, "step": 20412 }, { "epoch": 0.37898731263204405, "grad_norm": 0.5182379484176636, "learning_rate": 1.3710684075490914e-05, "loss": 0.2778, "step": 20414 }, { "epoch": 0.3790244427694627, "grad_norm": 0.3699735999107361, "learning_rate": 1.370960084056324e-05, "loss": 0.3075, "step": 20416 }, { "epoch": 0.3790615729068813, "grad_norm": 0.3503396213054657, "learning_rate": 1.3708517555159011e-05, "loss": 0.27, "step": 20418 }, { "epoch": 0.3790987030443, "grad_norm": 0.3092592656612396, "learning_rate": 1.3707434219292966e-05, "loss": 0.3359, "step": 20420 }, { "epoch": 0.3791358331817186, "grad_norm": 0.3436076045036316, "learning_rate": 1.3706350832979845e-05, "loss": 0.3574, "step": 20422 }, { "epoch": 0.37917296331913725, "grad_norm": 0.42176553606987, "learning_rate": 1.3705267396234393e-05, "loss": 0.422, "step": 20424 }, { "epoch": 0.3792100934565559, "grad_norm": 0.48506563901901245, "learning_rate": 1.3704183909071346e-05, "loss": 0.2037, "step": 20426 }, { "epoch": 0.3792472235939745, "grad_norm": 0.3283042311668396, "learning_rate": 1.3703100371505458e-05, "loss": 0.3238, "step": 20428 }, { "epoch": 0.3792843537313932, "grad_norm": 0.5023981928825378, "learning_rate": 1.3702016783551464e-05, "loss": 0.294, "step": 20430 }, { "epoch": 0.3793214838688118, "grad_norm": 0.4818968176841736, "learning_rate": 1.3700933145224107e-05, "loss": 0.3597, "step": 20432 }, { "epoch": 0.37935861400623044, "grad_norm": 0.2782365679740906, "learning_rate": 1.3699849456538137e-05, "loss": 0.2668, "step": 20434 }, { "epoch": 0.37939574414364907, "grad_norm": 0.4432222843170166, "learning_rate": 1.36987657175083e-05, "loss": 0.3965, "step": 20436 }, { "epoch": 0.3794328742810677, "grad_norm": 0.3050204813480377, "learning_rate": 1.369768192814934e-05, "loss": 0.2497, "step": 20438 }, { "epoch": 0.37947000441848633, "grad_norm": 0.2594740092754364, "learning_rate": 1.3696598088476003e-05, "loss": 0.3189, "step": 20440 }, { "epoch": 0.379507134555905, "grad_norm": 0.3082748055458069, "learning_rate": 1.369551419850304e-05, "loss": 0.2091, "step": 20442 }, { "epoch": 0.37954426469332364, "grad_norm": 0.23749855160713196, "learning_rate": 1.3694430258245197e-05, "loss": 0.1367, "step": 20444 }, { "epoch": 0.37958139483074227, "grad_norm": 0.4700714945793152, "learning_rate": 1.3693346267717225e-05, "loss": 0.3756, "step": 20446 }, { "epoch": 0.3796185249681609, "grad_norm": 0.3161366581916809, "learning_rate": 1.3692262226933874e-05, "loss": 0.3495, "step": 20448 }, { "epoch": 0.3796556551055795, "grad_norm": 0.5001155138015747, "learning_rate": 1.3691178135909896e-05, "loss": 0.2445, "step": 20450 }, { "epoch": 0.3796927852429982, "grad_norm": 0.38339486718177795, "learning_rate": 1.3690093994660035e-05, "loss": 0.1631, "step": 20452 }, { "epoch": 0.37972991538041684, "grad_norm": 0.3934367001056671, "learning_rate": 1.3689009803199047e-05, "loss": 0.1345, "step": 20454 }, { "epoch": 0.37976704551783547, "grad_norm": 0.29895511269569397, "learning_rate": 1.368792556154169e-05, "loss": 0.1783, "step": 20456 }, { "epoch": 0.3798041756552541, "grad_norm": 0.45116275548934937, "learning_rate": 1.3686841269702708e-05, "loss": 0.1813, "step": 20458 }, { "epoch": 0.3798413057926727, "grad_norm": 0.4946790337562561, "learning_rate": 1.3685756927696865e-05, "loss": 0.2913, "step": 20460 }, { "epoch": 0.37987843593009135, "grad_norm": 0.28817740082740784, "learning_rate": 1.368467253553891e-05, "loss": 0.258, "step": 20462 }, { "epoch": 0.37991556606751004, "grad_norm": 0.4325798451900482, "learning_rate": 1.3683588093243594e-05, "loss": 0.2569, "step": 20464 }, { "epoch": 0.37995269620492866, "grad_norm": 0.4484389126300812, "learning_rate": 1.368250360082568e-05, "loss": 0.4341, "step": 20466 }, { "epoch": 0.3799898263423473, "grad_norm": 0.315391480922699, "learning_rate": 1.3681419058299925e-05, "loss": 0.3907, "step": 20468 }, { "epoch": 0.3800269564797659, "grad_norm": 0.648390531539917, "learning_rate": 1.3680334465681082e-05, "loss": 0.1124, "step": 20470 }, { "epoch": 0.38006408661718455, "grad_norm": 0.3056327998638153, "learning_rate": 1.3679249822983908e-05, "loss": 0.3437, "step": 20472 }, { "epoch": 0.38010121675460323, "grad_norm": 0.32072412967681885, "learning_rate": 1.3678165130223169e-05, "loss": 0.3675, "step": 20474 }, { "epoch": 0.38013834689202186, "grad_norm": 0.3778831958770752, "learning_rate": 1.3677080387413617e-05, "loss": 0.244, "step": 20476 }, { "epoch": 0.3801754770294405, "grad_norm": 0.31670743227005005, "learning_rate": 1.3675995594570016e-05, "loss": 0.3022, "step": 20478 }, { "epoch": 0.3802126071668591, "grad_norm": 0.25902318954467773, "learning_rate": 1.3674910751707125e-05, "loss": 0.3219, "step": 20480 }, { "epoch": 0.38024973730427775, "grad_norm": 0.35113245248794556, "learning_rate": 1.3673825858839708e-05, "loss": 0.3814, "step": 20482 }, { "epoch": 0.3802868674416964, "grad_norm": 0.35644057393074036, "learning_rate": 1.3672740915982523e-05, "loss": 0.2453, "step": 20484 }, { "epoch": 0.38032399757911506, "grad_norm": 0.320034384727478, "learning_rate": 1.3671655923150338e-05, "loss": 0.3547, "step": 20486 }, { "epoch": 0.3803611277165337, "grad_norm": 0.7678956985473633, "learning_rate": 1.3670570880357912e-05, "loss": 0.1925, "step": 20488 }, { "epoch": 0.3803982578539523, "grad_norm": 0.28292229771614075, "learning_rate": 1.3669485787620009e-05, "loss": 0.2242, "step": 20490 }, { "epoch": 0.38043538799137094, "grad_norm": 0.28078579902648926, "learning_rate": 1.3668400644951399e-05, "loss": 0.4978, "step": 20492 }, { "epoch": 0.38047251812878957, "grad_norm": 0.4316692054271698, "learning_rate": 1.3667315452366844e-05, "loss": 0.3052, "step": 20494 }, { "epoch": 0.38050964826620826, "grad_norm": 0.4917985796928406, "learning_rate": 1.366623020988111e-05, "loss": 0.4659, "step": 20496 }, { "epoch": 0.3805467784036269, "grad_norm": 0.4542140066623688, "learning_rate": 1.3665144917508963e-05, "loss": 0.2941, "step": 20498 }, { "epoch": 0.3805839085410455, "grad_norm": 0.4768338203430176, "learning_rate": 1.3664059575265175e-05, "loss": 0.1233, "step": 20500 }, { "epoch": 0.38062103867846414, "grad_norm": 0.3003799319267273, "learning_rate": 1.3662974183164512e-05, "loss": 0.1661, "step": 20502 }, { "epoch": 0.38065816881588277, "grad_norm": 0.4999409317970276, "learning_rate": 1.3661888741221738e-05, "loss": 0.4353, "step": 20504 }, { "epoch": 0.38069529895330145, "grad_norm": 0.2663573622703552, "learning_rate": 1.3660803249451631e-05, "loss": 0.4115, "step": 20506 }, { "epoch": 0.3807324290907201, "grad_norm": 1.525360107421875, "learning_rate": 1.3659717707868953e-05, "loss": 0.3512, "step": 20508 }, { "epoch": 0.3807695592281387, "grad_norm": 0.4079039692878723, "learning_rate": 1.365863211648848e-05, "loss": 0.2369, "step": 20510 }, { "epoch": 0.38080668936555734, "grad_norm": 0.3989945948123932, "learning_rate": 1.3657546475324985e-05, "loss": 0.4508, "step": 20512 }, { "epoch": 0.38084381950297597, "grad_norm": 0.36864134669303894, "learning_rate": 1.3656460784393239e-05, "loss": 0.4803, "step": 20514 }, { "epoch": 0.3808809496403946, "grad_norm": 0.4028772711753845, "learning_rate": 1.365537504370801e-05, "loss": 0.1342, "step": 20516 }, { "epoch": 0.3809180797778133, "grad_norm": 0.35606274008750916, "learning_rate": 1.365428925328408e-05, "loss": 0.4585, "step": 20518 }, { "epoch": 0.3809552099152319, "grad_norm": 0.2720849812030792, "learning_rate": 1.3653203413136215e-05, "loss": 0.3143, "step": 20520 }, { "epoch": 0.38099234005265054, "grad_norm": 0.45939990878105164, "learning_rate": 1.3652117523279198e-05, "loss": 0.3574, "step": 20522 }, { "epoch": 0.38102947019006916, "grad_norm": 0.37470516562461853, "learning_rate": 1.36510315837278e-05, "loss": 0.1504, "step": 20524 }, { "epoch": 0.3810666003274878, "grad_norm": 0.3238503336906433, "learning_rate": 1.3649945594496799e-05, "loss": 0.2712, "step": 20526 }, { "epoch": 0.3811037304649065, "grad_norm": 0.2754843533039093, "learning_rate": 1.3648859555600965e-05, "loss": 0.3999, "step": 20528 }, { "epoch": 0.3811408606023251, "grad_norm": 0.36991745233535767, "learning_rate": 1.364777346705509e-05, "loss": 0.5372, "step": 20530 }, { "epoch": 0.38117799073974373, "grad_norm": 0.43377766013145447, "learning_rate": 1.364668732887394e-05, "loss": 0.2779, "step": 20532 }, { "epoch": 0.38121512087716236, "grad_norm": 0.37827566266059875, "learning_rate": 1.36456011410723e-05, "loss": 0.2897, "step": 20534 }, { "epoch": 0.381252251014581, "grad_norm": 0.3209867477416992, "learning_rate": 1.3644514903664948e-05, "loss": 0.213, "step": 20536 }, { "epoch": 0.3812893811519996, "grad_norm": 0.7252829074859619, "learning_rate": 1.3643428616666666e-05, "loss": 0.425, "step": 20538 }, { "epoch": 0.3813265112894183, "grad_norm": 0.3354499042034149, "learning_rate": 1.3642342280092232e-05, "loss": 0.3663, "step": 20540 }, { "epoch": 0.38136364142683693, "grad_norm": 0.4506750702857971, "learning_rate": 1.3641255893956429e-05, "loss": 0.3269, "step": 20542 }, { "epoch": 0.38140077156425556, "grad_norm": 0.4305640161037445, "learning_rate": 1.3640169458274043e-05, "loss": 0.506, "step": 20544 }, { "epoch": 0.3814379017016742, "grad_norm": 0.5406296253204346, "learning_rate": 1.3639082973059853e-05, "loss": 0.4419, "step": 20546 }, { "epoch": 0.3814750318390928, "grad_norm": 0.6192275881767273, "learning_rate": 1.3637996438328642e-05, "loss": 0.3148, "step": 20548 }, { "epoch": 0.3815121619765115, "grad_norm": 0.34689196944236755, "learning_rate": 1.3636909854095199e-05, "loss": 0.3298, "step": 20550 }, { "epoch": 0.3815492921139301, "grad_norm": 0.5247117877006531, "learning_rate": 1.3635823220374307e-05, "loss": 0.2452, "step": 20552 }, { "epoch": 0.38158642225134876, "grad_norm": 0.31703129410743713, "learning_rate": 1.363473653718075e-05, "loss": 0.0497, "step": 20554 }, { "epoch": 0.3816235523887674, "grad_norm": 0.4014911353588104, "learning_rate": 1.3633649804529319e-05, "loss": 0.2181, "step": 20556 }, { "epoch": 0.381660682526186, "grad_norm": 0.3600418269634247, "learning_rate": 1.3632563022434795e-05, "loss": 0.2458, "step": 20558 }, { "epoch": 0.38169781266360464, "grad_norm": 0.3815743327140808, "learning_rate": 1.3631476190911967e-05, "loss": 0.3458, "step": 20560 }, { "epoch": 0.3817349428010233, "grad_norm": 0.2056141048669815, "learning_rate": 1.363038930997563e-05, "loss": 0.3166, "step": 20562 }, { "epoch": 0.38177207293844195, "grad_norm": 0.4001248776912689, "learning_rate": 1.3629302379640566e-05, "loss": 0.4189, "step": 20564 }, { "epoch": 0.3818092030758606, "grad_norm": 0.46026602387428284, "learning_rate": 1.362821539992157e-05, "loss": 0.2763, "step": 20566 }, { "epoch": 0.3818463332132792, "grad_norm": 0.3800026774406433, "learning_rate": 1.3627128370833428e-05, "loss": 0.2214, "step": 20568 }, { "epoch": 0.38188346335069784, "grad_norm": 0.382589727640152, "learning_rate": 1.3626041292390935e-05, "loss": 0.291, "step": 20570 }, { "epoch": 0.3819205934881165, "grad_norm": 0.5096129179000854, "learning_rate": 1.362495416460888e-05, "loss": 0.2788, "step": 20572 }, { "epoch": 0.38195772362553515, "grad_norm": 0.4187592566013336, "learning_rate": 1.3623866987502058e-05, "loss": 0.3177, "step": 20574 }, { "epoch": 0.3819948537629538, "grad_norm": 0.47894489765167236, "learning_rate": 1.3622779761085261e-05, "loss": 0.3229, "step": 20576 }, { "epoch": 0.3820319839003724, "grad_norm": 0.49744951725006104, "learning_rate": 1.362169248537328e-05, "loss": 0.3892, "step": 20578 }, { "epoch": 0.38206911403779104, "grad_norm": 0.49890074133872986, "learning_rate": 1.3620605160380915e-05, "loss": 0.1438, "step": 20580 }, { "epoch": 0.3821062441752097, "grad_norm": 0.5018547773361206, "learning_rate": 1.361951778612296e-05, "loss": 0.3175, "step": 20582 }, { "epoch": 0.38214337431262835, "grad_norm": 0.3734203577041626, "learning_rate": 1.3618430362614207e-05, "loss": 0.3407, "step": 20584 }, { "epoch": 0.382180504450047, "grad_norm": 0.33536097407341003, "learning_rate": 1.3617342889869457e-05, "loss": 0.283, "step": 20586 }, { "epoch": 0.3822176345874656, "grad_norm": 0.5764023065567017, "learning_rate": 1.3616255367903506e-05, "loss": 0.3569, "step": 20588 }, { "epoch": 0.38225476472488423, "grad_norm": 0.26459458470344543, "learning_rate": 1.361516779673115e-05, "loss": 0.3067, "step": 20590 }, { "epoch": 0.38229189486230286, "grad_norm": 0.4419151246547699, "learning_rate": 1.3614080176367189e-05, "loss": 0.3166, "step": 20592 }, { "epoch": 0.38232902499972155, "grad_norm": 0.34582841396331787, "learning_rate": 1.3612992506826421e-05, "loss": 0.298, "step": 20594 }, { "epoch": 0.3823661551371402, "grad_norm": 0.34223949909210205, "learning_rate": 1.3611904788123649e-05, "loss": 0.4859, "step": 20596 }, { "epoch": 0.3824032852745588, "grad_norm": 0.3498494625091553, "learning_rate": 1.3610817020273673e-05, "loss": 0.1676, "step": 20598 }, { "epoch": 0.38244041541197743, "grad_norm": 0.3764305114746094, "learning_rate": 1.3609729203291293e-05, "loss": 0.3048, "step": 20600 }, { "epoch": 0.38247754554939606, "grad_norm": 0.4367479681968689, "learning_rate": 1.3608641337191308e-05, "loss": 0.2758, "step": 20602 }, { "epoch": 0.38251467568681474, "grad_norm": 0.6940366625785828, "learning_rate": 1.3607553421988522e-05, "loss": 0.2847, "step": 20604 }, { "epoch": 0.38255180582423337, "grad_norm": 0.28144943714141846, "learning_rate": 1.3606465457697745e-05, "loss": 0.3064, "step": 20606 }, { "epoch": 0.382588935961652, "grad_norm": 0.4265226125717163, "learning_rate": 1.3605377444333774e-05, "loss": 0.1447, "step": 20608 }, { "epoch": 0.3826260660990706, "grad_norm": 0.48617100715637207, "learning_rate": 1.3604289381911412e-05, "loss": 0.2759, "step": 20610 }, { "epoch": 0.38266319623648926, "grad_norm": 0.537524402141571, "learning_rate": 1.3603201270445472e-05, "loss": 0.4904, "step": 20612 }, { "epoch": 0.3827003263739079, "grad_norm": 0.43786805868148804, "learning_rate": 1.3602113109950754e-05, "loss": 0.2519, "step": 20614 }, { "epoch": 0.38273745651132657, "grad_norm": 0.3110393285751343, "learning_rate": 1.3601024900442066e-05, "loss": 0.3612, "step": 20616 }, { "epoch": 0.3827745866487452, "grad_norm": 0.4043773114681244, "learning_rate": 1.3599936641934216e-05, "loss": 0.3865, "step": 20618 }, { "epoch": 0.3828117167861638, "grad_norm": 0.47072672843933105, "learning_rate": 1.3598848334442012e-05, "loss": 0.186, "step": 20620 }, { "epoch": 0.38284884692358245, "grad_norm": 0.2767280340194702, "learning_rate": 1.3597759977980258e-05, "loss": 0.3127, "step": 20622 }, { "epoch": 0.3828859770610011, "grad_norm": 0.35014182329177856, "learning_rate": 1.359667157256377e-05, "loss": 0.2374, "step": 20624 }, { "epoch": 0.38292310719841977, "grad_norm": 0.5060983896255493, "learning_rate": 1.3595583118207357e-05, "loss": 0.4177, "step": 20626 }, { "epoch": 0.3829602373358384, "grad_norm": 0.3250192701816559, "learning_rate": 1.3594494614925823e-05, "loss": 0.4588, "step": 20628 }, { "epoch": 0.382997367473257, "grad_norm": 0.4417460262775421, "learning_rate": 1.359340606273399e-05, "loss": 0.4208, "step": 20630 }, { "epoch": 0.38303449761067565, "grad_norm": 0.626419186592102, "learning_rate": 1.359231746164666e-05, "loss": 0.2677, "step": 20632 }, { "epoch": 0.3830716277480943, "grad_norm": 0.3920382261276245, "learning_rate": 1.3591228811678652e-05, "loss": 0.2439, "step": 20634 }, { "epoch": 0.3831087578855129, "grad_norm": 0.40673375129699707, "learning_rate": 1.3590140112844772e-05, "loss": 0.3038, "step": 20636 }, { "epoch": 0.3831458880229316, "grad_norm": 0.3109780550003052, "learning_rate": 1.3589051365159845e-05, "loss": 0.2835, "step": 20638 }, { "epoch": 0.3831830181603502, "grad_norm": 0.4205111563205719, "learning_rate": 1.3587962568638678e-05, "loss": 0.3475, "step": 20640 }, { "epoch": 0.38322014829776885, "grad_norm": 0.3837061822414398, "learning_rate": 1.3586873723296084e-05, "loss": 0.5226, "step": 20642 }, { "epoch": 0.3832572784351875, "grad_norm": 0.3452624976634979, "learning_rate": 1.3585784829146887e-05, "loss": 0.3252, "step": 20644 }, { "epoch": 0.3832944085726061, "grad_norm": 0.2979359030723572, "learning_rate": 1.3584695886205894e-05, "loss": 0.3098, "step": 20646 }, { "epoch": 0.3833315387100248, "grad_norm": 0.3637213110923767, "learning_rate": 1.358360689448793e-05, "loss": 0.3067, "step": 20648 }, { "epoch": 0.3833686688474434, "grad_norm": 0.34143176674842834, "learning_rate": 1.358251785400781e-05, "loss": 0.274, "step": 20650 }, { "epoch": 0.38340579898486205, "grad_norm": 0.4378577768802643, "learning_rate": 1.3581428764780356e-05, "loss": 0.4916, "step": 20652 }, { "epoch": 0.3834429291222807, "grad_norm": 0.39235880970954895, "learning_rate": 1.358033962682038e-05, "loss": 0.2964, "step": 20654 }, { "epoch": 0.3834800592596993, "grad_norm": 0.3207988440990448, "learning_rate": 1.3579250440142708e-05, "loss": 0.3585, "step": 20656 }, { "epoch": 0.383517189397118, "grad_norm": 0.3703027367591858, "learning_rate": 1.357816120476216e-05, "loss": 0.2508, "step": 20658 }, { "epoch": 0.3835543195345366, "grad_norm": 0.44120365381240845, "learning_rate": 1.3577071920693555e-05, "loss": 0.2725, "step": 20660 }, { "epoch": 0.38359144967195524, "grad_norm": 0.36774852871894836, "learning_rate": 1.3575982587951712e-05, "loss": 0.2028, "step": 20662 }, { "epoch": 0.38362857980937387, "grad_norm": 0.5156617760658264, "learning_rate": 1.3574893206551463e-05, "loss": 0.1843, "step": 20664 }, { "epoch": 0.3836657099467925, "grad_norm": 0.36022359132766724, "learning_rate": 1.357380377650762e-05, "loss": 0.4465, "step": 20666 }, { "epoch": 0.3837028400842111, "grad_norm": 0.3445912301540375, "learning_rate": 1.3572714297835015e-05, "loss": 0.1971, "step": 20668 }, { "epoch": 0.3837399702216298, "grad_norm": 0.3245997130870819, "learning_rate": 1.357162477054847e-05, "loss": 0.2748, "step": 20670 }, { "epoch": 0.38377710035904844, "grad_norm": 0.2201475352048874, "learning_rate": 1.3570535194662812e-05, "loss": 0.3352, "step": 20672 }, { "epoch": 0.38381423049646707, "grad_norm": 0.24271944165229797, "learning_rate": 1.3569445570192863e-05, "loss": 0.2269, "step": 20674 }, { "epoch": 0.3838513606338857, "grad_norm": 0.28391996026039124, "learning_rate": 1.3568355897153451e-05, "loss": 0.1698, "step": 20676 }, { "epoch": 0.3838884907713043, "grad_norm": 0.5780355334281921, "learning_rate": 1.3567266175559403e-05, "loss": 0.3052, "step": 20678 }, { "epoch": 0.383925620908723, "grad_norm": 0.3626141846179962, "learning_rate": 1.3566176405425549e-05, "loss": 0.3149, "step": 20680 }, { "epoch": 0.38396275104614164, "grad_norm": 0.3780469298362732, "learning_rate": 1.3565086586766716e-05, "loss": 0.4655, "step": 20682 }, { "epoch": 0.38399988118356027, "grad_norm": 0.351436048746109, "learning_rate": 1.3563996719597735e-05, "loss": 0.387, "step": 20684 }, { "epoch": 0.3840370113209789, "grad_norm": 0.5046899914741516, "learning_rate": 1.356290680393343e-05, "loss": 0.1605, "step": 20686 }, { "epoch": 0.3840741414583975, "grad_norm": 0.4094959497451782, "learning_rate": 1.356181683978864e-05, "loss": 0.3558, "step": 20688 }, { "epoch": 0.38411127159581615, "grad_norm": 0.5119794011116028, "learning_rate": 1.3560726827178188e-05, "loss": 0.2913, "step": 20690 }, { "epoch": 0.38414840173323483, "grad_norm": 0.6190775632858276, "learning_rate": 1.3559636766116913e-05, "loss": 0.2832, "step": 20692 }, { "epoch": 0.38418553187065346, "grad_norm": 0.3524041175842285, "learning_rate": 1.3558546656619642e-05, "loss": 0.3811, "step": 20694 }, { "epoch": 0.3842226620080721, "grad_norm": 0.4896256625652313, "learning_rate": 1.3557456498701208e-05, "loss": 0.1852, "step": 20696 }, { "epoch": 0.3842597921454907, "grad_norm": 0.40523743629455566, "learning_rate": 1.3556366292376449e-05, "loss": 0.2495, "step": 20698 }, { "epoch": 0.38429692228290935, "grad_norm": 0.38215065002441406, "learning_rate": 1.3555276037660194e-05, "loss": 0.2913, "step": 20700 }, { "epoch": 0.38433405242032803, "grad_norm": 0.36358264088630676, "learning_rate": 1.3554185734567287e-05, "loss": 0.263, "step": 20702 }, { "epoch": 0.38437118255774666, "grad_norm": 0.3265949487686157, "learning_rate": 1.3553095383112559e-05, "loss": 0.4035, "step": 20704 }, { "epoch": 0.3844083126951653, "grad_norm": 0.36384525895118713, "learning_rate": 1.355200498331084e-05, "loss": 0.398, "step": 20706 }, { "epoch": 0.3844454428325839, "grad_norm": 0.37665513157844543, "learning_rate": 1.3550914535176976e-05, "loss": 0.2416, "step": 20708 }, { "epoch": 0.38448257297000255, "grad_norm": 0.31690457463264465, "learning_rate": 1.3549824038725798e-05, "loss": 0.3089, "step": 20710 }, { "epoch": 0.3845197031074212, "grad_norm": 0.336553692817688, "learning_rate": 1.3548733493972149e-05, "loss": 0.3224, "step": 20712 }, { "epoch": 0.38455683324483986, "grad_norm": 0.41726386547088623, "learning_rate": 1.354764290093087e-05, "loss": 0.4078, "step": 20714 }, { "epoch": 0.3845939633822585, "grad_norm": 0.31295037269592285, "learning_rate": 1.3546552259616796e-05, "loss": 0.3534, "step": 20716 }, { "epoch": 0.3846310935196771, "grad_norm": 0.3717849850654602, "learning_rate": 1.3545461570044767e-05, "loss": 0.1314, "step": 20718 }, { "epoch": 0.38466822365709574, "grad_norm": 0.37344080209732056, "learning_rate": 1.3544370832229627e-05, "loss": 0.296, "step": 20720 }, { "epoch": 0.38470535379451437, "grad_norm": 0.6680738925933838, "learning_rate": 1.3543280046186218e-05, "loss": 0.3636, "step": 20722 }, { "epoch": 0.38474248393193305, "grad_norm": 0.35376036167144775, "learning_rate": 1.354218921192938e-05, "loss": 0.492, "step": 20724 }, { "epoch": 0.3847796140693517, "grad_norm": 0.45670193433761597, "learning_rate": 1.3541098329473959e-05, "loss": 0.0976, "step": 20726 }, { "epoch": 0.3848167442067703, "grad_norm": 0.36153843998908997, "learning_rate": 1.354000739883479e-05, "loss": 0.2532, "step": 20728 }, { "epoch": 0.38485387434418894, "grad_norm": 0.21934334933757782, "learning_rate": 1.3538916420026729e-05, "loss": 0.294, "step": 20730 }, { "epoch": 0.38489100448160757, "grad_norm": 0.48715871572494507, "learning_rate": 1.3537825393064617e-05, "loss": 0.6024, "step": 20732 }, { "epoch": 0.38492813461902625, "grad_norm": 0.39648228883743286, "learning_rate": 1.3536734317963295e-05, "loss": 0.1849, "step": 20734 }, { "epoch": 0.3849652647564449, "grad_norm": 0.3398877680301666, "learning_rate": 1.3535643194737615e-05, "loss": 0.4288, "step": 20736 }, { "epoch": 0.3850023948938635, "grad_norm": 0.44927507638931274, "learning_rate": 1.3534552023402419e-05, "loss": 0.3296, "step": 20738 }, { "epoch": 0.38503952503128214, "grad_norm": 0.3417001962661743, "learning_rate": 1.353346080397256e-05, "loss": 0.3033, "step": 20740 }, { "epoch": 0.38507665516870077, "grad_norm": 0.49154606461524963, "learning_rate": 1.3532369536462882e-05, "loss": 0.2953, "step": 20742 }, { "epoch": 0.3851137853061194, "grad_norm": 0.6155595183372498, "learning_rate": 1.3531278220888236e-05, "loss": 0.2304, "step": 20744 }, { "epoch": 0.3851509154435381, "grad_norm": 0.566434919834137, "learning_rate": 1.3530186857263472e-05, "loss": 0.296, "step": 20746 }, { "epoch": 0.3851880455809567, "grad_norm": 0.42195311188697815, "learning_rate": 1.3529095445603436e-05, "loss": 0.3149, "step": 20748 }, { "epoch": 0.38522517571837533, "grad_norm": 0.41019490361213684, "learning_rate": 1.3528003985922982e-05, "loss": 0.2655, "step": 20750 }, { "epoch": 0.38526230585579396, "grad_norm": 0.6509824991226196, "learning_rate": 1.3526912478236964e-05, "loss": 0.3241, "step": 20752 }, { "epoch": 0.3852994359932126, "grad_norm": 0.4650309085845947, "learning_rate": 1.3525820922560229e-05, "loss": 0.2569, "step": 20754 }, { "epoch": 0.3853365661306313, "grad_norm": 0.3566668629646301, "learning_rate": 1.3524729318907634e-05, "loss": 0.23, "step": 20756 }, { "epoch": 0.3853736962680499, "grad_norm": 0.29631632566452026, "learning_rate": 1.3523637667294031e-05, "loss": 0.4055, "step": 20758 }, { "epoch": 0.38541082640546853, "grad_norm": 0.3497377634048462, "learning_rate": 1.3522545967734273e-05, "loss": 0.2774, "step": 20760 }, { "epoch": 0.38544795654288716, "grad_norm": 0.3390210270881653, "learning_rate": 1.3521454220243213e-05, "loss": 0.277, "step": 20762 }, { "epoch": 0.3854850866803058, "grad_norm": 0.41099813580513, "learning_rate": 1.3520362424835713e-05, "loss": 0.2541, "step": 20764 }, { "epoch": 0.3855222168177244, "grad_norm": 0.4419342577457428, "learning_rate": 1.3519270581526622e-05, "loss": 0.115, "step": 20766 }, { "epoch": 0.3855593469551431, "grad_norm": 0.40069812536239624, "learning_rate": 1.3518178690330804e-05, "loss": 0.362, "step": 20768 }, { "epoch": 0.38559647709256173, "grad_norm": 0.4005439877510071, "learning_rate": 1.3517086751263111e-05, "loss": 0.3497, "step": 20770 }, { "epoch": 0.38563360722998036, "grad_norm": 0.4062248170375824, "learning_rate": 1.3515994764338399e-05, "loss": 0.2436, "step": 20772 }, { "epoch": 0.385670737367399, "grad_norm": 0.4341149926185608, "learning_rate": 1.3514902729571531e-05, "loss": 0.4543, "step": 20774 }, { "epoch": 0.3857078675048176, "grad_norm": 0.3939974308013916, "learning_rate": 1.3513810646977368e-05, "loss": 0.2545, "step": 20776 }, { "epoch": 0.3857449976422363, "grad_norm": 0.5217578411102295, "learning_rate": 1.3512718516570766e-05, "loss": 0.2443, "step": 20778 }, { "epoch": 0.3857821277796549, "grad_norm": 0.2592640817165375, "learning_rate": 1.3511626338366583e-05, "loss": 0.0874, "step": 20780 }, { "epoch": 0.38581925791707355, "grad_norm": 0.3430674970149994, "learning_rate": 1.3510534112379688e-05, "loss": 0.3298, "step": 20782 }, { "epoch": 0.3858563880544922, "grad_norm": 0.35523903369903564, "learning_rate": 1.350944183862494e-05, "loss": 0.2056, "step": 20784 }, { "epoch": 0.3858935181919108, "grad_norm": 0.44332513213157654, "learning_rate": 1.3508349517117197e-05, "loss": 0.1999, "step": 20786 }, { "epoch": 0.38593064832932944, "grad_norm": 0.4697682857513428, "learning_rate": 1.3507257147871328e-05, "loss": 0.6506, "step": 20788 }, { "epoch": 0.3859677784667481, "grad_norm": 0.50628262758255, "learning_rate": 1.3506164730902195e-05, "loss": 0.2437, "step": 20790 }, { "epoch": 0.38600490860416675, "grad_norm": 0.4779333174228668, "learning_rate": 1.350507226622466e-05, "loss": 0.1921, "step": 20792 }, { "epoch": 0.3860420387415854, "grad_norm": 0.2679564356803894, "learning_rate": 1.3503979753853591e-05, "loss": 0.5024, "step": 20794 }, { "epoch": 0.386079168879004, "grad_norm": 0.5327733755111694, "learning_rate": 1.3502887193803856e-05, "loss": 0.3846, "step": 20796 }, { "epoch": 0.38611629901642264, "grad_norm": 0.3848426342010498, "learning_rate": 1.3501794586090313e-05, "loss": 0.2346, "step": 20798 }, { "epoch": 0.3861534291538413, "grad_norm": 0.47144296765327454, "learning_rate": 1.350070193072784e-05, "loss": 0.3231, "step": 20800 }, { "epoch": 0.38619055929125995, "grad_norm": 0.31437861919403076, "learning_rate": 1.3499609227731298e-05, "loss": 0.263, "step": 20802 }, { "epoch": 0.3862276894286786, "grad_norm": 0.3516876697540283, "learning_rate": 1.3498516477115558e-05, "loss": 0.3418, "step": 20804 }, { "epoch": 0.3862648195660972, "grad_norm": 0.31398946046829224, "learning_rate": 1.3497423678895484e-05, "loss": 0.241, "step": 20806 }, { "epoch": 0.38630194970351583, "grad_norm": 0.43738794326782227, "learning_rate": 1.3496330833085955e-05, "loss": 0.2829, "step": 20808 }, { "epoch": 0.3863390798409345, "grad_norm": 0.47186732292175293, "learning_rate": 1.3495237939701833e-05, "loss": 0.2678, "step": 20810 }, { "epoch": 0.38637620997835315, "grad_norm": 0.40803611278533936, "learning_rate": 1.3494144998757992e-05, "loss": 0.3319, "step": 20812 }, { "epoch": 0.3864133401157718, "grad_norm": 0.32951849699020386, "learning_rate": 1.3493052010269305e-05, "loss": 0.2858, "step": 20814 }, { "epoch": 0.3864504702531904, "grad_norm": 0.6528772711753845, "learning_rate": 1.3491958974250642e-05, "loss": 0.2179, "step": 20816 }, { "epoch": 0.38648760039060903, "grad_norm": 0.3366418480873108, "learning_rate": 1.3490865890716878e-05, "loss": 0.3621, "step": 20818 }, { "epoch": 0.38652473052802766, "grad_norm": 0.33307650685310364, "learning_rate": 1.3489772759682884e-05, "loss": 0.2444, "step": 20820 }, { "epoch": 0.38656186066544634, "grad_norm": 0.5108956098556519, "learning_rate": 1.3488679581163538e-05, "loss": 0.2383, "step": 20822 }, { "epoch": 0.386598990802865, "grad_norm": 0.2978900969028473, "learning_rate": 1.3487586355173711e-05, "loss": 0.4003, "step": 20824 }, { "epoch": 0.3866361209402836, "grad_norm": 0.3805393576622009, "learning_rate": 1.348649308172828e-05, "loss": 0.3751, "step": 20826 }, { "epoch": 0.38667325107770223, "grad_norm": 0.42926251888275146, "learning_rate": 1.3485399760842124e-05, "loss": 0.1912, "step": 20828 }, { "epoch": 0.38671038121512086, "grad_norm": 0.40642642974853516, "learning_rate": 1.3484306392530115e-05, "loss": 0.2548, "step": 20830 }, { "epoch": 0.38674751135253954, "grad_norm": 0.4774445593357086, "learning_rate": 1.3483212976807134e-05, "loss": 0.239, "step": 20832 }, { "epoch": 0.38678464148995817, "grad_norm": 0.27411189675331116, "learning_rate": 1.3482119513688057e-05, "loss": 0.3896, "step": 20834 }, { "epoch": 0.3868217716273768, "grad_norm": 0.4208177924156189, "learning_rate": 1.3481026003187762e-05, "loss": 0.2264, "step": 20836 }, { "epoch": 0.3868589017647954, "grad_norm": 0.3298663794994354, "learning_rate": 1.3479932445321132e-05, "loss": 0.4131, "step": 20838 }, { "epoch": 0.38689603190221405, "grad_norm": 0.2186870574951172, "learning_rate": 1.3478838840103045e-05, "loss": 0.3937, "step": 20840 }, { "epoch": 0.3869331620396327, "grad_norm": 0.354340523481369, "learning_rate": 1.3477745187548381e-05, "loss": 0.152, "step": 20842 }, { "epoch": 0.38697029217705137, "grad_norm": 0.35978081822395325, "learning_rate": 1.3476651487672023e-05, "loss": 0.6011, "step": 20844 }, { "epoch": 0.38700742231447, "grad_norm": 0.4790162742137909, "learning_rate": 1.3475557740488851e-05, "loss": 0.4519, "step": 20846 }, { "epoch": 0.3870445524518886, "grad_norm": 0.32861942052841187, "learning_rate": 1.347446394601375e-05, "loss": 0.2626, "step": 20848 }, { "epoch": 0.38708168258930725, "grad_norm": 0.3664741814136505, "learning_rate": 1.3473370104261599e-05, "loss": 0.257, "step": 20850 }, { "epoch": 0.3871188127267259, "grad_norm": 0.3495136499404907, "learning_rate": 1.3472276215247287e-05, "loss": 0.3904, "step": 20852 }, { "epoch": 0.38715594286414456, "grad_norm": 0.22909832000732422, "learning_rate": 1.3471182278985697e-05, "loss": 0.3477, "step": 20854 }, { "epoch": 0.3871930730015632, "grad_norm": 0.42756387591362, "learning_rate": 1.3470088295491712e-05, "loss": 0.1728, "step": 20856 }, { "epoch": 0.3872302031389818, "grad_norm": 0.2829986810684204, "learning_rate": 1.346899426478022e-05, "loss": 0.2708, "step": 20858 }, { "epoch": 0.38726733327640045, "grad_norm": 0.543093740940094, "learning_rate": 1.3467900186866107e-05, "loss": 0.2961, "step": 20860 }, { "epoch": 0.3873044634138191, "grad_norm": 0.36149317026138306, "learning_rate": 1.3466806061764261e-05, "loss": 0.2172, "step": 20862 }, { "epoch": 0.3873415935512377, "grad_norm": 0.3609108328819275, "learning_rate": 1.3465711889489566e-05, "loss": 0.3758, "step": 20864 }, { "epoch": 0.3873787236886564, "grad_norm": 0.4260145425796509, "learning_rate": 1.3464617670056917e-05, "loss": 0.3615, "step": 20866 }, { "epoch": 0.387415853826075, "grad_norm": 0.36468422412872314, "learning_rate": 1.3463523403481195e-05, "loss": 0.4057, "step": 20868 }, { "epoch": 0.38745298396349365, "grad_norm": 0.3786851763725281, "learning_rate": 1.3462429089777296e-05, "loss": 0.2455, "step": 20870 }, { "epoch": 0.3874901141009123, "grad_norm": 0.2598850727081299, "learning_rate": 1.346133472896011e-05, "loss": 0.3806, "step": 20872 }, { "epoch": 0.3875272442383309, "grad_norm": 0.5214970111846924, "learning_rate": 1.3460240321044525e-05, "loss": 0.3802, "step": 20874 }, { "epoch": 0.3875643743757496, "grad_norm": 0.3710053563117981, "learning_rate": 1.3459145866045433e-05, "loss": 0.414, "step": 20876 }, { "epoch": 0.3876015045131682, "grad_norm": 0.4426306486129761, "learning_rate": 1.3458051363977728e-05, "loss": 0.2758, "step": 20878 }, { "epoch": 0.38763863465058684, "grad_norm": 0.2110464721918106, "learning_rate": 1.3456956814856302e-05, "loss": 0.2671, "step": 20880 }, { "epoch": 0.3876757647880055, "grad_norm": 0.3493625521659851, "learning_rate": 1.3455862218696045e-05, "loss": 0.2062, "step": 20882 }, { "epoch": 0.3877128949254241, "grad_norm": 0.5118810534477234, "learning_rate": 1.3454767575511862e-05, "loss": 0.2884, "step": 20884 }, { "epoch": 0.3877500250628428, "grad_norm": 0.5153297185897827, "learning_rate": 1.3453672885318636e-05, "loss": 0.3604, "step": 20886 }, { "epoch": 0.3877871552002614, "grad_norm": 0.3881579637527466, "learning_rate": 1.3452578148131265e-05, "loss": 0.3028, "step": 20888 }, { "epoch": 0.38782428533768004, "grad_norm": 0.25400811433792114, "learning_rate": 1.345148336396465e-05, "loss": 0.2939, "step": 20890 }, { "epoch": 0.38786141547509867, "grad_norm": 1.0255693197250366, "learning_rate": 1.3450388532833685e-05, "loss": 0.3004, "step": 20892 }, { "epoch": 0.3878985456125173, "grad_norm": 0.24599038064479828, "learning_rate": 1.3449293654753266e-05, "loss": 0.2602, "step": 20894 }, { "epoch": 0.3879356757499359, "grad_norm": 0.39052486419677734, "learning_rate": 1.3448198729738295e-05, "loss": 0.185, "step": 20896 }, { "epoch": 0.3879728058873546, "grad_norm": 0.29600226879119873, "learning_rate": 1.3447103757803666e-05, "loss": 0.2882, "step": 20898 }, { "epoch": 0.38800993602477324, "grad_norm": 0.527108371257782, "learning_rate": 1.3446008738964276e-05, "loss": 0.2831, "step": 20900 }, { "epoch": 0.38804706616219187, "grad_norm": 0.36527591943740845, "learning_rate": 1.3444913673235034e-05, "loss": 0.1595, "step": 20902 }, { "epoch": 0.3880841962996105, "grad_norm": 0.274984747171402, "learning_rate": 1.3443818560630834e-05, "loss": 0.3654, "step": 20904 }, { "epoch": 0.3881213264370291, "grad_norm": 0.27965012192726135, "learning_rate": 1.344272340116658e-05, "loss": 0.2232, "step": 20906 }, { "epoch": 0.3881584565744478, "grad_norm": 0.6494016051292419, "learning_rate": 1.344162819485717e-05, "loss": 0.5417, "step": 20908 }, { "epoch": 0.38819558671186644, "grad_norm": 0.4156023859977722, "learning_rate": 1.3440532941717513e-05, "loss": 0.4697, "step": 20910 }, { "epoch": 0.38823271684928506, "grad_norm": 0.46189770102500916, "learning_rate": 1.3439437641762505e-05, "loss": 0.3199, "step": 20912 }, { "epoch": 0.3882698469867037, "grad_norm": 0.5394167304039001, "learning_rate": 1.3438342295007054e-05, "loss": 0.2194, "step": 20914 }, { "epoch": 0.3883069771241223, "grad_norm": 0.37195292115211487, "learning_rate": 1.3437246901466066e-05, "loss": 0.4286, "step": 20916 }, { "epoch": 0.38834410726154095, "grad_norm": 0.31552475690841675, "learning_rate": 1.3436151461154441e-05, "loss": 0.1736, "step": 20918 }, { "epoch": 0.38838123739895963, "grad_norm": 0.5918641090393066, "learning_rate": 1.3435055974087083e-05, "loss": 0.2236, "step": 20920 }, { "epoch": 0.38841836753637826, "grad_norm": 0.3376547694206238, "learning_rate": 1.3433960440278908e-05, "loss": 0.2717, "step": 20922 }, { "epoch": 0.3884554976737969, "grad_norm": 0.35671812295913696, "learning_rate": 1.3432864859744816e-05, "loss": 0.4821, "step": 20924 }, { "epoch": 0.3884926278112155, "grad_norm": 0.40426918864250183, "learning_rate": 1.3431769232499716e-05, "loss": 0.2692, "step": 20926 }, { "epoch": 0.38852975794863415, "grad_norm": 0.45418408513069153, "learning_rate": 1.3430673558558516e-05, "loss": 0.4624, "step": 20928 }, { "epoch": 0.38856688808605283, "grad_norm": 0.3226243257522583, "learning_rate": 1.3429577837936125e-05, "loss": 0.297, "step": 20930 }, { "epoch": 0.38860401822347146, "grad_norm": 0.4090017080307007, "learning_rate": 1.342848207064745e-05, "loss": 0.1396, "step": 20932 }, { "epoch": 0.3886411483608901, "grad_norm": 0.46387729048728943, "learning_rate": 1.3427386256707407e-05, "loss": 0.3157, "step": 20934 }, { "epoch": 0.3886782784983087, "grad_norm": 0.49868059158325195, "learning_rate": 1.34262903961309e-05, "loss": 0.3909, "step": 20936 }, { "epoch": 0.38871540863572734, "grad_norm": 0.33697232604026794, "learning_rate": 1.3425194488932847e-05, "loss": 0.3319, "step": 20938 }, { "epoch": 0.388752538773146, "grad_norm": 0.4747236669063568, "learning_rate": 1.3424098535128155e-05, "loss": 0.25, "step": 20940 }, { "epoch": 0.38878966891056466, "grad_norm": 0.2780747711658478, "learning_rate": 1.342300253473174e-05, "loss": 0.2726, "step": 20942 }, { "epoch": 0.3888267990479833, "grad_norm": 0.36898523569107056, "learning_rate": 1.342190648775851e-05, "loss": 0.1427, "step": 20944 }, { "epoch": 0.3888639291854019, "grad_norm": 0.330413818359375, "learning_rate": 1.3420810394223385e-05, "loss": 0.2066, "step": 20946 }, { "epoch": 0.38890105932282054, "grad_norm": 0.3008469045162201, "learning_rate": 1.3419714254141278e-05, "loss": 0.439, "step": 20948 }, { "epoch": 0.38893818946023917, "grad_norm": 0.4299360513687134, "learning_rate": 1.3418618067527101e-05, "loss": 0.3182, "step": 20950 }, { "epoch": 0.38897531959765785, "grad_norm": 0.31372538208961487, "learning_rate": 1.341752183439577e-05, "loss": 0.1605, "step": 20952 }, { "epoch": 0.3890124497350765, "grad_norm": 0.3003089427947998, "learning_rate": 1.3416425554762209e-05, "loss": 0.4386, "step": 20954 }, { "epoch": 0.3890495798724951, "grad_norm": 0.39285245537757874, "learning_rate": 1.3415329228641326e-05, "loss": 0.4065, "step": 20956 }, { "epoch": 0.38908671000991374, "grad_norm": 0.5915917158126831, "learning_rate": 1.3414232856048045e-05, "loss": 0.2302, "step": 20958 }, { "epoch": 0.38912384014733237, "grad_norm": 0.2580043375492096, "learning_rate": 1.3413136436997278e-05, "loss": 0.3308, "step": 20960 }, { "epoch": 0.38916097028475105, "grad_norm": 0.30734193325042725, "learning_rate": 1.3412039971503949e-05, "loss": 0.1003, "step": 20962 }, { "epoch": 0.3891981004221697, "grad_norm": 0.3624153137207031, "learning_rate": 1.3410943459582977e-05, "loss": 0.2541, "step": 20964 }, { "epoch": 0.3892352305595883, "grad_norm": 0.444143146276474, "learning_rate": 1.3409846901249282e-05, "loss": 0.2639, "step": 20966 }, { "epoch": 0.38927236069700694, "grad_norm": 0.4695796072483063, "learning_rate": 1.3408750296517786e-05, "loss": 0.4196, "step": 20968 }, { "epoch": 0.38930949083442556, "grad_norm": 0.48529860377311707, "learning_rate": 1.3407653645403405e-05, "loss": 0.2694, "step": 20970 }, { "epoch": 0.3893466209718442, "grad_norm": 0.3905702233314514, "learning_rate": 1.3406556947921066e-05, "loss": 0.3957, "step": 20972 }, { "epoch": 0.3893837511092629, "grad_norm": 0.4604209065437317, "learning_rate": 1.340546020408569e-05, "loss": 0.3678, "step": 20974 }, { "epoch": 0.3894208812466815, "grad_norm": 0.5386272072792053, "learning_rate": 1.34043634139122e-05, "loss": 0.3906, "step": 20976 }, { "epoch": 0.38945801138410013, "grad_norm": 0.4299006462097168, "learning_rate": 1.3403266577415526e-05, "loss": 0.2727, "step": 20978 }, { "epoch": 0.38949514152151876, "grad_norm": 0.3433120846748352, "learning_rate": 1.3402169694610585e-05, "loss": 0.2322, "step": 20980 }, { "epoch": 0.3895322716589374, "grad_norm": 0.316511332988739, "learning_rate": 1.3401072765512304e-05, "loss": 0.3579, "step": 20982 }, { "epoch": 0.3895694017963561, "grad_norm": 0.3686107099056244, "learning_rate": 1.3399975790135613e-05, "loss": 0.3703, "step": 20984 }, { "epoch": 0.3896065319337747, "grad_norm": 0.3704521358013153, "learning_rate": 1.3398878768495434e-05, "loss": 0.2759, "step": 20986 }, { "epoch": 0.38964366207119333, "grad_norm": 0.5123729109764099, "learning_rate": 1.3397781700606694e-05, "loss": 0.2449, "step": 20988 }, { "epoch": 0.38968079220861196, "grad_norm": 0.41569241881370544, "learning_rate": 1.3396684586484325e-05, "loss": 0.3517, "step": 20990 }, { "epoch": 0.3897179223460306, "grad_norm": 0.28489747643470764, "learning_rate": 1.3395587426143255e-05, "loss": 0.2565, "step": 20992 }, { "epoch": 0.3897550524834492, "grad_norm": 0.6491680145263672, "learning_rate": 1.3394490219598406e-05, "loss": 0.3088, "step": 20994 }, { "epoch": 0.3897921826208679, "grad_norm": 0.41795191168785095, "learning_rate": 1.3393392966864714e-05, "loss": 0.3171, "step": 20996 }, { "epoch": 0.38982931275828653, "grad_norm": 0.9456215500831604, "learning_rate": 1.3392295667957111e-05, "loss": 0.2891, "step": 20998 }, { "epoch": 0.38986644289570516, "grad_norm": 0.28814780712127686, "learning_rate": 1.3391198322890524e-05, "loss": 0.2366, "step": 21000 }, { "epoch": 0.3899035730331238, "grad_norm": 0.4209561347961426, "learning_rate": 1.3390100931679886e-05, "loss": 0.3493, "step": 21002 }, { "epoch": 0.3899407031705424, "grad_norm": 0.2765824794769287, "learning_rate": 1.3389003494340127e-05, "loss": 0.2164, "step": 21004 }, { "epoch": 0.3899778333079611, "grad_norm": 0.4336777627468109, "learning_rate": 1.3387906010886182e-05, "loss": 0.5543, "step": 21006 }, { "epoch": 0.3900149634453797, "grad_norm": 0.522078812122345, "learning_rate": 1.3386808481332985e-05, "loss": 0.4384, "step": 21008 }, { "epoch": 0.39005209358279835, "grad_norm": 0.3964535593986511, "learning_rate": 1.338571090569547e-05, "loss": 0.1105, "step": 21010 }, { "epoch": 0.390089223720217, "grad_norm": 0.5796847939491272, "learning_rate": 1.338461328398857e-05, "loss": 0.2128, "step": 21012 }, { "epoch": 0.3901263538576356, "grad_norm": 0.3732931613922119, "learning_rate": 1.338351561622722e-05, "loss": 0.2689, "step": 21014 }, { "epoch": 0.39016348399505424, "grad_norm": 0.44324392080307007, "learning_rate": 1.3382417902426358e-05, "loss": 0.4351, "step": 21016 }, { "epoch": 0.3902006141324729, "grad_norm": 0.29921138286590576, "learning_rate": 1.3381320142600922e-05, "loss": 0.3281, "step": 21018 }, { "epoch": 0.39023774426989155, "grad_norm": 0.2649189233779907, "learning_rate": 1.3380222336765844e-05, "loss": 0.2626, "step": 21020 }, { "epoch": 0.3902748744073102, "grad_norm": 0.3623538017272949, "learning_rate": 1.337912448493607e-05, "loss": 0.288, "step": 21022 }, { "epoch": 0.3903120045447288, "grad_norm": 0.3688085675239563, "learning_rate": 1.337802658712653e-05, "loss": 0.2343, "step": 21024 }, { "epoch": 0.39034913468214744, "grad_norm": 0.4979703724384308, "learning_rate": 1.3376928643352165e-05, "loss": 0.2031, "step": 21026 }, { "epoch": 0.3903862648195661, "grad_norm": 0.34725460410118103, "learning_rate": 1.3375830653627914e-05, "loss": 0.2158, "step": 21028 }, { "epoch": 0.39042339495698475, "grad_norm": 0.48986560106277466, "learning_rate": 1.3374732617968725e-05, "loss": 0.2763, "step": 21030 }, { "epoch": 0.3904605250944034, "grad_norm": 0.25171974301338196, "learning_rate": 1.3373634536389533e-05, "loss": 0.4196, "step": 21032 }, { "epoch": 0.390497655231822, "grad_norm": 0.33803626894950867, "learning_rate": 1.3372536408905279e-05, "loss": 0.257, "step": 21034 }, { "epoch": 0.39053478536924063, "grad_norm": 0.31209734082221985, "learning_rate": 1.3371438235530907e-05, "loss": 0.2631, "step": 21036 }, { "epoch": 0.3905719155066593, "grad_norm": 0.39501842856407166, "learning_rate": 1.3370340016281356e-05, "loss": 0.3451, "step": 21038 }, { "epoch": 0.39060904564407795, "grad_norm": 0.2956770956516266, "learning_rate": 1.3369241751171575e-05, "loss": 0.4203, "step": 21040 }, { "epoch": 0.3906461757814966, "grad_norm": 0.2859567403793335, "learning_rate": 1.3368143440216508e-05, "loss": 0.2804, "step": 21042 }, { "epoch": 0.3906833059189152, "grad_norm": 0.40659254789352417, "learning_rate": 1.3367045083431096e-05, "loss": 0.3244, "step": 21044 }, { "epoch": 0.39072043605633383, "grad_norm": 0.4834231734275818, "learning_rate": 1.3365946680830285e-05, "loss": 0.2023, "step": 21046 }, { "epoch": 0.39075756619375246, "grad_norm": 0.4338100552558899, "learning_rate": 1.3364848232429025e-05, "loss": 0.2128, "step": 21048 }, { "epoch": 0.39079469633117114, "grad_norm": 0.36406126618385315, "learning_rate": 1.3363749738242253e-05, "loss": 0.1364, "step": 21050 }, { "epoch": 0.39083182646858977, "grad_norm": 0.3659365773200989, "learning_rate": 1.3362651198284929e-05, "loss": 0.3412, "step": 21052 }, { "epoch": 0.3908689566060084, "grad_norm": 0.3697330355644226, "learning_rate": 1.3361552612571993e-05, "loss": 0.1963, "step": 21054 }, { "epoch": 0.39090608674342703, "grad_norm": 0.3940783739089966, "learning_rate": 1.3360453981118392e-05, "loss": 0.4194, "step": 21056 }, { "epoch": 0.39094321688084566, "grad_norm": 0.34696316719055176, "learning_rate": 1.3359355303939079e-05, "loss": 0.2441, "step": 21058 }, { "epoch": 0.39098034701826434, "grad_norm": 0.34688594937324524, "learning_rate": 1.3358256581049005e-05, "loss": 0.3986, "step": 21060 }, { "epoch": 0.39101747715568297, "grad_norm": 0.29138773679733276, "learning_rate": 1.3357157812463116e-05, "loss": 0.1161, "step": 21062 }, { "epoch": 0.3910546072931016, "grad_norm": 0.3353050947189331, "learning_rate": 1.3356058998196366e-05, "loss": 0.2321, "step": 21064 }, { "epoch": 0.3910917374305202, "grad_norm": 0.5530006885528564, "learning_rate": 1.3354960138263706e-05, "loss": 0.284, "step": 21066 }, { "epoch": 0.39112886756793885, "grad_norm": 0.5759733319282532, "learning_rate": 1.3353861232680085e-05, "loss": 0.2761, "step": 21068 }, { "epoch": 0.3911659977053575, "grad_norm": 0.4651113450527191, "learning_rate": 1.335276228146046e-05, "loss": 0.1315, "step": 21070 }, { "epoch": 0.39120312784277617, "grad_norm": 0.45784854888916016, "learning_rate": 1.3351663284619784e-05, "loss": 0.5345, "step": 21072 }, { "epoch": 0.3912402579801948, "grad_norm": 0.6664987206459045, "learning_rate": 1.3350564242173012e-05, "loss": 0.4362, "step": 21074 }, { "epoch": 0.3912773881176134, "grad_norm": 0.3597731590270996, "learning_rate": 1.3349465154135094e-05, "loss": 0.1609, "step": 21076 }, { "epoch": 0.39131451825503205, "grad_norm": 0.3273705840110779, "learning_rate": 1.3348366020520989e-05, "loss": 0.4168, "step": 21078 }, { "epoch": 0.3913516483924507, "grad_norm": 0.3557974696159363, "learning_rate": 1.3347266841345652e-05, "loss": 0.2151, "step": 21080 }, { "epoch": 0.39138877852986936, "grad_norm": 0.21370138227939606, "learning_rate": 1.334616761662404e-05, "loss": 0.1439, "step": 21082 }, { "epoch": 0.391425908667288, "grad_norm": 0.338091105222702, "learning_rate": 1.3345068346371108e-05, "loss": 0.3926, "step": 21084 }, { "epoch": 0.3914630388047066, "grad_norm": 0.42032161355018616, "learning_rate": 1.3343969030601818e-05, "loss": 0.13, "step": 21086 }, { "epoch": 0.39150016894212525, "grad_norm": 0.23689033091068268, "learning_rate": 1.3342869669331123e-05, "loss": 0.4157, "step": 21088 }, { "epoch": 0.3915372990795439, "grad_norm": 0.4776283800601959, "learning_rate": 1.3341770262573986e-05, "loss": 0.3817, "step": 21090 }, { "epoch": 0.3915744292169625, "grad_norm": 0.5193130373954773, "learning_rate": 1.3340670810345366e-05, "loss": 0.3639, "step": 21092 }, { "epoch": 0.3916115593543812, "grad_norm": 0.4199797511100769, "learning_rate": 1.333957131266022e-05, "loss": 0.354, "step": 21094 }, { "epoch": 0.3916486894917998, "grad_norm": 0.2748834788799286, "learning_rate": 1.3338471769533517e-05, "loss": 0.1861, "step": 21096 }, { "epoch": 0.39168581962921845, "grad_norm": 0.517143726348877, "learning_rate": 1.3337372180980208e-05, "loss": 0.304, "step": 21098 }, { "epoch": 0.3917229497666371, "grad_norm": 0.40330085158348083, "learning_rate": 1.3336272547015263e-05, "loss": 0.3512, "step": 21100 }, { "epoch": 0.3917600799040557, "grad_norm": 0.36303767561912537, "learning_rate": 1.3335172867653642e-05, "loss": 0.38, "step": 21102 }, { "epoch": 0.3917972100414744, "grad_norm": 0.38747358322143555, "learning_rate": 1.3334073142910307e-05, "loss": 0.3439, "step": 21104 }, { "epoch": 0.391834340178893, "grad_norm": 0.43079546093940735, "learning_rate": 1.3332973372800224e-05, "loss": 0.3644, "step": 21106 }, { "epoch": 0.39187147031631164, "grad_norm": 0.4817890524864197, "learning_rate": 1.3331873557338355e-05, "loss": 0.3224, "step": 21108 }, { "epoch": 0.39190860045373027, "grad_norm": 0.2721439301967621, "learning_rate": 1.3330773696539669e-05, "loss": 0.3213, "step": 21110 }, { "epoch": 0.3919457305911489, "grad_norm": 0.47233209013938904, "learning_rate": 1.3329673790419128e-05, "loss": 0.2544, "step": 21112 }, { "epoch": 0.3919828607285676, "grad_norm": 0.5644276142120361, "learning_rate": 1.33285738389917e-05, "loss": 0.386, "step": 21114 }, { "epoch": 0.3920199908659862, "grad_norm": 0.36235418915748596, "learning_rate": 1.3327473842272356e-05, "loss": 0.4345, "step": 21116 }, { "epoch": 0.39205712100340484, "grad_norm": 0.4895215630531311, "learning_rate": 1.3326373800276057e-05, "loss": 0.3474, "step": 21118 }, { "epoch": 0.39209425114082347, "grad_norm": 0.32556769251823425, "learning_rate": 1.3325273713017775e-05, "loss": 0.2586, "step": 21120 }, { "epoch": 0.3921313812782421, "grad_norm": 0.4315909743309021, "learning_rate": 1.3324173580512478e-05, "loss": 0.2239, "step": 21122 }, { "epoch": 0.3921685114156607, "grad_norm": 0.530937671661377, "learning_rate": 1.3323073402775135e-05, "loss": 0.2066, "step": 21124 }, { "epoch": 0.3922056415530794, "grad_norm": 0.4639502763748169, "learning_rate": 1.3321973179820717e-05, "loss": 0.2685, "step": 21126 }, { "epoch": 0.39224277169049804, "grad_norm": 0.24358013272285461, "learning_rate": 1.3320872911664195e-05, "loss": 0.2083, "step": 21128 }, { "epoch": 0.39227990182791667, "grad_norm": 0.3938358426094055, "learning_rate": 1.3319772598320543e-05, "loss": 0.2416, "step": 21130 }, { "epoch": 0.3923170319653353, "grad_norm": 0.37304943799972534, "learning_rate": 1.3318672239804724e-05, "loss": 0.1616, "step": 21132 }, { "epoch": 0.3923541621027539, "grad_norm": 0.39024823904037476, "learning_rate": 1.3317571836131718e-05, "loss": 0.2498, "step": 21134 }, { "epoch": 0.3923912922401726, "grad_norm": 0.5496432781219482, "learning_rate": 1.3316471387316499e-05, "loss": 0.3569, "step": 21136 }, { "epoch": 0.39242842237759124, "grad_norm": 0.30583733320236206, "learning_rate": 1.3315370893374038e-05, "loss": 0.3339, "step": 21138 }, { "epoch": 0.39246555251500986, "grad_norm": 0.4832291007041931, "learning_rate": 1.3314270354319308e-05, "loss": 0.301, "step": 21140 }, { "epoch": 0.3925026826524285, "grad_norm": 0.4621949791908264, "learning_rate": 1.3313169770167287e-05, "loss": 0.2119, "step": 21142 }, { "epoch": 0.3925398127898471, "grad_norm": 0.37230128049850464, "learning_rate": 1.3312069140932948e-05, "loss": 0.3119, "step": 21144 }, { "epoch": 0.39257694292726575, "grad_norm": 0.30157050490379333, "learning_rate": 1.3310968466631268e-05, "loss": 0.5064, "step": 21146 }, { "epoch": 0.39261407306468443, "grad_norm": 0.3491836190223694, "learning_rate": 1.3309867747277227e-05, "loss": 0.2235, "step": 21148 }, { "epoch": 0.39265120320210306, "grad_norm": 0.4691217839717865, "learning_rate": 1.3308766982885803e-05, "loss": 0.3598, "step": 21150 }, { "epoch": 0.3926883333395217, "grad_norm": 0.32685142755508423, "learning_rate": 1.3307666173471966e-05, "loss": 0.4718, "step": 21152 }, { "epoch": 0.3927254634769403, "grad_norm": 0.35390961170196533, "learning_rate": 1.3306565319050702e-05, "loss": 0.2644, "step": 21154 }, { "epoch": 0.39276259361435895, "grad_norm": 0.45941615104675293, "learning_rate": 1.3305464419636988e-05, "loss": 0.3608, "step": 21156 }, { "epoch": 0.39279972375177763, "grad_norm": 0.3114888072013855, "learning_rate": 1.3304363475245805e-05, "loss": 0.2937, "step": 21158 }, { "epoch": 0.39283685388919626, "grad_norm": 0.4053100049495697, "learning_rate": 1.3303262485892132e-05, "loss": 0.236, "step": 21160 }, { "epoch": 0.3928739840266149, "grad_norm": 0.35260215401649475, "learning_rate": 1.3302161451590953e-05, "loss": 0.266, "step": 21162 }, { "epoch": 0.3929111141640335, "grad_norm": 0.4710381031036377, "learning_rate": 1.3301060372357247e-05, "loss": 0.4269, "step": 21164 }, { "epoch": 0.39294824430145214, "grad_norm": 0.2651098966598511, "learning_rate": 1.3299959248205994e-05, "loss": 0.314, "step": 21166 }, { "epoch": 0.39298537443887077, "grad_norm": 0.3585895299911499, "learning_rate": 1.3298858079152184e-05, "loss": 0.2222, "step": 21168 }, { "epoch": 0.39302250457628946, "grad_norm": 0.30114251375198364, "learning_rate": 1.3297756865210794e-05, "loss": 0.2756, "step": 21170 }, { "epoch": 0.3930596347137081, "grad_norm": 0.29901087284088135, "learning_rate": 1.3296655606396814e-05, "loss": 0.2678, "step": 21172 }, { "epoch": 0.3930967648511267, "grad_norm": 0.33698150515556335, "learning_rate": 1.3295554302725223e-05, "loss": 0.4412, "step": 21174 }, { "epoch": 0.39313389498854534, "grad_norm": 0.3872765004634857, "learning_rate": 1.3294452954211008e-05, "loss": 0.2959, "step": 21176 }, { "epoch": 0.39317102512596397, "grad_norm": 0.1867680698633194, "learning_rate": 1.329335156086916e-05, "loss": 0.1974, "step": 21178 }, { "epoch": 0.39320815526338265, "grad_norm": 0.30476659536361694, "learning_rate": 1.329225012271466e-05, "loss": 0.4242, "step": 21180 }, { "epoch": 0.3932452854008013, "grad_norm": 0.37183302640914917, "learning_rate": 1.32911486397625e-05, "loss": 0.354, "step": 21182 }, { "epoch": 0.3932824155382199, "grad_norm": 0.43083375692367554, "learning_rate": 1.3290047112027661e-05, "loss": 0.4312, "step": 21184 }, { "epoch": 0.39331954567563854, "grad_norm": 0.2652100920677185, "learning_rate": 1.3288945539525138e-05, "loss": 0.3595, "step": 21186 }, { "epoch": 0.39335667581305717, "grad_norm": 0.35747572779655457, "learning_rate": 1.3287843922269916e-05, "loss": 0.3319, "step": 21188 }, { "epoch": 0.39339380595047585, "grad_norm": 0.34798482060432434, "learning_rate": 1.328674226027699e-05, "loss": 0.4377, "step": 21190 }, { "epoch": 0.3934309360878945, "grad_norm": 0.2766435742378235, "learning_rate": 1.3285640553561345e-05, "loss": 0.1935, "step": 21192 }, { "epoch": 0.3934680662253131, "grad_norm": 0.5573251247406006, "learning_rate": 1.3284538802137972e-05, "loss": 0.2626, "step": 21194 }, { "epoch": 0.39350519636273174, "grad_norm": 0.39734238386154175, "learning_rate": 1.3283437006021864e-05, "loss": 0.1784, "step": 21196 }, { "epoch": 0.39354232650015036, "grad_norm": 0.45531079173088074, "learning_rate": 1.3282335165228016e-05, "loss": 0.4027, "step": 21198 }, { "epoch": 0.393579456637569, "grad_norm": 0.3285204768180847, "learning_rate": 1.3281233279771418e-05, "loss": 0.1327, "step": 21200 }, { "epoch": 0.3936165867749877, "grad_norm": 0.26710113883018494, "learning_rate": 1.3280131349667062e-05, "loss": 0.3392, "step": 21202 }, { "epoch": 0.3936537169124063, "grad_norm": 0.36151015758514404, "learning_rate": 1.3279029374929944e-05, "loss": 0.3119, "step": 21204 }, { "epoch": 0.39369084704982493, "grad_norm": 0.33402392268180847, "learning_rate": 1.3277927355575057e-05, "loss": 0.143, "step": 21206 }, { "epoch": 0.39372797718724356, "grad_norm": 0.3668234050273895, "learning_rate": 1.3276825291617399e-05, "loss": 0.3995, "step": 21208 }, { "epoch": 0.3937651073246622, "grad_norm": 0.40789994597435, "learning_rate": 1.3275723183071962e-05, "loss": 0.3212, "step": 21210 }, { "epoch": 0.3938022374620809, "grad_norm": 0.5132853388786316, "learning_rate": 1.3274621029953747e-05, "loss": 0.0871, "step": 21212 }, { "epoch": 0.3938393675994995, "grad_norm": 0.6429086923599243, "learning_rate": 1.3273518832277747e-05, "loss": 0.3234, "step": 21214 }, { "epoch": 0.39387649773691813, "grad_norm": 0.5411849021911621, "learning_rate": 1.327241659005896e-05, "loss": 0.3408, "step": 21216 }, { "epoch": 0.39391362787433676, "grad_norm": 0.3104947507381439, "learning_rate": 1.3271314303312388e-05, "loss": 0.2007, "step": 21218 }, { "epoch": 0.3939507580117554, "grad_norm": 0.4175235629081726, "learning_rate": 1.3270211972053024e-05, "loss": 0.2485, "step": 21220 }, { "epoch": 0.393987888149174, "grad_norm": 0.35643407702445984, "learning_rate": 1.3269109596295873e-05, "loss": 0.3487, "step": 21222 }, { "epoch": 0.3940250182865927, "grad_norm": 0.32255619764328003, "learning_rate": 1.3268007176055932e-05, "loss": 0.209, "step": 21224 }, { "epoch": 0.3940621484240113, "grad_norm": 0.6464990973472595, "learning_rate": 1.3266904711348202e-05, "loss": 0.2817, "step": 21226 }, { "epoch": 0.39409927856142996, "grad_norm": 0.9786666035652161, "learning_rate": 1.3265802202187682e-05, "loss": 0.2629, "step": 21228 }, { "epoch": 0.3941364086988486, "grad_norm": 0.3321402072906494, "learning_rate": 1.3264699648589381e-05, "loss": 0.3024, "step": 21230 }, { "epoch": 0.3941735388362672, "grad_norm": 0.25988873839378357, "learning_rate": 1.326359705056829e-05, "loss": 0.3272, "step": 21232 }, { "epoch": 0.3942106689736859, "grad_norm": 0.43860870599746704, "learning_rate": 1.3262494408139426e-05, "loss": 0.4614, "step": 21234 }, { "epoch": 0.3942477991111045, "grad_norm": 0.2583576440811157, "learning_rate": 1.326139172131778e-05, "loss": 0.2008, "step": 21236 }, { "epoch": 0.39428492924852315, "grad_norm": 0.3852914571762085, "learning_rate": 1.3260288990118364e-05, "loss": 0.2093, "step": 21238 }, { "epoch": 0.3943220593859418, "grad_norm": 0.32271698117256165, "learning_rate": 1.3259186214556178e-05, "loss": 0.2465, "step": 21240 }, { "epoch": 0.3943591895233604, "grad_norm": 0.3636697828769684, "learning_rate": 1.3258083394646233e-05, "loss": 0.3989, "step": 21242 }, { "epoch": 0.39439631966077904, "grad_norm": 0.5882134437561035, "learning_rate": 1.3256980530403533e-05, "loss": 0.3478, "step": 21244 }, { "epoch": 0.3944334497981977, "grad_norm": 0.3851419985294342, "learning_rate": 1.3255877621843084e-05, "loss": 0.4214, "step": 21246 }, { "epoch": 0.39447057993561635, "grad_norm": 0.39672455191612244, "learning_rate": 1.3254774668979889e-05, "loss": 0.481, "step": 21248 }, { "epoch": 0.394507710073035, "grad_norm": 0.32517409324645996, "learning_rate": 1.3253671671828963e-05, "loss": 0.2591, "step": 21250 }, { "epoch": 0.3945448402104536, "grad_norm": 0.38770776987075806, "learning_rate": 1.325256863040531e-05, "loss": 0.3072, "step": 21252 }, { "epoch": 0.39458197034787224, "grad_norm": 0.48269984126091003, "learning_rate": 1.325146554472394e-05, "loss": 0.2211, "step": 21254 }, { "epoch": 0.3946191004852909, "grad_norm": 0.4763682782649994, "learning_rate": 1.3250362414799866e-05, "loss": 0.2234, "step": 21256 }, { "epoch": 0.39465623062270955, "grad_norm": 1.129381775856018, "learning_rate": 1.3249259240648093e-05, "loss": 0.3374, "step": 21258 }, { "epoch": 0.3946933607601282, "grad_norm": 0.5001177787780762, "learning_rate": 1.3248156022283634e-05, "loss": 0.4029, "step": 21260 }, { "epoch": 0.3947304908975468, "grad_norm": 0.5001845955848694, "learning_rate": 1.3247052759721504e-05, "loss": 0.4877, "step": 21262 }, { "epoch": 0.39476762103496543, "grad_norm": 0.3281537592411041, "learning_rate": 1.3245949452976707e-05, "loss": 0.34, "step": 21264 }, { "epoch": 0.3948047511723841, "grad_norm": 0.2567083537578583, "learning_rate": 1.3244846102064266e-05, "loss": 0.2163, "step": 21266 }, { "epoch": 0.39484188130980274, "grad_norm": 0.40553757548332214, "learning_rate": 1.3243742706999187e-05, "loss": 0.2292, "step": 21268 }, { "epoch": 0.3948790114472214, "grad_norm": 0.5182636380195618, "learning_rate": 1.3242639267796484e-05, "loss": 0.246, "step": 21270 }, { "epoch": 0.39491614158464, "grad_norm": 0.2763744294643402, "learning_rate": 1.3241535784471173e-05, "loss": 0.2178, "step": 21272 }, { "epoch": 0.39495327172205863, "grad_norm": 0.3817051947116852, "learning_rate": 1.3240432257038274e-05, "loss": 0.4699, "step": 21274 }, { "epoch": 0.39499040185947726, "grad_norm": 0.37756794691085815, "learning_rate": 1.3239328685512796e-05, "loss": 0.4243, "step": 21276 }, { "epoch": 0.39502753199689594, "grad_norm": 0.3087480664253235, "learning_rate": 1.3238225069909757e-05, "loss": 0.2684, "step": 21278 }, { "epoch": 0.39506466213431457, "grad_norm": 0.4551987946033478, "learning_rate": 1.3237121410244174e-05, "loss": 0.341, "step": 21280 }, { "epoch": 0.3951017922717332, "grad_norm": 0.28957197070121765, "learning_rate": 1.3236017706531066e-05, "loss": 0.2887, "step": 21282 }, { "epoch": 0.3951389224091518, "grad_norm": 0.36098745465278625, "learning_rate": 1.323491395878545e-05, "loss": 0.179, "step": 21284 }, { "epoch": 0.39517605254657046, "grad_norm": 0.23374038934707642, "learning_rate": 1.3233810167022343e-05, "loss": 0.2984, "step": 21286 }, { "epoch": 0.39521318268398914, "grad_norm": 0.35284125804901123, "learning_rate": 1.3232706331256768e-05, "loss": 0.1746, "step": 21288 }, { "epoch": 0.39525031282140777, "grad_norm": 0.3048955798149109, "learning_rate": 1.3231602451503743e-05, "loss": 0.28, "step": 21290 }, { "epoch": 0.3952874429588264, "grad_norm": 0.38226455450057983, "learning_rate": 1.3230498527778285e-05, "loss": 0.3399, "step": 21292 }, { "epoch": 0.395324573096245, "grad_norm": 0.3001598119735718, "learning_rate": 1.3229394560095421e-05, "loss": 0.5027, "step": 21294 }, { "epoch": 0.39536170323366365, "grad_norm": 0.4842562675476074, "learning_rate": 1.322829054847017e-05, "loss": 0.3179, "step": 21296 }, { "epoch": 0.3953988333710823, "grad_norm": 0.38519835472106934, "learning_rate": 1.3227186492917557e-05, "loss": 0.4255, "step": 21298 }, { "epoch": 0.39543596350850096, "grad_norm": 0.3944139778614044, "learning_rate": 1.3226082393452599e-05, "loss": 0.1568, "step": 21300 }, { "epoch": 0.3954730936459196, "grad_norm": 0.27098917961120605, "learning_rate": 1.3224978250090323e-05, "loss": 0.2928, "step": 21302 }, { "epoch": 0.3955102237833382, "grad_norm": 0.2932801842689514, "learning_rate": 1.3223874062845755e-05, "loss": 0.3957, "step": 21304 }, { "epoch": 0.39554735392075685, "grad_norm": 0.364400178194046, "learning_rate": 1.3222769831733918e-05, "loss": 0.2877, "step": 21306 }, { "epoch": 0.3955844840581755, "grad_norm": 0.45450127124786377, "learning_rate": 1.3221665556769833e-05, "loss": 0.2072, "step": 21308 }, { "epoch": 0.39562161419559416, "grad_norm": 0.4417363107204437, "learning_rate": 1.3220561237968531e-05, "loss": 0.5069, "step": 21310 }, { "epoch": 0.3956587443330128, "grad_norm": 0.4484076201915741, "learning_rate": 1.321945687534504e-05, "loss": 0.3622, "step": 21312 }, { "epoch": 0.3956958744704314, "grad_norm": 0.38349708914756775, "learning_rate": 1.3218352468914381e-05, "loss": 0.3303, "step": 21314 }, { "epoch": 0.39573300460785005, "grad_norm": 0.33295178413391113, "learning_rate": 1.3217248018691589e-05, "loss": 0.2341, "step": 21316 }, { "epoch": 0.3957701347452687, "grad_norm": 0.4847041964530945, "learning_rate": 1.3216143524691684e-05, "loss": 0.4044, "step": 21318 }, { "epoch": 0.3958072648826873, "grad_norm": 0.4032294750213623, "learning_rate": 1.3215038986929702e-05, "loss": 0.2625, "step": 21320 }, { "epoch": 0.395844395020106, "grad_norm": 0.5330463647842407, "learning_rate": 1.321393440542067e-05, "loss": 0.2502, "step": 21322 }, { "epoch": 0.3958815251575246, "grad_norm": 0.35369816422462463, "learning_rate": 1.3212829780179614e-05, "loss": 0.4269, "step": 21324 }, { "epoch": 0.39591865529494324, "grad_norm": 0.31855523586273193, "learning_rate": 1.3211725111221571e-05, "loss": 0.268, "step": 21326 }, { "epoch": 0.3959557854323619, "grad_norm": 0.41287457942962646, "learning_rate": 1.3210620398561568e-05, "loss": 0.2168, "step": 21328 }, { "epoch": 0.3959929155697805, "grad_norm": 0.461836576461792, "learning_rate": 1.3209515642214642e-05, "loss": 0.2727, "step": 21330 }, { "epoch": 0.3960300457071992, "grad_norm": 0.3211372494697571, "learning_rate": 1.3208410842195818e-05, "loss": 0.2892, "step": 21332 }, { "epoch": 0.3960671758446178, "grad_norm": 0.25807759165763855, "learning_rate": 1.3207305998520133e-05, "loss": 0.2613, "step": 21334 }, { "epoch": 0.39610430598203644, "grad_norm": 0.36174342036247253, "learning_rate": 1.3206201111202619e-05, "loss": 0.3193, "step": 21336 }, { "epoch": 0.39614143611945507, "grad_norm": 0.3498457670211792, "learning_rate": 1.3205096180258314e-05, "loss": 0.3475, "step": 21338 }, { "epoch": 0.3961785662568737, "grad_norm": 0.37105366587638855, "learning_rate": 1.320399120570225e-05, "loss": 0.334, "step": 21340 }, { "epoch": 0.3962156963942924, "grad_norm": 0.38353902101516724, "learning_rate": 1.3202886187549465e-05, "loss": 0.2405, "step": 21342 }, { "epoch": 0.396252826531711, "grad_norm": 0.3860570192337036, "learning_rate": 1.320178112581499e-05, "loss": 0.2292, "step": 21344 }, { "epoch": 0.39628995666912964, "grad_norm": 0.48549535870552063, "learning_rate": 1.3200676020513866e-05, "loss": 0.1837, "step": 21346 }, { "epoch": 0.39632708680654827, "grad_norm": 0.5339629054069519, "learning_rate": 1.3199570871661124e-05, "loss": 0.384, "step": 21348 }, { "epoch": 0.3963642169439669, "grad_norm": 0.4551251232624054, "learning_rate": 1.319846567927181e-05, "loss": 0.3641, "step": 21350 }, { "epoch": 0.3964013470813855, "grad_norm": 0.4270746409893036, "learning_rate": 1.319736044336096e-05, "loss": 0.2495, "step": 21352 }, { "epoch": 0.3964384772188042, "grad_norm": 0.32252249121665955, "learning_rate": 1.3196255163943608e-05, "loss": 0.2225, "step": 21354 }, { "epoch": 0.39647560735622284, "grad_norm": 0.34464919567108154, "learning_rate": 1.31951498410348e-05, "loss": 0.4904, "step": 21356 }, { "epoch": 0.39651273749364147, "grad_norm": 0.3619462847709656, "learning_rate": 1.3194044474649572e-05, "loss": 0.2859, "step": 21358 }, { "epoch": 0.3965498676310601, "grad_norm": 0.4266261160373688, "learning_rate": 1.3192939064802966e-05, "loss": 0.2435, "step": 21360 }, { "epoch": 0.3965869977684787, "grad_norm": 0.5674176216125488, "learning_rate": 1.3191833611510024e-05, "loss": 0.3594, "step": 21362 }, { "epoch": 0.3966241279058974, "grad_norm": 0.44203129410743713, "learning_rate": 1.3190728114785784e-05, "loss": 0.2809, "step": 21364 }, { "epoch": 0.39666125804331603, "grad_norm": 0.45486965775489807, "learning_rate": 1.3189622574645295e-05, "loss": 0.2336, "step": 21366 }, { "epoch": 0.39669838818073466, "grad_norm": 0.3379833698272705, "learning_rate": 1.3188516991103595e-05, "loss": 0.3201, "step": 21368 }, { "epoch": 0.3967355183181533, "grad_norm": 0.3028241991996765, "learning_rate": 1.318741136417573e-05, "loss": 0.2787, "step": 21370 }, { "epoch": 0.3967726484555719, "grad_norm": 0.3652766942977905, "learning_rate": 1.3186305693876746e-05, "loss": 0.3897, "step": 21372 }, { "epoch": 0.39680977859299055, "grad_norm": 0.39875528216362, "learning_rate": 1.3185199980221683e-05, "loss": 0.3577, "step": 21374 }, { "epoch": 0.39684690873040923, "grad_norm": 0.3411742150783539, "learning_rate": 1.3184094223225592e-05, "loss": 0.2498, "step": 21376 }, { "epoch": 0.39688403886782786, "grad_norm": 0.4538913667201996, "learning_rate": 1.3182988422903513e-05, "loss": 0.3293, "step": 21378 }, { "epoch": 0.3969211690052465, "grad_norm": 0.46343672275543213, "learning_rate": 1.3181882579270495e-05, "loss": 0.5112, "step": 21380 }, { "epoch": 0.3969582991426651, "grad_norm": 2.7093591690063477, "learning_rate": 1.3180776692341589e-05, "loss": 0.3928, "step": 21382 }, { "epoch": 0.39699542928008374, "grad_norm": 0.44910404086112976, "learning_rate": 1.3179670762131838e-05, "loss": 0.2048, "step": 21384 }, { "epoch": 0.39703255941750243, "grad_norm": 0.3405029773712158, "learning_rate": 1.3178564788656291e-05, "loss": 0.2839, "step": 21386 }, { "epoch": 0.39706968955492106, "grad_norm": 0.32834893465042114, "learning_rate": 1.317745877193e-05, "loss": 0.1788, "step": 21388 }, { "epoch": 0.3971068196923397, "grad_norm": 0.42046496272087097, "learning_rate": 1.3176352711968013e-05, "loss": 0.362, "step": 21390 }, { "epoch": 0.3971439498297583, "grad_norm": 0.4401164948940277, "learning_rate": 1.317524660878538e-05, "loss": 0.4253, "step": 21392 }, { "epoch": 0.39718107996717694, "grad_norm": 0.3752206563949585, "learning_rate": 1.3174140462397152e-05, "loss": 0.3122, "step": 21394 }, { "epoch": 0.39721821010459557, "grad_norm": 0.4886712431907654, "learning_rate": 1.3173034272818378e-05, "loss": 0.3552, "step": 21396 }, { "epoch": 0.39725534024201425, "grad_norm": 0.340210884809494, "learning_rate": 1.317192804006411e-05, "loss": 0.3417, "step": 21398 }, { "epoch": 0.3972924703794329, "grad_norm": 0.3552398085594177, "learning_rate": 1.3170821764149406e-05, "loss": 0.2166, "step": 21400 }, { "epoch": 0.3973296005168515, "grad_norm": 0.36921200156211853, "learning_rate": 1.3169715445089315e-05, "loss": 0.3026, "step": 21402 }, { "epoch": 0.39736673065427014, "grad_norm": 0.34347596764564514, "learning_rate": 1.3168609082898892e-05, "loss": 0.3594, "step": 21404 }, { "epoch": 0.39740386079168877, "grad_norm": 0.24991226196289062, "learning_rate": 1.3167502677593187e-05, "loss": 0.2462, "step": 21406 }, { "epoch": 0.39744099092910745, "grad_norm": 0.49743711948394775, "learning_rate": 1.3166396229187262e-05, "loss": 0.4166, "step": 21408 }, { "epoch": 0.3974781210665261, "grad_norm": 3.030057430267334, "learning_rate": 1.3165289737696165e-05, "loss": 0.0842, "step": 21410 }, { "epoch": 0.3975152512039447, "grad_norm": 0.5286726355552673, "learning_rate": 1.3164183203134957e-05, "loss": 0.3606, "step": 21412 }, { "epoch": 0.39755238134136334, "grad_norm": 0.5562342405319214, "learning_rate": 1.3163076625518699e-05, "loss": 0.2306, "step": 21414 }, { "epoch": 0.39758951147878197, "grad_norm": 0.3972679674625397, "learning_rate": 1.3161970004862437e-05, "loss": 0.3435, "step": 21416 }, { "epoch": 0.39762664161620065, "grad_norm": 0.362491637468338, "learning_rate": 1.3160863341181234e-05, "loss": 0.5325, "step": 21418 }, { "epoch": 0.3976637717536193, "grad_norm": 0.4831483066082001, "learning_rate": 1.315975663449015e-05, "loss": 0.3474, "step": 21420 }, { "epoch": 0.3977009018910379, "grad_norm": 0.40423882007598877, "learning_rate": 1.3158649884804242e-05, "loss": 0.1777, "step": 21422 }, { "epoch": 0.39773803202845653, "grad_norm": 0.3032251000404358, "learning_rate": 1.3157543092138574e-05, "loss": 0.3354, "step": 21424 }, { "epoch": 0.39777516216587516, "grad_norm": 0.3344537317752838, "learning_rate": 1.31564362565082e-05, "loss": 0.3274, "step": 21426 }, { "epoch": 0.3978122923032938, "grad_norm": 0.5467990636825562, "learning_rate": 1.3155329377928183e-05, "loss": 0.2467, "step": 21428 }, { "epoch": 0.3978494224407125, "grad_norm": 0.2842448055744171, "learning_rate": 1.3154222456413584e-05, "loss": 0.2789, "step": 21430 }, { "epoch": 0.3978865525781311, "grad_norm": 0.6168789863586426, "learning_rate": 1.3153115491979467e-05, "loss": 0.3106, "step": 21432 }, { "epoch": 0.39792368271554973, "grad_norm": 0.4014568328857422, "learning_rate": 1.3152008484640891e-05, "loss": 0.2412, "step": 21434 }, { "epoch": 0.39796081285296836, "grad_norm": 0.2806049883365631, "learning_rate": 1.3150901434412921e-05, "loss": 0.2462, "step": 21436 }, { "epoch": 0.397997942990387, "grad_norm": 0.2523127496242523, "learning_rate": 1.3149794341310623e-05, "loss": 0.2597, "step": 21438 }, { "epoch": 0.39803507312780567, "grad_norm": 0.44269904494285583, "learning_rate": 1.3148687205349055e-05, "loss": 0.3978, "step": 21440 }, { "epoch": 0.3980722032652243, "grad_norm": 0.3841804563999176, "learning_rate": 1.3147580026543288e-05, "loss": 0.4216, "step": 21442 }, { "epoch": 0.39810933340264293, "grad_norm": 0.5604090690612793, "learning_rate": 1.3146472804908386e-05, "loss": 0.3827, "step": 21444 }, { "epoch": 0.39814646354006156, "grad_norm": 0.5545257329940796, "learning_rate": 1.3145365540459412e-05, "loss": 0.2932, "step": 21446 }, { "epoch": 0.3981835936774802, "grad_norm": 0.28801146149635315, "learning_rate": 1.3144258233211434e-05, "loss": 0.2779, "step": 21448 }, { "epoch": 0.3982207238148988, "grad_norm": 0.29569265246391296, "learning_rate": 1.3143150883179519e-05, "loss": 0.2422, "step": 21450 }, { "epoch": 0.3982578539523175, "grad_norm": 0.4048759937286377, "learning_rate": 1.3142043490378737e-05, "loss": 0.3219, "step": 21452 }, { "epoch": 0.3982949840897361, "grad_norm": 0.5841191411018372, "learning_rate": 1.3140936054824153e-05, "loss": 0.3237, "step": 21454 }, { "epoch": 0.39833211422715475, "grad_norm": 0.3433621823787689, "learning_rate": 1.313982857653084e-05, "loss": 0.2875, "step": 21456 }, { "epoch": 0.3983692443645734, "grad_norm": 0.541553795337677, "learning_rate": 1.3138721055513866e-05, "loss": 0.2948, "step": 21458 }, { "epoch": 0.398406374501992, "grad_norm": 0.32442864775657654, "learning_rate": 1.3137613491788295e-05, "loss": 0.1831, "step": 21460 }, { "epoch": 0.3984435046394107, "grad_norm": 0.45583680272102356, "learning_rate": 1.3136505885369203e-05, "loss": 0.1952, "step": 21462 }, { "epoch": 0.3984806347768293, "grad_norm": 0.41744568943977356, "learning_rate": 1.3135398236271666e-05, "loss": 0.4591, "step": 21464 }, { "epoch": 0.39851776491424795, "grad_norm": 0.4317241311073303, "learning_rate": 1.3134290544510746e-05, "loss": 0.238, "step": 21466 }, { "epoch": 0.3985548950516666, "grad_norm": 0.40772545337677, "learning_rate": 1.3133182810101524e-05, "loss": 0.1757, "step": 21468 }, { "epoch": 0.3985920251890852, "grad_norm": 0.3294576406478882, "learning_rate": 1.3132075033059067e-05, "loss": 0.2891, "step": 21470 }, { "epoch": 0.39862915532650384, "grad_norm": 0.3333803713321686, "learning_rate": 1.3130967213398448e-05, "loss": 0.2023, "step": 21472 }, { "epoch": 0.3986662854639225, "grad_norm": 0.26052340865135193, "learning_rate": 1.3129859351134746e-05, "loss": 0.3567, "step": 21474 }, { "epoch": 0.39870341560134115, "grad_norm": 0.29549750685691833, "learning_rate": 1.3128751446283032e-05, "loss": 0.2689, "step": 21476 }, { "epoch": 0.3987405457387598, "grad_norm": 0.3896428942680359, "learning_rate": 1.3127643498858386e-05, "loss": 0.3316, "step": 21478 }, { "epoch": 0.3987776758761784, "grad_norm": 0.5215996503829956, "learning_rate": 1.3126535508875878e-05, "loss": 0.4622, "step": 21480 }, { "epoch": 0.39881480601359703, "grad_norm": 0.3271085023880005, "learning_rate": 1.3125427476350586e-05, "loss": 0.2186, "step": 21482 }, { "epoch": 0.3988519361510157, "grad_norm": 0.36718741059303284, "learning_rate": 1.3124319401297587e-05, "loss": 0.376, "step": 21484 }, { "epoch": 0.39888906628843435, "grad_norm": 0.4738021790981293, "learning_rate": 1.312321128373196e-05, "loss": 0.3363, "step": 21486 }, { "epoch": 0.398926196425853, "grad_norm": 0.35821533203125, "learning_rate": 1.3122103123668783e-05, "loss": 0.2736, "step": 21488 }, { "epoch": 0.3989633265632716, "grad_norm": 0.4866703152656555, "learning_rate": 1.3120994921123136e-05, "loss": 0.3008, "step": 21490 }, { "epoch": 0.39900045670069023, "grad_norm": 0.6055618524551392, "learning_rate": 1.3119886676110094e-05, "loss": 0.1778, "step": 21492 }, { "epoch": 0.3990375868381089, "grad_norm": 0.38749030232429504, "learning_rate": 1.3118778388644739e-05, "loss": 0.4815, "step": 21494 }, { "epoch": 0.39907471697552754, "grad_norm": 0.32699114084243774, "learning_rate": 1.3117670058742156e-05, "loss": 0.4089, "step": 21496 }, { "epoch": 0.39911184711294617, "grad_norm": 0.37155407667160034, "learning_rate": 1.3116561686417419e-05, "loss": 0.3324, "step": 21498 }, { "epoch": 0.3991489772503648, "grad_norm": 0.38316500186920166, "learning_rate": 1.3115453271685614e-05, "loss": 0.1814, "step": 21500 }, { "epoch": 0.39918610738778343, "grad_norm": 0.5003445744514465, "learning_rate": 1.3114344814561822e-05, "loss": 0.3445, "step": 21502 }, { "epoch": 0.39922323752520206, "grad_norm": 0.3085777759552002, "learning_rate": 1.3113236315061122e-05, "loss": 0.3262, "step": 21504 }, { "epoch": 0.39926036766262074, "grad_norm": 0.28057214617729187, "learning_rate": 1.3112127773198605e-05, "loss": 0.2253, "step": 21506 }, { "epoch": 0.39929749780003937, "grad_norm": 0.34965234994888306, "learning_rate": 1.311101918898935e-05, "loss": 0.2385, "step": 21508 }, { "epoch": 0.399334627937458, "grad_norm": 0.2598631978034973, "learning_rate": 1.3109910562448441e-05, "loss": 0.2407, "step": 21510 }, { "epoch": 0.3993717580748766, "grad_norm": 0.518075704574585, "learning_rate": 1.3108801893590965e-05, "loss": 0.1029, "step": 21512 }, { "epoch": 0.39940888821229525, "grad_norm": 0.5881736874580383, "learning_rate": 1.3107693182432009e-05, "loss": 0.3001, "step": 21514 }, { "epoch": 0.39944601834971394, "grad_norm": 0.34947633743286133, "learning_rate": 1.3106584428986655e-05, "loss": 0.3163, "step": 21516 }, { "epoch": 0.39948314848713257, "grad_norm": 0.4749600291252136, "learning_rate": 1.310547563326999e-05, "loss": 0.378, "step": 21518 }, { "epoch": 0.3995202786245512, "grad_norm": 0.4823436141014099, "learning_rate": 1.310436679529711e-05, "loss": 0.345, "step": 21520 }, { "epoch": 0.3995574087619698, "grad_norm": 0.41014525294303894, "learning_rate": 1.3103257915083094e-05, "loss": 0.3339, "step": 21522 }, { "epoch": 0.39959453889938845, "grad_norm": 0.415578693151474, "learning_rate": 1.3102148992643031e-05, "loss": 0.2949, "step": 21524 }, { "epoch": 0.3996316690368071, "grad_norm": 0.30829232931137085, "learning_rate": 1.3101040027992013e-05, "loss": 0.2404, "step": 21526 }, { "epoch": 0.39966879917422576, "grad_norm": 0.2791459858417511, "learning_rate": 1.3099931021145128e-05, "loss": 0.2947, "step": 21528 }, { "epoch": 0.3997059293116444, "grad_norm": 0.4334114193916321, "learning_rate": 1.3098821972117472e-05, "loss": 0.3971, "step": 21530 }, { "epoch": 0.399743059449063, "grad_norm": 0.3893239200115204, "learning_rate": 1.3097712880924127e-05, "loss": 0.3549, "step": 21532 }, { "epoch": 0.39978018958648165, "grad_norm": 0.5558448433876038, "learning_rate": 1.309660374758019e-05, "loss": 0.2364, "step": 21534 }, { "epoch": 0.3998173197239003, "grad_norm": 0.5246394872665405, "learning_rate": 1.3095494572100749e-05, "loss": 0.2716, "step": 21536 }, { "epoch": 0.39985444986131896, "grad_norm": 0.35002467036247253, "learning_rate": 1.30943853545009e-05, "loss": 0.327, "step": 21538 }, { "epoch": 0.3998915799987376, "grad_norm": 0.42251402139663696, "learning_rate": 1.3093276094795738e-05, "loss": 0.3332, "step": 21540 }, { "epoch": 0.3999287101361562, "grad_norm": 0.3282107710838318, "learning_rate": 1.3092166793000351e-05, "loss": 0.3129, "step": 21542 }, { "epoch": 0.39996584027357485, "grad_norm": 0.367582768201828, "learning_rate": 1.3091057449129837e-05, "loss": 0.3116, "step": 21544 }, { "epoch": 0.4000029704109935, "grad_norm": 0.28339990973472595, "learning_rate": 1.308994806319929e-05, "loss": 0.1659, "step": 21546 }, { "epoch": 0.4000401005484121, "grad_norm": 0.44281962513923645, "learning_rate": 1.3088838635223802e-05, "loss": 0.2373, "step": 21548 }, { "epoch": 0.4000772306858308, "grad_norm": 0.5008556246757507, "learning_rate": 1.3087729165218475e-05, "loss": 0.2859, "step": 21550 }, { "epoch": 0.4001143608232494, "grad_norm": 0.47648438811302185, "learning_rate": 1.3086619653198405e-05, "loss": 0.361, "step": 21552 }, { "epoch": 0.40015149096066804, "grad_norm": 0.3815816044807434, "learning_rate": 1.3085510099178684e-05, "loss": 0.2648, "step": 21554 }, { "epoch": 0.40018862109808667, "grad_norm": 0.28419145941734314, "learning_rate": 1.3084400503174413e-05, "loss": 0.24, "step": 21556 }, { "epoch": 0.4002257512355053, "grad_norm": 0.24789069592952728, "learning_rate": 1.3083290865200691e-05, "loss": 0.1528, "step": 21558 }, { "epoch": 0.400262881372924, "grad_norm": 0.39886751770973206, "learning_rate": 1.3082181185272616e-05, "loss": 0.4838, "step": 21560 }, { "epoch": 0.4003000115103426, "grad_norm": 0.3590618371963501, "learning_rate": 1.3081071463405286e-05, "loss": 0.3311, "step": 21562 }, { "epoch": 0.40033714164776124, "grad_norm": 0.4033971428871155, "learning_rate": 1.3079961699613803e-05, "loss": 0.3016, "step": 21564 }, { "epoch": 0.40037427178517987, "grad_norm": 0.4263336658477783, "learning_rate": 1.3078851893913267e-05, "loss": 0.5059, "step": 21566 }, { "epoch": 0.4004114019225985, "grad_norm": 0.3186505138874054, "learning_rate": 1.3077742046318777e-05, "loss": 0.2991, "step": 21568 }, { "epoch": 0.4004485320600172, "grad_norm": 0.26430410146713257, "learning_rate": 1.3076632156845438e-05, "loss": 0.1828, "step": 21570 }, { "epoch": 0.4004856621974358, "grad_norm": 0.4302446246147156, "learning_rate": 1.3075522225508352e-05, "loss": 0.2867, "step": 21572 }, { "epoch": 0.40052279233485444, "grad_norm": 0.34651967883110046, "learning_rate": 1.3074412252322622e-05, "loss": 0.0938, "step": 21574 }, { "epoch": 0.40055992247227307, "grad_norm": 0.38205254077911377, "learning_rate": 1.3073302237303347e-05, "loss": 0.2982, "step": 21576 }, { "epoch": 0.4005970526096917, "grad_norm": 0.333000510931015, "learning_rate": 1.3072192180465638e-05, "loss": 0.2751, "step": 21578 }, { "epoch": 0.4006341827471103, "grad_norm": 0.28413447737693787, "learning_rate": 1.307108208182459e-05, "loss": 0.1093, "step": 21580 }, { "epoch": 0.400671312884529, "grad_norm": 0.20403039455413818, "learning_rate": 1.3069971941395319e-05, "loss": 0.2215, "step": 21582 }, { "epoch": 0.40070844302194764, "grad_norm": 0.3771042227745056, "learning_rate": 1.3068861759192926e-05, "loss": 0.3641, "step": 21584 }, { "epoch": 0.40074557315936626, "grad_norm": 0.36434975266456604, "learning_rate": 1.3067751535232511e-05, "loss": 0.1067, "step": 21586 }, { "epoch": 0.4007827032967849, "grad_norm": 0.2780624032020569, "learning_rate": 1.306664126952919e-05, "loss": 0.2565, "step": 21588 }, { "epoch": 0.4008198334342035, "grad_norm": 0.368868887424469, "learning_rate": 1.3065530962098068e-05, "loss": 0.4183, "step": 21590 }, { "epoch": 0.4008569635716222, "grad_norm": 0.48014897108078003, "learning_rate": 1.306442061295425e-05, "loss": 0.2594, "step": 21592 }, { "epoch": 0.40089409370904083, "grad_norm": 0.4141768515110016, "learning_rate": 1.3063310222112847e-05, "loss": 0.3086, "step": 21594 }, { "epoch": 0.40093122384645946, "grad_norm": 0.762408435344696, "learning_rate": 1.3062199789588969e-05, "loss": 0.3154, "step": 21596 }, { "epoch": 0.4009683539838781, "grad_norm": 0.4782993495464325, "learning_rate": 1.3061089315397721e-05, "loss": 0.3074, "step": 21598 }, { "epoch": 0.4010054841212967, "grad_norm": 0.46653079986572266, "learning_rate": 1.3059978799554218e-05, "loss": 0.2632, "step": 21600 }, { "epoch": 0.40104261425871535, "grad_norm": 0.3656485676765442, "learning_rate": 1.3058868242073572e-05, "loss": 0.3791, "step": 21602 }, { "epoch": 0.40107974439613403, "grad_norm": 0.40548112988471985, "learning_rate": 1.3057757642970887e-05, "loss": 0.4675, "step": 21604 }, { "epoch": 0.40111687453355266, "grad_norm": 0.3303479552268982, "learning_rate": 1.3056647002261283e-05, "loss": 0.4633, "step": 21606 }, { "epoch": 0.4011540046709713, "grad_norm": 0.36539745330810547, "learning_rate": 1.3055536319959868e-05, "loss": 0.3138, "step": 21608 }, { "epoch": 0.4011911348083899, "grad_norm": 0.5113174915313721, "learning_rate": 1.3054425596081757e-05, "loss": 0.2921, "step": 21610 }, { "epoch": 0.40122826494580854, "grad_norm": 0.3386172950267792, "learning_rate": 1.305331483064206e-05, "loss": 0.1756, "step": 21612 }, { "epoch": 0.4012653950832272, "grad_norm": 0.513117790222168, "learning_rate": 1.3052204023655897e-05, "loss": 0.4476, "step": 21614 }, { "epoch": 0.40130252522064586, "grad_norm": 0.4559982717037201, "learning_rate": 1.3051093175138379e-05, "loss": 0.3686, "step": 21616 }, { "epoch": 0.4013396553580645, "grad_norm": 0.378753125667572, "learning_rate": 1.3049982285104618e-05, "loss": 0.3007, "step": 21618 }, { "epoch": 0.4013767854954831, "grad_norm": 0.37841469049453735, "learning_rate": 1.3048871353569737e-05, "loss": 0.3634, "step": 21620 }, { "epoch": 0.40141391563290174, "grad_norm": 0.28112712502479553, "learning_rate": 1.304776038054885e-05, "loss": 0.2533, "step": 21622 }, { "epoch": 0.40145104577032037, "grad_norm": 0.389801949262619, "learning_rate": 1.304664936605707e-05, "loss": 0.2219, "step": 21624 }, { "epoch": 0.40148817590773905, "grad_norm": 0.2653312087059021, "learning_rate": 1.3045538310109524e-05, "loss": 0.2627, "step": 21626 }, { "epoch": 0.4015253060451577, "grad_norm": 0.5216689109802246, "learning_rate": 1.304442721272132e-05, "loss": 0.2086, "step": 21628 }, { "epoch": 0.4015624361825763, "grad_norm": 1.0068920850753784, "learning_rate": 1.3043316073907582e-05, "loss": 0.2471, "step": 21630 }, { "epoch": 0.40159956631999494, "grad_norm": 0.42398956418037415, "learning_rate": 1.3042204893683425e-05, "loss": 0.3777, "step": 21632 }, { "epoch": 0.40163669645741357, "grad_norm": 0.4337816536426544, "learning_rate": 1.3041093672063976e-05, "loss": 0.4506, "step": 21634 }, { "epoch": 0.40167382659483225, "grad_norm": 0.32641860842704773, "learning_rate": 1.303998240906435e-05, "loss": 0.32, "step": 21636 }, { "epoch": 0.4017109567322509, "grad_norm": 0.4616388976573944, "learning_rate": 1.3038871104699666e-05, "loss": 0.2949, "step": 21638 }, { "epoch": 0.4017480868696695, "grad_norm": 0.41880515217781067, "learning_rate": 1.3037759758985053e-05, "loss": 0.142, "step": 21640 }, { "epoch": 0.40178521700708814, "grad_norm": 0.9421810507774353, "learning_rate": 1.3036648371935628e-05, "loss": 0.3271, "step": 21642 }, { "epoch": 0.40182234714450676, "grad_norm": 0.2598293423652649, "learning_rate": 1.3035536943566512e-05, "loss": 0.2909, "step": 21644 }, { "epoch": 0.40185947728192545, "grad_norm": 0.4688859283924103, "learning_rate": 1.3034425473892833e-05, "loss": 0.199, "step": 21646 }, { "epoch": 0.4018966074193441, "grad_norm": 0.28260838985443115, "learning_rate": 1.3033313962929713e-05, "loss": 0.2756, "step": 21648 }, { "epoch": 0.4019337375567627, "grad_norm": 0.4188922345638275, "learning_rate": 1.3032202410692274e-05, "loss": 0.4869, "step": 21650 }, { "epoch": 0.40197086769418133, "grad_norm": 0.25489652156829834, "learning_rate": 1.3031090817195641e-05, "loss": 0.3904, "step": 21652 }, { "epoch": 0.40200799783159996, "grad_norm": 0.691049337387085, "learning_rate": 1.3029979182454944e-05, "loss": 0.295, "step": 21654 }, { "epoch": 0.4020451279690186, "grad_norm": 0.4149673283100128, "learning_rate": 1.3028867506485305e-05, "loss": 0.201, "step": 21656 }, { "epoch": 0.4020822581064373, "grad_norm": 0.38414302468299866, "learning_rate": 1.3027755789301853e-05, "loss": 0.3103, "step": 21658 }, { "epoch": 0.4021193882438559, "grad_norm": 0.360568106174469, "learning_rate": 1.3026644030919715e-05, "loss": 0.1855, "step": 21660 }, { "epoch": 0.40215651838127453, "grad_norm": 0.41245150566101074, "learning_rate": 1.3025532231354014e-05, "loss": 0.2322, "step": 21662 }, { "epoch": 0.40219364851869316, "grad_norm": 0.38634243607521057, "learning_rate": 1.3024420390619882e-05, "loss": 0.2568, "step": 21664 }, { "epoch": 0.4022307786561118, "grad_norm": 0.41453200578689575, "learning_rate": 1.302330850873245e-05, "loss": 0.2059, "step": 21666 }, { "epoch": 0.40226790879353047, "grad_norm": 0.3939494490623474, "learning_rate": 1.3022196585706848e-05, "loss": 0.1023, "step": 21668 }, { "epoch": 0.4023050389309491, "grad_norm": 0.5226161479949951, "learning_rate": 1.3021084621558197e-05, "loss": 0.4945, "step": 21670 }, { "epoch": 0.4023421690683677, "grad_norm": 0.5636163949966431, "learning_rate": 1.3019972616301637e-05, "loss": 0.3663, "step": 21672 }, { "epoch": 0.40237929920578636, "grad_norm": 0.31683680415153503, "learning_rate": 1.3018860569952292e-05, "loss": 0.3103, "step": 21674 }, { "epoch": 0.402416429343205, "grad_norm": 0.3009171783924103, "learning_rate": 1.30177484825253e-05, "loss": 0.3506, "step": 21676 }, { "epoch": 0.4024535594806236, "grad_norm": 0.4664005935192108, "learning_rate": 1.3016636354035793e-05, "loss": 0.2831, "step": 21678 }, { "epoch": 0.4024906896180423, "grad_norm": 0.5344043374061584, "learning_rate": 1.3015524184498895e-05, "loss": 0.097, "step": 21680 }, { "epoch": 0.4025278197554609, "grad_norm": 0.5564618706703186, "learning_rate": 1.301441197392975e-05, "loss": 0.4211, "step": 21682 }, { "epoch": 0.40256494989287955, "grad_norm": 0.5117163062095642, "learning_rate": 1.3013299722343487e-05, "loss": 0.277, "step": 21684 }, { "epoch": 0.4026020800302982, "grad_norm": 0.34625816345214844, "learning_rate": 1.3012187429755237e-05, "loss": 0.2664, "step": 21686 }, { "epoch": 0.4026392101677168, "grad_norm": 0.9470846652984619, "learning_rate": 1.3011075096180142e-05, "loss": 0.3186, "step": 21688 }, { "epoch": 0.4026763403051355, "grad_norm": 0.3190852999687195, "learning_rate": 1.3009962721633333e-05, "loss": 0.4491, "step": 21690 }, { "epoch": 0.4027134704425541, "grad_norm": 0.2218458652496338, "learning_rate": 1.3008850306129948e-05, "loss": 0.1908, "step": 21692 }, { "epoch": 0.40275060057997275, "grad_norm": 0.40608659386634827, "learning_rate": 1.300773784968512e-05, "loss": 0.4634, "step": 21694 }, { "epoch": 0.4027877307173914, "grad_norm": 0.33153679966926575, "learning_rate": 1.3006625352313991e-05, "loss": 0.4621, "step": 21696 }, { "epoch": 0.40282486085481, "grad_norm": 0.29292234778404236, "learning_rate": 1.3005512814031697e-05, "loss": 0.2983, "step": 21698 }, { "epoch": 0.40286199099222864, "grad_norm": 0.4406436085700989, "learning_rate": 1.3004400234853378e-05, "loss": 0.2223, "step": 21700 }, { "epoch": 0.4028991211296473, "grad_norm": 0.3828709125518799, "learning_rate": 1.300328761479417e-05, "loss": 0.4096, "step": 21702 }, { "epoch": 0.40293625126706595, "grad_norm": 0.4048078656196594, "learning_rate": 1.3002174953869214e-05, "loss": 0.2852, "step": 21704 }, { "epoch": 0.4029733814044846, "grad_norm": 0.3186642527580261, "learning_rate": 1.3001062252093645e-05, "loss": 0.1662, "step": 21706 }, { "epoch": 0.4030105115419032, "grad_norm": 0.39670151472091675, "learning_rate": 1.299994950948261e-05, "loss": 0.249, "step": 21708 }, { "epoch": 0.40304764167932183, "grad_norm": 0.3205305337905884, "learning_rate": 1.2998836726051252e-05, "loss": 0.3076, "step": 21710 }, { "epoch": 0.4030847718167405, "grad_norm": 0.26526880264282227, "learning_rate": 1.2997723901814707e-05, "loss": 0.236, "step": 21712 }, { "epoch": 0.40312190195415915, "grad_norm": 0.3418671488761902, "learning_rate": 1.2996611036788117e-05, "loss": 0.2131, "step": 21714 }, { "epoch": 0.4031590320915778, "grad_norm": 0.3721555769443512, "learning_rate": 1.2995498130986627e-05, "loss": 0.2716, "step": 21716 }, { "epoch": 0.4031961622289964, "grad_norm": 0.28217655420303345, "learning_rate": 1.299438518442538e-05, "loss": 0.0978, "step": 21718 }, { "epoch": 0.40323329236641503, "grad_norm": 0.25930649042129517, "learning_rate": 1.2993272197119522e-05, "loss": 0.256, "step": 21720 }, { "epoch": 0.4032704225038337, "grad_norm": 0.2782888114452362, "learning_rate": 1.2992159169084194e-05, "loss": 0.5646, "step": 21722 }, { "epoch": 0.40330755264125234, "grad_norm": 0.398754745721817, "learning_rate": 1.2991046100334541e-05, "loss": 0.3, "step": 21724 }, { "epoch": 0.40334468277867097, "grad_norm": 0.3119527995586395, "learning_rate": 1.298993299088571e-05, "loss": 0.3481, "step": 21726 }, { "epoch": 0.4033818129160896, "grad_norm": 0.353280633687973, "learning_rate": 1.2988819840752849e-05, "loss": 0.0875, "step": 21728 }, { "epoch": 0.4034189430535082, "grad_norm": 0.49691230058670044, "learning_rate": 1.29877066499511e-05, "loss": 0.3868, "step": 21730 }, { "epoch": 0.40345607319092686, "grad_norm": 0.4137439429759979, "learning_rate": 1.2986593418495614e-05, "loss": 0.3547, "step": 21732 }, { "epoch": 0.40349320332834554, "grad_norm": 0.3058105707168579, "learning_rate": 1.2985480146401537e-05, "loss": 0.2253, "step": 21734 }, { "epoch": 0.40353033346576417, "grad_norm": 0.2684827446937561, "learning_rate": 1.2984366833684019e-05, "loss": 0.2719, "step": 21736 }, { "epoch": 0.4035674636031828, "grad_norm": 0.5444830060005188, "learning_rate": 1.2983253480358205e-05, "loss": 0.3607, "step": 21738 }, { "epoch": 0.4036045937406014, "grad_norm": 0.5136457085609436, "learning_rate": 1.2982140086439248e-05, "loss": 0.1674, "step": 21740 }, { "epoch": 0.40364172387802005, "grad_norm": 0.3209325671195984, "learning_rate": 1.2981026651942299e-05, "loss": 0.4467, "step": 21742 }, { "epoch": 0.40367885401543874, "grad_norm": 0.2790270447731018, "learning_rate": 1.2979913176882505e-05, "loss": 0.1646, "step": 21744 }, { "epoch": 0.40371598415285737, "grad_norm": 0.3251958191394806, "learning_rate": 1.297879966127502e-05, "loss": 0.2531, "step": 21746 }, { "epoch": 0.403753114290276, "grad_norm": 0.47938621044158936, "learning_rate": 1.2977686105134993e-05, "loss": 0.215, "step": 21748 }, { "epoch": 0.4037902444276946, "grad_norm": 0.38935449719429016, "learning_rate": 1.2976572508477577e-05, "loss": 0.1579, "step": 21750 }, { "epoch": 0.40382737456511325, "grad_norm": 0.3315196633338928, "learning_rate": 1.2975458871317925e-05, "loss": 0.2163, "step": 21752 }, { "epoch": 0.4038645047025319, "grad_norm": 0.5032125115394592, "learning_rate": 1.2974345193671192e-05, "loss": 0.188, "step": 21754 }, { "epoch": 0.40390163483995056, "grad_norm": 0.37132251262664795, "learning_rate": 1.2973231475552526e-05, "loss": 0.1701, "step": 21756 }, { "epoch": 0.4039387649773692, "grad_norm": 0.39295119047164917, "learning_rate": 1.2972117716977088e-05, "loss": 0.1931, "step": 21758 }, { "epoch": 0.4039758951147878, "grad_norm": 0.4047181010246277, "learning_rate": 1.2971003917960032e-05, "loss": 0.4227, "step": 21760 }, { "epoch": 0.40401302525220645, "grad_norm": 0.5436265468597412, "learning_rate": 1.2969890078516509e-05, "loss": 0.2296, "step": 21762 }, { "epoch": 0.4040501553896251, "grad_norm": 0.3423088788986206, "learning_rate": 1.296877619866168e-05, "loss": 0.4142, "step": 21764 }, { "epoch": 0.40408728552704376, "grad_norm": 0.4568958878517151, "learning_rate": 1.29676622784107e-05, "loss": 0.1749, "step": 21766 }, { "epoch": 0.4041244156644624, "grad_norm": 0.36838066577911377, "learning_rate": 1.2966548317778722e-05, "loss": 0.2227, "step": 21768 }, { "epoch": 0.404161545801881, "grad_norm": 0.44415345788002014, "learning_rate": 1.296543431678091e-05, "loss": 0.2351, "step": 21770 }, { "epoch": 0.40419867593929965, "grad_norm": 0.32770678400993347, "learning_rate": 1.2964320275432418e-05, "loss": 0.41, "step": 21772 }, { "epoch": 0.4042358060767183, "grad_norm": 0.33450430631637573, "learning_rate": 1.296320619374841e-05, "loss": 0.4396, "step": 21774 }, { "epoch": 0.4042729362141369, "grad_norm": 0.7526260614395142, "learning_rate": 1.2962092071744037e-05, "loss": 0.17, "step": 21776 }, { "epoch": 0.4043100663515556, "grad_norm": 0.3169478178024292, "learning_rate": 1.2960977909434465e-05, "loss": 0.2449, "step": 21778 }, { "epoch": 0.4043471964889742, "grad_norm": 0.5039459466934204, "learning_rate": 1.2959863706834852e-05, "loss": 0.1448, "step": 21780 }, { "epoch": 0.40438432662639284, "grad_norm": 0.2954540252685547, "learning_rate": 1.2958749463960361e-05, "loss": 0.1503, "step": 21782 }, { "epoch": 0.40442145676381147, "grad_norm": 0.47662824392318726, "learning_rate": 1.295763518082615e-05, "loss": 0.3386, "step": 21784 }, { "epoch": 0.4044585869012301, "grad_norm": 0.30216357111930847, "learning_rate": 1.2956520857447389e-05, "loss": 0.3492, "step": 21786 }, { "epoch": 0.4044957170386488, "grad_norm": 0.39031192660331726, "learning_rate": 1.295540649383923e-05, "loss": 0.2154, "step": 21788 }, { "epoch": 0.4045328471760674, "grad_norm": 0.32937124371528625, "learning_rate": 1.295429209001684e-05, "loss": 0.1658, "step": 21790 }, { "epoch": 0.40456997731348604, "grad_norm": 0.34788423776626587, "learning_rate": 1.2953177645995387e-05, "loss": 0.3279, "step": 21792 }, { "epoch": 0.40460710745090467, "grad_norm": 0.20182204246520996, "learning_rate": 1.2952063161790032e-05, "loss": 0.3672, "step": 21794 }, { "epoch": 0.4046442375883233, "grad_norm": 0.45234495401382446, "learning_rate": 1.2950948637415938e-05, "loss": 0.267, "step": 21796 }, { "epoch": 0.404681367725742, "grad_norm": 0.49033886194229126, "learning_rate": 1.2949834072888275e-05, "loss": 0.3331, "step": 21798 }, { "epoch": 0.4047184978631606, "grad_norm": 0.5745718479156494, "learning_rate": 1.2948719468222204e-05, "loss": 0.2635, "step": 21800 }, { "epoch": 0.40475562800057924, "grad_norm": 0.43127498030662537, "learning_rate": 1.294760482343289e-05, "loss": 0.3136, "step": 21802 }, { "epoch": 0.40479275813799787, "grad_norm": 0.36452364921569824, "learning_rate": 1.294649013853551e-05, "loss": 0.1716, "step": 21804 }, { "epoch": 0.4048298882754165, "grad_norm": 0.21352314949035645, "learning_rate": 1.2945375413545222e-05, "loss": 0.3057, "step": 21806 }, { "epoch": 0.4048670184128351, "grad_norm": 0.5029972195625305, "learning_rate": 1.2944260648477196e-05, "loss": 0.2712, "step": 21808 }, { "epoch": 0.4049041485502538, "grad_norm": 0.4876079261302948, "learning_rate": 1.2943145843346605e-05, "loss": 0.2478, "step": 21810 }, { "epoch": 0.40494127868767243, "grad_norm": 0.37176385521888733, "learning_rate": 1.294203099816861e-05, "loss": 0.2913, "step": 21812 }, { "epoch": 0.40497840882509106, "grad_norm": 0.4429399073123932, "learning_rate": 1.2940916112958388e-05, "loss": 0.3614, "step": 21814 }, { "epoch": 0.4050155389625097, "grad_norm": 0.26407429575920105, "learning_rate": 1.2939801187731108e-05, "loss": 0.2098, "step": 21816 }, { "epoch": 0.4050526690999283, "grad_norm": 0.3279687166213989, "learning_rate": 1.2938686222501939e-05, "loss": 0.3351, "step": 21818 }, { "epoch": 0.405089799237347, "grad_norm": 0.37407180666923523, "learning_rate": 1.2937571217286052e-05, "loss": 0.1892, "step": 21820 }, { "epoch": 0.40512692937476563, "grad_norm": 0.31828615069389343, "learning_rate": 1.2936456172098616e-05, "loss": 0.3469, "step": 21822 }, { "epoch": 0.40516405951218426, "grad_norm": 0.34677034616470337, "learning_rate": 1.2935341086954813e-05, "loss": 0.2379, "step": 21824 }, { "epoch": 0.4052011896496029, "grad_norm": 0.2904433608055115, "learning_rate": 1.2934225961869809e-05, "loss": 0.2955, "step": 21826 }, { "epoch": 0.4052383197870215, "grad_norm": 0.2883409857749939, "learning_rate": 1.2933110796858778e-05, "loss": 0.0993, "step": 21828 }, { "epoch": 0.40527544992444015, "grad_norm": 0.44709712266921997, "learning_rate": 1.2931995591936897e-05, "loss": 0.2151, "step": 21830 }, { "epoch": 0.40531258006185883, "grad_norm": 0.36038461327552795, "learning_rate": 1.2930880347119335e-05, "loss": 0.2366, "step": 21832 }, { "epoch": 0.40534971019927746, "grad_norm": 0.33324363827705383, "learning_rate": 1.2929765062421268e-05, "loss": 0.33, "step": 21834 }, { "epoch": 0.4053868403366961, "grad_norm": 0.47948741912841797, "learning_rate": 1.292864973785788e-05, "loss": 0.4498, "step": 21836 }, { "epoch": 0.4054239704741147, "grad_norm": 0.303437739610672, "learning_rate": 1.2927534373444342e-05, "loss": 0.3818, "step": 21838 }, { "epoch": 0.40546110061153334, "grad_norm": 0.465300589799881, "learning_rate": 1.2926418969195827e-05, "loss": 0.1835, "step": 21840 }, { "epoch": 0.405498230748952, "grad_norm": 0.45483431220054626, "learning_rate": 1.2925303525127515e-05, "loss": 0.2159, "step": 21842 }, { "epoch": 0.40553536088637065, "grad_norm": 0.5572876334190369, "learning_rate": 1.2924188041254583e-05, "loss": 0.3726, "step": 21844 }, { "epoch": 0.4055724910237893, "grad_norm": 0.309108167886734, "learning_rate": 1.2923072517592212e-05, "loss": 0.2518, "step": 21846 }, { "epoch": 0.4056096211612079, "grad_norm": 0.2934266924858093, "learning_rate": 1.2921956954155583e-05, "loss": 0.1943, "step": 21848 }, { "epoch": 0.40564675129862654, "grad_norm": 0.3023587465286255, "learning_rate": 1.2920841350959869e-05, "loss": 0.3293, "step": 21850 }, { "epoch": 0.40568388143604517, "grad_norm": 0.8941232562065125, "learning_rate": 1.2919725708020254e-05, "loss": 0.2897, "step": 21852 }, { "epoch": 0.40572101157346385, "grad_norm": 0.39852002263069153, "learning_rate": 1.2918610025351919e-05, "loss": 0.3711, "step": 21854 }, { "epoch": 0.4057581417108825, "grad_norm": 0.39841628074645996, "learning_rate": 1.2917494302970041e-05, "loss": 0.2896, "step": 21856 }, { "epoch": 0.4057952718483011, "grad_norm": 0.3771861493587494, "learning_rate": 1.2916378540889804e-05, "loss": 0.3221, "step": 21858 }, { "epoch": 0.40583240198571974, "grad_norm": 0.3386521339416504, "learning_rate": 1.2915262739126395e-05, "loss": 0.4418, "step": 21860 }, { "epoch": 0.40586953212313837, "grad_norm": 0.3377368748188019, "learning_rate": 1.291414689769499e-05, "loss": 0.184, "step": 21862 }, { "epoch": 0.40590666226055705, "grad_norm": 0.2260040044784546, "learning_rate": 1.2913031016610773e-05, "loss": 0.1264, "step": 21864 }, { "epoch": 0.4059437923979757, "grad_norm": 0.33107268810272217, "learning_rate": 1.2911915095888929e-05, "loss": 0.3793, "step": 21866 }, { "epoch": 0.4059809225353943, "grad_norm": 0.6162815690040588, "learning_rate": 1.2910799135544646e-05, "loss": 0.2911, "step": 21868 }, { "epoch": 0.40601805267281293, "grad_norm": 0.34075483679771423, "learning_rate": 1.2909683135593105e-05, "loss": 0.2123, "step": 21870 }, { "epoch": 0.40605518281023156, "grad_norm": 0.6916275024414062, "learning_rate": 1.290856709604949e-05, "loss": 0.5729, "step": 21872 }, { "epoch": 0.40609231294765025, "grad_norm": 0.35797375440597534, "learning_rate": 1.2907451016928991e-05, "loss": 0.2361, "step": 21874 }, { "epoch": 0.4061294430850689, "grad_norm": 0.330986350774765, "learning_rate": 1.290633489824679e-05, "loss": 0.2909, "step": 21876 }, { "epoch": 0.4061665732224875, "grad_norm": 0.4496910870075226, "learning_rate": 1.2905218740018077e-05, "loss": 0.3276, "step": 21878 }, { "epoch": 0.40620370335990613, "grad_norm": 0.29114681482315063, "learning_rate": 1.2904102542258041e-05, "loss": 0.2864, "step": 21880 }, { "epoch": 0.40624083349732476, "grad_norm": 0.26612186431884766, "learning_rate": 1.290298630498187e-05, "loss": 0.1163, "step": 21882 }, { "epoch": 0.4062779636347434, "grad_norm": 0.32967260479927063, "learning_rate": 1.2901870028204746e-05, "loss": 0.141, "step": 21884 }, { "epoch": 0.4063150937721621, "grad_norm": 0.45753490924835205, "learning_rate": 1.2900753711941866e-05, "loss": 0.1653, "step": 21886 }, { "epoch": 0.4063522239095807, "grad_norm": 0.3833034634590149, "learning_rate": 1.2899637356208417e-05, "loss": 0.1963, "step": 21888 }, { "epoch": 0.40638935404699933, "grad_norm": 0.30614739656448364, "learning_rate": 1.2898520961019587e-05, "loss": 0.3235, "step": 21890 }, { "epoch": 0.40642648418441796, "grad_norm": 0.4653598964214325, "learning_rate": 1.289740452639057e-05, "loss": 0.3921, "step": 21892 }, { "epoch": 0.4064636143218366, "grad_norm": 0.33255210518836975, "learning_rate": 1.2896288052336556e-05, "loss": 0.3772, "step": 21894 }, { "epoch": 0.40650074445925527, "grad_norm": 0.34993740916252136, "learning_rate": 1.2895171538872738e-05, "loss": 0.1821, "step": 21896 }, { "epoch": 0.4065378745966739, "grad_norm": 0.4239446520805359, "learning_rate": 1.2894054986014305e-05, "loss": 0.1997, "step": 21898 }, { "epoch": 0.4065750047340925, "grad_norm": 0.23019862174987793, "learning_rate": 1.2892938393776453e-05, "loss": 0.4514, "step": 21900 }, { "epoch": 0.40661213487151115, "grad_norm": 0.24404466152191162, "learning_rate": 1.2891821762174376e-05, "loss": 0.2215, "step": 21902 }, { "epoch": 0.4066492650089298, "grad_norm": 0.281076580286026, "learning_rate": 1.2890705091223265e-05, "loss": 0.2858, "step": 21904 }, { "epoch": 0.4066863951463484, "grad_norm": 0.3514172434806824, "learning_rate": 1.2889588380938318e-05, "loss": 0.1443, "step": 21906 }, { "epoch": 0.4067235252837671, "grad_norm": 0.39859738945961, "learning_rate": 1.288847163133473e-05, "loss": 0.2289, "step": 21908 }, { "epoch": 0.4067606554211857, "grad_norm": 1.7944791316986084, "learning_rate": 1.288735484242769e-05, "loss": 0.1953, "step": 21910 }, { "epoch": 0.40679778555860435, "grad_norm": 0.3340701162815094, "learning_rate": 1.2886238014232404e-05, "loss": 0.3154, "step": 21912 }, { "epoch": 0.406834915696023, "grad_norm": 0.4775649607181549, "learning_rate": 1.2885121146764064e-05, "loss": 0.2809, "step": 21914 }, { "epoch": 0.4068720458334416, "grad_norm": 0.5364314913749695, "learning_rate": 1.2884004240037863e-05, "loss": 0.2985, "step": 21916 }, { "epoch": 0.4069091759708603, "grad_norm": 0.41578948497772217, "learning_rate": 1.2882887294069007e-05, "loss": 0.3321, "step": 21918 }, { "epoch": 0.4069463061082789, "grad_norm": 0.33452993631362915, "learning_rate": 1.2881770308872687e-05, "loss": 0.3258, "step": 21920 }, { "epoch": 0.40698343624569755, "grad_norm": 0.4842405915260315, "learning_rate": 1.288065328446411e-05, "loss": 0.2687, "step": 21922 }, { "epoch": 0.4070205663831162, "grad_norm": 0.4110822081565857, "learning_rate": 1.2879536220858467e-05, "loss": 0.2408, "step": 21924 }, { "epoch": 0.4070576965205348, "grad_norm": 0.1997343897819519, "learning_rate": 1.2878419118070959e-05, "loss": 0.2086, "step": 21926 }, { "epoch": 0.40709482665795343, "grad_norm": 0.3725113272666931, "learning_rate": 1.287730197611679e-05, "loss": 0.245, "step": 21928 }, { "epoch": 0.4071319567953721, "grad_norm": 0.41921934485435486, "learning_rate": 1.2876184795011162e-05, "loss": 0.238, "step": 21930 }, { "epoch": 0.40716908693279075, "grad_norm": 0.36538413166999817, "learning_rate": 1.2875067574769271e-05, "loss": 0.3131, "step": 21932 }, { "epoch": 0.4072062170702094, "grad_norm": 0.4104001224040985, "learning_rate": 1.2873950315406325e-05, "loss": 0.3004, "step": 21934 }, { "epoch": 0.407243347207628, "grad_norm": 0.31808042526245117, "learning_rate": 1.2872833016937523e-05, "loss": 0.4164, "step": 21936 }, { "epoch": 0.40728047734504663, "grad_norm": 0.4039733111858368, "learning_rate": 1.2871715679378068e-05, "loss": 0.2781, "step": 21938 }, { "epoch": 0.4073176074824653, "grad_norm": 0.352021723985672, "learning_rate": 1.2870598302743163e-05, "loss": 0.4339, "step": 21940 }, { "epoch": 0.40735473761988394, "grad_norm": 0.2531554400920868, "learning_rate": 1.2869480887048012e-05, "loss": 0.3418, "step": 21942 }, { "epoch": 0.4073918677573026, "grad_norm": 0.6839932203292847, "learning_rate": 1.286836343230783e-05, "loss": 0.381, "step": 21944 }, { "epoch": 0.4074289978947212, "grad_norm": 0.3953968286514282, "learning_rate": 1.2867245938537804e-05, "loss": 0.2606, "step": 21946 }, { "epoch": 0.40746612803213983, "grad_norm": 0.36634135246276855, "learning_rate": 1.2866128405753153e-05, "loss": 0.4976, "step": 21948 }, { "epoch": 0.4075032581695585, "grad_norm": 0.9666898846626282, "learning_rate": 1.286501083396908e-05, "loss": 0.3888, "step": 21950 }, { "epoch": 0.40754038830697714, "grad_norm": 0.37718817591667175, "learning_rate": 1.286389322320079e-05, "loss": 0.1564, "step": 21952 }, { "epoch": 0.40757751844439577, "grad_norm": 0.28650951385498047, "learning_rate": 1.2862775573463492e-05, "loss": 0.1909, "step": 21954 }, { "epoch": 0.4076146485818144, "grad_norm": 0.3960302174091339, "learning_rate": 1.2861657884772397e-05, "loss": 0.2756, "step": 21956 }, { "epoch": 0.407651778719233, "grad_norm": 0.5276435017585754, "learning_rate": 1.2860540157142705e-05, "loss": 0.2076, "step": 21958 }, { "epoch": 0.40768890885665166, "grad_norm": 0.32526475191116333, "learning_rate": 1.2859422390589629e-05, "loss": 0.3485, "step": 21960 }, { "epoch": 0.40772603899407034, "grad_norm": 0.4247753620147705, "learning_rate": 1.2858304585128382e-05, "loss": 0.1784, "step": 21962 }, { "epoch": 0.40776316913148897, "grad_norm": 0.4074791371822357, "learning_rate": 1.285718674077417e-05, "loss": 0.2451, "step": 21964 }, { "epoch": 0.4078002992689076, "grad_norm": 1.5009312629699707, "learning_rate": 1.2856068857542205e-05, "loss": 0.4021, "step": 21966 }, { "epoch": 0.4078374294063262, "grad_norm": 0.5415545105934143, "learning_rate": 1.28549509354477e-05, "loss": 0.3435, "step": 21968 }, { "epoch": 0.40787455954374485, "grad_norm": 0.5598306059837341, "learning_rate": 1.2853832974505863e-05, "loss": 0.2738, "step": 21970 }, { "epoch": 0.40791168968116354, "grad_norm": 0.42792728543281555, "learning_rate": 1.2852714974731903e-05, "loss": 0.2435, "step": 21972 }, { "epoch": 0.40794881981858216, "grad_norm": 0.6369269490242004, "learning_rate": 1.2851596936141042e-05, "loss": 0.3702, "step": 21974 }, { "epoch": 0.4079859499560008, "grad_norm": 0.34136971831321716, "learning_rate": 1.285047885874849e-05, "loss": 0.133, "step": 21976 }, { "epoch": 0.4080230800934194, "grad_norm": 0.3024825155735016, "learning_rate": 1.2849360742569455e-05, "loss": 0.2843, "step": 21978 }, { "epoch": 0.40806021023083805, "grad_norm": 0.26109573245048523, "learning_rate": 1.2848242587619154e-05, "loss": 0.1244, "step": 21980 }, { "epoch": 0.4080973403682567, "grad_norm": 0.34682828187942505, "learning_rate": 1.2847124393912806e-05, "loss": 0.4284, "step": 21982 }, { "epoch": 0.40813447050567536, "grad_norm": 0.3292953372001648, "learning_rate": 1.284600616146562e-05, "loss": 0.2863, "step": 21984 }, { "epoch": 0.408171600643094, "grad_norm": 0.4602740406990051, "learning_rate": 1.2844887890292821e-05, "loss": 0.335, "step": 21986 }, { "epoch": 0.4082087307805126, "grad_norm": 0.3390699028968811, "learning_rate": 1.2843769580409616e-05, "loss": 0.2223, "step": 21988 }, { "epoch": 0.40824586091793125, "grad_norm": 0.2238067388534546, "learning_rate": 1.2842651231831223e-05, "loss": 0.2637, "step": 21990 }, { "epoch": 0.4082829910553499, "grad_norm": 0.3772287964820862, "learning_rate": 1.2841532844572862e-05, "loss": 0.184, "step": 21992 }, { "epoch": 0.40832012119276856, "grad_norm": 0.4175773859024048, "learning_rate": 1.284041441864975e-05, "loss": 0.2449, "step": 21994 }, { "epoch": 0.4083572513301872, "grad_norm": 0.30667364597320557, "learning_rate": 1.2839295954077108e-05, "loss": 0.2868, "step": 21996 }, { "epoch": 0.4083943814676058, "grad_norm": 0.2907668650150299, "learning_rate": 1.2838177450870153e-05, "loss": 0.328, "step": 21998 }, { "epoch": 0.40843151160502444, "grad_norm": 0.3119083046913147, "learning_rate": 1.2837058909044102e-05, "loss": 0.2762, "step": 22000 }, { "epoch": 0.4084686417424431, "grad_norm": 0.42587146162986755, "learning_rate": 1.2835940328614178e-05, "loss": 0.3885, "step": 22002 }, { "epoch": 0.4085057718798617, "grad_norm": 0.3464655876159668, "learning_rate": 1.2834821709595599e-05, "loss": 0.1904, "step": 22004 }, { "epoch": 0.4085429020172804, "grad_norm": 0.23605437576770782, "learning_rate": 1.2833703052003592e-05, "loss": 0.2812, "step": 22006 }, { "epoch": 0.408580032154699, "grad_norm": 0.5717463493347168, "learning_rate": 1.2832584355853372e-05, "loss": 0.2866, "step": 22008 }, { "epoch": 0.40861716229211764, "grad_norm": 0.47225040197372437, "learning_rate": 1.2831465621160163e-05, "loss": 0.2274, "step": 22010 }, { "epoch": 0.40865429242953627, "grad_norm": 0.4087161123752594, "learning_rate": 1.283034684793919e-05, "loss": 0.27, "step": 22012 }, { "epoch": 0.4086914225669549, "grad_norm": 0.43267086148262024, "learning_rate": 1.2829228036205672e-05, "loss": 0.2401, "step": 22014 }, { "epoch": 0.4087285527043736, "grad_norm": 0.3372718393802643, "learning_rate": 1.2828109185974835e-05, "loss": 0.2642, "step": 22016 }, { "epoch": 0.4087656828417922, "grad_norm": 0.29518356919288635, "learning_rate": 1.2826990297261905e-05, "loss": 0.4242, "step": 22018 }, { "epoch": 0.40880281297921084, "grad_norm": 0.40041399002075195, "learning_rate": 1.2825871370082106e-05, "loss": 0.3102, "step": 22020 }, { "epoch": 0.40883994311662947, "grad_norm": 0.3825591802597046, "learning_rate": 1.2824752404450658e-05, "loss": 0.2562, "step": 22022 }, { "epoch": 0.4088770732540481, "grad_norm": 0.3004355728626251, "learning_rate": 1.2823633400382792e-05, "loss": 0.1737, "step": 22024 }, { "epoch": 0.4089142033914668, "grad_norm": 0.3956766128540039, "learning_rate": 1.2822514357893736e-05, "loss": 0.5191, "step": 22026 }, { "epoch": 0.4089513335288854, "grad_norm": 0.7151725888252258, "learning_rate": 1.2821395276998714e-05, "loss": 0.3229, "step": 22028 }, { "epoch": 0.40898846366630404, "grad_norm": 0.2683252990245819, "learning_rate": 1.282027615771295e-05, "loss": 0.3193, "step": 22030 }, { "epoch": 0.40902559380372266, "grad_norm": 0.30411496758461, "learning_rate": 1.2819157000051676e-05, "loss": 0.3319, "step": 22032 }, { "epoch": 0.4090627239411413, "grad_norm": 0.3963678777217865, "learning_rate": 1.281803780403012e-05, "loss": 0.3166, "step": 22034 }, { "epoch": 0.4090998540785599, "grad_norm": 0.35781484842300415, "learning_rate": 1.281691856966351e-05, "loss": 0.1533, "step": 22036 }, { "epoch": 0.4091369842159786, "grad_norm": 0.3196694850921631, "learning_rate": 1.2815799296967077e-05, "loss": 0.2475, "step": 22038 }, { "epoch": 0.40917411435339723, "grad_norm": 0.4347062408924103, "learning_rate": 1.2814679985956051e-05, "loss": 0.227, "step": 22040 }, { "epoch": 0.40921124449081586, "grad_norm": 0.42302682995796204, "learning_rate": 1.2813560636645658e-05, "loss": 0.4828, "step": 22042 }, { "epoch": 0.4092483746282345, "grad_norm": 0.2995123565196991, "learning_rate": 1.2812441249051136e-05, "loss": 0.1105, "step": 22044 }, { "epoch": 0.4092855047656531, "grad_norm": 0.5302488803863525, "learning_rate": 1.281132182318771e-05, "loss": 0.3285, "step": 22046 }, { "epoch": 0.4093226349030718, "grad_norm": 0.495243638753891, "learning_rate": 1.2810202359070614e-05, "loss": 0.4314, "step": 22048 }, { "epoch": 0.40935976504049043, "grad_norm": 0.46075671911239624, "learning_rate": 1.2809082856715084e-05, "loss": 0.2619, "step": 22050 }, { "epoch": 0.40939689517790906, "grad_norm": 0.36969444155693054, "learning_rate": 1.2807963316136354e-05, "loss": 0.3702, "step": 22052 }, { "epoch": 0.4094340253153277, "grad_norm": 0.4912140667438507, "learning_rate": 1.2806843737349648e-05, "loss": 0.2102, "step": 22054 }, { "epoch": 0.4094711554527463, "grad_norm": 0.3260190486907959, "learning_rate": 1.2805724120370208e-05, "loss": 0.169, "step": 22056 }, { "epoch": 0.40950828559016494, "grad_norm": 0.31940963864326477, "learning_rate": 1.2804604465213267e-05, "loss": 0.3333, "step": 22058 }, { "epoch": 0.40954541572758363, "grad_norm": 0.28856950998306274, "learning_rate": 1.2803484771894059e-05, "loss": 0.2899, "step": 22060 }, { "epoch": 0.40958254586500226, "grad_norm": 0.4783872365951538, "learning_rate": 1.2802365040427824e-05, "loss": 0.3011, "step": 22062 }, { "epoch": 0.4096196760024209, "grad_norm": 0.713039755821228, "learning_rate": 1.2801245270829791e-05, "loss": 0.3008, "step": 22064 }, { "epoch": 0.4096568061398395, "grad_norm": 0.3580036461353302, "learning_rate": 1.2800125463115201e-05, "loss": 0.3416, "step": 22066 }, { "epoch": 0.40969393627725814, "grad_norm": 0.34160560369491577, "learning_rate": 1.2799005617299292e-05, "loss": 0.395, "step": 22068 }, { "epoch": 0.4097310664146768, "grad_norm": 0.21054638922214508, "learning_rate": 1.27978857333973e-05, "loss": 0.1851, "step": 22070 }, { "epoch": 0.40976819655209545, "grad_norm": 0.36528873443603516, "learning_rate": 1.2796765811424465e-05, "loss": 0.4689, "step": 22072 }, { "epoch": 0.4098053266895141, "grad_norm": 0.42593032121658325, "learning_rate": 1.2795645851396024e-05, "loss": 0.1817, "step": 22074 }, { "epoch": 0.4098424568269327, "grad_norm": 1.1457351446151733, "learning_rate": 1.2794525853327218e-05, "loss": 0.3701, "step": 22076 }, { "epoch": 0.40987958696435134, "grad_norm": 0.3434610366821289, "learning_rate": 1.2793405817233282e-05, "loss": 0.2395, "step": 22078 }, { "epoch": 0.40991671710176997, "grad_norm": 0.9202415347099304, "learning_rate": 1.2792285743129462e-05, "loss": 0.1899, "step": 22080 }, { "epoch": 0.40995384723918865, "grad_norm": 0.32737934589385986, "learning_rate": 1.2791165631030999e-05, "loss": 0.1559, "step": 22082 }, { "epoch": 0.4099909773766073, "grad_norm": 0.5351212620735168, "learning_rate": 1.279004548095313e-05, "loss": 0.2897, "step": 22084 }, { "epoch": 0.4100281075140259, "grad_norm": 0.41636592149734497, "learning_rate": 1.27889252929111e-05, "loss": 0.1397, "step": 22086 }, { "epoch": 0.41006523765144454, "grad_norm": 0.3152400553226471, "learning_rate": 1.2787805066920152e-05, "loss": 0.1116, "step": 22088 }, { "epoch": 0.41010236778886316, "grad_norm": 0.3018081784248352, "learning_rate": 1.2786684802995523e-05, "loss": 0.2394, "step": 22090 }, { "epoch": 0.41013949792628185, "grad_norm": 0.38363754749298096, "learning_rate": 1.2785564501152466e-05, "loss": 0.4681, "step": 22092 }, { "epoch": 0.4101766280637005, "grad_norm": 0.42933177947998047, "learning_rate": 1.2784444161406217e-05, "loss": 0.215, "step": 22094 }, { "epoch": 0.4102137582011191, "grad_norm": 0.3766684830188751, "learning_rate": 1.2783323783772024e-05, "loss": 0.3584, "step": 22096 }, { "epoch": 0.41025088833853773, "grad_norm": 0.42244377732276917, "learning_rate": 1.2782203368265132e-05, "loss": 0.1763, "step": 22098 }, { "epoch": 0.41028801847595636, "grad_norm": 0.5051140785217285, "learning_rate": 1.2781082914900784e-05, "loss": 0.3556, "step": 22100 }, { "epoch": 0.410325148613375, "grad_norm": 0.8686238527297974, "learning_rate": 1.2779962423694228e-05, "loss": 0.3756, "step": 22102 }, { "epoch": 0.4103622787507937, "grad_norm": 0.2839677333831787, "learning_rate": 1.2778841894660711e-05, "loss": 0.3638, "step": 22104 }, { "epoch": 0.4103994088882123, "grad_norm": 0.3259103298187256, "learning_rate": 1.277772132781548e-05, "loss": 0.2873, "step": 22106 }, { "epoch": 0.41043653902563093, "grad_norm": 0.48589566349983215, "learning_rate": 1.2776600723173781e-05, "loss": 0.2614, "step": 22108 }, { "epoch": 0.41047366916304956, "grad_norm": 0.3626291751861572, "learning_rate": 1.2775480080750865e-05, "loss": 0.1987, "step": 22110 }, { "epoch": 0.4105107993004682, "grad_norm": 0.3636896312236786, "learning_rate": 1.2774359400561978e-05, "loss": 0.2602, "step": 22112 }, { "epoch": 0.41054792943788687, "grad_norm": 0.27297523617744446, "learning_rate": 1.2773238682622369e-05, "loss": 0.2658, "step": 22114 }, { "epoch": 0.4105850595753055, "grad_norm": 0.5583170652389526, "learning_rate": 1.2772117926947288e-05, "loss": 0.2219, "step": 22116 }, { "epoch": 0.41062218971272413, "grad_norm": 0.24498596787452698, "learning_rate": 1.2770997133551985e-05, "loss": 0.1947, "step": 22118 }, { "epoch": 0.41065931985014276, "grad_norm": 0.40482643246650696, "learning_rate": 1.2769876302451713e-05, "loss": 0.1495, "step": 22120 }, { "epoch": 0.4106964499875614, "grad_norm": 0.4663492441177368, "learning_rate": 1.276875543366172e-05, "loss": 0.1769, "step": 22122 }, { "epoch": 0.41073358012498007, "grad_norm": 0.33320194482803345, "learning_rate": 1.276763452719726e-05, "loss": 0.2146, "step": 22124 }, { "epoch": 0.4107707102623987, "grad_norm": 0.2746712267398834, "learning_rate": 1.2766513583073584e-05, "loss": 0.3031, "step": 22126 }, { "epoch": 0.4108078403998173, "grad_norm": 0.38440099358558655, "learning_rate": 1.2765392601305941e-05, "loss": 0.2659, "step": 22128 }, { "epoch": 0.41084497053723595, "grad_norm": 0.3118140995502472, "learning_rate": 1.2764271581909591e-05, "loss": 0.4069, "step": 22130 }, { "epoch": 0.4108821006746546, "grad_norm": 0.34607967734336853, "learning_rate": 1.2763150524899785e-05, "loss": 0.2222, "step": 22132 }, { "epoch": 0.4109192308120732, "grad_norm": 0.39724650979042053, "learning_rate": 1.2762029430291775e-05, "loss": 0.2466, "step": 22134 }, { "epoch": 0.4109563609494919, "grad_norm": 0.48744913935661316, "learning_rate": 1.2760908298100822e-05, "loss": 0.5332, "step": 22136 }, { "epoch": 0.4109934910869105, "grad_norm": 0.41839271783828735, "learning_rate": 1.2759787128342175e-05, "loss": 0.2966, "step": 22138 }, { "epoch": 0.41103062122432915, "grad_norm": 0.3997511565685272, "learning_rate": 1.275866592103109e-05, "loss": 0.4353, "step": 22140 }, { "epoch": 0.4110677513617478, "grad_norm": 0.3391873240470886, "learning_rate": 1.2757544676182825e-05, "loss": 0.1147, "step": 22142 }, { "epoch": 0.4111048814991664, "grad_norm": 0.3180474042892456, "learning_rate": 1.2756423393812641e-05, "loss": 0.3028, "step": 22144 }, { "epoch": 0.4111420116365851, "grad_norm": 0.4800836741924286, "learning_rate": 1.2755302073935787e-05, "loss": 0.249, "step": 22146 }, { "epoch": 0.4111791417740037, "grad_norm": 0.30945470929145813, "learning_rate": 1.2754180716567526e-05, "loss": 0.2923, "step": 22148 }, { "epoch": 0.41121627191142235, "grad_norm": 0.460316002368927, "learning_rate": 1.2753059321723113e-05, "loss": 0.2798, "step": 22150 }, { "epoch": 0.411253402048841, "grad_norm": 0.37005358934402466, "learning_rate": 1.2751937889417812e-05, "loss": 0.4806, "step": 22152 }, { "epoch": 0.4112905321862596, "grad_norm": 0.47213301062583923, "learning_rate": 1.2750816419666875e-05, "loss": 0.4558, "step": 22154 }, { "epoch": 0.41132766232367823, "grad_norm": 0.4230639934539795, "learning_rate": 1.2749694912485573e-05, "loss": 0.3416, "step": 22156 }, { "epoch": 0.4113647924610969, "grad_norm": 0.6258881092071533, "learning_rate": 1.2748573367889157e-05, "loss": 0.2694, "step": 22158 }, { "epoch": 0.41140192259851555, "grad_norm": 0.4091075360774994, "learning_rate": 1.2747451785892888e-05, "loss": 0.2415, "step": 22160 }, { "epoch": 0.4114390527359342, "grad_norm": 0.43477752804756165, "learning_rate": 1.274633016651203e-05, "loss": 0.3638, "step": 22162 }, { "epoch": 0.4114761828733528, "grad_norm": 0.43976837396621704, "learning_rate": 1.2745208509761846e-05, "loss": 0.472, "step": 22164 }, { "epoch": 0.41151331301077143, "grad_norm": 0.3615671694278717, "learning_rate": 1.2744086815657599e-05, "loss": 0.2014, "step": 22166 }, { "epoch": 0.4115504431481901, "grad_norm": 0.3114439845085144, "learning_rate": 1.2742965084214547e-05, "loss": 0.4001, "step": 22168 }, { "epoch": 0.41158757328560874, "grad_norm": 0.29664692282676697, "learning_rate": 1.2741843315447958e-05, "loss": 0.3341, "step": 22170 }, { "epoch": 0.41162470342302737, "grad_norm": 0.2777436375617981, "learning_rate": 1.274072150937309e-05, "loss": 0.305, "step": 22172 }, { "epoch": 0.411661833560446, "grad_norm": 0.48372167348861694, "learning_rate": 1.2739599666005216e-05, "loss": 0.2438, "step": 22174 }, { "epoch": 0.41169896369786463, "grad_norm": 0.3963071405887604, "learning_rate": 1.2738477785359595e-05, "loss": 0.2792, "step": 22176 }, { "epoch": 0.41173609383528326, "grad_norm": 0.2918238341808319, "learning_rate": 1.2737355867451498e-05, "loss": 0.3107, "step": 22178 }, { "epoch": 0.41177322397270194, "grad_norm": 0.4556460678577423, "learning_rate": 1.273623391229618e-05, "loss": 0.4217, "step": 22180 }, { "epoch": 0.41181035411012057, "grad_norm": 0.321087121963501, "learning_rate": 1.273511191990892e-05, "loss": 0.2012, "step": 22182 }, { "epoch": 0.4118474842475392, "grad_norm": 0.33717501163482666, "learning_rate": 1.2733989890304976e-05, "loss": 0.4066, "step": 22184 }, { "epoch": 0.4118846143849578, "grad_norm": 0.4401673376560211, "learning_rate": 1.273286782349962e-05, "loss": 0.2485, "step": 22186 }, { "epoch": 0.41192174452237645, "grad_norm": 0.28662893176078796, "learning_rate": 1.2731745719508119e-05, "loss": 0.429, "step": 22188 }, { "epoch": 0.41195887465979514, "grad_norm": 0.5214123129844666, "learning_rate": 1.2730623578345742e-05, "loss": 0.3119, "step": 22190 }, { "epoch": 0.41199600479721377, "grad_norm": 0.2936370372772217, "learning_rate": 1.2729501400027757e-05, "loss": 0.2931, "step": 22192 }, { "epoch": 0.4120331349346324, "grad_norm": 0.32686617970466614, "learning_rate": 1.272837918456943e-05, "loss": 0.2659, "step": 22194 }, { "epoch": 0.412070265072051, "grad_norm": 0.3903789520263672, "learning_rate": 1.2727256931986041e-05, "loss": 0.4379, "step": 22196 }, { "epoch": 0.41210739520946965, "grad_norm": 0.3727833032608032, "learning_rate": 1.272613464229285e-05, "loss": 0.367, "step": 22198 }, { "epoch": 0.41214452534688834, "grad_norm": 0.608617901802063, "learning_rate": 1.2725012315505135e-05, "loss": 0.4806, "step": 22200 }, { "epoch": 0.41218165548430696, "grad_norm": 0.3271966874599457, "learning_rate": 1.2723889951638164e-05, "loss": 0.2447, "step": 22202 }, { "epoch": 0.4122187856217256, "grad_norm": 0.38452741503715515, "learning_rate": 1.272276755070721e-05, "loss": 0.2558, "step": 22204 }, { "epoch": 0.4122559157591442, "grad_norm": 0.31904590129852295, "learning_rate": 1.272164511272754e-05, "loss": 0.4097, "step": 22206 }, { "epoch": 0.41229304589656285, "grad_norm": 0.38718876242637634, "learning_rate": 1.2720522637714438e-05, "loss": 0.282, "step": 22208 }, { "epoch": 0.4123301760339815, "grad_norm": 0.23350363969802856, "learning_rate": 1.2719400125683173e-05, "loss": 0.2003, "step": 22210 }, { "epoch": 0.41236730617140016, "grad_norm": 0.3466246724128723, "learning_rate": 1.2718277576649013e-05, "loss": 0.1488, "step": 22212 }, { "epoch": 0.4124044363088188, "grad_norm": 0.3530449867248535, "learning_rate": 1.2717154990627241e-05, "loss": 0.3411, "step": 22214 }, { "epoch": 0.4124415664462374, "grad_norm": 0.5673527121543884, "learning_rate": 1.2716032367633127e-05, "loss": 0.2394, "step": 22216 }, { "epoch": 0.41247869658365605, "grad_norm": 0.48155415058135986, "learning_rate": 1.2714909707681947e-05, "loss": 0.2724, "step": 22218 }, { "epoch": 0.4125158267210747, "grad_norm": 0.4941686689853668, "learning_rate": 1.271378701078898e-05, "loss": 0.2185, "step": 22220 }, { "epoch": 0.41255295685849336, "grad_norm": 0.45109835267066956, "learning_rate": 1.27126642769695e-05, "loss": 0.1631, "step": 22222 }, { "epoch": 0.412590086995912, "grad_norm": 0.3319634795188904, "learning_rate": 1.2711541506238783e-05, "loss": 0.3316, "step": 22224 }, { "epoch": 0.4126272171333306, "grad_norm": 0.2841742932796478, "learning_rate": 1.2710418698612111e-05, "loss": 0.2583, "step": 22226 }, { "epoch": 0.41266434727074924, "grad_norm": 0.3774219751358032, "learning_rate": 1.2709295854104754e-05, "loss": 0.254, "step": 22228 }, { "epoch": 0.41270147740816787, "grad_norm": 0.36438465118408203, "learning_rate": 1.2708172972732e-05, "loss": 0.3751, "step": 22230 }, { "epoch": 0.4127386075455865, "grad_norm": 0.4523380696773529, "learning_rate": 1.2707050054509122e-05, "loss": 0.218, "step": 22232 }, { "epoch": 0.4127757376830052, "grad_norm": 0.2547382712364197, "learning_rate": 1.2705927099451402e-05, "loss": 0.3666, "step": 22234 }, { "epoch": 0.4128128678204238, "grad_norm": 0.494395911693573, "learning_rate": 1.2704804107574118e-05, "loss": 0.3002, "step": 22236 }, { "epoch": 0.41284999795784244, "grad_norm": 0.4235498905181885, "learning_rate": 1.2703681078892554e-05, "loss": 0.1947, "step": 22238 }, { "epoch": 0.41288712809526107, "grad_norm": 0.20970535278320312, "learning_rate": 1.2702558013421989e-05, "loss": 0.2835, "step": 22240 }, { "epoch": 0.4129242582326797, "grad_norm": 0.4145757555961609, "learning_rate": 1.2701434911177705e-05, "loss": 0.2779, "step": 22242 }, { "epoch": 0.4129613883700984, "grad_norm": 0.3728560209274292, "learning_rate": 1.270031177217498e-05, "loss": 0.3807, "step": 22244 }, { "epoch": 0.412998518507517, "grad_norm": 0.37832915782928467, "learning_rate": 1.2699188596429104e-05, "loss": 0.4804, "step": 22246 }, { "epoch": 0.41303564864493564, "grad_norm": 0.3881000578403473, "learning_rate": 1.2698065383955353e-05, "loss": 0.1524, "step": 22248 }, { "epoch": 0.41307277878235427, "grad_norm": 0.45861899852752686, "learning_rate": 1.2696942134769019e-05, "loss": 0.2758, "step": 22250 }, { "epoch": 0.4131099089197729, "grad_norm": 0.3362489640712738, "learning_rate": 1.2695818848885377e-05, "loss": 0.2918, "step": 22252 }, { "epoch": 0.4131470390571915, "grad_norm": 0.32611092925071716, "learning_rate": 1.2694695526319714e-05, "loss": 0.5259, "step": 22254 }, { "epoch": 0.4131841691946102, "grad_norm": 0.3833596110343933, "learning_rate": 1.2693572167087318e-05, "loss": 0.4025, "step": 22256 }, { "epoch": 0.41322129933202884, "grad_norm": 0.5344322323799133, "learning_rate": 1.2692448771203473e-05, "loss": 0.3344, "step": 22258 }, { "epoch": 0.41325842946944746, "grad_norm": 0.3957487642765045, "learning_rate": 1.2691325338683464e-05, "loss": 0.1913, "step": 22260 }, { "epoch": 0.4132955596068661, "grad_norm": 0.41014614701271057, "learning_rate": 1.2690201869542582e-05, "loss": 0.2992, "step": 22262 }, { "epoch": 0.4133326897442847, "grad_norm": 0.34932592511177063, "learning_rate": 1.268907836379611e-05, "loss": 0.4096, "step": 22264 }, { "epoch": 0.4133698198817034, "grad_norm": 0.4497375190258026, "learning_rate": 1.2687954821459334e-05, "loss": 0.4504, "step": 22266 }, { "epoch": 0.41340695001912203, "grad_norm": 0.38725030422210693, "learning_rate": 1.2686831242547544e-05, "loss": 0.4117, "step": 22268 }, { "epoch": 0.41344408015654066, "grad_norm": 0.31301382184028625, "learning_rate": 1.268570762707603e-05, "loss": 0.4188, "step": 22270 }, { "epoch": 0.4134812102939593, "grad_norm": 0.39095038175582886, "learning_rate": 1.2684583975060079e-05, "loss": 0.153, "step": 22272 }, { "epoch": 0.4135183404313779, "grad_norm": 0.27216750383377075, "learning_rate": 1.2683460286514982e-05, "loss": 0.3461, "step": 22274 }, { "epoch": 0.4135554705687966, "grad_norm": 0.3232177197933197, "learning_rate": 1.2682336561456029e-05, "loss": 0.13, "step": 22276 }, { "epoch": 0.41359260070621523, "grad_norm": 0.6255667209625244, "learning_rate": 1.2681212799898507e-05, "loss": 0.2016, "step": 22278 }, { "epoch": 0.41362973084363386, "grad_norm": 0.5035505890846252, "learning_rate": 1.2680089001857712e-05, "loss": 0.329, "step": 22280 }, { "epoch": 0.4136668609810525, "grad_norm": 0.5952460765838623, "learning_rate": 1.2678965167348936e-05, "loss": 0.1998, "step": 22282 }, { "epoch": 0.4137039911184711, "grad_norm": 0.3756662905216217, "learning_rate": 1.2677841296387465e-05, "loss": 0.2137, "step": 22284 }, { "epoch": 0.41374112125588974, "grad_norm": 0.5828529000282288, "learning_rate": 1.2676717388988595e-05, "loss": 0.1433, "step": 22286 }, { "epoch": 0.4137782513933084, "grad_norm": 0.42227739095687866, "learning_rate": 1.267559344516762e-05, "loss": 0.3524, "step": 22288 }, { "epoch": 0.41381538153072706, "grad_norm": 0.4134095013141632, "learning_rate": 1.2674469464939834e-05, "loss": 0.3187, "step": 22290 }, { "epoch": 0.4138525116681457, "grad_norm": 0.8603450655937195, "learning_rate": 1.2673345448320527e-05, "loss": 0.2461, "step": 22292 }, { "epoch": 0.4138896418055643, "grad_norm": 0.43120262026786804, "learning_rate": 1.2672221395324997e-05, "loss": 0.5413, "step": 22294 }, { "epoch": 0.41392677194298294, "grad_norm": 0.36243775486946106, "learning_rate": 1.2671097305968541e-05, "loss": 0.2902, "step": 22296 }, { "epoch": 0.4139639020804016, "grad_norm": 0.35519498586654663, "learning_rate": 1.2669973180266446e-05, "loss": 0.3629, "step": 22298 }, { "epoch": 0.41400103221782025, "grad_norm": 0.3112615644931793, "learning_rate": 1.2668849018234018e-05, "loss": 0.1534, "step": 22300 }, { "epoch": 0.4140381623552389, "grad_norm": 0.4090019762516022, "learning_rate": 1.2667724819886547e-05, "loss": 0.3092, "step": 22302 }, { "epoch": 0.4140752924926575, "grad_norm": 0.3079445958137512, "learning_rate": 1.2666600585239332e-05, "loss": 0.2301, "step": 22304 }, { "epoch": 0.41411242263007614, "grad_norm": 0.3032417595386505, "learning_rate": 1.2665476314307669e-05, "loss": 0.3444, "step": 22306 }, { "epoch": 0.41414955276749477, "grad_norm": 0.38366255164146423, "learning_rate": 1.2664352007106861e-05, "loss": 0.3142, "step": 22308 }, { "epoch": 0.41418668290491345, "grad_norm": 0.3512505888938904, "learning_rate": 1.26632276636522e-05, "loss": 0.3726, "step": 22310 }, { "epoch": 0.4142238130423321, "grad_norm": 0.4796908497810364, "learning_rate": 1.2662103283958988e-05, "loss": 0.388, "step": 22312 }, { "epoch": 0.4142609431797507, "grad_norm": 0.34870582818984985, "learning_rate": 1.2660978868042527e-05, "loss": 0.3169, "step": 22314 }, { "epoch": 0.41429807331716934, "grad_norm": 0.32598769664764404, "learning_rate": 1.2659854415918113e-05, "loss": 0.2753, "step": 22316 }, { "epoch": 0.41433520345458796, "grad_norm": 0.32522955536842346, "learning_rate": 1.2658729927601046e-05, "loss": 0.257, "step": 22318 }, { "epoch": 0.41437233359200665, "grad_norm": 0.358354777097702, "learning_rate": 1.2657605403106628e-05, "loss": 0.2733, "step": 22320 }, { "epoch": 0.4144094637294253, "grad_norm": 0.5097821950912476, "learning_rate": 1.2656480842450162e-05, "loss": 0.2829, "step": 22322 }, { "epoch": 0.4144465938668439, "grad_norm": 0.33971095085144043, "learning_rate": 1.2655356245646948e-05, "loss": 0.3393, "step": 22324 }, { "epoch": 0.41448372400426253, "grad_norm": 0.36624693870544434, "learning_rate": 1.2654231612712292e-05, "loss": 0.3593, "step": 22326 }, { "epoch": 0.41452085414168116, "grad_norm": 0.5099690556526184, "learning_rate": 1.2653106943661492e-05, "loss": 0.642, "step": 22328 }, { "epoch": 0.4145579842790998, "grad_norm": 0.5320605635643005, "learning_rate": 1.2651982238509854e-05, "loss": 0.2412, "step": 22330 }, { "epoch": 0.4145951144165185, "grad_norm": 1.5672521591186523, "learning_rate": 1.2650857497272679e-05, "loss": 0.3139, "step": 22332 }, { "epoch": 0.4146322445539371, "grad_norm": 0.30454984307289124, "learning_rate": 1.2649732719965277e-05, "loss": 0.2804, "step": 22334 }, { "epoch": 0.41466937469135573, "grad_norm": 0.2134273797273636, "learning_rate": 1.264860790660295e-05, "loss": 0.2725, "step": 22336 }, { "epoch": 0.41470650482877436, "grad_norm": 0.4693770706653595, "learning_rate": 1.2647483057201e-05, "loss": 0.2274, "step": 22338 }, { "epoch": 0.414743634966193, "grad_norm": 0.5466782450675964, "learning_rate": 1.2646358171774738e-05, "loss": 0.1077, "step": 22340 }, { "epoch": 0.41478076510361167, "grad_norm": 0.38015276193618774, "learning_rate": 1.2645233250339465e-05, "loss": 0.3335, "step": 22342 }, { "epoch": 0.4148178952410303, "grad_norm": 0.4040320813655853, "learning_rate": 1.2644108292910493e-05, "loss": 0.195, "step": 22344 }, { "epoch": 0.4148550253784489, "grad_norm": 0.5064042806625366, "learning_rate": 1.264298329950313e-05, "loss": 0.3448, "step": 22346 }, { "epoch": 0.41489215551586756, "grad_norm": 0.48571261763572693, "learning_rate": 1.2641858270132676e-05, "loss": 0.3193, "step": 22348 }, { "epoch": 0.4149292856532862, "grad_norm": 0.262954443693161, "learning_rate": 1.2640733204814446e-05, "loss": 0.299, "step": 22350 }, { "epoch": 0.41496641579070487, "grad_norm": 0.4457722306251526, "learning_rate": 1.2639608103563747e-05, "loss": 0.2353, "step": 22352 }, { "epoch": 0.4150035459281235, "grad_norm": 0.29154732823371887, "learning_rate": 1.2638482966395888e-05, "loss": 0.2643, "step": 22354 }, { "epoch": 0.4150406760655421, "grad_norm": 0.3535892069339752, "learning_rate": 1.2637357793326179e-05, "loss": 0.2678, "step": 22356 }, { "epoch": 0.41507780620296075, "grad_norm": 0.36658260226249695, "learning_rate": 1.2636232584369931e-05, "loss": 0.404, "step": 22358 }, { "epoch": 0.4151149363403794, "grad_norm": 0.20808948576450348, "learning_rate": 1.2635107339542457e-05, "loss": 0.2026, "step": 22360 }, { "epoch": 0.415152066477798, "grad_norm": 0.36776214838027954, "learning_rate": 1.2633982058859058e-05, "loss": 0.3276, "step": 22362 }, { "epoch": 0.4151891966152167, "grad_norm": 0.36909955739974976, "learning_rate": 1.2632856742335056e-05, "loss": 0.145, "step": 22364 }, { "epoch": 0.4152263267526353, "grad_norm": 0.5838977694511414, "learning_rate": 1.2631731389985762e-05, "loss": 0.2485, "step": 22366 }, { "epoch": 0.41526345689005395, "grad_norm": 0.39687880873680115, "learning_rate": 1.2630606001826486e-05, "loss": 0.2355, "step": 22368 }, { "epoch": 0.4153005870274726, "grad_norm": 0.39901086688041687, "learning_rate": 1.2629480577872541e-05, "loss": 0.3431, "step": 22370 }, { "epoch": 0.4153377171648912, "grad_norm": 0.43962743878364563, "learning_rate": 1.2628355118139242e-05, "loss": 0.3904, "step": 22372 }, { "epoch": 0.4153748473023099, "grad_norm": 0.6133963465690613, "learning_rate": 1.2627229622641903e-05, "loss": 0.3298, "step": 22374 }, { "epoch": 0.4154119774397285, "grad_norm": 0.31781327724456787, "learning_rate": 1.2626104091395836e-05, "loss": 0.2589, "step": 22376 }, { "epoch": 0.41544910757714715, "grad_norm": 0.5248299837112427, "learning_rate": 1.2624978524416363e-05, "loss": 0.2843, "step": 22378 }, { "epoch": 0.4154862377145658, "grad_norm": 0.3978639543056488, "learning_rate": 1.2623852921718791e-05, "loss": 0.3733, "step": 22380 }, { "epoch": 0.4155233678519844, "grad_norm": 0.32916125655174255, "learning_rate": 1.262272728331844e-05, "loss": 0.3335, "step": 22382 }, { "epoch": 0.41556049798940303, "grad_norm": 0.3002493679523468, "learning_rate": 1.262160160923063e-05, "loss": 0.1836, "step": 22384 }, { "epoch": 0.4155976281268217, "grad_norm": 0.46988898515701294, "learning_rate": 1.2620475899470672e-05, "loss": 0.3031, "step": 22386 }, { "epoch": 0.41563475826424034, "grad_norm": 0.377679705619812, "learning_rate": 1.2619350154053885e-05, "loss": 0.1563, "step": 22388 }, { "epoch": 0.415671888401659, "grad_norm": 0.5000572204589844, "learning_rate": 1.2618224372995593e-05, "loss": 0.3355, "step": 22390 }, { "epoch": 0.4157090185390776, "grad_norm": 0.41688838601112366, "learning_rate": 1.2617098556311107e-05, "loss": 0.3628, "step": 22392 }, { "epoch": 0.41574614867649623, "grad_norm": 0.3701666593551636, "learning_rate": 1.2615972704015746e-05, "loss": 0.4207, "step": 22394 }, { "epoch": 0.4157832788139149, "grad_norm": 0.371471107006073, "learning_rate": 1.2614846816124834e-05, "loss": 0.2842, "step": 22396 }, { "epoch": 0.41582040895133354, "grad_norm": 0.4221228063106537, "learning_rate": 1.2613720892653692e-05, "loss": 0.3154, "step": 22398 }, { "epoch": 0.41585753908875217, "grad_norm": 0.4081903100013733, "learning_rate": 1.2612594933617636e-05, "loss": 0.3374, "step": 22400 }, { "epoch": 0.4158946692261708, "grad_norm": 0.2494126707315445, "learning_rate": 1.2611468939031988e-05, "loss": 0.4441, "step": 22402 }, { "epoch": 0.4159317993635894, "grad_norm": 0.3489457964897156, "learning_rate": 1.2610342908912072e-05, "loss": 0.4704, "step": 22404 }, { "epoch": 0.41596892950100806, "grad_norm": 0.3283196985721588, "learning_rate": 1.2609216843273205e-05, "loss": 0.2223, "step": 22406 }, { "epoch": 0.41600605963842674, "grad_norm": 0.23611219227313995, "learning_rate": 1.2608090742130712e-05, "loss": 0.326, "step": 22408 }, { "epoch": 0.41604318977584537, "grad_norm": 0.3353910744190216, "learning_rate": 1.2606964605499918e-05, "loss": 0.3564, "step": 22410 }, { "epoch": 0.416080319913264, "grad_norm": 0.24398206174373627, "learning_rate": 1.2605838433396147e-05, "loss": 0.3901, "step": 22412 }, { "epoch": 0.4161174500506826, "grad_norm": 0.4506359100341797, "learning_rate": 1.2604712225834716e-05, "loss": 0.1377, "step": 22414 }, { "epoch": 0.41615458018810125, "grad_norm": 0.3832830488681793, "learning_rate": 1.2603585982830956e-05, "loss": 0.2633, "step": 22416 }, { "epoch": 0.41619171032551994, "grad_norm": 0.3120003938674927, "learning_rate": 1.2602459704400188e-05, "loss": 0.2865, "step": 22418 }, { "epoch": 0.41622884046293857, "grad_norm": 0.3582679033279419, "learning_rate": 1.260133339055774e-05, "loss": 0.4833, "step": 22420 }, { "epoch": 0.4162659706003572, "grad_norm": 1.9840152263641357, "learning_rate": 1.2600207041318937e-05, "loss": 0.2441, "step": 22422 }, { "epoch": 0.4163031007377758, "grad_norm": 0.55437833070755, "learning_rate": 1.2599080656699102e-05, "loss": 0.2911, "step": 22424 }, { "epoch": 0.41634023087519445, "grad_norm": 0.18342788517475128, "learning_rate": 1.2597954236713563e-05, "loss": 0.2221, "step": 22426 }, { "epoch": 0.41637736101261313, "grad_norm": 0.39264431595802307, "learning_rate": 1.2596827781377655e-05, "loss": 0.1615, "step": 22428 }, { "epoch": 0.41641449115003176, "grad_norm": 0.3323133885860443, "learning_rate": 1.2595701290706695e-05, "loss": 0.2976, "step": 22430 }, { "epoch": 0.4164516212874504, "grad_norm": 0.5766331553459167, "learning_rate": 1.2594574764716017e-05, "loss": 0.1977, "step": 22432 }, { "epoch": 0.416488751424869, "grad_norm": 0.3951661288738251, "learning_rate": 1.2593448203420947e-05, "loss": 0.4084, "step": 22434 }, { "epoch": 0.41652588156228765, "grad_norm": 0.5520362257957458, "learning_rate": 1.2592321606836815e-05, "loss": 0.2932, "step": 22436 }, { "epoch": 0.4165630116997063, "grad_norm": 0.3689362108707428, "learning_rate": 1.259119497497895e-05, "loss": 0.186, "step": 22438 }, { "epoch": 0.41660014183712496, "grad_norm": 0.37370771169662476, "learning_rate": 1.2590068307862682e-05, "loss": 0.2458, "step": 22440 }, { "epoch": 0.4166372719745436, "grad_norm": 0.45080283284187317, "learning_rate": 1.2588941605503347e-05, "loss": 0.2814, "step": 22442 }, { "epoch": 0.4166744021119622, "grad_norm": 0.4246978759765625, "learning_rate": 1.258781486791627e-05, "loss": 0.2053, "step": 22444 }, { "epoch": 0.41671153224938084, "grad_norm": 0.3312555253505707, "learning_rate": 1.2586688095116782e-05, "loss": 0.3394, "step": 22446 }, { "epoch": 0.4167486623867995, "grad_norm": 0.34370651841163635, "learning_rate": 1.258556128712022e-05, "loss": 0.3435, "step": 22448 }, { "epoch": 0.41678579252421816, "grad_norm": 0.5265771746635437, "learning_rate": 1.2584434443941911e-05, "loss": 0.204, "step": 22450 }, { "epoch": 0.4168229226616368, "grad_norm": 0.2886512279510498, "learning_rate": 1.2583307565597192e-05, "loss": 0.2267, "step": 22452 }, { "epoch": 0.4168600527990554, "grad_norm": 0.37400022149086, "learning_rate": 1.2582180652101394e-05, "loss": 0.2839, "step": 22454 }, { "epoch": 0.41689718293647404, "grad_norm": 0.3268842399120331, "learning_rate": 1.2581053703469852e-05, "loss": 0.3035, "step": 22456 }, { "epoch": 0.41693431307389267, "grad_norm": 0.47995254397392273, "learning_rate": 1.2579926719717899e-05, "loss": 0.3242, "step": 22458 }, { "epoch": 0.4169714432113113, "grad_norm": 0.4329659342765808, "learning_rate": 1.2578799700860876e-05, "loss": 0.2703, "step": 22460 }, { "epoch": 0.41700857334873, "grad_norm": 0.43291178345680237, "learning_rate": 1.2577672646914107e-05, "loss": 0.4337, "step": 22462 }, { "epoch": 0.4170457034861486, "grad_norm": 0.35983771085739136, "learning_rate": 1.2576545557892939e-05, "loss": 0.2319, "step": 22464 }, { "epoch": 0.41708283362356724, "grad_norm": 0.391220360994339, "learning_rate": 1.2575418433812702e-05, "loss": 0.2065, "step": 22466 }, { "epoch": 0.41711996376098587, "grad_norm": 0.3316316604614258, "learning_rate": 1.2574291274688734e-05, "loss": 0.3326, "step": 22468 }, { "epoch": 0.4171570938984045, "grad_norm": 0.3946816325187683, "learning_rate": 1.257316408053637e-05, "loss": 0.3084, "step": 22470 }, { "epoch": 0.4171942240358232, "grad_norm": 0.31929251551628113, "learning_rate": 1.2572036851370956e-05, "loss": 0.4248, "step": 22472 }, { "epoch": 0.4172313541732418, "grad_norm": 0.5548383593559265, "learning_rate": 1.2570909587207822e-05, "loss": 0.4839, "step": 22474 }, { "epoch": 0.41726848431066044, "grad_norm": 0.47951415181159973, "learning_rate": 1.2569782288062309e-05, "loss": 0.2928, "step": 22476 }, { "epoch": 0.41730561444807907, "grad_norm": 0.36975565552711487, "learning_rate": 1.2568654953949755e-05, "loss": 0.3424, "step": 22478 }, { "epoch": 0.4173427445854977, "grad_norm": 0.21453146636486053, "learning_rate": 1.2567527584885503e-05, "loss": 0.1939, "step": 22480 }, { "epoch": 0.4173798747229163, "grad_norm": 0.43721315264701843, "learning_rate": 1.256640018088489e-05, "loss": 0.2002, "step": 22482 }, { "epoch": 0.417417004860335, "grad_norm": 0.369391530752182, "learning_rate": 1.2565272741963262e-05, "loss": 0.199, "step": 22484 }, { "epoch": 0.41745413499775363, "grad_norm": 0.28911253809928894, "learning_rate": 1.2564145268135952e-05, "loss": 0.1586, "step": 22486 }, { "epoch": 0.41749126513517226, "grad_norm": 0.2809910774230957, "learning_rate": 1.2563017759418305e-05, "loss": 0.1899, "step": 22488 }, { "epoch": 0.4175283952725909, "grad_norm": 0.4824793040752411, "learning_rate": 1.2561890215825662e-05, "loss": 0.4619, "step": 22490 }, { "epoch": 0.4175655254100095, "grad_norm": 9.8869047164917, "learning_rate": 1.2560762637373371e-05, "loss": 0.2899, "step": 22492 }, { "epoch": 0.4176026555474282, "grad_norm": 0.4115208685398102, "learning_rate": 1.2559635024076769e-05, "loss": 0.3026, "step": 22494 }, { "epoch": 0.41763978568484683, "grad_norm": 0.5028640031814575, "learning_rate": 1.2558507375951204e-05, "loss": 0.2202, "step": 22496 }, { "epoch": 0.41767691582226546, "grad_norm": 0.4057249426841736, "learning_rate": 1.2557379693012015e-05, "loss": 0.4076, "step": 22498 }, { "epoch": 0.4177140459596841, "grad_norm": 0.3594348728656769, "learning_rate": 1.2556251975274547e-05, "loss": 0.2056, "step": 22500 }, { "epoch": 0.4177511760971027, "grad_norm": 0.2122144252061844, "learning_rate": 1.2555124222754148e-05, "loss": 0.251, "step": 22502 }, { "epoch": 0.4177883062345214, "grad_norm": 0.35639718174934387, "learning_rate": 1.2553996435466163e-05, "loss": 0.4011, "step": 22504 }, { "epoch": 0.41782543637194003, "grad_norm": 0.36366695165634155, "learning_rate": 1.2552868613425935e-05, "loss": 0.1872, "step": 22506 }, { "epoch": 0.41786256650935866, "grad_norm": 0.38499191403388977, "learning_rate": 1.255174075664881e-05, "loss": 0.2026, "step": 22508 }, { "epoch": 0.4178996966467773, "grad_norm": 0.3735295832157135, "learning_rate": 1.2550612865150141e-05, "loss": 0.3605, "step": 22510 }, { "epoch": 0.4179368267841959, "grad_norm": 0.37961629033088684, "learning_rate": 1.2549484938945267e-05, "loss": 0.2803, "step": 22512 }, { "epoch": 0.41797395692161454, "grad_norm": 0.5305289626121521, "learning_rate": 1.254835697804954e-05, "loss": 0.2571, "step": 22514 }, { "epoch": 0.4180110870590332, "grad_norm": 0.4585077166557312, "learning_rate": 1.254722898247831e-05, "loss": 0.185, "step": 22516 }, { "epoch": 0.41804821719645185, "grad_norm": 0.44458502531051636, "learning_rate": 1.2546100952246925e-05, "loss": 0.2868, "step": 22518 }, { "epoch": 0.4180853473338705, "grad_norm": 0.6276764273643494, "learning_rate": 1.2544972887370727e-05, "loss": 0.2004, "step": 22520 }, { "epoch": 0.4181224774712891, "grad_norm": 0.485355943441391, "learning_rate": 1.2543844787865074e-05, "loss": 0.4108, "step": 22522 }, { "epoch": 0.41815960760870774, "grad_norm": 0.27617180347442627, "learning_rate": 1.2542716653745312e-05, "loss": 0.0921, "step": 22524 }, { "epoch": 0.4181967377461264, "grad_norm": 0.36838507652282715, "learning_rate": 1.2541588485026794e-05, "loss": 0.2826, "step": 22526 }, { "epoch": 0.41823386788354505, "grad_norm": 0.33345288038253784, "learning_rate": 1.2540460281724871e-05, "loss": 0.3104, "step": 22528 }, { "epoch": 0.4182709980209637, "grad_norm": 0.4041168987751007, "learning_rate": 1.2539332043854893e-05, "loss": 0.2697, "step": 22530 }, { "epoch": 0.4183081281583823, "grad_norm": 0.2819782495498657, "learning_rate": 1.2538203771432209e-05, "loss": 0.2312, "step": 22532 }, { "epoch": 0.41834525829580094, "grad_norm": 0.7115983366966248, "learning_rate": 1.2537075464472174e-05, "loss": 0.3205, "step": 22534 }, { "epoch": 0.41838238843321957, "grad_norm": 0.20556685328483582, "learning_rate": 1.2535947122990146e-05, "loss": 0.2175, "step": 22536 }, { "epoch": 0.41841951857063825, "grad_norm": 0.30183717608451843, "learning_rate": 1.2534818747001471e-05, "loss": 0.2709, "step": 22538 }, { "epoch": 0.4184566487080569, "grad_norm": 0.31497636437416077, "learning_rate": 1.2533690336521507e-05, "loss": 0.175, "step": 22540 }, { "epoch": 0.4184937788454755, "grad_norm": 0.38196489214897156, "learning_rate": 1.2532561891565604e-05, "loss": 0.1586, "step": 22542 }, { "epoch": 0.41853090898289413, "grad_norm": 0.3567071557044983, "learning_rate": 1.253143341214912e-05, "loss": 0.0769, "step": 22544 }, { "epoch": 0.41856803912031276, "grad_norm": 0.3131732642650604, "learning_rate": 1.2530304898287411e-05, "loss": 0.3743, "step": 22546 }, { "epoch": 0.41860516925773145, "grad_norm": 0.32282838225364685, "learning_rate": 1.2529176349995833e-05, "loss": 0.3831, "step": 22548 }, { "epoch": 0.4186422993951501, "grad_norm": 0.32722216844558716, "learning_rate": 1.252804776728974e-05, "loss": 0.3444, "step": 22550 }, { "epoch": 0.4186794295325687, "grad_norm": 0.5492792129516602, "learning_rate": 1.2526919150184487e-05, "loss": 0.4061, "step": 22552 }, { "epoch": 0.41871655966998733, "grad_norm": 0.27537593245506287, "learning_rate": 1.2525790498695434e-05, "loss": 0.3589, "step": 22554 }, { "epoch": 0.41875368980740596, "grad_norm": 0.4112614095211029, "learning_rate": 1.2524661812837936e-05, "loss": 0.3125, "step": 22556 }, { "epoch": 0.4187908199448246, "grad_norm": 0.5140699744224548, "learning_rate": 1.2523533092627357e-05, "loss": 0.2321, "step": 22558 }, { "epoch": 0.41882795008224327, "grad_norm": 0.42734774947166443, "learning_rate": 1.2522404338079054e-05, "loss": 0.4071, "step": 22560 }, { "epoch": 0.4188650802196619, "grad_norm": 0.30502915382385254, "learning_rate": 1.2521275549208375e-05, "loss": 0.3844, "step": 22562 }, { "epoch": 0.41890221035708053, "grad_norm": 0.32507753372192383, "learning_rate": 1.2520146726030692e-05, "loss": 0.2428, "step": 22564 }, { "epoch": 0.41893934049449916, "grad_norm": 0.2918146252632141, "learning_rate": 1.251901786856136e-05, "loss": 0.3123, "step": 22566 }, { "epoch": 0.4189764706319178, "grad_norm": 0.5561637282371521, "learning_rate": 1.2517888976815743e-05, "loss": 0.3694, "step": 22568 }, { "epoch": 0.41901360076933647, "grad_norm": 0.3774101138114929, "learning_rate": 1.2516760050809198e-05, "loss": 0.4536, "step": 22570 }, { "epoch": 0.4190507309067551, "grad_norm": 0.4797917604446411, "learning_rate": 1.2515631090557086e-05, "loss": 0.3997, "step": 22572 }, { "epoch": 0.4190878610441737, "grad_norm": 0.27623823285102844, "learning_rate": 1.251450209607477e-05, "loss": 0.3785, "step": 22574 }, { "epoch": 0.41912499118159235, "grad_norm": 0.3862324655056, "learning_rate": 1.2513373067377613e-05, "loss": 0.2645, "step": 22576 }, { "epoch": 0.419162121319011, "grad_norm": 0.6311046481132507, "learning_rate": 1.2512244004480976e-05, "loss": 0.2798, "step": 22578 }, { "epoch": 0.41919925145642967, "grad_norm": 0.2939143478870392, "learning_rate": 1.2511114907400224e-05, "loss": 0.3434, "step": 22580 }, { "epoch": 0.4192363815938483, "grad_norm": 0.5135506987571716, "learning_rate": 1.2509985776150719e-05, "loss": 0.4155, "step": 22582 }, { "epoch": 0.4192735117312669, "grad_norm": 0.3979550898075104, "learning_rate": 1.2508856610747826e-05, "loss": 0.3452, "step": 22584 }, { "epoch": 0.41931064186868555, "grad_norm": 0.3651474714279175, "learning_rate": 1.250772741120691e-05, "loss": 0.2031, "step": 22586 }, { "epoch": 0.4193477720061042, "grad_norm": 0.33344167470932007, "learning_rate": 1.2506598177543336e-05, "loss": 0.3693, "step": 22588 }, { "epoch": 0.4193849021435228, "grad_norm": 0.3398403227329254, "learning_rate": 1.250546890977247e-05, "loss": 0.3254, "step": 22590 }, { "epoch": 0.4194220322809415, "grad_norm": 0.41044193506240845, "learning_rate": 1.2504339607909674e-05, "loss": 0.491, "step": 22592 }, { "epoch": 0.4194591624183601, "grad_norm": 0.6559653878211975, "learning_rate": 1.2503210271970319e-05, "loss": 0.391, "step": 22594 }, { "epoch": 0.41949629255577875, "grad_norm": 0.4157891869544983, "learning_rate": 1.2502080901969768e-05, "loss": 0.2938, "step": 22596 }, { "epoch": 0.4195334226931974, "grad_norm": 0.45225077867507935, "learning_rate": 1.2500951497923395e-05, "loss": 0.3571, "step": 22598 }, { "epoch": 0.419570552830616, "grad_norm": 0.42949336767196655, "learning_rate": 1.2499822059846558e-05, "loss": 0.4214, "step": 22600 }, { "epoch": 0.4196076829680347, "grad_norm": 0.468717485666275, "learning_rate": 1.2498692587754633e-05, "loss": 0.3102, "step": 22602 }, { "epoch": 0.4196448131054533, "grad_norm": 0.35520678758621216, "learning_rate": 1.2497563081662986e-05, "loss": 0.4355, "step": 22604 }, { "epoch": 0.41968194324287195, "grad_norm": 0.5001177191734314, "learning_rate": 1.2496433541586988e-05, "loss": 0.4724, "step": 22606 }, { "epoch": 0.4197190733802906, "grad_norm": 0.31420400738716125, "learning_rate": 1.2495303967542006e-05, "loss": 0.3317, "step": 22608 }, { "epoch": 0.4197562035177092, "grad_norm": 0.4527732729911804, "learning_rate": 1.249417435954341e-05, "loss": 0.248, "step": 22610 }, { "epoch": 0.41979333365512783, "grad_norm": 0.24900026619434357, "learning_rate": 1.2493044717606578e-05, "loss": 0.2381, "step": 22612 }, { "epoch": 0.4198304637925465, "grad_norm": 0.4950977563858032, "learning_rate": 1.2491915041746867e-05, "loss": 0.1906, "step": 22614 }, { "epoch": 0.41986759392996514, "grad_norm": 0.35937047004699707, "learning_rate": 1.2490785331979657e-05, "loss": 0.4202, "step": 22616 }, { "epoch": 0.41990472406738377, "grad_norm": 0.3663603961467743, "learning_rate": 1.2489655588320324e-05, "loss": 0.3175, "step": 22618 }, { "epoch": 0.4199418542048024, "grad_norm": 0.31991150975227356, "learning_rate": 1.2488525810784234e-05, "loss": 0.3025, "step": 22620 }, { "epoch": 0.41997898434222103, "grad_norm": 0.33774426579475403, "learning_rate": 1.2487395999386763e-05, "loss": 0.5691, "step": 22622 }, { "epoch": 0.4200161144796397, "grad_norm": 0.2390291839838028, "learning_rate": 1.2486266154143282e-05, "loss": 0.2675, "step": 22624 }, { "epoch": 0.42005324461705834, "grad_norm": 0.47351783514022827, "learning_rate": 1.2485136275069166e-05, "loss": 0.1541, "step": 22626 }, { "epoch": 0.42009037475447697, "grad_norm": 0.5223966240882874, "learning_rate": 1.2484006362179786e-05, "loss": 0.2042, "step": 22628 }, { "epoch": 0.4201275048918956, "grad_norm": 0.34706950187683105, "learning_rate": 1.2482876415490523e-05, "loss": 0.3336, "step": 22630 }, { "epoch": 0.4201646350293142, "grad_norm": 0.3015008866786957, "learning_rate": 1.2481746435016749e-05, "loss": 0.5155, "step": 22632 }, { "epoch": 0.42020176516673285, "grad_norm": 0.37568390369415283, "learning_rate": 1.248061642077384e-05, "loss": 0.3653, "step": 22634 }, { "epoch": 0.42023889530415154, "grad_norm": 0.3156394958496094, "learning_rate": 1.2479486372777172e-05, "loss": 0.2329, "step": 22636 }, { "epoch": 0.42027602544157017, "grad_norm": 0.41168591380119324, "learning_rate": 1.247835629104212e-05, "loss": 0.4531, "step": 22638 }, { "epoch": 0.4203131555789888, "grad_norm": 0.4071168303489685, "learning_rate": 1.2477226175584061e-05, "loss": 0.3128, "step": 22640 }, { "epoch": 0.4203502857164074, "grad_norm": 0.3467046618461609, "learning_rate": 1.2476096026418376e-05, "loss": 0.1717, "step": 22642 }, { "epoch": 0.42038741585382605, "grad_norm": 0.4623037278652191, "learning_rate": 1.2474965843560443e-05, "loss": 0.1231, "step": 22644 }, { "epoch": 0.42042454599124474, "grad_norm": 0.3470204174518585, "learning_rate": 1.2473835627025634e-05, "loss": 0.3367, "step": 22646 }, { "epoch": 0.42046167612866336, "grad_norm": 0.3762527108192444, "learning_rate": 1.2472705376829333e-05, "loss": 0.1833, "step": 22648 }, { "epoch": 0.420498806266082, "grad_norm": 0.3082398474216461, "learning_rate": 1.247157509298692e-05, "loss": 0.3068, "step": 22650 }, { "epoch": 0.4205359364035006, "grad_norm": 0.5302447080612183, "learning_rate": 1.2470444775513773e-05, "loss": 0.1479, "step": 22652 }, { "epoch": 0.42057306654091925, "grad_norm": 0.3482266962528229, "learning_rate": 1.2469314424425272e-05, "loss": 0.3229, "step": 22654 }, { "epoch": 0.42061019667833793, "grad_norm": 0.283134788274765, "learning_rate": 1.2468184039736799e-05, "loss": 0.4041, "step": 22656 }, { "epoch": 0.42064732681575656, "grad_norm": 0.5044047236442566, "learning_rate": 1.2467053621463734e-05, "loss": 0.3357, "step": 22658 }, { "epoch": 0.4206844569531752, "grad_norm": 0.31700700521469116, "learning_rate": 1.2465923169621456e-05, "loss": 0.2638, "step": 22660 }, { "epoch": 0.4207215870905938, "grad_norm": 0.41031935811042786, "learning_rate": 1.2464792684225355e-05, "loss": 0.2693, "step": 22662 }, { "epoch": 0.42075871722801245, "grad_norm": 0.45200008153915405, "learning_rate": 1.2463662165290804e-05, "loss": 0.2813, "step": 22664 }, { "epoch": 0.4207958473654311, "grad_norm": 0.27047303318977356, "learning_rate": 1.2462531612833194e-05, "loss": 0.2714, "step": 22666 }, { "epoch": 0.42083297750284976, "grad_norm": 0.28923970460891724, "learning_rate": 1.2461401026867904e-05, "loss": 0.3784, "step": 22668 }, { "epoch": 0.4208701076402684, "grad_norm": 0.17238780856132507, "learning_rate": 1.2460270407410318e-05, "loss": 0.2301, "step": 22670 }, { "epoch": 0.420907237777687, "grad_norm": 0.5400007367134094, "learning_rate": 1.2459139754475818e-05, "loss": 0.3487, "step": 22672 }, { "epoch": 0.42094436791510564, "grad_norm": 0.3310256600379944, "learning_rate": 1.2458009068079797e-05, "loss": 0.3329, "step": 22674 }, { "epoch": 0.4209814980525243, "grad_norm": 0.5805754661560059, "learning_rate": 1.2456878348237632e-05, "loss": 0.2925, "step": 22676 }, { "epoch": 0.42101862818994296, "grad_norm": 0.5921448469161987, "learning_rate": 1.2455747594964713e-05, "loss": 0.2806, "step": 22678 }, { "epoch": 0.4210557583273616, "grad_norm": 0.4316427409648895, "learning_rate": 1.2454616808276428e-05, "loss": 0.2188, "step": 22680 }, { "epoch": 0.4210928884647802, "grad_norm": 0.3620373010635376, "learning_rate": 1.2453485988188156e-05, "loss": 0.287, "step": 22682 }, { "epoch": 0.42113001860219884, "grad_norm": 0.2724496126174927, "learning_rate": 1.2452355134715291e-05, "loss": 0.4246, "step": 22684 }, { "epoch": 0.42116714873961747, "grad_norm": 0.2627083659172058, "learning_rate": 1.2451224247873217e-05, "loss": 0.2528, "step": 22686 }, { "epoch": 0.4212042788770361, "grad_norm": 0.338580459356308, "learning_rate": 1.2450093327677325e-05, "loss": 0.1854, "step": 22688 }, { "epoch": 0.4212414090144548, "grad_norm": 0.4994960427284241, "learning_rate": 1.2448962374143001e-05, "loss": 0.3067, "step": 22690 }, { "epoch": 0.4212785391518734, "grad_norm": 0.48993536829948425, "learning_rate": 1.2447831387285631e-05, "loss": 0.2692, "step": 22692 }, { "epoch": 0.42131566928929204, "grad_norm": 0.5791990160942078, "learning_rate": 1.2446700367120614e-05, "loss": 0.2534, "step": 22694 }, { "epoch": 0.42135279942671067, "grad_norm": 0.6208539605140686, "learning_rate": 1.2445569313663333e-05, "loss": 0.2369, "step": 22696 }, { "epoch": 0.4213899295641293, "grad_norm": 0.4211345314979553, "learning_rate": 1.2444438226929174e-05, "loss": 0.4635, "step": 22698 }, { "epoch": 0.421427059701548, "grad_norm": 0.47431862354278564, "learning_rate": 1.2443307106933538e-05, "loss": 0.0952, "step": 22700 }, { "epoch": 0.4214641898389666, "grad_norm": 0.4242370128631592, "learning_rate": 1.2442175953691806e-05, "loss": 0.3361, "step": 22702 }, { "epoch": 0.42150131997638524, "grad_norm": 0.36016300320625305, "learning_rate": 1.2441044767219378e-05, "loss": 0.2444, "step": 22704 }, { "epoch": 0.42153845011380386, "grad_norm": 0.2902941107749939, "learning_rate": 1.2439913547531641e-05, "loss": 0.4214, "step": 22706 }, { "epoch": 0.4215755802512225, "grad_norm": 0.43286773562431335, "learning_rate": 1.243878229464399e-05, "loss": 0.3672, "step": 22708 }, { "epoch": 0.4216127103886411, "grad_norm": 0.48602020740509033, "learning_rate": 1.2437651008571816e-05, "loss": 0.2056, "step": 22710 }, { "epoch": 0.4216498405260598, "grad_norm": 0.31060197949409485, "learning_rate": 1.2436519689330516e-05, "loss": 0.217, "step": 22712 }, { "epoch": 0.42168697066347843, "grad_norm": 0.33403152227401733, "learning_rate": 1.2435388336935476e-05, "loss": 0.2793, "step": 22714 }, { "epoch": 0.42172410080089706, "grad_norm": 0.28068020939826965, "learning_rate": 1.2434256951402097e-05, "loss": 0.0453, "step": 22716 }, { "epoch": 0.4217612309383157, "grad_norm": 0.38120952248573303, "learning_rate": 1.2433125532745776e-05, "loss": 0.388, "step": 22718 }, { "epoch": 0.4217983610757343, "grad_norm": 0.33326345682144165, "learning_rate": 1.2431994080981902e-05, "loss": 0.4343, "step": 22720 }, { "epoch": 0.421835491213153, "grad_norm": 0.3045865297317505, "learning_rate": 1.2430862596125875e-05, "loss": 0.1846, "step": 22722 }, { "epoch": 0.42187262135057163, "grad_norm": 0.36258482933044434, "learning_rate": 1.2429731078193088e-05, "loss": 0.3848, "step": 22724 }, { "epoch": 0.42190975148799026, "grad_norm": 0.4740740656852722, "learning_rate": 1.2428599527198936e-05, "loss": 0.3284, "step": 22726 }, { "epoch": 0.4219468816254089, "grad_norm": 0.40667107701301575, "learning_rate": 1.2427467943158824e-05, "loss": 0.3763, "step": 22728 }, { "epoch": 0.4219840117628275, "grad_norm": 0.37334051728248596, "learning_rate": 1.2426336326088142e-05, "loss": 0.2557, "step": 22730 }, { "epoch": 0.4220211419002462, "grad_norm": 0.4673433005809784, "learning_rate": 1.2425204676002291e-05, "loss": 0.3418, "step": 22732 }, { "epoch": 0.42205827203766483, "grad_norm": 0.28942012786865234, "learning_rate": 1.2424072992916666e-05, "loss": 0.2359, "step": 22734 }, { "epoch": 0.42209540217508346, "grad_norm": 0.3647730052471161, "learning_rate": 1.2422941276846672e-05, "loss": 0.1397, "step": 22736 }, { "epoch": 0.4221325323125021, "grad_norm": 0.9200834035873413, "learning_rate": 1.2421809527807705e-05, "loss": 0.2261, "step": 22738 }, { "epoch": 0.4221696624499207, "grad_norm": 0.29546844959259033, "learning_rate": 1.2420677745815161e-05, "loss": 0.3452, "step": 22740 }, { "epoch": 0.42220679258733934, "grad_norm": 0.3888777494430542, "learning_rate": 1.2419545930884447e-05, "loss": 0.2735, "step": 22742 }, { "epoch": 0.422243922724758, "grad_norm": 0.2747851610183716, "learning_rate": 1.2418414083030958e-05, "loss": 0.2599, "step": 22744 }, { "epoch": 0.42228105286217665, "grad_norm": 0.4163869619369507, "learning_rate": 1.2417282202270099e-05, "loss": 0.2856, "step": 22746 }, { "epoch": 0.4223181829995953, "grad_norm": 0.41317400336265564, "learning_rate": 1.2416150288617268e-05, "loss": 0.2423, "step": 22748 }, { "epoch": 0.4223553131370139, "grad_norm": 0.36191147565841675, "learning_rate": 1.2415018342087872e-05, "loss": 0.3766, "step": 22750 }, { "epoch": 0.42239244327443254, "grad_norm": 0.3260866403579712, "learning_rate": 1.2413886362697307e-05, "loss": 0.2617, "step": 22752 }, { "epoch": 0.4224295734118512, "grad_norm": 0.3463301360607147, "learning_rate": 1.2412754350460978e-05, "loss": 0.2133, "step": 22754 }, { "epoch": 0.42246670354926985, "grad_norm": 0.3350251615047455, "learning_rate": 1.2411622305394294e-05, "loss": 0.3557, "step": 22756 }, { "epoch": 0.4225038336866885, "grad_norm": 0.4382261335849762, "learning_rate": 1.241049022751265e-05, "loss": 0.3999, "step": 22758 }, { "epoch": 0.4225409638241071, "grad_norm": 0.36749404668807983, "learning_rate": 1.2409358116831456e-05, "loss": 0.4108, "step": 22760 }, { "epoch": 0.42257809396152574, "grad_norm": 0.26575928926467896, "learning_rate": 1.2408225973366117e-05, "loss": 0.2619, "step": 22762 }, { "epoch": 0.42261522409894436, "grad_norm": 0.3456897735595703, "learning_rate": 1.2407093797132033e-05, "loss": 0.333, "step": 22764 }, { "epoch": 0.42265235423636305, "grad_norm": 0.22343279421329498, "learning_rate": 1.2405961588144611e-05, "loss": 0.2969, "step": 22766 }, { "epoch": 0.4226894843737817, "grad_norm": 0.31952428817749023, "learning_rate": 1.240482934641926e-05, "loss": 0.1943, "step": 22768 }, { "epoch": 0.4227266145112003, "grad_norm": 0.33623000979423523, "learning_rate": 1.2403697071971386e-05, "loss": 0.3037, "step": 22770 }, { "epoch": 0.42276374464861893, "grad_norm": 0.438067227602005, "learning_rate": 1.2402564764816396e-05, "loss": 0.2707, "step": 22772 }, { "epoch": 0.42280087478603756, "grad_norm": 0.2880840301513672, "learning_rate": 1.2401432424969693e-05, "loss": 0.2182, "step": 22774 }, { "epoch": 0.42283800492345625, "grad_norm": 0.3437395691871643, "learning_rate": 1.240030005244669e-05, "loss": 0.2097, "step": 22776 }, { "epoch": 0.4228751350608749, "grad_norm": 0.2337070107460022, "learning_rate": 1.2399167647262791e-05, "loss": 0.3433, "step": 22778 }, { "epoch": 0.4229122651982935, "grad_norm": 0.3288826048374176, "learning_rate": 1.2398035209433407e-05, "loss": 0.2974, "step": 22780 }, { "epoch": 0.42294939533571213, "grad_norm": 0.38473883271217346, "learning_rate": 1.2396902738973951e-05, "loss": 0.2397, "step": 22782 }, { "epoch": 0.42298652547313076, "grad_norm": 0.3544543981552124, "learning_rate": 1.2395770235899821e-05, "loss": 0.1966, "step": 22784 }, { "epoch": 0.4230236556105494, "grad_norm": 0.4153810143470764, "learning_rate": 1.239463770022644e-05, "loss": 0.2791, "step": 22786 }, { "epoch": 0.42306078574796807, "grad_norm": 0.5816980600357056, "learning_rate": 1.239350513196921e-05, "loss": 0.3479, "step": 22788 }, { "epoch": 0.4230979158853867, "grad_norm": 0.22781512141227722, "learning_rate": 1.2392372531143545e-05, "loss": 0.3277, "step": 22790 }, { "epoch": 0.42313504602280533, "grad_norm": 0.37092310190200806, "learning_rate": 1.2391239897764857e-05, "loss": 0.2714, "step": 22792 }, { "epoch": 0.42317217616022396, "grad_norm": 0.3977445960044861, "learning_rate": 1.2390107231848557e-05, "loss": 0.3608, "step": 22794 }, { "epoch": 0.4232093062976426, "grad_norm": 0.38939419388771057, "learning_rate": 1.2388974533410054e-05, "loss": 0.3195, "step": 22796 }, { "epoch": 0.42324643643506127, "grad_norm": 0.4520815908908844, "learning_rate": 1.2387841802464764e-05, "loss": 0.2828, "step": 22798 }, { "epoch": 0.4232835665724799, "grad_norm": 0.37553831934928894, "learning_rate": 1.2386709039028103e-05, "loss": 0.2603, "step": 22800 }, { "epoch": 0.4233206967098985, "grad_norm": 0.3826637268066406, "learning_rate": 1.2385576243115476e-05, "loss": 0.3561, "step": 22802 }, { "epoch": 0.42335782684731715, "grad_norm": 0.5578433871269226, "learning_rate": 1.2384443414742304e-05, "loss": 0.2962, "step": 22804 }, { "epoch": 0.4233949569847358, "grad_norm": 0.28151801228523254, "learning_rate": 1.2383310553924002e-05, "loss": 0.4536, "step": 22806 }, { "epoch": 0.42343208712215447, "grad_norm": 0.4643591046333313, "learning_rate": 1.2382177660675979e-05, "loss": 0.2425, "step": 22808 }, { "epoch": 0.4234692172595731, "grad_norm": 0.29180800914764404, "learning_rate": 1.2381044735013652e-05, "loss": 0.4458, "step": 22810 }, { "epoch": 0.4235063473969917, "grad_norm": 0.47145482897758484, "learning_rate": 1.2379911776952443e-05, "loss": 0.197, "step": 22812 }, { "epoch": 0.42354347753441035, "grad_norm": 0.3320649266242981, "learning_rate": 1.2378778786507761e-05, "loss": 0.2019, "step": 22814 }, { "epoch": 0.423580607671829, "grad_norm": 0.34492138028144836, "learning_rate": 1.2377645763695024e-05, "loss": 0.3681, "step": 22816 }, { "epoch": 0.4236177378092476, "grad_norm": 0.31292101740837097, "learning_rate": 1.2376512708529649e-05, "loss": 0.3541, "step": 22818 }, { "epoch": 0.4236548679466663, "grad_norm": 0.7579625844955444, "learning_rate": 1.2375379621027057e-05, "loss": 0.254, "step": 22820 }, { "epoch": 0.4236919980840849, "grad_norm": 0.4231375753879547, "learning_rate": 1.2374246501202663e-05, "loss": 0.3349, "step": 22822 }, { "epoch": 0.42372912822150355, "grad_norm": 0.4542980492115021, "learning_rate": 1.2373113349071889e-05, "loss": 0.2038, "step": 22824 }, { "epoch": 0.4237662583589222, "grad_norm": 0.6494877934455872, "learning_rate": 1.2371980164650145e-05, "loss": 0.3464, "step": 22826 }, { "epoch": 0.4238033884963408, "grad_norm": 0.34000924229621887, "learning_rate": 1.237084694795286e-05, "loss": 0.3373, "step": 22828 }, { "epoch": 0.4238405186337595, "grad_norm": 0.2824628949165344, "learning_rate": 1.2369713698995444e-05, "loss": 0.2161, "step": 22830 }, { "epoch": 0.4238776487711781, "grad_norm": 0.26404258608818054, "learning_rate": 1.2368580417793329e-05, "loss": 0.1449, "step": 22832 }, { "epoch": 0.42391477890859675, "grad_norm": 0.29113391041755676, "learning_rate": 1.2367447104361926e-05, "loss": 0.1401, "step": 22834 }, { "epoch": 0.4239519090460154, "grad_norm": 0.31812843680381775, "learning_rate": 1.236631375871666e-05, "loss": 0.4179, "step": 22836 }, { "epoch": 0.423989039183434, "grad_norm": 0.3559657335281372, "learning_rate": 1.2365180380872952e-05, "loss": 0.2882, "step": 22838 }, { "epoch": 0.42402616932085263, "grad_norm": 0.3740849494934082, "learning_rate": 1.2364046970846221e-05, "loss": 0.3081, "step": 22840 }, { "epoch": 0.4240632994582713, "grad_norm": 0.38844820857048035, "learning_rate": 1.2362913528651894e-05, "loss": 0.3419, "step": 22842 }, { "epoch": 0.42410042959568994, "grad_norm": 0.3080359995365143, "learning_rate": 1.2361780054305392e-05, "loss": 0.3599, "step": 22844 }, { "epoch": 0.42413755973310857, "grad_norm": 0.2991790175437927, "learning_rate": 1.2360646547822139e-05, "loss": 0.1865, "step": 22846 }, { "epoch": 0.4241746898705272, "grad_norm": 0.3395121097564697, "learning_rate": 1.2359513009217556e-05, "loss": 0.3303, "step": 22848 }, { "epoch": 0.42421182000794583, "grad_norm": 0.28079748153686523, "learning_rate": 1.2358379438507069e-05, "loss": 0.4024, "step": 22850 }, { "epoch": 0.4242489501453645, "grad_norm": 0.29016152024269104, "learning_rate": 1.23572458357061e-05, "loss": 0.2816, "step": 22852 }, { "epoch": 0.42428608028278314, "grad_norm": 0.36855748295783997, "learning_rate": 1.2356112200830077e-05, "loss": 0.2084, "step": 22854 }, { "epoch": 0.42432321042020177, "grad_norm": 1.0051796436309814, "learning_rate": 1.2354978533894424e-05, "loss": 0.2515, "step": 22856 }, { "epoch": 0.4243603405576204, "grad_norm": 0.4385787844657898, "learning_rate": 1.235384483491457e-05, "loss": 0.3012, "step": 22858 }, { "epoch": 0.424397470695039, "grad_norm": 0.4704073667526245, "learning_rate": 1.2352711103905934e-05, "loss": 0.3389, "step": 22860 }, { "epoch": 0.42443460083245765, "grad_norm": 0.37719833850860596, "learning_rate": 1.2351577340883948e-05, "loss": 0.2799, "step": 22862 }, { "epoch": 0.42447173096987634, "grad_norm": 0.3356054723262787, "learning_rate": 1.235044354586404e-05, "loss": 0.3124, "step": 22864 }, { "epoch": 0.42450886110729497, "grad_norm": 0.4583863914012909, "learning_rate": 1.2349309718861638e-05, "loss": 0.1776, "step": 22866 }, { "epoch": 0.4245459912447136, "grad_norm": 0.4187314212322235, "learning_rate": 1.2348175859892164e-05, "loss": 0.3005, "step": 22868 }, { "epoch": 0.4245831213821322, "grad_norm": 0.4396700859069824, "learning_rate": 1.2347041968971052e-05, "loss": 0.4134, "step": 22870 }, { "epoch": 0.42462025151955085, "grad_norm": 0.2254665642976761, "learning_rate": 1.2345908046113726e-05, "loss": 0.1278, "step": 22872 }, { "epoch": 0.42465738165696953, "grad_norm": 0.35500895977020264, "learning_rate": 1.2344774091335618e-05, "loss": 0.1753, "step": 22874 }, { "epoch": 0.42469451179438816, "grad_norm": 0.6753424406051636, "learning_rate": 1.2343640104652163e-05, "loss": 0.2017, "step": 22876 }, { "epoch": 0.4247316419318068, "grad_norm": 0.4725152850151062, "learning_rate": 1.2342506086078785e-05, "loss": 0.2733, "step": 22878 }, { "epoch": 0.4247687720692254, "grad_norm": 0.40250149369239807, "learning_rate": 1.2341372035630912e-05, "loss": 0.2067, "step": 22880 }, { "epoch": 0.42480590220664405, "grad_norm": 0.3511542081832886, "learning_rate": 1.2340237953323983e-05, "loss": 0.1954, "step": 22882 }, { "epoch": 0.42484303234406273, "grad_norm": 0.4057621955871582, "learning_rate": 1.2339103839173423e-05, "loss": 0.2613, "step": 22884 }, { "epoch": 0.42488016248148136, "grad_norm": 0.4731575846672058, "learning_rate": 1.2337969693194665e-05, "loss": 0.3361, "step": 22886 }, { "epoch": 0.4249172926189, "grad_norm": 0.44400572776794434, "learning_rate": 1.2336835515403147e-05, "loss": 0.2467, "step": 22888 }, { "epoch": 0.4249544227563186, "grad_norm": 0.38770273327827454, "learning_rate": 1.2335701305814294e-05, "loss": 0.2607, "step": 22890 }, { "epoch": 0.42499155289373725, "grad_norm": 0.3193565309047699, "learning_rate": 1.233456706444354e-05, "loss": 0.197, "step": 22892 }, { "epoch": 0.4250286830311559, "grad_norm": 0.39757582545280457, "learning_rate": 1.2333432791306323e-05, "loss": 0.3951, "step": 22894 }, { "epoch": 0.42506581316857456, "grad_norm": 0.4024692475795746, "learning_rate": 1.2332298486418077e-05, "loss": 0.3592, "step": 22896 }, { "epoch": 0.4251029433059932, "grad_norm": 0.3067563772201538, "learning_rate": 1.2331164149794234e-05, "loss": 0.437, "step": 22898 }, { "epoch": 0.4251400734434118, "grad_norm": 0.2389126867055893, "learning_rate": 1.233002978145023e-05, "loss": 0.2956, "step": 22900 }, { "epoch": 0.42517720358083044, "grad_norm": 0.3209879994392395, "learning_rate": 1.23288953814015e-05, "loss": 0.2473, "step": 22902 }, { "epoch": 0.42521433371824907, "grad_norm": 0.9317252039909363, "learning_rate": 1.2327760949663477e-05, "loss": 0.2747, "step": 22904 }, { "epoch": 0.42525146385566776, "grad_norm": 0.38047611713409424, "learning_rate": 1.23266264862516e-05, "loss": 0.2258, "step": 22906 }, { "epoch": 0.4252885939930864, "grad_norm": 0.23447075486183167, "learning_rate": 1.2325491991181309e-05, "loss": 0.194, "step": 22908 }, { "epoch": 0.425325724130505, "grad_norm": 0.325809121131897, "learning_rate": 1.2324357464468037e-05, "loss": 0.3452, "step": 22910 }, { "epoch": 0.42536285426792364, "grad_norm": 0.2812923491001129, "learning_rate": 1.2323222906127222e-05, "loss": 0.2049, "step": 22912 }, { "epoch": 0.42539998440534227, "grad_norm": 0.29659613966941833, "learning_rate": 1.2322088316174301e-05, "loss": 0.1808, "step": 22914 }, { "epoch": 0.4254371145427609, "grad_norm": 0.22950735688209534, "learning_rate": 1.2320953694624712e-05, "loss": 0.3864, "step": 22916 }, { "epoch": 0.4254742446801796, "grad_norm": 0.2875741124153137, "learning_rate": 1.2319819041493897e-05, "loss": 0.3914, "step": 22918 }, { "epoch": 0.4255113748175982, "grad_norm": 0.2793431878089905, "learning_rate": 1.2318684356797297e-05, "loss": 0.3962, "step": 22920 }, { "epoch": 0.42554850495501684, "grad_norm": 0.3640422821044922, "learning_rate": 1.2317549640550344e-05, "loss": 0.2095, "step": 22922 }, { "epoch": 0.42558563509243547, "grad_norm": 0.4431861937046051, "learning_rate": 1.2316414892768482e-05, "loss": 0.2884, "step": 22924 }, { "epoch": 0.4256227652298541, "grad_norm": 0.4358106851577759, "learning_rate": 1.2315280113467152e-05, "loss": 0.2747, "step": 22926 }, { "epoch": 0.4256598953672728, "grad_norm": 0.30239108204841614, "learning_rate": 1.2314145302661795e-05, "loss": 0.3204, "step": 22928 }, { "epoch": 0.4256970255046914, "grad_norm": 0.40962329506874084, "learning_rate": 1.2313010460367853e-05, "loss": 0.3144, "step": 22930 }, { "epoch": 0.42573415564211003, "grad_norm": 0.28338727355003357, "learning_rate": 1.2311875586600768e-05, "loss": 0.2308, "step": 22932 }, { "epoch": 0.42577128577952866, "grad_norm": 0.3081931173801422, "learning_rate": 1.231074068137598e-05, "loss": 0.3556, "step": 22934 }, { "epoch": 0.4258084159169473, "grad_norm": 0.4209566116333008, "learning_rate": 1.230960574470893e-05, "loss": 0.4912, "step": 22936 }, { "epoch": 0.4258455460543659, "grad_norm": 0.506011962890625, "learning_rate": 1.2308470776615065e-05, "loss": 0.1949, "step": 22938 }, { "epoch": 0.4258826761917846, "grad_norm": 0.6085726022720337, "learning_rate": 1.2307335777109831e-05, "loss": 0.4255, "step": 22940 }, { "epoch": 0.42591980632920323, "grad_norm": 0.42970091104507446, "learning_rate": 1.2306200746208668e-05, "loss": 0.2556, "step": 22942 }, { "epoch": 0.42595693646662186, "grad_norm": 0.3490724563598633, "learning_rate": 1.2305065683927015e-05, "loss": 0.3008, "step": 22944 }, { "epoch": 0.4259940666040405, "grad_norm": 0.3265831470489502, "learning_rate": 1.2303930590280328e-05, "loss": 0.4057, "step": 22946 }, { "epoch": 0.4260311967414591, "grad_norm": 0.4583674967288971, "learning_rate": 1.2302795465284046e-05, "loss": 0.1731, "step": 22948 }, { "epoch": 0.4260683268788778, "grad_norm": 0.3717461824417114, "learning_rate": 1.2301660308953614e-05, "loss": 0.3712, "step": 22950 }, { "epoch": 0.42610545701629643, "grad_norm": 0.25856301188468933, "learning_rate": 1.2300525121304481e-05, "loss": 0.1683, "step": 22952 }, { "epoch": 0.42614258715371506, "grad_norm": 0.25912532210350037, "learning_rate": 1.229938990235209e-05, "loss": 0.2344, "step": 22954 }, { "epoch": 0.4261797172911337, "grad_norm": 0.41414007544517517, "learning_rate": 1.2298254652111889e-05, "loss": 0.3619, "step": 22956 }, { "epoch": 0.4262168474285523, "grad_norm": 0.2439611554145813, "learning_rate": 1.2297119370599328e-05, "loss": 0.1103, "step": 22958 }, { "epoch": 0.426253977565971, "grad_norm": 0.18834145367145538, "learning_rate": 1.2295984057829853e-05, "loss": 0.1764, "step": 22960 }, { "epoch": 0.4262911077033896, "grad_norm": 0.5146392583847046, "learning_rate": 1.2294848713818914e-05, "loss": 0.1639, "step": 22962 }, { "epoch": 0.42632823784080826, "grad_norm": 0.37873223423957825, "learning_rate": 1.2293713338581957e-05, "loss": 0.3562, "step": 22964 }, { "epoch": 0.4263653679782269, "grad_norm": 0.35179558396339417, "learning_rate": 1.229257793213443e-05, "loss": 0.3914, "step": 22966 }, { "epoch": 0.4264024981156455, "grad_norm": 0.33815863728523254, "learning_rate": 1.2291442494491786e-05, "loss": 0.154, "step": 22968 }, { "epoch": 0.42643962825306414, "grad_norm": 0.4831031858921051, "learning_rate": 1.2290307025669473e-05, "loss": 0.2434, "step": 22970 }, { "epoch": 0.4264767583904828, "grad_norm": 0.4986487925052643, "learning_rate": 1.228917152568294e-05, "loss": 0.2098, "step": 22972 }, { "epoch": 0.42651388852790145, "grad_norm": 0.38299787044525146, "learning_rate": 1.2288035994547642e-05, "loss": 0.2932, "step": 22974 }, { "epoch": 0.4265510186653201, "grad_norm": 0.28940391540527344, "learning_rate": 1.2286900432279025e-05, "loss": 0.3772, "step": 22976 }, { "epoch": 0.4265881488027387, "grad_norm": 0.24480387568473816, "learning_rate": 1.2285764838892544e-05, "loss": 0.1235, "step": 22978 }, { "epoch": 0.42662527894015734, "grad_norm": 0.6749100089073181, "learning_rate": 1.2284629214403651e-05, "loss": 0.1893, "step": 22980 }, { "epoch": 0.426662409077576, "grad_norm": 0.5375548601150513, "learning_rate": 1.22834935588278e-05, "loss": 0.4104, "step": 22982 }, { "epoch": 0.42669953921499465, "grad_norm": 0.30733543634414673, "learning_rate": 1.228235787218044e-05, "loss": 0.264, "step": 22984 }, { "epoch": 0.4267366693524133, "grad_norm": 0.338520348072052, "learning_rate": 1.2281222154477021e-05, "loss": 0.3547, "step": 22986 }, { "epoch": 0.4267737994898319, "grad_norm": 0.27566567063331604, "learning_rate": 1.2280086405733005e-05, "loss": 0.1892, "step": 22988 }, { "epoch": 0.42681092962725053, "grad_norm": 0.26056286692619324, "learning_rate": 1.2278950625963843e-05, "loss": 0.2179, "step": 22990 }, { "epoch": 0.42684805976466916, "grad_norm": 0.3909897208213806, "learning_rate": 1.2277814815184989e-05, "loss": 0.4415, "step": 22992 }, { "epoch": 0.42688518990208785, "grad_norm": 0.3203113377094269, "learning_rate": 1.22766789734119e-05, "loss": 0.3038, "step": 22994 }, { "epoch": 0.4269223200395065, "grad_norm": 0.4672233760356903, "learning_rate": 1.227554310066003e-05, "loss": 0.2978, "step": 22996 }, { "epoch": 0.4269594501769251, "grad_norm": 0.4107931852340698, "learning_rate": 1.2274407196944831e-05, "loss": 0.2269, "step": 22998 }, { "epoch": 0.42699658031434373, "grad_norm": 0.40406060218811035, "learning_rate": 1.2273271262281762e-05, "loss": 0.2408, "step": 23000 }, { "epoch": 0.42703371045176236, "grad_norm": 0.39488640427589417, "learning_rate": 1.2272135296686285e-05, "loss": 0.2529, "step": 23002 }, { "epoch": 0.42707084058918104, "grad_norm": 0.3654564619064331, "learning_rate": 1.2270999300173849e-05, "loss": 0.3488, "step": 23004 }, { "epoch": 0.4271079707265997, "grad_norm": 0.6523936986923218, "learning_rate": 1.2269863272759916e-05, "loss": 0.3418, "step": 23006 }, { "epoch": 0.4271451008640183, "grad_norm": 0.547085165977478, "learning_rate": 1.2268727214459944e-05, "loss": 0.4111, "step": 23008 }, { "epoch": 0.42718223100143693, "grad_norm": 0.3856388330459595, "learning_rate": 1.2267591125289388e-05, "loss": 0.1991, "step": 23010 }, { "epoch": 0.42721936113885556, "grad_norm": 0.38456717133522034, "learning_rate": 1.2266455005263709e-05, "loss": 0.1407, "step": 23012 }, { "epoch": 0.4272564912762742, "grad_norm": 0.612034022808075, "learning_rate": 1.2265318854398366e-05, "loss": 0.3702, "step": 23014 }, { "epoch": 0.42729362141369287, "grad_norm": 0.40548208355903625, "learning_rate": 1.2264182672708823e-05, "loss": 0.3399, "step": 23016 }, { "epoch": 0.4273307515511115, "grad_norm": 0.3133481442928314, "learning_rate": 1.2263046460210532e-05, "loss": 0.3199, "step": 23018 }, { "epoch": 0.4273678816885301, "grad_norm": 0.42254459857940674, "learning_rate": 1.2261910216918957e-05, "loss": 0.2897, "step": 23020 }, { "epoch": 0.42740501182594876, "grad_norm": 0.33620908856391907, "learning_rate": 1.2260773942849562e-05, "loss": 0.4578, "step": 23022 }, { "epoch": 0.4274421419633674, "grad_norm": 0.335371196269989, "learning_rate": 1.2259637638017803e-05, "loss": 0.2779, "step": 23024 }, { "epoch": 0.42747927210078607, "grad_norm": 0.3532801866531372, "learning_rate": 1.2258501302439145e-05, "loss": 0.1636, "step": 23026 }, { "epoch": 0.4275164022382047, "grad_norm": 0.2666946053504944, "learning_rate": 1.225736493612905e-05, "loss": 0.2908, "step": 23028 }, { "epoch": 0.4275535323756233, "grad_norm": 0.2571125328540802, "learning_rate": 1.2256228539102979e-05, "loss": 0.3319, "step": 23030 }, { "epoch": 0.42759066251304195, "grad_norm": 0.42697784304618835, "learning_rate": 1.2255092111376395e-05, "loss": 0.1265, "step": 23032 }, { "epoch": 0.4276277926504606, "grad_norm": 0.3501487672328949, "learning_rate": 1.2253955652964762e-05, "loss": 0.3816, "step": 23034 }, { "epoch": 0.42766492278787926, "grad_norm": 0.4579354226589203, "learning_rate": 1.2252819163883548e-05, "loss": 0.2369, "step": 23036 }, { "epoch": 0.4277020529252979, "grad_norm": 0.3472498059272766, "learning_rate": 1.2251682644148208e-05, "loss": 0.3058, "step": 23038 }, { "epoch": 0.4277391830627165, "grad_norm": 0.4118129312992096, "learning_rate": 1.2250546093774214e-05, "loss": 0.2865, "step": 23040 }, { "epoch": 0.42777631320013515, "grad_norm": 0.37376001477241516, "learning_rate": 1.2249409512777029e-05, "loss": 0.1539, "step": 23042 }, { "epoch": 0.4278134433375538, "grad_norm": 0.4816228747367859, "learning_rate": 1.2248272901172116e-05, "loss": 0.3309, "step": 23044 }, { "epoch": 0.4278505734749724, "grad_norm": 0.3830767869949341, "learning_rate": 1.2247136258974945e-05, "loss": 0.413, "step": 23046 }, { "epoch": 0.4278877036123911, "grad_norm": 0.44802048802375793, "learning_rate": 1.2245999586200982e-05, "loss": 0.3644, "step": 23048 }, { "epoch": 0.4279248337498097, "grad_norm": 0.28901875019073486, "learning_rate": 1.2244862882865691e-05, "loss": 0.2763, "step": 23050 }, { "epoch": 0.42796196388722835, "grad_norm": 0.7705404758453369, "learning_rate": 1.2243726148984541e-05, "loss": 0.4502, "step": 23052 }, { "epoch": 0.427999094024647, "grad_norm": 0.29758206009864807, "learning_rate": 1.2242589384572996e-05, "loss": 0.1601, "step": 23054 }, { "epoch": 0.4280362241620656, "grad_norm": 0.4036569595336914, "learning_rate": 1.2241452589646528e-05, "loss": 0.3428, "step": 23056 }, { "epoch": 0.4280733542994843, "grad_norm": 0.416521817445755, "learning_rate": 1.2240315764220604e-05, "loss": 0.1801, "step": 23058 }, { "epoch": 0.4281104844369029, "grad_norm": 0.40393149852752686, "learning_rate": 1.2239178908310697e-05, "loss": 0.3821, "step": 23060 }, { "epoch": 0.42814761457432154, "grad_norm": 0.42451438307762146, "learning_rate": 1.2238042021932268e-05, "loss": 0.1351, "step": 23062 }, { "epoch": 0.4281847447117402, "grad_norm": 0.29696065187454224, "learning_rate": 1.223690510510079e-05, "loss": 0.1528, "step": 23064 }, { "epoch": 0.4282218748491588, "grad_norm": 0.37280505895614624, "learning_rate": 1.2235768157831738e-05, "loss": 0.2149, "step": 23066 }, { "epoch": 0.42825900498657743, "grad_norm": 0.33905187249183655, "learning_rate": 1.2234631180140575e-05, "loss": 0.3213, "step": 23068 }, { "epoch": 0.4282961351239961, "grad_norm": 0.36966705322265625, "learning_rate": 1.2233494172042777e-05, "loss": 0.3788, "step": 23070 }, { "epoch": 0.42833326526141474, "grad_norm": 0.2549493610858917, "learning_rate": 1.2232357133553812e-05, "loss": 0.2205, "step": 23072 }, { "epoch": 0.42837039539883337, "grad_norm": 0.4327949285507202, "learning_rate": 1.2231220064689153e-05, "loss": 0.4365, "step": 23074 }, { "epoch": 0.428407525536252, "grad_norm": 0.2787284553050995, "learning_rate": 1.2230082965464271e-05, "loss": 0.3685, "step": 23076 }, { "epoch": 0.4284446556736706, "grad_norm": 0.3058987855911255, "learning_rate": 1.2228945835894644e-05, "loss": 0.3338, "step": 23078 }, { "epoch": 0.4284817858110893, "grad_norm": 0.39583471417427063, "learning_rate": 1.2227808675995738e-05, "loss": 0.2665, "step": 23080 }, { "epoch": 0.42851891594850794, "grad_norm": 0.34660831093788147, "learning_rate": 1.2226671485783028e-05, "loss": 0.3431, "step": 23082 }, { "epoch": 0.42855604608592657, "grad_norm": 0.349342405796051, "learning_rate": 1.222553426527199e-05, "loss": 0.2833, "step": 23084 }, { "epoch": 0.4285931762233452, "grad_norm": 0.39499565958976746, "learning_rate": 1.2224397014478098e-05, "loss": 0.3762, "step": 23086 }, { "epoch": 0.4286303063607638, "grad_norm": 0.46083858609199524, "learning_rate": 1.2223259733416823e-05, "loss": 0.2188, "step": 23088 }, { "epoch": 0.42866743649818245, "grad_norm": 3.4553961753845215, "learning_rate": 1.2222122422103646e-05, "loss": 0.3337, "step": 23090 }, { "epoch": 0.42870456663560114, "grad_norm": 0.4417554438114166, "learning_rate": 1.2220985080554035e-05, "loss": 0.2312, "step": 23092 }, { "epoch": 0.42874169677301976, "grad_norm": 0.24031218886375427, "learning_rate": 1.221984770878347e-05, "loss": 0.3668, "step": 23094 }, { "epoch": 0.4287788269104384, "grad_norm": 0.3566148579120636, "learning_rate": 1.2218710306807429e-05, "loss": 0.3607, "step": 23096 }, { "epoch": 0.428815957047857, "grad_norm": 0.43841347098350525, "learning_rate": 1.2217572874641386e-05, "loss": 0.3739, "step": 23098 }, { "epoch": 0.42885308718527565, "grad_norm": 0.3723064363002777, "learning_rate": 1.2216435412300818e-05, "loss": 0.3829, "step": 23100 }, { "epoch": 0.42889021732269433, "grad_norm": 0.36878836154937744, "learning_rate": 1.22152979198012e-05, "loss": 0.116, "step": 23102 }, { "epoch": 0.42892734746011296, "grad_norm": 0.4086867868900299, "learning_rate": 1.221416039715802e-05, "loss": 0.1755, "step": 23104 }, { "epoch": 0.4289644775975316, "grad_norm": 0.7433109283447266, "learning_rate": 1.2213022844386743e-05, "loss": 0.2151, "step": 23106 }, { "epoch": 0.4290016077349502, "grad_norm": 0.3579424321651459, "learning_rate": 1.2211885261502855e-05, "loss": 0.329, "step": 23108 }, { "epoch": 0.42903873787236885, "grad_norm": 0.5112415552139282, "learning_rate": 1.2210747648521837e-05, "loss": 0.2966, "step": 23110 }, { "epoch": 0.42907586800978753, "grad_norm": 0.3081870377063751, "learning_rate": 1.2209610005459164e-05, "loss": 0.2573, "step": 23112 }, { "epoch": 0.42911299814720616, "grad_norm": 0.32011377811431885, "learning_rate": 1.2208472332330318e-05, "loss": 0.2732, "step": 23114 }, { "epoch": 0.4291501282846248, "grad_norm": 0.5502941012382507, "learning_rate": 1.2207334629150776e-05, "loss": 0.445, "step": 23116 }, { "epoch": 0.4291872584220434, "grad_norm": 0.43634793162345886, "learning_rate": 1.2206196895936023e-05, "loss": 0.3416, "step": 23118 }, { "epoch": 0.42922438855946204, "grad_norm": 0.3453655242919922, "learning_rate": 1.220505913270154e-05, "loss": 0.2658, "step": 23120 }, { "epoch": 0.4292615186968807, "grad_norm": 0.35604044795036316, "learning_rate": 1.2203921339462805e-05, "loss": 0.3373, "step": 23122 }, { "epoch": 0.42929864883429936, "grad_norm": 0.401803195476532, "learning_rate": 1.2202783516235303e-05, "loss": 0.2712, "step": 23124 }, { "epoch": 0.429335778971718, "grad_norm": 0.3024548292160034, "learning_rate": 1.2201645663034514e-05, "loss": 0.4558, "step": 23126 }, { "epoch": 0.4293729091091366, "grad_norm": 0.8156460523605347, "learning_rate": 1.2200507779875925e-05, "loss": 0.375, "step": 23128 }, { "epoch": 0.42941003924655524, "grad_norm": 0.3897463381290436, "learning_rate": 1.2199369866775012e-05, "loss": 0.4592, "step": 23130 }, { "epoch": 0.42944716938397387, "grad_norm": 0.3447442054748535, "learning_rate": 1.2198231923747267e-05, "loss": 0.362, "step": 23132 }, { "epoch": 0.42948429952139255, "grad_norm": 0.4403630495071411, "learning_rate": 1.2197093950808173e-05, "loss": 0.3008, "step": 23134 }, { "epoch": 0.4295214296588112, "grad_norm": 0.33020108938217163, "learning_rate": 1.2195955947973203e-05, "loss": 0.3821, "step": 23136 }, { "epoch": 0.4295585597962298, "grad_norm": 0.3349105417728424, "learning_rate": 1.2194817915257855e-05, "loss": 0.1456, "step": 23138 }, { "epoch": 0.42959568993364844, "grad_norm": 0.3247455060482025, "learning_rate": 1.219367985267761e-05, "loss": 0.2925, "step": 23140 }, { "epoch": 0.42963282007106707, "grad_norm": 0.2528064548969269, "learning_rate": 1.2192541760247953e-05, "loss": 0.3945, "step": 23142 }, { "epoch": 0.4296699502084857, "grad_norm": 0.31630587577819824, "learning_rate": 1.2191403637984367e-05, "loss": 0.2378, "step": 23144 }, { "epoch": 0.4297070803459044, "grad_norm": 0.23515333235263824, "learning_rate": 1.2190265485902343e-05, "loss": 0.2519, "step": 23146 }, { "epoch": 0.429744210483323, "grad_norm": 0.45384711027145386, "learning_rate": 1.2189127304017367e-05, "loss": 0.4155, "step": 23148 }, { "epoch": 0.42978134062074164, "grad_norm": 0.28183314204216003, "learning_rate": 1.2187989092344925e-05, "loss": 0.2654, "step": 23150 }, { "epoch": 0.42981847075816026, "grad_norm": 0.5351614356040955, "learning_rate": 1.2186850850900507e-05, "loss": 0.3583, "step": 23152 }, { "epoch": 0.4298556008955789, "grad_norm": 0.34302783012390137, "learning_rate": 1.2185712579699598e-05, "loss": 0.1633, "step": 23154 }, { "epoch": 0.4298927310329976, "grad_norm": 0.18280485272407532, "learning_rate": 1.2184574278757688e-05, "loss": 0.1829, "step": 23156 }, { "epoch": 0.4299298611704162, "grad_norm": 0.44055184721946716, "learning_rate": 1.2183435948090263e-05, "loss": 0.2283, "step": 23158 }, { "epoch": 0.42996699130783483, "grad_norm": 0.3919624090194702, "learning_rate": 1.218229758771282e-05, "loss": 0.2851, "step": 23160 }, { "epoch": 0.43000412144525346, "grad_norm": 0.32525837421417236, "learning_rate": 1.2181159197640838e-05, "loss": 0.4172, "step": 23162 }, { "epoch": 0.4300412515826721, "grad_norm": 0.436370313167572, "learning_rate": 1.2180020777889815e-05, "loss": 0.2238, "step": 23164 }, { "epoch": 0.4300783817200907, "grad_norm": 0.3954935073852539, "learning_rate": 1.2178882328475244e-05, "loss": 0.3807, "step": 23166 }, { "epoch": 0.4301155118575094, "grad_norm": 0.3737346827983856, "learning_rate": 1.2177743849412605e-05, "loss": 0.3215, "step": 23168 }, { "epoch": 0.43015264199492803, "grad_norm": 0.5883674025535583, "learning_rate": 1.2176605340717395e-05, "loss": 0.4364, "step": 23170 }, { "epoch": 0.43018977213234666, "grad_norm": 0.391816109418869, "learning_rate": 1.217546680240511e-05, "loss": 0.2544, "step": 23172 }, { "epoch": 0.4302269022697653, "grad_norm": 0.5665327310562134, "learning_rate": 1.2174328234491235e-05, "loss": 0.251, "step": 23174 }, { "epoch": 0.4302640324071839, "grad_norm": 0.36060065031051636, "learning_rate": 1.2173189636991266e-05, "loss": 0.2896, "step": 23176 }, { "epoch": 0.4303011625446026, "grad_norm": 0.40805038809776306, "learning_rate": 1.2172051009920698e-05, "loss": 0.5056, "step": 23178 }, { "epoch": 0.43033829268202123, "grad_norm": 0.29368507862091064, "learning_rate": 1.2170912353295017e-05, "loss": 0.2301, "step": 23180 }, { "epoch": 0.43037542281943986, "grad_norm": 0.2922075688838959, "learning_rate": 1.2169773667129725e-05, "loss": 0.3532, "step": 23182 }, { "epoch": 0.4304125529568585, "grad_norm": 0.374648779630661, "learning_rate": 1.2168634951440312e-05, "loss": 0.2379, "step": 23184 }, { "epoch": 0.4304496830942771, "grad_norm": 0.38597720861434937, "learning_rate": 1.2167496206242276e-05, "loss": 0.3382, "step": 23186 }, { "epoch": 0.4304868132316958, "grad_norm": 0.3847152292728424, "learning_rate": 1.2166357431551103e-05, "loss": 0.3534, "step": 23188 }, { "epoch": 0.4305239433691144, "grad_norm": 0.40399467945098877, "learning_rate": 1.2165218627382296e-05, "loss": 0.3624, "step": 23190 }, { "epoch": 0.43056107350653305, "grad_norm": 0.3807084560394287, "learning_rate": 1.2164079793751353e-05, "loss": 0.2474, "step": 23192 }, { "epoch": 0.4305982036439517, "grad_norm": 0.25879350304603577, "learning_rate": 1.216294093067376e-05, "loss": 0.2076, "step": 23194 }, { "epoch": 0.4306353337813703, "grad_norm": 0.36035412549972534, "learning_rate": 1.2161802038165028e-05, "loss": 0.3596, "step": 23196 }, { "epoch": 0.43067246391878894, "grad_norm": 0.39120063185691833, "learning_rate": 1.2160663116240641e-05, "loss": 0.3354, "step": 23198 }, { "epoch": 0.4307095940562076, "grad_norm": 0.2907465696334839, "learning_rate": 1.2159524164916099e-05, "loss": 0.2905, "step": 23200 }, { "epoch": 0.43074672419362625, "grad_norm": 0.4556128978729248, "learning_rate": 1.2158385184206902e-05, "loss": 0.3241, "step": 23202 }, { "epoch": 0.4307838543310449, "grad_norm": 0.4247533082962036, "learning_rate": 1.2157246174128553e-05, "loss": 0.3166, "step": 23204 }, { "epoch": 0.4308209844684635, "grad_norm": 0.47747036814689636, "learning_rate": 1.2156107134696543e-05, "loss": 0.182, "step": 23206 }, { "epoch": 0.43085811460588214, "grad_norm": 0.4764822721481323, "learning_rate": 1.2154968065926369e-05, "loss": 0.3656, "step": 23208 }, { "epoch": 0.4308952447433008, "grad_norm": 0.3703691065311432, "learning_rate": 1.2153828967833539e-05, "loss": 0.2498, "step": 23210 }, { "epoch": 0.43093237488071945, "grad_norm": 0.3465287387371063, "learning_rate": 1.2152689840433545e-05, "loss": 0.354, "step": 23212 }, { "epoch": 0.4309695050181381, "grad_norm": 0.42818284034729004, "learning_rate": 1.215155068374189e-05, "loss": 0.4737, "step": 23214 }, { "epoch": 0.4310066351555567, "grad_norm": 0.37709662318229675, "learning_rate": 1.2150411497774077e-05, "loss": 0.2949, "step": 23216 }, { "epoch": 0.43104376529297533, "grad_norm": 0.4030877649784088, "learning_rate": 1.2149272282545608e-05, "loss": 0.2289, "step": 23218 }, { "epoch": 0.43108089543039396, "grad_norm": 0.3285463750362396, "learning_rate": 1.2148133038071976e-05, "loss": 0.1606, "step": 23220 }, { "epoch": 0.43111802556781265, "grad_norm": 0.2906420826911926, "learning_rate": 1.2146993764368688e-05, "loss": 0.2696, "step": 23222 }, { "epoch": 0.4311551557052313, "grad_norm": 0.319789320230484, "learning_rate": 1.2145854461451248e-05, "loss": 0.2895, "step": 23224 }, { "epoch": 0.4311922858426499, "grad_norm": 0.5244007110595703, "learning_rate": 1.2144715129335157e-05, "loss": 0.315, "step": 23226 }, { "epoch": 0.43122941598006853, "grad_norm": 0.35446733236312866, "learning_rate": 1.2143575768035914e-05, "loss": 0.4828, "step": 23228 }, { "epoch": 0.43126654611748716, "grad_norm": 0.6110371351242065, "learning_rate": 1.2142436377569027e-05, "loss": 0.291, "step": 23230 }, { "epoch": 0.43130367625490584, "grad_norm": 0.38392752408981323, "learning_rate": 1.2141296957949997e-05, "loss": 0.3001, "step": 23232 }, { "epoch": 0.43134080639232447, "grad_norm": 0.2700739800930023, "learning_rate": 1.214015750919433e-05, "loss": 0.261, "step": 23234 }, { "epoch": 0.4313779365297431, "grad_norm": 0.38205358386039734, "learning_rate": 1.2139018031317533e-05, "loss": 0.2886, "step": 23236 }, { "epoch": 0.43141506666716173, "grad_norm": 0.42201003432273865, "learning_rate": 1.2137878524335106e-05, "loss": 0.2207, "step": 23238 }, { "epoch": 0.43145219680458036, "grad_norm": 0.4867798686027527, "learning_rate": 1.2136738988262555e-05, "loss": 0.2635, "step": 23240 }, { "epoch": 0.431489326941999, "grad_norm": 0.38359594345092773, "learning_rate": 1.2135599423115387e-05, "loss": 0.3509, "step": 23242 }, { "epoch": 0.43152645707941767, "grad_norm": 0.33045750856399536, "learning_rate": 1.2134459828909107e-05, "loss": 0.5437, "step": 23244 }, { "epoch": 0.4315635872168363, "grad_norm": 0.35055261850357056, "learning_rate": 1.2133320205659223e-05, "loss": 0.2281, "step": 23246 }, { "epoch": 0.4316007173542549, "grad_norm": 0.4190187156200409, "learning_rate": 1.2132180553381242e-05, "loss": 0.3006, "step": 23248 }, { "epoch": 0.43163784749167355, "grad_norm": 0.5887752771377563, "learning_rate": 1.213104087209067e-05, "loss": 0.2004, "step": 23250 }, { "epoch": 0.4316749776290922, "grad_norm": 0.2555970847606659, "learning_rate": 1.2129901161803013e-05, "loss": 0.167, "step": 23252 }, { "epoch": 0.43171210776651087, "grad_norm": 0.5476435422897339, "learning_rate": 1.2128761422533781e-05, "loss": 0.5467, "step": 23254 }, { "epoch": 0.4317492379039295, "grad_norm": 0.2246808111667633, "learning_rate": 1.2127621654298482e-05, "loss": 0.3526, "step": 23256 }, { "epoch": 0.4317863680413481, "grad_norm": 0.4039890170097351, "learning_rate": 1.2126481857112629e-05, "loss": 0.3415, "step": 23258 }, { "epoch": 0.43182349817876675, "grad_norm": 0.4147579073905945, "learning_rate": 1.2125342030991726e-05, "loss": 0.3932, "step": 23260 }, { "epoch": 0.4318606283161854, "grad_norm": 0.3487914204597473, "learning_rate": 1.2124202175951283e-05, "loss": 0.3348, "step": 23262 }, { "epoch": 0.43189775845360406, "grad_norm": 0.3068159818649292, "learning_rate": 1.2123062292006811e-05, "loss": 0.2338, "step": 23264 }, { "epoch": 0.4319348885910227, "grad_norm": 0.42535725235939026, "learning_rate": 1.2121922379173818e-05, "loss": 0.3939, "step": 23266 }, { "epoch": 0.4319720187284413, "grad_norm": 0.5371565818786621, "learning_rate": 1.2120782437467821e-05, "loss": 0.3613, "step": 23268 }, { "epoch": 0.43200914886585995, "grad_norm": 0.39797282218933105, "learning_rate": 1.2119642466904328e-05, "loss": 0.3979, "step": 23270 }, { "epoch": 0.4320462790032786, "grad_norm": 0.44049230217933655, "learning_rate": 1.2118502467498849e-05, "loss": 0.5261, "step": 23272 }, { "epoch": 0.4320834091406972, "grad_norm": 0.3842976689338684, "learning_rate": 1.2117362439266898e-05, "loss": 0.3972, "step": 23274 }, { "epoch": 0.4321205392781159, "grad_norm": 0.48244377970695496, "learning_rate": 1.2116222382223983e-05, "loss": 0.1613, "step": 23276 }, { "epoch": 0.4321576694155345, "grad_norm": 0.23082970082759857, "learning_rate": 1.2115082296385623e-05, "loss": 0.2896, "step": 23278 }, { "epoch": 0.43219479955295315, "grad_norm": 0.4386811852455139, "learning_rate": 1.211394218176733e-05, "loss": 0.2761, "step": 23280 }, { "epoch": 0.4322319296903718, "grad_norm": 0.3757105767726898, "learning_rate": 1.2112802038384612e-05, "loss": 0.3827, "step": 23282 }, { "epoch": 0.4322690598277904, "grad_norm": 0.6001486778259277, "learning_rate": 1.2111661866252988e-05, "loss": 0.5113, "step": 23284 }, { "epoch": 0.4323061899652091, "grad_norm": 0.38551756739616394, "learning_rate": 1.2110521665387973e-05, "loss": 0.309, "step": 23286 }, { "epoch": 0.4323433201026277, "grad_norm": 0.3928928077220917, "learning_rate": 1.210938143580508e-05, "loss": 0.4282, "step": 23288 }, { "epoch": 0.43238045024004634, "grad_norm": 0.5995832085609436, "learning_rate": 1.2108241177519821e-05, "loss": 0.2396, "step": 23290 }, { "epoch": 0.43241758037746497, "grad_norm": 0.331207275390625, "learning_rate": 1.2107100890547718e-05, "loss": 0.3683, "step": 23292 }, { "epoch": 0.4324547105148836, "grad_norm": 0.44506579637527466, "learning_rate": 1.2105960574904282e-05, "loss": 0.3635, "step": 23294 }, { "epoch": 0.43249184065230223, "grad_norm": 0.3518659472465515, "learning_rate": 1.2104820230605028e-05, "loss": 0.2474, "step": 23296 }, { "epoch": 0.4325289707897209, "grad_norm": 0.2805148959159851, "learning_rate": 1.2103679857665477e-05, "loss": 0.3096, "step": 23298 }, { "epoch": 0.43256610092713954, "grad_norm": 0.40993937849998474, "learning_rate": 1.2102539456101145e-05, "loss": 0.36, "step": 23300 }, { "epoch": 0.43260323106455817, "grad_norm": 0.29210585355758667, "learning_rate": 1.210139902592755e-05, "loss": 0.2525, "step": 23302 }, { "epoch": 0.4326403612019768, "grad_norm": 0.2924707233905792, "learning_rate": 1.2100258567160207e-05, "loss": 0.1766, "step": 23304 }, { "epoch": 0.4326774913393954, "grad_norm": 0.5818803310394287, "learning_rate": 1.2099118079814636e-05, "loss": 0.3899, "step": 23306 }, { "epoch": 0.4327146214768141, "grad_norm": 0.5296061635017395, "learning_rate": 1.2097977563906356e-05, "loss": 0.177, "step": 23308 }, { "epoch": 0.43275175161423274, "grad_norm": 0.36435437202453613, "learning_rate": 1.2096837019450884e-05, "loss": 0.3098, "step": 23310 }, { "epoch": 0.43278888175165137, "grad_norm": 0.49574407935142517, "learning_rate": 1.2095696446463746e-05, "loss": 0.3023, "step": 23312 }, { "epoch": 0.43282601188907, "grad_norm": 0.3840111196041107, "learning_rate": 1.2094555844960452e-05, "loss": 0.3476, "step": 23314 }, { "epoch": 0.4328631420264886, "grad_norm": 0.3309880197048187, "learning_rate": 1.2093415214956526e-05, "loss": 0.1849, "step": 23316 }, { "epoch": 0.43290027216390725, "grad_norm": 0.3271883726119995, "learning_rate": 1.2092274556467492e-05, "loss": 0.1865, "step": 23318 }, { "epoch": 0.43293740230132594, "grad_norm": 0.4441545903682709, "learning_rate": 1.2091133869508867e-05, "loss": 0.3206, "step": 23320 }, { "epoch": 0.43297453243874456, "grad_norm": 0.3447020351886749, "learning_rate": 1.2089993154096176e-05, "loss": 0.2285, "step": 23322 }, { "epoch": 0.4330116625761632, "grad_norm": 0.36402827501296997, "learning_rate": 1.2088852410244939e-05, "loss": 0.2962, "step": 23324 }, { "epoch": 0.4330487927135818, "grad_norm": 0.4426742494106293, "learning_rate": 1.2087711637970672e-05, "loss": 0.2773, "step": 23326 }, { "epoch": 0.43308592285100045, "grad_norm": 0.30313706398010254, "learning_rate": 1.2086570837288904e-05, "loss": 0.4364, "step": 23328 }, { "epoch": 0.43312305298841913, "grad_norm": 0.25074154138565063, "learning_rate": 1.2085430008215162e-05, "loss": 0.3632, "step": 23330 }, { "epoch": 0.43316018312583776, "grad_norm": 0.2982826232910156, "learning_rate": 1.208428915076496e-05, "loss": 0.235, "step": 23332 }, { "epoch": 0.4331973132632564, "grad_norm": 0.2748717665672302, "learning_rate": 1.2083148264953828e-05, "loss": 0.1569, "step": 23334 }, { "epoch": 0.433234443400675, "grad_norm": 0.36050736904144287, "learning_rate": 1.2082007350797289e-05, "loss": 0.308, "step": 23336 }, { "epoch": 0.43327157353809365, "grad_norm": 0.41918471455574036, "learning_rate": 1.2080866408310864e-05, "loss": 0.2041, "step": 23338 }, { "epoch": 0.43330870367551233, "grad_norm": 0.39146679639816284, "learning_rate": 1.2079725437510077e-05, "loss": 0.2156, "step": 23340 }, { "epoch": 0.43334583381293096, "grad_norm": 0.580489456653595, "learning_rate": 1.2078584438410462e-05, "loss": 0.2443, "step": 23342 }, { "epoch": 0.4333829639503496, "grad_norm": 0.38199475407600403, "learning_rate": 1.2077443411027539e-05, "loss": 0.2843, "step": 23344 }, { "epoch": 0.4334200940877682, "grad_norm": 0.3835553824901581, "learning_rate": 1.2076302355376827e-05, "loss": 0.1446, "step": 23346 }, { "epoch": 0.43345722422518684, "grad_norm": 0.4048469066619873, "learning_rate": 1.2075161271473866e-05, "loss": 0.523, "step": 23348 }, { "epoch": 0.43349435436260547, "grad_norm": 0.19754807651042938, "learning_rate": 1.2074020159334175e-05, "loss": 0.1556, "step": 23350 }, { "epoch": 0.43353148450002416, "grad_norm": 0.45872822403907776, "learning_rate": 1.2072879018973277e-05, "loss": 0.4246, "step": 23352 }, { "epoch": 0.4335686146374428, "grad_norm": 0.5624155402183533, "learning_rate": 1.2071737850406711e-05, "loss": 0.3328, "step": 23354 }, { "epoch": 0.4336057447748614, "grad_norm": 0.567057728767395, "learning_rate": 1.2070596653649996e-05, "loss": 0.2451, "step": 23356 }, { "epoch": 0.43364287491228004, "grad_norm": 0.33670100569725037, "learning_rate": 1.206945542871866e-05, "loss": 0.4138, "step": 23358 }, { "epoch": 0.43368000504969867, "grad_norm": 0.2645980417728424, "learning_rate": 1.2068314175628237e-05, "loss": 0.3229, "step": 23360 }, { "epoch": 0.43371713518711735, "grad_norm": 0.4024185240268707, "learning_rate": 1.2067172894394254e-05, "loss": 0.2547, "step": 23362 }, { "epoch": 0.433754265324536, "grad_norm": 0.3469744026660919, "learning_rate": 1.2066031585032239e-05, "loss": 0.2468, "step": 23364 }, { "epoch": 0.4337913954619546, "grad_norm": 0.37204432487487793, "learning_rate": 1.2064890247557721e-05, "loss": 0.3668, "step": 23366 }, { "epoch": 0.43382852559937324, "grad_norm": 0.46184876561164856, "learning_rate": 1.2063748881986235e-05, "loss": 0.2843, "step": 23368 }, { "epoch": 0.43386565573679187, "grad_norm": 0.419368177652359, "learning_rate": 1.2062607488333306e-05, "loss": 0.1764, "step": 23370 }, { "epoch": 0.4339027858742105, "grad_norm": 0.330716609954834, "learning_rate": 1.2061466066614467e-05, "loss": 0.4845, "step": 23372 }, { "epoch": 0.4339399160116292, "grad_norm": 0.55287104845047, "learning_rate": 1.2060324616845252e-05, "loss": 0.2753, "step": 23374 }, { "epoch": 0.4339770461490478, "grad_norm": 0.44660496711730957, "learning_rate": 1.2059183139041188e-05, "loss": 0.2021, "step": 23376 }, { "epoch": 0.43401417628646644, "grad_norm": 0.4001522958278656, "learning_rate": 1.2058041633217812e-05, "loss": 0.3987, "step": 23378 }, { "epoch": 0.43405130642388506, "grad_norm": 0.45645761489868164, "learning_rate": 1.2056900099390651e-05, "loss": 0.3296, "step": 23380 }, { "epoch": 0.4340884365613037, "grad_norm": 0.5197969079017639, "learning_rate": 1.2055758537575241e-05, "loss": 0.4612, "step": 23382 }, { "epoch": 0.4341255666987224, "grad_norm": 0.36933034658432007, "learning_rate": 1.2054616947787117e-05, "loss": 0.3072, "step": 23384 }, { "epoch": 0.434162696836141, "grad_norm": 0.3709254562854767, "learning_rate": 1.2053475330041811e-05, "loss": 0.2236, "step": 23386 }, { "epoch": 0.43419982697355963, "grad_norm": 0.31581318378448486, "learning_rate": 1.2052333684354856e-05, "loss": 0.349, "step": 23388 }, { "epoch": 0.43423695711097826, "grad_norm": 0.30725303292274475, "learning_rate": 1.2051192010741786e-05, "loss": 0.3808, "step": 23390 }, { "epoch": 0.4342740872483969, "grad_norm": 0.34022271633148193, "learning_rate": 1.2050050309218136e-05, "loss": 0.2695, "step": 23392 }, { "epoch": 0.4343112173858155, "grad_norm": 0.26789337396621704, "learning_rate": 1.2048908579799444e-05, "loss": 0.306, "step": 23394 }, { "epoch": 0.4343483475232342, "grad_norm": 0.3022186756134033, "learning_rate": 1.2047766822501244e-05, "loss": 0.3314, "step": 23396 }, { "epoch": 0.43438547766065283, "grad_norm": 0.3700431287288666, "learning_rate": 1.2046625037339067e-05, "loss": 0.3632, "step": 23398 }, { "epoch": 0.43442260779807146, "grad_norm": 0.37892642617225647, "learning_rate": 1.2045483224328458e-05, "loss": 0.0968, "step": 23400 }, { "epoch": 0.4344597379354901, "grad_norm": 0.5065642595291138, "learning_rate": 1.2044341383484946e-05, "loss": 0.448, "step": 23402 }, { "epoch": 0.4344968680729087, "grad_norm": 0.4491575360298157, "learning_rate": 1.2043199514824071e-05, "loss": 0.4688, "step": 23404 }, { "epoch": 0.4345339982103274, "grad_norm": 0.31397414207458496, "learning_rate": 1.2042057618361374e-05, "loss": 0.2925, "step": 23406 }, { "epoch": 0.434571128347746, "grad_norm": 0.25237804651260376, "learning_rate": 1.204091569411239e-05, "loss": 0.4011, "step": 23408 }, { "epoch": 0.43460825848516466, "grad_norm": 0.34165745973587036, "learning_rate": 1.2039773742092654e-05, "loss": 0.1067, "step": 23410 }, { "epoch": 0.4346453886225833, "grad_norm": 0.45153725147247314, "learning_rate": 1.2038631762317708e-05, "loss": 0.454, "step": 23412 }, { "epoch": 0.4346825187600019, "grad_norm": 0.4539611041545868, "learning_rate": 1.2037489754803088e-05, "loss": 0.4256, "step": 23414 }, { "epoch": 0.4347196488974206, "grad_norm": 0.4070637822151184, "learning_rate": 1.2036347719564337e-05, "loss": 0.2729, "step": 23416 }, { "epoch": 0.4347567790348392, "grad_norm": 0.6854671835899353, "learning_rate": 1.2035205656616998e-05, "loss": 0.2036, "step": 23418 }, { "epoch": 0.43479390917225785, "grad_norm": 0.28789129853248596, "learning_rate": 1.20340635659766e-05, "loss": 0.2542, "step": 23420 }, { "epoch": 0.4348310393096765, "grad_norm": 0.43835821747779846, "learning_rate": 1.203292144765869e-05, "loss": 0.316, "step": 23422 }, { "epoch": 0.4348681694470951, "grad_norm": 0.3816711902618408, "learning_rate": 1.2031779301678812e-05, "loss": 0.1848, "step": 23424 }, { "epoch": 0.43490529958451374, "grad_norm": 0.38157516717910767, "learning_rate": 1.2030637128052499e-05, "loss": 0.2261, "step": 23426 }, { "epoch": 0.4349424297219324, "grad_norm": 0.5143613815307617, "learning_rate": 1.20294949267953e-05, "loss": 0.3824, "step": 23428 }, { "epoch": 0.43497955985935105, "grad_norm": 0.4589637219905853, "learning_rate": 1.2028352697922757e-05, "loss": 0.334, "step": 23430 }, { "epoch": 0.4350166899967697, "grad_norm": 0.30158838629722595, "learning_rate": 1.2027210441450404e-05, "loss": 0.4638, "step": 23432 }, { "epoch": 0.4350538201341883, "grad_norm": 0.37864673137664795, "learning_rate": 1.202606815739379e-05, "loss": 0.4138, "step": 23434 }, { "epoch": 0.43509095027160694, "grad_norm": 0.3741653263568878, "learning_rate": 1.2024925845768457e-05, "loss": 0.2642, "step": 23436 }, { "epoch": 0.4351280804090256, "grad_norm": 0.5454931259155273, "learning_rate": 1.202378350658995e-05, "loss": 0.2884, "step": 23438 }, { "epoch": 0.43516521054644425, "grad_norm": 0.3262859880924225, "learning_rate": 1.2022641139873811e-05, "loss": 0.3105, "step": 23440 }, { "epoch": 0.4352023406838629, "grad_norm": 0.31813907623291016, "learning_rate": 1.2021498745635585e-05, "loss": 0.2357, "step": 23442 }, { "epoch": 0.4352394708212815, "grad_norm": 0.39694613218307495, "learning_rate": 1.2020356323890815e-05, "loss": 0.315, "step": 23444 }, { "epoch": 0.43527660095870013, "grad_norm": 0.4749318063259125, "learning_rate": 1.201921387465505e-05, "loss": 0.4181, "step": 23446 }, { "epoch": 0.43531373109611876, "grad_norm": 0.25391390919685364, "learning_rate": 1.2018071397943829e-05, "loss": 0.332, "step": 23448 }, { "epoch": 0.43535086123353745, "grad_norm": 0.3346829414367676, "learning_rate": 1.2016928893772706e-05, "loss": 0.4412, "step": 23450 }, { "epoch": 0.4353879913709561, "grad_norm": 0.30323755741119385, "learning_rate": 1.2015786362157215e-05, "loss": 0.3074, "step": 23452 }, { "epoch": 0.4354251215083747, "grad_norm": 0.3684828281402588, "learning_rate": 1.2014643803112916e-05, "loss": 0.3547, "step": 23454 }, { "epoch": 0.43546225164579333, "grad_norm": 0.4753077030181885, "learning_rate": 1.2013501216655345e-05, "loss": 0.3697, "step": 23456 }, { "epoch": 0.43549938178321196, "grad_norm": 0.4252491295337677, "learning_rate": 1.2012358602800056e-05, "loss": 0.2645, "step": 23458 }, { "epoch": 0.43553651192063064, "grad_norm": 0.391164630651474, "learning_rate": 1.2011215961562595e-05, "loss": 0.2626, "step": 23460 }, { "epoch": 0.43557364205804927, "grad_norm": 0.33539018034935, "learning_rate": 1.2010073292958506e-05, "loss": 0.2308, "step": 23462 }, { "epoch": 0.4356107721954679, "grad_norm": 0.19928646087646484, "learning_rate": 1.2008930597003341e-05, "loss": 0.287, "step": 23464 }, { "epoch": 0.4356479023328865, "grad_norm": 0.5480028390884399, "learning_rate": 1.2007787873712649e-05, "loss": 0.2732, "step": 23466 }, { "epoch": 0.43568503247030516, "grad_norm": 0.3385656774044037, "learning_rate": 1.200664512310198e-05, "loss": 0.4161, "step": 23468 }, { "epoch": 0.4357221626077238, "grad_norm": 0.27215296030044556, "learning_rate": 1.2005502345186878e-05, "loss": 0.2598, "step": 23470 }, { "epoch": 0.43575929274514247, "grad_norm": 0.2901736795902252, "learning_rate": 1.2004359539982897e-05, "loss": 0.2295, "step": 23472 }, { "epoch": 0.4357964228825611, "grad_norm": 0.3861478567123413, "learning_rate": 1.2003216707505587e-05, "loss": 0.2296, "step": 23474 }, { "epoch": 0.4358335530199797, "grad_norm": 0.42177239060401917, "learning_rate": 1.2002073847770499e-05, "loss": 0.1907, "step": 23476 }, { "epoch": 0.43587068315739835, "grad_norm": 0.32222235202789307, "learning_rate": 1.200093096079318e-05, "loss": 0.165, "step": 23478 }, { "epoch": 0.435907813294817, "grad_norm": 0.27451735734939575, "learning_rate": 1.1999788046589188e-05, "loss": 0.212, "step": 23480 }, { "epoch": 0.43594494343223567, "grad_norm": 0.2757277488708496, "learning_rate": 1.1998645105174069e-05, "loss": 0.301, "step": 23482 }, { "epoch": 0.4359820735696543, "grad_norm": 0.4462694525718689, "learning_rate": 1.1997502136563375e-05, "loss": 0.1161, "step": 23484 }, { "epoch": 0.4360192037070729, "grad_norm": 0.39914610981941223, "learning_rate": 1.1996359140772661e-05, "loss": 0.2204, "step": 23486 }, { "epoch": 0.43605633384449155, "grad_norm": 0.5718931555747986, "learning_rate": 1.199521611781748e-05, "loss": 0.0884, "step": 23488 }, { "epoch": 0.4360934639819102, "grad_norm": 0.4829443097114563, "learning_rate": 1.1994073067713383e-05, "loss": 0.3983, "step": 23490 }, { "epoch": 0.43613059411932886, "grad_norm": 0.3673272728919983, "learning_rate": 1.1992929990475926e-05, "loss": 0.2151, "step": 23492 }, { "epoch": 0.4361677242567475, "grad_norm": 0.3773770034313202, "learning_rate": 1.199178688612066e-05, "loss": 0.1144, "step": 23494 }, { "epoch": 0.4362048543941661, "grad_norm": 0.24898436665534973, "learning_rate": 1.199064375466314e-05, "loss": 0.2372, "step": 23496 }, { "epoch": 0.43624198453158475, "grad_norm": 0.42697516083717346, "learning_rate": 1.1989500596118921e-05, "loss": 0.1878, "step": 23498 }, { "epoch": 0.4362791146690034, "grad_norm": 0.4543319642543793, "learning_rate": 1.1988357410503562e-05, "loss": 0.2946, "step": 23500 }, { "epoch": 0.436316244806422, "grad_norm": 0.228993758559227, "learning_rate": 1.1987214197832611e-05, "loss": 0.3176, "step": 23502 }, { "epoch": 0.4363533749438407, "grad_norm": 0.4548444449901581, "learning_rate": 1.1986070958121627e-05, "loss": 0.4259, "step": 23504 }, { "epoch": 0.4363905050812593, "grad_norm": 0.7008549571037292, "learning_rate": 1.1984927691386167e-05, "loss": 0.3127, "step": 23506 }, { "epoch": 0.43642763521867795, "grad_norm": 0.3774678707122803, "learning_rate": 1.1983784397641787e-05, "loss": 0.3254, "step": 23508 }, { "epoch": 0.4364647653560966, "grad_norm": 0.31446534395217896, "learning_rate": 1.1982641076904042e-05, "loss": 0.2699, "step": 23510 }, { "epoch": 0.4365018954935152, "grad_norm": 0.3595420718193054, "learning_rate": 1.1981497729188495e-05, "loss": 0.3007, "step": 23512 }, { "epoch": 0.4365390256309339, "grad_norm": 0.235508531332016, "learning_rate": 1.1980354354510696e-05, "loss": 0.2891, "step": 23514 }, { "epoch": 0.4365761557683525, "grad_norm": 0.3506704270839691, "learning_rate": 1.1979210952886205e-05, "loss": 0.1837, "step": 23516 }, { "epoch": 0.43661328590577114, "grad_norm": 0.30411526560783386, "learning_rate": 1.1978067524330582e-05, "loss": 0.2854, "step": 23518 }, { "epoch": 0.43665041604318977, "grad_norm": 0.29133695363998413, "learning_rate": 1.1976924068859384e-05, "loss": 0.2478, "step": 23520 }, { "epoch": 0.4366875461806084, "grad_norm": 0.39450761675834656, "learning_rate": 1.1975780586488172e-05, "loss": 0.2582, "step": 23522 }, { "epoch": 0.436724676318027, "grad_norm": 0.2995562255382538, "learning_rate": 1.1974637077232505e-05, "loss": 0.2765, "step": 23524 }, { "epoch": 0.4367618064554457, "grad_norm": 0.46240922808647156, "learning_rate": 1.1973493541107944e-05, "loss": 0.1299, "step": 23526 }, { "epoch": 0.43679893659286434, "grad_norm": 0.8839894533157349, "learning_rate": 1.1972349978130045e-05, "loss": 0.2663, "step": 23528 }, { "epoch": 0.43683606673028297, "grad_norm": 0.33137866854667664, "learning_rate": 1.1971206388314368e-05, "loss": 0.1237, "step": 23530 }, { "epoch": 0.4368731968677016, "grad_norm": 0.7207096219062805, "learning_rate": 1.197006277167648e-05, "loss": 0.257, "step": 23532 }, { "epoch": 0.4369103270051202, "grad_norm": 0.2938666343688965, "learning_rate": 1.196891912823194e-05, "loss": 0.2525, "step": 23534 }, { "epoch": 0.4369474571425389, "grad_norm": 0.4168083369731903, "learning_rate": 1.1967775457996304e-05, "loss": 0.2091, "step": 23536 }, { "epoch": 0.43698458727995754, "grad_norm": 0.39647307991981506, "learning_rate": 1.1966631760985142e-05, "loss": 0.2094, "step": 23538 }, { "epoch": 0.43702171741737617, "grad_norm": 0.3388347029685974, "learning_rate": 1.1965488037214011e-05, "loss": 0.3695, "step": 23540 }, { "epoch": 0.4370588475547948, "grad_norm": 0.33060359954833984, "learning_rate": 1.1964344286698473e-05, "loss": 0.2225, "step": 23542 }, { "epoch": 0.4370959776922134, "grad_norm": 0.5126566886901855, "learning_rate": 1.1963200509454096e-05, "loss": 0.2986, "step": 23544 }, { "epoch": 0.43713310782963205, "grad_norm": 0.37752553820610046, "learning_rate": 1.196205670549644e-05, "loss": 0.133, "step": 23546 }, { "epoch": 0.43717023796705073, "grad_norm": 0.4504946768283844, "learning_rate": 1.1960912874841068e-05, "loss": 0.2805, "step": 23548 }, { "epoch": 0.43720736810446936, "grad_norm": 0.335664302110672, "learning_rate": 1.1959769017503548e-05, "loss": 0.1968, "step": 23550 }, { "epoch": 0.437244498241888, "grad_norm": 0.3718658983707428, "learning_rate": 1.195862513349944e-05, "loss": 0.3762, "step": 23552 }, { "epoch": 0.4372816283793066, "grad_norm": 0.34397536516189575, "learning_rate": 1.1957481222844309e-05, "loss": 0.3097, "step": 23554 }, { "epoch": 0.43731875851672525, "grad_norm": 0.38907334208488464, "learning_rate": 1.1956337285553725e-05, "loss": 0.1681, "step": 23556 }, { "epoch": 0.43735588865414393, "grad_norm": 0.33358630537986755, "learning_rate": 1.195519332164325e-05, "loss": 0.4205, "step": 23558 }, { "epoch": 0.43739301879156256, "grad_norm": 0.32918423414230347, "learning_rate": 1.1954049331128447e-05, "loss": 0.275, "step": 23560 }, { "epoch": 0.4374301489289812, "grad_norm": 0.3052709400653839, "learning_rate": 1.1952905314024887e-05, "loss": 0.3273, "step": 23562 }, { "epoch": 0.4374672790663998, "grad_norm": 0.2730685770511627, "learning_rate": 1.1951761270348139e-05, "loss": 0.2047, "step": 23564 }, { "epoch": 0.43750440920381845, "grad_norm": 0.4017612636089325, "learning_rate": 1.1950617200113765e-05, "loss": 0.2397, "step": 23566 }, { "epoch": 0.43754153934123713, "grad_norm": 0.3625796139240265, "learning_rate": 1.1949473103337333e-05, "loss": 0.427, "step": 23568 }, { "epoch": 0.43757866947865576, "grad_norm": 0.30939817428588867, "learning_rate": 1.1948328980034412e-05, "loss": 0.3075, "step": 23570 }, { "epoch": 0.4376157996160744, "grad_norm": 0.2783633768558502, "learning_rate": 1.1947184830220566e-05, "loss": 0.2993, "step": 23572 }, { "epoch": 0.437652929753493, "grad_norm": 0.3197158873081207, "learning_rate": 1.1946040653911368e-05, "loss": 0.2296, "step": 23574 }, { "epoch": 0.43769005989091164, "grad_norm": 0.23380829393863678, "learning_rate": 1.1944896451122387e-05, "loss": 0.2422, "step": 23576 }, { "epoch": 0.43772719002833027, "grad_norm": 0.5042902231216431, "learning_rate": 1.1943752221869194e-05, "loss": 0.3204, "step": 23578 }, { "epoch": 0.43776432016574895, "grad_norm": 0.4200644791126251, "learning_rate": 1.1942607966167353e-05, "loss": 0.2944, "step": 23580 }, { "epoch": 0.4378014503031676, "grad_norm": 0.2486756145954132, "learning_rate": 1.1941463684032435e-05, "loss": 0.3438, "step": 23582 }, { "epoch": 0.4378385804405862, "grad_norm": 0.30275389552116394, "learning_rate": 1.1940319375480012e-05, "loss": 0.2734, "step": 23584 }, { "epoch": 0.43787571057800484, "grad_norm": 0.34503835439682007, "learning_rate": 1.1939175040525655e-05, "loss": 0.2646, "step": 23586 }, { "epoch": 0.43791284071542347, "grad_norm": 0.34359151124954224, "learning_rate": 1.1938030679184936e-05, "loss": 0.266, "step": 23588 }, { "epoch": 0.43794997085284215, "grad_norm": 0.37743625044822693, "learning_rate": 1.1936886291473421e-05, "loss": 0.4051, "step": 23590 }, { "epoch": 0.4379871009902608, "grad_norm": 0.3425731360912323, "learning_rate": 1.1935741877406685e-05, "loss": 0.2631, "step": 23592 }, { "epoch": 0.4380242311276794, "grad_norm": 0.39717477560043335, "learning_rate": 1.1934597437000304e-05, "loss": 0.5267, "step": 23594 }, { "epoch": 0.43806136126509804, "grad_norm": 0.7706037163734436, "learning_rate": 1.1933452970269843e-05, "loss": 0.308, "step": 23596 }, { "epoch": 0.43809849140251667, "grad_norm": 0.604516863822937, "learning_rate": 1.1932308477230883e-05, "loss": 0.2436, "step": 23598 }, { "epoch": 0.4381356215399353, "grad_norm": 0.33222606778144836, "learning_rate": 1.1931163957898988e-05, "loss": 0.3729, "step": 23600 }, { "epoch": 0.438172751677354, "grad_norm": 0.4277777671813965, "learning_rate": 1.1930019412289738e-05, "loss": 0.1177, "step": 23602 }, { "epoch": 0.4382098818147726, "grad_norm": 0.40444886684417725, "learning_rate": 1.1928874840418705e-05, "loss": 0.2541, "step": 23604 }, { "epoch": 0.43824701195219123, "grad_norm": 0.2931002080440521, "learning_rate": 1.192773024230146e-05, "loss": 0.2252, "step": 23606 }, { "epoch": 0.43828414208960986, "grad_norm": 0.26695096492767334, "learning_rate": 1.1926585617953586e-05, "loss": 0.2187, "step": 23608 }, { "epoch": 0.4383212722270285, "grad_norm": 0.2957378923892975, "learning_rate": 1.192544096739065e-05, "loss": 0.3458, "step": 23610 }, { "epoch": 0.4383584023644472, "grad_norm": 0.40524229407310486, "learning_rate": 1.1924296290628228e-05, "loss": 0.2635, "step": 23612 }, { "epoch": 0.4383955325018658, "grad_norm": 0.3096320927143097, "learning_rate": 1.19231515876819e-05, "loss": 0.2552, "step": 23614 }, { "epoch": 0.43843266263928443, "grad_norm": 0.7302179336547852, "learning_rate": 1.1922006858567239e-05, "loss": 0.193, "step": 23616 }, { "epoch": 0.43846979277670306, "grad_norm": 0.4016776978969574, "learning_rate": 1.1920862103299822e-05, "loss": 0.2798, "step": 23618 }, { "epoch": 0.4385069229141217, "grad_norm": 0.3226209580898285, "learning_rate": 1.1919717321895226e-05, "loss": 0.4318, "step": 23620 }, { "epoch": 0.4385440530515403, "grad_norm": 0.40980634093284607, "learning_rate": 1.1918572514369024e-05, "loss": 0.141, "step": 23622 }, { "epoch": 0.438581183188959, "grad_norm": 0.35381022095680237, "learning_rate": 1.1917427680736798e-05, "loss": 0.3719, "step": 23624 }, { "epoch": 0.43861831332637763, "grad_norm": 0.45407426357269287, "learning_rate": 1.1916282821014127e-05, "loss": 0.3829, "step": 23626 }, { "epoch": 0.43865544346379626, "grad_norm": 0.4319459795951843, "learning_rate": 1.1915137935216583e-05, "loss": 0.3205, "step": 23628 }, { "epoch": 0.4386925736012149, "grad_norm": 0.5747219920158386, "learning_rate": 1.1913993023359751e-05, "loss": 0.3315, "step": 23630 }, { "epoch": 0.4387297037386335, "grad_norm": 0.372081458568573, "learning_rate": 1.1912848085459209e-05, "loss": 0.2112, "step": 23632 }, { "epoch": 0.4387668338760522, "grad_norm": 0.3500063419342041, "learning_rate": 1.1911703121530532e-05, "loss": 0.1823, "step": 23634 }, { "epoch": 0.4388039640134708, "grad_norm": 0.2596704959869385, "learning_rate": 1.1910558131589298e-05, "loss": 0.2484, "step": 23636 }, { "epoch": 0.43884109415088945, "grad_norm": 0.5212854743003845, "learning_rate": 1.1909413115651096e-05, "loss": 0.3309, "step": 23638 }, { "epoch": 0.4388782242883081, "grad_norm": 0.42243945598602295, "learning_rate": 1.1908268073731497e-05, "loss": 0.1906, "step": 23640 }, { "epoch": 0.4389153544257267, "grad_norm": 0.48382827639579773, "learning_rate": 1.1907123005846089e-05, "loss": 0.2125, "step": 23642 }, { "epoch": 0.4389524845631454, "grad_norm": 0.39249637722969055, "learning_rate": 1.1905977912010447e-05, "loss": 0.2454, "step": 23644 }, { "epoch": 0.438989614700564, "grad_norm": 0.48454025387763977, "learning_rate": 1.1904832792240157e-05, "loss": 0.2909, "step": 23646 }, { "epoch": 0.43902674483798265, "grad_norm": 0.394525945186615, "learning_rate": 1.1903687646550795e-05, "loss": 0.3435, "step": 23648 }, { "epoch": 0.4390638749754013, "grad_norm": 0.39680492877960205, "learning_rate": 1.190254247495795e-05, "loss": 0.3474, "step": 23650 }, { "epoch": 0.4391010051128199, "grad_norm": 0.36667600274086, "learning_rate": 1.19013972774772e-05, "loss": 0.2419, "step": 23652 }, { "epoch": 0.43913813525023854, "grad_norm": 0.2906239926815033, "learning_rate": 1.1900252054124127e-05, "loss": 0.263, "step": 23654 }, { "epoch": 0.4391752653876572, "grad_norm": 0.4001409113407135, "learning_rate": 1.1899106804914313e-05, "loss": 0.3431, "step": 23656 }, { "epoch": 0.43921239552507585, "grad_norm": 0.37662196159362793, "learning_rate": 1.1897961529863346e-05, "loss": 0.4826, "step": 23658 }, { "epoch": 0.4392495256624945, "grad_norm": 0.35898080468177795, "learning_rate": 1.1896816228986809e-05, "loss": 0.3105, "step": 23660 }, { "epoch": 0.4392866557999131, "grad_norm": 0.40890440344810486, "learning_rate": 1.1895670902300285e-05, "loss": 0.2809, "step": 23662 }, { "epoch": 0.43932378593733173, "grad_norm": 0.38531020283699036, "learning_rate": 1.1894525549819358e-05, "loss": 0.3465, "step": 23664 }, { "epoch": 0.4393609160747504, "grad_norm": 0.553290069103241, "learning_rate": 1.189338017155961e-05, "loss": 0.3095, "step": 23666 }, { "epoch": 0.43939804621216905, "grad_norm": 0.3739621043205261, "learning_rate": 1.189223476753663e-05, "loss": 0.3406, "step": 23668 }, { "epoch": 0.4394351763495877, "grad_norm": 0.27504613995552063, "learning_rate": 1.1891089337766006e-05, "loss": 0.0928, "step": 23670 }, { "epoch": 0.4394723064870063, "grad_norm": 0.3084995746612549, "learning_rate": 1.188994388226332e-05, "loss": 0.2113, "step": 23672 }, { "epoch": 0.43950943662442493, "grad_norm": 0.49022436141967773, "learning_rate": 1.1888798401044158e-05, "loss": 0.3225, "step": 23674 }, { "epoch": 0.43954656676184356, "grad_norm": 0.5140630006790161, "learning_rate": 1.1887652894124107e-05, "loss": 0.1536, "step": 23676 }, { "epoch": 0.43958369689926224, "grad_norm": 0.3043373227119446, "learning_rate": 1.188650736151875e-05, "loss": 0.238, "step": 23678 }, { "epoch": 0.4396208270366809, "grad_norm": 0.23684245347976685, "learning_rate": 1.1885361803243685e-05, "loss": 0.109, "step": 23680 }, { "epoch": 0.4396579571740995, "grad_norm": 0.3821539580821991, "learning_rate": 1.188421621931449e-05, "loss": 0.1975, "step": 23682 }, { "epoch": 0.43969508731151813, "grad_norm": 0.413874089717865, "learning_rate": 1.1883070609746758e-05, "loss": 0.2326, "step": 23684 }, { "epoch": 0.43973221744893676, "grad_norm": 0.6154585480690002, "learning_rate": 1.1881924974556074e-05, "loss": 0.2247, "step": 23686 }, { "epoch": 0.43976934758635544, "grad_norm": 0.33350494503974915, "learning_rate": 1.1880779313758026e-05, "loss": 0.2178, "step": 23688 }, { "epoch": 0.43980647772377407, "grad_norm": 0.2938879430294037, "learning_rate": 1.187963362736821e-05, "loss": 0.2546, "step": 23690 }, { "epoch": 0.4398436078611927, "grad_norm": 0.2042945921421051, "learning_rate": 1.1878487915402209e-05, "loss": 0.3589, "step": 23692 }, { "epoch": 0.4398807379986113, "grad_norm": 0.3872981369495392, "learning_rate": 1.1877342177875613e-05, "loss": 0.4697, "step": 23694 }, { "epoch": 0.43991786813602995, "grad_norm": 0.34465670585632324, "learning_rate": 1.1876196414804014e-05, "loss": 0.288, "step": 23696 }, { "epoch": 0.4399549982734486, "grad_norm": 0.2642345130443573, "learning_rate": 1.1875050626203e-05, "loss": 0.4191, "step": 23698 }, { "epoch": 0.43999212841086727, "grad_norm": 0.5179166793823242, "learning_rate": 1.1873904812088163e-05, "loss": 0.3563, "step": 23700 }, { "epoch": 0.4400292585482859, "grad_norm": 0.34532374143600464, "learning_rate": 1.1872758972475097e-05, "loss": 0.4796, "step": 23702 }, { "epoch": 0.4400663886857045, "grad_norm": 0.24797675013542175, "learning_rate": 1.1871613107379392e-05, "loss": 0.4058, "step": 23704 }, { "epoch": 0.44010351882312315, "grad_norm": 1.0722007751464844, "learning_rate": 1.1870467216816635e-05, "loss": 0.1337, "step": 23706 }, { "epoch": 0.4401406489605418, "grad_norm": 0.5361062288284302, "learning_rate": 1.1869321300802425e-05, "loss": 0.3732, "step": 23708 }, { "epoch": 0.44017777909796046, "grad_norm": 0.4638739228248596, "learning_rate": 1.186817535935235e-05, "loss": 0.2358, "step": 23710 }, { "epoch": 0.4402149092353791, "grad_norm": 0.37871047854423523, "learning_rate": 1.1867029392482004e-05, "loss": 0.466, "step": 23712 }, { "epoch": 0.4402520393727977, "grad_norm": 0.3585980236530304, "learning_rate": 1.1865883400206982e-05, "loss": 0.1976, "step": 23714 }, { "epoch": 0.44028916951021635, "grad_norm": 0.38213521242141724, "learning_rate": 1.1864737382542874e-05, "loss": 0.5324, "step": 23716 }, { "epoch": 0.440326299647635, "grad_norm": 0.26332977414131165, "learning_rate": 1.1863591339505276e-05, "loss": 0.2894, "step": 23718 }, { "epoch": 0.44036342978505366, "grad_norm": 0.23357310891151428, "learning_rate": 1.1862445271109781e-05, "loss": 0.1982, "step": 23720 }, { "epoch": 0.4404005599224723, "grad_norm": 0.37500178813934326, "learning_rate": 1.1861299177371986e-05, "loss": 0.2802, "step": 23722 }, { "epoch": 0.4404376900598909, "grad_norm": 0.6693291068077087, "learning_rate": 1.1860153058307482e-05, "loss": 0.249, "step": 23724 }, { "epoch": 0.44047482019730955, "grad_norm": 0.3429964482784271, "learning_rate": 1.1859006913931871e-05, "loss": 0.248, "step": 23726 }, { "epoch": 0.4405119503347282, "grad_norm": 0.5627470016479492, "learning_rate": 1.1857860744260743e-05, "loss": 0.2844, "step": 23728 }, { "epoch": 0.4405490804721468, "grad_norm": 0.4331391453742981, "learning_rate": 1.1856714549309694e-05, "loss": 0.2493, "step": 23730 }, { "epoch": 0.4405862106095655, "grad_norm": 0.43830960988998413, "learning_rate": 1.185556832909432e-05, "loss": 0.215, "step": 23732 }, { "epoch": 0.4406233407469841, "grad_norm": 0.5378835797309875, "learning_rate": 1.1854422083630222e-05, "loss": 0.2995, "step": 23734 }, { "epoch": 0.44066047088440274, "grad_norm": 0.4547078609466553, "learning_rate": 1.1853275812932994e-05, "loss": 0.3756, "step": 23736 }, { "epoch": 0.4406976010218214, "grad_norm": 0.34700706601142883, "learning_rate": 1.1852129517018232e-05, "loss": 0.2541, "step": 23738 }, { "epoch": 0.44073473115924, "grad_norm": 0.40170758962631226, "learning_rate": 1.1850983195901535e-05, "loss": 0.2866, "step": 23740 }, { "epoch": 0.4407718612966587, "grad_norm": 0.3848995864391327, "learning_rate": 1.1849836849598501e-05, "loss": 0.3374, "step": 23742 }, { "epoch": 0.4408089914340773, "grad_norm": 0.3300943374633789, "learning_rate": 1.184869047812473e-05, "loss": 0.2351, "step": 23744 }, { "epoch": 0.44084612157149594, "grad_norm": 0.3932442367076874, "learning_rate": 1.1847544081495816e-05, "loss": 0.2037, "step": 23746 }, { "epoch": 0.44088325170891457, "grad_norm": 0.5142634510993958, "learning_rate": 1.1846397659727367e-05, "loss": 0.1404, "step": 23748 }, { "epoch": 0.4409203818463332, "grad_norm": 0.3607427775859833, "learning_rate": 1.1845251212834969e-05, "loss": 0.1879, "step": 23750 }, { "epoch": 0.4409575119837518, "grad_norm": 0.36212384700775146, "learning_rate": 1.1844104740834235e-05, "loss": 0.294, "step": 23752 }, { "epoch": 0.4409946421211705, "grad_norm": 0.3918575346469879, "learning_rate": 1.1842958243740756e-05, "loss": 0.3842, "step": 23754 }, { "epoch": 0.44103177225858914, "grad_norm": 0.4928266704082489, "learning_rate": 1.1841811721570138e-05, "loss": 0.4172, "step": 23756 }, { "epoch": 0.44106890239600777, "grad_norm": 0.35291337966918945, "learning_rate": 1.1840665174337977e-05, "loss": 0.3658, "step": 23758 }, { "epoch": 0.4411060325334264, "grad_norm": 0.5061340928077698, "learning_rate": 1.1839518602059877e-05, "loss": 0.0446, "step": 23760 }, { "epoch": 0.441143162670845, "grad_norm": 0.42473292350769043, "learning_rate": 1.1838372004751436e-05, "loss": 0.182, "step": 23762 }, { "epoch": 0.4411802928082637, "grad_norm": 0.4381121098995209, "learning_rate": 1.1837225382428263e-05, "loss": 0.2985, "step": 23764 }, { "epoch": 0.44121742294568234, "grad_norm": 0.5044626593589783, "learning_rate": 1.1836078735105954e-05, "loss": 0.1455, "step": 23766 }, { "epoch": 0.44125455308310096, "grad_norm": 0.42887911200523376, "learning_rate": 1.1834932062800115e-05, "loss": 0.4054, "step": 23768 }, { "epoch": 0.4412916832205196, "grad_norm": 0.6691617369651794, "learning_rate": 1.1833785365526344e-05, "loss": 0.3122, "step": 23770 }, { "epoch": 0.4413288133579382, "grad_norm": 0.35354384779930115, "learning_rate": 1.1832638643300249e-05, "loss": 0.3718, "step": 23772 }, { "epoch": 0.44136594349535685, "grad_norm": 0.5450619459152222, "learning_rate": 1.183149189613743e-05, "loss": 0.3814, "step": 23774 }, { "epoch": 0.44140307363277553, "grad_norm": 0.23925697803497314, "learning_rate": 1.1830345124053493e-05, "loss": 0.1398, "step": 23776 }, { "epoch": 0.44144020377019416, "grad_norm": 0.7135844826698303, "learning_rate": 1.1829198327064042e-05, "loss": 0.1999, "step": 23778 }, { "epoch": 0.4414773339076128, "grad_norm": 0.7205314040184021, "learning_rate": 1.182805150518468e-05, "loss": 0.3509, "step": 23780 }, { "epoch": 0.4415144640450314, "grad_norm": 0.29555100202560425, "learning_rate": 1.1826904658431014e-05, "loss": 0.3551, "step": 23782 }, { "epoch": 0.44155159418245005, "grad_norm": 0.7239555716514587, "learning_rate": 1.1825757786818646e-05, "loss": 0.2727, "step": 23784 }, { "epoch": 0.44158872431986873, "grad_norm": 0.34976357221603394, "learning_rate": 1.1824610890363185e-05, "loss": 0.1807, "step": 23786 }, { "epoch": 0.44162585445728736, "grad_norm": 0.4200124740600586, "learning_rate": 1.1823463969080234e-05, "loss": 0.2017, "step": 23788 }, { "epoch": 0.441662984594706, "grad_norm": 0.41671186685562134, "learning_rate": 1.1822317022985401e-05, "loss": 0.3845, "step": 23790 }, { "epoch": 0.4417001147321246, "grad_norm": 0.30348482728004456, "learning_rate": 1.182117005209429e-05, "loss": 0.1164, "step": 23792 }, { "epoch": 0.44173724486954324, "grad_norm": 0.260517418384552, "learning_rate": 1.182002305642251e-05, "loss": 0.5546, "step": 23794 }, { "epoch": 0.44177437500696193, "grad_norm": 0.36911827325820923, "learning_rate": 1.1818876035985669e-05, "loss": 0.2633, "step": 23796 }, { "epoch": 0.44181150514438056, "grad_norm": 0.49242958426475525, "learning_rate": 1.1817728990799374e-05, "loss": 0.2084, "step": 23798 }, { "epoch": 0.4418486352817992, "grad_norm": 0.3496561348438263, "learning_rate": 1.181658192087923e-05, "loss": 0.2069, "step": 23800 }, { "epoch": 0.4418857654192178, "grad_norm": 0.4719865024089813, "learning_rate": 1.181543482624085e-05, "loss": 0.2234, "step": 23802 }, { "epoch": 0.44192289555663644, "grad_norm": 0.33670270442962646, "learning_rate": 1.1814287706899836e-05, "loss": 0.4826, "step": 23804 }, { "epoch": 0.44196002569405507, "grad_norm": 0.30540457367897034, "learning_rate": 1.1813140562871803e-05, "loss": 0.3552, "step": 23806 }, { "epoch": 0.44199715583147375, "grad_norm": 0.67805415391922, "learning_rate": 1.1811993394172356e-05, "loss": 0.2221, "step": 23808 }, { "epoch": 0.4420342859688924, "grad_norm": 0.3721048831939697, "learning_rate": 1.1810846200817112e-05, "loss": 0.3098, "step": 23810 }, { "epoch": 0.442071416106311, "grad_norm": 0.3707892894744873, "learning_rate": 1.180969898282167e-05, "loss": 0.2358, "step": 23812 }, { "epoch": 0.44210854624372964, "grad_norm": 0.28123393654823303, "learning_rate": 1.1808551740201649e-05, "loss": 0.3635, "step": 23814 }, { "epoch": 0.44214567638114827, "grad_norm": 0.43573126196861267, "learning_rate": 1.1807404472972655e-05, "loss": 0.2, "step": 23816 }, { "epoch": 0.44218280651856695, "grad_norm": 0.34867316484451294, "learning_rate": 1.1806257181150298e-05, "loss": 0.2057, "step": 23818 }, { "epoch": 0.4422199366559856, "grad_norm": 0.4324578642845154, "learning_rate": 1.1805109864750193e-05, "loss": 0.3184, "step": 23820 }, { "epoch": 0.4422570667934042, "grad_norm": 0.3645049035549164, "learning_rate": 1.1803962523787948e-05, "loss": 0.3173, "step": 23822 }, { "epoch": 0.44229419693082284, "grad_norm": 0.27655619382858276, "learning_rate": 1.1802815158279178e-05, "loss": 0.3596, "step": 23824 }, { "epoch": 0.44233132706824146, "grad_norm": 0.48209938406944275, "learning_rate": 1.180166776823949e-05, "loss": 0.4345, "step": 23826 }, { "epoch": 0.4423684572056601, "grad_norm": 0.2817263603210449, "learning_rate": 1.1800520353684505e-05, "loss": 0.248, "step": 23828 }, { "epoch": 0.4424055873430788, "grad_norm": 0.3553263247013092, "learning_rate": 1.179937291462983e-05, "loss": 0.3545, "step": 23830 }, { "epoch": 0.4424427174804974, "grad_norm": 0.30517029762268066, "learning_rate": 1.179822545109108e-05, "loss": 0.3027, "step": 23832 }, { "epoch": 0.44247984761791603, "grad_norm": 0.3245173990726471, "learning_rate": 1.1797077963083869e-05, "loss": 0.4029, "step": 23834 }, { "epoch": 0.44251697775533466, "grad_norm": 0.4065669775009155, "learning_rate": 1.1795930450623807e-05, "loss": 0.2569, "step": 23836 }, { "epoch": 0.4425541078927533, "grad_norm": 0.35312923789024353, "learning_rate": 1.179478291372651e-05, "loss": 0.1981, "step": 23838 }, { "epoch": 0.442591238030172, "grad_norm": 0.42727893590927124, "learning_rate": 1.1793635352407598e-05, "loss": 0.208, "step": 23840 }, { "epoch": 0.4426283681675906, "grad_norm": 0.30203187465667725, "learning_rate": 1.1792487766682679e-05, "loss": 0.3275, "step": 23842 }, { "epoch": 0.44266549830500923, "grad_norm": 0.3186185657978058, "learning_rate": 1.179134015656737e-05, "loss": 0.1899, "step": 23844 }, { "epoch": 0.44270262844242786, "grad_norm": 0.28849631547927856, "learning_rate": 1.1790192522077286e-05, "loss": 0.4682, "step": 23846 }, { "epoch": 0.4427397585798465, "grad_norm": 0.6674695014953613, "learning_rate": 1.1789044863228047e-05, "loss": 0.2275, "step": 23848 }, { "epoch": 0.4427768887172651, "grad_norm": 0.4135274589061737, "learning_rate": 1.1787897180035263e-05, "loss": 0.3184, "step": 23850 }, { "epoch": 0.4428140188546838, "grad_norm": 0.3842872679233551, "learning_rate": 1.1786749472514556e-05, "loss": 0.3361, "step": 23852 }, { "epoch": 0.44285114899210243, "grad_norm": 0.3466397523880005, "learning_rate": 1.1785601740681542e-05, "loss": 0.2423, "step": 23854 }, { "epoch": 0.44288827912952106, "grad_norm": 0.30038174986839294, "learning_rate": 1.1784453984551833e-05, "loss": 0.2812, "step": 23856 }, { "epoch": 0.4429254092669397, "grad_norm": 0.3503870666027069, "learning_rate": 1.1783306204141052e-05, "loss": 0.2705, "step": 23858 }, { "epoch": 0.4429625394043583, "grad_norm": 0.5904318690299988, "learning_rate": 1.1782158399464816e-05, "loss": 0.1788, "step": 23860 }, { "epoch": 0.442999669541777, "grad_norm": 0.28318244218826294, "learning_rate": 1.1781010570538742e-05, "loss": 0.3146, "step": 23862 }, { "epoch": 0.4430367996791956, "grad_norm": 0.5092008113861084, "learning_rate": 1.1779862717378449e-05, "loss": 0.2174, "step": 23864 }, { "epoch": 0.44307392981661425, "grad_norm": 0.4855818748474121, "learning_rate": 1.1778714839999556e-05, "loss": 0.3217, "step": 23866 }, { "epoch": 0.4431110599540329, "grad_norm": 0.4310131371021271, "learning_rate": 1.1777566938417681e-05, "loss": 0.4751, "step": 23868 }, { "epoch": 0.4431481900914515, "grad_norm": 0.4504663348197937, "learning_rate": 1.1776419012648443e-05, "loss": 0.3063, "step": 23870 }, { "epoch": 0.4431853202288702, "grad_norm": 0.3513050675392151, "learning_rate": 1.1775271062707468e-05, "loss": 0.2177, "step": 23872 }, { "epoch": 0.4432224503662888, "grad_norm": 0.4568898677825928, "learning_rate": 1.1774123088610369e-05, "loss": 0.1516, "step": 23874 }, { "epoch": 0.44325958050370745, "grad_norm": 0.3882018029689789, "learning_rate": 1.1772975090372766e-05, "loss": 0.2372, "step": 23876 }, { "epoch": 0.4432967106411261, "grad_norm": 0.27046525478363037, "learning_rate": 1.1771827068010286e-05, "loss": 0.167, "step": 23878 }, { "epoch": 0.4433338407785447, "grad_norm": 0.36586055159568787, "learning_rate": 1.1770679021538545e-05, "loss": 0.1866, "step": 23880 }, { "epoch": 0.44337097091596334, "grad_norm": 0.4313051402568817, "learning_rate": 1.1769530950973167e-05, "loss": 0.2696, "step": 23882 }, { "epoch": 0.443408101053382, "grad_norm": 0.5722182393074036, "learning_rate": 1.1768382856329774e-05, "loss": 0.22, "step": 23884 }, { "epoch": 0.44344523119080065, "grad_norm": 0.24752557277679443, "learning_rate": 1.1767234737623988e-05, "loss": 0.3267, "step": 23886 }, { "epoch": 0.4434823613282193, "grad_norm": 0.4271041750907898, "learning_rate": 1.176608659487143e-05, "loss": 0.329, "step": 23888 }, { "epoch": 0.4435194914656379, "grad_norm": 0.4507372975349426, "learning_rate": 1.176493842808772e-05, "loss": 0.1859, "step": 23890 }, { "epoch": 0.44355662160305653, "grad_norm": 0.30965831875801086, "learning_rate": 1.1763790237288488e-05, "loss": 0.4126, "step": 23892 }, { "epoch": 0.4435937517404752, "grad_norm": 0.4449214041233063, "learning_rate": 1.1762642022489355e-05, "loss": 0.2956, "step": 23894 }, { "epoch": 0.44363088187789385, "grad_norm": 0.6289788484573364, "learning_rate": 1.176149378370594e-05, "loss": 0.2853, "step": 23896 }, { "epoch": 0.4436680120153125, "grad_norm": 0.47286567091941833, "learning_rate": 1.1760345520953875e-05, "loss": 0.2164, "step": 23898 }, { "epoch": 0.4437051421527311, "grad_norm": 0.5417950749397278, "learning_rate": 1.1759197234248775e-05, "loss": 0.3351, "step": 23900 }, { "epoch": 0.44374227229014973, "grad_norm": 0.6363476514816284, "learning_rate": 1.1758048923606274e-05, "loss": 0.4697, "step": 23902 }, { "epoch": 0.44377940242756836, "grad_norm": 0.3745894432067871, "learning_rate": 1.1756900589041993e-05, "loss": 0.3303, "step": 23904 }, { "epoch": 0.44381653256498704, "grad_norm": 0.4443109631538391, "learning_rate": 1.1755752230571557e-05, "loss": 0.3082, "step": 23906 }, { "epoch": 0.44385366270240567, "grad_norm": 0.5911005139350891, "learning_rate": 1.1754603848210593e-05, "loss": 0.4053, "step": 23908 }, { "epoch": 0.4438907928398243, "grad_norm": 0.34702086448669434, "learning_rate": 1.1753455441974726e-05, "loss": 0.3088, "step": 23910 }, { "epoch": 0.44392792297724293, "grad_norm": 0.3851543068885803, "learning_rate": 1.175230701187958e-05, "loss": 0.4825, "step": 23912 }, { "epoch": 0.44396505311466156, "grad_norm": 0.4213216304779053, "learning_rate": 1.1751158557940787e-05, "loss": 0.3531, "step": 23914 }, { "epoch": 0.44400218325208024, "grad_norm": 0.3401533365249634, "learning_rate": 1.175001008017397e-05, "loss": 0.2891, "step": 23916 }, { "epoch": 0.44403931338949887, "grad_norm": 0.530329704284668, "learning_rate": 1.174886157859476e-05, "loss": 0.2451, "step": 23918 }, { "epoch": 0.4440764435269175, "grad_norm": 0.33848339319229126, "learning_rate": 1.1747713053218782e-05, "loss": 0.2495, "step": 23920 }, { "epoch": 0.4441135736643361, "grad_norm": 0.26268675923347473, "learning_rate": 1.1746564504061663e-05, "loss": 0.3251, "step": 23922 }, { "epoch": 0.44415070380175475, "grad_norm": 0.39955538511276245, "learning_rate": 1.1745415931139033e-05, "loss": 0.2323, "step": 23924 }, { "epoch": 0.4441878339391734, "grad_norm": 0.38394472002983093, "learning_rate": 1.174426733446652e-05, "loss": 0.3164, "step": 23926 }, { "epoch": 0.44422496407659207, "grad_norm": 0.3943612277507782, "learning_rate": 1.1743118714059754e-05, "loss": 0.3199, "step": 23928 }, { "epoch": 0.4442620942140107, "grad_norm": 0.40309277176856995, "learning_rate": 1.1741970069934364e-05, "loss": 0.496, "step": 23930 }, { "epoch": 0.4442992243514293, "grad_norm": 0.34559857845306396, "learning_rate": 1.1740821402105975e-05, "loss": 0.3281, "step": 23932 }, { "epoch": 0.44433635448884795, "grad_norm": 0.32255464792251587, "learning_rate": 1.1739672710590224e-05, "loss": 0.2037, "step": 23934 }, { "epoch": 0.4443734846262666, "grad_norm": 0.18587647378444672, "learning_rate": 1.1738523995402739e-05, "loss": 0.2626, "step": 23936 }, { "epoch": 0.44441061476368526, "grad_norm": 0.3890645205974579, "learning_rate": 1.1737375256559152e-05, "loss": 0.3201, "step": 23938 }, { "epoch": 0.4444477449011039, "grad_norm": 0.4232846796512604, "learning_rate": 1.1736226494075086e-05, "loss": 0.2023, "step": 23940 }, { "epoch": 0.4444848750385225, "grad_norm": 0.39195990562438965, "learning_rate": 1.1735077707966183e-05, "loss": 0.1999, "step": 23942 }, { "epoch": 0.44452200517594115, "grad_norm": 0.3769840896129608, "learning_rate": 1.1733928898248069e-05, "loss": 0.4931, "step": 23944 }, { "epoch": 0.4445591353133598, "grad_norm": 0.5406650304794312, "learning_rate": 1.1732780064936374e-05, "loss": 0.2801, "step": 23946 }, { "epoch": 0.44459626545077846, "grad_norm": 2.4105215072631836, "learning_rate": 1.1731631208046736e-05, "loss": 0.4158, "step": 23948 }, { "epoch": 0.4446333955881971, "grad_norm": 0.4223223626613617, "learning_rate": 1.1730482327594781e-05, "loss": 0.3205, "step": 23950 }, { "epoch": 0.4446705257256157, "grad_norm": 0.3691438138484955, "learning_rate": 1.1729333423596145e-05, "loss": 0.2326, "step": 23952 }, { "epoch": 0.44470765586303435, "grad_norm": 0.28826966881752014, "learning_rate": 1.1728184496066462e-05, "loss": 0.2155, "step": 23954 }, { "epoch": 0.444744786000453, "grad_norm": 0.350644588470459, "learning_rate": 1.1727035545021363e-05, "loss": 0.4619, "step": 23956 }, { "epoch": 0.4447819161378716, "grad_norm": 0.4109569191932678, "learning_rate": 1.1725886570476486e-05, "loss": 0.2696, "step": 23958 }, { "epoch": 0.4448190462752903, "grad_norm": 0.33637312054634094, "learning_rate": 1.1724737572447462e-05, "loss": 0.3739, "step": 23960 }, { "epoch": 0.4448561764127089, "grad_norm": 0.42051035165786743, "learning_rate": 1.1723588550949925e-05, "loss": 0.3735, "step": 23962 }, { "epoch": 0.44489330655012754, "grad_norm": 0.26516294479370117, "learning_rate": 1.1722439505999508e-05, "loss": 0.3908, "step": 23964 }, { "epoch": 0.44493043668754617, "grad_norm": 0.3965071439743042, "learning_rate": 1.1721290437611854e-05, "loss": 0.259, "step": 23966 }, { "epoch": 0.4449675668249648, "grad_norm": 0.3436717987060547, "learning_rate": 1.1720141345802588e-05, "loss": 0.4519, "step": 23968 }, { "epoch": 0.4450046969623835, "grad_norm": 0.4568477272987366, "learning_rate": 1.1718992230587355e-05, "loss": 0.4053, "step": 23970 }, { "epoch": 0.4450418270998021, "grad_norm": 0.6536632776260376, "learning_rate": 1.1717843091981782e-05, "loss": 0.4454, "step": 23972 }, { "epoch": 0.44507895723722074, "grad_norm": 0.3625437021255493, "learning_rate": 1.1716693930001515e-05, "loss": 0.436, "step": 23974 }, { "epoch": 0.44511608737463937, "grad_norm": 0.3941170871257782, "learning_rate": 1.1715544744662182e-05, "loss": 0.4027, "step": 23976 }, { "epoch": 0.445153217512058, "grad_norm": 0.3628867268562317, "learning_rate": 1.1714395535979424e-05, "loss": 0.3325, "step": 23978 }, { "epoch": 0.4451903476494766, "grad_norm": 0.429275244474411, "learning_rate": 1.1713246303968882e-05, "loss": 0.3416, "step": 23980 }, { "epoch": 0.4452274777868953, "grad_norm": 0.5084751844406128, "learning_rate": 1.1712097048646186e-05, "loss": 0.2719, "step": 23982 }, { "epoch": 0.44526460792431394, "grad_norm": 0.2849515378475189, "learning_rate": 1.1710947770026977e-05, "loss": 0.2419, "step": 23984 }, { "epoch": 0.44530173806173257, "grad_norm": 0.33132898807525635, "learning_rate": 1.1709798468126896e-05, "loss": 0.342, "step": 23986 }, { "epoch": 0.4453388681991512, "grad_norm": 0.44278115034103394, "learning_rate": 1.1708649142961576e-05, "loss": 0.1917, "step": 23988 }, { "epoch": 0.4453759983365698, "grad_norm": 0.345720499753952, "learning_rate": 1.1707499794546662e-05, "loss": 0.441, "step": 23990 }, { "epoch": 0.4454131284739885, "grad_norm": 0.29425013065338135, "learning_rate": 1.1706350422897792e-05, "loss": 0.4168, "step": 23992 }, { "epoch": 0.44545025861140713, "grad_norm": 0.24771080911159515, "learning_rate": 1.17052010280306e-05, "loss": 0.446, "step": 23994 }, { "epoch": 0.44548738874882576, "grad_norm": 0.5992075800895691, "learning_rate": 1.1704051609960729e-05, "loss": 0.1989, "step": 23996 }, { "epoch": 0.4455245188862444, "grad_norm": 0.3501785695552826, "learning_rate": 1.1702902168703823e-05, "loss": 0.2966, "step": 23998 }, { "epoch": 0.445561649023663, "grad_norm": 0.46936652064323425, "learning_rate": 1.1701752704275518e-05, "loss": 0.3691, "step": 24000 }, { "epoch": 0.44559877916108165, "grad_norm": 0.36993396282196045, "learning_rate": 1.1700603216691458e-05, "loss": 0.4871, "step": 24002 }, { "epoch": 0.44563590929850033, "grad_norm": 0.42775020003318787, "learning_rate": 1.169945370596728e-05, "loss": 0.2937, "step": 24004 }, { "epoch": 0.44567303943591896, "grad_norm": 0.3824927508831024, "learning_rate": 1.1698304172118627e-05, "loss": 0.389, "step": 24006 }, { "epoch": 0.4457101695733376, "grad_norm": 0.35369062423706055, "learning_rate": 1.1697154615161142e-05, "loss": 0.3447, "step": 24008 }, { "epoch": 0.4457472997107562, "grad_norm": 0.3623195290565491, "learning_rate": 1.1696005035110468e-05, "loss": 0.2547, "step": 24010 }, { "epoch": 0.44578442984817485, "grad_norm": 0.33387210965156555, "learning_rate": 1.1694855431982243e-05, "loss": 0.5037, "step": 24012 }, { "epoch": 0.44582155998559353, "grad_norm": 0.29337361454963684, "learning_rate": 1.1693705805792112e-05, "loss": 0.4434, "step": 24014 }, { "epoch": 0.44585869012301216, "grad_norm": 0.47422701120376587, "learning_rate": 1.1692556156555719e-05, "loss": 0.2834, "step": 24016 }, { "epoch": 0.4458958202604308, "grad_norm": 0.5289061665534973, "learning_rate": 1.1691406484288708e-05, "loss": 0.213, "step": 24018 }, { "epoch": 0.4459329503978494, "grad_norm": 0.27482104301452637, "learning_rate": 1.169025678900672e-05, "loss": 0.3034, "step": 24020 }, { "epoch": 0.44597008053526804, "grad_norm": 0.299385130405426, "learning_rate": 1.16891070707254e-05, "loss": 0.2953, "step": 24022 }, { "epoch": 0.4460072106726867, "grad_norm": 0.5817592144012451, "learning_rate": 1.1687957329460393e-05, "loss": 0.2229, "step": 24024 }, { "epoch": 0.44604434081010536, "grad_norm": 0.5666360259056091, "learning_rate": 1.1686807565227341e-05, "loss": 0.4658, "step": 24026 }, { "epoch": 0.446081470947524, "grad_norm": 0.35040393471717834, "learning_rate": 1.168565777804189e-05, "loss": 0.2929, "step": 24028 }, { "epoch": 0.4461186010849426, "grad_norm": 0.3602055013179779, "learning_rate": 1.168450796791969e-05, "loss": 0.2605, "step": 24030 }, { "epoch": 0.44615573122236124, "grad_norm": 0.34880051016807556, "learning_rate": 1.1683358134876377e-05, "loss": 0.446, "step": 24032 }, { "epoch": 0.44619286135977987, "grad_norm": 0.7034657001495361, "learning_rate": 1.1682208278927607e-05, "loss": 0.5569, "step": 24034 }, { "epoch": 0.44622999149719855, "grad_norm": 0.33224138617515564, "learning_rate": 1.168105840008902e-05, "loss": 0.3605, "step": 24036 }, { "epoch": 0.4462671216346172, "grad_norm": 0.24132730066776276, "learning_rate": 1.167990849837626e-05, "loss": 0.2734, "step": 24038 }, { "epoch": 0.4463042517720358, "grad_norm": 0.2993573844432831, "learning_rate": 1.167875857380498e-05, "loss": 0.4002, "step": 24040 }, { "epoch": 0.44634138190945444, "grad_norm": 0.33143749833106995, "learning_rate": 1.1677608626390824e-05, "loss": 0.3191, "step": 24042 }, { "epoch": 0.44637851204687307, "grad_norm": 0.6321326494216919, "learning_rate": 1.167645865614944e-05, "loss": 0.5384, "step": 24044 }, { "epoch": 0.44641564218429175, "grad_norm": 0.3029479384422302, "learning_rate": 1.1675308663096473e-05, "loss": 0.164, "step": 24046 }, { "epoch": 0.4464527723217104, "grad_norm": 0.3932936191558838, "learning_rate": 1.1674158647247578e-05, "loss": 0.269, "step": 24048 }, { "epoch": 0.446489902459129, "grad_norm": 0.36209365725517273, "learning_rate": 1.1673008608618392e-05, "loss": 0.4002, "step": 24050 }, { "epoch": 0.44652703259654764, "grad_norm": 0.3835330605506897, "learning_rate": 1.1671858547224574e-05, "loss": 0.2055, "step": 24052 }, { "epoch": 0.44656416273396626, "grad_norm": 0.3269481956958771, "learning_rate": 1.167070846308177e-05, "loss": 0.299, "step": 24054 }, { "epoch": 0.4466012928713849, "grad_norm": 0.2737219035625458, "learning_rate": 1.1669558356205628e-05, "loss": 0.3431, "step": 24056 }, { "epoch": 0.4466384230088036, "grad_norm": 0.35054299235343933, "learning_rate": 1.1668408226611793e-05, "loss": 0.1714, "step": 24058 }, { "epoch": 0.4466755531462222, "grad_norm": 0.3232698440551758, "learning_rate": 1.1667258074315924e-05, "loss": 0.3528, "step": 24060 }, { "epoch": 0.44671268328364083, "grad_norm": 0.3683943748474121, "learning_rate": 1.1666107899333665e-05, "loss": 0.3006, "step": 24062 }, { "epoch": 0.44674981342105946, "grad_norm": 0.31169185042381287, "learning_rate": 1.166495770168067e-05, "loss": 0.1771, "step": 24064 }, { "epoch": 0.4467869435584781, "grad_norm": 0.23884519934654236, "learning_rate": 1.1663807481372587e-05, "loss": 0.2089, "step": 24066 }, { "epoch": 0.4468240736958968, "grad_norm": 0.5430421829223633, "learning_rate": 1.1662657238425065e-05, "loss": 0.3875, "step": 24068 }, { "epoch": 0.4468612038333154, "grad_norm": 0.41073131561279297, "learning_rate": 1.1661506972853762e-05, "loss": 0.2629, "step": 24070 }, { "epoch": 0.44689833397073403, "grad_norm": 0.5527470111846924, "learning_rate": 1.1660356684674323e-05, "loss": 0.2775, "step": 24072 }, { "epoch": 0.44693546410815266, "grad_norm": 4.22423791885376, "learning_rate": 1.1659206373902405e-05, "loss": 0.246, "step": 24074 }, { "epoch": 0.4469725942455713, "grad_norm": 0.45812708139419556, "learning_rate": 1.1658056040553658e-05, "loss": 0.3123, "step": 24076 }, { "epoch": 0.4470097243829899, "grad_norm": 0.49532121419906616, "learning_rate": 1.1656905684643733e-05, "loss": 0.2828, "step": 24078 }, { "epoch": 0.4470468545204086, "grad_norm": 0.27955830097198486, "learning_rate": 1.1655755306188288e-05, "loss": 0.4633, "step": 24080 }, { "epoch": 0.4470839846578272, "grad_norm": 0.7527885437011719, "learning_rate": 1.1654604905202968e-05, "loss": 0.2245, "step": 24082 }, { "epoch": 0.44712111479524586, "grad_norm": 0.5109013319015503, "learning_rate": 1.1653454481703433e-05, "loss": 0.2999, "step": 24084 }, { "epoch": 0.4471582449326645, "grad_norm": 0.4185657799243927, "learning_rate": 1.1652304035705337e-05, "loss": 0.2853, "step": 24086 }, { "epoch": 0.4471953750700831, "grad_norm": 0.3150821030139923, "learning_rate": 1.1651153567224332e-05, "loss": 0.227, "step": 24088 }, { "epoch": 0.4472325052075018, "grad_norm": 0.3207242786884308, "learning_rate": 1.1650003076276071e-05, "loss": 0.4943, "step": 24090 }, { "epoch": 0.4472696353449204, "grad_norm": 0.4198581874370575, "learning_rate": 1.1648852562876214e-05, "loss": 0.2121, "step": 24092 }, { "epoch": 0.44730676548233905, "grad_norm": 0.5249981880187988, "learning_rate": 1.1647702027040408e-05, "loss": 0.4632, "step": 24094 }, { "epoch": 0.4473438956197577, "grad_norm": 0.3403719961643219, "learning_rate": 1.1646551468784315e-05, "loss": 0.1828, "step": 24096 }, { "epoch": 0.4473810257571763, "grad_norm": 0.4295170307159424, "learning_rate": 1.1645400888123588e-05, "loss": 0.2504, "step": 24098 }, { "epoch": 0.447418155894595, "grad_norm": 0.34082096815109253, "learning_rate": 1.1644250285073886e-05, "loss": 0.2214, "step": 24100 }, { "epoch": 0.4474552860320136, "grad_norm": 0.2675071060657501, "learning_rate": 1.1643099659650858e-05, "loss": 0.2623, "step": 24102 }, { "epoch": 0.44749241616943225, "grad_norm": 0.29089948534965515, "learning_rate": 1.1641949011870169e-05, "loss": 0.2752, "step": 24104 }, { "epoch": 0.4475295463068509, "grad_norm": 0.3934226632118225, "learning_rate": 1.164079834174747e-05, "loss": 0.4361, "step": 24106 }, { "epoch": 0.4475666764442695, "grad_norm": 0.3137322664260864, "learning_rate": 1.1639647649298423e-05, "loss": 0.2348, "step": 24108 }, { "epoch": 0.44760380658168814, "grad_norm": 0.41590625047683716, "learning_rate": 1.163849693453868e-05, "loss": 0.3274, "step": 24110 }, { "epoch": 0.4476409367191068, "grad_norm": 0.43023356795310974, "learning_rate": 1.1637346197483902e-05, "loss": 0.194, "step": 24112 }, { "epoch": 0.44767806685652545, "grad_norm": 0.441462904214859, "learning_rate": 1.1636195438149749e-05, "loss": 0.1265, "step": 24114 }, { "epoch": 0.4477151969939441, "grad_norm": 0.3871336579322815, "learning_rate": 1.1635044656551875e-05, "loss": 0.2088, "step": 24116 }, { "epoch": 0.4477523271313627, "grad_norm": 0.30690649151802063, "learning_rate": 1.1633893852705943e-05, "loss": 0.3618, "step": 24118 }, { "epoch": 0.44778945726878133, "grad_norm": 0.4032331109046936, "learning_rate": 1.1632743026627606e-05, "loss": 0.4696, "step": 24120 }, { "epoch": 0.4478265874062, "grad_norm": 0.443729430437088, "learning_rate": 1.163159217833253e-05, "loss": 0.299, "step": 24122 }, { "epoch": 0.44786371754361864, "grad_norm": 0.28050127625465393, "learning_rate": 1.1630441307836371e-05, "loss": 0.1166, "step": 24124 }, { "epoch": 0.4479008476810373, "grad_norm": 0.21925316751003265, "learning_rate": 1.1629290415154788e-05, "loss": 0.2606, "step": 24126 }, { "epoch": 0.4479379778184559, "grad_norm": 0.3937493562698364, "learning_rate": 1.1628139500303446e-05, "loss": 0.43, "step": 24128 }, { "epoch": 0.44797510795587453, "grad_norm": 0.18187883496284485, "learning_rate": 1.1626988563298e-05, "loss": 0.2802, "step": 24130 }, { "epoch": 0.44801223809329316, "grad_norm": 0.47616690397262573, "learning_rate": 1.1625837604154113e-05, "loss": 0.2522, "step": 24132 }, { "epoch": 0.44804936823071184, "grad_norm": 0.4741135835647583, "learning_rate": 1.1624686622887444e-05, "loss": 0.186, "step": 24134 }, { "epoch": 0.44808649836813047, "grad_norm": 0.3838316798210144, "learning_rate": 1.1623535619513659e-05, "loss": 0.2108, "step": 24136 }, { "epoch": 0.4481236285055491, "grad_norm": 0.2918176054954529, "learning_rate": 1.1622384594048418e-05, "loss": 0.3597, "step": 24138 }, { "epoch": 0.4481607586429677, "grad_norm": 0.33782634139060974, "learning_rate": 1.1621233546507382e-05, "loss": 0.3898, "step": 24140 }, { "epoch": 0.44819788878038636, "grad_norm": 0.25396132469177246, "learning_rate": 1.1620082476906212e-05, "loss": 0.3019, "step": 24142 }, { "epoch": 0.44823501891780504, "grad_norm": 0.5581413507461548, "learning_rate": 1.1618931385260574e-05, "loss": 0.2709, "step": 24144 }, { "epoch": 0.44827214905522367, "grad_norm": 0.3434060513973236, "learning_rate": 1.1617780271586128e-05, "loss": 0.383, "step": 24146 }, { "epoch": 0.4483092791926423, "grad_norm": 0.46190202236175537, "learning_rate": 1.1616629135898538e-05, "loss": 0.2948, "step": 24148 }, { "epoch": 0.4483464093300609, "grad_norm": 0.3322995603084564, "learning_rate": 1.1615477978213471e-05, "loss": 0.2121, "step": 24150 }, { "epoch": 0.44838353946747955, "grad_norm": 0.28766751289367676, "learning_rate": 1.1614326798546585e-05, "loss": 0.4175, "step": 24152 }, { "epoch": 0.4484206696048982, "grad_norm": 0.7717507481575012, "learning_rate": 1.1613175596913546e-05, "loss": 0.3161, "step": 24154 }, { "epoch": 0.44845779974231686, "grad_norm": 0.7002748250961304, "learning_rate": 1.161202437333002e-05, "loss": 0.3003, "step": 24156 }, { "epoch": 0.4484949298797355, "grad_norm": 0.3266995847225189, "learning_rate": 1.161087312781167e-05, "loss": 0.4411, "step": 24158 }, { "epoch": 0.4485320600171541, "grad_norm": 0.33778637647628784, "learning_rate": 1.1609721860374164e-05, "loss": 0.1441, "step": 24160 }, { "epoch": 0.44856919015457275, "grad_norm": 0.371554434299469, "learning_rate": 1.1608570571033164e-05, "loss": 0.2539, "step": 24162 }, { "epoch": 0.4486063202919914, "grad_norm": 0.37389928102493286, "learning_rate": 1.1607419259804336e-05, "loss": 0.2845, "step": 24164 }, { "epoch": 0.44864345042941006, "grad_norm": 0.2681103050708771, "learning_rate": 1.1606267926703347e-05, "loss": 0.3067, "step": 24166 }, { "epoch": 0.4486805805668287, "grad_norm": 0.34936437010765076, "learning_rate": 1.1605116571745864e-05, "loss": 0.2126, "step": 24168 }, { "epoch": 0.4487177107042473, "grad_norm": 0.339579701423645, "learning_rate": 1.160396519494755e-05, "loss": 0.253, "step": 24170 }, { "epoch": 0.44875484084166595, "grad_norm": 0.4387122690677643, "learning_rate": 1.1602813796324077e-05, "loss": 0.2073, "step": 24172 }, { "epoch": 0.4487919709790846, "grad_norm": 0.45612820982933044, "learning_rate": 1.160166237589111e-05, "loss": 0.3018, "step": 24174 }, { "epoch": 0.44882910111650326, "grad_norm": 0.3554868996143341, "learning_rate": 1.160051093366431e-05, "loss": 0.2237, "step": 24176 }, { "epoch": 0.4488662312539219, "grad_norm": 0.7070173025131226, "learning_rate": 1.1599359469659355e-05, "loss": 0.3964, "step": 24178 }, { "epoch": 0.4489033613913405, "grad_norm": 0.3109132647514343, "learning_rate": 1.1598207983891907e-05, "loss": 0.3791, "step": 24180 }, { "epoch": 0.44894049152875914, "grad_norm": 0.33284395933151245, "learning_rate": 1.1597056476377637e-05, "loss": 0.225, "step": 24182 }, { "epoch": 0.4489776216661778, "grad_norm": 0.3307919502258301, "learning_rate": 1.1595904947132209e-05, "loss": 0.2377, "step": 24184 }, { "epoch": 0.4490147518035964, "grad_norm": 0.3260999023914337, "learning_rate": 1.1594753396171295e-05, "loss": 0.345, "step": 24186 }, { "epoch": 0.4490518819410151, "grad_norm": 0.2680944502353668, "learning_rate": 1.1593601823510568e-05, "loss": 0.3512, "step": 24188 }, { "epoch": 0.4490890120784337, "grad_norm": 0.26139354705810547, "learning_rate": 1.1592450229165688e-05, "loss": 0.1844, "step": 24190 }, { "epoch": 0.44912614221585234, "grad_norm": 0.28114280104637146, "learning_rate": 1.1591298613152336e-05, "loss": 0.2858, "step": 24192 }, { "epoch": 0.44916327235327097, "grad_norm": 0.28542953729629517, "learning_rate": 1.1590146975486173e-05, "loss": 0.5018, "step": 24194 }, { "epoch": 0.4492004024906896, "grad_norm": 0.41173604130744934, "learning_rate": 1.1588995316182874e-05, "loss": 0.2398, "step": 24196 }, { "epoch": 0.4492375326281083, "grad_norm": 0.5837909579277039, "learning_rate": 1.1587843635258107e-05, "loss": 0.2741, "step": 24198 }, { "epoch": 0.4492746627655269, "grad_norm": 0.33508795499801636, "learning_rate": 1.1586691932727547e-05, "loss": 0.1162, "step": 24200 }, { "epoch": 0.44931179290294554, "grad_norm": 0.17821308970451355, "learning_rate": 1.1585540208606863e-05, "loss": 0.3627, "step": 24202 }, { "epoch": 0.44934892304036417, "grad_norm": 0.841312050819397, "learning_rate": 1.1584388462911723e-05, "loss": 0.247, "step": 24204 }, { "epoch": 0.4493860531777828, "grad_norm": 0.4229127764701843, "learning_rate": 1.1583236695657802e-05, "loss": 0.3341, "step": 24206 }, { "epoch": 0.4494231833152014, "grad_norm": 0.3443514406681061, "learning_rate": 1.1582084906860772e-05, "loss": 0.3204, "step": 24208 }, { "epoch": 0.4494603134526201, "grad_norm": 0.3607397675514221, "learning_rate": 1.1580933096536304e-05, "loss": 0.2103, "step": 24210 }, { "epoch": 0.44949744359003874, "grad_norm": 0.28506115078926086, "learning_rate": 1.1579781264700075e-05, "loss": 0.4519, "step": 24212 }, { "epoch": 0.44953457372745736, "grad_norm": 0.4430101215839386, "learning_rate": 1.1578629411367755e-05, "loss": 0.2865, "step": 24214 }, { "epoch": 0.449571703864876, "grad_norm": 0.8077531456947327, "learning_rate": 1.1577477536555014e-05, "loss": 0.4132, "step": 24216 }, { "epoch": 0.4496088340022946, "grad_norm": 0.4098680019378662, "learning_rate": 1.1576325640277532e-05, "loss": 0.3987, "step": 24218 }, { "epoch": 0.4496459641397133, "grad_norm": 0.20865680277347565, "learning_rate": 1.1575173722550978e-05, "loss": 0.1289, "step": 24220 }, { "epoch": 0.44968309427713193, "grad_norm": 0.38363873958587646, "learning_rate": 1.1574021783391027e-05, "loss": 0.2021, "step": 24222 }, { "epoch": 0.44972022441455056, "grad_norm": 0.5751428008079529, "learning_rate": 1.1572869822813356e-05, "loss": 0.5319, "step": 24224 }, { "epoch": 0.4497573545519692, "grad_norm": 0.2770703136920929, "learning_rate": 1.1571717840833638e-05, "loss": 0.4279, "step": 24226 }, { "epoch": 0.4497944846893878, "grad_norm": 0.24907010793685913, "learning_rate": 1.1570565837467546e-05, "loss": 0.2851, "step": 24228 }, { "epoch": 0.44983161482680645, "grad_norm": 0.3281336724758148, "learning_rate": 1.156941381273076e-05, "loss": 0.1999, "step": 24230 }, { "epoch": 0.44986874496422513, "grad_norm": 0.4506210684776306, "learning_rate": 1.1568261766638954e-05, "loss": 0.2597, "step": 24232 }, { "epoch": 0.44990587510164376, "grad_norm": 0.23393791913986206, "learning_rate": 1.1567109699207801e-05, "loss": 0.1556, "step": 24234 }, { "epoch": 0.4499430052390624, "grad_norm": 0.7359989285469055, "learning_rate": 1.1565957610452979e-05, "loss": 0.6043, "step": 24236 }, { "epoch": 0.449980135376481, "grad_norm": 0.22738473117351532, "learning_rate": 1.1564805500390164e-05, "loss": 0.2146, "step": 24238 }, { "epoch": 0.45001726551389964, "grad_norm": 0.3872811496257782, "learning_rate": 1.156365336903503e-05, "loss": 0.2651, "step": 24240 }, { "epoch": 0.45005439565131833, "grad_norm": 0.34296178817749023, "learning_rate": 1.1562501216403261e-05, "loss": 0.342, "step": 24242 }, { "epoch": 0.45009152578873696, "grad_norm": 0.4160754680633545, "learning_rate": 1.1561349042510532e-05, "loss": 0.3028, "step": 24244 }, { "epoch": 0.4501286559261556, "grad_norm": 0.6919189095497131, "learning_rate": 1.156019684737252e-05, "loss": 0.3078, "step": 24246 }, { "epoch": 0.4501657860635742, "grad_norm": 0.36379021406173706, "learning_rate": 1.1559044631004898e-05, "loss": 0.303, "step": 24248 }, { "epoch": 0.45020291620099284, "grad_norm": 0.35951507091522217, "learning_rate": 1.1557892393423351e-05, "loss": 0.2808, "step": 24250 }, { "epoch": 0.4502400463384115, "grad_norm": 0.46340298652648926, "learning_rate": 1.1556740134643554e-05, "loss": 0.2855, "step": 24252 }, { "epoch": 0.45027717647583015, "grad_norm": 0.33905476331710815, "learning_rate": 1.1555587854681187e-05, "loss": 0.4119, "step": 24254 }, { "epoch": 0.4503143066132488, "grad_norm": 0.5160535573959351, "learning_rate": 1.1554435553551933e-05, "loss": 0.1015, "step": 24256 }, { "epoch": 0.4503514367506674, "grad_norm": 0.3321382999420166, "learning_rate": 1.1553283231271462e-05, "loss": 0.2341, "step": 24258 }, { "epoch": 0.45038856688808604, "grad_norm": 0.4394192397594452, "learning_rate": 1.1552130887855457e-05, "loss": 0.5611, "step": 24260 }, { "epoch": 0.45042569702550467, "grad_norm": 0.7245113849639893, "learning_rate": 1.1550978523319603e-05, "loss": 0.3062, "step": 24262 }, { "epoch": 0.45046282716292335, "grad_norm": 0.40561386942863464, "learning_rate": 1.1549826137679578e-05, "loss": 0.2109, "step": 24264 }, { "epoch": 0.450499957300342, "grad_norm": 0.4951251745223999, "learning_rate": 1.154867373095106e-05, "loss": 0.2717, "step": 24266 }, { "epoch": 0.4505370874377606, "grad_norm": 0.3473818004131317, "learning_rate": 1.154752130314973e-05, "loss": 0.2167, "step": 24268 }, { "epoch": 0.45057421757517924, "grad_norm": 0.214506134390831, "learning_rate": 1.1546368854291275e-05, "loss": 0.164, "step": 24270 }, { "epoch": 0.45061134771259787, "grad_norm": 0.4057979881763458, "learning_rate": 1.154521638439137e-05, "loss": 0.1924, "step": 24272 }, { "epoch": 0.45064847785001655, "grad_norm": 0.4874143600463867, "learning_rate": 1.1544063893465695e-05, "loss": 0.4362, "step": 24274 }, { "epoch": 0.4506856079874352, "grad_norm": 0.49551358819007874, "learning_rate": 1.1542911381529937e-05, "loss": 0.351, "step": 24276 }, { "epoch": 0.4507227381248538, "grad_norm": 0.7293763756752014, "learning_rate": 1.154175884859978e-05, "loss": 0.388, "step": 24278 }, { "epoch": 0.45075986826227243, "grad_norm": 0.3577684760093689, "learning_rate": 1.15406062946909e-05, "loss": 0.3224, "step": 24280 }, { "epoch": 0.45079699839969106, "grad_norm": 0.34816378355026245, "learning_rate": 1.1539453719818985e-05, "loss": 0.2899, "step": 24282 }, { "epoch": 0.4508341285371097, "grad_norm": 0.27185338735580444, "learning_rate": 1.1538301123999713e-05, "loss": 0.2826, "step": 24284 }, { "epoch": 0.4508712586745284, "grad_norm": 0.3021487593650818, "learning_rate": 1.1537148507248774e-05, "loss": 0.3028, "step": 24286 }, { "epoch": 0.450908388811947, "grad_norm": 0.49784591794013977, "learning_rate": 1.1535995869581846e-05, "loss": 0.4266, "step": 24288 }, { "epoch": 0.45094551894936563, "grad_norm": 0.4350736737251282, "learning_rate": 1.1534843211014614e-05, "loss": 0.1592, "step": 24290 }, { "epoch": 0.45098264908678426, "grad_norm": 0.5790149569511414, "learning_rate": 1.1533690531562765e-05, "loss": 0.4709, "step": 24292 }, { "epoch": 0.4510197792242029, "grad_norm": 0.43375521898269653, "learning_rate": 1.1532537831241983e-05, "loss": 0.2505, "step": 24294 }, { "epoch": 0.45105690936162157, "grad_norm": 0.3203457295894623, "learning_rate": 1.153138511006795e-05, "loss": 0.4867, "step": 24296 }, { "epoch": 0.4510940394990402, "grad_norm": 0.30377355217933655, "learning_rate": 1.1530232368056354e-05, "loss": 0.2107, "step": 24298 }, { "epoch": 0.45113116963645883, "grad_norm": 0.28263983130455017, "learning_rate": 1.1529079605222876e-05, "loss": 0.4215, "step": 24300 }, { "epoch": 0.45116829977387746, "grad_norm": 0.41750532388687134, "learning_rate": 1.1527926821583207e-05, "loss": 0.2489, "step": 24302 }, { "epoch": 0.4512054299112961, "grad_norm": 0.5356645584106445, "learning_rate": 1.1526774017153029e-05, "loss": 0.2804, "step": 24304 }, { "epoch": 0.4512425600487147, "grad_norm": 0.5181710720062256, "learning_rate": 1.1525621191948031e-05, "loss": 0.5768, "step": 24306 }, { "epoch": 0.4512796901861334, "grad_norm": 0.3529236316680908, "learning_rate": 1.15244683459839e-05, "loss": 0.3728, "step": 24308 }, { "epoch": 0.451316820323552, "grad_norm": 0.4705997407436371, "learning_rate": 1.1523315479276318e-05, "loss": 0.4743, "step": 24310 }, { "epoch": 0.45135395046097065, "grad_norm": 0.4588668942451477, "learning_rate": 1.1522162591840976e-05, "loss": 0.2983, "step": 24312 }, { "epoch": 0.4513910805983893, "grad_norm": 0.2729796767234802, "learning_rate": 1.1521009683693563e-05, "loss": 0.1774, "step": 24314 }, { "epoch": 0.4514282107358079, "grad_norm": 0.31937137246131897, "learning_rate": 1.1519856754849761e-05, "loss": 0.1868, "step": 24316 }, { "epoch": 0.4514653408732266, "grad_norm": 0.40577009320259094, "learning_rate": 1.1518703805325264e-05, "loss": 0.3799, "step": 24318 }, { "epoch": 0.4515024710106452, "grad_norm": 0.4380381107330322, "learning_rate": 1.1517550835135756e-05, "loss": 0.4497, "step": 24320 }, { "epoch": 0.45153960114806385, "grad_norm": 0.7073315382003784, "learning_rate": 1.1516397844296925e-05, "loss": 0.4003, "step": 24322 }, { "epoch": 0.4515767312854825, "grad_norm": 0.44480425119400024, "learning_rate": 1.151524483282446e-05, "loss": 0.3039, "step": 24324 }, { "epoch": 0.4516138614229011, "grad_norm": 0.32456642389297485, "learning_rate": 1.1514091800734058e-05, "loss": 0.3176, "step": 24326 }, { "epoch": 0.4516509915603198, "grad_norm": 0.5175254940986633, "learning_rate": 1.1512938748041396e-05, "loss": 0.3516, "step": 24328 }, { "epoch": 0.4516881216977384, "grad_norm": 0.36971572041511536, "learning_rate": 1.1511785674762173e-05, "loss": 0.2398, "step": 24330 }, { "epoch": 0.45172525183515705, "grad_norm": 0.4066765308380127, "learning_rate": 1.1510632580912075e-05, "loss": 0.5428, "step": 24332 }, { "epoch": 0.4517623819725757, "grad_norm": 0.25535905361175537, "learning_rate": 1.150947946650679e-05, "loss": 0.4229, "step": 24334 }, { "epoch": 0.4517995121099943, "grad_norm": 0.37440025806427, "learning_rate": 1.1508326331562012e-05, "loss": 0.268, "step": 24336 }, { "epoch": 0.45183664224741293, "grad_norm": 0.4801817834377289, "learning_rate": 1.1507173176093432e-05, "loss": 0.3184, "step": 24338 }, { "epoch": 0.4518737723848316, "grad_norm": 0.3520878851413727, "learning_rate": 1.1506020000116739e-05, "loss": 0.2632, "step": 24340 }, { "epoch": 0.45191090252225025, "grad_norm": 0.35772955417633057, "learning_rate": 1.1504866803647622e-05, "loss": 0.2517, "step": 24342 }, { "epoch": 0.4519480326596689, "grad_norm": 0.42108869552612305, "learning_rate": 1.1503713586701777e-05, "loss": 0.2571, "step": 24344 }, { "epoch": 0.4519851627970875, "grad_norm": 0.4936422109603882, "learning_rate": 1.1502560349294896e-05, "loss": 0.2214, "step": 24346 }, { "epoch": 0.45202229293450613, "grad_norm": 0.4932944178581238, "learning_rate": 1.1501407091442667e-05, "loss": 0.3233, "step": 24348 }, { "epoch": 0.4520594230719248, "grad_norm": 0.32448112964630127, "learning_rate": 1.1500253813160786e-05, "loss": 0.2351, "step": 24350 }, { "epoch": 0.45209655320934344, "grad_norm": 0.41791751980781555, "learning_rate": 1.1499100514464947e-05, "loss": 0.3427, "step": 24352 }, { "epoch": 0.45213368334676207, "grad_norm": 0.4049769937992096, "learning_rate": 1.1497947195370835e-05, "loss": 0.3262, "step": 24354 }, { "epoch": 0.4521708134841807, "grad_norm": 0.38210293650627136, "learning_rate": 1.1496793855894151e-05, "loss": 0.1533, "step": 24356 }, { "epoch": 0.45220794362159933, "grad_norm": 0.4687541425228119, "learning_rate": 1.1495640496050588e-05, "loss": 0.3664, "step": 24358 }, { "epoch": 0.45224507375901796, "grad_norm": 0.5578746199607849, "learning_rate": 1.1494487115855834e-05, "loss": 0.4291, "step": 24360 }, { "epoch": 0.45228220389643664, "grad_norm": 0.34367769956588745, "learning_rate": 1.149333371532559e-05, "loss": 0.4349, "step": 24362 }, { "epoch": 0.45231933403385527, "grad_norm": 0.30054235458374023, "learning_rate": 1.1492180294475548e-05, "loss": 0.3398, "step": 24364 }, { "epoch": 0.4523564641712739, "grad_norm": 0.7804811596870422, "learning_rate": 1.1491026853321398e-05, "loss": 0.3537, "step": 24366 }, { "epoch": 0.4523935943086925, "grad_norm": 0.24323876202106476, "learning_rate": 1.148987339187884e-05, "loss": 0.2364, "step": 24368 }, { "epoch": 0.45243072444611115, "grad_norm": 0.5521492958068848, "learning_rate": 1.1488719910163569e-05, "loss": 0.5575, "step": 24370 }, { "epoch": 0.45246785458352984, "grad_norm": 0.3839251399040222, "learning_rate": 1.148756640819128e-05, "loss": 0.2073, "step": 24372 }, { "epoch": 0.45250498472094847, "grad_norm": 0.2829572856426239, "learning_rate": 1.1486412885977664e-05, "loss": 0.4, "step": 24374 }, { "epoch": 0.4525421148583671, "grad_norm": 0.45137205719947815, "learning_rate": 1.1485259343538426e-05, "loss": 0.5329, "step": 24376 }, { "epoch": 0.4525792449957857, "grad_norm": 0.4369995594024658, "learning_rate": 1.1484105780889253e-05, "loss": 0.2938, "step": 24378 }, { "epoch": 0.45261637513320435, "grad_norm": 0.2578050494194031, "learning_rate": 1.1482952198045846e-05, "loss": 0.2111, "step": 24380 }, { "epoch": 0.452653505270623, "grad_norm": 0.4284721612930298, "learning_rate": 1.1481798595023905e-05, "loss": 0.226, "step": 24382 }, { "epoch": 0.45269063540804166, "grad_norm": 0.7003005146980286, "learning_rate": 1.1480644971839124e-05, "loss": 0.3373, "step": 24384 }, { "epoch": 0.4527277655454603, "grad_norm": 0.34780243039131165, "learning_rate": 1.1479491328507195e-05, "loss": 0.3447, "step": 24386 }, { "epoch": 0.4527648956828789, "grad_norm": 0.4656771719455719, "learning_rate": 1.1478337665043822e-05, "loss": 0.3761, "step": 24388 }, { "epoch": 0.45280202582029755, "grad_norm": 0.3914140462875366, "learning_rate": 1.1477183981464704e-05, "loss": 0.1832, "step": 24390 }, { "epoch": 0.4528391559577162, "grad_norm": 0.4113878309726715, "learning_rate": 1.1476030277785534e-05, "loss": 0.3029, "step": 24392 }, { "epoch": 0.45287628609513486, "grad_norm": 0.3519241213798523, "learning_rate": 1.1474876554022016e-05, "loss": 0.3168, "step": 24394 }, { "epoch": 0.4529134162325535, "grad_norm": 0.2517205774784088, "learning_rate": 1.1473722810189845e-05, "loss": 0.2098, "step": 24396 }, { "epoch": 0.4529505463699721, "grad_norm": 0.562839925289154, "learning_rate": 1.147256904630472e-05, "loss": 0.1431, "step": 24398 }, { "epoch": 0.45298767650739075, "grad_norm": 0.4227277636528015, "learning_rate": 1.147141526238234e-05, "loss": 0.2299, "step": 24400 }, { "epoch": 0.4530248066448094, "grad_norm": 0.24431215226650238, "learning_rate": 1.1470261458438408e-05, "loss": 0.1003, "step": 24402 }, { "epoch": 0.45306193678222806, "grad_norm": 0.3886420428752899, "learning_rate": 1.146910763448862e-05, "loss": 0.3595, "step": 24404 }, { "epoch": 0.4530990669196467, "grad_norm": 0.5953771471977234, "learning_rate": 1.1467953790548678e-05, "loss": 0.2306, "step": 24406 }, { "epoch": 0.4531361970570653, "grad_norm": 0.410057932138443, "learning_rate": 1.1466799926634284e-05, "loss": 0.2469, "step": 24408 }, { "epoch": 0.45317332719448394, "grad_norm": 0.3346385955810547, "learning_rate": 1.1465646042761134e-05, "loss": 0.4124, "step": 24410 }, { "epoch": 0.45321045733190257, "grad_norm": 0.5741729140281677, "learning_rate": 1.146449213894493e-05, "loss": 0.227, "step": 24412 }, { "epoch": 0.4532475874693212, "grad_norm": 0.4406915307044983, "learning_rate": 1.1463338215201378e-05, "loss": 0.388, "step": 24414 }, { "epoch": 0.4532847176067399, "grad_norm": 0.5334178805351257, "learning_rate": 1.1462184271546177e-05, "loss": 0.4765, "step": 24416 }, { "epoch": 0.4533218477441585, "grad_norm": 0.4429932236671448, "learning_rate": 1.1461030307995023e-05, "loss": 0.1941, "step": 24418 }, { "epoch": 0.45335897788157714, "grad_norm": 0.51790452003479, "learning_rate": 1.1459876324563628e-05, "loss": 0.2628, "step": 24420 }, { "epoch": 0.45339610801899577, "grad_norm": 0.4175902009010315, "learning_rate": 1.1458722321267685e-05, "loss": 0.2257, "step": 24422 }, { "epoch": 0.4534332381564144, "grad_norm": 0.2585659623146057, "learning_rate": 1.1457568298122903e-05, "loss": 0.1457, "step": 24424 }, { "epoch": 0.4534703682938331, "grad_norm": 0.31611302495002747, "learning_rate": 1.1456414255144981e-05, "loss": 0.2361, "step": 24426 }, { "epoch": 0.4535074984312517, "grad_norm": 0.5543726682662964, "learning_rate": 1.1455260192349625e-05, "loss": 0.1923, "step": 24428 }, { "epoch": 0.45354462856867034, "grad_norm": 1.5112606287002563, "learning_rate": 1.1454106109752535e-05, "loss": 0.3767, "step": 24430 }, { "epoch": 0.45358175870608897, "grad_norm": 0.3529212474822998, "learning_rate": 1.1452952007369416e-05, "loss": 0.428, "step": 24432 }, { "epoch": 0.4536188888435076, "grad_norm": 0.2337718904018402, "learning_rate": 1.1451797885215974e-05, "loss": 0.2674, "step": 24434 }, { "epoch": 0.4536560189809262, "grad_norm": 0.566755473613739, "learning_rate": 1.1450643743307913e-05, "loss": 0.1397, "step": 24436 }, { "epoch": 0.4536931491183449, "grad_norm": 0.38359037041664124, "learning_rate": 1.1449489581660931e-05, "loss": 0.3229, "step": 24438 }, { "epoch": 0.45373027925576354, "grad_norm": 0.2971417009830475, "learning_rate": 1.1448335400290741e-05, "loss": 0.3266, "step": 24440 }, { "epoch": 0.45376740939318216, "grad_norm": 0.47858765721321106, "learning_rate": 1.1447181199213043e-05, "loss": 0.1577, "step": 24442 }, { "epoch": 0.4538045395306008, "grad_norm": 0.44255489110946655, "learning_rate": 1.1446026978443542e-05, "loss": 0.2982, "step": 24444 }, { "epoch": 0.4538416696680194, "grad_norm": 0.40738019347190857, "learning_rate": 1.144487273799795e-05, "loss": 0.3056, "step": 24446 }, { "epoch": 0.4538787998054381, "grad_norm": 0.329432874917984, "learning_rate": 1.1443718477891968e-05, "loss": 0.2561, "step": 24448 }, { "epoch": 0.45391592994285673, "grad_norm": 0.391357421875, "learning_rate": 1.1442564198141297e-05, "loss": 0.1623, "step": 24450 }, { "epoch": 0.45395306008027536, "grad_norm": 0.5092080235481262, "learning_rate": 1.1441409898761652e-05, "loss": 0.2887, "step": 24452 }, { "epoch": 0.453990190217694, "grad_norm": 0.3103174865245819, "learning_rate": 1.1440255579768733e-05, "loss": 0.4682, "step": 24454 }, { "epoch": 0.4540273203551126, "grad_norm": 0.3614620566368103, "learning_rate": 1.1439101241178252e-05, "loss": 0.191, "step": 24456 }, { "epoch": 0.45406445049253125, "grad_norm": 1.1077080965042114, "learning_rate": 1.1437946883005915e-05, "loss": 0.4011, "step": 24458 }, { "epoch": 0.45410158062994993, "grad_norm": 0.4501185715198517, "learning_rate": 1.1436792505267425e-05, "loss": 0.348, "step": 24460 }, { "epoch": 0.45413871076736856, "grad_norm": 0.33334261178970337, "learning_rate": 1.1435638107978491e-05, "loss": 0.1522, "step": 24462 }, { "epoch": 0.4541758409047872, "grad_norm": 0.3686613142490387, "learning_rate": 1.1434483691154828e-05, "loss": 0.1669, "step": 24464 }, { "epoch": 0.4542129710422058, "grad_norm": 0.4061455726623535, "learning_rate": 1.1433329254812133e-05, "loss": 0.1999, "step": 24466 }, { "epoch": 0.45425010117962444, "grad_norm": 0.2769937515258789, "learning_rate": 1.1432174798966124e-05, "loss": 0.1557, "step": 24468 }, { "epoch": 0.4542872313170431, "grad_norm": 0.4050350487232208, "learning_rate": 1.1431020323632505e-05, "loss": 0.5059, "step": 24470 }, { "epoch": 0.45432436145446176, "grad_norm": 0.4567606747150421, "learning_rate": 1.1429865828826987e-05, "loss": 0.2729, "step": 24472 }, { "epoch": 0.4543614915918804, "grad_norm": 0.48036959767341614, "learning_rate": 1.1428711314565275e-05, "loss": 0.4366, "step": 24474 }, { "epoch": 0.454398621729299, "grad_norm": 0.2840084433555603, "learning_rate": 1.1427556780863083e-05, "loss": 0.457, "step": 24476 }, { "epoch": 0.45443575186671764, "grad_norm": 0.2606395184993744, "learning_rate": 1.1426402227736123e-05, "loss": 0.4134, "step": 24478 }, { "epoch": 0.4544728820041363, "grad_norm": 0.3812691867351532, "learning_rate": 1.1425247655200096e-05, "loss": 0.2857, "step": 24480 }, { "epoch": 0.45451001214155495, "grad_norm": 0.3884766101837158, "learning_rate": 1.1424093063270719e-05, "loss": 0.3528, "step": 24482 }, { "epoch": 0.4545471422789736, "grad_norm": 0.21928803622722626, "learning_rate": 1.1422938451963702e-05, "loss": 0.1583, "step": 24484 }, { "epoch": 0.4545842724163922, "grad_norm": 0.273173451423645, "learning_rate": 1.1421783821294752e-05, "loss": 0.429, "step": 24486 }, { "epoch": 0.45462140255381084, "grad_norm": 0.358315110206604, "learning_rate": 1.1420629171279587e-05, "loss": 0.2361, "step": 24488 }, { "epoch": 0.45465853269122947, "grad_norm": 0.3807966113090515, "learning_rate": 1.1419474501933912e-05, "loss": 0.3653, "step": 24490 }, { "epoch": 0.45469566282864815, "grad_norm": 0.35825005173683167, "learning_rate": 1.141831981327344e-05, "loss": 0.4645, "step": 24492 }, { "epoch": 0.4547327929660668, "grad_norm": 0.6739867925643921, "learning_rate": 1.1417165105313884e-05, "loss": 0.1992, "step": 24494 }, { "epoch": 0.4547699231034854, "grad_norm": 0.29355382919311523, "learning_rate": 1.1416010378070958e-05, "loss": 0.2054, "step": 24496 }, { "epoch": 0.45480705324090404, "grad_norm": 0.529712438583374, "learning_rate": 1.141485563156037e-05, "loss": 0.3194, "step": 24498 }, { "epoch": 0.45484418337832266, "grad_norm": 0.454432874917984, "learning_rate": 1.1413700865797836e-05, "loss": 0.1373, "step": 24500 }, { "epoch": 0.45488131351574135, "grad_norm": 0.3998640775680542, "learning_rate": 1.141254608079907e-05, "loss": 0.2565, "step": 24502 }, { "epoch": 0.45491844365316, "grad_norm": 0.32677263021469116, "learning_rate": 1.141139127657978e-05, "loss": 0.1635, "step": 24504 }, { "epoch": 0.4549555737905786, "grad_norm": 0.35489946603775024, "learning_rate": 1.1410236453155681e-05, "loss": 0.2178, "step": 24506 }, { "epoch": 0.45499270392799723, "grad_norm": 0.44876015186309814, "learning_rate": 1.1409081610542489e-05, "loss": 0.4058, "step": 24508 }, { "epoch": 0.45502983406541586, "grad_norm": 0.32283416390419006, "learning_rate": 1.140792674875592e-05, "loss": 0.527, "step": 24510 }, { "epoch": 0.4550669642028345, "grad_norm": 0.31632810831069946, "learning_rate": 1.1406771867811682e-05, "loss": 0.3693, "step": 24512 }, { "epoch": 0.4551040943402532, "grad_norm": 0.32010865211486816, "learning_rate": 1.1405616967725493e-05, "loss": 0.3847, "step": 24514 }, { "epoch": 0.4551412244776718, "grad_norm": 0.28043118119239807, "learning_rate": 1.1404462048513067e-05, "loss": 0.2551, "step": 24516 }, { "epoch": 0.45517835461509043, "grad_norm": 0.3210687041282654, "learning_rate": 1.140330711019012e-05, "loss": 0.3279, "step": 24518 }, { "epoch": 0.45521548475250906, "grad_norm": 0.5838424563407898, "learning_rate": 1.1402152152772368e-05, "loss": 0.3853, "step": 24520 }, { "epoch": 0.4552526148899277, "grad_norm": 0.37957724928855896, "learning_rate": 1.1400997176275525e-05, "loss": 0.4725, "step": 24522 }, { "epoch": 0.45528974502734637, "grad_norm": 0.6297940611839294, "learning_rate": 1.1399842180715306e-05, "loss": 0.2272, "step": 24524 }, { "epoch": 0.455326875164765, "grad_norm": 0.5752727389335632, "learning_rate": 1.1398687166107424e-05, "loss": 0.3823, "step": 24526 }, { "epoch": 0.4553640053021836, "grad_norm": 0.25575560331344604, "learning_rate": 1.1397532132467606e-05, "loss": 0.2138, "step": 24528 }, { "epoch": 0.45540113543960226, "grad_norm": 0.4276936948299408, "learning_rate": 1.1396377079811558e-05, "loss": 0.3427, "step": 24530 }, { "epoch": 0.4554382655770209, "grad_norm": 0.3437599241733551, "learning_rate": 1.1395222008155002e-05, "loss": 0.2192, "step": 24532 }, { "epoch": 0.4554753957144395, "grad_norm": 0.28980952501296997, "learning_rate": 1.1394066917513656e-05, "loss": 0.3994, "step": 24534 }, { "epoch": 0.4555125258518582, "grad_norm": 0.5903273224830627, "learning_rate": 1.139291180790323e-05, "loss": 0.2171, "step": 24536 }, { "epoch": 0.4555496559892768, "grad_norm": 0.25357452034950256, "learning_rate": 1.1391756679339448e-05, "loss": 0.2338, "step": 24538 }, { "epoch": 0.45558678612669545, "grad_norm": 0.41084229946136475, "learning_rate": 1.1390601531838029e-05, "loss": 0.1131, "step": 24540 }, { "epoch": 0.4556239162641141, "grad_norm": 0.3623978793621063, "learning_rate": 1.1389446365414685e-05, "loss": 0.2403, "step": 24542 }, { "epoch": 0.4556610464015327, "grad_norm": 0.36783409118652344, "learning_rate": 1.1388291180085138e-05, "loss": 0.3034, "step": 24544 }, { "epoch": 0.4556981765389514, "grad_norm": 0.3583255708217621, "learning_rate": 1.1387135975865109e-05, "loss": 0.2518, "step": 24546 }, { "epoch": 0.45573530667637, "grad_norm": 0.41535913944244385, "learning_rate": 1.1385980752770314e-05, "loss": 0.3909, "step": 24548 }, { "epoch": 0.45577243681378865, "grad_norm": 0.38567739725112915, "learning_rate": 1.1384825510816468e-05, "loss": 0.3593, "step": 24550 }, { "epoch": 0.4558095669512073, "grad_norm": 0.3696776330471039, "learning_rate": 1.1383670250019302e-05, "loss": 0.3823, "step": 24552 }, { "epoch": 0.4558466970886259, "grad_norm": 0.2262435406446457, "learning_rate": 1.1382514970394524e-05, "loss": 0.3393, "step": 24554 }, { "epoch": 0.4558838272260446, "grad_norm": 0.23637531697750092, "learning_rate": 1.138135967195786e-05, "loss": 0.2319, "step": 24556 }, { "epoch": 0.4559209573634632, "grad_norm": 0.43223726749420166, "learning_rate": 1.1380204354725024e-05, "loss": 0.3389, "step": 24558 }, { "epoch": 0.45595808750088185, "grad_norm": 0.3760911226272583, "learning_rate": 1.1379049018711747e-05, "loss": 0.3885, "step": 24560 }, { "epoch": 0.4559952176383005, "grad_norm": 0.5189329981803894, "learning_rate": 1.1377893663933739e-05, "loss": 0.3595, "step": 24562 }, { "epoch": 0.4560323477757191, "grad_norm": 0.4941771328449249, "learning_rate": 1.137673829040673e-05, "loss": 0.2323, "step": 24564 }, { "epoch": 0.45606947791313773, "grad_norm": 0.3823826014995575, "learning_rate": 1.1375582898146436e-05, "loss": 0.404, "step": 24566 }, { "epoch": 0.4561066080505564, "grad_norm": 0.42866823077201843, "learning_rate": 1.1374427487168575e-05, "loss": 0.4135, "step": 24568 }, { "epoch": 0.45614373818797505, "grad_norm": 0.4048547148704529, "learning_rate": 1.1373272057488875e-05, "loss": 0.2561, "step": 24570 }, { "epoch": 0.4561808683253937, "grad_norm": 0.3053908348083496, "learning_rate": 1.1372116609123056e-05, "loss": 0.3646, "step": 24572 }, { "epoch": 0.4562179984628123, "grad_norm": 0.36771899461746216, "learning_rate": 1.1370961142086843e-05, "loss": 0.3279, "step": 24574 }, { "epoch": 0.45625512860023093, "grad_norm": 0.47602155804634094, "learning_rate": 1.1369805656395949e-05, "loss": 0.3204, "step": 24576 }, { "epoch": 0.4562922587376496, "grad_norm": 0.33591604232788086, "learning_rate": 1.1368650152066109e-05, "loss": 0.3125, "step": 24578 }, { "epoch": 0.45632938887506824, "grad_norm": 0.37706390023231506, "learning_rate": 1.1367494629113036e-05, "loss": 0.2787, "step": 24580 }, { "epoch": 0.45636651901248687, "grad_norm": 0.44107377529144287, "learning_rate": 1.1366339087552458e-05, "loss": 0.3419, "step": 24582 }, { "epoch": 0.4564036491499055, "grad_norm": 0.3912048935890198, "learning_rate": 1.1365183527400099e-05, "loss": 0.2214, "step": 24584 }, { "epoch": 0.4564407792873241, "grad_norm": 0.4682738482952118, "learning_rate": 1.136402794867168e-05, "loss": 0.4859, "step": 24586 }, { "epoch": 0.45647790942474276, "grad_norm": 0.3997812569141388, "learning_rate": 1.1362872351382927e-05, "loss": 0.214, "step": 24588 }, { "epoch": 0.45651503956216144, "grad_norm": 0.4531441628932953, "learning_rate": 1.1361716735549565e-05, "loss": 0.1835, "step": 24590 }, { "epoch": 0.45655216969958007, "grad_norm": 0.38408786058425903, "learning_rate": 1.1360561101187316e-05, "loss": 0.2306, "step": 24592 }, { "epoch": 0.4565892998369987, "grad_norm": 0.3898982107639313, "learning_rate": 1.1359405448311905e-05, "loss": 0.3398, "step": 24594 }, { "epoch": 0.4566264299744173, "grad_norm": 0.3453042507171631, "learning_rate": 1.1358249776939056e-05, "loss": 0.2156, "step": 24596 }, { "epoch": 0.45666356011183595, "grad_norm": 0.28083473443984985, "learning_rate": 1.13570940870845e-05, "loss": 0.3232, "step": 24598 }, { "epoch": 0.45670069024925464, "grad_norm": 0.3714282512664795, "learning_rate": 1.1355938378763955e-05, "loss": 0.0918, "step": 24600 }, { "epoch": 0.45673782038667327, "grad_norm": 0.41268599033355713, "learning_rate": 1.135478265199315e-05, "loss": 0.169, "step": 24602 }, { "epoch": 0.4567749505240919, "grad_norm": 0.4764304757118225, "learning_rate": 1.1353626906787815e-05, "loss": 0.1995, "step": 24604 }, { "epoch": 0.4568120806615105, "grad_norm": 0.6143215894699097, "learning_rate": 1.1352471143163673e-05, "loss": 0.2432, "step": 24606 }, { "epoch": 0.45684921079892915, "grad_norm": 0.4299054443836212, "learning_rate": 1.1351315361136443e-05, "loss": 0.1698, "step": 24608 }, { "epoch": 0.4568863409363478, "grad_norm": 0.4971691370010376, "learning_rate": 1.1350159560721865e-05, "loss": 0.4941, "step": 24610 }, { "epoch": 0.45692347107376646, "grad_norm": 0.4644004702568054, "learning_rate": 1.1349003741935656e-05, "loss": 0.2069, "step": 24612 }, { "epoch": 0.4569606012111851, "grad_norm": 0.3718923032283783, "learning_rate": 1.1347847904793547e-05, "loss": 0.509, "step": 24614 }, { "epoch": 0.4569977313486037, "grad_norm": 0.38594526052474976, "learning_rate": 1.1346692049311267e-05, "loss": 0.3381, "step": 24616 }, { "epoch": 0.45703486148602235, "grad_norm": 0.46163222193717957, "learning_rate": 1.1345536175504544e-05, "loss": 0.256, "step": 24618 }, { "epoch": 0.457071991623441, "grad_norm": 0.4183988571166992, "learning_rate": 1.13443802833891e-05, "loss": 0.2962, "step": 24620 }, { "epoch": 0.45710912176085966, "grad_norm": 0.27365225553512573, "learning_rate": 1.1343224372980668e-05, "loss": 0.2902, "step": 24622 }, { "epoch": 0.4571462518982783, "grad_norm": 0.4900434613227844, "learning_rate": 1.1342068444294976e-05, "loss": 0.3922, "step": 24624 }, { "epoch": 0.4571833820356969, "grad_norm": 0.3156397342681885, "learning_rate": 1.1340912497347753e-05, "loss": 0.2715, "step": 24626 }, { "epoch": 0.45722051217311555, "grad_norm": 0.44852909445762634, "learning_rate": 1.1339756532154728e-05, "loss": 0.2205, "step": 24628 }, { "epoch": 0.4572576423105342, "grad_norm": 0.3294508159160614, "learning_rate": 1.1338600548731628e-05, "loss": 0.2872, "step": 24630 }, { "epoch": 0.45729477244795286, "grad_norm": 0.5000939965248108, "learning_rate": 1.1337444547094186e-05, "loss": 0.2379, "step": 24632 }, { "epoch": 0.4573319025853715, "grad_norm": 0.5787004232406616, "learning_rate": 1.1336288527258125e-05, "loss": 0.3117, "step": 24634 }, { "epoch": 0.4573690327227901, "grad_norm": 0.5235352516174316, "learning_rate": 1.1335132489239187e-05, "loss": 0.3567, "step": 24636 }, { "epoch": 0.45740616286020874, "grad_norm": 0.3239721953868866, "learning_rate": 1.1333976433053092e-05, "loss": 0.2903, "step": 24638 }, { "epoch": 0.45744329299762737, "grad_norm": 0.5485575795173645, "learning_rate": 1.1332820358715572e-05, "loss": 0.4664, "step": 24640 }, { "epoch": 0.457480423135046, "grad_norm": 0.26542019844055176, "learning_rate": 1.133166426624236e-05, "loss": 0.5019, "step": 24642 }, { "epoch": 0.4575175532724647, "grad_norm": 0.3433041274547577, "learning_rate": 1.1330508155649187e-05, "loss": 0.1303, "step": 24644 }, { "epoch": 0.4575546834098833, "grad_norm": 0.5400921702384949, "learning_rate": 1.1329352026951781e-05, "loss": 0.3413, "step": 24646 }, { "epoch": 0.45759181354730194, "grad_norm": 0.3454822897911072, "learning_rate": 1.132819588016588e-05, "loss": 0.4525, "step": 24648 }, { "epoch": 0.45762894368472057, "grad_norm": 0.29039466381073, "learning_rate": 1.1327039715307208e-05, "loss": 0.3117, "step": 24650 }, { "epoch": 0.4576660738221392, "grad_norm": 0.46030300855636597, "learning_rate": 1.13258835323915e-05, "loss": 0.3963, "step": 24652 }, { "epoch": 0.4577032039595579, "grad_norm": 0.35532301664352417, "learning_rate": 1.1324727331434491e-05, "loss": 0.2821, "step": 24654 }, { "epoch": 0.4577403340969765, "grad_norm": 0.30830031633377075, "learning_rate": 1.132357111245191e-05, "loss": 0.3755, "step": 24656 }, { "epoch": 0.45777746423439514, "grad_norm": 0.3656231462955475, "learning_rate": 1.1322414875459492e-05, "loss": 0.2861, "step": 24658 }, { "epoch": 0.45781459437181377, "grad_norm": 0.39429420232772827, "learning_rate": 1.1321258620472969e-05, "loss": 0.4696, "step": 24660 }, { "epoch": 0.4578517245092324, "grad_norm": 0.577022910118103, "learning_rate": 1.1320102347508068e-05, "loss": 0.3116, "step": 24662 }, { "epoch": 0.457888854646651, "grad_norm": 0.6878483891487122, "learning_rate": 1.1318946056580534e-05, "loss": 0.5136, "step": 24664 }, { "epoch": 0.4579259847840697, "grad_norm": 0.32243695855140686, "learning_rate": 1.1317789747706093e-05, "loss": 0.3957, "step": 24666 }, { "epoch": 0.45796311492148833, "grad_norm": 0.29694223403930664, "learning_rate": 1.131663342090048e-05, "loss": 0.4932, "step": 24668 }, { "epoch": 0.45800024505890696, "grad_norm": 0.39853808283805847, "learning_rate": 1.131547707617943e-05, "loss": 0.3442, "step": 24670 }, { "epoch": 0.4580373751963256, "grad_norm": 0.3936103582382202, "learning_rate": 1.1314320713558682e-05, "loss": 0.1655, "step": 24672 }, { "epoch": 0.4580745053337442, "grad_norm": 0.382458359003067, "learning_rate": 1.1313164333053962e-05, "loss": 0.45, "step": 24674 }, { "epoch": 0.4581116354711629, "grad_norm": 0.35554081201553345, "learning_rate": 1.1312007934681006e-05, "loss": 0.3633, "step": 24676 }, { "epoch": 0.45814876560858153, "grad_norm": 0.3385803997516632, "learning_rate": 1.1310851518455555e-05, "loss": 0.4222, "step": 24678 }, { "epoch": 0.45818589574600016, "grad_norm": 0.3930008113384247, "learning_rate": 1.1309695084393342e-05, "loss": 0.2888, "step": 24680 }, { "epoch": 0.4582230258834188, "grad_norm": 0.42606255412101746, "learning_rate": 1.1308538632510099e-05, "loss": 0.3203, "step": 24682 }, { "epoch": 0.4582601560208374, "grad_norm": 0.4616042375564575, "learning_rate": 1.1307382162821568e-05, "loss": 0.2457, "step": 24684 }, { "epoch": 0.45829728615825605, "grad_norm": 0.3962433636188507, "learning_rate": 1.130622567534348e-05, "loss": 0.3392, "step": 24686 }, { "epoch": 0.45833441629567473, "grad_norm": 0.32231810688972473, "learning_rate": 1.1305069170091575e-05, "loss": 0.203, "step": 24688 }, { "epoch": 0.45837154643309336, "grad_norm": 0.6415740847587585, "learning_rate": 1.1303912647081585e-05, "loss": 0.1647, "step": 24690 }, { "epoch": 0.458408676570512, "grad_norm": 0.38293391466140747, "learning_rate": 1.1302756106329253e-05, "loss": 0.099, "step": 24692 }, { "epoch": 0.4584458067079306, "grad_norm": 0.3291141390800476, "learning_rate": 1.130159954785031e-05, "loss": 0.3832, "step": 24694 }, { "epoch": 0.45848293684534924, "grad_norm": 0.3407718539237976, "learning_rate": 1.1300442971660494e-05, "loss": 0.3806, "step": 24696 }, { "epoch": 0.4585200669827679, "grad_norm": 0.41238492727279663, "learning_rate": 1.129928637777555e-05, "loss": 0.3569, "step": 24698 }, { "epoch": 0.45855719712018655, "grad_norm": 0.44277670979499817, "learning_rate": 1.1298129766211205e-05, "loss": 0.2663, "step": 24700 }, { "epoch": 0.4585943272576052, "grad_norm": 0.54229736328125, "learning_rate": 1.1296973136983205e-05, "loss": 0.5922, "step": 24702 }, { "epoch": 0.4586314573950238, "grad_norm": 0.31878790259361267, "learning_rate": 1.1295816490107287e-05, "loss": 0.4151, "step": 24704 }, { "epoch": 0.45866858753244244, "grad_norm": 0.40891537070274353, "learning_rate": 1.1294659825599185e-05, "loss": 0.2835, "step": 24706 }, { "epoch": 0.4587057176698611, "grad_norm": 0.4385741949081421, "learning_rate": 1.129350314347464e-05, "loss": 0.253, "step": 24708 }, { "epoch": 0.45874284780727975, "grad_norm": 0.3405691683292389, "learning_rate": 1.1292346443749395e-05, "loss": 0.2806, "step": 24710 }, { "epoch": 0.4587799779446984, "grad_norm": 0.32953062653541565, "learning_rate": 1.1291189726439184e-05, "loss": 0.2548, "step": 24712 }, { "epoch": 0.458817108082117, "grad_norm": 0.3323723077774048, "learning_rate": 1.1290032991559748e-05, "loss": 0.4626, "step": 24714 }, { "epoch": 0.45885423821953564, "grad_norm": 0.37523239850997925, "learning_rate": 1.1288876239126827e-05, "loss": 0.2649, "step": 24716 }, { "epoch": 0.45889136835695427, "grad_norm": 0.6160756349563599, "learning_rate": 1.1287719469156161e-05, "loss": 0.2367, "step": 24718 }, { "epoch": 0.45892849849437295, "grad_norm": 0.2867957055568695, "learning_rate": 1.128656268166349e-05, "loss": 0.1872, "step": 24720 }, { "epoch": 0.4589656286317916, "grad_norm": 0.3562508523464203, "learning_rate": 1.1285405876664558e-05, "loss": 0.1574, "step": 24722 }, { "epoch": 0.4590027587692102, "grad_norm": 0.241835817694664, "learning_rate": 1.1284249054175101e-05, "loss": 0.1733, "step": 24724 }, { "epoch": 0.45903988890662883, "grad_norm": 0.47047892212867737, "learning_rate": 1.1283092214210858e-05, "loss": 0.2462, "step": 24726 }, { "epoch": 0.45907701904404746, "grad_norm": 0.4878137707710266, "learning_rate": 1.1281935356787574e-05, "loss": 0.5055, "step": 24728 }, { "epoch": 0.45911414918146615, "grad_norm": 0.5731932520866394, "learning_rate": 1.1280778481920993e-05, "loss": 0.2046, "step": 24730 }, { "epoch": 0.4591512793188848, "grad_norm": 0.8377817273139954, "learning_rate": 1.1279621589626852e-05, "loss": 0.2873, "step": 24732 }, { "epoch": 0.4591884094563034, "grad_norm": 0.2459387481212616, "learning_rate": 1.127846467992089e-05, "loss": 0.2742, "step": 24734 }, { "epoch": 0.45922553959372203, "grad_norm": 0.3931157886981964, "learning_rate": 1.127730775281886e-05, "loss": 0.4358, "step": 24736 }, { "epoch": 0.45926266973114066, "grad_norm": 0.46490150690078735, "learning_rate": 1.1276150808336493e-05, "loss": 0.2828, "step": 24738 }, { "epoch": 0.4592997998685593, "grad_norm": 0.36766037344932556, "learning_rate": 1.1274993846489535e-05, "loss": 0.1467, "step": 24740 }, { "epoch": 0.459336930005978, "grad_norm": 0.4598161578178406, "learning_rate": 1.1273836867293732e-05, "loss": 0.1934, "step": 24742 }, { "epoch": 0.4593740601433966, "grad_norm": 0.42807143926620483, "learning_rate": 1.1272679870764827e-05, "loss": 0.3257, "step": 24744 }, { "epoch": 0.45941119028081523, "grad_norm": 0.22443221509456635, "learning_rate": 1.1271522856918556e-05, "loss": 0.3506, "step": 24746 }, { "epoch": 0.45944832041823386, "grad_norm": 0.32144659757614136, "learning_rate": 1.1270365825770671e-05, "loss": 0.2601, "step": 24748 }, { "epoch": 0.4594854505556525, "grad_norm": 0.4392704367637634, "learning_rate": 1.126920877733691e-05, "loss": 0.3422, "step": 24750 }, { "epoch": 0.45952258069307117, "grad_norm": 0.41745316982269287, "learning_rate": 1.1268051711633019e-05, "loss": 0.3134, "step": 24752 }, { "epoch": 0.4595597108304898, "grad_norm": 5.275430679321289, "learning_rate": 1.1266894628674746e-05, "loss": 0.2722, "step": 24754 }, { "epoch": 0.4595968409679084, "grad_norm": 0.32729780673980713, "learning_rate": 1.1265737528477831e-05, "loss": 0.3244, "step": 24756 }, { "epoch": 0.45963397110532705, "grad_norm": 0.5418132543563843, "learning_rate": 1.1264580411058017e-05, "loss": 0.258, "step": 24758 }, { "epoch": 0.4596711012427457, "grad_norm": 0.35150980949401855, "learning_rate": 1.1263423276431051e-05, "loss": 0.1977, "step": 24760 }, { "epoch": 0.4597082313801643, "grad_norm": 0.3756154477596283, "learning_rate": 1.1262266124612684e-05, "loss": 0.3478, "step": 24762 }, { "epoch": 0.459745361517583, "grad_norm": 0.32415974140167236, "learning_rate": 1.1261108955618654e-05, "loss": 0.2035, "step": 24764 }, { "epoch": 0.4597824916550016, "grad_norm": 0.4630662798881531, "learning_rate": 1.1259951769464706e-05, "loss": 0.2426, "step": 24766 }, { "epoch": 0.45981962179242025, "grad_norm": 0.2896573543548584, "learning_rate": 1.1258794566166588e-05, "loss": 0.2515, "step": 24768 }, { "epoch": 0.4598567519298389, "grad_norm": 0.4056151211261749, "learning_rate": 1.1257637345740048e-05, "loss": 0.1871, "step": 24770 }, { "epoch": 0.4598938820672575, "grad_norm": 0.20932339131832123, "learning_rate": 1.1256480108200829e-05, "loss": 0.067, "step": 24772 }, { "epoch": 0.4599310122046762, "grad_norm": 0.379909873008728, "learning_rate": 1.1255322853564683e-05, "loss": 0.2877, "step": 24774 }, { "epoch": 0.4599681423420948, "grad_norm": 0.31244438886642456, "learning_rate": 1.1254165581847352e-05, "loss": 0.1886, "step": 24776 }, { "epoch": 0.46000527247951345, "grad_norm": 0.4667509198188782, "learning_rate": 1.1253008293064583e-05, "loss": 0.4993, "step": 24778 }, { "epoch": 0.4600424026169321, "grad_norm": 0.36874887347221375, "learning_rate": 1.1251850987232124e-05, "loss": 0.3163, "step": 24780 }, { "epoch": 0.4600795327543507, "grad_norm": 0.5158633589744568, "learning_rate": 1.1250693664365725e-05, "loss": 0.3736, "step": 24782 }, { "epoch": 0.4601166628917694, "grad_norm": 0.4242803156375885, "learning_rate": 1.1249536324481127e-05, "loss": 0.4671, "step": 24784 }, { "epoch": 0.460153793029188, "grad_norm": 0.49340811371803284, "learning_rate": 1.124837896759409e-05, "loss": 0.3054, "step": 24786 }, { "epoch": 0.46019092316660665, "grad_norm": 0.2399706095457077, "learning_rate": 1.1247221593720349e-05, "loss": 0.3092, "step": 24788 }, { "epoch": 0.4602280533040253, "grad_norm": 0.37511441111564636, "learning_rate": 1.1246064202875657e-05, "loss": 0.3362, "step": 24790 }, { "epoch": 0.4602651834414439, "grad_norm": 0.33872243762016296, "learning_rate": 1.1244906795075764e-05, "loss": 0.3581, "step": 24792 }, { "epoch": 0.46030231357886253, "grad_norm": 0.4021400809288025, "learning_rate": 1.1243749370336421e-05, "loss": 0.3554, "step": 24794 }, { "epoch": 0.4603394437162812, "grad_norm": 0.25100380182266235, "learning_rate": 1.1242591928673374e-05, "loss": 0.1628, "step": 24796 }, { "epoch": 0.46037657385369984, "grad_norm": 0.44247281551361084, "learning_rate": 1.1241434470102373e-05, "loss": 0.2754, "step": 24798 }, { "epoch": 0.4604137039911185, "grad_norm": 0.3028048872947693, "learning_rate": 1.1240276994639166e-05, "loss": 0.2695, "step": 24800 }, { "epoch": 0.4604508341285371, "grad_norm": 0.3111465275287628, "learning_rate": 1.1239119502299505e-05, "loss": 0.4772, "step": 24802 }, { "epoch": 0.46048796426595573, "grad_norm": 0.3493809700012207, "learning_rate": 1.1237961993099135e-05, "loss": 0.4162, "step": 24804 }, { "epoch": 0.4605250944033744, "grad_norm": 0.3528115451335907, "learning_rate": 1.1236804467053816e-05, "loss": 0.4133, "step": 24806 }, { "epoch": 0.46056222454079304, "grad_norm": 0.40689849853515625, "learning_rate": 1.1235646924179292e-05, "loss": 0.2373, "step": 24808 }, { "epoch": 0.46059935467821167, "grad_norm": 0.4057770073413849, "learning_rate": 1.1234489364491315e-05, "loss": 0.1969, "step": 24810 }, { "epoch": 0.4606364848156303, "grad_norm": 0.42430856823921204, "learning_rate": 1.1233331788005635e-05, "loss": 0.1991, "step": 24812 }, { "epoch": 0.4606736149530489, "grad_norm": 0.34488871693611145, "learning_rate": 1.1232174194738002e-05, "loss": 0.2081, "step": 24814 }, { "epoch": 0.46071074509046755, "grad_norm": 0.6896771788597107, "learning_rate": 1.123101658470417e-05, "loss": 0.3053, "step": 24816 }, { "epoch": 0.46074787522788624, "grad_norm": 0.33918485045433044, "learning_rate": 1.122985895791989e-05, "loss": 0.3627, "step": 24818 }, { "epoch": 0.46078500536530487, "grad_norm": 0.46250197291374207, "learning_rate": 1.1228701314400911e-05, "loss": 0.3811, "step": 24820 }, { "epoch": 0.4608221355027235, "grad_norm": 0.3258797824382782, "learning_rate": 1.122754365416299e-05, "loss": 0.2223, "step": 24822 }, { "epoch": 0.4608592656401421, "grad_norm": 0.3176458477973938, "learning_rate": 1.1226385977221876e-05, "loss": 0.2942, "step": 24824 }, { "epoch": 0.46089639577756075, "grad_norm": 0.4141657054424286, "learning_rate": 1.1225228283593323e-05, "loss": 0.243, "step": 24826 }, { "epoch": 0.46093352591497944, "grad_norm": 0.33861052989959717, "learning_rate": 1.1224070573293082e-05, "loss": 0.2692, "step": 24828 }, { "epoch": 0.46097065605239806, "grad_norm": 0.2734968066215515, "learning_rate": 1.1222912846336912e-05, "loss": 0.3567, "step": 24830 }, { "epoch": 0.4610077861898167, "grad_norm": 0.3466772437095642, "learning_rate": 1.1221755102740555e-05, "loss": 0.2999, "step": 24832 }, { "epoch": 0.4610449163272353, "grad_norm": 0.35979652404785156, "learning_rate": 1.122059734251977e-05, "loss": 0.2568, "step": 24834 }, { "epoch": 0.46108204646465395, "grad_norm": 0.40245288610458374, "learning_rate": 1.1219439565690315e-05, "loss": 0.3132, "step": 24836 }, { "epoch": 0.4611191766020726, "grad_norm": 0.28886058926582336, "learning_rate": 1.1218281772267938e-05, "loss": 0.1643, "step": 24838 }, { "epoch": 0.46115630673949126, "grad_norm": 0.35303306579589844, "learning_rate": 1.1217123962268398e-05, "loss": 0.258, "step": 24840 }, { "epoch": 0.4611934368769099, "grad_norm": 0.5408716201782227, "learning_rate": 1.121596613570744e-05, "loss": 0.2441, "step": 24842 }, { "epoch": 0.4612305670143285, "grad_norm": 0.443154901266098, "learning_rate": 1.1214808292600833e-05, "loss": 0.2506, "step": 24844 }, { "epoch": 0.46126769715174715, "grad_norm": 0.5537800788879395, "learning_rate": 1.121365043296432e-05, "loss": 0.315, "step": 24846 }, { "epoch": 0.4613048272891658, "grad_norm": 0.41054776310920715, "learning_rate": 1.1212492556813662e-05, "loss": 0.3117, "step": 24848 }, { "epoch": 0.46134195742658446, "grad_norm": 0.3634506165981293, "learning_rate": 1.121133466416461e-05, "loss": 0.2409, "step": 24850 }, { "epoch": 0.4613790875640031, "grad_norm": 0.3884308636188507, "learning_rate": 1.1210176755032922e-05, "loss": 0.1973, "step": 24852 }, { "epoch": 0.4614162177014217, "grad_norm": 0.45028001070022583, "learning_rate": 1.1209018829434352e-05, "loss": 0.4625, "step": 24854 }, { "epoch": 0.46145334783884034, "grad_norm": 0.2986210882663727, "learning_rate": 1.120786088738466e-05, "loss": 0.1974, "step": 24856 }, { "epoch": 0.461490477976259, "grad_norm": 0.2877485156059265, "learning_rate": 1.1206702928899598e-05, "loss": 0.28, "step": 24858 }, { "epoch": 0.46152760811367766, "grad_norm": 0.4148741662502289, "learning_rate": 1.1205544953994925e-05, "loss": 0.312, "step": 24860 }, { "epoch": 0.4615647382510963, "grad_norm": 0.23268738389015198, "learning_rate": 1.1204386962686396e-05, "loss": 0.4079, "step": 24862 }, { "epoch": 0.4616018683885149, "grad_norm": 0.3963732123374939, "learning_rate": 1.1203228954989766e-05, "loss": 0.4038, "step": 24864 }, { "epoch": 0.46163899852593354, "grad_norm": 0.7261603474617004, "learning_rate": 1.1202070930920794e-05, "loss": 0.3564, "step": 24866 }, { "epoch": 0.46167612866335217, "grad_norm": 0.33898189663887024, "learning_rate": 1.1200912890495239e-05, "loss": 0.2924, "step": 24868 }, { "epoch": 0.4617132588007708, "grad_norm": 0.3380786180496216, "learning_rate": 1.1199754833728858e-05, "loss": 0.4501, "step": 24870 }, { "epoch": 0.4617503889381895, "grad_norm": 0.4006290137767792, "learning_rate": 1.1198596760637403e-05, "loss": 0.4165, "step": 24872 }, { "epoch": 0.4617875190756081, "grad_norm": 0.344072550535202, "learning_rate": 1.119743867123664e-05, "loss": 0.2894, "step": 24874 }, { "epoch": 0.46182464921302674, "grad_norm": 0.2505243718624115, "learning_rate": 1.1196280565542322e-05, "loss": 0.1822, "step": 24876 }, { "epoch": 0.46186177935044537, "grad_norm": 0.31893032789230347, "learning_rate": 1.1195122443570205e-05, "loss": 0.187, "step": 24878 }, { "epoch": 0.461898909487864, "grad_norm": 0.23779061436653137, "learning_rate": 1.1193964305336058e-05, "loss": 0.2853, "step": 24880 }, { "epoch": 0.4619360396252827, "grad_norm": 0.3483765721321106, "learning_rate": 1.1192806150855631e-05, "loss": 0.2709, "step": 24882 }, { "epoch": 0.4619731697627013, "grad_norm": 0.34843936562538147, "learning_rate": 1.1191647980144681e-05, "loss": 0.3114, "step": 24884 }, { "epoch": 0.46201029990011994, "grad_norm": 0.46261489391326904, "learning_rate": 1.1190489793218975e-05, "loss": 0.3064, "step": 24886 }, { "epoch": 0.46204743003753856, "grad_norm": 0.4472907483577728, "learning_rate": 1.118933159009427e-05, "loss": 0.226, "step": 24888 }, { "epoch": 0.4620845601749572, "grad_norm": 0.3483368754386902, "learning_rate": 1.1188173370786321e-05, "loss": 0.3393, "step": 24890 }, { "epoch": 0.4621216903123758, "grad_norm": 0.33254578709602356, "learning_rate": 1.1187015135310897e-05, "loss": 0.2397, "step": 24892 }, { "epoch": 0.4621588204497945, "grad_norm": 0.28217658400535583, "learning_rate": 1.118585688368375e-05, "loss": 0.321, "step": 24894 }, { "epoch": 0.46219595058721313, "grad_norm": 0.23270997405052185, "learning_rate": 1.1184698615920639e-05, "loss": 0.2167, "step": 24896 }, { "epoch": 0.46223308072463176, "grad_norm": 0.2630429267883301, "learning_rate": 1.118354033203733e-05, "loss": 0.2045, "step": 24898 }, { "epoch": 0.4622702108620504, "grad_norm": 0.31943827867507935, "learning_rate": 1.1182382032049586e-05, "loss": 0.5334, "step": 24900 }, { "epoch": 0.462307340999469, "grad_norm": 0.37029388546943665, "learning_rate": 1.1181223715973162e-05, "loss": 0.1842, "step": 24902 }, { "epoch": 0.4623444711368877, "grad_norm": 0.2004348635673523, "learning_rate": 1.1180065383823821e-05, "loss": 0.1687, "step": 24904 }, { "epoch": 0.46238160127430633, "grad_norm": 0.3348277509212494, "learning_rate": 1.1178907035617325e-05, "loss": 0.2744, "step": 24906 }, { "epoch": 0.46241873141172496, "grad_norm": 0.5614563226699829, "learning_rate": 1.1177748671369435e-05, "loss": 0.3372, "step": 24908 }, { "epoch": 0.4624558615491436, "grad_norm": 0.38579264283180237, "learning_rate": 1.1176590291095912e-05, "loss": 0.3123, "step": 24910 }, { "epoch": 0.4624929916865622, "grad_norm": 0.3339270353317261, "learning_rate": 1.1175431894812524e-05, "loss": 0.2235, "step": 24912 }, { "epoch": 0.46253012182398084, "grad_norm": 0.49250340461730957, "learning_rate": 1.1174273482535028e-05, "loss": 0.4845, "step": 24914 }, { "epoch": 0.46256725196139953, "grad_norm": 0.4855112135410309, "learning_rate": 1.1173115054279182e-05, "loss": 0.2975, "step": 24916 }, { "epoch": 0.46260438209881816, "grad_norm": 0.4341851770877838, "learning_rate": 1.117195661006076e-05, "loss": 0.2032, "step": 24918 }, { "epoch": 0.4626415122362368, "grad_norm": 0.4957031309604645, "learning_rate": 1.1170798149895514e-05, "loss": 0.2484, "step": 24920 }, { "epoch": 0.4626786423736554, "grad_norm": 0.3832385241985321, "learning_rate": 1.1169639673799213e-05, "loss": 0.2838, "step": 24922 }, { "epoch": 0.46271577251107404, "grad_norm": 0.4815942347049713, "learning_rate": 1.1168481181787622e-05, "loss": 0.2357, "step": 24924 }, { "epoch": 0.4627529026484927, "grad_norm": 0.38324955105781555, "learning_rate": 1.1167322673876501e-05, "loss": 0.3678, "step": 24926 }, { "epoch": 0.46279003278591135, "grad_norm": 0.25386250019073486, "learning_rate": 1.1166164150081616e-05, "loss": 0.3157, "step": 24928 }, { "epoch": 0.46282716292333, "grad_norm": 0.5275769829750061, "learning_rate": 1.1165005610418726e-05, "loss": 0.1944, "step": 24930 }, { "epoch": 0.4628642930607486, "grad_norm": 0.8354781270027161, "learning_rate": 1.1163847054903605e-05, "loss": 0.2927, "step": 24932 }, { "epoch": 0.46290142319816724, "grad_norm": 0.4561612606048584, "learning_rate": 1.1162688483552009e-05, "loss": 0.3681, "step": 24934 }, { "epoch": 0.46293855333558587, "grad_norm": 0.6024540662765503, "learning_rate": 1.1161529896379703e-05, "loss": 0.2533, "step": 24936 }, { "epoch": 0.46297568347300455, "grad_norm": 0.19927014410495758, "learning_rate": 1.1160371293402458e-05, "loss": 0.2018, "step": 24938 }, { "epoch": 0.4630128136104232, "grad_norm": 0.4563714265823364, "learning_rate": 1.1159212674636036e-05, "loss": 0.3889, "step": 24940 }, { "epoch": 0.4630499437478418, "grad_norm": 0.3190673589706421, "learning_rate": 1.1158054040096198e-05, "loss": 0.2855, "step": 24942 }, { "epoch": 0.46308707388526044, "grad_norm": 0.6262931823730469, "learning_rate": 1.1156895389798714e-05, "loss": 0.281, "step": 24944 }, { "epoch": 0.46312420402267906, "grad_norm": 0.35368961095809937, "learning_rate": 1.1155736723759352e-05, "loss": 0.4124, "step": 24946 }, { "epoch": 0.46316133416009775, "grad_norm": 0.2934551239013672, "learning_rate": 1.1154578041993874e-05, "loss": 0.2523, "step": 24948 }, { "epoch": 0.4631984642975164, "grad_norm": 0.3389197289943695, "learning_rate": 1.1153419344518047e-05, "loss": 0.4044, "step": 24950 }, { "epoch": 0.463235594434935, "grad_norm": 0.4323364198207855, "learning_rate": 1.1152260631347634e-05, "loss": 0.1507, "step": 24952 }, { "epoch": 0.46327272457235363, "grad_norm": 0.3411848247051239, "learning_rate": 1.115110190249841e-05, "loss": 0.2434, "step": 24954 }, { "epoch": 0.46330985470977226, "grad_norm": 0.3328692615032196, "learning_rate": 1.1149943157986136e-05, "loss": 0.5382, "step": 24956 }, { "epoch": 0.46334698484719095, "grad_norm": 0.3350425958633423, "learning_rate": 1.1148784397826578e-05, "loss": 0.416, "step": 24958 }, { "epoch": 0.4633841149846096, "grad_norm": 0.4458329975605011, "learning_rate": 1.1147625622035506e-05, "loss": 0.2581, "step": 24960 }, { "epoch": 0.4634212451220282, "grad_norm": 0.3829217851161957, "learning_rate": 1.1146466830628688e-05, "loss": 0.1953, "step": 24962 }, { "epoch": 0.46345837525944683, "grad_norm": 0.8776864409446716, "learning_rate": 1.1145308023621887e-05, "loss": 0.2634, "step": 24964 }, { "epoch": 0.46349550539686546, "grad_norm": 0.31340283155441284, "learning_rate": 1.1144149201030881e-05, "loss": 0.1179, "step": 24966 }, { "epoch": 0.4635326355342841, "grad_norm": 0.3972989618778229, "learning_rate": 1.1142990362871426e-05, "loss": 0.4119, "step": 24968 }, { "epoch": 0.46356976567170277, "grad_norm": 0.41166236996650696, "learning_rate": 1.1141831509159298e-05, "loss": 0.4087, "step": 24970 }, { "epoch": 0.4636068958091214, "grad_norm": 0.41264697909355164, "learning_rate": 1.1140672639910261e-05, "loss": 0.2968, "step": 24972 }, { "epoch": 0.46364402594654003, "grad_norm": 0.3329342305660248, "learning_rate": 1.1139513755140087e-05, "loss": 0.2092, "step": 24974 }, { "epoch": 0.46368115608395866, "grad_norm": 0.39777353405952454, "learning_rate": 1.1138354854864549e-05, "loss": 0.256, "step": 24976 }, { "epoch": 0.4637182862213773, "grad_norm": 0.3956587612628937, "learning_rate": 1.1137195939099408e-05, "loss": 0.2352, "step": 24978 }, { "epoch": 0.46375541635879597, "grad_norm": 0.4261299669742584, "learning_rate": 1.1136037007860435e-05, "loss": 0.1663, "step": 24980 }, { "epoch": 0.4637925464962146, "grad_norm": 0.2760485112667084, "learning_rate": 1.1134878061163401e-05, "loss": 0.3332, "step": 24982 }, { "epoch": 0.4638296766336332, "grad_norm": 0.43186715245246887, "learning_rate": 1.1133719099024076e-05, "loss": 0.208, "step": 24984 }, { "epoch": 0.46386680677105185, "grad_norm": 0.5443124175071716, "learning_rate": 1.1132560121458234e-05, "loss": 0.2296, "step": 24986 }, { "epoch": 0.4639039369084705, "grad_norm": 0.5913235545158386, "learning_rate": 1.1131401128481638e-05, "loss": 0.2801, "step": 24988 }, { "epoch": 0.4639410670458891, "grad_norm": 0.35802584886550903, "learning_rate": 1.113024212011006e-05, "loss": 0.3241, "step": 24990 }, { "epoch": 0.4639781971833078, "grad_norm": 0.25926247239112854, "learning_rate": 1.1129083096359273e-05, "loss": 0.2435, "step": 24992 }, { "epoch": 0.4640153273207264, "grad_norm": 0.39737045764923096, "learning_rate": 1.112792405724505e-05, "loss": 0.1968, "step": 24994 }, { "epoch": 0.46405245745814505, "grad_norm": 0.7752198576927185, "learning_rate": 1.1126765002783155e-05, "loss": 0.1905, "step": 24996 }, { "epoch": 0.4640895875955637, "grad_norm": 0.179983988404274, "learning_rate": 1.1125605932989367e-05, "loss": 0.2536, "step": 24998 }, { "epoch": 0.4641267177329823, "grad_norm": 0.33478426933288574, "learning_rate": 1.1124446847879454e-05, "loss": 0.4343, "step": 25000 }, { "epoch": 0.464163847870401, "grad_norm": 0.31066006422042847, "learning_rate": 1.1123287747469183e-05, "loss": 0.2546, "step": 25002 }, { "epoch": 0.4642009780078196, "grad_norm": 0.5079692602157593, "learning_rate": 1.1122128631774331e-05, "loss": 0.3384, "step": 25004 }, { "epoch": 0.46423810814523825, "grad_norm": 0.3383162021636963, "learning_rate": 1.1120969500810672e-05, "loss": 0.171, "step": 25006 }, { "epoch": 0.4642752382826569, "grad_norm": 0.47008898854255676, "learning_rate": 1.111981035459398e-05, "loss": 0.3149, "step": 25008 }, { "epoch": 0.4643123684200755, "grad_norm": 0.46213915944099426, "learning_rate": 1.1118651193140016e-05, "loss": 0.3812, "step": 25010 }, { "epoch": 0.46434949855749413, "grad_norm": 0.5277388691902161, "learning_rate": 1.1117492016464562e-05, "loss": 0.519, "step": 25012 }, { "epoch": 0.4643866286949128, "grad_norm": 0.35149312019348145, "learning_rate": 1.111633282458339e-05, "loss": 0.2365, "step": 25014 }, { "epoch": 0.46442375883233145, "grad_norm": 0.42730915546417236, "learning_rate": 1.1115173617512271e-05, "loss": 0.2942, "step": 25016 }, { "epoch": 0.4644608889697501, "grad_norm": 0.23597809672355652, "learning_rate": 1.111401439526698e-05, "loss": 0.3043, "step": 25018 }, { "epoch": 0.4644980191071687, "grad_norm": 0.483123779296875, "learning_rate": 1.111285515786329e-05, "loss": 0.3044, "step": 25020 }, { "epoch": 0.46453514924458733, "grad_norm": 0.2866550385951996, "learning_rate": 1.1111695905316976e-05, "loss": 0.2033, "step": 25022 }, { "epoch": 0.464572279382006, "grad_norm": 0.557029664516449, "learning_rate": 1.1110536637643807e-05, "loss": 0.3902, "step": 25024 }, { "epoch": 0.46460940951942464, "grad_norm": 0.33052733540534973, "learning_rate": 1.1109377354859565e-05, "loss": 0.2201, "step": 25026 }, { "epoch": 0.46464653965684327, "grad_norm": 0.42713308334350586, "learning_rate": 1.1108218056980017e-05, "loss": 0.3562, "step": 25028 }, { "epoch": 0.4646836697942619, "grad_norm": 0.3552428185939789, "learning_rate": 1.1107058744020945e-05, "loss": 0.1674, "step": 25030 }, { "epoch": 0.46472079993168053, "grad_norm": 0.4714275300502777, "learning_rate": 1.1105899415998116e-05, "loss": 0.3118, "step": 25032 }, { "epoch": 0.4647579300690992, "grad_norm": 0.39476677775382996, "learning_rate": 1.1104740072927309e-05, "loss": 0.2278, "step": 25034 }, { "epoch": 0.46479506020651784, "grad_norm": 0.24415062367916107, "learning_rate": 1.1103580714824298e-05, "loss": 0.2332, "step": 25036 }, { "epoch": 0.46483219034393647, "grad_norm": 0.238313227891922, "learning_rate": 1.1102421341704861e-05, "loss": 0.3035, "step": 25038 }, { "epoch": 0.4648693204813551, "grad_norm": 0.586731493473053, "learning_rate": 1.110126195358477e-05, "loss": 0.2039, "step": 25040 }, { "epoch": 0.4649064506187737, "grad_norm": 0.3552999794483185, "learning_rate": 1.1100102550479803e-05, "loss": 0.3484, "step": 25042 }, { "epoch": 0.46494358075619235, "grad_norm": 0.36459195613861084, "learning_rate": 1.1098943132405735e-05, "loss": 0.3841, "step": 25044 }, { "epoch": 0.46498071089361104, "grad_norm": 0.40709102153778076, "learning_rate": 1.1097783699378344e-05, "loss": 0.3617, "step": 25046 }, { "epoch": 0.46501784103102967, "grad_norm": 0.1688622236251831, "learning_rate": 1.1096624251413401e-05, "loss": 0.3131, "step": 25048 }, { "epoch": 0.4650549711684483, "grad_norm": 0.3475238084793091, "learning_rate": 1.1095464788526693e-05, "loss": 0.4155, "step": 25050 }, { "epoch": 0.4650921013058669, "grad_norm": 0.4512470066547394, "learning_rate": 1.1094305310733988e-05, "loss": 0.2479, "step": 25052 }, { "epoch": 0.46512923144328555, "grad_norm": 0.2326812595129013, "learning_rate": 1.1093145818051063e-05, "loss": 0.3289, "step": 25054 }, { "epoch": 0.46516636158070424, "grad_norm": 0.3711986839771271, "learning_rate": 1.10919863104937e-05, "loss": 0.3219, "step": 25056 }, { "epoch": 0.46520349171812286, "grad_norm": 0.419181227684021, "learning_rate": 1.1090826788077675e-05, "loss": 0.3731, "step": 25058 }, { "epoch": 0.4652406218555415, "grad_norm": 2.2664918899536133, "learning_rate": 1.1089667250818763e-05, "loss": 0.2771, "step": 25060 }, { "epoch": 0.4652777519929601, "grad_norm": 0.29841262102127075, "learning_rate": 1.1088507698732745e-05, "loss": 0.3939, "step": 25062 }, { "epoch": 0.46531488213037875, "grad_norm": 0.354340523481369, "learning_rate": 1.1087348131835399e-05, "loss": 0.1534, "step": 25064 }, { "epoch": 0.4653520122677974, "grad_norm": 0.45017555356025696, "learning_rate": 1.1086188550142498e-05, "loss": 0.2657, "step": 25066 }, { "epoch": 0.46538914240521606, "grad_norm": 0.5132289528846741, "learning_rate": 1.1085028953669824e-05, "loss": 0.2018, "step": 25068 }, { "epoch": 0.4654262725426347, "grad_norm": 0.30213218927383423, "learning_rate": 1.108386934243316e-05, "loss": 0.2668, "step": 25070 }, { "epoch": 0.4654634026800533, "grad_norm": 0.5752504467964172, "learning_rate": 1.1082709716448281e-05, "loss": 0.1883, "step": 25072 }, { "epoch": 0.46550053281747195, "grad_norm": 1.1086883544921875, "learning_rate": 1.1081550075730962e-05, "loss": 0.2311, "step": 25074 }, { "epoch": 0.4655376629548906, "grad_norm": 0.35609790682792664, "learning_rate": 1.1080390420296989e-05, "loss": 0.2149, "step": 25076 }, { "epoch": 0.46557479309230926, "grad_norm": 0.3935128152370453, "learning_rate": 1.1079230750162137e-05, "loss": 0.1225, "step": 25078 }, { "epoch": 0.4656119232297279, "grad_norm": 0.53044593334198, "learning_rate": 1.1078071065342187e-05, "loss": 0.2086, "step": 25080 }, { "epoch": 0.4656490533671465, "grad_norm": 0.4616038203239441, "learning_rate": 1.1076911365852921e-05, "loss": 0.2539, "step": 25082 }, { "epoch": 0.46568618350456514, "grad_norm": 0.3729253113269806, "learning_rate": 1.1075751651710117e-05, "loss": 0.1893, "step": 25084 }, { "epoch": 0.46572331364198377, "grad_norm": 0.28270411491394043, "learning_rate": 1.1074591922929553e-05, "loss": 0.408, "step": 25086 }, { "epoch": 0.4657604437794024, "grad_norm": 0.6389529705047607, "learning_rate": 1.1073432179527014e-05, "loss": 0.2549, "step": 25088 }, { "epoch": 0.4657975739168211, "grad_norm": 0.39129218459129333, "learning_rate": 1.1072272421518275e-05, "loss": 0.4089, "step": 25090 }, { "epoch": 0.4658347040542397, "grad_norm": 0.5073806643486023, "learning_rate": 1.1071112648919123e-05, "loss": 0.6049, "step": 25092 }, { "epoch": 0.46587183419165834, "grad_norm": 0.5053232908248901, "learning_rate": 1.1069952861745336e-05, "loss": 0.2074, "step": 25094 }, { "epoch": 0.46590896432907697, "grad_norm": 0.6206637620925903, "learning_rate": 1.1068793060012695e-05, "loss": 0.2796, "step": 25096 }, { "epoch": 0.4659460944664956, "grad_norm": 0.4092426300048828, "learning_rate": 1.1067633243736982e-05, "loss": 0.3888, "step": 25098 }, { "epoch": 0.4659832246039143, "grad_norm": 0.3590167462825775, "learning_rate": 1.1066473412933975e-05, "loss": 0.2997, "step": 25100 }, { "epoch": 0.4660203547413329, "grad_norm": 0.41744059324264526, "learning_rate": 1.1065313567619464e-05, "loss": 0.3785, "step": 25102 }, { "epoch": 0.46605748487875154, "grad_norm": 0.29979026317596436, "learning_rate": 1.1064153707809226e-05, "loss": 0.1919, "step": 25104 }, { "epoch": 0.46609461501617017, "grad_norm": 0.3698369562625885, "learning_rate": 1.1062993833519043e-05, "loss": 0.6105, "step": 25106 }, { "epoch": 0.4661317451535888, "grad_norm": 0.494426965713501, "learning_rate": 1.10618339447647e-05, "loss": 0.279, "step": 25108 }, { "epoch": 0.4661688752910075, "grad_norm": 0.38214918971061707, "learning_rate": 1.1060674041561972e-05, "loss": 0.0795, "step": 25110 }, { "epoch": 0.4662060054284261, "grad_norm": 0.3748513460159302, "learning_rate": 1.105951412392665e-05, "loss": 0.2659, "step": 25112 }, { "epoch": 0.46624313556584474, "grad_norm": 0.21436573565006256, "learning_rate": 1.1058354191874516e-05, "loss": 0.3447, "step": 25114 }, { "epoch": 0.46628026570326336, "grad_norm": 0.5077074766159058, "learning_rate": 1.105719424542135e-05, "loss": 0.2142, "step": 25116 }, { "epoch": 0.466317395840682, "grad_norm": 0.2975423038005829, "learning_rate": 1.1056034284582937e-05, "loss": 0.2708, "step": 25118 }, { "epoch": 0.4663545259781006, "grad_norm": 0.5866014361381531, "learning_rate": 1.105487430937506e-05, "loss": 0.2119, "step": 25120 }, { "epoch": 0.4663916561155193, "grad_norm": 0.3719688057899475, "learning_rate": 1.1053714319813504e-05, "loss": 0.2184, "step": 25122 }, { "epoch": 0.46642878625293793, "grad_norm": 0.49235957860946655, "learning_rate": 1.1052554315914056e-05, "loss": 0.3743, "step": 25124 }, { "epoch": 0.46646591639035656, "grad_norm": 0.4199400842189789, "learning_rate": 1.1051394297692493e-05, "loss": 0.3393, "step": 25126 }, { "epoch": 0.4665030465277752, "grad_norm": 0.6567108035087585, "learning_rate": 1.10502342651646e-05, "loss": 0.2389, "step": 25128 }, { "epoch": 0.4665401766651938, "grad_norm": 0.3141202926635742, "learning_rate": 1.1049074218346167e-05, "loss": 0.2315, "step": 25130 }, { "epoch": 0.4665773068026125, "grad_norm": 0.44798415899276733, "learning_rate": 1.1047914157252978e-05, "loss": 0.3434, "step": 25132 }, { "epoch": 0.46661443694003113, "grad_norm": 0.5066092014312744, "learning_rate": 1.1046754081900815e-05, "loss": 0.2428, "step": 25134 }, { "epoch": 0.46665156707744976, "grad_norm": 0.5264660120010376, "learning_rate": 1.1045593992305466e-05, "loss": 0.3802, "step": 25136 }, { "epoch": 0.4666886972148684, "grad_norm": 0.31128212809562683, "learning_rate": 1.104443388848271e-05, "loss": 0.348, "step": 25138 }, { "epoch": 0.466725827352287, "grad_norm": 0.3445548713207245, "learning_rate": 1.1043273770448342e-05, "loss": 0.4487, "step": 25140 }, { "epoch": 0.46676295748970564, "grad_norm": 0.5792197585105896, "learning_rate": 1.1042113638218141e-05, "loss": 0.1849, "step": 25142 }, { "epoch": 0.4668000876271243, "grad_norm": 0.4217890501022339, "learning_rate": 1.1040953491807893e-05, "loss": 0.391, "step": 25144 }, { "epoch": 0.46683721776454296, "grad_norm": 0.3623315691947937, "learning_rate": 1.103979333123339e-05, "loss": 0.268, "step": 25146 }, { "epoch": 0.4668743479019616, "grad_norm": 0.38843193650245667, "learning_rate": 1.1038633156510413e-05, "loss": 0.4919, "step": 25148 }, { "epoch": 0.4669114780393802, "grad_norm": 0.38810229301452637, "learning_rate": 1.1037472967654748e-05, "loss": 0.5589, "step": 25150 }, { "epoch": 0.46694860817679884, "grad_norm": 0.5897156000137329, "learning_rate": 1.1036312764682186e-05, "loss": 0.4108, "step": 25152 }, { "epoch": 0.4669857383142175, "grad_norm": 0.34713464975357056, "learning_rate": 1.1035152547608507e-05, "loss": 0.3496, "step": 25154 }, { "epoch": 0.46702286845163615, "grad_norm": 0.6029224395751953, "learning_rate": 1.103399231644951e-05, "loss": 0.3305, "step": 25156 }, { "epoch": 0.4670599985890548, "grad_norm": 0.32493284344673157, "learning_rate": 1.103283207122097e-05, "loss": 0.324, "step": 25158 }, { "epoch": 0.4670971287264734, "grad_norm": 0.35407131910324097, "learning_rate": 1.1031671811938679e-05, "loss": 0.3839, "step": 25160 }, { "epoch": 0.46713425886389204, "grad_norm": 0.4451431632041931, "learning_rate": 1.1030511538618423e-05, "loss": 0.1529, "step": 25162 }, { "epoch": 0.46717138900131067, "grad_norm": 0.38851088285446167, "learning_rate": 1.1029351251275996e-05, "loss": 0.4896, "step": 25164 }, { "epoch": 0.46720851913872935, "grad_norm": 0.37447088956832886, "learning_rate": 1.1028190949927177e-05, "loss": 0.3102, "step": 25166 }, { "epoch": 0.467245649276148, "grad_norm": 0.32535800337791443, "learning_rate": 1.1027030634587763e-05, "loss": 0.319, "step": 25168 }, { "epoch": 0.4672827794135666, "grad_norm": 0.6257903575897217, "learning_rate": 1.1025870305273539e-05, "loss": 0.3255, "step": 25170 }, { "epoch": 0.46731990955098524, "grad_norm": 0.365803599357605, "learning_rate": 1.1024709962000288e-05, "loss": 0.3271, "step": 25172 }, { "epoch": 0.46735703968840386, "grad_norm": 0.3670259416103363, "learning_rate": 1.1023549604783807e-05, "loss": 0.3423, "step": 25174 }, { "epoch": 0.46739416982582255, "grad_norm": 0.568078339099884, "learning_rate": 1.102238923363988e-05, "loss": 0.3, "step": 25176 }, { "epoch": 0.4674312999632412, "grad_norm": 0.6759040951728821, "learning_rate": 1.1021228848584302e-05, "loss": 0.2391, "step": 25178 }, { "epoch": 0.4674684301006598, "grad_norm": 0.34443145990371704, "learning_rate": 1.1020068449632855e-05, "loss": 0.0866, "step": 25180 }, { "epoch": 0.46750556023807843, "grad_norm": 0.34142881631851196, "learning_rate": 1.1018908036801333e-05, "loss": 0.2949, "step": 25182 }, { "epoch": 0.46754269037549706, "grad_norm": 0.3911501169204712, "learning_rate": 1.1017747610105524e-05, "loss": 0.3449, "step": 25184 }, { "epoch": 0.46757982051291574, "grad_norm": 0.3580528497695923, "learning_rate": 1.1016587169561219e-05, "loss": 0.112, "step": 25186 }, { "epoch": 0.4676169506503344, "grad_norm": 0.33637839555740356, "learning_rate": 1.1015426715184208e-05, "loss": 0.1929, "step": 25188 }, { "epoch": 0.467654080787753, "grad_norm": 0.4197739064693451, "learning_rate": 1.1014266246990282e-05, "loss": 0.3444, "step": 25190 }, { "epoch": 0.46769121092517163, "grad_norm": 0.4633869528770447, "learning_rate": 1.1013105764995226e-05, "loss": 0.4417, "step": 25192 }, { "epoch": 0.46772834106259026, "grad_norm": 0.3353174924850464, "learning_rate": 1.1011945269214838e-05, "loss": 0.3327, "step": 25194 }, { "epoch": 0.4677654712000089, "grad_norm": 0.49701496958732605, "learning_rate": 1.1010784759664908e-05, "loss": 0.2775, "step": 25196 }, { "epoch": 0.46780260133742757, "grad_norm": 0.33493027091026306, "learning_rate": 1.1009624236361221e-05, "loss": 0.2803, "step": 25198 }, { "epoch": 0.4678397314748462, "grad_norm": 0.3537202775478363, "learning_rate": 1.1008463699319576e-05, "loss": 0.3812, "step": 25200 }, { "epoch": 0.4678768616122648, "grad_norm": 0.39976415038108826, "learning_rate": 1.1007303148555759e-05, "loss": 0.4311, "step": 25202 }, { "epoch": 0.46791399174968346, "grad_norm": 0.4124237596988678, "learning_rate": 1.1006142584085565e-05, "loss": 0.3013, "step": 25204 }, { "epoch": 0.4679511218871021, "grad_norm": 0.30356067419052124, "learning_rate": 1.100498200592478e-05, "loss": 0.3088, "step": 25206 }, { "epoch": 0.46798825202452077, "grad_norm": 0.3849911093711853, "learning_rate": 1.1003821414089204e-05, "loss": 0.2899, "step": 25208 }, { "epoch": 0.4680253821619394, "grad_norm": 0.3869044780731201, "learning_rate": 1.1002660808594625e-05, "loss": 0.3382, "step": 25210 }, { "epoch": 0.468062512299358, "grad_norm": 0.32600852847099304, "learning_rate": 1.1001500189456833e-05, "loss": 0.2665, "step": 25212 }, { "epoch": 0.46809964243677665, "grad_norm": 0.3954405188560486, "learning_rate": 1.1000339556691626e-05, "loss": 0.2853, "step": 25214 }, { "epoch": 0.4681367725741953, "grad_norm": 0.2962939441204071, "learning_rate": 1.0999178910314793e-05, "loss": 0.2734, "step": 25216 }, { "epoch": 0.4681739027116139, "grad_norm": 0.34606990218162537, "learning_rate": 1.0998018250342129e-05, "loss": 0.174, "step": 25218 }, { "epoch": 0.4682110328490326, "grad_norm": 0.3607162535190582, "learning_rate": 1.0996857576789424e-05, "loss": 0.3884, "step": 25220 }, { "epoch": 0.4682481629864512, "grad_norm": 0.39793089032173157, "learning_rate": 1.0995696889672475e-05, "loss": 0.2161, "step": 25222 }, { "epoch": 0.46828529312386985, "grad_norm": 0.34819117188453674, "learning_rate": 1.0994536189007072e-05, "loss": 0.3701, "step": 25224 }, { "epoch": 0.4683224232612885, "grad_norm": 0.4439598023891449, "learning_rate": 1.0993375474809012e-05, "loss": 0.2586, "step": 25226 }, { "epoch": 0.4683595533987071, "grad_norm": 0.4569803774356842, "learning_rate": 1.0992214747094087e-05, "loss": 0.2141, "step": 25228 }, { "epoch": 0.4683966835361258, "grad_norm": 0.3045515716075897, "learning_rate": 1.099105400587809e-05, "loss": 0.4746, "step": 25230 }, { "epoch": 0.4684338136735444, "grad_norm": 0.38938406109809875, "learning_rate": 1.098989325117682e-05, "loss": 0.413, "step": 25232 }, { "epoch": 0.46847094381096305, "grad_norm": 0.5721226334571838, "learning_rate": 1.0988732483006065e-05, "loss": 0.3424, "step": 25234 }, { "epoch": 0.4685080739483817, "grad_norm": 0.30690598487854004, "learning_rate": 1.0987571701381622e-05, "loss": 0.215, "step": 25236 }, { "epoch": 0.4685452040858003, "grad_norm": 0.28986838459968567, "learning_rate": 1.098641090631929e-05, "loss": 0.3765, "step": 25238 }, { "epoch": 0.46858233422321893, "grad_norm": 0.3085126280784607, "learning_rate": 1.0985250097834856e-05, "loss": 0.4014, "step": 25240 }, { "epoch": 0.4686194643606376, "grad_norm": 0.3355043828487396, "learning_rate": 1.0984089275944124e-05, "loss": 0.3825, "step": 25242 }, { "epoch": 0.46865659449805624, "grad_norm": 0.3309830129146576, "learning_rate": 1.098292844066288e-05, "loss": 0.213, "step": 25244 }, { "epoch": 0.4686937246354749, "grad_norm": 0.4079948365688324, "learning_rate": 1.0981767592006926e-05, "loss": 0.3177, "step": 25246 }, { "epoch": 0.4687308547728935, "grad_norm": 0.3680943250656128, "learning_rate": 1.0980606729992057e-05, "loss": 0.3049, "step": 25248 }, { "epoch": 0.46876798491031213, "grad_norm": 0.37264418601989746, "learning_rate": 1.0979445854634063e-05, "loss": 0.2529, "step": 25250 }, { "epoch": 0.4688051150477308, "grad_norm": 0.4549490511417389, "learning_rate": 1.0978284965948749e-05, "loss": 0.2483, "step": 25252 }, { "epoch": 0.46884224518514944, "grad_norm": 0.4595699906349182, "learning_rate": 1.0977124063951907e-05, "loss": 0.3032, "step": 25254 }, { "epoch": 0.46887937532256807, "grad_norm": 0.5923108458518982, "learning_rate": 1.0975963148659332e-05, "loss": 0.2797, "step": 25256 }, { "epoch": 0.4689165054599867, "grad_norm": 0.3875150680541992, "learning_rate": 1.097480222008682e-05, "loss": 0.2685, "step": 25258 }, { "epoch": 0.4689536355974053, "grad_norm": 0.580773651599884, "learning_rate": 1.0973641278250174e-05, "loss": 0.3159, "step": 25260 }, { "epoch": 0.468990765734824, "grad_norm": 0.35838016867637634, "learning_rate": 1.0972480323165184e-05, "loss": 0.4134, "step": 25262 }, { "epoch": 0.46902789587224264, "grad_norm": 0.41542139649391174, "learning_rate": 1.097131935484765e-05, "loss": 0.2316, "step": 25264 }, { "epoch": 0.46906502600966127, "grad_norm": 0.4149010181427002, "learning_rate": 1.0970158373313372e-05, "loss": 0.4205, "step": 25266 }, { "epoch": 0.4691021561470799, "grad_norm": 0.582599401473999, "learning_rate": 1.096899737857814e-05, "loss": 0.325, "step": 25268 }, { "epoch": 0.4691392862844985, "grad_norm": 0.4867689609527588, "learning_rate": 1.0967836370657756e-05, "loss": 0.3488, "step": 25270 }, { "epoch": 0.46917641642191715, "grad_norm": 0.34824836254119873, "learning_rate": 1.0966675349568022e-05, "loss": 0.2708, "step": 25272 }, { "epoch": 0.46921354655933584, "grad_norm": 0.4124637842178345, "learning_rate": 1.096551431532473e-05, "loss": 0.3572, "step": 25274 }, { "epoch": 0.46925067669675447, "grad_norm": 0.28803399205207825, "learning_rate": 1.0964353267943676e-05, "loss": 0.2799, "step": 25276 }, { "epoch": 0.4692878068341731, "grad_norm": 0.3415999114513397, "learning_rate": 1.096319220744067e-05, "loss": 0.3573, "step": 25278 }, { "epoch": 0.4693249369715917, "grad_norm": 0.6424931883811951, "learning_rate": 1.09620311338315e-05, "loss": 0.4905, "step": 25280 }, { "epoch": 0.46936206710901035, "grad_norm": 0.3480803966522217, "learning_rate": 1.0960870047131964e-05, "loss": 0.2942, "step": 25282 }, { "epoch": 0.46939919724642903, "grad_norm": 0.3467440903186798, "learning_rate": 1.095970894735787e-05, "loss": 0.2624, "step": 25284 }, { "epoch": 0.46943632738384766, "grad_norm": 0.32604917883872986, "learning_rate": 1.0958547834525014e-05, "loss": 0.3658, "step": 25286 }, { "epoch": 0.4694734575212663, "grad_norm": 0.5554585456848145, "learning_rate": 1.0957386708649186e-05, "loss": 0.2983, "step": 25288 }, { "epoch": 0.4695105876586849, "grad_norm": 0.3255753517150879, "learning_rate": 1.0956225569746197e-05, "loss": 0.4388, "step": 25290 }, { "epoch": 0.46954771779610355, "grad_norm": 0.4001261591911316, "learning_rate": 1.095506441783184e-05, "loss": 0.335, "step": 25292 }, { "epoch": 0.4695848479335222, "grad_norm": 0.4568459987640381, "learning_rate": 1.095390325292192e-05, "loss": 0.2287, "step": 25294 }, { "epoch": 0.46962197807094086, "grad_norm": 0.35111257433891296, "learning_rate": 1.0952742075032233e-05, "loss": 0.2003, "step": 25296 }, { "epoch": 0.4696591082083595, "grad_norm": 0.23794105648994446, "learning_rate": 1.0951580884178578e-05, "loss": 0.287, "step": 25298 }, { "epoch": 0.4696962383457781, "grad_norm": 0.44977840781211853, "learning_rate": 1.0950419680376758e-05, "loss": 0.3437, "step": 25300 }, { "epoch": 0.46973336848319674, "grad_norm": 0.37733030319213867, "learning_rate": 1.0949258463642573e-05, "loss": 0.1901, "step": 25302 }, { "epoch": 0.4697704986206154, "grad_norm": 0.4485464096069336, "learning_rate": 1.0948097233991826e-05, "loss": 0.215, "step": 25304 }, { "epoch": 0.46980762875803406, "grad_norm": 0.3021954894065857, "learning_rate": 1.0946935991440316e-05, "loss": 0.2614, "step": 25306 }, { "epoch": 0.4698447588954527, "grad_norm": 0.33837538957595825, "learning_rate": 1.094577473600384e-05, "loss": 0.2, "step": 25308 }, { "epoch": 0.4698818890328713, "grad_norm": 0.3244591951370239, "learning_rate": 1.0944613467698206e-05, "loss": 0.2979, "step": 25310 }, { "epoch": 0.46991901917028994, "grad_norm": 0.40208226442337036, "learning_rate": 1.094345218653921e-05, "loss": 0.3295, "step": 25312 }, { "epoch": 0.46995614930770857, "grad_norm": 0.4616546332836151, "learning_rate": 1.0942290892542655e-05, "loss": 0.6368, "step": 25314 }, { "epoch": 0.4699932794451272, "grad_norm": 0.3548000454902649, "learning_rate": 1.0941129585724349e-05, "loss": 0.3495, "step": 25316 }, { "epoch": 0.4700304095825459, "grad_norm": 0.28740596771240234, "learning_rate": 1.0939968266100082e-05, "loss": 0.1909, "step": 25318 }, { "epoch": 0.4700675397199645, "grad_norm": 0.31129199266433716, "learning_rate": 1.0938806933685664e-05, "loss": 0.1985, "step": 25320 }, { "epoch": 0.47010466985738314, "grad_norm": 0.31043151021003723, "learning_rate": 1.0937645588496897e-05, "loss": 0.3829, "step": 25322 }, { "epoch": 0.47014179999480177, "grad_norm": 0.3982013165950775, "learning_rate": 1.093648423054958e-05, "loss": 0.1303, "step": 25324 }, { "epoch": 0.4701789301322204, "grad_norm": 0.45914027094841003, "learning_rate": 1.093532285985952e-05, "loss": 0.3286, "step": 25326 }, { "epoch": 0.4702160602696391, "grad_norm": 0.3020865321159363, "learning_rate": 1.0934161476442517e-05, "loss": 0.3722, "step": 25328 }, { "epoch": 0.4702531904070577, "grad_norm": 0.43697452545166016, "learning_rate": 1.0933000080314373e-05, "loss": 0.2866, "step": 25330 }, { "epoch": 0.47029032054447634, "grad_norm": 0.5363171100616455, "learning_rate": 1.0931838671490893e-05, "loss": 0.3378, "step": 25332 }, { "epoch": 0.47032745068189497, "grad_norm": 0.30566373467445374, "learning_rate": 1.0930677249987882e-05, "loss": 0.1571, "step": 25334 }, { "epoch": 0.4703645808193136, "grad_norm": 0.3327628970146179, "learning_rate": 1.0929515815821139e-05, "loss": 0.1733, "step": 25336 }, { "epoch": 0.4704017109567323, "grad_norm": 0.4444446563720703, "learning_rate": 1.092835436900647e-05, "loss": 0.2512, "step": 25338 }, { "epoch": 0.4704388410941509, "grad_norm": 0.5063433647155762, "learning_rate": 1.0927192909559678e-05, "loss": 0.4599, "step": 25340 }, { "epoch": 0.47047597123156953, "grad_norm": 0.3016646206378937, "learning_rate": 1.0926031437496571e-05, "loss": 0.0265, "step": 25342 }, { "epoch": 0.47051310136898816, "grad_norm": 0.1943443864583969, "learning_rate": 1.0924869952832949e-05, "loss": 0.1243, "step": 25344 }, { "epoch": 0.4705502315064068, "grad_norm": 0.47387853264808655, "learning_rate": 1.0923708455584616e-05, "loss": 0.2959, "step": 25346 }, { "epoch": 0.4705873616438254, "grad_norm": 0.40176093578338623, "learning_rate": 1.092254694576738e-05, "loss": 0.2729, "step": 25348 }, { "epoch": 0.4706244917812441, "grad_norm": 0.4118253290653229, "learning_rate": 1.092138542339704e-05, "loss": 0.2129, "step": 25350 }, { "epoch": 0.47066162191866273, "grad_norm": 0.6900607943534851, "learning_rate": 1.0920223888489405e-05, "loss": 0.2445, "step": 25352 }, { "epoch": 0.47069875205608136, "grad_norm": 0.42615848779678345, "learning_rate": 1.091906234106028e-05, "loss": 0.346, "step": 25354 }, { "epoch": 0.4707358821935, "grad_norm": 0.4939764738082886, "learning_rate": 1.091790078112547e-05, "loss": 0.2522, "step": 25356 }, { "epoch": 0.4707730123309186, "grad_norm": 0.40538614988327026, "learning_rate": 1.091673920870078e-05, "loss": 0.3133, "step": 25358 }, { "epoch": 0.4708101424683373, "grad_norm": 0.3616763949394226, "learning_rate": 1.0915577623802017e-05, "loss": 0.232, "step": 25360 }, { "epoch": 0.47084727260575593, "grad_norm": 0.3461170494556427, "learning_rate": 1.091441602644498e-05, "loss": 0.4564, "step": 25362 }, { "epoch": 0.47088440274317456, "grad_norm": 0.5732318758964539, "learning_rate": 1.0913254416645483e-05, "loss": 0.4667, "step": 25364 }, { "epoch": 0.4709215328805932, "grad_norm": 0.4554021656513214, "learning_rate": 1.091209279441933e-05, "loss": 0.4004, "step": 25366 }, { "epoch": 0.4709586630180118, "grad_norm": 0.31949055194854736, "learning_rate": 1.091093115978232e-05, "loss": 0.165, "step": 25368 }, { "epoch": 0.47099579315543044, "grad_norm": 0.41777971386909485, "learning_rate": 1.0909769512750272e-05, "loss": 0.2034, "step": 25370 }, { "epoch": 0.4710329232928491, "grad_norm": 0.3102082908153534, "learning_rate": 1.0908607853338986e-05, "loss": 0.2731, "step": 25372 }, { "epoch": 0.47107005343026775, "grad_norm": 0.4500311017036438, "learning_rate": 1.0907446181564266e-05, "loss": 0.4227, "step": 25374 }, { "epoch": 0.4711071835676864, "grad_norm": 0.5790847539901733, "learning_rate": 1.0906284497441922e-05, "loss": 0.2848, "step": 25376 }, { "epoch": 0.471144313705105, "grad_norm": 0.29279616475105286, "learning_rate": 1.0905122800987762e-05, "loss": 0.2126, "step": 25378 }, { "epoch": 0.47118144384252364, "grad_norm": 0.317947655916214, "learning_rate": 1.0903961092217593e-05, "loss": 0.277, "step": 25380 }, { "epoch": 0.4712185739799423, "grad_norm": 0.4463728666305542, "learning_rate": 1.0902799371147217e-05, "loss": 0.2892, "step": 25382 }, { "epoch": 0.47125570411736095, "grad_norm": 0.39299476146698, "learning_rate": 1.0901637637792447e-05, "loss": 0.2097, "step": 25384 }, { "epoch": 0.4712928342547796, "grad_norm": 0.40631529688835144, "learning_rate": 1.0900475892169088e-05, "loss": 0.4494, "step": 25386 }, { "epoch": 0.4713299643921982, "grad_norm": 0.29346928000450134, "learning_rate": 1.0899314134292953e-05, "loss": 0.3716, "step": 25388 }, { "epoch": 0.47136709452961684, "grad_norm": 0.4119087755680084, "learning_rate": 1.0898152364179846e-05, "loss": 0.2156, "step": 25390 }, { "epoch": 0.47140422466703547, "grad_norm": 0.322813481092453, "learning_rate": 1.0896990581845575e-05, "loss": 0.3095, "step": 25392 }, { "epoch": 0.47144135480445415, "grad_norm": 0.4258841574192047, "learning_rate": 1.0895828787305949e-05, "loss": 0.3963, "step": 25394 }, { "epoch": 0.4714784849418728, "grad_norm": 0.4616994261741638, "learning_rate": 1.0894666980576773e-05, "loss": 0.2597, "step": 25396 }, { "epoch": 0.4715156150792914, "grad_norm": 0.36743608117103577, "learning_rate": 1.0893505161673865e-05, "loss": 0.4485, "step": 25398 }, { "epoch": 0.47155274521671003, "grad_norm": 0.5619312524795532, "learning_rate": 1.0892343330613026e-05, "loss": 0.3208, "step": 25400 }, { "epoch": 0.47158987535412866, "grad_norm": 0.26258498430252075, "learning_rate": 1.0891181487410066e-05, "loss": 0.215, "step": 25402 }, { "epoch": 0.47162700549154735, "grad_norm": 0.26424798369407654, "learning_rate": 1.0890019632080798e-05, "loss": 0.4018, "step": 25404 }, { "epoch": 0.471664135628966, "grad_norm": 0.3750796616077423, "learning_rate": 1.0888857764641023e-05, "loss": 0.3046, "step": 25406 }, { "epoch": 0.4717012657663846, "grad_norm": 0.4208914041519165, "learning_rate": 1.088769588510656e-05, "loss": 0.6732, "step": 25408 }, { "epoch": 0.47173839590380323, "grad_norm": 0.31493067741394043, "learning_rate": 1.0886533993493216e-05, "loss": 0.3852, "step": 25410 }, { "epoch": 0.47177552604122186, "grad_norm": 0.3870280385017395, "learning_rate": 1.08853720898168e-05, "loss": 0.3499, "step": 25412 }, { "epoch": 0.47181265617864054, "grad_norm": 0.30913302302360535, "learning_rate": 1.088421017409312e-05, "loss": 0.2119, "step": 25414 }, { "epoch": 0.47184978631605917, "grad_norm": 0.23520545661449432, "learning_rate": 1.0883048246337988e-05, "loss": 0.1693, "step": 25416 }, { "epoch": 0.4718869164534778, "grad_norm": 0.41441458463668823, "learning_rate": 1.0881886306567216e-05, "loss": 0.3543, "step": 25418 }, { "epoch": 0.47192404659089643, "grad_norm": 0.38247382640838623, "learning_rate": 1.088072435479661e-05, "loss": 0.1647, "step": 25420 }, { "epoch": 0.47196117672831506, "grad_norm": 0.5761268734931946, "learning_rate": 1.0879562391041987e-05, "loss": 0.2509, "step": 25422 }, { "epoch": 0.4719983068657337, "grad_norm": 0.35303500294685364, "learning_rate": 1.0878400415319152e-05, "loss": 0.3159, "step": 25424 }, { "epoch": 0.47203543700315237, "grad_norm": 0.2870520055294037, "learning_rate": 1.087723842764392e-05, "loss": 0.3038, "step": 25426 }, { "epoch": 0.472072567140571, "grad_norm": 0.36818113923072815, "learning_rate": 1.0876076428032098e-05, "loss": 0.3283, "step": 25428 }, { "epoch": 0.4721096972779896, "grad_norm": 0.3182864487171173, "learning_rate": 1.0874914416499502e-05, "loss": 0.2689, "step": 25430 }, { "epoch": 0.47214682741540825, "grad_norm": 0.35932067036628723, "learning_rate": 1.0873752393061946e-05, "loss": 0.4433, "step": 25432 }, { "epoch": 0.4721839575528269, "grad_norm": 0.4307151436805725, "learning_rate": 1.0872590357735228e-05, "loss": 0.1923, "step": 25434 }, { "epoch": 0.47222108769024557, "grad_norm": 0.33131757378578186, "learning_rate": 1.0871428310535178e-05, "loss": 0.3132, "step": 25436 }, { "epoch": 0.4722582178276642, "grad_norm": 0.43724343180656433, "learning_rate": 1.0870266251477592e-05, "loss": 0.1883, "step": 25438 }, { "epoch": 0.4722953479650828, "grad_norm": 0.3439030051231384, "learning_rate": 1.086910418057829e-05, "loss": 0.3815, "step": 25440 }, { "epoch": 0.47233247810250145, "grad_norm": 0.40140312910079956, "learning_rate": 1.0867942097853086e-05, "loss": 0.4382, "step": 25442 }, { "epoch": 0.4723696082399201, "grad_norm": 0.3417443037033081, "learning_rate": 1.086678000331779e-05, "loss": 0.2102, "step": 25444 }, { "epoch": 0.4724067383773387, "grad_norm": 0.39919495582580566, "learning_rate": 1.0865617896988212e-05, "loss": 0.2417, "step": 25446 }, { "epoch": 0.4724438685147574, "grad_norm": 0.39805692434310913, "learning_rate": 1.0864455778880169e-05, "loss": 0.1616, "step": 25448 }, { "epoch": 0.472480998652176, "grad_norm": 0.4144118130207062, "learning_rate": 1.0863293649009472e-05, "loss": 0.3664, "step": 25450 }, { "epoch": 0.47251812878959465, "grad_norm": 0.3545556664466858, "learning_rate": 1.0862131507391933e-05, "loss": 0.2719, "step": 25452 }, { "epoch": 0.4725552589270133, "grad_norm": 0.27268433570861816, "learning_rate": 1.0860969354043368e-05, "loss": 0.2867, "step": 25454 }, { "epoch": 0.4725923890644319, "grad_norm": 0.4307922422885895, "learning_rate": 1.085980718897959e-05, "loss": 0.287, "step": 25456 }, { "epoch": 0.4726295192018506, "grad_norm": 0.4680244028568268, "learning_rate": 1.0858645012216408e-05, "loss": 0.187, "step": 25458 }, { "epoch": 0.4726666493392692, "grad_norm": 0.24568291008472443, "learning_rate": 1.0857482823769643e-05, "loss": 0.2515, "step": 25460 }, { "epoch": 0.47270377947668785, "grad_norm": 0.36726653575897217, "learning_rate": 1.0856320623655103e-05, "loss": 0.3427, "step": 25462 }, { "epoch": 0.4727409096141065, "grad_norm": 0.4769476652145386, "learning_rate": 1.0855158411888606e-05, "loss": 0.4239, "step": 25464 }, { "epoch": 0.4727780397515251, "grad_norm": 0.513557493686676, "learning_rate": 1.0853996188485963e-05, "loss": 0.173, "step": 25466 }, { "epoch": 0.47281516988894373, "grad_norm": 0.49371397495269775, "learning_rate": 1.0852833953462992e-05, "loss": 0.4302, "step": 25468 }, { "epoch": 0.4728523000263624, "grad_norm": 0.4058995544910431, "learning_rate": 1.08516717068355e-05, "loss": 0.457, "step": 25470 }, { "epoch": 0.47288943016378104, "grad_norm": 0.6060823202133179, "learning_rate": 1.085050944861931e-05, "loss": 0.3163, "step": 25472 }, { "epoch": 0.47292656030119967, "grad_norm": 0.3792531490325928, "learning_rate": 1.0849347178830236e-05, "loss": 0.2224, "step": 25474 }, { "epoch": 0.4729636904386183, "grad_norm": 0.2610177993774414, "learning_rate": 1.0848184897484092e-05, "loss": 0.2312, "step": 25476 }, { "epoch": 0.47300082057603693, "grad_norm": 0.45387837290763855, "learning_rate": 1.0847022604596688e-05, "loss": 0.3001, "step": 25478 }, { "epoch": 0.4730379507134556, "grad_norm": 0.4042569696903229, "learning_rate": 1.0845860300183848e-05, "loss": 0.205, "step": 25480 }, { "epoch": 0.47307508085087424, "grad_norm": 0.2229710817337036, "learning_rate": 1.0844697984261378e-05, "loss": 0.3165, "step": 25482 }, { "epoch": 0.47311221098829287, "grad_norm": 0.2741638422012329, "learning_rate": 1.0843535656845104e-05, "loss": 0.249, "step": 25484 }, { "epoch": 0.4731493411257115, "grad_norm": 0.3848188817501068, "learning_rate": 1.0842373317950835e-05, "loss": 0.233, "step": 25486 }, { "epoch": 0.4731864712631301, "grad_norm": 0.3627490699291229, "learning_rate": 1.0841210967594385e-05, "loss": 0.1811, "step": 25488 }, { "epoch": 0.4732236014005488, "grad_norm": 0.34293967485427856, "learning_rate": 1.0840048605791573e-05, "loss": 0.3531, "step": 25490 }, { "epoch": 0.47326073153796744, "grad_norm": 0.4283178746700287, "learning_rate": 1.0838886232558219e-05, "loss": 0.2431, "step": 25492 }, { "epoch": 0.47329786167538607, "grad_norm": 0.4440542161464691, "learning_rate": 1.0837723847910135e-05, "loss": 0.3731, "step": 25494 }, { "epoch": 0.4733349918128047, "grad_norm": 0.5516613721847534, "learning_rate": 1.0836561451863138e-05, "loss": 0.367, "step": 25496 }, { "epoch": 0.4733721219502233, "grad_norm": 0.26940014958381653, "learning_rate": 1.0835399044433048e-05, "loss": 0.2869, "step": 25498 }, { "epoch": 0.47340925208764195, "grad_norm": 0.3848629295825958, "learning_rate": 1.0834236625635678e-05, "loss": 0.2989, "step": 25500 }, { "epoch": 0.47344638222506064, "grad_norm": 0.5390532612800598, "learning_rate": 1.0833074195486844e-05, "loss": 0.4017, "step": 25502 }, { "epoch": 0.47348351236247926, "grad_norm": 0.2716115117073059, "learning_rate": 1.0831911754002367e-05, "loss": 0.3809, "step": 25504 }, { "epoch": 0.4735206424998979, "grad_norm": 0.4002685248851776, "learning_rate": 1.0830749301198062e-05, "loss": 0.3151, "step": 25506 }, { "epoch": 0.4735577726373165, "grad_norm": 0.4001477360725403, "learning_rate": 1.0829586837089752e-05, "loss": 0.2348, "step": 25508 }, { "epoch": 0.47359490277473515, "grad_norm": 0.3563826382160187, "learning_rate": 1.0828424361693247e-05, "loss": 0.1986, "step": 25510 }, { "epoch": 0.47363203291215383, "grad_norm": 0.36404919624328613, "learning_rate": 1.0827261875024366e-05, "loss": 0.3029, "step": 25512 }, { "epoch": 0.47366916304957246, "grad_norm": 0.48661258816719055, "learning_rate": 1.0826099377098931e-05, "loss": 0.3824, "step": 25514 }, { "epoch": 0.4737062931869911, "grad_norm": 0.3092491328716278, "learning_rate": 1.0824936867932759e-05, "loss": 0.2552, "step": 25516 }, { "epoch": 0.4737434233244097, "grad_norm": 0.591813862323761, "learning_rate": 1.0823774347541664e-05, "loss": 0.2769, "step": 25518 }, { "epoch": 0.47378055346182835, "grad_norm": 0.2924026846885681, "learning_rate": 1.0822611815941468e-05, "loss": 0.1458, "step": 25520 }, { "epoch": 0.473817683599247, "grad_norm": 0.26555246114730835, "learning_rate": 1.082144927314799e-05, "loss": 0.2173, "step": 25522 }, { "epoch": 0.47385481373666566, "grad_norm": 0.26442041993141174, "learning_rate": 1.082028671917705e-05, "loss": 0.2171, "step": 25524 }, { "epoch": 0.4738919438740843, "grad_norm": 0.3518792390823364, "learning_rate": 1.0819124154044463e-05, "loss": 0.3947, "step": 25526 }, { "epoch": 0.4739290740115029, "grad_norm": 0.37692686915397644, "learning_rate": 1.081796157776605e-05, "loss": 0.4507, "step": 25528 }, { "epoch": 0.47396620414892154, "grad_norm": 0.5003446340560913, "learning_rate": 1.0816798990357631e-05, "loss": 0.2765, "step": 25530 }, { "epoch": 0.47400333428634017, "grad_norm": 0.22410474717617035, "learning_rate": 1.0815636391835023e-05, "loss": 0.1679, "step": 25532 }, { "epoch": 0.47404046442375886, "grad_norm": 0.480230450630188, "learning_rate": 1.0814473782214045e-05, "loss": 0.2638, "step": 25534 }, { "epoch": 0.4740775945611775, "grad_norm": 0.33393579721450806, "learning_rate": 1.0813311161510522e-05, "loss": 0.4679, "step": 25536 }, { "epoch": 0.4741147246985961, "grad_norm": 0.5020701289176941, "learning_rate": 1.0812148529740269e-05, "loss": 0.362, "step": 25538 }, { "epoch": 0.47415185483601474, "grad_norm": 0.5628647208213806, "learning_rate": 1.0810985886919108e-05, "loss": 0.5066, "step": 25540 }, { "epoch": 0.47418898497343337, "grad_norm": 0.5545311570167542, "learning_rate": 1.0809823233062858e-05, "loss": 0.2333, "step": 25542 }, { "epoch": 0.474226115110852, "grad_norm": 0.37639284133911133, "learning_rate": 1.0808660568187337e-05, "loss": 0.6017, "step": 25544 }, { "epoch": 0.4742632452482707, "grad_norm": 0.38127413392066956, "learning_rate": 1.0807497892308367e-05, "loss": 0.2783, "step": 25546 }, { "epoch": 0.4743003753856893, "grad_norm": 0.3367161452770233, "learning_rate": 1.0806335205441774e-05, "loss": 0.3212, "step": 25548 }, { "epoch": 0.47433750552310794, "grad_norm": 0.26724082231521606, "learning_rate": 1.0805172507603374e-05, "loss": 0.3105, "step": 25550 }, { "epoch": 0.47437463566052657, "grad_norm": 0.429168164730072, "learning_rate": 1.0804009798808983e-05, "loss": 0.3194, "step": 25552 }, { "epoch": 0.4744117657979452, "grad_norm": 0.4205878674983978, "learning_rate": 1.080284707907443e-05, "loss": 0.3499, "step": 25554 }, { "epoch": 0.4744488959353639, "grad_norm": 0.3207207918167114, "learning_rate": 1.0801684348415534e-05, "loss": 0.0743, "step": 25556 }, { "epoch": 0.4744860260727825, "grad_norm": 0.5857090950012207, "learning_rate": 1.0800521606848114e-05, "loss": 0.4589, "step": 25558 }, { "epoch": 0.47452315621020114, "grad_norm": 0.43266546726226807, "learning_rate": 1.0799358854387994e-05, "loss": 0.2773, "step": 25560 }, { "epoch": 0.47456028634761976, "grad_norm": 0.5602853298187256, "learning_rate": 1.0798196091050994e-05, "loss": 0.3807, "step": 25562 }, { "epoch": 0.4745974164850384, "grad_norm": 0.4169822037220001, "learning_rate": 1.0797033316852934e-05, "loss": 0.2354, "step": 25564 }, { "epoch": 0.4746345466224571, "grad_norm": 0.31028062105178833, "learning_rate": 1.079587053180964e-05, "loss": 0.3197, "step": 25566 }, { "epoch": 0.4746716767598757, "grad_norm": 0.4163873493671417, "learning_rate": 1.0794707735936932e-05, "loss": 0.1939, "step": 25568 }, { "epoch": 0.47470880689729433, "grad_norm": 0.4786476492881775, "learning_rate": 1.0793544929250632e-05, "loss": 0.5305, "step": 25570 }, { "epoch": 0.47474593703471296, "grad_norm": 0.3375973105430603, "learning_rate": 1.0792382111766562e-05, "loss": 0.3852, "step": 25572 }, { "epoch": 0.4747830671721316, "grad_norm": 0.31792545318603516, "learning_rate": 1.0791219283500545e-05, "loss": 0.2206, "step": 25574 }, { "epoch": 0.4748201973095502, "grad_norm": 0.356144517660141, "learning_rate": 1.0790056444468404e-05, "loss": 0.4568, "step": 25576 }, { "epoch": 0.4748573274469689, "grad_norm": 0.19883421063423157, "learning_rate": 1.0788893594685959e-05, "loss": 0.3621, "step": 25578 }, { "epoch": 0.47489445758438753, "grad_norm": 0.2478051334619522, "learning_rate": 1.0787730734169038e-05, "loss": 0.4037, "step": 25580 }, { "epoch": 0.47493158772180616, "grad_norm": 0.5600211024284363, "learning_rate": 1.078656786293346e-05, "loss": 0.6172, "step": 25582 }, { "epoch": 0.4749687178592248, "grad_norm": 0.4443606436252594, "learning_rate": 1.0785404980995051e-05, "loss": 0.2629, "step": 25584 }, { "epoch": 0.4750058479966434, "grad_norm": 0.31411972641944885, "learning_rate": 1.078424208836963e-05, "loss": 0.179, "step": 25586 }, { "epoch": 0.4750429781340621, "grad_norm": 0.3916686475276947, "learning_rate": 1.0783079185073024e-05, "loss": 0.2815, "step": 25588 }, { "epoch": 0.4750801082714807, "grad_norm": 0.2799230217933655, "learning_rate": 1.0781916271121055e-05, "loss": 0.3454, "step": 25590 }, { "epoch": 0.47511723840889936, "grad_norm": 0.31751441955566406, "learning_rate": 1.078075334652955e-05, "loss": 0.4885, "step": 25592 }, { "epoch": 0.475154368546318, "grad_norm": 0.4760719835758209, "learning_rate": 1.0779590411314334e-05, "loss": 0.3011, "step": 25594 }, { "epoch": 0.4751914986837366, "grad_norm": 0.5476299524307251, "learning_rate": 1.077842746549122e-05, "loss": 0.4214, "step": 25596 }, { "epoch": 0.47522862882115524, "grad_norm": 0.37174248695373535, "learning_rate": 1.0777264509076045e-05, "loss": 0.4231, "step": 25598 }, { "epoch": 0.4752657589585739, "grad_norm": 0.6096732020378113, "learning_rate": 1.0776101542084628e-05, "loss": 0.3826, "step": 25600 }, { "epoch": 0.47530288909599255, "grad_norm": 0.596465528011322, "learning_rate": 1.077493856453279e-05, "loss": 0.2254, "step": 25602 }, { "epoch": 0.4753400192334112, "grad_norm": 0.33112844824790955, "learning_rate": 1.0773775576436363e-05, "loss": 0.3118, "step": 25604 }, { "epoch": 0.4753771493708298, "grad_norm": 0.2534058094024658, "learning_rate": 1.0772612577811167e-05, "loss": 0.2804, "step": 25606 }, { "epoch": 0.47541427950824844, "grad_norm": 0.32938072085380554, "learning_rate": 1.0771449568673027e-05, "loss": 0.2835, "step": 25608 }, { "epoch": 0.4754514096456671, "grad_norm": 0.49696657061576843, "learning_rate": 1.077028654903777e-05, "loss": 0.264, "step": 25610 }, { "epoch": 0.47548853978308575, "grad_norm": 0.43749210238456726, "learning_rate": 1.0769123518921221e-05, "loss": 0.0831, "step": 25612 }, { "epoch": 0.4755256699205044, "grad_norm": 0.3019997477531433, "learning_rate": 1.0767960478339204e-05, "loss": 0.1734, "step": 25614 }, { "epoch": 0.475562800057923, "grad_norm": 0.3572320342063904, "learning_rate": 1.0766797427307544e-05, "loss": 0.3879, "step": 25616 }, { "epoch": 0.47559993019534164, "grad_norm": 0.3345218896865845, "learning_rate": 1.076563436584207e-05, "loss": 0.3348, "step": 25618 }, { "epoch": 0.47563706033276026, "grad_norm": 0.3520383834838867, "learning_rate": 1.0764471293958602e-05, "loss": 0.3428, "step": 25620 }, { "epoch": 0.47567419047017895, "grad_norm": 0.3539745807647705, "learning_rate": 1.0763308211672972e-05, "loss": 0.2175, "step": 25622 }, { "epoch": 0.4757113206075976, "grad_norm": 0.4124791920185089, "learning_rate": 1.0762145119001009e-05, "loss": 0.4167, "step": 25624 }, { "epoch": 0.4757484507450162, "grad_norm": 0.55335533618927, "learning_rate": 1.0760982015958526e-05, "loss": 0.2349, "step": 25626 }, { "epoch": 0.47578558088243483, "grad_norm": 0.4106026291847229, "learning_rate": 1.0759818902561358e-05, "loss": 0.1539, "step": 25628 }, { "epoch": 0.47582271101985346, "grad_norm": 0.31891921162605286, "learning_rate": 1.0758655778825334e-05, "loss": 0.1954, "step": 25630 }, { "epoch": 0.47585984115727215, "grad_norm": 0.3169207274913788, "learning_rate": 1.0757492644766273e-05, "loss": 0.586, "step": 25632 }, { "epoch": 0.4758969712946908, "grad_norm": 0.7749449014663696, "learning_rate": 1.0756329500400011e-05, "loss": 0.4366, "step": 25634 }, { "epoch": 0.4759341014321094, "grad_norm": 0.37495163083076477, "learning_rate": 1.0755166345742368e-05, "loss": 0.3604, "step": 25636 }, { "epoch": 0.47597123156952803, "grad_norm": 0.4163711667060852, "learning_rate": 1.0754003180809173e-05, "loss": 0.4244, "step": 25638 }, { "epoch": 0.47600836170694666, "grad_norm": 0.4245756268501282, "learning_rate": 1.0752840005616251e-05, "loss": 0.3348, "step": 25640 }, { "epoch": 0.47604549184436534, "grad_norm": 0.36342230439186096, "learning_rate": 1.0751676820179433e-05, "loss": 0.2902, "step": 25642 }, { "epoch": 0.47608262198178397, "grad_norm": 0.5366952419281006, "learning_rate": 1.0750513624514546e-05, "loss": 0.5316, "step": 25644 }, { "epoch": 0.4761197521192026, "grad_norm": 0.3358038365840912, "learning_rate": 1.0749350418637419e-05, "loss": 0.409, "step": 25646 }, { "epoch": 0.47615688225662123, "grad_norm": 0.3410406708717346, "learning_rate": 1.0748187202563873e-05, "loss": 0.307, "step": 25648 }, { "epoch": 0.47619401239403986, "grad_norm": 0.36222317814826965, "learning_rate": 1.0747023976309743e-05, "loss": 0.2426, "step": 25650 }, { "epoch": 0.4762311425314585, "grad_norm": 0.47697383165359497, "learning_rate": 1.0745860739890852e-05, "loss": 0.2783, "step": 25652 }, { "epoch": 0.47626827266887717, "grad_norm": 0.2913890480995178, "learning_rate": 1.0744697493323033e-05, "loss": 0.1946, "step": 25654 }, { "epoch": 0.4763054028062958, "grad_norm": 0.4798871874809265, "learning_rate": 1.074353423662211e-05, "loss": 0.4034, "step": 25656 }, { "epoch": 0.4763425329437144, "grad_norm": 0.46947070956230164, "learning_rate": 1.0742370969803912e-05, "loss": 0.3679, "step": 25658 }, { "epoch": 0.47637966308113305, "grad_norm": 0.5112821459770203, "learning_rate": 1.074120769288427e-05, "loss": 0.3529, "step": 25660 }, { "epoch": 0.4764167932185517, "grad_norm": 0.2952834665775299, "learning_rate": 1.0740044405879012e-05, "loss": 0.3201, "step": 25662 }, { "epoch": 0.47645392335597037, "grad_norm": 0.4461756944656372, "learning_rate": 1.0738881108803965e-05, "loss": 0.3363, "step": 25664 }, { "epoch": 0.476491053493389, "grad_norm": 0.34060725569725037, "learning_rate": 1.073771780167496e-05, "loss": 0.4085, "step": 25666 }, { "epoch": 0.4765281836308076, "grad_norm": 0.28135544061660767, "learning_rate": 1.0736554484507829e-05, "loss": 0.2788, "step": 25668 }, { "epoch": 0.47656531376822625, "grad_norm": 0.5438511967658997, "learning_rate": 1.0735391157318393e-05, "loss": 0.3363, "step": 25670 }, { "epoch": 0.4766024439056449, "grad_norm": 0.4071270227432251, "learning_rate": 1.0734227820122487e-05, "loss": 0.3527, "step": 25672 }, { "epoch": 0.4766395740430635, "grad_norm": 0.33415988087654114, "learning_rate": 1.0733064472935939e-05, "loss": 0.4921, "step": 25674 }, { "epoch": 0.4766767041804822, "grad_norm": 0.4653334319591522, "learning_rate": 1.0731901115774582e-05, "loss": 0.3871, "step": 25676 }, { "epoch": 0.4767138343179008, "grad_norm": 0.3142509162425995, "learning_rate": 1.0730737748654241e-05, "loss": 0.3771, "step": 25678 }, { "epoch": 0.47675096445531945, "grad_norm": 0.38125258684158325, "learning_rate": 1.0729574371590747e-05, "loss": 0.4175, "step": 25680 }, { "epoch": 0.4767880945927381, "grad_norm": 0.5916948318481445, "learning_rate": 1.0728410984599935e-05, "loss": 0.3363, "step": 25682 }, { "epoch": 0.4768252247301567, "grad_norm": 0.4913061559200287, "learning_rate": 1.0727247587697627e-05, "loss": 0.2275, "step": 25684 }, { "epoch": 0.4768623548675754, "grad_norm": 0.40251681208610535, "learning_rate": 1.0726084180899661e-05, "loss": 0.5608, "step": 25686 }, { "epoch": 0.476899485004994, "grad_norm": 0.5869660377502441, "learning_rate": 1.0724920764221864e-05, "loss": 0.357, "step": 25688 }, { "epoch": 0.47693661514241265, "grad_norm": 0.35948267579078674, "learning_rate": 1.0723757337680065e-05, "loss": 0.1441, "step": 25690 }, { "epoch": 0.4769737452798313, "grad_norm": 0.35727715492248535, "learning_rate": 1.0722593901290097e-05, "loss": 0.3451, "step": 25692 }, { "epoch": 0.4770108754172499, "grad_norm": 0.3327142298221588, "learning_rate": 1.0721430455067791e-05, "loss": 0.2602, "step": 25694 }, { "epoch": 0.47704800555466853, "grad_norm": 0.305571973323822, "learning_rate": 1.0720266999028976e-05, "loss": 0.4308, "step": 25696 }, { "epoch": 0.4770851356920872, "grad_norm": 0.2570231854915619, "learning_rate": 1.0719103533189488e-05, "loss": 0.2282, "step": 25698 }, { "epoch": 0.47712226582950584, "grad_norm": 0.34305548667907715, "learning_rate": 1.0717940057565153e-05, "loss": 0.2475, "step": 25700 }, { "epoch": 0.47715939596692447, "grad_norm": 0.4209199547767639, "learning_rate": 1.0716776572171803e-05, "loss": 0.3068, "step": 25702 }, { "epoch": 0.4771965261043431, "grad_norm": 0.2675342857837677, "learning_rate": 1.0715613077025271e-05, "loss": 0.3316, "step": 25704 }, { "epoch": 0.47723365624176173, "grad_norm": 0.32007548213005066, "learning_rate": 1.071444957214139e-05, "loss": 0.2145, "step": 25706 }, { "epoch": 0.4772707863791804, "grad_norm": 0.32496023178100586, "learning_rate": 1.071328605753599e-05, "loss": 0.466, "step": 25708 }, { "epoch": 0.47730791651659904, "grad_norm": 0.3902900516986847, "learning_rate": 1.0712122533224903e-05, "loss": 0.1209, "step": 25710 }, { "epoch": 0.47734504665401767, "grad_norm": 0.3667261600494385, "learning_rate": 1.071095899922396e-05, "loss": 0.3444, "step": 25712 }, { "epoch": 0.4773821767914363, "grad_norm": 0.37834101915359497, "learning_rate": 1.0709795455548996e-05, "loss": 0.2929, "step": 25714 }, { "epoch": 0.4774193069288549, "grad_norm": 0.32055819034576416, "learning_rate": 1.070863190221584e-05, "loss": 0.1999, "step": 25716 }, { "epoch": 0.4774564370662736, "grad_norm": 0.5087131857872009, "learning_rate": 1.070746833924033e-05, "loss": 0.2359, "step": 25718 }, { "epoch": 0.47749356720369224, "grad_norm": 2.0127739906311035, "learning_rate": 1.0706304766638296e-05, "loss": 0.3208, "step": 25720 }, { "epoch": 0.47753069734111087, "grad_norm": 0.47244301438331604, "learning_rate": 1.0705141184425565e-05, "loss": 0.4341, "step": 25722 }, { "epoch": 0.4775678274785295, "grad_norm": 0.35016411542892456, "learning_rate": 1.0703977592617975e-05, "loss": 0.1843, "step": 25724 }, { "epoch": 0.4776049576159481, "grad_norm": 0.3687516152858734, "learning_rate": 1.0702813991231363e-05, "loss": 0.1912, "step": 25726 }, { "epoch": 0.47764208775336675, "grad_norm": 0.45923924446105957, "learning_rate": 1.0701650380281552e-05, "loss": 0.452, "step": 25728 }, { "epoch": 0.47767921789078543, "grad_norm": 0.3328072428703308, "learning_rate": 1.0700486759784385e-05, "loss": 0.3818, "step": 25730 }, { "epoch": 0.47771634802820406, "grad_norm": 0.2896766662597656, "learning_rate": 1.0699323129755692e-05, "loss": 0.3868, "step": 25732 }, { "epoch": 0.4777534781656227, "grad_norm": 0.41523781418800354, "learning_rate": 1.0698159490211304e-05, "loss": 0.1679, "step": 25734 }, { "epoch": 0.4777906083030413, "grad_norm": 0.33768758177757263, "learning_rate": 1.0696995841167055e-05, "loss": 0.2751, "step": 25736 }, { "epoch": 0.47782773844045995, "grad_norm": 0.378711462020874, "learning_rate": 1.0695832182638783e-05, "loss": 0.3012, "step": 25738 }, { "epoch": 0.47786486857787863, "grad_norm": 0.41520217061042786, "learning_rate": 1.069466851464232e-05, "loss": 0.2973, "step": 25740 }, { "epoch": 0.47790199871529726, "grad_norm": 0.5890416502952576, "learning_rate": 1.0693504837193495e-05, "loss": 0.1234, "step": 25742 }, { "epoch": 0.4779391288527159, "grad_norm": 0.33692285418510437, "learning_rate": 1.0692341150308153e-05, "loss": 0.386, "step": 25744 }, { "epoch": 0.4779762589901345, "grad_norm": 0.5813331604003906, "learning_rate": 1.0691177454002115e-05, "loss": 0.348, "step": 25746 }, { "epoch": 0.47801338912755315, "grad_norm": 0.36305853724479675, "learning_rate": 1.0690013748291226e-05, "loss": 0.3424, "step": 25748 }, { "epoch": 0.4780505192649718, "grad_norm": 0.5061136484146118, "learning_rate": 1.0688850033191317e-05, "loss": 0.679, "step": 25750 }, { "epoch": 0.47808764940239046, "grad_norm": 0.3417928218841553, "learning_rate": 1.0687686308718221e-05, "loss": 0.2822, "step": 25752 }, { "epoch": 0.4781247795398091, "grad_norm": 0.5205857157707214, "learning_rate": 1.0686522574887774e-05, "loss": 0.2745, "step": 25754 }, { "epoch": 0.4781619096772277, "grad_norm": 0.30825477838516235, "learning_rate": 1.0685358831715811e-05, "loss": 0.4334, "step": 25756 }, { "epoch": 0.47819903981464634, "grad_norm": 0.26875045895576477, "learning_rate": 1.068419507921817e-05, "loss": 0.4397, "step": 25758 }, { "epoch": 0.47823616995206497, "grad_norm": 0.6560484766960144, "learning_rate": 1.068303131741068e-05, "loss": 0.3197, "step": 25760 }, { "epoch": 0.47827330008948366, "grad_norm": 0.2730303704738617, "learning_rate": 1.068186754630918e-05, "loss": 0.3246, "step": 25762 }, { "epoch": 0.4783104302269023, "grad_norm": 0.8990581035614014, "learning_rate": 1.0680703765929508e-05, "loss": 0.1455, "step": 25764 }, { "epoch": 0.4783475603643209, "grad_norm": 0.257618248462677, "learning_rate": 1.0679539976287492e-05, "loss": 0.2712, "step": 25766 }, { "epoch": 0.47838469050173954, "grad_norm": 0.2745590806007385, "learning_rate": 1.0678376177398974e-05, "loss": 0.3214, "step": 25768 }, { "epoch": 0.47842182063915817, "grad_norm": 0.30385804176330566, "learning_rate": 1.067721236927979e-05, "loss": 0.3032, "step": 25770 }, { "epoch": 0.4784589507765768, "grad_norm": 0.5318928956985474, "learning_rate": 1.0676048551945775e-05, "loss": 0.3133, "step": 25772 }, { "epoch": 0.4784960809139955, "grad_norm": 0.5492009520530701, "learning_rate": 1.0674884725412761e-05, "loss": 0.4143, "step": 25774 }, { "epoch": 0.4785332110514141, "grad_norm": 0.4106338322162628, "learning_rate": 1.0673720889696591e-05, "loss": 0.299, "step": 25776 }, { "epoch": 0.47857034118883274, "grad_norm": 0.3753180503845215, "learning_rate": 1.0672557044813094e-05, "loss": 0.3786, "step": 25778 }, { "epoch": 0.47860747132625137, "grad_norm": 0.3486310839653015, "learning_rate": 1.0671393190778112e-05, "loss": 0.4971, "step": 25780 }, { "epoch": 0.47864460146367, "grad_norm": 0.7115786075592041, "learning_rate": 1.067022932760748e-05, "loss": 0.4516, "step": 25782 }, { "epoch": 0.4786817316010887, "grad_norm": 0.38976845145225525, "learning_rate": 1.0669065455317037e-05, "loss": 0.209, "step": 25784 }, { "epoch": 0.4787188617385073, "grad_norm": 0.225276917219162, "learning_rate": 1.0667901573922612e-05, "loss": 0.333, "step": 25786 }, { "epoch": 0.47875599187592593, "grad_norm": 0.38125425577163696, "learning_rate": 1.066673768344005e-05, "loss": 0.2695, "step": 25788 }, { "epoch": 0.47879312201334456, "grad_norm": 0.3854677081108093, "learning_rate": 1.0665573783885184e-05, "loss": 0.2037, "step": 25790 }, { "epoch": 0.4788302521507632, "grad_norm": 0.34418985247612, "learning_rate": 1.0664409875273853e-05, "loss": 0.1517, "step": 25792 }, { "epoch": 0.4788673822881819, "grad_norm": 0.4653046727180481, "learning_rate": 1.0663245957621892e-05, "loss": 0.3215, "step": 25794 }, { "epoch": 0.4789045124256005, "grad_norm": 0.2933042347431183, "learning_rate": 1.0662082030945144e-05, "loss": 0.2735, "step": 25796 }, { "epoch": 0.47894164256301913, "grad_norm": 0.220557302236557, "learning_rate": 1.066091809525944e-05, "loss": 0.0732, "step": 25798 }, { "epoch": 0.47897877270043776, "grad_norm": 0.3641858398914337, "learning_rate": 1.0659754150580622e-05, "loss": 0.2119, "step": 25800 }, { "epoch": 0.4790159028378564, "grad_norm": 0.3173206150531769, "learning_rate": 1.0658590196924526e-05, "loss": 0.3479, "step": 25802 }, { "epoch": 0.479053032975275, "grad_norm": 0.6831305027008057, "learning_rate": 1.065742623430699e-05, "loss": 0.387, "step": 25804 }, { "epoch": 0.4790901631126937, "grad_norm": 0.3711341321468353, "learning_rate": 1.0656262262743851e-05, "loss": 0.3475, "step": 25806 }, { "epoch": 0.47912729325011233, "grad_norm": 0.3482346534729004, "learning_rate": 1.0655098282250948e-05, "loss": 0.3159, "step": 25808 }, { "epoch": 0.47916442338753096, "grad_norm": 0.34346863627433777, "learning_rate": 1.065393429284412e-05, "loss": 0.239, "step": 25810 }, { "epoch": 0.4792015535249496, "grad_norm": 0.3327527642250061, "learning_rate": 1.0652770294539206e-05, "loss": 0.1992, "step": 25812 }, { "epoch": 0.4792386836623682, "grad_norm": 0.6932603120803833, "learning_rate": 1.0651606287352042e-05, "loss": 0.49, "step": 25814 }, { "epoch": 0.4792758137997869, "grad_norm": 0.25849881768226624, "learning_rate": 1.0650442271298472e-05, "loss": 0.5894, "step": 25816 }, { "epoch": 0.4793129439372055, "grad_norm": 0.45103219151496887, "learning_rate": 1.0649278246394328e-05, "loss": 0.4128, "step": 25818 }, { "epoch": 0.47935007407462416, "grad_norm": 0.3949589133262634, "learning_rate": 1.0648114212655452e-05, "loss": 0.3025, "step": 25820 }, { "epoch": 0.4793872042120428, "grad_norm": 0.45555379986763, "learning_rate": 1.0646950170097683e-05, "loss": 0.1345, "step": 25822 }, { "epoch": 0.4794243343494614, "grad_norm": 0.39250314235687256, "learning_rate": 1.064578611873686e-05, "loss": 0.2505, "step": 25824 }, { "epoch": 0.47946146448688004, "grad_norm": 0.2591609060764313, "learning_rate": 1.0644622058588825e-05, "loss": 0.2313, "step": 25826 }, { "epoch": 0.4794985946242987, "grad_norm": 0.3499685227870941, "learning_rate": 1.064345798966941e-05, "loss": 0.4806, "step": 25828 }, { "epoch": 0.47953572476171735, "grad_norm": 0.33018285036087036, "learning_rate": 1.064229391199446e-05, "loss": 0.3458, "step": 25830 }, { "epoch": 0.479572854899136, "grad_norm": 0.28483545780181885, "learning_rate": 1.0641129825579817e-05, "loss": 0.2113, "step": 25832 }, { "epoch": 0.4796099850365546, "grad_norm": 0.3753323256969452, "learning_rate": 1.0639965730441312e-05, "loss": 0.3736, "step": 25834 }, { "epoch": 0.47964711517397324, "grad_norm": 0.3545713424682617, "learning_rate": 1.0638801626594796e-05, "loss": 0.2362, "step": 25836 }, { "epoch": 0.4796842453113919, "grad_norm": 0.33421653509140015, "learning_rate": 1.06376375140561e-05, "loss": 0.3229, "step": 25838 }, { "epoch": 0.47972137544881055, "grad_norm": 0.40005314350128174, "learning_rate": 1.0636473392841069e-05, "loss": 0.1842, "step": 25840 }, { "epoch": 0.4797585055862292, "grad_norm": 0.3972844183444977, "learning_rate": 1.063530926296554e-05, "loss": 0.0998, "step": 25842 }, { "epoch": 0.4797956357236478, "grad_norm": 0.5402910709381104, "learning_rate": 1.0634145124445356e-05, "loss": 0.2587, "step": 25844 }, { "epoch": 0.47983276586106643, "grad_norm": 0.379145085811615, "learning_rate": 1.0632980977296359e-05, "loss": 0.4205, "step": 25846 }, { "epoch": 0.47986989599848506, "grad_norm": 0.3514607548713684, "learning_rate": 1.0631816821534385e-05, "loss": 0.3258, "step": 25848 }, { "epoch": 0.47990702613590375, "grad_norm": 0.28997018933296204, "learning_rate": 1.0630652657175273e-05, "loss": 0.2114, "step": 25850 }, { "epoch": 0.4799441562733224, "grad_norm": 0.27410557866096497, "learning_rate": 1.0629488484234871e-05, "loss": 0.2977, "step": 25852 }, { "epoch": 0.479981286410741, "grad_norm": 0.5679981112480164, "learning_rate": 1.0628324302729013e-05, "loss": 0.2115, "step": 25854 }, { "epoch": 0.48001841654815963, "grad_norm": 0.3682366609573364, "learning_rate": 1.0627160112673549e-05, "loss": 0.4061, "step": 25856 }, { "epoch": 0.48005554668557826, "grad_norm": 0.3065265715122223, "learning_rate": 1.0625995914084313e-05, "loss": 0.3097, "step": 25858 }, { "epoch": 0.48009267682299694, "grad_norm": 0.3950786292552948, "learning_rate": 1.0624831706977144e-05, "loss": 0.2392, "step": 25860 }, { "epoch": 0.4801298069604156, "grad_norm": 0.5891554951667786, "learning_rate": 1.0623667491367886e-05, "loss": 0.3198, "step": 25862 }, { "epoch": 0.4801669370978342, "grad_norm": 0.3438788652420044, "learning_rate": 1.0622503267272387e-05, "loss": 0.3419, "step": 25864 }, { "epoch": 0.48020406723525283, "grad_norm": 0.31763380765914917, "learning_rate": 1.062133903470648e-05, "loss": 0.3149, "step": 25866 }, { "epoch": 0.48024119737267146, "grad_norm": 0.47302332520484924, "learning_rate": 1.0620174793686012e-05, "loss": 0.2338, "step": 25868 }, { "epoch": 0.48027832751009014, "grad_norm": 0.33269596099853516, "learning_rate": 1.061901054422682e-05, "loss": 0.3203, "step": 25870 }, { "epoch": 0.48031545764750877, "grad_norm": 0.3056771159172058, "learning_rate": 1.0617846286344749e-05, "loss": 0.4243, "step": 25872 }, { "epoch": 0.4803525877849274, "grad_norm": 0.4264232814311981, "learning_rate": 1.061668202005564e-05, "loss": 0.3966, "step": 25874 }, { "epoch": 0.480389717922346, "grad_norm": 0.25301283597946167, "learning_rate": 1.061551774537534e-05, "loss": 0.2643, "step": 25876 }, { "epoch": 0.48042684805976466, "grad_norm": 0.3553018569946289, "learning_rate": 1.0614353462319684e-05, "loss": 0.1603, "step": 25878 }, { "epoch": 0.4804639781971833, "grad_norm": 0.35869327187538147, "learning_rate": 1.0613189170904518e-05, "loss": 0.2116, "step": 25880 }, { "epoch": 0.48050110833460197, "grad_norm": 0.3207240402698517, "learning_rate": 1.0612024871145683e-05, "loss": 0.2569, "step": 25882 }, { "epoch": 0.4805382384720206, "grad_norm": 0.3287566900253296, "learning_rate": 1.0610860563059025e-05, "loss": 0.3241, "step": 25884 }, { "epoch": 0.4805753686094392, "grad_norm": 0.4188264310359955, "learning_rate": 1.0609696246660382e-05, "loss": 0.288, "step": 25886 }, { "epoch": 0.48061249874685785, "grad_norm": 0.2996284067630768, "learning_rate": 1.0608531921965602e-05, "loss": 0.2205, "step": 25888 }, { "epoch": 0.4806496288842765, "grad_norm": 0.34011662006378174, "learning_rate": 1.0607367588990525e-05, "loss": 0.1616, "step": 25890 }, { "epoch": 0.48068675902169516, "grad_norm": 0.50629061460495, "learning_rate": 1.0606203247750993e-05, "loss": 0.2595, "step": 25892 }, { "epoch": 0.4807238891591138, "grad_norm": 0.296122282743454, "learning_rate": 1.060503889826285e-05, "loss": 0.1873, "step": 25894 }, { "epoch": 0.4807610192965324, "grad_norm": 0.4013459384441376, "learning_rate": 1.060387454054194e-05, "loss": 0.3102, "step": 25896 }, { "epoch": 0.48079814943395105, "grad_norm": 0.4383610188961029, "learning_rate": 1.0602710174604107e-05, "loss": 0.2114, "step": 25898 }, { "epoch": 0.4808352795713697, "grad_norm": 0.4170689284801483, "learning_rate": 1.0601545800465197e-05, "loss": 0.2498, "step": 25900 }, { "epoch": 0.4808724097087883, "grad_norm": 0.5081580877304077, "learning_rate": 1.0600381418141047e-05, "loss": 0.3481, "step": 25902 }, { "epoch": 0.480909539846207, "grad_norm": 0.3706102669239044, "learning_rate": 1.0599217027647504e-05, "loss": 0.2647, "step": 25904 }, { "epoch": 0.4809466699836256, "grad_norm": 0.49083277583122253, "learning_rate": 1.0598052629000412e-05, "loss": 0.2301, "step": 25906 }, { "epoch": 0.48098380012104425, "grad_norm": 0.36529672145843506, "learning_rate": 1.0596888222215618e-05, "loss": 0.3074, "step": 25908 }, { "epoch": 0.4810209302584629, "grad_norm": 0.24706918001174927, "learning_rate": 1.0595723807308963e-05, "loss": 0.2356, "step": 25910 }, { "epoch": 0.4810580603958815, "grad_norm": 0.571923017501831, "learning_rate": 1.0594559384296286e-05, "loss": 0.3954, "step": 25912 }, { "epoch": 0.4810951905333002, "grad_norm": 0.42421993613243103, "learning_rate": 1.0593394953193443e-05, "loss": 0.132, "step": 25914 }, { "epoch": 0.4811323206707188, "grad_norm": 0.3086685836315155, "learning_rate": 1.0592230514016268e-05, "loss": 0.3092, "step": 25916 }, { "epoch": 0.48116945080813744, "grad_norm": 0.42523038387298584, "learning_rate": 1.0591066066780609e-05, "loss": 0.3185, "step": 25918 }, { "epoch": 0.4812065809455561, "grad_norm": 0.3302091658115387, "learning_rate": 1.0589901611502315e-05, "loss": 0.2911, "step": 25920 }, { "epoch": 0.4812437110829747, "grad_norm": 0.4887673556804657, "learning_rate": 1.0588737148197225e-05, "loss": 0.3035, "step": 25922 }, { "epoch": 0.48128084122039333, "grad_norm": 0.4397135376930237, "learning_rate": 1.0587572676881186e-05, "loss": 0.2615, "step": 25924 }, { "epoch": 0.481317971357812, "grad_norm": 0.39657092094421387, "learning_rate": 1.0586408197570041e-05, "loss": 0.3352, "step": 25926 }, { "epoch": 0.48135510149523064, "grad_norm": 0.3883914351463318, "learning_rate": 1.0585243710279643e-05, "loss": 0.3096, "step": 25928 }, { "epoch": 0.48139223163264927, "grad_norm": 0.36172398924827576, "learning_rate": 1.0584079215025826e-05, "loss": 0.3868, "step": 25930 }, { "epoch": 0.4814293617700679, "grad_norm": 0.30220121145248413, "learning_rate": 1.058291471182444e-05, "loss": 0.2566, "step": 25932 }, { "epoch": 0.4814664919074865, "grad_norm": 0.517325758934021, "learning_rate": 1.0581750200691334e-05, "loss": 0.2998, "step": 25934 }, { "epoch": 0.4815036220449052, "grad_norm": 0.3371557593345642, "learning_rate": 1.0580585681642348e-05, "loss": 0.27, "step": 25936 }, { "epoch": 0.48154075218232384, "grad_norm": 0.420112669467926, "learning_rate": 1.057942115469333e-05, "loss": 0.219, "step": 25938 }, { "epoch": 0.48157788231974247, "grad_norm": 0.4462807774543762, "learning_rate": 1.0578256619860128e-05, "loss": 0.1767, "step": 25940 }, { "epoch": 0.4816150124571611, "grad_norm": 0.24513261020183563, "learning_rate": 1.0577092077158583e-05, "loss": 0.3393, "step": 25942 }, { "epoch": 0.4816521425945797, "grad_norm": 0.22974194586277008, "learning_rate": 1.0575927526604544e-05, "loss": 0.3438, "step": 25944 }, { "epoch": 0.4816892727319984, "grad_norm": 0.6168479323387146, "learning_rate": 1.0574762968213857e-05, "loss": 0.1242, "step": 25946 }, { "epoch": 0.48172640286941704, "grad_norm": 0.25510695576667786, "learning_rate": 1.0573598402002368e-05, "loss": 0.1252, "step": 25948 }, { "epoch": 0.48176353300683566, "grad_norm": 0.26457667350769043, "learning_rate": 1.057243382798592e-05, "loss": 0.3293, "step": 25950 }, { "epoch": 0.4818006631442543, "grad_norm": 0.23546819388866425, "learning_rate": 1.0571269246180365e-05, "loss": 0.1639, "step": 25952 }, { "epoch": 0.4818377932816729, "grad_norm": 0.34259888529777527, "learning_rate": 1.0570104656601548e-05, "loss": 0.302, "step": 25954 }, { "epoch": 0.48187492341909155, "grad_norm": 0.397570937871933, "learning_rate": 1.056894005926531e-05, "loss": 0.2128, "step": 25956 }, { "epoch": 0.48191205355651023, "grad_norm": 0.20074734091758728, "learning_rate": 1.0567775454187505e-05, "loss": 0.2137, "step": 25958 }, { "epoch": 0.48194918369392886, "grad_norm": 0.3152163326740265, "learning_rate": 1.0566610841383975e-05, "loss": 0.3188, "step": 25960 }, { "epoch": 0.4819863138313475, "grad_norm": 0.5078747868537903, "learning_rate": 1.0565446220870573e-05, "loss": 0.1864, "step": 25962 }, { "epoch": 0.4820234439687661, "grad_norm": 0.48174622654914856, "learning_rate": 1.0564281592663135e-05, "loss": 0.2456, "step": 25964 }, { "epoch": 0.48206057410618475, "grad_norm": 0.38263198733329773, "learning_rate": 1.056311695677752e-05, "loss": 0.2717, "step": 25966 }, { "epoch": 0.48209770424360343, "grad_norm": 0.3269321620464325, "learning_rate": 1.0561952313229567e-05, "loss": 0.4477, "step": 25968 }, { "epoch": 0.48213483438102206, "grad_norm": 0.34150880575180054, "learning_rate": 1.0560787662035126e-05, "loss": 0.1791, "step": 25970 }, { "epoch": 0.4821719645184407, "grad_norm": 0.241269052028656, "learning_rate": 1.0559623003210047e-05, "loss": 0.1179, "step": 25972 }, { "epoch": 0.4822090946558593, "grad_norm": 0.2618664801120758, "learning_rate": 1.0558458336770173e-05, "loss": 0.2245, "step": 25974 }, { "epoch": 0.48224622479327794, "grad_norm": 0.23175397515296936, "learning_rate": 1.0557293662731357e-05, "loss": 0.1045, "step": 25976 }, { "epoch": 0.4822833549306966, "grad_norm": 0.3808254897594452, "learning_rate": 1.0556128981109442e-05, "loss": 0.2747, "step": 25978 }, { "epoch": 0.48232048506811526, "grad_norm": 0.36512550711631775, "learning_rate": 1.0554964291920276e-05, "loss": 0.3013, "step": 25980 }, { "epoch": 0.4823576152055339, "grad_norm": 0.3891080915927887, "learning_rate": 1.0553799595179707e-05, "loss": 0.2552, "step": 25982 }, { "epoch": 0.4823947453429525, "grad_norm": 0.6203058362007141, "learning_rate": 1.055263489090359e-05, "loss": 0.276, "step": 25984 }, { "epoch": 0.48243187548037114, "grad_norm": 0.40197300910949707, "learning_rate": 1.0551470179107763e-05, "loss": 0.2428, "step": 25986 }, { "epoch": 0.48246900561778977, "grad_norm": 0.2707844078540802, "learning_rate": 1.055030545980808e-05, "loss": 0.1677, "step": 25988 }, { "epoch": 0.48250613575520845, "grad_norm": 0.45740237832069397, "learning_rate": 1.0549140733020386e-05, "loss": 0.218, "step": 25990 }, { "epoch": 0.4825432658926271, "grad_norm": 0.38537925481796265, "learning_rate": 1.0547975998760534e-05, "loss": 0.2205, "step": 25992 }, { "epoch": 0.4825803960300457, "grad_norm": 0.5144960880279541, "learning_rate": 1.054681125704437e-05, "loss": 0.3108, "step": 25994 }, { "epoch": 0.48261752616746434, "grad_norm": 0.27678626775741577, "learning_rate": 1.0545646507887744e-05, "loss": 0.1682, "step": 25996 }, { "epoch": 0.48265465630488297, "grad_norm": 0.3633998930454254, "learning_rate": 1.05444817513065e-05, "loss": 0.3683, "step": 25998 }, { "epoch": 0.4826917864423016, "grad_norm": 0.3564842641353607, "learning_rate": 1.0543316987316489e-05, "loss": 0.2931, "step": 26000 }, { "epoch": 0.4827289165797203, "grad_norm": 0.3071421682834625, "learning_rate": 1.0542152215933563e-05, "loss": 0.3321, "step": 26002 }, { "epoch": 0.4827660467171389, "grad_norm": 0.46005627512931824, "learning_rate": 1.0540987437173571e-05, "loss": 0.2435, "step": 26004 }, { "epoch": 0.48280317685455754, "grad_norm": 0.47856539487838745, "learning_rate": 1.053982265105236e-05, "loss": 0.3589, "step": 26006 }, { "epoch": 0.48284030699197616, "grad_norm": 0.4329420328140259, "learning_rate": 1.0538657857585779e-05, "loss": 0.2202, "step": 26008 }, { "epoch": 0.4828774371293948, "grad_norm": 0.3009786307811737, "learning_rate": 1.0537493056789678e-05, "loss": 0.1983, "step": 26010 }, { "epoch": 0.4829145672668135, "grad_norm": 0.5520566701889038, "learning_rate": 1.0536328248679908e-05, "loss": 0.3142, "step": 26012 }, { "epoch": 0.4829516974042321, "grad_norm": 0.3896366059780121, "learning_rate": 1.0535163433272315e-05, "loss": 0.262, "step": 26014 }, { "epoch": 0.48298882754165073, "grad_norm": 0.47369384765625, "learning_rate": 1.0533998610582754e-05, "loss": 0.4611, "step": 26016 }, { "epoch": 0.48302595767906936, "grad_norm": 0.7444141507148743, "learning_rate": 1.0532833780627067e-05, "loss": 0.1764, "step": 26018 }, { "epoch": 0.483063087816488, "grad_norm": 0.3476954996585846, "learning_rate": 1.053166894342111e-05, "loss": 0.4405, "step": 26020 }, { "epoch": 0.4831002179539067, "grad_norm": 0.5313552021980286, "learning_rate": 1.0530504098980732e-05, "loss": 0.1211, "step": 26022 }, { "epoch": 0.4831373480913253, "grad_norm": 0.40248793363571167, "learning_rate": 1.052933924732178e-05, "loss": 0.2196, "step": 26024 }, { "epoch": 0.48317447822874393, "grad_norm": 0.39716479182243347, "learning_rate": 1.0528174388460108e-05, "loss": 0.3005, "step": 26026 }, { "epoch": 0.48321160836616256, "grad_norm": 0.2691101133823395, "learning_rate": 1.0527009522411567e-05, "loss": 0.3618, "step": 26028 }, { "epoch": 0.4832487385035812, "grad_norm": 0.3173148036003113, "learning_rate": 1.0525844649192001e-05, "loss": 0.247, "step": 26030 }, { "epoch": 0.4832858686409998, "grad_norm": 0.6390669941902161, "learning_rate": 1.0524679768817264e-05, "loss": 0.492, "step": 26032 }, { "epoch": 0.4833229987784185, "grad_norm": 0.344378799200058, "learning_rate": 1.0523514881303211e-05, "loss": 0.3025, "step": 26034 }, { "epoch": 0.48336012891583713, "grad_norm": 0.3831423819065094, "learning_rate": 1.0522349986665685e-05, "loss": 0.3224, "step": 26036 }, { "epoch": 0.48339725905325576, "grad_norm": 0.42256098985671997, "learning_rate": 1.0521185084920544e-05, "loss": 0.1753, "step": 26038 }, { "epoch": 0.4834343891906744, "grad_norm": 0.5278465747833252, "learning_rate": 1.0520020176083634e-05, "loss": 0.3064, "step": 26040 }, { "epoch": 0.483471519328093, "grad_norm": 0.2826898992061615, "learning_rate": 1.0518855260170803e-05, "loss": 0.0739, "step": 26042 }, { "epoch": 0.4835086494655117, "grad_norm": 0.2939543128013611, "learning_rate": 1.051769033719791e-05, "loss": 0.2762, "step": 26044 }, { "epoch": 0.4835457796029303, "grad_norm": 0.35585254430770874, "learning_rate": 1.0516525407180801e-05, "loss": 0.377, "step": 26046 }, { "epoch": 0.48358290974034895, "grad_norm": 0.3219221830368042, "learning_rate": 1.0515360470135329e-05, "loss": 0.3918, "step": 26048 }, { "epoch": 0.4836200398777676, "grad_norm": 0.5564090013504028, "learning_rate": 1.0514195526077344e-05, "loss": 0.4431, "step": 26050 }, { "epoch": 0.4836571700151862, "grad_norm": 0.9560893774032593, "learning_rate": 1.0513030575022698e-05, "loss": 0.2544, "step": 26052 }, { "epoch": 0.48369430015260484, "grad_norm": 0.21328911185264587, "learning_rate": 1.0511865616987245e-05, "loss": 0.3638, "step": 26054 }, { "epoch": 0.4837314302900235, "grad_norm": 0.37342801690101624, "learning_rate": 1.0510700651986829e-05, "loss": 0.2924, "step": 26056 }, { "epoch": 0.48376856042744215, "grad_norm": 0.4090507924556732, "learning_rate": 1.0509535680037312e-05, "loss": 0.3017, "step": 26058 }, { "epoch": 0.4838056905648608, "grad_norm": 0.5802554488182068, "learning_rate": 1.050837070115454e-05, "loss": 0.2673, "step": 26060 }, { "epoch": 0.4838428207022794, "grad_norm": 0.31440210342407227, "learning_rate": 1.0507205715354363e-05, "loss": 0.2877, "step": 26062 }, { "epoch": 0.48387995083969804, "grad_norm": 0.44029486179351807, "learning_rate": 1.0506040722652635e-05, "loss": 0.3705, "step": 26064 }, { "epoch": 0.4839170809771167, "grad_norm": 0.4129449427127838, "learning_rate": 1.0504875723065212e-05, "loss": 0.3597, "step": 26066 }, { "epoch": 0.48395421111453535, "grad_norm": 0.30198583006858826, "learning_rate": 1.050371071660794e-05, "loss": 0.4239, "step": 26068 }, { "epoch": 0.483991341251954, "grad_norm": 0.4920758605003357, "learning_rate": 1.0502545703296673e-05, "loss": 0.3841, "step": 26070 }, { "epoch": 0.4840284713893726, "grad_norm": 0.9584425091743469, "learning_rate": 1.0501380683147269e-05, "loss": 0.2691, "step": 26072 }, { "epoch": 0.48406560152679123, "grad_norm": 0.4189384877681732, "learning_rate": 1.0500215656175571e-05, "loss": 0.2275, "step": 26074 }, { "epoch": 0.48410273166420986, "grad_norm": 0.27028873562812805, "learning_rate": 1.0499050622397437e-05, "loss": 0.3389, "step": 26076 }, { "epoch": 0.48413986180162855, "grad_norm": 0.39562690258026123, "learning_rate": 1.0497885581828721e-05, "loss": 0.2539, "step": 26078 }, { "epoch": 0.4841769919390472, "grad_norm": 0.28657814860343933, "learning_rate": 1.0496720534485274e-05, "loss": 0.2297, "step": 26080 }, { "epoch": 0.4842141220764658, "grad_norm": 0.33497828245162964, "learning_rate": 1.0495555480382946e-05, "loss": 0.3391, "step": 26082 }, { "epoch": 0.48425125221388443, "grad_norm": 0.3151029050350189, "learning_rate": 1.049439041953759e-05, "loss": 0.3636, "step": 26084 }, { "epoch": 0.48428838235130306, "grad_norm": 0.37022027373313904, "learning_rate": 1.0493225351965066e-05, "loss": 0.1166, "step": 26086 }, { "epoch": 0.48432551248872174, "grad_norm": 0.2984742820262909, "learning_rate": 1.049206027768122e-05, "loss": 0.2928, "step": 26088 }, { "epoch": 0.48436264262614037, "grad_norm": 0.2641547918319702, "learning_rate": 1.0490895196701909e-05, "loss": 0.285, "step": 26090 }, { "epoch": 0.484399772763559, "grad_norm": 0.24549604952335358, "learning_rate": 1.0489730109042986e-05, "loss": 0.2447, "step": 26092 }, { "epoch": 0.48443690290097763, "grad_norm": 0.5230951309204102, "learning_rate": 1.04885650147203e-05, "loss": 0.433, "step": 26094 }, { "epoch": 0.48447403303839626, "grad_norm": 0.30966290831565857, "learning_rate": 1.0487399913749707e-05, "loss": 0.2986, "step": 26096 }, { "epoch": 0.48451116317581494, "grad_norm": 0.4245856702327728, "learning_rate": 1.0486234806147064e-05, "loss": 0.4317, "step": 26098 }, { "epoch": 0.48454829331323357, "grad_norm": 0.4193677008152008, "learning_rate": 1.048506969192822e-05, "loss": 0.3559, "step": 26100 }, { "epoch": 0.4845854234506522, "grad_norm": 0.3671071529388428, "learning_rate": 1.0483904571109031e-05, "loss": 0.0657, "step": 26102 }, { "epoch": 0.4846225535880708, "grad_norm": 0.3049987256526947, "learning_rate": 1.0482739443705352e-05, "loss": 0.1637, "step": 26104 }, { "epoch": 0.48465968372548945, "grad_norm": 0.3050650358200073, "learning_rate": 1.0481574309733032e-05, "loss": 0.2439, "step": 26106 }, { "epoch": 0.4846968138629081, "grad_norm": 0.5235344171524048, "learning_rate": 1.0480409169207928e-05, "loss": 0.3414, "step": 26108 }, { "epoch": 0.48473394400032677, "grad_norm": 0.28188449144363403, "learning_rate": 1.0479244022145897e-05, "loss": 0.2146, "step": 26110 }, { "epoch": 0.4847710741377454, "grad_norm": 0.42991575598716736, "learning_rate": 1.0478078868562789e-05, "loss": 0.2435, "step": 26112 }, { "epoch": 0.484808204275164, "grad_norm": 0.25120148062705994, "learning_rate": 1.047691370847446e-05, "loss": 0.3591, "step": 26114 }, { "epoch": 0.48484533441258265, "grad_norm": 0.5862785577774048, "learning_rate": 1.0475748541896765e-05, "loss": 0.4838, "step": 26116 }, { "epoch": 0.4848824645500013, "grad_norm": 0.38598841428756714, "learning_rate": 1.0474583368845552e-05, "loss": 0.1274, "step": 26118 }, { "epoch": 0.48491959468741996, "grad_norm": 0.39472344517707825, "learning_rate": 1.0473418189336686e-05, "loss": 0.3618, "step": 26120 }, { "epoch": 0.4849567248248386, "grad_norm": 0.5864745378494263, "learning_rate": 1.0472253003386017e-05, "loss": 0.2096, "step": 26122 }, { "epoch": 0.4849938549622572, "grad_norm": 0.6655839085578918, "learning_rate": 1.0471087811009396e-05, "loss": 0.3477, "step": 26124 }, { "epoch": 0.48503098509967585, "grad_norm": 0.27581992745399475, "learning_rate": 1.0469922612222683e-05, "loss": 0.3604, "step": 26126 }, { "epoch": 0.4850681152370945, "grad_norm": 0.38262125849723816, "learning_rate": 1.0468757407041728e-05, "loss": 0.1624, "step": 26128 }, { "epoch": 0.4851052453745131, "grad_norm": 0.28196388483047485, "learning_rate": 1.0467592195482393e-05, "loss": 0.3331, "step": 26130 }, { "epoch": 0.4851423755119318, "grad_norm": 0.3221602439880371, "learning_rate": 1.046642697756053e-05, "loss": 0.2333, "step": 26132 }, { "epoch": 0.4851795056493504, "grad_norm": 0.2928047180175781, "learning_rate": 1.0465261753291987e-05, "loss": 0.1963, "step": 26134 }, { "epoch": 0.48521663578676905, "grad_norm": 0.45597997307777405, "learning_rate": 1.0464096522692628e-05, "loss": 0.2277, "step": 26136 }, { "epoch": 0.4852537659241877, "grad_norm": 0.24692007899284363, "learning_rate": 1.0462931285778304e-05, "loss": 0.356, "step": 26138 }, { "epoch": 0.4852908960616063, "grad_norm": 0.44605571031570435, "learning_rate": 1.0461766042564873e-05, "loss": 0.4442, "step": 26140 }, { "epoch": 0.485328026199025, "grad_norm": 0.2032811939716339, "learning_rate": 1.046060079306819e-05, "loss": 0.1136, "step": 26142 }, { "epoch": 0.4853651563364436, "grad_norm": 0.4138736426830292, "learning_rate": 1.0459435537304113e-05, "loss": 0.2724, "step": 26144 }, { "epoch": 0.48540228647386224, "grad_norm": 0.5404435992240906, "learning_rate": 1.0458270275288489e-05, "loss": 0.3294, "step": 26146 }, { "epoch": 0.48543941661128087, "grad_norm": 0.354728102684021, "learning_rate": 1.045710500703718e-05, "loss": 0.1184, "step": 26148 }, { "epoch": 0.4854765467486995, "grad_norm": 0.3683900535106659, "learning_rate": 1.0455939732566042e-05, "loss": 0.2005, "step": 26150 }, { "epoch": 0.48551367688611813, "grad_norm": 0.3551870286464691, "learning_rate": 1.045477445189093e-05, "loss": 0.1388, "step": 26152 }, { "epoch": 0.4855508070235368, "grad_norm": 0.3943800628185272, "learning_rate": 1.0453609165027702e-05, "loss": 0.2921, "step": 26154 }, { "epoch": 0.48558793716095544, "grad_norm": 0.5617119669914246, "learning_rate": 1.045244387199221e-05, "loss": 0.3084, "step": 26156 }, { "epoch": 0.48562506729837407, "grad_norm": 0.47015583515167236, "learning_rate": 1.0451278572800312e-05, "loss": 0.1243, "step": 26158 }, { "epoch": 0.4856621974357927, "grad_norm": 0.35810357332229614, "learning_rate": 1.0450113267467865e-05, "loss": 0.2815, "step": 26160 }, { "epoch": 0.4856993275732113, "grad_norm": 0.6324231624603271, "learning_rate": 1.0448947956010722e-05, "loss": 0.2323, "step": 26162 }, { "epoch": 0.48573645771063, "grad_norm": 0.35674723982810974, "learning_rate": 1.0447782638444746e-05, "loss": 0.2651, "step": 26164 }, { "epoch": 0.48577358784804864, "grad_norm": 0.513820469379425, "learning_rate": 1.0446617314785787e-05, "loss": 0.2444, "step": 26166 }, { "epoch": 0.48581071798546727, "grad_norm": 0.38412192463874817, "learning_rate": 1.0445451985049706e-05, "loss": 0.1859, "step": 26168 }, { "epoch": 0.4858478481228859, "grad_norm": 0.6000122427940369, "learning_rate": 1.0444286649252356e-05, "loss": 0.3476, "step": 26170 }, { "epoch": 0.4858849782603045, "grad_norm": 0.4199706017971039, "learning_rate": 1.0443121307409594e-05, "loss": 0.171, "step": 26172 }, { "epoch": 0.4859221083977232, "grad_norm": 0.28809675574302673, "learning_rate": 1.0441955959537282e-05, "loss": 0.3298, "step": 26174 }, { "epoch": 0.48595923853514184, "grad_norm": 0.3716817796230316, "learning_rate": 1.0440790605651274e-05, "loss": 0.2936, "step": 26176 }, { "epoch": 0.48599636867256046, "grad_norm": 0.2462117075920105, "learning_rate": 1.0439625245767422e-05, "loss": 0.1726, "step": 26178 }, { "epoch": 0.4860334988099791, "grad_norm": 0.16147224605083466, "learning_rate": 1.043845987990159e-05, "loss": 0.1436, "step": 26180 }, { "epoch": 0.4860706289473977, "grad_norm": 0.2748958170413971, "learning_rate": 1.0437294508069631e-05, "loss": 0.2125, "step": 26182 }, { "epoch": 0.48610775908481635, "grad_norm": 0.3940703570842743, "learning_rate": 1.0436129130287405e-05, "loss": 0.4976, "step": 26184 }, { "epoch": 0.48614488922223503, "grad_norm": 0.3703155517578125, "learning_rate": 1.0434963746570767e-05, "loss": 0.2278, "step": 26186 }, { "epoch": 0.48618201935965366, "grad_norm": 0.3123624324798584, "learning_rate": 1.0433798356935575e-05, "loss": 0.0522, "step": 26188 }, { "epoch": 0.4862191494970723, "grad_norm": 0.3375261723995209, "learning_rate": 1.0432632961397686e-05, "loss": 0.1961, "step": 26190 }, { "epoch": 0.4862562796344909, "grad_norm": 0.27479127049446106, "learning_rate": 1.0431467559972961e-05, "loss": 0.4013, "step": 26192 }, { "epoch": 0.48629340977190955, "grad_norm": 0.37649446725845337, "learning_rate": 1.0430302152677251e-05, "loss": 0.41, "step": 26194 }, { "epoch": 0.48633053990932823, "grad_norm": 0.41825172305107117, "learning_rate": 1.0429136739526423e-05, "loss": 0.319, "step": 26196 }, { "epoch": 0.48636767004674686, "grad_norm": 0.3431108593940735, "learning_rate": 1.0427971320536325e-05, "loss": 0.4502, "step": 26198 }, { "epoch": 0.4864048001841655, "grad_norm": 0.34142208099365234, "learning_rate": 1.042680589572282e-05, "loss": 0.2738, "step": 26200 }, { "epoch": 0.4864419303215841, "grad_norm": 0.2936791777610779, "learning_rate": 1.0425640465101765e-05, "loss": 0.3192, "step": 26202 }, { "epoch": 0.48647906045900274, "grad_norm": 0.5433480143547058, "learning_rate": 1.0424475028689021e-05, "loss": 0.3617, "step": 26204 }, { "epoch": 0.48651619059642137, "grad_norm": 0.3263556957244873, "learning_rate": 1.042330958650044e-05, "loss": 0.2207, "step": 26206 }, { "epoch": 0.48655332073384006, "grad_norm": 0.44900795817375183, "learning_rate": 1.0422144138551885e-05, "loss": 0.2401, "step": 26208 }, { "epoch": 0.4865904508712587, "grad_norm": 0.3900314271450043, "learning_rate": 1.0420978684859212e-05, "loss": 0.3598, "step": 26210 }, { "epoch": 0.4866275810086773, "grad_norm": 0.336601197719574, "learning_rate": 1.041981322543828e-05, "loss": 0.4088, "step": 26212 }, { "epoch": 0.48666471114609594, "grad_norm": 0.2085847407579422, "learning_rate": 1.0418647760304948e-05, "loss": 0.2696, "step": 26214 }, { "epoch": 0.48670184128351457, "grad_norm": 0.23566988110542297, "learning_rate": 1.0417482289475074e-05, "loss": 0.278, "step": 26216 }, { "epoch": 0.48673897142093325, "grad_norm": 0.36089327931404114, "learning_rate": 1.0416316812964517e-05, "loss": 0.1912, "step": 26218 }, { "epoch": 0.4867761015583519, "grad_norm": 0.4002305865287781, "learning_rate": 1.0415151330789135e-05, "loss": 0.2394, "step": 26220 }, { "epoch": 0.4868132316957705, "grad_norm": 0.315372496843338, "learning_rate": 1.0413985842964785e-05, "loss": 0.2527, "step": 26222 }, { "epoch": 0.48685036183318914, "grad_norm": 0.2926592528820038, "learning_rate": 1.0412820349507332e-05, "loss": 0.4929, "step": 26224 }, { "epoch": 0.48688749197060777, "grad_norm": 0.4769361913204193, "learning_rate": 1.0411654850432627e-05, "loss": 0.3774, "step": 26226 }, { "epoch": 0.4869246221080264, "grad_norm": 0.46475571393966675, "learning_rate": 1.0410489345756533e-05, "loss": 0.2797, "step": 26228 }, { "epoch": 0.4869617522454451, "grad_norm": 1.1983230113983154, "learning_rate": 1.0409323835494912e-05, "loss": 0.336, "step": 26230 }, { "epoch": 0.4869988823828637, "grad_norm": 0.6116618514060974, "learning_rate": 1.0408158319663612e-05, "loss": 0.3149, "step": 26232 }, { "epoch": 0.48703601252028234, "grad_norm": 0.42179369926452637, "learning_rate": 1.0406992798278505e-05, "loss": 0.3866, "step": 26234 }, { "epoch": 0.48707314265770096, "grad_norm": 0.4507051706314087, "learning_rate": 1.0405827271355446e-05, "loss": 0.4276, "step": 26236 }, { "epoch": 0.4871102727951196, "grad_norm": 0.5983268022537231, "learning_rate": 1.0404661738910293e-05, "loss": 0.3079, "step": 26238 }, { "epoch": 0.4871474029325383, "grad_norm": 0.353651762008667, "learning_rate": 1.0403496200958904e-05, "loss": 0.2173, "step": 26240 }, { "epoch": 0.4871845330699569, "grad_norm": 0.5611802339553833, "learning_rate": 1.040233065751714e-05, "loss": 0.1258, "step": 26242 }, { "epoch": 0.48722166320737553, "grad_norm": 0.35338711738586426, "learning_rate": 1.0401165108600863e-05, "loss": 0.3382, "step": 26244 }, { "epoch": 0.48725879334479416, "grad_norm": 0.4124941825866699, "learning_rate": 1.0399999554225928e-05, "loss": 0.3234, "step": 26246 }, { "epoch": 0.4872959234822128, "grad_norm": 0.41682717204093933, "learning_rate": 1.03988339944082e-05, "loss": 0.468, "step": 26248 }, { "epoch": 0.4873330536196315, "grad_norm": 0.36976101994514465, "learning_rate": 1.0397668429163537e-05, "loss": 0.4514, "step": 26250 }, { "epoch": 0.4873701837570501, "grad_norm": 0.36929577589035034, "learning_rate": 1.0396502858507792e-05, "loss": 0.1731, "step": 26252 }, { "epoch": 0.48740731389446873, "grad_norm": 0.4354987144470215, "learning_rate": 1.0395337282456834e-05, "loss": 0.2372, "step": 26254 }, { "epoch": 0.48744444403188736, "grad_norm": 0.47942647337913513, "learning_rate": 1.0394171701026522e-05, "loss": 0.3352, "step": 26256 }, { "epoch": 0.487481574169306, "grad_norm": 0.3604249954223633, "learning_rate": 1.0393006114232712e-05, "loss": 0.2125, "step": 26258 }, { "epoch": 0.4875187043067246, "grad_norm": 0.2972436547279358, "learning_rate": 1.0391840522091265e-05, "loss": 0.3072, "step": 26260 }, { "epoch": 0.4875558344441433, "grad_norm": 0.21511903405189514, "learning_rate": 1.0390674924618046e-05, "loss": 0.2936, "step": 26262 }, { "epoch": 0.4875929645815619, "grad_norm": 0.33157965540885925, "learning_rate": 1.0389509321828905e-05, "loss": 0.1684, "step": 26264 }, { "epoch": 0.48763009471898056, "grad_norm": 0.6565263867378235, "learning_rate": 1.0388343713739712e-05, "loss": 0.2897, "step": 26266 }, { "epoch": 0.4876672248563992, "grad_norm": 0.3367452919483185, "learning_rate": 1.0387178100366326e-05, "loss": 0.1929, "step": 26268 }, { "epoch": 0.4877043549938178, "grad_norm": 0.3277583420276642, "learning_rate": 1.0386012481724606e-05, "loss": 0.1916, "step": 26270 }, { "epoch": 0.4877414851312365, "grad_norm": 0.37614673376083374, "learning_rate": 1.038484685783041e-05, "loss": 0.2698, "step": 26272 }, { "epoch": 0.4877786152686551, "grad_norm": 0.5407077074050903, "learning_rate": 1.0383681228699602e-05, "loss": 0.2743, "step": 26274 }, { "epoch": 0.48781574540607375, "grad_norm": 0.4296466112136841, "learning_rate": 1.0382515594348042e-05, "loss": 0.206, "step": 26276 }, { "epoch": 0.4878528755434924, "grad_norm": 0.31616124510765076, "learning_rate": 1.0381349954791588e-05, "loss": 0.3692, "step": 26278 }, { "epoch": 0.487890005680911, "grad_norm": 0.4597640633583069, "learning_rate": 1.0380184310046106e-05, "loss": 0.2951, "step": 26280 }, { "epoch": 0.48792713581832964, "grad_norm": 0.3943755626678467, "learning_rate": 1.0379018660127456e-05, "loss": 0.2196, "step": 26282 }, { "epoch": 0.4879642659557483, "grad_norm": 0.30830249190330505, "learning_rate": 1.0377853005051495e-05, "loss": 0.3263, "step": 26284 }, { "epoch": 0.48800139609316695, "grad_norm": 0.2921684682369232, "learning_rate": 1.0376687344834087e-05, "loss": 0.2259, "step": 26286 }, { "epoch": 0.4880385262305856, "grad_norm": 0.27995535731315613, "learning_rate": 1.0375521679491089e-05, "loss": 0.287, "step": 26288 }, { "epoch": 0.4880756563680042, "grad_norm": 0.3882593512535095, "learning_rate": 1.0374356009038368e-05, "loss": 0.3016, "step": 26290 }, { "epoch": 0.48811278650542284, "grad_norm": 0.32382476329803467, "learning_rate": 1.0373190333491788e-05, "loss": 0.2859, "step": 26292 }, { "epoch": 0.4881499166428415, "grad_norm": 0.4555091857910156, "learning_rate": 1.03720246528672e-05, "loss": 0.3855, "step": 26294 }, { "epoch": 0.48818704678026015, "grad_norm": 0.23767714202404022, "learning_rate": 1.0370858967180472e-05, "loss": 0.081, "step": 26296 }, { "epoch": 0.4882241769176788, "grad_norm": 0.3204522430896759, "learning_rate": 1.0369693276447464e-05, "loss": 0.22, "step": 26298 }, { "epoch": 0.4882613070550974, "grad_norm": 0.3012599050998688, "learning_rate": 1.0368527580684038e-05, "loss": 0.3562, "step": 26300 }, { "epoch": 0.48829843719251603, "grad_norm": 0.29556670784950256, "learning_rate": 1.0367361879906057e-05, "loss": 0.4121, "step": 26302 }, { "epoch": 0.48833556732993466, "grad_norm": 0.26569175720214844, "learning_rate": 1.0366196174129379e-05, "loss": 0.2434, "step": 26304 }, { "epoch": 0.48837269746735334, "grad_norm": 0.4446493089199066, "learning_rate": 1.0365030463369869e-05, "loss": 0.4108, "step": 26306 }, { "epoch": 0.488409827604772, "grad_norm": 0.42011359333992004, "learning_rate": 1.0363864747643388e-05, "loss": 0.4548, "step": 26308 }, { "epoch": 0.4884469577421906, "grad_norm": 0.28344857692718506, "learning_rate": 1.0362699026965798e-05, "loss": 0.2177, "step": 26310 }, { "epoch": 0.48848408787960923, "grad_norm": 0.26737070083618164, "learning_rate": 1.036153330135296e-05, "loss": 0.2392, "step": 26312 }, { "epoch": 0.48852121801702786, "grad_norm": 0.4713113009929657, "learning_rate": 1.0360367570820737e-05, "loss": 0.2023, "step": 26314 }, { "epoch": 0.48855834815444654, "grad_norm": 0.42013394832611084, "learning_rate": 1.0359201835384989e-05, "loss": 0.2361, "step": 26316 }, { "epoch": 0.48859547829186517, "grad_norm": 0.302665650844574, "learning_rate": 1.0358036095061583e-05, "loss": 0.2088, "step": 26318 }, { "epoch": 0.4886326084292838, "grad_norm": 0.5672982335090637, "learning_rate": 1.0356870349866376e-05, "loss": 0.4448, "step": 26320 }, { "epoch": 0.4886697385667024, "grad_norm": 0.474040150642395, "learning_rate": 1.0355704599815235e-05, "loss": 0.3476, "step": 26322 }, { "epoch": 0.48870686870412106, "grad_norm": 0.6736011505126953, "learning_rate": 1.0354538844924018e-05, "loss": 0.331, "step": 26324 }, { "epoch": 0.48874399884153974, "grad_norm": 0.28990036249160767, "learning_rate": 1.0353373085208588e-05, "loss": 0.4096, "step": 26326 }, { "epoch": 0.48878112897895837, "grad_norm": 0.33474770188331604, "learning_rate": 1.035220732068481e-05, "loss": 0.2461, "step": 26328 }, { "epoch": 0.488818259116377, "grad_norm": 0.2854396402835846, "learning_rate": 1.0351041551368545e-05, "loss": 0.2418, "step": 26330 }, { "epoch": 0.4888553892537956, "grad_norm": 0.24770832061767578, "learning_rate": 1.0349875777275658e-05, "loss": 0.2065, "step": 26332 }, { "epoch": 0.48889251939121425, "grad_norm": 0.3799992799758911, "learning_rate": 1.0348709998422009e-05, "loss": 0.3352, "step": 26334 }, { "epoch": 0.4889296495286329, "grad_norm": 0.4896249771118164, "learning_rate": 1.0347544214823457e-05, "loss": 0.2304, "step": 26336 }, { "epoch": 0.48896677966605157, "grad_norm": 0.5061522722244263, "learning_rate": 1.0346378426495876e-05, "loss": 0.2901, "step": 26338 }, { "epoch": 0.4890039098034702, "grad_norm": 0.3306157886981964, "learning_rate": 1.0345212633455118e-05, "loss": 0.1861, "step": 26340 }, { "epoch": 0.4890410399408888, "grad_norm": 0.45157331228256226, "learning_rate": 1.034404683571705e-05, "loss": 0.3889, "step": 26342 }, { "epoch": 0.48907817007830745, "grad_norm": 0.27555394172668457, "learning_rate": 1.0342881033297536e-05, "loss": 0.3142, "step": 26344 }, { "epoch": 0.4891153002157261, "grad_norm": 0.5186547636985779, "learning_rate": 1.0341715226212438e-05, "loss": 0.263, "step": 26346 }, { "epoch": 0.48915243035314476, "grad_norm": 0.3780006766319275, "learning_rate": 1.034054941447762e-05, "loss": 0.4711, "step": 26348 }, { "epoch": 0.4891895604905634, "grad_norm": 0.4200582802295685, "learning_rate": 1.0339383598108946e-05, "loss": 0.1823, "step": 26350 }, { "epoch": 0.489226690627982, "grad_norm": 0.2983166575431824, "learning_rate": 1.0338217777122275e-05, "loss": 0.1176, "step": 26352 }, { "epoch": 0.48926382076540065, "grad_norm": 0.367590993642807, "learning_rate": 1.0337051951533474e-05, "loss": 0.2617, "step": 26354 }, { "epoch": 0.4893009509028193, "grad_norm": 0.23578345775604248, "learning_rate": 1.0335886121358407e-05, "loss": 0.1151, "step": 26356 }, { "epoch": 0.4893380810402379, "grad_norm": 0.34940141439437866, "learning_rate": 1.0334720286612933e-05, "loss": 0.3644, "step": 26358 }, { "epoch": 0.4893752111776566, "grad_norm": 0.4192293584346771, "learning_rate": 1.0333554447312919e-05, "loss": 0.2118, "step": 26360 }, { "epoch": 0.4894123413150752, "grad_norm": 0.438501238822937, "learning_rate": 1.033238860347423e-05, "loss": 0.2045, "step": 26362 }, { "epoch": 0.48944947145249385, "grad_norm": 0.22159920632839203, "learning_rate": 1.0331222755112726e-05, "loss": 0.2492, "step": 26364 }, { "epoch": 0.4894866015899125, "grad_norm": 0.36134669184684753, "learning_rate": 1.0330056902244273e-05, "loss": 0.2277, "step": 26366 }, { "epoch": 0.4895237317273311, "grad_norm": 0.2913864254951477, "learning_rate": 1.0328891044884737e-05, "loss": 0.349, "step": 26368 }, { "epoch": 0.4895608618647498, "grad_norm": 0.3037043809890747, "learning_rate": 1.0327725183049977e-05, "loss": 0.2406, "step": 26370 }, { "epoch": 0.4895979920021684, "grad_norm": 0.3260821998119354, "learning_rate": 1.0326559316755855e-05, "loss": 0.3151, "step": 26372 }, { "epoch": 0.48963512213958704, "grad_norm": 0.30364322662353516, "learning_rate": 1.0325393446018243e-05, "loss": 0.3556, "step": 26374 }, { "epoch": 0.48967225227700567, "grad_norm": 0.2487402707338333, "learning_rate": 1.0324227570853002e-05, "loss": 0.353, "step": 26376 }, { "epoch": 0.4897093824144243, "grad_norm": 0.5113440155982971, "learning_rate": 1.0323061691275993e-05, "loss": 0.205, "step": 26378 }, { "epoch": 0.4897465125518429, "grad_norm": 0.4941776692867279, "learning_rate": 1.0321895807303082e-05, "loss": 0.3058, "step": 26380 }, { "epoch": 0.4897836426892616, "grad_norm": 0.3544394373893738, "learning_rate": 1.0320729918950134e-05, "loss": 0.1782, "step": 26382 }, { "epoch": 0.48982077282668024, "grad_norm": 0.46059635281562805, "learning_rate": 1.031956402623301e-05, "loss": 0.2408, "step": 26384 }, { "epoch": 0.48985790296409887, "grad_norm": 0.4019808769226074, "learning_rate": 1.031839812916758e-05, "loss": 0.2676, "step": 26386 }, { "epoch": 0.4898950331015175, "grad_norm": 0.41258150339126587, "learning_rate": 1.0317232227769705e-05, "loss": 0.1422, "step": 26388 }, { "epoch": 0.4899321632389361, "grad_norm": 0.39506393671035767, "learning_rate": 1.0316066322055247e-05, "loss": 0.2838, "step": 26390 }, { "epoch": 0.4899692933763548, "grad_norm": 0.4231836497783661, "learning_rate": 1.0314900412040073e-05, "loss": 0.2662, "step": 26392 }, { "epoch": 0.49000642351377344, "grad_norm": 0.5631445050239563, "learning_rate": 1.031373449774005e-05, "loss": 0.3403, "step": 26394 }, { "epoch": 0.49004355365119207, "grad_norm": 0.33038315176963806, "learning_rate": 1.031256857917104e-05, "loss": 0.2205, "step": 26396 }, { "epoch": 0.4900806837886107, "grad_norm": 0.3521159887313843, "learning_rate": 1.0311402656348909e-05, "loss": 0.068, "step": 26398 }, { "epoch": 0.4901178139260293, "grad_norm": 0.3315359652042389, "learning_rate": 1.031023672928952e-05, "loss": 0.1829, "step": 26400 }, { "epoch": 0.490154944063448, "grad_norm": 0.31536856293678284, "learning_rate": 1.0309070798008736e-05, "loss": 0.2038, "step": 26402 }, { "epoch": 0.49019207420086663, "grad_norm": 0.3454214632511139, "learning_rate": 1.0307904862522425e-05, "loss": 0.2291, "step": 26404 }, { "epoch": 0.49022920433828526, "grad_norm": 0.32094013690948486, "learning_rate": 1.0306738922846453e-05, "loss": 0.2592, "step": 26406 }, { "epoch": 0.4902663344757039, "grad_norm": 0.4113997220993042, "learning_rate": 1.0305572978996683e-05, "loss": 0.2533, "step": 26408 }, { "epoch": 0.4903034646131225, "grad_norm": 0.6455010771751404, "learning_rate": 1.0304407030988976e-05, "loss": 0.2174, "step": 26410 }, { "epoch": 0.49034059475054115, "grad_norm": 0.4021225869655609, "learning_rate": 1.0303241078839206e-05, "loss": 0.2666, "step": 26412 }, { "epoch": 0.49037772488795983, "grad_norm": 0.31682640314102173, "learning_rate": 1.0302075122563229e-05, "loss": 0.0896, "step": 26414 }, { "epoch": 0.49041485502537846, "grad_norm": 0.4463070034980774, "learning_rate": 1.0300909162176915e-05, "loss": 0.3754, "step": 26416 }, { "epoch": 0.4904519851627971, "grad_norm": 0.3955119550228119, "learning_rate": 1.0299743197696133e-05, "loss": 0.2614, "step": 26418 }, { "epoch": 0.4904891153002157, "grad_norm": 0.47725868225097656, "learning_rate": 1.0298577229136741e-05, "loss": 0.2706, "step": 26420 }, { "epoch": 0.49052624543763435, "grad_norm": 0.3843446969985962, "learning_rate": 1.0297411256514606e-05, "loss": 0.0535, "step": 26422 }, { "epoch": 0.49056337557505303, "grad_norm": 0.6384403109550476, "learning_rate": 1.0296245279845596e-05, "loss": 0.4368, "step": 26424 }, { "epoch": 0.49060050571247166, "grad_norm": 0.40317273139953613, "learning_rate": 1.0295079299145576e-05, "loss": 0.1785, "step": 26426 }, { "epoch": 0.4906376358498903, "grad_norm": 0.40376192331314087, "learning_rate": 1.0293913314430408e-05, "loss": 0.2981, "step": 26428 }, { "epoch": 0.4906747659873089, "grad_norm": 0.292687326669693, "learning_rate": 1.0292747325715963e-05, "loss": 0.2739, "step": 26430 }, { "epoch": 0.49071189612472754, "grad_norm": 0.47509098052978516, "learning_rate": 1.0291581333018104e-05, "loss": 0.2739, "step": 26432 }, { "epoch": 0.49074902626214617, "grad_norm": 0.32268375158309937, "learning_rate": 1.0290415336352692e-05, "loss": 0.3287, "step": 26434 }, { "epoch": 0.49078615639956485, "grad_norm": 0.3302516043186188, "learning_rate": 1.02892493357356e-05, "loss": 0.2066, "step": 26436 }, { "epoch": 0.4908232865369835, "grad_norm": 0.3668462038040161, "learning_rate": 1.0288083331182692e-05, "loss": 0.1934, "step": 26438 }, { "epoch": 0.4908604166744021, "grad_norm": 0.3946560323238373, "learning_rate": 1.0286917322709835e-05, "loss": 0.268, "step": 26440 }, { "epoch": 0.49089754681182074, "grad_norm": 0.3784308433532715, "learning_rate": 1.0285751310332887e-05, "loss": 0.433, "step": 26442 }, { "epoch": 0.49093467694923937, "grad_norm": 0.4192458689212799, "learning_rate": 1.0284585294067724e-05, "loss": 0.349, "step": 26444 }, { "epoch": 0.49097180708665805, "grad_norm": 0.24136297404766083, "learning_rate": 1.0283419273930204e-05, "loss": 0.1592, "step": 26446 }, { "epoch": 0.4910089372240767, "grad_norm": 0.31866827607154846, "learning_rate": 1.0282253249936198e-05, "loss": 0.3945, "step": 26448 }, { "epoch": 0.4910460673614953, "grad_norm": 0.48707345128059387, "learning_rate": 1.0281087222101571e-05, "loss": 0.3245, "step": 26450 }, { "epoch": 0.49108319749891394, "grad_norm": 0.29982709884643555, "learning_rate": 1.027992119044219e-05, "loss": 0.283, "step": 26452 }, { "epoch": 0.49112032763633257, "grad_norm": 1.4749685525894165, "learning_rate": 1.0278755154973918e-05, "loss": 0.3387, "step": 26454 }, { "epoch": 0.4911574577737512, "grad_norm": 0.24318383634090424, "learning_rate": 1.0277589115712625e-05, "loss": 0.1674, "step": 26456 }, { "epoch": 0.4911945879111699, "grad_norm": 0.26913678646087646, "learning_rate": 1.0276423072674175e-05, "loss": 0.2121, "step": 26458 }, { "epoch": 0.4912317180485885, "grad_norm": 0.46446430683135986, "learning_rate": 1.0275257025874436e-05, "loss": 0.375, "step": 26460 }, { "epoch": 0.49126884818600713, "grad_norm": 0.3124270737171173, "learning_rate": 1.0274090975329272e-05, "loss": 0.4954, "step": 26462 }, { "epoch": 0.49130597832342576, "grad_norm": 0.4616926908493042, "learning_rate": 1.0272924921054552e-05, "loss": 0.3711, "step": 26464 }, { "epoch": 0.4913431084608444, "grad_norm": 0.2119692862033844, "learning_rate": 1.027175886306614e-05, "loss": 0.2615, "step": 26466 }, { "epoch": 0.4913802385982631, "grad_norm": 0.4567694664001465, "learning_rate": 1.0270592801379902e-05, "loss": 0.158, "step": 26468 }, { "epoch": 0.4914173687356817, "grad_norm": 0.44871777296066284, "learning_rate": 1.026942673601171e-05, "loss": 0.1461, "step": 26470 }, { "epoch": 0.49145449887310033, "grad_norm": 0.24175108969211578, "learning_rate": 1.0268260666977428e-05, "loss": 0.2603, "step": 26472 }, { "epoch": 0.49149162901051896, "grad_norm": 0.4066236615180969, "learning_rate": 1.0267094594292918e-05, "loss": 0.3231, "step": 26474 }, { "epoch": 0.4915287591479376, "grad_norm": 0.5919638276100159, "learning_rate": 1.0265928517974053e-05, "loss": 0.3488, "step": 26476 }, { "epoch": 0.4915658892853563, "grad_norm": 0.4045667350292206, "learning_rate": 1.0264762438036695e-05, "loss": 0.4124, "step": 26478 }, { "epoch": 0.4916030194227749, "grad_norm": 0.3669506311416626, "learning_rate": 1.0263596354496714e-05, "loss": 0.2583, "step": 26480 }, { "epoch": 0.49164014956019353, "grad_norm": 0.4036490023136139, "learning_rate": 1.0262430267369979e-05, "loss": 0.4119, "step": 26482 }, { "epoch": 0.49167727969761216, "grad_norm": 0.838638186454773, "learning_rate": 1.0261264176672354e-05, "loss": 0.4668, "step": 26484 }, { "epoch": 0.4917144098350308, "grad_norm": 0.3675265908241272, "learning_rate": 1.0260098082419702e-05, "loss": 0.2731, "step": 26486 }, { "epoch": 0.4917515399724494, "grad_norm": 0.38461047410964966, "learning_rate": 1.0258931984627897e-05, "loss": 0.0963, "step": 26488 }, { "epoch": 0.4917886701098681, "grad_norm": 0.40181416273117065, "learning_rate": 1.0257765883312804e-05, "loss": 0.4068, "step": 26490 }, { "epoch": 0.4918258002472867, "grad_norm": 0.5698877573013306, "learning_rate": 1.0256599778490289e-05, "loss": 0.2949, "step": 26492 }, { "epoch": 0.49186293038470535, "grad_norm": 0.3826169967651367, "learning_rate": 1.025543367017622e-05, "loss": 0.2638, "step": 26494 }, { "epoch": 0.491900060522124, "grad_norm": 0.2359253615140915, "learning_rate": 1.0254267558386461e-05, "loss": 0.2977, "step": 26496 }, { "epoch": 0.4919371906595426, "grad_norm": 0.3967624604701996, "learning_rate": 1.0253101443136883e-05, "loss": 0.2672, "step": 26498 }, { "epoch": 0.4919743207969613, "grad_norm": 0.33991748094558716, "learning_rate": 1.0251935324443355e-05, "loss": 0.3036, "step": 26500 }, { "epoch": 0.4920114509343799, "grad_norm": 0.3304373323917389, "learning_rate": 1.025076920232174e-05, "loss": 0.3123, "step": 26502 }, { "epoch": 0.49204858107179855, "grad_norm": 0.28889793157577515, "learning_rate": 1.0249603076787908e-05, "loss": 0.3092, "step": 26504 }, { "epoch": 0.4920857112092172, "grad_norm": 0.549604058265686, "learning_rate": 1.0248436947857725e-05, "loss": 0.2865, "step": 26506 }, { "epoch": 0.4921228413466358, "grad_norm": 0.3449608087539673, "learning_rate": 1.0247270815547061e-05, "loss": 0.1442, "step": 26508 }, { "epoch": 0.49215997148405444, "grad_norm": 0.30500131845474243, "learning_rate": 1.024610467987178e-05, "loss": 0.1518, "step": 26510 }, { "epoch": 0.4921971016214731, "grad_norm": 0.53940349817276, "learning_rate": 1.0244938540847752e-05, "loss": 0.3516, "step": 26512 }, { "epoch": 0.49223423175889175, "grad_norm": 0.428055077791214, "learning_rate": 1.0243772398490845e-05, "loss": 0.2377, "step": 26514 }, { "epoch": 0.4922713618963104, "grad_norm": 0.4078656733036041, "learning_rate": 1.0242606252816925e-05, "loss": 0.3004, "step": 26516 }, { "epoch": 0.492308492033729, "grad_norm": 0.34508949518203735, "learning_rate": 1.024144010384186e-05, "loss": 0.2106, "step": 26518 }, { "epoch": 0.49234562217114763, "grad_norm": 0.4372147023677826, "learning_rate": 1.0240273951581521e-05, "loss": 0.2488, "step": 26520 }, { "epoch": 0.4923827523085663, "grad_norm": 0.36753395199775696, "learning_rate": 1.0239107796051768e-05, "loss": 0.2514, "step": 26522 }, { "epoch": 0.49241988244598495, "grad_norm": 0.3852521777153015, "learning_rate": 1.023794163726848e-05, "loss": 0.2577, "step": 26524 }, { "epoch": 0.4924570125834036, "grad_norm": 0.5499061942100525, "learning_rate": 1.0236775475247518e-05, "loss": 0.4012, "step": 26526 }, { "epoch": 0.4924941427208222, "grad_norm": 0.37243399024009705, "learning_rate": 1.0235609310004748e-05, "loss": 0.268, "step": 26528 }, { "epoch": 0.49253127285824083, "grad_norm": 0.24391011893749237, "learning_rate": 1.023444314155604e-05, "loss": 0.2759, "step": 26530 }, { "epoch": 0.49256840299565946, "grad_norm": 0.34231558442115784, "learning_rate": 1.0233276969917267e-05, "loss": 0.4431, "step": 26532 }, { "epoch": 0.49260553313307814, "grad_norm": 0.4161551296710968, "learning_rate": 1.023211079510429e-05, "loss": 0.1988, "step": 26534 }, { "epoch": 0.4926426632704968, "grad_norm": 0.33417993783950806, "learning_rate": 1.0230944617132985e-05, "loss": 0.2987, "step": 26536 }, { "epoch": 0.4926797934079154, "grad_norm": 0.44479745626449585, "learning_rate": 1.0229778436019213e-05, "loss": 0.3708, "step": 26538 }, { "epoch": 0.49271692354533403, "grad_norm": 0.38209518790245056, "learning_rate": 1.0228612251778843e-05, "loss": 0.3813, "step": 26540 }, { "epoch": 0.49275405368275266, "grad_norm": 0.4661446213722229, "learning_rate": 1.0227446064427746e-05, "loss": 0.5228, "step": 26542 }, { "epoch": 0.49279118382017134, "grad_norm": 0.4897814393043518, "learning_rate": 1.022627987398179e-05, "loss": 0.4422, "step": 26544 }, { "epoch": 0.49282831395758997, "grad_norm": 0.2601756155490875, "learning_rate": 1.0225113680456844e-05, "loss": 0.2809, "step": 26546 }, { "epoch": 0.4928654440950086, "grad_norm": 0.3746464252471924, "learning_rate": 1.0223947483868773e-05, "loss": 0.414, "step": 26548 }, { "epoch": 0.4929025742324272, "grad_norm": 0.4170742332935333, "learning_rate": 1.0222781284233445e-05, "loss": 0.2898, "step": 26550 }, { "epoch": 0.49293970436984585, "grad_norm": 0.34457671642303467, "learning_rate": 1.0221615081566737e-05, "loss": 0.3377, "step": 26552 }, { "epoch": 0.49297683450726454, "grad_norm": 0.41983088850975037, "learning_rate": 1.0220448875884508e-05, "loss": 0.3021, "step": 26554 }, { "epoch": 0.49301396464468317, "grad_norm": 0.3755203187465668, "learning_rate": 1.021928266720263e-05, "loss": 0.3243, "step": 26556 }, { "epoch": 0.4930510947821018, "grad_norm": 0.38865768909454346, "learning_rate": 1.0218116455536975e-05, "loss": 0.26, "step": 26558 }, { "epoch": 0.4930882249195204, "grad_norm": 0.3116987943649292, "learning_rate": 1.0216950240903404e-05, "loss": 0.1243, "step": 26560 }, { "epoch": 0.49312535505693905, "grad_norm": 0.23697133362293243, "learning_rate": 1.021578402331779e-05, "loss": 0.2304, "step": 26562 }, { "epoch": 0.4931624851943577, "grad_norm": 0.46624237298965454, "learning_rate": 1.0214617802796005e-05, "loss": 0.4044, "step": 26564 }, { "epoch": 0.49319961533177636, "grad_norm": 0.3294352889060974, "learning_rate": 1.0213451579353913e-05, "loss": 0.1986, "step": 26566 }, { "epoch": 0.493236745469195, "grad_norm": 0.37446993589401245, "learning_rate": 1.0212285353007385e-05, "loss": 0.0786, "step": 26568 }, { "epoch": 0.4932738756066136, "grad_norm": 0.2895089089870453, "learning_rate": 1.0211119123772291e-05, "loss": 0.1566, "step": 26570 }, { "epoch": 0.49331100574403225, "grad_norm": 0.2512679398059845, "learning_rate": 1.0209952891664495e-05, "loss": 0.3362, "step": 26572 }, { "epoch": 0.4933481358814509, "grad_norm": 0.37703052163124084, "learning_rate": 1.0208786656699868e-05, "loss": 0.2679, "step": 26574 }, { "epoch": 0.49338526601886956, "grad_norm": 0.49722206592559814, "learning_rate": 1.0207620418894284e-05, "loss": 0.3751, "step": 26576 }, { "epoch": 0.4934223961562882, "grad_norm": 0.44880250096321106, "learning_rate": 1.0206454178263605e-05, "loss": 0.1814, "step": 26578 }, { "epoch": 0.4934595262937068, "grad_norm": 0.3312230408191681, "learning_rate": 1.0205287934823704e-05, "loss": 0.2786, "step": 26580 }, { "epoch": 0.49349665643112545, "grad_norm": 0.42703157663345337, "learning_rate": 1.0204121688590447e-05, "loss": 0.3236, "step": 26582 }, { "epoch": 0.4935337865685441, "grad_norm": 0.6197587251663208, "learning_rate": 1.0202955439579709e-05, "loss": 0.4815, "step": 26584 }, { "epoch": 0.4935709167059627, "grad_norm": 0.37159937620162964, "learning_rate": 1.0201789187807353e-05, "loss": 0.2774, "step": 26586 }, { "epoch": 0.4936080468433814, "grad_norm": 0.40503570437431335, "learning_rate": 1.0200622933289252e-05, "loss": 0.2478, "step": 26588 }, { "epoch": 0.4936451769808, "grad_norm": 0.4374489486217499, "learning_rate": 1.0199456676041275e-05, "loss": 0.3199, "step": 26590 }, { "epoch": 0.49368230711821864, "grad_norm": 0.3472742438316345, "learning_rate": 1.0198290416079287e-05, "loss": 0.2849, "step": 26592 }, { "epoch": 0.4937194372556373, "grad_norm": 0.4937828779220581, "learning_rate": 1.019712415341916e-05, "loss": 0.3462, "step": 26594 }, { "epoch": 0.4937565673930559, "grad_norm": 0.5056163668632507, "learning_rate": 1.0195957888076767e-05, "loss": 0.2618, "step": 26596 }, { "epoch": 0.4937936975304746, "grad_norm": 0.34028157591819763, "learning_rate": 1.0194791620067973e-05, "loss": 0.3849, "step": 26598 }, { "epoch": 0.4938308276678932, "grad_norm": 0.43088021874427795, "learning_rate": 1.0193625349408646e-05, "loss": 0.1685, "step": 26600 }, { "epoch": 0.49386795780531184, "grad_norm": 0.4028365910053253, "learning_rate": 1.0192459076114662e-05, "loss": 0.3712, "step": 26602 }, { "epoch": 0.49390508794273047, "grad_norm": 0.5389242768287659, "learning_rate": 1.0191292800201885e-05, "loss": 0.2741, "step": 26604 }, { "epoch": 0.4939422180801491, "grad_norm": 0.39854696393013, "learning_rate": 1.0190126521686183e-05, "loss": 0.1872, "step": 26606 }, { "epoch": 0.4939793482175677, "grad_norm": 0.21230767667293549, "learning_rate": 1.0188960240583433e-05, "loss": 0.229, "step": 26608 }, { "epoch": 0.4940164783549864, "grad_norm": 0.41884365677833557, "learning_rate": 1.0187793956909497e-05, "loss": 0.3876, "step": 26610 }, { "epoch": 0.49405360849240504, "grad_norm": 0.4534231424331665, "learning_rate": 1.018662767068025e-05, "loss": 0.3011, "step": 26612 }, { "epoch": 0.49409073862982367, "grad_norm": 0.5291032791137695, "learning_rate": 1.0185461381911556e-05, "loss": 0.2397, "step": 26614 }, { "epoch": 0.4941278687672423, "grad_norm": 0.4040442407131195, "learning_rate": 1.0184295090619291e-05, "loss": 0.265, "step": 26616 }, { "epoch": 0.4941649989046609, "grad_norm": 0.28613871335983276, "learning_rate": 1.0183128796819319e-05, "loss": 0.344, "step": 26618 }, { "epoch": 0.4942021290420796, "grad_norm": 0.8896944522857666, "learning_rate": 1.0181962500527516e-05, "loss": 0.2231, "step": 26620 }, { "epoch": 0.49423925917949824, "grad_norm": 0.43337246775627136, "learning_rate": 1.0180796201759748e-05, "loss": 0.2743, "step": 26622 }, { "epoch": 0.49427638931691686, "grad_norm": 0.4792998731136322, "learning_rate": 1.0179629900531885e-05, "loss": 0.3272, "step": 26624 }, { "epoch": 0.4943135194543355, "grad_norm": 0.4641440808773041, "learning_rate": 1.0178463596859794e-05, "loss": 0.2588, "step": 26626 }, { "epoch": 0.4943506495917541, "grad_norm": 0.5454699397087097, "learning_rate": 1.0177297290759352e-05, "loss": 0.3448, "step": 26628 }, { "epoch": 0.4943877797291728, "grad_norm": 0.48575395345687866, "learning_rate": 1.0176130982246424e-05, "loss": 0.3347, "step": 26630 }, { "epoch": 0.49442490986659143, "grad_norm": 0.16514244675636292, "learning_rate": 1.017496467133688e-05, "loss": 0.0776, "step": 26632 }, { "epoch": 0.49446204000401006, "grad_norm": 0.31314805150032043, "learning_rate": 1.0173798358046592e-05, "loss": 0.3376, "step": 26634 }, { "epoch": 0.4944991701414287, "grad_norm": 0.40999871492385864, "learning_rate": 1.0172632042391428e-05, "loss": 0.2233, "step": 26636 }, { "epoch": 0.4945363002788473, "grad_norm": 0.5122815370559692, "learning_rate": 1.0171465724387257e-05, "loss": 0.3336, "step": 26638 }, { "epoch": 0.49457343041626595, "grad_norm": 0.44166526198387146, "learning_rate": 1.0170299404049954e-05, "loss": 0.1839, "step": 26640 }, { "epoch": 0.49461056055368463, "grad_norm": 0.3624061644077301, "learning_rate": 1.0169133081395388e-05, "loss": 0.3919, "step": 26642 }, { "epoch": 0.49464769069110326, "grad_norm": 0.514481782913208, "learning_rate": 1.0167966756439423e-05, "loss": 0.294, "step": 26644 }, { "epoch": 0.4946848208285219, "grad_norm": 0.48552820086479187, "learning_rate": 1.0166800429197936e-05, "loss": 0.2474, "step": 26646 }, { "epoch": 0.4947219509659405, "grad_norm": 0.5801034569740295, "learning_rate": 1.0165634099686794e-05, "loss": 0.2883, "step": 26648 }, { "epoch": 0.49475908110335914, "grad_norm": 0.28711357712745667, "learning_rate": 1.0164467767921866e-05, "loss": 0.2679, "step": 26650 }, { "epoch": 0.49479621124077783, "grad_norm": 0.6777228713035583, "learning_rate": 1.016330143391903e-05, "loss": 0.3596, "step": 26652 }, { "epoch": 0.49483334137819646, "grad_norm": 0.4579601287841797, "learning_rate": 1.0162135097694145e-05, "loss": 0.4341, "step": 26654 }, { "epoch": 0.4948704715156151, "grad_norm": 0.3473369777202606, "learning_rate": 1.016096875926309e-05, "loss": 0.2744, "step": 26656 }, { "epoch": 0.4949076016530337, "grad_norm": 0.58405601978302, "learning_rate": 1.0159802418641734e-05, "loss": 0.2282, "step": 26658 }, { "epoch": 0.49494473179045234, "grad_norm": 0.3529479503631592, "learning_rate": 1.015863607584594e-05, "loss": 0.1739, "step": 26660 }, { "epoch": 0.49498186192787097, "grad_norm": 0.2871580123901367, "learning_rate": 1.015746973089159e-05, "loss": 0.2588, "step": 26662 }, { "epoch": 0.49501899206528965, "grad_norm": 0.30716392397880554, "learning_rate": 1.0156303383794543e-05, "loss": 0.3405, "step": 26664 }, { "epoch": 0.4950561222027083, "grad_norm": 0.2876185178756714, "learning_rate": 1.015513703457068e-05, "loss": 0.3794, "step": 26666 }, { "epoch": 0.4950932523401269, "grad_norm": 0.3798554241657257, "learning_rate": 1.0153970683235863e-05, "loss": 0.3221, "step": 26668 }, { "epoch": 0.49513038247754554, "grad_norm": 0.47295188903808594, "learning_rate": 1.0152804329805967e-05, "loss": 0.151, "step": 26670 }, { "epoch": 0.49516751261496417, "grad_norm": 0.26839762926101685, "learning_rate": 1.0151637974296863e-05, "loss": 0.3964, "step": 26672 }, { "epoch": 0.49520464275238285, "grad_norm": 0.2356579750776291, "learning_rate": 1.0150471616724424e-05, "loss": 0.2817, "step": 26674 }, { "epoch": 0.4952417728898015, "grad_norm": 0.5730855464935303, "learning_rate": 1.0149305257104511e-05, "loss": 0.3657, "step": 26676 }, { "epoch": 0.4952789030272201, "grad_norm": 0.2966228425502777, "learning_rate": 1.0148138895453004e-05, "loss": 0.3649, "step": 26678 }, { "epoch": 0.49531603316463874, "grad_norm": 0.4350131154060364, "learning_rate": 1.0146972531785768e-05, "loss": 0.3816, "step": 26680 }, { "epoch": 0.49535316330205736, "grad_norm": 0.4339401423931122, "learning_rate": 1.0145806166118677e-05, "loss": 0.3573, "step": 26682 }, { "epoch": 0.495390293439476, "grad_norm": 0.5247344970703125, "learning_rate": 1.0144639798467605e-05, "loss": 0.349, "step": 26684 }, { "epoch": 0.4954274235768947, "grad_norm": 0.41206443309783936, "learning_rate": 1.0143473428848414e-05, "loss": 0.4337, "step": 26686 }, { "epoch": 0.4954645537143133, "grad_norm": 0.45704370737075806, "learning_rate": 1.0142307057276978e-05, "loss": 0.5111, "step": 26688 }, { "epoch": 0.49550168385173193, "grad_norm": 0.299344927072525, "learning_rate": 1.0141140683769172e-05, "loss": 0.111, "step": 26690 }, { "epoch": 0.49553881398915056, "grad_norm": 0.2707463502883911, "learning_rate": 1.0139974308340864e-05, "loss": 0.1708, "step": 26692 }, { "epoch": 0.4955759441265692, "grad_norm": 0.3805144727230072, "learning_rate": 1.0138807931007923e-05, "loss": 0.0725, "step": 26694 }, { "epoch": 0.4956130742639879, "grad_norm": 0.35223814845085144, "learning_rate": 1.0137641551786225e-05, "loss": 0.4761, "step": 26696 }, { "epoch": 0.4956502044014065, "grad_norm": 0.34545236825942993, "learning_rate": 1.0136475170691635e-05, "loss": 0.3075, "step": 26698 }, { "epoch": 0.49568733453882513, "grad_norm": 0.3345290422439575, "learning_rate": 1.0135308787740026e-05, "loss": 0.3072, "step": 26700 }, { "epoch": 0.49572446467624376, "grad_norm": 0.30638587474823, "learning_rate": 1.0134142402947273e-05, "loss": 0.2923, "step": 26702 }, { "epoch": 0.4957615948136624, "grad_norm": 0.48701202869415283, "learning_rate": 1.013297601632924e-05, "loss": 0.3216, "step": 26704 }, { "epoch": 0.49579872495108107, "grad_norm": 0.570892333984375, "learning_rate": 1.0131809627901805e-05, "loss": 0.3229, "step": 26706 }, { "epoch": 0.4958358550884997, "grad_norm": 0.4411565065383911, "learning_rate": 1.0130643237680835e-05, "loss": 0.2481, "step": 26708 }, { "epoch": 0.49587298522591833, "grad_norm": 0.23127366602420807, "learning_rate": 1.0129476845682202e-05, "loss": 0.2037, "step": 26710 }, { "epoch": 0.49591011536333696, "grad_norm": 0.36747708916664124, "learning_rate": 1.0128310451921774e-05, "loss": 0.3093, "step": 26712 }, { "epoch": 0.4959472455007556, "grad_norm": 0.4010242521762848, "learning_rate": 1.012714405641543e-05, "loss": 0.1851, "step": 26714 }, { "epoch": 0.4959843756381742, "grad_norm": 0.4909267723560333, "learning_rate": 1.0125977659179034e-05, "loss": 0.3345, "step": 26716 }, { "epoch": 0.4960215057755929, "grad_norm": 0.3525868356227875, "learning_rate": 1.0124811260228457e-05, "loss": 0.4439, "step": 26718 }, { "epoch": 0.4960586359130115, "grad_norm": 0.5203554034233093, "learning_rate": 1.0123644859579575e-05, "loss": 0.5961, "step": 26720 }, { "epoch": 0.49609576605043015, "grad_norm": 0.3930593430995941, "learning_rate": 1.0122478457248258e-05, "loss": 0.3508, "step": 26722 }, { "epoch": 0.4961328961878488, "grad_norm": 0.45534875988960266, "learning_rate": 1.0121312053250371e-05, "loss": 0.1914, "step": 26724 }, { "epoch": 0.4961700263252674, "grad_norm": 0.5005047917366028, "learning_rate": 1.0120145647601796e-05, "loss": 0.2731, "step": 26726 }, { "epoch": 0.4962071564626861, "grad_norm": 0.3658311367034912, "learning_rate": 1.0118979240318399e-05, "loss": 0.4434, "step": 26728 }, { "epoch": 0.4962442866001047, "grad_norm": 0.4492465853691101, "learning_rate": 1.0117812831416048e-05, "loss": 0.2492, "step": 26730 }, { "epoch": 0.49628141673752335, "grad_norm": 0.41235604882240295, "learning_rate": 1.0116646420910614e-05, "loss": 0.3944, "step": 26732 }, { "epoch": 0.496318546874942, "grad_norm": 0.4014976918697357, "learning_rate": 1.0115480008817977e-05, "loss": 0.2699, "step": 26734 }, { "epoch": 0.4963556770123606, "grad_norm": 0.5120599865913391, "learning_rate": 1.0114313595154002e-05, "loss": 0.2735, "step": 26736 }, { "epoch": 0.49639280714977924, "grad_norm": 0.29358986020088196, "learning_rate": 1.0113147179934562e-05, "loss": 0.2843, "step": 26738 }, { "epoch": 0.4964299372871979, "grad_norm": 0.5018960237503052, "learning_rate": 1.0111980763175526e-05, "loss": 0.1552, "step": 26740 }, { "epoch": 0.49646706742461655, "grad_norm": 0.2884894609451294, "learning_rate": 1.0110814344892768e-05, "loss": 0.2355, "step": 26742 }, { "epoch": 0.4965041975620352, "grad_norm": 0.4196353852748871, "learning_rate": 1.0109647925102155e-05, "loss": 0.1787, "step": 26744 }, { "epoch": 0.4965413276994538, "grad_norm": 0.2575789988040924, "learning_rate": 1.0108481503819567e-05, "loss": 0.1943, "step": 26746 }, { "epoch": 0.49657845783687243, "grad_norm": 0.3614758849143982, "learning_rate": 1.0107315081060872e-05, "loss": 0.3847, "step": 26748 }, { "epoch": 0.4966155879742911, "grad_norm": 0.21284295618534088, "learning_rate": 1.0106148656841934e-05, "loss": 0.2375, "step": 26750 }, { "epoch": 0.49665271811170975, "grad_norm": 0.41940784454345703, "learning_rate": 1.0104982231178635e-05, "loss": 0.2821, "step": 26752 }, { "epoch": 0.4966898482491284, "grad_norm": 0.42432481050491333, "learning_rate": 1.0103815804086841e-05, "loss": 0.3316, "step": 26754 }, { "epoch": 0.496726978386547, "grad_norm": 0.29224902391433716, "learning_rate": 1.0102649375582425e-05, "loss": 0.1555, "step": 26756 }, { "epoch": 0.49676410852396563, "grad_norm": 0.32181861996650696, "learning_rate": 1.0101482945681261e-05, "loss": 0.3075, "step": 26758 }, { "epoch": 0.49680123866138426, "grad_norm": 0.5196285247802734, "learning_rate": 1.0100316514399215e-05, "loss": 0.1112, "step": 26760 }, { "epoch": 0.49683836879880294, "grad_norm": 0.3820239007472992, "learning_rate": 1.0099150081752162e-05, "loss": 0.3735, "step": 26762 }, { "epoch": 0.49687549893622157, "grad_norm": 0.3780510425567627, "learning_rate": 1.0097983647755974e-05, "loss": 0.1449, "step": 26764 }, { "epoch": 0.4969126290736402, "grad_norm": 0.4542478322982788, "learning_rate": 1.0096817212426522e-05, "loss": 0.1787, "step": 26766 }, { "epoch": 0.49694975921105883, "grad_norm": 0.30595123767852783, "learning_rate": 1.0095650775779679e-05, "loss": 0.316, "step": 26768 }, { "epoch": 0.49698688934847746, "grad_norm": 0.34468504786491394, "learning_rate": 1.0094484337831314e-05, "loss": 0.4013, "step": 26770 }, { "epoch": 0.49702401948589614, "grad_norm": 0.19810239970684052, "learning_rate": 1.0093317898597301e-05, "loss": 0.2337, "step": 26772 }, { "epoch": 0.49706114962331477, "grad_norm": 0.3810808062553406, "learning_rate": 1.009215145809351e-05, "loss": 0.2599, "step": 26774 }, { "epoch": 0.4970982797607334, "grad_norm": 0.3304886817932129, "learning_rate": 1.0090985016335812e-05, "loss": 0.3857, "step": 26776 }, { "epoch": 0.497135409898152, "grad_norm": 0.33980000019073486, "learning_rate": 1.0089818573340082e-05, "loss": 0.4227, "step": 26778 }, { "epoch": 0.49717254003557065, "grad_norm": 0.38496121764183044, "learning_rate": 1.0088652129122191e-05, "loss": 0.2173, "step": 26780 }, { "epoch": 0.49720967017298934, "grad_norm": 0.3895653784275055, "learning_rate": 1.008748568369801e-05, "loss": 0.5394, "step": 26782 }, { "epoch": 0.49724680031040797, "grad_norm": 0.3522690236568451, "learning_rate": 1.0086319237083413e-05, "loss": 0.1561, "step": 26784 }, { "epoch": 0.4972839304478266, "grad_norm": 0.3797140121459961, "learning_rate": 1.0085152789294265e-05, "loss": 0.2534, "step": 26786 }, { "epoch": 0.4973210605852452, "grad_norm": 0.30519089102745056, "learning_rate": 1.0083986340346443e-05, "loss": 0.3095, "step": 26788 }, { "epoch": 0.49735819072266385, "grad_norm": 0.42169806361198425, "learning_rate": 1.0082819890255824e-05, "loss": 0.4702, "step": 26790 }, { "epoch": 0.4973953208600825, "grad_norm": 0.23226481676101685, "learning_rate": 1.008165343903827e-05, "loss": 0.3178, "step": 26792 }, { "epoch": 0.49743245099750116, "grad_norm": 0.46743327379226685, "learning_rate": 1.0080486986709659e-05, "loss": 0.2234, "step": 26794 }, { "epoch": 0.4974695811349198, "grad_norm": 0.5484879016876221, "learning_rate": 1.0079320533285858e-05, "loss": 0.2262, "step": 26796 }, { "epoch": 0.4975067112723384, "grad_norm": 0.4141293466091156, "learning_rate": 1.0078154078782745e-05, "loss": 0.4032, "step": 26798 }, { "epoch": 0.49754384140975705, "grad_norm": 0.30675286054611206, "learning_rate": 1.007698762321619e-05, "loss": 0.3855, "step": 26800 }, { "epoch": 0.4975809715471757, "grad_norm": 0.5084612965583801, "learning_rate": 1.0075821166602062e-05, "loss": 0.3221, "step": 26802 }, { "epoch": 0.49761810168459436, "grad_norm": 0.2735929489135742, "learning_rate": 1.0074654708956237e-05, "loss": 0.3158, "step": 26804 }, { "epoch": 0.497655231822013, "grad_norm": 0.35964760184288025, "learning_rate": 1.007348825029458e-05, "loss": 0.2437, "step": 26806 }, { "epoch": 0.4976923619594316, "grad_norm": 0.5688581466674805, "learning_rate": 1.0072321790632973e-05, "loss": 0.2337, "step": 26808 }, { "epoch": 0.49772949209685025, "grad_norm": 0.3958166837692261, "learning_rate": 1.0071155329987283e-05, "loss": 0.3309, "step": 26810 }, { "epoch": 0.4977666222342689, "grad_norm": 0.3531842827796936, "learning_rate": 1.0069988868373382e-05, "loss": 0.1752, "step": 26812 }, { "epoch": 0.4978037523716875, "grad_norm": 0.4273420572280884, "learning_rate": 1.006882240580714e-05, "loss": 0.2729, "step": 26814 }, { "epoch": 0.4978408825091062, "grad_norm": 0.5354814529418945, "learning_rate": 1.0067655942304434e-05, "loss": 0.2849, "step": 26816 }, { "epoch": 0.4978780126465248, "grad_norm": 0.5052360892295837, "learning_rate": 1.006648947788113e-05, "loss": 0.2312, "step": 26818 }, { "epoch": 0.49791514278394344, "grad_norm": 0.4538573920726776, "learning_rate": 1.0065323012553102e-05, "loss": 0.1823, "step": 26820 }, { "epoch": 0.49795227292136207, "grad_norm": 0.39478498697280884, "learning_rate": 1.006415654633623e-05, "loss": 0.1077, "step": 26822 }, { "epoch": 0.4979894030587807, "grad_norm": 0.4058161675930023, "learning_rate": 1.0062990079246376e-05, "loss": 0.3839, "step": 26824 }, { "epoch": 0.4980265331961994, "grad_norm": 0.36407390236854553, "learning_rate": 1.0061823611299413e-05, "loss": 0.2726, "step": 26826 }, { "epoch": 0.498063663333618, "grad_norm": 0.2972058057785034, "learning_rate": 1.0060657142511219e-05, "loss": 0.2907, "step": 26828 }, { "epoch": 0.49810079347103664, "grad_norm": 0.4083372950553894, "learning_rate": 1.0059490672897663e-05, "loss": 0.4299, "step": 26830 }, { "epoch": 0.49813792360845527, "grad_norm": 0.4232308566570282, "learning_rate": 1.0058324202474616e-05, "loss": 0.3249, "step": 26832 }, { "epoch": 0.4981750537458739, "grad_norm": 0.28382018208503723, "learning_rate": 1.0057157731257951e-05, "loss": 0.2256, "step": 26834 }, { "epoch": 0.4982121838832925, "grad_norm": 0.23879002034664154, "learning_rate": 1.0055991259263543e-05, "loss": 0.3146, "step": 26836 }, { "epoch": 0.4982493140207112, "grad_norm": 0.5346073508262634, "learning_rate": 1.0054824786507257e-05, "loss": 0.4178, "step": 26838 }, { "epoch": 0.49828644415812984, "grad_norm": 0.4364805519580841, "learning_rate": 1.0053658313004973e-05, "loss": 0.3411, "step": 26840 }, { "epoch": 0.49832357429554847, "grad_norm": 0.5216670036315918, "learning_rate": 1.005249183877256e-05, "loss": 0.2456, "step": 26842 }, { "epoch": 0.4983607044329671, "grad_norm": 0.7121615409851074, "learning_rate": 1.0051325363825892e-05, "loss": 0.3084, "step": 26844 }, { "epoch": 0.4983978345703857, "grad_norm": 0.2606574594974518, "learning_rate": 1.0050158888180836e-05, "loss": 0.214, "step": 26846 }, { "epoch": 0.4984349647078044, "grad_norm": 0.4554485082626343, "learning_rate": 1.0048992411853272e-05, "loss": 0.1396, "step": 26848 }, { "epoch": 0.49847209484522303, "grad_norm": 0.47734448313713074, "learning_rate": 1.0047825934859063e-05, "loss": 0.1508, "step": 26850 }, { "epoch": 0.49850922498264166, "grad_norm": 0.38202816247940063, "learning_rate": 1.0046659457214091e-05, "loss": 0.3595, "step": 26852 }, { "epoch": 0.4985463551200603, "grad_norm": 0.3905223608016968, "learning_rate": 1.0045492978934224e-05, "loss": 0.2276, "step": 26854 }, { "epoch": 0.4985834852574789, "grad_norm": 0.34084296226501465, "learning_rate": 1.0044326500035329e-05, "loss": 0.1343, "step": 26856 }, { "epoch": 0.4986206153948976, "grad_norm": 0.2738212049007416, "learning_rate": 1.0043160020533284e-05, "loss": 0.3012, "step": 26858 }, { "epoch": 0.49865774553231623, "grad_norm": 0.4875483810901642, "learning_rate": 1.0041993540443963e-05, "loss": 0.2466, "step": 26860 }, { "epoch": 0.49869487566973486, "grad_norm": 0.34649237990379333, "learning_rate": 1.0040827059783234e-05, "loss": 0.2201, "step": 26862 }, { "epoch": 0.4987320058071535, "grad_norm": 0.3726114332675934, "learning_rate": 1.0039660578566971e-05, "loss": 0.3249, "step": 26864 }, { "epoch": 0.4987691359445721, "grad_norm": 0.6323537826538086, "learning_rate": 1.0038494096811049e-05, "loss": 0.315, "step": 26866 }, { "epoch": 0.49880626608199075, "grad_norm": 0.4168843924999237, "learning_rate": 1.0037327614531334e-05, "loss": 0.3033, "step": 26868 }, { "epoch": 0.49884339621940943, "grad_norm": 0.2692236006259918, "learning_rate": 1.0036161131743702e-05, "loss": 0.2252, "step": 26870 }, { "epoch": 0.49888052635682806, "grad_norm": 0.4252229332923889, "learning_rate": 1.0034994648464025e-05, "loss": 0.4056, "step": 26872 }, { "epoch": 0.4989176564942467, "grad_norm": 0.503212571144104, "learning_rate": 1.003382816470818e-05, "loss": 0.3089, "step": 26874 }, { "epoch": 0.4989547866316653, "grad_norm": 0.4341176450252533, "learning_rate": 1.0032661680492033e-05, "loss": 0.2135, "step": 26876 }, { "epoch": 0.49899191676908394, "grad_norm": 0.4128177762031555, "learning_rate": 1.0031495195831456e-05, "loss": 0.3238, "step": 26878 }, { "epoch": 0.4990290469065026, "grad_norm": 0.27954617142677307, "learning_rate": 1.0030328710742328e-05, "loss": 0.4152, "step": 26880 }, { "epoch": 0.49906617704392126, "grad_norm": 0.3026414215564728, "learning_rate": 1.0029162225240513e-05, "loss": 0.1961, "step": 26882 }, { "epoch": 0.4991033071813399, "grad_norm": 0.4159076511859894, "learning_rate": 1.0027995739341889e-05, "loss": 0.1404, "step": 26884 }, { "epoch": 0.4991404373187585, "grad_norm": 0.2822812795639038, "learning_rate": 1.002682925306233e-05, "loss": 0.2133, "step": 26886 }, { "epoch": 0.49917756745617714, "grad_norm": 0.591202974319458, "learning_rate": 1.0025662766417698e-05, "loss": 0.4922, "step": 26888 }, { "epoch": 0.49921469759359577, "grad_norm": 0.5966176986694336, "learning_rate": 1.0024496279423877e-05, "loss": 0.3457, "step": 26890 }, { "epoch": 0.49925182773101445, "grad_norm": 0.3594204783439636, "learning_rate": 1.0023329792096737e-05, "loss": 0.2015, "step": 26892 }, { "epoch": 0.4992889578684331, "grad_norm": 0.5917710661888123, "learning_rate": 1.0022163304452143e-05, "loss": 0.3288, "step": 26894 }, { "epoch": 0.4993260880058517, "grad_norm": 0.31645387411117554, "learning_rate": 1.0020996816505977e-05, "loss": 0.3256, "step": 26896 }, { "epoch": 0.49936321814327034, "grad_norm": 0.5079969167709351, "learning_rate": 1.0019830328274108e-05, "loss": 0.4852, "step": 26898 }, { "epoch": 0.49940034828068897, "grad_norm": 0.44086822867393494, "learning_rate": 1.0018663839772404e-05, "loss": 0.3844, "step": 26900 }, { "epoch": 0.49943747841810765, "grad_norm": 0.443491131067276, "learning_rate": 1.0017497351016742e-05, "loss": 0.3064, "step": 26902 }, { "epoch": 0.4994746085555263, "grad_norm": 0.5364022254943848, "learning_rate": 1.0016330862022996e-05, "loss": 0.3155, "step": 26904 }, { "epoch": 0.4995117386929449, "grad_norm": 0.37668925523757935, "learning_rate": 1.0015164372807035e-05, "loss": 0.3362, "step": 26906 }, { "epoch": 0.49954886883036353, "grad_norm": 0.3791133463382721, "learning_rate": 1.001399788338473e-05, "loss": 0.1952, "step": 26908 }, { "epoch": 0.49958599896778216, "grad_norm": 0.36500510573387146, "learning_rate": 1.0012831393771958e-05, "loss": 0.4677, "step": 26910 }, { "epoch": 0.4996231291052008, "grad_norm": 0.3498799800872803, "learning_rate": 1.0011664903984587e-05, "loss": 0.4296, "step": 26912 }, { "epoch": 0.4996602592426195, "grad_norm": 0.40156617760658264, "learning_rate": 1.0010498414038491e-05, "loss": 0.3281, "step": 26914 }, { "epoch": 0.4996973893800381, "grad_norm": 0.3518311083316803, "learning_rate": 1.0009331923949548e-05, "loss": 0.0444, "step": 26916 }, { "epoch": 0.49973451951745673, "grad_norm": 0.5822263360023499, "learning_rate": 1.0008165433733623e-05, "loss": 0.1783, "step": 26918 }, { "epoch": 0.49977164965487536, "grad_norm": 0.33593299984931946, "learning_rate": 1.000699894340659e-05, "loss": 0.1649, "step": 26920 }, { "epoch": 0.499808779792294, "grad_norm": 0.3730458617210388, "learning_rate": 1.000583245298432e-05, "loss": 0.4119, "step": 26922 }, { "epoch": 0.4998459099297127, "grad_norm": 0.31013160943984985, "learning_rate": 1.0004665962482693e-05, "loss": 0.1891, "step": 26924 }, { "epoch": 0.4998830400671313, "grad_norm": 0.5429261922836304, "learning_rate": 1.0003499471917573e-05, "loss": 0.3316, "step": 26926 }, { "epoch": 0.49992017020454993, "grad_norm": 0.4259258806705475, "learning_rate": 1.0002332981304837e-05, "loss": 0.3527, "step": 26928 }, { "epoch": 0.49995730034196856, "grad_norm": 0.2676601707935333, "learning_rate": 1.0001166490660357e-05, "loss": 0.2827, "step": 26930 }, { "epoch": 0.4999944304793872, "grad_norm": 0.3304806351661682, "learning_rate": 1e-05, "loss": 0.2581, "step": 26932 }, { "epoch": 0.5000315606168059, "grad_norm": 0.3376041352748871, "learning_rate": 9.998833509339646e-06, "loss": 0.4154, "step": 26934 }, { "epoch": 0.5000686907542244, "grad_norm": 0.4785219132900238, "learning_rate": 9.997667018695166e-06, "loss": 0.2627, "step": 26936 }, { "epoch": 0.5001058208916431, "grad_norm": 0.36770257353782654, "learning_rate": 9.996500528082428e-06, "loss": 0.4671, "step": 26938 }, { "epoch": 0.5001429510290618, "grad_norm": 0.3594304025173187, "learning_rate": 9.995334037517312e-06, "loss": 0.2399, "step": 26940 }, { "epoch": 0.5001800811664804, "grad_norm": 0.4786681532859802, "learning_rate": 9.994167547015681e-06, "loss": 0.3796, "step": 26942 }, { "epoch": 0.5002172113038991, "grad_norm": 0.35319259762763977, "learning_rate": 9.993001056593414e-06, "loss": 0.3072, "step": 26944 }, { "epoch": 0.5002543414413176, "grad_norm": 0.2906579375267029, "learning_rate": 9.99183456626638e-06, "loss": 0.3575, "step": 26946 }, { "epoch": 0.5002914715787363, "grad_norm": 0.9308989644050598, "learning_rate": 9.990668076050455e-06, "loss": 0.471, "step": 26948 }, { "epoch": 0.5003286017161549, "grad_norm": 0.26063209772109985, "learning_rate": 9.989501585961509e-06, "loss": 0.2708, "step": 26950 }, { "epoch": 0.5003657318535736, "grad_norm": 0.27027469873428345, "learning_rate": 9.988335096015418e-06, "loss": 0.2939, "step": 26952 }, { "epoch": 0.5004028619909923, "grad_norm": 0.4825303852558136, "learning_rate": 9.987168606228047e-06, "loss": 0.3055, "step": 26954 }, { "epoch": 0.5004399921284108, "grad_norm": 0.2751249074935913, "learning_rate": 9.986002116615274e-06, "loss": 0.1922, "step": 26956 }, { "epoch": 0.5004771222658295, "grad_norm": 0.32798242568969727, "learning_rate": 9.984835627192968e-06, "loss": 0.207, "step": 26958 }, { "epoch": 0.5005142524032481, "grad_norm": 0.3726319372653961, "learning_rate": 9.983669137977008e-06, "loss": 0.138, "step": 26960 }, { "epoch": 0.5005513825406668, "grad_norm": 0.358015239238739, "learning_rate": 9.98250264898326e-06, "loss": 0.3447, "step": 26962 }, { "epoch": 0.5005885126780855, "grad_norm": 0.36054402589797974, "learning_rate": 9.9813361602276e-06, "loss": 0.1727, "step": 26964 }, { "epoch": 0.500625642815504, "grad_norm": 0.23488083481788635, "learning_rate": 9.980169671725897e-06, "loss": 0.2127, "step": 26966 }, { "epoch": 0.5006627729529227, "grad_norm": 0.6105630397796631, "learning_rate": 9.979003183494025e-06, "loss": 0.1532, "step": 26968 }, { "epoch": 0.5006999030903413, "grad_norm": 0.3569817543029785, "learning_rate": 9.977836695547859e-06, "loss": 0.1417, "step": 26970 }, { "epoch": 0.50073703322776, "grad_norm": 0.2526264488697052, "learning_rate": 9.976670207903268e-06, "loss": 0.3781, "step": 26972 }, { "epoch": 0.5007741633651787, "grad_norm": 0.1905379295349121, "learning_rate": 9.975503720576123e-06, "loss": 0.3267, "step": 26974 }, { "epoch": 0.5008112935025972, "grad_norm": 0.5174555778503418, "learning_rate": 9.974337233582301e-06, "loss": 0.2387, "step": 26976 }, { "epoch": 0.5008484236400159, "grad_norm": 0.4516080617904663, "learning_rate": 9.973170746937677e-06, "loss": 0.2597, "step": 26978 }, { "epoch": 0.5008855537774345, "grad_norm": 0.3129976689815521, "learning_rate": 9.972004260658114e-06, "loss": 0.2476, "step": 26980 }, { "epoch": 0.5009226839148532, "grad_norm": 0.40397852659225464, "learning_rate": 9.97083777475949e-06, "loss": 0.3502, "step": 26982 }, { "epoch": 0.5009598140522719, "grad_norm": 0.4846981167793274, "learning_rate": 9.969671289257677e-06, "loss": 0.2871, "step": 26984 }, { "epoch": 0.5009969441896904, "grad_norm": 0.46104303002357483, "learning_rate": 9.968504804168544e-06, "loss": 0.3291, "step": 26986 }, { "epoch": 0.5010340743271091, "grad_norm": 0.44227516651153564, "learning_rate": 9.967338319507967e-06, "loss": 0.5471, "step": 26988 }, { "epoch": 0.5010712044645277, "grad_norm": 0.3475395739078522, "learning_rate": 9.966171835291824e-06, "loss": 0.4743, "step": 26990 }, { "epoch": 0.5011083346019464, "grad_norm": 0.3276630640029907, "learning_rate": 9.965005351535977e-06, "loss": 0.4024, "step": 26992 }, { "epoch": 0.501145464739365, "grad_norm": 0.5174431204795837, "learning_rate": 9.9638388682563e-06, "loss": 0.2456, "step": 26994 }, { "epoch": 0.5011825948767836, "grad_norm": 0.36459478735923767, "learning_rate": 9.962672385468669e-06, "loss": 0.4116, "step": 26996 }, { "epoch": 0.5012197250142023, "grad_norm": 0.31800082325935364, "learning_rate": 9.961505903188955e-06, "loss": 0.2623, "step": 26998 }, { "epoch": 0.5012568551516209, "grad_norm": 0.3742848038673401, "learning_rate": 9.96033942143303e-06, "loss": 0.2844, "step": 27000 }, { "epoch": 0.5012939852890396, "grad_norm": 0.5424303412437439, "learning_rate": 9.95917294021677e-06, "loss": 0.3803, "step": 27002 }, { "epoch": 0.5013311154264581, "grad_norm": 0.3784252405166626, "learning_rate": 9.958006459556042e-06, "loss": 0.2611, "step": 27004 }, { "epoch": 0.5013682455638768, "grad_norm": 0.33086666464805603, "learning_rate": 9.956839979466719e-06, "loss": 0.2506, "step": 27006 }, { "epoch": 0.5014053757012955, "grad_norm": 0.38567325472831726, "learning_rate": 9.955673499964675e-06, "loss": 0.2842, "step": 27008 }, { "epoch": 0.5014425058387141, "grad_norm": 0.5531355142593384, "learning_rate": 9.95450702106578e-06, "loss": 0.2822, "step": 27010 }, { "epoch": 0.5014796359761328, "grad_norm": 0.46249035000801086, "learning_rate": 9.953340542785912e-06, "loss": 0.4188, "step": 27012 }, { "epoch": 0.5015167661135513, "grad_norm": 0.42776793241500854, "learning_rate": 9.95217406514094e-06, "loss": 0.4403, "step": 27014 }, { "epoch": 0.50155389625097, "grad_norm": 0.39458444714546204, "learning_rate": 9.951007588146733e-06, "loss": 0.2046, "step": 27016 }, { "epoch": 0.5015910263883887, "grad_norm": 0.3665659427642822, "learning_rate": 9.949841111819167e-06, "loss": 0.205, "step": 27018 }, { "epoch": 0.5016281565258073, "grad_norm": 0.46325308084487915, "learning_rate": 9.948674636174111e-06, "loss": 0.4072, "step": 27020 }, { "epoch": 0.501665286663226, "grad_norm": 0.4123363792896271, "learning_rate": 9.947508161227442e-06, "loss": 0.4834, "step": 27022 }, { "epoch": 0.5017024168006445, "grad_norm": 0.35976356267929077, "learning_rate": 9.946341686995027e-06, "loss": 0.3714, "step": 27024 }, { "epoch": 0.5017395469380632, "grad_norm": 0.3051232397556305, "learning_rate": 9.945175213492743e-06, "loss": 0.1394, "step": 27026 }, { "epoch": 0.5017766770754819, "grad_norm": 0.5761842727661133, "learning_rate": 9.944008740736462e-06, "loss": 0.242, "step": 27028 }, { "epoch": 0.5018138072129005, "grad_norm": 0.2948196530342102, "learning_rate": 9.942842268742052e-06, "loss": 0.0917, "step": 27030 }, { "epoch": 0.5018509373503192, "grad_norm": 0.4692282974720001, "learning_rate": 9.941675797525386e-06, "loss": 0.1744, "step": 27032 }, { "epoch": 0.5018880674877377, "grad_norm": 0.4681616425514221, "learning_rate": 9.94050932710234e-06, "loss": 0.2593, "step": 27034 }, { "epoch": 0.5019251976251564, "grad_norm": 0.3291204869747162, "learning_rate": 9.939342857488783e-06, "loss": 0.1897, "step": 27036 }, { "epoch": 0.5019623277625751, "grad_norm": 0.5584930181503296, "learning_rate": 9.938176388700587e-06, "loss": 0.3591, "step": 27038 }, { "epoch": 0.5019994578999937, "grad_norm": 0.28577449917793274, "learning_rate": 9.937009920753631e-06, "loss": 0.2049, "step": 27040 }, { "epoch": 0.5020365880374124, "grad_norm": 0.48338791728019714, "learning_rate": 9.935843453663775e-06, "loss": 0.5619, "step": 27042 }, { "epoch": 0.5020737181748309, "grad_norm": 0.45104774832725525, "learning_rate": 9.934676987446901e-06, "loss": 0.352, "step": 27044 }, { "epoch": 0.5021108483122496, "grad_norm": 0.3470545709133148, "learning_rate": 9.933510522118874e-06, "loss": 0.2918, "step": 27046 }, { "epoch": 0.5021479784496683, "grad_norm": 0.37051069736480713, "learning_rate": 9.932344057695571e-06, "loss": 0.2505, "step": 27048 }, { "epoch": 0.5021851085870869, "grad_norm": 0.3474474251270294, "learning_rate": 9.931177594192861e-06, "loss": 0.2108, "step": 27050 }, { "epoch": 0.5022222387245056, "grad_norm": 0.29777202010154724, "learning_rate": 9.930011131626623e-06, "loss": 0.1484, "step": 27052 }, { "epoch": 0.5022593688619241, "grad_norm": 0.32228752970695496, "learning_rate": 9.92884467001272e-06, "loss": 0.4737, "step": 27054 }, { "epoch": 0.5022964989993428, "grad_norm": 0.48034149408340454, "learning_rate": 9.92767820936703e-06, "loss": 0.2923, "step": 27056 }, { "epoch": 0.5023336291367614, "grad_norm": 0.44116613268852234, "learning_rate": 9.926511749705422e-06, "loss": 0.2555, "step": 27058 }, { "epoch": 0.5023707592741801, "grad_norm": 0.324666291475296, "learning_rate": 9.925345291043766e-06, "loss": 0.2746, "step": 27060 }, { "epoch": 0.5024078894115988, "grad_norm": 0.5034061670303345, "learning_rate": 9.92417883339794e-06, "loss": 0.2197, "step": 27062 }, { "epoch": 0.5024450195490173, "grad_norm": 0.3064354658126831, "learning_rate": 9.923012376783813e-06, "loss": 0.278, "step": 27064 }, { "epoch": 0.502482149686436, "grad_norm": 0.347992479801178, "learning_rate": 9.921845921217257e-06, "loss": 0.4737, "step": 27066 }, { "epoch": 0.5025192798238546, "grad_norm": 0.39355143904685974, "learning_rate": 9.920679466714145e-06, "loss": 0.3105, "step": 27068 }, { "epoch": 0.5025564099612733, "grad_norm": 0.4315430521965027, "learning_rate": 9.919513013290344e-06, "loss": 0.3077, "step": 27070 }, { "epoch": 0.502593540098692, "grad_norm": 0.23621046543121338, "learning_rate": 9.918346560961732e-06, "loss": 0.2535, "step": 27072 }, { "epoch": 0.5026306702361105, "grad_norm": 0.2695353031158447, "learning_rate": 9.91718010974418e-06, "loss": 0.2152, "step": 27074 }, { "epoch": 0.5026678003735292, "grad_norm": 0.36385855078697205, "learning_rate": 9.916013659653555e-06, "loss": 0.2477, "step": 27076 }, { "epoch": 0.5027049305109478, "grad_norm": 0.4169882833957672, "learning_rate": 9.91484721070574e-06, "loss": 0.2444, "step": 27078 }, { "epoch": 0.5027420606483665, "grad_norm": 0.324619323015213, "learning_rate": 9.913680762916594e-06, "loss": 0.2445, "step": 27080 }, { "epoch": 0.5027791907857851, "grad_norm": 0.4718611538410187, "learning_rate": 9.912514316301993e-06, "loss": 0.1802, "step": 27082 }, { "epoch": 0.5028163209232037, "grad_norm": 0.31225594878196716, "learning_rate": 9.91134787087781e-06, "loss": 0.2085, "step": 27084 }, { "epoch": 0.5028534510606224, "grad_norm": 0.35890844464302063, "learning_rate": 9.91018142665992e-06, "loss": 0.2661, "step": 27086 }, { "epoch": 0.502890581198041, "grad_norm": 0.35641393065452576, "learning_rate": 9.90901498366419e-06, "loss": 0.1958, "step": 27088 }, { "epoch": 0.5029277113354597, "grad_norm": 0.4119141101837158, "learning_rate": 9.907848541906496e-06, "loss": 0.0782, "step": 27090 }, { "epoch": 0.5029648414728783, "grad_norm": 0.29675421118736267, "learning_rate": 9.906682101402705e-06, "loss": 0.1886, "step": 27092 }, { "epoch": 0.5030019716102969, "grad_norm": 0.3640308976173401, "learning_rate": 9.90551566216869e-06, "loss": 0.1102, "step": 27094 }, { "epoch": 0.5030391017477156, "grad_norm": 0.40626031160354614, "learning_rate": 9.904349224220324e-06, "loss": 0.3634, "step": 27096 }, { "epoch": 0.5030762318851342, "grad_norm": 0.5021699666976929, "learning_rate": 9.903182787573482e-06, "loss": 0.3339, "step": 27098 }, { "epoch": 0.5031133620225529, "grad_norm": 0.48933136463165283, "learning_rate": 9.902016352244028e-06, "loss": 0.4875, "step": 27100 }, { "epoch": 0.5031504921599714, "grad_norm": 0.3093032240867615, "learning_rate": 9.90084991824784e-06, "loss": 0.3027, "step": 27102 }, { "epoch": 0.5031876222973901, "grad_norm": 0.25190266966819763, "learning_rate": 9.89968348560079e-06, "loss": 0.2034, "step": 27104 }, { "epoch": 0.5032247524348088, "grad_norm": 0.41998064517974854, "learning_rate": 9.898517054318744e-06, "loss": 0.2548, "step": 27106 }, { "epoch": 0.5032618825722274, "grad_norm": 0.40255624055862427, "learning_rate": 9.897350624417577e-06, "loss": 0.3753, "step": 27108 }, { "epoch": 0.503299012709646, "grad_norm": 0.35159605741500854, "learning_rate": 9.89618419591316e-06, "loss": 0.2146, "step": 27110 }, { "epoch": 0.5033361428470646, "grad_norm": 0.55653977394104, "learning_rate": 9.895017768821366e-06, "loss": 0.4111, "step": 27112 }, { "epoch": 0.5033732729844833, "grad_norm": 0.493134468793869, "learning_rate": 9.893851343158064e-06, "loss": 0.1941, "step": 27114 }, { "epoch": 0.503410403121902, "grad_norm": 0.3710097372531891, "learning_rate": 9.892684918939133e-06, "loss": 0.2622, "step": 27116 }, { "epoch": 0.5034475332593206, "grad_norm": 0.35841497778892517, "learning_rate": 9.891518496180436e-06, "loss": 0.309, "step": 27118 }, { "epoch": 0.5034846633967393, "grad_norm": 0.3774789571762085, "learning_rate": 9.890352074897846e-06, "loss": 0.3103, "step": 27120 }, { "epoch": 0.5035217935341578, "grad_norm": 0.21137604117393494, "learning_rate": 9.889185655107235e-06, "loss": 0.2269, "step": 27122 }, { "epoch": 0.5035589236715765, "grad_norm": 0.3149380683898926, "learning_rate": 9.888019236824477e-06, "loss": 0.198, "step": 27124 }, { "epoch": 0.5035960538089952, "grad_norm": 0.3213143050670624, "learning_rate": 9.88685282006544e-06, "loss": 0.4271, "step": 27126 }, { "epoch": 0.5036331839464138, "grad_norm": 0.3381570875644684, "learning_rate": 9.885686404846002e-06, "loss": 0.3979, "step": 27128 }, { "epoch": 0.5036703140838324, "grad_norm": 0.47645604610443115, "learning_rate": 9.884519991182028e-06, "loss": 0.2976, "step": 27130 }, { "epoch": 0.503707444221251, "grad_norm": 0.4178716540336609, "learning_rate": 9.883353579089388e-06, "loss": 0.3564, "step": 27132 }, { "epoch": 0.5037445743586697, "grad_norm": 0.40592581033706665, "learning_rate": 9.882187168583957e-06, "loss": 0.2043, "step": 27134 }, { "epoch": 0.5037817044960884, "grad_norm": 0.5035300254821777, "learning_rate": 9.881020759681604e-06, "loss": 0.2132, "step": 27136 }, { "epoch": 0.503818834633507, "grad_norm": 0.32951027154922485, "learning_rate": 9.879854352398206e-06, "loss": 0.2364, "step": 27138 }, { "epoch": 0.5038559647709256, "grad_norm": 0.39500245451927185, "learning_rate": 9.878687946749628e-06, "loss": 0.4651, "step": 27140 }, { "epoch": 0.5038930949083442, "grad_norm": 0.4188745319843292, "learning_rate": 9.877521542751747e-06, "loss": 0.3062, "step": 27142 }, { "epoch": 0.5039302250457629, "grad_norm": 0.39757540822029114, "learning_rate": 9.876355140420429e-06, "loss": 0.3237, "step": 27144 }, { "epoch": 0.5039673551831816, "grad_norm": 0.2403126060962677, "learning_rate": 9.875188739771544e-06, "loss": 0.1516, "step": 27146 }, { "epoch": 0.5040044853206002, "grad_norm": 0.3626934885978699, "learning_rate": 9.87402234082097e-06, "loss": 0.084, "step": 27148 }, { "epoch": 0.5040416154580188, "grad_norm": 0.4089568257331848, "learning_rate": 9.872855943584575e-06, "loss": 0.1532, "step": 27150 }, { "epoch": 0.5040787455954374, "grad_norm": 0.3505588471889496, "learning_rate": 9.871689548078226e-06, "loss": 0.2182, "step": 27152 }, { "epoch": 0.5041158757328561, "grad_norm": 0.4032585322856903, "learning_rate": 9.870523154317803e-06, "loss": 0.1941, "step": 27154 }, { "epoch": 0.5041530058702747, "grad_norm": 0.37027329206466675, "learning_rate": 9.869356762319168e-06, "loss": 0.306, "step": 27156 }, { "epoch": 0.5041901360076934, "grad_norm": 0.4290004074573517, "learning_rate": 9.868190372098198e-06, "loss": 0.3245, "step": 27158 }, { "epoch": 0.504227266145112, "grad_norm": 0.9687259197235107, "learning_rate": 9.867023983670761e-06, "loss": 0.2616, "step": 27160 }, { "epoch": 0.5042643962825306, "grad_norm": 0.4411226511001587, "learning_rate": 9.86585759705273e-06, "loss": 0.3786, "step": 27162 }, { "epoch": 0.5043015264199493, "grad_norm": 0.38836535811424255, "learning_rate": 9.864691212259974e-06, "loss": 0.2314, "step": 27164 }, { "epoch": 0.5043386565573679, "grad_norm": 0.3700661063194275, "learning_rate": 9.86352482930837e-06, "loss": 0.389, "step": 27166 }, { "epoch": 0.5043757866947866, "grad_norm": 0.3821013569831848, "learning_rate": 9.86235844821378e-06, "loss": 0.4283, "step": 27168 }, { "epoch": 0.5044129168322052, "grad_norm": 0.3961995542049408, "learning_rate": 9.86119206899208e-06, "loss": 0.2786, "step": 27170 }, { "epoch": 0.5044500469696238, "grad_norm": 0.28907904028892517, "learning_rate": 9.860025691659141e-06, "loss": 0.396, "step": 27172 }, { "epoch": 0.5044871771070425, "grad_norm": 0.2272331863641739, "learning_rate": 9.858859316230831e-06, "loss": 0.3285, "step": 27174 }, { "epoch": 0.5045243072444611, "grad_norm": 0.29686227440834045, "learning_rate": 9.857692942723024e-06, "loss": 0.3677, "step": 27176 }, { "epoch": 0.5045614373818798, "grad_norm": 0.31016772985458374, "learning_rate": 9.856526571151593e-06, "loss": 0.3477, "step": 27178 }, { "epoch": 0.5045985675192984, "grad_norm": 0.49688124656677246, "learning_rate": 9.8553602015324e-06, "loss": 0.3045, "step": 27180 }, { "epoch": 0.504635697656717, "grad_norm": 0.32326236367225647, "learning_rate": 9.854193833881326e-06, "loss": 0.4869, "step": 27182 }, { "epoch": 0.5046728277941357, "grad_norm": 0.42438217997550964, "learning_rate": 9.853027468214235e-06, "loss": 0.2216, "step": 27184 }, { "epoch": 0.5047099579315543, "grad_norm": 0.3244577944278717, "learning_rate": 9.851861104546998e-06, "loss": 0.2082, "step": 27186 }, { "epoch": 0.504747088068973, "grad_norm": 0.4199690520763397, "learning_rate": 9.850694742895488e-06, "loss": 0.212, "step": 27188 }, { "epoch": 0.5047842182063916, "grad_norm": 0.4289074242115021, "learning_rate": 9.84952838327558e-06, "loss": 0.286, "step": 27190 }, { "epoch": 0.5048213483438102, "grad_norm": 0.3022020757198334, "learning_rate": 9.848362025703138e-06, "loss": 0.2173, "step": 27192 }, { "epoch": 0.5048584784812289, "grad_norm": 0.39626890420913696, "learning_rate": 9.847195670194036e-06, "loss": 0.3482, "step": 27194 }, { "epoch": 0.5048956086186475, "grad_norm": 0.31610792875289917, "learning_rate": 9.846029316764138e-06, "loss": 0.118, "step": 27196 }, { "epoch": 0.5049327387560661, "grad_norm": 0.5148515105247498, "learning_rate": 9.844862965429323e-06, "loss": 0.2664, "step": 27198 }, { "epoch": 0.5049698688934848, "grad_norm": 0.2193974256515503, "learning_rate": 9.843696616205457e-06, "loss": 0.0685, "step": 27200 }, { "epoch": 0.5050069990309034, "grad_norm": 0.44844838976860046, "learning_rate": 9.842530269108413e-06, "loss": 0.3262, "step": 27202 }, { "epoch": 0.5050441291683221, "grad_norm": 0.42538976669311523, "learning_rate": 9.841363924154063e-06, "loss": 0.4215, "step": 27204 }, { "epoch": 0.5050812593057407, "grad_norm": 0.509914755821228, "learning_rate": 9.840197581358273e-06, "loss": 0.1893, "step": 27206 }, { "epoch": 0.5051183894431593, "grad_norm": 0.6601515412330627, "learning_rate": 9.839031240736913e-06, "loss": 0.4558, "step": 27208 }, { "epoch": 0.5051555195805779, "grad_norm": 0.48657745122909546, "learning_rate": 9.837864902305856e-06, "loss": 0.5459, "step": 27210 }, { "epoch": 0.5051926497179966, "grad_norm": 0.30325713753700256, "learning_rate": 9.836698566080972e-06, "loss": 0.226, "step": 27212 }, { "epoch": 0.5052297798554153, "grad_norm": 0.4423142075538635, "learning_rate": 9.835532232078134e-06, "loss": 0.1455, "step": 27214 }, { "epoch": 0.5052669099928339, "grad_norm": 0.3947449028491974, "learning_rate": 9.834365900313211e-06, "loss": 0.268, "step": 27216 }, { "epoch": 0.5053040401302525, "grad_norm": 0.4056461751461029, "learning_rate": 9.833199570802069e-06, "loss": 0.2889, "step": 27218 }, { "epoch": 0.5053411702676711, "grad_norm": 0.3948806822299957, "learning_rate": 9.832033243560579e-06, "loss": 0.3232, "step": 27220 }, { "epoch": 0.5053783004050898, "grad_norm": 0.34596171975135803, "learning_rate": 9.830866918604615e-06, "loss": 0.2442, "step": 27222 }, { "epoch": 0.5054154305425085, "grad_norm": 0.3630673587322235, "learning_rate": 9.829700595950047e-06, "loss": 0.3623, "step": 27224 }, { "epoch": 0.505452560679927, "grad_norm": 0.35214555263519287, "learning_rate": 9.828534275612743e-06, "loss": 0.2649, "step": 27226 }, { "epoch": 0.5054896908173457, "grad_norm": 0.4082280695438385, "learning_rate": 9.827367957608574e-06, "loss": 0.0437, "step": 27228 }, { "epoch": 0.5055268209547643, "grad_norm": 0.40199020504951477, "learning_rate": 9.826201641953413e-06, "loss": 0.2244, "step": 27230 }, { "epoch": 0.505563951092183, "grad_norm": 0.5142661929130554, "learning_rate": 9.825035328663123e-06, "loss": 0.2809, "step": 27232 }, { "epoch": 0.5056010812296017, "grad_norm": 0.4034191966056824, "learning_rate": 9.823869017753578e-06, "loss": 0.4427, "step": 27234 }, { "epoch": 0.5056382113670203, "grad_norm": 0.47163230180740356, "learning_rate": 9.822702709240652e-06, "loss": 0.4074, "step": 27236 }, { "epoch": 0.5056753415044389, "grad_norm": 0.3310648202896118, "learning_rate": 9.821536403140206e-06, "loss": 0.3334, "step": 27238 }, { "epoch": 0.5057124716418575, "grad_norm": 0.4580729603767395, "learning_rate": 9.820370099468115e-06, "loss": 0.2925, "step": 27240 }, { "epoch": 0.5057496017792762, "grad_norm": 0.4034956097602844, "learning_rate": 9.819203798240257e-06, "loss": 0.3419, "step": 27242 }, { "epoch": 0.5057867319166949, "grad_norm": 0.35212817788124084, "learning_rate": 9.818037499472486e-06, "loss": 0.2904, "step": 27244 }, { "epoch": 0.5058238620541134, "grad_norm": 0.35502949357032776, "learning_rate": 9.816871203180683e-06, "loss": 0.4541, "step": 27246 }, { "epoch": 0.5058609921915321, "grad_norm": 0.3449929654598236, "learning_rate": 9.815704909380712e-06, "loss": 0.2485, "step": 27248 }, { "epoch": 0.5058981223289507, "grad_norm": 0.31588485836982727, "learning_rate": 9.814538618088445e-06, "loss": 0.344, "step": 27250 }, { "epoch": 0.5059352524663694, "grad_norm": 0.2900664210319519, "learning_rate": 9.813372329319752e-06, "loss": 0.186, "step": 27252 }, { "epoch": 0.505972382603788, "grad_norm": 0.3932534158229828, "learning_rate": 9.812206043090508e-06, "loss": 0.2604, "step": 27254 }, { "epoch": 0.5060095127412066, "grad_norm": 0.4577184319496155, "learning_rate": 9.811039759416572e-06, "loss": 0.0955, "step": 27256 }, { "epoch": 0.5060466428786253, "grad_norm": 0.47477906942367554, "learning_rate": 9.80987347831382e-06, "loss": 0.4599, "step": 27258 }, { "epoch": 0.5060837730160439, "grad_norm": 0.4280111789703369, "learning_rate": 9.80870719979812e-06, "loss": 0.3104, "step": 27260 }, { "epoch": 0.5061209031534626, "grad_norm": 0.4136579930782318, "learning_rate": 9.807540923885341e-06, "loss": 0.3004, "step": 27262 }, { "epoch": 0.5061580332908812, "grad_norm": 0.23737718164920807, "learning_rate": 9.806374650591353e-06, "loss": 0.1982, "step": 27264 }, { "epoch": 0.5061951634282998, "grad_norm": 0.44319865107536316, "learning_rate": 9.80520837993203e-06, "loss": 0.161, "step": 27266 }, { "epoch": 0.5062322935657185, "grad_norm": 0.43614593148231506, "learning_rate": 9.804042111923238e-06, "loss": 0.2234, "step": 27268 }, { "epoch": 0.5062694237031371, "grad_norm": 0.29607024788856506, "learning_rate": 9.802875846580842e-06, "loss": 0.2625, "step": 27270 }, { "epoch": 0.5063065538405558, "grad_norm": 0.4026789963245392, "learning_rate": 9.801709583920717e-06, "loss": 0.3906, "step": 27272 }, { "epoch": 0.5063436839779744, "grad_norm": 0.5164517760276794, "learning_rate": 9.800543323958728e-06, "loss": 0.1882, "step": 27274 }, { "epoch": 0.506380814115393, "grad_norm": 0.40611159801483154, "learning_rate": 9.79937706671075e-06, "loss": 0.1818, "step": 27276 }, { "epoch": 0.5064179442528117, "grad_norm": 0.3482772409915924, "learning_rate": 9.798210812192649e-06, "loss": 0.2479, "step": 27278 }, { "epoch": 0.5064550743902303, "grad_norm": 0.3575907349586487, "learning_rate": 9.797044560420296e-06, "loss": 0.3033, "step": 27280 }, { "epoch": 0.506492204527649, "grad_norm": 0.3511286973953247, "learning_rate": 9.795878311409554e-06, "loss": 0.1738, "step": 27282 }, { "epoch": 0.5065293346650676, "grad_norm": 0.48284628987312317, "learning_rate": 9.7947120651763e-06, "loss": 0.4909, "step": 27284 }, { "epoch": 0.5065664648024862, "grad_norm": 0.37745344638824463, "learning_rate": 9.793545821736396e-06, "loss": 0.3169, "step": 27286 }, { "epoch": 0.5066035949399049, "grad_norm": 0.21041035652160645, "learning_rate": 9.792379581105719e-06, "loss": 0.2035, "step": 27288 }, { "epoch": 0.5066407250773235, "grad_norm": 0.34612002968788147, "learning_rate": 9.791213343300132e-06, "loss": 0.3758, "step": 27290 }, { "epoch": 0.5066778552147422, "grad_norm": 0.40188127756118774, "learning_rate": 9.79004710833551e-06, "loss": 0.2681, "step": 27292 }, { "epoch": 0.5067149853521608, "grad_norm": 0.39686840772628784, "learning_rate": 9.788880876227714e-06, "loss": 0.0715, "step": 27294 }, { "epoch": 0.5067521154895794, "grad_norm": 0.6442857384681702, "learning_rate": 9.787714646992618e-06, "loss": 0.0954, "step": 27296 }, { "epoch": 0.5067892456269981, "grad_norm": 0.49370160698890686, "learning_rate": 9.78654842064609e-06, "loss": 0.1756, "step": 27298 }, { "epoch": 0.5068263757644167, "grad_norm": 0.2876185476779938, "learning_rate": 9.785382197203997e-06, "loss": 0.3837, "step": 27300 }, { "epoch": 0.5068635059018354, "grad_norm": 0.2753759026527405, "learning_rate": 9.78421597668221e-06, "loss": 0.3236, "step": 27302 }, { "epoch": 0.506900636039254, "grad_norm": 0.2751653790473938, "learning_rate": 9.7830497590966e-06, "loss": 0.5996, "step": 27304 }, { "epoch": 0.5069377661766726, "grad_norm": 0.4358898997306824, "learning_rate": 9.781883544463031e-06, "loss": 0.2122, "step": 27306 }, { "epoch": 0.5069748963140912, "grad_norm": 0.3494413495063782, "learning_rate": 9.780717332797372e-06, "loss": 0.3515, "step": 27308 }, { "epoch": 0.5070120264515099, "grad_norm": 0.3184601068496704, "learning_rate": 9.779551124115497e-06, "loss": 0.2073, "step": 27310 }, { "epoch": 0.5070491565889286, "grad_norm": 0.4273928105831146, "learning_rate": 9.778384918433267e-06, "loss": 0.2541, "step": 27312 }, { "epoch": 0.5070862867263471, "grad_norm": 0.4476768672466278, "learning_rate": 9.777218715766555e-06, "loss": 0.308, "step": 27314 }, { "epoch": 0.5071234168637658, "grad_norm": 0.3439481556415558, "learning_rate": 9.776052516131229e-06, "loss": 0.6137, "step": 27316 }, { "epoch": 0.5071605470011844, "grad_norm": 0.4766983389854431, "learning_rate": 9.774886319543161e-06, "loss": 0.304, "step": 27318 }, { "epoch": 0.5071976771386031, "grad_norm": 0.36192506551742554, "learning_rate": 9.773720126018212e-06, "loss": 0.2094, "step": 27320 }, { "epoch": 0.5072348072760218, "grad_norm": 0.31431835889816284, "learning_rate": 9.772553935572258e-06, "loss": 0.3545, "step": 27322 }, { "epoch": 0.5072719374134403, "grad_norm": 0.31274521350860596, "learning_rate": 9.771387748221159e-06, "loss": 0.3507, "step": 27324 }, { "epoch": 0.507309067550859, "grad_norm": 0.42054906487464905, "learning_rate": 9.77022156398079e-06, "loss": 0.5935, "step": 27326 }, { "epoch": 0.5073461976882776, "grad_norm": 0.4115454852581024, "learning_rate": 9.769055382867018e-06, "loss": 0.3246, "step": 27328 }, { "epoch": 0.5073833278256963, "grad_norm": 0.39358383417129517, "learning_rate": 9.767889204895711e-06, "loss": 0.1866, "step": 27330 }, { "epoch": 0.507420457963115, "grad_norm": 0.43749693036079407, "learning_rate": 9.766723030082738e-06, "loss": 0.2156, "step": 27332 }, { "epoch": 0.5074575881005335, "grad_norm": 0.39062029123306274, "learning_rate": 9.765556858443961e-06, "loss": 0.3841, "step": 27334 }, { "epoch": 0.5074947182379522, "grad_norm": 0.4628467261791229, "learning_rate": 9.764390689995255e-06, "loss": 0.2981, "step": 27336 }, { "epoch": 0.5075318483753708, "grad_norm": 0.5927623510360718, "learning_rate": 9.763224524752487e-06, "loss": 0.4069, "step": 27338 }, { "epoch": 0.5075689785127895, "grad_norm": 0.1744084656238556, "learning_rate": 9.762058362731524e-06, "loss": 0.4431, "step": 27340 }, { "epoch": 0.5076061086502082, "grad_norm": 0.41308119893074036, "learning_rate": 9.760892203948234e-06, "loss": 0.2354, "step": 27342 }, { "epoch": 0.5076432387876267, "grad_norm": 0.390735387802124, "learning_rate": 9.759726048418485e-06, "loss": 0.3865, "step": 27344 }, { "epoch": 0.5076803689250454, "grad_norm": 0.3669309616088867, "learning_rate": 9.758559896158142e-06, "loss": 0.3652, "step": 27346 }, { "epoch": 0.507717499062464, "grad_norm": 0.40453261137008667, "learning_rate": 9.757393747183077e-06, "loss": 0.1849, "step": 27348 }, { "epoch": 0.5077546291998827, "grad_norm": 0.5804651975631714, "learning_rate": 9.756227601509157e-06, "loss": 0.2931, "step": 27350 }, { "epoch": 0.5077917593373014, "grad_norm": 0.42025619745254517, "learning_rate": 9.75506145915225e-06, "loss": 0.4583, "step": 27352 }, { "epoch": 0.5078288894747199, "grad_norm": 0.41886240243911743, "learning_rate": 9.753895320128221e-06, "loss": 0.2414, "step": 27354 }, { "epoch": 0.5078660196121386, "grad_norm": 0.3709939420223236, "learning_rate": 9.752729184452944e-06, "loss": 0.1909, "step": 27356 }, { "epoch": 0.5079031497495572, "grad_norm": 1.063105583190918, "learning_rate": 9.751563052142278e-06, "loss": 0.1721, "step": 27358 }, { "epoch": 0.5079402798869759, "grad_norm": 0.492556631565094, "learning_rate": 9.750396923212093e-06, "loss": 0.338, "step": 27360 }, { "epoch": 0.5079774100243944, "grad_norm": 0.25917309522628784, "learning_rate": 9.749230797678264e-06, "loss": 0.1779, "step": 27362 }, { "epoch": 0.5080145401618131, "grad_norm": 0.30102917551994324, "learning_rate": 9.748064675556647e-06, "loss": 0.2104, "step": 27364 }, { "epoch": 0.5080516702992318, "grad_norm": 0.33802661299705505, "learning_rate": 9.746898556863116e-06, "loss": 0.3439, "step": 27366 }, { "epoch": 0.5080888004366504, "grad_norm": 0.5920265316963196, "learning_rate": 9.745732441613542e-06, "loss": 0.2545, "step": 27368 }, { "epoch": 0.5081259305740691, "grad_norm": 0.3891545534133911, "learning_rate": 9.744566329823784e-06, "loss": 0.3775, "step": 27370 }, { "epoch": 0.5081630607114876, "grad_norm": 0.4400654435157776, "learning_rate": 9.743400221509713e-06, "loss": 0.3398, "step": 27372 }, { "epoch": 0.5082001908489063, "grad_norm": 0.19824333488941193, "learning_rate": 9.742234116687199e-06, "loss": 0.3071, "step": 27374 }, { "epoch": 0.508237320986325, "grad_norm": 0.28075167536735535, "learning_rate": 9.741068015372104e-06, "loss": 0.2828, "step": 27376 }, { "epoch": 0.5082744511237436, "grad_norm": 0.5438506007194519, "learning_rate": 9.739901917580298e-06, "loss": 0.3065, "step": 27378 }, { "epoch": 0.5083115812611623, "grad_norm": 0.30266237258911133, "learning_rate": 9.738735823327652e-06, "loss": 0.2322, "step": 27380 }, { "epoch": 0.5083487113985808, "grad_norm": 0.43106696009635925, "learning_rate": 9.737569732630023e-06, "loss": 0.2476, "step": 27382 }, { "epoch": 0.5083858415359995, "grad_norm": 0.36497265100479126, "learning_rate": 9.73640364550329e-06, "loss": 0.3701, "step": 27384 }, { "epoch": 0.5084229716734182, "grad_norm": 0.34190189838409424, "learning_rate": 9.735237561963307e-06, "loss": 0.3332, "step": 27386 }, { "epoch": 0.5084601018108368, "grad_norm": 0.37989184260368347, "learning_rate": 9.73407148202595e-06, "loss": 0.3233, "step": 27388 }, { "epoch": 0.5084972319482555, "grad_norm": 0.2973982095718384, "learning_rate": 9.732905405707082e-06, "loss": 0.1501, "step": 27390 }, { "epoch": 0.508534362085674, "grad_norm": 0.495017409324646, "learning_rate": 9.731739333022576e-06, "loss": 0.3307, "step": 27392 }, { "epoch": 0.5085714922230927, "grad_norm": 0.3756818473339081, "learning_rate": 9.730573263988293e-06, "loss": 0.2262, "step": 27394 }, { "epoch": 0.5086086223605114, "grad_norm": 0.2919599413871765, "learning_rate": 9.729407198620101e-06, "loss": 0.2101, "step": 27396 }, { "epoch": 0.50864575249793, "grad_norm": 0.3531649112701416, "learning_rate": 9.728241136933865e-06, "loss": 0.2044, "step": 27398 }, { "epoch": 0.5086828826353487, "grad_norm": 0.32702580094337463, "learning_rate": 9.727075078945451e-06, "loss": 0.2058, "step": 27400 }, { "epoch": 0.5087200127727672, "grad_norm": 0.42790696024894714, "learning_rate": 9.72590902467073e-06, "loss": 0.2116, "step": 27402 }, { "epoch": 0.5087571429101859, "grad_norm": 0.37379634380340576, "learning_rate": 9.724742974125567e-06, "loss": 0.2655, "step": 27404 }, { "epoch": 0.5087942730476045, "grad_norm": 0.3516804575920105, "learning_rate": 9.72357692732583e-06, "loss": 0.3382, "step": 27406 }, { "epoch": 0.5088314031850232, "grad_norm": 0.4283115863800049, "learning_rate": 9.722410884287378e-06, "loss": 0.1795, "step": 27408 }, { "epoch": 0.5088685333224419, "grad_norm": 0.5059515237808228, "learning_rate": 9.721244845026084e-06, "loss": 0.5063, "step": 27410 }, { "epoch": 0.5089056634598604, "grad_norm": 0.3887662887573242, "learning_rate": 9.720078809557813e-06, "loss": 0.3849, "step": 27412 }, { "epoch": 0.5089427935972791, "grad_norm": 0.2702990472316742, "learning_rate": 9.71891277789843e-06, "loss": 0.2979, "step": 27414 }, { "epoch": 0.5089799237346977, "grad_norm": 0.4402986466884613, "learning_rate": 9.717746750063803e-06, "loss": 0.317, "step": 27416 }, { "epoch": 0.5090170538721164, "grad_norm": 0.42464402318000793, "learning_rate": 9.716580726069801e-06, "loss": 0.3432, "step": 27418 }, { "epoch": 0.5090541840095351, "grad_norm": 0.5096556544303894, "learning_rate": 9.715414705932281e-06, "loss": 0.1925, "step": 27420 }, { "epoch": 0.5090913141469536, "grad_norm": 0.21243000030517578, "learning_rate": 9.714248689667115e-06, "loss": 0.1497, "step": 27422 }, { "epoch": 0.5091284442843723, "grad_norm": 0.27646562457084656, "learning_rate": 9.713082677290168e-06, "loss": 0.2693, "step": 27424 }, { "epoch": 0.5091655744217909, "grad_norm": 0.3234902322292328, "learning_rate": 9.71191666881731e-06, "loss": 0.1154, "step": 27426 }, { "epoch": 0.5092027045592096, "grad_norm": 0.46533554792404175, "learning_rate": 9.7107506642644e-06, "loss": 0.1844, "step": 27428 }, { "epoch": 0.5092398346966283, "grad_norm": 0.3183642625808716, "learning_rate": 9.709584663647306e-06, "loss": 0.1446, "step": 27430 }, { "epoch": 0.5092769648340468, "grad_norm": 0.42225557565689087, "learning_rate": 9.7084186669819e-06, "loss": 0.2338, "step": 27432 }, { "epoch": 0.5093140949714655, "grad_norm": 0.4061495363712311, "learning_rate": 9.70725267428404e-06, "loss": 0.248, "step": 27434 }, { "epoch": 0.5093512251088841, "grad_norm": 0.3309496343135834, "learning_rate": 9.706086685569594e-06, "loss": 0.3234, "step": 27436 }, { "epoch": 0.5093883552463028, "grad_norm": 0.4352647662162781, "learning_rate": 9.704920700854428e-06, "loss": 0.3005, "step": 27438 }, { "epoch": 0.5094254853837215, "grad_norm": 0.4708001911640167, "learning_rate": 9.703754720154406e-06, "loss": 0.2254, "step": 27440 }, { "epoch": 0.50946261552114, "grad_norm": 0.34142231941223145, "learning_rate": 9.702588743485394e-06, "loss": 0.3171, "step": 27442 }, { "epoch": 0.5094997456585587, "grad_norm": 0.43381214141845703, "learning_rate": 9.701422770863264e-06, "loss": 0.2035, "step": 27444 }, { "epoch": 0.5095368757959773, "grad_norm": 0.3174418807029724, "learning_rate": 9.70025680230387e-06, "loss": 0.2195, "step": 27446 }, { "epoch": 0.509574005933396, "grad_norm": 0.22408679127693176, "learning_rate": 9.699090837823088e-06, "loss": 0.1321, "step": 27448 }, { "epoch": 0.5096111360708147, "grad_norm": 0.3952559530735016, "learning_rate": 9.697924877436773e-06, "loss": 0.1671, "step": 27450 }, { "epoch": 0.5096482662082332, "grad_norm": 0.45117366313934326, "learning_rate": 9.696758921160797e-06, "loss": 0.3419, "step": 27452 }, { "epoch": 0.5096853963456519, "grad_norm": 0.27644842863082886, "learning_rate": 9.695592969011023e-06, "loss": 0.1969, "step": 27454 }, { "epoch": 0.5097225264830705, "grad_norm": 0.3339453637599945, "learning_rate": 9.694427021003322e-06, "loss": 0.4069, "step": 27456 }, { "epoch": 0.5097596566204892, "grad_norm": 0.36425602436065674, "learning_rate": 9.69326107715355e-06, "loss": 0.2779, "step": 27458 }, { "epoch": 0.5097967867579077, "grad_norm": 0.34550178050994873, "learning_rate": 9.692095137477579e-06, "loss": 0.2057, "step": 27460 }, { "epoch": 0.5098339168953264, "grad_norm": 0.3911181390285492, "learning_rate": 9.690929201991268e-06, "loss": 0.2168, "step": 27462 }, { "epoch": 0.5098710470327451, "grad_norm": 0.3915248513221741, "learning_rate": 9.689763270710484e-06, "loss": 0.3805, "step": 27464 }, { "epoch": 0.5099081771701637, "grad_norm": 0.3424127697944641, "learning_rate": 9.688597343651093e-06, "loss": 0.3677, "step": 27466 }, { "epoch": 0.5099453073075824, "grad_norm": 0.2904253304004669, "learning_rate": 9.687431420828963e-06, "loss": 0.285, "step": 27468 }, { "epoch": 0.5099824374450009, "grad_norm": 0.3368496894836426, "learning_rate": 9.686265502259953e-06, "loss": 0.2684, "step": 27470 }, { "epoch": 0.5100195675824196, "grad_norm": 0.2652243971824646, "learning_rate": 9.685099587959928e-06, "loss": 0.3147, "step": 27472 }, { "epoch": 0.5100566977198383, "grad_norm": 0.6277669072151184, "learning_rate": 9.683933677944755e-06, "loss": 0.1486, "step": 27474 }, { "epoch": 0.5100938278572569, "grad_norm": 0.2805199921131134, "learning_rate": 9.682767772230297e-06, "loss": 0.34, "step": 27476 }, { "epoch": 0.5101309579946756, "grad_norm": 0.3326171338558197, "learning_rate": 9.681601870832422e-06, "loss": 0.208, "step": 27478 }, { "epoch": 0.5101680881320941, "grad_norm": 0.2681579291820526, "learning_rate": 9.680435973766991e-06, "loss": 0.4093, "step": 27480 }, { "epoch": 0.5102052182695128, "grad_norm": 0.25577929615974426, "learning_rate": 9.679270081049872e-06, "loss": 0.1657, "step": 27482 }, { "epoch": 0.5102423484069315, "grad_norm": 0.24016334116458893, "learning_rate": 9.678104192696921e-06, "loss": 0.1867, "step": 27484 }, { "epoch": 0.5102794785443501, "grad_norm": 0.3700762987136841, "learning_rate": 9.67693830872401e-06, "loss": 0.1944, "step": 27486 }, { "epoch": 0.5103166086817688, "grad_norm": 0.32547080516815186, "learning_rate": 9.675772429147e-06, "loss": 0.419, "step": 27488 }, { "epoch": 0.5103537388191873, "grad_norm": 0.5849716067314148, "learning_rate": 9.674606553981759e-06, "loss": 0.3567, "step": 27490 }, { "epoch": 0.510390868956606, "grad_norm": 0.2290562093257904, "learning_rate": 9.673440683244144e-06, "loss": 0.2867, "step": 27492 }, { "epoch": 0.5104279990940247, "grad_norm": 0.343606173992157, "learning_rate": 9.67227481695003e-06, "loss": 0.3108, "step": 27494 }, { "epoch": 0.5104651292314433, "grad_norm": 0.5379561185836792, "learning_rate": 9.671108955115268e-06, "loss": 0.235, "step": 27496 }, { "epoch": 0.510502259368862, "grad_norm": 0.33252012729644775, "learning_rate": 9.669943097755728e-06, "loss": 0.511, "step": 27498 }, { "epoch": 0.5105393895062805, "grad_norm": 0.45400112867355347, "learning_rate": 9.668777244887276e-06, "loss": 0.172, "step": 27500 }, { "epoch": 0.5105765196436992, "grad_norm": 0.47489675879478455, "learning_rate": 9.667611396525773e-06, "loss": 0.2856, "step": 27502 }, { "epoch": 0.5106136497811179, "grad_norm": 0.44201552867889404, "learning_rate": 9.666445552687081e-06, "loss": 0.4246, "step": 27504 }, { "epoch": 0.5106507799185365, "grad_norm": 0.25042638182640076, "learning_rate": 9.665279713387072e-06, "loss": 0.2083, "step": 27506 }, { "epoch": 0.5106879100559552, "grad_norm": 0.3039798438549042, "learning_rate": 9.664113878641598e-06, "loss": 0.3318, "step": 27508 }, { "epoch": 0.5107250401933737, "grad_norm": 0.4846624732017517, "learning_rate": 9.662948048466529e-06, "loss": 0.3854, "step": 27510 }, { "epoch": 0.5107621703307924, "grad_norm": 0.1738000065088272, "learning_rate": 9.66178222287773e-06, "loss": 0.1097, "step": 27512 }, { "epoch": 0.510799300468211, "grad_norm": 0.26782912015914917, "learning_rate": 9.660616401891057e-06, "loss": 0.3062, "step": 27514 }, { "epoch": 0.5108364306056297, "grad_norm": 0.3808671236038208, "learning_rate": 9.659450585522382e-06, "loss": 0.2983, "step": 27516 }, { "epoch": 0.5108735607430483, "grad_norm": 0.3146984875202179, "learning_rate": 9.658284773787562e-06, "loss": 0.2566, "step": 27518 }, { "epoch": 0.5109106908804669, "grad_norm": 0.5071689486503601, "learning_rate": 9.657118966702467e-06, "loss": 0.3832, "step": 27520 }, { "epoch": 0.5109478210178856, "grad_norm": 0.37467461824417114, "learning_rate": 9.655953164282953e-06, "loss": 0.2528, "step": 27522 }, { "epoch": 0.5109849511553042, "grad_norm": 0.5074592232704163, "learning_rate": 9.654787366544887e-06, "loss": 0.1873, "step": 27524 }, { "epoch": 0.5110220812927229, "grad_norm": 0.34784483909606934, "learning_rate": 9.653621573504129e-06, "loss": 0.241, "step": 27526 }, { "epoch": 0.5110592114301415, "grad_norm": 0.4454055726528168, "learning_rate": 9.652455785176541e-06, "loss": 0.3974, "step": 27528 }, { "epoch": 0.5110963415675601, "grad_norm": 0.4718903601169586, "learning_rate": 9.651290001577995e-06, "loss": 0.2497, "step": 27530 }, { "epoch": 0.5111334717049788, "grad_norm": 0.33056846261024475, "learning_rate": 9.650124222724347e-06, "loss": 0.4463, "step": 27532 }, { "epoch": 0.5111706018423974, "grad_norm": 0.4470878839492798, "learning_rate": 9.648958448631458e-06, "loss": 0.2981, "step": 27534 }, { "epoch": 0.5112077319798161, "grad_norm": 0.6056501269340515, "learning_rate": 9.647792679315193e-06, "loss": 0.3201, "step": 27536 }, { "epoch": 0.5112448621172347, "grad_norm": 0.46294721961021423, "learning_rate": 9.646626914791413e-06, "loss": 0.3699, "step": 27538 }, { "epoch": 0.5112819922546533, "grad_norm": 0.3775184750556946, "learning_rate": 9.645461155075984e-06, "loss": 0.3386, "step": 27540 }, { "epoch": 0.511319122392072, "grad_norm": 0.23049336671829224, "learning_rate": 9.64429540018477e-06, "loss": 0.306, "step": 27542 }, { "epoch": 0.5113562525294906, "grad_norm": 0.4777885973453522, "learning_rate": 9.643129650133629e-06, "loss": 0.3088, "step": 27544 }, { "epoch": 0.5113933826669093, "grad_norm": 0.6331747770309448, "learning_rate": 9.641963904938422e-06, "loss": 0.1966, "step": 27546 }, { "epoch": 0.5114305128043279, "grad_norm": 0.3435724973678589, "learning_rate": 9.640798164615013e-06, "loss": 0.503, "step": 27548 }, { "epoch": 0.5114676429417465, "grad_norm": 0.3178273141384125, "learning_rate": 9.639632429179266e-06, "loss": 0.1723, "step": 27550 }, { "epoch": 0.5115047730791652, "grad_norm": 0.41977745294570923, "learning_rate": 9.638466698647044e-06, "loss": 0.213, "step": 27552 }, { "epoch": 0.5115419032165838, "grad_norm": 0.3878977298736572, "learning_rate": 9.637300973034204e-06, "loss": 0.272, "step": 27554 }, { "epoch": 0.5115790333540025, "grad_norm": 0.3197470009326935, "learning_rate": 9.636135252356614e-06, "loss": 0.3322, "step": 27556 }, { "epoch": 0.511616163491421, "grad_norm": 0.37615951895713806, "learning_rate": 9.634969536630135e-06, "loss": 0.1032, "step": 27558 }, { "epoch": 0.5116532936288397, "grad_norm": 0.3494502007961273, "learning_rate": 9.633803825870624e-06, "loss": 0.3145, "step": 27560 }, { "epoch": 0.5116904237662584, "grad_norm": 0.3623175024986267, "learning_rate": 9.632638120093946e-06, "loss": 0.2021, "step": 27562 }, { "epoch": 0.511727553903677, "grad_norm": 0.3752327561378479, "learning_rate": 9.631472419315965e-06, "loss": 0.2877, "step": 27564 }, { "epoch": 0.5117646840410957, "grad_norm": 0.36790701746940613, "learning_rate": 9.630306723552536e-06, "loss": 0.5292, "step": 27566 }, { "epoch": 0.5118018141785142, "grad_norm": 0.30650147795677185, "learning_rate": 9.62914103281953e-06, "loss": 0.2868, "step": 27568 }, { "epoch": 0.5118389443159329, "grad_norm": 0.2874961197376251, "learning_rate": 9.627975347132804e-06, "loss": 0.41, "step": 27570 }, { "epoch": 0.5118760744533516, "grad_norm": 0.5278917551040649, "learning_rate": 9.626809666508217e-06, "loss": 0.2459, "step": 27572 }, { "epoch": 0.5119132045907702, "grad_norm": 0.49385857582092285, "learning_rate": 9.625643990961633e-06, "loss": 0.1222, "step": 27574 }, { "epoch": 0.5119503347281888, "grad_norm": 0.2153642475605011, "learning_rate": 9.624478320508913e-06, "loss": 0.2939, "step": 27576 }, { "epoch": 0.5119874648656074, "grad_norm": 0.4303489923477173, "learning_rate": 9.623312655165916e-06, "loss": 0.4353, "step": 27578 }, { "epoch": 0.5120245950030261, "grad_norm": 0.35985898971557617, "learning_rate": 9.622146994948506e-06, "loss": 0.1548, "step": 27580 }, { "epoch": 0.5120617251404448, "grad_norm": 0.5189068913459778, "learning_rate": 9.620981339872549e-06, "loss": 0.2133, "step": 27582 }, { "epoch": 0.5120988552778634, "grad_norm": 0.4007475674152374, "learning_rate": 9.619815689953896e-06, "loss": 0.3123, "step": 27584 }, { "epoch": 0.512135985415282, "grad_norm": 0.3837631940841675, "learning_rate": 9.618650045208415e-06, "loss": 0.1508, "step": 27586 }, { "epoch": 0.5121731155527006, "grad_norm": 0.43478071689605713, "learning_rate": 9.617484405651961e-06, "loss": 0.2845, "step": 27588 }, { "epoch": 0.5122102456901193, "grad_norm": 0.437438428401947, "learning_rate": 9.6163187713004e-06, "loss": 0.3461, "step": 27590 }, { "epoch": 0.512247375827538, "grad_norm": 0.3370918333530426, "learning_rate": 9.615153142169592e-06, "loss": 0.3641, "step": 27592 }, { "epoch": 0.5122845059649566, "grad_norm": 0.32581931352615356, "learning_rate": 9.613987518275399e-06, "loss": 0.2029, "step": 27594 }, { "epoch": 0.5123216361023752, "grad_norm": 1.029665231704712, "learning_rate": 9.612821899633676e-06, "loss": 0.1393, "step": 27596 }, { "epoch": 0.5123587662397938, "grad_norm": 0.4986780285835266, "learning_rate": 9.61165628626029e-06, "loss": 0.363, "step": 27598 }, { "epoch": 0.5123958963772125, "grad_norm": 0.35451340675354004, "learning_rate": 9.610490678171096e-06, "loss": 0.3287, "step": 27600 }, { "epoch": 0.5124330265146312, "grad_norm": 0.46983006596565247, "learning_rate": 9.609325075381958e-06, "loss": 0.359, "step": 27602 }, { "epoch": 0.5124701566520498, "grad_norm": 0.38885220885276794, "learning_rate": 9.608159477908737e-06, "loss": 0.3191, "step": 27604 }, { "epoch": 0.5125072867894684, "grad_norm": 0.29006606340408325, "learning_rate": 9.60699388576729e-06, "loss": 0.2564, "step": 27606 }, { "epoch": 0.512544416926887, "grad_norm": 0.4570528268814087, "learning_rate": 9.605828298973483e-06, "loss": 0.3213, "step": 27608 }, { "epoch": 0.5125815470643057, "grad_norm": 0.38161566853523254, "learning_rate": 9.604662717543169e-06, "loss": 0.131, "step": 27610 }, { "epoch": 0.5126186772017243, "grad_norm": 0.49640652537345886, "learning_rate": 9.60349714149221e-06, "loss": 0.3664, "step": 27612 }, { "epoch": 0.512655807339143, "grad_norm": 0.3071663975715637, "learning_rate": 9.602331570836467e-06, "loss": 0.2587, "step": 27614 }, { "epoch": 0.5126929374765616, "grad_norm": 0.34845954179763794, "learning_rate": 9.601166005591802e-06, "loss": 0.2551, "step": 27616 }, { "epoch": 0.5127300676139802, "grad_norm": 0.3759823441505432, "learning_rate": 9.600000445774073e-06, "loss": 0.1983, "step": 27618 }, { "epoch": 0.5127671977513989, "grad_norm": 0.3184506595134735, "learning_rate": 9.598834891399142e-06, "loss": 0.227, "step": 27620 }, { "epoch": 0.5128043278888175, "grad_norm": 0.3116915225982666, "learning_rate": 9.597669342482863e-06, "loss": 0.2744, "step": 27622 }, { "epoch": 0.5128414580262362, "grad_norm": 0.35930871963500977, "learning_rate": 9.5965037990411e-06, "loss": 0.2065, "step": 27624 }, { "epoch": 0.5128785881636548, "grad_norm": 0.2619815170764923, "learning_rate": 9.59533826108971e-06, "loss": 0.3548, "step": 27626 }, { "epoch": 0.5129157183010734, "grad_norm": 0.413163959980011, "learning_rate": 9.594172728644557e-06, "loss": 0.3184, "step": 27628 }, { "epoch": 0.5129528484384921, "grad_norm": 0.4463177025318146, "learning_rate": 9.593007201721496e-06, "loss": 0.1993, "step": 27630 }, { "epoch": 0.5129899785759107, "grad_norm": 0.5362995266914368, "learning_rate": 9.591841680336391e-06, "loss": 0.3493, "step": 27632 }, { "epoch": 0.5130271087133293, "grad_norm": 0.30266591906547546, "learning_rate": 9.590676164505095e-06, "loss": 0.1993, "step": 27634 }, { "epoch": 0.513064238850748, "grad_norm": 0.4228737950325012, "learning_rate": 9.58951065424347e-06, "loss": 0.3133, "step": 27636 }, { "epoch": 0.5131013689881666, "grad_norm": 0.3493140637874603, "learning_rate": 9.588345149567378e-06, "loss": 0.323, "step": 27638 }, { "epoch": 0.5131384991255853, "grad_norm": 0.5333438515663147, "learning_rate": 9.587179650492672e-06, "loss": 0.3137, "step": 27640 }, { "epoch": 0.5131756292630039, "grad_norm": 0.3421265780925751, "learning_rate": 9.586014157035215e-06, "loss": 0.3101, "step": 27642 }, { "epoch": 0.5132127594004225, "grad_norm": 0.49278348684310913, "learning_rate": 9.584848669210865e-06, "loss": 0.1394, "step": 27644 }, { "epoch": 0.5132498895378412, "grad_norm": 0.4265742897987366, "learning_rate": 9.583683187035486e-06, "loss": 0.2435, "step": 27646 }, { "epoch": 0.5132870196752598, "grad_norm": 0.4515606462955475, "learning_rate": 9.582517710524928e-06, "loss": 0.2817, "step": 27648 }, { "epoch": 0.5133241498126785, "grad_norm": 0.2932735085487366, "learning_rate": 9.581352239695055e-06, "loss": 0.2149, "step": 27650 }, { "epoch": 0.5133612799500971, "grad_norm": 0.32840389013290405, "learning_rate": 9.580186774561722e-06, "loss": 0.243, "step": 27652 }, { "epoch": 0.5133984100875157, "grad_norm": 0.4383675158023834, "learning_rate": 9.57902131514079e-06, "loss": 0.221, "step": 27654 }, { "epoch": 0.5134355402249343, "grad_norm": 0.35928624868392944, "learning_rate": 9.577855861448115e-06, "loss": 0.201, "step": 27656 }, { "epoch": 0.513472670362353, "grad_norm": 0.49422237277030945, "learning_rate": 9.576690413499564e-06, "loss": 0.2742, "step": 27658 }, { "epoch": 0.5135098004997717, "grad_norm": 0.36891844868659973, "learning_rate": 9.575524971310986e-06, "loss": 0.4366, "step": 27660 }, { "epoch": 0.5135469306371903, "grad_norm": 0.29024386405944824, "learning_rate": 9.574359534898237e-06, "loss": 0.4735, "step": 27662 }, { "epoch": 0.5135840607746089, "grad_norm": 0.35033005475997925, "learning_rate": 9.573194104277183e-06, "loss": 0.266, "step": 27664 }, { "epoch": 0.5136211909120275, "grad_norm": 0.2778182029724121, "learning_rate": 9.572028679463676e-06, "loss": 0.2173, "step": 27666 }, { "epoch": 0.5136583210494462, "grad_norm": 0.34723880887031555, "learning_rate": 9.57086326047358e-06, "loss": 0.318, "step": 27668 }, { "epoch": 0.5136954511868649, "grad_norm": 0.5657864809036255, "learning_rate": 9.56969784732275e-06, "loss": 0.2148, "step": 27670 }, { "epoch": 0.5137325813242835, "grad_norm": 0.4703945517539978, "learning_rate": 9.568532440027044e-06, "loss": 0.3285, "step": 27672 }, { "epoch": 0.5137697114617021, "grad_norm": 0.23526544868946075, "learning_rate": 9.567367038602316e-06, "loss": 0.225, "step": 27674 }, { "epoch": 0.5138068415991207, "grad_norm": 0.40574878454208374, "learning_rate": 9.566201643064428e-06, "loss": 0.3226, "step": 27676 }, { "epoch": 0.5138439717365394, "grad_norm": 0.3196861147880554, "learning_rate": 9.565036253429235e-06, "loss": 0.2961, "step": 27678 }, { "epoch": 0.5138811018739581, "grad_norm": 0.6547278761863708, "learning_rate": 9.563870869712598e-06, "loss": 0.3287, "step": 27680 }, { "epoch": 0.5139182320113767, "grad_norm": 0.3820594847202301, "learning_rate": 9.56270549193037e-06, "loss": 0.2355, "step": 27682 }, { "epoch": 0.5139553621487953, "grad_norm": 0.34088024497032166, "learning_rate": 9.561540120098416e-06, "loss": 0.2137, "step": 27684 }, { "epoch": 0.5139924922862139, "grad_norm": 0.39357149600982666, "learning_rate": 9.560374754232581e-06, "loss": 0.1701, "step": 27686 }, { "epoch": 0.5140296224236326, "grad_norm": 0.47195547819137573, "learning_rate": 9.55920939434873e-06, "loss": 0.3294, "step": 27688 }, { "epoch": 0.5140667525610513, "grad_norm": 0.47095268964767456, "learning_rate": 9.558044040462721e-06, "loss": 0.2283, "step": 27690 }, { "epoch": 0.5141038826984698, "grad_norm": 0.3605042099952698, "learning_rate": 9.556878692590406e-06, "loss": 0.3657, "step": 27692 }, { "epoch": 0.5141410128358885, "grad_norm": 0.2524566352367401, "learning_rate": 9.555713350747646e-06, "loss": 0.3753, "step": 27694 }, { "epoch": 0.5141781429733071, "grad_norm": 0.5168209075927734, "learning_rate": 9.554548014950299e-06, "loss": 0.1736, "step": 27696 }, { "epoch": 0.5142152731107258, "grad_norm": 0.6899694204330444, "learning_rate": 9.553382685214216e-06, "loss": 0.2238, "step": 27698 }, { "epoch": 0.5142524032481445, "grad_norm": 0.25912654399871826, "learning_rate": 9.552217361555258e-06, "loss": 0.3107, "step": 27700 }, { "epoch": 0.514289533385563, "grad_norm": 0.35134774446487427, "learning_rate": 9.55105204398928e-06, "loss": 0.3746, "step": 27702 }, { "epoch": 0.5143266635229817, "grad_norm": 0.47562161087989807, "learning_rate": 9.54988673253214e-06, "loss": 0.1151, "step": 27704 }, { "epoch": 0.5143637936604003, "grad_norm": 0.4155772924423218, "learning_rate": 9.54872142719969e-06, "loss": 0.2649, "step": 27706 }, { "epoch": 0.514400923797819, "grad_norm": 0.4811541438102722, "learning_rate": 9.547556128007796e-06, "loss": 0.3068, "step": 27708 }, { "epoch": 0.5144380539352376, "grad_norm": 0.5299921035766602, "learning_rate": 9.546390834972303e-06, "loss": 0.1566, "step": 27710 }, { "epoch": 0.5144751840726562, "grad_norm": 0.4587494730949402, "learning_rate": 9.545225548109074e-06, "loss": 0.339, "step": 27712 }, { "epoch": 0.5145123142100749, "grad_norm": 0.2998519837856293, "learning_rate": 9.54406026743396e-06, "loss": 0.1944, "step": 27714 }, { "epoch": 0.5145494443474935, "grad_norm": 0.5208114385604858, "learning_rate": 9.542894992962821e-06, "loss": 0.5225, "step": 27716 }, { "epoch": 0.5145865744849122, "grad_norm": 0.33112192153930664, "learning_rate": 9.541729724711513e-06, "loss": 0.3084, "step": 27718 }, { "epoch": 0.5146237046223308, "grad_norm": 0.4124448895454407, "learning_rate": 9.54056446269589e-06, "loss": 0.2268, "step": 27720 }, { "epoch": 0.5146608347597494, "grad_norm": 0.7226142883300781, "learning_rate": 9.539399206931812e-06, "loss": 0.3854, "step": 27722 }, { "epoch": 0.5146979648971681, "grad_norm": 0.34287604689598083, "learning_rate": 9.53823395743513e-06, "loss": 0.3085, "step": 27724 }, { "epoch": 0.5147350950345867, "grad_norm": 0.3313058316707611, "learning_rate": 9.537068714221698e-06, "loss": 0.2894, "step": 27726 }, { "epoch": 0.5147722251720054, "grad_norm": 0.39880281686782837, "learning_rate": 9.535903477307375e-06, "loss": 0.3512, "step": 27728 }, { "epoch": 0.514809355309424, "grad_norm": 0.35275503993034363, "learning_rate": 9.534738246708013e-06, "loss": 0.2703, "step": 27730 }, { "epoch": 0.5148464854468426, "grad_norm": 0.4860374331474304, "learning_rate": 9.533573022439476e-06, "loss": 0.222, "step": 27732 }, { "epoch": 0.5148836155842613, "grad_norm": 0.5078479647636414, "learning_rate": 9.53240780451761e-06, "loss": 0.2027, "step": 27734 }, { "epoch": 0.5149207457216799, "grad_norm": 0.25618186593055725, "learning_rate": 9.531242592958274e-06, "loss": 0.1937, "step": 27736 }, { "epoch": 0.5149578758590986, "grad_norm": 0.3207758963108063, "learning_rate": 9.53007738777732e-06, "loss": 0.2233, "step": 27738 }, { "epoch": 0.5149950059965172, "grad_norm": 0.2983584403991699, "learning_rate": 9.528912188990605e-06, "loss": 0.3669, "step": 27740 }, { "epoch": 0.5150321361339358, "grad_norm": 0.4820159375667572, "learning_rate": 9.527746996613985e-06, "loss": 0.3109, "step": 27742 }, { "epoch": 0.5150692662713545, "grad_norm": 0.3552098870277405, "learning_rate": 9.526581810663315e-06, "loss": 0.2484, "step": 27744 }, { "epoch": 0.5151063964087731, "grad_norm": 0.45764485001564026, "learning_rate": 9.525416631154452e-06, "loss": 0.3487, "step": 27746 }, { "epoch": 0.5151435265461918, "grad_norm": 0.41365164518356323, "learning_rate": 9.524251458103242e-06, "loss": 0.2458, "step": 27748 }, { "epoch": 0.5151806566836103, "grad_norm": 0.3427340090274811, "learning_rate": 9.523086291525544e-06, "loss": 0.1638, "step": 27750 }, { "epoch": 0.515217786821029, "grad_norm": 0.45375701785087585, "learning_rate": 9.521921131437213e-06, "loss": 0.4803, "step": 27752 }, { "epoch": 0.5152549169584477, "grad_norm": 0.6443138718605042, "learning_rate": 9.520755977854107e-06, "loss": 0.3183, "step": 27754 }, { "epoch": 0.5152920470958663, "grad_norm": 0.33075985312461853, "learning_rate": 9.519590830792072e-06, "loss": 0.4141, "step": 27756 }, { "epoch": 0.515329177233285, "grad_norm": 0.3032643496990204, "learning_rate": 9.518425690266973e-06, "loss": 0.1921, "step": 27758 }, { "epoch": 0.5153663073707035, "grad_norm": 0.29809653759002686, "learning_rate": 9.517260556294655e-06, "loss": 0.3433, "step": 27760 }, { "epoch": 0.5154034375081222, "grad_norm": 0.8759490251541138, "learning_rate": 9.516095428890972e-06, "loss": 0.1899, "step": 27762 }, { "epoch": 0.5154405676455408, "grad_norm": 0.42079952359199524, "learning_rate": 9.514930308071781e-06, "loss": 0.2934, "step": 27764 }, { "epoch": 0.5154776977829595, "grad_norm": 0.4169912338256836, "learning_rate": 9.513765193852939e-06, "loss": 0.1118, "step": 27766 }, { "epoch": 0.5155148279203782, "grad_norm": 0.3454352915287018, "learning_rate": 9.512600086250295e-06, "loss": 0.375, "step": 27768 }, { "epoch": 0.5155519580577967, "grad_norm": 0.43056464195251465, "learning_rate": 9.511434985279702e-06, "loss": 0.3636, "step": 27770 }, { "epoch": 0.5155890881952154, "grad_norm": 0.3244856297969818, "learning_rate": 9.51026989095702e-06, "loss": 0.3136, "step": 27772 }, { "epoch": 0.515626218332634, "grad_norm": 0.3567945659160614, "learning_rate": 9.509104803298095e-06, "loss": 0.2152, "step": 27774 }, { "epoch": 0.5156633484700527, "grad_norm": 0.4030345678329468, "learning_rate": 9.507939722318783e-06, "loss": 0.2069, "step": 27776 }, { "epoch": 0.5157004786074714, "grad_norm": 0.3154042065143585, "learning_rate": 9.506774648034937e-06, "loss": 0.2484, "step": 27778 }, { "epoch": 0.5157376087448899, "grad_norm": 0.3310115337371826, "learning_rate": 9.50560958046241e-06, "loss": 0.2657, "step": 27780 }, { "epoch": 0.5157747388823086, "grad_norm": 0.3910242021083832, "learning_rate": 9.504444519617056e-06, "loss": 0.2425, "step": 27782 }, { "epoch": 0.5158118690197272, "grad_norm": 0.47065800428390503, "learning_rate": 9.503279465514732e-06, "loss": 0.2677, "step": 27784 }, { "epoch": 0.5158489991571459, "grad_norm": 0.49375393986701965, "learning_rate": 9.502114418171282e-06, "loss": 0.2887, "step": 27786 }, { "epoch": 0.5158861292945646, "grad_norm": 0.302882581949234, "learning_rate": 9.500949377602565e-06, "loss": 0.3542, "step": 27788 }, { "epoch": 0.5159232594319831, "grad_norm": 0.28566449880599976, "learning_rate": 9.499784343824432e-06, "loss": 0.2946, "step": 27790 }, { "epoch": 0.5159603895694018, "grad_norm": 0.31632503867149353, "learning_rate": 9.498619316852735e-06, "loss": 0.4188, "step": 27792 }, { "epoch": 0.5159975197068204, "grad_norm": 0.2564805746078491, "learning_rate": 9.497454296703325e-06, "loss": 0.1568, "step": 27794 }, { "epoch": 0.5160346498442391, "grad_norm": 0.4565523564815521, "learning_rate": 9.496289283392065e-06, "loss": 0.3079, "step": 27796 }, { "epoch": 0.5160717799816578, "grad_norm": 0.38501212000846863, "learning_rate": 9.495124276934794e-06, "loss": 0.1923, "step": 27798 }, { "epoch": 0.5161089101190763, "grad_norm": 0.4378299117088318, "learning_rate": 9.493959277347368e-06, "loss": 0.1228, "step": 27800 }, { "epoch": 0.516146040256495, "grad_norm": 0.5729774236679077, "learning_rate": 9.49279428464564e-06, "loss": 0.4596, "step": 27802 }, { "epoch": 0.5161831703939136, "grad_norm": 0.3421713709831238, "learning_rate": 9.491629298845463e-06, "loss": 0.2026, "step": 27804 }, { "epoch": 0.5162203005313323, "grad_norm": 0.36979350447654724, "learning_rate": 9.490464319962691e-06, "loss": 0.2302, "step": 27806 }, { "epoch": 0.5162574306687508, "grad_norm": 0.35657092928886414, "learning_rate": 9.489299348013171e-06, "loss": 0.2773, "step": 27808 }, { "epoch": 0.5162945608061695, "grad_norm": 0.5680682063102722, "learning_rate": 9.488134383012762e-06, "loss": 0.3563, "step": 27810 }, { "epoch": 0.5163316909435882, "grad_norm": 0.42965248227119446, "learning_rate": 9.486969424977305e-06, "loss": 0.2811, "step": 27812 }, { "epoch": 0.5163688210810068, "grad_norm": 0.35181960463523865, "learning_rate": 9.485804473922658e-06, "loss": 0.3888, "step": 27814 }, { "epoch": 0.5164059512184255, "grad_norm": 0.3829491436481476, "learning_rate": 9.484639529864673e-06, "loss": 0.4518, "step": 27816 }, { "epoch": 0.516443081355844, "grad_norm": 0.31823301315307617, "learning_rate": 9.483474592819202e-06, "loss": 0.3236, "step": 27818 }, { "epoch": 0.5164802114932627, "grad_norm": 0.44607260823249817, "learning_rate": 9.482309662802092e-06, "loss": 0.3969, "step": 27820 }, { "epoch": 0.5165173416306814, "grad_norm": 0.27846381068229675, "learning_rate": 9.481144739829202e-06, "loss": 0.2398, "step": 27822 }, { "epoch": 0.5165544717681, "grad_norm": 0.5593746304512024, "learning_rate": 9.479979823916373e-06, "loss": 0.279, "step": 27824 }, { "epoch": 0.5165916019055187, "grad_norm": 0.4391820728778839, "learning_rate": 9.478814915079459e-06, "loss": 0.2281, "step": 27826 }, { "epoch": 0.5166287320429372, "grad_norm": 0.5220600962638855, "learning_rate": 9.477650013334317e-06, "loss": 0.3077, "step": 27828 }, { "epoch": 0.5166658621803559, "grad_norm": 0.4352561831474304, "learning_rate": 9.476485118696792e-06, "loss": 0.3293, "step": 27830 }, { "epoch": 0.5167029923177746, "grad_norm": 0.5185238718986511, "learning_rate": 9.475320231182735e-06, "loss": 0.3076, "step": 27832 }, { "epoch": 0.5167401224551932, "grad_norm": 0.32103052735328674, "learning_rate": 9.474155350808004e-06, "loss": 0.3127, "step": 27834 }, { "epoch": 0.5167772525926119, "grad_norm": 0.36642470955848694, "learning_rate": 9.472990477588438e-06, "loss": 0.287, "step": 27836 }, { "epoch": 0.5168143827300304, "grad_norm": 0.4976699948310852, "learning_rate": 9.471825611539895e-06, "loss": 0.4656, "step": 27838 }, { "epoch": 0.5168515128674491, "grad_norm": 0.40556374192237854, "learning_rate": 9.470660752678222e-06, "loss": 0.112, "step": 27840 }, { "epoch": 0.5168886430048678, "grad_norm": 0.3443363904953003, "learning_rate": 9.46949590101927e-06, "loss": 0.2885, "step": 27842 }, { "epoch": 0.5169257731422864, "grad_norm": 0.336807519197464, "learning_rate": 9.468331056578892e-06, "loss": 0.2502, "step": 27844 }, { "epoch": 0.5169629032797051, "grad_norm": 0.45986685156822205, "learning_rate": 9.467166219372934e-06, "loss": 0.1523, "step": 27846 }, { "epoch": 0.5170000334171236, "grad_norm": 0.45763063430786133, "learning_rate": 9.466001389417251e-06, "loss": 0.2274, "step": 27848 }, { "epoch": 0.5170371635545423, "grad_norm": 0.45369982719421387, "learning_rate": 9.464836566727686e-06, "loss": 0.2806, "step": 27850 }, { "epoch": 0.517074293691961, "grad_norm": 0.3789762556552887, "learning_rate": 9.463671751320096e-06, "loss": 0.283, "step": 27852 }, { "epoch": 0.5171114238293796, "grad_norm": 0.5189527869224548, "learning_rate": 9.462506943210324e-06, "loss": 0.3772, "step": 27854 }, { "epoch": 0.5171485539667983, "grad_norm": 0.4830610156059265, "learning_rate": 9.461342142414221e-06, "loss": 0.1923, "step": 27856 }, { "epoch": 0.5171856841042168, "grad_norm": 0.3147564232349396, "learning_rate": 9.460177348947642e-06, "loss": 0.3133, "step": 27858 }, { "epoch": 0.5172228142416355, "grad_norm": 0.2779880464076996, "learning_rate": 9.459012562826432e-06, "loss": 0.2671, "step": 27860 }, { "epoch": 0.5172599443790541, "grad_norm": 0.5062434673309326, "learning_rate": 9.457847784066439e-06, "loss": 0.1997, "step": 27862 }, { "epoch": 0.5172970745164728, "grad_norm": 0.27005279064178467, "learning_rate": 9.456683012683514e-06, "loss": 0.3134, "step": 27864 }, { "epoch": 0.5173342046538915, "grad_norm": 0.3421546518802643, "learning_rate": 9.455518248693504e-06, "loss": 0.2436, "step": 27866 }, { "epoch": 0.51737133479131, "grad_norm": 0.4909321963787079, "learning_rate": 9.45435349211226e-06, "loss": 0.2767, "step": 27868 }, { "epoch": 0.5174084649287287, "grad_norm": 0.37666478753089905, "learning_rate": 9.453188742955634e-06, "loss": 0.279, "step": 27870 }, { "epoch": 0.5174455950661473, "grad_norm": 0.3315078616142273, "learning_rate": 9.45202400123947e-06, "loss": 0.3265, "step": 27872 }, { "epoch": 0.517482725203566, "grad_norm": 0.17898738384246826, "learning_rate": 9.450859266979617e-06, "loss": 0.1656, "step": 27874 }, { "epoch": 0.5175198553409847, "grad_norm": 0.49123722314834595, "learning_rate": 9.449694540191925e-06, "loss": 0.432, "step": 27876 }, { "epoch": 0.5175569854784032, "grad_norm": 0.38956162333488464, "learning_rate": 9.44852982089224e-06, "loss": 0.375, "step": 27878 }, { "epoch": 0.5175941156158219, "grad_norm": 0.37408027052879333, "learning_rate": 9.447365109096412e-06, "loss": 0.3232, "step": 27880 }, { "epoch": 0.5176312457532405, "grad_norm": 0.2778705358505249, "learning_rate": 9.446200404820294e-06, "loss": 0.4696, "step": 27882 }, { "epoch": 0.5176683758906592, "grad_norm": 0.32104092836380005, "learning_rate": 9.44503570807973e-06, "loss": 0.3303, "step": 27884 }, { "epoch": 0.5177055060280779, "grad_norm": 0.44376298785209656, "learning_rate": 9.443871018890563e-06, "loss": 0.3465, "step": 27886 }, { "epoch": 0.5177426361654964, "grad_norm": 0.3576256334781647, "learning_rate": 9.442706337268646e-06, "loss": 0.2748, "step": 27888 }, { "epoch": 0.5177797663029151, "grad_norm": 0.36411380767822266, "learning_rate": 9.441541663229828e-06, "loss": 0.3017, "step": 27890 }, { "epoch": 0.5178168964403337, "grad_norm": 0.38967758417129517, "learning_rate": 9.440376996789956e-06, "loss": 0.6552, "step": 27892 }, { "epoch": 0.5178540265777524, "grad_norm": 0.3927607834339142, "learning_rate": 9.439212337964874e-06, "loss": 0.3014, "step": 27894 }, { "epoch": 0.517891156715171, "grad_norm": 0.41854771971702576, "learning_rate": 9.438047686770435e-06, "loss": 0.5006, "step": 27896 }, { "epoch": 0.5179282868525896, "grad_norm": 0.36931127309799194, "learning_rate": 9.436883043222485e-06, "loss": 0.2831, "step": 27898 }, { "epoch": 0.5179654169900083, "grad_norm": 0.5507428050041199, "learning_rate": 9.435718407336866e-06, "loss": 0.2454, "step": 27900 }, { "epoch": 0.5180025471274269, "grad_norm": 0.3885299563407898, "learning_rate": 9.434553779129432e-06, "loss": 0.4107, "step": 27902 }, { "epoch": 0.5180396772648456, "grad_norm": 0.26246514916419983, "learning_rate": 9.433389158616027e-06, "loss": 0.2621, "step": 27904 }, { "epoch": 0.5180768074022642, "grad_norm": 0.3750298321247101, "learning_rate": 9.432224545812497e-06, "loss": 0.2546, "step": 27906 }, { "epoch": 0.5181139375396828, "grad_norm": 0.5171964764595032, "learning_rate": 9.431059940734691e-06, "loss": 0.2664, "step": 27908 }, { "epoch": 0.5181510676771015, "grad_norm": 0.28415557742118835, "learning_rate": 9.429895343398459e-06, "loss": 0.318, "step": 27910 }, { "epoch": 0.5181881978145201, "grad_norm": 0.26206061244010925, "learning_rate": 9.428730753819638e-06, "loss": 0.4525, "step": 27912 }, { "epoch": 0.5182253279519388, "grad_norm": 0.4848164916038513, "learning_rate": 9.427566172014083e-06, "loss": 0.2962, "step": 27914 }, { "epoch": 0.5182624580893573, "grad_norm": 0.46354538202285767, "learning_rate": 9.426401597997637e-06, "loss": 0.3552, "step": 27916 }, { "epoch": 0.518299588226776, "grad_norm": 0.5734918713569641, "learning_rate": 9.425237031786145e-06, "loss": 0.5306, "step": 27918 }, { "epoch": 0.5183367183641947, "grad_norm": 0.49697911739349365, "learning_rate": 9.424072473395457e-06, "loss": 0.3912, "step": 27920 }, { "epoch": 0.5183738485016133, "grad_norm": 0.3033851087093353, "learning_rate": 9.42290792284142e-06, "loss": 0.3631, "step": 27922 }, { "epoch": 0.518410978639032, "grad_norm": 0.3845214545726776, "learning_rate": 9.421743380139877e-06, "loss": 0.4111, "step": 27924 }, { "epoch": 0.5184481087764505, "grad_norm": 0.4567599296569824, "learning_rate": 9.420578845306675e-06, "loss": 0.5958, "step": 27926 }, { "epoch": 0.5184852389138692, "grad_norm": 0.34994688630104065, "learning_rate": 9.419414318357655e-06, "loss": 0.3037, "step": 27928 }, { "epoch": 0.5185223690512879, "grad_norm": 0.2931062877178192, "learning_rate": 9.418249799308668e-06, "loss": 0.2392, "step": 27930 }, { "epoch": 0.5185594991887065, "grad_norm": 0.44967758655548096, "learning_rate": 9.41708528817556e-06, "loss": 0.1492, "step": 27932 }, { "epoch": 0.5185966293261252, "grad_norm": 0.3435254693031311, "learning_rate": 9.415920784974176e-06, "loss": 0.296, "step": 27934 }, { "epoch": 0.5186337594635437, "grad_norm": 0.5711683630943298, "learning_rate": 9.414756289720364e-06, "loss": 0.257, "step": 27936 }, { "epoch": 0.5186708896009624, "grad_norm": 0.4668540060520172, "learning_rate": 9.41359180242996e-06, "loss": 0.3236, "step": 27938 }, { "epoch": 0.5187080197383811, "grad_norm": 0.3586547374725342, "learning_rate": 9.412427323118818e-06, "loss": 0.2377, "step": 27940 }, { "epoch": 0.5187451498757997, "grad_norm": 0.26479896903038025, "learning_rate": 9.411262851802776e-06, "loss": 0.1784, "step": 27942 }, { "epoch": 0.5187822800132184, "grad_norm": 0.37328359484672546, "learning_rate": 9.410098388497688e-06, "loss": 0.1335, "step": 27944 }, { "epoch": 0.5188194101506369, "grad_norm": 0.2846425771713257, "learning_rate": 9.40893393321939e-06, "loss": 0.3318, "step": 27946 }, { "epoch": 0.5188565402880556, "grad_norm": 0.45188888907432556, "learning_rate": 9.407769485983737e-06, "loss": 0.2089, "step": 27948 }, { "epoch": 0.5188936704254743, "grad_norm": 0.3138563632965088, "learning_rate": 9.406605046806562e-06, "loss": 0.3846, "step": 27950 }, { "epoch": 0.5189308005628929, "grad_norm": 0.31780603528022766, "learning_rate": 9.405440615703715e-06, "loss": 0.3695, "step": 27952 }, { "epoch": 0.5189679307003116, "grad_norm": 0.28879013657569885, "learning_rate": 9.404276192691042e-06, "loss": 0.4848, "step": 27954 }, { "epoch": 0.5190050608377301, "grad_norm": 0.4851306974887848, "learning_rate": 9.403111777784387e-06, "loss": 0.118, "step": 27956 }, { "epoch": 0.5190421909751488, "grad_norm": 0.4309792220592499, "learning_rate": 9.40194737099959e-06, "loss": 0.2774, "step": 27958 }, { "epoch": 0.5190793211125674, "grad_norm": 0.31807616353034973, "learning_rate": 9.4007829723525e-06, "loss": 0.1428, "step": 27960 }, { "epoch": 0.5191164512499861, "grad_norm": 0.44116929173469543, "learning_rate": 9.399618581858958e-06, "loss": 0.2778, "step": 27962 }, { "epoch": 0.5191535813874048, "grad_norm": 0.47313371300697327, "learning_rate": 9.398454199534807e-06, "loss": 0.5085, "step": 27964 }, { "epoch": 0.5191907115248233, "grad_norm": 0.3320086896419525, "learning_rate": 9.397289825395896e-06, "loss": 0.3695, "step": 27966 }, { "epoch": 0.519227841662242, "grad_norm": 0.3190910816192627, "learning_rate": 9.396125459458062e-06, "loss": 0.2845, "step": 27968 }, { "epoch": 0.5192649717996606, "grad_norm": 2.3000662326812744, "learning_rate": 9.394961101737152e-06, "loss": 0.4036, "step": 27970 }, { "epoch": 0.5193021019370793, "grad_norm": 0.4869636297225952, "learning_rate": 9.393796752249009e-06, "loss": 0.2669, "step": 27972 }, { "epoch": 0.519339232074498, "grad_norm": 0.25937286019325256, "learning_rate": 9.39263241100948e-06, "loss": 0.4043, "step": 27974 }, { "epoch": 0.5193763622119165, "grad_norm": 0.21920296549797058, "learning_rate": 9.3914680780344e-06, "loss": 0.2946, "step": 27976 }, { "epoch": 0.5194134923493352, "grad_norm": 0.29129552841186523, "learning_rate": 9.39030375333962e-06, "loss": 0.2387, "step": 27978 }, { "epoch": 0.5194506224867538, "grad_norm": 0.5493195056915283, "learning_rate": 9.389139436940978e-06, "loss": 0.3199, "step": 27980 }, { "epoch": 0.5194877526241725, "grad_norm": 0.5561234951019287, "learning_rate": 9.387975128854317e-06, "loss": 0.3273, "step": 27982 }, { "epoch": 0.5195248827615911, "grad_norm": 0.3792039453983307, "learning_rate": 9.386810829095483e-06, "loss": 0.2395, "step": 27984 }, { "epoch": 0.5195620128990097, "grad_norm": 0.3583786189556122, "learning_rate": 9.385646537680321e-06, "loss": 0.2593, "step": 27986 }, { "epoch": 0.5195991430364284, "grad_norm": 0.3538975715637207, "learning_rate": 9.384482254624664e-06, "loss": 0.2736, "step": 27988 }, { "epoch": 0.519636273173847, "grad_norm": 0.38106194138526917, "learning_rate": 9.383317979944362e-06, "loss": 0.2387, "step": 27990 }, { "epoch": 0.5196734033112657, "grad_norm": 0.2817035913467407, "learning_rate": 9.382153713655253e-06, "loss": 0.3735, "step": 27992 }, { "epoch": 0.5197105334486843, "grad_norm": 0.4266744554042816, "learning_rate": 9.380989455773181e-06, "loss": 0.2872, "step": 27994 }, { "epoch": 0.5197476635861029, "grad_norm": 0.42260074615478516, "learning_rate": 9.379825206313993e-06, "loss": 0.2972, "step": 27996 }, { "epoch": 0.5197847937235216, "grad_norm": 0.35097286105155945, "learning_rate": 9.378660965293523e-06, "loss": 0.1369, "step": 27998 }, { "epoch": 0.5198219238609402, "grad_norm": 0.5277460813522339, "learning_rate": 9.377496732727618e-06, "loss": 0.2276, "step": 28000 }, { "epoch": 0.5198590539983589, "grad_norm": 0.23006442189216614, "learning_rate": 9.376332508632115e-06, "loss": 0.2022, "step": 28002 }, { "epoch": 0.5198961841357775, "grad_norm": 0.29697734117507935, "learning_rate": 9.37516829302286e-06, "loss": 0.4618, "step": 28004 }, { "epoch": 0.5199333142731961, "grad_norm": 0.4390746057033539, "learning_rate": 9.374004085915692e-06, "loss": 0.3349, "step": 28006 }, { "epoch": 0.5199704444106148, "grad_norm": 0.38183993101119995, "learning_rate": 9.372839887326455e-06, "loss": 0.4673, "step": 28008 }, { "epoch": 0.5200075745480334, "grad_norm": 0.34838250279426575, "learning_rate": 9.371675697270985e-06, "loss": 0.4356, "step": 28010 }, { "epoch": 0.520044704685452, "grad_norm": 0.3497498631477356, "learning_rate": 9.370511515765134e-06, "loss": 0.2554, "step": 28012 }, { "epoch": 0.5200818348228706, "grad_norm": 0.33054205775260925, "learning_rate": 9.369347342824729e-06, "loss": 0.2306, "step": 28014 }, { "epoch": 0.5201189649602893, "grad_norm": 0.40547436475753784, "learning_rate": 9.36818317846562e-06, "loss": 0.2854, "step": 28016 }, { "epoch": 0.520156095097708, "grad_norm": 0.35170090198516846, "learning_rate": 9.367019022703643e-06, "loss": 0.2572, "step": 28018 }, { "epoch": 0.5201932252351266, "grad_norm": 0.35822197794914246, "learning_rate": 9.365854875554646e-06, "loss": 0.3626, "step": 28020 }, { "epoch": 0.5202303553725453, "grad_norm": 0.22573544085025787, "learning_rate": 9.36469073703446e-06, "loss": 0.1807, "step": 28022 }, { "epoch": 0.5202674855099638, "grad_norm": 0.32722705602645874, "learning_rate": 9.363526607158935e-06, "loss": 0.3243, "step": 28024 }, { "epoch": 0.5203046156473825, "grad_norm": 0.39880236983299255, "learning_rate": 9.362362485943903e-06, "loss": 0.2678, "step": 28026 }, { "epoch": 0.5203417457848012, "grad_norm": 0.38540735840797424, "learning_rate": 9.361198373405207e-06, "loss": 0.1603, "step": 28028 }, { "epoch": 0.5203788759222198, "grad_norm": 0.4847244918346405, "learning_rate": 9.36003426955869e-06, "loss": 0.2196, "step": 28030 }, { "epoch": 0.5204160060596384, "grad_norm": 0.3694347143173218, "learning_rate": 9.358870174420187e-06, "loss": 0.3299, "step": 28032 }, { "epoch": 0.520453136197057, "grad_norm": 0.7537364363670349, "learning_rate": 9.35770608800554e-06, "loss": 0.2433, "step": 28034 }, { "epoch": 0.5204902663344757, "grad_norm": 0.31515857577323914, "learning_rate": 9.356542010330594e-06, "loss": 0.2468, "step": 28036 }, { "epoch": 0.5205273964718944, "grad_norm": 0.4077747166156769, "learning_rate": 9.355377941411182e-06, "loss": 0.3577, "step": 28038 }, { "epoch": 0.520564526609313, "grad_norm": 0.5394335389137268, "learning_rate": 9.354213881263143e-06, "loss": 0.1876, "step": 28040 }, { "epoch": 0.5206016567467316, "grad_norm": 0.2732018530368805, "learning_rate": 9.353049829902322e-06, "loss": 0.2012, "step": 28042 }, { "epoch": 0.5206387868841502, "grad_norm": 0.9316643476486206, "learning_rate": 9.351885787344551e-06, "loss": 0.2581, "step": 28044 }, { "epoch": 0.5206759170215689, "grad_norm": 0.3779129385948181, "learning_rate": 9.350721753605673e-06, "loss": 0.1904, "step": 28046 }, { "epoch": 0.5207130471589876, "grad_norm": 0.3806953728199005, "learning_rate": 9.349557728701533e-06, "loss": 0.3752, "step": 28048 }, { "epoch": 0.5207501772964062, "grad_norm": 0.3448702096939087, "learning_rate": 9.34839371264796e-06, "loss": 0.5401, "step": 28050 }, { "epoch": 0.5207873074338248, "grad_norm": 0.309926301240921, "learning_rate": 9.3472297054608e-06, "loss": 0.2669, "step": 28052 }, { "epoch": 0.5208244375712434, "grad_norm": 0.470135360956192, "learning_rate": 9.346065707155882e-06, "loss": 0.1578, "step": 28054 }, { "epoch": 0.5208615677086621, "grad_norm": 0.3822442591190338, "learning_rate": 9.344901717749054e-06, "loss": 0.392, "step": 28056 }, { "epoch": 0.5208986978460808, "grad_norm": 0.3793529272079468, "learning_rate": 9.34373773725615e-06, "loss": 0.2133, "step": 28058 }, { "epoch": 0.5209358279834994, "grad_norm": 0.34007447957992554, "learning_rate": 9.342573765693014e-06, "loss": 0.1548, "step": 28060 }, { "epoch": 0.520972958120918, "grad_norm": 0.3707200288772583, "learning_rate": 9.34140980307548e-06, "loss": 0.3097, "step": 28062 }, { "epoch": 0.5210100882583366, "grad_norm": 0.35025542974472046, "learning_rate": 9.340245849419382e-06, "loss": 0.4204, "step": 28064 }, { "epoch": 0.5210472183957553, "grad_norm": 0.4748229682445526, "learning_rate": 9.339081904740563e-06, "loss": 0.3603, "step": 28066 }, { "epoch": 0.5210843485331739, "grad_norm": 0.4065103530883789, "learning_rate": 9.337917969054858e-06, "loss": 0.3187, "step": 28068 }, { "epoch": 0.5211214786705926, "grad_norm": 0.40040847659111023, "learning_rate": 9.336754042378106e-06, "loss": 0.2666, "step": 28070 }, { "epoch": 0.5211586088080112, "grad_norm": 0.2917388379573822, "learning_rate": 9.335590124726149e-06, "loss": 0.1755, "step": 28072 }, { "epoch": 0.5211957389454298, "grad_norm": 0.2174740731716156, "learning_rate": 9.334426216114821e-06, "loss": 0.1865, "step": 28074 }, { "epoch": 0.5212328690828485, "grad_norm": 0.3238449692726135, "learning_rate": 9.333262316559955e-06, "loss": 0.2982, "step": 28076 }, { "epoch": 0.5212699992202671, "grad_norm": 0.3809027671813965, "learning_rate": 9.33209842607739e-06, "loss": 0.4384, "step": 28078 }, { "epoch": 0.5213071293576858, "grad_norm": 0.39081084728240967, "learning_rate": 9.330934544682968e-06, "loss": 0.1902, "step": 28080 }, { "epoch": 0.5213442594951044, "grad_norm": 0.3424111604690552, "learning_rate": 9.329770672392522e-06, "loss": 0.2501, "step": 28082 }, { "epoch": 0.521381389632523, "grad_norm": 0.49280112981796265, "learning_rate": 9.32860680922189e-06, "loss": 0.1777, "step": 28084 }, { "epoch": 0.5214185197699417, "grad_norm": 0.5211786031723022, "learning_rate": 9.327442955186911e-06, "loss": 0.3472, "step": 28086 }, { "epoch": 0.5214556499073603, "grad_norm": 0.8068822026252747, "learning_rate": 9.326279110303414e-06, "loss": 0.2726, "step": 28088 }, { "epoch": 0.521492780044779, "grad_norm": 0.35349878668785095, "learning_rate": 9.32511527458724e-06, "loss": 0.3564, "step": 28090 }, { "epoch": 0.5215299101821976, "grad_norm": 0.5040069222450256, "learning_rate": 9.323951448054227e-06, "loss": 0.2808, "step": 28092 }, { "epoch": 0.5215670403196162, "grad_norm": 0.3312101364135742, "learning_rate": 9.322787630720212e-06, "loss": 0.3297, "step": 28094 }, { "epoch": 0.5216041704570349, "grad_norm": 1.3002091646194458, "learning_rate": 9.321623822601026e-06, "loss": 0.2575, "step": 28096 }, { "epoch": 0.5216413005944535, "grad_norm": 0.5194166898727417, "learning_rate": 9.320460023712508e-06, "loss": 0.2088, "step": 28098 }, { "epoch": 0.5216784307318721, "grad_norm": 0.34553706645965576, "learning_rate": 9.319296234070497e-06, "loss": 0.3115, "step": 28100 }, { "epoch": 0.5217155608692908, "grad_norm": 0.3347233533859253, "learning_rate": 9.318132453690822e-06, "loss": 0.2175, "step": 28102 }, { "epoch": 0.5217526910067094, "grad_norm": 0.3491487503051758, "learning_rate": 9.316968682589325e-06, "loss": 0.2171, "step": 28104 }, { "epoch": 0.5217898211441281, "grad_norm": 0.28038862347602844, "learning_rate": 9.315804920781835e-06, "loss": 0.4354, "step": 28106 }, { "epoch": 0.5218269512815467, "grad_norm": 0.48502781987190247, "learning_rate": 9.31464116828419e-06, "loss": 0.3773, "step": 28108 }, { "epoch": 0.5218640814189653, "grad_norm": 0.3009917438030243, "learning_rate": 9.313477425112228e-06, "loss": 0.055, "step": 28110 }, { "epoch": 0.5219012115563839, "grad_norm": 0.315805584192276, "learning_rate": 9.312313691281784e-06, "loss": 0.2719, "step": 28112 }, { "epoch": 0.5219383416938026, "grad_norm": 0.40033459663391113, "learning_rate": 9.311149966808687e-06, "loss": 0.1994, "step": 28114 }, { "epoch": 0.5219754718312213, "grad_norm": 0.31273797154426575, "learning_rate": 9.309986251708779e-06, "loss": 0.2127, "step": 28116 }, { "epoch": 0.5220126019686399, "grad_norm": 0.4080374240875244, "learning_rate": 9.308822545997887e-06, "loss": 0.4004, "step": 28118 }, { "epoch": 0.5220497321060585, "grad_norm": 0.5991426110267639, "learning_rate": 9.30765884969185e-06, "loss": 0.3441, "step": 28120 }, { "epoch": 0.5220868622434771, "grad_norm": 0.3351306617259979, "learning_rate": 9.306495162806503e-06, "loss": 0.2095, "step": 28122 }, { "epoch": 0.5221239923808958, "grad_norm": 0.48446792364120483, "learning_rate": 9.305331485357685e-06, "loss": 0.5414, "step": 28124 }, { "epoch": 0.5221611225183145, "grad_norm": 0.4659987688064575, "learning_rate": 9.30416781736122e-06, "loss": 0.0927, "step": 28126 }, { "epoch": 0.522198252655733, "grad_norm": 0.26826244592666626, "learning_rate": 9.303004158832948e-06, "loss": 0.216, "step": 28128 }, { "epoch": 0.5222353827931517, "grad_norm": 0.40166664123535156, "learning_rate": 9.3018405097887e-06, "loss": 0.3425, "step": 28130 }, { "epoch": 0.5222725129305703, "grad_norm": 0.5637459754943848, "learning_rate": 9.300676870244311e-06, "loss": 0.3456, "step": 28132 }, { "epoch": 0.522309643067989, "grad_norm": 0.4651561975479126, "learning_rate": 9.299513240215617e-06, "loss": 0.299, "step": 28134 }, { "epoch": 0.5223467732054077, "grad_norm": 0.1611827313899994, "learning_rate": 9.298349619718448e-06, "loss": 0.1383, "step": 28136 }, { "epoch": 0.5223839033428263, "grad_norm": 0.39330193400382996, "learning_rate": 9.297186008768644e-06, "loss": 0.2466, "step": 28138 }, { "epoch": 0.5224210334802449, "grad_norm": 0.9296402335166931, "learning_rate": 9.296022407382026e-06, "loss": 0.3551, "step": 28140 }, { "epoch": 0.5224581636176635, "grad_norm": 0.4432893395423889, "learning_rate": 9.294858815574438e-06, "loss": 0.3705, "step": 28142 }, { "epoch": 0.5224952937550822, "grad_norm": 0.6663895845413208, "learning_rate": 9.293695233361709e-06, "loss": 0.2025, "step": 28144 }, { "epoch": 0.5225324238925009, "grad_norm": 0.3534146249294281, "learning_rate": 9.292531660759673e-06, "loss": 0.1761, "step": 28146 }, { "epoch": 0.5225695540299194, "grad_norm": 0.4033986032009125, "learning_rate": 9.29136809778416e-06, "loss": 0.4726, "step": 28148 }, { "epoch": 0.5226066841673381, "grad_norm": 0.3837416172027588, "learning_rate": 9.290204544451009e-06, "loss": 0.2261, "step": 28150 }, { "epoch": 0.5226438143047567, "grad_norm": 0.48960021138191223, "learning_rate": 9.289041000776044e-06, "loss": 0.2718, "step": 28152 }, { "epoch": 0.5226809444421754, "grad_norm": 0.3605748414993286, "learning_rate": 9.2878774667751e-06, "loss": 0.3043, "step": 28154 }, { "epoch": 0.5227180745795941, "grad_norm": 0.4591005742549896, "learning_rate": 9.286713942464012e-06, "loss": 0.4019, "step": 28156 }, { "epoch": 0.5227552047170126, "grad_norm": 0.39213526248931885, "learning_rate": 9.285550427858613e-06, "loss": 0.1753, "step": 28158 }, { "epoch": 0.5227923348544313, "grad_norm": 0.4175074100494385, "learning_rate": 9.28438692297473e-06, "loss": 0.3902, "step": 28160 }, { "epoch": 0.5228294649918499, "grad_norm": 0.31997689604759216, "learning_rate": 9.283223427828202e-06, "loss": 0.2936, "step": 28162 }, { "epoch": 0.5228665951292686, "grad_norm": 0.36666950583457947, "learning_rate": 9.282059942434853e-06, "loss": 0.2091, "step": 28164 }, { "epoch": 0.5229037252666872, "grad_norm": 0.3158455491065979, "learning_rate": 9.280896466810517e-06, "loss": 0.319, "step": 28166 }, { "epoch": 0.5229408554041058, "grad_norm": 0.19260850548744202, "learning_rate": 9.279733000971026e-06, "loss": 0.3322, "step": 28168 }, { "epoch": 0.5229779855415245, "grad_norm": 0.7922513484954834, "learning_rate": 9.278569544932212e-06, "loss": 0.2031, "step": 28170 }, { "epoch": 0.5230151156789431, "grad_norm": 0.40140706300735474, "learning_rate": 9.277406098709904e-06, "loss": 0.1891, "step": 28172 }, { "epoch": 0.5230522458163618, "grad_norm": 0.3927055299282074, "learning_rate": 9.27624266231994e-06, "loss": 0.3235, "step": 28174 }, { "epoch": 0.5230893759537804, "grad_norm": 0.4807228147983551, "learning_rate": 9.275079235778141e-06, "loss": 0.2437, "step": 28176 }, { "epoch": 0.523126506091199, "grad_norm": 0.47512444853782654, "learning_rate": 9.27391581910034e-06, "loss": 0.3789, "step": 28178 }, { "epoch": 0.5231636362286177, "grad_norm": 0.3931896388530731, "learning_rate": 9.272752412302375e-06, "loss": 0.2345, "step": 28180 }, { "epoch": 0.5232007663660363, "grad_norm": 0.36406317353248596, "learning_rate": 9.271589015400068e-06, "loss": 0.244, "step": 28182 }, { "epoch": 0.523237896503455, "grad_norm": 0.33124038577079773, "learning_rate": 9.270425628409253e-06, "loss": 0.1856, "step": 28184 }, { "epoch": 0.5232750266408736, "grad_norm": 0.3963111937046051, "learning_rate": 9.26926225134576e-06, "loss": 0.3992, "step": 28186 }, { "epoch": 0.5233121567782922, "grad_norm": 0.4703839123249054, "learning_rate": 9.268098884225423e-06, "loss": 0.1952, "step": 28188 }, { "epoch": 0.5233492869157109, "grad_norm": 0.4198876619338989, "learning_rate": 9.266935527064064e-06, "loss": 0.3384, "step": 28190 }, { "epoch": 0.5233864170531295, "grad_norm": 0.30893459916114807, "learning_rate": 9.265772179877516e-06, "loss": 0.1143, "step": 28192 }, { "epoch": 0.5234235471905482, "grad_norm": 0.3882627785205841, "learning_rate": 9.26460884268161e-06, "loss": 0.3301, "step": 28194 }, { "epoch": 0.5234606773279668, "grad_norm": 0.4515036940574646, "learning_rate": 9.263445515492175e-06, "loss": 0.2258, "step": 28196 }, { "epoch": 0.5234978074653854, "grad_norm": 0.2816133499145508, "learning_rate": 9.262282198325042e-06, "loss": 0.2502, "step": 28198 }, { "epoch": 0.5235349376028041, "grad_norm": 0.40757328271865845, "learning_rate": 9.261118891196037e-06, "loss": 0.3263, "step": 28200 }, { "epoch": 0.5235720677402227, "grad_norm": 0.43003734946250916, "learning_rate": 9.259955594120993e-06, "loss": 0.2896, "step": 28202 }, { "epoch": 0.5236091978776414, "grad_norm": 0.33691883087158203, "learning_rate": 9.258792307115734e-06, "loss": 0.3621, "step": 28204 }, { "epoch": 0.52364632801506, "grad_norm": 0.4610239267349243, "learning_rate": 9.25762903019609e-06, "loss": 0.4275, "step": 28206 }, { "epoch": 0.5236834581524786, "grad_norm": 0.4140169322490692, "learning_rate": 9.256465763377893e-06, "loss": 0.2953, "step": 28208 }, { "epoch": 0.5237205882898973, "grad_norm": 0.4700329005718231, "learning_rate": 9.25530250667697e-06, "loss": 0.3914, "step": 28210 }, { "epoch": 0.5237577184273159, "grad_norm": 0.38899728655815125, "learning_rate": 9.254139260109154e-06, "loss": 0.2721, "step": 28212 }, { "epoch": 0.5237948485647346, "grad_norm": 0.40652865171432495, "learning_rate": 9.252976023690262e-06, "loss": 0.2205, "step": 28214 }, { "epoch": 0.5238319787021531, "grad_norm": 0.4482325315475464, "learning_rate": 9.25181279743613e-06, "loss": 0.1083, "step": 28216 }, { "epoch": 0.5238691088395718, "grad_norm": 0.5338302254676819, "learning_rate": 9.250649581362584e-06, "loss": 0.2671, "step": 28218 }, { "epoch": 0.5239062389769904, "grad_norm": 0.45330163836479187, "learning_rate": 9.249486375485455e-06, "loss": 0.2875, "step": 28220 }, { "epoch": 0.5239433691144091, "grad_norm": 0.4857039153575897, "learning_rate": 9.248323179820567e-06, "loss": 0.2919, "step": 28222 }, { "epoch": 0.5239804992518278, "grad_norm": 0.5985873341560364, "learning_rate": 9.247159994383749e-06, "loss": 0.2469, "step": 28224 }, { "epoch": 0.5240176293892463, "grad_norm": 0.5718949437141418, "learning_rate": 9.245996819190832e-06, "loss": 0.1501, "step": 28226 }, { "epoch": 0.524054759526665, "grad_norm": 0.2761910557746887, "learning_rate": 9.244833654257636e-06, "loss": 0.3437, "step": 28228 }, { "epoch": 0.5240918896640836, "grad_norm": 0.3907056450843811, "learning_rate": 9.243670499599992e-06, "loss": 0.3118, "step": 28230 }, { "epoch": 0.5241290198015023, "grad_norm": 0.3935077488422394, "learning_rate": 9.242507355233728e-06, "loss": 0.381, "step": 28232 }, { "epoch": 0.524166149938921, "grad_norm": 0.4161234200000763, "learning_rate": 9.241344221174668e-06, "loss": 0.354, "step": 28234 }, { "epoch": 0.5242032800763395, "grad_norm": 0.29580315947532654, "learning_rate": 9.240181097438642e-06, "loss": 0.1385, "step": 28236 }, { "epoch": 0.5242404102137582, "grad_norm": 0.3137395977973938, "learning_rate": 9.23901798404148e-06, "loss": 0.3989, "step": 28238 }, { "epoch": 0.5242775403511768, "grad_norm": 0.24473921954631805, "learning_rate": 9.237854880998998e-06, "loss": 0.3554, "step": 28240 }, { "epoch": 0.5243146704885955, "grad_norm": 0.3380820155143738, "learning_rate": 9.23669178832703e-06, "loss": 0.3023, "step": 28242 }, { "epoch": 0.5243518006260142, "grad_norm": 0.32730650901794434, "learning_rate": 9.2355287060414e-06, "loss": 0.1512, "step": 28244 }, { "epoch": 0.5243889307634327, "grad_norm": 0.4202280640602112, "learning_rate": 9.234365634157933e-06, "loss": 0.1769, "step": 28246 }, { "epoch": 0.5244260609008514, "grad_norm": 0.3653663694858551, "learning_rate": 9.233202572692457e-06, "loss": 0.1369, "step": 28248 }, { "epoch": 0.52446319103827, "grad_norm": 0.29243552684783936, "learning_rate": 9.232039521660801e-06, "loss": 0.1576, "step": 28250 }, { "epoch": 0.5245003211756887, "grad_norm": 0.2632932662963867, "learning_rate": 9.230876481078784e-06, "loss": 0.2622, "step": 28252 }, { "epoch": 0.5245374513131074, "grad_norm": 0.3520936965942383, "learning_rate": 9.229713450962235e-06, "loss": 0.2851, "step": 28254 }, { "epoch": 0.5245745814505259, "grad_norm": 0.4301888942718506, "learning_rate": 9.228550431326976e-06, "loss": 0.2411, "step": 28256 }, { "epoch": 0.5246117115879446, "grad_norm": 0.4315635561943054, "learning_rate": 9.227387422188836e-06, "loss": 0.3548, "step": 28258 }, { "epoch": 0.5246488417253632, "grad_norm": 0.597524881362915, "learning_rate": 9.226224423563639e-06, "loss": 0.2527, "step": 28260 }, { "epoch": 0.5246859718627819, "grad_norm": 0.29058656096458435, "learning_rate": 9.225061435467211e-06, "loss": 0.2446, "step": 28262 }, { "epoch": 0.5247231020002004, "grad_norm": 0.40379804372787476, "learning_rate": 9.223898457915377e-06, "loss": 0.422, "step": 28264 }, { "epoch": 0.5247602321376191, "grad_norm": 0.2448199838399887, "learning_rate": 9.22273549092396e-06, "loss": 0.0734, "step": 28266 }, { "epoch": 0.5247973622750378, "grad_norm": 0.36263418197631836, "learning_rate": 9.221572534508781e-06, "loss": 0.1742, "step": 28268 }, { "epoch": 0.5248344924124564, "grad_norm": 0.29628270864486694, "learning_rate": 9.220409588685671e-06, "loss": 0.3642, "step": 28270 }, { "epoch": 0.5248716225498751, "grad_norm": 0.4896189868450165, "learning_rate": 9.219246653470448e-06, "loss": 0.1655, "step": 28272 }, { "epoch": 0.5249087526872936, "grad_norm": 0.38273170590400696, "learning_rate": 9.218083728878943e-06, "loss": 0.2669, "step": 28274 }, { "epoch": 0.5249458828247123, "grad_norm": 0.32097193598747253, "learning_rate": 9.21692081492698e-06, "loss": 0.346, "step": 28276 }, { "epoch": 0.524983012962131, "grad_norm": 0.5770221948623657, "learning_rate": 9.215757911630373e-06, "loss": 0.425, "step": 28278 }, { "epoch": 0.5250201430995496, "grad_norm": 0.45663943886756897, "learning_rate": 9.214595019004952e-06, "loss": 0.2861, "step": 28280 }, { "epoch": 0.5250572732369683, "grad_norm": 0.5390895009040833, "learning_rate": 9.213432137066541e-06, "loss": 0.402, "step": 28282 }, { "epoch": 0.5250944033743868, "grad_norm": 0.30878981947898865, "learning_rate": 9.212269265830964e-06, "loss": 0.1949, "step": 28284 }, { "epoch": 0.5251315335118055, "grad_norm": 0.43309304118156433, "learning_rate": 9.211106405314043e-06, "loss": 0.2935, "step": 28286 }, { "epoch": 0.5251686636492242, "grad_norm": 0.5291520953178406, "learning_rate": 9.209943555531602e-06, "loss": 0.1873, "step": 28288 }, { "epoch": 0.5252057937866428, "grad_norm": 0.4113317131996155, "learning_rate": 9.208780716499459e-06, "loss": 0.3627, "step": 28290 }, { "epoch": 0.5252429239240615, "grad_norm": 0.4700908064842224, "learning_rate": 9.20761788823344e-06, "loss": 0.2292, "step": 28292 }, { "epoch": 0.52528005406148, "grad_norm": 0.3196422755718231, "learning_rate": 9.20645507074937e-06, "loss": 0.1897, "step": 28294 }, { "epoch": 0.5253171841988987, "grad_norm": 0.6900250315666199, "learning_rate": 9.20529226406307e-06, "loss": 0.1252, "step": 28296 }, { "epoch": 0.5253543143363174, "grad_norm": 0.2927192151546478, "learning_rate": 9.204129468190362e-06, "loss": 0.2493, "step": 28298 }, { "epoch": 0.525391444473736, "grad_norm": 0.461359441280365, "learning_rate": 9.202966683147065e-06, "loss": 0.2976, "step": 28300 }, { "epoch": 0.5254285746111547, "grad_norm": 0.27889353036880493, "learning_rate": 9.201803908949011e-06, "loss": 0.2777, "step": 28302 }, { "epoch": 0.5254657047485732, "grad_norm": 0.3735930621623993, "learning_rate": 9.20064114561201e-06, "loss": 0.4009, "step": 28304 }, { "epoch": 0.5255028348859919, "grad_norm": 0.5680769681930542, "learning_rate": 9.19947839315189e-06, "loss": 0.3545, "step": 28306 }, { "epoch": 0.5255399650234106, "grad_norm": 0.47031331062316895, "learning_rate": 9.198315651584468e-06, "loss": 0.4245, "step": 28308 }, { "epoch": 0.5255770951608292, "grad_norm": 0.4699476659297943, "learning_rate": 9.19715292092557e-06, "loss": 0.1753, "step": 28310 }, { "epoch": 0.5256142252982479, "grad_norm": 0.5114132165908813, "learning_rate": 9.195990201191017e-06, "loss": 0.484, "step": 28312 }, { "epoch": 0.5256513554356664, "grad_norm": 0.23072189092636108, "learning_rate": 9.194827492396631e-06, "loss": 0.4752, "step": 28314 }, { "epoch": 0.5256884855730851, "grad_norm": 0.37426748871803284, "learning_rate": 9.19366479455823e-06, "loss": 0.2425, "step": 28316 }, { "epoch": 0.5257256157105037, "grad_norm": 0.38548386096954346, "learning_rate": 9.192502107691636e-06, "loss": 0.4286, "step": 28318 }, { "epoch": 0.5257627458479224, "grad_norm": 0.3310141861438751, "learning_rate": 9.191339431812666e-06, "loss": 0.2724, "step": 28320 }, { "epoch": 0.5257998759853411, "grad_norm": 0.2608674466609955, "learning_rate": 9.190176766937147e-06, "loss": 0.1642, "step": 28322 }, { "epoch": 0.5258370061227596, "grad_norm": 0.5083295106887817, "learning_rate": 9.189014113080894e-06, "loss": 0.2406, "step": 28324 }, { "epoch": 0.5258741362601783, "grad_norm": 0.4197280704975128, "learning_rate": 9.187851470259736e-06, "loss": 0.1947, "step": 28326 }, { "epoch": 0.5259112663975969, "grad_norm": 0.41674289107322693, "learning_rate": 9.186688838489483e-06, "loss": 0.2755, "step": 28328 }, { "epoch": 0.5259483965350156, "grad_norm": 0.2371097356081009, "learning_rate": 9.185526217785959e-06, "loss": 0.2025, "step": 28330 }, { "epoch": 0.5259855266724343, "grad_norm": 0.4113895297050476, "learning_rate": 9.18436360816498e-06, "loss": 0.2106, "step": 28332 }, { "epoch": 0.5260226568098528, "grad_norm": 0.3476105034351349, "learning_rate": 9.183201009642372e-06, "loss": 0.2154, "step": 28334 }, { "epoch": 0.5260597869472715, "grad_norm": 0.37932664155960083, "learning_rate": 9.182038422233952e-06, "loss": 0.5002, "step": 28336 }, { "epoch": 0.5260969170846901, "grad_norm": 0.4214911162853241, "learning_rate": 9.180875845955542e-06, "loss": 0.1484, "step": 28338 }, { "epoch": 0.5261340472221088, "grad_norm": 0.32194986939430237, "learning_rate": 9.179713280822955e-06, "loss": 0.349, "step": 28340 }, { "epoch": 0.5261711773595275, "grad_norm": 0.49952277541160583, "learning_rate": 9.178550726852012e-06, "loss": 0.2022, "step": 28342 }, { "epoch": 0.526208307496946, "grad_norm": 0.3168894648551941, "learning_rate": 9.177388184058533e-06, "loss": 0.2835, "step": 28344 }, { "epoch": 0.5262454376343647, "grad_norm": 0.31756705045700073, "learning_rate": 9.176225652458338e-06, "loss": 0.3117, "step": 28346 }, { "epoch": 0.5262825677717833, "grad_norm": 0.35774365067481995, "learning_rate": 9.175063132067246e-06, "loss": 0.3104, "step": 28348 }, { "epoch": 0.526319697909202, "grad_norm": 0.4334663152694702, "learning_rate": 9.17390062290107e-06, "loss": 0.2554, "step": 28350 }, { "epoch": 0.5263568280466207, "grad_norm": 0.39199215173721313, "learning_rate": 9.172738124975639e-06, "loss": 0.2479, "step": 28352 }, { "epoch": 0.5263939581840392, "grad_norm": 0.2825665771961212, "learning_rate": 9.171575638306758e-06, "loss": 0.4201, "step": 28354 }, { "epoch": 0.5264310883214579, "grad_norm": 0.51008540391922, "learning_rate": 9.170413162910252e-06, "loss": 0.3606, "step": 28356 }, { "epoch": 0.5264682184588765, "grad_norm": 0.3553040027618408, "learning_rate": 9.16925069880194e-06, "loss": 0.2364, "step": 28358 }, { "epoch": 0.5265053485962952, "grad_norm": 0.43595871329307556, "learning_rate": 9.168088245997635e-06, "loss": 0.3047, "step": 28360 }, { "epoch": 0.5265424787337138, "grad_norm": 0.2794182002544403, "learning_rate": 9.166925804513157e-06, "loss": 0.3564, "step": 28362 }, { "epoch": 0.5265796088711324, "grad_norm": 0.5841917395591736, "learning_rate": 9.165763374364327e-06, "loss": 0.2978, "step": 28364 }, { "epoch": 0.5266167390085511, "grad_norm": 0.4386557936668396, "learning_rate": 9.164600955566957e-06, "loss": 0.2326, "step": 28366 }, { "epoch": 0.5266538691459697, "grad_norm": 0.4702779948711395, "learning_rate": 9.163438548136863e-06, "loss": 0.2682, "step": 28368 }, { "epoch": 0.5266909992833884, "grad_norm": 0.30933886766433716, "learning_rate": 9.162276152089868e-06, "loss": 0.3006, "step": 28370 }, { "epoch": 0.5267281294208069, "grad_norm": 0.3269096612930298, "learning_rate": 9.161113767441784e-06, "loss": 0.2067, "step": 28372 }, { "epoch": 0.5267652595582256, "grad_norm": 0.3352173864841461, "learning_rate": 9.159951394208427e-06, "loss": 0.4423, "step": 28374 }, { "epoch": 0.5268023896956443, "grad_norm": 0.5195205211639404, "learning_rate": 9.15878903240562e-06, "loss": 0.2642, "step": 28376 }, { "epoch": 0.5268395198330629, "grad_norm": 0.2897060811519623, "learning_rate": 9.157626682049172e-06, "loss": 0.2127, "step": 28378 }, { "epoch": 0.5268766499704816, "grad_norm": 0.40540388226509094, "learning_rate": 9.1564643431549e-06, "loss": 0.3407, "step": 28380 }, { "epoch": 0.5269137801079001, "grad_norm": 0.5326733589172363, "learning_rate": 9.155302015738623e-06, "loss": 0.2676, "step": 28382 }, { "epoch": 0.5269509102453188, "grad_norm": 0.46720582246780396, "learning_rate": 9.154139699816155e-06, "loss": 0.3279, "step": 28384 }, { "epoch": 0.5269880403827375, "grad_norm": 0.466681569814682, "learning_rate": 9.152977395403312e-06, "loss": 0.3336, "step": 28386 }, { "epoch": 0.5270251705201561, "grad_norm": 0.495192289352417, "learning_rate": 9.15181510251591e-06, "loss": 0.0963, "step": 28388 }, { "epoch": 0.5270623006575748, "grad_norm": 0.3837859034538269, "learning_rate": 9.150652821169766e-06, "loss": 0.3577, "step": 28390 }, { "epoch": 0.5270994307949933, "grad_norm": 0.31270748376846313, "learning_rate": 9.149490551380692e-06, "loss": 0.2454, "step": 28392 }, { "epoch": 0.527136560932412, "grad_norm": 0.30723896622657776, "learning_rate": 9.148328293164502e-06, "loss": 0.2585, "step": 28394 }, { "epoch": 0.5271736910698307, "grad_norm": 0.4640527367591858, "learning_rate": 9.147166046537013e-06, "loss": 0.4626, "step": 28396 }, { "epoch": 0.5272108212072493, "grad_norm": 0.4258682429790497, "learning_rate": 9.146003811514039e-06, "loss": 0.1764, "step": 28398 }, { "epoch": 0.527247951344668, "grad_norm": 0.46926218271255493, "learning_rate": 9.144841588111397e-06, "loss": 0.2698, "step": 28400 }, { "epoch": 0.5272850814820865, "grad_norm": 0.3380294442176819, "learning_rate": 9.1436793763449e-06, "loss": 0.2317, "step": 28402 }, { "epoch": 0.5273222116195052, "grad_norm": 0.3501266539096832, "learning_rate": 9.142517176230362e-06, "loss": 0.2335, "step": 28404 }, { "epoch": 0.5273593417569239, "grad_norm": 0.29773467779159546, "learning_rate": 9.141354987783595e-06, "loss": 0.2064, "step": 28406 }, { "epoch": 0.5273964718943425, "grad_norm": 0.42288297414779663, "learning_rate": 9.140192811020412e-06, "loss": 0.3167, "step": 28408 }, { "epoch": 0.5274336020317612, "grad_norm": 0.24485230445861816, "learning_rate": 9.139030645956633e-06, "loss": 0.0877, "step": 28410 }, { "epoch": 0.5274707321691797, "grad_norm": 0.4290095567703247, "learning_rate": 9.137868492608069e-06, "loss": 0.2641, "step": 28412 }, { "epoch": 0.5275078623065984, "grad_norm": 0.4234887361526489, "learning_rate": 9.136706350990534e-06, "loss": 0.359, "step": 28414 }, { "epoch": 0.527544992444017, "grad_norm": 0.2719157934188843, "learning_rate": 9.135544221119836e-06, "loss": 0.3014, "step": 28416 }, { "epoch": 0.5275821225814357, "grad_norm": 0.35349541902542114, "learning_rate": 9.13438210301179e-06, "loss": 0.2665, "step": 28418 }, { "epoch": 0.5276192527188543, "grad_norm": 0.3618432879447937, "learning_rate": 9.133219996682213e-06, "loss": 0.2824, "step": 28420 }, { "epoch": 0.5276563828562729, "grad_norm": 0.3793849050998688, "learning_rate": 9.132057902146917e-06, "loss": 0.5318, "step": 28422 }, { "epoch": 0.5276935129936916, "grad_norm": 0.4053475856781006, "learning_rate": 9.13089581942171e-06, "loss": 0.3727, "step": 28424 }, { "epoch": 0.5277306431311102, "grad_norm": 0.3458296060562134, "learning_rate": 9.12973374852241e-06, "loss": 0.3271, "step": 28426 }, { "epoch": 0.5277677732685289, "grad_norm": 0.31473100185394287, "learning_rate": 9.128571689464829e-06, "loss": 0.05, "step": 28428 }, { "epoch": 0.5278049034059475, "grad_norm": 0.38306495547294617, "learning_rate": 9.127409642264773e-06, "loss": 0.2475, "step": 28430 }, { "epoch": 0.5278420335433661, "grad_norm": 0.2218981236219406, "learning_rate": 9.12624760693806e-06, "loss": 0.338, "step": 28432 }, { "epoch": 0.5278791636807848, "grad_norm": 0.6062273979187012, "learning_rate": 9.1250855835005e-06, "loss": 0.5244, "step": 28434 }, { "epoch": 0.5279162938182034, "grad_norm": 0.48028719425201416, "learning_rate": 9.123923571967902e-06, "loss": 0.3459, "step": 28436 }, { "epoch": 0.5279534239556221, "grad_norm": 0.43190088868141174, "learning_rate": 9.122761572356082e-06, "loss": 0.4084, "step": 28438 }, { "epoch": 0.5279905540930407, "grad_norm": 0.2599162459373474, "learning_rate": 9.121599584680853e-06, "loss": 0.1965, "step": 28440 }, { "epoch": 0.5280276842304593, "grad_norm": 0.28590288758277893, "learning_rate": 9.120437608958016e-06, "loss": 0.2318, "step": 28442 }, { "epoch": 0.528064814367878, "grad_norm": 0.3959429860115051, "learning_rate": 9.119275645203394e-06, "loss": 0.247, "step": 28444 }, { "epoch": 0.5281019445052966, "grad_norm": 0.2837539315223694, "learning_rate": 9.118113693432788e-06, "loss": 0.3302, "step": 28446 }, { "epoch": 0.5281390746427153, "grad_norm": 0.4189326763153076, "learning_rate": 9.116951753662014e-06, "loss": 0.4026, "step": 28448 }, { "epoch": 0.5281762047801339, "grad_norm": 0.4856795370578766, "learning_rate": 9.115789825906882e-06, "loss": 0.495, "step": 28450 }, { "epoch": 0.5282133349175525, "grad_norm": 0.47973716259002686, "learning_rate": 9.114627910183205e-06, "loss": 0.2553, "step": 28452 }, { "epoch": 0.5282504650549712, "grad_norm": 0.3574253022670746, "learning_rate": 9.113466006506787e-06, "loss": 0.2375, "step": 28454 }, { "epoch": 0.5282875951923898, "grad_norm": 0.30163365602493286, "learning_rate": 9.112304114893443e-06, "loss": 0.4018, "step": 28456 }, { "epoch": 0.5283247253298085, "grad_norm": 0.44410839676856995, "learning_rate": 9.11114223535898e-06, "loss": 0.2864, "step": 28458 }, { "epoch": 0.5283618554672271, "grad_norm": 0.7240673899650574, "learning_rate": 9.109980367919207e-06, "loss": 0.2536, "step": 28460 }, { "epoch": 0.5283989856046457, "grad_norm": 0.3009434938430786, "learning_rate": 9.108818512589936e-06, "loss": 0.4371, "step": 28462 }, { "epoch": 0.5284361157420644, "grad_norm": 0.2967061698436737, "learning_rate": 9.10765666938698e-06, "loss": 0.1812, "step": 28464 }, { "epoch": 0.528473245879483, "grad_norm": 0.2505098879337311, "learning_rate": 9.106494838326142e-06, "loss": 0.1836, "step": 28466 }, { "epoch": 0.5285103760169017, "grad_norm": 0.543154239654541, "learning_rate": 9.105333019423229e-06, "loss": 0.2606, "step": 28468 }, { "epoch": 0.5285475061543202, "grad_norm": 0.35611391067504883, "learning_rate": 9.104171212694055e-06, "loss": 0.1888, "step": 28470 }, { "epoch": 0.5285846362917389, "grad_norm": 0.3405942916870117, "learning_rate": 9.103009418154427e-06, "loss": 0.3491, "step": 28472 }, { "epoch": 0.5286217664291576, "grad_norm": 0.41428476572036743, "learning_rate": 9.101847635820157e-06, "loss": 0.2503, "step": 28474 }, { "epoch": 0.5286588965665762, "grad_norm": 0.3893307149410248, "learning_rate": 9.100685865707049e-06, "loss": 0.3167, "step": 28476 }, { "epoch": 0.5286960267039948, "grad_norm": 0.3714529275894165, "learning_rate": 9.099524107830915e-06, "loss": 0.2076, "step": 28478 }, { "epoch": 0.5287331568414134, "grad_norm": 0.4382079243659973, "learning_rate": 9.098362362207557e-06, "loss": 0.2234, "step": 28480 }, { "epoch": 0.5287702869788321, "grad_norm": 0.48060309886932373, "learning_rate": 9.097200628852788e-06, "loss": 0.2515, "step": 28482 }, { "epoch": 0.5288074171162508, "grad_norm": 0.16425210237503052, "learning_rate": 9.096038907782412e-06, "loss": 0.2991, "step": 28484 }, { "epoch": 0.5288445472536694, "grad_norm": 0.3998776376247406, "learning_rate": 9.094877199012241e-06, "loss": 0.4614, "step": 28486 }, { "epoch": 0.528881677391088, "grad_norm": 0.3245198130607605, "learning_rate": 9.09371550255808e-06, "loss": 0.2493, "step": 28488 }, { "epoch": 0.5289188075285066, "grad_norm": 0.3437099754810333, "learning_rate": 9.092553818435739e-06, "loss": 0.2995, "step": 28490 }, { "epoch": 0.5289559376659253, "grad_norm": 0.3244602084159851, "learning_rate": 9.09139214666102e-06, "loss": 0.5073, "step": 28492 }, { "epoch": 0.528993067803344, "grad_norm": 0.4118961691856384, "learning_rate": 9.09023048724973e-06, "loss": 0.3693, "step": 28494 }, { "epoch": 0.5290301979407626, "grad_norm": 0.4771861135959625, "learning_rate": 9.089068840217681e-06, "loss": 0.2352, "step": 28496 }, { "epoch": 0.5290673280781812, "grad_norm": 0.3344717025756836, "learning_rate": 9.087907205580675e-06, "loss": 0.3935, "step": 28498 }, { "epoch": 0.5291044582155998, "grad_norm": 0.7406889200210571, "learning_rate": 9.086745583354518e-06, "loss": 0.4143, "step": 28500 }, { "epoch": 0.5291415883530185, "grad_norm": 0.3255789279937744, "learning_rate": 9.085583973555024e-06, "loss": 0.3686, "step": 28502 }, { "epoch": 0.5291787184904372, "grad_norm": 0.36220628023147583, "learning_rate": 9.08442237619799e-06, "loss": 0.2573, "step": 28504 }, { "epoch": 0.5292158486278558, "grad_norm": 0.3419683277606964, "learning_rate": 9.083260791299223e-06, "loss": 0.2444, "step": 28506 }, { "epoch": 0.5292529787652744, "grad_norm": 0.3914368450641632, "learning_rate": 9.082099218874535e-06, "loss": 0.4161, "step": 28508 }, { "epoch": 0.529290108902693, "grad_norm": 0.38675177097320557, "learning_rate": 9.080937658939721e-06, "loss": 0.2112, "step": 28510 }, { "epoch": 0.5293272390401117, "grad_norm": 0.37038740515708923, "learning_rate": 9.079776111510597e-06, "loss": 0.2183, "step": 28512 }, { "epoch": 0.5293643691775304, "grad_norm": 0.2745799124240875, "learning_rate": 9.07861457660296e-06, "loss": 0.2575, "step": 28514 }, { "epoch": 0.529401499314949, "grad_norm": 0.26264482736587524, "learning_rate": 9.077453054232625e-06, "loss": 0.3371, "step": 28516 }, { "epoch": 0.5294386294523676, "grad_norm": 0.43180668354034424, "learning_rate": 9.076291544415387e-06, "loss": 0.1423, "step": 28518 }, { "epoch": 0.5294757595897862, "grad_norm": 0.36550915241241455, "learning_rate": 9.075130047167056e-06, "loss": 0.2119, "step": 28520 }, { "epoch": 0.5295128897272049, "grad_norm": 0.2861661911010742, "learning_rate": 9.07396856250343e-06, "loss": 0.4124, "step": 28522 }, { "epoch": 0.5295500198646235, "grad_norm": 0.5105169415473938, "learning_rate": 9.072807090440321e-06, "loss": 0.2854, "step": 28524 }, { "epoch": 0.5295871500020422, "grad_norm": 0.2688589096069336, "learning_rate": 9.071645630993533e-06, "loss": 0.3294, "step": 28526 }, { "epoch": 0.5296242801394608, "grad_norm": 0.3442625403404236, "learning_rate": 9.070484184178865e-06, "loss": 0.2519, "step": 28528 }, { "epoch": 0.5296614102768794, "grad_norm": 0.4653208255767822, "learning_rate": 9.069322750012125e-06, "loss": 0.3002, "step": 28530 }, { "epoch": 0.5296985404142981, "grad_norm": 0.27355819940567017, "learning_rate": 9.06816132850911e-06, "loss": 0.2936, "step": 28532 }, { "epoch": 0.5297356705517167, "grad_norm": 0.20658884942531586, "learning_rate": 9.06699991968563e-06, "loss": 0.1736, "step": 28534 }, { "epoch": 0.5297728006891353, "grad_norm": 0.49564096331596375, "learning_rate": 9.065838523557485e-06, "loss": 0.4089, "step": 28536 }, { "epoch": 0.529809930826554, "grad_norm": 0.5529326796531677, "learning_rate": 9.064677140140483e-06, "loss": 0.2418, "step": 28538 }, { "epoch": 0.5298470609639726, "grad_norm": 0.4253714680671692, "learning_rate": 9.063515769450423e-06, "loss": 0.1565, "step": 28540 }, { "epoch": 0.5298841911013913, "grad_norm": 0.5363646745681763, "learning_rate": 9.062354411503108e-06, "loss": 0.3174, "step": 28542 }, { "epoch": 0.5299213212388099, "grad_norm": 0.2433885931968689, "learning_rate": 9.06119306631434e-06, "loss": 0.3749, "step": 28544 }, { "epoch": 0.5299584513762285, "grad_norm": 0.3358222544193268, "learning_rate": 9.060031733899921e-06, "loss": 0.3841, "step": 28546 }, { "epoch": 0.5299955815136472, "grad_norm": 0.336128830909729, "learning_rate": 9.058870414275656e-06, "loss": 0.3237, "step": 28548 }, { "epoch": 0.5300327116510658, "grad_norm": 0.3295275866985321, "learning_rate": 9.057709107457347e-06, "loss": 0.2777, "step": 28550 }, { "epoch": 0.5300698417884845, "grad_norm": 0.31791406869888306, "learning_rate": 9.05654781346079e-06, "loss": 0.1994, "step": 28552 }, { "epoch": 0.5301069719259031, "grad_norm": 0.4116072356700897, "learning_rate": 9.055386532301799e-06, "loss": 0.3552, "step": 28554 }, { "epoch": 0.5301441020633217, "grad_norm": 0.5951401591300964, "learning_rate": 9.054225263996162e-06, "loss": 0.3792, "step": 28556 }, { "epoch": 0.5301812322007404, "grad_norm": 0.40632858872413635, "learning_rate": 9.053064008559686e-06, "loss": 0.3236, "step": 28558 }, { "epoch": 0.530218362338159, "grad_norm": 0.337022989988327, "learning_rate": 9.051902766008175e-06, "loss": 0.3186, "step": 28560 }, { "epoch": 0.5302554924755777, "grad_norm": 0.40571415424346924, "learning_rate": 9.050741536357427e-06, "loss": 0.1985, "step": 28562 }, { "epoch": 0.5302926226129963, "grad_norm": 0.3221340477466583, "learning_rate": 9.049580319623242e-06, "loss": 0.4269, "step": 28564 }, { "epoch": 0.5303297527504149, "grad_norm": 0.27010414004325867, "learning_rate": 9.048419115821427e-06, "loss": 0.2755, "step": 28566 }, { "epoch": 0.5303668828878335, "grad_norm": 0.3577069640159607, "learning_rate": 9.047257924967772e-06, "loss": 0.173, "step": 28568 }, { "epoch": 0.5304040130252522, "grad_norm": 0.4532218277454376, "learning_rate": 9.046096747078083e-06, "loss": 0.2328, "step": 28570 }, { "epoch": 0.5304411431626709, "grad_norm": 0.27819734811782837, "learning_rate": 9.044935582168162e-06, "loss": 0.21, "step": 28572 }, { "epoch": 0.5304782733000895, "grad_norm": 0.4359601140022278, "learning_rate": 9.043774430253806e-06, "loss": 0.2805, "step": 28574 }, { "epoch": 0.5305154034375081, "grad_norm": 0.3099932372570038, "learning_rate": 9.042613291350815e-06, "loss": 0.1423, "step": 28576 }, { "epoch": 0.5305525335749267, "grad_norm": 0.4820757806301117, "learning_rate": 9.041452165474993e-06, "loss": 0.3898, "step": 28578 }, { "epoch": 0.5305896637123454, "grad_norm": 0.556048572063446, "learning_rate": 9.040291052642131e-06, "loss": 0.3458, "step": 28580 }, { "epoch": 0.5306267938497641, "grad_norm": 0.3959524929523468, "learning_rate": 9.039129952868037e-06, "loss": 0.1553, "step": 28582 }, { "epoch": 0.5306639239871827, "grad_norm": 0.3517950475215912, "learning_rate": 9.037968866168504e-06, "loss": 0.3405, "step": 28584 }, { "epoch": 0.5307010541246013, "grad_norm": 0.32837456464767456, "learning_rate": 9.036807792559334e-06, "loss": 0.0859, "step": 28586 }, { "epoch": 0.5307381842620199, "grad_norm": 0.41319870948791504, "learning_rate": 9.035646732056322e-06, "loss": 0.2147, "step": 28588 }, { "epoch": 0.5307753143994386, "grad_norm": 0.33680450916290283, "learning_rate": 9.034485684675273e-06, "loss": 0.4008, "step": 28590 }, { "epoch": 0.5308124445368573, "grad_norm": 0.4057881832122803, "learning_rate": 9.033324650431983e-06, "loss": 0.1335, "step": 28592 }, { "epoch": 0.5308495746742758, "grad_norm": 0.27993640303611755, "learning_rate": 9.032163629342248e-06, "loss": 0.2056, "step": 28594 }, { "epoch": 0.5308867048116945, "grad_norm": 0.32959744334220886, "learning_rate": 9.031002621421864e-06, "loss": 0.3711, "step": 28596 }, { "epoch": 0.5309238349491131, "grad_norm": 0.5029144883155823, "learning_rate": 9.029841626686633e-06, "loss": 0.235, "step": 28598 }, { "epoch": 0.5309609650865318, "grad_norm": 0.3615346848964691, "learning_rate": 9.02868064515235e-06, "loss": 0.2299, "step": 28600 }, { "epoch": 0.5309980952239505, "grad_norm": 0.49010297656059265, "learning_rate": 9.027519676834817e-06, "loss": 0.1367, "step": 28602 }, { "epoch": 0.531035225361369, "grad_norm": 0.43259549140930176, "learning_rate": 9.026358721749831e-06, "loss": 0.3543, "step": 28604 }, { "epoch": 0.5310723554987877, "grad_norm": 0.5976904630661011, "learning_rate": 9.025197779913182e-06, "loss": 0.2803, "step": 28606 }, { "epoch": 0.5311094856362063, "grad_norm": 0.44180819392204285, "learning_rate": 9.024036851340671e-06, "loss": 0.1787, "step": 28608 }, { "epoch": 0.531146615773625, "grad_norm": 0.3821260929107666, "learning_rate": 9.022875936048095e-06, "loss": 0.3309, "step": 28610 }, { "epoch": 0.5311837459110437, "grad_norm": 0.27863022685050964, "learning_rate": 9.021715034051253e-06, "loss": 0.2108, "step": 28612 }, { "epoch": 0.5312208760484622, "grad_norm": 0.27270522713661194, "learning_rate": 9.020554145365937e-06, "loss": 0.3301, "step": 28614 }, { "epoch": 0.5312580061858809, "grad_norm": 0.09505432844161987, "learning_rate": 9.01939327000795e-06, "loss": 0.1631, "step": 28616 }, { "epoch": 0.5312951363232995, "grad_norm": 0.3926463723182678, "learning_rate": 9.018232407993079e-06, "loss": 0.4673, "step": 28618 }, { "epoch": 0.5313322664607182, "grad_norm": 0.3896433115005493, "learning_rate": 9.017071559337122e-06, "loss": 0.3096, "step": 28620 }, { "epoch": 0.5313693965981368, "grad_norm": 0.5015110373497009, "learning_rate": 9.01591072405588e-06, "loss": 0.2623, "step": 28622 }, { "epoch": 0.5314065267355554, "grad_norm": 0.25698205828666687, "learning_rate": 9.014749902165145e-06, "loss": 0.2308, "step": 28624 }, { "epoch": 0.5314436568729741, "grad_norm": 0.2424246072769165, "learning_rate": 9.013589093680712e-06, "loss": 0.1352, "step": 28626 }, { "epoch": 0.5314807870103927, "grad_norm": 0.328136146068573, "learning_rate": 9.012428298618381e-06, "loss": 0.3104, "step": 28628 }, { "epoch": 0.5315179171478114, "grad_norm": 0.29206541180610657, "learning_rate": 9.011267516993938e-06, "loss": 0.2803, "step": 28630 }, { "epoch": 0.53155504728523, "grad_norm": 0.6134827136993408, "learning_rate": 9.010106748823184e-06, "loss": 0.3915, "step": 28632 }, { "epoch": 0.5315921774226486, "grad_norm": 0.3014030158519745, "learning_rate": 9.008945994121912e-06, "loss": 0.2853, "step": 28634 }, { "epoch": 0.5316293075600673, "grad_norm": 0.38580888509750366, "learning_rate": 9.007785252905914e-06, "loss": 0.1835, "step": 28636 }, { "epoch": 0.5316664376974859, "grad_norm": 0.3474043309688568, "learning_rate": 9.006624525190988e-06, "loss": 0.2729, "step": 28638 }, { "epoch": 0.5317035678349046, "grad_norm": 0.37960511445999146, "learning_rate": 9.005463810992928e-06, "loss": 0.193, "step": 28640 }, { "epoch": 0.5317406979723232, "grad_norm": 0.29074230790138245, "learning_rate": 9.00430311032753e-06, "loss": 0.1112, "step": 28642 }, { "epoch": 0.5317778281097418, "grad_norm": 0.2615014314651489, "learning_rate": 9.003142423210578e-06, "loss": 0.2699, "step": 28644 }, { "epoch": 0.5318149582471605, "grad_norm": 0.47838708758354187, "learning_rate": 9.001981749657876e-06, "loss": 0.4673, "step": 28646 }, { "epoch": 0.5318520883845791, "grad_norm": 0.5226272344589233, "learning_rate": 9.00082108968521e-06, "loss": 0.3158, "step": 28648 }, { "epoch": 0.5318892185219978, "grad_norm": 0.3913807272911072, "learning_rate": 8.999660443308376e-06, "loss": 0.4194, "step": 28650 }, { "epoch": 0.5319263486594163, "grad_norm": 0.31727370619773865, "learning_rate": 8.998499810543167e-06, "loss": 0.2641, "step": 28652 }, { "epoch": 0.531963478796835, "grad_norm": 0.6132047772407532, "learning_rate": 8.99733919140538e-06, "loss": 0.2269, "step": 28654 }, { "epoch": 0.5320006089342537, "grad_norm": 0.5233863592147827, "learning_rate": 8.9961785859108e-06, "loss": 0.3324, "step": 28656 }, { "epoch": 0.5320377390716723, "grad_norm": 0.3149389326572418, "learning_rate": 8.995017994075223e-06, "loss": 0.4466, "step": 28658 }, { "epoch": 0.532074869209091, "grad_norm": 0.5390313863754272, "learning_rate": 8.99385741591444e-06, "loss": 0.1472, "step": 28660 }, { "epoch": 0.5321119993465095, "grad_norm": 0.5517196655273438, "learning_rate": 8.992696851444243e-06, "loss": 0.2837, "step": 28662 }, { "epoch": 0.5321491294839282, "grad_norm": 0.5110749006271362, "learning_rate": 8.991536300680427e-06, "loss": 0.3229, "step": 28664 }, { "epoch": 0.5321862596213469, "grad_norm": 0.2862986624240875, "learning_rate": 8.99037576363878e-06, "loss": 0.3643, "step": 28666 }, { "epoch": 0.5322233897587655, "grad_norm": 0.30766740441322327, "learning_rate": 8.989215240335099e-06, "loss": 0.2406, "step": 28668 }, { "epoch": 0.5322605198961842, "grad_norm": 0.5216443538665771, "learning_rate": 8.988054730785165e-06, "loss": 0.2923, "step": 28670 }, { "epoch": 0.5322976500336027, "grad_norm": 0.4727714955806732, "learning_rate": 8.986894235004775e-06, "loss": 0.276, "step": 28672 }, { "epoch": 0.5323347801710214, "grad_norm": 0.5130835771560669, "learning_rate": 8.985733753009722e-06, "loss": 0.0979, "step": 28674 }, { "epoch": 0.53237191030844, "grad_norm": 0.5627540349960327, "learning_rate": 8.984573284815796e-06, "loss": 0.1872, "step": 28676 }, { "epoch": 0.5324090404458587, "grad_norm": 0.4905412197113037, "learning_rate": 8.983412830438783e-06, "loss": 0.3372, "step": 28678 }, { "epoch": 0.5324461705832774, "grad_norm": 0.45058009028434753, "learning_rate": 8.98225238989448e-06, "loss": 0.3828, "step": 28680 }, { "epoch": 0.5324833007206959, "grad_norm": 0.4002496004104614, "learning_rate": 8.98109196319867e-06, "loss": 0.3114, "step": 28682 }, { "epoch": 0.5325204308581146, "grad_norm": 0.3322855234146118, "learning_rate": 8.979931550367147e-06, "loss": 0.2278, "step": 28684 }, { "epoch": 0.5325575609955332, "grad_norm": 0.2826186418533325, "learning_rate": 8.9787711514157e-06, "loss": 0.2277, "step": 28686 }, { "epoch": 0.5325946911329519, "grad_norm": 0.5161633491516113, "learning_rate": 8.97761076636012e-06, "loss": 0.3175, "step": 28688 }, { "epoch": 0.5326318212703706, "grad_norm": 0.3529422879219055, "learning_rate": 8.976450395216194e-06, "loss": 0.2363, "step": 28690 }, { "epoch": 0.5326689514077891, "grad_norm": 0.29578402638435364, "learning_rate": 8.975290037999715e-06, "loss": 0.2273, "step": 28692 }, { "epoch": 0.5327060815452078, "grad_norm": 0.4117681086063385, "learning_rate": 8.974129694726466e-06, "loss": 0.2895, "step": 28694 }, { "epoch": 0.5327432116826264, "grad_norm": 0.39813968539237976, "learning_rate": 8.97296936541224e-06, "loss": 0.3968, "step": 28696 }, { "epoch": 0.5327803418200451, "grad_norm": 0.2933594584465027, "learning_rate": 8.971809050072824e-06, "loss": 0.2852, "step": 28698 }, { "epoch": 0.5328174719574638, "grad_norm": 0.5049290657043457, "learning_rate": 8.970648748724008e-06, "loss": 0.2552, "step": 28700 }, { "epoch": 0.5328546020948823, "grad_norm": 0.687104344367981, "learning_rate": 8.969488461381579e-06, "loss": 0.2803, "step": 28702 }, { "epoch": 0.532891732232301, "grad_norm": 0.6806204319000244, "learning_rate": 8.968328188061326e-06, "loss": 0.2774, "step": 28704 }, { "epoch": 0.5329288623697196, "grad_norm": 0.5600220561027527, "learning_rate": 8.967167928779036e-06, "loss": 0.2249, "step": 28706 }, { "epoch": 0.5329659925071383, "grad_norm": 0.2667781412601471, "learning_rate": 8.966007683550495e-06, "loss": 0.0993, "step": 28708 }, { "epoch": 0.533003122644557, "grad_norm": 0.5475757718086243, "learning_rate": 8.964847452391494e-06, "loss": 0.1468, "step": 28710 }, { "epoch": 0.5330402527819755, "grad_norm": 0.5000697374343872, "learning_rate": 8.963687235317818e-06, "loss": 0.3236, "step": 28712 }, { "epoch": 0.5330773829193942, "grad_norm": 0.4197654128074646, "learning_rate": 8.962527032345252e-06, "loss": 0.2815, "step": 28714 }, { "epoch": 0.5331145130568128, "grad_norm": 0.5457786917686462, "learning_rate": 8.961366843489587e-06, "loss": 0.2604, "step": 28716 }, { "epoch": 0.5331516431942315, "grad_norm": 0.5328965187072754, "learning_rate": 8.960206668766614e-06, "loss": 0.3618, "step": 28718 }, { "epoch": 0.53318877333165, "grad_norm": 0.2846589684486389, "learning_rate": 8.95904650819211e-06, "loss": 0.286, "step": 28720 }, { "epoch": 0.5332259034690687, "grad_norm": 0.28273579478263855, "learning_rate": 8.957886361781862e-06, "loss": 0.3344, "step": 28722 }, { "epoch": 0.5332630336064874, "grad_norm": 0.49682775139808655, "learning_rate": 8.95672622955166e-06, "loss": 0.2476, "step": 28724 }, { "epoch": 0.533300163743906, "grad_norm": 0.5343006253242493, "learning_rate": 8.955566111517288e-06, "loss": 0.324, "step": 28726 }, { "epoch": 0.5333372938813247, "grad_norm": 0.3209720551967621, "learning_rate": 8.954406007694537e-06, "loss": 0.3463, "step": 28728 }, { "epoch": 0.5333744240187432, "grad_norm": 0.41841772198677063, "learning_rate": 8.953245918099188e-06, "loss": 0.1869, "step": 28730 }, { "epoch": 0.5334115541561619, "grad_norm": 0.32088202238082886, "learning_rate": 8.952085842747026e-06, "loss": 0.4696, "step": 28732 }, { "epoch": 0.5334486842935806, "grad_norm": 0.2829832136631012, "learning_rate": 8.950925781653834e-06, "loss": 0.3488, "step": 28734 }, { "epoch": 0.5334858144309992, "grad_norm": 0.2852284908294678, "learning_rate": 8.949765734835401e-06, "loss": 0.2503, "step": 28736 }, { "epoch": 0.5335229445684179, "grad_norm": 0.3803352415561676, "learning_rate": 8.94860570230751e-06, "loss": 0.4592, "step": 28738 }, { "epoch": 0.5335600747058364, "grad_norm": 0.3303178548812866, "learning_rate": 8.947445684085948e-06, "loss": 0.2805, "step": 28740 }, { "epoch": 0.5335972048432551, "grad_norm": 0.4031260907649994, "learning_rate": 8.9462856801865e-06, "loss": 0.4815, "step": 28742 }, { "epoch": 0.5336343349806738, "grad_norm": 0.2629953920841217, "learning_rate": 8.945125690624943e-06, "loss": 0.2595, "step": 28744 }, { "epoch": 0.5336714651180924, "grad_norm": 0.3868923485279083, "learning_rate": 8.943965715417065e-06, "loss": 0.2858, "step": 28746 }, { "epoch": 0.5337085952555111, "grad_norm": 0.4546166658401489, "learning_rate": 8.942805754578651e-06, "loss": 0.175, "step": 28748 }, { "epoch": 0.5337457253929296, "grad_norm": 0.3049778640270233, "learning_rate": 8.941645808125487e-06, "loss": 0.2588, "step": 28750 }, { "epoch": 0.5337828555303483, "grad_norm": 0.36252841353416443, "learning_rate": 8.94048587607335e-06, "loss": 0.2864, "step": 28752 }, { "epoch": 0.533819985667767, "grad_norm": 0.4173068702220917, "learning_rate": 8.939325958438033e-06, "loss": 0.3098, "step": 28754 }, { "epoch": 0.5338571158051856, "grad_norm": 0.5044727921485901, "learning_rate": 8.938166055235307e-06, "loss": 0.254, "step": 28756 }, { "epoch": 0.5338942459426043, "grad_norm": 0.21815672516822815, "learning_rate": 8.93700616648096e-06, "loss": 0.2525, "step": 28758 }, { "epoch": 0.5339313760800228, "grad_norm": 0.2718968093395233, "learning_rate": 8.935846292190775e-06, "loss": 0.3024, "step": 28760 }, { "epoch": 0.5339685062174415, "grad_norm": 0.38695719838142395, "learning_rate": 8.934686432380537e-06, "loss": 0.4865, "step": 28762 }, { "epoch": 0.5340056363548602, "grad_norm": 0.5715163350105286, "learning_rate": 8.933526587066023e-06, "loss": 0.3981, "step": 28764 }, { "epoch": 0.5340427664922788, "grad_norm": 0.49943724274635315, "learning_rate": 8.93236675626302e-06, "loss": 0.3209, "step": 28766 }, { "epoch": 0.5340798966296975, "grad_norm": 0.2775653600692749, "learning_rate": 8.931206939987308e-06, "loss": 0.2458, "step": 28768 }, { "epoch": 0.534117026767116, "grad_norm": 0.5197280645370483, "learning_rate": 8.930047138254667e-06, "loss": 0.2593, "step": 28770 }, { "epoch": 0.5341541569045347, "grad_norm": 0.405407190322876, "learning_rate": 8.92888735108088e-06, "loss": 0.2056, "step": 28772 }, { "epoch": 0.5341912870419533, "grad_norm": 0.2584144175052643, "learning_rate": 8.927727578481727e-06, "loss": 0.338, "step": 28774 }, { "epoch": 0.534228417179372, "grad_norm": 0.23247696459293365, "learning_rate": 8.926567820472989e-06, "loss": 0.2809, "step": 28776 }, { "epoch": 0.5342655473167907, "grad_norm": 0.1724630445241928, "learning_rate": 8.925408077070448e-06, "loss": 0.1288, "step": 28778 }, { "epoch": 0.5343026774542092, "grad_norm": 0.5522811412811279, "learning_rate": 8.924248348289888e-06, "loss": 0.2924, "step": 28780 }, { "epoch": 0.5343398075916279, "grad_norm": 0.5627481937408447, "learning_rate": 8.92308863414708e-06, "loss": 0.3517, "step": 28782 }, { "epoch": 0.5343769377290465, "grad_norm": 0.43672919273376465, "learning_rate": 8.921928934657814e-06, "loss": 0.1331, "step": 28784 }, { "epoch": 0.5344140678664652, "grad_norm": 0.28904980421066284, "learning_rate": 8.920769249837865e-06, "loss": 0.3959, "step": 28786 }, { "epoch": 0.5344511980038839, "grad_norm": 0.3805067837238312, "learning_rate": 8.919609579703013e-06, "loss": 0.1751, "step": 28788 }, { "epoch": 0.5344883281413024, "grad_norm": 0.41834399104118347, "learning_rate": 8.918449924269038e-06, "loss": 0.3534, "step": 28790 }, { "epoch": 0.5345254582787211, "grad_norm": 0.3008694052696228, "learning_rate": 8.917290283551724e-06, "loss": 0.4662, "step": 28792 }, { "epoch": 0.5345625884161397, "grad_norm": 0.35984915494918823, "learning_rate": 8.916130657566844e-06, "loss": 0.3961, "step": 28794 }, { "epoch": 0.5345997185535584, "grad_norm": 0.26578521728515625, "learning_rate": 8.914971046330177e-06, "loss": 0.2273, "step": 28796 }, { "epoch": 0.534636848690977, "grad_norm": 0.4290737807750702, "learning_rate": 8.913811449857505e-06, "loss": 0.3847, "step": 28798 }, { "epoch": 0.5346739788283956, "grad_norm": 0.30025798082351685, "learning_rate": 8.912651868164604e-06, "loss": 0.1242, "step": 28800 }, { "epoch": 0.5347111089658143, "grad_norm": 0.5194622278213501, "learning_rate": 8.911492301267257e-06, "loss": 0.2932, "step": 28802 }, { "epoch": 0.5347482391032329, "grad_norm": 0.3497410714626312, "learning_rate": 8.910332749181239e-06, "loss": 0.1945, "step": 28804 }, { "epoch": 0.5347853692406516, "grad_norm": 0.5166685581207275, "learning_rate": 8.909173211922331e-06, "loss": 0.3248, "step": 28806 }, { "epoch": 0.5348224993780702, "grad_norm": 0.6599903106689453, "learning_rate": 8.908013689506302e-06, "loss": 0.2456, "step": 28808 }, { "epoch": 0.5348596295154888, "grad_norm": 0.24852032959461212, "learning_rate": 8.906854181948939e-06, "loss": 0.1787, "step": 28810 }, { "epoch": 0.5348967596529075, "grad_norm": 0.5869738459587097, "learning_rate": 8.905694689266014e-06, "loss": 0.3927, "step": 28812 }, { "epoch": 0.5349338897903261, "grad_norm": 0.7700765132904053, "learning_rate": 8.90453521147331e-06, "loss": 0.3076, "step": 28814 }, { "epoch": 0.5349710199277448, "grad_norm": 0.3175654113292694, "learning_rate": 8.903375748586597e-06, "loss": 0.2964, "step": 28816 }, { "epoch": 0.5350081500651634, "grad_norm": 0.24191737174987793, "learning_rate": 8.902216300621662e-06, "loss": 0.452, "step": 28818 }, { "epoch": 0.535045280202582, "grad_norm": 0.4792058765888214, "learning_rate": 8.901056867594269e-06, "loss": 0.1772, "step": 28820 }, { "epoch": 0.5350824103400007, "grad_norm": 0.4530286192893982, "learning_rate": 8.8998974495202e-06, "loss": 0.2067, "step": 28822 }, { "epoch": 0.5351195404774193, "grad_norm": 0.3312215209007263, "learning_rate": 8.898738046415231e-06, "loss": 0.2798, "step": 28824 }, { "epoch": 0.535156670614838, "grad_norm": 0.4080938398838043, "learning_rate": 8.897578658295142e-06, "loss": 0.2407, "step": 28826 }, { "epoch": 0.5351938007522565, "grad_norm": 0.34011387825012207, "learning_rate": 8.896419285175704e-06, "loss": 0.2968, "step": 28828 }, { "epoch": 0.5352309308896752, "grad_norm": 0.345647394657135, "learning_rate": 8.895259927072698e-06, "loss": 0.3059, "step": 28830 }, { "epoch": 0.5352680610270939, "grad_norm": 0.5208058953285217, "learning_rate": 8.894100584001889e-06, "loss": 0.32, "step": 28832 }, { "epoch": 0.5353051911645125, "grad_norm": 0.3290981352329254, "learning_rate": 8.89294125597906e-06, "loss": 0.3565, "step": 28834 }, { "epoch": 0.5353423213019312, "grad_norm": 0.2666882872581482, "learning_rate": 8.891781943019986e-06, "loss": 0.2079, "step": 28836 }, { "epoch": 0.5353794514393497, "grad_norm": 0.35050931572914124, "learning_rate": 8.890622645140438e-06, "loss": 0.1663, "step": 28838 }, { "epoch": 0.5354165815767684, "grad_norm": 0.41442951560020447, "learning_rate": 8.889463362356193e-06, "loss": 0.2077, "step": 28840 }, { "epoch": 0.5354537117141871, "grad_norm": 0.3473535180091858, "learning_rate": 8.888304094683025e-06, "loss": 0.1456, "step": 28842 }, { "epoch": 0.5354908418516057, "grad_norm": 0.392761766910553, "learning_rate": 8.887144842136713e-06, "loss": 0.2245, "step": 28844 }, { "epoch": 0.5355279719890244, "grad_norm": 0.4616940915584564, "learning_rate": 8.885985604733021e-06, "loss": 0.348, "step": 28846 }, { "epoch": 0.5355651021264429, "grad_norm": 0.3946167826652527, "learning_rate": 8.884826382487732e-06, "loss": 0.1951, "step": 28848 }, { "epoch": 0.5356022322638616, "grad_norm": 0.3316513001918793, "learning_rate": 8.883667175416613e-06, "loss": 0.2824, "step": 28850 }, { "epoch": 0.5356393624012803, "grad_norm": 0.3715413510799408, "learning_rate": 8.882507983535438e-06, "loss": 0.2765, "step": 28852 }, { "epoch": 0.5356764925386989, "grad_norm": 0.33346349000930786, "learning_rate": 8.881348806859984e-06, "loss": 0.437, "step": 28854 }, { "epoch": 0.5357136226761176, "grad_norm": 0.6776406764984131, "learning_rate": 8.880189645406026e-06, "loss": 0.4656, "step": 28856 }, { "epoch": 0.5357507528135361, "grad_norm": 0.5393676161766052, "learning_rate": 8.879030499189331e-06, "loss": 0.3356, "step": 28858 }, { "epoch": 0.5357878829509548, "grad_norm": 0.3181859254837036, "learning_rate": 8.87787136822567e-06, "loss": 0.3038, "step": 28860 }, { "epoch": 0.5358250130883735, "grad_norm": 0.3831586539745331, "learning_rate": 8.876712252530819e-06, "loss": 0.3269, "step": 28862 }, { "epoch": 0.5358621432257921, "grad_norm": 0.3261096775531769, "learning_rate": 8.87555315212055e-06, "loss": 0.3043, "step": 28864 }, { "epoch": 0.5358992733632107, "grad_norm": 0.43998849391937256, "learning_rate": 8.874394067010636e-06, "loss": 0.3892, "step": 28866 }, { "epoch": 0.5359364035006293, "grad_norm": 0.32244300842285156, "learning_rate": 8.873234997216847e-06, "loss": 0.3034, "step": 28868 }, { "epoch": 0.535973533638048, "grad_norm": 0.670664370059967, "learning_rate": 8.872075942754956e-06, "loss": 0.4198, "step": 28870 }, { "epoch": 0.5360106637754666, "grad_norm": 0.3537195324897766, "learning_rate": 8.870916903640729e-06, "loss": 0.2529, "step": 28872 }, { "epoch": 0.5360477939128853, "grad_norm": 0.31270405650138855, "learning_rate": 8.869757879889941e-06, "loss": 0.1513, "step": 28874 }, { "epoch": 0.536084924050304, "grad_norm": 0.5249885320663452, "learning_rate": 8.868598871518365e-06, "loss": 0.2813, "step": 28876 }, { "epoch": 0.5361220541877225, "grad_norm": 0.2855727970600128, "learning_rate": 8.86743987854177e-06, "loss": 0.107, "step": 28878 }, { "epoch": 0.5361591843251412, "grad_norm": 0.3103468120098114, "learning_rate": 8.866280900975924e-06, "loss": 0.3253, "step": 28880 }, { "epoch": 0.5361963144625598, "grad_norm": 0.3224376440048218, "learning_rate": 8.865121938836602e-06, "loss": 0.2603, "step": 28882 }, { "epoch": 0.5362334445999785, "grad_norm": 0.3249346613883972, "learning_rate": 8.863962992139569e-06, "loss": 0.1916, "step": 28884 }, { "epoch": 0.5362705747373971, "grad_norm": 0.5392810702323914, "learning_rate": 8.862804060900597e-06, "loss": 0.2862, "step": 28886 }, { "epoch": 0.5363077048748157, "grad_norm": 0.337587833404541, "learning_rate": 8.861645145135456e-06, "loss": 0.4876, "step": 28888 }, { "epoch": 0.5363448350122344, "grad_norm": 0.24969573318958282, "learning_rate": 8.860486244859911e-06, "loss": 0.1564, "step": 28890 }, { "epoch": 0.536381965149653, "grad_norm": 0.4109410047531128, "learning_rate": 8.859327360089737e-06, "loss": 0.4201, "step": 28892 }, { "epoch": 0.5364190952870717, "grad_norm": 0.30118539929389954, "learning_rate": 8.858168490840706e-06, "loss": 0.3436, "step": 28894 }, { "epoch": 0.5364562254244903, "grad_norm": 0.22410225868225098, "learning_rate": 8.857009637128577e-06, "loss": 0.3657, "step": 28896 }, { "epoch": 0.5364933555619089, "grad_norm": 0.3994862139225006, "learning_rate": 8.855850798969122e-06, "loss": 0.4053, "step": 28898 }, { "epoch": 0.5365304856993276, "grad_norm": 0.2635518014431, "learning_rate": 8.854691976378114e-06, "loss": 0.3909, "step": 28900 }, { "epoch": 0.5365676158367462, "grad_norm": 0.4947318732738495, "learning_rate": 8.853533169371315e-06, "loss": 0.4727, "step": 28902 }, { "epoch": 0.5366047459741649, "grad_norm": 0.3866179287433624, "learning_rate": 8.852374377964496e-06, "loss": 0.2078, "step": 28904 }, { "epoch": 0.5366418761115835, "grad_norm": 0.3277641236782074, "learning_rate": 8.851215602173427e-06, "loss": 0.2229, "step": 28906 }, { "epoch": 0.5366790062490021, "grad_norm": 0.39967960119247437, "learning_rate": 8.85005684201387e-06, "loss": 0.1722, "step": 28908 }, { "epoch": 0.5367161363864208, "grad_norm": 0.3952726125717163, "learning_rate": 8.848898097501594e-06, "loss": 0.2345, "step": 28910 }, { "epoch": 0.5367532665238394, "grad_norm": 0.8256970643997192, "learning_rate": 8.847739368652368e-06, "loss": 0.3172, "step": 28912 }, { "epoch": 0.536790396661258, "grad_norm": 0.3216754198074341, "learning_rate": 8.846580655481958e-06, "loss": 0.4198, "step": 28914 }, { "epoch": 0.5368275267986767, "grad_norm": 0.7213714718818665, "learning_rate": 8.845421958006128e-06, "loss": 0.4123, "step": 28916 }, { "epoch": 0.5368646569360953, "grad_norm": 0.32979604601860046, "learning_rate": 8.844263276240653e-06, "loss": 0.1941, "step": 28918 }, { "epoch": 0.536901787073514, "grad_norm": 0.19757139682769775, "learning_rate": 8.843104610201288e-06, "loss": 0.2762, "step": 28920 }, { "epoch": 0.5369389172109326, "grad_norm": 0.44028541445732117, "learning_rate": 8.841945959903807e-06, "loss": 0.2592, "step": 28922 }, { "epoch": 0.5369760473483512, "grad_norm": 0.2324734926223755, "learning_rate": 8.840787325363969e-06, "loss": 0.239, "step": 28924 }, { "epoch": 0.5370131774857698, "grad_norm": 0.6083295941352844, "learning_rate": 8.839628706597543e-06, "loss": 0.1767, "step": 28926 }, { "epoch": 0.5370503076231885, "grad_norm": 0.47903871536254883, "learning_rate": 8.838470103620296e-06, "loss": 0.2226, "step": 28928 }, { "epoch": 0.5370874377606072, "grad_norm": 0.39700552821159363, "learning_rate": 8.837311516447993e-06, "loss": 0.2916, "step": 28930 }, { "epoch": 0.5371245678980258, "grad_norm": 0.2975066304206848, "learning_rate": 8.836152945096398e-06, "loss": 0.3395, "step": 28932 }, { "epoch": 0.5371616980354444, "grad_norm": 0.4705333411693573, "learning_rate": 8.834994389581275e-06, "loss": 0.3068, "step": 28934 }, { "epoch": 0.537198828172863, "grad_norm": 0.5135672688484192, "learning_rate": 8.833835849918388e-06, "loss": 0.4573, "step": 28936 }, { "epoch": 0.5372359583102817, "grad_norm": 0.2641277611255646, "learning_rate": 8.8326773261235e-06, "loss": 0.2299, "step": 28938 }, { "epoch": 0.5372730884477004, "grad_norm": 0.4112910330295563, "learning_rate": 8.831518818212378e-06, "loss": 0.2287, "step": 28940 }, { "epoch": 0.537310218585119, "grad_norm": 0.38870710134506226, "learning_rate": 8.830360326200787e-06, "loss": 0.5895, "step": 28942 }, { "epoch": 0.5373473487225376, "grad_norm": 0.22866074740886688, "learning_rate": 8.829201850104492e-06, "loss": 0.3707, "step": 28944 }, { "epoch": 0.5373844788599562, "grad_norm": 0.3247593343257904, "learning_rate": 8.828043389939246e-06, "loss": 0.4327, "step": 28946 }, { "epoch": 0.5374216089973749, "grad_norm": 0.4620649516582489, "learning_rate": 8.82688494572082e-06, "loss": 0.1971, "step": 28948 }, { "epoch": 0.5374587391347936, "grad_norm": 0.36376142501831055, "learning_rate": 8.825726517464976e-06, "loss": 0.4478, "step": 28950 }, { "epoch": 0.5374958692722122, "grad_norm": 0.5271807312965393, "learning_rate": 8.824568105187478e-06, "loss": 0.3627, "step": 28952 }, { "epoch": 0.5375329994096308, "grad_norm": 0.6430363655090332, "learning_rate": 8.823409708904087e-06, "loss": 0.2088, "step": 28954 }, { "epoch": 0.5375701295470494, "grad_norm": 0.34231269359588623, "learning_rate": 8.82225132863057e-06, "loss": 0.1291, "step": 28956 }, { "epoch": 0.5376072596844681, "grad_norm": 0.37590786814689636, "learning_rate": 8.82109296438268e-06, "loss": 0.3799, "step": 28958 }, { "epoch": 0.5376443898218868, "grad_norm": 0.36601924896240234, "learning_rate": 8.819934616176182e-06, "loss": 0.3098, "step": 28960 }, { "epoch": 0.5376815199593054, "grad_norm": 0.2647835910320282, "learning_rate": 8.81877628402684e-06, "loss": 0.3885, "step": 28962 }, { "epoch": 0.537718650096724, "grad_norm": 0.4027288258075714, "learning_rate": 8.817617967950417e-06, "loss": 0.0979, "step": 28964 }, { "epoch": 0.5377557802341426, "grad_norm": 0.3420749306678772, "learning_rate": 8.81645966796267e-06, "loss": 0.27, "step": 28966 }, { "epoch": 0.5377929103715613, "grad_norm": 0.48124876618385315, "learning_rate": 8.81530138407936e-06, "loss": 0.3065, "step": 28968 }, { "epoch": 0.53783004050898, "grad_norm": 0.42390236258506775, "learning_rate": 8.814143116316257e-06, "loss": 0.3103, "step": 28970 }, { "epoch": 0.5378671706463986, "grad_norm": 0.2607991695404053, "learning_rate": 8.812984864689107e-06, "loss": 0.2639, "step": 28972 }, { "epoch": 0.5379043007838172, "grad_norm": 0.3881067633628845, "learning_rate": 8.81182662921368e-06, "loss": 0.3044, "step": 28974 }, { "epoch": 0.5379414309212358, "grad_norm": 0.36070653796195984, "learning_rate": 8.810668409905733e-06, "loss": 0.2133, "step": 28976 }, { "epoch": 0.5379785610586545, "grad_norm": 0.3744252026081085, "learning_rate": 8.809510206781025e-06, "loss": 0.1141, "step": 28978 }, { "epoch": 0.5380156911960731, "grad_norm": 0.42860186100006104, "learning_rate": 8.808352019855317e-06, "loss": 0.31, "step": 28980 }, { "epoch": 0.5380528213334917, "grad_norm": 0.41899728775024414, "learning_rate": 8.807193849144374e-06, "loss": 0.1678, "step": 28982 }, { "epoch": 0.5380899514709104, "grad_norm": 0.24706198275089264, "learning_rate": 8.806035694663945e-06, "loss": 0.3168, "step": 28984 }, { "epoch": 0.538127081608329, "grad_norm": 0.34947726130485535, "learning_rate": 8.804877556429797e-06, "loss": 0.2965, "step": 28986 }, { "epoch": 0.5381642117457477, "grad_norm": 0.3506985008716583, "learning_rate": 8.803719434457683e-06, "loss": 0.3101, "step": 28988 }, { "epoch": 0.5382013418831663, "grad_norm": 0.38096871972084045, "learning_rate": 8.802561328763364e-06, "loss": 0.4228, "step": 28990 }, { "epoch": 0.538238472020585, "grad_norm": 0.3246283531188965, "learning_rate": 8.801403239362598e-06, "loss": 0.2242, "step": 28992 }, { "epoch": 0.5382756021580036, "grad_norm": 0.3986336588859558, "learning_rate": 8.800245166271149e-06, "loss": 0.2624, "step": 28994 }, { "epoch": 0.5383127322954222, "grad_norm": 0.35336074233055115, "learning_rate": 8.799087109504766e-06, "loss": 0.1392, "step": 28996 }, { "epoch": 0.5383498624328409, "grad_norm": 0.44116055965423584, "learning_rate": 8.79792906907921e-06, "loss": 0.147, "step": 28998 }, { "epoch": 0.5383869925702595, "grad_norm": 0.33933666348457336, "learning_rate": 8.796771045010237e-06, "loss": 0.2637, "step": 29000 }, { "epoch": 0.5384241227076781, "grad_norm": 0.3280552327632904, "learning_rate": 8.795613037313607e-06, "loss": 0.4042, "step": 29002 }, { "epoch": 0.5384612528450968, "grad_norm": 0.4678749442100525, "learning_rate": 8.794455046005079e-06, "loss": 0.3733, "step": 29004 }, { "epoch": 0.5384983829825154, "grad_norm": 0.23080037534236908, "learning_rate": 8.793297071100402e-06, "loss": 0.3163, "step": 29006 }, { "epoch": 0.5385355131199341, "grad_norm": 0.41035544872283936, "learning_rate": 8.792139112615345e-06, "loss": 0.4399, "step": 29008 }, { "epoch": 0.5385726432573527, "grad_norm": 0.38496923446655273, "learning_rate": 8.79098117056565e-06, "loss": 0.3785, "step": 29010 }, { "epoch": 0.5386097733947713, "grad_norm": 0.4847329258918762, "learning_rate": 8.789823244967081e-06, "loss": 0.3321, "step": 29012 }, { "epoch": 0.53864690353219, "grad_norm": 0.42741239070892334, "learning_rate": 8.788665335835391e-06, "loss": 0.2095, "step": 29014 }, { "epoch": 0.5386840336696086, "grad_norm": 0.2723098397254944, "learning_rate": 8.787507443186341e-06, "loss": 0.1508, "step": 29016 }, { "epoch": 0.5387211638070273, "grad_norm": 0.42734912037849426, "learning_rate": 8.786349567035681e-06, "loss": 0.3142, "step": 29018 }, { "epoch": 0.5387582939444459, "grad_norm": 0.3509759306907654, "learning_rate": 8.785191707399172e-06, "loss": 0.2006, "step": 29020 }, { "epoch": 0.5387954240818645, "grad_norm": 0.5491913557052612, "learning_rate": 8.784033864292561e-06, "loss": 0.3318, "step": 29022 }, { "epoch": 0.5388325542192831, "grad_norm": 0.4884089231491089, "learning_rate": 8.782876037731607e-06, "loss": 0.1724, "step": 29024 }, { "epoch": 0.5388696843567018, "grad_norm": 0.381264328956604, "learning_rate": 8.781718227732065e-06, "loss": 0.3064, "step": 29026 }, { "epoch": 0.5389068144941205, "grad_norm": 0.515300452709198, "learning_rate": 8.780560434309688e-06, "loss": 0.2855, "step": 29028 }, { "epoch": 0.538943944631539, "grad_norm": 0.19382821023464203, "learning_rate": 8.779402657480231e-06, "loss": 0.2689, "step": 29030 }, { "epoch": 0.5389810747689577, "grad_norm": 0.45632404088974, "learning_rate": 8.778244897259452e-06, "loss": 0.271, "step": 29032 }, { "epoch": 0.5390182049063763, "grad_norm": 0.8036473989486694, "learning_rate": 8.777087153663095e-06, "loss": 0.2716, "step": 29034 }, { "epoch": 0.539055335043795, "grad_norm": 0.49166810512542725, "learning_rate": 8.77592942670692e-06, "loss": 0.3119, "step": 29036 }, { "epoch": 0.5390924651812137, "grad_norm": 0.3464691936969757, "learning_rate": 8.77477171640668e-06, "loss": 0.2479, "step": 29038 }, { "epoch": 0.5391295953186322, "grad_norm": 0.39825841784477234, "learning_rate": 8.773614022778126e-06, "loss": 0.1964, "step": 29040 }, { "epoch": 0.5391667254560509, "grad_norm": 0.35477596521377563, "learning_rate": 8.77245634583701e-06, "loss": 0.5109, "step": 29042 }, { "epoch": 0.5392038555934695, "grad_norm": 0.3489423096179962, "learning_rate": 8.771298685599092e-06, "loss": 0.3363, "step": 29044 }, { "epoch": 0.5392409857308882, "grad_norm": 0.32201874256134033, "learning_rate": 8.770141042080115e-06, "loss": 0.3162, "step": 29046 }, { "epoch": 0.5392781158683069, "grad_norm": 0.3752102553844452, "learning_rate": 8.768983415295833e-06, "loss": 0.2798, "step": 29048 }, { "epoch": 0.5393152460057254, "grad_norm": 0.3636285066604614, "learning_rate": 8.767825805262001e-06, "loss": 0.2117, "step": 29050 }, { "epoch": 0.5393523761431441, "grad_norm": 0.3099227249622345, "learning_rate": 8.766668211994369e-06, "loss": 0.1141, "step": 29052 }, { "epoch": 0.5393895062805627, "grad_norm": 0.36934393644332886, "learning_rate": 8.765510635508686e-06, "loss": 0.3204, "step": 29054 }, { "epoch": 0.5394266364179814, "grad_norm": 0.33397069573402405, "learning_rate": 8.76435307582071e-06, "loss": 0.3264, "step": 29056 }, { "epoch": 0.5394637665554001, "grad_norm": 0.5294509530067444, "learning_rate": 8.763195532946185e-06, "loss": 0.2112, "step": 29058 }, { "epoch": 0.5395008966928186, "grad_norm": 0.43548229336738586, "learning_rate": 8.762038006900866e-06, "loss": 0.1291, "step": 29060 }, { "epoch": 0.5395380268302373, "grad_norm": 0.5218189358711243, "learning_rate": 8.7608804977005e-06, "loss": 0.3903, "step": 29062 }, { "epoch": 0.5395751569676559, "grad_norm": 0.39514532685279846, "learning_rate": 8.759723005360837e-06, "loss": 0.1726, "step": 29064 }, { "epoch": 0.5396122871050746, "grad_norm": 0.4090101718902588, "learning_rate": 8.758565529897629e-06, "loss": 0.3336, "step": 29066 }, { "epoch": 0.5396494172424933, "grad_norm": 0.41136986017227173, "learning_rate": 8.757408071326629e-06, "loss": 0.2609, "step": 29068 }, { "epoch": 0.5396865473799118, "grad_norm": 0.39628228545188904, "learning_rate": 8.756250629663582e-06, "loss": 0.1928, "step": 29070 }, { "epoch": 0.5397236775173305, "grad_norm": 0.2596226930618286, "learning_rate": 8.755093204924239e-06, "loss": 0.186, "step": 29072 }, { "epoch": 0.5397608076547491, "grad_norm": 0.412746787071228, "learning_rate": 8.753935797124346e-06, "loss": 0.2539, "step": 29074 }, { "epoch": 0.5397979377921678, "grad_norm": 0.26234158873558044, "learning_rate": 8.752778406279655e-06, "loss": 0.3241, "step": 29076 }, { "epoch": 0.5398350679295864, "grad_norm": 0.43193626403808594, "learning_rate": 8.751621032405915e-06, "loss": 0.3383, "step": 29078 }, { "epoch": 0.539872198067005, "grad_norm": 0.23748880624771118, "learning_rate": 8.750463675518873e-06, "loss": 0.2931, "step": 29080 }, { "epoch": 0.5399093282044237, "grad_norm": 0.274153470993042, "learning_rate": 8.749306335634282e-06, "loss": 0.3192, "step": 29082 }, { "epoch": 0.5399464583418423, "grad_norm": 0.2875758111476898, "learning_rate": 8.74814901276788e-06, "loss": 0.4605, "step": 29084 }, { "epoch": 0.539983588479261, "grad_norm": 0.5302455425262451, "learning_rate": 8.74699170693542e-06, "loss": 0.1804, "step": 29086 }, { "epoch": 0.5400207186166796, "grad_norm": 0.4128270745277405, "learning_rate": 8.745834418152651e-06, "loss": 0.2892, "step": 29088 }, { "epoch": 0.5400578487540982, "grad_norm": 0.44809195399284363, "learning_rate": 8.74467714643532e-06, "loss": 0.4741, "step": 29090 }, { "epoch": 0.5400949788915169, "grad_norm": 0.44445866346359253, "learning_rate": 8.743519891799171e-06, "loss": 0.2123, "step": 29092 }, { "epoch": 0.5401321090289355, "grad_norm": 0.48729515075683594, "learning_rate": 8.742362654259953e-06, "loss": 0.175, "step": 29094 }, { "epoch": 0.5401692391663542, "grad_norm": 0.34059056639671326, "learning_rate": 8.741205433833417e-06, "loss": 0.3175, "step": 29096 }, { "epoch": 0.5402063693037727, "grad_norm": 0.34828782081604004, "learning_rate": 8.740048230535298e-06, "loss": 0.2684, "step": 29098 }, { "epoch": 0.5402434994411914, "grad_norm": 0.2956511080265045, "learning_rate": 8.73889104438135e-06, "loss": 0.2526, "step": 29100 }, { "epoch": 0.5402806295786101, "grad_norm": 0.3331354856491089, "learning_rate": 8.73773387538732e-06, "loss": 0.1748, "step": 29102 }, { "epoch": 0.5403177597160287, "grad_norm": 0.33080562949180603, "learning_rate": 8.736576723568949e-06, "loss": 0.2284, "step": 29104 }, { "epoch": 0.5403548898534474, "grad_norm": 0.5027536153793335, "learning_rate": 8.735419588941984e-06, "loss": 0.4092, "step": 29106 }, { "epoch": 0.540392019990866, "grad_norm": 0.458329975605011, "learning_rate": 8.734262471522174e-06, "loss": 0.4297, "step": 29108 }, { "epoch": 0.5404291501282846, "grad_norm": 0.3305177092552185, "learning_rate": 8.733105371325256e-06, "loss": 0.2675, "step": 29110 }, { "epoch": 0.5404662802657033, "grad_norm": 0.4668492376804352, "learning_rate": 8.731948288366983e-06, "loss": 0.3893, "step": 29112 }, { "epoch": 0.5405034104031219, "grad_norm": 0.32844310998916626, "learning_rate": 8.730791222663093e-06, "loss": 0.1768, "step": 29114 }, { "epoch": 0.5405405405405406, "grad_norm": 0.3423122763633728, "learning_rate": 8.72963417422933e-06, "loss": 0.2147, "step": 29116 }, { "epoch": 0.5405776706779591, "grad_norm": 0.26020705699920654, "learning_rate": 8.728477143081444e-06, "loss": 0.2379, "step": 29118 }, { "epoch": 0.5406148008153778, "grad_norm": 0.4827598035335541, "learning_rate": 8.72732012923518e-06, "loss": 0.2814, "step": 29120 }, { "epoch": 0.5406519309527965, "grad_norm": 0.2928997576236725, "learning_rate": 8.726163132706271e-06, "loss": 0.3541, "step": 29122 }, { "epoch": 0.5406890610902151, "grad_norm": 0.5309603214263916, "learning_rate": 8.725006153510468e-06, "loss": 0.489, "step": 29124 }, { "epoch": 0.5407261912276338, "grad_norm": 0.4305154085159302, "learning_rate": 8.723849191663512e-06, "loss": 0.2963, "step": 29126 }, { "epoch": 0.5407633213650523, "grad_norm": 0.2677434980869293, "learning_rate": 8.722692247181143e-06, "loss": 0.2165, "step": 29128 }, { "epoch": 0.540800451502471, "grad_norm": 0.30531907081604004, "learning_rate": 8.721535320079109e-06, "loss": 0.2333, "step": 29130 }, { "epoch": 0.5408375816398896, "grad_norm": 0.40117356181144714, "learning_rate": 8.720378410373152e-06, "loss": 0.2355, "step": 29132 }, { "epoch": 0.5408747117773083, "grad_norm": 1.154480218887329, "learning_rate": 8.719221518079012e-06, "loss": 0.4209, "step": 29134 }, { "epoch": 0.540911841914727, "grad_norm": 0.36175766587257385, "learning_rate": 8.718064643212428e-06, "loss": 0.1844, "step": 29136 }, { "epoch": 0.5409489720521455, "grad_norm": 0.521953821182251, "learning_rate": 8.716907785789143e-06, "loss": 0.2785, "step": 29138 }, { "epoch": 0.5409861021895642, "grad_norm": 0.7608530521392822, "learning_rate": 8.7157509458249e-06, "loss": 0.3054, "step": 29140 }, { "epoch": 0.5410232323269828, "grad_norm": 0.32299312949180603, "learning_rate": 8.714594123335444e-06, "loss": 0.3696, "step": 29142 }, { "epoch": 0.5410603624644015, "grad_norm": 0.4030701518058777, "learning_rate": 8.71343731833651e-06, "loss": 0.3391, "step": 29144 }, { "epoch": 0.5410974926018202, "grad_norm": 0.4206581711769104, "learning_rate": 8.712280530843842e-06, "loss": 0.3013, "step": 29146 }, { "epoch": 0.5411346227392387, "grad_norm": 0.27141445875167847, "learning_rate": 8.711123760873176e-06, "loss": 0.3519, "step": 29148 }, { "epoch": 0.5411717528766574, "grad_norm": 0.33926326036453247, "learning_rate": 8.709967008440254e-06, "loss": 0.3592, "step": 29150 }, { "epoch": 0.541208883014076, "grad_norm": 0.32513123750686646, "learning_rate": 8.708810273560818e-06, "loss": 0.2045, "step": 29152 }, { "epoch": 0.5412460131514947, "grad_norm": 0.34170204401016235, "learning_rate": 8.707653556250608e-06, "loss": 0.3308, "step": 29154 }, { "epoch": 0.5412831432889134, "grad_norm": 0.2505717873573303, "learning_rate": 8.70649685652536e-06, "loss": 0.2412, "step": 29156 }, { "epoch": 0.5413202734263319, "grad_norm": 0.37244531512260437, "learning_rate": 8.70534017440082e-06, "loss": 0.2982, "step": 29158 }, { "epoch": 0.5413574035637506, "grad_norm": 0.36878788471221924, "learning_rate": 8.704183509892718e-06, "loss": 0.4244, "step": 29160 }, { "epoch": 0.5413945337011692, "grad_norm": 0.28823912143707275, "learning_rate": 8.703026863016797e-06, "loss": 0.2015, "step": 29162 }, { "epoch": 0.5414316638385879, "grad_norm": 0.39584553241729736, "learning_rate": 8.701870233788796e-06, "loss": 0.267, "step": 29164 }, { "epoch": 0.5414687939760066, "grad_norm": 0.31679457426071167, "learning_rate": 8.700713622224454e-06, "loss": 0.2615, "step": 29166 }, { "epoch": 0.5415059241134251, "grad_norm": 0.2750388979911804, "learning_rate": 8.699557028339504e-06, "loss": 0.3156, "step": 29168 }, { "epoch": 0.5415430542508438, "grad_norm": 0.358825147151947, "learning_rate": 8.698400452149692e-06, "loss": 0.5301, "step": 29170 }, { "epoch": 0.5415801843882624, "grad_norm": 0.38050150871276855, "learning_rate": 8.697243893670752e-06, "loss": 0.2253, "step": 29172 }, { "epoch": 0.5416173145256811, "grad_norm": 0.5608035922050476, "learning_rate": 8.696087352918416e-06, "loss": 0.2824, "step": 29174 }, { "epoch": 0.5416544446630996, "grad_norm": 0.2506846785545349, "learning_rate": 8.694930829908429e-06, "loss": 0.3267, "step": 29176 }, { "epoch": 0.5416915748005183, "grad_norm": 0.46021321415901184, "learning_rate": 8.693774324656523e-06, "loss": 0.2858, "step": 29178 }, { "epoch": 0.541728704937937, "grad_norm": 0.2649284601211548, "learning_rate": 8.692617837178432e-06, "loss": 0.2535, "step": 29180 }, { "epoch": 0.5417658350753556, "grad_norm": 0.4189710021018982, "learning_rate": 8.691461367489899e-06, "loss": 0.2883, "step": 29182 }, { "epoch": 0.5418029652127743, "grad_norm": 0.3972805142402649, "learning_rate": 8.690304915606663e-06, "loss": 0.206, "step": 29184 }, { "epoch": 0.5418400953501928, "grad_norm": 0.3949586749076843, "learning_rate": 8.689148481544447e-06, "loss": 0.2376, "step": 29186 }, { "epoch": 0.5418772254876115, "grad_norm": 0.30991628766059875, "learning_rate": 8.687992065318997e-06, "loss": 0.1523, "step": 29188 }, { "epoch": 0.5419143556250302, "grad_norm": 0.5625579357147217, "learning_rate": 8.686835666946043e-06, "loss": 0.4294, "step": 29190 }, { "epoch": 0.5419514857624488, "grad_norm": 0.4907143712043762, "learning_rate": 8.685679286441321e-06, "loss": 0.363, "step": 29192 }, { "epoch": 0.5419886158998675, "grad_norm": 0.3919946849346161, "learning_rate": 8.684522923820571e-06, "loss": 0.1879, "step": 29194 }, { "epoch": 0.542025746037286, "grad_norm": 0.41752931475639343, "learning_rate": 8.683366579099524e-06, "loss": 0.2567, "step": 29196 }, { "epoch": 0.5420628761747047, "grad_norm": 0.35558754205703735, "learning_rate": 8.682210252293912e-06, "loss": 0.1417, "step": 29198 }, { "epoch": 0.5421000063121234, "grad_norm": 0.4281615912914276, "learning_rate": 8.681053943419471e-06, "loss": 0.2674, "step": 29200 }, { "epoch": 0.542137136449542, "grad_norm": 0.29554855823516846, "learning_rate": 8.679897652491934e-06, "loss": 0.3941, "step": 29202 }, { "epoch": 0.5421742665869607, "grad_norm": 0.3773784637451172, "learning_rate": 8.678741379527036e-06, "loss": 0.4441, "step": 29204 }, { "epoch": 0.5422113967243792, "grad_norm": 0.2607143223285675, "learning_rate": 8.677585124540512e-06, "loss": 0.3643, "step": 29206 }, { "epoch": 0.5422485268617979, "grad_norm": 0.5972867608070374, "learning_rate": 8.676428887548094e-06, "loss": 0.2166, "step": 29208 }, { "epoch": 0.5422856569992166, "grad_norm": 0.37390974164009094, "learning_rate": 8.675272668565514e-06, "loss": 0.3442, "step": 29210 }, { "epoch": 0.5423227871366352, "grad_norm": 0.4715038537979126, "learning_rate": 8.674116467608502e-06, "loss": 0.2342, "step": 29212 }, { "epoch": 0.5423599172740539, "grad_norm": 0.3736487030982971, "learning_rate": 8.672960284692795e-06, "loss": 0.432, "step": 29214 }, { "epoch": 0.5423970474114724, "grad_norm": 0.2512434422969818, "learning_rate": 8.671804119834123e-06, "loss": 0.2563, "step": 29216 }, { "epoch": 0.5424341775488911, "grad_norm": 0.3261411190032959, "learning_rate": 8.67064797304822e-06, "loss": 0.4319, "step": 29218 }, { "epoch": 0.5424713076863098, "grad_norm": 0.4159064292907715, "learning_rate": 8.669491844350815e-06, "loss": 0.3327, "step": 29220 }, { "epoch": 0.5425084378237284, "grad_norm": 0.2847120463848114, "learning_rate": 8.668335733757644e-06, "loss": 0.2616, "step": 29222 }, { "epoch": 0.5425455679611471, "grad_norm": 0.3157494068145752, "learning_rate": 8.667179641284431e-06, "loss": 0.184, "step": 29224 }, { "epoch": 0.5425826980985656, "grad_norm": 0.30882105231285095, "learning_rate": 8.66602356694691e-06, "loss": 0.4316, "step": 29226 }, { "epoch": 0.5426198282359843, "grad_norm": 0.5159033536911011, "learning_rate": 8.664867510760817e-06, "loss": 0.431, "step": 29228 }, { "epoch": 0.5426569583734029, "grad_norm": 0.4435185194015503, "learning_rate": 8.663711472741873e-06, "loss": 0.2369, "step": 29230 }, { "epoch": 0.5426940885108216, "grad_norm": 0.2777823805809021, "learning_rate": 8.662555452905816e-06, "loss": 0.2603, "step": 29232 }, { "epoch": 0.5427312186482403, "grad_norm": 0.44630444049835205, "learning_rate": 8.661399451268377e-06, "loss": 0.4203, "step": 29234 }, { "epoch": 0.5427683487856588, "grad_norm": 0.3093295991420746, "learning_rate": 8.660243467845275e-06, "loss": 0.1436, "step": 29236 }, { "epoch": 0.5428054789230775, "grad_norm": 0.2716151475906372, "learning_rate": 8.659087502652249e-06, "loss": 0.1959, "step": 29238 }, { "epoch": 0.5428426090604961, "grad_norm": 0.28655266761779785, "learning_rate": 8.657931555705025e-06, "loss": 0.2633, "step": 29240 }, { "epoch": 0.5428797391979148, "grad_norm": 0.4227493107318878, "learning_rate": 8.656775627019334e-06, "loss": 0.2132, "step": 29242 }, { "epoch": 0.5429168693353335, "grad_norm": 0.3981489837169647, "learning_rate": 8.655619716610901e-06, "loss": 0.4335, "step": 29244 }, { "epoch": 0.542953999472752, "grad_norm": 0.2782026529312134, "learning_rate": 8.654463824495461e-06, "loss": 0.1388, "step": 29246 }, { "epoch": 0.5429911296101707, "grad_norm": 0.553490400314331, "learning_rate": 8.653307950688734e-06, "loss": 0.4726, "step": 29248 }, { "epoch": 0.5430282597475893, "grad_norm": 0.2811882793903351, "learning_rate": 8.652152095206454e-06, "loss": 0.1968, "step": 29250 }, { "epoch": 0.543065389885008, "grad_norm": 0.39556363224983215, "learning_rate": 8.650996258064345e-06, "loss": 0.3158, "step": 29252 }, { "epoch": 0.5431025200224266, "grad_norm": 0.3316425383090973, "learning_rate": 8.649840439278138e-06, "loss": 0.233, "step": 29254 }, { "epoch": 0.5431396501598452, "grad_norm": 0.36390629410743713, "learning_rate": 8.648684638863555e-06, "loss": 0.2459, "step": 29256 }, { "epoch": 0.5431767802972639, "grad_norm": 0.4033108055591583, "learning_rate": 8.647528856836332e-06, "loss": 0.3237, "step": 29258 }, { "epoch": 0.5432139104346825, "grad_norm": 0.5155245661735535, "learning_rate": 8.646373093212188e-06, "loss": 0.3359, "step": 29260 }, { "epoch": 0.5432510405721012, "grad_norm": 0.33235734701156616, "learning_rate": 8.645217348006852e-06, "loss": 0.3862, "step": 29262 }, { "epoch": 0.5432881707095198, "grad_norm": 0.4683685004711151, "learning_rate": 8.644061621236048e-06, "loss": 0.27, "step": 29264 }, { "epoch": 0.5433253008469384, "grad_norm": 0.3547971546649933, "learning_rate": 8.642905912915502e-06, "loss": 0.2596, "step": 29266 }, { "epoch": 0.5433624309843571, "grad_norm": 0.47313353419303894, "learning_rate": 8.641750223060944e-06, "loss": 0.3379, "step": 29268 }, { "epoch": 0.5433995611217757, "grad_norm": 0.20375724136829376, "learning_rate": 8.640594551688097e-06, "loss": 0.1693, "step": 29270 }, { "epoch": 0.5434366912591944, "grad_norm": 0.3484920561313629, "learning_rate": 8.639438898812691e-06, "loss": 0.3134, "step": 29272 }, { "epoch": 0.543473821396613, "grad_norm": 0.2577415108680725, "learning_rate": 8.63828326445044e-06, "loss": 0.2449, "step": 29274 }, { "epoch": 0.5435109515340316, "grad_norm": 0.30464401841163635, "learning_rate": 8.637127648617076e-06, "loss": 0.2087, "step": 29276 }, { "epoch": 0.5435480816714503, "grad_norm": 0.4083777368068695, "learning_rate": 8.635972051328321e-06, "loss": 0.3699, "step": 29278 }, { "epoch": 0.5435852118088689, "grad_norm": 0.32922303676605225, "learning_rate": 8.634816472599905e-06, "loss": 0.2812, "step": 29280 }, { "epoch": 0.5436223419462876, "grad_norm": 0.4418775141239166, "learning_rate": 8.633660912447544e-06, "loss": 0.2437, "step": 29282 }, { "epoch": 0.5436594720837061, "grad_norm": 0.4124562442302704, "learning_rate": 8.63250537088697e-06, "loss": 0.3922, "step": 29284 }, { "epoch": 0.5436966022211248, "grad_norm": 0.5106393098831177, "learning_rate": 8.631349847933896e-06, "loss": 0.3073, "step": 29286 }, { "epoch": 0.5437337323585435, "grad_norm": 0.3068910539150238, "learning_rate": 8.630194343604053e-06, "loss": 0.3269, "step": 29288 }, { "epoch": 0.5437708624959621, "grad_norm": 0.5096837282180786, "learning_rate": 8.62903885791316e-06, "loss": 0.3069, "step": 29290 }, { "epoch": 0.5438079926333808, "grad_norm": 0.34762489795684814, "learning_rate": 8.627883390876946e-06, "loss": 0.2773, "step": 29292 }, { "epoch": 0.5438451227707993, "grad_norm": 1.0538008213043213, "learning_rate": 8.626727942511127e-06, "loss": 0.3702, "step": 29294 }, { "epoch": 0.543882252908218, "grad_norm": 0.5484046339988708, "learning_rate": 8.625572512831425e-06, "loss": 0.1806, "step": 29296 }, { "epoch": 0.5439193830456367, "grad_norm": 0.30598756670951843, "learning_rate": 8.62441710185357e-06, "loss": 0.3188, "step": 29298 }, { "epoch": 0.5439565131830553, "grad_norm": 0.4106521010398865, "learning_rate": 8.623261709593274e-06, "loss": 0.266, "step": 29300 }, { "epoch": 0.543993643320474, "grad_norm": 0.5405019521713257, "learning_rate": 8.622106336066263e-06, "loss": 0.1675, "step": 29302 }, { "epoch": 0.5440307734578925, "grad_norm": 0.571747899055481, "learning_rate": 8.620950981288256e-06, "loss": 0.3958, "step": 29304 }, { "epoch": 0.5440679035953112, "grad_norm": 0.331687331199646, "learning_rate": 8.619795645274976e-06, "loss": 0.3091, "step": 29306 }, { "epoch": 0.5441050337327299, "grad_norm": 0.43747061491012573, "learning_rate": 8.618640328042142e-06, "loss": 0.3649, "step": 29308 }, { "epoch": 0.5441421638701485, "grad_norm": 0.4858039617538452, "learning_rate": 8.617485029605481e-06, "loss": 0.4178, "step": 29310 }, { "epoch": 0.5441792940075671, "grad_norm": 0.7736327052116394, "learning_rate": 8.616329749980703e-06, "loss": 0.3369, "step": 29312 }, { "epoch": 0.5442164241449857, "grad_norm": 0.49970731139183044, "learning_rate": 8.615174489183534e-06, "loss": 0.2086, "step": 29314 }, { "epoch": 0.5442535542824044, "grad_norm": 0.34400734305381775, "learning_rate": 8.61401924722969e-06, "loss": 0.5262, "step": 29316 }, { "epoch": 0.5442906844198231, "grad_norm": 0.36918163299560547, "learning_rate": 8.612864024134893e-06, "loss": 0.2009, "step": 29318 }, { "epoch": 0.5443278145572417, "grad_norm": 0.34294623136520386, "learning_rate": 8.611708819914862e-06, "loss": 0.4299, "step": 29320 }, { "epoch": 0.5443649446946603, "grad_norm": 0.6117779016494751, "learning_rate": 8.610553634585319e-06, "loss": 0.1814, "step": 29322 }, { "epoch": 0.5444020748320789, "grad_norm": 0.26201289892196655, "learning_rate": 8.609398468161976e-06, "loss": 0.3145, "step": 29324 }, { "epoch": 0.5444392049694976, "grad_norm": 0.37982288002967834, "learning_rate": 8.608243320660556e-06, "loss": 0.2892, "step": 29326 }, { "epoch": 0.5444763351069162, "grad_norm": 0.28766703605651855, "learning_rate": 8.607088192096772e-06, "loss": 0.2514, "step": 29328 }, { "epoch": 0.5445134652443349, "grad_norm": 0.42481979727745056, "learning_rate": 8.605933082486349e-06, "loss": 0.3786, "step": 29330 }, { "epoch": 0.5445505953817535, "grad_norm": 0.2438274323940277, "learning_rate": 8.604777991844998e-06, "loss": 0.0748, "step": 29332 }, { "epoch": 0.5445877255191721, "grad_norm": 0.43185245990753174, "learning_rate": 8.603622920188446e-06, "loss": 0.2974, "step": 29334 }, { "epoch": 0.5446248556565908, "grad_norm": 0.4894871711730957, "learning_rate": 8.602467867532399e-06, "loss": 0.3289, "step": 29336 }, { "epoch": 0.5446619857940094, "grad_norm": 0.3500831127166748, "learning_rate": 8.601312833892577e-06, "loss": 0.1911, "step": 29338 }, { "epoch": 0.5446991159314281, "grad_norm": 0.3648889660835266, "learning_rate": 8.600157819284699e-06, "loss": 0.3523, "step": 29340 }, { "epoch": 0.5447362460688467, "grad_norm": 0.5154054164886475, "learning_rate": 8.599002823724478e-06, "loss": 0.2669, "step": 29342 }, { "epoch": 0.5447733762062653, "grad_norm": 0.3284898102283478, "learning_rate": 8.597847847227636e-06, "loss": 0.2639, "step": 29344 }, { "epoch": 0.544810506343684, "grad_norm": 0.24722422659397125, "learning_rate": 8.596692889809881e-06, "loss": 0.2497, "step": 29346 }, { "epoch": 0.5448476364811026, "grad_norm": 0.41449519991874695, "learning_rate": 8.595537951486938e-06, "loss": 0.3893, "step": 29348 }, { "epoch": 0.5448847666185213, "grad_norm": 0.49547505378723145, "learning_rate": 8.594383032274512e-06, "loss": 0.2032, "step": 29350 }, { "epoch": 0.5449218967559399, "grad_norm": 0.3524686396121979, "learning_rate": 8.593228132188322e-06, "loss": 0.1248, "step": 29352 }, { "epoch": 0.5449590268933585, "grad_norm": 0.3160668611526489, "learning_rate": 8.592073251244083e-06, "loss": 0.2721, "step": 29354 }, { "epoch": 0.5449961570307772, "grad_norm": 0.6410817503929138, "learning_rate": 8.590918389457513e-06, "loss": 0.4092, "step": 29356 }, { "epoch": 0.5450332871681958, "grad_norm": 0.4196353256702423, "learning_rate": 8.58976354684432e-06, "loss": 0.2926, "step": 29358 }, { "epoch": 0.5450704173056145, "grad_norm": 0.3630804419517517, "learning_rate": 8.588608723420226e-06, "loss": 0.277, "step": 29360 }, { "epoch": 0.5451075474430331, "grad_norm": 0.38126394152641296, "learning_rate": 8.587453919200937e-06, "loss": 0.2567, "step": 29362 }, { "epoch": 0.5451446775804517, "grad_norm": 0.47089827060699463, "learning_rate": 8.586299134202165e-06, "loss": 0.2469, "step": 29364 }, { "epoch": 0.5451818077178704, "grad_norm": 0.4572823941707611, "learning_rate": 8.585144368439632e-06, "loss": 0.2357, "step": 29366 }, { "epoch": 0.545218937855289, "grad_norm": 0.3201158046722412, "learning_rate": 8.583989621929045e-06, "loss": 0.516, "step": 29368 }, { "epoch": 0.5452560679927076, "grad_norm": 0.25221237540245056, "learning_rate": 8.582834894686116e-06, "loss": 0.268, "step": 29370 }, { "epoch": 0.5452931981301263, "grad_norm": 0.4691847264766693, "learning_rate": 8.581680186726565e-06, "loss": 0.1847, "step": 29372 }, { "epoch": 0.5453303282675449, "grad_norm": 0.2895698547363281, "learning_rate": 8.580525498066092e-06, "loss": 0.3571, "step": 29374 }, { "epoch": 0.5453674584049636, "grad_norm": 0.2594881057739258, "learning_rate": 8.579370828720418e-06, "loss": 0.2436, "step": 29376 }, { "epoch": 0.5454045885423822, "grad_norm": 0.32218727469444275, "learning_rate": 8.57821617870525e-06, "loss": 0.3968, "step": 29378 }, { "epoch": 0.5454417186798008, "grad_norm": 0.48512133955955505, "learning_rate": 8.577061548036302e-06, "loss": 0.3378, "step": 29380 }, { "epoch": 0.5454788488172194, "grad_norm": 0.3078174591064453, "learning_rate": 8.575906936729283e-06, "loss": 0.1891, "step": 29382 }, { "epoch": 0.5455159789546381, "grad_norm": 0.3876544237136841, "learning_rate": 8.574752344799906e-06, "loss": 0.4891, "step": 29384 }, { "epoch": 0.5455531090920568, "grad_norm": 0.22093725204467773, "learning_rate": 8.573597772263884e-06, "loss": 0.1763, "step": 29386 }, { "epoch": 0.5455902392294754, "grad_norm": 0.45386743545532227, "learning_rate": 8.57244321913692e-06, "loss": 0.3071, "step": 29388 }, { "epoch": 0.545627369366894, "grad_norm": 0.4772425591945648, "learning_rate": 8.571288685434727e-06, "loss": 0.1918, "step": 29390 }, { "epoch": 0.5456644995043126, "grad_norm": 0.530746340751648, "learning_rate": 8.570134171173017e-06, "loss": 0.2592, "step": 29392 }, { "epoch": 0.5457016296417313, "grad_norm": 0.430203914642334, "learning_rate": 8.568979676367495e-06, "loss": 0.3222, "step": 29394 }, { "epoch": 0.54573875977915, "grad_norm": 0.3285220265388489, "learning_rate": 8.567825201033878e-06, "loss": 0.2714, "step": 29396 }, { "epoch": 0.5457758899165686, "grad_norm": 0.63829106092453, "learning_rate": 8.566670745187869e-06, "loss": 0.4461, "step": 29398 }, { "epoch": 0.5458130200539872, "grad_norm": 0.4500024616718292, "learning_rate": 8.565516308845179e-06, "loss": 0.4219, "step": 29400 }, { "epoch": 0.5458501501914058, "grad_norm": 0.5143587589263916, "learning_rate": 8.56436189202151e-06, "loss": 0.2334, "step": 29402 }, { "epoch": 0.5458872803288245, "grad_norm": 0.6377624869346619, "learning_rate": 8.56320749473258e-06, "loss": 0.3156, "step": 29404 }, { "epoch": 0.5459244104662432, "grad_norm": 0.25424185395240784, "learning_rate": 8.562053116994088e-06, "loss": 0.2873, "step": 29406 }, { "epoch": 0.5459615406036618, "grad_norm": 0.2730737626552582, "learning_rate": 8.560898758821751e-06, "loss": 0.2995, "step": 29408 }, { "epoch": 0.5459986707410804, "grad_norm": 0.4875289499759674, "learning_rate": 8.559744420231272e-06, "loss": 0.2175, "step": 29410 }, { "epoch": 0.546035800878499, "grad_norm": 0.3255956172943115, "learning_rate": 8.558590101238353e-06, "loss": 0.3691, "step": 29412 }, { "epoch": 0.5460729310159177, "grad_norm": 0.3296992778778076, "learning_rate": 8.557435801858706e-06, "loss": 0.1409, "step": 29414 }, { "epoch": 0.5461100611533364, "grad_norm": 0.34794560074806213, "learning_rate": 8.556281522108037e-06, "loss": 0.3194, "step": 29416 }, { "epoch": 0.546147191290755, "grad_norm": 0.4201442003250122, "learning_rate": 8.555127262002054e-06, "loss": 0.3529, "step": 29418 }, { "epoch": 0.5461843214281736, "grad_norm": 0.3547205626964569, "learning_rate": 8.553973021556457e-06, "loss": 0.5032, "step": 29420 }, { "epoch": 0.5462214515655922, "grad_norm": 0.3350699245929718, "learning_rate": 8.552818800786957e-06, "loss": 0.1418, "step": 29422 }, { "epoch": 0.5462585817030109, "grad_norm": 0.43205714225769043, "learning_rate": 8.551664599709264e-06, "loss": 0.2846, "step": 29424 }, { "epoch": 0.5462957118404296, "grad_norm": 0.26601508259773254, "learning_rate": 8.550510418339072e-06, "loss": 0.2907, "step": 29426 }, { "epoch": 0.5463328419778481, "grad_norm": 0.47079217433929443, "learning_rate": 8.549356256692092e-06, "loss": 0.2974, "step": 29428 }, { "epoch": 0.5463699721152668, "grad_norm": 0.3658428192138672, "learning_rate": 8.54820211478403e-06, "loss": 0.2326, "step": 29430 }, { "epoch": 0.5464071022526854, "grad_norm": 0.2766571044921875, "learning_rate": 8.547047992630586e-06, "loss": 0.4282, "step": 29432 }, { "epoch": 0.5464442323901041, "grad_norm": 0.4139076769351959, "learning_rate": 8.545893890247467e-06, "loss": 0.2459, "step": 29434 }, { "epoch": 0.5464813625275227, "grad_norm": 0.408913254737854, "learning_rate": 8.54473980765038e-06, "loss": 0.3762, "step": 29436 }, { "epoch": 0.5465184926649413, "grad_norm": 0.580410361289978, "learning_rate": 8.543585744855024e-06, "loss": 0.2703, "step": 29438 }, { "epoch": 0.54655562280236, "grad_norm": 0.5737597942352295, "learning_rate": 8.5424317018771e-06, "loss": 0.2995, "step": 29440 }, { "epoch": 0.5465927529397786, "grad_norm": 0.4350390136241913, "learning_rate": 8.541277678732318e-06, "loss": 0.2394, "step": 29442 }, { "epoch": 0.5466298830771973, "grad_norm": 0.3459354341030121, "learning_rate": 8.540123675436375e-06, "loss": 0.4399, "step": 29444 }, { "epoch": 0.5466670132146159, "grad_norm": 0.20533688366413116, "learning_rate": 8.538969692004977e-06, "loss": 0.2288, "step": 29446 }, { "epoch": 0.5467041433520345, "grad_norm": 0.5552954077720642, "learning_rate": 8.53781572845383e-06, "loss": 0.3677, "step": 29448 }, { "epoch": 0.5467412734894532, "grad_norm": 0.42814841866493225, "learning_rate": 8.536661784798625e-06, "loss": 0.2172, "step": 29450 }, { "epoch": 0.5467784036268718, "grad_norm": 0.4103059768676758, "learning_rate": 8.535507861055072e-06, "loss": 0.2807, "step": 29452 }, { "epoch": 0.5468155337642905, "grad_norm": 0.3233448266983032, "learning_rate": 8.53435395723887e-06, "loss": 0.1688, "step": 29454 }, { "epoch": 0.5468526639017091, "grad_norm": 0.30224910378456116, "learning_rate": 8.53320007336572e-06, "loss": 0.3671, "step": 29456 }, { "epoch": 0.5468897940391277, "grad_norm": 0.36225661635398865, "learning_rate": 8.532046209451322e-06, "loss": 0.2667, "step": 29458 }, { "epoch": 0.5469269241765464, "grad_norm": 0.2656540274620056, "learning_rate": 8.530892365511381e-06, "loss": 0.2337, "step": 29460 }, { "epoch": 0.546964054313965, "grad_norm": 0.6690658330917358, "learning_rate": 8.529738541561595e-06, "loss": 0.2246, "step": 29462 }, { "epoch": 0.5470011844513837, "grad_norm": 0.38893425464630127, "learning_rate": 8.528584737617664e-06, "loss": 0.215, "step": 29464 }, { "epoch": 0.5470383145888023, "grad_norm": 0.27084463834762573, "learning_rate": 8.527430953695284e-06, "loss": 0.307, "step": 29466 }, { "epoch": 0.5470754447262209, "grad_norm": 0.332878977060318, "learning_rate": 8.526277189810157e-06, "loss": 0.2087, "step": 29468 }, { "epoch": 0.5471125748636396, "grad_norm": 0.2772464454174042, "learning_rate": 8.525123445977985e-06, "loss": 0.1552, "step": 29470 }, { "epoch": 0.5471497050010582, "grad_norm": 2.5414254665374756, "learning_rate": 8.523969722214467e-06, "loss": 0.2646, "step": 29472 }, { "epoch": 0.5471868351384769, "grad_norm": 0.45054513216018677, "learning_rate": 8.522816018535301e-06, "loss": 0.2148, "step": 29474 }, { "epoch": 0.5472239652758955, "grad_norm": 0.42739564180374146, "learning_rate": 8.52166233495618e-06, "loss": 0.3996, "step": 29476 }, { "epoch": 0.5472610954133141, "grad_norm": 0.5109399557113647, "learning_rate": 8.520508671492807e-06, "loss": 0.1984, "step": 29478 }, { "epoch": 0.5472982255507327, "grad_norm": 0.41114407777786255, "learning_rate": 8.519355028160881e-06, "loss": 0.2378, "step": 29480 }, { "epoch": 0.5473353556881514, "grad_norm": 0.41081589460372925, "learning_rate": 8.518201404976097e-06, "loss": 0.2549, "step": 29482 }, { "epoch": 0.5473724858255701, "grad_norm": 0.2856124937534332, "learning_rate": 8.517047801954154e-06, "loss": 0.3273, "step": 29484 }, { "epoch": 0.5474096159629886, "grad_norm": 0.39343586564064026, "learning_rate": 8.515894219110752e-06, "loss": 0.2401, "step": 29486 }, { "epoch": 0.5474467461004073, "grad_norm": 0.32764309644699097, "learning_rate": 8.514740656461579e-06, "loss": 0.2299, "step": 29488 }, { "epoch": 0.5474838762378259, "grad_norm": 0.2899824380874634, "learning_rate": 8.513587114022338e-06, "loss": 0.1612, "step": 29490 }, { "epoch": 0.5475210063752446, "grad_norm": 0.4124244749546051, "learning_rate": 8.512433591808724e-06, "loss": 0.3067, "step": 29492 }, { "epoch": 0.5475581365126633, "grad_norm": 0.29867270588874817, "learning_rate": 8.511280089836433e-06, "loss": 0.1515, "step": 29494 }, { "epoch": 0.5475952666500818, "grad_norm": 0.30294597148895264, "learning_rate": 8.510126608121161e-06, "loss": 0.4286, "step": 29496 }, { "epoch": 0.5476323967875005, "grad_norm": 0.5003699064254761, "learning_rate": 8.508973146678605e-06, "loss": 0.1303, "step": 29498 }, { "epoch": 0.5476695269249191, "grad_norm": 0.6814979314804077, "learning_rate": 8.507819705524457e-06, "loss": 0.3632, "step": 29500 }, { "epoch": 0.5477066570623378, "grad_norm": 0.4204842746257782, "learning_rate": 8.506666284674412e-06, "loss": 0.3272, "step": 29502 }, { "epoch": 0.5477437871997565, "grad_norm": 0.19407132267951965, "learning_rate": 8.505512884144167e-06, "loss": 0.2306, "step": 29504 }, { "epoch": 0.547780917337175, "grad_norm": 0.5600400567054749, "learning_rate": 8.504359503949415e-06, "loss": 0.5105, "step": 29506 }, { "epoch": 0.5478180474745937, "grad_norm": 0.27770617604255676, "learning_rate": 8.503206144105849e-06, "loss": 0.2642, "step": 29508 }, { "epoch": 0.5478551776120123, "grad_norm": 0.39783868193626404, "learning_rate": 8.502052804629163e-06, "loss": 0.2691, "step": 29510 }, { "epoch": 0.547892307749431, "grad_norm": 0.4914141595363617, "learning_rate": 8.500899485535058e-06, "loss": 0.3238, "step": 29512 }, { "epoch": 0.5479294378868497, "grad_norm": 0.5469263195991516, "learning_rate": 8.499746186839215e-06, "loss": 0.3548, "step": 29514 }, { "epoch": 0.5479665680242682, "grad_norm": 0.3003077507019043, "learning_rate": 8.498592908557334e-06, "loss": 0.1714, "step": 29516 }, { "epoch": 0.5480036981616869, "grad_norm": 0.32447564601898193, "learning_rate": 8.497439650705106e-06, "loss": 0.2247, "step": 29518 }, { "epoch": 0.5480408282991055, "grad_norm": 0.4069264829158783, "learning_rate": 8.496286413298223e-06, "loss": 0.2087, "step": 29520 }, { "epoch": 0.5480779584365242, "grad_norm": 0.3681446611881256, "learning_rate": 8.495133196352377e-06, "loss": 0.1478, "step": 29522 }, { "epoch": 0.5481150885739429, "grad_norm": 0.30589357018470764, "learning_rate": 8.493979999883266e-06, "loss": 0.3183, "step": 29524 }, { "epoch": 0.5481522187113614, "grad_norm": 0.2754327356815338, "learning_rate": 8.492826823906573e-06, "loss": 0.3515, "step": 29526 }, { "epoch": 0.5481893488487801, "grad_norm": 0.3715721666812897, "learning_rate": 8.491673668437992e-06, "loss": 0.2342, "step": 29528 }, { "epoch": 0.5482264789861987, "grad_norm": 0.15907639265060425, "learning_rate": 8.490520533493211e-06, "loss": 0.0624, "step": 29530 }, { "epoch": 0.5482636091236174, "grad_norm": 0.3349675238132477, "learning_rate": 8.489367419087926e-06, "loss": 0.3386, "step": 29532 }, { "epoch": 0.548300739261036, "grad_norm": 0.5463942289352417, "learning_rate": 8.488214325237829e-06, "loss": 0.2161, "step": 29534 }, { "epoch": 0.5483378693984546, "grad_norm": 0.3647940456867218, "learning_rate": 8.487061251958606e-06, "loss": 0.4235, "step": 29536 }, { "epoch": 0.5483749995358733, "grad_norm": 0.23527753353118896, "learning_rate": 8.485908199265947e-06, "loss": 0.2033, "step": 29538 }, { "epoch": 0.5484121296732919, "grad_norm": 0.3834899961948395, "learning_rate": 8.484755167175541e-06, "loss": 0.3793, "step": 29540 }, { "epoch": 0.5484492598107106, "grad_norm": 0.39678052067756653, "learning_rate": 8.483602155703079e-06, "loss": 0.3552, "step": 29542 }, { "epoch": 0.5484863899481291, "grad_norm": 0.37919747829437256, "learning_rate": 8.482449164864248e-06, "loss": 0.2535, "step": 29544 }, { "epoch": 0.5485235200855478, "grad_norm": 0.5572365522384644, "learning_rate": 8.48129619467474e-06, "loss": 0.2346, "step": 29546 }, { "epoch": 0.5485606502229665, "grad_norm": 0.3966858386993408, "learning_rate": 8.48014324515024e-06, "loss": 0.4016, "step": 29548 }, { "epoch": 0.5485977803603851, "grad_norm": 0.26546618342399597, "learning_rate": 8.478990316306442e-06, "loss": 0.209, "step": 29550 }, { "epoch": 0.5486349104978038, "grad_norm": 0.47832462191581726, "learning_rate": 8.477837408159026e-06, "loss": 0.3201, "step": 29552 }, { "epoch": 0.5486720406352223, "grad_norm": 0.25968796014785767, "learning_rate": 8.476684520723683e-06, "loss": 0.2261, "step": 29554 }, { "epoch": 0.548709170772641, "grad_norm": 0.4551369249820709, "learning_rate": 8.475531654016104e-06, "loss": 0.2591, "step": 29556 }, { "epoch": 0.5487463009100597, "grad_norm": 0.2256239950656891, "learning_rate": 8.47437880805197e-06, "loss": 0.254, "step": 29558 }, { "epoch": 0.5487834310474783, "grad_norm": 0.38865092396736145, "learning_rate": 8.473225982846971e-06, "loss": 0.471, "step": 29560 }, { "epoch": 0.548820561184897, "grad_norm": 0.38139957189559937, "learning_rate": 8.472073178416798e-06, "loss": 0.2729, "step": 29562 }, { "epoch": 0.5488576913223155, "grad_norm": 0.21085870265960693, "learning_rate": 8.470920394777127e-06, "loss": 0.1975, "step": 29564 }, { "epoch": 0.5488948214597342, "grad_norm": 0.3681356906890869, "learning_rate": 8.469767631943651e-06, "loss": 0.3372, "step": 29566 }, { "epoch": 0.5489319515971529, "grad_norm": 0.2004367709159851, "learning_rate": 8.468614889932055e-06, "loss": 0.391, "step": 29568 }, { "epoch": 0.5489690817345715, "grad_norm": 0.3810936212539673, "learning_rate": 8.46746216875802e-06, "loss": 0.3781, "step": 29570 }, { "epoch": 0.5490062118719902, "grad_norm": 0.5741381049156189, "learning_rate": 8.466309468437235e-06, "loss": 0.26, "step": 29572 }, { "epoch": 0.5490433420094087, "grad_norm": 0.48123329877853394, "learning_rate": 8.46515678898539e-06, "loss": 0.3651, "step": 29574 }, { "epoch": 0.5490804721468274, "grad_norm": 0.4998578429222107, "learning_rate": 8.464004130418157e-06, "loss": 0.2832, "step": 29576 }, { "epoch": 0.5491176022842461, "grad_norm": 0.48362430930137634, "learning_rate": 8.46285149275123e-06, "loss": 0.2779, "step": 29578 }, { "epoch": 0.5491547324216647, "grad_norm": 0.4045029878616333, "learning_rate": 8.46169887600029e-06, "loss": 0.3614, "step": 29580 }, { "epoch": 0.5491918625590834, "grad_norm": 0.3947937488555908, "learning_rate": 8.460546280181018e-06, "loss": 0.28, "step": 29582 }, { "epoch": 0.5492289926965019, "grad_norm": 0.3645005226135254, "learning_rate": 8.4593937053091e-06, "loss": 0.3471, "step": 29584 }, { "epoch": 0.5492661228339206, "grad_norm": 0.6296367049217224, "learning_rate": 8.458241151400223e-06, "loss": 0.3591, "step": 29586 }, { "epoch": 0.5493032529713392, "grad_norm": 0.34151867032051086, "learning_rate": 8.457088618470064e-06, "loss": 0.2352, "step": 29588 }, { "epoch": 0.5493403831087579, "grad_norm": 0.4820873737335205, "learning_rate": 8.455936106534308e-06, "loss": 0.3794, "step": 29590 }, { "epoch": 0.5493775132461766, "grad_norm": 0.4958608150482178, "learning_rate": 8.454783615608634e-06, "loss": 0.2119, "step": 29592 }, { "epoch": 0.5494146433835951, "grad_norm": 0.46441322565078735, "learning_rate": 8.453631145708726e-06, "loss": 0.383, "step": 29594 }, { "epoch": 0.5494517735210138, "grad_norm": 0.354354590177536, "learning_rate": 8.452478696850268e-06, "loss": 0.2379, "step": 29596 }, { "epoch": 0.5494889036584324, "grad_norm": 0.410057932138443, "learning_rate": 8.451326269048941e-06, "loss": 0.2988, "step": 29598 }, { "epoch": 0.5495260337958511, "grad_norm": 0.18379488587379456, "learning_rate": 8.450173862320425e-06, "loss": 0.396, "step": 29600 }, { "epoch": 0.5495631639332698, "grad_norm": 0.34139513969421387, "learning_rate": 8.4490214766804e-06, "loss": 0.2417, "step": 29602 }, { "epoch": 0.5496002940706883, "grad_norm": 0.535956084728241, "learning_rate": 8.447869112144544e-06, "loss": 0.3587, "step": 29604 }, { "epoch": 0.549637424208107, "grad_norm": 0.45256224274635315, "learning_rate": 8.446716768728543e-06, "loss": 0.2962, "step": 29606 }, { "epoch": 0.5496745543455256, "grad_norm": 0.4366180896759033, "learning_rate": 8.445564446448072e-06, "loss": 0.2863, "step": 29608 }, { "epoch": 0.5497116844829443, "grad_norm": 0.4836982786655426, "learning_rate": 8.444412145318815e-06, "loss": 0.1852, "step": 29610 }, { "epoch": 0.549748814620363, "grad_norm": 0.5191369652748108, "learning_rate": 8.44325986535645e-06, "loss": 0.3641, "step": 29612 }, { "epoch": 0.5497859447577815, "grad_norm": 0.3350653350353241, "learning_rate": 8.442107606576652e-06, "loss": 0.4311, "step": 29614 }, { "epoch": 0.5498230748952002, "grad_norm": 0.5967183709144592, "learning_rate": 8.440955368995105e-06, "loss": 0.3083, "step": 29616 }, { "epoch": 0.5498602050326188, "grad_norm": 0.39591431617736816, "learning_rate": 8.439803152627483e-06, "loss": 0.3851, "step": 29618 }, { "epoch": 0.5498973351700375, "grad_norm": 0.37901535630226135, "learning_rate": 8.43865095748947e-06, "loss": 0.345, "step": 29620 }, { "epoch": 0.5499344653074562, "grad_norm": 0.3170936405658722, "learning_rate": 8.437498783596739e-06, "loss": 0.2525, "step": 29622 }, { "epoch": 0.5499715954448747, "grad_norm": 0.33359119296073914, "learning_rate": 8.436346630964973e-06, "loss": 0.2222, "step": 29624 }, { "epoch": 0.5500087255822934, "grad_norm": 0.39624372124671936, "learning_rate": 8.435194499609841e-06, "loss": 0.235, "step": 29626 }, { "epoch": 0.550045855719712, "grad_norm": 0.29178348183631897, "learning_rate": 8.434042389547026e-06, "loss": 0.2949, "step": 29628 }, { "epoch": 0.5500829858571307, "grad_norm": 0.4446309804916382, "learning_rate": 8.432890300792202e-06, "loss": 0.2422, "step": 29630 }, { "epoch": 0.5501201159945492, "grad_norm": 0.34932273626327515, "learning_rate": 8.431738233361051e-06, "loss": 0.2506, "step": 29632 }, { "epoch": 0.5501572461319679, "grad_norm": 0.30129197239875793, "learning_rate": 8.43058618726924e-06, "loss": 0.4619, "step": 29634 }, { "epoch": 0.5501943762693866, "grad_norm": 0.4278201758861542, "learning_rate": 8.429434162532454e-06, "loss": 0.2816, "step": 29636 }, { "epoch": 0.5502315064068052, "grad_norm": 0.5870686173439026, "learning_rate": 8.428282159166365e-06, "loss": 0.2263, "step": 29638 }, { "epoch": 0.5502686365442239, "grad_norm": 0.4553792476654053, "learning_rate": 8.427130177186646e-06, "loss": 0.3765, "step": 29640 }, { "epoch": 0.5503057666816424, "grad_norm": 0.37603169679641724, "learning_rate": 8.425978216608976e-06, "loss": 0.3398, "step": 29642 }, { "epoch": 0.5503428968190611, "grad_norm": 0.474406898021698, "learning_rate": 8.424826277449025e-06, "loss": 0.2323, "step": 29644 }, { "epoch": 0.5503800269564798, "grad_norm": 0.2842349708080292, "learning_rate": 8.423674359722471e-06, "loss": 0.1291, "step": 29646 }, { "epoch": 0.5504171570938984, "grad_norm": 0.2941468358039856, "learning_rate": 8.422522463444986e-06, "loss": 0.2435, "step": 29648 }, { "epoch": 0.5504542872313171, "grad_norm": 0.45396292209625244, "learning_rate": 8.42137058863225e-06, "loss": 0.3079, "step": 29650 }, { "epoch": 0.5504914173687356, "grad_norm": 0.36189764738082886, "learning_rate": 8.420218735299929e-06, "loss": 0.3917, "step": 29652 }, { "epoch": 0.5505285475061543, "grad_norm": 0.3167206346988678, "learning_rate": 8.4190669034637e-06, "loss": 0.1783, "step": 29654 }, { "epoch": 0.550565677643573, "grad_norm": 0.40876254439353943, "learning_rate": 8.417915093139232e-06, "loss": 0.4082, "step": 29656 }, { "epoch": 0.5506028077809916, "grad_norm": 0.7512531280517578, "learning_rate": 8.416763304342202e-06, "loss": 0.3855, "step": 29658 }, { "epoch": 0.5506399379184103, "grad_norm": 0.40257060527801514, "learning_rate": 8.415611537088279e-06, "loss": 0.4172, "step": 29660 }, { "epoch": 0.5506770680558288, "grad_norm": 0.35208678245544434, "learning_rate": 8.414459791393144e-06, "loss": 0.2877, "step": 29662 }, { "epoch": 0.5507141981932475, "grad_norm": 0.4353802502155304, "learning_rate": 8.41330806727246e-06, "loss": 0.2873, "step": 29664 }, { "epoch": 0.5507513283306662, "grad_norm": 0.5050287246704102, "learning_rate": 8.412156364741896e-06, "loss": 0.3522, "step": 29666 }, { "epoch": 0.5507884584680848, "grad_norm": 0.2604857087135315, "learning_rate": 8.411004683817129e-06, "loss": 0.2177, "step": 29668 }, { "epoch": 0.5508255886055035, "grad_norm": 0.3294623792171478, "learning_rate": 8.409853024513828e-06, "loss": 0.192, "step": 29670 }, { "epoch": 0.550862718742922, "grad_norm": 0.5258513689041138, "learning_rate": 8.408701386847668e-06, "loss": 0.3332, "step": 29672 }, { "epoch": 0.5508998488803407, "grad_norm": 0.2859926223754883, "learning_rate": 8.407549770834312e-06, "loss": 0.2652, "step": 29674 }, { "epoch": 0.5509369790177594, "grad_norm": 0.4033631682395935, "learning_rate": 8.40639817648944e-06, "loss": 0.3011, "step": 29676 }, { "epoch": 0.550974109155178, "grad_norm": 0.17057602107524872, "learning_rate": 8.405246603828707e-06, "loss": 0.2408, "step": 29678 }, { "epoch": 0.5510112392925967, "grad_norm": 0.4241195321083069, "learning_rate": 8.404095052867793e-06, "loss": 0.2024, "step": 29680 }, { "epoch": 0.5510483694300152, "grad_norm": 0.2661696970462799, "learning_rate": 8.402943523622366e-06, "loss": 0.1341, "step": 29682 }, { "epoch": 0.5510854995674339, "grad_norm": 0.2898448705673218, "learning_rate": 8.401792016108096e-06, "loss": 0.2322, "step": 29684 }, { "epoch": 0.5511226297048525, "grad_norm": 0.4003200829029083, "learning_rate": 8.400640530340647e-06, "loss": 0.2691, "step": 29686 }, { "epoch": 0.5511597598422712, "grad_norm": 0.2662317156791687, "learning_rate": 8.399489066335695e-06, "loss": 0.173, "step": 29688 }, { "epoch": 0.5511968899796899, "grad_norm": 0.45847034454345703, "learning_rate": 8.398337624108897e-06, "loss": 0.1946, "step": 29690 }, { "epoch": 0.5512340201171084, "grad_norm": 0.49228665232658386, "learning_rate": 8.397186203675926e-06, "loss": 0.2572, "step": 29692 }, { "epoch": 0.5512711502545271, "grad_norm": 0.37093785405158997, "learning_rate": 8.396034805052453e-06, "loss": 0.3547, "step": 29694 }, { "epoch": 0.5513082803919457, "grad_norm": 0.4184906780719757, "learning_rate": 8.394883428254139e-06, "loss": 0.4533, "step": 29696 }, { "epoch": 0.5513454105293644, "grad_norm": 0.5013591647148132, "learning_rate": 8.393732073296654e-06, "loss": 0.208, "step": 29698 }, { "epoch": 0.551382540666783, "grad_norm": 0.4134382903575897, "learning_rate": 8.392580740195669e-06, "loss": 0.3003, "step": 29700 }, { "epoch": 0.5514196708042016, "grad_norm": 0.46402209997177124, "learning_rate": 8.39142942896684e-06, "loss": 0.305, "step": 29702 }, { "epoch": 0.5514568009416203, "grad_norm": 0.28141483664512634, "learning_rate": 8.390278139625839e-06, "loss": 0.2202, "step": 29704 }, { "epoch": 0.5514939310790389, "grad_norm": 0.3504815995693207, "learning_rate": 8.389126872188333e-06, "loss": 0.1518, "step": 29706 }, { "epoch": 0.5515310612164576, "grad_norm": 0.36789146065711975, "learning_rate": 8.387975626669982e-06, "loss": 0.2319, "step": 29708 }, { "epoch": 0.5515681913538762, "grad_norm": 0.4471376836299896, "learning_rate": 8.386824403086455e-06, "loss": 0.4276, "step": 29710 }, { "epoch": 0.5516053214912948, "grad_norm": 0.3521673083305359, "learning_rate": 8.385673201453416e-06, "loss": 0.2054, "step": 29712 }, { "epoch": 0.5516424516287135, "grad_norm": 0.4252486824989319, "learning_rate": 8.384522021786534e-06, "loss": 0.2412, "step": 29714 }, { "epoch": 0.5516795817661321, "grad_norm": 0.31602349877357483, "learning_rate": 8.383370864101464e-06, "loss": 0.1956, "step": 29716 }, { "epoch": 0.5517167119035508, "grad_norm": 0.5742897391319275, "learning_rate": 8.382219728413875e-06, "loss": 0.3732, "step": 29718 }, { "epoch": 0.5517538420409694, "grad_norm": 0.4237477481365204, "learning_rate": 8.381068614739428e-06, "loss": 0.3486, "step": 29720 }, { "epoch": 0.551790972178388, "grad_norm": 0.34446054697036743, "learning_rate": 8.379917523093788e-06, "loss": 0.3259, "step": 29722 }, { "epoch": 0.5518281023158067, "grad_norm": 0.42857325077056885, "learning_rate": 8.378766453492621e-06, "loss": 0.3333, "step": 29724 }, { "epoch": 0.5518652324532253, "grad_norm": 0.20612427592277527, "learning_rate": 8.377615405951584e-06, "loss": 0.4361, "step": 29726 }, { "epoch": 0.551902362590644, "grad_norm": 0.8472954034805298, "learning_rate": 8.376464380486344e-06, "loss": 0.343, "step": 29728 }, { "epoch": 0.5519394927280626, "grad_norm": 0.25781169533729553, "learning_rate": 8.375313377112558e-06, "loss": 0.3917, "step": 29730 }, { "epoch": 0.5519766228654812, "grad_norm": 0.3379178047180176, "learning_rate": 8.37416239584589e-06, "loss": 0.2115, "step": 29732 }, { "epoch": 0.5520137530028999, "grad_norm": 0.28762543201446533, "learning_rate": 8.373011436702003e-06, "loss": 0.3057, "step": 29734 }, { "epoch": 0.5520508831403185, "grad_norm": 0.2951710522174835, "learning_rate": 8.371860499696558e-06, "loss": 0.218, "step": 29736 }, { "epoch": 0.5520880132777372, "grad_norm": 0.4116617739200592, "learning_rate": 8.370709584845215e-06, "loss": 0.306, "step": 29738 }, { "epoch": 0.5521251434151557, "grad_norm": 0.4373941123485565, "learning_rate": 8.369558692163634e-06, "loss": 0.2075, "step": 29740 }, { "epoch": 0.5521622735525744, "grad_norm": 0.3932211995124817, "learning_rate": 8.368407821667473e-06, "loss": 0.1079, "step": 29742 }, { "epoch": 0.5521994036899931, "grad_norm": 0.4484252631664276, "learning_rate": 8.367256973372396e-06, "loss": 0.1403, "step": 29744 }, { "epoch": 0.5522365338274117, "grad_norm": 0.348652720451355, "learning_rate": 8.36610614729406e-06, "loss": 0.2509, "step": 29746 }, { "epoch": 0.5522736639648304, "grad_norm": 0.2790572941303253, "learning_rate": 8.364955343448127e-06, "loss": 0.2477, "step": 29748 }, { "epoch": 0.5523107941022489, "grad_norm": 0.30164262652397156, "learning_rate": 8.363804561850253e-06, "loss": 0.3405, "step": 29750 }, { "epoch": 0.5523479242396676, "grad_norm": 0.4022175967693329, "learning_rate": 8.362653802516101e-06, "loss": 0.3793, "step": 29752 }, { "epoch": 0.5523850543770863, "grad_norm": 0.2693368196487427, "learning_rate": 8.361503065461323e-06, "loss": 0.2227, "step": 29754 }, { "epoch": 0.5524221845145049, "grad_norm": 0.2327956259250641, "learning_rate": 8.360352350701579e-06, "loss": 0.3339, "step": 29756 }, { "epoch": 0.5524593146519235, "grad_norm": 0.48128634691238403, "learning_rate": 8.359201658252531e-06, "loss": 0.3183, "step": 29758 }, { "epoch": 0.5524964447893421, "grad_norm": 0.37836596369743347, "learning_rate": 8.358050988129833e-06, "loss": 0.2509, "step": 29760 }, { "epoch": 0.5525335749267608, "grad_norm": 0.3320165276527405, "learning_rate": 8.35690034034914e-06, "loss": 0.2313, "step": 29762 }, { "epoch": 0.5525707050641795, "grad_norm": 0.41630059480667114, "learning_rate": 8.35574971492612e-06, "loss": 0.2661, "step": 29764 }, { "epoch": 0.5526078352015981, "grad_norm": 0.38498005270957947, "learning_rate": 8.354599111876415e-06, "loss": 0.2546, "step": 29766 }, { "epoch": 0.5526449653390167, "grad_norm": 0.7790164351463318, "learning_rate": 8.353448531215686e-06, "loss": 0.1586, "step": 29768 }, { "epoch": 0.5526820954764353, "grad_norm": 0.6608381271362305, "learning_rate": 8.352297972959594e-06, "loss": 0.3292, "step": 29770 }, { "epoch": 0.552719225613854, "grad_norm": 0.5020132660865784, "learning_rate": 8.35114743712379e-06, "loss": 0.3759, "step": 29772 }, { "epoch": 0.5527563557512727, "grad_norm": 0.5110713839530945, "learning_rate": 8.349996923723929e-06, "loss": 0.2945, "step": 29774 }, { "epoch": 0.5527934858886913, "grad_norm": 0.26747772097587585, "learning_rate": 8.348846432775672e-06, "loss": 0.245, "step": 29776 }, { "epoch": 0.5528306160261099, "grad_norm": 0.3154778480529785, "learning_rate": 8.347695964294665e-06, "loss": 0.3245, "step": 29778 }, { "epoch": 0.5528677461635285, "grad_norm": 0.33503904938697815, "learning_rate": 8.346545518296569e-06, "loss": 0.2878, "step": 29780 }, { "epoch": 0.5529048763009472, "grad_norm": 0.36724853515625, "learning_rate": 8.345395094797035e-06, "loss": 0.345, "step": 29782 }, { "epoch": 0.5529420064383658, "grad_norm": 0.2365207076072693, "learning_rate": 8.344244693811717e-06, "loss": 0.3763, "step": 29784 }, { "epoch": 0.5529791365757845, "grad_norm": 0.7945840954780579, "learning_rate": 8.343094315356268e-06, "loss": 0.4443, "step": 29786 }, { "epoch": 0.5530162667132031, "grad_norm": 0.3459824025630951, "learning_rate": 8.341943959446347e-06, "loss": 0.4156, "step": 29788 }, { "epoch": 0.5530533968506217, "grad_norm": 0.24934335052967072, "learning_rate": 8.340793626097599e-06, "loss": 0.2112, "step": 29790 }, { "epoch": 0.5530905269880404, "grad_norm": 0.7116324305534363, "learning_rate": 8.33964331532568e-06, "loss": 0.3924, "step": 29792 }, { "epoch": 0.553127657125459, "grad_norm": 0.442314475774765, "learning_rate": 8.338493027146241e-06, "loss": 0.2192, "step": 29794 }, { "epoch": 0.5531647872628777, "grad_norm": 0.4371623694896698, "learning_rate": 8.337342761574937e-06, "loss": 0.4533, "step": 29796 }, { "epoch": 0.5532019174002963, "grad_norm": 0.4227270185947418, "learning_rate": 8.336192518627414e-06, "loss": 0.201, "step": 29798 }, { "epoch": 0.5532390475377149, "grad_norm": 0.36401867866516113, "learning_rate": 8.335042298319333e-06, "loss": 0.3025, "step": 29800 }, { "epoch": 0.5532761776751336, "grad_norm": 0.296690434217453, "learning_rate": 8.333892100666338e-06, "loss": 0.4312, "step": 29802 }, { "epoch": 0.5533133078125522, "grad_norm": 0.4659436047077179, "learning_rate": 8.33274192568408e-06, "loss": 0.3107, "step": 29804 }, { "epoch": 0.5533504379499709, "grad_norm": 0.28338250517845154, "learning_rate": 8.331591773388208e-06, "loss": 0.2867, "step": 29806 }, { "epoch": 0.5533875680873895, "grad_norm": 0.34068912267684937, "learning_rate": 8.330441643794376e-06, "loss": 0.5331, "step": 29808 }, { "epoch": 0.5534246982248081, "grad_norm": 0.41322460770606995, "learning_rate": 8.329291536918234e-06, "loss": 0.2053, "step": 29810 }, { "epoch": 0.5534618283622268, "grad_norm": 0.44815555214881897, "learning_rate": 8.328141452775427e-06, "loss": 0.2053, "step": 29812 }, { "epoch": 0.5534989584996454, "grad_norm": 0.34072810411453247, "learning_rate": 8.326991391381611e-06, "loss": 0.3035, "step": 29814 }, { "epoch": 0.553536088637064, "grad_norm": 0.3203069269657135, "learning_rate": 8.325841352752427e-06, "loss": 0.3049, "step": 29816 }, { "epoch": 0.5535732187744827, "grad_norm": 0.27456966042518616, "learning_rate": 8.324691336903528e-06, "loss": 0.3151, "step": 29818 }, { "epoch": 0.5536103489119013, "grad_norm": 0.3897068202495575, "learning_rate": 8.323541343850561e-06, "loss": 0.501, "step": 29820 }, { "epoch": 0.55364747904932, "grad_norm": 0.41484394669532776, "learning_rate": 8.32239137360918e-06, "loss": 0.3531, "step": 29822 }, { "epoch": 0.5536846091867386, "grad_norm": 0.40265825390815735, "learning_rate": 8.321241426195022e-06, "loss": 0.284, "step": 29824 }, { "epoch": 0.5537217393241572, "grad_norm": 0.35671958327293396, "learning_rate": 8.320091501623744e-06, "loss": 0.1586, "step": 29826 }, { "epoch": 0.5537588694615759, "grad_norm": 0.3852379322052002, "learning_rate": 8.318941599910986e-06, "loss": 0.2807, "step": 29828 }, { "epoch": 0.5537959995989945, "grad_norm": 0.49228161573410034, "learning_rate": 8.317791721072396e-06, "loss": 0.1886, "step": 29830 }, { "epoch": 0.5538331297364132, "grad_norm": 0.3709466755390167, "learning_rate": 8.316641865123624e-06, "loss": 0.391, "step": 29832 }, { "epoch": 0.5538702598738318, "grad_norm": 0.29470640420913696, "learning_rate": 8.315492032080314e-06, "loss": 0.1621, "step": 29834 }, { "epoch": 0.5539073900112504, "grad_norm": 0.4109002351760864, "learning_rate": 8.314342221958109e-06, "loss": 0.0796, "step": 29836 }, { "epoch": 0.553944520148669, "grad_norm": 0.3403627574443817, "learning_rate": 8.313192434772659e-06, "loss": 0.3232, "step": 29838 }, { "epoch": 0.5539816502860877, "grad_norm": 0.37002187967300415, "learning_rate": 8.312042670539612e-06, "loss": 0.2959, "step": 29840 }, { "epoch": 0.5540187804235064, "grad_norm": 0.3811722695827484, "learning_rate": 8.310892929274603e-06, "loss": 0.4498, "step": 29842 }, { "epoch": 0.554055910560925, "grad_norm": 0.2105754315853119, "learning_rate": 8.309743210993283e-06, "loss": 0.2821, "step": 29844 }, { "epoch": 0.5540930406983436, "grad_norm": 0.34644967317581177, "learning_rate": 8.308593515711293e-06, "loss": 0.2091, "step": 29846 }, { "epoch": 0.5541301708357622, "grad_norm": 0.32401999831199646, "learning_rate": 8.30744384344428e-06, "loss": 0.173, "step": 29848 }, { "epoch": 0.5541673009731809, "grad_norm": 0.4948294460773468, "learning_rate": 8.306294194207888e-06, "loss": 0.2431, "step": 29850 }, { "epoch": 0.5542044311105996, "grad_norm": 0.37393561005592346, "learning_rate": 8.305144568017762e-06, "loss": 0.2617, "step": 29852 }, { "epoch": 0.5542415612480182, "grad_norm": 0.5742794275283813, "learning_rate": 8.303994964889537e-06, "loss": 0.1826, "step": 29854 }, { "epoch": 0.5542786913854368, "grad_norm": 0.42632126808166504, "learning_rate": 8.302845384838861e-06, "loss": 0.2757, "step": 29856 }, { "epoch": 0.5543158215228554, "grad_norm": 0.4956934154033661, "learning_rate": 8.301695827881375e-06, "loss": 0.1983, "step": 29858 }, { "epoch": 0.5543529516602741, "grad_norm": 0.39284560084342957, "learning_rate": 8.300546294032723e-06, "loss": 0.2687, "step": 29860 }, { "epoch": 0.5543900817976928, "grad_norm": 0.2390819936990738, "learning_rate": 8.299396783308544e-06, "loss": 0.1368, "step": 29862 }, { "epoch": 0.5544272119351114, "grad_norm": 0.7485839128494263, "learning_rate": 8.298247295724486e-06, "loss": 0.3089, "step": 29864 }, { "epoch": 0.55446434207253, "grad_norm": 0.3891410529613495, "learning_rate": 8.297097831296182e-06, "loss": 0.1834, "step": 29866 }, { "epoch": 0.5545014722099486, "grad_norm": 0.3280470669269562, "learning_rate": 8.295948390039273e-06, "loss": 0.1663, "step": 29868 }, { "epoch": 0.5545386023473673, "grad_norm": 0.3790913224220276, "learning_rate": 8.294798971969402e-06, "loss": 0.3071, "step": 29870 }, { "epoch": 0.554575732484786, "grad_norm": 0.4453819990158081, "learning_rate": 8.293649577102213e-06, "loss": 0.2461, "step": 29872 }, { "epoch": 0.5546128626222045, "grad_norm": 0.5803861618041992, "learning_rate": 8.29250020545334e-06, "loss": 0.4481, "step": 29874 }, { "epoch": 0.5546499927596232, "grad_norm": 0.5808305740356445, "learning_rate": 8.291350857038426e-06, "loss": 0.2343, "step": 29876 }, { "epoch": 0.5546871228970418, "grad_norm": 0.37444931268692017, "learning_rate": 8.29020153187311e-06, "loss": 0.4974, "step": 29878 }, { "epoch": 0.5547242530344605, "grad_norm": 0.293048620223999, "learning_rate": 8.289052229973027e-06, "loss": 0.2064, "step": 29880 }, { "epoch": 0.5547613831718792, "grad_norm": 0.5807592272758484, "learning_rate": 8.287902951353818e-06, "loss": 0.1936, "step": 29882 }, { "epoch": 0.5547985133092977, "grad_norm": 0.3036838173866272, "learning_rate": 8.286753696031121e-06, "loss": 0.1635, "step": 29884 }, { "epoch": 0.5548356434467164, "grad_norm": 0.865731418132782, "learning_rate": 8.285604464020576e-06, "loss": 0.2447, "step": 29886 }, { "epoch": 0.554872773584135, "grad_norm": 0.24341550469398499, "learning_rate": 8.284455255337818e-06, "loss": 0.2342, "step": 29888 }, { "epoch": 0.5549099037215537, "grad_norm": 0.552036702632904, "learning_rate": 8.28330606999849e-06, "loss": 0.2524, "step": 29890 }, { "epoch": 0.5549470338589723, "grad_norm": 0.6531091928482056, "learning_rate": 8.28215690801822e-06, "loss": 0.4213, "step": 29892 }, { "epoch": 0.554984163996391, "grad_norm": 0.32186159491539, "learning_rate": 8.281007769412649e-06, "loss": 0.1684, "step": 29894 }, { "epoch": 0.5550212941338096, "grad_norm": 0.40910765528678894, "learning_rate": 8.279858654197415e-06, "loss": 0.3347, "step": 29896 }, { "epoch": 0.5550584242712282, "grad_norm": 0.5070319771766663, "learning_rate": 8.27870956238815e-06, "loss": 0.3065, "step": 29898 }, { "epoch": 0.5550955544086469, "grad_norm": 0.4051646590232849, "learning_rate": 8.277560494000491e-06, "loss": 0.3031, "step": 29900 }, { "epoch": 0.5551326845460655, "grad_norm": 0.33359289169311523, "learning_rate": 8.27641144905008e-06, "loss": 0.3236, "step": 29902 }, { "epoch": 0.5551698146834841, "grad_norm": 0.9940642714500427, "learning_rate": 8.275262427552541e-06, "loss": 0.3611, "step": 29904 }, { "epoch": 0.5552069448209028, "grad_norm": 0.29814261198043823, "learning_rate": 8.274113429523516e-06, "loss": 0.3249, "step": 29906 }, { "epoch": 0.5552440749583214, "grad_norm": 0.3672918677330017, "learning_rate": 8.272964454978638e-06, "loss": 0.4686, "step": 29908 }, { "epoch": 0.5552812050957401, "grad_norm": 0.5019917488098145, "learning_rate": 8.27181550393354e-06, "loss": 0.3553, "step": 29910 }, { "epoch": 0.5553183352331587, "grad_norm": 0.38007551431655884, "learning_rate": 8.270666576403856e-06, "loss": 0.153, "step": 29912 }, { "epoch": 0.5553554653705773, "grad_norm": 0.4911530315876007, "learning_rate": 8.269517672405226e-06, "loss": 0.2503, "step": 29914 }, { "epoch": 0.555392595507996, "grad_norm": 0.311686247587204, "learning_rate": 8.268368791953269e-06, "loss": 0.1289, "step": 29916 }, { "epoch": 0.5554297256454146, "grad_norm": 0.33318284153938293, "learning_rate": 8.267219935063631e-06, "loss": 0.3504, "step": 29918 }, { "epoch": 0.5554668557828333, "grad_norm": 0.561971127986908, "learning_rate": 8.266071101751936e-06, "loss": 0.4043, "step": 29920 }, { "epoch": 0.5555039859202519, "grad_norm": 0.43648287653923035, "learning_rate": 8.264922292033819e-06, "loss": 0.1611, "step": 29922 }, { "epoch": 0.5555411160576705, "grad_norm": 0.2933158874511719, "learning_rate": 8.263773505924914e-06, "loss": 0.2958, "step": 29924 }, { "epoch": 0.5555782461950892, "grad_norm": 0.3781273365020752, "learning_rate": 8.262624743440852e-06, "loss": 0.1689, "step": 29926 }, { "epoch": 0.5556153763325078, "grad_norm": 0.32228848338127136, "learning_rate": 8.261476004597263e-06, "loss": 0.4075, "step": 29928 }, { "epoch": 0.5556525064699265, "grad_norm": 0.3182445466518402, "learning_rate": 8.260327289409779e-06, "loss": 0.4898, "step": 29930 }, { "epoch": 0.555689636607345, "grad_norm": 0.2422630339860916, "learning_rate": 8.259178597894027e-06, "loss": 0.3613, "step": 29932 }, { "epoch": 0.5557267667447637, "grad_norm": 0.4437832236289978, "learning_rate": 8.258029930065641e-06, "loss": 0.3255, "step": 29934 }, { "epoch": 0.5557638968821823, "grad_norm": 0.34847161173820496, "learning_rate": 8.256881285940248e-06, "loss": 0.1816, "step": 29936 }, { "epoch": 0.555801027019601, "grad_norm": 0.6126718521118164, "learning_rate": 8.255732665533482e-06, "loss": 0.388, "step": 29938 }, { "epoch": 0.5558381571570197, "grad_norm": 1.5948585271835327, "learning_rate": 8.254584068860973e-06, "loss": 0.232, "step": 29940 }, { "epoch": 0.5558752872944382, "grad_norm": 0.410330206155777, "learning_rate": 8.253435495938342e-06, "loss": 0.1965, "step": 29942 }, { "epoch": 0.5559124174318569, "grad_norm": 0.4859998822212219, "learning_rate": 8.252286946781221e-06, "loss": 0.269, "step": 29944 }, { "epoch": 0.5559495475692755, "grad_norm": 0.3334956467151642, "learning_rate": 8.251138421405241e-06, "loss": 0.3332, "step": 29946 }, { "epoch": 0.5559866777066942, "grad_norm": 0.3331974744796753, "learning_rate": 8.249989919826031e-06, "loss": 0.1801, "step": 29948 }, { "epoch": 0.5560238078441129, "grad_norm": 0.4292665719985962, "learning_rate": 8.248841442059214e-06, "loss": 0.2981, "step": 29950 }, { "epoch": 0.5560609379815314, "grad_norm": 0.2759549021720886, "learning_rate": 8.247692988120424e-06, "loss": 0.1597, "step": 29952 }, { "epoch": 0.5560980681189501, "grad_norm": 0.46558675169944763, "learning_rate": 8.246544558025279e-06, "loss": 0.2625, "step": 29954 }, { "epoch": 0.5561351982563687, "grad_norm": 0.6499155759811401, "learning_rate": 8.24539615178941e-06, "loss": 0.2623, "step": 29956 }, { "epoch": 0.5561723283937874, "grad_norm": 0.39783161878585815, "learning_rate": 8.244247769428444e-06, "loss": 0.3455, "step": 29958 }, { "epoch": 0.5562094585312061, "grad_norm": 0.34358033537864685, "learning_rate": 8.24309941095801e-06, "loss": 0.3624, "step": 29960 }, { "epoch": 0.5562465886686246, "grad_norm": 0.4012523889541626, "learning_rate": 8.241951076393726e-06, "loss": 0.2804, "step": 29962 }, { "epoch": 0.5562837188060433, "grad_norm": 0.3931114077568054, "learning_rate": 8.240802765751223e-06, "loss": 0.3491, "step": 29964 }, { "epoch": 0.5563208489434619, "grad_norm": 0.45764410495758057, "learning_rate": 8.23965447904613e-06, "loss": 0.3176, "step": 29966 }, { "epoch": 0.5563579790808806, "grad_norm": 0.42533084750175476, "learning_rate": 8.238506216294062e-06, "loss": 0.0987, "step": 29968 }, { "epoch": 0.5563951092182993, "grad_norm": 0.39778098464012146, "learning_rate": 8.237357977510649e-06, "loss": 0.1238, "step": 29970 }, { "epoch": 0.5564322393557178, "grad_norm": 0.4398359954357147, "learning_rate": 8.236209762711516e-06, "loss": 0.476, "step": 29972 }, { "epoch": 0.5564693694931365, "grad_norm": 0.37641459703445435, "learning_rate": 8.235061571912282e-06, "loss": 0.1082, "step": 29974 }, { "epoch": 0.5565064996305551, "grad_norm": 0.2843545079231262, "learning_rate": 8.233913405128572e-06, "loss": 0.225, "step": 29976 }, { "epoch": 0.5565436297679738, "grad_norm": 0.35863617062568665, "learning_rate": 8.232765262376017e-06, "loss": 0.1856, "step": 29978 }, { "epoch": 0.5565807599053925, "grad_norm": 0.41007199883461, "learning_rate": 8.23161714367023e-06, "loss": 0.327, "step": 29980 }, { "epoch": 0.556617890042811, "grad_norm": 0.47068873047828674, "learning_rate": 8.230469049026835e-06, "loss": 0.2472, "step": 29982 }, { "epoch": 0.5566550201802297, "grad_norm": 0.5136558413505554, "learning_rate": 8.229320978461457e-06, "loss": 0.2835, "step": 29984 }, { "epoch": 0.5566921503176483, "grad_norm": 0.8411747217178345, "learning_rate": 8.228172931989715e-06, "loss": 0.2659, "step": 29986 }, { "epoch": 0.556729280455067, "grad_norm": 0.3823561668395996, "learning_rate": 8.227024909627234e-06, "loss": 0.1061, "step": 29988 }, { "epoch": 0.5567664105924855, "grad_norm": 0.537619411945343, "learning_rate": 8.225876911389636e-06, "loss": 0.2512, "step": 29990 }, { "epoch": 0.5568035407299042, "grad_norm": 0.4279894530773163, "learning_rate": 8.224728937292535e-06, "loss": 0.2858, "step": 29992 }, { "epoch": 0.5568406708673229, "grad_norm": 0.45453375577926636, "learning_rate": 8.223580987351559e-06, "loss": 0.2551, "step": 29994 }, { "epoch": 0.5568778010047415, "grad_norm": 0.3921010494232178, "learning_rate": 8.22243306158232e-06, "loss": 0.4282, "step": 29996 }, { "epoch": 0.5569149311421602, "grad_norm": 0.6211047172546387, "learning_rate": 8.221285160000445e-06, "loss": 0.241, "step": 29998 }, { "epoch": 0.5569520612795787, "grad_norm": 0.5025211572647095, "learning_rate": 8.220137282621551e-06, "loss": 0.2081, "step": 30000 }, { "epoch": 0.5569891914169974, "grad_norm": 0.3004184663295746, "learning_rate": 8.21898942946126e-06, "loss": 0.4883, "step": 30002 }, { "epoch": 0.5570263215544161, "grad_norm": 0.2746807336807251, "learning_rate": 8.217841600535187e-06, "loss": 0.2318, "step": 30004 }, { "epoch": 0.5570634516918347, "grad_norm": 0.36326611042022705, "learning_rate": 8.21669379585895e-06, "loss": 0.3016, "step": 30006 }, { "epoch": 0.5571005818292534, "grad_norm": 0.4255525767803192, "learning_rate": 8.215546015448169e-06, "loss": 0.4402, "step": 30008 }, { "epoch": 0.557137711966672, "grad_norm": 0.2435346096754074, "learning_rate": 8.214398259318461e-06, "loss": 0.2103, "step": 30010 }, { "epoch": 0.5571748421040906, "grad_norm": 0.2673097848892212, "learning_rate": 8.213250527485446e-06, "loss": 0.1927, "step": 30012 }, { "epoch": 0.5572119722415093, "grad_norm": 0.5042145848274231, "learning_rate": 8.212102819964738e-06, "loss": 0.3071, "step": 30014 }, { "epoch": 0.5572491023789279, "grad_norm": 0.38507479429244995, "learning_rate": 8.210955136771958e-06, "loss": 0.336, "step": 30016 }, { "epoch": 0.5572862325163466, "grad_norm": 0.37809982895851135, "learning_rate": 8.209807477922718e-06, "loss": 0.1269, "step": 30018 }, { "epoch": 0.5573233626537651, "grad_norm": 0.3661077916622162, "learning_rate": 8.208659843432633e-06, "loss": 0.1981, "step": 30020 }, { "epoch": 0.5573604927911838, "grad_norm": 0.3845650553703308, "learning_rate": 8.207512233317324e-06, "loss": 0.2308, "step": 30022 }, { "epoch": 0.5573976229286025, "grad_norm": 0.550507664680481, "learning_rate": 8.206364647592406e-06, "loss": 0.4359, "step": 30024 }, { "epoch": 0.5574347530660211, "grad_norm": 0.24292199313640594, "learning_rate": 8.205217086273491e-06, "loss": 0.3145, "step": 30026 }, { "epoch": 0.5574718832034398, "grad_norm": 0.32117652893066406, "learning_rate": 8.204069549376198e-06, "loss": 0.3141, "step": 30028 }, { "epoch": 0.5575090133408583, "grad_norm": 0.4574264585971832, "learning_rate": 8.202922036916136e-06, "loss": 0.3445, "step": 30030 }, { "epoch": 0.557546143478277, "grad_norm": 0.26199522614479065, "learning_rate": 8.201774548908921e-06, "loss": 0.1583, "step": 30032 }, { "epoch": 0.5575832736156957, "grad_norm": 0.24008169770240784, "learning_rate": 8.200627085370172e-06, "loss": 0.3128, "step": 30034 }, { "epoch": 0.5576204037531143, "grad_norm": 0.45081865787506104, "learning_rate": 8.199479646315496e-06, "loss": 0.3574, "step": 30036 }, { "epoch": 0.557657533890533, "grad_norm": 0.4980800151824951, "learning_rate": 8.198332231760508e-06, "loss": 0.2122, "step": 30038 }, { "epoch": 0.5576946640279515, "grad_norm": 0.36799395084381104, "learning_rate": 8.197184841720822e-06, "loss": 0.1784, "step": 30040 }, { "epoch": 0.5577317941653702, "grad_norm": 0.40446123480796814, "learning_rate": 8.196037476212056e-06, "loss": 0.4325, "step": 30042 }, { "epoch": 0.5577689243027888, "grad_norm": 0.38782864809036255, "learning_rate": 8.19489013524981e-06, "loss": 0.2808, "step": 30044 }, { "epoch": 0.5578060544402075, "grad_norm": 0.3911173939704895, "learning_rate": 8.193742818849705e-06, "loss": 0.3478, "step": 30046 }, { "epoch": 0.5578431845776262, "grad_norm": 0.3728505074977875, "learning_rate": 8.192595527027349e-06, "loss": 0.34, "step": 30048 }, { "epoch": 0.5578803147150447, "grad_norm": 0.373843252658844, "learning_rate": 8.191448259798353e-06, "loss": 0.2082, "step": 30050 }, { "epoch": 0.5579174448524634, "grad_norm": 0.2734020948410034, "learning_rate": 8.19030101717833e-06, "loss": 0.1867, "step": 30052 }, { "epoch": 0.557954574989882, "grad_norm": 0.37600746750831604, "learning_rate": 8.189153799182891e-06, "loss": 0.2425, "step": 30054 }, { "epoch": 0.5579917051273007, "grad_norm": 0.4295004606246948, "learning_rate": 8.188006605827646e-06, "loss": 0.2155, "step": 30056 }, { "epoch": 0.5580288352647194, "grad_norm": 0.5866034626960754, "learning_rate": 8.186859437128199e-06, "loss": 0.2216, "step": 30058 }, { "epoch": 0.5580659654021379, "grad_norm": 0.47462812066078186, "learning_rate": 8.185712293100166e-06, "loss": 0.3099, "step": 30060 }, { "epoch": 0.5581030955395566, "grad_norm": 0.4157017767429352, "learning_rate": 8.184565173759153e-06, "loss": 0.2573, "step": 30062 }, { "epoch": 0.5581402256769752, "grad_norm": 0.5625922083854675, "learning_rate": 8.183418079120773e-06, "loss": 0.2275, "step": 30064 }, { "epoch": 0.5581773558143939, "grad_norm": 0.28348788619041443, "learning_rate": 8.182271009200631e-06, "loss": 0.2031, "step": 30066 }, { "epoch": 0.5582144859518126, "grad_norm": 0.2383648008108139, "learning_rate": 8.181123964014336e-06, "loss": 0.1278, "step": 30068 }, { "epoch": 0.5582516160892311, "grad_norm": 0.3857385814189911, "learning_rate": 8.179976943577494e-06, "loss": 0.2875, "step": 30070 }, { "epoch": 0.5582887462266498, "grad_norm": 0.4877139627933502, "learning_rate": 8.178829947905713e-06, "loss": 0.2782, "step": 30072 }, { "epoch": 0.5583258763640684, "grad_norm": 0.35712820291519165, "learning_rate": 8.177682977014602e-06, "loss": 0.2625, "step": 30074 }, { "epoch": 0.5583630065014871, "grad_norm": 0.26694273948669434, "learning_rate": 8.17653603091977e-06, "loss": 0.3961, "step": 30076 }, { "epoch": 0.5584001366389058, "grad_norm": 0.624577522277832, "learning_rate": 8.17538910963682e-06, "loss": 0.2291, "step": 30078 }, { "epoch": 0.5584372667763243, "grad_norm": 0.2896917164325714, "learning_rate": 8.174242213181358e-06, "loss": 0.3155, "step": 30080 }, { "epoch": 0.558474396913743, "grad_norm": 0.4383455216884613, "learning_rate": 8.17309534156899e-06, "loss": 0.2642, "step": 30082 }, { "epoch": 0.5585115270511616, "grad_norm": 0.4391283094882965, "learning_rate": 8.171948494815321e-06, "loss": 0.4211, "step": 30084 }, { "epoch": 0.5585486571885803, "grad_norm": 0.3569841980934143, "learning_rate": 8.170801672935961e-06, "loss": 0.2293, "step": 30086 }, { "epoch": 0.5585857873259988, "grad_norm": 0.5980402231216431, "learning_rate": 8.169654875946508e-06, "loss": 0.3138, "step": 30088 }, { "epoch": 0.5586229174634175, "grad_norm": 0.3345278203487396, "learning_rate": 8.16850810386257e-06, "loss": 0.1601, "step": 30090 }, { "epoch": 0.5586600476008362, "grad_norm": 0.2378118485212326, "learning_rate": 8.167361356699756e-06, "loss": 0.1996, "step": 30092 }, { "epoch": 0.5586971777382548, "grad_norm": 0.3939962685108185, "learning_rate": 8.166214634473658e-06, "loss": 0.3647, "step": 30094 }, { "epoch": 0.5587343078756735, "grad_norm": 0.33015215396881104, "learning_rate": 8.165067937199888e-06, "loss": 0.1566, "step": 30096 }, { "epoch": 0.558771438013092, "grad_norm": 0.3020341098308563, "learning_rate": 8.163921264894047e-06, "loss": 0.3261, "step": 30098 }, { "epoch": 0.5588085681505107, "grad_norm": 0.3373798727989197, "learning_rate": 8.162774617571739e-06, "loss": 0.3478, "step": 30100 }, { "epoch": 0.5588456982879294, "grad_norm": 0.5846282243728638, "learning_rate": 8.161627995248562e-06, "loss": 0.4073, "step": 30102 }, { "epoch": 0.558882828425348, "grad_norm": 0.37299925088882446, "learning_rate": 8.160481397940128e-06, "loss": 0.2078, "step": 30104 }, { "epoch": 0.5589199585627667, "grad_norm": 0.28174498677253723, "learning_rate": 8.159334825662026e-06, "loss": 0.1936, "step": 30106 }, { "epoch": 0.5589570887001852, "grad_norm": 0.55593341588974, "learning_rate": 8.158188278429867e-06, "loss": 0.1707, "step": 30108 }, { "epoch": 0.5589942188376039, "grad_norm": 0.4053742289543152, "learning_rate": 8.157041756259247e-06, "loss": 0.2618, "step": 30110 }, { "epoch": 0.5590313489750226, "grad_norm": 0.29380252957344055, "learning_rate": 8.155895259165769e-06, "loss": 0.2076, "step": 30112 }, { "epoch": 0.5590684791124412, "grad_norm": 0.35082343220710754, "learning_rate": 8.15474878716503e-06, "loss": 0.1715, "step": 30114 }, { "epoch": 0.5591056092498599, "grad_norm": 0.6001505851745605, "learning_rate": 8.15360234027264e-06, "loss": 0.2689, "step": 30116 }, { "epoch": 0.5591427393872784, "grad_norm": 0.402310848236084, "learning_rate": 8.152455918504185e-06, "loss": 0.2558, "step": 30118 }, { "epoch": 0.5591798695246971, "grad_norm": 0.34992268681526184, "learning_rate": 8.151309521875275e-06, "loss": 0.4904, "step": 30120 }, { "epoch": 0.5592169996621158, "grad_norm": 0.4056401550769806, "learning_rate": 8.1501631504015e-06, "loss": 0.3431, "step": 30122 }, { "epoch": 0.5592541297995344, "grad_norm": 0.5071715116500854, "learning_rate": 8.149016804098467e-06, "loss": 0.2207, "step": 30124 }, { "epoch": 0.5592912599369531, "grad_norm": 0.36764124035835266, "learning_rate": 8.14787048298177e-06, "loss": 0.4472, "step": 30126 }, { "epoch": 0.5593283900743716, "grad_norm": 0.2616311013698578, "learning_rate": 8.146724187067008e-06, "loss": 0.2446, "step": 30128 }, { "epoch": 0.5593655202117903, "grad_norm": 0.39513325691223145, "learning_rate": 8.14557791636978e-06, "loss": 0.388, "step": 30130 }, { "epoch": 0.559402650349209, "grad_norm": 0.324196994304657, "learning_rate": 8.144431670905683e-06, "loss": 0.3151, "step": 30132 }, { "epoch": 0.5594397804866276, "grad_norm": 0.4376422166824341, "learning_rate": 8.14328545069031e-06, "loss": 0.3717, "step": 30134 }, { "epoch": 0.5594769106240463, "grad_norm": 0.6306324005126953, "learning_rate": 8.14213925573926e-06, "loss": 0.2731, "step": 30136 }, { "epoch": 0.5595140407614648, "grad_norm": 0.3850463628768921, "learning_rate": 8.14099308606813e-06, "loss": 0.1809, "step": 30138 }, { "epoch": 0.5595511708988835, "grad_norm": 0.4078371822834015, "learning_rate": 8.139846941692517e-06, "loss": 0.1432, "step": 30140 }, { "epoch": 0.5595883010363021, "grad_norm": 0.47897517681121826, "learning_rate": 8.138700822628018e-06, "loss": 0.3573, "step": 30142 }, { "epoch": 0.5596254311737208, "grad_norm": 0.4120645821094513, "learning_rate": 8.137554728890222e-06, "loss": 0.4687, "step": 30144 }, { "epoch": 0.5596625613111395, "grad_norm": 0.307976096868515, "learning_rate": 8.136408660494728e-06, "loss": 0.3757, "step": 30146 }, { "epoch": 0.559699691448558, "grad_norm": 0.3964973986148834, "learning_rate": 8.13526261745713e-06, "loss": 0.1903, "step": 30148 }, { "epoch": 0.5597368215859767, "grad_norm": 0.36342376470565796, "learning_rate": 8.134116599793023e-06, "loss": 0.3501, "step": 30150 }, { "epoch": 0.5597739517233953, "grad_norm": 0.42313218116760254, "learning_rate": 8.132970607517998e-06, "loss": 0.3628, "step": 30152 }, { "epoch": 0.559811081860814, "grad_norm": 0.3811320662498474, "learning_rate": 8.131824640647655e-06, "loss": 0.1791, "step": 30154 }, { "epoch": 0.5598482119982326, "grad_norm": 0.3339000344276428, "learning_rate": 8.13067869919758e-06, "loss": 0.0821, "step": 30156 }, { "epoch": 0.5598853421356512, "grad_norm": 0.3232860565185547, "learning_rate": 8.129532783183366e-06, "loss": 0.4528, "step": 30158 }, { "epoch": 0.5599224722730699, "grad_norm": 0.37227779626846313, "learning_rate": 8.128386892620611e-06, "loss": 0.2232, "step": 30160 }, { "epoch": 0.5599596024104885, "grad_norm": 0.36323127150535583, "learning_rate": 8.127241027524904e-06, "loss": 0.1981, "step": 30162 }, { "epoch": 0.5599967325479072, "grad_norm": 0.5447588562965393, "learning_rate": 8.126095187911836e-06, "loss": 0.1229, "step": 30164 }, { "epoch": 0.5600338626853258, "grad_norm": 0.3681600093841553, "learning_rate": 8.124949373797001e-06, "loss": 0.3909, "step": 30166 }, { "epoch": 0.5600709928227444, "grad_norm": 0.32206544280052185, "learning_rate": 8.123803585195991e-06, "loss": 0.217, "step": 30168 }, { "epoch": 0.5601081229601631, "grad_norm": 0.46701258420944214, "learning_rate": 8.12265782212439e-06, "loss": 0.3525, "step": 30170 }, { "epoch": 0.5601452530975817, "grad_norm": 0.7677059173583984, "learning_rate": 8.121512084597796e-06, "loss": 0.1731, "step": 30172 }, { "epoch": 0.5601823832350004, "grad_norm": 0.6768728494644165, "learning_rate": 8.120366372631792e-06, "loss": 0.3812, "step": 30174 }, { "epoch": 0.560219513372419, "grad_norm": 0.2740638852119446, "learning_rate": 8.119220686241974e-06, "loss": 0.2187, "step": 30176 }, { "epoch": 0.5602566435098376, "grad_norm": 0.5069589614868164, "learning_rate": 8.118075025443927e-06, "loss": 0.3207, "step": 30178 }, { "epoch": 0.5602937736472563, "grad_norm": 0.28309041261672974, "learning_rate": 8.116929390253247e-06, "loss": 0.2697, "step": 30180 }, { "epoch": 0.5603309037846749, "grad_norm": 0.4566730260848999, "learning_rate": 8.115783780685512e-06, "loss": 0.2456, "step": 30182 }, { "epoch": 0.5603680339220936, "grad_norm": 0.4592951834201813, "learning_rate": 8.114638196756319e-06, "loss": 0.1655, "step": 30184 }, { "epoch": 0.5604051640595122, "grad_norm": 0.42102646827697754, "learning_rate": 8.113492638481251e-06, "loss": 0.2037, "step": 30186 }, { "epoch": 0.5604422941969308, "grad_norm": 0.47339728474617004, "learning_rate": 8.112347105875897e-06, "loss": 0.1962, "step": 30188 }, { "epoch": 0.5604794243343495, "grad_norm": 0.30878114700317383, "learning_rate": 8.111201598955844e-06, "loss": 0.3053, "step": 30190 }, { "epoch": 0.5605165544717681, "grad_norm": 0.3250124156475067, "learning_rate": 8.110056117736685e-06, "loss": 0.4341, "step": 30192 }, { "epoch": 0.5605536846091868, "grad_norm": 0.5107461214065552, "learning_rate": 8.108910662233999e-06, "loss": 0.1095, "step": 30194 }, { "epoch": 0.5605908147466053, "grad_norm": 0.39063841104507446, "learning_rate": 8.10776523246337e-06, "loss": 0.3132, "step": 30196 }, { "epoch": 0.560627944884024, "grad_norm": 0.3738793730735779, "learning_rate": 8.106619828440391e-06, "loss": 0.2627, "step": 30198 }, { "epoch": 0.5606650750214427, "grad_norm": 0.6023288369178772, "learning_rate": 8.105474450180645e-06, "loss": 0.375, "step": 30200 }, { "epoch": 0.5607022051588613, "grad_norm": 0.5444366931915283, "learning_rate": 8.104329097699718e-06, "loss": 0.3811, "step": 30202 }, { "epoch": 0.56073933529628, "grad_norm": 0.256476491689682, "learning_rate": 8.103183771013195e-06, "loss": 0.4002, "step": 30204 }, { "epoch": 0.5607764654336985, "grad_norm": 0.25194743275642395, "learning_rate": 8.102038470136657e-06, "loss": 0.2382, "step": 30206 }, { "epoch": 0.5608135955711172, "grad_norm": 0.39973315596580505, "learning_rate": 8.10089319508569e-06, "loss": 0.3115, "step": 30208 }, { "epoch": 0.5608507257085359, "grad_norm": 0.3081108331680298, "learning_rate": 8.099747945875878e-06, "loss": 0.0589, "step": 30210 }, { "epoch": 0.5608878558459545, "grad_norm": 0.27437108755111694, "learning_rate": 8.098602722522803e-06, "loss": 0.2016, "step": 30212 }, { "epoch": 0.5609249859833731, "grad_norm": 0.618203341960907, "learning_rate": 8.097457525042053e-06, "loss": 0.1776, "step": 30214 }, { "epoch": 0.5609621161207917, "grad_norm": 0.45953524112701416, "learning_rate": 8.096312353449205e-06, "loss": 0.3395, "step": 30216 }, { "epoch": 0.5609992462582104, "grad_norm": 0.3742344379425049, "learning_rate": 8.095167207759848e-06, "loss": 0.3659, "step": 30218 }, { "epoch": 0.5610363763956291, "grad_norm": 0.5747145414352417, "learning_rate": 8.094022087989557e-06, "loss": 0.1863, "step": 30220 }, { "epoch": 0.5610735065330477, "grad_norm": 0.2893756330013275, "learning_rate": 8.092876994153913e-06, "loss": 0.1415, "step": 30222 }, { "epoch": 0.5611106366704663, "grad_norm": 0.38143131136894226, "learning_rate": 8.091731926268504e-06, "loss": 0.2908, "step": 30224 }, { "epoch": 0.5611477668078849, "grad_norm": 0.27126309275627136, "learning_rate": 8.090586884348906e-06, "loss": 0.4208, "step": 30226 }, { "epoch": 0.5611848969453036, "grad_norm": 0.3374236822128296, "learning_rate": 8.089441868410702e-06, "loss": 0.3781, "step": 30228 }, { "epoch": 0.5612220270827223, "grad_norm": 0.2948911190032959, "learning_rate": 8.088296878469475e-06, "loss": 0.2502, "step": 30230 }, { "epoch": 0.5612591572201409, "grad_norm": 0.42715033888816833, "learning_rate": 8.087151914540796e-06, "loss": 0.1814, "step": 30232 }, { "epoch": 0.5612962873575595, "grad_norm": 0.6408441662788391, "learning_rate": 8.08600697664025e-06, "loss": 0.3599, "step": 30234 }, { "epoch": 0.5613334174949781, "grad_norm": 0.38711637258529663, "learning_rate": 8.084862064783418e-06, "loss": 0.1933, "step": 30236 }, { "epoch": 0.5613705476323968, "grad_norm": 0.38149070739746094, "learning_rate": 8.083717178985877e-06, "loss": 0.195, "step": 30238 }, { "epoch": 0.5614076777698154, "grad_norm": 0.29618707299232483, "learning_rate": 8.082572319263204e-06, "loss": 0.1664, "step": 30240 }, { "epoch": 0.5614448079072341, "grad_norm": 0.4671536087989807, "learning_rate": 8.081427485630981e-06, "loss": 0.2415, "step": 30242 }, { "epoch": 0.5614819380446527, "grad_norm": 0.37555959820747375, "learning_rate": 8.080282678104781e-06, "loss": 0.3336, "step": 30244 }, { "epoch": 0.5615190681820713, "grad_norm": 0.3112461566925049, "learning_rate": 8.079137896700183e-06, "loss": 0.1574, "step": 30246 }, { "epoch": 0.56155619831949, "grad_norm": 0.568596601486206, "learning_rate": 8.077993141432764e-06, "loss": 0.2388, "step": 30248 }, { "epoch": 0.5615933284569086, "grad_norm": 0.33753904700279236, "learning_rate": 8.076848412318102e-06, "loss": 0.3679, "step": 30250 }, { "epoch": 0.5616304585943273, "grad_norm": 0.4225180745124817, "learning_rate": 8.075703709371771e-06, "loss": 0.2318, "step": 30252 }, { "epoch": 0.5616675887317459, "grad_norm": 0.3956039547920227, "learning_rate": 8.074559032609352e-06, "loss": 0.2045, "step": 30254 }, { "epoch": 0.5617047188691645, "grad_norm": 0.31032007932662964, "learning_rate": 8.073414382046418e-06, "loss": 0.3747, "step": 30256 }, { "epoch": 0.5617418490065832, "grad_norm": 0.3781510889530182, "learning_rate": 8.072269757698541e-06, "loss": 0.4621, "step": 30258 }, { "epoch": 0.5617789791440018, "grad_norm": 0.3770076632499695, "learning_rate": 8.071125159581298e-06, "loss": 0.2185, "step": 30260 }, { "epoch": 0.5618161092814205, "grad_norm": 0.3826526701450348, "learning_rate": 8.069980587710264e-06, "loss": 0.3527, "step": 30262 }, { "epoch": 0.5618532394188391, "grad_norm": 0.9721832275390625, "learning_rate": 8.068836042101014e-06, "loss": 0.2795, "step": 30264 }, { "epoch": 0.5618903695562577, "grad_norm": 0.6243985891342163, "learning_rate": 8.06769152276912e-06, "loss": 0.4041, "step": 30266 }, { "epoch": 0.5619274996936764, "grad_norm": 0.5163834691047668, "learning_rate": 8.066547029730158e-06, "loss": 0.237, "step": 30268 }, { "epoch": 0.561964629831095, "grad_norm": 0.35948845744132996, "learning_rate": 8.065402562999701e-06, "loss": 0.2491, "step": 30270 }, { "epoch": 0.5620017599685136, "grad_norm": 0.3760967552661896, "learning_rate": 8.064258122593316e-06, "loss": 0.1981, "step": 30272 }, { "epoch": 0.5620388901059323, "grad_norm": 0.28214970231056213, "learning_rate": 8.063113708526582e-06, "loss": 0.2033, "step": 30274 }, { "epoch": 0.5620760202433509, "grad_norm": 0.4050746560096741, "learning_rate": 8.061969320815066e-06, "loss": 0.2324, "step": 30276 }, { "epoch": 0.5621131503807696, "grad_norm": 0.3127691149711609, "learning_rate": 8.060824959474346e-06, "loss": 0.1505, "step": 30278 }, { "epoch": 0.5621502805181882, "grad_norm": 0.44916513562202454, "learning_rate": 8.059680624519993e-06, "loss": 0.2206, "step": 30280 }, { "epoch": 0.5621874106556068, "grad_norm": 0.3895914852619171, "learning_rate": 8.05853631596757e-06, "loss": 0.3028, "step": 30282 }, { "epoch": 0.5622245407930255, "grad_norm": 0.30895158648490906, "learning_rate": 8.057392033832652e-06, "loss": 0.2037, "step": 30284 }, { "epoch": 0.5622616709304441, "grad_norm": 0.5032930970191956, "learning_rate": 8.05624777813081e-06, "loss": 0.3849, "step": 30286 }, { "epoch": 0.5622988010678628, "grad_norm": 0.3455074429512024, "learning_rate": 8.055103548877614e-06, "loss": 0.2426, "step": 30288 }, { "epoch": 0.5623359312052814, "grad_norm": 0.2796286940574646, "learning_rate": 8.053959346088632e-06, "loss": 0.4726, "step": 30290 }, { "epoch": 0.5623730613427, "grad_norm": 0.3647927939891815, "learning_rate": 8.052815169779434e-06, "loss": 0.3179, "step": 30292 }, { "epoch": 0.5624101914801186, "grad_norm": 0.5455312728881836, "learning_rate": 8.051671019965595e-06, "loss": 0.145, "step": 30294 }, { "epoch": 0.5624473216175373, "grad_norm": 0.4319457709789276, "learning_rate": 8.05052689666267e-06, "loss": 0.3003, "step": 30296 }, { "epoch": 0.562484451754956, "grad_norm": 0.4691339135169983, "learning_rate": 8.049382799886237e-06, "loss": 0.2039, "step": 30298 }, { "epoch": 0.5625215818923746, "grad_norm": 0.2796179950237274, "learning_rate": 8.048238729651864e-06, "loss": 0.3252, "step": 30300 }, { "epoch": 0.5625587120297932, "grad_norm": 0.2572115659713745, "learning_rate": 8.047094685975112e-06, "loss": 0.207, "step": 30302 }, { "epoch": 0.5625958421672118, "grad_norm": 0.5285819172859192, "learning_rate": 8.045950668871551e-06, "loss": 0.2288, "step": 30304 }, { "epoch": 0.5626329723046305, "grad_norm": 0.24390468001365662, "learning_rate": 8.044806678356755e-06, "loss": 0.1286, "step": 30306 }, { "epoch": 0.5626701024420492, "grad_norm": 0.5649982690811157, "learning_rate": 8.043662714446279e-06, "loss": 0.4846, "step": 30308 }, { "epoch": 0.5627072325794678, "grad_norm": 0.48388344049453735, "learning_rate": 8.042518777155694e-06, "loss": 0.313, "step": 30310 }, { "epoch": 0.5627443627168864, "grad_norm": 0.26966148614883423, "learning_rate": 8.041374866500564e-06, "loss": 0.2665, "step": 30312 }, { "epoch": 0.562781492854305, "grad_norm": 0.3024383783340454, "learning_rate": 8.040230982496455e-06, "loss": 0.1354, "step": 30314 }, { "epoch": 0.5628186229917237, "grad_norm": 0.4717419445514679, "learning_rate": 8.039087125158932e-06, "loss": 0.2333, "step": 30316 }, { "epoch": 0.5628557531291424, "grad_norm": 0.3630298376083374, "learning_rate": 8.037943294503565e-06, "loss": 0.4768, "step": 30318 }, { "epoch": 0.562892883266561, "grad_norm": 0.2688644528388977, "learning_rate": 8.036799490545907e-06, "loss": 0.4132, "step": 30320 }, { "epoch": 0.5629300134039796, "grad_norm": 0.41613903641700745, "learning_rate": 8.03565571330153e-06, "loss": 0.3567, "step": 30322 }, { "epoch": 0.5629671435413982, "grad_norm": 0.5882170796394348, "learning_rate": 8.034511962785994e-06, "loss": 0.3226, "step": 30324 }, { "epoch": 0.5630042736788169, "grad_norm": 0.45509546995162964, "learning_rate": 8.03336823901486e-06, "loss": 0.2357, "step": 30326 }, { "epoch": 0.5630414038162356, "grad_norm": 0.19202972948551178, "learning_rate": 8.032224542003696e-06, "loss": 0.2367, "step": 30328 }, { "epoch": 0.5630785339536541, "grad_norm": 0.42827412486076355, "learning_rate": 8.031080871768063e-06, "loss": 0.3667, "step": 30330 }, { "epoch": 0.5631156640910728, "grad_norm": 0.3420211374759674, "learning_rate": 8.029937228323525e-06, "loss": 0.391, "step": 30332 }, { "epoch": 0.5631527942284914, "grad_norm": 0.2570461630821228, "learning_rate": 8.028793611685635e-06, "loss": 0.1686, "step": 30334 }, { "epoch": 0.5631899243659101, "grad_norm": 0.3125964403152466, "learning_rate": 8.02765002186996e-06, "loss": 0.1648, "step": 30336 }, { "epoch": 0.5632270545033288, "grad_norm": 0.24024170637130737, "learning_rate": 8.02650645889206e-06, "loss": 0.2705, "step": 30338 }, { "epoch": 0.5632641846407473, "grad_norm": 0.3164975643157959, "learning_rate": 8.025362922767497e-06, "loss": 0.272, "step": 30340 }, { "epoch": 0.563301314778166, "grad_norm": 0.3534393608570099, "learning_rate": 8.02421941351183e-06, "loss": 0.3129, "step": 30342 }, { "epoch": 0.5633384449155846, "grad_norm": 0.5638948082923889, "learning_rate": 8.02307593114062e-06, "loss": 0.2103, "step": 30344 }, { "epoch": 0.5633755750530033, "grad_norm": 0.2293599545955658, "learning_rate": 8.021932475669423e-06, "loss": 0.1865, "step": 30346 }, { "epoch": 0.5634127051904219, "grad_norm": 0.390081524848938, "learning_rate": 8.0207890471138e-06, "loss": 0.2021, "step": 30348 }, { "epoch": 0.5634498353278405, "grad_norm": 0.4304048717021942, "learning_rate": 8.019645645489308e-06, "loss": 0.29, "step": 30350 }, { "epoch": 0.5634869654652592, "grad_norm": 0.571884274482727, "learning_rate": 8.01850227081151e-06, "loss": 0.2853, "step": 30352 }, { "epoch": 0.5635240956026778, "grad_norm": 0.4301969110965729, "learning_rate": 8.01735892309596e-06, "loss": 0.2862, "step": 30354 }, { "epoch": 0.5635612257400965, "grad_norm": 0.2893680930137634, "learning_rate": 8.016215602358218e-06, "loss": 0.38, "step": 30356 }, { "epoch": 0.5635983558775151, "grad_norm": 0.320781946182251, "learning_rate": 8.015072308613836e-06, "loss": 0.4167, "step": 30358 }, { "epoch": 0.5636354860149337, "grad_norm": 0.5543734431266785, "learning_rate": 8.013929041878375e-06, "loss": 0.393, "step": 30360 }, { "epoch": 0.5636726161523524, "grad_norm": 0.37209829688072205, "learning_rate": 8.012785802167394e-06, "loss": 0.2306, "step": 30362 }, { "epoch": 0.563709746289771, "grad_norm": 0.3098314106464386, "learning_rate": 8.011642589496442e-06, "loss": 0.1432, "step": 30364 }, { "epoch": 0.5637468764271897, "grad_norm": 0.4104865789413452, "learning_rate": 8.010499403881079e-06, "loss": 0.3695, "step": 30366 }, { "epoch": 0.5637840065646083, "grad_norm": 0.3670860230922699, "learning_rate": 8.009356245336865e-06, "loss": 0.2937, "step": 30368 }, { "epoch": 0.5638211367020269, "grad_norm": 0.335248202085495, "learning_rate": 8.008213113879344e-06, "loss": 0.0961, "step": 30370 }, { "epoch": 0.5638582668394456, "grad_norm": 0.3985885977745056, "learning_rate": 8.007070009524077e-06, "loss": 0.2918, "step": 30372 }, { "epoch": 0.5638953969768642, "grad_norm": 0.3513612151145935, "learning_rate": 8.00592693228662e-06, "loss": 0.285, "step": 30374 }, { "epoch": 0.5639325271142829, "grad_norm": 0.4683316648006439, "learning_rate": 8.004783882182523e-06, "loss": 0.1828, "step": 30376 }, { "epoch": 0.5639696572517015, "grad_norm": 0.19643741846084595, "learning_rate": 8.00364085922734e-06, "loss": 0.2809, "step": 30378 }, { "epoch": 0.5640067873891201, "grad_norm": 0.5451693534851074, "learning_rate": 8.002497863436625e-06, "loss": 0.1489, "step": 30380 }, { "epoch": 0.5640439175265388, "grad_norm": 0.3660389482975006, "learning_rate": 8.001354894825936e-06, "loss": 0.26, "step": 30382 }, { "epoch": 0.5640810476639574, "grad_norm": 0.48008474707603455, "learning_rate": 8.000211953410816e-06, "loss": 0.2141, "step": 30384 }, { "epoch": 0.5641181778013761, "grad_norm": 0.36755871772766113, "learning_rate": 7.999069039206822e-06, "loss": 0.2089, "step": 30386 }, { "epoch": 0.5641553079387946, "grad_norm": 0.4247550070285797, "learning_rate": 7.997926152229505e-06, "loss": 0.2291, "step": 30388 }, { "epoch": 0.5641924380762133, "grad_norm": 0.42695823311805725, "learning_rate": 7.996783292494415e-06, "loss": 0.2526, "step": 30390 }, { "epoch": 0.5642295682136319, "grad_norm": 0.4390032887458801, "learning_rate": 7.995640460017103e-06, "loss": 0.3591, "step": 30392 }, { "epoch": 0.5642666983510506, "grad_norm": 0.37121182680130005, "learning_rate": 7.994497654813126e-06, "loss": 0.4137, "step": 30394 }, { "epoch": 0.5643038284884693, "grad_norm": 0.36116474866867065, "learning_rate": 7.993354876898026e-06, "loss": 0.2493, "step": 30396 }, { "epoch": 0.5643409586258878, "grad_norm": 0.3571341037750244, "learning_rate": 7.992212126287355e-06, "loss": 0.0537, "step": 30398 }, { "epoch": 0.5643780887633065, "grad_norm": 0.4802612066268921, "learning_rate": 7.99106940299666e-06, "loss": 0.3075, "step": 30400 }, { "epoch": 0.5644152189007251, "grad_norm": 0.4084606468677521, "learning_rate": 7.989926707041495e-06, "loss": 0.3401, "step": 30402 }, { "epoch": 0.5644523490381438, "grad_norm": 0.35473909974098206, "learning_rate": 7.98878403843741e-06, "loss": 0.3276, "step": 30404 }, { "epoch": 0.5644894791755625, "grad_norm": 0.43000349402427673, "learning_rate": 7.987641397199948e-06, "loss": 0.4182, "step": 30406 }, { "epoch": 0.564526609312981, "grad_norm": 0.26502829790115356, "learning_rate": 7.98649878334466e-06, "loss": 0.3769, "step": 30408 }, { "epoch": 0.5645637394503997, "grad_norm": 0.24508610367774963, "learning_rate": 7.985356196887089e-06, "loss": 0.2474, "step": 30410 }, { "epoch": 0.5646008695878183, "grad_norm": 0.31696271896362305, "learning_rate": 7.984213637842787e-06, "loss": 0.3023, "step": 30412 }, { "epoch": 0.564637999725237, "grad_norm": 0.352560818195343, "learning_rate": 7.983071106227299e-06, "loss": 0.2853, "step": 30414 }, { "epoch": 0.5646751298626557, "grad_norm": 0.2814945578575134, "learning_rate": 7.981928602056173e-06, "loss": 0.2863, "step": 30416 }, { "epoch": 0.5647122600000742, "grad_norm": 0.5061913132667542, "learning_rate": 7.980786125344952e-06, "loss": 0.266, "step": 30418 }, { "epoch": 0.5647493901374929, "grad_norm": 0.642584502696991, "learning_rate": 7.979643676109188e-06, "loss": 0.3441, "step": 30420 }, { "epoch": 0.5647865202749115, "grad_norm": 0.3245680034160614, "learning_rate": 7.978501254364419e-06, "loss": 0.2654, "step": 30422 }, { "epoch": 0.5648236504123302, "grad_norm": 0.4735945761203766, "learning_rate": 7.97735886012619e-06, "loss": 0.2697, "step": 30424 }, { "epoch": 0.5648607805497489, "grad_norm": 0.5074973702430725, "learning_rate": 7.976216493410053e-06, "loss": 0.3992, "step": 30426 }, { "epoch": 0.5648979106871674, "grad_norm": 0.7065256237983704, "learning_rate": 7.975074154231545e-06, "loss": 0.5596, "step": 30428 }, { "epoch": 0.5649350408245861, "grad_norm": 0.6071568131446838, "learning_rate": 7.973931842606212e-06, "loss": 0.328, "step": 30430 }, { "epoch": 0.5649721709620047, "grad_norm": 0.33255162835121155, "learning_rate": 7.972789558549601e-06, "loss": 0.4342, "step": 30432 }, { "epoch": 0.5650093010994234, "grad_norm": 0.3089801073074341, "learning_rate": 7.97164730207725e-06, "loss": 0.1307, "step": 30434 }, { "epoch": 0.5650464312368421, "grad_norm": 0.5067664384841919, "learning_rate": 7.970505073204702e-06, "loss": 0.3456, "step": 30436 }, { "epoch": 0.5650835613742606, "grad_norm": 0.3979984223842621, "learning_rate": 7.969362871947503e-06, "loss": 0.2428, "step": 30438 }, { "epoch": 0.5651206915116793, "grad_norm": 0.37151622772216797, "learning_rate": 7.968220698321191e-06, "loss": 0.2866, "step": 30440 }, { "epoch": 0.5651578216490979, "grad_norm": 0.24620410799980164, "learning_rate": 7.967078552341312e-06, "loss": 0.3118, "step": 30442 }, { "epoch": 0.5651949517865166, "grad_norm": 0.48776742815971375, "learning_rate": 7.965936434023405e-06, "loss": 0.2363, "step": 30444 }, { "epoch": 0.5652320819239351, "grad_norm": 0.2626207172870636, "learning_rate": 7.964794343383007e-06, "loss": 0.3689, "step": 30446 }, { "epoch": 0.5652692120613538, "grad_norm": 0.4751438498497009, "learning_rate": 7.963652280435665e-06, "loss": 0.3387, "step": 30448 }, { "epoch": 0.5653063421987725, "grad_norm": 0.7058199644088745, "learning_rate": 7.962510245196913e-06, "loss": 0.357, "step": 30450 }, { "epoch": 0.5653434723361911, "grad_norm": 0.43393072485923767, "learning_rate": 7.961368237682294e-06, "loss": 0.1695, "step": 30452 }, { "epoch": 0.5653806024736098, "grad_norm": 0.3317243754863739, "learning_rate": 7.960226257907348e-06, "loss": 0.2018, "step": 30454 }, { "epoch": 0.5654177326110283, "grad_norm": 0.534257173538208, "learning_rate": 7.959084305887613e-06, "loss": 0.1866, "step": 30456 }, { "epoch": 0.565454862748447, "grad_norm": 0.43892350792884827, "learning_rate": 7.957942381638628e-06, "loss": 0.2552, "step": 30458 }, { "epoch": 0.5654919928858657, "grad_norm": 0.9451227784156799, "learning_rate": 7.95680048517593e-06, "loss": 0.2506, "step": 30460 }, { "epoch": 0.5655291230232843, "grad_norm": 0.6164782047271729, "learning_rate": 7.955658616515058e-06, "loss": 0.2304, "step": 30462 }, { "epoch": 0.565566253160703, "grad_norm": 0.5831804275512695, "learning_rate": 7.954516775671547e-06, "loss": 0.2441, "step": 30464 }, { "epoch": 0.5656033832981215, "grad_norm": 0.31520023941993713, "learning_rate": 7.953374962660933e-06, "loss": 0.2924, "step": 30466 }, { "epoch": 0.5656405134355402, "grad_norm": 0.4159213900566101, "learning_rate": 7.952233177498761e-06, "loss": 0.3522, "step": 30468 }, { "epoch": 0.5656776435729589, "grad_norm": 0.4521883726119995, "learning_rate": 7.951091420200563e-06, "loss": 0.3285, "step": 30470 }, { "epoch": 0.5657147737103775, "grad_norm": 0.4095653295516968, "learning_rate": 7.949949690781868e-06, "loss": 0.3415, "step": 30472 }, { "epoch": 0.5657519038477962, "grad_norm": 0.359004944562912, "learning_rate": 7.948807989258219e-06, "loss": 0.1731, "step": 30474 }, { "epoch": 0.5657890339852147, "grad_norm": 0.5125638246536255, "learning_rate": 7.947666315645148e-06, "loss": 0.4333, "step": 30476 }, { "epoch": 0.5658261641226334, "grad_norm": 0.2509605586528778, "learning_rate": 7.946524669958194e-06, "loss": 0.1732, "step": 30478 }, { "epoch": 0.5658632942600521, "grad_norm": 0.45520615577697754, "learning_rate": 7.945383052212885e-06, "loss": 0.2306, "step": 30480 }, { "epoch": 0.5659004243974707, "grad_norm": 0.45426324009895325, "learning_rate": 7.944241462424762e-06, "loss": 0.2353, "step": 30482 }, { "epoch": 0.5659375545348894, "grad_norm": 0.4015585482120514, "learning_rate": 7.943099900609352e-06, "loss": 0.3062, "step": 30484 }, { "epoch": 0.5659746846723079, "grad_norm": 0.5005658864974976, "learning_rate": 7.941958366782191e-06, "loss": 0.2741, "step": 30486 }, { "epoch": 0.5660118148097266, "grad_norm": 0.5383375287055969, "learning_rate": 7.940816860958813e-06, "loss": 0.3576, "step": 30488 }, { "epoch": 0.5660489449471452, "grad_norm": 0.3859419822692871, "learning_rate": 7.939675383154752e-06, "loss": 0.1459, "step": 30490 }, { "epoch": 0.5660860750845639, "grad_norm": 0.5962996482849121, "learning_rate": 7.938533933385534e-06, "loss": 0.3759, "step": 30492 }, { "epoch": 0.5661232052219826, "grad_norm": 0.3544987142086029, "learning_rate": 7.937392511666699e-06, "loss": 0.184, "step": 30494 }, { "epoch": 0.5661603353594011, "grad_norm": 0.6087614893913269, "learning_rate": 7.93625111801377e-06, "loss": 0.3119, "step": 30496 }, { "epoch": 0.5661974654968198, "grad_norm": 0.34794753789901733, "learning_rate": 7.93510975244228e-06, "loss": 0.1928, "step": 30498 }, { "epoch": 0.5662345956342384, "grad_norm": 0.3439965844154358, "learning_rate": 7.933968414967763e-06, "loss": 0.2963, "step": 30500 }, { "epoch": 0.5662717257716571, "grad_norm": 0.4580877721309662, "learning_rate": 7.932827105605749e-06, "loss": 0.2651, "step": 30502 }, { "epoch": 0.5663088559090758, "grad_norm": 0.6592761874198914, "learning_rate": 7.931685824371765e-06, "loss": 0.4515, "step": 30504 }, { "epoch": 0.5663459860464943, "grad_norm": 0.384457528591156, "learning_rate": 7.930544571281341e-06, "loss": 0.1629, "step": 30506 }, { "epoch": 0.566383116183913, "grad_norm": 0.5042252540588379, "learning_rate": 7.92940334635001e-06, "loss": 0.2389, "step": 30508 }, { "epoch": 0.5664202463213316, "grad_norm": 0.2814830541610718, "learning_rate": 7.928262149593294e-06, "loss": 0.2782, "step": 30510 }, { "epoch": 0.5664573764587503, "grad_norm": 0.43002599477767944, "learning_rate": 7.927120981026724e-06, "loss": 0.1498, "step": 30512 }, { "epoch": 0.566494506596169, "grad_norm": 0.3772023022174835, "learning_rate": 7.92597984066583e-06, "loss": 0.3443, "step": 30514 }, { "epoch": 0.5665316367335875, "grad_norm": 0.2985260784626007, "learning_rate": 7.924838728526136e-06, "loss": 0.2348, "step": 30516 }, { "epoch": 0.5665687668710062, "grad_norm": 0.4417247772216797, "learning_rate": 7.923697644623171e-06, "loss": 0.2961, "step": 30518 }, { "epoch": 0.5666058970084248, "grad_norm": 0.6379358768463135, "learning_rate": 7.922556588972468e-06, "loss": 0.2178, "step": 30520 }, { "epoch": 0.5666430271458435, "grad_norm": 0.5376560688018799, "learning_rate": 7.92141556158954e-06, "loss": 0.2804, "step": 30522 }, { "epoch": 0.5666801572832622, "grad_norm": 0.4958012104034424, "learning_rate": 7.920274562489925e-06, "loss": 0.1379, "step": 30524 }, { "epoch": 0.5667172874206807, "grad_norm": 0.6264126300811768, "learning_rate": 7.919133591689141e-06, "loss": 0.3168, "step": 30526 }, { "epoch": 0.5667544175580994, "grad_norm": 0.27815523743629456, "learning_rate": 7.917992649202715e-06, "loss": 0.1476, "step": 30528 }, { "epoch": 0.566791547695518, "grad_norm": 0.44340285658836365, "learning_rate": 7.916851735046172e-06, "loss": 0.2596, "step": 30530 }, { "epoch": 0.5668286778329367, "grad_norm": 0.4616158902645111, "learning_rate": 7.915710849235043e-06, "loss": 0.3706, "step": 30532 }, { "epoch": 0.5668658079703554, "grad_norm": 0.2583787739276886, "learning_rate": 7.914569991784844e-06, "loss": 0.214, "step": 30534 }, { "epoch": 0.5669029381077739, "grad_norm": 0.40340855717658997, "learning_rate": 7.913429162711098e-06, "loss": 0.2585, "step": 30536 }, { "epoch": 0.5669400682451926, "grad_norm": 0.5663079619407654, "learning_rate": 7.912288362029331e-06, "loss": 0.2748, "step": 30538 }, { "epoch": 0.5669771983826112, "grad_norm": 0.5247089862823486, "learning_rate": 7.911147589755066e-06, "loss": 0.1839, "step": 30540 }, { "epoch": 0.5670143285200299, "grad_norm": 0.3321037292480469, "learning_rate": 7.910006845903829e-06, "loss": 0.3876, "step": 30542 }, { "epoch": 0.5670514586574484, "grad_norm": 0.45910540223121643, "learning_rate": 7.908866130491135e-06, "loss": 0.2434, "step": 30544 }, { "epoch": 0.5670885887948671, "grad_norm": 0.3489314615726471, "learning_rate": 7.907725443532513e-06, "loss": 0.1678, "step": 30546 }, { "epoch": 0.5671257189322858, "grad_norm": 0.40685340762138367, "learning_rate": 7.906584785043477e-06, "loss": 0.1489, "step": 30548 }, { "epoch": 0.5671628490697044, "grad_norm": 0.3494434058666229, "learning_rate": 7.905444155039553e-06, "loss": 0.2871, "step": 30550 }, { "epoch": 0.5671999792071231, "grad_norm": 0.3340587913990021, "learning_rate": 7.904303553536258e-06, "loss": 0.2398, "step": 30552 }, { "epoch": 0.5672371093445416, "grad_norm": 0.3975366950035095, "learning_rate": 7.903162980549118e-06, "loss": 0.2688, "step": 30554 }, { "epoch": 0.5672742394819603, "grad_norm": 0.4589778184890747, "learning_rate": 7.902022436093646e-06, "loss": 0.3695, "step": 30556 }, { "epoch": 0.567311369619379, "grad_norm": 0.4233415722846985, "learning_rate": 7.900881920185368e-06, "loss": 0.2856, "step": 30558 }, { "epoch": 0.5673484997567976, "grad_norm": 0.27418792247772217, "learning_rate": 7.899741432839796e-06, "loss": 0.3356, "step": 30560 }, { "epoch": 0.5673856298942163, "grad_norm": 0.590097963809967, "learning_rate": 7.898600974072454e-06, "loss": 0.162, "step": 30562 }, { "epoch": 0.5674227600316348, "grad_norm": 0.3199407160282135, "learning_rate": 7.897460543898859e-06, "loss": 0.3109, "step": 30564 }, { "epoch": 0.5674598901690535, "grad_norm": 0.2804213762283325, "learning_rate": 7.896320142334524e-06, "loss": 0.2686, "step": 30566 }, { "epoch": 0.5674970203064722, "grad_norm": 0.3871452510356903, "learning_rate": 7.895179769394972e-06, "loss": 0.2359, "step": 30568 }, { "epoch": 0.5675341504438908, "grad_norm": 0.4456593692302704, "learning_rate": 7.894039425095724e-06, "loss": 0.261, "step": 30570 }, { "epoch": 0.5675712805813095, "grad_norm": 0.3904845118522644, "learning_rate": 7.892899109452287e-06, "loss": 0.4949, "step": 30572 }, { "epoch": 0.567608410718728, "grad_norm": 0.21100293099880219, "learning_rate": 7.89175882248018e-06, "loss": 0.0518, "step": 30574 }, { "epoch": 0.5676455408561467, "grad_norm": 0.3031800389289856, "learning_rate": 7.890618564194925e-06, "loss": 0.1883, "step": 30576 }, { "epoch": 0.5676826709935654, "grad_norm": 0.3527359664440155, "learning_rate": 7.88947833461203e-06, "loss": 0.3409, "step": 30578 }, { "epoch": 0.567719801130984, "grad_norm": 0.45798730850219727, "learning_rate": 7.888338133747012e-06, "loss": 0.4397, "step": 30580 }, { "epoch": 0.5677569312684027, "grad_norm": 0.327743798494339, "learning_rate": 7.887197961615387e-06, "loss": 0.2987, "step": 30582 }, { "epoch": 0.5677940614058212, "grad_norm": 0.4482690691947937, "learning_rate": 7.886057818232675e-06, "loss": 0.4611, "step": 30584 }, { "epoch": 0.5678311915432399, "grad_norm": 0.2777673304080963, "learning_rate": 7.88491770361438e-06, "loss": 0.2406, "step": 30586 }, { "epoch": 0.5678683216806586, "grad_norm": 0.5364262461662292, "learning_rate": 7.883777617776019e-06, "loss": 0.1179, "step": 30588 }, { "epoch": 0.5679054518180772, "grad_norm": 0.43481430411338806, "learning_rate": 7.882637560733105e-06, "loss": 0.2247, "step": 30590 }, { "epoch": 0.5679425819554959, "grad_norm": 0.2687269449234009, "learning_rate": 7.881497532501153e-06, "loss": 0.2994, "step": 30592 }, { "epoch": 0.5679797120929144, "grad_norm": 0.31993377208709717, "learning_rate": 7.880357533095673e-06, "loss": 0.2845, "step": 30594 }, { "epoch": 0.5680168422303331, "grad_norm": 0.3220365643501282, "learning_rate": 7.87921756253218e-06, "loss": 0.2595, "step": 30596 }, { "epoch": 0.5680539723677517, "grad_norm": 0.3849681615829468, "learning_rate": 7.878077620826184e-06, "loss": 0.2725, "step": 30598 }, { "epoch": 0.5680911025051704, "grad_norm": 0.2802127003669739, "learning_rate": 7.876937707993192e-06, "loss": 0.2563, "step": 30600 }, { "epoch": 0.568128232642589, "grad_norm": 0.3694424033164978, "learning_rate": 7.87579782404872e-06, "loss": 0.257, "step": 30602 }, { "epoch": 0.5681653627800076, "grad_norm": 0.2947586178779602, "learning_rate": 7.874657969008277e-06, "loss": 0.3143, "step": 30604 }, { "epoch": 0.5682024929174263, "grad_norm": 0.32467716932296753, "learning_rate": 7.873518142887373e-06, "loss": 0.3619, "step": 30606 }, { "epoch": 0.5682396230548449, "grad_norm": 0.30035674571990967, "learning_rate": 7.87237834570152e-06, "loss": 0.1706, "step": 30608 }, { "epoch": 0.5682767531922636, "grad_norm": 0.3676237165927887, "learning_rate": 7.871238577466222e-06, "loss": 0.2987, "step": 30610 }, { "epoch": 0.5683138833296822, "grad_norm": 0.2749525308609009, "learning_rate": 7.870098838196992e-06, "loss": 0.3511, "step": 30612 }, { "epoch": 0.5683510134671008, "grad_norm": 0.5221290588378906, "learning_rate": 7.868959127909334e-06, "loss": 0.3261, "step": 30614 }, { "epoch": 0.5683881436045195, "grad_norm": 0.44048845767974854, "learning_rate": 7.867819446618762e-06, "loss": 0.3985, "step": 30616 }, { "epoch": 0.5684252737419381, "grad_norm": 0.3135998249053955, "learning_rate": 7.866679794340779e-06, "loss": 0.3617, "step": 30618 }, { "epoch": 0.5684624038793568, "grad_norm": 0.40057504177093506, "learning_rate": 7.865540171090895e-06, "loss": 0.3769, "step": 30620 }, { "epoch": 0.5684995340167754, "grad_norm": 0.35424596071243286, "learning_rate": 7.864400576884618e-06, "loss": 0.2173, "step": 30622 }, { "epoch": 0.568536664154194, "grad_norm": 0.4932957887649536, "learning_rate": 7.863261011737449e-06, "loss": 0.1415, "step": 30624 }, { "epoch": 0.5685737942916127, "grad_norm": 0.3345850706100464, "learning_rate": 7.862121475664897e-06, "loss": 0.3222, "step": 30626 }, { "epoch": 0.5686109244290313, "grad_norm": 0.32648009061813354, "learning_rate": 7.86098196868247e-06, "loss": 0.4924, "step": 30628 }, { "epoch": 0.56864805456645, "grad_norm": 0.42507317662239075, "learning_rate": 7.85984249080567e-06, "loss": 0.2344, "step": 30630 }, { "epoch": 0.5686851847038686, "grad_norm": 0.43238961696624756, "learning_rate": 7.858703042050002e-06, "loss": 0.2487, "step": 30632 }, { "epoch": 0.5687223148412872, "grad_norm": 0.42522186040878296, "learning_rate": 7.857563622430977e-06, "loss": 0.2099, "step": 30634 }, { "epoch": 0.5687594449787059, "grad_norm": 0.5822479128837585, "learning_rate": 7.85642423196409e-06, "loss": 0.3288, "step": 30636 }, { "epoch": 0.5687965751161245, "grad_norm": 0.46770092844963074, "learning_rate": 7.855284870664847e-06, "loss": 0.4783, "step": 30638 }, { "epoch": 0.5688337052535432, "grad_norm": 0.8809147477149963, "learning_rate": 7.854145538548755e-06, "loss": 0.1386, "step": 30640 }, { "epoch": 0.5688708353909617, "grad_norm": 0.38927674293518066, "learning_rate": 7.853006235631314e-06, "loss": 0.327, "step": 30642 }, { "epoch": 0.5689079655283804, "grad_norm": 0.34164538979530334, "learning_rate": 7.851866961928025e-06, "loss": 0.4552, "step": 30644 }, { "epoch": 0.5689450956657991, "grad_norm": 0.27964499592781067, "learning_rate": 7.850727717454399e-06, "loss": 0.1415, "step": 30646 }, { "epoch": 0.5689822258032177, "grad_norm": 0.35941168665885925, "learning_rate": 7.849588502225925e-06, "loss": 0.257, "step": 30648 }, { "epoch": 0.5690193559406364, "grad_norm": 0.12937435507774353, "learning_rate": 7.848449316258113e-06, "loss": 0.2192, "step": 30650 }, { "epoch": 0.5690564860780549, "grad_norm": 0.37457650899887085, "learning_rate": 7.847310159566458e-06, "loss": 0.2718, "step": 30652 }, { "epoch": 0.5690936162154736, "grad_norm": 0.5210898518562317, "learning_rate": 7.846171032166464e-06, "loss": 0.142, "step": 30654 }, { "epoch": 0.5691307463528923, "grad_norm": 0.3666442036628723, "learning_rate": 7.845031934073631e-06, "loss": 0.3833, "step": 30656 }, { "epoch": 0.5691678764903109, "grad_norm": 0.3594425916671753, "learning_rate": 7.843892865303464e-06, "loss": 0.1405, "step": 30658 }, { "epoch": 0.5692050066277295, "grad_norm": 0.24422810971736908, "learning_rate": 7.84275382587145e-06, "loss": 0.2049, "step": 30660 }, { "epoch": 0.5692421367651481, "grad_norm": 0.31878241896629333, "learning_rate": 7.8416148157931e-06, "loss": 0.1513, "step": 30662 }, { "epoch": 0.5692792669025668, "grad_norm": 0.31119266152381897, "learning_rate": 7.840475835083903e-06, "loss": 0.3645, "step": 30664 }, { "epoch": 0.5693163970399855, "grad_norm": 0.24074499309062958, "learning_rate": 7.839336883759362e-06, "loss": 0.211, "step": 30666 }, { "epoch": 0.5693535271774041, "grad_norm": 0.5032479763031006, "learning_rate": 7.838197961834974e-06, "loss": 0.2564, "step": 30668 }, { "epoch": 0.5693906573148227, "grad_norm": 0.40660709142684937, "learning_rate": 7.837059069326238e-06, "loss": 0.3445, "step": 30670 }, { "epoch": 0.5694277874522413, "grad_norm": 0.31265658140182495, "learning_rate": 7.835920206248652e-06, "loss": 0.3057, "step": 30672 }, { "epoch": 0.56946491758966, "grad_norm": 0.4856959283351898, "learning_rate": 7.834781372617706e-06, "loss": 0.3463, "step": 30674 }, { "epoch": 0.5695020477270787, "grad_norm": 0.49358445405960083, "learning_rate": 7.833642568448899e-06, "loss": 0.3, "step": 30676 }, { "epoch": 0.5695391778644973, "grad_norm": 0.19963692128658295, "learning_rate": 7.832503793757729e-06, "loss": 0.2208, "step": 30678 }, { "epoch": 0.5695763080019159, "grad_norm": 0.3275558650493622, "learning_rate": 7.831365048559691e-06, "loss": 0.0805, "step": 30680 }, { "epoch": 0.5696134381393345, "grad_norm": 0.35259246826171875, "learning_rate": 7.830226332870277e-06, "loss": 0.2964, "step": 30682 }, { "epoch": 0.5696505682767532, "grad_norm": 0.4486176073551178, "learning_rate": 7.829087646704987e-06, "loss": 0.18, "step": 30684 }, { "epoch": 0.5696876984141719, "grad_norm": 0.4811548888683319, "learning_rate": 7.827948990079309e-06, "loss": 0.2291, "step": 30686 }, { "epoch": 0.5697248285515905, "grad_norm": 0.731239914894104, "learning_rate": 7.826810363008736e-06, "loss": 0.313, "step": 30688 }, { "epoch": 0.5697619586890091, "grad_norm": 0.2572113275527954, "learning_rate": 7.825671765508766e-06, "loss": 0.3548, "step": 30690 }, { "epoch": 0.5697990888264277, "grad_norm": 0.30911195278167725, "learning_rate": 7.824533197594895e-06, "loss": 0.4283, "step": 30692 }, { "epoch": 0.5698362189638464, "grad_norm": 0.2597065269947052, "learning_rate": 7.823394659282606e-06, "loss": 0.3744, "step": 30694 }, { "epoch": 0.569873349101265, "grad_norm": 0.63627028465271, "learning_rate": 7.8222561505874e-06, "loss": 0.2022, "step": 30696 }, { "epoch": 0.5699104792386837, "grad_norm": 0.3540831506252289, "learning_rate": 7.821117671524763e-06, "loss": 0.2381, "step": 30698 }, { "epoch": 0.5699476093761023, "grad_norm": 0.4699212908744812, "learning_rate": 7.819979222110186e-06, "loss": 0.2419, "step": 30700 }, { "epoch": 0.5699847395135209, "grad_norm": 0.33074215054512024, "learning_rate": 7.818840802359164e-06, "loss": 0.526, "step": 30702 }, { "epoch": 0.5700218696509396, "grad_norm": 0.7920529842376709, "learning_rate": 7.817702412287185e-06, "loss": 0.4891, "step": 30704 }, { "epoch": 0.5700589997883582, "grad_norm": 0.5426456332206726, "learning_rate": 7.816564051909737e-06, "loss": 0.2399, "step": 30706 }, { "epoch": 0.5700961299257769, "grad_norm": 0.40793612599372864, "learning_rate": 7.815425721242313e-06, "loss": 0.2963, "step": 30708 }, { "epoch": 0.5701332600631955, "grad_norm": 0.37187185883522034, "learning_rate": 7.814287420300407e-06, "loss": 0.3898, "step": 30710 }, { "epoch": 0.5701703902006141, "grad_norm": 0.4627900719642639, "learning_rate": 7.813149149099495e-06, "loss": 0.223, "step": 30712 }, { "epoch": 0.5702075203380328, "grad_norm": 0.40232518315315247, "learning_rate": 7.812010907655078e-06, "loss": 0.146, "step": 30714 }, { "epoch": 0.5702446504754514, "grad_norm": 0.4002465009689331, "learning_rate": 7.810872695982636e-06, "loss": 0.2709, "step": 30716 }, { "epoch": 0.57028178061287, "grad_norm": 0.2571702003479004, "learning_rate": 7.809734514097657e-06, "loss": 0.1335, "step": 30718 }, { "epoch": 0.5703189107502887, "grad_norm": 0.2706353962421417, "learning_rate": 7.808596362015633e-06, "loss": 0.204, "step": 30720 }, { "epoch": 0.5703560408877073, "grad_norm": 0.3100935220718384, "learning_rate": 7.807458239752053e-06, "loss": 0.2623, "step": 30722 }, { "epoch": 0.570393171025126, "grad_norm": 0.3397640883922577, "learning_rate": 7.806320147322396e-06, "loss": 0.2088, "step": 30724 }, { "epoch": 0.5704303011625446, "grad_norm": 0.26328396797180176, "learning_rate": 7.805182084742148e-06, "loss": 0.1558, "step": 30726 }, { "epoch": 0.5704674312999632, "grad_norm": 0.4029276669025421, "learning_rate": 7.804044052026799e-06, "loss": 0.3402, "step": 30728 }, { "epoch": 0.5705045614373819, "grad_norm": 0.286686509847641, "learning_rate": 7.802906049191832e-06, "loss": 0.3206, "step": 30730 }, { "epoch": 0.5705416915748005, "grad_norm": 0.49031952023506165, "learning_rate": 7.801768076252735e-06, "loss": 0.233, "step": 30732 }, { "epoch": 0.5705788217122192, "grad_norm": 0.7933528423309326, "learning_rate": 7.80063013322499e-06, "loss": 0.1482, "step": 30734 }, { "epoch": 0.5706159518496378, "grad_norm": 0.4536634385585785, "learning_rate": 7.799492220124082e-06, "loss": 0.2074, "step": 30736 }, { "epoch": 0.5706530819870564, "grad_norm": 0.24374347925186157, "learning_rate": 7.798354336965489e-06, "loss": 0.3752, "step": 30738 }, { "epoch": 0.5706902121244751, "grad_norm": 0.517255961894989, "learning_rate": 7.7972164837647e-06, "loss": 0.2132, "step": 30740 }, { "epoch": 0.5707273422618937, "grad_norm": 0.29550403356552124, "learning_rate": 7.796078660537197e-06, "loss": 0.1578, "step": 30742 }, { "epoch": 0.5707644723993124, "grad_norm": 0.30857229232788086, "learning_rate": 7.794940867298465e-06, "loss": 0.3103, "step": 30744 }, { "epoch": 0.570801602536731, "grad_norm": 0.383404403924942, "learning_rate": 7.793803104063979e-06, "loss": 0.2005, "step": 30746 }, { "epoch": 0.5708387326741496, "grad_norm": 0.3568408191204071, "learning_rate": 7.792665370849229e-06, "loss": 0.2535, "step": 30748 }, { "epoch": 0.5708758628115682, "grad_norm": 0.3433665633201599, "learning_rate": 7.791527667669687e-06, "loss": 0.2532, "step": 30750 }, { "epoch": 0.5709129929489869, "grad_norm": 0.3389299809932709, "learning_rate": 7.790389994540839e-06, "loss": 0.2057, "step": 30752 }, { "epoch": 0.5709501230864056, "grad_norm": 0.44569307565689087, "learning_rate": 7.789252351478167e-06, "loss": 0.1781, "step": 30754 }, { "epoch": 0.5709872532238242, "grad_norm": 0.34246334433555603, "learning_rate": 7.788114738497146e-06, "loss": 0.3095, "step": 30756 }, { "epoch": 0.5710243833612428, "grad_norm": 0.3768478333950043, "learning_rate": 7.786977155613258e-06, "loss": 0.2001, "step": 30758 }, { "epoch": 0.5710615134986614, "grad_norm": 0.42460960149765015, "learning_rate": 7.785839602841986e-06, "loss": 0.2922, "step": 30760 }, { "epoch": 0.5710986436360801, "grad_norm": 0.48252469301223755, "learning_rate": 7.784702080198801e-06, "loss": 0.4393, "step": 30762 }, { "epoch": 0.5711357737734988, "grad_norm": 0.2758078873157501, "learning_rate": 7.783564587699185e-06, "loss": 0.3501, "step": 30764 }, { "epoch": 0.5711729039109174, "grad_norm": 0.3760770559310913, "learning_rate": 7.782427125358619e-06, "loss": 0.4444, "step": 30766 }, { "epoch": 0.571210034048336, "grad_norm": 0.28239133954048157, "learning_rate": 7.781289693192575e-06, "loss": 0.1436, "step": 30768 }, { "epoch": 0.5712471641857546, "grad_norm": 0.3897975981235504, "learning_rate": 7.78015229121653e-06, "loss": 0.207, "step": 30770 }, { "epoch": 0.5712842943231733, "grad_norm": 0.31077486276626587, "learning_rate": 7.779014919445971e-06, "loss": 0.2058, "step": 30772 }, { "epoch": 0.571321424460592, "grad_norm": 0.4491395652294159, "learning_rate": 7.77787757789636e-06, "loss": 0.1487, "step": 30774 }, { "epoch": 0.5713585545980105, "grad_norm": 0.3883741497993469, "learning_rate": 7.776740266583179e-06, "loss": 0.3554, "step": 30776 }, { "epoch": 0.5713956847354292, "grad_norm": 0.3194393217563629, "learning_rate": 7.775602985521907e-06, "loss": 0.3588, "step": 30778 }, { "epoch": 0.5714328148728478, "grad_norm": 0.3997102379798889, "learning_rate": 7.774465734728012e-06, "loss": 0.2643, "step": 30780 }, { "epoch": 0.5714699450102665, "grad_norm": 0.3605523705482483, "learning_rate": 7.773328514216972e-06, "loss": 0.2143, "step": 30782 }, { "epoch": 0.5715070751476852, "grad_norm": 0.467251718044281, "learning_rate": 7.772191324004267e-06, "loss": 0.2502, "step": 30784 }, { "epoch": 0.5715442052851037, "grad_norm": 0.661156952381134, "learning_rate": 7.771054164105361e-06, "loss": 0.3988, "step": 30786 }, { "epoch": 0.5715813354225224, "grad_norm": 0.37096482515335083, "learning_rate": 7.769917034535732e-06, "loss": 0.4458, "step": 30788 }, { "epoch": 0.571618465559941, "grad_norm": 0.3853954076766968, "learning_rate": 7.76877993531085e-06, "loss": 0.1391, "step": 30790 }, { "epoch": 0.5716555956973597, "grad_norm": 0.4599910378456116, "learning_rate": 7.76764286644619e-06, "loss": 0.1767, "step": 30792 }, { "epoch": 0.5716927258347783, "grad_norm": 0.2952060401439667, "learning_rate": 7.766505827957224e-06, "loss": 0.277, "step": 30794 }, { "epoch": 0.5717298559721969, "grad_norm": 0.4265202581882477, "learning_rate": 7.765368819859427e-06, "loss": 0.172, "step": 30796 }, { "epoch": 0.5717669861096156, "grad_norm": 0.5990549921989441, "learning_rate": 7.764231842168267e-06, "loss": 0.4412, "step": 30798 }, { "epoch": 0.5718041162470342, "grad_norm": 0.33042195439338684, "learning_rate": 7.763094894899212e-06, "loss": 0.4162, "step": 30800 }, { "epoch": 0.5718412463844529, "grad_norm": 0.3797372579574585, "learning_rate": 7.761957978067735e-06, "loss": 0.2773, "step": 30802 }, { "epoch": 0.5718783765218715, "grad_norm": 0.4291931390762329, "learning_rate": 7.760821091689307e-06, "loss": 0.4347, "step": 30804 }, { "epoch": 0.5719155066592901, "grad_norm": 0.2676706910133362, "learning_rate": 7.759684235779394e-06, "loss": 0.2687, "step": 30806 }, { "epoch": 0.5719526367967088, "grad_norm": 0.43203699588775635, "learning_rate": 7.758547410353474e-06, "loss": 0.4544, "step": 30808 }, { "epoch": 0.5719897669341274, "grad_norm": 0.5756469368934631, "learning_rate": 7.75741061542701e-06, "loss": 0.0833, "step": 30810 }, { "epoch": 0.5720268970715461, "grad_norm": 0.713797390460968, "learning_rate": 7.756273851015464e-06, "loss": 0.2146, "step": 30812 }, { "epoch": 0.5720640272089647, "grad_norm": 0.2435319572687149, "learning_rate": 7.755137117134312e-06, "loss": 0.2397, "step": 30814 }, { "epoch": 0.5721011573463833, "grad_norm": 0.49905359745025635, "learning_rate": 7.75400041379902e-06, "loss": 0.2173, "step": 30816 }, { "epoch": 0.572138287483802, "grad_norm": 0.27993685007095337, "learning_rate": 7.752863741025057e-06, "loss": 0.3293, "step": 30818 }, { "epoch": 0.5721754176212206, "grad_norm": 0.33293116092681885, "learning_rate": 7.751727098827885e-06, "loss": 0.1664, "step": 30820 }, { "epoch": 0.5722125477586393, "grad_norm": 0.5058616995811462, "learning_rate": 7.750590487222978e-06, "loss": 0.3322, "step": 30822 }, { "epoch": 0.5722496778960579, "grad_norm": 0.2833290696144104, "learning_rate": 7.74945390622579e-06, "loss": 0.2085, "step": 30824 }, { "epoch": 0.5722868080334765, "grad_norm": 0.645182728767395, "learning_rate": 7.748317355851795e-06, "loss": 0.2779, "step": 30826 }, { "epoch": 0.5723239381708952, "grad_norm": 0.4296180009841919, "learning_rate": 7.747180836116456e-06, "loss": 0.1074, "step": 30828 }, { "epoch": 0.5723610683083138, "grad_norm": 0.677361011505127, "learning_rate": 7.74604434703524e-06, "loss": 0.4217, "step": 30830 }, { "epoch": 0.5723981984457325, "grad_norm": 0.4936288893222809, "learning_rate": 7.744907888623607e-06, "loss": 0.1996, "step": 30832 }, { "epoch": 0.572435328583151, "grad_norm": 0.5426415205001831, "learning_rate": 7.743771460897023e-06, "loss": 0.3049, "step": 30834 }, { "epoch": 0.5724724587205697, "grad_norm": 0.3194630444049835, "learning_rate": 7.742635063870955e-06, "loss": 0.2884, "step": 30836 }, { "epoch": 0.5725095888579884, "grad_norm": 0.33115580677986145, "learning_rate": 7.741498697560858e-06, "loss": 0.1595, "step": 30838 }, { "epoch": 0.572546718995407, "grad_norm": 0.3711857497692108, "learning_rate": 7.7403623619822e-06, "loss": 0.177, "step": 30840 }, { "epoch": 0.5725838491328257, "grad_norm": 0.5087011456489563, "learning_rate": 7.739226057150441e-06, "loss": 0.1781, "step": 30842 }, { "epoch": 0.5726209792702442, "grad_norm": 0.3291945159435272, "learning_rate": 7.738089783081045e-06, "loss": 0.2766, "step": 30844 }, { "epoch": 0.5726581094076629, "grad_norm": 0.5278339385986328, "learning_rate": 7.73695353978947e-06, "loss": 0.3735, "step": 30846 }, { "epoch": 0.5726952395450815, "grad_norm": 0.34606456756591797, "learning_rate": 7.735817327291182e-06, "loss": 0.1887, "step": 30848 }, { "epoch": 0.5727323696825002, "grad_norm": 0.35968929529190063, "learning_rate": 7.734681145601635e-06, "loss": 0.169, "step": 30850 }, { "epoch": 0.5727694998199189, "grad_norm": 0.4294825792312622, "learning_rate": 7.733544994736295e-06, "loss": 0.3571, "step": 30852 }, { "epoch": 0.5728066299573374, "grad_norm": 0.3126409649848938, "learning_rate": 7.732408874710615e-06, "loss": 0.0842, "step": 30854 }, { "epoch": 0.5728437600947561, "grad_norm": 0.32681673765182495, "learning_rate": 7.731272785540058e-06, "loss": 0.286, "step": 30856 }, { "epoch": 0.5728808902321747, "grad_norm": 0.40448275208473206, "learning_rate": 7.730136727240085e-06, "loss": 0.3281, "step": 30858 }, { "epoch": 0.5729180203695934, "grad_norm": 0.8401007652282715, "learning_rate": 7.729000699826155e-06, "loss": 0.3482, "step": 30860 }, { "epoch": 0.5729551505070121, "grad_norm": 0.32294583320617676, "learning_rate": 7.72786470331372e-06, "loss": 0.1319, "step": 30862 }, { "epoch": 0.5729922806444306, "grad_norm": 0.4724728465080261, "learning_rate": 7.72672873771824e-06, "loss": 0.1826, "step": 30864 }, { "epoch": 0.5730294107818493, "grad_norm": 0.38687512278556824, "learning_rate": 7.725592803055172e-06, "loss": 0.2544, "step": 30866 }, { "epoch": 0.5730665409192679, "grad_norm": 0.3796022832393646, "learning_rate": 7.724456899339974e-06, "loss": 0.4137, "step": 30868 }, { "epoch": 0.5731036710566866, "grad_norm": 0.29786425828933716, "learning_rate": 7.723321026588102e-06, "loss": 0.1833, "step": 30870 }, { "epoch": 0.5731408011941053, "grad_norm": 0.663848340511322, "learning_rate": 7.722185184815011e-06, "loss": 0.0775, "step": 30872 }, { "epoch": 0.5731779313315238, "grad_norm": 0.43506842851638794, "learning_rate": 7.72104937403616e-06, "loss": 0.2182, "step": 30874 }, { "epoch": 0.5732150614689425, "grad_norm": 0.3086608052253723, "learning_rate": 7.719913594266997e-06, "loss": 0.3267, "step": 30876 }, { "epoch": 0.5732521916063611, "grad_norm": 0.5191611647605896, "learning_rate": 7.71877784552298e-06, "loss": 0.1985, "step": 30878 }, { "epoch": 0.5732893217437798, "grad_norm": 0.2562870681285858, "learning_rate": 7.717642127819564e-06, "loss": 0.5357, "step": 30880 }, { "epoch": 0.5733264518811985, "grad_norm": 0.5959519743919373, "learning_rate": 7.716506441172204e-06, "loss": 0.2628, "step": 30882 }, { "epoch": 0.573363582018617, "grad_norm": 0.8156997561454773, "learning_rate": 7.71537078559635e-06, "loss": 0.1958, "step": 30884 }, { "epoch": 0.5734007121560357, "grad_norm": 0.5089747309684753, "learning_rate": 7.71423516110746e-06, "loss": 0.2883, "step": 30886 }, { "epoch": 0.5734378422934543, "grad_norm": 0.5371935963630676, "learning_rate": 7.713099567720978e-06, "loss": 0.2678, "step": 30888 }, { "epoch": 0.573474972430873, "grad_norm": 0.5082718729972839, "learning_rate": 7.711964005452361e-06, "loss": 0.3693, "step": 30890 }, { "epoch": 0.5735121025682917, "grad_norm": 0.39806991815567017, "learning_rate": 7.710828474317064e-06, "loss": 0.3454, "step": 30892 }, { "epoch": 0.5735492327057102, "grad_norm": 0.27226024866104126, "learning_rate": 7.70969297433053e-06, "loss": 0.2292, "step": 30894 }, { "epoch": 0.5735863628431289, "grad_norm": 0.44892653822898865, "learning_rate": 7.708557505508216e-06, "loss": 0.3301, "step": 30896 }, { "epoch": 0.5736234929805475, "grad_norm": 0.32208141684532166, "learning_rate": 7.707422067865576e-06, "loss": 0.235, "step": 30898 }, { "epoch": 0.5736606231179662, "grad_norm": 0.39069029688835144, "learning_rate": 7.706286661418048e-06, "loss": 0.2074, "step": 30900 }, { "epoch": 0.5736977532553847, "grad_norm": 0.4110138416290283, "learning_rate": 7.705151286181089e-06, "loss": 0.1847, "step": 30902 }, { "epoch": 0.5737348833928034, "grad_norm": 0.4770754277706146, "learning_rate": 7.704015942170148e-06, "loss": 0.3258, "step": 30904 }, { "epoch": 0.5737720135302221, "grad_norm": 0.6607776284217834, "learning_rate": 7.702880629400673e-06, "loss": 0.4048, "step": 30906 }, { "epoch": 0.5738091436676407, "grad_norm": 0.4549581706523895, "learning_rate": 7.701745347888111e-06, "loss": 0.2049, "step": 30908 }, { "epoch": 0.5738462738050594, "grad_norm": 0.28146395087242126, "learning_rate": 7.700610097647911e-06, "loss": 0.1322, "step": 30910 }, { "epoch": 0.5738834039424779, "grad_norm": 0.4381413161754608, "learning_rate": 7.699474878695524e-06, "loss": 0.4039, "step": 30912 }, { "epoch": 0.5739205340798966, "grad_norm": 0.35600990056991577, "learning_rate": 7.698339691046387e-06, "loss": 0.2214, "step": 30914 }, { "epoch": 0.5739576642173153, "grad_norm": 0.3268638551235199, "learning_rate": 7.69720453471596e-06, "loss": 0.1793, "step": 30916 }, { "epoch": 0.5739947943547339, "grad_norm": 0.5677059888839722, "learning_rate": 7.696069409719675e-06, "loss": 0.228, "step": 30918 }, { "epoch": 0.5740319244921526, "grad_norm": 0.37476134300231934, "learning_rate": 7.694934316072983e-06, "loss": 0.2277, "step": 30920 }, { "epoch": 0.5740690546295711, "grad_norm": 0.33506545424461365, "learning_rate": 7.693799253791333e-06, "loss": 0.2208, "step": 30922 }, { "epoch": 0.5741061847669898, "grad_norm": 0.5173262357711792, "learning_rate": 7.692664222890172e-06, "loss": 0.289, "step": 30924 }, { "epoch": 0.5741433149044085, "grad_norm": 0.34582948684692383, "learning_rate": 7.691529223384936e-06, "loss": 0.3179, "step": 30926 }, { "epoch": 0.5741804450418271, "grad_norm": 0.559708833694458, "learning_rate": 7.690394255291072e-06, "loss": 0.3112, "step": 30928 }, { "epoch": 0.5742175751792458, "grad_norm": 0.3752209544181824, "learning_rate": 7.689259318624024e-06, "loss": 0.2925, "step": 30930 }, { "epoch": 0.5742547053166643, "grad_norm": 0.3502529263496399, "learning_rate": 7.688124413399233e-06, "loss": 0.2887, "step": 30932 }, { "epoch": 0.574291835454083, "grad_norm": 0.47667697072029114, "learning_rate": 7.686989539632149e-06, "loss": 0.379, "step": 30934 }, { "epoch": 0.5743289655915017, "grad_norm": 0.2457290142774582, "learning_rate": 7.685854697338208e-06, "loss": 0.2391, "step": 30936 }, { "epoch": 0.5743660957289203, "grad_norm": 0.505111813545227, "learning_rate": 7.684719886532851e-06, "loss": 0.2741, "step": 30938 }, { "epoch": 0.574403225866339, "grad_norm": 0.4584346115589142, "learning_rate": 7.683585107231521e-06, "loss": 0.5324, "step": 30940 }, { "epoch": 0.5744403560037575, "grad_norm": 0.4836214482784271, "learning_rate": 7.682450359449659e-06, "loss": 0.3176, "step": 30942 }, { "epoch": 0.5744774861411762, "grad_norm": 0.32196342945098877, "learning_rate": 7.681315643202707e-06, "loss": 0.2909, "step": 30944 }, { "epoch": 0.5745146162785948, "grad_norm": 0.2770683765411377, "learning_rate": 7.680180958506103e-06, "loss": 0.3103, "step": 30946 }, { "epoch": 0.5745517464160135, "grad_norm": 0.3949334919452667, "learning_rate": 7.679046305375291e-06, "loss": 0.539, "step": 30948 }, { "epoch": 0.5745888765534322, "grad_norm": 0.29031673073768616, "learning_rate": 7.677911683825704e-06, "loss": 0.5197, "step": 30950 }, { "epoch": 0.5746260066908507, "grad_norm": 0.2679411470890045, "learning_rate": 7.676777093872783e-06, "loss": 0.4677, "step": 30952 }, { "epoch": 0.5746631368282694, "grad_norm": 0.4363188147544861, "learning_rate": 7.675642535531966e-06, "loss": 0.1272, "step": 30954 }, { "epoch": 0.574700266965688, "grad_norm": 0.4277131259441376, "learning_rate": 7.674508008818693e-06, "loss": 0.2459, "step": 30956 }, { "epoch": 0.5747373971031067, "grad_norm": 0.4207153916358948, "learning_rate": 7.6733735137484e-06, "loss": 0.3632, "step": 30958 }, { "epoch": 0.5747745272405254, "grad_norm": 0.3746563196182251, "learning_rate": 7.672239050336523e-06, "loss": 0.2765, "step": 30960 }, { "epoch": 0.5748116573779439, "grad_norm": 0.44773221015930176, "learning_rate": 7.671104618598505e-06, "loss": 0.274, "step": 30962 }, { "epoch": 0.5748487875153626, "grad_norm": 3.3151469230651855, "learning_rate": 7.669970218549773e-06, "loss": 0.1843, "step": 30964 }, { "epoch": 0.5748859176527812, "grad_norm": 0.3795017898082733, "learning_rate": 7.668835850205768e-06, "loss": 0.2415, "step": 30966 }, { "epoch": 0.5749230477901999, "grad_norm": 0.29662495851516724, "learning_rate": 7.667701513581926e-06, "loss": 0.3192, "step": 30968 }, { "epoch": 0.5749601779276186, "grad_norm": 0.397330641746521, "learning_rate": 7.666567208693677e-06, "loss": 0.3502, "step": 30970 }, { "epoch": 0.5749973080650371, "grad_norm": 0.32202550768852234, "learning_rate": 7.665432935556461e-06, "loss": 0.0861, "step": 30972 }, { "epoch": 0.5750344382024558, "grad_norm": 0.08321783691644669, "learning_rate": 7.664298694185713e-06, "loss": 0.0712, "step": 30974 }, { "epoch": 0.5750715683398744, "grad_norm": 0.5714452862739563, "learning_rate": 7.66316448459686e-06, "loss": 0.4176, "step": 30976 }, { "epoch": 0.5751086984772931, "grad_norm": 0.5573163032531738, "learning_rate": 7.662030306805338e-06, "loss": 0.5409, "step": 30978 }, { "epoch": 0.5751458286147118, "grad_norm": 0.2605822682380676, "learning_rate": 7.660896160826582e-06, "loss": 0.237, "step": 30980 }, { "epoch": 0.5751829587521303, "grad_norm": 0.4066370725631714, "learning_rate": 7.65976204667602e-06, "loss": 0.2691, "step": 30982 }, { "epoch": 0.575220088889549, "grad_norm": 0.5078721046447754, "learning_rate": 7.658627964369088e-06, "loss": 0.3956, "step": 30984 }, { "epoch": 0.5752572190269676, "grad_norm": 0.38002809882164, "learning_rate": 7.657493913921221e-06, "loss": 0.3838, "step": 30986 }, { "epoch": 0.5752943491643863, "grad_norm": 0.38463693857192993, "learning_rate": 7.65635989534784e-06, "loss": 0.2424, "step": 30988 }, { "epoch": 0.575331479301805, "grad_norm": 0.2529514729976654, "learning_rate": 7.655225908664383e-06, "loss": 0.361, "step": 30990 }, { "epoch": 0.5753686094392235, "grad_norm": 0.38379237055778503, "learning_rate": 7.654091953886276e-06, "loss": 0.4205, "step": 30992 }, { "epoch": 0.5754057395766422, "grad_norm": 0.6508237719535828, "learning_rate": 7.652958031028953e-06, "loss": 0.2591, "step": 30994 }, { "epoch": 0.5754428697140608, "grad_norm": 0.5471687316894531, "learning_rate": 7.651824140107838e-06, "loss": 0.2737, "step": 30996 }, { "epoch": 0.5754799998514795, "grad_norm": 0.37261924147605896, "learning_rate": 7.650690281138365e-06, "loss": 0.3347, "step": 30998 }, { "epoch": 0.575517129988898, "grad_norm": 0.3480701446533203, "learning_rate": 7.649556454135964e-06, "loss": 0.0702, "step": 31000 }, { "epoch": 0.5755542601263167, "grad_norm": 0.3818562626838684, "learning_rate": 7.648422659116053e-06, "loss": 0.3007, "step": 31002 }, { "epoch": 0.5755913902637354, "grad_norm": 0.29535770416259766, "learning_rate": 7.647288896094069e-06, "loss": 0.2153, "step": 31004 }, { "epoch": 0.575628520401154, "grad_norm": 0.4236251711845398, "learning_rate": 7.646155165085434e-06, "loss": 0.2775, "step": 31006 }, { "epoch": 0.5756656505385727, "grad_norm": 0.25951558351516724, "learning_rate": 7.645021466105578e-06, "loss": 0.3522, "step": 31008 }, { "epoch": 0.5757027806759912, "grad_norm": 0.35228031873703003, "learning_rate": 7.643887799169925e-06, "loss": 0.3039, "step": 31010 }, { "epoch": 0.5757399108134099, "grad_norm": 0.22825314104557037, "learning_rate": 7.642754164293905e-06, "loss": 0.1563, "step": 31012 }, { "epoch": 0.5757770409508286, "grad_norm": 0.35881125926971436, "learning_rate": 7.641620561492936e-06, "loss": 0.3959, "step": 31014 }, { "epoch": 0.5758141710882472, "grad_norm": 0.265899121761322, "learning_rate": 7.640486990782447e-06, "loss": 0.2356, "step": 31016 }, { "epoch": 0.5758513012256659, "grad_norm": 0.3160868287086487, "learning_rate": 7.639353452177863e-06, "loss": 0.0942, "step": 31018 }, { "epoch": 0.5758884313630844, "grad_norm": 0.2538793385028839, "learning_rate": 7.63821994569461e-06, "loss": 0.2013, "step": 31020 }, { "epoch": 0.5759255615005031, "grad_norm": 0.37353113293647766, "learning_rate": 7.637086471348106e-06, "loss": 0.4185, "step": 31022 }, { "epoch": 0.5759626916379218, "grad_norm": 0.4472772777080536, "learning_rate": 7.635953029153782e-06, "loss": 0.2368, "step": 31024 }, { "epoch": 0.5759998217753404, "grad_norm": 0.3807167708873749, "learning_rate": 7.634819619127051e-06, "loss": 0.1808, "step": 31026 }, { "epoch": 0.576036951912759, "grad_norm": 0.3559841513633728, "learning_rate": 7.633686241283342e-06, "loss": 0.2481, "step": 31028 }, { "epoch": 0.5760740820501776, "grad_norm": 0.35631564259529114, "learning_rate": 7.632552895638075e-06, "loss": 0.255, "step": 31030 }, { "epoch": 0.5761112121875963, "grad_norm": 0.34104984998703003, "learning_rate": 7.631419582206673e-06, "loss": 0.3576, "step": 31032 }, { "epoch": 0.576148342325015, "grad_norm": 0.4543199837207794, "learning_rate": 7.630286301004556e-06, "loss": 0.4909, "step": 31034 }, { "epoch": 0.5761854724624336, "grad_norm": 0.353497177362442, "learning_rate": 7.6291530520471426e-06, "loss": 0.2809, "step": 31036 }, { "epoch": 0.5762226025998523, "grad_norm": 0.22295664250850677, "learning_rate": 7.628019835349859e-06, "loss": 0.2394, "step": 31038 }, { "epoch": 0.5762597327372708, "grad_norm": 0.2737695574760437, "learning_rate": 7.626886650928116e-06, "loss": 0.3897, "step": 31040 }, { "epoch": 0.5762968628746895, "grad_norm": 0.3815363943576813, "learning_rate": 7.625753498797339e-06, "loss": 0.2597, "step": 31042 }, { "epoch": 0.5763339930121082, "grad_norm": 0.24110160768032074, "learning_rate": 7.624620378972944e-06, "loss": 0.2332, "step": 31044 }, { "epoch": 0.5763711231495268, "grad_norm": 0.5233474373817444, "learning_rate": 7.62348729147035e-06, "loss": 0.2005, "step": 31046 }, { "epoch": 0.5764082532869454, "grad_norm": 0.36254993081092834, "learning_rate": 7.622354236304976e-06, "loss": 0.1988, "step": 31048 }, { "epoch": 0.576445383424364, "grad_norm": 0.33645758032798767, "learning_rate": 7.621221213492244e-06, "loss": 0.228, "step": 31050 }, { "epoch": 0.5764825135617827, "grad_norm": 0.2907795310020447, "learning_rate": 7.62008822304756e-06, "loss": 0.3067, "step": 31052 }, { "epoch": 0.5765196436992013, "grad_norm": 0.44895225763320923, "learning_rate": 7.618955264986351e-06, "loss": 0.3484, "step": 31054 }, { "epoch": 0.57655677383662, "grad_norm": 0.4202222526073456, "learning_rate": 7.617822339324025e-06, "loss": 0.2185, "step": 31056 }, { "epoch": 0.5765939039740386, "grad_norm": 0.47238579392433167, "learning_rate": 7.616689446076001e-06, "loss": 0.3503, "step": 31058 }, { "epoch": 0.5766310341114572, "grad_norm": 0.614136815071106, "learning_rate": 7.615556585257696e-06, "loss": 0.4441, "step": 31060 }, { "epoch": 0.5766681642488759, "grad_norm": 0.358832985162735, "learning_rate": 7.614423756884528e-06, "loss": 0.1489, "step": 31062 }, { "epoch": 0.5767052943862945, "grad_norm": 0.637636661529541, "learning_rate": 7.613290960971904e-06, "loss": 0.3262, "step": 31064 }, { "epoch": 0.5767424245237132, "grad_norm": 0.35878950357437134, "learning_rate": 7.612158197535239e-06, "loss": 0.2943, "step": 31066 }, { "epoch": 0.5767795546611318, "grad_norm": 0.195411816239357, "learning_rate": 7.611025466589949e-06, "loss": 0.0762, "step": 31068 }, { "epoch": 0.5768166847985504, "grad_norm": 0.3723064064979553, "learning_rate": 7.609892768151446e-06, "loss": 0.2061, "step": 31070 }, { "epoch": 0.5768538149359691, "grad_norm": 0.3227402865886688, "learning_rate": 7.608760102235147e-06, "loss": 0.0856, "step": 31072 }, { "epoch": 0.5768909450733877, "grad_norm": 0.2868557870388031, "learning_rate": 7.607627468856458e-06, "loss": 0.2215, "step": 31074 }, { "epoch": 0.5769280752108064, "grad_norm": 0.46220844984054565, "learning_rate": 7.606494868030793e-06, "loss": 0.2236, "step": 31076 }, { "epoch": 0.576965205348225, "grad_norm": 0.44900795817375183, "learning_rate": 7.6053622997735645e-06, "loss": 0.5057, "step": 31078 }, { "epoch": 0.5770023354856436, "grad_norm": 0.43503543734550476, "learning_rate": 7.604229764100179e-06, "loss": 0.2162, "step": 31080 }, { "epoch": 0.5770394656230623, "grad_norm": 0.469188928604126, "learning_rate": 7.603097261026053e-06, "loss": 0.2356, "step": 31082 }, { "epoch": 0.5770765957604809, "grad_norm": 0.37695544958114624, "learning_rate": 7.601964790566594e-06, "loss": 0.3335, "step": 31084 }, { "epoch": 0.5771137258978996, "grad_norm": 0.4378848373889923, "learning_rate": 7.60083235273721e-06, "loss": 0.1724, "step": 31086 }, { "epoch": 0.5771508560353182, "grad_norm": 0.38973599672317505, "learning_rate": 7.599699947553315e-06, "loss": 0.2511, "step": 31088 }, { "epoch": 0.5771879861727368, "grad_norm": 0.3587445318698883, "learning_rate": 7.59856757503031e-06, "loss": 0.2637, "step": 31090 }, { "epoch": 0.5772251163101555, "grad_norm": 0.2674536406993866, "learning_rate": 7.597435235183607e-06, "loss": 0.1289, "step": 31092 }, { "epoch": 0.5772622464475741, "grad_norm": 0.3264833390712738, "learning_rate": 7.596302928028617e-06, "loss": 0.3112, "step": 31094 }, { "epoch": 0.5772993765849928, "grad_norm": 0.27748262882232666, "learning_rate": 7.595170653580741e-06, "loss": 0.082, "step": 31096 }, { "epoch": 0.5773365067224113, "grad_norm": 0.30836835503578186, "learning_rate": 7.594038411855389e-06, "loss": 0.1842, "step": 31098 }, { "epoch": 0.57737363685983, "grad_norm": 0.3451334536075592, "learning_rate": 7.592906202867973e-06, "loss": 0.0799, "step": 31100 }, { "epoch": 0.5774107669972487, "grad_norm": 0.5137263536453247, "learning_rate": 7.591774026633888e-06, "loss": 0.3753, "step": 31102 }, { "epoch": 0.5774478971346673, "grad_norm": 0.4151761829853058, "learning_rate": 7.590641883168545e-06, "loss": 0.4445, "step": 31104 }, { "epoch": 0.577485027272086, "grad_norm": 0.4870668053627014, "learning_rate": 7.5895097724873525e-06, "loss": 0.1672, "step": 31106 }, { "epoch": 0.5775221574095045, "grad_norm": 0.4959615170955658, "learning_rate": 7.588377694605709e-06, "loss": 0.2958, "step": 31108 }, { "epoch": 0.5775592875469232, "grad_norm": 0.3315300941467285, "learning_rate": 7.587245649539021e-06, "loss": 0.2315, "step": 31110 }, { "epoch": 0.5775964176843419, "grad_norm": 0.34610649943351746, "learning_rate": 7.586113637302698e-06, "loss": 0.1462, "step": 31112 }, { "epoch": 0.5776335478217605, "grad_norm": 0.34251880645751953, "learning_rate": 7.584981657912133e-06, "loss": 0.2781, "step": 31114 }, { "epoch": 0.5776706779591791, "grad_norm": 0.3090687692165375, "learning_rate": 7.583849711382736e-06, "loss": 0.3889, "step": 31116 }, { "epoch": 0.5777078080965977, "grad_norm": 0.5516473650932312, "learning_rate": 7.582717797729904e-06, "loss": 0.3875, "step": 31118 }, { "epoch": 0.5777449382340164, "grad_norm": 0.34100228548049927, "learning_rate": 7.5815859169690435e-06, "loss": 0.3291, "step": 31120 }, { "epoch": 0.5777820683714351, "grad_norm": 0.324738472700119, "learning_rate": 7.580454069115555e-06, "loss": 0.2212, "step": 31122 }, { "epoch": 0.5778191985088537, "grad_norm": 0.5066614151000977, "learning_rate": 7.57932225418484e-06, "loss": 0.2936, "step": 31124 }, { "epoch": 0.5778563286462723, "grad_norm": 0.3880600929260254, "learning_rate": 7.578190472192299e-06, "loss": 0.1724, "step": 31126 }, { "epoch": 0.5778934587836909, "grad_norm": 0.3594605326652527, "learning_rate": 7.577058723153332e-06, "loss": 0.4552, "step": 31128 }, { "epoch": 0.5779305889211096, "grad_norm": 0.5140810012817383, "learning_rate": 7.575927007083335e-06, "loss": 0.379, "step": 31130 }, { "epoch": 0.5779677190585283, "grad_norm": 0.43927818536758423, "learning_rate": 7.5747953239977124e-06, "loss": 0.2491, "step": 31132 }, { "epoch": 0.5780048491959469, "grad_norm": 0.25763311982154846, "learning_rate": 7.57366367391186e-06, "loss": 0.2964, "step": 31134 }, { "epoch": 0.5780419793333655, "grad_norm": 0.31850776076316833, "learning_rate": 7.572532056841178e-06, "loss": 0.2405, "step": 31136 }, { "epoch": 0.5780791094707841, "grad_norm": 0.36355075240135193, "learning_rate": 7.5714004728010645e-06, "loss": 0.1795, "step": 31138 }, { "epoch": 0.5781162396082028, "grad_norm": 0.37581780552864075, "learning_rate": 7.570268921806917e-06, "loss": 0.3514, "step": 31140 }, { "epoch": 0.5781533697456215, "grad_norm": 0.3243929147720337, "learning_rate": 7.569137403874129e-06, "loss": 0.187, "step": 31142 }, { "epoch": 0.57819049988304, "grad_norm": 0.39333802461624146, "learning_rate": 7.5680059190181e-06, "loss": 0.2883, "step": 31144 }, { "epoch": 0.5782276300204587, "grad_norm": 0.30685508251190186, "learning_rate": 7.566874467254227e-06, "loss": 0.3749, "step": 31146 }, { "epoch": 0.5782647601578773, "grad_norm": 0.3579385280609131, "learning_rate": 7.565743048597903e-06, "loss": 0.5051, "step": 31148 }, { "epoch": 0.578301890295296, "grad_norm": 0.3715967535972595, "learning_rate": 7.5646116630645295e-06, "loss": 0.3512, "step": 31150 }, { "epoch": 0.5783390204327146, "grad_norm": 0.5116569399833679, "learning_rate": 7.563480310669491e-06, "loss": 0.3125, "step": 31152 }, { "epoch": 0.5783761505701333, "grad_norm": 0.46344009041786194, "learning_rate": 7.562348991428188e-06, "loss": 0.2145, "step": 31154 }, { "epoch": 0.5784132807075519, "grad_norm": 0.3946397006511688, "learning_rate": 7.561217705356012e-06, "loss": 0.2155, "step": 31156 }, { "epoch": 0.5784504108449705, "grad_norm": 0.26402443647384644, "learning_rate": 7.560086452468363e-06, "loss": 0.3264, "step": 31158 }, { "epoch": 0.5784875409823892, "grad_norm": 0.48576226830482483, "learning_rate": 7.558955232780625e-06, "loss": 0.4532, "step": 31160 }, { "epoch": 0.5785246711198078, "grad_norm": 0.4014604389667511, "learning_rate": 7.557824046308194e-06, "loss": 0.2974, "step": 31162 }, { "epoch": 0.5785618012572264, "grad_norm": 0.33381161093711853, "learning_rate": 7.556692893066469e-06, "loss": 0.154, "step": 31164 }, { "epoch": 0.5785989313946451, "grad_norm": 0.2902061343193054, "learning_rate": 7.555561773070828e-06, "loss": 0.3285, "step": 31166 }, { "epoch": 0.5786360615320637, "grad_norm": 0.4292329251766205, "learning_rate": 7.554430686336671e-06, "loss": 0.1008, "step": 31168 }, { "epoch": 0.5786731916694824, "grad_norm": 0.3464864194393158, "learning_rate": 7.553299632879389e-06, "loss": 0.5245, "step": 31170 }, { "epoch": 0.578710321806901, "grad_norm": 0.3979438841342926, "learning_rate": 7.552168612714368e-06, "loss": 0.3633, "step": 31172 }, { "epoch": 0.5787474519443196, "grad_norm": 0.3536602556705475, "learning_rate": 7.551037625857001e-06, "loss": 0.3105, "step": 31174 }, { "epoch": 0.5787845820817383, "grad_norm": 0.36892902851104736, "learning_rate": 7.54990667232268e-06, "loss": 0.253, "step": 31176 }, { "epoch": 0.5788217122191569, "grad_norm": 0.39388081431388855, "learning_rate": 7.548775752126785e-06, "loss": 0.4155, "step": 31178 }, { "epoch": 0.5788588423565756, "grad_norm": 0.6572017073631287, "learning_rate": 7.547644865284713e-06, "loss": 0.1942, "step": 31180 }, { "epoch": 0.5788959724939942, "grad_norm": 0.31375494599342346, "learning_rate": 7.546514011811848e-06, "loss": 0.3266, "step": 31182 }, { "epoch": 0.5789331026314128, "grad_norm": 0.31462037563323975, "learning_rate": 7.545383191723576e-06, "loss": 0.2088, "step": 31184 }, { "epoch": 0.5789702327688315, "grad_norm": 0.34164267778396606, "learning_rate": 7.5442524050352865e-06, "loss": 0.4071, "step": 31186 }, { "epoch": 0.5790073629062501, "grad_norm": 0.3621094822883606, "learning_rate": 7.543121651762371e-06, "loss": 0.2049, "step": 31188 }, { "epoch": 0.5790444930436688, "grad_norm": 0.36736997961997986, "learning_rate": 7.541990931920207e-06, "loss": 0.3622, "step": 31190 }, { "epoch": 0.5790816231810874, "grad_norm": 0.3473871648311615, "learning_rate": 7.540860245524184e-06, "loss": 0.2158, "step": 31192 }, { "epoch": 0.579118753318506, "grad_norm": 0.2878754734992981, "learning_rate": 7.539729592589686e-06, "loss": 0.1341, "step": 31194 }, { "epoch": 0.5791558834559247, "grad_norm": 0.4229128658771515, "learning_rate": 7.538598973132099e-06, "loss": 0.174, "step": 31196 }, { "epoch": 0.5791930135933433, "grad_norm": 0.5712783932685852, "learning_rate": 7.537468387166808e-06, "loss": 0.1855, "step": 31198 }, { "epoch": 0.579230143730762, "grad_norm": 0.25841474533081055, "learning_rate": 7.536337834709196e-06, "loss": 0.2232, "step": 31200 }, { "epoch": 0.5792672738681806, "grad_norm": 0.2668760418891907, "learning_rate": 7.53520731577465e-06, "loss": 0.4626, "step": 31202 }, { "epoch": 0.5793044040055992, "grad_norm": 0.6375037431716919, "learning_rate": 7.534076830378546e-06, "loss": 0.3463, "step": 31204 }, { "epoch": 0.5793415341430178, "grad_norm": 0.6976446509361267, "learning_rate": 7.53294637853627e-06, "loss": 0.2774, "step": 31206 }, { "epoch": 0.5793786642804365, "grad_norm": 0.2875373661518097, "learning_rate": 7.531815960263203e-06, "loss": 0.2808, "step": 31208 }, { "epoch": 0.5794157944178552, "grad_norm": 0.4141579866409302, "learning_rate": 7.53068557557473e-06, "loss": 0.3061, "step": 31210 }, { "epoch": 0.5794529245552738, "grad_norm": 0.4020611047744751, "learning_rate": 7.529555224486228e-06, "loss": 0.291, "step": 31212 }, { "epoch": 0.5794900546926924, "grad_norm": 0.3870101273059845, "learning_rate": 7.5284249070130846e-06, "loss": 0.3585, "step": 31214 }, { "epoch": 0.579527184830111, "grad_norm": 0.429636687040329, "learning_rate": 7.527294623170669e-06, "loss": 0.3685, "step": 31216 }, { "epoch": 0.5795643149675297, "grad_norm": 0.47146692872047424, "learning_rate": 7.526164372974368e-06, "loss": 0.2863, "step": 31218 }, { "epoch": 0.5796014451049484, "grad_norm": 0.30755946040153503, "learning_rate": 7.52503415643956e-06, "loss": 0.3691, "step": 31220 }, { "epoch": 0.579638575242367, "grad_norm": 0.6568705439567566, "learning_rate": 7.523903973581625e-06, "loss": 0.1784, "step": 31222 }, { "epoch": 0.5796757053797856, "grad_norm": 0.3821035325527191, "learning_rate": 7.52277382441594e-06, "loss": 0.15, "step": 31224 }, { "epoch": 0.5797128355172042, "grad_norm": 0.3715713620185852, "learning_rate": 7.5216437089578865e-06, "loss": 0.3992, "step": 31226 }, { "epoch": 0.5797499656546229, "grad_norm": 0.5227676033973694, "learning_rate": 7.520513627222833e-06, "loss": 0.2477, "step": 31228 }, { "epoch": 0.5797870957920416, "grad_norm": 0.4146497845649719, "learning_rate": 7.519383579226163e-06, "loss": 0.3091, "step": 31230 }, { "epoch": 0.5798242259294601, "grad_norm": 0.38209593296051025, "learning_rate": 7.518253564983255e-06, "loss": 0.1763, "step": 31232 }, { "epoch": 0.5798613560668788, "grad_norm": 0.39699509739875793, "learning_rate": 7.517123584509479e-06, "loss": 0.5125, "step": 31234 }, { "epoch": 0.5798984862042974, "grad_norm": 0.30754196643829346, "learning_rate": 7.515993637820214e-06, "loss": 0.3199, "step": 31236 }, { "epoch": 0.5799356163417161, "grad_norm": 0.3600892722606659, "learning_rate": 7.514863724930841e-06, "loss": 0.3063, "step": 31238 }, { "epoch": 0.5799727464791348, "grad_norm": 0.2616475224494934, "learning_rate": 7.5137338458567234e-06, "loss": 0.2201, "step": 31240 }, { "epoch": 0.5800098766165533, "grad_norm": 0.2983090579509735, "learning_rate": 7.512604000613241e-06, "loss": 0.1526, "step": 31242 }, { "epoch": 0.580047006753972, "grad_norm": 0.3064076602458954, "learning_rate": 7.511474189215769e-06, "loss": 0.2026, "step": 31244 }, { "epoch": 0.5800841368913906, "grad_norm": 0.4513185918331146, "learning_rate": 7.510344411679677e-06, "loss": 0.1806, "step": 31246 }, { "epoch": 0.5801212670288093, "grad_norm": 0.49970078468322754, "learning_rate": 7.509214668020342e-06, "loss": 0.2918, "step": 31248 }, { "epoch": 0.5801583971662279, "grad_norm": 0.3199923038482666, "learning_rate": 7.508084958253133e-06, "loss": 0.224, "step": 31250 }, { "epoch": 0.5801955273036465, "grad_norm": 0.4985957443714142, "learning_rate": 7.506955282393429e-06, "loss": 0.3188, "step": 31252 }, { "epoch": 0.5802326574410652, "grad_norm": 0.4084486961364746, "learning_rate": 7.505825640456593e-06, "loss": 0.1931, "step": 31254 }, { "epoch": 0.5802697875784838, "grad_norm": 0.3797971308231354, "learning_rate": 7.504696032457998e-06, "loss": 0.4373, "step": 31256 }, { "epoch": 0.5803069177159025, "grad_norm": 1.5427873134613037, "learning_rate": 7.503566458413014e-06, "loss": 0.3382, "step": 31258 }, { "epoch": 0.580344047853321, "grad_norm": 0.38955816626548767, "learning_rate": 7.502436918337015e-06, "loss": 0.3002, "step": 31260 }, { "epoch": 0.5803811779907397, "grad_norm": 0.44622698426246643, "learning_rate": 7.501307412245369e-06, "loss": 0.2736, "step": 31262 }, { "epoch": 0.5804183081281584, "grad_norm": 0.4852862060070038, "learning_rate": 7.500177940153445e-06, "loss": 0.1185, "step": 31264 }, { "epoch": 0.580455438265577, "grad_norm": 0.6417048573493958, "learning_rate": 7.499048502076611e-06, "loss": 0.3548, "step": 31266 }, { "epoch": 0.5804925684029957, "grad_norm": 0.4444785714149475, "learning_rate": 7.497919098030235e-06, "loss": 0.2291, "step": 31268 }, { "epoch": 0.5805296985404143, "grad_norm": 0.2659972608089447, "learning_rate": 7.496789728029684e-06, "loss": 0.2728, "step": 31270 }, { "epoch": 0.5805668286778329, "grad_norm": 0.6256941556930542, "learning_rate": 7.495660392090327e-06, "loss": 0.2474, "step": 31272 }, { "epoch": 0.5806039588152516, "grad_norm": 0.41704824566841125, "learning_rate": 7.494531090227533e-06, "loss": 0.3616, "step": 31274 }, { "epoch": 0.5806410889526702, "grad_norm": 0.1835106760263443, "learning_rate": 7.493401822456667e-06, "loss": 0.2539, "step": 31276 }, { "epoch": 0.5806782190900889, "grad_norm": 0.3414742648601532, "learning_rate": 7.492272588793093e-06, "loss": 0.2318, "step": 31278 }, { "epoch": 0.5807153492275074, "grad_norm": 0.5324370265007019, "learning_rate": 7.491143389252176e-06, "loss": 0.3272, "step": 31280 }, { "epoch": 0.5807524793649261, "grad_norm": 0.388435035943985, "learning_rate": 7.4900142238492826e-06, "loss": 0.2675, "step": 31282 }, { "epoch": 0.5807896095023448, "grad_norm": 0.21172888576984406, "learning_rate": 7.488885092599779e-06, "loss": 0.1862, "step": 31284 }, { "epoch": 0.5808267396397634, "grad_norm": 0.2763361930847168, "learning_rate": 7.487755995519026e-06, "loss": 0.2257, "step": 31286 }, { "epoch": 0.5808638697771821, "grad_norm": 0.27705878019332886, "learning_rate": 7.486626932622388e-06, "loss": 0.4772, "step": 31288 }, { "epoch": 0.5809009999146006, "grad_norm": 0.40326008200645447, "learning_rate": 7.485497903925234e-06, "loss": 0.1828, "step": 31290 }, { "epoch": 0.5809381300520193, "grad_norm": 0.8272413015365601, "learning_rate": 7.484368909442918e-06, "loss": 0.3258, "step": 31292 }, { "epoch": 0.580975260189438, "grad_norm": 0.4801693558692932, "learning_rate": 7.483239949190805e-06, "loss": 0.1799, "step": 31294 }, { "epoch": 0.5810123903268566, "grad_norm": 0.3801548182964325, "learning_rate": 7.48211102318426e-06, "loss": 0.2798, "step": 31296 }, { "epoch": 0.5810495204642753, "grad_norm": 0.3058623969554901, "learning_rate": 7.48098213143864e-06, "loss": 0.3793, "step": 31298 }, { "epoch": 0.5810866506016938, "grad_norm": 0.28413820266723633, "learning_rate": 7.4798532739693085e-06, "loss": 0.3551, "step": 31300 }, { "epoch": 0.5811237807391125, "grad_norm": 0.18476887047290802, "learning_rate": 7.478724450791628e-06, "loss": 0.1957, "step": 31302 }, { "epoch": 0.5811609108765311, "grad_norm": 0.4460327625274658, "learning_rate": 7.477595661920953e-06, "loss": 0.2001, "step": 31304 }, { "epoch": 0.5811980410139498, "grad_norm": 0.542113721370697, "learning_rate": 7.4764669073726435e-06, "loss": 0.2242, "step": 31306 }, { "epoch": 0.5812351711513685, "grad_norm": 0.37249520421028137, "learning_rate": 7.4753381871620645e-06, "loss": 0.3522, "step": 31308 }, { "epoch": 0.581272301288787, "grad_norm": 0.4218714237213135, "learning_rate": 7.474209501304568e-06, "loss": 0.1671, "step": 31310 }, { "epoch": 0.5813094314262057, "grad_norm": 0.3553682267665863, "learning_rate": 7.473080849815515e-06, "loss": 0.2259, "step": 31312 }, { "epoch": 0.5813465615636243, "grad_norm": 0.5226209759712219, "learning_rate": 7.471952232710267e-06, "loss": 0.3675, "step": 31314 }, { "epoch": 0.581383691701043, "grad_norm": 0.4288221001625061, "learning_rate": 7.4708236500041705e-06, "loss": 0.1673, "step": 31316 }, { "epoch": 0.5814208218384617, "grad_norm": 0.36535248160362244, "learning_rate": 7.469695101712593e-06, "loss": 0.3972, "step": 31318 }, { "epoch": 0.5814579519758802, "grad_norm": 0.4311227798461914, "learning_rate": 7.468566587850882e-06, "loss": 0.2337, "step": 31320 }, { "epoch": 0.5814950821132989, "grad_norm": 0.3497351408004761, "learning_rate": 7.467438108434397e-06, "loss": 0.3453, "step": 31322 }, { "epoch": 0.5815322122507175, "grad_norm": 0.44875404238700867, "learning_rate": 7.466309663478495e-06, "loss": 0.3234, "step": 31324 }, { "epoch": 0.5815693423881362, "grad_norm": 0.22620448470115662, "learning_rate": 7.465181252998531e-06, "loss": 0.2973, "step": 31326 }, { "epoch": 0.5816064725255549, "grad_norm": 0.33616337180137634, "learning_rate": 7.464052877009857e-06, "loss": 0.3884, "step": 31328 }, { "epoch": 0.5816436026629734, "grad_norm": 0.36800092458724976, "learning_rate": 7.462924535527828e-06, "loss": 0.332, "step": 31330 }, { "epoch": 0.5816807328003921, "grad_norm": 0.4493695795536041, "learning_rate": 7.461796228567794e-06, "loss": 0.2977, "step": 31332 }, { "epoch": 0.5817178629378107, "grad_norm": 0.48661890625953674, "learning_rate": 7.46066795614511e-06, "loss": 0.3263, "step": 31334 }, { "epoch": 0.5817549930752294, "grad_norm": 0.6547520756721497, "learning_rate": 7.45953971827513e-06, "loss": 0.3438, "step": 31336 }, { "epoch": 0.5817921232126481, "grad_norm": 0.4534946382045746, "learning_rate": 7.458411514973206e-06, "loss": 0.1855, "step": 31338 }, { "epoch": 0.5818292533500666, "grad_norm": 0.4844074845314026, "learning_rate": 7.457283346254691e-06, "loss": 0.3136, "step": 31340 }, { "epoch": 0.5818663834874853, "grad_norm": 0.3138137459754944, "learning_rate": 7.456155212134929e-06, "loss": 0.2008, "step": 31342 }, { "epoch": 0.5819035136249039, "grad_norm": 0.5542809367179871, "learning_rate": 7.455027112629275e-06, "loss": 0.1143, "step": 31344 }, { "epoch": 0.5819406437623226, "grad_norm": 0.38744744658470154, "learning_rate": 7.4538990477530796e-06, "loss": 0.2222, "step": 31346 }, { "epoch": 0.5819777738997413, "grad_norm": 0.22495150566101074, "learning_rate": 7.452771017521691e-06, "loss": 0.2341, "step": 31348 }, { "epoch": 0.5820149040371598, "grad_norm": 0.3316761255264282, "learning_rate": 7.451643021950461e-06, "loss": 0.1663, "step": 31350 }, { "epoch": 0.5820520341745785, "grad_norm": 0.42802131175994873, "learning_rate": 7.450515061054737e-06, "loss": 0.3642, "step": 31352 }, { "epoch": 0.5820891643119971, "grad_norm": 0.2496299296617508, "learning_rate": 7.449387134849864e-06, "loss": 0.3945, "step": 31354 }, { "epoch": 0.5821262944494158, "grad_norm": 0.2747473418712616, "learning_rate": 7.4482592433511925e-06, "loss": 0.3296, "step": 31356 }, { "epoch": 0.5821634245868343, "grad_norm": 0.2841508686542511, "learning_rate": 7.447131386574068e-06, "loss": 0.1723, "step": 31358 }, { "epoch": 0.582200554724253, "grad_norm": 0.624060869216919, "learning_rate": 7.446003564533841e-06, "loss": 0.4475, "step": 31360 }, { "epoch": 0.5822376848616717, "grad_norm": 0.3869979977607727, "learning_rate": 7.444875777245855e-06, "loss": 0.3269, "step": 31362 }, { "epoch": 0.5822748149990903, "grad_norm": 0.24494224786758423, "learning_rate": 7.443748024725459e-06, "loss": 0.3183, "step": 31364 }, { "epoch": 0.582311945136509, "grad_norm": 0.42008814215660095, "learning_rate": 7.442620306987991e-06, "loss": 0.3629, "step": 31366 }, { "epoch": 0.5823490752739275, "grad_norm": 0.2734806537628174, "learning_rate": 7.4414926240488e-06, "loss": 0.3491, "step": 31368 }, { "epoch": 0.5823862054113462, "grad_norm": 0.3839620351791382, "learning_rate": 7.440364975923233e-06, "loss": 0.4229, "step": 31370 }, { "epoch": 0.5824233355487649, "grad_norm": 0.422510027885437, "learning_rate": 7.43923736262663e-06, "loss": 0.3187, "step": 31372 }, { "epoch": 0.5824604656861835, "grad_norm": 0.2778130769729614, "learning_rate": 7.438109784174336e-06, "loss": 0.3011, "step": 31374 }, { "epoch": 0.5824975958236022, "grad_norm": 0.28930965065956116, "learning_rate": 7.436982240581696e-06, "loss": 0.4143, "step": 31376 }, { "epoch": 0.5825347259610207, "grad_norm": 0.2550574541091919, "learning_rate": 7.435854731864053e-06, "loss": 0.1535, "step": 31378 }, { "epoch": 0.5825718560984394, "grad_norm": 0.3449892997741699, "learning_rate": 7.434727258036742e-06, "loss": 0.2671, "step": 31380 }, { "epoch": 0.5826089862358581, "grad_norm": 0.43867236375808716, "learning_rate": 7.4335998191151115e-06, "loss": 0.2301, "step": 31382 }, { "epoch": 0.5826461163732767, "grad_norm": 0.2857857942581177, "learning_rate": 7.4324724151144985e-06, "loss": 0.193, "step": 31384 }, { "epoch": 0.5826832465106954, "grad_norm": 0.35813453793525696, "learning_rate": 7.431345046050246e-06, "loss": 0.2036, "step": 31386 }, { "epoch": 0.5827203766481139, "grad_norm": 0.40862342715263367, "learning_rate": 7.430217711937692e-06, "loss": 0.214, "step": 31388 }, { "epoch": 0.5827575067855326, "grad_norm": 0.5152801871299744, "learning_rate": 7.429090412792183e-06, "loss": 0.2799, "step": 31390 }, { "epoch": 0.5827946369229513, "grad_norm": 0.6497358083724976, "learning_rate": 7.4279631486290495e-06, "loss": 0.4325, "step": 31392 }, { "epoch": 0.5828317670603699, "grad_norm": 0.36648866534233093, "learning_rate": 7.42683591946363e-06, "loss": 0.3002, "step": 31394 }, { "epoch": 0.5828688971977886, "grad_norm": 0.797605574131012, "learning_rate": 7.4257087253112695e-06, "loss": 0.3095, "step": 31396 }, { "epoch": 0.5829060273352071, "grad_norm": 0.31922563910484314, "learning_rate": 7.424581566187301e-06, "loss": 0.2466, "step": 31398 }, { "epoch": 0.5829431574726258, "grad_norm": 0.2807650566101074, "learning_rate": 7.423454442107066e-06, "loss": 0.3748, "step": 31400 }, { "epoch": 0.5829802876100444, "grad_norm": 0.33038461208343506, "learning_rate": 7.4223273530858964e-06, "loss": 0.3549, "step": 31402 }, { "epoch": 0.5830174177474631, "grad_norm": 0.5780045986175537, "learning_rate": 7.421200299139131e-06, "loss": 0.3044, "step": 31404 }, { "epoch": 0.5830545478848818, "grad_norm": 0.5376818180084229, "learning_rate": 7.420073280282103e-06, "loss": 0.1609, "step": 31406 }, { "epoch": 0.5830916780223003, "grad_norm": 0.37683767080307007, "learning_rate": 7.418946296530151e-06, "loss": 0.2816, "step": 31408 }, { "epoch": 0.583128808159719, "grad_norm": 0.31889238953590393, "learning_rate": 7.4178193478986074e-06, "loss": 0.2946, "step": 31410 }, { "epoch": 0.5831659382971376, "grad_norm": 0.4798712134361267, "learning_rate": 7.4166924344028115e-06, "loss": 0.279, "step": 31412 }, { "epoch": 0.5832030684345563, "grad_norm": 0.3796563446521759, "learning_rate": 7.415565556058091e-06, "loss": 0.3548, "step": 31414 }, { "epoch": 0.583240198571975, "grad_norm": 0.4634820520877838, "learning_rate": 7.4144387128797855e-06, "loss": 0.3351, "step": 31416 }, { "epoch": 0.5832773287093935, "grad_norm": 0.40738263726234436, "learning_rate": 7.4133119048832205e-06, "loss": 0.1525, "step": 31418 }, { "epoch": 0.5833144588468122, "grad_norm": 0.3201143145561218, "learning_rate": 7.412185132083733e-06, "loss": 0.2776, "step": 31420 }, { "epoch": 0.5833515889842308, "grad_norm": 0.25892800092697144, "learning_rate": 7.411058394496657e-06, "loss": 0.2675, "step": 31422 }, { "epoch": 0.5833887191216495, "grad_norm": 0.2496744990348816, "learning_rate": 7.409931692137318e-06, "loss": 0.1254, "step": 31424 }, { "epoch": 0.5834258492590682, "grad_norm": 0.5938456654548645, "learning_rate": 7.4088050250210505e-06, "loss": 0.3177, "step": 31426 }, { "epoch": 0.5834629793964867, "grad_norm": 0.38729992508888245, "learning_rate": 7.407678393163191e-06, "loss": 0.381, "step": 31428 }, { "epoch": 0.5835001095339054, "grad_norm": 0.5222066640853882, "learning_rate": 7.4065517965790576e-06, "loss": 0.3827, "step": 31430 }, { "epoch": 0.583537239671324, "grad_norm": 0.3405328094959259, "learning_rate": 7.405425235283987e-06, "loss": 0.2475, "step": 31432 }, { "epoch": 0.5835743698087427, "grad_norm": 0.4006451666355133, "learning_rate": 7.40429870929331e-06, "loss": 0.2578, "step": 31434 }, { "epoch": 0.5836114999461613, "grad_norm": 0.27298107743263245, "learning_rate": 7.4031722186223485e-06, "loss": 0.2662, "step": 31436 }, { "epoch": 0.5836486300835799, "grad_norm": 0.42411088943481445, "learning_rate": 7.402045763286437e-06, "loss": 0.3057, "step": 31438 }, { "epoch": 0.5836857602209986, "grad_norm": 0.3684844672679901, "learning_rate": 7.400919343300903e-06, "loss": 0.244, "step": 31440 }, { "epoch": 0.5837228903584172, "grad_norm": 0.2758946716785431, "learning_rate": 7.399792958681069e-06, "loss": 0.198, "step": 31442 }, { "epoch": 0.5837600204958359, "grad_norm": 0.4888532757759094, "learning_rate": 7.398666609442263e-06, "loss": 0.275, "step": 31444 }, { "epoch": 0.5837971506332545, "grad_norm": 0.472589373588562, "learning_rate": 7.3975402955998154e-06, "loss": 0.3844, "step": 31446 }, { "epoch": 0.5838342807706731, "grad_norm": 0.25237753987312317, "learning_rate": 7.396414017169047e-06, "loss": 0.3549, "step": 31448 }, { "epoch": 0.5838714109080918, "grad_norm": 0.3862515091896057, "learning_rate": 7.395287774165285e-06, "loss": 0.3569, "step": 31450 }, { "epoch": 0.5839085410455104, "grad_norm": 0.3240642249584198, "learning_rate": 7.394161566603855e-06, "loss": 0.3013, "step": 31452 }, { "epoch": 0.5839456711829291, "grad_norm": 0.3982669711112976, "learning_rate": 7.393035394500084e-06, "loss": 0.1658, "step": 31454 }, { "epoch": 0.5839828013203476, "grad_norm": 0.3717283308506012, "learning_rate": 7.39190925786929e-06, "loss": 0.2017, "step": 31456 }, { "epoch": 0.5840199314577663, "grad_norm": 0.34135472774505615, "learning_rate": 7.390783156726797e-06, "loss": 0.1955, "step": 31458 }, { "epoch": 0.584057061595185, "grad_norm": 0.5701931715011597, "learning_rate": 7.389657091087931e-06, "loss": 0.2218, "step": 31460 }, { "epoch": 0.5840941917326036, "grad_norm": 0.3414704501628876, "learning_rate": 7.388531060968011e-06, "loss": 0.3175, "step": 31462 }, { "epoch": 0.5841313218700223, "grad_norm": 0.5569100975990295, "learning_rate": 7.387405066382366e-06, "loss": 0.2615, "step": 31464 }, { "epoch": 0.5841684520074408, "grad_norm": 0.43285226821899414, "learning_rate": 7.38627910734631e-06, "loss": 0.4792, "step": 31466 }, { "epoch": 0.5842055821448595, "grad_norm": 0.3917894959449768, "learning_rate": 7.385153183875168e-06, "loss": 0.1886, "step": 31468 }, { "epoch": 0.5842427122822782, "grad_norm": 0.7385019063949585, "learning_rate": 7.384027295984255e-06, "loss": 0.2173, "step": 31470 }, { "epoch": 0.5842798424196968, "grad_norm": 0.43126580119132996, "learning_rate": 7.3829014436888965e-06, "loss": 0.4084, "step": 31472 }, { "epoch": 0.5843169725571155, "grad_norm": 0.522849440574646, "learning_rate": 7.38177562700441e-06, "loss": 0.5275, "step": 31474 }, { "epoch": 0.584354102694534, "grad_norm": 0.38137561082839966, "learning_rate": 7.380649845946115e-06, "loss": 0.643, "step": 31476 }, { "epoch": 0.5843912328319527, "grad_norm": 0.4658193290233612, "learning_rate": 7.379524100529334e-06, "loss": 0.1522, "step": 31478 }, { "epoch": 0.5844283629693714, "grad_norm": 0.4742436110973358, "learning_rate": 7.378398390769374e-06, "loss": 0.491, "step": 31480 }, { "epoch": 0.58446549310679, "grad_norm": 0.4431532621383667, "learning_rate": 7.377272716681563e-06, "loss": 0.1469, "step": 31482 }, { "epoch": 0.5845026232442087, "grad_norm": 0.3973146975040436, "learning_rate": 7.37614707828121e-06, "loss": 0.3057, "step": 31484 }, { "epoch": 0.5845397533816272, "grad_norm": 0.45231929421424866, "learning_rate": 7.375021475583641e-06, "loss": 0.3976, "step": 31486 }, { "epoch": 0.5845768835190459, "grad_norm": 0.5320724248886108, "learning_rate": 7.373895908604165e-06, "loss": 0.3556, "step": 31488 }, { "epoch": 0.5846140136564646, "grad_norm": 0.3774247467517853, "learning_rate": 7.372770377358098e-06, "loss": 0.2147, "step": 31490 }, { "epoch": 0.5846511437938832, "grad_norm": 0.3362526595592499, "learning_rate": 7.371644881860762e-06, "loss": 0.1999, "step": 31492 }, { "epoch": 0.5846882739313018, "grad_norm": 0.32823148369789124, "learning_rate": 7.370519422127463e-06, "loss": 0.2647, "step": 31494 }, { "epoch": 0.5847254040687204, "grad_norm": 0.525265634059906, "learning_rate": 7.369393998173516e-06, "loss": 0.0913, "step": 31496 }, { "epoch": 0.5847625342061391, "grad_norm": 0.288187712430954, "learning_rate": 7.368268610014241e-06, "loss": 0.3261, "step": 31498 }, { "epoch": 0.5847996643435578, "grad_norm": 0.4077187478542328, "learning_rate": 7.3671432576649436e-06, "loss": 0.2703, "step": 31500 }, { "epoch": 0.5848367944809764, "grad_norm": 0.25891801714897156, "learning_rate": 7.366017941140941e-06, "loss": 0.339, "step": 31502 }, { "epoch": 0.584873924618395, "grad_norm": 0.2599584758281708, "learning_rate": 7.36489266045755e-06, "loss": 0.1865, "step": 31504 }, { "epoch": 0.5849110547558136, "grad_norm": 0.3618934750556946, "learning_rate": 7.363767415630072e-06, "loss": 0.3521, "step": 31506 }, { "epoch": 0.5849481848932323, "grad_norm": 0.30013081431388855, "learning_rate": 7.3626422066738245e-06, "loss": 0.1472, "step": 31508 }, { "epoch": 0.5849853150306509, "grad_norm": 0.34922075271606445, "learning_rate": 7.361517033604114e-06, "loss": 0.2098, "step": 31510 }, { "epoch": 0.5850224451680696, "grad_norm": 0.2749541997909546, "learning_rate": 7.360391896436254e-06, "loss": 0.3397, "step": 31512 }, { "epoch": 0.5850595753054882, "grad_norm": 0.5792484283447266, "learning_rate": 7.359266795185554e-06, "loss": 0.3064, "step": 31514 }, { "epoch": 0.5850967054429068, "grad_norm": 0.2625385820865631, "learning_rate": 7.358141729867328e-06, "loss": 0.1844, "step": 31516 }, { "epoch": 0.5851338355803255, "grad_norm": 0.18893569707870483, "learning_rate": 7.3570167004968755e-06, "loss": 0.1497, "step": 31518 }, { "epoch": 0.5851709657177441, "grad_norm": 0.5120677947998047, "learning_rate": 7.35589170708951e-06, "loss": 0.4884, "step": 31520 }, { "epoch": 0.5852080958551628, "grad_norm": 0.34654709696769714, "learning_rate": 7.3547667496605365e-06, "loss": 0.338, "step": 31522 }, { "epoch": 0.5852452259925814, "grad_norm": 0.3771117925643921, "learning_rate": 7.353641828225264e-06, "loss": 0.2927, "step": 31524 }, { "epoch": 0.58528235613, "grad_norm": 0.3346656560897827, "learning_rate": 7.352516942799e-06, "loss": 0.1749, "step": 31526 }, { "epoch": 0.5853194862674187, "grad_norm": 0.36570876836776733, "learning_rate": 7.351392093397055e-06, "loss": 0.1835, "step": 31528 }, { "epoch": 0.5853566164048373, "grad_norm": 0.6522613763809204, "learning_rate": 7.350267280034726e-06, "loss": 0.225, "step": 31530 }, { "epoch": 0.585393746542256, "grad_norm": 0.3438888192176819, "learning_rate": 7.3491425027273225e-06, "loss": 0.3963, "step": 31532 }, { "epoch": 0.5854308766796746, "grad_norm": 0.3448486030101776, "learning_rate": 7.348017761490149e-06, "loss": 0.2492, "step": 31534 }, { "epoch": 0.5854680068170932, "grad_norm": 0.5372067093849182, "learning_rate": 7.346893056338508e-06, "loss": 0.3701, "step": 31536 }, { "epoch": 0.5855051369545119, "grad_norm": 0.39993029832839966, "learning_rate": 7.345768387287711e-06, "loss": 0.2831, "step": 31538 }, { "epoch": 0.5855422670919305, "grad_norm": 0.4859073758125305, "learning_rate": 7.344643754353051e-06, "loss": 0.1724, "step": 31540 }, { "epoch": 0.5855793972293492, "grad_norm": 0.32795417308807373, "learning_rate": 7.343519157549842e-06, "loss": 0.3751, "step": 31542 }, { "epoch": 0.5856165273667678, "grad_norm": 0.32972195744514465, "learning_rate": 7.3423945968933755e-06, "loss": 0.2058, "step": 31544 }, { "epoch": 0.5856536575041864, "grad_norm": 0.2241780012845993, "learning_rate": 7.341270072398957e-06, "loss": 0.17, "step": 31546 }, { "epoch": 0.5856907876416051, "grad_norm": 0.6067708730697632, "learning_rate": 7.34014558408189e-06, "loss": 0.4838, "step": 31548 }, { "epoch": 0.5857279177790237, "grad_norm": 0.5196802020072937, "learning_rate": 7.339021131957476e-06, "loss": 0.2343, "step": 31550 }, { "epoch": 0.5857650479164423, "grad_norm": 0.2722427546977997, "learning_rate": 7.337896716041012e-06, "loss": 0.2526, "step": 31552 }, { "epoch": 0.5858021780538609, "grad_norm": 0.4690447449684143, "learning_rate": 7.336772336347804e-06, "loss": 0.2416, "step": 31554 }, { "epoch": 0.5858393081912796, "grad_norm": 0.6072005033493042, "learning_rate": 7.335647992893142e-06, "loss": 0.2913, "step": 31556 }, { "epoch": 0.5858764383286983, "grad_norm": 0.341249018907547, "learning_rate": 7.334523685692332e-06, "loss": 0.1736, "step": 31558 }, { "epoch": 0.5859135684661169, "grad_norm": 0.3197457194328308, "learning_rate": 7.333399414760669e-06, "loss": 0.2647, "step": 31560 }, { "epoch": 0.5859506986035355, "grad_norm": 0.3608039617538452, "learning_rate": 7.332275180113455e-06, "loss": 0.1225, "step": 31562 }, { "epoch": 0.5859878287409541, "grad_norm": 0.27338674664497375, "learning_rate": 7.331150981765984e-06, "loss": 0.3106, "step": 31564 }, { "epoch": 0.5860249588783728, "grad_norm": 0.4414764642715454, "learning_rate": 7.330026819733558e-06, "loss": 0.3487, "step": 31566 }, { "epoch": 0.5860620890157915, "grad_norm": 0.5151707530021667, "learning_rate": 7.328902694031465e-06, "loss": 0.3351, "step": 31568 }, { "epoch": 0.5860992191532101, "grad_norm": 0.413105845451355, "learning_rate": 7.327778604675004e-06, "loss": 0.248, "step": 31570 }, { "epoch": 0.5861363492906287, "grad_norm": 0.27820658683776855, "learning_rate": 7.326654551679475e-06, "loss": 0.3669, "step": 31572 }, { "epoch": 0.5861734794280473, "grad_norm": 0.4441945552825928, "learning_rate": 7.325530535060169e-06, "loss": 0.5604, "step": 31574 }, { "epoch": 0.586210609565466, "grad_norm": 0.39767172932624817, "learning_rate": 7.3244065548323805e-06, "loss": 0.2985, "step": 31576 }, { "epoch": 0.5862477397028847, "grad_norm": 0.6023899912834167, "learning_rate": 7.323282611011404e-06, "loss": 0.1445, "step": 31578 }, { "epoch": 0.5862848698403033, "grad_norm": 0.3366793692111969, "learning_rate": 7.322158703612538e-06, "loss": 0.2848, "step": 31580 }, { "epoch": 0.5863219999777219, "grad_norm": 0.3359677195549011, "learning_rate": 7.3210348326510685e-06, "loss": 0.2832, "step": 31582 }, { "epoch": 0.5863591301151405, "grad_norm": 0.4678323268890381, "learning_rate": 7.31991099814229e-06, "loss": 0.4158, "step": 31584 }, { "epoch": 0.5863962602525592, "grad_norm": 0.2143193930387497, "learning_rate": 7.318787200101495e-06, "loss": 0.3868, "step": 31586 }, { "epoch": 0.5864333903899779, "grad_norm": 0.39950650930404663, "learning_rate": 7.317663438543974e-06, "loss": 0.2197, "step": 31588 }, { "epoch": 0.5864705205273965, "grad_norm": 0.3779835104942322, "learning_rate": 7.316539713485018e-06, "loss": 0.1988, "step": 31590 }, { "epoch": 0.5865076506648151, "grad_norm": 0.6571510434150696, "learning_rate": 7.315416024939925e-06, "loss": 0.3316, "step": 31592 }, { "epoch": 0.5865447808022337, "grad_norm": 0.37568551301956177, "learning_rate": 7.314292372923974e-06, "loss": 0.3847, "step": 31594 }, { "epoch": 0.5865819109396524, "grad_norm": 0.32305559515953064, "learning_rate": 7.31316875745246e-06, "loss": 0.3251, "step": 31596 }, { "epoch": 0.5866190410770711, "grad_norm": 0.275055468082428, "learning_rate": 7.31204517854067e-06, "loss": 0.2718, "step": 31598 }, { "epoch": 0.5866561712144897, "grad_norm": 0.5199087262153625, "learning_rate": 7.310921636203893e-06, "loss": 0.1347, "step": 31600 }, { "epoch": 0.5866933013519083, "grad_norm": 0.4450320601463318, "learning_rate": 7.30979813045742e-06, "loss": 0.3419, "step": 31602 }, { "epoch": 0.5867304314893269, "grad_norm": 0.3427060544490814, "learning_rate": 7.308674661316537e-06, "loss": 0.3143, "step": 31604 }, { "epoch": 0.5867675616267456, "grad_norm": 0.3739967942237854, "learning_rate": 7.30755122879653e-06, "loss": 0.3336, "step": 31606 }, { "epoch": 0.5868046917641642, "grad_norm": 0.5261752009391785, "learning_rate": 7.306427832912685e-06, "loss": 0.2162, "step": 31608 }, { "epoch": 0.5868418219015828, "grad_norm": 0.4019750952720642, "learning_rate": 7.305304473680288e-06, "loss": 0.3365, "step": 31610 }, { "epoch": 0.5868789520390015, "grad_norm": 0.4957427382469177, "learning_rate": 7.304181151114626e-06, "loss": 0.4845, "step": 31612 }, { "epoch": 0.5869160821764201, "grad_norm": 0.25053849816322327, "learning_rate": 7.3030578652309846e-06, "loss": 0.2467, "step": 31614 }, { "epoch": 0.5869532123138388, "grad_norm": 0.32958361506462097, "learning_rate": 7.301934616044646e-06, "loss": 0.4048, "step": 31616 }, { "epoch": 0.5869903424512574, "grad_norm": 0.5286376476287842, "learning_rate": 7.3008114035709e-06, "loss": 0.3093, "step": 31618 }, { "epoch": 0.587027472588676, "grad_norm": 0.3356854319572449, "learning_rate": 7.2996882278250215e-06, "loss": 0.2803, "step": 31620 }, { "epoch": 0.5870646027260947, "grad_norm": 0.3413086235523224, "learning_rate": 7.298565088822298e-06, "loss": 0.206, "step": 31622 }, { "epoch": 0.5871017328635133, "grad_norm": 0.4413459300994873, "learning_rate": 7.297441986578013e-06, "loss": 0.3098, "step": 31624 }, { "epoch": 0.587138863000932, "grad_norm": 0.2945999801158905, "learning_rate": 7.296318921107448e-06, "loss": 0.4644, "step": 31626 }, { "epoch": 0.5871759931383506, "grad_norm": 0.42358773946762085, "learning_rate": 7.295195892425881e-06, "loss": 0.413, "step": 31628 }, { "epoch": 0.5872131232757692, "grad_norm": 0.42129024863243103, "learning_rate": 7.294072900548602e-06, "loss": 0.3324, "step": 31630 }, { "epoch": 0.5872502534131879, "grad_norm": 0.298801988363266, "learning_rate": 7.292949945490881e-06, "loss": 0.3104, "step": 31632 }, { "epoch": 0.5872873835506065, "grad_norm": 0.5141480565071106, "learning_rate": 7.291827027268002e-06, "loss": 0.2152, "step": 31634 }, { "epoch": 0.5873245136880252, "grad_norm": 0.2527773380279541, "learning_rate": 7.290704145895249e-06, "loss": 0.3855, "step": 31636 }, { "epoch": 0.5873616438254438, "grad_norm": 0.7944961786270142, "learning_rate": 7.289581301387894e-06, "loss": 0.4471, "step": 31638 }, { "epoch": 0.5873987739628624, "grad_norm": 0.4543292820453644, "learning_rate": 7.288458493761219e-06, "loss": 0.462, "step": 31640 }, { "epoch": 0.5874359041002811, "grad_norm": 0.3127201795578003, "learning_rate": 7.2873357230305065e-06, "loss": 0.2639, "step": 31642 }, { "epoch": 0.5874730342376997, "grad_norm": 0.3707166314125061, "learning_rate": 7.286212989211024e-06, "loss": 0.169, "step": 31644 }, { "epoch": 0.5875101643751184, "grad_norm": 0.27145740389823914, "learning_rate": 7.285090292318057e-06, "loss": 0.2026, "step": 31646 }, { "epoch": 0.587547294512537, "grad_norm": 0.2943972647190094, "learning_rate": 7.2839676323668775e-06, "loss": 0.1889, "step": 31648 }, { "epoch": 0.5875844246499556, "grad_norm": 0.4220932126045227, "learning_rate": 7.282845009372762e-06, "loss": 0.3244, "step": 31650 }, { "epoch": 0.5876215547873743, "grad_norm": 0.32255157828330994, "learning_rate": 7.281722423350987e-06, "loss": 0.2239, "step": 31652 }, { "epoch": 0.5876586849247929, "grad_norm": 0.3327600359916687, "learning_rate": 7.280599874316833e-06, "loss": 0.4165, "step": 31654 }, { "epoch": 0.5876958150622116, "grad_norm": 0.21703441441059113, "learning_rate": 7.279477362285564e-06, "loss": 0.1965, "step": 31656 }, { "epoch": 0.5877329451996302, "grad_norm": 0.3859167695045471, "learning_rate": 7.278354887272462e-06, "loss": 0.3525, "step": 31658 }, { "epoch": 0.5877700753370488, "grad_norm": 0.2846670150756836, "learning_rate": 7.2772324492927946e-06, "loss": 0.2374, "step": 31660 }, { "epoch": 0.5878072054744674, "grad_norm": 0.698136031627655, "learning_rate": 7.2761100483618395e-06, "loss": 0.4735, "step": 31662 }, { "epoch": 0.5878443356118861, "grad_norm": 0.31667712330818176, "learning_rate": 7.274987684494867e-06, "loss": 0.1797, "step": 31664 }, { "epoch": 0.5878814657493048, "grad_norm": 0.28017285466194153, "learning_rate": 7.273865357707151e-06, "loss": 0.2274, "step": 31666 }, { "epoch": 0.5879185958867233, "grad_norm": 0.40834394097328186, "learning_rate": 7.272743068013962e-06, "loss": 0.1312, "step": 31668 }, { "epoch": 0.587955726024142, "grad_norm": 0.674379825592041, "learning_rate": 7.271620815430571e-06, "loss": 0.225, "step": 31670 }, { "epoch": 0.5879928561615606, "grad_norm": 0.28663137555122375, "learning_rate": 7.270498599972246e-06, "loss": 0.1194, "step": 31672 }, { "epoch": 0.5880299862989793, "grad_norm": 0.308267742395401, "learning_rate": 7.269376421654259e-06, "loss": 0.1885, "step": 31674 }, { "epoch": 0.588067116436398, "grad_norm": 0.2910693883895874, "learning_rate": 7.2682542804918836e-06, "loss": 0.2459, "step": 31676 }, { "epoch": 0.5881042465738165, "grad_norm": 0.29778817296028137, "learning_rate": 7.267132176500381e-06, "loss": 0.3687, "step": 31678 }, { "epoch": 0.5881413767112352, "grad_norm": 0.4061230719089508, "learning_rate": 7.266010109695028e-06, "loss": 0.4514, "step": 31680 }, { "epoch": 0.5881785068486538, "grad_norm": 0.6901503801345825, "learning_rate": 7.2648880800910845e-06, "loss": 0.3275, "step": 31682 }, { "epoch": 0.5882156369860725, "grad_norm": 0.33507126569747925, "learning_rate": 7.263766087703822e-06, "loss": 0.3252, "step": 31684 }, { "epoch": 0.5882527671234912, "grad_norm": 0.41105884313583374, "learning_rate": 7.2626441325485065e-06, "loss": 0.2266, "step": 31686 }, { "epoch": 0.5882898972609097, "grad_norm": 0.3871004581451416, "learning_rate": 7.261522214640406e-06, "loss": 0.0916, "step": 31688 }, { "epoch": 0.5883270273983284, "grad_norm": 0.4171714782714844, "learning_rate": 7.260400333994786e-06, "loss": 0.4098, "step": 31690 }, { "epoch": 0.588364157535747, "grad_norm": 0.4369262158870697, "learning_rate": 7.2592784906269134e-06, "loss": 0.4129, "step": 31692 }, { "epoch": 0.5884012876731657, "grad_norm": 0.47549691796302795, "learning_rate": 7.258156684552048e-06, "loss": 0.1689, "step": 31694 }, { "epoch": 0.5884384178105844, "grad_norm": 0.28676649928092957, "learning_rate": 7.257034915785457e-06, "loss": 0.1609, "step": 31696 }, { "epoch": 0.5884755479480029, "grad_norm": 0.4710913896560669, "learning_rate": 7.255913184342404e-06, "loss": 0.2452, "step": 31698 }, { "epoch": 0.5885126780854216, "grad_norm": 0.5212896466255188, "learning_rate": 7.254791490238156e-06, "loss": 0.5185, "step": 31700 }, { "epoch": 0.5885498082228402, "grad_norm": 0.7548796534538269, "learning_rate": 7.2536698334879704e-06, "loss": 0.3305, "step": 31702 }, { "epoch": 0.5885869383602589, "grad_norm": 0.2140730321407318, "learning_rate": 7.2525482141071135e-06, "loss": 0.1599, "step": 31704 }, { "epoch": 0.5886240684976775, "grad_norm": 0.4729933738708496, "learning_rate": 7.2514266321108484e-06, "loss": 0.3249, "step": 31706 }, { "epoch": 0.5886611986350961, "grad_norm": 0.47176676988601685, "learning_rate": 7.250305087514431e-06, "loss": 0.2916, "step": 31708 }, { "epoch": 0.5886983287725148, "grad_norm": 0.3367743492126465, "learning_rate": 7.2491835803331255e-06, "loss": 0.3558, "step": 31710 }, { "epoch": 0.5887354589099334, "grad_norm": 0.340731680393219, "learning_rate": 7.24806211058219e-06, "loss": 0.2012, "step": 31712 }, { "epoch": 0.5887725890473521, "grad_norm": 0.5731931924819946, "learning_rate": 7.246940678276888e-06, "loss": 0.2506, "step": 31714 }, { "epoch": 0.5888097191847707, "grad_norm": 0.3500525653362274, "learning_rate": 7.245819283432476e-06, "loss": 0.4298, "step": 31716 }, { "epoch": 0.5888468493221893, "grad_norm": 0.3710630238056183, "learning_rate": 7.244697926064217e-06, "loss": 0.3968, "step": 31718 }, { "epoch": 0.588883979459608, "grad_norm": 0.4012805223464966, "learning_rate": 7.243576606187363e-06, "loss": 0.2583, "step": 31720 }, { "epoch": 0.5889211095970266, "grad_norm": 0.8543648719787598, "learning_rate": 7.242455323817177e-06, "loss": 0.3058, "step": 31722 }, { "epoch": 0.5889582397344453, "grad_norm": 0.22527900338172913, "learning_rate": 7.2413340789689114e-06, "loss": 0.0547, "step": 31724 }, { "epoch": 0.5889953698718638, "grad_norm": 0.5282771587371826, "learning_rate": 7.2402128716578265e-06, "loss": 0.4259, "step": 31726 }, { "epoch": 0.5890325000092825, "grad_norm": 0.2811391353607178, "learning_rate": 7.239091701899179e-06, "loss": 0.4504, "step": 31728 }, { "epoch": 0.5890696301467012, "grad_norm": 0.3888522684574127, "learning_rate": 7.237970569708226e-06, "loss": 0.3129, "step": 31730 }, { "epoch": 0.5891067602841198, "grad_norm": 0.2887738049030304, "learning_rate": 7.236849475100218e-06, "loss": 0.0957, "step": 31732 }, { "epoch": 0.5891438904215385, "grad_norm": 0.4300808608531952, "learning_rate": 7.235728418090411e-06, "loss": 0.3205, "step": 31734 }, { "epoch": 0.589181020558957, "grad_norm": 0.3292039632797241, "learning_rate": 7.23460739869406e-06, "loss": 0.4345, "step": 31736 }, { "epoch": 0.5892181506963757, "grad_norm": 0.42816850543022156, "learning_rate": 7.2334864169264196e-06, "loss": 0.4088, "step": 31738 }, { "epoch": 0.5892552808337944, "grad_norm": 0.5426253080368042, "learning_rate": 7.232365472802744e-06, "loss": 0.3536, "step": 31740 }, { "epoch": 0.589292410971213, "grad_norm": 0.24948692321777344, "learning_rate": 7.2312445663382825e-06, "loss": 0.3238, "step": 31742 }, { "epoch": 0.5893295411086317, "grad_norm": 0.4598807394504547, "learning_rate": 7.230123697548292e-06, "loss": 0.2677, "step": 31744 }, { "epoch": 0.5893666712460502, "grad_norm": 0.33556583523750305, "learning_rate": 7.229002866448018e-06, "loss": 0.2597, "step": 31746 }, { "epoch": 0.5894038013834689, "grad_norm": 0.4495238661766052, "learning_rate": 7.227882073052715e-06, "loss": 0.2128, "step": 31748 }, { "epoch": 0.5894409315208876, "grad_norm": 0.45971202850341797, "learning_rate": 7.226761317377633e-06, "loss": 0.4362, "step": 31750 }, { "epoch": 0.5894780616583062, "grad_norm": 0.5171971321105957, "learning_rate": 7.225640599438026e-06, "loss": 0.1982, "step": 31752 }, { "epoch": 0.5895151917957249, "grad_norm": 0.30257871747016907, "learning_rate": 7.224519919249137e-06, "loss": 0.2379, "step": 31754 }, { "epoch": 0.5895523219331434, "grad_norm": 0.3692000210285187, "learning_rate": 7.223399276826222e-06, "loss": 0.1298, "step": 31756 }, { "epoch": 0.5895894520705621, "grad_norm": 0.5173700451850891, "learning_rate": 7.222278672184523e-06, "loss": 0.226, "step": 31758 }, { "epoch": 0.5896265822079807, "grad_norm": 0.6413261294364929, "learning_rate": 7.22115810533929e-06, "loss": 0.2394, "step": 31760 }, { "epoch": 0.5896637123453994, "grad_norm": 0.3544710874557495, "learning_rate": 7.220037576305775e-06, "loss": 0.2489, "step": 31762 }, { "epoch": 0.5897008424828181, "grad_norm": 0.4805065095424652, "learning_rate": 7.2189170850992185e-06, "loss": 0.4889, "step": 31764 }, { "epoch": 0.5897379726202366, "grad_norm": 0.5835082530975342, "learning_rate": 7.2177966317348704e-06, "loss": 0.4198, "step": 31766 }, { "epoch": 0.5897751027576553, "grad_norm": 0.3729493021965027, "learning_rate": 7.2166762162279814e-06, "loss": 0.2666, "step": 31768 }, { "epoch": 0.5898122328950739, "grad_norm": 0.1929493099451065, "learning_rate": 7.215555838593786e-06, "loss": 0.1567, "step": 31770 }, { "epoch": 0.5898493630324926, "grad_norm": 0.3131501376628876, "learning_rate": 7.2144354988475375e-06, "loss": 0.2633, "step": 31772 }, { "epoch": 0.5898864931699113, "grad_norm": 0.4871968924999237, "learning_rate": 7.213315197004479e-06, "loss": 0.3372, "step": 31774 }, { "epoch": 0.5899236233073298, "grad_norm": 0.3772599399089813, "learning_rate": 7.212194933079853e-06, "loss": 0.4734, "step": 31776 }, { "epoch": 0.5899607534447485, "grad_norm": 0.26847440004348755, "learning_rate": 7.2110747070889005e-06, "loss": 0.1963, "step": 31778 }, { "epoch": 0.5899978835821671, "grad_norm": 0.30690836906433105, "learning_rate": 7.20995451904687e-06, "loss": 0.3431, "step": 31780 }, { "epoch": 0.5900350137195858, "grad_norm": 0.32768118381500244, "learning_rate": 7.2088343689690045e-06, "loss": 0.4049, "step": 31782 }, { "epoch": 0.5900721438570045, "grad_norm": 0.48004692792892456, "learning_rate": 7.207714256870542e-06, "loss": 0.3866, "step": 31784 }, { "epoch": 0.590109273994423, "grad_norm": 0.37556806206703186, "learning_rate": 7.20659418276672e-06, "loss": 0.4233, "step": 31786 }, { "epoch": 0.5901464041318417, "grad_norm": 0.3070814311504364, "learning_rate": 7.205474146672786e-06, "loss": 0.3252, "step": 31788 }, { "epoch": 0.5901835342692603, "grad_norm": 0.22532576322555542, "learning_rate": 7.204354148603977e-06, "loss": 0.2487, "step": 31790 }, { "epoch": 0.590220664406679, "grad_norm": 0.2723808288574219, "learning_rate": 7.203234188575537e-06, "loss": 0.3176, "step": 31792 }, { "epoch": 0.5902577945440977, "grad_norm": 0.42170941829681396, "learning_rate": 7.202114266602702e-06, "loss": 0.2805, "step": 31794 }, { "epoch": 0.5902949246815162, "grad_norm": 0.3466981053352356, "learning_rate": 7.2009943827007126e-06, "loss": 0.2003, "step": 31796 }, { "epoch": 0.5903320548189349, "grad_norm": 0.48853299021720886, "learning_rate": 7.199874536884801e-06, "loss": 0.1956, "step": 31798 }, { "epoch": 0.5903691849563535, "grad_norm": 0.35713034868240356, "learning_rate": 7.198754729170212e-06, "loss": 0.1366, "step": 31800 }, { "epoch": 0.5904063150937722, "grad_norm": 0.525822103023529, "learning_rate": 7.19763495957218e-06, "loss": 0.3041, "step": 31802 }, { "epoch": 0.5904434452311909, "grad_norm": 0.2929930090904236, "learning_rate": 7.196515228105943e-06, "loss": 0.211, "step": 31804 }, { "epoch": 0.5904805753686094, "grad_norm": 0.4529709815979004, "learning_rate": 7.195395534786737e-06, "loss": 0.339, "step": 31806 }, { "epoch": 0.5905177055060281, "grad_norm": 0.30272164940834045, "learning_rate": 7.194275879629797e-06, "loss": 0.1839, "step": 31808 }, { "epoch": 0.5905548356434467, "grad_norm": 0.4891115427017212, "learning_rate": 7.193156262650356e-06, "loss": 0.3068, "step": 31810 }, { "epoch": 0.5905919657808654, "grad_norm": 0.30060839653015137, "learning_rate": 7.19203668386365e-06, "loss": 0.2607, "step": 31812 }, { "epoch": 0.5906290959182839, "grad_norm": 0.44671860337257385, "learning_rate": 7.1909171432849166e-06, "loss": 0.2802, "step": 31814 }, { "epoch": 0.5906662260557026, "grad_norm": 0.6356915235519409, "learning_rate": 7.189797640929386e-06, "loss": 0.3191, "step": 31816 }, { "epoch": 0.5907033561931213, "grad_norm": 0.3474804162979126, "learning_rate": 7.188678176812295e-06, "loss": 0.3198, "step": 31818 }, { "epoch": 0.5907404863305399, "grad_norm": 0.2997127175331116, "learning_rate": 7.187558750948868e-06, "loss": 0.4235, "step": 31820 }, { "epoch": 0.5907776164679586, "grad_norm": 0.4708705246448517, "learning_rate": 7.186439363354344e-06, "loss": 0.3383, "step": 31822 }, { "epoch": 0.5908147466053771, "grad_norm": 0.5596514344215393, "learning_rate": 7.185320014043951e-06, "loss": 0.348, "step": 31824 }, { "epoch": 0.5908518767427958, "grad_norm": 0.2771008610725403, "learning_rate": 7.184200703032925e-06, "loss": 0.2181, "step": 31826 }, { "epoch": 0.5908890068802145, "grad_norm": 0.19368411600589752, "learning_rate": 7.183081430336491e-06, "loss": 0.2349, "step": 31828 }, { "epoch": 0.5909261370176331, "grad_norm": 0.3257860541343689, "learning_rate": 7.18196219596988e-06, "loss": 0.067, "step": 31830 }, { "epoch": 0.5909632671550518, "grad_norm": 0.4943435788154602, "learning_rate": 7.180842999948328e-06, "loss": 0.6303, "step": 31832 }, { "epoch": 0.5910003972924703, "grad_norm": 0.33831918239593506, "learning_rate": 7.179723842287054e-06, "loss": 0.289, "step": 31834 }, { "epoch": 0.591037527429889, "grad_norm": 0.24263888597488403, "learning_rate": 7.17860472300129e-06, "loss": 0.1256, "step": 31836 }, { "epoch": 0.5910746575673077, "grad_norm": 0.4024035632610321, "learning_rate": 7.177485642106268e-06, "loss": 0.2259, "step": 31838 }, { "epoch": 0.5911117877047263, "grad_norm": 0.35957103967666626, "learning_rate": 7.1763665996172084e-06, "loss": 0.1774, "step": 31840 }, { "epoch": 0.591148917842145, "grad_norm": 0.3907424807548523, "learning_rate": 7.175247595549342e-06, "loss": 0.272, "step": 31842 }, { "epoch": 0.5911860479795635, "grad_norm": 0.2327420562505722, "learning_rate": 7.1741286299179e-06, "loss": 0.3407, "step": 31844 }, { "epoch": 0.5912231781169822, "grad_norm": 0.2771170139312744, "learning_rate": 7.173009702738097e-06, "loss": 0.2861, "step": 31846 }, { "epoch": 0.5912603082544009, "grad_norm": 0.6723152995109558, "learning_rate": 7.171890814025168e-06, "loss": 0.2464, "step": 31848 }, { "epoch": 0.5912974383918195, "grad_norm": 0.34809747338294983, "learning_rate": 7.17077196379433e-06, "loss": 0.3478, "step": 31850 }, { "epoch": 0.5913345685292382, "grad_norm": 0.352260023355484, "learning_rate": 7.169653152060812e-06, "loss": 0.1227, "step": 31852 }, { "epoch": 0.5913716986666567, "grad_norm": 0.3397088944911957, "learning_rate": 7.1685343788398374e-06, "loss": 0.2267, "step": 31854 }, { "epoch": 0.5914088288040754, "grad_norm": 0.3706021308898926, "learning_rate": 7.167415644146631e-06, "loss": 0.2992, "step": 31856 }, { "epoch": 0.591445958941494, "grad_norm": 0.2912646234035492, "learning_rate": 7.166296947996411e-06, "loss": 0.2023, "step": 31858 }, { "epoch": 0.5914830890789127, "grad_norm": 0.3106619417667389, "learning_rate": 7.165178290404402e-06, "loss": 0.3426, "step": 31860 }, { "epoch": 0.5915202192163314, "grad_norm": 0.24913892149925232, "learning_rate": 7.164059671385825e-06, "loss": 0.2955, "step": 31862 }, { "epoch": 0.5915573493537499, "grad_norm": 0.34791985154151917, "learning_rate": 7.1629410909559e-06, "loss": 0.3267, "step": 31864 }, { "epoch": 0.5915944794911686, "grad_norm": 0.22536355257034302, "learning_rate": 7.161822549129849e-06, "loss": 0.2333, "step": 31866 }, { "epoch": 0.5916316096285872, "grad_norm": 0.33451002836227417, "learning_rate": 7.1607040459228924e-06, "loss": 0.4063, "step": 31868 }, { "epoch": 0.5916687397660059, "grad_norm": 0.2724950611591339, "learning_rate": 7.159585581350253e-06, "loss": 0.3091, "step": 31870 }, { "epoch": 0.5917058699034246, "grad_norm": 0.31680744886398315, "learning_rate": 7.158467155427142e-06, "loss": 0.3817, "step": 31872 }, { "epoch": 0.5917430000408431, "grad_norm": 0.32738280296325684, "learning_rate": 7.1573487681687806e-06, "loss": 0.454, "step": 31874 }, { "epoch": 0.5917801301782618, "grad_norm": 0.3219420611858368, "learning_rate": 7.156230419590388e-06, "loss": 0.1233, "step": 31876 }, { "epoch": 0.5918172603156804, "grad_norm": 0.4125814735889435, "learning_rate": 7.155112109707183e-06, "loss": 0.2718, "step": 31878 }, { "epoch": 0.5918543904530991, "grad_norm": 0.5096237659454346, "learning_rate": 7.153993838534379e-06, "loss": 0.3371, "step": 31880 }, { "epoch": 0.5918915205905178, "grad_norm": 0.5044049024581909, "learning_rate": 7.152875606087198e-06, "loss": 0.3438, "step": 31882 }, { "epoch": 0.5919286507279363, "grad_norm": 0.25340163707733154, "learning_rate": 7.151757412380849e-06, "loss": 0.1829, "step": 31884 }, { "epoch": 0.591965780865355, "grad_norm": 0.4396693706512451, "learning_rate": 7.150639257430549e-06, "loss": 0.1754, "step": 31886 }, { "epoch": 0.5920029110027736, "grad_norm": 0.3053189516067505, "learning_rate": 7.149521141251514e-06, "loss": 0.4049, "step": 31888 }, { "epoch": 0.5920400411401923, "grad_norm": 0.4439174234867096, "learning_rate": 7.14840306385896e-06, "loss": 0.2853, "step": 31890 }, { "epoch": 0.592077171277611, "grad_norm": 0.5069766640663147, "learning_rate": 7.147285025268096e-06, "loss": 0.4567, "step": 31892 }, { "epoch": 0.5921143014150295, "grad_norm": 0.357025146484375, "learning_rate": 7.146167025494144e-06, "loss": 0.2694, "step": 31894 }, { "epoch": 0.5921514315524482, "grad_norm": 0.6058133244514465, "learning_rate": 7.145049064552306e-06, "loss": 0.3387, "step": 31896 }, { "epoch": 0.5921885616898668, "grad_norm": 0.5639526844024658, "learning_rate": 7.143931142457796e-06, "loss": 0.0967, "step": 31898 }, { "epoch": 0.5922256918272855, "grad_norm": 0.21593736112117767, "learning_rate": 7.142813259225834e-06, "loss": 0.1455, "step": 31900 }, { "epoch": 0.5922628219647041, "grad_norm": 0.6025198101997375, "learning_rate": 7.141695414871621e-06, "loss": 0.362, "step": 31902 }, { "epoch": 0.5922999521021227, "grad_norm": 0.4275647699832916, "learning_rate": 7.140577609410373e-06, "loss": 0.2282, "step": 31904 }, { "epoch": 0.5923370822395414, "grad_norm": 0.47565558552742004, "learning_rate": 7.139459842857297e-06, "loss": 0.2381, "step": 31906 }, { "epoch": 0.59237421237696, "grad_norm": 0.48718932271003723, "learning_rate": 7.13834211522761e-06, "loss": 0.341, "step": 31908 }, { "epoch": 0.5924113425143787, "grad_norm": 0.4600697159767151, "learning_rate": 7.137224426536511e-06, "loss": 0.3368, "step": 31910 }, { "epoch": 0.5924484726517972, "grad_norm": 0.3298058807849884, "learning_rate": 7.136106776799214e-06, "loss": 0.1267, "step": 31912 }, { "epoch": 0.5924856027892159, "grad_norm": 0.2750627398490906, "learning_rate": 7.134989166030924e-06, "loss": 0.0969, "step": 31914 }, { "epoch": 0.5925227329266346, "grad_norm": 0.46612748503685, "learning_rate": 7.133871594246849e-06, "loss": 0.3137, "step": 31916 }, { "epoch": 0.5925598630640532, "grad_norm": 0.36794906854629517, "learning_rate": 7.132754061462196e-06, "loss": 0.378, "step": 31918 }, { "epoch": 0.5925969932014719, "grad_norm": 0.33430978655815125, "learning_rate": 7.131636567692177e-06, "loss": 0.253, "step": 31920 }, { "epoch": 0.5926341233388904, "grad_norm": 0.3982916474342346, "learning_rate": 7.13051911295199e-06, "loss": 0.2414, "step": 31922 }, { "epoch": 0.5926712534763091, "grad_norm": 0.658501923084259, "learning_rate": 7.129401697256841e-06, "loss": 0.188, "step": 31924 }, { "epoch": 0.5927083836137278, "grad_norm": 0.3841792345046997, "learning_rate": 7.128284320621936e-06, "loss": 0.3973, "step": 31926 }, { "epoch": 0.5927455137511464, "grad_norm": 0.3503793478012085, "learning_rate": 7.12716698306248e-06, "loss": 0.3195, "step": 31928 }, { "epoch": 0.592782643888565, "grad_norm": 0.4302903115749359, "learning_rate": 7.126049684593679e-06, "loss": 0.1836, "step": 31930 }, { "epoch": 0.5928197740259836, "grad_norm": 0.3483423590660095, "learning_rate": 7.124932425230733e-06, "loss": 0.2631, "step": 31932 }, { "epoch": 0.5928569041634023, "grad_norm": 0.4045327603816986, "learning_rate": 7.123815204988844e-06, "loss": 0.3444, "step": 31934 }, { "epoch": 0.592894034300821, "grad_norm": 0.6940419673919678, "learning_rate": 7.122698023883214e-06, "loss": 0.2876, "step": 31936 }, { "epoch": 0.5929311644382396, "grad_norm": 1.8124761581420898, "learning_rate": 7.1215808819290445e-06, "loss": 0.4224, "step": 31938 }, { "epoch": 0.5929682945756583, "grad_norm": 0.3656772971153259, "learning_rate": 7.120463779141537e-06, "loss": 0.2066, "step": 31940 }, { "epoch": 0.5930054247130768, "grad_norm": 0.32985472679138184, "learning_rate": 7.119346715535896e-06, "loss": 0.2833, "step": 31942 }, { "epoch": 0.5930425548504955, "grad_norm": 0.3622593581676483, "learning_rate": 7.118229691127315e-06, "loss": 0.2956, "step": 31944 }, { "epoch": 0.5930796849879142, "grad_norm": 0.3249266445636749, "learning_rate": 7.1171127059309975e-06, "loss": 0.3316, "step": 31946 }, { "epoch": 0.5931168151253328, "grad_norm": 0.44524556398391724, "learning_rate": 7.1159957599621385e-06, "loss": 0.1417, "step": 31948 }, { "epoch": 0.5931539452627514, "grad_norm": 0.3817630410194397, "learning_rate": 7.114878853235939e-06, "loss": 0.2935, "step": 31950 }, { "epoch": 0.59319107540017, "grad_norm": 0.21469825506210327, "learning_rate": 7.113761985767599e-06, "loss": 0.2504, "step": 31952 }, { "epoch": 0.5932282055375887, "grad_norm": 0.41692715883255005, "learning_rate": 7.11264515757231e-06, "loss": 0.3634, "step": 31954 }, { "epoch": 0.5932653356750074, "grad_norm": 0.65981525182724, "learning_rate": 7.111528368665272e-06, "loss": 0.3637, "step": 31956 }, { "epoch": 0.593302465812426, "grad_norm": 1.1483608484268188, "learning_rate": 7.110411619061686e-06, "loss": 0.2631, "step": 31958 }, { "epoch": 0.5933395959498446, "grad_norm": 0.2627449333667755, "learning_rate": 7.109294908776737e-06, "loss": 0.2614, "step": 31960 }, { "epoch": 0.5933767260872632, "grad_norm": 0.35787415504455566, "learning_rate": 7.108178237825627e-06, "loss": 0.3767, "step": 31962 }, { "epoch": 0.5934138562246819, "grad_norm": 0.6688876152038574, "learning_rate": 7.10706160622355e-06, "loss": 0.352, "step": 31964 }, { "epoch": 0.5934509863621005, "grad_norm": 0.47806447744369507, "learning_rate": 7.105945013985698e-06, "loss": 0.2748, "step": 31966 }, { "epoch": 0.5934881164995192, "grad_norm": 0.4578014016151428, "learning_rate": 7.104828461127264e-06, "loss": 0.498, "step": 31968 }, { "epoch": 0.5935252466369378, "grad_norm": 0.28616851568222046, "learning_rate": 7.103711947663448e-06, "loss": 0.1701, "step": 31970 }, { "epoch": 0.5935623767743564, "grad_norm": 0.4376542568206787, "learning_rate": 7.102595473609433e-06, "loss": 0.27, "step": 31972 }, { "epoch": 0.5935995069117751, "grad_norm": 0.47028684616088867, "learning_rate": 7.1014790389804145e-06, "loss": 0.3354, "step": 31974 }, { "epoch": 0.5936366370491937, "grad_norm": 0.22200638055801392, "learning_rate": 7.100362643791587e-06, "loss": 0.1626, "step": 31976 }, { "epoch": 0.5936737671866124, "grad_norm": 0.24875736236572266, "learning_rate": 7.099246288058136e-06, "loss": 0.2794, "step": 31978 }, { "epoch": 0.593710897324031, "grad_norm": 0.5217177271842957, "learning_rate": 7.098129971795253e-06, "loss": 0.4267, "step": 31980 }, { "epoch": 0.5937480274614496, "grad_norm": 0.3891518712043762, "learning_rate": 7.097013695018136e-06, "loss": 0.3193, "step": 31982 }, { "epoch": 0.5937851575988683, "grad_norm": 0.362983375787735, "learning_rate": 7.095897457741961e-06, "loss": 0.3013, "step": 31984 }, { "epoch": 0.5938222877362869, "grad_norm": 0.44616296887397766, "learning_rate": 7.094781259981925e-06, "loss": 0.2531, "step": 31986 }, { "epoch": 0.5938594178737056, "grad_norm": 0.36302801966667175, "learning_rate": 7.093665101753212e-06, "loss": 0.2163, "step": 31988 }, { "epoch": 0.5938965480111242, "grad_norm": 0.6121655702590942, "learning_rate": 7.092548983071012e-06, "loss": 0.3208, "step": 31990 }, { "epoch": 0.5939336781485428, "grad_norm": 0.4383487403392792, "learning_rate": 7.09143290395051e-06, "loss": 0.2518, "step": 31992 }, { "epoch": 0.5939708082859615, "grad_norm": 0.40191033482551575, "learning_rate": 7.0903168644068976e-06, "loss": 0.2433, "step": 31994 }, { "epoch": 0.5940079384233801, "grad_norm": 0.45267802476882935, "learning_rate": 7.089200864455357e-06, "loss": 0.2352, "step": 31996 }, { "epoch": 0.5940450685607988, "grad_norm": 0.3091062605381012, "learning_rate": 7.088084904111073e-06, "loss": 0.2829, "step": 31998 }, { "epoch": 0.5940821986982174, "grad_norm": 0.4867989122867584, "learning_rate": 7.086968983389229e-06, "loss": 0.2713, "step": 32000 }, { "epoch": 0.594119328835636, "grad_norm": 0.4080210030078888, "learning_rate": 7.085853102305014e-06, "loss": 0.2724, "step": 32002 }, { "epoch": 0.5941564589730547, "grad_norm": 0.39662978053092957, "learning_rate": 7.084737260873606e-06, "loss": 0.2898, "step": 32004 }, { "epoch": 0.5941935891104733, "grad_norm": 0.43277570605278015, "learning_rate": 7.083621459110196e-06, "loss": 0.2786, "step": 32006 }, { "epoch": 0.594230719247892, "grad_norm": 0.5412875413894653, "learning_rate": 7.082505697029964e-06, "loss": 0.243, "step": 32008 }, { "epoch": 0.5942678493853105, "grad_norm": 0.5167099237442017, "learning_rate": 7.081389974648086e-06, "loss": 0.1281, "step": 32010 }, { "epoch": 0.5943049795227292, "grad_norm": 0.44475677609443665, "learning_rate": 7.080274291979748e-06, "loss": 0.2883, "step": 32012 }, { "epoch": 0.5943421096601479, "grad_norm": 0.32310712337493896, "learning_rate": 7.079158649040132e-06, "loss": 0.3817, "step": 32014 }, { "epoch": 0.5943792397975665, "grad_norm": 0.35012197494506836, "learning_rate": 7.078043045844421e-06, "loss": 0.2649, "step": 32016 }, { "epoch": 0.5944163699349851, "grad_norm": 0.5891090035438538, "learning_rate": 7.076927482407787e-06, "loss": 0.2092, "step": 32018 }, { "epoch": 0.5944535000724037, "grad_norm": 0.466403067111969, "learning_rate": 7.07581195874542e-06, "loss": 0.2657, "step": 32020 }, { "epoch": 0.5944906302098224, "grad_norm": 0.4455684721469879, "learning_rate": 7.074696474872489e-06, "loss": 0.2382, "step": 32022 }, { "epoch": 0.5945277603472411, "grad_norm": 0.3129173517227173, "learning_rate": 7.073581030804178e-06, "loss": 0.2216, "step": 32024 }, { "epoch": 0.5945648904846597, "grad_norm": 0.3589586317539215, "learning_rate": 7.072465626555661e-06, "loss": 0.1706, "step": 32026 }, { "epoch": 0.5946020206220783, "grad_norm": 0.4635947048664093, "learning_rate": 7.071350262142122e-06, "loss": 0.2965, "step": 32028 }, { "epoch": 0.5946391507594969, "grad_norm": 0.25965920090675354, "learning_rate": 7.070234937578731e-06, "loss": 0.3181, "step": 32030 }, { "epoch": 0.5946762808969156, "grad_norm": 0.41095206141471863, "learning_rate": 7.0691196528806664e-06, "loss": 0.3135, "step": 32032 }, { "epoch": 0.5947134110343343, "grad_norm": 0.3606607913970947, "learning_rate": 7.068004408063108e-06, "loss": 0.3218, "step": 32034 }, { "epoch": 0.5947505411717529, "grad_norm": 0.4844478368759155, "learning_rate": 7.066889203141224e-06, "loss": 0.4048, "step": 32036 }, { "epoch": 0.5947876713091715, "grad_norm": 0.2948613464832306, "learning_rate": 7.065774038130194e-06, "loss": 0.1446, "step": 32038 }, { "epoch": 0.5948248014465901, "grad_norm": 0.4373939037322998, "learning_rate": 7.064658913045188e-06, "loss": 0.3039, "step": 32040 }, { "epoch": 0.5948619315840088, "grad_norm": 0.3036930561065674, "learning_rate": 7.063543827901382e-06, "loss": 0.4233, "step": 32042 }, { "epoch": 0.5948990617214275, "grad_norm": 0.589603841304779, "learning_rate": 7.0624287827139505e-06, "loss": 0.3637, "step": 32044 }, { "epoch": 0.594936191858846, "grad_norm": 0.41544055938720703, "learning_rate": 7.061313777498066e-06, "loss": 0.3079, "step": 32046 }, { "epoch": 0.5949733219962647, "grad_norm": 0.33734944462776184, "learning_rate": 7.060198812268895e-06, "loss": 0.1934, "step": 32048 }, { "epoch": 0.5950104521336833, "grad_norm": 0.39886340498924255, "learning_rate": 7.059083887041616e-06, "loss": 0.2439, "step": 32050 }, { "epoch": 0.595047582271102, "grad_norm": 0.4452390968799591, "learning_rate": 7.057969001831393e-06, "loss": 0.4094, "step": 32052 }, { "epoch": 0.5950847124085207, "grad_norm": 0.36419904232025146, "learning_rate": 7.056854156653399e-06, "loss": 0.2322, "step": 32054 }, { "epoch": 0.5951218425459393, "grad_norm": 0.7428411841392517, "learning_rate": 7.055739351522803e-06, "loss": 0.389, "step": 32056 }, { "epoch": 0.5951589726833579, "grad_norm": 0.3975401222705841, "learning_rate": 7.054624586454782e-06, "loss": 0.1818, "step": 32058 }, { "epoch": 0.5951961028207765, "grad_norm": 0.3636437654495239, "learning_rate": 7.0535098614644955e-06, "loss": 0.2116, "step": 32060 }, { "epoch": 0.5952332329581952, "grad_norm": 0.5471928715705872, "learning_rate": 7.0523951765671105e-06, "loss": 0.2247, "step": 32062 }, { "epoch": 0.5952703630956138, "grad_norm": 0.30913886427879333, "learning_rate": 7.0512805317778e-06, "loss": 0.276, "step": 32064 }, { "epoch": 0.5953074932330324, "grad_norm": 0.26910400390625, "learning_rate": 7.0501659271117275e-06, "loss": 0.2427, "step": 32066 }, { "epoch": 0.5953446233704511, "grad_norm": 0.3515623211860657, "learning_rate": 7.049051362584064e-06, "loss": 0.2653, "step": 32068 }, { "epoch": 0.5953817535078697, "grad_norm": 0.487458735704422, "learning_rate": 7.04793683820997e-06, "loss": 0.3528, "step": 32070 }, { "epoch": 0.5954188836452884, "grad_norm": 0.2506726086139679, "learning_rate": 7.046822354004617e-06, "loss": 0.2607, "step": 32072 }, { "epoch": 0.595456013782707, "grad_norm": 0.2581019997596741, "learning_rate": 7.045707909983161e-06, "loss": 0.2905, "step": 32074 }, { "epoch": 0.5954931439201256, "grad_norm": 0.24956727027893066, "learning_rate": 7.044593506160773e-06, "loss": 0.2039, "step": 32076 }, { "epoch": 0.5955302740575443, "grad_norm": 0.3519080579280853, "learning_rate": 7.043479142552614e-06, "loss": 0.3707, "step": 32078 }, { "epoch": 0.5955674041949629, "grad_norm": 0.501090407371521, "learning_rate": 7.04236481917385e-06, "loss": 0.1642, "step": 32080 }, { "epoch": 0.5956045343323816, "grad_norm": 0.5079665780067444, "learning_rate": 7.041250536039641e-06, "loss": 0.3733, "step": 32082 }, { "epoch": 0.5956416644698002, "grad_norm": 0.36113592982292175, "learning_rate": 7.040136293165152e-06, "loss": 0.2956, "step": 32084 }, { "epoch": 0.5956787946072188, "grad_norm": 0.3640531599521637, "learning_rate": 7.0390220905655395e-06, "loss": 0.1521, "step": 32086 }, { "epoch": 0.5957159247446375, "grad_norm": 0.32021060585975647, "learning_rate": 7.037907928255966e-06, "loss": 0.2449, "step": 32088 }, { "epoch": 0.5957530548820561, "grad_norm": 0.20007668435573578, "learning_rate": 7.036793806251594e-06, "loss": 0.2787, "step": 32090 }, { "epoch": 0.5957901850194748, "grad_norm": 0.2692842185497284, "learning_rate": 7.035679724567583e-06, "loss": 0.3178, "step": 32092 }, { "epoch": 0.5958273151568934, "grad_norm": 0.2839367687702179, "learning_rate": 7.034565683219092e-06, "loss": 0.3523, "step": 32094 }, { "epoch": 0.595864445294312, "grad_norm": 0.283980131149292, "learning_rate": 7.033451682221282e-06, "loss": 0.3268, "step": 32096 }, { "epoch": 0.5959015754317307, "grad_norm": 0.2893757224082947, "learning_rate": 7.032337721589305e-06, "loss": 0.2633, "step": 32098 }, { "epoch": 0.5959387055691493, "grad_norm": 0.6066461205482483, "learning_rate": 7.031223801338323e-06, "loss": 0.3882, "step": 32100 }, { "epoch": 0.595975835706568, "grad_norm": 0.43130892515182495, "learning_rate": 7.030109921483495e-06, "loss": 0.1755, "step": 32102 }, { "epoch": 0.5960129658439866, "grad_norm": 0.5042694807052612, "learning_rate": 7.028996082039971e-06, "loss": 0.3617, "step": 32104 }, { "epoch": 0.5960500959814052, "grad_norm": 0.3423978388309479, "learning_rate": 7.027882283022913e-06, "loss": 0.2971, "step": 32106 }, { "epoch": 0.5960872261188239, "grad_norm": 0.2865414023399353, "learning_rate": 7.026768524447478e-06, "loss": 0.2837, "step": 32108 }, { "epoch": 0.5961243562562425, "grad_norm": 0.3856591582298279, "learning_rate": 7.025654806328813e-06, "loss": 0.498, "step": 32110 }, { "epoch": 0.5961614863936612, "grad_norm": 0.3129841387271881, "learning_rate": 7.024541128682079e-06, "loss": 0.3237, "step": 32112 }, { "epoch": 0.5961986165310798, "grad_norm": 0.3520040810108185, "learning_rate": 7.023427491522427e-06, "loss": 0.0968, "step": 32114 }, { "epoch": 0.5962357466684984, "grad_norm": 0.19297181069850922, "learning_rate": 7.022313894865009e-06, "loss": 0.1976, "step": 32116 }, { "epoch": 0.596272876805917, "grad_norm": 0.41210314631462097, "learning_rate": 7.021200338724981e-06, "loss": 0.1956, "step": 32118 }, { "epoch": 0.5963100069433357, "grad_norm": 0.4435357451438904, "learning_rate": 7.0200868231174946e-06, "loss": 0.2347, "step": 32120 }, { "epoch": 0.5963471370807544, "grad_norm": 0.3743399977684021, "learning_rate": 7.018973348057704e-06, "loss": 0.3986, "step": 32122 }, { "epoch": 0.596384267218173, "grad_norm": 0.5370433926582336, "learning_rate": 7.0178599135607535e-06, "loss": 0.3055, "step": 32124 }, { "epoch": 0.5964213973555916, "grad_norm": 0.591663122177124, "learning_rate": 7.016746519641797e-06, "loss": 0.3292, "step": 32126 }, { "epoch": 0.5964585274930102, "grad_norm": 0.3489430546760559, "learning_rate": 7.0156331663159836e-06, "loss": 0.416, "step": 32128 }, { "epoch": 0.5964956576304289, "grad_norm": 0.4691818356513977, "learning_rate": 7.014519853598464e-06, "loss": 0.2898, "step": 32130 }, { "epoch": 0.5965327877678476, "grad_norm": 0.4787689745426178, "learning_rate": 7.013406581504388e-06, "loss": 0.2008, "step": 32132 }, { "epoch": 0.5965699179052661, "grad_norm": 0.33664363622665405, "learning_rate": 7.012293350048903e-06, "loss": 0.3116, "step": 32134 }, { "epoch": 0.5966070480426848, "grad_norm": 0.30854278802871704, "learning_rate": 7.011180159247156e-06, "loss": 0.4419, "step": 32136 }, { "epoch": 0.5966441781801034, "grad_norm": 0.41785240173339844, "learning_rate": 7.010067009114293e-06, "loss": 0.2482, "step": 32138 }, { "epoch": 0.5966813083175221, "grad_norm": 0.284810870885849, "learning_rate": 7.008953899665461e-06, "loss": 0.148, "step": 32140 }, { "epoch": 0.5967184384549408, "grad_norm": 0.3168129026889801, "learning_rate": 7.007840830915809e-06, "loss": 0.2592, "step": 32142 }, { "epoch": 0.5967555685923593, "grad_norm": 0.3707023859024048, "learning_rate": 7.006727802880482e-06, "loss": 0.4346, "step": 32144 }, { "epoch": 0.596792698729778, "grad_norm": 0.5273804068565369, "learning_rate": 7.005614815574624e-06, "loss": 0.2916, "step": 32146 }, { "epoch": 0.5968298288671966, "grad_norm": 0.3138352036476135, "learning_rate": 7.004501869013377e-06, "loss": 0.1423, "step": 32148 }, { "epoch": 0.5968669590046153, "grad_norm": 0.3856821060180664, "learning_rate": 7.003388963211887e-06, "loss": 0.2476, "step": 32150 }, { "epoch": 0.596904089142034, "grad_norm": 0.2853991985321045, "learning_rate": 7.002276098185296e-06, "loss": 0.2763, "step": 32152 }, { "epoch": 0.5969412192794525, "grad_norm": 0.3874876797199249, "learning_rate": 7.001163273948752e-06, "loss": 0.2588, "step": 32154 }, { "epoch": 0.5969783494168712, "grad_norm": 0.4165259003639221, "learning_rate": 7.00005049051739e-06, "loss": 0.1325, "step": 32156 }, { "epoch": 0.5970154795542898, "grad_norm": 0.8187820911407471, "learning_rate": 6.998937747906355e-06, "loss": 0.3999, "step": 32158 }, { "epoch": 0.5970526096917085, "grad_norm": 0.3733706474304199, "learning_rate": 6.997825046130793e-06, "loss": 0.2813, "step": 32160 }, { "epoch": 0.597089739829127, "grad_norm": 0.8290793299674988, "learning_rate": 6.996712385205834e-06, "loss": 0.2395, "step": 32162 }, { "epoch": 0.5971268699665457, "grad_norm": 0.47913289070129395, "learning_rate": 6.995599765146624e-06, "loss": 0.3609, "step": 32164 }, { "epoch": 0.5971640001039644, "grad_norm": 0.3290073275566101, "learning_rate": 6.994487185968304e-06, "loss": 0.2946, "step": 32166 }, { "epoch": 0.597201130241383, "grad_norm": 0.4065541923046112, "learning_rate": 6.99337464768601e-06, "loss": 0.3089, "step": 32168 }, { "epoch": 0.5972382603788017, "grad_norm": 0.639032781124115, "learning_rate": 6.99226215031488e-06, "loss": 0.2243, "step": 32170 }, { "epoch": 0.5972753905162203, "grad_norm": 0.5234506130218506, "learning_rate": 6.9911496938700574e-06, "loss": 0.2831, "step": 32172 }, { "epoch": 0.5973125206536389, "grad_norm": 0.4382260739803314, "learning_rate": 6.99003727836667e-06, "loss": 0.167, "step": 32174 }, { "epoch": 0.5973496507910576, "grad_norm": 0.5304849743843079, "learning_rate": 6.988924903819862e-06, "loss": 0.2818, "step": 32176 }, { "epoch": 0.5973867809284762, "grad_norm": 0.2650459408760071, "learning_rate": 6.987812570244765e-06, "loss": 0.2675, "step": 32178 }, { "epoch": 0.5974239110658949, "grad_norm": 0.43515804409980774, "learning_rate": 6.986700277656517e-06, "loss": 0.3576, "step": 32180 }, { "epoch": 0.5974610412033134, "grad_norm": 0.2797057330608368, "learning_rate": 6.985588026070251e-06, "loss": 0.2644, "step": 32182 }, { "epoch": 0.5974981713407321, "grad_norm": 0.32745155692100525, "learning_rate": 6.984475815501108e-06, "loss": 0.1974, "step": 32184 }, { "epoch": 0.5975353014781508, "grad_norm": 0.7134277820587158, "learning_rate": 6.9833636459642116e-06, "loss": 0.409, "step": 32186 }, { "epoch": 0.5975724316155694, "grad_norm": 0.21984697878360748, "learning_rate": 6.982251517474703e-06, "loss": 0.2418, "step": 32188 }, { "epoch": 0.5976095617529881, "grad_norm": 0.4138549268245697, "learning_rate": 6.98113943004771e-06, "loss": 0.359, "step": 32190 }, { "epoch": 0.5976466918904066, "grad_norm": 0.4999110996723175, "learning_rate": 6.980027383698366e-06, "loss": 0.1718, "step": 32192 }, { "epoch": 0.5976838220278253, "grad_norm": 0.6186845302581787, "learning_rate": 6.978915378441804e-06, "loss": 0.3815, "step": 32194 }, { "epoch": 0.597720952165244, "grad_norm": 0.4924602806568146, "learning_rate": 6.977803414293156e-06, "loss": 0.2831, "step": 32196 }, { "epoch": 0.5977580823026626, "grad_norm": 0.5660045146942139, "learning_rate": 6.976691491267551e-06, "loss": 0.2748, "step": 32198 }, { "epoch": 0.5977952124400813, "grad_norm": 0.38521608710289, "learning_rate": 6.975579609380119e-06, "loss": 0.2882, "step": 32200 }, { "epoch": 0.5978323425774998, "grad_norm": 0.4365895390510559, "learning_rate": 6.974467768645989e-06, "loss": 0.1925, "step": 32202 }, { "epoch": 0.5978694727149185, "grad_norm": 0.37729278206825256, "learning_rate": 6.973355969080288e-06, "loss": 0.3939, "step": 32204 }, { "epoch": 0.5979066028523372, "grad_norm": 0.5381395816802979, "learning_rate": 6.972244210698149e-06, "loss": 0.3173, "step": 32206 }, { "epoch": 0.5979437329897558, "grad_norm": 0.32999387383461, "learning_rate": 6.971132493514696e-06, "loss": 0.2296, "step": 32208 }, { "epoch": 0.5979808631271745, "grad_norm": 0.4245683252811432, "learning_rate": 6.97002081754506e-06, "loss": 0.4875, "step": 32210 }, { "epoch": 0.598017993264593, "grad_norm": 0.2963868975639343, "learning_rate": 6.968909182804362e-06, "loss": 0.2813, "step": 32212 }, { "epoch": 0.5980551234020117, "grad_norm": 0.40088337659835815, "learning_rate": 6.96779758930773e-06, "loss": 0.245, "step": 32214 }, { "epoch": 0.5980922535394303, "grad_norm": 0.27033665776252747, "learning_rate": 6.966686037070291e-06, "loss": 0.2149, "step": 32216 }, { "epoch": 0.598129383676849, "grad_norm": 0.25479933619499207, "learning_rate": 6.965574526107171e-06, "loss": 0.2841, "step": 32218 }, { "epoch": 0.5981665138142677, "grad_norm": 0.7035544514656067, "learning_rate": 6.964463056433489e-06, "loss": 0.1424, "step": 32220 }, { "epoch": 0.5982036439516862, "grad_norm": 0.38962796330451965, "learning_rate": 6.963351628064378e-06, "loss": 0.1934, "step": 32222 }, { "epoch": 0.5982407740891049, "grad_norm": 0.3113766014575958, "learning_rate": 6.962240241014952e-06, "loss": 0.3995, "step": 32224 }, { "epoch": 0.5982779042265235, "grad_norm": 0.36976194381713867, "learning_rate": 6.9611288953003355e-06, "loss": 0.2408, "step": 32226 }, { "epoch": 0.5983150343639422, "grad_norm": 0.2940559685230255, "learning_rate": 6.960017590935653e-06, "loss": 0.4856, "step": 32228 }, { "epoch": 0.5983521645013609, "grad_norm": 0.22092384099960327, "learning_rate": 6.958906327936028e-06, "loss": 0.3329, "step": 32230 }, { "epoch": 0.5983892946387794, "grad_norm": 0.6442883610725403, "learning_rate": 6.957795106316576e-06, "loss": 0.2353, "step": 32232 }, { "epoch": 0.5984264247761981, "grad_norm": 0.40700411796569824, "learning_rate": 6.956683926092425e-06, "loss": 0.2563, "step": 32234 }, { "epoch": 0.5984635549136167, "grad_norm": 0.43985840678215027, "learning_rate": 6.955572787278684e-06, "loss": 0.3136, "step": 32236 }, { "epoch": 0.5985006850510354, "grad_norm": 0.3280580937862396, "learning_rate": 6.954461689890479e-06, "loss": 0.3723, "step": 32238 }, { "epoch": 0.5985378151884541, "grad_norm": 0.2075342983007431, "learning_rate": 6.953350633942932e-06, "loss": 0.37, "step": 32240 }, { "epoch": 0.5985749453258726, "grad_norm": 0.14586514234542847, "learning_rate": 6.952239619451153e-06, "loss": 0.0896, "step": 32242 }, { "epoch": 0.5986120754632913, "grad_norm": 0.3405345380306244, "learning_rate": 6.951128646430264e-06, "loss": 0.4005, "step": 32244 }, { "epoch": 0.5986492056007099, "grad_norm": 0.2208608239889145, "learning_rate": 6.950017714895382e-06, "loss": 0.2462, "step": 32246 }, { "epoch": 0.5986863357381286, "grad_norm": 0.33383479714393616, "learning_rate": 6.948906824861628e-06, "loss": 0.3729, "step": 32248 }, { "epoch": 0.5987234658755473, "grad_norm": 0.34075966477394104, "learning_rate": 6.9477959763441076e-06, "loss": 0.2483, "step": 32250 }, { "epoch": 0.5987605960129658, "grad_norm": 0.3031713366508484, "learning_rate": 6.946685169357943e-06, "loss": 0.4888, "step": 32252 }, { "epoch": 0.5987977261503845, "grad_norm": 0.3562350869178772, "learning_rate": 6.945574403918247e-06, "loss": 0.3611, "step": 32254 }, { "epoch": 0.5988348562878031, "grad_norm": 0.2112765610218048, "learning_rate": 6.944463680040135e-06, "loss": 0.1677, "step": 32256 }, { "epoch": 0.5988719864252218, "grad_norm": 0.4124261736869812, "learning_rate": 6.943352997738717e-06, "loss": 0.3567, "step": 32258 }, { "epoch": 0.5989091165626405, "grad_norm": 0.2734866738319397, "learning_rate": 6.9422423570291155e-06, "loss": 0.2741, "step": 32260 }, { "epoch": 0.598946246700059, "grad_norm": 0.2706665098667145, "learning_rate": 6.941131757926434e-06, "loss": 0.245, "step": 32262 }, { "epoch": 0.5989833768374777, "grad_norm": 0.3233645260334015, "learning_rate": 6.940021200445784e-06, "loss": 0.3452, "step": 32264 }, { "epoch": 0.5990205069748963, "grad_norm": 0.516941487789154, "learning_rate": 6.93891068460228e-06, "loss": 0.3022, "step": 32266 }, { "epoch": 0.599057637112315, "grad_norm": 0.31668367981910706, "learning_rate": 6.937800210411034e-06, "loss": 0.2686, "step": 32268 }, { "epoch": 0.5990947672497335, "grad_norm": 0.4152942895889282, "learning_rate": 6.936689777887156e-06, "loss": 0.3642, "step": 32270 }, { "epoch": 0.5991318973871522, "grad_norm": 0.49711453914642334, "learning_rate": 6.935579387045754e-06, "loss": 0.2998, "step": 32272 }, { "epoch": 0.5991690275245709, "grad_norm": 0.43986788392066956, "learning_rate": 6.934469037901937e-06, "loss": 0.3765, "step": 32274 }, { "epoch": 0.5992061576619895, "grad_norm": 0.47349226474761963, "learning_rate": 6.9333587304708135e-06, "loss": 0.4092, "step": 32276 }, { "epoch": 0.5992432877994082, "grad_norm": 0.37648308277130127, "learning_rate": 6.9322484647674906e-06, "loss": 0.1533, "step": 32278 }, { "epoch": 0.5992804179368267, "grad_norm": 0.554593563079834, "learning_rate": 6.931138240807078e-06, "loss": 0.1686, "step": 32280 }, { "epoch": 0.5993175480742454, "grad_norm": 0.5181983709335327, "learning_rate": 6.930028058604683e-06, "loss": 0.2623, "step": 32282 }, { "epoch": 0.5993546782116641, "grad_norm": 0.28140196204185486, "learning_rate": 6.928917918175409e-06, "loss": 0.0696, "step": 32284 }, { "epoch": 0.5993918083490827, "grad_norm": 0.5067324638366699, "learning_rate": 6.927807819534368e-06, "loss": 0.1395, "step": 32286 }, { "epoch": 0.5994289384865014, "grad_norm": 0.5374001860618591, "learning_rate": 6.926697762696654e-06, "loss": 0.5165, "step": 32288 }, { "epoch": 0.5994660686239199, "grad_norm": 0.40321382880210876, "learning_rate": 6.925587747677381e-06, "loss": 0.3077, "step": 32290 }, { "epoch": 0.5995031987613386, "grad_norm": 0.5801188349723816, "learning_rate": 6.924477774491649e-06, "loss": 0.2256, "step": 32292 }, { "epoch": 0.5995403288987573, "grad_norm": 0.41153010725975037, "learning_rate": 6.923367843154562e-06, "loss": 0.2536, "step": 32294 }, { "epoch": 0.5995774590361759, "grad_norm": 0.40889444947242737, "learning_rate": 6.922257953681222e-06, "loss": 0.2995, "step": 32296 }, { "epoch": 0.5996145891735946, "grad_norm": 0.2777296006679535, "learning_rate": 6.9211481060867365e-06, "loss": 0.2279, "step": 32298 }, { "epoch": 0.5996517193110131, "grad_norm": 0.39582282304763794, "learning_rate": 6.920038300386201e-06, "loss": 0.1537, "step": 32300 }, { "epoch": 0.5996888494484318, "grad_norm": 0.24680942296981812, "learning_rate": 6.9189285365947155e-06, "loss": 0.3372, "step": 32302 }, { "epoch": 0.5997259795858505, "grad_norm": 0.2839396893978119, "learning_rate": 6.917818814727389e-06, "loss": 0.3197, "step": 32304 }, { "epoch": 0.5997631097232691, "grad_norm": 0.33715304732322693, "learning_rate": 6.916709134799311e-06, "loss": 0.3246, "step": 32306 }, { "epoch": 0.5998002398606878, "grad_norm": 0.3624929189682007, "learning_rate": 6.915599496825588e-06, "loss": 0.3152, "step": 32308 }, { "epoch": 0.5998373699981063, "grad_norm": 0.4930504560470581, "learning_rate": 6.91448990082132e-06, "loss": 0.1805, "step": 32310 }, { "epoch": 0.599874500135525, "grad_norm": 0.23059092462062836, "learning_rate": 6.913380346801599e-06, "loss": 0.3819, "step": 32312 }, { "epoch": 0.5999116302729436, "grad_norm": 0.42047372460365295, "learning_rate": 6.912270834781528e-06, "loss": 0.3256, "step": 32314 }, { "epoch": 0.5999487604103623, "grad_norm": 0.37749379873275757, "learning_rate": 6.9111613647762e-06, "loss": 0.2435, "step": 32316 }, { "epoch": 0.599985890547781, "grad_norm": 0.7667651176452637, "learning_rate": 6.910051936800715e-06, "loss": 0.3469, "step": 32318 }, { "epoch": 0.6000230206851995, "grad_norm": 0.5379275679588318, "learning_rate": 6.908942550870166e-06, "loss": 0.4098, "step": 32320 }, { "epoch": 0.6000601508226182, "grad_norm": 0.46825674176216125, "learning_rate": 6.907833206999652e-06, "loss": 0.2924, "step": 32322 }, { "epoch": 0.6000972809600368, "grad_norm": 0.40784215927124023, "learning_rate": 6.906723905204266e-06, "loss": 0.4538, "step": 32324 }, { "epoch": 0.6001344110974555, "grad_norm": 0.39177078008651733, "learning_rate": 6.905614645499103e-06, "loss": 0.1647, "step": 32326 }, { "epoch": 0.6001715412348742, "grad_norm": 0.2066248059272766, "learning_rate": 6.9045054278992525e-06, "loss": 0.2552, "step": 32328 }, { "epoch": 0.6002086713722927, "grad_norm": 0.5254664421081543, "learning_rate": 6.903396252419813e-06, "loss": 0.2669, "step": 32330 }, { "epoch": 0.6002458015097114, "grad_norm": 0.31111767888069153, "learning_rate": 6.902287119075874e-06, "loss": 0.1313, "step": 32332 }, { "epoch": 0.60028293164713, "grad_norm": 0.3785063922405243, "learning_rate": 6.901178027882531e-06, "loss": 0.4086, "step": 32334 }, { "epoch": 0.6003200617845487, "grad_norm": 0.3431715667247772, "learning_rate": 6.900068978854872e-06, "loss": 0.2089, "step": 32336 }, { "epoch": 0.6003571919219673, "grad_norm": 0.49535998702049255, "learning_rate": 6.8989599720079905e-06, "loss": 0.303, "step": 32338 }, { "epoch": 0.6003943220593859, "grad_norm": 0.33867746591567993, "learning_rate": 6.897851007356973e-06, "loss": 0.1183, "step": 32340 }, { "epoch": 0.6004314521968046, "grad_norm": 0.8034718632698059, "learning_rate": 6.89674208491691e-06, "loss": 0.3682, "step": 32342 }, { "epoch": 0.6004685823342232, "grad_norm": 0.3230651021003723, "learning_rate": 6.895633204702894e-06, "loss": 0.1903, "step": 32344 }, { "epoch": 0.6005057124716419, "grad_norm": 0.3249034285545349, "learning_rate": 6.894524366730009e-06, "loss": 0.2176, "step": 32346 }, { "epoch": 0.6005428426090605, "grad_norm": 0.48975130915641785, "learning_rate": 6.893415571013351e-06, "loss": 0.4711, "step": 32348 }, { "epoch": 0.6005799727464791, "grad_norm": 0.6179929971694946, "learning_rate": 6.892306817567996e-06, "loss": 0.2394, "step": 32350 }, { "epoch": 0.6006171028838978, "grad_norm": 0.2959885895252228, "learning_rate": 6.891198106409038e-06, "loss": 0.2593, "step": 32352 }, { "epoch": 0.6006542330213164, "grad_norm": 0.4248366355895996, "learning_rate": 6.890089437551562e-06, "loss": 0.2814, "step": 32354 }, { "epoch": 0.6006913631587351, "grad_norm": 0.33484944701194763, "learning_rate": 6.888980811010655e-06, "loss": 0.2525, "step": 32356 }, { "epoch": 0.6007284932961537, "grad_norm": 0.41420888900756836, "learning_rate": 6.887872226801398e-06, "loss": 0.3303, "step": 32358 }, { "epoch": 0.6007656234335723, "grad_norm": 0.36624428629875183, "learning_rate": 6.886763684938877e-06, "loss": 0.2047, "step": 32360 }, { "epoch": 0.600802753570991, "grad_norm": 0.46592411398887634, "learning_rate": 6.885655185438184e-06, "loss": 0.2133, "step": 32362 }, { "epoch": 0.6008398837084096, "grad_norm": 0.37529098987579346, "learning_rate": 6.88454672831439e-06, "loss": 0.2506, "step": 32364 }, { "epoch": 0.6008770138458283, "grad_norm": 0.5220930576324463, "learning_rate": 6.883438313582582e-06, "loss": 0.3615, "step": 32366 }, { "epoch": 0.6009141439832468, "grad_norm": 0.5191646814346313, "learning_rate": 6.882329941257847e-06, "loss": 0.2636, "step": 32368 }, { "epoch": 0.6009512741206655, "grad_norm": 0.4602196514606476, "learning_rate": 6.88122161135526e-06, "loss": 0.2221, "step": 32370 }, { "epoch": 0.6009884042580842, "grad_norm": 0.4736001789569855, "learning_rate": 6.880113323889905e-06, "loss": 0.3182, "step": 32372 }, { "epoch": 0.6010255343955028, "grad_norm": 0.563730001449585, "learning_rate": 6.879005078876868e-06, "loss": 0.4232, "step": 32374 }, { "epoch": 0.6010626645329215, "grad_norm": 0.4768717586994171, "learning_rate": 6.877896876331218e-06, "loss": 0.157, "step": 32376 }, { "epoch": 0.60109979467034, "grad_norm": 0.26671531796455383, "learning_rate": 6.876788716268044e-06, "loss": 0.2049, "step": 32378 }, { "epoch": 0.6011369248077587, "grad_norm": 0.40325430035591125, "learning_rate": 6.875680598702416e-06, "loss": 0.2696, "step": 32380 }, { "epoch": 0.6011740549451774, "grad_norm": 0.3511100709438324, "learning_rate": 6.8745725236494165e-06, "loss": 0.2577, "step": 32382 }, { "epoch": 0.601211185082596, "grad_norm": 0.319902241230011, "learning_rate": 6.873464491124125e-06, "loss": 0.1937, "step": 32384 }, { "epoch": 0.6012483152200147, "grad_norm": 0.4421692490577698, "learning_rate": 6.872356501141619e-06, "loss": 0.1382, "step": 32386 }, { "epoch": 0.6012854453574332, "grad_norm": 0.3024381101131439, "learning_rate": 6.871248553716969e-06, "loss": 0.1976, "step": 32388 }, { "epoch": 0.6013225754948519, "grad_norm": 0.3544471263885498, "learning_rate": 6.8701406488652574e-06, "loss": 0.3233, "step": 32390 }, { "epoch": 0.6013597056322706, "grad_norm": 0.303026020526886, "learning_rate": 6.869032786601553e-06, "loss": 0.4024, "step": 32392 }, { "epoch": 0.6013968357696892, "grad_norm": 0.5401151180267334, "learning_rate": 6.867924966940935e-06, "loss": 0.3826, "step": 32394 }, { "epoch": 0.6014339659071078, "grad_norm": 0.3572685420513153, "learning_rate": 6.866817189898478e-06, "loss": 0.3484, "step": 32396 }, { "epoch": 0.6014710960445264, "grad_norm": 0.5170982480049133, "learning_rate": 6.865709455489256e-06, "loss": 0.3199, "step": 32398 }, { "epoch": 0.6015082261819451, "grad_norm": 0.3436547815799713, "learning_rate": 6.864601763728339e-06, "loss": 0.3344, "step": 32400 }, { "epoch": 0.6015453563193638, "grad_norm": 0.45781105756759644, "learning_rate": 6.863494114630797e-06, "loss": 0.4065, "step": 32402 }, { "epoch": 0.6015824864567824, "grad_norm": 0.18804508447647095, "learning_rate": 6.862386508211707e-06, "loss": 0.1049, "step": 32404 }, { "epoch": 0.601619616594201, "grad_norm": 0.4620898365974426, "learning_rate": 6.861278944486138e-06, "loss": 0.1148, "step": 32406 }, { "epoch": 0.6016567467316196, "grad_norm": 0.3620264232158661, "learning_rate": 6.8601714234691616e-06, "loss": 0.249, "step": 32408 }, { "epoch": 0.6016938768690383, "grad_norm": 0.29990071058273315, "learning_rate": 6.8590639451758475e-06, "loss": 0.3186, "step": 32410 }, { "epoch": 0.601731007006457, "grad_norm": 0.3128824532032013, "learning_rate": 6.857956509621267e-06, "loss": 0.2539, "step": 32412 }, { "epoch": 0.6017681371438756, "grad_norm": 0.34034353494644165, "learning_rate": 6.856849116820484e-06, "loss": 0.4939, "step": 32414 }, { "epoch": 0.6018052672812942, "grad_norm": 0.4037137031555176, "learning_rate": 6.855741766788569e-06, "loss": 0.1733, "step": 32416 }, { "epoch": 0.6018423974187128, "grad_norm": 0.33270686864852905, "learning_rate": 6.854634459540591e-06, "loss": 0.319, "step": 32418 }, { "epoch": 0.6018795275561315, "grad_norm": 0.5087394714355469, "learning_rate": 6.853527195091618e-06, "loss": 0.2471, "step": 32420 }, { "epoch": 0.6019166576935501, "grad_norm": 0.4133280813694, "learning_rate": 6.852419973456714e-06, "loss": 0.1008, "step": 32422 }, { "epoch": 0.6019537878309688, "grad_norm": 0.4253655672073364, "learning_rate": 6.8513127946509495e-06, "loss": 0.1912, "step": 32424 }, { "epoch": 0.6019909179683874, "grad_norm": 0.3918859362602234, "learning_rate": 6.8502056586893815e-06, "loss": 0.4206, "step": 32426 }, { "epoch": 0.602028048105806, "grad_norm": 0.30853214859962463, "learning_rate": 6.849098565587081e-06, "loss": 0.1552, "step": 32428 }, { "epoch": 0.6020651782432247, "grad_norm": 0.3706313371658325, "learning_rate": 6.8479915153591125e-06, "loss": 0.212, "step": 32430 }, { "epoch": 0.6021023083806433, "grad_norm": 0.381692111492157, "learning_rate": 6.846884508020537e-06, "loss": 0.3, "step": 32432 }, { "epoch": 0.602139438518062, "grad_norm": 0.3469703197479248, "learning_rate": 6.845777543586417e-06, "loss": 0.2524, "step": 32434 }, { "epoch": 0.6021765686554806, "grad_norm": 0.3518145680427551, "learning_rate": 6.844670622071823e-06, "loss": 0.3685, "step": 32436 }, { "epoch": 0.6022136987928992, "grad_norm": 0.4700168967247009, "learning_rate": 6.843563743491804e-06, "loss": 0.3805, "step": 32438 }, { "epoch": 0.6022508289303179, "grad_norm": 0.45189833641052246, "learning_rate": 6.842456907861429e-06, "loss": 0.299, "step": 32440 }, { "epoch": 0.6022879590677365, "grad_norm": 0.3574831485748291, "learning_rate": 6.841350115195759e-06, "loss": 0.2671, "step": 32442 }, { "epoch": 0.6023250892051552, "grad_norm": 0.34665700793266296, "learning_rate": 6.840243365509851e-06, "loss": 0.295, "step": 32444 }, { "epoch": 0.6023622193425738, "grad_norm": 0.27932488918304443, "learning_rate": 6.839136658818767e-06, "loss": 0.329, "step": 32446 }, { "epoch": 0.6023993494799924, "grad_norm": 0.5271481871604919, "learning_rate": 6.838029995137565e-06, "loss": 0.2733, "step": 32448 }, { "epoch": 0.6024364796174111, "grad_norm": 0.31140831112861633, "learning_rate": 6.836923374481307e-06, "loss": 0.299, "step": 32450 }, { "epoch": 0.6024736097548297, "grad_norm": 0.296762615442276, "learning_rate": 6.8358167968650445e-06, "loss": 0.1889, "step": 32452 }, { "epoch": 0.6025107398922483, "grad_norm": 0.5563852190971375, "learning_rate": 6.834710262303837e-06, "loss": 0.3739, "step": 32454 }, { "epoch": 0.602547870029667, "grad_norm": 0.41838330030441284, "learning_rate": 6.833603770812741e-06, "loss": 0.509, "step": 32456 }, { "epoch": 0.6025850001670856, "grad_norm": 0.17578670382499695, "learning_rate": 6.8324973224068135e-06, "loss": 0.2262, "step": 32458 }, { "epoch": 0.6026221303045043, "grad_norm": 0.35536518692970276, "learning_rate": 6.8313909171011115e-06, "loss": 0.3172, "step": 32460 }, { "epoch": 0.6026592604419229, "grad_norm": 0.38685643672943115, "learning_rate": 6.8302845549106885e-06, "loss": 0.3369, "step": 32462 }, { "epoch": 0.6026963905793415, "grad_norm": 0.4160190224647522, "learning_rate": 6.829178235850598e-06, "loss": 0.2863, "step": 32464 }, { "epoch": 0.6027335207167601, "grad_norm": 0.3141392767429352, "learning_rate": 6.828071959935891e-06, "loss": 0.3522, "step": 32466 }, { "epoch": 0.6027706508541788, "grad_norm": 0.33029550313949585, "learning_rate": 6.826965727181626e-06, "loss": 0.292, "step": 32468 }, { "epoch": 0.6028077809915975, "grad_norm": 0.2849985361099243, "learning_rate": 6.825859537602851e-06, "loss": 0.2381, "step": 32470 }, { "epoch": 0.6028449111290161, "grad_norm": 0.29119938611984253, "learning_rate": 6.824753391214622e-06, "loss": 0.2892, "step": 32472 }, { "epoch": 0.6028820412664347, "grad_norm": 0.34570741653442383, "learning_rate": 6.8236472880319905e-06, "loss": 0.2646, "step": 32474 }, { "epoch": 0.6029191714038533, "grad_norm": 0.516773521900177, "learning_rate": 6.822541228070003e-06, "loss": 0.394, "step": 32476 }, { "epoch": 0.602956301541272, "grad_norm": 0.3689149022102356, "learning_rate": 6.821435211343711e-06, "loss": 0.2546, "step": 32478 }, { "epoch": 0.6029934316786907, "grad_norm": 0.35306376218795776, "learning_rate": 6.820329237868164e-06, "loss": 0.3622, "step": 32480 }, { "epoch": 0.6030305618161093, "grad_norm": 0.4598345458507538, "learning_rate": 6.819223307658415e-06, "loss": 0.3642, "step": 32482 }, { "epoch": 0.6030676919535279, "grad_norm": 0.34100160002708435, "learning_rate": 6.818117420729506e-06, "loss": 0.33, "step": 32484 }, { "epoch": 0.6031048220909465, "grad_norm": 0.38707593083381653, "learning_rate": 6.817011577096488e-06, "loss": 0.3462, "step": 32486 }, { "epoch": 0.6031419522283652, "grad_norm": 0.405487596988678, "learning_rate": 6.815905776774414e-06, "loss": 0.3477, "step": 32488 }, { "epoch": 0.6031790823657839, "grad_norm": 0.4612721800804138, "learning_rate": 6.8148000197783205e-06, "loss": 0.236, "step": 32490 }, { "epoch": 0.6032162125032025, "grad_norm": 0.3976864218711853, "learning_rate": 6.813694306123256e-06, "loss": 0.3506, "step": 32492 }, { "epoch": 0.6032533426406211, "grad_norm": 0.27394425868988037, "learning_rate": 6.812588635824271e-06, "loss": 0.2758, "step": 32494 }, { "epoch": 0.6032904727780397, "grad_norm": 0.24788857996463776, "learning_rate": 6.811483008896406e-06, "loss": 0.234, "step": 32496 }, { "epoch": 0.6033276029154584, "grad_norm": 0.33730241656303406, "learning_rate": 6.810377425354706e-06, "loss": 0.2366, "step": 32498 }, { "epoch": 0.6033647330528771, "grad_norm": 0.42708128690719604, "learning_rate": 6.8092718852142194e-06, "loss": 0.3964, "step": 32500 }, { "epoch": 0.6034018631902957, "grad_norm": 0.34661149978637695, "learning_rate": 6.8081663884899805e-06, "loss": 0.1334, "step": 32502 }, { "epoch": 0.6034389933277143, "grad_norm": 0.3175419270992279, "learning_rate": 6.807060935197037e-06, "loss": 0.2615, "step": 32504 }, { "epoch": 0.6034761234651329, "grad_norm": 0.5078348517417908, "learning_rate": 6.805955525350432e-06, "loss": 0.2524, "step": 32506 }, { "epoch": 0.6035132536025516, "grad_norm": 0.5579872727394104, "learning_rate": 6.804850158965203e-06, "loss": 0.5139, "step": 32508 }, { "epoch": 0.6035503837399703, "grad_norm": 0.5155790448188782, "learning_rate": 6.803744836056391e-06, "loss": 0.3268, "step": 32510 }, { "epoch": 0.6035875138773888, "grad_norm": 0.39170435070991516, "learning_rate": 6.8026395566390455e-06, "loss": 0.3054, "step": 32512 }, { "epoch": 0.6036246440148075, "grad_norm": 0.3989746570587158, "learning_rate": 6.801534320728192e-06, "loss": 0.1169, "step": 32514 }, { "epoch": 0.6036617741522261, "grad_norm": 0.5660011172294617, "learning_rate": 6.800429128338879e-06, "loss": 0.4021, "step": 32516 }, { "epoch": 0.6036989042896448, "grad_norm": 0.35633864998817444, "learning_rate": 6.799323979486139e-06, "loss": 0.1759, "step": 32518 }, { "epoch": 0.6037360344270634, "grad_norm": 0.31688812375068665, "learning_rate": 6.7982188741850115e-06, "loss": 0.2235, "step": 32520 }, { "epoch": 0.603773164564482, "grad_norm": 0.2559809386730194, "learning_rate": 6.797113812450538e-06, "loss": 0.2623, "step": 32522 }, { "epoch": 0.6038102947019007, "grad_norm": 0.30031338334083557, "learning_rate": 6.7960087942977526e-06, "loss": 0.3032, "step": 32524 }, { "epoch": 0.6038474248393193, "grad_norm": 0.4881744086742401, "learning_rate": 6.794903819741687e-06, "loss": 0.1683, "step": 32526 }, { "epoch": 0.603884554976738, "grad_norm": 0.2855229675769806, "learning_rate": 6.793798888797383e-06, "loss": 0.294, "step": 32528 }, { "epoch": 0.6039216851141566, "grad_norm": 0.318218469619751, "learning_rate": 6.7926940014798695e-06, "loss": 0.3111, "step": 32530 }, { "epoch": 0.6039588152515752, "grad_norm": 0.3056376874446869, "learning_rate": 6.791589157804184e-06, "loss": 0.2483, "step": 32532 }, { "epoch": 0.6039959453889939, "grad_norm": 0.46332964301109314, "learning_rate": 6.790484357785361e-06, "loss": 0.4864, "step": 32534 }, { "epoch": 0.6040330755264125, "grad_norm": 0.37451857328414917, "learning_rate": 6.7893796014384325e-06, "loss": 0.1973, "step": 32536 }, { "epoch": 0.6040702056638312, "grad_norm": 0.3718571364879608, "learning_rate": 6.788274888778434e-06, "loss": 0.3319, "step": 32538 }, { "epoch": 0.6041073358012498, "grad_norm": 0.3948982059955597, "learning_rate": 6.787170219820389e-06, "loss": 0.304, "step": 32540 }, { "epoch": 0.6041444659386684, "grad_norm": 0.3447987735271454, "learning_rate": 6.786065594579334e-06, "loss": 0.2331, "step": 32542 }, { "epoch": 0.6041815960760871, "grad_norm": 0.2462286502122879, "learning_rate": 6.784961013070299e-06, "loss": 0.1373, "step": 32544 }, { "epoch": 0.6042187262135057, "grad_norm": 0.3173312842845917, "learning_rate": 6.783856475308317e-06, "loss": 0.2307, "step": 32546 }, { "epoch": 0.6042558563509244, "grad_norm": 0.38091227412223816, "learning_rate": 6.782751981308413e-06, "loss": 0.3747, "step": 32548 }, { "epoch": 0.604292986488343, "grad_norm": 0.3523854911327362, "learning_rate": 6.7816475310856225e-06, "loss": 0.2326, "step": 32550 }, { "epoch": 0.6043301166257616, "grad_norm": 0.4816212058067322, "learning_rate": 6.780543124654964e-06, "loss": 0.2908, "step": 32552 }, { "epoch": 0.6043672467631803, "grad_norm": 0.39369356632232666, "learning_rate": 6.7794387620314694e-06, "loss": 0.4322, "step": 32554 }, { "epoch": 0.6044043769005989, "grad_norm": 0.34154409170150757, "learning_rate": 6.778334443230168e-06, "loss": 0.3477, "step": 32556 }, { "epoch": 0.6044415070380176, "grad_norm": 0.3572297692298889, "learning_rate": 6.777230168266087e-06, "loss": 0.1544, "step": 32558 }, { "epoch": 0.6044786371754362, "grad_norm": 0.3864055871963501, "learning_rate": 6.776125937154248e-06, "loss": 0.2449, "step": 32560 }, { "epoch": 0.6045157673128548, "grad_norm": 1.211503267288208, "learning_rate": 6.775021749909681e-06, "loss": 0.2319, "step": 32562 }, { "epoch": 0.6045528974502735, "grad_norm": 0.3568444848060608, "learning_rate": 6.7739176065474045e-06, "loss": 0.3187, "step": 32564 }, { "epoch": 0.6045900275876921, "grad_norm": 0.3553500771522522, "learning_rate": 6.772813507082447e-06, "loss": 0.454, "step": 32566 }, { "epoch": 0.6046271577251108, "grad_norm": 0.3139319121837616, "learning_rate": 6.771709451529833e-06, "loss": 0.4255, "step": 32568 }, { "epoch": 0.6046642878625293, "grad_norm": 0.5245634317398071, "learning_rate": 6.77060543990458e-06, "loss": 0.2041, "step": 32570 }, { "epoch": 0.604701417999948, "grad_norm": 0.39949071407318115, "learning_rate": 6.7695014722217155e-06, "loss": 0.2201, "step": 32572 }, { "epoch": 0.6047385481373666, "grad_norm": 0.4564610719680786, "learning_rate": 6.768397548496259e-06, "loss": 0.2674, "step": 32574 }, { "epoch": 0.6047756782747853, "grad_norm": 0.31416645646095276, "learning_rate": 6.767293668743236e-06, "loss": 0.1347, "step": 32576 }, { "epoch": 0.604812808412204, "grad_norm": 0.2084096372127533, "learning_rate": 6.766189832977659e-06, "loss": 0.1212, "step": 32578 }, { "epoch": 0.6048499385496225, "grad_norm": 0.3292437195777893, "learning_rate": 6.765086041214555e-06, "loss": 0.214, "step": 32580 }, { "epoch": 0.6048870686870412, "grad_norm": 0.4148804247379303, "learning_rate": 6.763982293468937e-06, "loss": 0.2818, "step": 32582 }, { "epoch": 0.6049241988244598, "grad_norm": 0.37251049280166626, "learning_rate": 6.762878589755828e-06, "loss": 0.2731, "step": 32584 }, { "epoch": 0.6049613289618785, "grad_norm": 0.3387444317340851, "learning_rate": 6.7617749300902434e-06, "loss": 0.261, "step": 32586 }, { "epoch": 0.6049984590992972, "grad_norm": 0.30089038610458374, "learning_rate": 6.7606713144872085e-06, "loss": 0.2494, "step": 32588 }, { "epoch": 0.6050355892367157, "grad_norm": 0.3548854887485504, "learning_rate": 6.75956774296173e-06, "loss": 0.3065, "step": 32590 }, { "epoch": 0.6050727193741344, "grad_norm": 0.4021078646183014, "learning_rate": 6.758464215528828e-06, "loss": 0.2859, "step": 32592 }, { "epoch": 0.605109849511553, "grad_norm": 0.3577335774898529, "learning_rate": 6.757360732203518e-06, "loss": 0.2636, "step": 32594 }, { "epoch": 0.6051469796489717, "grad_norm": 0.4125879406929016, "learning_rate": 6.7562572930008165e-06, "loss": 0.4176, "step": 32596 }, { "epoch": 0.6051841097863904, "grad_norm": 0.5143601298332214, "learning_rate": 6.755153897935738e-06, "loss": 0.3234, "step": 32598 }, { "epoch": 0.6052212399238089, "grad_norm": 0.26305949687957764, "learning_rate": 6.754050547023294e-06, "loss": 0.254, "step": 32600 }, { "epoch": 0.6052583700612276, "grad_norm": 0.36468344926834106, "learning_rate": 6.752947240278502e-06, "loss": 0.3757, "step": 32602 }, { "epoch": 0.6052955001986462, "grad_norm": 0.3187844753265381, "learning_rate": 6.751843977716368e-06, "loss": 0.1258, "step": 32604 }, { "epoch": 0.6053326303360649, "grad_norm": 0.37027421593666077, "learning_rate": 6.750740759351911e-06, "loss": 0.4116, "step": 32606 }, { "epoch": 0.6053697604734836, "grad_norm": 0.9331082701683044, "learning_rate": 6.749637585200137e-06, "loss": 0.2255, "step": 32608 }, { "epoch": 0.6054068906109021, "grad_norm": 0.4842776358127594, "learning_rate": 6.748534455276062e-06, "loss": 0.3665, "step": 32610 }, { "epoch": 0.6054440207483208, "grad_norm": 0.3069177567958832, "learning_rate": 6.747431369594691e-06, "loss": 0.2839, "step": 32612 }, { "epoch": 0.6054811508857394, "grad_norm": 0.568530797958374, "learning_rate": 6.746328328171043e-06, "loss": 0.3872, "step": 32614 }, { "epoch": 0.6055182810231581, "grad_norm": 0.4535745680332184, "learning_rate": 6.745225331020114e-06, "loss": 0.3662, "step": 32616 }, { "epoch": 0.6055554111605767, "grad_norm": 0.46867963671684265, "learning_rate": 6.744122378156921e-06, "loss": 0.3565, "step": 32618 }, { "epoch": 0.6055925412979953, "grad_norm": 0.3103230595588684, "learning_rate": 6.743019469596468e-06, "loss": 0.5443, "step": 32620 }, { "epoch": 0.605629671435414, "grad_norm": 0.43798306584358215, "learning_rate": 6.741916605353767e-06, "loss": 0.4664, "step": 32622 }, { "epoch": 0.6056668015728326, "grad_norm": 0.40012794733047485, "learning_rate": 6.74081378544382e-06, "loss": 0.1945, "step": 32624 }, { "epoch": 0.6057039317102513, "grad_norm": 0.35662710666656494, "learning_rate": 6.73971100988164e-06, "loss": 0.3303, "step": 32626 }, { "epoch": 0.6057410618476698, "grad_norm": 0.3911553919315338, "learning_rate": 6.738608278682222e-06, "loss": 0.1273, "step": 32628 }, { "epoch": 0.6057781919850885, "grad_norm": 0.29097649455070496, "learning_rate": 6.737505591860578e-06, "loss": 0.2464, "step": 32630 }, { "epoch": 0.6058153221225072, "grad_norm": 0.4287024140357971, "learning_rate": 6.736402949431711e-06, "loss": 0.3172, "step": 32632 }, { "epoch": 0.6058524522599258, "grad_norm": 0.2854858636856079, "learning_rate": 6.735300351410623e-06, "loss": 0.2673, "step": 32634 }, { "epoch": 0.6058895823973445, "grad_norm": 0.39085450768470764, "learning_rate": 6.734197797812318e-06, "loss": 0.1654, "step": 32636 }, { "epoch": 0.605926712534763, "grad_norm": 0.3609722852706909, "learning_rate": 6.733095288651803e-06, "loss": 0.3038, "step": 32638 }, { "epoch": 0.6059638426721817, "grad_norm": 0.3893957734107971, "learning_rate": 6.731992823944072e-06, "loss": 0.2682, "step": 32640 }, { "epoch": 0.6060009728096004, "grad_norm": 0.5668826699256897, "learning_rate": 6.7308904037041286e-06, "loss": 0.276, "step": 32642 }, { "epoch": 0.606038102947019, "grad_norm": 0.6270896792411804, "learning_rate": 6.729788027946977e-06, "loss": 0.1242, "step": 32644 }, { "epoch": 0.6060752330844377, "grad_norm": 0.33200517296791077, "learning_rate": 6.728685696687613e-06, "loss": 0.2337, "step": 32646 }, { "epoch": 0.6061123632218562, "grad_norm": 0.29668793082237244, "learning_rate": 6.72758340994104e-06, "loss": 0.1695, "step": 32648 }, { "epoch": 0.6061494933592749, "grad_norm": 0.2584170997142792, "learning_rate": 6.726481167722252e-06, "loss": 0.1102, "step": 32650 }, { "epoch": 0.6061866234966936, "grad_norm": 0.46197426319122314, "learning_rate": 6.725378970046255e-06, "loss": 0.4541, "step": 32652 }, { "epoch": 0.6062237536341122, "grad_norm": 0.4598432183265686, "learning_rate": 6.7242768169280405e-06, "loss": 0.2369, "step": 32654 }, { "epoch": 0.6062608837715309, "grad_norm": 0.796837329864502, "learning_rate": 6.7231747083826035e-06, "loss": 0.1361, "step": 32656 }, { "epoch": 0.6062980139089494, "grad_norm": 0.31335270404815674, "learning_rate": 6.722072644424944e-06, "loss": 0.155, "step": 32658 }, { "epoch": 0.6063351440463681, "grad_norm": 0.28925031423568726, "learning_rate": 6.7209706250700566e-06, "loss": 0.1149, "step": 32660 }, { "epoch": 0.6063722741837868, "grad_norm": 0.2990482747554779, "learning_rate": 6.719868650332939e-06, "loss": 0.1779, "step": 32662 }, { "epoch": 0.6064094043212054, "grad_norm": 0.2956528663635254, "learning_rate": 6.718766720228586e-06, "loss": 0.2705, "step": 32664 }, { "epoch": 0.6064465344586241, "grad_norm": 0.41349929571151733, "learning_rate": 6.717664834771988e-06, "loss": 0.0894, "step": 32666 }, { "epoch": 0.6064836645960426, "grad_norm": 0.2936456799507141, "learning_rate": 6.716562993978138e-06, "loss": 0.1351, "step": 32668 }, { "epoch": 0.6065207947334613, "grad_norm": 0.3496336340904236, "learning_rate": 6.71546119786203e-06, "loss": 0.2723, "step": 32670 }, { "epoch": 0.6065579248708799, "grad_norm": 0.4455249011516571, "learning_rate": 6.7143594464386564e-06, "loss": 0.4663, "step": 32672 }, { "epoch": 0.6065950550082986, "grad_norm": 0.44359856843948364, "learning_rate": 6.713257739723013e-06, "loss": 0.2588, "step": 32674 }, { "epoch": 0.6066321851457173, "grad_norm": 0.26801928877830505, "learning_rate": 6.712156077730088e-06, "loss": 0.1935, "step": 32676 }, { "epoch": 0.6066693152831358, "grad_norm": 0.6040407419204712, "learning_rate": 6.7110544604748666e-06, "loss": 0.3325, "step": 32678 }, { "epoch": 0.6067064454205545, "grad_norm": 1.4163864850997925, "learning_rate": 6.709952887972342e-06, "loss": 0.2145, "step": 32680 }, { "epoch": 0.6067435755579731, "grad_norm": 0.3321932256221771, "learning_rate": 6.708851360237503e-06, "loss": 0.2204, "step": 32682 }, { "epoch": 0.6067807056953918, "grad_norm": 0.389706552028656, "learning_rate": 6.707749877285342e-06, "loss": 0.3559, "step": 32684 }, { "epoch": 0.6068178358328105, "grad_norm": 0.23836737871170044, "learning_rate": 6.7066484391308415e-06, "loss": 0.2591, "step": 32686 }, { "epoch": 0.606854965970229, "grad_norm": 0.6039220690727234, "learning_rate": 6.705547045788996e-06, "loss": 0.2261, "step": 32688 }, { "epoch": 0.6068920961076477, "grad_norm": 0.31422141194343567, "learning_rate": 6.7044456972747815e-06, "loss": 0.4185, "step": 32690 }, { "epoch": 0.6069292262450663, "grad_norm": 0.45084986090660095, "learning_rate": 6.70334439360319e-06, "loss": 0.2918, "step": 32692 }, { "epoch": 0.606966356382485, "grad_norm": 0.7564059495925903, "learning_rate": 6.702243134789208e-06, "loss": 0.363, "step": 32694 }, { "epoch": 0.6070034865199037, "grad_norm": 0.6141263246536255, "learning_rate": 6.70114192084782e-06, "loss": 0.1423, "step": 32696 }, { "epoch": 0.6070406166573222, "grad_norm": 0.4967813789844513, "learning_rate": 6.700040751794008e-06, "loss": 0.5046, "step": 32698 }, { "epoch": 0.6070777467947409, "grad_norm": 0.4076906144618988, "learning_rate": 6.698939627642755e-06, "loss": 0.2435, "step": 32700 }, { "epoch": 0.6071148769321595, "grad_norm": 0.5306950807571411, "learning_rate": 6.697838548409053e-06, "loss": 0.4185, "step": 32702 }, { "epoch": 0.6071520070695782, "grad_norm": 0.42083579301834106, "learning_rate": 6.696737514107871e-06, "loss": 0.2023, "step": 32704 }, { "epoch": 0.6071891372069969, "grad_norm": 0.47346773743629456, "learning_rate": 6.695636524754199e-06, "loss": 0.2756, "step": 32706 }, { "epoch": 0.6072262673444154, "grad_norm": 0.3367878198623657, "learning_rate": 6.694535580363014e-06, "loss": 0.4296, "step": 32708 }, { "epoch": 0.6072633974818341, "grad_norm": 0.5973307490348816, "learning_rate": 6.6934346809493e-06, "loss": 0.3116, "step": 32710 }, { "epoch": 0.6073005276192527, "grad_norm": 0.3987002670764923, "learning_rate": 6.692333826528034e-06, "loss": 0.3019, "step": 32712 }, { "epoch": 0.6073376577566714, "grad_norm": 0.47669774293899536, "learning_rate": 6.691233017114202e-06, "loss": 0.1935, "step": 32714 }, { "epoch": 0.60737478789409, "grad_norm": 0.3510865271091461, "learning_rate": 6.690132252722774e-06, "loss": 0.4826, "step": 32716 }, { "epoch": 0.6074119180315086, "grad_norm": 0.6552179455757141, "learning_rate": 6.6890315333687335e-06, "loss": 0.3308, "step": 32718 }, { "epoch": 0.6074490481689273, "grad_norm": 0.3568669259548187, "learning_rate": 6.6879308590670556e-06, "loss": 0.3582, "step": 32720 }, { "epoch": 0.6074861783063459, "grad_norm": 0.3111821413040161, "learning_rate": 6.686830229832716e-06, "loss": 0.244, "step": 32722 }, { "epoch": 0.6075233084437646, "grad_norm": 0.5278318524360657, "learning_rate": 6.685729645680694e-06, "loss": 0.1474, "step": 32724 }, { "epoch": 0.6075604385811831, "grad_norm": 0.26630595326423645, "learning_rate": 6.684629106625967e-06, "loss": 0.1953, "step": 32726 }, { "epoch": 0.6075975687186018, "grad_norm": 0.3744412064552307, "learning_rate": 6.683528612683504e-06, "loss": 0.191, "step": 32728 }, { "epoch": 0.6076346988560205, "grad_norm": 0.3799089789390564, "learning_rate": 6.6824281638682845e-06, "loss": 0.193, "step": 32730 }, { "epoch": 0.6076718289934391, "grad_norm": 0.6737147569656372, "learning_rate": 6.681327760195279e-06, "loss": 0.3717, "step": 32732 }, { "epoch": 0.6077089591308578, "grad_norm": 0.42276766896247864, "learning_rate": 6.680227401679461e-06, "loss": 0.4692, "step": 32734 }, { "epoch": 0.6077460892682763, "grad_norm": 0.6538462042808533, "learning_rate": 6.679127088335806e-06, "loss": 0.2624, "step": 32736 }, { "epoch": 0.607783219405695, "grad_norm": 0.32437923550605774, "learning_rate": 6.678026820179284e-06, "loss": 0.2722, "step": 32738 }, { "epoch": 0.6078203495431137, "grad_norm": 0.22122645378112793, "learning_rate": 6.676926597224869e-06, "loss": 0.1708, "step": 32740 }, { "epoch": 0.6078574796805323, "grad_norm": 0.4881673753261566, "learning_rate": 6.675826419487526e-06, "loss": 0.2128, "step": 32742 }, { "epoch": 0.607894609817951, "grad_norm": 0.8026943206787109, "learning_rate": 6.6747262869822275e-06, "loss": 0.2166, "step": 32744 }, { "epoch": 0.6079317399553695, "grad_norm": 0.21508045494556427, "learning_rate": 6.673626199723944e-06, "loss": 0.2798, "step": 32746 }, { "epoch": 0.6079688700927882, "grad_norm": 0.28522974252700806, "learning_rate": 6.672526157727648e-06, "loss": 0.2709, "step": 32748 }, { "epoch": 0.6080060002302069, "grad_norm": 0.46932491660118103, "learning_rate": 6.6714261610083e-06, "loss": 0.2783, "step": 32750 }, { "epoch": 0.6080431303676255, "grad_norm": 0.4139155149459839, "learning_rate": 6.6703262095808766e-06, "loss": 0.369, "step": 32752 }, { "epoch": 0.6080802605050442, "grad_norm": 0.46052414178848267, "learning_rate": 6.669226303460335e-06, "loss": 0.3554, "step": 32754 }, { "epoch": 0.6081173906424627, "grad_norm": 0.3651711046695709, "learning_rate": 6.668126442661648e-06, "loss": 0.3992, "step": 32756 }, { "epoch": 0.6081545207798814, "grad_norm": 0.42071935534477234, "learning_rate": 6.6670266271997795e-06, "loss": 0.2138, "step": 32758 }, { "epoch": 0.6081916509173001, "grad_norm": 0.34669363498687744, "learning_rate": 6.665926857089698e-06, "loss": 0.1847, "step": 32760 }, { "epoch": 0.6082287810547187, "grad_norm": 0.6089686751365662, "learning_rate": 6.664827132346361e-06, "loss": 0.25, "step": 32762 }, { "epoch": 0.6082659111921374, "grad_norm": 0.3463898003101349, "learning_rate": 6.663727452984743e-06, "loss": 0.1514, "step": 32764 }, { "epoch": 0.6083030413295559, "grad_norm": 0.3123079836368561, "learning_rate": 6.662627819019796e-06, "loss": 0.1732, "step": 32766 }, { "epoch": 0.6083401714669746, "grad_norm": 0.33132031559944153, "learning_rate": 6.661528230466487e-06, "loss": 0.2335, "step": 32768 }, { "epoch": 0.6083773016043932, "grad_norm": 0.5688385963439941, "learning_rate": 6.6604286873397815e-06, "loss": 0.292, "step": 32770 }, { "epoch": 0.6084144317418119, "grad_norm": 0.3657228946685791, "learning_rate": 6.659329189654638e-06, "loss": 0.1794, "step": 32772 }, { "epoch": 0.6084515618792306, "grad_norm": 0.43812522292137146, "learning_rate": 6.658229737426016e-06, "loss": 0.3285, "step": 32774 }, { "epoch": 0.6084886920166491, "grad_norm": 0.3374134302139282, "learning_rate": 6.657130330668877e-06, "loss": 0.1921, "step": 32776 }, { "epoch": 0.6085258221540678, "grad_norm": 0.3188070058822632, "learning_rate": 6.656030969398187e-06, "loss": 0.2265, "step": 32778 }, { "epoch": 0.6085629522914864, "grad_norm": 0.4505956470966339, "learning_rate": 6.654931653628894e-06, "loss": 0.2337, "step": 32780 }, { "epoch": 0.6086000824289051, "grad_norm": 0.2641972005367279, "learning_rate": 6.653832383375964e-06, "loss": 0.2661, "step": 32782 }, { "epoch": 0.6086372125663237, "grad_norm": 0.3299517035484314, "learning_rate": 6.652733158654351e-06, "loss": 0.3367, "step": 32784 }, { "epoch": 0.6086743427037423, "grad_norm": 0.2928280830383301, "learning_rate": 6.651633979479013e-06, "loss": 0.2291, "step": 32786 }, { "epoch": 0.608711472841161, "grad_norm": 0.3623118996620178, "learning_rate": 6.650534845864906e-06, "loss": 0.4383, "step": 32788 }, { "epoch": 0.6087486029785796, "grad_norm": 0.5305338501930237, "learning_rate": 6.6494357578269915e-06, "loss": 0.3214, "step": 32790 }, { "epoch": 0.6087857331159983, "grad_norm": 0.27662909030914307, "learning_rate": 6.648336715380219e-06, "loss": 0.313, "step": 32792 }, { "epoch": 0.608822863253417, "grad_norm": 0.4266732633113861, "learning_rate": 6.647237718539541e-06, "loss": 0.4248, "step": 32794 }, { "epoch": 0.6088599933908355, "grad_norm": 0.31537142395973206, "learning_rate": 6.646138767319916e-06, "loss": 0.1635, "step": 32796 }, { "epoch": 0.6088971235282542, "grad_norm": 0.6782666444778442, "learning_rate": 6.6450398617362965e-06, "loss": 0.2337, "step": 32798 }, { "epoch": 0.6089342536656728, "grad_norm": 0.9137536883354187, "learning_rate": 6.643941001803638e-06, "loss": 0.3029, "step": 32800 }, { "epoch": 0.6089713838030915, "grad_norm": 0.4484010934829712, "learning_rate": 6.642842187536888e-06, "loss": 0.4011, "step": 32802 }, { "epoch": 0.6090085139405101, "grad_norm": 0.5610449314117432, "learning_rate": 6.641743418951001e-06, "loss": 0.3067, "step": 32804 }, { "epoch": 0.6090456440779287, "grad_norm": 0.34543943405151367, "learning_rate": 6.640644696060924e-06, "loss": 0.2898, "step": 32806 }, { "epoch": 0.6090827742153474, "grad_norm": 0.33914604783058167, "learning_rate": 6.639546018881611e-06, "loss": 0.3389, "step": 32808 }, { "epoch": 0.609119904352766, "grad_norm": 0.3454466760158539, "learning_rate": 6.638447387428011e-06, "loss": 0.3109, "step": 32810 }, { "epoch": 0.6091570344901847, "grad_norm": 0.5863184928894043, "learning_rate": 6.637348801715076e-06, "loss": 0.3368, "step": 32812 }, { "epoch": 0.6091941646276033, "grad_norm": 0.3228932321071625, "learning_rate": 6.636250261757751e-06, "loss": 0.2055, "step": 32814 }, { "epoch": 0.6092312947650219, "grad_norm": 0.4191569685935974, "learning_rate": 6.635151767570982e-06, "loss": 0.4926, "step": 32816 }, { "epoch": 0.6092684249024406, "grad_norm": 0.3733617067337036, "learning_rate": 6.634053319169719e-06, "loss": 0.2847, "step": 32818 }, { "epoch": 0.6093055550398592, "grad_norm": 0.37717553973197937, "learning_rate": 6.6329549165689065e-06, "loss": 0.2492, "step": 32820 }, { "epoch": 0.6093426851772779, "grad_norm": 0.47194042801856995, "learning_rate": 6.631856559783496e-06, "loss": 0.5036, "step": 32822 }, { "epoch": 0.6093798153146964, "grad_norm": 0.3301788568496704, "learning_rate": 6.6307582488284264e-06, "loss": 0.2047, "step": 32824 }, { "epoch": 0.6094169454521151, "grad_norm": 0.495137482881546, "learning_rate": 6.6296599837186435e-06, "loss": 0.2615, "step": 32826 }, { "epoch": 0.6094540755895338, "grad_norm": 0.7760566473007202, "learning_rate": 6.628561764469099e-06, "loss": 0.4259, "step": 32828 }, { "epoch": 0.6094912057269524, "grad_norm": 0.4803955852985382, "learning_rate": 6.627463591094725e-06, "loss": 0.2227, "step": 32830 }, { "epoch": 0.609528335864371, "grad_norm": 0.4168407618999481, "learning_rate": 6.626365463610469e-06, "loss": 0.1223, "step": 32832 }, { "epoch": 0.6095654660017896, "grad_norm": 0.13965439796447754, "learning_rate": 6.625267382031277e-06, "loss": 0.2154, "step": 32834 }, { "epoch": 0.6096025961392083, "grad_norm": 0.4420328736305237, "learning_rate": 6.624169346372086e-06, "loss": 0.2294, "step": 32836 }, { "epoch": 0.609639726276627, "grad_norm": 0.3596246838569641, "learning_rate": 6.623071356647836e-06, "loss": 0.2922, "step": 32838 }, { "epoch": 0.6096768564140456, "grad_norm": 0.401109904050827, "learning_rate": 6.621973412873477e-06, "loss": 0.3026, "step": 32840 }, { "epoch": 0.6097139865514642, "grad_norm": 0.36242595314979553, "learning_rate": 6.6208755150639355e-06, "loss": 0.1687, "step": 32842 }, { "epoch": 0.6097511166888828, "grad_norm": 0.5056469440460205, "learning_rate": 6.619777663234159e-06, "loss": 0.138, "step": 32844 }, { "epoch": 0.6097882468263015, "grad_norm": 0.2522508203983307, "learning_rate": 6.618679857399082e-06, "loss": 0.3664, "step": 32846 }, { "epoch": 0.6098253769637202, "grad_norm": 0.3038733899593353, "learning_rate": 6.617582097573643e-06, "loss": 0.334, "step": 32848 }, { "epoch": 0.6098625071011388, "grad_norm": 0.23065631091594696, "learning_rate": 6.61648438377278e-06, "loss": 0.2753, "step": 32850 }, { "epoch": 0.6098996372385574, "grad_norm": 0.3708191514015198, "learning_rate": 6.615386716011434e-06, "loss": 0.3781, "step": 32852 }, { "epoch": 0.609936767375976, "grad_norm": 0.2590278387069702, "learning_rate": 6.614289094304534e-06, "loss": 0.305, "step": 32854 }, { "epoch": 0.6099738975133947, "grad_norm": 0.586765468120575, "learning_rate": 6.613191518667019e-06, "loss": 0.2261, "step": 32856 }, { "epoch": 0.6100110276508134, "grad_norm": 0.3995712399482727, "learning_rate": 6.61209398911382e-06, "loss": 0.0987, "step": 32858 }, { "epoch": 0.610048157788232, "grad_norm": 0.31207749247550964, "learning_rate": 6.6109965056598745e-06, "loss": 0.2768, "step": 32860 }, { "epoch": 0.6100852879256506, "grad_norm": 0.4302317500114441, "learning_rate": 6.609899068320116e-06, "loss": 0.4211, "step": 32862 }, { "epoch": 0.6101224180630692, "grad_norm": 0.4288552403450012, "learning_rate": 6.6088016771094775e-06, "loss": 0.221, "step": 32864 }, { "epoch": 0.6101595482004879, "grad_norm": 0.44692501425743103, "learning_rate": 6.607704332042892e-06, "loss": 0.2812, "step": 32866 }, { "epoch": 0.6101966783379066, "grad_norm": 0.5933568477630615, "learning_rate": 6.606607033135288e-06, "loss": 0.2551, "step": 32868 }, { "epoch": 0.6102338084753252, "grad_norm": 0.24897730350494385, "learning_rate": 6.605509780401595e-06, "loss": 0.2425, "step": 32870 }, { "epoch": 0.6102709386127438, "grad_norm": 0.22982865571975708, "learning_rate": 6.604412573856749e-06, "loss": 0.5191, "step": 32872 }, { "epoch": 0.6103080687501624, "grad_norm": 0.3344128131866455, "learning_rate": 6.603315413515678e-06, "loss": 0.2552, "step": 32874 }, { "epoch": 0.6103451988875811, "grad_norm": 0.3237553834915161, "learning_rate": 6.602218299393306e-06, "loss": 0.2905, "step": 32876 }, { "epoch": 0.6103823290249997, "grad_norm": 0.2504872977733612, "learning_rate": 6.601121231504571e-06, "loss": 0.179, "step": 32878 }, { "epoch": 0.6104194591624184, "grad_norm": 0.5493221282958984, "learning_rate": 6.6000242098643916e-06, "loss": 0.4061, "step": 32880 }, { "epoch": 0.610456589299837, "grad_norm": 0.3034321665763855, "learning_rate": 6.598927234487698e-06, "loss": 0.2555, "step": 32882 }, { "epoch": 0.6104937194372556, "grad_norm": 0.3993164896965027, "learning_rate": 6.5978303053894165e-06, "loss": 0.2592, "step": 32884 }, { "epoch": 0.6105308495746743, "grad_norm": 0.4795207977294922, "learning_rate": 6.596733422584478e-06, "loss": 0.3893, "step": 32886 }, { "epoch": 0.6105679797120929, "grad_norm": 0.5287923812866211, "learning_rate": 6.5956365860878e-06, "loss": 0.2128, "step": 32888 }, { "epoch": 0.6106051098495116, "grad_norm": 0.34218212962150574, "learning_rate": 6.594539795914315e-06, "loss": 0.309, "step": 32890 }, { "epoch": 0.6106422399869302, "grad_norm": 0.4681243300437927, "learning_rate": 6.593443052078939e-06, "loss": 0.1562, "step": 32892 }, { "epoch": 0.6106793701243488, "grad_norm": 0.2978992462158203, "learning_rate": 6.592346354596599e-06, "loss": 0.2366, "step": 32894 }, { "epoch": 0.6107165002617675, "grad_norm": 0.2970438301563263, "learning_rate": 6.5912497034822185e-06, "loss": 0.2118, "step": 32896 }, { "epoch": 0.6107536303991861, "grad_norm": 0.49563807249069214, "learning_rate": 6.590153098750721e-06, "loss": 0.2844, "step": 32898 }, { "epoch": 0.6107907605366047, "grad_norm": 0.2288004755973816, "learning_rate": 6.5890565404170244e-06, "loss": 0.3342, "step": 32900 }, { "epoch": 0.6108278906740234, "grad_norm": 0.3173462748527527, "learning_rate": 6.587960028496051e-06, "loss": 0.1512, "step": 32902 }, { "epoch": 0.610865020811442, "grad_norm": 0.42793482542037964, "learning_rate": 6.586863563002725e-06, "loss": 0.3627, "step": 32904 }, { "epoch": 0.6109021509488607, "grad_norm": 0.5063623189926147, "learning_rate": 6.58576714395196e-06, "loss": 0.2035, "step": 32906 }, { "epoch": 0.6109392810862793, "grad_norm": 0.35794708132743835, "learning_rate": 6.5846707713586776e-06, "loss": 0.4158, "step": 32908 }, { "epoch": 0.610976411223698, "grad_norm": 0.2992331087589264, "learning_rate": 6.583574445237795e-06, "loss": 0.4699, "step": 32910 }, { "epoch": 0.6110135413611166, "grad_norm": 0.3020668923854828, "learning_rate": 6.582478165604229e-06, "loss": 0.2197, "step": 32912 }, { "epoch": 0.6110506714985352, "grad_norm": 0.4621583819389343, "learning_rate": 6.581381932472901e-06, "loss": 0.2897, "step": 32914 }, { "epoch": 0.6110878016359539, "grad_norm": 0.31691548228263855, "learning_rate": 6.580285745858728e-06, "loss": 0.2745, "step": 32916 }, { "epoch": 0.6111249317733725, "grad_norm": 0.34798476099967957, "learning_rate": 6.579189605776617e-06, "loss": 0.2279, "step": 32918 }, { "epoch": 0.6111620619107911, "grad_norm": 0.2948818504810333, "learning_rate": 6.578093512241492e-06, "loss": 0.1182, "step": 32920 }, { "epoch": 0.6111991920482097, "grad_norm": 0.41257402300834656, "learning_rate": 6.576997465268264e-06, "loss": 0.242, "step": 32922 }, { "epoch": 0.6112363221856284, "grad_norm": 0.4669298827648163, "learning_rate": 6.575901464871845e-06, "loss": 0.3101, "step": 32924 }, { "epoch": 0.6112734523230471, "grad_norm": 0.5174317359924316, "learning_rate": 6.574805511067153e-06, "loss": 0.2405, "step": 32926 }, { "epoch": 0.6113105824604657, "grad_norm": 0.34562817215919495, "learning_rate": 6.573709603869102e-06, "loss": 0.2381, "step": 32928 }, { "epoch": 0.6113477125978843, "grad_norm": 0.2984899878501892, "learning_rate": 6.572613743292597e-06, "loss": 0.3387, "step": 32930 }, { "epoch": 0.6113848427353029, "grad_norm": 0.5052001476287842, "learning_rate": 6.571517929352552e-06, "loss": 0.3186, "step": 32932 }, { "epoch": 0.6114219728727216, "grad_norm": 0.18774987757205963, "learning_rate": 6.5704221620638785e-06, "loss": 0.0648, "step": 32934 }, { "epoch": 0.6114591030101403, "grad_norm": 0.5624311566352844, "learning_rate": 6.569326441441486e-06, "loss": 0.4532, "step": 32936 }, { "epoch": 0.6114962331475589, "grad_norm": 0.1844814568758011, "learning_rate": 6.568230767500287e-06, "loss": 0.3266, "step": 32938 }, { "epoch": 0.6115333632849775, "grad_norm": 0.3739783465862274, "learning_rate": 6.567135140255185e-06, "loss": 0.2318, "step": 32940 }, { "epoch": 0.6115704934223961, "grad_norm": 0.2932315170764923, "learning_rate": 6.566039559721096e-06, "loss": 0.1094, "step": 32942 }, { "epoch": 0.6116076235598148, "grad_norm": 0.47917065024375916, "learning_rate": 6.564944025912918e-06, "loss": 0.2782, "step": 32944 }, { "epoch": 0.6116447536972335, "grad_norm": 0.46618887782096863, "learning_rate": 6.563848538845563e-06, "loss": 0.4315, "step": 32946 }, { "epoch": 0.611681883834652, "grad_norm": 0.36694973707199097, "learning_rate": 6.562753098533937e-06, "loss": 0.3107, "step": 32948 }, { "epoch": 0.6117190139720707, "grad_norm": 0.32681792974472046, "learning_rate": 6.561657704992947e-06, "loss": 0.2528, "step": 32950 }, { "epoch": 0.6117561441094893, "grad_norm": 0.25283411145210266, "learning_rate": 6.5605623582374966e-06, "loss": 0.3299, "step": 32952 }, { "epoch": 0.611793274246908, "grad_norm": 0.2517596483230591, "learning_rate": 6.559467058282492e-06, "loss": 0.2835, "step": 32954 }, { "epoch": 0.6118304043843267, "grad_norm": 0.2988564074039459, "learning_rate": 6.558371805142832e-06, "loss": 0.1676, "step": 32956 }, { "epoch": 0.6118675345217452, "grad_norm": 0.27291765809059143, "learning_rate": 6.557276598833422e-06, "loss": 0.1826, "step": 32958 }, { "epoch": 0.6119046646591639, "grad_norm": 0.47746115922927856, "learning_rate": 6.556181439369169e-06, "loss": 0.5474, "step": 32960 }, { "epoch": 0.6119417947965825, "grad_norm": 0.49516817927360535, "learning_rate": 6.555086326764968e-06, "loss": 0.1944, "step": 32962 }, { "epoch": 0.6119789249340012, "grad_norm": 0.33475977182388306, "learning_rate": 6.553991261035723e-06, "loss": 0.1576, "step": 32964 }, { "epoch": 0.6120160550714199, "grad_norm": 0.32170072197914124, "learning_rate": 6.552896242196341e-06, "loss": 0.2266, "step": 32966 }, { "epoch": 0.6120531852088384, "grad_norm": 0.5481547117233276, "learning_rate": 6.5518012702617106e-06, "loss": 0.2796, "step": 32968 }, { "epoch": 0.6120903153462571, "grad_norm": 0.42201921343803406, "learning_rate": 6.550706345246736e-06, "loss": 0.2333, "step": 32970 }, { "epoch": 0.6121274454836757, "grad_norm": 0.4481096565723419, "learning_rate": 6.549611467166318e-06, "loss": 0.341, "step": 32972 }, { "epoch": 0.6121645756210944, "grad_norm": 0.4292342960834503, "learning_rate": 6.548516636035352e-06, "loss": 0.2189, "step": 32974 }, { "epoch": 0.612201705758513, "grad_norm": 0.48943889141082764, "learning_rate": 6.547421851868735e-06, "loss": 0.4204, "step": 32976 }, { "epoch": 0.6122388358959316, "grad_norm": 0.780896008014679, "learning_rate": 6.546327114681369e-06, "loss": 0.2284, "step": 32978 }, { "epoch": 0.6122759660333503, "grad_norm": 0.39008629322052, "learning_rate": 6.545232424488143e-06, "loss": 0.3385, "step": 32980 }, { "epoch": 0.6123130961707689, "grad_norm": 0.3087436854839325, "learning_rate": 6.544137781303956e-06, "loss": 0.4305, "step": 32982 }, { "epoch": 0.6123502263081876, "grad_norm": 0.3258281648159027, "learning_rate": 6.543043185143702e-06, "loss": 0.2434, "step": 32984 }, { "epoch": 0.6123873564456062, "grad_norm": 0.30154532194137573, "learning_rate": 6.541948636022274e-06, "loss": 0.2862, "step": 32986 }, { "epoch": 0.6124244865830248, "grad_norm": 0.3135763704776764, "learning_rate": 6.540854133954569e-06, "loss": 0.3838, "step": 32988 }, { "epoch": 0.6124616167204435, "grad_norm": 0.28022530674934387, "learning_rate": 6.539759678955477e-06, "loss": 0.4314, "step": 32990 }, { "epoch": 0.6124987468578621, "grad_norm": 0.3986649513244629, "learning_rate": 6.538665271039892e-06, "loss": 0.1938, "step": 32992 }, { "epoch": 0.6125358769952808, "grad_norm": 0.3138667941093445, "learning_rate": 6.537570910222706e-06, "loss": 0.1722, "step": 32994 }, { "epoch": 0.6125730071326994, "grad_norm": 0.32664528489112854, "learning_rate": 6.536476596518806e-06, "loss": 0.3035, "step": 32996 }, { "epoch": 0.612610137270118, "grad_norm": 0.4007793068885803, "learning_rate": 6.5353823299430855e-06, "loss": 0.4964, "step": 32998 }, { "epoch": 0.6126472674075367, "grad_norm": 0.49423709511756897, "learning_rate": 6.534288110510434e-06, "loss": 0.2719, "step": 33000 }, { "epoch": 0.6126843975449553, "grad_norm": 0.33546826243400574, "learning_rate": 6.533193938235742e-06, "loss": 0.2384, "step": 33002 }, { "epoch": 0.612721527682374, "grad_norm": 0.44680553674697876, "learning_rate": 6.532099813133896e-06, "loss": 0.2929, "step": 33004 }, { "epoch": 0.6127586578197926, "grad_norm": 0.3409819006919861, "learning_rate": 6.531005735219784e-06, "loss": 0.2278, "step": 33006 }, { "epoch": 0.6127957879572112, "grad_norm": 0.8074988126754761, "learning_rate": 6.529911704508292e-06, "loss": 0.5943, "step": 33008 }, { "epoch": 0.6128329180946299, "grad_norm": 0.35389649868011475, "learning_rate": 6.528817721014306e-06, "loss": 0.2371, "step": 33010 }, { "epoch": 0.6128700482320485, "grad_norm": 0.23854900896549225, "learning_rate": 6.527723784752715e-06, "loss": 0.2657, "step": 33012 }, { "epoch": 0.6129071783694672, "grad_norm": 0.3040628731250763, "learning_rate": 6.5266298957384035e-06, "loss": 0.3001, "step": 33014 }, { "epoch": 0.6129443085068857, "grad_norm": 0.32979947328567505, "learning_rate": 6.525536053986257e-06, "loss": 0.378, "step": 33016 }, { "epoch": 0.6129814386443044, "grad_norm": 0.2979823350906372, "learning_rate": 6.524442259511154e-06, "loss": 0.3029, "step": 33018 }, { "epoch": 0.6130185687817231, "grad_norm": 0.37575721740722656, "learning_rate": 6.52334851232798e-06, "loss": 0.4912, "step": 33020 }, { "epoch": 0.6130556989191417, "grad_norm": 0.39125654101371765, "learning_rate": 6.52225481245162e-06, "loss": 0.3983, "step": 33022 }, { "epoch": 0.6130928290565604, "grad_norm": 0.3909904956817627, "learning_rate": 6.5211611598969585e-06, "loss": 0.2852, "step": 33024 }, { "epoch": 0.613129959193979, "grad_norm": 0.35755226016044617, "learning_rate": 6.520067554678871e-06, "loss": 0.0824, "step": 33026 }, { "epoch": 0.6131670893313976, "grad_norm": 0.38608142733573914, "learning_rate": 6.518973996812239e-06, "loss": 0.2652, "step": 33028 }, { "epoch": 0.6132042194688162, "grad_norm": 0.2963379919528961, "learning_rate": 6.517880486311948e-06, "loss": 0.2369, "step": 33030 }, { "epoch": 0.6132413496062349, "grad_norm": 0.6357234120368958, "learning_rate": 6.5167870231928695e-06, "loss": 0.2369, "step": 33032 }, { "epoch": 0.6132784797436536, "grad_norm": 0.48091399669647217, "learning_rate": 6.515693607469888e-06, "loss": 0.3405, "step": 33034 }, { "epoch": 0.6133156098810721, "grad_norm": 0.583322286605835, "learning_rate": 6.51460023915788e-06, "loss": 0.3737, "step": 33036 }, { "epoch": 0.6133527400184908, "grad_norm": 0.4173484444618225, "learning_rate": 6.513506918271722e-06, "loss": 0.5614, "step": 33038 }, { "epoch": 0.6133898701559094, "grad_norm": 0.4138701856136322, "learning_rate": 6.51241364482629e-06, "loss": 0.1895, "step": 33040 }, { "epoch": 0.6134270002933281, "grad_norm": 0.3064877986907959, "learning_rate": 6.511320418836466e-06, "loss": 0.2161, "step": 33042 }, { "epoch": 0.6134641304307468, "grad_norm": 0.28886619210243225, "learning_rate": 6.510227240317118e-06, "loss": 0.2918, "step": 33044 }, { "epoch": 0.6135012605681653, "grad_norm": 0.5084324479103088, "learning_rate": 6.509134109283126e-06, "loss": 0.5349, "step": 33046 }, { "epoch": 0.613538390705584, "grad_norm": 0.6784845590591431, "learning_rate": 6.508041025749361e-06, "loss": 0.3064, "step": 33048 }, { "epoch": 0.6135755208430026, "grad_norm": 0.36370372772216797, "learning_rate": 6.506947989730696e-06, "loss": 0.3529, "step": 33050 }, { "epoch": 0.6136126509804213, "grad_norm": 0.3345828056335449, "learning_rate": 6.505855001242009e-06, "loss": 0.2032, "step": 33052 }, { "epoch": 0.61364978111784, "grad_norm": 0.5631812810897827, "learning_rate": 6.504762060298171e-06, "loss": 0.3461, "step": 33054 }, { "epoch": 0.6136869112552585, "grad_norm": 0.3804876506328583, "learning_rate": 6.503669166914049e-06, "loss": 0.1982, "step": 33056 }, { "epoch": 0.6137240413926772, "grad_norm": 0.7347245812416077, "learning_rate": 6.502576321104519e-06, "loss": 0.2427, "step": 33058 }, { "epoch": 0.6137611715300958, "grad_norm": 0.33977210521698, "learning_rate": 6.501483522884446e-06, "loss": 0.4583, "step": 33060 }, { "epoch": 0.6137983016675145, "grad_norm": 0.3442073464393616, "learning_rate": 6.500390772268703e-06, "loss": 0.2786, "step": 33062 }, { "epoch": 0.6138354318049332, "grad_norm": 0.5103311538696289, "learning_rate": 6.49929806927216e-06, "loss": 0.3866, "step": 33064 }, { "epoch": 0.6138725619423517, "grad_norm": 0.49007707834243774, "learning_rate": 6.498205413909686e-06, "loss": 0.1851, "step": 33066 }, { "epoch": 0.6139096920797704, "grad_norm": 0.5131375193595886, "learning_rate": 6.497112806196151e-06, "loss": 0.4461, "step": 33068 }, { "epoch": 0.613946822217189, "grad_norm": 2.0458126068115234, "learning_rate": 6.496020246146413e-06, "loss": 0.1861, "step": 33070 }, { "epoch": 0.6139839523546077, "grad_norm": 0.18598277866840363, "learning_rate": 6.494927733775343e-06, "loss": 0.1727, "step": 33072 }, { "epoch": 0.6140210824920262, "grad_norm": 0.5223251581192017, "learning_rate": 6.493835269097808e-06, "loss": 0.2294, "step": 33074 }, { "epoch": 0.6140582126294449, "grad_norm": 0.31790274381637573, "learning_rate": 6.492742852128675e-06, "loss": 0.3094, "step": 33076 }, { "epoch": 0.6140953427668636, "grad_norm": 0.29239508509635925, "learning_rate": 6.4916504828828055e-06, "loss": 0.1357, "step": 33078 }, { "epoch": 0.6141324729042822, "grad_norm": 0.40649569034576416, "learning_rate": 6.4905581613750665e-06, "loss": 0.2772, "step": 33080 }, { "epoch": 0.6141696030417009, "grad_norm": 0.3389107286930084, "learning_rate": 6.489465887620315e-06, "loss": 0.3091, "step": 33082 }, { "epoch": 0.6142067331791194, "grad_norm": 0.524733304977417, "learning_rate": 6.4883736616334185e-06, "loss": 0.3464, "step": 33084 }, { "epoch": 0.6142438633165381, "grad_norm": 0.4763987064361572, "learning_rate": 6.487281483429238e-06, "loss": 0.1915, "step": 33086 }, { "epoch": 0.6142809934539568, "grad_norm": 0.3909938335418701, "learning_rate": 6.486189353022636e-06, "loss": 0.3777, "step": 33088 }, { "epoch": 0.6143181235913754, "grad_norm": 0.44554078578948975, "learning_rate": 6.4850972704284695e-06, "loss": 0.229, "step": 33090 }, { "epoch": 0.6143552537287941, "grad_norm": 0.36556869745254517, "learning_rate": 6.484005235661605e-06, "loss": 0.2317, "step": 33092 }, { "epoch": 0.6143923838662126, "grad_norm": 0.4382587969303131, "learning_rate": 6.482913248736895e-06, "loss": 0.2382, "step": 33094 }, { "epoch": 0.6144295140036313, "grad_norm": 0.31412291526794434, "learning_rate": 6.481821309669199e-06, "loss": 0.1976, "step": 33096 }, { "epoch": 0.61446664414105, "grad_norm": 0.2784813344478607, "learning_rate": 6.480729418473379e-06, "loss": 0.1606, "step": 33098 }, { "epoch": 0.6145037742784686, "grad_norm": 0.3418746590614319, "learning_rate": 6.479637575164289e-06, "loss": 0.2878, "step": 33100 }, { "epoch": 0.6145409044158873, "grad_norm": 0.3554566204547882, "learning_rate": 6.478545779756787e-06, "loss": 0.3583, "step": 33102 }, { "epoch": 0.6145780345533058, "grad_norm": 0.4605807065963745, "learning_rate": 6.477454032265733e-06, "loss": 0.4636, "step": 33104 }, { "epoch": 0.6146151646907245, "grad_norm": 0.38364318013191223, "learning_rate": 6.4763623327059745e-06, "loss": 0.2091, "step": 33106 }, { "epoch": 0.6146522948281432, "grad_norm": 0.19605998694896698, "learning_rate": 6.475270681092369e-06, "loss": 0.251, "step": 33108 }, { "epoch": 0.6146894249655618, "grad_norm": 0.3493518531322479, "learning_rate": 6.474179077439774e-06, "loss": 0.1905, "step": 33110 }, { "epoch": 0.6147265551029805, "grad_norm": 0.3489682674407959, "learning_rate": 6.473087521763039e-06, "loss": 0.2315, "step": 33112 }, { "epoch": 0.614763685240399, "grad_norm": 0.27524271607398987, "learning_rate": 6.4719960140770175e-06, "loss": 0.3106, "step": 33114 }, { "epoch": 0.6148008153778177, "grad_norm": 0.45186784863471985, "learning_rate": 6.470904554396564e-06, "loss": 0.2757, "step": 33116 }, { "epoch": 0.6148379455152364, "grad_norm": 0.459164559841156, "learning_rate": 6.469813142736533e-06, "loss": 0.2602, "step": 33118 }, { "epoch": 0.614875075652655, "grad_norm": 0.28344520926475525, "learning_rate": 6.4687217791117685e-06, "loss": 0.1638, "step": 33120 }, { "epoch": 0.6149122057900737, "grad_norm": 0.40025684237480164, "learning_rate": 6.467630463537121e-06, "loss": 0.374, "step": 33122 }, { "epoch": 0.6149493359274922, "grad_norm": 0.4474346935749054, "learning_rate": 6.4665391960274415e-06, "loss": 0.3748, "step": 33124 }, { "epoch": 0.6149864660649109, "grad_norm": 0.27468186616897583, "learning_rate": 6.465447976597581e-06, "loss": 0.2126, "step": 33126 }, { "epoch": 0.6150235962023295, "grad_norm": 0.4217972457408905, "learning_rate": 6.464356805262388e-06, "loss": 0.3537, "step": 33128 }, { "epoch": 0.6150607263397482, "grad_norm": 0.4222817122936249, "learning_rate": 6.463265682036708e-06, "loss": 0.1881, "step": 33130 }, { "epoch": 0.6150978564771669, "grad_norm": 0.3665860891342163, "learning_rate": 6.462174606935387e-06, "loss": 0.3712, "step": 33132 }, { "epoch": 0.6151349866145854, "grad_norm": 0.3084348440170288, "learning_rate": 6.461083579973273e-06, "loss": 0.311, "step": 33134 }, { "epoch": 0.6151721167520041, "grad_norm": 0.5085157155990601, "learning_rate": 6.45999260116521e-06, "loss": 0.429, "step": 33136 }, { "epoch": 0.6152092468894227, "grad_norm": 0.47789356112480164, "learning_rate": 6.458901670526044e-06, "loss": 0.263, "step": 33138 }, { "epoch": 0.6152463770268414, "grad_norm": 0.39849966764450073, "learning_rate": 6.457810788070622e-06, "loss": 0.3208, "step": 33140 }, { "epoch": 0.6152835071642601, "grad_norm": 0.3974080979824066, "learning_rate": 6.456719953813786e-06, "loss": 0.336, "step": 33142 }, { "epoch": 0.6153206373016786, "grad_norm": 0.4094145894050598, "learning_rate": 6.4556291677703766e-06, "loss": 0.1664, "step": 33144 }, { "epoch": 0.6153577674390973, "grad_norm": 0.6352247595787048, "learning_rate": 6.454538429955235e-06, "loss": 0.4701, "step": 33146 }, { "epoch": 0.6153948975765159, "grad_norm": 0.3249083161354065, "learning_rate": 6.453447740383208e-06, "loss": 0.2302, "step": 33148 }, { "epoch": 0.6154320277139346, "grad_norm": 0.3730751574039459, "learning_rate": 6.452357099069131e-06, "loss": 0.2201, "step": 33150 }, { "epoch": 0.6154691578513533, "grad_norm": 0.6651400923728943, "learning_rate": 6.451266506027851e-06, "loss": 0.2801, "step": 33152 }, { "epoch": 0.6155062879887718, "grad_norm": 0.49591532349586487, "learning_rate": 6.450175961274203e-06, "loss": 0.3879, "step": 33154 }, { "epoch": 0.6155434181261905, "grad_norm": 0.325879842042923, "learning_rate": 6.44908546482303e-06, "loss": 0.253, "step": 33156 }, { "epoch": 0.6155805482636091, "grad_norm": 0.34087178111076355, "learning_rate": 6.4479950166891634e-06, "loss": 0.2821, "step": 33158 }, { "epoch": 0.6156176784010278, "grad_norm": 0.40923139452934265, "learning_rate": 6.4469046168874464e-06, "loss": 0.2897, "step": 33160 }, { "epoch": 0.6156548085384465, "grad_norm": 0.3220416009426117, "learning_rate": 6.445814265432716e-06, "loss": 0.322, "step": 33162 }, { "epoch": 0.615691938675865, "grad_norm": 0.24226497113704681, "learning_rate": 6.444723962339805e-06, "loss": 0.1943, "step": 33164 }, { "epoch": 0.6157290688132837, "grad_norm": 0.3367232382297516, "learning_rate": 6.443633707623553e-06, "loss": 0.308, "step": 33166 }, { "epoch": 0.6157661989507023, "grad_norm": 0.3824969530105591, "learning_rate": 6.442543501298797e-06, "loss": 0.3017, "step": 33168 }, { "epoch": 0.615803329088121, "grad_norm": 0.36098381876945496, "learning_rate": 6.441453343380364e-06, "loss": 0.3882, "step": 33170 }, { "epoch": 0.6158404592255395, "grad_norm": 0.44608891010284424, "learning_rate": 6.440363233883091e-06, "loss": 0.4947, "step": 33172 }, { "epoch": 0.6158775893629582, "grad_norm": 0.3605786859989166, "learning_rate": 6.439273172821815e-06, "loss": 0.317, "step": 33174 }, { "epoch": 0.6159147195003769, "grad_norm": 0.454386830329895, "learning_rate": 6.438183160211363e-06, "loss": 0.2987, "step": 33176 }, { "epoch": 0.6159518496377955, "grad_norm": 0.3336338400840759, "learning_rate": 6.437093196066571e-06, "loss": 0.1687, "step": 33178 }, { "epoch": 0.6159889797752142, "grad_norm": 0.44107571244239807, "learning_rate": 6.43600328040227e-06, "loss": 0.2225, "step": 33180 }, { "epoch": 0.6160261099126327, "grad_norm": 0.4113306701183319, "learning_rate": 6.434913413233286e-06, "loss": 0.2094, "step": 33182 }, { "epoch": 0.6160632400500514, "grad_norm": 0.4570377469062805, "learning_rate": 6.433823594574454e-06, "loss": 0.0955, "step": 33184 }, { "epoch": 0.6161003701874701, "grad_norm": 0.4180873930454254, "learning_rate": 6.432733824440599e-06, "loss": 0.3208, "step": 33186 }, { "epoch": 0.6161375003248887, "grad_norm": 0.3868831396102905, "learning_rate": 6.4316441028465515e-06, "loss": 0.198, "step": 33188 }, { "epoch": 0.6161746304623074, "grad_norm": 0.25433504581451416, "learning_rate": 6.430554429807139e-06, "loss": 0.3004, "step": 33190 }, { "epoch": 0.6162117605997259, "grad_norm": 0.2533932030200958, "learning_rate": 6.429464805337191e-06, "loss": 0.4459, "step": 33192 }, { "epoch": 0.6162488907371446, "grad_norm": 0.298885315656662, "learning_rate": 6.428375229451532e-06, "loss": 0.2569, "step": 33194 }, { "epoch": 0.6162860208745633, "grad_norm": 0.27680450677871704, "learning_rate": 6.427285702164988e-06, "loss": 0.3591, "step": 33196 }, { "epoch": 0.6163231510119819, "grad_norm": 0.268452912569046, "learning_rate": 6.426196223492383e-06, "loss": 0.4684, "step": 33198 }, { "epoch": 0.6163602811494006, "grad_norm": 0.46640223264694214, "learning_rate": 6.425106793448541e-06, "loss": 0.4145, "step": 33200 }, { "epoch": 0.6163974112868191, "grad_norm": 0.468563973903656, "learning_rate": 6.4240174120482875e-06, "loss": 0.3722, "step": 33202 }, { "epoch": 0.6164345414242378, "grad_norm": 0.2872581481933594, "learning_rate": 6.422928079306449e-06, "loss": 0.2299, "step": 33204 }, { "epoch": 0.6164716715616565, "grad_norm": 0.5119061470031738, "learning_rate": 6.4218387952378455e-06, "loss": 0.2804, "step": 33206 }, { "epoch": 0.6165088016990751, "grad_norm": 0.41904041171073914, "learning_rate": 6.420749559857296e-06, "loss": 0.2831, "step": 33208 }, { "epoch": 0.6165459318364938, "grad_norm": 0.389636367559433, "learning_rate": 6.419660373179622e-06, "loss": 0.2257, "step": 33210 }, { "epoch": 0.6165830619739123, "grad_norm": 0.38800057768821716, "learning_rate": 6.4185712352196464e-06, "loss": 0.1826, "step": 33212 }, { "epoch": 0.616620192111331, "grad_norm": 0.3436047434806824, "learning_rate": 6.417482145992191e-06, "loss": 0.2883, "step": 33214 }, { "epoch": 0.6166573222487497, "grad_norm": 0.3250725269317627, "learning_rate": 6.416393105512071e-06, "loss": 0.2469, "step": 33216 }, { "epoch": 0.6166944523861683, "grad_norm": 0.451680451631546, "learning_rate": 6.41530411379411e-06, "loss": 0.1499, "step": 33218 }, { "epoch": 0.616731582523587, "grad_norm": 0.3136092722415924, "learning_rate": 6.414215170853119e-06, "loss": 0.2315, "step": 33220 }, { "epoch": 0.6167687126610055, "grad_norm": 0.6403464078903198, "learning_rate": 6.413126276703918e-06, "loss": 0.2244, "step": 33222 }, { "epoch": 0.6168058427984242, "grad_norm": 0.26836350560188293, "learning_rate": 6.412037431361326e-06, "loss": 0.3053, "step": 33224 }, { "epoch": 0.6168429729358428, "grad_norm": 0.41522327065467834, "learning_rate": 6.410948634840158e-06, "loss": 0.4316, "step": 33226 }, { "epoch": 0.6168801030732615, "grad_norm": 0.30513033270835876, "learning_rate": 6.409859887155227e-06, "loss": 0.2539, "step": 33228 }, { "epoch": 0.6169172332106801, "grad_norm": 0.34272241592407227, "learning_rate": 6.4087711883213495e-06, "loss": 0.1907, "step": 33230 }, { "epoch": 0.6169543633480987, "grad_norm": 0.30178695917129517, "learning_rate": 6.407682538353344e-06, "loss": 0.2186, "step": 33232 }, { "epoch": 0.6169914934855174, "grad_norm": 0.29983285069465637, "learning_rate": 6.406593937266013e-06, "loss": 0.2296, "step": 33234 }, { "epoch": 0.617028623622936, "grad_norm": 0.41090255975723267, "learning_rate": 6.405505385074179e-06, "loss": 0.4401, "step": 33236 }, { "epoch": 0.6170657537603547, "grad_norm": 0.32922253012657166, "learning_rate": 6.404416881792646e-06, "loss": 0.3256, "step": 33238 }, { "epoch": 0.6171028838977733, "grad_norm": 0.2647010087966919, "learning_rate": 6.4033284274362305e-06, "loss": 0.3941, "step": 33240 }, { "epoch": 0.6171400140351919, "grad_norm": 0.5008987784385681, "learning_rate": 6.402240022019741e-06, "loss": 0.297, "step": 33242 }, { "epoch": 0.6171771441726106, "grad_norm": 0.4774961471557617, "learning_rate": 6.401151665557994e-06, "loss": 0.2839, "step": 33244 }, { "epoch": 0.6172142743100292, "grad_norm": 0.2755691409111023, "learning_rate": 6.400063358065787e-06, "loss": 0.2683, "step": 33246 }, { "epoch": 0.6172514044474479, "grad_norm": 0.909270167350769, "learning_rate": 6.398975099557938e-06, "loss": 0.422, "step": 33248 }, { "epoch": 0.6172885345848665, "grad_norm": 0.2864827513694763, "learning_rate": 6.397886890049249e-06, "loss": 0.1347, "step": 33250 }, { "epoch": 0.6173256647222851, "grad_norm": 0.5350677371025085, "learning_rate": 6.39679872955453e-06, "loss": 0.2044, "step": 33252 }, { "epoch": 0.6173627948597038, "grad_norm": 0.7250619530677795, "learning_rate": 6.395710618088587e-06, "loss": 0.3853, "step": 33254 }, { "epoch": 0.6173999249971224, "grad_norm": 0.6137080192565918, "learning_rate": 6.39462255566623e-06, "loss": 0.3967, "step": 33256 }, { "epoch": 0.6174370551345411, "grad_norm": 0.41511866450309753, "learning_rate": 6.393534542302258e-06, "loss": 0.4025, "step": 33258 }, { "epoch": 0.6174741852719597, "grad_norm": 0.7183419466018677, "learning_rate": 6.39244657801148e-06, "loss": 0.3129, "step": 33260 }, { "epoch": 0.6175113154093783, "grad_norm": 0.4176501929759979, "learning_rate": 6.391358662808695e-06, "loss": 0.137, "step": 33262 }, { "epoch": 0.617548445546797, "grad_norm": 0.39454394578933716, "learning_rate": 6.390270796708711e-06, "loss": 0.2419, "step": 33264 }, { "epoch": 0.6175855756842156, "grad_norm": 0.28319844603538513, "learning_rate": 6.389182979726331e-06, "loss": 0.1551, "step": 33266 }, { "epoch": 0.6176227058216343, "grad_norm": 0.35768696665763855, "learning_rate": 6.3880952118763525e-06, "loss": 0.3303, "step": 33268 }, { "epoch": 0.6176598359590529, "grad_norm": 0.3630194067955017, "learning_rate": 6.3870074931735824e-06, "loss": 0.2665, "step": 33270 }, { "epoch": 0.6176969660964715, "grad_norm": 0.3873700797557831, "learning_rate": 6.385919823632815e-06, "loss": 0.2194, "step": 33272 }, { "epoch": 0.6177340962338902, "grad_norm": 0.45491498708724976, "learning_rate": 6.384832203268854e-06, "loss": 0.2585, "step": 33274 }, { "epoch": 0.6177712263713088, "grad_norm": 0.35012829303741455, "learning_rate": 6.383744632096495e-06, "loss": 0.379, "step": 33276 }, { "epoch": 0.6178083565087275, "grad_norm": 0.3113013505935669, "learning_rate": 6.382657110130546e-06, "loss": 0.4105, "step": 33278 }, { "epoch": 0.617845486646146, "grad_norm": 0.4309644103050232, "learning_rate": 6.381569637385794e-06, "loss": 0.2201, "step": 33280 }, { "epoch": 0.6178826167835647, "grad_norm": 0.3338206112384796, "learning_rate": 6.380482213877045e-06, "loss": 0.3196, "step": 33282 }, { "epoch": 0.6179197469209834, "grad_norm": 0.4329235255718231, "learning_rate": 6.379394839619088e-06, "loss": 0.1962, "step": 33284 }, { "epoch": 0.617956877058402, "grad_norm": 0.3053397536277771, "learning_rate": 6.378307514626722e-06, "loss": 0.222, "step": 33286 }, { "epoch": 0.6179940071958206, "grad_norm": 0.4653834104537964, "learning_rate": 6.377220238914741e-06, "loss": 0.274, "step": 33288 }, { "epoch": 0.6180311373332392, "grad_norm": 0.3061346411705017, "learning_rate": 6.376133012497945e-06, "loss": 0.1059, "step": 33290 }, { "epoch": 0.6180682674706579, "grad_norm": 0.39407241344451904, "learning_rate": 6.375045835391121e-06, "loss": 0.3046, "step": 33292 }, { "epoch": 0.6181053976080766, "grad_norm": 0.20992477238178253, "learning_rate": 6.373958707609069e-06, "loss": 0.2434, "step": 33294 }, { "epoch": 0.6181425277454952, "grad_norm": 0.37773194909095764, "learning_rate": 6.372871629166575e-06, "loss": 0.4344, "step": 33296 }, { "epoch": 0.6181796578829138, "grad_norm": 0.34386521577835083, "learning_rate": 6.371784600078433e-06, "loss": 0.3349, "step": 33298 }, { "epoch": 0.6182167880203324, "grad_norm": 0.1684887409210205, "learning_rate": 6.370697620359436e-06, "loss": 0.1522, "step": 33300 }, { "epoch": 0.6182539181577511, "grad_norm": 0.43745386600494385, "learning_rate": 6.369610690024373e-06, "loss": 0.3218, "step": 33302 }, { "epoch": 0.6182910482951698, "grad_norm": 0.5287793874740601, "learning_rate": 6.368523809088034e-06, "loss": 0.1382, "step": 33304 }, { "epoch": 0.6183281784325884, "grad_norm": 0.35927218198776245, "learning_rate": 6.3674369775652115e-06, "loss": 0.4028, "step": 33306 }, { "epoch": 0.618365308570007, "grad_norm": 0.4231165945529938, "learning_rate": 6.366350195470687e-06, "loss": 0.4429, "step": 33308 }, { "epoch": 0.6184024387074256, "grad_norm": 0.39693304896354675, "learning_rate": 6.3652634628192525e-06, "loss": 0.2971, "step": 33310 }, { "epoch": 0.6184395688448443, "grad_norm": 0.49616900086402893, "learning_rate": 6.364176779625697e-06, "loss": 0.2773, "step": 33312 }, { "epoch": 0.618476698982263, "grad_norm": 0.36219197511672974, "learning_rate": 6.3630901459048025e-06, "loss": 0.1197, "step": 33314 }, { "epoch": 0.6185138291196816, "grad_norm": 0.4326838552951813, "learning_rate": 6.362003561671358e-06, "loss": 0.2995, "step": 33316 }, { "epoch": 0.6185509592571002, "grad_norm": 0.25405293703079224, "learning_rate": 6.360917026940147e-06, "loss": 0.2371, "step": 33318 }, { "epoch": 0.6185880893945188, "grad_norm": 0.30564671754837036, "learning_rate": 6.35983054172596e-06, "loss": 0.1064, "step": 33320 }, { "epoch": 0.6186252195319375, "grad_norm": 0.3000064194202423, "learning_rate": 6.358744106043574e-06, "loss": 0.3431, "step": 33322 }, { "epoch": 0.6186623496693561, "grad_norm": 0.423822820186615, "learning_rate": 6.357657719907772e-06, "loss": 0.3845, "step": 33324 }, { "epoch": 0.6186994798067748, "grad_norm": 0.3667752146720886, "learning_rate": 6.356571383333337e-06, "loss": 0.3177, "step": 33326 }, { "epoch": 0.6187366099441934, "grad_norm": 0.2843489944934845, "learning_rate": 6.355485096335052e-06, "loss": 0.251, "step": 33328 }, { "epoch": 0.618773740081612, "grad_norm": 0.8024854063987732, "learning_rate": 6.354398858927701e-06, "loss": 0.4505, "step": 33330 }, { "epoch": 0.6188108702190307, "grad_norm": 0.31695571541786194, "learning_rate": 6.353312671126063e-06, "loss": 0.1438, "step": 33332 }, { "epoch": 0.6188480003564493, "grad_norm": 0.32730311155319214, "learning_rate": 6.352226532944915e-06, "loss": 0.2741, "step": 33334 }, { "epoch": 0.618885130493868, "grad_norm": 0.40460073947906494, "learning_rate": 6.351140444399035e-06, "loss": 0.3114, "step": 33336 }, { "epoch": 0.6189222606312866, "grad_norm": 0.5215739607810974, "learning_rate": 6.350054405503205e-06, "loss": 0.3624, "step": 33338 }, { "epoch": 0.6189593907687052, "grad_norm": 0.4731350243091583, "learning_rate": 6.348968416272202e-06, "loss": 0.4386, "step": 33340 }, { "epoch": 0.6189965209061239, "grad_norm": 0.25077199935913086, "learning_rate": 6.347882476720803e-06, "loss": 0.1345, "step": 33342 }, { "epoch": 0.6190336510435425, "grad_norm": 0.3972725570201874, "learning_rate": 6.346796586863788e-06, "loss": 0.3556, "step": 33344 }, { "epoch": 0.6190707811809611, "grad_norm": 0.27672943472862244, "learning_rate": 6.345710746715925e-06, "loss": 0.3759, "step": 33346 }, { "epoch": 0.6191079113183798, "grad_norm": 0.3596516251564026, "learning_rate": 6.344624956291991e-06, "loss": 0.4195, "step": 33348 }, { "epoch": 0.6191450414557984, "grad_norm": 0.457146555185318, "learning_rate": 6.343539215606764e-06, "loss": 0.1052, "step": 33350 }, { "epoch": 0.6191821715932171, "grad_norm": 0.3335888087749481, "learning_rate": 6.342453524675016e-06, "loss": 0.1799, "step": 33352 }, { "epoch": 0.6192193017306357, "grad_norm": 0.3823656737804413, "learning_rate": 6.34136788351152e-06, "loss": 0.2373, "step": 33354 }, { "epoch": 0.6192564318680543, "grad_norm": 0.3855670094490051, "learning_rate": 6.340282292131048e-06, "loss": 0.1443, "step": 33356 }, { "epoch": 0.619293562005473, "grad_norm": 0.437018483877182, "learning_rate": 6.3391967505483756e-06, "loss": 0.3183, "step": 33358 }, { "epoch": 0.6193306921428916, "grad_norm": 0.3893338739871979, "learning_rate": 6.338111258778264e-06, "loss": 0.137, "step": 33360 }, { "epoch": 0.6193678222803103, "grad_norm": 0.30368420481681824, "learning_rate": 6.337025816835491e-06, "loss": 0.1675, "step": 33362 }, { "epoch": 0.6194049524177289, "grad_norm": 0.3592333197593689, "learning_rate": 6.335940424734828e-06, "loss": 0.4007, "step": 33364 }, { "epoch": 0.6194420825551475, "grad_norm": 0.21633483469486237, "learning_rate": 6.334855082491037e-06, "loss": 0.2542, "step": 33366 }, { "epoch": 0.6194792126925662, "grad_norm": 0.5048404335975647, "learning_rate": 6.333769790118891e-06, "loss": 0.1114, "step": 33368 }, { "epoch": 0.6195163428299848, "grad_norm": 0.39145511388778687, "learning_rate": 6.33268454763316e-06, "loss": 0.1339, "step": 33370 }, { "epoch": 0.6195534729674035, "grad_norm": 0.4640060067176819, "learning_rate": 6.3315993550486036e-06, "loss": 0.3211, "step": 33372 }, { "epoch": 0.6195906031048221, "grad_norm": 0.3121573030948639, "learning_rate": 6.3305142123799935e-06, "loss": 0.1447, "step": 33374 }, { "epoch": 0.6196277332422407, "grad_norm": 0.43064436316490173, "learning_rate": 6.329429119642092e-06, "loss": 0.3384, "step": 33376 }, { "epoch": 0.6196648633796593, "grad_norm": 0.3839457631111145, "learning_rate": 6.328344076849665e-06, "loss": 0.3374, "step": 33378 }, { "epoch": 0.619701993517078, "grad_norm": 0.5601218342781067, "learning_rate": 6.327259084017478e-06, "loss": 0.4757, "step": 33380 }, { "epoch": 0.6197391236544967, "grad_norm": 0.31021183729171753, "learning_rate": 6.326174141160297e-06, "loss": 0.2898, "step": 33382 }, { "epoch": 0.6197762537919153, "grad_norm": 0.4099505543708801, "learning_rate": 6.325089248292878e-06, "loss": 0.3153, "step": 33384 }, { "epoch": 0.6198133839293339, "grad_norm": 0.47506025433540344, "learning_rate": 6.324004405429988e-06, "loss": 0.1783, "step": 33386 }, { "epoch": 0.6198505140667525, "grad_norm": 0.6071589589118958, "learning_rate": 6.322919612586387e-06, "loss": 0.2586, "step": 33388 }, { "epoch": 0.6198876442041712, "grad_norm": 0.4597158133983612, "learning_rate": 6.321834869776835e-06, "loss": 0.3362, "step": 33390 }, { "epoch": 0.6199247743415899, "grad_norm": 0.5954412221908569, "learning_rate": 6.320750177016092e-06, "loss": 0.242, "step": 33392 }, { "epoch": 0.6199619044790085, "grad_norm": 0.3859167993068695, "learning_rate": 6.3196655343189235e-06, "loss": 0.2448, "step": 33394 }, { "epoch": 0.6199990346164271, "grad_norm": 0.38169625401496887, "learning_rate": 6.318580941700079e-06, "loss": 0.2692, "step": 33396 }, { "epoch": 0.6200361647538457, "grad_norm": 0.4628596901893616, "learning_rate": 6.317496399174322e-06, "loss": 0.2511, "step": 33398 }, { "epoch": 0.6200732948912644, "grad_norm": 0.37985485792160034, "learning_rate": 6.316411906756408e-06, "loss": 0.1323, "step": 33400 }, { "epoch": 0.6201104250286831, "grad_norm": 0.3160107135772705, "learning_rate": 6.315327464461094e-06, "loss": 0.3513, "step": 33402 }, { "epoch": 0.6201475551661016, "grad_norm": 0.9894459843635559, "learning_rate": 6.314243072303137e-06, "loss": 0.3699, "step": 33404 }, { "epoch": 0.6201846853035203, "grad_norm": 0.5605846047401428, "learning_rate": 6.313158730297291e-06, "loss": 0.3255, "step": 33406 }, { "epoch": 0.6202218154409389, "grad_norm": 0.7524283528327942, "learning_rate": 6.312074438458316e-06, "loss": 0.4171, "step": 33408 }, { "epoch": 0.6202589455783576, "grad_norm": 0.40950241684913635, "learning_rate": 6.310990196800955e-06, "loss": 0.2222, "step": 33410 }, { "epoch": 0.6202960757157763, "grad_norm": 0.34139859676361084, "learning_rate": 6.3099060053399685e-06, "loss": 0.1857, "step": 33412 }, { "epoch": 0.6203332058531948, "grad_norm": 0.3612729012966156, "learning_rate": 6.308821864090109e-06, "loss": 0.3563, "step": 33414 }, { "epoch": 0.6203703359906135, "grad_norm": 0.28802332282066345, "learning_rate": 6.307737773066129e-06, "loss": 0.3761, "step": 33416 }, { "epoch": 0.6204074661280321, "grad_norm": 0.45993295311927795, "learning_rate": 6.306653732282776e-06, "loss": 0.2985, "step": 33418 }, { "epoch": 0.6204445962654508, "grad_norm": 0.3681545853614807, "learning_rate": 6.305569741754807e-06, "loss": 0.2873, "step": 33420 }, { "epoch": 0.6204817264028695, "grad_norm": 0.28756701946258545, "learning_rate": 6.304485801496964e-06, "loss": 0.4095, "step": 33422 }, { "epoch": 0.620518856540288, "grad_norm": 0.24331222474575043, "learning_rate": 6.3034019115239995e-06, "loss": 0.1753, "step": 33424 }, { "epoch": 0.6205559866777067, "grad_norm": 0.5234178304672241, "learning_rate": 6.302318071850664e-06, "loss": 0.3179, "step": 33426 }, { "epoch": 0.6205931168151253, "grad_norm": 0.3460370600223541, "learning_rate": 6.301234282491704e-06, "loss": 0.2699, "step": 33428 }, { "epoch": 0.620630246952544, "grad_norm": 0.47693508863449097, "learning_rate": 6.300150543461865e-06, "loss": 0.24, "step": 33430 }, { "epoch": 0.6206673770899626, "grad_norm": 0.5901803374290466, "learning_rate": 6.299066854775897e-06, "loss": 0.4031, "step": 33432 }, { "epoch": 0.6207045072273812, "grad_norm": 0.4246162474155426, "learning_rate": 6.297983216448542e-06, "loss": 0.2395, "step": 33434 }, { "epoch": 0.6207416373647999, "grad_norm": 0.5186436176300049, "learning_rate": 6.296899628494545e-06, "loss": 0.1754, "step": 33436 }, { "epoch": 0.6207787675022185, "grad_norm": 0.35105907917022705, "learning_rate": 6.295816090928654e-06, "loss": 0.2276, "step": 33438 }, { "epoch": 0.6208158976396372, "grad_norm": 0.4499373137950897, "learning_rate": 6.29473260376561e-06, "loss": 0.2084, "step": 33440 }, { "epoch": 0.6208530277770558, "grad_norm": 0.30024397373199463, "learning_rate": 6.293649167020156e-06, "loss": 0.1673, "step": 33442 }, { "epoch": 0.6208901579144744, "grad_norm": 0.23456653952598572, "learning_rate": 6.292565780707035e-06, "loss": 0.3916, "step": 33444 }, { "epoch": 0.6209272880518931, "grad_norm": 0.3159496784210205, "learning_rate": 6.291482444840993e-06, "loss": 0.2663, "step": 33446 }, { "epoch": 0.6209644181893117, "grad_norm": 0.4011788070201874, "learning_rate": 6.290399159436762e-06, "loss": 0.4351, "step": 33448 }, { "epoch": 0.6210015483267304, "grad_norm": 0.3560031056404114, "learning_rate": 6.2893159245090895e-06, "loss": 0.2579, "step": 33450 }, { "epoch": 0.621038678464149, "grad_norm": 0.3825017511844635, "learning_rate": 6.288232740072711e-06, "loss": 0.1405, "step": 33452 }, { "epoch": 0.6210758086015676, "grad_norm": 0.4587399363517761, "learning_rate": 6.287149606142364e-06, "loss": 0.1194, "step": 33454 }, { "epoch": 0.6211129387389863, "grad_norm": 0.40120425820350647, "learning_rate": 6.286066522732792e-06, "loss": 0.5247, "step": 33456 }, { "epoch": 0.6211500688764049, "grad_norm": 0.41624021530151367, "learning_rate": 6.2849834898587335e-06, "loss": 0.3626, "step": 33458 }, { "epoch": 0.6211871990138236, "grad_norm": 0.4103415012359619, "learning_rate": 6.283900507534921e-06, "loss": 0.2227, "step": 33460 }, { "epoch": 0.6212243291512421, "grad_norm": 0.6267563700675964, "learning_rate": 6.282817575776089e-06, "loss": 0.2891, "step": 33462 }, { "epoch": 0.6212614592886608, "grad_norm": 0.602595865726471, "learning_rate": 6.281734694596975e-06, "loss": 0.1885, "step": 33464 }, { "epoch": 0.6212985894260795, "grad_norm": 0.2548821270465851, "learning_rate": 6.280651864012315e-06, "loss": 0.2907, "step": 33466 }, { "epoch": 0.6213357195634981, "grad_norm": 0.4155820906162262, "learning_rate": 6.279569084036844e-06, "loss": 0.3823, "step": 33468 }, { "epoch": 0.6213728497009168, "grad_norm": 0.4594273865222931, "learning_rate": 6.278486354685294e-06, "loss": 0.4306, "step": 33470 }, { "epoch": 0.6214099798383353, "grad_norm": 0.4496306777000427, "learning_rate": 6.277403675972397e-06, "loss": 0.1366, "step": 33472 }, { "epoch": 0.621447109975754, "grad_norm": 0.512714684009552, "learning_rate": 6.276321047912883e-06, "loss": 0.2604, "step": 33474 }, { "epoch": 0.6214842401131726, "grad_norm": 0.5083045363426208, "learning_rate": 6.275238470521487e-06, "loss": 0.2115, "step": 33476 }, { "epoch": 0.6215213702505913, "grad_norm": 0.26935267448425293, "learning_rate": 6.274155943812938e-06, "loss": 0.225, "step": 33478 }, { "epoch": 0.62155850038801, "grad_norm": 0.32789263129234314, "learning_rate": 6.273073467801969e-06, "loss": 0.2408, "step": 33480 }, { "epoch": 0.6215956305254285, "grad_norm": 0.2412440925836563, "learning_rate": 6.271991042503305e-06, "loss": 0.2569, "step": 33482 }, { "epoch": 0.6216327606628472, "grad_norm": 0.3709932267665863, "learning_rate": 6.270908667931679e-06, "loss": 0.4383, "step": 33484 }, { "epoch": 0.6216698908002658, "grad_norm": 0.3263992369174957, "learning_rate": 6.269826344101813e-06, "loss": 0.435, "step": 33486 }, { "epoch": 0.6217070209376845, "grad_norm": 0.46520960330963135, "learning_rate": 6.268744071028437e-06, "loss": 0.3145, "step": 33488 }, { "epoch": 0.6217441510751032, "grad_norm": 0.3823679983615875, "learning_rate": 6.267661848726279e-06, "loss": 0.3142, "step": 33490 }, { "epoch": 0.6217812812125217, "grad_norm": 0.4349318742752075, "learning_rate": 6.266579677210062e-06, "loss": 0.4574, "step": 33492 }, { "epoch": 0.6218184113499404, "grad_norm": 0.345052033662796, "learning_rate": 6.265497556494513e-06, "loss": 0.2733, "step": 33494 }, { "epoch": 0.621855541487359, "grad_norm": 0.8973308801651001, "learning_rate": 6.26441548659436e-06, "loss": 0.33, "step": 33496 }, { "epoch": 0.6218926716247777, "grad_norm": 0.38141483068466187, "learning_rate": 6.263333467524319e-06, "loss": 0.3403, "step": 33498 }, { "epoch": 0.6219298017621964, "grad_norm": 0.27269378304481506, "learning_rate": 6.262251499299117e-06, "loss": 0.1462, "step": 33500 }, { "epoch": 0.6219669318996149, "grad_norm": 0.6646750569343567, "learning_rate": 6.261169581933477e-06, "loss": 0.4609, "step": 33502 }, { "epoch": 0.6220040620370336, "grad_norm": 0.49614548683166504, "learning_rate": 6.260087715442119e-06, "loss": 0.3653, "step": 33504 }, { "epoch": 0.6220411921744522, "grad_norm": 0.4744861125946045, "learning_rate": 6.2590058998397654e-06, "loss": 0.3324, "step": 33506 }, { "epoch": 0.6220783223118709, "grad_norm": 0.3603343069553375, "learning_rate": 6.257924135141139e-06, "loss": 0.4553, "step": 33508 }, { "epoch": 0.6221154524492896, "grad_norm": 0.4117833375930786, "learning_rate": 6.2568424213609525e-06, "loss": 0.3819, "step": 33510 }, { "epoch": 0.6221525825867081, "grad_norm": 0.3570064902305603, "learning_rate": 6.255760758513931e-06, "loss": 0.3176, "step": 33512 }, { "epoch": 0.6221897127241268, "grad_norm": 0.5464138388633728, "learning_rate": 6.254679146614788e-06, "loss": 0.2544, "step": 33514 }, { "epoch": 0.6222268428615454, "grad_norm": 0.2973516285419464, "learning_rate": 6.253597585678243e-06, "loss": 0.2901, "step": 33516 }, { "epoch": 0.6222639729989641, "grad_norm": 0.38917866349220276, "learning_rate": 6.252516075719013e-06, "loss": 0.3537, "step": 33518 }, { "epoch": 0.6223011031363828, "grad_norm": 0.2568664848804474, "learning_rate": 6.251434616751817e-06, "loss": 0.1946, "step": 33520 }, { "epoch": 0.6223382332738013, "grad_norm": 0.8497270345687866, "learning_rate": 6.250353208791367e-06, "loss": 0.1758, "step": 33522 }, { "epoch": 0.62237536341122, "grad_norm": 0.3346641957759857, "learning_rate": 6.249271851852379e-06, "loss": 0.2139, "step": 33524 }, { "epoch": 0.6224124935486386, "grad_norm": 0.4493895471096039, "learning_rate": 6.2481905459495625e-06, "loss": 0.5815, "step": 33526 }, { "epoch": 0.6224496236860573, "grad_norm": 0.31807294487953186, "learning_rate": 6.247109291097637e-06, "loss": 0.2525, "step": 33528 }, { "epoch": 0.6224867538234758, "grad_norm": 0.7084963321685791, "learning_rate": 6.246028087311311e-06, "loss": 0.2961, "step": 33530 }, { "epoch": 0.6225238839608945, "grad_norm": 0.49804481863975525, "learning_rate": 6.244946934605302e-06, "loss": 0.223, "step": 33532 }, { "epoch": 0.6225610140983132, "grad_norm": 0.39025992155075073, "learning_rate": 6.243865832994316e-06, "loss": 0.3599, "step": 33534 }, { "epoch": 0.6225981442357318, "grad_norm": 0.26710212230682373, "learning_rate": 6.242784782493066e-06, "loss": 0.2713, "step": 33536 }, { "epoch": 0.6226352743731505, "grad_norm": 0.40024060010910034, "learning_rate": 6.24170378311626e-06, "loss": 0.2298, "step": 33538 }, { "epoch": 0.622672404510569, "grad_norm": 0.3097241520881653, "learning_rate": 6.240622834878606e-06, "loss": 0.2941, "step": 33540 }, { "epoch": 0.6227095346479877, "grad_norm": 0.5373071432113647, "learning_rate": 6.239541937794818e-06, "loss": 0.3938, "step": 33542 }, { "epoch": 0.6227466647854064, "grad_norm": 0.2220676839351654, "learning_rate": 6.238461091879597e-06, "loss": 0.3212, "step": 33544 }, { "epoch": 0.622783794922825, "grad_norm": 0.3487304151058197, "learning_rate": 6.237380297147658e-06, "loss": 0.2238, "step": 33546 }, { "epoch": 0.6228209250602437, "grad_norm": 0.45740750432014465, "learning_rate": 6.236299553613699e-06, "loss": 0.386, "step": 33548 }, { "epoch": 0.6228580551976622, "grad_norm": 0.3692704737186432, "learning_rate": 6.235218861292429e-06, "loss": 0.3395, "step": 33550 }, { "epoch": 0.6228951853350809, "grad_norm": 0.5486975312232971, "learning_rate": 6.234138220198554e-06, "loss": 0.4075, "step": 33552 }, { "epoch": 0.6229323154724996, "grad_norm": 0.3254355490207672, "learning_rate": 6.233057630346781e-06, "loss": 0.28, "step": 33554 }, { "epoch": 0.6229694456099182, "grad_norm": 0.5588211417198181, "learning_rate": 6.231977091751806e-06, "loss": 0.3147, "step": 33556 }, { "epoch": 0.6230065757473369, "grad_norm": 0.3213815689086914, "learning_rate": 6.230896604428343e-06, "loss": 0.4682, "step": 33558 }, { "epoch": 0.6230437058847554, "grad_norm": 0.3725796639919281, "learning_rate": 6.2298161683910805e-06, "loss": 0.3156, "step": 33560 }, { "epoch": 0.6230808360221741, "grad_norm": 0.31407278776168823, "learning_rate": 6.2287357836547294e-06, "loss": 0.3139, "step": 33562 }, { "epoch": 0.6231179661595928, "grad_norm": 0.4046039283275604, "learning_rate": 6.227655450233986e-06, "loss": 0.1866, "step": 33564 }, { "epoch": 0.6231550962970114, "grad_norm": 0.26877516508102417, "learning_rate": 6.226575168143555e-06, "loss": 0.2306, "step": 33566 }, { "epoch": 0.6231922264344301, "grad_norm": 0.3913203775882721, "learning_rate": 6.2254949373981314e-06, "loss": 0.333, "step": 33568 }, { "epoch": 0.6232293565718486, "grad_norm": 0.3008491098880768, "learning_rate": 6.224414758012416e-06, "loss": 0.1067, "step": 33570 }, { "epoch": 0.6232664867092673, "grad_norm": 0.44140657782554626, "learning_rate": 6.22333463000111e-06, "loss": 0.2979, "step": 33572 }, { "epoch": 0.623303616846686, "grad_norm": 0.4018612504005432, "learning_rate": 6.222254553378904e-06, "loss": 0.2325, "step": 33574 }, { "epoch": 0.6233407469841046, "grad_norm": 0.4377041459083557, "learning_rate": 6.2211745281605e-06, "loss": 0.265, "step": 33576 }, { "epoch": 0.6233778771215233, "grad_norm": 0.714357316493988, "learning_rate": 6.22009455436059e-06, "loss": 0.324, "step": 33578 }, { "epoch": 0.6234150072589418, "grad_norm": 0.3865002691745758, "learning_rate": 6.219014631993869e-06, "loss": 0.335, "step": 33580 }, { "epoch": 0.6234521373963605, "grad_norm": 0.5451263785362244, "learning_rate": 6.217934761075035e-06, "loss": 0.3478, "step": 33582 }, { "epoch": 0.6234892675337791, "grad_norm": 0.5084574222564697, "learning_rate": 6.216854941618784e-06, "loss": 0.2694, "step": 33584 }, { "epoch": 0.6235263976711978, "grad_norm": 0.2983416020870209, "learning_rate": 6.2157751736398016e-06, "loss": 0.2923, "step": 33586 }, { "epoch": 0.6235635278086165, "grad_norm": 0.412888765335083, "learning_rate": 6.214695457152786e-06, "loss": 0.2773, "step": 33588 }, { "epoch": 0.623600657946035, "grad_norm": 0.44216951727867126, "learning_rate": 6.213615792172425e-06, "loss": 0.3006, "step": 33590 }, { "epoch": 0.6236377880834537, "grad_norm": 0.30473342537879944, "learning_rate": 6.212536178713412e-06, "loss": 0.1612, "step": 33592 }, { "epoch": 0.6236749182208723, "grad_norm": 0.5691676139831543, "learning_rate": 6.211456616790437e-06, "loss": 0.3484, "step": 33594 }, { "epoch": 0.623712048358291, "grad_norm": 0.46212685108184814, "learning_rate": 6.210377106418192e-06, "loss": 0.1805, "step": 33596 }, { "epoch": 0.6237491784957097, "grad_norm": 0.7607839107513428, "learning_rate": 6.209297647611362e-06, "loss": 0.4388, "step": 33598 }, { "epoch": 0.6237863086331282, "grad_norm": 0.42715758085250854, "learning_rate": 6.2082182403846345e-06, "loss": 0.2543, "step": 33600 }, { "epoch": 0.6238234387705469, "grad_norm": 0.3998908996582031, "learning_rate": 6.207138884752699e-06, "loss": 0.3483, "step": 33602 }, { "epoch": 0.6238605689079655, "grad_norm": 0.50295490026474, "learning_rate": 6.2060595807302424e-06, "loss": 0.3534, "step": 33604 }, { "epoch": 0.6238976990453842, "grad_norm": 0.34141379594802856, "learning_rate": 6.204980328331954e-06, "loss": 0.1619, "step": 33606 }, { "epoch": 0.6239348291828029, "grad_norm": 0.4538635313510895, "learning_rate": 6.203901127572512e-06, "loss": 0.2529, "step": 33608 }, { "epoch": 0.6239719593202214, "grad_norm": 0.3219510614871979, "learning_rate": 6.20282197846661e-06, "loss": 0.4102, "step": 33610 }, { "epoch": 0.6240090894576401, "grad_norm": 0.42428040504455566, "learning_rate": 6.201742881028922e-06, "loss": 0.4422, "step": 33612 }, { "epoch": 0.6240462195950587, "grad_norm": 0.4472428262233734, "learning_rate": 6.200663835274138e-06, "loss": 0.1824, "step": 33614 }, { "epoch": 0.6240833497324774, "grad_norm": 0.3867628276348114, "learning_rate": 6.1995848412169364e-06, "loss": 0.3032, "step": 33616 }, { "epoch": 0.624120479869896, "grad_norm": 0.4629480242729187, "learning_rate": 6.198505898872007e-06, "loss": 0.2959, "step": 33618 }, { "epoch": 0.6241576100073146, "grad_norm": 0.23004283010959625, "learning_rate": 6.197427008254021e-06, "loss": 0.1355, "step": 33620 }, { "epoch": 0.6241947401447333, "grad_norm": 0.508292555809021, "learning_rate": 6.196348169377668e-06, "loss": 0.566, "step": 33622 }, { "epoch": 0.6242318702821519, "grad_norm": 0.37736445665359497, "learning_rate": 6.195269382257619e-06, "loss": 0.3869, "step": 33624 }, { "epoch": 0.6242690004195706, "grad_norm": 0.35757002234458923, "learning_rate": 6.194190646908558e-06, "loss": 0.4167, "step": 33626 }, { "epoch": 0.6243061305569891, "grad_norm": 0.3544449210166931, "learning_rate": 6.193111963345165e-06, "loss": 0.2409, "step": 33628 }, { "epoch": 0.6243432606944078, "grad_norm": 0.44588562846183777, "learning_rate": 6.19203333158211e-06, "loss": 0.1527, "step": 33630 }, { "epoch": 0.6243803908318265, "grad_norm": 0.41675305366516113, "learning_rate": 6.190954751634078e-06, "loss": 0.2863, "step": 33632 }, { "epoch": 0.6244175209692451, "grad_norm": 0.34612202644348145, "learning_rate": 6.189876223515746e-06, "loss": 0.4173, "step": 33634 }, { "epoch": 0.6244546511066638, "grad_norm": 0.38405779004096985, "learning_rate": 6.188797747241782e-06, "loss": 0.2842, "step": 33636 }, { "epoch": 0.6244917812440823, "grad_norm": 0.5298508405685425, "learning_rate": 6.187719322826864e-06, "loss": 0.2592, "step": 33638 }, { "epoch": 0.624528911381501, "grad_norm": 0.28581956028938293, "learning_rate": 6.186640950285669e-06, "loss": 0.2859, "step": 33640 }, { "epoch": 0.6245660415189197, "grad_norm": 0.31445547938346863, "learning_rate": 6.185562629632869e-06, "loss": 0.2421, "step": 33642 }, { "epoch": 0.6246031716563383, "grad_norm": 0.3807367980480194, "learning_rate": 6.1844843608831325e-06, "loss": 0.386, "step": 33644 }, { "epoch": 0.624640301793757, "grad_norm": 0.4329812526702881, "learning_rate": 6.1834061440511364e-06, "loss": 0.5375, "step": 33646 }, { "epoch": 0.6246774319311755, "grad_norm": 0.5119138956069946, "learning_rate": 6.1823279791515546e-06, "loss": 0.2728, "step": 33648 }, { "epoch": 0.6247145620685942, "grad_norm": 0.6869155168533325, "learning_rate": 6.181249866199052e-06, "loss": 0.2547, "step": 33650 }, { "epoch": 0.6247516922060129, "grad_norm": 0.35880687832832336, "learning_rate": 6.180171805208298e-06, "loss": 0.6476, "step": 33652 }, { "epoch": 0.6247888223434315, "grad_norm": 0.19963699579238892, "learning_rate": 6.179093796193964e-06, "loss": 0.2864, "step": 33654 }, { "epoch": 0.6248259524808502, "grad_norm": 0.38334372639656067, "learning_rate": 6.178015839170719e-06, "loss": 0.2039, "step": 33656 }, { "epoch": 0.6248630826182687, "grad_norm": 0.28877779841423035, "learning_rate": 6.176937934153231e-06, "loss": 0.2892, "step": 33658 }, { "epoch": 0.6249002127556874, "grad_norm": 0.376616895198822, "learning_rate": 6.175860081156168e-06, "loss": 0.3712, "step": 33660 }, { "epoch": 0.6249373428931061, "grad_norm": 0.4133601486682892, "learning_rate": 6.174782280194194e-06, "loss": 0.2686, "step": 33662 }, { "epoch": 0.6249744730305247, "grad_norm": 0.34283655881881714, "learning_rate": 6.1737045312819725e-06, "loss": 0.4099, "step": 33664 }, { "epoch": 0.6250116031679434, "grad_norm": 0.34196093678474426, "learning_rate": 6.172626834434172e-06, "loss": 0.1498, "step": 33666 }, { "epoch": 0.6250487333053619, "grad_norm": 0.4400765597820282, "learning_rate": 6.171549189665456e-06, "loss": 0.3034, "step": 33668 }, { "epoch": 0.6250858634427806, "grad_norm": 0.40945225954055786, "learning_rate": 6.17047159699049e-06, "loss": 0.181, "step": 33670 }, { "epoch": 0.6251229935801993, "grad_norm": 0.3731880187988281, "learning_rate": 6.169394056423934e-06, "loss": 0.3556, "step": 33672 }, { "epoch": 0.6251601237176179, "grad_norm": 0.2364620715379715, "learning_rate": 6.168316567980452e-06, "loss": 0.1699, "step": 33674 }, { "epoch": 0.6251972538550365, "grad_norm": 0.485349178314209, "learning_rate": 6.167239131674703e-06, "loss": 0.2843, "step": 33676 }, { "epoch": 0.6252343839924551, "grad_norm": 0.29831939935684204, "learning_rate": 6.166161747521347e-06, "loss": 0.22, "step": 33678 }, { "epoch": 0.6252715141298738, "grad_norm": 0.27302294969558716, "learning_rate": 6.1650844155350465e-06, "loss": 0.2673, "step": 33680 }, { "epoch": 0.6253086442672924, "grad_norm": 0.26489681005477905, "learning_rate": 6.164007135730463e-06, "loss": 0.2521, "step": 33682 }, { "epoch": 0.6253457744047111, "grad_norm": 0.5267603993415833, "learning_rate": 6.162929908122253e-06, "loss": 0.5018, "step": 33684 }, { "epoch": 0.6253829045421297, "grad_norm": 0.3156895041465759, "learning_rate": 6.161852732725071e-06, "loss": 0.2595, "step": 33686 }, { "epoch": 0.6254200346795483, "grad_norm": 0.4034641683101654, "learning_rate": 6.160775609553575e-06, "loss": 0.1991, "step": 33688 }, { "epoch": 0.625457164816967, "grad_norm": 0.39785236120224, "learning_rate": 6.159698538622425e-06, "loss": 0.2517, "step": 33690 }, { "epoch": 0.6254942949543856, "grad_norm": 0.2915186285972595, "learning_rate": 6.158621519946275e-06, "loss": 0.1973, "step": 33692 }, { "epoch": 0.6255314250918043, "grad_norm": 0.5866549015045166, "learning_rate": 6.15754455353978e-06, "loss": 0.3051, "step": 33694 }, { "epoch": 0.625568555229223, "grad_norm": 0.37558498978614807, "learning_rate": 6.156467639417593e-06, "loss": 0.094, "step": 33696 }, { "epoch": 0.6256056853666415, "grad_norm": 0.3335569500923157, "learning_rate": 6.155390777594373e-06, "loss": 0.2218, "step": 33698 }, { "epoch": 0.6256428155040602, "grad_norm": 0.505942165851593, "learning_rate": 6.154313968084764e-06, "loss": 0.2639, "step": 33700 }, { "epoch": 0.6256799456414788, "grad_norm": 0.45688754320144653, "learning_rate": 6.153237210903422e-06, "loss": 0.3439, "step": 33702 }, { "epoch": 0.6257170757788975, "grad_norm": 0.5846219658851624, "learning_rate": 6.152160506065004e-06, "loss": 0.2327, "step": 33704 }, { "epoch": 0.6257542059163161, "grad_norm": 0.49177420139312744, "learning_rate": 6.151083853584151e-06, "loss": 0.21, "step": 33706 }, { "epoch": 0.6257913360537347, "grad_norm": 0.4829719662666321, "learning_rate": 6.1500072534755196e-06, "loss": 0.3661, "step": 33708 }, { "epoch": 0.6258284661911534, "grad_norm": 0.6135827898979187, "learning_rate": 6.148930705753761e-06, "loss": 0.1218, "step": 33710 }, { "epoch": 0.625865596328572, "grad_norm": 2.516249895095825, "learning_rate": 6.147854210433515e-06, "loss": 0.3147, "step": 33712 }, { "epoch": 0.6259027264659907, "grad_norm": 0.2733874022960663, "learning_rate": 6.1467777675294385e-06, "loss": 0.304, "step": 33714 }, { "epoch": 0.6259398566034093, "grad_norm": 0.3303776681423187, "learning_rate": 6.145701377056172e-06, "loss": 0.2423, "step": 33716 }, { "epoch": 0.6259769867408279, "grad_norm": 0.4016251564025879, "learning_rate": 6.144625039028365e-06, "loss": 0.336, "step": 33718 }, { "epoch": 0.6260141168782466, "grad_norm": 0.3686492145061493, "learning_rate": 6.143548753460662e-06, "loss": 0.279, "step": 33720 }, { "epoch": 0.6260512470156652, "grad_norm": 0.3058331310749054, "learning_rate": 6.142472520367715e-06, "loss": 0.3619, "step": 33722 }, { "epoch": 0.6260883771530839, "grad_norm": 0.5305180549621582, "learning_rate": 6.141396339764156e-06, "loss": 0.3481, "step": 33724 }, { "epoch": 0.6261255072905025, "grad_norm": 0.3334653377532959, "learning_rate": 6.140320211664639e-06, "loss": 0.2598, "step": 33726 }, { "epoch": 0.6261626374279211, "grad_norm": 0.47472646832466125, "learning_rate": 6.139244136083801e-06, "loss": 0.2044, "step": 33728 }, { "epoch": 0.6261997675653398, "grad_norm": 0.46620553731918335, "learning_rate": 6.138168113036285e-06, "loss": 0.3054, "step": 33730 }, { "epoch": 0.6262368977027584, "grad_norm": 0.3397664725780487, "learning_rate": 6.137092142536733e-06, "loss": 0.3721, "step": 33732 }, { "epoch": 0.626274027840177, "grad_norm": 0.40037786960601807, "learning_rate": 6.136016224599789e-06, "loss": 0.3973, "step": 33734 }, { "epoch": 0.6263111579775956, "grad_norm": 0.3438727855682373, "learning_rate": 6.134940359240091e-06, "loss": 0.3175, "step": 33736 }, { "epoch": 0.6263482881150143, "grad_norm": 0.4470684230327606, "learning_rate": 6.133864546472276e-06, "loss": 0.1328, "step": 33738 }, { "epoch": 0.626385418252433, "grad_norm": 0.3403429687023163, "learning_rate": 6.132788786310983e-06, "loss": 0.362, "step": 33740 }, { "epoch": 0.6264225483898516, "grad_norm": 0.40784934163093567, "learning_rate": 6.13171307877085e-06, "loss": 0.1683, "step": 33742 }, { "epoch": 0.6264596785272702, "grad_norm": 0.3788936138153076, "learning_rate": 6.130637423866518e-06, "loss": 0.4557, "step": 33744 }, { "epoch": 0.6264968086646888, "grad_norm": 0.4682208001613617, "learning_rate": 6.1295618216126175e-06, "loss": 0.4296, "step": 33746 }, { "epoch": 0.6265339388021075, "grad_norm": 0.1407458782196045, "learning_rate": 6.128486272023792e-06, "loss": 0.2423, "step": 33748 }, { "epoch": 0.6265710689395262, "grad_norm": 0.9254353046417236, "learning_rate": 6.1274107751146686e-06, "loss": 0.2534, "step": 33750 }, { "epoch": 0.6266081990769448, "grad_norm": 0.26007452607154846, "learning_rate": 6.126335330899884e-06, "loss": 0.1586, "step": 33752 }, { "epoch": 0.6266453292143634, "grad_norm": 0.7198523283004761, "learning_rate": 6.125259939394073e-06, "loss": 0.2512, "step": 33754 }, { "epoch": 0.626682459351782, "grad_norm": 0.6036335229873657, "learning_rate": 6.1241846006118684e-06, "loss": 0.2661, "step": 33756 }, { "epoch": 0.6267195894892007, "grad_norm": 0.5589054822921753, "learning_rate": 6.123109314567903e-06, "loss": 0.2248, "step": 33758 }, { "epoch": 0.6267567196266194, "grad_norm": 0.4162291884422302, "learning_rate": 6.122034081276809e-06, "loss": 0.3069, "step": 33760 }, { "epoch": 0.626793849764038, "grad_norm": 0.3105548620223999, "learning_rate": 6.120958900753211e-06, "loss": 0.3566, "step": 33762 }, { "epoch": 0.6268309799014566, "grad_norm": 0.38274091482162476, "learning_rate": 6.119883773011746e-06, "loss": 0.3503, "step": 33764 }, { "epoch": 0.6268681100388752, "grad_norm": 0.43922746181488037, "learning_rate": 6.11880869806704e-06, "loss": 0.2227, "step": 33766 }, { "epoch": 0.6269052401762939, "grad_norm": 0.3980352580547333, "learning_rate": 6.11773367593372e-06, "loss": 0.532, "step": 33768 }, { "epoch": 0.6269423703137126, "grad_norm": 0.3464619219303131, "learning_rate": 6.1166587066264174e-06, "loss": 0.1799, "step": 33770 }, { "epoch": 0.6269795004511312, "grad_norm": 0.3709046542644501, "learning_rate": 6.115583790159757e-06, "loss": 0.2985, "step": 33772 }, { "epoch": 0.6270166305885498, "grad_norm": 0.4215565025806427, "learning_rate": 6.11450892654837e-06, "loss": 0.2319, "step": 33774 }, { "epoch": 0.6270537607259684, "grad_norm": 0.35182565450668335, "learning_rate": 6.113434115806874e-06, "loss": 0.3307, "step": 33776 }, { "epoch": 0.6270908908633871, "grad_norm": 0.5552123785018921, "learning_rate": 6.112359357949901e-06, "loss": 0.2357, "step": 33778 }, { "epoch": 0.6271280210008057, "grad_norm": 0.3818530738353729, "learning_rate": 6.111284652992069e-06, "loss": 0.3848, "step": 33780 }, { "epoch": 0.6271651511382244, "grad_norm": 0.44810959696769714, "learning_rate": 6.110210000948006e-06, "loss": 0.3052, "step": 33782 }, { "epoch": 0.627202281275643, "grad_norm": 0.1763920933008194, "learning_rate": 6.109135401832333e-06, "loss": 0.2653, "step": 33784 }, { "epoch": 0.6272394114130616, "grad_norm": 0.4581526219844818, "learning_rate": 6.108060855659677e-06, "loss": 0.2854, "step": 33786 }, { "epoch": 0.6272765415504803, "grad_norm": 0.4450340270996094, "learning_rate": 6.106986362444651e-06, "loss": 0.2037, "step": 33788 }, { "epoch": 0.6273136716878989, "grad_norm": 0.5797021985054016, "learning_rate": 6.105911922201881e-06, "loss": 0.3063, "step": 33790 }, { "epoch": 0.6273508018253175, "grad_norm": 0.46969103813171387, "learning_rate": 6.104837534945985e-06, "loss": 0.1585, "step": 33792 }, { "epoch": 0.6273879319627362, "grad_norm": 0.2981218993663788, "learning_rate": 6.1037632006915815e-06, "loss": 0.2523, "step": 33794 }, { "epoch": 0.6274250621001548, "grad_norm": 0.2927955389022827, "learning_rate": 6.102688919453292e-06, "loss": 0.2513, "step": 33796 }, { "epoch": 0.6274621922375735, "grad_norm": 0.4551640450954437, "learning_rate": 6.101614691245734e-06, "loss": 0.2243, "step": 33798 }, { "epoch": 0.6274993223749921, "grad_norm": 0.41981008648872375, "learning_rate": 6.100540516083522e-06, "loss": 0.2888, "step": 33800 }, { "epoch": 0.6275364525124107, "grad_norm": 0.3514082729816437, "learning_rate": 6.099466393981273e-06, "loss": 0.1425, "step": 33802 }, { "epoch": 0.6275735826498294, "grad_norm": 0.35468536615371704, "learning_rate": 6.0983923249536e-06, "loss": 0.2403, "step": 33804 }, { "epoch": 0.627610712787248, "grad_norm": 0.33821043372154236, "learning_rate": 6.097318309015123e-06, "loss": 0.3153, "step": 33806 }, { "epoch": 0.6276478429246667, "grad_norm": 0.3924834430217743, "learning_rate": 6.096244346180455e-06, "loss": 0.2519, "step": 33808 }, { "epoch": 0.6276849730620853, "grad_norm": 0.4211489260196686, "learning_rate": 6.095170436464208e-06, "loss": 0.2621, "step": 33810 }, { "epoch": 0.627722103199504, "grad_norm": 0.3500491976737976, "learning_rate": 6.094096579880996e-06, "loss": 0.2091, "step": 33812 }, { "epoch": 0.6277592333369226, "grad_norm": 0.30993348360061646, "learning_rate": 6.093022776445428e-06, "loss": 0.2823, "step": 33814 }, { "epoch": 0.6277963634743412, "grad_norm": 0.3980141580104828, "learning_rate": 6.091949026172117e-06, "loss": 0.1791, "step": 33816 }, { "epoch": 0.6278334936117599, "grad_norm": 0.5893418192863464, "learning_rate": 6.090875329075674e-06, "loss": 0.3796, "step": 33818 }, { "epoch": 0.6278706237491785, "grad_norm": 0.5061236023902893, "learning_rate": 6.089801685170709e-06, "loss": 0.2261, "step": 33820 }, { "epoch": 0.6279077538865971, "grad_norm": 0.3368014991283417, "learning_rate": 6.08872809447183e-06, "loss": 0.2813, "step": 33822 }, { "epoch": 0.6279448840240158, "grad_norm": 0.45922359824180603, "learning_rate": 6.087654556993649e-06, "loss": 0.3107, "step": 33824 }, { "epoch": 0.6279820141614344, "grad_norm": 0.4237291216850281, "learning_rate": 6.086581072750769e-06, "loss": 0.3051, "step": 33826 }, { "epoch": 0.6280191442988531, "grad_norm": 0.37289169430732727, "learning_rate": 6.085507641757798e-06, "loss": 0.487, "step": 33828 }, { "epoch": 0.6280562744362717, "grad_norm": 0.25098609924316406, "learning_rate": 6.084434264029343e-06, "loss": 0.3396, "step": 33830 }, { "epoch": 0.6280934045736903, "grad_norm": 0.46209532022476196, "learning_rate": 6.0833609395800074e-06, "loss": 0.3498, "step": 33832 }, { "epoch": 0.6281305347111089, "grad_norm": 0.46025192737579346, "learning_rate": 6.082287668424399e-06, "loss": 0.2618, "step": 33834 }, { "epoch": 0.6281676648485276, "grad_norm": 0.3597796559333801, "learning_rate": 6.081214450577124e-06, "loss": 0.2214, "step": 33836 }, { "epoch": 0.6282047949859463, "grad_norm": 0.4983012080192566, "learning_rate": 6.080141286052778e-06, "loss": 0.3155, "step": 33838 }, { "epoch": 0.6282419251233649, "grad_norm": 0.3218439817428589, "learning_rate": 6.0790681748659674e-06, "loss": 0.1216, "step": 33840 }, { "epoch": 0.6282790552607835, "grad_norm": 0.4289875328540802, "learning_rate": 6.077995117031297e-06, "loss": 0.2489, "step": 33842 }, { "epoch": 0.6283161853982021, "grad_norm": 0.4476417899131775, "learning_rate": 6.076922112563364e-06, "loss": 0.2286, "step": 33844 }, { "epoch": 0.6283533155356208, "grad_norm": 0.48402199149131775, "learning_rate": 6.075849161476769e-06, "loss": 0.5451, "step": 33846 }, { "epoch": 0.6283904456730395, "grad_norm": 0.20987172424793243, "learning_rate": 6.0747762637861175e-06, "loss": 0.1435, "step": 33848 }, { "epoch": 0.628427575810458, "grad_norm": 0.40642261505126953, "learning_rate": 6.073703419505999e-06, "loss": 0.1083, "step": 33850 }, { "epoch": 0.6284647059478767, "grad_norm": 0.27885913848876953, "learning_rate": 6.0726306286510175e-06, "loss": 0.3879, "step": 33852 }, { "epoch": 0.6285018360852953, "grad_norm": 0.43868330121040344, "learning_rate": 6.0715578912357685e-06, "loss": 0.3033, "step": 33854 }, { "epoch": 0.628538966222714, "grad_norm": 0.37691131234169006, "learning_rate": 6.07048520727485e-06, "loss": 0.4333, "step": 33856 }, { "epoch": 0.6285760963601327, "grad_norm": 0.406637042760849, "learning_rate": 6.069412576782856e-06, "loss": 0.3154, "step": 33858 }, { "epoch": 0.6286132264975512, "grad_norm": 0.3655599057674408, "learning_rate": 6.068339999774386e-06, "loss": 0.2653, "step": 33860 }, { "epoch": 0.6286503566349699, "grad_norm": 0.36459845304489136, "learning_rate": 6.0672674762640325e-06, "loss": 0.3079, "step": 33862 }, { "epoch": 0.6286874867723885, "grad_norm": 0.26623696088790894, "learning_rate": 6.066195006266389e-06, "loss": 0.2303, "step": 33864 }, { "epoch": 0.6287246169098072, "grad_norm": 0.3186604082584381, "learning_rate": 6.065122589796045e-06, "loss": 0.2062, "step": 33866 }, { "epoch": 0.6287617470472259, "grad_norm": 0.3088248074054718, "learning_rate": 6.064050226867597e-06, "loss": 0.1533, "step": 33868 }, { "epoch": 0.6287988771846444, "grad_norm": 0.3572561740875244, "learning_rate": 6.062977917495636e-06, "loss": 0.3008, "step": 33870 }, { "epoch": 0.6288360073220631, "grad_norm": 0.3829580247402191, "learning_rate": 6.061905661694755e-06, "loss": 0.1458, "step": 33872 }, { "epoch": 0.6288731374594817, "grad_norm": 0.319844514131546, "learning_rate": 6.0608334594795435e-06, "loss": 0.0907, "step": 33874 }, { "epoch": 0.6289102675969004, "grad_norm": 0.4637920558452606, "learning_rate": 6.059761310864586e-06, "loss": 0.3299, "step": 33876 }, { "epoch": 0.6289473977343191, "grad_norm": 0.23585030436515808, "learning_rate": 6.058689215864474e-06, "loss": 0.3233, "step": 33878 }, { "epoch": 0.6289845278717376, "grad_norm": 0.354974627494812, "learning_rate": 6.0576171744937966e-06, "loss": 0.3089, "step": 33880 }, { "epoch": 0.6290216580091563, "grad_norm": 0.429735392332077, "learning_rate": 6.056545186767142e-06, "loss": 0.2635, "step": 33882 }, { "epoch": 0.6290587881465749, "grad_norm": 0.4359927177429199, "learning_rate": 6.055473252699093e-06, "loss": 0.2483, "step": 33884 }, { "epoch": 0.6290959182839936, "grad_norm": 0.4963236451148987, "learning_rate": 6.0544013723042435e-06, "loss": 0.2013, "step": 33886 }, { "epoch": 0.6291330484214122, "grad_norm": 0.38880491256713867, "learning_rate": 6.0533295455971684e-06, "loss": 0.1589, "step": 33888 }, { "epoch": 0.6291701785588308, "grad_norm": 0.3633100986480713, "learning_rate": 6.052257772592456e-06, "loss": 0.3084, "step": 33890 }, { "epoch": 0.6292073086962495, "grad_norm": 0.43028852343559265, "learning_rate": 6.05118605330469e-06, "loss": 0.3644, "step": 33892 }, { "epoch": 0.6292444388336681, "grad_norm": 0.408188134431839, "learning_rate": 6.050114387748458e-06, "loss": 0.2321, "step": 33894 }, { "epoch": 0.6292815689710868, "grad_norm": 0.32363826036453247, "learning_rate": 6.0490427759383345e-06, "loss": 0.4529, "step": 33896 }, { "epoch": 0.6293186991085054, "grad_norm": 0.37696394324302673, "learning_rate": 6.047971217888904e-06, "loss": 0.2305, "step": 33898 }, { "epoch": 0.629355829245924, "grad_norm": 0.6494757533073425, "learning_rate": 6.046899713614751e-06, "loss": 0.3198, "step": 33900 }, { "epoch": 0.6293929593833427, "grad_norm": 0.2846950888633728, "learning_rate": 6.0458282631304485e-06, "loss": 0.1868, "step": 33902 }, { "epoch": 0.6294300895207613, "grad_norm": 0.3400444984436035, "learning_rate": 6.044756866450582e-06, "loss": 0.3013, "step": 33904 }, { "epoch": 0.62946721965818, "grad_norm": 0.6710299849510193, "learning_rate": 6.043685523589724e-06, "loss": 0.2046, "step": 33906 }, { "epoch": 0.6295043497955986, "grad_norm": 0.31077876687049866, "learning_rate": 6.042614234562456e-06, "loss": 0.3936, "step": 33908 }, { "epoch": 0.6295414799330172, "grad_norm": 0.9020906686782837, "learning_rate": 6.041542999383356e-06, "loss": 0.3091, "step": 33910 }, { "epoch": 0.6295786100704359, "grad_norm": 0.6193994879722595, "learning_rate": 6.040471818067e-06, "loss": 0.4458, "step": 33912 }, { "epoch": 0.6296157402078545, "grad_norm": 0.29391154646873474, "learning_rate": 6.039400690627961e-06, "loss": 0.208, "step": 33914 }, { "epoch": 0.6296528703452732, "grad_norm": 0.3856953978538513, "learning_rate": 6.038329617080816e-06, "loss": 0.2033, "step": 33916 }, { "epoch": 0.6296900004826917, "grad_norm": 0.5634004473686218, "learning_rate": 6.037258597440136e-06, "loss": 0.2772, "step": 33918 }, { "epoch": 0.6297271306201104, "grad_norm": 0.4434337019920349, "learning_rate": 6.036187631720497e-06, "loss": 0.274, "step": 33920 }, { "epoch": 0.6297642607575291, "grad_norm": 0.3064804673194885, "learning_rate": 6.035116719936471e-06, "loss": 0.2363, "step": 33922 }, { "epoch": 0.6298013908949477, "grad_norm": 0.36120733618736267, "learning_rate": 6.034045862102636e-06, "loss": 0.2195, "step": 33924 }, { "epoch": 0.6298385210323664, "grad_norm": 0.37281864881515503, "learning_rate": 6.032975058233552e-06, "loss": 0.4467, "step": 33926 }, { "epoch": 0.629875651169785, "grad_norm": 0.1743318736553192, "learning_rate": 6.031904308343797e-06, "loss": 0.257, "step": 33928 }, { "epoch": 0.6299127813072036, "grad_norm": 0.43869540095329285, "learning_rate": 6.030833612447936e-06, "loss": 0.4597, "step": 33930 }, { "epoch": 0.6299499114446222, "grad_norm": 0.3877910077571869, "learning_rate": 6.0297629705605406e-06, "loss": 0.3908, "step": 33932 }, { "epoch": 0.6299870415820409, "grad_norm": 0.34378233551979065, "learning_rate": 6.0286923826961815e-06, "loss": 0.2751, "step": 33934 }, { "epoch": 0.6300241717194596, "grad_norm": 0.3581828474998474, "learning_rate": 6.027621848869422e-06, "loss": 0.1359, "step": 33936 }, { "epoch": 0.6300613018568781, "grad_norm": 0.33425086736679077, "learning_rate": 6.026551369094833e-06, "loss": 0.1825, "step": 33938 }, { "epoch": 0.6300984319942968, "grad_norm": 0.38773342967033386, "learning_rate": 6.025480943386976e-06, "loss": 0.3022, "step": 33940 }, { "epoch": 0.6301355621317154, "grad_norm": 0.36916765570640564, "learning_rate": 6.024410571760418e-06, "loss": 0.2681, "step": 33942 }, { "epoch": 0.6301726922691341, "grad_norm": 0.5057629942893982, "learning_rate": 6.023340254229721e-06, "loss": 0.2142, "step": 33944 }, { "epoch": 0.6302098224065528, "grad_norm": 0.4288654923439026, "learning_rate": 6.022269990809457e-06, "loss": 0.3665, "step": 33946 }, { "epoch": 0.6302469525439713, "grad_norm": 0.4674459993839264, "learning_rate": 6.0211997815141795e-06, "loss": 0.433, "step": 33948 }, { "epoch": 0.63028408268139, "grad_norm": 1.163735270500183, "learning_rate": 6.020129626358462e-06, "loss": 0.3013, "step": 33950 }, { "epoch": 0.6303212128188086, "grad_norm": 0.2307201474905014, "learning_rate": 6.019059525356852e-06, "loss": 0.3706, "step": 33952 }, { "epoch": 0.6303583429562273, "grad_norm": 0.2800556421279907, "learning_rate": 6.017989478523919e-06, "loss": 0.214, "step": 33954 }, { "epoch": 0.630395473093646, "grad_norm": 0.46497732400894165, "learning_rate": 6.016919485874222e-06, "loss": 0.1697, "step": 33956 }, { "epoch": 0.6304326032310645, "grad_norm": 0.3138503432273865, "learning_rate": 6.015849547422321e-06, "loss": 0.1554, "step": 33958 }, { "epoch": 0.6304697333684832, "grad_norm": 0.19327868521213531, "learning_rate": 6.014779663182773e-06, "loss": 0.2308, "step": 33960 }, { "epoch": 0.6305068635059018, "grad_norm": 0.2802954614162445, "learning_rate": 6.01370983317014e-06, "loss": 0.1992, "step": 33962 }, { "epoch": 0.6305439936433205, "grad_norm": 0.5347206592559814, "learning_rate": 6.012640057398972e-06, "loss": 0.321, "step": 33964 }, { "epoch": 0.6305811237807392, "grad_norm": 0.267868310213089, "learning_rate": 6.0115703358838296e-06, "loss": 0.3132, "step": 33966 }, { "epoch": 0.6306182539181577, "grad_norm": 0.35025647282600403, "learning_rate": 6.01050066863927e-06, "loss": 0.2858, "step": 33968 }, { "epoch": 0.6306553840555764, "grad_norm": 0.3864188492298126, "learning_rate": 6.009431055679844e-06, "loss": 0.1714, "step": 33970 }, { "epoch": 0.630692514192995, "grad_norm": 0.4265131950378418, "learning_rate": 6.008361497020107e-06, "loss": 0.2996, "step": 33972 }, { "epoch": 0.6307296443304137, "grad_norm": 0.5354328155517578, "learning_rate": 6.00729199267462e-06, "loss": 0.2899, "step": 33974 }, { "epoch": 0.6307667744678324, "grad_norm": 0.7740700840950012, "learning_rate": 6.006222542657924e-06, "loss": 0.3264, "step": 33976 }, { "epoch": 0.6308039046052509, "grad_norm": 0.42751383781433105, "learning_rate": 6.005153146984577e-06, "loss": 0.3641, "step": 33978 }, { "epoch": 0.6308410347426696, "grad_norm": 0.41626062989234924, "learning_rate": 6.004083805669132e-06, "loss": 0.4913, "step": 33980 }, { "epoch": 0.6308781648800882, "grad_norm": 0.32094380259513855, "learning_rate": 6.003014518726135e-06, "loss": 0.2442, "step": 33982 }, { "epoch": 0.6309152950175069, "grad_norm": 0.8709374666213989, "learning_rate": 6.001945286170138e-06, "loss": 0.1776, "step": 33984 }, { "epoch": 0.6309524251549254, "grad_norm": 0.2721101641654968, "learning_rate": 6.000876108015689e-06, "loss": 0.0814, "step": 33986 }, { "epoch": 0.6309895552923441, "grad_norm": 0.5212908387184143, "learning_rate": 5.999806984277343e-06, "loss": 0.2076, "step": 33988 }, { "epoch": 0.6310266854297628, "grad_norm": 0.3964730501174927, "learning_rate": 5.99873791496964e-06, "loss": 0.1625, "step": 33990 }, { "epoch": 0.6310638155671814, "grad_norm": 0.4831129312515259, "learning_rate": 5.9976689001071256e-06, "loss": 0.2669, "step": 33992 }, { "epoch": 0.6311009457046001, "grad_norm": 0.3244108259677887, "learning_rate": 5.9965999397043505e-06, "loss": 0.305, "step": 33994 }, { "epoch": 0.6311380758420186, "grad_norm": 0.6332389712333679, "learning_rate": 5.9955310337758575e-06, "loss": 0.3103, "step": 33996 }, { "epoch": 0.6311752059794373, "grad_norm": 0.3181458115577698, "learning_rate": 5.994462182336195e-06, "loss": 0.3471, "step": 33998 }, { "epoch": 0.631212336116856, "grad_norm": 0.3949708938598633, "learning_rate": 5.993393385399904e-06, "loss": 0.2314, "step": 34000 }, { "epoch": 0.6312494662542746, "grad_norm": 0.4090232849121094, "learning_rate": 5.992324642981529e-06, "loss": 0.421, "step": 34002 }, { "epoch": 0.6312865963916933, "grad_norm": 0.3519652783870697, "learning_rate": 5.991255955095607e-06, "loss": 0.2891, "step": 34004 }, { "epoch": 0.6313237265291118, "grad_norm": 0.4112909436225891, "learning_rate": 5.990187321756684e-06, "loss": 0.2938, "step": 34006 }, { "epoch": 0.6313608566665305, "grad_norm": 0.26916155219078064, "learning_rate": 5.989118742979303e-06, "loss": 0.2704, "step": 34008 }, { "epoch": 0.6313979868039492, "grad_norm": 0.39649736881256104, "learning_rate": 5.988050218778002e-06, "loss": 0.2307, "step": 34010 }, { "epoch": 0.6314351169413678, "grad_norm": 0.5152195692062378, "learning_rate": 5.986981749167323e-06, "loss": 0.2929, "step": 34012 }, { "epoch": 0.6314722470787865, "grad_norm": 0.496154248714447, "learning_rate": 5.985913334161798e-06, "loss": 0.2532, "step": 34014 }, { "epoch": 0.631509377216205, "grad_norm": 0.39036989212036133, "learning_rate": 5.98484497377597e-06, "loss": 0.2554, "step": 34016 }, { "epoch": 0.6315465073536237, "grad_norm": 0.20826849341392517, "learning_rate": 5.983776668024372e-06, "loss": 0.4023, "step": 34018 }, { "epoch": 0.6315836374910424, "grad_norm": 0.3139849305152893, "learning_rate": 5.9827084169215485e-06, "loss": 0.3754, "step": 34020 }, { "epoch": 0.631620767628461, "grad_norm": 0.605755627155304, "learning_rate": 5.981640220482028e-06, "loss": 0.1711, "step": 34022 }, { "epoch": 0.6316578977658797, "grad_norm": 0.2503967583179474, "learning_rate": 5.980572078720346e-06, "loss": 0.2507, "step": 34024 }, { "epoch": 0.6316950279032982, "grad_norm": 0.40294012427330017, "learning_rate": 5.979503991651043e-06, "loss": 0.503, "step": 34026 }, { "epoch": 0.6317321580407169, "grad_norm": 0.274328351020813, "learning_rate": 5.978435959288645e-06, "loss": 0.2053, "step": 34028 }, { "epoch": 0.6317692881781356, "grad_norm": 0.46464401483535767, "learning_rate": 5.977367981647688e-06, "loss": 0.3877, "step": 34030 }, { "epoch": 0.6318064183155542, "grad_norm": 0.4474378228187561, "learning_rate": 5.976300058742704e-06, "loss": 0.2505, "step": 34032 }, { "epoch": 0.6318435484529729, "grad_norm": 0.39054033160209656, "learning_rate": 5.975232190588223e-06, "loss": 0.3399, "step": 34034 }, { "epoch": 0.6318806785903914, "grad_norm": 0.33258041739463806, "learning_rate": 5.974164377198774e-06, "loss": 0.1325, "step": 34036 }, { "epoch": 0.6319178087278101, "grad_norm": 0.4047335386276245, "learning_rate": 5.973096618588896e-06, "loss": 0.2571, "step": 34038 }, { "epoch": 0.6319549388652287, "grad_norm": 0.32413750886917114, "learning_rate": 5.972028914773106e-06, "loss": 0.3846, "step": 34040 }, { "epoch": 0.6319920690026474, "grad_norm": 0.27093204855918884, "learning_rate": 5.97096126576594e-06, "loss": 0.254, "step": 34042 }, { "epoch": 0.6320291991400661, "grad_norm": 0.5951005220413208, "learning_rate": 5.969893671581919e-06, "loss": 0.2038, "step": 34044 }, { "epoch": 0.6320663292774846, "grad_norm": 0.4287288188934326, "learning_rate": 5.968826132235574e-06, "loss": 0.2659, "step": 34046 }, { "epoch": 0.6321034594149033, "grad_norm": 0.4471907615661621, "learning_rate": 5.967758647741432e-06, "loss": 0.4219, "step": 34048 }, { "epoch": 0.6321405895523219, "grad_norm": 0.48721233010292053, "learning_rate": 5.96669121811402e-06, "loss": 0.4639, "step": 34050 }, { "epoch": 0.6321777196897406, "grad_norm": 0.5401844382286072, "learning_rate": 5.965623843367855e-06, "loss": 0.208, "step": 34052 }, { "epoch": 0.6322148498271593, "grad_norm": 0.3319588005542755, "learning_rate": 5.964556523517467e-06, "loss": 0.3834, "step": 34054 }, { "epoch": 0.6322519799645778, "grad_norm": 0.4348486065864563, "learning_rate": 5.963489258577376e-06, "loss": 0.243, "step": 34056 }, { "epoch": 0.6322891101019965, "grad_norm": 0.4106428027153015, "learning_rate": 5.962422048562106e-06, "loss": 0.3039, "step": 34058 }, { "epoch": 0.6323262402394151, "grad_norm": 0.21634675562381744, "learning_rate": 5.961354893486176e-06, "loss": 0.1485, "step": 34060 }, { "epoch": 0.6323633703768338, "grad_norm": 0.48626741766929626, "learning_rate": 5.960287793364112e-06, "loss": 0.3464, "step": 34062 }, { "epoch": 0.6324005005142525, "grad_norm": 0.5003921985626221, "learning_rate": 5.95922074821043e-06, "loss": 0.3613, "step": 34064 }, { "epoch": 0.632437630651671, "grad_norm": 0.7658032178878784, "learning_rate": 5.958153758039651e-06, "loss": 0.3402, "step": 34066 }, { "epoch": 0.6324747607890897, "grad_norm": 0.4114118814468384, "learning_rate": 5.95708682286629e-06, "loss": 0.1472, "step": 34068 }, { "epoch": 0.6325118909265083, "grad_norm": 0.3192443549633026, "learning_rate": 5.9560199427048686e-06, "loss": 0.4329, "step": 34070 }, { "epoch": 0.632549021063927, "grad_norm": 0.5751197934150696, "learning_rate": 5.954953117569904e-06, "loss": 0.1912, "step": 34072 }, { "epoch": 0.6325861512013456, "grad_norm": 0.6894327402114868, "learning_rate": 5.9538863474759076e-06, "loss": 0.1483, "step": 34074 }, { "epoch": 0.6326232813387642, "grad_norm": 0.3841079771518707, "learning_rate": 5.9528196324374045e-06, "loss": 0.1184, "step": 34076 }, { "epoch": 0.6326604114761829, "grad_norm": 0.49154019355773926, "learning_rate": 5.951752972468898e-06, "loss": 0.1801, "step": 34078 }, { "epoch": 0.6326975416136015, "grad_norm": 0.41750723123550415, "learning_rate": 5.950686367584909e-06, "loss": 0.5239, "step": 34080 }, { "epoch": 0.6327346717510202, "grad_norm": 0.47046223282814026, "learning_rate": 5.949619817799949e-06, "loss": 0.3747, "step": 34082 }, { "epoch": 0.6327718018884387, "grad_norm": 0.3947194516658783, "learning_rate": 5.948553323128533e-06, "loss": 0.3942, "step": 34084 }, { "epoch": 0.6328089320258574, "grad_norm": 0.7540957927703857, "learning_rate": 5.947486883585169e-06, "loss": 0.2485, "step": 34086 }, { "epoch": 0.6328460621632761, "grad_norm": 0.7194357514381409, "learning_rate": 5.946420499184373e-06, "loss": 0.1445, "step": 34088 }, { "epoch": 0.6328831923006947, "grad_norm": 0.5120323896408081, "learning_rate": 5.9453541699406495e-06, "loss": 0.2514, "step": 34090 }, { "epoch": 0.6329203224381134, "grad_norm": 0.26570555567741394, "learning_rate": 5.944287895868509e-06, "loss": 0.3247, "step": 34092 }, { "epoch": 0.6329574525755319, "grad_norm": 0.4399605393409729, "learning_rate": 5.943221676982462e-06, "loss": 0.2062, "step": 34094 }, { "epoch": 0.6329945827129506, "grad_norm": 0.5909283757209778, "learning_rate": 5.94215551329702e-06, "loss": 0.2199, "step": 34096 }, { "epoch": 0.6330317128503693, "grad_norm": 0.5592047572135925, "learning_rate": 5.941089404826683e-06, "loss": 0.2705, "step": 34098 }, { "epoch": 0.6330688429877879, "grad_norm": 0.31185683608055115, "learning_rate": 5.940023351585968e-06, "loss": 0.1997, "step": 34100 }, { "epoch": 0.6331059731252066, "grad_norm": 0.5447320342063904, "learning_rate": 5.938957353589367e-06, "loss": 0.2916, "step": 34102 }, { "epoch": 0.6331431032626251, "grad_norm": 0.49067893624305725, "learning_rate": 5.9378914108513955e-06, "loss": 0.2305, "step": 34104 }, { "epoch": 0.6331802334000438, "grad_norm": 0.3497582674026489, "learning_rate": 5.936825523386554e-06, "loss": 0.3374, "step": 34106 }, { "epoch": 0.6332173635374625, "grad_norm": 0.412686824798584, "learning_rate": 5.9357596912093454e-06, "loss": 0.2023, "step": 34108 }, { "epoch": 0.6332544936748811, "grad_norm": 0.44322842359542847, "learning_rate": 5.934693914334273e-06, "loss": 0.3528, "step": 34110 }, { "epoch": 0.6332916238122998, "grad_norm": 0.3058278262615204, "learning_rate": 5.93362819277584e-06, "loss": 0.0725, "step": 34112 }, { "epoch": 0.6333287539497183, "grad_norm": 0.27452242374420166, "learning_rate": 5.932562526548551e-06, "loss": 0.261, "step": 34114 }, { "epoch": 0.633365884087137, "grad_norm": 0.5095474720001221, "learning_rate": 5.9314969156668985e-06, "loss": 0.2483, "step": 34116 }, { "epoch": 0.6334030142245557, "grad_norm": 0.5862179398536682, "learning_rate": 5.930431360145389e-06, "loss": 0.1846, "step": 34118 }, { "epoch": 0.6334401443619743, "grad_norm": 0.44797593355178833, "learning_rate": 5.929365859998516e-06, "loss": 0.1571, "step": 34120 }, { "epoch": 0.633477274499393, "grad_norm": 0.5446195006370544, "learning_rate": 5.928300415240782e-06, "loss": 0.2858, "step": 34122 }, { "epoch": 0.6335144046368115, "grad_norm": 0.31766778230667114, "learning_rate": 5.927235025886682e-06, "loss": 0.2677, "step": 34124 }, { "epoch": 0.6335515347742302, "grad_norm": 0.3274802565574646, "learning_rate": 5.926169691950719e-06, "loss": 0.244, "step": 34126 }, { "epoch": 0.6335886649116489, "grad_norm": 0.38085150718688965, "learning_rate": 5.92510441344738e-06, "loss": 0.3819, "step": 34128 }, { "epoch": 0.6336257950490675, "grad_norm": 0.38478055596351624, "learning_rate": 5.9240391903911645e-06, "loss": 0.2507, "step": 34130 }, { "epoch": 0.6336629251864861, "grad_norm": 0.30696019530296326, "learning_rate": 5.922974022796565e-06, "loss": 0.3612, "step": 34132 }, { "epoch": 0.6337000553239047, "grad_norm": 0.36365044116973877, "learning_rate": 5.921908910678077e-06, "loss": 0.5358, "step": 34134 }, { "epoch": 0.6337371854613234, "grad_norm": 0.3938376009464264, "learning_rate": 5.920843854050195e-06, "loss": 0.5446, "step": 34136 }, { "epoch": 0.633774315598742, "grad_norm": 0.4423855245113373, "learning_rate": 5.919778852927412e-06, "loss": 0.1972, "step": 34138 }, { "epoch": 0.6338114457361607, "grad_norm": 0.25173941254615784, "learning_rate": 5.918713907324216e-06, "loss": 0.1984, "step": 34140 }, { "epoch": 0.6338485758735793, "grad_norm": 0.5239980220794678, "learning_rate": 5.917649017255096e-06, "loss": 0.4124, "step": 34142 }, { "epoch": 0.6338857060109979, "grad_norm": 0.2828787565231323, "learning_rate": 5.916584182734546e-06, "loss": 0.2561, "step": 34144 }, { "epoch": 0.6339228361484166, "grad_norm": 0.4133024513721466, "learning_rate": 5.9155194037770525e-06, "loss": 0.3307, "step": 34146 }, { "epoch": 0.6339599662858352, "grad_norm": 1.0381852388381958, "learning_rate": 5.914454680397109e-06, "loss": 0.2988, "step": 34148 }, { "epoch": 0.6339970964232539, "grad_norm": 0.42105981707572937, "learning_rate": 5.913390012609197e-06, "loss": 0.1443, "step": 34150 }, { "epoch": 0.6340342265606725, "grad_norm": 0.5514183044433594, "learning_rate": 5.912325400427811e-06, "loss": 0.1539, "step": 34152 }, { "epoch": 0.6340713566980911, "grad_norm": 0.36305615305900574, "learning_rate": 5.911260843867428e-06, "loss": 0.3981, "step": 34154 }, { "epoch": 0.6341084868355098, "grad_norm": 0.43454042077064514, "learning_rate": 5.910196342942536e-06, "loss": 0.3702, "step": 34156 }, { "epoch": 0.6341456169729284, "grad_norm": 0.4109431505203247, "learning_rate": 5.909131897667626e-06, "loss": 0.2979, "step": 34158 }, { "epoch": 0.6341827471103471, "grad_norm": 0.4199132025241852, "learning_rate": 5.9080675080571736e-06, "loss": 0.2667, "step": 34160 }, { "epoch": 0.6342198772477657, "grad_norm": 0.40708449482917786, "learning_rate": 5.907003174125665e-06, "loss": 0.3371, "step": 34162 }, { "epoch": 0.6342570073851843, "grad_norm": 0.3604401648044586, "learning_rate": 5.905938895887589e-06, "loss": 0.295, "step": 34164 }, { "epoch": 0.634294137522603, "grad_norm": 0.2783866822719574, "learning_rate": 5.904874673357417e-06, "loss": 0.2155, "step": 34166 }, { "epoch": 0.6343312676600216, "grad_norm": 0.6536400318145752, "learning_rate": 5.9038105065496345e-06, "loss": 0.2037, "step": 34168 }, { "epoch": 0.6343683977974403, "grad_norm": 0.24478089809417725, "learning_rate": 5.902746395478722e-06, "loss": 0.4082, "step": 34170 }, { "epoch": 0.6344055279348589, "grad_norm": 0.6131709814071655, "learning_rate": 5.901682340159158e-06, "loss": 0.2629, "step": 34172 }, { "epoch": 0.6344426580722775, "grad_norm": 0.44498828053474426, "learning_rate": 5.900618340605422e-06, "loss": 0.5501, "step": 34174 }, { "epoch": 0.6344797882096962, "grad_norm": 0.3928348124027252, "learning_rate": 5.8995543968319934e-06, "loss": 0.4021, "step": 34176 }, { "epoch": 0.6345169183471148, "grad_norm": 0.6353996992111206, "learning_rate": 5.898490508853344e-06, "loss": 0.2929, "step": 34178 }, { "epoch": 0.6345540484845335, "grad_norm": 0.3568483889102936, "learning_rate": 5.897426676683955e-06, "loss": 0.2086, "step": 34180 }, { "epoch": 0.6345911786219521, "grad_norm": 0.34923484921455383, "learning_rate": 5.8963629003382995e-06, "loss": 0.3246, "step": 34182 }, { "epoch": 0.6346283087593707, "grad_norm": 0.5015848875045776, "learning_rate": 5.895299179830853e-06, "loss": 0.4258, "step": 34184 }, { "epoch": 0.6346654388967894, "grad_norm": 0.5905255079269409, "learning_rate": 5.8942355151760876e-06, "loss": 0.2958, "step": 34186 }, { "epoch": 0.634702569034208, "grad_norm": 0.3284199833869934, "learning_rate": 5.893171906388482e-06, "loss": 0.2813, "step": 34188 }, { "epoch": 0.6347396991716266, "grad_norm": 0.39212074875831604, "learning_rate": 5.892108353482506e-06, "loss": 0.3243, "step": 34190 }, { "epoch": 0.6347768293090452, "grad_norm": 0.4372919499874115, "learning_rate": 5.89104485647263e-06, "loss": 0.2959, "step": 34192 }, { "epoch": 0.6348139594464639, "grad_norm": 0.6657776832580566, "learning_rate": 5.889981415373325e-06, "loss": 0.3296, "step": 34194 }, { "epoch": 0.6348510895838826, "grad_norm": 0.658766508102417, "learning_rate": 5.88891803019906e-06, "loss": 0.3948, "step": 34196 }, { "epoch": 0.6348882197213012, "grad_norm": 0.24199338257312775, "learning_rate": 5.887854700964308e-06, "loss": 0.152, "step": 34198 }, { "epoch": 0.6349253498587198, "grad_norm": 0.4012165367603302, "learning_rate": 5.8867914276835366e-06, "loss": 0.3221, "step": 34200 }, { "epoch": 0.6349624799961384, "grad_norm": 0.26037243008613586, "learning_rate": 5.8857282103712155e-06, "loss": 0.177, "step": 34202 }, { "epoch": 0.6349996101335571, "grad_norm": 0.2393687516450882, "learning_rate": 5.884665049041809e-06, "loss": 0.2401, "step": 34204 }, { "epoch": 0.6350367402709758, "grad_norm": 0.5441455841064453, "learning_rate": 5.883601943709781e-06, "loss": 0.2458, "step": 34206 }, { "epoch": 0.6350738704083944, "grad_norm": 0.40732839703559875, "learning_rate": 5.882538894389602e-06, "loss": 0.3388, "step": 34208 }, { "epoch": 0.635111000545813, "grad_norm": 0.3955431282520294, "learning_rate": 5.881475901095734e-06, "loss": 0.256, "step": 34210 }, { "epoch": 0.6351481306832316, "grad_norm": 0.48650291562080383, "learning_rate": 5.880412963842646e-06, "loss": 0.2512, "step": 34212 }, { "epoch": 0.6351852608206503, "grad_norm": 1.178161859512329, "learning_rate": 5.8793500826448e-06, "loss": 0.3336, "step": 34214 }, { "epoch": 0.635222390958069, "grad_norm": 0.26588088274002075, "learning_rate": 5.878287257516653e-06, "loss": 0.2701, "step": 34216 }, { "epoch": 0.6352595210954876, "grad_norm": 0.3551124930381775, "learning_rate": 5.877224488472668e-06, "loss": 0.187, "step": 34218 }, { "epoch": 0.6352966512329062, "grad_norm": 0.396115779876709, "learning_rate": 5.8761617755273116e-06, "loss": 0.1815, "step": 34220 }, { "epoch": 0.6353337813703248, "grad_norm": 0.25871387124061584, "learning_rate": 5.875099118695042e-06, "loss": 0.1591, "step": 34222 }, { "epoch": 0.6353709115077435, "grad_norm": 0.2751860022544861, "learning_rate": 5.874036517990315e-06, "loss": 0.2249, "step": 34224 }, { "epoch": 0.6354080416451622, "grad_norm": 0.3980858623981476, "learning_rate": 5.872973973427592e-06, "loss": 0.5431, "step": 34226 }, { "epoch": 0.6354451717825808, "grad_norm": 0.47329890727996826, "learning_rate": 5.8719114850213364e-06, "loss": 0.3597, "step": 34228 }, { "epoch": 0.6354823019199994, "grad_norm": 0.3624727427959442, "learning_rate": 5.870849052785996e-06, "loss": 0.2387, "step": 34230 }, { "epoch": 0.635519432057418, "grad_norm": 0.2326180636882782, "learning_rate": 5.869786676736032e-06, "loss": 0.2911, "step": 34232 }, { "epoch": 0.6355565621948367, "grad_norm": 0.3721226453781128, "learning_rate": 5.868724356885902e-06, "loss": 0.2463, "step": 34234 }, { "epoch": 0.6355936923322553, "grad_norm": 0.38494521379470825, "learning_rate": 5.867662093250057e-06, "loss": 0.3036, "step": 34236 }, { "epoch": 0.635630822469674, "grad_norm": 0.43942970037460327, "learning_rate": 5.866599885842953e-06, "loss": 0.1773, "step": 34238 }, { "epoch": 0.6356679526070926, "grad_norm": 0.3306193947792053, "learning_rate": 5.8655377346790476e-06, "loss": 0.2219, "step": 34240 }, { "epoch": 0.6357050827445112, "grad_norm": 0.5862135887145996, "learning_rate": 5.864475639772785e-06, "loss": 0.247, "step": 34242 }, { "epoch": 0.6357422128819299, "grad_norm": 0.5253994464874268, "learning_rate": 5.863413601138625e-06, "loss": 0.2449, "step": 34244 }, { "epoch": 0.6357793430193485, "grad_norm": 0.39305558800697327, "learning_rate": 5.8623516187910115e-06, "loss": 0.282, "step": 34246 }, { "epoch": 0.6358164731567671, "grad_norm": 0.58650803565979, "learning_rate": 5.861289692744401e-06, "loss": 0.4139, "step": 34248 }, { "epoch": 0.6358536032941858, "grad_norm": 0.34237655997276306, "learning_rate": 5.860227823013238e-06, "loss": 0.2476, "step": 34250 }, { "epoch": 0.6358907334316044, "grad_norm": 0.5999776124954224, "learning_rate": 5.859166009611981e-06, "loss": 0.2873, "step": 34252 }, { "epoch": 0.6359278635690231, "grad_norm": 0.3404444456100464, "learning_rate": 5.858104252555065e-06, "loss": 0.523, "step": 34254 }, { "epoch": 0.6359649937064417, "grad_norm": 0.29434454441070557, "learning_rate": 5.857042551856947e-06, "loss": 0.2707, "step": 34256 }, { "epoch": 0.6360021238438603, "grad_norm": 0.3198910355567932, "learning_rate": 5.855980907532069e-06, "loss": 0.3045, "step": 34258 }, { "epoch": 0.636039253981279, "grad_norm": 0.27349331974983215, "learning_rate": 5.854919319594877e-06, "loss": 0.2615, "step": 34260 }, { "epoch": 0.6360763841186976, "grad_norm": 0.25971391797065735, "learning_rate": 5.853857788059818e-06, "loss": 0.1395, "step": 34262 }, { "epoch": 0.6361135142561163, "grad_norm": 0.5142319202423096, "learning_rate": 5.852796312941338e-06, "loss": 0.2637, "step": 34264 }, { "epoch": 0.6361506443935349, "grad_norm": 1.3276456594467163, "learning_rate": 5.851734894253878e-06, "loss": 0.39, "step": 34266 }, { "epoch": 0.6361877745309535, "grad_norm": 0.2671741545200348, "learning_rate": 5.850673532011877e-06, "loss": 0.2268, "step": 34268 }, { "epoch": 0.6362249046683722, "grad_norm": 0.38655802607536316, "learning_rate": 5.84961222622978e-06, "loss": 0.3734, "step": 34270 }, { "epoch": 0.6362620348057908, "grad_norm": 0.3436509966850281, "learning_rate": 5.8485509769220295e-06, "loss": 0.2277, "step": 34272 }, { "epoch": 0.6362991649432095, "grad_norm": 0.49369919300079346, "learning_rate": 5.847489784103067e-06, "loss": 0.122, "step": 34274 }, { "epoch": 0.6363362950806281, "grad_norm": 0.2864399552345276, "learning_rate": 5.846428647787329e-06, "loss": 0.1757, "step": 34276 }, { "epoch": 0.6363734252180467, "grad_norm": 0.28743574023246765, "learning_rate": 5.84536756798926e-06, "loss": 0.2245, "step": 34278 }, { "epoch": 0.6364105553554654, "grad_norm": 0.262789785861969, "learning_rate": 5.844306544723288e-06, "loss": 0.1327, "step": 34280 }, { "epoch": 0.636447685492884, "grad_norm": 0.3452463448047638, "learning_rate": 5.8432455780038576e-06, "loss": 0.3296, "step": 34282 }, { "epoch": 0.6364848156303027, "grad_norm": 0.4265664517879486, "learning_rate": 5.842184667845403e-06, "loss": 0.3715, "step": 34284 }, { "epoch": 0.6365219457677213, "grad_norm": 0.44878312945365906, "learning_rate": 5.8411238142623634e-06, "loss": 0.2373, "step": 34286 }, { "epoch": 0.6365590759051399, "grad_norm": 0.5777428150177002, "learning_rate": 5.840063017269171e-06, "loss": 0.3037, "step": 34288 }, { "epoch": 0.6365962060425585, "grad_norm": 0.4439290761947632, "learning_rate": 5.839002276880263e-06, "loss": 0.5705, "step": 34290 }, { "epoch": 0.6366333361799772, "grad_norm": 0.2997719645500183, "learning_rate": 5.837941593110066e-06, "loss": 0.2721, "step": 34292 }, { "epoch": 0.6366704663173959, "grad_norm": 0.4777323305606842, "learning_rate": 5.8368809659730175e-06, "loss": 0.1673, "step": 34294 }, { "epoch": 0.6367075964548145, "grad_norm": 0.5824756622314453, "learning_rate": 5.835820395483549e-06, "loss": 0.2587, "step": 34296 }, { "epoch": 0.6367447265922331, "grad_norm": 0.42135584354400635, "learning_rate": 5.8347598816560915e-06, "loss": 0.2556, "step": 34298 }, { "epoch": 0.6367818567296517, "grad_norm": 0.43942365050315857, "learning_rate": 5.833699424505081e-06, "loss": 0.474, "step": 34300 }, { "epoch": 0.6368189868670704, "grad_norm": 0.33204424381256104, "learning_rate": 5.832639024044937e-06, "loss": 0.1395, "step": 34302 }, { "epoch": 0.6368561170044891, "grad_norm": 0.4456065595149994, "learning_rate": 5.831578680290096e-06, "loss": 0.305, "step": 34304 }, { "epoch": 0.6368932471419076, "grad_norm": 0.3564998507499695, "learning_rate": 5.83051839325498e-06, "loss": 0.4715, "step": 34306 }, { "epoch": 0.6369303772793263, "grad_norm": 0.509390652179718, "learning_rate": 5.82945816295402e-06, "loss": 0.2404, "step": 34308 }, { "epoch": 0.6369675074167449, "grad_norm": 0.3275364637374878, "learning_rate": 5.828397989401644e-06, "loss": 0.3322, "step": 34310 }, { "epoch": 0.6370046375541636, "grad_norm": 0.316283255815506, "learning_rate": 5.827337872612273e-06, "loss": 0.3737, "step": 34312 }, { "epoch": 0.6370417676915823, "grad_norm": 0.23323306441307068, "learning_rate": 5.826277812600336e-06, "loss": 0.0852, "step": 34314 }, { "epoch": 0.6370788978290008, "grad_norm": 0.46385693550109863, "learning_rate": 5.825217809380261e-06, "loss": 0.2175, "step": 34316 }, { "epoch": 0.6371160279664195, "grad_norm": 0.39158040285110474, "learning_rate": 5.824157862966462e-06, "loss": 0.2945, "step": 34318 }, { "epoch": 0.6371531581038381, "grad_norm": 0.34611204266548157, "learning_rate": 5.823097973373366e-06, "loss": 0.2303, "step": 34320 }, { "epoch": 0.6371902882412568, "grad_norm": 0.7379342913627625, "learning_rate": 5.8220381406154e-06, "loss": 0.2894, "step": 34322 }, { "epoch": 0.6372274183786755, "grad_norm": 0.3979783356189728, "learning_rate": 5.8209783647069774e-06, "loss": 0.2937, "step": 34324 }, { "epoch": 0.637264548516094, "grad_norm": 0.3409028947353363, "learning_rate": 5.819918645662519e-06, "loss": 0.2459, "step": 34326 }, { "epoch": 0.6373016786535127, "grad_norm": 0.301815390586853, "learning_rate": 5.818858983496454e-06, "loss": 0.2811, "step": 34328 }, { "epoch": 0.6373388087909313, "grad_norm": 0.29546594619750977, "learning_rate": 5.817799378223188e-06, "loss": 0.3983, "step": 34330 }, { "epoch": 0.63737593892835, "grad_norm": 0.39727792143821716, "learning_rate": 5.816739829857146e-06, "loss": 0.3101, "step": 34332 }, { "epoch": 0.6374130690657687, "grad_norm": 0.5162875652313232, "learning_rate": 5.8156803384127444e-06, "loss": 0.3013, "step": 34334 }, { "epoch": 0.6374501992031872, "grad_norm": 0.33304065465927124, "learning_rate": 5.8146209039044e-06, "loss": 0.2581, "step": 34336 }, { "epoch": 0.6374873293406059, "grad_norm": 0.4690551459789276, "learning_rate": 5.81356152634653e-06, "loss": 0.2116, "step": 34338 }, { "epoch": 0.6375244594780245, "grad_norm": 0.4315544366836548, "learning_rate": 5.812502205753549e-06, "loss": 0.2398, "step": 34340 }, { "epoch": 0.6375615896154432, "grad_norm": 0.23634302616119385, "learning_rate": 5.811442942139868e-06, "loss": 0.1212, "step": 34342 }, { "epoch": 0.6375987197528618, "grad_norm": 0.3305998742580414, "learning_rate": 5.810383735519902e-06, "loss": 0.1571, "step": 34344 }, { "epoch": 0.6376358498902804, "grad_norm": 0.39975443482398987, "learning_rate": 5.809324585908067e-06, "loss": 0.1924, "step": 34346 }, { "epoch": 0.6376729800276991, "grad_norm": 0.393121600151062, "learning_rate": 5.80826549331877e-06, "loss": 0.3276, "step": 34348 }, { "epoch": 0.6377101101651177, "grad_norm": 0.3924253284931183, "learning_rate": 5.807206457766421e-06, "loss": 0.3868, "step": 34350 }, { "epoch": 0.6377472403025364, "grad_norm": 0.4458809494972229, "learning_rate": 5.806147479265436e-06, "loss": 0.1365, "step": 34352 }, { "epoch": 0.637784370439955, "grad_norm": 0.21137550473213196, "learning_rate": 5.805088557830224e-06, "loss": 0.273, "step": 34354 }, { "epoch": 0.6378215005773736, "grad_norm": 0.4154908359050751, "learning_rate": 5.804029693475188e-06, "loss": 0.3271, "step": 34356 }, { "epoch": 0.6378586307147923, "grad_norm": 0.34669429063796997, "learning_rate": 5.8029708862147404e-06, "loss": 0.2253, "step": 34358 }, { "epoch": 0.6378957608522109, "grad_norm": 0.3073492646217346, "learning_rate": 5.8019121360632855e-06, "loss": 0.316, "step": 34360 }, { "epoch": 0.6379328909896296, "grad_norm": 0.41762781143188477, "learning_rate": 5.800853443035234e-06, "loss": 0.1745, "step": 34362 }, { "epoch": 0.6379700211270481, "grad_norm": 0.41482043266296387, "learning_rate": 5.799794807144992e-06, "loss": 0.1274, "step": 34364 }, { "epoch": 0.6380071512644668, "grad_norm": 0.4193533658981323, "learning_rate": 5.7987362284069584e-06, "loss": 0.3086, "step": 34366 }, { "epoch": 0.6380442814018855, "grad_norm": 0.4279491603374481, "learning_rate": 5.797677706835543e-06, "loss": 0.2947, "step": 34368 }, { "epoch": 0.6380814115393041, "grad_norm": 0.33530905842781067, "learning_rate": 5.796619242445143e-06, "loss": 0.303, "step": 34370 }, { "epoch": 0.6381185416767228, "grad_norm": 0.5515667200088501, "learning_rate": 5.795560835250165e-06, "loss": 0.0954, "step": 34372 }, { "epoch": 0.6381556718141413, "grad_norm": 0.35331907868385315, "learning_rate": 5.79450248526501e-06, "loss": 0.1266, "step": 34374 }, { "epoch": 0.63819280195156, "grad_norm": 2.5497946739196777, "learning_rate": 5.79344419250408e-06, "loss": 0.1536, "step": 34376 }, { "epoch": 0.6382299320889787, "grad_norm": 0.41464558243751526, "learning_rate": 5.792385956981777e-06, "loss": 0.2204, "step": 34378 }, { "epoch": 0.6382670622263973, "grad_norm": 0.5481444597244263, "learning_rate": 5.791327778712496e-06, "loss": 0.3593, "step": 34380 }, { "epoch": 0.638304192363816, "grad_norm": 0.1794361174106598, "learning_rate": 5.790269657710635e-06, "loss": 0.1642, "step": 34382 }, { "epoch": 0.6383413225012345, "grad_norm": 0.2744278013706207, "learning_rate": 5.789211593990596e-06, "loss": 0.19, "step": 34384 }, { "epoch": 0.6383784526386532, "grad_norm": 0.35223159193992615, "learning_rate": 5.788153587566778e-06, "loss": 0.1695, "step": 34386 }, { "epoch": 0.6384155827760718, "grad_norm": 0.4164327085018158, "learning_rate": 5.7870956384535706e-06, "loss": 0.317, "step": 34388 }, { "epoch": 0.6384527129134905, "grad_norm": 0.2784217894077301, "learning_rate": 5.786037746665375e-06, "loss": 0.1223, "step": 34390 }, { "epoch": 0.6384898430509092, "grad_norm": 0.42970484495162964, "learning_rate": 5.784979912216579e-06, "loss": 0.3063, "step": 34392 }, { "epoch": 0.6385269731883277, "grad_norm": 0.4044070541858673, "learning_rate": 5.783922135121582e-06, "loss": 0.2385, "step": 34394 }, { "epoch": 0.6385641033257464, "grad_norm": 0.32016560435295105, "learning_rate": 5.782864415394778e-06, "loss": 0.2341, "step": 34396 }, { "epoch": 0.638601233463165, "grad_norm": 0.38111764192581177, "learning_rate": 5.781806753050555e-06, "loss": 0.288, "step": 34398 }, { "epoch": 0.6386383636005837, "grad_norm": 0.17499271035194397, "learning_rate": 5.780749148103309e-06, "loss": 0.3033, "step": 34400 }, { "epoch": 0.6386754937380024, "grad_norm": 0.7759330868721008, "learning_rate": 5.7796916005674265e-06, "loss": 0.2888, "step": 34402 }, { "epoch": 0.6387126238754209, "grad_norm": 0.3772846460342407, "learning_rate": 5.778634110457305e-06, "loss": 0.3881, "step": 34404 }, { "epoch": 0.6387497540128396, "grad_norm": 0.38556066155433655, "learning_rate": 5.777576677787325e-06, "loss": 0.2721, "step": 34406 }, { "epoch": 0.6387868841502582, "grad_norm": 0.39567604660987854, "learning_rate": 5.776519302571883e-06, "loss": 0.2709, "step": 34408 }, { "epoch": 0.6388240142876769, "grad_norm": 0.37514883279800415, "learning_rate": 5.775461984825359e-06, "loss": 0.2264, "step": 34410 }, { "epoch": 0.6388611444250956, "grad_norm": 0.39258813858032227, "learning_rate": 5.774404724562142e-06, "loss": 0.3255, "step": 34412 }, { "epoch": 0.6388982745625141, "grad_norm": 0.45563045144081116, "learning_rate": 5.77334752179662e-06, "loss": 0.1687, "step": 34414 }, { "epoch": 0.6389354046999328, "grad_norm": 0.3534729778766632, "learning_rate": 5.772290376543184e-06, "loss": 0.4204, "step": 34416 }, { "epoch": 0.6389725348373514, "grad_norm": 0.3805806338787079, "learning_rate": 5.7712332888162056e-06, "loss": 0.4312, "step": 34418 }, { "epoch": 0.6390096649747701, "grad_norm": 0.45399031043052673, "learning_rate": 5.770176258630077e-06, "loss": 0.4205, "step": 34420 }, { "epoch": 0.6390467951121888, "grad_norm": 0.3785521984100342, "learning_rate": 5.7691192859991795e-06, "loss": 0.3137, "step": 34422 }, { "epoch": 0.6390839252496073, "grad_norm": 0.3475157916545868, "learning_rate": 5.7680623709378946e-06, "loss": 0.2657, "step": 34424 }, { "epoch": 0.639121055387026, "grad_norm": 0.3515818417072296, "learning_rate": 5.767005513460606e-06, "loss": 0.3331, "step": 34426 }, { "epoch": 0.6391581855244446, "grad_norm": 0.32471948862075806, "learning_rate": 5.7659487135816975e-06, "loss": 0.353, "step": 34428 }, { "epoch": 0.6391953156618633, "grad_norm": 0.3636452555656433, "learning_rate": 5.764891971315544e-06, "loss": 0.3381, "step": 34430 }, { "epoch": 0.639232445799282, "grad_norm": 0.35272589325904846, "learning_rate": 5.76383528667652e-06, "loss": 0.4716, "step": 34432 }, { "epoch": 0.6392695759367005, "grad_norm": 0.4739702641963959, "learning_rate": 5.76277865967901e-06, "loss": 0.2831, "step": 34434 }, { "epoch": 0.6393067060741192, "grad_norm": 0.33771243691444397, "learning_rate": 5.7617220903373916e-06, "loss": 0.071, "step": 34436 }, { "epoch": 0.6393438362115378, "grad_norm": 0.3048953413963318, "learning_rate": 5.760665578666038e-06, "loss": 0.221, "step": 34438 }, { "epoch": 0.6393809663489565, "grad_norm": 0.5305869579315186, "learning_rate": 5.759609124679329e-06, "loss": 0.332, "step": 34440 }, { "epoch": 0.639418096486375, "grad_norm": 0.5023661255836487, "learning_rate": 5.758552728391642e-06, "loss": 0.1984, "step": 34442 }, { "epoch": 0.6394552266237937, "grad_norm": 0.6118751168251038, "learning_rate": 5.757496389817345e-06, "loss": 0.164, "step": 34444 }, { "epoch": 0.6394923567612124, "grad_norm": 0.27222010493278503, "learning_rate": 5.756440108970813e-06, "loss": 0.3025, "step": 34446 }, { "epoch": 0.639529486898631, "grad_norm": 0.38678622245788574, "learning_rate": 5.7553838858664206e-06, "loss": 0.2028, "step": 34448 }, { "epoch": 0.6395666170360497, "grad_norm": 0.4148137867450714, "learning_rate": 5.754327720518543e-06, "loss": 0.1497, "step": 34450 }, { "epoch": 0.6396037471734682, "grad_norm": 0.377964586019516, "learning_rate": 5.7532716129415444e-06, "loss": 0.2453, "step": 34452 }, { "epoch": 0.6396408773108869, "grad_norm": 0.3893478512763977, "learning_rate": 5.752215563149802e-06, "loss": 0.2501, "step": 34454 }, { "epoch": 0.6396780074483056, "grad_norm": 0.32430949807167053, "learning_rate": 5.751159571157679e-06, "loss": 0.3333, "step": 34456 }, { "epoch": 0.6397151375857242, "grad_norm": 0.35612526535987854, "learning_rate": 5.750103636979547e-06, "loss": 0.4336, "step": 34458 }, { "epoch": 0.6397522677231429, "grad_norm": 0.31727901101112366, "learning_rate": 5.749047760629776e-06, "loss": 0.2042, "step": 34460 }, { "epoch": 0.6397893978605614, "grad_norm": 0.37208810448646545, "learning_rate": 5.7479919421227305e-06, "loss": 0.3188, "step": 34462 }, { "epoch": 0.6398265279979801, "grad_norm": 0.5261711478233337, "learning_rate": 5.746936181472777e-06, "loss": 0.2907, "step": 34464 }, { "epoch": 0.6398636581353988, "grad_norm": 0.2947002053260803, "learning_rate": 5.745880478694289e-06, "loss": 0.3634, "step": 34466 }, { "epoch": 0.6399007882728174, "grad_norm": 0.2871014475822449, "learning_rate": 5.744824833801621e-06, "loss": 0.2795, "step": 34468 }, { "epoch": 0.6399379184102361, "grad_norm": 0.19240085780620575, "learning_rate": 5.743769246809141e-06, "loss": 0.3702, "step": 34470 }, { "epoch": 0.6399750485476546, "grad_norm": 0.4328199028968811, "learning_rate": 5.742713717731216e-06, "loss": 0.1297, "step": 34472 }, { "epoch": 0.6400121786850733, "grad_norm": 0.8787009716033936, "learning_rate": 5.741658246582201e-06, "loss": 0.1305, "step": 34474 }, { "epoch": 0.640049308822492, "grad_norm": 0.33513548970222473, "learning_rate": 5.740602833376463e-06, "loss": 0.3488, "step": 34476 }, { "epoch": 0.6400864389599106, "grad_norm": 0.305398166179657, "learning_rate": 5.739547478128362e-06, "loss": 0.3491, "step": 34478 }, { "epoch": 0.6401235690973293, "grad_norm": 0.5140475630760193, "learning_rate": 5.738492180852262e-06, "loss": 0.351, "step": 34480 }, { "epoch": 0.6401606992347478, "grad_norm": 0.3031245768070221, "learning_rate": 5.737436941562514e-06, "loss": 0.4482, "step": 34482 }, { "epoch": 0.6401978293721665, "grad_norm": 0.48454996943473816, "learning_rate": 5.736381760273482e-06, "loss": 0.1801, "step": 34484 }, { "epoch": 0.6402349595095852, "grad_norm": 0.4383023679256439, "learning_rate": 5.735326636999523e-06, "loss": 0.2544, "step": 34486 }, { "epoch": 0.6402720896470038, "grad_norm": 0.44977495074272156, "learning_rate": 5.734271571754996e-06, "loss": 0.2795, "step": 34488 }, { "epoch": 0.6403092197844225, "grad_norm": 0.3827721178531647, "learning_rate": 5.7332165645542534e-06, "loss": 0.2078, "step": 34490 }, { "epoch": 0.640346349921841, "grad_norm": 0.3157905638217926, "learning_rate": 5.732161615411657e-06, "loss": 0.3334, "step": 34492 }, { "epoch": 0.6403834800592597, "grad_norm": 0.32621458172798157, "learning_rate": 5.73110672434156e-06, "loss": 0.201, "step": 34494 }, { "epoch": 0.6404206101966783, "grad_norm": 0.271375447511673, "learning_rate": 5.730051891358307e-06, "loss": 0.3317, "step": 34496 }, { "epoch": 0.640457740334097, "grad_norm": 0.3328658938407898, "learning_rate": 5.7289971164762585e-06, "loss": 0.3082, "step": 34498 }, { "epoch": 0.6404948704715157, "grad_norm": 0.3727584183216095, "learning_rate": 5.727942399709766e-06, "loss": 0.2013, "step": 34500 }, { "epoch": 0.6405320006089342, "grad_norm": 0.6493826508522034, "learning_rate": 5.726887741073182e-06, "loss": 0.2665, "step": 34502 }, { "epoch": 0.6405691307463529, "grad_norm": 0.2889195382595062, "learning_rate": 5.725833140580859e-06, "loss": 0.2028, "step": 34504 }, { "epoch": 0.6406062608837715, "grad_norm": 0.46604329347610474, "learning_rate": 5.724778598247141e-06, "loss": 0.1538, "step": 34506 }, { "epoch": 0.6406433910211902, "grad_norm": 0.28957000374794006, "learning_rate": 5.7237241140863795e-06, "loss": 0.2749, "step": 34508 }, { "epoch": 0.6406805211586089, "grad_norm": 0.4267987906932831, "learning_rate": 5.722669688112925e-06, "loss": 0.3562, "step": 34510 }, { "epoch": 0.6407176512960274, "grad_norm": 0.43735820055007935, "learning_rate": 5.721615320341125e-06, "loss": 0.3494, "step": 34512 }, { "epoch": 0.6407547814334461, "grad_norm": 0.5622162818908691, "learning_rate": 5.720561010785327e-06, "loss": 0.361, "step": 34514 }, { "epoch": 0.6407919115708647, "grad_norm": 0.3882259428501129, "learning_rate": 5.719506759459872e-06, "loss": 0.392, "step": 34516 }, { "epoch": 0.6408290417082834, "grad_norm": 0.2676790952682495, "learning_rate": 5.718452566379114e-06, "loss": 0.1966, "step": 34518 }, { "epoch": 0.640866171845702, "grad_norm": 0.4147421717643738, "learning_rate": 5.717398431557386e-06, "loss": 0.2436, "step": 34520 }, { "epoch": 0.6409033019831206, "grad_norm": 0.7291442155838013, "learning_rate": 5.716344355009038e-06, "loss": 0.3866, "step": 34522 }, { "epoch": 0.6409404321205393, "grad_norm": 0.4977591335773468, "learning_rate": 5.715290336748413e-06, "loss": 0.2817, "step": 34524 }, { "epoch": 0.6409775622579579, "grad_norm": 0.47533005475997925, "learning_rate": 5.7142363767898515e-06, "loss": 0.1583, "step": 34526 }, { "epoch": 0.6410146923953766, "grad_norm": 0.3429177403450012, "learning_rate": 5.713182475147696e-06, "loss": 0.2045, "step": 34528 }, { "epoch": 0.6410518225327952, "grad_norm": 0.30088746547698975, "learning_rate": 5.712128631836289e-06, "loss": 0.2331, "step": 34530 }, { "epoch": 0.6410889526702138, "grad_norm": 0.4164296090602875, "learning_rate": 5.711074846869966e-06, "loss": 0.28, "step": 34532 }, { "epoch": 0.6411260828076325, "grad_norm": 0.515152633190155, "learning_rate": 5.710021120263066e-06, "loss": 0.223, "step": 34534 }, { "epoch": 0.6411632129450511, "grad_norm": 0.3355543911457062, "learning_rate": 5.708967452029933e-06, "loss": 0.2847, "step": 34536 }, { "epoch": 0.6412003430824698, "grad_norm": 0.39738187193870544, "learning_rate": 5.7079138421848955e-06, "loss": 0.393, "step": 34538 }, { "epoch": 0.6412374732198883, "grad_norm": 0.560077428817749, "learning_rate": 5.706860290742296e-06, "loss": 0.1241, "step": 34540 }, { "epoch": 0.641274603357307, "grad_norm": 0.4178966283798218, "learning_rate": 5.70580679771647e-06, "loss": 0.2877, "step": 34542 }, { "epoch": 0.6413117334947257, "grad_norm": 0.4897531270980835, "learning_rate": 5.704753363121749e-06, "loss": 0.2128, "step": 34544 }, { "epoch": 0.6413488636321443, "grad_norm": 0.3979819715023041, "learning_rate": 5.703699986972467e-06, "loss": 0.2481, "step": 34546 }, { "epoch": 0.641385993769563, "grad_norm": 0.41877618432044983, "learning_rate": 5.702646669282961e-06, "loss": 0.3074, "step": 34548 }, { "epoch": 0.6414231239069815, "grad_norm": 0.43111488223075867, "learning_rate": 5.70159341006756e-06, "loss": 0.2319, "step": 34550 }, { "epoch": 0.6414602540444002, "grad_norm": 0.2287842035293579, "learning_rate": 5.700540209340599e-06, "loss": 0.2187, "step": 34552 }, { "epoch": 0.6414973841818189, "grad_norm": 0.46635547280311584, "learning_rate": 5.699487067116411e-06, "loss": 0.2224, "step": 34554 }, { "epoch": 0.6415345143192375, "grad_norm": 0.5059830546379089, "learning_rate": 5.698433983409318e-06, "loss": 0.1089, "step": 34556 }, { "epoch": 0.6415716444566562, "grad_norm": 0.28047865629196167, "learning_rate": 5.697380958233658e-06, "loss": 0.1653, "step": 34558 }, { "epoch": 0.6416087745940747, "grad_norm": 0.33498844504356384, "learning_rate": 5.696327991603752e-06, "loss": 0.1747, "step": 34560 }, { "epoch": 0.6416459047314934, "grad_norm": 0.39726707339286804, "learning_rate": 5.695275083533931e-06, "loss": 0.3921, "step": 34562 }, { "epoch": 0.6416830348689121, "grad_norm": 0.5147075653076172, "learning_rate": 5.694222234038523e-06, "loss": 0.4023, "step": 34564 }, { "epoch": 0.6417201650063307, "grad_norm": 0.3728301227092743, "learning_rate": 5.693169443131849e-06, "loss": 0.4022, "step": 34566 }, { "epoch": 0.6417572951437494, "grad_norm": 0.4728337228298187, "learning_rate": 5.692116710828246e-06, "loss": 0.1201, "step": 34568 }, { "epoch": 0.6417944252811679, "grad_norm": 0.30708497762680054, "learning_rate": 5.691064037142027e-06, "loss": 0.2886, "step": 34570 }, { "epoch": 0.6418315554185866, "grad_norm": 0.4832508862018585, "learning_rate": 5.690011422087518e-06, "loss": 0.2247, "step": 34572 }, { "epoch": 0.6418686855560053, "grad_norm": 0.4336124062538147, "learning_rate": 5.688958865679044e-06, "loss": 0.3838, "step": 34574 }, { "epoch": 0.6419058156934239, "grad_norm": 0.43545395135879517, "learning_rate": 5.687906367930931e-06, "loss": 0.2008, "step": 34576 }, { "epoch": 0.6419429458308425, "grad_norm": 0.3618486225605011, "learning_rate": 5.686853928857492e-06, "loss": 0.1908, "step": 34578 }, { "epoch": 0.6419800759682611, "grad_norm": 0.343269407749176, "learning_rate": 5.685801548473057e-06, "loss": 0.18, "step": 34580 }, { "epoch": 0.6420172061056798, "grad_norm": 0.33832189440727234, "learning_rate": 5.684749226791935e-06, "loss": 0.2567, "step": 34582 }, { "epoch": 0.6420543362430985, "grad_norm": 0.37028977274894714, "learning_rate": 5.683696963828451e-06, "loss": 0.3512, "step": 34584 }, { "epoch": 0.6420914663805171, "grad_norm": 0.3490130603313446, "learning_rate": 5.682644759596923e-06, "loss": 0.4354, "step": 34586 }, { "epoch": 0.6421285965179357, "grad_norm": 0.4102526307106018, "learning_rate": 5.6815926141116665e-06, "loss": 0.2374, "step": 34588 }, { "epoch": 0.6421657266553543, "grad_norm": 0.3075184226036072, "learning_rate": 5.680540527386999e-06, "loss": 0.3928, "step": 34590 }, { "epoch": 0.642202856792773, "grad_norm": 0.30693307518959045, "learning_rate": 5.679488499437242e-06, "loss": 0.303, "step": 34592 }, { "epoch": 0.6422399869301916, "grad_norm": 0.28893569111824036, "learning_rate": 5.678436530276701e-06, "loss": 0.4322, "step": 34594 }, { "epoch": 0.6422771170676103, "grad_norm": 0.40623289346694946, "learning_rate": 5.677384619919693e-06, "loss": 0.3072, "step": 34596 }, { "epoch": 0.6423142472050289, "grad_norm": 0.5637116432189941, "learning_rate": 5.676332768380535e-06, "loss": 0.5271, "step": 34598 }, { "epoch": 0.6423513773424475, "grad_norm": 0.8443187475204468, "learning_rate": 5.675280975673538e-06, "loss": 0.3471, "step": 34600 }, { "epoch": 0.6423885074798662, "grad_norm": 0.6141651272773743, "learning_rate": 5.674229241813012e-06, "loss": 0.3658, "step": 34602 }, { "epoch": 0.6424256376172848, "grad_norm": 0.6757946610450745, "learning_rate": 5.673177566813266e-06, "loss": 0.4089, "step": 34604 }, { "epoch": 0.6424627677547035, "grad_norm": 0.35423508286476135, "learning_rate": 5.672125950688618e-06, "loss": 0.3595, "step": 34606 }, { "epoch": 0.6424998978921221, "grad_norm": 0.2510984241962433, "learning_rate": 5.67107439345337e-06, "loss": 0.2174, "step": 34608 }, { "epoch": 0.6425370280295407, "grad_norm": 0.43001770973205566, "learning_rate": 5.670022895121832e-06, "loss": 0.3229, "step": 34610 }, { "epoch": 0.6425741581669594, "grad_norm": 0.48932796716690063, "learning_rate": 5.668971455708311e-06, "loss": 0.4381, "step": 34612 }, { "epoch": 0.642611288304378, "grad_norm": 0.3985104262828827, "learning_rate": 5.667920075227117e-06, "loss": 0.1877, "step": 34614 }, { "epoch": 0.6426484184417967, "grad_norm": 0.34801462292671204, "learning_rate": 5.666868753692554e-06, "loss": 0.311, "step": 34616 }, { "epoch": 0.6426855485792153, "grad_norm": 0.36197179555892944, "learning_rate": 5.665817491118932e-06, "loss": 0.3285, "step": 34618 }, { "epoch": 0.6427226787166339, "grad_norm": 0.3744148313999176, "learning_rate": 5.664766287520549e-06, "loss": 0.2925, "step": 34620 }, { "epoch": 0.6427598088540526, "grad_norm": 0.38207340240478516, "learning_rate": 5.663715142911715e-06, "loss": 0.0593, "step": 34622 }, { "epoch": 0.6427969389914712, "grad_norm": 0.1998310089111328, "learning_rate": 5.662664057306724e-06, "loss": 0.2616, "step": 34624 }, { "epoch": 0.6428340691288899, "grad_norm": 0.2842331528663635, "learning_rate": 5.661613030719883e-06, "loss": 0.3475, "step": 34626 }, { "epoch": 0.6428711992663085, "grad_norm": 0.3710392117500305, "learning_rate": 5.660562063165495e-06, "loss": 0.4012, "step": 34628 }, { "epoch": 0.6429083294037271, "grad_norm": 0.3332996368408203, "learning_rate": 5.659511154657862e-06, "loss": 0.4786, "step": 34630 }, { "epoch": 0.6429454595411458, "grad_norm": 0.37276142835617065, "learning_rate": 5.6584603052112776e-06, "loss": 0.1717, "step": 34632 }, { "epoch": 0.6429825896785644, "grad_norm": 0.4062551259994507, "learning_rate": 5.657409514840044e-06, "loss": 0.1661, "step": 34634 }, { "epoch": 0.643019719815983, "grad_norm": 0.3422892391681671, "learning_rate": 5.65635878355846e-06, "loss": 0.2376, "step": 34636 }, { "epoch": 0.6430568499534017, "grad_norm": 0.3834654688835144, "learning_rate": 5.655308111380822e-06, "loss": 0.16, "step": 34638 }, { "epoch": 0.6430939800908203, "grad_norm": 0.3469104766845703, "learning_rate": 5.65425749832143e-06, "loss": 0.2378, "step": 34640 }, { "epoch": 0.643131110228239, "grad_norm": 0.39868664741516113, "learning_rate": 5.6532069443945735e-06, "loss": 0.1257, "step": 34642 }, { "epoch": 0.6431682403656576, "grad_norm": 0.4539540112018585, "learning_rate": 5.6521564496145535e-06, "loss": 0.2833, "step": 34644 }, { "epoch": 0.6432053705030762, "grad_norm": 0.4189632833003998, "learning_rate": 5.651106013995657e-06, "loss": 0.2584, "step": 34646 }, { "epoch": 0.6432425006404948, "grad_norm": 0.34033602476119995, "learning_rate": 5.650055637552181e-06, "loss": 0.4255, "step": 34648 }, { "epoch": 0.6432796307779135, "grad_norm": 0.42274802923202515, "learning_rate": 5.6490053202984185e-06, "loss": 0.4432, "step": 34650 }, { "epoch": 0.6433167609153322, "grad_norm": 0.5104333758354187, "learning_rate": 5.6479550622486616e-06, "loss": 0.3391, "step": 34652 }, { "epoch": 0.6433538910527508, "grad_norm": 0.6274007558822632, "learning_rate": 5.6469048634172e-06, "loss": 0.4583, "step": 34654 }, { "epoch": 0.6433910211901694, "grad_norm": 0.40335410833358765, "learning_rate": 5.645854723818327e-06, "loss": 0.2771, "step": 34656 }, { "epoch": 0.643428151327588, "grad_norm": 0.33272022008895874, "learning_rate": 5.644804643466326e-06, "loss": 0.2386, "step": 34658 }, { "epoch": 0.6434652814650067, "grad_norm": 0.4339945614337921, "learning_rate": 5.643754622375488e-06, "loss": 0.3071, "step": 34660 }, { "epoch": 0.6435024116024254, "grad_norm": 0.34428122639656067, "learning_rate": 5.642704660560105e-06, "loss": 0.2874, "step": 34662 }, { "epoch": 0.643539541739844, "grad_norm": 0.6480374336242676, "learning_rate": 5.6416547580344574e-06, "loss": 0.1827, "step": 34664 }, { "epoch": 0.6435766718772626, "grad_norm": 0.2720595896244049, "learning_rate": 5.640604914812833e-06, "loss": 0.1776, "step": 34666 }, { "epoch": 0.6436138020146812, "grad_norm": 0.28396087884902954, "learning_rate": 5.639555130909522e-06, "loss": 0.1213, "step": 34668 }, { "epoch": 0.6436509321520999, "grad_norm": 0.4466366171836853, "learning_rate": 5.638505406338799e-06, "loss": 0.3292, "step": 34670 }, { "epoch": 0.6436880622895186, "grad_norm": 0.31564563512802124, "learning_rate": 5.637455741114954e-06, "loss": 0.2693, "step": 34672 }, { "epoch": 0.6437251924269372, "grad_norm": 0.2586180865764618, "learning_rate": 5.63640613525227e-06, "loss": 0.0887, "step": 34674 }, { "epoch": 0.6437623225643558, "grad_norm": 0.3365249037742615, "learning_rate": 5.635356588765028e-06, "loss": 0.2423, "step": 34676 }, { "epoch": 0.6437994527017744, "grad_norm": 0.4494837820529938, "learning_rate": 5.634307101667509e-06, "loss": 0.2423, "step": 34678 }, { "epoch": 0.6438365828391931, "grad_norm": 0.34529224038124084, "learning_rate": 5.633257673973997e-06, "loss": 0.2712, "step": 34680 }, { "epoch": 0.6438737129766118, "grad_norm": 0.4213443994522095, "learning_rate": 5.632208305698765e-06, "loss": 0.2152, "step": 34682 }, { "epoch": 0.6439108431140304, "grad_norm": 0.437482088804245, "learning_rate": 5.631158996856099e-06, "loss": 0.2496, "step": 34684 }, { "epoch": 0.643947973251449, "grad_norm": 0.3434986472129822, "learning_rate": 5.630109747460267e-06, "loss": 0.2901, "step": 34686 }, { "epoch": 0.6439851033888676, "grad_norm": 0.4669639468193054, "learning_rate": 5.629060557525554e-06, "loss": 0.336, "step": 34688 }, { "epoch": 0.6440222335262863, "grad_norm": 0.2946740686893463, "learning_rate": 5.628011427066233e-06, "loss": 0.221, "step": 34690 }, { "epoch": 0.6440593636637049, "grad_norm": 0.31371450424194336, "learning_rate": 5.626962356096581e-06, "loss": 0.3982, "step": 34692 }, { "epoch": 0.6440964938011235, "grad_norm": 0.2911532521247864, "learning_rate": 5.625913344630878e-06, "loss": 0.4586, "step": 34694 }, { "epoch": 0.6441336239385422, "grad_norm": 0.35006454586982727, "learning_rate": 5.624864392683387e-06, "loss": 0.4806, "step": 34696 }, { "epoch": 0.6441707540759608, "grad_norm": 0.38948938250541687, "learning_rate": 5.6238155002683885e-06, "loss": 0.3223, "step": 34698 }, { "epoch": 0.6442078842133795, "grad_norm": 0.44755303859710693, "learning_rate": 5.62276666740015e-06, "loss": 0.2799, "step": 34700 }, { "epoch": 0.6442450143507981, "grad_norm": 0.400484174489975, "learning_rate": 5.621717894092947e-06, "loss": 0.1782, "step": 34702 }, { "epoch": 0.6442821444882167, "grad_norm": 0.6038129925727844, "learning_rate": 5.6206691803610536e-06, "loss": 0.4655, "step": 34704 }, { "epoch": 0.6443192746256354, "grad_norm": 0.3647526204586029, "learning_rate": 5.619620526218731e-06, "loss": 0.3203, "step": 34706 }, { "epoch": 0.644356404763054, "grad_norm": 0.36976975202560425, "learning_rate": 5.618571931680255e-06, "loss": 0.3028, "step": 34708 }, { "epoch": 0.6443935349004727, "grad_norm": 0.42161667346954346, "learning_rate": 5.61752339675989e-06, "loss": 0.3183, "step": 34710 }, { "epoch": 0.6444306650378913, "grad_norm": 0.3405196964740753, "learning_rate": 5.616474921471902e-06, "loss": 0.4521, "step": 34712 }, { "epoch": 0.6444677951753099, "grad_norm": 0.4106855094432831, "learning_rate": 5.61542650583056e-06, "loss": 0.3542, "step": 34714 }, { "epoch": 0.6445049253127286, "grad_norm": 0.4027274549007416, "learning_rate": 5.614378149850131e-06, "loss": 0.2645, "step": 34716 }, { "epoch": 0.6445420554501472, "grad_norm": 0.3040961027145386, "learning_rate": 5.613329853544882e-06, "loss": 0.2581, "step": 34718 }, { "epoch": 0.6445791855875659, "grad_norm": 0.2999259829521179, "learning_rate": 5.612281616929071e-06, "loss": 0.2352, "step": 34720 }, { "epoch": 0.6446163157249845, "grad_norm": 0.34112080931663513, "learning_rate": 5.611233440016964e-06, "loss": 0.2061, "step": 34722 }, { "epoch": 0.6446534458624031, "grad_norm": 0.27394694089889526, "learning_rate": 5.610185322822824e-06, "loss": 0.2321, "step": 34724 }, { "epoch": 0.6446905759998218, "grad_norm": 0.1619529277086258, "learning_rate": 5.6091372653609175e-06, "loss": 0.2491, "step": 34726 }, { "epoch": 0.6447277061372404, "grad_norm": 0.3059213161468506, "learning_rate": 5.608089267645496e-06, "loss": 0.4792, "step": 34728 }, { "epoch": 0.6447648362746591, "grad_norm": 0.3793026804924011, "learning_rate": 5.607041329690824e-06, "loss": 0.3044, "step": 34730 }, { "epoch": 0.6448019664120777, "grad_norm": 0.4118514955043793, "learning_rate": 5.605993451511166e-06, "loss": 0.3038, "step": 34732 }, { "epoch": 0.6448390965494963, "grad_norm": 0.4746505618095398, "learning_rate": 5.604945633120771e-06, "loss": 0.2842, "step": 34734 }, { "epoch": 0.644876226686915, "grad_norm": 0.3615133762359619, "learning_rate": 5.603897874533901e-06, "loss": 0.4499, "step": 34736 }, { "epoch": 0.6449133568243336, "grad_norm": 0.4404677748680115, "learning_rate": 5.6028501757648135e-06, "loss": 0.2867, "step": 34738 }, { "epoch": 0.6449504869617523, "grad_norm": 0.474431574344635, "learning_rate": 5.601802536827763e-06, "loss": 0.4728, "step": 34740 }, { "epoch": 0.6449876170991709, "grad_norm": 0.46012353897094727, "learning_rate": 5.6007549577370065e-06, "loss": 0.3034, "step": 34742 }, { "epoch": 0.6450247472365895, "grad_norm": 0.3508109152317047, "learning_rate": 5.599707438506803e-06, "loss": 0.2042, "step": 34744 }, { "epoch": 0.6450618773740081, "grad_norm": 0.3310129940509796, "learning_rate": 5.598659979151395e-06, "loss": 0.1549, "step": 34746 }, { "epoch": 0.6450990075114268, "grad_norm": 0.24820493161678314, "learning_rate": 5.597612579685046e-06, "loss": 0.1644, "step": 34748 }, { "epoch": 0.6451361376488455, "grad_norm": 0.7365579009056091, "learning_rate": 5.596565240121999e-06, "loss": 0.2377, "step": 34750 }, { "epoch": 0.645173267786264, "grad_norm": 0.3869255781173706, "learning_rate": 5.595517960476509e-06, "loss": 0.149, "step": 34752 }, { "epoch": 0.6452103979236827, "grad_norm": 0.4437241554260254, "learning_rate": 5.594470740762827e-06, "loss": 0.1664, "step": 34754 }, { "epoch": 0.6452475280611013, "grad_norm": 0.3068319261074066, "learning_rate": 5.593423580995208e-06, "loss": 0.2778, "step": 34756 }, { "epoch": 0.64528465819852, "grad_norm": 0.4013836681842804, "learning_rate": 5.592376481187888e-06, "loss": 0.4642, "step": 34758 }, { "epoch": 0.6453217883359387, "grad_norm": 0.45737430453300476, "learning_rate": 5.5913294413551225e-06, "loss": 0.2801, "step": 34760 }, { "epoch": 0.6453589184733572, "grad_norm": 0.42493894696235657, "learning_rate": 5.5902824615111584e-06, "loss": 0.2295, "step": 34762 }, { "epoch": 0.6453960486107759, "grad_norm": 0.47333043813705444, "learning_rate": 5.58923554167024e-06, "loss": 0.2335, "step": 34764 }, { "epoch": 0.6454331787481945, "grad_norm": 0.41420790553092957, "learning_rate": 5.588188681846615e-06, "loss": 0.4207, "step": 34766 }, { "epoch": 0.6454703088856132, "grad_norm": 0.3941937983036041, "learning_rate": 5.587141882054532e-06, "loss": 0.2703, "step": 34768 }, { "epoch": 0.6455074390230319, "grad_norm": 0.36116212606430054, "learning_rate": 5.586095142308229e-06, "loss": 0.3403, "step": 34770 }, { "epoch": 0.6455445691604504, "grad_norm": 0.30828458070755005, "learning_rate": 5.5850484626219455e-06, "loss": 0.4133, "step": 34772 }, { "epoch": 0.6455816992978691, "grad_norm": 0.34383338689804077, "learning_rate": 5.58400184300993e-06, "loss": 0.2437, "step": 34774 }, { "epoch": 0.6456188294352877, "grad_norm": 0.3185109794139862, "learning_rate": 5.58295528348642e-06, "loss": 0.2417, "step": 34776 }, { "epoch": 0.6456559595727064, "grad_norm": 0.3361605107784271, "learning_rate": 5.581908784065658e-06, "loss": 0.4926, "step": 34778 }, { "epoch": 0.6456930897101251, "grad_norm": 0.37308335304260254, "learning_rate": 5.580862344761885e-06, "loss": 0.3176, "step": 34780 }, { "epoch": 0.6457302198475436, "grad_norm": 0.4026464521884918, "learning_rate": 5.5798159655893415e-06, "loss": 0.3016, "step": 34782 }, { "epoch": 0.6457673499849623, "grad_norm": 0.47163859009742737, "learning_rate": 5.578769646562259e-06, "loss": 0.2099, "step": 34784 }, { "epoch": 0.6458044801223809, "grad_norm": 0.5056636929512024, "learning_rate": 5.57772338769488e-06, "loss": 0.2569, "step": 34786 }, { "epoch": 0.6458416102597996, "grad_norm": 0.35792338848114014, "learning_rate": 5.576677189001438e-06, "loss": 0.4732, "step": 34788 }, { "epoch": 0.6458787403972183, "grad_norm": 0.3689761459827423, "learning_rate": 5.575631050496173e-06, "loss": 0.3575, "step": 34790 }, { "epoch": 0.6459158705346368, "grad_norm": 0.34515824913978577, "learning_rate": 5.574584972193315e-06, "loss": 0.2438, "step": 34792 }, { "epoch": 0.6459530006720555, "grad_norm": 0.3479342460632324, "learning_rate": 5.573538954107103e-06, "loss": 0.1738, "step": 34794 }, { "epoch": 0.6459901308094741, "grad_norm": 0.33143702149391174, "learning_rate": 5.572492996251763e-06, "loss": 0.3422, "step": 34796 }, { "epoch": 0.6460272609468928, "grad_norm": 0.35069042444229126, "learning_rate": 5.571447098641532e-06, "loss": 0.2284, "step": 34798 }, { "epoch": 0.6460643910843114, "grad_norm": 0.37591543793678284, "learning_rate": 5.57040126129064e-06, "loss": 0.204, "step": 34800 }, { "epoch": 0.64610152122173, "grad_norm": 0.33767032623291016, "learning_rate": 5.569355484213319e-06, "loss": 0.2309, "step": 34802 }, { "epoch": 0.6461386513591487, "grad_norm": 0.28123676776885986, "learning_rate": 5.568309767423799e-06, "loss": 0.2502, "step": 34804 }, { "epoch": 0.6461757814965673, "grad_norm": 0.3622833490371704, "learning_rate": 5.567264110936308e-06, "loss": 0.2284, "step": 34806 }, { "epoch": 0.646212911633986, "grad_norm": 0.45907846093177795, "learning_rate": 5.5662185147650785e-06, "loss": 0.3109, "step": 34808 }, { "epoch": 0.6462500417714045, "grad_norm": 0.41510286927223206, "learning_rate": 5.565172978924331e-06, "loss": 0.3457, "step": 34810 }, { "epoch": 0.6462871719088232, "grad_norm": 0.32006219029426575, "learning_rate": 5.564127503428299e-06, "loss": 0.4219, "step": 34812 }, { "epoch": 0.6463243020462419, "grad_norm": 0.3592986762523651, "learning_rate": 5.563082088291201e-06, "loss": 0.2188, "step": 34814 }, { "epoch": 0.6463614321836605, "grad_norm": 0.5203841924667358, "learning_rate": 5.562036733527265e-06, "loss": 0.2687, "step": 34816 }, { "epoch": 0.6463985623210792, "grad_norm": 0.6168431639671326, "learning_rate": 5.560991439150718e-06, "loss": 0.2128, "step": 34818 }, { "epoch": 0.6464356924584977, "grad_norm": 0.3143469989299774, "learning_rate": 5.559946205175784e-06, "loss": 0.3074, "step": 34820 }, { "epoch": 0.6464728225959164, "grad_norm": 0.8062521815299988, "learning_rate": 5.558901031616677e-06, "loss": 0.3613, "step": 34822 }, { "epoch": 0.6465099527333351, "grad_norm": 0.6121131777763367, "learning_rate": 5.557855918487626e-06, "loss": 0.275, "step": 34824 }, { "epoch": 0.6465470828707537, "grad_norm": 0.40222230553627014, "learning_rate": 5.55681086580285e-06, "loss": 0.1617, "step": 34826 }, { "epoch": 0.6465842130081724, "grad_norm": 0.5299124121665955, "learning_rate": 5.555765873576568e-06, "loss": 0.4369, "step": 34828 }, { "epoch": 0.6466213431455909, "grad_norm": 0.48029881715774536, "learning_rate": 5.554720941823006e-06, "loss": 0.3489, "step": 34830 }, { "epoch": 0.6466584732830096, "grad_norm": 0.3887456953525543, "learning_rate": 5.553676070556373e-06, "loss": 0.215, "step": 34832 }, { "epoch": 0.6466956034204283, "grad_norm": 0.43169882893562317, "learning_rate": 5.5526312597908924e-06, "loss": 0.2094, "step": 34834 }, { "epoch": 0.6467327335578469, "grad_norm": 0.5147672295570374, "learning_rate": 5.551586509540776e-06, "loss": 0.5572, "step": 34836 }, { "epoch": 0.6467698636952656, "grad_norm": 0.4072299003601074, "learning_rate": 5.550541819820243e-06, "loss": 0.1439, "step": 34838 }, { "epoch": 0.6468069938326841, "grad_norm": 0.2971310019493103, "learning_rate": 5.549497190643509e-06, "loss": 0.3772, "step": 34840 }, { "epoch": 0.6468441239701028, "grad_norm": 0.1707666665315628, "learning_rate": 5.548452622024787e-06, "loss": 0.1632, "step": 34842 }, { "epoch": 0.6468812541075214, "grad_norm": 0.3800630271434784, "learning_rate": 5.547408113978294e-06, "loss": 0.1908, "step": 34844 }, { "epoch": 0.6469183842449401, "grad_norm": 0.2611372768878937, "learning_rate": 5.546363666518235e-06, "loss": 0.2418, "step": 34846 }, { "epoch": 0.6469555143823588, "grad_norm": 0.31096985936164856, "learning_rate": 5.5453192796588276e-06, "loss": 0.339, "step": 34848 }, { "epoch": 0.6469926445197773, "grad_norm": 0.43462124466896057, "learning_rate": 5.544274953414282e-06, "loss": 0.453, "step": 34850 }, { "epoch": 0.647029774657196, "grad_norm": 0.7805233597755432, "learning_rate": 5.54323068779881e-06, "loss": 0.3628, "step": 34852 }, { "epoch": 0.6470669047946146, "grad_norm": 0.40374529361724854, "learning_rate": 5.542186482826616e-06, "loss": 0.1895, "step": 34854 }, { "epoch": 0.6471040349320333, "grad_norm": 0.4321712255477905, "learning_rate": 5.541142338511911e-06, "loss": 0.2566, "step": 34856 }, { "epoch": 0.647141165069452, "grad_norm": 0.48027125000953674, "learning_rate": 5.540098254868906e-06, "loss": 0.2572, "step": 34858 }, { "epoch": 0.6471782952068705, "grad_norm": 0.24633851647377014, "learning_rate": 5.539054231911803e-06, "loss": 0.1767, "step": 34860 }, { "epoch": 0.6472154253442892, "grad_norm": 0.3651930093765259, "learning_rate": 5.538010269654807e-06, "loss": 0.2137, "step": 34862 }, { "epoch": 0.6472525554817078, "grad_norm": 0.26462599635124207, "learning_rate": 5.536966368112129e-06, "loss": 0.0579, "step": 34864 }, { "epoch": 0.6472896856191265, "grad_norm": 0.2882705628871918, "learning_rate": 5.5359225272979674e-06, "loss": 0.2956, "step": 34866 }, { "epoch": 0.6473268157565452, "grad_norm": 0.4609827399253845, "learning_rate": 5.534878747226531e-06, "loss": 0.1048, "step": 34868 }, { "epoch": 0.6473639458939637, "grad_norm": 0.3137326240539551, "learning_rate": 5.5338350279120245e-06, "loss": 0.3685, "step": 34870 }, { "epoch": 0.6474010760313824, "grad_norm": 0.2897990345954895, "learning_rate": 5.53279136936864e-06, "loss": 0.2807, "step": 34872 }, { "epoch": 0.647438206168801, "grad_norm": 0.39075005054473877, "learning_rate": 5.531747771610585e-06, "loss": 0.2715, "step": 34874 }, { "epoch": 0.6474753363062197, "grad_norm": 0.32559868693351746, "learning_rate": 5.530704234652062e-06, "loss": 0.215, "step": 34876 }, { "epoch": 0.6475124664436384, "grad_norm": 0.5592748522758484, "learning_rate": 5.529660758507265e-06, "loss": 0.2455, "step": 34878 }, { "epoch": 0.6475495965810569, "grad_norm": 0.37045055627822876, "learning_rate": 5.528617343190393e-06, "loss": 0.3569, "step": 34880 }, { "epoch": 0.6475867267184756, "grad_norm": 0.4509209394454956, "learning_rate": 5.527573988715651e-06, "loss": 0.137, "step": 34882 }, { "epoch": 0.6476238568558942, "grad_norm": 0.3632057309150696, "learning_rate": 5.526530695097225e-06, "loss": 0.1768, "step": 34884 }, { "epoch": 0.6476609869933129, "grad_norm": 0.44686976075172424, "learning_rate": 5.525487462349318e-06, "loss": 0.3729, "step": 34886 }, { "epoch": 0.6476981171307316, "grad_norm": 0.37398457527160645, "learning_rate": 5.524444290486124e-06, "loss": 0.2536, "step": 34888 }, { "epoch": 0.6477352472681501, "grad_norm": 0.314525306224823, "learning_rate": 5.523401179521837e-06, "loss": 0.2924, "step": 34890 }, { "epoch": 0.6477723774055688, "grad_norm": 0.24351750314235687, "learning_rate": 5.52235812947065e-06, "loss": 0.2495, "step": 34892 }, { "epoch": 0.6478095075429874, "grad_norm": 0.3813991844654083, "learning_rate": 5.521315140346761e-06, "loss": 0.2684, "step": 34894 }, { "epoch": 0.6478466376804061, "grad_norm": 0.252829909324646, "learning_rate": 5.520272212164355e-06, "loss": 0.2235, "step": 34896 }, { "epoch": 0.6478837678178246, "grad_norm": 0.45496848225593567, "learning_rate": 5.519229344937629e-06, "loss": 0.297, "step": 34898 }, { "epoch": 0.6479208979552433, "grad_norm": 0.32488980889320374, "learning_rate": 5.518186538680766e-06, "loss": 0.2563, "step": 34900 }, { "epoch": 0.647958028092662, "grad_norm": 0.4425840973854065, "learning_rate": 5.51714379340796e-06, "loss": 0.4079, "step": 34902 }, { "epoch": 0.6479951582300806, "grad_norm": 0.32276326417922974, "learning_rate": 5.5161011091334e-06, "loss": 0.3124, "step": 34904 }, { "epoch": 0.6480322883674993, "grad_norm": 0.24568946659564972, "learning_rate": 5.515058485871272e-06, "loss": 0.3151, "step": 34906 }, { "epoch": 0.6480694185049178, "grad_norm": 0.4530656337738037, "learning_rate": 5.514015923635768e-06, "loss": 0.4042, "step": 34908 }, { "epoch": 0.6481065486423365, "grad_norm": 0.37952136993408203, "learning_rate": 5.512973422441067e-06, "loss": 0.4066, "step": 34910 }, { "epoch": 0.6481436787797552, "grad_norm": 0.21045108139514923, "learning_rate": 5.511930982301357e-06, "loss": 0.3288, "step": 34912 }, { "epoch": 0.6481808089171738, "grad_norm": 0.24382953345775604, "learning_rate": 5.510888603230823e-06, "loss": 0.2468, "step": 34914 }, { "epoch": 0.6482179390545925, "grad_norm": 0.38174641132354736, "learning_rate": 5.509846285243653e-06, "loss": 0.3297, "step": 34916 }, { "epoch": 0.648255069192011, "grad_norm": 0.195305734872818, "learning_rate": 5.508804028354021e-06, "loss": 0.1979, "step": 34918 }, { "epoch": 0.6482921993294297, "grad_norm": 0.4923331141471863, "learning_rate": 5.5077618325761176e-06, "loss": 0.2503, "step": 34920 }, { "epoch": 0.6483293294668484, "grad_norm": 0.3371700942516327, "learning_rate": 5.506719697924116e-06, "loss": 0.1046, "step": 34922 }, { "epoch": 0.648366459604267, "grad_norm": 0.4205317795276642, "learning_rate": 5.5056776244122e-06, "loss": 0.1732, "step": 34924 }, { "epoch": 0.6484035897416857, "grad_norm": 0.4075887203216553, "learning_rate": 5.5046356120545496e-06, "loss": 0.0798, "step": 34926 }, { "epoch": 0.6484407198791042, "grad_norm": 0.9094563126564026, "learning_rate": 5.503593660865344e-06, "loss": 0.144, "step": 34928 }, { "epoch": 0.6484778500165229, "grad_norm": 0.3482798933982849, "learning_rate": 5.502551770858759e-06, "loss": 0.2812, "step": 34930 }, { "epoch": 0.6485149801539416, "grad_norm": 0.4022442400455475, "learning_rate": 5.501509942048975e-06, "loss": 0.1589, "step": 34932 }, { "epoch": 0.6485521102913602, "grad_norm": 0.31005892157554626, "learning_rate": 5.5004681744501684e-06, "loss": 0.3069, "step": 34934 }, { "epoch": 0.6485892404287789, "grad_norm": 0.42672133445739746, "learning_rate": 5.49942646807651e-06, "loss": 0.2984, "step": 34936 }, { "epoch": 0.6486263705661974, "grad_norm": 0.3502918779850006, "learning_rate": 5.498384822942178e-06, "loss": 0.4202, "step": 34938 }, { "epoch": 0.6486635007036161, "grad_norm": 0.486399382352829, "learning_rate": 5.4973432390613435e-06, "loss": 0.3509, "step": 34940 }, { "epoch": 0.6487006308410348, "grad_norm": 0.318162739276886, "learning_rate": 5.496301716448179e-06, "loss": 0.225, "step": 34942 }, { "epoch": 0.6487377609784534, "grad_norm": 0.22610719501972198, "learning_rate": 5.4952602551168584e-06, "loss": 0.2994, "step": 34944 }, { "epoch": 0.648774891115872, "grad_norm": 0.21827496588230133, "learning_rate": 5.494218855081557e-06, "loss": 0.402, "step": 34946 }, { "epoch": 0.6488120212532906, "grad_norm": 0.29414430260658264, "learning_rate": 5.493177516356435e-06, "loss": 0.2127, "step": 34948 }, { "epoch": 0.6488491513907093, "grad_norm": 0.269583523273468, "learning_rate": 5.492136238955667e-06, "loss": 0.3591, "step": 34950 }, { "epoch": 0.6488862815281279, "grad_norm": 0.5178949236869812, "learning_rate": 5.491095022893425e-06, "loss": 0.3144, "step": 34952 }, { "epoch": 0.6489234116655466, "grad_norm": 0.4178057312965393, "learning_rate": 5.490053868183872e-06, "loss": 0.4341, "step": 34954 }, { "epoch": 0.6489605418029653, "grad_norm": 0.4074004888534546, "learning_rate": 5.4890127748411785e-06, "loss": 0.2682, "step": 34956 }, { "epoch": 0.6489976719403838, "grad_norm": 0.2792261242866516, "learning_rate": 5.4879717428795124e-06, "loss": 0.1902, "step": 34958 }, { "epoch": 0.6490348020778025, "grad_norm": 0.5006771087646484, "learning_rate": 5.486930772313035e-06, "loss": 0.3362, "step": 34960 }, { "epoch": 0.6490719322152211, "grad_norm": 0.2366732954978943, "learning_rate": 5.485889863155909e-06, "loss": 0.196, "step": 34962 }, { "epoch": 0.6491090623526398, "grad_norm": 0.3755991756916046, "learning_rate": 5.4848490154222985e-06, "loss": 0.2582, "step": 34964 }, { "epoch": 0.6491461924900584, "grad_norm": 0.3734539747238159, "learning_rate": 5.4838082291263705e-06, "loss": 0.2461, "step": 34966 }, { "epoch": 0.649183322627477, "grad_norm": 0.609535276889801, "learning_rate": 5.482767504282285e-06, "loss": 0.246, "step": 34968 }, { "epoch": 0.6492204527648957, "grad_norm": 0.464240163564682, "learning_rate": 5.481726840904205e-06, "loss": 0.2901, "step": 34970 }, { "epoch": 0.6492575829023143, "grad_norm": 0.42593303322792053, "learning_rate": 5.480686239006286e-06, "loss": 0.3872, "step": 34972 }, { "epoch": 0.649294713039733, "grad_norm": 0.30449581146240234, "learning_rate": 5.4796456986026915e-06, "loss": 0.3897, "step": 34974 }, { "epoch": 0.6493318431771516, "grad_norm": 0.2090335190296173, "learning_rate": 5.478605219707578e-06, "loss": 0.3146, "step": 34976 }, { "epoch": 0.6493689733145702, "grad_norm": 0.2927362024784088, "learning_rate": 5.477564802335105e-06, "loss": 0.1149, "step": 34978 }, { "epoch": 0.6494061034519889, "grad_norm": 0.2984867990016937, "learning_rate": 5.476524446499432e-06, "loss": 0.312, "step": 34980 }, { "epoch": 0.6494432335894075, "grad_norm": 0.3318144679069519, "learning_rate": 5.475484152214707e-06, "loss": 0.4555, "step": 34982 }, { "epoch": 0.6494803637268262, "grad_norm": 0.41366684436798096, "learning_rate": 5.474443919495096e-06, "loss": 0.2546, "step": 34984 }, { "epoch": 0.6495174938642448, "grad_norm": 0.2889772653579712, "learning_rate": 5.473403748354742e-06, "loss": 0.3224, "step": 34986 }, { "epoch": 0.6495546240016634, "grad_norm": 0.4579371511936188, "learning_rate": 5.472363638807806e-06, "loss": 0.1557, "step": 34988 }, { "epoch": 0.6495917541390821, "grad_norm": 0.5011560320854187, "learning_rate": 5.471323590868438e-06, "loss": 0.0507, "step": 34990 }, { "epoch": 0.6496288842765007, "grad_norm": 0.2969849407672882, "learning_rate": 5.470283604550791e-06, "loss": 0.245, "step": 34992 }, { "epoch": 0.6496660144139194, "grad_norm": 0.4154781699180603, "learning_rate": 5.469243679869017e-06, "loss": 0.2046, "step": 34994 }, { "epoch": 0.6497031445513379, "grad_norm": 0.5398894548416138, "learning_rate": 5.468203816837267e-06, "loss": 0.5276, "step": 34996 }, { "epoch": 0.6497402746887566, "grad_norm": 0.30955785512924194, "learning_rate": 5.467164015469688e-06, "loss": 0.3282, "step": 34998 }, { "epoch": 0.6497774048261753, "grad_norm": 0.25512146949768066, "learning_rate": 5.466124275780427e-06, "loss": 0.1096, "step": 35000 }, { "epoch": 0.6498145349635939, "grad_norm": 0.3012755811214447, "learning_rate": 5.465084597783639e-06, "loss": 0.4655, "step": 35002 }, { "epoch": 0.6498516651010126, "grad_norm": 0.48194220662117004, "learning_rate": 5.464044981493461e-06, "loss": 0.2883, "step": 35004 }, { "epoch": 0.6498887952384311, "grad_norm": 0.412505567073822, "learning_rate": 5.463005426924046e-06, "loss": 0.1957, "step": 35006 }, { "epoch": 0.6499259253758498, "grad_norm": 0.482642263174057, "learning_rate": 5.461965934089539e-06, "loss": 0.1745, "step": 35008 }, { "epoch": 0.6499630555132685, "grad_norm": 0.5232049226760864, "learning_rate": 5.46092650300408e-06, "loss": 0.2497, "step": 35010 }, { "epoch": 0.6500001856506871, "grad_norm": 0.5267934799194336, "learning_rate": 5.459887133681816e-06, "loss": 0.231, "step": 35012 }, { "epoch": 0.6500373157881058, "grad_norm": 0.4003421366214752, "learning_rate": 5.458847826136887e-06, "loss": 0.2798, "step": 35014 }, { "epoch": 0.6500744459255243, "grad_norm": 0.33312806487083435, "learning_rate": 5.457808580383438e-06, "loss": 0.3884, "step": 35016 }, { "epoch": 0.650111576062943, "grad_norm": 0.3849543631076813, "learning_rate": 5.456769396435608e-06, "loss": 0.3571, "step": 35018 }, { "epoch": 0.6501487062003617, "grad_norm": 0.2600293755531311, "learning_rate": 5.455730274307538e-06, "loss": 0.1945, "step": 35020 }, { "epoch": 0.6501858363377803, "grad_norm": 0.525417149066925, "learning_rate": 5.454691214013372e-06, "loss": 0.3436, "step": 35022 }, { "epoch": 0.650222966475199, "grad_norm": 0.25680115818977356, "learning_rate": 5.4536522155672435e-06, "loss": 0.2754, "step": 35024 }, { "epoch": 0.6502600966126175, "grad_norm": 0.37854015827178955, "learning_rate": 5.452613278983286e-06, "loss": 0.2252, "step": 35026 }, { "epoch": 0.6502972267500362, "grad_norm": 0.35359838604927063, "learning_rate": 5.451574404275641e-06, "loss": 0.3363, "step": 35028 }, { "epoch": 0.6503343568874549, "grad_norm": 0.1946440190076828, "learning_rate": 5.450535591458445e-06, "loss": 0.2218, "step": 35030 }, { "epoch": 0.6503714870248735, "grad_norm": 0.36694175004959106, "learning_rate": 5.449496840545832e-06, "loss": 0.5371, "step": 35032 }, { "epoch": 0.6504086171622921, "grad_norm": 0.3081398606300354, "learning_rate": 5.448458151551941e-06, "loss": 0.3088, "step": 35034 }, { "epoch": 0.6504457472997107, "grad_norm": 0.45935556292533875, "learning_rate": 5.447419524490895e-06, "loss": 0.3653, "step": 35036 }, { "epoch": 0.6504828774371294, "grad_norm": 0.3218877911567688, "learning_rate": 5.446380959376836e-06, "loss": 0.2441, "step": 35038 }, { "epoch": 0.6505200075745481, "grad_norm": 0.440879762172699, "learning_rate": 5.4453424562238895e-06, "loss": 0.1952, "step": 35040 }, { "epoch": 0.6505571377119667, "grad_norm": 0.3222860097885132, "learning_rate": 5.444304015046192e-06, "loss": 0.1587, "step": 35042 }, { "epoch": 0.6505942678493853, "grad_norm": 0.30197104811668396, "learning_rate": 5.4432656358578724e-06, "loss": 0.2205, "step": 35044 }, { "epoch": 0.6506313979868039, "grad_norm": 0.32392019033432007, "learning_rate": 5.442227318673059e-06, "loss": 0.2343, "step": 35046 }, { "epoch": 0.6506685281242226, "grad_norm": 0.6413139700889587, "learning_rate": 5.441189063505877e-06, "loss": 0.301, "step": 35048 }, { "epoch": 0.6507056582616412, "grad_norm": 0.23965413868427277, "learning_rate": 5.4401508703704565e-06, "loss": 0.362, "step": 35050 }, { "epoch": 0.6507427883990599, "grad_norm": 0.3463403880596161, "learning_rate": 5.439112739280923e-06, "loss": 0.2613, "step": 35052 }, { "epoch": 0.6507799185364785, "grad_norm": 0.40303468704223633, "learning_rate": 5.438074670251406e-06, "loss": 0.3733, "step": 35054 }, { "epoch": 0.6508170486738971, "grad_norm": 0.36131739616394043, "learning_rate": 5.437036663296026e-06, "loss": 0.1621, "step": 35056 }, { "epoch": 0.6508541788113158, "grad_norm": 0.35674044489860535, "learning_rate": 5.4359987184289095e-06, "loss": 0.253, "step": 35058 }, { "epoch": 0.6508913089487344, "grad_norm": 0.6778879165649414, "learning_rate": 5.434960835664185e-06, "loss": 0.2688, "step": 35060 }, { "epoch": 0.650928439086153, "grad_norm": 0.42753756046295166, "learning_rate": 5.433923015015965e-06, "loss": 0.2942, "step": 35062 }, { "epoch": 0.6509655692235717, "grad_norm": 0.2883737087249756, "learning_rate": 5.432885256498375e-06, "loss": 0.2028, "step": 35064 }, { "epoch": 0.6510026993609903, "grad_norm": 1.1320891380310059, "learning_rate": 5.43184756012554e-06, "loss": 0.3115, "step": 35066 }, { "epoch": 0.651039829498409, "grad_norm": 0.3906393051147461, "learning_rate": 5.430809925911575e-06, "loss": 0.5193, "step": 35068 }, { "epoch": 0.6510769596358276, "grad_norm": 0.4349411725997925, "learning_rate": 5.429772353870599e-06, "loss": 0.1746, "step": 35070 }, { "epoch": 0.6511140897732463, "grad_norm": 1.0571234226226807, "learning_rate": 5.4287348440167365e-06, "loss": 0.2444, "step": 35072 }, { "epoch": 0.6511512199106649, "grad_norm": 0.6107184886932373, "learning_rate": 5.4276973963640954e-06, "loss": 0.3767, "step": 35074 }, { "epoch": 0.6511883500480835, "grad_norm": 0.2539249062538147, "learning_rate": 5.4266600109267985e-06, "loss": 0.1762, "step": 35076 }, { "epoch": 0.6512254801855022, "grad_norm": 1.2727235555648804, "learning_rate": 5.425622687718959e-06, "loss": 0.3137, "step": 35078 }, { "epoch": 0.6512626103229208, "grad_norm": 0.3641892373561859, "learning_rate": 5.424585426754694e-06, "loss": 0.3174, "step": 35080 }, { "epoch": 0.6512997404603394, "grad_norm": 0.2908001244068146, "learning_rate": 5.423548228048115e-06, "loss": 0.3235, "step": 35082 }, { "epoch": 0.6513368705977581, "grad_norm": 0.3206023573875427, "learning_rate": 5.422511091613341e-06, "loss": 0.1874, "step": 35084 }, { "epoch": 0.6513740007351767, "grad_norm": 0.4240186810493469, "learning_rate": 5.421474017464476e-06, "loss": 0.2083, "step": 35086 }, { "epoch": 0.6514111308725954, "grad_norm": 0.3637961447238922, "learning_rate": 5.42043700561564e-06, "loss": 0.3179, "step": 35088 }, { "epoch": 0.651448261010014, "grad_norm": 0.49588721990585327, "learning_rate": 5.419400056080933e-06, "loss": 0.3298, "step": 35090 }, { "epoch": 0.6514853911474326, "grad_norm": 0.40184250473976135, "learning_rate": 5.418363168874472e-06, "loss": 0.1944, "step": 35092 }, { "epoch": 0.6515225212848513, "grad_norm": 0.8556508421897888, "learning_rate": 5.417326344010365e-06, "loss": 0.4215, "step": 35094 }, { "epoch": 0.6515596514222699, "grad_norm": 0.3922984302043915, "learning_rate": 5.4162895815027195e-06, "loss": 0.2975, "step": 35096 }, { "epoch": 0.6515967815596886, "grad_norm": 0.4298214316368103, "learning_rate": 5.4152528813656465e-06, "loss": 0.4013, "step": 35098 }, { "epoch": 0.6516339116971072, "grad_norm": 0.49251043796539307, "learning_rate": 5.414216243613246e-06, "loss": 0.3218, "step": 35100 }, { "epoch": 0.6516710418345258, "grad_norm": 0.31654471158981323, "learning_rate": 5.4131796682596245e-06, "loss": 0.2833, "step": 35102 }, { "epoch": 0.6517081719719444, "grad_norm": 0.24148577451705933, "learning_rate": 5.412143155318891e-06, "loss": 0.2764, "step": 35104 }, { "epoch": 0.6517453021093631, "grad_norm": 0.19002054631710052, "learning_rate": 5.411106704805149e-06, "loss": 0.208, "step": 35106 }, { "epoch": 0.6517824322467818, "grad_norm": 0.42621880769729614, "learning_rate": 5.410070316732497e-06, "loss": 0.2284, "step": 35108 }, { "epoch": 0.6518195623842004, "grad_norm": 0.9535357356071472, "learning_rate": 5.409033991115043e-06, "loss": 0.272, "step": 35110 }, { "epoch": 0.651856692521619, "grad_norm": 0.3512863218784332, "learning_rate": 5.40799772796688e-06, "loss": 0.2321, "step": 35112 }, { "epoch": 0.6518938226590376, "grad_norm": 0.2934827506542206, "learning_rate": 5.406961527302114e-06, "loss": 0.1707, "step": 35114 }, { "epoch": 0.6519309527964563, "grad_norm": 0.5100598335266113, "learning_rate": 5.405925389134843e-06, "loss": 0.3555, "step": 35116 }, { "epoch": 0.651968082933875, "grad_norm": 0.2400893121957779, "learning_rate": 5.404889313479168e-06, "loss": 0.2267, "step": 35118 }, { "epoch": 0.6520052130712936, "grad_norm": 0.3128950595855713, "learning_rate": 5.403853300349185e-06, "loss": 0.2256, "step": 35120 }, { "epoch": 0.6520423432087122, "grad_norm": 0.35240858793258667, "learning_rate": 5.402817349758995e-06, "loss": 0.5031, "step": 35122 }, { "epoch": 0.6520794733461308, "grad_norm": 0.6541539430618286, "learning_rate": 5.401781461722687e-06, "loss": 0.3501, "step": 35124 }, { "epoch": 0.6521166034835495, "grad_norm": 0.2375851422548294, "learning_rate": 5.400745636254361e-06, "loss": 0.2076, "step": 35126 }, { "epoch": 0.6521537336209682, "grad_norm": 0.5388584733009338, "learning_rate": 5.3997098733681085e-06, "loss": 0.3356, "step": 35128 }, { "epoch": 0.6521908637583868, "grad_norm": 0.3617672920227051, "learning_rate": 5.398674173078031e-06, "loss": 0.1341, "step": 35130 }, { "epoch": 0.6522279938958054, "grad_norm": 0.30219653248786926, "learning_rate": 5.397638535398211e-06, "loss": 0.1757, "step": 35132 }, { "epoch": 0.652265124033224, "grad_norm": 0.40171170234680176, "learning_rate": 5.396602960342748e-06, "loss": 0.2869, "step": 35134 }, { "epoch": 0.6523022541706427, "grad_norm": 0.3007016181945801, "learning_rate": 5.395567447925727e-06, "loss": 0.3751, "step": 35136 }, { "epoch": 0.6523393843080614, "grad_norm": 0.5373406410217285, "learning_rate": 5.394531998161241e-06, "loss": 0.3456, "step": 35138 }, { "epoch": 0.65237651444548, "grad_norm": 0.29361391067504883, "learning_rate": 5.393496611063379e-06, "loss": 0.3678, "step": 35140 }, { "epoch": 0.6524136445828986, "grad_norm": 0.8628339767456055, "learning_rate": 5.39246128664623e-06, "loss": 0.2087, "step": 35142 }, { "epoch": 0.6524507747203172, "grad_norm": 0.3301093578338623, "learning_rate": 5.391426024923883e-06, "loss": 0.3099, "step": 35144 }, { "epoch": 0.6524879048577359, "grad_norm": 0.41500940918922424, "learning_rate": 5.390390825910422e-06, "loss": 0.2702, "step": 35146 }, { "epoch": 0.6525250349951545, "grad_norm": 0.4091249704360962, "learning_rate": 5.389355689619939e-06, "loss": 0.4012, "step": 35148 }, { "epoch": 0.6525621651325731, "grad_norm": 0.42479413747787476, "learning_rate": 5.38832061606651e-06, "loss": 0.2417, "step": 35150 }, { "epoch": 0.6525992952699918, "grad_norm": 0.19798994064331055, "learning_rate": 5.38728560526423e-06, "loss": 0.1942, "step": 35152 }, { "epoch": 0.6526364254074104, "grad_norm": 0.3743791878223419, "learning_rate": 5.38625065722717e-06, "loss": 0.2146, "step": 35154 }, { "epoch": 0.6526735555448291, "grad_norm": 0.34228742122650146, "learning_rate": 5.3852157719694185e-06, "loss": 0.3625, "step": 35156 }, { "epoch": 0.6527106856822477, "grad_norm": 0.305728942155838, "learning_rate": 5.384180949505059e-06, "loss": 0.4026, "step": 35158 }, { "epoch": 0.6527478158196663, "grad_norm": 0.28715023398399353, "learning_rate": 5.383146189848174e-06, "loss": 0.2366, "step": 35160 }, { "epoch": 0.652784945957085, "grad_norm": 0.483659952878952, "learning_rate": 5.382111493012836e-06, "loss": 0.2486, "step": 35162 }, { "epoch": 0.6528220760945036, "grad_norm": 0.2783755362033844, "learning_rate": 5.38107685901313e-06, "loss": 0.295, "step": 35164 }, { "epoch": 0.6528592062319223, "grad_norm": 0.3489808440208435, "learning_rate": 5.380042287863132e-06, "loss": 0.1985, "step": 35166 }, { "epoch": 0.6528963363693409, "grad_norm": 0.34966909885406494, "learning_rate": 5.379007779576919e-06, "loss": 0.3544, "step": 35168 }, { "epoch": 0.6529334665067595, "grad_norm": 0.4507407546043396, "learning_rate": 5.377973334168574e-06, "loss": 0.3785, "step": 35170 }, { "epoch": 0.6529705966441782, "grad_norm": 0.2911249101161957, "learning_rate": 5.376938951652162e-06, "loss": 0.0946, "step": 35172 }, { "epoch": 0.6530077267815968, "grad_norm": 0.23742179572582245, "learning_rate": 5.375904632041768e-06, "loss": 0.2405, "step": 35174 }, { "epoch": 0.6530448569190155, "grad_norm": 0.2796684801578522, "learning_rate": 5.3748703753514575e-06, "loss": 0.2928, "step": 35176 }, { "epoch": 0.653081987056434, "grad_norm": 0.33510711789131165, "learning_rate": 5.373836181595307e-06, "loss": 0.249, "step": 35178 }, { "epoch": 0.6531191171938527, "grad_norm": 0.3652302622795105, "learning_rate": 5.372802050787388e-06, "loss": 0.333, "step": 35180 }, { "epoch": 0.6531562473312714, "grad_norm": 0.6881268620491028, "learning_rate": 5.371767982941774e-06, "loss": 0.1914, "step": 35182 }, { "epoch": 0.65319337746869, "grad_norm": 0.4327228367328644, "learning_rate": 5.370733978072535e-06, "loss": 0.3138, "step": 35184 }, { "epoch": 0.6532305076061087, "grad_norm": 0.5186774134635925, "learning_rate": 5.369700036193744e-06, "loss": 0.2238, "step": 35186 }, { "epoch": 0.6532676377435273, "grad_norm": 0.36401671171188354, "learning_rate": 5.368666157319462e-06, "loss": 0.4068, "step": 35188 }, { "epoch": 0.6533047678809459, "grad_norm": 0.5229595899581909, "learning_rate": 5.367632341463761e-06, "loss": 0.2368, "step": 35190 }, { "epoch": 0.6533418980183646, "grad_norm": 0.361337274312973, "learning_rate": 5.366598588640713e-06, "loss": 0.2177, "step": 35192 }, { "epoch": 0.6533790281557832, "grad_norm": 0.30592697858810425, "learning_rate": 5.3655648988643745e-06, "loss": 0.2882, "step": 35194 }, { "epoch": 0.6534161582932019, "grad_norm": 0.45658719539642334, "learning_rate": 5.364531272148816e-06, "loss": 0.2335, "step": 35196 }, { "epoch": 0.6534532884306204, "grad_norm": 0.567110538482666, "learning_rate": 5.363497708508107e-06, "loss": 0.2479, "step": 35198 }, { "epoch": 0.6534904185680391, "grad_norm": 0.4558965265750885, "learning_rate": 5.362464207956301e-06, "loss": 0.4069, "step": 35200 }, { "epoch": 0.6535275487054577, "grad_norm": 0.44819381833076477, "learning_rate": 5.361430770507467e-06, "loss": 0.2461, "step": 35202 }, { "epoch": 0.6535646788428764, "grad_norm": 0.5538884401321411, "learning_rate": 5.360397396175667e-06, "loss": 0.397, "step": 35204 }, { "epoch": 0.6536018089802951, "grad_norm": 0.3635983467102051, "learning_rate": 5.359364084974961e-06, "loss": 0.2973, "step": 35206 }, { "epoch": 0.6536389391177136, "grad_norm": 0.4137502908706665, "learning_rate": 5.358330836919407e-06, "loss": 0.42, "step": 35208 }, { "epoch": 0.6536760692551323, "grad_norm": 0.2761116027832031, "learning_rate": 5.357297652023074e-06, "loss": 0.1263, "step": 35210 }, { "epoch": 0.6537131993925509, "grad_norm": 0.6658402681350708, "learning_rate": 5.356264530300009e-06, "loss": 0.1947, "step": 35212 }, { "epoch": 0.6537503295299696, "grad_norm": 0.3985794186592102, "learning_rate": 5.355231471764275e-06, "loss": 0.3537, "step": 35214 }, { "epoch": 0.6537874596673883, "grad_norm": 0.27038154006004333, "learning_rate": 5.354198476429927e-06, "loss": 0.2353, "step": 35216 }, { "epoch": 0.6538245898048068, "grad_norm": 0.24203923344612122, "learning_rate": 5.3531655443110205e-06, "loss": 0.1921, "step": 35218 }, { "epoch": 0.6538617199422255, "grad_norm": 0.22974804043769836, "learning_rate": 5.352132675421613e-06, "loss": 0.1954, "step": 35220 }, { "epoch": 0.6538988500796441, "grad_norm": 0.36859554052352905, "learning_rate": 5.351099869775757e-06, "loss": 0.2946, "step": 35222 }, { "epoch": 0.6539359802170628, "grad_norm": 0.4116674065589905, "learning_rate": 5.350067127387513e-06, "loss": 0.2603, "step": 35224 }, { "epoch": 0.6539731103544815, "grad_norm": 0.36322322487831116, "learning_rate": 5.34903444827092e-06, "loss": 0.1331, "step": 35226 }, { "epoch": 0.6540102404919, "grad_norm": 0.3429400622844696, "learning_rate": 5.34800183244004e-06, "loss": 0.1253, "step": 35228 }, { "epoch": 0.6540473706293187, "grad_norm": 0.4147558808326721, "learning_rate": 5.346969279908918e-06, "loss": 0.2276, "step": 35230 }, { "epoch": 0.6540845007667373, "grad_norm": 0.36440587043762207, "learning_rate": 5.345936790691608e-06, "loss": 0.4382, "step": 35232 }, { "epoch": 0.654121630904156, "grad_norm": 0.4868938624858856, "learning_rate": 5.344904364802162e-06, "loss": 0.2929, "step": 35234 }, { "epoch": 0.6541587610415747, "grad_norm": 0.3093056380748749, "learning_rate": 5.343872002254619e-06, "loss": 0.1217, "step": 35236 }, { "epoch": 0.6541958911789932, "grad_norm": 0.34889519214630127, "learning_rate": 5.342839703063035e-06, "loss": 0.1853, "step": 35238 }, { "epoch": 0.6542330213164119, "grad_norm": 0.4011906385421753, "learning_rate": 5.34180746724145e-06, "loss": 0.2749, "step": 35240 }, { "epoch": 0.6542701514538305, "grad_norm": 0.47876298427581787, "learning_rate": 5.340775294803912e-06, "loss": 0.2958, "step": 35242 }, { "epoch": 0.6543072815912492, "grad_norm": 0.5504870414733887, "learning_rate": 5.339743185764467e-06, "loss": 0.2764, "step": 35244 }, { "epoch": 0.6543444117286679, "grad_norm": 0.7109227776527405, "learning_rate": 5.338711140137157e-06, "loss": 0.3367, "step": 35246 }, { "epoch": 0.6543815418660864, "grad_norm": 0.5481871366500854, "learning_rate": 5.337679157936031e-06, "loss": 0.4409, "step": 35248 }, { "epoch": 0.6544186720035051, "grad_norm": 0.431727796792984, "learning_rate": 5.336647239175121e-06, "loss": 0.32, "step": 35250 }, { "epoch": 0.6544558021409237, "grad_norm": 0.35014235973358154, "learning_rate": 5.335615383868475e-06, "loss": 0.4015, "step": 35252 }, { "epoch": 0.6544929322783424, "grad_norm": 0.4208662211894989, "learning_rate": 5.334583592030133e-06, "loss": 0.2796, "step": 35254 }, { "epoch": 0.654530062415761, "grad_norm": 0.5008327960968018, "learning_rate": 5.333551863674135e-06, "loss": 0.3238, "step": 35256 }, { "epoch": 0.6545671925531796, "grad_norm": 0.3998279869556427, "learning_rate": 5.332520198814517e-06, "loss": 0.1599, "step": 35258 }, { "epoch": 0.6546043226905983, "grad_norm": 0.4773446321487427, "learning_rate": 5.3314885974653215e-06, "loss": 0.4229, "step": 35260 }, { "epoch": 0.6546414528280169, "grad_norm": 0.40094083547592163, "learning_rate": 5.330457059640579e-06, "loss": 0.2248, "step": 35262 }, { "epoch": 0.6546785829654356, "grad_norm": 0.4393852949142456, "learning_rate": 5.329425585354328e-06, "loss": 0.2608, "step": 35264 }, { "epoch": 0.6547157131028541, "grad_norm": 0.3545730412006378, "learning_rate": 5.328394174620606e-06, "loss": 0.3012, "step": 35266 }, { "epoch": 0.6547528432402728, "grad_norm": 0.4005003273487091, "learning_rate": 5.327362827453446e-06, "loss": 0.2422, "step": 35268 }, { "epoch": 0.6547899733776915, "grad_norm": 0.31729578971862793, "learning_rate": 5.3263315438668805e-06, "loss": 0.1843, "step": 35270 }, { "epoch": 0.6548271035151101, "grad_norm": 0.3156442642211914, "learning_rate": 5.3253003238749445e-06, "loss": 0.1221, "step": 35272 }, { "epoch": 0.6548642336525288, "grad_norm": 0.25421348214149475, "learning_rate": 5.324269167491673e-06, "loss": 0.1928, "step": 35274 }, { "epoch": 0.6549013637899473, "grad_norm": 0.4177688658237457, "learning_rate": 5.323238074731088e-06, "loss": 0.2804, "step": 35276 }, { "epoch": 0.654938493927366, "grad_norm": 0.4600485563278198, "learning_rate": 5.322207045607228e-06, "loss": 0.0798, "step": 35278 }, { "epoch": 0.6549756240647847, "grad_norm": 0.648827075958252, "learning_rate": 5.321176080134115e-06, "loss": 0.2939, "step": 35280 }, { "epoch": 0.6550127542022033, "grad_norm": 0.3129625618457794, "learning_rate": 5.320145178325781e-06, "loss": 0.2818, "step": 35282 }, { "epoch": 0.655049884339622, "grad_norm": 0.4650766849517822, "learning_rate": 5.319114340196253e-06, "loss": 0.1527, "step": 35284 }, { "epoch": 0.6550870144770405, "grad_norm": 0.528578519821167, "learning_rate": 5.318083565759562e-06, "loss": 0.3859, "step": 35286 }, { "epoch": 0.6551241446144592, "grad_norm": 0.503020167350769, "learning_rate": 5.317052855029725e-06, "loss": 0.2798, "step": 35288 }, { "epoch": 0.6551612747518779, "grad_norm": 0.43730732798576355, "learning_rate": 5.316022208020772e-06, "loss": 0.4014, "step": 35290 }, { "epoch": 0.6551984048892965, "grad_norm": 0.5357770323753357, "learning_rate": 5.314991624746728e-06, "loss": 0.367, "step": 35292 }, { "epoch": 0.6552355350267152, "grad_norm": 0.5192011594772339, "learning_rate": 5.3139611052216144e-06, "loss": 0.1268, "step": 35294 }, { "epoch": 0.6552726651641337, "grad_norm": 0.4778147339820862, "learning_rate": 5.312930649459454e-06, "loss": 0.2959, "step": 35296 }, { "epoch": 0.6553097953015524, "grad_norm": 0.38792353868484497, "learning_rate": 5.311900257474269e-06, "loss": 0.2345, "step": 35298 }, { "epoch": 0.655346925438971, "grad_norm": 0.6582024693489075, "learning_rate": 5.310869929280082e-06, "loss": 0.3735, "step": 35300 }, { "epoch": 0.6553840555763897, "grad_norm": 0.29808539152145386, "learning_rate": 5.309839664890905e-06, "loss": 0.2849, "step": 35302 }, { "epoch": 0.6554211857138084, "grad_norm": 0.37377986311912537, "learning_rate": 5.308809464320761e-06, "loss": 0.4981, "step": 35304 }, { "epoch": 0.6554583158512269, "grad_norm": 0.6447781920433044, "learning_rate": 5.3077793275836695e-06, "loss": 0.2857, "step": 35306 }, { "epoch": 0.6554954459886456, "grad_norm": 0.3082626461982727, "learning_rate": 5.306749254693646e-06, "loss": 0.201, "step": 35308 }, { "epoch": 0.6555325761260642, "grad_norm": 0.3443126678466797, "learning_rate": 5.305719245664707e-06, "loss": 0.1756, "step": 35310 }, { "epoch": 0.6555697062634829, "grad_norm": 0.419045090675354, "learning_rate": 5.304689300510873e-06, "loss": 0.1877, "step": 35312 }, { "epoch": 0.6556068364009016, "grad_norm": 0.3775356411933899, "learning_rate": 5.303659419246148e-06, "loss": 0.1651, "step": 35314 }, { "epoch": 0.6556439665383201, "grad_norm": 0.41566628217697144, "learning_rate": 5.302629601884552e-06, "loss": 0.316, "step": 35316 }, { "epoch": 0.6556810966757388, "grad_norm": 0.5356295704841614, "learning_rate": 5.301599848440097e-06, "loss": 0.2757, "step": 35318 }, { "epoch": 0.6557182268131574, "grad_norm": 0.31234022974967957, "learning_rate": 5.300570158926799e-06, "loss": 0.3421, "step": 35320 }, { "epoch": 0.6557553569505761, "grad_norm": 0.35381388664245605, "learning_rate": 5.299540533358659e-06, "loss": 0.1255, "step": 35322 }, { "epoch": 0.6557924870879948, "grad_norm": 0.35568520426750183, "learning_rate": 5.298510971749698e-06, "loss": 0.1036, "step": 35324 }, { "epoch": 0.6558296172254133, "grad_norm": 0.47837957739830017, "learning_rate": 5.297481474113917e-06, "loss": 0.2808, "step": 35326 }, { "epoch": 0.655866747362832, "grad_norm": 0.3509726822376251, "learning_rate": 5.2964520404653266e-06, "loss": 0.4463, "step": 35328 }, { "epoch": 0.6559038775002506, "grad_norm": 0.3945107161998749, "learning_rate": 5.295422670817934e-06, "loss": 0.4627, "step": 35330 }, { "epoch": 0.6559410076376693, "grad_norm": 0.39911481738090515, "learning_rate": 5.2943933651857484e-06, "loss": 0.2445, "step": 35332 }, { "epoch": 0.655978137775088, "grad_norm": 0.30275222659111023, "learning_rate": 5.2933641235827735e-06, "loss": 0.2083, "step": 35334 }, { "epoch": 0.6560152679125065, "grad_norm": 0.45349594950675964, "learning_rate": 5.29233494602302e-06, "loss": 0.3519, "step": 35336 }, { "epoch": 0.6560523980499252, "grad_norm": 0.465545117855072, "learning_rate": 5.291305832520483e-06, "loss": 0.2576, "step": 35338 }, { "epoch": 0.6560895281873438, "grad_norm": 0.4254024028778076, "learning_rate": 5.290276783089167e-06, "loss": 0.2364, "step": 35340 }, { "epoch": 0.6561266583247625, "grad_norm": 0.3732546269893646, "learning_rate": 5.289247797743083e-06, "loss": 0.2143, "step": 35342 }, { "epoch": 0.6561637884621812, "grad_norm": 0.5355303287506104, "learning_rate": 5.2882188764962214e-06, "loss": 0.403, "step": 35344 }, { "epoch": 0.6562009185995997, "grad_norm": 0.43809232115745544, "learning_rate": 5.287190019362587e-06, "loss": 0.3841, "step": 35346 }, { "epoch": 0.6562380487370184, "grad_norm": 0.2559813857078552, "learning_rate": 5.286161226356182e-06, "loss": 0.1878, "step": 35348 }, { "epoch": 0.656275178874437, "grad_norm": 0.2984473705291748, "learning_rate": 5.285132497491005e-06, "loss": 0.1147, "step": 35350 }, { "epoch": 0.6563123090118557, "grad_norm": 0.37955009937286377, "learning_rate": 5.284103832781049e-06, "loss": 0.2031, "step": 35352 }, { "epoch": 0.6563494391492742, "grad_norm": 0.5334699749946594, "learning_rate": 5.283075232240314e-06, "loss": 0.1036, "step": 35354 }, { "epoch": 0.6563865692866929, "grad_norm": 0.4127434194087982, "learning_rate": 5.282046695882794e-06, "loss": 0.1393, "step": 35356 }, { "epoch": 0.6564236994241116, "grad_norm": 0.3763151466846466, "learning_rate": 5.281018223722489e-06, "loss": 0.1747, "step": 35358 }, { "epoch": 0.6564608295615302, "grad_norm": 0.3438388407230377, "learning_rate": 5.279989815773394e-06, "loss": 0.3463, "step": 35360 }, { "epoch": 0.6564979596989489, "grad_norm": 0.465952068567276, "learning_rate": 5.278961472049498e-06, "loss": 0.1803, "step": 35362 }, { "epoch": 0.6565350898363674, "grad_norm": 0.558900773525238, "learning_rate": 5.277933192564797e-06, "loss": 0.4426, "step": 35364 }, { "epoch": 0.6565722199737861, "grad_norm": 0.26000380516052246, "learning_rate": 5.276904977333277e-06, "loss": 0.1395, "step": 35366 }, { "epoch": 0.6566093501112048, "grad_norm": 0.5756716132164001, "learning_rate": 5.275876826368933e-06, "loss": 0.22, "step": 35368 }, { "epoch": 0.6566464802486234, "grad_norm": 0.31303897500038147, "learning_rate": 5.274848739685755e-06, "loss": 0.1896, "step": 35370 }, { "epoch": 0.6566836103860421, "grad_norm": 0.17878375947475433, "learning_rate": 5.273820717297733e-06, "loss": 0.1589, "step": 35372 }, { "epoch": 0.6567207405234606, "grad_norm": 0.37714269757270813, "learning_rate": 5.272792759218857e-06, "loss": 0.1716, "step": 35374 }, { "epoch": 0.6567578706608793, "grad_norm": 0.631506621837616, "learning_rate": 5.271764865463109e-06, "loss": 0.283, "step": 35376 }, { "epoch": 0.656795000798298, "grad_norm": 0.3017447888851166, "learning_rate": 5.270737036044479e-06, "loss": 0.3847, "step": 35378 }, { "epoch": 0.6568321309357166, "grad_norm": 0.3440400958061218, "learning_rate": 5.2697092709769505e-06, "loss": 0.3009, "step": 35380 }, { "epoch": 0.6568692610731353, "grad_norm": 0.40685343742370605, "learning_rate": 5.268681570274516e-06, "loss": 0.2495, "step": 35382 }, { "epoch": 0.6569063912105538, "grad_norm": 0.34019628167152405, "learning_rate": 5.267653933951148e-06, "loss": 0.1331, "step": 35384 }, { "epoch": 0.6569435213479725, "grad_norm": 0.34905216097831726, "learning_rate": 5.266626362020835e-06, "loss": 0.2358, "step": 35386 }, { "epoch": 0.6569806514853912, "grad_norm": 0.3839201033115387, "learning_rate": 5.2655988544975636e-06, "loss": 0.2242, "step": 35388 }, { "epoch": 0.6570177816228098, "grad_norm": 0.47935885190963745, "learning_rate": 5.264571411395306e-06, "loss": 0.2663, "step": 35390 }, { "epoch": 0.6570549117602285, "grad_norm": 0.7915021181106567, "learning_rate": 5.263544032728047e-06, "loss": 0.243, "step": 35392 }, { "epoch": 0.657092041897647, "grad_norm": 0.6132286190986633, "learning_rate": 5.262516718509768e-06, "loss": 0.2173, "step": 35394 }, { "epoch": 0.6571291720350657, "grad_norm": 0.35200098156929016, "learning_rate": 5.2614894687544445e-06, "loss": 0.266, "step": 35396 }, { "epoch": 0.6571663021724844, "grad_norm": 0.28726890683174133, "learning_rate": 5.2604622834760575e-06, "loss": 0.3896, "step": 35398 }, { "epoch": 0.657203432309903, "grad_norm": 0.41205185651779175, "learning_rate": 5.259435162688584e-06, "loss": 0.3514, "step": 35400 }, { "epoch": 0.6572405624473217, "grad_norm": 0.830986738204956, "learning_rate": 5.258408106405996e-06, "loss": 0.188, "step": 35402 }, { "epoch": 0.6572776925847402, "grad_norm": 0.45158135890960693, "learning_rate": 5.25738111464227e-06, "loss": 0.2007, "step": 35404 }, { "epoch": 0.6573148227221589, "grad_norm": 0.32918915152549744, "learning_rate": 5.256354187411385e-06, "loss": 0.3151, "step": 35406 }, { "epoch": 0.6573519528595775, "grad_norm": 0.5441567301750183, "learning_rate": 5.255327324727308e-06, "loss": 0.2152, "step": 35408 }, { "epoch": 0.6573890829969962, "grad_norm": 1.4596136808395386, "learning_rate": 5.254300526604015e-06, "loss": 0.2519, "step": 35410 }, { "epoch": 0.6574262131344148, "grad_norm": 0.37669867277145386, "learning_rate": 5.2532737930554776e-06, "loss": 0.404, "step": 35412 }, { "epoch": 0.6574633432718334, "grad_norm": 0.5424704551696777, "learning_rate": 5.2522471240956645e-06, "loss": 0.3582, "step": 35414 }, { "epoch": 0.6575004734092521, "grad_norm": 0.45767876505851746, "learning_rate": 5.251220519738546e-06, "loss": 0.1537, "step": 35416 }, { "epoch": 0.6575376035466707, "grad_norm": 0.40727460384368896, "learning_rate": 5.2501939799980915e-06, "loss": 0.1774, "step": 35418 }, { "epoch": 0.6575747336840894, "grad_norm": 0.40965506434440613, "learning_rate": 5.24916750488827e-06, "loss": 0.3089, "step": 35420 }, { "epoch": 0.657611863821508, "grad_norm": 0.3258717656135559, "learning_rate": 5.248141094423049e-06, "loss": 0.1492, "step": 35422 }, { "epoch": 0.6576489939589266, "grad_norm": 0.46241846680641174, "learning_rate": 5.2471147486163976e-06, "loss": 0.3617, "step": 35424 }, { "epoch": 0.6576861240963453, "grad_norm": 0.28546419739723206, "learning_rate": 5.246088467482273e-06, "loss": 0.4672, "step": 35426 }, { "epoch": 0.6577232542337639, "grad_norm": 1.5007548332214355, "learning_rate": 5.245062251034649e-06, "loss": 0.2412, "step": 35428 }, { "epoch": 0.6577603843711826, "grad_norm": 0.41270267963409424, "learning_rate": 5.244036099287483e-06, "loss": 0.2759, "step": 35430 }, { "epoch": 0.6577975145086012, "grad_norm": 0.23636583983898163, "learning_rate": 5.243010012254739e-06, "loss": 0.1365, "step": 35432 }, { "epoch": 0.6578346446460198, "grad_norm": 0.44029700756073, "learning_rate": 5.241983989950379e-06, "loss": 0.1614, "step": 35434 }, { "epoch": 0.6578717747834385, "grad_norm": 0.2560267746448517, "learning_rate": 5.240958032388366e-06, "loss": 0.3045, "step": 35436 }, { "epoch": 0.6579089049208571, "grad_norm": 0.32967299222946167, "learning_rate": 5.2399321395826615e-06, "loss": 0.2045, "step": 35438 }, { "epoch": 0.6579460350582758, "grad_norm": 0.39231032133102417, "learning_rate": 5.23890631154722e-06, "loss": 0.2917, "step": 35440 }, { "epoch": 0.6579831651956944, "grad_norm": 0.3208622634410858, "learning_rate": 5.237880548296004e-06, "loss": 0.1383, "step": 35442 }, { "epoch": 0.658020295333113, "grad_norm": 0.3608616590499878, "learning_rate": 5.236854849842967e-06, "loss": 0.2382, "step": 35444 }, { "epoch": 0.6580574254705317, "grad_norm": 0.3875238597393036, "learning_rate": 5.235829216202073e-06, "loss": 0.4459, "step": 35446 }, { "epoch": 0.6580945556079503, "grad_norm": 0.5234822034835815, "learning_rate": 5.234803647387269e-06, "loss": 0.2614, "step": 35448 }, { "epoch": 0.658131685745369, "grad_norm": 0.4111554026603699, "learning_rate": 5.2337781434125175e-06, "loss": 0.3282, "step": 35450 }, { "epoch": 0.6581688158827875, "grad_norm": 0.2506873309612274, "learning_rate": 5.232752704291766e-06, "loss": 0.1384, "step": 35452 }, { "epoch": 0.6582059460202062, "grad_norm": 0.40214842557907104, "learning_rate": 5.2317273300389695e-06, "loss": 0.4022, "step": 35454 }, { "epoch": 0.6582430761576249, "grad_norm": 0.5302115082740784, "learning_rate": 5.230702020668083e-06, "loss": 0.2811, "step": 35456 }, { "epoch": 0.6582802062950435, "grad_norm": 0.5096840262413025, "learning_rate": 5.2296767761930555e-06, "loss": 0.4807, "step": 35458 }, { "epoch": 0.6583173364324622, "grad_norm": 0.25991812348365784, "learning_rate": 5.2286515966278375e-06, "loss": 0.1888, "step": 35460 }, { "epoch": 0.6583544665698807, "grad_norm": 0.25767040252685547, "learning_rate": 5.227626481986383e-06, "loss": 0.2201, "step": 35462 }, { "epoch": 0.6583915967072994, "grad_norm": 0.39043304324150085, "learning_rate": 5.226601432282636e-06, "loss": 0.1861, "step": 35464 }, { "epoch": 0.6584287268447181, "grad_norm": 0.34052613377571106, "learning_rate": 5.225576447530543e-06, "loss": 0.2574, "step": 35466 }, { "epoch": 0.6584658569821367, "grad_norm": 0.30887866020202637, "learning_rate": 5.224551527744057e-06, "loss": 0.3945, "step": 35468 }, { "epoch": 0.6585029871195553, "grad_norm": 0.4929267466068268, "learning_rate": 5.2235266729371185e-06, "loss": 0.2169, "step": 35470 }, { "epoch": 0.6585401172569739, "grad_norm": 0.46489089727401733, "learning_rate": 5.222501883123674e-06, "loss": 0.2385, "step": 35472 }, { "epoch": 0.6585772473943926, "grad_norm": 0.3887680470943451, "learning_rate": 5.221477158317669e-06, "loss": 0.2796, "step": 35474 }, { "epoch": 0.6586143775318113, "grad_norm": 0.5122246146202087, "learning_rate": 5.220452498533051e-06, "loss": 0.2728, "step": 35476 }, { "epoch": 0.6586515076692299, "grad_norm": 0.2843627333641052, "learning_rate": 5.2194279037837524e-06, "loss": 0.2369, "step": 35478 }, { "epoch": 0.6586886378066485, "grad_norm": 0.3789721131324768, "learning_rate": 5.218403374083723e-06, "loss": 0.3219, "step": 35480 }, { "epoch": 0.6587257679440671, "grad_norm": 0.19454063475131989, "learning_rate": 5.217378909446899e-06, "loss": 0.3091, "step": 35482 }, { "epoch": 0.6587628980814858, "grad_norm": 0.3968220353126526, "learning_rate": 5.216354509887223e-06, "loss": 0.3076, "step": 35484 }, { "epoch": 0.6588000282189045, "grad_norm": 0.4575442969799042, "learning_rate": 5.215330175418634e-06, "loss": 0.6014, "step": 35486 }, { "epoch": 0.6588371583563231, "grad_norm": 0.672534167766571, "learning_rate": 5.214305906055073e-06, "loss": 0.4092, "step": 35488 }, { "epoch": 0.6588742884937417, "grad_norm": 0.3155250549316406, "learning_rate": 5.2132817018104734e-06, "loss": 0.2757, "step": 35490 }, { "epoch": 0.6589114186311603, "grad_norm": 0.4715367257595062, "learning_rate": 5.212257562698768e-06, "loss": 0.1655, "step": 35492 }, { "epoch": 0.658948548768579, "grad_norm": 1.056399941444397, "learning_rate": 5.211233488733897e-06, "loss": 0.3875, "step": 35494 }, { "epoch": 0.6589856789059977, "grad_norm": 0.4571651220321655, "learning_rate": 5.210209479929793e-06, "loss": 0.2939, "step": 35496 }, { "epoch": 0.6590228090434163, "grad_norm": 0.5640197396278381, "learning_rate": 5.209185536300392e-06, "loss": 0.3932, "step": 35498 }, { "epoch": 0.6590599391808349, "grad_norm": 0.3530406951904297, "learning_rate": 5.20816165785963e-06, "loss": 0.3019, "step": 35500 }, { "epoch": 0.6590970693182535, "grad_norm": 0.488593190908432, "learning_rate": 5.20713784462143e-06, "loss": 0.296, "step": 35502 }, { "epoch": 0.6591341994556722, "grad_norm": 0.2664813995361328, "learning_rate": 5.2061140965997284e-06, "loss": 0.176, "step": 35504 }, { "epoch": 0.6591713295930908, "grad_norm": 0.3595189154148102, "learning_rate": 5.205090413808453e-06, "loss": 0.3127, "step": 35506 }, { "epoch": 0.6592084597305095, "grad_norm": 0.42755237221717834, "learning_rate": 5.204066796261535e-06, "loss": 0.2745, "step": 35508 }, { "epoch": 0.6592455898679281, "grad_norm": 0.5254095792770386, "learning_rate": 5.203043243972907e-06, "loss": 0.19, "step": 35510 }, { "epoch": 0.6592827200053467, "grad_norm": 0.28370365500450134, "learning_rate": 5.202019756956489e-06, "loss": 0.2566, "step": 35512 }, { "epoch": 0.6593198501427654, "grad_norm": 0.38276195526123047, "learning_rate": 5.2009963352262135e-06, "loss": 0.2152, "step": 35514 }, { "epoch": 0.659356980280184, "grad_norm": 0.4603082537651062, "learning_rate": 5.1999729787959986e-06, "loss": 0.1963, "step": 35516 }, { "epoch": 0.6593941104176027, "grad_norm": 0.44894811511039734, "learning_rate": 5.198949687679774e-06, "loss": 0.3753, "step": 35518 }, { "epoch": 0.6594312405550213, "grad_norm": 0.43216943740844727, "learning_rate": 5.197926461891464e-06, "loss": 0.3356, "step": 35520 }, { "epoch": 0.6594683706924399, "grad_norm": 0.3915995955467224, "learning_rate": 5.196903301444991e-06, "loss": 0.1585, "step": 35522 }, { "epoch": 0.6595055008298586, "grad_norm": 0.35694894194602966, "learning_rate": 5.195880206354276e-06, "loss": 0.2996, "step": 35524 }, { "epoch": 0.6595426309672772, "grad_norm": 0.41175082325935364, "learning_rate": 5.194857176633246e-06, "loss": 0.3438, "step": 35526 }, { "epoch": 0.6595797611046958, "grad_norm": 0.3641592860221863, "learning_rate": 5.193834212295813e-06, "loss": 0.3659, "step": 35528 }, { "epoch": 0.6596168912421145, "grad_norm": 0.32085898518562317, "learning_rate": 5.1928113133559e-06, "loss": 0.3078, "step": 35530 }, { "epoch": 0.6596540213795331, "grad_norm": 0.3446044325828552, "learning_rate": 5.1917884798274295e-06, "loss": 0.2144, "step": 35532 }, { "epoch": 0.6596911515169518, "grad_norm": 0.4545195400714874, "learning_rate": 5.1907657117243124e-06, "loss": 0.243, "step": 35534 }, { "epoch": 0.6597282816543704, "grad_norm": 0.6099935173988342, "learning_rate": 5.189743009060468e-06, "loss": 0.4023, "step": 35536 }, { "epoch": 0.659765411791789, "grad_norm": 0.31657537817955017, "learning_rate": 5.188720371849817e-06, "loss": 0.3495, "step": 35538 }, { "epoch": 0.6598025419292077, "grad_norm": 0.5445834994316101, "learning_rate": 5.187697800106266e-06, "loss": 0.2018, "step": 35540 }, { "epoch": 0.6598396720666263, "grad_norm": 0.36553066968917847, "learning_rate": 5.186675293843734e-06, "loss": 0.2207, "step": 35542 }, { "epoch": 0.659876802204045, "grad_norm": 0.43657222390174866, "learning_rate": 5.185652853076133e-06, "loss": 0.4178, "step": 35544 }, { "epoch": 0.6599139323414636, "grad_norm": 0.33869799971580505, "learning_rate": 5.184630477817376e-06, "loss": 0.2608, "step": 35546 }, { "epoch": 0.6599510624788822, "grad_norm": 0.36574652791023254, "learning_rate": 5.183608168081375e-06, "loss": 0.4497, "step": 35548 }, { "epoch": 0.6599881926163009, "grad_norm": 0.47389522194862366, "learning_rate": 5.182585923882044e-06, "loss": 0.1713, "step": 35550 }, { "epoch": 0.6600253227537195, "grad_norm": 0.2933224141597748, "learning_rate": 5.1815637452332845e-06, "loss": 0.3438, "step": 35552 }, { "epoch": 0.6600624528911382, "grad_norm": 0.5897746682167053, "learning_rate": 5.180541632149014e-06, "loss": 0.2287, "step": 35554 }, { "epoch": 0.6600995830285568, "grad_norm": 0.3783877193927765, "learning_rate": 5.179519584643131e-06, "loss": 0.2508, "step": 35556 }, { "epoch": 0.6601367131659754, "grad_norm": 0.3197145462036133, "learning_rate": 5.178497602729549e-06, "loss": 0.4348, "step": 35558 }, { "epoch": 0.660173843303394, "grad_norm": 0.36324411630630493, "learning_rate": 5.177475686422172e-06, "loss": 0.2722, "step": 35560 }, { "epoch": 0.6602109734408127, "grad_norm": 0.573016345500946, "learning_rate": 5.176453835734905e-06, "loss": 0.5282, "step": 35562 }, { "epoch": 0.6602481035782314, "grad_norm": 0.3848489224910736, "learning_rate": 5.175432050681658e-06, "loss": 0.3175, "step": 35564 }, { "epoch": 0.66028523371565, "grad_norm": 0.51182621717453, "learning_rate": 5.174410331276325e-06, "loss": 0.2433, "step": 35566 }, { "epoch": 0.6603223638530686, "grad_norm": 0.44513410329818726, "learning_rate": 5.173388677532813e-06, "loss": 0.1991, "step": 35568 }, { "epoch": 0.6603594939904872, "grad_norm": 0.5000667572021484, "learning_rate": 5.172367089465025e-06, "loss": 0.154, "step": 35570 }, { "epoch": 0.6603966241279059, "grad_norm": 0.43416404724121094, "learning_rate": 5.171345567086859e-06, "loss": 0.1413, "step": 35572 }, { "epoch": 0.6604337542653246, "grad_norm": 0.6004547476768494, "learning_rate": 5.17032411041222e-06, "loss": 0.4135, "step": 35574 }, { "epoch": 0.6604708844027432, "grad_norm": 0.38738057017326355, "learning_rate": 5.169302719455005e-06, "loss": 0.4395, "step": 35576 }, { "epoch": 0.6605080145401618, "grad_norm": 0.3397410809993744, "learning_rate": 5.168281394229104e-06, "loss": 0.1427, "step": 35578 }, { "epoch": 0.6605451446775804, "grad_norm": 0.707030177116394, "learning_rate": 5.167260134748422e-06, "loss": 0.1641, "step": 35580 }, { "epoch": 0.6605822748149991, "grad_norm": 0.3504965007305145, "learning_rate": 5.166238941026855e-06, "loss": 0.3173, "step": 35582 }, { "epoch": 0.6606194049524178, "grad_norm": 0.4131953716278076, "learning_rate": 5.165217813078296e-06, "loss": 0.2435, "step": 35584 }, { "epoch": 0.6606565350898363, "grad_norm": 0.3229162395000458, "learning_rate": 5.16419675091664e-06, "loss": 0.3132, "step": 35586 }, { "epoch": 0.660693665227255, "grad_norm": 0.525147020816803, "learning_rate": 5.163175754555786e-06, "loss": 0.2187, "step": 35588 }, { "epoch": 0.6607307953646736, "grad_norm": 0.37702298164367676, "learning_rate": 5.162154824009617e-06, "loss": 0.3664, "step": 35590 }, { "epoch": 0.6607679255020923, "grad_norm": 0.3066100776195526, "learning_rate": 5.161133959292033e-06, "loss": 0.1916, "step": 35592 }, { "epoch": 0.660805055639511, "grad_norm": 0.315158486366272, "learning_rate": 5.160113160416918e-06, "loss": 0.3567, "step": 35594 }, { "epoch": 0.6608421857769295, "grad_norm": 0.5466799139976501, "learning_rate": 5.159092427398172e-06, "loss": 0.3157, "step": 35596 }, { "epoch": 0.6608793159143482, "grad_norm": 0.23543672263622284, "learning_rate": 5.158071760249673e-06, "loss": 0.2462, "step": 35598 }, { "epoch": 0.6609164460517668, "grad_norm": 0.6227414608001709, "learning_rate": 5.157051158985315e-06, "loss": 0.2832, "step": 35600 }, { "epoch": 0.6609535761891855, "grad_norm": 0.2572740912437439, "learning_rate": 5.156030623618987e-06, "loss": 0.3225, "step": 35602 }, { "epoch": 0.6609907063266041, "grad_norm": 0.3736858069896698, "learning_rate": 5.15501015416457e-06, "loss": 0.1234, "step": 35604 }, { "epoch": 0.6610278364640227, "grad_norm": 0.5968903303146362, "learning_rate": 5.153989750635952e-06, "loss": 0.354, "step": 35606 }, { "epoch": 0.6610649666014414, "grad_norm": 0.4293021261692047, "learning_rate": 5.1529694130470175e-06, "loss": 0.4129, "step": 35608 }, { "epoch": 0.66110209673886, "grad_norm": 0.33938416838645935, "learning_rate": 5.151949141411652e-06, "loss": 0.2606, "step": 35610 }, { "epoch": 0.6611392268762787, "grad_norm": 0.4143626093864441, "learning_rate": 5.150928935743735e-06, "loss": 0.2341, "step": 35612 }, { "epoch": 0.6611763570136973, "grad_norm": 0.3429454267024994, "learning_rate": 5.149908796057157e-06, "loss": 0.3199, "step": 35614 }, { "epoch": 0.6612134871511159, "grad_norm": 0.37814095616340637, "learning_rate": 5.148888722365787e-06, "loss": 0.2741, "step": 35616 }, { "epoch": 0.6612506172885346, "grad_norm": 0.3880716562271118, "learning_rate": 5.147868714683515e-06, "loss": 0.2515, "step": 35618 }, { "epoch": 0.6612877474259532, "grad_norm": 0.22556045651435852, "learning_rate": 5.146848773024213e-06, "loss": 0.3194, "step": 35620 }, { "epoch": 0.6613248775633719, "grad_norm": 0.3572297692298889, "learning_rate": 5.145828897401761e-06, "loss": 0.3897, "step": 35622 }, { "epoch": 0.6613620077007905, "grad_norm": 0.24831829965114594, "learning_rate": 5.144809087830038e-06, "loss": 0.1197, "step": 35624 }, { "epoch": 0.6613991378382091, "grad_norm": 0.3366273045539856, "learning_rate": 5.143789344322925e-06, "loss": 0.3036, "step": 35626 }, { "epoch": 0.6614362679756278, "grad_norm": 0.31361690163612366, "learning_rate": 5.142769666894287e-06, "loss": 0.1911, "step": 35628 }, { "epoch": 0.6614733981130464, "grad_norm": 0.5385131239891052, "learning_rate": 5.141750055558008e-06, "loss": 0.2099, "step": 35630 }, { "epoch": 0.6615105282504651, "grad_norm": 0.5193853974342346, "learning_rate": 5.140730510327956e-06, "loss": 0.2996, "step": 35632 }, { "epoch": 0.6615476583878837, "grad_norm": 0.5467257499694824, "learning_rate": 5.139711031218008e-06, "loss": 0.4593, "step": 35634 }, { "epoch": 0.6615847885253023, "grad_norm": 0.24626703560352325, "learning_rate": 5.1386916182420375e-06, "loss": 0.4301, "step": 35636 }, { "epoch": 0.661621918662721, "grad_norm": 0.41291165351867676, "learning_rate": 5.137672271413909e-06, "loss": 0.3254, "step": 35638 }, { "epoch": 0.6616590488001396, "grad_norm": 0.5426741242408752, "learning_rate": 5.136652990747502e-06, "loss": 0.3513, "step": 35640 }, { "epoch": 0.6616961789375583, "grad_norm": 0.6074190735816956, "learning_rate": 5.135633776256674e-06, "loss": 0.3691, "step": 35642 }, { "epoch": 0.6617333090749768, "grad_norm": 0.28804171085357666, "learning_rate": 5.134614627955301e-06, "loss": 0.2254, "step": 35644 }, { "epoch": 0.6617704392123955, "grad_norm": 0.4080308973789215, "learning_rate": 5.13359554585725e-06, "loss": 0.2475, "step": 35646 }, { "epoch": 0.6618075693498142, "grad_norm": 0.3483557403087616, "learning_rate": 5.132576529976387e-06, "loss": 0.3901, "step": 35648 }, { "epoch": 0.6618446994872328, "grad_norm": 0.42413410544395447, "learning_rate": 5.131557580326577e-06, "loss": 0.3416, "step": 35650 }, { "epoch": 0.6618818296246515, "grad_norm": 0.3393104672431946, "learning_rate": 5.13053869692169e-06, "loss": 0.2569, "step": 35652 }, { "epoch": 0.66191895976207, "grad_norm": 0.43552491068840027, "learning_rate": 5.129519879775582e-06, "loss": 0.4348, "step": 35654 }, { "epoch": 0.6619560898994887, "grad_norm": 0.1484507918357849, "learning_rate": 5.128501128902119e-06, "loss": 0.2366, "step": 35656 }, { "epoch": 0.6619932200369073, "grad_norm": 0.2583087980747223, "learning_rate": 5.127482444315164e-06, "loss": 0.1096, "step": 35658 }, { "epoch": 0.662030350174326, "grad_norm": 0.414387047290802, "learning_rate": 5.126463826028583e-06, "loss": 0.4068, "step": 35660 }, { "epoch": 0.6620674803117447, "grad_norm": 0.533430278301239, "learning_rate": 5.125445274056226e-06, "loss": 0.1305, "step": 35662 }, { "epoch": 0.6621046104491632, "grad_norm": 0.25655046105384827, "learning_rate": 5.124426788411963e-06, "loss": 0.2066, "step": 35664 }, { "epoch": 0.6621417405865819, "grad_norm": 0.39234763383865356, "learning_rate": 5.123408369109642e-06, "loss": 0.3543, "step": 35666 }, { "epoch": 0.6621788707240005, "grad_norm": 0.43655967712402344, "learning_rate": 5.122390016163127e-06, "loss": 0.1866, "step": 35668 }, { "epoch": 0.6622160008614192, "grad_norm": 0.6385837197303772, "learning_rate": 5.121371729586273e-06, "loss": 0.2823, "step": 35670 }, { "epoch": 0.6622531309988379, "grad_norm": 0.31867802143096924, "learning_rate": 5.120353509392937e-06, "loss": 0.3769, "step": 35672 }, { "epoch": 0.6622902611362564, "grad_norm": 0.30754727125167847, "learning_rate": 5.1193353555969734e-06, "loss": 0.2686, "step": 35674 }, { "epoch": 0.6623273912736751, "grad_norm": 0.2654276490211487, "learning_rate": 5.118317268212236e-06, "loss": 0.1817, "step": 35676 }, { "epoch": 0.6623645214110937, "grad_norm": 0.4747851490974426, "learning_rate": 5.117299247252583e-06, "loss": 0.4362, "step": 35678 }, { "epoch": 0.6624016515485124, "grad_norm": 0.40849828720092773, "learning_rate": 5.1162812927318584e-06, "loss": 0.3287, "step": 35680 }, { "epoch": 0.6624387816859311, "grad_norm": 0.3860376179218292, "learning_rate": 5.1152634046639194e-06, "loss": 0.2716, "step": 35682 }, { "epoch": 0.6624759118233496, "grad_norm": 0.6082983016967773, "learning_rate": 5.114245583062612e-06, "loss": 0.3003, "step": 35684 }, { "epoch": 0.6625130419607683, "grad_norm": 0.4881953001022339, "learning_rate": 5.113227827941786e-06, "loss": 0.2605, "step": 35686 }, { "epoch": 0.6625501720981869, "grad_norm": 0.26633453369140625, "learning_rate": 5.112210139315292e-06, "loss": 0.3383, "step": 35688 }, { "epoch": 0.6625873022356056, "grad_norm": 0.30107632279396057, "learning_rate": 5.111192517196981e-06, "loss": 0.3308, "step": 35690 }, { "epoch": 0.6626244323730243, "grad_norm": 0.4587944746017456, "learning_rate": 5.110174961600694e-06, "loss": 0.394, "step": 35692 }, { "epoch": 0.6626615625104428, "grad_norm": 0.3761554956436157, "learning_rate": 5.1091574725402775e-06, "loss": 0.1361, "step": 35694 }, { "epoch": 0.6626986926478615, "grad_norm": 0.49689170718193054, "learning_rate": 5.108140050029577e-06, "loss": 0.1904, "step": 35696 }, { "epoch": 0.6627358227852801, "grad_norm": 0.3448061943054199, "learning_rate": 5.10712269408244e-06, "loss": 0.2597, "step": 35698 }, { "epoch": 0.6627729529226988, "grad_norm": 0.2589368522167206, "learning_rate": 5.10610540471271e-06, "loss": 0.1484, "step": 35700 }, { "epoch": 0.6628100830601175, "grad_norm": 0.3849527835845947, "learning_rate": 5.105088181934222e-06, "loss": 0.2872, "step": 35702 }, { "epoch": 0.662847213197536, "grad_norm": 0.4294360280036926, "learning_rate": 5.104071025760827e-06, "loss": 0.2916, "step": 35704 }, { "epoch": 0.6628843433349547, "grad_norm": 0.4944005012512207, "learning_rate": 5.103053936206356e-06, "loss": 0.19, "step": 35706 }, { "epoch": 0.6629214734723733, "grad_norm": 0.3311578333377838, "learning_rate": 5.102036913284652e-06, "loss": 0.2588, "step": 35708 }, { "epoch": 0.662958603609792, "grad_norm": 0.2882724702358246, "learning_rate": 5.1010199570095565e-06, "loss": 0.1761, "step": 35710 }, { "epoch": 0.6629957337472105, "grad_norm": 0.27424556016921997, "learning_rate": 5.100003067394903e-06, "loss": 0.3292, "step": 35712 }, { "epoch": 0.6630328638846292, "grad_norm": 0.33726057410240173, "learning_rate": 5.098986244454536e-06, "loss": 0.2198, "step": 35714 }, { "epoch": 0.6630699940220479, "grad_norm": 0.3712612986564636, "learning_rate": 5.097969488202281e-06, "loss": 0.1616, "step": 35716 }, { "epoch": 0.6631071241594665, "grad_norm": 0.4006690979003906, "learning_rate": 5.09695279865198e-06, "loss": 0.2564, "step": 35718 }, { "epoch": 0.6631442542968852, "grad_norm": 0.32374322414398193, "learning_rate": 5.095936175817463e-06, "loss": 0.2432, "step": 35720 }, { "epoch": 0.6631813844343037, "grad_norm": 0.25246912240982056, "learning_rate": 5.094919619712571e-06, "loss": 0.1877, "step": 35722 }, { "epoch": 0.6632185145717224, "grad_norm": 0.4558045268058777, "learning_rate": 5.0939031303511246e-06, "loss": 0.3896, "step": 35724 }, { "epoch": 0.6632556447091411, "grad_norm": 0.6932918429374695, "learning_rate": 5.092886707746962e-06, "loss": 0.4129, "step": 35726 }, { "epoch": 0.6632927748465597, "grad_norm": 0.4563109874725342, "learning_rate": 5.091870351913916e-06, "loss": 0.2993, "step": 35728 }, { "epoch": 0.6633299049839784, "grad_norm": 0.2583703398704529, "learning_rate": 5.090854062865811e-06, "loss": 0.3683, "step": 35730 }, { "epoch": 0.6633670351213969, "grad_norm": 0.45667240023612976, "learning_rate": 5.0898378406164765e-06, "loss": 0.2917, "step": 35732 }, { "epoch": 0.6634041652588156, "grad_norm": 0.5004454255104065, "learning_rate": 5.088821685179741e-06, "loss": 0.2969, "step": 35734 }, { "epoch": 0.6634412953962343, "grad_norm": 0.40400418639183044, "learning_rate": 5.087805596569431e-06, "loss": 0.4543, "step": 35736 }, { "epoch": 0.6634784255336529, "grad_norm": 0.3969423174858093, "learning_rate": 5.0867895747993745e-06, "loss": 0.2197, "step": 35738 }, { "epoch": 0.6635155556710716, "grad_norm": 0.4083348214626312, "learning_rate": 5.085773619883398e-06, "loss": 0.3656, "step": 35740 }, { "epoch": 0.6635526858084901, "grad_norm": 0.44520920515060425, "learning_rate": 5.084757731835319e-06, "loss": 0.1845, "step": 35742 }, { "epoch": 0.6635898159459088, "grad_norm": 0.3800777196884155, "learning_rate": 5.083741910668969e-06, "loss": 0.3937, "step": 35744 }, { "epoch": 0.6636269460833275, "grad_norm": 0.3157062530517578, "learning_rate": 5.082726156398162e-06, "loss": 0.087, "step": 35746 }, { "epoch": 0.6636640762207461, "grad_norm": 0.5410152673721313, "learning_rate": 5.081710469036723e-06, "loss": 0.2215, "step": 35748 }, { "epoch": 0.6637012063581648, "grad_norm": 0.4531360864639282, "learning_rate": 5.080694848598472e-06, "loss": 0.3463, "step": 35750 }, { "epoch": 0.6637383364955833, "grad_norm": 0.45560604333877563, "learning_rate": 5.079679295097233e-06, "loss": 0.4001, "step": 35752 }, { "epoch": 0.663775466633002, "grad_norm": 0.464783638715744, "learning_rate": 5.078663808546817e-06, "loss": 0.2929, "step": 35754 }, { "epoch": 0.6638125967704206, "grad_norm": 0.42269477248191833, "learning_rate": 5.077648388961045e-06, "loss": 0.4259, "step": 35756 }, { "epoch": 0.6638497269078393, "grad_norm": 0.37835606932640076, "learning_rate": 5.076633036353735e-06, "loss": 0.1601, "step": 35758 }, { "epoch": 0.663886857045258, "grad_norm": 0.37240105867385864, "learning_rate": 5.075617750738702e-06, "loss": 0.3516, "step": 35760 }, { "epoch": 0.6639239871826765, "grad_norm": 0.27211758494377136, "learning_rate": 5.0746025321297595e-06, "loss": 0.2881, "step": 35762 }, { "epoch": 0.6639611173200952, "grad_norm": 0.623069703578949, "learning_rate": 5.07358738054073e-06, "loss": 0.2386, "step": 35764 }, { "epoch": 0.6639982474575138, "grad_norm": 0.27582481503486633, "learning_rate": 5.072572295985414e-06, "loss": 0.1577, "step": 35766 }, { "epoch": 0.6640353775949325, "grad_norm": 0.30054011940956116, "learning_rate": 5.071557278477634e-06, "loss": 0.336, "step": 35768 }, { "epoch": 0.6640725077323512, "grad_norm": 0.386131227016449, "learning_rate": 5.070542328031194e-06, "loss": 0.1935, "step": 35770 }, { "epoch": 0.6641096378697697, "grad_norm": 0.3890044689178467, "learning_rate": 5.069527444659908e-06, "loss": 0.4169, "step": 35772 }, { "epoch": 0.6641467680071884, "grad_norm": 0.2515128254890442, "learning_rate": 5.068512628377583e-06, "loss": 0.2272, "step": 35774 }, { "epoch": 0.664183898144607, "grad_norm": 0.35916200280189514, "learning_rate": 5.067497879198031e-06, "loss": 0.0774, "step": 35776 }, { "epoch": 0.6642210282820257, "grad_norm": 0.5421344041824341, "learning_rate": 5.0664831971350616e-06, "loss": 0.2332, "step": 35778 }, { "epoch": 0.6642581584194444, "grad_norm": 0.34166672825813293, "learning_rate": 5.065468582202474e-06, "loss": 0.3609, "step": 35780 }, { "epoch": 0.6642952885568629, "grad_norm": 0.3095262348651886, "learning_rate": 5.064454034414079e-06, "loss": 0.1724, "step": 35782 }, { "epoch": 0.6643324186942816, "grad_norm": 0.382196307182312, "learning_rate": 5.063439553783681e-06, "loss": 0.3086, "step": 35784 }, { "epoch": 0.6643695488317002, "grad_norm": 0.3494543433189392, "learning_rate": 5.062425140325088e-06, "loss": 0.1381, "step": 35786 }, { "epoch": 0.6644066789691189, "grad_norm": 0.3344893753528595, "learning_rate": 5.061410794052095e-06, "loss": 0.2822, "step": 35788 }, { "epoch": 0.6644438091065376, "grad_norm": 0.3500637114048004, "learning_rate": 5.060396514978512e-06, "loss": 0.1001, "step": 35790 }, { "epoch": 0.6644809392439561, "grad_norm": 0.30417224764823914, "learning_rate": 5.059382303118132e-06, "loss": 0.271, "step": 35792 }, { "epoch": 0.6645180693813748, "grad_norm": 0.5784962773323059, "learning_rate": 5.058368158484761e-06, "loss": 0.3286, "step": 35794 }, { "epoch": 0.6645551995187934, "grad_norm": 0.3390215337276459, "learning_rate": 5.057354081092198e-06, "loss": 0.3508, "step": 35796 }, { "epoch": 0.6645923296562121, "grad_norm": 0.7221940755844116, "learning_rate": 5.0563400709542394e-06, "loss": 0.4065, "step": 35798 }, { "epoch": 0.6646294597936308, "grad_norm": 0.31197357177734375, "learning_rate": 5.055326128084685e-06, "loss": 0.1717, "step": 35800 }, { "epoch": 0.6646665899310493, "grad_norm": 0.25203120708465576, "learning_rate": 5.054312252497332e-06, "loss": 0.3156, "step": 35802 }, { "epoch": 0.664703720068468, "grad_norm": 0.5010393857955933, "learning_rate": 5.053298444205978e-06, "loss": 0.1339, "step": 35804 }, { "epoch": 0.6647408502058866, "grad_norm": 0.5132566094398499, "learning_rate": 5.052284703224413e-06, "loss": 0.3982, "step": 35806 }, { "epoch": 0.6647779803433053, "grad_norm": 0.3334343135356903, "learning_rate": 5.051271029566435e-06, "loss": 0.4459, "step": 35808 }, { "epoch": 0.6648151104807238, "grad_norm": 0.595818281173706, "learning_rate": 5.050257423245831e-06, "loss": 0.2544, "step": 35810 }, { "epoch": 0.6648522406181425, "grad_norm": 0.27838897705078125, "learning_rate": 5.049243884276398e-06, "loss": 0.4134, "step": 35812 }, { "epoch": 0.6648893707555612, "grad_norm": 0.328463077545166, "learning_rate": 5.048230412671926e-06, "loss": 0.3704, "step": 35814 }, { "epoch": 0.6649265008929798, "grad_norm": 0.3384462594985962, "learning_rate": 5.0472170084462115e-06, "loss": 0.1662, "step": 35816 }, { "epoch": 0.6649636310303985, "grad_norm": 0.2465285211801529, "learning_rate": 5.046203671613033e-06, "loss": 0.1677, "step": 35818 }, { "epoch": 0.665000761167817, "grad_norm": 0.2547358572483063, "learning_rate": 5.045190402186184e-06, "loss": 0.3787, "step": 35820 }, { "epoch": 0.6650378913052357, "grad_norm": 0.3993379473686218, "learning_rate": 5.044177200179452e-06, "loss": 0.157, "step": 35822 }, { "epoch": 0.6650750214426544, "grad_norm": 0.5293014049530029, "learning_rate": 5.043164065606625e-06, "loss": 0.2889, "step": 35824 }, { "epoch": 0.665112151580073, "grad_norm": 0.3078661561012268, "learning_rate": 5.0421509984814875e-06, "loss": 0.2747, "step": 35826 }, { "epoch": 0.6651492817174917, "grad_norm": 0.4443161189556122, "learning_rate": 5.041137998817828e-06, "loss": 0.043, "step": 35828 }, { "epoch": 0.6651864118549102, "grad_norm": 0.2378215342760086, "learning_rate": 5.040125066629426e-06, "loss": 0.3533, "step": 35830 }, { "epoch": 0.6652235419923289, "grad_norm": 0.3818085491657257, "learning_rate": 5.039112201930063e-06, "loss": 0.1341, "step": 35832 }, { "epoch": 0.6652606721297476, "grad_norm": 0.4161611795425415, "learning_rate": 5.038099404733522e-06, "loss": 0.3693, "step": 35834 }, { "epoch": 0.6652978022671662, "grad_norm": 0.5004638433456421, "learning_rate": 5.037086675053586e-06, "loss": 0.3019, "step": 35836 }, { "epoch": 0.6653349324045849, "grad_norm": 0.2755444049835205, "learning_rate": 5.036074012904034e-06, "loss": 0.2303, "step": 35838 }, { "epoch": 0.6653720625420034, "grad_norm": 0.6116583943367004, "learning_rate": 5.035061418298651e-06, "loss": 0.5848, "step": 35840 }, { "epoch": 0.6654091926794221, "grad_norm": 0.34827920794487, "learning_rate": 5.034048891251206e-06, "loss": 0.1568, "step": 35842 }, { "epoch": 0.6654463228168408, "grad_norm": 0.544321596622467, "learning_rate": 5.033036431775481e-06, "loss": 0.1322, "step": 35844 }, { "epoch": 0.6654834529542594, "grad_norm": 0.3289828598499298, "learning_rate": 5.032024039885253e-06, "loss": 0.2501, "step": 35846 }, { "epoch": 0.665520583091678, "grad_norm": 0.5619469881057739, "learning_rate": 5.031011715594296e-06, "loss": 0.2011, "step": 35848 }, { "epoch": 0.6655577132290966, "grad_norm": 0.3207385241985321, "learning_rate": 5.029999458916389e-06, "loss": 0.2082, "step": 35850 }, { "epoch": 0.6655948433665153, "grad_norm": 0.5099183320999146, "learning_rate": 5.0289872698653e-06, "loss": 0.2355, "step": 35852 }, { "epoch": 0.665631973503934, "grad_norm": 0.4620128273963928, "learning_rate": 5.027975148454808e-06, "loss": 0.2548, "step": 35854 }, { "epoch": 0.6656691036413526, "grad_norm": 0.4863109886646271, "learning_rate": 5.026963094698677e-06, "loss": 0.3173, "step": 35856 }, { "epoch": 0.6657062337787713, "grad_norm": 0.3203144669532776, "learning_rate": 5.025951108610683e-06, "loss": 0.2457, "step": 35858 }, { "epoch": 0.6657433639161898, "grad_norm": 0.28783664107322693, "learning_rate": 5.024939190204594e-06, "loss": 0.1186, "step": 35860 }, { "epoch": 0.6657804940536085, "grad_norm": 0.30248650908470154, "learning_rate": 5.0239273394941815e-06, "loss": 0.0793, "step": 35862 }, { "epoch": 0.6658176241910271, "grad_norm": 0.22050417959690094, "learning_rate": 5.022915556493213e-06, "loss": 0.2458, "step": 35864 }, { "epoch": 0.6658547543284458, "grad_norm": 0.3294491171836853, "learning_rate": 5.021903841215459e-06, "loss": 0.4251, "step": 35866 }, { "epoch": 0.6658918844658644, "grad_norm": 0.396463543176651, "learning_rate": 5.020892193674679e-06, "loss": 0.2561, "step": 35868 }, { "epoch": 0.665929014603283, "grad_norm": 0.32678523659706116, "learning_rate": 5.019880613884642e-06, "loss": 0.3692, "step": 35870 }, { "epoch": 0.6659661447407017, "grad_norm": 0.7674769163131714, "learning_rate": 5.018869101859116e-06, "loss": 0.2979, "step": 35872 }, { "epoch": 0.6660032748781203, "grad_norm": 0.498659610748291, "learning_rate": 5.01785765761186e-06, "loss": 0.2214, "step": 35874 }, { "epoch": 0.666040405015539, "grad_norm": 0.3917543888092041, "learning_rate": 5.0168462811566355e-06, "loss": 0.2721, "step": 35876 }, { "epoch": 0.6660775351529576, "grad_norm": 0.31248512864112854, "learning_rate": 5.015834972507212e-06, "loss": 0.3128, "step": 35878 }, { "epoch": 0.6661146652903762, "grad_norm": 0.5265284180641174, "learning_rate": 5.0148237316773405e-06, "loss": 0.2835, "step": 35880 }, { "epoch": 0.6661517954277949, "grad_norm": 0.20794647932052612, "learning_rate": 5.013812558680785e-06, "loss": 0.0209, "step": 35882 }, { "epoch": 0.6661889255652135, "grad_norm": 0.5394459366798401, "learning_rate": 5.012801453531306e-06, "loss": 0.2839, "step": 35884 }, { "epoch": 0.6662260557026322, "grad_norm": 0.3130069971084595, "learning_rate": 5.011790416242661e-06, "loss": 0.2207, "step": 35886 }, { "epoch": 0.6662631858400508, "grad_norm": 2.21299147605896, "learning_rate": 5.010779446828607e-06, "loss": 0.1895, "step": 35888 }, { "epoch": 0.6663003159774694, "grad_norm": 0.5102929472923279, "learning_rate": 5.0097685453029045e-06, "loss": 0.3709, "step": 35890 }, { "epoch": 0.6663374461148881, "grad_norm": 0.30325862765312195, "learning_rate": 5.0087577116793e-06, "loss": 0.2482, "step": 35892 }, { "epoch": 0.6663745762523067, "grad_norm": 0.39358147978782654, "learning_rate": 5.007746945971557e-06, "loss": 0.3156, "step": 35894 }, { "epoch": 0.6664117063897254, "grad_norm": 0.45120489597320557, "learning_rate": 5.00673624819342e-06, "loss": 0.1997, "step": 35896 }, { "epoch": 0.666448836527144, "grad_norm": 0.23626482486724854, "learning_rate": 5.005725618358648e-06, "loss": 0.2561, "step": 35898 }, { "epoch": 0.6664859666645626, "grad_norm": 0.5352849364280701, "learning_rate": 5.004715056480989e-06, "loss": 0.2989, "step": 35900 }, { "epoch": 0.6665230968019813, "grad_norm": 0.3275187313556671, "learning_rate": 5.0037045625741955e-06, "loss": 0.1789, "step": 35902 }, { "epoch": 0.6665602269393999, "grad_norm": 0.3067677617073059, "learning_rate": 5.002694136652021e-06, "loss": 0.2387, "step": 35904 }, { "epoch": 0.6665973570768186, "grad_norm": 0.2859973907470703, "learning_rate": 5.001683778728208e-06, "loss": 0.2383, "step": 35906 }, { "epoch": 0.6666344872142371, "grad_norm": 0.36118507385253906, "learning_rate": 5.000673488816506e-06, "loss": 0.229, "step": 35908 }, { "epoch": 0.6666716173516558, "grad_norm": 0.7263296246528625, "learning_rate": 4.999663266930663e-06, "loss": 0.37, "step": 35910 }, { "epoch": 0.6667087474890745, "grad_norm": 0.35135626792907715, "learning_rate": 4.9986531130844294e-06, "loss": 0.1112, "step": 35912 }, { "epoch": 0.6667458776264931, "grad_norm": 0.2783950865268707, "learning_rate": 4.9976430272915425e-06, "loss": 0.2994, "step": 35914 }, { "epoch": 0.6667830077639118, "grad_norm": 0.24082109332084656, "learning_rate": 4.996633009565753e-06, "loss": 0.229, "step": 35916 }, { "epoch": 0.6668201379013303, "grad_norm": 0.4215172529220581, "learning_rate": 4.9956230599207985e-06, "loss": 0.2137, "step": 35918 }, { "epoch": 0.666857268038749, "grad_norm": 0.5319555401802063, "learning_rate": 4.994613178370424e-06, "loss": 0.2399, "step": 35920 }, { "epoch": 0.6668943981761677, "grad_norm": 0.6436839699745178, "learning_rate": 4.9936033649283725e-06, "loss": 0.5121, "step": 35922 }, { "epoch": 0.6669315283135863, "grad_norm": 0.5118966698646545, "learning_rate": 4.992593619608382e-06, "loss": 0.3657, "step": 35924 }, { "epoch": 0.666968658451005, "grad_norm": 0.23507390916347504, "learning_rate": 4.991583942424194e-06, "loss": 0.0808, "step": 35926 }, { "epoch": 0.6670057885884235, "grad_norm": 0.29171034693717957, "learning_rate": 4.990574333389546e-06, "loss": 0.1848, "step": 35928 }, { "epoch": 0.6670429187258422, "grad_norm": 0.268824964761734, "learning_rate": 4.9895647925181815e-06, "loss": 0.2019, "step": 35930 }, { "epoch": 0.6670800488632609, "grad_norm": 0.5149436593055725, "learning_rate": 4.988555319823827e-06, "loss": 0.1829, "step": 35932 }, { "epoch": 0.6671171790006795, "grad_norm": 0.35516873002052307, "learning_rate": 4.987545915320224e-06, "loss": 0.2225, "step": 35934 }, { "epoch": 0.6671543091380981, "grad_norm": 0.5018120408058167, "learning_rate": 4.986536579021111e-06, "loss": 0.4053, "step": 35936 }, { "epoch": 0.6671914392755167, "grad_norm": 0.3599507808685303, "learning_rate": 4.9855273109402135e-06, "loss": 0.3057, "step": 35938 }, { "epoch": 0.6672285694129354, "grad_norm": 0.44632601737976074, "learning_rate": 4.98451811109127e-06, "loss": 0.1872, "step": 35940 }, { "epoch": 0.6672656995503541, "grad_norm": 0.3860042691230774, "learning_rate": 4.983508979488016e-06, "loss": 0.3172, "step": 35942 }, { "epoch": 0.6673028296877727, "grad_norm": 0.4096100926399231, "learning_rate": 4.982499916144175e-06, "loss": 0.3765, "step": 35944 }, { "epoch": 0.6673399598251913, "grad_norm": 0.439312219619751, "learning_rate": 4.981490921073479e-06, "loss": 0.1419, "step": 35946 }, { "epoch": 0.6673770899626099, "grad_norm": 0.22904759645462036, "learning_rate": 4.980481994289661e-06, "loss": 0.2394, "step": 35948 }, { "epoch": 0.6674142201000286, "grad_norm": 0.2927766442298889, "learning_rate": 4.979473135806448e-06, "loss": 0.307, "step": 35950 }, { "epoch": 0.6674513502374473, "grad_norm": 0.5472964644432068, "learning_rate": 4.978464345637567e-06, "loss": 0.4367, "step": 35952 }, { "epoch": 0.6674884803748659, "grad_norm": 0.4023457467556, "learning_rate": 4.977455623796749e-06, "loss": 0.269, "step": 35954 }, { "epoch": 0.6675256105122845, "grad_norm": 0.2702849507331848, "learning_rate": 4.976446970297711e-06, "loss": 0.3737, "step": 35956 }, { "epoch": 0.6675627406497031, "grad_norm": 0.6069836616516113, "learning_rate": 4.975438385154188e-06, "loss": 0.3533, "step": 35958 }, { "epoch": 0.6675998707871218, "grad_norm": 0.26242130994796753, "learning_rate": 4.974429868379893e-06, "loss": 0.3339, "step": 35960 }, { "epoch": 0.6676370009245404, "grad_norm": 0.5177432298660278, "learning_rate": 4.973421419988555e-06, "loss": 0.3557, "step": 35962 }, { "epoch": 0.667674131061959, "grad_norm": 0.3699071407318115, "learning_rate": 4.972413039993895e-06, "loss": 0.2702, "step": 35964 }, { "epoch": 0.6677112611993777, "grad_norm": 0.35056543350219727, "learning_rate": 4.971404728409633e-06, "loss": 0.2996, "step": 35966 }, { "epoch": 0.6677483913367963, "grad_norm": 0.3380669355392456, "learning_rate": 4.970396485249496e-06, "loss": 0.1783, "step": 35968 }, { "epoch": 0.667785521474215, "grad_norm": 0.39701929688453674, "learning_rate": 4.969388310527192e-06, "loss": 0.4205, "step": 35970 }, { "epoch": 0.6678226516116336, "grad_norm": 0.43648093938827515, "learning_rate": 4.9683802042564455e-06, "loss": 0.3462, "step": 35972 }, { "epoch": 0.6678597817490523, "grad_norm": 0.42095333337783813, "learning_rate": 4.967372166450973e-06, "loss": 0.3145, "step": 35974 }, { "epoch": 0.6678969118864709, "grad_norm": 0.4743790030479431, "learning_rate": 4.966364197124494e-06, "loss": 0.1249, "step": 35976 }, { "epoch": 0.6679340420238895, "grad_norm": 0.5376197695732117, "learning_rate": 4.965356296290718e-06, "loss": 0.2832, "step": 35978 }, { "epoch": 0.6679711721613082, "grad_norm": 0.48700010776519775, "learning_rate": 4.9643484639633655e-06, "loss": 0.0408, "step": 35980 }, { "epoch": 0.6680083022987268, "grad_norm": 0.47989457845687866, "learning_rate": 4.963340700156143e-06, "loss": 0.1935, "step": 35982 }, { "epoch": 0.6680454324361454, "grad_norm": 0.5127720832824707, "learning_rate": 4.9623330048827665e-06, "loss": 0.6321, "step": 35984 }, { "epoch": 0.6680825625735641, "grad_norm": 0.5668118596076965, "learning_rate": 4.961325378156949e-06, "loss": 0.2296, "step": 35986 }, { "epoch": 0.6681196927109827, "grad_norm": 0.33296075463294983, "learning_rate": 4.960317819992401e-06, "loss": 0.2721, "step": 35988 }, { "epoch": 0.6681568228484014, "grad_norm": 0.4003893733024597, "learning_rate": 4.959310330402831e-06, "loss": 0.3534, "step": 35990 }, { "epoch": 0.66819395298582, "grad_norm": 0.5721896886825562, "learning_rate": 4.9583029094019534e-06, "loss": 0.2316, "step": 35992 }, { "epoch": 0.6682310831232386, "grad_norm": 0.6361413598060608, "learning_rate": 4.957295557003467e-06, "loss": 0.4689, "step": 35994 }, { "epoch": 0.6682682132606573, "grad_norm": 0.23571045696735382, "learning_rate": 4.956288273221084e-06, "loss": 0.2726, "step": 35996 }, { "epoch": 0.6683053433980759, "grad_norm": 0.3729521334171295, "learning_rate": 4.955281058068513e-06, "loss": 0.2721, "step": 35998 }, { "epoch": 0.6683424735354946, "grad_norm": 0.34197014570236206, "learning_rate": 4.954273911559453e-06, "loss": 0.2081, "step": 36000 }, { "epoch": 0.6683796036729132, "grad_norm": 0.41357097029685974, "learning_rate": 4.95326683370761e-06, "loss": 0.269, "step": 36002 }, { "epoch": 0.6684167338103318, "grad_norm": 0.5535535216331482, "learning_rate": 4.952259824526694e-06, "loss": 0.3703, "step": 36004 }, { "epoch": 0.6684538639477504, "grad_norm": 0.427957147359848, "learning_rate": 4.9512528840303984e-06, "loss": 0.23, "step": 36006 }, { "epoch": 0.6684909940851691, "grad_norm": 0.2387382835149765, "learning_rate": 4.950246012232427e-06, "loss": 0.165, "step": 36008 }, { "epoch": 0.6685281242225878, "grad_norm": 0.19637629389762878, "learning_rate": 4.949239209146483e-06, "loss": 0.2396, "step": 36010 }, { "epoch": 0.6685652543600064, "grad_norm": 0.3589199483394623, "learning_rate": 4.948232474786263e-06, "loss": 0.395, "step": 36012 }, { "epoch": 0.668602384497425, "grad_norm": 0.2703711688518524, "learning_rate": 4.947225809165469e-06, "loss": 0.2584, "step": 36014 }, { "epoch": 0.6686395146348436, "grad_norm": 0.5052081942558289, "learning_rate": 4.946219212297796e-06, "loss": 0.192, "step": 36016 }, { "epoch": 0.6686766447722623, "grad_norm": 0.39090025424957275, "learning_rate": 4.945212684196945e-06, "loss": 0.3031, "step": 36018 }, { "epoch": 0.668713774909681, "grad_norm": 0.37972021102905273, "learning_rate": 4.94420622487661e-06, "loss": 0.2339, "step": 36020 }, { "epoch": 0.6687509050470996, "grad_norm": 0.28349170088768005, "learning_rate": 4.94319983435048e-06, "loss": 0.1602, "step": 36022 }, { "epoch": 0.6687880351845182, "grad_norm": 0.5201715230941772, "learning_rate": 4.942193512632252e-06, "loss": 0.2667, "step": 36024 }, { "epoch": 0.6688251653219368, "grad_norm": 0.31312376260757446, "learning_rate": 4.941187259735622e-06, "loss": 0.3804, "step": 36026 }, { "epoch": 0.6688622954593555, "grad_norm": 0.4552910029888153, "learning_rate": 4.9401810756742795e-06, "loss": 0.4987, "step": 36028 }, { "epoch": 0.6688994255967742, "grad_norm": 0.3306925296783447, "learning_rate": 4.939174960461921e-06, "loss": 0.1924, "step": 36030 }, { "epoch": 0.6689365557341928, "grad_norm": 0.3504699170589447, "learning_rate": 4.9381689141122294e-06, "loss": 0.3078, "step": 36032 }, { "epoch": 0.6689736858716114, "grad_norm": 0.25688788294792175, "learning_rate": 4.937162936638895e-06, "loss": 0.1717, "step": 36034 }, { "epoch": 0.66901081600903, "grad_norm": 0.4880479872226715, "learning_rate": 4.936157028055609e-06, "loss": 0.1504, "step": 36036 }, { "epoch": 0.6690479461464487, "grad_norm": 0.383085161447525, "learning_rate": 4.935151188376058e-06, "loss": 0.4494, "step": 36038 }, { "epoch": 0.6690850762838674, "grad_norm": 0.4531101882457733, "learning_rate": 4.934145417613933e-06, "loss": 0.2313, "step": 36040 }, { "epoch": 0.669122206421286, "grad_norm": 0.3811333477497101, "learning_rate": 4.93313971578291e-06, "loss": 0.3018, "step": 36042 }, { "epoch": 0.6691593365587046, "grad_norm": 0.9566578269004822, "learning_rate": 4.932134082896682e-06, "loss": 0.2951, "step": 36044 }, { "epoch": 0.6691964666961232, "grad_norm": 0.5044624209403992, "learning_rate": 4.9311285189689265e-06, "loss": 0.221, "step": 36046 }, { "epoch": 0.6692335968335419, "grad_norm": 0.2554212212562561, "learning_rate": 4.930123024013328e-06, "loss": 0.2427, "step": 36048 }, { "epoch": 0.6692707269709606, "grad_norm": 0.1995251178741455, "learning_rate": 4.929117598043569e-06, "loss": 0.1658, "step": 36050 }, { "epoch": 0.6693078571083791, "grad_norm": 0.2876911163330078, "learning_rate": 4.92811224107333e-06, "loss": 0.1636, "step": 36052 }, { "epoch": 0.6693449872457978, "grad_norm": 0.3296375572681427, "learning_rate": 4.927106953116293e-06, "loss": 0.3489, "step": 36054 }, { "epoch": 0.6693821173832164, "grad_norm": 0.3932287096977234, "learning_rate": 4.926101734186139e-06, "loss": 0.4067, "step": 36056 }, { "epoch": 0.6694192475206351, "grad_norm": 0.3363531529903412, "learning_rate": 4.925096584296536e-06, "loss": 0.1688, "step": 36058 }, { "epoch": 0.6694563776580537, "grad_norm": 0.37586840987205505, "learning_rate": 4.924091503461169e-06, "loss": 0.2341, "step": 36060 }, { "epoch": 0.6694935077954723, "grad_norm": 0.4679376184940338, "learning_rate": 4.923086491693717e-06, "loss": 0.2759, "step": 36062 }, { "epoch": 0.669530637932891, "grad_norm": 0.49608898162841797, "learning_rate": 4.922081549007847e-06, "loss": 0.2325, "step": 36064 }, { "epoch": 0.6695677680703096, "grad_norm": 0.5918713212013245, "learning_rate": 4.921076675417235e-06, "loss": 0.2805, "step": 36066 }, { "epoch": 0.6696048982077283, "grad_norm": 0.2984234690666199, "learning_rate": 4.920071870935562e-06, "loss": 0.1844, "step": 36068 }, { "epoch": 0.6696420283451469, "grad_norm": 0.402296781539917, "learning_rate": 4.91906713557649e-06, "loss": 0.2175, "step": 36070 }, { "epoch": 0.6696791584825655, "grad_norm": 0.4155738353729248, "learning_rate": 4.918062469353695e-06, "loss": 0.2917, "step": 36072 }, { "epoch": 0.6697162886199842, "grad_norm": 0.2432553470134735, "learning_rate": 4.9170578722808486e-06, "loss": 0.2065, "step": 36074 }, { "epoch": 0.6697534187574028, "grad_norm": 0.6407560706138611, "learning_rate": 4.916053344371618e-06, "loss": 0.3182, "step": 36076 }, { "epoch": 0.6697905488948215, "grad_norm": 0.3637886047363281, "learning_rate": 4.915048885639675e-06, "loss": 0.2615, "step": 36078 }, { "epoch": 0.66982767903224, "grad_norm": 0.25983428955078125, "learning_rate": 4.914044496098687e-06, "loss": 0.2402, "step": 36080 }, { "epoch": 0.6698648091696587, "grad_norm": 0.46867635846138, "learning_rate": 4.913040175762318e-06, "loss": 0.3428, "step": 36082 }, { "epoch": 0.6699019393070774, "grad_norm": 0.3947322964668274, "learning_rate": 4.912035924644237e-06, "loss": 0.3696, "step": 36084 }, { "epoch": 0.669939069444496, "grad_norm": 0.46786293387413025, "learning_rate": 4.911031742758103e-06, "loss": 0.2109, "step": 36086 }, { "epoch": 0.6699761995819147, "grad_norm": 0.3154354393482208, "learning_rate": 4.910027630117585e-06, "loss": 0.3597, "step": 36088 }, { "epoch": 0.6700133297193333, "grad_norm": 0.4669903516769409, "learning_rate": 4.909023586736344e-06, "loss": 0.2733, "step": 36090 }, { "epoch": 0.6700504598567519, "grad_norm": 0.5217243432998657, "learning_rate": 4.908019612628043e-06, "loss": 0.2234, "step": 36092 }, { "epoch": 0.6700875899941706, "grad_norm": 0.25635436177253723, "learning_rate": 4.907015707806347e-06, "loss": 0.3151, "step": 36094 }, { "epoch": 0.6701247201315892, "grad_norm": 0.32063689827919006, "learning_rate": 4.906011872284907e-06, "loss": 0.1628, "step": 36096 }, { "epoch": 0.6701618502690079, "grad_norm": 0.49992769956588745, "learning_rate": 4.905008106077387e-06, "loss": 0.392, "step": 36098 }, { "epoch": 0.6701989804064264, "grad_norm": 0.3122595250606537, "learning_rate": 4.904004409197446e-06, "loss": 0.3194, "step": 36100 }, { "epoch": 0.6702361105438451, "grad_norm": 0.4619857966899872, "learning_rate": 4.903000781658741e-06, "loss": 0.2745, "step": 36102 }, { "epoch": 0.6702732406812638, "grad_norm": 0.35484379529953003, "learning_rate": 4.901997223474933e-06, "loss": 0.3481, "step": 36104 }, { "epoch": 0.6703103708186824, "grad_norm": 0.6152006387710571, "learning_rate": 4.900993734659671e-06, "loss": 0.2162, "step": 36106 }, { "epoch": 0.6703475009561011, "grad_norm": 0.39619210362434387, "learning_rate": 4.899990315226607e-06, "loss": 0.4153, "step": 36108 }, { "epoch": 0.6703846310935196, "grad_norm": 0.3276269733905792, "learning_rate": 4.898986965189398e-06, "loss": 0.2386, "step": 36110 }, { "epoch": 0.6704217612309383, "grad_norm": 0.44733211398124695, "learning_rate": 4.897983684561699e-06, "loss": 0.1921, "step": 36112 }, { "epoch": 0.6704588913683569, "grad_norm": 0.4223423898220062, "learning_rate": 4.896980473357158e-06, "loss": 0.3669, "step": 36114 }, { "epoch": 0.6704960215057756, "grad_norm": 0.33408883213996887, "learning_rate": 4.8959773315894285e-06, "loss": 0.2087, "step": 36116 }, { "epoch": 0.6705331516431943, "grad_norm": 0.2544315457344055, "learning_rate": 4.894974259272162e-06, "loss": 0.3602, "step": 36118 }, { "epoch": 0.6705702817806128, "grad_norm": 0.3726828098297119, "learning_rate": 4.893971256419003e-06, "loss": 0.186, "step": 36120 }, { "epoch": 0.6706074119180315, "grad_norm": 0.33832740783691406, "learning_rate": 4.8929683230435984e-06, "loss": 0.4682, "step": 36122 }, { "epoch": 0.6706445420554501, "grad_norm": 0.3523814380168915, "learning_rate": 4.891965459159599e-06, "loss": 0.1802, "step": 36124 }, { "epoch": 0.6706816721928688, "grad_norm": 0.438400000333786, "learning_rate": 4.890962664780652e-06, "loss": 0.3291, "step": 36126 }, { "epoch": 0.6707188023302875, "grad_norm": 0.3655209243297577, "learning_rate": 4.889959939920396e-06, "loss": 0.1971, "step": 36128 }, { "epoch": 0.670755932467706, "grad_norm": 0.32059258222579956, "learning_rate": 4.888957284592484e-06, "loss": 0.266, "step": 36130 }, { "epoch": 0.6707930626051247, "grad_norm": 2.950822591781616, "learning_rate": 4.88795469881055e-06, "loss": 0.2509, "step": 36132 }, { "epoch": 0.6708301927425433, "grad_norm": 0.4147107005119324, "learning_rate": 4.886952182588239e-06, "loss": 0.3286, "step": 36134 }, { "epoch": 0.670867322879962, "grad_norm": 0.38981154561042786, "learning_rate": 4.885949735939195e-06, "loss": 0.2042, "step": 36136 }, { "epoch": 0.6709044530173807, "grad_norm": 0.41154104471206665, "learning_rate": 4.884947358877056e-06, "loss": 0.2709, "step": 36138 }, { "epoch": 0.6709415831547992, "grad_norm": 0.3203439712524414, "learning_rate": 4.883945051415462e-06, "loss": 0.3776, "step": 36140 }, { "epoch": 0.6709787132922179, "grad_norm": 0.5013834834098816, "learning_rate": 4.882942813568051e-06, "loss": 0.2415, "step": 36142 }, { "epoch": 0.6710158434296365, "grad_norm": 0.25282618403434753, "learning_rate": 4.881940645348465e-06, "loss": 0.2675, "step": 36144 }, { "epoch": 0.6710529735670552, "grad_norm": 0.3100430965423584, "learning_rate": 4.880938546770333e-06, "loss": 0.3344, "step": 36146 }, { "epoch": 0.6710901037044739, "grad_norm": 0.4388672709465027, "learning_rate": 4.879936517847298e-06, "loss": 0.2189, "step": 36148 }, { "epoch": 0.6711272338418924, "grad_norm": 0.5125184655189514, "learning_rate": 4.878934558592986e-06, "loss": 0.2662, "step": 36150 }, { "epoch": 0.6711643639793111, "grad_norm": 0.47947296500205994, "learning_rate": 4.877932669021036e-06, "loss": 0.1638, "step": 36152 }, { "epoch": 0.6712014941167297, "grad_norm": 0.34604835510253906, "learning_rate": 4.876930849145079e-06, "loss": 0.2757, "step": 36154 }, { "epoch": 0.6712386242541484, "grad_norm": 0.32241278886795044, "learning_rate": 4.875929098978753e-06, "loss": 0.2125, "step": 36156 }, { "epoch": 0.671275754391567, "grad_norm": 0.4844227731227875, "learning_rate": 4.8749274185356796e-06, "loss": 0.4033, "step": 36158 }, { "epoch": 0.6713128845289856, "grad_norm": 0.3899403512477875, "learning_rate": 4.873925807829492e-06, "loss": 0.3905, "step": 36160 }, { "epoch": 0.6713500146664043, "grad_norm": 0.26708874106407166, "learning_rate": 4.87292426687382e-06, "loss": 0.2811, "step": 36162 }, { "epoch": 0.6713871448038229, "grad_norm": 0.38783735036849976, "learning_rate": 4.8719227956822915e-06, "loss": 0.343, "step": 36164 }, { "epoch": 0.6714242749412416, "grad_norm": 0.4256456196308136, "learning_rate": 4.870921394268537e-06, "loss": 0.3944, "step": 36166 }, { "epoch": 0.6714614050786601, "grad_norm": 0.4007189869880676, "learning_rate": 4.869920062646175e-06, "loss": 0.3168, "step": 36168 }, { "epoch": 0.6714985352160788, "grad_norm": 0.48081111907958984, "learning_rate": 4.868918800828839e-06, "loss": 0.3104, "step": 36170 }, { "epoch": 0.6715356653534975, "grad_norm": 0.22749769687652588, "learning_rate": 4.867917608830145e-06, "loss": 0.2267, "step": 36172 }, { "epoch": 0.6715727954909161, "grad_norm": 0.3982134163379669, "learning_rate": 4.866916486663719e-06, "loss": 0.2069, "step": 36174 }, { "epoch": 0.6716099256283348, "grad_norm": 0.3250977098941803, "learning_rate": 4.865915434343185e-06, "loss": 0.2538, "step": 36176 }, { "epoch": 0.6716470557657533, "grad_norm": 0.6489495038986206, "learning_rate": 4.8649144518821634e-06, "loss": 0.3035, "step": 36178 }, { "epoch": 0.671684185903172, "grad_norm": 0.30447709560394287, "learning_rate": 4.863913539294275e-06, "loss": 0.3421, "step": 36180 }, { "epoch": 0.6717213160405907, "grad_norm": 0.4187319278717041, "learning_rate": 4.8629126965931416e-06, "loss": 0.2431, "step": 36182 }, { "epoch": 0.6717584461780093, "grad_norm": 0.24698862433433533, "learning_rate": 4.861911923792377e-06, "loss": 0.2386, "step": 36184 }, { "epoch": 0.671795576315428, "grad_norm": 0.3094417154788971, "learning_rate": 4.8609112209056e-06, "loss": 0.4169, "step": 36186 }, { "epoch": 0.6718327064528465, "grad_norm": 0.18941430747509003, "learning_rate": 4.859910587946427e-06, "loss": 0.2524, "step": 36188 }, { "epoch": 0.6718698365902652, "grad_norm": 0.36202120780944824, "learning_rate": 4.8589100249284795e-06, "loss": 0.1894, "step": 36190 }, { "epoch": 0.6719069667276839, "grad_norm": 0.4387712776660919, "learning_rate": 4.857909531865362e-06, "loss": 0.2049, "step": 36192 }, { "epoch": 0.6719440968651025, "grad_norm": 0.29462796449661255, "learning_rate": 4.856909108770699e-06, "loss": 0.2764, "step": 36194 }, { "epoch": 0.6719812270025212, "grad_norm": 0.44651541113853455, "learning_rate": 4.855908755658093e-06, "loss": 0.2126, "step": 36196 }, { "epoch": 0.6720183571399397, "grad_norm": 0.4596565067768097, "learning_rate": 4.854908472541161e-06, "loss": 0.3989, "step": 36198 }, { "epoch": 0.6720554872773584, "grad_norm": 0.284769743680954, "learning_rate": 4.853908259433513e-06, "loss": 0.1811, "step": 36200 }, { "epoch": 0.6720926174147771, "grad_norm": 0.36276155710220337, "learning_rate": 4.852908116348759e-06, "loss": 0.5226, "step": 36202 }, { "epoch": 0.6721297475521957, "grad_norm": 0.524876058101654, "learning_rate": 4.851908043300509e-06, "loss": 0.3271, "step": 36204 }, { "epoch": 0.6721668776896144, "grad_norm": 0.2024860978126526, "learning_rate": 4.850908040302374e-06, "loss": 0.1087, "step": 36206 }, { "epoch": 0.6722040078270329, "grad_norm": 0.29798269271850586, "learning_rate": 4.849908107367952e-06, "loss": 0.2927, "step": 36208 }, { "epoch": 0.6722411379644516, "grad_norm": 0.3565357029438019, "learning_rate": 4.848908244510856e-06, "loss": 0.2246, "step": 36210 }, { "epoch": 0.6722782681018702, "grad_norm": 0.38925671577453613, "learning_rate": 4.847908451744693e-06, "loss": 0.2996, "step": 36212 }, { "epoch": 0.6723153982392889, "grad_norm": 0.26704326272010803, "learning_rate": 4.846908729083058e-06, "loss": 0.218, "step": 36214 }, { "epoch": 0.6723525283767076, "grad_norm": 0.30685800313949585, "learning_rate": 4.8459090765395625e-06, "loss": 0.1871, "step": 36216 }, { "epoch": 0.6723896585141261, "grad_norm": 0.31705355644226074, "learning_rate": 4.844909494127805e-06, "loss": 0.2916, "step": 36218 }, { "epoch": 0.6724267886515448, "grad_norm": 0.22528082132339478, "learning_rate": 4.843909981861392e-06, "loss": 0.1959, "step": 36220 }, { "epoch": 0.6724639187889634, "grad_norm": 0.44637659192085266, "learning_rate": 4.842910539753915e-06, "loss": 0.2734, "step": 36222 }, { "epoch": 0.6725010489263821, "grad_norm": 0.3254113793373108, "learning_rate": 4.84191116781898e-06, "loss": 0.1706, "step": 36224 }, { "epoch": 0.6725381790638008, "grad_norm": 0.30111074447631836, "learning_rate": 4.840911866070183e-06, "loss": 0.4161, "step": 36226 }, { "epoch": 0.6725753092012193, "grad_norm": 0.4336724281311035, "learning_rate": 4.8399126345211225e-06, "loss": 0.1988, "step": 36228 }, { "epoch": 0.672612439338638, "grad_norm": 0.41153383255004883, "learning_rate": 4.838913473185398e-06, "loss": 0.1378, "step": 36230 }, { "epoch": 0.6726495694760566, "grad_norm": 0.3022269904613495, "learning_rate": 4.837914382076599e-06, "loss": 0.1664, "step": 36232 }, { "epoch": 0.6726866996134753, "grad_norm": 0.4415663480758667, "learning_rate": 4.8369153612083256e-06, "loss": 0.2237, "step": 36234 }, { "epoch": 0.672723829750894, "grad_norm": 0.4283555746078491, "learning_rate": 4.835916410594165e-06, "loss": 0.2907, "step": 36236 }, { "epoch": 0.6727609598883125, "grad_norm": 0.38964778184890747, "learning_rate": 4.8349175302477156e-06, "loss": 0.3674, "step": 36238 }, { "epoch": 0.6727980900257312, "grad_norm": 0.3656388521194458, "learning_rate": 4.833918720182567e-06, "loss": 0.129, "step": 36240 }, { "epoch": 0.6728352201631498, "grad_norm": 0.8354846835136414, "learning_rate": 4.8329199804123085e-06, "loss": 0.1754, "step": 36242 }, { "epoch": 0.6728723503005685, "grad_norm": 0.47813042998313904, "learning_rate": 4.831921310950537e-06, "loss": 0.4138, "step": 36244 }, { "epoch": 0.6729094804379872, "grad_norm": 0.6088079214096069, "learning_rate": 4.830922711810833e-06, "loss": 0.1842, "step": 36246 }, { "epoch": 0.6729466105754057, "grad_norm": 0.3779643476009369, "learning_rate": 4.829924183006787e-06, "loss": 0.361, "step": 36248 }, { "epoch": 0.6729837407128244, "grad_norm": 0.3980613946914673, "learning_rate": 4.828925724551986e-06, "loss": 0.3115, "step": 36250 }, { "epoch": 0.673020870850243, "grad_norm": 0.28450995683670044, "learning_rate": 4.827927336460022e-06, "loss": 0.3824, "step": 36252 }, { "epoch": 0.6730580009876617, "grad_norm": 0.19442516565322876, "learning_rate": 4.82692901874447e-06, "loss": 0.2221, "step": 36254 }, { "epoch": 0.6730951311250803, "grad_norm": 0.3275705575942993, "learning_rate": 4.825930771418919e-06, "loss": 0.2303, "step": 36256 }, { "epoch": 0.6731322612624989, "grad_norm": 0.4632571041584015, "learning_rate": 4.824932594496957e-06, "loss": 0.495, "step": 36258 }, { "epoch": 0.6731693913999176, "grad_norm": 0.5370644330978394, "learning_rate": 4.823934487992157e-06, "loss": 0.2445, "step": 36260 }, { "epoch": 0.6732065215373362, "grad_norm": 0.44724059104919434, "learning_rate": 4.8229364519181035e-06, "loss": 0.2034, "step": 36262 }, { "epoch": 0.6732436516747549, "grad_norm": 0.3070735037326813, "learning_rate": 4.821938486288378e-06, "loss": 0.2268, "step": 36264 }, { "epoch": 0.6732807818121734, "grad_norm": 0.4787311851978302, "learning_rate": 4.8209405911165605e-06, "loss": 0.3295, "step": 36266 }, { "epoch": 0.6733179119495921, "grad_norm": 0.4329741895198822, "learning_rate": 4.819942766416228e-06, "loss": 0.215, "step": 36268 }, { "epoch": 0.6733550420870108, "grad_norm": 0.4220558702945709, "learning_rate": 4.818945012200962e-06, "loss": 0.1188, "step": 36270 }, { "epoch": 0.6733921722244294, "grad_norm": 0.4629891514778137, "learning_rate": 4.8179473284843315e-06, "loss": 0.3295, "step": 36272 }, { "epoch": 0.6734293023618481, "grad_norm": 0.5784451365470886, "learning_rate": 4.8169497152799194e-06, "loss": 0.3255, "step": 36274 }, { "epoch": 0.6734664324992666, "grad_norm": 0.5005277991294861, "learning_rate": 4.815952172601293e-06, "loss": 0.3624, "step": 36276 }, { "epoch": 0.6735035626366853, "grad_norm": 0.2573402225971222, "learning_rate": 4.81495470046203e-06, "loss": 0.3372, "step": 36278 }, { "epoch": 0.673540692774104, "grad_norm": 0.4015173614025116, "learning_rate": 4.813957298875703e-06, "loss": 0.1793, "step": 36280 }, { "epoch": 0.6735778229115226, "grad_norm": 0.2875911295413971, "learning_rate": 4.812959967855886e-06, "loss": 0.3429, "step": 36282 }, { "epoch": 0.6736149530489413, "grad_norm": 0.4089427590370178, "learning_rate": 4.811962707416143e-06, "loss": 0.3275, "step": 36284 }, { "epoch": 0.6736520831863598, "grad_norm": 0.3341543972492218, "learning_rate": 4.810965517570048e-06, "loss": 0.1792, "step": 36286 }, { "epoch": 0.6736892133237785, "grad_norm": 0.34848302602767944, "learning_rate": 4.809968398331169e-06, "loss": 0.2209, "step": 36288 }, { "epoch": 0.6737263434611972, "grad_norm": 0.3384447693824768, "learning_rate": 4.808971349713075e-06, "loss": 0.2138, "step": 36290 }, { "epoch": 0.6737634735986158, "grad_norm": 0.24984313547611237, "learning_rate": 4.807974371729332e-06, "loss": 0.2054, "step": 36292 }, { "epoch": 0.6738006037360345, "grad_norm": 0.3199433982372284, "learning_rate": 4.806977464393509e-06, "loss": 0.1436, "step": 36294 }, { "epoch": 0.673837733873453, "grad_norm": 0.5323359370231628, "learning_rate": 4.8059806277191635e-06, "loss": 0.4386, "step": 36296 }, { "epoch": 0.6738748640108717, "grad_norm": 0.4026868939399719, "learning_rate": 4.804983861719867e-06, "loss": 0.2113, "step": 36298 }, { "epoch": 0.6739119941482904, "grad_norm": 0.4514008164405823, "learning_rate": 4.803987166409177e-06, "loss": 0.1291, "step": 36300 }, { "epoch": 0.673949124285709, "grad_norm": 0.8926435112953186, "learning_rate": 4.802990541800659e-06, "loss": 0.286, "step": 36302 }, { "epoch": 0.6739862544231277, "grad_norm": 0.48347288370132446, "learning_rate": 4.8019939879078705e-06, "loss": 0.3151, "step": 36304 }, { "epoch": 0.6740233845605462, "grad_norm": 0.3353980779647827, "learning_rate": 4.800997504744375e-06, "loss": 0.45, "step": 36306 }, { "epoch": 0.6740605146979649, "grad_norm": 0.3935904800891876, "learning_rate": 4.800001092323734e-06, "loss": 0.262, "step": 36308 }, { "epoch": 0.6740976448353835, "grad_norm": 0.4170452952384949, "learning_rate": 4.799004750659498e-06, "loss": 0.1914, "step": 36310 }, { "epoch": 0.6741347749728022, "grad_norm": 0.3674319386482239, "learning_rate": 4.7980084797652295e-06, "loss": 0.1943, "step": 36312 }, { "epoch": 0.6741719051102208, "grad_norm": 0.43882086873054504, "learning_rate": 4.797012279654484e-06, "loss": 0.2734, "step": 36314 }, { "epoch": 0.6742090352476394, "grad_norm": 0.48057156801223755, "learning_rate": 4.79601615034082e-06, "loss": 0.3316, "step": 36316 }, { "epoch": 0.6742461653850581, "grad_norm": 0.3032284379005432, "learning_rate": 4.795020091837784e-06, "loss": 0.3245, "step": 36318 }, { "epoch": 0.6742832955224767, "grad_norm": 0.32553064823150635, "learning_rate": 4.7940241041589385e-06, "loss": 0.3336, "step": 36320 }, { "epoch": 0.6743204256598954, "grad_norm": 0.37098920345306396, "learning_rate": 4.7930281873178275e-06, "loss": 0.2863, "step": 36322 }, { "epoch": 0.674357555797314, "grad_norm": 0.3477371633052826, "learning_rate": 4.792032341328007e-06, "loss": 0.1795, "step": 36324 }, { "epoch": 0.6743946859347326, "grad_norm": 0.5911028981208801, "learning_rate": 4.791036566203025e-06, "loss": 0.428, "step": 36326 }, { "epoch": 0.6744318160721513, "grad_norm": 0.2589866518974304, "learning_rate": 4.790040861956433e-06, "loss": 0.3601, "step": 36328 }, { "epoch": 0.6744689462095699, "grad_norm": 0.49530717730522156, "learning_rate": 4.78904522860178e-06, "loss": 0.1938, "step": 36330 }, { "epoch": 0.6745060763469886, "grad_norm": 0.5417235493659973, "learning_rate": 4.788049666152615e-06, "loss": 0.182, "step": 36332 }, { "epoch": 0.6745432064844072, "grad_norm": 0.26481157541275024, "learning_rate": 4.7870541746224806e-06, "loss": 0.1308, "step": 36334 }, { "epoch": 0.6745803366218258, "grad_norm": 0.3770453631877899, "learning_rate": 4.786058754024924e-06, "loss": 0.4614, "step": 36336 }, { "epoch": 0.6746174667592445, "grad_norm": 0.37398794293403625, "learning_rate": 4.785063404373493e-06, "loss": 0.3655, "step": 36338 }, { "epoch": 0.6746545968966631, "grad_norm": 0.4845532476902008, "learning_rate": 4.784068125681726e-06, "loss": 0.1364, "step": 36340 }, { "epoch": 0.6746917270340818, "grad_norm": 0.2908177673816681, "learning_rate": 4.783072917963167e-06, "loss": 0.2524, "step": 36342 }, { "epoch": 0.6747288571715004, "grad_norm": 0.3771679401397705, "learning_rate": 4.782077781231359e-06, "loss": 0.1327, "step": 36344 }, { "epoch": 0.674765987308919, "grad_norm": 0.3853102922439575, "learning_rate": 4.781082715499848e-06, "loss": 0.3071, "step": 36346 }, { "epoch": 0.6748031174463377, "grad_norm": 0.47328341007232666, "learning_rate": 4.7800877207821635e-06, "loss": 0.1731, "step": 36348 }, { "epoch": 0.6748402475837563, "grad_norm": 0.44574710726737976, "learning_rate": 4.779092797091851e-06, "loss": 0.228, "step": 36350 }, { "epoch": 0.674877377721175, "grad_norm": 0.3613787889480591, "learning_rate": 4.778097944442447e-06, "loss": 0.3166, "step": 36352 }, { "epoch": 0.6749145078585936, "grad_norm": 0.2751176357269287, "learning_rate": 4.777103162847488e-06, "loss": 0.4194, "step": 36354 }, { "epoch": 0.6749516379960122, "grad_norm": 0.4206976890563965, "learning_rate": 4.77610845232051e-06, "loss": 0.1989, "step": 36356 }, { "epoch": 0.6749887681334309, "grad_norm": 0.5810465812683105, "learning_rate": 4.7751138128750544e-06, "loss": 0.2345, "step": 36358 }, { "epoch": 0.6750258982708495, "grad_norm": 0.49143537878990173, "learning_rate": 4.774119244524648e-06, "loss": 0.2226, "step": 36360 }, { "epoch": 0.6750630284082682, "grad_norm": 0.44942373037338257, "learning_rate": 4.773124747282821e-06, "loss": 0.3655, "step": 36362 }, { "epoch": 0.6751001585456867, "grad_norm": 0.47464489936828613, "learning_rate": 4.772130321163111e-06, "loss": 0.3644, "step": 36364 }, { "epoch": 0.6751372886831054, "grad_norm": 0.4693332612514496, "learning_rate": 4.771135966179047e-06, "loss": 0.3215, "step": 36366 }, { "epoch": 0.6751744188205241, "grad_norm": 0.22810512781143188, "learning_rate": 4.770141682344162e-06, "loss": 0.176, "step": 36368 }, { "epoch": 0.6752115489579427, "grad_norm": 0.4643705189228058, "learning_rate": 4.769147469671985e-06, "loss": 0.3775, "step": 36370 }, { "epoch": 0.6752486790953613, "grad_norm": 0.5585604906082153, "learning_rate": 4.768153328176041e-06, "loss": 0.3275, "step": 36372 }, { "epoch": 0.6752858092327799, "grad_norm": 0.5852408409118652, "learning_rate": 4.767159257869858e-06, "loss": 0.3544, "step": 36374 }, { "epoch": 0.6753229393701986, "grad_norm": 0.4132533371448517, "learning_rate": 4.766165258766962e-06, "loss": 0.2187, "step": 36376 }, { "epoch": 0.6753600695076173, "grad_norm": 0.3609318435192108, "learning_rate": 4.765171330880883e-06, "loss": 0.1367, "step": 36378 }, { "epoch": 0.6753971996450359, "grad_norm": 0.5181414484977722, "learning_rate": 4.764177474225143e-06, "loss": 0.5101, "step": 36380 }, { "epoch": 0.6754343297824545, "grad_norm": 0.6194131374359131, "learning_rate": 4.763183688813262e-06, "loss": 0.1535, "step": 36382 }, { "epoch": 0.6754714599198731, "grad_norm": 0.3785829544067383, "learning_rate": 4.762189974658768e-06, "loss": 0.2727, "step": 36384 }, { "epoch": 0.6755085900572918, "grad_norm": 0.33141621947288513, "learning_rate": 4.761196331775176e-06, "loss": 0.3645, "step": 36386 }, { "epoch": 0.6755457201947105, "grad_norm": 0.2935234308242798, "learning_rate": 4.76020276017601e-06, "loss": 0.273, "step": 36388 }, { "epoch": 0.6755828503321291, "grad_norm": 0.24872499704360962, "learning_rate": 4.7592092598747884e-06, "loss": 0.2151, "step": 36390 }, { "epoch": 0.6756199804695477, "grad_norm": 0.31837916374206543, "learning_rate": 4.758215830885032e-06, "loss": 0.393, "step": 36392 }, { "epoch": 0.6756571106069663, "grad_norm": 0.4265042245388031, "learning_rate": 4.757222473220256e-06, "loss": 0.3093, "step": 36394 }, { "epoch": 0.675694240744385, "grad_norm": 0.36889320611953735, "learning_rate": 4.756229186893983e-06, "loss": 0.4381, "step": 36396 }, { "epoch": 0.6757313708818037, "grad_norm": 0.2660238742828369, "learning_rate": 4.75523597191972e-06, "loss": 0.2768, "step": 36398 }, { "epoch": 0.6757685010192223, "grad_norm": 0.3256976306438446, "learning_rate": 4.7542428283109865e-06, "loss": 0.2292, "step": 36400 }, { "epoch": 0.6758056311566409, "grad_norm": 0.39191898703575134, "learning_rate": 4.7532497560812975e-06, "loss": 0.3031, "step": 36402 }, { "epoch": 0.6758427612940595, "grad_norm": 0.24308307468891144, "learning_rate": 4.75225675524416e-06, "loss": 0.1941, "step": 36404 }, { "epoch": 0.6758798914314782, "grad_norm": 0.4531537592411041, "learning_rate": 4.751263825813089e-06, "loss": 0.3853, "step": 36406 }, { "epoch": 0.6759170215688969, "grad_norm": 0.3413432240486145, "learning_rate": 4.7502709678016e-06, "loss": 0.1684, "step": 36408 }, { "epoch": 0.6759541517063155, "grad_norm": 0.3316570520401001, "learning_rate": 4.749278181223195e-06, "loss": 0.1494, "step": 36410 }, { "epoch": 0.6759912818437341, "grad_norm": 0.48200929164886475, "learning_rate": 4.748285466091385e-06, "loss": 0.415, "step": 36412 }, { "epoch": 0.6760284119811527, "grad_norm": 0.3232986330986023, "learning_rate": 4.74729282241968e-06, "loss": 0.2833, "step": 36414 }, { "epoch": 0.6760655421185714, "grad_norm": 0.3097541630268097, "learning_rate": 4.746300250221585e-06, "loss": 0.31, "step": 36416 }, { "epoch": 0.67610267225599, "grad_norm": 0.37366917729377747, "learning_rate": 4.745307749510608e-06, "loss": 0.3019, "step": 36418 }, { "epoch": 0.6761398023934087, "grad_norm": 0.4714035987854004, "learning_rate": 4.7443153203002555e-06, "loss": 0.3812, "step": 36420 }, { "epoch": 0.6761769325308273, "grad_norm": 0.6430938839912415, "learning_rate": 4.743322962604027e-06, "loss": 0.3415, "step": 36422 }, { "epoch": 0.6762140626682459, "grad_norm": 0.45992711186408997, "learning_rate": 4.742330676435429e-06, "loss": 0.3542, "step": 36424 }, { "epoch": 0.6762511928056646, "grad_norm": 0.4914569854736328, "learning_rate": 4.74133846180796e-06, "loss": 0.3372, "step": 36426 }, { "epoch": 0.6762883229430832, "grad_norm": 0.31977933645248413, "learning_rate": 4.740346318735122e-06, "loss": 0.3087, "step": 36428 }, { "epoch": 0.6763254530805018, "grad_norm": 0.29822492599487305, "learning_rate": 4.739354247230416e-06, "loss": 0.278, "step": 36430 }, { "epoch": 0.6763625832179205, "grad_norm": 0.39992520213127136, "learning_rate": 4.738362247307342e-06, "loss": 0.3478, "step": 36432 }, { "epoch": 0.6763997133553391, "grad_norm": 0.35032975673675537, "learning_rate": 4.7373703189794e-06, "loss": 0.2341, "step": 36434 }, { "epoch": 0.6764368434927578, "grad_norm": 0.4662838578224182, "learning_rate": 4.7363784622600816e-06, "loss": 0.3096, "step": 36436 }, { "epoch": 0.6764739736301764, "grad_norm": 0.4886572062969208, "learning_rate": 4.735386677162886e-06, "loss": 0.1326, "step": 36438 }, { "epoch": 0.676511103767595, "grad_norm": 0.473810076713562, "learning_rate": 4.734394963701309e-06, "loss": 0.3492, "step": 36440 }, { "epoch": 0.6765482339050137, "grad_norm": 0.3377487361431122, "learning_rate": 4.7334033218888475e-06, "loss": 0.1672, "step": 36442 }, { "epoch": 0.6765853640424323, "grad_norm": 0.3469337522983551, "learning_rate": 4.732411751738987e-06, "loss": 0.2293, "step": 36444 }, { "epoch": 0.676622494179851, "grad_norm": 0.4482578635215759, "learning_rate": 4.731420253265229e-06, "loss": 0.322, "step": 36446 }, { "epoch": 0.6766596243172696, "grad_norm": 0.3213288486003876, "learning_rate": 4.730428826481056e-06, "loss": 0.0477, "step": 36448 }, { "epoch": 0.6766967544546882, "grad_norm": 0.34195560216903687, "learning_rate": 4.729437471399962e-06, "loss": 0.2181, "step": 36450 }, { "epoch": 0.6767338845921069, "grad_norm": 0.36801910400390625, "learning_rate": 4.728446188035437e-06, "loss": 0.2719, "step": 36452 }, { "epoch": 0.6767710147295255, "grad_norm": 0.550866425037384, "learning_rate": 4.72745497640097e-06, "loss": 0.3028, "step": 36454 }, { "epoch": 0.6768081448669442, "grad_norm": 0.288461297750473, "learning_rate": 4.726463836510047e-06, "loss": 0.3096, "step": 36456 }, { "epoch": 0.6768452750043628, "grad_norm": 0.35113444924354553, "learning_rate": 4.725472768376159e-06, "loss": 0.1405, "step": 36458 }, { "epoch": 0.6768824051417814, "grad_norm": 0.3291110396385193, "learning_rate": 4.724481772012784e-06, "loss": 0.2513, "step": 36460 }, { "epoch": 0.6769195352792, "grad_norm": 0.4129972755908966, "learning_rate": 4.72349084743341e-06, "loss": 0.3026, "step": 36462 }, { "epoch": 0.6769566654166187, "grad_norm": 0.5406935811042786, "learning_rate": 4.722499994651521e-06, "loss": 0.3771, "step": 36464 }, { "epoch": 0.6769937955540374, "grad_norm": 0.5709956884384155, "learning_rate": 4.721509213680602e-06, "loss": 0.2094, "step": 36466 }, { "epoch": 0.677030925691456, "grad_norm": 0.44346678256988525, "learning_rate": 4.720518504534128e-06, "loss": 0.3095, "step": 36468 }, { "epoch": 0.6770680558288746, "grad_norm": 0.4805660843849182, "learning_rate": 4.719527867225584e-06, "loss": 0.0944, "step": 36470 }, { "epoch": 0.6771051859662932, "grad_norm": 0.3394569754600525, "learning_rate": 4.718537301768452e-06, "loss": 0.3496, "step": 36472 }, { "epoch": 0.6771423161037119, "grad_norm": 0.31394901871681213, "learning_rate": 4.7175468081762046e-06, "loss": 0.2273, "step": 36474 }, { "epoch": 0.6771794462411306, "grad_norm": 0.385687917470932, "learning_rate": 4.716556386462322e-06, "loss": 0.4899, "step": 36476 }, { "epoch": 0.6772165763785492, "grad_norm": 0.30165600776672363, "learning_rate": 4.715566036640281e-06, "loss": 0.2596, "step": 36478 }, { "epoch": 0.6772537065159678, "grad_norm": 0.36663302779197693, "learning_rate": 4.714575758723558e-06, "loss": 0.2198, "step": 36480 }, { "epoch": 0.6772908366533864, "grad_norm": 0.43976643681526184, "learning_rate": 4.713585552725626e-06, "loss": 0.1707, "step": 36482 }, { "epoch": 0.6773279667908051, "grad_norm": 0.3960498571395874, "learning_rate": 4.7125954186599646e-06, "loss": 0.2511, "step": 36484 }, { "epoch": 0.6773650969282238, "grad_norm": 0.3838033974170685, "learning_rate": 4.711605356540038e-06, "loss": 0.3024, "step": 36486 }, { "epoch": 0.6774022270656423, "grad_norm": 0.3273037075996399, "learning_rate": 4.710615366379326e-06, "loss": 0.222, "step": 36488 }, { "epoch": 0.677439357203061, "grad_norm": 0.3275817632675171, "learning_rate": 4.709625448191292e-06, "loss": 0.1962, "step": 36490 }, { "epoch": 0.6774764873404796, "grad_norm": 0.36478307843208313, "learning_rate": 4.708635601989407e-06, "loss": 0.2729, "step": 36492 }, { "epoch": 0.6775136174778983, "grad_norm": 0.600856602191925, "learning_rate": 4.707645827787144e-06, "loss": 0.3337, "step": 36494 }, { "epoch": 0.677550747615317, "grad_norm": 0.2982170879840851, "learning_rate": 4.706656125597972e-06, "loss": 0.2396, "step": 36496 }, { "epoch": 0.6775878777527355, "grad_norm": 0.4709659516811371, "learning_rate": 4.705666495435352e-06, "loss": 0.2135, "step": 36498 }, { "epoch": 0.6776250078901542, "grad_norm": 0.4827239513397217, "learning_rate": 4.704676937312751e-06, "loss": 0.3319, "step": 36500 }, { "epoch": 0.6776621380275728, "grad_norm": 0.2548995018005371, "learning_rate": 4.703687451243637e-06, "loss": 0.1706, "step": 36502 }, { "epoch": 0.6776992681649915, "grad_norm": 0.42466509342193604, "learning_rate": 4.702698037241471e-06, "loss": 0.4044, "step": 36504 }, { "epoch": 0.6777363983024102, "grad_norm": 0.4611983001232147, "learning_rate": 4.701708695319722e-06, "loss": 0.2571, "step": 36506 }, { "epoch": 0.6777735284398287, "grad_norm": 0.26731839776039124, "learning_rate": 4.700719425491844e-06, "loss": 0.2425, "step": 36508 }, { "epoch": 0.6778106585772474, "grad_norm": 0.6715711355209351, "learning_rate": 4.699730227771305e-06, "loss": 0.2327, "step": 36510 }, { "epoch": 0.677847788714666, "grad_norm": 0.4574154019355774, "learning_rate": 4.698741102171558e-06, "loss": 0.4333, "step": 36512 }, { "epoch": 0.6778849188520847, "grad_norm": 0.3367522656917572, "learning_rate": 4.6977520487060645e-06, "loss": 0.3052, "step": 36514 }, { "epoch": 0.6779220489895033, "grad_norm": 0.38283029198646545, "learning_rate": 4.696763067388285e-06, "loss": 0.2659, "step": 36516 }, { "epoch": 0.6779591791269219, "grad_norm": 0.32643836736679077, "learning_rate": 4.695774158231674e-06, "loss": 0.3212, "step": 36518 }, { "epoch": 0.6779963092643406, "grad_norm": 0.6314180493354797, "learning_rate": 4.694785321249688e-06, "loss": 0.2763, "step": 36520 }, { "epoch": 0.6780334394017592, "grad_norm": 0.18016217648983002, "learning_rate": 4.693796556455787e-06, "loss": 0.2662, "step": 36522 }, { "epoch": 0.6780705695391779, "grad_norm": 0.3607742190361023, "learning_rate": 4.692807863863419e-06, "loss": 0.3643, "step": 36524 }, { "epoch": 0.6781076996765965, "grad_norm": 0.3754550814628601, "learning_rate": 4.691819243486038e-06, "loss": 0.169, "step": 36526 }, { "epoch": 0.6781448298140151, "grad_norm": 0.158907949924469, "learning_rate": 4.690830695337101e-06, "loss": 0.3108, "step": 36528 }, { "epoch": 0.6781819599514338, "grad_norm": 0.8036734461784363, "learning_rate": 4.689842219430051e-06, "loss": 0.2687, "step": 36530 }, { "epoch": 0.6782190900888524, "grad_norm": 0.28285539150238037, "learning_rate": 4.688853815778343e-06, "loss": 0.1788, "step": 36532 }, { "epoch": 0.6782562202262711, "grad_norm": 0.5729122757911682, "learning_rate": 4.68786548439543e-06, "loss": 0.2115, "step": 36534 }, { "epoch": 0.6782933503636897, "grad_norm": 0.3527403175830841, "learning_rate": 4.686877225294751e-06, "loss": 0.4524, "step": 36536 }, { "epoch": 0.6783304805011083, "grad_norm": 0.6176527142524719, "learning_rate": 4.68588903848976e-06, "loss": 0.1819, "step": 36538 }, { "epoch": 0.678367610638527, "grad_norm": 0.4776153266429901, "learning_rate": 4.6849009239939016e-06, "loss": 0.3631, "step": 36540 }, { "epoch": 0.6784047407759456, "grad_norm": 0.5935525298118591, "learning_rate": 4.683912881820619e-06, "loss": 0.2165, "step": 36542 }, { "epoch": 0.6784418709133643, "grad_norm": 0.31611618399620056, "learning_rate": 4.682924911983361e-06, "loss": 0.2183, "step": 36544 }, { "epoch": 0.6784790010507828, "grad_norm": 0.27530214190483093, "learning_rate": 4.681937014495567e-06, "loss": 0.3206, "step": 36546 }, { "epoch": 0.6785161311882015, "grad_norm": 0.355207234621048, "learning_rate": 4.6809491893706845e-06, "loss": 0.134, "step": 36548 }, { "epoch": 0.6785532613256202, "grad_norm": 0.2882908582687378, "learning_rate": 4.679961436622152e-06, "loss": 0.2756, "step": 36550 }, { "epoch": 0.6785903914630388, "grad_norm": 0.36620956659317017, "learning_rate": 4.678973756263405e-06, "loss": 0.3932, "step": 36552 }, { "epoch": 0.6786275216004575, "grad_norm": 0.17871969938278198, "learning_rate": 4.677986148307887e-06, "loss": 0.2058, "step": 36554 }, { "epoch": 0.678664651737876, "grad_norm": 0.42708820104599, "learning_rate": 4.6769986127690355e-06, "loss": 0.3689, "step": 36556 }, { "epoch": 0.6787017818752947, "grad_norm": 0.36767616868019104, "learning_rate": 4.676011149660289e-06, "loss": 0.2729, "step": 36558 }, { "epoch": 0.6787389120127134, "grad_norm": 0.3953140079975128, "learning_rate": 4.675023758995088e-06, "loss": 0.2241, "step": 36560 }, { "epoch": 0.678776042150132, "grad_norm": 0.249820277094841, "learning_rate": 4.6740364407868586e-06, "loss": 0.2534, "step": 36562 }, { "epoch": 0.6788131722875507, "grad_norm": 0.37099185585975647, "learning_rate": 4.6730491950490405e-06, "loss": 0.4403, "step": 36564 }, { "epoch": 0.6788503024249692, "grad_norm": 0.383452832698822, "learning_rate": 4.672062021795067e-06, "loss": 0.0932, "step": 36566 }, { "epoch": 0.6788874325623879, "grad_norm": 0.3339192867279053, "learning_rate": 4.67107492103837e-06, "loss": 0.2741, "step": 36568 }, { "epoch": 0.6789245626998065, "grad_norm": 0.39190760254859924, "learning_rate": 4.670087892792385e-06, "loss": 0.2541, "step": 36570 }, { "epoch": 0.6789616928372252, "grad_norm": 0.4393065869808197, "learning_rate": 4.669100937070536e-06, "loss": 0.5056, "step": 36572 }, { "epoch": 0.6789988229746439, "grad_norm": 0.3446149230003357, "learning_rate": 4.668114053886259e-06, "loss": 0.1979, "step": 36574 }, { "epoch": 0.6790359531120624, "grad_norm": 0.3102419972419739, "learning_rate": 4.6671272432529744e-06, "loss": 0.1823, "step": 36576 }, { "epoch": 0.6790730832494811, "grad_norm": 0.5714578032493591, "learning_rate": 4.6661405051841144e-06, "loss": 0.2381, "step": 36578 }, { "epoch": 0.6791102133868997, "grad_norm": 0.6529115438461304, "learning_rate": 4.6651538396931065e-06, "loss": 0.2378, "step": 36580 }, { "epoch": 0.6791473435243184, "grad_norm": 0.38438543677330017, "learning_rate": 4.664167246793373e-06, "loss": 0.2658, "step": 36582 }, { "epoch": 0.6791844736617371, "grad_norm": 0.4142589271068573, "learning_rate": 4.663180726498345e-06, "loss": 0.2232, "step": 36584 }, { "epoch": 0.6792216037991556, "grad_norm": 0.2852151393890381, "learning_rate": 4.662194278821439e-06, "loss": 0.3353, "step": 36586 }, { "epoch": 0.6792587339365743, "grad_norm": 0.38932931423187256, "learning_rate": 4.66120790377608e-06, "loss": 0.3419, "step": 36588 }, { "epoch": 0.6792958640739929, "grad_norm": 0.257129967212677, "learning_rate": 4.660221601375689e-06, "loss": 0.3094, "step": 36590 }, { "epoch": 0.6793329942114116, "grad_norm": 0.34265750646591187, "learning_rate": 4.659235371633692e-06, "loss": 0.399, "step": 36592 }, { "epoch": 0.6793701243488303, "grad_norm": 0.4926569163799286, "learning_rate": 4.6582492145635e-06, "loss": 0.285, "step": 36594 }, { "epoch": 0.6794072544862488, "grad_norm": 0.4248081147670746, "learning_rate": 4.657263130178536e-06, "loss": 0.295, "step": 36596 }, { "epoch": 0.6794443846236675, "grad_norm": 0.3272993862628937, "learning_rate": 4.656277118492222e-06, "loss": 0.2525, "step": 36598 }, { "epoch": 0.6794815147610861, "grad_norm": 0.5105322003364563, "learning_rate": 4.655291179517965e-06, "loss": 0.2202, "step": 36600 }, { "epoch": 0.6795186448985048, "grad_norm": 0.39101994037628174, "learning_rate": 4.654305313269188e-06, "loss": 0.3339, "step": 36602 }, { "epoch": 0.6795557750359235, "grad_norm": 0.3296178877353668, "learning_rate": 4.653319519759301e-06, "loss": 0.5378, "step": 36604 }, { "epoch": 0.679592905173342, "grad_norm": 0.2997519373893738, "learning_rate": 4.652333799001722e-06, "loss": 0.2724, "step": 36606 }, { "epoch": 0.6796300353107607, "grad_norm": 0.34020131826400757, "learning_rate": 4.65134815100986e-06, "loss": 0.3855, "step": 36608 }, { "epoch": 0.6796671654481793, "grad_norm": 0.2601917088031769, "learning_rate": 4.650362575797132e-06, "loss": 0.2613, "step": 36610 }, { "epoch": 0.679704295585598, "grad_norm": 0.297384649515152, "learning_rate": 4.649377073376944e-06, "loss": 0.1312, "step": 36612 }, { "epoch": 0.6797414257230165, "grad_norm": 0.35620924830436707, "learning_rate": 4.64839164376271e-06, "loss": 0.3187, "step": 36614 }, { "epoch": 0.6797785558604352, "grad_norm": 0.4011893570423126, "learning_rate": 4.647406286967832e-06, "loss": 0.1707, "step": 36616 }, { "epoch": 0.6798156859978539, "grad_norm": 0.3564300835132599, "learning_rate": 4.646421003005721e-06, "loss": 0.3464, "step": 36618 }, { "epoch": 0.6798528161352725, "grad_norm": 0.40349623560905457, "learning_rate": 4.645435791889784e-06, "loss": 0.3768, "step": 36620 }, { "epoch": 0.6798899462726912, "grad_norm": 0.7479465007781982, "learning_rate": 4.644450653633432e-06, "loss": 0.3091, "step": 36622 }, { "epoch": 0.6799270764101097, "grad_norm": 0.38486817479133606, "learning_rate": 4.64346558825006e-06, "loss": 0.1475, "step": 36624 }, { "epoch": 0.6799642065475284, "grad_norm": 0.45041412115097046, "learning_rate": 4.6424805957530775e-06, "loss": 0.1883, "step": 36626 }, { "epoch": 0.6800013366849471, "grad_norm": 0.26288503408432007, "learning_rate": 4.641495676155886e-06, "loss": 0.2934, "step": 36628 }, { "epoch": 0.6800384668223657, "grad_norm": 0.4546377956867218, "learning_rate": 4.640510829471888e-06, "loss": 0.2305, "step": 36630 }, { "epoch": 0.6800755969597844, "grad_norm": 0.3075890839099884, "learning_rate": 4.639526055714485e-06, "loss": 0.2574, "step": 36632 }, { "epoch": 0.6801127270972029, "grad_norm": 0.5221927762031555, "learning_rate": 4.638541354897078e-06, "loss": 0.3573, "step": 36634 }, { "epoch": 0.6801498572346216, "grad_norm": 0.30191871523857117, "learning_rate": 4.6375567270330645e-06, "loss": 0.2914, "step": 36636 }, { "epoch": 0.6801869873720403, "grad_norm": 0.4841984212398529, "learning_rate": 4.636572172135838e-06, "loss": 0.2589, "step": 36638 }, { "epoch": 0.6802241175094589, "grad_norm": 0.33180081844329834, "learning_rate": 4.635587690218799e-06, "loss": 0.3972, "step": 36640 }, { "epoch": 0.6802612476468776, "grad_norm": 0.8104206919670105, "learning_rate": 4.634603281295343e-06, "loss": 0.3596, "step": 36642 }, { "epoch": 0.6802983777842961, "grad_norm": 0.3464994728565216, "learning_rate": 4.633618945378865e-06, "loss": 0.2209, "step": 36644 }, { "epoch": 0.6803355079217148, "grad_norm": 0.41266077756881714, "learning_rate": 4.6326346824827585e-06, "loss": 0.1782, "step": 36646 }, { "epoch": 0.6803726380591335, "grad_norm": 0.2535708248615265, "learning_rate": 4.631650492620421e-06, "loss": 0.1666, "step": 36648 }, { "epoch": 0.6804097681965521, "grad_norm": 0.31775355339050293, "learning_rate": 4.630666375805237e-06, "loss": 0.3488, "step": 36650 }, { "epoch": 0.6804468983339708, "grad_norm": 0.3390483856201172, "learning_rate": 4.6296823320506e-06, "loss": 0.3456, "step": 36652 }, { "epoch": 0.6804840284713893, "grad_norm": 0.4517935514450073, "learning_rate": 4.628698361369901e-06, "loss": 0.3289, "step": 36654 }, { "epoch": 0.680521158608808, "grad_norm": 0.47368916869163513, "learning_rate": 4.627714463776531e-06, "loss": 0.2156, "step": 36656 }, { "epoch": 0.6805582887462267, "grad_norm": 0.46265238523483276, "learning_rate": 4.626730639283873e-06, "loss": 0.2898, "step": 36658 }, { "epoch": 0.6805954188836453, "grad_norm": 0.361904114484787, "learning_rate": 4.625746887905319e-06, "loss": 0.3942, "step": 36660 }, { "epoch": 0.680632549021064, "grad_norm": 0.43007397651672363, "learning_rate": 4.624763209654249e-06, "loss": 0.3897, "step": 36662 }, { "epoch": 0.6806696791584825, "grad_norm": 0.5235523581504822, "learning_rate": 4.623779604544051e-06, "loss": 0.2264, "step": 36664 }, { "epoch": 0.6807068092959012, "grad_norm": 0.3871137797832489, "learning_rate": 4.622796072588108e-06, "loss": 0.2331, "step": 36666 }, { "epoch": 0.6807439394333198, "grad_norm": 0.4467279314994812, "learning_rate": 4.621812613799805e-06, "loss": 0.2465, "step": 36668 }, { "epoch": 0.6807810695707385, "grad_norm": 0.29052039980888367, "learning_rate": 4.6208292281925226e-06, "loss": 0.2362, "step": 36670 }, { "epoch": 0.6808181997081572, "grad_norm": 0.5195879340171814, "learning_rate": 4.619845915779641e-06, "loss": 0.2717, "step": 36672 }, { "epoch": 0.6808553298455757, "grad_norm": 0.5728261470794678, "learning_rate": 4.6188626765745455e-06, "loss": 0.2502, "step": 36674 }, { "epoch": 0.6808924599829944, "grad_norm": 0.4399751126766205, "learning_rate": 4.617879510590607e-06, "loss": 0.1459, "step": 36676 }, { "epoch": 0.680929590120413, "grad_norm": 0.31272971630096436, "learning_rate": 4.616896417841211e-06, "loss": 0.3102, "step": 36678 }, { "epoch": 0.6809667202578317, "grad_norm": 0.4640543758869171, "learning_rate": 4.615913398339726e-06, "loss": 0.2266, "step": 36680 }, { "epoch": 0.6810038503952504, "grad_norm": 0.4285161793231964, "learning_rate": 4.614930452099533e-06, "loss": 0.2428, "step": 36682 }, { "epoch": 0.6810409805326689, "grad_norm": 0.633770763874054, "learning_rate": 4.6139475791340064e-06, "loss": 0.2424, "step": 36684 }, { "epoch": 0.6810781106700876, "grad_norm": 0.5195276141166687, "learning_rate": 4.612964779456524e-06, "loss": 0.3027, "step": 36686 }, { "epoch": 0.6811152408075062, "grad_norm": 0.4167497158050537, "learning_rate": 4.61198205308045e-06, "loss": 0.4612, "step": 36688 }, { "epoch": 0.6811523709449249, "grad_norm": 0.2877090871334076, "learning_rate": 4.610999400019162e-06, "loss": 0.3466, "step": 36690 }, { "epoch": 0.6811895010823436, "grad_norm": 0.5440207719802856, "learning_rate": 4.610016820286031e-06, "loss": 0.2927, "step": 36692 }, { "epoch": 0.6812266312197621, "grad_norm": 0.2668544352054596, "learning_rate": 4.609034313894425e-06, "loss": 0.3608, "step": 36694 }, { "epoch": 0.6812637613571808, "grad_norm": 0.5148725509643555, "learning_rate": 4.608051880857719e-06, "loss": 0.2335, "step": 36696 }, { "epoch": 0.6813008914945994, "grad_norm": 0.4221431016921997, "learning_rate": 4.607069521189271e-06, "loss": 0.3397, "step": 36698 }, { "epoch": 0.6813380216320181, "grad_norm": 0.24876393377780914, "learning_rate": 4.6060872349024584e-06, "loss": 0.1289, "step": 36700 }, { "epoch": 0.6813751517694367, "grad_norm": 0.6464248299598694, "learning_rate": 4.605105022010638e-06, "loss": 0.2708, "step": 36702 }, { "epoch": 0.6814122819068553, "grad_norm": 0.32391157746315, "learning_rate": 4.6041228825271775e-06, "loss": 0.213, "step": 36704 }, { "epoch": 0.681449412044274, "grad_norm": 0.47570061683654785, "learning_rate": 4.603140816465442e-06, "loss": 0.2451, "step": 36706 }, { "epoch": 0.6814865421816926, "grad_norm": 0.45762503147125244, "learning_rate": 4.602158823838796e-06, "loss": 0.2294, "step": 36708 }, { "epoch": 0.6815236723191113, "grad_norm": 0.4113583266735077, "learning_rate": 4.601176904660603e-06, "loss": 0.2511, "step": 36710 }, { "epoch": 0.68156080245653, "grad_norm": 0.3370051681995392, "learning_rate": 4.6001950589442165e-06, "loss": 0.3217, "step": 36712 }, { "epoch": 0.6815979325939485, "grad_norm": 0.3481823205947876, "learning_rate": 4.599213286703003e-06, "loss": 0.2484, "step": 36714 }, { "epoch": 0.6816350627313672, "grad_norm": 0.47473111748695374, "learning_rate": 4.598231587950317e-06, "loss": 0.1396, "step": 36716 }, { "epoch": 0.6816721928687858, "grad_norm": 0.5065504312515259, "learning_rate": 4.59724996269952e-06, "loss": 0.2837, "step": 36718 }, { "epoch": 0.6817093230062045, "grad_norm": 0.4569677710533142, "learning_rate": 4.5962684109639725e-06, "loss": 0.1816, "step": 36720 }, { "epoch": 0.681746453143623, "grad_norm": 0.44991540908813477, "learning_rate": 4.595286932757022e-06, "loss": 0.1659, "step": 36722 }, { "epoch": 0.6817835832810417, "grad_norm": 0.46073397994041443, "learning_rate": 4.594305528092029e-06, "loss": 0.3097, "step": 36724 }, { "epoch": 0.6818207134184604, "grad_norm": 0.4513939321041107, "learning_rate": 4.593324196982344e-06, "loss": 0.626, "step": 36726 }, { "epoch": 0.681857843555879, "grad_norm": 0.43300861120224, "learning_rate": 4.592342939441321e-06, "loss": 0.3946, "step": 36728 }, { "epoch": 0.6818949736932977, "grad_norm": 0.32495608925819397, "learning_rate": 4.591361755482313e-06, "loss": 0.2713, "step": 36730 }, { "epoch": 0.6819321038307162, "grad_norm": 0.24395209550857544, "learning_rate": 4.590380645118672e-06, "loss": 0.394, "step": 36732 }, { "epoch": 0.6819692339681349, "grad_norm": 0.31837767362594604, "learning_rate": 4.589399608363745e-06, "loss": 0.2079, "step": 36734 }, { "epoch": 0.6820063641055536, "grad_norm": 0.33453434705734253, "learning_rate": 4.5884186452308865e-06, "loss": 0.1781, "step": 36736 }, { "epoch": 0.6820434942429722, "grad_norm": 0.29054832458496094, "learning_rate": 4.587437755733438e-06, "loss": 0.2355, "step": 36738 }, { "epoch": 0.6820806243803909, "grad_norm": 0.35468605160713196, "learning_rate": 4.586456939884748e-06, "loss": 0.2837, "step": 36740 }, { "epoch": 0.6821177545178094, "grad_norm": 0.2772906720638275, "learning_rate": 4.585476197698169e-06, "loss": 0.1072, "step": 36742 }, { "epoch": 0.6821548846552281, "grad_norm": 0.45635318756103516, "learning_rate": 4.584495529187035e-06, "loss": 0.3416, "step": 36744 }, { "epoch": 0.6821920147926468, "grad_norm": 0.3737885355949402, "learning_rate": 4.583514934364696e-06, "loss": 0.228, "step": 36746 }, { "epoch": 0.6822291449300654, "grad_norm": 0.4764951765537262, "learning_rate": 4.582534413244499e-06, "loss": 0.3466, "step": 36748 }, { "epoch": 0.682266275067484, "grad_norm": 0.4684264063835144, "learning_rate": 4.5815539658397765e-06, "loss": 0.3251, "step": 36750 }, { "epoch": 0.6823034052049026, "grad_norm": 0.2644585967063904, "learning_rate": 4.580573592163876e-06, "loss": 0.2619, "step": 36752 }, { "epoch": 0.6823405353423213, "grad_norm": 0.3051812946796417, "learning_rate": 4.579593292230135e-06, "loss": 0.2618, "step": 36754 }, { "epoch": 0.68237766547974, "grad_norm": 0.3321031928062439, "learning_rate": 4.578613066051894e-06, "loss": 0.2601, "step": 36756 }, { "epoch": 0.6824147956171586, "grad_norm": 0.29542508721351624, "learning_rate": 4.577632913642489e-06, "loss": 0.2886, "step": 36758 }, { "epoch": 0.6824519257545772, "grad_norm": 0.6704930663108826, "learning_rate": 4.5766528350152625e-06, "loss": 0.2636, "step": 36760 }, { "epoch": 0.6824890558919958, "grad_norm": 0.3154360353946686, "learning_rate": 4.575672830183542e-06, "loss": 0.2514, "step": 36762 }, { "epoch": 0.6825261860294145, "grad_norm": 0.5168063044548035, "learning_rate": 4.574692899160671e-06, "loss": 0.319, "step": 36764 }, { "epoch": 0.6825633161668331, "grad_norm": 0.3341941833496094, "learning_rate": 4.573713041959975e-06, "loss": 0.4922, "step": 36766 }, { "epoch": 0.6826004463042518, "grad_norm": 0.48993566632270813, "learning_rate": 4.57273325859479e-06, "loss": 0.2922, "step": 36768 }, { "epoch": 0.6826375764416704, "grad_norm": 0.22482529282569885, "learning_rate": 4.5717535490784505e-06, "loss": 0.2624, "step": 36770 }, { "epoch": 0.682674706579089, "grad_norm": 0.5108135342597961, "learning_rate": 4.570773913424285e-06, "loss": 0.2064, "step": 36772 }, { "epoch": 0.6827118367165077, "grad_norm": 0.6251305341720581, "learning_rate": 4.5697943516456265e-06, "loss": 0.3208, "step": 36774 }, { "epoch": 0.6827489668539263, "grad_norm": 0.3603210151195526, "learning_rate": 4.5688148637557995e-06, "loss": 0.2277, "step": 36776 }, { "epoch": 0.682786096991345, "grad_norm": 0.38427647948265076, "learning_rate": 4.567835449768133e-06, "loss": 0.4129, "step": 36778 }, { "epoch": 0.6828232271287636, "grad_norm": 0.2830137312412262, "learning_rate": 4.5668561096959555e-06, "loss": 0.2281, "step": 36780 }, { "epoch": 0.6828603572661822, "grad_norm": 0.40049147605895996, "learning_rate": 4.565876843552595e-06, "loss": 0.4461, "step": 36782 }, { "epoch": 0.6828974874036009, "grad_norm": 0.2534545660018921, "learning_rate": 4.564897651351371e-06, "loss": 0.3604, "step": 36784 }, { "epoch": 0.6829346175410195, "grad_norm": 0.25542131066322327, "learning_rate": 4.563918533105612e-06, "loss": 0.3382, "step": 36786 }, { "epoch": 0.6829717476784382, "grad_norm": 0.427701473236084, "learning_rate": 4.562939488828637e-06, "loss": 0.185, "step": 36788 }, { "epoch": 0.6830088778158568, "grad_norm": 0.3095892071723938, "learning_rate": 4.561960518533769e-06, "loss": 0.198, "step": 36790 }, { "epoch": 0.6830460079532754, "grad_norm": 0.2594239413738251, "learning_rate": 4.56098162223433e-06, "loss": 0.2297, "step": 36792 }, { "epoch": 0.6830831380906941, "grad_norm": 0.31070205569267273, "learning_rate": 4.560002799943638e-06, "loss": 0.328, "step": 36794 }, { "epoch": 0.6831202682281127, "grad_norm": 0.35155758261680603, "learning_rate": 4.559024051675014e-06, "loss": 0.271, "step": 36796 }, { "epoch": 0.6831573983655314, "grad_norm": 0.3239842653274536, "learning_rate": 4.558045377441774e-06, "loss": 0.3507, "step": 36798 }, { "epoch": 0.68319452850295, "grad_norm": 0.5962363481521606, "learning_rate": 4.557066777257242e-06, "loss": 0.1329, "step": 36800 }, { "epoch": 0.6832316586403686, "grad_norm": 0.3601365089416504, "learning_rate": 4.556088251134722e-06, "loss": 0.3814, "step": 36802 }, { "epoch": 0.6832687887777873, "grad_norm": 0.4445032775402069, "learning_rate": 4.5551097990875395e-06, "loss": 0.2941, "step": 36804 }, { "epoch": 0.6833059189152059, "grad_norm": 0.360127329826355, "learning_rate": 4.554131421128998e-06, "loss": 0.2534, "step": 36806 }, { "epoch": 0.6833430490526246, "grad_norm": 0.5423081517219543, "learning_rate": 4.553153117272417e-06, "loss": 0.2391, "step": 36808 }, { "epoch": 0.6833801791900432, "grad_norm": 0.33139416575431824, "learning_rate": 4.552174887531105e-06, "loss": 0.2029, "step": 36810 }, { "epoch": 0.6834173093274618, "grad_norm": 0.31729045510292053, "learning_rate": 4.55119673191838e-06, "loss": 0.3296, "step": 36812 }, { "epoch": 0.6834544394648805, "grad_norm": 0.3390863537788391, "learning_rate": 4.550218650447543e-06, "loss": 0.3025, "step": 36814 }, { "epoch": 0.6834915696022991, "grad_norm": 0.6686185002326965, "learning_rate": 4.549240643131906e-06, "loss": 0.327, "step": 36816 }, { "epoch": 0.6835286997397177, "grad_norm": 0.29599666595458984, "learning_rate": 4.548262709984777e-06, "loss": 0.2591, "step": 36818 }, { "epoch": 0.6835658298771363, "grad_norm": 0.5164443850517273, "learning_rate": 4.547284851019461e-06, "loss": 0.1666, "step": 36820 }, { "epoch": 0.683602960014555, "grad_norm": 0.28643471002578735, "learning_rate": 4.546307066249267e-06, "loss": 0.284, "step": 36822 }, { "epoch": 0.6836400901519737, "grad_norm": 0.3506264090538025, "learning_rate": 4.545329355687501e-06, "loss": 0.3737, "step": 36824 }, { "epoch": 0.6836772202893923, "grad_norm": 0.3941076397895813, "learning_rate": 4.5443517193474605e-06, "loss": 0.2529, "step": 36826 }, { "epoch": 0.683714350426811, "grad_norm": 0.37025681138038635, "learning_rate": 4.543374157242456e-06, "loss": 0.2857, "step": 36828 }, { "epoch": 0.6837514805642295, "grad_norm": 0.3864564895629883, "learning_rate": 4.54239666938578e-06, "loss": 0.3116, "step": 36830 }, { "epoch": 0.6837886107016482, "grad_norm": 0.844323992729187, "learning_rate": 4.541419255790739e-06, "loss": 0.5784, "step": 36832 }, { "epoch": 0.6838257408390669, "grad_norm": 0.5115868449211121, "learning_rate": 4.540441916470631e-06, "loss": 0.2487, "step": 36834 }, { "epoch": 0.6838628709764855, "grad_norm": 0.17546138167381287, "learning_rate": 4.539464651438754e-06, "loss": 0.088, "step": 36836 }, { "epoch": 0.6839000011139041, "grad_norm": 0.38474082946777344, "learning_rate": 4.5384874607084114e-06, "loss": 0.1687, "step": 36838 }, { "epoch": 0.6839371312513227, "grad_norm": 0.6710985898971558, "learning_rate": 4.5375103442928926e-06, "loss": 0.3016, "step": 36840 }, { "epoch": 0.6839742613887414, "grad_norm": 0.4241836965084076, "learning_rate": 4.536533302205496e-06, "loss": 0.2908, "step": 36842 }, { "epoch": 0.6840113915261601, "grad_norm": 0.49030038714408875, "learning_rate": 4.535556334459516e-06, "loss": 0.116, "step": 36844 }, { "epoch": 0.6840485216635787, "grad_norm": 0.33132204413414, "learning_rate": 4.534579441068249e-06, "loss": 0.0841, "step": 36846 }, { "epoch": 0.6840856518009973, "grad_norm": 0.5838752388954163, "learning_rate": 4.533602622044981e-06, "loss": 0.2382, "step": 36848 }, { "epoch": 0.6841227819384159, "grad_norm": 0.2662561237812042, "learning_rate": 4.532625877403013e-06, "loss": 0.2316, "step": 36850 }, { "epoch": 0.6841599120758346, "grad_norm": 0.3253823220729828, "learning_rate": 4.531649207155625e-06, "loss": 0.2387, "step": 36852 }, { "epoch": 0.6841970422132533, "grad_norm": 0.4482451379299164, "learning_rate": 4.530672611316112e-06, "loss": 0.2095, "step": 36854 }, { "epoch": 0.6842341723506719, "grad_norm": 0.4497491121292114, "learning_rate": 4.529696089897762e-06, "loss": 0.3685, "step": 36856 }, { "epoch": 0.6842713024880905, "grad_norm": 0.31457987427711487, "learning_rate": 4.528719642913863e-06, "loss": 0.3079, "step": 36858 }, { "epoch": 0.6843084326255091, "grad_norm": 0.40683087706565857, "learning_rate": 4.5277432703777025e-06, "loss": 0.1596, "step": 36860 }, { "epoch": 0.6843455627629278, "grad_norm": 0.5581992864608765, "learning_rate": 4.5267669723025674e-06, "loss": 0.3008, "step": 36862 }, { "epoch": 0.6843826929003465, "grad_norm": 0.3747551441192627, "learning_rate": 4.525790748701736e-06, "loss": 0.3854, "step": 36864 }, { "epoch": 0.684419823037765, "grad_norm": 0.18551938235759735, "learning_rate": 4.524814599588495e-06, "loss": 0.1623, "step": 36866 }, { "epoch": 0.6844569531751837, "grad_norm": 0.3616929054260254, "learning_rate": 4.523838524976132e-06, "loss": 0.3776, "step": 36868 }, { "epoch": 0.6844940833126023, "grad_norm": 0.3146587908267975, "learning_rate": 4.522862524877919e-06, "loss": 0.3231, "step": 36870 }, { "epoch": 0.684531213450021, "grad_norm": 0.28498008847236633, "learning_rate": 4.52188659930714e-06, "loss": 0.2179, "step": 36872 }, { "epoch": 0.6845683435874396, "grad_norm": 0.6642209887504578, "learning_rate": 4.520910748277081e-06, "loss": 0.3281, "step": 36874 }, { "epoch": 0.6846054737248582, "grad_norm": 0.2693619430065155, "learning_rate": 4.519934971801011e-06, "loss": 0.2749, "step": 36876 }, { "epoch": 0.6846426038622769, "grad_norm": 0.3265398144721985, "learning_rate": 4.518959269892211e-06, "loss": 0.3605, "step": 36878 }, { "epoch": 0.6846797339996955, "grad_norm": 0.3953653573989868, "learning_rate": 4.5179836425639574e-06, "loss": 0.3328, "step": 36880 }, { "epoch": 0.6847168641371142, "grad_norm": 0.41256698966026306, "learning_rate": 4.517008089829527e-06, "loss": 0.4427, "step": 36882 }, { "epoch": 0.6847539942745328, "grad_norm": 0.4884564280509949, "learning_rate": 4.516032611702192e-06, "loss": 0.3233, "step": 36884 }, { "epoch": 0.6847911244119514, "grad_norm": 0.30782806873321533, "learning_rate": 4.515057208195227e-06, "loss": 0.3393, "step": 36886 }, { "epoch": 0.6848282545493701, "grad_norm": 0.4034641981124878, "learning_rate": 4.514081879321909e-06, "loss": 0.1036, "step": 36888 }, { "epoch": 0.6848653846867887, "grad_norm": 0.24981912970542908, "learning_rate": 4.513106625095503e-06, "loss": 0.036, "step": 36890 }, { "epoch": 0.6849025148242074, "grad_norm": 0.46677884459495544, "learning_rate": 4.512131445529278e-06, "loss": 0.2502, "step": 36892 }, { "epoch": 0.684939644961626, "grad_norm": 0.26096880435943604, "learning_rate": 4.511156340636504e-06, "loss": 0.3246, "step": 36894 }, { "epoch": 0.6849767750990446, "grad_norm": 0.44398149847984314, "learning_rate": 4.510181310430453e-06, "loss": 0.519, "step": 36896 }, { "epoch": 0.6850139052364633, "grad_norm": 0.44387316703796387, "learning_rate": 4.509206354924389e-06, "loss": 0.3591, "step": 36898 }, { "epoch": 0.6850510353738819, "grad_norm": 0.25748056173324585, "learning_rate": 4.508231474131585e-06, "loss": 0.1555, "step": 36900 }, { "epoch": 0.6850881655113006, "grad_norm": 0.3737777769565582, "learning_rate": 4.507256668065296e-06, "loss": 0.377, "step": 36902 }, { "epoch": 0.6851252956487192, "grad_norm": 0.3655168414115906, "learning_rate": 4.506281936738792e-06, "loss": 0.3959, "step": 36904 }, { "epoch": 0.6851624257861378, "grad_norm": 0.5662994980812073, "learning_rate": 4.505307280165335e-06, "loss": 0.2217, "step": 36906 }, { "epoch": 0.6851995559235565, "grad_norm": 0.48609688878059387, "learning_rate": 4.504332698358187e-06, "loss": 0.4649, "step": 36908 }, { "epoch": 0.6852366860609751, "grad_norm": 0.33742737770080566, "learning_rate": 4.5033581913306135e-06, "loss": 0.2685, "step": 36910 }, { "epoch": 0.6852738161983938, "grad_norm": 0.3411497175693512, "learning_rate": 4.502383759095871e-06, "loss": 0.3153, "step": 36912 }, { "epoch": 0.6853109463358124, "grad_norm": 0.564539909362793, "learning_rate": 4.501409401667214e-06, "loss": 0.2923, "step": 36914 }, { "epoch": 0.685348076473231, "grad_norm": 0.31454044580459595, "learning_rate": 4.500435119057905e-06, "loss": 0.4349, "step": 36916 }, { "epoch": 0.6853852066106496, "grad_norm": 0.49167755246162415, "learning_rate": 4.499460911281201e-06, "loss": 0.2544, "step": 36918 }, { "epoch": 0.6854223367480683, "grad_norm": 0.4777070879936218, "learning_rate": 4.4984867783503586e-06, "loss": 0.1857, "step": 36920 }, { "epoch": 0.685459466885487, "grad_norm": 0.48895907402038574, "learning_rate": 4.497512720278631e-06, "loss": 0.3379, "step": 36922 }, { "epoch": 0.6854965970229056, "grad_norm": 0.28372129797935486, "learning_rate": 4.496538737079273e-06, "loss": 0.3508, "step": 36924 }, { "epoch": 0.6855337271603242, "grad_norm": 0.19507038593292236, "learning_rate": 4.495564828765544e-06, "loss": 0.2041, "step": 36926 }, { "epoch": 0.6855708572977428, "grad_norm": 0.43964684009552, "learning_rate": 4.494590995350685e-06, "loss": 0.2629, "step": 36928 }, { "epoch": 0.6856079874351615, "grad_norm": 0.2153591513633728, "learning_rate": 4.493617236847951e-06, "loss": 0.0253, "step": 36930 }, { "epoch": 0.6856451175725802, "grad_norm": 0.3572542369365692, "learning_rate": 4.492643553270598e-06, "loss": 0.1542, "step": 36932 }, { "epoch": 0.6856822477099987, "grad_norm": 0.5211487412452698, "learning_rate": 4.491669944631865e-06, "loss": 0.1944, "step": 36934 }, { "epoch": 0.6857193778474174, "grad_norm": 0.22446173429489136, "learning_rate": 4.490696410945005e-06, "loss": 0.1699, "step": 36936 }, { "epoch": 0.685756507984836, "grad_norm": 0.48938754200935364, "learning_rate": 4.48972295222327e-06, "loss": 0.3704, "step": 36938 }, { "epoch": 0.6857936381222547, "grad_norm": 0.3289289176464081, "learning_rate": 4.488749568479894e-06, "loss": 0.2067, "step": 36940 }, { "epoch": 0.6858307682596734, "grad_norm": 0.2854820489883423, "learning_rate": 4.487776259728131e-06, "loss": 0.1188, "step": 36942 }, { "epoch": 0.685867898397092, "grad_norm": 0.34336429834365845, "learning_rate": 4.48680302598122e-06, "loss": 0.3619, "step": 36944 }, { "epoch": 0.6859050285345106, "grad_norm": 0.43612515926361084, "learning_rate": 4.485829867252407e-06, "loss": 0.4179, "step": 36946 }, { "epoch": 0.6859421586719292, "grad_norm": 0.5785712599754333, "learning_rate": 4.484856783554932e-06, "loss": 0.1893, "step": 36948 }, { "epoch": 0.6859792888093479, "grad_norm": 0.4804275631904602, "learning_rate": 4.48388377490204e-06, "loss": 0.2878, "step": 36950 }, { "epoch": 0.6860164189467666, "grad_norm": 0.4517059922218323, "learning_rate": 4.482910841306964e-06, "loss": 0.2752, "step": 36952 }, { "epoch": 0.6860535490841851, "grad_norm": 0.47712597250938416, "learning_rate": 4.481937982782949e-06, "loss": 0.3416, "step": 36954 }, { "epoch": 0.6860906792216038, "grad_norm": 0.5473611354827881, "learning_rate": 4.480965199343226e-06, "loss": 0.263, "step": 36956 }, { "epoch": 0.6861278093590224, "grad_norm": 0.4020240306854248, "learning_rate": 4.479992491001035e-06, "loss": 0.1193, "step": 36958 }, { "epoch": 0.6861649394964411, "grad_norm": 0.48071375489234924, "learning_rate": 4.4790198577696134e-06, "loss": 0.3832, "step": 36960 }, { "epoch": 0.6862020696338598, "grad_norm": 0.3266705572605133, "learning_rate": 4.4780472996621925e-06, "loss": 0.3906, "step": 36962 }, { "epoch": 0.6862391997712783, "grad_norm": 0.4404544234275818, "learning_rate": 4.4770748166920115e-06, "loss": 0.3278, "step": 36964 }, { "epoch": 0.686276329908697, "grad_norm": 0.19739089906215668, "learning_rate": 4.476102408872296e-06, "loss": 0.2208, "step": 36966 }, { "epoch": 0.6863134600461156, "grad_norm": 0.3867587447166443, "learning_rate": 4.4751300762162805e-06, "loss": 0.2956, "step": 36968 }, { "epoch": 0.6863505901835343, "grad_norm": 0.2025323510169983, "learning_rate": 4.474157818737197e-06, "loss": 0.2535, "step": 36970 }, { "epoch": 0.6863877203209529, "grad_norm": 0.42796045541763306, "learning_rate": 4.473185636448276e-06, "loss": 0.0804, "step": 36972 }, { "epoch": 0.6864248504583715, "grad_norm": 0.4834670126438141, "learning_rate": 4.47221352936274e-06, "loss": 0.4108, "step": 36974 }, { "epoch": 0.6864619805957902, "grad_norm": 0.7377327084541321, "learning_rate": 4.471241497493824e-06, "loss": 0.1303, "step": 36976 }, { "epoch": 0.6864991107332088, "grad_norm": 0.3907416760921478, "learning_rate": 4.470269540854748e-06, "loss": 0.2058, "step": 36978 }, { "epoch": 0.6865362408706275, "grad_norm": 0.2603280544281006, "learning_rate": 4.46929765945874e-06, "loss": 0.2996, "step": 36980 }, { "epoch": 0.686573371008046, "grad_norm": 0.5800443291664124, "learning_rate": 4.468325853319024e-06, "loss": 0.3388, "step": 36982 }, { "epoch": 0.6866105011454647, "grad_norm": 0.5566034913063049, "learning_rate": 4.467354122448824e-06, "loss": 0.2598, "step": 36984 }, { "epoch": 0.6866476312828834, "grad_norm": 0.39501988887786865, "learning_rate": 4.466382466861362e-06, "loss": 0.2347, "step": 36986 }, { "epoch": 0.686684761420302, "grad_norm": 0.5609928369522095, "learning_rate": 4.465410886569863e-06, "loss": 0.2663, "step": 36988 }, { "epoch": 0.6867218915577207, "grad_norm": 0.3192872107028961, "learning_rate": 4.46443938158754e-06, "loss": 0.3682, "step": 36990 }, { "epoch": 0.6867590216951392, "grad_norm": 0.27095746994018555, "learning_rate": 4.463467951927617e-06, "loss": 0.2927, "step": 36992 }, { "epoch": 0.6867961518325579, "grad_norm": 0.5867243409156799, "learning_rate": 4.4624965976033105e-06, "loss": 0.1791, "step": 36994 }, { "epoch": 0.6868332819699766, "grad_norm": 0.1843239665031433, "learning_rate": 4.461525318627843e-06, "loss": 0.2847, "step": 36996 }, { "epoch": 0.6868704121073952, "grad_norm": 0.39618611335754395, "learning_rate": 4.460554115014421e-06, "loss": 0.2212, "step": 36998 }, { "epoch": 0.6869075422448139, "grad_norm": 0.4728761911392212, "learning_rate": 4.4595829867762705e-06, "loss": 0.2755, "step": 37000 }, { "epoch": 0.6869446723822324, "grad_norm": 0.39870527386665344, "learning_rate": 4.4586119339265955e-06, "loss": 0.3097, "step": 37002 }, { "epoch": 0.6869818025196511, "grad_norm": 0.2768265902996063, "learning_rate": 4.457640956478614e-06, "loss": 0.1313, "step": 37004 }, { "epoch": 0.6870189326570698, "grad_norm": 0.44751498103141785, "learning_rate": 4.456670054445538e-06, "loss": 0.2012, "step": 37006 }, { "epoch": 0.6870560627944884, "grad_norm": 0.7260276079177856, "learning_rate": 4.455699227840577e-06, "loss": 0.3053, "step": 37008 }, { "epoch": 0.6870931929319071, "grad_norm": 0.3138602077960968, "learning_rate": 4.454728476676944e-06, "loss": 0.1375, "step": 37010 }, { "epoch": 0.6871303230693256, "grad_norm": 0.35546284914016724, "learning_rate": 4.453757800967845e-06, "loss": 0.2777, "step": 37012 }, { "epoch": 0.6871674532067443, "grad_norm": 0.2587350308895111, "learning_rate": 4.452787200726495e-06, "loss": 0.2965, "step": 37014 }, { "epoch": 0.687204583344163, "grad_norm": 0.35835838317871094, "learning_rate": 4.451816675966089e-06, "loss": 0.2871, "step": 37016 }, { "epoch": 0.6872417134815816, "grad_norm": 0.3472989499568939, "learning_rate": 4.450846226699845e-06, "loss": 0.2521, "step": 37018 }, { "epoch": 0.6872788436190003, "grad_norm": 0.4447984993457794, "learning_rate": 4.449875852940959e-06, "loss": 0.1473, "step": 37020 }, { "epoch": 0.6873159737564188, "grad_norm": 0.3388555347919464, "learning_rate": 4.448905554702637e-06, "loss": 0.1882, "step": 37022 }, { "epoch": 0.6873531038938375, "grad_norm": 0.27160486578941345, "learning_rate": 4.447935331998083e-06, "loss": 0.3541, "step": 37024 }, { "epoch": 0.6873902340312561, "grad_norm": 0.35241034626960754, "learning_rate": 4.446965184840503e-06, "loss": 0.2386, "step": 37026 }, { "epoch": 0.6874273641686748, "grad_norm": 0.29288753867149353, "learning_rate": 4.445995113243089e-06, "loss": 0.1509, "step": 37028 }, { "epoch": 0.6874644943060935, "grad_norm": 0.3887053430080414, "learning_rate": 4.445025117219046e-06, "loss": 0.3515, "step": 37030 }, { "epoch": 0.687501624443512, "grad_norm": 0.34022843837738037, "learning_rate": 4.444055196781572e-06, "loss": 0.2832, "step": 37032 }, { "epoch": 0.6875387545809307, "grad_norm": 0.4085644483566284, "learning_rate": 4.443085351943865e-06, "loss": 0.359, "step": 37034 }, { "epoch": 0.6875758847183493, "grad_norm": 0.44787487387657166, "learning_rate": 4.442115582719124e-06, "loss": 0.3389, "step": 37036 }, { "epoch": 0.687613014855768, "grad_norm": 0.2517201602458954, "learning_rate": 4.441145889120539e-06, "loss": 0.216, "step": 37038 }, { "epoch": 0.6876501449931867, "grad_norm": 0.3492571711540222, "learning_rate": 4.440176271161313e-06, "loss": 0.222, "step": 37040 }, { "epoch": 0.6876872751306052, "grad_norm": 0.6720255613327026, "learning_rate": 4.43920672885463e-06, "loss": 0.3845, "step": 37042 }, { "epoch": 0.6877244052680239, "grad_norm": 0.36674150824546814, "learning_rate": 4.438237262213685e-06, "loss": 0.2585, "step": 37044 }, { "epoch": 0.6877615354054425, "grad_norm": 0.31624552607536316, "learning_rate": 4.437267871251674e-06, "loss": 0.2443, "step": 37046 }, { "epoch": 0.6877986655428612, "grad_norm": 0.37729647755622864, "learning_rate": 4.4362985559817825e-06, "loss": 0.2184, "step": 37048 }, { "epoch": 0.6878357956802799, "grad_norm": 0.32645153999328613, "learning_rate": 4.435329316417204e-06, "loss": 0.1479, "step": 37050 }, { "epoch": 0.6878729258176984, "grad_norm": 0.37583091855049133, "learning_rate": 4.434360152571129e-06, "loss": 0.2287, "step": 37052 }, { "epoch": 0.6879100559551171, "grad_norm": 0.5464087724685669, "learning_rate": 4.433391064456737e-06, "loss": 0.2527, "step": 37054 }, { "epoch": 0.6879471860925357, "grad_norm": 0.5519949793815613, "learning_rate": 4.43242205208722e-06, "loss": 0.2344, "step": 37056 }, { "epoch": 0.6879843162299544, "grad_norm": 0.506227433681488, "learning_rate": 4.431453115475764e-06, "loss": 0.3044, "step": 37058 }, { "epoch": 0.6880214463673731, "grad_norm": 0.43618252873420715, "learning_rate": 4.430484254635548e-06, "loss": 0.2465, "step": 37060 }, { "epoch": 0.6880585765047916, "grad_norm": 0.45950615406036377, "learning_rate": 4.429515469579759e-06, "loss": 0.3313, "step": 37062 }, { "epoch": 0.6880957066422103, "grad_norm": 0.6889962553977966, "learning_rate": 4.428546760321584e-06, "loss": 0.3683, "step": 37064 }, { "epoch": 0.6881328367796289, "grad_norm": 0.4205383360385895, "learning_rate": 4.427578126874193e-06, "loss": 0.3777, "step": 37066 }, { "epoch": 0.6881699669170476, "grad_norm": 0.32074326276779175, "learning_rate": 4.426609569250774e-06, "loss": 0.4353, "step": 37068 }, { "epoch": 0.6882070970544661, "grad_norm": 0.2948293387889862, "learning_rate": 4.425641087464504e-06, "loss": 0.3363, "step": 37070 }, { "epoch": 0.6882442271918848, "grad_norm": 0.25266048312187195, "learning_rate": 4.424672681528563e-06, "loss": 0.2227, "step": 37072 }, { "epoch": 0.6882813573293035, "grad_norm": 0.47830379009246826, "learning_rate": 4.4237043514561254e-06, "loss": 0.2452, "step": 37074 }, { "epoch": 0.6883184874667221, "grad_norm": 0.3104912042617798, "learning_rate": 4.422736097260372e-06, "loss": 0.2647, "step": 37076 }, { "epoch": 0.6883556176041408, "grad_norm": 0.400849312543869, "learning_rate": 4.42176791895447e-06, "loss": 0.2601, "step": 37078 }, { "epoch": 0.6883927477415593, "grad_norm": 0.3141663670539856, "learning_rate": 4.420799816551604e-06, "loss": 0.2259, "step": 37080 }, { "epoch": 0.688429877878978, "grad_norm": 0.3414446711540222, "learning_rate": 4.419831790064936e-06, "loss": 0.095, "step": 37082 }, { "epoch": 0.6884670080163967, "grad_norm": 0.4722117483615875, "learning_rate": 4.418863839507642e-06, "loss": 0.2639, "step": 37084 }, { "epoch": 0.6885041381538153, "grad_norm": 0.29620495438575745, "learning_rate": 4.417895964892895e-06, "loss": 0.335, "step": 37086 }, { "epoch": 0.688541268291234, "grad_norm": 0.6854696273803711, "learning_rate": 4.416928166233862e-06, "loss": 0.1591, "step": 37088 }, { "epoch": 0.6885783984286525, "grad_norm": 0.40646234154701233, "learning_rate": 4.415960443543718e-06, "loss": 0.4555, "step": 37090 }, { "epoch": 0.6886155285660712, "grad_norm": 0.32580870389938354, "learning_rate": 4.414992796835624e-06, "loss": 0.158, "step": 37092 }, { "epoch": 0.6886526587034899, "grad_norm": 0.25465288758277893, "learning_rate": 4.414025226122748e-06, "loss": 0.3046, "step": 37094 }, { "epoch": 0.6886897888409085, "grad_norm": 0.6603150963783264, "learning_rate": 4.413057731418257e-06, "loss": 0.1845, "step": 37096 }, { "epoch": 0.6887269189783272, "grad_norm": 0.24706041812896729, "learning_rate": 4.4120903127353145e-06, "loss": 0.3081, "step": 37098 }, { "epoch": 0.6887640491157457, "grad_norm": 0.2862301170825958, "learning_rate": 4.411122970087089e-06, "loss": 0.1914, "step": 37100 }, { "epoch": 0.6888011792531644, "grad_norm": 0.47546929121017456, "learning_rate": 4.410155703486736e-06, "loss": 0.3477, "step": 37102 }, { "epoch": 0.6888383093905831, "grad_norm": 0.36997124552726746, "learning_rate": 4.409188512947423e-06, "loss": 0.5391, "step": 37104 }, { "epoch": 0.6888754395280017, "grad_norm": 0.2447068989276886, "learning_rate": 4.408221398482305e-06, "loss": 0.235, "step": 37106 }, { "epoch": 0.6889125696654204, "grad_norm": 0.2989845871925354, "learning_rate": 4.407254360104545e-06, "loss": 0.1913, "step": 37108 }, { "epoch": 0.6889496998028389, "grad_norm": 0.45974108576774597, "learning_rate": 4.4062873978273e-06, "loss": 0.3624, "step": 37110 }, { "epoch": 0.6889868299402576, "grad_norm": 0.3065503239631653, "learning_rate": 4.405320511663729e-06, "loss": 0.2184, "step": 37112 }, { "epoch": 0.6890239600776763, "grad_norm": 0.36659955978393555, "learning_rate": 4.404353701626991e-06, "loss": 0.0586, "step": 37114 }, { "epoch": 0.6890610902150949, "grad_norm": 0.536537766456604, "learning_rate": 4.4033869677302355e-06, "loss": 0.3229, "step": 37116 }, { "epoch": 0.6890982203525136, "grad_norm": 0.689934253692627, "learning_rate": 4.402420309986619e-06, "loss": 0.2022, "step": 37118 }, { "epoch": 0.6891353504899321, "grad_norm": 0.29262688755989075, "learning_rate": 4.401453728409295e-06, "loss": 0.1958, "step": 37120 }, { "epoch": 0.6891724806273508, "grad_norm": 0.3789560794830322, "learning_rate": 4.40048722301142e-06, "loss": 0.418, "step": 37122 }, { "epoch": 0.6892096107647694, "grad_norm": 0.26866623759269714, "learning_rate": 4.399520793806138e-06, "loss": 0.1477, "step": 37124 }, { "epoch": 0.6892467409021881, "grad_norm": 0.2915765941143036, "learning_rate": 4.398554440806603e-06, "loss": 0.2777, "step": 37126 }, { "epoch": 0.6892838710396068, "grad_norm": 0.527785062789917, "learning_rate": 4.3975881640259665e-06, "loss": 0.1827, "step": 37128 }, { "epoch": 0.6893210011770253, "grad_norm": 0.336327463388443, "learning_rate": 4.396621963477371e-06, "loss": 0.3295, "step": 37130 }, { "epoch": 0.689358131314444, "grad_norm": 0.2956906259059906, "learning_rate": 4.395655839173967e-06, "loss": 0.289, "step": 37132 }, { "epoch": 0.6893952614518626, "grad_norm": 0.33871522545814514, "learning_rate": 4.3946897911288985e-06, "loss": 0.4447, "step": 37134 }, { "epoch": 0.6894323915892813, "grad_norm": 0.5412474870681763, "learning_rate": 4.3937238193553135e-06, "loss": 0.1601, "step": 37136 }, { "epoch": 0.6894695217267, "grad_norm": 0.557263195514679, "learning_rate": 4.392757923866354e-06, "loss": 0.2278, "step": 37138 }, { "epoch": 0.6895066518641185, "grad_norm": 0.4089566171169281, "learning_rate": 4.391792104675167e-06, "loss": 0.382, "step": 37140 }, { "epoch": 0.6895437820015372, "grad_norm": 0.4293087422847748, "learning_rate": 4.390826361794888e-06, "loss": 0.2292, "step": 37142 }, { "epoch": 0.6895809121389558, "grad_norm": 0.4252699911594391, "learning_rate": 4.389860695238664e-06, "loss": 0.2873, "step": 37144 }, { "epoch": 0.6896180422763745, "grad_norm": 0.2242163121700287, "learning_rate": 4.388895105019627e-06, "loss": 0.1375, "step": 37146 }, { "epoch": 0.6896551724137931, "grad_norm": 0.6254360675811768, "learning_rate": 4.387929591150922e-06, "loss": 0.2736, "step": 37148 }, { "epoch": 0.6896923025512117, "grad_norm": 0.3510236442089081, "learning_rate": 4.386964153645684e-06, "loss": 0.2894, "step": 37150 }, { "epoch": 0.6897294326886304, "grad_norm": 0.48587706685066223, "learning_rate": 4.385998792517056e-06, "loss": 0.3058, "step": 37152 }, { "epoch": 0.689766562826049, "grad_norm": 0.45913079380989075, "learning_rate": 4.385033507778163e-06, "loss": 0.2292, "step": 37154 }, { "epoch": 0.6898036929634677, "grad_norm": 0.33906930685043335, "learning_rate": 4.3840682994421465e-06, "loss": 0.1824, "step": 37156 }, { "epoch": 0.6898408231008863, "grad_norm": 0.41159459948539734, "learning_rate": 4.383103167522138e-06, "loss": 0.346, "step": 37158 }, { "epoch": 0.6898779532383049, "grad_norm": 0.4278871417045593, "learning_rate": 4.382138112031271e-06, "loss": 0.2318, "step": 37160 }, { "epoch": 0.6899150833757236, "grad_norm": 0.39938104152679443, "learning_rate": 4.381173132982678e-06, "loss": 0.3812, "step": 37162 }, { "epoch": 0.6899522135131422, "grad_norm": 0.3679214119911194, "learning_rate": 4.380208230389491e-06, "loss": 0.0884, "step": 37164 }, { "epoch": 0.6899893436505609, "grad_norm": 0.35381996631622314, "learning_rate": 4.3792434042648365e-06, "loss": 0.2069, "step": 37166 }, { "epoch": 0.6900264737879795, "grad_norm": 0.24392426013946533, "learning_rate": 4.378278654621841e-06, "loss": 0.1493, "step": 37168 }, { "epoch": 0.6900636039253981, "grad_norm": 0.4757721722126007, "learning_rate": 4.377313981473632e-06, "loss": 0.0844, "step": 37170 }, { "epoch": 0.6901007340628168, "grad_norm": 0.4118558168411255, "learning_rate": 4.3763493848333385e-06, "loss": 0.4053, "step": 37172 }, { "epoch": 0.6901378642002354, "grad_norm": 0.24378761649131775, "learning_rate": 4.375384864714085e-06, "loss": 0.3587, "step": 37174 }, { "epoch": 0.6901749943376541, "grad_norm": 0.46597692370414734, "learning_rate": 4.374420421128997e-06, "loss": 0.2149, "step": 37176 }, { "epoch": 0.6902121244750726, "grad_norm": 0.5553494095802307, "learning_rate": 4.373456054091199e-06, "loss": 0.1589, "step": 37178 }, { "epoch": 0.6902492546124913, "grad_norm": 0.2945960760116577, "learning_rate": 4.372491763613807e-06, "loss": 0.3541, "step": 37180 }, { "epoch": 0.69028638474991, "grad_norm": 0.40553995966911316, "learning_rate": 4.371527549709945e-06, "loss": 0.3902, "step": 37182 }, { "epoch": 0.6903235148873286, "grad_norm": 0.35749179124832153, "learning_rate": 4.370563412392735e-06, "loss": 0.1027, "step": 37184 }, { "epoch": 0.6903606450247473, "grad_norm": 0.5515786409378052, "learning_rate": 4.369599351675299e-06, "loss": 0.1691, "step": 37186 }, { "epoch": 0.6903977751621658, "grad_norm": 0.5842152833938599, "learning_rate": 4.368635367570746e-06, "loss": 0.3875, "step": 37188 }, { "epoch": 0.6904349052995845, "grad_norm": 0.41106826066970825, "learning_rate": 4.367671460092202e-06, "loss": 0.2702, "step": 37190 }, { "epoch": 0.6904720354370032, "grad_norm": 0.33555296063423157, "learning_rate": 4.366707629252775e-06, "loss": 0.2354, "step": 37192 }, { "epoch": 0.6905091655744218, "grad_norm": 0.2797924876213074, "learning_rate": 4.365743875065584e-06, "loss": 0.1641, "step": 37194 }, { "epoch": 0.6905462957118405, "grad_norm": 0.6033901572227478, "learning_rate": 4.364780197543741e-06, "loss": 0.3995, "step": 37196 }, { "epoch": 0.690583425849259, "grad_norm": 0.5848039984703064, "learning_rate": 4.363816596700361e-06, "loss": 0.3406, "step": 37198 }, { "epoch": 0.6906205559866777, "grad_norm": 0.5522786974906921, "learning_rate": 4.362853072548554e-06, "loss": 0.2111, "step": 37200 }, { "epoch": 0.6906576861240964, "grad_norm": 0.5165181159973145, "learning_rate": 4.361889625101435e-06, "loss": 0.2292, "step": 37202 }, { "epoch": 0.690694816261515, "grad_norm": 0.44205108284950256, "learning_rate": 4.360926254372107e-06, "loss": 0.2782, "step": 37204 }, { "epoch": 0.6907319463989336, "grad_norm": 0.29006579518318176, "learning_rate": 4.359962960373682e-06, "loss": 0.2812, "step": 37206 }, { "epoch": 0.6907690765363522, "grad_norm": 0.46617984771728516, "learning_rate": 4.358999743119271e-06, "loss": 0.2751, "step": 37208 }, { "epoch": 0.6908062066737709, "grad_norm": 0.5370455980300903, "learning_rate": 4.358036602621974e-06, "loss": 0.3863, "step": 37210 }, { "epoch": 0.6908433368111896, "grad_norm": 0.6497635841369629, "learning_rate": 4.357073538894897e-06, "loss": 0.2368, "step": 37212 }, { "epoch": 0.6908804669486082, "grad_norm": 0.5513054728507996, "learning_rate": 4.356110551951148e-06, "loss": 0.1863, "step": 37214 }, { "epoch": 0.6909175970860268, "grad_norm": 0.4180803596973419, "learning_rate": 4.355147641803832e-06, "loss": 0.3554, "step": 37216 }, { "epoch": 0.6909547272234454, "grad_norm": 0.41346290707588196, "learning_rate": 4.354184808466046e-06, "loss": 0.2747, "step": 37218 }, { "epoch": 0.6909918573608641, "grad_norm": 0.3466801643371582, "learning_rate": 4.353222051950892e-06, "loss": 0.1473, "step": 37220 }, { "epoch": 0.6910289874982827, "grad_norm": 0.29186904430389404, "learning_rate": 4.352259372271473e-06, "loss": 0.2756, "step": 37222 }, { "epoch": 0.6910661176357014, "grad_norm": 0.39038529992103577, "learning_rate": 4.351296769440885e-06, "loss": 0.409, "step": 37224 }, { "epoch": 0.69110324777312, "grad_norm": 0.48075252771377563, "learning_rate": 4.350334243472233e-06, "loss": 0.1707, "step": 37226 }, { "epoch": 0.6911403779105386, "grad_norm": 0.43311798572540283, "learning_rate": 4.349371794378606e-06, "loss": 0.1274, "step": 37228 }, { "epoch": 0.6911775080479573, "grad_norm": 0.3471451997756958, "learning_rate": 4.348409422173107e-06, "loss": 0.2148, "step": 37230 }, { "epoch": 0.6912146381853759, "grad_norm": 2.039639949798584, "learning_rate": 4.347447126868822e-06, "loss": 0.1433, "step": 37232 }, { "epoch": 0.6912517683227946, "grad_norm": 0.42357128858566284, "learning_rate": 4.346484908478852e-06, "loss": 0.2106, "step": 37234 }, { "epoch": 0.6912888984602132, "grad_norm": 0.4580634832382202, "learning_rate": 4.345522767016288e-06, "loss": 0.3317, "step": 37236 }, { "epoch": 0.6913260285976318, "grad_norm": 0.2992669939994812, "learning_rate": 4.3445607024942215e-06, "loss": 0.3064, "step": 37238 }, { "epoch": 0.6913631587350505, "grad_norm": 0.42594772577285767, "learning_rate": 4.343598714925747e-06, "loss": 0.1817, "step": 37240 }, { "epoch": 0.6914002888724691, "grad_norm": 0.42388495802879333, "learning_rate": 4.342636804323948e-06, "loss": 0.2835, "step": 37242 }, { "epoch": 0.6914374190098878, "grad_norm": 0.2760024666786194, "learning_rate": 4.3416749707019175e-06, "loss": 0.3488, "step": 37244 }, { "epoch": 0.6914745491473064, "grad_norm": 0.4250372648239136, "learning_rate": 4.340713214072741e-06, "loss": 0.4974, "step": 37246 }, { "epoch": 0.691511679284725, "grad_norm": 0.9084572196006775, "learning_rate": 4.339751534449505e-06, "loss": 0.1477, "step": 37248 }, { "epoch": 0.6915488094221437, "grad_norm": 0.47688087821006775, "learning_rate": 4.338789931845302e-06, "loss": 0.1786, "step": 37250 }, { "epoch": 0.6915859395595623, "grad_norm": 0.3504839241504669, "learning_rate": 4.3378284062732055e-06, "loss": 0.2847, "step": 37252 }, { "epoch": 0.691623069696981, "grad_norm": 0.3641328513622284, "learning_rate": 4.336866957746308e-06, "loss": 0.2366, "step": 37254 }, { "epoch": 0.6916601998343996, "grad_norm": 0.43362095952033997, "learning_rate": 4.335905586277685e-06, "loss": 0.3013, "step": 37256 }, { "epoch": 0.6916973299718182, "grad_norm": 0.21120557188987732, "learning_rate": 4.334944291880422e-06, "loss": 0.1342, "step": 37258 }, { "epoch": 0.6917344601092369, "grad_norm": 0.5634955763816833, "learning_rate": 4.333983074567597e-06, "loss": 0.1717, "step": 37260 }, { "epoch": 0.6917715902466555, "grad_norm": 0.46009761095046997, "learning_rate": 4.333021934352292e-06, "loss": 0.4335, "step": 37262 }, { "epoch": 0.6918087203840741, "grad_norm": 2.8798201084136963, "learning_rate": 4.332060871247582e-06, "loss": 0.1577, "step": 37264 }, { "epoch": 0.6918458505214928, "grad_norm": 0.40647372603416443, "learning_rate": 4.3310998852665505e-06, "loss": 0.2408, "step": 37266 }, { "epoch": 0.6918829806589114, "grad_norm": 0.4678947627544403, "learning_rate": 4.330138976422266e-06, "loss": 0.3207, "step": 37268 }, { "epoch": 0.6919201107963301, "grad_norm": 0.3865966200828552, "learning_rate": 4.329178144727807e-06, "loss": 0.4162, "step": 37270 }, { "epoch": 0.6919572409337487, "grad_norm": 0.3192349970340729, "learning_rate": 4.32821739019625e-06, "loss": 0.2819, "step": 37272 }, { "epoch": 0.6919943710711673, "grad_norm": 0.32833749055862427, "learning_rate": 4.327256712840663e-06, "loss": 0.2574, "step": 37274 }, { "epoch": 0.6920315012085859, "grad_norm": 0.5903516411781311, "learning_rate": 4.3262961126741195e-06, "loss": 0.3459, "step": 37276 }, { "epoch": 0.6920686313460046, "grad_norm": 0.2598225176334381, "learning_rate": 4.325335589709695e-06, "loss": 0.301, "step": 37278 }, { "epoch": 0.6921057614834233, "grad_norm": 0.5802574157714844, "learning_rate": 4.324375143960452e-06, "loss": 0.2321, "step": 37280 }, { "epoch": 0.6921428916208419, "grad_norm": 0.36529380083084106, "learning_rate": 4.323414775439463e-06, "loss": 0.3606, "step": 37282 }, { "epoch": 0.6921800217582605, "grad_norm": 0.34091514348983765, "learning_rate": 4.322454484159795e-06, "loss": 0.2104, "step": 37284 }, { "epoch": 0.6922171518956791, "grad_norm": 0.3278229534626007, "learning_rate": 4.321494270134515e-06, "loss": 0.2301, "step": 37286 }, { "epoch": 0.6922542820330978, "grad_norm": 0.4791397154331207, "learning_rate": 4.3205341333766895e-06, "loss": 0.4197, "step": 37288 }, { "epoch": 0.6922914121705165, "grad_norm": 0.5173566937446594, "learning_rate": 4.319574073899386e-06, "loss": 0.1477, "step": 37290 }, { "epoch": 0.6923285423079351, "grad_norm": 0.5060773491859436, "learning_rate": 4.3186140917156615e-06, "loss": 0.4774, "step": 37292 }, { "epoch": 0.6923656724453537, "grad_norm": 0.39532729983329773, "learning_rate": 4.3176541868385844e-06, "loss": 0.2268, "step": 37294 }, { "epoch": 0.6924028025827723, "grad_norm": 0.2902069389820099, "learning_rate": 4.316694359281209e-06, "loss": 0.145, "step": 37296 }, { "epoch": 0.692439932720191, "grad_norm": 0.4081248641014099, "learning_rate": 4.315734609056601e-06, "loss": 0.1907, "step": 37298 }, { "epoch": 0.6924770628576097, "grad_norm": 0.3958992660045624, "learning_rate": 4.314774936177818e-06, "loss": 0.3141, "step": 37300 }, { "epoch": 0.6925141929950283, "grad_norm": 0.3282458484172821, "learning_rate": 4.3138153406579196e-06, "loss": 0.3317, "step": 37302 }, { "epoch": 0.6925513231324469, "grad_norm": 0.5511623620986938, "learning_rate": 4.3128558225099645e-06, "loss": 0.2571, "step": 37304 }, { "epoch": 0.6925884532698655, "grad_norm": 0.3292279541492462, "learning_rate": 4.311896381747005e-06, "loss": 0.2361, "step": 37306 }, { "epoch": 0.6926255834072842, "grad_norm": 0.3538127541542053, "learning_rate": 4.3109370183820965e-06, "loss": 0.178, "step": 37308 }, { "epoch": 0.6926627135447029, "grad_norm": 0.4217687249183655, "learning_rate": 4.309977732428296e-06, "loss": 0.3557, "step": 37310 }, { "epoch": 0.6926998436821215, "grad_norm": 0.3295333683490753, "learning_rate": 4.309018523898657e-06, "loss": 0.3609, "step": 37312 }, { "epoch": 0.6927369738195401, "grad_norm": 0.47251802682876587, "learning_rate": 4.308059392806227e-06, "loss": 0.2639, "step": 37314 }, { "epoch": 0.6927741039569587, "grad_norm": 0.34967535734176636, "learning_rate": 4.307100339164061e-06, "loss": 0.3354, "step": 37316 }, { "epoch": 0.6928112340943774, "grad_norm": 0.5792964100837708, "learning_rate": 4.306141362985205e-06, "loss": 0.2415, "step": 37318 }, { "epoch": 0.6928483642317961, "grad_norm": 0.3297637104988098, "learning_rate": 4.30518246428271e-06, "loss": 0.2382, "step": 37320 }, { "epoch": 0.6928854943692146, "grad_norm": 0.435459703207016, "learning_rate": 4.304223643069624e-06, "loss": 0.1386, "step": 37322 }, { "epoch": 0.6929226245066333, "grad_norm": 0.3952932357788086, "learning_rate": 4.303264899358992e-06, "loss": 0.4376, "step": 37324 }, { "epoch": 0.6929597546440519, "grad_norm": 0.377327561378479, "learning_rate": 4.302306233163862e-06, "loss": 0.2409, "step": 37326 }, { "epoch": 0.6929968847814706, "grad_norm": 0.3206210434436798, "learning_rate": 4.3013476444972814e-06, "loss": 0.4144, "step": 37328 }, { "epoch": 0.6930340149188892, "grad_norm": 0.29780298471450806, "learning_rate": 4.300389133372286e-06, "loss": 0.2489, "step": 37330 }, { "epoch": 0.6930711450563078, "grad_norm": 0.26677048206329346, "learning_rate": 4.299430699801922e-06, "loss": 0.3057, "step": 37332 }, { "epoch": 0.6931082751937265, "grad_norm": 0.43046334385871887, "learning_rate": 4.298472343799235e-06, "loss": 0.1996, "step": 37334 }, { "epoch": 0.6931454053311451, "grad_norm": 0.4612695574760437, "learning_rate": 4.297514065377257e-06, "loss": 0.3163, "step": 37336 }, { "epoch": 0.6931825354685638, "grad_norm": 0.3922799825668335, "learning_rate": 4.296555864549032e-06, "loss": 0.2494, "step": 37338 }, { "epoch": 0.6932196656059824, "grad_norm": 0.3093006908893585, "learning_rate": 4.295597741327599e-06, "loss": 0.2086, "step": 37340 }, { "epoch": 0.693256795743401, "grad_norm": 0.44677606225013733, "learning_rate": 4.294639695725996e-06, "loss": 0.2079, "step": 37342 }, { "epoch": 0.6932939258808197, "grad_norm": 0.3983103930950165, "learning_rate": 4.2936817277572555e-06, "loss": 0.3056, "step": 37344 }, { "epoch": 0.6933310560182383, "grad_norm": 0.2540654242038727, "learning_rate": 4.292723837434413e-06, "loss": 0.3015, "step": 37346 }, { "epoch": 0.693368186155657, "grad_norm": 0.18011514842510223, "learning_rate": 4.291766024770503e-06, "loss": 0.2771, "step": 37348 }, { "epoch": 0.6934053162930756, "grad_norm": 0.5991212129592896, "learning_rate": 4.290808289778561e-06, "loss": 0.2694, "step": 37350 }, { "epoch": 0.6934424464304942, "grad_norm": 0.2610912024974823, "learning_rate": 4.289850632471616e-06, "loss": 0.2559, "step": 37352 }, { "epoch": 0.6934795765679129, "grad_norm": 0.4103946089744568, "learning_rate": 4.288893052862703e-06, "loss": 0.2397, "step": 37354 }, { "epoch": 0.6935167067053315, "grad_norm": 0.5175892114639282, "learning_rate": 4.287935550964846e-06, "loss": 0.2825, "step": 37356 }, { "epoch": 0.6935538368427502, "grad_norm": 0.43431276082992554, "learning_rate": 4.286978126791082e-06, "loss": 0.2989, "step": 37358 }, { "epoch": 0.6935909669801688, "grad_norm": 0.3124162256717682, "learning_rate": 4.286020780354427e-06, "loss": 0.1383, "step": 37360 }, { "epoch": 0.6936280971175874, "grad_norm": 0.2724204957485199, "learning_rate": 4.285063511667915e-06, "loss": 0.2985, "step": 37362 }, { "epoch": 0.6936652272550061, "grad_norm": 0.47156503796577454, "learning_rate": 4.28410632074457e-06, "loss": 0.5, "step": 37364 }, { "epoch": 0.6937023573924247, "grad_norm": 0.3313979208469391, "learning_rate": 4.283149207597422e-06, "loss": 0.1364, "step": 37366 }, { "epoch": 0.6937394875298434, "grad_norm": 0.46410733461380005, "learning_rate": 4.2821921722394836e-06, "loss": 0.3799, "step": 37368 }, { "epoch": 0.693776617667262, "grad_norm": 0.4512125849723816, "learning_rate": 4.281235214683783e-06, "loss": 0.1625, "step": 37370 }, { "epoch": 0.6938137478046806, "grad_norm": 0.33146151900291443, "learning_rate": 4.280278334943343e-06, "loss": 0.235, "step": 37372 }, { "epoch": 0.6938508779420992, "grad_norm": 0.39799147844314575, "learning_rate": 4.279321533031181e-06, "loss": 0.4697, "step": 37374 }, { "epoch": 0.6938880080795179, "grad_norm": 0.473401814699173, "learning_rate": 4.278364808960322e-06, "loss": 0.2879, "step": 37376 }, { "epoch": 0.6939251382169366, "grad_norm": 0.42851999402046204, "learning_rate": 4.277408162743776e-06, "loss": 0.0827, "step": 37378 }, { "epoch": 0.6939622683543551, "grad_norm": 0.3129371404647827, "learning_rate": 4.276451594394566e-06, "loss": 0.4156, "step": 37380 }, { "epoch": 0.6939993984917738, "grad_norm": 0.26752379536628723, "learning_rate": 4.275495103925704e-06, "loss": 0.2589, "step": 37382 }, { "epoch": 0.6940365286291924, "grad_norm": 0.404712975025177, "learning_rate": 4.274538691350205e-06, "loss": 0.2574, "step": 37384 }, { "epoch": 0.6940736587666111, "grad_norm": 0.2926155626773834, "learning_rate": 4.2735823566810865e-06, "loss": 0.1791, "step": 37386 }, { "epoch": 0.6941107889040298, "grad_norm": 0.34501489996910095, "learning_rate": 4.272626099931359e-06, "loss": 0.1423, "step": 37388 }, { "epoch": 0.6941479190414483, "grad_norm": 0.31262749433517456, "learning_rate": 4.271669921114033e-06, "loss": 0.294, "step": 37390 }, { "epoch": 0.694185049178867, "grad_norm": 0.2422153502702713, "learning_rate": 4.270713820242127e-06, "loss": 0.3373, "step": 37392 }, { "epoch": 0.6942221793162856, "grad_norm": 0.3597490191459656, "learning_rate": 4.26975779732864e-06, "loss": 0.335, "step": 37394 }, { "epoch": 0.6942593094537043, "grad_norm": 0.4977995455265045, "learning_rate": 4.268801852386586e-06, "loss": 0.1368, "step": 37396 }, { "epoch": 0.694296439591123, "grad_norm": 1.1158075332641602, "learning_rate": 4.267845985428974e-06, "loss": 0.3053, "step": 37398 }, { "epoch": 0.6943335697285415, "grad_norm": 0.612968385219574, "learning_rate": 4.266890196468806e-06, "loss": 0.2965, "step": 37400 }, { "epoch": 0.6943706998659602, "grad_norm": 0.3525151014328003, "learning_rate": 4.2659344855190895e-06, "loss": 0.3093, "step": 37402 }, { "epoch": 0.6944078300033788, "grad_norm": 0.446323424577713, "learning_rate": 4.264978852592834e-06, "loss": 0.3158, "step": 37404 }, { "epoch": 0.6944449601407975, "grad_norm": 0.3727822005748749, "learning_rate": 4.2640232977030326e-06, "loss": 0.4399, "step": 37406 }, { "epoch": 0.6944820902782162, "grad_norm": 0.2541791796684265, "learning_rate": 4.263067820862693e-06, "loss": 0.1576, "step": 37408 }, { "epoch": 0.6945192204156347, "grad_norm": 0.4023820459842682, "learning_rate": 4.2621124220848166e-06, "loss": 0.1899, "step": 37410 }, { "epoch": 0.6945563505530534, "grad_norm": 0.4519880712032318, "learning_rate": 4.2611571013824024e-06, "loss": 0.3036, "step": 37412 }, { "epoch": 0.694593480690472, "grad_norm": 0.2853188216686249, "learning_rate": 4.260201858768451e-06, "loss": 0.3678, "step": 37414 }, { "epoch": 0.6946306108278907, "grad_norm": 0.4123152196407318, "learning_rate": 4.259246694255961e-06, "loss": 0.2446, "step": 37416 }, { "epoch": 0.6946677409653094, "grad_norm": 0.5391072034835815, "learning_rate": 4.258291607857929e-06, "loss": 0.3781, "step": 37418 }, { "epoch": 0.6947048711027279, "grad_norm": 0.3825768828392029, "learning_rate": 4.2573365995873515e-06, "loss": 0.2631, "step": 37420 }, { "epoch": 0.6947420012401466, "grad_norm": 0.350849986076355, "learning_rate": 4.256381669457218e-06, "loss": 0.2957, "step": 37422 }, { "epoch": 0.6947791313775652, "grad_norm": 0.3619406223297119, "learning_rate": 4.2554268174805245e-06, "loss": 0.2819, "step": 37424 }, { "epoch": 0.6948162615149839, "grad_norm": 0.3528803884983063, "learning_rate": 4.2544720436702665e-06, "loss": 0.3143, "step": 37426 }, { "epoch": 0.6948533916524025, "grad_norm": 0.2968522608280182, "learning_rate": 4.2535173480394335e-06, "loss": 0.1522, "step": 37428 }, { "epoch": 0.6948905217898211, "grad_norm": 0.7459464073181152, "learning_rate": 4.25256273060102e-06, "loss": 0.4226, "step": 37430 }, { "epoch": 0.6949276519272398, "grad_norm": 0.368559330701828, "learning_rate": 4.2516081913680085e-06, "loss": 0.1973, "step": 37432 }, { "epoch": 0.6949647820646584, "grad_norm": 0.39647242426872253, "learning_rate": 4.250653730353393e-06, "loss": 0.3448, "step": 37434 }, { "epoch": 0.6950019122020771, "grad_norm": 0.6954098343849182, "learning_rate": 4.249699347570156e-06, "loss": 0.4064, "step": 37436 }, { "epoch": 0.6950390423394956, "grad_norm": 0.5225175619125366, "learning_rate": 4.248745043031288e-06, "loss": 0.2281, "step": 37438 }, { "epoch": 0.6950761724769143, "grad_norm": 0.23723793029785156, "learning_rate": 4.247790816749777e-06, "loss": 0.3442, "step": 37440 }, { "epoch": 0.695113302614333, "grad_norm": 0.48138993978500366, "learning_rate": 4.246836668738603e-06, "loss": 0.1428, "step": 37442 }, { "epoch": 0.6951504327517516, "grad_norm": 0.6554580926895142, "learning_rate": 4.245882599010746e-06, "loss": 0.3525, "step": 37444 }, { "epoch": 0.6951875628891703, "grad_norm": 0.3643503785133362, "learning_rate": 4.244928607579191e-06, "loss": 0.3659, "step": 37446 }, { "epoch": 0.6952246930265888, "grad_norm": 0.6216050386428833, "learning_rate": 4.243974694456919e-06, "loss": 0.381, "step": 37448 }, { "epoch": 0.6952618231640075, "grad_norm": 0.49983587861061096, "learning_rate": 4.243020859656909e-06, "loss": 0.4454, "step": 37450 }, { "epoch": 0.6952989533014262, "grad_norm": 0.3337538242340088, "learning_rate": 4.242067103192142e-06, "loss": 0.0458, "step": 37452 }, { "epoch": 0.6953360834388448, "grad_norm": 0.38248777389526367, "learning_rate": 4.241113425075598e-06, "loss": 0.1986, "step": 37454 }, { "epoch": 0.6953732135762635, "grad_norm": 0.2901327311992645, "learning_rate": 4.240159825320247e-06, "loss": 0.2572, "step": 37456 }, { "epoch": 0.695410343713682, "grad_norm": 0.28926882147789, "learning_rate": 4.239206303939069e-06, "loss": 0.2684, "step": 37458 }, { "epoch": 0.6954474738511007, "grad_norm": 0.5033291578292847, "learning_rate": 4.238252860945037e-06, "loss": 0.4442, "step": 37460 }, { "epoch": 0.6954846039885194, "grad_norm": 0.632887601852417, "learning_rate": 4.237299496351128e-06, "loss": 0.2549, "step": 37462 }, { "epoch": 0.695521734125938, "grad_norm": 0.38352036476135254, "learning_rate": 4.236346210170309e-06, "loss": 0.4555, "step": 37464 }, { "epoch": 0.6955588642633567, "grad_norm": 0.46811455488204956, "learning_rate": 4.2353930024155535e-06, "loss": 0.4381, "step": 37466 }, { "epoch": 0.6955959944007752, "grad_norm": 0.38507992029190063, "learning_rate": 4.2344398730998344e-06, "loss": 0.307, "step": 37468 }, { "epoch": 0.6956331245381939, "grad_norm": 0.42499494552612305, "learning_rate": 4.233486822236117e-06, "loss": 0.2722, "step": 37470 }, { "epoch": 0.6956702546756126, "grad_norm": 0.47811153531074524, "learning_rate": 4.232533849837372e-06, "loss": 0.153, "step": 37472 }, { "epoch": 0.6957073848130312, "grad_norm": 0.4635964334011078, "learning_rate": 4.231580955916563e-06, "loss": 0.2245, "step": 37474 }, { "epoch": 0.6957445149504499, "grad_norm": 0.4068347215652466, "learning_rate": 4.230628140486661e-06, "loss": 0.3483, "step": 37476 }, { "epoch": 0.6957816450878684, "grad_norm": 0.42280450463294983, "learning_rate": 4.229675403560628e-06, "loss": 0.1681, "step": 37478 }, { "epoch": 0.6958187752252871, "grad_norm": 0.6900840401649475, "learning_rate": 4.228722745151432e-06, "loss": 0.2912, "step": 37480 }, { "epoch": 0.6958559053627057, "grad_norm": 0.5205751657485962, "learning_rate": 4.227770165272029e-06, "loss": 0.4093, "step": 37482 }, { "epoch": 0.6958930355001244, "grad_norm": 0.3506011366844177, "learning_rate": 4.226817663935389e-06, "loss": 0.3064, "step": 37484 }, { "epoch": 0.6959301656375431, "grad_norm": 0.29959818720817566, "learning_rate": 4.225865241154463e-06, "loss": 0.4677, "step": 37486 }, { "epoch": 0.6959672957749616, "grad_norm": 0.4022466242313385, "learning_rate": 4.224912896942216e-06, "loss": 0.1247, "step": 37488 }, { "epoch": 0.6960044259123803, "grad_norm": 0.3680751621723175, "learning_rate": 4.223960631311605e-06, "loss": 0.1713, "step": 37490 }, { "epoch": 0.6960415560497989, "grad_norm": 0.4107588827610016, "learning_rate": 4.223008444275594e-06, "loss": 0.3979, "step": 37492 }, { "epoch": 0.6960786861872176, "grad_norm": 0.2906644344329834, "learning_rate": 4.222056335847129e-06, "loss": 0.2652, "step": 37494 }, { "epoch": 0.6961158163246363, "grad_norm": 0.48166534304618835, "learning_rate": 4.221104306039171e-06, "loss": 0.2049, "step": 37496 }, { "epoch": 0.6961529464620548, "grad_norm": 0.34526917338371277, "learning_rate": 4.220152354864673e-06, "loss": 0.2235, "step": 37498 }, { "epoch": 0.6961900765994735, "grad_norm": 0.4836229085922241, "learning_rate": 4.21920048233659e-06, "loss": 0.3955, "step": 37500 }, { "epoch": 0.6962272067368921, "grad_norm": 0.26777270436286926, "learning_rate": 4.218248688467875e-06, "loss": 0.4313, "step": 37502 }, { "epoch": 0.6962643368743108, "grad_norm": 0.3902604281902313, "learning_rate": 4.217296973271474e-06, "loss": 0.3593, "step": 37504 }, { "epoch": 0.6963014670117295, "grad_norm": 0.36201363801956177, "learning_rate": 4.216345336760343e-06, "loss": 0.2395, "step": 37506 }, { "epoch": 0.696338597149148, "grad_norm": 0.38509005308151245, "learning_rate": 4.215393778947425e-06, "loss": 0.2041, "step": 37508 }, { "epoch": 0.6963757272865667, "grad_norm": 0.45131295919418335, "learning_rate": 4.214442299845671e-06, "loss": 0.3577, "step": 37510 }, { "epoch": 0.6964128574239853, "grad_norm": 0.37255147099494934, "learning_rate": 4.213490899468028e-06, "loss": 0.3707, "step": 37512 }, { "epoch": 0.696449987561404, "grad_norm": 0.29529792070388794, "learning_rate": 4.21253957782744e-06, "loss": 0.4726, "step": 37514 }, { "epoch": 0.6964871176988227, "grad_norm": 0.38325929641723633, "learning_rate": 4.211588334936854e-06, "loss": 0.3211, "step": 37516 }, { "epoch": 0.6965242478362412, "grad_norm": 0.27217838168144226, "learning_rate": 4.210637170809215e-06, "loss": 0.3198, "step": 37518 }, { "epoch": 0.6965613779736599, "grad_norm": 0.23869448900222778, "learning_rate": 4.20968608545746e-06, "loss": 0.3588, "step": 37520 }, { "epoch": 0.6965985081110785, "grad_norm": 0.4707834720611572, "learning_rate": 4.208735078894533e-06, "loss": 0.2953, "step": 37522 }, { "epoch": 0.6966356382484972, "grad_norm": 0.3063238859176636, "learning_rate": 4.207784151133376e-06, "loss": 0.1503, "step": 37524 }, { "epoch": 0.6966727683859157, "grad_norm": 0.342106431722641, "learning_rate": 4.206833302186929e-06, "loss": 0.1926, "step": 37526 }, { "epoch": 0.6967098985233344, "grad_norm": 0.4294862151145935, "learning_rate": 4.205882532068125e-06, "loss": 0.3563, "step": 37528 }, { "epoch": 0.6967470286607531, "grad_norm": 0.44247958064079285, "learning_rate": 4.204931840789909e-06, "loss": 0.3288, "step": 37530 }, { "epoch": 0.6967841587981717, "grad_norm": 0.6680423021316528, "learning_rate": 4.203981228365207e-06, "loss": 0.3553, "step": 37532 }, { "epoch": 0.6968212889355904, "grad_norm": 0.4630979895591736, "learning_rate": 4.203030694806961e-06, "loss": 0.2738, "step": 37534 }, { "epoch": 0.6968584190730089, "grad_norm": 0.4503055214881897, "learning_rate": 4.202080240128102e-06, "loss": 0.2674, "step": 37536 }, { "epoch": 0.6968955492104276, "grad_norm": 0.28805026412010193, "learning_rate": 4.201129864341565e-06, "loss": 0.2418, "step": 37538 }, { "epoch": 0.6969326793478463, "grad_norm": 0.408663809299469, "learning_rate": 4.20017956746028e-06, "loss": 0.3822, "step": 37540 }, { "epoch": 0.6969698094852649, "grad_norm": 0.4131579101085663, "learning_rate": 4.19922934949718e-06, "loss": 0.1813, "step": 37542 }, { "epoch": 0.6970069396226836, "grad_norm": 0.5442785024642944, "learning_rate": 4.198279210465197e-06, "loss": 0.3224, "step": 37544 }, { "epoch": 0.6970440697601021, "grad_norm": 0.31905093789100647, "learning_rate": 4.19732915037725e-06, "loss": 0.4477, "step": 37546 }, { "epoch": 0.6970811998975208, "grad_norm": 0.5105418562889099, "learning_rate": 4.1963791692462785e-06, "loss": 0.3242, "step": 37548 }, { "epoch": 0.6971183300349395, "grad_norm": 0.16466456651687622, "learning_rate": 4.195429267085198e-06, "loss": 0.1953, "step": 37550 }, { "epoch": 0.6971554601723581, "grad_norm": 0.4903988540172577, "learning_rate": 4.194479443906938e-06, "loss": 0.2386, "step": 37552 }, { "epoch": 0.6971925903097768, "grad_norm": 0.26322484016418457, "learning_rate": 4.193529699724423e-06, "loss": 0.296, "step": 37554 }, { "epoch": 0.6972297204471953, "grad_norm": 0.30767589807510376, "learning_rate": 4.19258003455058e-06, "loss": 0.2821, "step": 37556 }, { "epoch": 0.697266850584614, "grad_norm": 0.4158708155155182, "learning_rate": 4.191630448398325e-06, "loss": 0.1884, "step": 37558 }, { "epoch": 0.6973039807220327, "grad_norm": 0.3561565577983856, "learning_rate": 4.190680941280582e-06, "loss": 0.1984, "step": 37560 }, { "epoch": 0.6973411108594513, "grad_norm": 0.2912541329860687, "learning_rate": 4.189731513210269e-06, "loss": 0.3358, "step": 37562 }, { "epoch": 0.69737824099687, "grad_norm": 0.21195116639137268, "learning_rate": 4.188782164200306e-06, "loss": 0.3805, "step": 37564 }, { "epoch": 0.6974153711342885, "grad_norm": 0.5318635106086731, "learning_rate": 4.187832894263616e-06, "loss": 0.3543, "step": 37566 }, { "epoch": 0.6974525012717072, "grad_norm": 0.43506497144699097, "learning_rate": 4.186883703413106e-06, "loss": 0.0685, "step": 37568 }, { "epoch": 0.6974896314091259, "grad_norm": 0.4608241319656372, "learning_rate": 4.185934591661701e-06, "loss": 0.1482, "step": 37570 }, { "epoch": 0.6975267615465445, "grad_norm": 0.6568871736526489, "learning_rate": 4.184985559022308e-06, "loss": 0.2944, "step": 37572 }, { "epoch": 0.6975638916839632, "grad_norm": 0.43620240688323975, "learning_rate": 4.184036605507842e-06, "loss": 0.122, "step": 37574 }, { "epoch": 0.6976010218213817, "grad_norm": 0.43029579520225525, "learning_rate": 4.1830877311312176e-06, "loss": 0.3049, "step": 37576 }, { "epoch": 0.6976381519588004, "grad_norm": 0.35107287764549255, "learning_rate": 4.182138935905346e-06, "loss": 0.042, "step": 37578 }, { "epoch": 0.697675282096219, "grad_norm": 0.5365400314331055, "learning_rate": 4.18119021984314e-06, "loss": 0.1935, "step": 37580 }, { "epoch": 0.6977124122336377, "grad_norm": 0.265237957239151, "learning_rate": 4.180241582957503e-06, "loss": 0.3054, "step": 37582 }, { "epoch": 0.6977495423710564, "grad_norm": 0.3441588878631592, "learning_rate": 4.179293025261345e-06, "loss": 0.1817, "step": 37584 }, { "epoch": 0.6977866725084749, "grad_norm": 0.3767799735069275, "learning_rate": 4.178344546767574e-06, "loss": 0.3375, "step": 37586 }, { "epoch": 0.6978238026458936, "grad_norm": 0.3517741858959198, "learning_rate": 4.1773961474891e-06, "loss": 0.3532, "step": 37588 }, { "epoch": 0.6978609327833122, "grad_norm": 0.47272446751594543, "learning_rate": 4.1764478274388184e-06, "loss": 0.283, "step": 37590 }, { "epoch": 0.6978980629207309, "grad_norm": 0.38956931233406067, "learning_rate": 4.17549958662964e-06, "loss": 0.2559, "step": 37592 }, { "epoch": 0.6979351930581496, "grad_norm": 0.3741835951805115, "learning_rate": 4.174551425074469e-06, "loss": 0.1902, "step": 37594 }, { "epoch": 0.6979723231955681, "grad_norm": 0.20991989970207214, "learning_rate": 4.173603342786201e-06, "loss": 0.1295, "step": 37596 }, { "epoch": 0.6980094533329868, "grad_norm": 0.5151546597480774, "learning_rate": 4.172655339777738e-06, "loss": 0.1201, "step": 37598 }, { "epoch": 0.6980465834704054, "grad_norm": 0.5855364799499512, "learning_rate": 4.171707416061982e-06, "loss": 0.2507, "step": 37600 }, { "epoch": 0.6980837136078241, "grad_norm": 0.4099443554878235, "learning_rate": 4.17075957165183e-06, "loss": 0.2664, "step": 37602 }, { "epoch": 0.6981208437452427, "grad_norm": 0.3729911148548126, "learning_rate": 4.16981180656018e-06, "loss": 0.2503, "step": 37604 }, { "epoch": 0.6981579738826613, "grad_norm": 0.3064371943473816, "learning_rate": 4.168864120799931e-06, "loss": 0.2979, "step": 37606 }, { "epoch": 0.69819510402008, "grad_norm": 0.24513548612594604, "learning_rate": 4.167916514383972e-06, "loss": 0.1514, "step": 37608 }, { "epoch": 0.6982322341574986, "grad_norm": 0.4624926447868347, "learning_rate": 4.1669689873252026e-06, "loss": 0.3508, "step": 37610 }, { "epoch": 0.6982693642949173, "grad_norm": 0.413936972618103, "learning_rate": 4.1660215396365104e-06, "loss": 0.1642, "step": 37612 }, { "epoch": 0.698306494432336, "grad_norm": 0.5020574927330017, "learning_rate": 4.16507417133079e-06, "loss": 0.119, "step": 37614 }, { "epoch": 0.6983436245697545, "grad_norm": 0.30622127652168274, "learning_rate": 4.1641268824209326e-06, "loss": 0.3045, "step": 37616 }, { "epoch": 0.6983807547071732, "grad_norm": 0.24131809175014496, "learning_rate": 4.163179672919831e-06, "loss": 0.3791, "step": 37618 }, { "epoch": 0.6984178848445918, "grad_norm": 0.3619144856929779, "learning_rate": 4.162232542840369e-06, "loss": 0.227, "step": 37620 }, { "epoch": 0.6984550149820105, "grad_norm": 0.4356173276901245, "learning_rate": 4.161285492195434e-06, "loss": 0.3776, "step": 37622 }, { "epoch": 0.6984921451194291, "grad_norm": 0.34050577878952026, "learning_rate": 4.1603385209979155e-06, "loss": 0.3184, "step": 37624 }, { "epoch": 0.6985292752568477, "grad_norm": 0.43384894728660583, "learning_rate": 4.159391629260698e-06, "loss": 0.3066, "step": 37626 }, { "epoch": 0.6985664053942664, "grad_norm": 0.5321500897407532, "learning_rate": 4.158444816996665e-06, "loss": 0.3077, "step": 37628 }, { "epoch": 0.698603535531685, "grad_norm": 0.3725312352180481, "learning_rate": 4.157498084218704e-06, "loss": 0.3614, "step": 37630 }, { "epoch": 0.6986406656691037, "grad_norm": 0.3035672903060913, "learning_rate": 4.156551430939691e-06, "loss": 0.2768, "step": 37632 }, { "epoch": 0.6986777958065222, "grad_norm": 0.3667818605899811, "learning_rate": 4.1556048571725125e-06, "loss": 0.4527, "step": 37634 }, { "epoch": 0.6987149259439409, "grad_norm": 0.314712792634964, "learning_rate": 4.1546583629300445e-06, "loss": 0.271, "step": 37636 }, { "epoch": 0.6987520560813596, "grad_norm": 0.44323572516441345, "learning_rate": 4.153711948225165e-06, "loss": 0.2772, "step": 37638 }, { "epoch": 0.6987891862187782, "grad_norm": 0.2460128366947174, "learning_rate": 4.152765613070755e-06, "loss": 0.1947, "step": 37640 }, { "epoch": 0.6988263163561969, "grad_norm": 0.41295182704925537, "learning_rate": 4.1518193574796904e-06, "loss": 0.352, "step": 37642 }, { "epoch": 0.6988634464936154, "grad_norm": 0.4137716591358185, "learning_rate": 4.15087318146485e-06, "loss": 0.1422, "step": 37644 }, { "epoch": 0.6989005766310341, "grad_norm": 0.31835871934890747, "learning_rate": 4.149927085039104e-06, "loss": 0.1627, "step": 37646 }, { "epoch": 0.6989377067684528, "grad_norm": 0.5508441925048828, "learning_rate": 4.148981068215325e-06, "loss": 0.3109, "step": 37648 }, { "epoch": 0.6989748369058714, "grad_norm": 0.32172903418540955, "learning_rate": 4.14803513100639e-06, "loss": 0.184, "step": 37650 }, { "epoch": 0.69901196704329, "grad_norm": 0.140688955783844, "learning_rate": 4.14708927342517e-06, "loss": 0.2795, "step": 37652 }, { "epoch": 0.6990490971807086, "grad_norm": 0.2567431330680847, "learning_rate": 4.146143495484531e-06, "loss": 0.3537, "step": 37654 }, { "epoch": 0.6990862273181273, "grad_norm": 0.3427571952342987, "learning_rate": 4.145197797197347e-06, "loss": 0.2126, "step": 37656 }, { "epoch": 0.699123357455546, "grad_norm": 0.29616907238960266, "learning_rate": 4.144252178576481e-06, "loss": 0.0955, "step": 37658 }, { "epoch": 0.6991604875929646, "grad_norm": 0.4767168462276459, "learning_rate": 4.143306639634804e-06, "loss": 0.4462, "step": 37660 }, { "epoch": 0.6991976177303832, "grad_norm": 0.3542271554470062, "learning_rate": 4.142361180385178e-06, "loss": 0.4139, "step": 37662 }, { "epoch": 0.6992347478678018, "grad_norm": 0.346851646900177, "learning_rate": 4.141415800840472e-06, "loss": 0.3809, "step": 37664 }, { "epoch": 0.6992718780052205, "grad_norm": 0.43628594279289246, "learning_rate": 4.140470501013549e-06, "loss": 0.3494, "step": 37666 }, { "epoch": 0.6993090081426392, "grad_norm": 0.41150835156440735, "learning_rate": 4.139525280917272e-06, "loss": 0.4228, "step": 37668 }, { "epoch": 0.6993461382800578, "grad_norm": 0.2593783736228943, "learning_rate": 4.138580140564504e-06, "loss": 0.1452, "step": 37670 }, { "epoch": 0.6993832684174764, "grad_norm": 0.6857787370681763, "learning_rate": 4.1376350799681e-06, "loss": 0.2792, "step": 37672 }, { "epoch": 0.699420398554895, "grad_norm": 0.4030986726284027, "learning_rate": 4.136690099140926e-06, "loss": 0.2579, "step": 37674 }, { "epoch": 0.6994575286923137, "grad_norm": 0.45294061303138733, "learning_rate": 4.135745198095834e-06, "loss": 0.2109, "step": 37676 }, { "epoch": 0.6994946588297323, "grad_norm": 0.33016330003738403, "learning_rate": 4.134800376845685e-06, "loss": 0.2874, "step": 37678 }, { "epoch": 0.699531788967151, "grad_norm": 0.32950305938720703, "learning_rate": 4.133855635403333e-06, "loss": 0.1439, "step": 37680 }, { "epoch": 0.6995689191045696, "grad_norm": 0.3747512400150299, "learning_rate": 4.13291097378164e-06, "loss": 0.2902, "step": 37682 }, { "epoch": 0.6996060492419882, "grad_norm": 0.3436223864555359, "learning_rate": 4.1319663919934495e-06, "loss": 0.0931, "step": 37684 }, { "epoch": 0.6996431793794069, "grad_norm": 0.47288450598716736, "learning_rate": 4.131021890051621e-06, "loss": 0.3023, "step": 37686 }, { "epoch": 0.6996803095168255, "grad_norm": 0.5227087140083313, "learning_rate": 4.130077467969003e-06, "loss": 0.2947, "step": 37688 }, { "epoch": 0.6997174396542442, "grad_norm": 0.3664332330226898, "learning_rate": 4.12913312575845e-06, "loss": 0.3465, "step": 37690 }, { "epoch": 0.6997545697916628, "grad_norm": 0.48409759998321533, "learning_rate": 4.128188863432809e-06, "loss": 0.2001, "step": 37692 }, { "epoch": 0.6997916999290814, "grad_norm": 0.5607906579971313, "learning_rate": 4.127244681004934e-06, "loss": 0.3164, "step": 37694 }, { "epoch": 0.6998288300665001, "grad_norm": 0.4359683096408844, "learning_rate": 4.126300578487667e-06, "loss": 0.452, "step": 37696 }, { "epoch": 0.6998659602039187, "grad_norm": 0.3591301441192627, "learning_rate": 4.125356555893852e-06, "loss": 0.2975, "step": 37698 }, { "epoch": 0.6999030903413374, "grad_norm": 0.3066635727882385, "learning_rate": 4.124412613236338e-06, "loss": 0.3708, "step": 37700 }, { "epoch": 0.699940220478756, "grad_norm": 0.3267544209957123, "learning_rate": 4.123468750527969e-06, "loss": 0.2164, "step": 37702 }, { "epoch": 0.6999773506161746, "grad_norm": 0.459110289812088, "learning_rate": 4.122524967781587e-06, "loss": 0.2629, "step": 37704 }, { "epoch": 0.7000144807535933, "grad_norm": 0.46801772713661194, "learning_rate": 4.121581265010036e-06, "loss": 0.3632, "step": 37706 }, { "epoch": 0.7000516108910119, "grad_norm": 0.4044855833053589, "learning_rate": 4.1206376422261594e-06, "loss": 0.2612, "step": 37708 }, { "epoch": 0.7000887410284306, "grad_norm": 0.2556871175765991, "learning_rate": 4.11969409944279e-06, "loss": 0.1196, "step": 37710 }, { "epoch": 0.7001258711658492, "grad_norm": 0.2033730447292328, "learning_rate": 4.1187506366727715e-06, "loss": 0.1117, "step": 37712 }, { "epoch": 0.7001630013032678, "grad_norm": 0.4286501705646515, "learning_rate": 4.117807253928939e-06, "loss": 0.308, "step": 37714 }, { "epoch": 0.7002001314406865, "grad_norm": 0.5378456115722656, "learning_rate": 4.116863951224135e-06, "loss": 0.3368, "step": 37716 }, { "epoch": 0.7002372615781051, "grad_norm": 0.31801366806030273, "learning_rate": 4.115920728571187e-06, "loss": 0.2687, "step": 37718 }, { "epoch": 0.7002743917155237, "grad_norm": 0.4315456449985504, "learning_rate": 4.114977585982937e-06, "loss": 0.2596, "step": 37720 }, { "epoch": 0.7003115218529424, "grad_norm": 0.3213304281234741, "learning_rate": 4.114034523472209e-06, "loss": 0.1378, "step": 37722 }, { "epoch": 0.700348651990361, "grad_norm": 0.47819802165031433, "learning_rate": 4.113091541051842e-06, "loss": 0.3502, "step": 37724 }, { "epoch": 0.7003857821277797, "grad_norm": 0.43013232946395874, "learning_rate": 4.112148638734665e-06, "loss": 0.4163, "step": 37726 }, { "epoch": 0.7004229122651983, "grad_norm": 0.29877662658691406, "learning_rate": 4.11120581653351e-06, "loss": 0.3496, "step": 37728 }, { "epoch": 0.700460042402617, "grad_norm": 0.4377192258834839, "learning_rate": 4.110263074461205e-06, "loss": 0.3214, "step": 37730 }, { "epoch": 0.7004971725400355, "grad_norm": 0.23702198266983032, "learning_rate": 4.109320412530581e-06, "loss": 0.2188, "step": 37732 }, { "epoch": 0.7005343026774542, "grad_norm": 0.4501229524612427, "learning_rate": 4.108377830754456e-06, "loss": 0.2696, "step": 37734 }, { "epoch": 0.7005714328148729, "grad_norm": 0.41851940751075745, "learning_rate": 4.107435329145664e-06, "loss": 0.3485, "step": 37736 }, { "epoch": 0.7006085629522915, "grad_norm": 0.22722987830638885, "learning_rate": 4.106492907717029e-06, "loss": 0.2295, "step": 37738 }, { "epoch": 0.7006456930897101, "grad_norm": 0.4519592225551605, "learning_rate": 4.1055505664813686e-06, "loss": 0.4492, "step": 37740 }, { "epoch": 0.7006828232271287, "grad_norm": 0.3220359981060028, "learning_rate": 4.1046083054515105e-06, "loss": 0.1947, "step": 37742 }, { "epoch": 0.7007199533645474, "grad_norm": 0.5905147790908813, "learning_rate": 4.103666124640278e-06, "loss": 0.2133, "step": 37744 }, { "epoch": 0.7007570835019661, "grad_norm": 0.5934521555900574, "learning_rate": 4.102724024060483e-06, "loss": 0.2781, "step": 37746 }, { "epoch": 0.7007942136393847, "grad_norm": 0.3777603805065155, "learning_rate": 4.10178200372495e-06, "loss": 0.4022, "step": 37748 }, { "epoch": 0.7008313437768033, "grad_norm": 0.500083863735199, "learning_rate": 4.100840063646497e-06, "loss": 0.2882, "step": 37750 }, { "epoch": 0.7008684739142219, "grad_norm": 0.5655398368835449, "learning_rate": 4.099898203837942e-06, "loss": 0.3197, "step": 37752 }, { "epoch": 0.7009056040516406, "grad_norm": 0.3618127405643463, "learning_rate": 4.098956424312098e-06, "loss": 0.2952, "step": 37754 }, { "epoch": 0.7009427341890593, "grad_norm": 0.45220354199409485, "learning_rate": 4.098014725081786e-06, "loss": 0.4423, "step": 37756 }, { "epoch": 0.7009798643264779, "grad_norm": 0.33686986565589905, "learning_rate": 4.09707310615981e-06, "loss": 0.1224, "step": 37758 }, { "epoch": 0.7010169944638965, "grad_norm": 0.4365827143192291, "learning_rate": 4.096131567558993e-06, "loss": 0.1186, "step": 37760 }, { "epoch": 0.7010541246013151, "grad_norm": 0.5030139684677124, "learning_rate": 4.095190109292136e-06, "loss": 0.1427, "step": 37762 }, { "epoch": 0.7010912547387338, "grad_norm": 0.23562489449977875, "learning_rate": 4.094248731372056e-06, "loss": 0.1305, "step": 37764 }, { "epoch": 0.7011283848761525, "grad_norm": 0.27677369117736816, "learning_rate": 4.0933074338115605e-06, "loss": 0.1817, "step": 37766 }, { "epoch": 0.701165515013571, "grad_norm": 0.3920479118824005, "learning_rate": 4.092366216623458e-06, "loss": 0.2752, "step": 37768 }, { "epoch": 0.7012026451509897, "grad_norm": 0.34162870049476624, "learning_rate": 4.09142507982056e-06, "loss": 0.2336, "step": 37770 }, { "epoch": 0.7012397752884083, "grad_norm": 0.3959967792034149, "learning_rate": 4.090484023415665e-06, "loss": 0.2677, "step": 37772 }, { "epoch": 0.701276905425827, "grad_norm": 0.6603890657424927, "learning_rate": 4.0895430474215805e-06, "loss": 0.1808, "step": 37774 }, { "epoch": 0.7013140355632457, "grad_norm": 0.293867290019989, "learning_rate": 4.088602151851112e-06, "loss": 0.4088, "step": 37776 }, { "epoch": 0.7013511657006642, "grad_norm": 0.32179710268974304, "learning_rate": 4.087661336717062e-06, "loss": 0.2979, "step": 37778 }, { "epoch": 0.7013882958380829, "grad_norm": 0.502496600151062, "learning_rate": 4.086720602032234e-06, "loss": 0.3175, "step": 37780 }, { "epoch": 0.7014254259755015, "grad_norm": 0.29389771819114685, "learning_rate": 4.0857799478094265e-06, "loss": 0.401, "step": 37782 }, { "epoch": 0.7014625561129202, "grad_norm": 0.33370691537857056, "learning_rate": 4.0848393740614355e-06, "loss": 0.4136, "step": 37784 }, { "epoch": 0.7014996862503388, "grad_norm": 0.3592650592327118, "learning_rate": 4.083898880801063e-06, "loss": 0.1797, "step": 37786 }, { "epoch": 0.7015368163877574, "grad_norm": 0.39016252756118774, "learning_rate": 4.082958468041105e-06, "loss": 0.4524, "step": 37788 }, { "epoch": 0.7015739465251761, "grad_norm": 0.3908602297306061, "learning_rate": 4.08201813579436e-06, "loss": 0.2031, "step": 37790 }, { "epoch": 0.7016110766625947, "grad_norm": 0.46403294801712036, "learning_rate": 4.081077884073621e-06, "loss": 0.3684, "step": 37792 }, { "epoch": 0.7016482068000134, "grad_norm": 0.3583206236362457, "learning_rate": 4.080137712891682e-06, "loss": 0.1656, "step": 37794 }, { "epoch": 0.701685336937432, "grad_norm": 0.4870445430278778, "learning_rate": 4.079197622261342e-06, "loss": 0.3314, "step": 37796 }, { "epoch": 0.7017224670748506, "grad_norm": 0.5426719784736633, "learning_rate": 4.078257612195382e-06, "loss": 0.6092, "step": 37798 }, { "epoch": 0.7017595972122693, "grad_norm": 0.5088409781455994, "learning_rate": 4.0773176827065994e-06, "loss": 0.3242, "step": 37800 }, { "epoch": 0.7017967273496879, "grad_norm": 0.3242870569229126, "learning_rate": 4.0763778338077865e-06, "loss": 0.3396, "step": 37802 }, { "epoch": 0.7018338574871066, "grad_norm": 0.34176456928253174, "learning_rate": 4.075438065511724e-06, "loss": 0.3384, "step": 37804 }, { "epoch": 0.7018709876245252, "grad_norm": 0.4830511808395386, "learning_rate": 4.074498377831203e-06, "loss": 0.1871, "step": 37806 }, { "epoch": 0.7019081177619438, "grad_norm": 0.3466936945915222, "learning_rate": 4.073558770779015e-06, "loss": 0.1705, "step": 37808 }, { "epoch": 0.7019452478993625, "grad_norm": 0.3258918225765228, "learning_rate": 4.072619244367935e-06, "loss": 0.2298, "step": 37810 }, { "epoch": 0.7019823780367811, "grad_norm": 0.4227856993675232, "learning_rate": 4.071679798610754e-06, "loss": 0.4509, "step": 37812 }, { "epoch": 0.7020195081741998, "grad_norm": 0.39261776208877563, "learning_rate": 4.070740433520253e-06, "loss": 0.2549, "step": 37814 }, { "epoch": 0.7020566383116184, "grad_norm": 0.3777517080307007, "learning_rate": 4.069801149109214e-06, "loss": 0.2878, "step": 37816 }, { "epoch": 0.702093768449037, "grad_norm": 0.29705309867858887, "learning_rate": 4.068861945390419e-06, "loss": 0.3676, "step": 37818 }, { "epoch": 0.7021308985864557, "grad_norm": 0.4161626994609833, "learning_rate": 4.06792282237665e-06, "loss": 0.3213, "step": 37820 }, { "epoch": 0.7021680287238743, "grad_norm": 0.42853784561157227, "learning_rate": 4.06698378008068e-06, "loss": 0.3133, "step": 37822 }, { "epoch": 0.702205158861293, "grad_norm": 0.35260993242263794, "learning_rate": 4.066044818515292e-06, "loss": 0.2353, "step": 37824 }, { "epoch": 0.7022422889987116, "grad_norm": 0.34866058826446533, "learning_rate": 4.065105937693257e-06, "loss": 0.3114, "step": 37826 }, { "epoch": 0.7022794191361302, "grad_norm": 0.27555426955223083, "learning_rate": 4.064167137627353e-06, "loss": 0.106, "step": 37828 }, { "epoch": 0.7023165492735488, "grad_norm": 0.2515064477920532, "learning_rate": 4.063228418330354e-06, "loss": 0.2234, "step": 37830 }, { "epoch": 0.7023536794109675, "grad_norm": 0.49192655086517334, "learning_rate": 4.062289779815034e-06, "loss": 0.3641, "step": 37832 }, { "epoch": 0.7023908095483862, "grad_norm": 0.3787946105003357, "learning_rate": 4.061351222094167e-06, "loss": 0.214, "step": 37834 }, { "epoch": 0.7024279396858047, "grad_norm": 0.43031445145606995, "learning_rate": 4.06041274518052e-06, "loss": 0.357, "step": 37836 }, { "epoch": 0.7024650698232234, "grad_norm": 0.6578899621963501, "learning_rate": 4.059474349086862e-06, "loss": 0.5055, "step": 37838 }, { "epoch": 0.702502199960642, "grad_norm": 0.381585031747818, "learning_rate": 4.058536033825966e-06, "loss": 0.2847, "step": 37840 }, { "epoch": 0.7025393300980607, "grad_norm": 0.2762907147407532, "learning_rate": 4.057597799410601e-06, "loss": 0.1899, "step": 37842 }, { "epoch": 0.7025764602354794, "grad_norm": 0.5346901416778564, "learning_rate": 4.056659645853527e-06, "loss": 0.3735, "step": 37844 }, { "epoch": 0.702613590372898, "grad_norm": 0.41578084230422974, "learning_rate": 4.055721573167516e-06, "loss": 0.3694, "step": 37846 }, { "epoch": 0.7026507205103166, "grad_norm": 1.122694969177246, "learning_rate": 4.0547835813653255e-06, "loss": 0.3383, "step": 37848 }, { "epoch": 0.7026878506477352, "grad_norm": 0.43183985352516174, "learning_rate": 4.053845670459723e-06, "loss": 0.5496, "step": 37850 }, { "epoch": 0.7027249807851539, "grad_norm": 0.49359989166259766, "learning_rate": 4.0529078404634704e-06, "loss": 0.2695, "step": 37852 }, { "epoch": 0.7027621109225726, "grad_norm": 0.3461620807647705, "learning_rate": 4.051970091389328e-06, "loss": 0.4237, "step": 37854 }, { "epoch": 0.7027992410599911, "grad_norm": 0.3669116795063019, "learning_rate": 4.051032423250056e-06, "loss": 0.2242, "step": 37856 }, { "epoch": 0.7028363711974098, "grad_norm": 0.32042476534843445, "learning_rate": 4.050094836058417e-06, "loss": 0.3721, "step": 37858 }, { "epoch": 0.7028735013348284, "grad_norm": 0.41872918605804443, "learning_rate": 4.049157329827162e-06, "loss": 0.2149, "step": 37860 }, { "epoch": 0.7029106314722471, "grad_norm": 0.5100609064102173, "learning_rate": 4.048219904569051e-06, "loss": 0.2104, "step": 37862 }, { "epoch": 0.7029477616096658, "grad_norm": 0.31945064663887024, "learning_rate": 4.047282560296842e-06, "loss": 0.1569, "step": 37864 }, { "epoch": 0.7029848917470843, "grad_norm": 0.39309704303741455, "learning_rate": 4.046345297023285e-06, "loss": 0.1939, "step": 37866 }, { "epoch": 0.703022021884503, "grad_norm": 0.4301413297653198, "learning_rate": 4.045408114761134e-06, "loss": 0.2032, "step": 37868 }, { "epoch": 0.7030591520219216, "grad_norm": 0.2673805356025696, "learning_rate": 4.044471013523147e-06, "loss": 0.3107, "step": 37870 }, { "epoch": 0.7030962821593403, "grad_norm": 0.3798066973686218, "learning_rate": 4.043533993322066e-06, "loss": 0.4007, "step": 37872 }, { "epoch": 0.703133412296759, "grad_norm": 0.3704039454460144, "learning_rate": 4.042597054170647e-06, "loss": 0.2488, "step": 37874 }, { "epoch": 0.7031705424341775, "grad_norm": 0.3890950083732605, "learning_rate": 4.041660196081637e-06, "loss": 0.3598, "step": 37876 }, { "epoch": 0.7032076725715962, "grad_norm": 0.24388466775417328, "learning_rate": 4.040723419067784e-06, "loss": 0.1541, "step": 37878 }, { "epoch": 0.7032448027090148, "grad_norm": 0.47143498063087463, "learning_rate": 4.039786723141835e-06, "loss": 0.3801, "step": 37880 }, { "epoch": 0.7032819328464335, "grad_norm": 0.5811455845832825, "learning_rate": 4.0388501083165365e-06, "loss": 0.2348, "step": 37882 }, { "epoch": 0.703319062983852, "grad_norm": 0.21302053332328796, "learning_rate": 4.037913574604636e-06, "loss": 0.2076, "step": 37884 }, { "epoch": 0.7033561931212707, "grad_norm": 0.2585172951221466, "learning_rate": 4.036977122018869e-06, "loss": 0.3615, "step": 37886 }, { "epoch": 0.7033933232586894, "grad_norm": 0.4588501751422882, "learning_rate": 4.036040750571987e-06, "loss": 0.3933, "step": 37888 }, { "epoch": 0.703430453396108, "grad_norm": 0.3539140820503235, "learning_rate": 4.035104460276721e-06, "loss": 0.3266, "step": 37890 }, { "epoch": 0.7034675835335267, "grad_norm": 0.2962561845779419, "learning_rate": 4.034168251145817e-06, "loss": 0.3606, "step": 37892 }, { "epoch": 0.7035047136709452, "grad_norm": 0.37991786003112793, "learning_rate": 4.033232123192013e-06, "loss": 0.4681, "step": 37894 }, { "epoch": 0.7035418438083639, "grad_norm": 0.37174397706985474, "learning_rate": 4.0322960764280516e-06, "loss": 0.3439, "step": 37896 }, { "epoch": 0.7035789739457826, "grad_norm": 0.2964187562465668, "learning_rate": 4.031360110866661e-06, "loss": 0.2725, "step": 37898 }, { "epoch": 0.7036161040832012, "grad_norm": 0.33882254362106323, "learning_rate": 4.030424226520581e-06, "loss": 0.4148, "step": 37900 }, { "epoch": 0.7036532342206199, "grad_norm": 0.39383015036582947, "learning_rate": 4.0294884234025475e-06, "loss": 0.2916, "step": 37902 }, { "epoch": 0.7036903643580384, "grad_norm": 0.3852511942386627, "learning_rate": 4.028552701525293e-06, "loss": 0.177, "step": 37904 }, { "epoch": 0.7037274944954571, "grad_norm": 0.3373967707157135, "learning_rate": 4.027617060901552e-06, "loss": 0.2399, "step": 37906 }, { "epoch": 0.7037646246328758, "grad_norm": 0.3069818317890167, "learning_rate": 4.02668150154405e-06, "loss": 0.2286, "step": 37908 }, { "epoch": 0.7038017547702944, "grad_norm": 0.5309270620346069, "learning_rate": 4.025746023465524e-06, "loss": 0.2515, "step": 37910 }, { "epoch": 0.7038388849077131, "grad_norm": 0.2947355806827545, "learning_rate": 4.024810626678697e-06, "loss": 0.169, "step": 37912 }, { "epoch": 0.7038760150451316, "grad_norm": 0.3046303689479828, "learning_rate": 4.023875311196298e-06, "loss": 0.1359, "step": 37914 }, { "epoch": 0.7039131451825503, "grad_norm": 0.42236244678497314, "learning_rate": 4.022940077031057e-06, "loss": 0.2365, "step": 37916 }, { "epoch": 0.703950275319969, "grad_norm": 0.44367754459381104, "learning_rate": 4.022004924195698e-06, "loss": 0.3935, "step": 37918 }, { "epoch": 0.7039874054573876, "grad_norm": 0.5501275658607483, "learning_rate": 4.021069852702946e-06, "loss": 0.3923, "step": 37920 }, { "epoch": 0.7040245355948063, "grad_norm": 0.2703684866428375, "learning_rate": 4.020134862565527e-06, "loss": 0.1829, "step": 37922 }, { "epoch": 0.7040616657322248, "grad_norm": 0.29451727867126465, "learning_rate": 4.0191999537961565e-06, "loss": 0.1554, "step": 37924 }, { "epoch": 0.7040987958696435, "grad_norm": 0.301200807094574, "learning_rate": 4.018265126407563e-06, "loss": 0.2478, "step": 37926 }, { "epoch": 0.7041359260070622, "grad_norm": 0.5121234655380249, "learning_rate": 4.0173303804124645e-06, "loss": 0.2792, "step": 37928 }, { "epoch": 0.7041730561444808, "grad_norm": 0.4367184042930603, "learning_rate": 4.016395715823577e-06, "loss": 0.3085, "step": 37930 }, { "epoch": 0.7042101862818995, "grad_norm": 0.3541370630264282, "learning_rate": 4.01546113265362e-06, "loss": 0.3836, "step": 37932 }, { "epoch": 0.704247316419318, "grad_norm": 0.45512792468070984, "learning_rate": 4.014526630915315e-06, "loss": 0.2901, "step": 37934 }, { "epoch": 0.7042844465567367, "grad_norm": 0.4876466989517212, "learning_rate": 4.013592210621371e-06, "loss": 0.3508, "step": 37936 }, { "epoch": 0.7043215766941553, "grad_norm": 0.4165050983428955, "learning_rate": 4.012657871784506e-06, "loss": 0.2464, "step": 37938 }, { "epoch": 0.704358706831574, "grad_norm": 0.3527783751487732, "learning_rate": 4.011723614417432e-06, "loss": 0.3196, "step": 37940 }, { "epoch": 0.7043958369689927, "grad_norm": 1.5926580429077148, "learning_rate": 4.010789438532864e-06, "loss": 0.3011, "step": 37942 }, { "epoch": 0.7044329671064112, "grad_norm": 0.49057716131210327, "learning_rate": 4.00985534414351e-06, "loss": 0.4677, "step": 37944 }, { "epoch": 0.7044700972438299, "grad_norm": 0.3688822388648987, "learning_rate": 4.0089213312620875e-06, "loss": 0.3078, "step": 37946 }, { "epoch": 0.7045072273812485, "grad_norm": 0.2785656750202179, "learning_rate": 4.0079873999012966e-06, "loss": 0.1786, "step": 37948 }, { "epoch": 0.7045443575186672, "grad_norm": 0.40423738956451416, "learning_rate": 4.007053550073852e-06, "loss": 0.254, "step": 37950 }, { "epoch": 0.7045814876560859, "grad_norm": 0.698407769203186, "learning_rate": 4.0061197817924545e-06, "loss": 0.272, "step": 37952 }, { "epoch": 0.7046186177935044, "grad_norm": 0.4202597439289093, "learning_rate": 4.005186095069813e-06, "loss": 0.3807, "step": 37954 }, { "epoch": 0.7046557479309231, "grad_norm": 0.34914085268974304, "learning_rate": 4.004252489918632e-06, "loss": 0.313, "step": 37956 }, { "epoch": 0.7046928780683417, "grad_norm": 0.5189908146858215, "learning_rate": 4.003318966351616e-06, "loss": 0.2182, "step": 37958 }, { "epoch": 0.7047300082057604, "grad_norm": 0.45974013209342957, "learning_rate": 4.002385524381469e-06, "loss": 0.2361, "step": 37960 }, { "epoch": 0.7047671383431791, "grad_norm": 0.525264322757721, "learning_rate": 4.001452164020887e-06, "loss": 0.4253, "step": 37962 }, { "epoch": 0.7048042684805976, "grad_norm": 0.3296203315258026, "learning_rate": 4.0005188852825734e-06, "loss": 0.2243, "step": 37964 }, { "epoch": 0.7048413986180163, "grad_norm": 0.47202837467193604, "learning_rate": 3.999585688179228e-06, "loss": 0.4431, "step": 37966 }, { "epoch": 0.7048785287554349, "grad_norm": 0.27697524428367615, "learning_rate": 3.998652572723547e-06, "loss": 0.1147, "step": 37968 }, { "epoch": 0.7049156588928536, "grad_norm": 0.4982176721096039, "learning_rate": 3.997719538928233e-06, "loss": 0.2125, "step": 37970 }, { "epoch": 0.7049527890302723, "grad_norm": 0.2884954512119293, "learning_rate": 3.996786586805976e-06, "loss": 0.365, "step": 37972 }, { "epoch": 0.7049899191676908, "grad_norm": 0.3994104862213135, "learning_rate": 3.9958537163694685e-06, "loss": 0.0851, "step": 37974 }, { "epoch": 0.7050270493051095, "grad_norm": 0.32072240114212036, "learning_rate": 3.994920927631408e-06, "loss": 0.3425, "step": 37976 }, { "epoch": 0.7050641794425281, "grad_norm": 0.39811787009239197, "learning_rate": 3.993988220604485e-06, "loss": 0.2506, "step": 37978 }, { "epoch": 0.7051013095799468, "grad_norm": 0.35012760758399963, "learning_rate": 3.993055595301394e-06, "loss": 0.179, "step": 37980 }, { "epoch": 0.7051384397173653, "grad_norm": 0.48031920194625854, "learning_rate": 3.992123051734823e-06, "loss": 0.2654, "step": 37982 }, { "epoch": 0.705175569854784, "grad_norm": 0.45286983251571655, "learning_rate": 3.991190589917465e-06, "loss": 0.2786, "step": 37984 }, { "epoch": 0.7052126999922027, "grad_norm": 0.4098431169986725, "learning_rate": 3.990258209862001e-06, "loss": 0.1861, "step": 37986 }, { "epoch": 0.7052498301296213, "grad_norm": 0.4155251085758209, "learning_rate": 3.989325911581121e-06, "loss": 0.3062, "step": 37988 }, { "epoch": 0.70528696026704, "grad_norm": 0.3577605187892914, "learning_rate": 3.988393695087512e-06, "loss": 0.1595, "step": 37990 }, { "epoch": 0.7053240904044585, "grad_norm": 0.6608291864395142, "learning_rate": 3.987461560393862e-06, "loss": 0.3441, "step": 37992 }, { "epoch": 0.7053612205418772, "grad_norm": 0.39614182710647583, "learning_rate": 3.986529507512845e-06, "loss": 0.4644, "step": 37994 }, { "epoch": 0.7053983506792959, "grad_norm": 0.66713947057724, "learning_rate": 3.985597536457151e-06, "loss": 0.2748, "step": 37996 }, { "epoch": 0.7054354808167145, "grad_norm": 0.37530624866485596, "learning_rate": 3.984665647239462e-06, "loss": 0.1798, "step": 37998 }, { "epoch": 0.7054726109541332, "grad_norm": 0.5856049656867981, "learning_rate": 3.9837338398724525e-06, "loss": 0.1973, "step": 38000 }, { "epoch": 0.7055097410915517, "grad_norm": 0.3465645909309387, "learning_rate": 3.982802114368803e-06, "loss": 0.1632, "step": 38002 }, { "epoch": 0.7055468712289704, "grad_norm": 0.5121181607246399, "learning_rate": 3.981870470741195e-06, "loss": 0.2642, "step": 38004 }, { "epoch": 0.7055840013663891, "grad_norm": 0.3532576858997345, "learning_rate": 3.980938909002303e-06, "loss": 0.4296, "step": 38006 }, { "epoch": 0.7056211315038077, "grad_norm": 0.5497809052467346, "learning_rate": 3.980007429164803e-06, "loss": 0.247, "step": 38008 }, { "epoch": 0.7056582616412264, "grad_norm": 0.3505820631980896, "learning_rate": 3.979076031241374e-06, "loss": 0.1985, "step": 38010 }, { "epoch": 0.7056953917786449, "grad_norm": 0.3434463441371918, "learning_rate": 3.9781447152446805e-06, "loss": 0.3259, "step": 38012 }, { "epoch": 0.7057325219160636, "grad_norm": 0.39860329031944275, "learning_rate": 3.977213481187404e-06, "loss": 0.3177, "step": 38014 }, { "epoch": 0.7057696520534823, "grad_norm": 0.46372976899147034, "learning_rate": 3.97628232908221e-06, "loss": 0.2025, "step": 38016 }, { "epoch": 0.7058067821909009, "grad_norm": 0.2584788203239441, "learning_rate": 3.975351258941769e-06, "loss": 0.1173, "step": 38018 }, { "epoch": 0.7058439123283196, "grad_norm": 0.3340776264667511, "learning_rate": 3.974420270778751e-06, "loss": 0.2663, "step": 38020 }, { "epoch": 0.7058810424657381, "grad_norm": 0.41627636551856995, "learning_rate": 3.973489364605828e-06, "loss": 0.2294, "step": 38022 }, { "epoch": 0.7059181726031568, "grad_norm": 0.4376927316188812, "learning_rate": 3.97255854043566e-06, "loss": 0.2727, "step": 38024 }, { "epoch": 0.7059553027405755, "grad_norm": 0.5108228325843811, "learning_rate": 3.9716277982809156e-06, "loss": 0.3173, "step": 38026 }, { "epoch": 0.7059924328779941, "grad_norm": 0.3428100347518921, "learning_rate": 3.97069713815426e-06, "loss": 0.371, "step": 38028 }, { "epoch": 0.7060295630154128, "grad_norm": 0.6225501894950867, "learning_rate": 3.969766560068358e-06, "loss": 0.228, "step": 38030 }, { "epoch": 0.7060666931528313, "grad_norm": 0.41492998600006104, "learning_rate": 3.968836064035872e-06, "loss": 0.2139, "step": 38032 }, { "epoch": 0.70610382329025, "grad_norm": 0.4596366882324219, "learning_rate": 3.967905650069459e-06, "loss": 0.3766, "step": 38034 }, { "epoch": 0.7061409534276686, "grad_norm": 0.5141488909721375, "learning_rate": 3.966975318181785e-06, "loss": 0.2643, "step": 38036 }, { "epoch": 0.7061780835650873, "grad_norm": 0.3649926781654358, "learning_rate": 3.9660450683855036e-06, "loss": 0.1303, "step": 38038 }, { "epoch": 0.706215213702506, "grad_norm": 0.35929074883461, "learning_rate": 3.965114900693273e-06, "loss": 0.2422, "step": 38040 }, { "epoch": 0.7062523438399245, "grad_norm": 0.6279309988021851, "learning_rate": 3.964184815117754e-06, "loss": 0.3345, "step": 38042 }, { "epoch": 0.7062894739773432, "grad_norm": 0.32389530539512634, "learning_rate": 3.9632548116716e-06, "loss": 0.2764, "step": 38044 }, { "epoch": 0.7063266041147618, "grad_norm": 0.5120017528533936, "learning_rate": 3.9623248903674645e-06, "loss": 0.3296, "step": 38046 }, { "epoch": 0.7063637342521805, "grad_norm": 0.33234813809394836, "learning_rate": 3.961395051218007e-06, "loss": 0.3002, "step": 38048 }, { "epoch": 0.7064008643895991, "grad_norm": 0.36734670400619507, "learning_rate": 3.960465294235871e-06, "loss": 0.1954, "step": 38050 }, { "epoch": 0.7064379945270177, "grad_norm": 0.38332879543304443, "learning_rate": 3.959535619433713e-06, "loss": 0.1737, "step": 38052 }, { "epoch": 0.7064751246644364, "grad_norm": 0.29633790254592896, "learning_rate": 3.958606026824181e-06, "loss": 0.2484, "step": 38054 }, { "epoch": 0.706512254801855, "grad_norm": 0.4117186367511749, "learning_rate": 3.957676516419929e-06, "loss": 0.3316, "step": 38056 }, { "epoch": 0.7065493849392737, "grad_norm": 0.5229418873786926, "learning_rate": 3.956747088233596e-06, "loss": 0.1482, "step": 38058 }, { "epoch": 0.7065865150766923, "grad_norm": 0.5126410126686096, "learning_rate": 3.9558177422778375e-06, "loss": 0.21, "step": 38060 }, { "epoch": 0.7066236452141109, "grad_norm": 0.20744261145591736, "learning_rate": 3.954888478565292e-06, "loss": 0.2527, "step": 38062 }, { "epoch": 0.7066607753515296, "grad_norm": 0.47574397921562195, "learning_rate": 3.953959297108607e-06, "loss": 0.2835, "step": 38064 }, { "epoch": 0.7066979054889482, "grad_norm": 0.44473251700401306, "learning_rate": 3.953030197920427e-06, "loss": 0.2474, "step": 38066 }, { "epoch": 0.7067350356263669, "grad_norm": 0.3982432782649994, "learning_rate": 3.952101181013391e-06, "loss": 0.2211, "step": 38068 }, { "epoch": 0.7067721657637855, "grad_norm": 0.49544739723205566, "learning_rate": 3.951172246400143e-06, "loss": 0.2934, "step": 38070 }, { "epoch": 0.7068092959012041, "grad_norm": 0.47970259189605713, "learning_rate": 3.950243394093327e-06, "loss": 0.2967, "step": 38072 }, { "epoch": 0.7068464260386228, "grad_norm": 0.2899574339389801, "learning_rate": 3.9493146241055735e-06, "loss": 0.27, "step": 38074 }, { "epoch": 0.7068835561760414, "grad_norm": 0.3603079319000244, "learning_rate": 3.948385936449524e-06, "loss": 0.3339, "step": 38076 }, { "epoch": 0.7069206863134601, "grad_norm": 0.4132198095321655, "learning_rate": 3.947457331137818e-06, "loss": 0.1378, "step": 38078 }, { "epoch": 0.7069578164508787, "grad_norm": 0.3556966483592987, "learning_rate": 3.946528808183086e-06, "loss": 0.1308, "step": 38080 }, { "epoch": 0.7069949465882973, "grad_norm": 0.2878161370754242, "learning_rate": 3.945600367597964e-06, "loss": 0.202, "step": 38082 }, { "epoch": 0.707032076725716, "grad_norm": 0.28508999943733215, "learning_rate": 3.944672009395085e-06, "loss": 0.3071, "step": 38084 }, { "epoch": 0.7070692068631346, "grad_norm": 0.39727121591567993, "learning_rate": 3.943743733587088e-06, "loss": 0.2027, "step": 38086 }, { "epoch": 0.7071063370005533, "grad_norm": 0.5152698755264282, "learning_rate": 3.942815540186593e-06, "loss": 0.2075, "step": 38088 }, { "epoch": 0.7071434671379718, "grad_norm": 0.5086638331413269, "learning_rate": 3.941887429206235e-06, "loss": 0.4166, "step": 38090 }, { "epoch": 0.7071805972753905, "grad_norm": 0.39790186285972595, "learning_rate": 3.940959400658644e-06, "loss": 0.2139, "step": 38092 }, { "epoch": 0.7072177274128092, "grad_norm": 0.7603560090065002, "learning_rate": 3.940031454556446e-06, "loss": 0.4891, "step": 38094 }, { "epoch": 0.7072548575502278, "grad_norm": 0.28156888484954834, "learning_rate": 3.939103590912271e-06, "loss": 0.1504, "step": 38096 }, { "epoch": 0.7072919876876465, "grad_norm": 0.5872491002082825, "learning_rate": 3.9381758097387394e-06, "loss": 0.2438, "step": 38098 }, { "epoch": 0.707329117825065, "grad_norm": 0.44886723160743713, "learning_rate": 3.937248111048481e-06, "loss": 0.2038, "step": 38100 }, { "epoch": 0.7073662479624837, "grad_norm": 0.45089030265808105, "learning_rate": 3.9363204948541115e-06, "loss": 0.2464, "step": 38102 }, { "epoch": 0.7074033780999024, "grad_norm": 0.35610711574554443, "learning_rate": 3.935392961168257e-06, "loss": 0.2474, "step": 38104 }, { "epoch": 0.707440508237321, "grad_norm": 0.5256989598274231, "learning_rate": 3.93446551000354e-06, "loss": 0.1086, "step": 38106 }, { "epoch": 0.7074776383747396, "grad_norm": 0.2214541733264923, "learning_rate": 3.933538141372578e-06, "loss": 0.2109, "step": 38108 }, { "epoch": 0.7075147685121582, "grad_norm": 0.2744998037815094, "learning_rate": 3.932610855287995e-06, "loss": 0.3723, "step": 38110 }, { "epoch": 0.7075518986495769, "grad_norm": 0.47936129570007324, "learning_rate": 3.9316836517624e-06, "loss": 0.2884, "step": 38112 }, { "epoch": 0.7075890287869956, "grad_norm": 0.24909447133541107, "learning_rate": 3.9307565308084135e-06, "loss": 0.2459, "step": 38114 }, { "epoch": 0.7076261589244142, "grad_norm": 0.44297605752944946, "learning_rate": 3.929829492438653e-06, "loss": 0.3863, "step": 38116 }, { "epoch": 0.7076632890618328, "grad_norm": 0.45579683780670166, "learning_rate": 3.928902536665733e-06, "loss": 0.2943, "step": 38118 }, { "epoch": 0.7077004191992514, "grad_norm": 0.3342800736427307, "learning_rate": 3.9279756635022616e-06, "loss": 0.3287, "step": 38120 }, { "epoch": 0.7077375493366701, "grad_norm": 0.30340835452079773, "learning_rate": 3.927048872960852e-06, "loss": 0.4079, "step": 38122 }, { "epoch": 0.7077746794740888, "grad_norm": 0.9781947731971741, "learning_rate": 3.926122165054122e-06, "loss": 0.3654, "step": 38124 }, { "epoch": 0.7078118096115074, "grad_norm": 0.5402278900146484, "learning_rate": 3.925195539794672e-06, "loss": 0.2974, "step": 38126 }, { "epoch": 0.707848939748926, "grad_norm": 0.507876455783844, "learning_rate": 3.924268997195114e-06, "loss": 0.4756, "step": 38128 }, { "epoch": 0.7078860698863446, "grad_norm": 0.6688075065612793, "learning_rate": 3.923342537268057e-06, "loss": 0.394, "step": 38130 }, { "epoch": 0.7079232000237633, "grad_norm": 0.352760910987854, "learning_rate": 3.922416160026106e-06, "loss": 0.2092, "step": 38132 }, { "epoch": 0.7079603301611819, "grad_norm": 0.5699179172515869, "learning_rate": 3.921489865481866e-06, "loss": 0.3282, "step": 38134 }, { "epoch": 0.7079974602986006, "grad_norm": 0.5377879738807678, "learning_rate": 3.920563653647945e-06, "loss": 0.2879, "step": 38136 }, { "epoch": 0.7080345904360192, "grad_norm": 0.29251334071159363, "learning_rate": 3.91963752453694e-06, "loss": 0.1489, "step": 38138 }, { "epoch": 0.7080717205734378, "grad_norm": 0.5590978264808655, "learning_rate": 3.918711478161459e-06, "loss": 0.2684, "step": 38140 }, { "epoch": 0.7081088507108565, "grad_norm": 0.26250067353248596, "learning_rate": 3.917785514534095e-06, "loss": 0.2106, "step": 38142 }, { "epoch": 0.7081459808482751, "grad_norm": 0.4042779803276062, "learning_rate": 3.916859633667453e-06, "loss": 0.5555, "step": 38144 }, { "epoch": 0.7081831109856938, "grad_norm": 0.29905450344085693, "learning_rate": 3.915933835574128e-06, "loss": 0.1896, "step": 38146 }, { "epoch": 0.7082202411231124, "grad_norm": 0.33537599444389343, "learning_rate": 3.915008120266724e-06, "loss": 0.2511, "step": 38148 }, { "epoch": 0.708257371260531, "grad_norm": 0.3405703008174896, "learning_rate": 3.914082487757831e-06, "loss": 0.5252, "step": 38150 }, { "epoch": 0.7082945013979497, "grad_norm": 0.462399959564209, "learning_rate": 3.913156938060044e-06, "loss": 0.4158, "step": 38152 }, { "epoch": 0.7083316315353683, "grad_norm": 0.7287722229957581, "learning_rate": 3.91223147118596e-06, "loss": 0.3191, "step": 38154 }, { "epoch": 0.708368761672787, "grad_norm": 0.3973499536514282, "learning_rate": 3.911306087148171e-06, "loss": 0.2025, "step": 38156 }, { "epoch": 0.7084058918102056, "grad_norm": 0.4547470211982727, "learning_rate": 3.910380785959268e-06, "loss": 0.2317, "step": 38158 }, { "epoch": 0.7084430219476242, "grad_norm": 0.2922212481498718, "learning_rate": 3.909455567631845e-06, "loss": 0.1776, "step": 38160 }, { "epoch": 0.7084801520850429, "grad_norm": 0.48210448026657104, "learning_rate": 3.9085304321784845e-06, "loss": 0.2041, "step": 38162 }, { "epoch": 0.7085172822224615, "grad_norm": 0.3467679023742676, "learning_rate": 3.907605379611783e-06, "loss": 0.2111, "step": 38164 }, { "epoch": 0.7085544123598801, "grad_norm": 0.4955320358276367, "learning_rate": 3.90668040994432e-06, "loss": 0.4252, "step": 38166 }, { "epoch": 0.7085915424972988, "grad_norm": 0.3571617007255554, "learning_rate": 3.9057555231886856e-06, "loss": 0.1792, "step": 38168 }, { "epoch": 0.7086286726347174, "grad_norm": 0.32618558406829834, "learning_rate": 3.904830719357463e-06, "loss": 0.2523, "step": 38170 }, { "epoch": 0.7086658027721361, "grad_norm": 0.3874558210372925, "learning_rate": 3.903905998463238e-06, "loss": 0.5445, "step": 38172 }, { "epoch": 0.7087029329095547, "grad_norm": 0.33972954750061035, "learning_rate": 3.902981360518595e-06, "loss": 0.3982, "step": 38174 }, { "epoch": 0.7087400630469733, "grad_norm": 0.35426416993141174, "learning_rate": 3.902056805536109e-06, "loss": 0.1883, "step": 38176 }, { "epoch": 0.708777193184392, "grad_norm": 0.40433260798454285, "learning_rate": 3.9011323335283665e-06, "loss": 0.2872, "step": 38178 }, { "epoch": 0.7088143233218106, "grad_norm": 0.36834022402763367, "learning_rate": 3.900207944507944e-06, "loss": 0.2822, "step": 38180 }, { "epoch": 0.7088514534592293, "grad_norm": 0.5159149169921875, "learning_rate": 3.899283638487424e-06, "loss": 0.226, "step": 38182 }, { "epoch": 0.7088885835966479, "grad_norm": 0.3613691031932831, "learning_rate": 3.898359415479375e-06, "loss": 0.2794, "step": 38184 }, { "epoch": 0.7089257137340665, "grad_norm": 0.3555360734462738, "learning_rate": 3.897435275496383e-06, "loss": 0.4158, "step": 38186 }, { "epoch": 0.7089628438714851, "grad_norm": 0.44267693161964417, "learning_rate": 3.896511218551013e-06, "loss": 0.2187, "step": 38188 }, { "epoch": 0.7089999740089038, "grad_norm": 0.46430134773254395, "learning_rate": 3.895587244655844e-06, "loss": 0.3222, "step": 38190 }, { "epoch": 0.7090371041463225, "grad_norm": 0.19684863090515137, "learning_rate": 3.894663353823447e-06, "loss": 0.2767, "step": 38192 }, { "epoch": 0.7090742342837411, "grad_norm": 0.3146318793296814, "learning_rate": 3.893739546066395e-06, "loss": 0.2753, "step": 38194 }, { "epoch": 0.7091113644211597, "grad_norm": 0.4783351421356201, "learning_rate": 3.892815821397257e-06, "loss": 0.4096, "step": 38196 }, { "epoch": 0.7091484945585783, "grad_norm": 0.4032927453517914, "learning_rate": 3.891892179828605e-06, "loss": 0.1762, "step": 38198 }, { "epoch": 0.709185624695997, "grad_norm": 0.3325378894805908, "learning_rate": 3.890968621373004e-06, "loss": 0.3008, "step": 38200 }, { "epoch": 0.7092227548334157, "grad_norm": 0.21624745428562164, "learning_rate": 3.890045146043019e-06, "loss": 0.1306, "step": 38202 }, { "epoch": 0.7092598849708343, "grad_norm": 0.3842718005180359, "learning_rate": 3.889121753851222e-06, "loss": 0.1393, "step": 38204 }, { "epoch": 0.7092970151082529, "grad_norm": 0.4516873359680176, "learning_rate": 3.888198444810169e-06, "loss": 0.2511, "step": 38206 }, { "epoch": 0.7093341452456715, "grad_norm": 0.6297973394393921, "learning_rate": 3.8872752189324305e-06, "loss": 0.3865, "step": 38208 }, { "epoch": 0.7093712753830902, "grad_norm": 0.618631899356842, "learning_rate": 3.886352076230565e-06, "loss": 0.3341, "step": 38210 }, { "epoch": 0.7094084055205089, "grad_norm": 0.38146597146987915, "learning_rate": 3.885429016717139e-06, "loss": 0.5065, "step": 38212 }, { "epoch": 0.7094455356579275, "grad_norm": 0.42282363772392273, "learning_rate": 3.884506040404706e-06, "loss": 0.2547, "step": 38214 }, { "epoch": 0.7094826657953461, "grad_norm": 0.37700262665748596, "learning_rate": 3.883583147305828e-06, "loss": 0.2777, "step": 38216 }, { "epoch": 0.7095197959327647, "grad_norm": 0.4291996657848358, "learning_rate": 3.882660337433061e-06, "loss": 0.2212, "step": 38218 }, { "epoch": 0.7095569260701834, "grad_norm": 0.23527291417121887, "learning_rate": 3.881737610798965e-06, "loss": 0.2815, "step": 38220 }, { "epoch": 0.7095940562076021, "grad_norm": 0.28370603919029236, "learning_rate": 3.880814967416093e-06, "loss": 0.1791, "step": 38222 }, { "epoch": 0.7096311863450206, "grad_norm": 0.4876558482646942, "learning_rate": 3.879892407297004e-06, "loss": 0.2803, "step": 38224 }, { "epoch": 0.7096683164824393, "grad_norm": 0.2863287329673767, "learning_rate": 3.878969930454247e-06, "loss": 0.1425, "step": 38226 }, { "epoch": 0.7097054466198579, "grad_norm": 0.3879002332687378, "learning_rate": 3.878047536900371e-06, "loss": 0.3444, "step": 38228 }, { "epoch": 0.7097425767572766, "grad_norm": 0.2571834921836853, "learning_rate": 3.877125226647932e-06, "loss": 0.3097, "step": 38230 }, { "epoch": 0.7097797068946953, "grad_norm": 0.45022931694984436, "learning_rate": 3.876202999709477e-06, "loss": 0.4099, "step": 38232 }, { "epoch": 0.7098168370321138, "grad_norm": 0.384562224149704, "learning_rate": 3.875280856097558e-06, "loss": 0.3022, "step": 38234 }, { "epoch": 0.7098539671695325, "grad_norm": 0.8028783202171326, "learning_rate": 3.874358795824722e-06, "loss": 0.258, "step": 38236 }, { "epoch": 0.7098910973069511, "grad_norm": 0.667856752872467, "learning_rate": 3.873436818903512e-06, "loss": 0.3203, "step": 38238 }, { "epoch": 0.7099282274443698, "grad_norm": 0.6080194115638733, "learning_rate": 3.872514925346476e-06, "loss": 0.4514, "step": 38240 }, { "epoch": 0.7099653575817884, "grad_norm": 0.44267651438713074, "learning_rate": 3.871593115166158e-06, "loss": 0.3062, "step": 38242 }, { "epoch": 0.710002487719207, "grad_norm": 0.39824554324150085, "learning_rate": 3.8706713883751e-06, "loss": 0.3794, "step": 38244 }, { "epoch": 0.7100396178566257, "grad_norm": 0.4751010239124298, "learning_rate": 3.86974974498585e-06, "loss": 0.3806, "step": 38246 }, { "epoch": 0.7100767479940443, "grad_norm": 0.24562574923038483, "learning_rate": 3.868828185010939e-06, "loss": 0.2565, "step": 38248 }, { "epoch": 0.710113878131463, "grad_norm": 0.42882511019706726, "learning_rate": 3.867906708462915e-06, "loss": 0.4097, "step": 38250 }, { "epoch": 0.7101510082688816, "grad_norm": 0.2850572466850281, "learning_rate": 3.86698531535431e-06, "loss": 0.1546, "step": 38252 }, { "epoch": 0.7101881384063002, "grad_norm": 0.6860714554786682, "learning_rate": 3.866064005697664e-06, "loss": 0.2506, "step": 38254 }, { "epoch": 0.7102252685437189, "grad_norm": 0.2567194402217865, "learning_rate": 3.865142779505514e-06, "loss": 0.1642, "step": 38256 }, { "epoch": 0.7102623986811375, "grad_norm": 0.42734935879707336, "learning_rate": 3.864221636790395e-06, "loss": 0.2507, "step": 38258 }, { "epoch": 0.7102995288185562, "grad_norm": 0.35731270909309387, "learning_rate": 3.86330057756484e-06, "loss": 0.3818, "step": 38260 }, { "epoch": 0.7103366589559748, "grad_norm": 0.3298490643501282, "learning_rate": 3.862379601841386e-06, "loss": 0.391, "step": 38262 }, { "epoch": 0.7103737890933934, "grad_norm": 0.32279857993125916, "learning_rate": 3.8614587096325585e-06, "loss": 0.412, "step": 38264 }, { "epoch": 0.7104109192308121, "grad_norm": 0.3170374035835266, "learning_rate": 3.860537900950891e-06, "loss": 0.2558, "step": 38266 }, { "epoch": 0.7104480493682307, "grad_norm": 0.4230540692806244, "learning_rate": 3.859617175808915e-06, "loss": 0.253, "step": 38268 }, { "epoch": 0.7104851795056494, "grad_norm": 0.4637199938297272, "learning_rate": 3.858696534219154e-06, "loss": 0.1647, "step": 38270 }, { "epoch": 0.710522309643068, "grad_norm": 0.6628230810165405, "learning_rate": 3.857775976194138e-06, "loss": 0.23, "step": 38272 }, { "epoch": 0.7105594397804866, "grad_norm": 0.35321223735809326, "learning_rate": 3.8568555017463975e-06, "loss": 0.1419, "step": 38274 }, { "epoch": 0.7105965699179053, "grad_norm": 0.39736875891685486, "learning_rate": 3.855935110888447e-06, "loss": 0.3967, "step": 38276 }, { "epoch": 0.7106337000553239, "grad_norm": 0.4303283989429474, "learning_rate": 3.855014803632817e-06, "loss": 0.3366, "step": 38278 }, { "epoch": 0.7106708301927426, "grad_norm": 0.2547784447669983, "learning_rate": 3.854094579992029e-06, "loss": 0.3631, "step": 38280 }, { "epoch": 0.7107079603301611, "grad_norm": 0.39638444781303406, "learning_rate": 3.853174439978605e-06, "loss": 0.1956, "step": 38282 }, { "epoch": 0.7107450904675798, "grad_norm": 0.4775794446468353, "learning_rate": 3.852254383605065e-06, "loss": 0.3613, "step": 38284 }, { "epoch": 0.7107822206049984, "grad_norm": 0.48790913820266724, "learning_rate": 3.851334410883932e-06, "loss": 0.3249, "step": 38286 }, { "epoch": 0.7108193507424171, "grad_norm": 0.5363473892211914, "learning_rate": 3.8504145218277164e-06, "loss": 0.4129, "step": 38288 }, { "epoch": 0.7108564808798358, "grad_norm": 0.3864003121852875, "learning_rate": 3.849494716448943e-06, "loss": 0.2546, "step": 38290 }, { "epoch": 0.7108936110172543, "grad_norm": 0.35749727487564087, "learning_rate": 3.848574994760119e-06, "loss": 0.3398, "step": 38292 }, { "epoch": 0.710930741154673, "grad_norm": 0.35058918595314026, "learning_rate": 3.847655356773764e-06, "loss": 0.3352, "step": 38294 }, { "epoch": 0.7109678712920916, "grad_norm": 0.5434895753860474, "learning_rate": 3.846735802502391e-06, "loss": 0.2436, "step": 38296 }, { "epoch": 0.7110050014295103, "grad_norm": 0.44131389260292053, "learning_rate": 3.845816331958514e-06, "loss": 0.1371, "step": 38298 }, { "epoch": 0.711042131566929, "grad_norm": 0.36640387773513794, "learning_rate": 3.844896945154645e-06, "loss": 0.2946, "step": 38300 }, { "epoch": 0.7110792617043475, "grad_norm": 0.35546785593032837, "learning_rate": 3.84397764210329e-06, "loss": 0.2135, "step": 38302 }, { "epoch": 0.7111163918417662, "grad_norm": 0.3662308156490326, "learning_rate": 3.843058422816958e-06, "loss": 0.4272, "step": 38304 }, { "epoch": 0.7111535219791848, "grad_norm": 0.3492467999458313, "learning_rate": 3.842139287308159e-06, "loss": 0.2522, "step": 38306 }, { "epoch": 0.7111906521166035, "grad_norm": 0.5802592039108276, "learning_rate": 3.841220235589399e-06, "loss": 0.2764, "step": 38308 }, { "epoch": 0.7112277822540222, "grad_norm": 0.31080788373947144, "learning_rate": 3.840301267673189e-06, "loss": 0.1266, "step": 38310 }, { "epoch": 0.7112649123914407, "grad_norm": 0.38897669315338135, "learning_rate": 3.839382383572027e-06, "loss": 0.3534, "step": 38312 }, { "epoch": 0.7113020425288594, "grad_norm": 0.5285455584526062, "learning_rate": 3.838463583298414e-06, "loss": 0.2464, "step": 38314 }, { "epoch": 0.711339172666278, "grad_norm": 0.2628207802772522, "learning_rate": 3.837544866864856e-06, "loss": 0.2145, "step": 38316 }, { "epoch": 0.7113763028036967, "grad_norm": 0.5825287103652954, "learning_rate": 3.836626234283854e-06, "loss": 0.3187, "step": 38318 }, { "epoch": 0.7114134329411154, "grad_norm": 0.28564298152923584, "learning_rate": 3.835707685567908e-06, "loss": 0.0994, "step": 38320 }, { "epoch": 0.7114505630785339, "grad_norm": 0.3276146650314331, "learning_rate": 3.834789220729516e-06, "loss": 0.3169, "step": 38322 }, { "epoch": 0.7114876932159526, "grad_norm": 0.4152318239212036, "learning_rate": 3.833870839781178e-06, "loss": 0.2812, "step": 38324 }, { "epoch": 0.7115248233533712, "grad_norm": 0.34961146116256714, "learning_rate": 3.832952542735387e-06, "loss": 0.3839, "step": 38326 }, { "epoch": 0.7115619534907899, "grad_norm": 0.3483884036540985, "learning_rate": 3.832034329604637e-06, "loss": 0.2881, "step": 38328 }, { "epoch": 0.7115990836282086, "grad_norm": 0.2815057337284088, "learning_rate": 3.831116200401426e-06, "loss": 0.2821, "step": 38330 }, { "epoch": 0.7116362137656271, "grad_norm": 0.5533283948898315, "learning_rate": 3.830198155138248e-06, "loss": 0.3768, "step": 38332 }, { "epoch": 0.7116733439030458, "grad_norm": 0.7363576889038086, "learning_rate": 3.829280193827589e-06, "loss": 0.2616, "step": 38334 }, { "epoch": 0.7117104740404644, "grad_norm": 0.4258559048175812, "learning_rate": 3.828362316481944e-06, "loss": 0.2619, "step": 38336 }, { "epoch": 0.7117476041778831, "grad_norm": 0.4296591281890869, "learning_rate": 3.827444523113805e-06, "loss": 0.2965, "step": 38338 }, { "epoch": 0.7117847343153016, "grad_norm": 0.40978506207466125, "learning_rate": 3.8265268137356535e-06, "loss": 0.2455, "step": 38340 }, { "epoch": 0.7118218644527203, "grad_norm": 0.5486048460006714, "learning_rate": 3.82560918835998e-06, "loss": 0.7542, "step": 38342 }, { "epoch": 0.711858994590139, "grad_norm": 0.3994015157222748, "learning_rate": 3.824691646999271e-06, "loss": 0.1585, "step": 38344 }, { "epoch": 0.7118961247275576, "grad_norm": 0.3969513475894928, "learning_rate": 3.823774189666012e-06, "loss": 0.2761, "step": 38346 }, { "epoch": 0.7119332548649763, "grad_norm": 0.38873445987701416, "learning_rate": 3.822856816372686e-06, "loss": 0.1581, "step": 38348 }, { "epoch": 0.7119703850023948, "grad_norm": 0.31401100754737854, "learning_rate": 3.821939527131779e-06, "loss": 0.3822, "step": 38350 }, { "epoch": 0.7120075151398135, "grad_norm": 0.3767162263393402, "learning_rate": 3.821022321955768e-06, "loss": 0.3399, "step": 38352 }, { "epoch": 0.7120446452772322, "grad_norm": 0.37629154324531555, "learning_rate": 3.8201052008571375e-06, "loss": 0.2292, "step": 38354 }, { "epoch": 0.7120817754146508, "grad_norm": 0.3102743923664093, "learning_rate": 3.8191881638483596e-06, "loss": 0.327, "step": 38356 }, { "epoch": 0.7121189055520695, "grad_norm": 0.5763558149337769, "learning_rate": 3.818271210941918e-06, "loss": 0.2444, "step": 38358 }, { "epoch": 0.712156035689488, "grad_norm": 0.5295024514198303, "learning_rate": 3.817354342150289e-06, "loss": 0.144, "step": 38360 }, { "epoch": 0.7121931658269067, "grad_norm": 0.35246017575263977, "learning_rate": 3.816437557485952e-06, "loss": 0.2769, "step": 38362 }, { "epoch": 0.7122302959643254, "grad_norm": 0.3564547598361969, "learning_rate": 3.815520856961374e-06, "loss": 0.2327, "step": 38364 }, { "epoch": 0.712267426101744, "grad_norm": 0.24249692261219025, "learning_rate": 3.8146042405890326e-06, "loss": 0.4736, "step": 38366 }, { "epoch": 0.7123045562391627, "grad_norm": 0.6170700788497925, "learning_rate": 3.8136877083813993e-06, "loss": 0.2762, "step": 38368 }, { "epoch": 0.7123416863765812, "grad_norm": 0.3345470428466797, "learning_rate": 3.812771260350947e-06, "loss": 0.313, "step": 38370 }, { "epoch": 0.7123788165139999, "grad_norm": 0.9421281218528748, "learning_rate": 3.8118548965101486e-06, "loss": 0.2248, "step": 38372 }, { "epoch": 0.7124159466514186, "grad_norm": 0.285725474357605, "learning_rate": 3.8109386168714657e-06, "loss": 0.1987, "step": 38374 }, { "epoch": 0.7124530767888372, "grad_norm": 0.4872460961341858, "learning_rate": 3.8100224214473734e-06, "loss": 0.255, "step": 38376 }, { "epoch": 0.7124902069262559, "grad_norm": 0.4311636686325073, "learning_rate": 3.809106310250331e-06, "loss": 0.3171, "step": 38378 }, { "epoch": 0.7125273370636744, "grad_norm": 0.5085113644599915, "learning_rate": 3.8081902832928085e-06, "loss": 0.2775, "step": 38380 }, { "epoch": 0.7125644672010931, "grad_norm": 0.5581339597702026, "learning_rate": 3.807274340587269e-06, "loss": 0.285, "step": 38382 }, { "epoch": 0.7126015973385118, "grad_norm": 0.3686096668243408, "learning_rate": 3.8063584821461763e-06, "loss": 0.4368, "step": 38384 }, { "epoch": 0.7126387274759304, "grad_norm": 0.3417530655860901, "learning_rate": 3.805442707981992e-06, "loss": 0.4524, "step": 38386 }, { "epoch": 0.7126758576133491, "grad_norm": 0.45178133249282837, "learning_rate": 3.8045270181071824e-06, "loss": 0.2018, "step": 38388 }, { "epoch": 0.7127129877507676, "grad_norm": 0.43412157893180847, "learning_rate": 3.8036114125341985e-06, "loss": 0.3873, "step": 38390 }, { "epoch": 0.7127501178881863, "grad_norm": 0.3364481031894684, "learning_rate": 3.802695891275503e-06, "loss": 0.6802, "step": 38392 }, { "epoch": 0.7127872480256049, "grad_norm": 0.4735856056213379, "learning_rate": 3.8017804543435568e-06, "loss": 0.2713, "step": 38394 }, { "epoch": 0.7128243781630236, "grad_norm": 0.901459813117981, "learning_rate": 3.800865101750809e-06, "loss": 0.2876, "step": 38396 }, { "epoch": 0.7128615083004423, "grad_norm": 0.39798903465270996, "learning_rate": 3.7999498335097187e-06, "loss": 0.5497, "step": 38398 }, { "epoch": 0.7128986384378608, "grad_norm": 0.4067172110080719, "learning_rate": 3.7990346496327435e-06, "loss": 0.3363, "step": 38400 }, { "epoch": 0.7129357685752795, "grad_norm": 0.40109017491340637, "learning_rate": 3.79811955013233e-06, "loss": 0.2214, "step": 38402 }, { "epoch": 0.7129728987126981, "grad_norm": 0.29809775948524475, "learning_rate": 3.797204535020931e-06, "loss": 0.2152, "step": 38404 }, { "epoch": 0.7130100288501168, "grad_norm": 0.3489615023136139, "learning_rate": 3.796289604311e-06, "loss": 0.2355, "step": 38406 }, { "epoch": 0.7130471589875355, "grad_norm": 0.8688030242919922, "learning_rate": 3.7953747580149847e-06, "loss": 0.2164, "step": 38408 }, { "epoch": 0.713084289124954, "grad_norm": 0.63758784532547, "learning_rate": 3.7944599961453343e-06, "loss": 0.2976, "step": 38410 }, { "epoch": 0.7131214192623727, "grad_norm": 0.5879611372947693, "learning_rate": 3.7935453187144944e-06, "loss": 0.346, "step": 38412 }, { "epoch": 0.7131585493997913, "grad_norm": 0.3939766585826874, "learning_rate": 3.792630725734917e-06, "loss": 0.2136, "step": 38414 }, { "epoch": 0.71319567953721, "grad_norm": 0.5130848288536072, "learning_rate": 3.791716217219038e-06, "loss": 0.466, "step": 38416 }, { "epoch": 0.7132328096746287, "grad_norm": 0.5954288244247437, "learning_rate": 3.7908017931793095e-06, "loss": 0.2828, "step": 38418 }, { "epoch": 0.7132699398120472, "grad_norm": 0.247306227684021, "learning_rate": 3.789887453628166e-06, "loss": 0.2534, "step": 38420 }, { "epoch": 0.7133070699494659, "grad_norm": 0.3098422884941101, "learning_rate": 3.788973198578053e-06, "loss": 0.2136, "step": 38422 }, { "epoch": 0.7133442000868845, "grad_norm": 0.3368147611618042, "learning_rate": 3.788059028041411e-06, "loss": 0.2806, "step": 38424 }, { "epoch": 0.7133813302243032, "grad_norm": 0.3784393072128296, "learning_rate": 3.7871449420306815e-06, "loss": 0.0827, "step": 38426 }, { "epoch": 0.7134184603617219, "grad_norm": 0.2543085217475891, "learning_rate": 3.7862309405582966e-06, "loss": 0.1402, "step": 38428 }, { "epoch": 0.7134555904991404, "grad_norm": 0.6137259602546692, "learning_rate": 3.785317023636695e-06, "loss": 0.4223, "step": 38430 }, { "epoch": 0.7134927206365591, "grad_norm": 0.4496777355670929, "learning_rate": 3.784403191278315e-06, "loss": 0.1814, "step": 38432 }, { "epoch": 0.7135298507739777, "grad_norm": 0.32674068212509155, "learning_rate": 3.7834894434955894e-06, "loss": 0.3442, "step": 38434 }, { "epoch": 0.7135669809113964, "grad_norm": 0.47954410314559937, "learning_rate": 3.7825757803009557e-06, "loss": 0.2269, "step": 38436 }, { "epoch": 0.7136041110488149, "grad_norm": 0.44878795742988586, "learning_rate": 3.781662201706838e-06, "loss": 0.1351, "step": 38438 }, { "epoch": 0.7136412411862336, "grad_norm": 0.9860539436340332, "learning_rate": 3.7807487077256754e-06, "loss": 0.4323, "step": 38440 }, { "epoch": 0.7136783713236523, "grad_norm": 0.30242955684661865, "learning_rate": 3.7798352983698905e-06, "loss": 0.2371, "step": 38442 }, { "epoch": 0.7137155014610709, "grad_norm": 0.39447104930877686, "learning_rate": 3.7789219736519154e-06, "loss": 0.5736, "step": 38444 }, { "epoch": 0.7137526315984896, "grad_norm": 0.23475044965744019, "learning_rate": 3.7780087335841786e-06, "loss": 0.2296, "step": 38446 }, { "epoch": 0.7137897617359081, "grad_norm": 0.5398349165916443, "learning_rate": 3.7770955781791065e-06, "loss": 0.4278, "step": 38448 }, { "epoch": 0.7138268918733268, "grad_norm": 0.4245615005493164, "learning_rate": 3.7761825074491254e-06, "loss": 0.2991, "step": 38450 }, { "epoch": 0.7138640220107455, "grad_norm": 0.20239369571208954, "learning_rate": 3.775269521406656e-06, "loss": 0.1779, "step": 38452 }, { "epoch": 0.7139011521481641, "grad_norm": 0.4447181820869446, "learning_rate": 3.7743566200641225e-06, "loss": 0.2804, "step": 38454 }, { "epoch": 0.7139382822855828, "grad_norm": 0.4130091965198517, "learning_rate": 3.773443803433947e-06, "loss": 0.3643, "step": 38456 }, { "epoch": 0.7139754124230013, "grad_norm": 0.30447736382484436, "learning_rate": 3.772531071528556e-06, "loss": 0.3327, "step": 38458 }, { "epoch": 0.71401254256042, "grad_norm": 0.45896947383880615, "learning_rate": 3.7716184243603583e-06, "loss": 0.2814, "step": 38460 }, { "epoch": 0.7140496726978387, "grad_norm": 0.33555012941360474, "learning_rate": 3.770705861941778e-06, "loss": 0.0649, "step": 38462 }, { "epoch": 0.7140868028352573, "grad_norm": 0.33059367537498474, "learning_rate": 3.7697933842852363e-06, "loss": 0.1829, "step": 38464 }, { "epoch": 0.714123932972676, "grad_norm": 0.5058813691139221, "learning_rate": 3.768880991403141e-06, "loss": 0.3687, "step": 38466 }, { "epoch": 0.7141610631100945, "grad_norm": 0.36249691247940063, "learning_rate": 3.767968683307911e-06, "loss": 0.3887, "step": 38468 }, { "epoch": 0.7141981932475132, "grad_norm": 0.35768887400627136, "learning_rate": 3.7670564600119596e-06, "loss": 0.2034, "step": 38470 }, { "epoch": 0.7142353233849319, "grad_norm": 0.36447030305862427, "learning_rate": 3.7661443215277015e-06, "loss": 0.2411, "step": 38472 }, { "epoch": 0.7142724535223505, "grad_norm": 0.3206687569618225, "learning_rate": 3.7652322678675458e-06, "loss": 0.213, "step": 38474 }, { "epoch": 0.7143095836597692, "grad_norm": 0.36723050475120544, "learning_rate": 3.7643202990439065e-06, "loss": 0.2718, "step": 38476 }, { "epoch": 0.7143467137971877, "grad_norm": 0.20427601039409637, "learning_rate": 3.7634084150691865e-06, "loss": 0.1438, "step": 38478 }, { "epoch": 0.7143838439346064, "grad_norm": 0.2274230718612671, "learning_rate": 3.762496615955802e-06, "loss": 0.1156, "step": 38480 }, { "epoch": 0.7144209740720251, "grad_norm": 0.23206478357315063, "learning_rate": 3.7615849017161497e-06, "loss": 0.3244, "step": 38482 }, { "epoch": 0.7144581042094437, "grad_norm": 0.256886750459671, "learning_rate": 3.7606732723626426e-06, "loss": 0.3071, "step": 38484 }, { "epoch": 0.7144952343468624, "grad_norm": 0.31571251153945923, "learning_rate": 3.7597617279076815e-06, "loss": 0.3229, "step": 38486 }, { "epoch": 0.7145323644842809, "grad_norm": 0.24469232559204102, "learning_rate": 3.7588502683636763e-06, "loss": 0.1685, "step": 38488 }, { "epoch": 0.7145694946216996, "grad_norm": 0.5443921685218811, "learning_rate": 3.7579388937430207e-06, "loss": 0.4606, "step": 38490 }, { "epoch": 0.7146066247591182, "grad_norm": 0.22091738879680634, "learning_rate": 3.75702760405812e-06, "loss": 0.1299, "step": 38492 }, { "epoch": 0.7146437548965369, "grad_norm": 0.5289151668548584, "learning_rate": 3.7561163993213734e-06, "loss": 0.5152, "step": 38494 }, { "epoch": 0.7146808850339555, "grad_norm": 0.3619935214519501, "learning_rate": 3.75520527954518e-06, "loss": 0.1097, "step": 38496 }, { "epoch": 0.7147180151713741, "grad_norm": 0.6360693573951721, "learning_rate": 3.7542942447419374e-06, "loss": 0.2038, "step": 38498 }, { "epoch": 0.7147551453087928, "grad_norm": 0.3501574397087097, "learning_rate": 3.7533832949240457e-06, "loss": 0.1341, "step": 38500 }, { "epoch": 0.7147922754462114, "grad_norm": 0.5388885140419006, "learning_rate": 3.752472430103896e-06, "loss": 0.4631, "step": 38502 }, { "epoch": 0.7148294055836301, "grad_norm": 0.17365466058254242, "learning_rate": 3.75156165029388e-06, "loss": 0.2216, "step": 38504 }, { "epoch": 0.7148665357210487, "grad_norm": 0.35155782103538513, "learning_rate": 3.750650955506394e-06, "loss": 0.338, "step": 38506 }, { "epoch": 0.7149036658584673, "grad_norm": 0.38686272501945496, "learning_rate": 3.7497403457538297e-06, "loss": 0.2281, "step": 38508 }, { "epoch": 0.714940795995886, "grad_norm": 0.3953717350959778, "learning_rate": 3.748829821048576e-06, "loss": 0.1573, "step": 38510 }, { "epoch": 0.7149779261333046, "grad_norm": 0.35503342747688293, "learning_rate": 3.7479193814030255e-06, "loss": 0.1951, "step": 38512 }, { "epoch": 0.7150150562707233, "grad_norm": 0.4534502923488617, "learning_rate": 3.747009026829569e-06, "loss": 0.2853, "step": 38514 }, { "epoch": 0.7150521864081419, "grad_norm": 0.4495185613632202, "learning_rate": 3.746098757340585e-06, "loss": 0.28, "step": 38516 }, { "epoch": 0.7150893165455605, "grad_norm": 0.6893193125724792, "learning_rate": 3.745188572948465e-06, "loss": 0.2927, "step": 38518 }, { "epoch": 0.7151264466829792, "grad_norm": 0.30690300464630127, "learning_rate": 3.7442784736655926e-06, "loss": 0.2728, "step": 38520 }, { "epoch": 0.7151635768203978, "grad_norm": 0.5460488200187683, "learning_rate": 3.743368459504356e-06, "loss": 0.2771, "step": 38522 }, { "epoch": 0.7152007069578165, "grad_norm": 0.5028060674667358, "learning_rate": 3.742458530477131e-06, "loss": 0.2461, "step": 38524 }, { "epoch": 0.7152378370952351, "grad_norm": 0.37725409865379333, "learning_rate": 3.7415486865963047e-06, "loss": 0.2657, "step": 38526 }, { "epoch": 0.7152749672326537, "grad_norm": 0.37274429202079773, "learning_rate": 3.7406389278742503e-06, "loss": 0.2229, "step": 38528 }, { "epoch": 0.7153120973700724, "grad_norm": 0.5873746275901794, "learning_rate": 3.7397292543233523e-06, "loss": 0.1592, "step": 38530 }, { "epoch": 0.715349227507491, "grad_norm": 0.3676605522632599, "learning_rate": 3.7388196659559874e-06, "loss": 0.2246, "step": 38532 }, { "epoch": 0.7153863576449097, "grad_norm": 0.31116873025894165, "learning_rate": 3.7379101627845317e-06, "loss": 0.1475, "step": 38534 }, { "epoch": 0.7154234877823283, "grad_norm": 0.4500538110733032, "learning_rate": 3.737000744821362e-06, "loss": 0.2156, "step": 38536 }, { "epoch": 0.7154606179197469, "grad_norm": 0.42384371161460876, "learning_rate": 3.7360914120788527e-06, "loss": 0.2789, "step": 38538 }, { "epoch": 0.7154977480571656, "grad_norm": 0.41394034028053284, "learning_rate": 3.7351821645693797e-06, "loss": 0.2751, "step": 38540 }, { "epoch": 0.7155348781945842, "grad_norm": 0.41111302375793457, "learning_rate": 3.7342730023053085e-06, "loss": 0.4001, "step": 38542 }, { "epoch": 0.7155720083320029, "grad_norm": 0.4480436146259308, "learning_rate": 3.733363925299017e-06, "loss": 0.7307, "step": 38544 }, { "epoch": 0.7156091384694214, "grad_norm": 0.5152963399887085, "learning_rate": 3.7324549335628692e-06, "loss": 0.3211, "step": 38546 }, { "epoch": 0.7156462686068401, "grad_norm": 0.35430845618247986, "learning_rate": 3.7315460271092363e-06, "loss": 0.3109, "step": 38548 }, { "epoch": 0.7156833987442588, "grad_norm": 0.3785333037376404, "learning_rate": 3.7306372059504846e-06, "loss": 0.2386, "step": 38550 }, { "epoch": 0.7157205288816774, "grad_norm": 0.3565354347229004, "learning_rate": 3.7297284700989866e-06, "loss": 0.1629, "step": 38552 }, { "epoch": 0.715757659019096, "grad_norm": 0.33217304944992065, "learning_rate": 3.7288198195670976e-06, "loss": 0.2015, "step": 38554 }, { "epoch": 0.7157947891565146, "grad_norm": 0.34040918946266174, "learning_rate": 3.727911254367187e-06, "loss": 0.5029, "step": 38556 }, { "epoch": 0.7158319192939333, "grad_norm": 0.3734451234340668, "learning_rate": 3.727002774511618e-06, "loss": 0.3488, "step": 38558 }, { "epoch": 0.715869049431352, "grad_norm": 0.35075655579566956, "learning_rate": 3.72609438001275e-06, "loss": 0.2575, "step": 38560 }, { "epoch": 0.7159061795687706, "grad_norm": 0.37108591198921204, "learning_rate": 3.725186070882949e-06, "loss": 0.1609, "step": 38562 }, { "epoch": 0.7159433097061892, "grad_norm": 0.28926882147789, "learning_rate": 3.7242778471345676e-06, "loss": 0.194, "step": 38564 }, { "epoch": 0.7159804398436078, "grad_norm": 0.30193620920181274, "learning_rate": 3.723369708779969e-06, "loss": 0.1306, "step": 38566 }, { "epoch": 0.7160175699810265, "grad_norm": 0.30801576375961304, "learning_rate": 3.722461655831505e-06, "loss": 0.2919, "step": 38568 }, { "epoch": 0.7160547001184452, "grad_norm": 0.3678168058395386, "learning_rate": 3.7215536883015346e-06, "loss": 0.3321, "step": 38570 }, { "epoch": 0.7160918302558638, "grad_norm": 0.33992069959640503, "learning_rate": 3.720645806202412e-06, "loss": 0.3422, "step": 38572 }, { "epoch": 0.7161289603932824, "grad_norm": 0.37005648016929626, "learning_rate": 3.719738009546492e-06, "loss": 0.2142, "step": 38574 }, { "epoch": 0.716166090530701, "grad_norm": 0.3993622362613678, "learning_rate": 3.7188302983461255e-06, "loss": 0.3502, "step": 38576 }, { "epoch": 0.7162032206681197, "grad_norm": 0.3127598762512207, "learning_rate": 3.7179226726136674e-06, "loss": 0.2744, "step": 38578 }, { "epoch": 0.7162403508055384, "grad_norm": 0.505318820476532, "learning_rate": 3.7170151323614624e-06, "loss": 0.133, "step": 38580 }, { "epoch": 0.716277480942957, "grad_norm": 0.33114007115364075, "learning_rate": 3.716107677601861e-06, "loss": 0.2207, "step": 38582 }, { "epoch": 0.7163146110803756, "grad_norm": 0.38612401485443115, "learning_rate": 3.715200308347211e-06, "loss": 0.1554, "step": 38584 }, { "epoch": 0.7163517412177942, "grad_norm": 0.42608556151390076, "learning_rate": 3.714293024609864e-06, "loss": 0.1669, "step": 38586 }, { "epoch": 0.7163888713552129, "grad_norm": 0.3281004726886749, "learning_rate": 3.713385826402157e-06, "loss": 0.1572, "step": 38588 }, { "epoch": 0.7164260014926315, "grad_norm": 0.5750089883804321, "learning_rate": 3.712478713736443e-06, "loss": 0.2794, "step": 38590 }, { "epoch": 0.7164631316300502, "grad_norm": 0.3530252277851105, "learning_rate": 3.7115716866250563e-06, "loss": 0.3004, "step": 38592 }, { "epoch": 0.7165002617674688, "grad_norm": 0.37191370129585266, "learning_rate": 3.710664745080342e-06, "loss": 0.2944, "step": 38594 }, { "epoch": 0.7165373919048874, "grad_norm": 0.33158376812934875, "learning_rate": 3.7097578891146434e-06, "loss": 0.5051, "step": 38596 }, { "epoch": 0.7165745220423061, "grad_norm": 0.2931126654148102, "learning_rate": 3.7088511187402976e-06, "loss": 0.1531, "step": 38598 }, { "epoch": 0.7166116521797247, "grad_norm": 0.45178812742233276, "learning_rate": 3.7079444339696433e-06, "loss": 0.3219, "step": 38600 }, { "epoch": 0.7166487823171434, "grad_norm": 0.21858333051204681, "learning_rate": 3.707037834815023e-06, "loss": 0.25, "step": 38602 }, { "epoch": 0.716685912454562, "grad_norm": 0.36935535073280334, "learning_rate": 3.706131321288764e-06, "loss": 0.3288, "step": 38604 }, { "epoch": 0.7167230425919806, "grad_norm": 0.38025790452957153, "learning_rate": 3.705224893403205e-06, "loss": 0.1961, "step": 38606 }, { "epoch": 0.7167601727293993, "grad_norm": 0.3116885721683502, "learning_rate": 3.7043185511706847e-06, "loss": 0.2673, "step": 38608 }, { "epoch": 0.7167973028668179, "grad_norm": 0.3381451964378357, "learning_rate": 3.703412294603528e-06, "loss": 0.2672, "step": 38610 }, { "epoch": 0.7168344330042365, "grad_norm": 0.561720073223114, "learning_rate": 3.702506123714068e-06, "loss": 0.2193, "step": 38612 }, { "epoch": 0.7168715631416552, "grad_norm": 0.1104595810174942, "learning_rate": 3.7016000385146413e-06, "loss": 0.2583, "step": 38614 }, { "epoch": 0.7169086932790738, "grad_norm": 0.2584749758243561, "learning_rate": 3.700694039017567e-06, "loss": 0.2453, "step": 38616 }, { "epoch": 0.7169458234164925, "grad_norm": 0.2639651298522949, "learning_rate": 3.6997881252351797e-06, "loss": 0.2326, "step": 38618 }, { "epoch": 0.7169829535539111, "grad_norm": 0.28743451833724976, "learning_rate": 3.6988822971798042e-06, "loss": 0.318, "step": 38620 }, { "epoch": 0.7170200836913297, "grad_norm": 0.32242169976234436, "learning_rate": 3.697976554863767e-06, "loss": 0.3733, "step": 38622 }, { "epoch": 0.7170572138287484, "grad_norm": 0.6726976037025452, "learning_rate": 3.697070898299392e-06, "loss": 0.2761, "step": 38624 }, { "epoch": 0.717094343966167, "grad_norm": 0.3981928825378418, "learning_rate": 3.696165327499006e-06, "loss": 0.2128, "step": 38626 }, { "epoch": 0.7171314741035857, "grad_norm": 0.49595940113067627, "learning_rate": 3.695259842474924e-06, "loss": 0.3546, "step": 38628 }, { "epoch": 0.7171686042410043, "grad_norm": 0.5916197896003723, "learning_rate": 3.694354443239474e-06, "loss": 0.2263, "step": 38630 }, { "epoch": 0.7172057343784229, "grad_norm": 0.5026167035102844, "learning_rate": 3.693449129804969e-06, "loss": 0.3319, "step": 38632 }, { "epoch": 0.7172428645158416, "grad_norm": 0.37770113348960876, "learning_rate": 3.6925439021837316e-06, "loss": 0.1355, "step": 38634 }, { "epoch": 0.7172799946532602, "grad_norm": 0.32266294956207275, "learning_rate": 3.6916387603880787e-06, "loss": 0.2021, "step": 38636 }, { "epoch": 0.7173171247906789, "grad_norm": 0.3962160050868988, "learning_rate": 3.690733704430326e-06, "loss": 0.426, "step": 38638 }, { "epoch": 0.7173542549280975, "grad_norm": 0.4419653117656708, "learning_rate": 3.6898287343227925e-06, "loss": 0.2823, "step": 38640 }, { "epoch": 0.7173913850655161, "grad_norm": 0.31579461693763733, "learning_rate": 3.688923850077786e-06, "loss": 0.2263, "step": 38642 }, { "epoch": 0.7174285152029347, "grad_norm": 0.25499096512794495, "learning_rate": 3.688019051707622e-06, "loss": 0.3703, "step": 38644 }, { "epoch": 0.7174656453403534, "grad_norm": 0.43640610575675964, "learning_rate": 3.687114339224611e-06, "loss": 0.3093, "step": 38646 }, { "epoch": 0.7175027754777721, "grad_norm": 0.41525140404701233, "learning_rate": 3.686209712641069e-06, "loss": 0.1812, "step": 38648 }, { "epoch": 0.7175399056151907, "grad_norm": 0.2539508640766144, "learning_rate": 3.685305171969298e-06, "loss": 0.3593, "step": 38650 }, { "epoch": 0.7175770357526093, "grad_norm": 0.4719572365283966, "learning_rate": 3.684400717221611e-06, "loss": 0.182, "step": 38652 }, { "epoch": 0.7176141658900279, "grad_norm": 0.5121989250183105, "learning_rate": 3.6834963484103102e-06, "loss": 0.3614, "step": 38654 }, { "epoch": 0.7176512960274466, "grad_norm": 0.2766980230808258, "learning_rate": 3.6825920655477034e-06, "loss": 0.1745, "step": 38656 }, { "epoch": 0.7176884261648653, "grad_norm": 0.5101406574249268, "learning_rate": 3.6816878686460967e-06, "loss": 0.3317, "step": 38658 }, { "epoch": 0.7177255563022839, "grad_norm": 0.4496828615665436, "learning_rate": 3.6807837577177916e-06, "loss": 0.1731, "step": 38660 }, { "epoch": 0.7177626864397025, "grad_norm": 0.4221780300140381, "learning_rate": 3.6798797327750913e-06, "loss": 0.2247, "step": 38662 }, { "epoch": 0.7177998165771211, "grad_norm": 0.36494335532188416, "learning_rate": 3.6789757938302973e-06, "loss": 0.2578, "step": 38664 }, { "epoch": 0.7178369467145398, "grad_norm": 0.34253060817718506, "learning_rate": 3.678071940895712e-06, "loss": 0.2417, "step": 38666 }, { "epoch": 0.7178740768519585, "grad_norm": 0.43805208802223206, "learning_rate": 3.6771681739836286e-06, "loss": 0.3266, "step": 38668 }, { "epoch": 0.717911206989377, "grad_norm": 0.6333506107330322, "learning_rate": 3.6762644931063506e-06, "loss": 0.3294, "step": 38670 }, { "epoch": 0.7179483371267957, "grad_norm": 0.3238885998725891, "learning_rate": 3.675360898276168e-06, "loss": 0.2315, "step": 38672 }, { "epoch": 0.7179854672642143, "grad_norm": 0.3620810806751251, "learning_rate": 3.674457389505378e-06, "loss": 0.2176, "step": 38674 }, { "epoch": 0.718022597401633, "grad_norm": 0.33447256684303284, "learning_rate": 3.673553966806276e-06, "loss": 0.2949, "step": 38676 }, { "epoch": 0.7180597275390517, "grad_norm": 0.49827396869659424, "learning_rate": 3.672650630191158e-06, "loss": 0.3553, "step": 38678 }, { "epoch": 0.7180968576764702, "grad_norm": 0.40996673703193665, "learning_rate": 3.671747379672309e-06, "loss": 0.3466, "step": 38680 }, { "epoch": 0.7181339878138889, "grad_norm": 0.35140761733055115, "learning_rate": 3.6708442152620238e-06, "loss": 0.4009, "step": 38682 }, { "epoch": 0.7181711179513075, "grad_norm": 0.47585082054138184, "learning_rate": 3.6699411369725903e-06, "loss": 0.2795, "step": 38684 }, { "epoch": 0.7182082480887262, "grad_norm": 0.5417736172676086, "learning_rate": 3.669038144816296e-06, "loss": 0.2558, "step": 38686 }, { "epoch": 0.7182453782261448, "grad_norm": 0.19093072414398193, "learning_rate": 3.6681352388054303e-06, "loss": 0.1152, "step": 38688 }, { "epoch": 0.7182825083635634, "grad_norm": 0.6106931567192078, "learning_rate": 3.6672324189522813e-06, "loss": 0.2991, "step": 38690 }, { "epoch": 0.7183196385009821, "grad_norm": 0.5000886917114258, "learning_rate": 3.6663296852691265e-06, "loss": 0.2618, "step": 38692 }, { "epoch": 0.7183567686384007, "grad_norm": 0.5093188285827637, "learning_rate": 3.6654270377682566e-06, "loss": 0.2094, "step": 38694 }, { "epoch": 0.7183938987758194, "grad_norm": 0.3340912163257599, "learning_rate": 3.664524476461947e-06, "loss": 0.1836, "step": 38696 }, { "epoch": 0.718431028913238, "grad_norm": 0.528056263923645, "learning_rate": 3.6636220013624823e-06, "loss": 0.4291, "step": 38698 }, { "epoch": 0.7184681590506566, "grad_norm": 0.3353152275085449, "learning_rate": 3.662719612482143e-06, "loss": 0.2204, "step": 38700 }, { "epoch": 0.7185052891880753, "grad_norm": 0.3706236481666565, "learning_rate": 3.6618173098332067e-06, "loss": 0.2895, "step": 38702 }, { "epoch": 0.7185424193254939, "grad_norm": 0.44620075821876526, "learning_rate": 3.6609150934279557e-06, "loss": 0.2825, "step": 38704 }, { "epoch": 0.7185795494629126, "grad_norm": 0.4237919747829437, "learning_rate": 3.6600129632786584e-06, "loss": 0.3391, "step": 38706 }, { "epoch": 0.7186166796003312, "grad_norm": 0.4652755558490753, "learning_rate": 3.659110919397595e-06, "loss": 0.1831, "step": 38708 }, { "epoch": 0.7186538097377498, "grad_norm": 0.38460132479667664, "learning_rate": 3.6582089617970383e-06, "loss": 0.2693, "step": 38710 }, { "epoch": 0.7186909398751685, "grad_norm": 0.30823004245758057, "learning_rate": 3.657307090489266e-06, "loss": 0.1929, "step": 38712 }, { "epoch": 0.7187280700125871, "grad_norm": 0.32021015882492065, "learning_rate": 3.656405305486541e-06, "loss": 0.4334, "step": 38714 }, { "epoch": 0.7187652001500058, "grad_norm": 0.4665972888469696, "learning_rate": 3.655503606801143e-06, "loss": 0.443, "step": 38716 }, { "epoch": 0.7188023302874244, "grad_norm": 0.44418954849243164, "learning_rate": 3.654601994445334e-06, "loss": 0.3701, "step": 38718 }, { "epoch": 0.718839460424843, "grad_norm": 0.39013493061065674, "learning_rate": 3.6537004684313836e-06, "loss": 0.457, "step": 38720 }, { "epoch": 0.7188765905622617, "grad_norm": 0.5318259596824646, "learning_rate": 3.652799028771561e-06, "loss": 0.2509, "step": 38722 }, { "epoch": 0.7189137206996803, "grad_norm": 0.5585427284240723, "learning_rate": 3.651897675478132e-06, "loss": 0.2396, "step": 38724 }, { "epoch": 0.718950850837099, "grad_norm": 0.3859190046787262, "learning_rate": 3.6509964085633607e-06, "loss": 0.4403, "step": 38726 }, { "epoch": 0.7189879809745175, "grad_norm": 0.6153118014335632, "learning_rate": 3.650095228039514e-06, "loss": 0.304, "step": 38728 }, { "epoch": 0.7190251111119362, "grad_norm": 0.4078846573829651, "learning_rate": 3.6491941339188473e-06, "loss": 0.1878, "step": 38730 }, { "epoch": 0.7190622412493549, "grad_norm": 0.47715505957603455, "learning_rate": 3.648293126213627e-06, "loss": 0.2386, "step": 38732 }, { "epoch": 0.7190993713867735, "grad_norm": 0.24284495413303375, "learning_rate": 3.6473922049361134e-06, "loss": 0.2406, "step": 38734 }, { "epoch": 0.7191365015241922, "grad_norm": 0.4004441797733307, "learning_rate": 3.6464913700985616e-06, "loss": 0.4131, "step": 38736 }, { "epoch": 0.7191736316616107, "grad_norm": 0.35591989755630493, "learning_rate": 3.6455906217132297e-06, "loss": 0.4608, "step": 38738 }, { "epoch": 0.7192107617990294, "grad_norm": 0.47812169790267944, "learning_rate": 3.64468995979238e-06, "loss": 0.1441, "step": 38740 }, { "epoch": 0.719247891936448, "grad_norm": 0.3468095064163208, "learning_rate": 3.6437893843482608e-06, "loss": 0.2238, "step": 38742 }, { "epoch": 0.7192850220738667, "grad_norm": 0.4522748291492462, "learning_rate": 3.6428888953931287e-06, "loss": 0.2369, "step": 38744 }, { "epoch": 0.7193221522112854, "grad_norm": 0.5012431740760803, "learning_rate": 3.641988492939237e-06, "loss": 0.2444, "step": 38746 }, { "epoch": 0.7193592823487039, "grad_norm": 0.39005109667778015, "learning_rate": 3.641088176998837e-06, "loss": 0.1278, "step": 38748 }, { "epoch": 0.7193964124861226, "grad_norm": 0.4419976472854614, "learning_rate": 3.6401879475841807e-06, "loss": 0.2312, "step": 38750 }, { "epoch": 0.7194335426235412, "grad_norm": 0.7120946645736694, "learning_rate": 3.6392878047075155e-06, "loss": 0.2971, "step": 38752 }, { "epoch": 0.7194706727609599, "grad_norm": 0.48163482546806335, "learning_rate": 3.6383877483810947e-06, "loss": 0.1335, "step": 38754 }, { "epoch": 0.7195078028983786, "grad_norm": 0.3545670211315155, "learning_rate": 3.6374877786171616e-06, "loss": 0.2138, "step": 38756 }, { "epoch": 0.7195449330357971, "grad_norm": 0.4362489879131317, "learning_rate": 3.6365878954279578e-06, "loss": 0.299, "step": 38758 }, { "epoch": 0.7195820631732158, "grad_norm": 0.5791985392570496, "learning_rate": 3.6356880988257326e-06, "loss": 0.3673, "step": 38760 }, { "epoch": 0.7196191933106344, "grad_norm": 0.39850103855133057, "learning_rate": 3.6347883888227287e-06, "loss": 0.2784, "step": 38762 }, { "epoch": 0.7196563234480531, "grad_norm": 0.5402176380157471, "learning_rate": 3.6338887654311896e-06, "loss": 0.1832, "step": 38764 }, { "epoch": 0.7196934535854718, "grad_norm": 0.4282661974430084, "learning_rate": 3.632989228663358e-06, "loss": 0.3637, "step": 38766 }, { "epoch": 0.7197305837228903, "grad_norm": 0.42857134342193604, "learning_rate": 3.632089778531469e-06, "loss": 0.3296, "step": 38768 }, { "epoch": 0.719767713860309, "grad_norm": 0.29255399107933044, "learning_rate": 3.631190415047764e-06, "loss": 0.2624, "step": 38770 }, { "epoch": 0.7198048439977276, "grad_norm": 0.43775710463523865, "learning_rate": 3.63029113822448e-06, "loss": 0.2476, "step": 38772 }, { "epoch": 0.7198419741351463, "grad_norm": 0.33160141110420227, "learning_rate": 3.629391948073855e-06, "loss": 0.2211, "step": 38774 }, { "epoch": 0.719879104272565, "grad_norm": 0.28628262877464294, "learning_rate": 3.628492844608126e-06, "loss": 0.2424, "step": 38776 }, { "epoch": 0.7199162344099835, "grad_norm": 0.34296634793281555, "learning_rate": 3.6275938278395252e-06, "loss": 0.2618, "step": 38778 }, { "epoch": 0.7199533645474022, "grad_norm": 0.4045264720916748, "learning_rate": 3.6266948977802808e-06, "loss": 0.2621, "step": 38780 }, { "epoch": 0.7199904946848208, "grad_norm": 0.3822517991065979, "learning_rate": 3.6257960544426297e-06, "loss": 0.2516, "step": 38782 }, { "epoch": 0.7200276248222395, "grad_norm": 0.3708129823207855, "learning_rate": 3.6248972978388e-06, "loss": 0.3812, "step": 38784 }, { "epoch": 0.7200647549596582, "grad_norm": 0.3437800109386444, "learning_rate": 3.6239986279810223e-06, "loss": 0.3967, "step": 38786 }, { "epoch": 0.7201018850970767, "grad_norm": 0.47998106479644775, "learning_rate": 3.6231000448815256e-06, "loss": 0.0318, "step": 38788 }, { "epoch": 0.7201390152344954, "grad_norm": 0.3666190803050995, "learning_rate": 3.6222015485525365e-06, "loss": 0.3328, "step": 38790 }, { "epoch": 0.720176145371914, "grad_norm": 0.43083828687667847, "learning_rate": 3.6213031390062846e-06, "loss": 0.204, "step": 38792 }, { "epoch": 0.7202132755093327, "grad_norm": 0.29224878549575806, "learning_rate": 3.6204048162549855e-06, "loss": 0.2584, "step": 38794 }, { "epoch": 0.7202504056467512, "grad_norm": 0.6617841124534607, "learning_rate": 3.6195065803108687e-06, "loss": 0.3285, "step": 38796 }, { "epoch": 0.7202875357841699, "grad_norm": 0.32580098509788513, "learning_rate": 3.618608431186159e-06, "loss": 0.1982, "step": 38798 }, { "epoch": 0.7203246659215886, "grad_norm": 0.4350251853466034, "learning_rate": 3.6177103688930705e-06, "loss": 0.2385, "step": 38800 }, { "epoch": 0.7203617960590072, "grad_norm": 0.34577810764312744, "learning_rate": 3.6168123934438273e-06, "loss": 0.2533, "step": 38802 }, { "epoch": 0.7203989261964259, "grad_norm": 0.4980188012123108, "learning_rate": 3.6159145048506507e-06, "loss": 0.2436, "step": 38804 }, { "epoch": 0.7204360563338444, "grad_norm": 0.6040366888046265, "learning_rate": 3.615016703125751e-06, "loss": 0.2507, "step": 38806 }, { "epoch": 0.7204731864712631, "grad_norm": 0.40945670008659363, "learning_rate": 3.614118988281349e-06, "loss": 0.2976, "step": 38808 }, { "epoch": 0.7205103166086818, "grad_norm": 0.23386062681674957, "learning_rate": 3.6132213603296597e-06, "loss": 0.1554, "step": 38810 }, { "epoch": 0.7205474467461004, "grad_norm": 0.357394814491272, "learning_rate": 3.612323819282897e-06, "loss": 0.2703, "step": 38812 }, { "epoch": 0.7205845768835191, "grad_norm": 0.34070026874542236, "learning_rate": 3.6114263651532745e-06, "loss": 0.219, "step": 38814 }, { "epoch": 0.7206217070209376, "grad_norm": 0.900668203830719, "learning_rate": 3.6105289979530057e-06, "loss": 0.4366, "step": 38816 }, { "epoch": 0.7206588371583563, "grad_norm": 0.490684449672699, "learning_rate": 3.609631717694295e-06, "loss": 0.185, "step": 38818 }, { "epoch": 0.720695967295775, "grad_norm": 0.47929099202156067, "learning_rate": 3.60873452438936e-06, "loss": 0.3218, "step": 38820 }, { "epoch": 0.7207330974331936, "grad_norm": 0.3474893569946289, "learning_rate": 3.6078374180503993e-06, "loss": 0.1781, "step": 38822 }, { "epoch": 0.7207702275706123, "grad_norm": 0.4055374264717102, "learning_rate": 3.606940398689626e-06, "loss": 0.2203, "step": 38824 }, { "epoch": 0.7208073577080308, "grad_norm": 0.08936754614114761, "learning_rate": 3.6060434663192435e-06, "loss": 0.2689, "step": 38826 }, { "epoch": 0.7208444878454495, "grad_norm": 0.3887677490711212, "learning_rate": 3.6051466209514574e-06, "loss": 0.3051, "step": 38828 }, { "epoch": 0.7208816179828682, "grad_norm": 0.36987999081611633, "learning_rate": 3.6042498625984745e-06, "loss": 0.2451, "step": 38830 }, { "epoch": 0.7209187481202868, "grad_norm": 0.3292480707168579, "learning_rate": 3.6033531912724895e-06, "loss": 0.1286, "step": 38832 }, { "epoch": 0.7209558782577055, "grad_norm": 0.31218263506889343, "learning_rate": 3.602456606985708e-06, "loss": 0.1441, "step": 38834 }, { "epoch": 0.720993008395124, "grad_norm": 0.35919642448425293, "learning_rate": 3.6015601097503293e-06, "loss": 0.3411, "step": 38836 }, { "epoch": 0.7210301385325427, "grad_norm": 0.35082876682281494, "learning_rate": 3.6006636995785517e-06, "loss": 0.1913, "step": 38838 }, { "epoch": 0.7210672686699613, "grad_norm": 0.26235806941986084, "learning_rate": 3.599767376482576e-06, "loss": 0.2155, "step": 38840 }, { "epoch": 0.72110439880738, "grad_norm": 0.322581022977829, "learning_rate": 3.598871140474596e-06, "loss": 0.2072, "step": 38842 }, { "epoch": 0.7211415289447987, "grad_norm": 0.45166969299316406, "learning_rate": 3.597974991566803e-06, "loss": 0.6084, "step": 38844 }, { "epoch": 0.7211786590822172, "grad_norm": 0.3500579297542572, "learning_rate": 3.5970789297713924e-06, "loss": 0.2474, "step": 38846 }, { "epoch": 0.7212157892196359, "grad_norm": 0.5835371017456055, "learning_rate": 3.59618295510056e-06, "loss": 0.2226, "step": 38848 }, { "epoch": 0.7212529193570545, "grad_norm": 0.5012018084526062, "learning_rate": 3.5952870675664964e-06, "loss": 0.3825, "step": 38850 }, { "epoch": 0.7212900494944732, "grad_norm": 0.403816819190979, "learning_rate": 3.594391267181391e-06, "loss": 0.3176, "step": 38852 }, { "epoch": 0.7213271796318919, "grad_norm": 0.2957979142665863, "learning_rate": 3.5934955539574355e-06, "loss": 0.1617, "step": 38854 }, { "epoch": 0.7213643097693104, "grad_norm": 0.1890823394060135, "learning_rate": 3.592599927906815e-06, "loss": 0.195, "step": 38856 }, { "epoch": 0.7214014399067291, "grad_norm": 0.6210729479789734, "learning_rate": 3.5917043890417147e-06, "loss": 0.2169, "step": 38858 }, { "epoch": 0.7214385700441477, "grad_norm": 0.2653856873512268, "learning_rate": 3.590808937374324e-06, "loss": 0.0939, "step": 38860 }, { "epoch": 0.7214757001815664, "grad_norm": 0.4769550561904907, "learning_rate": 3.5899135729168287e-06, "loss": 0.3384, "step": 38862 }, { "epoch": 0.721512830318985, "grad_norm": 0.4075268805027008, "learning_rate": 3.5890182956814057e-06, "loss": 0.2335, "step": 38864 }, { "epoch": 0.7215499604564036, "grad_norm": 0.4530476927757263, "learning_rate": 3.5881231056802414e-06, "loss": 0.2859, "step": 38866 }, { "epoch": 0.7215870905938223, "grad_norm": 0.4054543077945709, "learning_rate": 3.587228002925518e-06, "loss": 0.2894, "step": 38868 }, { "epoch": 0.7216242207312409, "grad_norm": 0.4946069121360779, "learning_rate": 3.5863329874294108e-06, "loss": 0.3096, "step": 38870 }, { "epoch": 0.7216613508686596, "grad_norm": 0.5204428434371948, "learning_rate": 3.5854380592040995e-06, "loss": 0.1586, "step": 38872 }, { "epoch": 0.7216984810060783, "grad_norm": 0.6086781024932861, "learning_rate": 3.5845432182617633e-06, "loss": 0.3147, "step": 38874 }, { "epoch": 0.7217356111434968, "grad_norm": 0.20208783447742462, "learning_rate": 3.5836484646145765e-06, "loss": 0.2429, "step": 38876 }, { "epoch": 0.7217727412809155, "grad_norm": 0.37577173113822937, "learning_rate": 3.582753798274715e-06, "loss": 0.2683, "step": 38878 }, { "epoch": 0.7218098714183341, "grad_norm": 0.25602689385414124, "learning_rate": 3.5818592192543567e-06, "loss": 0.1358, "step": 38880 }, { "epoch": 0.7218470015557528, "grad_norm": 0.36536502838134766, "learning_rate": 3.5809647275656657e-06, "loss": 0.4456, "step": 38882 }, { "epoch": 0.7218841316931714, "grad_norm": 0.584618091583252, "learning_rate": 3.5800703232208213e-06, "loss": 0.274, "step": 38884 }, { "epoch": 0.72192126183059, "grad_norm": 0.5205422043800354, "learning_rate": 3.579176006231987e-06, "loss": 0.3038, "step": 38886 }, { "epoch": 0.7219583919680087, "grad_norm": 0.3048643469810486, "learning_rate": 3.578281776611333e-06, "loss": 0.1461, "step": 38888 }, { "epoch": 0.7219955221054273, "grad_norm": 0.32386571168899536, "learning_rate": 3.57738763437103e-06, "loss": 0.3194, "step": 38890 }, { "epoch": 0.722032652242846, "grad_norm": 0.2678619623184204, "learning_rate": 3.576493579523247e-06, "loss": 0.4651, "step": 38892 }, { "epoch": 0.7220697823802645, "grad_norm": 0.49344614148139954, "learning_rate": 3.5755996120801416e-06, "loss": 0.3791, "step": 38894 }, { "epoch": 0.7221069125176832, "grad_norm": 0.3099924921989441, "learning_rate": 3.5747057320538826e-06, "loss": 0.342, "step": 38896 }, { "epoch": 0.7221440426551019, "grad_norm": 0.474805623292923, "learning_rate": 3.5738119394566327e-06, "loss": 0.2895, "step": 38898 }, { "epoch": 0.7221811727925205, "grad_norm": 0.3559885621070862, "learning_rate": 3.572918234300553e-06, "loss": 0.4125, "step": 38900 }, { "epoch": 0.7222183029299392, "grad_norm": 0.29377293586730957, "learning_rate": 3.572024616597809e-06, "loss": 0.2539, "step": 38902 }, { "epoch": 0.7222554330673577, "grad_norm": 0.5472588539123535, "learning_rate": 3.5711310863605517e-06, "loss": 0.203, "step": 38904 }, { "epoch": 0.7222925632047764, "grad_norm": 0.40619930624961853, "learning_rate": 3.570237643600948e-06, "loss": 0.347, "step": 38906 }, { "epoch": 0.7223296933421951, "grad_norm": 0.22255311906337738, "learning_rate": 3.569344288331146e-06, "loss": 0.204, "step": 38908 }, { "epoch": 0.7223668234796137, "grad_norm": 0.47249698638916016, "learning_rate": 3.568451020563307e-06, "loss": 0.1732, "step": 38910 }, { "epoch": 0.7224039536170324, "grad_norm": 0.26445773243904114, "learning_rate": 3.5675578403095845e-06, "loss": 0.2383, "step": 38912 }, { "epoch": 0.7224410837544509, "grad_norm": 0.4643542468547821, "learning_rate": 3.5666647475821325e-06, "loss": 0.2817, "step": 38914 }, { "epoch": 0.7224782138918696, "grad_norm": 0.23916217684745789, "learning_rate": 3.5657717423931038e-06, "loss": 0.3744, "step": 38916 }, { "epoch": 0.7225153440292883, "grad_norm": 0.3727911710739136, "learning_rate": 3.564878824754652e-06, "loss": 0.5394, "step": 38918 }, { "epoch": 0.7225524741667069, "grad_norm": 0.2643372714519501, "learning_rate": 3.56398599467892e-06, "loss": 0.2251, "step": 38920 }, { "epoch": 0.7225896043041256, "grad_norm": 0.3309539258480072, "learning_rate": 3.5630932521780615e-06, "loss": 0.2046, "step": 38922 }, { "epoch": 0.7226267344415441, "grad_norm": 0.2792668342590332, "learning_rate": 3.562200597264226e-06, "loss": 0.2128, "step": 38924 }, { "epoch": 0.7226638645789628, "grad_norm": 0.29750505089759827, "learning_rate": 3.5613080299495526e-06, "loss": 0.1791, "step": 38926 }, { "epoch": 0.7227009947163815, "grad_norm": 0.2564142346382141, "learning_rate": 3.5604155502461924e-06, "loss": 0.0639, "step": 38928 }, { "epoch": 0.7227381248538001, "grad_norm": 0.3811742663383484, "learning_rate": 3.5595231581662903e-06, "loss": 0.2042, "step": 38930 }, { "epoch": 0.7227752549912188, "grad_norm": 0.2708301842212677, "learning_rate": 3.558630853721984e-06, "loss": 0.2386, "step": 38932 }, { "epoch": 0.7228123851286373, "grad_norm": 0.5155462026596069, "learning_rate": 3.5577386369254163e-06, "loss": 0.2811, "step": 38934 }, { "epoch": 0.722849515266056, "grad_norm": 0.5553277730941772, "learning_rate": 3.5568465077887294e-06, "loss": 0.2966, "step": 38936 }, { "epoch": 0.7228866454034747, "grad_norm": 0.3409940004348755, "learning_rate": 3.555954466324062e-06, "loss": 0.25, "step": 38938 }, { "epoch": 0.7229237755408933, "grad_norm": 0.4441196322441101, "learning_rate": 3.5550625125435533e-06, "loss": 0.3048, "step": 38940 }, { "epoch": 0.722960905678312, "grad_norm": 0.29282763600349426, "learning_rate": 3.5541706464593407e-06, "loss": 0.2121, "step": 38942 }, { "epoch": 0.7229980358157305, "grad_norm": 0.649804413318634, "learning_rate": 3.5532788680835563e-06, "loss": 0.2436, "step": 38944 }, { "epoch": 0.7230351659531492, "grad_norm": 0.2992003262042999, "learning_rate": 3.552387177428335e-06, "loss": 0.2873, "step": 38946 }, { "epoch": 0.7230722960905678, "grad_norm": 0.4115583598613739, "learning_rate": 3.5514955745058155e-06, "loss": 0.2796, "step": 38948 }, { "epoch": 0.7231094262279865, "grad_norm": 0.4025840163230896, "learning_rate": 3.5506040593281223e-06, "loss": 0.2979, "step": 38950 }, { "epoch": 0.7231465563654051, "grad_norm": 0.5188094973564148, "learning_rate": 3.5497126319073894e-06, "loss": 0.3105, "step": 38952 }, { "epoch": 0.7231836865028237, "grad_norm": 0.4124195873737335, "learning_rate": 3.5488212922557463e-06, "loss": 0.3394, "step": 38954 }, { "epoch": 0.7232208166402424, "grad_norm": 0.4021106958389282, "learning_rate": 3.547930040385327e-06, "loss": 0.3372, "step": 38956 }, { "epoch": 0.723257946777661, "grad_norm": 0.4518987834453583, "learning_rate": 3.5470388763082485e-06, "loss": 0.2095, "step": 38958 }, { "epoch": 0.7232950769150797, "grad_norm": 0.30070164799690247, "learning_rate": 3.546147800036642e-06, "loss": 0.2437, "step": 38960 }, { "epoch": 0.7233322070524983, "grad_norm": 0.43122047185897827, "learning_rate": 3.5452568115826336e-06, "loss": 0.3089, "step": 38962 }, { "epoch": 0.7233693371899169, "grad_norm": 1.0666844844818115, "learning_rate": 3.544365910958345e-06, "loss": 0.3335, "step": 38964 }, { "epoch": 0.7234064673273356, "grad_norm": 0.4523346722126007, "learning_rate": 3.5434750981759035e-06, "loss": 0.2773, "step": 38966 }, { "epoch": 0.7234435974647542, "grad_norm": 0.4578306972980499, "learning_rate": 3.542584373247423e-06, "loss": 0.3336, "step": 38968 }, { "epoch": 0.7234807276021729, "grad_norm": 0.37370413541793823, "learning_rate": 3.54169373618503e-06, "loss": 0.2858, "step": 38970 }, { "epoch": 0.7235178577395915, "grad_norm": 0.35634171962738037, "learning_rate": 3.5408031870008363e-06, "loss": 0.3599, "step": 38972 }, { "epoch": 0.7235549878770101, "grad_norm": 0.19497917592525482, "learning_rate": 3.5399127257069643e-06, "loss": 0.4497, "step": 38974 }, { "epoch": 0.7235921180144288, "grad_norm": 0.5412720441818237, "learning_rate": 3.5390223523155296e-06, "loss": 0.1097, "step": 38976 }, { "epoch": 0.7236292481518474, "grad_norm": 0.3420836329460144, "learning_rate": 3.5381320668386487e-06, "loss": 0.3406, "step": 38978 }, { "epoch": 0.723666378289266, "grad_norm": 0.4148283898830414, "learning_rate": 3.537241869288438e-06, "loss": 0.2928, "step": 38980 }, { "epoch": 0.7237035084266847, "grad_norm": 0.3723365366458893, "learning_rate": 3.5363517596770035e-06, "loss": 0.1594, "step": 38982 }, { "epoch": 0.7237406385641033, "grad_norm": 0.3955720365047455, "learning_rate": 3.5354617380164614e-06, "loss": 0.1664, "step": 38984 }, { "epoch": 0.723777768701522, "grad_norm": 0.37481918931007385, "learning_rate": 3.5345718043189204e-06, "loss": 0.3109, "step": 38986 }, { "epoch": 0.7238148988389406, "grad_norm": 0.32017987966537476, "learning_rate": 3.5336819585964943e-06, "loss": 0.1629, "step": 38988 }, { "epoch": 0.7238520289763593, "grad_norm": 0.1633184552192688, "learning_rate": 3.532792200861286e-06, "loss": 0.2089, "step": 38990 }, { "epoch": 0.7238891591137778, "grad_norm": 0.346179336309433, "learning_rate": 3.531902531125403e-06, "loss": 0.4263, "step": 38992 }, { "epoch": 0.7239262892511965, "grad_norm": 0.42145711183547974, "learning_rate": 3.5310129494009558e-06, "loss": 0.224, "step": 38994 }, { "epoch": 0.7239634193886152, "grad_norm": 0.3653205633163452, "learning_rate": 3.530123455700043e-06, "loss": 0.4381, "step": 38996 }, { "epoch": 0.7240005495260338, "grad_norm": 0.42879360914230347, "learning_rate": 3.5292340500347687e-06, "loss": 0.4225, "step": 38998 }, { "epoch": 0.7240376796634524, "grad_norm": 0.472304105758667, "learning_rate": 3.528344732417238e-06, "loss": 0.3585, "step": 39000 }, { "epoch": 0.724074809800871, "grad_norm": 0.2679572105407715, "learning_rate": 3.5274555028595515e-06, "loss": 0.2104, "step": 39002 }, { "epoch": 0.7241119399382897, "grad_norm": 0.5813097357749939, "learning_rate": 3.5265663613738067e-06, "loss": 0.1899, "step": 39004 }, { "epoch": 0.7241490700757084, "grad_norm": 0.3221306800842285, "learning_rate": 3.5256773079721074e-06, "loss": 0.3697, "step": 39006 }, { "epoch": 0.724186200213127, "grad_norm": 0.6098265051841736, "learning_rate": 3.524788342666544e-06, "loss": 0.236, "step": 39008 }, { "epoch": 0.7242233303505456, "grad_norm": 0.3353172838687897, "learning_rate": 3.523899465469218e-06, "loss": 0.2855, "step": 39010 }, { "epoch": 0.7242604604879642, "grad_norm": 0.3790881037712097, "learning_rate": 3.52301067639222e-06, "loss": 0.2982, "step": 39012 }, { "epoch": 0.7242975906253829, "grad_norm": 0.3624052405357361, "learning_rate": 3.5221219754476454e-06, "loss": 0.3355, "step": 39014 }, { "epoch": 0.7243347207628016, "grad_norm": 0.319830983877182, "learning_rate": 3.521233362647587e-06, "loss": 0.2454, "step": 39016 }, { "epoch": 0.7243718509002202, "grad_norm": 0.3455754220485687, "learning_rate": 3.5203448380041405e-06, "loss": 0.3669, "step": 39018 }, { "epoch": 0.7244089810376388, "grad_norm": 0.2667527198791504, "learning_rate": 3.5194564015293876e-06, "loss": 0.1728, "step": 39020 }, { "epoch": 0.7244461111750574, "grad_norm": 0.3749663531780243, "learning_rate": 3.5185680532354228e-06, "loss": 0.1594, "step": 39022 }, { "epoch": 0.7244832413124761, "grad_norm": 0.3437434732913971, "learning_rate": 3.517679793134332e-06, "loss": 0.125, "step": 39024 }, { "epoch": 0.7245203714498948, "grad_norm": 0.3875744342803955, "learning_rate": 3.5167916212382013e-06, "loss": 0.3497, "step": 39026 }, { "epoch": 0.7245575015873134, "grad_norm": 0.2606813311576843, "learning_rate": 3.515903537559119e-06, "loss": 0.2169, "step": 39028 }, { "epoch": 0.724594631724732, "grad_norm": 0.33072134852409363, "learning_rate": 3.5150155421091703e-06, "loss": 0.3459, "step": 39030 }, { "epoch": 0.7246317618621506, "grad_norm": 0.3636607229709625, "learning_rate": 3.514127634900435e-06, "loss": 0.2582, "step": 39032 }, { "epoch": 0.7246688919995693, "grad_norm": 0.37538769841194153, "learning_rate": 3.513239815944991e-06, "loss": 0.3284, "step": 39034 }, { "epoch": 0.724706022136988, "grad_norm": 0.3180486857891083, "learning_rate": 3.512352085254924e-06, "loss": 0.4365, "step": 39036 }, { "epoch": 0.7247431522744066, "grad_norm": 0.4815988540649414, "learning_rate": 3.511464442842313e-06, "loss": 0.3549, "step": 39038 }, { "epoch": 0.7247802824118252, "grad_norm": 0.6721015572547913, "learning_rate": 3.510576888719234e-06, "loss": 0.2752, "step": 39040 }, { "epoch": 0.7248174125492438, "grad_norm": 0.3308669626712799, "learning_rate": 3.5096894228977664e-06, "loss": 0.1925, "step": 39042 }, { "epoch": 0.7248545426866625, "grad_norm": 0.5335550308227539, "learning_rate": 3.5088020453899887e-06, "loss": 0.3357, "step": 39044 }, { "epoch": 0.7248916728240811, "grad_norm": 0.39075931906700134, "learning_rate": 3.5079147562079674e-06, "loss": 0.2614, "step": 39046 }, { "epoch": 0.7249288029614998, "grad_norm": 0.4553295373916626, "learning_rate": 3.50702755536378e-06, "loss": 0.1827, "step": 39048 }, { "epoch": 0.7249659330989184, "grad_norm": 0.29857999086380005, "learning_rate": 3.5061404428695002e-06, "loss": 0.1295, "step": 39050 }, { "epoch": 0.725003063236337, "grad_norm": 0.49474528431892395, "learning_rate": 3.5052534187372002e-06, "loss": 0.2598, "step": 39052 }, { "epoch": 0.7250401933737557, "grad_norm": 0.3226276636123657, "learning_rate": 3.5043664829789435e-06, "loss": 0.2712, "step": 39054 }, { "epoch": 0.7250773235111743, "grad_norm": 0.4511318802833557, "learning_rate": 3.5034796356068067e-06, "loss": 0.308, "step": 39056 }, { "epoch": 0.725114453648593, "grad_norm": 0.3627657890319824, "learning_rate": 3.502592876632849e-06, "loss": 0.3243, "step": 39058 }, { "epoch": 0.7251515837860116, "grad_norm": 0.5099171996116638, "learning_rate": 3.5017062060691396e-06, "loss": 0.1566, "step": 39060 }, { "epoch": 0.7251887139234302, "grad_norm": 0.37432652711868286, "learning_rate": 3.5008196239277447e-06, "loss": 0.1857, "step": 39062 }, { "epoch": 0.7252258440608489, "grad_norm": 0.4011692404747009, "learning_rate": 3.4999331302207273e-06, "loss": 0.3195, "step": 39064 }, { "epoch": 0.7252629741982675, "grad_norm": 0.6910839080810547, "learning_rate": 3.49904672496015e-06, "loss": 0.2733, "step": 39066 }, { "epoch": 0.7253001043356861, "grad_norm": 0.3382725417613983, "learning_rate": 3.498160408158077e-06, "loss": 0.2549, "step": 39068 }, { "epoch": 0.7253372344731048, "grad_norm": 0.3551293611526489, "learning_rate": 3.497274179826563e-06, "loss": 0.2115, "step": 39070 }, { "epoch": 0.7253743646105234, "grad_norm": 0.3523249626159668, "learning_rate": 3.4963880399776696e-06, "loss": 0.2902, "step": 39072 }, { "epoch": 0.7254114947479421, "grad_norm": 0.4653478264808655, "learning_rate": 3.4955019886234563e-06, "loss": 0.1894, "step": 39074 }, { "epoch": 0.7254486248853607, "grad_norm": 0.38542091846466064, "learning_rate": 3.4946160257759753e-06, "loss": 0.2841, "step": 39076 }, { "epoch": 0.7254857550227793, "grad_norm": 0.49824249744415283, "learning_rate": 3.4937301514472854e-06, "loss": 0.5006, "step": 39078 }, { "epoch": 0.725522885160198, "grad_norm": 0.2981157898902893, "learning_rate": 3.492844365649438e-06, "loss": 0.0844, "step": 39080 }, { "epoch": 0.7255600152976166, "grad_norm": 0.3175773620605469, "learning_rate": 3.491958668394492e-06, "loss": 0.1994, "step": 39082 }, { "epoch": 0.7255971454350353, "grad_norm": 0.3956991136074066, "learning_rate": 3.491073059694491e-06, "loss": 0.3547, "step": 39084 }, { "epoch": 0.7256342755724539, "grad_norm": 0.3419788181781769, "learning_rate": 3.490187539561489e-06, "loss": 0.2259, "step": 39086 }, { "epoch": 0.7256714057098725, "grad_norm": 0.4093378186225891, "learning_rate": 3.489302108007535e-06, "loss": 0.233, "step": 39088 }, { "epoch": 0.7257085358472912, "grad_norm": 0.29068395495414734, "learning_rate": 3.488416765044679e-06, "loss": 0.4468, "step": 39090 }, { "epoch": 0.7257456659847098, "grad_norm": 0.6384782195091248, "learning_rate": 3.4875315106849682e-06, "loss": 0.2849, "step": 39092 }, { "epoch": 0.7257827961221285, "grad_norm": 0.618625283241272, "learning_rate": 3.486646344940443e-06, "loss": 0.3089, "step": 39094 }, { "epoch": 0.725819926259547, "grad_norm": 0.3980785310268402, "learning_rate": 3.4857612678231554e-06, "loss": 0.2213, "step": 39096 }, { "epoch": 0.7258570563969657, "grad_norm": 0.4390384256839752, "learning_rate": 3.4848762793451417e-06, "loss": 0.3302, "step": 39098 }, { "epoch": 0.7258941865343843, "grad_norm": 0.3453618288040161, "learning_rate": 3.4839913795184453e-06, "loss": 0.2619, "step": 39100 }, { "epoch": 0.725931316671803, "grad_norm": 0.6470021605491638, "learning_rate": 3.4831065683551104e-06, "loss": 0.363, "step": 39102 }, { "epoch": 0.7259684468092217, "grad_norm": 0.332330584526062, "learning_rate": 3.482221845867173e-06, "loss": 0.4367, "step": 39104 }, { "epoch": 0.7260055769466403, "grad_norm": 0.3575783371925354, "learning_rate": 3.481337212066678e-06, "loss": 0.393, "step": 39106 }, { "epoch": 0.7260427070840589, "grad_norm": 0.5372342467308044, "learning_rate": 3.4804526669656536e-06, "loss": 0.2858, "step": 39108 }, { "epoch": 0.7260798372214775, "grad_norm": 0.3620225191116333, "learning_rate": 3.479568210576141e-06, "loss": 0.2535, "step": 39110 }, { "epoch": 0.7261169673588962, "grad_norm": 0.3068416714668274, "learning_rate": 3.478683842910173e-06, "loss": 0.2907, "step": 39112 }, { "epoch": 0.7261540974963149, "grad_norm": 0.5327022671699524, "learning_rate": 3.4777995639797844e-06, "loss": 0.3093, "step": 39114 }, { "epoch": 0.7261912276337334, "grad_norm": 0.34905552864074707, "learning_rate": 3.4769153737970117e-06, "loss": 0.3011, "step": 39116 }, { "epoch": 0.7262283577711521, "grad_norm": 0.5817269682884216, "learning_rate": 3.4760312723738776e-06, "loss": 0.1598, "step": 39118 }, { "epoch": 0.7262654879085707, "grad_norm": 0.4042501449584961, "learning_rate": 3.47514725972242e-06, "loss": 0.2427, "step": 39120 }, { "epoch": 0.7263026180459894, "grad_norm": 0.4381115138530731, "learning_rate": 3.4742633358546605e-06, "loss": 0.2477, "step": 39122 }, { "epoch": 0.7263397481834081, "grad_norm": 0.5624004602432251, "learning_rate": 3.473379500782631e-06, "loss": 0.2253, "step": 39124 }, { "epoch": 0.7263768783208266, "grad_norm": 0.38847243785858154, "learning_rate": 3.4724957545183556e-06, "loss": 0.3138, "step": 39126 }, { "epoch": 0.7264140084582453, "grad_norm": 0.3769180178642273, "learning_rate": 3.471612097073862e-06, "loss": 0.1553, "step": 39128 }, { "epoch": 0.7264511385956639, "grad_norm": 0.3771804869174957, "learning_rate": 3.4707285284611724e-06, "loss": 0.0705, "step": 39130 }, { "epoch": 0.7264882687330826, "grad_norm": 0.2771605849266052, "learning_rate": 3.469845048692314e-06, "loss": 0.2467, "step": 39132 }, { "epoch": 0.7265253988705013, "grad_norm": 0.4463084936141968, "learning_rate": 3.4689616577793007e-06, "loss": 0.192, "step": 39134 }, { "epoch": 0.7265625290079198, "grad_norm": 0.36550235748291016, "learning_rate": 3.4680783557341567e-06, "loss": 0.2851, "step": 39136 }, { "epoch": 0.7265996591453385, "grad_norm": 0.7167015075683594, "learning_rate": 3.467195142568904e-06, "loss": 0.2407, "step": 39138 }, { "epoch": 0.7266367892827571, "grad_norm": 0.5674812197685242, "learning_rate": 3.4663120182955547e-06, "loss": 0.3297, "step": 39140 }, { "epoch": 0.7266739194201758, "grad_norm": 0.2951454222202301, "learning_rate": 3.4654289829261265e-06, "loss": 0.2042, "step": 39142 }, { "epoch": 0.7267110495575944, "grad_norm": 0.29790204763412476, "learning_rate": 3.464546036472641e-06, "loss": 0.1843, "step": 39144 }, { "epoch": 0.726748179695013, "grad_norm": 0.38922035694122314, "learning_rate": 3.4636631789471053e-06, "loss": 0.3293, "step": 39146 }, { "epoch": 0.7267853098324317, "grad_norm": 0.5129856467247009, "learning_rate": 3.462780410361535e-06, "loss": 0.166, "step": 39148 }, { "epoch": 0.7268224399698503, "grad_norm": 0.36723607778549194, "learning_rate": 3.4618977307279412e-06, "loss": 0.149, "step": 39150 }, { "epoch": 0.726859570107269, "grad_norm": 0.30715444684028625, "learning_rate": 3.461015140058336e-06, "loss": 0.3101, "step": 39152 }, { "epoch": 0.7268967002446876, "grad_norm": 0.3355923891067505, "learning_rate": 3.460132638364727e-06, "loss": 0.3299, "step": 39154 }, { "epoch": 0.7269338303821062, "grad_norm": 0.5309499502182007, "learning_rate": 3.4592502256591287e-06, "loss": 0.1902, "step": 39156 }, { "epoch": 0.7269709605195249, "grad_norm": 0.5021377205848694, "learning_rate": 3.4583679019535386e-06, "loss": 0.3357, "step": 39158 }, { "epoch": 0.7270080906569435, "grad_norm": 0.3348003625869751, "learning_rate": 3.457485667259971e-06, "loss": 0.2555, "step": 39160 }, { "epoch": 0.7270452207943622, "grad_norm": 0.5919007062911987, "learning_rate": 3.4566035215904227e-06, "loss": 0.2806, "step": 39162 }, { "epoch": 0.7270823509317808, "grad_norm": 0.37456679344177246, "learning_rate": 3.4557214649569005e-06, "loss": 0.1653, "step": 39164 }, { "epoch": 0.7271194810691994, "grad_norm": 0.30915290117263794, "learning_rate": 3.454839497371406e-06, "loss": 0.3886, "step": 39166 }, { "epoch": 0.7271566112066181, "grad_norm": 0.43398627638816833, "learning_rate": 3.4539576188459425e-06, "loss": 0.5498, "step": 39168 }, { "epoch": 0.7271937413440367, "grad_norm": 0.3363274931907654, "learning_rate": 3.4530758293925103e-06, "loss": 0.1289, "step": 39170 }, { "epoch": 0.7272308714814554, "grad_norm": 0.40138885378837585, "learning_rate": 3.452194129023103e-06, "loss": 0.367, "step": 39172 }, { "epoch": 0.727268001618874, "grad_norm": 0.2927631437778473, "learning_rate": 3.451312517749721e-06, "loss": 0.098, "step": 39174 }, { "epoch": 0.7273051317562926, "grad_norm": 0.43219462037086487, "learning_rate": 3.4504309955843595e-06, "loss": 0.4389, "step": 39176 }, { "epoch": 0.7273422618937113, "grad_norm": 0.4215664267539978, "learning_rate": 3.4495495625390172e-06, "loss": 0.3566, "step": 39178 }, { "epoch": 0.7273793920311299, "grad_norm": 0.4232175350189209, "learning_rate": 3.4486682186256815e-06, "loss": 0.2824, "step": 39180 }, { "epoch": 0.7274165221685486, "grad_norm": 0.3134605288505554, "learning_rate": 3.4477869638563512e-06, "loss": 0.2967, "step": 39182 }, { "epoch": 0.7274536523059671, "grad_norm": 0.6582512259483337, "learning_rate": 3.44690579824301e-06, "loss": 0.1889, "step": 39184 }, { "epoch": 0.7274907824433858, "grad_norm": 0.4590010344982147, "learning_rate": 3.446024721797653e-06, "loss": 0.2772, "step": 39186 }, { "epoch": 0.7275279125808045, "grad_norm": 0.35719501972198486, "learning_rate": 3.445143734532268e-06, "loss": 0.2911, "step": 39188 }, { "epoch": 0.7275650427182231, "grad_norm": 0.19828538596630096, "learning_rate": 3.4442628364588425e-06, "loss": 0.0846, "step": 39190 }, { "epoch": 0.7276021728556418, "grad_norm": 0.4374946057796478, "learning_rate": 3.4433820275893626e-06, "loss": 0.294, "step": 39192 }, { "epoch": 0.7276393029930603, "grad_norm": 0.505297064781189, "learning_rate": 3.4425013079358184e-06, "loss": 0.4135, "step": 39194 }, { "epoch": 0.727676433130479, "grad_norm": 0.39555108547210693, "learning_rate": 3.4416206775101846e-06, "loss": 0.1913, "step": 39196 }, { "epoch": 0.7277135632678976, "grad_norm": 0.3262774348258972, "learning_rate": 3.44074013632445e-06, "loss": 0.1972, "step": 39198 }, { "epoch": 0.7277506934053163, "grad_norm": 0.31796813011169434, "learning_rate": 3.4398596843905976e-06, "loss": 0.3438, "step": 39200 }, { "epoch": 0.727787823542735, "grad_norm": 0.37448248267173767, "learning_rate": 3.4389793217206026e-06, "loss": 0.4026, "step": 39202 }, { "epoch": 0.7278249536801535, "grad_norm": 0.5521661639213562, "learning_rate": 3.4380990483264453e-06, "loss": 0.1558, "step": 39204 }, { "epoch": 0.7278620838175722, "grad_norm": 0.5784611105918884, "learning_rate": 3.4372188642201055e-06, "loss": 0.1396, "step": 39206 }, { "epoch": 0.7278992139549908, "grad_norm": 0.3011987507343292, "learning_rate": 3.436338769413562e-06, "loss": 0.1555, "step": 39208 }, { "epoch": 0.7279363440924095, "grad_norm": 0.39167916774749756, "learning_rate": 3.4354587639187843e-06, "loss": 0.2627, "step": 39210 }, { "epoch": 0.7279734742298282, "grad_norm": 0.3705577254295349, "learning_rate": 3.4345788477477502e-06, "loss": 0.3626, "step": 39212 }, { "epoch": 0.7280106043672467, "grad_norm": 0.41052165627479553, "learning_rate": 3.433699020912432e-06, "loss": 0.1662, "step": 39214 }, { "epoch": 0.7280477345046654, "grad_norm": 0.32566291093826294, "learning_rate": 3.4328192834248007e-06, "loss": 0.3315, "step": 39216 }, { "epoch": 0.728084864642084, "grad_norm": 0.30807965993881226, "learning_rate": 3.431939635296829e-06, "loss": 0.2695, "step": 39218 }, { "epoch": 0.7281219947795027, "grad_norm": 0.43532320857048035, "learning_rate": 3.431060076540489e-06, "loss": 0.2736, "step": 39220 }, { "epoch": 0.7281591249169214, "grad_norm": 0.2833642363548279, "learning_rate": 3.430180607167741e-06, "loss": 0.0409, "step": 39222 }, { "epoch": 0.7281962550543399, "grad_norm": 0.3956841826438904, "learning_rate": 3.4293012271905602e-06, "loss": 0.5156, "step": 39224 }, { "epoch": 0.7282333851917586, "grad_norm": 0.3698021471500397, "learning_rate": 3.4284219366209036e-06, "loss": 0.4876, "step": 39226 }, { "epoch": 0.7282705153291772, "grad_norm": 0.42781808972358704, "learning_rate": 3.4275427354707414e-06, "loss": 0.1689, "step": 39228 }, { "epoch": 0.7283076454665959, "grad_norm": 0.373699426651001, "learning_rate": 3.4266636237520357e-06, "loss": 0.2146, "step": 39230 }, { "epoch": 0.7283447756040146, "grad_norm": 0.38557422161102295, "learning_rate": 3.4257846014767515e-06, "loss": 0.2868, "step": 39232 }, { "epoch": 0.7283819057414331, "grad_norm": 0.3292366564273834, "learning_rate": 3.424905668656844e-06, "loss": 0.372, "step": 39234 }, { "epoch": 0.7284190358788518, "grad_norm": 0.3065301775932312, "learning_rate": 3.4240268253042754e-06, "loss": 0.2086, "step": 39236 }, { "epoch": 0.7284561660162704, "grad_norm": 0.21838930249214172, "learning_rate": 3.423148071431004e-06, "loss": 0.2176, "step": 39238 }, { "epoch": 0.7284932961536891, "grad_norm": 0.42174071073532104, "learning_rate": 3.4222694070489872e-06, "loss": 0.3312, "step": 39240 }, { "epoch": 0.7285304262911078, "grad_norm": 0.5030283331871033, "learning_rate": 3.4213908321701848e-06, "loss": 0.1766, "step": 39242 }, { "epoch": 0.7285675564285263, "grad_norm": 0.21340882778167725, "learning_rate": 3.420512346806545e-06, "loss": 0.2802, "step": 39244 }, { "epoch": 0.728604686565945, "grad_norm": 0.5842553377151489, "learning_rate": 3.4196339509700284e-06, "loss": 0.3174, "step": 39246 }, { "epoch": 0.7286418167033636, "grad_norm": 0.3922097980976105, "learning_rate": 3.418755644672579e-06, "loss": 0.1877, "step": 39248 }, { "epoch": 0.7286789468407823, "grad_norm": 0.5512311458587646, "learning_rate": 3.4178774279261516e-06, "loss": 0.3478, "step": 39250 }, { "epoch": 0.7287160769782008, "grad_norm": 0.4869624376296997, "learning_rate": 3.416999300742697e-06, "loss": 0.1459, "step": 39252 }, { "epoch": 0.7287532071156195, "grad_norm": 0.42814716696739197, "learning_rate": 3.4161212631341645e-06, "loss": 0.3873, "step": 39254 }, { "epoch": 0.7287903372530382, "grad_norm": 0.2851299047470093, "learning_rate": 3.4152433151124996e-06, "loss": 0.2699, "step": 39256 }, { "epoch": 0.7288274673904568, "grad_norm": 0.5726222395896912, "learning_rate": 3.4143654566896533e-06, "loss": 0.4267, "step": 39258 }, { "epoch": 0.7288645975278755, "grad_norm": 0.41617101430892944, "learning_rate": 3.4134876878775637e-06, "loss": 0.2671, "step": 39260 }, { "epoch": 0.728901727665294, "grad_norm": 0.4523519277572632, "learning_rate": 3.412610008688176e-06, "loss": 0.2047, "step": 39262 }, { "epoch": 0.7289388578027127, "grad_norm": 0.41227251291275024, "learning_rate": 3.41173241913344e-06, "loss": 0.2582, "step": 39264 }, { "epoch": 0.7289759879401314, "grad_norm": 0.5662005543708801, "learning_rate": 3.410854919225288e-06, "loss": 0.3484, "step": 39266 }, { "epoch": 0.72901311807755, "grad_norm": 0.3841313123703003, "learning_rate": 3.409977508975664e-06, "loss": 0.3902, "step": 39268 }, { "epoch": 0.7290502482149687, "grad_norm": 0.3165387809276581, "learning_rate": 3.409100188396509e-06, "loss": 0.4446, "step": 39270 }, { "epoch": 0.7290873783523872, "grad_norm": 0.428327739238739, "learning_rate": 3.408222957499755e-06, "loss": 0.306, "step": 39272 }, { "epoch": 0.7291245084898059, "grad_norm": 0.4113166034221649, "learning_rate": 3.4073458162973426e-06, "loss": 0.197, "step": 39274 }, { "epoch": 0.7291616386272246, "grad_norm": 0.4515472948551178, "learning_rate": 3.4064687648012065e-06, "loss": 0.0695, "step": 39276 }, { "epoch": 0.7291987687646432, "grad_norm": 0.36745816469192505, "learning_rate": 3.4055918030232803e-06, "loss": 0.174, "step": 39278 }, { "epoch": 0.7292358989020619, "grad_norm": 0.25630059838294983, "learning_rate": 3.404714930975497e-06, "loss": 0.1856, "step": 39280 }, { "epoch": 0.7292730290394804, "grad_norm": 0.3291914761066437, "learning_rate": 3.403838148669789e-06, "loss": 0.2835, "step": 39282 }, { "epoch": 0.7293101591768991, "grad_norm": 0.2569425106048584, "learning_rate": 3.402961456118088e-06, "loss": 0.117, "step": 39284 }, { "epoch": 0.7293472893143178, "grad_norm": 0.352387934923172, "learning_rate": 3.4020848533323224e-06, "loss": 0.3116, "step": 39286 }, { "epoch": 0.7293844194517364, "grad_norm": 0.32251402735710144, "learning_rate": 3.4012083403244156e-06, "loss": 0.2413, "step": 39288 }, { "epoch": 0.7294215495891551, "grad_norm": 0.260231614112854, "learning_rate": 3.4003319171062966e-06, "loss": 0.1673, "step": 39290 }, { "epoch": 0.7294586797265736, "grad_norm": 0.58722984790802, "learning_rate": 3.3994555836898924e-06, "loss": 0.2837, "step": 39292 }, { "epoch": 0.7294958098639923, "grad_norm": 0.33602431416511536, "learning_rate": 3.398579340087127e-06, "loss": 0.1967, "step": 39294 }, { "epoch": 0.7295329400014109, "grad_norm": 0.3525410592556, "learning_rate": 3.3977031863099264e-06, "loss": 0.2276, "step": 39296 }, { "epoch": 0.7295700701388296, "grad_norm": 0.30403149127960205, "learning_rate": 3.3968271223702052e-06, "loss": 0.3183, "step": 39298 }, { "epoch": 0.7296072002762483, "grad_norm": 0.7099027037620544, "learning_rate": 3.395951148279889e-06, "loss": 0.3452, "step": 39300 }, { "epoch": 0.7296443304136668, "grad_norm": 0.5833398103713989, "learning_rate": 3.3950752640508965e-06, "loss": 0.1752, "step": 39302 }, { "epoch": 0.7296814605510855, "grad_norm": 0.5422753095626831, "learning_rate": 3.3941994696951452e-06, "loss": 0.3048, "step": 39304 }, { "epoch": 0.7297185906885041, "grad_norm": 0.34894996881484985, "learning_rate": 3.3933237652245555e-06, "loss": 0.1206, "step": 39306 }, { "epoch": 0.7297557208259228, "grad_norm": 0.3858398199081421, "learning_rate": 3.392448150651041e-06, "loss": 0.1022, "step": 39308 }, { "epoch": 0.7297928509633415, "grad_norm": 0.259263277053833, "learning_rate": 3.3915726259865113e-06, "loss": 0.1286, "step": 39310 }, { "epoch": 0.72982998110076, "grad_norm": 0.2320471554994583, "learning_rate": 3.3906971912428834e-06, "loss": 0.3445, "step": 39312 }, { "epoch": 0.7298671112381787, "grad_norm": 0.22795124351978302, "learning_rate": 3.3898218464320698e-06, "loss": 0.1224, "step": 39314 }, { "epoch": 0.7299042413755973, "grad_norm": 0.3710617423057556, "learning_rate": 3.3889465915659815e-06, "loss": 0.2996, "step": 39316 }, { "epoch": 0.729941371513016, "grad_norm": 0.42007747292518616, "learning_rate": 3.388071426656526e-06, "loss": 0.2574, "step": 39318 }, { "epoch": 0.7299785016504347, "grad_norm": 0.28993046283721924, "learning_rate": 3.387196351715619e-06, "loss": 0.1761, "step": 39320 }, { "epoch": 0.7300156317878532, "grad_norm": 0.28955498337745667, "learning_rate": 3.3863213667551577e-06, "loss": 0.231, "step": 39322 }, { "epoch": 0.7300527619252719, "grad_norm": 0.5247309803962708, "learning_rate": 3.3854464717870515e-06, "loss": 0.3921, "step": 39324 }, { "epoch": 0.7300898920626905, "grad_norm": 0.5087648630142212, "learning_rate": 3.3845716668232077e-06, "loss": 0.4016, "step": 39326 }, { "epoch": 0.7301270222001092, "grad_norm": 0.30517423152923584, "learning_rate": 3.3836969518755305e-06, "loss": 0.2436, "step": 39328 }, { "epoch": 0.7301641523375278, "grad_norm": 0.39813855290412903, "learning_rate": 3.3828223269559156e-06, "loss": 0.2611, "step": 39330 }, { "epoch": 0.7302012824749464, "grad_norm": 0.4249803423881531, "learning_rate": 3.381947792076269e-06, "loss": 0.2707, "step": 39332 }, { "epoch": 0.7302384126123651, "grad_norm": 0.36574143171310425, "learning_rate": 3.3810733472484925e-06, "loss": 0.1892, "step": 39334 }, { "epoch": 0.7302755427497837, "grad_norm": 0.37473776936531067, "learning_rate": 3.380198992484478e-06, "loss": 0.243, "step": 39336 }, { "epoch": 0.7303126728872024, "grad_norm": 0.6076406836509705, "learning_rate": 3.379324727796127e-06, "loss": 0.1917, "step": 39338 }, { "epoch": 0.730349803024621, "grad_norm": 0.40308690071105957, "learning_rate": 3.378450553195336e-06, "loss": 0.3379, "step": 39340 }, { "epoch": 0.7303869331620396, "grad_norm": 0.4628306031227112, "learning_rate": 3.377576468693998e-06, "loss": 0.3224, "step": 39342 }, { "epoch": 0.7304240632994583, "grad_norm": 0.29092615842819214, "learning_rate": 3.3767024743040087e-06, "loss": 0.2771, "step": 39344 }, { "epoch": 0.7304611934368769, "grad_norm": 0.48770588636398315, "learning_rate": 3.3758285700372627e-06, "loss": 0.4219, "step": 39346 }, { "epoch": 0.7304983235742956, "grad_norm": 0.8077456951141357, "learning_rate": 3.374954755905645e-06, "loss": 0.3397, "step": 39348 }, { "epoch": 0.7305354537117141, "grad_norm": 0.5066648125648499, "learning_rate": 3.3740810319210517e-06, "loss": 0.4439, "step": 39350 }, { "epoch": 0.7305725838491328, "grad_norm": 0.29786914587020874, "learning_rate": 3.373207398095365e-06, "loss": 0.4096, "step": 39352 }, { "epoch": 0.7306097139865515, "grad_norm": 0.37848058342933655, "learning_rate": 3.372333854440477e-06, "loss": 0.5382, "step": 39354 }, { "epoch": 0.7306468441239701, "grad_norm": 0.4330310523509979, "learning_rate": 3.3714604009682715e-06, "loss": 0.3551, "step": 39356 }, { "epoch": 0.7306839742613888, "grad_norm": 0.5164570808410645, "learning_rate": 3.37058703769064e-06, "loss": 0.1785, "step": 39358 }, { "epoch": 0.7307211043988073, "grad_norm": 0.4931236505508423, "learning_rate": 3.3697137646194577e-06, "loss": 0.1962, "step": 39360 }, { "epoch": 0.730758234536226, "grad_norm": 0.24815839529037476, "learning_rate": 3.3688405817666114e-06, "loss": 0.2879, "step": 39362 }, { "epoch": 0.7307953646736447, "grad_norm": 0.4302636981010437, "learning_rate": 3.367967489143982e-06, "loss": 0.0748, "step": 39364 }, { "epoch": 0.7308324948110633, "grad_norm": 0.2541126012802124, "learning_rate": 3.367094486763449e-06, "loss": 0.294, "step": 39366 }, { "epoch": 0.730869624948482, "grad_norm": 0.42234769463539124, "learning_rate": 3.366221574636893e-06, "loss": 0.1281, "step": 39368 }, { "epoch": 0.7309067550859005, "grad_norm": 0.39338475465774536, "learning_rate": 3.365348752776195e-06, "loss": 0.1647, "step": 39370 }, { "epoch": 0.7309438852233192, "grad_norm": 0.4355759918689728, "learning_rate": 3.364476021193226e-06, "loss": 0.3694, "step": 39372 }, { "epoch": 0.7309810153607379, "grad_norm": 0.32050520181655884, "learning_rate": 3.3636033798998602e-06, "loss": 0.304, "step": 39374 }, { "epoch": 0.7310181454981565, "grad_norm": 0.37411239743232727, "learning_rate": 3.362730828907974e-06, "loss": 0.3821, "step": 39376 }, { "epoch": 0.7310552756355752, "grad_norm": 0.6035988330841064, "learning_rate": 3.36185836822944e-06, "loss": 0.4661, "step": 39378 }, { "epoch": 0.7310924057729937, "grad_norm": 0.36967048048973083, "learning_rate": 3.3609859978761294e-06, "loss": 0.2105, "step": 39380 }, { "epoch": 0.7311295359104124, "grad_norm": 0.320677787065506, "learning_rate": 3.360113717859914e-06, "loss": 0.2503, "step": 39382 }, { "epoch": 0.7311666660478311, "grad_norm": 0.2092316448688507, "learning_rate": 3.359241528192666e-06, "loss": 0.1744, "step": 39384 }, { "epoch": 0.7312037961852497, "grad_norm": 0.34371185302734375, "learning_rate": 3.3583694288862454e-06, "loss": 0.2696, "step": 39386 }, { "epoch": 0.7312409263226683, "grad_norm": 0.5447477102279663, "learning_rate": 3.3574974199525234e-06, "loss": 0.2308, "step": 39388 }, { "epoch": 0.7312780564600869, "grad_norm": 0.41978776454925537, "learning_rate": 3.3566255014033645e-06, "loss": 0.1992, "step": 39390 }, { "epoch": 0.7313151865975056, "grad_norm": 0.7561306953430176, "learning_rate": 3.3557536732506367e-06, "loss": 0.3107, "step": 39392 }, { "epoch": 0.7313523167349243, "grad_norm": 0.2962585687637329, "learning_rate": 3.354881935506196e-06, "loss": 0.2137, "step": 39394 }, { "epoch": 0.7313894468723429, "grad_norm": 0.4822298288345337, "learning_rate": 3.3540102881819114e-06, "loss": 0.1435, "step": 39396 }, { "epoch": 0.7314265770097615, "grad_norm": 0.6164640188217163, "learning_rate": 3.353138731289636e-06, "loss": 0.2851, "step": 39398 }, { "epoch": 0.7314637071471801, "grad_norm": 0.41828978061676025, "learning_rate": 3.3522672648412336e-06, "loss": 0.2906, "step": 39400 }, { "epoch": 0.7315008372845988, "grad_norm": 0.29566556215286255, "learning_rate": 3.351395888848561e-06, "loss": 0.118, "step": 39402 }, { "epoch": 0.7315379674220174, "grad_norm": 0.4122690260410309, "learning_rate": 3.350524603323475e-06, "loss": 0.2738, "step": 39404 }, { "epoch": 0.7315750975594361, "grad_norm": 0.47900405526161194, "learning_rate": 3.349653408277832e-06, "loss": 0.2906, "step": 39406 }, { "epoch": 0.7316122276968547, "grad_norm": 0.36774539947509766, "learning_rate": 3.348782303723486e-06, "loss": 0.1752, "step": 39408 }, { "epoch": 0.7316493578342733, "grad_norm": 0.34023237228393555, "learning_rate": 3.3479112896722934e-06, "loss": 0.1673, "step": 39410 }, { "epoch": 0.731686487971692, "grad_norm": 0.44448593258857727, "learning_rate": 3.3470403661360996e-06, "loss": 0.4472, "step": 39412 }, { "epoch": 0.7317236181091106, "grad_norm": 0.31473252177238464, "learning_rate": 3.346169533126762e-06, "loss": 0.0896, "step": 39414 }, { "epoch": 0.7317607482465293, "grad_norm": 0.3743666708469391, "learning_rate": 3.3452987906561228e-06, "loss": 0.4201, "step": 39416 }, { "epoch": 0.7317978783839479, "grad_norm": 0.4425252377986908, "learning_rate": 3.3444281387360343e-06, "loss": 0.3131, "step": 39418 }, { "epoch": 0.7318350085213665, "grad_norm": 0.5893719792366028, "learning_rate": 3.3435575773783436e-06, "loss": 0.2184, "step": 39420 }, { "epoch": 0.7318721386587852, "grad_norm": 0.42541906237602234, "learning_rate": 3.3426871065948986e-06, "loss": 0.3098, "step": 39422 }, { "epoch": 0.7319092687962038, "grad_norm": 0.5064725875854492, "learning_rate": 3.341816726397538e-06, "loss": 0.2096, "step": 39424 }, { "epoch": 0.7319463989336225, "grad_norm": 0.4209304749965668, "learning_rate": 3.3409464367981083e-06, "loss": 0.1485, "step": 39426 }, { "epoch": 0.7319835290710411, "grad_norm": 0.4730643033981323, "learning_rate": 3.3400762378084507e-06, "loss": 0.1575, "step": 39428 }, { "epoch": 0.7320206592084597, "grad_norm": 0.27889904379844666, "learning_rate": 3.339206129440408e-06, "loss": 0.4311, "step": 39430 }, { "epoch": 0.7320577893458784, "grad_norm": 0.4501406252384186, "learning_rate": 3.33833611170582e-06, "loss": 0.2664, "step": 39432 }, { "epoch": 0.732094919483297, "grad_norm": 0.48181676864624023, "learning_rate": 3.3374661846165214e-06, "loss": 0.3467, "step": 39434 }, { "epoch": 0.7321320496207157, "grad_norm": 0.38327500224113464, "learning_rate": 3.3365963481843544e-06, "loss": 0.4224, "step": 39436 }, { "epoch": 0.7321691797581343, "grad_norm": 0.36210042238235474, "learning_rate": 3.335726602421149e-06, "loss": 0.1029, "step": 39438 }, { "epoch": 0.7322063098955529, "grad_norm": 0.2959725260734558, "learning_rate": 3.3348569473387427e-06, "loss": 0.2102, "step": 39440 }, { "epoch": 0.7322434400329716, "grad_norm": 0.34667396545410156, "learning_rate": 3.3339873829489686e-06, "loss": 0.3824, "step": 39442 }, { "epoch": 0.7322805701703902, "grad_norm": 0.5159398317337036, "learning_rate": 3.333117909263659e-06, "loss": 0.1984, "step": 39444 }, { "epoch": 0.7323177003078088, "grad_norm": 0.306907057762146, "learning_rate": 3.3322485262946456e-06, "loss": 0.2022, "step": 39446 }, { "epoch": 0.7323548304452274, "grad_norm": 0.24252590537071228, "learning_rate": 3.3313792340537597e-06, "loss": 0.1741, "step": 39448 }, { "epoch": 0.7323919605826461, "grad_norm": 0.4049168527126312, "learning_rate": 3.3305100325528263e-06, "loss": 0.5092, "step": 39450 }, { "epoch": 0.7324290907200648, "grad_norm": 0.35631072521209717, "learning_rate": 3.329640921803672e-06, "loss": 0.4262, "step": 39452 }, { "epoch": 0.7324662208574834, "grad_norm": 0.4704231917858124, "learning_rate": 3.3287719018181286e-06, "loss": 0.3182, "step": 39454 }, { "epoch": 0.732503350994902, "grad_norm": 0.3431044816970825, "learning_rate": 3.3279029726080148e-06, "loss": 0.3056, "step": 39456 }, { "epoch": 0.7325404811323206, "grad_norm": 0.5290939807891846, "learning_rate": 3.3270341341851555e-06, "loss": 0.4403, "step": 39458 }, { "epoch": 0.7325776112697393, "grad_norm": 0.3120960295200348, "learning_rate": 3.326165386561376e-06, "loss": 0.2928, "step": 39460 }, { "epoch": 0.732614741407158, "grad_norm": 0.3846617043018341, "learning_rate": 3.325296729748493e-06, "loss": 0.1502, "step": 39462 }, { "epoch": 0.7326518715445766, "grad_norm": 0.5202431082725525, "learning_rate": 3.3244281637583288e-06, "loss": 0.2824, "step": 39464 }, { "epoch": 0.7326890016819952, "grad_norm": 0.5688328146934509, "learning_rate": 3.323559688602701e-06, "loss": 0.12, "step": 39466 }, { "epoch": 0.7327261318194138, "grad_norm": 0.3555378317832947, "learning_rate": 3.3226913042934272e-06, "loss": 0.2833, "step": 39468 }, { "epoch": 0.7327632619568325, "grad_norm": 0.28759363293647766, "learning_rate": 3.3218230108423242e-06, "loss": 0.1606, "step": 39470 }, { "epoch": 0.7328003920942512, "grad_norm": 0.3010222911834717, "learning_rate": 3.3209548082612097e-06, "loss": 0.2953, "step": 39472 }, { "epoch": 0.7328375222316698, "grad_norm": 0.38120535016059875, "learning_rate": 3.320086696561892e-06, "loss": 0.2438, "step": 39474 }, { "epoch": 0.7328746523690884, "grad_norm": 0.5771275758743286, "learning_rate": 3.319218675756185e-06, "loss": 0.1046, "step": 39476 }, { "epoch": 0.732911782506507, "grad_norm": 0.3151102364063263, "learning_rate": 3.3183507458559037e-06, "loss": 0.3682, "step": 39478 }, { "epoch": 0.7329489126439257, "grad_norm": 0.642266035079956, "learning_rate": 3.317482906872851e-06, "loss": 0.3063, "step": 39480 }, { "epoch": 0.7329860427813444, "grad_norm": 0.6876683831214905, "learning_rate": 3.316615158818841e-06, "loss": 0.3108, "step": 39482 }, { "epoch": 0.733023172918763, "grad_norm": 0.42287275195121765, "learning_rate": 3.315747501705682e-06, "loss": 0.2266, "step": 39484 }, { "epoch": 0.7330603030561816, "grad_norm": 0.5441722869873047, "learning_rate": 3.3148799355451745e-06, "loss": 0.2867, "step": 39486 }, { "epoch": 0.7330974331936002, "grad_norm": 0.33868300914764404, "learning_rate": 3.3140124603491265e-06, "loss": 0.2, "step": 39488 }, { "epoch": 0.7331345633310189, "grad_norm": 0.43034493923187256, "learning_rate": 3.3131450761293425e-06, "loss": 0.3117, "step": 39490 }, { "epoch": 0.7331716934684376, "grad_norm": 0.27191710472106934, "learning_rate": 3.312277782897625e-06, "loss": 0.2676, "step": 39492 }, { "epoch": 0.7332088236058562, "grad_norm": 0.3336094319820404, "learning_rate": 3.3114105806657747e-06, "loss": 0.3404, "step": 39494 }, { "epoch": 0.7332459537432748, "grad_norm": 0.2710396647453308, "learning_rate": 3.310543469445594e-06, "loss": 0.1291, "step": 39496 }, { "epoch": 0.7332830838806934, "grad_norm": 0.4027945399284363, "learning_rate": 3.3096764492488776e-06, "loss": 0.6429, "step": 39498 }, { "epoch": 0.7333202140181121, "grad_norm": 0.3284349739551544, "learning_rate": 3.308809520087427e-06, "loss": 0.3183, "step": 39500 }, { "epoch": 0.7333573441555307, "grad_norm": 0.3933369815349579, "learning_rate": 3.3079426819730344e-06, "loss": 0.329, "step": 39502 }, { "epoch": 0.7333944742929493, "grad_norm": 0.35107147693634033, "learning_rate": 3.3070759349174953e-06, "loss": 0.4135, "step": 39504 }, { "epoch": 0.733431604430368, "grad_norm": 0.4709293246269226, "learning_rate": 3.306209278932606e-06, "loss": 0.3575, "step": 39506 }, { "epoch": 0.7334687345677866, "grad_norm": 0.3905387818813324, "learning_rate": 3.3053427140301587e-06, "loss": 0.2984, "step": 39508 }, { "epoch": 0.7335058647052053, "grad_norm": 0.3877747654914856, "learning_rate": 3.3044762402219464e-06, "loss": 0.1817, "step": 39510 }, { "epoch": 0.7335429948426239, "grad_norm": 0.24385499954223633, "learning_rate": 3.303609857519754e-06, "loss": 0.363, "step": 39512 }, { "epoch": 0.7335801249800425, "grad_norm": 0.3909846246242523, "learning_rate": 3.3027435659353735e-06, "loss": 0.3082, "step": 39514 }, { "epoch": 0.7336172551174612, "grad_norm": 0.221132293343544, "learning_rate": 3.3018773654805934e-06, "loss": 0.2259, "step": 39516 }, { "epoch": 0.7336543852548798, "grad_norm": 0.5370063781738281, "learning_rate": 3.3010112561672013e-06, "loss": 0.2294, "step": 39518 }, { "epoch": 0.7336915153922985, "grad_norm": 0.39975836873054504, "learning_rate": 3.300145238006978e-06, "loss": 0.2423, "step": 39520 }, { "epoch": 0.7337286455297171, "grad_norm": 0.3813387155532837, "learning_rate": 3.2992793110117114e-06, "loss": 0.4738, "step": 39522 }, { "epoch": 0.7337657756671357, "grad_norm": 0.42539745569229126, "learning_rate": 3.2984134751931797e-06, "loss": 0.305, "step": 39524 }, { "epoch": 0.7338029058045544, "grad_norm": 0.7070034146308899, "learning_rate": 3.2975477305631675e-06, "loss": 0.3733, "step": 39526 }, { "epoch": 0.733840035941973, "grad_norm": 0.44668787717819214, "learning_rate": 3.296682077133454e-06, "loss": 0.1876, "step": 39528 }, { "epoch": 0.7338771660793917, "grad_norm": 0.37090885639190674, "learning_rate": 3.2958165149158195e-06, "loss": 0.2827, "step": 39530 }, { "epoch": 0.7339142962168103, "grad_norm": 0.23615257441997528, "learning_rate": 3.29495104392204e-06, "loss": 0.1217, "step": 39532 }, { "epoch": 0.7339514263542289, "grad_norm": 0.3470562696456909, "learning_rate": 3.294085664163893e-06, "loss": 0.266, "step": 39534 }, { "epoch": 0.7339885564916476, "grad_norm": 0.29207083582878113, "learning_rate": 3.2932203756531577e-06, "loss": 0.2794, "step": 39536 }, { "epoch": 0.7340256866290662, "grad_norm": 0.3248654305934906, "learning_rate": 3.2923551784015993e-06, "loss": 0.2084, "step": 39538 }, { "epoch": 0.7340628167664849, "grad_norm": 0.4295586347579956, "learning_rate": 3.2914900724209997e-06, "loss": 0.3463, "step": 39540 }, { "epoch": 0.7340999469039035, "grad_norm": 0.38350972533226013, "learning_rate": 3.2906250577231227e-06, "loss": 0.4041, "step": 39542 }, { "epoch": 0.7341370770413221, "grad_norm": 0.31241822242736816, "learning_rate": 3.2897601343197404e-06, "loss": 0.2645, "step": 39544 }, { "epoch": 0.7341742071787408, "grad_norm": 0.42178383469581604, "learning_rate": 3.2888953022226245e-06, "loss": 0.1052, "step": 39546 }, { "epoch": 0.7342113373161594, "grad_norm": 0.5436000227928162, "learning_rate": 3.2880305614435436e-06, "loss": 0.4197, "step": 39548 }, { "epoch": 0.7342484674535781, "grad_norm": 0.2490389496088028, "learning_rate": 3.287165911994259e-06, "loss": 0.1374, "step": 39550 }, { "epoch": 0.7342855975909967, "grad_norm": 0.4188910126686096, "learning_rate": 3.2863013538865398e-06, "loss": 0.23, "step": 39552 }, { "epoch": 0.7343227277284153, "grad_norm": 0.396639347076416, "learning_rate": 3.2854368871321486e-06, "loss": 0.2075, "step": 39554 }, { "epoch": 0.7343598578658339, "grad_norm": 0.39778420329093933, "learning_rate": 3.2845725117428494e-06, "loss": 0.1571, "step": 39556 }, { "epoch": 0.7343969880032526, "grad_norm": 0.8328045606613159, "learning_rate": 3.283708227730403e-06, "loss": 0.2332, "step": 39558 }, { "epoch": 0.7344341181406713, "grad_norm": 0.3782265782356262, "learning_rate": 3.2828440351065736e-06, "loss": 0.26, "step": 39560 }, { "epoch": 0.7344712482780898, "grad_norm": 0.5119785666465759, "learning_rate": 3.281979933883117e-06, "loss": 0.2451, "step": 39562 }, { "epoch": 0.7345083784155085, "grad_norm": 0.3259422481060028, "learning_rate": 3.2811159240717873e-06, "loss": 0.3005, "step": 39564 }, { "epoch": 0.7345455085529271, "grad_norm": 0.48967939615249634, "learning_rate": 3.2802520056843434e-06, "loss": 0.2943, "step": 39566 }, { "epoch": 0.7345826386903458, "grad_norm": 0.5661710500717163, "learning_rate": 3.279388178732542e-06, "loss": 0.2235, "step": 39568 }, { "epoch": 0.7346197688277645, "grad_norm": 0.44337931275367737, "learning_rate": 3.278524443228138e-06, "loss": 0.3713, "step": 39570 }, { "epoch": 0.734656898965183, "grad_norm": 0.5170077681541443, "learning_rate": 3.2776607991828822e-06, "loss": 0.2708, "step": 39572 }, { "epoch": 0.7346940291026017, "grad_norm": 0.3662564754486084, "learning_rate": 3.2767972466085306e-06, "loss": 0.22, "step": 39574 }, { "epoch": 0.7347311592400203, "grad_norm": 0.1713951677083969, "learning_rate": 3.275933785516827e-06, "loss": 0.1951, "step": 39576 }, { "epoch": 0.734768289377439, "grad_norm": 0.22462594509124756, "learning_rate": 3.2750704159195247e-06, "loss": 0.3248, "step": 39578 }, { "epoch": 0.7348054195148577, "grad_norm": 0.3305954933166504, "learning_rate": 3.274207137828369e-06, "loss": 0.2504, "step": 39580 }, { "epoch": 0.7348425496522762, "grad_norm": 0.4176628291606903, "learning_rate": 3.2733439512551123e-06, "loss": 0.3172, "step": 39582 }, { "epoch": 0.7348796797896949, "grad_norm": 0.3020023703575134, "learning_rate": 3.272480856211492e-06, "loss": 0.3791, "step": 39584 }, { "epoch": 0.7349168099271135, "grad_norm": 0.5806213617324829, "learning_rate": 3.271617852709259e-06, "loss": 0.3438, "step": 39586 }, { "epoch": 0.7349539400645322, "grad_norm": 0.3570534586906433, "learning_rate": 3.2707549407601504e-06, "loss": 0.1697, "step": 39588 }, { "epoch": 0.7349910702019509, "grad_norm": 0.30950888991355896, "learning_rate": 3.2698921203759103e-06, "loss": 0.1169, "step": 39590 }, { "epoch": 0.7350282003393694, "grad_norm": 0.5300682783126831, "learning_rate": 3.2690293915682803e-06, "loss": 0.4432, "step": 39592 }, { "epoch": 0.7350653304767881, "grad_norm": 0.2543310523033142, "learning_rate": 3.268166754348998e-06, "loss": 0.1145, "step": 39594 }, { "epoch": 0.7351024606142067, "grad_norm": 0.3860691785812378, "learning_rate": 3.267304208729801e-06, "loss": 0.3455, "step": 39596 }, { "epoch": 0.7351395907516254, "grad_norm": 0.5638052821159363, "learning_rate": 3.2664417547224325e-06, "loss": 0.2403, "step": 39598 }, { "epoch": 0.735176720889044, "grad_norm": 0.20706762373447418, "learning_rate": 3.265579392338617e-06, "loss": 0.2002, "step": 39600 }, { "epoch": 0.7352138510264626, "grad_norm": 0.42782899737358093, "learning_rate": 3.2647171215900953e-06, "loss": 0.1812, "step": 39602 }, { "epoch": 0.7352509811638813, "grad_norm": 0.2844115197658539, "learning_rate": 3.2638549424886025e-06, "loss": 0.3141, "step": 39604 }, { "epoch": 0.7352881113012999, "grad_norm": 0.2873739004135132, "learning_rate": 3.262992855045862e-06, "loss": 0.296, "step": 39606 }, { "epoch": 0.7353252414387186, "grad_norm": 0.3876924216747284, "learning_rate": 3.262130859273611e-06, "loss": 0.2826, "step": 39608 }, { "epoch": 0.7353623715761372, "grad_norm": 1.2880855798721313, "learning_rate": 3.2612689551835785e-06, "loss": 0.428, "step": 39610 }, { "epoch": 0.7353995017135558, "grad_norm": 0.3159998953342438, "learning_rate": 3.2604071427874882e-06, "loss": 0.2949, "step": 39612 }, { "epoch": 0.7354366318509745, "grad_norm": 0.3501976728439331, "learning_rate": 3.2595454220970692e-06, "loss": 0.2649, "step": 39614 }, { "epoch": 0.7354737619883931, "grad_norm": 3.603623867034912, "learning_rate": 3.2586837931240465e-06, "loss": 0.2248, "step": 39616 }, { "epoch": 0.7355108921258118, "grad_norm": 0.4075184166431427, "learning_rate": 3.2578222558801455e-06, "loss": 0.3439, "step": 39618 }, { "epoch": 0.7355480222632303, "grad_norm": 0.286770224571228, "learning_rate": 3.2569608103770887e-06, "loss": 0.1231, "step": 39620 }, { "epoch": 0.735585152400649, "grad_norm": 0.2635183036327362, "learning_rate": 3.2560994566266e-06, "loss": 0.3503, "step": 39622 }, { "epoch": 0.7356222825380677, "grad_norm": 0.32128143310546875, "learning_rate": 3.2552381946403942e-06, "loss": 0.4085, "step": 39624 }, { "epoch": 0.7356594126754863, "grad_norm": 0.48798471689224243, "learning_rate": 3.2543770244301977e-06, "loss": 0.2308, "step": 39626 }, { "epoch": 0.735696542812905, "grad_norm": 0.345411479473114, "learning_rate": 3.25351594600772e-06, "loss": 0.1833, "step": 39628 }, { "epoch": 0.7357336729503235, "grad_norm": 0.22205598652362823, "learning_rate": 3.2526549593846834e-06, "loss": 0.1376, "step": 39630 }, { "epoch": 0.7357708030877422, "grad_norm": 0.510295033454895, "learning_rate": 3.251794064572801e-06, "loss": 0.154, "step": 39632 }, { "epoch": 0.7358079332251609, "grad_norm": 0.30556991696357727, "learning_rate": 3.2509332615837886e-06, "loss": 0.3628, "step": 39634 }, { "epoch": 0.7358450633625795, "grad_norm": 0.38054174184799194, "learning_rate": 3.2500725504293616e-06, "loss": 0.154, "step": 39636 }, { "epoch": 0.7358821934999982, "grad_norm": 0.6307283639907837, "learning_rate": 3.249211931121227e-06, "loss": 0.5357, "step": 39638 }, { "epoch": 0.7359193236374167, "grad_norm": 0.3596384823322296, "learning_rate": 3.2483514036710963e-06, "loss": 0.1648, "step": 39640 }, { "epoch": 0.7359564537748354, "grad_norm": 0.3554809093475342, "learning_rate": 3.2474909680906784e-06, "loss": 0.3361, "step": 39642 }, { "epoch": 0.7359935839122541, "grad_norm": 0.29304617643356323, "learning_rate": 3.2466306243916834e-06, "loss": 0.2129, "step": 39644 }, { "epoch": 0.7360307140496727, "grad_norm": 0.47748661041259766, "learning_rate": 3.2457703725858203e-06, "loss": 0.2595, "step": 39646 }, { "epoch": 0.7360678441870914, "grad_norm": 0.4446309506893158, "learning_rate": 3.244910212684791e-06, "loss": 0.5336, "step": 39648 }, { "epoch": 0.7361049743245099, "grad_norm": 0.49341025948524475, "learning_rate": 3.244050144700296e-06, "loss": 0.2737, "step": 39650 }, { "epoch": 0.7361421044619286, "grad_norm": 0.1671096533536911, "learning_rate": 3.2431901686440424e-06, "loss": 0.1101, "step": 39652 }, { "epoch": 0.7361792345993472, "grad_norm": 0.42571279406547546, "learning_rate": 3.2423302845277316e-06, "loss": 0.3083, "step": 39654 }, { "epoch": 0.7362163647367659, "grad_norm": 0.4584483802318573, "learning_rate": 3.2414704923630645e-06, "loss": 0.1336, "step": 39656 }, { "epoch": 0.7362534948741846, "grad_norm": 0.36921730637550354, "learning_rate": 3.240610792161739e-06, "loss": 0.3621, "step": 39658 }, { "epoch": 0.7362906250116031, "grad_norm": 0.49731460213661194, "learning_rate": 3.2397511839354555e-06, "loss": 0.1976, "step": 39660 }, { "epoch": 0.7363277551490218, "grad_norm": 0.3102233111858368, "learning_rate": 3.2388916676959117e-06, "loss": 0.2801, "step": 39662 }, { "epoch": 0.7363648852864404, "grad_norm": 0.43967992067337036, "learning_rate": 3.238032243454796e-06, "loss": 0.1177, "step": 39664 }, { "epoch": 0.7364020154238591, "grad_norm": 0.39288806915283203, "learning_rate": 3.2371729112238083e-06, "loss": 0.1723, "step": 39666 }, { "epoch": 0.7364391455612778, "grad_norm": 0.3797621428966522, "learning_rate": 3.2363136710146426e-06, "loss": 0.2175, "step": 39668 }, { "epoch": 0.7364762756986963, "grad_norm": 0.3723621666431427, "learning_rate": 3.2354545228389855e-06, "loss": 0.1907, "step": 39670 }, { "epoch": 0.736513405836115, "grad_norm": 0.4345323145389557, "learning_rate": 3.23459546670853e-06, "loss": 0.325, "step": 39672 }, { "epoch": 0.7365505359735336, "grad_norm": 0.4412917196750641, "learning_rate": 3.2337365026349686e-06, "loss": 0.4544, "step": 39674 }, { "epoch": 0.7365876661109523, "grad_norm": 0.48725977540016174, "learning_rate": 3.2328776306299837e-06, "loss": 0.157, "step": 39676 }, { "epoch": 0.736624796248371, "grad_norm": 0.4334671199321747, "learning_rate": 3.2320188507052643e-06, "loss": 0.2227, "step": 39678 }, { "epoch": 0.7366619263857895, "grad_norm": 0.43429210782051086, "learning_rate": 3.2311601628724954e-06, "loss": 0.2566, "step": 39680 }, { "epoch": 0.7366990565232082, "grad_norm": 0.2580873966217041, "learning_rate": 3.2303015671433613e-06, "loss": 0.1283, "step": 39682 }, { "epoch": 0.7367361866606268, "grad_norm": 0.5126158595085144, "learning_rate": 3.229443063529546e-06, "loss": 0.356, "step": 39684 }, { "epoch": 0.7367733167980455, "grad_norm": 0.3800863027572632, "learning_rate": 3.2285846520427334e-06, "loss": 0.2777, "step": 39686 }, { "epoch": 0.7368104469354642, "grad_norm": 0.31357845664024353, "learning_rate": 3.2277263326945973e-06, "loss": 0.2567, "step": 39688 }, { "epoch": 0.7368475770728827, "grad_norm": 0.3310134708881378, "learning_rate": 3.2268681054968244e-06, "loss": 0.0345, "step": 39690 }, { "epoch": 0.7368847072103014, "grad_norm": 0.42560744285583496, "learning_rate": 3.2260099704610846e-06, "loss": 0.1788, "step": 39692 }, { "epoch": 0.73692183734772, "grad_norm": 0.43040984869003296, "learning_rate": 3.2251519275990597e-06, "loss": 0.3009, "step": 39694 }, { "epoch": 0.7369589674851387, "grad_norm": 0.5072306394577026, "learning_rate": 3.2242939769224222e-06, "loss": 0.3337, "step": 39696 }, { "epoch": 0.7369960976225574, "grad_norm": 0.3687780797481537, "learning_rate": 3.22343611844285e-06, "loss": 0.6503, "step": 39698 }, { "epoch": 0.7370332277599759, "grad_norm": 0.5077753067016602, "learning_rate": 3.222578352172017e-06, "loss": 0.4813, "step": 39700 }, { "epoch": 0.7370703578973946, "grad_norm": 0.27504462003707886, "learning_rate": 3.221720678121588e-06, "loss": 0.1662, "step": 39702 }, { "epoch": 0.7371074880348132, "grad_norm": 0.6009109020233154, "learning_rate": 3.2208630963032373e-06, "loss": 0.3595, "step": 39704 }, { "epoch": 0.7371446181722319, "grad_norm": 0.43973997235298157, "learning_rate": 3.2200056067286346e-06, "loss": 0.2319, "step": 39706 }, { "epoch": 0.7371817483096504, "grad_norm": 0.4314044713973999, "learning_rate": 3.2191482094094507e-06, "loss": 0.19, "step": 39708 }, { "epoch": 0.7372188784470691, "grad_norm": 0.3657730221748352, "learning_rate": 3.218290904357344e-06, "loss": 0.1613, "step": 39710 }, { "epoch": 0.7372560085844878, "grad_norm": 0.8209778070449829, "learning_rate": 3.217433691583989e-06, "loss": 0.4598, "step": 39712 }, { "epoch": 0.7372931387219064, "grad_norm": 0.19347453117370605, "learning_rate": 3.2165765711010422e-06, "loss": 0.2209, "step": 39714 }, { "epoch": 0.7373302688593251, "grad_norm": 0.5446634888648987, "learning_rate": 3.2157195429201682e-06, "loss": 0.4326, "step": 39716 }, { "epoch": 0.7373673989967436, "grad_norm": 0.6134762763977051, "learning_rate": 3.214862607053031e-06, "loss": 0.392, "step": 39718 }, { "epoch": 0.7374045291341623, "grad_norm": 0.5886852145195007, "learning_rate": 3.2140057635112897e-06, "loss": 0.3081, "step": 39720 }, { "epoch": 0.737441659271581, "grad_norm": 0.2883561849594116, "learning_rate": 3.213149012306603e-06, "loss": 0.2047, "step": 39722 }, { "epoch": 0.7374787894089996, "grad_norm": 0.4255123436450958, "learning_rate": 3.2122923534506345e-06, "loss": 0.1911, "step": 39724 }, { "epoch": 0.7375159195464183, "grad_norm": 0.349149614572525, "learning_rate": 3.2114357869550304e-06, "loss": 0.2862, "step": 39726 }, { "epoch": 0.7375530496838368, "grad_norm": 0.3913489282131195, "learning_rate": 3.210579312831451e-06, "loss": 0.3193, "step": 39728 }, { "epoch": 0.7375901798212555, "grad_norm": 0.3857501745223999, "learning_rate": 3.2097229310915543e-06, "loss": 0.2556, "step": 39730 }, { "epoch": 0.7376273099586742, "grad_norm": 0.4340142607688904, "learning_rate": 3.208866641746986e-06, "loss": 0.3038, "step": 39732 }, { "epoch": 0.7376644400960928, "grad_norm": 0.4785262942314148, "learning_rate": 3.2080104448094016e-06, "loss": 0.3301, "step": 39734 }, { "epoch": 0.7377015702335115, "grad_norm": 0.42924487590789795, "learning_rate": 3.2071543402904505e-06, "loss": 0.2738, "step": 39736 }, { "epoch": 0.73773870037093, "grad_norm": 0.37452811002731323, "learning_rate": 3.2062983282017844e-06, "loss": 0.4482, "step": 39738 }, { "epoch": 0.7377758305083487, "grad_norm": 0.7111393809318542, "learning_rate": 3.205442408555045e-06, "loss": 0.2099, "step": 39740 }, { "epoch": 0.7378129606457674, "grad_norm": 0.4545170068740845, "learning_rate": 3.2045865813618835e-06, "loss": 0.3603, "step": 39742 }, { "epoch": 0.737850090783186, "grad_norm": 0.4308466911315918, "learning_rate": 3.2037308466339434e-06, "loss": 0.5005, "step": 39744 }, { "epoch": 0.7378872209206047, "grad_norm": 0.3586469292640686, "learning_rate": 3.202875204382869e-06, "loss": 0.1148, "step": 39746 }, { "epoch": 0.7379243510580232, "grad_norm": 0.4995114505290985, "learning_rate": 3.202019654620304e-06, "loss": 0.1833, "step": 39748 }, { "epoch": 0.7379614811954419, "grad_norm": 0.4337833821773529, "learning_rate": 3.201164197357892e-06, "loss": 0.4176, "step": 39750 }, { "epoch": 0.7379986113328605, "grad_norm": 0.4128907024860382, "learning_rate": 3.2003088326072675e-06, "loss": 0.2835, "step": 39752 }, { "epoch": 0.7380357414702792, "grad_norm": 0.4682595133781433, "learning_rate": 3.199453560380075e-06, "loss": 0.2604, "step": 39754 }, { "epoch": 0.7380728716076979, "grad_norm": 0.3407163619995117, "learning_rate": 3.1985983806879473e-06, "loss": 0.3679, "step": 39756 }, { "epoch": 0.7381100017451164, "grad_norm": 0.6640310883522034, "learning_rate": 3.197743293542522e-06, "loss": 0.3785, "step": 39758 }, { "epoch": 0.7381471318825351, "grad_norm": 0.3402019143104553, "learning_rate": 3.196888298955436e-06, "loss": 0.1933, "step": 39760 }, { "epoch": 0.7381842620199537, "grad_norm": 0.617081344127655, "learning_rate": 3.196033396938325e-06, "loss": 0.3261, "step": 39762 }, { "epoch": 0.7382213921573724, "grad_norm": 0.44483864307403564, "learning_rate": 3.1951785875028172e-06, "loss": 0.2887, "step": 39764 }, { "epoch": 0.738258522294791, "grad_norm": 0.3634507358074188, "learning_rate": 3.194323870660545e-06, "loss": 0.3555, "step": 39766 }, { "epoch": 0.7382956524322096, "grad_norm": 0.39583802223205566, "learning_rate": 3.19346924642314e-06, "loss": 0.388, "step": 39768 }, { "epoch": 0.7383327825696283, "grad_norm": 0.4209096431732178, "learning_rate": 3.192614714802231e-06, "loss": 0.191, "step": 39770 }, { "epoch": 0.7383699127070469, "grad_norm": 0.5071036219596863, "learning_rate": 3.191760275809448e-06, "loss": 0.369, "step": 39772 }, { "epoch": 0.7384070428444656, "grad_norm": 0.5053966045379639, "learning_rate": 3.1909059294564115e-06, "loss": 0.1357, "step": 39774 }, { "epoch": 0.7384441729818843, "grad_norm": 0.34127503633499146, "learning_rate": 3.190051675754753e-06, "loss": 0.1335, "step": 39776 }, { "epoch": 0.7384813031193028, "grad_norm": 0.3356647491455078, "learning_rate": 3.1891975147160893e-06, "loss": 0.3487, "step": 39778 }, { "epoch": 0.7385184332567215, "grad_norm": 0.5177041888237, "learning_rate": 3.188343446352048e-06, "loss": 0.45, "step": 39780 }, { "epoch": 0.7385555633941401, "grad_norm": 0.33546823263168335, "learning_rate": 3.1874894706742476e-06, "loss": 0.2361, "step": 39782 }, { "epoch": 0.7385926935315588, "grad_norm": 0.24496221542358398, "learning_rate": 3.18663558769431e-06, "loss": 0.1783, "step": 39784 }, { "epoch": 0.7386298236689774, "grad_norm": 0.19015000760555267, "learning_rate": 3.185781797423855e-06, "loss": 0.2165, "step": 39786 }, { "epoch": 0.738666953806396, "grad_norm": 0.6021552681922913, "learning_rate": 3.1849280998745003e-06, "loss": 0.3359, "step": 39788 }, { "epoch": 0.7387040839438147, "grad_norm": 0.3418736457824707, "learning_rate": 3.1840744950578583e-06, "loss": 0.2606, "step": 39790 }, { "epoch": 0.7387412140812333, "grad_norm": 0.412597119808197, "learning_rate": 3.1832209829855465e-06, "loss": 0.2359, "step": 39792 }, { "epoch": 0.738778344218652, "grad_norm": 0.6657752990722656, "learning_rate": 3.182367563669181e-06, "loss": 0.2669, "step": 39794 }, { "epoch": 0.7388154743560706, "grad_norm": 0.6148823499679565, "learning_rate": 3.1815142371203687e-06, "loss": 0.2966, "step": 39796 }, { "epoch": 0.7388526044934892, "grad_norm": 0.4595191776752472, "learning_rate": 3.1806610033507246e-06, "loss": 0.2413, "step": 39798 }, { "epoch": 0.7388897346309079, "grad_norm": 0.23713254928588867, "learning_rate": 3.179807862371861e-06, "loss": 0.2934, "step": 39800 }, { "epoch": 0.7389268647683265, "grad_norm": 0.4516656994819641, "learning_rate": 3.1789548141953798e-06, "loss": 0.0727, "step": 39802 }, { "epoch": 0.7389639949057452, "grad_norm": 0.42876461148262024, "learning_rate": 3.178101858832893e-06, "loss": 0.2759, "step": 39804 }, { "epoch": 0.7390011250431637, "grad_norm": 0.5186970829963684, "learning_rate": 3.177248996296005e-06, "loss": 0.0909, "step": 39806 }, { "epoch": 0.7390382551805824, "grad_norm": 0.16999712586402893, "learning_rate": 3.176396226596322e-06, "loss": 0.2335, "step": 39808 }, { "epoch": 0.7390753853180011, "grad_norm": 0.4646111726760864, "learning_rate": 3.1755435497454477e-06, "loss": 0.4137, "step": 39810 }, { "epoch": 0.7391125154554197, "grad_norm": 0.3292817771434784, "learning_rate": 3.174690965754987e-06, "loss": 0.4099, "step": 39812 }, { "epoch": 0.7391496455928384, "grad_norm": 0.5161586403846741, "learning_rate": 3.1738384746365347e-06, "loss": 0.4954, "step": 39814 }, { "epoch": 0.7391867757302569, "grad_norm": 0.3667740225791931, "learning_rate": 3.172986076401697e-06, "loss": 0.174, "step": 39816 }, { "epoch": 0.7392239058676756, "grad_norm": 0.4248465597629547, "learning_rate": 3.172133771062067e-06, "loss": 0.3805, "step": 39818 }, { "epoch": 0.7392610360050943, "grad_norm": 0.3711097240447998, "learning_rate": 3.1712815586292445e-06, "loss": 0.2976, "step": 39820 }, { "epoch": 0.7392981661425129, "grad_norm": 0.28385937213897705, "learning_rate": 3.170429439114825e-06, "loss": 0.1331, "step": 39822 }, { "epoch": 0.7393352962799316, "grad_norm": 0.4015321731567383, "learning_rate": 3.1695774125304047e-06, "loss": 0.2598, "step": 39824 }, { "epoch": 0.7393724264173501, "grad_norm": 0.36594337224960327, "learning_rate": 3.168725478887579e-06, "loss": 0.235, "step": 39826 }, { "epoch": 0.7394095565547688, "grad_norm": 0.40637338161468506, "learning_rate": 3.1678736381979347e-06, "loss": 0.3429, "step": 39828 }, { "epoch": 0.7394466866921875, "grad_norm": 0.369426429271698, "learning_rate": 3.1670218904730653e-06, "loss": 0.2319, "step": 39830 }, { "epoch": 0.7394838168296061, "grad_norm": 0.35711997747421265, "learning_rate": 3.1661702357245604e-06, "loss": 0.2706, "step": 39832 }, { "epoch": 0.7395209469670248, "grad_norm": 0.4420630931854248, "learning_rate": 3.1653186739640098e-06, "loss": 0.3401, "step": 39834 }, { "epoch": 0.7395580771044433, "grad_norm": 0.387168824672699, "learning_rate": 3.1644672052030023e-06, "loss": 0.3225, "step": 39836 }, { "epoch": 0.739595207241862, "grad_norm": 0.4297964572906494, "learning_rate": 3.1636158294531218e-06, "loss": 0.3028, "step": 39838 }, { "epoch": 0.7396323373792807, "grad_norm": 0.33744072914123535, "learning_rate": 3.1627645467259495e-06, "loss": 0.2597, "step": 39840 }, { "epoch": 0.7396694675166993, "grad_norm": 0.30913621187210083, "learning_rate": 3.1619133570330705e-06, "loss": 0.2664, "step": 39842 }, { "epoch": 0.739706597654118, "grad_norm": 0.5663681626319885, "learning_rate": 3.1610622603860696e-06, "loss": 0.2754, "step": 39844 }, { "epoch": 0.7397437277915365, "grad_norm": 0.4782780706882477, "learning_rate": 3.160211256796526e-06, "loss": 0.3141, "step": 39846 }, { "epoch": 0.7397808579289552, "grad_norm": 0.3798877000808716, "learning_rate": 3.1593603462760202e-06, "loss": 0.3105, "step": 39848 }, { "epoch": 0.7398179880663739, "grad_norm": 0.21026486158370972, "learning_rate": 3.1585095288361322e-06, "loss": 0.2449, "step": 39850 }, { "epoch": 0.7398551182037925, "grad_norm": 0.7013682723045349, "learning_rate": 3.157658804488434e-06, "loss": 0.3149, "step": 39852 }, { "epoch": 0.7398922483412111, "grad_norm": 0.3784891664981842, "learning_rate": 3.1568081732445044e-06, "loss": 0.1808, "step": 39854 }, { "epoch": 0.7399293784786297, "grad_norm": 0.4772290885448456, "learning_rate": 3.1559576351159183e-06, "loss": 0.3087, "step": 39856 }, { "epoch": 0.7399665086160484, "grad_norm": 0.2988712191581726, "learning_rate": 3.1551071901142505e-06, "loss": 0.1484, "step": 39858 }, { "epoch": 0.740003638753467, "grad_norm": 0.5337780117988586, "learning_rate": 3.1542568382510686e-06, "loss": 0.4094, "step": 39860 }, { "epoch": 0.7400407688908857, "grad_norm": 0.30269649624824524, "learning_rate": 3.1534065795379464e-06, "loss": 0.2871, "step": 39862 }, { "epoch": 0.7400778990283043, "grad_norm": 0.4526297152042389, "learning_rate": 3.1525564139864553e-06, "loss": 0.2711, "step": 39864 }, { "epoch": 0.7401150291657229, "grad_norm": 0.3100440502166748, "learning_rate": 3.1517063416081572e-06, "loss": 0.4672, "step": 39866 }, { "epoch": 0.7401521593031416, "grad_norm": 0.37063223123550415, "learning_rate": 3.1508563624146227e-06, "loss": 0.2753, "step": 39868 }, { "epoch": 0.7401892894405602, "grad_norm": 0.7800668478012085, "learning_rate": 3.150006476417419e-06, "loss": 0.1789, "step": 39870 }, { "epoch": 0.7402264195779789, "grad_norm": 0.5451666116714478, "learning_rate": 3.1491566836281075e-06, "loss": 0.3022, "step": 39872 }, { "epoch": 0.7402635497153975, "grad_norm": 0.3655959665775299, "learning_rate": 3.1483069840582538e-06, "loss": 0.3683, "step": 39874 }, { "epoch": 0.7403006798528161, "grad_norm": 0.378343403339386, "learning_rate": 3.1474573777194217e-06, "loss": 0.1686, "step": 39876 }, { "epoch": 0.7403378099902348, "grad_norm": 0.3279966115951538, "learning_rate": 3.1466078646231658e-06, "loss": 0.2033, "step": 39878 }, { "epoch": 0.7403749401276534, "grad_norm": 0.3416300117969513, "learning_rate": 3.1457584447810517e-06, "loss": 0.327, "step": 39880 }, { "epoch": 0.740412070265072, "grad_norm": 0.2918615937232971, "learning_rate": 3.144909118204631e-06, "loss": 0.2836, "step": 39882 }, { "epoch": 0.7404492004024907, "grad_norm": 0.34912699460983276, "learning_rate": 3.144059884905465e-06, "loss": 0.4867, "step": 39884 }, { "epoch": 0.7404863305399093, "grad_norm": 0.3318246901035309, "learning_rate": 3.1432107448951066e-06, "loss": 0.1362, "step": 39886 }, { "epoch": 0.740523460677328, "grad_norm": 0.4059092700481415, "learning_rate": 3.1423616981851148e-06, "loss": 0.2818, "step": 39888 }, { "epoch": 0.7405605908147466, "grad_norm": 0.43322592973709106, "learning_rate": 3.1415127447870363e-06, "loss": 0.2981, "step": 39890 }, { "epoch": 0.7405977209521653, "grad_norm": 0.43988093733787537, "learning_rate": 3.140663884712426e-06, "loss": 0.1899, "step": 39892 }, { "epoch": 0.7406348510895839, "grad_norm": 0.3788839876651764, "learning_rate": 3.1398151179728344e-06, "loss": 0.2817, "step": 39894 }, { "epoch": 0.7406719812270025, "grad_norm": 0.4490831196308136, "learning_rate": 3.1389664445798107e-06, "loss": 0.2957, "step": 39896 }, { "epoch": 0.7407091113644212, "grad_norm": 0.3374093472957611, "learning_rate": 3.138117864544902e-06, "loss": 0.3962, "step": 39898 }, { "epoch": 0.7407462415018398, "grad_norm": 0.4968375265598297, "learning_rate": 3.1372693778796583e-06, "loss": 0.3103, "step": 39900 }, { "epoch": 0.7407833716392584, "grad_norm": 0.4294300377368927, "learning_rate": 3.136420984595623e-06, "loss": 0.4285, "step": 39902 }, { "epoch": 0.740820501776677, "grad_norm": 0.472463995218277, "learning_rate": 3.1355726847043343e-06, "loss": 0.1948, "step": 39904 }, { "epoch": 0.7408576319140957, "grad_norm": 0.37947896122932434, "learning_rate": 3.1347244782173413e-06, "loss": 0.2748, "step": 39906 }, { "epoch": 0.7408947620515144, "grad_norm": 0.3646430969238281, "learning_rate": 3.1338763651461846e-06, "loss": 0.3316, "step": 39908 }, { "epoch": 0.740931892188933, "grad_norm": 0.35680311918258667, "learning_rate": 3.1330283455024037e-06, "loss": 0.2169, "step": 39910 }, { "epoch": 0.7409690223263516, "grad_norm": 0.4057343602180481, "learning_rate": 3.132180419297538e-06, "loss": 0.5018, "step": 39912 }, { "epoch": 0.7410061524637702, "grad_norm": 0.29532474279403687, "learning_rate": 3.1313325865431288e-06, "loss": 0.1944, "step": 39914 }, { "epoch": 0.7410432826011889, "grad_norm": 0.3516652286052704, "learning_rate": 3.1304848472507054e-06, "loss": 0.3425, "step": 39916 }, { "epoch": 0.7410804127386076, "grad_norm": 0.6633748412132263, "learning_rate": 3.1296372014318066e-06, "loss": 0.2972, "step": 39918 }, { "epoch": 0.7411175428760262, "grad_norm": 0.47135451436042786, "learning_rate": 3.1287896490979675e-06, "loss": 0.2095, "step": 39920 }, { "epoch": 0.7411546730134448, "grad_norm": 0.4451085925102234, "learning_rate": 3.127942190260722e-06, "loss": 0.2657, "step": 39922 }, { "epoch": 0.7411918031508634, "grad_norm": 0.39433467388153076, "learning_rate": 3.1270948249315956e-06, "loss": 0.3109, "step": 39924 }, { "epoch": 0.7412289332882821, "grad_norm": 0.30415859818458557, "learning_rate": 3.1262475531221246e-06, "loss": 0.2722, "step": 39926 }, { "epoch": 0.7412660634257008, "grad_norm": 0.31332701444625854, "learning_rate": 3.1254003748438333e-06, "loss": 0.1085, "step": 39928 }, { "epoch": 0.7413031935631194, "grad_norm": 0.285642147064209, "learning_rate": 3.124553290108251e-06, "loss": 0.4008, "step": 39930 }, { "epoch": 0.741340323700538, "grad_norm": 0.41343799233436584, "learning_rate": 3.123706298926903e-06, "loss": 0.4399, "step": 39932 }, { "epoch": 0.7413774538379566, "grad_norm": 0.3357764482498169, "learning_rate": 3.1228594013113157e-06, "loss": 0.3903, "step": 39934 }, { "epoch": 0.7414145839753753, "grad_norm": 0.4706973731517792, "learning_rate": 3.122012597273013e-06, "loss": 0.2892, "step": 39936 }, { "epoch": 0.741451714112794, "grad_norm": 0.23890571296215057, "learning_rate": 3.1211658868235206e-06, "loss": 0.1868, "step": 39938 }, { "epoch": 0.7414888442502126, "grad_norm": 0.3745531439781189, "learning_rate": 3.1203192699743512e-06, "loss": 0.1773, "step": 39940 }, { "epoch": 0.7415259743876312, "grad_norm": 0.36497291922569275, "learning_rate": 3.1194727467370313e-06, "loss": 0.1237, "step": 39942 }, { "epoch": 0.7415631045250498, "grad_norm": 0.5514853596687317, "learning_rate": 3.118626317123079e-06, "loss": 0.2574, "step": 39944 }, { "epoch": 0.7416002346624685, "grad_norm": 0.49003151059150696, "learning_rate": 3.117779981144009e-06, "loss": 0.2311, "step": 39946 }, { "epoch": 0.7416373647998872, "grad_norm": 0.25451457500457764, "learning_rate": 3.1169337388113372e-06, "loss": 0.2101, "step": 39948 }, { "epoch": 0.7416744949373058, "grad_norm": 0.5134959816932678, "learning_rate": 3.1160875901365817e-06, "loss": 0.3662, "step": 39950 }, { "epoch": 0.7417116250747244, "grad_norm": 0.31824877858161926, "learning_rate": 3.115241535131257e-06, "loss": 0.3163, "step": 39952 }, { "epoch": 0.741748755212143, "grad_norm": 0.4710465967655182, "learning_rate": 3.114395573806869e-06, "loss": 0.3642, "step": 39954 }, { "epoch": 0.7417858853495617, "grad_norm": 0.45654985308647156, "learning_rate": 3.113549706174933e-06, "loss": 0.2995, "step": 39956 }, { "epoch": 0.7418230154869803, "grad_norm": 0.5098035335540771, "learning_rate": 3.1127039322469577e-06, "loss": 0.1788, "step": 39958 }, { "epoch": 0.741860145624399, "grad_norm": 0.3298649489879608, "learning_rate": 3.111858252034452e-06, "loss": 0.4088, "step": 39960 }, { "epoch": 0.7418972757618176, "grad_norm": 0.48691198229789734, "learning_rate": 3.111012665548927e-06, "loss": 0.2603, "step": 39962 }, { "epoch": 0.7419344058992362, "grad_norm": 0.22462467849254608, "learning_rate": 3.1101671728018812e-06, "loss": 0.209, "step": 39964 }, { "epoch": 0.7419715360366549, "grad_norm": 0.38143405318260193, "learning_rate": 3.1093217738048263e-06, "loss": 0.2922, "step": 39966 }, { "epoch": 0.7420086661740735, "grad_norm": 0.34232017397880554, "learning_rate": 3.108476468569258e-06, "loss": 0.3315, "step": 39968 }, { "epoch": 0.7420457963114921, "grad_norm": 0.3852739632129669, "learning_rate": 3.1076312571066837e-06, "loss": 0.2004, "step": 39970 }, { "epoch": 0.7420829264489108, "grad_norm": 0.489287406206131, "learning_rate": 3.1067861394286015e-06, "loss": 0.2068, "step": 39972 }, { "epoch": 0.7421200565863294, "grad_norm": 0.43663865327835083, "learning_rate": 3.1059411155465137e-06, "loss": 0.235, "step": 39974 }, { "epoch": 0.7421571867237481, "grad_norm": 0.3968579173088074, "learning_rate": 3.10509618547192e-06, "loss": 0.1015, "step": 39976 }, { "epoch": 0.7421943168611667, "grad_norm": 0.40655627846717834, "learning_rate": 3.1042513492163107e-06, "loss": 0.2812, "step": 39978 }, { "epoch": 0.7422314469985853, "grad_norm": 0.2259611189365387, "learning_rate": 3.103406606791186e-06, "loss": 0.3766, "step": 39980 }, { "epoch": 0.742268577136004, "grad_norm": 0.4808831512928009, "learning_rate": 3.10256195820804e-06, "loss": 0.2848, "step": 39982 }, { "epoch": 0.7423057072734226, "grad_norm": 0.3178793489933014, "learning_rate": 3.1017174034783682e-06, "loss": 0.1367, "step": 39984 }, { "epoch": 0.7423428374108413, "grad_norm": 0.5424390435218811, "learning_rate": 3.1008729426136564e-06, "loss": 0.2466, "step": 39986 }, { "epoch": 0.7423799675482599, "grad_norm": 0.4486656188964844, "learning_rate": 3.100028575625399e-06, "loss": 0.2471, "step": 39988 }, { "epoch": 0.7424170976856785, "grad_norm": 0.46225687861442566, "learning_rate": 3.099184302525088e-06, "loss": 0.2324, "step": 39990 }, { "epoch": 0.7424542278230972, "grad_norm": 0.3951317071914673, "learning_rate": 3.098340123324205e-06, "loss": 0.1971, "step": 39992 }, { "epoch": 0.7424913579605158, "grad_norm": 0.4322395622730255, "learning_rate": 3.09749603803424e-06, "loss": 0.2646, "step": 39994 }, { "epoch": 0.7425284880979345, "grad_norm": 0.30992117524147034, "learning_rate": 3.0966520466666796e-06, "loss": 0.2623, "step": 39996 }, { "epoch": 0.742565618235353, "grad_norm": 0.4846774637699127, "learning_rate": 3.095808149233005e-06, "loss": 0.3134, "step": 39998 }, { "epoch": 0.7426027483727717, "grad_norm": 0.30779051780700684, "learning_rate": 3.0949643457447022e-06, "loss": 0.2868, "step": 40000 }, { "epoch": 0.7426398785101904, "grad_norm": 0.27895352244377136, "learning_rate": 3.0941206362132546e-06, "loss": 0.2276, "step": 40002 }, { "epoch": 0.742677008647609, "grad_norm": 0.37713536620140076, "learning_rate": 3.0932770206501373e-06, "loss": 0.4282, "step": 40004 }, { "epoch": 0.7427141387850277, "grad_norm": 0.5171748995780945, "learning_rate": 3.0924334990668304e-06, "loss": 0.2221, "step": 40006 }, { "epoch": 0.7427512689224463, "grad_norm": 0.427176296710968, "learning_rate": 3.091590071474817e-06, "loss": 0.24, "step": 40008 }, { "epoch": 0.7427883990598649, "grad_norm": 0.40278199315071106, "learning_rate": 3.090746737885566e-06, "loss": 0.2462, "step": 40010 }, { "epoch": 0.7428255291972835, "grad_norm": 0.4706867039203644, "learning_rate": 3.089903498310557e-06, "loss": 0.1171, "step": 40012 }, { "epoch": 0.7428626593347022, "grad_norm": 0.39051365852355957, "learning_rate": 3.089060352761266e-06, "loss": 0.1655, "step": 40014 }, { "epoch": 0.7428997894721209, "grad_norm": 0.6257150769233704, "learning_rate": 3.088217301249159e-06, "loss": 0.3879, "step": 40016 }, { "epoch": 0.7429369196095394, "grad_norm": 0.30941474437713623, "learning_rate": 3.087374343785713e-06, "loss": 0.3056, "step": 40018 }, { "epoch": 0.7429740497469581, "grad_norm": 0.4920826554298401, "learning_rate": 3.0865314803823954e-06, "loss": 0.3418, "step": 40020 }, { "epoch": 0.7430111798843767, "grad_norm": 0.47558194398880005, "learning_rate": 3.085688711050676e-06, "loss": 0.274, "step": 40022 }, { "epoch": 0.7430483100217954, "grad_norm": 0.35618728399276733, "learning_rate": 3.084846035802023e-06, "loss": 0.2482, "step": 40024 }, { "epoch": 0.7430854401592141, "grad_norm": 0.4760759770870209, "learning_rate": 3.084003454647905e-06, "loss": 0.3701, "step": 40026 }, { "epoch": 0.7431225702966326, "grad_norm": 0.3827405869960785, "learning_rate": 3.083160967599781e-06, "loss": 0.273, "step": 40028 }, { "epoch": 0.7431597004340513, "grad_norm": 0.3624255359172821, "learning_rate": 3.082318574669121e-06, "loss": 0.348, "step": 40030 }, { "epoch": 0.7431968305714699, "grad_norm": 0.2995072305202484, "learning_rate": 3.081476275867381e-06, "loss": 0.2827, "step": 40032 }, { "epoch": 0.7432339607088886, "grad_norm": 0.24099791049957275, "learning_rate": 3.0806340712060258e-06, "loss": 0.2786, "step": 40034 }, { "epoch": 0.7432710908463073, "grad_norm": 0.3649945557117462, "learning_rate": 3.0797919606965154e-06, "loss": 0.4394, "step": 40036 }, { "epoch": 0.7433082209837258, "grad_norm": 0.406194806098938, "learning_rate": 3.078949944350308e-06, "loss": 0.282, "step": 40038 }, { "epoch": 0.7433453511211445, "grad_norm": 0.38495367765426636, "learning_rate": 3.0781080221788628e-06, "loss": 0.3534, "step": 40040 }, { "epoch": 0.7433824812585631, "grad_norm": 0.580451250076294, "learning_rate": 3.0772661941936323e-06, "loss": 0.2458, "step": 40042 }, { "epoch": 0.7434196113959818, "grad_norm": 0.5322532057762146, "learning_rate": 3.0764244604060724e-06, "loss": 0.1291, "step": 40044 }, { "epoch": 0.7434567415334005, "grad_norm": 0.3556428849697113, "learning_rate": 3.075582820827636e-06, "loss": 0.1982, "step": 40046 }, { "epoch": 0.743493871670819, "grad_norm": 0.7268369197845459, "learning_rate": 3.0747412754697813e-06, "loss": 0.3917, "step": 40048 }, { "epoch": 0.7435310018082377, "grad_norm": 0.4990501403808594, "learning_rate": 3.07389982434395e-06, "loss": 0.3903, "step": 40050 }, { "epoch": 0.7435681319456563, "grad_norm": 0.25578606128692627, "learning_rate": 3.073058467461599e-06, "loss": 0.3038, "step": 40052 }, { "epoch": 0.743605262083075, "grad_norm": 0.2587500512599945, "learning_rate": 3.0722172048341713e-06, "loss": 0.3948, "step": 40054 }, { "epoch": 0.7436423922204936, "grad_norm": 0.30377891659736633, "learning_rate": 3.071376036473116e-06, "loss": 0.2219, "step": 40056 }, { "epoch": 0.7436795223579122, "grad_norm": 0.36774876713752747, "learning_rate": 3.0705349623898796e-06, "loss": 0.4256, "step": 40058 }, { "epoch": 0.7437166524953309, "grad_norm": 0.6472439765930176, "learning_rate": 3.069693982595906e-06, "loss": 0.263, "step": 40060 }, { "epoch": 0.7437537826327495, "grad_norm": 0.33867040276527405, "learning_rate": 3.0688530971026388e-06, "loss": 0.2411, "step": 40062 }, { "epoch": 0.7437909127701682, "grad_norm": 0.36092501878738403, "learning_rate": 3.068012305921523e-06, "loss": 0.1856, "step": 40064 }, { "epoch": 0.7438280429075868, "grad_norm": 0.3139655888080597, "learning_rate": 3.0671716090639915e-06, "loss": 0.2603, "step": 40066 }, { "epoch": 0.7438651730450054, "grad_norm": 0.7444578409194946, "learning_rate": 3.0663310065414897e-06, "loss": 0.3964, "step": 40068 }, { "epoch": 0.7439023031824241, "grad_norm": 0.419436514377594, "learning_rate": 3.0654904983654564e-06, "loss": 0.4007, "step": 40070 }, { "epoch": 0.7439394333198427, "grad_norm": 0.6879094243049622, "learning_rate": 3.0646500845473237e-06, "loss": 0.1479, "step": 40072 }, { "epoch": 0.7439765634572614, "grad_norm": 0.2978479266166687, "learning_rate": 3.063809765098529e-06, "loss": 0.3227, "step": 40074 }, { "epoch": 0.74401369359468, "grad_norm": 0.3856264650821686, "learning_rate": 3.062969540030507e-06, "loss": 0.2612, "step": 40076 }, { "epoch": 0.7440508237320986, "grad_norm": 0.5719103217124939, "learning_rate": 3.0621294093546948e-06, "loss": 0.3589, "step": 40078 }, { "epoch": 0.7440879538695173, "grad_norm": 0.38829633593559265, "learning_rate": 3.0612893730825155e-06, "loss": 0.1944, "step": 40080 }, { "epoch": 0.7441250840069359, "grad_norm": 0.4330374300479889, "learning_rate": 3.060449431225405e-06, "loss": 0.2702, "step": 40082 }, { "epoch": 0.7441622141443546, "grad_norm": 0.44694167375564575, "learning_rate": 3.0596095837947914e-06, "loss": 0.1857, "step": 40084 }, { "epoch": 0.7441993442817731, "grad_norm": 0.29867634177207947, "learning_rate": 3.058769830802102e-06, "loss": 0.2242, "step": 40086 }, { "epoch": 0.7442364744191918, "grad_norm": 0.4112626612186432, "learning_rate": 3.057930172258764e-06, "loss": 0.3648, "step": 40088 }, { "epoch": 0.7442736045566105, "grad_norm": 0.33459389209747314, "learning_rate": 3.0570906081762063e-06, "loss": 0.2828, "step": 40090 }, { "epoch": 0.7443107346940291, "grad_norm": 0.21339619159698486, "learning_rate": 3.056251138565848e-06, "loss": 0.3155, "step": 40092 }, { "epoch": 0.7443478648314478, "grad_norm": 0.5885301828384399, "learning_rate": 3.05541176343911e-06, "loss": 0.2771, "step": 40094 }, { "epoch": 0.7443849949688663, "grad_norm": 0.3786029517650604, "learning_rate": 3.0545724828074162e-06, "loss": 0.2223, "step": 40096 }, { "epoch": 0.744422125106285, "grad_norm": 0.7119247317314148, "learning_rate": 3.0537332966821874e-06, "loss": 0.247, "step": 40098 }, { "epoch": 0.7444592552437037, "grad_norm": 0.277366042137146, "learning_rate": 3.0528942050748422e-06, "loss": 0.2029, "step": 40100 }, { "epoch": 0.7444963853811223, "grad_norm": 0.6237053275108337, "learning_rate": 3.0520552079968e-06, "loss": 0.2902, "step": 40102 }, { "epoch": 0.744533515518541, "grad_norm": 0.6523361206054688, "learning_rate": 3.0512163054594725e-06, "loss": 0.2365, "step": 40104 }, { "epoch": 0.7445706456559595, "grad_norm": 0.33660900592803955, "learning_rate": 3.050377497474277e-06, "loss": 0.252, "step": 40106 }, { "epoch": 0.7446077757933782, "grad_norm": 0.38165482878685, "learning_rate": 3.049538784052627e-06, "loss": 0.2441, "step": 40108 }, { "epoch": 0.7446449059307968, "grad_norm": 0.369779497385025, "learning_rate": 3.0487001652059355e-06, "loss": 0.2445, "step": 40110 }, { "epoch": 0.7446820360682155, "grad_norm": 0.4809916317462921, "learning_rate": 3.047861640945615e-06, "loss": 0.2055, "step": 40112 }, { "epoch": 0.7447191662056342, "grad_norm": 0.4940428137779236, "learning_rate": 3.0470232112830723e-06, "loss": 0.301, "step": 40114 }, { "epoch": 0.7447562963430527, "grad_norm": 0.32643792033195496, "learning_rate": 3.0461848762297195e-06, "loss": 0.3649, "step": 40116 }, { "epoch": 0.7447934264804714, "grad_norm": 0.5354380011558533, "learning_rate": 3.0453466357969574e-06, "loss": 0.2329, "step": 40118 }, { "epoch": 0.74483055661789, "grad_norm": 0.4701504707336426, "learning_rate": 3.0445084899961976e-06, "loss": 0.157, "step": 40120 }, { "epoch": 0.7448676867553087, "grad_norm": 0.4035387933254242, "learning_rate": 3.0436704388388416e-06, "loss": 0.3417, "step": 40122 }, { "epoch": 0.7449048168927274, "grad_norm": 0.22432273626327515, "learning_rate": 3.042832482336295e-06, "loss": 0.2711, "step": 40124 }, { "epoch": 0.7449419470301459, "grad_norm": 0.4862229824066162, "learning_rate": 3.0419946204999597e-06, "loss": 0.2719, "step": 40126 }, { "epoch": 0.7449790771675646, "grad_norm": 0.2762148082256317, "learning_rate": 3.0411568533412385e-06, "loss": 0.1014, "step": 40128 }, { "epoch": 0.7450162073049832, "grad_norm": 0.5028297305107117, "learning_rate": 3.040319180871526e-06, "loss": 0.3867, "step": 40130 }, { "epoch": 0.7450533374424019, "grad_norm": 0.4299204349517822, "learning_rate": 3.0394816031022214e-06, "loss": 0.2066, "step": 40132 }, { "epoch": 0.7450904675798206, "grad_norm": 0.4392630159854889, "learning_rate": 3.038644120044727e-06, "loss": 0.1771, "step": 40134 }, { "epoch": 0.7451275977172391, "grad_norm": 0.24648761749267578, "learning_rate": 3.037806731710431e-06, "loss": 0.242, "step": 40136 }, { "epoch": 0.7451647278546578, "grad_norm": 0.19979175925254822, "learning_rate": 3.0369694381107315e-06, "loss": 0.2478, "step": 40138 }, { "epoch": 0.7452018579920764, "grad_norm": 0.43782052397727966, "learning_rate": 3.0361322392570247e-06, "loss": 0.2497, "step": 40140 }, { "epoch": 0.7452389881294951, "grad_norm": 0.30452510714530945, "learning_rate": 3.035295135160695e-06, "loss": 0.1498, "step": 40142 }, { "epoch": 0.7452761182669138, "grad_norm": 0.3463420569896698, "learning_rate": 3.0344581258331373e-06, "loss": 0.2236, "step": 40144 }, { "epoch": 0.7453132484043323, "grad_norm": 0.3357095420360565, "learning_rate": 3.03362121128574e-06, "loss": 0.3303, "step": 40146 }, { "epoch": 0.745350378541751, "grad_norm": 0.40374359488487244, "learning_rate": 3.0327843915298918e-06, "loss": 0.2101, "step": 40148 }, { "epoch": 0.7453875086791696, "grad_norm": 0.31130921840667725, "learning_rate": 3.0319476665769786e-06, "loss": 0.1229, "step": 40150 }, { "epoch": 0.7454246388165883, "grad_norm": 0.5515148639678955, "learning_rate": 3.031111036438389e-06, "loss": 0.1517, "step": 40152 }, { "epoch": 0.745461768954007, "grad_norm": 0.42077669501304626, "learning_rate": 3.030274501125501e-06, "loss": 0.2315, "step": 40154 }, { "epoch": 0.7454988990914255, "grad_norm": 0.44162219762802124, "learning_rate": 3.0294380606497032e-06, "loss": 0.3968, "step": 40156 }, { "epoch": 0.7455360292288442, "grad_norm": 0.40335991978645325, "learning_rate": 3.028601715022371e-06, "loss": 0.1982, "step": 40158 }, { "epoch": 0.7455731593662628, "grad_norm": 0.42558398842811584, "learning_rate": 3.027765464254887e-06, "loss": 0.1857, "step": 40160 }, { "epoch": 0.7456102895036815, "grad_norm": 0.4341014325618744, "learning_rate": 3.0269293083586314e-06, "loss": 0.1832, "step": 40162 }, { "epoch": 0.7456474196411, "grad_norm": 0.4233351945877075, "learning_rate": 3.0260932473449824e-06, "loss": 0.3077, "step": 40164 }, { "epoch": 0.7456845497785187, "grad_norm": 0.4022819697856903, "learning_rate": 3.0252572812253167e-06, "loss": 0.2358, "step": 40166 }, { "epoch": 0.7457216799159374, "grad_norm": 0.4743821620941162, "learning_rate": 3.024421410011005e-06, "loss": 0.2105, "step": 40168 }, { "epoch": 0.745758810053356, "grad_norm": 0.36108964681625366, "learning_rate": 3.023585633713423e-06, "loss": 0.2758, "step": 40170 }, { "epoch": 0.7457959401907747, "grad_norm": 0.24381716549396515, "learning_rate": 3.022749952343944e-06, "loss": 0.1177, "step": 40172 }, { "epoch": 0.7458330703281932, "grad_norm": 0.34277912974357605, "learning_rate": 3.0219143659139394e-06, "loss": 0.3313, "step": 40174 }, { "epoch": 0.7458702004656119, "grad_norm": 0.2952805161476135, "learning_rate": 3.0210788744347807e-06, "loss": 0.3066, "step": 40176 }, { "epoch": 0.7459073306030306, "grad_norm": 0.5980736017227173, "learning_rate": 3.020243477917836e-06, "loss": 0.2788, "step": 40178 }, { "epoch": 0.7459444607404492, "grad_norm": 0.3407723903656006, "learning_rate": 3.0194081763744655e-06, "loss": 0.2993, "step": 40180 }, { "epoch": 0.7459815908778679, "grad_norm": 0.2439137101173401, "learning_rate": 3.0185729698160416e-06, "loss": 0.2172, "step": 40182 }, { "epoch": 0.7460187210152864, "grad_norm": 0.420911580324173, "learning_rate": 3.0177378582539276e-06, "loss": 0.2, "step": 40184 }, { "epoch": 0.7460558511527051, "grad_norm": 0.35678088665008545, "learning_rate": 3.0169028416994873e-06, "loss": 0.159, "step": 40186 }, { "epoch": 0.7460929812901238, "grad_norm": 0.4752412736415863, "learning_rate": 3.016067920164082e-06, "loss": 0.1152, "step": 40188 }, { "epoch": 0.7461301114275424, "grad_norm": 0.24738076329231262, "learning_rate": 3.015233093659077e-06, "loss": 0.3083, "step": 40190 }, { "epoch": 0.7461672415649611, "grad_norm": 0.47351518273353577, "learning_rate": 3.014398362195824e-06, "loss": 0.4381, "step": 40192 }, { "epoch": 0.7462043717023796, "grad_norm": 0.41921135783195496, "learning_rate": 3.0135637257856843e-06, "loss": 0.3212, "step": 40194 }, { "epoch": 0.7462415018397983, "grad_norm": 0.33750271797180176, "learning_rate": 3.012729184440016e-06, "loss": 0.2053, "step": 40196 }, { "epoch": 0.746278631977217, "grad_norm": 0.6457516551017761, "learning_rate": 3.011894738170178e-06, "loss": 0.3314, "step": 40198 }, { "epoch": 0.7463157621146356, "grad_norm": 0.4144114553928375, "learning_rate": 3.0110603869875175e-06, "loss": 0.2872, "step": 40200 }, { "epoch": 0.7463528922520543, "grad_norm": 0.49976420402526855, "learning_rate": 3.01022613090339e-06, "loss": 0.4605, "step": 40202 }, { "epoch": 0.7463900223894728, "grad_norm": 0.6803915500640869, "learning_rate": 3.009391969929152e-06, "loss": 0.2895, "step": 40204 }, { "epoch": 0.7464271525268915, "grad_norm": 0.5855103135108948, "learning_rate": 3.008557904076147e-06, "loss": 0.3693, "step": 40206 }, { "epoch": 0.7464642826643101, "grad_norm": 0.519738495349884, "learning_rate": 3.0077239333557262e-06, "loss": 0.3317, "step": 40208 }, { "epoch": 0.7465014128017288, "grad_norm": 0.5718578696250916, "learning_rate": 3.006890057779239e-06, "loss": 0.3804, "step": 40210 }, { "epoch": 0.7465385429391475, "grad_norm": 0.45269685983657837, "learning_rate": 3.0060562773580315e-06, "loss": 0.4294, "step": 40212 }, { "epoch": 0.746575673076566, "grad_norm": 0.3525994122028351, "learning_rate": 3.0052225921034484e-06, "loss": 0.236, "step": 40214 }, { "epoch": 0.7466128032139847, "grad_norm": 0.2537766098976135, "learning_rate": 3.0043890020268375e-06, "loss": 0.2448, "step": 40216 }, { "epoch": 0.7466499333514033, "grad_norm": 0.3200022876262665, "learning_rate": 3.003555507139535e-06, "loss": 0.1323, "step": 40218 }, { "epoch": 0.746687063488822, "grad_norm": 0.3174964487552643, "learning_rate": 3.0027221074528877e-06, "loss": 0.1313, "step": 40220 }, { "epoch": 0.7467241936262407, "grad_norm": 0.1668201982975006, "learning_rate": 3.00188880297823e-06, "loss": 0.1752, "step": 40222 }, { "epoch": 0.7467613237636592, "grad_norm": 0.5355526804924011, "learning_rate": 3.0010555937269047e-06, "loss": 0.3681, "step": 40224 }, { "epoch": 0.7467984539010779, "grad_norm": 0.4486269950866699, "learning_rate": 3.0002224797102486e-06, "loss": 0.301, "step": 40226 }, { "epoch": 0.7468355840384965, "grad_norm": 0.3842775821685791, "learning_rate": 2.9993894609396e-06, "loss": 0.2093, "step": 40228 }, { "epoch": 0.7468727141759152, "grad_norm": 0.2321619838476181, "learning_rate": 2.9985565374262894e-06, "loss": 0.3219, "step": 40230 }, { "epoch": 0.7469098443133338, "grad_norm": 0.4199943244457245, "learning_rate": 2.9977237091816526e-06, "loss": 0.1489, "step": 40232 }, { "epoch": 0.7469469744507524, "grad_norm": 0.4171363413333893, "learning_rate": 2.9968909762170208e-06, "loss": 0.2019, "step": 40234 }, { "epoch": 0.7469841045881711, "grad_norm": 0.5603946447372437, "learning_rate": 2.996058338543727e-06, "loss": 0.3675, "step": 40236 }, { "epoch": 0.7470212347255897, "grad_norm": 0.36896660923957825, "learning_rate": 2.9952257961731023e-06, "loss": 0.1187, "step": 40238 }, { "epoch": 0.7470583648630084, "grad_norm": 0.35125967860221863, "learning_rate": 2.99439334911647e-06, "loss": 0.5072, "step": 40240 }, { "epoch": 0.747095495000427, "grad_norm": 0.5793779492378235, "learning_rate": 2.9935609973851644e-06, "loss": 0.3325, "step": 40242 }, { "epoch": 0.7471326251378456, "grad_norm": 0.2723342180252075, "learning_rate": 2.9927287409905028e-06, "loss": 0.3587, "step": 40244 }, { "epoch": 0.7471697552752643, "grad_norm": 0.2540375888347626, "learning_rate": 2.9918965799438125e-06, "loss": 0.2405, "step": 40246 }, { "epoch": 0.7472068854126829, "grad_norm": 0.521479070186615, "learning_rate": 2.99106451425642e-06, "loss": 0.5053, "step": 40248 }, { "epoch": 0.7472440155501016, "grad_norm": 0.48804280161857605, "learning_rate": 2.990232543939644e-06, "loss": 0.2348, "step": 40250 }, { "epoch": 0.7472811456875202, "grad_norm": 0.5943536162376404, "learning_rate": 2.9894006690048072e-06, "loss": 0.3825, "step": 40252 }, { "epoch": 0.7473182758249388, "grad_norm": 0.2933368384838104, "learning_rate": 2.9885688894632315e-06, "loss": 0.4072, "step": 40254 }, { "epoch": 0.7473554059623575, "grad_norm": 0.46099716424942017, "learning_rate": 2.9877372053262275e-06, "loss": 0.2886, "step": 40256 }, { "epoch": 0.7473925360997761, "grad_norm": 0.2820901870727539, "learning_rate": 2.9869056166051167e-06, "loss": 0.2344, "step": 40258 }, { "epoch": 0.7474296662371948, "grad_norm": 0.2998506724834442, "learning_rate": 2.9860741233112168e-06, "loss": 0.3496, "step": 40260 }, { "epoch": 0.7474667963746133, "grad_norm": 0.40661871433258057, "learning_rate": 2.9852427254558347e-06, "loss": 0.3477, "step": 40262 }, { "epoch": 0.747503926512032, "grad_norm": 0.3183445930480957, "learning_rate": 2.9844114230502886e-06, "loss": 0.2939, "step": 40264 }, { "epoch": 0.7475410566494507, "grad_norm": 0.3170273005962372, "learning_rate": 2.9835802161058926e-06, "loss": 0.4823, "step": 40266 }, { "epoch": 0.7475781867868693, "grad_norm": 0.4835923910140991, "learning_rate": 2.9827491046339483e-06, "loss": 0.308, "step": 40268 }, { "epoch": 0.747615316924288, "grad_norm": 0.4837155342102051, "learning_rate": 2.981918088645771e-06, "loss": 0.3922, "step": 40270 }, { "epoch": 0.7476524470617065, "grad_norm": 0.28407055139541626, "learning_rate": 2.9810871681526664e-06, "loss": 0.1866, "step": 40272 }, { "epoch": 0.7476895771991252, "grad_norm": 0.4125809669494629, "learning_rate": 2.9802563431659416e-06, "loss": 0.2422, "step": 40274 }, { "epoch": 0.7477267073365439, "grad_norm": 0.30942633748054504, "learning_rate": 2.9794256136969013e-06, "loss": 0.235, "step": 40276 }, { "epoch": 0.7477638374739625, "grad_norm": 0.5042203068733215, "learning_rate": 2.978594979756849e-06, "loss": 0.3663, "step": 40278 }, { "epoch": 0.7478009676113812, "grad_norm": 0.620173454284668, "learning_rate": 2.9777644413570915e-06, "loss": 0.3015, "step": 40280 }, { "epoch": 0.7478380977487997, "grad_norm": 0.3843126595020294, "learning_rate": 2.9769339985089217e-06, "loss": 0.1388, "step": 40282 }, { "epoch": 0.7478752278862184, "grad_norm": 0.32178694009780884, "learning_rate": 2.9761036512236484e-06, "loss": 0.3168, "step": 40284 }, { "epoch": 0.7479123580236371, "grad_norm": 0.8829051852226257, "learning_rate": 2.9752733995125603e-06, "loss": 0.3297, "step": 40286 }, { "epoch": 0.7479494881610557, "grad_norm": 0.21603497862815857, "learning_rate": 2.9744432433869608e-06, "loss": 0.3962, "step": 40288 }, { "epoch": 0.7479866182984743, "grad_norm": 0.5693198442459106, "learning_rate": 2.973613182858145e-06, "loss": 0.3574, "step": 40290 }, { "epoch": 0.7480237484358929, "grad_norm": 0.37900087237358093, "learning_rate": 2.9727832179374107e-06, "loss": 0.3285, "step": 40292 }, { "epoch": 0.7480608785733116, "grad_norm": 0.3850388824939728, "learning_rate": 2.971953348636044e-06, "loss": 0.32, "step": 40294 }, { "epoch": 0.7480980087107303, "grad_norm": 0.44898733496665955, "learning_rate": 2.971123574965341e-06, "loss": 0.421, "step": 40296 }, { "epoch": 0.7481351388481489, "grad_norm": 0.41429710388183594, "learning_rate": 2.9702938969365915e-06, "loss": 0.2416, "step": 40298 }, { "epoch": 0.7481722689855675, "grad_norm": 0.5588423013687134, "learning_rate": 2.969464314561087e-06, "loss": 0.4317, "step": 40300 }, { "epoch": 0.7482093991229861, "grad_norm": 0.2603084146976471, "learning_rate": 2.9686348278501166e-06, "loss": 0.3596, "step": 40302 }, { "epoch": 0.7482465292604048, "grad_norm": 0.37287580966949463, "learning_rate": 2.9678054368149613e-06, "loss": 0.3465, "step": 40304 }, { "epoch": 0.7482836593978235, "grad_norm": 0.4959515929222107, "learning_rate": 2.966976141466914e-06, "loss": 0.2328, "step": 40306 }, { "epoch": 0.7483207895352421, "grad_norm": 0.34679073095321655, "learning_rate": 2.966146941817252e-06, "loss": 0.184, "step": 40308 }, { "epoch": 0.7483579196726607, "grad_norm": 0.3961580991744995, "learning_rate": 2.9653178378772606e-06, "loss": 0.2349, "step": 40310 }, { "epoch": 0.7483950498100793, "grad_norm": 0.3487274944782257, "learning_rate": 2.964488829658223e-06, "loss": 0.2646, "step": 40312 }, { "epoch": 0.748432179947498, "grad_norm": 0.6037985682487488, "learning_rate": 2.9636599171714177e-06, "loss": 0.4347, "step": 40314 }, { "epoch": 0.7484693100849166, "grad_norm": 0.4712100923061371, "learning_rate": 2.962831100428125e-06, "loss": 0.3173, "step": 40316 }, { "epoch": 0.7485064402223353, "grad_norm": 0.3319389522075653, "learning_rate": 2.962002379439626e-06, "loss": 0.2744, "step": 40318 }, { "epoch": 0.7485435703597539, "grad_norm": 0.5780928730964661, "learning_rate": 2.9611737542171894e-06, "loss": 0.2695, "step": 40320 }, { "epoch": 0.7485807004971725, "grad_norm": 0.32323622703552246, "learning_rate": 2.9603452247720942e-06, "loss": 0.178, "step": 40322 }, { "epoch": 0.7486178306345912, "grad_norm": 0.401479035615921, "learning_rate": 2.9595167911156185e-06, "loss": 0.2292, "step": 40324 }, { "epoch": 0.7486549607720098, "grad_norm": 0.33542394638061523, "learning_rate": 2.9586884532590266e-06, "loss": 0.2103, "step": 40326 }, { "epoch": 0.7486920909094285, "grad_norm": 0.5383530259132385, "learning_rate": 2.9578602112135936e-06, "loss": 0.1851, "step": 40328 }, { "epoch": 0.7487292210468471, "grad_norm": 0.35175302624702454, "learning_rate": 2.9570320649905924e-06, "loss": 0.2687, "step": 40330 }, { "epoch": 0.7487663511842657, "grad_norm": 0.3417210876941681, "learning_rate": 2.956204014601286e-06, "loss": 0.2776, "step": 40332 }, { "epoch": 0.7488034813216844, "grad_norm": 0.7169796824455261, "learning_rate": 2.955376060056945e-06, "loss": 0.2694, "step": 40334 }, { "epoch": 0.748840611459103, "grad_norm": 0.26623085141181946, "learning_rate": 2.9545482013688342e-06, "loss": 0.2998, "step": 40336 }, { "epoch": 0.7488777415965217, "grad_norm": 0.3821668326854706, "learning_rate": 2.953720438548219e-06, "loss": 0.2424, "step": 40338 }, { "epoch": 0.7489148717339403, "grad_norm": 0.5337623357772827, "learning_rate": 2.9528927716063615e-06, "loss": 0.2972, "step": 40340 }, { "epoch": 0.7489520018713589, "grad_norm": 0.3525103032588959, "learning_rate": 2.9520652005545292e-06, "loss": 0.296, "step": 40342 }, { "epoch": 0.7489891320087776, "grad_norm": 0.4213385581970215, "learning_rate": 2.9512377254039747e-06, "loss": 0.1518, "step": 40344 }, { "epoch": 0.7490262621461962, "grad_norm": 0.4230386018753052, "learning_rate": 2.950410346165965e-06, "loss": 0.3798, "step": 40346 }, { "epoch": 0.7490633922836148, "grad_norm": 0.28160858154296875, "learning_rate": 2.9495830628517508e-06, "loss": 0.2271, "step": 40348 }, { "epoch": 0.7491005224210335, "grad_norm": 0.22811903059482574, "learning_rate": 2.9487558754725933e-06, "loss": 0.1881, "step": 40350 }, { "epoch": 0.7491376525584521, "grad_norm": 0.5182791352272034, "learning_rate": 2.947928784039746e-06, "loss": 0.3255, "step": 40352 }, { "epoch": 0.7491747826958708, "grad_norm": 0.40683820843696594, "learning_rate": 2.9471017885644694e-06, "loss": 0.1133, "step": 40354 }, { "epoch": 0.7492119128332894, "grad_norm": 0.3923896253108978, "learning_rate": 2.9462748890580073e-06, "loss": 0.4264, "step": 40356 }, { "epoch": 0.749249042970708, "grad_norm": 0.44196438789367676, "learning_rate": 2.9454480855316157e-06, "loss": 0.1462, "step": 40358 }, { "epoch": 0.7492861731081266, "grad_norm": 0.40478309988975525, "learning_rate": 2.944621377996546e-06, "loss": 0.5034, "step": 40360 }, { "epoch": 0.7493233032455453, "grad_norm": 0.5707475543022156, "learning_rate": 2.9437947664640453e-06, "loss": 0.3089, "step": 40362 }, { "epoch": 0.749360433382964, "grad_norm": 0.3860756754875183, "learning_rate": 2.9429682509453615e-06, "loss": 0.165, "step": 40364 }, { "epoch": 0.7493975635203826, "grad_norm": 0.34053704142570496, "learning_rate": 2.9421418314517447e-06, "loss": 0.2351, "step": 40366 }, { "epoch": 0.7494346936578012, "grad_norm": 0.3145916163921356, "learning_rate": 2.9413155079944376e-06, "loss": 0.2494, "step": 40368 }, { "epoch": 0.7494718237952198, "grad_norm": 0.44671714305877686, "learning_rate": 2.9404892805846794e-06, "loss": 0.2587, "step": 40370 }, { "epoch": 0.7495089539326385, "grad_norm": 0.4623572528362274, "learning_rate": 2.9396631492337168e-06, "loss": 0.2719, "step": 40372 }, { "epoch": 0.7495460840700572, "grad_norm": 0.62232905626297, "learning_rate": 2.9388371139527918e-06, "loss": 0.1431, "step": 40374 }, { "epoch": 0.7495832142074758, "grad_norm": 0.35104212164878845, "learning_rate": 2.9380111747531416e-06, "loss": 0.4099, "step": 40376 }, { "epoch": 0.7496203443448944, "grad_norm": 0.3375902473926544, "learning_rate": 2.9371853316460065e-06, "loss": 0.2952, "step": 40378 }, { "epoch": 0.749657474482313, "grad_norm": 0.2781085968017578, "learning_rate": 2.9363595846426264e-06, "loss": 0.1928, "step": 40380 }, { "epoch": 0.7496946046197317, "grad_norm": 0.25676101446151733, "learning_rate": 2.935533933754231e-06, "loss": 0.0724, "step": 40382 }, { "epoch": 0.7497317347571504, "grad_norm": 0.4692309498786926, "learning_rate": 2.9347083789920583e-06, "loss": 0.1377, "step": 40384 }, { "epoch": 0.749768864894569, "grad_norm": 0.470002681016922, "learning_rate": 2.9338829203673415e-06, "loss": 0.2447, "step": 40386 }, { "epoch": 0.7498059950319876, "grad_norm": 0.5296831727027893, "learning_rate": 2.9330575578913167e-06, "loss": 0.2101, "step": 40388 }, { "epoch": 0.7498431251694062, "grad_norm": 0.3243440091609955, "learning_rate": 2.9322322915752065e-06, "loss": 0.4029, "step": 40390 }, { "epoch": 0.7498802553068249, "grad_norm": 0.46210619807243347, "learning_rate": 2.9314071214302473e-06, "loss": 0.2203, "step": 40392 }, { "epoch": 0.7499173854442436, "grad_norm": 0.598175585269928, "learning_rate": 2.9305820474676627e-06, "loss": 0.3718, "step": 40394 }, { "epoch": 0.7499545155816622, "grad_norm": 0.3679395318031311, "learning_rate": 2.92975706969868e-06, "loss": 0.2839, "step": 40396 }, { "epoch": 0.7499916457190808, "grad_norm": 0.3775363862514496, "learning_rate": 2.9289321881345257e-06, "loss": 0.3137, "step": 40398 }, { "epoch": 0.7500287758564994, "grad_norm": 0.3015807569026947, "learning_rate": 2.9281074027864243e-06, "loss": 0.0868, "step": 40400 }, { "epoch": 0.7500659059939181, "grad_norm": 0.2993025481700897, "learning_rate": 2.927282713665598e-06, "loss": 0.2463, "step": 40402 }, { "epoch": 0.7501030361313368, "grad_norm": 0.29711946845054626, "learning_rate": 2.9264581207832687e-06, "loss": 0.264, "step": 40404 }, { "epoch": 0.7501401662687553, "grad_norm": 0.5153632760047913, "learning_rate": 2.92563362415066e-06, "loss": 0.423, "step": 40406 }, { "epoch": 0.750177296406174, "grad_norm": 0.30841514468193054, "learning_rate": 2.924809223778985e-06, "loss": 0.5534, "step": 40408 }, { "epoch": 0.7502144265435926, "grad_norm": 0.4487874507904053, "learning_rate": 2.923984919679467e-06, "loss": 0.3449, "step": 40410 }, { "epoch": 0.7502515566810113, "grad_norm": 0.3042234778404236, "learning_rate": 2.9231607118633143e-06, "loss": 0.2943, "step": 40412 }, { "epoch": 0.7502886868184299, "grad_norm": 0.38850268721580505, "learning_rate": 2.9223366003417487e-06, "loss": 0.239, "step": 40414 }, { "epoch": 0.7503258169558485, "grad_norm": 0.5220369100570679, "learning_rate": 2.9215125851259806e-06, "loss": 0.2571, "step": 40416 }, { "epoch": 0.7503629470932672, "grad_norm": 0.34970369935035706, "learning_rate": 2.920688666227227e-06, "loss": 0.284, "step": 40418 }, { "epoch": 0.7504000772306858, "grad_norm": 0.3791286051273346, "learning_rate": 2.9198648436566935e-06, "loss": 0.1438, "step": 40420 }, { "epoch": 0.7504372073681045, "grad_norm": 0.45752692222595215, "learning_rate": 2.9190411174255907e-06, "loss": 0.1872, "step": 40422 }, { "epoch": 0.7504743375055231, "grad_norm": 0.662827730178833, "learning_rate": 2.9182174875451287e-06, "loss": 0.3803, "step": 40424 }, { "epoch": 0.7505114676429417, "grad_norm": 0.49187585711479187, "learning_rate": 2.917393954026515e-06, "loss": 0.2737, "step": 40426 }, { "epoch": 0.7505485977803604, "grad_norm": 0.3631449341773987, "learning_rate": 2.9165705168809544e-06, "loss": 0.2014, "step": 40428 }, { "epoch": 0.750585727917779, "grad_norm": 0.3902400732040405, "learning_rate": 2.9157471761196543e-06, "loss": 0.1933, "step": 40430 }, { "epoch": 0.7506228580551977, "grad_norm": 0.340748131275177, "learning_rate": 2.9149239317538156e-06, "loss": 0.3324, "step": 40432 }, { "epoch": 0.7506599881926163, "grad_norm": 0.4365726709365845, "learning_rate": 2.9141007837946378e-06, "loss": 0.3099, "step": 40434 }, { "epoch": 0.7506971183300349, "grad_norm": 0.5434020161628723, "learning_rate": 2.9132777322533224e-06, "loss": 0.2396, "step": 40436 }, { "epoch": 0.7507342484674536, "grad_norm": 0.5789579153060913, "learning_rate": 2.9124547771410706e-06, "loss": 0.3646, "step": 40438 }, { "epoch": 0.7507713786048722, "grad_norm": 0.5691447854042053, "learning_rate": 2.911631918469079e-06, "loss": 0.1807, "step": 40440 }, { "epoch": 0.7508085087422909, "grad_norm": 0.5294763445854187, "learning_rate": 2.910809156248545e-06, "loss": 0.2373, "step": 40442 }, { "epoch": 0.7508456388797095, "grad_norm": 0.5125042200088501, "learning_rate": 2.909986490490667e-06, "loss": 0.186, "step": 40444 }, { "epoch": 0.7508827690171281, "grad_norm": 0.5759803056716919, "learning_rate": 2.9091639212066323e-06, "loss": 0.3935, "step": 40446 }, { "epoch": 0.7509198991545468, "grad_norm": 0.4840807616710663, "learning_rate": 2.9083414484076377e-06, "loss": 0.3367, "step": 40448 }, { "epoch": 0.7509570292919654, "grad_norm": 0.46738529205322266, "learning_rate": 2.907519072104874e-06, "loss": 0.4648, "step": 40450 }, { "epoch": 0.7509941594293841, "grad_norm": 0.6332831978797913, "learning_rate": 2.9066967923095345e-06, "loss": 0.1158, "step": 40452 }, { "epoch": 0.7510312895668027, "grad_norm": 0.5030101537704468, "learning_rate": 2.9058746090328015e-06, "loss": 0.2799, "step": 40454 }, { "epoch": 0.7510684197042213, "grad_norm": 0.43413105607032776, "learning_rate": 2.9050525222858693e-06, "loss": 0.2372, "step": 40456 }, { "epoch": 0.75110554984164, "grad_norm": 0.3051797151565552, "learning_rate": 2.904230532079917e-06, "loss": 0.3472, "step": 40458 }, { "epoch": 0.7511426799790586, "grad_norm": 0.24058611690998077, "learning_rate": 2.903408638426132e-06, "loss": 0.1473, "step": 40460 }, { "epoch": 0.7511798101164773, "grad_norm": 0.5001422762870789, "learning_rate": 2.9025868413356995e-06, "loss": 0.2563, "step": 40462 }, { "epoch": 0.7512169402538958, "grad_norm": 0.31162333488464355, "learning_rate": 2.901765140819801e-06, "loss": 0.3397, "step": 40464 }, { "epoch": 0.7512540703913145, "grad_norm": 0.48022735118865967, "learning_rate": 2.9009435368896167e-06, "loss": 0.2954, "step": 40466 }, { "epoch": 0.7512912005287331, "grad_norm": 0.35977819561958313, "learning_rate": 2.9001220295563306e-06, "loss": 0.3145, "step": 40468 }, { "epoch": 0.7513283306661518, "grad_norm": 0.4534611701965332, "learning_rate": 2.8993006188311136e-06, "loss": 0.3072, "step": 40470 }, { "epoch": 0.7513654608035705, "grad_norm": 0.27284952998161316, "learning_rate": 2.898479304725146e-06, "loss": 0.166, "step": 40472 }, { "epoch": 0.751402590940989, "grad_norm": 0.39503586292266846, "learning_rate": 2.897658087249606e-06, "loss": 0.2237, "step": 40474 }, { "epoch": 0.7514397210784077, "grad_norm": 0.3290507197380066, "learning_rate": 2.8968369664156636e-06, "loss": 0.4754, "step": 40476 }, { "epoch": 0.7514768512158263, "grad_norm": 0.3885556757450104, "learning_rate": 2.8960159422344925e-06, "loss": 0.5884, "step": 40478 }, { "epoch": 0.751513981353245, "grad_norm": 0.32867395877838135, "learning_rate": 2.8951950147172694e-06, "loss": 0.225, "step": 40480 }, { "epoch": 0.7515511114906637, "grad_norm": 0.8079530000686646, "learning_rate": 2.8943741838751572e-06, "loss": 0.2395, "step": 40482 }, { "epoch": 0.7515882416280822, "grad_norm": 0.3274528682231903, "learning_rate": 2.8935534497193297e-06, "loss": 0.1432, "step": 40484 }, { "epoch": 0.7516253717655009, "grad_norm": 0.32301655411720276, "learning_rate": 2.892732812260952e-06, "loss": 0.2714, "step": 40486 }, { "epoch": 0.7516625019029195, "grad_norm": 0.45325884222984314, "learning_rate": 2.8919122715111924e-06, "loss": 0.242, "step": 40488 }, { "epoch": 0.7516996320403382, "grad_norm": 0.6501412987709045, "learning_rate": 2.891091827481216e-06, "loss": 0.5304, "step": 40490 }, { "epoch": 0.7517367621777569, "grad_norm": 0.32770755887031555, "learning_rate": 2.8902714801821896e-06, "loss": 0.2237, "step": 40492 }, { "epoch": 0.7517738923151754, "grad_norm": 0.4423248767852783, "learning_rate": 2.8894512296252688e-06, "loss": 0.3921, "step": 40494 }, { "epoch": 0.7518110224525941, "grad_norm": 0.2605426013469696, "learning_rate": 2.8886310758216206e-06, "loss": 0.2229, "step": 40496 }, { "epoch": 0.7518481525900127, "grad_norm": 0.5913214087486267, "learning_rate": 2.8878110187823993e-06, "loss": 0.348, "step": 40498 }, { "epoch": 0.7518852827274314, "grad_norm": 0.21425849199295044, "learning_rate": 2.886991058518768e-06, "loss": 0.1859, "step": 40500 }, { "epoch": 0.7519224128648501, "grad_norm": 0.5110229253768921, "learning_rate": 2.8861711950418813e-06, "loss": 0.3533, "step": 40502 }, { "epoch": 0.7519595430022686, "grad_norm": 0.4079522490501404, "learning_rate": 2.885351428362897e-06, "loss": 0.176, "step": 40504 }, { "epoch": 0.7519966731396873, "grad_norm": 0.36067524552345276, "learning_rate": 2.884531758492971e-06, "loss": 0.2977, "step": 40506 }, { "epoch": 0.7520338032771059, "grad_norm": 0.3455793261528015, "learning_rate": 2.8837121854432524e-06, "loss": 0.3418, "step": 40508 }, { "epoch": 0.7520709334145246, "grad_norm": 0.3733421266078949, "learning_rate": 2.882892709224895e-06, "loss": 0.2708, "step": 40510 }, { "epoch": 0.7521080635519432, "grad_norm": 0.617104172706604, "learning_rate": 2.882073329849049e-06, "loss": 0.2565, "step": 40512 }, { "epoch": 0.7521451936893618, "grad_norm": 0.33861207962036133, "learning_rate": 2.881254047326868e-06, "loss": 0.1494, "step": 40514 }, { "epoch": 0.7521823238267805, "grad_norm": 0.37827083468437195, "learning_rate": 2.8804348616694933e-06, "loss": 0.4645, "step": 40516 }, { "epoch": 0.7522194539641991, "grad_norm": 0.33447524905204773, "learning_rate": 2.8796157728880767e-06, "loss": 0.3223, "step": 40518 }, { "epoch": 0.7522565841016178, "grad_norm": 0.2812304198741913, "learning_rate": 2.878796780993759e-06, "loss": 0.2313, "step": 40520 }, { "epoch": 0.7522937142390363, "grad_norm": 0.39499467611312866, "learning_rate": 2.877977885997687e-06, "loss": 0.3434, "step": 40522 }, { "epoch": 0.752330844376455, "grad_norm": 0.5273628234863281, "learning_rate": 2.8771590879110022e-06, "loss": 0.144, "step": 40524 }, { "epoch": 0.7523679745138737, "grad_norm": 0.3484586477279663, "learning_rate": 2.8763403867448482e-06, "loss": 0.2065, "step": 40526 }, { "epoch": 0.7524051046512923, "grad_norm": 0.31164637207984924, "learning_rate": 2.875521782510362e-06, "loss": 0.286, "step": 40528 }, { "epoch": 0.752442234788711, "grad_norm": 0.2688003480434418, "learning_rate": 2.8747032752186856e-06, "loss": 0.3688, "step": 40530 }, { "epoch": 0.7524793649261295, "grad_norm": 0.3683149218559265, "learning_rate": 2.873884864880957e-06, "loss": 0.3259, "step": 40532 }, { "epoch": 0.7525164950635482, "grad_norm": 0.412943959236145, "learning_rate": 2.8730665515083066e-06, "loss": 0.3885, "step": 40534 }, { "epoch": 0.7525536252009669, "grad_norm": 0.3783700466156006, "learning_rate": 2.8722483351118735e-06, "loss": 0.3355, "step": 40536 }, { "epoch": 0.7525907553383855, "grad_norm": 0.40683338046073914, "learning_rate": 2.871430215702794e-06, "loss": 0.2741, "step": 40538 }, { "epoch": 0.7526278854758042, "grad_norm": 0.198213130235672, "learning_rate": 2.870612193292194e-06, "loss": 0.0902, "step": 40540 }, { "epoch": 0.7526650156132227, "grad_norm": 0.4948136806488037, "learning_rate": 2.8697942678912062e-06, "loss": 0.2335, "step": 40542 }, { "epoch": 0.7527021457506414, "grad_norm": 0.4490640163421631, "learning_rate": 2.8689764395109643e-06, "loss": 0.1516, "step": 40544 }, { "epoch": 0.7527392758880601, "grad_norm": 0.3021415174007416, "learning_rate": 2.8681587081625893e-06, "loss": 0.4037, "step": 40546 }, { "epoch": 0.7527764060254787, "grad_norm": 0.3851489722728729, "learning_rate": 2.867341073857213e-06, "loss": 0.3788, "step": 40548 }, { "epoch": 0.7528135361628974, "grad_norm": 0.2587454319000244, "learning_rate": 2.8665235366059596e-06, "loss": 0.2941, "step": 40550 }, { "epoch": 0.7528506663003159, "grad_norm": 0.40624019503593445, "learning_rate": 2.8657060964199535e-06, "loss": 0.2295, "step": 40552 }, { "epoch": 0.7528877964377346, "grad_norm": 0.3450055718421936, "learning_rate": 2.8648887533103178e-06, "loss": 0.3944, "step": 40554 }, { "epoch": 0.7529249265751533, "grad_norm": 0.6019555926322937, "learning_rate": 2.8640715072881766e-06, "loss": 0.4686, "step": 40556 }, { "epoch": 0.7529620567125719, "grad_norm": 0.3090994954109192, "learning_rate": 2.8632543583646456e-06, "loss": 0.2455, "step": 40558 }, { "epoch": 0.7529991868499906, "grad_norm": 0.43548765778541565, "learning_rate": 2.8624373065508483e-06, "loss": 0.3514, "step": 40560 }, { "epoch": 0.7530363169874091, "grad_norm": 0.2948155105113983, "learning_rate": 2.8616203518578966e-06, "loss": 0.1562, "step": 40562 }, { "epoch": 0.7530734471248278, "grad_norm": 0.35642364621162415, "learning_rate": 2.86080349429691e-06, "loss": 0.3864, "step": 40564 }, { "epoch": 0.7531105772622464, "grad_norm": 0.774732232093811, "learning_rate": 2.859986733879003e-06, "loss": 0.3173, "step": 40566 }, { "epoch": 0.7531477073996651, "grad_norm": 0.3449834883213043, "learning_rate": 2.8591700706152904e-06, "loss": 0.218, "step": 40568 }, { "epoch": 0.7531848375370838, "grad_norm": 0.4287796914577484, "learning_rate": 2.8583535045168865e-06, "loss": 0.2804, "step": 40570 }, { "epoch": 0.7532219676745023, "grad_norm": 0.5407571196556091, "learning_rate": 2.8575370355948974e-06, "loss": 0.3317, "step": 40572 }, { "epoch": 0.753259097811921, "grad_norm": 0.43815404176712036, "learning_rate": 2.856720663860435e-06, "loss": 0.5291, "step": 40574 }, { "epoch": 0.7532962279493396, "grad_norm": 0.27817341685295105, "learning_rate": 2.8559043893246074e-06, "loss": 0.258, "step": 40576 }, { "epoch": 0.7533333580867583, "grad_norm": 0.2578461766242981, "learning_rate": 2.8550882119985245e-06, "loss": 0.1228, "step": 40578 }, { "epoch": 0.753370488224177, "grad_norm": 0.5653959512710571, "learning_rate": 2.8542721318932875e-06, "loss": 0.1571, "step": 40580 }, { "epoch": 0.7534076183615955, "grad_norm": 0.605609655380249, "learning_rate": 2.853456149020005e-06, "loss": 0.1191, "step": 40582 }, { "epoch": 0.7534447484990142, "grad_norm": 0.47384288907051086, "learning_rate": 2.8526402633897754e-06, "loss": 0.3095, "step": 40584 }, { "epoch": 0.7534818786364328, "grad_norm": 0.4427448511123657, "learning_rate": 2.8518244750137024e-06, "loss": 0.3287, "step": 40586 }, { "epoch": 0.7535190087738515, "grad_norm": 0.8036258220672607, "learning_rate": 2.8510087839028877e-06, "loss": 0.1581, "step": 40588 }, { "epoch": 0.7535561389112702, "grad_norm": 0.30833467841148376, "learning_rate": 2.85019319006843e-06, "loss": 0.2485, "step": 40590 }, { "epoch": 0.7535932690486887, "grad_norm": 0.5380029678344727, "learning_rate": 2.8493776935214268e-06, "loss": 0.1126, "step": 40592 }, { "epoch": 0.7536303991861074, "grad_norm": 0.32691627740859985, "learning_rate": 2.8485622942729775e-06, "loss": 0.2155, "step": 40594 }, { "epoch": 0.753667529323526, "grad_norm": 0.3697568476200104, "learning_rate": 2.8477469923341707e-06, "loss": 0.3845, "step": 40596 }, { "epoch": 0.7537046594609447, "grad_norm": 0.4236752688884735, "learning_rate": 2.846931787716104e-06, "loss": 0.2602, "step": 40598 }, { "epoch": 0.7537417895983634, "grad_norm": 0.48736318945884705, "learning_rate": 2.8461166804298736e-06, "loss": 0.3049, "step": 40600 }, { "epoch": 0.7537789197357819, "grad_norm": 0.2925927937030792, "learning_rate": 2.8453016704865633e-06, "loss": 0.2303, "step": 40602 }, { "epoch": 0.7538160498732006, "grad_norm": 0.2856064438819885, "learning_rate": 2.8444867578972656e-06, "loss": 0.3776, "step": 40604 }, { "epoch": 0.7538531800106192, "grad_norm": 0.36873093247413635, "learning_rate": 2.8436719426730706e-06, "loss": 0.3348, "step": 40606 }, { "epoch": 0.7538903101480379, "grad_norm": 0.2899578809738159, "learning_rate": 2.8428572248250687e-06, "loss": 0.1771, "step": 40608 }, { "epoch": 0.7539274402854566, "grad_norm": 0.5378159880638123, "learning_rate": 2.8420426043643377e-06, "loss": 0.1927, "step": 40610 }, { "epoch": 0.7539645704228751, "grad_norm": 0.26763346791267395, "learning_rate": 2.8412280813019665e-06, "loss": 0.2314, "step": 40612 }, { "epoch": 0.7540017005602938, "grad_norm": 0.3388088345527649, "learning_rate": 2.840413655649038e-06, "loss": 0.2281, "step": 40614 }, { "epoch": 0.7540388306977124, "grad_norm": 0.36535537242889404, "learning_rate": 2.8395993274166344e-06, "loss": 0.2545, "step": 40616 }, { "epoch": 0.7540759608351311, "grad_norm": 0.35980138182640076, "learning_rate": 2.8387850966158357e-06, "loss": 0.3308, "step": 40618 }, { "epoch": 0.7541130909725496, "grad_norm": 0.5129694938659668, "learning_rate": 2.837970963257726e-06, "loss": 0.1353, "step": 40620 }, { "epoch": 0.7541502211099683, "grad_norm": 0.6546342968940735, "learning_rate": 2.8371569273533773e-06, "loss": 0.2279, "step": 40622 }, { "epoch": 0.754187351247387, "grad_norm": 0.3177323341369629, "learning_rate": 2.836342988913865e-06, "loss": 0.4437, "step": 40624 }, { "epoch": 0.7542244813848056, "grad_norm": 0.34303364157676697, "learning_rate": 2.8355291479502665e-06, "loss": 0.3014, "step": 40626 }, { "epoch": 0.7542616115222243, "grad_norm": 0.4379226565361023, "learning_rate": 2.8347154044736567e-06, "loss": 0.4724, "step": 40628 }, { "epoch": 0.7542987416596428, "grad_norm": 0.2685122787952423, "learning_rate": 2.833901758495108e-06, "loss": 0.2648, "step": 40630 }, { "epoch": 0.7543358717970615, "grad_norm": 0.49419277906417847, "learning_rate": 2.833088210025694e-06, "loss": 0.288, "step": 40632 }, { "epoch": 0.7543730019344802, "grad_norm": 0.41111984848976135, "learning_rate": 2.8322747590764786e-06, "loss": 0.2239, "step": 40634 }, { "epoch": 0.7544101320718988, "grad_norm": 0.34498411417007446, "learning_rate": 2.8314614056585342e-06, "loss": 0.1597, "step": 40636 }, { "epoch": 0.7544472622093175, "grad_norm": 0.13146619498729706, "learning_rate": 2.8306481497829288e-06, "loss": 0.0745, "step": 40638 }, { "epoch": 0.754484392346736, "grad_norm": 0.41899600625038147, "learning_rate": 2.8298349914607258e-06, "loss": 0.1341, "step": 40640 }, { "epoch": 0.7545215224841547, "grad_norm": 0.35171565413475037, "learning_rate": 2.8290219307029963e-06, "loss": 0.1642, "step": 40642 }, { "epoch": 0.7545586526215734, "grad_norm": 0.2993064522743225, "learning_rate": 2.8282089675207947e-06, "loss": 0.216, "step": 40644 }, { "epoch": 0.754595782758992, "grad_norm": 0.7365838885307312, "learning_rate": 2.8273961019251895e-06, "loss": 0.2661, "step": 40646 }, { "epoch": 0.7546329128964107, "grad_norm": 0.3117922246456146, "learning_rate": 2.8265833339272366e-06, "loss": 0.3188, "step": 40648 }, { "epoch": 0.7546700430338292, "grad_norm": 0.2788543701171875, "learning_rate": 2.8257706635379977e-06, "loss": 0.1794, "step": 40650 }, { "epoch": 0.7547071731712479, "grad_norm": 0.5035040974617004, "learning_rate": 2.8249580907685302e-06, "loss": 0.2956, "step": 40652 }, { "epoch": 0.7547443033086666, "grad_norm": 0.35695546865463257, "learning_rate": 2.824145615629892e-06, "loss": 0.2673, "step": 40654 }, { "epoch": 0.7547814334460852, "grad_norm": 0.4621291756629944, "learning_rate": 2.823333238133138e-06, "loss": 0.1805, "step": 40656 }, { "epoch": 0.7548185635835039, "grad_norm": 0.3534008264541626, "learning_rate": 2.822520958289324e-06, "loss": 0.1501, "step": 40658 }, { "epoch": 0.7548556937209224, "grad_norm": 0.41241827607154846, "learning_rate": 2.8217087761094986e-06, "loss": 0.2812, "step": 40660 }, { "epoch": 0.7548928238583411, "grad_norm": 0.34639212489128113, "learning_rate": 2.8208966916047154e-06, "loss": 0.1477, "step": 40662 }, { "epoch": 0.7549299539957597, "grad_norm": 0.35248103737831116, "learning_rate": 2.820084704786027e-06, "loss": 0.148, "step": 40664 }, { "epoch": 0.7549670841331784, "grad_norm": 0.4236123263835907, "learning_rate": 2.8192728156644766e-06, "loss": 0.1804, "step": 40666 }, { "epoch": 0.755004214270597, "grad_norm": 0.2215155065059662, "learning_rate": 2.8184610242511134e-06, "loss": 0.2202, "step": 40668 }, { "epoch": 0.7550413444080156, "grad_norm": 0.2704628109931946, "learning_rate": 2.8176493305569885e-06, "loss": 0.3568, "step": 40670 }, { "epoch": 0.7550784745454343, "grad_norm": 0.4117063283920288, "learning_rate": 2.8168377345931396e-06, "loss": 0.3519, "step": 40672 }, { "epoch": 0.7551156046828529, "grad_norm": 0.3211822211742401, "learning_rate": 2.8160262363706126e-06, "loss": 0.2875, "step": 40674 }, { "epoch": 0.7551527348202716, "grad_norm": 0.5700035691261292, "learning_rate": 2.8152148359004505e-06, "loss": 0.1892, "step": 40676 }, { "epoch": 0.7551898649576902, "grad_norm": 0.16704770922660828, "learning_rate": 2.8144035331936924e-06, "loss": 0.3511, "step": 40678 }, { "epoch": 0.7552269950951088, "grad_norm": 0.2647544741630554, "learning_rate": 2.8135923282613797e-06, "loss": 0.3958, "step": 40680 }, { "epoch": 0.7552641252325275, "grad_norm": 0.37671977281570435, "learning_rate": 2.812781221114553e-06, "loss": 0.3345, "step": 40682 }, { "epoch": 0.7553012553699461, "grad_norm": 0.5648159980773926, "learning_rate": 2.8119702117642422e-06, "loss": 0.2912, "step": 40684 }, { "epoch": 0.7553383855073648, "grad_norm": 0.4012536406517029, "learning_rate": 2.81115930022149e-06, "loss": 0.1996, "step": 40686 }, { "epoch": 0.7553755156447834, "grad_norm": 0.3259325325489044, "learning_rate": 2.810348486497323e-06, "loss": 0.2116, "step": 40688 }, { "epoch": 0.755412645782202, "grad_norm": 0.3881808817386627, "learning_rate": 2.809537770602777e-06, "loss": 0.348, "step": 40690 }, { "epoch": 0.7554497759196207, "grad_norm": 0.39706704020500183, "learning_rate": 2.8087271525488847e-06, "loss": 0.3307, "step": 40692 }, { "epoch": 0.7554869060570393, "grad_norm": 0.24664393067359924, "learning_rate": 2.8079166323466754e-06, "loss": 0.2776, "step": 40694 }, { "epoch": 0.755524036194458, "grad_norm": 0.33150362968444824, "learning_rate": 2.807106210007181e-06, "loss": 0.2319, "step": 40696 }, { "epoch": 0.7555611663318766, "grad_norm": 0.638157069683075, "learning_rate": 2.8062958855414225e-06, "loss": 0.3357, "step": 40698 }, { "epoch": 0.7555982964692952, "grad_norm": 0.4024103581905365, "learning_rate": 2.8054856589604294e-06, "loss": 0.2249, "step": 40700 }, { "epoch": 0.7556354266067139, "grad_norm": 0.3136601150035858, "learning_rate": 2.8046755302752272e-06, "loss": 0.1606, "step": 40702 }, { "epoch": 0.7556725567441325, "grad_norm": 0.3830137252807617, "learning_rate": 2.8038654994968385e-06, "loss": 0.2685, "step": 40704 }, { "epoch": 0.7557096868815512, "grad_norm": 0.4332003593444824, "learning_rate": 2.803055566636288e-06, "loss": 0.2527, "step": 40706 }, { "epoch": 0.7557468170189698, "grad_norm": 0.39073067903518677, "learning_rate": 2.8022457317045938e-06, "loss": 0.3277, "step": 40708 }, { "epoch": 0.7557839471563884, "grad_norm": 0.3377404510974884, "learning_rate": 2.801435994712772e-06, "loss": 0.2248, "step": 40710 }, { "epoch": 0.7558210772938071, "grad_norm": 0.4313433766365051, "learning_rate": 2.800626355671845e-06, "loss": 0.4138, "step": 40712 }, { "epoch": 0.7558582074312257, "grad_norm": 0.3503766655921936, "learning_rate": 2.7998168145928274e-06, "loss": 0.1313, "step": 40714 }, { "epoch": 0.7558953375686444, "grad_norm": 0.3087615370750427, "learning_rate": 2.7990073714867373e-06, "loss": 0.2765, "step": 40716 }, { "epoch": 0.7559324677060629, "grad_norm": 0.36310046911239624, "learning_rate": 2.798198026364587e-06, "loss": 0.2314, "step": 40718 }, { "epoch": 0.7559695978434816, "grad_norm": 0.7252974510192871, "learning_rate": 2.797388779237392e-06, "loss": 0.3686, "step": 40720 }, { "epoch": 0.7560067279809003, "grad_norm": 0.25719764828681946, "learning_rate": 2.7965796301161596e-06, "loss": 0.198, "step": 40722 }, { "epoch": 0.7560438581183189, "grad_norm": 0.45833274722099304, "learning_rate": 2.7957705790119005e-06, "loss": 0.287, "step": 40724 }, { "epoch": 0.7560809882557376, "grad_norm": 0.38273656368255615, "learning_rate": 2.794961625935625e-06, "loss": 0.464, "step": 40726 }, { "epoch": 0.7561181183931561, "grad_norm": 0.32022449374198914, "learning_rate": 2.794152770898344e-06, "loss": 0.1667, "step": 40728 }, { "epoch": 0.7561552485305748, "grad_norm": 0.4735936224460602, "learning_rate": 2.793344013911056e-06, "loss": 0.3079, "step": 40730 }, { "epoch": 0.7561923786679935, "grad_norm": 0.2802967429161072, "learning_rate": 2.792535354984769e-06, "loss": 0.1607, "step": 40732 }, { "epoch": 0.7562295088054121, "grad_norm": 0.3773616552352905, "learning_rate": 2.7917267941304917e-06, "loss": 0.2924, "step": 40734 }, { "epoch": 0.7562666389428307, "grad_norm": 0.3613657057285309, "learning_rate": 2.790918331359217e-06, "loss": 0.1795, "step": 40736 }, { "epoch": 0.7563037690802493, "grad_norm": 0.6214427351951599, "learning_rate": 2.7901099666819497e-06, "loss": 0.2521, "step": 40738 }, { "epoch": 0.756340899217668, "grad_norm": 0.277411550283432, "learning_rate": 2.789301700109691e-06, "loss": 0.1323, "step": 40740 }, { "epoch": 0.7563780293550867, "grad_norm": 0.3306434452533722, "learning_rate": 2.788493531653437e-06, "loss": 0.2587, "step": 40742 }, { "epoch": 0.7564151594925053, "grad_norm": 0.3856564462184906, "learning_rate": 2.7876854613241854e-06, "loss": 0.4018, "step": 40744 }, { "epoch": 0.756452289629924, "grad_norm": 0.20774756371974945, "learning_rate": 2.786877489132934e-06, "loss": 0.3185, "step": 40746 }, { "epoch": 0.7564894197673425, "grad_norm": 0.3551657199859619, "learning_rate": 2.786069615090671e-06, "loss": 0.2058, "step": 40748 }, { "epoch": 0.7565265499047612, "grad_norm": 0.4539802074432373, "learning_rate": 2.785261839208395e-06, "loss": 0.3063, "step": 40750 }, { "epoch": 0.7565636800421799, "grad_norm": 0.3690294921398163, "learning_rate": 2.7844541614970934e-06, "loss": 0.2518, "step": 40752 }, { "epoch": 0.7566008101795985, "grad_norm": 0.3058300316333771, "learning_rate": 2.783646581967756e-06, "loss": 0.2164, "step": 40754 }, { "epoch": 0.7566379403170171, "grad_norm": 0.33675098419189453, "learning_rate": 2.782839100631374e-06, "loss": 0.3299, "step": 40756 }, { "epoch": 0.7566750704544357, "grad_norm": 0.3547455668449402, "learning_rate": 2.7820317174989354e-06, "loss": 0.2072, "step": 40758 }, { "epoch": 0.7567122005918544, "grad_norm": 0.3308797776699066, "learning_rate": 2.781224432581423e-06, "loss": 0.0879, "step": 40760 }, { "epoch": 0.7567493307292731, "grad_norm": 0.4448954463005066, "learning_rate": 2.7804172458898236e-06, "loss": 0.2873, "step": 40762 }, { "epoch": 0.7567864608666917, "grad_norm": 0.2275913506746292, "learning_rate": 2.77961015743512e-06, "loss": 0.0451, "step": 40764 }, { "epoch": 0.7568235910041103, "grad_norm": 0.46014997363090515, "learning_rate": 2.7788031672282946e-06, "loss": 0.2575, "step": 40766 }, { "epoch": 0.7568607211415289, "grad_norm": 0.9196771383285522, "learning_rate": 2.7779962752803315e-06, "loss": 0.1706, "step": 40768 }, { "epoch": 0.7568978512789476, "grad_norm": 0.3335045874118805, "learning_rate": 2.777189481602203e-06, "loss": 0.3175, "step": 40770 }, { "epoch": 0.7569349814163662, "grad_norm": 0.4726858139038086, "learning_rate": 2.776382786204894e-06, "loss": 0.2372, "step": 40772 }, { "epoch": 0.7569721115537849, "grad_norm": 0.35630112886428833, "learning_rate": 2.775576189099375e-06, "loss": 0.2678, "step": 40774 }, { "epoch": 0.7570092416912035, "grad_norm": 0.49457061290740967, "learning_rate": 2.7747696902966246e-06, "loss": 0.2602, "step": 40776 }, { "epoch": 0.7570463718286221, "grad_norm": 0.48716500401496887, "learning_rate": 2.7739632898076164e-06, "loss": 0.327, "step": 40778 }, { "epoch": 0.7570835019660408, "grad_norm": 0.3364877998828888, "learning_rate": 2.773156987643324e-06, "loss": 0.1637, "step": 40780 }, { "epoch": 0.7571206321034594, "grad_norm": 0.38432666659355164, "learning_rate": 2.7723507838147167e-06, "loss": 0.3638, "step": 40782 }, { "epoch": 0.757157762240878, "grad_norm": 0.29207751154899597, "learning_rate": 2.7715446783327706e-06, "loss": 0.1658, "step": 40784 }, { "epoch": 0.7571948923782967, "grad_norm": 0.47117847204208374, "learning_rate": 2.770738671208446e-06, "loss": 0.4867, "step": 40786 }, { "epoch": 0.7572320225157153, "grad_norm": 0.24877022206783295, "learning_rate": 2.769932762452714e-06, "loss": 0.1501, "step": 40788 }, { "epoch": 0.757269152653134, "grad_norm": 0.6729599833488464, "learning_rate": 2.769126952076543e-06, "loss": 0.3526, "step": 40790 }, { "epoch": 0.7573062827905526, "grad_norm": 0.6289013028144836, "learning_rate": 2.768321240090892e-06, "loss": 0.3321, "step": 40792 }, { "epoch": 0.7573434129279712, "grad_norm": 0.5748385190963745, "learning_rate": 2.767515626506728e-06, "loss": 0.3446, "step": 40794 }, { "epoch": 0.7573805430653899, "grad_norm": 0.34937381744384766, "learning_rate": 2.766710111335017e-06, "loss": 0.1764, "step": 40796 }, { "epoch": 0.7574176732028085, "grad_norm": 0.416733980178833, "learning_rate": 2.765904694586711e-06, "loss": 0.1628, "step": 40798 }, { "epoch": 0.7574548033402272, "grad_norm": 0.2753572463989258, "learning_rate": 2.765099376272773e-06, "loss": 0.2028, "step": 40800 }, { "epoch": 0.7574919334776458, "grad_norm": 0.27628135681152344, "learning_rate": 2.7642941564041613e-06, "loss": 0.3271, "step": 40802 }, { "epoch": 0.7575290636150644, "grad_norm": 0.4308212995529175, "learning_rate": 2.763489034991833e-06, "loss": 0.2245, "step": 40804 }, { "epoch": 0.7575661937524831, "grad_norm": 0.381976455450058, "learning_rate": 2.7626840120467434e-06, "loss": 0.1739, "step": 40806 }, { "epoch": 0.7576033238899017, "grad_norm": 0.49038568139076233, "learning_rate": 2.7618790875798498e-06, "loss": 0.3273, "step": 40808 }, { "epoch": 0.7576404540273204, "grad_norm": 0.2933395802974701, "learning_rate": 2.761074261602097e-06, "loss": 0.2748, "step": 40810 }, { "epoch": 0.757677584164739, "grad_norm": 0.3352925777435303, "learning_rate": 2.76026953412444e-06, "loss": 0.156, "step": 40812 }, { "epoch": 0.7577147143021576, "grad_norm": 0.5246044397354126, "learning_rate": 2.7594649051578337e-06, "loss": 0.3903, "step": 40814 }, { "epoch": 0.7577518444395762, "grad_norm": 0.3429300785064697, "learning_rate": 2.758660374713218e-06, "loss": 0.3326, "step": 40816 }, { "epoch": 0.7577889745769949, "grad_norm": 0.3924695551395416, "learning_rate": 2.757855942801544e-06, "loss": 0.2355, "step": 40818 }, { "epoch": 0.7578261047144136, "grad_norm": 0.24280685186386108, "learning_rate": 2.7570516094337583e-06, "loss": 0.2993, "step": 40820 }, { "epoch": 0.7578632348518322, "grad_norm": 0.3577711880207062, "learning_rate": 2.7562473746208083e-06, "loss": 0.3782, "step": 40822 }, { "epoch": 0.7579003649892508, "grad_norm": 2.0443711280822754, "learning_rate": 2.755443238373632e-06, "loss": 0.4801, "step": 40824 }, { "epoch": 0.7579374951266694, "grad_norm": 0.4994984567165375, "learning_rate": 2.7546392007031718e-06, "loss": 0.2256, "step": 40826 }, { "epoch": 0.7579746252640881, "grad_norm": 0.38579806685447693, "learning_rate": 2.75383526162037e-06, "loss": 0.3012, "step": 40828 }, { "epoch": 0.7580117554015068, "grad_norm": 0.29628825187683105, "learning_rate": 2.7530314211361653e-06, "loss": 0.1733, "step": 40830 }, { "epoch": 0.7580488855389254, "grad_norm": 0.37760624289512634, "learning_rate": 2.7522276792614988e-06, "loss": 0.2721, "step": 40832 }, { "epoch": 0.758086015676344, "grad_norm": 0.5267041325569153, "learning_rate": 2.751424036007302e-06, "loss": 0.1598, "step": 40834 }, { "epoch": 0.7581231458137626, "grad_norm": 0.49770599603652954, "learning_rate": 2.7506204913845134e-06, "loss": 0.3423, "step": 40836 }, { "epoch": 0.7581602759511813, "grad_norm": 0.3012617230415344, "learning_rate": 2.749817045404064e-06, "loss": 0.1593, "step": 40838 }, { "epoch": 0.7581974060886, "grad_norm": 0.1653476506471634, "learning_rate": 2.7490136980768867e-06, "loss": 0.1664, "step": 40840 }, { "epoch": 0.7582345362260186, "grad_norm": 0.26005804538726807, "learning_rate": 2.748210449413915e-06, "loss": 0.335, "step": 40842 }, { "epoch": 0.7582716663634372, "grad_norm": 0.463040828704834, "learning_rate": 2.747407299426076e-06, "loss": 0.2482, "step": 40844 }, { "epoch": 0.7583087965008558, "grad_norm": 0.8196528553962708, "learning_rate": 2.7466042481243036e-06, "loss": 0.1671, "step": 40846 }, { "epoch": 0.7583459266382745, "grad_norm": 0.3480672538280487, "learning_rate": 2.745801295519518e-06, "loss": 0.2907, "step": 40848 }, { "epoch": 0.7583830567756932, "grad_norm": 0.4572582542896271, "learning_rate": 2.7449984416226473e-06, "loss": 0.259, "step": 40850 }, { "epoch": 0.7584201869131117, "grad_norm": 0.2905207872390747, "learning_rate": 2.7441956864446175e-06, "loss": 0.2163, "step": 40852 }, { "epoch": 0.7584573170505304, "grad_norm": 0.2966296970844269, "learning_rate": 2.743393029996353e-06, "loss": 0.2132, "step": 40854 }, { "epoch": 0.758494447187949, "grad_norm": 0.5910990238189697, "learning_rate": 2.7425904722887696e-06, "loss": 0.2204, "step": 40856 }, { "epoch": 0.7585315773253677, "grad_norm": 0.3515777587890625, "learning_rate": 2.7417880133327922e-06, "loss": 0.3988, "step": 40858 }, { "epoch": 0.7585687074627864, "grad_norm": 0.43112078309059143, "learning_rate": 2.740985653139343e-06, "loss": 0.4316, "step": 40860 }, { "epoch": 0.758605837600205, "grad_norm": 0.4284381866455078, "learning_rate": 2.740183391719332e-06, "loss": 0.4098, "step": 40862 }, { "epoch": 0.7586429677376236, "grad_norm": 0.4038177728652954, "learning_rate": 2.7393812290836784e-06, "loss": 0.3272, "step": 40864 }, { "epoch": 0.7586800978750422, "grad_norm": 0.3926438093185425, "learning_rate": 2.7385791652433e-06, "loss": 0.3342, "step": 40866 }, { "epoch": 0.7587172280124609, "grad_norm": 0.5182461738586426, "learning_rate": 2.7377772002091076e-06, "loss": 0.3247, "step": 40868 }, { "epoch": 0.7587543581498795, "grad_norm": 0.6056269407272339, "learning_rate": 2.7369753339920145e-06, "loss": 0.2858, "step": 40870 }, { "epoch": 0.7587914882872981, "grad_norm": 0.5373923778533936, "learning_rate": 2.7361735666029356e-06, "loss": 0.506, "step": 40872 }, { "epoch": 0.7588286184247168, "grad_norm": 0.4367193281650543, "learning_rate": 2.735371898052773e-06, "loss": 0.262, "step": 40874 }, { "epoch": 0.7588657485621354, "grad_norm": 0.3949589431285858, "learning_rate": 2.7345703283524428e-06, "loss": 0.145, "step": 40876 }, { "epoch": 0.7589028786995541, "grad_norm": 0.3879443407058716, "learning_rate": 2.733768857512844e-06, "loss": 0.2715, "step": 40878 }, { "epoch": 0.7589400088369727, "grad_norm": 0.457638680934906, "learning_rate": 2.732967485544886e-06, "loss": 0.4456, "step": 40880 }, { "epoch": 0.7589771389743913, "grad_norm": 0.40680208802223206, "learning_rate": 2.732166212459474e-06, "loss": 0.139, "step": 40882 }, { "epoch": 0.75901426911181, "grad_norm": 0.3470662534236908, "learning_rate": 2.7313650382675127e-06, "loss": 0.3023, "step": 40884 }, { "epoch": 0.7590513992492286, "grad_norm": 0.34856969118118286, "learning_rate": 2.7305639629798986e-06, "loss": 0.1068, "step": 40886 }, { "epoch": 0.7590885293866473, "grad_norm": 0.3986126780509949, "learning_rate": 2.729762986607534e-06, "loss": 0.3204, "step": 40888 }, { "epoch": 0.7591256595240659, "grad_norm": 0.32615989446640015, "learning_rate": 2.7289621091613184e-06, "loss": 0.1, "step": 40890 }, { "epoch": 0.7591627896614845, "grad_norm": 0.3104472756385803, "learning_rate": 2.7281613306521494e-06, "loss": 0.2878, "step": 40892 }, { "epoch": 0.7591999197989032, "grad_norm": 0.4942153990268707, "learning_rate": 2.7273606510909222e-06, "loss": 0.183, "step": 40894 }, { "epoch": 0.7592370499363218, "grad_norm": 0.37183234095573425, "learning_rate": 2.7265600704885365e-06, "loss": 0.3119, "step": 40896 }, { "epoch": 0.7592741800737405, "grad_norm": 0.30014580488204956, "learning_rate": 2.725759588855882e-06, "loss": 0.3464, "step": 40898 }, { "epoch": 0.759311310211159, "grad_norm": 0.33586806058883667, "learning_rate": 2.7249592062038467e-06, "loss": 0.1825, "step": 40900 }, { "epoch": 0.7593484403485777, "grad_norm": 0.23866114020347595, "learning_rate": 2.724158922543325e-06, "loss": 0.1541, "step": 40902 }, { "epoch": 0.7593855704859964, "grad_norm": 0.3009186089038849, "learning_rate": 2.7233587378852076e-06, "loss": 0.1852, "step": 40904 }, { "epoch": 0.759422700623415, "grad_norm": 0.4748780131340027, "learning_rate": 2.722558652240381e-06, "loss": 0.1761, "step": 40906 }, { "epoch": 0.7594598307608337, "grad_norm": 0.3828006982803345, "learning_rate": 2.7217586656197336e-06, "loss": 0.389, "step": 40908 }, { "epoch": 0.7594969608982522, "grad_norm": 0.3322638273239136, "learning_rate": 2.720958778034153e-06, "loss": 0.159, "step": 40910 }, { "epoch": 0.7595340910356709, "grad_norm": 0.5305048227310181, "learning_rate": 2.720158989494517e-06, "loss": 0.2746, "step": 40912 }, { "epoch": 0.7595712211730896, "grad_norm": 0.3397318422794342, "learning_rate": 2.71935930001171e-06, "loss": 0.2252, "step": 40914 }, { "epoch": 0.7596083513105082, "grad_norm": 0.5906352996826172, "learning_rate": 2.7185597095966165e-06, "loss": 0.2785, "step": 40916 }, { "epoch": 0.7596454814479269, "grad_norm": 0.43257367610931396, "learning_rate": 2.717760218260117e-06, "loss": 0.5038, "step": 40918 }, { "epoch": 0.7596826115853454, "grad_norm": 0.27028948068618774, "learning_rate": 2.7169608260130862e-06, "loss": 0.2928, "step": 40920 }, { "epoch": 0.7597197417227641, "grad_norm": 0.44712451100349426, "learning_rate": 2.716161532866406e-06, "loss": 0.2535, "step": 40922 }, { "epoch": 0.7597568718601827, "grad_norm": 0.3930681049823761, "learning_rate": 2.715362338830946e-06, "loss": 0.4347, "step": 40924 }, { "epoch": 0.7597940019976014, "grad_norm": 0.3414361774921417, "learning_rate": 2.7145632439175853e-06, "loss": 0.2947, "step": 40926 }, { "epoch": 0.7598311321350201, "grad_norm": 0.4201281666755676, "learning_rate": 2.7137642481371953e-06, "loss": 0.1969, "step": 40928 }, { "epoch": 0.7598682622724386, "grad_norm": 0.4431511461734772, "learning_rate": 2.7129653515006506e-06, "loss": 0.1672, "step": 40930 }, { "epoch": 0.7599053924098573, "grad_norm": 0.4840647280216217, "learning_rate": 2.712166554018819e-06, "loss": 0.2179, "step": 40932 }, { "epoch": 0.7599425225472759, "grad_norm": 0.3678118586540222, "learning_rate": 2.711367855702575e-06, "loss": 0.2847, "step": 40934 }, { "epoch": 0.7599796526846946, "grad_norm": 0.3222925364971161, "learning_rate": 2.7105692565627782e-06, "loss": 0.3016, "step": 40936 }, { "epoch": 0.7600167828221133, "grad_norm": 0.28203877806663513, "learning_rate": 2.7097707566103005e-06, "loss": 0.2516, "step": 40938 }, { "epoch": 0.7600539129595318, "grad_norm": 0.42676109075546265, "learning_rate": 2.708972355856009e-06, "loss": 0.3502, "step": 40940 }, { "epoch": 0.7600910430969505, "grad_norm": 0.5906369686126709, "learning_rate": 2.7081740543107614e-06, "loss": 0.2216, "step": 40942 }, { "epoch": 0.7601281732343691, "grad_norm": 0.2692893445491791, "learning_rate": 2.707375851985423e-06, "loss": 0.2279, "step": 40944 }, { "epoch": 0.7601653033717878, "grad_norm": 0.5732405781745911, "learning_rate": 2.706577748890856e-06, "loss": 0.2457, "step": 40946 }, { "epoch": 0.7602024335092065, "grad_norm": 0.25625014305114746, "learning_rate": 2.7057797450379218e-06, "loss": 0.3518, "step": 40948 }, { "epoch": 0.760239563646625, "grad_norm": 0.48644208908081055, "learning_rate": 2.704981840437474e-06, "loss": 0.3543, "step": 40950 }, { "epoch": 0.7602766937840437, "grad_norm": 0.57731693983078, "learning_rate": 2.7041840351003722e-06, "loss": 0.1213, "step": 40952 }, { "epoch": 0.7603138239214623, "grad_norm": 0.8326742649078369, "learning_rate": 2.703386329037473e-06, "loss": 0.3231, "step": 40954 }, { "epoch": 0.760350954058881, "grad_norm": 0.2908480167388916, "learning_rate": 2.702588722259628e-06, "loss": 0.3126, "step": 40956 }, { "epoch": 0.7603880841962997, "grad_norm": 0.29514217376708984, "learning_rate": 2.7017912147776938e-06, "loss": 0.2398, "step": 40958 }, { "epoch": 0.7604252143337182, "grad_norm": 0.4586414694786072, "learning_rate": 2.700993806602523e-06, "loss": 0.3116, "step": 40960 }, { "epoch": 0.7604623444711369, "grad_norm": 0.31312039494514465, "learning_rate": 2.7001964977449637e-06, "loss": 0.3059, "step": 40962 }, { "epoch": 0.7604994746085555, "grad_norm": 0.3502884805202484, "learning_rate": 2.6993992882158616e-06, "loss": 0.3452, "step": 40964 }, { "epoch": 0.7605366047459742, "grad_norm": 0.469490647315979, "learning_rate": 2.6986021780260674e-06, "loss": 0.2631, "step": 40966 }, { "epoch": 0.7605737348833927, "grad_norm": 0.3721438944339752, "learning_rate": 2.697805167186427e-06, "loss": 0.3057, "step": 40968 }, { "epoch": 0.7606108650208114, "grad_norm": 0.20231510698795319, "learning_rate": 2.697008255707786e-06, "loss": 0.2964, "step": 40970 }, { "epoch": 0.7606479951582301, "grad_norm": 0.38106149435043335, "learning_rate": 2.69621144360099e-06, "loss": 0.2287, "step": 40972 }, { "epoch": 0.7606851252956487, "grad_norm": 0.36628153920173645, "learning_rate": 2.6954147308768764e-06, "loss": 0.3927, "step": 40974 }, { "epoch": 0.7607222554330674, "grad_norm": 0.4119666814804077, "learning_rate": 2.6946181175462884e-06, "loss": 0.2832, "step": 40976 }, { "epoch": 0.760759385570486, "grad_norm": 0.649245023727417, "learning_rate": 2.693821603620066e-06, "loss": 0.2484, "step": 40978 }, { "epoch": 0.7607965157079046, "grad_norm": 0.3675916790962219, "learning_rate": 2.6930251891090463e-06, "loss": 0.279, "step": 40980 }, { "epoch": 0.7608336458453233, "grad_norm": 0.35791996121406555, "learning_rate": 2.692228874024071e-06, "loss": 0.2104, "step": 40982 }, { "epoch": 0.7608707759827419, "grad_norm": 0.26106682419776917, "learning_rate": 2.6914326583759677e-06, "loss": 0.3992, "step": 40984 }, { "epoch": 0.7609079061201606, "grad_norm": 0.37780362367630005, "learning_rate": 2.6906365421755763e-06, "loss": 0.3532, "step": 40986 }, { "epoch": 0.7609450362575791, "grad_norm": 0.35651278495788574, "learning_rate": 2.6898405254337258e-06, "loss": 0.2441, "step": 40988 }, { "epoch": 0.7609821663949978, "grad_norm": 0.4947573244571686, "learning_rate": 2.689044608161249e-06, "loss": 0.3364, "step": 40990 }, { "epoch": 0.7610192965324165, "grad_norm": 0.7682983875274658, "learning_rate": 2.6882487903689757e-06, "loss": 0.252, "step": 40992 }, { "epoch": 0.7610564266698351, "grad_norm": 0.18238750100135803, "learning_rate": 2.687453072067736e-06, "loss": 0.288, "step": 40994 }, { "epoch": 0.7610935568072538, "grad_norm": 0.3088197112083435, "learning_rate": 2.6866574532683563e-06, "loss": 0.283, "step": 40996 }, { "epoch": 0.7611306869446723, "grad_norm": 0.915503203868866, "learning_rate": 2.685861933981665e-06, "loss": 0.2549, "step": 40998 }, { "epoch": 0.761167817082091, "grad_norm": 0.5539266467094421, "learning_rate": 2.6850665142184818e-06, "loss": 0.2969, "step": 41000 }, { "epoch": 0.7612049472195097, "grad_norm": 0.3839748799800873, "learning_rate": 2.6842711939896325e-06, "loss": 0.2877, "step": 41002 }, { "epoch": 0.7612420773569283, "grad_norm": 0.2746099829673767, "learning_rate": 2.683475973305942e-06, "loss": 0.3158, "step": 41004 }, { "epoch": 0.761279207494347, "grad_norm": 0.2909497320652008, "learning_rate": 2.6826808521782256e-06, "loss": 0.4406, "step": 41006 }, { "epoch": 0.7613163376317655, "grad_norm": 0.3750675916671753, "learning_rate": 2.6818858306173034e-06, "loss": 0.265, "step": 41008 }, { "epoch": 0.7613534677691842, "grad_norm": 0.3192642033100128, "learning_rate": 2.6810909086339988e-06, "loss": 0.1916, "step": 41010 }, { "epoch": 0.7613905979066029, "grad_norm": 0.3078516721725464, "learning_rate": 2.6802960862391216e-06, "loss": 0.3007, "step": 41012 }, { "epoch": 0.7614277280440215, "grad_norm": 0.39554136991500854, "learning_rate": 2.67950136344349e-06, "loss": 0.4257, "step": 41014 }, { "epoch": 0.7614648581814402, "grad_norm": 0.34376537799835205, "learning_rate": 2.6787067402579158e-06, "loss": 0.3747, "step": 41016 }, { "epoch": 0.7615019883188587, "grad_norm": 0.49329254031181335, "learning_rate": 2.6779122166932135e-06, "loss": 0.1303, "step": 41018 }, { "epoch": 0.7615391184562774, "grad_norm": 0.46545106172561646, "learning_rate": 2.677117792760194e-06, "loss": 0.2855, "step": 41020 }, { "epoch": 0.761576248593696, "grad_norm": 0.4448985457420349, "learning_rate": 2.6763234684696706e-06, "loss": 0.2375, "step": 41022 }, { "epoch": 0.7616133787311147, "grad_norm": 0.752397358417511, "learning_rate": 2.6755292438324444e-06, "loss": 0.1571, "step": 41024 }, { "epoch": 0.7616505088685334, "grad_norm": 0.22327204048633575, "learning_rate": 2.6747351188593286e-06, "loss": 0.0794, "step": 41026 }, { "epoch": 0.7616876390059519, "grad_norm": 0.41629987955093384, "learning_rate": 2.673941093561123e-06, "loss": 0.2194, "step": 41028 }, { "epoch": 0.7617247691433706, "grad_norm": 0.47478723526000977, "learning_rate": 2.673147167948634e-06, "loss": 0.4039, "step": 41030 }, { "epoch": 0.7617618992807892, "grad_norm": 0.5064082741737366, "learning_rate": 2.6723533420326675e-06, "loss": 0.2813, "step": 41032 }, { "epoch": 0.7617990294182079, "grad_norm": 0.48901456594467163, "learning_rate": 2.671559615824022e-06, "loss": 0.2156, "step": 41034 }, { "epoch": 0.7618361595556266, "grad_norm": 0.5695748925209045, "learning_rate": 2.6707659893335024e-06, "loss": 0.2217, "step": 41036 }, { "epoch": 0.7618732896930451, "grad_norm": 0.45571279525756836, "learning_rate": 2.6699724625719015e-06, "loss": 0.3061, "step": 41038 }, { "epoch": 0.7619104198304638, "grad_norm": 0.3587796986103058, "learning_rate": 2.6691790355500192e-06, "loss": 0.3659, "step": 41040 }, { "epoch": 0.7619475499678824, "grad_norm": 0.2791910767555237, "learning_rate": 2.6683857082786514e-06, "loss": 0.0489, "step": 41042 }, { "epoch": 0.7619846801053011, "grad_norm": 0.4093324542045593, "learning_rate": 2.667592480768596e-06, "loss": 0.2645, "step": 41044 }, { "epoch": 0.7620218102427198, "grad_norm": 0.4447821080684662, "learning_rate": 2.666799353030641e-06, "loss": 0.5226, "step": 41046 }, { "epoch": 0.7620589403801383, "grad_norm": 0.4293154776096344, "learning_rate": 2.6660063250755853e-06, "loss": 0.1619, "step": 41048 }, { "epoch": 0.762096070517557, "grad_norm": 0.2621178925037384, "learning_rate": 2.6652133969142126e-06, "loss": 0.2014, "step": 41050 }, { "epoch": 0.7621332006549756, "grad_norm": 0.5110308527946472, "learning_rate": 2.664420568557314e-06, "loss": 0.3006, "step": 41052 }, { "epoch": 0.7621703307923943, "grad_norm": 0.8813275694847107, "learning_rate": 2.6636278400156803e-06, "loss": 0.4204, "step": 41054 }, { "epoch": 0.762207460929813, "grad_norm": 0.26043567061424255, "learning_rate": 2.6628352113000954e-06, "loss": 0.1976, "step": 41056 }, { "epoch": 0.7622445910672315, "grad_norm": 0.4937804341316223, "learning_rate": 2.6620426824213473e-06, "loss": 0.2605, "step": 41058 }, { "epoch": 0.7622817212046502, "grad_norm": 1.9092246294021606, "learning_rate": 2.6612502533902206e-06, "loss": 0.2245, "step": 41060 }, { "epoch": 0.7623188513420688, "grad_norm": 0.40777167677879333, "learning_rate": 2.660457924217493e-06, "loss": 0.2333, "step": 41062 }, { "epoch": 0.7623559814794875, "grad_norm": 0.47166988253593445, "learning_rate": 2.659665694913949e-06, "loss": 0.1921, "step": 41064 }, { "epoch": 0.7623931116169061, "grad_norm": 0.41206973791122437, "learning_rate": 2.6588735654903675e-06, "loss": 0.3573, "step": 41066 }, { "epoch": 0.7624302417543247, "grad_norm": 0.3124183118343353, "learning_rate": 2.6580815359575306e-06, "loss": 0.1857, "step": 41068 }, { "epoch": 0.7624673718917434, "grad_norm": 0.45635151863098145, "learning_rate": 2.65728960632621e-06, "loss": 0.3106, "step": 41070 }, { "epoch": 0.762504502029162, "grad_norm": 0.5393530130386353, "learning_rate": 2.656497776607183e-06, "loss": 0.2173, "step": 41072 }, { "epoch": 0.7625416321665807, "grad_norm": 0.4308006465435028, "learning_rate": 2.6557060468112284e-06, "loss": 0.2219, "step": 41074 }, { "epoch": 0.7625787623039992, "grad_norm": 0.14382848143577576, "learning_rate": 2.6549144169491135e-06, "loss": 0.3323, "step": 41076 }, { "epoch": 0.7626158924414179, "grad_norm": 0.4609472155570984, "learning_rate": 2.6541228870316115e-06, "loss": 0.3864, "step": 41078 }, { "epoch": 0.7626530225788366, "grad_norm": 0.45031967759132385, "learning_rate": 2.6533314570694933e-06, "loss": 0.4156, "step": 41080 }, { "epoch": 0.7626901527162552, "grad_norm": 0.4361385107040405, "learning_rate": 2.6525401270735284e-06, "loss": 0.1051, "step": 41082 }, { "epoch": 0.7627272828536739, "grad_norm": 0.4677934944629669, "learning_rate": 2.6517488970544847e-06, "loss": 0.2938, "step": 41084 }, { "epoch": 0.7627644129910924, "grad_norm": 0.5199944376945496, "learning_rate": 2.6509577670231303e-06, "loss": 0.3326, "step": 41086 }, { "epoch": 0.7628015431285111, "grad_norm": 0.2940211296081543, "learning_rate": 2.6501667369902252e-06, "loss": 0.3922, "step": 41088 }, { "epoch": 0.7628386732659298, "grad_norm": 0.4243672788143158, "learning_rate": 2.649375806966539e-06, "loss": 0.3212, "step": 41090 }, { "epoch": 0.7628758034033484, "grad_norm": 0.3078504502773285, "learning_rate": 2.648584976962826e-06, "loss": 0.1186, "step": 41092 }, { "epoch": 0.7629129335407671, "grad_norm": 0.35507500171661377, "learning_rate": 2.647794246989852e-06, "loss": 0.2292, "step": 41094 }, { "epoch": 0.7629500636781856, "grad_norm": 0.36059433221817017, "learning_rate": 2.6470036170583768e-06, "loss": 0.2498, "step": 41096 }, { "epoch": 0.7629871938156043, "grad_norm": 0.48841819167137146, "learning_rate": 2.6462130871791592e-06, "loss": 0.3218, "step": 41098 }, { "epoch": 0.763024323953023, "grad_norm": 0.3681288957595825, "learning_rate": 2.645422657362953e-06, "loss": 0.2215, "step": 41100 }, { "epoch": 0.7630614540904416, "grad_norm": 0.20018750429153442, "learning_rate": 2.644632327620513e-06, "loss": 0.1489, "step": 41102 }, { "epoch": 0.7630985842278603, "grad_norm": 0.2612859904766083, "learning_rate": 2.6438420979625967e-06, "loss": 0.1815, "step": 41104 }, { "epoch": 0.7631357143652788, "grad_norm": 0.4553103446960449, "learning_rate": 2.6430519683999545e-06, "loss": 0.1839, "step": 41106 }, { "epoch": 0.7631728445026975, "grad_norm": 0.46828901767730713, "learning_rate": 2.6422619389433413e-06, "loss": 0.3429, "step": 41108 }, { "epoch": 0.7632099746401162, "grad_norm": 0.290111780166626, "learning_rate": 2.641472009603501e-06, "loss": 0.2655, "step": 41110 }, { "epoch": 0.7632471047775348, "grad_norm": 0.3366932272911072, "learning_rate": 2.6406821803911887e-06, "loss": 0.2062, "step": 41112 }, { "epoch": 0.7632842349149535, "grad_norm": 0.6364966034889221, "learning_rate": 2.6398924513171454e-06, "loss": 0.323, "step": 41114 }, { "epoch": 0.763321365052372, "grad_norm": 0.5466961860656738, "learning_rate": 2.639102822392119e-06, "loss": 0.3669, "step": 41116 }, { "epoch": 0.7633584951897907, "grad_norm": 0.32155272364616394, "learning_rate": 2.6383132936268553e-06, "loss": 0.1624, "step": 41118 }, { "epoch": 0.7633956253272093, "grad_norm": 0.49891331791877747, "learning_rate": 2.637523865032097e-06, "loss": 0.3506, "step": 41120 }, { "epoch": 0.763432755464628, "grad_norm": 0.4251953959465027, "learning_rate": 2.636734536618586e-06, "loss": 0.178, "step": 41122 }, { "epoch": 0.7634698856020466, "grad_norm": 0.45255738496780396, "learning_rate": 2.635945308397064e-06, "loss": 0.3099, "step": 41124 }, { "epoch": 0.7635070157394652, "grad_norm": 0.6898943185806274, "learning_rate": 2.6351561803782668e-06, "loss": 0.3141, "step": 41126 }, { "epoch": 0.7635441458768839, "grad_norm": 0.3628913462162018, "learning_rate": 2.634367152572933e-06, "loss": 0.1773, "step": 41128 }, { "epoch": 0.7635812760143025, "grad_norm": 0.38953152298927307, "learning_rate": 2.633578224991803e-06, "loss": 0.2982, "step": 41130 }, { "epoch": 0.7636184061517212, "grad_norm": 10.194908142089844, "learning_rate": 2.6327893976456055e-06, "loss": 0.1907, "step": 41132 }, { "epoch": 0.7636555362891398, "grad_norm": 0.33288565278053284, "learning_rate": 2.632000670545077e-06, "loss": 0.2028, "step": 41134 }, { "epoch": 0.7636926664265584, "grad_norm": 0.45828086137771606, "learning_rate": 2.6312120437009524e-06, "loss": 0.1836, "step": 41136 }, { "epoch": 0.7637297965639771, "grad_norm": 0.5351374745368958, "learning_rate": 2.630423517123957e-06, "loss": 0.3413, "step": 41138 }, { "epoch": 0.7637669267013957, "grad_norm": 0.5626407265663147, "learning_rate": 2.6296350908248227e-06, "loss": 0.1455, "step": 41140 }, { "epoch": 0.7638040568388144, "grad_norm": 0.30331602692604065, "learning_rate": 2.628846764814278e-06, "loss": 0.1756, "step": 41142 }, { "epoch": 0.763841186976233, "grad_norm": 0.32940930128097534, "learning_rate": 2.62805853910305e-06, "loss": 0.1913, "step": 41144 }, { "epoch": 0.7638783171136516, "grad_norm": 0.4012812674045563, "learning_rate": 2.6272704137018646e-06, "loss": 0.3143, "step": 41146 }, { "epoch": 0.7639154472510703, "grad_norm": 0.4202346205711365, "learning_rate": 2.626482388621443e-06, "loss": 0.2092, "step": 41148 }, { "epoch": 0.7639525773884889, "grad_norm": 0.5187693238258362, "learning_rate": 2.6256944638725147e-06, "loss": 0.096, "step": 41150 }, { "epoch": 0.7639897075259076, "grad_norm": 0.7458369135856628, "learning_rate": 2.624906639465795e-06, "loss": 0.3796, "step": 41152 }, { "epoch": 0.7640268376633262, "grad_norm": 0.33917543292045593, "learning_rate": 2.624118915412003e-06, "loss": 0.3203, "step": 41154 }, { "epoch": 0.7640639678007448, "grad_norm": 0.34963271021842957, "learning_rate": 2.623331291721858e-06, "loss": 0.3116, "step": 41156 }, { "epoch": 0.7641010979381635, "grad_norm": 0.33579564094543457, "learning_rate": 2.6225437684060793e-06, "loss": 0.3612, "step": 41158 }, { "epoch": 0.7641382280755821, "grad_norm": 0.2659354507923126, "learning_rate": 2.6217563454753815e-06, "loss": 0.1381, "step": 41160 }, { "epoch": 0.7641753582130008, "grad_norm": 0.7723900079727173, "learning_rate": 2.6209690229404828e-06, "loss": 0.3926, "step": 41162 }, { "epoch": 0.7642124883504194, "grad_norm": 0.2998749017715454, "learning_rate": 2.6201818008120894e-06, "loss": 0.3617, "step": 41164 }, { "epoch": 0.764249618487838, "grad_norm": 0.2181549072265625, "learning_rate": 2.6193946791009173e-06, "loss": 0.3556, "step": 41166 }, { "epoch": 0.7642867486252567, "grad_norm": 0.2983764111995697, "learning_rate": 2.6186076578176756e-06, "loss": 0.253, "step": 41168 }, { "epoch": 0.7643238787626753, "grad_norm": 0.38671931624412537, "learning_rate": 2.6178207369730736e-06, "loss": 0.2498, "step": 41170 }, { "epoch": 0.764361008900094, "grad_norm": 0.29907190799713135, "learning_rate": 2.6170339165778225e-06, "loss": 0.2993, "step": 41172 }, { "epoch": 0.7643981390375125, "grad_norm": 0.44874662160873413, "learning_rate": 2.6162471966426216e-06, "loss": 0.2429, "step": 41174 }, { "epoch": 0.7644352691749312, "grad_norm": 0.3117087483406067, "learning_rate": 2.6154605771781838e-06, "loss": 0.2427, "step": 41176 }, { "epoch": 0.7644723993123499, "grad_norm": 0.3766685426235199, "learning_rate": 2.6146740581952045e-06, "loss": 0.2231, "step": 41178 }, { "epoch": 0.7645095294497685, "grad_norm": 0.44768568873405457, "learning_rate": 2.61388763970439e-06, "loss": 0.3214, "step": 41180 }, { "epoch": 0.7645466595871871, "grad_norm": 0.3736570179462433, "learning_rate": 2.6131013217164404e-06, "loss": 0.2873, "step": 41182 }, { "epoch": 0.7645837897246057, "grad_norm": 0.3789091408252716, "learning_rate": 2.612315104242056e-06, "loss": 0.16, "step": 41184 }, { "epoch": 0.7646209198620244, "grad_norm": 0.3238079845905304, "learning_rate": 2.6115289872919334e-06, "loss": 0.3223, "step": 41186 }, { "epoch": 0.7646580499994431, "grad_norm": 0.4763450026512146, "learning_rate": 2.610742970876774e-06, "loss": 0.1994, "step": 41188 }, { "epoch": 0.7646951801368617, "grad_norm": 0.3456266522407532, "learning_rate": 2.609957055007266e-06, "loss": 0.1561, "step": 41190 }, { "epoch": 0.7647323102742803, "grad_norm": 0.4724782407283783, "learning_rate": 2.609171239694107e-06, "loss": 0.1384, "step": 41192 }, { "epoch": 0.7647694404116989, "grad_norm": 0.4341837465763092, "learning_rate": 2.608385524947993e-06, "loss": 0.382, "step": 41194 }, { "epoch": 0.7648065705491176, "grad_norm": 0.3822661340236664, "learning_rate": 2.6075999107796067e-06, "loss": 0.2093, "step": 41196 }, { "epoch": 0.7648437006865363, "grad_norm": 0.3665282130241394, "learning_rate": 2.606814397199644e-06, "loss": 0.2275, "step": 41198 }, { "epoch": 0.7648808308239549, "grad_norm": 0.40559834241867065, "learning_rate": 2.6060289842187956e-06, "loss": 0.3496, "step": 41200 }, { "epoch": 0.7649179609613735, "grad_norm": 0.40877243876457214, "learning_rate": 2.6052436718477413e-06, "loss": 0.2161, "step": 41202 }, { "epoch": 0.7649550910987921, "grad_norm": 0.3546595871448517, "learning_rate": 2.6044584600971723e-06, "loss": 0.2223, "step": 41204 }, { "epoch": 0.7649922212362108, "grad_norm": 0.4898965060710907, "learning_rate": 2.60367334897777e-06, "loss": 0.3304, "step": 41206 }, { "epoch": 0.7650293513736295, "grad_norm": 0.6012561917304993, "learning_rate": 2.602888338500219e-06, "loss": 0.1831, "step": 41208 }, { "epoch": 0.7650664815110481, "grad_norm": 0.3670376241207123, "learning_rate": 2.602103428675201e-06, "loss": 0.297, "step": 41210 }, { "epoch": 0.7651036116484667, "grad_norm": 0.3435477912425995, "learning_rate": 2.601318619513399e-06, "loss": 0.1587, "step": 41212 }, { "epoch": 0.7651407417858853, "grad_norm": 0.25566667318344116, "learning_rate": 2.6005339110254866e-06, "loss": 0.2079, "step": 41214 }, { "epoch": 0.765177871923304, "grad_norm": 0.3421875238418579, "learning_rate": 2.5997493032221455e-06, "loss": 0.4131, "step": 41216 }, { "epoch": 0.7652150020607227, "grad_norm": 0.393698126077652, "learning_rate": 2.5989647961140485e-06, "loss": 0.3347, "step": 41218 }, { "epoch": 0.7652521321981413, "grad_norm": 0.43134650588035583, "learning_rate": 2.5981803897118705e-06, "loss": 0.1141, "step": 41220 }, { "epoch": 0.7652892623355599, "grad_norm": 0.34233418107032776, "learning_rate": 2.5973960840262878e-06, "loss": 0.2819, "step": 41222 }, { "epoch": 0.7653263924729785, "grad_norm": 0.41467759013175964, "learning_rate": 2.596611879067973e-06, "loss": 0.1764, "step": 41224 }, { "epoch": 0.7653635226103972, "grad_norm": 0.6019743084907532, "learning_rate": 2.5958277748475924e-06, "loss": 0.2623, "step": 41226 }, { "epoch": 0.7654006527478158, "grad_norm": 0.3277321457862854, "learning_rate": 2.595043771375817e-06, "loss": 0.2983, "step": 41228 }, { "epoch": 0.7654377828852345, "grad_norm": 0.3492259979248047, "learning_rate": 2.5942598686633157e-06, "loss": 0.2445, "step": 41230 }, { "epoch": 0.7654749130226531, "grad_norm": 0.4110633432865143, "learning_rate": 2.5934760667207548e-06, "loss": 0.228, "step": 41232 }, { "epoch": 0.7655120431600717, "grad_norm": 0.5250723361968994, "learning_rate": 2.5926923655588e-06, "loss": 0.3368, "step": 41234 }, { "epoch": 0.7655491732974904, "grad_norm": 0.7087105512619019, "learning_rate": 2.591908765188118e-06, "loss": 0.2308, "step": 41236 }, { "epoch": 0.765586303434909, "grad_norm": 0.3392013609409332, "learning_rate": 2.5911252656193676e-06, "loss": 0.3082, "step": 41238 }, { "epoch": 0.7656234335723276, "grad_norm": 0.371787428855896, "learning_rate": 2.5903418668632062e-06, "loss": 0.2243, "step": 41240 }, { "epoch": 0.7656605637097463, "grad_norm": 0.47207289934158325, "learning_rate": 2.589558568930298e-06, "loss": 0.2583, "step": 41242 }, { "epoch": 0.7656976938471649, "grad_norm": 0.3173682689666748, "learning_rate": 2.5887753718313012e-06, "loss": 0.227, "step": 41244 }, { "epoch": 0.7657348239845836, "grad_norm": 0.6015033721923828, "learning_rate": 2.5879922755768727e-06, "loss": 0.259, "step": 41246 }, { "epoch": 0.7657719541220022, "grad_norm": 0.45075878500938416, "learning_rate": 2.587209280177667e-06, "loss": 0.1965, "step": 41248 }, { "epoch": 0.7658090842594208, "grad_norm": 0.448700875043869, "learning_rate": 2.586426385644343e-06, "loss": 0.3554, "step": 41250 }, { "epoch": 0.7658462143968395, "grad_norm": 0.45653247833251953, "learning_rate": 2.5856435919875457e-06, "loss": 0.2149, "step": 41252 }, { "epoch": 0.7658833445342581, "grad_norm": 0.384328156709671, "learning_rate": 2.5848608992179303e-06, "loss": 0.317, "step": 41254 }, { "epoch": 0.7659204746716768, "grad_norm": 0.21103191375732422, "learning_rate": 2.584078307346148e-06, "loss": 0.108, "step": 41256 }, { "epoch": 0.7659576048090954, "grad_norm": 0.32534170150756836, "learning_rate": 2.5832958163828504e-06, "loss": 0.3078, "step": 41258 }, { "epoch": 0.765994734946514, "grad_norm": 0.33861976861953735, "learning_rate": 2.5825134263386764e-06, "loss": 0.3208, "step": 41260 }, { "epoch": 0.7660318650839327, "grad_norm": 0.39179614186286926, "learning_rate": 2.5817311372242803e-06, "loss": 0.2554, "step": 41262 }, { "epoch": 0.7660689952213513, "grad_norm": 0.30000942945480347, "learning_rate": 2.5809489490503003e-06, "loss": 0.2221, "step": 41264 }, { "epoch": 0.76610612535877, "grad_norm": 0.38789182901382446, "learning_rate": 2.5801668618273833e-06, "loss": 0.2187, "step": 41266 }, { "epoch": 0.7661432554961886, "grad_norm": 0.49861547350883484, "learning_rate": 2.57938487556617e-06, "loss": 0.2791, "step": 41268 }, { "epoch": 0.7661803856336072, "grad_norm": 0.28582337498664856, "learning_rate": 2.5786029902773e-06, "loss": 0.1793, "step": 41270 }, { "epoch": 0.7662175157710258, "grad_norm": 0.38925376534461975, "learning_rate": 2.5778212059714145e-06, "loss": 0.2437, "step": 41272 }, { "epoch": 0.7662546459084445, "grad_norm": 0.4097864627838135, "learning_rate": 2.5770395226591505e-06, "loss": 0.4566, "step": 41274 }, { "epoch": 0.7662917760458632, "grad_norm": 0.3490731120109558, "learning_rate": 2.576257940351147e-06, "loss": 0.3496, "step": 41276 }, { "epoch": 0.7663289061832818, "grad_norm": 0.47219184041023254, "learning_rate": 2.575476459058034e-06, "loss": 0.3711, "step": 41278 }, { "epoch": 0.7663660363207004, "grad_norm": 0.1974325031042099, "learning_rate": 2.574695078790449e-06, "loss": 0.2986, "step": 41280 }, { "epoch": 0.766403166458119, "grad_norm": 0.33266741037368774, "learning_rate": 2.5739137995590204e-06, "loss": 0.2612, "step": 41282 }, { "epoch": 0.7664402965955377, "grad_norm": 0.3887407183647156, "learning_rate": 2.5731326213743814e-06, "loss": 0.2571, "step": 41284 }, { "epoch": 0.7664774267329564, "grad_norm": 0.3010798692703247, "learning_rate": 2.5723515442471614e-06, "loss": 0.3578, "step": 41286 }, { "epoch": 0.766514556870375, "grad_norm": 0.37065666913986206, "learning_rate": 2.571570568187991e-06, "loss": 0.3085, "step": 41288 }, { "epoch": 0.7665516870077936, "grad_norm": 0.9915363192558289, "learning_rate": 2.5707896932074915e-06, "loss": 0.3521, "step": 41290 }, { "epoch": 0.7665888171452122, "grad_norm": 0.31265366077423096, "learning_rate": 2.5700089193162912e-06, "loss": 0.2885, "step": 41292 }, { "epoch": 0.7666259472826309, "grad_norm": 0.48649343848228455, "learning_rate": 2.5692282465250152e-06, "loss": 0.3434, "step": 41294 }, { "epoch": 0.7666630774200496, "grad_norm": 0.31086084246635437, "learning_rate": 2.5684476748442845e-06, "loss": 0.1331, "step": 41296 }, { "epoch": 0.7667002075574681, "grad_norm": 0.40793880820274353, "learning_rate": 2.567667204284723e-06, "loss": 0.378, "step": 41298 }, { "epoch": 0.7667373376948868, "grad_norm": 0.28296178579330444, "learning_rate": 2.5668868348569464e-06, "loss": 0.2055, "step": 41300 }, { "epoch": 0.7667744678323054, "grad_norm": 0.3801786005496979, "learning_rate": 2.566106566571578e-06, "loss": 0.1626, "step": 41302 }, { "epoch": 0.7668115979697241, "grad_norm": 0.591468870639801, "learning_rate": 2.5653263994392285e-06, "loss": 0.234, "step": 41304 }, { "epoch": 0.7668487281071428, "grad_norm": 0.3889619708061218, "learning_rate": 2.564546333470518e-06, "loss": 0.2971, "step": 41306 }, { "epoch": 0.7668858582445613, "grad_norm": 0.4041593372821808, "learning_rate": 2.5637663686760592e-06, "loss": 0.182, "step": 41308 }, { "epoch": 0.76692298838198, "grad_norm": 0.4182489514350891, "learning_rate": 2.562986505066467e-06, "loss": 0.3509, "step": 41310 }, { "epoch": 0.7669601185193986, "grad_norm": 0.4631083905696869, "learning_rate": 2.562206742652352e-06, "loss": 0.3249, "step": 41312 }, { "epoch": 0.7669972486568173, "grad_norm": 0.5836560726165771, "learning_rate": 2.5614270814443264e-06, "loss": 0.2897, "step": 41314 }, { "epoch": 0.767034378794236, "grad_norm": 0.3607669472694397, "learning_rate": 2.5606475214529958e-06, "loss": 0.2521, "step": 41316 }, { "epoch": 0.7670715089316545, "grad_norm": 0.15277954936027527, "learning_rate": 2.5598680626889684e-06, "loss": 0.1379, "step": 41318 }, { "epoch": 0.7671086390690732, "grad_norm": 0.5786731839179993, "learning_rate": 2.5590887051628533e-06, "loss": 0.3198, "step": 41320 }, { "epoch": 0.7671457692064918, "grad_norm": 0.34919285774230957, "learning_rate": 2.55830944888525e-06, "loss": 0.3126, "step": 41322 }, { "epoch": 0.7671828993439105, "grad_norm": 0.3165053725242615, "learning_rate": 2.5575302938667647e-06, "loss": 0.2398, "step": 41324 }, { "epoch": 0.7672200294813291, "grad_norm": 0.49193501472473145, "learning_rate": 2.5567512401180027e-06, "loss": 0.378, "step": 41326 }, { "epoch": 0.7672571596187477, "grad_norm": 0.26617106795310974, "learning_rate": 2.5559722876495575e-06, "loss": 0.1259, "step": 41328 }, { "epoch": 0.7672942897561664, "grad_norm": 0.2973635792732239, "learning_rate": 2.555193436472032e-06, "loss": 0.2417, "step": 41330 }, { "epoch": 0.767331419893585, "grad_norm": 0.363119900226593, "learning_rate": 2.554414686596025e-06, "loss": 0.2647, "step": 41332 }, { "epoch": 0.7673685500310037, "grad_norm": 0.3887489140033722, "learning_rate": 2.5536360380321314e-06, "loss": 0.4158, "step": 41334 }, { "epoch": 0.7674056801684223, "grad_norm": 0.5190216302871704, "learning_rate": 2.5528574907909466e-06, "loss": 0.2936, "step": 41336 }, { "epoch": 0.7674428103058409, "grad_norm": 0.31827104091644287, "learning_rate": 2.5520790448830677e-06, "loss": 0.1956, "step": 41338 }, { "epoch": 0.7674799404432596, "grad_norm": 0.5050176978111267, "learning_rate": 2.5513007003190816e-06, "loss": 0.437, "step": 41340 }, { "epoch": 0.7675170705806782, "grad_norm": 0.3137609362602234, "learning_rate": 2.550522457109581e-06, "loss": 0.5051, "step": 41342 }, { "epoch": 0.7675542007180969, "grad_norm": 0.48798704147338867, "learning_rate": 2.5497443152651593e-06, "loss": 0.3145, "step": 41344 }, { "epoch": 0.7675913308555155, "grad_norm": 0.3828844130039215, "learning_rate": 2.5489662747963985e-06, "loss": 0.3825, "step": 41346 }, { "epoch": 0.7676284609929341, "grad_norm": 0.4482521712779999, "learning_rate": 2.548188335713887e-06, "loss": 0.3455, "step": 41348 }, { "epoch": 0.7676655911303528, "grad_norm": 0.46659302711486816, "learning_rate": 2.5474104980282156e-06, "loss": 0.2417, "step": 41350 }, { "epoch": 0.7677027212677714, "grad_norm": 0.48956945538520813, "learning_rate": 2.546632761749961e-06, "loss": 0.3291, "step": 41352 }, { "epoch": 0.7677398514051901, "grad_norm": 0.4789769649505615, "learning_rate": 2.545855126889709e-06, "loss": 0.2285, "step": 41354 }, { "epoch": 0.7677769815426086, "grad_norm": 0.340114027261734, "learning_rate": 2.545077593458042e-06, "loss": 0.1609, "step": 41356 }, { "epoch": 0.7678141116800273, "grad_norm": 0.3755229413509369, "learning_rate": 2.5443001614655373e-06, "loss": 0.5751, "step": 41358 }, { "epoch": 0.767851241817446, "grad_norm": 0.51650071144104, "learning_rate": 2.5435228309227754e-06, "loss": 0.2585, "step": 41360 }, { "epoch": 0.7678883719548646, "grad_norm": 0.4635002613067627, "learning_rate": 2.5427456018403363e-06, "loss": 0.1262, "step": 41362 }, { "epoch": 0.7679255020922833, "grad_norm": 0.4288936257362366, "learning_rate": 2.5419684742287897e-06, "loss": 0.2473, "step": 41364 }, { "epoch": 0.7679626322297018, "grad_norm": 0.27689090371131897, "learning_rate": 2.5411914480987156e-06, "loss": 0.1617, "step": 41366 }, { "epoch": 0.7679997623671205, "grad_norm": 0.6193722486495972, "learning_rate": 2.5404145234606814e-06, "loss": 0.2986, "step": 41368 }, { "epoch": 0.7680368925045392, "grad_norm": 0.23524649441242218, "learning_rate": 2.5396377003252617e-06, "loss": 0.3073, "step": 41370 }, { "epoch": 0.7680740226419578, "grad_norm": 0.39432278275489807, "learning_rate": 2.538860978703025e-06, "loss": 0.3311, "step": 41372 }, { "epoch": 0.7681111527793765, "grad_norm": 0.31275299191474915, "learning_rate": 2.538084358604543e-06, "loss": 0.331, "step": 41374 }, { "epoch": 0.768148282916795, "grad_norm": 0.3133927881717682, "learning_rate": 2.537307840040385e-06, "loss": 0.1934, "step": 41376 }, { "epoch": 0.7681854130542137, "grad_norm": 0.41305121779441833, "learning_rate": 2.53653142302111e-06, "loss": 0.1964, "step": 41378 }, { "epoch": 0.7682225431916323, "grad_norm": 0.29816102981567383, "learning_rate": 2.5357551075572874e-06, "loss": 0.3428, "step": 41380 }, { "epoch": 0.768259673329051, "grad_norm": 0.6308209300041199, "learning_rate": 2.53497889365948e-06, "loss": 0.1286, "step": 41382 }, { "epoch": 0.7682968034664697, "grad_norm": 0.40792134404182434, "learning_rate": 2.534202781338252e-06, "loss": 0.2186, "step": 41384 }, { "epoch": 0.7683339336038882, "grad_norm": 0.43371978402137756, "learning_rate": 2.533426770604158e-06, "loss": 0.4609, "step": 41386 }, { "epoch": 0.7683710637413069, "grad_norm": 0.36207014322280884, "learning_rate": 2.532650861467765e-06, "loss": 0.1504, "step": 41388 }, { "epoch": 0.7684081938787255, "grad_norm": 0.42898428440093994, "learning_rate": 2.531875053939622e-06, "loss": 0.3226, "step": 41390 }, { "epoch": 0.7684453240161442, "grad_norm": 0.3628973960876465, "learning_rate": 2.5310993480302916e-06, "loss": 0.2205, "step": 41392 }, { "epoch": 0.7684824541535629, "grad_norm": 0.5887414813041687, "learning_rate": 2.530323743750326e-06, "loss": 0.143, "step": 41394 }, { "epoch": 0.7685195842909814, "grad_norm": 0.2971482574939728, "learning_rate": 2.52954824111028e-06, "loss": 0.2751, "step": 41396 }, { "epoch": 0.7685567144284001, "grad_norm": 0.28399187326431274, "learning_rate": 2.5287728401207056e-06, "loss": 0.4495, "step": 41398 }, { "epoch": 0.7685938445658187, "grad_norm": 0.459029883146286, "learning_rate": 2.5279975407921543e-06, "loss": 0.4372, "step": 41400 }, { "epoch": 0.7686309747032374, "grad_norm": 0.5715258717536926, "learning_rate": 2.5272223431351784e-06, "loss": 0.3319, "step": 41402 }, { "epoch": 0.7686681048406561, "grad_norm": 0.4502350389957428, "learning_rate": 2.5264472471603197e-06, "loss": 0.3261, "step": 41404 }, { "epoch": 0.7687052349780746, "grad_norm": 0.5230157971382141, "learning_rate": 2.525672252878131e-06, "loss": 0.2604, "step": 41406 }, { "epoch": 0.7687423651154933, "grad_norm": 0.23249544203281403, "learning_rate": 2.524897360299152e-06, "loss": 0.2151, "step": 41408 }, { "epoch": 0.7687794952529119, "grad_norm": 0.3170226812362671, "learning_rate": 2.5241225694339288e-06, "loss": 0.3592, "step": 41410 }, { "epoch": 0.7688166253903306, "grad_norm": 0.42040392756462097, "learning_rate": 2.5233478802930057e-06, "loss": 0.1604, "step": 41412 }, { "epoch": 0.7688537555277493, "grad_norm": 0.25480395555496216, "learning_rate": 2.522573292886925e-06, "loss": 0.3222, "step": 41414 }, { "epoch": 0.7688908856651678, "grad_norm": 0.36990073323249817, "learning_rate": 2.521798807226221e-06, "loss": 0.4038, "step": 41416 }, { "epoch": 0.7689280158025865, "grad_norm": 0.35002514719963074, "learning_rate": 2.5210244233214353e-06, "loss": 0.2675, "step": 41418 }, { "epoch": 0.7689651459400051, "grad_norm": 0.3868134915828705, "learning_rate": 2.5202501411831058e-06, "loss": 0.3042, "step": 41420 }, { "epoch": 0.7690022760774238, "grad_norm": 0.6874318718910217, "learning_rate": 2.519475960821768e-06, "loss": 0.3285, "step": 41422 }, { "epoch": 0.7690394062148423, "grad_norm": 0.3507044315338135, "learning_rate": 2.518701882247955e-06, "loss": 0.2658, "step": 41424 }, { "epoch": 0.769076536352261, "grad_norm": 0.3999587595462799, "learning_rate": 2.5179279054722026e-06, "loss": 0.3024, "step": 41426 }, { "epoch": 0.7691136664896797, "grad_norm": 0.42895013093948364, "learning_rate": 2.51715403050504e-06, "loss": 0.2357, "step": 41428 }, { "epoch": 0.7691507966270983, "grad_norm": 0.3126152753829956, "learning_rate": 2.516380257356995e-06, "loss": 0.4376, "step": 41430 }, { "epoch": 0.769187926764517, "grad_norm": 0.36698758602142334, "learning_rate": 2.5156065860385994e-06, "loss": 0.3081, "step": 41432 }, { "epoch": 0.7692250569019355, "grad_norm": 0.6157290935516357, "learning_rate": 2.5148330165603783e-06, "loss": 0.3686, "step": 41434 }, { "epoch": 0.7692621870393542, "grad_norm": 0.3823455274105072, "learning_rate": 2.5140595489328603e-06, "loss": 0.0638, "step": 41436 }, { "epoch": 0.7692993171767729, "grad_norm": 0.44972336292266846, "learning_rate": 2.513286183166568e-06, "loss": 0.2873, "step": 41438 }, { "epoch": 0.7693364473141915, "grad_norm": 0.3725402057170868, "learning_rate": 2.512512919272029e-06, "loss": 0.4105, "step": 41440 }, { "epoch": 0.7693735774516102, "grad_norm": 0.388278067111969, "learning_rate": 2.511739757259758e-06, "loss": 0.1952, "step": 41442 }, { "epoch": 0.7694107075890287, "grad_norm": 0.5143905282020569, "learning_rate": 2.5109666971402792e-06, "loss": 0.4773, "step": 41444 }, { "epoch": 0.7694478377264474, "grad_norm": 0.4239763617515564, "learning_rate": 2.5101937389241117e-06, "loss": 0.1891, "step": 41446 }, { "epoch": 0.7694849678638661, "grad_norm": 0.19605471193790436, "learning_rate": 2.5094208826217758e-06, "loss": 0.1952, "step": 41448 }, { "epoch": 0.7695220980012847, "grad_norm": 0.49823063611984253, "learning_rate": 2.5086481282437813e-06, "loss": 0.2845, "step": 41450 }, { "epoch": 0.7695592281387034, "grad_norm": 0.4230511784553528, "learning_rate": 2.5078754758006505e-06, "loss": 0.4119, "step": 41452 }, { "epoch": 0.7695963582761219, "grad_norm": 0.34655478596687317, "learning_rate": 2.507102925302889e-06, "loss": 0.3519, "step": 41454 }, { "epoch": 0.7696334884135406, "grad_norm": 0.3689468801021576, "learning_rate": 2.506330476761013e-06, "loss": 0.2239, "step": 41456 }, { "epoch": 0.7696706185509593, "grad_norm": 0.23845991492271423, "learning_rate": 2.505558130185534e-06, "loss": 0.1857, "step": 41458 }, { "epoch": 0.7697077486883779, "grad_norm": 0.26497703790664673, "learning_rate": 2.50478588558696e-06, "loss": 0.4189, "step": 41460 }, { "epoch": 0.7697448788257966, "grad_norm": 0.22562922537326813, "learning_rate": 2.5040137429757982e-06, "loss": 0.1965, "step": 41462 }, { "epoch": 0.7697820089632151, "grad_norm": 0.4532822072505951, "learning_rate": 2.503241702362561e-06, "loss": 0.3235, "step": 41464 }, { "epoch": 0.7698191391006338, "grad_norm": 0.3611896336078644, "learning_rate": 2.5024697637577445e-06, "loss": 0.1812, "step": 41466 }, { "epoch": 0.7698562692380525, "grad_norm": 0.41187095642089844, "learning_rate": 2.501697927171858e-06, "loss": 0.374, "step": 41468 }, { "epoch": 0.7698933993754711, "grad_norm": 0.38752293586730957, "learning_rate": 2.500926192615405e-06, "loss": 0.2303, "step": 41470 }, { "epoch": 0.7699305295128898, "grad_norm": 0.4563531279563904, "learning_rate": 2.5001545600988806e-06, "loss": 0.5203, "step": 41472 }, { "epoch": 0.7699676596503083, "grad_norm": 0.30065885186195374, "learning_rate": 2.499383029632788e-06, "loss": 0.2839, "step": 41474 }, { "epoch": 0.770004789787727, "grad_norm": 0.4574066698551178, "learning_rate": 2.4986116012276263e-06, "loss": 0.1869, "step": 41476 }, { "epoch": 0.7700419199251456, "grad_norm": 0.5186640620231628, "learning_rate": 2.4978402748938944e-06, "loss": 0.464, "step": 41478 }, { "epoch": 0.7700790500625643, "grad_norm": 0.42823919653892517, "learning_rate": 2.4970690506420814e-06, "loss": 0.2618, "step": 41480 }, { "epoch": 0.770116180199983, "grad_norm": 0.8293660283088684, "learning_rate": 2.496297928482685e-06, "loss": 0.3068, "step": 41482 }, { "epoch": 0.7701533103374015, "grad_norm": 0.5593637228012085, "learning_rate": 2.495526908426198e-06, "loss": 0.3727, "step": 41484 }, { "epoch": 0.7701904404748202, "grad_norm": 0.6823593378067017, "learning_rate": 2.494755990483111e-06, "loss": 0.2315, "step": 41486 }, { "epoch": 0.7702275706122388, "grad_norm": 0.33526623249053955, "learning_rate": 2.4939851746639133e-06, "loss": 0.2859, "step": 41488 }, { "epoch": 0.7702647007496575, "grad_norm": 0.3988734781742096, "learning_rate": 2.4932144609790977e-06, "loss": 0.4348, "step": 41490 }, { "epoch": 0.7703018308870762, "grad_norm": 0.4875473082065582, "learning_rate": 2.492443849439148e-06, "loss": 0.3288, "step": 41492 }, { "epoch": 0.7703389610244947, "grad_norm": 0.33681178092956543, "learning_rate": 2.491673340054547e-06, "loss": 0.1195, "step": 41494 }, { "epoch": 0.7703760911619134, "grad_norm": 0.5214869976043701, "learning_rate": 2.4909029328357816e-06, "loss": 0.2344, "step": 41496 }, { "epoch": 0.770413221299332, "grad_norm": 0.4211408197879791, "learning_rate": 2.490132627793335e-06, "loss": 0.2452, "step": 41498 }, { "epoch": 0.7704503514367507, "grad_norm": 0.4766230285167694, "learning_rate": 2.489362424937689e-06, "loss": 0.2905, "step": 41500 }, { "epoch": 0.7704874815741694, "grad_norm": 0.37303000688552856, "learning_rate": 2.488592324279325e-06, "loss": 0.2822, "step": 41502 }, { "epoch": 0.7705246117115879, "grad_norm": 0.2579355537891388, "learning_rate": 2.4878223258287194e-06, "loss": 0.1827, "step": 41504 }, { "epoch": 0.7705617418490066, "grad_norm": 0.24069981276988983, "learning_rate": 2.4870524295963485e-06, "loss": 0.1159, "step": 41506 }, { "epoch": 0.7705988719864252, "grad_norm": 0.3725051283836365, "learning_rate": 2.486282635592692e-06, "loss": 0.4259, "step": 41508 }, { "epoch": 0.7706360021238439, "grad_norm": 0.447422593832016, "learning_rate": 2.485512943828221e-06, "loss": 0.1063, "step": 41510 }, { "epoch": 0.7706731322612626, "grad_norm": 0.2678152322769165, "learning_rate": 2.4847433543134137e-06, "loss": 0.2049, "step": 41512 }, { "epoch": 0.7707102623986811, "grad_norm": 0.29245525598526, "learning_rate": 2.483973867058739e-06, "loss": 0.2605, "step": 41514 }, { "epoch": 0.7707473925360998, "grad_norm": 0.3939357399940491, "learning_rate": 2.4832044820746627e-06, "loss": 0.3611, "step": 41516 }, { "epoch": 0.7707845226735184, "grad_norm": 0.23557208478450775, "learning_rate": 2.482435199371659e-06, "loss": 0.2193, "step": 41518 }, { "epoch": 0.7708216528109371, "grad_norm": 0.44960853457450867, "learning_rate": 2.481666018960195e-06, "loss": 0.1783, "step": 41520 }, { "epoch": 0.7708587829483556, "grad_norm": 0.4091038703918457, "learning_rate": 2.4808969408507355e-06, "loss": 0.3157, "step": 41522 }, { "epoch": 0.7708959130857743, "grad_norm": 0.4602786600589752, "learning_rate": 2.4801279650537467e-06, "loss": 0.2898, "step": 41524 }, { "epoch": 0.770933043223193, "grad_norm": 0.3340320885181427, "learning_rate": 2.479359091579692e-06, "loss": 0.3423, "step": 41526 }, { "epoch": 0.7709701733606116, "grad_norm": 0.3317561745643616, "learning_rate": 2.478590320439035e-06, "loss": 0.2855, "step": 41528 }, { "epoch": 0.7710073034980303, "grad_norm": 0.3557310998439789, "learning_rate": 2.4778216516422328e-06, "loss": 0.2481, "step": 41530 }, { "epoch": 0.7710444336354488, "grad_norm": 0.2813953161239624, "learning_rate": 2.4770530851997455e-06, "loss": 0.1293, "step": 41532 }, { "epoch": 0.7710815637728675, "grad_norm": 0.322879433631897, "learning_rate": 2.4762846211220358e-06, "loss": 0.2917, "step": 41534 }, { "epoch": 0.7711186939102862, "grad_norm": 0.3595936894416809, "learning_rate": 2.4755162594195524e-06, "loss": 0.2568, "step": 41536 }, { "epoch": 0.7711558240477048, "grad_norm": 0.6751272082328796, "learning_rate": 2.4747480001027547e-06, "loss": 0.1539, "step": 41538 }, { "epoch": 0.7711929541851235, "grad_norm": 0.40497925877571106, "learning_rate": 2.4739798431820983e-06, "loss": 0.1931, "step": 41540 }, { "epoch": 0.771230084322542, "grad_norm": 0.3457527160644531, "learning_rate": 2.4732117886680306e-06, "loss": 0.336, "step": 41542 }, { "epoch": 0.7712672144599607, "grad_norm": 0.3313685357570648, "learning_rate": 2.4724438365710057e-06, "loss": 0.3642, "step": 41544 }, { "epoch": 0.7713043445973794, "grad_norm": 0.290331095457077, "learning_rate": 2.471675986901473e-06, "loss": 0.049, "step": 41546 }, { "epoch": 0.771341474734798, "grad_norm": 0.28030937910079956, "learning_rate": 2.470908239669878e-06, "loss": 0.3186, "step": 41548 }, { "epoch": 0.7713786048722167, "grad_norm": 0.6392655968666077, "learning_rate": 2.470140594886672e-06, "loss": 0.2418, "step": 41550 }, { "epoch": 0.7714157350096352, "grad_norm": 0.36246079206466675, "learning_rate": 2.4693730525622995e-06, "loss": 0.1193, "step": 41552 }, { "epoch": 0.7714528651470539, "grad_norm": 0.2808990776538849, "learning_rate": 2.4686056127072e-06, "loss": 0.3331, "step": 41554 }, { "epoch": 0.7714899952844726, "grad_norm": 0.3309437334537506, "learning_rate": 2.467838275331822e-06, "loss": 0.1739, "step": 41556 }, { "epoch": 0.7715271254218912, "grad_norm": 0.6773702502250671, "learning_rate": 2.4670710404466016e-06, "loss": 0.1455, "step": 41558 }, { "epoch": 0.7715642555593099, "grad_norm": 0.4083969295024872, "learning_rate": 2.46630390806198e-06, "loss": 0.189, "step": 41560 }, { "epoch": 0.7716013856967284, "grad_norm": 0.4024791121482849, "learning_rate": 2.465536878188396e-06, "loss": 0.4499, "step": 41562 }, { "epoch": 0.7716385158341471, "grad_norm": 0.5677170157432556, "learning_rate": 2.4647699508362864e-06, "loss": 0.1573, "step": 41564 }, { "epoch": 0.7716756459715658, "grad_norm": 0.3135163486003876, "learning_rate": 2.46400312601609e-06, "loss": 0.0972, "step": 41566 }, { "epoch": 0.7717127761089844, "grad_norm": 0.5223904848098755, "learning_rate": 2.4632364037382362e-06, "loss": 0.2923, "step": 41568 }, { "epoch": 0.771749906246403, "grad_norm": 0.3882283568382263, "learning_rate": 2.4624697840131593e-06, "loss": 0.1685, "step": 41570 }, { "epoch": 0.7717870363838216, "grad_norm": 0.7938657402992249, "learning_rate": 2.461703266851291e-06, "loss": 0.3399, "step": 41572 }, { "epoch": 0.7718241665212403, "grad_norm": 0.5284688472747803, "learning_rate": 2.460936852263065e-06, "loss": 0.2061, "step": 41574 }, { "epoch": 0.7718612966586589, "grad_norm": 0.4011911451816559, "learning_rate": 2.460170540258903e-06, "loss": 0.4157, "step": 41576 }, { "epoch": 0.7718984267960776, "grad_norm": 0.5499333143234253, "learning_rate": 2.4594043308492377e-06, "loss": 0.432, "step": 41578 }, { "epoch": 0.7719355569334962, "grad_norm": 0.3336786925792694, "learning_rate": 2.4586382240444906e-06, "loss": 0.1987, "step": 41580 }, { "epoch": 0.7719726870709148, "grad_norm": 0.4181004464626312, "learning_rate": 2.4578722198550873e-06, "loss": 0.3984, "step": 41582 }, { "epoch": 0.7720098172083335, "grad_norm": 0.6200852990150452, "learning_rate": 2.4571063182914534e-06, "loss": 0.322, "step": 41584 }, { "epoch": 0.7720469473457521, "grad_norm": 0.4113323986530304, "learning_rate": 2.4563405193640076e-06, "loss": 0.3082, "step": 41586 }, { "epoch": 0.7720840774831708, "grad_norm": 0.42549222707748413, "learning_rate": 2.4555748230831724e-06, "loss": 0.3536, "step": 41588 }, { "epoch": 0.7721212076205894, "grad_norm": 0.30046191811561584, "learning_rate": 2.4548092294593685e-06, "loss": 0.4161, "step": 41590 }, { "epoch": 0.772158337758008, "grad_norm": 0.30612480640411377, "learning_rate": 2.454043738503007e-06, "loss": 0.3458, "step": 41592 }, { "epoch": 0.7721954678954267, "grad_norm": 0.4304139018058777, "learning_rate": 2.4532783502245085e-06, "loss": 0.2181, "step": 41594 }, { "epoch": 0.7722325980328453, "grad_norm": 0.46697211265563965, "learning_rate": 2.4525130646342856e-06, "loss": 0.3102, "step": 41596 }, { "epoch": 0.772269728170264, "grad_norm": 0.25491881370544434, "learning_rate": 2.4517478817427553e-06, "loss": 0.3937, "step": 41598 }, { "epoch": 0.7723068583076826, "grad_norm": 0.42776280641555786, "learning_rate": 2.4509828015603242e-06, "loss": 0.2448, "step": 41600 }, { "epoch": 0.7723439884451012, "grad_norm": 0.2742282450199127, "learning_rate": 2.450217824097405e-06, "loss": 0.4178, "step": 41602 }, { "epoch": 0.7723811185825199, "grad_norm": 0.5798923969268799, "learning_rate": 2.4494529493644104e-06, "loss": 0.3012, "step": 41604 }, { "epoch": 0.7724182487199385, "grad_norm": 0.6069666147232056, "learning_rate": 2.4486881773717417e-06, "loss": 0.4084, "step": 41606 }, { "epoch": 0.7724553788573572, "grad_norm": 0.666145384311676, "learning_rate": 2.4479235081298083e-06, "loss": 0.1521, "step": 41608 }, { "epoch": 0.7724925089947758, "grad_norm": 0.4640580415725708, "learning_rate": 2.4471589416490137e-06, "loss": 0.3958, "step": 41610 }, { "epoch": 0.7725296391321944, "grad_norm": 0.3107159733772278, "learning_rate": 2.446394477939764e-06, "loss": 0.1295, "step": 41612 }, { "epoch": 0.7725667692696131, "grad_norm": 0.3692168891429901, "learning_rate": 2.4456301170124584e-06, "loss": 0.2517, "step": 41614 }, { "epoch": 0.7726038994070317, "grad_norm": 0.44470641016960144, "learning_rate": 2.444865858877503e-06, "loss": 0.3008, "step": 41616 }, { "epoch": 0.7726410295444504, "grad_norm": 0.477758526802063, "learning_rate": 2.4441017035452897e-06, "loss": 0.3206, "step": 41618 }, { "epoch": 0.772678159681869, "grad_norm": 0.6561027765274048, "learning_rate": 2.443337651026223e-06, "loss": 0.2473, "step": 41620 }, { "epoch": 0.7727152898192876, "grad_norm": 0.31912335753440857, "learning_rate": 2.4425737013306926e-06, "loss": 0.1001, "step": 41622 }, { "epoch": 0.7727524199567063, "grad_norm": 0.5909704566001892, "learning_rate": 2.441809854469097e-06, "loss": 0.217, "step": 41624 }, { "epoch": 0.7727895500941249, "grad_norm": 0.3824181854724884, "learning_rate": 2.44104611045183e-06, "loss": 0.3191, "step": 41626 }, { "epoch": 0.7728266802315436, "grad_norm": 0.3913993537425995, "learning_rate": 2.4402824692892867e-06, "loss": 0.3017, "step": 41628 }, { "epoch": 0.7728638103689621, "grad_norm": 0.4373913109302521, "learning_rate": 2.4395189309918522e-06, "loss": 0.3209, "step": 41630 }, { "epoch": 0.7729009405063808, "grad_norm": 0.3915177285671234, "learning_rate": 2.438755495569918e-06, "loss": 0.2948, "step": 41632 }, { "epoch": 0.7729380706437995, "grad_norm": 0.4179781973361969, "learning_rate": 2.4379921630338744e-06, "loss": 0.3249, "step": 41634 }, { "epoch": 0.7729752007812181, "grad_norm": 0.5380401015281677, "learning_rate": 2.4372289333941058e-06, "loss": 0.3862, "step": 41636 }, { "epoch": 0.7730123309186367, "grad_norm": 0.44070830941200256, "learning_rate": 2.436465806661e-06, "loss": 0.5223, "step": 41638 }, { "epoch": 0.7730494610560553, "grad_norm": 0.40604567527770996, "learning_rate": 2.4357027828449386e-06, "loss": 0.3453, "step": 41640 }, { "epoch": 0.773086591193474, "grad_norm": 0.28185978531837463, "learning_rate": 2.434939861956306e-06, "loss": 0.2626, "step": 41642 }, { "epoch": 0.7731237213308927, "grad_norm": 0.4027245342731476, "learning_rate": 2.434177044005479e-06, "loss": 0.3112, "step": 41644 }, { "epoch": 0.7731608514683113, "grad_norm": 0.4761393368244171, "learning_rate": 2.433414329002841e-06, "loss": 0.4452, "step": 41646 }, { "epoch": 0.77319798160573, "grad_norm": 0.4719437062740326, "learning_rate": 2.4326517169587695e-06, "loss": 0.1538, "step": 41648 }, { "epoch": 0.7732351117431485, "grad_norm": 0.3605237603187561, "learning_rate": 2.43188920788364e-06, "loss": 0.4141, "step": 41650 }, { "epoch": 0.7732722418805672, "grad_norm": 0.3867614269256592, "learning_rate": 2.431126801787831e-06, "loss": 0.1989, "step": 41652 }, { "epoch": 0.7733093720179859, "grad_norm": 0.40725454688072205, "learning_rate": 2.4303644986817165e-06, "loss": 0.1945, "step": 41654 }, { "epoch": 0.7733465021554045, "grad_norm": 0.6267882585525513, "learning_rate": 2.4296022985756653e-06, "loss": 0.3202, "step": 41656 }, { "epoch": 0.7733836322928231, "grad_norm": 0.46100282669067383, "learning_rate": 2.4288402014800495e-06, "loss": 0.2566, "step": 41658 }, { "epoch": 0.7734207624302417, "grad_norm": 0.45701903104782104, "learning_rate": 2.4280782074052446e-06, "loss": 0.2879, "step": 41660 }, { "epoch": 0.7734578925676604, "grad_norm": 0.32045847177505493, "learning_rate": 2.427316316361611e-06, "loss": 0.212, "step": 41662 }, { "epoch": 0.7734950227050791, "grad_norm": 0.21286286413669586, "learning_rate": 2.4265545283595206e-06, "loss": 0.1692, "step": 41664 }, { "epoch": 0.7735321528424977, "grad_norm": 0.577670693397522, "learning_rate": 2.4257928434093402e-06, "loss": 0.2247, "step": 41666 }, { "epoch": 0.7735692829799163, "grad_norm": 0.241823211312294, "learning_rate": 2.4250312615214287e-06, "loss": 0.2234, "step": 41668 }, { "epoch": 0.7736064131173349, "grad_norm": 0.5653408169746399, "learning_rate": 2.4242697827061524e-06, "loss": 0.312, "step": 41670 }, { "epoch": 0.7736435432547536, "grad_norm": 0.4778139293193817, "learning_rate": 2.4235084069738723e-06, "loss": 0.5307, "step": 41672 }, { "epoch": 0.7736806733921722, "grad_norm": 0.26522332429885864, "learning_rate": 2.4227471343349484e-06, "loss": 0.1984, "step": 41674 }, { "epoch": 0.7737178035295909, "grad_norm": 0.37601980566978455, "learning_rate": 2.4219859647997403e-06, "loss": 0.2903, "step": 41676 }, { "epoch": 0.7737549336670095, "grad_norm": 0.29758980870246887, "learning_rate": 2.4212248983786056e-06, "loss": 0.1257, "step": 41678 }, { "epoch": 0.7737920638044281, "grad_norm": 0.44700977206230164, "learning_rate": 2.420463935081898e-06, "loss": 0.1678, "step": 41680 }, { "epoch": 0.7738291939418468, "grad_norm": 0.3596184253692627, "learning_rate": 2.4197030749199746e-06, "loss": 0.2693, "step": 41682 }, { "epoch": 0.7738663240792654, "grad_norm": 0.3034147620201111, "learning_rate": 2.4189423179031844e-06, "loss": 0.2562, "step": 41684 }, { "epoch": 0.773903454216684, "grad_norm": 0.3415099084377289, "learning_rate": 2.418181664041881e-06, "loss": 0.4591, "step": 41686 }, { "epoch": 0.7739405843541027, "grad_norm": 0.4192591607570648, "learning_rate": 2.4174211133464155e-06, "loss": 0.2941, "step": 41688 }, { "epoch": 0.7739777144915213, "grad_norm": 0.3644876778125763, "learning_rate": 2.4166606658271353e-06, "loss": 0.1712, "step": 41690 }, { "epoch": 0.77401484462894, "grad_norm": 0.20664454996585846, "learning_rate": 2.415900321494392e-06, "loss": 0.1908, "step": 41692 }, { "epoch": 0.7740519747663586, "grad_norm": 0.5559563040733337, "learning_rate": 2.4151400803585267e-06, "loss": 0.1693, "step": 41694 }, { "epoch": 0.7740891049037772, "grad_norm": 0.28753381967544556, "learning_rate": 2.414379942429884e-06, "loss": 0.2624, "step": 41696 }, { "epoch": 0.7741262350411959, "grad_norm": 0.5968526005744934, "learning_rate": 2.4136199077188095e-06, "loss": 0.1892, "step": 41698 }, { "epoch": 0.7741633651786145, "grad_norm": 0.3468347191810608, "learning_rate": 2.412859976235644e-06, "loss": 0.2166, "step": 41700 }, { "epoch": 0.7742004953160332, "grad_norm": 0.28130000829696655, "learning_rate": 2.4121001479907303e-06, "loss": 0.162, "step": 41702 }, { "epoch": 0.7742376254534518, "grad_norm": 0.28966277837753296, "learning_rate": 2.4113404229944035e-06, "loss": 0.2251, "step": 41704 }, { "epoch": 0.7742747555908704, "grad_norm": 0.2702259421348572, "learning_rate": 2.4105808012570054e-06, "loss": 0.2324, "step": 41706 }, { "epoch": 0.7743118857282891, "grad_norm": 0.32897037267684937, "learning_rate": 2.409821282788867e-06, "loss": 0.2493, "step": 41708 }, { "epoch": 0.7743490158657077, "grad_norm": 0.5918217301368713, "learning_rate": 2.4090618676003242e-06, "loss": 0.2549, "step": 41710 }, { "epoch": 0.7743861460031264, "grad_norm": 0.36162179708480835, "learning_rate": 2.408302555701714e-06, "loss": 0.3247, "step": 41712 }, { "epoch": 0.774423276140545, "grad_norm": 0.3251967430114746, "learning_rate": 2.4075433471033647e-06, "loss": 0.2881, "step": 41714 }, { "epoch": 0.7744604062779636, "grad_norm": 0.24003437161445618, "learning_rate": 2.4067842418156117e-06, "loss": 0.2203, "step": 41716 }, { "epoch": 0.7744975364153823, "grad_norm": 0.6811937689781189, "learning_rate": 2.4060252398487784e-06, "loss": 0.2679, "step": 41718 }, { "epoch": 0.7745346665528009, "grad_norm": 0.41955360770225525, "learning_rate": 2.405266341213195e-06, "loss": 0.3401, "step": 41720 }, { "epoch": 0.7745717966902196, "grad_norm": 0.3426463007926941, "learning_rate": 2.4045075459191868e-06, "loss": 0.3038, "step": 41722 }, { "epoch": 0.7746089268276382, "grad_norm": 0.2100699245929718, "learning_rate": 2.403748853977084e-06, "loss": 0.1662, "step": 41724 }, { "epoch": 0.7746460569650568, "grad_norm": 0.23500922322273254, "learning_rate": 2.4029902653972024e-06, "loss": 0.3091, "step": 41726 }, { "epoch": 0.7746831871024754, "grad_norm": 0.3701079785823822, "learning_rate": 2.4022317801898677e-06, "loss": 0.3313, "step": 41728 }, { "epoch": 0.7747203172398941, "grad_norm": 0.4769270420074463, "learning_rate": 2.4014733983654025e-06, "loss": 0.3257, "step": 41730 }, { "epoch": 0.7747574473773128, "grad_norm": 0.5516478419303894, "learning_rate": 2.400715119934123e-06, "loss": 0.2694, "step": 41732 }, { "epoch": 0.7747945775147314, "grad_norm": 0.49802184104919434, "learning_rate": 2.3999569449063464e-06, "loss": 0.3135, "step": 41734 }, { "epoch": 0.77483170765215, "grad_norm": 0.42903468012809753, "learning_rate": 2.3991988732923923e-06, "loss": 0.2142, "step": 41736 }, { "epoch": 0.7748688377895686, "grad_norm": 0.2886156737804413, "learning_rate": 2.398440905102574e-06, "loss": 0.2809, "step": 41738 }, { "epoch": 0.7749059679269873, "grad_norm": 0.34705883264541626, "learning_rate": 2.3976830403472062e-06, "loss": 0.2142, "step": 41740 }, { "epoch": 0.774943098064406, "grad_norm": 0.5105149745941162, "learning_rate": 2.3969252790366026e-06, "loss": 0.2641, "step": 41742 }, { "epoch": 0.7749802282018246, "grad_norm": 0.37746235728263855, "learning_rate": 2.3961676211810704e-06, "loss": 0.3541, "step": 41744 }, { "epoch": 0.7750173583392432, "grad_norm": 0.3768836557865143, "learning_rate": 2.395410066790922e-06, "loss": 0.1611, "step": 41746 }, { "epoch": 0.7750544884766618, "grad_norm": 0.40296801924705505, "learning_rate": 2.3946526158764626e-06, "loss": 0.2451, "step": 41748 }, { "epoch": 0.7750916186140805, "grad_norm": 0.33458423614501953, "learning_rate": 2.393895268448e-06, "loss": 0.2358, "step": 41750 }, { "epoch": 0.7751287487514992, "grad_norm": 0.4118732511997223, "learning_rate": 2.39313802451584e-06, "loss": 0.2973, "step": 41752 }, { "epoch": 0.7751658788889177, "grad_norm": 0.2701396942138672, "learning_rate": 2.3923808840902898e-06, "loss": 0.2615, "step": 41754 }, { "epoch": 0.7752030090263364, "grad_norm": 0.40476492047309875, "learning_rate": 2.3916238471816444e-06, "loss": 0.2912, "step": 41756 }, { "epoch": 0.775240139163755, "grad_norm": 0.406203955411911, "learning_rate": 2.3908669138002094e-06, "loss": 0.5011, "step": 41758 }, { "epoch": 0.7752772693011737, "grad_norm": 0.4594648778438568, "learning_rate": 2.390110083956283e-06, "loss": 0.1457, "step": 41760 }, { "epoch": 0.7753143994385924, "grad_norm": 0.1936478465795517, "learning_rate": 2.389353357660165e-06, "loss": 0.3277, "step": 41762 }, { "epoch": 0.775351529576011, "grad_norm": 0.41431301832199097, "learning_rate": 2.3885967349221505e-06, "loss": 0.1855, "step": 41764 }, { "epoch": 0.7753886597134296, "grad_norm": 0.48124954104423523, "learning_rate": 2.3878402157525393e-06, "loss": 0.2191, "step": 41766 }, { "epoch": 0.7754257898508482, "grad_norm": 0.33568528294563293, "learning_rate": 2.3870838001616216e-06, "loss": 0.3367, "step": 41768 }, { "epoch": 0.7754629199882669, "grad_norm": 0.2685660719871521, "learning_rate": 2.3863274881596866e-06, "loss": 0.2314, "step": 41770 }, { "epoch": 0.7755000501256856, "grad_norm": 0.6494361162185669, "learning_rate": 2.3855712797570287e-06, "loss": 0.3372, "step": 41772 }, { "epoch": 0.7755371802631041, "grad_norm": 0.36974701285362244, "learning_rate": 2.384815174963938e-06, "loss": 0.2773, "step": 41774 }, { "epoch": 0.7755743104005228, "grad_norm": 0.3431699573993683, "learning_rate": 2.3840591737907038e-06, "loss": 0.2583, "step": 41776 }, { "epoch": 0.7756114405379414, "grad_norm": 0.21646954119205475, "learning_rate": 2.3833032762476103e-06, "loss": 0.4935, "step": 41778 }, { "epoch": 0.7756485706753601, "grad_norm": 0.467838853597641, "learning_rate": 2.382547482344948e-06, "loss": 0.1118, "step": 41780 }, { "epoch": 0.7756857008127787, "grad_norm": 0.4340430796146393, "learning_rate": 2.3817917920929946e-06, "loss": 0.4535, "step": 41782 }, { "epoch": 0.7757228309501973, "grad_norm": 0.4275455176830292, "learning_rate": 2.3810362055020365e-06, "loss": 0.3143, "step": 41784 }, { "epoch": 0.775759961087616, "grad_norm": 0.43500059843063354, "learning_rate": 2.3802807225823534e-06, "loss": 0.2285, "step": 41786 }, { "epoch": 0.7757970912250346, "grad_norm": 0.4928950369358063, "learning_rate": 2.379525343344229e-06, "loss": 0.1554, "step": 41788 }, { "epoch": 0.7758342213624533, "grad_norm": 0.4028860330581665, "learning_rate": 2.3787700677979363e-06, "loss": 0.1325, "step": 41790 }, { "epoch": 0.7758713514998719, "grad_norm": 0.3991203308105469, "learning_rate": 2.3780148959537564e-06, "loss": 0.2951, "step": 41792 }, { "epoch": 0.7759084816372905, "grad_norm": 0.4491865336894989, "learning_rate": 2.3772598278219618e-06, "loss": 0.2211, "step": 41794 }, { "epoch": 0.7759456117747092, "grad_norm": 0.361009806394577, "learning_rate": 2.3765048634128273e-06, "loss": 0.3317, "step": 41796 }, { "epoch": 0.7759827419121278, "grad_norm": 0.41626113653182983, "learning_rate": 2.3757500027366276e-06, "loss": 0.1571, "step": 41798 }, { "epoch": 0.7760198720495465, "grad_norm": 0.3314739465713501, "learning_rate": 2.374995245803632e-06, "loss": 0.379, "step": 41800 }, { "epoch": 0.776057002186965, "grad_norm": 0.5541840195655823, "learning_rate": 2.374240592624112e-06, "loss": 0.2985, "step": 41802 }, { "epoch": 0.7760941323243837, "grad_norm": 0.38556793332099915, "learning_rate": 2.3734860432083384e-06, "loss": 0.2697, "step": 41804 }, { "epoch": 0.7761312624618024, "grad_norm": 0.5331282615661621, "learning_rate": 2.372731597566573e-06, "loss": 0.2788, "step": 41806 }, { "epoch": 0.776168392599221, "grad_norm": 0.46881264448165894, "learning_rate": 2.371977255709085e-06, "loss": 0.2824, "step": 41808 }, { "epoch": 0.7762055227366397, "grad_norm": 0.28259921073913574, "learning_rate": 2.3712230176461394e-06, "loss": 0.2136, "step": 41810 }, { "epoch": 0.7762426528740582, "grad_norm": 0.4571775794029236, "learning_rate": 2.370468883387995e-06, "loss": 0.4443, "step": 41812 }, { "epoch": 0.7762797830114769, "grad_norm": 0.4409199655056, "learning_rate": 2.369714852944918e-06, "loss": 0.3909, "step": 41814 }, { "epoch": 0.7763169131488956, "grad_norm": 0.2831723988056183, "learning_rate": 2.368960926327164e-06, "loss": 0.2213, "step": 41816 }, { "epoch": 0.7763540432863142, "grad_norm": 0.6265026330947876, "learning_rate": 2.3682071035449984e-06, "loss": 0.3633, "step": 41818 }, { "epoch": 0.7763911734237329, "grad_norm": 0.2759802043437958, "learning_rate": 2.367453384608671e-06, "loss": 0.415, "step": 41820 }, { "epoch": 0.7764283035611514, "grad_norm": 0.4581483006477356, "learning_rate": 2.366699769528441e-06, "loss": 0.2496, "step": 41822 }, { "epoch": 0.7764654336985701, "grad_norm": 0.5159185528755188, "learning_rate": 2.365946258314563e-06, "loss": 0.4825, "step": 41824 }, { "epoch": 0.7765025638359887, "grad_norm": 0.5229365229606628, "learning_rate": 2.365192850977289e-06, "loss": 0.2857, "step": 41826 }, { "epoch": 0.7765396939734074, "grad_norm": 0.6025939583778381, "learning_rate": 2.3644395475268754e-06, "loss": 0.3303, "step": 41828 }, { "epoch": 0.7765768241108261, "grad_norm": 0.31592264771461487, "learning_rate": 2.363686347973565e-06, "loss": 0.2831, "step": 41830 }, { "epoch": 0.7766139542482446, "grad_norm": 0.2643633186817169, "learning_rate": 2.3629332523276126e-06, "loss": 0.3305, "step": 41832 }, { "epoch": 0.7766510843856633, "grad_norm": 0.2707612216472626, "learning_rate": 2.3621802605992605e-06, "loss": 0.2775, "step": 41834 }, { "epoch": 0.7766882145230819, "grad_norm": 0.4132586121559143, "learning_rate": 2.361427372798757e-06, "loss": 0.3679, "step": 41836 }, { "epoch": 0.7767253446605006, "grad_norm": 0.4120941758155823, "learning_rate": 2.3606745889363456e-06, "loss": 0.2227, "step": 41838 }, { "epoch": 0.7767624747979193, "grad_norm": 0.5422465801239014, "learning_rate": 2.3599219090222725e-06, "loss": 0.3482, "step": 41840 }, { "epoch": 0.7767996049353378, "grad_norm": 0.27213039994239807, "learning_rate": 2.35916933306678e-06, "loss": 0.2711, "step": 41842 }, { "epoch": 0.7768367350727565, "grad_norm": 0.4004543423652649, "learning_rate": 2.358416861080103e-06, "loss": 0.3512, "step": 41844 }, { "epoch": 0.7768738652101751, "grad_norm": 0.5901560187339783, "learning_rate": 2.3576644930724825e-06, "loss": 0.3286, "step": 41846 }, { "epoch": 0.7769109953475938, "grad_norm": 0.4954366087913513, "learning_rate": 2.3569122290541568e-06, "loss": 0.2201, "step": 41848 }, { "epoch": 0.7769481254850125, "grad_norm": 0.4452698528766632, "learning_rate": 2.356160069035366e-06, "loss": 0.2376, "step": 41850 }, { "epoch": 0.776985255622431, "grad_norm": 0.37313178181648254, "learning_rate": 2.355408013026337e-06, "loss": 0.2182, "step": 41852 }, { "epoch": 0.7770223857598497, "grad_norm": 0.43775200843811035, "learning_rate": 2.354656061037307e-06, "loss": 0.2139, "step": 41854 }, { "epoch": 0.7770595158972683, "grad_norm": 0.24441379308700562, "learning_rate": 2.3539042130785106e-06, "loss": 0.1814, "step": 41856 }, { "epoch": 0.777096646034687, "grad_norm": 0.3462609052658081, "learning_rate": 2.3531524691601714e-06, "loss": 0.2181, "step": 41858 }, { "epoch": 0.7771337761721057, "grad_norm": 0.31912118196487427, "learning_rate": 2.3524008292925237e-06, "loss": 0.2718, "step": 41860 }, { "epoch": 0.7771709063095242, "grad_norm": 0.23799973726272583, "learning_rate": 2.3516492934857925e-06, "loss": 0.1257, "step": 41862 }, { "epoch": 0.7772080364469429, "grad_norm": 0.445029079914093, "learning_rate": 2.3508978617502044e-06, "loss": 0.4255, "step": 41864 }, { "epoch": 0.7772451665843615, "grad_norm": 0.40211769938468933, "learning_rate": 2.350146534095986e-06, "loss": 0.2717, "step": 41866 }, { "epoch": 0.7772822967217802, "grad_norm": 0.38863760232925415, "learning_rate": 2.349395310533361e-06, "loss": 0.3589, "step": 41868 }, { "epoch": 0.7773194268591989, "grad_norm": 0.4855385720729828, "learning_rate": 2.3486441910725477e-06, "loss": 0.2991, "step": 41870 }, { "epoch": 0.7773565569966174, "grad_norm": 0.3380052149295807, "learning_rate": 2.3478931757237675e-06, "loss": 0.2445, "step": 41872 }, { "epoch": 0.7773936871340361, "grad_norm": 0.3508371114730835, "learning_rate": 2.3471422644972443e-06, "loss": 0.2756, "step": 41874 }, { "epoch": 0.7774308172714547, "grad_norm": 0.23072625696659088, "learning_rate": 2.346391457403189e-06, "loss": 0.1805, "step": 41876 }, { "epoch": 0.7774679474088734, "grad_norm": 0.3609980046749115, "learning_rate": 2.34564075445182e-06, "loss": 0.2276, "step": 41878 }, { "epoch": 0.777505077546292, "grad_norm": 0.30046242475509644, "learning_rate": 2.3448901556533565e-06, "loss": 0.1758, "step": 41880 }, { "epoch": 0.7775422076837106, "grad_norm": 0.2343037724494934, "learning_rate": 2.3441396610180044e-06, "loss": 0.1896, "step": 41882 }, { "epoch": 0.7775793378211293, "grad_norm": 0.48427316546440125, "learning_rate": 2.3433892705559803e-06, "loss": 0.2592, "step": 41884 }, { "epoch": 0.7776164679585479, "grad_norm": 0.2749733626842499, "learning_rate": 2.3426389842774945e-06, "loss": 0.2929, "step": 41886 }, { "epoch": 0.7776535980959666, "grad_norm": 0.3403457701206207, "learning_rate": 2.3418888021927544e-06, "loss": 0.1602, "step": 41888 }, { "epoch": 0.7776907282333851, "grad_norm": 0.5123617649078369, "learning_rate": 2.34113872431197e-06, "loss": 0.265, "step": 41890 }, { "epoch": 0.7777278583708038, "grad_norm": 0.4079894423484802, "learning_rate": 2.3403887506453495e-06, "loss": 0.161, "step": 41892 }, { "epoch": 0.7777649885082225, "grad_norm": 0.39733049273490906, "learning_rate": 2.3396388812030913e-06, "loss": 0.2258, "step": 41894 }, { "epoch": 0.7778021186456411, "grad_norm": 0.40713706612586975, "learning_rate": 2.3388891159954053e-06, "loss": 0.3025, "step": 41896 }, { "epoch": 0.7778392487830598, "grad_norm": 0.4357052445411682, "learning_rate": 2.3381394550324886e-06, "loss": 0.2517, "step": 41898 }, { "epoch": 0.7778763789204783, "grad_norm": 0.31053557991981506, "learning_rate": 2.3373898983245435e-06, "loss": 0.1198, "step": 41900 }, { "epoch": 0.777913509057897, "grad_norm": 0.31501707434654236, "learning_rate": 2.3366404458817705e-06, "loss": 0.2008, "step": 41902 }, { "epoch": 0.7779506391953157, "grad_norm": 0.26989200711250305, "learning_rate": 2.3358910977143657e-06, "loss": 0.484, "step": 41904 }, { "epoch": 0.7779877693327343, "grad_norm": 0.3833233714103699, "learning_rate": 2.33514185383253e-06, "loss": 0.2718, "step": 41906 }, { "epoch": 0.778024899470153, "grad_norm": 0.29126936197280884, "learning_rate": 2.334392714246452e-06, "loss": 0.1786, "step": 41908 }, { "epoch": 0.7780620296075715, "grad_norm": 0.4694153368473053, "learning_rate": 2.3336436789663276e-06, "loss": 0.2016, "step": 41910 }, { "epoch": 0.7780991597449902, "grad_norm": 0.4486958682537079, "learning_rate": 2.332894748002349e-06, "loss": 0.2268, "step": 41912 }, { "epoch": 0.7781362898824089, "grad_norm": 0.4123126268386841, "learning_rate": 2.33214592136471e-06, "loss": 0.2729, "step": 41914 }, { "epoch": 0.7781734200198275, "grad_norm": 0.27961716055870056, "learning_rate": 2.3313971990635943e-06, "loss": 0.184, "step": 41916 }, { "epoch": 0.7782105501572462, "grad_norm": 0.5163271427154541, "learning_rate": 2.3306485811091962e-06, "loss": 0.2869, "step": 41918 }, { "epoch": 0.7782476802946647, "grad_norm": 0.56910240650177, "learning_rate": 2.3299000675116954e-06, "loss": 0.2688, "step": 41920 }, { "epoch": 0.7782848104320834, "grad_norm": 0.34338659048080444, "learning_rate": 2.32915165828128e-06, "loss": 0.4302, "step": 41922 }, { "epoch": 0.7783219405695021, "grad_norm": 0.4305938184261322, "learning_rate": 2.3284033534281334e-06, "loss": 0.4123, "step": 41924 }, { "epoch": 0.7783590707069207, "grad_norm": 0.8954707384109497, "learning_rate": 2.327655152962438e-06, "loss": 0.2256, "step": 41926 }, { "epoch": 0.7783962008443394, "grad_norm": 0.6107361912727356, "learning_rate": 2.326907056894375e-06, "loss": 0.2014, "step": 41928 }, { "epoch": 0.7784333309817579, "grad_norm": 0.38959434628486633, "learning_rate": 2.3261590652341257e-06, "loss": 0.3396, "step": 41930 }, { "epoch": 0.7784704611191766, "grad_norm": 0.38291648030281067, "learning_rate": 2.325411177991864e-06, "loss": 0.2578, "step": 41932 }, { "epoch": 0.7785075912565952, "grad_norm": 0.40688517689704895, "learning_rate": 2.3246633951777675e-06, "loss": 0.3241, "step": 41934 }, { "epoch": 0.7785447213940139, "grad_norm": 0.9046632647514343, "learning_rate": 2.323915716802014e-06, "loss": 0.3008, "step": 41936 }, { "epoch": 0.7785818515314326, "grad_norm": 0.5526497960090637, "learning_rate": 2.323168142874773e-06, "loss": 0.2242, "step": 41938 }, { "epoch": 0.7786189816688511, "grad_norm": 0.43880075216293335, "learning_rate": 2.3224206734062196e-06, "loss": 0.2335, "step": 41940 }, { "epoch": 0.7786561118062698, "grad_norm": 0.4893726408481598, "learning_rate": 2.3216733084065223e-06, "loss": 0.5103, "step": 41942 }, { "epoch": 0.7786932419436884, "grad_norm": 0.3787483870983124, "learning_rate": 2.3209260478858552e-06, "loss": 0.3352, "step": 41944 }, { "epoch": 0.7787303720811071, "grad_norm": 0.3361474275588989, "learning_rate": 2.320178891854381e-06, "loss": 0.1912, "step": 41946 }, { "epoch": 0.7787675022185258, "grad_norm": 0.4269227087497711, "learning_rate": 2.319431840322268e-06, "loss": 0.3048, "step": 41948 }, { "epoch": 0.7788046323559443, "grad_norm": 0.3966909945011139, "learning_rate": 2.318684893299682e-06, "loss": 0.2092, "step": 41950 }, { "epoch": 0.778841762493363, "grad_norm": 0.42484644055366516, "learning_rate": 2.3179380507967853e-06, "loss": 0.1543, "step": 41952 }, { "epoch": 0.7788788926307816, "grad_norm": 0.18529032170772552, "learning_rate": 2.3171913128237424e-06, "loss": 0.2175, "step": 41954 }, { "epoch": 0.7789160227682003, "grad_norm": 0.303256094455719, "learning_rate": 2.3164446793907146e-06, "loss": 0.2586, "step": 41956 }, { "epoch": 0.778953152905619, "grad_norm": 0.4764872193336487, "learning_rate": 2.3156981505078614e-06, "loss": 0.2801, "step": 41958 }, { "epoch": 0.7789902830430375, "grad_norm": 0.2969540059566498, "learning_rate": 2.314951726185336e-06, "loss": 0.3265, "step": 41960 }, { "epoch": 0.7790274131804562, "grad_norm": 0.5049210786819458, "learning_rate": 2.3142054064332973e-06, "loss": 0.2828, "step": 41962 }, { "epoch": 0.7790645433178748, "grad_norm": 0.30695393681526184, "learning_rate": 2.313459191261902e-06, "loss": 0.1335, "step": 41964 }, { "epoch": 0.7791016734552935, "grad_norm": 0.3008454144001007, "learning_rate": 2.3127130806813037e-06, "loss": 0.1898, "step": 41966 }, { "epoch": 0.7791388035927121, "grad_norm": 0.5297009944915771, "learning_rate": 2.3119670747016565e-06, "loss": 0.3236, "step": 41968 }, { "epoch": 0.7791759337301307, "grad_norm": 0.42279383540153503, "learning_rate": 2.311221173333106e-06, "loss": 0.2575, "step": 41970 }, { "epoch": 0.7792130638675494, "grad_norm": 0.4686029851436615, "learning_rate": 2.3104753765858056e-06, "loss": 0.1267, "step": 41972 }, { "epoch": 0.779250194004968, "grad_norm": 0.39017078280448914, "learning_rate": 2.3097296844699015e-06, "loss": 0.4226, "step": 41974 }, { "epoch": 0.7792873241423867, "grad_norm": 0.35871565341949463, "learning_rate": 2.3089840969955425e-06, "loss": 0.3409, "step": 41976 }, { "epoch": 0.7793244542798052, "grad_norm": 0.34710395336151123, "learning_rate": 2.308238614172875e-06, "loss": 0.3553, "step": 41978 }, { "epoch": 0.7793615844172239, "grad_norm": 0.31683486700057983, "learning_rate": 2.307493236012037e-06, "loss": 0.211, "step": 41980 }, { "epoch": 0.7793987145546426, "grad_norm": 0.3108522593975067, "learning_rate": 2.306747962523178e-06, "loss": 0.2902, "step": 41982 }, { "epoch": 0.7794358446920612, "grad_norm": 0.472512811422348, "learning_rate": 2.306002793716432e-06, "loss": 0.2839, "step": 41984 }, { "epoch": 0.7794729748294799, "grad_norm": 0.14931365847587585, "learning_rate": 2.305257729601942e-06, "loss": 0.0513, "step": 41986 }, { "epoch": 0.7795101049668984, "grad_norm": 0.5165044069290161, "learning_rate": 2.304512770189846e-06, "loss": 0.2634, "step": 41988 }, { "epoch": 0.7795472351043171, "grad_norm": 0.29606005549430847, "learning_rate": 2.3037679154902805e-06, "loss": 0.2472, "step": 41990 }, { "epoch": 0.7795843652417358, "grad_norm": 0.2096748650074005, "learning_rate": 2.3030231655133806e-06, "loss": 0.3337, "step": 41992 }, { "epoch": 0.7796214953791544, "grad_norm": 0.5780453085899353, "learning_rate": 2.302278520269283e-06, "loss": 0.1443, "step": 41994 }, { "epoch": 0.7796586255165731, "grad_norm": 0.5483688712120056, "learning_rate": 2.3015339797681158e-06, "loss": 0.3706, "step": 41996 }, { "epoch": 0.7796957556539916, "grad_norm": 0.6324063539505005, "learning_rate": 2.30078954402001e-06, "loss": 0.2967, "step": 41998 }, { "epoch": 0.7797328857914103, "grad_norm": 0.29727110266685486, "learning_rate": 2.3000452130351e-06, "loss": 0.1619, "step": 42000 }, { "epoch": 0.779770015928829, "grad_norm": 0.4517129957675934, "learning_rate": 2.2993009868235084e-06, "loss": 0.2119, "step": 42002 }, { "epoch": 0.7798071460662476, "grad_norm": 0.37397605180740356, "learning_rate": 2.2985568653953637e-06, "loss": 0.3, "step": 42004 }, { "epoch": 0.7798442762036663, "grad_norm": 0.5794094800949097, "learning_rate": 2.2978128487607943e-06, "loss": 0.15, "step": 42006 }, { "epoch": 0.7798814063410848, "grad_norm": 0.5519453883171082, "learning_rate": 2.297068936929918e-06, "loss": 0.2503, "step": 42008 }, { "epoch": 0.7799185364785035, "grad_norm": 0.28805795311927795, "learning_rate": 2.296325129912861e-06, "loss": 0.2611, "step": 42010 }, { "epoch": 0.7799556666159222, "grad_norm": 0.368137925863266, "learning_rate": 2.295581427719744e-06, "loss": 0.2944, "step": 42012 }, { "epoch": 0.7799927967533408, "grad_norm": 0.23401226103305817, "learning_rate": 2.2948378303606855e-06, "loss": 0.2212, "step": 42014 }, { "epoch": 0.7800299268907595, "grad_norm": 0.4040607810020447, "learning_rate": 2.294094337845806e-06, "loss": 0.3175, "step": 42016 }, { "epoch": 0.780067057028178, "grad_norm": 0.4615797698497772, "learning_rate": 2.293350950185219e-06, "loss": 0.3436, "step": 42018 }, { "epoch": 0.7801041871655967, "grad_norm": 0.9331826567649841, "learning_rate": 2.292607667389045e-06, "loss": 0.3342, "step": 42020 }, { "epoch": 0.7801413173030154, "grad_norm": 0.34921762347221375, "learning_rate": 2.2918644894673948e-06, "loss": 0.153, "step": 42022 }, { "epoch": 0.780178447440434, "grad_norm": 0.2864750623703003, "learning_rate": 2.2911214164303776e-06, "loss": 0.2322, "step": 42024 }, { "epoch": 0.7802155775778526, "grad_norm": 0.2754424512386322, "learning_rate": 2.2903784482881063e-06, "loss": 0.2532, "step": 42026 }, { "epoch": 0.7802527077152712, "grad_norm": 0.3516775965690613, "learning_rate": 2.2896355850506923e-06, "loss": 0.2639, "step": 42028 }, { "epoch": 0.7802898378526899, "grad_norm": 1.0208972692489624, "learning_rate": 2.2888928267282428e-06, "loss": 0.1987, "step": 42030 }, { "epoch": 0.7803269679901085, "grad_norm": 0.47181200981140137, "learning_rate": 2.2881501733308663e-06, "loss": 0.2332, "step": 42032 }, { "epoch": 0.7803640981275272, "grad_norm": 0.3775668740272522, "learning_rate": 2.2874076248686637e-06, "loss": 0.2129, "step": 42034 }, { "epoch": 0.7804012282649458, "grad_norm": 0.4194645583629608, "learning_rate": 2.2866651813517417e-06, "loss": 0.315, "step": 42036 }, { "epoch": 0.7804383584023644, "grad_norm": 0.47878801822662354, "learning_rate": 2.2859228427902026e-06, "loss": 0.2684, "step": 42038 }, { "epoch": 0.7804754885397831, "grad_norm": 0.6330925822257996, "learning_rate": 2.2851806091941476e-06, "loss": 0.248, "step": 42040 }, { "epoch": 0.7805126186772017, "grad_norm": 0.5842950344085693, "learning_rate": 2.2844384805736776e-06, "loss": 0.2271, "step": 42042 }, { "epoch": 0.7805497488146204, "grad_norm": 0.49504387378692627, "learning_rate": 2.2836964569388895e-06, "loss": 0.3843, "step": 42044 }, { "epoch": 0.780586878952039, "grad_norm": 0.4040229618549347, "learning_rate": 2.2829545382998777e-06, "loss": 0.2696, "step": 42046 }, { "epoch": 0.7806240090894576, "grad_norm": 0.20205792784690857, "learning_rate": 2.282212724666739e-06, "loss": 0.2228, "step": 42048 }, { "epoch": 0.7806611392268763, "grad_norm": 0.46668437123298645, "learning_rate": 2.281471016049568e-06, "loss": 0.3114, "step": 42050 }, { "epoch": 0.7806982693642949, "grad_norm": 0.6821794509887695, "learning_rate": 2.2807294124584557e-06, "loss": 0.3434, "step": 42052 }, { "epoch": 0.7807353995017136, "grad_norm": 0.4331595003604889, "learning_rate": 2.2799879139034943e-06, "loss": 0.4891, "step": 42054 }, { "epoch": 0.7807725296391322, "grad_norm": 0.41589832305908203, "learning_rate": 2.2792465203947744e-06, "loss": 0.2665, "step": 42056 }, { "epoch": 0.7808096597765508, "grad_norm": 0.5310564041137695, "learning_rate": 2.278505231942385e-06, "loss": 0.4039, "step": 42058 }, { "epoch": 0.7808467899139695, "grad_norm": 0.38395994901657104, "learning_rate": 2.277764048556408e-06, "loss": 0.2773, "step": 42060 }, { "epoch": 0.7808839200513881, "grad_norm": 0.5153673887252808, "learning_rate": 2.2770229702469314e-06, "loss": 0.1792, "step": 42062 }, { "epoch": 0.7809210501888068, "grad_norm": 0.2802532911300659, "learning_rate": 2.2762819970240425e-06, "loss": 0.1351, "step": 42064 }, { "epoch": 0.7809581803262254, "grad_norm": 0.30472829937934875, "learning_rate": 2.275541128897818e-06, "loss": 0.4752, "step": 42066 }, { "epoch": 0.780995310463644, "grad_norm": 0.30499258637428284, "learning_rate": 2.2748003658783403e-06, "loss": 0.1809, "step": 42068 }, { "epoch": 0.7810324406010627, "grad_norm": 0.39047542214393616, "learning_rate": 2.274059707975693e-06, "loss": 0.1683, "step": 42070 }, { "epoch": 0.7810695707384813, "grad_norm": 0.5352392196655273, "learning_rate": 2.273319155199949e-06, "loss": 0.1672, "step": 42072 }, { "epoch": 0.7811067008759, "grad_norm": 0.39798739552497864, "learning_rate": 2.2725787075611873e-06, "loss": 0.3741, "step": 42074 }, { "epoch": 0.7811438310133186, "grad_norm": 0.2674466669559479, "learning_rate": 2.271838365069482e-06, "loss": 0.1709, "step": 42076 }, { "epoch": 0.7811809611507372, "grad_norm": 0.7873106598854065, "learning_rate": 2.2710981277349085e-06, "loss": 0.2155, "step": 42078 }, { "epoch": 0.7812180912881559, "grad_norm": 0.6064042448997498, "learning_rate": 2.2703579955675393e-06, "loss": 0.2535, "step": 42080 }, { "epoch": 0.7812552214255745, "grad_norm": 0.4949207603931427, "learning_rate": 2.2696179685774467e-06, "loss": 0.4307, "step": 42082 }, { "epoch": 0.7812923515629931, "grad_norm": 0.3666563928127289, "learning_rate": 2.268878046774696e-06, "loss": 0.198, "step": 42084 }, { "epoch": 0.7813294817004117, "grad_norm": 0.3478747606277466, "learning_rate": 2.268138230169361e-06, "loss": 0.23, "step": 42086 }, { "epoch": 0.7813666118378304, "grad_norm": 0.4900912046432495, "learning_rate": 2.2673985187715018e-06, "loss": 0.4835, "step": 42088 }, { "epoch": 0.7814037419752491, "grad_norm": 0.25482162833213806, "learning_rate": 2.2666589125911865e-06, "loss": 0.2716, "step": 42090 }, { "epoch": 0.7814408721126677, "grad_norm": 0.4350324869155884, "learning_rate": 2.2659194116384807e-06, "loss": 0.1359, "step": 42092 }, { "epoch": 0.7814780022500863, "grad_norm": 0.6658808588981628, "learning_rate": 2.265180015923447e-06, "loss": 0.2899, "step": 42094 }, { "epoch": 0.7815151323875049, "grad_norm": 0.46713539958000183, "learning_rate": 2.264440725456143e-06, "loss": 0.4535, "step": 42096 }, { "epoch": 0.7815522625249236, "grad_norm": 0.5268535614013672, "learning_rate": 2.263701540246629e-06, "loss": 0.4016, "step": 42098 }, { "epoch": 0.7815893926623423, "grad_norm": 0.27765798568725586, "learning_rate": 2.262962460304965e-06, "loss": 0.2465, "step": 42100 }, { "epoch": 0.7816265227997609, "grad_norm": 0.24521902203559875, "learning_rate": 2.262223485641206e-06, "loss": 0.3014, "step": 42102 }, { "epoch": 0.7816636529371795, "grad_norm": 0.23071905970573425, "learning_rate": 2.2614846162654114e-06, "loss": 0.2213, "step": 42104 }, { "epoch": 0.7817007830745981, "grad_norm": 0.32160794734954834, "learning_rate": 2.260745852187628e-06, "loss": 0.2285, "step": 42106 }, { "epoch": 0.7817379132120168, "grad_norm": 0.5220719575881958, "learning_rate": 2.2600071934179156e-06, "loss": 0.4042, "step": 42108 }, { "epoch": 0.7817750433494355, "grad_norm": 0.27637115120887756, "learning_rate": 2.2592686399663176e-06, "loss": 0.4533, "step": 42110 }, { "epoch": 0.7818121734868541, "grad_norm": 0.2628991901874542, "learning_rate": 2.258530191842888e-06, "loss": 0.3948, "step": 42112 }, { "epoch": 0.7818493036242727, "grad_norm": 0.37218040227890015, "learning_rate": 2.2577918490576747e-06, "loss": 0.2119, "step": 42114 }, { "epoch": 0.7818864337616913, "grad_norm": 0.4413298964500427, "learning_rate": 2.257053611620723e-06, "loss": 0.363, "step": 42116 }, { "epoch": 0.78192356389911, "grad_norm": 0.5194867253303528, "learning_rate": 2.2563154795420785e-06, "loss": 0.2652, "step": 42118 }, { "epoch": 0.7819606940365287, "grad_norm": 0.555852472782135, "learning_rate": 2.2555774528317885e-06, "loss": 0.1859, "step": 42120 }, { "epoch": 0.7819978241739473, "grad_norm": 0.4232317805290222, "learning_rate": 2.2548395314998895e-06, "loss": 0.185, "step": 42122 }, { "epoch": 0.7820349543113659, "grad_norm": 0.43475258350372314, "learning_rate": 2.254101715556425e-06, "loss": 0.3747, "step": 42124 }, { "epoch": 0.7820720844487845, "grad_norm": 0.4701539874076843, "learning_rate": 2.2533640050114338e-06, "loss": 0.2549, "step": 42126 }, { "epoch": 0.7821092145862032, "grad_norm": 0.45172321796417236, "learning_rate": 2.2526263998749575e-06, "loss": 0.4192, "step": 42128 }, { "epoch": 0.7821463447236218, "grad_norm": 0.3604142367839813, "learning_rate": 2.2518889001570276e-06, "loss": 0.3117, "step": 42130 }, { "epoch": 0.7821834748610405, "grad_norm": 0.5219382643699646, "learning_rate": 2.2511515058676848e-06, "loss": 0.4455, "step": 42132 }, { "epoch": 0.7822206049984591, "grad_norm": 0.4895544648170471, "learning_rate": 2.2504142170169554e-06, "loss": 0.3093, "step": 42134 }, { "epoch": 0.7822577351358777, "grad_norm": 0.25226712226867676, "learning_rate": 2.2496770336148767e-06, "loss": 0.3284, "step": 42136 }, { "epoch": 0.7822948652732964, "grad_norm": 0.37229016423225403, "learning_rate": 2.2489399556714786e-06, "loss": 0.2861, "step": 42138 }, { "epoch": 0.782331995410715, "grad_norm": 0.28121745586395264, "learning_rate": 2.24820298319679e-06, "loss": 0.2673, "step": 42140 }, { "epoch": 0.7823691255481336, "grad_norm": 0.527967095375061, "learning_rate": 2.2474661162008404e-06, "loss": 0.2407, "step": 42142 }, { "epoch": 0.7824062556855523, "grad_norm": 0.46580854058265686, "learning_rate": 2.2467293546936543e-06, "loss": 0.3723, "step": 42144 }, { "epoch": 0.7824433858229709, "grad_norm": 0.2727673053741455, "learning_rate": 2.245992698685262e-06, "loss": 0.3469, "step": 42146 }, { "epoch": 0.7824805159603896, "grad_norm": 0.7394891977310181, "learning_rate": 2.2452561481856794e-06, "loss": 0.3744, "step": 42148 }, { "epoch": 0.7825176460978082, "grad_norm": 0.5637769103050232, "learning_rate": 2.244519703204936e-06, "loss": 0.1769, "step": 42150 }, { "epoch": 0.7825547762352268, "grad_norm": 0.339983195066452, "learning_rate": 2.243783363753045e-06, "loss": 0.2278, "step": 42152 }, { "epoch": 0.7825919063726455, "grad_norm": 0.4092691242694855, "learning_rate": 2.2430471298400315e-06, "loss": 0.2464, "step": 42154 }, { "epoch": 0.7826290365100641, "grad_norm": 0.35632917284965515, "learning_rate": 2.2423110014759106e-06, "loss": 0.2528, "step": 42156 }, { "epoch": 0.7826661666474828, "grad_norm": 0.36183103919029236, "learning_rate": 2.241574978670703e-06, "loss": 0.3273, "step": 42158 }, { "epoch": 0.7827032967849014, "grad_norm": 0.35036203265190125, "learning_rate": 2.240839061434419e-06, "loss": 0.2789, "step": 42160 }, { "epoch": 0.78274042692232, "grad_norm": 0.3006502389907837, "learning_rate": 2.2401032497770725e-06, "loss": 0.3745, "step": 42162 }, { "epoch": 0.7827775570597387, "grad_norm": 0.4061864912509918, "learning_rate": 2.239367543708678e-06, "loss": 0.2231, "step": 42164 }, { "epoch": 0.7828146871971573, "grad_norm": 0.39592668414115906, "learning_rate": 2.2386319432392457e-06, "loss": 0.2048, "step": 42166 }, { "epoch": 0.782851817334576, "grad_norm": 0.2266625612974167, "learning_rate": 2.237896448378787e-06, "loss": 0.2765, "step": 42168 }, { "epoch": 0.7828889474719946, "grad_norm": 0.363943487405777, "learning_rate": 2.237161059137305e-06, "loss": 0.1758, "step": 42170 }, { "epoch": 0.7829260776094132, "grad_norm": 0.43294623494148254, "learning_rate": 2.236425775524811e-06, "loss": 0.358, "step": 42172 }, { "epoch": 0.7829632077468319, "grad_norm": 0.42422956228256226, "learning_rate": 2.2356905975513042e-06, "loss": 0.1218, "step": 42174 }, { "epoch": 0.7830003378842505, "grad_norm": 0.5290468335151672, "learning_rate": 2.2349555252267928e-06, "loss": 0.4254, "step": 42176 }, { "epoch": 0.7830374680216692, "grad_norm": 0.2562585175037384, "learning_rate": 2.2342205585612775e-06, "loss": 0.2152, "step": 42178 }, { "epoch": 0.7830745981590878, "grad_norm": 0.632354199886322, "learning_rate": 2.233485697564758e-06, "loss": 0.4759, "step": 42180 }, { "epoch": 0.7831117282965064, "grad_norm": 0.588380753993988, "learning_rate": 2.232750942247236e-06, "loss": 0.31, "step": 42182 }, { "epoch": 0.783148858433925, "grad_norm": 0.43167319893836975, "learning_rate": 2.2320162926187108e-06, "loss": 0.3145, "step": 42184 }, { "epoch": 0.7831859885713437, "grad_norm": 0.4440044164657593, "learning_rate": 2.2312817486891723e-06, "loss": 0.2789, "step": 42186 }, { "epoch": 0.7832231187087624, "grad_norm": 0.31821054220199585, "learning_rate": 2.2305473104686205e-06, "loss": 0.3079, "step": 42188 }, { "epoch": 0.783260248846181, "grad_norm": 0.38078948855400085, "learning_rate": 2.2298129779670497e-06, "loss": 0.3356, "step": 42190 }, { "epoch": 0.7832973789835996, "grad_norm": 0.32156088948249817, "learning_rate": 2.2290787511944467e-06, "loss": 0.2949, "step": 42192 }, { "epoch": 0.7833345091210182, "grad_norm": 0.29858896136283875, "learning_rate": 2.2283446301608056e-06, "loss": 0.3889, "step": 42194 }, { "epoch": 0.7833716392584369, "grad_norm": 0.32905834913253784, "learning_rate": 2.227610614876119e-06, "loss": 0.2349, "step": 42196 }, { "epoch": 0.7834087693958556, "grad_norm": 0.3558305501937866, "learning_rate": 2.226876705350367e-06, "loss": 0.3838, "step": 42198 }, { "epoch": 0.7834458995332741, "grad_norm": 0.5639286637306213, "learning_rate": 2.22614290159354e-06, "loss": 0.2484, "step": 42200 }, { "epoch": 0.7834830296706928, "grad_norm": 0.4013896882534027, "learning_rate": 2.2254092036156226e-06, "loss": 0.2619, "step": 42202 }, { "epoch": 0.7835201598081114, "grad_norm": 0.46153512597084045, "learning_rate": 2.2246756114265986e-06, "loss": 0.1666, "step": 42204 }, { "epoch": 0.7835572899455301, "grad_norm": 0.49233052134513855, "learning_rate": 2.22394212503645e-06, "loss": 0.3297, "step": 42206 }, { "epoch": 0.7835944200829488, "grad_norm": 0.4743593633174896, "learning_rate": 2.2232087444551596e-06, "loss": 0.1964, "step": 42208 }, { "epoch": 0.7836315502203673, "grad_norm": 0.49502843618392944, "learning_rate": 2.2224754696927007e-06, "loss": 0.2462, "step": 42210 }, { "epoch": 0.783668680357786, "grad_norm": 0.3522895276546478, "learning_rate": 2.2217423007590575e-06, "loss": 0.2152, "step": 42212 }, { "epoch": 0.7837058104952046, "grad_norm": 0.43937209248542786, "learning_rate": 2.2210092376642012e-06, "loss": 0.2305, "step": 42214 }, { "epoch": 0.7837429406326233, "grad_norm": 0.5793728828430176, "learning_rate": 2.220276280418108e-06, "loss": 0.4118, "step": 42216 }, { "epoch": 0.783780070770042, "grad_norm": 0.34709978103637695, "learning_rate": 2.2195434290307507e-06, "loss": 0.3892, "step": 42218 }, { "epoch": 0.7838172009074605, "grad_norm": 0.4877430498600006, "learning_rate": 2.218810683512105e-06, "loss": 0.1858, "step": 42220 }, { "epoch": 0.7838543310448792, "grad_norm": 0.20193272829055786, "learning_rate": 2.218078043872136e-06, "loss": 0.1909, "step": 42222 }, { "epoch": 0.7838914611822978, "grad_norm": 0.6479068994522095, "learning_rate": 2.217345510120816e-06, "loss": 0.1777, "step": 42224 }, { "epoch": 0.7839285913197165, "grad_norm": 0.3782392740249634, "learning_rate": 2.216613082268111e-06, "loss": 0.3121, "step": 42226 }, { "epoch": 0.7839657214571352, "grad_norm": 0.5034322142601013, "learning_rate": 2.2158807603239883e-06, "loss": 0.1657, "step": 42228 }, { "epoch": 0.7840028515945537, "grad_norm": 0.6439116597175598, "learning_rate": 2.2151485442984123e-06, "loss": 0.2578, "step": 42230 }, { "epoch": 0.7840399817319724, "grad_norm": 0.3102177679538727, "learning_rate": 2.2144164342013495e-06, "loss": 0.2814, "step": 42232 }, { "epoch": 0.784077111869391, "grad_norm": 0.43839606642723083, "learning_rate": 2.213684430042755e-06, "loss": 0.4285, "step": 42234 }, { "epoch": 0.7841142420068097, "grad_norm": 0.3004555404186249, "learning_rate": 2.2129525318325962e-06, "loss": 0.2383, "step": 42236 }, { "epoch": 0.7841513721442283, "grad_norm": 0.4291225075721741, "learning_rate": 2.212220739580825e-06, "loss": 0.301, "step": 42238 }, { "epoch": 0.7841885022816469, "grad_norm": 0.2695433497428894, "learning_rate": 2.2114890532974033e-06, "loss": 0.1461, "step": 42240 }, { "epoch": 0.7842256324190656, "grad_norm": 0.7617182731628418, "learning_rate": 2.2107574729922855e-06, "loss": 0.2494, "step": 42242 }, { "epoch": 0.7842627625564842, "grad_norm": 0.5847040414810181, "learning_rate": 2.2100259986754267e-06, "loss": 0.27, "step": 42244 }, { "epoch": 0.7842998926939029, "grad_norm": 0.3790019750595093, "learning_rate": 2.2092946303567842e-06, "loss": 0.2949, "step": 42246 }, { "epoch": 0.7843370228313215, "grad_norm": 0.38911423087120056, "learning_rate": 2.2085633680463026e-06, "loss": 0.2054, "step": 42248 }, { "epoch": 0.7843741529687401, "grad_norm": 0.2824215292930603, "learning_rate": 2.2078322117539363e-06, "loss": 0.1621, "step": 42250 }, { "epoch": 0.7844112831061588, "grad_norm": 0.37508857250213623, "learning_rate": 2.207101161489632e-06, "loss": 0.2754, "step": 42252 }, { "epoch": 0.7844484132435774, "grad_norm": 0.45144888758659363, "learning_rate": 2.2063702172633417e-06, "loss": 0.1477, "step": 42254 }, { "epoch": 0.7844855433809961, "grad_norm": 0.4616270661354065, "learning_rate": 2.205639379085006e-06, "loss": 0.3076, "step": 42256 }, { "epoch": 0.7845226735184146, "grad_norm": 0.3871453106403351, "learning_rate": 2.204908646964574e-06, "loss": 0.1862, "step": 42258 }, { "epoch": 0.7845598036558333, "grad_norm": 0.2531983256340027, "learning_rate": 2.2041780209119833e-06, "loss": 0.2268, "step": 42260 }, { "epoch": 0.784596933793252, "grad_norm": 0.227292999625206, "learning_rate": 2.203447500937178e-06, "loss": 0.2692, "step": 42262 }, { "epoch": 0.7846340639306706, "grad_norm": 0.6295682787895203, "learning_rate": 2.2027170870501002e-06, "loss": 0.1114, "step": 42264 }, { "epoch": 0.7846711940680893, "grad_norm": 0.45311617851257324, "learning_rate": 2.2019867792606865e-06, "loss": 0.2884, "step": 42266 }, { "epoch": 0.7847083242055078, "grad_norm": 0.45629945397377014, "learning_rate": 2.2012565775788754e-06, "loss": 0.2424, "step": 42268 }, { "epoch": 0.7847454543429265, "grad_norm": 0.6066263318061829, "learning_rate": 2.2005264820146012e-06, "loss": 0.3604, "step": 42270 }, { "epoch": 0.7847825844803452, "grad_norm": 0.3385358452796936, "learning_rate": 2.1997964925778028e-06, "loss": 0.2448, "step": 42272 }, { "epoch": 0.7848197146177638, "grad_norm": 0.27375373244285583, "learning_rate": 2.1990666092784076e-06, "loss": 0.3813, "step": 42274 }, { "epoch": 0.7848568447551825, "grad_norm": 0.20720945298671722, "learning_rate": 2.198336832126352e-06, "loss": 0.2247, "step": 42276 }, { "epoch": 0.784893974892601, "grad_norm": 0.5400685667991638, "learning_rate": 2.19760716113156e-06, "loss": 0.2763, "step": 42278 }, { "epoch": 0.7849311050300197, "grad_norm": 0.5421027541160583, "learning_rate": 2.1968775963039647e-06, "loss": 0.2893, "step": 42280 }, { "epoch": 0.7849682351674383, "grad_norm": 0.3093223571777344, "learning_rate": 2.196148137653491e-06, "loss": 0.2853, "step": 42282 }, { "epoch": 0.785005365304857, "grad_norm": 0.43486881256103516, "learning_rate": 2.19541878519007e-06, "loss": 0.2939, "step": 42284 }, { "epoch": 0.7850424954422757, "grad_norm": 0.35611236095428467, "learning_rate": 2.1946895389236177e-06, "loss": 0.2443, "step": 42286 }, { "epoch": 0.7850796255796942, "grad_norm": 0.3992459177970886, "learning_rate": 2.1939603988640623e-06, "loss": 0.1532, "step": 42288 }, { "epoch": 0.7851167557171129, "grad_norm": 0.4848586916923523, "learning_rate": 2.1932313650213234e-06, "loss": 0.3774, "step": 42290 }, { "epoch": 0.7851538858545315, "grad_norm": 0.7071346044540405, "learning_rate": 2.192502437405322e-06, "loss": 0.2309, "step": 42292 }, { "epoch": 0.7851910159919502, "grad_norm": 0.39986705780029297, "learning_rate": 2.1917736160259763e-06, "loss": 0.1922, "step": 42294 }, { "epoch": 0.7852281461293689, "grad_norm": 0.4086349606513977, "learning_rate": 2.1910449008932057e-06, "loss": 0.3061, "step": 42296 }, { "epoch": 0.7852652762667874, "grad_norm": 0.3096737265586853, "learning_rate": 2.190316292016924e-06, "loss": 0.1053, "step": 42298 }, { "epoch": 0.7853024064042061, "grad_norm": 0.2733769416809082, "learning_rate": 2.1895877894070415e-06, "loss": 0.3471, "step": 42300 }, { "epoch": 0.7853395365416247, "grad_norm": 1.0041214227676392, "learning_rate": 2.188859393073475e-06, "loss": 0.0803, "step": 42302 }, { "epoch": 0.7853766666790434, "grad_norm": 0.5119099617004395, "learning_rate": 2.1881311030261345e-06, "loss": 0.3452, "step": 42304 }, { "epoch": 0.7854137968164621, "grad_norm": 0.3575119972229004, "learning_rate": 2.18740291927493e-06, "loss": 0.3916, "step": 42306 }, { "epoch": 0.7854509269538806, "grad_norm": 0.368917852640152, "learning_rate": 2.1866748418297724e-06, "loss": 0.1356, "step": 42308 }, { "epoch": 0.7854880570912993, "grad_norm": 0.35192573070526123, "learning_rate": 2.185946870700567e-06, "loss": 0.1853, "step": 42310 }, { "epoch": 0.7855251872287179, "grad_norm": 0.468219131231308, "learning_rate": 2.1852190058972176e-06, "loss": 0.2264, "step": 42312 }, { "epoch": 0.7855623173661366, "grad_norm": 0.27757829427719116, "learning_rate": 2.1844912474296288e-06, "loss": 0.1853, "step": 42314 }, { "epoch": 0.7855994475035553, "grad_norm": 0.41886138916015625, "learning_rate": 2.183763595307704e-06, "loss": 0.536, "step": 42316 }, { "epoch": 0.7856365776409738, "grad_norm": 0.35210514068603516, "learning_rate": 2.183036049541347e-06, "loss": 0.2265, "step": 42318 }, { "epoch": 0.7856737077783925, "grad_norm": 0.3935351073741913, "learning_rate": 2.1823086101404524e-06, "loss": 0.2188, "step": 42320 }, { "epoch": 0.7857108379158111, "grad_norm": 0.26120105385780334, "learning_rate": 2.1815812771149235e-06, "loss": 0.1932, "step": 42322 }, { "epoch": 0.7857479680532298, "grad_norm": 0.3296315371990204, "learning_rate": 2.1808540504746524e-06, "loss": 0.1641, "step": 42324 }, { "epoch": 0.7857850981906485, "grad_norm": 0.31115928292274475, "learning_rate": 2.180126930229536e-06, "loss": 0.3933, "step": 42326 }, { "epoch": 0.785822228328067, "grad_norm": 0.3197717070579529, "learning_rate": 2.1793999163894695e-06, "loss": 0.249, "step": 42328 }, { "epoch": 0.7858593584654857, "grad_norm": 0.4750738739967346, "learning_rate": 2.178673008964344e-06, "loss": 0.3761, "step": 42330 }, { "epoch": 0.7858964886029043, "grad_norm": 0.232594296336174, "learning_rate": 2.1779462079640513e-06, "loss": 0.2651, "step": 42332 }, { "epoch": 0.785933618740323, "grad_norm": 0.3119640648365021, "learning_rate": 2.177219513398484e-06, "loss": 0.1816, "step": 42334 }, { "epoch": 0.7859707488777415, "grad_norm": 0.4839460849761963, "learning_rate": 2.1764929252775247e-06, "loss": 0.2784, "step": 42336 }, { "epoch": 0.7860078790151602, "grad_norm": 0.46130073070526123, "learning_rate": 2.1757664436110613e-06, "loss": 0.3859, "step": 42338 }, { "epoch": 0.7860450091525789, "grad_norm": 0.32810646295547485, "learning_rate": 2.1750400684089844e-06, "loss": 0.2414, "step": 42340 }, { "epoch": 0.7860821392899975, "grad_norm": 0.5095705986022949, "learning_rate": 2.17431379968117e-06, "loss": 0.2154, "step": 42342 }, { "epoch": 0.7861192694274162, "grad_norm": 0.5689878463745117, "learning_rate": 2.1735876374375054e-06, "loss": 0.0926, "step": 42344 }, { "epoch": 0.7861563995648347, "grad_norm": 0.28199559450149536, "learning_rate": 2.1728615816878694e-06, "loss": 0.1367, "step": 42346 }, { "epoch": 0.7861935297022534, "grad_norm": 0.4046151041984558, "learning_rate": 2.172135632442145e-06, "loss": 0.3003, "step": 42348 }, { "epoch": 0.7862306598396721, "grad_norm": 0.2964744567871094, "learning_rate": 2.171409789710205e-06, "loss": 0.3055, "step": 42350 }, { "epoch": 0.7862677899770907, "grad_norm": 0.33051028847694397, "learning_rate": 2.170684053501928e-06, "loss": 0.3447, "step": 42352 }, { "epoch": 0.7863049201145094, "grad_norm": 0.45862480998039246, "learning_rate": 2.16995842382719e-06, "loss": 0.2065, "step": 42354 }, { "epoch": 0.7863420502519279, "grad_norm": 0.2809135615825653, "learning_rate": 2.1692329006958637e-06, "loss": 0.1613, "step": 42356 }, { "epoch": 0.7863791803893466, "grad_norm": 0.34259822964668274, "learning_rate": 2.1685074841178245e-06, "loss": 0.3361, "step": 42358 }, { "epoch": 0.7864163105267653, "grad_norm": 0.32796937227249146, "learning_rate": 2.167782174102938e-06, "loss": 0.214, "step": 42360 }, { "epoch": 0.7864534406641839, "grad_norm": 0.47342145442962646, "learning_rate": 2.167056970661079e-06, "loss": 0.2404, "step": 42362 }, { "epoch": 0.7864905708016026, "grad_norm": 2.0475125312805176, "learning_rate": 2.1663318738021087e-06, "loss": 0.1652, "step": 42364 }, { "epoch": 0.7865277009390211, "grad_norm": 0.4430631697177887, "learning_rate": 2.1656068835358967e-06, "loss": 0.29, "step": 42366 }, { "epoch": 0.7865648310764398, "grad_norm": 0.2994740605354309, "learning_rate": 2.164881999872309e-06, "loss": 0.2988, "step": 42368 }, { "epoch": 0.7866019612138585, "grad_norm": 0.41451776027679443, "learning_rate": 2.1641572228212095e-06, "loss": 0.3117, "step": 42370 }, { "epoch": 0.7866390913512771, "grad_norm": 0.7492827773094177, "learning_rate": 2.1634325523924603e-06, "loss": 0.1539, "step": 42372 }, { "epoch": 0.7866762214886958, "grad_norm": 0.40187567472457886, "learning_rate": 2.162707988595918e-06, "loss": 0.205, "step": 42374 }, { "epoch": 0.7867133516261143, "grad_norm": 0.3473794162273407, "learning_rate": 2.161983531441445e-06, "loss": 0.1093, "step": 42376 }, { "epoch": 0.786750481763533, "grad_norm": 0.39647749066352844, "learning_rate": 2.1612591809388993e-06, "loss": 0.2748, "step": 42378 }, { "epoch": 0.7867876119009517, "grad_norm": 0.4211435616016388, "learning_rate": 2.160534937098139e-06, "loss": 0.2987, "step": 42380 }, { "epoch": 0.7868247420383703, "grad_norm": 0.34683167934417725, "learning_rate": 2.1598107999290117e-06, "loss": 0.106, "step": 42382 }, { "epoch": 0.786861872175789, "grad_norm": 0.2671588659286499, "learning_rate": 2.15908676944138e-06, "loss": 0.1848, "step": 42384 }, { "epoch": 0.7868990023132075, "grad_norm": 0.4586368501186371, "learning_rate": 2.1583628456450878e-06, "loss": 0.2322, "step": 42386 }, { "epoch": 0.7869361324506262, "grad_norm": 0.28732019662857056, "learning_rate": 2.157639028549989e-06, "loss": 0.2642, "step": 42388 }, { "epoch": 0.7869732625880448, "grad_norm": 0.62124103307724, "learning_rate": 2.156915318165932e-06, "loss": 0.2349, "step": 42390 }, { "epoch": 0.7870103927254635, "grad_norm": 0.6514405608177185, "learning_rate": 2.1561917145027657e-06, "loss": 0.3771, "step": 42392 }, { "epoch": 0.7870475228628822, "grad_norm": 0.3576472997665405, "learning_rate": 2.1554682175703333e-06, "loss": 0.2889, "step": 42394 }, { "epoch": 0.7870846530003007, "grad_norm": 0.2909958064556122, "learning_rate": 2.154744827378483e-06, "loss": 0.1988, "step": 42396 }, { "epoch": 0.7871217831377194, "grad_norm": 0.3952322006225586, "learning_rate": 2.1540215439370594e-06, "loss": 0.2046, "step": 42398 }, { "epoch": 0.787158913275138, "grad_norm": 0.33975329995155334, "learning_rate": 2.153298367255897e-06, "loss": 0.1792, "step": 42400 }, { "epoch": 0.7871960434125567, "grad_norm": 0.3457679748535156, "learning_rate": 2.152575297344841e-06, "loss": 0.2563, "step": 42402 }, { "epoch": 0.7872331735499754, "grad_norm": 0.3037508726119995, "learning_rate": 2.151852334213732e-06, "loss": 0.3027, "step": 42404 }, { "epoch": 0.7872703036873939, "grad_norm": 0.3304939568042755, "learning_rate": 2.151129477872402e-06, "loss": 0.3583, "step": 42406 }, { "epoch": 0.7873074338248126, "grad_norm": 0.27041134238243103, "learning_rate": 2.1504067283306906e-06, "loss": 0.1613, "step": 42408 }, { "epoch": 0.7873445639622312, "grad_norm": 0.5043504238128662, "learning_rate": 2.1496840855984336e-06, "loss": 0.3279, "step": 42410 }, { "epoch": 0.7873816940996499, "grad_norm": 0.39772164821624756, "learning_rate": 2.1489615496854587e-06, "loss": 0.4533, "step": 42412 }, { "epoch": 0.7874188242370685, "grad_norm": 0.4006732106208801, "learning_rate": 2.1482391206016006e-06, "loss": 0.2669, "step": 42414 }, { "epoch": 0.7874559543744871, "grad_norm": 0.3448793590068817, "learning_rate": 2.1475167983566893e-06, "loss": 0.3113, "step": 42416 }, { "epoch": 0.7874930845119058, "grad_norm": 0.4120835065841675, "learning_rate": 2.146794582960554e-06, "loss": 0.3534, "step": 42418 }, { "epoch": 0.7875302146493244, "grad_norm": 0.28927019238471985, "learning_rate": 2.146072474423021e-06, "loss": 0.2854, "step": 42420 }, { "epoch": 0.7875673447867431, "grad_norm": 0.35343897342681885, "learning_rate": 2.1453504727539188e-06, "loss": 0.272, "step": 42422 }, { "epoch": 0.7876044749241617, "grad_norm": 0.32779720425605774, "learning_rate": 2.144628577963067e-06, "loss": 0.1675, "step": 42424 }, { "epoch": 0.7876416050615803, "grad_norm": 0.30982351303100586, "learning_rate": 2.143906790060293e-06, "loss": 0.1517, "step": 42426 }, { "epoch": 0.787678735198999, "grad_norm": 0.19453322887420654, "learning_rate": 2.1431851090554135e-06, "loss": 0.1929, "step": 42428 }, { "epoch": 0.7877158653364176, "grad_norm": 0.4988711476325989, "learning_rate": 2.14246353495825e-06, "loss": 0.1879, "step": 42430 }, { "epoch": 0.7877529954738363, "grad_norm": 0.4961640536785126, "learning_rate": 2.1417420677786227e-06, "loss": 0.4679, "step": 42432 }, { "epoch": 0.7877901256112548, "grad_norm": 0.4295714199542999, "learning_rate": 2.141020707526347e-06, "loss": 0.3894, "step": 42434 }, { "epoch": 0.7878272557486735, "grad_norm": 0.4100876450538635, "learning_rate": 2.140299454211242e-06, "loss": 0.3361, "step": 42436 }, { "epoch": 0.7878643858860922, "grad_norm": 0.40491577982902527, "learning_rate": 2.1395783078431167e-06, "loss": 0.3033, "step": 42438 }, { "epoch": 0.7879015160235108, "grad_norm": 0.35602709650993347, "learning_rate": 2.1388572684317856e-06, "loss": 0.2957, "step": 42440 }, { "epoch": 0.7879386461609295, "grad_norm": 0.4789207875728607, "learning_rate": 2.13813633598706e-06, "loss": 0.3623, "step": 42442 }, { "epoch": 0.787975776298348, "grad_norm": 0.4444025158882141, "learning_rate": 2.1374155105187533e-06, "loss": 0.3309, "step": 42444 }, { "epoch": 0.7880129064357667, "grad_norm": 0.6774869561195374, "learning_rate": 2.1366947920366663e-06, "loss": 0.3555, "step": 42446 }, { "epoch": 0.7880500365731854, "grad_norm": 0.35451751947402954, "learning_rate": 2.135974180550614e-06, "loss": 0.3058, "step": 42448 }, { "epoch": 0.788087166710604, "grad_norm": 0.43377426266670227, "learning_rate": 2.1352536760703946e-06, "loss": 0.2583, "step": 42450 }, { "epoch": 0.7881242968480227, "grad_norm": 0.4566328525543213, "learning_rate": 2.134533278605814e-06, "loss": 0.28, "step": 42452 }, { "epoch": 0.7881614269854412, "grad_norm": 0.38286250829696655, "learning_rate": 2.133812988166677e-06, "loss": 0.1169, "step": 42454 }, { "epoch": 0.7881985571228599, "grad_norm": 0.4602077305316925, "learning_rate": 2.133092804762782e-06, "loss": 0.3982, "step": 42456 }, { "epoch": 0.7882356872602786, "grad_norm": 0.341356098651886, "learning_rate": 2.132372728403931e-06, "loss": 0.2433, "step": 42458 }, { "epoch": 0.7882728173976972, "grad_norm": 0.5872789621353149, "learning_rate": 2.1316527590999225e-06, "loss": 0.4408, "step": 42460 }, { "epoch": 0.7883099475351159, "grad_norm": 0.42184120416641235, "learning_rate": 2.130932896860549e-06, "loss": 0.4544, "step": 42462 }, { "epoch": 0.7883470776725344, "grad_norm": 0.2469044029712677, "learning_rate": 2.1302131416956094e-06, "loss": 0.0916, "step": 42464 }, { "epoch": 0.7883842078099531, "grad_norm": 0.44726428389549255, "learning_rate": 2.129493493614898e-06, "loss": 0.1535, "step": 42466 }, { "epoch": 0.7884213379473718, "grad_norm": 0.3943835198879242, "learning_rate": 2.1287739526282026e-06, "loss": 0.4625, "step": 42468 }, { "epoch": 0.7884584680847904, "grad_norm": 0.30070164799690247, "learning_rate": 2.128054518745316e-06, "loss": 0.307, "step": 42470 }, { "epoch": 0.788495598222209, "grad_norm": 0.29949524998664856, "learning_rate": 2.1273351919760287e-06, "loss": 0.2415, "step": 42472 }, { "epoch": 0.7885327283596276, "grad_norm": 0.17144176363945007, "learning_rate": 2.1266159723301315e-06, "loss": 0.3365, "step": 42474 }, { "epoch": 0.7885698584970463, "grad_norm": 0.4717441201210022, "learning_rate": 2.1258968598174046e-06, "loss": 0.5046, "step": 42476 }, { "epoch": 0.788606988634465, "grad_norm": 0.32951632142066956, "learning_rate": 2.1251778544476344e-06, "loss": 0.3228, "step": 42478 }, { "epoch": 0.7886441187718836, "grad_norm": 0.668451726436615, "learning_rate": 2.124458956230607e-06, "loss": 0.3139, "step": 42480 }, { "epoch": 0.7886812489093022, "grad_norm": 0.41362839937210083, "learning_rate": 2.1237401651761024e-06, "loss": 0.3509, "step": 42482 }, { "epoch": 0.7887183790467208, "grad_norm": 0.4281545877456665, "learning_rate": 2.1230214812939032e-06, "loss": 0.2162, "step": 42484 }, { "epoch": 0.7887555091841395, "grad_norm": 0.2861446738243103, "learning_rate": 2.122302904593789e-06, "loss": 0.315, "step": 42486 }, { "epoch": 0.7887926393215581, "grad_norm": 0.33064553141593933, "learning_rate": 2.1215844350855365e-06, "loss": 0.3075, "step": 42488 }, { "epoch": 0.7888297694589768, "grad_norm": 0.3047144114971161, "learning_rate": 2.120866072778919e-06, "loss": 0.1781, "step": 42490 }, { "epoch": 0.7888668995963954, "grad_norm": 0.36978021264076233, "learning_rate": 2.120147817683712e-06, "loss": 0.3506, "step": 42492 }, { "epoch": 0.788904029733814, "grad_norm": 0.25501900911331177, "learning_rate": 2.119429669809692e-06, "loss": 0.2326, "step": 42494 }, { "epoch": 0.7889411598712327, "grad_norm": 0.39574605226516724, "learning_rate": 2.1187116291666278e-06, "loss": 0.2789, "step": 42496 }, { "epoch": 0.7889782900086513, "grad_norm": 0.500994861125946, "learning_rate": 2.1179936957642943e-06, "loss": 0.2235, "step": 42498 }, { "epoch": 0.78901542014607, "grad_norm": 0.43373164534568787, "learning_rate": 2.117275869612454e-06, "loss": 0.3813, "step": 42500 }, { "epoch": 0.7890525502834886, "grad_norm": 0.39462223649024963, "learning_rate": 2.1165581507208786e-06, "loss": 0.3382, "step": 42502 }, { "epoch": 0.7890896804209072, "grad_norm": 0.5577334761619568, "learning_rate": 2.115840539099332e-06, "loss": 0.1985, "step": 42504 }, { "epoch": 0.7891268105583259, "grad_norm": 0.4708207845687866, "learning_rate": 2.1151230347575814e-06, "loss": 0.0915, "step": 42506 }, { "epoch": 0.7891639406957445, "grad_norm": 0.3929736912250519, "learning_rate": 2.11440563770539e-06, "loss": 0.1518, "step": 42508 }, { "epoch": 0.7892010708331632, "grad_norm": 0.6720669269561768, "learning_rate": 2.113688347952515e-06, "loss": 0.2856, "step": 42510 }, { "epoch": 0.7892382009705818, "grad_norm": 0.4696556329727173, "learning_rate": 2.1129711655087227e-06, "loss": 0.1705, "step": 42512 }, { "epoch": 0.7892753311080004, "grad_norm": 0.4105079174041748, "learning_rate": 2.112254090383765e-06, "loss": 0.2812, "step": 42514 }, { "epoch": 0.7893124612454191, "grad_norm": 0.4384016990661621, "learning_rate": 2.1115371225874027e-06, "loss": 0.4045, "step": 42516 }, { "epoch": 0.7893495913828377, "grad_norm": 0.3876022398471832, "learning_rate": 2.1108202621293917e-06, "loss": 0.2695, "step": 42518 }, { "epoch": 0.7893867215202564, "grad_norm": 0.4062402546405792, "learning_rate": 2.1101035090194867e-06, "loss": 0.1986, "step": 42520 }, { "epoch": 0.789423851657675, "grad_norm": 0.33494803309440613, "learning_rate": 2.1093868632674396e-06, "loss": 0.3949, "step": 42522 }, { "epoch": 0.7894609817950936, "grad_norm": 0.3725855052471161, "learning_rate": 2.108670324883004e-06, "loss": 0.2272, "step": 42524 }, { "epoch": 0.7894981119325123, "grad_norm": 0.34512820839881897, "learning_rate": 2.107953893875927e-06, "loss": 0.2717, "step": 42526 }, { "epoch": 0.7895352420699309, "grad_norm": 0.4230319559574127, "learning_rate": 2.1072375702559565e-06, "loss": 0.3753, "step": 42528 }, { "epoch": 0.7895723722073495, "grad_norm": 0.3849067687988281, "learning_rate": 2.1065213540328444e-06, "loss": 0.1527, "step": 42530 }, { "epoch": 0.7896095023447682, "grad_norm": 0.7458896040916443, "learning_rate": 2.10580524521633e-06, "loss": 0.2073, "step": 42532 }, { "epoch": 0.7896466324821868, "grad_norm": 0.5071929693222046, "learning_rate": 2.1050892438161607e-06, "loss": 0.3121, "step": 42534 }, { "epoch": 0.7896837626196055, "grad_norm": 0.38275641202926636, "learning_rate": 2.1043733498420815e-06, "loss": 0.1871, "step": 42536 }, { "epoch": 0.7897208927570241, "grad_norm": 0.39187997579574585, "learning_rate": 2.103657563303828e-06, "loss": 0.3532, "step": 42538 }, { "epoch": 0.7897580228944427, "grad_norm": 0.3579058051109314, "learning_rate": 2.102941884211144e-06, "loss": 0.3955, "step": 42540 }, { "epoch": 0.7897951530318613, "grad_norm": 0.545877993106842, "learning_rate": 2.102226312573765e-06, "loss": 0.2927, "step": 42542 }, { "epoch": 0.78983228316928, "grad_norm": 0.4625374674797058, "learning_rate": 2.101510848401429e-06, "loss": 0.3327, "step": 42544 }, { "epoch": 0.7898694133066987, "grad_norm": 0.4088573753833771, "learning_rate": 2.100795491703873e-06, "loss": 0.2822, "step": 42546 }, { "epoch": 0.7899065434441173, "grad_norm": 0.6364077925682068, "learning_rate": 2.1000802424908316e-06, "loss": 0.1923, "step": 42548 }, { "epoch": 0.7899436735815359, "grad_norm": 0.8586711883544922, "learning_rate": 2.0993651007720318e-06, "loss": 0.3471, "step": 42550 }, { "epoch": 0.7899808037189545, "grad_norm": 0.4499157667160034, "learning_rate": 2.098650066557212e-06, "loss": 0.1479, "step": 42552 }, { "epoch": 0.7900179338563732, "grad_norm": 0.25522860884666443, "learning_rate": 2.097935139856093e-06, "loss": 0.3336, "step": 42554 }, { "epoch": 0.7900550639937919, "grad_norm": 0.38141849637031555, "learning_rate": 2.097220320678409e-06, "loss": 0.198, "step": 42556 }, { "epoch": 0.7900921941312105, "grad_norm": 0.2683364748954773, "learning_rate": 2.0965056090338844e-06, "loss": 0.124, "step": 42558 }, { "epoch": 0.7901293242686291, "grad_norm": 0.3264162540435791, "learning_rate": 2.095791004932244e-06, "loss": 0.2497, "step": 42560 }, { "epoch": 0.7901664544060477, "grad_norm": 0.23377372324466705, "learning_rate": 2.0950765083832146e-06, "loss": 0.3535, "step": 42562 }, { "epoch": 0.7902035845434664, "grad_norm": 0.47008216381073, "learning_rate": 2.0943621193965145e-06, "loss": 0.3933, "step": 42564 }, { "epoch": 0.7902407146808851, "grad_norm": 0.31055015325546265, "learning_rate": 2.0936478379818657e-06, "loss": 0.4835, "step": 42566 }, { "epoch": 0.7902778448183037, "grad_norm": 0.38977161049842834, "learning_rate": 2.092933664148987e-06, "loss": 0.2732, "step": 42568 }, { "epoch": 0.7903149749557223, "grad_norm": 0.342987060546875, "learning_rate": 2.0922195979075965e-06, "loss": 0.1817, "step": 42570 }, { "epoch": 0.7903521050931409, "grad_norm": 0.36170870065689087, "learning_rate": 2.091505639267414e-06, "loss": 0.2003, "step": 42572 }, { "epoch": 0.7903892352305596, "grad_norm": 0.33908453583717346, "learning_rate": 2.090791788238151e-06, "loss": 0.2686, "step": 42574 }, { "epoch": 0.7904263653679783, "grad_norm": 0.23294876515865326, "learning_rate": 2.0900780448295177e-06, "loss": 0.2642, "step": 42576 }, { "epoch": 0.7904634955053969, "grad_norm": 0.4259909391403198, "learning_rate": 2.08936440905123e-06, "loss": 0.3708, "step": 42578 }, { "epoch": 0.7905006256428155, "grad_norm": 0.3567473292350769, "learning_rate": 2.088650880912997e-06, "loss": 0.3266, "step": 42580 }, { "epoch": 0.7905377557802341, "grad_norm": 0.3649671971797943, "learning_rate": 2.0879374604245286e-06, "loss": 0.2853, "step": 42582 }, { "epoch": 0.7905748859176528, "grad_norm": 0.38092663884162903, "learning_rate": 2.0872241475955325e-06, "loss": 0.3168, "step": 42584 }, { "epoch": 0.7906120160550714, "grad_norm": 0.35730108618736267, "learning_rate": 2.0865109424357154e-06, "loss": 0.1507, "step": 42586 }, { "epoch": 0.79064914619249, "grad_norm": 0.4844895899295807, "learning_rate": 2.08579784495478e-06, "loss": 0.1877, "step": 42588 }, { "epoch": 0.7906862763299087, "grad_norm": 0.3098269999027252, "learning_rate": 2.085084855162429e-06, "loss": 0.3054, "step": 42590 }, { "epoch": 0.7907234064673273, "grad_norm": 0.4970020353794098, "learning_rate": 2.0843719730683655e-06, "loss": 0.3478, "step": 42592 }, { "epoch": 0.790760536604746, "grad_norm": 0.2863791286945343, "learning_rate": 2.083659198682292e-06, "loss": 0.2598, "step": 42594 }, { "epoch": 0.7907976667421646, "grad_norm": 0.3540296256542206, "learning_rate": 2.082946532013902e-06, "loss": 0.3192, "step": 42596 }, { "epoch": 0.7908347968795832, "grad_norm": 0.49094539880752563, "learning_rate": 2.082233973072895e-06, "loss": 0.3213, "step": 42598 }, { "epoch": 0.7908719270170019, "grad_norm": 0.24183067679405212, "learning_rate": 2.0815215218689698e-06, "loss": 0.1513, "step": 42600 }, { "epoch": 0.7909090571544205, "grad_norm": 0.4134245812892914, "learning_rate": 2.0808091784118157e-06, "loss": 0.2975, "step": 42602 }, { "epoch": 0.7909461872918392, "grad_norm": 0.5017105340957642, "learning_rate": 2.0800969427111285e-06, "loss": 0.3647, "step": 42604 }, { "epoch": 0.7909833174292578, "grad_norm": 0.4237438142299652, "learning_rate": 2.079384814776598e-06, "loss": 0.4172, "step": 42606 }, { "epoch": 0.7910204475666764, "grad_norm": 0.5569114089012146, "learning_rate": 2.0786727946179163e-06, "loss": 0.3092, "step": 42608 }, { "epoch": 0.7910575777040951, "grad_norm": 0.52723628282547, "learning_rate": 2.0779608822447693e-06, "loss": 0.1331, "step": 42610 }, { "epoch": 0.7910947078415137, "grad_norm": 0.41421768069267273, "learning_rate": 2.07724907766685e-06, "loss": 0.2478, "step": 42612 }, { "epoch": 0.7911318379789324, "grad_norm": 0.508815348148346, "learning_rate": 2.076537380893835e-06, "loss": 0.2951, "step": 42614 }, { "epoch": 0.791168968116351, "grad_norm": 0.3274082839488983, "learning_rate": 2.0758257919354163e-06, "loss": 0.183, "step": 42616 }, { "epoch": 0.7912060982537696, "grad_norm": 0.33555614948272705, "learning_rate": 2.0751143108012693e-06, "loss": 0.336, "step": 42618 }, { "epoch": 0.7912432283911883, "grad_norm": 0.6409013271331787, "learning_rate": 2.07440293750108e-06, "loss": 0.1538, "step": 42620 }, { "epoch": 0.7912803585286069, "grad_norm": 0.4628830850124359, "learning_rate": 2.0736916720445264e-06, "loss": 0.2658, "step": 42622 }, { "epoch": 0.7913174886660256, "grad_norm": 0.3782122731208801, "learning_rate": 2.0729805144412895e-06, "loss": 0.2932, "step": 42624 }, { "epoch": 0.7913546188034442, "grad_norm": 0.286088764667511, "learning_rate": 2.072269464701041e-06, "loss": 0.2568, "step": 42626 }, { "epoch": 0.7913917489408628, "grad_norm": 0.5430276989936829, "learning_rate": 2.071558522833459e-06, "loss": 0.2003, "step": 42628 }, { "epoch": 0.7914288790782815, "grad_norm": 0.501216471195221, "learning_rate": 2.0708476888482174e-06, "loss": 0.3363, "step": 42630 }, { "epoch": 0.7914660092157001, "grad_norm": 0.38458243012428284, "learning_rate": 2.070136962754987e-06, "loss": 0.2582, "step": 42632 }, { "epoch": 0.7915031393531188, "grad_norm": 0.3722245991230011, "learning_rate": 2.0694263445634445e-06, "loss": 0.1312, "step": 42634 }, { "epoch": 0.7915402694905374, "grad_norm": 0.3490140438079834, "learning_rate": 2.06871583428325e-06, "loss": 0.3663, "step": 42636 }, { "epoch": 0.791577399627956, "grad_norm": 0.49844953417778015, "learning_rate": 2.0680054319240793e-06, "loss": 0.2152, "step": 42638 }, { "epoch": 0.7916145297653746, "grad_norm": 0.25851577520370483, "learning_rate": 2.0672951374955928e-06, "loss": 0.2768, "step": 42640 }, { "epoch": 0.7916516599027933, "grad_norm": 0.481437087059021, "learning_rate": 2.0665849510074585e-06, "loss": 0.3078, "step": 42642 }, { "epoch": 0.791688790040212, "grad_norm": 0.4198446571826935, "learning_rate": 2.06587487246934e-06, "loss": 0.2242, "step": 42644 }, { "epoch": 0.7917259201776305, "grad_norm": 0.3985229432582855, "learning_rate": 2.0651649018908993e-06, "loss": 0.2596, "step": 42646 }, { "epoch": 0.7917630503150492, "grad_norm": 0.2468804270029068, "learning_rate": 2.0644550392817964e-06, "loss": 0.417, "step": 42648 }, { "epoch": 0.7918001804524678, "grad_norm": 0.28037264943122864, "learning_rate": 2.0637452846516925e-06, "loss": 0.2331, "step": 42650 }, { "epoch": 0.7918373105898865, "grad_norm": 0.37517064809799194, "learning_rate": 2.063035638010242e-06, "loss": 0.173, "step": 42652 }, { "epoch": 0.7918744407273052, "grad_norm": 0.32266315817832947, "learning_rate": 2.062326099367101e-06, "loss": 0.3654, "step": 42654 }, { "epoch": 0.7919115708647237, "grad_norm": 0.20128904283046722, "learning_rate": 2.0616166687319273e-06, "loss": 0.4743, "step": 42656 }, { "epoch": 0.7919487010021424, "grad_norm": 0.3021332025527954, "learning_rate": 2.0609073461143747e-06, "loss": 0.2246, "step": 42658 }, { "epoch": 0.791985831139561, "grad_norm": 0.5699960589408875, "learning_rate": 2.0601981315240892e-06, "loss": 0.3673, "step": 42660 }, { "epoch": 0.7920229612769797, "grad_norm": 0.5187786817550659, "learning_rate": 2.059489024970729e-06, "loss": 0.2218, "step": 42662 }, { "epoch": 0.7920600914143984, "grad_norm": 0.34131884574890137, "learning_rate": 2.0587800264639346e-06, "loss": 0.3281, "step": 42664 }, { "epoch": 0.7920972215518169, "grad_norm": 0.3134617507457733, "learning_rate": 2.0580711360133574e-06, "loss": 0.2675, "step": 42666 }, { "epoch": 0.7921343516892356, "grad_norm": 0.27192190289497375, "learning_rate": 2.0573623536286434e-06, "loss": 0.3998, "step": 42668 }, { "epoch": 0.7921714818266542, "grad_norm": 0.32284533977508545, "learning_rate": 2.056653679319438e-06, "loss": 0.3717, "step": 42670 }, { "epoch": 0.7922086119640729, "grad_norm": 0.504384458065033, "learning_rate": 2.055945113095381e-06, "loss": 0.3891, "step": 42672 }, { "epoch": 0.7922457421014916, "grad_norm": 0.41510099172592163, "learning_rate": 2.0552366549661194e-06, "loss": 0.173, "step": 42674 }, { "epoch": 0.7922828722389101, "grad_norm": 0.343056321144104, "learning_rate": 2.054528304941288e-06, "loss": 0.1487, "step": 42676 }, { "epoch": 0.7923200023763288, "grad_norm": 0.3308608829975128, "learning_rate": 2.053820063030525e-06, "loss": 0.2845, "step": 42678 }, { "epoch": 0.7923571325137474, "grad_norm": 0.3284551203250885, "learning_rate": 2.053111929243473e-06, "loss": 0.2697, "step": 42680 }, { "epoch": 0.7923942626511661, "grad_norm": 0.250306099653244, "learning_rate": 2.0524039035897614e-06, "loss": 0.1573, "step": 42682 }, { "epoch": 0.7924313927885848, "grad_norm": 0.3870696723461151, "learning_rate": 2.0516959860790263e-06, "loss": 0.2162, "step": 42684 }, { "epoch": 0.7924685229260033, "grad_norm": 0.4715813994407654, "learning_rate": 2.0509881767209015e-06, "loss": 0.1898, "step": 42686 }, { "epoch": 0.792505653063422, "grad_norm": 0.4110667407512665, "learning_rate": 2.0502804755250203e-06, "loss": 0.2649, "step": 42688 }, { "epoch": 0.7925427832008406, "grad_norm": 0.5015087127685547, "learning_rate": 2.0495728825010064e-06, "loss": 0.1971, "step": 42690 }, { "epoch": 0.7925799133382593, "grad_norm": 0.6716548204421997, "learning_rate": 2.0488653976584916e-06, "loss": 0.4662, "step": 42692 }, { "epoch": 0.7926170434756779, "grad_norm": 0.33569470047950745, "learning_rate": 2.048158021007103e-06, "loss": 0.3139, "step": 42694 }, { "epoch": 0.7926541736130965, "grad_norm": 0.272247314453125, "learning_rate": 2.0474507525564633e-06, "loss": 0.2761, "step": 42696 }, { "epoch": 0.7926913037505152, "grad_norm": 0.35322609543800354, "learning_rate": 2.046743592316203e-06, "loss": 0.2827, "step": 42698 }, { "epoch": 0.7927284338879338, "grad_norm": 0.2537001073360443, "learning_rate": 2.046036540295936e-06, "loss": 0.4237, "step": 42700 }, { "epoch": 0.7927655640253525, "grad_norm": 0.40118294954299927, "learning_rate": 2.045329596505289e-06, "loss": 0.2705, "step": 42702 }, { "epoch": 0.792802694162771, "grad_norm": 0.41252779960632324, "learning_rate": 2.044622760953877e-06, "loss": 0.2519, "step": 42704 }, { "epoch": 0.7928398243001897, "grad_norm": 0.26591622829437256, "learning_rate": 2.043916033651321e-06, "loss": 0.2575, "step": 42706 }, { "epoch": 0.7928769544376084, "grad_norm": 0.38855138421058655, "learning_rate": 2.0432094146072356e-06, "loss": 0.4377, "step": 42708 }, { "epoch": 0.792914084575027, "grad_norm": 0.43108057975769043, "learning_rate": 2.042502903831236e-06, "loss": 0.3193, "step": 42710 }, { "epoch": 0.7929512147124457, "grad_norm": 0.6577187180519104, "learning_rate": 2.041796501332941e-06, "loss": 0.4136, "step": 42712 }, { "epoch": 0.7929883448498642, "grad_norm": 0.481794148683548, "learning_rate": 2.041090207121954e-06, "loss": 0.3517, "step": 42714 }, { "epoch": 0.7930254749872829, "grad_norm": 0.5122260451316833, "learning_rate": 2.0403840212078898e-06, "loss": 0.0995, "step": 42716 }, { "epoch": 0.7930626051247016, "grad_norm": 0.4683792293071747, "learning_rate": 2.0396779436003577e-06, "loss": 0.3361, "step": 42718 }, { "epoch": 0.7930997352621202, "grad_norm": 0.42059043049812317, "learning_rate": 2.0389719743089663e-06, "loss": 0.1698, "step": 42720 }, { "epoch": 0.7931368653995389, "grad_norm": 0.33432382345199585, "learning_rate": 2.0382661133433192e-06, "loss": 0.326, "step": 42722 }, { "epoch": 0.7931739955369574, "grad_norm": 0.39011383056640625, "learning_rate": 2.0375603607130213e-06, "loss": 0.2388, "step": 42724 }, { "epoch": 0.7932111256743761, "grad_norm": 0.38684576749801636, "learning_rate": 2.0368547164276796e-06, "loss": 0.3854, "step": 42726 }, { "epoch": 0.7932482558117948, "grad_norm": 0.35732534527778625, "learning_rate": 2.0361491804968902e-06, "loss": 0.0299, "step": 42728 }, { "epoch": 0.7932853859492134, "grad_norm": 0.3931863605976105, "learning_rate": 2.035443752930255e-06, "loss": 0.2579, "step": 42730 }, { "epoch": 0.7933225160866321, "grad_norm": 0.7063838243484497, "learning_rate": 2.0347384337373745e-06, "loss": 0.3594, "step": 42732 }, { "epoch": 0.7933596462240506, "grad_norm": 0.3051188290119171, "learning_rate": 2.0340332229278458e-06, "loss": 0.2238, "step": 42734 }, { "epoch": 0.7933967763614693, "grad_norm": 0.6285567879676819, "learning_rate": 2.033328120511263e-06, "loss": 0.2459, "step": 42736 }, { "epoch": 0.7934339064988879, "grad_norm": 0.43248236179351807, "learning_rate": 2.0326231264972253e-06, "loss": 0.3369, "step": 42738 }, { "epoch": 0.7934710366363066, "grad_norm": 0.6834849715232849, "learning_rate": 2.0319182408953186e-06, "loss": 0.4614, "step": 42740 }, { "epoch": 0.7935081667737253, "grad_norm": 0.3905276358127594, "learning_rate": 2.0312134637151393e-06, "loss": 0.2969, "step": 42742 }, { "epoch": 0.7935452969111438, "grad_norm": 0.2289569079875946, "learning_rate": 2.030508794966274e-06, "loss": 0.1603, "step": 42744 }, { "epoch": 0.7935824270485625, "grad_norm": 0.31870412826538086, "learning_rate": 2.0298042346583122e-06, "loss": 0.3701, "step": 42746 }, { "epoch": 0.7936195571859811, "grad_norm": 0.3010765314102173, "learning_rate": 2.029099782800841e-06, "loss": 0.2594, "step": 42748 }, { "epoch": 0.7936566873233998, "grad_norm": 0.46039465069770813, "learning_rate": 2.028395439403449e-06, "loss": 0.3354, "step": 42750 }, { "epoch": 0.7936938174608185, "grad_norm": 0.28095942735671997, "learning_rate": 2.027691204475715e-06, "loss": 0.2535, "step": 42752 }, { "epoch": 0.793730947598237, "grad_norm": 0.3010506331920624, "learning_rate": 2.026987078027224e-06, "loss": 0.4395, "step": 42754 }, { "epoch": 0.7937680777356557, "grad_norm": 0.4402254521846771, "learning_rate": 2.0262830600675563e-06, "loss": 0.2088, "step": 42756 }, { "epoch": 0.7938052078730743, "grad_norm": 0.42874911427497864, "learning_rate": 2.0255791506062915e-06, "loss": 0.1753, "step": 42758 }, { "epoch": 0.793842338010493, "grad_norm": 0.2947536110877991, "learning_rate": 2.024875349653009e-06, "loss": 0.1437, "step": 42760 }, { "epoch": 0.7938794681479117, "grad_norm": 0.2735714316368103, "learning_rate": 2.0241716572172885e-06, "loss": 0.2229, "step": 42762 }, { "epoch": 0.7939165982853302, "grad_norm": 0.3092462122440338, "learning_rate": 2.0234680733086977e-06, "loss": 0.2173, "step": 42764 }, { "epoch": 0.7939537284227489, "grad_norm": 0.364353746175766, "learning_rate": 2.022764597936816e-06, "loss": 0.3058, "step": 42766 }, { "epoch": 0.7939908585601675, "grad_norm": 0.4056829810142517, "learning_rate": 2.022061231111212e-06, "loss": 0.3435, "step": 42768 }, { "epoch": 0.7940279886975862, "grad_norm": 0.6425531506538391, "learning_rate": 2.021357972841458e-06, "loss": 0.2642, "step": 42770 }, { "epoch": 0.7940651188350049, "grad_norm": 0.2900737226009369, "learning_rate": 2.0206548231371225e-06, "loss": 0.2802, "step": 42772 }, { "epoch": 0.7941022489724234, "grad_norm": 0.3155817687511444, "learning_rate": 2.019951782007774e-06, "loss": 0.1816, "step": 42774 }, { "epoch": 0.7941393791098421, "grad_norm": 0.4117618203163147, "learning_rate": 2.0192488494629816e-06, "loss": 0.1825, "step": 42776 }, { "epoch": 0.7941765092472607, "grad_norm": 0.6537511348724365, "learning_rate": 2.018546025512305e-06, "loss": 0.2367, "step": 42778 }, { "epoch": 0.7942136393846794, "grad_norm": 0.24910946190357208, "learning_rate": 2.0178433101653084e-06, "loss": 0.1547, "step": 42780 }, { "epoch": 0.794250769522098, "grad_norm": 0.48479151725769043, "learning_rate": 2.017140703431556e-06, "loss": 0.1673, "step": 42782 }, { "epoch": 0.7942878996595166, "grad_norm": 0.40298548340797424, "learning_rate": 2.01643820532061e-06, "loss": 0.3878, "step": 42784 }, { "epoch": 0.7943250297969353, "grad_norm": 0.26045864820480347, "learning_rate": 2.0157358158420228e-06, "loss": 0.1864, "step": 42786 }, { "epoch": 0.7943621599343539, "grad_norm": 0.3012307286262512, "learning_rate": 2.0150335350053596e-06, "loss": 0.2213, "step": 42788 }, { "epoch": 0.7943992900717726, "grad_norm": 0.43078145384788513, "learning_rate": 2.014331362820169e-06, "loss": 0.1012, "step": 42790 }, { "epoch": 0.7944364202091911, "grad_norm": 0.3820379972457886, "learning_rate": 2.013629299296008e-06, "loss": 0.1971, "step": 42792 }, { "epoch": 0.7944735503466098, "grad_norm": 0.32335078716278076, "learning_rate": 2.0129273444424315e-06, "loss": 0.3745, "step": 42794 }, { "epoch": 0.7945106804840285, "grad_norm": 0.3137783110141754, "learning_rate": 2.0122254982689892e-06, "loss": 0.3607, "step": 42796 }, { "epoch": 0.7945478106214471, "grad_norm": 0.3571280241012573, "learning_rate": 2.0115237607852312e-06, "loss": 0.3246, "step": 42798 }, { "epoch": 0.7945849407588658, "grad_norm": 0.4563015103340149, "learning_rate": 2.0108221320007104e-06, "loss": 0.2029, "step": 42800 }, { "epoch": 0.7946220708962843, "grad_norm": 0.5401472449302673, "learning_rate": 2.0101206119249663e-06, "loss": 0.2232, "step": 42802 }, { "epoch": 0.794659201033703, "grad_norm": 0.45033398270606995, "learning_rate": 2.0094192005675485e-06, "loss": 0.3162, "step": 42804 }, { "epoch": 0.7946963311711217, "grad_norm": 0.22998209297657013, "learning_rate": 2.008717897938004e-06, "loss": 0.2192, "step": 42806 }, { "epoch": 0.7947334613085403, "grad_norm": 0.5538310408592224, "learning_rate": 2.0080167040458686e-06, "loss": 0.1284, "step": 42808 }, { "epoch": 0.794770591445959, "grad_norm": 0.5740215182304382, "learning_rate": 2.007315618900688e-06, "loss": 0.2657, "step": 42810 }, { "epoch": 0.7948077215833775, "grad_norm": 0.37102216482162476, "learning_rate": 2.0066146425120004e-06, "loss": 0.2641, "step": 42812 }, { "epoch": 0.7948448517207962, "grad_norm": 0.4133845865726471, "learning_rate": 2.0059137748893477e-06, "loss": 0.37, "step": 42814 }, { "epoch": 0.7948819818582149, "grad_norm": 0.7754574418067932, "learning_rate": 2.0052130160422602e-06, "loss": 0.3412, "step": 42816 }, { "epoch": 0.7949191119956335, "grad_norm": 0.33565637469291687, "learning_rate": 2.0045123659802766e-06, "loss": 0.2758, "step": 42818 }, { "epoch": 0.7949562421330522, "grad_norm": 0.27820920944213867, "learning_rate": 2.0038118247129314e-06, "loss": 0.3167, "step": 42820 }, { "epoch": 0.7949933722704707, "grad_norm": 0.4165036082267761, "learning_rate": 2.0031113922497546e-06, "loss": 0.3382, "step": 42822 }, { "epoch": 0.7950305024078894, "grad_norm": 0.4344783127307892, "learning_rate": 2.0024110686002783e-06, "loss": 0.2094, "step": 42824 }, { "epoch": 0.7950676325453081, "grad_norm": 0.4044579863548279, "learning_rate": 2.0017108537740358e-06, "loss": 0.2182, "step": 42826 }, { "epoch": 0.7951047626827267, "grad_norm": 0.4348785877227783, "learning_rate": 2.0010107477805494e-06, "loss": 0.2287, "step": 42828 }, { "epoch": 0.7951418928201454, "grad_norm": 0.4260804355144501, "learning_rate": 2.000310750629345e-06, "loss": 0.3942, "step": 42830 }, { "epoch": 0.7951790229575639, "grad_norm": 0.38059136271476746, "learning_rate": 1.9996108623299504e-06, "loss": 0.253, "step": 42832 }, { "epoch": 0.7952161530949826, "grad_norm": 0.41512730717658997, "learning_rate": 1.9989110828918865e-06, "loss": 0.4418, "step": 42834 }, { "epoch": 0.7952532832324013, "grad_norm": 0.6739332675933838, "learning_rate": 1.9982114123246776e-06, "loss": 0.3184, "step": 42836 }, { "epoch": 0.7952904133698199, "grad_norm": 0.6416725516319275, "learning_rate": 1.9975118506378454e-06, "loss": 0.3195, "step": 42838 }, { "epoch": 0.7953275435072386, "grad_norm": 0.37072065472602844, "learning_rate": 1.996812397840904e-06, "loss": 0.2285, "step": 42840 }, { "epoch": 0.7953646736446571, "grad_norm": 0.3906102478504181, "learning_rate": 1.9961130539433737e-06, "loss": 0.315, "step": 42842 }, { "epoch": 0.7954018037820758, "grad_norm": 0.6522887349128723, "learning_rate": 1.9954138189547712e-06, "loss": 0.4844, "step": 42844 }, { "epoch": 0.7954389339194944, "grad_norm": 0.5065922141075134, "learning_rate": 1.9947146928846083e-06, "loss": 0.4491, "step": 42846 }, { "epoch": 0.7954760640569131, "grad_norm": 0.6735919713973999, "learning_rate": 1.9940156757424046e-06, "loss": 0.1456, "step": 42848 }, { "epoch": 0.7955131941943318, "grad_norm": 0.3414495587348938, "learning_rate": 1.993316767537663e-06, "loss": 0.478, "step": 42850 }, { "epoch": 0.7955503243317503, "grad_norm": 0.4428774118423462, "learning_rate": 1.9926179682798997e-06, "loss": 0.3729, "step": 42852 }, { "epoch": 0.795587454469169, "grad_norm": 0.5150340795516968, "learning_rate": 1.991919277978619e-06, "loss": 0.1835, "step": 42854 }, { "epoch": 0.7956245846065876, "grad_norm": 0.5379697680473328, "learning_rate": 1.991220696643329e-06, "loss": 0.272, "step": 42856 }, { "epoch": 0.7956617147440063, "grad_norm": 0.37882760167121887, "learning_rate": 1.9905222242835375e-06, "loss": 0.3643, "step": 42858 }, { "epoch": 0.795698844881425, "grad_norm": 0.3648139536380768, "learning_rate": 1.989823860908747e-06, "loss": 0.3321, "step": 42860 }, { "epoch": 0.7957359750188435, "grad_norm": 0.4506866931915283, "learning_rate": 1.98912560652846e-06, "loss": 0.3394, "step": 42862 }, { "epoch": 0.7957731051562622, "grad_norm": 0.24526220560073853, "learning_rate": 1.9884274611521816e-06, "loss": 0.2202, "step": 42864 }, { "epoch": 0.7958102352936808, "grad_norm": 0.3648502230644226, "learning_rate": 1.9877294247894043e-06, "loss": 0.4003, "step": 42866 }, { "epoch": 0.7958473654310995, "grad_norm": 0.3403089642524719, "learning_rate": 1.987031497449631e-06, "loss": 0.2933, "step": 42868 }, { "epoch": 0.7958844955685181, "grad_norm": 0.37401846051216125, "learning_rate": 1.9863336791423594e-06, "loss": 0.3157, "step": 42870 }, { "epoch": 0.7959216257059367, "grad_norm": 0.43707484006881714, "learning_rate": 1.9856359698770798e-06, "loss": 0.203, "step": 42872 }, { "epoch": 0.7959587558433554, "grad_norm": 0.767066478729248, "learning_rate": 1.9849383696632895e-06, "loss": 0.4028, "step": 42874 }, { "epoch": 0.795995885980774, "grad_norm": 0.28582361340522766, "learning_rate": 1.984240878510483e-06, "loss": 0.2731, "step": 42876 }, { "epoch": 0.7960330161181927, "grad_norm": 0.47415080666542053, "learning_rate": 1.983543496428145e-06, "loss": 0.393, "step": 42878 }, { "epoch": 0.7960701462556113, "grad_norm": 0.3783133625984192, "learning_rate": 1.9828462234257683e-06, "loss": 0.3412, "step": 42880 }, { "epoch": 0.7961072763930299, "grad_norm": 0.7819389700889587, "learning_rate": 1.98214905951284e-06, "loss": 0.1384, "step": 42882 }, { "epoch": 0.7961444065304486, "grad_norm": 0.45372262597084045, "learning_rate": 1.981452004698847e-06, "loss": 0.1567, "step": 42884 }, { "epoch": 0.7961815366678672, "grad_norm": 0.37073835730552673, "learning_rate": 1.980755058993273e-06, "loss": 0.2749, "step": 42886 }, { "epoch": 0.7962186668052859, "grad_norm": 0.2847364842891693, "learning_rate": 1.9800582224056066e-06, "loss": 0.3282, "step": 42888 }, { "epoch": 0.7962557969427044, "grad_norm": 0.3301716148853302, "learning_rate": 1.979361494945321e-06, "loss": 0.2652, "step": 42890 }, { "epoch": 0.7962929270801231, "grad_norm": 0.33778923749923706, "learning_rate": 1.978664876621904e-06, "loss": 0.272, "step": 42892 }, { "epoch": 0.7963300572175418, "grad_norm": 0.3764488399028778, "learning_rate": 1.977968367444829e-06, "loss": 0.3562, "step": 42894 }, { "epoch": 0.7963671873549604, "grad_norm": 0.33144626021385193, "learning_rate": 1.9772719674235764e-06, "loss": 0.3188, "step": 42896 }, { "epoch": 0.796404317492379, "grad_norm": 0.3059694766998291, "learning_rate": 1.97657567656762e-06, "loss": 0.3798, "step": 42898 }, { "epoch": 0.7964414476297976, "grad_norm": 0.6471332311630249, "learning_rate": 1.975879494886437e-06, "loss": 0.2643, "step": 42900 }, { "epoch": 0.7964785777672163, "grad_norm": 0.40864214301109314, "learning_rate": 1.9751834223895004e-06, "loss": 0.2001, "step": 42902 }, { "epoch": 0.796515707904635, "grad_norm": 0.40074893832206726, "learning_rate": 1.974487459086278e-06, "loss": 0.4009, "step": 42904 }, { "epoch": 0.7965528380420536, "grad_norm": 0.3613109886646271, "learning_rate": 1.9737916049862425e-06, "loss": 0.3632, "step": 42906 }, { "epoch": 0.7965899681794723, "grad_norm": 0.4784510135650635, "learning_rate": 1.9730958600988627e-06, "loss": 0.2825, "step": 42908 }, { "epoch": 0.7966270983168908, "grad_norm": 0.3904340863227844, "learning_rate": 1.972400224433606e-06, "loss": 0.2128, "step": 42910 }, { "epoch": 0.7966642284543095, "grad_norm": 0.48943015933036804, "learning_rate": 1.9717046979999356e-06, "loss": 0.1876, "step": 42912 }, { "epoch": 0.7967013585917282, "grad_norm": 0.36092355847358704, "learning_rate": 1.971009280807319e-06, "loss": 0.2211, "step": 42914 }, { "epoch": 0.7967384887291468, "grad_norm": 0.30325502157211304, "learning_rate": 1.970313972865213e-06, "loss": 0.3446, "step": 42916 }, { "epoch": 0.7967756188665654, "grad_norm": 0.4336217939853668, "learning_rate": 1.969618774183083e-06, "loss": 0.2004, "step": 42918 }, { "epoch": 0.796812749003984, "grad_norm": 0.35627490282058716, "learning_rate": 1.9689236847703873e-06, "loss": 0.2784, "step": 42920 }, { "epoch": 0.7968498791414027, "grad_norm": 0.4171198904514313, "learning_rate": 1.9682287046365846e-06, "loss": 0.1664, "step": 42922 }, { "epoch": 0.7968870092788214, "grad_norm": 0.39028748869895935, "learning_rate": 1.967533833791131e-06, "loss": 0.2367, "step": 42924 }, { "epoch": 0.79692413941624, "grad_norm": 0.42063596844673157, "learning_rate": 1.9668390722434816e-06, "loss": 0.2655, "step": 42926 }, { "epoch": 0.7969612695536586, "grad_norm": 0.4091411530971527, "learning_rate": 1.9661444200030933e-06, "loss": 0.4197, "step": 42928 }, { "epoch": 0.7969983996910772, "grad_norm": 0.3154064416885376, "learning_rate": 1.965449877079413e-06, "loss": 0.2299, "step": 42930 }, { "epoch": 0.7970355298284959, "grad_norm": 0.46301957964897156, "learning_rate": 1.9647554434818928e-06, "loss": 0.3565, "step": 42932 }, { "epoch": 0.7970726599659146, "grad_norm": 0.36202600598335266, "learning_rate": 1.9640611192199864e-06, "loss": 0.0981, "step": 42934 }, { "epoch": 0.7971097901033332, "grad_norm": 0.344178169965744, "learning_rate": 1.9633669043031344e-06, "loss": 0.2722, "step": 42936 }, { "epoch": 0.7971469202407518, "grad_norm": 0.28540897369384766, "learning_rate": 1.9626727987407867e-06, "loss": 0.3595, "step": 42938 }, { "epoch": 0.7971840503781704, "grad_norm": 0.3103768229484558, "learning_rate": 1.9619788025423903e-06, "loss": 0.3873, "step": 42940 }, { "epoch": 0.7972211805155891, "grad_norm": 0.38639652729034424, "learning_rate": 1.961284915717382e-06, "loss": 0.2072, "step": 42942 }, { "epoch": 0.7972583106530077, "grad_norm": 0.5752630829811096, "learning_rate": 1.9605911382752084e-06, "loss": 0.1325, "step": 42944 }, { "epoch": 0.7972954407904264, "grad_norm": 0.4168678820133209, "learning_rate": 1.9598974702253082e-06, "loss": 0.3326, "step": 42946 }, { "epoch": 0.797332570927845, "grad_norm": 0.33254486322402954, "learning_rate": 1.959203911577121e-06, "loss": 0.2906, "step": 42948 }, { "epoch": 0.7973697010652636, "grad_norm": 0.3816289007663727, "learning_rate": 1.958510462340083e-06, "loss": 0.2565, "step": 42950 }, { "epoch": 0.7974068312026823, "grad_norm": 0.5795354247093201, "learning_rate": 1.957817122523633e-06, "loss": 0.2944, "step": 42952 }, { "epoch": 0.7974439613401009, "grad_norm": 0.5652730464935303, "learning_rate": 1.9571238921372017e-06, "loss": 0.3168, "step": 42954 }, { "epoch": 0.7974810914775196, "grad_norm": 0.4556003212928772, "learning_rate": 1.956430771190224e-06, "loss": 0.3636, "step": 42956 }, { "epoch": 0.7975182216149382, "grad_norm": 0.35166963934898376, "learning_rate": 1.955737759692128e-06, "loss": 0.2805, "step": 42958 }, { "epoch": 0.7975553517523568, "grad_norm": 0.4868939220905304, "learning_rate": 1.955044857652346e-06, "loss": 0.2895, "step": 42960 }, { "epoch": 0.7975924818897755, "grad_norm": 0.47664740681648254, "learning_rate": 1.954352065080306e-06, "loss": 0.2442, "step": 42962 }, { "epoch": 0.7976296120271941, "grad_norm": 0.37350475788116455, "learning_rate": 1.9536593819854377e-06, "loss": 0.276, "step": 42964 }, { "epoch": 0.7976667421646128, "grad_norm": 0.30765920877456665, "learning_rate": 1.95296680837716e-06, "loss": 0.3317, "step": 42966 }, { "epoch": 0.7977038723020314, "grad_norm": 0.4172854423522949, "learning_rate": 1.9522743442649015e-06, "loss": 0.2897, "step": 42968 }, { "epoch": 0.79774100243945, "grad_norm": 0.4467003345489502, "learning_rate": 1.9515819896580835e-06, "loss": 0.3109, "step": 42970 }, { "epoch": 0.7977781325768687, "grad_norm": 0.26181867718696594, "learning_rate": 1.950889744566126e-06, "loss": 0.166, "step": 42972 }, { "epoch": 0.7978152627142873, "grad_norm": 0.40757080912590027, "learning_rate": 1.950197608998452e-06, "loss": 0.2841, "step": 42974 }, { "epoch": 0.797852392851706, "grad_norm": 0.24695859849452972, "learning_rate": 1.9495055829644748e-06, "loss": 0.2295, "step": 42976 }, { "epoch": 0.7978895229891246, "grad_norm": 0.6922567486763, "learning_rate": 1.9488136664736148e-06, "loss": 0.4111, "step": 42978 }, { "epoch": 0.7979266531265432, "grad_norm": 0.430816113948822, "learning_rate": 1.9481218595352814e-06, "loss": 0.4955, "step": 42980 }, { "epoch": 0.7979637832639619, "grad_norm": 0.25292617082595825, "learning_rate": 1.947430162158892e-06, "loss": 0.1351, "step": 42982 }, { "epoch": 0.7980009134013805, "grad_norm": 0.3127245604991913, "learning_rate": 1.946738574353858e-06, "loss": 0.3354, "step": 42984 }, { "epoch": 0.7980380435387991, "grad_norm": 0.3873004615306854, "learning_rate": 1.9460470961295887e-06, "loss": 0.2199, "step": 42986 }, { "epoch": 0.7980751736762178, "grad_norm": 0.6320559978485107, "learning_rate": 1.945355727495495e-06, "loss": 0.2128, "step": 42988 }, { "epoch": 0.7981123038136364, "grad_norm": 0.5764524936676025, "learning_rate": 1.9446644684609863e-06, "loss": 0.1694, "step": 42990 }, { "epoch": 0.7981494339510551, "grad_norm": 0.3776129186153412, "learning_rate": 1.9439733190354617e-06, "loss": 0.4406, "step": 42992 }, { "epoch": 0.7981865640884737, "grad_norm": 0.3860102891921997, "learning_rate": 1.9432822792283303e-06, "loss": 0.2736, "step": 42994 }, { "epoch": 0.7982236942258923, "grad_norm": 0.25898656249046326, "learning_rate": 1.942591349048998e-06, "loss": 0.3975, "step": 42996 }, { "epoch": 0.7982608243633109, "grad_norm": 0.6694298982620239, "learning_rate": 1.9419005285068584e-06, "loss": 0.3899, "step": 42998 }, { "epoch": 0.7982979545007296, "grad_norm": 0.37497520446777344, "learning_rate": 1.941209817611317e-06, "loss": 0.1908, "step": 43000 }, { "epoch": 0.7983350846381483, "grad_norm": 0.5986818671226501, "learning_rate": 1.9405192163717735e-06, "loss": 0.3549, "step": 43002 }, { "epoch": 0.7983722147755669, "grad_norm": 0.34583595395088196, "learning_rate": 1.93982872479762e-06, "loss": 0.065, "step": 43004 }, { "epoch": 0.7984093449129855, "grad_norm": 0.2747690975666046, "learning_rate": 1.9391383428982536e-06, "loss": 0.3264, "step": 43006 }, { "epoch": 0.7984464750504041, "grad_norm": 0.4840778112411499, "learning_rate": 1.93844807068307e-06, "loss": 0.2221, "step": 43008 }, { "epoch": 0.7984836051878228, "grad_norm": 0.24710655212402344, "learning_rate": 1.9377579081614615e-06, "loss": 0.2501, "step": 43010 }, { "epoch": 0.7985207353252415, "grad_norm": 0.3082912266254425, "learning_rate": 1.937067855342818e-06, "loss": 0.1341, "step": 43012 }, { "epoch": 0.79855786546266, "grad_norm": 0.21395233273506165, "learning_rate": 1.93637791223653e-06, "loss": 0.1109, "step": 43014 }, { "epoch": 0.7985949956000787, "grad_norm": 0.3201897442340851, "learning_rate": 1.9356880788519874e-06, "loss": 0.3512, "step": 43016 }, { "epoch": 0.7986321257374973, "grad_norm": 0.7574721574783325, "learning_rate": 1.934998355198575e-06, "loss": 0.2542, "step": 43018 }, { "epoch": 0.798669255874916, "grad_norm": 0.38197097182273865, "learning_rate": 1.9343087412856755e-06, "loss": 0.2005, "step": 43020 }, { "epoch": 0.7987063860123347, "grad_norm": 0.519420325756073, "learning_rate": 1.9336192371226735e-06, "loss": 0.1919, "step": 43022 }, { "epoch": 0.7987435161497533, "grad_norm": 0.22292166948318481, "learning_rate": 1.9329298427189525e-06, "loss": 0.3386, "step": 43024 }, { "epoch": 0.7987806462871719, "grad_norm": 0.1667504757642746, "learning_rate": 1.932240558083892e-06, "loss": 0.3664, "step": 43026 }, { "epoch": 0.7988177764245905, "grad_norm": 0.41540899872779846, "learning_rate": 1.9315513832268763e-06, "loss": 0.1933, "step": 43028 }, { "epoch": 0.7988549065620092, "grad_norm": 0.7609919905662537, "learning_rate": 1.930862318157274e-06, "loss": 0.5274, "step": 43030 }, { "epoch": 0.7988920366994279, "grad_norm": 0.40697553753852844, "learning_rate": 1.9301733628844666e-06, "loss": 0.3669, "step": 43032 }, { "epoch": 0.7989291668368464, "grad_norm": 0.34802672266960144, "learning_rate": 1.929484517417827e-06, "loss": 0.2315, "step": 43034 }, { "epoch": 0.7989662969742651, "grad_norm": 0.45419275760650635, "learning_rate": 1.928795781766729e-06, "loss": 0.107, "step": 43036 }, { "epoch": 0.7990034271116837, "grad_norm": 0.2737857401371002, "learning_rate": 1.9281071559405484e-06, "loss": 0.254, "step": 43038 }, { "epoch": 0.7990405572491024, "grad_norm": 0.3402908146381378, "learning_rate": 1.927418639948647e-06, "loss": 0.4026, "step": 43040 }, { "epoch": 0.799077687386521, "grad_norm": 0.2896142303943634, "learning_rate": 1.9267302338004012e-06, "loss": 0.1903, "step": 43042 }, { "epoch": 0.7991148175239396, "grad_norm": 0.4730847179889679, "learning_rate": 1.9260419375051732e-06, "loss": 0.4513, "step": 43044 }, { "epoch": 0.7991519476613583, "grad_norm": 0.33516234159469604, "learning_rate": 1.9253537510723284e-06, "loss": 0.2122, "step": 43046 }, { "epoch": 0.7991890777987769, "grad_norm": 0.45864537358283997, "learning_rate": 1.9246656745112337e-06, "loss": 0.2465, "step": 43048 }, { "epoch": 0.7992262079361956, "grad_norm": 0.305649071931839, "learning_rate": 1.923977707831249e-06, "loss": 0.4121, "step": 43050 }, { "epoch": 0.7992633380736142, "grad_norm": 0.29891708493232727, "learning_rate": 1.9232898510417397e-06, "loss": 0.1992, "step": 43052 }, { "epoch": 0.7993004682110328, "grad_norm": 0.21371294558048248, "learning_rate": 1.9226021041520636e-06, "loss": 0.1172, "step": 43054 }, { "epoch": 0.7993375983484515, "grad_norm": 0.38667210936546326, "learning_rate": 1.9219144671715774e-06, "loss": 0.5409, "step": 43056 }, { "epoch": 0.7993747284858701, "grad_norm": 0.29345375299453735, "learning_rate": 1.9212269401096372e-06, "loss": 0.3697, "step": 43058 }, { "epoch": 0.7994118586232888, "grad_norm": 0.5943610668182373, "learning_rate": 1.920539522975603e-06, "loss": 0.3014, "step": 43060 }, { "epoch": 0.7994489887607074, "grad_norm": 0.37347593903541565, "learning_rate": 1.9198522157788214e-06, "loss": 0.2693, "step": 43062 }, { "epoch": 0.799486118898126, "grad_norm": 0.15082262456417084, "learning_rate": 1.919165018528649e-06, "loss": 0.1237, "step": 43064 }, { "epoch": 0.7995232490355447, "grad_norm": 0.1935180425643921, "learning_rate": 1.9184779312344393e-06, "loss": 0.2395, "step": 43066 }, { "epoch": 0.7995603791729633, "grad_norm": 0.41766008734703064, "learning_rate": 1.917790953905534e-06, "loss": 0.4238, "step": 43068 }, { "epoch": 0.799597509310382, "grad_norm": 0.45843106508255005, "learning_rate": 1.917104086551286e-06, "loss": 0.2885, "step": 43070 }, { "epoch": 0.7996346394478006, "grad_norm": 0.5241666436195374, "learning_rate": 1.9164173291810394e-06, "loss": 0.3662, "step": 43072 }, { "epoch": 0.7996717695852192, "grad_norm": 0.4685191214084625, "learning_rate": 1.91573068180414e-06, "loss": 0.2577, "step": 43074 }, { "epoch": 0.7997088997226379, "grad_norm": 0.45422470569610596, "learning_rate": 1.9150441444299305e-06, "loss": 0.315, "step": 43076 }, { "epoch": 0.7997460298600565, "grad_norm": 0.8422712683677673, "learning_rate": 1.9143577170677564e-06, "loss": 0.2394, "step": 43078 }, { "epoch": 0.7997831599974752, "grad_norm": 0.5112595558166504, "learning_rate": 1.9136713997269517e-06, "loss": 0.1634, "step": 43080 }, { "epoch": 0.7998202901348938, "grad_norm": 0.38265547156333923, "learning_rate": 1.9129851924168594e-06, "loss": 0.4626, "step": 43082 }, { "epoch": 0.7998574202723124, "grad_norm": 0.5256198048591614, "learning_rate": 1.9122990951468134e-06, "loss": 0.4621, "step": 43084 }, { "epoch": 0.7998945504097311, "grad_norm": 0.3918473720550537, "learning_rate": 1.911613107926151e-06, "loss": 0.3365, "step": 43086 }, { "epoch": 0.7999316805471497, "grad_norm": 0.4054127335548401, "learning_rate": 1.910927230764208e-06, "loss": 0.2619, "step": 43088 }, { "epoch": 0.7999688106845684, "grad_norm": 0.4376373887062073, "learning_rate": 1.910241463670317e-06, "loss": 0.3786, "step": 43090 }, { "epoch": 0.800005940821987, "grad_norm": 0.5270215272903442, "learning_rate": 1.9095558066538056e-06, "loss": 0.2439, "step": 43092 }, { "epoch": 0.8000430709594056, "grad_norm": 0.6403298377990723, "learning_rate": 1.908870259724007e-06, "loss": 0.2846, "step": 43094 }, { "epoch": 0.8000802010968242, "grad_norm": 0.3347005844116211, "learning_rate": 1.9081848228902477e-06, "loss": 0.4026, "step": 43096 }, { "epoch": 0.8001173312342429, "grad_norm": 0.44083598256111145, "learning_rate": 1.9074994961618543e-06, "loss": 0.2276, "step": 43098 }, { "epoch": 0.8001544613716616, "grad_norm": 0.39511510729789734, "learning_rate": 1.9068142795481537e-06, "loss": 0.2694, "step": 43100 }, { "epoch": 0.8001915915090801, "grad_norm": 0.33721503615379333, "learning_rate": 1.9061291730584718e-06, "loss": 0.261, "step": 43102 }, { "epoch": 0.8002287216464988, "grad_norm": 0.39148256182670593, "learning_rate": 1.9054441767021282e-06, "loss": 0.2343, "step": 43104 }, { "epoch": 0.8002658517839174, "grad_norm": 0.38831228017807007, "learning_rate": 1.9047592904884393e-06, "loss": 0.2623, "step": 43106 }, { "epoch": 0.8003029819213361, "grad_norm": 0.2891169786453247, "learning_rate": 1.9040745144267293e-06, "loss": 0.4191, "step": 43108 }, { "epoch": 0.8003401120587548, "grad_norm": 0.46889522671699524, "learning_rate": 1.9033898485263159e-06, "loss": 0.345, "step": 43110 }, { "epoch": 0.8003772421961733, "grad_norm": 0.4684017598628998, "learning_rate": 1.9027052927965129e-06, "loss": 0.3747, "step": 43112 }, { "epoch": 0.800414372333592, "grad_norm": 0.3638305068016052, "learning_rate": 1.9020208472466372e-06, "loss": 0.1795, "step": 43114 }, { "epoch": 0.8004515024710106, "grad_norm": 0.4729718863964081, "learning_rate": 1.901336511886005e-06, "loss": 0.1291, "step": 43116 }, { "epoch": 0.8004886326084293, "grad_norm": 0.3358307182788849, "learning_rate": 1.9006522867239207e-06, "loss": 0.1718, "step": 43118 }, { "epoch": 0.800525762745848, "grad_norm": 0.5696702003479004, "learning_rate": 1.8999681717696993e-06, "loss": 0.2886, "step": 43120 }, { "epoch": 0.8005628928832665, "grad_norm": 0.450535386800766, "learning_rate": 1.8992841670326479e-06, "loss": 0.2308, "step": 43122 }, { "epoch": 0.8006000230206852, "grad_norm": 0.5439473986625671, "learning_rate": 1.8986002725220775e-06, "loss": 0.1825, "step": 43124 }, { "epoch": 0.8006371531581038, "grad_norm": 0.30289989709854126, "learning_rate": 1.897916488247289e-06, "loss": 0.1576, "step": 43126 }, { "epoch": 0.8006742832955225, "grad_norm": 0.367119163274765, "learning_rate": 1.8972328142175899e-06, "loss": 0.2993, "step": 43128 }, { "epoch": 0.8007114134329412, "grad_norm": 0.4330754280090332, "learning_rate": 1.89654925044228e-06, "loss": 0.3078, "step": 43130 }, { "epoch": 0.8007485435703597, "grad_norm": 0.5412703156471252, "learning_rate": 1.8958657969306615e-06, "loss": 0.2562, "step": 43132 }, { "epoch": 0.8007856737077784, "grad_norm": 0.8298667073249817, "learning_rate": 1.8951824536920349e-06, "loss": 0.2649, "step": 43134 }, { "epoch": 0.800822803845197, "grad_norm": 0.29756802320480347, "learning_rate": 1.8944992207356993e-06, "loss": 0.3362, "step": 43136 }, { "epoch": 0.8008599339826157, "grad_norm": 0.5000346899032593, "learning_rate": 1.8938160980709497e-06, "loss": 0.3412, "step": 43138 }, { "epoch": 0.8008970641200344, "grad_norm": 0.41299769282341003, "learning_rate": 1.8931330857070817e-06, "loss": 0.3147, "step": 43140 }, { "epoch": 0.8009341942574529, "grad_norm": 0.1876785159111023, "learning_rate": 1.8924501836533926e-06, "loss": 0.1849, "step": 43142 }, { "epoch": 0.8009713243948716, "grad_norm": 0.2150462567806244, "learning_rate": 1.8917673919191693e-06, "loss": 0.3636, "step": 43144 }, { "epoch": 0.8010084545322902, "grad_norm": 0.29601457715034485, "learning_rate": 1.8910847105137075e-06, "loss": 0.4118, "step": 43146 }, { "epoch": 0.8010455846697089, "grad_norm": 0.3665584623813629, "learning_rate": 1.8904021394462902e-06, "loss": 0.3455, "step": 43148 }, { "epoch": 0.8010827148071274, "grad_norm": 0.3078038990497589, "learning_rate": 1.8897196787262095e-06, "loss": 0.1855, "step": 43150 }, { "epoch": 0.8011198449445461, "grad_norm": 0.5149518847465515, "learning_rate": 1.8890373283627505e-06, "loss": 0.2345, "step": 43152 }, { "epoch": 0.8011569750819648, "grad_norm": 0.5233734250068665, "learning_rate": 1.8883550883652014e-06, "loss": 0.3004, "step": 43154 }, { "epoch": 0.8011941052193834, "grad_norm": 0.5146218538284302, "learning_rate": 1.88767295874284e-06, "loss": 0.37, "step": 43156 }, { "epoch": 0.8012312353568021, "grad_norm": 0.40019264817237854, "learning_rate": 1.8869909395049502e-06, "loss": 0.1849, "step": 43158 }, { "epoch": 0.8012683654942206, "grad_norm": 0.8153854608535767, "learning_rate": 1.8863090306608113e-06, "loss": 0.2683, "step": 43160 }, { "epoch": 0.8013054956316393, "grad_norm": 0.4583037197589874, "learning_rate": 1.8856272322197044e-06, "loss": 0.215, "step": 43162 }, { "epoch": 0.801342625769058, "grad_norm": 1.4215084314346313, "learning_rate": 1.884945544190908e-06, "loss": 0.0513, "step": 43164 }, { "epoch": 0.8013797559064766, "grad_norm": 0.22587573528289795, "learning_rate": 1.8842639665836926e-06, "loss": 0.31, "step": 43166 }, { "epoch": 0.8014168860438953, "grad_norm": 0.495988130569458, "learning_rate": 1.883582499407337e-06, "loss": 0.2364, "step": 43168 }, { "epoch": 0.8014540161813138, "grad_norm": 0.49853113293647766, "learning_rate": 1.8829011426711109e-06, "loss": 0.1589, "step": 43170 }, { "epoch": 0.8014911463187325, "grad_norm": 0.5496615767478943, "learning_rate": 1.8822198963842852e-06, "loss": 0.2869, "step": 43172 }, { "epoch": 0.8015282764561512, "grad_norm": 0.3180420994758606, "learning_rate": 1.8815387605561319e-06, "loss": 0.3316, "step": 43174 }, { "epoch": 0.8015654065935698, "grad_norm": 0.4430505931377411, "learning_rate": 1.8808577351959189e-06, "loss": 0.4357, "step": 43176 }, { "epoch": 0.8016025367309885, "grad_norm": 0.2955609858036041, "learning_rate": 1.8801768203129122e-06, "loss": 0.2116, "step": 43178 }, { "epoch": 0.801639666868407, "grad_norm": 0.46044185757637024, "learning_rate": 1.879496015916379e-06, "loss": 0.3636, "step": 43180 }, { "epoch": 0.8016767970058257, "grad_norm": 0.5946810245513916, "learning_rate": 1.8788153220155803e-06, "loss": 0.2224, "step": 43182 }, { "epoch": 0.8017139271432444, "grad_norm": 0.45550569891929626, "learning_rate": 1.878134738619778e-06, "loss": 0.2776, "step": 43184 }, { "epoch": 0.801751057280663, "grad_norm": 0.34014013409614563, "learning_rate": 1.8774542657382344e-06, "loss": 0.2012, "step": 43186 }, { "epoch": 0.8017881874180817, "grad_norm": 1.281672477722168, "learning_rate": 1.8767739033802113e-06, "loss": 0.1994, "step": 43188 }, { "epoch": 0.8018253175555002, "grad_norm": 0.41418516635894775, "learning_rate": 1.876093651554961e-06, "loss": 0.2363, "step": 43190 }, { "epoch": 0.8018624476929189, "grad_norm": 0.42823171615600586, "learning_rate": 1.8754135102717442e-06, "loss": 0.3085, "step": 43192 }, { "epoch": 0.8018995778303375, "grad_norm": 0.4472355246543884, "learning_rate": 1.874733479539811e-06, "loss": 0.2975, "step": 43194 }, { "epoch": 0.8019367079677562, "grad_norm": 0.36870241165161133, "learning_rate": 1.8740535593684184e-06, "loss": 0.2924, "step": 43196 }, { "epoch": 0.8019738381051749, "grad_norm": 0.8169774413108826, "learning_rate": 1.8733737497668158e-06, "loss": 0.1596, "step": 43198 }, { "epoch": 0.8020109682425934, "grad_norm": 0.3381081521511078, "learning_rate": 1.8726940507442536e-06, "loss": 0.1528, "step": 43200 }, { "epoch": 0.8020480983800121, "grad_norm": 0.5653008222579956, "learning_rate": 1.8720144623099823e-06, "loss": 0.3512, "step": 43202 }, { "epoch": 0.8020852285174307, "grad_norm": 0.19422206282615662, "learning_rate": 1.871334984473251e-06, "loss": 0.1944, "step": 43204 }, { "epoch": 0.8021223586548494, "grad_norm": 0.24484436213970184, "learning_rate": 1.8706556172432988e-06, "loss": 0.2368, "step": 43206 }, { "epoch": 0.8021594887922681, "grad_norm": 0.35575175285339355, "learning_rate": 1.8699763606293742e-06, "loss": 0.2693, "step": 43208 }, { "epoch": 0.8021966189296866, "grad_norm": 0.5147548317909241, "learning_rate": 1.8692972146407219e-06, "loss": 0.2344, "step": 43210 }, { "epoch": 0.8022337490671053, "grad_norm": 0.38157403469085693, "learning_rate": 1.8686181792865764e-06, "loss": 0.3073, "step": 43212 }, { "epoch": 0.8022708792045239, "grad_norm": 0.6146352291107178, "learning_rate": 1.8679392545761821e-06, "loss": 0.2166, "step": 43214 }, { "epoch": 0.8023080093419426, "grad_norm": 0.5257559418678284, "learning_rate": 1.8672604405187755e-06, "loss": 0.2163, "step": 43216 }, { "epoch": 0.8023451394793613, "grad_norm": 0.4373333752155304, "learning_rate": 1.8665817371235973e-06, "loss": 0.2744, "step": 43218 }, { "epoch": 0.8023822696167798, "grad_norm": 0.3160369396209717, "learning_rate": 1.8659031443998766e-06, "loss": 0.2421, "step": 43220 }, { "epoch": 0.8024193997541985, "grad_norm": 0.41223639249801636, "learning_rate": 1.8652246623568493e-06, "loss": 0.3403, "step": 43222 }, { "epoch": 0.8024565298916171, "grad_norm": 0.5229523777961731, "learning_rate": 1.8645462910037481e-06, "loss": 0.3453, "step": 43224 }, { "epoch": 0.8024936600290358, "grad_norm": 0.4297162890434265, "learning_rate": 1.8638680303498036e-06, "loss": 0.2659, "step": 43226 }, { "epoch": 0.8025307901664545, "grad_norm": 0.3684268295764923, "learning_rate": 1.863189880404247e-06, "loss": 0.3765, "step": 43228 }, { "epoch": 0.802567920303873, "grad_norm": 0.24633879959583282, "learning_rate": 1.862511841176301e-06, "loss": 0.214, "step": 43230 }, { "epoch": 0.8026050504412917, "grad_norm": 0.5089553594589233, "learning_rate": 1.8618339126751961e-06, "loss": 0.2495, "step": 43232 }, { "epoch": 0.8026421805787103, "grad_norm": 0.5402042865753174, "learning_rate": 1.8611560949101537e-06, "loss": 0.388, "step": 43234 }, { "epoch": 0.802679310716129, "grad_norm": 0.24427495896816254, "learning_rate": 1.8604783878903975e-06, "loss": 0.2162, "step": 43236 }, { "epoch": 0.8027164408535477, "grad_norm": 0.6427628993988037, "learning_rate": 1.8598007916251503e-06, "loss": 0.1537, "step": 43238 }, { "epoch": 0.8027535709909662, "grad_norm": 0.19967681169509888, "learning_rate": 1.8591233061236301e-06, "loss": 0.289, "step": 43240 }, { "epoch": 0.8027907011283849, "grad_norm": 0.22936366498470306, "learning_rate": 1.858445931395061e-06, "loss": 0.122, "step": 43242 }, { "epoch": 0.8028278312658035, "grad_norm": 0.4564876854419708, "learning_rate": 1.8577686674486528e-06, "loss": 0.3432, "step": 43244 }, { "epoch": 0.8028649614032222, "grad_norm": 0.3696427047252655, "learning_rate": 1.8570915142936251e-06, "loss": 0.2891, "step": 43246 }, { "epoch": 0.8029020915406407, "grad_norm": 0.3821374475955963, "learning_rate": 1.8564144719391897e-06, "loss": 0.2975, "step": 43248 }, { "epoch": 0.8029392216780594, "grad_norm": 0.33499541878700256, "learning_rate": 1.8557375403945643e-06, "loss": 0.1411, "step": 43250 }, { "epoch": 0.8029763518154781, "grad_norm": 0.5015783309936523, "learning_rate": 1.855060719668953e-06, "loss": 0.2137, "step": 43252 }, { "epoch": 0.8030134819528967, "grad_norm": 0.34600141644477844, "learning_rate": 1.8543840097715705e-06, "loss": 0.2724, "step": 43254 }, { "epoch": 0.8030506120903154, "grad_norm": 0.2666163742542267, "learning_rate": 1.8537074107116205e-06, "loss": 0.2577, "step": 43256 }, { "epoch": 0.8030877422277339, "grad_norm": 0.655259907245636, "learning_rate": 1.8530309224983123e-06, "loss": 0.2425, "step": 43258 }, { "epoch": 0.8031248723651526, "grad_norm": 0.40070047974586487, "learning_rate": 1.8523545451408497e-06, "loss": 0.3429, "step": 43260 }, { "epoch": 0.8031620025025713, "grad_norm": 0.6754332184791565, "learning_rate": 1.8516782786484367e-06, "loss": 0.3918, "step": 43262 }, { "epoch": 0.8031991326399899, "grad_norm": 0.2892024517059326, "learning_rate": 1.8510021230302744e-06, "loss": 0.2347, "step": 43264 }, { "epoch": 0.8032362627774086, "grad_norm": 0.25573599338531494, "learning_rate": 1.8503260782955656e-06, "loss": 0.2774, "step": 43266 }, { "epoch": 0.8032733929148271, "grad_norm": 0.295149028301239, "learning_rate": 1.8496501444535097e-06, "loss": 0.2412, "step": 43268 }, { "epoch": 0.8033105230522458, "grad_norm": 0.6195911765098572, "learning_rate": 1.8489743215132993e-06, "loss": 0.4111, "step": 43270 }, { "epoch": 0.8033476531896645, "grad_norm": 0.3579482138156891, "learning_rate": 1.848298609484136e-06, "loss": 0.3796, "step": 43272 }, { "epoch": 0.8033847833270831, "grad_norm": 0.4425990581512451, "learning_rate": 1.8476230083752088e-06, "loss": 0.194, "step": 43274 }, { "epoch": 0.8034219134645018, "grad_norm": 0.5649939775466919, "learning_rate": 1.8469475181957142e-06, "loss": 0.3571, "step": 43276 }, { "epoch": 0.8034590436019203, "grad_norm": 0.5413402318954468, "learning_rate": 1.8462721389548422e-06, "loss": 0.0872, "step": 43278 }, { "epoch": 0.803496173739339, "grad_norm": 0.3577542006969452, "learning_rate": 1.8455968706617854e-06, "loss": 0.203, "step": 43280 }, { "epoch": 0.8035333038767577, "grad_norm": 0.4656463861465454, "learning_rate": 1.844921713325727e-06, "loss": 0.3137, "step": 43282 }, { "epoch": 0.8035704340141763, "grad_norm": 0.3261988162994385, "learning_rate": 1.8442466669558578e-06, "loss": 0.2861, "step": 43284 }, { "epoch": 0.803607564151595, "grad_norm": 0.30262404680252075, "learning_rate": 1.843571731561361e-06, "loss": 0.389, "step": 43286 }, { "epoch": 0.8036446942890135, "grad_norm": 0.3228495717048645, "learning_rate": 1.8428969071514225e-06, "loss": 0.3675, "step": 43288 }, { "epoch": 0.8036818244264322, "grad_norm": 0.3985765874385834, "learning_rate": 1.8422221937352236e-06, "loss": 0.2045, "step": 43290 }, { "epoch": 0.8037189545638509, "grad_norm": 0.32033851742744446, "learning_rate": 1.841547591321947e-06, "loss": 0.2345, "step": 43292 }, { "epoch": 0.8037560847012695, "grad_norm": 0.6886205077171326, "learning_rate": 1.840873099920769e-06, "loss": 0.1642, "step": 43294 }, { "epoch": 0.8037932148386882, "grad_norm": 0.39457011222839355, "learning_rate": 1.84019871954087e-06, "loss": 0.1899, "step": 43296 }, { "epoch": 0.8038303449761067, "grad_norm": 0.5090628862380981, "learning_rate": 1.8395244501914234e-06, "loss": 0.2008, "step": 43298 }, { "epoch": 0.8038674751135254, "grad_norm": 0.4681437313556671, "learning_rate": 1.8388502918816053e-06, "loss": 0.3015, "step": 43300 }, { "epoch": 0.803904605250944, "grad_norm": 0.38829556107521057, "learning_rate": 1.8381762446205887e-06, "loss": 0.2017, "step": 43302 }, { "epoch": 0.8039417353883627, "grad_norm": 0.25878778100013733, "learning_rate": 1.8375023084175469e-06, "loss": 0.2045, "step": 43304 }, { "epoch": 0.8039788655257813, "grad_norm": 0.3184488117694855, "learning_rate": 1.8368284832816508e-06, "loss": 0.2564, "step": 43306 }, { "epoch": 0.8040159956631999, "grad_norm": 0.4944175183773041, "learning_rate": 1.836154769222066e-06, "loss": 0.2729, "step": 43308 }, { "epoch": 0.8040531258006186, "grad_norm": 0.39799389243125916, "learning_rate": 1.83548116624796e-06, "loss": 0.2656, "step": 43310 }, { "epoch": 0.8040902559380372, "grad_norm": 0.47483983635902405, "learning_rate": 1.8348076743685005e-06, "loss": 0.3832, "step": 43312 }, { "epoch": 0.8041273860754559, "grad_norm": 0.4117342531681061, "learning_rate": 1.8341342935928542e-06, "loss": 0.4255, "step": 43314 }, { "epoch": 0.8041645162128745, "grad_norm": 0.40704625844955444, "learning_rate": 1.8334610239301765e-06, "loss": 0.1167, "step": 43316 }, { "epoch": 0.8042016463502931, "grad_norm": 0.38332149386405945, "learning_rate": 1.8327878653896358e-06, "loss": 0.2752, "step": 43318 }, { "epoch": 0.8042387764877118, "grad_norm": 0.43960854411125183, "learning_rate": 1.8321148179803871e-06, "loss": 0.4366, "step": 43320 }, { "epoch": 0.8042759066251304, "grad_norm": 0.5045108199119568, "learning_rate": 1.8314418817115887e-06, "loss": 0.0976, "step": 43322 }, { "epoch": 0.8043130367625491, "grad_norm": 0.41069746017456055, "learning_rate": 1.8307690565923986e-06, "loss": 0.2607, "step": 43324 }, { "epoch": 0.8043501668999677, "grad_norm": 0.518173336982727, "learning_rate": 1.830096342631973e-06, "loss": 0.3668, "step": 43326 }, { "epoch": 0.8043872970373863, "grad_norm": 0.30913227796554565, "learning_rate": 1.8294237398394643e-06, "loss": 0.2582, "step": 43328 }, { "epoch": 0.804424427174805, "grad_norm": 0.3206116855144501, "learning_rate": 1.8287512482240266e-06, "loss": 0.143, "step": 43330 }, { "epoch": 0.8044615573122236, "grad_norm": 0.48731645941734314, "learning_rate": 1.8280788677948068e-06, "loss": 0.2488, "step": 43332 }, { "epoch": 0.8044986874496423, "grad_norm": 0.3795047402381897, "learning_rate": 1.8274065985609557e-06, "loss": 0.346, "step": 43334 }, { "epoch": 0.8045358175870609, "grad_norm": 0.3395436108112335, "learning_rate": 1.8267344405316235e-06, "loss": 0.2858, "step": 43336 }, { "epoch": 0.8045729477244795, "grad_norm": 0.3880978524684906, "learning_rate": 1.8260623937159506e-06, "loss": 0.1366, "step": 43338 }, { "epoch": 0.8046100778618982, "grad_norm": 0.5826380848884583, "learning_rate": 1.8253904581230851e-06, "loss": 0.2398, "step": 43340 }, { "epoch": 0.8046472079993168, "grad_norm": 0.3195175528526306, "learning_rate": 1.8247186337621702e-06, "loss": 0.4254, "step": 43342 }, { "epoch": 0.8046843381367355, "grad_norm": 0.4237681031227112, "learning_rate": 1.8240469206423495e-06, "loss": 0.2074, "step": 43344 }, { "epoch": 0.804721468274154, "grad_norm": 0.47419479489326477, "learning_rate": 1.8233753187727577e-06, "loss": 0.3294, "step": 43346 }, { "epoch": 0.8047585984115727, "grad_norm": 0.7384077310562134, "learning_rate": 1.8227038281625353e-06, "loss": 0.3279, "step": 43348 }, { "epoch": 0.8047957285489914, "grad_norm": 0.7821800708770752, "learning_rate": 1.8220324488208207e-06, "loss": 0.2406, "step": 43350 }, { "epoch": 0.80483285868641, "grad_norm": 0.27602943778038025, "learning_rate": 1.8213611807567488e-06, "loss": 0.1983, "step": 43352 }, { "epoch": 0.8048699888238287, "grad_norm": 0.4104331433773041, "learning_rate": 1.8206900239794533e-06, "loss": 0.4232, "step": 43354 }, { "epoch": 0.8049071189612472, "grad_norm": 0.4724610149860382, "learning_rate": 1.8200189784980683e-06, "loss": 0.1915, "step": 43356 }, { "epoch": 0.8049442490986659, "grad_norm": 0.4686844050884247, "learning_rate": 1.8193480443217238e-06, "loss": 0.2924, "step": 43358 }, { "epoch": 0.8049813792360846, "grad_norm": 0.3730349540710449, "learning_rate": 1.818677221459546e-06, "loss": 0.1122, "step": 43360 }, { "epoch": 0.8050185093735032, "grad_norm": 0.3631017804145813, "learning_rate": 1.8180065099206657e-06, "loss": 0.406, "step": 43362 }, { "epoch": 0.8050556395109218, "grad_norm": 0.41883212327957153, "learning_rate": 1.8173359097142085e-06, "loss": 0.1862, "step": 43364 }, { "epoch": 0.8050927696483404, "grad_norm": 0.1606208086013794, "learning_rate": 1.8166654208492994e-06, "loss": 0.2055, "step": 43366 }, { "epoch": 0.8051298997857591, "grad_norm": 0.3141595721244812, "learning_rate": 1.8159950433350648e-06, "loss": 0.1557, "step": 43368 }, { "epoch": 0.8051670299231778, "grad_norm": 0.48711565136909485, "learning_rate": 1.8153247771806215e-06, "loss": 0.2681, "step": 43370 }, { "epoch": 0.8052041600605964, "grad_norm": 0.2692526876926422, "learning_rate": 1.8146546223950912e-06, "loss": 0.45, "step": 43372 }, { "epoch": 0.805241290198015, "grad_norm": 0.19378980994224548, "learning_rate": 1.8139845789875943e-06, "loss": 0.2439, "step": 43374 }, { "epoch": 0.8052784203354336, "grad_norm": 0.4063666760921478, "learning_rate": 1.8133146469672469e-06, "loss": 0.3842, "step": 43376 }, { "epoch": 0.8053155504728523, "grad_norm": 0.4238714873790741, "learning_rate": 1.8126448263431673e-06, "loss": 0.2104, "step": 43378 }, { "epoch": 0.805352680610271, "grad_norm": 0.47500571608543396, "learning_rate": 1.8119751171244681e-06, "loss": 0.4069, "step": 43380 }, { "epoch": 0.8053898107476896, "grad_norm": 0.34225329756736755, "learning_rate": 1.81130551932026e-06, "loss": 0.1589, "step": 43382 }, { "epoch": 0.8054269408851082, "grad_norm": 0.47826430201530457, "learning_rate": 1.8106360329396544e-06, "loss": 0.2706, "step": 43384 }, { "epoch": 0.8054640710225268, "grad_norm": 0.49866336584091187, "learning_rate": 1.809966657991763e-06, "loss": 0.2475, "step": 43386 }, { "epoch": 0.8055012011599455, "grad_norm": 0.3814980089664459, "learning_rate": 1.809297394485694e-06, "loss": 0.1413, "step": 43388 }, { "epoch": 0.8055383312973642, "grad_norm": 0.31049486994743347, "learning_rate": 1.8086282424305525e-06, "loss": 0.2359, "step": 43390 }, { "epoch": 0.8055754614347828, "grad_norm": 0.5000649690628052, "learning_rate": 1.8079592018354453e-06, "loss": 0.22, "step": 43392 }, { "epoch": 0.8056125915722014, "grad_norm": 0.6004167795181274, "learning_rate": 1.8072902727094777e-06, "loss": 0.1379, "step": 43394 }, { "epoch": 0.80564972170962, "grad_norm": 0.335161030292511, "learning_rate": 1.8066214550617466e-06, "loss": 0.4015, "step": 43396 }, { "epoch": 0.8056868518470387, "grad_norm": 0.37503138184547424, "learning_rate": 1.8059527489013551e-06, "loss": 0.2933, "step": 43398 }, { "epoch": 0.8057239819844573, "grad_norm": 0.27074676752090454, "learning_rate": 1.8052841542374056e-06, "loss": 0.3332, "step": 43400 }, { "epoch": 0.805761112121876, "grad_norm": 0.4484935998916626, "learning_rate": 1.8046156710789907e-06, "loss": 0.4415, "step": 43402 }, { "epoch": 0.8057982422592946, "grad_norm": 0.3852640688419342, "learning_rate": 1.803947299435208e-06, "loss": 0.1681, "step": 43404 }, { "epoch": 0.8058353723967132, "grad_norm": 0.3704209625720978, "learning_rate": 1.8032790393151556e-06, "loss": 0.0885, "step": 43406 }, { "epoch": 0.8058725025341319, "grad_norm": 0.3663286864757538, "learning_rate": 1.8026108907279204e-06, "loss": 0.2001, "step": 43408 }, { "epoch": 0.8059096326715505, "grad_norm": 0.7199390530586243, "learning_rate": 1.8019428536825978e-06, "loss": 0.4433, "step": 43410 }, { "epoch": 0.8059467628089692, "grad_norm": 0.31049951910972595, "learning_rate": 1.8012749281882759e-06, "loss": 0.2676, "step": 43412 }, { "epoch": 0.8059838929463878, "grad_norm": 0.403421014547348, "learning_rate": 1.8006071142540448e-06, "loss": 0.2044, "step": 43414 }, { "epoch": 0.8060210230838064, "grad_norm": 0.3136270344257355, "learning_rate": 1.7999394118889901e-06, "loss": 0.1751, "step": 43416 }, { "epoch": 0.8060581532212251, "grad_norm": 0.26560845971107483, "learning_rate": 1.799271821102201e-06, "loss": 0.1704, "step": 43418 }, { "epoch": 0.8060952833586437, "grad_norm": 0.45790261030197144, "learning_rate": 1.7986043419027565e-06, "loss": 0.3555, "step": 43420 }, { "epoch": 0.8061324134960623, "grad_norm": 0.327329158782959, "learning_rate": 1.7979369742997421e-06, "loss": 0.1462, "step": 43422 }, { "epoch": 0.806169543633481, "grad_norm": 0.2910301983356476, "learning_rate": 1.797269718302236e-06, "loss": 0.3186, "step": 43424 }, { "epoch": 0.8062066737708996, "grad_norm": 0.7805314064025879, "learning_rate": 1.7966025739193194e-06, "loss": 0.2915, "step": 43426 }, { "epoch": 0.8062438039083183, "grad_norm": 0.3197399377822876, "learning_rate": 1.7959355411600688e-06, "loss": 0.3809, "step": 43428 }, { "epoch": 0.8062809340457369, "grad_norm": 0.2715917229652405, "learning_rate": 1.7952686200335612e-06, "loss": 0.2173, "step": 43430 }, { "epoch": 0.8063180641831555, "grad_norm": 0.28683269023895264, "learning_rate": 1.794601810548875e-06, "loss": 0.1731, "step": 43432 }, { "epoch": 0.8063551943205742, "grad_norm": 0.31278592348098755, "learning_rate": 1.7939351127150773e-06, "loss": 0.1094, "step": 43434 }, { "epoch": 0.8063923244579928, "grad_norm": 0.6151852011680603, "learning_rate": 1.793268526541242e-06, "loss": 0.4134, "step": 43436 }, { "epoch": 0.8064294545954115, "grad_norm": 0.6536759734153748, "learning_rate": 1.7926020520364407e-06, "loss": 0.3735, "step": 43438 }, { "epoch": 0.8064665847328301, "grad_norm": 0.5904718041419983, "learning_rate": 1.791935689209745e-06, "loss": 0.445, "step": 43440 }, { "epoch": 0.8065037148702487, "grad_norm": 0.5025285482406616, "learning_rate": 1.7912694380702144e-06, "loss": 0.0996, "step": 43442 }, { "epoch": 0.8065408450076674, "grad_norm": 0.5815796256065369, "learning_rate": 1.7906032986269216e-06, "loss": 0.2888, "step": 43444 }, { "epoch": 0.806577975145086, "grad_norm": 0.27919381856918335, "learning_rate": 1.7899372708889262e-06, "loss": 0.1935, "step": 43446 }, { "epoch": 0.8066151052825047, "grad_norm": 0.6288081407546997, "learning_rate": 1.7892713548652918e-06, "loss": 0.3838, "step": 43448 }, { "epoch": 0.8066522354199233, "grad_norm": 0.37158867716789246, "learning_rate": 1.7886055505650812e-06, "loss": 0.2229, "step": 43450 }, { "epoch": 0.8066893655573419, "grad_norm": 0.35470694303512573, "learning_rate": 1.7879398579973518e-06, "loss": 0.3228, "step": 43452 }, { "epoch": 0.8067264956947605, "grad_norm": 0.34727033972740173, "learning_rate": 1.7872742771711638e-06, "loss": 0.2563, "step": 43454 }, { "epoch": 0.8067636258321792, "grad_norm": 0.49045348167419434, "learning_rate": 1.7866088080955758e-06, "loss": 0.2129, "step": 43456 }, { "epoch": 0.8068007559695979, "grad_norm": 0.3321934938430786, "learning_rate": 1.7859434507796368e-06, "loss": 0.2538, "step": 43458 }, { "epoch": 0.8068378861070165, "grad_norm": 0.3904259502887726, "learning_rate": 1.7852782052324035e-06, "loss": 0.3278, "step": 43460 }, { "epoch": 0.8068750162444351, "grad_norm": 0.49974340200424194, "learning_rate": 1.7846130714629284e-06, "loss": 0.2433, "step": 43462 }, { "epoch": 0.8069121463818537, "grad_norm": 0.3082742989063263, "learning_rate": 1.783948049480263e-06, "loss": 0.2594, "step": 43464 }, { "epoch": 0.8069492765192724, "grad_norm": 0.42806223034858704, "learning_rate": 1.7832831392934536e-06, "loss": 0.254, "step": 43466 }, { "epoch": 0.8069864066566911, "grad_norm": 0.3587949275970459, "learning_rate": 1.7826183409115471e-06, "loss": 0.3253, "step": 43468 }, { "epoch": 0.8070235367941097, "grad_norm": 0.4119024872779846, "learning_rate": 1.7819536543435945e-06, "loss": 0.4468, "step": 43470 }, { "epoch": 0.8070606669315283, "grad_norm": 0.32273563742637634, "learning_rate": 1.781289079598635e-06, "loss": 0.2009, "step": 43472 }, { "epoch": 0.8070977970689469, "grad_norm": 0.6856324076652527, "learning_rate": 1.7806246166857122e-06, "loss": 0.2399, "step": 43474 }, { "epoch": 0.8071349272063656, "grad_norm": 0.43645039200782776, "learning_rate": 1.7799602656138681e-06, "loss": 0.2001, "step": 43476 }, { "epoch": 0.8071720573437843, "grad_norm": 0.32688260078430176, "learning_rate": 1.7792960263921433e-06, "loss": 0.2047, "step": 43478 }, { "epoch": 0.8072091874812029, "grad_norm": 0.3702280819416046, "learning_rate": 1.7786318990295758e-06, "loss": 0.2412, "step": 43480 }, { "epoch": 0.8072463176186215, "grad_norm": 0.30157771706581116, "learning_rate": 1.7779678835352054e-06, "loss": 0.3085, "step": 43482 }, { "epoch": 0.8072834477560401, "grad_norm": 0.4237672686576843, "learning_rate": 1.7773039799180614e-06, "loss": 0.5321, "step": 43484 }, { "epoch": 0.8073205778934588, "grad_norm": 0.47945550084114075, "learning_rate": 1.776640188187183e-06, "loss": 0.3135, "step": 43486 }, { "epoch": 0.8073577080308775, "grad_norm": 0.29122671484947205, "learning_rate": 1.7759765083515967e-06, "loss": 0.2092, "step": 43488 }, { "epoch": 0.807394838168296, "grad_norm": 0.3656920790672302, "learning_rate": 1.7753129404203372e-06, "loss": 0.3622, "step": 43490 }, { "epoch": 0.8074319683057147, "grad_norm": 0.41408294439315796, "learning_rate": 1.774649484402432e-06, "loss": 0.1361, "step": 43492 }, { "epoch": 0.8074690984431333, "grad_norm": 0.25159546732902527, "learning_rate": 1.7739861403069124e-06, "loss": 0.2163, "step": 43494 }, { "epoch": 0.807506228580552, "grad_norm": 0.2922087013721466, "learning_rate": 1.7733229081427995e-06, "loss": 0.2089, "step": 43496 }, { "epoch": 0.8075433587179706, "grad_norm": 0.4318699836730957, "learning_rate": 1.7726597879191198e-06, "loss": 0.297, "step": 43498 }, { "epoch": 0.8075804888553892, "grad_norm": 0.5053518414497375, "learning_rate": 1.7719967796448968e-06, "loss": 0.3329, "step": 43500 }, { "epoch": 0.8076176189928079, "grad_norm": 0.5303958654403687, "learning_rate": 1.7713338833291516e-06, "loss": 0.281, "step": 43502 }, { "epoch": 0.8076547491302265, "grad_norm": 0.3132563829421997, "learning_rate": 1.7706710989809074e-06, "loss": 0.1988, "step": 43504 }, { "epoch": 0.8076918792676452, "grad_norm": 0.3681938052177429, "learning_rate": 1.7700084266091766e-06, "loss": 0.1013, "step": 43506 }, { "epoch": 0.8077290094050638, "grad_norm": 0.379341721534729, "learning_rate": 1.7693458662229824e-06, "loss": 0.2417, "step": 43508 }, { "epoch": 0.8077661395424824, "grad_norm": 0.2612236738204956, "learning_rate": 1.768683417831335e-06, "loss": 0.3124, "step": 43510 }, { "epoch": 0.8078032696799011, "grad_norm": 0.3381321132183075, "learning_rate": 1.7680210814432508e-06, "loss": 0.2379, "step": 43512 }, { "epoch": 0.8078403998173197, "grad_norm": 0.3066912889480591, "learning_rate": 1.7673588570677414e-06, "loss": 0.1938, "step": 43514 }, { "epoch": 0.8078775299547384, "grad_norm": 0.4357501268386841, "learning_rate": 1.7666967447138184e-06, "loss": 0.3481, "step": 43516 }, { "epoch": 0.807914660092157, "grad_norm": 0.25423452258110046, "learning_rate": 1.7660347443904912e-06, "loss": 0.4242, "step": 43518 }, { "epoch": 0.8079517902295756, "grad_norm": 0.255190908908844, "learning_rate": 1.7653728561067707e-06, "loss": 0.1953, "step": 43520 }, { "epoch": 0.8079889203669943, "grad_norm": 0.455807089805603, "learning_rate": 1.7647110798716571e-06, "loss": 0.104, "step": 43522 }, { "epoch": 0.8080260505044129, "grad_norm": 0.3583429753780365, "learning_rate": 1.7640494156941579e-06, "loss": 0.1357, "step": 43524 }, { "epoch": 0.8080631806418316, "grad_norm": 0.3683110475540161, "learning_rate": 1.763387863583279e-06, "loss": 0.1254, "step": 43526 }, { "epoch": 0.8081003107792502, "grad_norm": 0.35647693276405334, "learning_rate": 1.762726423548018e-06, "loss": 0.4962, "step": 43528 }, { "epoch": 0.8081374409166688, "grad_norm": 0.38255202770233154, "learning_rate": 1.762065095597376e-06, "loss": 0.1685, "step": 43530 }, { "epoch": 0.8081745710540875, "grad_norm": 0.36853960156440735, "learning_rate": 1.7614038797403555e-06, "loss": 0.3672, "step": 43532 }, { "epoch": 0.8082117011915061, "grad_norm": 0.518334150314331, "learning_rate": 1.7607427759859487e-06, "loss": 0.2918, "step": 43534 }, { "epoch": 0.8082488313289248, "grad_norm": 0.23984551429748535, "learning_rate": 1.7600817843431528e-06, "loss": 0.1752, "step": 43536 }, { "epoch": 0.8082859614663434, "grad_norm": 0.39685407280921936, "learning_rate": 1.7594209048209632e-06, "loss": 0.2374, "step": 43538 }, { "epoch": 0.808323091603762, "grad_norm": 0.4364679455757141, "learning_rate": 1.7587601374283703e-06, "loss": 0.1603, "step": 43540 }, { "epoch": 0.8083602217411807, "grad_norm": 0.41620877385139465, "learning_rate": 1.7580994821743668e-06, "loss": 0.2609, "step": 43542 }, { "epoch": 0.8083973518785993, "grad_norm": 0.19475655257701874, "learning_rate": 1.7574389390679458e-06, "loss": 0.2037, "step": 43544 }, { "epoch": 0.808434482016018, "grad_norm": 0.31679633259773254, "learning_rate": 1.7567785081180876e-06, "loss": 0.3769, "step": 43546 }, { "epoch": 0.8084716121534365, "grad_norm": 0.4953288435935974, "learning_rate": 1.7561181893337865e-06, "loss": 0.2136, "step": 43548 }, { "epoch": 0.8085087422908552, "grad_norm": 0.5830520391464233, "learning_rate": 1.7554579827240192e-06, "loss": 0.3411, "step": 43550 }, { "epoch": 0.8085458724282738, "grad_norm": 0.3301014006137848, "learning_rate": 1.7547978882977745e-06, "loss": 0.2588, "step": 43552 }, { "epoch": 0.8085830025656925, "grad_norm": 0.4469843804836273, "learning_rate": 1.754137906064034e-06, "loss": 0.2422, "step": 43554 }, { "epoch": 0.8086201327031112, "grad_norm": 0.3511788845062256, "learning_rate": 1.753478036031776e-06, "loss": 0.2667, "step": 43556 }, { "epoch": 0.8086572628405297, "grad_norm": 0.4620882272720337, "learning_rate": 1.7528182782099845e-06, "loss": 0.2264, "step": 43558 }, { "epoch": 0.8086943929779484, "grad_norm": 0.3282831609249115, "learning_rate": 1.75215863260763e-06, "loss": 0.3191, "step": 43560 }, { "epoch": 0.808731523115367, "grad_norm": 0.6383742094039917, "learning_rate": 1.7514990992336912e-06, "loss": 0.4763, "step": 43562 }, { "epoch": 0.8087686532527857, "grad_norm": 0.39464277029037476, "learning_rate": 1.7508396780971426e-06, "loss": 0.2699, "step": 43564 }, { "epoch": 0.8088057833902044, "grad_norm": 0.3656119704246521, "learning_rate": 1.7501803692069575e-06, "loss": 0.3724, "step": 43566 }, { "epoch": 0.8088429135276229, "grad_norm": 0.3517090380191803, "learning_rate": 1.7495211725721084e-06, "loss": 0.2025, "step": 43568 }, { "epoch": 0.8088800436650416, "grad_norm": 0.37723904848098755, "learning_rate": 1.7488620882015606e-06, "loss": 0.1969, "step": 43570 }, { "epoch": 0.8089171738024602, "grad_norm": 0.5901374816894531, "learning_rate": 1.7482031161042868e-06, "loss": 0.2981, "step": 43572 }, { "epoch": 0.8089543039398789, "grad_norm": 0.24585402011871338, "learning_rate": 1.7475442562892497e-06, "loss": 0.2039, "step": 43574 }, { "epoch": 0.8089914340772976, "grad_norm": 0.31929776072502136, "learning_rate": 1.7468855087654168e-06, "loss": 0.3258, "step": 43576 }, { "epoch": 0.8090285642147161, "grad_norm": 0.36173340678215027, "learning_rate": 1.746226873541751e-06, "loss": 0.3943, "step": 43578 }, { "epoch": 0.8090656943521348, "grad_norm": 0.518196165561676, "learning_rate": 1.7455683506272137e-06, "loss": 0.3303, "step": 43580 }, { "epoch": 0.8091028244895534, "grad_norm": 0.3349676728248596, "learning_rate": 1.744909940030769e-06, "loss": 0.3504, "step": 43582 }, { "epoch": 0.8091399546269721, "grad_norm": 0.39362040162086487, "learning_rate": 1.7442516417613708e-06, "loss": 0.3603, "step": 43584 }, { "epoch": 0.8091770847643908, "grad_norm": 0.34264281392097473, "learning_rate": 1.7435934558279787e-06, "loss": 0.3535, "step": 43586 }, { "epoch": 0.8092142149018093, "grad_norm": 0.46950778365135193, "learning_rate": 1.7429353822395501e-06, "loss": 0.1485, "step": 43588 }, { "epoch": 0.809251345039228, "grad_norm": 0.32493266463279724, "learning_rate": 1.7422774210050398e-06, "loss": 0.116, "step": 43590 }, { "epoch": 0.8092884751766466, "grad_norm": 0.41430068016052246, "learning_rate": 1.741619572133396e-06, "loss": 0.2963, "step": 43592 }, { "epoch": 0.8093256053140653, "grad_norm": 0.6539960503578186, "learning_rate": 1.7409618356335733e-06, "loss": 0.3436, "step": 43594 }, { "epoch": 0.809362735451484, "grad_norm": 0.39869973063468933, "learning_rate": 1.740304211514524e-06, "loss": 0.3251, "step": 43596 }, { "epoch": 0.8093998655889025, "grad_norm": 0.329857736825943, "learning_rate": 1.7396466997851925e-06, "loss": 0.3835, "step": 43598 }, { "epoch": 0.8094369957263212, "grad_norm": 0.6175298094749451, "learning_rate": 1.7389893004545254e-06, "loss": 0.5915, "step": 43600 }, { "epoch": 0.8094741258637398, "grad_norm": 0.7558514475822449, "learning_rate": 1.7383320135314697e-06, "loss": 0.3094, "step": 43602 }, { "epoch": 0.8095112560011585, "grad_norm": 0.44288742542266846, "learning_rate": 1.7376748390249686e-06, "loss": 0.1627, "step": 43604 }, { "epoch": 0.809548386138577, "grad_norm": 0.3741776943206787, "learning_rate": 1.7370177769439644e-06, "loss": 0.3526, "step": 43606 }, { "epoch": 0.8095855162759957, "grad_norm": 0.6091138124465942, "learning_rate": 1.7363608272973997e-06, "loss": 0.2258, "step": 43608 }, { "epoch": 0.8096226464134144, "grad_norm": 0.37969157099723816, "learning_rate": 1.7357039900942108e-06, "loss": 0.3495, "step": 43610 }, { "epoch": 0.809659776550833, "grad_norm": 0.3240143954753876, "learning_rate": 1.735047265343337e-06, "loss": 0.1554, "step": 43612 }, { "epoch": 0.8096969066882517, "grad_norm": 0.4269893169403076, "learning_rate": 1.7343906530537114e-06, "loss": 0.3377, "step": 43614 }, { "epoch": 0.8097340368256702, "grad_norm": 0.3002816438674927, "learning_rate": 1.733734153234271e-06, "loss": 0.2701, "step": 43616 }, { "epoch": 0.8097711669630889, "grad_norm": 0.26816660165786743, "learning_rate": 1.7330777658939491e-06, "loss": 0.3328, "step": 43618 }, { "epoch": 0.8098082971005076, "grad_norm": 0.5631070733070374, "learning_rate": 1.732421491041678e-06, "loss": 0.3373, "step": 43620 }, { "epoch": 0.8098454272379262, "grad_norm": 0.20925818383693695, "learning_rate": 1.7317653286863833e-06, "loss": 0.2821, "step": 43622 }, { "epoch": 0.8098825573753449, "grad_norm": 0.43348854780197144, "learning_rate": 1.7311092788369977e-06, "loss": 0.5323, "step": 43624 }, { "epoch": 0.8099196875127634, "grad_norm": 0.8420249819755554, "learning_rate": 1.730453341502445e-06, "loss": 0.1256, "step": 43626 }, { "epoch": 0.8099568176501821, "grad_norm": 0.3945586085319519, "learning_rate": 1.7297975166916537e-06, "loss": 0.1872, "step": 43628 }, { "epoch": 0.8099939477876008, "grad_norm": 0.47009968757629395, "learning_rate": 1.7291418044135445e-06, "loss": 0.2801, "step": 43630 }, { "epoch": 0.8100310779250194, "grad_norm": 0.5788520574569702, "learning_rate": 1.7284862046770444e-06, "loss": 0.4035, "step": 43632 }, { "epoch": 0.8100682080624381, "grad_norm": 0.45796093344688416, "learning_rate": 1.7278307174910713e-06, "loss": 0.1926, "step": 43634 }, { "epoch": 0.8101053381998566, "grad_norm": 0.3990139961242676, "learning_rate": 1.727175342864542e-06, "loss": 0.2553, "step": 43636 }, { "epoch": 0.8101424683372753, "grad_norm": 0.3560784161090851, "learning_rate": 1.7265200808063753e-06, "loss": 0.3187, "step": 43638 }, { "epoch": 0.810179598474694, "grad_norm": 0.41287750005722046, "learning_rate": 1.7258649313254894e-06, "loss": 0.4058, "step": 43640 }, { "epoch": 0.8102167286121126, "grad_norm": 0.37703830003738403, "learning_rate": 1.725209894430797e-06, "loss": 0.1721, "step": 43642 }, { "epoch": 0.8102538587495313, "grad_norm": 0.45382794737815857, "learning_rate": 1.7245549701312125e-06, "loss": 0.2617, "step": 43644 }, { "epoch": 0.8102909888869498, "grad_norm": 0.408495157957077, "learning_rate": 1.7239001584356497e-06, "loss": 0.2693, "step": 43646 }, { "epoch": 0.8103281190243685, "grad_norm": 0.25961384177207947, "learning_rate": 1.7232454593530134e-06, "loss": 0.3486, "step": 43648 }, { "epoch": 0.8103652491617871, "grad_norm": 0.25137338042259216, "learning_rate": 1.7225908728922136e-06, "loss": 0.2948, "step": 43650 }, { "epoch": 0.8104023792992058, "grad_norm": 0.4384453296661377, "learning_rate": 1.7219363990621595e-06, "loss": 0.2747, "step": 43652 }, { "epoch": 0.8104395094366245, "grad_norm": 0.46359142661094666, "learning_rate": 1.7212820378717577e-06, "loss": 0.272, "step": 43654 }, { "epoch": 0.810476639574043, "grad_norm": 0.4201693534851074, "learning_rate": 1.7206277893299073e-06, "loss": 0.1944, "step": 43656 }, { "epoch": 0.8105137697114617, "grad_norm": 0.4915314316749573, "learning_rate": 1.7199736534455146e-06, "loss": 0.3, "step": 43658 }, { "epoch": 0.8105508998488803, "grad_norm": 0.42742177844047546, "learning_rate": 1.7193196302274773e-06, "loss": 0.2002, "step": 43660 }, { "epoch": 0.810588029986299, "grad_norm": 0.29782113432884216, "learning_rate": 1.7186657196846968e-06, "loss": 0.3463, "step": 43662 }, { "epoch": 0.8106251601237177, "grad_norm": 0.2809637188911438, "learning_rate": 1.7180119218260694e-06, "loss": 0.2557, "step": 43664 }, { "epoch": 0.8106622902611362, "grad_norm": 0.45260393619537354, "learning_rate": 1.7173582366604923e-06, "loss": 0.2476, "step": 43666 }, { "epoch": 0.8106994203985549, "grad_norm": 0.3119635283946991, "learning_rate": 1.716704664196861e-06, "loss": 0.4167, "step": 43668 }, { "epoch": 0.8107365505359735, "grad_norm": 0.2795673906803131, "learning_rate": 1.7160512044440704e-06, "loss": 0.1192, "step": 43670 }, { "epoch": 0.8107736806733922, "grad_norm": 0.37746718525886536, "learning_rate": 1.7153978574110063e-06, "loss": 0.206, "step": 43672 }, { "epoch": 0.8108108108108109, "grad_norm": 0.28515276312828064, "learning_rate": 1.7147446231065623e-06, "loss": 0.2016, "step": 43674 }, { "epoch": 0.8108479409482294, "grad_norm": 0.48526784777641296, "learning_rate": 1.7140915015396299e-06, "loss": 0.2982, "step": 43676 }, { "epoch": 0.8108850710856481, "grad_norm": 0.24649102985858917, "learning_rate": 1.7134384927190895e-06, "loss": 0.2257, "step": 43678 }, { "epoch": 0.8109222012230667, "grad_norm": 0.7558265924453735, "learning_rate": 1.7127855966538309e-06, "loss": 0.2141, "step": 43680 }, { "epoch": 0.8109593313604854, "grad_norm": 0.29816606640815735, "learning_rate": 1.7121328133527371e-06, "loss": 0.1889, "step": 43682 }, { "epoch": 0.810996461497904, "grad_norm": 0.5247555375099182, "learning_rate": 1.711480142824693e-06, "loss": 0.5191, "step": 43684 }, { "epoch": 0.8110335916353226, "grad_norm": 0.3596543073654175, "learning_rate": 1.7108275850785737e-06, "loss": 0.1118, "step": 43686 }, { "epoch": 0.8110707217727413, "grad_norm": 0.332597017288208, "learning_rate": 1.7101751401232636e-06, "loss": 0.2631, "step": 43688 }, { "epoch": 0.8111078519101599, "grad_norm": 0.31524085998535156, "learning_rate": 1.7095228079676385e-06, "loss": 0.2453, "step": 43690 }, { "epoch": 0.8111449820475786, "grad_norm": 0.5466545820236206, "learning_rate": 1.7088705886205748e-06, "loss": 0.2723, "step": 43692 }, { "epoch": 0.8111821121849973, "grad_norm": 0.37573733925819397, "learning_rate": 1.708218482090951e-06, "loss": 0.2743, "step": 43694 }, { "epoch": 0.8112192423224158, "grad_norm": 0.32732468843460083, "learning_rate": 1.7075664883876331e-06, "loss": 0.3459, "step": 43696 }, { "epoch": 0.8112563724598345, "grad_norm": 0.4555458724498749, "learning_rate": 1.7069146075194998e-06, "loss": 0.1121, "step": 43698 }, { "epoch": 0.8112935025972531, "grad_norm": 0.4011319875717163, "learning_rate": 1.706262839495415e-06, "loss": 0.1717, "step": 43700 }, { "epoch": 0.8113306327346718, "grad_norm": 0.29051095247268677, "learning_rate": 1.7056111843242507e-06, "loss": 0.1859, "step": 43702 }, { "epoch": 0.8113677628720903, "grad_norm": 1.0778127908706665, "learning_rate": 1.704959642014874e-06, "loss": 0.5908, "step": 43704 }, { "epoch": 0.811404893009509, "grad_norm": 0.5177686214447021, "learning_rate": 1.7043082125761502e-06, "loss": 0.3489, "step": 43706 }, { "epoch": 0.8114420231469277, "grad_norm": 0.3519238829612732, "learning_rate": 1.7036568960169443e-06, "loss": 0.3251, "step": 43708 }, { "epoch": 0.8114791532843463, "grad_norm": 0.3471061885356903, "learning_rate": 1.7030056923461158e-06, "loss": 0.14, "step": 43710 }, { "epoch": 0.811516283421765, "grad_norm": 0.4146586060523987, "learning_rate": 1.702354601572528e-06, "loss": 0.333, "step": 43712 }, { "epoch": 0.8115534135591835, "grad_norm": 0.305102676153183, "learning_rate": 1.7017036237050387e-06, "loss": 0.2562, "step": 43714 }, { "epoch": 0.8115905436966022, "grad_norm": 0.32578641176223755, "learning_rate": 1.7010527587525084e-06, "loss": 0.2171, "step": 43716 }, { "epoch": 0.8116276738340209, "grad_norm": 0.45331937074661255, "learning_rate": 1.7004020067237925e-06, "loss": 0.2093, "step": 43718 }, { "epoch": 0.8116648039714395, "grad_norm": 0.3839343786239624, "learning_rate": 1.6997513676277445e-06, "loss": 0.1565, "step": 43720 }, { "epoch": 0.8117019341088582, "grad_norm": 0.44491147994995117, "learning_rate": 1.699100841473219e-06, "loss": 0.4958, "step": 43722 }, { "epoch": 0.8117390642462767, "grad_norm": 0.2750934064388275, "learning_rate": 1.698450428269065e-06, "loss": 0.2854, "step": 43724 }, { "epoch": 0.8117761943836954, "grad_norm": 0.508905827999115, "learning_rate": 1.6978001280241353e-06, "loss": 0.2595, "step": 43726 }, { "epoch": 0.8118133245211141, "grad_norm": 0.42436084151268005, "learning_rate": 1.697149940747278e-06, "loss": 0.3276, "step": 43728 }, { "epoch": 0.8118504546585327, "grad_norm": 0.25700637698173523, "learning_rate": 1.6964998664473388e-06, "loss": 0.2736, "step": 43730 }, { "epoch": 0.8118875847959514, "grad_norm": 0.32283902168273926, "learning_rate": 1.6958499051331657e-06, "loss": 0.2861, "step": 43732 }, { "epoch": 0.8119247149333699, "grad_norm": 0.5871114730834961, "learning_rate": 1.695200056813604e-06, "loss": 0.2956, "step": 43734 }, { "epoch": 0.8119618450707886, "grad_norm": 0.37595927715301514, "learning_rate": 1.6945503214974912e-06, "loss": 0.1454, "step": 43736 }, { "epoch": 0.8119989752082073, "grad_norm": 0.5304111838340759, "learning_rate": 1.6939006991936702e-06, "loss": 0.4389, "step": 43738 }, { "epoch": 0.8120361053456259, "grad_norm": 0.6789892911911011, "learning_rate": 1.6932511899109837e-06, "loss": 0.2895, "step": 43740 }, { "epoch": 0.8120732354830446, "grad_norm": 0.4144756495952606, "learning_rate": 1.692601793658265e-06, "loss": 0.355, "step": 43742 }, { "epoch": 0.8121103656204631, "grad_norm": 0.3600856363773346, "learning_rate": 1.6919525104443513e-06, "loss": 0.1588, "step": 43744 }, { "epoch": 0.8121474957578818, "grad_norm": 0.33181440830230713, "learning_rate": 1.691303340278082e-06, "loss": 0.3635, "step": 43746 }, { "epoch": 0.8121846258953005, "grad_norm": 0.2566782236099243, "learning_rate": 1.6906542831682837e-06, "loss": 0.1676, "step": 43748 }, { "epoch": 0.8122217560327191, "grad_norm": 0.4105609059333801, "learning_rate": 1.6900053391237914e-06, "loss": 0.2778, "step": 43750 }, { "epoch": 0.8122588861701378, "grad_norm": 0.46978235244750977, "learning_rate": 1.6893565081534359e-06, "loss": 0.5005, "step": 43752 }, { "epoch": 0.8122960163075563, "grad_norm": 0.2576170861721039, "learning_rate": 1.6887077902660443e-06, "loss": 0.3812, "step": 43754 }, { "epoch": 0.812333146444975, "grad_norm": 0.4677191972732544, "learning_rate": 1.6880591854704443e-06, "loss": 0.0429, "step": 43756 }, { "epoch": 0.8123702765823936, "grad_norm": 0.3043809235095978, "learning_rate": 1.6874106937754642e-06, "loss": 0.2045, "step": 43758 }, { "epoch": 0.8124074067198123, "grad_norm": 0.44103580713272095, "learning_rate": 1.6867623151899237e-06, "loss": 0.3023, "step": 43760 }, { "epoch": 0.812444536857231, "grad_norm": 0.49585819244384766, "learning_rate": 1.6861140497226492e-06, "loss": 0.1249, "step": 43762 }, { "epoch": 0.8124816669946495, "grad_norm": 0.17080366611480713, "learning_rate": 1.6854658973824566e-06, "loss": 0.2339, "step": 43764 }, { "epoch": 0.8125187971320682, "grad_norm": 0.3271997272968292, "learning_rate": 1.6848178581781682e-06, "loss": 0.2231, "step": 43766 }, { "epoch": 0.8125559272694868, "grad_norm": 1.031008005142212, "learning_rate": 1.6841699321186023e-06, "loss": 0.1958, "step": 43768 }, { "epoch": 0.8125930574069055, "grad_norm": 0.5688890814781189, "learning_rate": 1.683522119212575e-06, "loss": 0.2949, "step": 43770 }, { "epoch": 0.8126301875443241, "grad_norm": 0.4250592291355133, "learning_rate": 1.682874419468903e-06, "loss": 0.1574, "step": 43772 }, { "epoch": 0.8126673176817427, "grad_norm": 0.34404996037483215, "learning_rate": 1.6822268328963954e-06, "loss": 0.5192, "step": 43774 }, { "epoch": 0.8127044478191614, "grad_norm": 0.5535452961921692, "learning_rate": 1.6815793595038655e-06, "loss": 0.3362, "step": 43776 }, { "epoch": 0.81274157795658, "grad_norm": 0.41953033208847046, "learning_rate": 1.6809319993001239e-06, "loss": 0.3028, "step": 43778 }, { "epoch": 0.8127787080939987, "grad_norm": 0.39952221512794495, "learning_rate": 1.6802847522939814e-06, "loss": 0.507, "step": 43780 }, { "epoch": 0.8128158382314173, "grad_norm": 0.4492514729499817, "learning_rate": 1.679637618494242e-06, "loss": 0.1695, "step": 43782 }, { "epoch": 0.8128529683688359, "grad_norm": 0.5864405035972595, "learning_rate": 1.6789905979097132e-06, "loss": 0.2931, "step": 43784 }, { "epoch": 0.8128900985062546, "grad_norm": 0.374038964509964, "learning_rate": 1.6783436905491969e-06, "loss": 0.2267, "step": 43786 }, { "epoch": 0.8129272286436732, "grad_norm": 0.5111145973205566, "learning_rate": 1.6776968964214957e-06, "loss": 0.3198, "step": 43788 }, { "epoch": 0.8129643587810919, "grad_norm": 0.20995105803012848, "learning_rate": 1.6770502155354118e-06, "loss": 0.2311, "step": 43790 }, { "epoch": 0.8130014889185105, "grad_norm": 0.23587463796138763, "learning_rate": 1.6764036478997448e-06, "loss": 0.139, "step": 43792 }, { "epoch": 0.8130386190559291, "grad_norm": 0.4376339912414551, "learning_rate": 1.6757571935232918e-06, "loss": 0.383, "step": 43794 }, { "epoch": 0.8130757491933478, "grad_norm": 0.4064905047416687, "learning_rate": 1.6751108524148484e-06, "loss": 0.3448, "step": 43796 }, { "epoch": 0.8131128793307664, "grad_norm": 0.38001057505607605, "learning_rate": 1.6744646245832142e-06, "loss": 0.3068, "step": 43798 }, { "epoch": 0.813150009468185, "grad_norm": 0.44383519887924194, "learning_rate": 1.6738185100371763e-06, "loss": 0.2787, "step": 43800 }, { "epoch": 0.8131871396056036, "grad_norm": 0.36150261759757996, "learning_rate": 1.67317250878553e-06, "loss": 0.2444, "step": 43802 }, { "epoch": 0.8132242697430223, "grad_norm": 0.5254998803138733, "learning_rate": 1.6725266208370628e-06, "loss": 0.1087, "step": 43804 }, { "epoch": 0.813261399880441, "grad_norm": 0.4981311559677124, "learning_rate": 1.6718808462005632e-06, "loss": 0.1902, "step": 43806 }, { "epoch": 0.8132985300178596, "grad_norm": 0.4114186763763428, "learning_rate": 1.6712351848848207e-06, "loss": 0.2026, "step": 43808 }, { "epoch": 0.8133356601552783, "grad_norm": 0.45485007762908936, "learning_rate": 1.670589636898622e-06, "loss": 0.4116, "step": 43810 }, { "epoch": 0.8133727902926968, "grad_norm": 0.2727290093898773, "learning_rate": 1.6699442022507462e-06, "loss": 0.2806, "step": 43812 }, { "epoch": 0.8134099204301155, "grad_norm": 0.4047304093837738, "learning_rate": 1.6692988809499788e-06, "loss": 0.2575, "step": 43814 }, { "epoch": 0.8134470505675342, "grad_norm": 0.36853736639022827, "learning_rate": 1.6686536730051006e-06, "loss": 0.2622, "step": 43816 }, { "epoch": 0.8134841807049528, "grad_norm": 0.3714786469936371, "learning_rate": 1.66800857842489e-06, "loss": 0.2092, "step": 43818 }, { "epoch": 0.8135213108423714, "grad_norm": 0.4923979341983795, "learning_rate": 1.6673635972181257e-06, "loss": 0.2292, "step": 43820 }, { "epoch": 0.81355844097979, "grad_norm": 0.26455625891685486, "learning_rate": 1.666718729393586e-06, "loss": 0.1163, "step": 43822 }, { "epoch": 0.8135955711172087, "grad_norm": 0.3776501417160034, "learning_rate": 1.6660739749600408e-06, "loss": 0.2926, "step": 43824 }, { "epoch": 0.8136327012546274, "grad_norm": 0.4441525638103485, "learning_rate": 1.6654293339262684e-06, "loss": 0.1847, "step": 43826 }, { "epoch": 0.813669831392046, "grad_norm": 0.2903951406478882, "learning_rate": 1.6647848063010364e-06, "loss": 0.1942, "step": 43828 }, { "epoch": 0.8137069615294646, "grad_norm": 0.2905699610710144, "learning_rate": 1.6641403920931142e-06, "loss": 0.2435, "step": 43830 }, { "epoch": 0.8137440916668832, "grad_norm": 0.29813718795776367, "learning_rate": 1.6634960913112742e-06, "loss": 0.3317, "step": 43832 }, { "epoch": 0.8137812218043019, "grad_norm": 0.4835970401763916, "learning_rate": 1.6628519039642832e-06, "loss": 0.3703, "step": 43834 }, { "epoch": 0.8138183519417206, "grad_norm": 0.3298129439353943, "learning_rate": 1.6622078300609035e-06, "loss": 0.2678, "step": 43836 }, { "epoch": 0.8138554820791392, "grad_norm": 0.4878776967525482, "learning_rate": 1.6615638696099002e-06, "loss": 0.1213, "step": 43838 }, { "epoch": 0.8138926122165578, "grad_norm": 0.33900871872901917, "learning_rate": 1.6609200226200362e-06, "loss": 0.2325, "step": 43840 }, { "epoch": 0.8139297423539764, "grad_norm": 0.4741169214248657, "learning_rate": 1.6602762891000724e-06, "loss": 0.5836, "step": 43842 }, { "epoch": 0.8139668724913951, "grad_norm": 0.6076623201370239, "learning_rate": 1.6596326690587695e-06, "loss": 0.3045, "step": 43844 }, { "epoch": 0.8140040026288138, "grad_norm": 0.3318552076816559, "learning_rate": 1.6589891625048816e-06, "loss": 0.2573, "step": 43846 }, { "epoch": 0.8140411327662324, "grad_norm": 0.2661123275756836, "learning_rate": 1.6583457694471705e-06, "loss": 0.1295, "step": 43848 }, { "epoch": 0.814078262903651, "grad_norm": 0.27453526854515076, "learning_rate": 1.6577024898943838e-06, "loss": 0.3613, "step": 43850 }, { "epoch": 0.8141153930410696, "grad_norm": 0.353855699300766, "learning_rate": 1.6570593238552779e-06, "loss": 0.3577, "step": 43852 }, { "epoch": 0.8141525231784883, "grad_norm": 0.31913813948631287, "learning_rate": 1.6564162713386056e-06, "loss": 0.3685, "step": 43854 }, { "epoch": 0.8141896533159069, "grad_norm": 0.3153173625469208, "learning_rate": 1.6557733323531155e-06, "loss": 0.2144, "step": 43856 }, { "epoch": 0.8142267834533256, "grad_norm": 0.2876848578453064, "learning_rate": 1.6551305069075564e-06, "loss": 0.2075, "step": 43858 }, { "epoch": 0.8142639135907442, "grad_norm": 0.4811458885669708, "learning_rate": 1.6544877950106774e-06, "loss": 0.2782, "step": 43860 }, { "epoch": 0.8143010437281628, "grad_norm": 0.29733672738075256, "learning_rate": 1.6538451966712198e-06, "loss": 0.4645, "step": 43862 }, { "epoch": 0.8143381738655815, "grad_norm": 0.3425471782684326, "learning_rate": 1.65320271189793e-06, "loss": 0.2849, "step": 43864 }, { "epoch": 0.8143753040030001, "grad_norm": 0.38238421082496643, "learning_rate": 1.6525603406995516e-06, "loss": 0.2479, "step": 43866 }, { "epoch": 0.8144124341404188, "grad_norm": 0.5536106824874878, "learning_rate": 1.6519180830848213e-06, "loss": 0.1826, "step": 43868 }, { "epoch": 0.8144495642778374, "grad_norm": 0.350225031375885, "learning_rate": 1.65127593906248e-06, "loss": 0.2923, "step": 43870 }, { "epoch": 0.814486694415256, "grad_norm": 0.2707739770412445, "learning_rate": 1.6506339086412682e-06, "loss": 0.3373, "step": 43872 }, { "epoch": 0.8145238245526747, "grad_norm": 0.4076531231403351, "learning_rate": 1.649991991829918e-06, "loss": 0.2416, "step": 43874 }, { "epoch": 0.8145609546900933, "grad_norm": 0.3382411003112793, "learning_rate": 1.6493501886371655e-06, "loss": 0.2422, "step": 43876 }, { "epoch": 0.814598084827512, "grad_norm": 0.4349682927131653, "learning_rate": 1.6487084990717428e-06, "loss": 0.2905, "step": 43878 }, { "epoch": 0.8146352149649306, "grad_norm": 0.3168734908103943, "learning_rate": 1.648066923142383e-06, "loss": 0.3292, "step": 43880 }, { "epoch": 0.8146723451023492, "grad_norm": 0.46333861351013184, "learning_rate": 1.6474254608578156e-06, "loss": 0.2352, "step": 43882 }, { "epoch": 0.8147094752397679, "grad_norm": 0.3276359736919403, "learning_rate": 1.6467841122267692e-06, "loss": 0.1302, "step": 43884 }, { "epoch": 0.8147466053771865, "grad_norm": 0.3139045834541321, "learning_rate": 1.6461428772579724e-06, "loss": 0.129, "step": 43886 }, { "epoch": 0.8147837355146051, "grad_norm": 0.4689866602420807, "learning_rate": 1.6455017559601483e-06, "loss": 0.3683, "step": 43888 }, { "epoch": 0.8148208656520238, "grad_norm": 0.44363242387771606, "learning_rate": 1.6448607483420175e-06, "loss": 0.3406, "step": 43890 }, { "epoch": 0.8148579957894424, "grad_norm": 0.41888853907585144, "learning_rate": 1.6442198544123067e-06, "loss": 0.432, "step": 43892 }, { "epoch": 0.8148951259268611, "grad_norm": 0.5602725744247437, "learning_rate": 1.6435790741797342e-06, "loss": 0.1871, "step": 43894 }, { "epoch": 0.8149322560642797, "grad_norm": 0.4456685185432434, "learning_rate": 1.6429384076530208e-06, "loss": 0.3115, "step": 43896 }, { "epoch": 0.8149693862016983, "grad_norm": 0.35547104477882385, "learning_rate": 1.6422978548408852e-06, "loss": 0.1889, "step": 43898 }, { "epoch": 0.815006516339117, "grad_norm": 0.539585530757904, "learning_rate": 1.6416574157520394e-06, "loss": 0.0922, "step": 43900 }, { "epoch": 0.8150436464765356, "grad_norm": 0.3318989872932434, "learning_rate": 1.6410170903952006e-06, "loss": 0.1243, "step": 43902 }, { "epoch": 0.8150807766139543, "grad_norm": 0.3800659477710724, "learning_rate": 1.6403768787790797e-06, "loss": 0.2037, "step": 43904 }, { "epoch": 0.8151179067513729, "grad_norm": 0.5128672122955322, "learning_rate": 1.639736780912391e-06, "loss": 0.3348, "step": 43906 }, { "epoch": 0.8151550368887915, "grad_norm": 0.4685610830783844, "learning_rate": 1.639096796803844e-06, "loss": 0.2097, "step": 43908 }, { "epoch": 0.8151921670262101, "grad_norm": 0.23054294288158417, "learning_rate": 1.6384569264621474e-06, "loss": 0.2733, "step": 43910 }, { "epoch": 0.8152292971636288, "grad_norm": 0.5500622391700745, "learning_rate": 1.6378171698960022e-06, "loss": 0.2587, "step": 43912 }, { "epoch": 0.8152664273010475, "grad_norm": 0.326969712972641, "learning_rate": 1.6371775271141187e-06, "loss": 0.3395, "step": 43914 }, { "epoch": 0.815303557438466, "grad_norm": 0.4631718695163727, "learning_rate": 1.6365379981251994e-06, "loss": 0.3009, "step": 43916 }, { "epoch": 0.8153406875758847, "grad_norm": 0.35248807072639465, "learning_rate": 1.6358985829379459e-06, "loss": 0.4968, "step": 43918 }, { "epoch": 0.8153778177133033, "grad_norm": 0.24632041156291962, "learning_rate": 1.6352592815610603e-06, "loss": 0.2135, "step": 43920 }, { "epoch": 0.815414947850722, "grad_norm": 0.5282867550849915, "learning_rate": 1.6346200940032397e-06, "loss": 0.4033, "step": 43922 }, { "epoch": 0.8154520779881407, "grad_norm": 0.5651643872261047, "learning_rate": 1.6339810202731854e-06, "loss": 0.3463, "step": 43924 }, { "epoch": 0.8154892081255593, "grad_norm": 0.45410507917404175, "learning_rate": 1.6333420603795868e-06, "loss": 0.1565, "step": 43926 }, { "epoch": 0.8155263382629779, "grad_norm": 0.3409724831581116, "learning_rate": 1.6327032143311427e-06, "loss": 0.2562, "step": 43928 }, { "epoch": 0.8155634684003965, "grad_norm": 0.50171959400177, "learning_rate": 1.6320644821365471e-06, "loss": 0.1365, "step": 43930 }, { "epoch": 0.8156005985378152, "grad_norm": 0.3927929103374481, "learning_rate": 1.6314258638044866e-06, "loss": 0.3394, "step": 43932 }, { "epoch": 0.8156377286752339, "grad_norm": 0.4505486786365509, "learning_rate": 1.630787359343654e-06, "loss": 0.2718, "step": 43934 }, { "epoch": 0.8156748588126524, "grad_norm": 0.6248704791069031, "learning_rate": 1.6301489687627382e-06, "loss": 0.2119, "step": 43936 }, { "epoch": 0.8157119889500711, "grad_norm": 0.3558638095855713, "learning_rate": 1.6295106920704228e-06, "loss": 0.2582, "step": 43938 }, { "epoch": 0.8157491190874897, "grad_norm": 0.37645500898361206, "learning_rate": 1.6288725292753937e-06, "loss": 0.4796, "step": 43940 }, { "epoch": 0.8157862492249084, "grad_norm": 0.3724375367164612, "learning_rate": 1.6282344803863348e-06, "loss": 0.3206, "step": 43942 }, { "epoch": 0.8158233793623271, "grad_norm": 0.4562768340110779, "learning_rate": 1.6275965454119292e-06, "loss": 0.3631, "step": 43944 }, { "epoch": 0.8158605094997456, "grad_norm": 0.4188615679740906, "learning_rate": 1.6269587243608553e-06, "loss": 0.2426, "step": 43946 }, { "epoch": 0.8158976396371643, "grad_norm": 0.5399248003959656, "learning_rate": 1.6263210172417954e-06, "loss": 0.2613, "step": 43948 }, { "epoch": 0.8159347697745829, "grad_norm": 0.4648520350456238, "learning_rate": 1.6256834240634222e-06, "loss": 0.3088, "step": 43950 }, { "epoch": 0.8159718999120016, "grad_norm": 0.4649866819381714, "learning_rate": 1.625045944834417e-06, "loss": 0.1763, "step": 43952 }, { "epoch": 0.8160090300494202, "grad_norm": 0.5629376173019409, "learning_rate": 1.624408579563448e-06, "loss": 0.3001, "step": 43954 }, { "epoch": 0.8160461601868388, "grad_norm": 0.5710993409156799, "learning_rate": 1.6237713282591893e-06, "loss": 0.4661, "step": 43956 }, { "epoch": 0.8160832903242575, "grad_norm": 0.3183075189590454, "learning_rate": 1.6231341909303133e-06, "loss": 0.1294, "step": 43958 }, { "epoch": 0.8161204204616761, "grad_norm": 0.5468972325325012, "learning_rate": 1.6224971675854928e-06, "loss": 0.3241, "step": 43960 }, { "epoch": 0.8161575505990948, "grad_norm": 0.4538935422897339, "learning_rate": 1.6218602582333886e-06, "loss": 0.3479, "step": 43962 }, { "epoch": 0.8161946807365134, "grad_norm": 0.3551821708679199, "learning_rate": 1.6212234628826717e-06, "loss": 0.1079, "step": 43964 }, { "epoch": 0.816231810873932, "grad_norm": 0.3374553918838501, "learning_rate": 1.6205867815420062e-06, "loss": 0.3534, "step": 43966 }, { "epoch": 0.8162689410113507, "grad_norm": 0.5981477499008179, "learning_rate": 1.6199502142200551e-06, "loss": 0.2684, "step": 43968 }, { "epoch": 0.8163060711487693, "grad_norm": 0.42788711190223694, "learning_rate": 1.6193137609254829e-06, "loss": 0.1529, "step": 43970 }, { "epoch": 0.816343201286188, "grad_norm": 0.3363659977912903, "learning_rate": 1.6186774216669455e-06, "loss": 0.2782, "step": 43972 }, { "epoch": 0.8163803314236066, "grad_norm": 0.32389792799949646, "learning_rate": 1.6180411964531052e-06, "loss": 0.1284, "step": 43974 }, { "epoch": 0.8164174615610252, "grad_norm": 0.3659617006778717, "learning_rate": 1.6174050852926148e-06, "loss": 0.1553, "step": 43976 }, { "epoch": 0.8164545916984439, "grad_norm": 0.4228678345680237, "learning_rate": 1.6167690881941323e-06, "loss": 0.232, "step": 43978 }, { "epoch": 0.8164917218358625, "grad_norm": 0.4784436523914337, "learning_rate": 1.6161332051663125e-06, "loss": 0.3913, "step": 43980 }, { "epoch": 0.8165288519732812, "grad_norm": 0.9528792500495911, "learning_rate": 1.6154974362178067e-06, "loss": 0.4612, "step": 43982 }, { "epoch": 0.8165659821106998, "grad_norm": 0.4677400588989258, "learning_rate": 1.6148617813572664e-06, "loss": 0.1463, "step": 43984 }, { "epoch": 0.8166031122481184, "grad_norm": 0.5206021070480347, "learning_rate": 1.6142262405933428e-06, "loss": 0.3774, "step": 43986 }, { "epoch": 0.8166402423855371, "grad_norm": 0.43053147196769714, "learning_rate": 1.61359081393468e-06, "loss": 0.1857, "step": 43988 }, { "epoch": 0.8166773725229557, "grad_norm": 0.40627163648605347, "learning_rate": 1.6129555013899256e-06, "loss": 0.2517, "step": 43990 }, { "epoch": 0.8167145026603744, "grad_norm": 0.4479183852672577, "learning_rate": 1.6123203029677247e-06, "loss": 0.3315, "step": 43992 }, { "epoch": 0.816751632797793, "grad_norm": 0.4351753294467926, "learning_rate": 1.611685218676724e-06, "loss": 0.3958, "step": 43994 }, { "epoch": 0.8167887629352116, "grad_norm": 0.36620035767555237, "learning_rate": 1.6110502485255575e-06, "loss": 0.236, "step": 43996 }, { "epoch": 0.8168258930726303, "grad_norm": 0.24258331954479218, "learning_rate": 1.6104153925228728e-06, "loss": 0.2845, "step": 43998 }, { "epoch": 0.8168630232100489, "grad_norm": 0.3536120355129242, "learning_rate": 1.6097806506773027e-06, "loss": 0.2841, "step": 44000 }, { "epoch": 0.8169001533474676, "grad_norm": 0.31925177574157715, "learning_rate": 1.6091460229974853e-06, "loss": 0.3033, "step": 44002 }, { "epoch": 0.8169372834848861, "grad_norm": 0.31828728318214417, "learning_rate": 1.6085115094920578e-06, "loss": 0.2449, "step": 44004 }, { "epoch": 0.8169744136223048, "grad_norm": 0.32721397280693054, "learning_rate": 1.6078771101696534e-06, "loss": 0.1434, "step": 44006 }, { "epoch": 0.8170115437597234, "grad_norm": 0.33132708072662354, "learning_rate": 1.6072428250389028e-06, "loss": 0.3186, "step": 44008 }, { "epoch": 0.8170486738971421, "grad_norm": 0.5203912258148193, "learning_rate": 1.6066086541084391e-06, "loss": 0.208, "step": 44010 }, { "epoch": 0.8170858040345608, "grad_norm": 0.34158751368522644, "learning_rate": 1.6059745973868933e-06, "loss": 0.2157, "step": 44012 }, { "epoch": 0.8171229341719793, "grad_norm": 0.5236937403678894, "learning_rate": 1.6053406548828876e-06, "loss": 0.3309, "step": 44014 }, { "epoch": 0.817160064309398, "grad_norm": 0.3399065434932709, "learning_rate": 1.6047068266050524e-06, "loss": 0.1099, "step": 44016 }, { "epoch": 0.8171971944468166, "grad_norm": 0.19152748584747314, "learning_rate": 1.604073112562009e-06, "loss": 0.1964, "step": 44018 }, { "epoch": 0.8172343245842353, "grad_norm": 0.43222638964653015, "learning_rate": 1.6034395127623815e-06, "loss": 0.3053, "step": 44020 }, { "epoch": 0.817271454721654, "grad_norm": 0.4710613191127777, "learning_rate": 1.6028060272147915e-06, "loss": 0.172, "step": 44022 }, { "epoch": 0.8173085848590725, "grad_norm": 0.23491613566875458, "learning_rate": 1.6021726559278616e-06, "loss": 0.1829, "step": 44024 }, { "epoch": 0.8173457149964912, "grad_norm": 0.8284361362457275, "learning_rate": 1.6015393989102047e-06, "loss": 0.298, "step": 44026 }, { "epoch": 0.8173828451339098, "grad_norm": 0.3189184069633484, "learning_rate": 1.6009062561704402e-06, "loss": 0.0867, "step": 44028 }, { "epoch": 0.8174199752713285, "grad_norm": 0.47937431931495667, "learning_rate": 1.600273227717183e-06, "loss": 0.2103, "step": 44030 }, { "epoch": 0.8174571054087472, "grad_norm": 0.5728077292442322, "learning_rate": 1.5996403135590478e-06, "loss": 0.0759, "step": 44032 }, { "epoch": 0.8174942355461657, "grad_norm": 0.5197650194168091, "learning_rate": 1.5990075137046479e-06, "loss": 0.4132, "step": 44034 }, { "epoch": 0.8175313656835844, "grad_norm": 0.37295806407928467, "learning_rate": 1.5983748281625899e-06, "loss": 0.3885, "step": 44036 }, { "epoch": 0.817568495821003, "grad_norm": 0.3562738597393036, "learning_rate": 1.5977422569414868e-06, "loss": 0.184, "step": 44038 }, { "epoch": 0.8176056259584217, "grad_norm": 0.3940262198448181, "learning_rate": 1.5971098000499418e-06, "loss": 0.1789, "step": 44040 }, { "epoch": 0.8176427560958404, "grad_norm": 0.3284790813922882, "learning_rate": 1.5964774574965624e-06, "loss": 0.3115, "step": 44042 }, { "epoch": 0.8176798862332589, "grad_norm": 0.4503403604030609, "learning_rate": 1.595845229289954e-06, "loss": 0.3403, "step": 44044 }, { "epoch": 0.8177170163706776, "grad_norm": 0.4714115262031555, "learning_rate": 1.5952131154387184e-06, "loss": 0.1564, "step": 44046 }, { "epoch": 0.8177541465080962, "grad_norm": 0.36949166655540466, "learning_rate": 1.5945811159514568e-06, "loss": 0.2124, "step": 44048 }, { "epoch": 0.8177912766455149, "grad_norm": 0.516167402267456, "learning_rate": 1.5939492308367722e-06, "loss": 0.2565, "step": 44050 }, { "epoch": 0.8178284067829336, "grad_norm": 0.46870315074920654, "learning_rate": 1.5933174601032564e-06, "loss": 0.5464, "step": 44052 }, { "epoch": 0.8178655369203521, "grad_norm": 1.149971604347229, "learning_rate": 1.5926858037595095e-06, "loss": 0.3273, "step": 44054 }, { "epoch": 0.8179026670577708, "grad_norm": 0.40364304184913635, "learning_rate": 1.5920542618141278e-06, "loss": 0.3236, "step": 44056 }, { "epoch": 0.8179397971951894, "grad_norm": 0.41620972752571106, "learning_rate": 1.591422834275701e-06, "loss": 0.1287, "step": 44058 }, { "epoch": 0.8179769273326081, "grad_norm": 0.2796846628189087, "learning_rate": 1.5907915211528225e-06, "loss": 0.3472, "step": 44060 }, { "epoch": 0.8180140574700266, "grad_norm": 0.3278990089893341, "learning_rate": 1.5901603224540851e-06, "loss": 0.3872, "step": 44062 }, { "epoch": 0.8180511876074453, "grad_norm": 0.2739548981189728, "learning_rate": 1.5895292381880735e-06, "loss": 0.2449, "step": 44064 }, { "epoch": 0.818088317744864, "grad_norm": 0.360414057970047, "learning_rate": 1.5888982683633759e-06, "loss": 0.1413, "step": 44066 }, { "epoch": 0.8181254478822826, "grad_norm": 0.40283066034317017, "learning_rate": 1.588267412988579e-06, "loss": 0.1498, "step": 44068 }, { "epoch": 0.8181625780197013, "grad_norm": 0.316351979970932, "learning_rate": 1.5876366720722668e-06, "loss": 0.3619, "step": 44070 }, { "epoch": 0.8181997081571198, "grad_norm": 0.42310062050819397, "learning_rate": 1.5870060456230218e-06, "loss": 0.1621, "step": 44072 }, { "epoch": 0.8182368382945385, "grad_norm": 0.48297426104545593, "learning_rate": 1.5863755336494268e-06, "loss": 0.3274, "step": 44074 }, { "epoch": 0.8182739684319572, "grad_norm": 0.908526599407196, "learning_rate": 1.5857451361600561e-06, "loss": 0.4363, "step": 44076 }, { "epoch": 0.8183110985693758, "grad_norm": 0.2524868845939636, "learning_rate": 1.5851148531634942e-06, "loss": 0.1849, "step": 44078 }, { "epoch": 0.8183482287067945, "grad_norm": 0.6079502105712891, "learning_rate": 1.5844846846683104e-06, "loss": 0.2352, "step": 44080 }, { "epoch": 0.818385358844213, "grad_norm": 0.7709314823150635, "learning_rate": 1.5838546306830827e-06, "loss": 0.2474, "step": 44082 }, { "epoch": 0.8184224889816317, "grad_norm": 0.35421517491340637, "learning_rate": 1.583224691216384e-06, "loss": 0.216, "step": 44084 }, { "epoch": 0.8184596191190504, "grad_norm": 0.44044116139411926, "learning_rate": 1.5825948662767866e-06, "loss": 0.2545, "step": 44086 }, { "epoch": 0.818496749256469, "grad_norm": 0.3081692159175873, "learning_rate": 1.5819651558728621e-06, "loss": 0.1488, "step": 44088 }, { "epoch": 0.8185338793938877, "grad_norm": 0.33745357394218445, "learning_rate": 1.5813355600131742e-06, "loss": 0.2372, "step": 44090 }, { "epoch": 0.8185710095313062, "grad_norm": 0.20291011035442352, "learning_rate": 1.5807060787062933e-06, "loss": 0.3586, "step": 44092 }, { "epoch": 0.8186081396687249, "grad_norm": 0.5969129204750061, "learning_rate": 1.580076711960783e-06, "loss": 0.2437, "step": 44094 }, { "epoch": 0.8186452698061436, "grad_norm": 0.49870145320892334, "learning_rate": 1.5794474597852095e-06, "loss": 0.2061, "step": 44096 }, { "epoch": 0.8186823999435622, "grad_norm": 0.3558120131492615, "learning_rate": 1.5788183221881347e-06, "loss": 0.3857, "step": 44098 }, { "epoch": 0.8187195300809809, "grad_norm": 0.5142087340354919, "learning_rate": 1.5781892991781156e-06, "loss": 0.4661, "step": 44100 }, { "epoch": 0.8187566602183994, "grad_norm": 0.5399808287620544, "learning_rate": 1.5775603907637171e-06, "loss": 0.2665, "step": 44102 }, { "epoch": 0.8187937903558181, "grad_norm": 0.2618241310119629, "learning_rate": 1.5769315969534916e-06, "loss": 0.1562, "step": 44104 }, { "epoch": 0.8188309204932367, "grad_norm": 0.368053138256073, "learning_rate": 1.5763029177559963e-06, "loss": 0.3559, "step": 44106 }, { "epoch": 0.8188680506306554, "grad_norm": 0.3717603087425232, "learning_rate": 1.5756743531797857e-06, "loss": 0.3437, "step": 44108 }, { "epoch": 0.8189051807680741, "grad_norm": 0.21356190741062164, "learning_rate": 1.5750459032334142e-06, "loss": 0.2217, "step": 44110 }, { "epoch": 0.8189423109054926, "grad_norm": 0.39795204997062683, "learning_rate": 1.5744175679254347e-06, "loss": 0.3126, "step": 44112 }, { "epoch": 0.8189794410429113, "grad_norm": 0.37931033968925476, "learning_rate": 1.5737893472643927e-06, "loss": 0.3546, "step": 44114 }, { "epoch": 0.8190165711803299, "grad_norm": 0.4684171676635742, "learning_rate": 1.5731612412588371e-06, "loss": 0.2365, "step": 44116 }, { "epoch": 0.8190537013177486, "grad_norm": 0.2616354823112488, "learning_rate": 1.5725332499173163e-06, "loss": 0.2437, "step": 44118 }, { "epoch": 0.8190908314551673, "grad_norm": 0.6100311279296875, "learning_rate": 1.5719053732483768e-06, "loss": 0.4585, "step": 44120 }, { "epoch": 0.8191279615925858, "grad_norm": 0.36489251255989075, "learning_rate": 1.5712776112605589e-06, "loss": 0.256, "step": 44122 }, { "epoch": 0.8191650917300045, "grad_norm": 0.32745206356048584, "learning_rate": 1.5706499639624072e-06, "loss": 0.0543, "step": 44124 }, { "epoch": 0.8192022218674231, "grad_norm": 0.41591495275497437, "learning_rate": 1.5700224313624578e-06, "loss": 0.2679, "step": 44126 }, { "epoch": 0.8192393520048418, "grad_norm": 0.6219328045845032, "learning_rate": 1.5693950134692537e-06, "loss": 0.4099, "step": 44128 }, { "epoch": 0.8192764821422605, "grad_norm": 0.2517624795436859, "learning_rate": 1.56876771029133e-06, "loss": 0.2144, "step": 44130 }, { "epoch": 0.819313612279679, "grad_norm": 0.5595732927322388, "learning_rate": 1.5681405218372237e-06, "loss": 0.3024, "step": 44132 }, { "epoch": 0.8193507424170977, "grad_norm": 0.23955067992210388, "learning_rate": 1.5675134481154674e-06, "loss": 0.2433, "step": 44134 }, { "epoch": 0.8193878725545163, "grad_norm": 1.2503514289855957, "learning_rate": 1.5668864891345959e-06, "loss": 0.1195, "step": 44136 }, { "epoch": 0.819425002691935, "grad_norm": 0.2486780732870102, "learning_rate": 1.566259644903142e-06, "loss": 0.2884, "step": 44138 }, { "epoch": 0.8194621328293537, "grad_norm": 0.5575205683708191, "learning_rate": 1.565632915429629e-06, "loss": 0.3196, "step": 44140 }, { "epoch": 0.8194992629667722, "grad_norm": 0.4635719656944275, "learning_rate": 1.5650063007225913e-06, "loss": 0.5353, "step": 44142 }, { "epoch": 0.8195363931041909, "grad_norm": 0.29539725184440613, "learning_rate": 1.5643798007905486e-06, "loss": 0.1751, "step": 44144 }, { "epoch": 0.8195735232416095, "grad_norm": 0.3746224343776703, "learning_rate": 1.563753415642031e-06, "loss": 0.321, "step": 44146 }, { "epoch": 0.8196106533790282, "grad_norm": 0.37644311785697937, "learning_rate": 1.563127145285558e-06, "loss": 0.2188, "step": 44148 }, { "epoch": 0.8196477835164468, "grad_norm": 0.4069465398788452, "learning_rate": 1.5625009897296572e-06, "loss": 0.2552, "step": 44150 }, { "epoch": 0.8196849136538654, "grad_norm": 0.321184903383255, "learning_rate": 1.5618749489828411e-06, "loss": 0.3302, "step": 44152 }, { "epoch": 0.8197220437912841, "grad_norm": 0.4351695477962494, "learning_rate": 1.5612490230536325e-06, "loss": 0.2446, "step": 44154 }, { "epoch": 0.8197591739287027, "grad_norm": 0.4680473208427429, "learning_rate": 1.560623211950547e-06, "loss": 0.3693, "step": 44156 }, { "epoch": 0.8197963040661214, "grad_norm": 0.5691108703613281, "learning_rate": 1.5599975156821024e-06, "loss": 0.2885, "step": 44158 }, { "epoch": 0.8198334342035399, "grad_norm": 0.38574764132499695, "learning_rate": 1.5593719342568093e-06, "loss": 0.2292, "step": 44160 }, { "epoch": 0.8198705643409586, "grad_norm": 0.3775882422924042, "learning_rate": 1.5587464676831843e-06, "loss": 0.2681, "step": 44162 }, { "epoch": 0.8199076944783773, "grad_norm": 0.2684120833873749, "learning_rate": 1.5581211159697352e-06, "loss": 0.2878, "step": 44164 }, { "epoch": 0.8199448246157959, "grad_norm": 0.5364150404930115, "learning_rate": 1.5574958791249694e-06, "loss": 0.3013, "step": 44166 }, { "epoch": 0.8199819547532146, "grad_norm": 0.4092472493648529, "learning_rate": 1.556870757157396e-06, "loss": 0.151, "step": 44168 }, { "epoch": 0.8200190848906331, "grad_norm": 0.555978000164032, "learning_rate": 1.5562457500755223e-06, "loss": 0.3746, "step": 44170 }, { "epoch": 0.8200562150280518, "grad_norm": 0.2833896279335022, "learning_rate": 1.5556208578878518e-06, "loss": 0.2734, "step": 44172 }, { "epoch": 0.8200933451654705, "grad_norm": 0.4228624403476715, "learning_rate": 1.5549960806028875e-06, "loss": 0.1934, "step": 44174 }, { "epoch": 0.8201304753028891, "grad_norm": 0.4768389165401459, "learning_rate": 1.5543714182291325e-06, "loss": 0.1256, "step": 44176 }, { "epoch": 0.8201676054403078, "grad_norm": 0.35951560735702515, "learning_rate": 1.5537468707750824e-06, "loss": 0.1034, "step": 44178 }, { "epoch": 0.8202047355777263, "grad_norm": 0.5261889696121216, "learning_rate": 1.5531224382492393e-06, "loss": 0.3207, "step": 44180 }, { "epoch": 0.820241865715145, "grad_norm": 0.6791105270385742, "learning_rate": 1.5524981206600976e-06, "loss": 0.3543, "step": 44182 }, { "epoch": 0.8202789958525637, "grad_norm": 0.4426318109035492, "learning_rate": 1.5518739180161556e-06, "loss": 0.1638, "step": 44184 }, { "epoch": 0.8203161259899823, "grad_norm": 0.4529627561569214, "learning_rate": 1.5512498303259027e-06, "loss": 0.3714, "step": 44186 }, { "epoch": 0.820353256127401, "grad_norm": 0.37510091066360474, "learning_rate": 1.5506258575978339e-06, "loss": 0.1934, "step": 44188 }, { "epoch": 0.8203903862648195, "grad_norm": 0.4047066569328308, "learning_rate": 1.550001999840437e-06, "loss": 0.2071, "step": 44190 }, { "epoch": 0.8204275164022382, "grad_norm": 0.5375328660011292, "learning_rate": 1.5493782570622018e-06, "loss": 0.4762, "step": 44192 }, { "epoch": 0.8204646465396569, "grad_norm": 0.37993961572647095, "learning_rate": 1.548754629271616e-06, "loss": 0.3103, "step": 44194 }, { "epoch": 0.8205017766770755, "grad_norm": 0.41086775064468384, "learning_rate": 1.5481311164771661e-06, "loss": 0.3731, "step": 44196 }, { "epoch": 0.8205389068144942, "grad_norm": 0.40745508670806885, "learning_rate": 1.5475077186873345e-06, "loss": 0.1917, "step": 44198 }, { "epoch": 0.8205760369519127, "grad_norm": 0.2797167897224426, "learning_rate": 1.5468844359106072e-06, "loss": 0.2564, "step": 44200 }, { "epoch": 0.8206131670893314, "grad_norm": 0.46691906452178955, "learning_rate": 1.5462612681554613e-06, "loss": 0.3409, "step": 44202 }, { "epoch": 0.82065029722675, "grad_norm": 0.33289816975593567, "learning_rate": 1.5456382154303763e-06, "loss": 0.188, "step": 44204 }, { "epoch": 0.8206874273641687, "grad_norm": 0.39042872190475464, "learning_rate": 1.5450152777438343e-06, "loss": 0.2446, "step": 44206 }, { "epoch": 0.8207245575015873, "grad_norm": 0.28449153900146484, "learning_rate": 1.5443924551043066e-06, "loss": 0.1707, "step": 44208 }, { "epoch": 0.8207616876390059, "grad_norm": 0.43035706877708435, "learning_rate": 1.5437697475202694e-06, "loss": 0.2923, "step": 44210 }, { "epoch": 0.8207988177764246, "grad_norm": 0.43349942564964294, "learning_rate": 1.5431471550001975e-06, "loss": 0.3461, "step": 44212 }, { "epoch": 0.8208359479138432, "grad_norm": 0.3353295922279358, "learning_rate": 1.5425246775525637e-06, "loss": 0.308, "step": 44214 }, { "epoch": 0.8208730780512619, "grad_norm": 0.3252819776535034, "learning_rate": 1.5419023151858336e-06, "loss": 0.1301, "step": 44216 }, { "epoch": 0.8209102081886805, "grad_norm": 0.4683806300163269, "learning_rate": 1.5412800679084784e-06, "loss": 0.4131, "step": 44218 }, { "epoch": 0.8209473383260991, "grad_norm": 0.37426263093948364, "learning_rate": 1.540657935728964e-06, "loss": 0.3867, "step": 44220 }, { "epoch": 0.8209844684635178, "grad_norm": 0.4133332073688507, "learning_rate": 1.5400359186557567e-06, "loss": 0.351, "step": 44222 }, { "epoch": 0.8210215986009364, "grad_norm": 0.5562565326690674, "learning_rate": 1.5394140166973227e-06, "loss": 0.2968, "step": 44224 }, { "epoch": 0.8210587287383551, "grad_norm": 0.360899955034256, "learning_rate": 1.5387922298621182e-06, "loss": 0.21, "step": 44226 }, { "epoch": 0.8210958588757737, "grad_norm": 0.4655608832836151, "learning_rate": 1.5381705581586104e-06, "loss": 0.149, "step": 44228 }, { "epoch": 0.8211329890131923, "grad_norm": 0.3761020004749298, "learning_rate": 1.537549001595252e-06, "loss": 0.2578, "step": 44230 }, { "epoch": 0.821170119150611, "grad_norm": 0.4797358214855194, "learning_rate": 1.5369275601805045e-06, "loss": 0.365, "step": 44232 }, { "epoch": 0.8212072492880296, "grad_norm": 0.32516419887542725, "learning_rate": 1.536306233922823e-06, "loss": 0.1893, "step": 44234 }, { "epoch": 0.8212443794254483, "grad_norm": 0.5489771962165833, "learning_rate": 1.535685022830662e-06, "loss": 0.2811, "step": 44236 }, { "epoch": 0.8212815095628669, "grad_norm": 0.40650615096092224, "learning_rate": 1.5350639269124768e-06, "loss": 0.1781, "step": 44238 }, { "epoch": 0.8213186397002855, "grad_norm": 0.34532561898231506, "learning_rate": 1.534442946176713e-06, "loss": 0.2754, "step": 44240 }, { "epoch": 0.8213557698377042, "grad_norm": 0.141364187002182, "learning_rate": 1.5338220806318238e-06, "loss": 0.142, "step": 44242 }, { "epoch": 0.8213928999751228, "grad_norm": 0.4559357464313507, "learning_rate": 1.5332013302862581e-06, "loss": 0.2467, "step": 44244 }, { "epoch": 0.8214300301125415, "grad_norm": 0.5665714740753174, "learning_rate": 1.53258069514846e-06, "loss": 0.3813, "step": 44246 }, { "epoch": 0.8214671602499601, "grad_norm": 0.30410560965538025, "learning_rate": 1.5319601752268786e-06, "loss": 0.3216, "step": 44248 }, { "epoch": 0.8215042903873787, "grad_norm": 0.4002247452735901, "learning_rate": 1.5313397705299537e-06, "loss": 0.2743, "step": 44250 }, { "epoch": 0.8215414205247974, "grad_norm": 0.33095186948776245, "learning_rate": 1.5307194810661262e-06, "loss": 0.2396, "step": 44252 }, { "epoch": 0.821578550662216, "grad_norm": 0.18876159191131592, "learning_rate": 1.5300993068438386e-06, "loss": 0.0076, "step": 44254 }, { "epoch": 0.8216156807996347, "grad_norm": 0.31562960147857666, "learning_rate": 1.5294792478715282e-06, "loss": 0.2895, "step": 44256 }, { "epoch": 0.8216528109370532, "grad_norm": 0.38490352034568787, "learning_rate": 1.5288593041576337e-06, "loss": 0.3119, "step": 44258 }, { "epoch": 0.8216899410744719, "grad_norm": 0.2892584502696991, "learning_rate": 1.5282394757105901e-06, "loss": 0.2146, "step": 44260 }, { "epoch": 0.8217270712118906, "grad_norm": 0.4311923682689667, "learning_rate": 1.5276197625388312e-06, "loss": 0.2787, "step": 44262 }, { "epoch": 0.8217642013493092, "grad_norm": 0.32901525497436523, "learning_rate": 1.5270001646507914e-06, "loss": 0.1997, "step": 44264 }, { "epoch": 0.8218013314867278, "grad_norm": 0.5071597695350647, "learning_rate": 1.5263806820548987e-06, "loss": 0.2246, "step": 44266 }, { "epoch": 0.8218384616241464, "grad_norm": 0.3114582300186157, "learning_rate": 1.5257613147595829e-06, "loss": 0.3343, "step": 44268 }, { "epoch": 0.8218755917615651, "grad_norm": 0.33477526903152466, "learning_rate": 1.5251420627732739e-06, "loss": 0.2512, "step": 44270 }, { "epoch": 0.8219127218989838, "grad_norm": 0.5042020082473755, "learning_rate": 1.5245229261043948e-06, "loss": 0.3139, "step": 44272 }, { "epoch": 0.8219498520364024, "grad_norm": 0.2940605878829956, "learning_rate": 1.5239039047613713e-06, "loss": 0.2378, "step": 44274 }, { "epoch": 0.821986982173821, "grad_norm": 0.3481087386608124, "learning_rate": 1.52328499875263e-06, "loss": 0.3632, "step": 44276 }, { "epoch": 0.8220241123112396, "grad_norm": 0.2972831428050995, "learning_rate": 1.5226662080865861e-06, "loss": 0.3158, "step": 44278 }, { "epoch": 0.8220612424486583, "grad_norm": 0.41324931383132935, "learning_rate": 1.522047532771662e-06, "loss": 0.1576, "step": 44280 }, { "epoch": 0.822098372586077, "grad_norm": 0.3038996458053589, "learning_rate": 1.5214289728162778e-06, "loss": 0.1198, "step": 44282 }, { "epoch": 0.8221355027234956, "grad_norm": 0.4997726380825043, "learning_rate": 1.5208105282288477e-06, "loss": 0.408, "step": 44284 }, { "epoch": 0.8221726328609142, "grad_norm": 0.40526893734931946, "learning_rate": 1.5201921990177892e-06, "loss": 0.2071, "step": 44286 }, { "epoch": 0.8222097629983328, "grad_norm": 0.3132908344268799, "learning_rate": 1.519573985191517e-06, "loss": 0.2834, "step": 44288 }, { "epoch": 0.8222468931357515, "grad_norm": 0.6940421462059021, "learning_rate": 1.5189558867584386e-06, "loss": 0.1553, "step": 44290 }, { "epoch": 0.8222840232731702, "grad_norm": 0.3042430579662323, "learning_rate": 1.5183379037269697e-06, "loss": 0.3869, "step": 44292 }, { "epoch": 0.8223211534105888, "grad_norm": 0.33500972390174866, "learning_rate": 1.5177200361055133e-06, "loss": 0.13, "step": 44294 }, { "epoch": 0.8223582835480074, "grad_norm": 0.4052986204624176, "learning_rate": 1.5171022839024808e-06, "loss": 0.3161, "step": 44296 }, { "epoch": 0.822395413685426, "grad_norm": 0.7102774381637573, "learning_rate": 1.5164846471262762e-06, "loss": 0.2341, "step": 44298 }, { "epoch": 0.8224325438228447, "grad_norm": 0.27940109372138977, "learning_rate": 1.5158671257853042e-06, "loss": 0.1825, "step": 44300 }, { "epoch": 0.8224696739602634, "grad_norm": 1.5168792009353638, "learning_rate": 1.5152497198879713e-06, "loss": 0.2945, "step": 44302 }, { "epoch": 0.822506804097682, "grad_norm": 0.27114924788475037, "learning_rate": 1.5146324294426718e-06, "loss": 0.437, "step": 44304 }, { "epoch": 0.8225439342351006, "grad_norm": 0.5465795397758484, "learning_rate": 1.5140152544578079e-06, "loss": 0.5151, "step": 44306 }, { "epoch": 0.8225810643725192, "grad_norm": 0.25701451301574707, "learning_rate": 1.5133981949417787e-06, "loss": 0.2856, "step": 44308 }, { "epoch": 0.8226181945099379, "grad_norm": 0.5882032513618469, "learning_rate": 1.5127812509029815e-06, "loss": 0.2083, "step": 44310 }, { "epoch": 0.8226553246473565, "grad_norm": 0.37077853083610535, "learning_rate": 1.5121644223498066e-06, "loss": 0.3286, "step": 44312 }, { "epoch": 0.8226924547847752, "grad_norm": 0.4213380515575409, "learning_rate": 1.5115477092906539e-06, "loss": 0.2537, "step": 44314 }, { "epoch": 0.8227295849221938, "grad_norm": 0.4263063073158264, "learning_rate": 1.5109311117339076e-06, "loss": 0.1955, "step": 44316 }, { "epoch": 0.8227667150596124, "grad_norm": 0.4740867018699646, "learning_rate": 1.5103146296879612e-06, "loss": 0.2858, "step": 44318 }, { "epoch": 0.8228038451970311, "grad_norm": 0.4690374732017517, "learning_rate": 1.5096982631612023e-06, "loss": 0.2006, "step": 44320 }, { "epoch": 0.8228409753344497, "grad_norm": 0.6269438862800598, "learning_rate": 1.5090820121620197e-06, "loss": 0.3952, "step": 44322 }, { "epoch": 0.8228781054718683, "grad_norm": 0.46135011315345764, "learning_rate": 1.508465876698798e-06, "loss": 0.3379, "step": 44324 }, { "epoch": 0.822915235609287, "grad_norm": 0.42056548595428467, "learning_rate": 1.5078498567799227e-06, "loss": 0.273, "step": 44326 }, { "epoch": 0.8229523657467056, "grad_norm": 0.5488064885139465, "learning_rate": 1.5072339524137714e-06, "loss": 0.1931, "step": 44328 }, { "epoch": 0.8229894958841243, "grad_norm": 0.3139699101448059, "learning_rate": 1.5066181636087262e-06, "loss": 0.309, "step": 44330 }, { "epoch": 0.8230266260215429, "grad_norm": 0.3122418522834778, "learning_rate": 1.5060024903731707e-06, "loss": 0.1209, "step": 44332 }, { "epoch": 0.8230637561589615, "grad_norm": 0.3864465057849884, "learning_rate": 1.505386932715477e-06, "loss": 0.2298, "step": 44334 }, { "epoch": 0.8231008862963802, "grad_norm": 0.3195379674434662, "learning_rate": 1.5047714906440215e-06, "loss": 0.1217, "step": 44336 }, { "epoch": 0.8231380164337988, "grad_norm": 0.49088746309280396, "learning_rate": 1.504156164167181e-06, "loss": 0.3662, "step": 44338 }, { "epoch": 0.8231751465712175, "grad_norm": 0.2560213506221771, "learning_rate": 1.5035409532933287e-06, "loss": 0.1689, "step": 44340 }, { "epoch": 0.8232122767086361, "grad_norm": 0.3888169825077057, "learning_rate": 1.5029258580308314e-06, "loss": 0.2426, "step": 44342 }, { "epoch": 0.8232494068460547, "grad_norm": 0.359810471534729, "learning_rate": 1.5023108783880625e-06, "loss": 0.5328, "step": 44344 }, { "epoch": 0.8232865369834734, "grad_norm": 0.5266593098640442, "learning_rate": 1.5016960143733883e-06, "loss": 0.2677, "step": 44346 }, { "epoch": 0.823323667120892, "grad_norm": 0.4259595274925232, "learning_rate": 1.5010812659951767e-06, "loss": 0.348, "step": 44348 }, { "epoch": 0.8233607972583107, "grad_norm": 0.47508442401885986, "learning_rate": 1.5004666332617913e-06, "loss": 0.293, "step": 44350 }, { "epoch": 0.8233979273957293, "grad_norm": 0.5308201313018799, "learning_rate": 1.4998521161815981e-06, "loss": 0.2716, "step": 44352 }, { "epoch": 0.8234350575331479, "grad_norm": 0.4218639135360718, "learning_rate": 1.4992377147629556e-06, "loss": 0.3105, "step": 44354 }, { "epoch": 0.8234721876705665, "grad_norm": 0.475699245929718, "learning_rate": 1.4986234290142265e-06, "loss": 0.2317, "step": 44356 }, { "epoch": 0.8235093178079852, "grad_norm": 0.3580717444419861, "learning_rate": 1.4980092589437656e-06, "loss": 0.2966, "step": 44358 }, { "epoch": 0.8235464479454039, "grad_norm": 0.33864977955818176, "learning_rate": 1.4973952045599316e-06, "loss": 0.231, "step": 44360 }, { "epoch": 0.8235835780828225, "grad_norm": 0.3307175636291504, "learning_rate": 1.4967812658710822e-06, "loss": 0.2193, "step": 44362 }, { "epoch": 0.8236207082202411, "grad_norm": 0.548649251461029, "learning_rate": 1.496167442885571e-06, "loss": 0.3306, "step": 44364 }, { "epoch": 0.8236578383576597, "grad_norm": 0.3636893630027771, "learning_rate": 1.4955537356117466e-06, "loss": 0.2705, "step": 44366 }, { "epoch": 0.8236949684950784, "grad_norm": 0.5229859948158264, "learning_rate": 1.4949401440579625e-06, "loss": 0.2167, "step": 44368 }, { "epoch": 0.8237320986324971, "grad_norm": 0.2911817133426666, "learning_rate": 1.4943266682325675e-06, "loss": 0.3016, "step": 44370 }, { "epoch": 0.8237692287699157, "grad_norm": 0.4500266909599304, "learning_rate": 1.4937133081439081e-06, "loss": 0.3195, "step": 44372 }, { "epoch": 0.8238063589073343, "grad_norm": 0.304797887802124, "learning_rate": 1.4931000638003346e-06, "loss": 0.0676, "step": 44374 }, { "epoch": 0.8238434890447529, "grad_norm": 0.4162597060203552, "learning_rate": 1.4924869352101856e-06, "loss": 0.3111, "step": 44376 }, { "epoch": 0.8238806191821716, "grad_norm": 0.47652313113212585, "learning_rate": 1.4918739223818091e-06, "loss": 0.3251, "step": 44378 }, { "epoch": 0.8239177493195903, "grad_norm": 0.20339912176132202, "learning_rate": 1.4912610253235405e-06, "loss": 0.2798, "step": 44380 }, { "epoch": 0.8239548794570088, "grad_norm": 0.5866124033927917, "learning_rate": 1.4906482440437241e-06, "loss": 0.4829, "step": 44382 }, { "epoch": 0.8239920095944275, "grad_norm": 0.37735360860824585, "learning_rate": 1.490035578550696e-06, "loss": 0.5747, "step": 44384 }, { "epoch": 0.8240291397318461, "grad_norm": 0.4492835998535156, "learning_rate": 1.4894230288527921e-06, "loss": 0.1048, "step": 44386 }, { "epoch": 0.8240662698692648, "grad_norm": 0.4577048718929291, "learning_rate": 1.4888105949583508e-06, "loss": 0.145, "step": 44388 }, { "epoch": 0.8241034000066835, "grad_norm": 0.5359029769897461, "learning_rate": 1.4881982768757043e-06, "loss": 0.3492, "step": 44390 }, { "epoch": 0.824140530144102, "grad_norm": 0.33775848150253296, "learning_rate": 1.487586074613181e-06, "loss": 0.3007, "step": 44392 }, { "epoch": 0.8241776602815207, "grad_norm": 0.4457545578479767, "learning_rate": 1.4869739881791146e-06, "loss": 0.3328, "step": 44394 }, { "epoch": 0.8242147904189393, "grad_norm": 0.3008882999420166, "learning_rate": 1.4863620175818337e-06, "loss": 0.4618, "step": 44396 }, { "epoch": 0.824251920556358, "grad_norm": 0.45163577795028687, "learning_rate": 1.4857501628296634e-06, "loss": 0.3514, "step": 44398 }, { "epoch": 0.8242890506937767, "grad_norm": 0.4003308415412903, "learning_rate": 1.4851384239309296e-06, "loss": 0.273, "step": 44400 }, { "epoch": 0.8243261808311952, "grad_norm": 0.31293508410453796, "learning_rate": 1.4845268008939596e-06, "loss": 0.1541, "step": 44402 }, { "epoch": 0.8243633109686139, "grad_norm": 0.2741767168045044, "learning_rate": 1.4839152937270706e-06, "loss": 0.1512, "step": 44404 }, { "epoch": 0.8244004411060325, "grad_norm": 0.48680737614631653, "learning_rate": 1.4833039024385854e-06, "loss": 0.3504, "step": 44406 }, { "epoch": 0.8244375712434512, "grad_norm": 0.2666643261909485, "learning_rate": 1.4826926270368248e-06, "loss": 0.3203, "step": 44408 }, { "epoch": 0.8244747013808698, "grad_norm": 0.3038474917411804, "learning_rate": 1.4820814675301043e-06, "loss": 0.1406, "step": 44410 }, { "epoch": 0.8245118315182884, "grad_norm": 0.40017247200012207, "learning_rate": 1.4814704239267407e-06, "loss": 0.3732, "step": 44412 }, { "epoch": 0.8245489616557071, "grad_norm": 0.3195667266845703, "learning_rate": 1.480859496235052e-06, "loss": 0.217, "step": 44414 }, { "epoch": 0.8245860917931257, "grad_norm": 0.34560245275497437, "learning_rate": 1.4802486844633446e-06, "loss": 0.2728, "step": 44416 }, { "epoch": 0.8246232219305444, "grad_norm": 0.44810667634010315, "learning_rate": 1.4796379886199353e-06, "loss": 0.1509, "step": 44418 }, { "epoch": 0.824660352067963, "grad_norm": 0.4631747007369995, "learning_rate": 1.4790274087131296e-06, "loss": 0.4565, "step": 44420 }, { "epoch": 0.8246974822053816, "grad_norm": 0.3789494037628174, "learning_rate": 1.4784169447512375e-06, "loss": 0.1867, "step": 44422 }, { "epoch": 0.8247346123428003, "grad_norm": 0.24619998037815094, "learning_rate": 1.477806596742566e-06, "loss": 0.1749, "step": 44424 }, { "epoch": 0.8247717424802189, "grad_norm": 0.36924728751182556, "learning_rate": 1.477196364695419e-06, "loss": 0.361, "step": 44426 }, { "epoch": 0.8248088726176376, "grad_norm": 0.468763142824173, "learning_rate": 1.4765862486181037e-06, "loss": 0.328, "step": 44428 }, { "epoch": 0.8248460027550562, "grad_norm": 0.6404542326927185, "learning_rate": 1.4759762485189154e-06, "loss": 0.2467, "step": 44430 }, { "epoch": 0.8248831328924748, "grad_norm": 0.33120644092559814, "learning_rate": 1.47536636440616e-06, "loss": 0.3932, "step": 44432 }, { "epoch": 0.8249202630298935, "grad_norm": 0.2860252857208252, "learning_rate": 1.4747565962881328e-06, "loss": 0.3304, "step": 44434 }, { "epoch": 0.8249573931673121, "grad_norm": 0.7179960608482361, "learning_rate": 1.4741469441731327e-06, "loss": 0.2848, "step": 44436 }, { "epoch": 0.8249945233047308, "grad_norm": 0.18831995129585266, "learning_rate": 1.4735374080694564e-06, "loss": 0.1106, "step": 44438 }, { "epoch": 0.8250316534421493, "grad_norm": 0.3486173450946808, "learning_rate": 1.4729279879853976e-06, "loss": 0.2194, "step": 44440 }, { "epoch": 0.825068783579568, "grad_norm": 0.5629202723503113, "learning_rate": 1.4723186839292436e-06, "loss": 0.2457, "step": 44442 }, { "epoch": 0.8251059137169867, "grad_norm": 0.29227855801582336, "learning_rate": 1.4717094959092904e-06, "loss": 0.2108, "step": 44444 }, { "epoch": 0.8251430438544053, "grad_norm": 0.3910392224788666, "learning_rate": 1.4711004239338245e-06, "loss": 0.0904, "step": 44446 }, { "epoch": 0.825180173991824, "grad_norm": 0.28663647174835205, "learning_rate": 1.470491468011136e-06, "loss": 0.3289, "step": 44448 }, { "epoch": 0.8252173041292425, "grad_norm": 0.2817362844944, "learning_rate": 1.4698826281495083e-06, "loss": 0.2783, "step": 44450 }, { "epoch": 0.8252544342666612, "grad_norm": 0.42513948678970337, "learning_rate": 1.4692739043572313e-06, "loss": 0.2589, "step": 44452 }, { "epoch": 0.8252915644040799, "grad_norm": 0.2713676989078522, "learning_rate": 1.4686652966425807e-06, "loss": 0.3299, "step": 44454 }, { "epoch": 0.8253286945414985, "grad_norm": 0.25707992911338806, "learning_rate": 1.468056805013841e-06, "loss": 0.1729, "step": 44456 }, { "epoch": 0.8253658246789172, "grad_norm": 0.3693886399269104, "learning_rate": 1.4674484294792923e-06, "loss": 0.3036, "step": 44458 }, { "epoch": 0.8254029548163357, "grad_norm": 0.47757822275161743, "learning_rate": 1.4668401700472146e-06, "loss": 0.3611, "step": 44460 }, { "epoch": 0.8254400849537544, "grad_norm": 0.39343148469924927, "learning_rate": 1.46623202672588e-06, "loss": 0.1552, "step": 44462 }, { "epoch": 0.825477215091173, "grad_norm": 0.305357426404953, "learning_rate": 1.4656239995235666e-06, "loss": 0.3068, "step": 44464 }, { "epoch": 0.8255143452285917, "grad_norm": 0.6944701671600342, "learning_rate": 1.4650160884485498e-06, "loss": 0.3141, "step": 44466 }, { "epoch": 0.8255514753660104, "grad_norm": 0.4313117563724518, "learning_rate": 1.4644082935090952e-06, "loss": 0.2253, "step": 44468 }, { "epoch": 0.8255886055034289, "grad_norm": 0.5051404237747192, "learning_rate": 1.4638006147134776e-06, "loss": 0.4782, "step": 44470 }, { "epoch": 0.8256257356408476, "grad_norm": 0.2958587408065796, "learning_rate": 1.4631930520699645e-06, "loss": 0.2865, "step": 44472 }, { "epoch": 0.8256628657782662, "grad_norm": 0.48300692439079285, "learning_rate": 1.4625856055868227e-06, "loss": 0.3818, "step": 44474 }, { "epoch": 0.8256999959156849, "grad_norm": 0.42771753668785095, "learning_rate": 1.4619782752723188e-06, "loss": 0.2083, "step": 44476 }, { "epoch": 0.8257371260531036, "grad_norm": 0.4528143107891083, "learning_rate": 1.4613710611347187e-06, "loss": 0.3938, "step": 44478 }, { "epoch": 0.8257742561905221, "grad_norm": 0.48317664861679077, "learning_rate": 1.46076396318228e-06, "loss": 0.3612, "step": 44480 }, { "epoch": 0.8258113863279408, "grad_norm": 0.6486336588859558, "learning_rate": 1.4601569814232686e-06, "loss": 0.4703, "step": 44482 }, { "epoch": 0.8258485164653594, "grad_norm": 0.25663724541664124, "learning_rate": 1.4595501158659376e-06, "loss": 0.3532, "step": 44484 }, { "epoch": 0.8258856466027781, "grad_norm": 0.34411704540252686, "learning_rate": 1.4589433665185482e-06, "loss": 0.302, "step": 44486 }, { "epoch": 0.8259227767401968, "grad_norm": 0.4348393976688385, "learning_rate": 1.458336733389356e-06, "loss": 0.2079, "step": 44488 }, { "epoch": 0.8259599068776153, "grad_norm": 0.45254871249198914, "learning_rate": 1.457730216486618e-06, "loss": 0.3249, "step": 44490 }, { "epoch": 0.825997037015034, "grad_norm": 0.32711702585220337, "learning_rate": 1.4571238158185829e-06, "loss": 0.416, "step": 44492 }, { "epoch": 0.8260341671524526, "grad_norm": 0.40807783603668213, "learning_rate": 1.4565175313935032e-06, "loss": 0.2627, "step": 44494 }, { "epoch": 0.8260712972898713, "grad_norm": 0.22464247047901154, "learning_rate": 1.4559113632196299e-06, "loss": 0.2115, "step": 44496 }, { "epoch": 0.82610842742729, "grad_norm": 0.34408894181251526, "learning_rate": 1.4553053113052096e-06, "loss": 0.3454, "step": 44498 }, { "epoch": 0.8261455575647085, "grad_norm": 0.6072781682014465, "learning_rate": 1.4546993756584927e-06, "loss": 0.1457, "step": 44500 }, { "epoch": 0.8261826877021272, "grad_norm": 0.24276113510131836, "learning_rate": 1.454093556287718e-06, "loss": 0.2323, "step": 44502 }, { "epoch": 0.8262198178395458, "grad_norm": 0.3045775592327118, "learning_rate": 1.4534878532011354e-06, "loss": 0.2686, "step": 44504 }, { "epoch": 0.8262569479769645, "grad_norm": 0.38654378056526184, "learning_rate": 1.4528822664069798e-06, "loss": 0.246, "step": 44506 }, { "epoch": 0.826294078114383, "grad_norm": 0.3688133955001831, "learning_rate": 1.4522767959134965e-06, "loss": 0.2253, "step": 44508 }, { "epoch": 0.8263312082518017, "grad_norm": 0.39238420128822327, "learning_rate": 1.4516714417289213e-06, "loss": 0.3124, "step": 44510 }, { "epoch": 0.8263683383892204, "grad_norm": 0.35250505805015564, "learning_rate": 1.4510662038614931e-06, "loss": 0.2764, "step": 44512 }, { "epoch": 0.826405468526639, "grad_norm": 0.49556204676628113, "learning_rate": 1.4504610823194464e-06, "loss": 0.414, "step": 44514 }, { "epoch": 0.8264425986640577, "grad_norm": 0.4538097381591797, "learning_rate": 1.4498560771110182e-06, "loss": 0.3608, "step": 44516 }, { "epoch": 0.8264797288014762, "grad_norm": 0.3939908444881439, "learning_rate": 1.449251188244436e-06, "loss": 0.2234, "step": 44518 }, { "epoch": 0.8265168589388949, "grad_norm": 0.5191604495048523, "learning_rate": 1.4486464157279324e-06, "loss": 0.2397, "step": 44520 }, { "epoch": 0.8265539890763136, "grad_norm": 0.3154982030391693, "learning_rate": 1.4480417595697371e-06, "loss": 0.2441, "step": 44522 }, { "epoch": 0.8265911192137322, "grad_norm": 0.5836181640625, "learning_rate": 1.4474372197780795e-06, "loss": 0.2214, "step": 44524 }, { "epoch": 0.8266282493511509, "grad_norm": 0.36124810576438904, "learning_rate": 1.4468327963611816e-06, "loss": 0.2832, "step": 44526 }, { "epoch": 0.8266653794885694, "grad_norm": 0.6468775272369385, "learning_rate": 1.4462284893272716e-06, "loss": 0.3449, "step": 44528 }, { "epoch": 0.8267025096259881, "grad_norm": 0.42911240458488464, "learning_rate": 1.445624298684568e-06, "loss": 0.3639, "step": 44530 }, { "epoch": 0.8267396397634068, "grad_norm": 0.4560735523700714, "learning_rate": 1.4450202244412936e-06, "loss": 0.1937, "step": 44532 }, { "epoch": 0.8267767699008254, "grad_norm": 0.33229440450668335, "learning_rate": 1.4444162666056705e-06, "loss": 0.1003, "step": 44534 }, { "epoch": 0.8268139000382441, "grad_norm": 0.4366031587123871, "learning_rate": 1.443812425185913e-06, "loss": 0.2276, "step": 44536 }, { "epoch": 0.8268510301756626, "grad_norm": 0.36350634694099426, "learning_rate": 1.4432087001902417e-06, "loss": 0.3317, "step": 44538 }, { "epoch": 0.8268881603130813, "grad_norm": 0.4376106262207031, "learning_rate": 1.4426050916268708e-06, "loss": 0.1018, "step": 44540 }, { "epoch": 0.8269252904505, "grad_norm": 0.41464704275131226, "learning_rate": 1.4420015995040093e-06, "loss": 0.3012, "step": 44542 }, { "epoch": 0.8269624205879186, "grad_norm": 0.3919060528278351, "learning_rate": 1.441398223829873e-06, "loss": 0.263, "step": 44544 }, { "epoch": 0.8269995507253373, "grad_norm": 0.36826562881469727, "learning_rate": 1.4407949646126729e-06, "loss": 0.2151, "step": 44546 }, { "epoch": 0.8270366808627558, "grad_norm": 0.4565935432910919, "learning_rate": 1.4401918218606127e-06, "loss": 0.3597, "step": 44548 }, { "epoch": 0.8270738110001745, "grad_norm": 0.33963632583618164, "learning_rate": 1.4395887955819022e-06, "loss": 0.2335, "step": 44550 }, { "epoch": 0.8271109411375932, "grad_norm": 0.3320991098880768, "learning_rate": 1.4389858857847473e-06, "loss": 0.1616, "step": 44552 }, { "epoch": 0.8271480712750118, "grad_norm": 0.4420614242553711, "learning_rate": 1.4383830924773534e-06, "loss": 0.353, "step": 44554 }, { "epoch": 0.8271852014124305, "grad_norm": 0.5286855101585388, "learning_rate": 1.4377804156679187e-06, "loss": 0.368, "step": 44556 }, { "epoch": 0.827222331549849, "grad_norm": 0.3630439341068268, "learning_rate": 1.4371778553646454e-06, "loss": 0.2322, "step": 44558 }, { "epoch": 0.8272594616872677, "grad_norm": 0.30532121658325195, "learning_rate": 1.4365754115757325e-06, "loss": 0.1734, "step": 44560 }, { "epoch": 0.8272965918246863, "grad_norm": 0.27730900049209595, "learning_rate": 1.4359730843093778e-06, "loss": 0.2502, "step": 44562 }, { "epoch": 0.827333721962105, "grad_norm": 0.5762947797775269, "learning_rate": 1.4353708735737803e-06, "loss": 0.3917, "step": 44564 }, { "epoch": 0.8273708520995237, "grad_norm": 0.431096613407135, "learning_rate": 1.434768779377128e-06, "loss": 0.1643, "step": 44566 }, { "epoch": 0.8274079822369422, "grad_norm": 0.6292319297790527, "learning_rate": 1.43416680172762e-06, "loss": 0.2437, "step": 44568 }, { "epoch": 0.8274451123743609, "grad_norm": 0.3266674280166626, "learning_rate": 1.4335649406334417e-06, "loss": 0.3388, "step": 44570 }, { "epoch": 0.8274822425117795, "grad_norm": 0.3215568959712982, "learning_rate": 1.4329631961027845e-06, "loss": 0.1551, "step": 44572 }, { "epoch": 0.8275193726491982, "grad_norm": 0.4225177466869354, "learning_rate": 1.4323615681438374e-06, "loss": 0.1401, "step": 44574 }, { "epoch": 0.8275565027866169, "grad_norm": 0.4300943911075592, "learning_rate": 1.431760056764786e-06, "loss": 0.227, "step": 44576 }, { "epoch": 0.8275936329240354, "grad_norm": 0.2382565587759018, "learning_rate": 1.4311586619738183e-06, "loss": 0.3377, "step": 44578 }, { "epoch": 0.8276307630614541, "grad_norm": 0.38147297501564026, "learning_rate": 1.4305573837791109e-06, "loss": 0.3395, "step": 44580 }, { "epoch": 0.8276678931988727, "grad_norm": 0.1967245191335678, "learning_rate": 1.4299562221888507e-06, "loss": 0.2735, "step": 44582 }, { "epoch": 0.8277050233362914, "grad_norm": 0.3886331021785736, "learning_rate": 1.4293551772112146e-06, "loss": 0.2476, "step": 44584 }, { "epoch": 0.82774215347371, "grad_norm": 0.46296975016593933, "learning_rate": 1.4287542488543859e-06, "loss": 0.4051, "step": 44586 }, { "epoch": 0.8277792836111286, "grad_norm": 0.5586023330688477, "learning_rate": 1.4281534371265348e-06, "loss": 0.2931, "step": 44588 }, { "epoch": 0.8278164137485473, "grad_norm": 0.5555031895637512, "learning_rate": 1.4275527420358403e-06, "loss": 0.226, "step": 44590 }, { "epoch": 0.8278535438859659, "grad_norm": 0.5167964100837708, "learning_rate": 1.4269521635904782e-06, "loss": 0.2346, "step": 44592 }, { "epoch": 0.8278906740233846, "grad_norm": 0.641940712928772, "learning_rate": 1.426351701798615e-06, "loss": 0.4475, "step": 44594 }, { "epoch": 0.8279278041608032, "grad_norm": 0.4867044985294342, "learning_rate": 1.4257513566684245e-06, "loss": 0.3513, "step": 44596 }, { "epoch": 0.8279649342982218, "grad_norm": 0.5539929270744324, "learning_rate": 1.4251511282080754e-06, "loss": 0.3085, "step": 44598 }, { "epoch": 0.8280020644356405, "grad_norm": 0.20153586566448212, "learning_rate": 1.4245510164257336e-06, "loss": 0.2367, "step": 44600 }, { "epoch": 0.8280391945730591, "grad_norm": 0.3592088222503662, "learning_rate": 1.4239510213295671e-06, "loss": 0.3146, "step": 44602 }, { "epoch": 0.8280763247104778, "grad_norm": 1.0701875686645508, "learning_rate": 1.4233511429277414e-06, "loss": 0.2778, "step": 44604 }, { "epoch": 0.8281134548478964, "grad_norm": 0.4356667995452881, "learning_rate": 1.4227513812284133e-06, "loss": 0.3339, "step": 44606 }, { "epoch": 0.828150584985315, "grad_norm": 0.16823738813400269, "learning_rate": 1.4221517362397497e-06, "loss": 0.1137, "step": 44608 }, { "epoch": 0.8281877151227337, "grad_norm": 0.38515982031822205, "learning_rate": 1.421552207969905e-06, "loss": 0.1705, "step": 44610 }, { "epoch": 0.8282248452601523, "grad_norm": 0.3402326703071594, "learning_rate": 1.4209527964270398e-06, "loss": 0.348, "step": 44612 }, { "epoch": 0.828261975397571, "grad_norm": 0.38503968715667725, "learning_rate": 1.4203535016193104e-06, "loss": 0.2077, "step": 44614 }, { "epoch": 0.8282991055349895, "grad_norm": 0.3827439248561859, "learning_rate": 1.4197543235548715e-06, "loss": 0.256, "step": 44616 }, { "epoch": 0.8283362356724082, "grad_norm": 0.3246382772922516, "learning_rate": 1.4191552622418747e-06, "loss": 0.2755, "step": 44618 }, { "epoch": 0.8283733658098269, "grad_norm": 0.3551013767719269, "learning_rate": 1.4185563176884708e-06, "loss": 0.31, "step": 44620 }, { "epoch": 0.8284104959472455, "grad_norm": 0.46833574771881104, "learning_rate": 1.4179574899028125e-06, "loss": 0.3474, "step": 44622 }, { "epoch": 0.8284476260846642, "grad_norm": 0.3563046455383301, "learning_rate": 1.4173587788930454e-06, "loss": 0.399, "step": 44624 }, { "epoch": 0.8284847562220827, "grad_norm": 0.610089123249054, "learning_rate": 1.4167601846673172e-06, "loss": 0.1886, "step": 44626 }, { "epoch": 0.8285218863595014, "grad_norm": 0.3741333782672882, "learning_rate": 1.4161617072337764e-06, "loss": 0.315, "step": 44628 }, { "epoch": 0.8285590164969201, "grad_norm": 0.3993767201900482, "learning_rate": 1.4155633466005602e-06, "loss": 0.1996, "step": 44630 }, { "epoch": 0.8285961466343387, "grad_norm": 0.3678515553474426, "learning_rate": 1.414965102775816e-06, "loss": 0.3375, "step": 44632 }, { "epoch": 0.8286332767717574, "grad_norm": 0.27323460578918457, "learning_rate": 1.4143669757676792e-06, "loss": 0.1924, "step": 44634 }, { "epoch": 0.8286704069091759, "grad_norm": 0.3332717716693878, "learning_rate": 1.4137689655842913e-06, "loss": 0.2877, "step": 44636 }, { "epoch": 0.8287075370465946, "grad_norm": 0.5685442686080933, "learning_rate": 1.4131710722337889e-06, "loss": 0.3571, "step": 44638 }, { "epoch": 0.8287446671840133, "grad_norm": 0.4489130675792694, "learning_rate": 1.4125732957243077e-06, "loss": 0.2052, "step": 44640 }, { "epoch": 0.8287817973214319, "grad_norm": 0.3397492468357086, "learning_rate": 1.4119756360639835e-06, "loss": 0.4944, "step": 44642 }, { "epoch": 0.8288189274588506, "grad_norm": 0.39863723516464233, "learning_rate": 1.4113780932609444e-06, "loss": 0.384, "step": 44644 }, { "epoch": 0.8288560575962691, "grad_norm": 0.32967403531074524, "learning_rate": 1.4107806673233237e-06, "loss": 0.2859, "step": 44646 }, { "epoch": 0.8288931877336878, "grad_norm": 0.42263832688331604, "learning_rate": 1.4101833582592506e-06, "loss": 0.2066, "step": 44648 }, { "epoch": 0.8289303178711065, "grad_norm": 0.4866623878479004, "learning_rate": 1.4095861660768551e-06, "loss": 0.1208, "step": 44650 }, { "epoch": 0.8289674480085251, "grad_norm": 0.42383044958114624, "learning_rate": 1.4089890907842574e-06, "loss": 0.449, "step": 44652 }, { "epoch": 0.8290045781459437, "grad_norm": 0.31940940022468567, "learning_rate": 1.4083921323895888e-06, "loss": 0.323, "step": 44654 }, { "epoch": 0.8290417082833623, "grad_norm": 0.4039912521839142, "learning_rate": 1.4077952909009652e-06, "loss": 0.2996, "step": 44656 }, { "epoch": 0.829078838420781, "grad_norm": 0.3600819408893585, "learning_rate": 1.4071985663265108e-06, "loss": 0.2401, "step": 44658 }, { "epoch": 0.8291159685581996, "grad_norm": 0.32907411456108093, "learning_rate": 1.4066019586743461e-06, "loss": 0.3753, "step": 44660 }, { "epoch": 0.8291530986956183, "grad_norm": 0.44636255502700806, "learning_rate": 1.406005467952588e-06, "loss": 0.133, "step": 44662 }, { "epoch": 0.829190228833037, "grad_norm": 0.5170493125915527, "learning_rate": 1.4054090941693544e-06, "loss": 0.4588, "step": 44664 }, { "epoch": 0.8292273589704555, "grad_norm": 0.30385977029800415, "learning_rate": 1.4048128373327585e-06, "loss": 0.2286, "step": 44666 }, { "epoch": 0.8292644891078742, "grad_norm": 0.3809641897678375, "learning_rate": 1.4042166974509164e-06, "loss": 0.1742, "step": 44668 }, { "epoch": 0.8293016192452928, "grad_norm": 0.3599933981895447, "learning_rate": 1.4036206745319359e-06, "loss": 0.2773, "step": 44670 }, { "epoch": 0.8293387493827115, "grad_norm": 0.34669196605682373, "learning_rate": 1.4030247685839316e-06, "loss": 0.1939, "step": 44672 }, { "epoch": 0.8293758795201301, "grad_norm": 0.31091058254241943, "learning_rate": 1.402428979615006e-06, "loss": 0.2555, "step": 44674 }, { "epoch": 0.8294130096575487, "grad_norm": 0.4375550150871277, "learning_rate": 1.4018333076332703e-06, "loss": 0.2974, "step": 44676 }, { "epoch": 0.8294501397949674, "grad_norm": 0.3333878815174103, "learning_rate": 1.401237752646828e-06, "loss": 0.0489, "step": 44678 }, { "epoch": 0.829487269932386, "grad_norm": 0.17322520911693573, "learning_rate": 1.400642314663786e-06, "loss": 0.2441, "step": 44680 }, { "epoch": 0.8295244000698047, "grad_norm": 0.41641905903816223, "learning_rate": 1.4000469936922424e-06, "loss": 0.439, "step": 44682 }, { "epoch": 0.8295615302072233, "grad_norm": 0.3629946708679199, "learning_rate": 1.3994517897402981e-06, "loss": 0.1693, "step": 44684 }, { "epoch": 0.8295986603446419, "grad_norm": 0.3358135521411896, "learning_rate": 1.398856702816055e-06, "loss": 0.1988, "step": 44686 }, { "epoch": 0.8296357904820606, "grad_norm": 0.49397900700569153, "learning_rate": 1.398261732927607e-06, "loss": 0.3042, "step": 44688 }, { "epoch": 0.8296729206194792, "grad_norm": 0.22138436138629913, "learning_rate": 1.3976668800830528e-06, "loss": 0.2268, "step": 44690 }, { "epoch": 0.8297100507568979, "grad_norm": 0.29525843262672424, "learning_rate": 1.3970721442904877e-06, "loss": 0.1323, "step": 44692 }, { "epoch": 0.8297471808943165, "grad_norm": 0.5254817008972168, "learning_rate": 1.396477525558001e-06, "loss": 0.4727, "step": 44694 }, { "epoch": 0.8297843110317351, "grad_norm": 1.223310112953186, "learning_rate": 1.3958830238936826e-06, "loss": 0.0997, "step": 44696 }, { "epoch": 0.8298214411691538, "grad_norm": 0.25934192538261414, "learning_rate": 1.3952886393056254e-06, "loss": 0.2234, "step": 44698 }, { "epoch": 0.8298585713065724, "grad_norm": 0.3460824489593506, "learning_rate": 1.3946943718019134e-06, "loss": 0.1043, "step": 44700 }, { "epoch": 0.829895701443991, "grad_norm": 0.29451557993888855, "learning_rate": 1.3941002213906374e-06, "loss": 0.3829, "step": 44702 }, { "epoch": 0.8299328315814097, "grad_norm": 0.4082031846046448, "learning_rate": 1.3935061880798806e-06, "loss": 0.3364, "step": 44704 }, { "epoch": 0.8299699617188283, "grad_norm": 0.5123321413993835, "learning_rate": 1.3929122718777233e-06, "loss": 0.2749, "step": 44706 }, { "epoch": 0.830007091856247, "grad_norm": 0.3799402117729187, "learning_rate": 1.392318472792249e-06, "loss": 0.2888, "step": 44708 }, { "epoch": 0.8300442219936656, "grad_norm": 0.3698118329048157, "learning_rate": 1.3917247908315368e-06, "loss": 0.3204, "step": 44710 }, { "epoch": 0.8300813521310842, "grad_norm": 0.7637192606925964, "learning_rate": 1.3911312260036658e-06, "loss": 0.4519, "step": 44712 }, { "epoch": 0.8301184822685028, "grad_norm": 0.36693376302719116, "learning_rate": 1.390537778316714e-06, "loss": 0.401, "step": 44714 }, { "epoch": 0.8301556124059215, "grad_norm": 0.24672552943229675, "learning_rate": 1.3899444477787528e-06, "loss": 0.4109, "step": 44716 }, { "epoch": 0.8301927425433402, "grad_norm": 0.2971721291542053, "learning_rate": 1.38935123439786e-06, "loss": 0.2368, "step": 44718 }, { "epoch": 0.8302298726807588, "grad_norm": 0.21379490196704865, "learning_rate": 1.3887581381821025e-06, "loss": 0.1113, "step": 44720 }, { "epoch": 0.8302670028181774, "grad_norm": 0.3707433342933655, "learning_rate": 1.388165159139553e-06, "loss": 0.319, "step": 44722 }, { "epoch": 0.830304132955596, "grad_norm": 0.353485643863678, "learning_rate": 1.3875722972782802e-06, "loss": 0.2927, "step": 44724 }, { "epoch": 0.8303412630930147, "grad_norm": 0.4065439999103546, "learning_rate": 1.3869795526063512e-06, "loss": 0.2746, "step": 44726 }, { "epoch": 0.8303783932304334, "grad_norm": 0.37699365615844727, "learning_rate": 1.3863869251318319e-06, "loss": 0.1144, "step": 44728 }, { "epoch": 0.830415523367852, "grad_norm": 0.3291468918323517, "learning_rate": 1.3857944148627878e-06, "loss": 0.3053, "step": 44730 }, { "epoch": 0.8304526535052706, "grad_norm": 0.3150242567062378, "learning_rate": 1.385202021807277e-06, "loss": 0.2804, "step": 44732 }, { "epoch": 0.8304897836426892, "grad_norm": 0.5001360177993774, "learning_rate": 1.3846097459733632e-06, "loss": 0.2991, "step": 44734 }, { "epoch": 0.8305269137801079, "grad_norm": 0.3275878429412842, "learning_rate": 1.3840175873691054e-06, "loss": 0.0844, "step": 44736 }, { "epoch": 0.8305640439175266, "grad_norm": 0.5636890530586243, "learning_rate": 1.3834255460025592e-06, "loss": 0.1857, "step": 44738 }, { "epoch": 0.8306011740549452, "grad_norm": 0.3712332546710968, "learning_rate": 1.3828336218817816e-06, "loss": 0.2335, "step": 44740 }, { "epoch": 0.8306383041923638, "grad_norm": 0.2726050317287445, "learning_rate": 1.3822418150148286e-06, "loss": 0.2746, "step": 44742 }, { "epoch": 0.8306754343297824, "grad_norm": 0.5958617925643921, "learning_rate": 1.381650125409749e-06, "loss": 0.3704, "step": 44744 }, { "epoch": 0.8307125644672011, "grad_norm": 0.4265875220298767, "learning_rate": 1.3810585530745968e-06, "loss": 0.1793, "step": 44746 }, { "epoch": 0.8307496946046198, "grad_norm": 0.3158298134803772, "learning_rate": 1.3804670980174218e-06, "loss": 0.2676, "step": 44748 }, { "epoch": 0.8307868247420384, "grad_norm": 0.35127800703048706, "learning_rate": 1.3798757602462698e-06, "loss": 0.2542, "step": 44750 }, { "epoch": 0.830823954879457, "grad_norm": 0.4771256148815155, "learning_rate": 1.379284539769189e-06, "loss": 0.4213, "step": 44752 }, { "epoch": 0.8308610850168756, "grad_norm": 0.3518950343132019, "learning_rate": 1.378693436594225e-06, "loss": 0.1961, "step": 44754 }, { "epoch": 0.8308982151542943, "grad_norm": 0.3614540696144104, "learning_rate": 1.3781024507294184e-06, "loss": 0.1611, "step": 44756 }, { "epoch": 0.830935345291713, "grad_norm": 0.31773507595062256, "learning_rate": 1.3775115821828132e-06, "loss": 0.3878, "step": 44758 }, { "epoch": 0.8309724754291316, "grad_norm": 0.25153690576553345, "learning_rate": 1.3769208309624472e-06, "loss": 0.4208, "step": 44760 }, { "epoch": 0.8310096055665502, "grad_norm": 0.4551907479763031, "learning_rate": 1.3763301970763577e-06, "loss": 0.2535, "step": 44762 }, { "epoch": 0.8310467357039688, "grad_norm": 0.4253501892089844, "learning_rate": 1.375739680532585e-06, "loss": 0.1408, "step": 44764 }, { "epoch": 0.8310838658413875, "grad_norm": 0.34814268350601196, "learning_rate": 1.375149281339162e-06, "loss": 0.3096, "step": 44766 }, { "epoch": 0.8311209959788061, "grad_norm": 0.3495209515094757, "learning_rate": 1.3745589995041241e-06, "loss": 0.1151, "step": 44768 }, { "epoch": 0.8311581261162247, "grad_norm": 0.3733285665512085, "learning_rate": 1.3739688350355007e-06, "loss": 0.0974, "step": 44770 }, { "epoch": 0.8311952562536434, "grad_norm": 0.33342117071151733, "learning_rate": 1.373378787941324e-06, "loss": 0.2385, "step": 44772 }, { "epoch": 0.831232386391062, "grad_norm": 0.5446091294288635, "learning_rate": 1.3727888582296211e-06, "loss": 0.2575, "step": 44774 }, { "epoch": 0.8312695165284807, "grad_norm": 0.27492138743400574, "learning_rate": 1.372199045908421e-06, "loss": 0.1555, "step": 44776 }, { "epoch": 0.8313066466658993, "grad_norm": 0.3746403157711029, "learning_rate": 1.3716093509857509e-06, "loss": 0.5507, "step": 44778 }, { "epoch": 0.831343776803318, "grad_norm": 0.41709232330322266, "learning_rate": 1.3710197734696329e-06, "loss": 0.2431, "step": 44780 }, { "epoch": 0.8313809069407366, "grad_norm": 0.36673423647880554, "learning_rate": 1.3704303133680862e-06, "loss": 0.2332, "step": 44782 }, { "epoch": 0.8314180370781552, "grad_norm": 0.46338629722595215, "learning_rate": 1.3698409706891347e-06, "loss": 0.3013, "step": 44784 }, { "epoch": 0.8314551672155739, "grad_norm": 0.46083107590675354, "learning_rate": 1.3692517454407971e-06, "loss": 0.1703, "step": 44786 }, { "epoch": 0.8314922973529925, "grad_norm": 0.4763292372226715, "learning_rate": 1.3686626376310908e-06, "loss": 0.2822, "step": 44788 }, { "epoch": 0.8315294274904111, "grad_norm": 0.334837943315506, "learning_rate": 1.3680736472680322e-06, "loss": 0.1577, "step": 44790 }, { "epoch": 0.8315665576278298, "grad_norm": 0.28181159496307373, "learning_rate": 1.3674847743596365e-06, "loss": 0.1855, "step": 44792 }, { "epoch": 0.8316036877652484, "grad_norm": 0.2867758274078369, "learning_rate": 1.366896018913917e-06, "loss": 0.143, "step": 44794 }, { "epoch": 0.8316408179026671, "grad_norm": 0.7849120497703552, "learning_rate": 1.3663073809388816e-06, "loss": 0.2942, "step": 44796 }, { "epoch": 0.8316779480400857, "grad_norm": 0.38695085048675537, "learning_rate": 1.365718860442542e-06, "loss": 0.2564, "step": 44798 }, { "epoch": 0.8317150781775043, "grad_norm": 0.2648868262767792, "learning_rate": 1.3651304574329083e-06, "loss": 0.2921, "step": 44800 }, { "epoch": 0.831752208314923, "grad_norm": 0.5481866598129272, "learning_rate": 1.364542171917982e-06, "loss": 0.1832, "step": 44802 }, { "epoch": 0.8317893384523416, "grad_norm": 0.12923914194107056, "learning_rate": 1.3639540039057708e-06, "loss": 0.1621, "step": 44804 }, { "epoch": 0.8318264685897603, "grad_norm": 0.3016194999217987, "learning_rate": 1.3633659534042797e-06, "loss": 0.2537, "step": 44806 }, { "epoch": 0.8318635987271789, "grad_norm": 0.4570295810699463, "learning_rate": 1.3627780204215069e-06, "loss": 0.2145, "step": 44808 }, { "epoch": 0.8319007288645975, "grad_norm": 1.4479295015335083, "learning_rate": 1.3621902049654523e-06, "loss": 0.2671, "step": 44810 }, { "epoch": 0.8319378590020161, "grad_norm": 0.46774137020111084, "learning_rate": 1.3616025070441163e-06, "loss": 0.2851, "step": 44812 }, { "epoch": 0.8319749891394348, "grad_norm": 0.6291539669036865, "learning_rate": 1.361014926665496e-06, "loss": 0.2924, "step": 44814 }, { "epoch": 0.8320121192768535, "grad_norm": 0.35860252380371094, "learning_rate": 1.3604274638375846e-06, "loss": 0.3007, "step": 44816 }, { "epoch": 0.832049249414272, "grad_norm": 0.24057145416736603, "learning_rate": 1.3598401185683806e-06, "loss": 0.3104, "step": 44818 }, { "epoch": 0.8320863795516907, "grad_norm": 0.297654926776886, "learning_rate": 1.3592528908658687e-06, "loss": 0.2417, "step": 44820 }, { "epoch": 0.8321235096891093, "grad_norm": 0.40400195121765137, "learning_rate": 1.3586657807380454e-06, "loss": 0.2908, "step": 44822 }, { "epoch": 0.832160639826528, "grad_norm": 0.3872624337673187, "learning_rate": 1.3580787881928958e-06, "loss": 0.346, "step": 44824 }, { "epoch": 0.8321977699639467, "grad_norm": 0.5213940739631653, "learning_rate": 1.357491913238408e-06, "loss": 0.3703, "step": 44826 }, { "epoch": 0.8322349001013652, "grad_norm": 0.6502837538719177, "learning_rate": 1.3569051558825675e-06, "loss": 0.2462, "step": 44828 }, { "epoch": 0.8322720302387839, "grad_norm": 0.47773054242134094, "learning_rate": 1.3563185161333603e-06, "loss": 0.3946, "step": 44830 }, { "epoch": 0.8323091603762025, "grad_norm": 0.436495840549469, "learning_rate": 1.3557319939987657e-06, "loss": 0.1567, "step": 44832 }, { "epoch": 0.8323462905136212, "grad_norm": 0.2304411232471466, "learning_rate": 1.355145589486767e-06, "loss": 0.327, "step": 44834 }, { "epoch": 0.8323834206510399, "grad_norm": 0.3646831810474396, "learning_rate": 1.3545593026053417e-06, "loss": 0.2137, "step": 44836 }, { "epoch": 0.8324205507884584, "grad_norm": 0.2903871536254883, "learning_rate": 1.3539731333624684e-06, "loss": 0.2467, "step": 44838 }, { "epoch": 0.8324576809258771, "grad_norm": 0.46289902925491333, "learning_rate": 1.3533870817661242e-06, "loss": 0.3503, "step": 44840 }, { "epoch": 0.8324948110632957, "grad_norm": 0.24181193113327026, "learning_rate": 1.3528011478242808e-06, "loss": 0.1209, "step": 44842 }, { "epoch": 0.8325319412007144, "grad_norm": 0.5549391508102417, "learning_rate": 1.352215331544915e-06, "loss": 0.2863, "step": 44844 }, { "epoch": 0.8325690713381331, "grad_norm": 0.3590112030506134, "learning_rate": 1.3516296329359924e-06, "loss": 0.0841, "step": 44846 }, { "epoch": 0.8326062014755516, "grad_norm": 0.2760082483291626, "learning_rate": 1.3510440520054858e-06, "loss": 0.3155, "step": 44848 }, { "epoch": 0.8326433316129703, "grad_norm": 0.4257012903690338, "learning_rate": 1.3504585887613631e-06, "loss": 0.4746, "step": 44850 }, { "epoch": 0.8326804617503889, "grad_norm": 0.43495070934295654, "learning_rate": 1.3498732432115914e-06, "loss": 0.167, "step": 44852 }, { "epoch": 0.8327175918878076, "grad_norm": 0.4181768596172333, "learning_rate": 1.3492880153641342e-06, "loss": 0.3122, "step": 44854 }, { "epoch": 0.8327547220252263, "grad_norm": 0.44027766585350037, "learning_rate": 1.3487029052269563e-06, "loss": 0.1784, "step": 44856 }, { "epoch": 0.8327918521626448, "grad_norm": 0.3995511829853058, "learning_rate": 1.348117912808018e-06, "loss": 0.2844, "step": 44858 }, { "epoch": 0.8328289823000635, "grad_norm": 0.4001339077949524, "learning_rate": 1.3475330381152784e-06, "loss": 0.2239, "step": 44860 }, { "epoch": 0.8328661124374821, "grad_norm": 0.5202770829200745, "learning_rate": 1.3469482811566993e-06, "loss": 0.2926, "step": 44862 }, { "epoch": 0.8329032425749008, "grad_norm": 0.5506640672683716, "learning_rate": 1.3463636419402327e-06, "loss": 0.3624, "step": 44864 }, { "epoch": 0.8329403727123194, "grad_norm": 0.5263121128082275, "learning_rate": 1.3457791204738368e-06, "loss": 0.1907, "step": 44866 }, { "epoch": 0.832977502849738, "grad_norm": 0.28308677673339844, "learning_rate": 1.3451947167654666e-06, "loss": 0.3358, "step": 44868 }, { "epoch": 0.8330146329871567, "grad_norm": 0.32979995012283325, "learning_rate": 1.3446104308230701e-06, "loss": 0.4196, "step": 44870 }, { "epoch": 0.8330517631245753, "grad_norm": 0.34685221314430237, "learning_rate": 1.3440262626545997e-06, "loss": 0.3335, "step": 44872 }, { "epoch": 0.833088893261994, "grad_norm": 0.37179824709892273, "learning_rate": 1.3434422122680046e-06, "loss": 0.4498, "step": 44874 }, { "epoch": 0.8331260233994126, "grad_norm": 0.23270311951637268, "learning_rate": 1.3428582796712309e-06, "loss": 0.3719, "step": 44876 }, { "epoch": 0.8331631535368312, "grad_norm": 0.30043721199035645, "learning_rate": 1.3422744648722251e-06, "loss": 0.3748, "step": 44878 }, { "epoch": 0.8332002836742499, "grad_norm": 0.5993932485580444, "learning_rate": 1.3416907678789314e-06, "loss": 0.4098, "step": 44880 }, { "epoch": 0.8332374138116685, "grad_norm": 0.20825675129890442, "learning_rate": 1.3411071886992932e-06, "loss": 0.161, "step": 44882 }, { "epoch": 0.8332745439490872, "grad_norm": 0.4286157488822937, "learning_rate": 1.3405237273412485e-06, "loss": 0.2314, "step": 44884 }, { "epoch": 0.8333116740865057, "grad_norm": 0.3126898407936096, "learning_rate": 1.3399403838127388e-06, "loss": 0.192, "step": 44886 }, { "epoch": 0.8333488042239244, "grad_norm": 0.28187185525894165, "learning_rate": 1.339357158121699e-06, "loss": 0.2808, "step": 44888 }, { "epoch": 0.8333859343613431, "grad_norm": 0.3941890299320221, "learning_rate": 1.3387740502760672e-06, "loss": 0.2257, "step": 44890 }, { "epoch": 0.8334230644987617, "grad_norm": 0.5147727131843567, "learning_rate": 1.3381910602837767e-06, "loss": 0.2975, "step": 44892 }, { "epoch": 0.8334601946361804, "grad_norm": 0.37041109800338745, "learning_rate": 1.3376081881527626e-06, "loss": 0.2131, "step": 44894 }, { "epoch": 0.833497324773599, "grad_norm": 0.2776052951812744, "learning_rate": 1.3370254338909528e-06, "loss": 0.2571, "step": 44896 }, { "epoch": 0.8335344549110176, "grad_norm": 0.39083975553512573, "learning_rate": 1.3364427975062777e-06, "loss": 0.2832, "step": 44898 }, { "epoch": 0.8335715850484363, "grad_norm": 0.5486248731613159, "learning_rate": 1.3358602790066655e-06, "loss": 0.3231, "step": 44900 }, { "epoch": 0.8336087151858549, "grad_norm": 0.6890703439712524, "learning_rate": 1.335277878400043e-06, "loss": 0.3461, "step": 44902 }, { "epoch": 0.8336458453232736, "grad_norm": 0.46512311697006226, "learning_rate": 1.3346955956943363e-06, "loss": 0.2562, "step": 44904 }, { "epoch": 0.8336829754606921, "grad_norm": 0.6313436627388, "learning_rate": 1.3341134308974658e-06, "loss": 0.3606, "step": 44906 }, { "epoch": 0.8337201055981108, "grad_norm": 0.5636327862739563, "learning_rate": 1.3335313840173559e-06, "loss": 0.2092, "step": 44908 }, { "epoch": 0.8337572357355295, "grad_norm": 0.30125662684440613, "learning_rate": 1.3329494550619227e-06, "loss": 0.2961, "step": 44910 }, { "epoch": 0.8337943658729481, "grad_norm": 0.5612263679504395, "learning_rate": 1.3323676440390864e-06, "loss": 0.3958, "step": 44912 }, { "epoch": 0.8338314960103668, "grad_norm": 0.3315229117870331, "learning_rate": 1.3317859509567643e-06, "loss": 0.3512, "step": 44914 }, { "epoch": 0.8338686261477853, "grad_norm": 0.4897848665714264, "learning_rate": 1.331204375822871e-06, "loss": 0.1967, "step": 44916 }, { "epoch": 0.833905756285204, "grad_norm": 0.38958385586738586, "learning_rate": 1.33062291864532e-06, "loss": 0.1435, "step": 44918 }, { "epoch": 0.8339428864226226, "grad_norm": 0.2965165674686432, "learning_rate": 1.3300415794320255e-06, "loss": 0.1852, "step": 44920 }, { "epoch": 0.8339800165600413, "grad_norm": 0.7399211525917053, "learning_rate": 1.3294603581908938e-06, "loss": 0.2411, "step": 44922 }, { "epoch": 0.83401714669746, "grad_norm": 0.2486562579870224, "learning_rate": 1.3288792549298347e-06, "loss": 0.1999, "step": 44924 }, { "epoch": 0.8340542768348785, "grad_norm": 0.35691970586776733, "learning_rate": 1.328298269656758e-06, "loss": 0.3243, "step": 44926 }, { "epoch": 0.8340914069722972, "grad_norm": 0.48722851276397705, "learning_rate": 1.3277174023795659e-06, "loss": 0.2818, "step": 44928 }, { "epoch": 0.8341285371097158, "grad_norm": 0.46762779355049133, "learning_rate": 1.3271366531061625e-06, "loss": 0.2628, "step": 44930 }, { "epoch": 0.8341656672471345, "grad_norm": 0.40678316354751587, "learning_rate": 1.326556021844454e-06, "loss": 0.3499, "step": 44932 }, { "epoch": 0.8342027973845532, "grad_norm": 0.47252246737480164, "learning_rate": 1.325975508602335e-06, "loss": 0.4584, "step": 44934 }, { "epoch": 0.8342399275219717, "grad_norm": 0.506563663482666, "learning_rate": 1.3253951133877075e-06, "loss": 0.256, "step": 44936 }, { "epoch": 0.8342770576593904, "grad_norm": 0.3010956943035126, "learning_rate": 1.32481483620847e-06, "loss": 0.3433, "step": 44938 }, { "epoch": 0.834314187796809, "grad_norm": 0.5088716745376587, "learning_rate": 1.3242346770725167e-06, "loss": 0.2732, "step": 44940 }, { "epoch": 0.8343513179342277, "grad_norm": 0.34784966707229614, "learning_rate": 1.3236546359877433e-06, "loss": 0.1205, "step": 44942 }, { "epoch": 0.8343884480716464, "grad_norm": 0.4532397985458374, "learning_rate": 1.3230747129620425e-06, "loss": 0.3476, "step": 44944 }, { "epoch": 0.8344255782090649, "grad_norm": 0.28261974453926086, "learning_rate": 1.3224949080033034e-06, "loss": 0.2151, "step": 44946 }, { "epoch": 0.8344627083464836, "grad_norm": 0.5063139200210571, "learning_rate": 1.3219152211194186e-06, "loss": 0.3357, "step": 44948 }, { "epoch": 0.8344998384839022, "grad_norm": 0.5508877038955688, "learning_rate": 1.3213356523182708e-06, "loss": 0.2917, "step": 44950 }, { "epoch": 0.8345369686213209, "grad_norm": 0.5083823800086975, "learning_rate": 1.3207562016077501e-06, "loss": 0.3175, "step": 44952 }, { "epoch": 0.8345740987587396, "grad_norm": 0.7302245497703552, "learning_rate": 1.3201768689957396e-06, "loss": 0.191, "step": 44954 }, { "epoch": 0.8346112288961581, "grad_norm": 0.35993441939353943, "learning_rate": 1.3195976544901235e-06, "loss": 0.2678, "step": 44956 }, { "epoch": 0.8346483590335768, "grad_norm": 0.5507876873016357, "learning_rate": 1.3190185580987835e-06, "loss": 0.3134, "step": 44958 }, { "epoch": 0.8346854891709954, "grad_norm": 0.328533411026001, "learning_rate": 1.3184395798295968e-06, "loss": 0.4651, "step": 44960 }, { "epoch": 0.8347226193084141, "grad_norm": 0.39272600412368774, "learning_rate": 1.3178607196904437e-06, "loss": 0.2403, "step": 44962 }, { "epoch": 0.8347597494458326, "grad_norm": 0.35391271114349365, "learning_rate": 1.3172819776891988e-06, "loss": 0.3794, "step": 44964 }, { "epoch": 0.8347968795832513, "grad_norm": 0.5653648972511292, "learning_rate": 1.3167033538337392e-06, "loss": 0.1661, "step": 44966 }, { "epoch": 0.83483400972067, "grad_norm": 0.3520868122577667, "learning_rate": 1.31612484813194e-06, "loss": 0.3469, "step": 44968 }, { "epoch": 0.8348711398580886, "grad_norm": 0.4825683534145355, "learning_rate": 1.3155464605916702e-06, "loss": 0.2464, "step": 44970 }, { "epoch": 0.8349082699955073, "grad_norm": 0.3775075376033783, "learning_rate": 1.3149681912207978e-06, "loss": 0.3343, "step": 44972 }, { "epoch": 0.8349454001329258, "grad_norm": 0.5979307889938354, "learning_rate": 1.3143900400271937e-06, "loss": 0.1849, "step": 44974 }, { "epoch": 0.8349825302703445, "grad_norm": 0.5886656045913696, "learning_rate": 1.3138120070187254e-06, "loss": 0.318, "step": 44976 }, { "epoch": 0.8350196604077632, "grad_norm": 0.44074031710624695, "learning_rate": 1.3132340922032561e-06, "loss": 0.3487, "step": 44978 }, { "epoch": 0.8350567905451818, "grad_norm": 0.4174761176109314, "learning_rate": 1.3126562955886524e-06, "loss": 0.1454, "step": 44980 }, { "epoch": 0.8350939206826005, "grad_norm": 0.4605412781238556, "learning_rate": 1.3120786171827759e-06, "loss": 0.2476, "step": 44982 }, { "epoch": 0.835131050820019, "grad_norm": 0.29947465658187866, "learning_rate": 1.311501056993485e-06, "loss": 0.2662, "step": 44984 }, { "epoch": 0.8351681809574377, "grad_norm": 0.4422217607498169, "learning_rate": 1.3109236150286386e-06, "loss": 0.382, "step": 44986 }, { "epoch": 0.8352053110948564, "grad_norm": 0.5025306940078735, "learning_rate": 1.310346291296095e-06, "loss": 0.1374, "step": 44988 }, { "epoch": 0.835242441232275, "grad_norm": 0.43528711795806885, "learning_rate": 1.3097690858037126e-06, "loss": 0.1877, "step": 44990 }, { "epoch": 0.8352795713696937, "grad_norm": 0.2730690836906433, "learning_rate": 1.3091919985593404e-06, "loss": 0.2233, "step": 44992 }, { "epoch": 0.8353167015071122, "grad_norm": 0.34336599707603455, "learning_rate": 1.3086150295708355e-06, "loss": 0.2124, "step": 44994 }, { "epoch": 0.8353538316445309, "grad_norm": 0.37034472823143005, "learning_rate": 1.3080381788460438e-06, "loss": 0.272, "step": 44996 }, { "epoch": 0.8353909617819496, "grad_norm": 0.31635940074920654, "learning_rate": 1.3074614463928171e-06, "loss": 0.2578, "step": 44998 }, { "epoch": 0.8354280919193682, "grad_norm": 0.2950815260410309, "learning_rate": 1.306884832219003e-06, "loss": 0.3134, "step": 45000 }, { "epoch": 0.8354652220567869, "grad_norm": 0.4889046549797058, "learning_rate": 1.306308336332448e-06, "loss": 0.2546, "step": 45002 }, { "epoch": 0.8355023521942054, "grad_norm": 0.43934768438339233, "learning_rate": 1.3057319587409956e-06, "loss": 0.122, "step": 45004 }, { "epoch": 0.8355394823316241, "grad_norm": 0.43278878927230835, "learning_rate": 1.3051556994524883e-06, "loss": 0.3224, "step": 45006 }, { "epoch": 0.8355766124690428, "grad_norm": 0.4403007924556732, "learning_rate": 1.3045795584747712e-06, "loss": 0.237, "step": 45008 }, { "epoch": 0.8356137426064614, "grad_norm": 0.7316752076148987, "learning_rate": 1.3040035358156766e-06, "loss": 0.1876, "step": 45010 }, { "epoch": 0.8356508727438801, "grad_norm": 0.5798961520195007, "learning_rate": 1.3034276314830496e-06, "loss": 0.1681, "step": 45012 }, { "epoch": 0.8356880028812986, "grad_norm": 0.3139997124671936, "learning_rate": 1.3028518454847216e-06, "loss": 0.2349, "step": 45014 }, { "epoch": 0.8357251330187173, "grad_norm": 0.34415721893310547, "learning_rate": 1.3022761778285275e-06, "loss": 0.1386, "step": 45016 }, { "epoch": 0.8357622631561359, "grad_norm": 0.4023856520652771, "learning_rate": 1.3017006285223033e-06, "loss": 0.3346, "step": 45018 }, { "epoch": 0.8357993932935546, "grad_norm": 0.5252102017402649, "learning_rate": 1.3011251975738815e-06, "loss": 0.4141, "step": 45020 }, { "epoch": 0.8358365234309733, "grad_norm": 0.43758922815322876, "learning_rate": 1.3005498849910857e-06, "loss": 0.1681, "step": 45022 }, { "epoch": 0.8358736535683918, "grad_norm": 0.3989613354206085, "learning_rate": 1.2999746907817501e-06, "loss": 0.3502, "step": 45024 }, { "epoch": 0.8359107837058105, "grad_norm": 0.4268325865268707, "learning_rate": 1.299399614953698e-06, "loss": 0.245, "step": 45026 }, { "epoch": 0.8359479138432291, "grad_norm": 0.25815847516059875, "learning_rate": 1.2988246575147567e-06, "loss": 0.1612, "step": 45028 }, { "epoch": 0.8359850439806478, "grad_norm": 0.31244388222694397, "learning_rate": 1.2982498184727498e-06, "loss": 0.2607, "step": 45030 }, { "epoch": 0.8360221741180665, "grad_norm": 0.4806983172893524, "learning_rate": 1.2976750978354968e-06, "loss": 0.2321, "step": 45032 }, { "epoch": 0.836059304255485, "grad_norm": 0.41797515749931335, "learning_rate": 1.2971004956108213e-06, "loss": 0.3074, "step": 45034 }, { "epoch": 0.8360964343929037, "grad_norm": 0.686809241771698, "learning_rate": 1.2965260118065382e-06, "loss": 0.3773, "step": 45036 }, { "epoch": 0.8361335645303223, "grad_norm": 0.3613864481449127, "learning_rate": 1.2959516464304656e-06, "loss": 0.3173, "step": 45038 }, { "epoch": 0.836170694667741, "grad_norm": 0.4357624351978302, "learning_rate": 1.2953773994904185e-06, "loss": 0.196, "step": 45040 }, { "epoch": 0.8362078248051596, "grad_norm": 0.38213497400283813, "learning_rate": 1.2948032709942126e-06, "loss": 0.0931, "step": 45042 }, { "epoch": 0.8362449549425782, "grad_norm": 0.29286354780197144, "learning_rate": 1.2942292609496598e-06, "loss": 0.2969, "step": 45044 }, { "epoch": 0.8362820850799969, "grad_norm": 0.5897377133369446, "learning_rate": 1.2936553693645714e-06, "loss": 0.5881, "step": 45046 }, { "epoch": 0.8363192152174155, "grad_norm": 0.36343255639076233, "learning_rate": 1.2930815962467525e-06, "loss": 0.4188, "step": 45048 }, { "epoch": 0.8363563453548342, "grad_norm": 0.33690643310546875, "learning_rate": 1.292507941604013e-06, "loss": 0.4453, "step": 45050 }, { "epoch": 0.8363934754922528, "grad_norm": 0.35522323846817017, "learning_rate": 1.2919344054441585e-06, "loss": 0.3767, "step": 45052 }, { "epoch": 0.8364306056296714, "grad_norm": 0.4292003810405731, "learning_rate": 1.2913609877749956e-06, "loss": 0.5735, "step": 45054 }, { "epoch": 0.8364677357670901, "grad_norm": 0.4304521381855011, "learning_rate": 1.2907876886043214e-06, "loss": 0.4891, "step": 45056 }, { "epoch": 0.8365048659045087, "grad_norm": 0.3469237983226776, "learning_rate": 1.290214507939942e-06, "loss": 0.1458, "step": 45058 }, { "epoch": 0.8365419960419274, "grad_norm": 0.6322197318077087, "learning_rate": 1.2896414457896522e-06, "loss": 0.3187, "step": 45060 }, { "epoch": 0.836579126179346, "grad_norm": 0.7496894001960754, "learning_rate": 1.2890685021612515e-06, "loss": 0.2334, "step": 45062 }, { "epoch": 0.8366162563167646, "grad_norm": 0.35302454233169556, "learning_rate": 1.2884956770625368e-06, "loss": 0.3174, "step": 45064 }, { "epoch": 0.8366533864541833, "grad_norm": 0.25830864906311035, "learning_rate": 1.287922970501302e-06, "loss": 0.1221, "step": 45066 }, { "epoch": 0.8366905165916019, "grad_norm": 0.1809079796075821, "learning_rate": 1.2873503824853395e-06, "loss": 0.3441, "step": 45068 }, { "epoch": 0.8367276467290206, "grad_norm": 0.2895350456237793, "learning_rate": 1.2867779130224433e-06, "loss": 0.3351, "step": 45070 }, { "epoch": 0.8367647768664391, "grad_norm": 0.30710190534591675, "learning_rate": 1.2862055621203985e-06, "loss": 0.1846, "step": 45072 }, { "epoch": 0.8368019070038578, "grad_norm": 0.352020800113678, "learning_rate": 1.285633329786995e-06, "loss": 0.4566, "step": 45074 }, { "epoch": 0.8368390371412765, "grad_norm": 0.6144376993179321, "learning_rate": 1.2850612160300213e-06, "loss": 0.302, "step": 45076 }, { "epoch": 0.8368761672786951, "grad_norm": 1.1882773637771606, "learning_rate": 1.284489220857258e-06, "loss": 0.2792, "step": 45078 }, { "epoch": 0.8369132974161138, "grad_norm": 0.37534239888191223, "learning_rate": 1.2839173442764907e-06, "loss": 0.2337, "step": 45080 }, { "epoch": 0.8369504275535323, "grad_norm": 0.19285543262958527, "learning_rate": 1.2833455862955013e-06, "loss": 0.2437, "step": 45082 }, { "epoch": 0.836987557690951, "grad_norm": 0.38733842968940735, "learning_rate": 1.28277394692207e-06, "loss": 0.4651, "step": 45084 }, { "epoch": 0.8370246878283697, "grad_norm": 0.5257609486579895, "learning_rate": 1.2822024261639721e-06, "loss": 0.1413, "step": 45086 }, { "epoch": 0.8370618179657883, "grad_norm": 0.39427798986434937, "learning_rate": 1.2816310240289876e-06, "loss": 0.3594, "step": 45088 }, { "epoch": 0.837098948103207, "grad_norm": 0.5428263545036316, "learning_rate": 1.2810597405248893e-06, "loss": 0.2698, "step": 45090 }, { "epoch": 0.8371360782406255, "grad_norm": 0.49413853883743286, "learning_rate": 1.280488575659452e-06, "loss": 0.1077, "step": 45092 }, { "epoch": 0.8371732083780442, "grad_norm": 0.5706779360771179, "learning_rate": 1.2799175294404486e-06, "loss": 0.4263, "step": 45094 }, { "epoch": 0.8372103385154629, "grad_norm": 0.38936561346054077, "learning_rate": 1.2793466018756473e-06, "loss": 0.1909, "step": 45096 }, { "epoch": 0.8372474686528815, "grad_norm": 0.5588836669921875, "learning_rate": 1.2787757929728184e-06, "loss": 0.2535, "step": 45098 }, { "epoch": 0.8372845987903001, "grad_norm": 0.303112268447876, "learning_rate": 1.2782051027397257e-06, "loss": 0.1207, "step": 45100 }, { "epoch": 0.8373217289277187, "grad_norm": 0.366486519575119, "learning_rate": 1.2776345311841376e-06, "loss": 0.1838, "step": 45102 }, { "epoch": 0.8373588590651374, "grad_norm": 0.3416554927825928, "learning_rate": 1.2770640783138155e-06, "loss": 0.3471, "step": 45104 }, { "epoch": 0.8373959892025561, "grad_norm": 0.36398664116859436, "learning_rate": 1.2764937441365243e-06, "loss": 0.3675, "step": 45106 }, { "epoch": 0.8374331193399747, "grad_norm": 0.28080499172210693, "learning_rate": 1.2759235286600258e-06, "loss": 0.2433, "step": 45108 }, { "epoch": 0.8374702494773933, "grad_norm": 0.4126347303390503, "learning_rate": 1.2753534318920736e-06, "loss": 0.2695, "step": 45110 }, { "epoch": 0.8375073796148119, "grad_norm": 0.29457613825798035, "learning_rate": 1.274783453840428e-06, "loss": 0.3346, "step": 45112 }, { "epoch": 0.8375445097522306, "grad_norm": 0.33102530241012573, "learning_rate": 1.2742135945128441e-06, "loss": 0.2748, "step": 45114 }, { "epoch": 0.8375816398896492, "grad_norm": 0.3512861430644989, "learning_rate": 1.2736438539170793e-06, "loss": 0.3683, "step": 45116 }, { "epoch": 0.8376187700270679, "grad_norm": 0.6879599094390869, "learning_rate": 1.2730742320608801e-06, "loss": 0.3493, "step": 45118 }, { "epoch": 0.8376559001644865, "grad_norm": 0.38871631026268005, "learning_rate": 1.272504728952003e-06, "loss": 0.3884, "step": 45120 }, { "epoch": 0.8376930303019051, "grad_norm": 0.29564133286476135, "learning_rate": 1.271935344598193e-06, "loss": 0.1928, "step": 45122 }, { "epoch": 0.8377301604393238, "grad_norm": 0.3025103807449341, "learning_rate": 1.271366079007199e-06, "loss": 0.2815, "step": 45124 }, { "epoch": 0.8377672905767424, "grad_norm": 0.41453662514686584, "learning_rate": 1.2707969321867674e-06, "loss": 0.2279, "step": 45126 }, { "epoch": 0.8378044207141611, "grad_norm": 0.38855987787246704, "learning_rate": 1.270227904144643e-06, "loss": 0.189, "step": 45128 }, { "epoch": 0.8378415508515797, "grad_norm": 0.2858559191226959, "learning_rate": 1.2696589948885674e-06, "loss": 0.299, "step": 45130 }, { "epoch": 0.8378786809889983, "grad_norm": 0.3040689527988434, "learning_rate": 1.2690902044262832e-06, "loss": 0.4417, "step": 45132 }, { "epoch": 0.837915811126417, "grad_norm": 0.442325621843338, "learning_rate": 1.268521532765531e-06, "loss": 0.3526, "step": 45134 }, { "epoch": 0.8379529412638356, "grad_norm": 0.25379738211631775, "learning_rate": 1.2679529799140445e-06, "loss": 0.2031, "step": 45136 }, { "epoch": 0.8379900714012543, "grad_norm": 0.6169052720069885, "learning_rate": 1.2673845458795652e-06, "loss": 0.3989, "step": 45138 }, { "epoch": 0.8380272015386729, "grad_norm": 0.395910382270813, "learning_rate": 1.2668162306698227e-06, "loss": 0.2172, "step": 45140 }, { "epoch": 0.8380643316760915, "grad_norm": 0.6024186611175537, "learning_rate": 1.266248034292552e-06, "loss": 0.46, "step": 45142 }, { "epoch": 0.8381014618135102, "grad_norm": 0.3437790870666504, "learning_rate": 1.2656799567554845e-06, "loss": 0.403, "step": 45144 }, { "epoch": 0.8381385919509288, "grad_norm": 0.2517353594303131, "learning_rate": 1.2651119980663539e-06, "loss": 0.1684, "step": 45146 }, { "epoch": 0.8381757220883475, "grad_norm": 0.5301719307899475, "learning_rate": 1.2645441582328822e-06, "loss": 0.2446, "step": 45148 }, { "epoch": 0.8382128522257661, "grad_norm": 0.7153987884521484, "learning_rate": 1.2639764372627982e-06, "loss": 0.2055, "step": 45150 }, { "epoch": 0.8382499823631847, "grad_norm": 0.35205817222595215, "learning_rate": 1.2634088351638285e-06, "loss": 0.2134, "step": 45152 }, { "epoch": 0.8382871125006034, "grad_norm": 0.47933775186538696, "learning_rate": 1.2628413519436955e-06, "loss": 0.2499, "step": 45154 }, { "epoch": 0.838324242638022, "grad_norm": 0.39602696895599365, "learning_rate": 1.2622739876101197e-06, "loss": 0.2024, "step": 45156 }, { "epoch": 0.8383613727754406, "grad_norm": 0.3977866768836975, "learning_rate": 1.2617067421708252e-06, "loss": 0.3727, "step": 45158 }, { "epoch": 0.8383985029128593, "grad_norm": 0.4344384968280792, "learning_rate": 1.2611396156335253e-06, "loss": 0.5792, "step": 45160 }, { "epoch": 0.8384356330502779, "grad_norm": 0.6845353841781616, "learning_rate": 1.2605726080059421e-06, "loss": 0.3728, "step": 45162 }, { "epoch": 0.8384727631876966, "grad_norm": 0.7672502398490906, "learning_rate": 1.260005719295786e-06, "loss": 0.3233, "step": 45164 }, { "epoch": 0.8385098933251152, "grad_norm": 0.4859590232372284, "learning_rate": 1.2594389495107718e-06, "loss": 0.3, "step": 45166 }, { "epoch": 0.8385470234625338, "grad_norm": 0.8549558520317078, "learning_rate": 1.2588722986586132e-06, "loss": 0.1337, "step": 45168 }, { "epoch": 0.8385841535999524, "grad_norm": 0.5360096096992493, "learning_rate": 1.25830576674702e-06, "loss": 0.3736, "step": 45170 }, { "epoch": 0.8386212837373711, "grad_norm": 0.3445279598236084, "learning_rate": 1.2577393537837024e-06, "loss": 0.3243, "step": 45172 }, { "epoch": 0.8386584138747898, "grad_norm": 0.5684767961502075, "learning_rate": 1.2571730597763644e-06, "loss": 0.4087, "step": 45174 }, { "epoch": 0.8386955440122084, "grad_norm": 0.47289934754371643, "learning_rate": 1.256606884732714e-06, "loss": 0.21, "step": 45176 }, { "epoch": 0.838732674149627, "grad_norm": 0.3160935938358307, "learning_rate": 1.256040828660453e-06, "loss": 0.206, "step": 45178 }, { "epoch": 0.8387698042870456, "grad_norm": 0.4236598610877991, "learning_rate": 1.2554748915672876e-06, "loss": 0.3754, "step": 45180 }, { "epoch": 0.8388069344244643, "grad_norm": 0.28959739208221436, "learning_rate": 1.254909073460915e-06, "loss": 0.1838, "step": 45182 }, { "epoch": 0.838844064561883, "grad_norm": 0.35430288314819336, "learning_rate": 1.2543433743490362e-06, "loss": 0.2966, "step": 45184 }, { "epoch": 0.8388811946993016, "grad_norm": 0.3100769817829132, "learning_rate": 1.2537777942393458e-06, "loss": 0.3051, "step": 45186 }, { "epoch": 0.8389183248367202, "grad_norm": 0.48462754487991333, "learning_rate": 1.2532123331395428e-06, "loss": 0.1254, "step": 45188 }, { "epoch": 0.8389554549741388, "grad_norm": 0.4009462594985962, "learning_rate": 1.2526469910573191e-06, "loss": 0.2094, "step": 45190 }, { "epoch": 0.8389925851115575, "grad_norm": 0.2622302770614624, "learning_rate": 1.2520817680003682e-06, "loss": 0.1846, "step": 45192 }, { "epoch": 0.8390297152489762, "grad_norm": 0.5035792589187622, "learning_rate": 1.2515166639763831e-06, "loss": 0.3289, "step": 45194 }, { "epoch": 0.8390668453863948, "grad_norm": 0.2800925672054291, "learning_rate": 1.2509516789930521e-06, "loss": 0.4425, "step": 45196 }, { "epoch": 0.8391039755238134, "grad_norm": 0.360747754573822, "learning_rate": 1.250386813058061e-06, "loss": 0.3929, "step": 45198 }, { "epoch": 0.839141105661232, "grad_norm": 0.2983044385910034, "learning_rate": 1.249822066179096e-06, "loss": 0.3141, "step": 45200 }, { "epoch": 0.8391782357986507, "grad_norm": 0.3961217999458313, "learning_rate": 1.2492574383638466e-06, "loss": 0.2449, "step": 45202 }, { "epoch": 0.8392153659360694, "grad_norm": 0.6232540011405945, "learning_rate": 1.248692929619989e-06, "loss": 0.4143, "step": 45204 }, { "epoch": 0.839252496073488, "grad_norm": 0.3401852548122406, "learning_rate": 1.248128539955208e-06, "loss": 0.3741, "step": 45206 }, { "epoch": 0.8392896262109066, "grad_norm": 0.53154057264328, "learning_rate": 1.2475642693771817e-06, "loss": 0.2759, "step": 45208 }, { "epoch": 0.8393267563483252, "grad_norm": 0.24535687267780304, "learning_rate": 1.247000117893592e-06, "loss": 0.3862, "step": 45210 }, { "epoch": 0.8393638864857439, "grad_norm": 0.39021027088165283, "learning_rate": 1.2464360855121093e-06, "loss": 0.442, "step": 45212 }, { "epoch": 0.8394010166231626, "grad_norm": 0.16382379829883575, "learning_rate": 1.2458721722404122e-06, "loss": 0.2771, "step": 45214 }, { "epoch": 0.8394381467605811, "grad_norm": 0.33260294795036316, "learning_rate": 1.245308378086174e-06, "loss": 0.2285, "step": 45216 }, { "epoch": 0.8394752768979998, "grad_norm": 0.37016791105270386, "learning_rate": 1.2447447030570648e-06, "loss": 0.3945, "step": 45218 }, { "epoch": 0.8395124070354184, "grad_norm": 0.5065252780914307, "learning_rate": 1.2441811471607546e-06, "loss": 0.2284, "step": 45220 }, { "epoch": 0.8395495371728371, "grad_norm": 0.3292807936668396, "learning_rate": 1.2436177104049151e-06, "loss": 0.2048, "step": 45222 }, { "epoch": 0.8395866673102557, "grad_norm": 0.48583173751831055, "learning_rate": 1.2430543927972094e-06, "loss": 0.0893, "step": 45224 }, { "epoch": 0.8396237974476743, "grad_norm": 0.34939268231391907, "learning_rate": 1.2424911943453023e-06, "loss": 0.2496, "step": 45226 }, { "epoch": 0.839660927585093, "grad_norm": 0.33939021825790405, "learning_rate": 1.2419281150568575e-06, "loss": 0.2534, "step": 45228 }, { "epoch": 0.8396980577225116, "grad_norm": 0.2972109019756317, "learning_rate": 1.2413651549395377e-06, "loss": 0.1642, "step": 45230 }, { "epoch": 0.8397351878599303, "grad_norm": 0.37945112586021423, "learning_rate": 1.2408023140010029e-06, "loss": 0.3842, "step": 45232 }, { "epoch": 0.8397723179973489, "grad_norm": 0.4863608181476593, "learning_rate": 1.240239592248914e-06, "loss": 0.4406, "step": 45234 }, { "epoch": 0.8398094481347675, "grad_norm": 0.3947312533855438, "learning_rate": 1.2396769896909233e-06, "loss": 0.2576, "step": 45236 }, { "epoch": 0.8398465782721862, "grad_norm": 0.5981470346450806, "learning_rate": 1.2391145063346888e-06, "loss": 0.3424, "step": 45238 }, { "epoch": 0.8398837084096048, "grad_norm": 0.3994324803352356, "learning_rate": 1.2385521421878644e-06, "loss": 0.3285, "step": 45240 }, { "epoch": 0.8399208385470235, "grad_norm": 0.3641209602355957, "learning_rate": 1.2379898972581017e-06, "loss": 0.2056, "step": 45242 }, { "epoch": 0.8399579686844421, "grad_norm": 0.5160177946090698, "learning_rate": 1.2374277715530537e-06, "loss": 0.333, "step": 45244 }, { "epoch": 0.8399950988218607, "grad_norm": 0.6410272717475891, "learning_rate": 1.236865765080364e-06, "loss": 0.2987, "step": 45246 }, { "epoch": 0.8400322289592794, "grad_norm": 0.21218463778495789, "learning_rate": 1.2363038778476844e-06, "loss": 0.2423, "step": 45248 }, { "epoch": 0.840069359096698, "grad_norm": 0.3017255365848541, "learning_rate": 1.2357421098626565e-06, "loss": 0.2186, "step": 45250 }, { "epoch": 0.8401064892341167, "grad_norm": 0.38248953223228455, "learning_rate": 1.2351804611329276e-06, "loss": 0.3396, "step": 45252 }, { "epoch": 0.8401436193715353, "grad_norm": 0.3236926198005676, "learning_rate": 1.2346189316661384e-06, "loss": 0.2366, "step": 45254 }, { "epoch": 0.8401807495089539, "grad_norm": 0.36892765760421753, "learning_rate": 1.2340575214699302e-06, "loss": 0.1886, "step": 45256 }, { "epoch": 0.8402178796463726, "grad_norm": 0.3523741662502289, "learning_rate": 1.2334962305519415e-06, "loss": 0.2918, "step": 45258 }, { "epoch": 0.8402550097837912, "grad_norm": 0.5126590728759766, "learning_rate": 1.232935058919813e-06, "loss": 0.3094, "step": 45260 }, { "epoch": 0.8402921399212099, "grad_norm": 0.29195207357406616, "learning_rate": 1.2323740065811762e-06, "loss": 0.5404, "step": 45262 }, { "epoch": 0.8403292700586285, "grad_norm": 0.35124409198760986, "learning_rate": 1.2318130735436673e-06, "loss": 0.2936, "step": 45264 }, { "epoch": 0.8403664001960471, "grad_norm": 0.40031588077545166, "learning_rate": 1.2312522598149201e-06, "loss": 0.3257, "step": 45266 }, { "epoch": 0.8404035303334657, "grad_norm": 0.36197513341903687, "learning_rate": 1.2306915654025619e-06, "loss": 0.2938, "step": 45268 }, { "epoch": 0.8404406604708844, "grad_norm": 0.5545368194580078, "learning_rate": 1.2301309903142245e-06, "loss": 0.3459, "step": 45270 }, { "epoch": 0.8404777906083031, "grad_norm": 0.34288302063941956, "learning_rate": 1.2295705345575382e-06, "loss": 0.2677, "step": 45272 }, { "epoch": 0.8405149207457216, "grad_norm": 0.44922173023223877, "learning_rate": 1.2290101981401238e-06, "loss": 0.3151, "step": 45274 }, { "epoch": 0.8405520508831403, "grad_norm": 0.43209108710289, "learning_rate": 1.2284499810696093e-06, "loss": 0.2092, "step": 45276 }, { "epoch": 0.8405891810205589, "grad_norm": 0.3646174669265747, "learning_rate": 1.2278898833536157e-06, "loss": 0.2266, "step": 45278 }, { "epoch": 0.8406263111579776, "grad_norm": 0.31939801573753357, "learning_rate": 1.2273299049997656e-06, "loss": 0.1111, "step": 45280 }, { "epoch": 0.8406634412953963, "grad_norm": 0.3142664134502411, "learning_rate": 1.2267700460156784e-06, "loss": 0.2619, "step": 45282 }, { "epoch": 0.8407005714328148, "grad_norm": 0.461139053106308, "learning_rate": 1.2262103064089737e-06, "loss": 0.1923, "step": 45284 }, { "epoch": 0.8407377015702335, "grad_norm": 0.4799596071243286, "learning_rate": 1.2256506861872652e-06, "loss": 0.2422, "step": 45286 }, { "epoch": 0.8407748317076521, "grad_norm": 0.30488982796669006, "learning_rate": 1.2250911853581692e-06, "loss": 0.2267, "step": 45288 }, { "epoch": 0.8408119618450708, "grad_norm": 0.44499513506889343, "learning_rate": 1.2245318039292976e-06, "loss": 0.2358, "step": 45290 }, { "epoch": 0.8408490919824895, "grad_norm": 0.6676802039146423, "learning_rate": 1.2239725419082614e-06, "loss": 0.2242, "step": 45292 }, { "epoch": 0.840886222119908, "grad_norm": 0.27867016196250916, "learning_rate": 1.223413399302673e-06, "loss": 0.1809, "step": 45294 }, { "epoch": 0.8409233522573267, "grad_norm": 0.3164690434932709, "learning_rate": 1.2228543761201383e-06, "loss": 0.3284, "step": 45296 }, { "epoch": 0.8409604823947453, "grad_norm": 0.24337244033813477, "learning_rate": 1.2222954723682667e-06, "loss": 0.2519, "step": 45298 }, { "epoch": 0.840997612532164, "grad_norm": 0.33757564425468445, "learning_rate": 1.2217366880546599e-06, "loss": 0.2743, "step": 45300 }, { "epoch": 0.8410347426695827, "grad_norm": 0.21648545563220978, "learning_rate": 1.221178023186923e-06, "loss": 0.2054, "step": 45302 }, { "epoch": 0.8410718728070012, "grad_norm": 0.3650580942630768, "learning_rate": 1.2206194777726576e-06, "loss": 0.1946, "step": 45304 }, { "epoch": 0.8411090029444199, "grad_norm": 0.4334535002708435, "learning_rate": 1.2200610518194644e-06, "loss": 0.3808, "step": 45306 }, { "epoch": 0.8411461330818385, "grad_norm": 0.27494171261787415, "learning_rate": 1.219502745334943e-06, "loss": 0.3471, "step": 45308 }, { "epoch": 0.8411832632192572, "grad_norm": 0.5048553943634033, "learning_rate": 1.2189445583266878e-06, "loss": 0.1819, "step": 45310 }, { "epoch": 0.8412203933566759, "grad_norm": 0.4492442011833191, "learning_rate": 1.2183864908022935e-06, "loss": 0.2393, "step": 45312 }, { "epoch": 0.8412575234940944, "grad_norm": 0.3076966106891632, "learning_rate": 1.2178285427693547e-06, "loss": 0.4657, "step": 45314 }, { "epoch": 0.8412946536315131, "grad_norm": 0.3456513583660126, "learning_rate": 1.2172707142354633e-06, "loss": 0.4455, "step": 45316 }, { "epoch": 0.8413317837689317, "grad_norm": 0.38558804988861084, "learning_rate": 1.216713005208211e-06, "loss": 0.4214, "step": 45318 }, { "epoch": 0.8413689139063504, "grad_norm": 0.2730877101421356, "learning_rate": 1.216155415695186e-06, "loss": 0.2416, "step": 45320 }, { "epoch": 0.841406044043769, "grad_norm": 0.39779239892959595, "learning_rate": 1.2155979457039768e-06, "loss": 0.1751, "step": 45322 }, { "epoch": 0.8414431741811876, "grad_norm": 0.4569144546985626, "learning_rate": 1.215040595242165e-06, "loss": 0.4193, "step": 45324 }, { "epoch": 0.8414803043186063, "grad_norm": 0.4729786217212677, "learning_rate": 1.214483364317336e-06, "loss": 0.235, "step": 45326 }, { "epoch": 0.8415174344560249, "grad_norm": 0.23023664951324463, "learning_rate": 1.2139262529370743e-06, "loss": 0.2322, "step": 45328 }, { "epoch": 0.8415545645934436, "grad_norm": 0.449933797121048, "learning_rate": 1.2133692611089599e-06, "loss": 0.4932, "step": 45330 }, { "epoch": 0.8415916947308621, "grad_norm": 0.4338637888431549, "learning_rate": 1.2128123888405685e-06, "loss": 0.2744, "step": 45332 }, { "epoch": 0.8416288248682808, "grad_norm": 0.3076188862323761, "learning_rate": 1.2122556361394812e-06, "loss": 0.2273, "step": 45334 }, { "epoch": 0.8416659550056995, "grad_norm": 0.47620299458503723, "learning_rate": 1.2116990030132735e-06, "loss": 0.1985, "step": 45336 }, { "epoch": 0.8417030851431181, "grad_norm": 0.38767027854919434, "learning_rate": 1.2111424894695167e-06, "loss": 0.1874, "step": 45338 }, { "epoch": 0.8417402152805368, "grad_norm": 0.6446080207824707, "learning_rate": 1.2105860955157844e-06, "loss": 0.283, "step": 45340 }, { "epoch": 0.8417773454179553, "grad_norm": 0.41974738240242004, "learning_rate": 1.2100298211596485e-06, "loss": 0.2151, "step": 45342 }, { "epoch": 0.841814475555374, "grad_norm": 0.4539782404899597, "learning_rate": 1.209473666408677e-06, "loss": 0.1631, "step": 45344 }, { "epoch": 0.8418516056927927, "grad_norm": 0.42491239309310913, "learning_rate": 1.2089176312704387e-06, "loss": 0.3079, "step": 45346 }, { "epoch": 0.8418887358302113, "grad_norm": 0.2321523129940033, "learning_rate": 1.2083617157525018e-06, "loss": 0.2417, "step": 45348 }, { "epoch": 0.84192586596763, "grad_norm": 0.3383848965167999, "learning_rate": 1.2078059198624249e-06, "loss": 0.1232, "step": 45350 }, { "epoch": 0.8419629961050485, "grad_norm": 0.3025696277618408, "learning_rate": 1.2072502436077759e-06, "loss": 0.2221, "step": 45352 }, { "epoch": 0.8420001262424672, "grad_norm": 0.3008034825325012, "learning_rate": 1.2066946869961127e-06, "loss": 0.3954, "step": 45354 }, { "epoch": 0.8420372563798859, "grad_norm": 0.7164477705955505, "learning_rate": 1.2061392500349944e-06, "loss": 0.2334, "step": 45356 }, { "epoch": 0.8420743865173045, "grad_norm": 0.4537184536457062, "learning_rate": 1.2055839327319808e-06, "loss": 0.2691, "step": 45358 }, { "epoch": 0.8421115166547232, "grad_norm": 0.3986304998397827, "learning_rate": 1.20502873509463e-06, "loss": 0.2501, "step": 45360 }, { "epoch": 0.8421486467921417, "grad_norm": 0.4589011073112488, "learning_rate": 1.2044736571304915e-06, "loss": 0.2088, "step": 45362 }, { "epoch": 0.8421857769295604, "grad_norm": 0.4428768455982208, "learning_rate": 1.2039186988471218e-06, "loss": 0.164, "step": 45364 }, { "epoch": 0.8422229070669791, "grad_norm": 0.5436967015266418, "learning_rate": 1.2033638602520702e-06, "loss": 0.5053, "step": 45366 }, { "epoch": 0.8422600372043977, "grad_norm": 0.4356236755847931, "learning_rate": 1.2028091413528887e-06, "loss": 0.5368, "step": 45368 }, { "epoch": 0.8422971673418164, "grad_norm": 0.39481520652770996, "learning_rate": 1.2022545421571252e-06, "loss": 0.3726, "step": 45370 }, { "epoch": 0.8423342974792349, "grad_norm": 0.2691798508167267, "learning_rate": 1.201700062672323e-06, "loss": 0.1208, "step": 45372 }, { "epoch": 0.8423714276166536, "grad_norm": 0.40969720482826233, "learning_rate": 1.2011457029060313e-06, "loss": 0.3441, "step": 45374 }, { "epoch": 0.8424085577540722, "grad_norm": 0.3877717852592468, "learning_rate": 1.200591462865789e-06, "loss": 0.3389, "step": 45376 }, { "epoch": 0.8424456878914909, "grad_norm": 0.388508677482605, "learning_rate": 1.2000373425591395e-06, "loss": 0.3556, "step": 45378 }, { "epoch": 0.8424828180289096, "grad_norm": 0.37963294982910156, "learning_rate": 1.1994833419936225e-06, "loss": 0.2176, "step": 45380 }, { "epoch": 0.8425199481663281, "grad_norm": 0.5552801489830017, "learning_rate": 1.1989294611767776e-06, "loss": 0.3159, "step": 45382 }, { "epoch": 0.8425570783037468, "grad_norm": 0.41281190514564514, "learning_rate": 1.19837570011614e-06, "loss": 0.1046, "step": 45384 }, { "epoch": 0.8425942084411654, "grad_norm": 0.3915144205093384, "learning_rate": 1.1978220588192468e-06, "loss": 0.2836, "step": 45386 }, { "epoch": 0.8426313385785841, "grad_norm": 0.532941460609436, "learning_rate": 1.1972685372936276e-06, "loss": 0.3094, "step": 45388 }, { "epoch": 0.8426684687160028, "grad_norm": 0.4412866532802582, "learning_rate": 1.1967151355468176e-06, "loss": 0.3807, "step": 45390 }, { "epoch": 0.8427055988534213, "grad_norm": 0.37104952335357666, "learning_rate": 1.1961618535863472e-06, "loss": 0.2102, "step": 45392 }, { "epoch": 0.84274272899084, "grad_norm": 0.22386036813259125, "learning_rate": 1.1956086914197407e-06, "loss": 0.1633, "step": 45394 }, { "epoch": 0.8427798591282586, "grad_norm": 0.3792182207107544, "learning_rate": 1.1950556490545283e-06, "loss": 0.296, "step": 45396 }, { "epoch": 0.8428169892656773, "grad_norm": 0.25072771310806274, "learning_rate": 1.1945027264982367e-06, "loss": 0.318, "step": 45398 }, { "epoch": 0.842854119403096, "grad_norm": 0.4799729883670807, "learning_rate": 1.193949923758385e-06, "loss": 0.3728, "step": 45400 }, { "epoch": 0.8428912495405145, "grad_norm": 2.328273057937622, "learning_rate": 1.1933972408424988e-06, "loss": 0.2725, "step": 45402 }, { "epoch": 0.8429283796779332, "grad_norm": 0.33754175901412964, "learning_rate": 1.192844677758096e-06, "loss": 0.3185, "step": 45404 }, { "epoch": 0.8429655098153518, "grad_norm": 0.4751463532447815, "learning_rate": 1.1922922345126974e-06, "loss": 0.1721, "step": 45406 }, { "epoch": 0.8430026399527705, "grad_norm": 0.3931720554828644, "learning_rate": 1.1917399111138184e-06, "loss": 0.2646, "step": 45408 }, { "epoch": 0.8430397700901892, "grad_norm": 0.3468567728996277, "learning_rate": 1.1911877075689792e-06, "loss": 0.2787, "step": 45410 }, { "epoch": 0.8430769002276077, "grad_norm": 0.5384902954101562, "learning_rate": 1.1906356238856865e-06, "loss": 0.244, "step": 45412 }, { "epoch": 0.8431140303650264, "grad_norm": 0.3300603926181793, "learning_rate": 1.190083660071456e-06, "loss": 0.2345, "step": 45414 }, { "epoch": 0.843151160502445, "grad_norm": 0.27469688653945923, "learning_rate": 1.1895318161337999e-06, "loss": 0.1956, "step": 45416 }, { "epoch": 0.8431882906398637, "grad_norm": 0.4329138994216919, "learning_rate": 1.1889800920802241e-06, "loss": 0.2235, "step": 45418 }, { "epoch": 0.8432254207772822, "grad_norm": 0.41585421562194824, "learning_rate": 1.1884284879182373e-06, "loss": 0.1964, "step": 45420 }, { "epoch": 0.8432625509147009, "grad_norm": 0.4802803099155426, "learning_rate": 1.1878770036553445e-06, "loss": 0.2146, "step": 45422 }, { "epoch": 0.8432996810521196, "grad_norm": 0.5831377506256104, "learning_rate": 1.1873256392990517e-06, "loss": 0.2558, "step": 45424 }, { "epoch": 0.8433368111895382, "grad_norm": 0.48825928568840027, "learning_rate": 1.1867743948568588e-06, "loss": 0.2005, "step": 45426 }, { "epoch": 0.8433739413269569, "grad_norm": 0.6146504878997803, "learning_rate": 1.1862232703362675e-06, "loss": 0.4772, "step": 45428 }, { "epoch": 0.8434110714643754, "grad_norm": 0.2638562023639679, "learning_rate": 1.1856722657447772e-06, "loss": 0.373, "step": 45430 }, { "epoch": 0.8434482016017941, "grad_norm": 0.7274913191795349, "learning_rate": 1.1851213810898855e-06, "loss": 0.2684, "step": 45432 }, { "epoch": 0.8434853317392128, "grad_norm": 0.46377524733543396, "learning_rate": 1.1845706163790904e-06, "loss": 0.2082, "step": 45434 }, { "epoch": 0.8435224618766314, "grad_norm": 0.3601382076740265, "learning_rate": 1.184019971619882e-06, "loss": 0.1814, "step": 45436 }, { "epoch": 0.8435595920140501, "grad_norm": 0.3286453187465668, "learning_rate": 1.183469446819756e-06, "loss": 0.3388, "step": 45438 }, { "epoch": 0.8435967221514686, "grad_norm": 0.261030375957489, "learning_rate": 1.1829190419862014e-06, "loss": 0.2928, "step": 45440 }, { "epoch": 0.8436338522888873, "grad_norm": 0.3657304346561432, "learning_rate": 1.1823687571267072e-06, "loss": 0.2001, "step": 45442 }, { "epoch": 0.843670982426306, "grad_norm": 0.32285332679748535, "learning_rate": 1.1818185922487636e-06, "loss": 0.1254, "step": 45444 }, { "epoch": 0.8437081125637246, "grad_norm": 0.5104774236679077, "learning_rate": 1.1812685473598551e-06, "loss": 0.4094, "step": 45446 }, { "epoch": 0.8437452427011433, "grad_norm": 0.451045960187912, "learning_rate": 1.1807186224674684e-06, "loss": 0.2133, "step": 45448 }, { "epoch": 0.8437823728385618, "grad_norm": 0.4571956694126129, "learning_rate": 1.1801688175790815e-06, "loss": 0.3241, "step": 45450 }, { "epoch": 0.8438195029759805, "grad_norm": 0.5230547785758972, "learning_rate": 1.17961913270218e-06, "loss": 0.3487, "step": 45452 }, { "epoch": 0.8438566331133992, "grad_norm": 0.258680522441864, "learning_rate": 1.179069567844241e-06, "loss": 0.4542, "step": 45454 }, { "epoch": 0.8438937632508178, "grad_norm": 0.41075125336647034, "learning_rate": 1.1785201230127462e-06, "loss": 0.2004, "step": 45456 }, { "epoch": 0.8439308933882365, "grad_norm": 0.30592939257621765, "learning_rate": 1.1779707982151666e-06, "loss": 0.1041, "step": 45458 }, { "epoch": 0.843968023525655, "grad_norm": 0.23186159133911133, "learning_rate": 1.1774215934589794e-06, "loss": 0.1223, "step": 45460 }, { "epoch": 0.8440051536630737, "grad_norm": 0.41624850034713745, "learning_rate": 1.1768725087516597e-06, "loss": 0.303, "step": 45462 }, { "epoch": 0.8440422838004924, "grad_norm": 0.39758405089378357, "learning_rate": 1.1763235441006749e-06, "loss": 0.2337, "step": 45464 }, { "epoch": 0.844079413937911, "grad_norm": 0.328519344329834, "learning_rate": 1.1757746995134966e-06, "loss": 0.2406, "step": 45466 }, { "epoch": 0.8441165440753297, "grad_norm": 0.24247892200946808, "learning_rate": 1.1752259749975924e-06, "loss": 0.1872, "step": 45468 }, { "epoch": 0.8441536742127482, "grad_norm": 0.483316570520401, "learning_rate": 1.174677370560431e-06, "loss": 0.3486, "step": 45470 }, { "epoch": 0.8441908043501669, "grad_norm": 0.42939358949661255, "learning_rate": 1.1741288862094747e-06, "loss": 0.3496, "step": 45472 }, { "epoch": 0.8442279344875855, "grad_norm": 0.5680773854255676, "learning_rate": 1.17358052195219e-06, "loss": 0.4171, "step": 45474 }, { "epoch": 0.8442650646250042, "grad_norm": 0.2503086030483246, "learning_rate": 1.1730322777960334e-06, "loss": 0.1558, "step": 45476 }, { "epoch": 0.8443021947624229, "grad_norm": 0.42405831813812256, "learning_rate": 1.172484153748471e-06, "loss": 0.3673, "step": 45478 }, { "epoch": 0.8443393248998414, "grad_norm": 0.29770106077194214, "learning_rate": 1.1719361498169545e-06, "loss": 0.3578, "step": 45480 }, { "epoch": 0.8443764550372601, "grad_norm": 0.6463533043861389, "learning_rate": 1.1713882660089448e-06, "loss": 0.1636, "step": 45482 }, { "epoch": 0.8444135851746787, "grad_norm": 0.4354647696018219, "learning_rate": 1.170840502331896e-06, "loss": 0.2478, "step": 45484 }, { "epoch": 0.8444507153120974, "grad_norm": 0.4315684735774994, "learning_rate": 1.1702928587932626e-06, "loss": 0.3181, "step": 45486 }, { "epoch": 0.844487845449516, "grad_norm": 0.46276187896728516, "learning_rate": 1.169745335400494e-06, "loss": 0.3846, "step": 45488 }, { "epoch": 0.8445249755869346, "grad_norm": 0.4298904836177826, "learning_rate": 1.1691979321610414e-06, "loss": 0.2277, "step": 45490 }, { "epoch": 0.8445621057243533, "grad_norm": 0.2679091989994049, "learning_rate": 1.1686506490823546e-06, "loss": 0.343, "step": 45492 }, { "epoch": 0.8445992358617719, "grad_norm": 0.6608708500862122, "learning_rate": 1.1681034861718787e-06, "loss": 0.2582, "step": 45494 }, { "epoch": 0.8446363659991906, "grad_norm": 0.3383752107620239, "learning_rate": 1.16755644343706e-06, "loss": 0.2619, "step": 45496 }, { "epoch": 0.8446734961366092, "grad_norm": 0.16610772907733917, "learning_rate": 1.1670095208853438e-06, "loss": 0.1382, "step": 45498 }, { "epoch": 0.8447106262740278, "grad_norm": 0.33105698227882385, "learning_rate": 1.1664627185241705e-06, "loss": 0.2885, "step": 45500 }, { "epoch": 0.8447477564114465, "grad_norm": 0.4174129366874695, "learning_rate": 1.1659160363609778e-06, "loss": 0.1138, "step": 45502 }, { "epoch": 0.8447848865488651, "grad_norm": 0.4087831974029541, "learning_rate": 1.1653694744032062e-06, "loss": 0.2137, "step": 45504 }, { "epoch": 0.8448220166862838, "grad_norm": 0.35414040088653564, "learning_rate": 1.1648230326582943e-06, "loss": 0.3037, "step": 45506 }, { "epoch": 0.8448591468237024, "grad_norm": 0.42537495493888855, "learning_rate": 1.164276711133675e-06, "loss": 0.1782, "step": 45508 }, { "epoch": 0.844896276961121, "grad_norm": 0.22932176291942596, "learning_rate": 1.1637305098367847e-06, "loss": 0.243, "step": 45510 }, { "epoch": 0.8449334070985397, "grad_norm": 0.2516622245311737, "learning_rate": 1.1631844287750572e-06, "loss": 0.2511, "step": 45512 }, { "epoch": 0.8449705372359583, "grad_norm": 0.6327522397041321, "learning_rate": 1.1626384679559167e-06, "loss": 0.5104, "step": 45514 }, { "epoch": 0.845007667373377, "grad_norm": 0.3257109224796295, "learning_rate": 1.1620926273867973e-06, "loss": 0.2925, "step": 45516 }, { "epoch": 0.8450447975107956, "grad_norm": 0.35986414551734924, "learning_rate": 1.161546907075123e-06, "loss": 0.3441, "step": 45518 }, { "epoch": 0.8450819276482142, "grad_norm": 0.4351577162742615, "learning_rate": 1.1610013070283243e-06, "loss": 0.3029, "step": 45520 }, { "epoch": 0.8451190577856329, "grad_norm": 0.2859640419483185, "learning_rate": 1.160455827253819e-06, "loss": 0.1099, "step": 45522 }, { "epoch": 0.8451561879230515, "grad_norm": 0.33043181896209717, "learning_rate": 1.1599104677590356e-06, "loss": 0.2182, "step": 45524 }, { "epoch": 0.8451933180604702, "grad_norm": 0.6442746520042419, "learning_rate": 1.1593652285513879e-06, "loss": 0.3305, "step": 45526 }, { "epoch": 0.8452304481978887, "grad_norm": 0.3886159062385559, "learning_rate": 1.1588201096383001e-06, "loss": 0.2982, "step": 45528 }, { "epoch": 0.8452675783353074, "grad_norm": 0.38991186022758484, "learning_rate": 1.1582751110271872e-06, "loss": 0.2858, "step": 45530 }, { "epoch": 0.8453047084727261, "grad_norm": 0.3442493975162506, "learning_rate": 1.157730232725467e-06, "loss": 0.2996, "step": 45532 }, { "epoch": 0.8453418386101447, "grad_norm": 0.9192871451377869, "learning_rate": 1.157185474740552e-06, "loss": 0.3112, "step": 45534 }, { "epoch": 0.8453789687475634, "grad_norm": 0.3962438106536865, "learning_rate": 1.1566408370798554e-06, "loss": 0.238, "step": 45536 }, { "epoch": 0.8454160988849819, "grad_norm": 0.3688827157020569, "learning_rate": 1.1560963197507902e-06, "loss": 0.3575, "step": 45538 }, { "epoch": 0.8454532290224006, "grad_norm": 0.23311764001846313, "learning_rate": 1.1555519227607625e-06, "loss": 0.1733, "step": 45540 }, { "epoch": 0.8454903591598193, "grad_norm": 0.3592592179775238, "learning_rate": 1.155007646117182e-06, "loss": 0.3836, "step": 45542 }, { "epoch": 0.8455274892972379, "grad_norm": 0.35091525316238403, "learning_rate": 1.154463489827451e-06, "loss": 0.2495, "step": 45544 }, { "epoch": 0.8455646194346566, "grad_norm": 0.5616759657859802, "learning_rate": 1.1539194538989773e-06, "loss": 0.4544, "step": 45546 }, { "epoch": 0.8456017495720751, "grad_norm": 0.34819141030311584, "learning_rate": 1.1533755383391633e-06, "loss": 0.2382, "step": 45548 }, { "epoch": 0.8456388797094938, "grad_norm": 0.32599616050720215, "learning_rate": 1.1528317431554103e-06, "loss": 0.2164, "step": 45550 }, { "epoch": 0.8456760098469125, "grad_norm": 0.41436079144477844, "learning_rate": 1.1522880683551153e-06, "loss": 0.1302, "step": 45552 }, { "epoch": 0.8457131399843311, "grad_norm": 0.5267750024795532, "learning_rate": 1.1517445139456785e-06, "loss": 0.2319, "step": 45554 }, { "epoch": 0.8457502701217497, "grad_norm": 0.2984572947025299, "learning_rate": 1.151201079934494e-06, "loss": 0.2153, "step": 45556 }, { "epoch": 0.8457874002591683, "grad_norm": 0.31812795996665955, "learning_rate": 1.1506577663289587e-06, "loss": 0.3158, "step": 45558 }, { "epoch": 0.845824530396587, "grad_norm": 0.5453408360481262, "learning_rate": 1.1501145731364649e-06, "loss": 0.4759, "step": 45560 }, { "epoch": 0.8458616605340057, "grad_norm": 0.5357164144515991, "learning_rate": 1.1495715003644026e-06, "loss": 0.2113, "step": 45562 }, { "epoch": 0.8458987906714243, "grad_norm": 0.3437976539134979, "learning_rate": 1.1490285480201636e-06, "loss": 0.3269, "step": 45564 }, { "epoch": 0.845935920808843, "grad_norm": 0.3676532506942749, "learning_rate": 1.148485716111133e-06, "loss": 0.2621, "step": 45566 }, { "epoch": 0.8459730509462615, "grad_norm": 0.29599782824516296, "learning_rate": 1.1479430046446981e-06, "loss": 0.1774, "step": 45568 }, { "epoch": 0.8460101810836802, "grad_norm": 0.25979599356651306, "learning_rate": 1.1474004136282434e-06, "loss": 0.2226, "step": 45570 }, { "epoch": 0.8460473112210988, "grad_norm": 0.4676661491394043, "learning_rate": 1.1468579430691528e-06, "loss": 0.352, "step": 45572 }, { "epoch": 0.8460844413585175, "grad_norm": 0.4226343333721161, "learning_rate": 1.1463155929748093e-06, "loss": 0.3163, "step": 45574 }, { "epoch": 0.8461215714959361, "grad_norm": 0.43027463555336, "learning_rate": 1.145773363352589e-06, "loss": 0.2832, "step": 45576 }, { "epoch": 0.8461587016333547, "grad_norm": 0.3799506723880768, "learning_rate": 1.145231254209871e-06, "loss": 0.2021, "step": 45578 }, { "epoch": 0.8461958317707734, "grad_norm": 0.42559731006622314, "learning_rate": 1.1446892655540332e-06, "loss": 0.3203, "step": 45580 }, { "epoch": 0.846232961908192, "grad_norm": 0.2443203181028366, "learning_rate": 1.14414739739245e-06, "loss": 0.1859, "step": 45582 }, { "epoch": 0.8462700920456107, "grad_norm": 0.2973026633262634, "learning_rate": 1.1436056497324955e-06, "loss": 0.4137, "step": 45584 }, { "epoch": 0.8463072221830293, "grad_norm": 0.45685309171676636, "learning_rate": 1.1430640225815392e-06, "loss": 0.4294, "step": 45586 }, { "epoch": 0.8463443523204479, "grad_norm": 0.5407895445823669, "learning_rate": 1.1425225159469533e-06, "loss": 0.3843, "step": 45588 }, { "epoch": 0.8463814824578666, "grad_norm": 0.18578919768333435, "learning_rate": 1.1419811298361027e-06, "loss": 0.2257, "step": 45590 }, { "epoch": 0.8464186125952852, "grad_norm": 0.3667568266391754, "learning_rate": 1.1414398642563562e-06, "loss": 0.411, "step": 45592 }, { "epoch": 0.8464557427327039, "grad_norm": 0.3920191526412964, "learning_rate": 1.1408987192150789e-06, "loss": 0.1298, "step": 45594 }, { "epoch": 0.8464928728701225, "grad_norm": 0.42858630418777466, "learning_rate": 1.1403576947196337e-06, "loss": 0.2344, "step": 45596 }, { "epoch": 0.8465300030075411, "grad_norm": 0.3888542652130127, "learning_rate": 1.1398167907773827e-06, "loss": 0.3274, "step": 45598 }, { "epoch": 0.8465671331449598, "grad_norm": 0.414314329624176, "learning_rate": 1.1392760073956876e-06, "loss": 0.2835, "step": 45600 }, { "epoch": 0.8466042632823784, "grad_norm": 0.5580448508262634, "learning_rate": 1.1387353445819026e-06, "loss": 0.2202, "step": 45602 }, { "epoch": 0.846641393419797, "grad_norm": 0.32915905117988586, "learning_rate": 1.1381948023433886e-06, "loss": 0.2831, "step": 45604 }, { "epoch": 0.8466785235572157, "grad_norm": 0.40838298201560974, "learning_rate": 1.1376543806874996e-06, "loss": 0.28, "step": 45606 }, { "epoch": 0.8467156536946343, "grad_norm": 0.3009864091873169, "learning_rate": 1.1371140796215873e-06, "loss": 0.2985, "step": 45608 }, { "epoch": 0.846752783832053, "grad_norm": 0.4377017319202423, "learning_rate": 1.136573899153005e-06, "loss": 0.4453, "step": 45610 }, { "epoch": 0.8467899139694716, "grad_norm": 0.17988137900829315, "learning_rate": 1.1360338392891057e-06, "loss": 0.1987, "step": 45612 }, { "epoch": 0.8468270441068902, "grad_norm": 0.30020540952682495, "learning_rate": 1.1354939000372323e-06, "loss": 0.3311, "step": 45614 }, { "epoch": 0.8468641742443089, "grad_norm": 0.24698926508426666, "learning_rate": 1.1349540814047345e-06, "loss": 0.1095, "step": 45616 }, { "epoch": 0.8469013043817275, "grad_norm": 0.6415321230888367, "learning_rate": 1.1344143833989575e-06, "loss": 0.2231, "step": 45618 }, { "epoch": 0.8469384345191462, "grad_norm": 0.31010857224464417, "learning_rate": 1.1338748060272464e-06, "loss": 0.2364, "step": 45620 }, { "epoch": 0.8469755646565648, "grad_norm": 0.5542388558387756, "learning_rate": 1.1333353492969412e-06, "loss": 0.2095, "step": 45622 }, { "epoch": 0.8470126947939834, "grad_norm": 0.44835609197616577, "learning_rate": 1.1327960132153848e-06, "loss": 0.183, "step": 45624 }, { "epoch": 0.847049824931402, "grad_norm": 0.4196886420249939, "learning_rate": 1.1322567977899135e-06, "loss": 0.2147, "step": 45626 }, { "epoch": 0.8470869550688207, "grad_norm": 0.3011663854122162, "learning_rate": 1.131717703027866e-06, "loss": 0.223, "step": 45628 }, { "epoch": 0.8471240852062394, "grad_norm": 0.5968034267425537, "learning_rate": 1.1311787289365762e-06, "loss": 0.276, "step": 45630 }, { "epoch": 0.847161215343658, "grad_norm": 0.48993223905563354, "learning_rate": 1.1306398755233782e-06, "loss": 0.3658, "step": 45632 }, { "epoch": 0.8471983454810766, "grad_norm": 0.3149981200695038, "learning_rate": 1.1301011427956044e-06, "loss": 0.1806, "step": 45634 }, { "epoch": 0.8472354756184952, "grad_norm": 0.47624871134757996, "learning_rate": 1.1295625307605852e-06, "loss": 0.1935, "step": 45636 }, { "epoch": 0.8472726057559139, "grad_norm": 0.5210164189338684, "learning_rate": 1.1290240394256524e-06, "loss": 0.3052, "step": 45638 }, { "epoch": 0.8473097358933326, "grad_norm": 0.5276150107383728, "learning_rate": 1.1284856687981295e-06, "loss": 0.3865, "step": 45640 }, { "epoch": 0.8473468660307512, "grad_norm": 0.5263790488243103, "learning_rate": 1.1279474188853422e-06, "loss": 0.28, "step": 45642 }, { "epoch": 0.8473839961681698, "grad_norm": 0.49327078461647034, "learning_rate": 1.1274092896946165e-06, "loss": 0.2888, "step": 45644 }, { "epoch": 0.8474211263055884, "grad_norm": 0.40713727474212646, "learning_rate": 1.1268712812332761e-06, "loss": 0.3151, "step": 45646 }, { "epoch": 0.8474582564430071, "grad_norm": 0.3393518030643463, "learning_rate": 1.1263333935086363e-06, "loss": 0.3122, "step": 45648 }, { "epoch": 0.8474953865804258, "grad_norm": 0.42300945520401, "learning_rate": 1.1257956265280222e-06, "loss": 0.3341, "step": 45650 }, { "epoch": 0.8475325167178444, "grad_norm": 0.5484139323234558, "learning_rate": 1.1252579802987463e-06, "loss": 0.2483, "step": 45652 }, { "epoch": 0.847569646855263, "grad_norm": 0.5140788555145264, "learning_rate": 1.1247204548281254e-06, "loss": 0.3352, "step": 45654 }, { "epoch": 0.8476067769926816, "grad_norm": 0.3372455835342407, "learning_rate": 1.1241830501234763e-06, "loss": 0.1911, "step": 45656 }, { "epoch": 0.8476439071301003, "grad_norm": 0.43498682975769043, "learning_rate": 1.1236457661921085e-06, "loss": 0.2873, "step": 45658 }, { "epoch": 0.847681037267519, "grad_norm": 0.37636634707450867, "learning_rate": 1.123108603041334e-06, "loss": 0.1302, "step": 45660 }, { "epoch": 0.8477181674049376, "grad_norm": 0.617347002029419, "learning_rate": 1.1225715606784626e-06, "loss": 0.2554, "step": 45662 }, { "epoch": 0.8477552975423562, "grad_norm": 0.2638891935348511, "learning_rate": 1.1220346391108027e-06, "loss": 0.2384, "step": 45664 }, { "epoch": 0.8477924276797748, "grad_norm": 0.3865855932235718, "learning_rate": 1.1214978383456576e-06, "loss": 0.275, "step": 45666 }, { "epoch": 0.8478295578171935, "grad_norm": 0.3010675609111786, "learning_rate": 1.1209611583903334e-06, "loss": 0.1636, "step": 45668 }, { "epoch": 0.8478666879546122, "grad_norm": 0.45228472352027893, "learning_rate": 1.1204245992521313e-06, "loss": 0.2742, "step": 45670 }, { "epoch": 0.8479038180920307, "grad_norm": 0.6429480910301208, "learning_rate": 1.1198881609383527e-06, "loss": 0.4056, "step": 45672 }, { "epoch": 0.8479409482294494, "grad_norm": 0.4128681719303131, "learning_rate": 1.1193518434562966e-06, "loss": 0.2397, "step": 45674 }, { "epoch": 0.847978078366868, "grad_norm": 0.4099940061569214, "learning_rate": 1.1188156468132639e-06, "loss": 0.2141, "step": 45676 }, { "epoch": 0.8480152085042867, "grad_norm": 0.3877869248390198, "learning_rate": 1.1182795710165462e-06, "loss": 0.2067, "step": 45678 }, { "epoch": 0.8480523386417053, "grad_norm": 0.2818913161754608, "learning_rate": 1.117743616073439e-06, "loss": 0.1738, "step": 45680 }, { "epoch": 0.848089468779124, "grad_norm": 0.32833632826805115, "learning_rate": 1.1172077819912363e-06, "loss": 0.215, "step": 45682 }, { "epoch": 0.8481265989165426, "grad_norm": 0.33793866634368896, "learning_rate": 1.116672068777228e-06, "loss": 0.1822, "step": 45684 }, { "epoch": 0.8481637290539612, "grad_norm": 0.6777687668800354, "learning_rate": 1.116136476438705e-06, "loss": 0.259, "step": 45686 }, { "epoch": 0.8482008591913799, "grad_norm": 0.46340078115463257, "learning_rate": 1.1156010049829557e-06, "loss": 0.2372, "step": 45688 }, { "epoch": 0.8482379893287985, "grad_norm": 0.38235950469970703, "learning_rate": 1.1150656544172633e-06, "loss": 0.3629, "step": 45690 }, { "epoch": 0.8482751194662171, "grad_norm": 0.33072811365127563, "learning_rate": 1.1145304247489164e-06, "loss": 0.2686, "step": 45692 }, { "epoch": 0.8483122496036358, "grad_norm": 0.2648681104183197, "learning_rate": 1.1139953159851924e-06, "loss": 0.3272, "step": 45694 }, { "epoch": 0.8483493797410544, "grad_norm": 0.36077794432640076, "learning_rate": 1.1134603281333756e-06, "loss": 0.1412, "step": 45696 }, { "epoch": 0.8483865098784731, "grad_norm": 0.35475361347198486, "learning_rate": 1.1129254612007456e-06, "loss": 0.3815, "step": 45698 }, { "epoch": 0.8484236400158917, "grad_norm": 0.3396094739437103, "learning_rate": 1.1123907151945823e-06, "loss": 0.2239, "step": 45700 }, { "epoch": 0.8484607701533103, "grad_norm": 0.4191611111164093, "learning_rate": 1.1118560901221575e-06, "loss": 0.3854, "step": 45702 }, { "epoch": 0.848497900290729, "grad_norm": 0.4581218659877777, "learning_rate": 1.111321585990749e-06, "loss": 0.2288, "step": 45704 }, { "epoch": 0.8485350304281476, "grad_norm": 0.531073808670044, "learning_rate": 1.1107872028076283e-06, "loss": 0.1798, "step": 45706 }, { "epoch": 0.8485721605655663, "grad_norm": 0.32432320713996887, "learning_rate": 1.1102529405800678e-06, "loss": 0.299, "step": 45708 }, { "epoch": 0.8486092907029849, "grad_norm": 0.35589516162872314, "learning_rate": 1.1097187993153392e-06, "loss": 0.0717, "step": 45710 }, { "epoch": 0.8486464208404035, "grad_norm": 0.4751163125038147, "learning_rate": 1.1091847790207066e-06, "loss": 0.3102, "step": 45712 }, { "epoch": 0.8486835509778222, "grad_norm": 0.4791879653930664, "learning_rate": 1.108650879703439e-06, "loss": 0.3354, "step": 45714 }, { "epoch": 0.8487206811152408, "grad_norm": 0.38955581188201904, "learning_rate": 1.1081171013707992e-06, "loss": 0.3327, "step": 45716 }, { "epoch": 0.8487578112526595, "grad_norm": 0.6199437975883484, "learning_rate": 1.1075834440300503e-06, "loss": 0.3467, "step": 45718 }, { "epoch": 0.848794941390078, "grad_norm": 0.4170013666152954, "learning_rate": 1.1070499076884555e-06, "loss": 0.3848, "step": 45720 }, { "epoch": 0.8488320715274967, "grad_norm": 0.3895348012447357, "learning_rate": 1.1065164923532746e-06, "loss": 0.2023, "step": 45722 }, { "epoch": 0.8488692016649153, "grad_norm": 0.3582184612751007, "learning_rate": 1.1059831980317636e-06, "loss": 0.1324, "step": 45724 }, { "epoch": 0.848906331802334, "grad_norm": 0.340768426656723, "learning_rate": 1.1054500247311839e-06, "loss": 0.232, "step": 45726 }, { "epoch": 0.8489434619397527, "grad_norm": 0.5099552273750305, "learning_rate": 1.104916972458785e-06, "loss": 0.1743, "step": 45728 }, { "epoch": 0.8489805920771712, "grad_norm": 0.47041523456573486, "learning_rate": 1.104384041221822e-06, "loss": 0.3169, "step": 45730 }, { "epoch": 0.8490177222145899, "grad_norm": 0.30012568831443787, "learning_rate": 1.1038512310275484e-06, "loss": 0.1755, "step": 45732 }, { "epoch": 0.8490548523520085, "grad_norm": 0.2999100685119629, "learning_rate": 1.1033185418832105e-06, "loss": 0.2874, "step": 45734 }, { "epoch": 0.8490919824894272, "grad_norm": 0.27719348669052124, "learning_rate": 1.1027859737960588e-06, "loss": 0.3284, "step": 45736 }, { "epoch": 0.8491291126268459, "grad_norm": 0.3581984341144562, "learning_rate": 1.1022535267733426e-06, "loss": 0.3144, "step": 45738 }, { "epoch": 0.8491662427642644, "grad_norm": 0.2818341553211212, "learning_rate": 1.101721200822301e-06, "loss": 0.2828, "step": 45740 }, { "epoch": 0.8492033729016831, "grad_norm": 0.44229641556739807, "learning_rate": 1.101188995950181e-06, "loss": 0.302, "step": 45742 }, { "epoch": 0.8492405030391017, "grad_norm": 0.35186806321144104, "learning_rate": 1.100656912164223e-06, "loss": 0.2456, "step": 45744 }, { "epoch": 0.8492776331765204, "grad_norm": 0.3045912981033325, "learning_rate": 1.1001249494716682e-06, "loss": 0.1405, "step": 45746 }, { "epoch": 0.8493147633139391, "grad_norm": 0.4344618320465088, "learning_rate": 1.099593107879755e-06, "loss": 0.1408, "step": 45748 }, { "epoch": 0.8493518934513576, "grad_norm": 0.3132820427417755, "learning_rate": 1.099061387395719e-06, "loss": 0.387, "step": 45750 }, { "epoch": 0.8493890235887763, "grad_norm": 0.4699723720550537, "learning_rate": 1.0985297880267986e-06, "loss": 0.435, "step": 45752 }, { "epoch": 0.8494261537261949, "grad_norm": 0.4801592230796814, "learning_rate": 1.097998309780225e-06, "loss": 0.4126, "step": 45754 }, { "epoch": 0.8494632838636136, "grad_norm": 0.22513142228126526, "learning_rate": 1.0974669526632275e-06, "loss": 0.3236, "step": 45756 }, { "epoch": 0.8495004140010323, "grad_norm": 0.6874632239341736, "learning_rate": 1.0969357166830397e-06, "loss": 0.2838, "step": 45758 }, { "epoch": 0.8495375441384508, "grad_norm": 0.48389434814453125, "learning_rate": 1.0964046018468877e-06, "loss": 0.1719, "step": 45760 }, { "epoch": 0.8495746742758695, "grad_norm": 0.3801978528499603, "learning_rate": 1.0958736081620003e-06, "loss": 0.3376, "step": 45762 }, { "epoch": 0.8496118044132881, "grad_norm": 0.315958708524704, "learning_rate": 1.0953427356356038e-06, "loss": 0.1782, "step": 45764 }, { "epoch": 0.8496489345507068, "grad_norm": 0.43887069821357727, "learning_rate": 1.0948119842749182e-06, "loss": 0.1563, "step": 45766 }, { "epoch": 0.8496860646881255, "grad_norm": 0.3497162461280823, "learning_rate": 1.0942813540871677e-06, "loss": 0.337, "step": 45768 }, { "epoch": 0.849723194825544, "grad_norm": 0.4325413405895233, "learning_rate": 1.0937508450795719e-06, "loss": 0.2204, "step": 45770 }, { "epoch": 0.8497603249629627, "grad_norm": 0.33843863010406494, "learning_rate": 1.0932204572593496e-06, "loss": 0.2056, "step": 45772 }, { "epoch": 0.8497974551003813, "grad_norm": 0.3847010135650635, "learning_rate": 1.0926901906337205e-06, "loss": 0.2986, "step": 45774 }, { "epoch": 0.8498345852378, "grad_norm": 0.2653925120830536, "learning_rate": 1.0921600452098968e-06, "loss": 0.3462, "step": 45776 }, { "epoch": 0.8498717153752186, "grad_norm": 0.4412490427494049, "learning_rate": 1.0916300209950915e-06, "loss": 0.3688, "step": 45778 }, { "epoch": 0.8499088455126372, "grad_norm": 0.6875383853912354, "learning_rate": 1.0911001179965175e-06, "loss": 0.3007, "step": 45780 }, { "epoch": 0.8499459756500559, "grad_norm": 0.2566690146923065, "learning_rate": 1.090570336221386e-06, "loss": 0.2596, "step": 45782 }, { "epoch": 0.8499831057874745, "grad_norm": 0.36915159225463867, "learning_rate": 1.0900406756769055e-06, "loss": 0.3109, "step": 45784 }, { "epoch": 0.8500202359248932, "grad_norm": 0.2693183720111847, "learning_rate": 1.0895111363702826e-06, "loss": 0.1505, "step": 45786 }, { "epoch": 0.8500573660623117, "grad_norm": 0.5959376692771912, "learning_rate": 1.0889817183087236e-06, "loss": 0.3289, "step": 45788 }, { "epoch": 0.8500944961997304, "grad_norm": 0.2837160527706146, "learning_rate": 1.088452421499433e-06, "loss": 0.2686, "step": 45790 }, { "epoch": 0.8501316263371491, "grad_norm": 0.3837205171585083, "learning_rate": 1.0879232459496103e-06, "loss": 0.0845, "step": 45792 }, { "epoch": 0.8501687564745677, "grad_norm": 0.3714580833911896, "learning_rate": 1.0873941916664578e-06, "loss": 0.1331, "step": 45794 }, { "epoch": 0.8502058866119864, "grad_norm": 0.41546598076820374, "learning_rate": 1.0868652586571748e-06, "loss": 0.2922, "step": 45796 }, { "epoch": 0.850243016749405, "grad_norm": 0.374381959438324, "learning_rate": 1.0863364469289573e-06, "loss": 0.3212, "step": 45798 }, { "epoch": 0.8502801468868236, "grad_norm": 0.3366502523422241, "learning_rate": 1.0858077564890002e-06, "loss": 0.3412, "step": 45800 }, { "epoch": 0.8503172770242423, "grad_norm": 0.32865509390830994, "learning_rate": 1.0852791873445002e-06, "loss": 0.43, "step": 45802 }, { "epoch": 0.8503544071616609, "grad_norm": 0.28559446334838867, "learning_rate": 1.0847507395026468e-06, "loss": 0.2814, "step": 45804 }, { "epoch": 0.8503915372990796, "grad_norm": 0.43025457859039307, "learning_rate": 1.0842224129706302e-06, "loss": 0.3273, "step": 45806 }, { "epoch": 0.8504286674364981, "grad_norm": 0.38377469778060913, "learning_rate": 1.0836942077556423e-06, "loss": 0.3647, "step": 45808 }, { "epoch": 0.8504657975739168, "grad_norm": 0.22295059263706207, "learning_rate": 1.0831661238648684e-06, "loss": 0.3308, "step": 45810 }, { "epoch": 0.8505029277113355, "grad_norm": 0.36052536964416504, "learning_rate": 1.0826381613054937e-06, "loss": 0.2514, "step": 45812 }, { "epoch": 0.8505400578487541, "grad_norm": 0.2967050075531006, "learning_rate": 1.082110320084705e-06, "loss": 0.2162, "step": 45814 }, { "epoch": 0.8505771879861728, "grad_norm": 0.5921688675880432, "learning_rate": 1.0815826002096819e-06, "loss": 0.2528, "step": 45816 }, { "epoch": 0.8506143181235913, "grad_norm": 0.5063170194625854, "learning_rate": 1.0810550016876066e-06, "loss": 0.2938, "step": 45818 }, { "epoch": 0.85065144826101, "grad_norm": 0.3069388270378113, "learning_rate": 1.0805275245256563e-06, "loss": 0.2259, "step": 45820 }, { "epoch": 0.8506885783984287, "grad_norm": 0.5941773056983948, "learning_rate": 1.0800001687310091e-06, "loss": 0.2344, "step": 45822 }, { "epoch": 0.8507257085358473, "grad_norm": 0.3848460614681244, "learning_rate": 1.0794729343108413e-06, "loss": 0.1967, "step": 45824 }, { "epoch": 0.850762838673266, "grad_norm": 0.4966377019882202, "learning_rate": 1.0789458212723269e-06, "loss": 0.3422, "step": 45826 }, { "epoch": 0.8507999688106845, "grad_norm": 0.4261513650417328, "learning_rate": 1.0784188296226394e-06, "loss": 0.2784, "step": 45828 }, { "epoch": 0.8508370989481032, "grad_norm": 0.4619360864162445, "learning_rate": 1.0778919593689475e-06, "loss": 0.1112, "step": 45830 }, { "epoch": 0.8508742290855218, "grad_norm": 0.16798186302185059, "learning_rate": 1.0773652105184206e-06, "loss": 0.2746, "step": 45832 }, { "epoch": 0.8509113592229405, "grad_norm": 0.443312406539917, "learning_rate": 1.0768385830782268e-06, "loss": 0.3658, "step": 45834 }, { "epoch": 0.8509484893603592, "grad_norm": 0.2957082986831665, "learning_rate": 1.0763120770555313e-06, "loss": 0.3061, "step": 45836 }, { "epoch": 0.8509856194977777, "grad_norm": 0.4394667148590088, "learning_rate": 1.0757856924575017e-06, "loss": 0.3056, "step": 45838 }, { "epoch": 0.8510227496351964, "grad_norm": 0.36978834867477417, "learning_rate": 1.0752594292912976e-06, "loss": 0.2392, "step": 45840 }, { "epoch": 0.851059879772615, "grad_norm": 0.6332898139953613, "learning_rate": 1.0747332875640782e-06, "loss": 0.233, "step": 45842 }, { "epoch": 0.8510970099100337, "grad_norm": 0.1708012968301773, "learning_rate": 1.0742072672830039e-06, "loss": 0.2769, "step": 45844 }, { "epoch": 0.8511341400474524, "grad_norm": 0.49460455775260925, "learning_rate": 1.0736813684552338e-06, "loss": 0.1069, "step": 45846 }, { "epoch": 0.8511712701848709, "grad_norm": 0.4883250296115875, "learning_rate": 1.0731555910879221e-06, "loss": 0.2566, "step": 45848 }, { "epoch": 0.8512084003222896, "grad_norm": 0.3710325360298157, "learning_rate": 1.072629935188224e-06, "loss": 0.2929, "step": 45850 }, { "epoch": 0.8512455304597082, "grad_norm": 0.4473518431186676, "learning_rate": 1.0721044007632942e-06, "loss": 0.2774, "step": 45852 }, { "epoch": 0.8512826605971269, "grad_norm": 0.3142022490501404, "learning_rate": 1.0715789878202787e-06, "loss": 0.3252, "step": 45854 }, { "epoch": 0.8513197907345456, "grad_norm": 0.5459117293357849, "learning_rate": 1.0710536963663299e-06, "loss": 0.3411, "step": 45856 }, { "epoch": 0.8513569208719641, "grad_norm": 0.4470216631889343, "learning_rate": 1.0705285264085952e-06, "loss": 0.2737, "step": 45858 }, { "epoch": 0.8513940510093828, "grad_norm": 0.3486539125442505, "learning_rate": 1.0700034779542213e-06, "loss": 0.2203, "step": 45860 }, { "epoch": 0.8514311811468014, "grad_norm": 0.4523930251598358, "learning_rate": 1.069478551010351e-06, "loss": 0.1823, "step": 45862 }, { "epoch": 0.8514683112842201, "grad_norm": 0.44851231575012207, "learning_rate": 1.0689537455841293e-06, "loss": 0.2858, "step": 45864 }, { "epoch": 0.8515054414216388, "grad_norm": 0.3544789254665375, "learning_rate": 1.0684290616826932e-06, "loss": 0.2376, "step": 45866 }, { "epoch": 0.8515425715590573, "grad_norm": 0.2903895974159241, "learning_rate": 1.0679044993131838e-06, "loss": 0.3488, "step": 45868 }, { "epoch": 0.851579701696476, "grad_norm": 0.28002220392227173, "learning_rate": 1.067380058482741e-06, "loss": 0.1652, "step": 45870 }, { "epoch": 0.8516168318338946, "grad_norm": 0.5367836952209473, "learning_rate": 1.066855739198498e-06, "loss": 0.3014, "step": 45872 }, { "epoch": 0.8516539619713133, "grad_norm": 0.28074708580970764, "learning_rate": 1.0663315414675912e-06, "loss": 0.3915, "step": 45874 }, { "epoch": 0.8516910921087318, "grad_norm": 0.35318630933761597, "learning_rate": 1.0658074652971518e-06, "loss": 0.1274, "step": 45876 }, { "epoch": 0.8517282222461505, "grad_norm": 0.39961472153663635, "learning_rate": 1.065283510694315e-06, "loss": 0.4723, "step": 45878 }, { "epoch": 0.8517653523835692, "grad_norm": 0.3566596806049347, "learning_rate": 1.064759677666204e-06, "loss": 0.1698, "step": 45880 }, { "epoch": 0.8518024825209878, "grad_norm": 0.4198572039604187, "learning_rate": 1.064235966219952e-06, "loss": 0.2475, "step": 45882 }, { "epoch": 0.8518396126584065, "grad_norm": 0.41657543182373047, "learning_rate": 1.0637123763626812e-06, "loss": 0.3093, "step": 45884 }, { "epoch": 0.851876742795825, "grad_norm": 0.5737757086753845, "learning_rate": 1.0631889081015178e-06, "loss": 0.2677, "step": 45886 }, { "epoch": 0.8519138729332437, "grad_norm": 0.5748029947280884, "learning_rate": 1.0626655614435843e-06, "loss": 0.153, "step": 45888 }, { "epoch": 0.8519510030706624, "grad_norm": 0.4245150089263916, "learning_rate": 1.0621423363960038e-06, "loss": 0.2452, "step": 45890 }, { "epoch": 0.851988133208081, "grad_norm": 0.2722765803337097, "learning_rate": 1.0616192329658914e-06, "loss": 0.2851, "step": 45892 }, { "epoch": 0.8520252633454997, "grad_norm": 0.4790297746658325, "learning_rate": 1.0610962511603695e-06, "loss": 0.2379, "step": 45894 }, { "epoch": 0.8520623934829182, "grad_norm": 0.180750772356987, "learning_rate": 1.0605733909865512e-06, "loss": 0.5136, "step": 45896 }, { "epoch": 0.8520995236203369, "grad_norm": 0.6062609553337097, "learning_rate": 1.060050652451553e-06, "loss": 0.2474, "step": 45898 }, { "epoch": 0.8521366537577556, "grad_norm": 0.3892255127429962, "learning_rate": 1.0595280355624882e-06, "loss": 0.2568, "step": 45900 }, { "epoch": 0.8521737838951742, "grad_norm": 0.5422790050506592, "learning_rate": 1.0590055403264655e-06, "loss": 0.2393, "step": 45902 }, { "epoch": 0.8522109140325929, "grad_norm": 0.5770406126976013, "learning_rate": 1.0584831667505991e-06, "loss": 0.4254, "step": 45904 }, { "epoch": 0.8522480441700114, "grad_norm": 0.16075527667999268, "learning_rate": 1.05796091484199e-06, "loss": 0.1344, "step": 45906 }, { "epoch": 0.8522851743074301, "grad_norm": 0.49856898188591003, "learning_rate": 1.0574387846077494e-06, "loss": 0.3245, "step": 45908 }, { "epoch": 0.8523223044448488, "grad_norm": 0.33979272842407227, "learning_rate": 1.0569167760549814e-06, "loss": 0.3413, "step": 45910 }, { "epoch": 0.8523594345822674, "grad_norm": 0.24217449128627777, "learning_rate": 1.0563948891907871e-06, "loss": 0.1777, "step": 45912 }, { "epoch": 0.8523965647196861, "grad_norm": 0.47848424315452576, "learning_rate": 1.0558731240222698e-06, "loss": 0.2715, "step": 45914 }, { "epoch": 0.8524336948571046, "grad_norm": 0.31679052114486694, "learning_rate": 1.0553514805565312e-06, "loss": 0.2867, "step": 45916 }, { "epoch": 0.8524708249945233, "grad_norm": 0.5799715518951416, "learning_rate": 1.054829958800664e-06, "loss": 0.1701, "step": 45918 }, { "epoch": 0.852507955131942, "grad_norm": 0.5492575764656067, "learning_rate": 1.0543085587617674e-06, "loss": 0.1786, "step": 45920 }, { "epoch": 0.8525450852693606, "grad_norm": 0.2964135706424713, "learning_rate": 1.0537872804469374e-06, "loss": 0.1169, "step": 45922 }, { "epoch": 0.8525822154067793, "grad_norm": 0.7114038467407227, "learning_rate": 1.0532661238632636e-06, "loss": 0.1051, "step": 45924 }, { "epoch": 0.8526193455441978, "grad_norm": 0.3371790051460266, "learning_rate": 1.0527450890178392e-06, "loss": 0.2555, "step": 45926 }, { "epoch": 0.8526564756816165, "grad_norm": 0.4589039087295532, "learning_rate": 1.0522241759177566e-06, "loss": 0.3586, "step": 45928 }, { "epoch": 0.8526936058190351, "grad_norm": 0.3742820918560028, "learning_rate": 1.0517033845700985e-06, "loss": 0.2502, "step": 45930 }, { "epoch": 0.8527307359564538, "grad_norm": 0.3792300820350647, "learning_rate": 1.0511827149819553e-06, "loss": 0.1868, "step": 45932 }, { "epoch": 0.8527678660938725, "grad_norm": 0.24446897208690643, "learning_rate": 1.050662167160409e-06, "loss": 0.2214, "step": 45934 }, { "epoch": 0.852804996231291, "grad_norm": 0.451593816280365, "learning_rate": 1.050141741112546e-06, "loss": 0.1768, "step": 45936 }, { "epoch": 0.8528421263687097, "grad_norm": 0.2400733381509781, "learning_rate": 1.0496214368454438e-06, "loss": 0.1928, "step": 45938 }, { "epoch": 0.8528792565061283, "grad_norm": 0.31485554575920105, "learning_rate": 1.0491012543661882e-06, "loss": 0.1486, "step": 45940 }, { "epoch": 0.852916386643547, "grad_norm": 0.2525891661643982, "learning_rate": 1.0485811936818502e-06, "loss": 0.2328, "step": 45942 }, { "epoch": 0.8529535167809656, "grad_norm": 0.42416176199913025, "learning_rate": 1.0480612547995106e-06, "loss": 0.3785, "step": 45944 }, { "epoch": 0.8529906469183842, "grad_norm": 0.4395844638347626, "learning_rate": 1.047541437726245e-06, "loss": 0.2917, "step": 45946 }, { "epoch": 0.8530277770558029, "grad_norm": 0.7794254422187805, "learning_rate": 1.047021742469122e-06, "loss": 0.1704, "step": 45948 }, { "epoch": 0.8530649071932215, "grad_norm": 0.43471774458885193, "learning_rate": 1.0465021690352162e-06, "loss": 0.4344, "step": 45950 }, { "epoch": 0.8531020373306402, "grad_norm": 0.5441377758979797, "learning_rate": 1.0459827174315973e-06, "loss": 0.3384, "step": 45952 }, { "epoch": 0.8531391674680588, "grad_norm": 0.43783295154571533, "learning_rate": 1.0454633876653353e-06, "loss": 0.1135, "step": 45954 }, { "epoch": 0.8531762976054774, "grad_norm": 0.48397764563560486, "learning_rate": 1.0449441797434923e-06, "loss": 0.1306, "step": 45956 }, { "epoch": 0.8532134277428961, "grad_norm": 0.4890786111354828, "learning_rate": 1.0444250936731347e-06, "loss": 0.2852, "step": 45958 }, { "epoch": 0.8532505578803147, "grad_norm": 0.4246615171432495, "learning_rate": 1.0439061294613284e-06, "loss": 0.2106, "step": 45960 }, { "epoch": 0.8532876880177334, "grad_norm": 0.2709071934223175, "learning_rate": 1.0433872871151319e-06, "loss": 0.2762, "step": 45962 }, { "epoch": 0.853324818155152, "grad_norm": 0.5390672087669373, "learning_rate": 1.0428685666416083e-06, "loss": 0.4722, "step": 45964 }, { "epoch": 0.8533619482925706, "grad_norm": 0.23091760277748108, "learning_rate": 1.0423499680478122e-06, "loss": 0.0603, "step": 45966 }, { "epoch": 0.8533990784299893, "grad_norm": 0.4271117150783539, "learning_rate": 1.0418314913408034e-06, "loss": 0.4149, "step": 45968 }, { "epoch": 0.8534362085674079, "grad_norm": 0.4028742015361786, "learning_rate": 1.041313136527633e-06, "loss": 0.388, "step": 45970 }, { "epoch": 0.8534733387048266, "grad_norm": 0.6057612895965576, "learning_rate": 1.0407949036153564e-06, "loss": 0.27, "step": 45972 }, { "epoch": 0.8535104688422452, "grad_norm": 0.4208018481731415, "learning_rate": 1.0402767926110258e-06, "loss": 0.364, "step": 45974 }, { "epoch": 0.8535475989796638, "grad_norm": 0.44113689661026, "learning_rate": 1.0397588035216899e-06, "loss": 0.2217, "step": 45976 }, { "epoch": 0.8535847291170825, "grad_norm": 0.3366793394088745, "learning_rate": 1.039240936354401e-06, "loss": 0.3272, "step": 45978 }, { "epoch": 0.8536218592545011, "grad_norm": 0.4800073802471161, "learning_rate": 1.0387231911161988e-06, "loss": 0.2765, "step": 45980 }, { "epoch": 0.8536589893919198, "grad_norm": 0.16636383533477783, "learning_rate": 1.0382055678141323e-06, "loss": 0.2887, "step": 45982 }, { "epoch": 0.8536961195293383, "grad_norm": 0.2764129042625427, "learning_rate": 1.0376880664552446e-06, "loss": 0.3735, "step": 45984 }, { "epoch": 0.853733249666757, "grad_norm": 0.32426717877388, "learning_rate": 1.0371706870465793e-06, "loss": 0.1868, "step": 45986 }, { "epoch": 0.8537703798041757, "grad_norm": 0.3913825452327728, "learning_rate": 1.0366534295951714e-06, "loss": 0.2667, "step": 45988 }, { "epoch": 0.8538075099415943, "grad_norm": 0.4962126314640045, "learning_rate": 1.0361362941080643e-06, "loss": 0.3124, "step": 45990 }, { "epoch": 0.853844640079013, "grad_norm": 0.34363263845443726, "learning_rate": 1.0356192805922915e-06, "loss": 0.5479, "step": 45992 }, { "epoch": 0.8538817702164315, "grad_norm": 0.3517397940158844, "learning_rate": 1.0351023890548872e-06, "loss": 0.3074, "step": 45994 }, { "epoch": 0.8539189003538502, "grad_norm": 0.447147935628891, "learning_rate": 1.0345856195028869e-06, "loss": 0.2679, "step": 45996 }, { "epoch": 0.8539560304912689, "grad_norm": 0.4513747990131378, "learning_rate": 1.0340689719433228e-06, "loss": 0.2177, "step": 45998 }, { "epoch": 0.8539931606286875, "grad_norm": 0.2791364789009094, "learning_rate": 1.0335524463832225e-06, "loss": 0.373, "step": 46000 }, { "epoch": 0.8540302907661061, "grad_norm": 0.25084614753723145, "learning_rate": 1.033036042829617e-06, "loss": 0.1725, "step": 46002 }, { "epoch": 0.8540674209035247, "grad_norm": 0.4851343035697937, "learning_rate": 1.0325197612895333e-06, "loss": 0.2809, "step": 46004 }, { "epoch": 0.8541045510409434, "grad_norm": 0.27796807885169983, "learning_rate": 1.032003601769993e-06, "loss": 0.5019, "step": 46006 }, { "epoch": 0.8541416811783621, "grad_norm": 0.44860410690307617, "learning_rate": 1.031487564278023e-06, "loss": 0.2012, "step": 46008 }, { "epoch": 0.8541788113157807, "grad_norm": 0.36061036586761475, "learning_rate": 1.0309716488206411e-06, "loss": 0.2425, "step": 46010 }, { "epoch": 0.8542159414531993, "grad_norm": 0.29998350143432617, "learning_rate": 1.0304558554048705e-06, "loss": 0.3274, "step": 46012 }, { "epoch": 0.8542530715906179, "grad_norm": 0.36201295256614685, "learning_rate": 1.0299401840377287e-06, "loss": 0.2967, "step": 46014 }, { "epoch": 0.8542902017280366, "grad_norm": 0.5824465155601501, "learning_rate": 1.0294246347262349e-06, "loss": 0.5454, "step": 46016 }, { "epoch": 0.8543273318654553, "grad_norm": 0.2742396593093872, "learning_rate": 1.0289092074773988e-06, "loss": 0.269, "step": 46018 }, { "epoch": 0.8543644620028739, "grad_norm": 0.17151041328907013, "learning_rate": 1.028393902298238e-06, "loss": 0.1378, "step": 46020 }, { "epoch": 0.8544015921402925, "grad_norm": 0.33328571915626526, "learning_rate": 1.0278787191957617e-06, "loss": 0.2217, "step": 46022 }, { "epoch": 0.8544387222777111, "grad_norm": 0.38433122634887695, "learning_rate": 1.027363658176983e-06, "loss": 0.1776, "step": 46024 }, { "epoch": 0.8544758524151298, "grad_norm": 0.289621502161026, "learning_rate": 1.0268487192489075e-06, "loss": 0.1082, "step": 46026 }, { "epoch": 0.8545129825525484, "grad_norm": 0.4766714870929718, "learning_rate": 1.0263339024185458e-06, "loss": 0.3294, "step": 46028 }, { "epoch": 0.8545501126899671, "grad_norm": 0.3471944034099579, "learning_rate": 1.0258192076929008e-06, "loss": 0.2152, "step": 46030 }, { "epoch": 0.8545872428273857, "grad_norm": 0.320366770029068, "learning_rate": 1.025304635078973e-06, "loss": 0.2824, "step": 46032 }, { "epoch": 0.8546243729648043, "grad_norm": 0.9767128825187683, "learning_rate": 1.0247901845837672e-06, "loss": 0.3382, "step": 46034 }, { "epoch": 0.854661503102223, "grad_norm": 0.43145957589149475, "learning_rate": 1.024275856214283e-06, "loss": 0.3641, "step": 46036 }, { "epoch": 0.8546986332396416, "grad_norm": 0.4482840597629547, "learning_rate": 1.0237616499775194e-06, "loss": 0.1859, "step": 46038 }, { "epoch": 0.8547357633770603, "grad_norm": 0.6534146070480347, "learning_rate": 1.023247565880472e-06, "loss": 0.351, "step": 46040 }, { "epoch": 0.8547728935144789, "grad_norm": 0.42724812030792236, "learning_rate": 1.0227336039301394e-06, "loss": 0.3739, "step": 46042 }, { "epoch": 0.8548100236518975, "grad_norm": 0.3642466366291046, "learning_rate": 1.0222197641335096e-06, "loss": 0.3619, "step": 46044 }, { "epoch": 0.8548471537893162, "grad_norm": 0.3381684720516205, "learning_rate": 1.021706046497578e-06, "loss": 0.2021, "step": 46046 }, { "epoch": 0.8548842839267348, "grad_norm": 0.383078396320343, "learning_rate": 1.0211924510293337e-06, "loss": 0.2247, "step": 46048 }, { "epoch": 0.8549214140641535, "grad_norm": 0.30971020460128784, "learning_rate": 1.0206789777357672e-06, "loss": 0.2024, "step": 46050 }, { "epoch": 0.8549585442015721, "grad_norm": 0.20407156646251678, "learning_rate": 1.0201656266238624e-06, "loss": 0.3147, "step": 46052 }, { "epoch": 0.8549956743389907, "grad_norm": 0.5261774659156799, "learning_rate": 1.0196523977006067e-06, "loss": 0.2627, "step": 46054 }, { "epoch": 0.8550328044764094, "grad_norm": 0.28000393509864807, "learning_rate": 1.0191392909729814e-06, "loss": 0.1602, "step": 46056 }, { "epoch": 0.855069934613828, "grad_norm": 0.28807374835014343, "learning_rate": 1.0186263064479695e-06, "loss": 0.3009, "step": 46058 }, { "epoch": 0.8551070647512466, "grad_norm": 0.4003903567790985, "learning_rate": 1.0181134441325512e-06, "loss": 0.3935, "step": 46060 }, { "epoch": 0.8551441948886653, "grad_norm": 0.44871625304222107, "learning_rate": 1.0176007040337043e-06, "loss": 0.2415, "step": 46062 }, { "epoch": 0.8551813250260839, "grad_norm": 0.34174075722694397, "learning_rate": 1.0170880861584076e-06, "loss": 0.2746, "step": 46064 }, { "epoch": 0.8552184551635026, "grad_norm": 0.4288659691810608, "learning_rate": 1.0165755905136364e-06, "loss": 0.3563, "step": 46066 }, { "epoch": 0.8552555853009212, "grad_norm": 0.5128308534622192, "learning_rate": 1.016063217106361e-06, "loss": 0.0958, "step": 46068 }, { "epoch": 0.8552927154383398, "grad_norm": 0.5027909874916077, "learning_rate": 1.015550965943556e-06, "loss": 0.2644, "step": 46070 }, { "epoch": 0.8553298455757585, "grad_norm": 0.3130652904510498, "learning_rate": 1.0150388370321917e-06, "loss": 0.1813, "step": 46072 }, { "epoch": 0.8553669757131771, "grad_norm": 0.3760024607181549, "learning_rate": 1.0145268303792343e-06, "loss": 0.4993, "step": 46074 }, { "epoch": 0.8554041058505958, "grad_norm": 0.45926809310913086, "learning_rate": 1.0140149459916516e-06, "loss": 0.3788, "step": 46076 }, { "epoch": 0.8554412359880144, "grad_norm": 0.695491373538971, "learning_rate": 1.0135031838764099e-06, "loss": 0.3275, "step": 46078 }, { "epoch": 0.855478366125433, "grad_norm": 0.4763723611831665, "learning_rate": 1.0129915440404747e-06, "loss": 0.2214, "step": 46080 }, { "epoch": 0.8555154962628516, "grad_norm": 0.387323260307312, "learning_rate": 1.0124800264908018e-06, "loss": 0.0521, "step": 46082 }, { "epoch": 0.8555526264002703, "grad_norm": 0.2989139258861542, "learning_rate": 1.011968631234357e-06, "loss": 0.2115, "step": 46084 }, { "epoch": 0.855589756537689, "grad_norm": 0.32972052693367004, "learning_rate": 1.0114573582780952e-06, "loss": 0.1938, "step": 46086 }, { "epoch": 0.8556268866751076, "grad_norm": 0.31097927689552307, "learning_rate": 1.0109462076289755e-06, "loss": 0.2783, "step": 46088 }, { "epoch": 0.8556640168125262, "grad_norm": 0.2864035964012146, "learning_rate": 1.0104351792939549e-06, "loss": 0.1666, "step": 46090 }, { "epoch": 0.8557011469499448, "grad_norm": 0.46506014466285706, "learning_rate": 1.0099242732799818e-06, "loss": 0.2935, "step": 46092 }, { "epoch": 0.8557382770873635, "grad_norm": 0.40353989601135254, "learning_rate": 1.0094134895940144e-06, "loss": 0.3407, "step": 46094 }, { "epoch": 0.8557754072247822, "grad_norm": 0.3506017029285431, "learning_rate": 1.0089028282429958e-06, "loss": 0.2645, "step": 46096 }, { "epoch": 0.8558125373622008, "grad_norm": 0.42331644892692566, "learning_rate": 1.0083922892338794e-06, "loss": 0.2844, "step": 46098 }, { "epoch": 0.8558496674996194, "grad_norm": 0.19131030142307281, "learning_rate": 1.0078818725736105e-06, "loss": 0.1323, "step": 46100 }, { "epoch": 0.855886797637038, "grad_norm": 0.31756022572517395, "learning_rate": 1.007371578269135e-06, "loss": 0.294, "step": 46102 }, { "epoch": 0.8559239277744567, "grad_norm": 0.5922867655754089, "learning_rate": 1.0068614063273986e-06, "loss": 0.2922, "step": 46104 }, { "epoch": 0.8559610579118754, "grad_norm": 0.5192360281944275, "learning_rate": 1.0063513567553384e-06, "loss": 0.336, "step": 46106 }, { "epoch": 0.855998188049294, "grad_norm": 0.44299548864364624, "learning_rate": 1.0058414295598973e-06, "loss": 0.2214, "step": 46108 }, { "epoch": 0.8560353181867126, "grad_norm": 0.4147316515445709, "learning_rate": 1.0053316247480138e-06, "loss": 0.1448, "step": 46110 }, { "epoch": 0.8560724483241312, "grad_norm": 0.4246554374694824, "learning_rate": 1.0048219423266236e-06, "loss": 0.1484, "step": 46112 }, { "epoch": 0.8561095784615499, "grad_norm": 0.5457956194877625, "learning_rate": 1.0043123823026668e-06, "loss": 0.2399, "step": 46114 }, { "epoch": 0.8561467085989686, "grad_norm": 0.414995938539505, "learning_rate": 1.00380294468307e-06, "loss": 0.1696, "step": 46116 }, { "epoch": 0.8561838387363871, "grad_norm": 0.43596112728118896, "learning_rate": 1.003293629474771e-06, "loss": 0.2017, "step": 46118 }, { "epoch": 0.8562209688738058, "grad_norm": 0.559964120388031, "learning_rate": 1.0027844366846962e-06, "loss": 0.2874, "step": 46120 }, { "epoch": 0.8562580990112244, "grad_norm": 0.4684061110019684, "learning_rate": 1.0022753663197737e-06, "loss": 0.1829, "step": 46122 }, { "epoch": 0.8562952291486431, "grad_norm": 0.8103684782981873, "learning_rate": 1.0017664183869336e-06, "loss": 0.3036, "step": 46124 }, { "epoch": 0.8563323592860618, "grad_norm": 0.5137556791305542, "learning_rate": 1.001257592893099e-06, "loss": 0.2096, "step": 46126 }, { "epoch": 0.8563694894234803, "grad_norm": 0.5199487209320068, "learning_rate": 1.0007488898451945e-06, "loss": 0.1466, "step": 46128 }, { "epoch": 0.856406619560899, "grad_norm": 0.795049250125885, "learning_rate": 1.0002403092501434e-06, "loss": 0.5016, "step": 46130 }, { "epoch": 0.8564437496983176, "grad_norm": 0.37341001629829407, "learning_rate": 9.997318511148623e-07, "loss": 0.1678, "step": 46132 }, { "epoch": 0.8564808798357363, "grad_norm": 0.5146586298942566, "learning_rate": 9.992235154462726e-07, "loss": 0.4902, "step": 46134 }, { "epoch": 0.8565180099731549, "grad_norm": 0.49121350049972534, "learning_rate": 9.98715302251292e-07, "loss": 0.2585, "step": 46136 }, { "epoch": 0.8565551401105735, "grad_norm": 0.4088270664215088, "learning_rate": 9.982072115368313e-07, "loss": 0.2735, "step": 46138 }, { "epoch": 0.8565922702479922, "grad_norm": 0.35057932138442993, "learning_rate": 9.976992433098077e-07, "loss": 0.3379, "step": 46140 }, { "epoch": 0.8566294003854108, "grad_norm": 0.25527843832969666, "learning_rate": 9.971913975771341e-07, "loss": 0.2667, "step": 46142 }, { "epoch": 0.8566665305228295, "grad_norm": 0.4950246214866638, "learning_rate": 9.966836743457175e-07, "loss": 0.4714, "step": 46144 }, { "epoch": 0.8567036606602481, "grad_norm": 0.43320509791374207, "learning_rate": 9.96176073622468e-07, "loss": 0.2991, "step": 46146 }, { "epoch": 0.8567407907976667, "grad_norm": 1.155590295791626, "learning_rate": 9.95668595414293e-07, "loss": 0.152, "step": 46148 }, { "epoch": 0.8567779209350854, "grad_norm": 0.2639387249946594, "learning_rate": 9.951612397280963e-07, "loss": 0.2366, "step": 46150 }, { "epoch": 0.856815051072504, "grad_norm": 0.4478684067726135, "learning_rate": 9.946540065707833e-07, "loss": 0.3208, "step": 46152 }, { "epoch": 0.8568521812099227, "grad_norm": 0.34140002727508545, "learning_rate": 9.941468959492562e-07, "loss": 0.2934, "step": 46154 }, { "epoch": 0.8568893113473413, "grad_norm": 0.26276132464408875, "learning_rate": 9.936399078704129e-07, "loss": 0.2396, "step": 46156 }, { "epoch": 0.8569264414847599, "grad_norm": 0.4313145875930786, "learning_rate": 9.931330423411545e-07, "loss": 0.2825, "step": 46158 }, { "epoch": 0.8569635716221786, "grad_norm": 0.37394723296165466, "learning_rate": 9.926262993683755e-07, "loss": 0.2194, "step": 46160 }, { "epoch": 0.8570007017595972, "grad_norm": 0.21449095010757446, "learning_rate": 9.921196789589726e-07, "loss": 0.2751, "step": 46162 }, { "epoch": 0.8570378318970159, "grad_norm": 0.37025001645088196, "learning_rate": 9.916131811198382e-07, "loss": 0.1488, "step": 46164 }, { "epoch": 0.8570749620344345, "grad_norm": 0.47388169169425964, "learning_rate": 9.911068058578666e-07, "loss": 0.2557, "step": 46166 }, { "epoch": 0.8571120921718531, "grad_norm": 0.3557104468345642, "learning_rate": 9.906005531799467e-07, "loss": 0.0881, "step": 46168 }, { "epoch": 0.8571492223092718, "grad_norm": 0.5810810327529907, "learning_rate": 9.900944230929666e-07, "loss": 0.2012, "step": 46170 }, { "epoch": 0.8571863524466904, "grad_norm": 0.33764055371284485, "learning_rate": 9.895884156038128e-07, "loss": 0.2997, "step": 46172 }, { "epoch": 0.8572234825841091, "grad_norm": 0.5022072792053223, "learning_rate": 9.890825307193719e-07, "loss": 0.3853, "step": 46174 }, { "epoch": 0.8572606127215276, "grad_norm": 0.3528594672679901, "learning_rate": 9.885767684465286e-07, "loss": 0.2999, "step": 46176 }, { "epoch": 0.8572977428589463, "grad_norm": 0.38557982444763184, "learning_rate": 9.880711287921607e-07, "loss": 0.3685, "step": 46178 }, { "epoch": 0.8573348729963649, "grad_norm": 0.3270796537399292, "learning_rate": 9.875656117631538e-07, "loss": 0.3019, "step": 46180 }, { "epoch": 0.8573720031337836, "grad_norm": 0.36792823672294617, "learning_rate": 9.870602173663812e-07, "loss": 0.177, "step": 46182 }, { "epoch": 0.8574091332712023, "grad_norm": 0.30004703998565674, "learning_rate": 9.86554945608722e-07, "loss": 0.4977, "step": 46184 }, { "epoch": 0.8574462634086208, "grad_norm": 0.3589276969432831, "learning_rate": 9.860497964970516e-07, "loss": 0.2549, "step": 46186 }, { "epoch": 0.8574833935460395, "grad_norm": 0.6069404482841492, "learning_rate": 9.855447700382437e-07, "loss": 0.3117, "step": 46188 }, { "epoch": 0.8575205236834581, "grad_norm": 0.30522945523262024, "learning_rate": 9.850398662391692e-07, "loss": 0.3231, "step": 46190 }, { "epoch": 0.8575576538208768, "grad_norm": 0.6054765582084656, "learning_rate": 9.845350851067026e-07, "loss": 0.359, "step": 46192 }, { "epoch": 0.8575947839582955, "grad_norm": 0.2566866874694824, "learning_rate": 9.840304266477063e-07, "loss": 0.2009, "step": 46194 }, { "epoch": 0.857631914095714, "grad_norm": 0.42071259021759033, "learning_rate": 9.835258908690504e-07, "loss": 0.197, "step": 46196 }, { "epoch": 0.8576690442331327, "grad_norm": 0.36751270294189453, "learning_rate": 9.830214777776004e-07, "loss": 0.1363, "step": 46198 }, { "epoch": 0.8577061743705513, "grad_norm": 0.26197052001953125, "learning_rate": 9.825171873802186e-07, "loss": 0.1955, "step": 46200 }, { "epoch": 0.85774330450797, "grad_norm": 0.45065221190452576, "learning_rate": 9.820130196837663e-07, "loss": 0.2661, "step": 46202 }, { "epoch": 0.8577804346453887, "grad_norm": 0.49744081497192383, "learning_rate": 9.815089746951057e-07, "loss": 0.4067, "step": 46204 }, { "epoch": 0.8578175647828072, "grad_norm": 0.37906700372695923, "learning_rate": 9.810050524210956e-07, "loss": 0.4966, "step": 46206 }, { "epoch": 0.8578546949202259, "grad_norm": 0.4864760935306549, "learning_rate": 9.805012528685908e-07, "loss": 0.2898, "step": 46208 }, { "epoch": 0.8578918250576445, "grad_norm": 0.3086865246295929, "learning_rate": 9.799975760444468e-07, "loss": 0.3064, "step": 46210 }, { "epoch": 0.8579289551950632, "grad_norm": 1.6721597909927368, "learning_rate": 9.79494021955517e-07, "loss": 0.1497, "step": 46212 }, { "epoch": 0.8579660853324819, "grad_norm": 0.3922683894634247, "learning_rate": 9.78990590608655e-07, "loss": 0.2725, "step": 46214 }, { "epoch": 0.8580032154699004, "grad_norm": 0.3594084680080414, "learning_rate": 9.784872820107106e-07, "loss": 0.201, "step": 46216 }, { "epoch": 0.8580403456073191, "grad_norm": 0.21988588571548462, "learning_rate": 9.779840961685328e-07, "loss": 0.0882, "step": 46218 }, { "epoch": 0.8580774757447377, "grad_norm": 0.5878615379333496, "learning_rate": 9.77481033088965e-07, "loss": 0.0566, "step": 46220 }, { "epoch": 0.8581146058821564, "grad_norm": 0.3512178957462311, "learning_rate": 9.769780927788575e-07, "loss": 0.1164, "step": 46222 }, { "epoch": 0.8581517360195751, "grad_norm": 0.3427608907222748, "learning_rate": 9.76475275245049e-07, "loss": 0.1778, "step": 46224 }, { "epoch": 0.8581888661569936, "grad_norm": 0.23185452818870544, "learning_rate": 9.759725804943832e-07, "loss": 0.3727, "step": 46226 }, { "epoch": 0.8582259962944123, "grad_norm": 0.49998024106025696, "learning_rate": 9.754700085336999e-07, "loss": 0.5672, "step": 46228 }, { "epoch": 0.8582631264318309, "grad_norm": 0.5075954794883728, "learning_rate": 9.749675593698416e-07, "loss": 0.2543, "step": 46230 }, { "epoch": 0.8583002565692496, "grad_norm": 0.3351147770881653, "learning_rate": 9.744652330096383e-07, "loss": 0.1896, "step": 46232 }, { "epoch": 0.8583373867066681, "grad_norm": 0.42122018337249756, "learning_rate": 9.739630294599289e-07, "loss": 0.2289, "step": 46234 }, { "epoch": 0.8583745168440868, "grad_norm": 0.3258845806121826, "learning_rate": 9.73460948727547e-07, "loss": 0.2461, "step": 46236 }, { "epoch": 0.8584116469815055, "grad_norm": 0.28528159856796265, "learning_rate": 9.729589908193237e-07, "loss": 0.1234, "step": 46238 }, { "epoch": 0.8584487771189241, "grad_norm": 0.3912682831287384, "learning_rate": 9.724571557420925e-07, "loss": 0.3685, "step": 46240 }, { "epoch": 0.8584859072563428, "grad_norm": 0.4478892683982849, "learning_rate": 9.719554435026757e-07, "loss": 0.2592, "step": 46242 }, { "epoch": 0.8585230373937613, "grad_norm": 0.45767948031425476, "learning_rate": 9.714538541079055e-07, "loss": 0.2928, "step": 46244 }, { "epoch": 0.85856016753118, "grad_norm": 0.36360448598861694, "learning_rate": 9.709523875646022e-07, "loss": 0.2248, "step": 46246 }, { "epoch": 0.8585972976685987, "grad_norm": 0.2076425701379776, "learning_rate": 9.704510438795923e-07, "loss": 0.2488, "step": 46248 }, { "epoch": 0.8586344278060173, "grad_norm": 0.39231494069099426, "learning_rate": 9.69949823059697e-07, "loss": 0.4904, "step": 46250 }, { "epoch": 0.858671557943436, "grad_norm": 0.22408731281757355, "learning_rate": 9.694487251117357e-07, "loss": 0.39, "step": 46252 }, { "epoch": 0.8587086880808545, "grad_norm": 0.3209182322025299, "learning_rate": 9.689477500425293e-07, "loss": 0.3166, "step": 46254 }, { "epoch": 0.8587458182182732, "grad_norm": 0.3581145107746124, "learning_rate": 9.684468978588934e-07, "loss": 0.2715, "step": 46256 }, { "epoch": 0.8587829483556919, "grad_norm": 0.5015003681182861, "learning_rate": 9.679461685676407e-07, "loss": 0.355, "step": 46258 }, { "epoch": 0.8588200784931105, "grad_norm": 0.3776834309101105, "learning_rate": 9.674455621755873e-07, "loss": 0.2292, "step": 46260 }, { "epoch": 0.8588572086305292, "grad_norm": 0.1708477884531021, "learning_rate": 9.669450786895452e-07, "loss": 0.3651, "step": 46262 }, { "epoch": 0.8588943387679477, "grad_norm": 0.424582302570343, "learning_rate": 9.664447181163228e-07, "loss": 0.3992, "step": 46264 }, { "epoch": 0.8589314689053664, "grad_norm": 0.452316552400589, "learning_rate": 9.659444804627283e-07, "loss": 0.1647, "step": 46266 }, { "epoch": 0.8589685990427851, "grad_norm": 0.33175915479660034, "learning_rate": 9.654443657355706e-07, "loss": 0.3052, "step": 46268 }, { "epoch": 0.8590057291802037, "grad_norm": 0.554253101348877, "learning_rate": 9.64944373941652e-07, "loss": 0.4591, "step": 46270 }, { "epoch": 0.8590428593176224, "grad_norm": 0.39185282588005066, "learning_rate": 9.644445050877782e-07, "loss": 0.3826, "step": 46272 }, { "epoch": 0.8590799894550409, "grad_norm": 0.2625126540660858, "learning_rate": 9.639447591807495e-07, "loss": 0.293, "step": 46274 }, { "epoch": 0.8591171195924596, "grad_norm": 0.5193026065826416, "learning_rate": 9.634451362273667e-07, "loss": 0.3323, "step": 46276 }, { "epoch": 0.8591542497298783, "grad_norm": 0.48075148463249207, "learning_rate": 9.62945636234427e-07, "loss": 0.3628, "step": 46278 }, { "epoch": 0.8591913798672969, "grad_norm": 0.39393192529678345, "learning_rate": 9.624462592087314e-07, "loss": 0.3218, "step": 46280 }, { "epoch": 0.8592285100047156, "grad_norm": 0.38466787338256836, "learning_rate": 9.61947005157069e-07, "loss": 0.2285, "step": 46282 }, { "epoch": 0.8592656401421341, "grad_norm": 0.32856759428977966, "learning_rate": 9.614478740862377e-07, "loss": 0.2078, "step": 46284 }, { "epoch": 0.8593027702795528, "grad_norm": 0.31078091263771057, "learning_rate": 9.609488660030252e-07, "loss": 0.2823, "step": 46286 }, { "epoch": 0.8593399004169714, "grad_norm": 0.5123100280761719, "learning_rate": 9.604499809142232e-07, "loss": 0.3058, "step": 46288 }, { "epoch": 0.8593770305543901, "grad_norm": 0.47025883197784424, "learning_rate": 9.599512188266213e-07, "loss": 0.19, "step": 46290 }, { "epoch": 0.8594141606918088, "grad_norm": 0.22942614555358887, "learning_rate": 9.594525797470044e-07, "loss": 0.2825, "step": 46292 }, { "epoch": 0.8594512908292273, "grad_norm": 0.24472802877426147, "learning_rate": 9.589540636821603e-07, "loss": 0.235, "step": 46294 }, { "epoch": 0.859488420966646, "grad_norm": 0.3846464455127716, "learning_rate": 9.584556706388692e-07, "loss": 0.303, "step": 46296 }, { "epoch": 0.8595255511040646, "grad_norm": 0.37022069096565247, "learning_rate": 9.579574006239135e-07, "loss": 0.518, "step": 46298 }, { "epoch": 0.8595626812414833, "grad_norm": 0.3093615472316742, "learning_rate": 9.57459253644073e-07, "loss": 0.274, "step": 46300 }, { "epoch": 0.859599811378902, "grad_norm": 0.45878714323043823, "learning_rate": 9.56961229706127e-07, "loss": 0.2058, "step": 46302 }, { "epoch": 0.8596369415163205, "grad_norm": 0.39811742305755615, "learning_rate": 9.564633288168534e-07, "loss": 0.2323, "step": 46304 }, { "epoch": 0.8596740716537392, "grad_norm": 0.44109609723091125, "learning_rate": 9.559655509830256e-07, "loss": 0.4374, "step": 46306 }, { "epoch": 0.8597112017911578, "grad_norm": 0.30011868476867676, "learning_rate": 9.55467896211414e-07, "loss": 0.2943, "step": 46308 }, { "epoch": 0.8597483319285765, "grad_norm": 0.7884966135025024, "learning_rate": 9.549703645087937e-07, "loss": 0.4794, "step": 46310 }, { "epoch": 0.8597854620659952, "grad_norm": 0.578792929649353, "learning_rate": 9.544729558819332e-07, "loss": 0.2461, "step": 46312 }, { "epoch": 0.8598225922034137, "grad_norm": 0.42589816451072693, "learning_rate": 9.539756703376012e-07, "loss": 0.2991, "step": 46314 }, { "epoch": 0.8598597223408324, "grad_norm": 0.5310109257698059, "learning_rate": 9.534785078825648e-07, "loss": 0.3408, "step": 46316 }, { "epoch": 0.859896852478251, "grad_norm": 0.376668781042099, "learning_rate": 9.529814685235905e-07, "loss": 0.3986, "step": 46318 }, { "epoch": 0.8599339826156697, "grad_norm": 0.28847813606262207, "learning_rate": 9.524845522674364e-07, "loss": 0.2796, "step": 46320 }, { "epoch": 0.8599711127530884, "grad_norm": 0.3884340524673462, "learning_rate": 9.519877591208681e-07, "loss": 0.3223, "step": 46322 }, { "epoch": 0.8600082428905069, "grad_norm": 0.42567428946495056, "learning_rate": 9.514910890906448e-07, "loss": 0.2509, "step": 46324 }, { "epoch": 0.8600453730279256, "grad_norm": 0.4189719259738922, "learning_rate": 9.509945421835253e-07, "loss": 0.214, "step": 46326 }, { "epoch": 0.8600825031653442, "grad_norm": 0.36907124519348145, "learning_rate": 9.504981184062634e-07, "loss": 0.2991, "step": 46328 }, { "epoch": 0.8601196333027629, "grad_norm": 0.3586653172969818, "learning_rate": 9.500018177656167e-07, "loss": 0.1652, "step": 46330 }, { "epoch": 0.8601567634401814, "grad_norm": 0.45447200536727905, "learning_rate": 9.495056402683378e-07, "loss": 0.2426, "step": 46332 }, { "epoch": 0.8601938935776001, "grad_norm": 0.3858547508716583, "learning_rate": 9.490095859211768e-07, "loss": 0.2238, "step": 46334 }, { "epoch": 0.8602310237150188, "grad_norm": 0.45476511120796204, "learning_rate": 9.485136547308848e-07, "loss": 0.5031, "step": 46336 }, { "epoch": 0.8602681538524374, "grad_norm": 0.48580729961395264, "learning_rate": 9.480178467042089e-07, "loss": 0.1914, "step": 46338 }, { "epoch": 0.8603052839898561, "grad_norm": 0.305665522813797, "learning_rate": 9.475221618478969e-07, "loss": 0.3223, "step": 46340 }, { "epoch": 0.8603424141272746, "grad_norm": 0.3031717538833618, "learning_rate": 9.470266001686923e-07, "loss": 0.2866, "step": 46342 }, { "epoch": 0.8603795442646933, "grad_norm": 0.4734834134578705, "learning_rate": 9.465311616733408e-07, "loss": 0.2243, "step": 46344 }, { "epoch": 0.860416674402112, "grad_norm": 0.4653489887714386, "learning_rate": 9.460358463685804e-07, "loss": 0.2989, "step": 46346 }, { "epoch": 0.8604538045395306, "grad_norm": 0.41710546612739563, "learning_rate": 9.455406542611545e-07, "loss": 0.4282, "step": 46348 }, { "epoch": 0.8604909346769493, "grad_norm": 0.4607117176055908, "learning_rate": 9.450455853577967e-07, "loss": 0.369, "step": 46350 }, { "epoch": 0.8605280648143678, "grad_norm": 0.6424175500869751, "learning_rate": 9.445506396652459e-07, "loss": 0.2617, "step": 46352 }, { "epoch": 0.8605651949517865, "grad_norm": 0.4699985086917877, "learning_rate": 9.440558171902359e-07, "loss": 0.2994, "step": 46354 }, { "epoch": 0.8606023250892052, "grad_norm": 0.44466492533683777, "learning_rate": 9.435611179395033e-07, "loss": 0.2322, "step": 46356 }, { "epoch": 0.8606394552266238, "grad_norm": 0.42388564348220825, "learning_rate": 9.430665419197749e-07, "loss": 0.175, "step": 46358 }, { "epoch": 0.8606765853640425, "grad_norm": 0.40325218439102173, "learning_rate": 9.42572089137782e-07, "loss": 0.3197, "step": 46360 }, { "epoch": 0.860713715501461, "grad_norm": 0.4469625949859619, "learning_rate": 9.420777596002528e-07, "loss": 0.5115, "step": 46362 }, { "epoch": 0.8607508456388797, "grad_norm": 0.4007731080055237, "learning_rate": 9.41583553313914e-07, "loss": 0.2152, "step": 46364 }, { "epoch": 0.8607879757762984, "grad_norm": 0.4601231515407562, "learning_rate": 9.41089470285489e-07, "loss": 0.3239, "step": 46366 }, { "epoch": 0.860825105913717, "grad_norm": 0.37026742100715637, "learning_rate": 9.405955105217046e-07, "loss": 0.1786, "step": 46368 }, { "epoch": 0.8608622360511357, "grad_norm": 0.6052196025848389, "learning_rate": 9.40101674029279e-07, "loss": 0.2635, "step": 46370 }, { "epoch": 0.8608993661885542, "grad_norm": 0.5184234380722046, "learning_rate": 9.3960796081493e-07, "loss": 0.1757, "step": 46372 }, { "epoch": 0.8609364963259729, "grad_norm": 0.36181288957595825, "learning_rate": 9.391143708853778e-07, "loss": 0.42, "step": 46374 }, { "epoch": 0.8609736264633916, "grad_norm": 0.2517626881599426, "learning_rate": 9.386209042473382e-07, "loss": 0.3591, "step": 46376 }, { "epoch": 0.8610107566008102, "grad_norm": 0.29608917236328125, "learning_rate": 9.381275609075269e-07, "loss": 0.2106, "step": 46378 }, { "epoch": 0.8610478867382289, "grad_norm": 0.3649085462093353, "learning_rate": 9.376343408726551e-07, "loss": 0.4871, "step": 46380 }, { "epoch": 0.8610850168756474, "grad_norm": 0.64967942237854, "learning_rate": 9.371412441494377e-07, "loss": 0.3645, "step": 46382 }, { "epoch": 0.8611221470130661, "grad_norm": 0.4360603392124176, "learning_rate": 9.36648270744579e-07, "loss": 0.2414, "step": 46384 }, { "epoch": 0.8611592771504847, "grad_norm": 0.4184345006942749, "learning_rate": 9.361554206647894e-07, "loss": 0.2459, "step": 46386 }, { "epoch": 0.8611964072879034, "grad_norm": 0.44913792610168457, "learning_rate": 9.356626939167757e-07, "loss": 0.1666, "step": 46388 }, { "epoch": 0.861233537425322, "grad_norm": 0.3589648902416229, "learning_rate": 9.351700905072437e-07, "loss": 0.2685, "step": 46390 }, { "epoch": 0.8612706675627406, "grad_norm": 0.3720960319042206, "learning_rate": 9.346776104428923e-07, "loss": 0.246, "step": 46392 }, { "epoch": 0.8613077977001593, "grad_norm": 0.6639584898948669, "learning_rate": 9.341852537304263e-07, "loss": 0.4005, "step": 46394 }, { "epoch": 0.8613449278375779, "grad_norm": 0.39261895418167114, "learning_rate": 9.336930203765426e-07, "loss": 0.427, "step": 46396 }, { "epoch": 0.8613820579749966, "grad_norm": 0.5158554315567017, "learning_rate": 9.332009103879403e-07, "loss": 0.3459, "step": 46398 }, { "epoch": 0.8614191881124152, "grad_norm": 0.3893435597419739, "learning_rate": 9.327089237713149e-07, "loss": 0.3889, "step": 46400 }, { "epoch": 0.8614563182498338, "grad_norm": 0.22373245656490326, "learning_rate": 9.322170605333614e-07, "loss": 0.2786, "step": 46402 }, { "epoch": 0.8614934483872525, "grad_norm": 0.5546050667762756, "learning_rate": 9.31725320680773e-07, "loss": 0.2727, "step": 46404 }, { "epoch": 0.8615305785246711, "grad_norm": 0.44842684268951416, "learning_rate": 9.312337042202402e-07, "loss": 0.2835, "step": 46406 }, { "epoch": 0.8615677086620898, "grad_norm": 0.5530038475990295, "learning_rate": 9.307422111584541e-07, "loss": 0.1222, "step": 46408 }, { "epoch": 0.8616048387995084, "grad_norm": 0.6688160300254822, "learning_rate": 9.302508415020994e-07, "loss": 0.3912, "step": 46410 }, { "epoch": 0.861641968936927, "grad_norm": 0.71479731798172, "learning_rate": 9.297595952578653e-07, "loss": 0.2258, "step": 46412 }, { "epoch": 0.8616790990743457, "grad_norm": 0.4237983226776123, "learning_rate": 9.29268472432433e-07, "loss": 0.1749, "step": 46414 }, { "epoch": 0.8617162292117643, "grad_norm": 0.4547152519226074, "learning_rate": 9.287774730324861e-07, "loss": 0.2293, "step": 46416 }, { "epoch": 0.861753359349183, "grad_norm": 0.37682297825813293, "learning_rate": 9.28286597064707e-07, "loss": 0.2141, "step": 46418 }, { "epoch": 0.8617904894866016, "grad_norm": 0.519481360912323, "learning_rate": 9.27795844535776e-07, "loss": 0.3248, "step": 46420 }, { "epoch": 0.8618276196240202, "grad_norm": 0.6105753779411316, "learning_rate": 9.273052154523676e-07, "loss": 0.4192, "step": 46422 }, { "epoch": 0.8618647497614389, "grad_norm": 0.35898518562316895, "learning_rate": 9.2681470982116e-07, "loss": 0.3695, "step": 46424 }, { "epoch": 0.8619018798988575, "grad_norm": 0.42922988533973694, "learning_rate": 9.263243276488254e-07, "loss": 0.4508, "step": 46426 }, { "epoch": 0.8619390100362762, "grad_norm": 0.4655109941959381, "learning_rate": 9.258340689420386e-07, "loss": 0.3069, "step": 46428 }, { "epoch": 0.8619761401736948, "grad_norm": 0.18952637910842896, "learning_rate": 9.253439337074721e-07, "loss": 0.1967, "step": 46430 }, { "epoch": 0.8620132703111134, "grad_norm": 0.4921724796295166, "learning_rate": 9.248539219517904e-07, "loss": 0.1317, "step": 46432 }, { "epoch": 0.8620504004485321, "grad_norm": 0.39536353945732117, "learning_rate": 9.243640336816651e-07, "loss": 0.2687, "step": 46434 }, { "epoch": 0.8620875305859507, "grad_norm": 0.4436925947666168, "learning_rate": 9.238742689037594e-07, "loss": 0.2681, "step": 46436 }, { "epoch": 0.8621246607233694, "grad_norm": 0.34235039353370667, "learning_rate": 9.233846276247394e-07, "loss": 0.2058, "step": 46438 }, { "epoch": 0.8621617908607879, "grad_norm": 0.6510866284370422, "learning_rate": 9.228951098512661e-07, "loss": 0.2409, "step": 46440 }, { "epoch": 0.8621989209982066, "grad_norm": 0.4142334461212158, "learning_rate": 9.224057155900013e-07, "loss": 0.2129, "step": 46442 }, { "epoch": 0.8622360511356253, "grad_norm": 0.28202468156814575, "learning_rate": 9.21916444847607e-07, "loss": 0.0866, "step": 46444 }, { "epoch": 0.8622731812730439, "grad_norm": 0.44989195466041565, "learning_rate": 9.214272976307348e-07, "loss": 0.3838, "step": 46446 }, { "epoch": 0.8623103114104625, "grad_norm": 0.6221481561660767, "learning_rate": 9.209382739460438e-07, "loss": 0.3436, "step": 46448 }, { "epoch": 0.8623474415478811, "grad_norm": 0.2253553867340088, "learning_rate": 9.204493738001885e-07, "loss": 0.2152, "step": 46450 }, { "epoch": 0.8623845716852998, "grad_norm": 0.6241713762283325, "learning_rate": 9.199605971998227e-07, "loss": 0.3115, "step": 46452 }, { "epoch": 0.8624217018227185, "grad_norm": 0.24273280799388885, "learning_rate": 9.194719441515931e-07, "loss": 0.139, "step": 46454 }, { "epoch": 0.8624588319601371, "grad_norm": 0.5003330111503601, "learning_rate": 9.189834146621501e-07, "loss": 0.4003, "step": 46456 }, { "epoch": 0.8624959620975557, "grad_norm": 0.5129695534706116, "learning_rate": 9.18495008738145e-07, "loss": 0.3602, "step": 46458 }, { "epoch": 0.8625330922349743, "grad_norm": 0.3395385444164276, "learning_rate": 9.180067263862192e-07, "loss": 0.3788, "step": 46460 }, { "epoch": 0.862570222372393, "grad_norm": 0.3349679112434387, "learning_rate": 9.175185676130171e-07, "loss": 0.2056, "step": 46462 }, { "epoch": 0.8626073525098117, "grad_norm": 0.5682922601699829, "learning_rate": 9.170305324251827e-07, "loss": 0.2257, "step": 46464 }, { "epoch": 0.8626444826472303, "grad_norm": 0.2856213450431824, "learning_rate": 9.16542620829356e-07, "loss": 0.1117, "step": 46466 }, { "epoch": 0.8626816127846489, "grad_norm": 0.2910298705101013, "learning_rate": 9.160548328321762e-07, "loss": 0.3455, "step": 46468 }, { "epoch": 0.8627187429220675, "grad_norm": 0.28439223766326904, "learning_rate": 9.155671684402823e-07, "loss": 0.1661, "step": 46470 }, { "epoch": 0.8627558730594862, "grad_norm": 0.34871184825897217, "learning_rate": 9.150796276603057e-07, "loss": 0.3151, "step": 46472 }, { "epoch": 0.8627930031969049, "grad_norm": 0.3978327214717865, "learning_rate": 9.145922104988836e-07, "loss": 0.3079, "step": 46474 }, { "epoch": 0.8628301333343235, "grad_norm": 0.30677497386932373, "learning_rate": 9.141049169626492e-07, "loss": 0.325, "step": 46476 }, { "epoch": 0.8628672634717421, "grad_norm": 0.34825631976127625, "learning_rate": 9.136177470582297e-07, "loss": 0.2891, "step": 46478 }, { "epoch": 0.8629043936091607, "grad_norm": 0.5264063477516174, "learning_rate": 9.131307007922562e-07, "loss": 0.2168, "step": 46480 }, { "epoch": 0.8629415237465794, "grad_norm": 0.5684126019477844, "learning_rate": 9.12643778171357e-07, "loss": 0.2151, "step": 46482 }, { "epoch": 0.862978653883998, "grad_norm": 0.3642502725124359, "learning_rate": 9.121569792021545e-07, "loss": 0.3772, "step": 46484 }, { "epoch": 0.8630157840214167, "grad_norm": 0.2748647928237915, "learning_rate": 9.116703038912744e-07, "loss": 0.2337, "step": 46486 }, { "epoch": 0.8630529141588353, "grad_norm": 0.2251640260219574, "learning_rate": 9.111837522453382e-07, "loss": 0.3718, "step": 46488 }, { "epoch": 0.8630900442962539, "grad_norm": 0.34252214431762695, "learning_rate": 9.106973242709672e-07, "loss": 0.2611, "step": 46490 }, { "epoch": 0.8631271744336726, "grad_norm": 0.5877100825309753, "learning_rate": 9.102110199747805e-07, "loss": 0.2496, "step": 46492 }, { "epoch": 0.8631643045710912, "grad_norm": 0.21235185861587524, "learning_rate": 9.097248393633961e-07, "loss": 0.1775, "step": 46494 }, { "epoch": 0.8632014347085099, "grad_norm": 0.3401835858821869, "learning_rate": 9.092387824434256e-07, "loss": 0.3657, "step": 46496 }, { "epoch": 0.8632385648459285, "grad_norm": 0.3647737503051758, "learning_rate": 9.087528492214882e-07, "loss": 0.335, "step": 46498 }, { "epoch": 0.8632756949833471, "grad_norm": 0.4141778349876404, "learning_rate": 9.082670397041915e-07, "loss": 0.3524, "step": 46500 }, { "epoch": 0.8633128251207658, "grad_norm": 0.26676759123802185, "learning_rate": 9.077813538981461e-07, "loss": 0.4065, "step": 46502 }, { "epoch": 0.8633499552581844, "grad_norm": 0.34450563788414, "learning_rate": 9.072957918099635e-07, "loss": 0.1958, "step": 46504 }, { "epoch": 0.863387085395603, "grad_norm": 0.48400068283081055, "learning_rate": 9.06810353446248e-07, "loss": 0.2814, "step": 46506 }, { "epoch": 0.8634242155330217, "grad_norm": 0.22704671323299408, "learning_rate": 9.063250388136091e-07, "loss": 0.0744, "step": 46508 }, { "epoch": 0.8634613456704403, "grad_norm": 0.36942172050476074, "learning_rate": 9.058398479186459e-07, "loss": 0.2967, "step": 46510 }, { "epoch": 0.863498475807859, "grad_norm": 0.4508446753025055, "learning_rate": 9.053547807679619e-07, "loss": 0.3865, "step": 46512 }, { "epoch": 0.8635356059452776, "grad_norm": 0.47393882274627686, "learning_rate": 9.048698373681586e-07, "loss": 0.3714, "step": 46514 }, { "epoch": 0.8635727360826962, "grad_norm": 0.4719904959201813, "learning_rate": 9.04385017725834e-07, "loss": 0.3366, "step": 46516 }, { "epoch": 0.8636098662201149, "grad_norm": 0.445715069770813, "learning_rate": 9.039003218475839e-07, "loss": 0.2088, "step": 46518 }, { "epoch": 0.8636469963575335, "grad_norm": 0.48084166646003723, "learning_rate": 9.034157497400065e-07, "loss": 0.3843, "step": 46520 }, { "epoch": 0.8636841264949522, "grad_norm": 0.3710857927799225, "learning_rate": 9.029313014096908e-07, "loss": 0.3525, "step": 46522 }, { "epoch": 0.8637212566323708, "grad_norm": 0.5550365447998047, "learning_rate": 9.024469768632316e-07, "loss": 0.1933, "step": 46524 }, { "epoch": 0.8637583867697894, "grad_norm": 0.4179303050041199, "learning_rate": 9.019627761072192e-07, "loss": 0.1408, "step": 46526 }, { "epoch": 0.8637955169072081, "grad_norm": 0.4967989921569824, "learning_rate": 9.014786991482416e-07, "loss": 0.1584, "step": 46528 }, { "epoch": 0.8638326470446267, "grad_norm": 0.340951144695282, "learning_rate": 9.009947459928847e-07, "loss": 0.1502, "step": 46530 }, { "epoch": 0.8638697771820454, "grad_norm": 0.26855772733688354, "learning_rate": 9.005109166477355e-07, "loss": 0.2932, "step": 46532 }, { "epoch": 0.863906907319464, "grad_norm": 0.32000988721847534, "learning_rate": 9.000272111193775e-07, "loss": 0.1468, "step": 46534 }, { "epoch": 0.8639440374568826, "grad_norm": 0.601523756980896, "learning_rate": 8.995436294143911e-07, "loss": 0.3262, "step": 46536 }, { "epoch": 0.8639811675943012, "grad_norm": 0.5269022583961487, "learning_rate": 8.990601715393577e-07, "loss": 0.2056, "step": 46538 }, { "epoch": 0.8640182977317199, "grad_norm": 0.5854669809341431, "learning_rate": 8.98576837500853e-07, "loss": 0.2651, "step": 46540 }, { "epoch": 0.8640554278691386, "grad_norm": 0.32356351613998413, "learning_rate": 8.980936273054564e-07, "loss": 0.1804, "step": 46542 }, { "epoch": 0.8640925580065572, "grad_norm": 0.4773086905479431, "learning_rate": 8.976105409597413e-07, "loss": 0.1683, "step": 46544 }, { "epoch": 0.8641296881439758, "grad_norm": 0.4852351248264313, "learning_rate": 8.971275784702837e-07, "loss": 0.4865, "step": 46546 }, { "epoch": 0.8641668182813944, "grad_norm": 0.2547571659088135, "learning_rate": 8.966447398436529e-07, "loss": 0.3646, "step": 46548 }, { "epoch": 0.8642039484188131, "grad_norm": 0.40590035915374756, "learning_rate": 8.961620250864189e-07, "loss": 0.3467, "step": 46550 }, { "epoch": 0.8642410785562318, "grad_norm": 0.28504928946495056, "learning_rate": 8.9567943420515e-07, "loss": 0.4209, "step": 46552 }, { "epoch": 0.8642782086936504, "grad_norm": 0.35393908619880676, "learning_rate": 8.951969672064142e-07, "loss": 0.4074, "step": 46554 }, { "epoch": 0.864315338831069, "grad_norm": 0.3662143647670746, "learning_rate": 8.947146240967752e-07, "loss": 0.2115, "step": 46556 }, { "epoch": 0.8643524689684876, "grad_norm": 0.6477339267730713, "learning_rate": 8.942324048827977e-07, "loss": 0.2138, "step": 46558 }, { "epoch": 0.8643895991059063, "grad_norm": 0.36759456992149353, "learning_rate": 8.937503095710431e-07, "loss": 0.3306, "step": 46560 }, { "epoch": 0.864426729243325, "grad_norm": 0.3682751953601837, "learning_rate": 8.932683381680696e-07, "loss": 0.1948, "step": 46562 }, { "epoch": 0.8644638593807435, "grad_norm": 0.3496985137462616, "learning_rate": 8.927864906804351e-07, "loss": 0.306, "step": 46564 }, { "epoch": 0.8645009895181622, "grad_norm": 0.47471165657043457, "learning_rate": 8.923047671146967e-07, "loss": 0.1044, "step": 46566 }, { "epoch": 0.8645381196555808, "grad_norm": 0.5156015753746033, "learning_rate": 8.918231674774103e-07, "loss": 0.1427, "step": 46568 }, { "epoch": 0.8645752497929995, "grad_norm": 0.4092327654361725, "learning_rate": 8.913416917751305e-07, "loss": 0.312, "step": 46570 }, { "epoch": 0.8646123799304182, "grad_norm": 0.47803959250450134, "learning_rate": 8.908603400144045e-07, "loss": 0.3901, "step": 46572 }, { "epoch": 0.8646495100678367, "grad_norm": 0.3265105187892914, "learning_rate": 8.903791122017847e-07, "loss": 0.2851, "step": 46574 }, { "epoch": 0.8646866402052554, "grad_norm": 0.23752376437187195, "learning_rate": 8.89898008343818e-07, "loss": 0.216, "step": 46576 }, { "epoch": 0.864723770342674, "grad_norm": 0.3154624402523041, "learning_rate": 8.894170284470515e-07, "loss": 0.1441, "step": 46578 }, { "epoch": 0.8647609004800927, "grad_norm": 0.4522281587123871, "learning_rate": 8.889361725180323e-07, "loss": 0.4899, "step": 46580 }, { "epoch": 0.8647980306175114, "grad_norm": 0.2667147219181061, "learning_rate": 8.884554405632984e-07, "loss": 0.0726, "step": 46582 }, { "epoch": 0.8648351607549299, "grad_norm": 0.5061129331588745, "learning_rate": 8.879748325893966e-07, "loss": 0.1911, "step": 46584 }, { "epoch": 0.8648722908923486, "grad_norm": 0.15854398906230927, "learning_rate": 8.874943486028609e-07, "loss": 0.2873, "step": 46586 }, { "epoch": 0.8649094210297672, "grad_norm": 0.48331233859062195, "learning_rate": 8.870139886102325e-07, "loss": 0.3132, "step": 46588 }, { "epoch": 0.8649465511671859, "grad_norm": 0.2915746569633484, "learning_rate": 8.865337526180473e-07, "loss": 0.2558, "step": 46590 }, { "epoch": 0.8649836813046045, "grad_norm": 0.4209698438644409, "learning_rate": 8.86053640632839e-07, "loss": 0.1905, "step": 46592 }, { "epoch": 0.8650208114420231, "grad_norm": 0.29790475964546204, "learning_rate": 8.855736526611424e-07, "loss": 0.0867, "step": 46594 }, { "epoch": 0.8650579415794418, "grad_norm": 0.49681714177131653, "learning_rate": 8.85093788709489e-07, "loss": 0.3693, "step": 46596 }, { "epoch": 0.8650950717168604, "grad_norm": 0.4203861951828003, "learning_rate": 8.846140487844046e-07, "loss": 0.462, "step": 46598 }, { "epoch": 0.8651322018542791, "grad_norm": 0.3562068045139313, "learning_rate": 8.841344328924185e-07, "loss": 0.149, "step": 46600 }, { "epoch": 0.8651693319916977, "grad_norm": 0.32756155729293823, "learning_rate": 8.836549410400608e-07, "loss": 0.326, "step": 46602 }, { "epoch": 0.8652064621291163, "grad_norm": 0.4007483422756195, "learning_rate": 8.831755732338498e-07, "loss": 0.5249, "step": 46604 }, { "epoch": 0.865243592266535, "grad_norm": 0.47130683064460754, "learning_rate": 8.826963294803126e-07, "loss": 0.3475, "step": 46606 }, { "epoch": 0.8652807224039536, "grad_norm": 0.3040938973426819, "learning_rate": 8.822172097859693e-07, "loss": 0.4274, "step": 46608 }, { "epoch": 0.8653178525413723, "grad_norm": 0.2896493375301361, "learning_rate": 8.817382141573372e-07, "loss": 0.1457, "step": 46610 }, { "epoch": 0.8653549826787909, "grad_norm": 0.455607533454895, "learning_rate": 8.812593426009364e-07, "loss": 0.2119, "step": 46612 }, { "epoch": 0.8653921128162095, "grad_norm": 0.38733503222465515, "learning_rate": 8.807805951232817e-07, "loss": 0.1935, "step": 46614 }, { "epoch": 0.8654292429536282, "grad_norm": 0.27781516313552856, "learning_rate": 8.80301971730888e-07, "loss": 0.2212, "step": 46616 }, { "epoch": 0.8654663730910468, "grad_norm": 0.5063551664352417, "learning_rate": 8.798234724302679e-07, "loss": 0.3119, "step": 46618 }, { "epoch": 0.8655035032284655, "grad_norm": 0.48605701327323914, "learning_rate": 8.793450972279338e-07, "loss": 0.2476, "step": 46620 }, { "epoch": 0.865540633365884, "grad_norm": 0.4494832754135132, "learning_rate": 8.788668461303917e-07, "loss": 0.2823, "step": 46622 }, { "epoch": 0.8655777635033027, "grad_norm": 0.39770105481147766, "learning_rate": 8.783887191441531e-07, "loss": 0.4773, "step": 46624 }, { "epoch": 0.8656148936407214, "grad_norm": 0.20229730010032654, "learning_rate": 8.779107162757194e-07, "loss": 0.2204, "step": 46626 }, { "epoch": 0.86565202377814, "grad_norm": 0.27505743503570557, "learning_rate": 8.774328375315966e-07, "loss": 0.1241, "step": 46628 }, { "epoch": 0.8656891539155587, "grad_norm": 0.21107247471809387, "learning_rate": 8.769550829182883e-07, "loss": 0.2119, "step": 46630 }, { "epoch": 0.8657262840529772, "grad_norm": 0.612760066986084, "learning_rate": 8.764774524422947e-07, "loss": 0.3248, "step": 46632 }, { "epoch": 0.8657634141903959, "grad_norm": 0.3635815382003784, "learning_rate": 8.759999461101165e-07, "loss": 0.3673, "step": 46634 }, { "epoch": 0.8658005443278145, "grad_norm": 0.2670760154724121, "learning_rate": 8.755225639282472e-07, "loss": 0.1225, "step": 46636 }, { "epoch": 0.8658376744652332, "grad_norm": 0.44253015518188477, "learning_rate": 8.750453059031849e-07, "loss": 0.3035, "step": 46638 }, { "epoch": 0.8658748046026519, "grad_norm": 0.4554644227027893, "learning_rate": 8.745681720414245e-07, "loss": 0.2483, "step": 46640 }, { "epoch": 0.8659119347400704, "grad_norm": 0.3817485570907593, "learning_rate": 8.740911623494563e-07, "loss": 0.1405, "step": 46642 }, { "epoch": 0.8659490648774891, "grad_norm": 0.37608328461647034, "learning_rate": 8.736142768337741e-07, "loss": 0.2747, "step": 46644 }, { "epoch": 0.8659861950149077, "grad_norm": 0.6206964254379272, "learning_rate": 8.731375155008659e-07, "loss": 0.3702, "step": 46646 }, { "epoch": 0.8660233251523264, "grad_norm": 0.4811398684978485, "learning_rate": 8.726608783572155e-07, "loss": 0.5638, "step": 46648 }, { "epoch": 0.8660604552897451, "grad_norm": 0.40012407302856445, "learning_rate": 8.721843654093109e-07, "loss": 0.2569, "step": 46650 }, { "epoch": 0.8660975854271636, "grad_norm": 0.6387036442756653, "learning_rate": 8.71707976663636e-07, "loss": 0.5028, "step": 46652 }, { "epoch": 0.8661347155645823, "grad_norm": 0.3891315758228302, "learning_rate": 8.712317121266733e-07, "loss": 0.1302, "step": 46654 }, { "epoch": 0.8661718457020009, "grad_norm": 0.4630424976348877, "learning_rate": 8.707555718049032e-07, "loss": 0.2573, "step": 46656 }, { "epoch": 0.8662089758394196, "grad_norm": 0.404838889837265, "learning_rate": 8.702795557048049e-07, "loss": 0.4076, "step": 46658 }, { "epoch": 0.8662461059768383, "grad_norm": 0.4439959228038788, "learning_rate": 8.698036638328567e-07, "loss": 0.2787, "step": 46660 }, { "epoch": 0.8662832361142568, "grad_norm": 0.646397590637207, "learning_rate": 8.693278961955309e-07, "loss": 0.2841, "step": 46662 }, { "epoch": 0.8663203662516755, "grad_norm": 0.48292675614356995, "learning_rate": 8.688522527993026e-07, "loss": 0.3776, "step": 46664 }, { "epoch": 0.8663574963890941, "grad_norm": 0.4071963131427765, "learning_rate": 8.683767336506466e-07, "loss": 0.4973, "step": 46666 }, { "epoch": 0.8663946265265128, "grad_norm": 0.4584599435329437, "learning_rate": 8.679013387560276e-07, "loss": 0.5439, "step": 46668 }, { "epoch": 0.8664317566639315, "grad_norm": 0.32005393505096436, "learning_rate": 8.674260681219193e-07, "loss": 0.1204, "step": 46670 }, { "epoch": 0.86646888680135, "grad_norm": 0.44103845953941345, "learning_rate": 8.669509217547877e-07, "loss": 0.3124, "step": 46672 }, { "epoch": 0.8665060169387687, "grad_norm": 0.24196313321590424, "learning_rate": 8.664758996610956e-07, "loss": 0.0286, "step": 46674 }, { "epoch": 0.8665431470761873, "grad_norm": 0.41530174016952515, "learning_rate": 8.660010018473086e-07, "loss": 0.3668, "step": 46676 }, { "epoch": 0.866580277213606, "grad_norm": 0.49675968289375305, "learning_rate": 8.655262283198895e-07, "loss": 0.1009, "step": 46678 }, { "epoch": 0.8666174073510247, "grad_norm": 0.33067792654037476, "learning_rate": 8.650515790852964e-07, "loss": 0.2376, "step": 46680 }, { "epoch": 0.8666545374884432, "grad_norm": 0.2632119655609131, "learning_rate": 8.645770541499887e-07, "loss": 0.1833, "step": 46682 }, { "epoch": 0.8666916676258619, "grad_norm": 0.4997495412826538, "learning_rate": 8.641026535204267e-07, "loss": 0.2493, "step": 46684 }, { "epoch": 0.8667287977632805, "grad_norm": 0.20244520902633667, "learning_rate": 8.636283772030596e-07, "loss": 0.303, "step": 46686 }, { "epoch": 0.8667659279006992, "grad_norm": 0.21397551894187927, "learning_rate": 8.631542252043457e-07, "loss": 0.2644, "step": 46688 }, { "epoch": 0.8668030580381177, "grad_norm": 0.5239880681037903, "learning_rate": 8.62680197530732e-07, "loss": 0.1335, "step": 46690 }, { "epoch": 0.8668401881755364, "grad_norm": 0.3104003667831421, "learning_rate": 8.622062941886732e-07, "loss": 0.339, "step": 46692 }, { "epoch": 0.8668773183129551, "grad_norm": 0.24175812304019928, "learning_rate": 8.617325151846146e-07, "loss": 0.1051, "step": 46694 }, { "epoch": 0.8669144484503737, "grad_norm": 0.3070138990879059, "learning_rate": 8.61258860525005e-07, "loss": 0.3769, "step": 46696 }, { "epoch": 0.8669515785877924, "grad_norm": 0.2731201648712158, "learning_rate": 8.607853302162894e-07, "loss": 0.0957, "step": 46698 }, { "epoch": 0.8669887087252109, "grad_norm": 0.43242397904396057, "learning_rate": 8.603119242649094e-07, "loss": 0.3822, "step": 46700 }, { "epoch": 0.8670258388626296, "grad_norm": 0.36537522077560425, "learning_rate": 8.598386426773064e-07, "loss": 0.341, "step": 46702 }, { "epoch": 0.8670629690000483, "grad_norm": 0.33908945322036743, "learning_rate": 8.593654854599231e-07, "loss": 0.3787, "step": 46704 }, { "epoch": 0.8671000991374669, "grad_norm": 0.5095226168632507, "learning_rate": 8.588924526191966e-07, "loss": 0.2708, "step": 46706 }, { "epoch": 0.8671372292748856, "grad_norm": 0.3375324606895447, "learning_rate": 8.584195441615628e-07, "loss": 0.2631, "step": 46708 }, { "epoch": 0.8671743594123041, "grad_norm": 0.30688804388046265, "learning_rate": 8.579467600934577e-07, "loss": 0.3349, "step": 46710 }, { "epoch": 0.8672114895497228, "grad_norm": 0.7809726595878601, "learning_rate": 8.574741004213117e-07, "loss": 0.1772, "step": 46712 }, { "epoch": 0.8672486196871415, "grad_norm": 0.2520394027233124, "learning_rate": 8.570015651515584e-07, "loss": 0.132, "step": 46714 }, { "epoch": 0.8672857498245601, "grad_norm": 0.2935698330402374, "learning_rate": 8.565291542906262e-07, "loss": 0.3239, "step": 46716 }, { "epoch": 0.8673228799619788, "grad_norm": 0.6291429996490479, "learning_rate": 8.560568678449455e-07, "loss": 0.208, "step": 46718 }, { "epoch": 0.8673600100993973, "grad_norm": 0.2916485369205475, "learning_rate": 8.555847058209421e-07, "loss": 0.2314, "step": 46720 }, { "epoch": 0.867397140236816, "grad_norm": 0.4152311384677887, "learning_rate": 8.551126682250411e-07, "loss": 0.2583, "step": 46722 }, { "epoch": 0.8674342703742347, "grad_norm": 0.33467012643814087, "learning_rate": 8.546407550636626e-07, "loss": 0.3899, "step": 46724 }, { "epoch": 0.8674714005116533, "grad_norm": 0.5513776540756226, "learning_rate": 8.541689663432307e-07, "loss": 0.3961, "step": 46726 }, { "epoch": 0.867508530649072, "grad_norm": 0.6702874898910522, "learning_rate": 8.536973020701644e-07, "loss": 0.4056, "step": 46728 }, { "epoch": 0.8675456607864905, "grad_norm": 0.4560350775718689, "learning_rate": 8.532257622508811e-07, "loss": 0.2708, "step": 46730 }, { "epoch": 0.8675827909239092, "grad_norm": 0.6269465684890747, "learning_rate": 8.527543468917976e-07, "loss": 0.2176, "step": 46732 }, { "epoch": 0.8676199210613279, "grad_norm": 0.2301567643880844, "learning_rate": 8.52283055999329e-07, "loss": 0.1776, "step": 46734 }, { "epoch": 0.8676570511987465, "grad_norm": 0.4391860067844391, "learning_rate": 8.518118895798866e-07, "loss": 0.2161, "step": 46736 }, { "epoch": 0.8676941813361652, "grad_norm": 0.2992490530014038, "learning_rate": 8.513408476398821e-07, "loss": 0.3297, "step": 46738 }, { "epoch": 0.8677313114735837, "grad_norm": 0.35348713397979736, "learning_rate": 8.508699301857248e-07, "loss": 0.1737, "step": 46740 }, { "epoch": 0.8677684416110024, "grad_norm": 0.42722514271736145, "learning_rate": 8.503991372238241e-07, "loss": 0.3177, "step": 46742 }, { "epoch": 0.867805571748421, "grad_norm": 0.4299079477787018, "learning_rate": 8.499284687605835e-07, "loss": 0.4918, "step": 46744 }, { "epoch": 0.8678427018858397, "grad_norm": 0.38608160614967346, "learning_rate": 8.494579248024093e-07, "loss": 0.1828, "step": 46746 }, { "epoch": 0.8678798320232584, "grad_norm": 0.4946957230567932, "learning_rate": 8.489875053557062e-07, "loss": 0.2828, "step": 46748 }, { "epoch": 0.8679169621606769, "grad_norm": 0.44987526535987854, "learning_rate": 8.485172104268702e-07, "loss": 0.3871, "step": 46750 }, { "epoch": 0.8679540922980956, "grad_norm": 0.5741338729858398, "learning_rate": 8.480470400223051e-07, "loss": 0.2577, "step": 46752 }, { "epoch": 0.8679912224355142, "grad_norm": 0.4036657512187958, "learning_rate": 8.475769941484036e-07, "loss": 0.2005, "step": 46754 }, { "epoch": 0.8680283525729329, "grad_norm": 0.404279887676239, "learning_rate": 8.47107072811566e-07, "loss": 0.205, "step": 46756 }, { "epoch": 0.8680654827103516, "grad_norm": 0.54813152551651, "learning_rate": 8.466372760181851e-07, "loss": 0.1342, "step": 46758 }, { "epoch": 0.8681026128477701, "grad_norm": 0.29971837997436523, "learning_rate": 8.461676037746547e-07, "loss": 0.1997, "step": 46760 }, { "epoch": 0.8681397429851888, "grad_norm": 0.4502348303794861, "learning_rate": 8.456980560873618e-07, "loss": 0.2192, "step": 46762 }, { "epoch": 0.8681768731226074, "grad_norm": 0.3986245393753052, "learning_rate": 8.452286329626991e-07, "loss": 0.1689, "step": 46764 }, { "epoch": 0.8682140032600261, "grad_norm": 0.33909183740615845, "learning_rate": 8.447593344070526e-07, "loss": 0.2688, "step": 46766 }, { "epoch": 0.8682511333974448, "grad_norm": 0.48738452792167664, "learning_rate": 8.442901604268084e-07, "loss": 0.3905, "step": 46768 }, { "epoch": 0.8682882635348633, "grad_norm": 0.36339446902275085, "learning_rate": 8.438211110283523e-07, "loss": 0.3349, "step": 46770 }, { "epoch": 0.868325393672282, "grad_norm": 0.21917188167572021, "learning_rate": 8.433521862180638e-07, "loss": 0.3015, "step": 46772 }, { "epoch": 0.8683625238097006, "grad_norm": 0.45472899079322815, "learning_rate": 8.428833860023255e-07, "loss": 0.2701, "step": 46774 }, { "epoch": 0.8683996539471193, "grad_norm": 0.3960273861885071, "learning_rate": 8.424147103875147e-07, "loss": 0.4122, "step": 46776 }, { "epoch": 0.868436784084538, "grad_norm": 0.3743881583213806, "learning_rate": 8.419461593800082e-07, "loss": 0.3918, "step": 46778 }, { "epoch": 0.8684739142219565, "grad_norm": 0.30909842252731323, "learning_rate": 8.414777329861845e-07, "loss": 0.5219, "step": 46780 }, { "epoch": 0.8685110443593752, "grad_norm": 0.40392014384269714, "learning_rate": 8.410094312124151e-07, "loss": 0.3892, "step": 46782 }, { "epoch": 0.8685481744967938, "grad_norm": 0.3154885768890381, "learning_rate": 8.405412540650737e-07, "loss": 0.3524, "step": 46784 }, { "epoch": 0.8685853046342125, "grad_norm": 0.8122543096542358, "learning_rate": 8.400732015505309e-07, "loss": 0.1452, "step": 46786 }, { "epoch": 0.868622434771631, "grad_norm": 0.2996312975883484, "learning_rate": 8.396052736751537e-07, "loss": 0.3771, "step": 46788 }, { "epoch": 0.8686595649090497, "grad_norm": 0.423929363489151, "learning_rate": 8.391374704453093e-07, "loss": 0.0882, "step": 46790 }, { "epoch": 0.8686966950464684, "grad_norm": 0.3788263201713562, "learning_rate": 8.386697918673659e-07, "loss": 0.1789, "step": 46792 }, { "epoch": 0.868733825183887, "grad_norm": 0.5026527047157288, "learning_rate": 8.38202237947684e-07, "loss": 0.2108, "step": 46794 }, { "epoch": 0.8687709553213057, "grad_norm": 0.5362962484359741, "learning_rate": 8.377348086926262e-07, "loss": 0.2154, "step": 46796 }, { "epoch": 0.8688080854587242, "grad_norm": 0.3448481559753418, "learning_rate": 8.372675041085565e-07, "loss": 0.2406, "step": 46798 }, { "epoch": 0.8688452155961429, "grad_norm": 0.30452924966812134, "learning_rate": 8.368003242018274e-07, "loss": 0.1886, "step": 46800 }, { "epoch": 0.8688823457335616, "grad_norm": 0.3973161280155182, "learning_rate": 8.363332689787995e-07, "loss": 0.2206, "step": 46802 }, { "epoch": 0.8689194758709802, "grad_norm": 0.34383252263069153, "learning_rate": 8.358663384458276e-07, "loss": 0.1737, "step": 46804 }, { "epoch": 0.8689566060083989, "grad_norm": 0.8220702409744263, "learning_rate": 8.353995326092645e-07, "loss": 0.4832, "step": 46806 }, { "epoch": 0.8689937361458174, "grad_norm": 0.27525028586387634, "learning_rate": 8.34932851475464e-07, "loss": 0.2589, "step": 46808 }, { "epoch": 0.8690308662832361, "grad_norm": 0.3732289671897888, "learning_rate": 8.344662950507753e-07, "loss": 0.3137, "step": 46810 }, { "epoch": 0.8690679964206548, "grad_norm": 0.3179345428943634, "learning_rate": 8.339998633415447e-07, "loss": 0.3431, "step": 46812 }, { "epoch": 0.8691051265580734, "grad_norm": 0.45786359906196594, "learning_rate": 8.335335563541225e-07, "loss": 0.257, "step": 46814 }, { "epoch": 0.869142256695492, "grad_norm": 0.32367852330207825, "learning_rate": 8.330673740948503e-07, "loss": 0.3346, "step": 46816 }, { "epoch": 0.8691793868329106, "grad_norm": 0.5150794982910156, "learning_rate": 8.326013165700731e-07, "loss": 0.4357, "step": 46818 }, { "epoch": 0.8692165169703293, "grad_norm": 0.563178300857544, "learning_rate": 8.321353837861323e-07, "loss": 0.2732, "step": 46820 }, { "epoch": 0.869253647107748, "grad_norm": 0.3412834107875824, "learning_rate": 8.316695757493676e-07, "loss": 0.2039, "step": 46822 }, { "epoch": 0.8692907772451666, "grad_norm": 0.42963916063308716, "learning_rate": 8.312038924661203e-07, "loss": 0.1618, "step": 46824 }, { "epoch": 0.8693279073825853, "grad_norm": 0.5310094952583313, "learning_rate": 8.307383339427222e-07, "loss": 0.3348, "step": 46826 }, { "epoch": 0.8693650375200038, "grad_norm": 0.4207155108451843, "learning_rate": 8.302729001855092e-07, "loss": 0.1522, "step": 46828 }, { "epoch": 0.8694021676574225, "grad_norm": 0.4837324023246765, "learning_rate": 8.298075912008164e-07, "loss": 0.2672, "step": 46830 }, { "epoch": 0.8694392977948412, "grad_norm": 0.7493934035301208, "learning_rate": 8.293424069949751e-07, "loss": 0.2496, "step": 46832 }, { "epoch": 0.8694764279322598, "grad_norm": 0.32899099588394165, "learning_rate": 8.28877347574315e-07, "loss": 0.2558, "step": 46834 }, { "epoch": 0.8695135580696784, "grad_norm": 0.25818580389022827, "learning_rate": 8.284124129451643e-07, "loss": 0.366, "step": 46836 }, { "epoch": 0.869550688207097, "grad_norm": 0.3294360339641571, "learning_rate": 8.279476031138456e-07, "loss": 0.1956, "step": 46838 }, { "epoch": 0.8695878183445157, "grad_norm": 0.3252273499965668, "learning_rate": 8.274829180866873e-07, "loss": 0.307, "step": 46840 }, { "epoch": 0.8696249484819343, "grad_norm": 0.22662734985351562, "learning_rate": 8.27018357870012e-07, "loss": 0.1231, "step": 46842 }, { "epoch": 0.869662078619353, "grad_norm": 0.46891823410987854, "learning_rate": 8.265539224701402e-07, "loss": 0.1574, "step": 46844 }, { "epoch": 0.8696992087567716, "grad_norm": 0.4036763608455658, "learning_rate": 8.260896118933914e-07, "loss": 0.279, "step": 46846 }, { "epoch": 0.8697363388941902, "grad_norm": 0.3151242136955261, "learning_rate": 8.256254261460861e-07, "loss": 0.198, "step": 46848 }, { "epoch": 0.8697734690316089, "grad_norm": 0.2610291540622711, "learning_rate": 8.25161365234537e-07, "loss": 0.1894, "step": 46850 }, { "epoch": 0.8698105991690275, "grad_norm": 0.3865082859992981, "learning_rate": 8.2469742916506e-07, "loss": 0.2579, "step": 46852 }, { "epoch": 0.8698477293064462, "grad_norm": 0.2691592276096344, "learning_rate": 8.242336179439669e-07, "loss": 0.2302, "step": 46854 }, { "epoch": 0.8698848594438648, "grad_norm": 0.34262847900390625, "learning_rate": 8.237699315775716e-07, "loss": 0.428, "step": 46856 }, { "epoch": 0.8699219895812834, "grad_norm": 0.4384131133556366, "learning_rate": 8.2330637007218e-07, "loss": 0.3155, "step": 46858 }, { "epoch": 0.8699591197187021, "grad_norm": 0.41056200861930847, "learning_rate": 8.228429334341037e-07, "loss": 0.3629, "step": 46860 }, { "epoch": 0.8699962498561207, "grad_norm": 0.5439568161964417, "learning_rate": 8.223796216696433e-07, "loss": 0.1458, "step": 46862 }, { "epoch": 0.8700333799935394, "grad_norm": 0.5890020728111267, "learning_rate": 8.219164347851071e-07, "loss": 0.1753, "step": 46864 }, { "epoch": 0.870070510130958, "grad_norm": 0.33165672421455383, "learning_rate": 8.214533727867957e-07, "loss": 0.1726, "step": 46866 }, { "epoch": 0.8701076402683766, "grad_norm": 0.47224748134613037, "learning_rate": 8.209904356810116e-07, "loss": 0.4172, "step": 46868 }, { "epoch": 0.8701447704057953, "grad_norm": 0.3229072093963623, "learning_rate": 8.205276234740534e-07, "loss": 0.1533, "step": 46870 }, { "epoch": 0.8701819005432139, "grad_norm": 0.43850961327552795, "learning_rate": 8.20064936172219e-07, "loss": 0.2847, "step": 46872 }, { "epoch": 0.8702190306806326, "grad_norm": 0.4134316146373749, "learning_rate": 8.196023737818048e-07, "loss": 0.3256, "step": 46874 }, { "epoch": 0.8702561608180512, "grad_norm": 0.41142797470092773, "learning_rate": 8.191399363091024e-07, "loss": 0.5251, "step": 46876 }, { "epoch": 0.8702932909554698, "grad_norm": 0.37773585319519043, "learning_rate": 8.186776237604066e-07, "loss": 0.4785, "step": 46878 }, { "epoch": 0.8703304210928885, "grad_norm": 0.3806646764278412, "learning_rate": 8.182154361420059e-07, "loss": 0.199, "step": 46880 }, { "epoch": 0.8703675512303071, "grad_norm": 0.40309634804725647, "learning_rate": 8.177533734601905e-07, "loss": 0.3107, "step": 46882 }, { "epoch": 0.8704046813677258, "grad_norm": 0.4841700792312622, "learning_rate": 8.172914357212481e-07, "loss": 0.3472, "step": 46884 }, { "epoch": 0.8704418115051444, "grad_norm": 0.5577847957611084, "learning_rate": 8.168296229314653e-07, "loss": 0.4672, "step": 46886 }, { "epoch": 0.870478941642563, "grad_norm": 0.3581286370754242, "learning_rate": 8.16367935097122e-07, "loss": 0.3389, "step": 46888 }, { "epoch": 0.8705160717799817, "grad_norm": 0.3973256051540375, "learning_rate": 8.159063722245042e-07, "loss": 0.2965, "step": 46890 }, { "epoch": 0.8705532019174003, "grad_norm": 0.24436356127262115, "learning_rate": 8.1544493431989e-07, "loss": 0.2871, "step": 46892 }, { "epoch": 0.870590332054819, "grad_norm": 0.4235929548740387, "learning_rate": 8.149836213895601e-07, "loss": 0.224, "step": 46894 }, { "epoch": 0.8706274621922375, "grad_norm": 0.29665058851242065, "learning_rate": 8.145224334397906e-07, "loss": 0.6374, "step": 46896 }, { "epoch": 0.8706645923296562, "grad_norm": 0.28437289595603943, "learning_rate": 8.140613704768597e-07, "loss": 0.1848, "step": 46898 }, { "epoch": 0.8707017224670749, "grad_norm": 0.34889310598373413, "learning_rate": 8.136004325070368e-07, "loss": 0.2822, "step": 46900 }, { "epoch": 0.8707388526044935, "grad_norm": 0.3243907690048218, "learning_rate": 8.131396195365948e-07, "loss": 0.3827, "step": 46902 }, { "epoch": 0.8707759827419121, "grad_norm": 0.34206849336624146, "learning_rate": 8.126789315718042e-07, "loss": 0.3107, "step": 46904 }, { "epoch": 0.8708131128793307, "grad_norm": 0.347871333360672, "learning_rate": 8.122183686189345e-07, "loss": 0.1952, "step": 46906 }, { "epoch": 0.8708502430167494, "grad_norm": 0.4802897274494171, "learning_rate": 8.117579306842527e-07, "loss": 0.2035, "step": 46908 }, { "epoch": 0.8708873731541681, "grad_norm": 0.4295545816421509, "learning_rate": 8.112976177740228e-07, "loss": 0.2716, "step": 46910 }, { "epoch": 0.8709245032915867, "grad_norm": 0.33927562832832336, "learning_rate": 8.10837429894511e-07, "loss": 0.3697, "step": 46912 }, { "epoch": 0.8709616334290053, "grad_norm": 0.4792118966579437, "learning_rate": 8.103773670519755e-07, "loss": 0.33, "step": 46914 }, { "epoch": 0.8709987635664239, "grad_norm": 0.3199096918106079, "learning_rate": 8.099174292526768e-07, "loss": 0.1472, "step": 46916 }, { "epoch": 0.8710358937038426, "grad_norm": 0.44600316882133484, "learning_rate": 8.094576165028756e-07, "loss": 0.3872, "step": 46918 }, { "epoch": 0.8710730238412613, "grad_norm": 0.41273948550224304, "learning_rate": 8.08997928808829e-07, "loss": 0.2859, "step": 46920 }, { "epoch": 0.8711101539786799, "grad_norm": 0.5141773223876953, "learning_rate": 8.085383661767887e-07, "loss": 0.1894, "step": 46922 }, { "epoch": 0.8711472841160985, "grad_norm": 0.33083102107048035, "learning_rate": 8.080789286130108e-07, "loss": 0.4187, "step": 46924 }, { "epoch": 0.8711844142535171, "grad_norm": 0.42429351806640625, "learning_rate": 8.076196161237437e-07, "loss": 0.1417, "step": 46926 }, { "epoch": 0.8712215443909358, "grad_norm": 0.33410561084747314, "learning_rate": 8.071604287152401e-07, "loss": 0.3883, "step": 46928 }, { "epoch": 0.8712586745283545, "grad_norm": 0.39677125215530396, "learning_rate": 8.067013663937473e-07, "loss": 0.4148, "step": 46930 }, { "epoch": 0.871295804665773, "grad_norm": 0.4086270332336426, "learning_rate": 8.062424291655114e-07, "loss": 0.3543, "step": 46932 }, { "epoch": 0.8713329348031917, "grad_norm": 0.3252888023853302, "learning_rate": 8.057836170367772e-07, "loss": 0.3542, "step": 46934 }, { "epoch": 0.8713700649406103, "grad_norm": 0.14655110239982605, "learning_rate": 8.0532493001379e-07, "loss": 0.1796, "step": 46936 }, { "epoch": 0.871407195078029, "grad_norm": 0.3896908164024353, "learning_rate": 8.04866368102788e-07, "loss": 0.22, "step": 46938 }, { "epoch": 0.8714443252154476, "grad_norm": 0.4422926604747772, "learning_rate": 8.044079313100117e-07, "loss": 0.4266, "step": 46940 }, { "epoch": 0.8714814553528663, "grad_norm": 0.28147879242897034, "learning_rate": 8.039496196417018e-07, "loss": 0.201, "step": 46942 }, { "epoch": 0.8715185854902849, "grad_norm": 0.5568515062332153, "learning_rate": 8.034914331040899e-07, "loss": 0.3268, "step": 46944 }, { "epoch": 0.8715557156277035, "grad_norm": 0.9151408672332764, "learning_rate": 8.030333717034133e-07, "loss": 0.2655, "step": 46946 }, { "epoch": 0.8715928457651222, "grad_norm": 0.39348793029785156, "learning_rate": 8.025754354459036e-07, "loss": 0.2175, "step": 46948 }, { "epoch": 0.8716299759025408, "grad_norm": 0.7325469255447388, "learning_rate": 8.021176243377948e-07, "loss": 0.3379, "step": 46950 }, { "epoch": 0.8716671060399594, "grad_norm": 0.24807314574718475, "learning_rate": 8.01659938385313e-07, "loss": 0.1573, "step": 46952 }, { "epoch": 0.8717042361773781, "grad_norm": 0.3901421129703522, "learning_rate": 8.012023775946875e-07, "loss": 0.4307, "step": 46954 }, { "epoch": 0.8717413663147967, "grad_norm": 0.3493906855583191, "learning_rate": 8.007449419721436e-07, "loss": 0.3036, "step": 46956 }, { "epoch": 0.8717784964522154, "grad_norm": 0.46491196751594543, "learning_rate": 8.002876315239061e-07, "loss": 0.187, "step": 46958 }, { "epoch": 0.871815626589634, "grad_norm": 0.2769257128238678, "learning_rate": 7.998304462561989e-07, "loss": 0.2518, "step": 46960 }, { "epoch": 0.8718527567270526, "grad_norm": 0.5014168620109558, "learning_rate": 7.993733861752407e-07, "loss": 0.2515, "step": 46962 }, { "epoch": 0.8718898868644713, "grad_norm": 0.46553605794906616, "learning_rate": 7.989164512872527e-07, "loss": 0.217, "step": 46964 }, { "epoch": 0.8719270170018899, "grad_norm": 0.30732080340385437, "learning_rate": 7.984596415984491e-07, "loss": 0.2431, "step": 46966 }, { "epoch": 0.8719641471393086, "grad_norm": 0.43322813510894775, "learning_rate": 7.980029571150494e-07, "loss": 0.2653, "step": 46968 }, { "epoch": 0.8720012772767272, "grad_norm": 0.9851709604263306, "learning_rate": 7.975463978432652e-07, "loss": 0.3684, "step": 46970 }, { "epoch": 0.8720384074141458, "grad_norm": 0.41028451919555664, "learning_rate": 7.970899637893104e-07, "loss": 0.2304, "step": 46972 }, { "epoch": 0.8720755375515645, "grad_norm": 0.40107613801956177, "learning_rate": 7.966336549593967e-07, "loss": 0.1848, "step": 46974 }, { "epoch": 0.8721126676889831, "grad_norm": 0.21391628682613373, "learning_rate": 7.961774713597304e-07, "loss": 0.2186, "step": 46976 }, { "epoch": 0.8721497978264018, "grad_norm": 0.3562232255935669, "learning_rate": 7.957214129965207e-07, "loss": 0.2498, "step": 46978 }, { "epoch": 0.8721869279638204, "grad_norm": 0.3141767382621765, "learning_rate": 7.952654798759718e-07, "loss": 0.4085, "step": 46980 }, { "epoch": 0.872224058101239, "grad_norm": 0.5665150284767151, "learning_rate": 7.948096720042897e-07, "loss": 0.3, "step": 46982 }, { "epoch": 0.8722611882386577, "grad_norm": 0.4706290364265442, "learning_rate": 7.94353989387674e-07, "loss": 0.2393, "step": 46984 }, { "epoch": 0.8722983183760763, "grad_norm": 0.42412692308425903, "learning_rate": 7.938984320323262e-07, "loss": 0.4413, "step": 46986 }, { "epoch": 0.872335448513495, "grad_norm": 0.2788406312465668, "learning_rate": 7.93442999944447e-07, "loss": 0.2225, "step": 46988 }, { "epoch": 0.8723725786509136, "grad_norm": 0.5291059613227844, "learning_rate": 7.929876931302305e-07, "loss": 0.1665, "step": 46990 }, { "epoch": 0.8724097087883322, "grad_norm": 0.3849498927593231, "learning_rate": 7.925325115958727e-07, "loss": 0.1583, "step": 46992 }, { "epoch": 0.8724468389257508, "grad_norm": 0.4301437437534332, "learning_rate": 7.920774553475685e-07, "loss": 0.3543, "step": 46994 }, { "epoch": 0.8724839690631695, "grad_norm": 0.3828851580619812, "learning_rate": 7.916225243915087e-07, "loss": 0.1856, "step": 46996 }, { "epoch": 0.8725210992005882, "grad_norm": 0.36980459094047546, "learning_rate": 7.91167718733884e-07, "loss": 0.2612, "step": 46998 }, { "epoch": 0.8725582293380068, "grad_norm": 0.31478601694107056, "learning_rate": 7.907130383808847e-07, "loss": 0.2121, "step": 47000 }, { "epoch": 0.8725953594754254, "grad_norm": 0.29716619849205017, "learning_rate": 7.902584833386939e-07, "loss": 0.3253, "step": 47002 }, { "epoch": 0.872632489612844, "grad_norm": 0.3973672091960907, "learning_rate": 7.898040536134999e-07, "loss": 0.4157, "step": 47004 }, { "epoch": 0.8726696197502627, "grad_norm": 0.3406217694282532, "learning_rate": 7.893497492114854e-07, "loss": 0.2602, "step": 47006 }, { "epoch": 0.8727067498876814, "grad_norm": 0.40049558877944946, "learning_rate": 7.888955701388312e-07, "loss": 0.3306, "step": 47008 }, { "epoch": 0.8727438800251, "grad_norm": 0.44139474630355835, "learning_rate": 7.884415164017167e-07, "loss": 0.1273, "step": 47010 }, { "epoch": 0.8727810101625186, "grad_norm": 0.4285643696784973, "learning_rate": 7.879875880063237e-07, "loss": 0.2803, "step": 47012 }, { "epoch": 0.8728181402999372, "grad_norm": 0.4406658411026001, "learning_rate": 7.875337849588249e-07, "loss": 0.1408, "step": 47014 }, { "epoch": 0.8728552704373559, "grad_norm": 0.36838412284851074, "learning_rate": 7.870801072653966e-07, "loss": 0.145, "step": 47016 }, { "epoch": 0.8728924005747746, "grad_norm": 0.8065474033355713, "learning_rate": 7.866265549322127e-07, "loss": 0.2099, "step": 47018 }, { "epoch": 0.8729295307121931, "grad_norm": 0.3564400374889374, "learning_rate": 7.861731279654428e-07, "loss": 0.3134, "step": 47020 }, { "epoch": 0.8729666608496118, "grad_norm": 0.30831846594810486, "learning_rate": 7.857198263712595e-07, "loss": 0.2754, "step": 47022 }, { "epoch": 0.8730037909870304, "grad_norm": 0.4285091161727905, "learning_rate": 7.852666501558304e-07, "loss": 0.1815, "step": 47024 }, { "epoch": 0.8730409211244491, "grad_norm": 0.294721782207489, "learning_rate": 7.848135993253192e-07, "loss": 0.1206, "step": 47026 }, { "epoch": 0.8730780512618678, "grad_norm": 0.4550662636756897, "learning_rate": 7.843606738858934e-07, "loss": 0.3837, "step": 47028 }, { "epoch": 0.8731151813992863, "grad_norm": 0.37050822377204895, "learning_rate": 7.839078738437133e-07, "loss": 0.1648, "step": 47030 }, { "epoch": 0.873152311536705, "grad_norm": 0.45092859864234924, "learning_rate": 7.834551992049422e-07, "loss": 0.3774, "step": 47032 }, { "epoch": 0.8731894416741236, "grad_norm": 0.30006077885627747, "learning_rate": 7.830026499757393e-07, "loss": 0.2258, "step": 47034 }, { "epoch": 0.8732265718115423, "grad_norm": 0.5308423638343811, "learning_rate": 7.825502261622608e-07, "loss": 0.0827, "step": 47036 }, { "epoch": 0.8732637019489609, "grad_norm": 0.5286074876785278, "learning_rate": 7.820979277706675e-07, "loss": 0.2868, "step": 47038 }, { "epoch": 0.8733008320863795, "grad_norm": 0.177654430270195, "learning_rate": 7.816457548071088e-07, "loss": 0.177, "step": 47040 }, { "epoch": 0.8733379622237982, "grad_norm": 0.38603055477142334, "learning_rate": 7.811937072777387e-07, "loss": 0.253, "step": 47042 }, { "epoch": 0.8733750923612168, "grad_norm": 0.250284880399704, "learning_rate": 7.807417851887078e-07, "loss": 0.3812, "step": 47044 }, { "epoch": 0.8734122224986355, "grad_norm": 0.16648025810718536, "learning_rate": 7.80289988546169e-07, "loss": 0.3247, "step": 47046 }, { "epoch": 0.873449352636054, "grad_norm": 0.6877380013465881, "learning_rate": 7.798383173562663e-07, "loss": 0.2944, "step": 47048 }, { "epoch": 0.8734864827734727, "grad_norm": 0.3495990037918091, "learning_rate": 7.793867716251468e-07, "loss": 0.1867, "step": 47050 }, { "epoch": 0.8735236129108914, "grad_norm": 0.4121159315109253, "learning_rate": 7.789353513589537e-07, "loss": 0.4015, "step": 47052 }, { "epoch": 0.87356074304831, "grad_norm": 0.31774282455444336, "learning_rate": 7.784840565638296e-07, "loss": 0.3181, "step": 47054 }, { "epoch": 0.8735978731857287, "grad_norm": 0.5005460381507874, "learning_rate": 7.780328872459164e-07, "loss": 0.3186, "step": 47056 }, { "epoch": 0.8736350033231473, "grad_norm": 0.4335612654685974, "learning_rate": 7.775818434113514e-07, "loss": 0.3512, "step": 47058 }, { "epoch": 0.8736721334605659, "grad_norm": 0.5132916569709778, "learning_rate": 7.77130925066274e-07, "loss": 0.3421, "step": 47060 }, { "epoch": 0.8737092635979846, "grad_norm": 0.5163152813911438, "learning_rate": 7.766801322168216e-07, "loss": 0.3677, "step": 47062 }, { "epoch": 0.8737463937354032, "grad_norm": 0.37245506048202515, "learning_rate": 7.762294648691226e-07, "loss": 0.3194, "step": 47064 }, { "epoch": 0.8737835238728219, "grad_norm": 0.34529978036880493, "learning_rate": 7.757789230293122e-07, "loss": 0.3476, "step": 47066 }, { "epoch": 0.8738206540102404, "grad_norm": 0.4306011199951172, "learning_rate": 7.753285067035232e-07, "loss": 0.1598, "step": 47068 }, { "epoch": 0.8738577841476591, "grad_norm": 0.5175257325172424, "learning_rate": 7.748782158978807e-07, "loss": 0.2032, "step": 47070 }, { "epoch": 0.8738949142850778, "grad_norm": 0.35490211844444275, "learning_rate": 7.744280506185131e-07, "loss": 0.3039, "step": 47072 }, { "epoch": 0.8739320444224964, "grad_norm": 0.25291216373443604, "learning_rate": 7.739780108715455e-07, "loss": 0.2381, "step": 47074 }, { "epoch": 0.8739691745599151, "grad_norm": 0.3430781662464142, "learning_rate": 7.735280966631032e-07, "loss": 0.3225, "step": 47076 }, { "epoch": 0.8740063046973336, "grad_norm": 0.23195859789848328, "learning_rate": 7.730783079993065e-07, "loss": 0.1756, "step": 47078 }, { "epoch": 0.8740434348347523, "grad_norm": 0.5487247705459595, "learning_rate": 7.726286448862752e-07, "loss": 0.2531, "step": 47080 }, { "epoch": 0.874080564972171, "grad_norm": 0.43528392910957336, "learning_rate": 7.721791073301299e-07, "loss": 0.3473, "step": 47082 }, { "epoch": 0.8741176951095896, "grad_norm": 0.3767924904823303, "learning_rate": 7.717296953369857e-07, "loss": 0.1429, "step": 47084 }, { "epoch": 0.8741548252470083, "grad_norm": 0.36049363017082214, "learning_rate": 7.712804089129589e-07, "loss": 0.2052, "step": 47086 }, { "epoch": 0.8741919553844268, "grad_norm": 0.17538847029209137, "learning_rate": 7.708312480641633e-07, "loss": 0.156, "step": 47088 }, { "epoch": 0.8742290855218455, "grad_norm": 0.4045586585998535, "learning_rate": 7.703822127967108e-07, "loss": 0.2576, "step": 47090 }, { "epoch": 0.8742662156592641, "grad_norm": 0.5529013276100159, "learning_rate": 7.699333031167089e-07, "loss": 0.3724, "step": 47092 }, { "epoch": 0.8743033457966828, "grad_norm": 0.5965579748153687, "learning_rate": 7.694845190302669e-07, "loss": 0.2175, "step": 47094 }, { "epoch": 0.8743404759341015, "grad_norm": 0.683628261089325, "learning_rate": 7.690358605434923e-07, "loss": 0.1316, "step": 47096 }, { "epoch": 0.87437760607152, "grad_norm": 0.3826424479484558, "learning_rate": 7.685873276624888e-07, "loss": 0.1161, "step": 47098 }, { "epoch": 0.8744147362089387, "grad_norm": 0.586037278175354, "learning_rate": 7.681389203933631e-07, "loss": 0.1938, "step": 47100 }, { "epoch": 0.8744518663463573, "grad_norm": 0.5722419023513794, "learning_rate": 7.676906387422123e-07, "loss": 0.3818, "step": 47102 }, { "epoch": 0.874488996483776, "grad_norm": 0.2506225109100342, "learning_rate": 7.672424827151381e-07, "loss": 0.3142, "step": 47104 }, { "epoch": 0.8745261266211947, "grad_norm": 0.3636063039302826, "learning_rate": 7.66794452318238e-07, "loss": 0.1996, "step": 47106 }, { "epoch": 0.8745632567586132, "grad_norm": 0.3397851288318634, "learning_rate": 7.663465475576093e-07, "loss": 0.2193, "step": 47108 }, { "epoch": 0.8746003868960319, "grad_norm": 0.3968251943588257, "learning_rate": 7.658987684393471e-07, "loss": 0.4408, "step": 47110 }, { "epoch": 0.8746375170334505, "grad_norm": 0.4553351104259491, "learning_rate": 7.65451114969542e-07, "loss": 0.1583, "step": 47112 }, { "epoch": 0.8746746471708692, "grad_norm": 0.3876280188560486, "learning_rate": 7.650035871542883e-07, "loss": 0.2165, "step": 47114 }, { "epoch": 0.8747117773082879, "grad_norm": 0.587955892086029, "learning_rate": 7.645561849996719e-07, "loss": 0.1299, "step": 47116 }, { "epoch": 0.8747489074457064, "grad_norm": 0.48590391874313354, "learning_rate": 7.641089085117825e-07, "loss": 0.1491, "step": 47118 }, { "epoch": 0.8747860375831251, "grad_norm": 0.6574304103851318, "learning_rate": 7.636617576967065e-07, "loss": 0.1806, "step": 47120 }, { "epoch": 0.8748231677205437, "grad_norm": 0.34984254837036133, "learning_rate": 7.632147325605277e-07, "loss": 0.3646, "step": 47122 }, { "epoch": 0.8748602978579624, "grad_norm": 0.2127975970506668, "learning_rate": 7.627678331093291e-07, "loss": 0.1612, "step": 47124 }, { "epoch": 0.8748974279953811, "grad_norm": 0.5778454542160034, "learning_rate": 7.623210593491936e-07, "loss": 0.3123, "step": 47126 }, { "epoch": 0.8749345581327996, "grad_norm": 0.3468020260334015, "learning_rate": 7.618744112861954e-07, "loss": 0.4669, "step": 47128 }, { "epoch": 0.8749716882702183, "grad_norm": 0.3819958567619324, "learning_rate": 7.61427888926416e-07, "loss": 0.2631, "step": 47130 }, { "epoch": 0.8750088184076369, "grad_norm": 0.32784876227378845, "learning_rate": 7.609814922759318e-07, "loss": 0.2944, "step": 47132 }, { "epoch": 0.8750459485450556, "grad_norm": 0.4688751995563507, "learning_rate": 7.605352213408145e-07, "loss": 0.2224, "step": 47134 }, { "epoch": 0.8750830786824743, "grad_norm": 0.2833612859249115, "learning_rate": 7.600890761271363e-07, "loss": 0.2655, "step": 47136 }, { "epoch": 0.8751202088198928, "grad_norm": 0.5316777229309082, "learning_rate": 7.596430566409719e-07, "loss": 0.1875, "step": 47138 }, { "epoch": 0.8751573389573115, "grad_norm": 0.2816583216190338, "learning_rate": 7.591971628883843e-07, "loss": 0.1647, "step": 47140 }, { "epoch": 0.8751944690947301, "grad_norm": 0.3718714416027069, "learning_rate": 7.587513948754455e-07, "loss": 0.3959, "step": 47142 }, { "epoch": 0.8752315992321488, "grad_norm": 0.35715925693511963, "learning_rate": 7.583057526082183e-07, "loss": 0.1979, "step": 47144 }, { "epoch": 0.8752687293695673, "grad_norm": 0.4522768259048462, "learning_rate": 7.578602360927678e-07, "loss": 0.4378, "step": 47146 }, { "epoch": 0.875305859506986, "grad_norm": 0.19632217288017273, "learning_rate": 7.574148453351571e-07, "loss": 0.2197, "step": 47148 }, { "epoch": 0.8753429896444047, "grad_norm": 0.8570135235786438, "learning_rate": 7.569695803414456e-07, "loss": 0.241, "step": 47150 }, { "epoch": 0.8753801197818233, "grad_norm": 0.24787214398384094, "learning_rate": 7.56524441117692e-07, "loss": 0.1696, "step": 47152 }, { "epoch": 0.875417249919242, "grad_norm": 0.5230529308319092, "learning_rate": 7.560794276699545e-07, "loss": 0.3421, "step": 47154 }, { "epoch": 0.8754543800566605, "grad_norm": 0.38280364871025085, "learning_rate": 7.556345400042853e-07, "loss": 0.1687, "step": 47156 }, { "epoch": 0.8754915101940792, "grad_norm": 0.38877183198928833, "learning_rate": 7.551897781267392e-07, "loss": 0.1554, "step": 47158 }, { "epoch": 0.8755286403314979, "grad_norm": 0.34239476919174194, "learning_rate": 7.547451420433704e-07, "loss": 0.3574, "step": 47160 }, { "epoch": 0.8755657704689165, "grad_norm": 0.3600442111492157, "learning_rate": 7.543006317602263e-07, "loss": 0.1388, "step": 47162 }, { "epoch": 0.8756029006063352, "grad_norm": 0.4931805729866028, "learning_rate": 7.538562472833588e-07, "loss": 0.1724, "step": 47164 }, { "epoch": 0.8756400307437537, "grad_norm": 0.712645947933197, "learning_rate": 7.534119886188108e-07, "loss": 0.1219, "step": 47166 }, { "epoch": 0.8756771608811724, "grad_norm": 0.2916858196258545, "learning_rate": 7.529678557726283e-07, "loss": 0.2207, "step": 47168 }, { "epoch": 0.8757142910185911, "grad_norm": 0.41966116428375244, "learning_rate": 7.525238487508557e-07, "loss": 0.3076, "step": 47170 }, { "epoch": 0.8757514211560097, "grad_norm": 0.3707370162010193, "learning_rate": 7.520799675595336e-07, "loss": 0.3253, "step": 47172 }, { "epoch": 0.8757885512934284, "grad_norm": 0.6048619747161865, "learning_rate": 7.516362122047038e-07, "loss": 0.2772, "step": 47174 }, { "epoch": 0.8758256814308469, "grad_norm": 0.3265807330608368, "learning_rate": 7.511925826924038e-07, "loss": 0.1958, "step": 47176 }, { "epoch": 0.8758628115682656, "grad_norm": 0.45436006784439087, "learning_rate": 7.507490790286675e-07, "loss": 0.2952, "step": 47178 }, { "epoch": 0.8758999417056843, "grad_norm": 0.4619840085506439, "learning_rate": 7.503057012195325e-07, "loss": 0.3443, "step": 47180 }, { "epoch": 0.8759370718431029, "grad_norm": 0.5944494605064392, "learning_rate": 7.498624492710293e-07, "loss": 0.1944, "step": 47182 }, { "epoch": 0.8759742019805216, "grad_norm": 0.4579395651817322, "learning_rate": 7.494193231891922e-07, "loss": 0.1944, "step": 47184 }, { "epoch": 0.8760113321179401, "grad_norm": 0.37032416462898254, "learning_rate": 7.489763229800484e-07, "loss": 0.3257, "step": 47186 }, { "epoch": 0.8760484622553588, "grad_norm": 0.5908249616622925, "learning_rate": 7.485334486496287e-07, "loss": 0.3229, "step": 47188 }, { "epoch": 0.8760855923927774, "grad_norm": 0.49008986353874207, "learning_rate": 7.480907002039561e-07, "loss": 0.2797, "step": 47190 }, { "epoch": 0.8761227225301961, "grad_norm": 0.44043025374412537, "learning_rate": 7.476480776490558e-07, "loss": 0.4102, "step": 47192 }, { "epoch": 0.8761598526676148, "grad_norm": 0.5974876284599304, "learning_rate": 7.472055809909517e-07, "loss": 0.3626, "step": 47194 }, { "epoch": 0.8761969828050333, "grad_norm": 0.44135090708732605, "learning_rate": 7.467632102356659e-07, "loss": 0.3204, "step": 47196 }, { "epoch": 0.876234112942452, "grad_norm": 0.5945812463760376, "learning_rate": 7.463209653892134e-07, "loss": 0.2041, "step": 47198 }, { "epoch": 0.8762712430798706, "grad_norm": 0.38129401206970215, "learning_rate": 7.458788464576161e-07, "loss": 0.3784, "step": 47200 }, { "epoch": 0.8763083732172893, "grad_norm": 0.230791836977005, "learning_rate": 7.454368534468892e-07, "loss": 0.106, "step": 47202 }, { "epoch": 0.876345503354708, "grad_norm": 0.5226245522499084, "learning_rate": 7.449949863630446e-07, "loss": 0.1858, "step": 47204 }, { "epoch": 0.8763826334921265, "grad_norm": 0.4472498297691345, "learning_rate": 7.445532452120963e-07, "loss": 0.2524, "step": 47206 }, { "epoch": 0.8764197636295452, "grad_norm": 0.3954940140247345, "learning_rate": 7.44111630000055e-07, "loss": 0.2839, "step": 47208 }, { "epoch": 0.8764568937669638, "grad_norm": 0.46032893657684326, "learning_rate": 7.436701407329305e-07, "loss": 0.207, "step": 47210 }, { "epoch": 0.8764940239043825, "grad_norm": 0.36054563522338867, "learning_rate": 7.43228777416729e-07, "loss": 0.178, "step": 47212 }, { "epoch": 0.8765311540418012, "grad_norm": 0.3760378658771515, "learning_rate": 7.42787540057458e-07, "loss": 0.3141, "step": 47214 }, { "epoch": 0.8765682841792197, "grad_norm": 0.39963260293006897, "learning_rate": 7.423464286611181e-07, "loss": 0.1283, "step": 47216 }, { "epoch": 0.8766054143166384, "grad_norm": 0.4356691241264343, "learning_rate": 7.419054432337159e-07, "loss": 0.2503, "step": 47218 }, { "epoch": 0.876642544454057, "grad_norm": 0.2949202358722687, "learning_rate": 7.414645837812473e-07, "loss": 0.3541, "step": 47220 }, { "epoch": 0.8766796745914757, "grad_norm": 0.4196102023124695, "learning_rate": 7.410238503097134e-07, "loss": 0.2764, "step": 47222 }, { "epoch": 0.8767168047288943, "grad_norm": 0.28274640440940857, "learning_rate": 7.405832428251115e-07, "loss": 0.1035, "step": 47224 }, { "epoch": 0.8767539348663129, "grad_norm": 0.3193435072898865, "learning_rate": 7.401427613334377e-07, "loss": 0.3406, "step": 47226 }, { "epoch": 0.8767910650037316, "grad_norm": 0.3037477731704712, "learning_rate": 7.397024058406821e-07, "loss": 0.2486, "step": 47228 }, { "epoch": 0.8768281951411502, "grad_norm": 0.20076969265937805, "learning_rate": 7.392621763528396e-07, "loss": 0.2279, "step": 47230 }, { "epoch": 0.8768653252785689, "grad_norm": 0.3188711404800415, "learning_rate": 7.388220728758999e-07, "loss": 0.3365, "step": 47232 }, { "epoch": 0.8769024554159875, "grad_norm": 0.42716357111930847, "learning_rate": 7.383820954158516e-07, "loss": 0.4188, "step": 47234 }, { "epoch": 0.8769395855534061, "grad_norm": 0.836982011795044, "learning_rate": 7.379422439786821e-07, "loss": 0.2588, "step": 47236 }, { "epoch": 0.8769767156908248, "grad_norm": 0.5173723101615906, "learning_rate": 7.375025185703744e-07, "loss": 0.2435, "step": 47238 }, { "epoch": 0.8770138458282434, "grad_norm": 0.29863041639328003, "learning_rate": 7.370629191969148e-07, "loss": 0.4831, "step": 47240 }, { "epoch": 0.8770509759656621, "grad_norm": 0.35990795493125916, "learning_rate": 7.366234458642806e-07, "loss": 0.3187, "step": 47242 }, { "epoch": 0.8770881061030806, "grad_norm": 0.4894818961620331, "learning_rate": 7.361840985784552e-07, "loss": 0.3501, "step": 47244 }, { "epoch": 0.8771252362404993, "grad_norm": 0.38101810216903687, "learning_rate": 7.357448773454156e-07, "loss": 0.1858, "step": 47246 }, { "epoch": 0.877162366377918, "grad_norm": 0.3442534804344177, "learning_rate": 7.353057821711384e-07, "loss": 0.2225, "step": 47248 }, { "epoch": 0.8771994965153366, "grad_norm": 0.3877589702606201, "learning_rate": 7.348668130615988e-07, "loss": 0.2142, "step": 47250 }, { "epoch": 0.8772366266527553, "grad_norm": 0.3409903049468994, "learning_rate": 7.344279700227708e-07, "loss": 0.2784, "step": 47252 }, { "epoch": 0.8772737567901738, "grad_norm": 0.41828882694244385, "learning_rate": 7.33989253060623e-07, "loss": 0.402, "step": 47254 }, { "epoch": 0.8773108869275925, "grad_norm": 0.3038763105869293, "learning_rate": 7.335506621811272e-07, "loss": 0.3185, "step": 47256 }, { "epoch": 0.8773480170650112, "grad_norm": 0.40076905488967896, "learning_rate": 7.331121973902522e-07, "loss": 0.3043, "step": 47258 }, { "epoch": 0.8773851472024298, "grad_norm": 0.4108754098415375, "learning_rate": 7.326738586939608e-07, "loss": 0.1978, "step": 47260 }, { "epoch": 0.8774222773398485, "grad_norm": 0.30718308687210083, "learning_rate": 7.322356460982183e-07, "loss": 0.3155, "step": 47262 }, { "epoch": 0.877459407477267, "grad_norm": 0.5411142706871033, "learning_rate": 7.31797559608991e-07, "loss": 0.2352, "step": 47264 }, { "epoch": 0.8774965376146857, "grad_norm": 0.43927431106567383, "learning_rate": 7.313595992322364e-07, "loss": 0.231, "step": 47266 }, { "epoch": 0.8775336677521044, "grad_norm": 0.5780447125434875, "learning_rate": 7.309217649739142e-07, "loss": 0.21, "step": 47268 }, { "epoch": 0.877570797889523, "grad_norm": 0.4709116220474243, "learning_rate": 7.304840568399829e-07, "loss": 0.1916, "step": 47270 }, { "epoch": 0.8776079280269417, "grad_norm": 0.3406688868999481, "learning_rate": 7.300464748363978e-07, "loss": 0.314, "step": 47272 }, { "epoch": 0.8776450581643602, "grad_norm": 0.5913991928100586, "learning_rate": 7.296090189691141e-07, "loss": 0.3777, "step": 47274 }, { "epoch": 0.8776821883017789, "grad_norm": 0.519207775592804, "learning_rate": 7.291716892440848e-07, "loss": 0.3022, "step": 47276 }, { "epoch": 0.8777193184391976, "grad_norm": 0.5524421334266663, "learning_rate": 7.287344856672574e-07, "loss": 0.3045, "step": 47278 }, { "epoch": 0.8777564485766162, "grad_norm": 0.6641913652420044, "learning_rate": 7.282974082445837e-07, "loss": 0.4042, "step": 47280 }, { "epoch": 0.8777935787140349, "grad_norm": 0.3197381794452667, "learning_rate": 7.278604569820113e-07, "loss": 0.398, "step": 47282 }, { "epoch": 0.8778307088514534, "grad_norm": 0.5199897885322571, "learning_rate": 7.274236318854844e-07, "loss": 0.385, "step": 47284 }, { "epoch": 0.8778678389888721, "grad_norm": 0.49087437987327576, "learning_rate": 7.269869329609458e-07, "loss": 0.2597, "step": 47286 }, { "epoch": 0.8779049691262908, "grad_norm": 0.45158281922340393, "learning_rate": 7.265503602143398e-07, "loss": 0.32, "step": 47288 }, { "epoch": 0.8779420992637094, "grad_norm": 0.36116576194763184, "learning_rate": 7.261139136516082e-07, "loss": 0.3255, "step": 47290 }, { "epoch": 0.877979229401128, "grad_norm": 0.4417078197002411, "learning_rate": 7.256775932786853e-07, "loss": 0.2966, "step": 47292 }, { "epoch": 0.8780163595385466, "grad_norm": 0.29719746112823486, "learning_rate": 7.252413991015117e-07, "loss": 0.2038, "step": 47294 }, { "epoch": 0.8780534896759653, "grad_norm": 0.3423810303211212, "learning_rate": 7.248053311260206e-07, "loss": 0.1581, "step": 47296 }, { "epoch": 0.8780906198133839, "grad_norm": 3.120774030685425, "learning_rate": 7.243693893581461e-07, "loss": 0.282, "step": 47298 }, { "epoch": 0.8781277499508026, "grad_norm": 0.22917719185352325, "learning_rate": 7.239335738038234e-07, "loss": 0.3139, "step": 47300 }, { "epoch": 0.8781648800882212, "grad_norm": 0.3209822177886963, "learning_rate": 7.234978844689778e-07, "loss": 0.29, "step": 47302 }, { "epoch": 0.8782020102256398, "grad_norm": 0.26659253239631653, "learning_rate": 7.2306232135954e-07, "loss": 0.1453, "step": 47304 }, { "epoch": 0.8782391403630585, "grad_norm": 0.5321511030197144, "learning_rate": 7.226268844814366e-07, "loss": 0.3141, "step": 47306 }, { "epoch": 0.8782762705004771, "grad_norm": 0.324145644903183, "learning_rate": 7.221915738405905e-07, "loss": 0.231, "step": 47308 }, { "epoch": 0.8783134006378958, "grad_norm": 0.24725677073001862, "learning_rate": 7.21756389442928e-07, "loss": 0.2123, "step": 47310 }, { "epoch": 0.8783505307753144, "grad_norm": 0.21191512048244476, "learning_rate": 7.21321331294369e-07, "loss": 0.2239, "step": 47312 }, { "epoch": 0.878387660912733, "grad_norm": 0.2750058174133301, "learning_rate": 7.208863994008364e-07, "loss": 0.217, "step": 47314 }, { "epoch": 0.8784247910501517, "grad_norm": 0.2188194990158081, "learning_rate": 7.204515937682433e-07, "loss": 0.1569, "step": 47316 }, { "epoch": 0.8784619211875703, "grad_norm": 0.4977736473083496, "learning_rate": 7.200169144025082e-07, "loss": 0.2384, "step": 47318 }, { "epoch": 0.878499051324989, "grad_norm": 0.40972423553466797, "learning_rate": 7.195823613095465e-07, "loss": 0.2436, "step": 47320 }, { "epoch": 0.8785361814624076, "grad_norm": 0.461217999458313, "learning_rate": 7.191479344952723e-07, "loss": 0.3921, "step": 47322 }, { "epoch": 0.8785733115998262, "grad_norm": 0.6426409482955933, "learning_rate": 7.187136339655943e-07, "loss": 0.4639, "step": 47324 }, { "epoch": 0.8786104417372449, "grad_norm": 0.3847665786743164, "learning_rate": 7.182794597264231e-07, "loss": 0.2273, "step": 47326 }, { "epoch": 0.8786475718746635, "grad_norm": 0.5064146518707275, "learning_rate": 7.178454117836675e-07, "loss": 0.3605, "step": 47328 }, { "epoch": 0.8786847020120822, "grad_norm": 0.39227500557899475, "learning_rate": 7.174114901432305e-07, "loss": 0.1797, "step": 47330 }, { "epoch": 0.8787218321495008, "grad_norm": 0.36443546414375305, "learning_rate": 7.169776948110196e-07, "loss": 0.0445, "step": 47332 }, { "epoch": 0.8787589622869194, "grad_norm": 0.4813399612903595, "learning_rate": 7.165440257929357e-07, "loss": 0.2521, "step": 47334 }, { "epoch": 0.8787960924243381, "grad_norm": 0.22999557852745056, "learning_rate": 7.161104830948806e-07, "loss": 0.0922, "step": 47336 }, { "epoch": 0.8788332225617567, "grad_norm": 0.4467817544937134, "learning_rate": 7.156770667227542e-07, "loss": 0.2904, "step": 47338 }, { "epoch": 0.8788703526991754, "grad_norm": 0.3459838628768921, "learning_rate": 7.152437766824538e-07, "loss": 0.2703, "step": 47340 }, { "epoch": 0.8789074828365939, "grad_norm": 0.35821595788002014, "learning_rate": 7.148106129798727e-07, "loss": 0.2091, "step": 47342 }, { "epoch": 0.8789446129740126, "grad_norm": 0.2748994827270508, "learning_rate": 7.143775756209093e-07, "loss": 0.2197, "step": 47344 }, { "epoch": 0.8789817431114313, "grad_norm": 0.42028987407684326, "learning_rate": 7.139446646114511e-07, "loss": 0.2187, "step": 47346 }, { "epoch": 0.8790188732488499, "grad_norm": 0.28626084327697754, "learning_rate": 7.135118799573914e-07, "loss": 0.304, "step": 47348 }, { "epoch": 0.8790560033862685, "grad_norm": 0.3073729872703552, "learning_rate": 7.130792216646187e-07, "loss": 0.1929, "step": 47350 }, { "epoch": 0.8790931335236871, "grad_norm": 0.25476524233818054, "learning_rate": 7.126466897390217e-07, "loss": 0.3705, "step": 47352 }, { "epoch": 0.8791302636611058, "grad_norm": 0.3203011453151703, "learning_rate": 7.122142841864832e-07, "loss": 0.3743, "step": 47354 }, { "epoch": 0.8791673937985245, "grad_norm": 0.39025232195854187, "learning_rate": 7.117820050128877e-07, "loss": 0.2974, "step": 47356 }, { "epoch": 0.8792045239359431, "grad_norm": 0.37883907556533813, "learning_rate": 7.11349852224118e-07, "loss": 0.3199, "step": 47358 }, { "epoch": 0.8792416540733617, "grad_norm": 0.2734426259994507, "learning_rate": 7.109178258260541e-07, "loss": 0.2347, "step": 47360 }, { "epoch": 0.8792787842107803, "grad_norm": 0.3571133017539978, "learning_rate": 7.104859258245744e-07, "loss": 0.1747, "step": 47362 }, { "epoch": 0.879315914348199, "grad_norm": 1.1186449527740479, "learning_rate": 7.100541522255577e-07, "loss": 0.5827, "step": 47364 }, { "epoch": 0.8793530444856177, "grad_norm": 0.4527154862880707, "learning_rate": 7.096225050348771e-07, "loss": 0.3646, "step": 47366 }, { "epoch": 0.8793901746230363, "grad_norm": 0.877415120601654, "learning_rate": 7.091909842584055e-07, "loss": 0.3252, "step": 47368 }, { "epoch": 0.8794273047604549, "grad_norm": 0.403403639793396, "learning_rate": 7.087595899020139e-07, "loss": 0.2817, "step": 47370 }, { "epoch": 0.8794644348978735, "grad_norm": 0.6720659732818604, "learning_rate": 7.083283219715753e-07, "loss": 0.2892, "step": 47372 }, { "epoch": 0.8795015650352922, "grad_norm": 0.40734872221946716, "learning_rate": 7.078971804729562e-07, "loss": 0.6437, "step": 47374 }, { "epoch": 0.8795386951727109, "grad_norm": 0.287028431892395, "learning_rate": 7.07466165412023e-07, "loss": 0.2804, "step": 47376 }, { "epoch": 0.8795758253101295, "grad_norm": 0.5888128876686096, "learning_rate": 7.07035276794642e-07, "loss": 0.2229, "step": 47378 }, { "epoch": 0.8796129554475481, "grad_norm": 0.44357457756996155, "learning_rate": 7.066045146266743e-07, "loss": 0.2179, "step": 47380 }, { "epoch": 0.8796500855849667, "grad_norm": 0.2920122742652893, "learning_rate": 7.061738789139816e-07, "loss": 0.1843, "step": 47382 }, { "epoch": 0.8796872157223854, "grad_norm": 0.5150615572929382, "learning_rate": 7.057433696624249e-07, "loss": 0.3675, "step": 47384 }, { "epoch": 0.8797243458598041, "grad_norm": 0.3968726098537445, "learning_rate": 7.05312986877863e-07, "loss": 0.3215, "step": 47386 }, { "epoch": 0.8797614759972227, "grad_norm": 0.4688175618648529, "learning_rate": 7.048827305661487e-07, "loss": 0.2676, "step": 47388 }, { "epoch": 0.8797986061346413, "grad_norm": 0.548578143119812, "learning_rate": 7.044526007331398e-07, "loss": 0.1865, "step": 47390 }, { "epoch": 0.8798357362720599, "grad_norm": 0.14987076818943024, "learning_rate": 7.040225973846871e-07, "loss": 0.2546, "step": 47392 }, { "epoch": 0.8798728664094786, "grad_norm": 0.254211962223053, "learning_rate": 7.035927205266402e-07, "loss": 0.24, "step": 47394 }, { "epoch": 0.8799099965468972, "grad_norm": 0.46713411808013916, "learning_rate": 7.031629701648523e-07, "loss": 0.3307, "step": 47396 }, { "epoch": 0.8799471266843159, "grad_norm": 0.3724933862686157, "learning_rate": 7.027333463051678e-07, "loss": 0.1134, "step": 47398 }, { "epoch": 0.8799842568217345, "grad_norm": 0.43817073106765747, "learning_rate": 7.02303848953435e-07, "loss": 0.4231, "step": 47400 }, { "epoch": 0.8800213869591531, "grad_norm": 0.3878483176231384, "learning_rate": 7.018744781154963e-07, "loss": 0.4896, "step": 47402 }, { "epoch": 0.8800585170965718, "grad_norm": 0.34842830896377563, "learning_rate": 7.014452337971966e-07, "loss": 0.3072, "step": 47404 }, { "epoch": 0.8800956472339904, "grad_norm": 0.4477299451828003, "learning_rate": 7.010161160043727e-07, "loss": 0.3656, "step": 47406 }, { "epoch": 0.880132777371409, "grad_norm": 0.5465213656425476, "learning_rate": 7.005871247428675e-07, "loss": 0.1192, "step": 47408 }, { "epoch": 0.8801699075088277, "grad_norm": 0.2922872006893158, "learning_rate": 7.001582600185164e-07, "loss": 0.2156, "step": 47410 }, { "epoch": 0.8802070376462463, "grad_norm": 0.6058338284492493, "learning_rate": 6.997295218371536e-07, "loss": 0.261, "step": 47412 }, { "epoch": 0.880244167783665, "grad_norm": 0.39384642243385315, "learning_rate": 6.993009102046144e-07, "loss": 0.2768, "step": 47414 }, { "epoch": 0.8802812979210836, "grad_norm": 0.37298253178596497, "learning_rate": 6.988724251267343e-07, "loss": 0.3585, "step": 47416 }, { "epoch": 0.8803184280585022, "grad_norm": 0.3825553059577942, "learning_rate": 6.984440666093373e-07, "loss": 0.1971, "step": 47418 }, { "epoch": 0.8803555581959209, "grad_norm": 0.4948826730251312, "learning_rate": 6.980158346582555e-07, "loss": 0.2619, "step": 47420 }, { "epoch": 0.8803926883333395, "grad_norm": 0.4946448802947998, "learning_rate": 6.975877292793154e-07, "loss": 0.4049, "step": 47422 }, { "epoch": 0.8804298184707582, "grad_norm": 0.3572228252887726, "learning_rate": 6.971597504783422e-07, "loss": 0.1871, "step": 47424 }, { "epoch": 0.8804669486081768, "grad_norm": 0.2821890413761139, "learning_rate": 6.967318982611604e-07, "loss": 0.27, "step": 47426 }, { "epoch": 0.8805040787455954, "grad_norm": 0.4213487505912781, "learning_rate": 6.963041726335918e-07, "loss": 0.1807, "step": 47428 }, { "epoch": 0.8805412088830141, "grad_norm": 0.18453316390514374, "learning_rate": 6.958765736014561e-07, "loss": 0.238, "step": 47430 }, { "epoch": 0.8805783390204327, "grad_norm": 0.4465988278388977, "learning_rate": 6.954491011705689e-07, "loss": 0.4429, "step": 47432 }, { "epoch": 0.8806154691578514, "grad_norm": 0.5186497569084167, "learning_rate": 6.950217553467497e-07, "loss": 0.2183, "step": 47434 }, { "epoch": 0.88065259929527, "grad_norm": 0.32859376072883606, "learning_rate": 6.94594536135812e-07, "loss": 0.219, "step": 47436 }, { "epoch": 0.8806897294326886, "grad_norm": 0.49729156494140625, "learning_rate": 6.941674435435708e-07, "loss": 0.337, "step": 47438 }, { "epoch": 0.8807268595701073, "grad_norm": 0.4608840048313141, "learning_rate": 6.937404775758371e-07, "loss": 0.2518, "step": 47440 }, { "epoch": 0.8807639897075259, "grad_norm": 0.23189140856266022, "learning_rate": 6.933136382384198e-07, "loss": 0.2876, "step": 47442 }, { "epoch": 0.8808011198449446, "grad_norm": 0.5379676222801208, "learning_rate": 6.928869255371262e-07, "loss": 0.2543, "step": 47444 }, { "epoch": 0.8808382499823632, "grad_norm": 0.39725255966186523, "learning_rate": 6.92460339477763e-07, "loss": 0.2817, "step": 47446 }, { "epoch": 0.8808753801197818, "grad_norm": 0.6109122037887573, "learning_rate": 6.920338800661364e-07, "loss": 0.2363, "step": 47448 }, { "epoch": 0.8809125102572004, "grad_norm": 0.4034080505371094, "learning_rate": 6.916075473080486e-07, "loss": 0.2007, "step": 47450 }, { "epoch": 0.8809496403946191, "grad_norm": 0.27598267793655396, "learning_rate": 6.911813412092993e-07, "loss": 0.2483, "step": 47452 }, { "epoch": 0.8809867705320378, "grad_norm": 0.3307301700115204, "learning_rate": 6.907552617756896e-07, "loss": 0.389, "step": 47454 }, { "epoch": 0.8810239006694564, "grad_norm": 0.5051745176315308, "learning_rate": 6.903293090130158e-07, "loss": 0.2884, "step": 47456 }, { "epoch": 0.881061030806875, "grad_norm": 0.6947413682937622, "learning_rate": 6.899034829270746e-07, "loss": 0.2488, "step": 47458 }, { "epoch": 0.8810981609442936, "grad_norm": 0.32664889097213745, "learning_rate": 6.894777835236588e-07, "loss": 0.4003, "step": 47460 }, { "epoch": 0.8811352910817123, "grad_norm": 0.43620002269744873, "learning_rate": 6.890522108085629e-07, "loss": 0.1883, "step": 47462 }, { "epoch": 0.881172421219131, "grad_norm": 0.3556111454963684, "learning_rate": 6.886267647875766e-07, "loss": 0.3559, "step": 47464 }, { "epoch": 0.8812095513565495, "grad_norm": 0.49104854464530945, "learning_rate": 6.882014454664909e-07, "loss": 0.2284, "step": 47466 }, { "epoch": 0.8812466814939682, "grad_norm": 0.39098259806632996, "learning_rate": 6.877762528510901e-07, "loss": 0.1079, "step": 47468 }, { "epoch": 0.8812838116313868, "grad_norm": 0.39154544472694397, "learning_rate": 6.873511869471616e-07, "loss": 0.2838, "step": 47470 }, { "epoch": 0.8813209417688055, "grad_norm": 0.2689366042613983, "learning_rate": 6.869262477604899e-07, "loss": 0.2355, "step": 47472 }, { "epoch": 0.8813580719062242, "grad_norm": 0.4578331410884857, "learning_rate": 6.865014352968547e-07, "loss": 0.2281, "step": 47474 }, { "epoch": 0.8813952020436427, "grad_norm": 0.3212837278842926, "learning_rate": 6.860767495620369e-07, "loss": 0.2668, "step": 47476 }, { "epoch": 0.8814323321810614, "grad_norm": 0.34555530548095703, "learning_rate": 6.856521905618185e-07, "loss": 0.5203, "step": 47478 }, { "epoch": 0.88146946231848, "grad_norm": 0.4289441704750061, "learning_rate": 6.852277583019729e-07, "loss": 0.3192, "step": 47480 }, { "epoch": 0.8815065924558987, "grad_norm": 0.4660801887512207, "learning_rate": 6.848034527882752e-07, "loss": 0.2153, "step": 47482 }, { "epoch": 0.8815437225933174, "grad_norm": 0.2465222179889679, "learning_rate": 6.84379274026501e-07, "loss": 0.2545, "step": 47484 }, { "epoch": 0.8815808527307359, "grad_norm": 0.3759123384952545, "learning_rate": 6.839552220224222e-07, "loss": 0.3635, "step": 47486 }, { "epoch": 0.8816179828681546, "grad_norm": 0.5579144954681396, "learning_rate": 6.835312967818065e-07, "loss": 0.2752, "step": 47488 }, { "epoch": 0.8816551130055732, "grad_norm": 0.42652541399002075, "learning_rate": 6.831074983104268e-07, "loss": 0.2592, "step": 47490 }, { "epoch": 0.8816922431429919, "grad_norm": 0.4003450870513916, "learning_rate": 6.826838266140445e-07, "loss": 0.3484, "step": 47492 }, { "epoch": 0.8817293732804105, "grad_norm": 0.41991809010505676, "learning_rate": 6.82260281698428e-07, "loss": 0.3751, "step": 47494 }, { "epoch": 0.8817665034178291, "grad_norm": 0.6549077033996582, "learning_rate": 6.818368635693385e-07, "loss": 0.2237, "step": 47496 }, { "epoch": 0.8818036335552478, "grad_norm": 0.3264525234699249, "learning_rate": 6.814135722325365e-07, "loss": 0.2033, "step": 47498 }, { "epoch": 0.8818407636926664, "grad_norm": 0.4776899814605713, "learning_rate": 6.809904076937845e-07, "loss": 0.4267, "step": 47500 }, { "epoch": 0.8818778938300851, "grad_norm": 0.3332924544811249, "learning_rate": 6.8056736995884e-07, "loss": 0.1506, "step": 47502 }, { "epoch": 0.8819150239675037, "grad_norm": 0.561475396156311, "learning_rate": 6.801444590334594e-07, "loss": 0.2508, "step": 47504 }, { "epoch": 0.8819521541049223, "grad_norm": 0.422680139541626, "learning_rate": 6.797216749233948e-07, "loss": 0.3893, "step": 47506 }, { "epoch": 0.881989284242341, "grad_norm": 0.5122622847557068, "learning_rate": 6.792990176344017e-07, "loss": 0.4203, "step": 47508 }, { "epoch": 0.8820264143797596, "grad_norm": 0.44281259179115295, "learning_rate": 6.788764871722298e-07, "loss": 0.2362, "step": 47510 }, { "epoch": 0.8820635445171783, "grad_norm": 0.4901391267776489, "learning_rate": 6.784540835426301e-07, "loss": 0.0903, "step": 47512 }, { "epoch": 0.8821006746545969, "grad_norm": 0.5330834984779358, "learning_rate": 6.780318067513469e-07, "loss": 0.4631, "step": 47514 }, { "epoch": 0.8821378047920155, "grad_norm": 0.29579687118530273, "learning_rate": 6.776096568041312e-07, "loss": 0.4088, "step": 47516 }, { "epoch": 0.8821749349294342, "grad_norm": 0.3763114809989929, "learning_rate": 6.771876337067218e-07, "loss": 0.2277, "step": 47518 }, { "epoch": 0.8822120650668528, "grad_norm": 0.34977829456329346, "learning_rate": 6.76765737464864e-07, "loss": 0.1521, "step": 47520 }, { "epoch": 0.8822491952042715, "grad_norm": 0.8410979509353638, "learning_rate": 6.763439680842987e-07, "loss": 0.2808, "step": 47522 }, { "epoch": 0.88228632534169, "grad_norm": 0.37012603878974915, "learning_rate": 6.759223255707637e-07, "loss": 0.2859, "step": 47524 }, { "epoch": 0.8823234554791087, "grad_norm": 0.33140870928764343, "learning_rate": 6.755008099299976e-07, "loss": 0.276, "step": 47526 }, { "epoch": 0.8823605856165274, "grad_norm": 0.5419058203697205, "learning_rate": 6.750794211677348e-07, "loss": 0.293, "step": 47528 }, { "epoch": 0.882397715753946, "grad_norm": 0.383899986743927, "learning_rate": 6.746581592897106e-07, "loss": 0.4261, "step": 47530 }, { "epoch": 0.8824348458913647, "grad_norm": 0.41641825437545776, "learning_rate": 6.74237024301656e-07, "loss": 0.2288, "step": 47532 }, { "epoch": 0.8824719760287832, "grad_norm": 0.41651269793510437, "learning_rate": 6.738160162093011e-07, "loss": 0.3479, "step": 47534 }, { "epoch": 0.8825091061662019, "grad_norm": 0.36212146282196045, "learning_rate": 6.733951350183765e-07, "loss": 0.3046, "step": 47536 }, { "epoch": 0.8825462363036206, "grad_norm": 0.37156036496162415, "learning_rate": 6.729743807346068e-07, "loss": 0.3054, "step": 47538 }, { "epoch": 0.8825833664410392, "grad_norm": 0.5652598142623901, "learning_rate": 6.725537533637172e-07, "loss": 0.1989, "step": 47540 }, { "epoch": 0.8826204965784579, "grad_norm": 0.5056232810020447, "learning_rate": 6.721332529114344e-07, "loss": 0.3204, "step": 47542 }, { "epoch": 0.8826576267158764, "grad_norm": 0.311834454536438, "learning_rate": 6.717128793834749e-07, "loss": 0.2458, "step": 47544 }, { "epoch": 0.8826947568532951, "grad_norm": 0.2799263894557953, "learning_rate": 6.712926327855629e-07, "loss": 0.4316, "step": 47546 }, { "epoch": 0.8827318869907137, "grad_norm": 0.3284052908420563, "learning_rate": 6.708725131234151e-07, "loss": 0.2834, "step": 47548 }, { "epoch": 0.8827690171281324, "grad_norm": 0.31405022740364075, "learning_rate": 6.70452520402749e-07, "loss": 0.2321, "step": 47550 }, { "epoch": 0.8828061472655511, "grad_norm": 0.4365288019180298, "learning_rate": 6.700326546292779e-07, "loss": 0.276, "step": 47552 }, { "epoch": 0.8828432774029696, "grad_norm": 0.20331554114818573, "learning_rate": 6.696129158087183e-07, "loss": 0.161, "step": 47554 }, { "epoch": 0.8828804075403883, "grad_norm": 0.5059957504272461, "learning_rate": 6.691933039467768e-07, "loss": 0.1668, "step": 47556 }, { "epoch": 0.8829175376778069, "grad_norm": 0.269768089056015, "learning_rate": 6.687738190491677e-07, "loss": 0.3041, "step": 47558 }, { "epoch": 0.8829546678152256, "grad_norm": 0.502941906452179, "learning_rate": 6.683544611215953e-07, "loss": 0.2919, "step": 47560 }, { "epoch": 0.8829917979526443, "grad_norm": 0.33053064346313477, "learning_rate": 6.679352301697661e-07, "loss": 0.3925, "step": 47562 }, { "epoch": 0.8830289280900628, "grad_norm": 0.4819253981113434, "learning_rate": 6.675161261993868e-07, "loss": 0.2319, "step": 47564 }, { "epoch": 0.8830660582274815, "grad_norm": 0.17919844388961792, "learning_rate": 6.670971492161593e-07, "loss": 0.1961, "step": 47566 }, { "epoch": 0.8831031883649001, "grad_norm": 0.2559063136577606, "learning_rate": 6.666782992257837e-07, "loss": 0.2309, "step": 47568 }, { "epoch": 0.8831403185023188, "grad_norm": 0.32767990231513977, "learning_rate": 6.662595762339597e-07, "loss": 0.3885, "step": 47570 }, { "epoch": 0.8831774486397375, "grad_norm": 0.5749056339263916, "learning_rate": 6.65840980246385e-07, "loss": 0.1389, "step": 47572 }, { "epoch": 0.883214578777156, "grad_norm": 0.4745482802391052, "learning_rate": 6.654225112687551e-07, "loss": 0.2405, "step": 47574 }, { "epoch": 0.8832517089145747, "grad_norm": 0.2447897046804428, "learning_rate": 6.650041693067666e-07, "loss": 0.196, "step": 47576 }, { "epoch": 0.8832888390519933, "grad_norm": 0.35503894090652466, "learning_rate": 6.645859543661082e-07, "loss": 0.253, "step": 47578 }, { "epoch": 0.883325969189412, "grad_norm": 0.5454164743423462, "learning_rate": 6.641678664524742e-07, "loss": 0.1569, "step": 47580 }, { "epoch": 0.8833630993268307, "grad_norm": 0.43491607904434204, "learning_rate": 6.637499055715491e-07, "loss": 0.413, "step": 47582 }, { "epoch": 0.8834002294642492, "grad_norm": 0.24743662774562836, "learning_rate": 6.633320717290237e-07, "loss": 0.1405, "step": 47584 }, { "epoch": 0.8834373596016679, "grad_norm": 0.593473494052887, "learning_rate": 6.629143649305814e-07, "loss": 0.2301, "step": 47586 }, { "epoch": 0.8834744897390865, "grad_norm": 0.49163755774497986, "learning_rate": 6.624967851819065e-07, "loss": 0.233, "step": 47588 }, { "epoch": 0.8835116198765052, "grad_norm": 0.3950682282447815, "learning_rate": 6.620793324886809e-07, "loss": 0.5542, "step": 47590 }, { "epoch": 0.8835487500139239, "grad_norm": 0.2998001277446747, "learning_rate": 6.616620068565882e-07, "loss": 0.1195, "step": 47592 }, { "epoch": 0.8835858801513424, "grad_norm": 0.4829131066799164, "learning_rate": 6.612448082913014e-07, "loss": 0.3492, "step": 47594 }, { "epoch": 0.8836230102887611, "grad_norm": 0.40049058198928833, "learning_rate": 6.608277367985005e-07, "loss": 0.3441, "step": 47596 }, { "epoch": 0.8836601404261797, "grad_norm": 0.33591926097869873, "learning_rate": 6.60410792383861e-07, "loss": 0.239, "step": 47598 }, { "epoch": 0.8836972705635984, "grad_norm": 0.5166113376617432, "learning_rate": 6.599939750530537e-07, "loss": 0.2638, "step": 47600 }, { "epoch": 0.8837344007010169, "grad_norm": 0.3719906210899353, "learning_rate": 6.595772848117532e-07, "loss": 0.3025, "step": 47602 }, { "epoch": 0.8837715308384356, "grad_norm": 0.3433866798877716, "learning_rate": 6.591607216656281e-07, "loss": 0.2995, "step": 47604 }, { "epoch": 0.8838086609758543, "grad_norm": 0.4769951403141022, "learning_rate": 6.587442856203464e-07, "loss": 0.3557, "step": 47606 }, { "epoch": 0.8838457911132729, "grad_norm": 0.6962431073188782, "learning_rate": 6.583279766815731e-07, "loss": 0.2031, "step": 47608 }, { "epoch": 0.8838829212506916, "grad_norm": 0.49196648597717285, "learning_rate": 6.579117948549751e-07, "loss": 0.2142, "step": 47610 }, { "epoch": 0.8839200513881101, "grad_norm": 0.25094518065452576, "learning_rate": 6.574957401462156e-07, "loss": 0.2941, "step": 47612 }, { "epoch": 0.8839571815255288, "grad_norm": 0.43323275446891785, "learning_rate": 6.570798125609535e-07, "loss": 0.5006, "step": 47614 }, { "epoch": 0.8839943116629475, "grad_norm": 0.3959193527698517, "learning_rate": 6.566640121048506e-07, "loss": 0.2995, "step": 47616 }, { "epoch": 0.8840314418003661, "grad_norm": 0.285462886095047, "learning_rate": 6.562483387835661e-07, "loss": 0.258, "step": 47618 }, { "epoch": 0.8840685719377848, "grad_norm": 0.27391335368156433, "learning_rate": 6.55832792602753e-07, "loss": 0.3142, "step": 47620 }, { "epoch": 0.8841057020752033, "grad_norm": 0.45922282338142395, "learning_rate": 6.554173735680657e-07, "loss": 0.2239, "step": 47622 }, { "epoch": 0.884142832212622, "grad_norm": 0.34486123919487, "learning_rate": 6.550020816851577e-07, "loss": 0.1943, "step": 47624 }, { "epoch": 0.8841799623500407, "grad_norm": 0.525230884552002, "learning_rate": 6.545869169596797e-07, "loss": 0.1174, "step": 47626 }, { "epoch": 0.8842170924874593, "grad_norm": 0.5819212794303894, "learning_rate": 6.541718793972807e-07, "loss": 0.2081, "step": 47628 }, { "epoch": 0.884254222624878, "grad_norm": 0.5742965340614319, "learning_rate": 6.537569690036105e-07, "loss": 0.2302, "step": 47630 }, { "epoch": 0.8842913527622965, "grad_norm": 0.26287642121315, "learning_rate": 6.533421857843104e-07, "loss": 0.1799, "step": 47632 }, { "epoch": 0.8843284828997152, "grad_norm": 0.47685742378234863, "learning_rate": 6.529275297450277e-07, "loss": 0.3099, "step": 47634 }, { "epoch": 0.8843656130371339, "grad_norm": 0.39257556200027466, "learning_rate": 6.525130008914027e-07, "loss": 0.3593, "step": 47636 }, { "epoch": 0.8844027431745525, "grad_norm": 0.45302775502204895, "learning_rate": 6.520985992290774e-07, "loss": 0.3287, "step": 47638 }, { "epoch": 0.8844398733119712, "grad_norm": 0.2566416263580322, "learning_rate": 6.516843247636906e-07, "loss": 0.2613, "step": 47640 }, { "epoch": 0.8844770034493897, "grad_norm": 0.3361918330192566, "learning_rate": 6.512701775008778e-07, "loss": 0.1852, "step": 47642 }, { "epoch": 0.8845141335868084, "grad_norm": 0.32291272282600403, "learning_rate": 6.508561574462769e-07, "loss": 0.3664, "step": 47644 }, { "epoch": 0.884551263724227, "grad_norm": 0.3564322590827942, "learning_rate": 6.504422646055175e-07, "loss": 0.1937, "step": 47646 }, { "epoch": 0.8845883938616457, "grad_norm": 0.32213884592056274, "learning_rate": 6.500284989842342e-07, "loss": 0.3376, "step": 47648 }, { "epoch": 0.8846255239990644, "grad_norm": 0.3198153078556061, "learning_rate": 6.49614860588057e-07, "loss": 0.1232, "step": 47650 }, { "epoch": 0.8846626541364829, "grad_norm": 0.3309187889099121, "learning_rate": 6.492013494226135e-07, "loss": 0.2061, "step": 47652 }, { "epoch": 0.8846997842739016, "grad_norm": 0.5339601635932922, "learning_rate": 6.487879654935314e-07, "loss": 0.2892, "step": 47654 }, { "epoch": 0.8847369144113202, "grad_norm": 0.4980430603027344, "learning_rate": 6.483747088064351e-07, "loss": 0.2893, "step": 47656 }, { "epoch": 0.8847740445487389, "grad_norm": 0.4119316637516022, "learning_rate": 6.479615793669469e-07, "loss": 0.2262, "step": 47658 }, { "epoch": 0.8848111746861576, "grad_norm": 0.16685384511947632, "learning_rate": 6.4754857718069e-07, "loss": 0.1812, "step": 47660 }, { "epoch": 0.8848483048235761, "grad_norm": 0.5277692079544067, "learning_rate": 6.471357022532831e-07, "loss": 0.4157, "step": 47662 }, { "epoch": 0.8848854349609948, "grad_norm": 0.4435460567474365, "learning_rate": 6.46722954590343e-07, "loss": 0.1823, "step": 47664 }, { "epoch": 0.8849225650984134, "grad_norm": 0.33021536469459534, "learning_rate": 6.463103341974875e-07, "loss": 0.2036, "step": 47666 }, { "epoch": 0.8849596952358321, "grad_norm": 0.39361444115638733, "learning_rate": 6.45897841080333e-07, "loss": 0.3195, "step": 47668 }, { "epoch": 0.8849968253732508, "grad_norm": 0.37897324562072754, "learning_rate": 6.454854752444872e-07, "loss": 0.3483, "step": 47670 }, { "epoch": 0.8850339555106693, "grad_norm": 0.3185615837574005, "learning_rate": 6.450732366955648e-07, "loss": 0.3074, "step": 47672 }, { "epoch": 0.885071085648088, "grad_norm": 0.5312720537185669, "learning_rate": 6.446611254391744e-07, "loss": 0.2821, "step": 47674 }, { "epoch": 0.8851082157855066, "grad_norm": 0.3081205189228058, "learning_rate": 6.442491414809238e-07, "loss": 0.2136, "step": 47676 }, { "epoch": 0.8851453459229253, "grad_norm": 0.37605416774749756, "learning_rate": 6.438372848264185e-07, "loss": 0.2773, "step": 47678 }, { "epoch": 0.885182476060344, "grad_norm": 0.3170308768749237, "learning_rate": 6.43425555481264e-07, "loss": 0.2276, "step": 47680 }, { "epoch": 0.8852196061977625, "grad_norm": 0.368425190448761, "learning_rate": 6.430139534510604e-07, "loss": 0.1798, "step": 47682 }, { "epoch": 0.8852567363351812, "grad_norm": 0.28887829184532166, "learning_rate": 6.426024787414109e-07, "loss": 0.2493, "step": 47684 }, { "epoch": 0.8852938664725998, "grad_norm": 0.37934792041778564, "learning_rate": 6.42191131357911e-07, "loss": 0.2498, "step": 47686 }, { "epoch": 0.8853309966100185, "grad_norm": 0.4310918152332306, "learning_rate": 6.417799113061607e-07, "loss": 0.1756, "step": 47688 }, { "epoch": 0.8853681267474371, "grad_norm": 0.4184538722038269, "learning_rate": 6.413688185917543e-07, "loss": 0.2912, "step": 47690 }, { "epoch": 0.8854052568848557, "grad_norm": 0.5241885185241699, "learning_rate": 6.409578532202865e-07, "loss": 0.2817, "step": 47692 }, { "epoch": 0.8854423870222744, "grad_norm": 0.4601022005081177, "learning_rate": 6.405470151973492e-07, "loss": 0.4263, "step": 47694 }, { "epoch": 0.885479517159693, "grad_norm": 0.6604556441307068, "learning_rate": 6.401363045285314e-07, "loss": 0.4602, "step": 47696 }, { "epoch": 0.8855166472971117, "grad_norm": 0.44825366139411926, "learning_rate": 6.397257212194219e-07, "loss": 0.1669, "step": 47698 }, { "epoch": 0.8855537774345302, "grad_norm": 0.339515745639801, "learning_rate": 6.393152652756085e-07, "loss": 0.3069, "step": 47700 }, { "epoch": 0.8855909075719489, "grad_norm": 0.5635107159614563, "learning_rate": 6.389049367026756e-07, "loss": 0.3177, "step": 47702 }, { "epoch": 0.8856280377093676, "grad_norm": 0.2347307950258255, "learning_rate": 6.384947355062088e-07, "loss": 0.1506, "step": 47704 }, { "epoch": 0.8856651678467862, "grad_norm": 0.382336288690567, "learning_rate": 6.380846616917879e-07, "loss": 0.4081, "step": 47706 }, { "epoch": 0.8857022979842049, "grad_norm": 0.2988913059234619, "learning_rate": 6.376747152649898e-07, "loss": 0.2154, "step": 47708 }, { "epoch": 0.8857394281216234, "grad_norm": 0.272087424993515, "learning_rate": 6.372648962313965e-07, "loss": 0.2419, "step": 47710 }, { "epoch": 0.8857765582590421, "grad_norm": 0.292536199092865, "learning_rate": 6.368552045965826e-07, "loss": 0.3579, "step": 47712 }, { "epoch": 0.8858136883964608, "grad_norm": 0.4091271460056305, "learning_rate": 6.364456403661245e-07, "loss": 0.4355, "step": 47714 }, { "epoch": 0.8858508185338794, "grad_norm": 0.3770033121109009, "learning_rate": 6.360362035455936e-07, "loss": 0.1661, "step": 47716 }, { "epoch": 0.885887948671298, "grad_norm": 0.4006262421607971, "learning_rate": 6.35626894140563e-07, "loss": 0.2791, "step": 47718 }, { "epoch": 0.8859250788087166, "grad_norm": 0.36582180857658386, "learning_rate": 6.352177121565994e-07, "loss": 0.1738, "step": 47720 }, { "epoch": 0.8859622089461353, "grad_norm": 0.2699072062969208, "learning_rate": 6.348086575992718e-07, "loss": 0.1488, "step": 47722 }, { "epoch": 0.885999339083554, "grad_norm": 0.39797115325927734, "learning_rate": 6.343997304741467e-07, "loss": 0.245, "step": 47724 }, { "epoch": 0.8860364692209726, "grad_norm": 0.3836439847946167, "learning_rate": 6.339909307867886e-07, "loss": 0.5565, "step": 47726 }, { "epoch": 0.8860735993583913, "grad_norm": 0.3095533549785614, "learning_rate": 6.335822585427587e-07, "loss": 0.209, "step": 47728 }, { "epoch": 0.8861107294958098, "grad_norm": 0.4126643240451813, "learning_rate": 6.331737137476202e-07, "loss": 0.2995, "step": 47730 }, { "epoch": 0.8861478596332285, "grad_norm": 0.5190250277519226, "learning_rate": 6.327652964069298e-07, "loss": 0.1056, "step": 47732 }, { "epoch": 0.8861849897706472, "grad_norm": 0.3109513521194458, "learning_rate": 6.323570065262441e-07, "loss": 0.2578, "step": 47734 }, { "epoch": 0.8862221199080658, "grad_norm": 0.27223071455955505, "learning_rate": 6.31948844111121e-07, "loss": 0.1946, "step": 47736 }, { "epoch": 0.8862592500454844, "grad_norm": 0.29826822876930237, "learning_rate": 6.315408091671138e-07, "loss": 0.197, "step": 47738 }, { "epoch": 0.886296380182903, "grad_norm": 0.7054978013038635, "learning_rate": 6.311329016997736e-07, "loss": 0.2928, "step": 47740 }, { "epoch": 0.8863335103203217, "grad_norm": 0.4770456850528717, "learning_rate": 6.307251217146526e-07, "loss": 0.2706, "step": 47742 }, { "epoch": 0.8863706404577404, "grad_norm": 0.4543619453907013, "learning_rate": 6.303174692172997e-07, "loss": 0.3125, "step": 47744 }, { "epoch": 0.886407770595159, "grad_norm": 0.2943274676799774, "learning_rate": 6.299099442132584e-07, "loss": 0.2069, "step": 47746 }, { "epoch": 0.8864449007325776, "grad_norm": 0.30973318219184875, "learning_rate": 6.295025467080785e-07, "loss": 0.3188, "step": 47748 }, { "epoch": 0.8864820308699962, "grad_norm": 0.6491543054580688, "learning_rate": 6.290952767072988e-07, "loss": 0.2978, "step": 47750 }, { "epoch": 0.8865191610074149, "grad_norm": 0.3792312741279602, "learning_rate": 6.286881342164641e-07, "loss": 0.2469, "step": 47752 }, { "epoch": 0.8865562911448335, "grad_norm": 0.24689604341983795, "learning_rate": 6.282811192411131e-07, "loss": 0.3013, "step": 47754 }, { "epoch": 0.8865934212822522, "grad_norm": 0.47705626487731934, "learning_rate": 6.278742317867859e-07, "loss": 0.2608, "step": 47756 }, { "epoch": 0.8866305514196708, "grad_norm": 0.30380791425704956, "learning_rate": 6.274674718590157e-07, "loss": 0.4468, "step": 47758 }, { "epoch": 0.8866676815570894, "grad_norm": 0.3462724983692169, "learning_rate": 6.270608394633393e-07, "loss": 0.5226, "step": 47760 }, { "epoch": 0.8867048116945081, "grad_norm": 0.505673348903656, "learning_rate": 6.2665433460529e-07, "loss": 0.3019, "step": 47762 }, { "epoch": 0.8867419418319267, "grad_norm": 0.37957751750946045, "learning_rate": 6.26247957290399e-07, "loss": 0.3277, "step": 47764 }, { "epoch": 0.8867790719693454, "grad_norm": 0.40392905473709106, "learning_rate": 6.258417075241973e-07, "loss": 0.3923, "step": 47766 }, { "epoch": 0.886816202106764, "grad_norm": 0.39244788885116577, "learning_rate": 6.254355853122096e-07, "loss": 0.3766, "step": 47768 }, { "epoch": 0.8868533322441826, "grad_norm": 0.2624119520187378, "learning_rate": 6.250295906599646e-07, "loss": 0.2528, "step": 47770 }, { "epoch": 0.8868904623816013, "grad_norm": 0.37868067622184753, "learning_rate": 6.246237235729846e-07, "loss": 0.1362, "step": 47772 }, { "epoch": 0.8869275925190199, "grad_norm": 0.36689993739128113, "learning_rate": 6.24217984056793e-07, "loss": 0.1454, "step": 47774 }, { "epoch": 0.8869647226564386, "grad_norm": 0.5005046725273132, "learning_rate": 6.238123721169109e-07, "loss": 0.2849, "step": 47776 }, { "epoch": 0.8870018527938572, "grad_norm": 0.3269892930984497, "learning_rate": 6.234068877588573e-07, "loss": 0.3642, "step": 47778 }, { "epoch": 0.8870389829312758, "grad_norm": 0.46620404720306396, "learning_rate": 6.230015309881499e-07, "loss": 0.266, "step": 47780 }, { "epoch": 0.8870761130686945, "grad_norm": 0.3269730508327484, "learning_rate": 6.225963018103054e-07, "loss": 0.1691, "step": 47782 }, { "epoch": 0.8871132432061131, "grad_norm": 0.3888849914073944, "learning_rate": 6.22191200230835e-07, "loss": 0.2703, "step": 47784 }, { "epoch": 0.8871503733435318, "grad_norm": 0.31519120931625366, "learning_rate": 6.217862262552532e-07, "loss": 0.2591, "step": 47786 }, { "epoch": 0.8871875034809504, "grad_norm": 0.398238867521286, "learning_rate": 6.213813798890711e-07, "loss": 0.2628, "step": 47788 }, { "epoch": 0.887224633618369, "grad_norm": 0.48543480038642883, "learning_rate": 6.209766611377954e-07, "loss": 0.2232, "step": 47790 }, { "epoch": 0.8872617637557877, "grad_norm": 0.3339442312717438, "learning_rate": 6.205720700069329e-07, "loss": 0.1835, "step": 47792 }, { "epoch": 0.8872988938932063, "grad_norm": 0.5636805295944214, "learning_rate": 6.201676065019924e-07, "loss": 0.3088, "step": 47794 }, { "epoch": 0.887336024030625, "grad_norm": 0.5825998783111572, "learning_rate": 6.197632706284729e-07, "loss": 0.4153, "step": 47796 }, { "epoch": 0.8873731541680435, "grad_norm": 0.3999953866004944, "learning_rate": 6.193590623918777e-07, "loss": 0.2238, "step": 47798 }, { "epoch": 0.8874102843054622, "grad_norm": 0.37967583537101746, "learning_rate": 6.189549817977081e-07, "loss": 0.4268, "step": 47800 }, { "epoch": 0.8874474144428809, "grad_norm": 0.12930822372436523, "learning_rate": 6.185510288514618e-07, "loss": 0.242, "step": 47802 }, { "epoch": 0.8874845445802995, "grad_norm": 0.5515783429145813, "learning_rate": 6.181472035586344e-07, "loss": 0.3378, "step": 47804 }, { "epoch": 0.8875216747177181, "grad_norm": 0.34625184535980225, "learning_rate": 6.177435059247238e-07, "loss": 0.22, "step": 47806 }, { "epoch": 0.8875588048551367, "grad_norm": 0.3984210789203644, "learning_rate": 6.173399359552201e-07, "loss": 0.2351, "step": 47808 }, { "epoch": 0.8875959349925554, "grad_norm": 0.42594677209854126, "learning_rate": 6.169364936556144e-07, "loss": 0.344, "step": 47810 }, { "epoch": 0.8876330651299741, "grad_norm": 0.34776416420936584, "learning_rate": 6.165331790314e-07, "loss": 0.2106, "step": 47812 }, { "epoch": 0.8876701952673927, "grad_norm": 0.8937525749206543, "learning_rate": 6.161299920880604e-07, "loss": 0.266, "step": 47814 }, { "epoch": 0.8877073254048113, "grad_norm": 0.30978524684906006, "learning_rate": 6.157269328310833e-07, "loss": 0.2721, "step": 47816 }, { "epoch": 0.8877444555422299, "grad_norm": 0.407779723405838, "learning_rate": 6.153240012659534e-07, "loss": 0.1965, "step": 47818 }, { "epoch": 0.8877815856796486, "grad_norm": 0.47428229451179504, "learning_rate": 6.149211973981562e-07, "loss": 0.3347, "step": 47820 }, { "epoch": 0.8878187158170673, "grad_norm": 0.4276329278945923, "learning_rate": 6.145185212331673e-07, "loss": 0.4252, "step": 47822 }, { "epoch": 0.8878558459544859, "grad_norm": 0.5838897228240967, "learning_rate": 6.14115972776469e-07, "loss": 0.4623, "step": 47824 }, { "epoch": 0.8878929760919045, "grad_norm": 0.5917683839797974, "learning_rate": 6.13713552033538e-07, "loss": 0.2913, "step": 47826 }, { "epoch": 0.8879301062293231, "grad_norm": 0.6393962502479553, "learning_rate": 6.1331125900985e-07, "loss": 0.3586, "step": 47828 }, { "epoch": 0.8879672363667418, "grad_norm": 0.4340791702270508, "learning_rate": 6.129090937108817e-07, "loss": 0.1538, "step": 47830 }, { "epoch": 0.8880043665041605, "grad_norm": 0.3339197337627411, "learning_rate": 6.125070561421009e-07, "loss": 0.1525, "step": 47832 }, { "epoch": 0.888041496641579, "grad_norm": 0.3617398142814636, "learning_rate": 6.121051463089822e-07, "loss": 0.2503, "step": 47834 }, { "epoch": 0.8880786267789977, "grad_norm": 0.259944885969162, "learning_rate": 6.117033642169911e-07, "loss": 0.1742, "step": 47836 }, { "epoch": 0.8881157569164163, "grad_norm": 0.435729056596756, "learning_rate": 6.113017098715957e-07, "loss": 0.356, "step": 47838 }, { "epoch": 0.888152887053835, "grad_norm": 0.2981502413749695, "learning_rate": 6.109001832782624e-07, "loss": 0.1366, "step": 47840 }, { "epoch": 0.8881900171912537, "grad_norm": 0.5559622049331665, "learning_rate": 6.104987844424526e-07, "loss": 0.2665, "step": 47842 }, { "epoch": 0.8882271473286723, "grad_norm": 0.3082905113697052, "learning_rate": 6.100975133696318e-07, "loss": 0.1721, "step": 47844 }, { "epoch": 0.8882642774660909, "grad_norm": 0.2573748826980591, "learning_rate": 6.096963700652569e-07, "loss": 0.3917, "step": 47846 }, { "epoch": 0.8883014076035095, "grad_norm": 0.28783687949180603, "learning_rate": 6.092953545347857e-07, "loss": 0.2502, "step": 47848 }, { "epoch": 0.8883385377409282, "grad_norm": 0.25555160641670227, "learning_rate": 6.088944667836772e-07, "loss": 0.2047, "step": 47850 }, { "epoch": 0.8883756678783468, "grad_norm": 0.3898650109767914, "learning_rate": 6.084937068173868e-07, "loss": 0.1294, "step": 47852 }, { "epoch": 0.8884127980157654, "grad_norm": 0.35594040155410767, "learning_rate": 6.080930746413649e-07, "loss": 0.2457, "step": 47854 }, { "epoch": 0.8884499281531841, "grad_norm": 0.20641061663627625, "learning_rate": 6.076925702610659e-07, "loss": 0.13, "step": 47856 }, { "epoch": 0.8884870582906027, "grad_norm": 0.3245171904563904, "learning_rate": 6.072921936819365e-07, "loss": 0.3288, "step": 47858 }, { "epoch": 0.8885241884280214, "grad_norm": 0.4686327874660492, "learning_rate": 6.068919449094246e-07, "loss": 0.2744, "step": 47860 }, { "epoch": 0.88856131856544, "grad_norm": 0.2799648940563202, "learning_rate": 6.064918239489792e-07, "loss": 0.3497, "step": 47862 }, { "epoch": 0.8885984487028586, "grad_norm": 0.30769261717796326, "learning_rate": 6.060918308060426e-07, "loss": 0.1527, "step": 47864 }, { "epoch": 0.8886355788402773, "grad_norm": 0.4349844753742218, "learning_rate": 6.056919654860583e-07, "loss": 0.4599, "step": 47866 }, { "epoch": 0.8886727089776959, "grad_norm": 0.26441556215286255, "learning_rate": 6.052922279944661e-07, "loss": 0.123, "step": 47868 }, { "epoch": 0.8887098391151146, "grad_norm": 0.4737924039363861, "learning_rate": 6.048926183367098e-07, "loss": 0.389, "step": 47870 }, { "epoch": 0.8887469692525332, "grad_norm": 0.23293496668338776, "learning_rate": 6.044931365182205e-07, "loss": 0.2639, "step": 47872 }, { "epoch": 0.8887840993899518, "grad_norm": 0.5110318064689636, "learning_rate": 6.040937825444382e-07, "loss": 0.2486, "step": 47874 }, { "epoch": 0.8888212295273705, "grad_norm": 0.33499446511268616, "learning_rate": 6.036945564207941e-07, "loss": 0.2498, "step": 47876 }, { "epoch": 0.8888583596647891, "grad_norm": 0.378844290971756, "learning_rate": 6.032954581527228e-07, "loss": 0.305, "step": 47878 }, { "epoch": 0.8888954898022078, "grad_norm": 0.3518020808696747, "learning_rate": 6.028964877456533e-07, "loss": 0.1669, "step": 47880 }, { "epoch": 0.8889326199396264, "grad_norm": 0.33926478028297424, "learning_rate": 6.024976452050169e-07, "loss": 0.3425, "step": 47882 }, { "epoch": 0.888969750077045, "grad_norm": 0.42721250653266907, "learning_rate": 6.020989305362368e-07, "loss": 0.1519, "step": 47884 }, { "epoch": 0.8890068802144637, "grad_norm": 0.5861724615097046, "learning_rate": 6.017003437447399e-07, "loss": 0.2668, "step": 47886 }, { "epoch": 0.8890440103518823, "grad_norm": 0.27624666690826416, "learning_rate": 6.013018848359508e-07, "loss": 0.2564, "step": 47888 }, { "epoch": 0.889081140489301, "grad_norm": 0.3486524820327759, "learning_rate": 6.009035538152907e-07, "loss": 0.4284, "step": 47890 }, { "epoch": 0.8891182706267196, "grad_norm": 0.33791929483413696, "learning_rate": 6.005053506881797e-07, "loss": 0.272, "step": 47892 }, { "epoch": 0.8891554007641382, "grad_norm": 0.3999594748020172, "learning_rate": 6.001072754600367e-07, "loss": 0.2798, "step": 47894 }, { "epoch": 0.8891925309015569, "grad_norm": 0.44175106287002563, "learning_rate": 5.997093281362787e-07, "loss": 0.4866, "step": 47896 }, { "epoch": 0.8892296610389755, "grad_norm": 0.4189186096191406, "learning_rate": 5.993115087223178e-07, "loss": 0.3025, "step": 47898 }, { "epoch": 0.8892667911763942, "grad_norm": 0.40308451652526855, "learning_rate": 5.989138172235687e-07, "loss": 0.2799, "step": 47900 }, { "epoch": 0.8893039213138128, "grad_norm": 0.2144613265991211, "learning_rate": 5.985162536454436e-07, "loss": 0.3259, "step": 47902 }, { "epoch": 0.8893410514512314, "grad_norm": 0.26510271430015564, "learning_rate": 5.981188179933517e-07, "loss": 0.4199, "step": 47904 }, { "epoch": 0.88937818158865, "grad_norm": 0.5337905883789062, "learning_rate": 5.977215102727008e-07, "loss": 0.227, "step": 47906 }, { "epoch": 0.8894153117260687, "grad_norm": 0.2218475490808487, "learning_rate": 5.973243304888976e-07, "loss": 0.2282, "step": 47908 }, { "epoch": 0.8894524418634874, "grad_norm": 1.2308385372161865, "learning_rate": 5.969272786473457e-07, "loss": 0.3518, "step": 47910 }, { "epoch": 0.889489572000906, "grad_norm": 0.4328368902206421, "learning_rate": 5.965303547534474e-07, "loss": 0.3292, "step": 47912 }, { "epoch": 0.8895267021383246, "grad_norm": 0.4345044791698456, "learning_rate": 5.961335588126038e-07, "loss": 0.1753, "step": 47914 }, { "epoch": 0.8895638322757432, "grad_norm": 0.2850533723831177, "learning_rate": 5.957368908302175e-07, "loss": 0.2642, "step": 47916 }, { "epoch": 0.8896009624131619, "grad_norm": 0.5604518055915833, "learning_rate": 5.953403508116806e-07, "loss": 0.2437, "step": 47918 }, { "epoch": 0.8896380925505806, "grad_norm": 0.6172801852226257, "learning_rate": 5.949439387623923e-07, "loss": 0.5686, "step": 47920 }, { "epoch": 0.8896752226879991, "grad_norm": 0.5851142406463623, "learning_rate": 5.945476546877449e-07, "loss": 0.3085, "step": 47922 }, { "epoch": 0.8897123528254178, "grad_norm": 0.35563135147094727, "learning_rate": 5.941514985931318e-07, "loss": 0.2781, "step": 47924 }, { "epoch": 0.8897494829628364, "grad_norm": 0.31452447175979614, "learning_rate": 5.937554704839421e-07, "loss": 0.2733, "step": 47926 }, { "epoch": 0.8897866131002551, "grad_norm": 0.41809457540512085, "learning_rate": 5.933595703655659e-07, "loss": 0.2288, "step": 47928 }, { "epoch": 0.8898237432376738, "grad_norm": 0.5139526724815369, "learning_rate": 5.9296379824339e-07, "loss": 0.1626, "step": 47930 }, { "epoch": 0.8898608733750923, "grad_norm": 0.3840597867965698, "learning_rate": 5.925681541228001e-07, "loss": 0.3106, "step": 47932 }, { "epoch": 0.889898003512511, "grad_norm": 0.5569860935211182, "learning_rate": 5.921726380091785e-07, "loss": 0.3707, "step": 47934 }, { "epoch": 0.8899351336499296, "grad_norm": 0.4887702763080597, "learning_rate": 5.917772499079078e-07, "loss": 0.3392, "step": 47936 }, { "epoch": 0.8899722637873483, "grad_norm": 0.31022652983665466, "learning_rate": 5.913819898243678e-07, "loss": 0.3608, "step": 47938 }, { "epoch": 0.890009393924767, "grad_norm": 0.3625992238521576, "learning_rate": 5.909868577639366e-07, "loss": 0.546, "step": 47940 }, { "epoch": 0.8900465240621855, "grad_norm": 0.7216010689735413, "learning_rate": 5.90591853731991e-07, "loss": 0.1109, "step": 47942 }, { "epoch": 0.8900836541996042, "grad_norm": 0.4317566156387329, "learning_rate": 5.901969777339056e-07, "loss": 0.1985, "step": 47944 }, { "epoch": 0.8901207843370228, "grad_norm": 0.3587350845336914, "learning_rate": 5.898022297750549e-07, "loss": 0.2373, "step": 47946 }, { "epoch": 0.8901579144744415, "grad_norm": 0.2826683223247528, "learning_rate": 5.894076098608081e-07, "loss": 0.3457, "step": 47948 }, { "epoch": 0.89019504461186, "grad_norm": 0.5661956071853638, "learning_rate": 5.890131179965353e-07, "loss": 0.2071, "step": 47950 }, { "epoch": 0.8902321747492787, "grad_norm": 0.27288126945495605, "learning_rate": 5.886187541876054e-07, "loss": 0.0497, "step": 47952 }, { "epoch": 0.8902693048866974, "grad_norm": 0.3725036680698395, "learning_rate": 5.882245184393831e-07, "loss": 0.3025, "step": 47954 }, { "epoch": 0.890306435024116, "grad_norm": 0.531634509563446, "learning_rate": 5.878304107572341e-07, "loss": 0.2664, "step": 47956 }, { "epoch": 0.8903435651615347, "grad_norm": 0.3659232556819916, "learning_rate": 5.87436431146522e-07, "loss": 0.1179, "step": 47958 }, { "epoch": 0.8903806952989533, "grad_norm": 1.1382054090499878, "learning_rate": 5.870425796126067e-07, "loss": 0.221, "step": 47960 }, { "epoch": 0.8904178254363719, "grad_norm": 0.4274129867553711, "learning_rate": 5.866488561608442e-07, "loss": 0.1932, "step": 47962 }, { "epoch": 0.8904549555737906, "grad_norm": 0.36734524369239807, "learning_rate": 5.862552607965954e-07, "loss": 0.303, "step": 47964 }, { "epoch": 0.8904920857112092, "grad_norm": 0.37382832169532776, "learning_rate": 5.858617935252142e-07, "loss": 0.3215, "step": 47966 }, { "epoch": 0.8905292158486279, "grad_norm": 0.28260767459869385, "learning_rate": 5.854684543520561e-07, "loss": 0.4027, "step": 47968 }, { "epoch": 0.8905663459860464, "grad_norm": 0.5251832008361816, "learning_rate": 5.850752432824735e-07, "loss": 0.2231, "step": 47970 }, { "epoch": 0.8906034761234651, "grad_norm": 0.3341313898563385, "learning_rate": 5.846821603218145e-07, "loss": 0.2017, "step": 47972 }, { "epoch": 0.8906406062608838, "grad_norm": 0.41732171177864075, "learning_rate": 5.842892054754301e-07, "loss": 0.2607, "step": 47974 }, { "epoch": 0.8906777363983024, "grad_norm": 0.18817701935768127, "learning_rate": 5.838963787486651e-07, "loss": 0.2496, "step": 47976 }, { "epoch": 0.8907148665357211, "grad_norm": 0.5483757853507996, "learning_rate": 5.835036801468663e-07, "loss": 0.4654, "step": 47978 }, { "epoch": 0.8907519966731396, "grad_norm": 0.5468409061431885, "learning_rate": 5.831111096753784e-07, "loss": 0.4133, "step": 47980 }, { "epoch": 0.8907891268105583, "grad_norm": 0.41872891783714294, "learning_rate": 5.827186673395401e-07, "loss": 0.1973, "step": 47982 }, { "epoch": 0.890826256947977, "grad_norm": 0.3206910490989685, "learning_rate": 5.823263531446932e-07, "loss": 0.207, "step": 47984 }, { "epoch": 0.8908633870853956, "grad_norm": 0.320258229970932, "learning_rate": 5.819341670961753e-07, "loss": 0.2711, "step": 47986 }, { "epoch": 0.8909005172228143, "grad_norm": 0.43495386838912964, "learning_rate": 5.815421091993223e-07, "loss": 0.315, "step": 47988 }, { "epoch": 0.8909376473602328, "grad_norm": 0.36082538962364197, "learning_rate": 5.811501794594698e-07, "loss": 0.417, "step": 47990 }, { "epoch": 0.8909747774976515, "grad_norm": 0.3062189817428589, "learning_rate": 5.807583778819503e-07, "loss": 0.1601, "step": 47992 }, { "epoch": 0.8910119076350702, "grad_norm": 0.2824788987636566, "learning_rate": 5.803667044720951e-07, "loss": 0.2261, "step": 47994 }, { "epoch": 0.8910490377724888, "grad_norm": 0.30771294236183167, "learning_rate": 5.799751592352366e-07, "loss": 0.208, "step": 47996 }, { "epoch": 0.8910861679099075, "grad_norm": 0.35971924662590027, "learning_rate": 5.795837421766981e-07, "loss": 0.1885, "step": 47998 }, { "epoch": 0.891123298047326, "grad_norm": 0.22116224467754364, "learning_rate": 5.791924533018068e-07, "loss": 0.1325, "step": 48000 }, { "epoch": 0.8911604281847447, "grad_norm": 0.4358120858669281, "learning_rate": 5.788012926158892e-07, "loss": 0.2435, "step": 48002 }, { "epoch": 0.8911975583221633, "grad_norm": 0.3738580048084259, "learning_rate": 5.784102601242659e-07, "loss": 0.3359, "step": 48004 }, { "epoch": 0.891234688459582, "grad_norm": 0.4875073730945587, "learning_rate": 5.780193558322567e-07, "loss": 0.1977, "step": 48006 }, { "epoch": 0.8912718185970007, "grad_norm": 0.4058280289173126, "learning_rate": 5.776285797451841e-07, "loss": 0.3417, "step": 48008 }, { "epoch": 0.8913089487344192, "grad_norm": 0.5195993185043335, "learning_rate": 5.772379318683619e-07, "loss": 0.315, "step": 48010 }, { "epoch": 0.8913460788718379, "grad_norm": 0.42354103922843933, "learning_rate": 5.768474122071066e-07, "loss": 0.3266, "step": 48012 }, { "epoch": 0.8913832090092565, "grad_norm": 0.7197782397270203, "learning_rate": 5.76457020766733e-07, "loss": 0.258, "step": 48014 }, { "epoch": 0.8914203391466752, "grad_norm": 0.21858695149421692, "learning_rate": 5.760667575525513e-07, "loss": 0.1692, "step": 48016 }, { "epoch": 0.8914574692840939, "grad_norm": 0.2296631634235382, "learning_rate": 5.75676622569874e-07, "loss": 0.0613, "step": 48018 }, { "epoch": 0.8914945994215124, "grad_norm": 0.3595762848854065, "learning_rate": 5.7528661582401e-07, "loss": 0.1777, "step": 48020 }, { "epoch": 0.8915317295589311, "grad_norm": 0.4881545901298523, "learning_rate": 5.748967373202641e-07, "loss": 0.3185, "step": 48022 }, { "epoch": 0.8915688596963497, "grad_norm": 0.376855731010437, "learning_rate": 5.74506987063943e-07, "loss": 0.1649, "step": 48024 }, { "epoch": 0.8916059898337684, "grad_norm": 0.4989224970340729, "learning_rate": 5.741173650603482e-07, "loss": 0.3102, "step": 48026 }, { "epoch": 0.8916431199711871, "grad_norm": 0.5492688417434692, "learning_rate": 5.737278713147809e-07, "loss": 0.3578, "step": 48028 }, { "epoch": 0.8916802501086056, "grad_norm": 0.24938735365867615, "learning_rate": 5.733385058325436e-07, "loss": 0.2354, "step": 48030 }, { "epoch": 0.8917173802460243, "grad_norm": 0.4122154116630554, "learning_rate": 5.729492686189331e-07, "loss": 0.2711, "step": 48032 }, { "epoch": 0.8917545103834429, "grad_norm": 0.20196297764778137, "learning_rate": 5.725601596792474e-07, "loss": 0.2054, "step": 48034 }, { "epoch": 0.8917916405208616, "grad_norm": 0.3957515060901642, "learning_rate": 5.721711790187768e-07, "loss": 0.2445, "step": 48036 }, { "epoch": 0.8918287706582803, "grad_norm": 0.5129810571670532, "learning_rate": 5.71782326642818e-07, "loss": 0.374, "step": 48038 }, { "epoch": 0.8918659007956988, "grad_norm": 0.400850385427475, "learning_rate": 5.713936025566602e-07, "loss": 0.0983, "step": 48040 }, { "epoch": 0.8919030309331175, "grad_norm": 0.7423719763755798, "learning_rate": 5.710050067655959e-07, "loss": 0.3056, "step": 48042 }, { "epoch": 0.8919401610705361, "grad_norm": 0.2746523916721344, "learning_rate": 5.706165392749075e-07, "loss": 0.2498, "step": 48044 }, { "epoch": 0.8919772912079548, "grad_norm": 0.3743668496608734, "learning_rate": 5.702282000898863e-07, "loss": 0.1756, "step": 48046 }, { "epoch": 0.8920144213453735, "grad_norm": 0.3331935405731201, "learning_rate": 5.698399892158124e-07, "loss": 0.0956, "step": 48048 }, { "epoch": 0.892051551482792, "grad_norm": 0.3176688551902771, "learning_rate": 5.694519066579695e-07, "loss": 0.215, "step": 48050 }, { "epoch": 0.8920886816202107, "grad_norm": 0.4722760319709778, "learning_rate": 5.690639524216379e-07, "loss": 0.3981, "step": 48052 }, { "epoch": 0.8921258117576293, "grad_norm": 0.5450557470321655, "learning_rate": 5.686761265120977e-07, "loss": 0.1765, "step": 48054 }, { "epoch": 0.892162941895048, "grad_norm": 0.3873656094074249, "learning_rate": 5.682884289346247e-07, "loss": 0.1573, "step": 48056 }, { "epoch": 0.8922000720324665, "grad_norm": 0.37894609570503235, "learning_rate": 5.679008596944969e-07, "loss": 0.3798, "step": 48058 }, { "epoch": 0.8922372021698852, "grad_norm": 0.5657567977905273, "learning_rate": 5.675134187969833e-07, "loss": 0.4067, "step": 48060 }, { "epoch": 0.8922743323073039, "grad_norm": 0.27479425072669983, "learning_rate": 5.671261062473599e-07, "loss": 0.1712, "step": 48062 }, { "epoch": 0.8923114624447225, "grad_norm": 0.3373755216598511, "learning_rate": 5.667389220508946e-07, "loss": 0.281, "step": 48064 }, { "epoch": 0.8923485925821412, "grad_norm": 0.23779384791851044, "learning_rate": 5.663518662128586e-07, "loss": 0.1429, "step": 48066 }, { "epoch": 0.8923857227195597, "grad_norm": 0.6319172978401184, "learning_rate": 5.659649387385158e-07, "loss": 0.4282, "step": 48068 }, { "epoch": 0.8924228528569784, "grad_norm": 0.4576117992401123, "learning_rate": 5.655781396331306e-07, "loss": 0.3398, "step": 48070 }, { "epoch": 0.8924599829943971, "grad_norm": 0.36553531885147095, "learning_rate": 5.6519146890197e-07, "loss": 0.4287, "step": 48072 }, { "epoch": 0.8924971131318157, "grad_norm": 0.4878500998020172, "learning_rate": 5.64804926550292e-07, "loss": 0.2953, "step": 48074 }, { "epoch": 0.8925342432692344, "grad_norm": 0.4361942410469055, "learning_rate": 5.644185125833557e-07, "loss": 0.2574, "step": 48076 }, { "epoch": 0.8925713734066529, "grad_norm": 0.16884243488311768, "learning_rate": 5.640322270064214e-07, "loss": 0.075, "step": 48078 }, { "epoch": 0.8926085035440716, "grad_norm": 0.2505477964878082, "learning_rate": 5.636460698247448e-07, "loss": 0.3663, "step": 48080 }, { "epoch": 0.8926456336814903, "grad_norm": 0.4852755069732666, "learning_rate": 5.632600410435796e-07, "loss": 0.379, "step": 48082 }, { "epoch": 0.8926827638189089, "grad_norm": 0.24749785661697388, "learning_rate": 5.628741406681815e-07, "loss": 0.194, "step": 48084 }, { "epoch": 0.8927198939563276, "grad_norm": 0.4386451542377472, "learning_rate": 5.624883687037964e-07, "loss": 0.3711, "step": 48086 }, { "epoch": 0.8927570240937461, "grad_norm": 0.27535343170166016, "learning_rate": 5.621027251556777e-07, "loss": 0.0969, "step": 48088 }, { "epoch": 0.8927941542311648, "grad_norm": 0.38260334730148315, "learning_rate": 5.617172100290691e-07, "loss": 0.2146, "step": 48090 }, { "epoch": 0.8928312843685835, "grad_norm": 0.43588241934776306, "learning_rate": 5.613318233292187e-07, "loss": 0.4238, "step": 48092 }, { "epoch": 0.8928684145060021, "grad_norm": 0.39827802777290344, "learning_rate": 5.609465650613699e-07, "loss": 0.2679, "step": 48094 }, { "epoch": 0.8929055446434208, "grad_norm": 0.5190327167510986, "learning_rate": 5.605614352307675e-07, "loss": 0.3406, "step": 48096 }, { "epoch": 0.8929426747808393, "grad_norm": 0.8550142049789429, "learning_rate": 5.601764338426474e-07, "loss": 0.3625, "step": 48098 }, { "epoch": 0.892979804918258, "grad_norm": 0.5571878552436829, "learning_rate": 5.597915609022498e-07, "loss": 0.2583, "step": 48100 }, { "epoch": 0.8930169350556766, "grad_norm": 0.4084494709968567, "learning_rate": 5.594068164148137e-07, "loss": 0.3143, "step": 48102 }, { "epoch": 0.8930540651930953, "grad_norm": 0.35584506392478943, "learning_rate": 5.590222003855716e-07, "loss": 0.2047, "step": 48104 }, { "epoch": 0.893091195330514, "grad_norm": 0.3852481544017792, "learning_rate": 5.586377128197606e-07, "loss": 0.3441, "step": 48106 }, { "epoch": 0.8931283254679325, "grad_norm": 0.36523163318634033, "learning_rate": 5.582533537226087e-07, "loss": 0.2334, "step": 48108 }, { "epoch": 0.8931654556053512, "grad_norm": 0.6220179200172424, "learning_rate": 5.578691230993493e-07, "loss": 0.188, "step": 48110 }, { "epoch": 0.8932025857427698, "grad_norm": 0.3718593716621399, "learning_rate": 5.574850209552063e-07, "loss": 0.2723, "step": 48112 }, { "epoch": 0.8932397158801885, "grad_norm": 0.2846348285675049, "learning_rate": 5.571010472954097e-07, "loss": 0.2902, "step": 48114 }, { "epoch": 0.8932768460176072, "grad_norm": 0.49333465099334717, "learning_rate": 5.567172021251821e-07, "loss": 0.4848, "step": 48116 }, { "epoch": 0.8933139761550257, "grad_norm": 0.5860490798950195, "learning_rate": 5.563334854497482e-07, "loss": 0.4137, "step": 48118 }, { "epoch": 0.8933511062924444, "grad_norm": 0.3317994773387909, "learning_rate": 5.559498972743272e-07, "loss": 0.2115, "step": 48120 }, { "epoch": 0.893388236429863, "grad_norm": 0.5351721048355103, "learning_rate": 5.555664376041426e-07, "loss": 0.2338, "step": 48122 }, { "epoch": 0.8934253665672817, "grad_norm": 0.3390806019306183, "learning_rate": 5.55183106444408e-07, "loss": 0.2585, "step": 48124 }, { "epoch": 0.8934624967047003, "grad_norm": 0.44500523805618286, "learning_rate": 5.547999038003404e-07, "loss": 0.2105, "step": 48126 }, { "epoch": 0.8934996268421189, "grad_norm": 0.27266427874565125, "learning_rate": 5.544168296771557e-07, "loss": 0.1557, "step": 48128 }, { "epoch": 0.8935367569795376, "grad_norm": 0.5000883936882019, "learning_rate": 5.540338840800641e-07, "loss": 0.255, "step": 48130 }, { "epoch": 0.8935738871169562, "grad_norm": 0.4541560709476471, "learning_rate": 5.53651067014277e-07, "loss": 0.2206, "step": 48132 }, { "epoch": 0.8936110172543749, "grad_norm": 0.6393914818763733, "learning_rate": 5.532683784850057e-07, "loss": 0.171, "step": 48134 }, { "epoch": 0.8936481473917935, "grad_norm": 0.300985187292099, "learning_rate": 5.528858184974539e-07, "loss": 0.1264, "step": 48136 }, { "epoch": 0.8936852775292121, "grad_norm": 0.5072539448738098, "learning_rate": 5.525033870568286e-07, "loss": 0.4321, "step": 48138 }, { "epoch": 0.8937224076666308, "grad_norm": 0.38438692688941956, "learning_rate": 5.521210841683333e-07, "loss": 0.1433, "step": 48140 }, { "epoch": 0.8937595378040494, "grad_norm": 0.29546865820884705, "learning_rate": 5.517389098371706e-07, "loss": 0.2669, "step": 48142 }, { "epoch": 0.8937966679414681, "grad_norm": 0.26627209782600403, "learning_rate": 5.513568640685407e-07, "loss": 0.1287, "step": 48144 }, { "epoch": 0.8938337980788867, "grad_norm": 0.3024660646915436, "learning_rate": 5.50974946867644e-07, "loss": 0.3433, "step": 48146 }, { "epoch": 0.8938709282163053, "grad_norm": 0.2645702362060547, "learning_rate": 5.505931582396729e-07, "loss": 0.1188, "step": 48148 }, { "epoch": 0.893908058353724, "grad_norm": 0.21912391483783722, "learning_rate": 5.502114981898266e-07, "loss": 0.2388, "step": 48150 }, { "epoch": 0.8939451884911426, "grad_norm": 0.555739164352417, "learning_rate": 5.498299667232942e-07, "loss": 0.4138, "step": 48152 }, { "epoch": 0.8939823186285613, "grad_norm": 0.34279948472976685, "learning_rate": 5.494485638452696e-07, "loss": 0.3392, "step": 48154 }, { "epoch": 0.8940194487659798, "grad_norm": 0.2938547432422638, "learning_rate": 5.490672895609428e-07, "loss": 0.4222, "step": 48156 }, { "epoch": 0.8940565789033985, "grad_norm": 0.36053842306137085, "learning_rate": 5.486861438755009e-07, "loss": 0.1888, "step": 48158 }, { "epoch": 0.8940937090408172, "grad_norm": 0.39039263129234314, "learning_rate": 5.483051267941331e-07, "loss": 0.2318, "step": 48160 }, { "epoch": 0.8941308391782358, "grad_norm": 0.4221782088279724, "learning_rate": 5.479242383220196e-07, "loss": 0.1981, "step": 48162 }, { "epoch": 0.8941679693156545, "grad_norm": 0.4096442759037018, "learning_rate": 5.475434784643441e-07, "loss": 0.343, "step": 48164 }, { "epoch": 0.894205099453073, "grad_norm": 0.3916064202785492, "learning_rate": 5.471628472262891e-07, "loss": 0.2725, "step": 48166 }, { "epoch": 0.8942422295904917, "grad_norm": 0.38425859808921814, "learning_rate": 5.467823446130327e-07, "loss": 0.2844, "step": 48168 }, { "epoch": 0.8942793597279104, "grad_norm": 0.535915195941925, "learning_rate": 5.464019706297551e-07, "loss": 0.1161, "step": 48170 }, { "epoch": 0.894316489865329, "grad_norm": 0.4379023611545563, "learning_rate": 5.46021725281628e-07, "loss": 0.3075, "step": 48172 }, { "epoch": 0.8943536200027477, "grad_norm": 0.4208337962627411, "learning_rate": 5.456416085738292e-07, "loss": 0.2464, "step": 48174 }, { "epoch": 0.8943907501401662, "grad_norm": 0.6813507676124573, "learning_rate": 5.452616205115269e-07, "loss": 0.435, "step": 48176 }, { "epoch": 0.8944278802775849, "grad_norm": 0.3636918067932129, "learning_rate": 5.448817610998947e-07, "loss": 0.17, "step": 48178 }, { "epoch": 0.8944650104150036, "grad_norm": 0.3207550346851349, "learning_rate": 5.445020303440995e-07, "loss": 0.2887, "step": 48180 }, { "epoch": 0.8945021405524222, "grad_norm": 0.23400770127773285, "learning_rate": 5.441224282493096e-07, "loss": 0.3307, "step": 48182 }, { "epoch": 0.8945392706898408, "grad_norm": 0.463784396648407, "learning_rate": 5.437429548206907e-07, "loss": 0.2694, "step": 48184 }, { "epoch": 0.8945764008272594, "grad_norm": 0.36724919080734253, "learning_rate": 5.433636100634043e-07, "loss": 0.1757, "step": 48186 }, { "epoch": 0.8946135309646781, "grad_norm": 0.38956594467163086, "learning_rate": 5.429843939826129e-07, "loss": 0.2015, "step": 48188 }, { "epoch": 0.8946506611020968, "grad_norm": 0.34098151326179504, "learning_rate": 5.42605306583478e-07, "loss": 0.2966, "step": 48190 }, { "epoch": 0.8946877912395154, "grad_norm": 0.4431378245353699, "learning_rate": 5.422263478711575e-07, "loss": 0.2354, "step": 48192 }, { "epoch": 0.894724921376934, "grad_norm": 0.3878399133682251, "learning_rate": 5.418475178508054e-07, "loss": 0.3826, "step": 48194 }, { "epoch": 0.8947620515143526, "grad_norm": 0.21570952236652374, "learning_rate": 5.414688165275784e-07, "loss": 0.2003, "step": 48196 }, { "epoch": 0.8947991816517713, "grad_norm": 0.13801129162311554, "learning_rate": 5.410902439066302e-07, "loss": 0.0929, "step": 48198 }, { "epoch": 0.89483631178919, "grad_norm": 1.528064250946045, "learning_rate": 5.4071179999311e-07, "loss": 0.2319, "step": 48200 }, { "epoch": 0.8948734419266086, "grad_norm": 0.2992658317089081, "learning_rate": 5.403334847921681e-07, "loss": 0.3238, "step": 48202 }, { "epoch": 0.8949105720640272, "grad_norm": 0.4056158661842346, "learning_rate": 5.399552983089529e-07, "loss": 0.2226, "step": 48204 }, { "epoch": 0.8949477022014458, "grad_norm": 0.6549992561340332, "learning_rate": 5.395772405486099e-07, "loss": 0.3028, "step": 48206 }, { "epoch": 0.8949848323388645, "grad_norm": 0.4309667944908142, "learning_rate": 5.39199311516283e-07, "loss": 0.4625, "step": 48208 }, { "epoch": 0.8950219624762831, "grad_norm": 0.38017329573631287, "learning_rate": 5.388215112171169e-07, "loss": 0.3197, "step": 48210 }, { "epoch": 0.8950590926137018, "grad_norm": 0.365291029214859, "learning_rate": 5.384438396562497e-07, "loss": 0.1926, "step": 48212 }, { "epoch": 0.8950962227511204, "grad_norm": 0.35767385363578796, "learning_rate": 5.380662968388217e-07, "loss": 0.1937, "step": 48214 }, { "epoch": 0.895133352888539, "grad_norm": 0.3258022367954254, "learning_rate": 5.376888827699689e-07, "loss": 0.1238, "step": 48216 }, { "epoch": 0.8951704830259577, "grad_norm": 0.38413605093955994, "learning_rate": 5.373115974548271e-07, "loss": 0.2322, "step": 48218 }, { "epoch": 0.8952076131633763, "grad_norm": 0.2799179255962372, "learning_rate": 5.3693444089853e-07, "loss": 0.3519, "step": 48220 }, { "epoch": 0.895244743300795, "grad_norm": 0.21654854714870453, "learning_rate": 5.365574131062112e-07, "loss": 0.3351, "step": 48222 }, { "epoch": 0.8952818734382136, "grad_norm": 0.3522450625896454, "learning_rate": 5.361805140829989e-07, "loss": 0.3591, "step": 48224 }, { "epoch": 0.8953190035756322, "grad_norm": 0.7935491800308228, "learning_rate": 5.358037438340225e-07, "loss": 0.1182, "step": 48226 }, { "epoch": 0.8953561337130509, "grad_norm": 0.27044638991355896, "learning_rate": 5.354271023644087e-07, "loss": 0.1983, "step": 48228 }, { "epoch": 0.8953932638504695, "grad_norm": 0.35649988055229187, "learning_rate": 5.350505896792824e-07, "loss": 0.0751, "step": 48230 }, { "epoch": 0.8954303939878882, "grad_norm": 0.2961735725402832, "learning_rate": 5.346742057837661e-07, "loss": 0.3225, "step": 48232 }, { "epoch": 0.8954675241253068, "grad_norm": 0.3365193009376526, "learning_rate": 5.342979506829837e-07, "loss": 0.1909, "step": 48234 }, { "epoch": 0.8955046542627254, "grad_norm": 0.36907002329826355, "learning_rate": 5.339218243820532e-07, "loss": 0.3726, "step": 48236 }, { "epoch": 0.8955417844001441, "grad_norm": 0.31047526001930237, "learning_rate": 5.335458268860916e-07, "loss": 0.2041, "step": 48238 }, { "epoch": 0.8955789145375627, "grad_norm": 0.3974595069885254, "learning_rate": 5.331699582002148e-07, "loss": 0.1949, "step": 48240 }, { "epoch": 0.8956160446749813, "grad_norm": 0.26994895935058594, "learning_rate": 5.327942183295399e-07, "loss": 0.256, "step": 48242 }, { "epoch": 0.8956531748124, "grad_norm": 0.38351067900657654, "learning_rate": 5.32418607279177e-07, "loss": 0.4123, "step": 48244 }, { "epoch": 0.8956903049498186, "grad_norm": 0.3229714632034302, "learning_rate": 5.32043125054238e-07, "loss": 0.2973, "step": 48246 }, { "epoch": 0.8957274350872373, "grad_norm": 0.5426089763641357, "learning_rate": 5.31667771659834e-07, "loss": 0.4656, "step": 48248 }, { "epoch": 0.8957645652246559, "grad_norm": 0.4776879847049713, "learning_rate": 5.312925471010699e-07, "loss": 0.2504, "step": 48250 }, { "epoch": 0.8958016953620745, "grad_norm": 0.5026327967643738, "learning_rate": 5.309174513830518e-07, "loss": 0.1962, "step": 48252 }, { "epoch": 0.8958388254994931, "grad_norm": 0.42700284719467163, "learning_rate": 5.305424845108842e-07, "loss": 0.3031, "step": 48254 }, { "epoch": 0.8958759556369118, "grad_norm": 0.5479990839958191, "learning_rate": 5.301676464896699e-07, "loss": 0.1597, "step": 48256 }, { "epoch": 0.8959130857743305, "grad_norm": 0.3768352270126343, "learning_rate": 5.29792937324507e-07, "loss": 0.2337, "step": 48258 }, { "epoch": 0.8959502159117491, "grad_norm": 0.29962170124053955, "learning_rate": 5.29418357020498e-07, "loss": 0.3534, "step": 48260 }, { "epoch": 0.8959873460491677, "grad_norm": 0.46734151244163513, "learning_rate": 5.290439055827346e-07, "loss": 0.18, "step": 48262 }, { "epoch": 0.8960244761865863, "grad_norm": 0.35517027974128723, "learning_rate": 5.286695830163158e-07, "loss": 0.374, "step": 48264 }, { "epoch": 0.896061606324005, "grad_norm": 0.328797310590744, "learning_rate": 5.282953893263343e-07, "loss": 0.2609, "step": 48266 }, { "epoch": 0.8960987364614237, "grad_norm": 0.3383162021636963, "learning_rate": 5.279213245178816e-07, "loss": 0.2581, "step": 48268 }, { "epoch": 0.8961358665988423, "grad_norm": 0.3055339753627777, "learning_rate": 5.275473885960469e-07, "loss": 0.2914, "step": 48270 }, { "epoch": 0.8961729967362609, "grad_norm": 0.3556740880012512, "learning_rate": 5.271735815659195e-07, "loss": 0.3102, "step": 48272 }, { "epoch": 0.8962101268736795, "grad_norm": 0.352333128452301, "learning_rate": 5.267999034325855e-07, "loss": 0.1898, "step": 48274 }, { "epoch": 0.8962472570110982, "grad_norm": 0.28771793842315674, "learning_rate": 5.264263542011294e-07, "loss": 0.2877, "step": 48276 }, { "epoch": 0.8962843871485169, "grad_norm": 0.26419955492019653, "learning_rate": 5.260529338766341e-07, "loss": 0.2841, "step": 48278 }, { "epoch": 0.8963215172859355, "grad_norm": 0.456599622964859, "learning_rate": 5.256796424641808e-07, "loss": 0.4229, "step": 48280 }, { "epoch": 0.8963586474233541, "grad_norm": 0.5157799124717712, "learning_rate": 5.253064799688467e-07, "loss": 0.3144, "step": 48282 }, { "epoch": 0.8963957775607727, "grad_norm": 0.3839508295059204, "learning_rate": 5.249334463957134e-07, "loss": 0.2781, "step": 48284 }, { "epoch": 0.8964329076981914, "grad_norm": 0.4695371389389038, "learning_rate": 5.245605417498545e-07, "loss": 0.4401, "step": 48286 }, { "epoch": 0.8964700378356101, "grad_norm": 0.20175091922283173, "learning_rate": 5.241877660363437e-07, "loss": 0.1287, "step": 48288 }, { "epoch": 0.8965071679730287, "grad_norm": 0.5565608739852905, "learning_rate": 5.238151192602548e-07, "loss": 0.1541, "step": 48290 }, { "epoch": 0.8965442981104473, "grad_norm": 0.413633793592453, "learning_rate": 5.234426014266569e-07, "loss": 0.2915, "step": 48292 }, { "epoch": 0.8965814282478659, "grad_norm": 0.39114534854888916, "learning_rate": 5.230702125406195e-07, "loss": 0.4223, "step": 48294 }, { "epoch": 0.8966185583852846, "grad_norm": 0.3685234487056732, "learning_rate": 5.226979526072107e-07, "loss": 0.3139, "step": 48296 }, { "epoch": 0.8966556885227033, "grad_norm": 0.41675615310668945, "learning_rate": 5.223258216314941e-07, "loss": 0.328, "step": 48298 }, { "epoch": 0.8966928186601218, "grad_norm": 0.4430311620235443, "learning_rate": 5.219538196185359e-07, "loss": 0.3075, "step": 48300 }, { "epoch": 0.8967299487975405, "grad_norm": 0.28352394700050354, "learning_rate": 5.215819465733951e-07, "loss": 0.2421, "step": 48302 }, { "epoch": 0.8967670789349591, "grad_norm": 0.46194711327552795, "learning_rate": 5.212102025011323e-07, "loss": 0.1643, "step": 48304 }, { "epoch": 0.8968042090723778, "grad_norm": 0.49363917112350464, "learning_rate": 5.208385874068067e-07, "loss": 0.4399, "step": 48306 }, { "epoch": 0.8968413392097964, "grad_norm": 0.4763575792312622, "learning_rate": 5.204671012954743e-07, "loss": 0.1382, "step": 48308 }, { "epoch": 0.896878469347215, "grad_norm": 0.6243524551391602, "learning_rate": 5.20095744172191e-07, "loss": 0.1494, "step": 48310 }, { "epoch": 0.8969155994846337, "grad_norm": 0.27767983078956604, "learning_rate": 5.197245160420084e-07, "loss": 0.3101, "step": 48312 }, { "epoch": 0.8969527296220523, "grad_norm": 0.2878401279449463, "learning_rate": 5.193534169099778e-07, "loss": 0.324, "step": 48314 }, { "epoch": 0.896989859759471, "grad_norm": 0.3336949944496155, "learning_rate": 5.1898244678115e-07, "loss": 0.1184, "step": 48316 }, { "epoch": 0.8970269898968896, "grad_norm": 0.35772940516471863, "learning_rate": 5.186116056605728e-07, "loss": 0.252, "step": 48318 }, { "epoch": 0.8970641200343082, "grad_norm": 0.19549548625946045, "learning_rate": 5.182408935532912e-07, "loss": 0.1601, "step": 48320 }, { "epoch": 0.8971012501717269, "grad_norm": 0.47092145681381226, "learning_rate": 5.17870310464349e-07, "loss": 0.1275, "step": 48322 }, { "epoch": 0.8971383803091455, "grad_norm": 0.5483776926994324, "learning_rate": 5.17499856398792e-07, "loss": 0.4689, "step": 48324 }, { "epoch": 0.8971755104465642, "grad_norm": 0.3513294458389282, "learning_rate": 5.171295313616565e-07, "loss": 0.3317, "step": 48326 }, { "epoch": 0.8972126405839828, "grad_norm": 0.2194170206785202, "learning_rate": 5.167593353579836e-07, "loss": 0.2261, "step": 48328 }, { "epoch": 0.8972497707214014, "grad_norm": 0.2987911105155945, "learning_rate": 5.163892683928107e-07, "loss": 0.2115, "step": 48330 }, { "epoch": 0.8972869008588201, "grad_norm": 0.48497337102890015, "learning_rate": 5.160193304711725e-07, "loss": 0.4345, "step": 48332 }, { "epoch": 0.8973240309962387, "grad_norm": 0.63909512758255, "learning_rate": 5.156495215981039e-07, "loss": 0.3077, "step": 48334 }, { "epoch": 0.8973611611336574, "grad_norm": 0.7699863314628601, "learning_rate": 5.152798417786386e-07, "loss": 0.2063, "step": 48336 }, { "epoch": 0.897398291271076, "grad_norm": 0.4420967698097229, "learning_rate": 5.149102910178017e-07, "loss": 0.1443, "step": 48338 }, { "epoch": 0.8974354214084946, "grad_norm": 0.3161070644855499, "learning_rate": 5.145408693206255e-07, "loss": 0.3842, "step": 48340 }, { "epoch": 0.8974725515459133, "grad_norm": 0.29047083854675293, "learning_rate": 5.141715766921373e-07, "loss": 0.313, "step": 48342 }, { "epoch": 0.8975096816833319, "grad_norm": 0.7735493183135986, "learning_rate": 5.138024131373598e-07, "loss": 0.2356, "step": 48344 }, { "epoch": 0.8975468118207506, "grad_norm": 0.22955092787742615, "learning_rate": 5.134333786613155e-07, "loss": 0.4078, "step": 48346 }, { "epoch": 0.8975839419581692, "grad_norm": 0.3891378939151764, "learning_rate": 5.130644732690293e-07, "loss": 0.607, "step": 48348 }, { "epoch": 0.8976210720955878, "grad_norm": 0.2374430000782013, "learning_rate": 5.126956969655184e-07, "loss": 0.2733, "step": 48350 }, { "epoch": 0.8976582022330065, "grad_norm": 0.44031134247779846, "learning_rate": 5.123270497558009e-07, "loss": 0.3678, "step": 48352 }, { "epoch": 0.8976953323704251, "grad_norm": 0.41577571630477905, "learning_rate": 5.119585316448927e-07, "loss": 0.2075, "step": 48354 }, { "epoch": 0.8977324625078438, "grad_norm": 0.43238401412963867, "learning_rate": 5.11590142637809e-07, "loss": 0.2338, "step": 48356 }, { "epoch": 0.8977695926452623, "grad_norm": 0.3450582027435303, "learning_rate": 5.112218827395631e-07, "loss": 0.1607, "step": 48358 }, { "epoch": 0.897806722782681, "grad_norm": 0.4294826090335846, "learning_rate": 5.108537519551659e-07, "loss": 0.3226, "step": 48360 }, { "epoch": 0.8978438529200996, "grad_norm": 0.46395614743232727, "learning_rate": 5.104857502896254e-07, "loss": 0.1944, "step": 48362 }, { "epoch": 0.8978809830575183, "grad_norm": 0.5648952722549438, "learning_rate": 5.101178777479498e-07, "loss": 0.1904, "step": 48364 }, { "epoch": 0.897918113194937, "grad_norm": 0.3792033791542053, "learning_rate": 5.09750134335143e-07, "loss": 0.1977, "step": 48366 }, { "epoch": 0.8979552433323555, "grad_norm": 0.4301142990589142, "learning_rate": 5.093825200562108e-07, "loss": 0.4331, "step": 48368 }, { "epoch": 0.8979923734697742, "grad_norm": 0.5916496515274048, "learning_rate": 5.090150349161549e-07, "loss": 0.3613, "step": 48370 }, { "epoch": 0.8980295036071928, "grad_norm": 0.6146742701530457, "learning_rate": 5.086476789199757e-07, "loss": 0.3902, "step": 48372 }, { "epoch": 0.8980666337446115, "grad_norm": 0.4244139492511749, "learning_rate": 5.082804520726726e-07, "loss": 0.243, "step": 48374 }, { "epoch": 0.8981037638820302, "grad_norm": 0.5452306270599365, "learning_rate": 5.079133543792403e-07, "loss": 0.3112, "step": 48376 }, { "epoch": 0.8981408940194487, "grad_norm": 0.5164527893066406, "learning_rate": 5.075463858446749e-07, "loss": 0.2087, "step": 48378 }, { "epoch": 0.8981780241568674, "grad_norm": 0.36262694001197815, "learning_rate": 5.071795464739704e-07, "loss": 0.3659, "step": 48380 }, { "epoch": 0.898215154294286, "grad_norm": 0.2508375942707062, "learning_rate": 5.068128362721192e-07, "loss": 0.1814, "step": 48382 }, { "epoch": 0.8982522844317047, "grad_norm": 0.5330743193626404, "learning_rate": 5.064462552441085e-07, "loss": 0.2721, "step": 48384 }, { "epoch": 0.8982894145691234, "grad_norm": 0.19209109246730804, "learning_rate": 5.060798033949299e-07, "loss": 0.2464, "step": 48386 }, { "epoch": 0.8983265447065419, "grad_norm": 0.362298846244812, "learning_rate": 5.057134807295649e-07, "loss": 0.1363, "step": 48388 }, { "epoch": 0.8983636748439606, "grad_norm": 0.4734501242637634, "learning_rate": 5.053472872530018e-07, "loss": 0.3012, "step": 48390 }, { "epoch": 0.8984008049813792, "grad_norm": 0.35991984605789185, "learning_rate": 5.049812229702222e-07, "loss": 0.3406, "step": 48392 }, { "epoch": 0.8984379351187979, "grad_norm": 0.45024046301841736, "learning_rate": 5.046152878862065e-07, "loss": 0.4601, "step": 48394 }, { "epoch": 0.8984750652562166, "grad_norm": 0.33652982115745544, "learning_rate": 5.042494820059352e-07, "loss": 0.2312, "step": 48396 }, { "epoch": 0.8985121953936351, "grad_norm": 0.30189865827560425, "learning_rate": 5.038838053343864e-07, "loss": 0.2037, "step": 48398 }, { "epoch": 0.8985493255310538, "grad_norm": 0.2663382291793823, "learning_rate": 5.035182578765352e-07, "loss": 0.3458, "step": 48400 }, { "epoch": 0.8985864556684724, "grad_norm": 0.47923770546913147, "learning_rate": 5.03152839637353e-07, "loss": 0.354, "step": 48402 }, { "epoch": 0.8986235858058911, "grad_norm": 0.5102172493934631, "learning_rate": 5.027875506218172e-07, "loss": 0.2568, "step": 48404 }, { "epoch": 0.8986607159433097, "grad_norm": 0.3228839337825775, "learning_rate": 5.024223908348935e-07, "loss": 0.1378, "step": 48406 }, { "epoch": 0.8986978460807283, "grad_norm": 0.4514017403125763, "learning_rate": 5.020573602815526e-07, "loss": 0.2186, "step": 48408 }, { "epoch": 0.898734976218147, "grad_norm": 0.39048248529434204, "learning_rate": 5.016924589667615e-07, "loss": 0.3, "step": 48410 }, { "epoch": 0.8987721063555656, "grad_norm": 0.9240968823432922, "learning_rate": 5.013276868954864e-07, "loss": 0.2054, "step": 48412 }, { "epoch": 0.8988092364929843, "grad_norm": 0.5309037566184998, "learning_rate": 5.009630440726887e-07, "loss": 0.2719, "step": 48414 }, { "epoch": 0.8988463666304028, "grad_norm": 0.5092172026634216, "learning_rate": 5.0059853050333e-07, "loss": 0.2437, "step": 48416 }, { "epoch": 0.8988834967678215, "grad_norm": 0.4179666340351105, "learning_rate": 5.002341461923721e-07, "loss": 0.2189, "step": 48418 }, { "epoch": 0.8989206269052402, "grad_norm": 0.3597012162208557, "learning_rate": 4.99869891144773e-07, "loss": 0.1073, "step": 48420 }, { "epoch": 0.8989577570426588, "grad_norm": 0.4236372113227844, "learning_rate": 4.995057653654878e-07, "loss": 0.2731, "step": 48422 }, { "epoch": 0.8989948871800775, "grad_norm": 0.24848729372024536, "learning_rate": 4.991417688594724e-07, "loss": 0.5007, "step": 48424 }, { "epoch": 0.899032017317496, "grad_norm": 0.4562910497188568, "learning_rate": 4.987779016316807e-07, "loss": 0.4097, "step": 48426 }, { "epoch": 0.8990691474549147, "grad_norm": 0.6012105345726013, "learning_rate": 4.984141636870599e-07, "loss": 0.3095, "step": 48428 }, { "epoch": 0.8991062775923334, "grad_norm": 0.6279494166374207, "learning_rate": 4.980505550305626e-07, "loss": 0.4106, "step": 48430 }, { "epoch": 0.899143407729752, "grad_norm": 0.48101726174354553, "learning_rate": 4.976870756671348e-07, "loss": 0.1722, "step": 48432 }, { "epoch": 0.8991805378671707, "grad_norm": 0.4528746008872986, "learning_rate": 4.973237256017238e-07, "loss": 0.3341, "step": 48434 }, { "epoch": 0.8992176680045892, "grad_norm": 0.3787568211555481, "learning_rate": 4.969605048392733e-07, "loss": 0.4043, "step": 48436 }, { "epoch": 0.8992547981420079, "grad_norm": 0.5023171305656433, "learning_rate": 4.965974133847251e-07, "loss": 0.2142, "step": 48438 }, { "epoch": 0.8992919282794266, "grad_norm": 0.3304109275341034, "learning_rate": 4.962344512430206e-07, "loss": 0.4158, "step": 48440 }, { "epoch": 0.8993290584168452, "grad_norm": 0.5913110971450806, "learning_rate": 4.95871618419097e-07, "loss": 0.18, "step": 48442 }, { "epoch": 0.8993661885542639, "grad_norm": 0.22067943215370178, "learning_rate": 4.955089149178927e-07, "loss": 0.1343, "step": 48444 }, { "epoch": 0.8994033186916824, "grad_norm": 0.5273178219795227, "learning_rate": 4.951463407443446e-07, "loss": 0.1051, "step": 48446 }, { "epoch": 0.8994404488291011, "grad_norm": 0.4371103048324585, "learning_rate": 4.947838959033835e-07, "loss": 0.1502, "step": 48448 }, { "epoch": 0.8994775789665198, "grad_norm": 0.46968066692352295, "learning_rate": 4.94421580399943e-07, "loss": 0.4376, "step": 48450 }, { "epoch": 0.8995147091039384, "grad_norm": 0.33224064111709595, "learning_rate": 4.940593942389505e-07, "loss": 0.2132, "step": 48452 }, { "epoch": 0.8995518392413571, "grad_norm": 0.47070813179016113, "learning_rate": 4.936973374253362e-07, "loss": 0.2735, "step": 48454 }, { "epoch": 0.8995889693787756, "grad_norm": 0.4460408091545105, "learning_rate": 4.933354099640275e-07, "loss": 0.3298, "step": 48456 }, { "epoch": 0.8996260995161943, "grad_norm": 0.5226757526397705, "learning_rate": 4.929736118599471e-07, "loss": 0.3876, "step": 48458 }, { "epoch": 0.8996632296536129, "grad_norm": 0.49964573979377747, "learning_rate": 4.926119431180188e-07, "loss": 0.3311, "step": 48460 }, { "epoch": 0.8997003597910316, "grad_norm": 0.4073947072029114, "learning_rate": 4.922504037431664e-07, "loss": 0.4149, "step": 48462 }, { "epoch": 0.8997374899284503, "grad_norm": 0.2610103189945221, "learning_rate": 4.918889937403049e-07, "loss": 0.2189, "step": 48464 }, { "epoch": 0.8997746200658688, "grad_norm": 0.58796226978302, "learning_rate": 4.915277131143548e-07, "loss": 0.2779, "step": 48466 }, { "epoch": 0.8998117502032875, "grad_norm": 0.602207601070404, "learning_rate": 4.911665618702321e-07, "loss": 0.4202, "step": 48468 }, { "epoch": 0.8998488803407061, "grad_norm": 0.32652461528778076, "learning_rate": 4.908055400128486e-07, "loss": 0.3683, "step": 48470 }, { "epoch": 0.8998860104781248, "grad_norm": 0.22407186031341553, "learning_rate": 4.904446475471192e-07, "loss": 0.1038, "step": 48472 }, { "epoch": 0.8999231406155435, "grad_norm": 0.39134252071380615, "learning_rate": 4.900838844779543e-07, "loss": 0.2572, "step": 48474 }, { "epoch": 0.899960270752962, "grad_norm": 0.3454612195491791, "learning_rate": 4.897232508102611e-07, "loss": 0.3745, "step": 48476 }, { "epoch": 0.8999974008903807, "grad_norm": 0.4180029630661011, "learning_rate": 4.893627465489481e-07, "loss": 0.3087, "step": 48478 }, { "epoch": 0.9000345310277993, "grad_norm": 0.4802205264568329, "learning_rate": 4.8900237169892e-07, "loss": 0.2964, "step": 48480 }, { "epoch": 0.900071661165218, "grad_norm": 0.37667667865753174, "learning_rate": 4.88642126265082e-07, "loss": 0.2227, "step": 48482 }, { "epoch": 0.9001087913026367, "grad_norm": 0.517568051815033, "learning_rate": 4.882820102523334e-07, "loss": 0.2097, "step": 48484 }, { "epoch": 0.9001459214400552, "grad_norm": 0.2966461777687073, "learning_rate": 4.879220236655769e-07, "loss": 0.2065, "step": 48486 }, { "epoch": 0.9001830515774739, "grad_norm": 0.3757840394973755, "learning_rate": 4.875621665097107e-07, "loss": 0.2805, "step": 48488 }, { "epoch": 0.9002201817148925, "grad_norm": 0.30103421211242676, "learning_rate": 4.872024387896302e-07, "loss": 0.1797, "step": 48490 }, { "epoch": 0.9002573118523112, "grad_norm": 0.30888625979423523, "learning_rate": 4.868428405102287e-07, "loss": 0.3144, "step": 48492 }, { "epoch": 0.9002944419897299, "grad_norm": 0.3544558584690094, "learning_rate": 4.864833716764017e-07, "loss": 0.1703, "step": 48494 }, { "epoch": 0.9003315721271484, "grad_norm": 0.4850054085254669, "learning_rate": 4.861240322930394e-07, "loss": 0.3943, "step": 48496 }, { "epoch": 0.9003687022645671, "grad_norm": 0.47597599029541016, "learning_rate": 4.857648223650313e-07, "loss": 0.3764, "step": 48498 }, { "epoch": 0.9004058324019857, "grad_norm": 0.49046382308006287, "learning_rate": 4.854057418972669e-07, "loss": 0.0654, "step": 48500 }, { "epoch": 0.9004429625394044, "grad_norm": 0.5843791365623474, "learning_rate": 4.850467908946299e-07, "loss": 0.4429, "step": 48502 }, { "epoch": 0.900480092676823, "grad_norm": 0.7395893335342407, "learning_rate": 4.846879693620054e-07, "loss": 0.1962, "step": 48504 }, { "epoch": 0.9005172228142416, "grad_norm": 0.48328065872192383, "learning_rate": 4.843292773042762e-07, "loss": 0.3219, "step": 48506 }, { "epoch": 0.9005543529516603, "grad_norm": 0.4616237282752991, "learning_rate": 4.839707147263228e-07, "loss": 0.247, "step": 48508 }, { "epoch": 0.9005914830890789, "grad_norm": 0.29741501808166504, "learning_rate": 4.836122816330247e-07, "loss": 0.2574, "step": 48510 }, { "epoch": 0.9006286132264976, "grad_norm": 0.5822627544403076, "learning_rate": 4.8325397802926e-07, "loss": 0.3204, "step": 48512 }, { "epoch": 0.9006657433639161, "grad_norm": 0.3079688549041748, "learning_rate": 4.828958039199005e-07, "loss": 0.1709, "step": 48514 }, { "epoch": 0.9007028735013348, "grad_norm": 0.4466368556022644, "learning_rate": 4.825377593098213e-07, "loss": 0.3316, "step": 48516 }, { "epoch": 0.9007400036387535, "grad_norm": 0.530005156993866, "learning_rate": 4.82179844203896e-07, "loss": 0.2375, "step": 48518 }, { "epoch": 0.9007771337761721, "grad_norm": 0.7831388115882874, "learning_rate": 4.818220586069944e-07, "loss": 0.2368, "step": 48520 }, { "epoch": 0.9008142639135908, "grad_norm": 0.4331243336200714, "learning_rate": 4.814644025239834e-07, "loss": 0.4713, "step": 48522 }, { "epoch": 0.9008513940510093, "grad_norm": 0.39379361271858215, "learning_rate": 4.811068759597304e-07, "loss": 0.3175, "step": 48524 }, { "epoch": 0.900888524188428, "grad_norm": 0.3967888355255127, "learning_rate": 4.807494789191026e-07, "loss": 0.1991, "step": 48526 }, { "epoch": 0.9009256543258467, "grad_norm": 0.4109238386154175, "learning_rate": 4.803922114069592e-07, "loss": 0.302, "step": 48528 }, { "epoch": 0.9009627844632653, "grad_norm": 0.4563678205013275, "learning_rate": 4.800350734281633e-07, "loss": 0.2902, "step": 48530 }, { "epoch": 0.900999914600684, "grad_norm": 0.24118366837501526, "learning_rate": 4.796780649875754e-07, "loss": 0.3213, "step": 48532 }, { "epoch": 0.9010370447381025, "grad_norm": 0.45349451899528503, "learning_rate": 4.793211860900516e-07, "loss": 0.2975, "step": 48534 }, { "epoch": 0.9010741748755212, "grad_norm": 0.4888429641723633, "learning_rate": 4.789644367404489e-07, "loss": 0.2595, "step": 48536 }, { "epoch": 0.9011113050129399, "grad_norm": 0.3797314167022705, "learning_rate": 4.786078169436214e-07, "loss": 0.257, "step": 48538 }, { "epoch": 0.9011484351503585, "grad_norm": 0.38324347138404846, "learning_rate": 4.78251326704422e-07, "loss": 0.2267, "step": 48540 }, { "epoch": 0.9011855652877772, "grad_norm": 0.3864246606826782, "learning_rate": 4.778949660277e-07, "loss": 0.2727, "step": 48542 }, { "epoch": 0.9012226954251957, "grad_norm": 0.28465691208839417, "learning_rate": 4.775387349183058e-07, "loss": 0.4133, "step": 48544 }, { "epoch": 0.9012598255626144, "grad_norm": 0.2729600965976715, "learning_rate": 4.77182633381087e-07, "loss": 0.1565, "step": 48546 }, { "epoch": 0.9012969557000331, "grad_norm": 0.32435914874076843, "learning_rate": 4.768266614208872e-07, "loss": 0.2378, "step": 48548 }, { "epoch": 0.9013340858374517, "grad_norm": 0.33168768882751465, "learning_rate": 4.7647081904255375e-07, "loss": 0.312, "step": 48550 }, { "epoch": 0.9013712159748704, "grad_norm": 0.40228667855262756, "learning_rate": 4.7611510625092397e-07, "loss": 0.3253, "step": 48552 }, { "epoch": 0.9014083461122889, "grad_norm": 0.38454997539520264, "learning_rate": 4.757595230508427e-07, "loss": 0.3319, "step": 48554 }, { "epoch": 0.9014454762497076, "grad_norm": 0.5442188382148743, "learning_rate": 4.75404069447144e-07, "loss": 0.3861, "step": 48556 }, { "epoch": 0.9014826063871262, "grad_norm": 0.34160980582237244, "learning_rate": 4.7504874544466615e-07, "loss": 0.2008, "step": 48558 }, { "epoch": 0.9015197365245449, "grad_norm": 0.4575362205505371, "learning_rate": 4.7469355104824423e-07, "loss": 0.235, "step": 48560 }, { "epoch": 0.9015568666619636, "grad_norm": 0.5059443712234497, "learning_rate": 4.7433848626271204e-07, "loss": 0.1898, "step": 48562 }, { "epoch": 0.9015939967993821, "grad_norm": 0.5246545076370239, "learning_rate": 4.739835510929014e-07, "loss": 0.4661, "step": 48564 }, { "epoch": 0.9016311269368008, "grad_norm": 0.573365330696106, "learning_rate": 4.7362874554363944e-07, "loss": 0.1923, "step": 48566 }, { "epoch": 0.9016682570742194, "grad_norm": 0.5026063919067383, "learning_rate": 4.7327406961975574e-07, "loss": 0.3593, "step": 48568 }, { "epoch": 0.9017053872116381, "grad_norm": 0.35271692276000977, "learning_rate": 4.729195233260764e-07, "loss": 0.1609, "step": 48570 }, { "epoch": 0.9017425173490567, "grad_norm": 0.5646474361419678, "learning_rate": 4.7256510666742637e-07, "loss": 0.2552, "step": 48572 }, { "epoch": 0.9017796474864753, "grad_norm": 0.5500047206878662, "learning_rate": 4.7221081964862636e-07, "loss": 0.3627, "step": 48574 }, { "epoch": 0.901816777623894, "grad_norm": 0.3591936230659485, "learning_rate": 4.71856662274498e-07, "loss": 0.2086, "step": 48576 }, { "epoch": 0.9018539077613126, "grad_norm": 0.5220582485198975, "learning_rate": 4.715026345498608e-07, "loss": 0.2577, "step": 48578 }, { "epoch": 0.9018910378987313, "grad_norm": 0.33975934982299805, "learning_rate": 4.7114873647953087e-07, "loss": 0.1905, "step": 48580 }, { "epoch": 0.90192816803615, "grad_norm": 0.32543617486953735, "learning_rate": 4.7079496806832434e-07, "loss": 0.3184, "step": 48582 }, { "epoch": 0.9019652981735685, "grad_norm": 0.46310609579086304, "learning_rate": 4.7044132932105526e-07, "loss": 0.5333, "step": 48584 }, { "epoch": 0.9020024283109872, "grad_norm": 0.48632553219795227, "learning_rate": 4.700878202425352e-07, "loss": 0.2166, "step": 48586 }, { "epoch": 0.9020395584484058, "grad_norm": 0.42180293798446655, "learning_rate": 4.697344408375759e-07, "loss": 0.1619, "step": 48588 }, { "epoch": 0.9020766885858245, "grad_norm": 0.22348080575466156, "learning_rate": 4.6938119111098246e-07, "loss": 0.2101, "step": 48590 }, { "epoch": 0.9021138187232431, "grad_norm": 0.38572800159454346, "learning_rate": 4.6902807106756433e-07, "loss": 0.2411, "step": 48592 }, { "epoch": 0.9021509488606617, "grad_norm": 0.7147664427757263, "learning_rate": 4.686750807121254e-07, "loss": 0.302, "step": 48594 }, { "epoch": 0.9021880789980804, "grad_norm": 0.8897229433059692, "learning_rate": 4.683222200494697e-07, "loss": 0.3076, "step": 48596 }, { "epoch": 0.902225209135499, "grad_norm": 0.5248354077339172, "learning_rate": 4.6796948908439775e-07, "loss": 0.422, "step": 48598 }, { "epoch": 0.9022623392729177, "grad_norm": 0.32111644744873047, "learning_rate": 4.676168878217102e-07, "loss": 0.3104, "step": 48600 }, { "epoch": 0.9022994694103363, "grad_norm": 0.40496209263801575, "learning_rate": 4.672644162662021e-07, "loss": 0.2711, "step": 48602 }, { "epoch": 0.9023365995477549, "grad_norm": 0.3792456090450287, "learning_rate": 4.669120744226718e-07, "loss": 0.4368, "step": 48604 }, { "epoch": 0.9023737296851736, "grad_norm": 0.5568548440933228, "learning_rate": 4.6655986229591334e-07, "loss": 0.4542, "step": 48606 }, { "epoch": 0.9024108598225922, "grad_norm": 0.355587363243103, "learning_rate": 4.662077798907194e-07, "loss": 0.1886, "step": 48608 }, { "epoch": 0.9024479899600109, "grad_norm": 0.41871869564056396, "learning_rate": 4.658558272118796e-07, "loss": 0.4242, "step": 48610 }, { "epoch": 0.9024851200974294, "grad_norm": 0.3598412573337555, "learning_rate": 4.655040042641845e-07, "loss": 0.3388, "step": 48612 }, { "epoch": 0.9025222502348481, "grad_norm": 0.33449074625968933, "learning_rate": 4.651523110524225e-07, "loss": 0.2815, "step": 48614 }, { "epoch": 0.9025593803722668, "grad_norm": 0.4386392831802368, "learning_rate": 4.648007475813765e-07, "loss": 0.1842, "step": 48616 }, { "epoch": 0.9025965105096854, "grad_norm": 0.41692081093788147, "learning_rate": 4.6444931385583145e-07, "loss": 0.3576, "step": 48618 }, { "epoch": 0.902633640647104, "grad_norm": 0.2905862331390381, "learning_rate": 4.640980098805681e-07, "loss": 0.1182, "step": 48620 }, { "epoch": 0.9026707707845226, "grad_norm": 0.4227570593357086, "learning_rate": 4.637468356603669e-07, "loss": 0.291, "step": 48622 }, { "epoch": 0.9027079009219413, "grad_norm": 0.5767644047737122, "learning_rate": 4.6339579120000757e-07, "loss": 0.4244, "step": 48624 }, { "epoch": 0.90274503105936, "grad_norm": 0.6080482602119446, "learning_rate": 4.630448765042672e-07, "loss": 0.3619, "step": 48626 }, { "epoch": 0.9027821611967786, "grad_norm": 0.2802591919898987, "learning_rate": 4.6269409157791765e-07, "loss": 0.1157, "step": 48628 }, { "epoch": 0.9028192913341972, "grad_norm": 0.37483635544776917, "learning_rate": 4.623434364257351e-07, "loss": 0.2988, "step": 48630 }, { "epoch": 0.9028564214716158, "grad_norm": 0.2253986895084381, "learning_rate": 4.61992911052489e-07, "loss": 0.0688, "step": 48632 }, { "epoch": 0.9028935516090345, "grad_norm": 0.49188441038131714, "learning_rate": 4.6164251546295e-07, "loss": 0.2907, "step": 48634 }, { "epoch": 0.9029306817464532, "grad_norm": 0.6930758953094482, "learning_rate": 4.6129224966188656e-07, "loss": 0.4504, "step": 48636 }, { "epoch": 0.9029678118838718, "grad_norm": 0.3726752698421478, "learning_rate": 4.6094211365406263e-07, "loss": 0.1653, "step": 48638 }, { "epoch": 0.9030049420212904, "grad_norm": 0.46654459834098816, "learning_rate": 4.605921074442454e-07, "loss": 0.2352, "step": 48640 }, { "epoch": 0.903042072158709, "grad_norm": 0.5596013069152832, "learning_rate": 4.602422310371946e-07, "loss": 0.252, "step": 48642 }, { "epoch": 0.9030792022961277, "grad_norm": 0.47604313492774963, "learning_rate": 4.5989248443767175e-07, "loss": 0.2412, "step": 48644 }, { "epoch": 0.9031163324335464, "grad_norm": 0.4394693970680237, "learning_rate": 4.595428676504365e-07, "loss": 0.3459, "step": 48646 }, { "epoch": 0.903153462570965, "grad_norm": 0.23227445781230927, "learning_rate": 4.5919338068024623e-07, "loss": 0.0883, "step": 48648 }, { "epoch": 0.9031905927083836, "grad_norm": 0.4927299916744232, "learning_rate": 4.5884402353185477e-07, "loss": 0.3359, "step": 48650 }, { "epoch": 0.9032277228458022, "grad_norm": 0.29460251331329346, "learning_rate": 4.5849479621001944e-07, "loss": 0.1813, "step": 48652 }, { "epoch": 0.9032648529832209, "grad_norm": 0.2480081170797348, "learning_rate": 4.5814569871948876e-07, "loss": 0.1543, "step": 48654 }, { "epoch": 0.9033019831206396, "grad_norm": 0.38747185468673706, "learning_rate": 4.577967310650133e-07, "loss": 0.205, "step": 48656 }, { "epoch": 0.9033391132580582, "grad_norm": 0.4756328761577606, "learning_rate": 4.5744789325134373e-07, "loss": 0.3595, "step": 48658 }, { "epoch": 0.9033762433954768, "grad_norm": 0.48978331685066223, "learning_rate": 4.5709918528322293e-07, "loss": 0.2821, "step": 48660 }, { "epoch": 0.9034133735328954, "grad_norm": 0.43526962399482727, "learning_rate": 4.567506071653993e-07, "loss": 0.3355, "step": 48662 }, { "epoch": 0.9034505036703141, "grad_norm": 0.6052113175392151, "learning_rate": 4.5640215890261464e-07, "loss": 0.523, "step": 48664 }, { "epoch": 0.9034876338077327, "grad_norm": 0.5025112628936768, "learning_rate": 4.5605384049960954e-07, "loss": 0.3155, "step": 48666 }, { "epoch": 0.9035247639451514, "grad_norm": 0.5073603391647339, "learning_rate": 4.557056519611247e-07, "loss": 0.2931, "step": 48668 }, { "epoch": 0.90356189408257, "grad_norm": 0.4151134192943573, "learning_rate": 4.553575932918963e-07, "loss": 0.2682, "step": 48670 }, { "epoch": 0.9035990242199886, "grad_norm": 0.18232154846191406, "learning_rate": 4.5500966449666285e-07, "loss": 0.2648, "step": 48672 }, { "epoch": 0.9036361543574073, "grad_norm": 0.47252264618873596, "learning_rate": 4.54661865580156e-07, "loss": 0.2682, "step": 48674 }, { "epoch": 0.9036732844948259, "grad_norm": 0.33984145522117615, "learning_rate": 4.5431419654711205e-07, "loss": 0.2533, "step": 48676 }, { "epoch": 0.9037104146322446, "grad_norm": 0.43220630288124084, "learning_rate": 4.539666574022572e-07, "loss": 0.2848, "step": 48678 }, { "epoch": 0.9037475447696632, "grad_norm": 0.2816605865955353, "learning_rate": 4.5361924815032435e-07, "loss": 0.5339, "step": 48680 }, { "epoch": 0.9037846749070818, "grad_norm": 0.4373616576194763, "learning_rate": 4.532719687960363e-07, "loss": 0.229, "step": 48682 }, { "epoch": 0.9038218050445005, "grad_norm": 0.47878390550613403, "learning_rate": 4.5292481934412267e-07, "loss": 0.4327, "step": 48684 }, { "epoch": 0.9038589351819191, "grad_norm": 0.4835664629936218, "learning_rate": 4.525777997993042e-07, "loss": 0.3592, "step": 48686 }, { "epoch": 0.9038960653193377, "grad_norm": 0.35434237122535706, "learning_rate": 4.5223091016630474e-07, "loss": 0.2468, "step": 48688 }, { "epoch": 0.9039331954567564, "grad_norm": 0.2538731098175049, "learning_rate": 4.518841504498439e-07, "loss": 0.1439, "step": 48690 }, { "epoch": 0.903970325594175, "grad_norm": 0.40721482038497925, "learning_rate": 4.515375206546402e-07, "loss": 0.0177, "step": 48692 }, { "epoch": 0.9040074557315937, "grad_norm": 0.24740934371948242, "learning_rate": 4.5119102078540865e-07, "loss": 0.3613, "step": 48694 }, { "epoch": 0.9040445858690123, "grad_norm": 0.29808369278907776, "learning_rate": 4.5084465084686556e-07, "loss": 0.3033, "step": 48696 }, { "epoch": 0.904081716006431, "grad_norm": 0.4125361144542694, "learning_rate": 4.504984108437238e-07, "loss": 0.2456, "step": 48698 }, { "epoch": 0.9041188461438496, "grad_norm": 0.6495465040206909, "learning_rate": 4.5015230078069626e-07, "loss": 0.2548, "step": 48700 }, { "epoch": 0.9041559762812682, "grad_norm": 0.4618299901485443, "learning_rate": 4.498063206624892e-07, "loss": 0.2063, "step": 48702 }, { "epoch": 0.9041931064186869, "grad_norm": 0.7141796350479126, "learning_rate": 4.4946047049381323e-07, "loss": 0.3362, "step": 48704 }, { "epoch": 0.9042302365561055, "grad_norm": 0.38592690229415894, "learning_rate": 4.4911475027937137e-07, "loss": 0.1687, "step": 48706 }, { "epoch": 0.9042673666935241, "grad_norm": 0.5550637245178223, "learning_rate": 4.4876916002387086e-07, "loss": 0.2972, "step": 48708 }, { "epoch": 0.9043044968309427, "grad_norm": 0.3366844356060028, "learning_rate": 4.484236997320113e-07, "loss": 0.1932, "step": 48710 }, { "epoch": 0.9043416269683614, "grad_norm": 0.3520852029323578, "learning_rate": 4.480783694084956e-07, "loss": 0.2444, "step": 48712 }, { "epoch": 0.9043787571057801, "grad_norm": 0.46371978521347046, "learning_rate": 4.4773316905802335e-07, "loss": 0.1259, "step": 48714 }, { "epoch": 0.9044158872431987, "grad_norm": 0.440447598695755, "learning_rate": 4.4738809868528964e-07, "loss": 0.2993, "step": 48716 }, { "epoch": 0.9044530173806173, "grad_norm": 0.34427210688591003, "learning_rate": 4.470431582949897e-07, "loss": 0.1995, "step": 48718 }, { "epoch": 0.9044901475180359, "grad_norm": 0.4193849563598633, "learning_rate": 4.466983478918174e-07, "loss": 0.2159, "step": 48720 }, { "epoch": 0.9045272776554546, "grad_norm": 0.5327879786491394, "learning_rate": 4.4635366748046693e-07, "loss": 0.1666, "step": 48722 }, { "epoch": 0.9045644077928733, "grad_norm": 0.4924314022064209, "learning_rate": 4.4600911706562555e-07, "loss": 0.2975, "step": 48724 }, { "epoch": 0.9046015379302919, "grad_norm": 0.35008883476257324, "learning_rate": 4.45664696651984e-07, "loss": 0.2516, "step": 48726 }, { "epoch": 0.9046386680677105, "grad_norm": 0.644422173500061, "learning_rate": 4.4532040624422624e-07, "loss": 0.1305, "step": 48728 }, { "epoch": 0.9046757982051291, "grad_norm": 0.3314388394355774, "learning_rate": 4.449762458470375e-07, "loss": 0.1492, "step": 48730 }, { "epoch": 0.9047129283425478, "grad_norm": 0.339915007352829, "learning_rate": 4.4463221546510173e-07, "loss": 0.4234, "step": 48732 }, { "epoch": 0.9047500584799665, "grad_norm": 0.5737096071243286, "learning_rate": 4.442883151030997e-07, "loss": 0.1858, "step": 48734 }, { "epoch": 0.904787188617385, "grad_norm": 0.32395225763320923, "learning_rate": 4.439445447657109e-07, "loss": 0.2272, "step": 48736 }, { "epoch": 0.9048243187548037, "grad_norm": 0.2530769407749176, "learning_rate": 4.4360090445761397e-07, "loss": 0.0963, "step": 48738 }, { "epoch": 0.9048614488922223, "grad_norm": 0.5057935118675232, "learning_rate": 4.43257394183485e-07, "loss": 0.1918, "step": 48740 }, { "epoch": 0.904898579029641, "grad_norm": 0.25706174969673157, "learning_rate": 4.429140139479948e-07, "loss": 0.1854, "step": 48742 }, { "epoch": 0.9049357091670597, "grad_norm": 0.2767575681209564, "learning_rate": 4.425707637558208e-07, "loss": 0.1404, "step": 48744 }, { "epoch": 0.9049728393044782, "grad_norm": 0.40217137336730957, "learning_rate": 4.422276436116291e-07, "loss": 0.3098, "step": 48746 }, { "epoch": 0.9050099694418969, "grad_norm": 0.4474918842315674, "learning_rate": 4.418846535200894e-07, "loss": 0.411, "step": 48748 }, { "epoch": 0.9050470995793155, "grad_norm": 0.45403483510017395, "learning_rate": 4.4154179348587124e-07, "loss": 0.4259, "step": 48750 }, { "epoch": 0.9050842297167342, "grad_norm": 0.19300459325313568, "learning_rate": 4.411990635136387e-07, "loss": 0.2264, "step": 48752 }, { "epoch": 0.9051213598541529, "grad_norm": 0.44730132818222046, "learning_rate": 4.408564636080537e-07, "loss": 0.4131, "step": 48754 }, { "epoch": 0.9051584899915714, "grad_norm": 0.41435182094573975, "learning_rate": 4.405139937737801e-07, "loss": 0.206, "step": 48756 }, { "epoch": 0.9051956201289901, "grad_norm": 0.3289092481136322, "learning_rate": 4.401716540154766e-07, "loss": 0.2669, "step": 48758 }, { "epoch": 0.9052327502664087, "grad_norm": 0.46171995997428894, "learning_rate": 4.398294443378015e-07, "loss": 0.3089, "step": 48760 }, { "epoch": 0.9052698804038274, "grad_norm": 0.31057822704315186, "learning_rate": 4.394873647454112e-07, "loss": 0.2744, "step": 48762 }, { "epoch": 0.905307010541246, "grad_norm": 0.34856677055358887, "learning_rate": 4.3914541524296304e-07, "loss": 0.1834, "step": 48764 }, { "epoch": 0.9053441406786646, "grad_norm": 0.5252789855003357, "learning_rate": 4.388035958351078e-07, "loss": 0.0549, "step": 48766 }, { "epoch": 0.9053812708160833, "grad_norm": 0.3833712339401245, "learning_rate": 4.38461906526495e-07, "loss": 0.2972, "step": 48768 }, { "epoch": 0.9054184009535019, "grad_norm": 0.2802814543247223, "learning_rate": 4.381203473217754e-07, "loss": 0.2576, "step": 48770 }, { "epoch": 0.9054555310909206, "grad_norm": 0.3977065086364746, "learning_rate": 4.3777891822559646e-07, "loss": 0.3733, "step": 48772 }, { "epoch": 0.9054926612283392, "grad_norm": 0.2784053385257721, "learning_rate": 4.374376192426044e-07, "loss": 0.2598, "step": 48774 }, { "epoch": 0.9055297913657578, "grad_norm": 0.6640099883079529, "learning_rate": 4.370964503774433e-07, "loss": 0.2136, "step": 48776 }, { "epoch": 0.9055669215031765, "grad_norm": 0.5974552035331726, "learning_rate": 4.367554116347572e-07, "loss": 0.1108, "step": 48778 }, { "epoch": 0.9056040516405951, "grad_norm": 0.5266056656837463, "learning_rate": 4.3641450301918244e-07, "loss": 0.1761, "step": 48780 }, { "epoch": 0.9056411817780138, "grad_norm": 0.39355018734931946, "learning_rate": 4.360737245353608e-07, "loss": 0.357, "step": 48782 }, { "epoch": 0.9056783119154324, "grad_norm": 0.32268476486206055, "learning_rate": 4.357330761879275e-07, "loss": 0.3456, "step": 48784 }, { "epoch": 0.905715442052851, "grad_norm": 0.5596379637718201, "learning_rate": 4.35392557981521e-07, "loss": 0.3907, "step": 48786 }, { "epoch": 0.9057525721902697, "grad_norm": 0.41029390692710876, "learning_rate": 4.3505216992077103e-07, "loss": 0.1038, "step": 48788 }, { "epoch": 0.9057897023276883, "grad_norm": 0.8101235628128052, "learning_rate": 4.347119120103116e-07, "loss": 0.3004, "step": 48790 }, { "epoch": 0.905826832465107, "grad_norm": 0.3662549555301666, "learning_rate": 4.343717842547701e-07, "loss": 0.1158, "step": 48792 }, { "epoch": 0.9058639626025256, "grad_norm": 0.23307795822620392, "learning_rate": 4.3403178665877733e-07, "loss": 0.2989, "step": 48794 }, { "epoch": 0.9059010927399442, "grad_norm": 0.3276031017303467, "learning_rate": 4.336919192269573e-07, "loss": 0.3171, "step": 48796 }, { "epoch": 0.9059382228773629, "grad_norm": 0.3484291732311249, "learning_rate": 4.333521819639364e-07, "loss": 0.2481, "step": 48798 }, { "epoch": 0.9059753530147815, "grad_norm": 0.4199477434158325, "learning_rate": 4.3301257487433743e-07, "loss": 0.369, "step": 48800 }, { "epoch": 0.9060124831522002, "grad_norm": 0.3740865886211395, "learning_rate": 4.3267309796278136e-07, "loss": 0.3463, "step": 48802 }, { "epoch": 0.9060496132896187, "grad_norm": 0.5298373699188232, "learning_rate": 4.3233375123388656e-07, "loss": 0.2772, "step": 48804 }, { "epoch": 0.9060867434270374, "grad_norm": 0.46103841066360474, "learning_rate": 4.319945346922705e-07, "loss": 0.336, "step": 48806 }, { "epoch": 0.9061238735644561, "grad_norm": 0.40721645951271057, "learning_rate": 4.316554483425506e-07, "loss": 0.3884, "step": 48808 }, { "epoch": 0.9061610037018747, "grad_norm": 0.35696664452552795, "learning_rate": 4.313164921893376e-07, "loss": 0.2778, "step": 48810 }, { "epoch": 0.9061981338392934, "grad_norm": 0.4891475439071655, "learning_rate": 4.309776662372467e-07, "loss": 0.4953, "step": 48812 }, { "epoch": 0.906235263976712, "grad_norm": 0.4484405815601349, "learning_rate": 4.306389704908864e-07, "loss": 0.2813, "step": 48814 }, { "epoch": 0.9062723941141306, "grad_norm": 0.40783825516700745, "learning_rate": 4.3030040495486757e-07, "loss": 0.2898, "step": 48816 }, { "epoch": 0.9063095242515492, "grad_norm": 0.3756624460220337, "learning_rate": 4.299619696337942e-07, "loss": 0.3026, "step": 48818 }, { "epoch": 0.9063466543889679, "grad_norm": 0.2260306030511856, "learning_rate": 4.2962366453227377e-07, "loss": 0.3094, "step": 48820 }, { "epoch": 0.9063837845263866, "grad_norm": 0.39833173155784607, "learning_rate": 4.2928548965490814e-07, "loss": 0.1883, "step": 48822 }, { "epoch": 0.9064209146638051, "grad_norm": 0.23823966085910797, "learning_rate": 4.2894744500629915e-07, "loss": 0.0772, "step": 48824 }, { "epoch": 0.9064580448012238, "grad_norm": 0.3064398467540741, "learning_rate": 4.286095305910476e-07, "loss": 0.3645, "step": 48826 }, { "epoch": 0.9064951749386424, "grad_norm": 0.2669902443885803, "learning_rate": 4.282717464137509e-07, "loss": 0.3168, "step": 48828 }, { "epoch": 0.9065323050760611, "grad_norm": 0.35009729862213135, "learning_rate": 4.279340924790054e-07, "loss": 0.2819, "step": 48830 }, { "epoch": 0.9065694352134798, "grad_norm": 0.38395726680755615, "learning_rate": 4.275965687914041e-07, "loss": 0.31, "step": 48832 }, { "epoch": 0.9066065653508983, "grad_norm": 0.2580207586288452, "learning_rate": 4.272591753555411e-07, "loss": 0.1479, "step": 48834 }, { "epoch": 0.906643695488317, "grad_norm": 0.4046536087989807, "learning_rate": 4.26921912176006e-07, "loss": 0.3235, "step": 48836 }, { "epoch": 0.9066808256257356, "grad_norm": 0.3600980341434479, "learning_rate": 4.265847792573896e-07, "loss": 0.414, "step": 48838 }, { "epoch": 0.9067179557631543, "grad_norm": 0.3328019678592682, "learning_rate": 4.2624777660428053e-07, "loss": 0.2962, "step": 48840 }, { "epoch": 0.906755085900573, "grad_norm": 0.3814980983734131, "learning_rate": 4.259109042212606e-07, "loss": 0.1527, "step": 48842 }, { "epoch": 0.9067922160379915, "grad_norm": 0.3758595287799835, "learning_rate": 4.2557416211291614e-07, "loss": 0.3528, "step": 48844 }, { "epoch": 0.9068293461754102, "grad_norm": 0.3968588709831238, "learning_rate": 4.2523755028382795e-07, "loss": 0.2607, "step": 48846 }, { "epoch": 0.9068664763128288, "grad_norm": 0.5646262764930725, "learning_rate": 4.2490106873857907e-07, "loss": 0.1548, "step": 48848 }, { "epoch": 0.9069036064502475, "grad_norm": 0.38165464997291565, "learning_rate": 4.245647174817435e-07, "loss": 0.4244, "step": 48850 }, { "epoch": 0.9069407365876662, "grad_norm": 0.3180619776248932, "learning_rate": 4.242284965179011e-07, "loss": 0.3808, "step": 48852 }, { "epoch": 0.9069778667250847, "grad_norm": 0.5579198598861694, "learning_rate": 4.2389240585162694e-07, "loss": 0.2316, "step": 48854 }, { "epoch": 0.9070149968625034, "grad_norm": 0.3150941729545593, "learning_rate": 4.2355644548749296e-07, "loss": 0.2236, "step": 48856 }, { "epoch": 0.907052126999922, "grad_norm": 0.5002660155296326, "learning_rate": 4.2322061543007e-07, "loss": 0.1426, "step": 48858 }, { "epoch": 0.9070892571373407, "grad_norm": 0.5429056882858276, "learning_rate": 4.2288491568392873e-07, "loss": 0.4129, "step": 48860 }, { "epoch": 0.9071263872747592, "grad_norm": 0.3593650758266449, "learning_rate": 4.2254934625363787e-07, "loss": 0.1739, "step": 48862 }, { "epoch": 0.9071635174121779, "grad_norm": 0.48075568675994873, "learning_rate": 4.222139071437625e-07, "loss": 0.3493, "step": 48864 }, { "epoch": 0.9072006475495966, "grad_norm": 0.5086880922317505, "learning_rate": 4.21878598358868e-07, "loss": 0.2805, "step": 48866 }, { "epoch": 0.9072377776870152, "grad_norm": 0.26627203822135925, "learning_rate": 4.21543419903514e-07, "loss": 0.303, "step": 48868 }, { "epoch": 0.9072749078244339, "grad_norm": 0.38790273666381836, "learning_rate": 4.212083717822646e-07, "loss": 0.3013, "step": 48870 }, { "epoch": 0.9073120379618524, "grad_norm": 0.30810248851776123, "learning_rate": 4.208734539996773e-07, "loss": 0.1382, "step": 48872 }, { "epoch": 0.9073491680992711, "grad_norm": 0.48359960317611694, "learning_rate": 4.205386665603095e-07, "loss": 0.3672, "step": 48874 }, { "epoch": 0.9073862982366898, "grad_norm": 0.9051299691200256, "learning_rate": 4.202040094687154e-07, "loss": 0.3244, "step": 48876 }, { "epoch": 0.9074234283741084, "grad_norm": 0.22864651679992676, "learning_rate": 4.198694827294525e-07, "loss": 0.3099, "step": 48878 }, { "epoch": 0.9074605585115271, "grad_norm": 0.4373929500579834, "learning_rate": 4.195350863470682e-07, "loss": 0.2349, "step": 48880 }, { "epoch": 0.9074976886489456, "grad_norm": 0.358294814825058, "learning_rate": 4.192008203261144e-07, "loss": 0.1118, "step": 48882 }, { "epoch": 0.9075348187863643, "grad_norm": 0.22718486189842224, "learning_rate": 4.1886668467113976e-07, "loss": 0.1764, "step": 48884 }, { "epoch": 0.907571948923783, "grad_norm": 0.4960615634918213, "learning_rate": 4.185326793866906e-07, "loss": 0.4519, "step": 48886 }, { "epoch": 0.9076090790612016, "grad_norm": 0.46453729271888733, "learning_rate": 4.181988044773122e-07, "loss": 0.3567, "step": 48888 }, { "epoch": 0.9076462091986203, "grad_norm": 0.4068054258823395, "learning_rate": 4.1786505994754865e-07, "loss": 0.2467, "step": 48890 }, { "epoch": 0.9076833393360388, "grad_norm": 0.4161670506000519, "learning_rate": 4.175314458019375e-07, "loss": 0.166, "step": 48892 }, { "epoch": 0.9077204694734575, "grad_norm": 0.6045944690704346, "learning_rate": 4.1719796204502284e-07, "loss": 0.2245, "step": 48894 }, { "epoch": 0.9077575996108762, "grad_norm": 0.4963160753250122, "learning_rate": 4.168646086813388e-07, "loss": 0.4962, "step": 48896 }, { "epoch": 0.9077947297482948, "grad_norm": 0.3763733506202698, "learning_rate": 4.1653138571542296e-07, "loss": 0.32, "step": 48898 }, { "epoch": 0.9078318598857135, "grad_norm": 0.41575342416763306, "learning_rate": 4.1619829315180825e-07, "loss": 0.2322, "step": 48900 }, { "epoch": 0.907868990023132, "grad_norm": 0.6435741782188416, "learning_rate": 4.158653309950278e-07, "loss": 0.2855, "step": 48902 }, { "epoch": 0.9079061201605507, "grad_norm": 0.29290878772735596, "learning_rate": 4.155324992496146e-07, "loss": 0.1809, "step": 48904 }, { "epoch": 0.9079432502979694, "grad_norm": 0.4652480185031891, "learning_rate": 4.151997979200939e-07, "loss": 0.2356, "step": 48906 }, { "epoch": 0.907980380435388, "grad_norm": 0.27646857500076294, "learning_rate": 4.148672270109932e-07, "loss": 0.1333, "step": 48908 }, { "epoch": 0.9080175105728067, "grad_norm": 0.5264910459518433, "learning_rate": 4.1453478652683897e-07, "loss": 0.366, "step": 48910 }, { "epoch": 0.9080546407102252, "grad_norm": 0.33842936158180237, "learning_rate": 4.142024764721564e-07, "loss": 0.4331, "step": 48912 }, { "epoch": 0.9080917708476439, "grad_norm": 0.30302634835243225, "learning_rate": 4.138702968514641e-07, "loss": 0.3809, "step": 48914 }, { "epoch": 0.9081289009850625, "grad_norm": 0.2515854835510254, "learning_rate": 4.1353824766928396e-07, "loss": 0.2734, "step": 48916 }, { "epoch": 0.9081660311224812, "grad_norm": 0.3847886919975281, "learning_rate": 4.1320632893013244e-07, "loss": 0.1305, "step": 48918 }, { "epoch": 0.9082031612598999, "grad_norm": 0.3655715584754944, "learning_rate": 4.128745406385271e-07, "loss": 0.2396, "step": 48920 }, { "epoch": 0.9082402913973184, "grad_norm": 0.41779062151908875, "learning_rate": 4.1254288279898303e-07, "loss": 0.2914, "step": 48922 }, { "epoch": 0.9082774215347371, "grad_norm": 0.31139156222343445, "learning_rate": 4.122113554160123e-07, "loss": 0.224, "step": 48924 }, { "epoch": 0.9083145516721557, "grad_norm": 0.5558975338935852, "learning_rate": 4.118799584941258e-07, "loss": 0.3412, "step": 48926 }, { "epoch": 0.9083516818095744, "grad_norm": 0.25206711888313293, "learning_rate": 4.1154869203783423e-07, "loss": 0.198, "step": 48928 }, { "epoch": 0.9083888119469931, "grad_norm": 0.5340746641159058, "learning_rate": 4.1121755605164405e-07, "loss": 0.2658, "step": 48930 }, { "epoch": 0.9084259420844116, "grad_norm": 0.4344080686569214, "learning_rate": 4.1088655054006056e-07, "loss": 0.2056, "step": 48932 }, { "epoch": 0.9084630722218303, "grad_norm": 0.2510961592197418, "learning_rate": 4.1055567550759013e-07, "loss": 0.3277, "step": 48934 }, { "epoch": 0.9085002023592489, "grad_norm": 0.27498647570610046, "learning_rate": 4.102249309587314e-07, "loss": 0.1242, "step": 48936 }, { "epoch": 0.9085373324966676, "grad_norm": 0.35060954093933105, "learning_rate": 4.0989431689798633e-07, "loss": 0.4163, "step": 48938 }, { "epoch": 0.9085744626340863, "grad_norm": 0.7289052605628967, "learning_rate": 4.095638333298546e-07, "loss": 0.2473, "step": 48940 }, { "epoch": 0.9086115927715048, "grad_norm": 0.33154094219207764, "learning_rate": 4.0923348025883383e-07, "loss": 0.4336, "step": 48942 }, { "epoch": 0.9086487229089235, "grad_norm": 0.37724632024765015, "learning_rate": 4.0890325768941586e-07, "loss": 0.2498, "step": 48944 }, { "epoch": 0.9086858530463421, "grad_norm": 0.3685709536075592, "learning_rate": 4.0857316562609717e-07, "loss": 0.3557, "step": 48946 }, { "epoch": 0.9087229831837608, "grad_norm": 0.4333646297454834, "learning_rate": 4.082432040733664e-07, "loss": 0.3655, "step": 48948 }, { "epoch": 0.9087601133211795, "grad_norm": 0.3445500135421753, "learning_rate": 4.079133730357154e-07, "loss": 0.2101, "step": 48950 }, { "epoch": 0.908797243458598, "grad_norm": 0.6022473573684692, "learning_rate": 4.07583672517633e-07, "loss": 0.1489, "step": 48952 }, { "epoch": 0.9088343735960167, "grad_norm": 0.47513312101364136, "learning_rate": 4.072541025236043e-07, "loss": 0.3182, "step": 48954 }, { "epoch": 0.9088715037334353, "grad_norm": 0.36885449290275574, "learning_rate": 4.0692466305811363e-07, "loss": 0.2489, "step": 48956 }, { "epoch": 0.908908633870854, "grad_norm": 0.4169812798500061, "learning_rate": 4.0659535412564287e-07, "loss": 0.4074, "step": 48958 }, { "epoch": 0.9089457640082726, "grad_norm": 0.5923995971679688, "learning_rate": 4.0626617573067295e-07, "loss": 0.3072, "step": 48960 }, { "epoch": 0.9089828941456912, "grad_norm": 0.4372858703136444, "learning_rate": 4.059371278776847e-07, "loss": 0.2164, "step": 48962 }, { "epoch": 0.9090200242831099, "grad_norm": 0.5354858040809631, "learning_rate": 4.0560821057115454e-07, "loss": 0.2762, "step": 48964 }, { "epoch": 0.9090571544205285, "grad_norm": 0.4759131968021393, "learning_rate": 4.0527942381555886e-07, "loss": 0.2979, "step": 48966 }, { "epoch": 0.9090942845579472, "grad_norm": 0.2577284872531891, "learning_rate": 4.0495076761536966e-07, "loss": 0.1724, "step": 48968 }, { "epoch": 0.9091314146953657, "grad_norm": 0.3899078369140625, "learning_rate": 4.046222419750601e-07, "loss": 0.384, "step": 48970 }, { "epoch": 0.9091685448327844, "grad_norm": 0.467097669839859, "learning_rate": 4.042938468991009e-07, "loss": 0.247, "step": 48972 }, { "epoch": 0.9092056749702031, "grad_norm": 0.39911025762557983, "learning_rate": 4.0396558239195973e-07, "loss": 0.2942, "step": 48974 }, { "epoch": 0.9092428051076217, "grad_norm": 0.27958929538726807, "learning_rate": 4.0363744845810405e-07, "loss": 0.1008, "step": 48976 }, { "epoch": 0.9092799352450404, "grad_norm": 0.18853436410427094, "learning_rate": 4.03309445101997e-07, "loss": 0.2228, "step": 48978 }, { "epoch": 0.9093170653824589, "grad_norm": 0.5499475002288818, "learning_rate": 4.02981572328105e-07, "loss": 0.1579, "step": 48980 }, { "epoch": 0.9093541955198776, "grad_norm": 0.44660812616348267, "learning_rate": 4.0265383014088555e-07, "loss": 0.3534, "step": 48982 }, { "epoch": 0.9093913256572963, "grad_norm": 0.4025813639163971, "learning_rate": 4.023262185448007e-07, "loss": 0.1882, "step": 48984 }, { "epoch": 0.9094284557947149, "grad_norm": 0.5073099732398987, "learning_rate": 4.01998737544308e-07, "loss": 0.33, "step": 48986 }, { "epoch": 0.9094655859321336, "grad_norm": 0.37051287293434143, "learning_rate": 4.016713871438616e-07, "loss": 0.3965, "step": 48988 }, { "epoch": 0.9095027160695521, "grad_norm": 0.6194114089012146, "learning_rate": 4.0134416734791904e-07, "loss": 0.2328, "step": 48990 }, { "epoch": 0.9095398462069708, "grad_norm": 0.3913637101650238, "learning_rate": 4.010170781609313e-07, "loss": 0.2516, "step": 48992 }, { "epoch": 0.9095769763443895, "grad_norm": 0.597536563873291, "learning_rate": 4.006901195873469e-07, "loss": 0.1999, "step": 48994 }, { "epoch": 0.9096141064818081, "grad_norm": 0.31112778186798096, "learning_rate": 4.00363291631618e-07, "loss": 0.3492, "step": 48996 }, { "epoch": 0.9096512366192268, "grad_norm": 0.3149658441543579, "learning_rate": 4.0003659429819097e-07, "loss": 0.1568, "step": 48998 }, { "epoch": 0.9096883667566453, "grad_norm": 0.44385287165641785, "learning_rate": 3.9971002759151e-07, "loss": 0.2742, "step": 49000 }, { "epoch": 0.909725496894064, "grad_norm": 0.4968036711215973, "learning_rate": 3.993835915160194e-07, "loss": 0.2495, "step": 49002 }, { "epoch": 0.9097626270314827, "grad_norm": 0.31728169322013855, "learning_rate": 3.990572860761621e-07, "loss": 0.3141, "step": 49004 }, { "epoch": 0.9097997571689013, "grad_norm": 0.42736145853996277, "learning_rate": 3.9873111127637476e-07, "loss": 0.1715, "step": 49006 }, { "epoch": 0.90983688730632, "grad_norm": 0.4397246241569519, "learning_rate": 3.9840506712109927e-07, "loss": 0.2555, "step": 49008 }, { "epoch": 0.9098740174437385, "grad_norm": 0.2928980886936188, "learning_rate": 3.980791536147699e-07, "loss": 0.3205, "step": 49010 }, { "epoch": 0.9099111475811572, "grad_norm": 0.5345935225486755, "learning_rate": 3.97753370761822e-07, "loss": 0.1594, "step": 49012 }, { "epoch": 0.9099482777185758, "grad_norm": 0.40242403745651245, "learning_rate": 3.974277185666886e-07, "loss": 0.2283, "step": 49014 }, { "epoch": 0.9099854078559945, "grad_norm": 0.4264202415943146, "learning_rate": 3.971021970338029e-07, "loss": 0.3024, "step": 49016 }, { "epoch": 0.9100225379934131, "grad_norm": 0.43068256974220276, "learning_rate": 3.967768061675903e-07, "loss": 0.2781, "step": 49018 }, { "epoch": 0.9100596681308317, "grad_norm": 0.48553401231765747, "learning_rate": 3.964515459724827e-07, "loss": 0.3174, "step": 49020 }, { "epoch": 0.9100967982682504, "grad_norm": 0.47729888558387756, "learning_rate": 3.9612641645290106e-07, "loss": 0.3663, "step": 49022 }, { "epoch": 0.910133928405669, "grad_norm": 0.8070148229598999, "learning_rate": 3.9580141761327297e-07, "loss": 0.1778, "step": 49024 }, { "epoch": 0.9101710585430877, "grad_norm": 0.5114554762840271, "learning_rate": 3.9547654945801927e-07, "loss": 0.2263, "step": 49026 }, { "epoch": 0.9102081886805063, "grad_norm": 0.3578938841819763, "learning_rate": 3.9515181199156095e-07, "loss": 0.344, "step": 49028 }, { "epoch": 0.9102453188179249, "grad_norm": 0.7296833395957947, "learning_rate": 3.9482720521831773e-07, "loss": 0.1101, "step": 49030 }, { "epoch": 0.9102824489553436, "grad_norm": 0.46377238631248474, "learning_rate": 3.945027291427039e-07, "loss": 0.3456, "step": 49032 }, { "epoch": 0.9103195790927622, "grad_norm": 0.3783925175666809, "learning_rate": 3.941783837691371e-07, "loss": 0.3248, "step": 49034 }, { "epoch": 0.9103567092301809, "grad_norm": 0.41273605823516846, "learning_rate": 3.938541691020281e-07, "loss": 0.2744, "step": 49036 }, { "epoch": 0.9103938393675995, "grad_norm": 0.5084975957870483, "learning_rate": 3.9353008514579125e-07, "loss": 0.203, "step": 49038 }, { "epoch": 0.9104309695050181, "grad_norm": 0.38751763105392456, "learning_rate": 3.932061319048364e-07, "loss": 0.3638, "step": 49040 }, { "epoch": 0.9104680996424368, "grad_norm": 0.28096672892570496, "learning_rate": 3.9288230938357095e-07, "loss": 0.3452, "step": 49042 }, { "epoch": 0.9105052297798554, "grad_norm": 0.3201262056827545, "learning_rate": 3.9255861758639824e-07, "loss": 0.1162, "step": 49044 }, { "epoch": 0.9105423599172741, "grad_norm": 0.4111940264701843, "learning_rate": 3.9223505651772574e-07, "loss": 0.1254, "step": 49046 }, { "epoch": 0.9105794900546927, "grad_norm": 0.5038614273071289, "learning_rate": 3.9191162618195553e-07, "loss": 0.2394, "step": 49048 }, { "epoch": 0.9106166201921113, "grad_norm": 0.38402634859085083, "learning_rate": 3.9158832658348854e-07, "loss": 0.2193, "step": 49050 }, { "epoch": 0.91065375032953, "grad_norm": 0.48965758085250854, "learning_rate": 3.9126515772672345e-07, "loss": 0.2713, "step": 49052 }, { "epoch": 0.9106908804669486, "grad_norm": 0.24301788210868835, "learning_rate": 3.909421196160601e-07, "loss": 0.2083, "step": 49054 }, { "epoch": 0.9107280106043673, "grad_norm": 0.30776247382164, "learning_rate": 3.906192122558905e-07, "loss": 0.2991, "step": 49056 }, { "epoch": 0.9107651407417859, "grad_norm": 0.6241234540939331, "learning_rate": 3.9029643565061006e-07, "loss": 0.1556, "step": 49058 }, { "epoch": 0.9108022708792045, "grad_norm": 0.39345285296440125, "learning_rate": 3.8997378980460964e-07, "loss": 0.4543, "step": 49060 }, { "epoch": 0.9108394010166232, "grad_norm": 0.7426944375038147, "learning_rate": 3.896512747222836e-07, "loss": 0.2158, "step": 49062 }, { "epoch": 0.9108765311540418, "grad_norm": 0.36365142464637756, "learning_rate": 3.8932889040801503e-07, "loss": 0.3242, "step": 49064 }, { "epoch": 0.9109136612914605, "grad_norm": 0.25166434049606323, "learning_rate": 3.8900663686619266e-07, "loss": 0.2134, "step": 49066 }, { "epoch": 0.910950791428879, "grad_norm": 0.41440215706825256, "learning_rate": 3.886845141012041e-07, "loss": 0.2634, "step": 49068 }, { "epoch": 0.9109879215662977, "grad_norm": 0.4218740463256836, "learning_rate": 3.883625221174281e-07, "loss": 0.3879, "step": 49070 }, { "epoch": 0.9110250517037164, "grad_norm": 0.23077178001403809, "learning_rate": 3.8804066091924773e-07, "loss": 0.2697, "step": 49072 }, { "epoch": 0.911062181841135, "grad_norm": 0.3843381106853485, "learning_rate": 3.8771893051104406e-07, "loss": 0.2875, "step": 49074 }, { "epoch": 0.9110993119785536, "grad_norm": 0.9045064449310303, "learning_rate": 3.873973308971923e-07, "loss": 0.0794, "step": 49076 }, { "epoch": 0.9111364421159722, "grad_norm": 0.2245081067085266, "learning_rate": 3.8707586208207026e-07, "loss": 0.2742, "step": 49078 }, { "epoch": 0.9111735722533909, "grad_norm": 0.2783846855163574, "learning_rate": 3.867545240700532e-07, "loss": 0.2055, "step": 49080 }, { "epoch": 0.9112107023908096, "grad_norm": 0.3626197278499603, "learning_rate": 3.8643331686550987e-07, "loss": 0.2602, "step": 49082 }, { "epoch": 0.9112478325282282, "grad_norm": 0.5653846859931946, "learning_rate": 3.861122404728157e-07, "loss": 0.0797, "step": 49084 }, { "epoch": 0.9112849626656468, "grad_norm": 0.17433226108551025, "learning_rate": 3.857912948963349e-07, "loss": 0.2793, "step": 49086 }, { "epoch": 0.9113220928030654, "grad_norm": 0.4426973760128021, "learning_rate": 3.8547048014043744e-07, "loss": 0.4072, "step": 49088 }, { "epoch": 0.9113592229404841, "grad_norm": 0.43427780270576477, "learning_rate": 3.851497962094863e-07, "loss": 0.266, "step": 49090 }, { "epoch": 0.9113963530779028, "grad_norm": 0.6094973683357239, "learning_rate": 3.848292431078493e-07, "loss": 0.5219, "step": 49092 }, { "epoch": 0.9114334832153214, "grad_norm": 0.4207667112350464, "learning_rate": 3.8450882083988394e-07, "loss": 0.2247, "step": 49094 }, { "epoch": 0.91147061335274, "grad_norm": 0.48359277844429016, "learning_rate": 3.841885294099523e-07, "loss": 0.1877, "step": 49096 }, { "epoch": 0.9115077434901586, "grad_norm": 0.3277426064014435, "learning_rate": 3.83868368822411e-07, "loss": 0.2218, "step": 49098 }, { "epoch": 0.9115448736275773, "grad_norm": 0.33029836416244507, "learning_rate": 3.835483390816186e-07, "loss": 0.1454, "step": 49100 }, { "epoch": 0.911582003764996, "grad_norm": 0.6335600018501282, "learning_rate": 3.832284401919295e-07, "loss": 0.299, "step": 49102 }, { "epoch": 0.9116191339024146, "grad_norm": 0.3945949971675873, "learning_rate": 3.829086721576947e-07, "loss": 0.194, "step": 49104 }, { "epoch": 0.9116562640398332, "grad_norm": 0.41849344968795776, "learning_rate": 3.8258903498326727e-07, "loss": 0.4278, "step": 49106 }, { "epoch": 0.9116933941772518, "grad_norm": 0.5891265273094177, "learning_rate": 3.822695286729938e-07, "loss": 0.2895, "step": 49108 }, { "epoch": 0.9117305243146705, "grad_norm": 0.2867845892906189, "learning_rate": 3.819501532312242e-07, "loss": 0.2817, "step": 49110 }, { "epoch": 0.9117676544520892, "grad_norm": 0.35573574900627136, "learning_rate": 3.8163090866230377e-07, "loss": 0.3439, "step": 49112 }, { "epoch": 0.9118047845895078, "grad_norm": 0.3361247777938843, "learning_rate": 3.813117949705769e-07, "loss": 0.2586, "step": 49114 }, { "epoch": 0.9118419147269264, "grad_norm": 0.5398594737052917, "learning_rate": 3.8099281216038453e-07, "loss": 0.3119, "step": 49116 }, { "epoch": 0.911879044864345, "grad_norm": 0.4364480972290039, "learning_rate": 3.806739602360687e-07, "loss": 0.3951, "step": 49118 }, { "epoch": 0.9119161750017637, "grad_norm": 0.22037175297737122, "learning_rate": 3.8035523920196605e-07, "loss": 0.3499, "step": 49120 }, { "epoch": 0.9119533051391823, "grad_norm": 0.3939928114414215, "learning_rate": 3.8003664906241413e-07, "loss": 0.1694, "step": 49122 }, { "epoch": 0.911990435276601, "grad_norm": 0.39297863841056824, "learning_rate": 3.7971818982174834e-07, "loss": 0.2824, "step": 49124 }, { "epoch": 0.9120275654140196, "grad_norm": 0.36904844641685486, "learning_rate": 3.793998614843042e-07, "loss": 0.3212, "step": 49126 }, { "epoch": 0.9120646955514382, "grad_norm": 0.4340082108974457, "learning_rate": 3.790816640544082e-07, "loss": 0.3197, "step": 49128 }, { "epoch": 0.9121018256888569, "grad_norm": 0.3527190089225769, "learning_rate": 3.787635975363946e-07, "loss": 0.0881, "step": 49130 }, { "epoch": 0.9121389558262755, "grad_norm": 0.2179195135831833, "learning_rate": 3.784456619345889e-07, "loss": 0.2133, "step": 49132 }, { "epoch": 0.9121760859636941, "grad_norm": 0.5752211213111877, "learning_rate": 3.7812785725331756e-07, "loss": 0.3775, "step": 49134 }, { "epoch": 0.9122132161011128, "grad_norm": 0.3137204647064209, "learning_rate": 3.77810183496905e-07, "loss": 0.3147, "step": 49136 }, { "epoch": 0.9122503462385314, "grad_norm": 0.3498871922492981, "learning_rate": 3.7749264066967327e-07, "loss": 0.2392, "step": 49138 }, { "epoch": 0.9122874763759501, "grad_norm": 0.4859921634197235, "learning_rate": 3.771752287759445e-07, "loss": 0.1795, "step": 49140 }, { "epoch": 0.9123246065133687, "grad_norm": 0.515813410282135, "learning_rate": 3.768579478200374e-07, "loss": 0.4419, "step": 49142 }, { "epoch": 0.9123617366507873, "grad_norm": 0.3591817617416382, "learning_rate": 3.7654079780627075e-07, "loss": 0.2977, "step": 49144 }, { "epoch": 0.912398866788206, "grad_norm": 0.4341873228549957, "learning_rate": 3.7622377873895664e-07, "loss": 0.2359, "step": 49146 }, { "epoch": 0.9124359969256246, "grad_norm": 0.5378904342651367, "learning_rate": 3.759068906224106e-07, "loss": 0.087, "step": 49148 }, { "epoch": 0.9124731270630433, "grad_norm": 0.5372897386550903, "learning_rate": 3.7559013346094463e-07, "loss": 0.2695, "step": 49150 }, { "epoch": 0.9125102572004619, "grad_norm": 0.3655979633331299, "learning_rate": 3.752735072588676e-07, "loss": 0.2166, "step": 49152 }, { "epoch": 0.9125473873378805, "grad_norm": 0.45120567083358765, "learning_rate": 3.7495701202048817e-07, "loss": 0.1716, "step": 49154 }, { "epoch": 0.9125845174752992, "grad_norm": 0.8137903809547424, "learning_rate": 3.7464064775011633e-07, "loss": 0.3569, "step": 49156 }, { "epoch": 0.9126216476127178, "grad_norm": 0.3004733920097351, "learning_rate": 3.743244144520519e-07, "loss": 0.1815, "step": 49158 }, { "epoch": 0.9126587777501365, "grad_norm": 0.3396545648574829, "learning_rate": 3.740083121305993e-07, "loss": 0.2561, "step": 49160 }, { "epoch": 0.9126959078875551, "grad_norm": 0.400591641664505, "learning_rate": 3.736923407900606e-07, "loss": 0.3533, "step": 49162 }, { "epoch": 0.9127330380249737, "grad_norm": 0.5410148501396179, "learning_rate": 3.733765004347356e-07, "loss": 0.1558, "step": 49164 }, { "epoch": 0.9127701681623923, "grad_norm": 0.49031898379325867, "learning_rate": 3.730607910689221e-07, "loss": 0.3002, "step": 49166 }, { "epoch": 0.912807298299811, "grad_norm": 0.4291669428348541, "learning_rate": 3.7274521269691446e-07, "loss": 0.279, "step": 49168 }, { "epoch": 0.9128444284372297, "grad_norm": 0.3539947271347046, "learning_rate": 3.7242976532300913e-07, "loss": 0.22, "step": 49170 }, { "epoch": 0.9128815585746483, "grad_norm": 0.3904728889465332, "learning_rate": 3.7211444895149493e-07, "loss": 0.4332, "step": 49172 }, { "epoch": 0.9129186887120669, "grad_norm": 0.36876776814460754, "learning_rate": 3.717992635866641e-07, "loss": 0.3549, "step": 49174 }, { "epoch": 0.9129558188494855, "grad_norm": 0.38211798667907715, "learning_rate": 3.7148420923280527e-07, "loss": 0.3592, "step": 49176 }, { "epoch": 0.9129929489869042, "grad_norm": 0.31899258494377136, "learning_rate": 3.711692858942062e-07, "loss": 0.3396, "step": 49178 }, { "epoch": 0.9130300791243229, "grad_norm": 0.48154133558273315, "learning_rate": 3.7085449357515237e-07, "loss": 0.2658, "step": 49180 }, { "epoch": 0.9130672092617415, "grad_norm": 0.4364120662212372, "learning_rate": 3.7053983227992476e-07, "loss": 0.249, "step": 49182 }, { "epoch": 0.9131043393991601, "grad_norm": 0.342742383480072, "learning_rate": 3.702253020128066e-07, "loss": 0.1758, "step": 49184 }, { "epoch": 0.9131414695365787, "grad_norm": 0.4467649757862091, "learning_rate": 3.6991090277807785e-07, "loss": 0.3164, "step": 49186 }, { "epoch": 0.9131785996739974, "grad_norm": 0.6182628273963928, "learning_rate": 3.695966345800173e-07, "loss": 0.1756, "step": 49188 }, { "epoch": 0.9132157298114161, "grad_norm": 0.42345836758613586, "learning_rate": 3.6928249742289924e-07, "loss": 0.4751, "step": 49190 }, { "epoch": 0.9132528599488346, "grad_norm": 0.3199913203716278, "learning_rate": 3.689684913109992e-07, "loss": 0.1678, "step": 49192 }, { "epoch": 0.9132899900862533, "grad_norm": 0.5057575106620789, "learning_rate": 3.6865461624859043e-07, "loss": 0.2928, "step": 49194 }, { "epoch": 0.9133271202236719, "grad_norm": 0.27833977341651917, "learning_rate": 3.6834087223994174e-07, "loss": 0.2423, "step": 49196 }, { "epoch": 0.9133642503610906, "grad_norm": 0.4165906608104706, "learning_rate": 3.6802725928932417e-07, "loss": 0.2376, "step": 49198 }, { "epoch": 0.9134013804985093, "grad_norm": 0.36996570229530334, "learning_rate": 3.6771377740100424e-07, "loss": 0.3135, "step": 49200 }, { "epoch": 0.9134385106359278, "grad_norm": 0.3783271908760071, "learning_rate": 3.674004265792475e-07, "loss": 0.3374, "step": 49202 }, { "epoch": 0.9134756407733465, "grad_norm": 0.49599137902259827, "learning_rate": 3.6708720682831825e-07, "loss": 0.407, "step": 49204 }, { "epoch": 0.9135127709107651, "grad_norm": 0.6829783320426941, "learning_rate": 3.6677411815247975e-07, "loss": 0.5872, "step": 49206 }, { "epoch": 0.9135499010481838, "grad_norm": 0.5518897175788879, "learning_rate": 3.664611605559887e-07, "loss": 0.1766, "step": 49208 }, { "epoch": 0.9135870311856025, "grad_norm": 0.2539489269256592, "learning_rate": 3.661483340431071e-07, "loss": 0.2981, "step": 49210 }, { "epoch": 0.913624161323021, "grad_norm": 0.26490354537963867, "learning_rate": 3.658356386180895e-07, "loss": 0.1054, "step": 49212 }, { "epoch": 0.9136612914604397, "grad_norm": 0.3780193328857422, "learning_rate": 3.6552307428519005e-07, "loss": 0.1017, "step": 49214 }, { "epoch": 0.9136984215978583, "grad_norm": 0.43187615275382996, "learning_rate": 3.6521064104866333e-07, "loss": 0.1969, "step": 49216 }, { "epoch": 0.913735551735277, "grad_norm": 0.35013946890830994, "learning_rate": 3.648983389127614e-07, "loss": 0.1331, "step": 49218 }, { "epoch": 0.9137726818726956, "grad_norm": 0.2578405737876892, "learning_rate": 3.6458616788173195e-07, "loss": 0.1971, "step": 49220 }, { "epoch": 0.9138098120101142, "grad_norm": 0.5299789309501648, "learning_rate": 3.6427412795982275e-07, "loss": 0.2827, "step": 49222 }, { "epoch": 0.9138469421475329, "grad_norm": 0.3868010342121124, "learning_rate": 3.639622191512804e-07, "loss": 0.396, "step": 49224 }, { "epoch": 0.9138840722849515, "grad_norm": 0.22832293808460236, "learning_rate": 3.6365044146034924e-07, "loss": 0.3655, "step": 49226 }, { "epoch": 0.9139212024223702, "grad_norm": 0.36799561977386475, "learning_rate": 3.6333879489127034e-07, "loss": 0.2704, "step": 49228 }, { "epoch": 0.9139583325597888, "grad_norm": 0.36178672313690186, "learning_rate": 3.6302727944828696e-07, "loss": 0.2905, "step": 49230 }, { "epoch": 0.9139954626972074, "grad_norm": 0.7449304461479187, "learning_rate": 3.6271589513563575e-07, "loss": 0.29, "step": 49232 }, { "epoch": 0.9140325928346261, "grad_norm": 0.27457907795906067, "learning_rate": 3.624046419575555e-07, "loss": 0.2618, "step": 49234 }, { "epoch": 0.9140697229720447, "grad_norm": 0.4801813066005707, "learning_rate": 3.6209351991827847e-07, "loss": 0.2464, "step": 49236 }, { "epoch": 0.9141068531094634, "grad_norm": 0.39341965317726135, "learning_rate": 3.617825290220389e-07, "loss": 0.0933, "step": 49238 }, { "epoch": 0.914143983246882, "grad_norm": 0.5951845645904541, "learning_rate": 3.6147166927307023e-07, "loss": 0.3771, "step": 49240 }, { "epoch": 0.9141811133843006, "grad_norm": 0.2941252291202545, "learning_rate": 3.6116094067560116e-07, "loss": 0.339, "step": 49242 }, { "epoch": 0.9142182435217193, "grad_norm": 0.505480170249939, "learning_rate": 3.608503432338617e-07, "loss": 0.2142, "step": 49244 }, { "epoch": 0.9142553736591379, "grad_norm": 0.37368538975715637, "learning_rate": 3.6053987695207514e-07, "loss": 0.2307, "step": 49246 }, { "epoch": 0.9142925037965566, "grad_norm": 0.5377260446548462, "learning_rate": 3.60229541834467e-07, "loss": 0.2101, "step": 49248 }, { "epoch": 0.9143296339339751, "grad_norm": 0.22763806581497192, "learning_rate": 3.599193378852606e-07, "loss": 0.1591, "step": 49250 }, { "epoch": 0.9143667640713938, "grad_norm": 0.42656072974205017, "learning_rate": 3.59609265108678e-07, "loss": 0.3388, "step": 49252 }, { "epoch": 0.9144038942088125, "grad_norm": 1.0524674654006958, "learning_rate": 3.592993235089348e-07, "loss": 0.3559, "step": 49254 }, { "epoch": 0.9144410243462311, "grad_norm": 0.3886829614639282, "learning_rate": 3.589895130902532e-07, "loss": 0.4413, "step": 49256 }, { "epoch": 0.9144781544836498, "grad_norm": 0.45653024315834045, "learning_rate": 3.5867983385684425e-07, "loss": 0.2516, "step": 49258 }, { "epoch": 0.9145152846210683, "grad_norm": 0.3318355977535248, "learning_rate": 3.5837028581292343e-07, "loss": 0.1334, "step": 49260 }, { "epoch": 0.914552414758487, "grad_norm": 0.9061023592948914, "learning_rate": 3.5806086896270296e-07, "loss": 0.3228, "step": 49262 }, { "epoch": 0.9145895448959057, "grad_norm": 0.3987681269645691, "learning_rate": 3.5775158331039396e-07, "loss": 0.4032, "step": 49264 }, { "epoch": 0.9146266750333243, "grad_norm": 0.3664870858192444, "learning_rate": 3.5744242886020296e-07, "loss": 0.1708, "step": 49266 }, { "epoch": 0.914663805170743, "grad_norm": 0.35846784710884094, "learning_rate": 3.571334056163378e-07, "loss": 0.2191, "step": 49268 }, { "epoch": 0.9147009353081615, "grad_norm": 0.3139619529247284, "learning_rate": 3.5682451358300505e-07, "loss": 0.2191, "step": 49270 }, { "epoch": 0.9147380654455802, "grad_norm": 0.46333786845207214, "learning_rate": 3.5651575276440474e-07, "loss": 0.2307, "step": 49272 }, { "epoch": 0.9147751955829988, "grad_norm": 0.6021817326545715, "learning_rate": 3.5620712316474016e-07, "loss": 0.2243, "step": 49274 }, { "epoch": 0.9148123257204175, "grad_norm": 0.3311045467853546, "learning_rate": 3.5589862478820904e-07, "loss": 0.2095, "step": 49276 }, { "epoch": 0.9148494558578362, "grad_norm": 0.29638537764549255, "learning_rate": 3.5559025763901135e-07, "loss": 0.2053, "step": 49278 }, { "epoch": 0.9148865859952547, "grad_norm": 0.28549426794052124, "learning_rate": 3.552820217213404e-07, "loss": 0.1115, "step": 49280 }, { "epoch": 0.9149237161326734, "grad_norm": 0.4278050661087036, "learning_rate": 3.54973917039394e-07, "loss": 0.1262, "step": 49282 }, { "epoch": 0.914960846270092, "grad_norm": 0.47603076696395874, "learning_rate": 3.5466594359736097e-07, "loss": 0.2412, "step": 49284 }, { "epoch": 0.9149979764075107, "grad_norm": 0.3592650294303894, "learning_rate": 3.5435810139943236e-07, "loss": 0.1617, "step": 49286 }, { "epoch": 0.9150351065449294, "grad_norm": 0.37813347578048706, "learning_rate": 3.540503904497994e-07, "loss": 0.2273, "step": 49288 }, { "epoch": 0.9150722366823479, "grad_norm": 0.40391597151756287, "learning_rate": 3.537428107526464e-07, "loss": 0.2648, "step": 49290 }, { "epoch": 0.9151093668197666, "grad_norm": 0.46821025013923645, "learning_rate": 3.5343536231216116e-07, "loss": 0.2661, "step": 49292 }, { "epoch": 0.9151464969571852, "grad_norm": 0.3504076600074768, "learning_rate": 3.5312804513252586e-07, "loss": 0.1743, "step": 49294 }, { "epoch": 0.9151836270946039, "grad_norm": 0.45604148507118225, "learning_rate": 3.5282085921792273e-07, "loss": 0.2164, "step": 49296 }, { "epoch": 0.9152207572320226, "grad_norm": 0.41255414485931396, "learning_rate": 3.525138045725296e-07, "loss": 0.4561, "step": 49298 }, { "epoch": 0.9152578873694411, "grad_norm": 0.35116198658943176, "learning_rate": 3.522068812005264e-07, "loss": 0.2102, "step": 49300 }, { "epoch": 0.9152950175068598, "grad_norm": 0.630753755569458, "learning_rate": 3.5190008910608863e-07, "loss": 0.2681, "step": 49302 }, { "epoch": 0.9153321476442784, "grad_norm": 0.5424085855484009, "learning_rate": 3.515934282933919e-07, "loss": 0.4646, "step": 49304 }, { "epoch": 0.9153692777816971, "grad_norm": 0.18050970137119293, "learning_rate": 3.512868987666096e-07, "loss": 0.1358, "step": 49306 }, { "epoch": 0.9154064079191158, "grad_norm": 0.3194495737552643, "learning_rate": 3.509805005299094e-07, "loss": 0.2835, "step": 49308 }, { "epoch": 0.9154435380565343, "grad_norm": 0.43440771102905273, "learning_rate": 3.5067423358746246e-07, "loss": 0.4712, "step": 49310 }, { "epoch": 0.915480668193953, "grad_norm": 0.2993130683898926, "learning_rate": 3.5036809794343653e-07, "loss": 0.2495, "step": 49312 }, { "epoch": 0.9155177983313716, "grad_norm": 0.25007620453834534, "learning_rate": 3.500620936019961e-07, "loss": 0.2258, "step": 49314 }, { "epoch": 0.9155549284687903, "grad_norm": 0.22476312518119812, "learning_rate": 3.4975622056730776e-07, "loss": 0.2785, "step": 49316 }, { "epoch": 0.9155920586062088, "grad_norm": 0.2292502373456955, "learning_rate": 3.4945047884352934e-07, "loss": 0.348, "step": 49318 }, { "epoch": 0.9156291887436275, "grad_norm": 0.26134833693504333, "learning_rate": 3.491448684348253e-07, "loss": 0.226, "step": 49320 }, { "epoch": 0.9156663188810462, "grad_norm": 0.40726497769355774, "learning_rate": 3.488393893453501e-07, "loss": 0.2901, "step": 49322 }, { "epoch": 0.9157034490184648, "grad_norm": 0.42395174503326416, "learning_rate": 3.4853404157926265e-07, "loss": 0.2595, "step": 49324 }, { "epoch": 0.9157405791558835, "grad_norm": 0.22526511549949646, "learning_rate": 3.4822882514071733e-07, "loss": 0.2162, "step": 49326 }, { "epoch": 0.915777709293302, "grad_norm": 0.3842701017856598, "learning_rate": 3.479237400338664e-07, "loss": 0.3288, "step": 49328 }, { "epoch": 0.9158148394307207, "grad_norm": 0.5759210586547852, "learning_rate": 3.4761878626286326e-07, "loss": 0.2733, "step": 49330 }, { "epoch": 0.9158519695681394, "grad_norm": 0.3414568603038788, "learning_rate": 3.473139638318568e-07, "loss": 0.0647, "step": 49332 }, { "epoch": 0.915889099705558, "grad_norm": 0.4282447397708893, "learning_rate": 3.4700927274499363e-07, "loss": 0.182, "step": 49334 }, { "epoch": 0.9159262298429767, "grad_norm": 0.6249219179153442, "learning_rate": 3.467047130064194e-07, "loss": 0.2051, "step": 49336 }, { "epoch": 0.9159633599803952, "grad_norm": 0.23117023706436157, "learning_rate": 3.4640028462028075e-07, "loss": 0.2968, "step": 49338 }, { "epoch": 0.9160004901178139, "grad_norm": 0.6320258378982544, "learning_rate": 3.460959875907166e-07, "loss": 0.2675, "step": 49340 }, { "epoch": 0.9160376202552326, "grad_norm": 0.29232069849967957, "learning_rate": 3.457918219218692e-07, "loss": 0.4168, "step": 49342 }, { "epoch": 0.9160747503926512, "grad_norm": 0.3106766939163208, "learning_rate": 3.454877876178797e-07, "loss": 0.3143, "step": 49344 }, { "epoch": 0.9161118805300699, "grad_norm": 0.1453436315059662, "learning_rate": 3.451838846828803e-07, "loss": 0.2135, "step": 49346 }, { "epoch": 0.9161490106674884, "grad_norm": 0.4814920723438263, "learning_rate": 3.4488011312101e-07, "loss": 0.4099, "step": 49348 }, { "epoch": 0.9161861408049071, "grad_norm": 1.1180670261383057, "learning_rate": 3.4457647293639984e-07, "loss": 0.3466, "step": 49350 }, { "epoch": 0.9162232709423258, "grad_norm": 0.36522749066352844, "learning_rate": 3.4427296413318323e-07, "loss": 0.248, "step": 49352 }, { "epoch": 0.9162604010797444, "grad_norm": 0.48361122608184814, "learning_rate": 3.4396958671548906e-07, "loss": 0.1167, "step": 49354 }, { "epoch": 0.9162975312171631, "grad_norm": 0.2928771376609802, "learning_rate": 3.436663406874463e-07, "loss": 0.1727, "step": 49356 }, { "epoch": 0.9163346613545816, "grad_norm": 0.42670348286628723, "learning_rate": 3.433632260531805e-07, "loss": 0.1911, "step": 49358 }, { "epoch": 0.9163717914920003, "grad_norm": 0.3926699757575989, "learning_rate": 3.4306024281681614e-07, "loss": 0.316, "step": 49360 }, { "epoch": 0.916408921629419, "grad_norm": 0.49050793051719666, "learning_rate": 3.427573909824755e-07, "loss": 0.1766, "step": 49362 }, { "epoch": 0.9164460517668376, "grad_norm": 0.3577752709388733, "learning_rate": 3.4245467055428084e-07, "loss": 0.3496, "step": 49364 }, { "epoch": 0.9164831819042563, "grad_norm": 0.43557503819465637, "learning_rate": 3.4215208153634884e-07, "loss": 0.2187, "step": 49366 }, { "epoch": 0.9165203120416748, "grad_norm": 0.28730323910713196, "learning_rate": 3.4184962393279955e-07, "loss": 0.3753, "step": 49368 }, { "epoch": 0.9165574421790935, "grad_norm": 0.30963724851608276, "learning_rate": 3.415472977477474e-07, "loss": 0.2793, "step": 49370 }, { "epoch": 0.9165945723165121, "grad_norm": 0.4687226712703705, "learning_rate": 3.4124510298530587e-07, "loss": 0.2461, "step": 49372 }, { "epoch": 0.9166317024539308, "grad_norm": 0.7027088403701782, "learning_rate": 3.409430396495872e-07, "loss": 0.1566, "step": 49374 }, { "epoch": 0.9166688325913495, "grad_norm": 0.3863414525985718, "learning_rate": 3.4064110774470137e-07, "loss": 0.2191, "step": 49376 }, { "epoch": 0.916705962728768, "grad_norm": 0.47203171253204346, "learning_rate": 3.403393072747574e-07, "loss": 0.1495, "step": 49378 }, { "epoch": 0.9167430928661867, "grad_norm": 0.745822012424469, "learning_rate": 3.400376382438608e-07, "loss": 0.3816, "step": 49380 }, { "epoch": 0.9167802230036053, "grad_norm": 0.3202285170555115, "learning_rate": 3.397361006561184e-07, "loss": 0.3679, "step": 49382 }, { "epoch": 0.916817353141024, "grad_norm": 0.37226977944374084, "learning_rate": 3.3943469451563125e-07, "loss": 0.0318, "step": 49384 }, { "epoch": 0.9168544832784427, "grad_norm": 0.36163854598999023, "learning_rate": 3.391334198265006e-07, "loss": 0.3257, "step": 49386 }, { "epoch": 0.9168916134158612, "grad_norm": 0.4392906427383423, "learning_rate": 3.3883227659282536e-07, "loss": 0.2706, "step": 49388 }, { "epoch": 0.9169287435532799, "grad_norm": 0.6020591259002686, "learning_rate": 3.3853126481870555e-07, "loss": 0.2558, "step": 49390 }, { "epoch": 0.9169658736906985, "grad_norm": 0.4009425640106201, "learning_rate": 3.3823038450823575e-07, "loss": 0.4351, "step": 49392 }, { "epoch": 0.9170030038281172, "grad_norm": 0.6857998967170715, "learning_rate": 3.379296356655093e-07, "loss": 0.16, "step": 49394 }, { "epoch": 0.9170401339655359, "grad_norm": 0.49467557668685913, "learning_rate": 3.376290182946207e-07, "loss": 0.3361, "step": 49396 }, { "epoch": 0.9170772641029544, "grad_norm": 0.48406773805618286, "learning_rate": 3.373285323996578e-07, "loss": 0.2949, "step": 49398 }, { "epoch": 0.9171143942403731, "grad_norm": 0.39995500445365906, "learning_rate": 3.3702817798471175e-07, "loss": 0.2345, "step": 49400 }, { "epoch": 0.9171515243777917, "grad_norm": 0.34932398796081543, "learning_rate": 3.3672795505386824e-07, "loss": 0.1812, "step": 49402 }, { "epoch": 0.9171886545152104, "grad_norm": 0.422699511051178, "learning_rate": 3.364278636112106e-07, "loss": 0.133, "step": 49404 }, { "epoch": 0.917225784652629, "grad_norm": 0.24442869424819946, "learning_rate": 3.361279036608256e-07, "loss": 0.4778, "step": 49406 }, { "epoch": 0.9172629147900476, "grad_norm": 0.37181442975997925, "learning_rate": 3.3582807520679326e-07, "loss": 0.0535, "step": 49408 }, { "epoch": 0.9173000449274663, "grad_norm": 0.4095766246318817, "learning_rate": 3.3552837825319263e-07, "loss": 0.7202, "step": 49410 }, { "epoch": 0.9173371750648849, "grad_norm": 0.4311332106590271, "learning_rate": 3.3522881280410145e-07, "loss": 0.3445, "step": 49412 }, { "epoch": 0.9173743052023036, "grad_norm": 0.43774715065956116, "learning_rate": 3.3492937886359765e-07, "loss": 0.3055, "step": 49414 }, { "epoch": 0.9174114353397222, "grad_norm": 0.4392765164375305, "learning_rate": 3.346300764357546e-07, "loss": 0.2215, "step": 49416 }, { "epoch": 0.9174485654771408, "grad_norm": 0.39213061332702637, "learning_rate": 3.3433090552464464e-07, "loss": 0.3119, "step": 49418 }, { "epoch": 0.9174856956145595, "grad_norm": 0.3477953374385834, "learning_rate": 3.3403186613434115e-07, "loss": 0.3942, "step": 49420 }, { "epoch": 0.9175228257519781, "grad_norm": 0.3387806713581085, "learning_rate": 3.337329582689086e-07, "loss": 0.4013, "step": 49422 }, { "epoch": 0.9175599558893968, "grad_norm": 0.34520232677459717, "learning_rate": 3.334341819324194e-07, "loss": 0.3143, "step": 49424 }, { "epoch": 0.9175970860268153, "grad_norm": 0.34256818890571594, "learning_rate": 3.3313553712893354e-07, "loss": 0.3453, "step": 49426 }, { "epoch": 0.917634216164234, "grad_norm": 0.3753448724746704, "learning_rate": 3.3283702386251783e-07, "loss": 0.3723, "step": 49428 }, { "epoch": 0.9176713463016527, "grad_norm": 0.6176601648330688, "learning_rate": 3.325386421372345e-07, "loss": 0.1915, "step": 49430 }, { "epoch": 0.9177084764390713, "grad_norm": 0.3762880563735962, "learning_rate": 3.3224039195714263e-07, "loss": 0.1986, "step": 49432 }, { "epoch": 0.91774560657649, "grad_norm": 0.35661986470222473, "learning_rate": 3.3194227332630116e-07, "loss": 0.2276, "step": 49434 }, { "epoch": 0.9177827367139085, "grad_norm": 0.4317947328090668, "learning_rate": 3.3164428624876564e-07, "loss": 0.4489, "step": 49436 }, { "epoch": 0.9178198668513272, "grad_norm": 0.7085149884223938, "learning_rate": 3.313464307285907e-07, "loss": 0.3009, "step": 49438 }, { "epoch": 0.9178569969887459, "grad_norm": 0.462202787399292, "learning_rate": 3.3104870676983093e-07, "loss": 0.1943, "step": 49440 }, { "epoch": 0.9178941271261645, "grad_norm": 0.4702221155166626, "learning_rate": 3.307511143765363e-07, "loss": 0.36, "step": 49442 }, { "epoch": 0.9179312572635832, "grad_norm": 0.4293614327907562, "learning_rate": 3.304536535527558e-07, "loss": 0.282, "step": 49444 }, { "epoch": 0.9179683874010017, "grad_norm": 0.34036514163017273, "learning_rate": 3.301563243025385e-07, "loss": 0.2922, "step": 49446 }, { "epoch": 0.9180055175384204, "grad_norm": 0.3849351704120636, "learning_rate": 3.298591266299278e-07, "loss": 0.4261, "step": 49448 }, { "epoch": 0.9180426476758391, "grad_norm": 0.47949573397636414, "learning_rate": 3.2956206053896934e-07, "loss": 0.3775, "step": 49450 }, { "epoch": 0.9180797778132577, "grad_norm": 0.36911675333976746, "learning_rate": 3.292651260337043e-07, "loss": 0.191, "step": 49452 }, { "epoch": 0.9181169079506764, "grad_norm": 0.32444387674331665, "learning_rate": 3.289683231181739e-07, "loss": 0.1477, "step": 49454 }, { "epoch": 0.9181540380880949, "grad_norm": 0.58058100938797, "learning_rate": 3.286716517964172e-07, "loss": 0.3939, "step": 49456 }, { "epoch": 0.9181911682255136, "grad_norm": 0.6614341735839844, "learning_rate": 3.2837511207247096e-07, "loss": 0.5406, "step": 49458 }, { "epoch": 0.9182282983629323, "grad_norm": 0.344318151473999, "learning_rate": 3.2807870395036855e-07, "loss": 0.1165, "step": 49460 }, { "epoch": 0.9182654285003509, "grad_norm": 0.46496352553367615, "learning_rate": 3.2778242743414346e-07, "loss": 0.2003, "step": 49462 }, { "epoch": 0.9183025586377696, "grad_norm": 0.2755275070667267, "learning_rate": 3.274862825278302e-07, "loss": 0.3382, "step": 49464 }, { "epoch": 0.9183396887751881, "grad_norm": 0.7228017449378967, "learning_rate": 3.271902692354545e-07, "loss": 0.0869, "step": 49466 }, { "epoch": 0.9183768189126068, "grad_norm": 0.3955008089542389, "learning_rate": 3.268943875610453e-07, "loss": 0.2828, "step": 49468 }, { "epoch": 0.9184139490500254, "grad_norm": 0.4280640780925751, "learning_rate": 3.2659863750863053e-07, "loss": 0.1533, "step": 49470 }, { "epoch": 0.9184510791874441, "grad_norm": 0.3458564877510071, "learning_rate": 3.263030190822325e-07, "loss": 0.2175, "step": 49472 }, { "epoch": 0.9184882093248627, "grad_norm": 0.2705119550228119, "learning_rate": 3.2600753228587355e-07, "loss": 0.251, "step": 49474 }, { "epoch": 0.9185253394622813, "grad_norm": 0.31712132692337036, "learning_rate": 3.2571217712357605e-07, "loss": 0.3142, "step": 49476 }, { "epoch": 0.9185624695997, "grad_norm": 0.19012252986431122, "learning_rate": 3.254169535993579e-07, "loss": 0.2775, "step": 49478 }, { "epoch": 0.9185995997371186, "grad_norm": 0.3018691837787628, "learning_rate": 3.251218617172358e-07, "loss": 0.1122, "step": 49480 }, { "epoch": 0.9186367298745373, "grad_norm": 0.4619494080543518, "learning_rate": 3.2482690148122553e-07, "loss": 0.3475, "step": 49482 }, { "epoch": 0.918673860011956, "grad_norm": 0.25093284249305725, "learning_rate": 3.245320728953416e-07, "loss": 0.2918, "step": 49484 }, { "epoch": 0.9187109901493745, "grad_norm": 0.38401415944099426, "learning_rate": 3.2423737596359527e-07, "loss": 0.2384, "step": 49486 }, { "epoch": 0.9187481202867932, "grad_norm": 0.6986663937568665, "learning_rate": 3.2394281068999447e-07, "loss": 0.5075, "step": 49488 }, { "epoch": 0.9187852504242118, "grad_norm": 0.7526283264160156, "learning_rate": 3.2364837707854923e-07, "loss": 0.3013, "step": 49490 }, { "epoch": 0.9188223805616305, "grad_norm": 0.38385629653930664, "learning_rate": 3.2335407513326534e-07, "loss": 0.1869, "step": 49492 }, { "epoch": 0.9188595106990491, "grad_norm": 0.2951247990131378, "learning_rate": 3.230599048581473e-07, "loss": 0.1548, "step": 49494 }, { "epoch": 0.9188966408364677, "grad_norm": 0.45683056116104126, "learning_rate": 3.2276586625719976e-07, "loss": 0.3015, "step": 49496 }, { "epoch": 0.9189337709738864, "grad_norm": 0.31408703327178955, "learning_rate": 3.2247195933442053e-07, "loss": 0.2771, "step": 49498 }, { "epoch": 0.918970901111305, "grad_norm": 0.3921660780906677, "learning_rate": 3.22178184093811e-07, "loss": 0.4114, "step": 49500 }, { "epoch": 0.9190080312487237, "grad_norm": 0.2388375848531723, "learning_rate": 3.218845405393667e-07, "loss": 0.268, "step": 49502 }, { "epoch": 0.9190451613861423, "grad_norm": 0.5018376708030701, "learning_rate": 3.215910286750856e-07, "loss": 0.2191, "step": 49504 }, { "epoch": 0.9190822915235609, "grad_norm": 0.401623398065567, "learning_rate": 3.212976485049613e-07, "loss": 0.1783, "step": 49506 }, { "epoch": 0.9191194216609796, "grad_norm": 0.3418211042881012, "learning_rate": 3.2100440003298373e-07, "loss": 0.1708, "step": 49508 }, { "epoch": 0.9191565517983982, "grad_norm": 0.4160067141056061, "learning_rate": 3.2071128326314536e-07, "loss": 0.2558, "step": 49510 }, { "epoch": 0.9191936819358169, "grad_norm": 0.627045214176178, "learning_rate": 3.2041829819943303e-07, "loss": 0.2494, "step": 49512 }, { "epoch": 0.9192308120732355, "grad_norm": 0.3711293935775757, "learning_rate": 3.2012544484583353e-07, "loss": 0.1625, "step": 49514 }, { "epoch": 0.9192679422106541, "grad_norm": 0.2925248146057129, "learning_rate": 3.198327232063325e-07, "loss": 0.1464, "step": 49516 }, { "epoch": 0.9193050723480728, "grad_norm": 0.3501226603984833, "learning_rate": 3.195401332849124e-07, "loss": 0.293, "step": 49518 }, { "epoch": 0.9193422024854914, "grad_norm": 0.4803522229194641, "learning_rate": 3.1924767508555444e-07, "loss": 0.4118, "step": 49520 }, { "epoch": 0.91937933262291, "grad_norm": 0.2744458317756653, "learning_rate": 3.1895534861223987e-07, "loss": 0.2021, "step": 49522 }, { "epoch": 0.9194164627603286, "grad_norm": 0.32981640100479126, "learning_rate": 3.1866315386894333e-07, "loss": 0.2452, "step": 49524 }, { "epoch": 0.9194535928977473, "grad_norm": 0.3410360515117645, "learning_rate": 3.1837109085964267e-07, "loss": 0.324, "step": 49526 }, { "epoch": 0.919490723035166, "grad_norm": 0.325719952583313, "learning_rate": 3.180791595883137e-07, "loss": 0.1961, "step": 49528 }, { "epoch": 0.9195278531725846, "grad_norm": 0.5396624207496643, "learning_rate": 3.177873600589243e-07, "loss": 0.34, "step": 49530 }, { "epoch": 0.9195649833100032, "grad_norm": 0.41553807258605957, "learning_rate": 3.17495692275448e-07, "loss": 0.2987, "step": 49532 }, { "epoch": 0.9196021134474218, "grad_norm": 0.3280593454837799, "learning_rate": 3.1720415624185374e-07, "loss": 0.3333, "step": 49534 }, { "epoch": 0.9196392435848405, "grad_norm": 0.20500224828720093, "learning_rate": 3.1691275196210626e-07, "loss": 0.1586, "step": 49536 }, { "epoch": 0.9196763737222592, "grad_norm": 0.34626907110214233, "learning_rate": 3.166214794401712e-07, "loss": 0.2307, "step": 49538 }, { "epoch": 0.9197135038596778, "grad_norm": 0.4983569383621216, "learning_rate": 3.163303386800143e-07, "loss": 0.3378, "step": 49540 }, { "epoch": 0.9197506339970964, "grad_norm": 0.29788559675216675, "learning_rate": 3.1603932968559457e-07, "loss": 0.1536, "step": 49542 }, { "epoch": 0.919787764134515, "grad_norm": 0.4228670299053192, "learning_rate": 3.157484524608734e-07, "loss": 0.2612, "step": 49544 }, { "epoch": 0.9198248942719337, "grad_norm": 0.26527491211891174, "learning_rate": 3.154577070098086e-07, "loss": 0.2616, "step": 49546 }, { "epoch": 0.9198620244093524, "grad_norm": 0.410087913274765, "learning_rate": 3.1516709333635485e-07, "loss": 0.2019, "step": 49548 }, { "epoch": 0.919899154546771, "grad_norm": 0.23622356355190277, "learning_rate": 3.14876611444469e-07, "loss": 0.2428, "step": 49550 }, { "epoch": 0.9199362846841896, "grad_norm": 0.42468416690826416, "learning_rate": 3.145862613381012e-07, "loss": 0.1768, "step": 49552 }, { "epoch": 0.9199734148216082, "grad_norm": 0.39423689246177673, "learning_rate": 3.1429604302120276e-07, "loss": 0.2702, "step": 49554 }, { "epoch": 0.9200105449590269, "grad_norm": 0.4662618637084961, "learning_rate": 3.140059564977227e-07, "loss": 0.0762, "step": 49556 }, { "epoch": 0.9200476750964456, "grad_norm": 0.3346233367919922, "learning_rate": 3.137160017716101e-07, "loss": 0.6277, "step": 49558 }, { "epoch": 0.9200848052338642, "grad_norm": 0.32841598987579346, "learning_rate": 3.1342617884680846e-07, "loss": 0.0838, "step": 49560 }, { "epoch": 0.9201219353712828, "grad_norm": 0.35583242774009705, "learning_rate": 3.1313648772726135e-07, "loss": 0.3734, "step": 49562 }, { "epoch": 0.9201590655087014, "grad_norm": 0.3716599941253662, "learning_rate": 3.128469284169111e-07, "loss": 0.4392, "step": 49564 }, { "epoch": 0.9201961956461201, "grad_norm": 0.5347161293029785, "learning_rate": 3.125575009196979e-07, "loss": 0.2932, "step": 49566 }, { "epoch": 0.9202333257835388, "grad_norm": 0.4140762984752655, "learning_rate": 3.122682052395609e-07, "loss": 0.2566, "step": 49568 }, { "epoch": 0.9202704559209574, "grad_norm": 0.8641968369483948, "learning_rate": 3.1197904138043466e-07, "loss": 0.252, "step": 49570 }, { "epoch": 0.920307586058376, "grad_norm": 0.46658557653427124, "learning_rate": 3.1169000934625604e-07, "loss": 0.2141, "step": 49572 }, { "epoch": 0.9203447161957946, "grad_norm": 0.5028789043426514, "learning_rate": 3.114011091409552e-07, "loss": 0.21, "step": 49574 }, { "epoch": 0.9203818463332133, "grad_norm": 0.5960252285003662, "learning_rate": 3.1111234076846466e-07, "loss": 0.3549, "step": 49576 }, { "epoch": 0.9204189764706319, "grad_norm": 0.41230836510658264, "learning_rate": 3.1082370423271337e-07, "loss": 0.3736, "step": 49578 }, { "epoch": 0.9204561066080506, "grad_norm": 0.4283335506916046, "learning_rate": 3.1053519953762825e-07, "loss": 0.2163, "step": 49580 }, { "epoch": 0.9204932367454692, "grad_norm": 0.40746942162513733, "learning_rate": 3.1024682668713615e-07, "loss": 0.3066, "step": 49582 }, { "epoch": 0.9205303668828878, "grad_norm": 0.6251316666603088, "learning_rate": 3.099585856851628e-07, "loss": 0.2551, "step": 49584 }, { "epoch": 0.9205674970203065, "grad_norm": 0.4499858021736145, "learning_rate": 3.0967047653562623e-07, "loss": 0.1648, "step": 49586 }, { "epoch": 0.9206046271577251, "grad_norm": 0.4417949616909027, "learning_rate": 3.0938249924244766e-07, "loss": 0.3586, "step": 49588 }, { "epoch": 0.9206417572951437, "grad_norm": 0.43329888582229614, "learning_rate": 3.090946538095474e-07, "loss": 0.1925, "step": 49590 }, { "epoch": 0.9206788874325624, "grad_norm": 0.43943285942077637, "learning_rate": 3.088069402408422e-07, "loss": 0.2741, "step": 49592 }, { "epoch": 0.920716017569981, "grad_norm": 0.47027555108070374, "learning_rate": 3.085193585402457e-07, "loss": 0.2995, "step": 49594 }, { "epoch": 0.9207531477073997, "grad_norm": 0.5543423891067505, "learning_rate": 3.082319087116714e-07, "loss": 0.48, "step": 49596 }, { "epoch": 0.9207902778448183, "grad_norm": 0.3214503824710846, "learning_rate": 3.079445907590306e-07, "loss": 0.2166, "step": 49598 }, { "epoch": 0.920827407982237, "grad_norm": 0.5501466989517212, "learning_rate": 3.076574046862324e-07, "loss": 0.1658, "step": 49600 }, { "epoch": 0.9208645381196556, "grad_norm": 0.352893203496933, "learning_rate": 3.0737035049718476e-07, "loss": 0.5363, "step": 49602 }, { "epoch": 0.9209016682570742, "grad_norm": 0.5082792639732361, "learning_rate": 3.0708342819579353e-07, "loss": 0.4783, "step": 49604 }, { "epoch": 0.9209387983944929, "grad_norm": 0.24410173296928406, "learning_rate": 3.0679663778596325e-07, "loss": 0.3532, "step": 49606 }, { "epoch": 0.9209759285319115, "grad_norm": 0.4608539044857025, "learning_rate": 3.0650997927159644e-07, "loss": 0.2335, "step": 49608 }, { "epoch": 0.9210130586693301, "grad_norm": 0.5262891054153442, "learning_rate": 3.0622345265659546e-07, "loss": 0.3605, "step": 49610 }, { "epoch": 0.9210501888067488, "grad_norm": 0.44862616062164307, "learning_rate": 3.0593705794485505e-07, "loss": 0.3602, "step": 49612 }, { "epoch": 0.9210873189441674, "grad_norm": 0.17595121264457703, "learning_rate": 3.056507951402754e-07, "loss": 0.2601, "step": 49614 }, { "epoch": 0.9211244490815861, "grad_norm": 0.5183852910995483, "learning_rate": 3.053646642467489e-07, "loss": 0.4258, "step": 49616 }, { "epoch": 0.9211615792190047, "grad_norm": 0.21415559947490692, "learning_rate": 3.0507866526817143e-07, "loss": 0.1418, "step": 49618 }, { "epoch": 0.9211987093564233, "grad_norm": 0.45647209882736206, "learning_rate": 3.047927982084331e-07, "loss": 0.2802, "step": 49620 }, { "epoch": 0.9212358394938419, "grad_norm": 0.445491224527359, "learning_rate": 3.045070630714253e-07, "loss": 0.4242, "step": 49622 }, { "epoch": 0.9212729696312606, "grad_norm": 0.2769622206687927, "learning_rate": 3.042214598610349e-07, "loss": 0.1816, "step": 49624 }, { "epoch": 0.9213100997686793, "grad_norm": 0.5608027577400208, "learning_rate": 3.039359885811477e-07, "loss": 0.1933, "step": 49626 }, { "epoch": 0.9213472299060979, "grad_norm": 0.2970820963382721, "learning_rate": 3.0365064923564835e-07, "loss": 0.259, "step": 49628 }, { "epoch": 0.9213843600435165, "grad_norm": 0.24148432910442352, "learning_rate": 3.033654418284193e-07, "loss": 0.3813, "step": 49630 }, { "epoch": 0.9214214901809351, "grad_norm": 0.3515041172504425, "learning_rate": 3.0308036636334303e-07, "loss": 0.1049, "step": 49632 }, { "epoch": 0.9214586203183538, "grad_norm": 0.2566092610359192, "learning_rate": 3.027954228442975e-07, "loss": 0.3213, "step": 49634 }, { "epoch": 0.9214957504557725, "grad_norm": 0.4311188757419586, "learning_rate": 3.025106112751597e-07, "loss": 0.2825, "step": 49636 }, { "epoch": 0.921532880593191, "grad_norm": 0.4219937026500702, "learning_rate": 3.022259316598042e-07, "loss": 0.2366, "step": 49638 }, { "epoch": 0.9215700107306097, "grad_norm": 0.31171107292175293, "learning_rate": 3.0194138400210573e-07, "loss": 0.1902, "step": 49640 }, { "epoch": 0.9216071408680283, "grad_norm": 0.4017420709133148, "learning_rate": 3.016569683059356e-07, "loss": 0.3359, "step": 49642 }, { "epoch": 0.921644271005447, "grad_norm": 0.5019659399986267, "learning_rate": 3.0137268457516523e-07, "loss": 0.4369, "step": 49644 }, { "epoch": 0.9216814011428657, "grad_norm": 0.6282856464385986, "learning_rate": 3.0108853281366035e-07, "loss": 0.3621, "step": 49646 }, { "epoch": 0.9217185312802842, "grad_norm": 0.46868380904197693, "learning_rate": 3.0080451302529125e-07, "loss": 0.2782, "step": 49648 }, { "epoch": 0.9217556614177029, "grad_norm": 0.28179624676704407, "learning_rate": 3.005206252139192e-07, "loss": 0.248, "step": 49650 }, { "epoch": 0.9217927915551215, "grad_norm": 0.2756509780883789, "learning_rate": 3.002368693834079e-07, "loss": 0.2894, "step": 49652 }, { "epoch": 0.9218299216925402, "grad_norm": 0.4053601622581482, "learning_rate": 2.9995324553761753e-07, "loss": 0.3389, "step": 49654 }, { "epoch": 0.9218670518299589, "grad_norm": 0.43468454480171204, "learning_rate": 2.996697536804105e-07, "loss": 0.3651, "step": 49656 }, { "epoch": 0.9219041819673774, "grad_norm": 0.26193007826805115, "learning_rate": 2.993863938156416e-07, "loss": 0.4597, "step": 49658 }, { "epoch": 0.9219413121047961, "grad_norm": 0.3843808174133301, "learning_rate": 2.991031659471677e-07, "loss": 0.2793, "step": 49660 }, { "epoch": 0.9219784422422147, "grad_norm": 0.4789116680622101, "learning_rate": 2.9882007007884126e-07, "loss": 0.2201, "step": 49662 }, { "epoch": 0.9220155723796334, "grad_norm": 0.490196168422699, "learning_rate": 2.985371062145159e-07, "loss": 0.1971, "step": 49664 }, { "epoch": 0.9220527025170521, "grad_norm": 0.24228043854236603, "learning_rate": 2.982542743580408e-07, "loss": 0.1156, "step": 49666 }, { "epoch": 0.9220898326544706, "grad_norm": 0.5311570167541504, "learning_rate": 2.9797157451326497e-07, "loss": 0.3389, "step": 49668 }, { "epoch": 0.9221269627918893, "grad_norm": 0.45341333746910095, "learning_rate": 2.9768900668403433e-07, "loss": 0.1584, "step": 49670 }, { "epoch": 0.9221640929293079, "grad_norm": 0.34992337226867676, "learning_rate": 2.9740657087419577e-07, "loss": 0.31, "step": 49672 }, { "epoch": 0.9222012230667266, "grad_norm": 0.3912426233291626, "learning_rate": 2.971242670875907e-07, "loss": 0.2488, "step": 49674 }, { "epoch": 0.9222383532041452, "grad_norm": 0.31488868594169617, "learning_rate": 2.968420953280604e-07, "loss": 0.2811, "step": 49676 }, { "epoch": 0.9222754833415638, "grad_norm": 0.5603411197662354, "learning_rate": 2.965600555994463e-07, "loss": 0.3825, "step": 49678 }, { "epoch": 0.9223126134789825, "grad_norm": 0.3272095024585724, "learning_rate": 2.962781479055843e-07, "loss": 0.2843, "step": 49680 }, { "epoch": 0.9223497436164011, "grad_norm": 0.3244319260120392, "learning_rate": 2.9599637225030895e-07, "loss": 0.1981, "step": 49682 }, { "epoch": 0.9223868737538198, "grad_norm": 0.4519628882408142, "learning_rate": 2.957147286374573e-07, "loss": 0.4298, "step": 49684 }, { "epoch": 0.9224240038912384, "grad_norm": 0.5805622935295105, "learning_rate": 2.954332170708618e-07, "loss": 0.2623, "step": 49686 }, { "epoch": 0.922461134028657, "grad_norm": 0.3464388847351074, "learning_rate": 2.951518375543494e-07, "loss": 0.1971, "step": 49688 }, { "epoch": 0.9224982641660757, "grad_norm": 0.5249294638633728, "learning_rate": 2.948705900917526e-07, "loss": 0.2506, "step": 49690 }, { "epoch": 0.9225353943034943, "grad_norm": 0.39281004667282104, "learning_rate": 2.94589474686896e-07, "loss": 0.3173, "step": 49692 }, { "epoch": 0.922572524440913, "grad_norm": 0.49507346749305725, "learning_rate": 2.943084913436056e-07, "loss": 0.2218, "step": 49694 }, { "epoch": 0.9226096545783316, "grad_norm": 0.43025633692741394, "learning_rate": 2.9402764006570605e-07, "loss": 0.1729, "step": 49696 }, { "epoch": 0.9226467847157502, "grad_norm": 0.34313881397247314, "learning_rate": 2.937469208570154e-07, "loss": 0.3711, "step": 49698 }, { "epoch": 0.9226839148531689, "grad_norm": 0.44230666756629944, "learning_rate": 2.9346633372135833e-07, "loss": 0.3091, "step": 49700 }, { "epoch": 0.9227210449905875, "grad_norm": 0.34169286489486694, "learning_rate": 2.9318587866254745e-07, "loss": 0.3142, "step": 49702 }, { "epoch": 0.9227581751280062, "grad_norm": 0.639430820941925, "learning_rate": 2.9290555568440296e-07, "loss": 0.2785, "step": 49704 }, { "epoch": 0.9227953052654247, "grad_norm": 0.3331637382507324, "learning_rate": 2.9262536479073624e-07, "loss": 0.1922, "step": 49706 }, { "epoch": 0.9228324354028434, "grad_norm": 0.4130639433860779, "learning_rate": 2.92345305985362e-07, "loss": 0.1663, "step": 49708 }, { "epoch": 0.9228695655402621, "grad_norm": 0.2497435212135315, "learning_rate": 2.9206537927209177e-07, "loss": 0.2688, "step": 49710 }, { "epoch": 0.9229066956776807, "grad_norm": 0.44751647114753723, "learning_rate": 2.917855846547313e-07, "loss": 0.1734, "step": 49712 }, { "epoch": 0.9229438258150994, "grad_norm": 0.24870429933071136, "learning_rate": 2.915059221370908e-07, "loss": 0.2867, "step": 49714 }, { "epoch": 0.922980955952518, "grad_norm": 0.38127273321151733, "learning_rate": 2.9122639172297293e-07, "loss": 0.3449, "step": 49716 }, { "epoch": 0.9230180860899366, "grad_norm": 0.33446675539016724, "learning_rate": 2.909469934161846e-07, "loss": 0.2816, "step": 49718 }, { "epoch": 0.9230552162273553, "grad_norm": 0.4531771242618561, "learning_rate": 2.9066772722052493e-07, "loss": 0.2424, "step": 49720 }, { "epoch": 0.9230923463647739, "grad_norm": 0.36100202798843384, "learning_rate": 2.903885931397943e-07, "loss": 0.189, "step": 49722 }, { "epoch": 0.9231294765021926, "grad_norm": 0.4066493511199951, "learning_rate": 2.901095911777918e-07, "loss": 0.2697, "step": 49724 }, { "epoch": 0.9231666066396111, "grad_norm": 0.34657007455825806, "learning_rate": 2.8983072133831223e-07, "loss": 0.1586, "step": 49726 }, { "epoch": 0.9232037367770298, "grad_norm": 0.4429459571838379, "learning_rate": 2.895519836251526e-07, "loss": 0.2624, "step": 49728 }, { "epoch": 0.9232408669144484, "grad_norm": 0.8523456454277039, "learning_rate": 2.892733780421031e-07, "loss": 0.4041, "step": 49730 }, { "epoch": 0.9232779970518671, "grad_norm": 0.3585398495197296, "learning_rate": 2.889949045929563e-07, "loss": 0.4686, "step": 49732 }, { "epoch": 0.9233151271892858, "grad_norm": 0.4353378117084503, "learning_rate": 2.887165632815003e-07, "loss": 0.2502, "step": 49734 }, { "epoch": 0.9233522573267043, "grad_norm": 0.4477533996105194, "learning_rate": 2.884383541115254e-07, "loss": 0.244, "step": 49736 }, { "epoch": 0.923389387464123, "grad_norm": 0.36425039172172546, "learning_rate": 2.8816027708681305e-07, "loss": 0.1456, "step": 49738 }, { "epoch": 0.9234265176015416, "grad_norm": 0.517238974571228, "learning_rate": 2.8788233221115123e-07, "loss": 0.3529, "step": 49740 }, { "epoch": 0.9234636477389603, "grad_norm": 0.49075785279273987, "learning_rate": 2.87604519488317e-07, "loss": 0.1766, "step": 49742 }, { "epoch": 0.923500777876379, "grad_norm": 0.4419189691543579, "learning_rate": 2.8732683892209514e-07, "loss": 0.4288, "step": 49744 }, { "epoch": 0.9235379080137975, "grad_norm": 0.3829626739025116, "learning_rate": 2.8704929051626027e-07, "loss": 0.4717, "step": 49746 }, { "epoch": 0.9235750381512162, "grad_norm": 0.523746132850647, "learning_rate": 2.867718742745929e-07, "loss": 0.2716, "step": 49748 }, { "epoch": 0.9236121682886348, "grad_norm": 0.3787669539451599, "learning_rate": 2.8649459020086533e-07, "loss": 0.2094, "step": 49750 }, { "epoch": 0.9236492984260535, "grad_norm": 0.5381085872650146, "learning_rate": 2.8621743829885027e-07, "loss": 0.273, "step": 49752 }, { "epoch": 0.9236864285634722, "grad_norm": 0.2545231580734253, "learning_rate": 2.8594041857232027e-07, "loss": 0.1999, "step": 49754 }, { "epoch": 0.9237235587008907, "grad_norm": 0.8038944602012634, "learning_rate": 2.856635310250433e-07, "loss": 0.2483, "step": 49756 }, { "epoch": 0.9237606888383094, "grad_norm": 0.5130699276924133, "learning_rate": 2.8538677566078866e-07, "loss": 0.2003, "step": 49758 }, { "epoch": 0.923797818975728, "grad_norm": 0.37654849886894226, "learning_rate": 2.851101524833222e-07, "loss": 0.2984, "step": 49760 }, { "epoch": 0.9238349491131467, "grad_norm": 0.22724920511245728, "learning_rate": 2.8483366149640645e-07, "loss": 0.239, "step": 49762 }, { "epoch": 0.9238720792505654, "grad_norm": 0.33984535932540894, "learning_rate": 2.845573027038051e-07, "loss": 0.115, "step": 49764 }, { "epoch": 0.9239092093879839, "grad_norm": 0.25486499071121216, "learning_rate": 2.8428107610927624e-07, "loss": 0.1202, "step": 49766 }, { "epoch": 0.9239463395254026, "grad_norm": 0.22435320913791656, "learning_rate": 2.840049817165802e-07, "loss": 0.1506, "step": 49768 }, { "epoch": 0.9239834696628212, "grad_norm": 0.4320261478424072, "learning_rate": 2.8372901952947394e-07, "loss": 0.327, "step": 49770 }, { "epoch": 0.9240205998002399, "grad_norm": 0.3730677664279938, "learning_rate": 2.834531895517112e-07, "loss": 0.2633, "step": 49772 }, { "epoch": 0.9240577299376584, "grad_norm": 0.3902011513710022, "learning_rate": 2.831774917870478e-07, "loss": 0.3242, "step": 49774 }, { "epoch": 0.9240948600750771, "grad_norm": 0.37957963347435, "learning_rate": 2.8290192623923294e-07, "loss": 0.3149, "step": 49776 }, { "epoch": 0.9241319902124958, "grad_norm": 0.49002882838249207, "learning_rate": 2.8262649291201596e-07, "loss": 0.219, "step": 49778 }, { "epoch": 0.9241691203499144, "grad_norm": 0.6142514944076538, "learning_rate": 2.823511918091459e-07, "loss": 0.2665, "step": 49780 }, { "epoch": 0.9242062504873331, "grad_norm": 0.4994458258152008, "learning_rate": 2.820760229343689e-07, "loss": 0.2954, "step": 49782 }, { "epoch": 0.9242433806247516, "grad_norm": 0.5009868144989014, "learning_rate": 2.818009862914284e-07, "loss": 0.2169, "step": 49784 }, { "epoch": 0.9242805107621703, "grad_norm": 0.2878788709640503, "learning_rate": 2.8152608188406817e-07, "loss": 0.2727, "step": 49786 }, { "epoch": 0.924317640899589, "grad_norm": 0.456452876329422, "learning_rate": 2.812513097160263e-07, "loss": 0.4672, "step": 49788 }, { "epoch": 0.9243547710370076, "grad_norm": 0.3580019474029541, "learning_rate": 2.8097666979104323e-07, "loss": 0.3862, "step": 49790 }, { "epoch": 0.9243919011744263, "grad_norm": 0.5655384063720703, "learning_rate": 2.8070216211285584e-07, "loss": 0.3802, "step": 49792 }, { "epoch": 0.9244290313118448, "grad_norm": 0.7612162828445435, "learning_rate": 2.80427786685199e-07, "loss": 0.3959, "step": 49794 }, { "epoch": 0.9244661614492635, "grad_norm": 0.37837541103363037, "learning_rate": 2.8015354351180634e-07, "loss": 0.3358, "step": 49796 }, { "epoch": 0.9245032915866822, "grad_norm": 0.39777854084968567, "learning_rate": 2.798794325964116e-07, "loss": 0.2923, "step": 49798 }, { "epoch": 0.9245404217241008, "grad_norm": 0.29153814911842346, "learning_rate": 2.7960545394274176e-07, "loss": 0.2295, "step": 49800 }, { "epoch": 0.9245775518615195, "grad_norm": 0.4093559682369232, "learning_rate": 2.7933160755452495e-07, "loss": 0.1795, "step": 49802 }, { "epoch": 0.924614681998938, "grad_norm": 0.5435904264450073, "learning_rate": 2.790578934354893e-07, "loss": 0.3073, "step": 49804 }, { "epoch": 0.9246518121363567, "grad_norm": 0.6149637699127197, "learning_rate": 2.7878431158935735e-07, "loss": 0.2266, "step": 49806 }, { "epoch": 0.9246889422737754, "grad_norm": 0.269695520401001, "learning_rate": 2.785108620198518e-07, "loss": 0.2975, "step": 49808 }, { "epoch": 0.924726072411194, "grad_norm": 0.3480795919895172, "learning_rate": 2.782375447306951e-07, "loss": 0.3703, "step": 49810 }, { "epoch": 0.9247632025486127, "grad_norm": 0.26122185587882996, "learning_rate": 2.7796435972560545e-07, "loss": 0.3722, "step": 49812 }, { "epoch": 0.9248003326860312, "grad_norm": 0.5064325332641602, "learning_rate": 2.7769130700829983e-07, "loss": 0.3964, "step": 49814 }, { "epoch": 0.9248374628234499, "grad_norm": 0.334913045167923, "learning_rate": 2.774183865824942e-07, "loss": 0.21, "step": 49816 }, { "epoch": 0.9248745929608686, "grad_norm": 0.25168871879577637, "learning_rate": 2.7714559845190117e-07, "loss": 0.1169, "step": 49818 }, { "epoch": 0.9249117230982872, "grad_norm": 0.3868754804134369, "learning_rate": 2.7687294262023325e-07, "loss": 0.2624, "step": 49820 }, { "epoch": 0.9249488532357059, "grad_norm": 0.290458083152771, "learning_rate": 2.7660041909119973e-07, "loss": 0.3303, "step": 49822 }, { "epoch": 0.9249859833731244, "grad_norm": 0.2924361526966095, "learning_rate": 2.76328027868511e-07, "loss": 0.1211, "step": 49824 }, { "epoch": 0.9250231135105431, "grad_norm": 0.3289566934108734, "learning_rate": 2.760557689558729e-07, "loss": 0.2383, "step": 49826 }, { "epoch": 0.9250602436479617, "grad_norm": 0.3992975056171417, "learning_rate": 2.757836423569871e-07, "loss": 0.3275, "step": 49828 }, { "epoch": 0.9250973737853804, "grad_norm": 0.39561134576797485, "learning_rate": 2.755116480755593e-07, "loss": 0.1999, "step": 49830 }, { "epoch": 0.9251345039227991, "grad_norm": 0.3623635768890381, "learning_rate": 2.7523978611529e-07, "loss": 0.339, "step": 49832 }, { "epoch": 0.9251716340602176, "grad_norm": 0.4861413538455963, "learning_rate": 2.7496805647987733e-07, "loss": 0.1024, "step": 49834 }, { "epoch": 0.9252087641976363, "grad_norm": 0.4445790648460388, "learning_rate": 2.7469645917302057e-07, "loss": 0.2267, "step": 49836 }, { "epoch": 0.9252458943350549, "grad_norm": 0.3816763162612915, "learning_rate": 2.744249941984145e-07, "loss": 0.2856, "step": 49838 }, { "epoch": 0.9252830244724736, "grad_norm": 0.3544054627418518, "learning_rate": 2.7415366155975177e-07, "loss": 0.6093, "step": 49840 }, { "epoch": 0.9253201546098923, "grad_norm": 0.23181086778640747, "learning_rate": 2.7388246126072494e-07, "loss": 0.2593, "step": 49842 }, { "epoch": 0.9253572847473108, "grad_norm": 0.5426713824272156, "learning_rate": 2.736113933050255e-07, "loss": 0.3157, "step": 49844 }, { "epoch": 0.9253944148847295, "grad_norm": 0.4052729904651642, "learning_rate": 2.7334045769634277e-07, "loss": 0.4713, "step": 49846 }, { "epoch": 0.9254315450221481, "grad_norm": 0.3568899631500244, "learning_rate": 2.730696544383593e-07, "loss": 0.2084, "step": 49848 }, { "epoch": 0.9254686751595668, "grad_norm": 0.4612375497817993, "learning_rate": 2.7279898353476443e-07, "loss": 0.1696, "step": 49850 }, { "epoch": 0.9255058052969855, "grad_norm": 0.48294755816459656, "learning_rate": 2.725284449892374e-07, "loss": 0.2931, "step": 49852 }, { "epoch": 0.925542935434404, "grad_norm": 0.5002955794334412, "learning_rate": 2.7225803880546074e-07, "loss": 0.3502, "step": 49854 }, { "epoch": 0.9255800655718227, "grad_norm": 0.309332013130188, "learning_rate": 2.7198776498711497e-07, "loss": 0.1719, "step": 49856 }, { "epoch": 0.9256171957092413, "grad_norm": 0.34766149520874023, "learning_rate": 2.717176235378771e-07, "loss": 0.3784, "step": 49858 }, { "epoch": 0.92565432584666, "grad_norm": 0.42977020144462585, "learning_rate": 2.71447614461422e-07, "loss": 0.1214, "step": 49860 }, { "epoch": 0.9256914559840786, "grad_norm": 0.31497064232826233, "learning_rate": 2.711777377614255e-07, "loss": 0.1847, "step": 49862 }, { "epoch": 0.9257285861214972, "grad_norm": 0.30456656217575073, "learning_rate": 2.709079934415582e-07, "loss": 0.3104, "step": 49864 }, { "epoch": 0.9257657162589159, "grad_norm": 0.25535306334495544, "learning_rate": 2.7063838150549137e-07, "loss": 0.1593, "step": 49866 }, { "epoch": 0.9258028463963345, "grad_norm": 0.2112947702407837, "learning_rate": 2.703689019568945e-07, "loss": 0.2687, "step": 49868 }, { "epoch": 0.9258399765337532, "grad_norm": 0.34325939416885376, "learning_rate": 2.7009955479943116e-07, "loss": 0.1482, "step": 49870 }, { "epoch": 0.9258771066711717, "grad_norm": 0.4030865728855133, "learning_rate": 2.698303400367697e-07, "loss": 0.2087, "step": 49872 }, { "epoch": 0.9259142368085904, "grad_norm": 0.5946475267410278, "learning_rate": 2.6956125767257147e-07, "loss": 0.2744, "step": 49874 }, { "epoch": 0.9259513669460091, "grad_norm": 0.4652072787284851, "learning_rate": 2.692923077104992e-07, "loss": 0.2395, "step": 49876 }, { "epoch": 0.9259884970834277, "grad_norm": 0.30950677394866943, "learning_rate": 2.6902349015421105e-07, "loss": 0.2437, "step": 49878 }, { "epoch": 0.9260256272208464, "grad_norm": 0.41209298372268677, "learning_rate": 2.687548050073652e-07, "loss": 0.2616, "step": 49880 }, { "epoch": 0.9260627573582649, "grad_norm": 0.24194031953811646, "learning_rate": 2.684862522736187e-07, "loss": 0.36, "step": 49882 }, { "epoch": 0.9260998874956836, "grad_norm": 0.376455694437027, "learning_rate": 2.682178319566242e-07, "loss": 0.2208, "step": 49884 }, { "epoch": 0.9261370176331023, "grad_norm": 0.6508611440658569, "learning_rate": 2.679495440600366e-07, "loss": 0.251, "step": 49886 }, { "epoch": 0.9261741477705209, "grad_norm": 0.32797205448150635, "learning_rate": 2.6768138858750404e-07, "loss": 0.1273, "step": 49888 }, { "epoch": 0.9262112779079396, "grad_norm": 0.31941789388656616, "learning_rate": 2.674133655426769e-07, "loss": 0.4069, "step": 49890 }, { "epoch": 0.9262484080453581, "grad_norm": 0.3808702826499939, "learning_rate": 2.6714547492920017e-07, "loss": 0.3239, "step": 49892 }, { "epoch": 0.9262855381827768, "grad_norm": 0.35558995604515076, "learning_rate": 2.6687771675071973e-07, "loss": 0.2458, "step": 49894 }, { "epoch": 0.9263226683201955, "grad_norm": 0.5020599365234375, "learning_rate": 2.6661009101088044e-07, "loss": 0.2597, "step": 49896 }, { "epoch": 0.9263597984576141, "grad_norm": 0.4423373341560364, "learning_rate": 2.663425977133227e-07, "loss": 0.29, "step": 49898 }, { "epoch": 0.9263969285950328, "grad_norm": 0.24018560349941254, "learning_rate": 2.6607523686168703e-07, "loss": 0.1913, "step": 49900 }, { "epoch": 0.9264340587324513, "grad_norm": 0.35924842953681946, "learning_rate": 2.658080084596104e-07, "loss": 0.2498, "step": 49902 }, { "epoch": 0.92647118886987, "grad_norm": 0.2985548675060272, "learning_rate": 2.655409125107289e-07, "loss": 0.0891, "step": 49904 }, { "epoch": 0.9265083190072887, "grad_norm": 0.38645586371421814, "learning_rate": 2.652739490186784e-07, "loss": 0.3805, "step": 49906 }, { "epoch": 0.9265454491447073, "grad_norm": 0.26703301072120667, "learning_rate": 2.650071179870917e-07, "loss": 0.1515, "step": 49908 }, { "epoch": 0.926582579282126, "grad_norm": 0.2515755891799927, "learning_rate": 2.6474041941959684e-07, "loss": 0.1603, "step": 49910 }, { "epoch": 0.9266197094195445, "grad_norm": 0.44288450479507446, "learning_rate": 2.6447385331982544e-07, "loss": 0.3095, "step": 49912 }, { "epoch": 0.9266568395569632, "grad_norm": 0.41956478357315063, "learning_rate": 2.642074196914024e-07, "loss": 0.1451, "step": 49914 }, { "epoch": 0.9266939696943819, "grad_norm": 0.3735790252685547, "learning_rate": 2.639411185379537e-07, "loss": 0.3441, "step": 49916 }, { "epoch": 0.9267310998318005, "grad_norm": 0.6341722011566162, "learning_rate": 2.636749498631053e-07, "loss": 0.3768, "step": 49918 }, { "epoch": 0.9267682299692191, "grad_norm": 0.5440880656242371, "learning_rate": 2.6340891367047536e-07, "loss": 0.2471, "step": 49920 }, { "epoch": 0.9268053601066377, "grad_norm": 0.3225160241127014, "learning_rate": 2.631430099636867e-07, "loss": 0.2332, "step": 49922 }, { "epoch": 0.9268424902440564, "grad_norm": 0.44770368933677673, "learning_rate": 2.628772387463574e-07, "loss": 0.1694, "step": 49924 }, { "epoch": 0.926879620381475, "grad_norm": 0.5047432780265808, "learning_rate": 2.626116000221013e-07, "loss": 0.4647, "step": 49926 }, { "epoch": 0.9269167505188937, "grad_norm": 0.36197927594184875, "learning_rate": 2.6234609379453547e-07, "loss": 0.1238, "step": 49928 }, { "epoch": 0.9269538806563123, "grad_norm": 0.22939473390579224, "learning_rate": 2.6208072006727034e-07, "loss": 0.194, "step": 49930 }, { "epoch": 0.9269910107937309, "grad_norm": 0.39572539925575256, "learning_rate": 2.618154788439198e-07, "loss": 0.2093, "step": 49932 }, { "epoch": 0.9270281409311496, "grad_norm": 0.38085970282554626, "learning_rate": 2.6155037012809084e-07, "loss": 0.1272, "step": 49934 }, { "epoch": 0.9270652710685682, "grad_norm": 0.6949074864387512, "learning_rate": 2.612853939233906e-07, "loss": 0.3489, "step": 49936 }, { "epoch": 0.9271024012059869, "grad_norm": 0.45535701513290405, "learning_rate": 2.610205502334262e-07, "loss": 0.1939, "step": 49938 }, { "epoch": 0.9271395313434055, "grad_norm": 0.31363987922668457, "learning_rate": 2.607558390618004e-07, "loss": 0.2817, "step": 49940 }, { "epoch": 0.9271766614808241, "grad_norm": 0.519798755645752, "learning_rate": 2.604912604121146e-07, "loss": 0.3837, "step": 49942 }, { "epoch": 0.9272137916182428, "grad_norm": 0.4134761691093445, "learning_rate": 2.6022681428796937e-07, "loss": 0.2475, "step": 49944 }, { "epoch": 0.9272509217556614, "grad_norm": 0.4680531322956085, "learning_rate": 2.599625006929629e-07, "loss": 0.2266, "step": 49946 }, { "epoch": 0.9272880518930801, "grad_norm": 0.5666810274124146, "learning_rate": 2.596983196306924e-07, "loss": 0.3188, "step": 49948 }, { "epoch": 0.9273251820304987, "grad_norm": 0.24031789600849152, "learning_rate": 2.594342711047537e-07, "loss": 0.2657, "step": 49950 }, { "epoch": 0.9273623121679173, "grad_norm": 0.31892114877700806, "learning_rate": 2.591703551187363e-07, "loss": 0.3445, "step": 49952 }, { "epoch": 0.927399442305336, "grad_norm": 0.3310941457748413, "learning_rate": 2.5890657167623514e-07, "loss": 0.2466, "step": 49954 }, { "epoch": 0.9274365724427546, "grad_norm": 0.36471033096313477, "learning_rate": 2.586429207808361e-07, "loss": 0.3944, "step": 49956 }, { "epoch": 0.9274737025801733, "grad_norm": 0.24163736402988434, "learning_rate": 2.583794024361286e-07, "loss": 0.2134, "step": 49958 }, { "epoch": 0.9275108327175919, "grad_norm": 0.584182858467102, "learning_rate": 2.5811601664569754e-07, "loss": 0.1063, "step": 49960 }, { "epoch": 0.9275479628550105, "grad_norm": 0.4147902727127075, "learning_rate": 2.5785276341312784e-07, "loss": 0.3217, "step": 49962 }, { "epoch": 0.9275850929924292, "grad_norm": 0.3719520568847656, "learning_rate": 2.5758964274199995e-07, "loss": 0.1422, "step": 49964 }, { "epoch": 0.9276222231298478, "grad_norm": 0.20314651727676392, "learning_rate": 2.5732665463589545e-07, "loss": 0.2562, "step": 49966 }, { "epoch": 0.9276593532672665, "grad_norm": 0.5559768676757812, "learning_rate": 2.5706379909839263e-07, "loss": 0.2322, "step": 49968 }, { "epoch": 0.9276964834046851, "grad_norm": 0.31284722685813904, "learning_rate": 2.568010761330675e-07, "loss": 0.1512, "step": 49970 }, { "epoch": 0.9277336135421037, "grad_norm": 0.2678031325340271, "learning_rate": 2.5653848574349715e-07, "loss": 0.2352, "step": 49972 }, { "epoch": 0.9277707436795224, "grad_norm": 0.33180689811706543, "learning_rate": 2.56276027933251e-07, "loss": 0.2884, "step": 49974 }, { "epoch": 0.927807873816941, "grad_norm": 0.18834978342056274, "learning_rate": 2.56013702705904e-07, "loss": 0.2058, "step": 49976 }, { "epoch": 0.9278450039543596, "grad_norm": 0.46009984612464905, "learning_rate": 2.557515100650232e-07, "loss": 0.2194, "step": 49978 }, { "epoch": 0.9278821340917782, "grad_norm": 0.25312185287475586, "learning_rate": 2.5548945001417693e-07, "loss": 0.2198, "step": 49980 }, { "epoch": 0.9279192642291969, "grad_norm": 0.3930930495262146, "learning_rate": 2.5522752255693004e-07, "loss": 0.2124, "step": 49982 }, { "epoch": 0.9279563943666156, "grad_norm": 0.5646523237228394, "learning_rate": 2.549657276968487e-07, "loss": 0.1545, "step": 49984 }, { "epoch": 0.9279935245040342, "grad_norm": 0.3822021186351776, "learning_rate": 2.5470406543749326e-07, "loss": 0.2593, "step": 49986 }, { "epoch": 0.9280306546414528, "grad_norm": 0.5157206654548645, "learning_rate": 2.544425357824265e-07, "loss": 0.1803, "step": 49988 }, { "epoch": 0.9280677847788714, "grad_norm": 0.4178747832775116, "learning_rate": 2.5418113873520444e-07, "loss": 0.1658, "step": 49990 }, { "epoch": 0.9281049149162901, "grad_norm": 0.2807450294494629, "learning_rate": 2.539198742993842e-07, "loss": 0.1151, "step": 49992 }, { "epoch": 0.9281420450537088, "grad_norm": 0.2861728072166443, "learning_rate": 2.5365874247852306e-07, "loss": 0.3368, "step": 49994 }, { "epoch": 0.9281791751911274, "grad_norm": 0.4530234634876251, "learning_rate": 2.533977432761725e-07, "loss": 0.2256, "step": 49996 }, { "epoch": 0.928216305328546, "grad_norm": 0.6383476257324219, "learning_rate": 2.5313687669588304e-07, "loss": 0.3119, "step": 49998 }, { "epoch": 0.9282534354659646, "grad_norm": 0.3517311215400696, "learning_rate": 2.528761427412074e-07, "loss": 0.2339, "step": 50000 }, { "epoch": 0.9282905656033833, "grad_norm": 0.5342261791229248, "learning_rate": 2.5261554141568943e-07, "loss": 0.1814, "step": 50002 }, { "epoch": 0.928327695740802, "grad_norm": 0.46976590156555176, "learning_rate": 2.523550727228785e-07, "loss": 0.1672, "step": 50004 }, { "epoch": 0.9283648258782206, "grad_norm": 0.3364795446395874, "learning_rate": 2.5209473666631625e-07, "loss": 0.261, "step": 50006 }, { "epoch": 0.9284019560156392, "grad_norm": 0.3253527879714966, "learning_rate": 2.518345332495464e-07, "loss": 0.0617, "step": 50008 }, { "epoch": 0.9284390861530578, "grad_norm": 0.4243648052215576, "learning_rate": 2.515744624761096e-07, "loss": 0.3977, "step": 50010 }, { "epoch": 0.9284762162904765, "grad_norm": 0.5059959292411804, "learning_rate": 2.513145243495452e-07, "loss": 0.2348, "step": 50012 }, { "epoch": 0.9285133464278952, "grad_norm": 0.23626206815242767, "learning_rate": 2.5105471887338915e-07, "loss": 0.1201, "step": 50014 }, { "epoch": 0.9285504765653138, "grad_norm": 0.41124823689460754, "learning_rate": 2.5079504605117765e-07, "loss": 0.2356, "step": 50016 }, { "epoch": 0.9285876067027324, "grad_norm": 0.4590713381767273, "learning_rate": 2.505355058864423e-07, "loss": 0.1258, "step": 50018 }, { "epoch": 0.928624736840151, "grad_norm": 0.2713387608528137, "learning_rate": 2.502760983827157e-07, "loss": 0.265, "step": 50020 }, { "epoch": 0.9286618669775697, "grad_norm": 0.4963763952255249, "learning_rate": 2.500168235435274e-07, "loss": 0.2598, "step": 50022 }, { "epoch": 0.9286989971149883, "grad_norm": 0.2727981209754944, "learning_rate": 2.4975768137240565e-07, "loss": 0.2796, "step": 50024 }, { "epoch": 0.928736127252407, "grad_norm": 0.5048683881759644, "learning_rate": 2.4949867187287756e-07, "loss": 0.2263, "step": 50026 }, { "epoch": 0.9287732573898256, "grad_norm": 0.326703280210495, "learning_rate": 2.49239795048466e-07, "loss": 0.2829, "step": 50028 }, { "epoch": 0.9288103875272442, "grad_norm": 0.3353484570980072, "learning_rate": 2.4898105090269354e-07, "loss": 0.2288, "step": 50030 }, { "epoch": 0.9288475176646629, "grad_norm": 0.5045747756958008, "learning_rate": 2.487224394390808e-07, "loss": 0.4539, "step": 50032 }, { "epoch": 0.9288846478020815, "grad_norm": 0.5396079421043396, "learning_rate": 2.484639606611472e-07, "loss": 0.2241, "step": 50034 }, { "epoch": 0.9289217779395001, "grad_norm": 0.48862725496292114, "learning_rate": 2.48205614572411e-07, "loss": 0.2605, "step": 50036 }, { "epoch": 0.9289589080769188, "grad_norm": 0.6225519180297852, "learning_rate": 2.47947401176386e-07, "loss": 0.1512, "step": 50038 }, { "epoch": 0.9289960382143374, "grad_norm": 0.27551329135894775, "learning_rate": 2.4768932047658625e-07, "loss": 0.2087, "step": 50040 }, { "epoch": 0.9290331683517561, "grad_norm": 0.3404499292373657, "learning_rate": 2.4743137247652315e-07, "loss": 0.2611, "step": 50042 }, { "epoch": 0.9290702984891747, "grad_norm": 0.3554465174674988, "learning_rate": 2.4717355717970624e-07, "loss": 0.0476, "step": 50044 }, { "epoch": 0.9291074286265933, "grad_norm": 0.29980018734931946, "learning_rate": 2.4691587458964383e-07, "loss": 0.1652, "step": 50046 }, { "epoch": 0.929144558764012, "grad_norm": 0.3189743161201477, "learning_rate": 2.4665832470984307e-07, "loss": 0.2194, "step": 50048 }, { "epoch": 0.9291816889014306, "grad_norm": 0.43185773491859436, "learning_rate": 2.4640090754380785e-07, "loss": 0.1238, "step": 50050 }, { "epoch": 0.9292188190388493, "grad_norm": 0.35565221309661865, "learning_rate": 2.4614362309504093e-07, "loss": 0.108, "step": 50052 }, { "epoch": 0.9292559491762679, "grad_norm": 0.4460527002811432, "learning_rate": 2.458864713670428e-07, "loss": 0.4787, "step": 50054 }, { "epoch": 0.9292930793136865, "grad_norm": 0.5744699835777283, "learning_rate": 2.456294523633129e-07, "loss": 0.2548, "step": 50056 }, { "epoch": 0.9293302094511052, "grad_norm": 0.5349975228309631, "learning_rate": 2.453725660873496e-07, "loss": 0.3363, "step": 50058 }, { "epoch": 0.9293673395885238, "grad_norm": 0.3433665335178375, "learning_rate": 2.451158125426456e-07, "loss": 0.1984, "step": 50060 }, { "epoch": 0.9294044697259425, "grad_norm": 0.33166879415512085, "learning_rate": 2.448591917326959e-07, "loss": 0.3093, "step": 50062 }, { "epoch": 0.9294415998633611, "grad_norm": 0.2712383270263672, "learning_rate": 2.4460270366099435e-07, "loss": 0.2089, "step": 50064 }, { "epoch": 0.9294787300007797, "grad_norm": 0.2754560708999634, "learning_rate": 2.4434634833102823e-07, "loss": 0.2261, "step": 50066 }, { "epoch": 0.9295158601381984, "grad_norm": 0.418683797121048, "learning_rate": 2.440901257462869e-07, "loss": 0.2141, "step": 50068 }, { "epoch": 0.929552990275617, "grad_norm": 0.3829540014266968, "learning_rate": 2.4383403591025533e-07, "loss": 0.1168, "step": 50070 }, { "epoch": 0.9295901204130357, "grad_norm": 0.2854011356830597, "learning_rate": 2.435780788264208e-07, "loss": 0.172, "step": 50072 }, { "epoch": 0.9296272505504543, "grad_norm": 0.38260915875434875, "learning_rate": 2.4332225449826386e-07, "loss": 0.1909, "step": 50074 }, { "epoch": 0.9296643806878729, "grad_norm": 0.3819912374019623, "learning_rate": 2.4306656292926835e-07, "loss": 0.2626, "step": 50076 }, { "epoch": 0.9297015108252915, "grad_norm": 0.5275866985321045, "learning_rate": 2.428110041229104e-07, "loss": 0.1463, "step": 50078 }, { "epoch": 0.9297386409627102, "grad_norm": 0.2746739089488983, "learning_rate": 2.4255557808266827e-07, "loss": 0.2714, "step": 50080 }, { "epoch": 0.9297757711001289, "grad_norm": 0.44585299491882324, "learning_rate": 2.4230028481201816e-07, "loss": 0.3139, "step": 50082 }, { "epoch": 0.9298129012375475, "grad_norm": 0.6144194602966309, "learning_rate": 2.4204512431443284e-07, "loss": 0.1877, "step": 50084 }, { "epoch": 0.9298500313749661, "grad_norm": 0.3143332600593567, "learning_rate": 2.4179009659338504e-07, "loss": 0.2184, "step": 50086 }, { "epoch": 0.9298871615123847, "grad_norm": 0.47258126735687256, "learning_rate": 2.415352016523453e-07, "loss": 0.1942, "step": 50088 }, { "epoch": 0.9299242916498034, "grad_norm": 0.5165525078773499, "learning_rate": 2.4128043949478207e-07, "loss": 0.3612, "step": 50090 }, { "epoch": 0.9299614217872221, "grad_norm": 0.3326161801815033, "learning_rate": 2.410258101241592e-07, "loss": 0.3223, "step": 50092 }, { "epoch": 0.9299985519246406, "grad_norm": 0.41989514231681824, "learning_rate": 2.407713135439449e-07, "loss": 0.4066, "step": 50094 }, { "epoch": 0.9300356820620593, "grad_norm": 0.5700629949569702, "learning_rate": 2.4051694975760097e-07, "loss": 0.2163, "step": 50096 }, { "epoch": 0.9300728121994779, "grad_norm": 0.30251359939575195, "learning_rate": 2.4026271876858687e-07, "loss": 0.2664, "step": 50098 }, { "epoch": 0.9301099423368966, "grad_norm": 0.6367429494857788, "learning_rate": 2.4000862058036534e-07, "loss": 0.2941, "step": 50100 }, { "epoch": 0.9301470724743153, "grad_norm": 0.36553218960762024, "learning_rate": 2.3975465519639254e-07, "loss": 0.2799, "step": 50102 }, { "epoch": 0.9301842026117338, "grad_norm": 0.40600821375846863, "learning_rate": 2.395008226201212e-07, "loss": 0.2766, "step": 50104 }, { "epoch": 0.9302213327491525, "grad_norm": 0.44274744391441345, "learning_rate": 2.3924712285500863e-07, "loss": 0.5092, "step": 50106 }, { "epoch": 0.9302584628865711, "grad_norm": 0.3577493727207184, "learning_rate": 2.3899355590450533e-07, "loss": 0.3258, "step": 50108 }, { "epoch": 0.9302955930239898, "grad_norm": 0.41966712474823, "learning_rate": 2.3874012177206195e-07, "loss": 0.1704, "step": 50110 }, { "epoch": 0.9303327231614085, "grad_norm": 0.3496668338775635, "learning_rate": 2.3848682046112793e-07, "loss": 0.1664, "step": 50112 }, { "epoch": 0.930369853298827, "grad_norm": 0.34141188859939575, "learning_rate": 2.3823365197514937e-07, "loss": 0.336, "step": 50114 }, { "epoch": 0.9304069834362457, "grad_norm": 0.4654254913330078, "learning_rate": 2.379806163175713e-07, "loss": 0.283, "step": 50116 }, { "epoch": 0.9304441135736643, "grad_norm": 0.5817670226097107, "learning_rate": 2.3772771349183544e-07, "loss": 0.3705, "step": 50118 }, { "epoch": 0.930481243711083, "grad_norm": 0.32937487959861755, "learning_rate": 2.3747494350138345e-07, "loss": 0.1033, "step": 50120 }, { "epoch": 0.9305183738485017, "grad_norm": 0.3401328921318054, "learning_rate": 2.3722230634965703e-07, "loss": 0.4134, "step": 50122 }, { "epoch": 0.9305555039859202, "grad_norm": 0.2706407308578491, "learning_rate": 2.3696980204009124e-07, "loss": 0.3092, "step": 50124 }, { "epoch": 0.9305926341233389, "grad_norm": 0.45439186692237854, "learning_rate": 2.367174305761233e-07, "loss": 0.1719, "step": 50126 }, { "epoch": 0.9306297642607575, "grad_norm": 0.3255786895751953, "learning_rate": 2.3646519196118601e-07, "loss": 0.1929, "step": 50128 }, { "epoch": 0.9306668943981762, "grad_norm": 0.39936649799346924, "learning_rate": 2.362130861987122e-07, "loss": 0.3221, "step": 50130 }, { "epoch": 0.9307040245355948, "grad_norm": 0.22375917434692383, "learning_rate": 2.3596111329213244e-07, "loss": 0.4441, "step": 50132 }, { "epoch": 0.9307411546730134, "grad_norm": 0.4194203019142151, "learning_rate": 2.3570927324487513e-07, "loss": 0.3366, "step": 50134 }, { "epoch": 0.9307782848104321, "grad_norm": 0.35186129808425903, "learning_rate": 2.354575660603664e-07, "loss": 0.1242, "step": 50136 }, { "epoch": 0.9308154149478507, "grad_norm": 0.8295299410820007, "learning_rate": 2.3520599174203351e-07, "loss": 0.2764, "step": 50138 }, { "epoch": 0.9308525450852694, "grad_norm": 0.40202224254608154, "learning_rate": 2.3495455029329817e-07, "loss": 0.2701, "step": 50140 }, { "epoch": 0.930889675222688, "grad_norm": 0.329073429107666, "learning_rate": 2.3470324171758096e-07, "loss": 0.1915, "step": 50142 }, { "epoch": 0.9309268053601066, "grad_norm": 0.3940455913543701, "learning_rate": 2.344520660183025e-07, "loss": 0.1817, "step": 50144 }, { "epoch": 0.9309639354975253, "grad_norm": 0.3769148290157318, "learning_rate": 2.3420102319888005e-07, "loss": 0.2202, "step": 50146 }, { "epoch": 0.9310010656349439, "grad_norm": 0.3352530896663666, "learning_rate": 2.3395011326272975e-07, "loss": 0.0896, "step": 50148 }, { "epoch": 0.9310381957723626, "grad_norm": 0.5480664372444153, "learning_rate": 2.3369933621326557e-07, "loss": 0.4502, "step": 50150 }, { "epoch": 0.9310753259097811, "grad_norm": 0.48081859946250916, "learning_rate": 2.3344869205390143e-07, "loss": 0.3651, "step": 50152 }, { "epoch": 0.9311124560471998, "grad_norm": 0.316737562417984, "learning_rate": 2.3319818078804458e-07, "loss": 0.1826, "step": 50154 }, { "epoch": 0.9311495861846185, "grad_norm": 0.436523973941803, "learning_rate": 2.3294780241910563e-07, "loss": 0.1889, "step": 50156 }, { "epoch": 0.9311867163220371, "grad_norm": 0.3270857632160187, "learning_rate": 2.326975569504919e-07, "loss": 0.2512, "step": 50158 }, { "epoch": 0.9312238464594558, "grad_norm": 0.35123908519744873, "learning_rate": 2.324474443856073e-07, "loss": 0.0923, "step": 50160 }, { "epoch": 0.9312609765968743, "grad_norm": 0.5527810454368591, "learning_rate": 2.3219746472785688e-07, "loss": 0.2976, "step": 50162 }, { "epoch": 0.931298106734293, "grad_norm": 0.30166253447532654, "learning_rate": 2.3194761798064013e-07, "loss": 0.1078, "step": 50164 }, { "epoch": 0.9313352368717117, "grad_norm": 0.42234131693840027, "learning_rate": 2.316979041473588e-07, "loss": 0.3371, "step": 50166 }, { "epoch": 0.9313723670091303, "grad_norm": 0.480018675327301, "learning_rate": 2.3144832323140909e-07, "loss": 0.1519, "step": 50168 }, { "epoch": 0.931409497146549, "grad_norm": 0.2924879193305969, "learning_rate": 2.31198875236186e-07, "loss": 0.2406, "step": 50170 }, { "epoch": 0.9314466272839675, "grad_norm": 0.22740890085697174, "learning_rate": 2.3094956016508686e-07, "loss": 0.1872, "step": 50172 }, { "epoch": 0.9314837574213862, "grad_norm": 0.3323245048522949, "learning_rate": 2.3070037802150113e-07, "loss": 0.2727, "step": 50174 }, { "epoch": 0.9315208875588048, "grad_norm": 0.9086126685142517, "learning_rate": 2.3045132880882282e-07, "loss": 0.2833, "step": 50176 }, { "epoch": 0.9315580176962235, "grad_norm": 0.3626006841659546, "learning_rate": 2.3020241253043696e-07, "loss": 0.3513, "step": 50178 }, { "epoch": 0.9315951478336422, "grad_norm": 0.4872686564922333, "learning_rate": 2.2995362918973308e-07, "loss": 0.2393, "step": 50180 }, { "epoch": 0.9316322779710607, "grad_norm": 0.42003950476646423, "learning_rate": 2.2970497879009513e-07, "loss": 0.2756, "step": 50182 }, { "epoch": 0.9316694081084794, "grad_norm": 0.5618094205856323, "learning_rate": 2.2945646133490708e-07, "loss": 0.4697, "step": 50184 }, { "epoch": 0.931706538245898, "grad_norm": 0.3172289729118347, "learning_rate": 2.2920807682755175e-07, "loss": 0.3055, "step": 50186 }, { "epoch": 0.9317436683833167, "grad_norm": 0.3681546449661255, "learning_rate": 2.289598252714065e-07, "loss": 0.1397, "step": 50188 }, { "epoch": 0.9317807985207354, "grad_norm": 0.3824702203273773, "learning_rate": 2.2871170666985076e-07, "loss": 0.4014, "step": 50190 }, { "epoch": 0.9318179286581539, "grad_norm": 0.6514669060707092, "learning_rate": 2.2846372102625968e-07, "loss": 0.3967, "step": 50192 }, { "epoch": 0.9318550587955726, "grad_norm": 0.3041110634803772, "learning_rate": 2.2821586834400833e-07, "loss": 0.3218, "step": 50194 }, { "epoch": 0.9318921889329912, "grad_norm": 0.33173733949661255, "learning_rate": 2.2796814862646954e-07, "loss": 0.1871, "step": 50196 }, { "epoch": 0.9319293190704099, "grad_norm": 0.2545624375343323, "learning_rate": 2.2772056187701286e-07, "loss": 0.2165, "step": 50198 }, { "epoch": 0.9319664492078286, "grad_norm": 0.3800126016139984, "learning_rate": 2.2747310809900892e-07, "loss": 0.2508, "step": 50200 }, { "epoch": 0.9320035793452471, "grad_norm": 0.47080495953559875, "learning_rate": 2.272257872958239e-07, "loss": 0.3466, "step": 50202 }, { "epoch": 0.9320407094826658, "grad_norm": 0.3630296289920807, "learning_rate": 2.269785994708218e-07, "loss": 0.1049, "step": 50204 }, { "epoch": 0.9320778396200844, "grad_norm": 0.3728260397911072, "learning_rate": 2.267315446273688e-07, "loss": 0.2794, "step": 50206 }, { "epoch": 0.9321149697575031, "grad_norm": 0.2389519363641739, "learning_rate": 2.2648462276882445e-07, "loss": 0.3124, "step": 50208 }, { "epoch": 0.9321520998949218, "grad_norm": 0.45861950516700745, "learning_rate": 2.2623783389854935e-07, "loss": 0.1979, "step": 50210 }, { "epoch": 0.9321892300323403, "grad_norm": 0.9209721088409424, "learning_rate": 2.25991178019902e-07, "loss": 0.4842, "step": 50212 }, { "epoch": 0.932226360169759, "grad_norm": 0.31795015931129456, "learning_rate": 2.2574465513623856e-07, "loss": 0.296, "step": 50214 }, { "epoch": 0.9322634903071776, "grad_norm": 0.3507000505924225, "learning_rate": 2.254982652509119e-07, "loss": 0.2245, "step": 50216 }, { "epoch": 0.9323006204445963, "grad_norm": 1.693692684173584, "learning_rate": 2.2525200836727601e-07, "loss": 0.245, "step": 50218 }, { "epoch": 0.932337750582015, "grad_norm": 0.27702951431274414, "learning_rate": 2.2500588448868155e-07, "loss": 0.1549, "step": 50220 }, { "epoch": 0.9323748807194335, "grad_norm": 0.3858413100242615, "learning_rate": 2.2475989361847695e-07, "loss": 0.3221, "step": 50222 }, { "epoch": 0.9324120108568522, "grad_norm": 0.3768874704837799, "learning_rate": 2.2451403576001062e-07, "loss": 0.3847, "step": 50224 }, { "epoch": 0.9324491409942708, "grad_norm": 0.36448800563812256, "learning_rate": 2.242683109166277e-07, "loss": 0.3161, "step": 50226 }, { "epoch": 0.9324862711316895, "grad_norm": 0.3610519468784332, "learning_rate": 2.2402271909167105e-07, "loss": 0.3058, "step": 50228 }, { "epoch": 0.932523401269108, "grad_norm": 0.36512672901153564, "learning_rate": 2.2377726028848356e-07, "loss": 0.2822, "step": 50230 }, { "epoch": 0.9325605314065267, "grad_norm": 0.7213726043701172, "learning_rate": 2.2353193451040368e-07, "loss": 0.1611, "step": 50232 }, { "epoch": 0.9325976615439454, "grad_norm": 0.3615325391292572, "learning_rate": 2.232867417607698e-07, "loss": 0.2929, "step": 50234 }, { "epoch": 0.932634791681364, "grad_norm": 0.6579441428184509, "learning_rate": 2.230416820429182e-07, "loss": 0.1527, "step": 50236 }, { "epoch": 0.9326719218187827, "grad_norm": 0.38399797677993774, "learning_rate": 2.2279675536018508e-07, "loss": 0.1879, "step": 50238 }, { "epoch": 0.9327090519562012, "grad_norm": 0.2739553451538086, "learning_rate": 2.225519617159022e-07, "loss": 0.0232, "step": 50240 }, { "epoch": 0.9327461820936199, "grad_norm": 0.4344296455383301, "learning_rate": 2.2230730111340025e-07, "loss": 0.3672, "step": 50242 }, { "epoch": 0.9327833122310386, "grad_norm": 0.2888500392436981, "learning_rate": 2.220627735560077e-07, "loss": 0.2945, "step": 50244 }, { "epoch": 0.9328204423684572, "grad_norm": 0.26970380544662476, "learning_rate": 2.2181837904705184e-07, "loss": 0.1132, "step": 50246 }, { "epoch": 0.9328575725058759, "grad_norm": 0.3661527931690216, "learning_rate": 2.2157411758986003e-07, "loss": 0.2957, "step": 50248 }, { "epoch": 0.9328947026432944, "grad_norm": 0.9967791438102722, "learning_rate": 2.2132998918775406e-07, "loss": 0.4025, "step": 50250 }, { "epoch": 0.9329318327807131, "grad_norm": 0.2456521987915039, "learning_rate": 2.2108599384405793e-07, "loss": 0.3097, "step": 50252 }, { "epoch": 0.9329689629181318, "grad_norm": 0.37923794984817505, "learning_rate": 2.20842131562089e-07, "loss": 0.1301, "step": 50254 }, { "epoch": 0.9330060930555504, "grad_norm": 0.22194752097129822, "learning_rate": 2.2059840234516572e-07, "loss": 0.2078, "step": 50256 }, { "epoch": 0.9330432231929691, "grad_norm": 0.3658318817615509, "learning_rate": 2.2035480619660654e-07, "loss": 0.1605, "step": 50258 }, { "epoch": 0.9330803533303876, "grad_norm": 0.1737578958272934, "learning_rate": 2.2011134311972438e-07, "loss": 0.1847, "step": 50260 }, { "epoch": 0.9331174834678063, "grad_norm": 0.621938943862915, "learning_rate": 2.1986801311783323e-07, "loss": 0.3555, "step": 50262 }, { "epoch": 0.933154613605225, "grad_norm": 0.6269360780715942, "learning_rate": 2.1962481619424268e-07, "loss": 0.3036, "step": 50264 }, { "epoch": 0.9331917437426436, "grad_norm": 1.773868441581726, "learning_rate": 2.1938175235226456e-07, "loss": 0.1978, "step": 50266 }, { "epoch": 0.9332288738800623, "grad_norm": 0.9219141602516174, "learning_rate": 2.1913882159520283e-07, "loss": 0.3084, "step": 50268 }, { "epoch": 0.9332660040174808, "grad_norm": 0.19909599423408508, "learning_rate": 2.18896023926366e-07, "loss": 0.1226, "step": 50270 }, { "epoch": 0.9333031341548995, "grad_norm": 0.5098899006843567, "learning_rate": 2.1865335934905584e-07, "loss": 0.2954, "step": 50272 }, { "epoch": 0.9333402642923182, "grad_norm": 0.3619163930416107, "learning_rate": 2.184108278665753e-07, "loss": 0.2906, "step": 50274 }, { "epoch": 0.9333773944297368, "grad_norm": 0.32638904452323914, "learning_rate": 2.1816842948222284e-07, "loss": 0.2576, "step": 50276 }, { "epoch": 0.9334145245671555, "grad_norm": 0.5350068211555481, "learning_rate": 2.1792616419930024e-07, "loss": 0.2234, "step": 50278 }, { "epoch": 0.933451654704574, "grad_norm": 0.35978618264198303, "learning_rate": 2.1768403202109934e-07, "loss": 0.2361, "step": 50280 }, { "epoch": 0.9334887848419927, "grad_norm": 0.24880273640155792, "learning_rate": 2.1744203295091858e-07, "loss": 0.297, "step": 50282 }, { "epoch": 0.9335259149794113, "grad_norm": 0.19702662527561188, "learning_rate": 2.172001669920498e-07, "loss": 0.2972, "step": 50284 }, { "epoch": 0.93356304511683, "grad_norm": 0.4363676607608795, "learning_rate": 2.1695843414778262e-07, "loss": 0.3394, "step": 50286 }, { "epoch": 0.9336001752542487, "grad_norm": 0.6145041584968567, "learning_rate": 2.167168344214088e-07, "loss": 0.2762, "step": 50288 }, { "epoch": 0.9336373053916672, "grad_norm": 0.48047691583633423, "learning_rate": 2.164753678162146e-07, "loss": 0.3471, "step": 50290 }, { "epoch": 0.9336744355290859, "grad_norm": 0.4448888599872589, "learning_rate": 2.1623403433548518e-07, "loss": 0.3481, "step": 50292 }, { "epoch": 0.9337115656665045, "grad_norm": 0.3121815621852875, "learning_rate": 2.159928339825057e-07, "loss": 0.3047, "step": 50294 }, { "epoch": 0.9337486958039232, "grad_norm": 0.3360520899295807, "learning_rate": 2.1575176676055688e-07, "loss": 0.4464, "step": 50296 }, { "epoch": 0.9337858259413419, "grad_norm": 0.4055921733379364, "learning_rate": 2.1551083267291829e-07, "loss": 0.2614, "step": 50298 }, { "epoch": 0.9338229560787604, "grad_norm": 0.23660337924957275, "learning_rate": 2.1527003172286954e-07, "loss": 0.3921, "step": 50300 }, { "epoch": 0.9338600862161791, "grad_norm": 0.30888649821281433, "learning_rate": 2.150293639136869e-07, "loss": 0.1719, "step": 50302 }, { "epoch": 0.9338972163535977, "grad_norm": 0.42983853816986084, "learning_rate": 2.1478882924864665e-07, "loss": 0.3817, "step": 50304 }, { "epoch": 0.9339343464910164, "grad_norm": 0.38036930561065674, "learning_rate": 2.1454842773101946e-07, "loss": 0.3468, "step": 50306 }, { "epoch": 0.933971476628435, "grad_norm": 0.37760263681411743, "learning_rate": 2.1430815936407722e-07, "loss": 0.5105, "step": 50308 }, { "epoch": 0.9340086067658536, "grad_norm": 0.4960090219974518, "learning_rate": 2.140680241510884e-07, "loss": 0.1866, "step": 50310 }, { "epoch": 0.9340457369032723, "grad_norm": 0.4065636992454529, "learning_rate": 2.138280220953237e-07, "loss": 0.409, "step": 50312 }, { "epoch": 0.9340828670406909, "grad_norm": 0.4962323009967804, "learning_rate": 2.13588153200045e-07, "loss": 0.4225, "step": 50314 }, { "epoch": 0.9341199971781096, "grad_norm": 0.6142938733100891, "learning_rate": 2.1334841746851964e-07, "loss": 0.3435, "step": 50316 }, { "epoch": 0.9341571273155282, "grad_norm": 0.22493356466293335, "learning_rate": 2.1310881490400615e-07, "loss": 0.4127, "step": 50318 }, { "epoch": 0.9341942574529468, "grad_norm": 0.3735158443450928, "learning_rate": 2.1286934550976635e-07, "loss": 0.1864, "step": 50320 }, { "epoch": 0.9342313875903655, "grad_norm": 0.4285488426685333, "learning_rate": 2.1263000928905986e-07, "loss": 0.4334, "step": 50322 }, { "epoch": 0.9342685177277841, "grad_norm": 0.4024764597415924, "learning_rate": 2.1239080624514187e-07, "loss": 0.3155, "step": 50324 }, { "epoch": 0.9343056478652028, "grad_norm": 0.5392751097679138, "learning_rate": 2.121517363812675e-07, "loss": 0.1325, "step": 50326 }, { "epoch": 0.9343427780026213, "grad_norm": 0.46233445405960083, "learning_rate": 2.119127997006909e-07, "loss": 0.3244, "step": 50328 }, { "epoch": 0.93437990814004, "grad_norm": 0.4473434090614319, "learning_rate": 2.1167399620666162e-07, "loss": 0.2035, "step": 50330 }, { "epoch": 0.9344170382774587, "grad_norm": 0.5246615409851074, "learning_rate": 2.1143532590243044e-07, "loss": 0.4241, "step": 50332 }, { "epoch": 0.9344541684148773, "grad_norm": 0.34634634852409363, "learning_rate": 2.1119678879124472e-07, "loss": 0.2863, "step": 50334 }, { "epoch": 0.934491298552296, "grad_norm": 0.6041848063468933, "learning_rate": 2.1095838487634856e-07, "loss": 0.321, "step": 50336 }, { "epoch": 0.9345284286897145, "grad_norm": 0.5718795657157898, "learning_rate": 2.1072011416098715e-07, "loss": 0.1721, "step": 50338 }, { "epoch": 0.9345655588271332, "grad_norm": 0.47210752964019775, "learning_rate": 2.1048197664840453e-07, "loss": 0.2688, "step": 50340 }, { "epoch": 0.9346026889645519, "grad_norm": 0.43718093633651733, "learning_rate": 2.1024397234183702e-07, "loss": 0.2051, "step": 50342 }, { "epoch": 0.9346398191019705, "grad_norm": 0.3578401803970337, "learning_rate": 2.1000610124452648e-07, "loss": 0.2384, "step": 50344 }, { "epoch": 0.9346769492393892, "grad_norm": 0.31479063630104065, "learning_rate": 2.0976836335970807e-07, "loss": 0.1628, "step": 50346 }, { "epoch": 0.9347140793768077, "grad_norm": 0.5599633455276489, "learning_rate": 2.09530758690617e-07, "loss": 0.3048, "step": 50348 }, { "epoch": 0.9347512095142264, "grad_norm": 0.3701847493648529, "learning_rate": 2.0929328724048626e-07, "loss": 0.1816, "step": 50350 }, { "epoch": 0.9347883396516451, "grad_norm": 0.44199663400650024, "learning_rate": 2.0905594901254767e-07, "loss": 0.2723, "step": 50352 }, { "epoch": 0.9348254697890637, "grad_norm": 0.3768458366394043, "learning_rate": 2.08818744010032e-07, "loss": 0.3407, "step": 50354 }, { "epoch": 0.9348625999264824, "grad_norm": 0.364531546831131, "learning_rate": 2.0858167223616444e-07, "loss": 0.2374, "step": 50356 }, { "epoch": 0.9348997300639009, "grad_norm": 0.45745018124580383, "learning_rate": 2.083447336941702e-07, "loss": 0.3061, "step": 50358 }, { "epoch": 0.9349368602013196, "grad_norm": 0.3720918893814087, "learning_rate": 2.0810792838727556e-07, "loss": 0.2701, "step": 50360 }, { "epoch": 0.9349739903387383, "grad_norm": 0.3258405029773712, "learning_rate": 2.078712563187013e-07, "loss": 0.1801, "step": 50362 }, { "epoch": 0.9350111204761569, "grad_norm": 0.3929935097694397, "learning_rate": 2.076347174916682e-07, "loss": 0.2558, "step": 50364 }, { "epoch": 0.9350482506135755, "grad_norm": 0.5458317995071411, "learning_rate": 2.0739831190939697e-07, "loss": 0.2046, "step": 50366 }, { "epoch": 0.9350853807509941, "grad_norm": 0.2508763372898102, "learning_rate": 2.071620395751006e-07, "loss": 0.1868, "step": 50368 }, { "epoch": 0.9351225108884128, "grad_norm": 1.228243350982666, "learning_rate": 2.0692590049199656e-07, "loss": 0.3273, "step": 50370 }, { "epoch": 0.9351596410258315, "grad_norm": 0.4273628890514374, "learning_rate": 2.066898946632978e-07, "loss": 0.1735, "step": 50372 }, { "epoch": 0.9351967711632501, "grad_norm": 0.18382541835308075, "learning_rate": 2.06454022092214e-07, "loss": 0.0686, "step": 50374 }, { "epoch": 0.9352339013006687, "grad_norm": 0.3270367980003357, "learning_rate": 2.062182827819581e-07, "loss": 0.1611, "step": 50376 }, { "epoch": 0.9352710314380873, "grad_norm": 0.4754035472869873, "learning_rate": 2.0598267673573424e-07, "loss": 0.2313, "step": 50378 }, { "epoch": 0.935308161575506, "grad_norm": 0.5821787714958191, "learning_rate": 2.0574720395674986e-07, "loss": 0.2899, "step": 50380 }, { "epoch": 0.9353452917129246, "grad_norm": 0.33625200390815735, "learning_rate": 2.055118644482079e-07, "loss": 0.3867, "step": 50382 }, { "epoch": 0.9353824218503433, "grad_norm": 0.31917238235473633, "learning_rate": 2.0527665821331256e-07, "loss": 0.2235, "step": 50384 }, { "epoch": 0.9354195519877619, "grad_norm": 0.29225248098373413, "learning_rate": 2.050415852552623e-07, "loss": 0.2697, "step": 50386 }, { "epoch": 0.9354566821251805, "grad_norm": 0.3765079081058502, "learning_rate": 2.0480664557725793e-07, "loss": 0.2639, "step": 50388 }, { "epoch": 0.9354938122625992, "grad_norm": 0.4467739760875702, "learning_rate": 2.045718391824947e-07, "loss": 0.2681, "step": 50390 }, { "epoch": 0.9355309424000178, "grad_norm": 0.38358041644096375, "learning_rate": 2.0433716607416886e-07, "loss": 0.2499, "step": 50392 }, { "epoch": 0.9355680725374365, "grad_norm": 0.39348816871643066, "learning_rate": 2.0410262625547128e-07, "loss": 0.1173, "step": 50394 }, { "epoch": 0.9356052026748551, "grad_norm": 0.5106677412986755, "learning_rate": 2.038682197295949e-07, "loss": 0.2704, "step": 50396 }, { "epoch": 0.9356423328122737, "grad_norm": 0.41727590560913086, "learning_rate": 2.0363394649973056e-07, "loss": 0.2787, "step": 50398 }, { "epoch": 0.9356794629496924, "grad_norm": 0.36754557490348816, "learning_rate": 2.0339980656906455e-07, "loss": 0.2348, "step": 50400 }, { "epoch": 0.935716593087111, "grad_norm": 0.3274444341659546, "learning_rate": 2.0316579994078212e-07, "loss": 0.2705, "step": 50402 }, { "epoch": 0.9357537232245297, "grad_norm": 0.3400157392024994, "learning_rate": 2.029319266180696e-07, "loss": 0.3042, "step": 50404 }, { "epoch": 0.9357908533619483, "grad_norm": 0.45816606283187866, "learning_rate": 2.0269818660410667e-07, "loss": 0.2454, "step": 50406 }, { "epoch": 0.9358279834993669, "grad_norm": 0.4133220613002777, "learning_rate": 2.0246457990207414e-07, "loss": 0.3562, "step": 50408 }, { "epoch": 0.9358651136367856, "grad_norm": 0.26466718316078186, "learning_rate": 2.0223110651515275e-07, "loss": 0.2541, "step": 50410 }, { "epoch": 0.9359022437742042, "grad_norm": 0.40572988986968994, "learning_rate": 2.0199776644651893e-07, "loss": 0.3739, "step": 50412 }, { "epoch": 0.9359393739116229, "grad_norm": 0.5456663966178894, "learning_rate": 2.017645596993456e-07, "loss": 0.2566, "step": 50414 }, { "epoch": 0.9359765040490415, "grad_norm": 0.6315339207649231, "learning_rate": 2.0153148627680917e-07, "loss": 0.2558, "step": 50416 }, { "epoch": 0.9360136341864601, "grad_norm": 0.29461973905563354, "learning_rate": 2.0129854618207823e-07, "loss": 0.2117, "step": 50418 }, { "epoch": 0.9360507643238788, "grad_norm": 0.482860267162323, "learning_rate": 2.0106573941832463e-07, "loss": 0.2068, "step": 50420 }, { "epoch": 0.9360878944612974, "grad_norm": 0.4696381986141205, "learning_rate": 2.0083306598871478e-07, "loss": 0.4644, "step": 50422 }, { "epoch": 0.936125024598716, "grad_norm": 0.3810580372810364, "learning_rate": 2.0060052589641498e-07, "loss": 0.2406, "step": 50424 }, { "epoch": 0.9361621547361347, "grad_norm": 0.33511000871658325, "learning_rate": 2.003681191445883e-07, "loss": 0.1702, "step": 50426 }, { "epoch": 0.9361992848735533, "grad_norm": 0.7196905016899109, "learning_rate": 2.0013584573639888e-07, "loss": 0.2437, "step": 50428 }, { "epoch": 0.936236415010972, "grad_norm": 0.5981458425521851, "learning_rate": 1.999037056750075e-07, "loss": 0.273, "step": 50430 }, { "epoch": 0.9362735451483906, "grad_norm": 0.5226945281028748, "learning_rate": 1.9967169896357164e-07, "loss": 0.1275, "step": 50432 }, { "epoch": 0.9363106752858092, "grad_norm": 0.36623862385749817, "learning_rate": 1.9943982560524765e-07, "loss": 0.1688, "step": 50434 }, { "epoch": 0.9363478054232278, "grad_norm": 0.2931693494319916, "learning_rate": 1.9920808560319194e-07, "loss": 0.4856, "step": 50436 }, { "epoch": 0.9363849355606465, "grad_norm": 0.505641758441925, "learning_rate": 1.989764789605586e-07, "loss": 0.2772, "step": 50438 }, { "epoch": 0.9364220656980652, "grad_norm": 0.5909997820854187, "learning_rate": 1.9874500568049625e-07, "loss": 0.3388, "step": 50440 }, { "epoch": 0.9364591958354838, "grad_norm": 0.4241778552532196, "learning_rate": 1.9851366576615793e-07, "loss": 0.4256, "step": 50442 }, { "epoch": 0.9364963259729024, "grad_norm": 0.7040421962738037, "learning_rate": 1.982824592206878e-07, "loss": 0.2828, "step": 50444 }, { "epoch": 0.936533456110321, "grad_norm": 0.459684282541275, "learning_rate": 1.980513860472355e-07, "loss": 0.2272, "step": 50446 }, { "epoch": 0.9365705862477397, "grad_norm": 0.37046897411346436, "learning_rate": 1.9782044624894193e-07, "loss": 0.3529, "step": 50448 }, { "epoch": 0.9366077163851584, "grad_norm": 0.4277670383453369, "learning_rate": 1.9758963982895229e-07, "loss": 0.1935, "step": 50450 }, { "epoch": 0.936644846522577, "grad_norm": 0.3797200322151184, "learning_rate": 1.9735896679040522e-07, "loss": 0.2852, "step": 50452 }, { "epoch": 0.9366819766599956, "grad_norm": 0.42556628584861755, "learning_rate": 1.9712842713644153e-07, "loss": 0.3725, "step": 50454 }, { "epoch": 0.9367191067974142, "grad_norm": 0.2555093765258789, "learning_rate": 1.968980208701965e-07, "loss": 0.2586, "step": 50456 }, { "epoch": 0.9367562369348329, "grad_norm": 0.45253226161003113, "learning_rate": 1.9666774799480537e-07, "loss": 0.3277, "step": 50458 }, { "epoch": 0.9367933670722516, "grad_norm": 0.2537003457546234, "learning_rate": 1.9643760851340232e-07, "loss": 0.3003, "step": 50460 }, { "epoch": 0.9368304972096702, "grad_norm": 0.256867378950119, "learning_rate": 1.9620760242911818e-07, "loss": 0.2997, "step": 50462 }, { "epoch": 0.9368676273470888, "grad_norm": 0.3946177661418915, "learning_rate": 1.9597772974508266e-07, "loss": 0.4203, "step": 50464 }, { "epoch": 0.9369047574845074, "grad_norm": 0.2898321747779846, "learning_rate": 1.9574799046442438e-07, "loss": 0.3947, "step": 50466 }, { "epoch": 0.9369418876219261, "grad_norm": 0.30333712697029114, "learning_rate": 1.9551838459026863e-07, "loss": 0.3152, "step": 50468 }, { "epoch": 0.9369790177593448, "grad_norm": 0.5426267981529236, "learning_rate": 1.952889121257384e-07, "loss": 0.4185, "step": 50470 }, { "epoch": 0.9370161478967634, "grad_norm": 0.20420224964618683, "learning_rate": 1.9505957307395907e-07, "loss": 0.2163, "step": 50472 }, { "epoch": 0.937053278034182, "grad_norm": 0.2776103913784027, "learning_rate": 1.9483036743804916e-07, "loss": 0.1981, "step": 50474 }, { "epoch": 0.9370904081716006, "grad_norm": 0.3246515989303589, "learning_rate": 1.9460129522112737e-07, "loss": 0.3298, "step": 50476 }, { "epoch": 0.9371275383090193, "grad_norm": 0.3749484419822693, "learning_rate": 1.9437235642631225e-07, "loss": 0.1431, "step": 50478 }, { "epoch": 0.9371646684464379, "grad_norm": 0.314248651266098, "learning_rate": 1.941435510567191e-07, "loss": 0.2078, "step": 50480 }, { "epoch": 0.9372017985838565, "grad_norm": 0.4632081389427185, "learning_rate": 1.939148791154588e-07, "loss": 0.2562, "step": 50482 }, { "epoch": 0.9372389287212752, "grad_norm": 0.6664503812789917, "learning_rate": 1.9368634060564552e-07, "loss": 0.185, "step": 50484 }, { "epoch": 0.9372760588586938, "grad_norm": 0.40528416633605957, "learning_rate": 1.9345793553038671e-07, "loss": 0.4443, "step": 50486 }, { "epoch": 0.9373131889961125, "grad_norm": 0.3443734645843506, "learning_rate": 1.932296638927922e-07, "loss": 0.2818, "step": 50488 }, { "epoch": 0.9373503191335311, "grad_norm": 0.26927846670150757, "learning_rate": 1.930015256959661e-07, "loss": 0.3232, "step": 50490 }, { "epoch": 0.9373874492709497, "grad_norm": 0.283528596162796, "learning_rate": 1.9277352094301592e-07, "loss": 0.319, "step": 50492 }, { "epoch": 0.9374245794083684, "grad_norm": 0.18127410113811493, "learning_rate": 1.9254564963704036e-07, "loss": 0.0619, "step": 50494 }, { "epoch": 0.937461709545787, "grad_norm": 0.37580209970474243, "learning_rate": 1.9231791178114245e-07, "loss": 0.1107, "step": 50496 }, { "epoch": 0.9374988396832057, "grad_norm": 0.31789058446884155, "learning_rate": 1.9209030737841972e-07, "loss": 0.2323, "step": 50498 }, { "epoch": 0.9375359698206243, "grad_norm": 0.6327764987945557, "learning_rate": 1.9186283643196967e-07, "loss": 0.1488, "step": 50500 }, { "epoch": 0.9375730999580429, "grad_norm": 0.5328879356384277, "learning_rate": 1.9163549894488875e-07, "loss": 0.1919, "step": 50502 }, { "epoch": 0.9376102300954616, "grad_norm": 0.35823971033096313, "learning_rate": 1.9140829492026892e-07, "loss": 0.3331, "step": 50504 }, { "epoch": 0.9376473602328802, "grad_norm": 0.5131212472915649, "learning_rate": 1.9118122436120213e-07, "loss": 0.1846, "step": 50506 }, { "epoch": 0.9376844903702989, "grad_norm": 0.5100772976875305, "learning_rate": 1.9095428727077813e-07, "loss": 0.5072, "step": 50508 }, { "epoch": 0.9377216205077175, "grad_norm": 0.36215993762016296, "learning_rate": 1.907274836520845e-07, "loss": 0.0539, "step": 50510 }, { "epoch": 0.9377587506451361, "grad_norm": 0.31194761395454407, "learning_rate": 1.905008135082076e-07, "loss": 0.3013, "step": 50512 }, { "epoch": 0.9377958807825548, "grad_norm": 0.36816251277923584, "learning_rate": 1.9027427684223053e-07, "loss": 0.3429, "step": 50514 }, { "epoch": 0.9378330109199734, "grad_norm": 0.5024937391281128, "learning_rate": 1.9004787365723864e-07, "loss": 0.3006, "step": 50516 }, { "epoch": 0.9378701410573921, "grad_norm": 0.40359485149383545, "learning_rate": 1.8982160395631167e-07, "loss": 0.274, "step": 50518 }, { "epoch": 0.9379072711948107, "grad_norm": 0.27537867426872253, "learning_rate": 1.8959546774252602e-07, "loss": 0.1818, "step": 50520 }, { "epoch": 0.9379444013322293, "grad_norm": 0.6612551808357239, "learning_rate": 1.8936946501896147e-07, "loss": 0.3089, "step": 50522 }, { "epoch": 0.937981531469648, "grad_norm": 2.1385087966918945, "learning_rate": 1.8914359578869223e-07, "loss": 0.3573, "step": 50524 }, { "epoch": 0.9380186616070666, "grad_norm": 0.3032298982143402, "learning_rate": 1.889178600547914e-07, "loss": 0.4587, "step": 50526 }, { "epoch": 0.9380557917444853, "grad_norm": 0.3252864181995392, "learning_rate": 1.8869225782033096e-07, "loss": 0.347, "step": 50528 }, { "epoch": 0.9380929218819039, "grad_norm": 0.3504236936569214, "learning_rate": 1.8846678908838068e-07, "loss": 0.3391, "step": 50530 }, { "epoch": 0.9381300520193225, "grad_norm": 0.4330886900424957, "learning_rate": 1.882414538620081e-07, "loss": 0.3414, "step": 50532 }, { "epoch": 0.9381671821567411, "grad_norm": 0.3574167490005493, "learning_rate": 1.8801625214428076e-07, "loss": 0.2797, "step": 50534 }, { "epoch": 0.9382043122941598, "grad_norm": 0.35796743631362915, "learning_rate": 1.877911839382607e-07, "loss": 0.4347, "step": 50536 }, { "epoch": 0.9382414424315785, "grad_norm": 0.36988940834999084, "learning_rate": 1.8756624924701204e-07, "loss": 0.3294, "step": 50538 }, { "epoch": 0.938278572568997, "grad_norm": 0.2854657769203186, "learning_rate": 1.8734144807359466e-07, "loss": 0.3095, "step": 50540 }, { "epoch": 0.9383157027064157, "grad_norm": 0.31911319494247437, "learning_rate": 1.871167804210694e-07, "loss": 0.3963, "step": 50542 }, { "epoch": 0.9383528328438343, "grad_norm": 0.5411933064460754, "learning_rate": 1.8689224629249048e-07, "loss": 0.2804, "step": 50544 }, { "epoch": 0.938389962981253, "grad_norm": 0.18840636312961578, "learning_rate": 1.8666784569091544e-07, "loss": 0.2103, "step": 50546 }, { "epoch": 0.9384270931186717, "grad_norm": 0.42887693643569946, "learning_rate": 1.864435786193963e-07, "loss": 0.0617, "step": 50548 }, { "epoch": 0.9384642232560902, "grad_norm": 0.49172353744506836, "learning_rate": 1.86219445080984e-07, "loss": 0.298, "step": 50550 }, { "epoch": 0.9385013533935089, "grad_norm": 0.3039313852787018, "learning_rate": 1.8599544507873046e-07, "loss": 0.3195, "step": 50552 }, { "epoch": 0.9385384835309275, "grad_norm": 0.3312618136405945, "learning_rate": 1.8577157861568107e-07, "loss": 0.3235, "step": 50554 }, { "epoch": 0.9385756136683462, "grad_norm": 0.38901248574256897, "learning_rate": 1.855478456948856e-07, "loss": 0.3924, "step": 50556 }, { "epoch": 0.9386127438057649, "grad_norm": 0.23433274030685425, "learning_rate": 1.8532424631938496e-07, "loss": 0.2871, "step": 50558 }, { "epoch": 0.9386498739431834, "grad_norm": 0.41085517406463623, "learning_rate": 1.8510078049222335e-07, "loss": 0.3958, "step": 50560 }, { "epoch": 0.9386870040806021, "grad_norm": 0.43302658200263977, "learning_rate": 1.8487744821644061e-07, "loss": 0.275, "step": 50562 }, { "epoch": 0.9387241342180207, "grad_norm": 0.5038411021232605, "learning_rate": 1.846542494950765e-07, "loss": 0.2495, "step": 50564 }, { "epoch": 0.9387612643554394, "grad_norm": 0.47242259979248047, "learning_rate": 1.8443118433116857e-07, "loss": 0.2778, "step": 50566 }, { "epoch": 0.9387983944928581, "grad_norm": 0.5323737859725952, "learning_rate": 1.8420825272775e-07, "loss": 0.1311, "step": 50568 }, { "epoch": 0.9388355246302766, "grad_norm": 0.5178433060646057, "learning_rate": 1.839854546878561e-07, "loss": 0.1369, "step": 50570 }, { "epoch": 0.9388726547676953, "grad_norm": 0.4531875252723694, "learning_rate": 1.8376279021451783e-07, "loss": 0.2166, "step": 50572 }, { "epoch": 0.9389097849051139, "grad_norm": 0.27651655673980713, "learning_rate": 1.835402593107638e-07, "loss": 0.3026, "step": 50574 }, { "epoch": 0.9389469150425326, "grad_norm": 0.6184964776039124, "learning_rate": 1.8331786197962386e-07, "loss": 0.5048, "step": 50576 }, { "epoch": 0.9389840451799513, "grad_norm": 0.17431627213954926, "learning_rate": 1.8309559822412337e-07, "loss": 0.2446, "step": 50578 }, { "epoch": 0.9390211753173698, "grad_norm": 0.531634509563446, "learning_rate": 1.8287346804728768e-07, "loss": 0.396, "step": 50580 }, { "epoch": 0.9390583054547885, "grad_norm": 0.5510864853858948, "learning_rate": 1.826514714521377e-07, "loss": 0.2673, "step": 50582 }, { "epoch": 0.9390954355922071, "grad_norm": 0.27180781960487366, "learning_rate": 1.8242960844169434e-07, "loss": 0.175, "step": 50584 }, { "epoch": 0.9391325657296258, "grad_norm": 0.4251702129840851, "learning_rate": 1.822078790189774e-07, "loss": 0.2656, "step": 50586 }, { "epoch": 0.9391696958670444, "grad_norm": 0.35033923387527466, "learning_rate": 1.819862831870034e-07, "loss": 0.1701, "step": 50588 }, { "epoch": 0.939206826004463, "grad_norm": 0.36468085646629333, "learning_rate": 1.8176482094878767e-07, "loss": 0.2366, "step": 50590 }, { "epoch": 0.9392439561418817, "grad_norm": 0.3783363997936249, "learning_rate": 1.8154349230734446e-07, "loss": 0.4572, "step": 50592 }, { "epoch": 0.9392810862793003, "grad_norm": 0.6165767312049866, "learning_rate": 1.8132229726568473e-07, "loss": 0.3959, "step": 50594 }, { "epoch": 0.939318216416719, "grad_norm": 0.4167231023311615, "learning_rate": 1.8110123582681716e-07, "loss": 0.3769, "step": 50596 }, { "epoch": 0.9393553465541375, "grad_norm": 0.31226852536201477, "learning_rate": 1.8088030799375045e-07, "loss": 0.3572, "step": 50598 }, { "epoch": 0.9393924766915562, "grad_norm": 0.1898368000984192, "learning_rate": 1.806595137694922e-07, "loss": 0.2176, "step": 50600 }, { "epoch": 0.9394296068289749, "grad_norm": 0.49393194913864136, "learning_rate": 1.8043885315704445e-07, "loss": 0.336, "step": 50602 }, { "epoch": 0.9394667369663935, "grad_norm": 0.3919266164302826, "learning_rate": 1.802183261594115e-07, "loss": 0.2408, "step": 50604 }, { "epoch": 0.9395038671038122, "grad_norm": 0.3320578634738922, "learning_rate": 1.799979327795942e-07, "loss": 0.1498, "step": 50606 }, { "epoch": 0.9395409972412307, "grad_norm": 0.41016069054603577, "learning_rate": 1.7977767302059025e-07, "loss": 0.2311, "step": 50608 }, { "epoch": 0.9395781273786494, "grad_norm": 0.3183784484863281, "learning_rate": 1.7955754688539718e-07, "loss": 0.2397, "step": 50610 }, { "epoch": 0.9396152575160681, "grad_norm": 0.33483946323394775, "learning_rate": 1.793375543770104e-07, "loss": 0.2633, "step": 50612 }, { "epoch": 0.9396523876534867, "grad_norm": 0.41786736249923706, "learning_rate": 1.7911769549842307e-07, "loss": 0.5234, "step": 50614 }, { "epoch": 0.9396895177909054, "grad_norm": 0.3217369616031647, "learning_rate": 1.788979702526261e-07, "loss": 0.4471, "step": 50616 }, { "epoch": 0.9397266479283239, "grad_norm": 0.37516579031944275, "learning_rate": 1.786783786426116e-07, "loss": 0.3092, "step": 50618 }, { "epoch": 0.9397637780657426, "grad_norm": 0.36329740285873413, "learning_rate": 1.7845892067136604e-07, "loss": 0.2399, "step": 50620 }, { "epoch": 0.9398009082031613, "grad_norm": 0.40958651900291443, "learning_rate": 1.782395963418748e-07, "loss": 0.2678, "step": 50622 }, { "epoch": 0.9398380383405799, "grad_norm": 0.476159930229187, "learning_rate": 1.780204056571222e-07, "loss": 0.2439, "step": 50624 }, { "epoch": 0.9398751684779986, "grad_norm": 0.6685574054718018, "learning_rate": 1.7780134862009356e-07, "loss": 0.2646, "step": 50626 }, { "epoch": 0.9399122986154171, "grad_norm": 0.42609867453575134, "learning_rate": 1.7758242523376658e-07, "loss": 0.3007, "step": 50628 }, { "epoch": 0.9399494287528358, "grad_norm": 0.44763118028640747, "learning_rate": 1.7736363550112213e-07, "loss": 0.2261, "step": 50630 }, { "epoch": 0.9399865588902544, "grad_norm": 0.4175860583782196, "learning_rate": 1.7714497942513565e-07, "loss": 0.4953, "step": 50632 }, { "epoch": 0.9400236890276731, "grad_norm": 0.37613430619239807, "learning_rate": 1.7692645700878365e-07, "loss": 0.38, "step": 50634 }, { "epoch": 0.9400608191650918, "grad_norm": 0.5618124604225159, "learning_rate": 1.7670806825503816e-07, "loss": 0.2512, "step": 50636 }, { "epoch": 0.9400979493025103, "grad_norm": 0.35358425974845886, "learning_rate": 1.7648981316687242e-07, "loss": 0.1269, "step": 50638 }, { "epoch": 0.940135079439929, "grad_norm": 0.3483954966068268, "learning_rate": 1.762716917472551e-07, "loss": 0.2341, "step": 50640 }, { "epoch": 0.9401722095773476, "grad_norm": 0.26897183060646057, "learning_rate": 1.76053703999155e-07, "loss": 0.2906, "step": 50642 }, { "epoch": 0.9402093397147663, "grad_norm": 0.33310216665267944, "learning_rate": 1.758358499255375e-07, "loss": 0.3203, "step": 50644 }, { "epoch": 0.940246469852185, "grad_norm": 0.3106984794139862, "learning_rate": 1.7561812952936795e-07, "loss": 0.0731, "step": 50646 }, { "epoch": 0.9402835999896035, "grad_norm": 0.42390069365501404, "learning_rate": 1.7540054281360742e-07, "loss": 0.3323, "step": 50648 }, { "epoch": 0.9403207301270222, "grad_norm": 0.5804970264434814, "learning_rate": 1.751830897812179e-07, "loss": 0.2338, "step": 50650 }, { "epoch": 0.9403578602644408, "grad_norm": 0.39209285378456116, "learning_rate": 1.7496577043515817e-07, "loss": 0.4027, "step": 50652 }, { "epoch": 0.9403949904018595, "grad_norm": 0.2924545705318451, "learning_rate": 1.7474858477838363e-07, "loss": 0.1227, "step": 50654 }, { "epoch": 0.9404321205392782, "grad_norm": 0.3210468590259552, "learning_rate": 1.7453153281385304e-07, "loss": 0.3435, "step": 50656 }, { "epoch": 0.9404692506766967, "grad_norm": 0.38592904806137085, "learning_rate": 1.7431461454451514e-07, "loss": 0.1794, "step": 50658 }, { "epoch": 0.9405063808141154, "grad_norm": 0.5031899809837341, "learning_rate": 1.7409782997332536e-07, "loss": 0.2234, "step": 50660 }, { "epoch": 0.940543510951534, "grad_norm": 0.4274146854877472, "learning_rate": 1.7388117910323133e-07, "loss": 0.1912, "step": 50662 }, { "epoch": 0.9405806410889527, "grad_norm": 0.9583744406700134, "learning_rate": 1.736646619371818e-07, "loss": 0.1072, "step": 50664 }, { "epoch": 0.9406177712263714, "grad_norm": 0.32100996375083923, "learning_rate": 1.734482784781233e-07, "loss": 0.3274, "step": 50666 }, { "epoch": 0.9406549013637899, "grad_norm": 0.34991973638534546, "learning_rate": 1.7323202872900126e-07, "loss": 0.3383, "step": 50668 }, { "epoch": 0.9406920315012086, "grad_norm": 0.4815565049648285, "learning_rate": 1.730159126927544e-07, "loss": 0.1787, "step": 50670 }, { "epoch": 0.9407291616386272, "grad_norm": 0.27533143758773804, "learning_rate": 1.7279993037232712e-07, "loss": 0.2097, "step": 50672 }, { "epoch": 0.9407662917760459, "grad_norm": 0.26766160130500793, "learning_rate": 1.72584081770657e-07, "loss": 0.286, "step": 50674 }, { "epoch": 0.9408034219134646, "grad_norm": 0.282402902841568, "learning_rate": 1.7236836689068058e-07, "loss": 0.2247, "step": 50676 }, { "epoch": 0.9408405520508831, "grad_norm": 0.523047685623169, "learning_rate": 1.7215278573533334e-07, "loss": 0.228, "step": 50678 }, { "epoch": 0.9408776821883018, "grad_norm": 0.24758701026439667, "learning_rate": 1.7193733830754845e-07, "loss": 0.0928, "step": 50680 }, { "epoch": 0.9409148123257204, "grad_norm": 0.5844394564628601, "learning_rate": 1.7172202461025912e-07, "loss": 0.1938, "step": 50682 }, { "epoch": 0.9409519424631391, "grad_norm": 0.7414089441299438, "learning_rate": 1.71506844646393e-07, "loss": 0.1886, "step": 50684 }, { "epoch": 0.9409890726005576, "grad_norm": 0.3517048954963684, "learning_rate": 1.712917984188789e-07, "loss": 0.1523, "step": 50686 }, { "epoch": 0.9410262027379763, "grad_norm": 0.3593765199184418, "learning_rate": 1.710768859306433e-07, "loss": 0.297, "step": 50688 }, { "epoch": 0.941063332875395, "grad_norm": 0.547726035118103, "learning_rate": 1.708621071846106e-07, "loss": 0.1596, "step": 50690 }, { "epoch": 0.9411004630128136, "grad_norm": 0.3129470944404602, "learning_rate": 1.7064746218370288e-07, "loss": 0.2858, "step": 50692 }, { "epoch": 0.9411375931502323, "grad_norm": 0.4534943997859955, "learning_rate": 1.7043295093084112e-07, "loss": 0.263, "step": 50694 }, { "epoch": 0.9411747232876508, "grad_norm": 0.4176694452762604, "learning_rate": 1.702185734289441e-07, "loss": 0.3454, "step": 50696 }, { "epoch": 0.9412118534250695, "grad_norm": 0.705500602722168, "learning_rate": 1.700043296809273e-07, "loss": 0.1812, "step": 50698 }, { "epoch": 0.9412489835624882, "grad_norm": 0.28096455335617065, "learning_rate": 1.6979021968970832e-07, "loss": 0.2047, "step": 50700 }, { "epoch": 0.9412861136999068, "grad_norm": 0.2851487398147583, "learning_rate": 1.6957624345819934e-07, "loss": 0.3721, "step": 50702 }, { "epoch": 0.9413232438373255, "grad_norm": 0.3478700518608093, "learning_rate": 1.6936240098931135e-07, "loss": 0.3118, "step": 50704 }, { "epoch": 0.941360373974744, "grad_norm": 0.35653167963027954, "learning_rate": 1.6914869228595644e-07, "loss": 0.2814, "step": 50706 }, { "epoch": 0.9413975041121627, "grad_norm": 0.3799697756767273, "learning_rate": 1.6893511735104007e-07, "loss": 0.2944, "step": 50708 }, { "epoch": 0.9414346342495814, "grad_norm": 0.42187803983688354, "learning_rate": 1.6872167618746992e-07, "loss": 0.3894, "step": 50710 }, { "epoch": 0.941471764387, "grad_norm": 0.31701526045799255, "learning_rate": 1.6850836879814925e-07, "loss": 0.1939, "step": 50712 }, { "epoch": 0.9415088945244187, "grad_norm": 0.41651391983032227, "learning_rate": 1.6829519518598014e-07, "loss": 0.4861, "step": 50714 }, { "epoch": 0.9415460246618372, "grad_norm": 0.3945968747138977, "learning_rate": 1.6808215535386584e-07, "loss": 0.0756, "step": 50716 }, { "epoch": 0.9415831547992559, "grad_norm": 0.48246362805366516, "learning_rate": 1.6786924930470293e-07, "loss": 0.1649, "step": 50718 }, { "epoch": 0.9416202849366746, "grad_norm": 0.508870542049408, "learning_rate": 1.6765647704138909e-07, "loss": 0.262, "step": 50720 }, { "epoch": 0.9416574150740932, "grad_norm": 0.4178345501422882, "learning_rate": 1.6744383856681867e-07, "loss": 0.2277, "step": 50722 }, { "epoch": 0.9416945452115119, "grad_norm": 0.6024828553199768, "learning_rate": 1.672313338838849e-07, "loss": 0.3844, "step": 50724 }, { "epoch": 0.9417316753489304, "grad_norm": 0.47281378507614136, "learning_rate": 1.6701896299548105e-07, "loss": 0.2748, "step": 50726 }, { "epoch": 0.9417688054863491, "grad_norm": 0.9686122536659241, "learning_rate": 1.6680672590449588e-07, "loss": 0.2208, "step": 50728 }, { "epoch": 0.9418059356237678, "grad_norm": 0.42115071415901184, "learning_rate": 1.6659462261381709e-07, "loss": 0.3366, "step": 50730 }, { "epoch": 0.9418430657611864, "grad_norm": 0.4273863732814789, "learning_rate": 1.6638265312633129e-07, "loss": 0.2523, "step": 50732 }, { "epoch": 0.941880195898605, "grad_norm": 0.4353385865688324, "learning_rate": 1.661708174449228e-07, "loss": 0.1577, "step": 50734 }, { "epoch": 0.9419173260360236, "grad_norm": 0.4323591887950897, "learning_rate": 1.6595911557247267e-07, "loss": 0.2597, "step": 50736 }, { "epoch": 0.9419544561734423, "grad_norm": 0.31123489141464233, "learning_rate": 1.6574754751186416e-07, "loss": 0.3125, "step": 50738 }, { "epoch": 0.9419915863108609, "grad_norm": 0.29007601737976074, "learning_rate": 1.6553611326597384e-07, "loss": 0.1705, "step": 50740 }, { "epoch": 0.9420287164482796, "grad_norm": 0.28883588314056396, "learning_rate": 1.6532481283767833e-07, "loss": 0.2308, "step": 50742 }, { "epoch": 0.9420658465856983, "grad_norm": 0.4793853461742401, "learning_rate": 1.6511364622985527e-07, "loss": 0.214, "step": 50744 }, { "epoch": 0.9421029767231168, "grad_norm": 0.5205013751983643, "learning_rate": 1.6490261344537684e-07, "loss": 0.2724, "step": 50746 }, { "epoch": 0.9421401068605355, "grad_norm": 0.4871504604816437, "learning_rate": 1.6469171448711298e-07, "loss": 0.1824, "step": 50748 }, { "epoch": 0.9421772369979541, "grad_norm": 0.2478114813566208, "learning_rate": 1.6448094935793469e-07, "loss": 0.2894, "step": 50750 }, { "epoch": 0.9422143671353728, "grad_norm": 0.3564758598804474, "learning_rate": 1.642703180607108e-07, "loss": 0.2557, "step": 50752 }, { "epoch": 0.9422514972727914, "grad_norm": 0.34629225730895996, "learning_rate": 1.6405982059830573e-07, "loss": 0.1682, "step": 50754 }, { "epoch": 0.94228862741021, "grad_norm": 0.3364121913909912, "learning_rate": 1.63849456973586e-07, "loss": 0.2919, "step": 50756 }, { "epoch": 0.9423257575476287, "grad_norm": 0.5090534687042236, "learning_rate": 1.636392271894105e-07, "loss": 0.1951, "step": 50758 }, { "epoch": 0.9423628876850473, "grad_norm": 0.47575676441192627, "learning_rate": 1.6342913124864356e-07, "loss": 0.321, "step": 50760 }, { "epoch": 0.942400017822466, "grad_norm": 0.6369500756263733, "learning_rate": 1.6321916915414182e-07, "loss": 0.1285, "step": 50762 }, { "epoch": 0.9424371479598846, "grad_norm": 0.38148143887519836, "learning_rate": 1.6300934090876187e-07, "loss": 0.2228, "step": 50764 }, { "epoch": 0.9424742780973032, "grad_norm": 0.5819621682167053, "learning_rate": 1.6279964651535919e-07, "loss": 0.4027, "step": 50766 }, { "epoch": 0.9425114082347219, "grad_norm": 0.2398240864276886, "learning_rate": 1.6259008597678817e-07, "loss": 0.1611, "step": 50768 }, { "epoch": 0.9425485383721405, "grad_norm": 0.31871211528778076, "learning_rate": 1.6238065929589985e-07, "loss": 0.0933, "step": 50770 }, { "epoch": 0.9425856685095592, "grad_norm": 0.3966899812221527, "learning_rate": 1.621713664755431e-07, "loss": 0.3231, "step": 50772 }, { "epoch": 0.9426227986469778, "grad_norm": 0.366228848695755, "learning_rate": 1.6196220751856674e-07, "loss": 0.2633, "step": 50774 }, { "epoch": 0.9426599287843964, "grad_norm": 0.18636897206306458, "learning_rate": 1.6175318242781623e-07, "loss": 0.1736, "step": 50776 }, { "epoch": 0.9426970589218151, "grad_norm": 0.49861106276512146, "learning_rate": 1.6154429120613712e-07, "loss": 0.2072, "step": 50778 }, { "epoch": 0.9427341890592337, "grad_norm": 0.3963800370693207, "learning_rate": 1.6133553385636935e-07, "loss": 0.3982, "step": 50780 }, { "epoch": 0.9427713191966524, "grad_norm": 1.3076131343841553, "learning_rate": 1.611269103813551e-07, "loss": 0.3215, "step": 50782 }, { "epoch": 0.9428084493340709, "grad_norm": 0.4988119900226593, "learning_rate": 1.6091842078393204e-07, "loss": 0.267, "step": 50784 }, { "epoch": 0.9428455794714896, "grad_norm": 0.33033621311187744, "learning_rate": 1.607100650669391e-07, "loss": 0.2916, "step": 50786 }, { "epoch": 0.9428827096089083, "grad_norm": 0.40125006437301636, "learning_rate": 1.605018432332084e-07, "loss": 0.2322, "step": 50788 }, { "epoch": 0.9429198397463269, "grad_norm": 0.34340158104896545, "learning_rate": 1.6029375528557544e-07, "loss": 0.3728, "step": 50790 }, { "epoch": 0.9429569698837456, "grad_norm": 0.3459474742412567, "learning_rate": 1.6008580122687244e-07, "loss": 0.3021, "step": 50792 }, { "epoch": 0.9429941000211641, "grad_norm": 0.4168727695941925, "learning_rate": 1.5987798105992714e-07, "loss": 0.2049, "step": 50794 }, { "epoch": 0.9430312301585828, "grad_norm": 0.37913772463798523, "learning_rate": 1.5967029478756724e-07, "loss": 0.4085, "step": 50796 }, { "epoch": 0.9430683602960015, "grad_norm": 0.3915462791919708, "learning_rate": 1.594627424126194e-07, "loss": 0.2323, "step": 50798 }, { "epoch": 0.9431054904334201, "grad_norm": 0.4993734061717987, "learning_rate": 1.592553239379091e-07, "loss": 0.3659, "step": 50800 }, { "epoch": 0.9431426205708388, "grad_norm": 1.349609136581421, "learning_rate": 1.5904803936625635e-07, "loss": 0.1961, "step": 50802 }, { "epoch": 0.9431797507082573, "grad_norm": 0.3478587567806244, "learning_rate": 1.588408887004833e-07, "loss": 0.4316, "step": 50804 }, { "epoch": 0.943216880845676, "grad_norm": 0.34329503774642944, "learning_rate": 1.5863387194340663e-07, "loss": 0.2534, "step": 50806 }, { "epoch": 0.9432540109830947, "grad_norm": 0.3742624819278717, "learning_rate": 1.5842698909784737e-07, "loss": 0.0962, "step": 50808 }, { "epoch": 0.9432911411205133, "grad_norm": 0.3517443537712097, "learning_rate": 1.5822024016661553e-07, "loss": 0.285, "step": 50810 }, { "epoch": 0.943328271257932, "grad_norm": 0.36728939414024353, "learning_rate": 1.5801362515252773e-07, "loss": 0.252, "step": 50812 }, { "epoch": 0.9433654013953505, "grad_norm": 0.456514835357666, "learning_rate": 1.578071440583928e-07, "loss": 0.2715, "step": 50814 }, { "epoch": 0.9434025315327692, "grad_norm": 0.5580490827560425, "learning_rate": 1.57600796887023e-07, "loss": 0.3401, "step": 50816 }, { "epoch": 0.9434396616701879, "grad_norm": 0.43877464532852173, "learning_rate": 1.5739458364122494e-07, "loss": 0.0728, "step": 50818 }, { "epoch": 0.9434767918076065, "grad_norm": 0.258999228477478, "learning_rate": 1.5718850432380528e-07, "loss": 0.3801, "step": 50820 }, { "epoch": 0.9435139219450251, "grad_norm": 0.38710400462150574, "learning_rate": 1.569825589375673e-07, "loss": 0.0745, "step": 50822 }, { "epoch": 0.9435510520824437, "grad_norm": 0.4101353883743286, "learning_rate": 1.5677674748531323e-07, "loss": 0.1797, "step": 50824 }, { "epoch": 0.9435881822198624, "grad_norm": 0.34796273708343506, "learning_rate": 1.5657106996984305e-07, "loss": 0.2001, "step": 50826 }, { "epoch": 0.9436253123572811, "grad_norm": 0.5266385078430176, "learning_rate": 1.5636552639395676e-07, "loss": 0.2376, "step": 50828 }, { "epoch": 0.9436624424946997, "grad_norm": 0.2701573967933655, "learning_rate": 1.5616011676044984e-07, "loss": 0.3514, "step": 50830 }, { "epoch": 0.9436995726321183, "grad_norm": 0.376899391412735, "learning_rate": 1.55954841072119e-07, "loss": 0.2988, "step": 50832 }, { "epoch": 0.9437367027695369, "grad_norm": 0.46510744094848633, "learning_rate": 1.5574969933175532e-07, "loss": 0.1707, "step": 50834 }, { "epoch": 0.9437738329069556, "grad_norm": 0.392042338848114, "learning_rate": 1.5554469154215212e-07, "loss": 0.4621, "step": 50836 }, { "epoch": 0.9438109630443742, "grad_norm": 0.269295871257782, "learning_rate": 1.553398177060983e-07, "loss": 0.3467, "step": 50838 }, { "epoch": 0.9438480931817929, "grad_norm": 0.3690491318702698, "learning_rate": 1.551350778263805e-07, "loss": 0.0764, "step": 50840 }, { "epoch": 0.9438852233192115, "grad_norm": 0.33338361978530884, "learning_rate": 1.5493047190578647e-07, "loss": 0.2309, "step": 50842 }, { "epoch": 0.9439223534566301, "grad_norm": 0.4011947810649872, "learning_rate": 1.5472599994709848e-07, "loss": 0.145, "step": 50844 }, { "epoch": 0.9439594835940488, "grad_norm": 0.36638596653938293, "learning_rate": 1.5452166195310093e-07, "loss": 0.3649, "step": 50846 }, { "epoch": 0.9439966137314674, "grad_norm": 0.5089031457901001, "learning_rate": 1.543174579265716e-07, "loss": 0.5912, "step": 50848 }, { "epoch": 0.944033743868886, "grad_norm": 0.40648385882377625, "learning_rate": 1.541133878702905e-07, "loss": 0.3476, "step": 50850 }, { "epoch": 0.9440708740063047, "grad_norm": 0.3949213922023773, "learning_rate": 1.5390945178703543e-07, "loss": 0.2193, "step": 50852 }, { "epoch": 0.9441080041437233, "grad_norm": 0.36781901121139526, "learning_rate": 1.5370564967957857e-07, "loss": 0.3288, "step": 50854 }, { "epoch": 0.944145134281142, "grad_norm": 0.46677350997924805, "learning_rate": 1.535019815506955e-07, "loss": 0.2375, "step": 50856 }, { "epoch": 0.9441822644185606, "grad_norm": 0.3283880054950714, "learning_rate": 1.5329844740315736e-07, "loss": 0.2453, "step": 50858 }, { "epoch": 0.9442193945559793, "grad_norm": 0.3368111848831177, "learning_rate": 1.53095047239733e-07, "loss": 0.2168, "step": 50860 }, { "epoch": 0.9442565246933979, "grad_norm": 0.44965943694114685, "learning_rate": 1.5289178106319024e-07, "loss": 0.3098, "step": 50862 }, { "epoch": 0.9442936548308165, "grad_norm": 0.3287608027458191, "learning_rate": 1.5268864887629465e-07, "loss": 0.1679, "step": 50864 }, { "epoch": 0.9443307849682352, "grad_norm": 0.22254541516304016, "learning_rate": 1.5248565068180954e-07, "loss": 0.3215, "step": 50866 }, { "epoch": 0.9443679151056538, "grad_norm": 0.5609930157661438, "learning_rate": 1.5228278648249938e-07, "loss": 0.22, "step": 50868 }, { "epoch": 0.9444050452430724, "grad_norm": 0.36397600173950195, "learning_rate": 1.520800562811231e-07, "loss": 0.3221, "step": 50870 }, { "epoch": 0.9444421753804911, "grad_norm": 0.32637637853622437, "learning_rate": 1.5187746008043846e-07, "loss": 0.2578, "step": 50872 }, { "epoch": 0.9444793055179097, "grad_norm": 0.37783822417259216, "learning_rate": 1.5167499788320216e-07, "loss": 0.2866, "step": 50874 }, { "epoch": 0.9445164356553284, "grad_norm": 0.43859991431236267, "learning_rate": 1.51472669692172e-07, "loss": 0.2698, "step": 50876 }, { "epoch": 0.944553565792747, "grad_norm": 0.47613459825515747, "learning_rate": 1.51270475510098e-07, "loss": 0.3631, "step": 50878 }, { "epoch": 0.9445906959301656, "grad_norm": 0.3435044586658478, "learning_rate": 1.5106841533973238e-07, "loss": 0.1484, "step": 50880 }, { "epoch": 0.9446278260675843, "grad_norm": 0.3109498620033264, "learning_rate": 1.5086648918382407e-07, "loss": 0.2662, "step": 50882 }, { "epoch": 0.9446649562050029, "grad_norm": 0.21308717131614685, "learning_rate": 1.506646970451231e-07, "loss": 0.1752, "step": 50884 }, { "epoch": 0.9447020863424216, "grad_norm": 0.3168817460536957, "learning_rate": 1.5046303892637282e-07, "loss": 0.2611, "step": 50886 }, { "epoch": 0.9447392164798402, "grad_norm": 0.39375633001327515, "learning_rate": 1.502615148303177e-07, "loss": 0.2608, "step": 50888 }, { "epoch": 0.9447763466172588, "grad_norm": 0.4844721257686615, "learning_rate": 1.5006012475969888e-07, "loss": 0.2319, "step": 50890 }, { "epoch": 0.9448134767546774, "grad_norm": 0.28992506861686707, "learning_rate": 1.4985886871725862e-07, "loss": 0.1648, "step": 50892 }, { "epoch": 0.9448506068920961, "grad_norm": 0.3431810438632965, "learning_rate": 1.4965774670573473e-07, "loss": 0.4382, "step": 50894 }, { "epoch": 0.9448877370295148, "grad_norm": 0.42396751046180725, "learning_rate": 1.4945675872786392e-07, "loss": 0.3528, "step": 50896 }, { "epoch": 0.9449248671669334, "grad_norm": 0.1901555210351944, "learning_rate": 1.4925590478638064e-07, "loss": 0.2131, "step": 50898 }, { "epoch": 0.944961997304352, "grad_norm": 0.3956177532672882, "learning_rate": 1.4905518488401714e-07, "loss": 0.3167, "step": 50900 }, { "epoch": 0.9449991274417706, "grad_norm": 0.4629685580730438, "learning_rate": 1.4885459902350685e-07, "loss": 0.2148, "step": 50902 }, { "epoch": 0.9450362575791893, "grad_norm": 0.35121551156044006, "learning_rate": 1.4865414720757641e-07, "loss": 0.3034, "step": 50904 }, { "epoch": 0.945073387716608, "grad_norm": 0.21928615868091583, "learning_rate": 1.4845382943895704e-07, "loss": 0.3247, "step": 50906 }, { "epoch": 0.9451105178540266, "grad_norm": 0.2957574725151062, "learning_rate": 1.4825364572037093e-07, "loss": 0.1804, "step": 50908 }, { "epoch": 0.9451476479914452, "grad_norm": 0.22765344381332397, "learning_rate": 1.4805359605454373e-07, "loss": 0.183, "step": 50910 }, { "epoch": 0.9451847781288638, "grad_norm": 0.41308075189590454, "learning_rate": 1.4785368044419657e-07, "loss": 0.2705, "step": 50912 }, { "epoch": 0.9452219082662825, "grad_norm": 0.5689462423324585, "learning_rate": 1.476538988920506e-07, "loss": 0.2481, "step": 50914 }, { "epoch": 0.9452590384037012, "grad_norm": 0.6958836913108826, "learning_rate": 1.4745425140082258e-07, "loss": 0.2674, "step": 50916 }, { "epoch": 0.9452961685411198, "grad_norm": 0.30955561995506287, "learning_rate": 1.472547379732314e-07, "loss": 0.1922, "step": 50918 }, { "epoch": 0.9453332986785384, "grad_norm": 0.21716073155403137, "learning_rate": 1.4705535861199162e-07, "loss": 0.3639, "step": 50920 }, { "epoch": 0.945370428815957, "grad_norm": 0.2723653018474579, "learning_rate": 1.468561133198143e-07, "loss": 0.3251, "step": 50922 }, { "epoch": 0.9454075589533757, "grad_norm": 0.35107341408729553, "learning_rate": 1.4665700209941292e-07, "loss": 0.2167, "step": 50924 }, { "epoch": 0.9454446890907944, "grad_norm": 0.466427743434906, "learning_rate": 1.4645802495349414e-07, "loss": 0.2326, "step": 50926 }, { "epoch": 0.945481819228213, "grad_norm": 0.35235506296157837, "learning_rate": 1.4625918188476806e-07, "loss": 0.277, "step": 50928 }, { "epoch": 0.9455189493656316, "grad_norm": 0.24556496739387512, "learning_rate": 1.4606047289593805e-07, "loss": 0.2478, "step": 50930 }, { "epoch": 0.9455560795030502, "grad_norm": 0.41484537720680237, "learning_rate": 1.4586189798970973e-07, "loss": 0.2627, "step": 50932 }, { "epoch": 0.9455932096404689, "grad_norm": 0.530802845954895, "learning_rate": 1.4566345716878539e-07, "loss": 0.2982, "step": 50934 }, { "epoch": 0.9456303397778875, "grad_norm": 0.18776439130306244, "learning_rate": 1.4546515043586285e-07, "loss": 0.0632, "step": 50936 }, { "epoch": 0.9456674699153061, "grad_norm": 0.4547887444496155, "learning_rate": 1.4526697779364218e-07, "loss": 0.1809, "step": 50938 }, { "epoch": 0.9457046000527248, "grad_norm": 0.41720056533813477, "learning_rate": 1.4506893924481902e-07, "loss": 0.2367, "step": 50940 }, { "epoch": 0.9457417301901434, "grad_norm": 0.41276952624320984, "learning_rate": 1.4487103479208898e-07, "loss": 0.4067, "step": 50942 }, { "epoch": 0.9457788603275621, "grad_norm": 0.4630793631076813, "learning_rate": 1.4467326443814433e-07, "loss": 0.3856, "step": 50944 }, { "epoch": 0.9458159904649807, "grad_norm": 0.42008623480796814, "learning_rate": 1.444756281856774e-07, "loss": 0.4544, "step": 50946 }, { "epoch": 0.9458531206023993, "grad_norm": 0.4949977993965149, "learning_rate": 1.4427812603737601e-07, "loss": 0.2481, "step": 50948 }, { "epoch": 0.945890250739818, "grad_norm": 0.35529232025146484, "learning_rate": 1.4408075799592913e-07, "loss": 0.4289, "step": 50950 }, { "epoch": 0.9459273808772366, "grad_norm": 0.6116740703582764, "learning_rate": 1.4388352406401905e-07, "loss": 0.3819, "step": 50952 }, { "epoch": 0.9459645110146553, "grad_norm": 0.3929576277732849, "learning_rate": 1.4368642424433365e-07, "loss": 0.2431, "step": 50954 }, { "epoch": 0.9460016411520739, "grad_norm": 0.30607128143310547, "learning_rate": 1.4348945853955188e-07, "loss": 0.2749, "step": 50956 }, { "epoch": 0.9460387712894925, "grad_norm": 0.32680752873420715, "learning_rate": 1.432926269523549e-07, "loss": 0.2426, "step": 50958 }, { "epoch": 0.9460759014269112, "grad_norm": 0.3459329307079315, "learning_rate": 1.4309592948542172e-07, "loss": 0.1661, "step": 50960 }, { "epoch": 0.9461130315643298, "grad_norm": 0.23430697619915009, "learning_rate": 1.4289936614142686e-07, "loss": 0.1726, "step": 50962 }, { "epoch": 0.9461501617017485, "grad_norm": 0.5292536020278931, "learning_rate": 1.4270293692304704e-07, "loss": 0.3447, "step": 50964 }, { "epoch": 0.946187291839167, "grad_norm": 0.5701496005058289, "learning_rate": 1.4250664183295347e-07, "loss": 0.631, "step": 50966 }, { "epoch": 0.9462244219765857, "grad_norm": 0.31739240884780884, "learning_rate": 1.423104808738196e-07, "loss": 0.1677, "step": 50968 }, { "epoch": 0.9462615521140044, "grad_norm": 0.3869602084159851, "learning_rate": 1.4211445404831214e-07, "loss": 0.5141, "step": 50970 }, { "epoch": 0.946298682251423, "grad_norm": 0.3388206958770752, "learning_rate": 1.41918561359099e-07, "loss": 0.2797, "step": 50972 }, { "epoch": 0.9463358123888417, "grad_norm": 0.5334675312042236, "learning_rate": 1.4172280280884576e-07, "loss": 0.3169, "step": 50974 }, { "epoch": 0.9463729425262603, "grad_norm": 0.361398845911026, "learning_rate": 1.415271784002159e-07, "loss": 0.2522, "step": 50976 }, { "epoch": 0.9464100726636789, "grad_norm": 0.5255299806594849, "learning_rate": 1.4133168813587173e-07, "loss": 0.1057, "step": 50978 }, { "epoch": 0.9464472028010976, "grad_norm": 0.42784133553504944, "learning_rate": 1.4113633201847333e-07, "loss": 0.5189, "step": 50980 }, { "epoch": 0.9464843329385162, "grad_norm": 0.33455702662467957, "learning_rate": 1.4094111005067856e-07, "loss": 0.3069, "step": 50982 }, { "epoch": 0.9465214630759349, "grad_norm": 0.5092288255691528, "learning_rate": 1.4074602223514422e-07, "loss": 0.3683, "step": 50984 }, { "epoch": 0.9465585932133534, "grad_norm": 0.4560586214065552, "learning_rate": 1.4055106857452483e-07, "loss": 0.3923, "step": 50986 }, { "epoch": 0.9465957233507721, "grad_norm": 0.4736967086791992, "learning_rate": 1.403562490714716e-07, "loss": 0.2505, "step": 50988 }, { "epoch": 0.9466328534881907, "grad_norm": 0.564443826675415, "learning_rate": 1.4016156372863798e-07, "loss": 0.2412, "step": 50990 }, { "epoch": 0.9466699836256094, "grad_norm": 0.4422684609889984, "learning_rate": 1.3996701254867185e-07, "loss": 0.395, "step": 50992 }, { "epoch": 0.9467071137630281, "grad_norm": 0.4116092324256897, "learning_rate": 1.3977259553422106e-07, "loss": 0.3189, "step": 50994 }, { "epoch": 0.9467442439004466, "grad_norm": 0.31543976068496704, "learning_rate": 1.395783126879302e-07, "loss": 0.4607, "step": 50996 }, { "epoch": 0.9467813740378653, "grad_norm": 0.2742465138435364, "learning_rate": 1.393841640124416e-07, "loss": 0.2619, "step": 50998 }, { "epoch": 0.9468185041752839, "grad_norm": 0.4801630973815918, "learning_rate": 1.3919014951039977e-07, "loss": 0.2777, "step": 51000 }, { "epoch": 0.9468556343127026, "grad_norm": 0.398260235786438, "learning_rate": 1.3899626918444265e-07, "loss": 0.1127, "step": 51002 }, { "epoch": 0.9468927644501213, "grad_norm": 0.5946736931800842, "learning_rate": 1.388025230372103e-07, "loss": 0.2116, "step": 51004 }, { "epoch": 0.9469298945875398, "grad_norm": 0.5703896880149841, "learning_rate": 1.3860891107133734e-07, "loss": 0.3117, "step": 51006 }, { "epoch": 0.9469670247249585, "grad_norm": 0.3947688341140747, "learning_rate": 1.3841543328945828e-07, "loss": 0.2346, "step": 51008 }, { "epoch": 0.9470041548623771, "grad_norm": 0.30989542603492737, "learning_rate": 1.382220896942077e-07, "loss": 0.2145, "step": 51010 }, { "epoch": 0.9470412849997958, "grad_norm": 1.6001261472702026, "learning_rate": 1.3802888028821348e-07, "loss": 0.3382, "step": 51012 }, { "epoch": 0.9470784151372145, "grad_norm": 0.5119905471801758, "learning_rate": 1.37835805074108e-07, "loss": 0.2928, "step": 51014 }, { "epoch": 0.947115545274633, "grad_norm": 0.2686319351196289, "learning_rate": 1.3764286405451578e-07, "loss": 0.1819, "step": 51016 }, { "epoch": 0.9471526754120517, "grad_norm": 0.421956330537796, "learning_rate": 1.3745005723206251e-07, "loss": 0.2522, "step": 51018 }, { "epoch": 0.9471898055494703, "grad_norm": 0.170281320810318, "learning_rate": 1.3725738460937166e-07, "loss": 0.1865, "step": 51020 }, { "epoch": 0.947226935686889, "grad_norm": 0.48961326479911804, "learning_rate": 1.370648461890667e-07, "loss": 0.2206, "step": 51022 }, { "epoch": 0.9472640658243077, "grad_norm": 0.22081826627254486, "learning_rate": 1.3687244197376548e-07, "loss": 0.248, "step": 51024 }, { "epoch": 0.9473011959617262, "grad_norm": 0.26116743683815, "learning_rate": 1.3668017196608707e-07, "loss": 0.1911, "step": 51026 }, { "epoch": 0.9473383260991449, "grad_norm": 0.4485781192779541, "learning_rate": 1.3648803616864825e-07, "loss": 0.2728, "step": 51028 }, { "epoch": 0.9473754562365635, "grad_norm": 0.3974182903766632, "learning_rate": 1.362960345840614e-07, "loss": 0.1765, "step": 51030 }, { "epoch": 0.9474125863739822, "grad_norm": 0.32474008202552795, "learning_rate": 1.3610416721494103e-07, "loss": 0.3082, "step": 51032 }, { "epoch": 0.9474497165114009, "grad_norm": 0.4679239094257355, "learning_rate": 1.3591243406389733e-07, "loss": 0.2614, "step": 51034 }, { "epoch": 0.9474868466488194, "grad_norm": 0.5621490478515625, "learning_rate": 1.3572083513353928e-07, "loss": 0.3231, "step": 51036 }, { "epoch": 0.9475239767862381, "grad_norm": 0.6725314259529114, "learning_rate": 1.3552937042647263e-07, "loss": 0.2987, "step": 51038 }, { "epoch": 0.9475611069236567, "grad_norm": 0.2390737235546112, "learning_rate": 1.3533803994530415e-07, "loss": 0.2607, "step": 51040 }, { "epoch": 0.9475982370610754, "grad_norm": 0.32657134532928467, "learning_rate": 1.351468436926362e-07, "loss": 0.141, "step": 51042 }, { "epoch": 0.947635367198494, "grad_norm": 0.37334662675857544, "learning_rate": 1.3495578167107226e-07, "loss": 0.3399, "step": 51044 }, { "epoch": 0.9476724973359126, "grad_norm": 0.31984302401542664, "learning_rate": 1.3476485388321136e-07, "loss": 0.3492, "step": 51046 }, { "epoch": 0.9477096274733313, "grad_norm": 0.3173534870147705, "learning_rate": 1.3457406033164922e-07, "loss": 0.3486, "step": 51048 }, { "epoch": 0.9477467576107499, "grad_norm": 0.30752885341644287, "learning_rate": 1.3438340101898483e-07, "loss": 0.2684, "step": 51050 }, { "epoch": 0.9477838877481686, "grad_norm": 0.4655018150806427, "learning_rate": 1.341928759478106e-07, "loss": 0.1938, "step": 51052 }, { "epoch": 0.9478210178855871, "grad_norm": 0.40786081552505493, "learning_rate": 1.3400248512072112e-07, "loss": 0.442, "step": 51054 }, { "epoch": 0.9478581480230058, "grad_norm": 0.6336261630058289, "learning_rate": 1.338122285403054e-07, "loss": 0.3988, "step": 51056 }, { "epoch": 0.9478952781604245, "grad_norm": 0.32837778329849243, "learning_rate": 1.336221062091514e-07, "loss": 0.1753, "step": 51058 }, { "epoch": 0.9479324082978431, "grad_norm": 0.5618468523025513, "learning_rate": 1.3343211812984923e-07, "loss": 0.325, "step": 51060 }, { "epoch": 0.9479695384352618, "grad_norm": 0.3489518463611603, "learning_rate": 1.3324226430498022e-07, "loss": 0.1879, "step": 51062 }, { "epoch": 0.9480066685726803, "grad_norm": 0.21018348634243011, "learning_rate": 1.3305254473713003e-07, "loss": 0.1693, "step": 51064 }, { "epoch": 0.948043798710099, "grad_norm": 0.8977999091148376, "learning_rate": 1.3286295942887994e-07, "loss": 0.3582, "step": 51066 }, { "epoch": 0.9480809288475177, "grad_norm": 0.3504578471183777, "learning_rate": 1.326735083828101e-07, "loss": 0.1833, "step": 51068 }, { "epoch": 0.9481180589849363, "grad_norm": 0.7570350170135498, "learning_rate": 1.3248419160149738e-07, "loss": 0.2474, "step": 51070 }, { "epoch": 0.948155189122355, "grad_norm": 0.482299268245697, "learning_rate": 1.3229500908751858e-07, "loss": 0.2272, "step": 51072 }, { "epoch": 0.9481923192597735, "grad_norm": 0.5501895546913147, "learning_rate": 1.3210596084344718e-07, "loss": 0.2972, "step": 51074 }, { "epoch": 0.9482294493971922, "grad_norm": 0.5036604404449463, "learning_rate": 1.3191704687185558e-07, "loss": 0.2989, "step": 51076 }, { "epoch": 0.9482665795346109, "grad_norm": 0.36826324462890625, "learning_rate": 1.3172826717531506e-07, "loss": 0.2826, "step": 51078 }, { "epoch": 0.9483037096720295, "grad_norm": 0.40228116512298584, "learning_rate": 1.3153962175639357e-07, "loss": 0.4322, "step": 51080 }, { "epoch": 0.9483408398094482, "grad_norm": 0.6288641691207886, "learning_rate": 1.3135111061765793e-07, "loss": 0.3082, "step": 51082 }, { "epoch": 0.9483779699468667, "grad_norm": 0.45098572969436646, "learning_rate": 1.3116273376167498e-07, "loss": 0.3693, "step": 51084 }, { "epoch": 0.9484151000842854, "grad_norm": 0.32466498017311096, "learning_rate": 1.3097449119100604e-07, "loss": 0.4027, "step": 51086 }, { "epoch": 0.948452230221704, "grad_norm": 0.471682071685791, "learning_rate": 1.3078638290821234e-07, "loss": 0.3085, "step": 51088 }, { "epoch": 0.9484893603591227, "grad_norm": 0.3796114921569824, "learning_rate": 1.3059840891585518e-07, "loss": 0.1774, "step": 51090 }, { "epoch": 0.9485264904965414, "grad_norm": 0.1632833182811737, "learning_rate": 1.3041056921649142e-07, "loss": 0.1883, "step": 51092 }, { "epoch": 0.9485636206339599, "grad_norm": 0.48637375235557556, "learning_rate": 1.3022286381267567e-07, "loss": 0.3603, "step": 51094 }, { "epoch": 0.9486007507713786, "grad_norm": 0.322948694229126, "learning_rate": 1.300352927069648e-07, "loss": 0.2058, "step": 51096 }, { "epoch": 0.9486378809087972, "grad_norm": 0.4468685984611511, "learning_rate": 1.2984785590190897e-07, "loss": 0.2485, "step": 51098 }, { "epoch": 0.9486750110462159, "grad_norm": 0.3279907703399658, "learning_rate": 1.2966055340006057e-07, "loss": 0.1749, "step": 51100 }, { "epoch": 0.9487121411836346, "grad_norm": 0.4309535622596741, "learning_rate": 1.2947338520396535e-07, "loss": 0.2039, "step": 51102 }, { "epoch": 0.9487492713210531, "grad_norm": 0.35730135440826416, "learning_rate": 1.292863513161724e-07, "loss": 0.4053, "step": 51104 }, { "epoch": 0.9487864014584718, "grad_norm": 0.40997830033302307, "learning_rate": 1.2909945173922522e-07, "loss": 0.2538, "step": 51106 }, { "epoch": 0.9488235315958904, "grad_norm": 0.39124903082847595, "learning_rate": 1.2891268647566846e-07, "loss": 0.2043, "step": 51108 }, { "epoch": 0.9488606617333091, "grad_norm": 0.38807371258735657, "learning_rate": 1.287260555280434e-07, "loss": 0.1858, "step": 51110 }, { "epoch": 0.9488977918707278, "grad_norm": 0.29328930377960205, "learning_rate": 1.2853955889888804e-07, "loss": 0.0799, "step": 51112 }, { "epoch": 0.9489349220081463, "grad_norm": 0.3937098979949951, "learning_rate": 1.283531965907403e-07, "loss": 0.2762, "step": 51114 }, { "epoch": 0.948972052145565, "grad_norm": 0.4479004144668579, "learning_rate": 1.281669686061371e-07, "loss": 0.2098, "step": 51116 }, { "epoch": 0.9490091822829836, "grad_norm": 0.4203970432281494, "learning_rate": 1.2798087494761192e-07, "loss": 0.3501, "step": 51118 }, { "epoch": 0.9490463124204023, "grad_norm": 0.3023132085800171, "learning_rate": 1.2779491561769719e-07, "loss": 0.2805, "step": 51120 }, { "epoch": 0.949083442557821, "grad_norm": 0.4120701849460602, "learning_rate": 1.2760909061892313e-07, "loss": 0.334, "step": 51122 }, { "epoch": 0.9491205726952395, "grad_norm": 0.38511180877685547, "learning_rate": 1.2742339995381768e-07, "loss": 0.3025, "step": 51124 }, { "epoch": 0.9491577028326582, "grad_norm": 0.39255228638648987, "learning_rate": 1.2723784362490777e-07, "loss": 0.257, "step": 51126 }, { "epoch": 0.9491948329700768, "grad_norm": 0.5008650422096252, "learning_rate": 1.2705242163471798e-07, "loss": 0.3305, "step": 51128 }, { "epoch": 0.9492319631074955, "grad_norm": 0.3733157217502594, "learning_rate": 1.268671339857719e-07, "loss": 0.0745, "step": 51130 }, { "epoch": 0.9492690932449142, "grad_norm": 0.40562862157821655, "learning_rate": 1.266819806805908e-07, "loss": 0.19, "step": 51132 }, { "epoch": 0.9493062233823327, "grad_norm": 0.47175151109695435, "learning_rate": 1.264969617216949e-07, "loss": 0.2528, "step": 51134 }, { "epoch": 0.9493433535197514, "grad_norm": 0.31468597054481506, "learning_rate": 1.263120771116011e-07, "loss": 0.1944, "step": 51136 }, { "epoch": 0.94938048365717, "grad_norm": 0.584264874458313, "learning_rate": 1.2612732685282291e-07, "loss": 0.3335, "step": 51138 }, { "epoch": 0.9494176137945887, "grad_norm": 0.6311646699905396, "learning_rate": 1.2594271094787835e-07, "loss": 0.3943, "step": 51140 }, { "epoch": 0.9494547439320072, "grad_norm": 0.38294675946235657, "learning_rate": 1.257582293992754e-07, "loss": 0.3575, "step": 51142 }, { "epoch": 0.9494918740694259, "grad_norm": 0.4584772288799286, "learning_rate": 1.255738822095265e-07, "loss": 0.3486, "step": 51144 }, { "epoch": 0.9495290042068446, "grad_norm": 0.4281284213066101, "learning_rate": 1.2538966938114072e-07, "loss": 0.1817, "step": 51146 }, { "epoch": 0.9495661343442632, "grad_norm": 0.3020859360694885, "learning_rate": 1.2520559091662278e-07, "loss": 0.2531, "step": 51148 }, { "epoch": 0.9496032644816819, "grad_norm": 0.41157469153404236, "learning_rate": 1.250216468184784e-07, "loss": 0.2727, "step": 51150 }, { "epoch": 0.9496403946191004, "grad_norm": 0.5780795216560364, "learning_rate": 1.2483783708921116e-07, "loss": 0.2293, "step": 51152 }, { "epoch": 0.9496775247565191, "grad_norm": 0.2992190420627594, "learning_rate": 1.2465416173132017e-07, "loss": 0.21, "step": 51154 }, { "epoch": 0.9497146548939378, "grad_norm": 0.4864193797111511, "learning_rate": 1.2447062074730676e-07, "loss": 0.3428, "step": 51156 }, { "epoch": 0.9497517850313564, "grad_norm": 0.4156431555747986, "learning_rate": 1.2428721413966672e-07, "loss": 0.2909, "step": 51158 }, { "epoch": 0.9497889151687751, "grad_norm": 0.4218223989009857, "learning_rate": 1.2410394191089802e-07, "loss": 0.2617, "step": 51160 }, { "epoch": 0.9498260453061936, "grad_norm": 0.3566693663597107, "learning_rate": 1.2392080406349316e-07, "loss": 0.2557, "step": 51162 }, { "epoch": 0.9498631754436123, "grad_norm": 0.3107702136039734, "learning_rate": 1.2373780059994233e-07, "loss": 0.282, "step": 51164 }, { "epoch": 0.949900305581031, "grad_norm": 0.6074591279029846, "learning_rate": 1.23554931522738e-07, "loss": 0.3319, "step": 51166 }, { "epoch": 0.9499374357184496, "grad_norm": 0.5217092633247375, "learning_rate": 1.2337219683436708e-07, "loss": 0.3936, "step": 51168 }, { "epoch": 0.9499745658558683, "grad_norm": 0.39786359667778015, "learning_rate": 1.2318959653731643e-07, "loss": 0.2049, "step": 51170 }, { "epoch": 0.9500116959932868, "grad_norm": 0.39279642701148987, "learning_rate": 1.2300713063407076e-07, "loss": 0.4446, "step": 51172 }, { "epoch": 0.9500488261307055, "grad_norm": 0.48613661527633667, "learning_rate": 1.2282479912711477e-07, "loss": 0.3357, "step": 51174 }, { "epoch": 0.9500859562681242, "grad_norm": 0.3647420108318329, "learning_rate": 1.226426020189264e-07, "loss": 0.13, "step": 51176 }, { "epoch": 0.9501230864055428, "grad_norm": 0.3206399381160736, "learning_rate": 1.2246053931198597e-07, "loss": 0.1944, "step": 51178 }, { "epoch": 0.9501602165429615, "grad_norm": 0.3007342517375946, "learning_rate": 1.2227861100877147e-07, "loss": 0.2947, "step": 51180 }, { "epoch": 0.95019734668038, "grad_norm": 0.5943436622619629, "learning_rate": 1.220968171117576e-07, "loss": 0.25, "step": 51182 }, { "epoch": 0.9502344768177987, "grad_norm": 0.6613900661468506, "learning_rate": 1.2191515762341898e-07, "loss": 0.2655, "step": 51184 }, { "epoch": 0.9502716069552174, "grad_norm": 0.48037734627723694, "learning_rate": 1.2173363254622594e-07, "loss": 0.4709, "step": 51186 }, { "epoch": 0.950308737092636, "grad_norm": 0.3712351620197296, "learning_rate": 1.215522418826498e-07, "loss": 0.2338, "step": 51188 }, { "epoch": 0.9503458672300547, "grad_norm": 1.0520695447921753, "learning_rate": 1.2137098563515748e-07, "loss": 0.3828, "step": 51190 }, { "epoch": 0.9503829973674732, "grad_norm": 0.49460604786872864, "learning_rate": 1.21189863806217e-07, "loss": 0.2952, "step": 51192 }, { "epoch": 0.9504201275048919, "grad_norm": 0.24679668247699738, "learning_rate": 1.2100887639829196e-07, "loss": 0.3813, "step": 51194 }, { "epoch": 0.9504572576423105, "grad_norm": 0.6260696053504944, "learning_rate": 1.2082802341384482e-07, "loss": 0.3651, "step": 51196 }, { "epoch": 0.9504943877797292, "grad_norm": 0.4114972949028015, "learning_rate": 1.2064730485533694e-07, "loss": 0.4719, "step": 51198 }, { "epoch": 0.9505315179171479, "grad_norm": 0.5454961061477661, "learning_rate": 1.2046672072522636e-07, "loss": 0.1759, "step": 51200 }, { "epoch": 0.9505686480545664, "grad_norm": 0.2732129395008087, "learning_rate": 1.2028627102597223e-07, "loss": 0.211, "step": 51202 }, { "epoch": 0.9506057781919851, "grad_norm": 0.4664188325405121, "learning_rate": 1.2010595576002816e-07, "loss": 0.3029, "step": 51204 }, { "epoch": 0.9506429083294037, "grad_norm": 0.3517145812511444, "learning_rate": 1.199257749298488e-07, "loss": 0.1972, "step": 51206 }, { "epoch": 0.9506800384668224, "grad_norm": 0.44931256771087646, "learning_rate": 1.1974572853788446e-07, "loss": 0.381, "step": 51208 }, { "epoch": 0.950717168604241, "grad_norm": 0.422444224357605, "learning_rate": 1.1956581658658762e-07, "loss": 0.4643, "step": 51210 }, { "epoch": 0.9507542987416596, "grad_norm": 0.4548596143722534, "learning_rate": 1.1938603907840295e-07, "loss": 0.2382, "step": 51212 }, { "epoch": 0.9507914288790783, "grad_norm": 0.42188867926597595, "learning_rate": 1.1920639601577855e-07, "loss": 0.363, "step": 51214 }, { "epoch": 0.9508285590164969, "grad_norm": 0.37047120928764343, "learning_rate": 1.1902688740115909e-07, "loss": 0.2885, "step": 51216 }, { "epoch": 0.9508656891539156, "grad_norm": 0.46173036098480225, "learning_rate": 1.1884751323698706e-07, "loss": 0.1614, "step": 51218 }, { "epoch": 0.9509028192913342, "grad_norm": 0.4396073818206787, "learning_rate": 1.1866827352570277e-07, "loss": 0.2405, "step": 51220 }, { "epoch": 0.9509399494287528, "grad_norm": 0.5908403396606445, "learning_rate": 1.1848916826974533e-07, "loss": 0.3247, "step": 51222 }, { "epoch": 0.9509770795661715, "grad_norm": 0.38285836577415466, "learning_rate": 1.183101974715517e-07, "loss": 0.309, "step": 51224 }, { "epoch": 0.9510142097035901, "grad_norm": 0.31784898042678833, "learning_rate": 1.1813136113355772e-07, "loss": 0.1693, "step": 51226 }, { "epoch": 0.9510513398410088, "grad_norm": 0.475458562374115, "learning_rate": 1.1795265925819588e-07, "loss": 0.215, "step": 51228 }, { "epoch": 0.9510884699784274, "grad_norm": 0.3930922746658325, "learning_rate": 1.1777409184789756e-07, "loss": 0.239, "step": 51230 }, { "epoch": 0.951125600115846, "grad_norm": 0.2587066888809204, "learning_rate": 1.1759565890509305e-07, "loss": 0.2819, "step": 51232 }, { "epoch": 0.9511627302532647, "grad_norm": 0.38370686769485474, "learning_rate": 1.1741736043221153e-07, "loss": 0.3256, "step": 51234 }, { "epoch": 0.9511998603906833, "grad_norm": 0.38192781805992126, "learning_rate": 1.172391964316777e-07, "loss": 0.2076, "step": 51236 }, { "epoch": 0.951236990528102, "grad_norm": 0.3160693645477295, "learning_rate": 1.1706116690591518e-07, "loss": 0.2255, "step": 51238 }, { "epoch": 0.9512741206655205, "grad_norm": 0.38616102933883667, "learning_rate": 1.168832718573476e-07, "loss": 0.1585, "step": 51240 }, { "epoch": 0.9513112508029392, "grad_norm": 0.40396353602409363, "learning_rate": 1.1670551128839636e-07, "loss": 0.3848, "step": 51242 }, { "epoch": 0.9513483809403579, "grad_norm": 0.4451180398464203, "learning_rate": 1.165278852014784e-07, "loss": 0.2376, "step": 51244 }, { "epoch": 0.9513855110777765, "grad_norm": 0.5604163408279419, "learning_rate": 1.1635039359901179e-07, "loss": 0.2264, "step": 51246 }, { "epoch": 0.9514226412151952, "grad_norm": 0.31771785020828247, "learning_rate": 1.1617303648341127e-07, "loss": 0.1552, "step": 51248 }, { "epoch": 0.9514597713526137, "grad_norm": 0.6490215063095093, "learning_rate": 1.1599581385709047e-07, "loss": 0.4153, "step": 51250 }, { "epoch": 0.9514969014900324, "grad_norm": 0.28555601835250854, "learning_rate": 1.1581872572245967e-07, "loss": 0.3914, "step": 51252 }, { "epoch": 0.9515340316274511, "grad_norm": 0.3286740779876709, "learning_rate": 1.1564177208193029e-07, "loss": 0.2707, "step": 51254 }, { "epoch": 0.9515711617648697, "grad_norm": 0.5443800687789917, "learning_rate": 1.1546495293790927e-07, "loss": 0.3459, "step": 51256 }, { "epoch": 0.9516082919022884, "grad_norm": 0.5195097327232361, "learning_rate": 1.1528826829280248e-07, "loss": 0.3158, "step": 51258 }, { "epoch": 0.9516454220397069, "grad_norm": 0.30374205112457275, "learning_rate": 1.1511171814901356e-07, "loss": 0.4355, "step": 51260 }, { "epoch": 0.9516825521771256, "grad_norm": 0.32916373014450073, "learning_rate": 1.1493530250894724e-07, "loss": 0.1155, "step": 51262 }, { "epoch": 0.9517196823145443, "grad_norm": 0.42171844840049744, "learning_rate": 1.147590213750005e-07, "loss": 0.2959, "step": 51264 }, { "epoch": 0.9517568124519629, "grad_norm": 0.561106264591217, "learning_rate": 1.1458287474957475e-07, "loss": 0.4802, "step": 51266 }, { "epoch": 0.9517939425893815, "grad_norm": 0.18155167996883392, "learning_rate": 1.1440686263506584e-07, "loss": 0.2088, "step": 51268 }, { "epoch": 0.9518310727268001, "grad_norm": 0.4434301257133484, "learning_rate": 1.1423098503386853e-07, "loss": 0.3417, "step": 51270 }, { "epoch": 0.9518682028642188, "grad_norm": 0.27022165060043335, "learning_rate": 1.1405524194837536e-07, "loss": 0.354, "step": 51272 }, { "epoch": 0.9519053330016375, "grad_norm": 0.3725062608718872, "learning_rate": 1.1387963338097996e-07, "loss": 0.2444, "step": 51274 }, { "epoch": 0.9519424631390561, "grad_norm": 0.4949437975883484, "learning_rate": 1.1370415933407042e-07, "loss": 0.1128, "step": 51276 }, { "epoch": 0.9519795932764747, "grad_norm": 0.3151581287384033, "learning_rate": 1.135288198100326e-07, "loss": 0.3854, "step": 51278 }, { "epoch": 0.9520167234138933, "grad_norm": 0.6056416034698486, "learning_rate": 1.1335361481125573e-07, "loss": 0.4175, "step": 51280 }, { "epoch": 0.952053853551312, "grad_norm": 0.3668951988220215, "learning_rate": 1.1317854434012121e-07, "loss": 0.2336, "step": 51282 }, { "epoch": 0.9520909836887307, "grad_norm": 0.43334779143333435, "learning_rate": 1.130036083990127e-07, "loss": 0.3276, "step": 51284 }, { "epoch": 0.9521281138261493, "grad_norm": 0.47040700912475586, "learning_rate": 1.1282880699031051e-07, "loss": 0.2232, "step": 51286 }, { "epoch": 0.9521652439635679, "grad_norm": 0.3533232808113098, "learning_rate": 1.1265414011639275e-07, "loss": 0.1173, "step": 51288 }, { "epoch": 0.9522023741009865, "grad_norm": 0.43317684531211853, "learning_rate": 1.1247960777963529e-07, "loss": 0.364, "step": 51290 }, { "epoch": 0.9522395042384052, "grad_norm": 0.27443671226501465, "learning_rate": 1.1230520998241401e-07, "loss": 0.2527, "step": 51292 }, { "epoch": 0.9522766343758238, "grad_norm": 0.3441377282142639, "learning_rate": 1.1213094672710256e-07, "loss": 0.3496, "step": 51294 }, { "epoch": 0.9523137645132425, "grad_norm": 0.614155113697052, "learning_rate": 1.1195681801607016e-07, "loss": 0.2428, "step": 51296 }, { "epoch": 0.9523508946506611, "grad_norm": 0.48285776376724243, "learning_rate": 1.1178282385168826e-07, "loss": 0.4336, "step": 51298 }, { "epoch": 0.9523880247880797, "grad_norm": 0.3487530052661896, "learning_rate": 1.1160896423632384e-07, "loss": 0.2833, "step": 51300 }, { "epoch": 0.9524251549254984, "grad_norm": 0.4644484221935272, "learning_rate": 1.1143523917234056e-07, "loss": 0.2472, "step": 51302 }, { "epoch": 0.952462285062917, "grad_norm": 0.452174574136734, "learning_rate": 1.1126164866210542e-07, "loss": 0.1775, "step": 51304 }, { "epoch": 0.9524994152003357, "grad_norm": 0.36379164457321167, "learning_rate": 1.1108819270797877e-07, "loss": 0.2419, "step": 51306 }, { "epoch": 0.9525365453377543, "grad_norm": 0.42875194549560547, "learning_rate": 1.1091487131232093e-07, "loss": 0.4057, "step": 51308 }, { "epoch": 0.9525736754751729, "grad_norm": 0.3376370072364807, "learning_rate": 1.1074168447749111e-07, "loss": 0.1452, "step": 51310 }, { "epoch": 0.9526108056125916, "grad_norm": 0.39539510011672974, "learning_rate": 1.1056863220584412e-07, "loss": 0.3155, "step": 51312 }, { "epoch": 0.9526479357500102, "grad_norm": 0.31804022192955017, "learning_rate": 1.1039571449973696e-07, "loss": 0.2814, "step": 51314 }, { "epoch": 0.9526850658874289, "grad_norm": 0.5603212118148804, "learning_rate": 1.1022293136151996e-07, "loss": 0.439, "step": 51316 }, { "epoch": 0.9527221960248475, "grad_norm": 0.6551733613014221, "learning_rate": 1.1005028279354568e-07, "loss": 0.2828, "step": 51318 }, { "epoch": 0.9527593261622661, "grad_norm": 0.9215043783187866, "learning_rate": 1.0987776879816447e-07, "loss": 0.198, "step": 51320 }, { "epoch": 0.9527964562996848, "grad_norm": 0.5769176483154297, "learning_rate": 1.0970538937772113e-07, "loss": 0.2796, "step": 51322 }, { "epoch": 0.9528335864371034, "grad_norm": 0.19632776081562042, "learning_rate": 1.0953314453456376e-07, "loss": 0.3823, "step": 51324 }, { "epoch": 0.952870716574522, "grad_norm": 0.5432040095329285, "learning_rate": 1.0936103427103384e-07, "loss": 0.4746, "step": 51326 }, { "epoch": 0.9529078467119407, "grad_norm": 0.4054275155067444, "learning_rate": 1.0918905858947504e-07, "loss": 0.4682, "step": 51328 }, { "epoch": 0.9529449768493593, "grad_norm": 0.44310373067855835, "learning_rate": 1.0901721749222772e-07, "loss": 0.1581, "step": 51330 }, { "epoch": 0.952982106986778, "grad_norm": 0.2824499309062958, "learning_rate": 1.0884551098162777e-07, "loss": 0.0859, "step": 51332 }, { "epoch": 0.9530192371241966, "grad_norm": 0.2441079169511795, "learning_rate": 1.0867393906001222e-07, "loss": 0.3679, "step": 51334 }, { "epoch": 0.9530563672616152, "grad_norm": 0.4055043160915375, "learning_rate": 1.085025017297181e-07, "loss": 0.2128, "step": 51336 }, { "epoch": 0.9530934973990339, "grad_norm": 0.2868565320968628, "learning_rate": 1.0833119899307576e-07, "loss": 0.3097, "step": 51338 }, { "epoch": 0.9531306275364525, "grad_norm": 0.6610684990882874, "learning_rate": 1.0816003085241666e-07, "loss": 0.2934, "step": 51340 }, { "epoch": 0.9531677576738712, "grad_norm": 0.3810104727745056, "learning_rate": 1.0798899731007006e-07, "loss": 0.269, "step": 51342 }, { "epoch": 0.9532048878112898, "grad_norm": 0.288468599319458, "learning_rate": 1.0781809836836299e-07, "loss": 0.3354, "step": 51344 }, { "epoch": 0.9532420179487084, "grad_norm": 0.5050945281982422, "learning_rate": 1.0764733402962136e-07, "loss": 0.3589, "step": 51346 }, { "epoch": 0.953279148086127, "grad_norm": 0.5115669965744019, "learning_rate": 1.0747670429616886e-07, "loss": 0.4395, "step": 51348 }, { "epoch": 0.9533162782235457, "grad_norm": 0.5838960409164429, "learning_rate": 1.0730620917032697e-07, "loss": 0.3846, "step": 51350 }, { "epoch": 0.9533534083609644, "grad_norm": 0.34467118978500366, "learning_rate": 1.0713584865441495e-07, "loss": 0.1637, "step": 51352 }, { "epoch": 0.953390538498383, "grad_norm": 0.3816034197807312, "learning_rate": 1.0696562275075206e-07, "loss": 0.3864, "step": 51354 }, { "epoch": 0.9534276686358016, "grad_norm": 0.4820430874824524, "learning_rate": 1.0679553146165312e-07, "loss": 0.3548, "step": 51356 }, { "epoch": 0.9534647987732202, "grad_norm": 0.3864564001560211, "learning_rate": 1.0662557478943403e-07, "loss": 0.1949, "step": 51358 }, { "epoch": 0.9535019289106389, "grad_norm": 0.4808722734451294, "learning_rate": 1.0645575273640629e-07, "loss": 0.1143, "step": 51360 }, { "epoch": 0.9535390590480576, "grad_norm": 0.38032978773117065, "learning_rate": 1.062860653048825e-07, "loss": 0.2929, "step": 51362 }, { "epoch": 0.9535761891854762, "grad_norm": 0.2155487835407257, "learning_rate": 1.061165124971686e-07, "loss": 0.1088, "step": 51364 }, { "epoch": 0.9536133193228948, "grad_norm": 0.36278069019317627, "learning_rate": 1.0594709431557382e-07, "loss": 0.2193, "step": 51366 }, { "epoch": 0.9536504494603134, "grad_norm": 0.49232059717178345, "learning_rate": 1.0577781076240301e-07, "loss": 0.1183, "step": 51368 }, { "epoch": 0.9536875795977321, "grad_norm": 0.3859732151031494, "learning_rate": 1.0560866183995988e-07, "loss": 0.1119, "step": 51370 }, { "epoch": 0.9537247097351508, "grad_norm": 0.5421310663223267, "learning_rate": 1.0543964755054593e-07, "loss": 0.2101, "step": 51372 }, { "epoch": 0.9537618398725694, "grad_norm": 0.33848968148231506, "learning_rate": 1.052707678964604e-07, "loss": 0.1709, "step": 51374 }, { "epoch": 0.953798970009988, "grad_norm": 0.4361926317214966, "learning_rate": 1.0510202288000259e-07, "loss": 0.1234, "step": 51376 }, { "epoch": 0.9538361001474066, "grad_norm": 0.32363268733024597, "learning_rate": 1.0493341250346622e-07, "loss": 0.2823, "step": 51378 }, { "epoch": 0.9538732302848253, "grad_norm": 0.2282724678516388, "learning_rate": 1.0476493676914723e-07, "loss": 0.2135, "step": 51380 }, { "epoch": 0.953910360422244, "grad_norm": 0.20674701035022736, "learning_rate": 1.045965956793371e-07, "loss": 0.2888, "step": 51382 }, { "epoch": 0.9539474905596625, "grad_norm": 0.21321214735507965, "learning_rate": 1.0442838923632847e-07, "loss": 0.2131, "step": 51384 }, { "epoch": 0.9539846206970812, "grad_norm": 0.3474956154823303, "learning_rate": 1.0426031744240727e-07, "loss": 0.2003, "step": 51386 }, { "epoch": 0.9540217508344998, "grad_norm": 0.31653910875320435, "learning_rate": 1.0409238029986391e-07, "loss": 0.3052, "step": 51388 }, { "epoch": 0.9540588809719185, "grad_norm": 0.49861517548561096, "learning_rate": 1.0392457781097986e-07, "loss": 0.1798, "step": 51390 }, { "epoch": 0.9540960111093371, "grad_norm": 0.34396955370903015, "learning_rate": 1.037569099780411e-07, "loss": 0.1365, "step": 51392 }, { "epoch": 0.9541331412467557, "grad_norm": 0.38625603914260864, "learning_rate": 1.0358937680332804e-07, "loss": 0.2548, "step": 51394 }, { "epoch": 0.9541702713841744, "grad_norm": 0.37313809990882874, "learning_rate": 1.0342197828912104e-07, "loss": 0.3021, "step": 51396 }, { "epoch": 0.954207401521593, "grad_norm": 0.5972605347633362, "learning_rate": 1.0325471443769609e-07, "loss": 0.1474, "step": 51398 }, { "epoch": 0.9542445316590117, "grad_norm": 0.4858228862285614, "learning_rate": 1.0308758525133022e-07, "loss": 0.2084, "step": 51400 }, { "epoch": 0.9542816617964303, "grad_norm": 0.40986567735671997, "learning_rate": 1.0292059073229832e-07, "loss": 0.2341, "step": 51402 }, { "epoch": 0.9543187919338489, "grad_norm": 0.3499915599822998, "learning_rate": 1.0275373088287189e-07, "loss": 0.2386, "step": 51404 }, { "epoch": 0.9543559220712676, "grad_norm": 0.3555004298686981, "learning_rate": 1.0258700570532131e-07, "loss": 0.3372, "step": 51406 }, { "epoch": 0.9543930522086862, "grad_norm": 0.3637676537036896, "learning_rate": 1.0242041520191481e-07, "loss": 0.3126, "step": 51408 }, { "epoch": 0.9544301823461049, "grad_norm": 0.32187241315841675, "learning_rate": 1.0225395937492056e-07, "loss": 0.3867, "step": 51410 }, { "epoch": 0.9544673124835235, "grad_norm": 0.4091941714286804, "learning_rate": 1.0208763822660228e-07, "loss": 0.241, "step": 51412 }, { "epoch": 0.9545044426209421, "grad_norm": 0.6092678308486938, "learning_rate": 1.0192145175922374e-07, "loss": 0.3584, "step": 51414 }, { "epoch": 0.9545415727583608, "grad_norm": 0.5188865661621094, "learning_rate": 1.0175539997504647e-07, "loss": 0.2723, "step": 51416 }, { "epoch": 0.9545787028957794, "grad_norm": 0.44235536456108093, "learning_rate": 1.0158948287632975e-07, "loss": 0.2762, "step": 51418 }, { "epoch": 0.9546158330331981, "grad_norm": 0.36820608377456665, "learning_rate": 1.0142370046533067e-07, "loss": 0.1337, "step": 51420 }, { "epoch": 0.9546529631706167, "grad_norm": 0.3130168318748474, "learning_rate": 1.0125805274430411e-07, "loss": 0.2306, "step": 51422 }, { "epoch": 0.9546900933080353, "grad_norm": 0.502172589302063, "learning_rate": 1.0109253971550603e-07, "loss": 0.3917, "step": 51424 }, { "epoch": 0.954727223445454, "grad_norm": 0.48643258213996887, "learning_rate": 1.0092716138118908e-07, "loss": 0.5205, "step": 51426 }, { "epoch": 0.9547643535828726, "grad_norm": 0.5207310318946838, "learning_rate": 1.0076191774360144e-07, "loss": 0.2222, "step": 51428 }, { "epoch": 0.9548014837202913, "grad_norm": 0.5100780725479126, "learning_rate": 1.0059680880499245e-07, "loss": 0.1614, "step": 51430 }, { "epoch": 0.9548386138577099, "grad_norm": 0.4287956655025482, "learning_rate": 1.0043183456760808e-07, "loss": 0.3084, "step": 51432 }, { "epoch": 0.9548757439951285, "grad_norm": 0.27186858654022217, "learning_rate": 1.002669950336943e-07, "loss": 0.466, "step": 51434 }, { "epoch": 0.9549128741325472, "grad_norm": 0.4396379590034485, "learning_rate": 1.0010229020549378e-07, "loss": 0.5569, "step": 51436 }, { "epoch": 0.9549500042699658, "grad_norm": 0.34844908118247986, "learning_rate": 9.993772008524805e-08, "loss": 0.1378, "step": 51438 }, { "epoch": 0.9549871344073845, "grad_norm": 0.32082870602607727, "learning_rate": 9.977328467519532e-08, "loss": 0.2846, "step": 51440 }, { "epoch": 0.955024264544803, "grad_norm": 0.4624224007129669, "learning_rate": 9.960898397757268e-08, "loss": 0.4315, "step": 51442 }, { "epoch": 0.9550613946822217, "grad_norm": 0.1708763688802719, "learning_rate": 9.944481799461725e-08, "loss": 0.2394, "step": 51444 }, { "epoch": 0.9550985248196403, "grad_norm": 0.28764525055885315, "learning_rate": 9.928078672856278e-08, "loss": 0.2519, "step": 51446 }, { "epoch": 0.955135654957059, "grad_norm": 0.3690250813961029, "learning_rate": 9.911689018163973e-08, "loss": 0.2175, "step": 51448 }, { "epoch": 0.9551727850944777, "grad_norm": 0.27418121695518494, "learning_rate": 9.895312835608073e-08, "loss": 0.2164, "step": 51450 }, { "epoch": 0.9552099152318962, "grad_norm": 0.47681599855422974, "learning_rate": 9.878950125411068e-08, "loss": 0.142, "step": 51452 }, { "epoch": 0.9552470453693149, "grad_norm": 0.37893256545066833, "learning_rate": 9.862600887795893e-08, "loss": 0.2912, "step": 51454 }, { "epoch": 0.9552841755067335, "grad_norm": 0.5107290744781494, "learning_rate": 9.846265122984921e-08, "loss": 0.3965, "step": 51456 }, { "epoch": 0.9553213056441522, "grad_norm": 0.2970569133758545, "learning_rate": 9.829942831200423e-08, "loss": 0.1353, "step": 51458 }, { "epoch": 0.9553584357815709, "grad_norm": 0.3862713575363159, "learning_rate": 9.813634012664552e-08, "loss": 0.3067, "step": 51460 }, { "epoch": 0.9553955659189894, "grad_norm": 0.44956332445144653, "learning_rate": 9.797338667599132e-08, "loss": 0.1933, "step": 51462 }, { "epoch": 0.9554326960564081, "grad_norm": 0.2788529396057129, "learning_rate": 9.781056796225874e-08, "loss": 0.1533, "step": 51464 }, { "epoch": 0.9554698261938267, "grad_norm": 0.5427239537239075, "learning_rate": 9.764788398766489e-08, "loss": 0.3225, "step": 51466 }, { "epoch": 0.9555069563312454, "grad_norm": 0.42808350920677185, "learning_rate": 9.748533475442135e-08, "loss": 0.278, "step": 51468 }, { "epoch": 0.9555440864686641, "grad_norm": 0.17554649710655212, "learning_rate": 9.732292026474077e-08, "loss": 0.0411, "step": 51470 }, { "epoch": 0.9555812166060826, "grad_norm": 0.4540509879589081, "learning_rate": 9.716064052083362e-08, "loss": 0.373, "step": 51472 }, { "epoch": 0.9556183467435013, "grad_norm": 1.3159315586090088, "learning_rate": 9.699849552490815e-08, "loss": 0.4413, "step": 51474 }, { "epoch": 0.9556554768809199, "grad_norm": 0.3383181393146515, "learning_rate": 9.683648527917034e-08, "loss": 0.2005, "step": 51476 }, { "epoch": 0.9556926070183386, "grad_norm": 0.4478803873062134, "learning_rate": 9.667460978582399e-08, "loss": 0.2232, "step": 51478 }, { "epoch": 0.9557297371557573, "grad_norm": 0.36779308319091797, "learning_rate": 9.651286904707292e-08, "loss": 0.2742, "step": 51480 }, { "epoch": 0.9557668672931758, "grad_norm": 0.44625532627105713, "learning_rate": 9.635126306511755e-08, "loss": 0.4092, "step": 51482 }, { "epoch": 0.9558039974305945, "grad_norm": 0.14745530486106873, "learning_rate": 9.618979184215504e-08, "loss": 0.0803, "step": 51484 }, { "epoch": 0.9558411275680131, "grad_norm": 0.5856987237930298, "learning_rate": 9.602845538038585e-08, "loss": 0.2287, "step": 51486 }, { "epoch": 0.9558782577054318, "grad_norm": 0.554949164390564, "learning_rate": 9.586725368200267e-08, "loss": 0.2197, "step": 51488 }, { "epoch": 0.9559153878428505, "grad_norm": 0.2984238266944885, "learning_rate": 9.57061867492004e-08, "loss": 0.3928, "step": 51490 }, { "epoch": 0.955952517980269, "grad_norm": 0.6525359749794006, "learning_rate": 9.554525458416953e-08, "loss": 0.1239, "step": 51492 }, { "epoch": 0.9559896481176877, "grad_norm": 0.2128794640302658, "learning_rate": 9.53844571891005e-08, "loss": 0.1859, "step": 51494 }, { "epoch": 0.9560267782551063, "grad_norm": 0.36496207118034363, "learning_rate": 9.522379456618047e-08, "loss": 0.1817, "step": 51496 }, { "epoch": 0.956063908392525, "grad_norm": 0.5023206472396851, "learning_rate": 9.50632667175988e-08, "loss": 0.3518, "step": 51498 }, { "epoch": 0.9561010385299435, "grad_norm": 0.3819437623023987, "learning_rate": 9.490287364553485e-08, "loss": 0.1771, "step": 51500 }, { "epoch": 0.9561381686673622, "grad_norm": 0.40997087955474854, "learning_rate": 9.474261535217577e-08, "loss": 0.2268, "step": 51502 }, { "epoch": 0.9561752988047809, "grad_norm": 0.43543556332588196, "learning_rate": 9.45824918396987e-08, "loss": 0.3131, "step": 51504 }, { "epoch": 0.9562124289421995, "grad_norm": 0.48614105582237244, "learning_rate": 9.442250311028301e-08, "loss": 0.2235, "step": 51506 }, { "epoch": 0.9562495590796182, "grad_norm": 0.6929818391799927, "learning_rate": 9.426264916610806e-08, "loss": 0.4692, "step": 51508 }, { "epoch": 0.9562866892170367, "grad_norm": 0.2409350425004959, "learning_rate": 9.410293000934545e-08, "loss": 0.1772, "step": 51510 }, { "epoch": 0.9563238193544554, "grad_norm": 0.22209814190864563, "learning_rate": 9.394334564217233e-08, "loss": 0.2402, "step": 51512 }, { "epoch": 0.9563609494918741, "grad_norm": 0.2895338535308838, "learning_rate": 9.378389606675698e-08, "loss": 0.1887, "step": 51514 }, { "epoch": 0.9563980796292927, "grad_norm": 0.546881377696991, "learning_rate": 9.362458128527096e-08, "loss": 0.1819, "step": 51516 }, { "epoch": 0.9564352097667114, "grad_norm": 0.36732521653175354, "learning_rate": 9.346540129988035e-08, "loss": 0.2193, "step": 51518 }, { "epoch": 0.9564723399041299, "grad_norm": 0.3495822846889496, "learning_rate": 9.330635611275341e-08, "loss": 0.3648, "step": 51520 }, { "epoch": 0.9565094700415486, "grad_norm": 0.3063301146030426, "learning_rate": 9.314744572605283e-08, "loss": 0.1641, "step": 51522 }, { "epoch": 0.9565466001789673, "grad_norm": 0.40818724036216736, "learning_rate": 9.298867014194024e-08, "loss": 0.268, "step": 51524 }, { "epoch": 0.9565837303163859, "grad_norm": 0.339240700006485, "learning_rate": 9.283002936257723e-08, "loss": 0.2768, "step": 51526 }, { "epoch": 0.9566208604538046, "grad_norm": 0.45224931836128235, "learning_rate": 9.267152339012208e-08, "loss": 0.3243, "step": 51528 }, { "epoch": 0.9566579905912231, "grad_norm": 0.501471757888794, "learning_rate": 9.251315222673196e-08, "loss": 0.3255, "step": 51530 }, { "epoch": 0.9566951207286418, "grad_norm": 0.2901224195957184, "learning_rate": 9.235491587456069e-08, "loss": 0.3532, "step": 51532 }, { "epoch": 0.9567322508660605, "grad_norm": 0.37131261825561523, "learning_rate": 9.219681433576321e-08, "loss": 0.3123, "step": 51534 }, { "epoch": 0.9567693810034791, "grad_norm": 0.3656637966632843, "learning_rate": 9.203884761248894e-08, "loss": 0.2702, "step": 51536 }, { "epoch": 0.9568065111408978, "grad_norm": 0.34161725640296936, "learning_rate": 9.188101570688834e-08, "loss": 0.333, "step": 51538 }, { "epoch": 0.9568436412783163, "grad_norm": 0.29506412148475647, "learning_rate": 9.172331862110972e-08, "loss": 0.3374, "step": 51540 }, { "epoch": 0.956880771415735, "grad_norm": 0.4480403661727905, "learning_rate": 9.156575635729692e-08, "loss": 0.3452, "step": 51542 }, { "epoch": 0.9569179015531536, "grad_norm": 0.2707580029964447, "learning_rate": 9.140832891759599e-08, "loss": 0.2365, "step": 51544 }, { "epoch": 0.9569550316905723, "grad_norm": 0.34321004152297974, "learning_rate": 9.125103630414744e-08, "loss": 0.2322, "step": 51546 }, { "epoch": 0.956992161827991, "grad_norm": 0.4533778131008148, "learning_rate": 9.10938785190918e-08, "loss": 0.3507, "step": 51548 }, { "epoch": 0.9570292919654095, "grad_norm": 0.5854126811027527, "learning_rate": 9.093685556456733e-08, "loss": 0.2008, "step": 51550 }, { "epoch": 0.9570664221028282, "grad_norm": 0.5888938307762146, "learning_rate": 9.077996744271345e-08, "loss": 0.3178, "step": 51552 }, { "epoch": 0.9571035522402468, "grad_norm": 0.3842276334762573, "learning_rate": 9.062321415566066e-08, "loss": 0.3804, "step": 51554 }, { "epoch": 0.9571406823776655, "grad_norm": 0.35984310507774353, "learning_rate": 9.046659570554395e-08, "loss": 0.3131, "step": 51556 }, { "epoch": 0.9571778125150842, "grad_norm": 0.4122650921344757, "learning_rate": 9.031011209449492e-08, "loss": 0.2193, "step": 51558 }, { "epoch": 0.9572149426525027, "grad_norm": 0.545009970664978, "learning_rate": 9.015376332464298e-08, "loss": 0.4237, "step": 51560 }, { "epoch": 0.9572520727899214, "grad_norm": 0.39067867398262024, "learning_rate": 8.999754939811422e-08, "loss": 0.3222, "step": 51562 }, { "epoch": 0.95728920292734, "grad_norm": 1.3999227285385132, "learning_rate": 8.984147031703472e-08, "loss": 0.3103, "step": 51564 }, { "epoch": 0.9573263330647587, "grad_norm": 0.3567414879798889, "learning_rate": 8.968552608352943e-08, "loss": 0.2156, "step": 51566 }, { "epoch": 0.9573634632021774, "grad_norm": 0.47114434838294983, "learning_rate": 8.952971669971777e-08, "loss": 0.3099, "step": 51568 }, { "epoch": 0.9574005933395959, "grad_norm": 0.38934656977653503, "learning_rate": 8.93740421677225e-08, "loss": 0.2393, "step": 51570 }, { "epoch": 0.9574377234770146, "grad_norm": 0.4058121144771576, "learning_rate": 8.921850248965968e-08, "loss": 0.2452, "step": 51572 }, { "epoch": 0.9574748536144332, "grad_norm": 0.5005676746368408, "learning_rate": 8.906309766764765e-08, "loss": 0.3649, "step": 51574 }, { "epoch": 0.9575119837518519, "grad_norm": 0.48660942912101746, "learning_rate": 8.890782770380024e-08, "loss": 0.0588, "step": 51576 }, { "epoch": 0.9575491138892706, "grad_norm": 0.6719896793365479, "learning_rate": 8.875269260023023e-08, "loss": 0.3653, "step": 51578 }, { "epoch": 0.9575862440266891, "grad_norm": 0.39558500051498413, "learning_rate": 8.859769235904814e-08, "loss": 0.3686, "step": 51580 }, { "epoch": 0.9576233741641078, "grad_norm": 0.5397294759750366, "learning_rate": 8.84428269823634e-08, "loss": 0.264, "step": 51582 }, { "epoch": 0.9576605043015264, "grad_norm": 0.3553774952888489, "learning_rate": 8.82880964722832e-08, "loss": 0.2656, "step": 51584 }, { "epoch": 0.9576976344389451, "grad_norm": 0.34070703387260437, "learning_rate": 8.813350083091255e-08, "loss": 0.2815, "step": 51586 }, { "epoch": 0.9577347645763638, "grad_norm": 0.23878291249275208, "learning_rate": 8.79790400603564e-08, "loss": 0.2088, "step": 51588 }, { "epoch": 0.9577718947137823, "grad_norm": 0.3160630464553833, "learning_rate": 8.782471416271532e-08, "loss": 0.2797, "step": 51590 }, { "epoch": 0.957809024851201, "grad_norm": 0.2325183004140854, "learning_rate": 8.767052314008872e-08, "loss": 0.3662, "step": 51592 }, { "epoch": 0.9578461549886196, "grad_norm": 0.546848475933075, "learning_rate": 8.751646699457605e-08, "loss": 0.1907, "step": 51594 }, { "epoch": 0.9578832851260383, "grad_norm": 0.3785562813282013, "learning_rate": 8.736254572827341e-08, "loss": 0.2578, "step": 51596 }, { "epoch": 0.9579204152634568, "grad_norm": 0.5302824378013611, "learning_rate": 8.720875934327356e-08, "loss": 0.2715, "step": 51598 }, { "epoch": 0.9579575454008755, "grad_norm": 0.3110813498497009, "learning_rate": 8.705510784167037e-08, "loss": 0.0403, "step": 51600 }, { "epoch": 0.9579946755382942, "grad_norm": 0.6449050307273865, "learning_rate": 8.690159122555552e-08, "loss": 0.3297, "step": 51602 }, { "epoch": 0.9580318056757128, "grad_norm": 0.38409891724586487, "learning_rate": 8.674820949701623e-08, "loss": 0.4097, "step": 51604 }, { "epoch": 0.9580689358131315, "grad_norm": 0.25078266859054565, "learning_rate": 8.659496265814082e-08, "loss": 0.2994, "step": 51606 }, { "epoch": 0.95810606595055, "grad_norm": 0.39670518040657043, "learning_rate": 8.644185071101319e-08, "loss": 0.2524, "step": 51608 }, { "epoch": 0.9581431960879687, "grad_norm": 0.4671781063079834, "learning_rate": 8.628887365771721e-08, "loss": 0.2871, "step": 51610 }, { "epoch": 0.9581803262253874, "grad_norm": 0.5040252804756165, "learning_rate": 8.613603150033567e-08, "loss": 0.4066, "step": 51612 }, { "epoch": 0.958217456362806, "grad_norm": 0.41721487045288086, "learning_rate": 8.59833242409469e-08, "loss": 0.296, "step": 51614 }, { "epoch": 0.9582545865002247, "grad_norm": 0.3969128429889679, "learning_rate": 8.583075188162815e-08, "loss": 0.1765, "step": 51616 }, { "epoch": 0.9582917166376432, "grad_norm": 0.4134426414966583, "learning_rate": 8.567831442445773e-08, "loss": 0.2495, "step": 51618 }, { "epoch": 0.9583288467750619, "grad_norm": 0.4129183292388916, "learning_rate": 8.552601187150734e-08, "loss": 0.4507, "step": 51620 }, { "epoch": 0.9583659769124806, "grad_norm": 0.38789424300193787, "learning_rate": 8.537384422485195e-08, "loss": 0.2077, "step": 51622 }, { "epoch": 0.9584031070498992, "grad_norm": 0.2857956886291504, "learning_rate": 8.522181148655994e-08, "loss": 0.2961, "step": 51624 }, { "epoch": 0.9584402371873179, "grad_norm": 0.4785972237586975, "learning_rate": 8.506991365870188e-08, "loss": 0.4389, "step": 51626 }, { "epoch": 0.9584773673247364, "grad_norm": 0.5297247171401978, "learning_rate": 8.491815074334276e-08, "loss": 0.2955, "step": 51628 }, { "epoch": 0.9585144974621551, "grad_norm": 0.4262286126613617, "learning_rate": 8.47665227425487e-08, "loss": 0.202, "step": 51630 }, { "epoch": 0.9585516275995738, "grad_norm": 0.34194982051849365, "learning_rate": 8.461502965838253e-08, "loss": 0.0884, "step": 51632 }, { "epoch": 0.9585887577369924, "grad_norm": 0.2540087103843689, "learning_rate": 8.446367149290591e-08, "loss": 0.2065, "step": 51634 }, { "epoch": 0.958625887874411, "grad_norm": 0.2380489856004715, "learning_rate": 8.43124482481783e-08, "loss": 0.3655, "step": 51636 }, { "epoch": 0.9586630180118296, "grad_norm": 0.45640599727630615, "learning_rate": 8.416135992625696e-08, "loss": 0.2984, "step": 51638 }, { "epoch": 0.9587001481492483, "grad_norm": 0.42374998331069946, "learning_rate": 8.401040652919912e-08, "loss": 0.3952, "step": 51640 }, { "epoch": 0.958737278286667, "grad_norm": 0.44691967964172363, "learning_rate": 8.385958805905758e-08, "loss": 0.299, "step": 51642 }, { "epoch": 0.9587744084240856, "grad_norm": 0.6609805822372437, "learning_rate": 8.370890451788405e-08, "loss": 0.3957, "step": 51644 }, { "epoch": 0.9588115385615043, "grad_norm": 0.3307952284812927, "learning_rate": 8.35583559077291e-08, "loss": 0.2903, "step": 51646 }, { "epoch": 0.9588486686989228, "grad_norm": 0.3113168179988861, "learning_rate": 8.34079422306433e-08, "loss": 0.2529, "step": 51648 }, { "epoch": 0.9588857988363415, "grad_norm": 0.7100905776023865, "learning_rate": 8.325766348867059e-08, "loss": 0.1601, "step": 51650 }, { "epoch": 0.9589229289737601, "grad_norm": 0.4381139576435089, "learning_rate": 8.310751968385821e-08, "loss": 0.2397, "step": 51652 }, { "epoch": 0.9589600591111788, "grad_norm": 0.35432446002960205, "learning_rate": 8.295751081824676e-08, "loss": 0.2826, "step": 51654 }, { "epoch": 0.9589971892485974, "grad_norm": 0.48388224840164185, "learning_rate": 8.280763689387906e-08, "loss": 0.2811, "step": 51656 }, { "epoch": 0.959034319386016, "grad_norm": 0.35934674739837646, "learning_rate": 8.265789791279456e-08, "loss": 0.2151, "step": 51658 }, { "epoch": 0.9590714495234347, "grad_norm": 0.43315452337265015, "learning_rate": 8.25082938770294e-08, "loss": 0.2681, "step": 51660 }, { "epoch": 0.9591085796608533, "grad_norm": 0.44586893916130066, "learning_rate": 8.235882478861978e-08, "loss": 0.2726, "step": 51662 }, { "epoch": 0.959145709798272, "grad_norm": 0.2879968583583832, "learning_rate": 8.220949064960071e-08, "loss": 0.2467, "step": 51664 }, { "epoch": 0.9591828399356906, "grad_norm": 0.4043121039867401, "learning_rate": 8.206029146200278e-08, "loss": 0.1181, "step": 51666 }, { "epoch": 0.9592199700731092, "grad_norm": 0.3585694432258606, "learning_rate": 8.19112272278566e-08, "loss": 0.1599, "step": 51668 }, { "epoch": 0.9592571002105279, "grad_norm": 0.267323762178421, "learning_rate": 8.176229794919055e-08, "loss": 0.3345, "step": 51670 }, { "epoch": 0.9592942303479465, "grad_norm": 0.46867766976356506, "learning_rate": 8.161350362803078e-08, "loss": 0.2797, "step": 51672 }, { "epoch": 0.9593313604853652, "grad_norm": 0.7019122838973999, "learning_rate": 8.146484426640344e-08, "loss": 0.4138, "step": 51674 }, { "epoch": 0.9593684906227838, "grad_norm": 0.4788989722728729, "learning_rate": 8.131631986632804e-08, "loss": 0.1903, "step": 51676 }, { "epoch": 0.9594056207602024, "grad_norm": 0.2404189109802246, "learning_rate": 8.116793042982962e-08, "loss": 0.2649, "step": 51678 }, { "epoch": 0.9594427508976211, "grad_norm": 0.377534419298172, "learning_rate": 8.101967595892435e-08, "loss": 0.187, "step": 51680 }, { "epoch": 0.9594798810350397, "grad_norm": 0.40210217237472534, "learning_rate": 8.08715564556306e-08, "loss": 0.1981, "step": 51682 }, { "epoch": 0.9595170111724584, "grad_norm": 0.5479485988616943, "learning_rate": 8.072357192196345e-08, "loss": 0.1356, "step": 51684 }, { "epoch": 0.959554141309877, "grad_norm": 0.46031203866004944, "learning_rate": 8.057572235993682e-08, "loss": 0.4076, "step": 51686 }, { "epoch": 0.9595912714472956, "grad_norm": 0.29781094193458557, "learning_rate": 8.042800777156245e-08, "loss": 0.3117, "step": 51688 }, { "epoch": 0.9596284015847143, "grad_norm": 0.4593803286552429, "learning_rate": 8.028042815885206e-08, "loss": 0.3926, "step": 51690 }, { "epoch": 0.9596655317221329, "grad_norm": 0.55975341796875, "learning_rate": 8.013298352381072e-08, "loss": 0.3164, "step": 51692 }, { "epoch": 0.9597026618595516, "grad_norm": 0.28401824831962585, "learning_rate": 7.998567386844569e-08, "loss": 0.1594, "step": 51694 }, { "epoch": 0.9597397919969701, "grad_norm": 0.48278170824050903, "learning_rate": 7.983849919476206e-08, "loss": 0.2595, "step": 51696 }, { "epoch": 0.9597769221343888, "grad_norm": 0.24228018522262573, "learning_rate": 7.969145950476154e-08, "loss": 0.4278, "step": 51698 }, { "epoch": 0.9598140522718075, "grad_norm": 0.28728726506233215, "learning_rate": 7.954455480044587e-08, "loss": 0.2734, "step": 51700 }, { "epoch": 0.9598511824092261, "grad_norm": 0.30777013301849365, "learning_rate": 7.939778508381457e-08, "loss": 0.3074, "step": 51702 }, { "epoch": 0.9598883125466448, "grad_norm": 0.32437658309936523, "learning_rate": 7.925115035686271e-08, "loss": 0.3027, "step": 51704 }, { "epoch": 0.9599254426840633, "grad_norm": 0.459170937538147, "learning_rate": 7.910465062158756e-08, "loss": 0.2865, "step": 51706 }, { "epoch": 0.959962572821482, "grad_norm": 0.3641872704029083, "learning_rate": 7.895828587997978e-08, "loss": 0.3881, "step": 51708 }, { "epoch": 0.9599997029589007, "grad_norm": 0.4682431221008301, "learning_rate": 7.881205613403442e-08, "loss": 0.2795, "step": 51710 }, { "epoch": 0.9600368330963193, "grad_norm": 0.3898244798183441, "learning_rate": 7.866596138573989e-08, "loss": 0.3468, "step": 51712 }, { "epoch": 0.960073963233738, "grad_norm": 0.480672687292099, "learning_rate": 7.85200016370824e-08, "loss": 0.4507, "step": 51714 }, { "epoch": 0.9601110933711565, "grad_norm": 0.2736056447029114, "learning_rate": 7.837417689005034e-08, "loss": 0.143, "step": 51716 }, { "epoch": 0.9601482235085752, "grad_norm": 0.5502651333808899, "learning_rate": 7.822848714662656e-08, "loss": 0.2166, "step": 51718 }, { "epoch": 0.9601853536459939, "grad_norm": 0.5269626975059509, "learning_rate": 7.808293240879395e-08, "loss": 0.1047, "step": 51720 }, { "epoch": 0.9602224837834125, "grad_norm": 0.48788073658943176, "learning_rate": 7.793751267853312e-08, "loss": 0.2866, "step": 51722 }, { "epoch": 0.9602596139208311, "grad_norm": 0.5617185235023499, "learning_rate": 7.779222795782249e-08, "loss": 0.1351, "step": 51724 }, { "epoch": 0.9602967440582497, "grad_norm": 0.38651520013809204, "learning_rate": 7.764707824863937e-08, "loss": 0.0833, "step": 51726 }, { "epoch": 0.9603338741956684, "grad_norm": 0.4881591200828552, "learning_rate": 7.750206355295886e-08, "loss": 0.2759, "step": 51728 }, { "epoch": 0.9603710043330871, "grad_norm": 0.3922053873538971, "learning_rate": 7.735718387275382e-08, "loss": 0.2945, "step": 51730 }, { "epoch": 0.9604081344705057, "grad_norm": 0.45379650592803955, "learning_rate": 7.721243920999599e-08, "loss": 0.303, "step": 51732 }, { "epoch": 0.9604452646079243, "grad_norm": 0.38330143690109253, "learning_rate": 7.706782956665493e-08, "loss": 0.3267, "step": 51734 }, { "epoch": 0.9604823947453429, "grad_norm": 0.44030776619911194, "learning_rate": 7.692335494469683e-08, "loss": 0.3364, "step": 51736 }, { "epoch": 0.9605195248827616, "grad_norm": 0.3758874237537384, "learning_rate": 7.6779015346089e-08, "loss": 0.3852, "step": 51738 }, { "epoch": 0.9605566550201803, "grad_norm": 0.2617555260658264, "learning_rate": 7.663481077279655e-08, "loss": 0.227, "step": 51740 }, { "epoch": 0.9605937851575989, "grad_norm": 0.32857197523117065, "learning_rate": 7.649074122678013e-08, "loss": 0.3449, "step": 51742 }, { "epoch": 0.9606309152950175, "grad_norm": 0.3772386908531189, "learning_rate": 7.634680670999928e-08, "loss": 0.2439, "step": 51744 }, { "epoch": 0.9606680454324361, "grad_norm": 0.3572908043861389, "learning_rate": 7.620300722441465e-08, "loss": 0.3608, "step": 51746 }, { "epoch": 0.9607051755698548, "grad_norm": 0.39418825507164, "learning_rate": 7.605934277198134e-08, "loss": 0.3055, "step": 51748 }, { "epoch": 0.9607423057072734, "grad_norm": 0.271115243434906, "learning_rate": 7.591581335465448e-08, "loss": 0.234, "step": 51750 }, { "epoch": 0.960779435844692, "grad_norm": 0.3313447833061218, "learning_rate": 7.577241897438802e-08, "loss": 0.3719, "step": 51752 }, { "epoch": 0.9608165659821107, "grad_norm": 0.40390798449516296, "learning_rate": 7.562915963313156e-08, "loss": 0.4454, "step": 51754 }, { "epoch": 0.9608536961195293, "grad_norm": 0.1685211956501007, "learning_rate": 7.548603533283683e-08, "loss": 0.1217, "step": 51756 }, { "epoch": 0.960890826256948, "grad_norm": 0.31904810667037964, "learning_rate": 7.534304607544895e-08, "loss": 0.2766, "step": 51758 }, { "epoch": 0.9609279563943666, "grad_norm": 0.37015795707702637, "learning_rate": 7.520019186291416e-08, "loss": 0.493, "step": 51760 }, { "epoch": 0.9609650865317853, "grad_norm": 0.7840816974639893, "learning_rate": 7.505747269717645e-08, "loss": 0.3699, "step": 51762 }, { "epoch": 0.9610022166692039, "grad_norm": 0.4443550705909729, "learning_rate": 7.49148885801776e-08, "loss": 0.2254, "step": 51764 }, { "epoch": 0.9610393468066225, "grad_norm": 0.3132689893245697, "learning_rate": 7.477243951385937e-08, "loss": 0.305, "step": 51766 }, { "epoch": 0.9610764769440412, "grad_norm": 0.5389214754104614, "learning_rate": 7.463012550015803e-08, "loss": 0.4715, "step": 51768 }, { "epoch": 0.9611136070814598, "grad_norm": 0.42372971773147583, "learning_rate": 7.448794654100976e-08, "loss": 0.3061, "step": 51770 }, { "epoch": 0.9611507372188784, "grad_norm": 0.2763903737068176, "learning_rate": 7.434590263835084e-08, "loss": 0.1386, "step": 51772 }, { "epoch": 0.9611878673562971, "grad_norm": 0.45145857334136963, "learning_rate": 7.420399379411414e-08, "loss": 0.2279, "step": 51774 }, { "epoch": 0.9612249974937157, "grad_norm": 0.4513481557369232, "learning_rate": 7.406222001022923e-08, "loss": 0.3085, "step": 51776 }, { "epoch": 0.9612621276311344, "grad_norm": 0.43919387459754944, "learning_rate": 7.392058128862677e-08, "loss": 0.307, "step": 51778 }, { "epoch": 0.961299257768553, "grad_norm": 0.2974982261657715, "learning_rate": 7.377907763123304e-08, "loss": 0.4287, "step": 51780 }, { "epoch": 0.9613363879059716, "grad_norm": 0.42442306876182556, "learning_rate": 7.363770903997203e-08, "loss": 0.1967, "step": 51782 }, { "epoch": 0.9613735180433903, "grad_norm": 0.5041149258613586, "learning_rate": 7.349647551676997e-08, "loss": 0.2604, "step": 51784 }, { "epoch": 0.9614106481808089, "grad_norm": 0.49555397033691406, "learning_rate": 7.335537706354756e-08, "loss": 0.3164, "step": 51786 }, { "epoch": 0.9614477783182276, "grad_norm": 0.5422022938728333, "learning_rate": 7.321441368222437e-08, "loss": 0.2222, "step": 51788 }, { "epoch": 0.9614849084556462, "grad_norm": 0.27792268991470337, "learning_rate": 7.307358537471998e-08, "loss": 0.2901, "step": 51790 }, { "epoch": 0.9615220385930648, "grad_norm": 0.24670034646987915, "learning_rate": 7.29328921429484e-08, "loss": 0.1027, "step": 51792 }, { "epoch": 0.9615591687304835, "grad_norm": 0.5402873158454895, "learning_rate": 7.279233398882479e-08, "loss": 0.1407, "step": 51794 }, { "epoch": 0.9615962988679021, "grad_norm": 0.5390249490737915, "learning_rate": 7.265191091426204e-08, "loss": 0.356, "step": 51796 }, { "epoch": 0.9616334290053208, "grad_norm": 0.36146315932273865, "learning_rate": 7.251162292117198e-08, "loss": 0.1821, "step": 51798 }, { "epoch": 0.9616705591427394, "grad_norm": 0.5167868137359619, "learning_rate": 7.237147001146195e-08, "loss": 0.2239, "step": 51800 }, { "epoch": 0.961707689280158, "grad_norm": 0.2698373794555664, "learning_rate": 7.223145218704042e-08, "loss": 0.3792, "step": 51802 }, { "epoch": 0.9617448194175766, "grad_norm": 0.3992241621017456, "learning_rate": 7.209156944981033e-08, "loss": 0.079, "step": 51804 }, { "epoch": 0.9617819495549953, "grad_norm": 0.30803990364074707, "learning_rate": 7.195182180167793e-08, "loss": 0.2093, "step": 51806 }, { "epoch": 0.961819079692414, "grad_norm": 0.41861769556999207, "learning_rate": 7.181220924454169e-08, "loss": 0.5198, "step": 51808 }, { "epoch": 0.9618562098298326, "grad_norm": 0.5619530081748962, "learning_rate": 7.167273178030343e-08, "loss": 0.1621, "step": 51810 }, { "epoch": 0.9618933399672512, "grad_norm": 0.42366135120391846, "learning_rate": 7.153338941086052e-08, "loss": 0.3556, "step": 51812 }, { "epoch": 0.9619304701046698, "grad_norm": 0.32972222566604614, "learning_rate": 7.139418213810922e-08, "loss": 0.2898, "step": 51814 }, { "epoch": 0.9619676002420885, "grad_norm": 0.33899882435798645, "learning_rate": 7.125510996394358e-08, "loss": 0.1679, "step": 51816 }, { "epoch": 0.9620047303795072, "grad_norm": 0.36961978673934937, "learning_rate": 7.111617289025541e-08, "loss": 0.2689, "step": 51818 }, { "epoch": 0.9620418605169258, "grad_norm": 0.38777318596839905, "learning_rate": 7.097737091893652e-08, "loss": 0.275, "step": 51820 }, { "epoch": 0.9620789906543444, "grad_norm": 0.49063849449157715, "learning_rate": 7.083870405187432e-08, "loss": 0.2872, "step": 51822 }, { "epoch": 0.962116120791763, "grad_norm": 0.35705888271331787, "learning_rate": 7.070017229095615e-08, "loss": 0.3404, "step": 51824 }, { "epoch": 0.9621532509291817, "grad_norm": 0.3247383236885071, "learning_rate": 7.05617756380661e-08, "loss": 0.3949, "step": 51826 }, { "epoch": 0.9621903810666004, "grad_norm": 0.26819127798080444, "learning_rate": 7.04235140950893e-08, "loss": 0.3194, "step": 51828 }, { "epoch": 0.962227511204019, "grad_norm": 0.5702958106994629, "learning_rate": 7.028538766390536e-08, "loss": 0.2757, "step": 51830 }, { "epoch": 0.9622646413414376, "grad_norm": 0.30072227120399475, "learning_rate": 7.014739634639389e-08, "loss": 0.283, "step": 51832 }, { "epoch": 0.9623017714788562, "grad_norm": 0.4023928642272949, "learning_rate": 7.00095401444334e-08, "loss": 0.2026, "step": 51834 }, { "epoch": 0.9623389016162749, "grad_norm": 0.5951882600784302, "learning_rate": 6.987181905990014e-08, "loss": 0.3694, "step": 51836 }, { "epoch": 0.9623760317536936, "grad_norm": 0.4866952896118164, "learning_rate": 6.97342330946671e-08, "loss": 0.1818, "step": 51838 }, { "epoch": 0.9624131618911121, "grad_norm": 0.42780017852783203, "learning_rate": 6.959678225060496e-08, "loss": 0.2664, "step": 51840 }, { "epoch": 0.9624502920285308, "grad_norm": 0.43010371923446655, "learning_rate": 6.94594665295878e-08, "loss": 0.119, "step": 51842 }, { "epoch": 0.9624874221659494, "grad_norm": 0.4382522702217102, "learning_rate": 6.932228593348078e-08, "loss": 0.1905, "step": 51844 }, { "epoch": 0.9625245523033681, "grad_norm": 0.5566995739936829, "learning_rate": 6.918524046415132e-08, "loss": 0.35, "step": 51846 }, { "epoch": 0.9625616824407867, "grad_norm": 0.22547376155853271, "learning_rate": 6.904833012346457e-08, "loss": 0.1593, "step": 51848 }, { "epoch": 0.9625988125782053, "grad_norm": 0.38808420300483704, "learning_rate": 6.89115549132835e-08, "loss": 0.1203, "step": 51850 }, { "epoch": 0.962635942715624, "grad_norm": 0.40613439679145813, "learning_rate": 6.877491483546883e-08, "loss": 0.1904, "step": 51852 }, { "epoch": 0.9626730728530426, "grad_norm": 0.4027932286262512, "learning_rate": 6.863840989188131e-08, "loss": 0.2431, "step": 51854 }, { "epoch": 0.9627102029904613, "grad_norm": 0.3869066536426544, "learning_rate": 6.850204008437611e-08, "loss": 0.2266, "step": 51856 }, { "epoch": 0.9627473331278799, "grad_norm": 0.4246046841144562, "learning_rate": 6.836580541480953e-08, "loss": 0.159, "step": 51858 }, { "epoch": 0.9627844632652985, "grad_norm": 0.6201530694961548, "learning_rate": 6.822970588503675e-08, "loss": 0.238, "step": 51860 }, { "epoch": 0.9628215934027172, "grad_norm": 0.39868947863578796, "learning_rate": 6.80937414969085e-08, "loss": 0.1883, "step": 51862 }, { "epoch": 0.9628587235401358, "grad_norm": 0.33960285782814026, "learning_rate": 6.795791225227443e-08, "loss": 0.3644, "step": 51864 }, { "epoch": 0.9628958536775545, "grad_norm": 0.1643591970205307, "learning_rate": 6.782221815298306e-08, "loss": 0.123, "step": 51866 }, { "epoch": 0.962932983814973, "grad_norm": 0.46688312292099, "learning_rate": 6.76866592008818e-08, "loss": 0.2929, "step": 51868 }, { "epoch": 0.9629701139523917, "grad_norm": 0.4753555655479431, "learning_rate": 6.75512353978136e-08, "loss": 0.3647, "step": 51870 }, { "epoch": 0.9630072440898104, "grad_norm": 0.6376470327377319, "learning_rate": 6.741594674562147e-08, "loss": 0.2634, "step": 51872 }, { "epoch": 0.963044374227229, "grad_norm": 0.5328581929206848, "learning_rate": 6.728079324614723e-08, "loss": 0.3076, "step": 51874 }, { "epoch": 0.9630815043646477, "grad_norm": 0.3925354480743408, "learning_rate": 6.714577490123053e-08, "loss": 0.3007, "step": 51876 }, { "epoch": 0.9631186345020663, "grad_norm": 0.2147914320230484, "learning_rate": 6.701089171270658e-08, "loss": 0.1747, "step": 51878 }, { "epoch": 0.9631557646394849, "grad_norm": 0.36584752798080444, "learning_rate": 6.687614368241168e-08, "loss": 0.0897, "step": 51880 }, { "epoch": 0.9631928947769036, "grad_norm": 0.41972261667251587, "learning_rate": 6.674153081217882e-08, "loss": 0.2913, "step": 51882 }, { "epoch": 0.9632300249143222, "grad_norm": 0.403815895318985, "learning_rate": 6.660705310384207e-08, "loss": 0.3287, "step": 51884 }, { "epoch": 0.9632671550517409, "grad_norm": 0.269770085811615, "learning_rate": 6.647271055922777e-08, "loss": 0.1702, "step": 51886 }, { "epoch": 0.9633042851891594, "grad_norm": 0.38881126046180725, "learning_rate": 6.633850318016554e-08, "loss": 0.403, "step": 51888 }, { "epoch": 0.9633414153265781, "grad_norm": 0.3687662184238434, "learning_rate": 6.620443096848172e-08, "loss": 0.2, "step": 51890 }, { "epoch": 0.9633785454639968, "grad_norm": 0.656246542930603, "learning_rate": 6.60704939260004e-08, "loss": 0.1359, "step": 51892 }, { "epoch": 0.9634156756014154, "grad_norm": 0.3333469033241272, "learning_rate": 6.593669205454457e-08, "loss": 0.1591, "step": 51894 }, { "epoch": 0.9634528057388341, "grad_norm": 0.4012209177017212, "learning_rate": 6.580302535593385e-08, "loss": 0.3402, "step": 51896 }, { "epoch": 0.9634899358762526, "grad_norm": 0.5394437313079834, "learning_rate": 6.566949383198684e-08, "loss": 0.1841, "step": 51898 }, { "epoch": 0.9635270660136713, "grad_norm": 0.2256866991519928, "learning_rate": 6.553609748452206e-08, "loss": 0.323, "step": 51900 }, { "epoch": 0.9635641961510899, "grad_norm": 0.28045791387557983, "learning_rate": 6.540283631535471e-08, "loss": 0.3361, "step": 51902 }, { "epoch": 0.9636013262885086, "grad_norm": 0.39396989345550537, "learning_rate": 6.52697103262967e-08, "loss": 0.2667, "step": 51904 }, { "epoch": 0.9636384564259273, "grad_norm": 0.5530263781547546, "learning_rate": 6.51367195191599e-08, "loss": 0.4838, "step": 51906 }, { "epoch": 0.9636755865633458, "grad_norm": 0.27274277806282043, "learning_rate": 6.500386389575286e-08, "loss": 0.3169, "step": 51908 }, { "epoch": 0.9637127167007645, "grad_norm": 0.47739526629447937, "learning_rate": 6.487114345788526e-08, "loss": 0.4687, "step": 51910 }, { "epoch": 0.9637498468381831, "grad_norm": 0.47350066900253296, "learning_rate": 6.47385582073623e-08, "loss": 0.3993, "step": 51912 }, { "epoch": 0.9637869769756018, "grad_norm": 0.38004085421562195, "learning_rate": 6.460610814598811e-08, "loss": 0.3426, "step": 51914 }, { "epoch": 0.9638241071130205, "grad_norm": 0.23073621094226837, "learning_rate": 6.44737932755657e-08, "loss": 0.2882, "step": 51916 }, { "epoch": 0.963861237250439, "grad_norm": 0.22326688468456268, "learning_rate": 6.434161359789359e-08, "loss": 0.1193, "step": 51918 }, { "epoch": 0.9638983673878577, "grad_norm": 0.30562201142311096, "learning_rate": 6.42095691147715e-08, "loss": 0.1272, "step": 51920 }, { "epoch": 0.9639354975252763, "grad_norm": 0.43220290541648865, "learning_rate": 6.407765982799574e-08, "loss": 0.1837, "step": 51922 }, { "epoch": 0.963972627662695, "grad_norm": 0.43850141763687134, "learning_rate": 6.394588573936156e-08, "loss": 0.1758, "step": 51924 }, { "epoch": 0.9640097578001137, "grad_norm": 0.5628682374954224, "learning_rate": 6.381424685066195e-08, "loss": 0.3684, "step": 51926 }, { "epoch": 0.9640468879375322, "grad_norm": 0.3620678782463074, "learning_rate": 6.368274316368884e-08, "loss": 0.141, "step": 51928 }, { "epoch": 0.9640840180749509, "grad_norm": 0.40990346670150757, "learning_rate": 6.355137468023076e-08, "loss": 0.2374, "step": 51930 }, { "epoch": 0.9641211482123695, "grad_norm": 0.27141252160072327, "learning_rate": 6.34201414020752e-08, "loss": 0.1238, "step": 51932 }, { "epoch": 0.9641582783497882, "grad_norm": 0.605760395526886, "learning_rate": 6.328904333100739e-08, "loss": 0.2618, "step": 51934 }, { "epoch": 0.9641954084872069, "grad_norm": 0.47716793417930603, "learning_rate": 6.315808046881144e-08, "loss": 0.2998, "step": 51936 }, { "epoch": 0.9642325386246254, "grad_norm": 0.23172542452812195, "learning_rate": 6.30272528172704e-08, "loss": 0.0864, "step": 51938 }, { "epoch": 0.9642696687620441, "grad_norm": 0.48228976130485535, "learning_rate": 6.289656037816394e-08, "loss": 0.2307, "step": 51940 }, { "epoch": 0.9643067988994627, "grad_norm": 0.29130199551582336, "learning_rate": 6.276600315327064e-08, "loss": 0.3453, "step": 51942 }, { "epoch": 0.9643439290368814, "grad_norm": 0.41725465655326843, "learning_rate": 6.263558114436574e-08, "loss": 0.1707, "step": 51944 }, { "epoch": 0.9643810591743001, "grad_norm": 0.19328583776950836, "learning_rate": 6.250529435322561e-08, "loss": 0.2533, "step": 51946 }, { "epoch": 0.9644181893117186, "grad_norm": 0.3093467652797699, "learning_rate": 6.237514278162215e-08, "loss": 0.2097, "step": 51948 }, { "epoch": 0.9644553194491373, "grad_norm": 0.3799770474433899, "learning_rate": 6.224512643132508e-08, "loss": 0.4986, "step": 51950 }, { "epoch": 0.9644924495865559, "grad_norm": 0.3797805905342102, "learning_rate": 6.211524530410628e-08, "loss": 0.0691, "step": 51952 }, { "epoch": 0.9645295797239746, "grad_norm": 0.7204617261886597, "learning_rate": 6.198549940173104e-08, "loss": 0.3494, "step": 51954 }, { "epoch": 0.9645667098613931, "grad_norm": 0.41263675689697266, "learning_rate": 6.18558887259646e-08, "loss": 0.1385, "step": 51956 }, { "epoch": 0.9646038399988118, "grad_norm": 0.4448762834072113, "learning_rate": 6.17264132785722e-08, "loss": 0.1774, "step": 51958 }, { "epoch": 0.9646409701362305, "grad_norm": 0.23807239532470703, "learning_rate": 6.159707306131468e-08, "loss": 0.1604, "step": 51960 }, { "epoch": 0.9646781002736491, "grad_norm": 0.3758525550365448, "learning_rate": 6.146786807595173e-08, "loss": 0.1963, "step": 51962 }, { "epoch": 0.9647152304110678, "grad_norm": 0.32541969418525696, "learning_rate": 6.133879832424084e-08, "loss": 0.1494, "step": 51964 }, { "epoch": 0.9647523605484863, "grad_norm": 0.5362234115600586, "learning_rate": 6.120986380794059e-08, "loss": 0.2944, "step": 51966 }, { "epoch": 0.964789490685905, "grad_norm": 0.3478713631629944, "learning_rate": 6.108106452880403e-08, "loss": 0.2385, "step": 51968 }, { "epoch": 0.9648266208233237, "grad_norm": 0.5390769243240356, "learning_rate": 6.095240048858308e-08, "loss": 0.3323, "step": 51970 }, { "epoch": 0.9648637509607423, "grad_norm": 0.5467588305473328, "learning_rate": 6.08238716890297e-08, "loss": 0.4634, "step": 51972 }, { "epoch": 0.964900881098161, "grad_norm": 0.4785202741622925, "learning_rate": 6.069547813189136e-08, "loss": 0.3048, "step": 51974 }, { "epoch": 0.9649380112355795, "grad_norm": 0.24885335564613342, "learning_rate": 6.056721981891556e-08, "loss": 0.3631, "step": 51976 }, { "epoch": 0.9649751413729982, "grad_norm": 0.3617143929004669, "learning_rate": 6.043909675184867e-08, "loss": 0.1883, "step": 51978 }, { "epoch": 0.9650122715104169, "grad_norm": 0.4388920068740845, "learning_rate": 6.031110893243375e-08, "loss": 0.2838, "step": 51980 }, { "epoch": 0.9650494016478355, "grad_norm": 0.23073747754096985, "learning_rate": 6.018325636241052e-08, "loss": 0.3341, "step": 51982 }, { "epoch": 0.9650865317852542, "grad_norm": 0.2821613848209381, "learning_rate": 6.005553904352091e-08, "loss": 0.2022, "step": 51984 }, { "epoch": 0.9651236619226727, "grad_norm": 0.6161848902702332, "learning_rate": 5.992795697750132e-08, "loss": 0.0772, "step": 51986 }, { "epoch": 0.9651607920600914, "grad_norm": 0.7914007306098938, "learning_rate": 5.980051016608812e-08, "loss": 0.4433, "step": 51988 }, { "epoch": 0.9651979221975101, "grad_norm": 0.3046756982803345, "learning_rate": 5.967319861101661e-08, "loss": 0.2908, "step": 51990 }, { "epoch": 0.9652350523349287, "grad_norm": 0.3921298682689667, "learning_rate": 5.954602231401763e-08, "loss": 0.1715, "step": 51992 }, { "epoch": 0.9652721824723474, "grad_norm": 0.44833242893218994, "learning_rate": 5.9418981276821995e-08, "loss": 0.2819, "step": 51994 }, { "epoch": 0.9653093126097659, "grad_norm": 0.5143845081329346, "learning_rate": 5.929207550115834e-08, "loss": 0.2821, "step": 51996 }, { "epoch": 0.9653464427471846, "grad_norm": 0.26001134514808655, "learning_rate": 5.916530498875417e-08, "loss": 0.3764, "step": 51998 }, { "epoch": 0.9653835728846032, "grad_norm": 0.3874657452106476, "learning_rate": 5.9038669741332547e-08, "loss": 0.2241, "step": 52000 }, { "epoch": 0.9654207030220219, "grad_norm": 0.40083709359169006, "learning_rate": 5.8912169760619866e-08, "loss": 0.3464, "step": 52002 }, { "epoch": 0.9654578331594406, "grad_norm": 0.22208306193351746, "learning_rate": 5.878580504833475e-08, "loss": 0.1798, "step": 52004 }, { "epoch": 0.9654949632968591, "grad_norm": 0.33477073907852173, "learning_rate": 5.865957560619695e-08, "loss": 0.2475, "step": 52006 }, { "epoch": 0.9655320934342778, "grad_norm": 0.664359986782074, "learning_rate": 5.853348143592508e-08, "loss": 0.2207, "step": 52008 }, { "epoch": 0.9655692235716964, "grad_norm": 0.34503164887428284, "learning_rate": 5.8407522539235537e-08, "loss": 0.4315, "step": 52010 }, { "epoch": 0.9656063537091151, "grad_norm": 0.1442912518978119, "learning_rate": 5.828169891783919e-08, "loss": 0.1798, "step": 52012 }, { "epoch": 0.9656434838465338, "grad_norm": 0.4738776981830597, "learning_rate": 5.815601057345133e-08, "loss": 0.3199, "step": 52014 }, { "epoch": 0.9656806139839523, "grad_norm": 0.31175050139427185, "learning_rate": 5.803045750778058e-08, "loss": 0.2108, "step": 52016 }, { "epoch": 0.965717744121371, "grad_norm": 0.3600679337978363, "learning_rate": 5.7905039722535585e-08, "loss": 0.0992, "step": 52018 }, { "epoch": 0.9657548742587896, "grad_norm": 0.33089563250541687, "learning_rate": 5.777975721942275e-08, "loss": 0.0236, "step": 52020 }, { "epoch": 0.9657920043962083, "grad_norm": 0.28495630621910095, "learning_rate": 5.765461000014627e-08, "loss": 0.2472, "step": 52022 }, { "epoch": 0.965829134533627, "grad_norm": 0.28831151127815247, "learning_rate": 5.752959806641145e-08, "loss": 0.2628, "step": 52024 }, { "epoch": 0.9658662646710455, "grad_norm": 0.26042693853378296, "learning_rate": 5.740472141991582e-08, "loss": 0.2352, "step": 52026 }, { "epoch": 0.9659033948084642, "grad_norm": 0.30099377036094666, "learning_rate": 5.7279980062362464e-08, "loss": 0.3728, "step": 52028 }, { "epoch": 0.9659405249458828, "grad_norm": 0.3281489610671997, "learning_rate": 5.7155373995445575e-08, "loss": 0.2401, "step": 52030 }, { "epoch": 0.9659776550833015, "grad_norm": 0.4513225257396698, "learning_rate": 5.703090322086158e-08, "loss": 0.4553, "step": 52032 }, { "epoch": 0.9660147852207202, "grad_norm": 0.48078542947769165, "learning_rate": 5.690656774030468e-08, "loss": 0.1003, "step": 52034 }, { "epoch": 0.9660519153581387, "grad_norm": 0.3619637191295624, "learning_rate": 5.678236755546684e-08, "loss": 0.3818, "step": 52036 }, { "epoch": 0.9660890454955574, "grad_norm": 0.592544674873352, "learning_rate": 5.6658302668036734e-08, "loss": 0.2069, "step": 52038 }, { "epoch": 0.966126175632976, "grad_norm": 0.30436888337135315, "learning_rate": 5.6534373079703e-08, "loss": 0.1948, "step": 52040 }, { "epoch": 0.9661633057703947, "grad_norm": 0.32228145003318787, "learning_rate": 5.641057879215317e-08, "loss": 0.2733, "step": 52042 }, { "epoch": 0.9662004359078133, "grad_norm": 0.4426427185535431, "learning_rate": 5.628691980707146e-08, "loss": 0.3353, "step": 52044 }, { "epoch": 0.9662375660452319, "grad_norm": 0.46166566014289856, "learning_rate": 5.616339612613875e-08, "loss": 0.2531, "step": 52046 }, { "epoch": 0.9662746961826506, "grad_norm": 0.2919369637966156, "learning_rate": 5.604000775103702e-08, "loss": 0.4079, "step": 52048 }, { "epoch": 0.9663118263200692, "grad_norm": 0.2622028887271881, "learning_rate": 5.591675468344604e-08, "loss": 0.296, "step": 52050 }, { "epoch": 0.9663489564574879, "grad_norm": 0.2078421264886856, "learning_rate": 5.5793636925042246e-08, "loss": 0.4202, "step": 52052 }, { "epoch": 0.9663860865949064, "grad_norm": 0.2534765303134918, "learning_rate": 5.567065447749987e-08, "loss": 0.2572, "step": 52054 }, { "epoch": 0.9664232167323251, "grad_norm": 0.38242074847221375, "learning_rate": 5.55478073424931e-08, "loss": 0.2907, "step": 52056 }, { "epoch": 0.9664603468697438, "grad_norm": 0.37709495425224304, "learning_rate": 5.542509552169395e-08, "loss": 0.2081, "step": 52058 }, { "epoch": 0.9664974770071624, "grad_norm": 0.3213070034980774, "learning_rate": 5.5302519016771085e-08, "loss": 0.1386, "step": 52060 }, { "epoch": 0.9665346071445811, "grad_norm": 0.4885368049144745, "learning_rate": 5.518007782939428e-08, "loss": 0.1624, "step": 52062 }, { "epoch": 0.9665717372819996, "grad_norm": 0.24357494711875916, "learning_rate": 5.5057771961227744e-08, "loss": 0.3749, "step": 52064 }, { "epoch": 0.9666088674194183, "grad_norm": 0.8089112639427185, "learning_rate": 5.493560141393572e-08, "loss": 0.1917, "step": 52066 }, { "epoch": 0.966645997556837, "grad_norm": 0.40955623984336853, "learning_rate": 5.481356618918243e-08, "loss": 0.2914, "step": 52068 }, { "epoch": 0.9666831276942556, "grad_norm": 0.2952806055545807, "learning_rate": 5.469166628862765e-08, "loss": 0.3548, "step": 52070 }, { "epoch": 0.9667202578316743, "grad_norm": 0.43467700481414795, "learning_rate": 5.4569901713927844e-08, "loss": 0.2834, "step": 52072 }, { "epoch": 0.9667573879690928, "grad_norm": 0.28507158160209656, "learning_rate": 5.444827246674278e-08, "loss": 0.2506, "step": 52074 }, { "epoch": 0.9667945181065115, "grad_norm": 0.22088083624839783, "learning_rate": 5.43267785487267e-08, "loss": 0.2486, "step": 52076 }, { "epoch": 0.9668316482439302, "grad_norm": 0.3074319064617157, "learning_rate": 5.420541996153161e-08, "loss": 0.2668, "step": 52078 }, { "epoch": 0.9668687783813488, "grad_norm": 0.2029096633195877, "learning_rate": 5.4084196706810646e-08, "loss": 0.2417, "step": 52080 }, { "epoch": 0.9669059085187675, "grad_norm": 0.3142991065979004, "learning_rate": 5.396310878621136e-08, "loss": 0.2092, "step": 52082 }, { "epoch": 0.966943038656186, "grad_norm": 0.4453062415122986, "learning_rate": 5.3842156201382444e-08, "loss": 0.4936, "step": 52084 }, { "epoch": 0.9669801687936047, "grad_norm": 0.9171554446220398, "learning_rate": 5.3721338953969246e-08, "loss": 0.2611, "step": 52086 }, { "epoch": 0.9670172989310234, "grad_norm": 0.4402405321598053, "learning_rate": 5.360065704561712e-08, "loss": 0.3302, "step": 52088 }, { "epoch": 0.967054429068442, "grad_norm": 0.40600696206092834, "learning_rate": 5.3480110477965864e-08, "loss": 0.318, "step": 52090 }, { "epoch": 0.9670915592058607, "grad_norm": 0.4360716640949249, "learning_rate": 5.33596992526586e-08, "loss": 0.3749, "step": 52092 }, { "epoch": 0.9671286893432792, "grad_norm": 0.3171249330043793, "learning_rate": 5.32394233713307e-08, "loss": 0.2076, "step": 52094 }, { "epoch": 0.9671658194806979, "grad_norm": 0.3083970844745636, "learning_rate": 5.3119282835621956e-08, "loss": 0.1401, "step": 52096 }, { "epoch": 0.9672029496181166, "grad_norm": 0.36607569456100464, "learning_rate": 5.2999277647163284e-08, "loss": 0.4236, "step": 52098 }, { "epoch": 0.9672400797555352, "grad_norm": 0.33107033371925354, "learning_rate": 5.2879407807591156e-08, "loss": 0.2523, "step": 52100 }, { "epoch": 0.9672772098929538, "grad_norm": 0.35962867736816406, "learning_rate": 5.2759673318534266e-08, "loss": 0.1013, "step": 52102 }, { "epoch": 0.9673143400303724, "grad_norm": 0.290518581867218, "learning_rate": 5.2640074181623535e-08, "loss": 0.28, "step": 52104 }, { "epoch": 0.9673514701677911, "grad_norm": 0.7249562740325928, "learning_rate": 5.252061039848655e-08, "loss": 0.1727, "step": 52106 }, { "epoch": 0.9673886003052097, "grad_norm": 1.3534290790557861, "learning_rate": 5.240128197074645e-08, "loss": 0.3881, "step": 52108 }, { "epoch": 0.9674257304426284, "grad_norm": 0.3808611333370209, "learning_rate": 5.2282088900028615e-08, "loss": 0.4356, "step": 52110 }, { "epoch": 0.967462860580047, "grad_norm": 0.6429833769798279, "learning_rate": 5.216303118795396e-08, "loss": 0.3072, "step": 52112 }, { "epoch": 0.9674999907174656, "grad_norm": 0.33377113938331604, "learning_rate": 5.2044108836144525e-08, "loss": 0.2278, "step": 52114 }, { "epoch": 0.9675371208548843, "grad_norm": 0.25443726778030396, "learning_rate": 5.1925321846216794e-08, "loss": 0.2458, "step": 52116 }, { "epoch": 0.9675742509923029, "grad_norm": 0.4367438852787018, "learning_rate": 5.1806670219788357e-08, "loss": 0.3858, "step": 52118 }, { "epoch": 0.9676113811297216, "grad_norm": 0.3579873740673065, "learning_rate": 5.168815395847127e-08, "loss": 0.1641, "step": 52120 }, { "epoch": 0.9676485112671402, "grad_norm": 0.3920475244522095, "learning_rate": 5.156977306388089e-08, "loss": 0.2737, "step": 52122 }, { "epoch": 0.9676856414045588, "grad_norm": 0.34164249897003174, "learning_rate": 5.145152753762595e-08, "loss": 0.4361, "step": 52124 }, { "epoch": 0.9677227715419775, "grad_norm": 0.6192024350166321, "learning_rate": 5.133341738131625e-08, "loss": 0.3249, "step": 52126 }, { "epoch": 0.9677599016793961, "grad_norm": 0.4364921748638153, "learning_rate": 5.121544259656053e-08, "loss": 0.3604, "step": 52128 }, { "epoch": 0.9677970318168148, "grad_norm": 0.2823511064052582, "learning_rate": 5.109760318496082e-08, "loss": 0.2594, "step": 52130 }, { "epoch": 0.9678341619542334, "grad_norm": 0.4863722324371338, "learning_rate": 5.0979899148123624e-08, "loss": 0.2451, "step": 52132 }, { "epoch": 0.967871292091652, "grad_norm": 0.3227176070213318, "learning_rate": 5.086233048764877e-08, "loss": 0.2854, "step": 52134 }, { "epoch": 0.9679084222290707, "grad_norm": 0.26267120242118835, "learning_rate": 5.07448972051372e-08, "loss": 0.2057, "step": 52136 }, { "epoch": 0.9679455523664893, "grad_norm": 0.42357468605041504, "learning_rate": 5.062759930218541e-08, "loss": 0.34, "step": 52138 }, { "epoch": 0.967982682503908, "grad_norm": 0.35861295461654663, "learning_rate": 5.051043678038992e-08, "loss": 0.4334, "step": 52140 }, { "epoch": 0.9680198126413266, "grad_norm": 0.5864925384521484, "learning_rate": 5.039340964134498e-08, "loss": 0.2277, "step": 52142 }, { "epoch": 0.9680569427787452, "grad_norm": 0.5402977466583252, "learning_rate": 5.0276517886643784e-08, "loss": 0.2578, "step": 52144 }, { "epoch": 0.9680940729161639, "grad_norm": 0.3470340371131897, "learning_rate": 5.015976151787616e-08, "loss": 0.1117, "step": 52146 }, { "epoch": 0.9681312030535825, "grad_norm": 0.44760286808013916, "learning_rate": 5.004314053663084e-08, "loss": 0.1137, "step": 52148 }, { "epoch": 0.9681683331910012, "grad_norm": 0.4367511570453644, "learning_rate": 4.9926654944495447e-08, "loss": 0.3033, "step": 52150 }, { "epoch": 0.9682054633284197, "grad_norm": 0.5647957921028137, "learning_rate": 4.981030474305426e-08, "loss": 0.3964, "step": 52152 }, { "epoch": 0.9682425934658384, "grad_norm": 0.32753831148147583, "learning_rate": 4.9694089933889353e-08, "loss": 0.2173, "step": 52154 }, { "epoch": 0.9682797236032571, "grad_norm": 0.5231319069862366, "learning_rate": 4.9578010518585015e-08, "loss": 0.3118, "step": 52156 }, { "epoch": 0.9683168537406757, "grad_norm": 0.5173414349555969, "learning_rate": 4.9462066498717764e-08, "loss": 0.2905, "step": 52158 }, { "epoch": 0.9683539838780943, "grad_norm": 0.27994275093078613, "learning_rate": 4.934625787586744e-08, "loss": 0.1202, "step": 52160 }, { "epoch": 0.9683911140155129, "grad_norm": 0.4744930863380432, "learning_rate": 4.923058465160724e-08, "loss": 0.3823, "step": 52162 }, { "epoch": 0.9684282441529316, "grad_norm": 0.25886625051498413, "learning_rate": 4.911504682751367e-08, "loss": 0.31, "step": 52164 }, { "epoch": 0.9684653742903503, "grad_norm": 0.4309345781803131, "learning_rate": 4.899964440515881e-08, "loss": 0.4171, "step": 52166 }, { "epoch": 0.9685025044277689, "grad_norm": 0.2908543050289154, "learning_rate": 4.8884377386111407e-08, "loss": 0.2764, "step": 52168 }, { "epoch": 0.9685396345651875, "grad_norm": 0.3983971178531647, "learning_rate": 4.87692457719402e-08, "loss": 0.3448, "step": 52170 }, { "epoch": 0.9685767647026061, "grad_norm": 0.4219726622104645, "learning_rate": 4.865424956421283e-08, "loss": 0.3789, "step": 52172 }, { "epoch": 0.9686138948400248, "grad_norm": 0.8485954403877258, "learning_rate": 4.853938876449249e-08, "loss": 0.2404, "step": 52174 }, { "epoch": 0.9686510249774435, "grad_norm": 0.28808602690696716, "learning_rate": 4.8424663374343485e-08, "loss": 0.3382, "step": 52176 }, { "epoch": 0.9686881551148621, "grad_norm": 0.3417855501174927, "learning_rate": 4.831007339532678e-08, "loss": 0.1123, "step": 52178 }, { "epoch": 0.9687252852522807, "grad_norm": 0.37864014506340027, "learning_rate": 4.8195618829001145e-08, "loss": 0.3089, "step": 52180 }, { "epoch": 0.9687624153896993, "grad_norm": 0.35535019636154175, "learning_rate": 4.808129967692421e-08, "loss": 0.2048, "step": 52182 }, { "epoch": 0.968799545527118, "grad_norm": 0.9459232091903687, "learning_rate": 4.7967115940650287e-08, "loss": 0.1914, "step": 52184 }, { "epoch": 0.9688366756645367, "grad_norm": 0.4172569215297699, "learning_rate": 4.7853067621734806e-08, "loss": 0.354, "step": 52186 }, { "epoch": 0.9688738058019553, "grad_norm": 0.5325927734375, "learning_rate": 4.773915472172874e-08, "loss": 0.3308, "step": 52188 }, { "epoch": 0.9689109359393739, "grad_norm": 0.7270974516868591, "learning_rate": 4.762537724218308e-08, "loss": 0.3195, "step": 52190 }, { "epoch": 0.9689480660767925, "grad_norm": 0.2682584524154663, "learning_rate": 4.751173518464436e-08, "loss": 0.0756, "step": 52192 }, { "epoch": 0.9689851962142112, "grad_norm": 0.2906855642795563, "learning_rate": 4.739822855066023e-08, "loss": 0.2218, "step": 52194 }, { "epoch": 0.9690223263516299, "grad_norm": 0.3794834315776825, "learning_rate": 4.7284857341773903e-08, "loss": 0.2578, "step": 52196 }, { "epoch": 0.9690594564890485, "grad_norm": 0.5507210493087769, "learning_rate": 4.7171621559529704e-08, "loss": 0.4141, "step": 52198 }, { "epoch": 0.9690965866264671, "grad_norm": 0.3101101815700531, "learning_rate": 4.70585212054675e-08, "loss": 0.1847, "step": 52200 }, { "epoch": 0.9691337167638857, "grad_norm": 0.2991110682487488, "learning_rate": 4.694555628112607e-08, "loss": 0.2987, "step": 52202 }, { "epoch": 0.9691708469013044, "grad_norm": 0.6705873012542725, "learning_rate": 4.683272678804307e-08, "loss": 0.1893, "step": 52204 }, { "epoch": 0.969207977038723, "grad_norm": 0.4680391550064087, "learning_rate": 4.672003272775283e-08, "loss": 0.1342, "step": 52206 }, { "epoch": 0.9692451071761417, "grad_norm": 0.7091013789176941, "learning_rate": 4.660747410178967e-08, "loss": 0.0988, "step": 52208 }, { "epoch": 0.9692822373135603, "grad_norm": 0.5330358147621155, "learning_rate": 4.649505091168571e-08, "loss": 0.2396, "step": 52210 }, { "epoch": 0.9693193674509789, "grad_norm": 0.41658705472946167, "learning_rate": 4.6382763158968615e-08, "loss": 0.4732, "step": 52212 }, { "epoch": 0.9693564975883976, "grad_norm": 0.40233591198921204, "learning_rate": 4.627061084516826e-08, "loss": 0.4603, "step": 52214 }, { "epoch": 0.9693936277258162, "grad_norm": 0.44144904613494873, "learning_rate": 4.615859397181011e-08, "loss": 0.3123, "step": 52216 }, { "epoch": 0.9694307578632348, "grad_norm": 0.38346484303474426, "learning_rate": 4.604671254041848e-08, "loss": 0.1842, "step": 52218 }, { "epoch": 0.9694678880006535, "grad_norm": 0.38805896043777466, "learning_rate": 4.5934966552515505e-08, "loss": 0.2188, "step": 52220 }, { "epoch": 0.9695050181380721, "grad_norm": 0.447449266910553, "learning_rate": 4.582335600962107e-08, "loss": 0.3932, "step": 52222 }, { "epoch": 0.9695421482754908, "grad_norm": 0.3837672472000122, "learning_rate": 4.571188091325507e-08, "loss": 0.1795, "step": 52224 }, { "epoch": 0.9695792784129094, "grad_norm": 0.44542455673217773, "learning_rate": 4.5600541264934074e-08, "loss": 0.4923, "step": 52226 }, { "epoch": 0.969616408550328, "grad_norm": 0.617743968963623, "learning_rate": 4.548933706617242e-08, "loss": 0.2639, "step": 52228 }, { "epoch": 0.9696535386877467, "grad_norm": 0.6876521110534668, "learning_rate": 4.537826831848335e-08, "loss": 0.2823, "step": 52230 }, { "epoch": 0.9696906688251653, "grad_norm": 0.5498456954956055, "learning_rate": 4.526733502337899e-08, "loss": 0.2075, "step": 52232 }, { "epoch": 0.969727798962584, "grad_norm": 0.4629049003124237, "learning_rate": 4.515653718236812e-08, "loss": 0.2346, "step": 52234 }, { "epoch": 0.9697649291000026, "grad_norm": 0.3241240978240967, "learning_rate": 4.5045874796958435e-08, "loss": 0.3661, "step": 52236 }, { "epoch": 0.9698020592374212, "grad_norm": 0.4844273626804352, "learning_rate": 4.49353478686565e-08, "loss": 0.23, "step": 52238 }, { "epoch": 0.9698391893748399, "grad_norm": 0.5575023293495178, "learning_rate": 4.482495639896445e-08, "loss": 0.2491, "step": 52240 }, { "epoch": 0.9698763195122585, "grad_norm": 0.5679180026054382, "learning_rate": 4.4714700389386635e-08, "loss": 0.2196, "step": 52242 }, { "epoch": 0.9699134496496772, "grad_norm": 0.530428409576416, "learning_rate": 4.4604579841422965e-08, "loss": 0.2097, "step": 52244 }, { "epoch": 0.9699505797870958, "grad_norm": 0.3517967462539673, "learning_rate": 4.449459475657003e-08, "loss": 0.4829, "step": 52246 }, { "epoch": 0.9699877099245144, "grad_norm": 0.4275780916213989, "learning_rate": 4.4384745136324406e-08, "loss": 0.2136, "step": 52248 }, { "epoch": 0.9700248400619331, "grad_norm": 0.4370857775211334, "learning_rate": 4.427503098218378e-08, "loss": 0.2589, "step": 52250 }, { "epoch": 0.9700619701993517, "grad_norm": 0.38713881373405457, "learning_rate": 4.416545229563807e-08, "loss": 0.3396, "step": 52252 }, { "epoch": 0.9700991003367704, "grad_norm": 0.4153943955898285, "learning_rate": 4.405600907817942e-08, "loss": 0.3046, "step": 52254 }, { "epoch": 0.970136230474189, "grad_norm": 0.5087686777114868, "learning_rate": 4.394670133129664e-08, "loss": 0.3375, "step": 52256 }, { "epoch": 0.9701733606116076, "grad_norm": 0.3228212296962738, "learning_rate": 4.383752905647853e-08, "loss": 0.0541, "step": 52258 }, { "epoch": 0.9702104907490262, "grad_norm": 0.7833784222602844, "learning_rate": 4.3728492255208364e-08, "loss": 0.4716, "step": 52260 }, { "epoch": 0.9702476208864449, "grad_norm": 0.4449281394481659, "learning_rate": 4.361959092897161e-08, "loss": 0.3141, "step": 52262 }, { "epoch": 0.9702847510238636, "grad_norm": 0.4693848788738251, "learning_rate": 4.351082507924931e-08, "loss": 0.5738, "step": 52264 }, { "epoch": 0.9703218811612822, "grad_norm": 0.4302957355976105, "learning_rate": 4.340219470752138e-08, "loss": 0.3716, "step": 52266 }, { "epoch": 0.9703590112987008, "grad_norm": 0.4023655652999878, "learning_rate": 4.3293699815266655e-08, "loss": 0.2937, "step": 52268 }, { "epoch": 0.9703961414361194, "grad_norm": 0.3765750527381897, "learning_rate": 4.318534040396061e-08, "loss": 0.2493, "step": 52270 }, { "epoch": 0.9704332715735381, "grad_norm": 0.34252360463142395, "learning_rate": 4.307711647507762e-08, "loss": 0.2458, "step": 52272 }, { "epoch": 0.9704704017109568, "grad_norm": 0.3828517496585846, "learning_rate": 4.296902803009095e-08, "loss": 0.3062, "step": 52274 }, { "epoch": 0.9705075318483753, "grad_norm": 0.385883092880249, "learning_rate": 4.286107507047055e-08, "loss": 0.3781, "step": 52276 }, { "epoch": 0.970544661985794, "grad_norm": 0.462250292301178, "learning_rate": 4.275325759768634e-08, "loss": 0.2805, "step": 52278 }, { "epoch": 0.9705817921232126, "grad_norm": 0.43409058451652527, "learning_rate": 4.2645575613204927e-08, "loss": 0.249, "step": 52280 }, { "epoch": 0.9706189222606313, "grad_norm": 0.44005900621414185, "learning_rate": 4.253802911849181e-08, "loss": 0.3421, "step": 52282 }, { "epoch": 0.97065605239805, "grad_norm": 0.2576841413974762, "learning_rate": 4.243061811500915e-08, "loss": 0.2232, "step": 52284 }, { "epoch": 0.9706931825354685, "grad_norm": 0.3891441822052002, "learning_rate": 4.232334260422022e-08, "loss": 0.3199, "step": 52286 }, { "epoch": 0.9707303126728872, "grad_norm": 0.435314804315567, "learning_rate": 4.2216202587583854e-08, "loss": 0.2707, "step": 52288 }, { "epoch": 0.9707674428103058, "grad_norm": 0.42135632038116455, "learning_rate": 4.2109198066556666e-08, "loss": 0.2789, "step": 52290 }, { "epoch": 0.9708045729477245, "grad_norm": 0.34286144375801086, "learning_rate": 4.200232904259749e-08, "loss": 0.3169, "step": 52292 }, { "epoch": 0.9708417030851432, "grad_norm": 0.6923753023147583, "learning_rate": 4.1895595517158494e-08, "loss": 0.3712, "step": 52294 }, { "epoch": 0.9708788332225617, "grad_norm": 0.4062420427799225, "learning_rate": 4.178899749169185e-08, "loss": 0.3797, "step": 52296 }, { "epoch": 0.9709159633599804, "grad_norm": 0.4525628387928009, "learning_rate": 4.168253496764974e-08, "loss": 0.2691, "step": 52298 }, { "epoch": 0.970953093497399, "grad_norm": 0.5534800887107849, "learning_rate": 4.157620794647876e-08, "loss": 0.3463, "step": 52300 }, { "epoch": 0.9709902236348177, "grad_norm": 0.37565261125564575, "learning_rate": 4.147001642962667e-08, "loss": 0.4954, "step": 52302 }, { "epoch": 0.9710273537722363, "grad_norm": 0.24253802001476288, "learning_rate": 4.1363960418538963e-08, "loss": 0.2076, "step": 52304 }, { "epoch": 0.9710644839096549, "grad_norm": 1.8483612537384033, "learning_rate": 4.125803991465782e-08, "loss": 0.2077, "step": 52306 }, { "epoch": 0.9711016140470736, "grad_norm": 0.3528026342391968, "learning_rate": 4.115225491942543e-08, "loss": 0.1916, "step": 52308 }, { "epoch": 0.9711387441844922, "grad_norm": 0.39766982197761536, "learning_rate": 4.104660543428063e-08, "loss": 0.2032, "step": 52310 }, { "epoch": 0.9711758743219109, "grad_norm": 0.3801085650920868, "learning_rate": 4.0941091460660055e-08, "loss": 0.1953, "step": 52312 }, { "epoch": 0.9712130044593295, "grad_norm": 0.34212860465049744, "learning_rate": 4.083571300000144e-08, "loss": 0.3623, "step": 52314 }, { "epoch": 0.9712501345967481, "grad_norm": 0.3319641053676605, "learning_rate": 4.073047005373698e-08, "loss": 0.1739, "step": 52316 }, { "epoch": 0.9712872647341668, "grad_norm": 0.4583672881126404, "learning_rate": 4.062536262329997e-08, "loss": 0.4254, "step": 52318 }, { "epoch": 0.9713243948715854, "grad_norm": 0.47544699907302856, "learning_rate": 4.052039071011926e-08, "loss": 0.1431, "step": 52320 }, { "epoch": 0.9713615250090041, "grad_norm": 0.3348153829574585, "learning_rate": 4.041555431562372e-08, "loss": 0.2437, "step": 52322 }, { "epoch": 0.9713986551464227, "grad_norm": 0.5156077742576599, "learning_rate": 4.031085344124108e-08, "loss": 0.3041, "step": 52324 }, { "epoch": 0.9714357852838413, "grad_norm": 0.4214031398296356, "learning_rate": 4.020628808839466e-08, "loss": 0.3558, "step": 52326 }, { "epoch": 0.97147291542126, "grad_norm": 0.27523618936538696, "learning_rate": 4.0101858258507764e-08, "loss": 0.4049, "step": 52328 }, { "epoch": 0.9715100455586786, "grad_norm": 0.2950456738471985, "learning_rate": 3.9997563953000364e-08, "loss": 0.3094, "step": 52330 }, { "epoch": 0.9715471756960973, "grad_norm": 0.4403890073299408, "learning_rate": 3.989340517329354e-08, "loss": 0.35, "step": 52332 }, { "epoch": 0.9715843058335158, "grad_norm": 0.22450010478496552, "learning_rate": 3.978938192080284e-08, "loss": 0.2865, "step": 52334 }, { "epoch": 0.9716214359709345, "grad_norm": 0.34587424993515015, "learning_rate": 3.9685494196944894e-08, "loss": 0.1388, "step": 52336 }, { "epoch": 0.9716585661083532, "grad_norm": 0.5718381404876709, "learning_rate": 3.958174200313192e-08, "loss": 0.3292, "step": 52338 }, { "epoch": 0.9716956962457718, "grad_norm": 0.5066449642181396, "learning_rate": 3.947812534077722e-08, "loss": 0.2451, "step": 52340 }, { "epoch": 0.9717328263831905, "grad_norm": 0.46121588349342346, "learning_rate": 3.937464421128967e-08, "loss": 0.5591, "step": 52342 }, { "epoch": 0.971769956520609, "grad_norm": 0.4850556552410126, "learning_rate": 3.9271298616077037e-08, "loss": 0.2448, "step": 52344 }, { "epoch": 0.9718070866580277, "grad_norm": 0.3433486521244049, "learning_rate": 3.916808855654819e-08, "loss": 0.3022, "step": 52346 }, { "epoch": 0.9718442167954464, "grad_norm": 0.5071460008621216, "learning_rate": 3.906501403410423e-08, "loss": 0.2032, "step": 52348 }, { "epoch": 0.971881346932865, "grad_norm": 0.2898614704608917, "learning_rate": 3.896207505014848e-08, "loss": 0.147, "step": 52350 }, { "epoch": 0.9719184770702837, "grad_norm": 0.4271768629550934, "learning_rate": 3.885927160608316e-08, "loss": 0.2297, "step": 52352 }, { "epoch": 0.9719556072077022, "grad_norm": 0.44398200511932373, "learning_rate": 3.875660370330603e-08, "loss": 0.1111, "step": 52354 }, { "epoch": 0.9719927373451209, "grad_norm": 0.6286386847496033, "learning_rate": 3.865407134321264e-08, "loss": 0.4717, "step": 52356 }, { "epoch": 0.9720298674825395, "grad_norm": 0.3571556806564331, "learning_rate": 3.855167452720188e-08, "loss": 0.3092, "step": 52358 }, { "epoch": 0.9720669976199582, "grad_norm": 0.34171995520591736, "learning_rate": 3.844941325666374e-08, "loss": 0.2387, "step": 52360 }, { "epoch": 0.9721041277573769, "grad_norm": 0.5210464000701904, "learning_rate": 3.834728753299044e-08, "loss": 0.2233, "step": 52362 }, { "epoch": 0.9721412578947954, "grad_norm": 0.38443824648857117, "learning_rate": 3.824529735757199e-08, "loss": 0.2382, "step": 52364 }, { "epoch": 0.9721783880322141, "grad_norm": 0.24594590067863464, "learning_rate": 3.8143442731796154e-08, "loss": 0.1391, "step": 52366 }, { "epoch": 0.9722155181696327, "grad_norm": 0.27534717321395874, "learning_rate": 3.804172365704961e-08, "loss": 0.2746, "step": 52368 }, { "epoch": 0.9722526483070514, "grad_norm": 0.258372038602829, "learning_rate": 3.794014013471459e-08, "loss": 0.3865, "step": 52370 }, { "epoch": 0.9722897784444701, "grad_norm": 0.5005186200141907, "learning_rate": 3.783869216617553e-08, "loss": 0.3906, "step": 52372 }, { "epoch": 0.9723269085818886, "grad_norm": 0.7120698690414429, "learning_rate": 3.773737975281133e-08, "loss": 0.3128, "step": 52374 }, { "epoch": 0.9723640387193073, "grad_norm": 0.3213861286640167, "learning_rate": 3.763620289600089e-08, "loss": 0.1925, "step": 52376 }, { "epoch": 0.9724011688567259, "grad_norm": 0.5673524737358093, "learning_rate": 3.753516159712089e-08, "loss": 0.4235, "step": 52378 }, { "epoch": 0.9724382989941446, "grad_norm": 0.6401824951171875, "learning_rate": 3.7434255857545786e-08, "loss": 0.2199, "step": 52380 }, { "epoch": 0.9724754291315633, "grad_norm": 0.3292693793773651, "learning_rate": 3.733348567865003e-08, "loss": 0.1266, "step": 52382 }, { "epoch": 0.9725125592689818, "grad_norm": 0.4881197214126587, "learning_rate": 3.7232851061803635e-08, "loss": 0.307, "step": 52384 }, { "epoch": 0.9725496894064005, "grad_norm": 0.4343830645084381, "learning_rate": 3.713235200837662e-08, "loss": 0.2965, "step": 52386 }, { "epoch": 0.9725868195438191, "grad_norm": 0.20725177228450775, "learning_rate": 3.7031988519735664e-08, "loss": 0.3818, "step": 52388 }, { "epoch": 0.9726239496812378, "grad_norm": 0.3907625675201416, "learning_rate": 3.6931760597246345e-08, "loss": 0.4011, "step": 52390 }, { "epoch": 0.9726610798186565, "grad_norm": 0.47869133949279785, "learning_rate": 3.683166824227313e-08, "loss": 0.5723, "step": 52392 }, { "epoch": 0.972698209956075, "grad_norm": 0.4184792935848236, "learning_rate": 3.673171145617827e-08, "loss": 0.3221, "step": 52394 }, { "epoch": 0.9727353400934937, "grad_norm": 0.5446518063545227, "learning_rate": 3.663189024032177e-08, "loss": 0.1639, "step": 52396 }, { "epoch": 0.9727724702309123, "grad_norm": 0.2734091281890869, "learning_rate": 3.6532204596060326e-08, "loss": 0.3196, "step": 52398 }, { "epoch": 0.972809600368331, "grad_norm": 0.6280050277709961, "learning_rate": 3.6432654524751755e-08, "loss": 0.1841, "step": 52400 }, { "epoch": 0.9728467305057497, "grad_norm": 0.39039379358291626, "learning_rate": 3.6333240027750515e-08, "loss": 0.18, "step": 52402 }, { "epoch": 0.9728838606431682, "grad_norm": 0.5928041338920593, "learning_rate": 3.623396110640887e-08, "loss": 0.2752, "step": 52404 }, { "epoch": 0.9729209907805869, "grad_norm": 0.46616432070732117, "learning_rate": 3.6134817762079053e-08, "loss": 0.3744, "step": 52406 }, { "epoch": 0.9729581209180055, "grad_norm": 0.3793087899684906, "learning_rate": 3.6035809996108894e-08, "loss": 0.1487, "step": 52408 }, { "epoch": 0.9729952510554242, "grad_norm": 0.3221988379955292, "learning_rate": 3.5936937809845087e-08, "loss": 0.2328, "step": 52410 }, { "epoch": 0.9730323811928427, "grad_norm": 0.5459412932395935, "learning_rate": 3.5838201204634327e-08, "loss": 0.1445, "step": 52412 }, { "epoch": 0.9730695113302614, "grad_norm": 0.43969154357910156, "learning_rate": 3.573960018181999e-08, "loss": 0.2808, "step": 52414 }, { "epoch": 0.9731066414676801, "grad_norm": 0.36072394251823425, "learning_rate": 3.564113474274211e-08, "loss": 0.2578, "step": 52416 }, { "epoch": 0.9731437716050987, "grad_norm": 0.35191261768341064, "learning_rate": 3.5542804888742954e-08, "loss": 0.1617, "step": 52418 }, { "epoch": 0.9731809017425174, "grad_norm": 0.46096497774124146, "learning_rate": 3.544461062115812e-08, "loss": 0.3327, "step": 52420 }, { "epoch": 0.9732180318799359, "grad_norm": 0.36141785979270935, "learning_rate": 3.5346551941325414e-08, "loss": 0.2449, "step": 52422 }, { "epoch": 0.9732551620173546, "grad_norm": 0.30802807211875916, "learning_rate": 3.5248628850578224e-08, "loss": 0.2961, "step": 52424 }, { "epoch": 0.9732922921547733, "grad_norm": 0.4033017158508301, "learning_rate": 3.515084135024993e-08, "loss": 0.3185, "step": 52426 }, { "epoch": 0.9733294222921919, "grad_norm": 0.32289233803749084, "learning_rate": 3.505318944166947e-08, "loss": 0.1804, "step": 52428 }, { "epoch": 0.9733665524296106, "grad_norm": 0.24146059155464172, "learning_rate": 3.495567312616688e-08, "loss": 0.1335, "step": 52430 }, { "epoch": 0.9734036825670291, "grad_norm": 0.41892123222351074, "learning_rate": 3.4858292405069996e-08, "loss": 0.1674, "step": 52432 }, { "epoch": 0.9734408127044478, "grad_norm": 0.34480446577072144, "learning_rate": 3.47610472797022e-08, "loss": 0.1479, "step": 52434 }, { "epoch": 0.9734779428418665, "grad_norm": 0.31480976939201355, "learning_rate": 3.4663937751386875e-08, "loss": 0.4589, "step": 52436 }, { "epoch": 0.9735150729792851, "grad_norm": 0.5684669017791748, "learning_rate": 3.45669638214452e-08, "loss": 0.5442, "step": 52438 }, { "epoch": 0.9735522031167038, "grad_norm": 0.2442110776901245, "learning_rate": 3.4470125491198324e-08, "loss": 0.433, "step": 52440 }, { "epoch": 0.9735893332541223, "grad_norm": 0.4478112757205963, "learning_rate": 3.437342276196187e-08, "loss": 0.2489, "step": 52442 }, { "epoch": 0.973626463391541, "grad_norm": 0.29075488448143005, "learning_rate": 3.427685563505256e-08, "loss": 0.3118, "step": 52444 }, { "epoch": 0.9736635935289597, "grad_norm": 0.5737714767456055, "learning_rate": 3.4180424111784905e-08, "loss": 0.2905, "step": 52446 }, { "epoch": 0.9737007236663783, "grad_norm": 0.47367018461227417, "learning_rate": 3.4084128193471176e-08, "loss": 0.4879, "step": 52448 }, { "epoch": 0.973737853803797, "grad_norm": 0.33778467774391174, "learning_rate": 3.3987967881420333e-08, "loss": 0.213, "step": 52450 }, { "epoch": 0.9737749839412155, "grad_norm": 0.4188460409641266, "learning_rate": 3.389194317694133e-08, "loss": 0.2444, "step": 52452 }, { "epoch": 0.9738121140786342, "grad_norm": 0.48974165320396423, "learning_rate": 3.3796054081342014e-08, "loss": 0.3504, "step": 52454 }, { "epoch": 0.9738492442160528, "grad_norm": 0.3058036267757416, "learning_rate": 3.3700300595924664e-08, "loss": 0.1649, "step": 52456 }, { "epoch": 0.9738863743534715, "grad_norm": 0.47437435388565063, "learning_rate": 3.360468272199491e-08, "loss": 0.134, "step": 52458 }, { "epoch": 0.9739235044908902, "grad_norm": 0.3037269413471222, "learning_rate": 3.350920046085282e-08, "loss": 0.1854, "step": 52460 }, { "epoch": 0.9739606346283087, "grad_norm": 0.3654553294181824, "learning_rate": 3.341385381379625e-08, "loss": 0.3039, "step": 52462 }, { "epoch": 0.9739977647657274, "grad_norm": 0.36664679646492004, "learning_rate": 3.331864278212526e-08, "loss": 0.0863, "step": 52464 }, { "epoch": 0.974034894903146, "grad_norm": 0.6627983450889587, "learning_rate": 3.322356736713217e-08, "loss": 0.2081, "step": 52466 }, { "epoch": 0.9740720250405647, "grad_norm": 0.3373587727546692, "learning_rate": 3.3128627570113705e-08, "loss": 0.2321, "step": 52468 }, { "epoch": 0.9741091551779834, "grad_norm": 0.3118228614330292, "learning_rate": 3.3033823392359945e-08, "loss": 0.265, "step": 52470 }, { "epoch": 0.9741462853154019, "grad_norm": 0.4530518054962158, "learning_rate": 3.293915483516097e-08, "loss": 0.1735, "step": 52472 }, { "epoch": 0.9741834154528206, "grad_norm": 0.3990160822868347, "learning_rate": 3.284462189980575e-08, "loss": 0.2922, "step": 52474 }, { "epoch": 0.9742205455902392, "grad_norm": 0.5626431107521057, "learning_rate": 3.2750224587579924e-08, "loss": 0.3601, "step": 52476 }, { "epoch": 0.9742576757276579, "grad_norm": 0.6035019755363464, "learning_rate": 3.2655962899768027e-08, "loss": 0.3731, "step": 52478 }, { "epoch": 0.9742948058650766, "grad_norm": 0.28266674280166626, "learning_rate": 3.256183683765346e-08, "loss": 0.232, "step": 52480 }, { "epoch": 0.9743319360024951, "grad_norm": 0.3526769280433655, "learning_rate": 3.2467846402516324e-08, "loss": 0.4141, "step": 52482 }, { "epoch": 0.9743690661399138, "grad_norm": 0.5029996633529663, "learning_rate": 3.237399159563448e-08, "loss": 0.2065, "step": 52484 }, { "epoch": 0.9744061962773324, "grad_norm": 0.47557756304740906, "learning_rate": 3.228027241828691e-08, "loss": 0.3551, "step": 52486 }, { "epoch": 0.9744433264147511, "grad_norm": 0.41370537877082825, "learning_rate": 3.218668887174814e-08, "loss": 0.2723, "step": 52488 }, { "epoch": 0.9744804565521697, "grad_norm": 0.36553439497947693, "learning_rate": 3.20932409572916e-08, "loss": 0.1588, "step": 52490 }, { "epoch": 0.9745175866895883, "grad_norm": 0.3766074478626251, "learning_rate": 3.1999928676188505e-08, "loss": 0.1288, "step": 52492 }, { "epoch": 0.974554716827007, "grad_norm": 0.5123952627182007, "learning_rate": 3.190675202970783e-08, "loss": 0.2449, "step": 52494 }, { "epoch": 0.9745918469644256, "grad_norm": 0.22378191351890564, "learning_rate": 3.181371101911968e-08, "loss": 0.2137, "step": 52496 }, { "epoch": 0.9746289771018443, "grad_norm": 0.3810320794582367, "learning_rate": 3.172080564568858e-08, "loss": 0.3039, "step": 52498 }, { "epoch": 0.974666107239263, "grad_norm": 0.46907705068588257, "learning_rate": 3.162803591067909e-08, "loss": 0.1964, "step": 52500 }, { "epoch": 0.9747032373766815, "grad_norm": 0.39222970604896545, "learning_rate": 3.1535401815352416e-08, "loss": 0.2044, "step": 52502 }, { "epoch": 0.9747403675141002, "grad_norm": 0.5220727324485779, "learning_rate": 3.144290336096978e-08, "loss": 0.2374, "step": 52504 }, { "epoch": 0.9747774976515188, "grad_norm": 0.2719477117061615, "learning_rate": 3.1350540548791276e-08, "loss": 0.1044, "step": 52506 }, { "epoch": 0.9748146277889375, "grad_norm": 0.3455660343170166, "learning_rate": 3.1258313380070345e-08, "loss": 0.22, "step": 52508 }, { "epoch": 0.974851757926356, "grad_norm": 0.34028542041778564, "learning_rate": 3.1166221856065995e-08, "loss": 0.2956, "step": 52510 }, { "epoch": 0.9748888880637747, "grad_norm": 0.33966779708862305, "learning_rate": 3.107426597802832e-08, "loss": 0.5051, "step": 52512 }, { "epoch": 0.9749260182011934, "grad_norm": 0.5566272139549255, "learning_rate": 3.0982445747209656e-08, "loss": 0.2726, "step": 52514 }, { "epoch": 0.974963148338612, "grad_norm": 0.3141480088233948, "learning_rate": 3.0890761164859004e-08, "loss": 0.3139, "step": 52516 }, { "epoch": 0.9750002784760307, "grad_norm": 0.2885169982910156, "learning_rate": 3.0799212232224265e-08, "loss": 0.233, "step": 52518 }, { "epoch": 0.9750374086134492, "grad_norm": 0.38074564933776855, "learning_rate": 3.0707798950551095e-08, "loss": 0.1124, "step": 52520 }, { "epoch": 0.9750745387508679, "grad_norm": 0.49517300724983215, "learning_rate": 3.061652132108295e-08, "loss": 0.3213, "step": 52522 }, { "epoch": 0.9751116688882866, "grad_norm": 0.5436496734619141, "learning_rate": 3.052537934506328e-08, "loss": 0.3126, "step": 52524 }, { "epoch": 0.9751487990257052, "grad_norm": 0.38221463561058044, "learning_rate": 3.043437302372998e-08, "loss": 0.2795, "step": 52526 }, { "epoch": 0.9751859291631239, "grad_norm": 0.2784236967563629, "learning_rate": 3.034350235832317e-08, "loss": 0.3699, "step": 52528 }, { "epoch": 0.9752230593005424, "grad_norm": 0.2580036520957947, "learning_rate": 3.025276735007854e-08, "loss": 0.3234, "step": 52530 }, { "epoch": 0.9752601894379611, "grad_norm": 0.37121567130088806, "learning_rate": 3.016216800023064e-08, "loss": 0.0849, "step": 52532 }, { "epoch": 0.9752973195753798, "grad_norm": 0.23808827996253967, "learning_rate": 3.007170431001294e-08, "loss": 0.2437, "step": 52534 }, { "epoch": 0.9753344497127984, "grad_norm": 0.2764566242694855, "learning_rate": 2.998137628065556e-08, "loss": 0.2789, "step": 52536 }, { "epoch": 0.975371579850217, "grad_norm": 0.39689087867736816, "learning_rate": 2.9891183913387525e-08, "loss": 0.3759, "step": 52538 }, { "epoch": 0.9754087099876356, "grad_norm": 0.3870512545108795, "learning_rate": 2.980112720943784e-08, "loss": 0.1843, "step": 52540 }, { "epoch": 0.9754458401250543, "grad_norm": 0.5041999816894531, "learning_rate": 2.9711206170029983e-08, "loss": 0.2334, "step": 52542 }, { "epoch": 0.975482970262473, "grad_norm": 0.3743602931499481, "learning_rate": 2.9621420796387414e-08, "loss": 0.2127, "step": 52544 }, { "epoch": 0.9755201003998916, "grad_norm": 0.215674489736557, "learning_rate": 2.9531771089733598e-08, "loss": 0.3453, "step": 52546 }, { "epoch": 0.9755572305373102, "grad_norm": 0.34515440464019775, "learning_rate": 2.944225705128756e-08, "loss": 0.4175, "step": 52548 }, { "epoch": 0.9755943606747288, "grad_norm": 0.25181716680526733, "learning_rate": 2.9352878682266106e-08, "loss": 0.1931, "step": 52550 }, { "epoch": 0.9756314908121475, "grad_norm": 0.35617464780807495, "learning_rate": 2.926363598388715e-08, "loss": 0.1598, "step": 52552 }, { "epoch": 0.9756686209495661, "grad_norm": 0.32083582878112793, "learning_rate": 2.9174528957364167e-08, "loss": 0.3056, "step": 52554 }, { "epoch": 0.9757057510869848, "grad_norm": 0.3217821717262268, "learning_rate": 2.908555760390952e-08, "loss": 0.2996, "step": 52556 }, { "epoch": 0.9757428812244034, "grad_norm": 0.3042164146900177, "learning_rate": 2.899672192473446e-08, "loss": 0.1812, "step": 52558 }, { "epoch": 0.975780011361822, "grad_norm": 0.4111628234386444, "learning_rate": 2.8908021921046914e-08, "loss": 0.4655, "step": 52560 }, { "epoch": 0.9758171414992407, "grad_norm": 0.2899168133735657, "learning_rate": 2.88194575940548e-08, "loss": 0.1325, "step": 52562 }, { "epoch": 0.9758542716366593, "grad_norm": 0.34224218130111694, "learning_rate": 2.8731028944962714e-08, "loss": 0.2113, "step": 52564 }, { "epoch": 0.975891401774078, "grad_norm": 0.538062334060669, "learning_rate": 2.8642735974973024e-08, "loss": 0.3183, "step": 52566 }, { "epoch": 0.9759285319114966, "grad_norm": 0.3536325693130493, "learning_rate": 2.8554578685289214e-08, "loss": 0.294, "step": 52568 }, { "epoch": 0.9759656620489152, "grad_norm": 0.3570454716682434, "learning_rate": 2.8466557077108103e-08, "loss": 0.4027, "step": 52570 }, { "epoch": 0.9760027921863339, "grad_norm": 0.3187287747859955, "learning_rate": 2.8378671151630954e-08, "loss": 0.2558, "step": 52572 }, { "epoch": 0.9760399223237525, "grad_norm": 0.40225204825401306, "learning_rate": 2.8290920910050146e-08, "loss": 0.2772, "step": 52574 }, { "epoch": 0.9760770524611712, "grad_norm": 0.32322025299072266, "learning_rate": 2.820330635356139e-08, "loss": 0.3288, "step": 52576 }, { "epoch": 0.9761141825985898, "grad_norm": 0.4910758435726166, "learning_rate": 2.811582748335706e-08, "loss": 0.2053, "step": 52578 }, { "epoch": 0.9761513127360084, "grad_norm": 0.6543188691139221, "learning_rate": 2.8028484300627322e-08, "loss": 0.226, "step": 52580 }, { "epoch": 0.9761884428734271, "grad_norm": 0.3032397925853729, "learning_rate": 2.7941276806560112e-08, "loss": 0.221, "step": 52582 }, { "epoch": 0.9762255730108457, "grad_norm": 0.3602171242237091, "learning_rate": 2.785420500234226e-08, "loss": 0.2157, "step": 52584 }, { "epoch": 0.9762627031482644, "grad_norm": 0.48271968960762024, "learning_rate": 2.7767268889158373e-08, "loss": 0.2133, "step": 52586 }, { "epoch": 0.976299833285683, "grad_norm": 0.19905145466327667, "learning_rate": 2.7680468468191947e-08, "loss": 0.2804, "step": 52588 }, { "epoch": 0.9763369634231016, "grad_norm": 0.21626682579517365, "learning_rate": 2.759380374062426e-08, "loss": 0.3924, "step": 52590 }, { "epoch": 0.9763740935605203, "grad_norm": 0.35485005378723145, "learning_rate": 2.750727470763326e-08, "loss": 0.1978, "step": 52592 }, { "epoch": 0.9764112236979389, "grad_norm": 0.31072521209716797, "learning_rate": 2.742088137039689e-08, "loss": 0.0933, "step": 52594 }, { "epoch": 0.9764483538353576, "grad_norm": 0.5642861127853394, "learning_rate": 2.733462373009199e-08, "loss": 0.2589, "step": 52596 }, { "epoch": 0.9764854839727762, "grad_norm": 0.27900439500808716, "learning_rate": 2.7248501787890957e-08, "loss": 0.3788, "step": 52598 }, { "epoch": 0.9765226141101948, "grad_norm": 0.5054538249969482, "learning_rate": 2.7162515544966182e-08, "loss": 0.2025, "step": 52600 }, { "epoch": 0.9765597442476135, "grad_norm": 0.32331642508506775, "learning_rate": 2.7076665002486734e-08, "loss": 0.4825, "step": 52602 }, { "epoch": 0.9765968743850321, "grad_norm": 0.37884241342544556, "learning_rate": 2.6990950161622787e-08, "loss": 0.2509, "step": 52604 }, { "epoch": 0.9766340045224507, "grad_norm": 0.5432556867599487, "learning_rate": 2.6905371023537852e-08, "loss": 0.2735, "step": 52606 }, { "epoch": 0.9766711346598693, "grad_norm": 0.399818480014801, "learning_rate": 2.6819927589398775e-08, "loss": 0.2318, "step": 52608 }, { "epoch": 0.976708264797288, "grad_norm": 0.25092265009880066, "learning_rate": 2.673461986036796e-08, "loss": 0.3856, "step": 52610 }, { "epoch": 0.9767453949347067, "grad_norm": 0.48335975408554077, "learning_rate": 2.664944783760448e-08, "loss": 0.2507, "step": 52612 }, { "epoch": 0.9767825250721253, "grad_norm": 0.5250675082206726, "learning_rate": 2.656441152226852e-08, "loss": 0.275, "step": 52614 }, { "epoch": 0.976819655209544, "grad_norm": 0.3093653917312622, "learning_rate": 2.6479510915516926e-08, "loss": 0.4943, "step": 52616 }, { "epoch": 0.9768567853469625, "grad_norm": 0.3774946630001068, "learning_rate": 2.6394746018505447e-08, "loss": 0.2235, "step": 52618 }, { "epoch": 0.9768939154843812, "grad_norm": 0.4391375482082367, "learning_rate": 2.631011683238649e-08, "loss": 0.4435, "step": 52620 }, { "epoch": 0.9769310456217999, "grad_norm": 0.2906727194786072, "learning_rate": 2.622562335831358e-08, "loss": 0.2458, "step": 52622 }, { "epoch": 0.9769681757592185, "grad_norm": 0.5192936658859253, "learning_rate": 2.6141265597433574e-08, "loss": 0.2049, "step": 52624 }, { "epoch": 0.9770053058966371, "grad_norm": 1.1134597063064575, "learning_rate": 2.6057043550896667e-08, "loss": 0.4149, "step": 52626 }, { "epoch": 0.9770424360340557, "grad_norm": 0.20615707337856293, "learning_rate": 2.5972957219847494e-08, "loss": 0.2737, "step": 52628 }, { "epoch": 0.9770795661714744, "grad_norm": 0.5077775120735168, "learning_rate": 2.58890066054307e-08, "loss": 0.1686, "step": 52630 }, { "epoch": 0.9771166963088931, "grad_norm": 0.3013204038143158, "learning_rate": 2.5805191708788703e-08, "loss": 0.1169, "step": 52632 }, { "epoch": 0.9771538264463117, "grad_norm": 0.372030645608902, "learning_rate": 2.57215125310617e-08, "loss": 0.4087, "step": 52634 }, { "epoch": 0.9771909565837303, "grad_norm": 0.3397143483161926, "learning_rate": 2.563796907338878e-08, "loss": 0.1733, "step": 52636 }, { "epoch": 0.9772280867211489, "grad_norm": 0.30003345012664795, "learning_rate": 2.5554561336906814e-08, "loss": 0.1496, "step": 52638 }, { "epoch": 0.9772652168585676, "grad_norm": 0.2165130078792572, "learning_rate": 2.5471289322749338e-08, "loss": 0.0353, "step": 52640 }, { "epoch": 0.9773023469959863, "grad_norm": 0.5763567686080933, "learning_rate": 2.5388153032051e-08, "loss": 0.3336, "step": 52642 }, { "epoch": 0.9773394771334049, "grad_norm": 0.30017635226249695, "learning_rate": 2.5305152465943116e-08, "loss": 0.1733, "step": 52644 }, { "epoch": 0.9773766072708235, "grad_norm": 0.511091947555542, "learning_rate": 2.5222287625553675e-08, "loss": 0.122, "step": 52646 }, { "epoch": 0.9774137374082421, "grad_norm": 0.32595300674438477, "learning_rate": 2.5139558512011776e-08, "loss": 0.4068, "step": 52648 }, { "epoch": 0.9774508675456608, "grad_norm": 0.2640933692455292, "learning_rate": 2.505696512644096e-08, "loss": 0.3098, "step": 52650 }, { "epoch": 0.9774879976830795, "grad_norm": 0.3979596793651581, "learning_rate": 2.497450746996699e-08, "loss": 0.0657, "step": 52652 }, { "epoch": 0.977525127820498, "grad_norm": 0.5992537140846252, "learning_rate": 2.48921855437112e-08, "loss": 0.4477, "step": 52654 }, { "epoch": 0.9775622579579167, "grad_norm": 0.3509773910045624, "learning_rate": 2.48099993487938e-08, "loss": 0.3969, "step": 52656 }, { "epoch": 0.9775993880953353, "grad_norm": 0.43008869886398315, "learning_rate": 2.4727948886333898e-08, "loss": 0.5364, "step": 52658 }, { "epoch": 0.977636518232754, "grad_norm": 0.31644877791404724, "learning_rate": 2.4646034157447264e-08, "loss": 0.1419, "step": 52660 }, { "epoch": 0.9776736483701726, "grad_norm": 0.5948702692985535, "learning_rate": 2.456425516324745e-08, "loss": 0.273, "step": 52662 }, { "epoch": 0.9777107785075912, "grad_norm": 0.3356820046901703, "learning_rate": 2.448261190484913e-08, "loss": 0.2393, "step": 52664 }, { "epoch": 0.9777479086450099, "grad_norm": 0.6614366769790649, "learning_rate": 2.4401104383361406e-08, "loss": 0.4151, "step": 52666 }, { "epoch": 0.9777850387824285, "grad_norm": 0.37577515840530396, "learning_rate": 2.431973259989562e-08, "loss": 0.3648, "step": 52668 }, { "epoch": 0.9778221689198472, "grad_norm": 0.43406444787979126, "learning_rate": 2.4238496555556434e-08, "loss": 0.281, "step": 52670 }, { "epoch": 0.9778592990572658, "grad_norm": 0.2981327176094055, "learning_rate": 2.415739625145075e-08, "loss": 0.393, "step": 52672 }, { "epoch": 0.9778964291946844, "grad_norm": 0.44977495074272156, "learning_rate": 2.407643168868212e-08, "loss": 0.3544, "step": 52674 }, { "epoch": 0.9779335593321031, "grad_norm": 0.3680245280265808, "learning_rate": 2.3995602868351897e-08, "loss": 0.1886, "step": 52676 }, { "epoch": 0.9779706894695217, "grad_norm": 0.2316843867301941, "learning_rate": 2.3914909791559195e-08, "loss": 0.364, "step": 52678 }, { "epoch": 0.9780078196069404, "grad_norm": 0.4484330415725708, "learning_rate": 2.3834352459403132e-08, "loss": 0.2541, "step": 52680 }, { "epoch": 0.978044949744359, "grad_norm": 0.42768245935440063, "learning_rate": 2.3753930872979502e-08, "loss": 0.2974, "step": 52682 }, { "epoch": 0.9780820798817776, "grad_norm": 0.49718043208122253, "learning_rate": 2.3673645033382985e-08, "loss": 0.3228, "step": 52684 }, { "epoch": 0.9781192100191963, "grad_norm": 0.3892648220062256, "learning_rate": 2.359349494170493e-08, "loss": 0.466, "step": 52686 }, { "epoch": 0.9781563401566149, "grad_norm": 0.4572296142578125, "learning_rate": 2.3513480599036686e-08, "loss": 0.1825, "step": 52688 }, { "epoch": 0.9781934702940336, "grad_norm": 0.42824915051460266, "learning_rate": 2.3433602006467383e-08, "loss": 0.3205, "step": 52690 }, { "epoch": 0.9782306004314522, "grad_norm": 0.5128491520881653, "learning_rate": 2.3353859165083925e-08, "loss": 0.4391, "step": 52692 }, { "epoch": 0.9782677305688708, "grad_norm": 0.35662809014320374, "learning_rate": 2.3274252075969893e-08, "loss": 0.2015, "step": 52694 }, { "epoch": 0.9783048607062895, "grad_norm": 0.34031981229782104, "learning_rate": 2.3194780740209976e-08, "loss": 0.2894, "step": 52696 }, { "epoch": 0.9783419908437081, "grad_norm": 0.6298449635505676, "learning_rate": 2.311544515888553e-08, "loss": 0.1317, "step": 52698 }, { "epoch": 0.9783791209811268, "grad_norm": 0.4997366666793823, "learning_rate": 2.303624533307458e-08, "loss": 0.3311, "step": 52700 }, { "epoch": 0.9784162511185454, "grad_norm": 0.41044628620147705, "learning_rate": 2.2957181263856265e-08, "loss": 0.1661, "step": 52702 }, { "epoch": 0.978453381255964, "grad_norm": 0.352595716714859, "learning_rate": 2.287825295230639e-08, "loss": 0.3509, "step": 52704 }, { "epoch": 0.9784905113933826, "grad_norm": 0.34571948647499084, "learning_rate": 2.2799460399498542e-08, "loss": 0.3981, "step": 52706 }, { "epoch": 0.9785276415308013, "grad_norm": 0.3974832594394684, "learning_rate": 2.2720803606504084e-08, "loss": 0.2851, "step": 52708 }, { "epoch": 0.97856477166822, "grad_norm": 0.36147740483283997, "learning_rate": 2.264228257439549e-08, "loss": 0.5107, "step": 52710 }, { "epoch": 0.9786019018056386, "grad_norm": 0.41298454999923706, "learning_rate": 2.256389730423858e-08, "loss": 0.181, "step": 52712 }, { "epoch": 0.9786390319430572, "grad_norm": 0.5696089863777161, "learning_rate": 2.2485647797101384e-08, "loss": 0.15, "step": 52714 }, { "epoch": 0.9786761620804758, "grad_norm": 0.36442697048187256, "learning_rate": 2.2407534054048608e-08, "loss": 0.1328, "step": 52716 }, { "epoch": 0.9787132922178945, "grad_norm": 0.16239385306835175, "learning_rate": 2.2329556076142733e-08, "loss": 0.2648, "step": 52718 }, { "epoch": 0.9787504223553132, "grad_norm": 0.32683441042900085, "learning_rate": 2.2251713864445133e-08, "loss": 0.0754, "step": 52720 }, { "epoch": 0.9787875524927317, "grad_norm": 0.3488009572029114, "learning_rate": 2.2174007420014965e-08, "loss": 0.3742, "step": 52722 }, { "epoch": 0.9788246826301504, "grad_norm": 0.3816192150115967, "learning_rate": 2.2096436743910264e-08, "loss": 0.2999, "step": 52724 }, { "epoch": 0.978861812767569, "grad_norm": 0.5605812072753906, "learning_rate": 2.2019001837184638e-08, "loss": 0.2527, "step": 52726 }, { "epoch": 0.9788989429049877, "grad_norm": 0.5688818097114563, "learning_rate": 2.1941702700892798e-08, "loss": 0.3419, "step": 52728 }, { "epoch": 0.9789360730424064, "grad_norm": 0.521503746509552, "learning_rate": 2.186453933608723e-08, "loss": 0.344, "step": 52730 }, { "epoch": 0.978973203179825, "grad_norm": 0.27724796533584595, "learning_rate": 2.17875117438171e-08, "loss": 0.4428, "step": 52732 }, { "epoch": 0.9790103333172436, "grad_norm": 0.4398658573627472, "learning_rate": 2.171061992513046e-08, "loss": 0.306, "step": 52734 }, { "epoch": 0.9790474634546622, "grad_norm": 0.4050920605659485, "learning_rate": 2.1633863881074245e-08, "loss": 0.3518, "step": 52736 }, { "epoch": 0.9790845935920809, "grad_norm": 0.4036939740180969, "learning_rate": 2.155724361269318e-08, "loss": 0.44, "step": 52738 }, { "epoch": 0.9791217237294996, "grad_norm": 0.24823527038097382, "learning_rate": 2.148075912102754e-08, "loss": 0.2675, "step": 52740 }, { "epoch": 0.9791588538669181, "grad_norm": 0.39661112427711487, "learning_rate": 2.1404410407120936e-08, "loss": 0.2928, "step": 52742 }, { "epoch": 0.9791959840043368, "grad_norm": 0.5383175611495972, "learning_rate": 2.1328197472010315e-08, "loss": 0.2731, "step": 52744 }, { "epoch": 0.9792331141417554, "grad_norm": 0.5584187507629395, "learning_rate": 2.125212031673374e-08, "loss": 0.118, "step": 52746 }, { "epoch": 0.9792702442791741, "grad_norm": 0.20835137367248535, "learning_rate": 2.117617894232482e-08, "loss": 0.227, "step": 52748 }, { "epoch": 0.9793073744165928, "grad_norm": 0.4613208770751953, "learning_rate": 2.1100373349819403e-08, "loss": 0.1014, "step": 52750 }, { "epoch": 0.9793445045540113, "grad_norm": 0.3824054002761841, "learning_rate": 2.102470354024777e-08, "loss": 0.1685, "step": 52752 }, { "epoch": 0.97938163469143, "grad_norm": 0.4447002410888672, "learning_rate": 2.09491695146391e-08, "loss": 0.3277, "step": 52754 }, { "epoch": 0.9794187648288486, "grad_norm": 0.49731025099754333, "learning_rate": 2.087377127402146e-08, "loss": 0.31, "step": 52756 }, { "epoch": 0.9794558949662673, "grad_norm": 0.24771015346050262, "learning_rate": 2.0798508819419582e-08, "loss": 0.2411, "step": 52758 }, { "epoch": 0.9794930251036859, "grad_norm": 0.5227185487747192, "learning_rate": 2.0723382151860427e-08, "loss": 0.2772, "step": 52760 }, { "epoch": 0.9795301552411045, "grad_norm": 0.2755507826805115, "learning_rate": 2.0648391272364287e-08, "loss": 0.2852, "step": 52762 }, { "epoch": 0.9795672853785232, "grad_norm": 0.45438331365585327, "learning_rate": 2.057353618195146e-08, "loss": 0.1982, "step": 52764 }, { "epoch": 0.9796044155159418, "grad_norm": 0.47954118251800537, "learning_rate": 2.0498816881641127e-08, "loss": 0.358, "step": 52766 }, { "epoch": 0.9796415456533605, "grad_norm": 0.3554628789424896, "learning_rate": 2.0424233372450253e-08, "loss": 0.3661, "step": 52768 }, { "epoch": 0.979678675790779, "grad_norm": 0.32770344614982605, "learning_rate": 2.0349785655393584e-08, "loss": 0.0854, "step": 52770 }, { "epoch": 0.9797158059281977, "grad_norm": 0.4444442391395569, "learning_rate": 2.0275473731482532e-08, "loss": 0.2301, "step": 52772 }, { "epoch": 0.9797529360656164, "grad_norm": 0.37271958589553833, "learning_rate": 2.020129760173073e-08, "loss": 0.2251, "step": 52774 }, { "epoch": 0.979790066203035, "grad_norm": 0.6411756873130798, "learning_rate": 2.0127257267146262e-08, "loss": 0.1835, "step": 52776 }, { "epoch": 0.9798271963404537, "grad_norm": 0.5964099168777466, "learning_rate": 2.0053352728736098e-08, "loss": 0.2327, "step": 52778 }, { "epoch": 0.9798643264778722, "grad_norm": 0.6121138334274292, "learning_rate": 1.9979583987506103e-08, "loss": 0.2595, "step": 52780 }, { "epoch": 0.9799014566152909, "grad_norm": 0.3534139394760132, "learning_rate": 1.9905951044461025e-08, "loss": 0.4295, "step": 52782 }, { "epoch": 0.9799385867527096, "grad_norm": 0.4625093340873718, "learning_rate": 1.9832453900601178e-08, "loss": 0.4808, "step": 52784 }, { "epoch": 0.9799757168901282, "grad_norm": 0.4284069538116455, "learning_rate": 1.9759092556929094e-08, "loss": 0.3214, "step": 52786 }, { "epoch": 0.9800128470275469, "grad_norm": 0.3997967541217804, "learning_rate": 1.968586701444064e-08, "loss": 0.264, "step": 52788 }, { "epoch": 0.9800499771649654, "grad_norm": 0.5932273268699646, "learning_rate": 1.96127772741328e-08, "loss": 0.3216, "step": 52790 }, { "epoch": 0.9800871073023841, "grad_norm": 0.3180560767650604, "learning_rate": 1.953982333700033e-08, "loss": 0.3493, "step": 52792 }, { "epoch": 0.9801242374398028, "grad_norm": 0.5678401589393616, "learning_rate": 1.9467005204036883e-08, "loss": 0.2271, "step": 52794 }, { "epoch": 0.9801613675772214, "grad_norm": 0.31910276412963867, "learning_rate": 1.9394322876231663e-08, "loss": 0.3144, "step": 52796 }, { "epoch": 0.9801984977146401, "grad_norm": 0.34138843417167664, "learning_rate": 1.9321776354573885e-08, "loss": 0.4476, "step": 52798 }, { "epoch": 0.9802356278520586, "grad_norm": 0.2658621668815613, "learning_rate": 1.9249365640051642e-08, "loss": 0.1022, "step": 52800 }, { "epoch": 0.9802727579894773, "grad_norm": 0.6638557314872742, "learning_rate": 1.9177090733649705e-08, "loss": 0.2759, "step": 52802 }, { "epoch": 0.980309888126896, "grad_norm": 0.2613357901573181, "learning_rate": 1.910495163635173e-08, "loss": 0.1718, "step": 52804 }, { "epoch": 0.9803470182643146, "grad_norm": 0.26341357827186584, "learning_rate": 1.9032948349138046e-08, "loss": 0.1402, "step": 52806 }, { "epoch": 0.9803841484017333, "grad_norm": 0.3948315382003784, "learning_rate": 1.896108087299009e-08, "loss": 0.1752, "step": 52808 }, { "epoch": 0.9804212785391518, "grad_norm": 0.6185754537582397, "learning_rate": 1.8889349208885965e-08, "loss": 0.3638, "step": 52810 }, { "epoch": 0.9804584086765705, "grad_norm": 0.4181791841983795, "learning_rate": 1.881775335780045e-08, "loss": 0.2577, "step": 52812 }, { "epoch": 0.9804955388139891, "grad_norm": 0.38397645950317383, "learning_rate": 1.8746293320708318e-08, "loss": 0.2355, "step": 52814 }, { "epoch": 0.9805326689514078, "grad_norm": 0.312391072511673, "learning_rate": 1.867496909858102e-08, "loss": 0.4889, "step": 52816 }, { "epoch": 0.9805697990888265, "grad_norm": 0.4079931974411011, "learning_rate": 1.8603780692389995e-08, "loss": 0.1835, "step": 52818 }, { "epoch": 0.980606929226245, "grad_norm": 0.24754378199577332, "learning_rate": 1.853272810310447e-08, "loss": 0.36, "step": 52820 }, { "epoch": 0.9806440593636637, "grad_norm": 0.3267788290977478, "learning_rate": 1.846181133169034e-08, "loss": 0.2409, "step": 52822 }, { "epoch": 0.9806811895010823, "grad_norm": 0.574005663394928, "learning_rate": 1.8391030379113493e-08, "loss": 0.5111, "step": 52824 }, { "epoch": 0.980718319638501, "grad_norm": 0.40877363085746765, "learning_rate": 1.8320385246335394e-08, "loss": 0.3059, "step": 52826 }, { "epoch": 0.9807554497759197, "grad_norm": 0.355730265378952, "learning_rate": 1.8249875934317483e-08, "loss": 0.296, "step": 52828 }, { "epoch": 0.9807925799133382, "grad_norm": 0.49034491181373596, "learning_rate": 1.8179502444021224e-08, "loss": 0.1473, "step": 52830 }, { "epoch": 0.9808297100507569, "grad_norm": 0.338891863822937, "learning_rate": 1.8109264776402514e-08, "loss": 0.2209, "step": 52832 }, { "epoch": 0.9808668401881755, "grad_norm": 0.3314644694328308, "learning_rate": 1.8039162932417252e-08, "loss": 0.2614, "step": 52834 }, { "epoch": 0.9809039703255942, "grad_norm": 0.46912673115730286, "learning_rate": 1.7969196913020238e-08, "loss": 0.1514, "step": 52836 }, { "epoch": 0.9809411004630129, "grad_norm": 0.5093889236450195, "learning_rate": 1.7899366719162926e-08, "loss": 0.2625, "step": 52838 }, { "epoch": 0.9809782306004314, "grad_norm": 0.4473753273487091, "learning_rate": 1.7829672351794557e-08, "loss": 0.1652, "step": 52840 }, { "epoch": 0.9810153607378501, "grad_norm": 0.2749598026275635, "learning_rate": 1.7760113811864375e-08, "loss": 0.3651, "step": 52842 }, { "epoch": 0.9810524908752687, "grad_norm": 0.4179568886756897, "learning_rate": 1.7690691100319403e-08, "loss": 0.2564, "step": 52844 }, { "epoch": 0.9810896210126874, "grad_norm": 0.3955458998680115, "learning_rate": 1.7621404218103323e-08, "loss": 0.4479, "step": 52846 }, { "epoch": 0.9811267511501061, "grad_norm": 0.432411253452301, "learning_rate": 1.7552253166158716e-08, "loss": 0.2532, "step": 52848 }, { "epoch": 0.9811638812875246, "grad_norm": 0.6299183964729309, "learning_rate": 1.7483237945428167e-08, "loss": 0.2844, "step": 52850 }, { "epoch": 0.9812010114249433, "grad_norm": 0.21975034475326538, "learning_rate": 1.74143585568487e-08, "loss": 0.2474, "step": 52852 }, { "epoch": 0.9812381415623619, "grad_norm": 0.3171994686126709, "learning_rate": 1.734561500135845e-08, "loss": 0.408, "step": 52854 }, { "epoch": 0.9812752716997806, "grad_norm": 0.46048086881637573, "learning_rate": 1.7277007279893344e-08, "loss": 0.2808, "step": 52856 }, { "epoch": 0.9813124018371991, "grad_norm": 0.7793512344360352, "learning_rate": 1.720853539338707e-08, "loss": 0.1515, "step": 52858 }, { "epoch": 0.9813495319746178, "grad_norm": 0.3635629415512085, "learning_rate": 1.7140199342770005e-08, "loss": 0.2522, "step": 52860 }, { "epoch": 0.9813866621120365, "grad_norm": 0.47209876775741577, "learning_rate": 1.7071999128972504e-08, "loss": 0.494, "step": 52862 }, { "epoch": 0.9814237922494551, "grad_norm": 0.31801649928092957, "learning_rate": 1.7003934752922723e-08, "loss": 0.3408, "step": 52864 }, { "epoch": 0.9814609223868738, "grad_norm": 0.3857996463775635, "learning_rate": 1.6936006215546584e-08, "loss": 0.2377, "step": 52866 }, { "epoch": 0.9814980525242923, "grad_norm": 0.5572075843811035, "learning_rate": 1.6868213517770016e-08, "loss": 0.1468, "step": 52868 }, { "epoch": 0.981535182661711, "grad_norm": 0.34970077872276306, "learning_rate": 1.680055666051228e-08, "loss": 0.1699, "step": 52870 }, { "epoch": 0.9815723127991297, "grad_norm": 0.36041855812072754, "learning_rate": 1.673303564469708e-08, "loss": 0.2577, "step": 52872 }, { "epoch": 0.9816094429365483, "grad_norm": 0.78822922706604, "learning_rate": 1.666565047124147e-08, "loss": 0.3168, "step": 52874 }, { "epoch": 0.981646573073967, "grad_norm": 0.3355909585952759, "learning_rate": 1.6598401141063593e-08, "loss": 0.2424, "step": 52876 }, { "epoch": 0.9816837032113855, "grad_norm": 0.3343003988265991, "learning_rate": 1.6531287655077166e-08, "loss": 0.105, "step": 52878 }, { "epoch": 0.9817208333488042, "grad_norm": 0.40963032841682434, "learning_rate": 1.6464310014195906e-08, "loss": 0.1985, "step": 52880 }, { "epoch": 0.9817579634862229, "grad_norm": 0.26679834723472595, "learning_rate": 1.6397468219331302e-08, "loss": 0.1181, "step": 52882 }, { "epoch": 0.9817950936236415, "grad_norm": 0.27392667531967163, "learning_rate": 1.633076227139263e-08, "loss": 0.2663, "step": 52884 }, { "epoch": 0.9818322237610602, "grad_norm": 0.2717020809650421, "learning_rate": 1.626419217128694e-08, "loss": 0.3258, "step": 52886 }, { "epoch": 0.9818693538984787, "grad_norm": 0.24505992233753204, "learning_rate": 1.6197757919922398e-08, "loss": 0.2452, "step": 52888 }, { "epoch": 0.9819064840358974, "grad_norm": 0.4316135346889496, "learning_rate": 1.6131459518200498e-08, "loss": 0.265, "step": 52890 }, { "epoch": 0.9819436141733161, "grad_norm": 0.34182289242744446, "learning_rate": 1.6065296967024968e-08, "loss": 0.3385, "step": 52892 }, { "epoch": 0.9819807443107347, "grad_norm": 0.47099146246910095, "learning_rate": 1.5999270267295087e-08, "loss": 0.4158, "step": 52894 }, { "epoch": 0.9820178744481534, "grad_norm": 0.3879220485687256, "learning_rate": 1.5933379419909023e-08, "loss": 0.3247, "step": 52896 }, { "epoch": 0.9820550045855719, "grad_norm": 0.5407374501228333, "learning_rate": 1.586762442576495e-08, "loss": 0.2354, "step": 52898 }, { "epoch": 0.9820921347229906, "grad_norm": 0.4070351719856262, "learning_rate": 1.5802005285755485e-08, "loss": 0.5488, "step": 52900 }, { "epoch": 0.9821292648604093, "grad_norm": 0.332881897687912, "learning_rate": 1.5736522000775466e-08, "loss": 0.3025, "step": 52902 }, { "epoch": 0.9821663949978279, "grad_norm": 0.27456822991371155, "learning_rate": 1.5671174571714186e-08, "loss": 0.1363, "step": 52904 }, { "epoch": 0.9822035251352466, "grad_norm": 0.2130303829908371, "learning_rate": 1.5605962999462044e-08, "loss": 0.0708, "step": 52906 }, { "epoch": 0.9822406552726651, "grad_norm": 0.5916491150856018, "learning_rate": 1.5540887284906103e-08, "loss": 0.417, "step": 52908 }, { "epoch": 0.9822777854100838, "grad_norm": 0.688385546207428, "learning_rate": 1.5475947428932325e-08, "loss": 0.1805, "step": 52910 }, { "epoch": 0.9823149155475024, "grad_norm": 0.3841279149055481, "learning_rate": 1.5411143432423337e-08, "loss": 0.2372, "step": 52912 }, { "epoch": 0.9823520456849211, "grad_norm": 0.27340877056121826, "learning_rate": 1.5346475296261764e-08, "loss": 0.2267, "step": 52914 }, { "epoch": 0.9823891758223398, "grad_norm": 0.4170225262641907, "learning_rate": 1.5281943021326906e-08, "loss": 0.2333, "step": 52916 }, { "epoch": 0.9824263059597583, "grad_norm": 0.41291022300720215, "learning_rate": 1.5217546608495835e-08, "loss": 0.2107, "step": 52918 }, { "epoch": 0.982463436097177, "grad_norm": 0.3946945369243622, "learning_rate": 1.5153286058647855e-08, "loss": 0.1434, "step": 52920 }, { "epoch": 0.9825005662345956, "grad_norm": 0.410515159368515, "learning_rate": 1.5089161372654483e-08, "loss": 0.1629, "step": 52922 }, { "epoch": 0.9825376963720143, "grad_norm": 0.4105881452560425, "learning_rate": 1.5025172551390575e-08, "loss": 0.2839, "step": 52924 }, { "epoch": 0.982574826509433, "grad_norm": 0.571586549282074, "learning_rate": 1.4961319595724333e-08, "loss": 0.2665, "step": 52926 }, { "epoch": 0.9826119566468515, "grad_norm": 0.4444388151168823, "learning_rate": 1.4897602506526165e-08, "loss": 0.1819, "step": 52928 }, { "epoch": 0.9826490867842702, "grad_norm": 0.2781773507595062, "learning_rate": 1.4834021284663157e-08, "loss": 0.2921, "step": 52930 }, { "epoch": 0.9826862169216888, "grad_norm": 0.49465271830558777, "learning_rate": 1.4770575931000176e-08, "loss": 0.2521, "step": 52932 }, { "epoch": 0.9827233470591075, "grad_norm": 0.3094932734966278, "learning_rate": 1.4707266446399859e-08, "loss": 0.1966, "step": 52934 }, { "epoch": 0.9827604771965261, "grad_norm": 0.4414733946323395, "learning_rate": 1.4644092831724855e-08, "loss": 0.1535, "step": 52936 }, { "epoch": 0.9827976073339447, "grad_norm": 0.5220457911491394, "learning_rate": 1.4581055087833362e-08, "loss": 0.3634, "step": 52938 }, { "epoch": 0.9828347374713634, "grad_norm": 0.46621036529541016, "learning_rate": 1.4518153215584695e-08, "loss": 0.3282, "step": 52940 }, { "epoch": 0.982871867608782, "grad_norm": 0.37622910737991333, "learning_rate": 1.4455387215833728e-08, "loss": 0.3323, "step": 52942 }, { "epoch": 0.9829089977462007, "grad_norm": 0.3686809539794922, "learning_rate": 1.4392757089434217e-08, "loss": 0.3028, "step": 52944 }, { "epoch": 0.9829461278836193, "grad_norm": 0.4885520935058594, "learning_rate": 1.4330262837239927e-08, "loss": 0.084, "step": 52946 }, { "epoch": 0.9829832580210379, "grad_norm": 0.4798552691936493, "learning_rate": 1.4267904460099069e-08, "loss": 0.5117, "step": 52948 }, { "epoch": 0.9830203881584566, "grad_norm": 0.2901044487953186, "learning_rate": 1.4205681958862073e-08, "loss": 0.302, "step": 52950 }, { "epoch": 0.9830575182958752, "grad_norm": 0.2551848590373993, "learning_rate": 1.4143595334374927e-08, "loss": 0.0837, "step": 52952 }, { "epoch": 0.9830946484332939, "grad_norm": 0.3479268252849579, "learning_rate": 1.4081644587482513e-08, "loss": 0.4864, "step": 52954 }, { "epoch": 0.9831317785707125, "grad_norm": 0.5674386620521545, "learning_rate": 1.401982971902749e-08, "loss": 0.2726, "step": 52956 }, { "epoch": 0.9831689087081311, "grad_norm": 0.29985716938972473, "learning_rate": 1.3958150729850295e-08, "loss": 0.238, "step": 52958 }, { "epoch": 0.9832060388455498, "grad_norm": 0.26672589778900146, "learning_rate": 1.389660762079137e-08, "loss": 0.402, "step": 52960 }, { "epoch": 0.9832431689829684, "grad_norm": 0.8663783669471741, "learning_rate": 1.383520039268782e-08, "loss": 0.2666, "step": 52962 }, { "epoch": 0.9832802991203871, "grad_norm": 0.3186021149158478, "learning_rate": 1.3773929046375645e-08, "loss": 0.2136, "step": 52964 }, { "epoch": 0.9833174292578056, "grad_norm": 0.22540484368801117, "learning_rate": 1.371279358268751e-08, "loss": 0.2074, "step": 52966 }, { "epoch": 0.9833545593952243, "grad_norm": 0.4237343966960907, "learning_rate": 1.3651794002456087e-08, "loss": 0.2856, "step": 52968 }, { "epoch": 0.983391689532643, "grad_norm": 0.4007505774497986, "learning_rate": 1.3590930306511819e-08, "loss": 0.3496, "step": 52970 }, { "epoch": 0.9834288196700616, "grad_norm": 0.4818536937236786, "learning_rate": 1.353020249568071e-08, "loss": 0.2505, "step": 52972 }, { "epoch": 0.9834659498074803, "grad_norm": 0.32615578174591064, "learning_rate": 1.3469610570792103e-08, "loss": 0.372, "step": 52974 }, { "epoch": 0.9835030799448988, "grad_norm": 0.3479304015636444, "learning_rate": 1.3409154532668667e-08, "loss": 0.2359, "step": 52976 }, { "epoch": 0.9835402100823175, "grad_norm": 0.4243628680706024, "learning_rate": 1.334883438213308e-08, "loss": 0.2649, "step": 52978 }, { "epoch": 0.9835773402197362, "grad_norm": 0.27116644382476807, "learning_rate": 1.3288650120005797e-08, "loss": 0.256, "step": 52980 }, { "epoch": 0.9836144703571548, "grad_norm": 0.4559706151485443, "learning_rate": 1.3228601747107272e-08, "loss": 0.2736, "step": 52982 }, { "epoch": 0.9836516004945735, "grad_norm": 0.34350845217704773, "learning_rate": 1.316868926425352e-08, "loss": 0.267, "step": 52984 }, { "epoch": 0.983688730631992, "grad_norm": 0.6091378331184387, "learning_rate": 1.3108912672259445e-08, "loss": 0.1935, "step": 52986 }, { "epoch": 0.9837258607694107, "grad_norm": 0.49014905095100403, "learning_rate": 1.3049271971938838e-08, "loss": 0.2191, "step": 52988 }, { "epoch": 0.9837629909068294, "grad_norm": 0.6771931648254395, "learning_rate": 1.2989767164103273e-08, "loss": 0.1934, "step": 52990 }, { "epoch": 0.983800121044248, "grad_norm": 0.3722216784954071, "learning_rate": 1.2930398249562103e-08, "loss": 0.3798, "step": 52992 }, { "epoch": 0.9838372511816666, "grad_norm": 0.5458692312240601, "learning_rate": 1.2871165229124683e-08, "loss": 0.1906, "step": 52994 }, { "epoch": 0.9838743813190852, "grad_norm": 0.4778379797935486, "learning_rate": 1.281206810359481e-08, "loss": 0.2864, "step": 52996 }, { "epoch": 0.9839115114565039, "grad_norm": 0.37623992562294006, "learning_rate": 1.27531068737774e-08, "loss": 0.1773, "step": 52998 }, { "epoch": 0.9839486415939226, "grad_norm": 0.2356218695640564, "learning_rate": 1.269428154047514e-08, "loss": 0.1914, "step": 53000 }, { "epoch": 0.9839857717313412, "grad_norm": 0.3305864930152893, "learning_rate": 1.2635592104488502e-08, "loss": 0.3145, "step": 53002 }, { "epoch": 0.9840229018687598, "grad_norm": 0.5014298558235168, "learning_rate": 1.257703856661574e-08, "loss": 0.177, "step": 53004 }, { "epoch": 0.9840600320061784, "grad_norm": 0.3790280818939209, "learning_rate": 1.251862092765399e-08, "loss": 0.1949, "step": 53006 }, { "epoch": 0.9840971621435971, "grad_norm": 0.17900466918945312, "learning_rate": 1.2460339188398174e-08, "loss": 0.2662, "step": 53008 }, { "epoch": 0.9841342922810157, "grad_norm": 0.353831946849823, "learning_rate": 1.2402193349639879e-08, "loss": 0.2156, "step": 53010 }, { "epoch": 0.9841714224184344, "grad_norm": 0.31914785504341125, "learning_rate": 1.2344183412171806e-08, "loss": 0.352, "step": 53012 }, { "epoch": 0.984208552555853, "grad_norm": 0.33785805106163025, "learning_rate": 1.2286309376783323e-08, "loss": 0.2257, "step": 53014 }, { "epoch": 0.9842456826932716, "grad_norm": 0.45867738127708435, "learning_rate": 1.2228571244261578e-08, "loss": 0.2575, "step": 53016 }, { "epoch": 0.9842828128306903, "grad_norm": 0.28101497888565063, "learning_rate": 1.2170969015391499e-08, "loss": 0.338, "step": 53018 }, { "epoch": 0.9843199429681089, "grad_norm": 0.3125835359096527, "learning_rate": 1.2113502690958011e-08, "loss": 0.0865, "step": 53020 }, { "epoch": 0.9843570731055276, "grad_norm": 0.340982586145401, "learning_rate": 1.2056172271742717e-08, "loss": 0.262, "step": 53022 }, { "epoch": 0.9843942032429462, "grad_norm": 0.6768050193786621, "learning_rate": 1.1998977758524987e-08, "loss": 0.415, "step": 53024 }, { "epoch": 0.9844313333803648, "grad_norm": 0.36077985167503357, "learning_rate": 1.1941919152084203e-08, "loss": 0.3354, "step": 53026 }, { "epoch": 0.9844684635177835, "grad_norm": 0.43528828024864197, "learning_rate": 1.1884996453196406e-08, "loss": 0.2371, "step": 53028 }, { "epoch": 0.9845055936552021, "grad_norm": 0.21479077637195587, "learning_rate": 1.1828209662635425e-08, "loss": 0.2092, "step": 53030 }, { "epoch": 0.9845427237926208, "grad_norm": 0.5424080491065979, "learning_rate": 1.1771558781173976e-08, "loss": 0.3202, "step": 53032 }, { "epoch": 0.9845798539300394, "grad_norm": 0.36951112747192383, "learning_rate": 1.171504380958366e-08, "loss": 0.2445, "step": 53034 }, { "epoch": 0.984616984067458, "grad_norm": 0.494515061378479, "learning_rate": 1.1658664748633863e-08, "loss": 0.2434, "step": 53036 }, { "epoch": 0.9846541142048767, "grad_norm": 0.30795934796333313, "learning_rate": 1.1602421599090641e-08, "loss": 0.3617, "step": 53038 }, { "epoch": 0.9846912443422953, "grad_norm": 0.4298781156539917, "learning_rate": 1.1546314361720046e-08, "loss": 0.4055, "step": 53040 }, { "epoch": 0.984728374479714, "grad_norm": 0.5600579380989075, "learning_rate": 1.1490343037284801e-08, "loss": 0.4086, "step": 53042 }, { "epoch": 0.9847655046171326, "grad_norm": 0.432710200548172, "learning_rate": 1.143450762654652e-08, "loss": 0.2066, "step": 53044 }, { "epoch": 0.9848026347545512, "grad_norm": 0.4219384789466858, "learning_rate": 1.1378808130265705e-08, "loss": 0.1525, "step": 53046 }, { "epoch": 0.9848397648919699, "grad_norm": 0.4071643054485321, "learning_rate": 1.132324454920064e-08, "loss": 0.5499, "step": 53048 }, { "epoch": 0.9848768950293885, "grad_norm": 0.3895339369773865, "learning_rate": 1.1267816884106275e-08, "loss": 0.3011, "step": 53050 }, { "epoch": 0.9849140251668071, "grad_norm": 0.4749773144721985, "learning_rate": 1.1212525135737563e-08, "loss": 0.2704, "step": 53052 }, { "epoch": 0.9849511553042258, "grad_norm": 0.560800313949585, "learning_rate": 1.1157369304846122e-08, "loss": 0.327, "step": 53054 }, { "epoch": 0.9849882854416444, "grad_norm": 0.46202149987220764, "learning_rate": 1.1102349392182466e-08, "loss": 0.1878, "step": 53056 }, { "epoch": 0.9850254155790631, "grad_norm": 0.2849270701408386, "learning_rate": 1.1047465398495993e-08, "loss": 0.2057, "step": 53058 }, { "epoch": 0.9850625457164817, "grad_norm": 0.37653493881225586, "learning_rate": 1.0992717324532776e-08, "loss": 0.2182, "step": 53060 }, { "epoch": 0.9850996758539003, "grad_norm": 0.34469857811927795, "learning_rate": 1.0938105171038882e-08, "loss": 0.2982, "step": 53062 }, { "epoch": 0.9851368059913189, "grad_norm": 0.46987178921699524, "learning_rate": 1.0883628938755942e-08, "loss": 0.2527, "step": 53064 }, { "epoch": 0.9851739361287376, "grad_norm": 0.36580121517181396, "learning_rate": 1.0829288628426694e-08, "loss": 0.2096, "step": 53066 }, { "epoch": 0.9852110662661563, "grad_norm": 0.394609659910202, "learning_rate": 1.0775084240789435e-08, "loss": 0.303, "step": 53068 }, { "epoch": 0.9852481964035749, "grad_norm": 0.5190526247024536, "learning_rate": 1.0721015776581356e-08, "loss": 0.276, "step": 53070 }, { "epoch": 0.9852853265409935, "grad_norm": 0.5082761645317078, "learning_rate": 1.0667083236539644e-08, "loss": 0.2337, "step": 53072 }, { "epoch": 0.9853224566784121, "grad_norm": 0.30295056104660034, "learning_rate": 1.0613286621398156e-08, "loss": 0.3476, "step": 53074 }, { "epoch": 0.9853595868158308, "grad_norm": 0.5708178281784058, "learning_rate": 1.0559625931887419e-08, "loss": 0.2694, "step": 53076 }, { "epoch": 0.9853967169532495, "grad_norm": 0.4554416239261627, "learning_rate": 1.0506101168737959e-08, "loss": 0.2682, "step": 53078 }, { "epoch": 0.9854338470906681, "grad_norm": 0.3567994236946106, "learning_rate": 1.0452712332678083e-08, "loss": 0.2271, "step": 53080 }, { "epoch": 0.9854709772280867, "grad_norm": 0.32993486523628235, "learning_rate": 1.0399459424436098e-08, "loss": 0.3898, "step": 53082 }, { "epoch": 0.9855081073655053, "grad_norm": 0.49736452102661133, "learning_rate": 1.0346342444733648e-08, "loss": 0.2953, "step": 53084 }, { "epoch": 0.985545237502924, "grad_norm": 0.24259555339813232, "learning_rate": 1.0293361394295709e-08, "loss": 0.2253, "step": 53086 }, { "epoch": 0.9855823676403427, "grad_norm": 0.4085068106651306, "learning_rate": 1.0240516273842815e-08, "loss": 0.1324, "step": 53088 }, { "epoch": 0.9856194977777613, "grad_norm": 0.4238944351673126, "learning_rate": 1.0187807084093282e-08, "loss": 0.319, "step": 53090 }, { "epoch": 0.9856566279151799, "grad_norm": 0.31107038259506226, "learning_rate": 1.0135233825765423e-08, "loss": 0.4662, "step": 53092 }, { "epoch": 0.9856937580525985, "grad_norm": 0.36112818121910095, "learning_rate": 1.0082796499573112e-08, "loss": 0.3264, "step": 53094 }, { "epoch": 0.9857308881900172, "grad_norm": 0.3701775372028351, "learning_rate": 1.003049510623022e-08, "loss": 0.1673, "step": 53096 }, { "epoch": 0.9857680183274359, "grad_norm": 0.4876464307308197, "learning_rate": 9.978329646449514e-09, "loss": 0.1603, "step": 53098 }, { "epoch": 0.9858051484648545, "grad_norm": 0.47264254093170166, "learning_rate": 9.926300120940423e-09, "loss": 0.1892, "step": 53100 }, { "epoch": 0.9858422786022731, "grad_norm": 0.4265842139720917, "learning_rate": 9.874406530411274e-09, "loss": 0.2829, "step": 53102 }, { "epoch": 0.9858794087396917, "grad_norm": 0.40275701880455017, "learning_rate": 9.822648875565944e-09, "loss": 0.1599, "step": 53104 }, { "epoch": 0.9859165388771104, "grad_norm": 0.4250245690345764, "learning_rate": 9.771027157111645e-09, "loss": 0.2779, "step": 53106 }, { "epoch": 0.9859536690145291, "grad_norm": 0.559378445148468, "learning_rate": 9.71954137574893e-09, "loss": 0.3623, "step": 53108 }, { "epoch": 0.9859907991519477, "grad_norm": 0.4601593017578125, "learning_rate": 9.66819153217835e-09, "loss": 0.3438, "step": 53110 }, { "epoch": 0.9860279292893663, "grad_norm": 0.4573003947734833, "learning_rate": 9.616977627099344e-09, "loss": 0.2259, "step": 53112 }, { "epoch": 0.9860650594267849, "grad_norm": 0.4358961582183838, "learning_rate": 9.565899661209132e-09, "loss": 0.3607, "step": 53114 }, { "epoch": 0.9861021895642036, "grad_norm": 0.39182502031326294, "learning_rate": 9.514957635202715e-09, "loss": 0.2713, "step": 53116 }, { "epoch": 0.9861393197016222, "grad_norm": 0.2643243372440338, "learning_rate": 9.46415154977176e-09, "loss": 0.1512, "step": 53118 }, { "epoch": 0.9861764498390408, "grad_norm": 0.48572197556495667, "learning_rate": 9.413481405609048e-09, "loss": 0.2686, "step": 53120 }, { "epoch": 0.9862135799764595, "grad_norm": 0.3542144000530243, "learning_rate": 9.362947203402917e-09, "loss": 0.2562, "step": 53122 }, { "epoch": 0.9862507101138781, "grad_norm": 0.26186344027519226, "learning_rate": 9.312548943842815e-09, "loss": 0.2799, "step": 53124 }, { "epoch": 0.9862878402512968, "grad_norm": 0.32664772868156433, "learning_rate": 9.262286627612638e-09, "loss": 0.2103, "step": 53126 }, { "epoch": 0.9863249703887154, "grad_norm": 0.41710367798805237, "learning_rate": 9.212160255397396e-09, "loss": 0.2349, "step": 53128 }, { "epoch": 0.986362100526134, "grad_norm": 0.27914559841156006, "learning_rate": 9.162169827877653e-09, "loss": 0.1334, "step": 53130 }, { "epoch": 0.9863992306635527, "grad_norm": 0.27275288105010986, "learning_rate": 9.1123153457362e-09, "loss": 0.1456, "step": 53132 }, { "epoch": 0.9864363608009713, "grad_norm": 0.26110753417015076, "learning_rate": 9.06259680964916e-09, "loss": 0.2433, "step": 53134 }, { "epoch": 0.98647349093839, "grad_norm": 0.3933124840259552, "learning_rate": 9.01301422029377e-09, "loss": 0.2525, "step": 53136 }, { "epoch": 0.9865106210758086, "grad_norm": 0.5826109647750854, "learning_rate": 8.963567578345045e-09, "loss": 0.3903, "step": 53138 }, { "epoch": 0.9865477512132272, "grad_norm": 0.44276341795921326, "learning_rate": 8.91425688447578e-09, "loss": 0.4659, "step": 53140 }, { "epoch": 0.9865848813506459, "grad_norm": 0.31747257709503174, "learning_rate": 8.86508213935655e-09, "loss": 0.2264, "step": 53142 }, { "epoch": 0.9866220114880645, "grad_norm": 0.5133402943611145, "learning_rate": 8.81604334365682e-09, "loss": 0.4238, "step": 53144 }, { "epoch": 0.9866591416254832, "grad_norm": 0.23044633865356445, "learning_rate": 8.767140498042725e-09, "loss": 0.1266, "step": 53146 }, { "epoch": 0.9866962717629018, "grad_norm": 0.3196012079715729, "learning_rate": 8.718373603181508e-09, "loss": 0.3068, "step": 53148 }, { "epoch": 0.9867334019003204, "grad_norm": 0.22003494203090668, "learning_rate": 8.66974265973597e-09, "loss": 0.154, "step": 53150 }, { "epoch": 0.9867705320377391, "grad_norm": 0.17721134424209595, "learning_rate": 8.621247668367806e-09, "loss": 0.2136, "step": 53152 }, { "epoch": 0.9868076621751577, "grad_norm": 0.5871778726577759, "learning_rate": 8.572888629736487e-09, "loss": 0.3495, "step": 53154 }, { "epoch": 0.9868447923125764, "grad_norm": 0.39905649423599243, "learning_rate": 8.524665544500378e-09, "loss": 0.3881, "step": 53156 }, { "epoch": 0.986881922449995, "grad_norm": 0.4124574363231659, "learning_rate": 8.476578413315618e-09, "loss": 0.2409, "step": 53158 }, { "epoch": 0.9869190525874136, "grad_norm": 0.4230913817882538, "learning_rate": 8.428627236836129e-09, "loss": 0.4552, "step": 53160 }, { "epoch": 0.9869561827248322, "grad_norm": 0.3227681815624237, "learning_rate": 8.380812015714723e-09, "loss": 0.2197, "step": 53162 }, { "epoch": 0.9869933128622509, "grad_norm": 0.32631757855415344, "learning_rate": 8.333132750603102e-09, "loss": 0.3257, "step": 53164 }, { "epoch": 0.9870304429996696, "grad_norm": 0.35036492347717285, "learning_rate": 8.285589442148524e-09, "loss": 0.1323, "step": 53166 }, { "epoch": 0.9870675731370882, "grad_norm": 0.5013242363929749, "learning_rate": 8.238182090998248e-09, "loss": 0.3104, "step": 53168 }, { "epoch": 0.9871047032745068, "grad_norm": 0.5183802843093872, "learning_rate": 8.190910697798426e-09, "loss": 0.4346, "step": 53170 }, { "epoch": 0.9871418334119254, "grad_norm": 0.4218640923500061, "learning_rate": 8.143775263189657e-09, "loss": 0.3287, "step": 53172 }, { "epoch": 0.9871789635493441, "grad_norm": 0.5029458999633789, "learning_rate": 8.096775787816979e-09, "loss": 0.3065, "step": 53174 }, { "epoch": 0.9872160936867628, "grad_norm": 0.2588317096233368, "learning_rate": 8.04991227231655e-09, "loss": 0.3033, "step": 53176 }, { "epoch": 0.9872532238241813, "grad_norm": 0.4231448173522949, "learning_rate": 8.00318471732786e-09, "loss": 0.2711, "step": 53178 }, { "epoch": 0.9872903539616, "grad_norm": 0.28477081656455994, "learning_rate": 7.956593123487066e-09, "loss": 0.1377, "step": 53180 }, { "epoch": 0.9873274840990186, "grad_norm": 0.40675631165504456, "learning_rate": 7.910137491425884e-09, "loss": 0.1686, "step": 53182 }, { "epoch": 0.9873646142364373, "grad_norm": 0.6056632399559021, "learning_rate": 7.863817821779363e-09, "loss": 0.2871, "step": 53184 }, { "epoch": 0.987401744373856, "grad_norm": 0.3164503872394562, "learning_rate": 7.817634115175887e-09, "loss": 0.2802, "step": 53186 }, { "epoch": 0.9874388745112745, "grad_norm": 0.41557440161705017, "learning_rate": 7.771586372243844e-09, "loss": 0.2096, "step": 53188 }, { "epoch": 0.9874760046486932, "grad_norm": 0.7443267107009888, "learning_rate": 7.725674593611621e-09, "loss": 0.2484, "step": 53190 }, { "epoch": 0.9875131347861118, "grad_norm": 0.22927403450012207, "learning_rate": 7.679898779900941e-09, "loss": 0.3217, "step": 53192 }, { "epoch": 0.9875502649235305, "grad_norm": 0.5478350520133972, "learning_rate": 7.63425893173797e-09, "loss": 0.345, "step": 53194 }, { "epoch": 0.9875873950609492, "grad_norm": 0.3027779757976532, "learning_rate": 7.588755049741104e-09, "loss": 0.0598, "step": 53196 }, { "epoch": 0.9876245251983677, "grad_norm": 0.23514460027217865, "learning_rate": 7.543387134530955e-09, "loss": 0.2057, "step": 53198 }, { "epoch": 0.9876616553357864, "grad_norm": 0.31874892115592957, "learning_rate": 7.498155186723698e-09, "loss": 0.2605, "step": 53200 }, { "epoch": 0.987698785473205, "grad_norm": 0.2721218466758728, "learning_rate": 7.453059206936619e-09, "loss": 0.1028, "step": 53202 }, { "epoch": 0.9877359156106237, "grad_norm": 0.3757397532463074, "learning_rate": 7.408099195781449e-09, "loss": 0.4145, "step": 53204 }, { "epoch": 0.9877730457480424, "grad_norm": 0.4937693774700165, "learning_rate": 7.36327515387103e-09, "loss": 0.3242, "step": 53206 }, { "epoch": 0.9878101758854609, "grad_norm": 0.3928329348564148, "learning_rate": 7.318587081815987e-09, "loss": 0.4254, "step": 53208 }, { "epoch": 0.9878473060228796, "grad_norm": 0.46920663118362427, "learning_rate": 7.274034980222499e-09, "loss": 0.2867, "step": 53210 }, { "epoch": 0.9878844361602982, "grad_norm": 0.43465757369995117, "learning_rate": 7.229618849696752e-09, "loss": 0.3264, "step": 53212 }, { "epoch": 0.9879215662977169, "grad_norm": 0.3177186846733093, "learning_rate": 7.185338690846033e-09, "loss": 0.304, "step": 53214 }, { "epoch": 0.9879586964351355, "grad_norm": 0.4860471487045288, "learning_rate": 7.141194504269866e-09, "loss": 0.2128, "step": 53216 }, { "epoch": 0.9879958265725541, "grad_norm": 0.36399605870246887, "learning_rate": 7.097186290569991e-09, "loss": 0.1092, "step": 53218 }, { "epoch": 0.9880329567099728, "grad_norm": 0.42836377024650574, "learning_rate": 7.053314050344817e-09, "loss": 0.2586, "step": 53220 }, { "epoch": 0.9880700868473914, "grad_norm": 0.47378724813461304, "learning_rate": 7.009577784192756e-09, "loss": 0.3456, "step": 53222 }, { "epoch": 0.9881072169848101, "grad_norm": 0.46237489581108093, "learning_rate": 6.965977492706666e-09, "loss": 0.2552, "step": 53224 }, { "epoch": 0.9881443471222287, "grad_norm": 0.28131136298179626, "learning_rate": 6.922513176481627e-09, "loss": 0.1465, "step": 53226 }, { "epoch": 0.9881814772596473, "grad_norm": 0.30757683515548706, "learning_rate": 6.879184836109387e-09, "loss": 0.4704, "step": 53228 }, { "epoch": 0.988218607397066, "grad_norm": 0.4576558768749237, "learning_rate": 6.835992472177255e-09, "loss": 0.3092, "step": 53230 }, { "epoch": 0.9882557375344846, "grad_norm": 0.32685327529907227, "learning_rate": 6.792936085274759e-09, "loss": 0.2592, "step": 53232 }, { "epoch": 0.9882928676719033, "grad_norm": 0.3320324420928955, "learning_rate": 6.7500156759869874e-09, "loss": 0.2922, "step": 53234 }, { "epoch": 0.9883299978093218, "grad_norm": 0.3592033088207245, "learning_rate": 6.707231244897916e-09, "loss": 0.3451, "step": 53236 }, { "epoch": 0.9883671279467405, "grad_norm": 0.37102431058883667, "learning_rate": 6.664582792591523e-09, "loss": 0.1934, "step": 53238 }, { "epoch": 0.9884042580841592, "grad_norm": 0.4393404722213745, "learning_rate": 6.622070319645124e-09, "loss": 0.2311, "step": 53240 }, { "epoch": 0.9884413882215778, "grad_norm": 0.4849635660648346, "learning_rate": 6.5796938266393665e-09, "loss": 0.3056, "step": 53242 }, { "epoch": 0.9884785183589965, "grad_norm": 0.4947705864906311, "learning_rate": 6.5374533141504545e-09, "loss": 0.4123, "step": 53244 }, { "epoch": 0.988515648496415, "grad_norm": 0.4610726535320282, "learning_rate": 6.495348782752375e-09, "loss": 0.2363, "step": 53246 }, { "epoch": 0.9885527786338337, "grad_norm": 0.5574173927307129, "learning_rate": 6.453380233019113e-09, "loss": 0.3357, "step": 53248 }, { "epoch": 0.9885899087712524, "grad_norm": 0.4010533094406128, "learning_rate": 6.411547665520212e-09, "loss": 0.1971, "step": 53250 }, { "epoch": 0.988627038908671, "grad_norm": 0.4894147217273712, "learning_rate": 6.369851080827438e-09, "loss": 0.253, "step": 53252 }, { "epoch": 0.9886641690460897, "grad_norm": 0.5542286038398743, "learning_rate": 6.328290479505894e-09, "loss": 0.1554, "step": 53254 }, { "epoch": 0.9887012991835082, "grad_norm": 0.365032821893692, "learning_rate": 6.286865862120684e-09, "loss": 0.3762, "step": 53256 }, { "epoch": 0.9887384293209269, "grad_norm": 0.16837038099765778, "learning_rate": 6.24557722923802e-09, "loss": 0.1749, "step": 53258 }, { "epoch": 0.9887755594583456, "grad_norm": 0.4075848162174225, "learning_rate": 6.204424581417456e-09, "loss": 0.3186, "step": 53260 }, { "epoch": 0.9888126895957642, "grad_norm": 0.4162130057811737, "learning_rate": 6.1634079192207654e-09, "loss": 0.2489, "step": 53262 }, { "epoch": 0.9888498197331829, "grad_norm": 0.37175485491752625, "learning_rate": 6.122527243204168e-09, "loss": 0.2835, "step": 53264 }, { "epoch": 0.9888869498706014, "grad_norm": 0.5033999681472778, "learning_rate": 6.081782553926107e-09, "loss": 0.4711, "step": 53266 }, { "epoch": 0.9889240800080201, "grad_norm": 0.3311261534690857, "learning_rate": 6.041173851938365e-09, "loss": 0.1535, "step": 53268 }, { "epoch": 0.9889612101454387, "grad_norm": 0.3732919991016388, "learning_rate": 6.0007011377949394e-09, "loss": 0.2485, "step": 53270 }, { "epoch": 0.9889983402828574, "grad_norm": 0.33835986256599426, "learning_rate": 5.960364412046505e-09, "loss": 0.3863, "step": 53272 }, { "epoch": 0.9890354704202761, "grad_norm": 0.19367873668670654, "learning_rate": 5.9201636752426186e-09, "loss": 0.1656, "step": 53274 }, { "epoch": 0.9890726005576946, "grad_norm": 0.34048378467559814, "learning_rate": 5.8800989279295115e-09, "loss": 0.3059, "step": 53276 }, { "epoch": 0.9891097306951133, "grad_norm": 0.3036670982837677, "learning_rate": 5.840170170652304e-09, "loss": 0.4157, "step": 53278 }, { "epoch": 0.9891468608325319, "grad_norm": 0.4369852840900421, "learning_rate": 5.800377403953894e-09, "loss": 0.3206, "step": 53280 }, { "epoch": 0.9891839909699506, "grad_norm": 0.4867478013038635, "learning_rate": 5.760720628377181e-09, "loss": 0.27, "step": 53282 }, { "epoch": 0.9892211211073693, "grad_norm": 0.8104970455169678, "learning_rate": 5.721199844459513e-09, "loss": 0.2174, "step": 53284 }, { "epoch": 0.9892582512447878, "grad_norm": 0.3569347560405731, "learning_rate": 5.681815052740458e-09, "loss": 0.1651, "step": 53286 }, { "epoch": 0.9892953813822065, "grad_norm": 0.3604087829589844, "learning_rate": 5.642566253756254e-09, "loss": 0.2577, "step": 53288 }, { "epoch": 0.9893325115196251, "grad_norm": 0.4281077980995178, "learning_rate": 5.603453448039808e-09, "loss": 0.4361, "step": 53290 }, { "epoch": 0.9893696416570438, "grad_norm": 0.6063163876533508, "learning_rate": 5.564476636122918e-09, "loss": 0.389, "step": 53292 }, { "epoch": 0.9894067717944625, "grad_norm": 0.28695377707481384, "learning_rate": 5.525635818537378e-09, "loss": 0.3937, "step": 53294 }, { "epoch": 0.989443901931881, "grad_norm": 0.3462778627872467, "learning_rate": 5.486930995810546e-09, "loss": 0.1233, "step": 53296 }, { "epoch": 0.9894810320692997, "grad_norm": 0.40961721539497375, "learning_rate": 5.448362168469778e-09, "loss": 0.1678, "step": 53298 }, { "epoch": 0.9895181622067183, "grad_norm": 0.4321346580982208, "learning_rate": 5.40992933704021e-09, "loss": 0.2486, "step": 53300 }, { "epoch": 0.989555292344137, "grad_norm": 0.3577726483345032, "learning_rate": 5.371632502043644e-09, "loss": 0.1735, "step": 53302 }, { "epoch": 0.9895924224815557, "grad_norm": 0.6342619061470032, "learning_rate": 5.333471664001888e-09, "loss": 0.1127, "step": 53304 }, { "epoch": 0.9896295526189742, "grad_norm": 0.2502640187740326, "learning_rate": 5.2954468234345245e-09, "loss": 0.2863, "step": 53306 }, { "epoch": 0.9896666827563929, "grad_norm": 0.7644517421722412, "learning_rate": 5.2575579808578085e-09, "loss": 0.4087, "step": 53308 }, { "epoch": 0.9897038128938115, "grad_norm": 0.39484095573425293, "learning_rate": 5.219805136789102e-09, "loss": 0.2468, "step": 53310 }, { "epoch": 0.9897409430312302, "grad_norm": 0.3180004060268402, "learning_rate": 5.1821882917391095e-09, "loss": 0.282, "step": 53312 }, { "epoch": 0.9897780731686487, "grad_norm": 0.3529704511165619, "learning_rate": 5.144707446222974e-09, "loss": 0.1592, "step": 53314 }, { "epoch": 0.9898152033060674, "grad_norm": 0.5988825559616089, "learning_rate": 5.107362600749177e-09, "loss": 0.2789, "step": 53316 }, { "epoch": 0.9898523334434861, "grad_norm": 0.22207272052764893, "learning_rate": 5.0701537558262014e-09, "loss": 0.1865, "step": 53318 }, { "epoch": 0.9898894635809047, "grad_norm": 0.24557848274707794, "learning_rate": 5.033080911959198e-09, "loss": 0.2769, "step": 53320 }, { "epoch": 0.9899265937183234, "grad_norm": 0.2961386740207672, "learning_rate": 4.996144069654429e-09, "loss": 0.3831, "step": 53322 }, { "epoch": 0.9899637238557419, "grad_norm": 0.30986568331718445, "learning_rate": 4.9593432294137156e-09, "loss": 0.1994, "step": 53324 }, { "epoch": 0.9900008539931606, "grad_norm": 0.5762717723846436, "learning_rate": 4.922678391737767e-09, "loss": 0.1298, "step": 53326 }, { "epoch": 0.9900379841305793, "grad_norm": 0.49220049381256104, "learning_rate": 4.886149557125075e-09, "loss": 0.228, "step": 53328 }, { "epoch": 0.9900751142679979, "grad_norm": 0.3445313572883606, "learning_rate": 4.849756726073018e-09, "loss": 0.1974, "step": 53330 }, { "epoch": 0.9901122444054166, "grad_norm": 0.4992985427379608, "learning_rate": 4.813499899076757e-09, "loss": 0.3602, "step": 53332 }, { "epoch": 0.9901493745428351, "grad_norm": 0.42849379777908325, "learning_rate": 4.77737907663034e-09, "loss": 0.2281, "step": 53334 }, { "epoch": 0.9901865046802538, "grad_norm": 0.39814242720603943, "learning_rate": 4.741394259223375e-09, "loss": 0.2372, "step": 53336 }, { "epoch": 0.9902236348176725, "grad_norm": 0.2453489601612091, "learning_rate": 4.7055454473476925e-09, "loss": 0.1676, "step": 53338 }, { "epoch": 0.9902607649550911, "grad_norm": 0.41594839096069336, "learning_rate": 4.669832641490679e-09, "loss": 0.1983, "step": 53340 }, { "epoch": 0.9902978950925098, "grad_norm": 0.7274026274681091, "learning_rate": 4.634255842136393e-09, "loss": 0.0838, "step": 53342 }, { "epoch": 0.9903350252299283, "grad_norm": 0.290635883808136, "learning_rate": 4.59881504977111e-09, "loss": 0.1224, "step": 53344 }, { "epoch": 0.990372155367347, "grad_norm": 0.26693812012672424, "learning_rate": 4.563510264876669e-09, "loss": 0.3341, "step": 53346 }, { "epoch": 0.9904092855047657, "grad_norm": 0.3814639747142792, "learning_rate": 4.528341487932686e-09, "loss": 0.2294, "step": 53348 }, { "epoch": 0.9904464156421843, "grad_norm": 0.3989715278148651, "learning_rate": 4.493308719417666e-09, "loss": 0.3172, "step": 53350 }, { "epoch": 0.990483545779603, "grad_norm": 0.4636980891227722, "learning_rate": 4.458411959809006e-09, "loss": 0.2385, "step": 53352 }, { "epoch": 0.9905206759170215, "grad_norm": 0.44374024868011475, "learning_rate": 4.423651209580771e-09, "loss": 0.2856, "step": 53354 }, { "epoch": 0.9905578060544402, "grad_norm": 0.26918163895606995, "learning_rate": 4.389026469207025e-09, "loss": 0.1872, "step": 53356 }, { "epoch": 0.9905949361918589, "grad_norm": 0.480447381734848, "learning_rate": 4.354537739158504e-09, "loss": 0.1514, "step": 53358 }, { "epoch": 0.9906320663292775, "grad_norm": 0.27315622568130493, "learning_rate": 4.320185019903722e-09, "loss": 0.2533, "step": 53360 }, { "epoch": 0.9906691964666962, "grad_norm": 0.3951440453529358, "learning_rate": 4.2859683119111925e-09, "loss": 0.4399, "step": 53362 }, { "epoch": 0.9907063266041147, "grad_norm": 0.37956899404525757, "learning_rate": 4.251887615644989e-09, "loss": 0.2445, "step": 53364 }, { "epoch": 0.9907434567415334, "grad_norm": 0.39813852310180664, "learning_rate": 4.217942931571406e-09, "loss": 0.5996, "step": 53366 }, { "epoch": 0.990780586878952, "grad_norm": 0.46266883611679077, "learning_rate": 4.184134260150075e-09, "loss": 0.114, "step": 53368 }, { "epoch": 0.9908177170163707, "grad_norm": 0.33156463503837585, "learning_rate": 4.150461601841737e-09, "loss": 0.2006, "step": 53370 }, { "epoch": 0.9908548471537894, "grad_norm": 0.3152031898498535, "learning_rate": 4.116924957103807e-09, "loss": 0.2329, "step": 53372 }, { "epoch": 0.9908919772912079, "grad_norm": 0.6567909121513367, "learning_rate": 4.083524326394806e-09, "loss": 0.261, "step": 53374 }, { "epoch": 0.9909291074286266, "grad_norm": 0.3906242847442627, "learning_rate": 4.050259710166593e-09, "loss": 0.2924, "step": 53376 }, { "epoch": 0.9909662375660452, "grad_norm": 0.34235602617263794, "learning_rate": 4.017131108873251e-09, "loss": 0.3029, "step": 53378 }, { "epoch": 0.9910033677034639, "grad_norm": 0.372734934091568, "learning_rate": 3.9841385229655305e-09, "loss": 0.4109, "step": 53380 }, { "epoch": 0.9910404978408826, "grad_norm": 0.5924265384674072, "learning_rate": 3.951281952891961e-09, "loss": 0.3896, "step": 53382 }, { "epoch": 0.9910776279783011, "grad_norm": 0.3129676878452301, "learning_rate": 3.918561399101073e-09, "loss": 0.2586, "step": 53384 }, { "epoch": 0.9911147581157198, "grad_norm": 0.2878083288669586, "learning_rate": 3.885976862034735e-09, "loss": 0.184, "step": 53386 }, { "epoch": 0.9911518882531384, "grad_norm": 0.4027186632156372, "learning_rate": 3.853528342140367e-09, "loss": 0.4386, "step": 53388 }, { "epoch": 0.9911890183905571, "grad_norm": 1.048187494277954, "learning_rate": 3.821215839856507e-09, "loss": 0.471, "step": 53390 }, { "epoch": 0.9912261485279757, "grad_norm": 0.49501165747642517, "learning_rate": 3.7890393556239135e-09, "loss": 0.3035, "step": 53392 }, { "epoch": 0.9912632786653943, "grad_norm": 0.6613175868988037, "learning_rate": 3.756998889880015e-09, "loss": 0.2475, "step": 53394 }, { "epoch": 0.991300408802813, "grad_norm": 0.3849734663963318, "learning_rate": 3.7250944430622383e-09, "loss": 0.496, "step": 53396 }, { "epoch": 0.9913375389402316, "grad_norm": 0.34422069787979126, "learning_rate": 3.6933260156024607e-09, "loss": 0.2689, "step": 53398 }, { "epoch": 0.9913746690776503, "grad_norm": 0.4010905623435974, "learning_rate": 3.66169360793478e-09, "loss": 0.2522, "step": 53400 }, { "epoch": 0.991411799215069, "grad_norm": 0.4136776030063629, "learning_rate": 3.6301972204888515e-09, "loss": 0.1628, "step": 53402 }, { "epoch": 0.9914489293524875, "grad_norm": 0.39263975620269775, "learning_rate": 3.598836853693222e-09, "loss": 0.1642, "step": 53404 }, { "epoch": 0.9914860594899062, "grad_norm": 0.38743874430656433, "learning_rate": 3.5676125079753266e-09, "loss": 0.1524, "step": 53406 }, { "epoch": 0.9915231896273248, "grad_norm": 0.2566862404346466, "learning_rate": 3.5365241837592713e-09, "loss": 0.1722, "step": 53408 }, { "epoch": 0.9915603197647435, "grad_norm": 0.2932698428630829, "learning_rate": 3.505571881468051e-09, "loss": 0.1563, "step": 53410 }, { "epoch": 0.9915974499021621, "grad_norm": 0.44588708877563477, "learning_rate": 3.474755601522439e-09, "loss": 0.3682, "step": 53412 }, { "epoch": 0.9916345800395807, "grad_norm": 0.7323007583618164, "learning_rate": 3.4440753443432118e-09, "loss": 0.2727, "step": 53414 }, { "epoch": 0.9916717101769994, "grad_norm": 0.3405202031135559, "learning_rate": 3.4135311103455916e-09, "loss": 0.383, "step": 53416 }, { "epoch": 0.991708840314418, "grad_norm": 0.41052931547164917, "learning_rate": 3.3831228999481324e-09, "loss": 0.333, "step": 53418 }, { "epoch": 0.9917459704518367, "grad_norm": 0.355103075504303, "learning_rate": 3.3528507135627274e-09, "loss": 0.3513, "step": 53420 }, { "epoch": 0.9917831005892552, "grad_norm": 0.4810498058795929, "learning_rate": 3.3227145516012693e-09, "loss": 0.3655, "step": 53422 }, { "epoch": 0.9918202307266739, "grad_norm": 0.2912665903568268, "learning_rate": 3.2927144144734303e-09, "loss": 0.3988, "step": 53424 }, { "epoch": 0.9918573608640926, "grad_norm": 0.4754859507083893, "learning_rate": 3.2628503025899925e-09, "loss": 0.1577, "step": 53426 }, { "epoch": 0.9918944910015112, "grad_norm": 0.38529467582702637, "learning_rate": 3.233122216353968e-09, "loss": 0.3528, "step": 53428 }, { "epoch": 0.9919316211389299, "grad_norm": 0.4235038459300995, "learning_rate": 3.2035301561716968e-09, "loss": 0.1733, "step": 53430 }, { "epoch": 0.9919687512763484, "grad_norm": 0.38974523544311523, "learning_rate": 3.174074122446191e-09, "loss": 0.2643, "step": 53432 }, { "epoch": 0.9920058814137671, "grad_norm": 0.6087730526924133, "learning_rate": 3.144754115576021e-09, "loss": 0.4278, "step": 53434 }, { "epoch": 0.9920430115511858, "grad_norm": 0.2930710017681122, "learning_rate": 3.1155701359630865e-09, "loss": 0.2443, "step": 53436 }, { "epoch": 0.9920801416886044, "grad_norm": 0.44917362928390503, "learning_rate": 3.0865221840026273e-09, "loss": 0.2315, "step": 53438 }, { "epoch": 0.992117271826023, "grad_norm": 0.43266502022743225, "learning_rate": 3.057610260090993e-09, "loss": 0.2447, "step": 53440 }, { "epoch": 0.9921544019634416, "grad_norm": 0.41224372386932373, "learning_rate": 3.0288343646200926e-09, "loss": 0.2161, "step": 53442 }, { "epoch": 0.9921915321008603, "grad_norm": 0.40588468313217163, "learning_rate": 3.0001944979829444e-09, "loss": 0.2163, "step": 53444 }, { "epoch": 0.992228662238279, "grad_norm": 0.3856228291988373, "learning_rate": 2.9716906605681272e-09, "loss": 0.1889, "step": 53446 }, { "epoch": 0.9922657923756976, "grad_norm": 0.2712112069129944, "learning_rate": 2.9433228527642186e-09, "loss": 0.2092, "step": 53448 }, { "epoch": 0.9923029225131162, "grad_norm": 0.4635825455188751, "learning_rate": 2.915091074957577e-09, "loss": 0.1971, "step": 53450 }, { "epoch": 0.9923400526505348, "grad_norm": 0.31481078267097473, "learning_rate": 2.886995327531228e-09, "loss": 0.1635, "step": 53452 }, { "epoch": 0.9923771827879535, "grad_norm": 0.40318188071250916, "learning_rate": 2.8590356108681994e-09, "loss": 0.2224, "step": 53454 }, { "epoch": 0.9924143129253722, "grad_norm": 0.33646485209465027, "learning_rate": 2.831211925348187e-09, "loss": 0.3131, "step": 53456 }, { "epoch": 0.9924514430627908, "grad_norm": 0.33020278811454773, "learning_rate": 2.803524271350888e-09, "loss": 0.2114, "step": 53458 }, { "epoch": 0.9924885732002094, "grad_norm": 0.31008413434028625, "learning_rate": 2.7759726492526674e-09, "loss": 0.3573, "step": 53460 }, { "epoch": 0.992525703337628, "grad_norm": 0.5426214933395386, "learning_rate": 2.74855705942878e-09, "loss": 0.4391, "step": 53462 }, { "epoch": 0.9925628334750467, "grad_norm": 0.31837981939315796, "learning_rate": 2.7212775022511517e-09, "loss": 0.1023, "step": 53464 }, { "epoch": 0.9925999636124653, "grad_norm": 0.3098362982273102, "learning_rate": 2.6941339780917064e-09, "loss": 0.2495, "step": 53466 }, { "epoch": 0.992637093749884, "grad_norm": 0.4193378686904907, "learning_rate": 2.6671264873201486e-09, "loss": 0.3251, "step": 53468 }, { "epoch": 0.9926742238873026, "grad_norm": 0.5505390167236328, "learning_rate": 2.6402550303028516e-09, "loss": 0.2582, "step": 53470 }, { "epoch": 0.9927113540247212, "grad_norm": 0.5060679316520691, "learning_rate": 2.61351960740619e-09, "loss": 0.3774, "step": 53472 }, { "epoch": 0.9927484841621399, "grad_norm": 0.24632394313812256, "learning_rate": 2.5869202189943156e-09, "loss": 0.1187, "step": 53474 }, { "epoch": 0.9927856142995585, "grad_norm": 0.3460080027580261, "learning_rate": 2.5604568654291616e-09, "loss": 0.3076, "step": 53476 }, { "epoch": 0.9928227444369772, "grad_norm": 0.44628074765205383, "learning_rate": 2.5341295470693306e-09, "loss": 0.1971, "step": 53478 }, { "epoch": 0.9928598745743958, "grad_norm": 0.5723697543144226, "learning_rate": 2.5079382642745344e-09, "loss": 0.1416, "step": 53480 }, { "epoch": 0.9928970047118144, "grad_norm": 0.31815630197525024, "learning_rate": 2.4818830174011544e-09, "loss": 0.2075, "step": 53482 }, { "epoch": 0.9929341348492331, "grad_norm": 0.5790970921516418, "learning_rate": 2.455963806802242e-09, "loss": 0.2423, "step": 53484 }, { "epoch": 0.9929712649866517, "grad_norm": 0.5024975538253784, "learning_rate": 2.430180632833068e-09, "loss": 0.2143, "step": 53486 }, { "epoch": 0.9930083951240704, "grad_norm": 0.39349859952926636, "learning_rate": 2.404533495841133e-09, "loss": 0.1257, "step": 53488 }, { "epoch": 0.993045525261489, "grad_norm": 0.36300456523895264, "learning_rate": 2.3790223961783766e-09, "loss": 0.2291, "step": 53490 }, { "epoch": 0.9930826553989076, "grad_norm": 0.3271787166595459, "learning_rate": 2.3536473341911893e-09, "loss": 0.1167, "step": 53492 }, { "epoch": 0.9931197855363263, "grad_norm": 0.3815661668777466, "learning_rate": 2.32840831022374e-09, "loss": 0.2907, "step": 53494 }, { "epoch": 0.9931569156737449, "grad_norm": 0.26615142822265625, "learning_rate": 2.3033053246201977e-09, "loss": 0.1387, "step": 53496 }, { "epoch": 0.9931940458111636, "grad_norm": 0.6048455834388733, "learning_rate": 2.278338377722511e-09, "loss": 0.3151, "step": 53498 }, { "epoch": 0.9932311759485822, "grad_norm": 0.3908873498439789, "learning_rate": 2.253507469869298e-09, "loss": 0.2528, "step": 53500 }, { "epoch": 0.9932683060860008, "grad_norm": 0.3220839202404022, "learning_rate": 2.228812601399177e-09, "loss": 0.2634, "step": 53502 }, { "epoch": 0.9933054362234195, "grad_norm": 0.39248910546302795, "learning_rate": 2.2042537726485456e-09, "loss": 0.3237, "step": 53504 }, { "epoch": 0.9933425663608381, "grad_norm": 0.372831255197525, "learning_rate": 2.179830983951581e-09, "loss": 0.4463, "step": 53506 }, { "epoch": 0.9933796964982567, "grad_norm": 0.4783819615840912, "learning_rate": 2.1555442356391287e-09, "loss": 0.2326, "step": 53508 }, { "epoch": 0.9934168266356754, "grad_norm": 0.37915462255477905, "learning_rate": 2.1313935280431465e-09, "loss": 0.2429, "step": 53510 }, { "epoch": 0.993453956773094, "grad_norm": 0.26616522669792175, "learning_rate": 2.1073788614922596e-09, "loss": 0.2037, "step": 53512 }, { "epoch": 0.9934910869105127, "grad_norm": 0.3992733657360077, "learning_rate": 2.083500236312874e-09, "loss": 0.2271, "step": 53514 }, { "epoch": 0.9935282170479313, "grad_norm": 0.34670883417129517, "learning_rate": 2.0597576528291752e-09, "loss": 0.247, "step": 53516 }, { "epoch": 0.99356534718535, "grad_norm": 0.2764451801776886, "learning_rate": 2.0361511113653477e-09, "loss": 0.2873, "step": 53518 }, { "epoch": 0.9936024773227685, "grad_norm": 0.45024481415748596, "learning_rate": 2.0126806122411356e-09, "loss": 0.204, "step": 53520 }, { "epoch": 0.9936396074601872, "grad_norm": 0.3657209575176239, "learning_rate": 1.9893461557785042e-09, "loss": 0.2899, "step": 53522 }, { "epoch": 0.9936767375976059, "grad_norm": 0.43768295645713806, "learning_rate": 1.9661477422916465e-09, "loss": 0.3133, "step": 53524 }, { "epoch": 0.9937138677350245, "grad_norm": 0.4510355591773987, "learning_rate": 1.943085372099196e-09, "loss": 0.1475, "step": 53526 }, { "epoch": 0.9937509978724431, "grad_norm": 0.3143789768218994, "learning_rate": 1.9201590455131257e-09, "loss": 0.3382, "step": 53528 }, { "epoch": 0.9937881280098617, "grad_norm": 0.2952684164047241, "learning_rate": 1.8973687628465186e-09, "loss": 0.2522, "step": 53530 }, { "epoch": 0.9938252581472804, "grad_norm": 0.4724416434764862, "learning_rate": 1.874714524408017e-09, "loss": 0.3912, "step": 53532 }, { "epoch": 0.9938623882846991, "grad_norm": 0.309989333152771, "learning_rate": 1.852196330507372e-09, "loss": 0.1172, "step": 53534 }, { "epoch": 0.9938995184221177, "grad_norm": 0.5229268670082092, "learning_rate": 1.8298141814510062e-09, "loss": 0.1765, "step": 53536 }, { "epoch": 0.9939366485595363, "grad_norm": 0.5416547060012817, "learning_rate": 1.8075680775420102e-09, "loss": 0.2724, "step": 53538 }, { "epoch": 0.9939737786969549, "grad_norm": 0.3997677266597748, "learning_rate": 1.7854580190845849e-09, "loss": 0.2793, "step": 53540 }, { "epoch": 0.9940109088343736, "grad_norm": 0.46541351079940796, "learning_rate": 1.7634840063784909e-09, "loss": 0.1859, "step": 53542 }, { "epoch": 0.9940480389717923, "grad_norm": 0.18295122683048248, "learning_rate": 1.741646039723488e-09, "loss": 0.276, "step": 53544 }, { "epoch": 0.9940851691092109, "grad_norm": 0.43300697207450867, "learning_rate": 1.719944119416006e-09, "loss": 0.2477, "step": 53546 }, { "epoch": 0.9941222992466295, "grad_norm": 0.3096964955329895, "learning_rate": 1.6983782457513642e-09, "loss": 0.2512, "step": 53548 }, { "epoch": 0.9941594293840481, "grad_norm": 0.3587380051612854, "learning_rate": 1.676948419024882e-09, "loss": 0.2363, "step": 53550 }, { "epoch": 0.9941965595214668, "grad_norm": 0.4650644063949585, "learning_rate": 1.655654639525217e-09, "loss": 0.388, "step": 53552 }, { "epoch": 0.9942336896588855, "grad_norm": 0.35090088844299316, "learning_rate": 1.6344969075443584e-09, "loss": 0.282, "step": 53554 }, { "epoch": 0.994270819796304, "grad_norm": 0.39210835099220276, "learning_rate": 1.6134752233698537e-09, "loss": 0.3073, "step": 53556 }, { "epoch": 0.9943079499337227, "grad_norm": 0.34158438444137573, "learning_rate": 1.59258958728703e-09, "loss": 0.088, "step": 53558 }, { "epoch": 0.9943450800711413, "grad_norm": 0.4653012454509735, "learning_rate": 1.5718399995801047e-09, "loss": 0.6044, "step": 53560 }, { "epoch": 0.99438221020856, "grad_norm": 0.7530316710472107, "learning_rate": 1.5512264605310745e-09, "loss": 0.2215, "step": 53562 }, { "epoch": 0.9944193403459787, "grad_norm": 0.48231950402259827, "learning_rate": 1.5307489704219358e-09, "loss": 0.232, "step": 53564 }, { "epoch": 0.9944564704833972, "grad_norm": 0.3443574607372284, "learning_rate": 1.5104075295302445e-09, "loss": 0.2529, "step": 53566 }, { "epoch": 0.9944936006208159, "grad_norm": 0.3055831789970398, "learning_rate": 1.4902021381324461e-09, "loss": 0.1434, "step": 53568 }, { "epoch": 0.9945307307582345, "grad_norm": 0.2209073007106781, "learning_rate": 1.4701327965038759e-09, "loss": 0.4109, "step": 53570 }, { "epoch": 0.9945678608956532, "grad_norm": 0.41637393832206726, "learning_rate": 1.4501995049187589e-09, "loss": 0.3015, "step": 53572 }, { "epoch": 0.9946049910330718, "grad_norm": 0.21818257868289948, "learning_rate": 1.4304022636468796e-09, "loss": 0.2275, "step": 53574 }, { "epoch": 0.9946421211704904, "grad_norm": 0.3729708790779114, "learning_rate": 1.4107410729580217e-09, "loss": 0.2756, "step": 53576 }, { "epoch": 0.9946792513079091, "grad_norm": 0.4783799350261688, "learning_rate": 1.3912159331197495e-09, "loss": 0.18, "step": 53578 }, { "epoch": 0.9947163814453277, "grad_norm": 0.4182230234146118, "learning_rate": 1.3718268443974059e-09, "loss": 0.1816, "step": 53580 }, { "epoch": 0.9947535115827464, "grad_norm": 0.3196980357170105, "learning_rate": 1.352573807055224e-09, "loss": 0.2514, "step": 53582 }, { "epoch": 0.994790641720165, "grad_norm": 0.47197315096855164, "learning_rate": 1.333456821355217e-09, "loss": 0.2721, "step": 53584 }, { "epoch": 0.9948277718575836, "grad_norm": 0.4402155578136444, "learning_rate": 1.314475887557176e-09, "loss": 0.2735, "step": 53586 }, { "epoch": 0.9948649019950023, "grad_norm": 0.38109177350997925, "learning_rate": 1.2956310059197841e-09, "loss": 0.2884, "step": 53588 }, { "epoch": 0.9949020321324209, "grad_norm": 0.4029310941696167, "learning_rate": 1.2769221766995022e-09, "loss": 0.2454, "step": 53590 }, { "epoch": 0.9949391622698396, "grad_norm": 0.34634122252464294, "learning_rate": 1.2583494001505714e-09, "loss": 0.1174, "step": 53592 }, { "epoch": 0.9949762924072582, "grad_norm": 0.2543226480484009, "learning_rate": 1.2399126765261227e-09, "loss": 0.3627, "step": 53594 }, { "epoch": 0.9950134225446768, "grad_norm": 0.6224349141120911, "learning_rate": 1.2216120060759562e-09, "loss": 0.2763, "step": 53596 }, { "epoch": 0.9950505526820955, "grad_norm": 0.5005269646644592, "learning_rate": 1.2034473890498722e-09, "loss": 0.287, "step": 53598 }, { "epoch": 0.9950876828195141, "grad_norm": 0.32010751962661743, "learning_rate": 1.18541882569545e-09, "loss": 0.3269, "step": 53600 }, { "epoch": 0.9951248129569328, "grad_norm": 0.45107343792915344, "learning_rate": 1.1675263162580497e-09, "loss": 0.4303, "step": 53602 }, { "epoch": 0.9951619430943514, "grad_norm": 0.329780250787735, "learning_rate": 1.1497698609796993e-09, "loss": 0.2941, "step": 53604 }, { "epoch": 0.99519907323177, "grad_norm": 0.4158528447151184, "learning_rate": 1.1321494601035377e-09, "loss": 0.2347, "step": 53606 }, { "epoch": 0.9952362033691887, "grad_norm": 0.3148943781852722, "learning_rate": 1.114665113868263e-09, "loss": 0.2758, "step": 53608 }, { "epoch": 0.9952733335066073, "grad_norm": 0.3847060203552246, "learning_rate": 1.0973168225125729e-09, "loss": 0.3269, "step": 53610 }, { "epoch": 0.995310463644026, "grad_norm": 0.26362016797065735, "learning_rate": 1.0801045862718351e-09, "loss": 0.2093, "step": 53612 }, { "epoch": 0.9953475937814446, "grad_norm": 0.3356444239616394, "learning_rate": 1.0630284053814165e-09, "loss": 0.2673, "step": 53614 }, { "epoch": 0.9953847239188632, "grad_norm": 0.4901287853717804, "learning_rate": 1.046088280072244e-09, "loss": 0.246, "step": 53616 }, { "epoch": 0.9954218540562818, "grad_norm": 0.3556593656539917, "learning_rate": 1.0292842105752432e-09, "loss": 0.2867, "step": 53618 }, { "epoch": 0.9954589841937005, "grad_norm": 0.33632415533065796, "learning_rate": 1.0126161971202309e-09, "loss": 0.41, "step": 53620 }, { "epoch": 0.9954961143311192, "grad_norm": 0.4291035532951355, "learning_rate": 9.96084239932582e-10, "loss": 0.2029, "step": 53622 }, { "epoch": 0.9955332444685377, "grad_norm": 4.7542901039123535, "learning_rate": 9.796883392376722e-10, "loss": 0.2167, "step": 53624 }, { "epoch": 0.9955703746059564, "grad_norm": 0.4169110655784607, "learning_rate": 9.63428495258656e-10, "loss": 0.3424, "step": 53626 }, { "epoch": 0.995607504743375, "grad_norm": 0.4213502109050751, "learning_rate": 9.473047082164676e-10, "loss": 0.2929, "step": 53628 }, { "epoch": 0.9956446348807937, "grad_norm": 0.3640592396259308, "learning_rate": 9.313169783309317e-10, "loss": 0.2291, "step": 53630 }, { "epoch": 0.9956817650182124, "grad_norm": 0.33782774209976196, "learning_rate": 9.154653058196516e-10, "loss": 0.25, "step": 53632 }, { "epoch": 0.995718895155631, "grad_norm": 0.4786086082458496, "learning_rate": 8.99749690899121e-10, "loss": 0.2543, "step": 53634 }, { "epoch": 0.9957560252930496, "grad_norm": 0.44893699884414673, "learning_rate": 8.841701337813924e-10, "loss": 0.2986, "step": 53636 }, { "epoch": 0.9957931554304682, "grad_norm": 0.3055330514907837, "learning_rate": 8.687266346785184e-10, "loss": 0.2478, "step": 53638 }, { "epoch": 0.9958302855678869, "grad_norm": 0.4336663782596588, "learning_rate": 8.534191938025516e-10, "loss": 0.3425, "step": 53640 }, { "epoch": 0.9958674157053056, "grad_norm": 0.39832034707069397, "learning_rate": 8.382478113599934e-10, "loss": 0.1627, "step": 53642 }, { "epoch": 0.9959045458427241, "grad_norm": 0.4333001673221588, "learning_rate": 8.232124875584558e-10, "loss": 0.2044, "step": 53644 }, { "epoch": 0.9959416759801428, "grad_norm": 0.3518376052379608, "learning_rate": 8.083132226022195e-10, "loss": 0.2138, "step": 53646 }, { "epoch": 0.9959788061175614, "grad_norm": 0.31043896079063416, "learning_rate": 7.935500166933452e-10, "loss": 0.4797, "step": 53648 }, { "epoch": 0.9960159362549801, "grad_norm": 0.4227985739707947, "learning_rate": 7.789228700327833e-10, "loss": 0.4345, "step": 53650 }, { "epoch": 0.9960530663923988, "grad_norm": 0.42044922709465027, "learning_rate": 7.64431782820374e-10, "loss": 0.4083, "step": 53652 }, { "epoch": 0.9960901965298173, "grad_norm": 0.2929927110671997, "learning_rate": 7.500767552526267e-10, "loss": 0.2392, "step": 53654 }, { "epoch": 0.996127326667236, "grad_norm": 0.22848284244537354, "learning_rate": 7.358577875249406e-10, "loss": 0.2927, "step": 53656 }, { "epoch": 0.9961644568046546, "grad_norm": 0.42669424414634705, "learning_rate": 7.217748798316049e-10, "loss": 0.1534, "step": 53658 }, { "epoch": 0.9962015869420733, "grad_norm": 0.2893311381340027, "learning_rate": 7.078280323635778e-10, "loss": 0.2772, "step": 53660 }, { "epoch": 0.996238717079492, "grad_norm": 0.45024368166923523, "learning_rate": 6.940172453095972e-10, "loss": 0.3999, "step": 53662 }, { "epoch": 0.9962758472169105, "grad_norm": 0.4148610830307007, "learning_rate": 6.803425188595114e-10, "loss": 0.1435, "step": 53664 }, { "epoch": 0.9963129773543292, "grad_norm": 0.5388889312744141, "learning_rate": 6.668038531987275e-10, "loss": 0.2778, "step": 53666 }, { "epoch": 0.9963501074917478, "grad_norm": 0.48851341009140015, "learning_rate": 6.534012485115426e-10, "loss": 0.2584, "step": 53668 }, { "epoch": 0.9963872376291665, "grad_norm": 0.4291050434112549, "learning_rate": 6.40134704978923e-10, "loss": 0.3036, "step": 53670 }, { "epoch": 0.996424367766585, "grad_norm": 0.341378778219223, "learning_rate": 6.270042227829453e-10, "loss": 0.3362, "step": 53672 }, { "epoch": 0.9964614979040037, "grad_norm": 0.27826619148254395, "learning_rate": 6.140098021023555e-10, "loss": 0.172, "step": 53674 }, { "epoch": 0.9964986280414224, "grad_norm": 0.4631991982460022, "learning_rate": 6.011514431136789e-10, "loss": 0.2323, "step": 53676 }, { "epoch": 0.996535758178841, "grad_norm": 0.33916786313056946, "learning_rate": 5.884291459912206e-10, "loss": 0.2308, "step": 53678 }, { "epoch": 0.9965728883162597, "grad_norm": 0.32062771916389465, "learning_rate": 5.758429109081753e-10, "loss": 0.2907, "step": 53680 }, { "epoch": 0.9966100184536782, "grad_norm": 0.24314405024051666, "learning_rate": 5.633927380366277e-10, "loss": 0.1704, "step": 53682 }, { "epoch": 0.9966471485910969, "grad_norm": 0.3417761027812958, "learning_rate": 5.510786275453317e-10, "loss": 0.1601, "step": 53684 }, { "epoch": 0.9966842787285156, "grad_norm": 0.33845800161361694, "learning_rate": 5.389005796019309e-10, "loss": 0.2226, "step": 53686 }, { "epoch": 0.9967214088659342, "grad_norm": 0.5561087727546692, "learning_rate": 5.268585943718485e-10, "loss": 0.5634, "step": 53688 }, { "epoch": 0.9967585390033529, "grad_norm": 0.5833976864814758, "learning_rate": 5.149526720205078e-10, "loss": 0.2883, "step": 53690 }, { "epoch": 0.9967956691407714, "grad_norm": 0.4740462005138397, "learning_rate": 5.031828127077809e-10, "loss": 0.2769, "step": 53692 }, { "epoch": 0.9968327992781901, "grad_norm": 0.5102601051330566, "learning_rate": 4.915490165946501e-10, "loss": 0.3672, "step": 53694 }, { "epoch": 0.9968699294156088, "grad_norm": 0.28935521841049194, "learning_rate": 4.800512838398774e-10, "loss": 0.3387, "step": 53696 }, { "epoch": 0.9969070595530274, "grad_norm": 0.3723895847797394, "learning_rate": 4.686896146000042e-10, "loss": 0.1884, "step": 53698 }, { "epoch": 0.9969441896904461, "grad_norm": 0.4542303681373596, "learning_rate": 4.5746400902824116e-10, "loss": 0.2482, "step": 53700 }, { "epoch": 0.9969813198278646, "grad_norm": 0.2907441556453705, "learning_rate": 4.4637446727890944e-10, "loss": 0.1688, "step": 53702 }, { "epoch": 0.9970184499652833, "grad_norm": 0.4606148898601532, "learning_rate": 4.354209895029993e-10, "loss": 0.1532, "step": 53704 }, { "epoch": 0.997055580102702, "grad_norm": 0.3624784052371979, "learning_rate": 4.2460357584817034e-10, "loss": 0.1822, "step": 53706 }, { "epoch": 0.9970927102401206, "grad_norm": 0.5269354581832886, "learning_rate": 4.1392222646208236e-10, "loss": 0.3226, "step": 53708 }, { "epoch": 0.9971298403775393, "grad_norm": 0.3577289581298828, "learning_rate": 4.033769414901745e-10, "loss": 0.2755, "step": 53710 }, { "epoch": 0.9971669705149578, "grad_norm": 0.37310826778411865, "learning_rate": 3.929677210767757e-10, "loss": 0.458, "step": 53712 }, { "epoch": 0.9972041006523765, "grad_norm": 0.9955070614814758, "learning_rate": 3.826945653617742e-10, "loss": 0.3687, "step": 53714 }, { "epoch": 0.9972412307897952, "grad_norm": 0.4011007845401764, "learning_rate": 3.725574744861682e-10, "loss": 0.2351, "step": 53716 }, { "epoch": 0.9972783609272138, "grad_norm": 0.3228796124458313, "learning_rate": 3.625564485887356e-10, "loss": 0.3916, "step": 53718 }, { "epoch": 0.9973154910646325, "grad_norm": 0.465603768825531, "learning_rate": 3.5269148780381347e-10, "loss": 0.2046, "step": 53720 }, { "epoch": 0.997352621202051, "grad_norm": 0.4606330692768097, "learning_rate": 3.429625922668489e-10, "loss": 0.2519, "step": 53722 }, { "epoch": 0.9973897513394697, "grad_norm": 0.2891654968261719, "learning_rate": 3.333697621088483e-10, "loss": 0.2871, "step": 53724 }, { "epoch": 0.9974268814768883, "grad_norm": 0.3046843409538269, "learning_rate": 3.2391299746192817e-10, "loss": 0.2035, "step": 53726 }, { "epoch": 0.997464011614307, "grad_norm": 0.43812593817710876, "learning_rate": 3.1459229845376416e-10, "loss": 0.4661, "step": 53728 }, { "epoch": 0.9975011417517257, "grad_norm": 0.4280153810977936, "learning_rate": 3.0540766521092167e-10, "loss": 0.3746, "step": 53730 }, { "epoch": 0.9975382718891442, "grad_norm": 0.24807289242744446, "learning_rate": 2.963590978599662e-10, "loss": 0.1999, "step": 53732 }, { "epoch": 0.9975754020265629, "grad_norm": 0.3842063546180725, "learning_rate": 2.87446596521912e-10, "loss": 0.3784, "step": 53734 }, { "epoch": 0.9976125321639815, "grad_norm": 0.5945963263511658, "learning_rate": 2.786701613199938e-10, "loss": 0.3559, "step": 53736 }, { "epoch": 0.9976496623014002, "grad_norm": 0.32852858304977417, "learning_rate": 2.700297923718953e-10, "loss": 0.2284, "step": 53738 }, { "epoch": 0.9976867924388189, "grad_norm": 0.5375715494155884, "learning_rate": 2.6152548979752055e-10, "loss": 0.4906, "step": 53740 }, { "epoch": 0.9977239225762374, "grad_norm": 0.34251654148101807, "learning_rate": 2.5315725371011234e-10, "loss": 0.1663, "step": 53742 }, { "epoch": 0.9977610527136561, "grad_norm": 0.41482555866241455, "learning_rate": 2.449250842240236e-10, "loss": 0.3223, "step": 53744 }, { "epoch": 0.9977981828510747, "grad_norm": 0.30851998925209045, "learning_rate": 2.368289814524971e-10, "loss": 0.3236, "step": 53746 }, { "epoch": 0.9978353129884934, "grad_norm": 0.42594292759895325, "learning_rate": 2.288689455043347e-10, "loss": 0.3947, "step": 53748 }, { "epoch": 0.9978724431259121, "grad_norm": 0.6044430732727051, "learning_rate": 2.2104497648944845e-10, "loss": 0.3306, "step": 53750 }, { "epoch": 0.9979095732633306, "grad_norm": 0.719402015209198, "learning_rate": 2.1335707451219934e-10, "loss": 0.3729, "step": 53752 }, { "epoch": 0.9979467034007493, "grad_norm": 0.48660847544670105, "learning_rate": 2.0580523967916877e-10, "loss": 0.2041, "step": 53754 }, { "epoch": 0.9979838335381679, "grad_norm": 0.40611547231674194, "learning_rate": 1.9838947209249725e-10, "loss": 0.4153, "step": 53756 }, { "epoch": 0.9980209636755866, "grad_norm": 0.46043530106544495, "learning_rate": 1.9110977185210489e-10, "loss": 0.3609, "step": 53758 }, { "epoch": 0.9980580938130053, "grad_norm": 0.41980114579200745, "learning_rate": 1.8396613905791173e-10, "loss": 0.3069, "step": 53760 }, { "epoch": 0.9980952239504238, "grad_norm": 0.32720959186553955, "learning_rate": 1.7695857380761738e-10, "loss": 0.1981, "step": 53762 }, { "epoch": 0.9981323540878425, "grad_norm": 0.3697236180305481, "learning_rate": 1.7008707619559083e-10, "loss": 0.2896, "step": 53764 }, { "epoch": 0.9981694842252611, "grad_norm": 0.6462329626083374, "learning_rate": 1.6335164631620105e-10, "loss": 0.2578, "step": 53766 }, { "epoch": 0.9982066143626798, "grad_norm": 0.4427346885204315, "learning_rate": 1.5675228425937605e-10, "loss": 0.2741, "step": 53768 }, { "epoch": 0.9982437445000983, "grad_norm": 0.2270592898130417, "learning_rate": 1.5028899011726438e-10, "loss": 0.289, "step": 53770 }, { "epoch": 0.998280874637517, "grad_norm": 0.28622016310691833, "learning_rate": 1.4396176397646345e-10, "loss": 0.1953, "step": 53772 }, { "epoch": 0.9983180047749357, "grad_norm": 0.32890117168426514, "learning_rate": 1.377706059235706e-10, "loss": 0.2047, "step": 53774 }, { "epoch": 0.9983551349123543, "grad_norm": 0.4771745204925537, "learning_rate": 1.3171551604185263e-10, "loss": 0.2669, "step": 53776 }, { "epoch": 0.998392265049773, "grad_norm": 0.4160178601741791, "learning_rate": 1.257964944145762e-10, "loss": 0.2299, "step": 53778 }, { "epoch": 0.9984293951871915, "grad_norm": 0.27709200978279114, "learning_rate": 1.2001354112278762e-10, "loss": 0.1718, "step": 53780 }, { "epoch": 0.9984665253246102, "grad_norm": 0.5008622407913208, "learning_rate": 1.143666562442025e-10, "loss": 0.499, "step": 53782 }, { "epoch": 0.9985036554620289, "grad_norm": 0.4462430775165558, "learning_rate": 1.0885583985542625e-10, "loss": 0.4113, "step": 53784 }, { "epoch": 0.9985407855994475, "grad_norm": 0.5909548997879028, "learning_rate": 1.03481092031954e-10, "loss": 0.1594, "step": 53786 }, { "epoch": 0.9985779157368662, "grad_norm": 0.21251781284809113, "learning_rate": 9.82424128481707e-11, "loss": 0.2595, "step": 53788 }, { "epoch": 0.9986150458742847, "grad_norm": 0.5942063331604004, "learning_rate": 9.31398023729102e-11, "loss": 0.1757, "step": 53790 }, { "epoch": 0.9986521760117034, "grad_norm": 0.3929668068885803, "learning_rate": 8.817326067722675e-11, "loss": 0.151, "step": 53792 }, { "epoch": 0.9986893061491221, "grad_norm": 0.23832613229751587, "learning_rate": 8.334278782773376e-11, "loss": 0.1724, "step": 53794 }, { "epoch": 0.9987264362865407, "grad_norm": 0.4586021900177002, "learning_rate": 7.86483838921548e-11, "loss": 0.3743, "step": 53796 }, { "epoch": 0.9987635664239594, "grad_norm": 0.4723970592021942, "learning_rate": 7.409004893155214e-11, "loss": 0.1855, "step": 53798 }, { "epoch": 0.9988006965613779, "grad_norm": 0.3699887692928314, "learning_rate": 6.966778301031874e-11, "loss": 0.2648, "step": 53800 }, { "epoch": 0.9988378266987966, "grad_norm": 0.518562912940979, "learning_rate": 6.53815861872964e-11, "loss": 0.0801, "step": 53802 }, { "epoch": 0.9988749568362153, "grad_norm": 0.5363730192184448, "learning_rate": 6.123145852021672e-11, "loss": 0.4228, "step": 53804 }, { "epoch": 0.9989120869736339, "grad_norm": 0.5990108251571655, "learning_rate": 5.7217400067921534e-11, "loss": 0.2689, "step": 53806 }, { "epoch": 0.9989492171110526, "grad_norm": 0.5347096920013428, "learning_rate": 5.333941088259131e-11, "loss": 0.2134, "step": 53808 }, { "epoch": 0.9989863472484711, "grad_norm": 0.2717122435569763, "learning_rate": 4.9597491018626984e-11, "loss": 0.158, "step": 53810 }, { "epoch": 0.9990234773858898, "grad_norm": 0.43843913078308105, "learning_rate": 4.599164052598859e-11, "loss": 0.3426, "step": 53812 }, { "epoch": 0.9990606075233085, "grad_norm": 0.49701523780822754, "learning_rate": 4.252185945463616e-11, "loss": 0.288, "step": 53814 }, { "epoch": 0.9990977376607271, "grad_norm": 0.29901742935180664, "learning_rate": 3.918814785008884e-11, "loss": 0.1733, "step": 53816 }, { "epoch": 0.9991348677981458, "grad_norm": 0.46045032143592834, "learning_rate": 3.5990505760086226e-11, "loss": 0.2785, "step": 53818 }, { "epoch": 0.9991719979355643, "grad_norm": 0.22243620455265045, "learning_rate": 3.292893322570656e-11, "loss": 0.2369, "step": 53820 }, { "epoch": 0.999209128072983, "grad_norm": 0.3235505223274231, "learning_rate": 3.000343029135877e-11, "loss": 0.1959, "step": 53822 }, { "epoch": 0.9992462582104016, "grad_norm": 0.5686267614364624, "learning_rate": 2.7213996993680213e-11, "loss": 0.3031, "step": 53824 }, { "epoch": 0.9992833883478203, "grad_norm": 0.5182155966758728, "learning_rate": 2.456063337263892e-11, "loss": 0.3971, "step": 53826 }, { "epoch": 0.999320518485239, "grad_norm": 0.29292815923690796, "learning_rate": 2.2043339464872248e-11, "loss": 0.2562, "step": 53828 }, { "epoch": 0.9993576486226575, "grad_norm": 0.5193690657615662, "learning_rate": 1.9662115302576667e-11, "loss": 0.3402, "step": 53830 }, { "epoch": 0.9993947787600762, "grad_norm": 0.503534197807312, "learning_rate": 1.741696092016909e-11, "loss": 0.3503, "step": 53832 }, { "epoch": 0.9994319088974948, "grad_norm": 0.5535789728164673, "learning_rate": 1.5307876346515315e-11, "loss": 0.2963, "step": 53834 }, { "epoch": 0.9994690390349135, "grad_norm": 0.48619726300239563, "learning_rate": 1.3334861611591365e-11, "loss": 0.3453, "step": 53836 }, { "epoch": 0.9995061691723321, "grad_norm": 0.45374229550361633, "learning_rate": 1.1497916742042592e-11, "loss": 0.3414, "step": 53838 }, { "epoch": 0.9995432993097507, "grad_norm": 0.34518998861312866, "learning_rate": 9.797041761183678e-12, "loss": 0.2941, "step": 53840 }, { "epoch": 0.9995804294471694, "grad_norm": 0.37265685200691223, "learning_rate": 8.232236694549756e-12, "loss": 0.2017, "step": 53842 }, { "epoch": 0.999617559584588, "grad_norm": 0.7281001806259155, "learning_rate": 6.80350156323506e-12, "loss": 0.214, "step": 53844 }, { "epoch": 0.9996546897220067, "grad_norm": 0.3920292854309082, "learning_rate": 5.51083638500316e-12, "loss": 0.386, "step": 53846 }, { "epoch": 0.9996918198594253, "grad_norm": 0.46180394291877747, "learning_rate": 4.354241177617624e-12, "loss": 0.2327, "step": 53848 }, { "epoch": 0.9997289499968439, "grad_norm": 0.5219993591308594, "learning_rate": 3.333715958842021e-12, "loss": 0.1978, "step": 53850 }, { "epoch": 0.9997660801342626, "grad_norm": 0.420766681432724, "learning_rate": 2.4492607408888035e-12, "loss": 0.0943, "step": 53852 }, { "epoch": 0.9998032102716812, "grad_norm": 0.3140062093734741, "learning_rate": 1.7008755348602025e-12, "loss": 0.2727, "step": 53854 }, { "epoch": 0.9998403404090999, "grad_norm": 0.23232311010360718, "learning_rate": 1.0885603540788936e-12, "loss": 0.2094, "step": 53856 }, { "epoch": 0.9998774705465185, "grad_norm": 0.4728757441043854, "learning_rate": 6.123152040959923e-13, "loss": 0.2055, "step": 53858 }, { "epoch": 0.9999146006839371, "grad_norm": 0.24474041163921356, "learning_rate": 2.7214009268305974e-13, "loss": 0.196, "step": 53860 }, { "epoch": 0.9999517308213558, "grad_norm": 0.3812914490699768, "learning_rate": 6.803502317076493e-14, "loss": 0.2148, "step": 53862 }, { "epoch": 0.9999888609587744, "grad_norm": 0.2657621502876282, "learning_rate": 0.0, "loss": 0.2966, "step": 53864 }, { "epoch": 0.9999888609587744, "step": 53864, "total_flos": 4.172909200980178e+19, "train_loss": 0.30097950914072474, "train_runtime": 327326.476, "train_samples_per_second": 3.291, "train_steps_per_second": 0.165 } ], "logging_steps": 2, "max_steps": 53864, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 15000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 4.172909200980178e+19, "train_batch_size": 2, "trial_name": null, "trial_params": null }