{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 63428, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.576590780097118e-05, "grad_norm": 6136.448873477378, "learning_rate": 3.1496062992125985e-08, "loss": 84.6258, "step": 1 }, { "epoch": 0.0001576590780097118, "grad_norm": 3161.4555224240553, "learning_rate": 3.149606299212599e-07, "loss": 72.5296, "step": 10 }, { "epoch": 0.0003153181560194236, "grad_norm": 1114.5524729010065, "learning_rate": 6.299212598425198e-07, "loss": 54.4315, "step": 20 }, { "epoch": 0.0004729772340291354, "grad_norm": 645.5323541224077, "learning_rate": 9.448818897637796e-07, "loss": 43.284, "step": 30 }, { "epoch": 0.0006306363120388472, "grad_norm": 498.3970108427761, "learning_rate": 1.2598425196850396e-06, "loss": 39.0964, "step": 40 }, { "epoch": 0.000788295390048559, "grad_norm": 394.7435011914806, "learning_rate": 1.5748031496062992e-06, "loss": 36.6301, "step": 50 }, { "epoch": 0.0009459544680582708, "grad_norm": 354.84027487028345, "learning_rate": 1.8897637795275591e-06, "loss": 35.0133, "step": 60 }, { "epoch": 0.0011036135460679825, "grad_norm": 386.62413540905965, "learning_rate": 2.2047244094488192e-06, "loss": 33.2827, "step": 70 }, { "epoch": 0.0012612726240776944, "grad_norm": 359.87927578259485, "learning_rate": 2.519685039370079e-06, "loss": 32.6403, "step": 80 }, { "epoch": 0.0014189317020874062, "grad_norm": 367.8663362489142, "learning_rate": 2.8346456692913386e-06, "loss": 32.842, "step": 90 }, { "epoch": 0.001576590780097118, "grad_norm": 387.73100270686484, "learning_rate": 3.1496062992125985e-06, "loss": 32.4829, "step": 100 }, { "epoch": 0.0017342498581068297, "grad_norm": 356.5409243626625, "learning_rate": 3.4645669291338583e-06, "loss": 31.6143, "step": 110 }, { "epoch": 0.0018919089361165416, "grad_norm": 337.63945096262444, "learning_rate": 3.7795275590551182e-06, "loss": 31.4999, "step": 120 }, { "epoch": 0.0020495680141262534, "grad_norm": 371.2342743585285, "learning_rate": 4.0944881889763785e-06, "loss": 31.8041, "step": 130 }, { "epoch": 0.002207227092135965, "grad_norm": 356.11604484982104, "learning_rate": 4.4094488188976384e-06, "loss": 32.2964, "step": 140 }, { "epoch": 0.002364886170145677, "grad_norm": 345.1117745099858, "learning_rate": 4.724409448818898e-06, "loss": 31.8047, "step": 150 }, { "epoch": 0.0025225452481553888, "grad_norm": 348.80788979853213, "learning_rate": 5.039370078740158e-06, "loss": 32.2796, "step": 160 }, { "epoch": 0.0026802043261651004, "grad_norm": 361.5351858486409, "learning_rate": 5.354330708661418e-06, "loss": 31.9193, "step": 170 }, { "epoch": 0.0028378634041748125, "grad_norm": 384.57844733889215, "learning_rate": 5.669291338582677e-06, "loss": 32.0474, "step": 180 }, { "epoch": 0.002995522482184524, "grad_norm": 330.86847358728994, "learning_rate": 5.984251968503938e-06, "loss": 31.7381, "step": 190 }, { "epoch": 0.003153181560194236, "grad_norm": 344.3369473738352, "learning_rate": 6.299212598425197e-06, "loss": 32.3074, "step": 200 }, { "epoch": 0.003310840638203948, "grad_norm": 334.0015742118894, "learning_rate": 6.614173228346458e-06, "loss": 32.059, "step": 210 }, { "epoch": 0.0034684997162136594, "grad_norm": 348.42059438616525, "learning_rate": 6.929133858267717e-06, "loss": 32.7286, "step": 220 }, { "epoch": 0.0036261587942233715, "grad_norm": 370.8645434280134, "learning_rate": 7.2440944881889774e-06, "loss": 33.194, "step": 230 }, { "epoch": 0.003783817872233083, "grad_norm": 358.63877885290714, "learning_rate": 7.5590551181102365e-06, "loss": 32.5248, "step": 240 }, { "epoch": 0.003941476950242795, "grad_norm": 348.623528233752, "learning_rate": 7.874015748031496e-06, "loss": 33.59, "step": 250 }, { "epoch": 0.004099136028252507, "grad_norm": 382.59025185042793, "learning_rate": 8.188976377952757e-06, "loss": 33.5457, "step": 260 }, { "epoch": 0.0042567951062622185, "grad_norm": 346.95311346894295, "learning_rate": 8.503937007874016e-06, "loss": 34.2029, "step": 270 }, { "epoch": 0.00441445418427193, "grad_norm": 339.8810937496737, "learning_rate": 8.818897637795277e-06, "loss": 34.057, "step": 280 }, { "epoch": 0.004572113262281642, "grad_norm": 391.58592083965414, "learning_rate": 9.133858267716536e-06, "loss": 34.0145, "step": 290 }, { "epoch": 0.004729772340291354, "grad_norm": 348.21439972388964, "learning_rate": 9.448818897637797e-06, "loss": 33.7321, "step": 300 }, { "epoch": 0.004887431418301066, "grad_norm": 338.16549431526096, "learning_rate": 9.763779527559056e-06, "loss": 34.4547, "step": 310 }, { "epoch": 0.0050450904963107775, "grad_norm": 327.65345063605656, "learning_rate": 1.0078740157480316e-05, "loss": 34.8402, "step": 320 }, { "epoch": 0.005202749574320489, "grad_norm": 364.0018919156324, "learning_rate": 1.0393700787401575e-05, "loss": 35.7161, "step": 330 }, { "epoch": 0.005360408652330201, "grad_norm": 371.40899997633636, "learning_rate": 1.0708661417322836e-05, "loss": 34.927, "step": 340 }, { "epoch": 0.005518067730339913, "grad_norm": 378.4140191691348, "learning_rate": 1.1023622047244095e-05, "loss": 35.4069, "step": 350 }, { "epoch": 0.005675726808349625, "grad_norm": 416.05826095144647, "learning_rate": 1.1338582677165354e-05, "loss": 35.5483, "step": 360 }, { "epoch": 0.005833385886359337, "grad_norm": 335.1182819693213, "learning_rate": 1.1653543307086615e-05, "loss": 36.1456, "step": 370 }, { "epoch": 0.005991044964369048, "grad_norm": 381.194822577663, "learning_rate": 1.1968503937007876e-05, "loss": 36.264, "step": 380 }, { "epoch": 0.00614870404237876, "grad_norm": 425.80290442672924, "learning_rate": 1.2283464566929135e-05, "loss": 36.8141, "step": 390 }, { "epoch": 0.006306363120388472, "grad_norm": 465.7409853496373, "learning_rate": 1.2598425196850394e-05, "loss": 40.7431, "step": 400 }, { "epoch": 0.006464022198398184, "grad_norm": 361.8723674720942, "learning_rate": 1.2913385826771655e-05, "loss": 37.849, "step": 410 }, { "epoch": 0.006621681276407896, "grad_norm": 449.79474132772896, "learning_rate": 1.3228346456692915e-05, "loss": 36.757, "step": 420 }, { "epoch": 0.006779340354417607, "grad_norm": 365.9189310797654, "learning_rate": 1.3543307086614174e-05, "loss": 37.67, "step": 430 }, { "epoch": 0.006936999432427319, "grad_norm": 324.15337203845814, "learning_rate": 1.3858267716535433e-05, "loss": 38.4031, "step": 440 }, { "epoch": 0.0070946585104370305, "grad_norm": 346.46504115097224, "learning_rate": 1.4173228346456694e-05, "loss": 38.2098, "step": 450 }, { "epoch": 0.007252317588446743, "grad_norm": 313.38820253085686, "learning_rate": 1.4488188976377955e-05, "loss": 38.846, "step": 460 }, { "epoch": 0.007409976666456455, "grad_norm": 409.81356317220343, "learning_rate": 1.4803149606299214e-05, "loss": 39.9179, "step": 470 }, { "epoch": 0.007567635744466166, "grad_norm": 328.0614828957513, "learning_rate": 1.5118110236220473e-05, "loss": 40.0069, "step": 480 }, { "epoch": 0.007725294822475878, "grad_norm": 338.36335738625036, "learning_rate": 1.5433070866141734e-05, "loss": 40.1234, "step": 490 }, { "epoch": 0.00788295390048559, "grad_norm": 316.289219952385, "learning_rate": 1.5748031496062993e-05, "loss": 40.3979, "step": 500 }, { "epoch": 0.008040612978495301, "grad_norm": 371.08358155487036, "learning_rate": 1.6062992125984255e-05, "loss": 41.5509, "step": 510 }, { "epoch": 0.008198272056505014, "grad_norm": 346.418578886624, "learning_rate": 1.6377952755905514e-05, "loss": 42.0929, "step": 520 }, { "epoch": 0.008355931134514726, "grad_norm": 376.1110032211999, "learning_rate": 1.6692913385826773e-05, "loss": 41.1094, "step": 530 }, { "epoch": 0.008513590212524437, "grad_norm": 326.2207127609827, "learning_rate": 1.7007874015748032e-05, "loss": 41.7175, "step": 540 }, { "epoch": 0.00867124929053415, "grad_norm": 322.81883278987743, "learning_rate": 1.7322834645669295e-05, "loss": 41.8284, "step": 550 }, { "epoch": 0.00882890836854386, "grad_norm": 300.72705016748944, "learning_rate": 1.7637795275590554e-05, "loss": 42.4107, "step": 560 }, { "epoch": 0.008986567446553573, "grad_norm": 339.21075922654063, "learning_rate": 1.7952755905511813e-05, "loss": 43.6416, "step": 570 }, { "epoch": 0.009144226524563284, "grad_norm": 313.63666368931524, "learning_rate": 1.8267716535433072e-05, "loss": 43.748, "step": 580 }, { "epoch": 0.009301885602572996, "grad_norm": 341.931966599984, "learning_rate": 1.858267716535433e-05, "loss": 44.2815, "step": 590 }, { "epoch": 0.009459544680582709, "grad_norm": 374.9772387223308, "learning_rate": 1.8897637795275593e-05, "loss": 44.1742, "step": 600 }, { "epoch": 0.00961720375859242, "grad_norm": 324.33047969102665, "learning_rate": 1.9212598425196852e-05, "loss": 44.3532, "step": 610 }, { "epoch": 0.009774862836602132, "grad_norm": 302.7664673381459, "learning_rate": 1.952755905511811e-05, "loss": 44.175, "step": 620 }, { "epoch": 0.009932521914611843, "grad_norm": 329.6713522808945, "learning_rate": 1.984251968503937e-05, "loss": 45.7035, "step": 630 }, { "epoch": 0.010090180992621555, "grad_norm": 303.3520660322093, "learning_rate": 1.9999999687113166e-05, "loss": 46.2882, "step": 640 }, { "epoch": 0.010247840070631268, "grad_norm": 315.7783694578589, "learning_rate": 1.99999971840186e-05, "loss": 46.666, "step": 650 }, { "epoch": 0.010405499148640978, "grad_norm": 391.71666615763985, "learning_rate": 1.9999992177830084e-05, "loss": 48.9957, "step": 660 }, { "epoch": 0.01056315822665069, "grad_norm": 351.1395601988741, "learning_rate": 1.9999984668548883e-05, "loss": 47.4255, "step": 670 }, { "epoch": 0.010720817304660402, "grad_norm": 331.47711234394666, "learning_rate": 1.9999974656176875e-05, "loss": 47.5439, "step": 680 }, { "epoch": 0.010878476382670114, "grad_norm": 324.8069298330113, "learning_rate": 1.999996214071656e-05, "loss": 46.3133, "step": 690 }, { "epoch": 0.011036135460679827, "grad_norm": 321.2713655212137, "learning_rate": 1.999994712217108e-05, "loss": 46.9811, "step": 700 }, { "epoch": 0.011193794538689537, "grad_norm": 302.2472160013204, "learning_rate": 1.9999929600544182e-05, "loss": 46.0791, "step": 710 }, { "epoch": 0.01135145361669925, "grad_norm": 316.52056813645106, "learning_rate": 1.999990957584026e-05, "loss": 46.4621, "step": 720 }, { "epoch": 0.01150911269470896, "grad_norm": 538.7369436744099, "learning_rate": 1.9999887048064325e-05, "loss": 45.7207, "step": 730 }, { "epoch": 0.011666771772718673, "grad_norm": 287.4711324236869, "learning_rate": 1.999986201722202e-05, "loss": 47.6132, "step": 740 }, { "epoch": 0.011824430850728386, "grad_norm": 688.0557993227912, "learning_rate": 1.99998344833196e-05, "loss": 48.8556, "step": 750 }, { "epoch": 0.011982089928738096, "grad_norm": 336.08769573968226, "learning_rate": 1.9999804446363966e-05, "loss": 46.6399, "step": 760 }, { "epoch": 0.012139749006747809, "grad_norm": 277.37932713098746, "learning_rate": 1.9999771906362633e-05, "loss": 46.7161, "step": 770 }, { "epoch": 0.01229740808475752, "grad_norm": 288.69030270400117, "learning_rate": 1.999973686332375e-05, "loss": 47.3196, "step": 780 }, { "epoch": 0.012455067162767232, "grad_norm": 336.97857541433876, "learning_rate": 1.9999699317256084e-05, "loss": 47.6121, "step": 790 }, { "epoch": 0.012612726240776945, "grad_norm": 376.32746894013934, "learning_rate": 1.9999659268169034e-05, "loss": 48.3939, "step": 800 }, { "epoch": 0.012770385318786655, "grad_norm": 368.3575262348943, "learning_rate": 1.9999616716072622e-05, "loss": 52.5304, "step": 810 }, { "epoch": 0.012928044396796368, "grad_norm": 1278.07362197743, "learning_rate": 1.9999571660977507e-05, "loss": 50.3874, "step": 820 }, { "epoch": 0.013085703474806079, "grad_norm": 308.254512444898, "learning_rate": 1.999952410289496e-05, "loss": 47.887, "step": 830 }, { "epoch": 0.013243362552815791, "grad_norm": 296.27579548693177, "learning_rate": 1.9999474041836885e-05, "loss": 49.5803, "step": 840 }, { "epoch": 0.013401021630825504, "grad_norm": 319.133688104285, "learning_rate": 1.999942147781582e-05, "loss": 46.6842, "step": 850 }, { "epoch": 0.013558680708835215, "grad_norm": 379.6412150204057, "learning_rate": 1.9999366410844912e-05, "loss": 47.9732, "step": 860 }, { "epoch": 0.013716339786844927, "grad_norm": 291.98747061917146, "learning_rate": 1.9999308840937958e-05, "loss": 46.8928, "step": 870 }, { "epoch": 0.013873998864854638, "grad_norm": 317.95284413889567, "learning_rate": 1.9999248768109353e-05, "loss": 48.9627, "step": 880 }, { "epoch": 0.01403165794286435, "grad_norm": 395.61344423534945, "learning_rate": 1.9999186192374144e-05, "loss": 48.3399, "step": 890 }, { "epoch": 0.014189317020874061, "grad_norm": 282.43015653898834, "learning_rate": 1.9999121113747994e-05, "loss": 46.9062, "step": 900 }, { "epoch": 0.014346976098883774, "grad_norm": 281.80049050390414, "learning_rate": 1.9999053532247187e-05, "loss": 47.5105, "step": 910 }, { "epoch": 0.014504635176893486, "grad_norm": 273.74423295470865, "learning_rate": 1.9998983447888644e-05, "loss": 47.5068, "step": 920 }, { "epoch": 0.014662294254903197, "grad_norm": 263.82840240668577, "learning_rate": 1.9998910860689907e-05, "loss": 47.3946, "step": 930 }, { "epoch": 0.01481995333291291, "grad_norm": 262.2052588228915, "learning_rate": 1.999883577066914e-05, "loss": 47.8084, "step": 940 }, { "epoch": 0.01497761241092262, "grad_norm": 330.2385996236281, "learning_rate": 1.999875817784515e-05, "loss": 48.1465, "step": 950 }, { "epoch": 0.015135271488932333, "grad_norm": 309.17015460393037, "learning_rate": 1.9998678082237347e-05, "loss": 49.5961, "step": 960 }, { "epoch": 0.015292930566942045, "grad_norm": 290.9401002572036, "learning_rate": 1.9998595483865787e-05, "loss": 48.4621, "step": 970 }, { "epoch": 0.015450589644951756, "grad_norm": 323.8662724340775, "learning_rate": 1.999851038275115e-05, "loss": 47.1934, "step": 980 }, { "epoch": 0.015608248722961468, "grad_norm": 256.4790663315771, "learning_rate": 1.9998422778914722e-05, "loss": 47.4243, "step": 990 }, { "epoch": 0.01576590780097118, "grad_norm": 344.38002378602965, "learning_rate": 1.9998332672378445e-05, "loss": 48.2011, "step": 1000 }, { "epoch": 0.01592356687898089, "grad_norm": 299.515431248553, "learning_rate": 1.999824006316487e-05, "loss": 48.0474, "step": 1010 }, { "epoch": 0.016081225956990602, "grad_norm": 297.2913958325348, "learning_rate": 1.999814495129717e-05, "loss": 47.5725, "step": 1020 }, { "epoch": 0.016238885035000317, "grad_norm": 258.13588733090876, "learning_rate": 1.9998047336799164e-05, "loss": 46.3174, "step": 1030 }, { "epoch": 0.016396544113010027, "grad_norm": 270.2213693530075, "learning_rate": 1.999794721969528e-05, "loss": 47.5647, "step": 1040 }, { "epoch": 0.016554203191019738, "grad_norm": 268.88604970189544, "learning_rate": 1.9997844600010578e-05, "loss": 48.0002, "step": 1050 }, { "epoch": 0.016711862269029452, "grad_norm": 273.05730836910175, "learning_rate": 1.999773947777075e-05, "loss": 47.044, "step": 1060 }, { "epoch": 0.016869521347039163, "grad_norm": 286.7332619701638, "learning_rate": 1.99976318530021e-05, "loss": 48.6883, "step": 1070 }, { "epoch": 0.017027180425048874, "grad_norm": 283.8763785951342, "learning_rate": 1.999752172573158e-05, "loss": 47.1762, "step": 1080 }, { "epoch": 0.017184839503058585, "grad_norm": 313.39476435451144, "learning_rate": 1.999740909598674e-05, "loss": 48.7136, "step": 1090 }, { "epoch": 0.0173424985810683, "grad_norm": 254.9804885007702, "learning_rate": 1.9997293963795782e-05, "loss": 46.8325, "step": 1100 }, { "epoch": 0.01750015765907801, "grad_norm": 255.0749199476409, "learning_rate": 1.9997176329187524e-05, "loss": 46.9065, "step": 1110 }, { "epoch": 0.01765781673708772, "grad_norm": 271.83534881667657, "learning_rate": 1.999705619219141e-05, "loss": 47.5617, "step": 1120 }, { "epoch": 0.017815475815097435, "grad_norm": 259.00083222137283, "learning_rate": 1.9996933552837515e-05, "loss": 48.3295, "step": 1130 }, { "epoch": 0.017973134893107146, "grad_norm": 257.4775618005067, "learning_rate": 1.9996808411156528e-05, "loss": 46.9299, "step": 1140 }, { "epoch": 0.018130793971116856, "grad_norm": 255.62930494337695, "learning_rate": 1.9996680767179787e-05, "loss": 47.3899, "step": 1150 }, { "epoch": 0.018288453049126567, "grad_norm": 245.05504266323982, "learning_rate": 1.9996550620939228e-05, "loss": 47.9785, "step": 1160 }, { "epoch": 0.01844611212713628, "grad_norm": 273.47380954029177, "learning_rate": 1.9996417972467434e-05, "loss": 48.6639, "step": 1170 }, { "epoch": 0.018603771205145992, "grad_norm": 400.856970379522, "learning_rate": 1.999628282179761e-05, "loss": 48.7376, "step": 1180 }, { "epoch": 0.018761430283155703, "grad_norm": 261.6162933678833, "learning_rate": 1.999614516896358e-05, "loss": 47.0069, "step": 1190 }, { "epoch": 0.018919089361165417, "grad_norm": 561.6533145553475, "learning_rate": 1.9996005013999808e-05, "loss": 47.7094, "step": 1200 }, { "epoch": 0.019076748439175128, "grad_norm": 258.9300652675039, "learning_rate": 1.999586235694137e-05, "loss": 47.0054, "step": 1210 }, { "epoch": 0.01923440751718484, "grad_norm": 268.31523123031684, "learning_rate": 1.9995717197823978e-05, "loss": 47.293, "step": 1220 }, { "epoch": 0.019392066595194553, "grad_norm": 280.7876609822523, "learning_rate": 1.9995569536683962e-05, "loss": 47.5284, "step": 1230 }, { "epoch": 0.019549725673204264, "grad_norm": 245.6584835078672, "learning_rate": 1.9995419373558288e-05, "loss": 47.5203, "step": 1240 }, { "epoch": 0.019707384751213974, "grad_norm": 268.74908025319957, "learning_rate": 1.999526670848454e-05, "loss": 47.373, "step": 1250 }, { "epoch": 0.019865043829223685, "grad_norm": 277.79059405072843, "learning_rate": 1.999511154150093e-05, "loss": 47.0457, "step": 1260 }, { "epoch": 0.0200227029072334, "grad_norm": 261.15863346901426, "learning_rate": 1.9994953872646304e-05, "loss": 46.619, "step": 1270 }, { "epoch": 0.02018036198524311, "grad_norm": 245.48209562729426, "learning_rate": 1.999479370196012e-05, "loss": 46.977, "step": 1280 }, { "epoch": 0.02033802106325282, "grad_norm": 283.4970933954463, "learning_rate": 1.9994631029482477e-05, "loss": 47.4848, "step": 1290 }, { "epoch": 0.020495680141262535, "grad_norm": 237.48764125526412, "learning_rate": 1.9994465855254094e-05, "loss": 45.8869, "step": 1300 }, { "epoch": 0.020653339219272246, "grad_norm": 269.12608447255576, "learning_rate": 1.999429817931631e-05, "loss": 46.7642, "step": 1310 }, { "epoch": 0.020810998297281957, "grad_norm": 262.7718626827125, "learning_rate": 1.9994128001711097e-05, "loss": 47.3484, "step": 1320 }, { "epoch": 0.02096865737529167, "grad_norm": 270.96813856875434, "learning_rate": 1.9993955322481054e-05, "loss": 46.6908, "step": 1330 }, { "epoch": 0.02112631645330138, "grad_norm": 249.07619182982148, "learning_rate": 1.9993780141669405e-05, "loss": 47.5962, "step": 1340 }, { "epoch": 0.021283975531311092, "grad_norm": 245.66159843964525, "learning_rate": 1.9993602459319997e-05, "loss": 46.957, "step": 1350 }, { "epoch": 0.021441634609320803, "grad_norm": 257.05482888992105, "learning_rate": 1.999342227547731e-05, "loss": 49.9269, "step": 1360 }, { "epoch": 0.021599293687330517, "grad_norm": 284.7002868084265, "learning_rate": 1.999323959018644e-05, "loss": 48.3095, "step": 1370 }, { "epoch": 0.021756952765340228, "grad_norm": 234.725923886284, "learning_rate": 1.9993054403493114e-05, "loss": 48.6381, "step": 1380 }, { "epoch": 0.02191461184334994, "grad_norm": 232.8141583419383, "learning_rate": 1.9992866715443695e-05, "loss": 47.7242, "step": 1390 }, { "epoch": 0.022072270921359653, "grad_norm": 287.41000205753465, "learning_rate": 1.9992676526085154e-05, "loss": 47.7324, "step": 1400 }, { "epoch": 0.022229929999369364, "grad_norm": 233.9061967626706, "learning_rate": 1.9992483835465105e-05, "loss": 46.2357, "step": 1410 }, { "epoch": 0.022387589077379075, "grad_norm": 232.03417045640137, "learning_rate": 1.9992288643631773e-05, "loss": 46.1043, "step": 1420 }, { "epoch": 0.02254524815538879, "grad_norm": 235.21224500515225, "learning_rate": 1.999209095063402e-05, "loss": 47.5046, "step": 1430 }, { "epoch": 0.0227029072333985, "grad_norm": 360.8072765170084, "learning_rate": 1.9991890756521327e-05, "loss": 46.3222, "step": 1440 }, { "epoch": 0.02286056631140821, "grad_norm": 262.68808262754607, "learning_rate": 1.9991688061343807e-05, "loss": 47.949, "step": 1450 }, { "epoch": 0.02301822538941792, "grad_norm": 263.54200441351935, "learning_rate": 1.9991482865152202e-05, "loss": 45.9398, "step": 1460 }, { "epoch": 0.023175884467427636, "grad_norm": 254.70861565961832, "learning_rate": 1.9991275167997863e-05, "loss": 46.0417, "step": 1470 }, { "epoch": 0.023333543545437346, "grad_norm": 233.39304143998672, "learning_rate": 1.999106496993279e-05, "loss": 46.436, "step": 1480 }, { "epoch": 0.023491202623447057, "grad_norm": 265.2248130716645, "learning_rate": 1.9990852271009588e-05, "loss": 47.5151, "step": 1490 }, { "epoch": 0.02364886170145677, "grad_norm": 274.76376009543526, "learning_rate": 1.9990637071281505e-05, "loss": 47.9234, "step": 1500 }, { "epoch": 0.023806520779466482, "grad_norm": 235.91620136739724, "learning_rate": 1.9990419370802404e-05, "loss": 46.894, "step": 1510 }, { "epoch": 0.023964179857476193, "grad_norm": 243.67488545074977, "learning_rate": 1.9990199169626775e-05, "loss": 46.9501, "step": 1520 }, { "epoch": 0.024121838935485904, "grad_norm": 241.99502467065454, "learning_rate": 1.9989976467809745e-05, "loss": 46.261, "step": 1530 }, { "epoch": 0.024279498013495618, "grad_norm": 254.26723647416142, "learning_rate": 1.998975126540705e-05, "loss": 46.7328, "step": 1540 }, { "epoch": 0.02443715709150533, "grad_norm": 254.61351776829153, "learning_rate": 1.9989523562475063e-05, "loss": 46.4496, "step": 1550 }, { "epoch": 0.02459481616951504, "grad_norm": 228.6703797843249, "learning_rate": 1.9989293359070778e-05, "loss": 46.529, "step": 1560 }, { "epoch": 0.024752475247524754, "grad_norm": 248.432780717712, "learning_rate": 1.9989060655251822e-05, "loss": 47.3409, "step": 1570 }, { "epoch": 0.024910134325534464, "grad_norm": 252.89447827725152, "learning_rate": 1.998882545107644e-05, "loss": 46.8765, "step": 1580 }, { "epoch": 0.025067793403544175, "grad_norm": 222.94329197453303, "learning_rate": 1.9988587746603508e-05, "loss": 47.2395, "step": 1590 }, { "epoch": 0.02522545248155389, "grad_norm": 227.74158893098507, "learning_rate": 1.998834754189252e-05, "loss": 46.4685, "step": 1600 }, { "epoch": 0.0253831115595636, "grad_norm": 243.95109641045937, "learning_rate": 1.9988104837003606e-05, "loss": 48.7543, "step": 1610 }, { "epoch": 0.02554077063757331, "grad_norm": 230.00844782570252, "learning_rate": 1.998785963199752e-05, "loss": 47.2675, "step": 1620 }, { "epoch": 0.02569842971558302, "grad_norm": 241.28191834721073, "learning_rate": 1.9987611926935634e-05, "loss": 48.6255, "step": 1630 }, { "epoch": 0.025856088793592736, "grad_norm": 221.12880447384313, "learning_rate": 1.998736172187995e-05, "loss": 47.186, "step": 1640 }, { "epoch": 0.026013747871602447, "grad_norm": 235.11302827586607, "learning_rate": 1.9987109016893105e-05, "loss": 47.5916, "step": 1650 }, { "epoch": 0.026171406949612157, "grad_norm": 231.20330211074648, "learning_rate": 1.9986853812038345e-05, "loss": 47.0501, "step": 1660 }, { "epoch": 0.02632906602762187, "grad_norm": 233.88695083171888, "learning_rate": 1.998659610737955e-05, "loss": 46.8973, "step": 1670 }, { "epoch": 0.026486725105631582, "grad_norm": 235.57181026432468, "learning_rate": 1.9986335902981233e-05, "loss": 46.4802, "step": 1680 }, { "epoch": 0.026644384183641293, "grad_norm": 238.85852849626113, "learning_rate": 1.998607319890852e-05, "loss": 46.6761, "step": 1690 }, { "epoch": 0.026802043261651008, "grad_norm": 386.16229827923667, "learning_rate": 1.9985807995227172e-05, "loss": 45.3553, "step": 1700 }, { "epoch": 0.02695970233966072, "grad_norm": 271.88150061009276, "learning_rate": 1.998554029200357e-05, "loss": 46.3471, "step": 1710 }, { "epoch": 0.02711736141767043, "grad_norm": 299.98955159951174, "learning_rate": 1.998527008930472e-05, "loss": 45.5774, "step": 1720 }, { "epoch": 0.02727502049568014, "grad_norm": 243.32323925570805, "learning_rate": 1.9984997387198263e-05, "loss": 45.4532, "step": 1730 }, { "epoch": 0.027432679573689854, "grad_norm": 339.2908673316229, "learning_rate": 1.9984722185752447e-05, "loss": 47.4071, "step": 1740 }, { "epoch": 0.027590338651699565, "grad_norm": 258.2542923725232, "learning_rate": 1.9984444485036173e-05, "loss": 46.5614, "step": 1750 }, { "epoch": 0.027747997729709276, "grad_norm": 257.83664648977555, "learning_rate": 1.998416428511894e-05, "loss": 47.0513, "step": 1760 }, { "epoch": 0.02790565680771899, "grad_norm": 221.76525959064, "learning_rate": 1.9983881586070893e-05, "loss": 45.5975, "step": 1770 }, { "epoch": 0.0280633158857287, "grad_norm": 216.4032178414288, "learning_rate": 1.9983596387962786e-05, "loss": 45.5731, "step": 1780 }, { "epoch": 0.02822097496373841, "grad_norm": 250.54506700345803, "learning_rate": 1.9983308690866012e-05, "loss": 45.1819, "step": 1790 }, { "epoch": 0.028378634041748122, "grad_norm": 249.76075133255682, "learning_rate": 1.9983018494852583e-05, "loss": 46.9977, "step": 1800 }, { "epoch": 0.028536293119757836, "grad_norm": 227.47969866857775, "learning_rate": 1.998272579999514e-05, "loss": 46.136, "step": 1810 }, { "epoch": 0.028693952197767547, "grad_norm": 728.4059272465186, "learning_rate": 1.9982430606366945e-05, "loss": 45.3594, "step": 1820 }, { "epoch": 0.028851611275777258, "grad_norm": 227.35267403482263, "learning_rate": 1.998213291404189e-05, "loss": 45.6378, "step": 1830 }, { "epoch": 0.029009270353786972, "grad_norm": 253.2923565700324, "learning_rate": 1.998183272309449e-05, "loss": 45.0908, "step": 1840 }, { "epoch": 0.029166929431796683, "grad_norm": 227.85207716958524, "learning_rate": 1.998153003359988e-05, "loss": 45.9278, "step": 1850 }, { "epoch": 0.029324588509806394, "grad_norm": 209.23435394115927, "learning_rate": 1.9981224845633832e-05, "loss": 45.3033, "step": 1860 }, { "epoch": 0.029482247587816108, "grad_norm": 1571.6335332474105, "learning_rate": 1.9980917159272735e-05, "loss": 47.2025, "step": 1870 }, { "epoch": 0.02963990666582582, "grad_norm": 223.23511280245933, "learning_rate": 1.9980606974593605e-05, "loss": 46.9106, "step": 1880 }, { "epoch": 0.02979756574383553, "grad_norm": 227.63154465924143, "learning_rate": 1.9980294291674087e-05, "loss": 45.6183, "step": 1890 }, { "epoch": 0.02995522482184524, "grad_norm": 227.8790945612638, "learning_rate": 1.9979979110592448e-05, "loss": 45.6081, "step": 1900 }, { "epoch": 0.030112883899854954, "grad_norm": 250.2993425051767, "learning_rate": 1.9979661431427583e-05, "loss": 46.0521, "step": 1910 }, { "epoch": 0.030270542977864665, "grad_norm": 468.8547203077812, "learning_rate": 1.9979341254259e-05, "loss": 46.4701, "step": 1920 }, { "epoch": 0.030428202055874376, "grad_norm": 224.9640675936819, "learning_rate": 1.9979018579166854e-05, "loss": 44.9141, "step": 1930 }, { "epoch": 0.03058586113388409, "grad_norm": 224.34258896754744, "learning_rate": 1.997869340623191e-05, "loss": 46.1389, "step": 1940 }, { "epoch": 0.0307435202118938, "grad_norm": 237.86158578552917, "learning_rate": 1.9978365735535557e-05, "loss": 46.7765, "step": 1950 }, { "epoch": 0.030901179289903512, "grad_norm": 220.0663621222628, "learning_rate": 1.997803556715982e-05, "loss": 45.3912, "step": 1960 }, { "epoch": 0.031058838367913226, "grad_norm": 237.9529737704678, "learning_rate": 1.997770290118734e-05, "loss": 45.8004, "step": 1970 }, { "epoch": 0.031216497445922937, "grad_norm": 209.54573889550915, "learning_rate": 1.997736773770139e-05, "loss": 45.5533, "step": 1980 }, { "epoch": 0.03137415652393265, "grad_norm": 467.62947673008995, "learning_rate": 1.9977030076785858e-05, "loss": 45.9957, "step": 1990 }, { "epoch": 0.03153181560194236, "grad_norm": 253.84111339976454, "learning_rate": 1.997668991852527e-05, "loss": 44.7353, "step": 2000 }, { "epoch": 0.03168947467995207, "grad_norm": 261.22662452767264, "learning_rate": 1.9976347263004773e-05, "loss": 45.6616, "step": 2010 }, { "epoch": 0.03184713375796178, "grad_norm": 299.9799888055782, "learning_rate": 1.9976002110310126e-05, "loss": 45.9098, "step": 2020 }, { "epoch": 0.0320047928359715, "grad_norm": 219.0581268042121, "learning_rate": 1.9975654460527734e-05, "loss": 45.2352, "step": 2030 }, { "epoch": 0.032162451913981205, "grad_norm": 204.20240005627628, "learning_rate": 1.9975304313744615e-05, "loss": 44.3001, "step": 2040 }, { "epoch": 0.03232011099199092, "grad_norm": 216.28144549360366, "learning_rate": 1.9974951670048412e-05, "loss": 46.212, "step": 2050 }, { "epoch": 0.03247777007000063, "grad_norm": 233.32009140692045, "learning_rate": 1.9974596529527396e-05, "loss": 45.2551, "step": 2060 }, { "epoch": 0.03263542914801034, "grad_norm": 238.0135608725938, "learning_rate": 1.9974238892270463e-05, "loss": 45.6654, "step": 2070 }, { "epoch": 0.032793088226020055, "grad_norm": 246.20258852926298, "learning_rate": 1.997387875836713e-05, "loss": 45.1872, "step": 2080 }, { "epoch": 0.03295074730402977, "grad_norm": 234.67379492479589, "learning_rate": 1.9973516127907543e-05, "loss": 45.689, "step": 2090 }, { "epoch": 0.033108406382039476, "grad_norm": 236.98496118174086, "learning_rate": 1.9973151000982477e-05, "loss": 45.4973, "step": 2100 }, { "epoch": 0.03326606546004919, "grad_norm": 209.63535119497232, "learning_rate": 1.9972783377683317e-05, "loss": 44.9182, "step": 2110 }, { "epoch": 0.033423724538058905, "grad_norm": 239.2054762066021, "learning_rate": 1.997241325810209e-05, "loss": 46.0567, "step": 2120 }, { "epoch": 0.03358138361606861, "grad_norm": 215.0434943730801, "learning_rate": 1.9972040642331437e-05, "loss": 45.4237, "step": 2130 }, { "epoch": 0.033739042694078326, "grad_norm": 222.57911045575776, "learning_rate": 1.9971665530464634e-05, "loss": 45.0018, "step": 2140 }, { "epoch": 0.033896701772088034, "grad_norm": 199.7701104280177, "learning_rate": 1.9971287922595565e-05, "loss": 45.2201, "step": 2150 }, { "epoch": 0.03405436085009775, "grad_norm": 208.75335807831638, "learning_rate": 1.9970907818818755e-05, "loss": 45.8789, "step": 2160 }, { "epoch": 0.03421201992810746, "grad_norm": 215.98816136398617, "learning_rate": 1.9970525219229345e-05, "loss": 46.3931, "step": 2170 }, { "epoch": 0.03436967900611717, "grad_norm": 213.3492682780177, "learning_rate": 1.9970140123923105e-05, "loss": 44.6687, "step": 2180 }, { "epoch": 0.034527338084126884, "grad_norm": 214.33585320065177, "learning_rate": 1.9969752532996426e-05, "loss": 46.275, "step": 2190 }, { "epoch": 0.0346849971621366, "grad_norm": 209.40922436374785, "learning_rate": 1.996936244654633e-05, "loss": 44.8888, "step": 2200 }, { "epoch": 0.034842656240146305, "grad_norm": 236.852533384014, "learning_rate": 1.9968969864670457e-05, "loss": 46.6204, "step": 2210 }, { "epoch": 0.03500031531815602, "grad_norm": 200.96099006736105, "learning_rate": 1.996857478746707e-05, "loss": 46.4489, "step": 2220 }, { "epoch": 0.035157974396165734, "grad_norm": 202.63552940180483, "learning_rate": 1.9968177215035065e-05, "loss": 45.5926, "step": 2230 }, { "epoch": 0.03531563347417544, "grad_norm": 220.50825094055014, "learning_rate": 1.9967777147473955e-05, "loss": 44.9733, "step": 2240 }, { "epoch": 0.035473292552185155, "grad_norm": 213.47875268758966, "learning_rate": 1.9967374584883887e-05, "loss": 45.5409, "step": 2250 }, { "epoch": 0.03563095163019487, "grad_norm": 204.00238795970995, "learning_rate": 1.996696952736562e-05, "loss": 44.4812, "step": 2260 }, { "epoch": 0.03578861070820458, "grad_norm": 217.18834952270973, "learning_rate": 1.9966561975020547e-05, "loss": 45.2716, "step": 2270 }, { "epoch": 0.03594626978621429, "grad_norm": 210.7890761543121, "learning_rate": 1.996615192795068e-05, "loss": 44.001, "step": 2280 }, { "epoch": 0.036103928864224005, "grad_norm": 216.90005936628432, "learning_rate": 1.9965739386258656e-05, "loss": 44.7287, "step": 2290 }, { "epoch": 0.03626158794223371, "grad_norm": 213.00596222704007, "learning_rate": 1.9965324350047742e-05, "loss": 44.5438, "step": 2300 }, { "epoch": 0.03641924702024343, "grad_norm": 198.77483733127877, "learning_rate": 1.9964906819421827e-05, "loss": 44.6339, "step": 2310 }, { "epoch": 0.036576906098253134, "grad_norm": 221.6349711581708, "learning_rate": 1.996448679448542e-05, "loss": 44.8778, "step": 2320 }, { "epoch": 0.03673456517626285, "grad_norm": 212.6690657118566, "learning_rate": 1.9964064275343654e-05, "loss": 43.6158, "step": 2330 }, { "epoch": 0.03689222425427256, "grad_norm": 208.16899526069577, "learning_rate": 1.9963639262102294e-05, "loss": 44.428, "step": 2340 }, { "epoch": 0.03704988333228227, "grad_norm": 221.08999631187814, "learning_rate": 1.9963211754867725e-05, "loss": 44.1053, "step": 2350 }, { "epoch": 0.037207542410291984, "grad_norm": 222.2640915285751, "learning_rate": 1.996278175374695e-05, "loss": 44.8349, "step": 2360 }, { "epoch": 0.0373652014883017, "grad_norm": 208.090814505668, "learning_rate": 1.996234925884761e-05, "loss": 44.639, "step": 2370 }, { "epoch": 0.037522860566311406, "grad_norm": 216.58567060998084, "learning_rate": 1.9961914270277965e-05, "loss": 43.8202, "step": 2380 }, { "epoch": 0.03768051964432112, "grad_norm": 193.5555620897855, "learning_rate": 1.9961476788146888e-05, "loss": 44.1095, "step": 2390 }, { "epoch": 0.037838178722330834, "grad_norm": 216.27277223971782, "learning_rate": 1.9961036812563886e-05, "loss": 43.7823, "step": 2400 }, { "epoch": 0.03799583780034054, "grad_norm": 227.5884767278815, "learning_rate": 1.9960594343639092e-05, "loss": 43.4106, "step": 2410 }, { "epoch": 0.038153496878350256, "grad_norm": 221.9716042084233, "learning_rate": 1.996014938148326e-05, "loss": 43.4134, "step": 2420 }, { "epoch": 0.03831115595635997, "grad_norm": 202.351333059179, "learning_rate": 1.9959701926207767e-05, "loss": 46.326, "step": 2430 }, { "epoch": 0.03846881503436968, "grad_norm": 209.27217409288212, "learning_rate": 1.995925197792462e-05, "loss": 44.7008, "step": 2440 }, { "epoch": 0.03862647411237939, "grad_norm": 190.65821798971487, "learning_rate": 1.995879953674644e-05, "loss": 44.6003, "step": 2450 }, { "epoch": 0.038784133190389106, "grad_norm": 194.82380400322714, "learning_rate": 1.9958344602786476e-05, "loss": 44.6011, "step": 2460 }, { "epoch": 0.03894179226839881, "grad_norm": 199.23554221211154, "learning_rate": 1.9957887176158603e-05, "loss": 44.3742, "step": 2470 }, { "epoch": 0.03909945134640853, "grad_norm": 210.38206448945914, "learning_rate": 1.9957427256977325e-05, "loss": 44.2413, "step": 2480 }, { "epoch": 0.03925711042441824, "grad_norm": 365.9917629326284, "learning_rate": 1.9956964845357764e-05, "loss": 43.9806, "step": 2490 }, { "epoch": 0.03941476950242795, "grad_norm": 214.61213132570916, "learning_rate": 1.9956499941415655e-05, "loss": 44.4918, "step": 2500 }, { "epoch": 0.03957242858043766, "grad_norm": 218.68423727233593, "learning_rate": 1.9956032545267378e-05, "loss": 44.6514, "step": 2510 }, { "epoch": 0.03973008765844737, "grad_norm": 199.42834963440225, "learning_rate": 1.9955562657029927e-05, "loss": 44.7988, "step": 2520 }, { "epoch": 0.039887746736457085, "grad_norm": 208.2076391183765, "learning_rate": 1.9955090276820912e-05, "loss": 44.525, "step": 2530 }, { "epoch": 0.0400454058144668, "grad_norm": 239.94796220258343, "learning_rate": 1.995461540475858e-05, "loss": 44.3914, "step": 2540 }, { "epoch": 0.040203064892476506, "grad_norm": 211.1659130064914, "learning_rate": 1.9954138040961795e-05, "loss": 45.0692, "step": 2550 }, { "epoch": 0.04036072397048622, "grad_norm": 195.01735782789635, "learning_rate": 1.9953658185550045e-05, "loss": 44.5151, "step": 2560 }, { "epoch": 0.040518383048495935, "grad_norm": 208.03662199169545, "learning_rate": 1.995317583864344e-05, "loss": 43.9539, "step": 2570 }, { "epoch": 0.04067604212650564, "grad_norm": 200.50469552591272, "learning_rate": 1.995269100036272e-05, "loss": 44.0426, "step": 2580 }, { "epoch": 0.040833701204515356, "grad_norm": 202.7122159485502, "learning_rate": 1.995220367082925e-05, "loss": 44.08, "step": 2590 }, { "epoch": 0.04099136028252507, "grad_norm": 206.29506426654214, "learning_rate": 1.9951713850164997e-05, "loss": 43.4858, "step": 2600 }, { "epoch": 0.04114901936053478, "grad_norm": 202.92333439886332, "learning_rate": 1.995122153849258e-05, "loss": 44.0071, "step": 2610 }, { "epoch": 0.04130667843854449, "grad_norm": 205.36524643157998, "learning_rate": 1.9950726735935228e-05, "loss": 43.4913, "step": 2620 }, { "epoch": 0.041464337516554206, "grad_norm": 209.2151019110018, "learning_rate": 1.9950229442616792e-05, "loss": 43.7187, "step": 2630 }, { "epoch": 0.04162199659456391, "grad_norm": 200.67901838435708, "learning_rate": 1.9949729658661753e-05, "loss": 44.7599, "step": 2640 }, { "epoch": 0.04177965567257363, "grad_norm": 197.28661061765845, "learning_rate": 1.9949227384195208e-05, "loss": 43.8445, "step": 2650 }, { "epoch": 0.04193731475058334, "grad_norm": 205.0384155037315, "learning_rate": 1.994872261934288e-05, "loss": 44.2239, "step": 2660 }, { "epoch": 0.04209497382859305, "grad_norm": 197.59897572319912, "learning_rate": 1.9948215364231117e-05, "loss": 45.002, "step": 2670 }, { "epoch": 0.04225263290660276, "grad_norm": 196.0792875241443, "learning_rate": 1.9947705618986896e-05, "loss": 43.6254, "step": 2680 }, { "epoch": 0.04241029198461247, "grad_norm": 207.2803860298778, "learning_rate": 1.9947193383737803e-05, "loss": 43.61, "step": 2690 }, { "epoch": 0.042567951062622185, "grad_norm": 207.2443855899817, "learning_rate": 1.9946678658612057e-05, "loss": 43.3428, "step": 2700 }, { "epoch": 0.0427256101406319, "grad_norm": 184.87526463229156, "learning_rate": 1.9946161443738507e-05, "loss": 44.4851, "step": 2710 }, { "epoch": 0.042883269218641606, "grad_norm": 209.36132651639596, "learning_rate": 1.9945641739246605e-05, "loss": 43.9448, "step": 2720 }, { "epoch": 0.04304092829665132, "grad_norm": 197.0658317343903, "learning_rate": 1.9945119545266443e-05, "loss": 43.598, "step": 2730 }, { "epoch": 0.043198587374661035, "grad_norm": 201.99209797495428, "learning_rate": 1.9944594861928732e-05, "loss": 43.7288, "step": 2740 }, { "epoch": 0.04335624645267074, "grad_norm": 194.46003861137666, "learning_rate": 1.9944067689364802e-05, "loss": 43.7224, "step": 2750 }, { "epoch": 0.043513905530680456, "grad_norm": 192.39018922719353, "learning_rate": 1.9943538027706614e-05, "loss": 43.7135, "step": 2760 }, { "epoch": 0.04367156460869017, "grad_norm": 192.18446313471654, "learning_rate": 1.9943005877086746e-05, "loss": 43.8843, "step": 2770 }, { "epoch": 0.04382922368669988, "grad_norm": 195.23312757600544, "learning_rate": 1.99424712376384e-05, "loss": 44.6569, "step": 2780 }, { "epoch": 0.04398688276470959, "grad_norm": 188.0275626710642, "learning_rate": 1.9941934109495396e-05, "loss": 43.4028, "step": 2790 }, { "epoch": 0.044144541842719306, "grad_norm": 202.427289670092, "learning_rate": 1.994139449279219e-05, "loss": 44.7908, "step": 2800 }, { "epoch": 0.044302200920729014, "grad_norm": 192.86607405481163, "learning_rate": 1.994085238766385e-05, "loss": 44.4595, "step": 2810 }, { "epoch": 0.04445985999873873, "grad_norm": 201.03178076447335, "learning_rate": 1.9940307794246066e-05, "loss": 44.068, "step": 2820 }, { "epoch": 0.04461751907674844, "grad_norm": 210.98572130301233, "learning_rate": 1.9939760712675164e-05, "loss": 43.2765, "step": 2830 }, { "epoch": 0.04477517815475815, "grad_norm": 192.5879036155047, "learning_rate": 1.9939211143088077e-05, "loss": 43.8273, "step": 2840 }, { "epoch": 0.044932837232767864, "grad_norm": 207.37321239071738, "learning_rate": 1.9938659085622367e-05, "loss": 43.3914, "step": 2850 }, { "epoch": 0.04509049631077758, "grad_norm": 374.4190163880251, "learning_rate": 1.993810454041622e-05, "loss": 43.7673, "step": 2860 }, { "epoch": 0.045248155388787285, "grad_norm": 221.32048449361196, "learning_rate": 1.993754750760845e-05, "loss": 43.5826, "step": 2870 }, { "epoch": 0.045405814466797, "grad_norm": 207.25970743904324, "learning_rate": 1.993698798733848e-05, "loss": 42.8599, "step": 2880 }, { "epoch": 0.04556347354480671, "grad_norm": 206.99298076426192, "learning_rate": 1.9936425979746367e-05, "loss": 44.7854, "step": 2890 }, { "epoch": 0.04572113262281642, "grad_norm": 199.85914159493188, "learning_rate": 1.9935861484972786e-05, "loss": 43.1499, "step": 2900 }, { "epoch": 0.045878791700826135, "grad_norm": 207.0535149185918, "learning_rate": 1.9935294503159033e-05, "loss": 43.1884, "step": 2910 }, { "epoch": 0.04603645077883584, "grad_norm": 212.9430478530785, "learning_rate": 1.9934725034447037e-05, "loss": 43.6437, "step": 2920 }, { "epoch": 0.04619410985684556, "grad_norm": 189.33766770484635, "learning_rate": 1.993415307897933e-05, "loss": 44.1408, "step": 2930 }, { "epoch": 0.04635176893485527, "grad_norm": 203.4922741611937, "learning_rate": 1.9933578636899084e-05, "loss": 43.5999, "step": 2940 }, { "epoch": 0.04650942801286498, "grad_norm": 199.8718015279651, "learning_rate": 1.9933001708350086e-05, "loss": 42.9701, "step": 2950 }, { "epoch": 0.04666708709087469, "grad_norm": 197.34054132818193, "learning_rate": 1.9932422293476746e-05, "loss": 42.4793, "step": 2960 }, { "epoch": 0.04682474616888441, "grad_norm": 195.4006266358509, "learning_rate": 1.9931840392424104e-05, "loss": 43.7039, "step": 2970 }, { "epoch": 0.046982405246894114, "grad_norm": 192.5597227584816, "learning_rate": 1.9931256005337808e-05, "loss": 42.9477, "step": 2980 }, { "epoch": 0.04714006432490383, "grad_norm": 194.98248908477234, "learning_rate": 1.9930669132364132e-05, "loss": 42.4427, "step": 2990 }, { "epoch": 0.04729772340291354, "grad_norm": 211.53468144025268, "learning_rate": 1.9930079773649987e-05, "loss": 44.2303, "step": 3000 }, { "epoch": 0.04745538248092325, "grad_norm": 177.43045752095094, "learning_rate": 1.9929487929342887e-05, "loss": 43.0056, "step": 3010 }, { "epoch": 0.047613041558932964, "grad_norm": 183.62955477190957, "learning_rate": 1.9928893599590977e-05, "loss": 43.7704, "step": 3020 }, { "epoch": 0.04777070063694268, "grad_norm": 189.30805898669306, "learning_rate": 1.992829678454303e-05, "loss": 43.0146, "step": 3030 }, { "epoch": 0.047928359714952386, "grad_norm": 207.40139207711022, "learning_rate": 1.9927697484348422e-05, "loss": 43.5189, "step": 3040 }, { "epoch": 0.0480860187929621, "grad_norm": 201.45443881202584, "learning_rate": 1.9927095699157177e-05, "loss": 44.7209, "step": 3050 }, { "epoch": 0.04824367787097181, "grad_norm": 200.55398728888997, "learning_rate": 1.9926491429119917e-05, "loss": 42.6264, "step": 3060 }, { "epoch": 0.04840133694898152, "grad_norm": 179.72838728505482, "learning_rate": 1.9925884674387905e-05, "loss": 41.6827, "step": 3070 }, { "epoch": 0.048558996026991236, "grad_norm": 196.92285954014278, "learning_rate": 1.992527543511301e-05, "loss": 43.8842, "step": 3080 }, { "epoch": 0.04871665510500094, "grad_norm": 192.92601058452829, "learning_rate": 1.9924663711447738e-05, "loss": 43.7701, "step": 3090 }, { "epoch": 0.04887431418301066, "grad_norm": 201.2563490747253, "learning_rate": 1.99240495035452e-05, "loss": 43.6824, "step": 3100 }, { "epoch": 0.04903197326102037, "grad_norm": 192.80513823506334, "learning_rate": 1.9923432811559144e-05, "loss": 44.214, "step": 3110 }, { "epoch": 0.04918963233903008, "grad_norm": 204.39109139723857, "learning_rate": 1.9922813635643933e-05, "loss": 43.3211, "step": 3120 }, { "epoch": 0.04934729141703979, "grad_norm": 194.0565701574193, "learning_rate": 1.9922191975954553e-05, "loss": 43.2848, "step": 3130 }, { "epoch": 0.04950495049504951, "grad_norm": 176.15984778476022, "learning_rate": 1.992156783264661e-05, "loss": 43.0547, "step": 3140 }, { "epoch": 0.049662609573059215, "grad_norm": 192.27099027448835, "learning_rate": 1.9920941205876335e-05, "loss": 43.2668, "step": 3150 }, { "epoch": 0.04982026865106893, "grad_norm": 203.43523971843544, "learning_rate": 1.9920312095800577e-05, "loss": 42.0771, "step": 3160 }, { "epoch": 0.04997792772907864, "grad_norm": 196.95707722351926, "learning_rate": 1.9919680502576805e-05, "loss": 42.4665, "step": 3170 }, { "epoch": 0.05013558680708835, "grad_norm": 227.82644082054014, "learning_rate": 1.991904642636312e-05, "loss": 42.2459, "step": 3180 }, { "epoch": 0.050293245885098065, "grad_norm": 194.71913551084322, "learning_rate": 1.9918409867318235e-05, "loss": 43.4926, "step": 3190 }, { "epoch": 0.05045090496310778, "grad_norm": 191.95243307123084, "learning_rate": 1.991777082560148e-05, "loss": 42.0448, "step": 3200 }, { "epoch": 0.050608564041117486, "grad_norm": 199.2353379859488, "learning_rate": 1.991712930137282e-05, "loss": 41.9564, "step": 3210 }, { "epoch": 0.0507662231191272, "grad_norm": 189.37136937085222, "learning_rate": 1.9916485294792833e-05, "loss": 42.1988, "step": 3220 }, { "epoch": 0.050923882197136915, "grad_norm": 218.7026477662868, "learning_rate": 1.9915838806022723e-05, "loss": 43.7225, "step": 3230 }, { "epoch": 0.05108154127514662, "grad_norm": 187.8707073976572, "learning_rate": 1.991518983522431e-05, "loss": 42.5848, "step": 3240 }, { "epoch": 0.051239200353156336, "grad_norm": 219.23037481856423, "learning_rate": 1.9914538382560033e-05, "loss": 43.3267, "step": 3250 }, { "epoch": 0.05139685943116604, "grad_norm": 174.01522964800202, "learning_rate": 1.9913884448192958e-05, "loss": 43.0394, "step": 3260 }, { "epoch": 0.05155451850917576, "grad_norm": 213.6565783944176, "learning_rate": 1.991322803228678e-05, "loss": 44.0223, "step": 3270 }, { "epoch": 0.05171217758718547, "grad_norm": 179.8630625515791, "learning_rate": 1.9912569135005798e-05, "loss": 42.3553, "step": 3280 }, { "epoch": 0.05186983666519518, "grad_norm": 180.23403889351985, "learning_rate": 1.9911907756514938e-05, "loss": 42.3133, "step": 3290 }, { "epoch": 0.05202749574320489, "grad_norm": 202.30764033892416, "learning_rate": 1.9911243896979754e-05, "loss": 42.6042, "step": 3300 }, { "epoch": 0.05218515482121461, "grad_norm": 190.1095831161692, "learning_rate": 1.991057755656642e-05, "loss": 42.7949, "step": 3310 }, { "epoch": 0.052342813899224315, "grad_norm": 184.0672241109152, "learning_rate": 1.990990873544172e-05, "loss": 42.755, "step": 3320 }, { "epoch": 0.05250047297723403, "grad_norm": 180.38954255365553, "learning_rate": 1.9909237433773067e-05, "loss": 42.5626, "step": 3330 }, { "epoch": 0.05265813205524374, "grad_norm": 178.4762033862733, "learning_rate": 1.9908563651728498e-05, "loss": 42.3664, "step": 3340 }, { "epoch": 0.05281579113325345, "grad_norm": 190.04577802980083, "learning_rate": 1.9907887389476666e-05, "loss": 42.864, "step": 3350 }, { "epoch": 0.052973450211263165, "grad_norm": 196.26354169393863, "learning_rate": 1.9907208647186843e-05, "loss": 42.6984, "step": 3360 }, { "epoch": 0.05313110928927288, "grad_norm": 192.82977920639604, "learning_rate": 1.990652742502893e-05, "loss": 43.3124, "step": 3370 }, { "epoch": 0.05328876836728259, "grad_norm": 196.15094134393064, "learning_rate": 1.9905843723173436e-05, "loss": 43.3339, "step": 3380 }, { "epoch": 0.0534464274452923, "grad_norm": 196.01550719052267, "learning_rate": 1.9905157541791505e-05, "loss": 41.7506, "step": 3390 }, { "epoch": 0.053604086523302015, "grad_norm": 184.82993027023613, "learning_rate": 1.990446888105489e-05, "loss": 42.1116, "step": 3400 }, { "epoch": 0.05376174560131172, "grad_norm": 179.88399900833758, "learning_rate": 1.990377774113597e-05, "loss": 42.7029, "step": 3410 }, { "epoch": 0.05391940467932144, "grad_norm": 180.49868327083462, "learning_rate": 1.9903084122207747e-05, "loss": 42.3341, "step": 3420 }, { "epoch": 0.054077063757331144, "grad_norm": 177.21653301104752, "learning_rate": 1.990238802444384e-05, "loss": 42.2582, "step": 3430 }, { "epoch": 0.05423472283534086, "grad_norm": 190.06443303172733, "learning_rate": 1.9901689448018483e-05, "loss": 42.9794, "step": 3440 }, { "epoch": 0.05439238191335057, "grad_norm": 179.43478920568992, "learning_rate": 1.9900988393106543e-05, "loss": 42.7486, "step": 3450 }, { "epoch": 0.05455004099136028, "grad_norm": 173.58235539586775, "learning_rate": 1.9900284859883497e-05, "loss": 41.5249, "step": 3460 }, { "epoch": 0.054707700069369994, "grad_norm": 192.4411220456966, "learning_rate": 1.989957884852545e-05, "loss": 41.2629, "step": 3470 }, { "epoch": 0.05486535914737971, "grad_norm": 179.58866983285876, "learning_rate": 1.9898870359209117e-05, "loss": 43.3398, "step": 3480 }, { "epoch": 0.055023018225389415, "grad_norm": 176.84317244297935, "learning_rate": 1.9898159392111846e-05, "loss": 43.7876, "step": 3490 }, { "epoch": 0.05518067730339913, "grad_norm": 177.35497480462473, "learning_rate": 1.9897445947411596e-05, "loss": 42.4181, "step": 3500 }, { "epoch": 0.055338336381408844, "grad_norm": 197.925691550323, "learning_rate": 1.9896730025286946e-05, "loss": 43.0176, "step": 3510 }, { "epoch": 0.05549599545941855, "grad_norm": 184.7016999301689, "learning_rate": 1.98960116259171e-05, "loss": 42.6027, "step": 3520 }, { "epoch": 0.055653654537428265, "grad_norm": 176.2515301755238, "learning_rate": 1.9895290749481882e-05, "loss": 42.3129, "step": 3530 }, { "epoch": 0.05581131361543798, "grad_norm": 175.64212152112285, "learning_rate": 1.989456739616174e-05, "loss": 42.8124, "step": 3540 }, { "epoch": 0.05596897269344769, "grad_norm": 172.73841986275525, "learning_rate": 1.989384156613772e-05, "loss": 41.467, "step": 3550 }, { "epoch": 0.0561266317714574, "grad_norm": 175.52752227480752, "learning_rate": 1.9893113259591515e-05, "loss": 42.5388, "step": 3560 }, { "epoch": 0.056284290849467115, "grad_norm": 192.0204060166524, "learning_rate": 1.9892382476705433e-05, "loss": 43.03, "step": 3570 }, { "epoch": 0.05644194992747682, "grad_norm": 176.81065983548712, "learning_rate": 1.989164921766238e-05, "loss": 42.4773, "step": 3580 }, { "epoch": 0.05659960900548654, "grad_norm": 177.1415159858181, "learning_rate": 1.989091348264591e-05, "loss": 42.1196, "step": 3590 }, { "epoch": 0.056757268083496244, "grad_norm": 191.26601360496727, "learning_rate": 1.989017527184018e-05, "loss": 42.907, "step": 3600 }, { "epoch": 0.05691492716150596, "grad_norm": 187.54814269402007, "learning_rate": 1.9889434585429972e-05, "loss": 42.3749, "step": 3610 }, { "epoch": 0.05707258623951567, "grad_norm": 186.33074762695873, "learning_rate": 1.9888691423600686e-05, "loss": 42.8143, "step": 3620 }, { "epoch": 0.05723024531752538, "grad_norm": 169.171179681762, "learning_rate": 1.988794578653834e-05, "loss": 41.1053, "step": 3630 }, { "epoch": 0.057387904395535094, "grad_norm": 187.9872594116964, "learning_rate": 1.9887197674429584e-05, "loss": 41.8371, "step": 3640 }, { "epoch": 0.05754556347354481, "grad_norm": 191.61516175625187, "learning_rate": 1.9886447087461666e-05, "loss": 42.3782, "step": 3650 }, { "epoch": 0.057703222551554516, "grad_norm": 180.911142965085, "learning_rate": 1.9885694025822472e-05, "loss": 42.0732, "step": 3660 }, { "epoch": 0.05786088162956423, "grad_norm": 176.9403207100869, "learning_rate": 1.9884938489700495e-05, "loss": 41.4066, "step": 3670 }, { "epoch": 0.058018540707573944, "grad_norm": 183.14616040794903, "learning_rate": 1.988418047928486e-05, "loss": 42.7576, "step": 3680 }, { "epoch": 0.05817619978558365, "grad_norm": 171.59255558440884, "learning_rate": 1.9883419994765297e-05, "loss": 41.9467, "step": 3690 }, { "epoch": 0.058333858863593366, "grad_norm": 180.43367906504545, "learning_rate": 1.9882657036332165e-05, "loss": 41.7585, "step": 3700 }, { "epoch": 0.05849151794160308, "grad_norm": 172.16803336359473, "learning_rate": 1.988189160417644e-05, "loss": 42.6578, "step": 3710 }, { "epoch": 0.05864917701961279, "grad_norm": 188.43068680974187, "learning_rate": 1.988112369848972e-05, "loss": 42.2283, "step": 3720 }, { "epoch": 0.0588068360976225, "grad_norm": 173.84448368440263, "learning_rate": 1.988035331946421e-05, "loss": 41.7609, "step": 3730 }, { "epoch": 0.058964495175632216, "grad_norm": 166.60387391459955, "learning_rate": 1.9879580467292756e-05, "loss": 41.7229, "step": 3740 }, { "epoch": 0.05912215425364192, "grad_norm": 183.46267533753556, "learning_rate": 1.9878805142168802e-05, "loss": 42.0589, "step": 3750 }, { "epoch": 0.05927981333165164, "grad_norm": 179.45047400819132, "learning_rate": 1.987802734428642e-05, "loss": 41.4802, "step": 3760 }, { "epoch": 0.05943747240966135, "grad_norm": 184.66546873278014, "learning_rate": 1.9877247073840302e-05, "loss": 42.4832, "step": 3770 }, { "epoch": 0.05959513148767106, "grad_norm": 173.70880510558385, "learning_rate": 1.9876464331025755e-05, "loss": 41.9698, "step": 3780 }, { "epoch": 0.05975279056568077, "grad_norm": 189.69119976330086, "learning_rate": 1.987567911603871e-05, "loss": 41.5762, "step": 3790 }, { "epoch": 0.05991044964369048, "grad_norm": 170.9464889333001, "learning_rate": 1.987489142907571e-05, "loss": 41.7621, "step": 3800 }, { "epoch": 0.060068108721700195, "grad_norm": 178.20293927602438, "learning_rate": 1.9874101270333922e-05, "loss": 41.8593, "step": 3810 }, { "epoch": 0.06022576779970991, "grad_norm": 166.8453663844318, "learning_rate": 1.987330864001113e-05, "loss": 40.6591, "step": 3820 }, { "epoch": 0.060383426877719616, "grad_norm": 183.57643678012522, "learning_rate": 1.9872513538305737e-05, "loss": 42.9405, "step": 3830 }, { "epoch": 0.06054108595572933, "grad_norm": 189.06909119348543, "learning_rate": 1.9871715965416768e-05, "loss": 41.6981, "step": 3840 }, { "epoch": 0.060698745033739045, "grad_norm": 174.5639277995013, "learning_rate": 1.9870915921543855e-05, "loss": 41.564, "step": 3850 }, { "epoch": 0.06085640411174875, "grad_norm": 173.38582923070436, "learning_rate": 1.9870113406887266e-05, "loss": 42.9793, "step": 3860 }, { "epoch": 0.061014063189758466, "grad_norm": 178.364414575169, "learning_rate": 1.986930842164787e-05, "loss": 42.2838, "step": 3870 }, { "epoch": 0.06117172226776818, "grad_norm": 163.1621889867866, "learning_rate": 1.9868500966027166e-05, "loss": 41.436, "step": 3880 }, { "epoch": 0.06132938134577789, "grad_norm": 167.9485574765967, "learning_rate": 1.9867691040227267e-05, "loss": 41.4836, "step": 3890 }, { "epoch": 0.0614870404237876, "grad_norm": 335.83939594282225, "learning_rate": 1.986687864445091e-05, "loss": 42.9901, "step": 3900 }, { "epoch": 0.061644699501797316, "grad_norm": 170.63733943413985, "learning_rate": 1.986606377890144e-05, "loss": 41.7085, "step": 3910 }, { "epoch": 0.061802358579807024, "grad_norm": 191.2129370005771, "learning_rate": 1.9865246443782824e-05, "loss": 41.8332, "step": 3920 }, { "epoch": 0.06196001765781674, "grad_norm": 198.5184202750361, "learning_rate": 1.9864426639299652e-05, "loss": 41.2428, "step": 3930 }, { "epoch": 0.06211767673582645, "grad_norm": 167.54795216811996, "learning_rate": 1.986360436565713e-05, "loss": 41.245, "step": 3940 }, { "epoch": 0.06227533581383616, "grad_norm": 174.76703291309005, "learning_rate": 1.9862779623061076e-05, "loss": 41.6339, "step": 3950 }, { "epoch": 0.062432994891845874, "grad_norm": 180.87608376211867, "learning_rate": 1.9861952411717938e-05, "loss": 41.4093, "step": 3960 }, { "epoch": 0.06259065396985558, "grad_norm": 187.49623040281043, "learning_rate": 1.9861122731834768e-05, "loss": 42.5357, "step": 3970 }, { "epoch": 0.0627483130478653, "grad_norm": 186.67298438319918, "learning_rate": 1.9860290583619246e-05, "loss": 41.1954, "step": 3980 }, { "epoch": 0.06290597212587501, "grad_norm": 165.66104643773224, "learning_rate": 1.9859455967279664e-05, "loss": 40.4945, "step": 3990 }, { "epoch": 0.06306363120388472, "grad_norm": 172.91921654641752, "learning_rate": 1.9858618883024938e-05, "loss": 41.8845, "step": 4000 }, { "epoch": 0.06322129028189444, "grad_norm": 230.3440308537433, "learning_rate": 1.9857779331064597e-05, "loss": 41.6001, "step": 4010 }, { "epoch": 0.06337894935990414, "grad_norm": 179.9355510048491, "learning_rate": 1.985693731160879e-05, "loss": 41.2619, "step": 4020 }, { "epoch": 0.06353660843791385, "grad_norm": 165.43740960857218, "learning_rate": 1.9856092824868276e-05, "loss": 41.9627, "step": 4030 }, { "epoch": 0.06369426751592357, "grad_norm": 166.5455604422909, "learning_rate": 1.9855245871054448e-05, "loss": 40.7642, "step": 4040 }, { "epoch": 0.06385192659393328, "grad_norm": 169.03656540144848, "learning_rate": 1.98543964503793e-05, "loss": 40.7731, "step": 4050 }, { "epoch": 0.064009585671943, "grad_norm": 180.0754363596538, "learning_rate": 1.9853544563055447e-05, "loss": 41.6555, "step": 4060 }, { "epoch": 0.0641672447499527, "grad_norm": 171.79766222068105, "learning_rate": 1.9852690209296135e-05, "loss": 41.4211, "step": 4070 }, { "epoch": 0.06432490382796241, "grad_norm": 232.10188964911254, "learning_rate": 1.9851833389315207e-05, "loss": 41.7985, "step": 4080 }, { "epoch": 0.06448256290597212, "grad_norm": 174.64314477729468, "learning_rate": 1.985097410332714e-05, "loss": 41.4964, "step": 4090 }, { "epoch": 0.06464022198398184, "grad_norm": 249.2327663947929, "learning_rate": 1.9850112351547013e-05, "loss": 41.141, "step": 4100 }, { "epoch": 0.06479788106199155, "grad_norm": 170.36224653416397, "learning_rate": 1.984924813419054e-05, "loss": 40.9569, "step": 4110 }, { "epoch": 0.06495554014000127, "grad_norm": 176.73066478134098, "learning_rate": 1.9848381451474035e-05, "loss": 40.6764, "step": 4120 }, { "epoch": 0.06511319921801097, "grad_norm": 167.84855771765294, "learning_rate": 1.9847512303614446e-05, "loss": 41.1551, "step": 4130 }, { "epoch": 0.06527085829602068, "grad_norm": 188.24170037471222, "learning_rate": 1.984664069082932e-05, "loss": 43.2905, "step": 4140 }, { "epoch": 0.0654285173740304, "grad_norm": 176.92576806489174, "learning_rate": 1.984576661333683e-05, "loss": 41.5126, "step": 4150 }, { "epoch": 0.06558617645204011, "grad_norm": 181.65766052335317, "learning_rate": 1.9844890071355776e-05, "loss": 41.1248, "step": 4160 }, { "epoch": 0.06574383553004982, "grad_norm": 166.56638763448572, "learning_rate": 1.9844011065105553e-05, "loss": 41.3764, "step": 4170 }, { "epoch": 0.06590149460805954, "grad_norm": 545.4529285072359, "learning_rate": 1.9843129594806193e-05, "loss": 40.7013, "step": 4180 }, { "epoch": 0.06605915368606924, "grad_norm": 177.0033891084635, "learning_rate": 1.984224566067833e-05, "loss": 41.4154, "step": 4190 }, { "epoch": 0.06621681276407895, "grad_norm": 176.45044724494284, "learning_rate": 1.984135926294323e-05, "loss": 41.6738, "step": 4200 }, { "epoch": 0.06637447184208867, "grad_norm": 185.1729891371147, "learning_rate": 1.9840470401822752e-05, "loss": 40.9848, "step": 4210 }, { "epoch": 0.06653213092009838, "grad_norm": 173.00167188735708, "learning_rate": 1.98395790775394e-05, "loss": 40.5748, "step": 4220 }, { "epoch": 0.0666897899981081, "grad_norm": 192.04910680263055, "learning_rate": 1.9838685290316276e-05, "loss": 40.7463, "step": 4230 }, { "epoch": 0.06684744907611781, "grad_norm": 213.7093107511071, "learning_rate": 1.9837789040377103e-05, "loss": 41.9237, "step": 4240 }, { "epoch": 0.06700510815412751, "grad_norm": 175.79951484252817, "learning_rate": 1.9836890327946223e-05, "loss": 40.4487, "step": 4250 }, { "epoch": 0.06716276723213722, "grad_norm": 178.60595787096906, "learning_rate": 1.983598915324859e-05, "loss": 40.4364, "step": 4260 }, { "epoch": 0.06732042631014694, "grad_norm": 171.0425813704141, "learning_rate": 1.9835085516509776e-05, "loss": 40.6343, "step": 4270 }, { "epoch": 0.06747808538815665, "grad_norm": 165.618508825491, "learning_rate": 1.983417941795597e-05, "loss": 40.5572, "step": 4280 }, { "epoch": 0.06763574446616637, "grad_norm": 171.79783860709438, "learning_rate": 1.983327085781398e-05, "loss": 41.2337, "step": 4290 }, { "epoch": 0.06779340354417607, "grad_norm": 163.04649696220136, "learning_rate": 1.9832359836311222e-05, "loss": 41.0959, "step": 4300 }, { "epoch": 0.06795106262218578, "grad_norm": 170.6757872847392, "learning_rate": 1.9831446353675738e-05, "loss": 40.8245, "step": 4310 }, { "epoch": 0.0681087217001955, "grad_norm": 166.09172649676535, "learning_rate": 1.983053041013618e-05, "loss": 40.5654, "step": 4320 }, { "epoch": 0.06826638077820521, "grad_norm": 173.5637294033437, "learning_rate": 1.9829612005921818e-05, "loss": 41.358, "step": 4330 }, { "epoch": 0.06842403985621492, "grad_norm": 179.69872664994529, "learning_rate": 1.9828691141262535e-05, "loss": 40.3117, "step": 4340 }, { "epoch": 0.06858169893422464, "grad_norm": 162.61699955310223, "learning_rate": 1.9827767816388833e-05, "loss": 40.6854, "step": 4350 }, { "epoch": 0.06873935801223434, "grad_norm": 166.3701109055888, "learning_rate": 1.982684203153183e-05, "loss": 40.1984, "step": 4360 }, { "epoch": 0.06889701709024405, "grad_norm": 167.5433445000001, "learning_rate": 1.9825913786923263e-05, "loss": 40.564, "step": 4370 }, { "epoch": 0.06905467616825377, "grad_norm": 169.8508702832974, "learning_rate": 1.982498308279547e-05, "loss": 40.6443, "step": 4380 }, { "epoch": 0.06921233524626348, "grad_norm": 175.9754277773347, "learning_rate": 1.9824049919381422e-05, "loss": 41.1758, "step": 4390 }, { "epoch": 0.0693699943242732, "grad_norm": 175.15866957238515, "learning_rate": 1.98231142969147e-05, "loss": 40.9897, "step": 4400 }, { "epoch": 0.06952765340228291, "grad_norm": 175.67771834397342, "learning_rate": 1.982217621562949e-05, "loss": 41.2306, "step": 4410 }, { "epoch": 0.06968531248029261, "grad_norm": 166.55993878135166, "learning_rate": 1.9821235675760614e-05, "loss": 42.1208, "step": 4420 }, { "epoch": 0.06984297155830232, "grad_norm": 166.64371757205396, "learning_rate": 1.9820292677543493e-05, "loss": 40.7082, "step": 4430 }, { "epoch": 0.07000063063631204, "grad_norm": 172.31046220432322, "learning_rate": 1.9819347221214167e-05, "loss": 41.7686, "step": 4440 }, { "epoch": 0.07015828971432175, "grad_norm": 162.18945882236122, "learning_rate": 1.9818399307009298e-05, "loss": 41.7259, "step": 4450 }, { "epoch": 0.07031594879233147, "grad_norm": 194.34380289764985, "learning_rate": 1.9817448935166152e-05, "loss": 41.6345, "step": 4460 }, { "epoch": 0.07047360787034117, "grad_norm": 172.76432303375358, "learning_rate": 1.9816496105922613e-05, "loss": 39.8995, "step": 4470 }, { "epoch": 0.07063126694835088, "grad_norm": 175.32620425381475, "learning_rate": 1.9815540819517193e-05, "loss": 40.7747, "step": 4480 }, { "epoch": 0.0707889260263606, "grad_norm": 163.37108036208298, "learning_rate": 1.9814583076189006e-05, "loss": 40.1128, "step": 4490 }, { "epoch": 0.07094658510437031, "grad_norm": 177.01980825364805, "learning_rate": 1.9813622876177782e-05, "loss": 41.7963, "step": 4500 }, { "epoch": 0.07110424418238002, "grad_norm": 169.16597218566966, "learning_rate": 1.9812660219723865e-05, "loss": 40.7509, "step": 4510 }, { "epoch": 0.07126190326038974, "grad_norm": 172.75161286704554, "learning_rate": 1.9811695107068222e-05, "loss": 41.046, "step": 4520 }, { "epoch": 0.07141956233839944, "grad_norm": 169.9016908865531, "learning_rate": 1.981072753845243e-05, "loss": 40.5997, "step": 4530 }, { "epoch": 0.07157722141640915, "grad_norm": 181.64401988287918, "learning_rate": 1.9809757514118682e-05, "loss": 41.2859, "step": 4540 }, { "epoch": 0.07173488049441887, "grad_norm": 159.6841170794919, "learning_rate": 1.980878503430978e-05, "loss": 40.6543, "step": 4550 }, { "epoch": 0.07189253957242858, "grad_norm": 175.70790825759386, "learning_rate": 1.9807810099269145e-05, "loss": 42.1607, "step": 4560 }, { "epoch": 0.0720501986504383, "grad_norm": 178.42177462840866, "learning_rate": 1.9806832709240815e-05, "loss": 41.8165, "step": 4570 }, { "epoch": 0.07220785772844801, "grad_norm": 167.2493945208695, "learning_rate": 1.9805852864469437e-05, "loss": 41.3548, "step": 4580 }, { "epoch": 0.07236551680645771, "grad_norm": 180.4676777451892, "learning_rate": 1.980487056520028e-05, "loss": 41.4129, "step": 4590 }, { "epoch": 0.07252317588446743, "grad_norm": 159.5462219019561, "learning_rate": 1.980388581167922e-05, "loss": 40.685, "step": 4600 }, { "epoch": 0.07268083496247714, "grad_norm": 169.02493415410387, "learning_rate": 1.9802898604152746e-05, "loss": 40.8677, "step": 4610 }, { "epoch": 0.07283849404048685, "grad_norm": 160.4102071934799, "learning_rate": 1.9801908942867976e-05, "loss": 41.0741, "step": 4620 }, { "epoch": 0.07299615311849657, "grad_norm": 180.3343011290459, "learning_rate": 1.9800916828072622e-05, "loss": 40.594, "step": 4630 }, { "epoch": 0.07315381219650627, "grad_norm": 171.8637307880815, "learning_rate": 1.9799922260015022e-05, "loss": 41.4151, "step": 4640 }, { "epoch": 0.07331147127451598, "grad_norm": 169.2031290949207, "learning_rate": 1.9798925238944128e-05, "loss": 40.4994, "step": 4650 }, { "epoch": 0.0734691303525257, "grad_norm": 190.92646627334355, "learning_rate": 1.9797925765109504e-05, "loss": 40.8075, "step": 4660 }, { "epoch": 0.07362678943053541, "grad_norm": 172.37117036909652, "learning_rate": 1.9796923838761328e-05, "loss": 40.4725, "step": 4670 }, { "epoch": 0.07378444850854513, "grad_norm": 170.17411979178266, "learning_rate": 1.9795919460150384e-05, "loss": 41.8613, "step": 4680 }, { "epoch": 0.07394210758655484, "grad_norm": 169.9228706605949, "learning_rate": 1.979491262952809e-05, "loss": 41.6273, "step": 4690 }, { "epoch": 0.07409976666456454, "grad_norm": 162.20047748326863, "learning_rate": 1.9793903347146454e-05, "loss": 39.8943, "step": 4700 }, { "epoch": 0.07425742574257425, "grad_norm": 164.75429334803363, "learning_rate": 1.9792891613258115e-05, "loss": 41.5286, "step": 4710 }, { "epoch": 0.07441508482058397, "grad_norm": 160.75650808222994, "learning_rate": 1.979187742811632e-05, "loss": 40.0067, "step": 4720 }, { "epoch": 0.07457274389859368, "grad_norm": 168.15887142719166, "learning_rate": 1.9790860791974923e-05, "loss": 40.4289, "step": 4730 }, { "epoch": 0.0747304029766034, "grad_norm": 154.3259160908452, "learning_rate": 1.9789841705088406e-05, "loss": 40.9631, "step": 4740 }, { "epoch": 0.07488806205461311, "grad_norm": 173.74191004193207, "learning_rate": 1.9788820167711853e-05, "loss": 40.6272, "step": 4750 }, { "epoch": 0.07504572113262281, "grad_norm": 175.78104299843795, "learning_rate": 1.978779618010096e-05, "loss": 40.3274, "step": 4760 }, { "epoch": 0.07520338021063253, "grad_norm": 167.60919978537873, "learning_rate": 1.978676974251205e-05, "loss": 40.2103, "step": 4770 }, { "epoch": 0.07536103928864224, "grad_norm": 164.3254266051744, "learning_rate": 1.9785740855202037e-05, "loss": 41.4538, "step": 4780 }, { "epoch": 0.07551869836665195, "grad_norm": 160.45298715608646, "learning_rate": 1.9784709518428472e-05, "loss": 40.2232, "step": 4790 }, { "epoch": 0.07567635744466167, "grad_norm": 195.84318529459193, "learning_rate": 1.9783675732449505e-05, "loss": 41.0976, "step": 4800 }, { "epoch": 0.07583401652267137, "grad_norm": 155.60879551458106, "learning_rate": 1.9782639497523903e-05, "loss": 40.1679, "step": 4810 }, { "epoch": 0.07599167560068108, "grad_norm": 161.12117085315768, "learning_rate": 1.9781600813911046e-05, "loss": 40.8093, "step": 4820 }, { "epoch": 0.0761493346786908, "grad_norm": 175.0777996470541, "learning_rate": 1.9780559681870924e-05, "loss": 40.6938, "step": 4830 }, { "epoch": 0.07630699375670051, "grad_norm": 175.59970276400082, "learning_rate": 1.977951610166414e-05, "loss": 41.0607, "step": 4840 }, { "epoch": 0.07646465283471023, "grad_norm": 170.77772314662246, "learning_rate": 1.9778470073551918e-05, "loss": 42.3463, "step": 4850 }, { "epoch": 0.07662231191271994, "grad_norm": 161.4142083481919, "learning_rate": 1.977742159779609e-05, "loss": 40.4033, "step": 4860 }, { "epoch": 0.07677997099072964, "grad_norm": 185.04173436785908, "learning_rate": 1.977637067465909e-05, "loss": 39.9964, "step": 4870 }, { "epoch": 0.07693763006873935, "grad_norm": 165.77392974300068, "learning_rate": 1.977531730440398e-05, "loss": 40.6341, "step": 4880 }, { "epoch": 0.07709528914674907, "grad_norm": 167.62859989118664, "learning_rate": 1.977426148729443e-05, "loss": 39.6403, "step": 4890 }, { "epoch": 0.07725294822475878, "grad_norm": 164.40282050751316, "learning_rate": 1.977320322359472e-05, "loss": 40.0975, "step": 4900 }, { "epoch": 0.0774106073027685, "grad_norm": 164.24072407341296, "learning_rate": 1.9772142513569742e-05, "loss": 40.6078, "step": 4910 }, { "epoch": 0.07756826638077821, "grad_norm": 160.29761851465292, "learning_rate": 1.9771079357484997e-05, "loss": 40.2569, "step": 4920 }, { "epoch": 0.07772592545878791, "grad_norm": 160.47620369221323, "learning_rate": 1.9770013755606614e-05, "loss": 41.0338, "step": 4930 }, { "epoch": 0.07788358453679763, "grad_norm": 188.0514793298374, "learning_rate": 1.976894570820132e-05, "loss": 41.0078, "step": 4940 }, { "epoch": 0.07804124361480734, "grad_norm": 169.79083970344408, "learning_rate": 1.9767875215536447e-05, "loss": 40.8674, "step": 4950 }, { "epoch": 0.07819890269281705, "grad_norm": 165.29478330473572, "learning_rate": 1.976680227787996e-05, "loss": 40.9826, "step": 4960 }, { "epoch": 0.07835656177082677, "grad_norm": 160.11037527077772, "learning_rate": 1.9765726895500427e-05, "loss": 39.9369, "step": 4970 }, { "epoch": 0.07851422084883648, "grad_norm": 175.5528313514245, "learning_rate": 1.9764649068667017e-05, "loss": 39.9129, "step": 4980 }, { "epoch": 0.07867187992684618, "grad_norm": 213.35479187341318, "learning_rate": 1.976356879764953e-05, "loss": 40.4125, "step": 4990 }, { "epoch": 0.0788295390048559, "grad_norm": 158.9006622587528, "learning_rate": 1.9762486082718362e-05, "loss": 40.0789, "step": 5000 }, { "epoch": 0.07898719808286561, "grad_norm": 162.8760678249178, "learning_rate": 1.9761400924144526e-05, "loss": 39.7887, "step": 5010 }, { "epoch": 0.07914485716087533, "grad_norm": 158.65696365144598, "learning_rate": 1.9760313322199654e-05, "loss": 39.6536, "step": 5020 }, { "epoch": 0.07930251623888504, "grad_norm": 171.0081643691202, "learning_rate": 1.9759223277155975e-05, "loss": 40.0582, "step": 5030 }, { "epoch": 0.07946017531689474, "grad_norm": 160.63668813021948, "learning_rate": 1.9758130789286346e-05, "loss": 40.6565, "step": 5040 }, { "epoch": 0.07961783439490445, "grad_norm": 155.4362524450992, "learning_rate": 1.975703585886422e-05, "loss": 39.6639, "step": 5050 }, { "epoch": 0.07977549347291417, "grad_norm": 164.82883600955702, "learning_rate": 1.9755938486163666e-05, "loss": 40.5436, "step": 5060 }, { "epoch": 0.07993315255092388, "grad_norm": 154.5674594017784, "learning_rate": 1.975483867145938e-05, "loss": 39.9285, "step": 5070 }, { "epoch": 0.0800908116289336, "grad_norm": 161.0186607926127, "learning_rate": 1.975373641502664e-05, "loss": 40.7616, "step": 5080 }, { "epoch": 0.08024847070694331, "grad_norm": 155.85223633197802, "learning_rate": 1.9752631717141362e-05, "loss": 40.2948, "step": 5090 }, { "epoch": 0.08040612978495301, "grad_norm": 161.18921184439256, "learning_rate": 1.9751524578080062e-05, "loss": 40.2212, "step": 5100 }, { "epoch": 0.08056378886296273, "grad_norm": 164.6269633910323, "learning_rate": 1.9750414998119862e-05, "loss": 39.5259, "step": 5110 }, { "epoch": 0.08072144794097244, "grad_norm": 158.9550471448016, "learning_rate": 1.97493029775385e-05, "loss": 39.5533, "step": 5120 }, { "epoch": 0.08087910701898215, "grad_norm": 158.67604722740214, "learning_rate": 1.9748188516614335e-05, "loss": 39.2481, "step": 5130 }, { "epoch": 0.08103676609699187, "grad_norm": 155.0133920166526, "learning_rate": 1.9747071615626318e-05, "loss": 39.9741, "step": 5140 }, { "epoch": 0.08119442517500158, "grad_norm": 148.19915099729715, "learning_rate": 1.974595227485402e-05, "loss": 41.2371, "step": 5150 }, { "epoch": 0.08135208425301128, "grad_norm": 159.8282011105743, "learning_rate": 1.974483049457763e-05, "loss": 40.8518, "step": 5160 }, { "epoch": 0.081509743331021, "grad_norm": 691.1674500772158, "learning_rate": 1.974370627507793e-05, "loss": 39.3863, "step": 5170 }, { "epoch": 0.08166740240903071, "grad_norm": 174.03346063247992, "learning_rate": 1.974257961663633e-05, "loss": 39.8698, "step": 5180 }, { "epoch": 0.08182506148704043, "grad_norm": 154.36680622336044, "learning_rate": 1.9741450519534838e-05, "loss": 39.2261, "step": 5190 }, { "epoch": 0.08198272056505014, "grad_norm": 155.65084853111279, "learning_rate": 1.9740318984056085e-05, "loss": 38.8175, "step": 5200 }, { "epoch": 0.08214037964305984, "grad_norm": 170.2719297679754, "learning_rate": 1.9739185010483298e-05, "loss": 41.4107, "step": 5210 }, { "epoch": 0.08229803872106956, "grad_norm": 167.32987924046841, "learning_rate": 1.9738048599100326e-05, "loss": 38.9441, "step": 5220 }, { "epoch": 0.08245569779907927, "grad_norm": 185.16435306214373, "learning_rate": 1.973690975019162e-05, "loss": 40.1528, "step": 5230 }, { "epoch": 0.08261335687708898, "grad_norm": 161.74409137239533, "learning_rate": 1.973576846404225e-05, "loss": 40.266, "step": 5240 }, { "epoch": 0.0827710159550987, "grad_norm": 164.6515560529175, "learning_rate": 1.9734624740937884e-05, "loss": 39.5769, "step": 5250 }, { "epoch": 0.08292867503310841, "grad_norm": 154.7481076254931, "learning_rate": 1.9733478581164812e-05, "loss": 39.7869, "step": 5260 }, { "epoch": 0.08308633411111811, "grad_norm": 159.7494338706996, "learning_rate": 1.9732329985009925e-05, "loss": 40.7367, "step": 5270 }, { "epoch": 0.08324399318912783, "grad_norm": 264.533590796024, "learning_rate": 1.9731178952760726e-05, "loss": 40.9246, "step": 5280 }, { "epoch": 0.08340165226713754, "grad_norm": 188.26274249022316, "learning_rate": 1.973002548470534e-05, "loss": 40.1347, "step": 5290 }, { "epoch": 0.08355931134514726, "grad_norm": 158.61163434187227, "learning_rate": 1.9728869581132478e-05, "loss": 40.4135, "step": 5300 }, { "epoch": 0.08371697042315697, "grad_norm": 215.31477010103745, "learning_rate": 1.972771124233148e-05, "loss": 41.1941, "step": 5310 }, { "epoch": 0.08387462950116668, "grad_norm": 201.9768743312379, "learning_rate": 1.9726550468592286e-05, "loss": 39.8565, "step": 5320 }, { "epoch": 0.08403228857917638, "grad_norm": 160.7176050157212, "learning_rate": 1.9725387260205452e-05, "loss": 40.9946, "step": 5330 }, { "epoch": 0.0841899476571861, "grad_norm": 163.03955590561952, "learning_rate": 1.9724221617462137e-05, "loss": 38.9201, "step": 5340 }, { "epoch": 0.08434760673519581, "grad_norm": 155.68083772455478, "learning_rate": 1.972305354065412e-05, "loss": 39.5184, "step": 5350 }, { "epoch": 0.08450526581320553, "grad_norm": 166.80741964659958, "learning_rate": 1.972188303007377e-05, "loss": 38.831, "step": 5360 }, { "epoch": 0.08466292489121524, "grad_norm": 158.24355662363266, "learning_rate": 1.9720710086014085e-05, "loss": 39.4615, "step": 5370 }, { "epoch": 0.08482058396922494, "grad_norm": 153.03381140610074, "learning_rate": 1.9719534708768662e-05, "loss": 38.6748, "step": 5380 }, { "epoch": 0.08497824304723466, "grad_norm": 161.51940928883383, "learning_rate": 1.9718356898631708e-05, "loss": 40.849, "step": 5390 }, { "epoch": 0.08513590212524437, "grad_norm": 151.79304216536417, "learning_rate": 1.971717665589804e-05, "loss": 38.821, "step": 5400 }, { "epoch": 0.08529356120325408, "grad_norm": 162.68449860211973, "learning_rate": 1.9715993980863085e-05, "loss": 39.9294, "step": 5410 }, { "epoch": 0.0854512202812638, "grad_norm": 165.74696913868985, "learning_rate": 1.971480887382288e-05, "loss": 39.7883, "step": 5420 }, { "epoch": 0.08560887935927351, "grad_norm": 163.69557740932208, "learning_rate": 1.971362133507406e-05, "loss": 40.0712, "step": 5430 }, { "epoch": 0.08576653843728321, "grad_norm": 149.19620597858443, "learning_rate": 1.971243136491389e-05, "loss": 39.9559, "step": 5440 }, { "epoch": 0.08592419751529293, "grad_norm": 154.97141469409928, "learning_rate": 1.9711238963640218e-05, "loss": 39.2976, "step": 5450 }, { "epoch": 0.08608185659330264, "grad_norm": 168.2758878589725, "learning_rate": 1.971004413155152e-05, "loss": 40.3097, "step": 5460 }, { "epoch": 0.08623951567131236, "grad_norm": 152.77773657891026, "learning_rate": 1.9708846868946878e-05, "loss": 39.9608, "step": 5470 }, { "epoch": 0.08639717474932207, "grad_norm": 172.16123867711994, "learning_rate": 1.970764717612597e-05, "loss": 39.8861, "step": 5480 }, { "epoch": 0.08655483382733178, "grad_norm": 156.35390170304623, "learning_rate": 1.9706445053389095e-05, "loss": 40.462, "step": 5490 }, { "epoch": 0.08671249290534148, "grad_norm": 154.13547722759955, "learning_rate": 1.9705240501037154e-05, "loss": 39.0871, "step": 5500 }, { "epoch": 0.0868701519833512, "grad_norm": 154.69338300634072, "learning_rate": 1.970403351937166e-05, "loss": 40.6306, "step": 5510 }, { "epoch": 0.08702781106136091, "grad_norm": 173.26645218020818, "learning_rate": 1.9702824108694723e-05, "loss": 38.9992, "step": 5520 }, { "epoch": 0.08718547013937063, "grad_norm": 168.2379598408447, "learning_rate": 1.970161226930908e-05, "loss": 39.7963, "step": 5530 }, { "epoch": 0.08734312921738034, "grad_norm": 170.5262752617452, "learning_rate": 1.9700398001518066e-05, "loss": 39.2754, "step": 5540 }, { "epoch": 0.08750078829539004, "grad_norm": 151.76985182252525, "learning_rate": 1.9699181305625622e-05, "loss": 39.1592, "step": 5550 }, { "epoch": 0.08765844737339976, "grad_norm": 157.80392387581836, "learning_rate": 1.9697962181936293e-05, "loss": 39.9365, "step": 5560 }, { "epoch": 0.08781610645140947, "grad_norm": 155.65692023700586, "learning_rate": 1.9696740630755244e-05, "loss": 39.4677, "step": 5570 }, { "epoch": 0.08797376552941918, "grad_norm": 154.13842664162942, "learning_rate": 1.9695516652388237e-05, "loss": 39.3085, "step": 5580 }, { "epoch": 0.0881314246074289, "grad_norm": 155.09791091938877, "learning_rate": 1.9694290247141644e-05, "loss": 39.2041, "step": 5590 }, { "epoch": 0.08828908368543861, "grad_norm": 162.9303371381609, "learning_rate": 1.9693061415322453e-05, "loss": 39.4571, "step": 5600 }, { "epoch": 0.08844674276344831, "grad_norm": 156.09691513926464, "learning_rate": 1.969183015723825e-05, "loss": 38.7197, "step": 5610 }, { "epoch": 0.08860440184145803, "grad_norm": 163.74276430843597, "learning_rate": 1.9690596473197223e-05, "loss": 39.9419, "step": 5620 }, { "epoch": 0.08876206091946774, "grad_norm": 151.84695348870736, "learning_rate": 1.968936036350818e-05, "loss": 39.9335, "step": 5630 }, { "epoch": 0.08891971999747746, "grad_norm": 159.61145831833565, "learning_rate": 1.968812182848053e-05, "loss": 39.0372, "step": 5640 }, { "epoch": 0.08907737907548717, "grad_norm": 151.3008895549943, "learning_rate": 1.968688086842429e-05, "loss": 39.7881, "step": 5650 }, { "epoch": 0.08923503815349688, "grad_norm": 226.23249188295813, "learning_rate": 1.968563748365009e-05, "loss": 39.4929, "step": 5660 }, { "epoch": 0.08939269723150658, "grad_norm": 156.61214234860353, "learning_rate": 1.9684391674469154e-05, "loss": 39.6968, "step": 5670 }, { "epoch": 0.0895503563095163, "grad_norm": 145.0939890925629, "learning_rate": 1.9683143441193322e-05, "loss": 38.8859, "step": 5680 }, { "epoch": 0.08970801538752601, "grad_norm": 167.43169920379665, "learning_rate": 1.968189278413504e-05, "loss": 40.0423, "step": 5690 }, { "epoch": 0.08986567446553573, "grad_norm": 146.99065813630003, "learning_rate": 1.9680639703607355e-05, "loss": 39.3902, "step": 5700 }, { "epoch": 0.09002333354354544, "grad_norm": 162.2582691474644, "learning_rate": 1.967938419992393e-05, "loss": 39.0372, "step": 5710 }, { "epoch": 0.09018099262155516, "grad_norm": 181.88631347582202, "learning_rate": 1.967812627339902e-05, "loss": 40.6027, "step": 5720 }, { "epoch": 0.09033865169956486, "grad_norm": 183.29444128560073, "learning_rate": 1.967686592434751e-05, "loss": 39.9354, "step": 5730 }, { "epoch": 0.09049631077757457, "grad_norm": 147.42923871903997, "learning_rate": 1.967560315308487e-05, "loss": 38.7271, "step": 5740 }, { "epoch": 0.09065396985558428, "grad_norm": 162.4747645897163, "learning_rate": 1.9674337959927183e-05, "loss": 39.0165, "step": 5750 }, { "epoch": 0.090811628933594, "grad_norm": 153.92873169291963, "learning_rate": 1.967307034519114e-05, "loss": 39.8669, "step": 5760 }, { "epoch": 0.09096928801160371, "grad_norm": 158.24766335399303, "learning_rate": 1.9671800309194036e-05, "loss": 39.3987, "step": 5770 }, { "epoch": 0.09112694708961341, "grad_norm": 149.52681053897996, "learning_rate": 1.9670527852253775e-05, "loss": 39.2691, "step": 5780 }, { "epoch": 0.09128460616762313, "grad_norm": 155.06008636611062, "learning_rate": 1.9669252974688863e-05, "loss": 39.6026, "step": 5790 }, { "epoch": 0.09144226524563284, "grad_norm": 151.20345743804657, "learning_rate": 1.9667975676818414e-05, "loss": 39.4067, "step": 5800 }, { "epoch": 0.09159992432364256, "grad_norm": 151.1814252939513, "learning_rate": 1.966669595896215e-05, "loss": 38.7314, "step": 5810 }, { "epoch": 0.09175758340165227, "grad_norm": 157.50015337938524, "learning_rate": 1.9665413821440394e-05, "loss": 39.6151, "step": 5820 }, { "epoch": 0.09191524247966198, "grad_norm": 160.4615098201207, "learning_rate": 1.9664129264574078e-05, "loss": 39.891, "step": 5830 }, { "epoch": 0.09207290155767169, "grad_norm": 147.92532631644076, "learning_rate": 1.9662842288684738e-05, "loss": 39.0762, "step": 5840 }, { "epoch": 0.0922305606356814, "grad_norm": 173.32205164782715, "learning_rate": 1.9661552894094518e-05, "loss": 39.6474, "step": 5850 }, { "epoch": 0.09238821971369111, "grad_norm": 150.54644613225287, "learning_rate": 1.9660261081126165e-05, "loss": 39.03, "step": 5860 }, { "epoch": 0.09254587879170083, "grad_norm": 165.46728062177675, "learning_rate": 1.965896685010303e-05, "loss": 38.651, "step": 5870 }, { "epoch": 0.09270353786971054, "grad_norm": 159.39173963554035, "learning_rate": 1.9657670201349074e-05, "loss": 39.2811, "step": 5880 }, { "epoch": 0.09286119694772026, "grad_norm": 146.23350637744247, "learning_rate": 1.965637113518886e-05, "loss": 39.8488, "step": 5890 }, { "epoch": 0.09301885602572996, "grad_norm": 151.93190841022346, "learning_rate": 1.9655069651947557e-05, "loss": 40.1956, "step": 5900 }, { "epoch": 0.09317651510373967, "grad_norm": 159.80672266780897, "learning_rate": 1.9653765751950937e-05, "loss": 40.1933, "step": 5910 }, { "epoch": 0.09333417418174939, "grad_norm": 160.65832398324653, "learning_rate": 1.9652459435525375e-05, "loss": 40.324, "step": 5920 }, { "epoch": 0.0934918332597591, "grad_norm": 153.11120074931728, "learning_rate": 1.9651150702997863e-05, "loss": 39.8512, "step": 5930 }, { "epoch": 0.09364949233776881, "grad_norm": 147.9822533955925, "learning_rate": 1.9649839554695985e-05, "loss": 39.0103, "step": 5940 }, { "epoch": 0.09380715141577851, "grad_norm": 150.65131966407648, "learning_rate": 1.964852599094793e-05, "loss": 38.3902, "step": 5950 }, { "epoch": 0.09396481049378823, "grad_norm": 154.80999217802534, "learning_rate": 1.96472100120825e-05, "loss": 39.9298, "step": 5960 }, { "epoch": 0.09412246957179794, "grad_norm": 158.5464760279413, "learning_rate": 1.96458916184291e-05, "loss": 39.1582, "step": 5970 }, { "epoch": 0.09428012864980766, "grad_norm": 149.82794678117688, "learning_rate": 1.9644570810317728e-05, "loss": 38.3176, "step": 5980 }, { "epoch": 0.09443778772781737, "grad_norm": 148.16058074459457, "learning_rate": 1.9643247588079e-05, "loss": 38.1712, "step": 5990 }, { "epoch": 0.09459544680582709, "grad_norm": 173.14462936970125, "learning_rate": 1.964192195204413e-05, "loss": 39.1062, "step": 6000 }, { "epoch": 0.09475310588383679, "grad_norm": 156.58304980703073, "learning_rate": 1.9640593902544936e-05, "loss": 38.6322, "step": 6010 }, { "epoch": 0.0949107649618465, "grad_norm": 155.6359572206372, "learning_rate": 1.9639263439913845e-05, "loss": 38.4454, "step": 6020 }, { "epoch": 0.09506842403985621, "grad_norm": 155.02967788092175, "learning_rate": 1.963793056448388e-05, "loss": 38.6316, "step": 6030 }, { "epoch": 0.09522608311786593, "grad_norm": 162.14931865433405, "learning_rate": 1.9636595276588676e-05, "loss": 39.2482, "step": 6040 }, { "epoch": 0.09538374219587564, "grad_norm": 166.8290789539889, "learning_rate": 1.9635257576562466e-05, "loss": 39.9477, "step": 6050 }, { "epoch": 0.09554140127388536, "grad_norm": 162.015448184476, "learning_rate": 1.963391746474009e-05, "loss": 39.3006, "step": 6060 }, { "epoch": 0.09569906035189506, "grad_norm": 155.79235899503672, "learning_rate": 1.963257494145699e-05, "loss": 40.377, "step": 6070 }, { "epoch": 0.09585671942990477, "grad_norm": 158.80140268196874, "learning_rate": 1.9631230007049213e-05, "loss": 39.0309, "step": 6080 }, { "epoch": 0.09601437850791449, "grad_norm": 156.8549222838188, "learning_rate": 1.962988266185341e-05, "loss": 38.3383, "step": 6090 }, { "epoch": 0.0961720375859242, "grad_norm": 167.38898565164547, "learning_rate": 1.9628532906206826e-05, "loss": 39.6946, "step": 6100 }, { "epoch": 0.09632969666393391, "grad_norm": 206.4146000398918, "learning_rate": 1.9627180740447328e-05, "loss": 40.2181, "step": 6110 }, { "epoch": 0.09648735574194361, "grad_norm": 163.20328198430616, "learning_rate": 1.9625826164913377e-05, "loss": 38.4297, "step": 6120 }, { "epoch": 0.09664501481995333, "grad_norm": 161.6522629653744, "learning_rate": 1.9624469179944025e-05, "loss": 39.2149, "step": 6130 }, { "epoch": 0.09680267389796304, "grad_norm": 156.0891981596665, "learning_rate": 1.9623109785878944e-05, "loss": 38.7685, "step": 6140 }, { "epoch": 0.09696033297597276, "grad_norm": 160.94900418104342, "learning_rate": 1.962174798305841e-05, "loss": 38.1942, "step": 6150 }, { "epoch": 0.09711799205398247, "grad_norm": 154.18246144875513, "learning_rate": 1.9620383771823282e-05, "loss": 38.9017, "step": 6160 }, { "epoch": 0.09727565113199219, "grad_norm": 153.27925905359825, "learning_rate": 1.9619017152515044e-05, "loss": 39.2315, "step": 6170 }, { "epoch": 0.09743331021000189, "grad_norm": 150.50450467074134, "learning_rate": 1.9617648125475772e-05, "loss": 38.7182, "step": 6180 }, { "epoch": 0.0975909692880116, "grad_norm": 151.16051217105633, "learning_rate": 1.9616276691048145e-05, "loss": 38.3366, "step": 6190 }, { "epoch": 0.09774862836602131, "grad_norm": 157.08376828817464, "learning_rate": 1.961490284957545e-05, "loss": 38.8126, "step": 6200 }, { "epoch": 0.09790628744403103, "grad_norm": 157.89066510839743, "learning_rate": 1.9613526601401564e-05, "loss": 39.4064, "step": 6210 }, { "epoch": 0.09806394652204074, "grad_norm": 162.05015815304327, "learning_rate": 1.9612147946870983e-05, "loss": 39.0712, "step": 6220 }, { "epoch": 0.09822160560005046, "grad_norm": 158.21914445163017, "learning_rate": 1.9610766886328797e-05, "loss": 37.8615, "step": 6230 }, { "epoch": 0.09837926467806016, "grad_norm": 147.98922160950215, "learning_rate": 1.960938342012069e-05, "loss": 38.9058, "step": 6240 }, { "epoch": 0.09853692375606987, "grad_norm": 146.49223593492994, "learning_rate": 1.9607997548592968e-05, "loss": 38.536, "step": 6250 }, { "epoch": 0.09869458283407959, "grad_norm": 169.42621974598728, "learning_rate": 1.960660927209252e-05, "loss": 39.172, "step": 6260 }, { "epoch": 0.0988522419120893, "grad_norm": 154.72534314282237, "learning_rate": 1.960521859096685e-05, "loss": 38.1271, "step": 6270 }, { "epoch": 0.09900990099009901, "grad_norm": 141.54684548353916, "learning_rate": 1.9603825505564054e-05, "loss": 39.363, "step": 6280 }, { "epoch": 0.09916756006810871, "grad_norm": 147.70608189071928, "learning_rate": 1.9602430016232837e-05, "loss": 38.4844, "step": 6290 }, { "epoch": 0.09932521914611843, "grad_norm": 151.66091664707076, "learning_rate": 1.9601032123322503e-05, "loss": 39.4043, "step": 6300 }, { "epoch": 0.09948287822412814, "grad_norm": 152.69387996032566, "learning_rate": 1.959963182718296e-05, "loss": 38.2397, "step": 6310 }, { "epoch": 0.09964053730213786, "grad_norm": 150.67183186848493, "learning_rate": 1.959822912816471e-05, "loss": 38.5819, "step": 6320 }, { "epoch": 0.09979819638014757, "grad_norm": 152.94863260148907, "learning_rate": 1.959682402661886e-05, "loss": 38.3649, "step": 6330 }, { "epoch": 0.09995585545815729, "grad_norm": 151.20083915940307, "learning_rate": 1.9595416522897134e-05, "loss": 38.002, "step": 6340 }, { "epoch": 0.10011351453616699, "grad_norm": 156.30811469075795, "learning_rate": 1.9594006617351828e-05, "loss": 38.7428, "step": 6350 }, { "epoch": 0.1002711736141767, "grad_norm": 148.47453809866167, "learning_rate": 1.9592594310335863e-05, "loss": 38.5762, "step": 6360 }, { "epoch": 0.10042883269218641, "grad_norm": 151.91867697705257, "learning_rate": 1.959117960220275e-05, "loss": 37.869, "step": 6370 }, { "epoch": 0.10058649177019613, "grad_norm": 149.78968820094752, "learning_rate": 1.9589762493306604e-05, "loss": 39.1156, "step": 6380 }, { "epoch": 0.10074415084820584, "grad_norm": 157.05269798065112, "learning_rate": 1.9588342984002144e-05, "loss": 38.6686, "step": 6390 }, { "epoch": 0.10090180992621556, "grad_norm": 148.19918867819837, "learning_rate": 1.9586921074644685e-05, "loss": 40.6543, "step": 6400 }, { "epoch": 0.10105946900422526, "grad_norm": 147.3036618232394, "learning_rate": 1.9585496765590143e-05, "loss": 38.2188, "step": 6410 }, { "epoch": 0.10121712808223497, "grad_norm": 152.1257488995344, "learning_rate": 1.9584070057195035e-05, "loss": 39.7542, "step": 6420 }, { "epoch": 0.10137478716024469, "grad_norm": 159.0859998068498, "learning_rate": 1.958264094981648e-05, "loss": 38.3134, "step": 6430 }, { "epoch": 0.1015324462382544, "grad_norm": 149.24667347410556, "learning_rate": 1.9581209443812202e-05, "loss": 39.1284, "step": 6440 }, { "epoch": 0.10169010531626411, "grad_norm": 162.84507701075214, "learning_rate": 1.9579775539540513e-05, "loss": 39.5842, "step": 6450 }, { "epoch": 0.10184776439427383, "grad_norm": 147.79418741686948, "learning_rate": 1.9578339237360338e-05, "loss": 39.2912, "step": 6460 }, { "epoch": 0.10200542347228353, "grad_norm": 146.8637478554716, "learning_rate": 1.9576900537631197e-05, "loss": 38.9867, "step": 6470 }, { "epoch": 0.10216308255029324, "grad_norm": 285.819875293981, "learning_rate": 1.9575459440713205e-05, "loss": 38.4894, "step": 6480 }, { "epoch": 0.10232074162830296, "grad_norm": 158.08434075642987, "learning_rate": 1.9574015946967087e-05, "loss": 38.5264, "step": 6490 }, { "epoch": 0.10247840070631267, "grad_norm": 154.2467718161191, "learning_rate": 1.9572570056754166e-05, "loss": 38.1427, "step": 6500 }, { "epoch": 0.10263605978432239, "grad_norm": 146.2037459164991, "learning_rate": 1.9571121770436355e-05, "loss": 38.7737, "step": 6510 }, { "epoch": 0.10279371886233209, "grad_norm": 148.19603429576665, "learning_rate": 1.956967108837618e-05, "loss": 38.1886, "step": 6520 }, { "epoch": 0.1029513779403418, "grad_norm": 152.74756426856536, "learning_rate": 1.9568218010936753e-05, "loss": 39.3079, "step": 6530 }, { "epoch": 0.10310903701835152, "grad_norm": 158.37593916181496, "learning_rate": 1.95667625384818e-05, "loss": 39.4134, "step": 6540 }, { "epoch": 0.10326669609636123, "grad_norm": 155.19290737030806, "learning_rate": 1.9565304671375636e-05, "loss": 38.3045, "step": 6550 }, { "epoch": 0.10342435517437094, "grad_norm": 146.3154125935707, "learning_rate": 1.956384440998318e-05, "loss": 38.5027, "step": 6560 }, { "epoch": 0.10358201425238066, "grad_norm": 166.36764303200636, "learning_rate": 1.956238175466995e-05, "loss": 39.3381, "step": 6570 }, { "epoch": 0.10373967333039036, "grad_norm": 144.85031543185895, "learning_rate": 1.956091670580206e-05, "loss": 38.2961, "step": 6580 }, { "epoch": 0.10389733240840007, "grad_norm": 152.57778603118956, "learning_rate": 1.9559449263746228e-05, "loss": 37.2916, "step": 6590 }, { "epoch": 0.10405499148640979, "grad_norm": 145.13943318613462, "learning_rate": 1.9557979428869767e-05, "loss": 38.7075, "step": 6600 }, { "epoch": 0.1042126505644195, "grad_norm": 156.8372903864195, "learning_rate": 1.955650720154059e-05, "loss": 39.0135, "step": 6610 }, { "epoch": 0.10437030964242922, "grad_norm": 139.46758429770685, "learning_rate": 1.9555032582127214e-05, "loss": 38.0532, "step": 6620 }, { "epoch": 0.10452796872043893, "grad_norm": 158.08130255129743, "learning_rate": 1.9553555570998742e-05, "loss": 38.6585, "step": 6630 }, { "epoch": 0.10468562779844863, "grad_norm": 153.14085620331093, "learning_rate": 1.9552076168524896e-05, "loss": 40.0455, "step": 6640 }, { "epoch": 0.10484328687645834, "grad_norm": 170.96047085755959, "learning_rate": 1.9550594375075973e-05, "loss": 39.9179, "step": 6650 }, { "epoch": 0.10500094595446806, "grad_norm": 153.6828927462337, "learning_rate": 1.9549110191022883e-05, "loss": 38.5033, "step": 6660 }, { "epoch": 0.10515860503247777, "grad_norm": 152.44324977797893, "learning_rate": 1.9547623616737135e-05, "loss": 38.9042, "step": 6670 }, { "epoch": 0.10531626411048749, "grad_norm": 143.2780528427834, "learning_rate": 1.954613465259083e-05, "loss": 38.3349, "step": 6680 }, { "epoch": 0.10547392318849719, "grad_norm": 150.56933492804447, "learning_rate": 1.954464329895667e-05, "loss": 37.4728, "step": 6690 }, { "epoch": 0.1056315822665069, "grad_norm": 157.92139118554314, "learning_rate": 1.9543149556207956e-05, "loss": 39.1351, "step": 6700 }, { "epoch": 0.10578924134451662, "grad_norm": 155.34092173233864, "learning_rate": 1.9541653424718584e-05, "loss": 38.73, "step": 6710 }, { "epoch": 0.10594690042252633, "grad_norm": 157.84646936765327, "learning_rate": 1.9540154904863048e-05, "loss": 38.1247, "step": 6720 }, { "epoch": 0.10610455950053604, "grad_norm": 165.3913062875131, "learning_rate": 1.953865399701645e-05, "loss": 38.847, "step": 6730 }, { "epoch": 0.10626221857854576, "grad_norm": 335.06035355128466, "learning_rate": 1.953715070155447e-05, "loss": 37.9981, "step": 6740 }, { "epoch": 0.10641987765655546, "grad_norm": 155.1301423420491, "learning_rate": 1.9535645018853408e-05, "loss": 38.7379, "step": 6750 }, { "epoch": 0.10657753673456517, "grad_norm": 153.89313865885475, "learning_rate": 1.953413694929014e-05, "loss": 37.6197, "step": 6760 }, { "epoch": 0.10673519581257489, "grad_norm": 157.4300446154275, "learning_rate": 1.953262649324216e-05, "loss": 37.7439, "step": 6770 }, { "epoch": 0.1068928548905846, "grad_norm": 147.40840103415462, "learning_rate": 1.9531113651087544e-05, "loss": 38.843, "step": 6780 }, { "epoch": 0.10705051396859432, "grad_norm": 149.14811705816533, "learning_rate": 1.952959842320497e-05, "loss": 39.2218, "step": 6790 }, { "epoch": 0.10720817304660403, "grad_norm": 152.01830681465606, "learning_rate": 1.9528080809973716e-05, "loss": 38.3282, "step": 6800 }, { "epoch": 0.10736583212461373, "grad_norm": 161.39713509301174, "learning_rate": 1.9526560811773657e-05, "loss": 38.6722, "step": 6810 }, { "epoch": 0.10752349120262344, "grad_norm": 152.15174289664532, "learning_rate": 1.9525038428985258e-05, "loss": 38.4067, "step": 6820 }, { "epoch": 0.10768115028063316, "grad_norm": 152.58809519450404, "learning_rate": 1.9523513661989594e-05, "loss": 38.0325, "step": 6830 }, { "epoch": 0.10783880935864287, "grad_norm": 142.07939623445824, "learning_rate": 1.952198651116832e-05, "loss": 37.7668, "step": 6840 }, { "epoch": 0.10799646843665259, "grad_norm": 163.8469442034598, "learning_rate": 1.9520456976903698e-05, "loss": 38.2136, "step": 6850 }, { "epoch": 0.10815412751466229, "grad_norm": 140.39817803986048, "learning_rate": 1.9518925059578585e-05, "loss": 37.9667, "step": 6860 }, { "epoch": 0.108311786592672, "grad_norm": 164.65111931987533, "learning_rate": 1.9517390759576436e-05, "loss": 38.404, "step": 6870 }, { "epoch": 0.10846944567068172, "grad_norm": 136.49234622963328, "learning_rate": 1.9515854077281302e-05, "loss": 37.8836, "step": 6880 }, { "epoch": 0.10862710474869143, "grad_norm": 146.76402386983023, "learning_rate": 1.9514315013077828e-05, "loss": 38.7125, "step": 6890 }, { "epoch": 0.10878476382670114, "grad_norm": 152.35498535136205, "learning_rate": 1.9512773567351254e-05, "loss": 38.4479, "step": 6900 }, { "epoch": 0.10894242290471086, "grad_norm": 150.8631701506349, "learning_rate": 1.951122974048742e-05, "loss": 38.5335, "step": 6910 }, { "epoch": 0.10910008198272056, "grad_norm": 143.28624032058084, "learning_rate": 1.9509683532872765e-05, "loss": 38.7005, "step": 6920 }, { "epoch": 0.10925774106073027, "grad_norm": 155.29086779191638, "learning_rate": 1.950813494489431e-05, "loss": 37.6693, "step": 6930 }, { "epoch": 0.10941540013873999, "grad_norm": 151.41203529424317, "learning_rate": 1.950658397693969e-05, "loss": 38.188, "step": 6940 }, { "epoch": 0.1095730592167497, "grad_norm": 147.77409812045283, "learning_rate": 1.950503062939712e-05, "loss": 38.3949, "step": 6950 }, { "epoch": 0.10973071829475942, "grad_norm": 144.60446433580412, "learning_rate": 1.9503474902655425e-05, "loss": 38.3913, "step": 6960 }, { "epoch": 0.10988837737276913, "grad_norm": 147.77297556923554, "learning_rate": 1.950191679710401e-05, "loss": 37.8907, "step": 6970 }, { "epoch": 0.11004603645077883, "grad_norm": 148.1253467691901, "learning_rate": 1.9500356313132895e-05, "loss": 38.5582, "step": 6980 }, { "epoch": 0.11020369552878855, "grad_norm": 153.27584699762525, "learning_rate": 1.9498793451132673e-05, "loss": 41.7124, "step": 6990 }, { "epoch": 0.11036135460679826, "grad_norm": 154.1772777711588, "learning_rate": 1.9497228211494542e-05, "loss": 38.7845, "step": 7000 }, { "epoch": 0.11051901368480797, "grad_norm": 148.52067030963877, "learning_rate": 1.9495660594610306e-05, "loss": 37.399, "step": 7010 }, { "epoch": 0.11067667276281769, "grad_norm": 145.6387087577252, "learning_rate": 1.9494090600872348e-05, "loss": 39.1701, "step": 7020 }, { "epoch": 0.11083433184082739, "grad_norm": 150.21872210332674, "learning_rate": 1.9492518230673654e-05, "loss": 38.2852, "step": 7030 }, { "epoch": 0.1109919909188371, "grad_norm": 142.92951830750437, "learning_rate": 1.9490943484407803e-05, "loss": 38.0379, "step": 7040 }, { "epoch": 0.11114964999684682, "grad_norm": 147.69089384998694, "learning_rate": 1.948936636246897e-05, "loss": 38.6913, "step": 7050 }, { "epoch": 0.11130730907485653, "grad_norm": 140.86181687169744, "learning_rate": 1.948778686525192e-05, "loss": 38.4077, "step": 7060 }, { "epoch": 0.11146496815286625, "grad_norm": 153.13044616721373, "learning_rate": 1.9486204993152017e-05, "loss": 37.5909, "step": 7070 }, { "epoch": 0.11162262723087596, "grad_norm": 148.33791988713782, "learning_rate": 1.9484620746565222e-05, "loss": 38.2346, "step": 7080 }, { "epoch": 0.11178028630888566, "grad_norm": 149.40022703717761, "learning_rate": 1.9483034125888086e-05, "loss": 37.4634, "step": 7090 }, { "epoch": 0.11193794538689537, "grad_norm": 146.39849824708003, "learning_rate": 1.948144513151775e-05, "loss": 38.0811, "step": 7100 }, { "epoch": 0.11209560446490509, "grad_norm": 162.27807481392597, "learning_rate": 1.9479853763851965e-05, "loss": 38.0946, "step": 7110 }, { "epoch": 0.1122532635429148, "grad_norm": 142.4307962213922, "learning_rate": 1.9478260023289052e-05, "loss": 37.9672, "step": 7120 }, { "epoch": 0.11241092262092452, "grad_norm": 145.90188313240324, "learning_rate": 1.947666391022795e-05, "loss": 38.6749, "step": 7130 }, { "epoch": 0.11256858169893423, "grad_norm": 146.30679821892153, "learning_rate": 1.9475065425068177e-05, "loss": 38.9813, "step": 7140 }, { "epoch": 0.11272624077694393, "grad_norm": 144.4880487255454, "learning_rate": 1.9473464568209848e-05, "loss": 38.0083, "step": 7150 }, { "epoch": 0.11288389985495365, "grad_norm": 183.1057546091642, "learning_rate": 1.9471861340053678e-05, "loss": 39.067, "step": 7160 }, { "epoch": 0.11304155893296336, "grad_norm": 156.88584893822832, "learning_rate": 1.9470255741000962e-05, "loss": 38.7282, "step": 7170 }, { "epoch": 0.11319921801097307, "grad_norm": 146.26059364067376, "learning_rate": 1.94686477714536e-05, "loss": 39.4808, "step": 7180 }, { "epoch": 0.11335687708898279, "grad_norm": 155.08347268002697, "learning_rate": 1.9467037431814083e-05, "loss": 37.7227, "step": 7190 }, { "epoch": 0.11351453616699249, "grad_norm": 141.42576212114497, "learning_rate": 1.9465424722485498e-05, "loss": 37.8175, "step": 7200 }, { "epoch": 0.1136721952450022, "grad_norm": 157.5204781811958, "learning_rate": 1.946380964387151e-05, "loss": 38.3844, "step": 7210 }, { "epoch": 0.11382985432301192, "grad_norm": 148.44388281194486, "learning_rate": 1.9462192196376403e-05, "loss": 37.6398, "step": 7220 }, { "epoch": 0.11398751340102163, "grad_norm": 150.98682455966056, "learning_rate": 1.946057238040503e-05, "loss": 37.6638, "step": 7230 }, { "epoch": 0.11414517247903135, "grad_norm": 144.11044814335165, "learning_rate": 1.9458950196362848e-05, "loss": 37.7719, "step": 7240 }, { "epoch": 0.11430283155704106, "grad_norm": 146.29737971413005, "learning_rate": 1.9457325644655905e-05, "loss": 38.2705, "step": 7250 }, { "epoch": 0.11446049063505076, "grad_norm": 142.27134757664345, "learning_rate": 1.9455698725690845e-05, "loss": 39.1797, "step": 7260 }, { "epoch": 0.11461814971306047, "grad_norm": 145.27839756437868, "learning_rate": 1.9454069439874895e-05, "loss": 38.0101, "step": 7270 }, { "epoch": 0.11477580879107019, "grad_norm": 158.70997341778437, "learning_rate": 1.9452437787615885e-05, "loss": 38.03, "step": 7280 }, { "epoch": 0.1149334678690799, "grad_norm": 149.2664166928701, "learning_rate": 1.9450803769322233e-05, "loss": 38.1629, "step": 7290 }, { "epoch": 0.11509112694708962, "grad_norm": 170.44545833082975, "learning_rate": 1.944916738540295e-05, "loss": 38.7612, "step": 7300 }, { "epoch": 0.11524878602509933, "grad_norm": 141.37067393521335, "learning_rate": 1.9447528636267633e-05, "loss": 37.1475, "step": 7310 }, { "epoch": 0.11540644510310903, "grad_norm": 134.49777942775376, "learning_rate": 1.944588752232648e-05, "loss": 37.8033, "step": 7320 }, { "epoch": 0.11556410418111875, "grad_norm": 146.2884143287009, "learning_rate": 1.944424404399028e-05, "loss": 38.3737, "step": 7330 }, { "epoch": 0.11572176325912846, "grad_norm": 148.25912114766328, "learning_rate": 1.9442598201670407e-05, "loss": 37.4276, "step": 7340 }, { "epoch": 0.11587942233713817, "grad_norm": 155.81671809142807, "learning_rate": 1.9440949995778834e-05, "loss": 39.5934, "step": 7350 }, { "epoch": 0.11603708141514789, "grad_norm": 150.04152341437162, "learning_rate": 1.943929942672812e-05, "loss": 37.9631, "step": 7360 }, { "epoch": 0.1161947404931576, "grad_norm": 141.32655267540758, "learning_rate": 1.9437646494931418e-05, "loss": 37.2616, "step": 7370 }, { "epoch": 0.1163523995711673, "grad_norm": 131.6675567356568, "learning_rate": 1.9435991200802476e-05, "loss": 37.3561, "step": 7380 }, { "epoch": 0.11651005864917702, "grad_norm": 158.8751947747983, "learning_rate": 1.9434333544755628e-05, "loss": 37.3443, "step": 7390 }, { "epoch": 0.11666771772718673, "grad_norm": 144.32649189613275, "learning_rate": 1.94326735272058e-05, "loss": 38.4049, "step": 7400 }, { "epoch": 0.11682537680519645, "grad_norm": 154.55080772537207, "learning_rate": 1.943101114856851e-05, "loss": 38.5734, "step": 7410 }, { "epoch": 0.11698303588320616, "grad_norm": 147.4791989798978, "learning_rate": 1.9429346409259865e-05, "loss": 37.8056, "step": 7420 }, { "epoch": 0.11714069496121586, "grad_norm": 134.4892025152648, "learning_rate": 1.942767930969657e-05, "loss": 37.9643, "step": 7430 }, { "epoch": 0.11729835403922557, "grad_norm": 148.44041240959083, "learning_rate": 1.9426009850295916e-05, "loss": 37.8806, "step": 7440 }, { "epoch": 0.11745601311723529, "grad_norm": 143.2072195402872, "learning_rate": 1.942433803147578e-05, "loss": 37.6033, "step": 7450 }, { "epoch": 0.117613672195245, "grad_norm": 152.14853162405527, "learning_rate": 1.9422663853654632e-05, "loss": 37.6819, "step": 7460 }, { "epoch": 0.11777133127325472, "grad_norm": 142.68585175114447, "learning_rate": 1.9420987317251542e-05, "loss": 37.9662, "step": 7470 }, { "epoch": 0.11792899035126443, "grad_norm": 149.25472803591606, "learning_rate": 1.9419308422686162e-05, "loss": 38.6635, "step": 7480 }, { "epoch": 0.11808664942927413, "grad_norm": 142.21765369429795, "learning_rate": 1.941762717037873e-05, "loss": 37.6442, "step": 7490 }, { "epoch": 0.11824430850728385, "grad_norm": 138.89516644541743, "learning_rate": 1.941594356075008e-05, "loss": 36.3141, "step": 7500 }, { "epoch": 0.11840196758529356, "grad_norm": 139.3598970923297, "learning_rate": 1.9414257594221643e-05, "loss": 37.39, "step": 7510 }, { "epoch": 0.11855962666330327, "grad_norm": 138.66605589840665, "learning_rate": 1.941256927121542e-05, "loss": 36.9466, "step": 7520 }, { "epoch": 0.11871728574131299, "grad_norm": 150.28555626228444, "learning_rate": 1.9410878592154026e-05, "loss": 38.2193, "step": 7530 }, { "epoch": 0.1188749448193227, "grad_norm": 206.19297530598556, "learning_rate": 1.940918555746065e-05, "loss": 39.0657, "step": 7540 }, { "epoch": 0.1190326038973324, "grad_norm": 154.33990820869022, "learning_rate": 1.9407490167559068e-05, "loss": 38.2993, "step": 7550 }, { "epoch": 0.11919026297534212, "grad_norm": 145.42406465440507, "learning_rate": 1.940579242287366e-05, "loss": 37.685, "step": 7560 }, { "epoch": 0.11934792205335183, "grad_norm": 149.17391108313424, "learning_rate": 1.9404092323829387e-05, "loss": 36.5538, "step": 7570 }, { "epoch": 0.11950558113136155, "grad_norm": 142.51211729017078, "learning_rate": 1.9402389870851797e-05, "loss": 36.4737, "step": 7580 }, { "epoch": 0.11966324020937126, "grad_norm": 152.19073655866507, "learning_rate": 1.9400685064367034e-05, "loss": 37.689, "step": 7590 }, { "epoch": 0.11982089928738096, "grad_norm": 136.4782381737489, "learning_rate": 1.9398977904801823e-05, "loss": 37.6641, "step": 7600 }, { "epoch": 0.11997855836539068, "grad_norm": 148.210182160682, "learning_rate": 1.9397268392583485e-05, "loss": 37.5977, "step": 7610 }, { "epoch": 0.12013621744340039, "grad_norm": 138.91426694171838, "learning_rate": 1.9395556528139924e-05, "loss": 37.8132, "step": 7620 }, { "epoch": 0.1202938765214101, "grad_norm": 137.45471385293857, "learning_rate": 1.9393842311899637e-05, "loss": 37.612, "step": 7630 }, { "epoch": 0.12045153559941982, "grad_norm": 143.80776284722268, "learning_rate": 1.939212574429171e-05, "loss": 37.9986, "step": 7640 }, { "epoch": 0.12060919467742953, "grad_norm": 134.98343252790212, "learning_rate": 1.9390406825745816e-05, "loss": 37.6969, "step": 7650 }, { "epoch": 0.12076685375543923, "grad_norm": 149.1996196896786, "learning_rate": 1.9388685556692217e-05, "loss": 37.3927, "step": 7660 }, { "epoch": 0.12092451283344895, "grad_norm": 153.1373017885703, "learning_rate": 1.9386961937561758e-05, "loss": 37.6487, "step": 7670 }, { "epoch": 0.12108217191145866, "grad_norm": 155.53258219777447, "learning_rate": 1.9385235968785884e-05, "loss": 37.3824, "step": 7680 }, { "epoch": 0.12123983098946838, "grad_norm": 155.69385157107993, "learning_rate": 1.9383507650796617e-05, "loss": 37.4083, "step": 7690 }, { "epoch": 0.12139749006747809, "grad_norm": 141.745034310858, "learning_rate": 1.9381776984026573e-05, "loss": 37.5765, "step": 7700 }, { "epoch": 0.1215551491454878, "grad_norm": 146.3691762067744, "learning_rate": 1.9380043968908955e-05, "loss": 36.9586, "step": 7710 }, { "epoch": 0.1217128082234975, "grad_norm": 140.7805820114197, "learning_rate": 1.9378308605877553e-05, "loss": 37.3004, "step": 7720 }, { "epoch": 0.12187046730150722, "grad_norm": 147.1029469954892, "learning_rate": 1.9376570895366742e-05, "loss": 37.6606, "step": 7730 }, { "epoch": 0.12202812637951693, "grad_norm": 288.6398315944881, "learning_rate": 1.937483083781149e-05, "loss": 38.0692, "step": 7740 }, { "epoch": 0.12218578545752665, "grad_norm": 142.82175703493235, "learning_rate": 1.937308843364735e-05, "loss": 37.6765, "step": 7750 }, { "epoch": 0.12234344453553636, "grad_norm": 153.99563453957066, "learning_rate": 1.937134368331046e-05, "loss": 37.8435, "step": 7760 }, { "epoch": 0.12250110361354606, "grad_norm": 166.27026468124683, "learning_rate": 1.9369596587237548e-05, "loss": 37.8068, "step": 7770 }, { "epoch": 0.12265876269155578, "grad_norm": 175.0466357090612, "learning_rate": 1.936784714586593e-05, "loss": 37.352, "step": 7780 }, { "epoch": 0.12281642176956549, "grad_norm": 153.59181551157008, "learning_rate": 1.936609535963351e-05, "loss": 38.2781, "step": 7790 }, { "epoch": 0.1229740808475752, "grad_norm": 149.71423776291368, "learning_rate": 1.936434122897877e-05, "loss": 37.4317, "step": 7800 }, { "epoch": 0.12313173992558492, "grad_norm": 141.07283482766948, "learning_rate": 1.9362584754340793e-05, "loss": 37.3668, "step": 7810 }, { "epoch": 0.12328939900359463, "grad_norm": 143.194395593983, "learning_rate": 1.936082593615924e-05, "loss": 37.3972, "step": 7820 }, { "epoch": 0.12344705808160433, "grad_norm": 152.24560408829043, "learning_rate": 1.9359064774874353e-05, "loss": 37.0522, "step": 7830 }, { "epoch": 0.12360471715961405, "grad_norm": 147.0744119402882, "learning_rate": 1.9357301270926972e-05, "loss": 37.4316, "step": 7840 }, { "epoch": 0.12376237623762376, "grad_norm": 153.54881891913007, "learning_rate": 1.935553542475852e-05, "loss": 37.7853, "step": 7850 }, { "epoch": 0.12392003531563348, "grad_norm": 149.49776946820634, "learning_rate": 1.9353767236811002e-05, "loss": 37.3807, "step": 7860 }, { "epoch": 0.12407769439364319, "grad_norm": 143.50688141994692, "learning_rate": 1.935199670752702e-05, "loss": 38.1047, "step": 7870 }, { "epoch": 0.1242353534716529, "grad_norm": 139.71763837318764, "learning_rate": 1.935022383734974e-05, "loss": 37.1622, "step": 7880 }, { "epoch": 0.1243930125496626, "grad_norm": 139.71069238680138, "learning_rate": 1.934844862672294e-05, "loss": 37.8696, "step": 7890 }, { "epoch": 0.12455067162767232, "grad_norm": 148.06798117036666, "learning_rate": 1.9346671076090968e-05, "loss": 36.7385, "step": 7900 }, { "epoch": 0.12470833070568203, "grad_norm": 130.73641911232787, "learning_rate": 1.9344891185898762e-05, "loss": 37.0861, "step": 7910 }, { "epoch": 0.12486598978369175, "grad_norm": 135.40327836335342, "learning_rate": 1.9343108956591848e-05, "loss": 37.1307, "step": 7920 }, { "epoch": 0.12502364886170145, "grad_norm": 143.03126162057077, "learning_rate": 1.934132438861633e-05, "loss": 37.3243, "step": 7930 }, { "epoch": 0.12518130793971116, "grad_norm": 149.8598953765738, "learning_rate": 1.9339537482418902e-05, "loss": 38.6697, "step": 7940 }, { "epoch": 0.12533896701772088, "grad_norm": 138.91920259656663, "learning_rate": 1.9337748238446847e-05, "loss": 37.454, "step": 7950 }, { "epoch": 0.1254966260957306, "grad_norm": 148.49062490714152, "learning_rate": 1.933595665714803e-05, "loss": 37.3091, "step": 7960 }, { "epoch": 0.1256542851737403, "grad_norm": 133.98813743730898, "learning_rate": 1.93341627389709e-05, "loss": 37.9987, "step": 7970 }, { "epoch": 0.12581194425175002, "grad_norm": 135.93706209970014, "learning_rate": 1.933236648436449e-05, "loss": 37.6378, "step": 7980 }, { "epoch": 0.12596960332975973, "grad_norm": 140.4066182804711, "learning_rate": 1.9330567893778426e-05, "loss": 38.079, "step": 7990 }, { "epoch": 0.12612726240776945, "grad_norm": 141.1887770365414, "learning_rate": 1.93287669676629e-05, "loss": 37.1662, "step": 8000 }, { "epoch": 0.12628492148577916, "grad_norm": 133.63761468455812, "learning_rate": 1.9326963706468713e-05, "loss": 37.4087, "step": 8010 }, { "epoch": 0.12644258056378888, "grad_norm": 140.99858940568453, "learning_rate": 1.9325158110647227e-05, "loss": 37.6095, "step": 8020 }, { "epoch": 0.12660023964179856, "grad_norm": 140.3335735333774, "learning_rate": 1.9323350180650408e-05, "loss": 36.9005, "step": 8030 }, { "epoch": 0.12675789871980828, "grad_norm": 138.85211565369602, "learning_rate": 1.93215399169308e-05, "loss": 37.2475, "step": 8040 }, { "epoch": 0.126915557797818, "grad_norm": 140.7179625172242, "learning_rate": 1.931972731994152e-05, "loss": 38.0227, "step": 8050 }, { "epoch": 0.1270732168758277, "grad_norm": 143.06127051899696, "learning_rate": 1.9317912390136287e-05, "loss": 38.0557, "step": 8060 }, { "epoch": 0.12723087595383742, "grad_norm": 132.77484015364837, "learning_rate": 1.931609512796939e-05, "loss": 36.3187, "step": 8070 }, { "epoch": 0.12738853503184713, "grad_norm": 142.18328790342173, "learning_rate": 1.931427553389571e-05, "loss": 37.186, "step": 8080 }, { "epoch": 0.12754619410985685, "grad_norm": 161.11219662564014, "learning_rate": 1.93124536083707e-05, "loss": 36.9269, "step": 8090 }, { "epoch": 0.12770385318786656, "grad_norm": 151.53510543503816, "learning_rate": 1.931062935185042e-05, "loss": 37.4773, "step": 8100 }, { "epoch": 0.12786151226587628, "grad_norm": 155.75320271284417, "learning_rate": 1.930880276479149e-05, "loss": 38.1327, "step": 8110 }, { "epoch": 0.128019171343886, "grad_norm": 138.26828529718293, "learning_rate": 1.9306973847651118e-05, "loss": 36.8246, "step": 8120 }, { "epoch": 0.1281768304218957, "grad_norm": 145.17810317508946, "learning_rate": 1.9305142600887108e-05, "loss": 37.4768, "step": 8130 }, { "epoch": 0.1283344894999054, "grad_norm": 153.06153334785571, "learning_rate": 1.9303309024957833e-05, "loss": 38.3182, "step": 8140 }, { "epoch": 0.1284921485779151, "grad_norm": 127.52305597794704, "learning_rate": 1.9301473120322254e-05, "loss": 36.4988, "step": 8150 }, { "epoch": 0.12864980765592482, "grad_norm": 130.80327841618939, "learning_rate": 1.929963488743992e-05, "loss": 36.6116, "step": 8160 }, { "epoch": 0.12880746673393453, "grad_norm": 147.7693478042968, "learning_rate": 1.9297794326770953e-05, "loss": 36.4755, "step": 8170 }, { "epoch": 0.12896512581194425, "grad_norm": 141.45724744341004, "learning_rate": 1.9295951438776066e-05, "loss": 36.6917, "step": 8180 }, { "epoch": 0.12912278488995396, "grad_norm": 153.86919375288778, "learning_rate": 1.929410622391655e-05, "loss": 36.997, "step": 8190 }, { "epoch": 0.12928044396796368, "grad_norm": 139.88810791348843, "learning_rate": 1.929225868265428e-05, "loss": 37.2061, "step": 8200 }, { "epoch": 0.1294381030459734, "grad_norm": 140.52740468356726, "learning_rate": 1.9290408815451716e-05, "loss": 37.0307, "step": 8210 }, { "epoch": 0.1295957621239831, "grad_norm": 129.88691186067572, "learning_rate": 1.928855662277189e-05, "loss": 36.4483, "step": 8220 }, { "epoch": 0.12975342120199282, "grad_norm": 143.06384207610532, "learning_rate": 1.9286702105078433e-05, "loss": 37.4975, "step": 8230 }, { "epoch": 0.12991108028000253, "grad_norm": 149.16943262659416, "learning_rate": 1.928484526283554e-05, "loss": 37.1556, "step": 8240 }, { "epoch": 0.13006873935801225, "grad_norm": 139.45739963010337, "learning_rate": 1.9282986096507996e-05, "loss": 36.7693, "step": 8250 }, { "epoch": 0.13022639843602193, "grad_norm": 135.2363232118703, "learning_rate": 1.9281124606561174e-05, "loss": 36.2226, "step": 8260 }, { "epoch": 0.13038405751403165, "grad_norm": 141.0532767027154, "learning_rate": 1.927926079346102e-05, "loss": 37.7124, "step": 8270 }, { "epoch": 0.13054171659204136, "grad_norm": 136.96733020978832, "learning_rate": 1.9277394657674066e-05, "loss": 37.1003, "step": 8280 }, { "epoch": 0.13069937567005108, "grad_norm": 137.220794523698, "learning_rate": 1.927552619966742e-05, "loss": 37.7193, "step": 8290 }, { "epoch": 0.1308570347480608, "grad_norm": 136.78694767761027, "learning_rate": 1.9273655419908772e-05, "loss": 37.062, "step": 8300 }, { "epoch": 0.1310146938260705, "grad_norm": 128.45432836354993, "learning_rate": 1.9271782318866405e-05, "loss": 36.3413, "step": 8310 }, { "epoch": 0.13117235290408022, "grad_norm": 129.2827989058426, "learning_rate": 1.9269906897009165e-05, "loss": 36.5566, "step": 8320 }, { "epoch": 0.13133001198208993, "grad_norm": 149.9312653110646, "learning_rate": 1.9268029154806494e-05, "loss": 38.0847, "step": 8330 }, { "epoch": 0.13148767106009965, "grad_norm": 136.87124695261102, "learning_rate": 1.9266149092728406e-05, "loss": 36.7852, "step": 8340 }, { "epoch": 0.13164533013810936, "grad_norm": 141.23816695378176, "learning_rate": 1.9264266711245497e-05, "loss": 36.5919, "step": 8350 }, { "epoch": 0.13180298921611908, "grad_norm": 135.91088997165582, "learning_rate": 1.926238201082895e-05, "loss": 38.0769, "step": 8360 }, { "epoch": 0.13196064829412876, "grad_norm": 134.05683052096447, "learning_rate": 1.9260494991950516e-05, "loss": 36.2791, "step": 8370 }, { "epoch": 0.13211830737213848, "grad_norm": 134.5866360286171, "learning_rate": 1.9258605655082538e-05, "loss": 37.1417, "step": 8380 }, { "epoch": 0.1322759664501482, "grad_norm": 136.81262157075236, "learning_rate": 1.9256714000697938e-05, "loss": 37.6671, "step": 8390 }, { "epoch": 0.1324336255281579, "grad_norm": 140.48040844563465, "learning_rate": 1.9254820029270208e-05, "loss": 37.134, "step": 8400 }, { "epoch": 0.13259128460616762, "grad_norm": 138.32202651808583, "learning_rate": 1.9252923741273433e-05, "loss": 35.8819, "step": 8410 }, { "epoch": 0.13274894368417733, "grad_norm": 125.60200742328233, "learning_rate": 1.9251025137182265e-05, "loss": 36.3417, "step": 8420 }, { "epoch": 0.13290660276218705, "grad_norm": 146.65738100422888, "learning_rate": 1.924912421747195e-05, "loss": 36.6012, "step": 8430 }, { "epoch": 0.13306426184019676, "grad_norm": 135.75960558014054, "learning_rate": 1.9247220982618303e-05, "loss": 36.0568, "step": 8440 }, { "epoch": 0.13322192091820648, "grad_norm": 130.47263382201118, "learning_rate": 1.924531543309772e-05, "loss": 36.6681, "step": 8450 }, { "epoch": 0.1333795799962162, "grad_norm": 144.85353326418354, "learning_rate": 1.924340756938718e-05, "loss": 36.92, "step": 8460 }, { "epoch": 0.1335372390742259, "grad_norm": 141.1261466484809, "learning_rate": 1.9241497391964243e-05, "loss": 36.8903, "step": 8470 }, { "epoch": 0.13369489815223562, "grad_norm": 131.50722444905415, "learning_rate": 1.9239584901307035e-05, "loss": 37.8163, "step": 8480 }, { "epoch": 0.1338525572302453, "grad_norm": 131.8253095837835, "learning_rate": 1.923767009789428e-05, "loss": 34.9976, "step": 8490 }, { "epoch": 0.13401021630825502, "grad_norm": 142.1103427219503, "learning_rate": 1.9235752982205265e-05, "loss": 38.5571, "step": 8500 }, { "epoch": 0.13416787538626473, "grad_norm": 140.31391556790575, "learning_rate": 1.9233833554719867e-05, "loss": 36.9722, "step": 8510 }, { "epoch": 0.13432553446427445, "grad_norm": 137.9509826377278, "learning_rate": 1.9231911815918535e-05, "loss": 37.7019, "step": 8520 }, { "epoch": 0.13448319354228416, "grad_norm": 144.94795899097974, "learning_rate": 1.9229987766282296e-05, "loss": 37.5, "step": 8530 }, { "epoch": 0.13464085262029388, "grad_norm": 141.37735474203413, "learning_rate": 1.922806140629276e-05, "loss": 36.0147, "step": 8540 }, { "epoch": 0.1347985116983036, "grad_norm": 140.19769345452892, "learning_rate": 1.9226132736432114e-05, "loss": 37.7362, "step": 8550 }, { "epoch": 0.1349561707763133, "grad_norm": 162.3861227518138, "learning_rate": 1.922420175718312e-05, "loss": 36.5944, "step": 8560 }, { "epoch": 0.13511382985432302, "grad_norm": 133.80519750062675, "learning_rate": 1.9222268469029122e-05, "loss": 37.4681, "step": 8570 }, { "epoch": 0.13527148893233273, "grad_norm": 6826.205733652656, "learning_rate": 1.922033287245404e-05, "loss": 36.4132, "step": 8580 }, { "epoch": 0.13542914801034245, "grad_norm": 138.00720826186136, "learning_rate": 1.9218394967942373e-05, "loss": 36.6435, "step": 8590 }, { "epoch": 0.13558680708835213, "grad_norm": 130.94306809582346, "learning_rate": 1.9216454755979198e-05, "loss": 36.9557, "step": 8600 }, { "epoch": 0.13574446616636185, "grad_norm": 143.49342998562054, "learning_rate": 1.9214512237050163e-05, "loss": 36.6864, "step": 8610 }, { "epoch": 0.13590212524437156, "grad_norm": 132.83995893008944, "learning_rate": 1.92125674116415e-05, "loss": 37.1259, "step": 8620 }, { "epoch": 0.13605978432238128, "grad_norm": 148.75606649281556, "learning_rate": 1.9210620280240024e-05, "loss": 36.4361, "step": 8630 }, { "epoch": 0.136217443400391, "grad_norm": 145.0656656057845, "learning_rate": 1.9208670843333117e-05, "loss": 37.9011, "step": 8640 }, { "epoch": 0.1363751024784007, "grad_norm": 155.56884517546197, "learning_rate": 1.9206719101408738e-05, "loss": 38.3998, "step": 8650 }, { "epoch": 0.13653276155641042, "grad_norm": 135.68208550957544, "learning_rate": 1.9204765054955427e-05, "loss": 37.4858, "step": 8660 }, { "epoch": 0.13669042063442013, "grad_norm": 139.07012628208219, "learning_rate": 1.9202808704462307e-05, "loss": 36.3458, "step": 8670 }, { "epoch": 0.13684807971242985, "grad_norm": 136.75097352491377, "learning_rate": 1.9200850050419065e-05, "loss": 36.2354, "step": 8680 }, { "epoch": 0.13700573879043956, "grad_norm": 161.8678003302224, "learning_rate": 1.919888909331597e-05, "loss": 36.5151, "step": 8690 }, { "epoch": 0.13716339786844928, "grad_norm": 135.97155292661662, "learning_rate": 1.9196925833643868e-05, "loss": 37.1978, "step": 8700 }, { "epoch": 0.13732105694645896, "grad_norm": 138.53311883060942, "learning_rate": 1.9194960271894187e-05, "loss": 36.566, "step": 8710 }, { "epoch": 0.13747871602446868, "grad_norm": 141.31480479288993, "learning_rate": 1.919299240855892e-05, "loss": 36.3599, "step": 8720 }, { "epoch": 0.1376363751024784, "grad_norm": 139.06466986864746, "learning_rate": 1.9191022244130647e-05, "loss": 36.7632, "step": 8730 }, { "epoch": 0.1377940341804881, "grad_norm": 141.92745206034735, "learning_rate": 1.9189049779102517e-05, "loss": 36.7689, "step": 8740 }, { "epoch": 0.13795169325849782, "grad_norm": 135.15475429210446, "learning_rate": 1.918707501396825e-05, "loss": 35.7795, "step": 8750 }, { "epoch": 0.13810935233650753, "grad_norm": 129.79622104342806, "learning_rate": 1.9185097949222155e-05, "loss": 36.5033, "step": 8760 }, { "epoch": 0.13826701141451725, "grad_norm": 133.67206612722907, "learning_rate": 1.918311858535911e-05, "loss": 36.793, "step": 8770 }, { "epoch": 0.13842467049252696, "grad_norm": 128.9722410714371, "learning_rate": 1.918113692287457e-05, "loss": 36.6378, "step": 8780 }, { "epoch": 0.13858232957053668, "grad_norm": 137.67702803703529, "learning_rate": 1.917915296226456e-05, "loss": 36.3823, "step": 8790 }, { "epoch": 0.1387399886485464, "grad_norm": 130.43648103194644, "learning_rate": 1.9177166704025685e-05, "loss": 36.7973, "step": 8800 }, { "epoch": 0.1388976477265561, "grad_norm": 132.4234101763619, "learning_rate": 1.9175178148655125e-05, "loss": 35.5143, "step": 8810 }, { "epoch": 0.13905530680456582, "grad_norm": 139.57375823851356, "learning_rate": 1.9173187296650632e-05, "loss": 35.6994, "step": 8820 }, { "epoch": 0.1392129658825755, "grad_norm": 153.62672224866986, "learning_rate": 1.9171194148510538e-05, "loss": 36.3775, "step": 8830 }, { "epoch": 0.13937062496058522, "grad_norm": 145.55591041954764, "learning_rate": 1.916919870473375e-05, "loss": 36.3689, "step": 8840 }, { "epoch": 0.13952828403859494, "grad_norm": 196.01799726081916, "learning_rate": 1.9167200965819735e-05, "loss": 36.8327, "step": 8850 }, { "epoch": 0.13968594311660465, "grad_norm": 149.06360011370143, "learning_rate": 1.916520093226856e-05, "loss": 36.8424, "step": 8860 }, { "epoch": 0.13984360219461436, "grad_norm": 134.13235760092758, "learning_rate": 1.9163198604580843e-05, "loss": 35.9244, "step": 8870 }, { "epoch": 0.14000126127262408, "grad_norm": 130.15754840961714, "learning_rate": 1.9161193983257783e-05, "loss": 35.7983, "step": 8880 }, { "epoch": 0.1401589203506338, "grad_norm": 138.66944761132987, "learning_rate": 1.9159187068801168e-05, "loss": 37.3287, "step": 8890 }, { "epoch": 0.1403165794286435, "grad_norm": 127.33688150410674, "learning_rate": 1.915717786171334e-05, "loss": 36.5516, "step": 8900 }, { "epoch": 0.14047423850665322, "grad_norm": 139.67917744488602, "learning_rate": 1.9155166362497216e-05, "loss": 35.823, "step": 8910 }, { "epoch": 0.14063189758466293, "grad_norm": 180.16038198476576, "learning_rate": 1.915315257165631e-05, "loss": 35.7325, "step": 8920 }, { "epoch": 0.14078955666267265, "grad_norm": 138.05242230879034, "learning_rate": 1.915113648969468e-05, "loss": 35.9908, "step": 8930 }, { "epoch": 0.14094721574068234, "grad_norm": 134.93193573632368, "learning_rate": 1.914911811711697e-05, "loss": 36.6511, "step": 8940 }, { "epoch": 0.14110487481869205, "grad_norm": 138.87754548658552, "learning_rate": 1.9147097454428402e-05, "loss": 36.0359, "step": 8950 }, { "epoch": 0.14126253389670176, "grad_norm": 139.1638070197138, "learning_rate": 1.914507450213477e-05, "loss": 36.6337, "step": 8960 }, { "epoch": 0.14142019297471148, "grad_norm": 137.4604543312337, "learning_rate": 1.9143049260742434e-05, "loss": 36.4298, "step": 8970 }, { "epoch": 0.1415778520527212, "grad_norm": 135.16484759199398, "learning_rate": 1.9141021730758332e-05, "loss": 36.4846, "step": 8980 }, { "epoch": 0.1417355111307309, "grad_norm": 129.16798116441467, "learning_rate": 1.913899191268997e-05, "loss": 36.2833, "step": 8990 }, { "epoch": 0.14189317020874062, "grad_norm": 134.89295493594508, "learning_rate": 1.9136959807045438e-05, "loss": 37.2821, "step": 9000 }, { "epoch": 0.14205082928675034, "grad_norm": 136.6398007686383, "learning_rate": 1.9134925414333387e-05, "loss": 36.2333, "step": 9010 }, { "epoch": 0.14220848836476005, "grad_norm": 130.98400236616038, "learning_rate": 1.9132888735063044e-05, "loss": 37.0178, "step": 9020 }, { "epoch": 0.14236614744276976, "grad_norm": 138.6086360046942, "learning_rate": 1.913084976974421e-05, "loss": 37.7827, "step": 9030 }, { "epoch": 0.14252380652077948, "grad_norm": 132.02273123685617, "learning_rate": 1.9128808518887258e-05, "loss": 35.8196, "step": 9040 }, { "epoch": 0.14268146559878916, "grad_norm": 127.83246597678952, "learning_rate": 1.912676498300313e-05, "loss": 35.5712, "step": 9050 }, { "epoch": 0.14283912467679888, "grad_norm": 137.0551481661176, "learning_rate": 1.9124719162603348e-05, "loss": 36.5109, "step": 9060 }, { "epoch": 0.1429967837548086, "grad_norm": 159.9993029805288, "learning_rate": 1.9122671058199996e-05, "loss": 36.5281, "step": 9070 }, { "epoch": 0.1431544428328183, "grad_norm": 145.4636863729111, "learning_rate": 1.9120620670305732e-05, "loss": 36.6691, "step": 9080 }, { "epoch": 0.14331210191082802, "grad_norm": 130.88092543841745, "learning_rate": 1.9118567999433795e-05, "loss": 36.5092, "step": 9090 }, { "epoch": 0.14346976098883774, "grad_norm": 145.9222440239545, "learning_rate": 1.911651304609798e-05, "loss": 36.7105, "step": 9100 }, { "epoch": 0.14362742006684745, "grad_norm": 130.62580363116786, "learning_rate": 1.9114455810812665e-05, "loss": 36.3672, "step": 9110 }, { "epoch": 0.14378507914485716, "grad_norm": 137.67595409152986, "learning_rate": 1.9112396294092793e-05, "loss": 37.4723, "step": 9120 }, { "epoch": 0.14394273822286688, "grad_norm": 128.27190500296334, "learning_rate": 1.9110334496453886e-05, "loss": 35.3453, "step": 9130 }, { "epoch": 0.1441003973008766, "grad_norm": 323.0754994410572, "learning_rate": 1.9108270418412025e-05, "loss": 37.4211, "step": 9140 }, { "epoch": 0.1442580563788863, "grad_norm": 135.51246958027858, "learning_rate": 1.9106204060483873e-05, "loss": 35.312, "step": 9150 }, { "epoch": 0.14441571545689602, "grad_norm": 135.75669182742652, "learning_rate": 1.9104135423186654e-05, "loss": 35.8649, "step": 9160 }, { "epoch": 0.1445733745349057, "grad_norm": 145.02673657610814, "learning_rate": 1.910206450703817e-05, "loss": 35.855, "step": 9170 }, { "epoch": 0.14473103361291542, "grad_norm": 125.98469172586569, "learning_rate": 1.90999913125568e-05, "loss": 36.6935, "step": 9180 }, { "epoch": 0.14488869269092514, "grad_norm": 142.78377620907708, "learning_rate": 1.9097915840261464e-05, "loss": 36.2578, "step": 9190 }, { "epoch": 0.14504635176893485, "grad_norm": 131.11252420284583, "learning_rate": 1.909583809067169e-05, "loss": 36.5505, "step": 9200 }, { "epoch": 0.14520401084694456, "grad_norm": 131.23984562215207, "learning_rate": 1.9093758064307553e-05, "loss": 36.8154, "step": 9210 }, { "epoch": 0.14536166992495428, "grad_norm": 141.24446065468737, "learning_rate": 1.90916757616897e-05, "loss": 35.4063, "step": 9220 }, { "epoch": 0.145519329002964, "grad_norm": 139.71153876622094, "learning_rate": 1.9089591183339356e-05, "loss": 36.7626, "step": 9230 }, { "epoch": 0.1456769880809737, "grad_norm": 138.56806514284597, "learning_rate": 1.9087504329778306e-05, "loss": 35.9962, "step": 9240 }, { "epoch": 0.14583464715898342, "grad_norm": 136.91933092412867, "learning_rate": 1.9085415201528914e-05, "loss": 37.0026, "step": 9250 }, { "epoch": 0.14599230623699314, "grad_norm": 132.0122302088051, "learning_rate": 1.9083323799114105e-05, "loss": 35.2464, "step": 9260 }, { "epoch": 0.14614996531500285, "grad_norm": 131.42538464581142, "learning_rate": 1.908123012305738e-05, "loss": 36.3962, "step": 9270 }, { "epoch": 0.14630762439301254, "grad_norm": 135.1466328465588, "learning_rate": 1.9079134173882797e-05, "loss": 36.4071, "step": 9280 }, { "epoch": 0.14646528347102225, "grad_norm": 134.99082470546097, "learning_rate": 1.9077035952115007e-05, "loss": 36.2126, "step": 9290 }, { "epoch": 0.14662294254903196, "grad_norm": 134.34426248354873, "learning_rate": 1.9074935458279202e-05, "loss": 36.5174, "step": 9300 }, { "epoch": 0.14678060162704168, "grad_norm": 140.42026749804901, "learning_rate": 1.9072832692901163e-05, "loss": 36.788, "step": 9310 }, { "epoch": 0.1469382607050514, "grad_norm": 132.55414878955463, "learning_rate": 1.9070727656507225e-05, "loss": 35.8071, "step": 9320 }, { "epoch": 0.1470959197830611, "grad_norm": 141.55507442090004, "learning_rate": 1.9068620349624308e-05, "loss": 36.7443, "step": 9330 }, { "epoch": 0.14725357886107082, "grad_norm": 147.34634126266235, "learning_rate": 1.9066510772779883e-05, "loss": 36.731, "step": 9340 }, { "epoch": 0.14741123793908054, "grad_norm": 130.4443550050252, "learning_rate": 1.9064398926502e-05, "loss": 36.7545, "step": 9350 }, { "epoch": 0.14756889701709025, "grad_norm": 128.99110613432143, "learning_rate": 1.906228481131927e-05, "loss": 35.2245, "step": 9360 }, { "epoch": 0.14772655609509996, "grad_norm": 133.5234448956101, "learning_rate": 1.9060168427760885e-05, "loss": 35.3284, "step": 9370 }, { "epoch": 0.14788421517310968, "grad_norm": 131.61773582666774, "learning_rate": 1.9058049776356586e-05, "loss": 36.2361, "step": 9380 }, { "epoch": 0.1480418742511194, "grad_norm": 124.58962781826725, "learning_rate": 1.90559288576367e-05, "loss": 36.1601, "step": 9390 }, { "epoch": 0.14819953332912908, "grad_norm": 156.95541430599621, "learning_rate": 1.905380567213211e-05, "loss": 36.0459, "step": 9400 }, { "epoch": 0.1483571924071388, "grad_norm": 131.28342710698516, "learning_rate": 1.9051680220374264e-05, "loss": 36.2473, "step": 9410 }, { "epoch": 0.1485148514851485, "grad_norm": 134.22915595295643, "learning_rate": 1.9049552502895187e-05, "loss": 36.8168, "step": 9420 }, { "epoch": 0.14867251056315822, "grad_norm": 126.35066426623764, "learning_rate": 1.904742252022747e-05, "loss": 36.2844, "step": 9430 }, { "epoch": 0.14883016964116794, "grad_norm": 137.8480828890642, "learning_rate": 1.9045290272904266e-05, "loss": 36.4093, "step": 9440 }, { "epoch": 0.14898782871917765, "grad_norm": 127.96447428356369, "learning_rate": 1.9043155761459293e-05, "loss": 34.8622, "step": 9450 }, { "epoch": 0.14914548779718736, "grad_norm": 296.36111834466874, "learning_rate": 1.9041018986426844e-05, "loss": 36.3059, "step": 9460 }, { "epoch": 0.14930314687519708, "grad_norm": 132.99938965858811, "learning_rate": 1.903887994834177e-05, "loss": 36.7079, "step": 9470 }, { "epoch": 0.1494608059532068, "grad_norm": 131.552554073767, "learning_rate": 1.9036738647739496e-05, "loss": 36.302, "step": 9480 }, { "epoch": 0.1496184650312165, "grad_norm": 128.19087795998948, "learning_rate": 1.9034595085156005e-05, "loss": 36.0307, "step": 9490 }, { "epoch": 0.14977612410922622, "grad_norm": 141.05645102127323, "learning_rate": 1.9032449261127855e-05, "loss": 35.6264, "step": 9500 }, { "epoch": 0.1499337831872359, "grad_norm": 132.5514955515827, "learning_rate": 1.903030117619217e-05, "loss": 36.0911, "step": 9510 }, { "epoch": 0.15009144226524562, "grad_norm": 133.44586060466375, "learning_rate": 1.9028150830886627e-05, "loss": 36.364, "step": 9520 }, { "epoch": 0.15024910134325534, "grad_norm": 128.92340165174548, "learning_rate": 1.9025998225749488e-05, "loss": 35.8848, "step": 9530 }, { "epoch": 0.15040676042126505, "grad_norm": 134.1549726837433, "learning_rate": 1.902384336131956e-05, "loss": 36.0785, "step": 9540 }, { "epoch": 0.15056441949927477, "grad_norm": 130.82784005226702, "learning_rate": 1.9021686238136228e-05, "loss": 35.5844, "step": 9550 }, { "epoch": 0.15072207857728448, "grad_norm": 136.0663735413709, "learning_rate": 1.9019526856739444e-05, "loss": 35.4672, "step": 9560 }, { "epoch": 0.1508797376552942, "grad_norm": 134.68902487579086, "learning_rate": 1.9017365217669723e-05, "loss": 34.9611, "step": 9570 }, { "epoch": 0.1510373967333039, "grad_norm": 132.72441946320043, "learning_rate": 1.9015201321468143e-05, "loss": 36.5362, "step": 9580 }, { "epoch": 0.15119505581131362, "grad_norm": 126.65836402275342, "learning_rate": 1.901303516867634e-05, "loss": 36.0724, "step": 9590 }, { "epoch": 0.15135271488932334, "grad_norm": 137.38506557154173, "learning_rate": 1.901086675983653e-05, "loss": 36.2738, "step": 9600 }, { "epoch": 0.15151037396733305, "grad_norm": 153.34315895833922, "learning_rate": 1.9008696095491488e-05, "loss": 36.4934, "step": 9610 }, { "epoch": 0.15166803304534274, "grad_norm": 127.83786896275639, "learning_rate": 1.900652317618455e-05, "loss": 34.9258, "step": 9620 }, { "epoch": 0.15182569212335245, "grad_norm": 130.81233659840598, "learning_rate": 1.900434800245961e-05, "loss": 34.9374, "step": 9630 }, { "epoch": 0.15198335120136217, "grad_norm": 130.13443894762838, "learning_rate": 1.9002170574861146e-05, "loss": 38.3685, "step": 9640 }, { "epoch": 0.15214101027937188, "grad_norm": 137.2988913700596, "learning_rate": 1.8999990893934185e-05, "loss": 35.3054, "step": 9650 }, { "epoch": 0.1522986693573816, "grad_norm": 131.8393115170175, "learning_rate": 1.8997808960224316e-05, "loss": 36.1739, "step": 9660 }, { "epoch": 0.1524563284353913, "grad_norm": 136.96692942850262, "learning_rate": 1.8995624774277707e-05, "loss": 36.0288, "step": 9670 }, { "epoch": 0.15261398751340102, "grad_norm": 126.03899716079601, "learning_rate": 1.8993438336641072e-05, "loss": 35.6803, "step": 9680 }, { "epoch": 0.15277164659141074, "grad_norm": 120.86210079827013, "learning_rate": 1.8991249647861704e-05, "loss": 34.7078, "step": 9690 }, { "epoch": 0.15292930566942045, "grad_norm": 130.81048766975104, "learning_rate": 1.898905870848745e-05, "loss": 36.2126, "step": 9700 }, { "epoch": 0.15308696474743017, "grad_norm": 139.50090441195624, "learning_rate": 1.898686551906672e-05, "loss": 36.0019, "step": 9710 }, { "epoch": 0.15324462382543988, "grad_norm": 124.12510765851383, "learning_rate": 1.8984670080148496e-05, "loss": 35.4971, "step": 9720 }, { "epoch": 0.1534022829034496, "grad_norm": 130.1182691178501, "learning_rate": 1.898247239228231e-05, "loss": 36.0336, "step": 9730 }, { "epoch": 0.15355994198145928, "grad_norm": 131.56502903440713, "learning_rate": 1.8980272456018268e-05, "loss": 36.0148, "step": 9740 }, { "epoch": 0.153717601059469, "grad_norm": 121.82433679159129, "learning_rate": 1.8978070271907034e-05, "loss": 35.8445, "step": 9750 }, { "epoch": 0.1538752601374787, "grad_norm": 134.659678424295, "learning_rate": 1.8975865840499836e-05, "loss": 35.8113, "step": 9760 }, { "epoch": 0.15403291921548842, "grad_norm": 129.7096627178289, "learning_rate": 1.897365916234847e-05, "loss": 36.9488, "step": 9770 }, { "epoch": 0.15419057829349814, "grad_norm": 171.85282728878002, "learning_rate": 1.8971450238005277e-05, "loss": 35.2035, "step": 9780 }, { "epoch": 0.15434823737150785, "grad_norm": 130.22580180508794, "learning_rate": 1.8969239068023174e-05, "loss": 35.8962, "step": 9790 }, { "epoch": 0.15450589644951757, "grad_norm": 133.6152421218222, "learning_rate": 1.8967025652955647e-05, "loss": 38.2263, "step": 9800 }, { "epoch": 0.15466355552752728, "grad_norm": 130.2238936905987, "learning_rate": 1.8964809993356726e-05, "loss": 36.028, "step": 9810 }, { "epoch": 0.154821214605537, "grad_norm": 137.7619155932253, "learning_rate": 1.8962592089781015e-05, "loss": 36.0211, "step": 9820 }, { "epoch": 0.1549788736835467, "grad_norm": 137.3374952210969, "learning_rate": 1.8960371942783676e-05, "loss": 36.039, "step": 9830 }, { "epoch": 0.15513653276155642, "grad_norm": 128.42181439800305, "learning_rate": 1.895814955292043e-05, "loss": 36.2212, "step": 9840 }, { "epoch": 0.1552941918395661, "grad_norm": 131.2054786250011, "learning_rate": 1.8955924920747567e-05, "loss": 35.8068, "step": 9850 }, { "epoch": 0.15545185091757582, "grad_norm": 134.12807141075035, "learning_rate": 1.895369804682193e-05, "loss": 35.9629, "step": 9860 }, { "epoch": 0.15560950999558554, "grad_norm": 138.76020626692576, "learning_rate": 1.895146893170093e-05, "loss": 35.4673, "step": 9870 }, { "epoch": 0.15576716907359525, "grad_norm": 145.68867864796192, "learning_rate": 1.8949237575942532e-05, "loss": 35.2288, "step": 9880 }, { "epoch": 0.15592482815160497, "grad_norm": 129.94862700865858, "learning_rate": 1.8947003980105264e-05, "loss": 36.4595, "step": 9890 }, { "epoch": 0.15608248722961468, "grad_norm": 130.21403680757123, "learning_rate": 1.894476814474822e-05, "loss": 36.1214, "step": 9900 }, { "epoch": 0.1562401463076244, "grad_norm": 122.7120879646289, "learning_rate": 1.894253007043105e-05, "loss": 35.4105, "step": 9910 }, { "epoch": 0.1563978053856341, "grad_norm": 133.8681090947264, "learning_rate": 1.894028975771397e-05, "loss": 35.3597, "step": 9920 }, { "epoch": 0.15655546446364382, "grad_norm": 163.18858661608024, "learning_rate": 1.8938047207157736e-05, "loss": 35.6257, "step": 9930 }, { "epoch": 0.15671312354165354, "grad_norm": 127.58444845906686, "learning_rate": 1.89358024193237e-05, "loss": 35.793, "step": 9940 }, { "epoch": 0.15687078261966325, "grad_norm": 550.3023199923429, "learning_rate": 1.8933555394773737e-05, "loss": 35.7317, "step": 9950 }, { "epoch": 0.15702844169767297, "grad_norm": 127.20367769099425, "learning_rate": 1.8931306134070305e-05, "loss": 35.0043, "step": 9960 }, { "epoch": 0.15718610077568265, "grad_norm": 129.39186018339782, "learning_rate": 1.892905463777642e-05, "loss": 35.675, "step": 9970 }, { "epoch": 0.15734375985369237, "grad_norm": 138.12065154985874, "learning_rate": 1.8926800906455646e-05, "loss": 36.0761, "step": 9980 }, { "epoch": 0.15750141893170208, "grad_norm": 136.02742830695394, "learning_rate": 1.8924544940672116e-05, "loss": 35.4373, "step": 9990 }, { "epoch": 0.1576590780097118, "grad_norm": 120.53118913264723, "learning_rate": 1.8922286740990516e-05, "loss": 35.7687, "step": 10000 }, { "epoch": 0.1578167370877215, "grad_norm": 137.50291039793228, "learning_rate": 1.89200263079761e-05, "loss": 35.6315, "step": 10010 }, { "epoch": 0.15797439616573122, "grad_norm": 127.31577591911162, "learning_rate": 1.8917763642194674e-05, "loss": 35.6303, "step": 10020 }, { "epoch": 0.15813205524374094, "grad_norm": 778.612792773578, "learning_rate": 1.8915498744212604e-05, "loss": 35.8806, "step": 10030 }, { "epoch": 0.15828971432175065, "grad_norm": 130.72746025404268, "learning_rate": 1.8913231614596815e-05, "loss": 37.0473, "step": 10040 }, { "epoch": 0.15844737339976037, "grad_norm": 126.14952343266835, "learning_rate": 1.891096225391479e-05, "loss": 35.7919, "step": 10050 }, { "epoch": 0.15860503247777008, "grad_norm": 142.7175178707744, "learning_rate": 1.890869066273458e-05, "loss": 35.2614, "step": 10060 }, { "epoch": 0.1587626915557798, "grad_norm": 125.97844510463446, "learning_rate": 1.890641684162477e-05, "loss": 35.2234, "step": 10070 }, { "epoch": 0.15892035063378948, "grad_norm": 123.0020485134836, "learning_rate": 1.8904140791154533e-05, "loss": 35.7178, "step": 10080 }, { "epoch": 0.1590780097117992, "grad_norm": 121.25512785608029, "learning_rate": 1.890186251189358e-05, "loss": 35.8335, "step": 10090 }, { "epoch": 0.1592356687898089, "grad_norm": 133.87546189843275, "learning_rate": 1.8899582004412186e-05, "loss": 35.8868, "step": 10100 }, { "epoch": 0.15939332786781862, "grad_norm": 130.03236879753888, "learning_rate": 1.8897299269281183e-05, "loss": 35.9603, "step": 10110 }, { "epoch": 0.15955098694582834, "grad_norm": 124.14218248126842, "learning_rate": 1.8895014307071967e-05, "loss": 36.0692, "step": 10120 }, { "epoch": 0.15970864602383805, "grad_norm": 132.95193063098776, "learning_rate": 1.8892727118356476e-05, "loss": 35.3376, "step": 10130 }, { "epoch": 0.15986630510184777, "grad_norm": 126.26498889996736, "learning_rate": 1.8890437703707223e-05, "loss": 36.7366, "step": 10140 }, { "epoch": 0.16002396417985748, "grad_norm": 125.23863422538244, "learning_rate": 1.8888146063697266e-05, "loss": 36.0414, "step": 10150 }, { "epoch": 0.1601816232578672, "grad_norm": 131.14668477571016, "learning_rate": 1.8885852198900226e-05, "loss": 36.6597, "step": 10160 }, { "epoch": 0.1603392823358769, "grad_norm": 135.96949603216433, "learning_rate": 1.8883556109890276e-05, "loss": 34.8132, "step": 10170 }, { "epoch": 0.16049694141388662, "grad_norm": 129.08434166300538, "learning_rate": 1.8881257797242156e-05, "loss": 34.8055, "step": 10180 }, { "epoch": 0.1606546004918963, "grad_norm": 123.48031277717082, "learning_rate": 1.887895726153115e-05, "loss": 34.6969, "step": 10190 }, { "epoch": 0.16081225956990602, "grad_norm": 132.2555851525331, "learning_rate": 1.88766545033331e-05, "loss": 36.5125, "step": 10200 }, { "epoch": 0.16096991864791574, "grad_norm": 130.01169453888542, "learning_rate": 1.887434952322442e-05, "loss": 35.2636, "step": 10210 }, { "epoch": 0.16112757772592545, "grad_norm": 121.38647434639208, "learning_rate": 1.8872042321782053e-05, "loss": 35.6985, "step": 10220 }, { "epoch": 0.16128523680393517, "grad_norm": 126.29400773693372, "learning_rate": 1.886973289958353e-05, "loss": 35.8309, "step": 10230 }, { "epoch": 0.16144289588194488, "grad_norm": 138.0488846313139, "learning_rate": 1.8867421257206903e-05, "loss": 34.8479, "step": 10240 }, { "epoch": 0.1616005549599546, "grad_norm": 130.5908914658253, "learning_rate": 1.8865107395230814e-05, "loss": 35.9721, "step": 10250 }, { "epoch": 0.1617582140379643, "grad_norm": 137.27254009669477, "learning_rate": 1.8862791314234438e-05, "loss": 35.4351, "step": 10260 }, { "epoch": 0.16191587311597402, "grad_norm": 150.3775963255185, "learning_rate": 1.8860473014797513e-05, "loss": 35.8447, "step": 10270 }, { "epoch": 0.16207353219398374, "grad_norm": 126.36096912416761, "learning_rate": 1.885815249750033e-05, "loss": 36.627, "step": 10280 }, { "epoch": 0.16223119127199345, "grad_norm": 132.42570819717946, "learning_rate": 1.885582976292373e-05, "loss": 35.5542, "step": 10290 }, { "epoch": 0.16238885035000317, "grad_norm": 128.95329801193242, "learning_rate": 1.885350481164913e-05, "loss": 34.9079, "step": 10300 }, { "epoch": 0.16254650942801285, "grad_norm": 137.31166338278715, "learning_rate": 1.8851177644258478e-05, "loss": 35.9786, "step": 10310 }, { "epoch": 0.16270416850602257, "grad_norm": 130.6418382999198, "learning_rate": 1.884884826133429e-05, "loss": 34.9199, "step": 10320 }, { "epoch": 0.16286182758403228, "grad_norm": 117.10982811744047, "learning_rate": 1.8846516663459627e-05, "loss": 35.3889, "step": 10330 }, { "epoch": 0.163019486662042, "grad_norm": 129.53753720537676, "learning_rate": 1.8844182851218112e-05, "loss": 36.6391, "step": 10340 }, { "epoch": 0.1631771457400517, "grad_norm": 133.68191212348452, "learning_rate": 1.8841846825193926e-05, "loss": 35.4223, "step": 10350 }, { "epoch": 0.16333480481806142, "grad_norm": 132.74813995090722, "learning_rate": 1.8839508585971792e-05, "loss": 35.3392, "step": 10360 }, { "epoch": 0.16349246389607114, "grad_norm": 120.1574478317979, "learning_rate": 1.8837168134136994e-05, "loss": 35.0273, "step": 10370 }, { "epoch": 0.16365012297408085, "grad_norm": 129.83210502838446, "learning_rate": 1.8834825470275372e-05, "loss": 35.2975, "step": 10380 }, { "epoch": 0.16380778205209057, "grad_norm": 211.1849274931107, "learning_rate": 1.8832480594973318e-05, "loss": 35.4179, "step": 10390 }, { "epoch": 0.16396544113010028, "grad_norm": 133.74268834704495, "learning_rate": 1.8830133508817772e-05, "loss": 34.2285, "step": 10400 }, { "epoch": 0.16412310020811, "grad_norm": 134.76341202791428, "learning_rate": 1.882778421239623e-05, "loss": 36.4406, "step": 10410 }, { "epoch": 0.16428075928611968, "grad_norm": 132.9359922604103, "learning_rate": 1.8825432706296755e-05, "loss": 35.9274, "step": 10420 }, { "epoch": 0.1644384183641294, "grad_norm": 128.85896868827228, "learning_rate": 1.8823078991107938e-05, "loss": 34.8243, "step": 10430 }, { "epoch": 0.1645960774421391, "grad_norm": 130.8893461034181, "learning_rate": 1.882072306741894e-05, "loss": 34.3577, "step": 10440 }, { "epoch": 0.16475373652014882, "grad_norm": 150.76341201631809, "learning_rate": 1.8818364935819477e-05, "loss": 35.3597, "step": 10450 }, { "epoch": 0.16491139559815854, "grad_norm": 124.20734501612031, "learning_rate": 1.8816004596899807e-05, "loss": 35.3521, "step": 10460 }, { "epoch": 0.16506905467616825, "grad_norm": 130.59451132707352, "learning_rate": 1.8813642051250745e-05, "loss": 34.3964, "step": 10470 }, { "epoch": 0.16522671375417797, "grad_norm": 137.1907516418888, "learning_rate": 1.8811277299463658e-05, "loss": 35.4464, "step": 10480 }, { "epoch": 0.16538437283218768, "grad_norm": 127.47679626612026, "learning_rate": 1.8808910342130464e-05, "loss": 34.5701, "step": 10490 }, { "epoch": 0.1655420319101974, "grad_norm": 137.57740610554478, "learning_rate": 1.880654117984364e-05, "loss": 35.8566, "step": 10500 }, { "epoch": 0.1656996909882071, "grad_norm": 127.67550257081628, "learning_rate": 1.8804169813196208e-05, "loss": 35.9055, "step": 10510 }, { "epoch": 0.16585735006621682, "grad_norm": 136.3814182011912, "learning_rate": 1.8801796242781743e-05, "loss": 34.878, "step": 10520 }, { "epoch": 0.1660150091442265, "grad_norm": 128.22796404187915, "learning_rate": 1.879942046919437e-05, "loss": 34.8676, "step": 10530 }, { "epoch": 0.16617266822223623, "grad_norm": 145.19136357636793, "learning_rate": 1.8797042493028773e-05, "loss": 36.1846, "step": 10540 }, { "epoch": 0.16633032730024594, "grad_norm": 138.50769897214258, "learning_rate": 1.8794662314880176e-05, "loss": 35.5689, "step": 10550 }, { "epoch": 0.16648798637825565, "grad_norm": 123.20939791599764, "learning_rate": 1.879227993534436e-05, "loss": 35.0583, "step": 10560 }, { "epoch": 0.16664564545626537, "grad_norm": 125.0213053997306, "learning_rate": 1.8789895355017665e-05, "loss": 35.0005, "step": 10570 }, { "epoch": 0.16680330453427508, "grad_norm": 133.15874447653465, "learning_rate": 1.8787508574496964e-05, "loss": 35.596, "step": 10580 }, { "epoch": 0.1669609636122848, "grad_norm": 125.15366556442783, "learning_rate": 1.8785119594379698e-05, "loss": 34.7687, "step": 10590 }, { "epoch": 0.1671186226902945, "grad_norm": 125.1797485082663, "learning_rate": 1.878272841526385e-05, "loss": 35.3024, "step": 10600 }, { "epoch": 0.16727628176830422, "grad_norm": 140.65625681707616, "learning_rate": 1.878033503774795e-05, "loss": 35.2356, "step": 10610 }, { "epoch": 0.16743394084631394, "grad_norm": 122.44534298183493, "learning_rate": 1.877793946243109e-05, "loss": 35.2277, "step": 10620 }, { "epoch": 0.16759159992432365, "grad_norm": 120.40304384959958, "learning_rate": 1.8775541689912898e-05, "loss": 34.4853, "step": 10630 }, { "epoch": 0.16774925900233337, "grad_norm": 141.69610608099396, "learning_rate": 1.877314172079357e-05, "loss": 35.9134, "step": 10640 }, { "epoch": 0.16790691808034305, "grad_norm": 142.18057611618838, "learning_rate": 1.877073955567383e-05, "loss": 36.4508, "step": 10650 }, { "epoch": 0.16806457715835277, "grad_norm": 130.09549522578732, "learning_rate": 1.8768335195154965e-05, "loss": 35.9862, "step": 10660 }, { "epoch": 0.16822223623636248, "grad_norm": 130.5654887988876, "learning_rate": 1.876592863983881e-05, "loss": 36.6417, "step": 10670 }, { "epoch": 0.1683798953143722, "grad_norm": 131.75695051492696, "learning_rate": 1.8763519890327752e-05, "loss": 35.4609, "step": 10680 }, { "epoch": 0.1685375543923819, "grad_norm": 134.74050128333036, "learning_rate": 1.876110894722472e-05, "loss": 35.2319, "step": 10690 }, { "epoch": 0.16869521347039163, "grad_norm": 132.882663697065, "learning_rate": 1.8758695811133197e-05, "loss": 34.2138, "step": 10700 }, { "epoch": 0.16885287254840134, "grad_norm": 145.7215511419834, "learning_rate": 1.8756280482657216e-05, "loss": 35.3339, "step": 10710 }, { "epoch": 0.16901053162641105, "grad_norm": 145.19883273621366, "learning_rate": 1.8753862962401355e-05, "loss": 36.4268, "step": 10720 }, { "epoch": 0.16916819070442077, "grad_norm": 117.1653614357897, "learning_rate": 1.875144325097074e-05, "loss": 34.8391, "step": 10730 }, { "epoch": 0.16932584978243048, "grad_norm": 149.00974833727963, "learning_rate": 1.874902134897105e-05, "loss": 34.8946, "step": 10740 }, { "epoch": 0.1694835088604402, "grad_norm": 134.80420960035568, "learning_rate": 1.8746597257008505e-05, "loss": 36.4568, "step": 10750 }, { "epoch": 0.16964116793844988, "grad_norm": 130.86587071619724, "learning_rate": 1.8744170975689887e-05, "loss": 34.0063, "step": 10760 }, { "epoch": 0.1697988270164596, "grad_norm": 130.3498983203405, "learning_rate": 1.874174250562251e-05, "loss": 34.8674, "step": 10770 }, { "epoch": 0.1699564860944693, "grad_norm": 141.90607837017382, "learning_rate": 1.8739311847414246e-05, "loss": 35.7259, "step": 10780 }, { "epoch": 0.17011414517247903, "grad_norm": 132.3071420022353, "learning_rate": 1.8736879001673513e-05, "loss": 34.9726, "step": 10790 }, { "epoch": 0.17027180425048874, "grad_norm": 127.4578112473913, "learning_rate": 1.8734443969009275e-05, "loss": 36.0409, "step": 10800 }, { "epoch": 0.17042946332849845, "grad_norm": 153.2665279658938, "learning_rate": 1.873200675003104e-05, "loss": 36.0956, "step": 10810 }, { "epoch": 0.17058712240650817, "grad_norm": 129.41814640572105, "learning_rate": 1.8729567345348872e-05, "loss": 35.0192, "step": 10820 }, { "epoch": 0.17074478148451788, "grad_norm": 125.70419052431156, "learning_rate": 1.872712575557337e-05, "loss": 34.5662, "step": 10830 }, { "epoch": 0.1709024405625276, "grad_norm": 125.26910341628437, "learning_rate": 1.8724681981315694e-05, "loss": 34.6311, "step": 10840 }, { "epoch": 0.1710600996405373, "grad_norm": 125.03457410321447, "learning_rate": 1.8722236023187544e-05, "loss": 34.9419, "step": 10850 }, { "epoch": 0.17121775871854703, "grad_norm": 138.31463646696469, "learning_rate": 1.871978788180116e-05, "loss": 35.4232, "step": 10860 }, { "epoch": 0.17137541779655674, "grad_norm": 120.77899541259386, "learning_rate": 1.871733755776934e-05, "loss": 34.6136, "step": 10870 }, { "epoch": 0.17153307687456643, "grad_norm": 127.87545231111798, "learning_rate": 1.8714885051705424e-05, "loss": 35.3086, "step": 10880 }, { "epoch": 0.17169073595257614, "grad_norm": 125.25953808104263, "learning_rate": 1.8712430364223296e-05, "loss": 35.0662, "step": 10890 }, { "epoch": 0.17184839503058585, "grad_norm": 126.8893439832345, "learning_rate": 1.8709973495937386e-05, "loss": 35.1418, "step": 10900 }, { "epoch": 0.17200605410859557, "grad_norm": 180.0770615437453, "learning_rate": 1.870751444746267e-05, "loss": 36.3653, "step": 10910 }, { "epoch": 0.17216371318660528, "grad_norm": 143.04503256850805, "learning_rate": 1.8705053219414676e-05, "loss": 35.7265, "step": 10920 }, { "epoch": 0.172321372264615, "grad_norm": 127.4674739513682, "learning_rate": 1.8702589812409472e-05, "loss": 34.9749, "step": 10930 }, { "epoch": 0.1724790313426247, "grad_norm": 120.05723332959232, "learning_rate": 1.8700124227063664e-05, "loss": 35.1225, "step": 10940 }, { "epoch": 0.17263669042063443, "grad_norm": 128.90426610607048, "learning_rate": 1.8697656463994425e-05, "loss": 35.9786, "step": 10950 }, { "epoch": 0.17279434949864414, "grad_norm": 133.57002706129256, "learning_rate": 1.8695186523819447e-05, "loss": 34.0951, "step": 10960 }, { "epoch": 0.17295200857665385, "grad_norm": 131.08415267346967, "learning_rate": 1.8692714407156987e-05, "loss": 35.3425, "step": 10970 }, { "epoch": 0.17310966765466357, "grad_norm": 220.3485833401311, "learning_rate": 1.8690240114625837e-05, "loss": 35.0765, "step": 10980 }, { "epoch": 0.17326732673267325, "grad_norm": 138.15702518890888, "learning_rate": 1.8687763646845336e-05, "loss": 36.6919, "step": 10990 }, { "epoch": 0.17342498581068297, "grad_norm": 129.8526975487404, "learning_rate": 1.8685285004435364e-05, "loss": 35.266, "step": 11000 }, { "epoch": 0.17358264488869268, "grad_norm": 131.97457421863854, "learning_rate": 1.868280418801636e-05, "loss": 35.0707, "step": 11010 }, { "epoch": 0.1737403039667024, "grad_norm": 132.90686051956143, "learning_rate": 1.8680321198209276e-05, "loss": 35.6514, "step": 11020 }, { "epoch": 0.1738979630447121, "grad_norm": 134.3432103254393, "learning_rate": 1.8677836035635643e-05, "loss": 35.0226, "step": 11030 }, { "epoch": 0.17405562212272183, "grad_norm": 137.3087743402806, "learning_rate": 1.8675348700917523e-05, "loss": 36.0945, "step": 11040 }, { "epoch": 0.17421328120073154, "grad_norm": 122.50197176745638, "learning_rate": 1.867285919467751e-05, "loss": 34.3764, "step": 11050 }, { "epoch": 0.17437094027874125, "grad_norm": 122.84501068616953, "learning_rate": 1.867036751753875e-05, "loss": 34.2074, "step": 11060 }, { "epoch": 0.17452859935675097, "grad_norm": 125.19179837424976, "learning_rate": 1.8667873670124943e-05, "loss": 34.2825, "step": 11070 }, { "epoch": 0.17468625843476068, "grad_norm": 127.60861313623035, "learning_rate": 1.866537765306031e-05, "loss": 35.2667, "step": 11080 }, { "epoch": 0.1748439175127704, "grad_norm": 123.79590258484023, "learning_rate": 1.8662879466969643e-05, "loss": 35.2073, "step": 11090 }, { "epoch": 0.17500157659078008, "grad_norm": 162.9747158443272, "learning_rate": 1.866037911247825e-05, "loss": 35.7584, "step": 11100 }, { "epoch": 0.1751592356687898, "grad_norm": 128.39408421456446, "learning_rate": 1.8657876590212e-05, "loss": 35.5087, "step": 11110 }, { "epoch": 0.1753168947467995, "grad_norm": 122.9477711154402, "learning_rate": 1.8655371900797295e-05, "loss": 35.0631, "step": 11120 }, { "epoch": 0.17547455382480923, "grad_norm": 124.3141361302776, "learning_rate": 1.865286504486108e-05, "loss": 34.8037, "step": 11130 }, { "epoch": 0.17563221290281894, "grad_norm": 123.37737187912913, "learning_rate": 1.865035602303085e-05, "loss": 34.654, "step": 11140 }, { "epoch": 0.17578987198082865, "grad_norm": 129.63405765936864, "learning_rate": 1.864784483593463e-05, "loss": 35.1082, "step": 11150 }, { "epoch": 0.17594753105883837, "grad_norm": 123.83929130911538, "learning_rate": 1.8645331484201e-05, "loss": 34.577, "step": 11160 }, { "epoch": 0.17610519013684808, "grad_norm": 121.66578172233271, "learning_rate": 1.8642815968459076e-05, "loss": 33.714, "step": 11170 }, { "epoch": 0.1762628492148578, "grad_norm": 126.17306787986976, "learning_rate": 1.8640298289338513e-05, "loss": 34.4744, "step": 11180 }, { "epoch": 0.1764205082928675, "grad_norm": 115.84516444517605, "learning_rate": 1.863777844746951e-05, "loss": 34.9983, "step": 11190 }, { "epoch": 0.17657816737087723, "grad_norm": 126.63521581257031, "learning_rate": 1.8635256443482806e-05, "loss": 34.8167, "step": 11200 }, { "epoch": 0.17673582644888694, "grad_norm": 128.38387032239936, "learning_rate": 1.8632732278009684e-05, "loss": 35.7672, "step": 11210 }, { "epoch": 0.17689348552689663, "grad_norm": 125.47303032325503, "learning_rate": 1.8630205951681964e-05, "loss": 34.5584, "step": 11220 }, { "epoch": 0.17705114460490634, "grad_norm": 126.04247366744032, "learning_rate": 1.8627677465132014e-05, "loss": 35.7057, "step": 11230 }, { "epoch": 0.17720880368291606, "grad_norm": 128.7675608912966, "learning_rate": 1.862514681899274e-05, "loss": 34.8252, "step": 11240 }, { "epoch": 0.17736646276092577, "grad_norm": 124.59021967609112, "learning_rate": 1.8622614013897576e-05, "loss": 35.0767, "step": 11250 }, { "epoch": 0.17752412183893548, "grad_norm": 120.87286986640942, "learning_rate": 1.8620079050480518e-05, "loss": 34.0581, "step": 11260 }, { "epoch": 0.1776817809169452, "grad_norm": 138.94862804682063, "learning_rate": 1.8617541929376088e-05, "loss": 35.498, "step": 11270 }, { "epoch": 0.1778394399949549, "grad_norm": 137.57112845211674, "learning_rate": 1.8615002651219346e-05, "loss": 35.5431, "step": 11280 }, { "epoch": 0.17799709907296463, "grad_norm": 137.84716191614302, "learning_rate": 1.8612461216645907e-05, "loss": 34.9782, "step": 11290 }, { "epoch": 0.17815475815097434, "grad_norm": 128.74309870022554, "learning_rate": 1.860991762629191e-05, "loss": 34.0163, "step": 11300 }, { "epoch": 0.17831241722898405, "grad_norm": 123.34577546981559, "learning_rate": 1.8607371880794035e-05, "loss": 34.8474, "step": 11310 }, { "epoch": 0.17847007630699377, "grad_norm": 124.83205339406798, "learning_rate": 1.8604823980789517e-05, "loss": 35.0739, "step": 11320 }, { "epoch": 0.17862773538500346, "grad_norm": 158.22376272813025, "learning_rate": 1.8602273926916112e-05, "loss": 35.825, "step": 11330 }, { "epoch": 0.17878539446301317, "grad_norm": 120.09640773193834, "learning_rate": 1.8599721719812127e-05, "loss": 33.7652, "step": 11340 }, { "epoch": 0.17894305354102288, "grad_norm": 125.16777956609555, "learning_rate": 1.8597167360116398e-05, "loss": 34.6703, "step": 11350 }, { "epoch": 0.1791007126190326, "grad_norm": 129.06475858172243, "learning_rate": 1.8594610848468316e-05, "loss": 34.7527, "step": 11360 }, { "epoch": 0.1792583716970423, "grad_norm": 128.7741759249048, "learning_rate": 1.8592052185507788e-05, "loss": 34.9019, "step": 11370 }, { "epoch": 0.17941603077505203, "grad_norm": 125.0082232315388, "learning_rate": 1.8589491371875278e-05, "loss": 34.1942, "step": 11380 }, { "epoch": 0.17957368985306174, "grad_norm": 133.006767041816, "learning_rate": 1.8586928408211777e-05, "loss": 34.5248, "step": 11390 }, { "epoch": 0.17973134893107146, "grad_norm": 759.7736486142433, "learning_rate": 1.8584363295158825e-05, "loss": 34.7447, "step": 11400 }, { "epoch": 0.17988900800908117, "grad_norm": 124.2338242448042, "learning_rate": 1.8581796033358493e-05, "loss": 34.5667, "step": 11410 }, { "epoch": 0.18004666708709088, "grad_norm": 131.20699164374835, "learning_rate": 1.8579226623453388e-05, "loss": 35.5153, "step": 11420 }, { "epoch": 0.1802043261651006, "grad_norm": 123.93687842021994, "learning_rate": 1.857665506608666e-05, "loss": 33.7514, "step": 11430 }, { "epoch": 0.1803619852431103, "grad_norm": 131.09296901613396, "learning_rate": 1.8574081361901993e-05, "loss": 34.3619, "step": 11440 }, { "epoch": 0.18051964432112, "grad_norm": 128.57510651021104, "learning_rate": 1.857150551154361e-05, "loss": 34.6704, "step": 11450 }, { "epoch": 0.1806773033991297, "grad_norm": 120.30032827202079, "learning_rate": 1.856892751565627e-05, "loss": 34.1662, "step": 11460 }, { "epoch": 0.18083496247713943, "grad_norm": 128.51393417977235, "learning_rate": 1.856634737488527e-05, "loss": 34.0935, "step": 11470 }, { "epoch": 0.18099262155514914, "grad_norm": 135.7248738424741, "learning_rate": 1.8563765089876448e-05, "loss": 35.7657, "step": 11480 }, { "epoch": 0.18115028063315886, "grad_norm": 125.13400049564382, "learning_rate": 1.8561180661276165e-05, "loss": 34.2814, "step": 11490 }, { "epoch": 0.18130793971116857, "grad_norm": 122.19599884811387, "learning_rate": 1.8558594089731336e-05, "loss": 34.9799, "step": 11500 }, { "epoch": 0.18146559878917828, "grad_norm": 122.22040109749325, "learning_rate": 1.85560053758894e-05, "loss": 34.0433, "step": 11510 }, { "epoch": 0.181623257867188, "grad_norm": 118.7661540529956, "learning_rate": 1.855341452039834e-05, "loss": 35.3044, "step": 11520 }, { "epoch": 0.1817809169451977, "grad_norm": 135.39906863049697, "learning_rate": 1.855082152390667e-05, "loss": 34.4469, "step": 11530 }, { "epoch": 0.18193857602320743, "grad_norm": 126.40492879002709, "learning_rate": 1.854822638706344e-05, "loss": 34.0665, "step": 11540 }, { "epoch": 0.18209623510121714, "grad_norm": 125.67339187735097, "learning_rate": 1.854562911051824e-05, "loss": 34.5894, "step": 11550 }, { "epoch": 0.18225389417922683, "grad_norm": 126.84910433091424, "learning_rate": 1.854302969492119e-05, "loss": 34.1751, "step": 11560 }, { "epoch": 0.18241155325723654, "grad_norm": 118.83620835172124, "learning_rate": 1.854042814092295e-05, "loss": 34.1369, "step": 11570 }, { "epoch": 0.18256921233524626, "grad_norm": 126.23266516814006, "learning_rate": 1.8537824449174717e-05, "loss": 33.9754, "step": 11580 }, { "epoch": 0.18272687141325597, "grad_norm": 124.91794703661606, "learning_rate": 1.8535218620328215e-05, "loss": 36.2723, "step": 11590 }, { "epoch": 0.18288453049126568, "grad_norm": 132.78164683162885, "learning_rate": 1.8532610655035705e-05, "loss": 35.022, "step": 11600 }, { "epoch": 0.1830421895692754, "grad_norm": 128.25578667746603, "learning_rate": 1.853000055394999e-05, "loss": 33.9217, "step": 11610 }, { "epoch": 0.1831998486472851, "grad_norm": 114.39149323159005, "learning_rate": 1.852738831772441e-05, "loss": 34.6444, "step": 11620 }, { "epoch": 0.18335750772529483, "grad_norm": 127.0905047241783, "learning_rate": 1.852477394701282e-05, "loss": 34.0643, "step": 11630 }, { "epoch": 0.18351516680330454, "grad_norm": 124.53888304458951, "learning_rate": 1.852215744246962e-05, "loss": 34.3319, "step": 11640 }, { "epoch": 0.18367282588131426, "grad_norm": 118.79038962078243, "learning_rate": 1.8519538804749762e-05, "loss": 34.8181, "step": 11650 }, { "epoch": 0.18383048495932397, "grad_norm": 118.82481312054351, "learning_rate": 1.8516918034508703e-05, "loss": 34.8879, "step": 11660 }, { "epoch": 0.18398814403733366, "grad_norm": 123.02800704677622, "learning_rate": 1.8514295132402447e-05, "loss": 34.6102, "step": 11670 }, { "epoch": 0.18414580311534337, "grad_norm": 126.18987226369492, "learning_rate": 1.8511670099087533e-05, "loss": 34.8613, "step": 11680 }, { "epoch": 0.18430346219335308, "grad_norm": 123.61214726542077, "learning_rate": 1.8509042935221034e-05, "loss": 34.7389, "step": 11690 }, { "epoch": 0.1844611212713628, "grad_norm": 126.36211913303927, "learning_rate": 1.8506413641460557e-05, "loss": 33.7643, "step": 11700 }, { "epoch": 0.1846187803493725, "grad_norm": 133.98126850572967, "learning_rate": 1.8503782218464232e-05, "loss": 34.168, "step": 11710 }, { "epoch": 0.18477643942738223, "grad_norm": 121.93869159717478, "learning_rate": 1.8501148666890733e-05, "loss": 34.6096, "step": 11720 }, { "epoch": 0.18493409850539194, "grad_norm": 127.43566723290327, "learning_rate": 1.8498512987399258e-05, "loss": 35.0246, "step": 11730 }, { "epoch": 0.18509175758340166, "grad_norm": 122.25167134037414, "learning_rate": 1.8495875180649548e-05, "loss": 34.5506, "step": 11740 }, { "epoch": 0.18524941666141137, "grad_norm": 119.23872375432774, "learning_rate": 1.849323524730187e-05, "loss": 34.0049, "step": 11750 }, { "epoch": 0.18540707573942108, "grad_norm": 116.44241801363664, "learning_rate": 1.8490593188017024e-05, "loss": 35.6943, "step": 11760 }, { "epoch": 0.1855647348174308, "grad_norm": 138.07534959725126, "learning_rate": 1.848794900345634e-05, "loss": 34.909, "step": 11770 }, { "epoch": 0.1857223938954405, "grad_norm": 124.27534039903072, "learning_rate": 1.8485302694281685e-05, "loss": 34.1938, "step": 11780 }, { "epoch": 0.1858800529734502, "grad_norm": 123.22896678583777, "learning_rate": 1.8482654261155456e-05, "loss": 34.429, "step": 11790 }, { "epoch": 0.1860377120514599, "grad_norm": 126.5377102916836, "learning_rate": 1.8480003704740578e-05, "loss": 34.6608, "step": 11800 }, { "epoch": 0.18619537112946963, "grad_norm": 118.30799563457218, "learning_rate": 1.8477351025700514e-05, "loss": 34.3546, "step": 11810 }, { "epoch": 0.18635303020747934, "grad_norm": 127.84319214570337, "learning_rate": 1.847469622469925e-05, "loss": 34.5379, "step": 11820 }, { "epoch": 0.18651068928548906, "grad_norm": 129.90347288518245, "learning_rate": 1.847203930240131e-05, "loss": 34.696, "step": 11830 }, { "epoch": 0.18666834836349877, "grad_norm": 132.8130280881028, "learning_rate": 1.846938025947175e-05, "loss": 33.7788, "step": 11840 }, { "epoch": 0.18682600744150848, "grad_norm": 124.3577654656834, "learning_rate": 1.8466719096576148e-05, "loss": 35.6935, "step": 11850 }, { "epoch": 0.1869836665195182, "grad_norm": 123.22363583035857, "learning_rate": 1.8464055814380622e-05, "loss": 34.8279, "step": 11860 }, { "epoch": 0.1871413255975279, "grad_norm": 144.10342441545728, "learning_rate": 1.8461390413551817e-05, "loss": 34.3385, "step": 11870 }, { "epoch": 0.18729898467553763, "grad_norm": 123.28330052407185, "learning_rate": 1.8458722894756908e-05, "loss": 33.5103, "step": 11880 }, { "epoch": 0.18745664375354734, "grad_norm": 120.91115584812381, "learning_rate": 1.8456053258663593e-05, "loss": 34.1301, "step": 11890 }, { "epoch": 0.18761430283155703, "grad_norm": 118.52380143465696, "learning_rate": 1.8453381505940118e-05, "loss": 35.0117, "step": 11900 }, { "epoch": 0.18777196190956674, "grad_norm": 140.2193528137124, "learning_rate": 1.845070763725524e-05, "loss": 34.8234, "step": 11910 }, { "epoch": 0.18792962098757646, "grad_norm": 118.45569731467184, "learning_rate": 1.844803165327826e-05, "loss": 34.2285, "step": 11920 }, { "epoch": 0.18808728006558617, "grad_norm": 121.74981794737505, "learning_rate": 1.8445353554678996e-05, "loss": 34.232, "step": 11930 }, { "epoch": 0.18824493914359589, "grad_norm": 120.28745266093213, "learning_rate": 1.844267334212781e-05, "loss": 34.1991, "step": 11940 }, { "epoch": 0.1884025982216056, "grad_norm": 129.33070867038901, "learning_rate": 1.8439991016295574e-05, "loss": 34.9238, "step": 11950 }, { "epoch": 0.1885602572996153, "grad_norm": 133.53312415099273, "learning_rate": 1.8437306577853707e-05, "loss": 34.8086, "step": 11960 }, { "epoch": 0.18871791637762503, "grad_norm": 140.9003786219351, "learning_rate": 1.8434620027474147e-05, "loss": 34.3984, "step": 11970 }, { "epoch": 0.18887557545563474, "grad_norm": 125.62202387570716, "learning_rate": 1.843193136582936e-05, "loss": 33.9835, "step": 11980 }, { "epoch": 0.18903323453364446, "grad_norm": 117.1288839454966, "learning_rate": 1.8429240593592347e-05, "loss": 34.0985, "step": 11990 }, { "epoch": 0.18919089361165417, "grad_norm": 118.61198909920469, "learning_rate": 1.8426547711436632e-05, "loss": 33.9016, "step": 12000 }, { "epoch": 0.18934855268966386, "grad_norm": 120.97049401679843, "learning_rate": 1.842385272003627e-05, "loss": 33.9237, "step": 12010 }, { "epoch": 0.18950621176767357, "grad_norm": 117.86477375825443, "learning_rate": 1.8421155620065847e-05, "loss": 34.288, "step": 12020 }, { "epoch": 0.18966387084568329, "grad_norm": 121.72850683443586, "learning_rate": 1.8418456412200465e-05, "loss": 35.3951, "step": 12030 }, { "epoch": 0.189821529923693, "grad_norm": 135.23526220577995, "learning_rate": 1.8415755097115767e-05, "loss": 34.4329, "step": 12040 }, { "epoch": 0.18997918900170271, "grad_norm": 136.9101896920067, "learning_rate": 1.8413051675487912e-05, "loss": 34.0782, "step": 12050 }, { "epoch": 0.19013684807971243, "grad_norm": 120.54496345605835, "learning_rate": 1.8410346147993598e-05, "loss": 33.1569, "step": 12060 }, { "epoch": 0.19029450715772214, "grad_norm": 118.57639319540925, "learning_rate": 1.840763851531004e-05, "loss": 33.8807, "step": 12070 }, { "epoch": 0.19045216623573186, "grad_norm": 360.06999513881794, "learning_rate": 1.8404928778114986e-05, "loss": 34.8837, "step": 12080 }, { "epoch": 0.19060982531374157, "grad_norm": 133.98352326706905, "learning_rate": 1.840221693708671e-05, "loss": 35.2109, "step": 12090 }, { "epoch": 0.19076748439175129, "grad_norm": 118.20100641265046, "learning_rate": 1.839950299290401e-05, "loss": 33.1633, "step": 12100 }, { "epoch": 0.190925143469761, "grad_norm": 116.30846439732294, "learning_rate": 1.839678694624621e-05, "loss": 33.2879, "step": 12110 }, { "epoch": 0.1910828025477707, "grad_norm": 119.03088562885391, "learning_rate": 1.8394068797793167e-05, "loss": 33.6393, "step": 12120 }, { "epoch": 0.1912404616257804, "grad_norm": 119.62946612815881, "learning_rate": 1.8391348548225256e-05, "loss": 34.7167, "step": 12130 }, { "epoch": 0.19139812070379011, "grad_norm": 116.28567160469187, "learning_rate": 1.838862619822338e-05, "loss": 33.1063, "step": 12140 }, { "epoch": 0.19155577978179983, "grad_norm": 128.3326124048678, "learning_rate": 1.8385901748468967e-05, "loss": 33.3519, "step": 12150 }, { "epoch": 0.19171343885980954, "grad_norm": 138.95180588183115, "learning_rate": 1.838317519964398e-05, "loss": 33.72, "step": 12160 }, { "epoch": 0.19187109793781926, "grad_norm": 128.436784111623, "learning_rate": 1.8380446552430897e-05, "loss": 33.6568, "step": 12170 }, { "epoch": 0.19202875701582897, "grad_norm": 121.95524801671209, "learning_rate": 1.837771580751272e-05, "loss": 33.9509, "step": 12180 }, { "epoch": 0.19218641609383869, "grad_norm": 123.19541825766431, "learning_rate": 1.837498296557299e-05, "loss": 33.5569, "step": 12190 }, { "epoch": 0.1923440751718484, "grad_norm": 126.84577492315925, "learning_rate": 1.837224802729575e-05, "loss": 34.3365, "step": 12200 }, { "epoch": 0.19250173424985811, "grad_norm": 119.69476185539511, "learning_rate": 1.8369510993365592e-05, "loss": 33.6132, "step": 12210 }, { "epoch": 0.19265939332786783, "grad_norm": 123.23761649809344, "learning_rate": 1.8366771864467615e-05, "loss": 33.8786, "step": 12220 }, { "epoch": 0.19281705240587754, "grad_norm": 127.80070310216057, "learning_rate": 1.8364030641287453e-05, "loss": 34.0519, "step": 12230 }, { "epoch": 0.19297471148388723, "grad_norm": 120.38307872496966, "learning_rate": 1.8361287324511256e-05, "loss": 33.6182, "step": 12240 }, { "epoch": 0.19313237056189694, "grad_norm": 125.2138565961602, "learning_rate": 1.8358541914825706e-05, "loss": 34.708, "step": 12250 }, { "epoch": 0.19329002963990666, "grad_norm": 120.90551005344305, "learning_rate": 1.8355794412918006e-05, "loss": 33.3657, "step": 12260 }, { "epoch": 0.19344768871791637, "grad_norm": 122.14237904662112, "learning_rate": 1.8353044819475874e-05, "loss": 34.0208, "step": 12270 }, { "epoch": 0.19360534779592609, "grad_norm": 117.84844036942182, "learning_rate": 1.835029313518757e-05, "loss": 34.5054, "step": 12280 }, { "epoch": 0.1937630068739358, "grad_norm": 118.18056841579279, "learning_rate": 1.8347539360741856e-05, "loss": 34.127, "step": 12290 }, { "epoch": 0.19392066595194551, "grad_norm": 122.35602479584811, "learning_rate": 1.8344783496828037e-05, "loss": 34.5348, "step": 12300 }, { "epoch": 0.19407832502995523, "grad_norm": 117.04207924370972, "learning_rate": 1.8342025544135924e-05, "loss": 34.4727, "step": 12310 }, { "epoch": 0.19423598410796494, "grad_norm": 120.45005097112488, "learning_rate": 1.8339265503355865e-05, "loss": 34.2892, "step": 12320 }, { "epoch": 0.19439364318597466, "grad_norm": 115.30489312106722, "learning_rate": 1.833650337517872e-05, "loss": 33.4492, "step": 12330 }, { "epoch": 0.19455130226398437, "grad_norm": 126.82470349494096, "learning_rate": 1.833373916029588e-05, "loss": 33.1718, "step": 12340 }, { "epoch": 0.19470896134199409, "grad_norm": 118.65109644169199, "learning_rate": 1.8330972859399247e-05, "loss": 33.3417, "step": 12350 }, { "epoch": 0.19486662042000377, "grad_norm": 122.48046377887778, "learning_rate": 1.832820447318126e-05, "loss": 34.7937, "step": 12360 }, { "epoch": 0.1950242794980135, "grad_norm": 114.58180797814482, "learning_rate": 1.8325434002334872e-05, "loss": 33.3619, "step": 12370 }, { "epoch": 0.1951819385760232, "grad_norm": 118.56144648195793, "learning_rate": 1.832266144755355e-05, "loss": 34.1038, "step": 12380 }, { "epoch": 0.19533959765403291, "grad_norm": 127.27866863297028, "learning_rate": 1.83198868095313e-05, "loss": 34.0453, "step": 12390 }, { "epoch": 0.19549725673204263, "grad_norm": 113.59811802953202, "learning_rate": 1.8317110088962636e-05, "loss": 34.2841, "step": 12400 }, { "epoch": 0.19565491581005234, "grad_norm": 120.81336828427736, "learning_rate": 1.8314331286542596e-05, "loss": 33.9361, "step": 12410 }, { "epoch": 0.19581257488806206, "grad_norm": 128.91758149499285, "learning_rate": 1.831155040296674e-05, "loss": 34.3692, "step": 12420 }, { "epoch": 0.19597023396607177, "grad_norm": 120.32760850679996, "learning_rate": 1.8308767438931155e-05, "loss": 33.9509, "step": 12430 }, { "epoch": 0.19612789304408149, "grad_norm": 124.92092335715225, "learning_rate": 1.830598239513244e-05, "loss": 34.1168, "step": 12440 }, { "epoch": 0.1962855521220912, "grad_norm": 121.22139838298051, "learning_rate": 1.8303195272267717e-05, "loss": 33.9769, "step": 12450 }, { "epoch": 0.19644321120010091, "grad_norm": 119.64848983653056, "learning_rate": 1.830040607103463e-05, "loss": 34.4136, "step": 12460 }, { "epoch": 0.1966008702781106, "grad_norm": 124.58293075126306, "learning_rate": 1.8297614792131342e-05, "loss": 33.7162, "step": 12470 }, { "epoch": 0.19675852935612032, "grad_norm": 122.3496681588902, "learning_rate": 1.8294821436256537e-05, "loss": 34.7356, "step": 12480 }, { "epoch": 0.19691618843413003, "grad_norm": 131.42420300491017, "learning_rate": 1.829202600410942e-05, "loss": 34.973, "step": 12490 }, { "epoch": 0.19707384751213974, "grad_norm": 144.09003420024158, "learning_rate": 1.8289228496389707e-05, "loss": 34.4658, "step": 12500 }, { "epoch": 0.19723150659014946, "grad_norm": 122.7599037115432, "learning_rate": 1.828642891379765e-05, "loss": 33.3716, "step": 12510 }, { "epoch": 0.19738916566815917, "grad_norm": 127.48568673263276, "learning_rate": 1.8283627257034007e-05, "loss": 34.5337, "step": 12520 }, { "epoch": 0.1975468247461689, "grad_norm": 127.13139592123132, "learning_rate": 1.8280823526800062e-05, "loss": 35.0646, "step": 12530 }, { "epoch": 0.1977044838241786, "grad_norm": 132.2393935942836, "learning_rate": 1.827801772379761e-05, "loss": 34.3195, "step": 12540 }, { "epoch": 0.19786214290218831, "grad_norm": 124.66462446975598, "learning_rate": 1.8275209848728974e-05, "loss": 34.5048, "step": 12550 }, { "epoch": 0.19801980198019803, "grad_norm": 122.52291550618962, "learning_rate": 1.827239990229699e-05, "loss": 33.8448, "step": 12560 }, { "epoch": 0.19817746105820774, "grad_norm": 127.57950319117225, "learning_rate": 1.8269587885205012e-05, "loss": 34.1621, "step": 12570 }, { "epoch": 0.19833512013621743, "grad_norm": 116.7161354579564, "learning_rate": 1.826677379815692e-05, "loss": 34.1568, "step": 12580 }, { "epoch": 0.19849277921422714, "grad_norm": 126.07397410358297, "learning_rate": 1.82639576418571e-05, "loss": 34.2432, "step": 12590 }, { "epoch": 0.19865043829223686, "grad_norm": 122.7811705675209, "learning_rate": 1.8261139417010468e-05, "loss": 33.9401, "step": 12600 }, { "epoch": 0.19880809737024657, "grad_norm": 112.40581015085394, "learning_rate": 1.825831912432245e-05, "loss": 34.165, "step": 12610 }, { "epoch": 0.1989657564482563, "grad_norm": 117.65373944370687, "learning_rate": 1.8255496764498996e-05, "loss": 33.34, "step": 12620 }, { "epoch": 0.199123415526266, "grad_norm": 115.40013102944786, "learning_rate": 1.8252672338246564e-05, "loss": 34.2337, "step": 12630 }, { "epoch": 0.19928107460427572, "grad_norm": 122.35834395841646, "learning_rate": 1.8249845846272135e-05, "loss": 34.2945, "step": 12640 }, { "epoch": 0.19943873368228543, "grad_norm": 121.22047623593079, "learning_rate": 1.824701728928321e-05, "loss": 33.8022, "step": 12650 }, { "epoch": 0.19959639276029514, "grad_norm": 126.7382279514013, "learning_rate": 1.82441866679878e-05, "loss": 33.8705, "step": 12660 }, { "epoch": 0.19975405183830486, "grad_norm": 116.42086999833968, "learning_rate": 1.8241353983094437e-05, "loss": 34.5943, "step": 12670 }, { "epoch": 0.19991171091631457, "grad_norm": 133.27796467812865, "learning_rate": 1.8238519235312174e-05, "loss": 34.1289, "step": 12680 }, { "epoch": 0.2000693699943243, "grad_norm": 116.83951947080679, "learning_rate": 1.823568242535057e-05, "loss": 33.8675, "step": 12690 }, { "epoch": 0.20022702907233397, "grad_norm": 121.92967199541248, "learning_rate": 1.8232843553919703e-05, "loss": 33.519, "step": 12700 }, { "epoch": 0.2003846881503437, "grad_norm": 112.34388425898653, "learning_rate": 1.8230002621730175e-05, "loss": 33.9208, "step": 12710 }, { "epoch": 0.2005423472283534, "grad_norm": 123.66215062804035, "learning_rate": 1.82271596294931e-05, "loss": 33.5944, "step": 12720 }, { "epoch": 0.20070000630636312, "grad_norm": 111.74204717481216, "learning_rate": 1.8224314577920096e-05, "loss": 32.507, "step": 12730 }, { "epoch": 0.20085766538437283, "grad_norm": 119.32353758421455, "learning_rate": 1.8221467467723314e-05, "loss": 33.9482, "step": 12740 }, { "epoch": 0.20101532446238254, "grad_norm": 129.42930201065315, "learning_rate": 1.8218618299615413e-05, "loss": 34.0101, "step": 12750 }, { "epoch": 0.20117298354039226, "grad_norm": 119.4271905268123, "learning_rate": 1.8215767074309563e-05, "loss": 33.0678, "step": 12760 }, { "epoch": 0.20133064261840197, "grad_norm": 116.20001638541385, "learning_rate": 1.8212913792519457e-05, "loss": 33.3407, "step": 12770 }, { "epoch": 0.2014883016964117, "grad_norm": 132.72374707575128, "learning_rate": 1.821005845495929e-05, "loss": 34.1793, "step": 12780 }, { "epoch": 0.2016459607744214, "grad_norm": 128.49412953781138, "learning_rate": 1.8207201062343793e-05, "loss": 33.2769, "step": 12790 }, { "epoch": 0.20180361985243112, "grad_norm": 122.95539321413649, "learning_rate": 1.8204341615388186e-05, "loss": 33.2217, "step": 12800 }, { "epoch": 0.2019612789304408, "grad_norm": 122.03670574178733, "learning_rate": 1.820148011480822e-05, "loss": 34.1153, "step": 12810 }, { "epoch": 0.20211893800845052, "grad_norm": 116.72426644361725, "learning_rate": 1.8198616561320165e-05, "loss": 33.9547, "step": 12820 }, { "epoch": 0.20227659708646023, "grad_norm": 127.49000405997016, "learning_rate": 1.8195750955640778e-05, "loss": 34.1477, "step": 12830 }, { "epoch": 0.20243425616446994, "grad_norm": 123.16248720998799, "learning_rate": 1.819288329848736e-05, "loss": 33.0406, "step": 12840 }, { "epoch": 0.20259191524247966, "grad_norm": 125.01842771174154, "learning_rate": 1.819001359057771e-05, "loss": 34.1103, "step": 12850 }, { "epoch": 0.20274957432048937, "grad_norm": 175.0863893809011, "learning_rate": 1.818714183263014e-05, "loss": 34.7864, "step": 12860 }, { "epoch": 0.2029072333984991, "grad_norm": 120.74515811415361, "learning_rate": 1.818426802536348e-05, "loss": 33.4905, "step": 12870 }, { "epoch": 0.2030648924765088, "grad_norm": 115.45310195484309, "learning_rate": 1.8181392169497073e-05, "loss": 32.1757, "step": 12880 }, { "epoch": 0.20322255155451852, "grad_norm": 120.77832658671862, "learning_rate": 1.817851426575077e-05, "loss": 34.0868, "step": 12890 }, { "epoch": 0.20338021063252823, "grad_norm": 118.16280141905445, "learning_rate": 1.817563431484494e-05, "loss": 34.4083, "step": 12900 }, { "epoch": 0.20353786971053794, "grad_norm": 124.25940364599894, "learning_rate": 1.8172752317500463e-05, "loss": 32.5214, "step": 12910 }, { "epoch": 0.20369552878854766, "grad_norm": 113.94365298032784, "learning_rate": 1.8169868274438725e-05, "loss": 32.6276, "step": 12920 }, { "epoch": 0.20385318786655734, "grad_norm": 118.97249703322662, "learning_rate": 1.8166982186381633e-05, "loss": 33.659, "step": 12930 }, { "epoch": 0.20401084694456706, "grad_norm": 121.70647981774007, "learning_rate": 1.8164094054051603e-05, "loss": 33.3157, "step": 12940 }, { "epoch": 0.20416850602257677, "grad_norm": 120.59972568998622, "learning_rate": 1.816120387817156e-05, "loss": 34.3487, "step": 12950 }, { "epoch": 0.2043261651005865, "grad_norm": 120.03681589031817, "learning_rate": 1.815831165946494e-05, "loss": 33.8407, "step": 12960 }, { "epoch": 0.2044838241785962, "grad_norm": 117.17439180878492, "learning_rate": 1.81554173986557e-05, "loss": 33.1261, "step": 12970 }, { "epoch": 0.20464148325660592, "grad_norm": 125.24778285317161, "learning_rate": 1.815252109646829e-05, "loss": 34.1839, "step": 12980 }, { "epoch": 0.20479914233461563, "grad_norm": 120.59293450912496, "learning_rate": 1.8149622753627693e-05, "loss": 33.2117, "step": 12990 }, { "epoch": 0.20495680141262534, "grad_norm": 123.94032204754522, "learning_rate": 1.8146722370859387e-05, "loss": 33.3195, "step": 13000 }, { "epoch": 0.20511446049063506, "grad_norm": 107.81584517522266, "learning_rate": 1.8143819948889363e-05, "loss": 33.1313, "step": 13010 }, { "epoch": 0.20527211956864477, "grad_norm": 117.55150507749795, "learning_rate": 1.814091548844413e-05, "loss": 32.5763, "step": 13020 }, { "epoch": 0.2054297786466545, "grad_norm": 123.70750657788699, "learning_rate": 1.8138008990250694e-05, "loss": 34.0794, "step": 13030 }, { "epoch": 0.20558743772466417, "grad_norm": 132.2543384690559, "learning_rate": 1.8135100455036587e-05, "loss": 33.0696, "step": 13040 }, { "epoch": 0.2057450968026739, "grad_norm": 114.12499667386892, "learning_rate": 1.813218988352984e-05, "loss": 33.3993, "step": 13050 }, { "epoch": 0.2059027558806836, "grad_norm": 115.53855793896092, "learning_rate": 1.8129277276458994e-05, "loss": 32.8549, "step": 13060 }, { "epoch": 0.20606041495869332, "grad_norm": 118.41054249387038, "learning_rate": 1.812636263455311e-05, "loss": 33.3164, "step": 13070 }, { "epoch": 0.20621807403670303, "grad_norm": 117.934978914325, "learning_rate": 1.8123445958541738e-05, "loss": 33.2124, "step": 13080 }, { "epoch": 0.20637573311471274, "grad_norm": 113.55627249536822, "learning_rate": 1.812052724915496e-05, "loss": 33.4031, "step": 13090 }, { "epoch": 0.20653339219272246, "grad_norm": 118.46169800290508, "learning_rate": 1.8117606507123353e-05, "loss": 33.024, "step": 13100 }, { "epoch": 0.20669105127073217, "grad_norm": 124.28643705329253, "learning_rate": 1.8114683733178005e-05, "loss": 33.5397, "step": 13110 }, { "epoch": 0.2068487103487419, "grad_norm": 127.97675123511837, "learning_rate": 1.8111758928050514e-05, "loss": 34.2436, "step": 13120 }, { "epoch": 0.2070063694267516, "grad_norm": 119.60053935459098, "learning_rate": 1.810883209247299e-05, "loss": 33.2881, "step": 13130 }, { "epoch": 0.20716402850476132, "grad_norm": 126.76376844486332, "learning_rate": 1.8105903227178045e-05, "loss": 32.7994, "step": 13140 }, { "epoch": 0.207321687582771, "grad_norm": 120.00481323719288, "learning_rate": 1.81029723328988e-05, "loss": 33.319, "step": 13150 }, { "epoch": 0.20747934666078072, "grad_norm": 116.47250009025348, "learning_rate": 1.8100039410368895e-05, "loss": 33.4766, "step": 13160 }, { "epoch": 0.20763700573879043, "grad_norm": 124.3787995512414, "learning_rate": 1.8097104460322455e-05, "loss": 34.4423, "step": 13170 }, { "epoch": 0.20779466481680015, "grad_norm": 117.19040050435694, "learning_rate": 1.809416748349413e-05, "loss": 32.5767, "step": 13180 }, { "epoch": 0.20795232389480986, "grad_norm": 121.53714432634426, "learning_rate": 1.8091228480619078e-05, "loss": 32.6273, "step": 13190 }, { "epoch": 0.20810998297281957, "grad_norm": 120.02019841852434, "learning_rate": 1.8088287452432957e-05, "loss": 33.443, "step": 13200 }, { "epoch": 0.2082676420508293, "grad_norm": 120.16171380705055, "learning_rate": 1.8085344399671932e-05, "loss": 33.7533, "step": 13210 }, { "epoch": 0.208425301128839, "grad_norm": 124.22968096034336, "learning_rate": 1.808239932307268e-05, "loss": 33.1421, "step": 13220 }, { "epoch": 0.20858296020684872, "grad_norm": 120.84152884061251, "learning_rate": 1.8079452223372377e-05, "loss": 32.4851, "step": 13230 }, { "epoch": 0.20874061928485843, "grad_norm": 119.88410473197118, "learning_rate": 1.8076503101308712e-05, "loss": 32.7075, "step": 13240 }, { "epoch": 0.20889827836286814, "grad_norm": 128.3765490515231, "learning_rate": 1.8073551957619883e-05, "loss": 32.9421, "step": 13250 }, { "epoch": 0.20905593744087786, "grad_norm": 120.10268381537071, "learning_rate": 1.807059879304458e-05, "loss": 32.9457, "step": 13260 }, { "epoch": 0.20921359651888755, "grad_norm": 118.66117109892092, "learning_rate": 1.8067643608322018e-05, "loss": 32.3254, "step": 13270 }, { "epoch": 0.20937125559689726, "grad_norm": 120.38547742748051, "learning_rate": 1.80646864041919e-05, "loss": 33.6323, "step": 13280 }, { "epoch": 0.20952891467490697, "grad_norm": 112.85779197281698, "learning_rate": 1.8061727181394448e-05, "loss": 32.4308, "step": 13290 }, { "epoch": 0.2096865737529167, "grad_norm": 113.74646758420087, "learning_rate": 1.8058765940670378e-05, "loss": 33.2737, "step": 13300 }, { "epoch": 0.2098442328309264, "grad_norm": 121.3986333356734, "learning_rate": 1.8055802682760915e-05, "loss": 34.1649, "step": 13310 }, { "epoch": 0.21000189190893612, "grad_norm": 107.84242100482331, "learning_rate": 1.80528374084078e-05, "loss": 33.691, "step": 13320 }, { "epoch": 0.21015955098694583, "grad_norm": 118.0813709111929, "learning_rate": 1.8049870118353264e-05, "loss": 33.8638, "step": 13330 }, { "epoch": 0.21031721006495555, "grad_norm": 116.73284642638434, "learning_rate": 1.804690081334005e-05, "loss": 33.0383, "step": 13340 }, { "epoch": 0.21047486914296526, "grad_norm": 122.87693222049433, "learning_rate": 1.8043929494111396e-05, "loss": 33.6088, "step": 13350 }, { "epoch": 0.21063252822097497, "grad_norm": 118.2549412597334, "learning_rate": 1.8040956161411058e-05, "loss": 32.4149, "step": 13360 }, { "epoch": 0.2107901872989847, "grad_norm": 114.59211197433332, "learning_rate": 1.8037980815983285e-05, "loss": 33.1954, "step": 13370 }, { "epoch": 0.21094784637699437, "grad_norm": 117.56989771444957, "learning_rate": 1.8035003458572837e-05, "loss": 32.7755, "step": 13380 }, { "epoch": 0.2111055054550041, "grad_norm": 122.20378087783519, "learning_rate": 1.8032024089924977e-05, "loss": 33.1927, "step": 13390 }, { "epoch": 0.2112631645330138, "grad_norm": 114.84981312763642, "learning_rate": 1.8029042710785466e-05, "loss": 33.2722, "step": 13400 }, { "epoch": 0.21142082361102352, "grad_norm": 115.63100054245707, "learning_rate": 1.8026059321900568e-05, "loss": 33.0916, "step": 13410 }, { "epoch": 0.21157848268903323, "grad_norm": 120.6206936450646, "learning_rate": 1.802307392401706e-05, "loss": 32.7331, "step": 13420 }, { "epoch": 0.21173614176704295, "grad_norm": 122.43339007501883, "learning_rate": 1.8020086517882214e-05, "loss": 32.6251, "step": 13430 }, { "epoch": 0.21189380084505266, "grad_norm": 112.49624133184037, "learning_rate": 1.8017097104243805e-05, "loss": 33.0702, "step": 13440 }, { "epoch": 0.21205145992306237, "grad_norm": 126.94975584307896, "learning_rate": 1.801410568385011e-05, "loss": 32.6521, "step": 13450 }, { "epoch": 0.2122091190010721, "grad_norm": 115.07171991332606, "learning_rate": 1.801111225744991e-05, "loss": 33.3908, "step": 13460 }, { "epoch": 0.2123667780790818, "grad_norm": 127.44126865670552, "learning_rate": 1.8008116825792487e-05, "loss": 33.2822, "step": 13470 }, { "epoch": 0.21252443715709152, "grad_norm": 120.03687757799358, "learning_rate": 1.8005119389627634e-05, "loss": 33.1449, "step": 13480 }, { "epoch": 0.2126820962351012, "grad_norm": 125.22645962238954, "learning_rate": 1.8002119949705626e-05, "loss": 34.2096, "step": 13490 }, { "epoch": 0.21283975531311092, "grad_norm": 117.00419330138932, "learning_rate": 1.799911850677726e-05, "loss": 33.8721, "step": 13500 }, { "epoch": 0.21299741439112063, "grad_norm": 109.86731289293004, "learning_rate": 1.7996115061593823e-05, "loss": 32.9452, "step": 13510 }, { "epoch": 0.21315507346913035, "grad_norm": 120.7796365623038, "learning_rate": 1.7993109614907102e-05, "loss": 32.6747, "step": 13520 }, { "epoch": 0.21331273254714006, "grad_norm": 115.99002063956495, "learning_rate": 1.7990102167469396e-05, "loss": 32.4606, "step": 13530 }, { "epoch": 0.21347039162514977, "grad_norm": 131.0386698916122, "learning_rate": 1.798709272003349e-05, "loss": 33.647, "step": 13540 }, { "epoch": 0.2136280507031595, "grad_norm": 113.73148740536077, "learning_rate": 1.798408127335268e-05, "loss": 32.8981, "step": 13550 }, { "epoch": 0.2137857097811692, "grad_norm": 123.75759288800656, "learning_rate": 1.798106782818076e-05, "loss": 33.955, "step": 13560 }, { "epoch": 0.21394336885917892, "grad_norm": 119.77903358957255, "learning_rate": 1.797805238527203e-05, "loss": 32.8878, "step": 13570 }, { "epoch": 0.21410102793718863, "grad_norm": 116.02466264122972, "learning_rate": 1.797503494538127e-05, "loss": 33.2681, "step": 13580 }, { "epoch": 0.21425868701519835, "grad_norm": 115.33700774412675, "learning_rate": 1.797201550926379e-05, "loss": 32.5826, "step": 13590 }, { "epoch": 0.21441634609320806, "grad_norm": 115.59661409672051, "learning_rate": 1.7968994077675365e-05, "loss": 33.7887, "step": 13600 }, { "epoch": 0.21457400517121775, "grad_norm": 118.05453624652056, "learning_rate": 1.7965970651372306e-05, "loss": 32.6164, "step": 13610 }, { "epoch": 0.21473166424922746, "grad_norm": 383.3810378660345, "learning_rate": 1.7962945231111395e-05, "loss": 33.4815, "step": 13620 }, { "epoch": 0.21488932332723717, "grad_norm": 128.38963242960142, "learning_rate": 1.7959917817649924e-05, "loss": 32.9621, "step": 13630 }, { "epoch": 0.2150469824052469, "grad_norm": 126.8664677702243, "learning_rate": 1.795688841174569e-05, "loss": 33.1962, "step": 13640 }, { "epoch": 0.2152046414832566, "grad_norm": 121.08340539574664, "learning_rate": 1.7953857014156973e-05, "loss": 32.6198, "step": 13650 }, { "epoch": 0.21536230056126632, "grad_norm": 118.19544111874467, "learning_rate": 1.795082362564256e-05, "loss": 33.6343, "step": 13660 }, { "epoch": 0.21551995963927603, "grad_norm": 123.30360761172089, "learning_rate": 1.7947788246961744e-05, "loss": 32.7033, "step": 13670 }, { "epoch": 0.21567761871728575, "grad_norm": 113.40682522476855, "learning_rate": 1.794475087887431e-05, "loss": 32.5396, "step": 13680 }, { "epoch": 0.21583527779529546, "grad_norm": 120.46951444422223, "learning_rate": 1.7941711522140536e-05, "loss": 32.2334, "step": 13690 }, { "epoch": 0.21599293687330517, "grad_norm": 1179.9258402870644, "learning_rate": 1.7938670177521202e-05, "loss": 33.2274, "step": 13700 }, { "epoch": 0.2161505959513149, "grad_norm": 125.6507662735651, "learning_rate": 1.793562684577758e-05, "loss": 33.3302, "step": 13710 }, { "epoch": 0.21630825502932458, "grad_norm": 113.24779559687818, "learning_rate": 1.7932581527671456e-05, "loss": 32.9348, "step": 13720 }, { "epoch": 0.2164659141073343, "grad_norm": 111.00972575213548, "learning_rate": 1.7929534223965094e-05, "loss": 32.3957, "step": 13730 }, { "epoch": 0.216623573185344, "grad_norm": 115.0274997823543, "learning_rate": 1.7926484935421264e-05, "loss": 32.6102, "step": 13740 }, { "epoch": 0.21678123226335372, "grad_norm": 144.29077325696937, "learning_rate": 1.792343366280323e-05, "loss": 32.8485, "step": 13750 }, { "epoch": 0.21693889134136343, "grad_norm": 113.5983081116414, "learning_rate": 1.792038040687476e-05, "loss": 33.3512, "step": 13760 }, { "epoch": 0.21709655041937315, "grad_norm": 124.45937925899015, "learning_rate": 1.791732516840011e-05, "loss": 33.7852, "step": 13770 }, { "epoch": 0.21725420949738286, "grad_norm": 113.71414996455752, "learning_rate": 1.7914267948144034e-05, "loss": 33.3281, "step": 13780 }, { "epoch": 0.21741186857539258, "grad_norm": 115.52791699150278, "learning_rate": 1.7911208746871787e-05, "loss": 32.6262, "step": 13790 }, { "epoch": 0.2175695276534023, "grad_norm": 119.30746258369437, "learning_rate": 1.790814756534911e-05, "loss": 34.4293, "step": 13800 }, { "epoch": 0.217727186731412, "grad_norm": 113.51274570106743, "learning_rate": 1.7905084404342252e-05, "loss": 32.5554, "step": 13810 }, { "epoch": 0.21788484580942172, "grad_norm": 113.10779704123301, "learning_rate": 1.7902019264617946e-05, "loss": 33.4119, "step": 13820 }, { "epoch": 0.21804250488743143, "grad_norm": 121.0687292875216, "learning_rate": 1.789895214694343e-05, "loss": 33.1101, "step": 13830 }, { "epoch": 0.21820016396544112, "grad_norm": 120.56384266868731, "learning_rate": 1.7895883052086427e-05, "loss": 32.5338, "step": 13840 }, { "epoch": 0.21835782304345083, "grad_norm": 113.78508438702056, "learning_rate": 1.7892811980815167e-05, "loss": 32.2084, "step": 13850 }, { "epoch": 0.21851548212146055, "grad_norm": 129.13865529494353, "learning_rate": 1.7889738933898365e-05, "loss": 32.8847, "step": 13860 }, { "epoch": 0.21867314119947026, "grad_norm": 116.46694090402026, "learning_rate": 1.788666391210523e-05, "loss": 33.124, "step": 13870 }, { "epoch": 0.21883080027747998, "grad_norm": 106.25200933271206, "learning_rate": 1.7883586916205474e-05, "loss": 32.679, "step": 13880 }, { "epoch": 0.2189884593554897, "grad_norm": 122.65213366337643, "learning_rate": 1.7880507946969298e-05, "loss": 34.1165, "step": 13890 }, { "epoch": 0.2191461184334994, "grad_norm": 121.87656850801798, "learning_rate": 1.7877427005167395e-05, "loss": 32.5612, "step": 13900 }, { "epoch": 0.21930377751150912, "grad_norm": 122.26676364007575, "learning_rate": 1.7874344091570956e-05, "loss": 33.5155, "step": 13910 }, { "epoch": 0.21946143658951883, "grad_norm": 110.53168685230484, "learning_rate": 1.7871259206951664e-05, "loss": 32.1574, "step": 13920 }, { "epoch": 0.21961909566752855, "grad_norm": 120.32773880016208, "learning_rate": 1.7868172352081692e-05, "loss": 33.2288, "step": 13930 }, { "epoch": 0.21977675474553826, "grad_norm": 118.27259032125738, "learning_rate": 1.786508352773371e-05, "loss": 33.8868, "step": 13940 }, { "epoch": 0.21993441382354795, "grad_norm": 115.5032548295694, "learning_rate": 1.786199273468088e-05, "loss": 32.4591, "step": 13950 }, { "epoch": 0.22009207290155766, "grad_norm": 126.02504111584743, "learning_rate": 1.7858899973696858e-05, "loss": 32.6634, "step": 13960 }, { "epoch": 0.22024973197956738, "grad_norm": 108.44954239443773, "learning_rate": 1.7855805245555786e-05, "loss": 32.4561, "step": 13970 }, { "epoch": 0.2204073910575771, "grad_norm": 113.06332257529859, "learning_rate": 1.785270855103231e-05, "loss": 32.5646, "step": 13980 }, { "epoch": 0.2205650501355868, "grad_norm": 118.98414865423653, "learning_rate": 1.7849609890901563e-05, "loss": 32.3141, "step": 13990 }, { "epoch": 0.22072270921359652, "grad_norm": 131.98685328933396, "learning_rate": 1.7846509265939164e-05, "loss": 33.1617, "step": 14000 }, { "epoch": 0.22088036829160623, "grad_norm": 105.96292537792819, "learning_rate": 1.7843406676921232e-05, "loss": 32.5754, "step": 14010 }, { "epoch": 0.22103802736961595, "grad_norm": 127.53915041848308, "learning_rate": 1.7840302124624372e-05, "loss": 33.1435, "step": 14020 }, { "epoch": 0.22119568644762566, "grad_norm": 114.38972497592322, "learning_rate": 1.7837195609825682e-05, "loss": 32.5555, "step": 14030 }, { "epoch": 0.22135334552563538, "grad_norm": 114.58262921544198, "learning_rate": 1.783408713330276e-05, "loss": 32.7092, "step": 14040 }, { "epoch": 0.2215110046036451, "grad_norm": 113.29808969395758, "learning_rate": 1.7830976695833673e-05, "loss": 32.6583, "step": 14050 }, { "epoch": 0.22166866368165478, "grad_norm": 123.09198139201511, "learning_rate": 1.7827864298197008e-05, "loss": 31.5214, "step": 14060 }, { "epoch": 0.2218263227596645, "grad_norm": 119.08007168515694, "learning_rate": 1.7824749941171816e-05, "loss": 32.8745, "step": 14070 }, { "epoch": 0.2219839818376742, "grad_norm": 109.58926069768533, "learning_rate": 1.7821633625537656e-05, "loss": 32.1273, "step": 14080 }, { "epoch": 0.22214164091568392, "grad_norm": 113.30531469946371, "learning_rate": 1.7818515352074574e-05, "loss": 33.8593, "step": 14090 }, { "epoch": 0.22229929999369363, "grad_norm": 116.20883716158625, "learning_rate": 1.7815395121563094e-05, "loss": 32.3359, "step": 14100 }, { "epoch": 0.22245695907170335, "grad_norm": 113.72437869718927, "learning_rate": 1.7812272934784245e-05, "loss": 32.5208, "step": 14110 }, { "epoch": 0.22261461814971306, "grad_norm": 223.84921380474802, "learning_rate": 1.7809148792519543e-05, "loss": 32.7381, "step": 14120 }, { "epoch": 0.22277227722772278, "grad_norm": 113.84170908665419, "learning_rate": 1.7806022695550983e-05, "loss": 33.0456, "step": 14130 }, { "epoch": 0.2229299363057325, "grad_norm": 125.79635888880686, "learning_rate": 1.780289464466106e-05, "loss": 33.6416, "step": 14140 }, { "epoch": 0.2230875953837422, "grad_norm": 112.79323916206191, "learning_rate": 1.779976464063276e-05, "loss": 32.7181, "step": 14150 }, { "epoch": 0.22324525446175192, "grad_norm": 109.93353239338106, "learning_rate": 1.7796632684249545e-05, "loss": 32.378, "step": 14160 }, { "epoch": 0.22340291353976163, "grad_norm": 127.46602705694391, "learning_rate": 1.7793498776295375e-05, "loss": 31.5615, "step": 14170 }, { "epoch": 0.22356057261777132, "grad_norm": 118.8940444322011, "learning_rate": 1.77903629175547e-05, "loss": 32.9386, "step": 14180 }, { "epoch": 0.22371823169578103, "grad_norm": 117.95181475285348, "learning_rate": 1.778722510881245e-05, "loss": 33.9357, "step": 14190 }, { "epoch": 0.22387589077379075, "grad_norm": 119.48483007517312, "learning_rate": 1.7784085350854058e-05, "loss": 33.1897, "step": 14200 }, { "epoch": 0.22403354985180046, "grad_norm": 119.7293358806565, "learning_rate": 1.7780943644465423e-05, "loss": 32.6387, "step": 14210 }, { "epoch": 0.22419120892981018, "grad_norm": 120.28069700703678, "learning_rate": 1.777779999043295e-05, "loss": 32.685, "step": 14220 }, { "epoch": 0.2243488680078199, "grad_norm": 121.86065328691497, "learning_rate": 1.7774654389543526e-05, "loss": 32.6555, "step": 14230 }, { "epoch": 0.2245065270858296, "grad_norm": 113.00031260877584, "learning_rate": 1.7771506842584524e-05, "loss": 32.9612, "step": 14240 }, { "epoch": 0.22466418616383932, "grad_norm": 118.36555701779736, "learning_rate": 1.77683573503438e-05, "loss": 31.9237, "step": 14250 }, { "epoch": 0.22482184524184903, "grad_norm": 124.54011790469947, "learning_rate": 1.776520591360971e-05, "loss": 33.3795, "step": 14260 }, { "epoch": 0.22497950431985875, "grad_norm": 118.98934924807108, "learning_rate": 1.7762052533171085e-05, "loss": 32.6839, "step": 14270 }, { "epoch": 0.22513716339786846, "grad_norm": 112.33769991463838, "learning_rate": 1.775889720981724e-05, "loss": 32.5676, "step": 14280 }, { "epoch": 0.22529482247587815, "grad_norm": 115.53529595627788, "learning_rate": 1.7755739944337992e-05, "loss": 32.3498, "step": 14290 }, { "epoch": 0.22545248155388786, "grad_norm": 111.28423470242326, "learning_rate": 1.775258073752363e-05, "loss": 32.4294, "step": 14300 }, { "epoch": 0.22561014063189758, "grad_norm": 119.21733257989995, "learning_rate": 1.7749419590164933e-05, "loss": 32.5936, "step": 14310 }, { "epoch": 0.2257677997099073, "grad_norm": 127.68625447311904, "learning_rate": 1.7746256503053165e-05, "loss": 33.7718, "step": 14320 }, { "epoch": 0.225925458787917, "grad_norm": 124.5221681942007, "learning_rate": 1.774309147698008e-05, "loss": 32.4419, "step": 14330 }, { "epoch": 0.22608311786592672, "grad_norm": 118.34570371237083, "learning_rate": 1.773992451273791e-05, "loss": 32.9091, "step": 14340 }, { "epoch": 0.22624077694393643, "grad_norm": 117.53057377473066, "learning_rate": 1.773675561111938e-05, "loss": 32.6145, "step": 14350 }, { "epoch": 0.22639843602194615, "grad_norm": 123.49683523091399, "learning_rate": 1.7733584772917695e-05, "loss": 32.6941, "step": 14360 }, { "epoch": 0.22655609509995586, "grad_norm": 117.29585544627167, "learning_rate": 1.7730411998926543e-05, "loss": 32.2381, "step": 14370 }, { "epoch": 0.22671375417796558, "grad_norm": 112.35730072164037, "learning_rate": 1.7727237289940103e-05, "loss": 32.5052, "step": 14380 }, { "epoch": 0.2268714132559753, "grad_norm": 118.09767698512306, "learning_rate": 1.7724060646753033e-05, "loss": 32.9161, "step": 14390 }, { "epoch": 0.22702907233398498, "grad_norm": 111.86028321022454, "learning_rate": 1.7720882070160477e-05, "loss": 32.062, "step": 14400 }, { "epoch": 0.2271867314119947, "grad_norm": 120.50259329812336, "learning_rate": 1.7717701560958063e-05, "loss": 32.0409, "step": 14410 }, { "epoch": 0.2273443904900044, "grad_norm": 116.02371748089028, "learning_rate": 1.77145191199419e-05, "loss": 32.1703, "step": 14420 }, { "epoch": 0.22750204956801412, "grad_norm": 111.38024287063365, "learning_rate": 1.771133474790859e-05, "loss": 31.5958, "step": 14430 }, { "epoch": 0.22765970864602383, "grad_norm": 112.55597784039146, "learning_rate": 1.7708148445655203e-05, "loss": 32.4433, "step": 14440 }, { "epoch": 0.22781736772403355, "grad_norm": 113.65987346024798, "learning_rate": 1.7704960213979308e-05, "loss": 33.184, "step": 14450 }, { "epoch": 0.22797502680204326, "grad_norm": 117.05464112856005, "learning_rate": 1.7701770053678943e-05, "loss": 32.5829, "step": 14460 }, { "epoch": 0.22813268588005298, "grad_norm": 123.97883646112771, "learning_rate": 1.769857796555264e-05, "loss": 32.7575, "step": 14470 }, { "epoch": 0.2282903449580627, "grad_norm": 113.60278491780734, "learning_rate": 1.7695383950399407e-05, "loss": 31.9047, "step": 14480 }, { "epoch": 0.2284480040360724, "grad_norm": 118.5731281488291, "learning_rate": 1.7692188009018736e-05, "loss": 33.4752, "step": 14490 }, { "epoch": 0.22860566311408212, "grad_norm": 116.24930630425298, "learning_rate": 1.7688990142210603e-05, "loss": 33.1266, "step": 14500 }, { "epoch": 0.22876332219209183, "grad_norm": 110.7559353028009, "learning_rate": 1.768579035077546e-05, "loss": 32.3636, "step": 14510 }, { "epoch": 0.22892098127010152, "grad_norm": 117.70468828280264, "learning_rate": 1.7682588635514254e-05, "loss": 32.1485, "step": 14520 }, { "epoch": 0.22907864034811123, "grad_norm": 106.26998545136603, "learning_rate": 1.7679384997228394e-05, "loss": 31.8258, "step": 14530 }, { "epoch": 0.22923629942612095, "grad_norm": 111.15585515951912, "learning_rate": 1.767617943671979e-05, "loss": 32.9148, "step": 14540 }, { "epoch": 0.22939395850413066, "grad_norm": 106.51088734588805, "learning_rate": 1.7672971954790818e-05, "loss": 31.6275, "step": 14550 }, { "epoch": 0.22955161758214038, "grad_norm": 125.85037654820145, "learning_rate": 1.7669762552244344e-05, "loss": 32.3315, "step": 14560 }, { "epoch": 0.2297092766601501, "grad_norm": 109.95272728264777, "learning_rate": 1.766655122988371e-05, "loss": 32.9875, "step": 14570 }, { "epoch": 0.2298669357381598, "grad_norm": 131.22908177353642, "learning_rate": 1.766333798851274e-05, "loss": 32.5569, "step": 14580 }, { "epoch": 0.23002459481616952, "grad_norm": 116.55996456403919, "learning_rate": 1.7660122828935743e-05, "loss": 32.5698, "step": 14590 }, { "epoch": 0.23018225389417923, "grad_norm": 117.62520077291346, "learning_rate": 1.76569057519575e-05, "loss": 32.3841, "step": 14600 }, { "epoch": 0.23033991297218895, "grad_norm": 118.87393970791551, "learning_rate": 1.7653686758383277e-05, "loss": 32.4323, "step": 14610 }, { "epoch": 0.23049757205019866, "grad_norm": 124.15686225891139, "learning_rate": 1.765046584901882e-05, "loss": 32.4678, "step": 14620 }, { "epoch": 0.23065523112820835, "grad_norm": 120.9887810599614, "learning_rate": 1.764724302467035e-05, "loss": 33.5071, "step": 14630 }, { "epoch": 0.23081289020621806, "grad_norm": 113.39438988981202, "learning_rate": 1.764401828614457e-05, "loss": 31.7474, "step": 14640 }, { "epoch": 0.23097054928422778, "grad_norm": 109.21583581282917, "learning_rate": 1.7640791634248667e-05, "loss": 32.965, "step": 14650 }, { "epoch": 0.2311282083622375, "grad_norm": 120.7672343507009, "learning_rate": 1.7637563069790298e-05, "loss": 33.1872, "step": 14660 }, { "epoch": 0.2312858674402472, "grad_norm": 117.06952961180531, "learning_rate": 1.763433259357761e-05, "loss": 31.5481, "step": 14670 }, { "epoch": 0.23144352651825692, "grad_norm": 110.52904317093201, "learning_rate": 1.763110020641921e-05, "loss": 32.8888, "step": 14680 }, { "epoch": 0.23160118559626663, "grad_norm": 124.66006083510199, "learning_rate": 1.7627865909124208e-05, "loss": 32.3841, "step": 14690 }, { "epoch": 0.23175884467427635, "grad_norm": 117.46903894587997, "learning_rate": 1.762462970250217e-05, "loss": 31.4981, "step": 14700 }, { "epoch": 0.23191650375228606, "grad_norm": 111.26474845681236, "learning_rate": 1.762139158736315e-05, "loss": 32.5848, "step": 14710 }, { "epoch": 0.23207416283029578, "grad_norm": 117.54875679193303, "learning_rate": 1.7618151564517684e-05, "loss": 32.9431, "step": 14720 }, { "epoch": 0.2322318219083055, "grad_norm": 110.81322407222864, "learning_rate": 1.761490963477678e-05, "loss": 32.217, "step": 14730 }, { "epoch": 0.2323894809863152, "grad_norm": 121.41278968094927, "learning_rate": 1.761166579895192e-05, "loss": 31.8222, "step": 14740 }, { "epoch": 0.2325471400643249, "grad_norm": 110.771497247778, "learning_rate": 1.7608420057855068e-05, "loss": 32.1063, "step": 14750 }, { "epoch": 0.2327047991423346, "grad_norm": 117.16637228684458, "learning_rate": 1.7605172412298666e-05, "loss": 32.6105, "step": 14760 }, { "epoch": 0.23286245822034432, "grad_norm": 110.9188759852896, "learning_rate": 1.7601922863095625e-05, "loss": 31.262, "step": 14770 }, { "epoch": 0.23302011729835403, "grad_norm": 124.49202674931131, "learning_rate": 1.7598671411059342e-05, "loss": 32.2842, "step": 14780 }, { "epoch": 0.23317777637636375, "grad_norm": 107.89008250880657, "learning_rate": 1.7595418057003688e-05, "loss": 31.7016, "step": 14790 }, { "epoch": 0.23333543545437346, "grad_norm": 112.34609761343295, "learning_rate": 1.7592162801743004e-05, "loss": 31.9684, "step": 14800 }, { "epoch": 0.23349309453238318, "grad_norm": 115.04263685885714, "learning_rate": 1.758890564609211e-05, "loss": 31.8578, "step": 14810 }, { "epoch": 0.2336507536103929, "grad_norm": 115.82980481981973, "learning_rate": 1.758564659086631e-05, "loss": 32.2909, "step": 14820 }, { "epoch": 0.2338084126884026, "grad_norm": 160.89556085340627, "learning_rate": 1.758238563688137e-05, "loss": 32.8412, "step": 14830 }, { "epoch": 0.23396607176641232, "grad_norm": 113.87238087845851, "learning_rate": 1.7579122784953543e-05, "loss": 32.509, "step": 14840 }, { "epoch": 0.23412373084442203, "grad_norm": 112.6986102436653, "learning_rate": 1.7575858035899547e-05, "loss": 31.302, "step": 14850 }, { "epoch": 0.23428138992243172, "grad_norm": 114.8740419386669, "learning_rate": 1.757259139053658e-05, "loss": 32.2087, "step": 14860 }, { "epoch": 0.23443904900044144, "grad_norm": 122.20742350812476, "learning_rate": 1.756932284968232e-05, "loss": 31.5681, "step": 14870 }, { "epoch": 0.23459670807845115, "grad_norm": 117.0740141819061, "learning_rate": 1.7566052414154903e-05, "loss": 31.9391, "step": 14880 }, { "epoch": 0.23475436715646086, "grad_norm": 111.47807376018692, "learning_rate": 1.7562780084772957e-05, "loss": 31.4742, "step": 14890 }, { "epoch": 0.23491202623447058, "grad_norm": 119.81307026109081, "learning_rate": 1.755950586235558e-05, "loss": 32.7646, "step": 14900 }, { "epoch": 0.2350696853124803, "grad_norm": 167.7750660943806, "learning_rate": 1.7556229747722335e-05, "loss": 32.4056, "step": 14910 }, { "epoch": 0.23522734439049, "grad_norm": 118.2545410360466, "learning_rate": 1.755295174169327e-05, "loss": 32.4, "step": 14920 }, { "epoch": 0.23538500346849972, "grad_norm": 106.13734444633687, "learning_rate": 1.7549671845088895e-05, "loss": 32.0823, "step": 14930 }, { "epoch": 0.23554266254650943, "grad_norm": 109.40336174910045, "learning_rate": 1.75463900587302e-05, "loss": 32.1905, "step": 14940 }, { "epoch": 0.23570032162451915, "grad_norm": 116.28463020538786, "learning_rate": 1.7543106383438647e-05, "loss": 32.1553, "step": 14950 }, { "epoch": 0.23585798070252886, "grad_norm": 115.2241503934489, "learning_rate": 1.7539820820036177e-05, "loss": 32.1639, "step": 14960 }, { "epoch": 0.23601563978053855, "grad_norm": 114.31654646364716, "learning_rate": 1.7536533369345194e-05, "loss": 32.6267, "step": 14970 }, { "epoch": 0.23617329885854826, "grad_norm": 114.18876720297565, "learning_rate": 1.7533244032188574e-05, "loss": 32.4633, "step": 14980 }, { "epoch": 0.23633095793655798, "grad_norm": 119.35492010075444, "learning_rate": 1.7529952809389673e-05, "loss": 31.9391, "step": 14990 }, { "epoch": 0.2364886170145677, "grad_norm": 114.60052455883292, "learning_rate": 1.7526659701772317e-05, "loss": 31.8052, "step": 15000 }, { "epoch": 0.2366462760925774, "grad_norm": 117.34738307522082, "learning_rate": 1.75233647101608e-05, "loss": 31.455, "step": 15010 }, { "epoch": 0.23680393517058712, "grad_norm": 114.56315129299284, "learning_rate": 1.752006783537989e-05, "loss": 31.3898, "step": 15020 }, { "epoch": 0.23696159424859684, "grad_norm": 114.4848372615369, "learning_rate": 1.7516769078254823e-05, "loss": 32.153, "step": 15030 }, { "epoch": 0.23711925332660655, "grad_norm": 112.09416865765021, "learning_rate": 1.7513468439611313e-05, "loss": 32.3461, "step": 15040 }, { "epoch": 0.23727691240461626, "grad_norm": 114.45371065399696, "learning_rate": 1.7510165920275535e-05, "loss": 31.2391, "step": 15050 }, { "epoch": 0.23743457148262598, "grad_norm": 115.1410330784839, "learning_rate": 1.750686152107415e-05, "loss": 32.3712, "step": 15060 }, { "epoch": 0.2375922305606357, "grad_norm": 112.30652861362059, "learning_rate": 1.750355524283428e-05, "loss": 32.5172, "step": 15070 }, { "epoch": 0.2377498896386454, "grad_norm": 122.17171111182992, "learning_rate": 1.7500247086383507e-05, "loss": 33.1847, "step": 15080 }, { "epoch": 0.2379075487166551, "grad_norm": 115.34139051944373, "learning_rate": 1.7496937052549903e-05, "loss": 32.6386, "step": 15090 }, { "epoch": 0.2380652077946648, "grad_norm": 122.2482816142332, "learning_rate": 1.7493625142162e-05, "loss": 32.8823, "step": 15100 }, { "epoch": 0.23822286687267452, "grad_norm": 108.25182486396534, "learning_rate": 1.74903113560488e-05, "loss": 31.8171, "step": 15110 }, { "epoch": 0.23838052595068424, "grad_norm": 123.30693317228248, "learning_rate": 1.7486995695039766e-05, "loss": 32.5395, "step": 15120 }, { "epoch": 0.23853818502869395, "grad_norm": 117.48626746726674, "learning_rate": 1.7483678159964855e-05, "loss": 32.5124, "step": 15130 }, { "epoch": 0.23869584410670366, "grad_norm": 112.81576556578192, "learning_rate": 1.7480358751654467e-05, "loss": 33.3237, "step": 15140 }, { "epoch": 0.23885350318471338, "grad_norm": 173.24673750159843, "learning_rate": 1.7477037470939487e-05, "loss": 32.1417, "step": 15150 }, { "epoch": 0.2390111622627231, "grad_norm": 111.97942068083854, "learning_rate": 1.7473714318651254e-05, "loss": 31.1486, "step": 15160 }, { "epoch": 0.2391688213407328, "grad_norm": 111.08593058529861, "learning_rate": 1.7470389295621595e-05, "loss": 32.6133, "step": 15170 }, { "epoch": 0.23932648041874252, "grad_norm": 107.5205254723614, "learning_rate": 1.7467062402682787e-05, "loss": 32.3855, "step": 15180 }, { "epoch": 0.23948413949675224, "grad_norm": 115.20485384295408, "learning_rate": 1.7463733640667587e-05, "loss": 32.1258, "step": 15190 }, { "epoch": 0.23964179857476192, "grad_norm": 119.99968959054466, "learning_rate": 1.7460403010409216e-05, "loss": 32.3779, "step": 15200 }, { "epoch": 0.23979945765277164, "grad_norm": 115.37527308883745, "learning_rate": 1.745707051274136e-05, "loss": 32.8166, "step": 15210 }, { "epoch": 0.23995711673078135, "grad_norm": 116.33037726278302, "learning_rate": 1.7453736148498178e-05, "loss": 32.4295, "step": 15220 }, { "epoch": 0.24011477580879106, "grad_norm": 122.35544409079438, "learning_rate": 1.7450399918514283e-05, "loss": 32.0833, "step": 15230 }, { "epoch": 0.24027243488680078, "grad_norm": 110.47454108485448, "learning_rate": 1.744706182362478e-05, "loss": 31.6342, "step": 15240 }, { "epoch": 0.2404300939648105, "grad_norm": 119.3013461384883, "learning_rate": 1.7443721864665217e-05, "loss": 31.9058, "step": 15250 }, { "epoch": 0.2405877530428202, "grad_norm": 121.57919123904338, "learning_rate": 1.7440380042471616e-05, "loss": 33.277, "step": 15260 }, { "epoch": 0.24074541212082992, "grad_norm": 117.07214435540763, "learning_rate": 1.743703635788047e-05, "loss": 32.1387, "step": 15270 }, { "epoch": 0.24090307119883964, "grad_norm": 114.08353834207193, "learning_rate": 1.7433690811728737e-05, "loss": 32.0056, "step": 15280 }, { "epoch": 0.24106073027684935, "grad_norm": 108.56519218807593, "learning_rate": 1.7430343404853833e-05, "loss": 31.5834, "step": 15290 }, { "epoch": 0.24121838935485906, "grad_norm": 111.2696998506344, "learning_rate": 1.7426994138093653e-05, "loss": 31.9245, "step": 15300 }, { "epoch": 0.24137604843286878, "grad_norm": 109.22352066822, "learning_rate": 1.742364301228654e-05, "loss": 31.4389, "step": 15310 }, { "epoch": 0.24153370751087846, "grad_norm": 263.17084701027454, "learning_rate": 1.742029002827132e-05, "loss": 31.9707, "step": 15320 }, { "epoch": 0.24169136658888818, "grad_norm": 118.17168499554444, "learning_rate": 1.7416935186887274e-05, "loss": 31.506, "step": 15330 }, { "epoch": 0.2418490256668979, "grad_norm": 114.11292822760693, "learning_rate": 1.7413578488974155e-05, "loss": 31.8382, "step": 15340 }, { "epoch": 0.2420066847449076, "grad_norm": 106.35082466413466, "learning_rate": 1.741021993537217e-05, "loss": 32.0725, "step": 15350 }, { "epoch": 0.24216434382291732, "grad_norm": 110.7779015704153, "learning_rate": 1.7406859526922e-05, "loss": 31.416, "step": 15360 }, { "epoch": 0.24232200290092704, "grad_norm": 131.93084961715545, "learning_rate": 1.740349726446479e-05, "loss": 32.912, "step": 15370 }, { "epoch": 0.24247966197893675, "grad_norm": 115.12523963429973, "learning_rate": 1.7400133148842137e-05, "loss": 31.6702, "step": 15380 }, { "epoch": 0.24263732105694646, "grad_norm": 110.31279751539638, "learning_rate": 1.739676718089612e-05, "loss": 31.3346, "step": 15390 }, { "epoch": 0.24279498013495618, "grad_norm": 119.34427343837008, "learning_rate": 1.7393399361469266e-05, "loss": 32.619, "step": 15400 }, { "epoch": 0.2429526392129659, "grad_norm": 115.7433546311478, "learning_rate": 1.739002969140458e-05, "loss": 32.0221, "step": 15410 }, { "epoch": 0.2431102982909756, "grad_norm": 107.84583579528223, "learning_rate": 1.7386658171545516e-05, "loss": 31.672, "step": 15420 }, { "epoch": 0.2432679573689853, "grad_norm": 140.34899769376796, "learning_rate": 1.7383284802735997e-05, "loss": 32.2835, "step": 15430 }, { "epoch": 0.243425616446995, "grad_norm": 120.8299595557252, "learning_rate": 1.7379909585820415e-05, "loss": 31.9272, "step": 15440 }, { "epoch": 0.24358327552500472, "grad_norm": 117.81570447551604, "learning_rate": 1.737653252164361e-05, "loss": 31.9374, "step": 15450 }, { "epoch": 0.24374093460301444, "grad_norm": 112.1585354194358, "learning_rate": 1.7373153611050903e-05, "loss": 33.2884, "step": 15460 }, { "epoch": 0.24389859368102415, "grad_norm": 113.21933062365099, "learning_rate": 1.736977285488806e-05, "loss": 31.7207, "step": 15470 }, { "epoch": 0.24405625275903386, "grad_norm": 117.01034980369253, "learning_rate": 1.7366390254001318e-05, "loss": 32.2695, "step": 15480 }, { "epoch": 0.24421391183704358, "grad_norm": 113.14294193744502, "learning_rate": 1.7363005809237375e-05, "loss": 31.0298, "step": 15490 }, { "epoch": 0.2443715709150533, "grad_norm": 114.44631702393997, "learning_rate": 1.735961952144339e-05, "loss": 31.9749, "step": 15500 }, { "epoch": 0.244529229993063, "grad_norm": 110.04853393361984, "learning_rate": 1.7356231391466983e-05, "loss": 31.8061, "step": 15510 }, { "epoch": 0.24468688907107272, "grad_norm": 110.14450792518205, "learning_rate": 1.735284142015623e-05, "loss": 31.888, "step": 15520 }, { "epoch": 0.24484454814908244, "grad_norm": 113.37992742683952, "learning_rate": 1.734944960835968e-05, "loss": 31.9315, "step": 15530 }, { "epoch": 0.24500220722709212, "grad_norm": 108.98839788728489, "learning_rate": 1.7346055956926334e-05, "loss": 31.729, "step": 15540 }, { "epoch": 0.24515986630510184, "grad_norm": 118.77424547083577, "learning_rate": 1.7342660466705653e-05, "loss": 31.7682, "step": 15550 }, { "epoch": 0.24531752538311155, "grad_norm": 109.77353457132489, "learning_rate": 1.7339263138547553e-05, "loss": 32.2151, "step": 15560 }, { "epoch": 0.24547518446112127, "grad_norm": 115.80776286793491, "learning_rate": 1.7335863973302436e-05, "loss": 32.0519, "step": 15570 }, { "epoch": 0.24563284353913098, "grad_norm": 108.03927797316219, "learning_rate": 1.733246297182113e-05, "loss": 31.4855, "step": 15580 }, { "epoch": 0.2457905026171407, "grad_norm": 127.39668046635, "learning_rate": 1.732906013495494e-05, "loss": 32.6478, "step": 15590 }, { "epoch": 0.2459481616951504, "grad_norm": 108.50686994246955, "learning_rate": 1.7325655463555636e-05, "loss": 31.0547, "step": 15600 }, { "epoch": 0.24610582077316012, "grad_norm": 117.83685768589028, "learning_rate": 1.7322248958475435e-05, "loss": 32.1184, "step": 15610 }, { "epoch": 0.24626347985116984, "grad_norm": 107.89081679051243, "learning_rate": 1.7318840620567014e-05, "loss": 32.1847, "step": 15620 }, { "epoch": 0.24642113892917955, "grad_norm": 109.12739360578523, "learning_rate": 1.7315430450683513e-05, "loss": 31.6642, "step": 15630 }, { "epoch": 0.24657879800718926, "grad_norm": 107.6557289466507, "learning_rate": 1.7312018449678532e-05, "loss": 33.9879, "step": 15640 }, { "epoch": 0.24673645708519898, "grad_norm": 120.3237980135468, "learning_rate": 1.7308604618406127e-05, "loss": 31.7207, "step": 15650 }, { "epoch": 0.24689411616320867, "grad_norm": 121.57553798014293, "learning_rate": 1.7305188957720816e-05, "loss": 31.8635, "step": 15660 }, { "epoch": 0.24705177524121838, "grad_norm": 114.92880456674158, "learning_rate": 1.730177146847756e-05, "loss": 31.8252, "step": 15670 }, { "epoch": 0.2472094343192281, "grad_norm": 114.37093981845325, "learning_rate": 1.7298352151531805e-05, "loss": 31.4406, "step": 15680 }, { "epoch": 0.2473670933972378, "grad_norm": 112.89629020121735, "learning_rate": 1.7294931007739426e-05, "loss": 31.7336, "step": 15690 }, { "epoch": 0.24752475247524752, "grad_norm": 106.47883896480015, "learning_rate": 1.729150803795677e-05, "loss": 32.0218, "step": 15700 }, { "epoch": 0.24768241155325724, "grad_norm": 116.40545497123797, "learning_rate": 1.728808324304064e-05, "loss": 32.4333, "step": 15710 }, { "epoch": 0.24784007063126695, "grad_norm": 132.7849732046477, "learning_rate": 1.7284656623848294e-05, "loss": 31.4701, "step": 15720 }, { "epoch": 0.24799772970927667, "grad_norm": 120.00489896358812, "learning_rate": 1.7281228181237447e-05, "loss": 31.2144, "step": 15730 }, { "epoch": 0.24815538878728638, "grad_norm": 188.05964162821851, "learning_rate": 1.7277797916066276e-05, "loss": 32.3181, "step": 15740 }, { "epoch": 0.2483130478652961, "grad_norm": 113.80749892610083, "learning_rate": 1.7274365829193403e-05, "loss": 31.2959, "step": 15750 }, { "epoch": 0.2484707069433058, "grad_norm": 105.59750775537734, "learning_rate": 1.7270931921477913e-05, "loss": 32.0868, "step": 15760 }, { "epoch": 0.2486283660213155, "grad_norm": 112.46256601756087, "learning_rate": 1.7267496193779342e-05, "loss": 31.7558, "step": 15770 }, { "epoch": 0.2487860250993252, "grad_norm": 116.3254060925634, "learning_rate": 1.726405864695769e-05, "loss": 33.1473, "step": 15780 }, { "epoch": 0.24894368417733492, "grad_norm": 111.94333551061841, "learning_rate": 1.726061928187341e-05, "loss": 31.4682, "step": 15790 }, { "epoch": 0.24910134325534464, "grad_norm": 111.26823335805841, "learning_rate": 1.72571780993874e-05, "loss": 31.9283, "step": 15800 }, { "epoch": 0.24925900233335435, "grad_norm": 119.9601064355874, "learning_rate": 1.725373510036103e-05, "loss": 31.9585, "step": 15810 }, { "epoch": 0.24941666141136407, "grad_norm": 116.42175422141322, "learning_rate": 1.7250290285656108e-05, "loss": 31.9111, "step": 15820 }, { "epoch": 0.24957432048937378, "grad_norm": 115.20444373827118, "learning_rate": 1.7246843656134902e-05, "loss": 33.1264, "step": 15830 }, { "epoch": 0.2497319795673835, "grad_norm": 119.67662915423385, "learning_rate": 1.7243395212660142e-05, "loss": 31.649, "step": 15840 }, { "epoch": 0.2498896386453932, "grad_norm": 116.71177363690921, "learning_rate": 1.7239944956095e-05, "loss": 32.5467, "step": 15850 }, { "epoch": 0.2500472977234029, "grad_norm": 108.48847136709728, "learning_rate": 1.7236492887303114e-05, "loss": 32.1667, "step": 15860 }, { "epoch": 0.25020495680141264, "grad_norm": 119.28187247608243, "learning_rate": 1.723303900714857e-05, "loss": 31.9796, "step": 15870 }, { "epoch": 0.2503626158794223, "grad_norm": 118.33689512214, "learning_rate": 1.72295833164959e-05, "loss": 31.2953, "step": 15880 }, { "epoch": 0.25052027495743207, "grad_norm": 107.4032503895712, "learning_rate": 1.7226125816210102e-05, "loss": 30.9473, "step": 15890 }, { "epoch": 0.25067793403544175, "grad_norm": 115.89978157180131, "learning_rate": 1.7222666507156617e-05, "loss": 31.6469, "step": 15900 }, { "epoch": 0.2508355931134515, "grad_norm": 121.25942549656729, "learning_rate": 1.7219205390201346e-05, "loss": 31.7978, "step": 15910 }, { "epoch": 0.2509932521914612, "grad_norm": 114.31112287204493, "learning_rate": 1.7215742466210637e-05, "loss": 31.2201, "step": 15920 }, { "epoch": 0.2511509112694709, "grad_norm": 114.65875199489167, "learning_rate": 1.7212277736051297e-05, "loss": 31.956, "step": 15930 }, { "epoch": 0.2513085703474806, "grad_norm": 113.55520314402494, "learning_rate": 1.7208811200590575e-05, "loss": 31.2401, "step": 15940 }, { "epoch": 0.2514662294254903, "grad_norm": 107.83259336792949, "learning_rate": 1.720534286069618e-05, "loss": 30.7414, "step": 15950 }, { "epoch": 0.25162388850350004, "grad_norm": 110.72394499681037, "learning_rate": 1.720187271723627e-05, "loss": 31.326, "step": 15960 }, { "epoch": 0.2517815475815097, "grad_norm": 110.21155699801253, "learning_rate": 1.7198400771079457e-05, "loss": 30.8872, "step": 15970 }, { "epoch": 0.25193920665951947, "grad_norm": 112.59462265718076, "learning_rate": 1.7194927023094798e-05, "loss": 31.8988, "step": 15980 }, { "epoch": 0.25209686573752915, "grad_norm": 107.6190399044337, "learning_rate": 1.7191451474151807e-05, "loss": 31.5956, "step": 15990 }, { "epoch": 0.2522545248155389, "grad_norm": 113.97970727154573, "learning_rate": 1.718797412512045e-05, "loss": 32.1756, "step": 16000 }, { "epoch": 0.2524121838935486, "grad_norm": 122.6443462318379, "learning_rate": 1.7184494976871136e-05, "loss": 32.0522, "step": 16010 }, { "epoch": 0.2525698429715583, "grad_norm": 104.22353620195078, "learning_rate": 1.7181014030274728e-05, "loss": 31.5676, "step": 16020 }, { "epoch": 0.252727502049568, "grad_norm": 108.33618775110396, "learning_rate": 1.7177531286202544e-05, "loss": 31.7264, "step": 16030 }, { "epoch": 0.25288516112757775, "grad_norm": 112.31163102346295, "learning_rate": 1.7174046745526346e-05, "loss": 31.8205, "step": 16040 }, { "epoch": 0.25304282020558744, "grad_norm": 113.1240108795909, "learning_rate": 1.7170560409118345e-05, "loss": 31.7458, "step": 16050 }, { "epoch": 0.2532004792835971, "grad_norm": 112.38296724774399, "learning_rate": 1.7167072277851205e-05, "loss": 32.5319, "step": 16060 }, { "epoch": 0.25335813836160687, "grad_norm": 103.97245812273316, "learning_rate": 1.7163582352598044e-05, "loss": 31.7123, "step": 16070 }, { "epoch": 0.25351579743961655, "grad_norm": 111.64174693205477, "learning_rate": 1.7160090634232417e-05, "loss": 30.7869, "step": 16080 }, { "epoch": 0.2536734565176263, "grad_norm": 114.82531091368767, "learning_rate": 1.7156597123628338e-05, "loss": 31.0495, "step": 16090 }, { "epoch": 0.253831115595636, "grad_norm": 122.80907918480892, "learning_rate": 1.715310182166026e-05, "loss": 31.3781, "step": 16100 }, { "epoch": 0.2539887746736457, "grad_norm": 104.19063794797111, "learning_rate": 1.71496047292031e-05, "loss": 30.6973, "step": 16110 }, { "epoch": 0.2541464337516554, "grad_norm": 107.25610070299768, "learning_rate": 1.7146105847132198e-05, "loss": 31.6543, "step": 16120 }, { "epoch": 0.25430409282966515, "grad_norm": 116.02198451339405, "learning_rate": 1.7142605176323374e-05, "loss": 30.8456, "step": 16130 }, { "epoch": 0.25446175190767484, "grad_norm": 110.09133065427804, "learning_rate": 1.7139102717652868e-05, "loss": 31.4586, "step": 16140 }, { "epoch": 0.2546194109856846, "grad_norm": 115.04365321444752, "learning_rate": 1.7135598471997387e-05, "loss": 31.4605, "step": 16150 }, { "epoch": 0.25477707006369427, "grad_norm": 113.9914878117694, "learning_rate": 1.7132092440234064e-05, "loss": 31.4056, "step": 16160 }, { "epoch": 0.25493472914170395, "grad_norm": 119.00073429625988, "learning_rate": 1.7128584623240504e-05, "loss": 31.811, "step": 16170 }, { "epoch": 0.2550923882197137, "grad_norm": 106.12356007476141, "learning_rate": 1.7125075021894746e-05, "loss": 31.0726, "step": 16180 }, { "epoch": 0.2552500472977234, "grad_norm": 111.81952408160865, "learning_rate": 1.712156363707527e-05, "loss": 31.215, "step": 16190 }, { "epoch": 0.2554077063757331, "grad_norm": 110.07291919763021, "learning_rate": 1.711805046966101e-05, "loss": 32.0201, "step": 16200 }, { "epoch": 0.2555653654537428, "grad_norm": 108.17510182905664, "learning_rate": 1.711453552053135e-05, "loss": 31.0281, "step": 16210 }, { "epoch": 0.25572302453175255, "grad_norm": 106.54377760637686, "learning_rate": 1.7111018790566114e-05, "loss": 31.8687, "step": 16220 }, { "epoch": 0.25588068360976224, "grad_norm": 107.88218964710855, "learning_rate": 1.7107500280645565e-05, "loss": 30.8494, "step": 16230 }, { "epoch": 0.256038342687772, "grad_norm": 126.61786329513002, "learning_rate": 1.7103979991650433e-05, "loss": 31.5751, "step": 16240 }, { "epoch": 0.25619600176578167, "grad_norm": 107.29277775780211, "learning_rate": 1.7100457924461868e-05, "loss": 31.2116, "step": 16250 }, { "epoch": 0.2563536608437914, "grad_norm": 107.53170147100863, "learning_rate": 1.7096934079961477e-05, "loss": 31.7389, "step": 16260 }, { "epoch": 0.2565113199218011, "grad_norm": 109.36585515782396, "learning_rate": 1.7093408459031318e-05, "loss": 31.5414, "step": 16270 }, { "epoch": 0.2566689789998108, "grad_norm": 115.53179266147512, "learning_rate": 1.7089881062553886e-05, "loss": 31.7838, "step": 16280 }, { "epoch": 0.2568266380778205, "grad_norm": 120.38968889344714, "learning_rate": 1.708635189141212e-05, "loss": 31.7624, "step": 16290 }, { "epoch": 0.2569842971558302, "grad_norm": 113.6141356535995, "learning_rate": 1.7082820946489404e-05, "loss": 31.9655, "step": 16300 }, { "epoch": 0.25714195623383995, "grad_norm": 104.87484519868445, "learning_rate": 1.707928822866957e-05, "loss": 30.6816, "step": 16310 }, { "epoch": 0.25729961531184964, "grad_norm": 114.25780251683327, "learning_rate": 1.707575373883689e-05, "loss": 31.1803, "step": 16320 }, { "epoch": 0.2574572743898594, "grad_norm": 114.22817684605992, "learning_rate": 1.7072217477876076e-05, "loss": 30.9962, "step": 16330 }, { "epoch": 0.25761493346786907, "grad_norm": 101.39774696817838, "learning_rate": 1.7068679446672288e-05, "loss": 30.5176, "step": 16340 }, { "epoch": 0.2577725925458788, "grad_norm": 125.3337174013572, "learning_rate": 1.706513964611114e-05, "loss": 31.753, "step": 16350 }, { "epoch": 0.2579302516238885, "grad_norm": 117.44006101913156, "learning_rate": 1.706159807707866e-05, "loss": 32.0329, "step": 16360 }, { "epoch": 0.25808791070189824, "grad_norm": 107.50086467839539, "learning_rate": 1.705805474046135e-05, "loss": 32.3368, "step": 16370 }, { "epoch": 0.2582455697799079, "grad_norm": 113.95355967509016, "learning_rate": 1.7054509637146137e-05, "loss": 31.7946, "step": 16380 }, { "epoch": 0.25840322885791767, "grad_norm": 114.27900950390858, "learning_rate": 1.7050962768020392e-05, "loss": 30.8465, "step": 16390 }, { "epoch": 0.25856088793592735, "grad_norm": 125.34054908902696, "learning_rate": 1.704741413397193e-05, "loss": 32.228, "step": 16400 }, { "epoch": 0.25871854701393704, "grad_norm": 114.47063314621302, "learning_rate": 1.7043863735889007e-05, "loss": 31.6404, "step": 16410 }, { "epoch": 0.2588762060919468, "grad_norm": 111.41846340108097, "learning_rate": 1.7040311574660325e-05, "loss": 31.3748, "step": 16420 }, { "epoch": 0.25903386516995647, "grad_norm": 134.30951437968062, "learning_rate": 1.703675765117502e-05, "loss": 30.7395, "step": 16430 }, { "epoch": 0.2591915242479662, "grad_norm": 117.0228029396714, "learning_rate": 1.7033201966322675e-05, "loss": 31.6698, "step": 16440 }, { "epoch": 0.2593491833259759, "grad_norm": 102.30946833320182, "learning_rate": 1.702964452099331e-05, "loss": 31.2496, "step": 16450 }, { "epoch": 0.25950684240398564, "grad_norm": 114.67772355624999, "learning_rate": 1.7026085316077387e-05, "loss": 31.7321, "step": 16460 }, { "epoch": 0.2596645014819953, "grad_norm": 116.91101035941493, "learning_rate": 1.7022524352465813e-05, "loss": 31.7017, "step": 16470 }, { "epoch": 0.25982216056000507, "grad_norm": 110.74117609275467, "learning_rate": 1.7018961631049924e-05, "loss": 32.3484, "step": 16480 }, { "epoch": 0.25997981963801475, "grad_norm": 110.84929889667124, "learning_rate": 1.7015397152721506e-05, "loss": 32.018, "step": 16490 }, { "epoch": 0.2601374787160245, "grad_norm": 2138.633101893244, "learning_rate": 1.7011830918372783e-05, "loss": 31.179, "step": 16500 }, { "epoch": 0.2602951377940342, "grad_norm": 115.21451624632216, "learning_rate": 1.700826292889642e-05, "loss": 31.2474, "step": 16510 }, { "epoch": 0.26045279687204387, "grad_norm": 110.41818205014584, "learning_rate": 1.7004693185185506e-05, "loss": 31.1292, "step": 16520 }, { "epoch": 0.2606104559500536, "grad_norm": 109.05186574316109, "learning_rate": 1.7001121688133598e-05, "loss": 31.9305, "step": 16530 }, { "epoch": 0.2607681150280633, "grad_norm": 111.441718732044, "learning_rate": 1.6997548438634662e-05, "loss": 30.7056, "step": 16540 }, { "epoch": 0.26092577410607304, "grad_norm": 118.47478084598804, "learning_rate": 1.6993973437583126e-05, "loss": 31.4838, "step": 16550 }, { "epoch": 0.2610834331840827, "grad_norm": 106.60042103237606, "learning_rate": 1.699039668587384e-05, "loss": 31.2846, "step": 16560 }, { "epoch": 0.26124109226209247, "grad_norm": 105.72167136524445, "learning_rate": 1.6986818184402102e-05, "loss": 30.6334, "step": 16570 }, { "epoch": 0.26139875134010215, "grad_norm": 112.23664271752918, "learning_rate": 1.6983237934063646e-05, "loss": 31.1702, "step": 16580 }, { "epoch": 0.2615564104181119, "grad_norm": 108.15537758181765, "learning_rate": 1.697965593575464e-05, "loss": 31.7153, "step": 16590 }, { "epoch": 0.2617140694961216, "grad_norm": 116.4210623466635, "learning_rate": 1.697607219037169e-05, "loss": 32.1397, "step": 16600 }, { "epoch": 0.2618717285741313, "grad_norm": 111.54773817720758, "learning_rate": 1.6972486698811847e-05, "loss": 30.6375, "step": 16610 }, { "epoch": 0.262029387652141, "grad_norm": 788.2778974555441, "learning_rate": 1.696889946197259e-05, "loss": 33.032, "step": 16620 }, { "epoch": 0.2621870467301507, "grad_norm": 108.31229409189726, "learning_rate": 1.696531048075184e-05, "loss": 31.2693, "step": 16630 }, { "epoch": 0.26234470580816044, "grad_norm": 112.89142373895011, "learning_rate": 1.6961719756047954e-05, "loss": 31.7911, "step": 16640 }, { "epoch": 0.2625023648861701, "grad_norm": 115.8411944674148, "learning_rate": 1.695812728875972e-05, "loss": 31.4978, "step": 16650 }, { "epoch": 0.26266002396417987, "grad_norm": 146.21407542154853, "learning_rate": 1.6954533079786365e-05, "loss": 32.1015, "step": 16660 }, { "epoch": 0.26281768304218955, "grad_norm": 111.53165768064902, "learning_rate": 1.6950937130027562e-05, "loss": 31.2302, "step": 16670 }, { "epoch": 0.2629753421201993, "grad_norm": 115.71137192755953, "learning_rate": 1.6947339440383403e-05, "loss": 30.7361, "step": 16680 }, { "epoch": 0.263133001198209, "grad_norm": 111.12569400885961, "learning_rate": 1.6943740011754433e-05, "loss": 30.279, "step": 16690 }, { "epoch": 0.2632906602762187, "grad_norm": 108.23418782477727, "learning_rate": 1.6940138845041615e-05, "loss": 30.9265, "step": 16700 }, { "epoch": 0.2634483193542284, "grad_norm": 109.62484003390576, "learning_rate": 1.6936535941146353e-05, "loss": 31.0581, "step": 16710 }, { "epoch": 0.26360597843223815, "grad_norm": 103.49827903203624, "learning_rate": 1.6932931300970498e-05, "loss": 30.8059, "step": 16720 }, { "epoch": 0.26376363751024784, "grad_norm": 118.50008155886597, "learning_rate": 1.692932492541632e-05, "loss": 31.5412, "step": 16730 }, { "epoch": 0.2639212965882575, "grad_norm": 111.56894885893448, "learning_rate": 1.692571681538653e-05, "loss": 31.5533, "step": 16740 }, { "epoch": 0.26407895566626727, "grad_norm": 105.95714030251997, "learning_rate": 1.6922106971784266e-05, "loss": 31.9043, "step": 16750 }, { "epoch": 0.26423661474427695, "grad_norm": 179.1894820229574, "learning_rate": 1.6918495395513118e-05, "loss": 31.3429, "step": 16760 }, { "epoch": 0.2643942738222867, "grad_norm": 106.5190603114857, "learning_rate": 1.6914882087477084e-05, "loss": 30.8777, "step": 16770 }, { "epoch": 0.2645519329002964, "grad_norm": 122.44889812712151, "learning_rate": 1.691126704858062e-05, "loss": 31.2431, "step": 16780 }, { "epoch": 0.2647095919783061, "grad_norm": 115.92961111434724, "learning_rate": 1.69076502797286e-05, "loss": 30.8103, "step": 16790 }, { "epoch": 0.2648672510563158, "grad_norm": 113.64065189542482, "learning_rate": 1.6904031781826336e-05, "loss": 31.4398, "step": 16800 }, { "epoch": 0.26502491013432555, "grad_norm": 114.1022275633658, "learning_rate": 1.690041155577957e-05, "loss": 31.8913, "step": 16810 }, { "epoch": 0.26518256921233524, "grad_norm": 104.34891631511631, "learning_rate": 1.689678960249448e-05, "loss": 30.8424, "step": 16820 }, { "epoch": 0.265340228290345, "grad_norm": 127.51471497168548, "learning_rate": 1.6893165922877684e-05, "loss": 31.399, "step": 16830 }, { "epoch": 0.26549788736835467, "grad_norm": 107.02513896312036, "learning_rate": 1.688954051783621e-05, "loss": 35.0804, "step": 16840 }, { "epoch": 0.26565554644636435, "grad_norm": 107.38555670600024, "learning_rate": 1.6885913388277537e-05, "loss": 30.52, "step": 16850 }, { "epoch": 0.2658132055243741, "grad_norm": 119.45956473601805, "learning_rate": 1.688228453510957e-05, "loss": 31.7327, "step": 16860 }, { "epoch": 0.2659708646023838, "grad_norm": 103.827139737559, "learning_rate": 1.6878653959240648e-05, "loss": 30.8745, "step": 16870 }, { "epoch": 0.2661285236803935, "grad_norm": 109.55400169325405, "learning_rate": 1.6875021661579533e-05, "loss": 30.9793, "step": 16880 }, { "epoch": 0.2662861827584032, "grad_norm": 108.34752412923982, "learning_rate": 1.6871387643035423e-05, "loss": 31.0321, "step": 16890 }, { "epoch": 0.26644384183641295, "grad_norm": 105.17270515581922, "learning_rate": 1.6867751904517957e-05, "loss": 30.8697, "step": 16900 }, { "epoch": 0.26660150091442264, "grad_norm": 112.85388391452089, "learning_rate": 1.686411444693718e-05, "loss": 32.2152, "step": 16910 }, { "epoch": 0.2667591599924324, "grad_norm": 105.36153161571123, "learning_rate": 1.68604752712036e-05, "loss": 30.9345, "step": 16920 }, { "epoch": 0.26691681907044207, "grad_norm": 108.48733630017638, "learning_rate": 1.6856834378228116e-05, "loss": 31.2912, "step": 16930 }, { "epoch": 0.2670744781484518, "grad_norm": 130.65003424765965, "learning_rate": 1.68531917689221e-05, "loss": 31.3185, "step": 16940 }, { "epoch": 0.2672321372264615, "grad_norm": 101.33923258597193, "learning_rate": 1.6849547444197312e-05, "loss": 31.4724, "step": 16950 }, { "epoch": 0.26738979630447124, "grad_norm": 114.18692346143199, "learning_rate": 1.684590140496597e-05, "loss": 31.0315, "step": 16960 }, { "epoch": 0.2675474553824809, "grad_norm": 109.85424363095089, "learning_rate": 1.6842253652140716e-05, "loss": 32.7279, "step": 16970 }, { "epoch": 0.2677051144604906, "grad_norm": 128.1273693034774, "learning_rate": 1.6838604186634612e-05, "loss": 31.1878, "step": 16980 }, { "epoch": 0.26786277353850035, "grad_norm": 108.87047273103977, "learning_rate": 1.6834953009361153e-05, "loss": 30.9855, "step": 16990 }, { "epoch": 0.26802043261651004, "grad_norm": 107.22922965740018, "learning_rate": 1.6831300121234266e-05, "loss": 31.0291, "step": 17000 }, { "epoch": 0.2681780916945198, "grad_norm": 111.4416005614792, "learning_rate": 1.6827645523168302e-05, "loss": 30.4932, "step": 17010 }, { "epoch": 0.26833575077252947, "grad_norm": 117.27176345058609, "learning_rate": 1.682398921607804e-05, "loss": 30.726, "step": 17020 }, { "epoch": 0.2684934098505392, "grad_norm": 113.50238410510916, "learning_rate": 1.6820331200878695e-05, "loss": 30.1741, "step": 17030 }, { "epoch": 0.2686510689285489, "grad_norm": 107.49679456415105, "learning_rate": 1.6816671478485895e-05, "loss": 31.0466, "step": 17040 }, { "epoch": 0.26880872800655864, "grad_norm": 99.81862237478725, "learning_rate": 1.6813010049815706e-05, "loss": 30.6658, "step": 17050 }, { "epoch": 0.2689663870845683, "grad_norm": 105.19621299739647, "learning_rate": 1.680934691578462e-05, "loss": 30.9114, "step": 17060 }, { "epoch": 0.26912404616257807, "grad_norm": 110.2623440032851, "learning_rate": 1.680568207730955e-05, "loss": 31.5891, "step": 17070 }, { "epoch": 0.26928170524058775, "grad_norm": 105.41092434505342, "learning_rate": 1.6802015535307842e-05, "loss": 31.2081, "step": 17080 }, { "epoch": 0.26943936431859744, "grad_norm": 115.11525011302481, "learning_rate": 1.6798347290697266e-05, "loss": 31.0989, "step": 17090 }, { "epoch": 0.2695970233966072, "grad_norm": 108.71498511571257, "learning_rate": 1.679467734439602e-05, "loss": 33.6724, "step": 17100 }, { "epoch": 0.26975468247461687, "grad_norm": 114.0976646702436, "learning_rate": 1.6791005697322726e-05, "loss": 30.7812, "step": 17110 }, { "epoch": 0.2699123415526266, "grad_norm": 104.27849319447623, "learning_rate": 1.6787332350396428e-05, "loss": 31.1528, "step": 17120 }, { "epoch": 0.2700700006306363, "grad_norm": 106.09580210486872, "learning_rate": 1.6783657304536607e-05, "loss": 31.6405, "step": 17130 }, { "epoch": 0.27022765970864604, "grad_norm": 102.45416471214068, "learning_rate": 1.6779980560663153e-05, "loss": 30.3814, "step": 17140 }, { "epoch": 0.2703853187866557, "grad_norm": 99.50337808532043, "learning_rate": 1.6776302119696394e-05, "loss": 29.3625, "step": 17150 }, { "epoch": 0.27054297786466547, "grad_norm": 112.78807307330716, "learning_rate": 1.6772621982557084e-05, "loss": 32.2471, "step": 17160 }, { "epoch": 0.27070063694267515, "grad_norm": 112.60086156804088, "learning_rate": 1.6768940150166386e-05, "loss": 30.5971, "step": 17170 }, { "epoch": 0.2708582960206849, "grad_norm": 102.82414525935687, "learning_rate": 1.6765256623445902e-05, "loss": 30.9575, "step": 17180 }, { "epoch": 0.2710159550986946, "grad_norm": 103.41386831129905, "learning_rate": 1.6761571403317658e-05, "loss": 30.6049, "step": 17190 }, { "epoch": 0.27117361417670427, "grad_norm": 104.04000722579093, "learning_rate": 1.6757884490704093e-05, "loss": 31.2335, "step": 17200 }, { "epoch": 0.271331273254714, "grad_norm": 102.21629811075522, "learning_rate": 1.6754195886528077e-05, "loss": 31.3764, "step": 17210 }, { "epoch": 0.2714889323327237, "grad_norm": 107.17237400901566, "learning_rate": 1.6750505591712906e-05, "loss": 32.5429, "step": 17220 }, { "epoch": 0.27164659141073344, "grad_norm": 112.84147600018447, "learning_rate": 1.6746813607182293e-05, "loss": 30.7222, "step": 17230 }, { "epoch": 0.2718042504887431, "grad_norm": 112.54314296300392, "learning_rate": 1.6743119933860378e-05, "loss": 30.7659, "step": 17240 }, { "epoch": 0.27196190956675287, "grad_norm": 113.93293065538667, "learning_rate": 1.673942457267172e-05, "loss": 30.8151, "step": 17250 }, { "epoch": 0.27211956864476256, "grad_norm": 108.95185993810037, "learning_rate": 1.6735727524541304e-05, "loss": 30.656, "step": 17260 }, { "epoch": 0.2722772277227723, "grad_norm": 116.08462339961558, "learning_rate": 1.6732028790394543e-05, "loss": 30.636, "step": 17270 }, { "epoch": 0.272434886800782, "grad_norm": 106.63567658142946, "learning_rate": 1.672832837115725e-05, "loss": 29.7802, "step": 17280 }, { "epoch": 0.2725925458787917, "grad_norm": 107.20587612636811, "learning_rate": 1.6724626267755694e-05, "loss": 30.5151, "step": 17290 }, { "epoch": 0.2727502049568014, "grad_norm": 119.18643693016479, "learning_rate": 1.6720922481116528e-05, "loss": 30.0388, "step": 17300 }, { "epoch": 0.2729078640348111, "grad_norm": 108.18362411963292, "learning_rate": 1.671721701216686e-05, "loss": 30.3206, "step": 17310 }, { "epoch": 0.27306552311282084, "grad_norm": 110.09564050589964, "learning_rate": 1.671350986183419e-05, "loss": 30.9065, "step": 17320 }, { "epoch": 0.2732231821908305, "grad_norm": 110.70656977593418, "learning_rate": 1.6709801031046465e-05, "loss": 31.0126, "step": 17330 }, { "epoch": 0.27338084126884027, "grad_norm": 115.28401767907452, "learning_rate": 1.6706090520732036e-05, "loss": 30.9002, "step": 17340 }, { "epoch": 0.27353850034684996, "grad_norm": 111.12949291457055, "learning_rate": 1.6702378331819676e-05, "loss": 30.2599, "step": 17350 }, { "epoch": 0.2736961594248597, "grad_norm": 113.15073592441794, "learning_rate": 1.6698664465238584e-05, "loss": 30.9574, "step": 17360 }, { "epoch": 0.2738538185028694, "grad_norm": 110.99867666768154, "learning_rate": 1.6694948921918378e-05, "loss": 30.982, "step": 17370 }, { "epoch": 0.2740114775808791, "grad_norm": 110.93504465157409, "learning_rate": 1.669123170278909e-05, "loss": 31.1811, "step": 17380 }, { "epoch": 0.2741691366588888, "grad_norm": 104.45844959037258, "learning_rate": 1.668751280878117e-05, "loss": 30.9599, "step": 17390 }, { "epoch": 0.27432679573689855, "grad_norm": 105.29418149821545, "learning_rate": 1.6683792240825507e-05, "loss": 30.4022, "step": 17400 }, { "epoch": 0.27448445481490824, "grad_norm": 112.88016715743542, "learning_rate": 1.668006999985338e-05, "loss": 31.2048, "step": 17410 }, { "epoch": 0.2746421138929179, "grad_norm": 999.7808567366861, "learning_rate": 1.667634608679651e-05, "loss": 30.6898, "step": 17420 }, { "epoch": 0.27479977297092767, "grad_norm": 107.59570812734903, "learning_rate": 1.667262050258702e-05, "loss": 30.8007, "step": 17430 }, { "epoch": 0.27495743204893736, "grad_norm": 113.65268166059172, "learning_rate": 1.6668893248157472e-05, "loss": 31.2025, "step": 17440 }, { "epoch": 0.2751150911269471, "grad_norm": 105.52151446195019, "learning_rate": 1.666516432444082e-05, "loss": 30.1142, "step": 17450 }, { "epoch": 0.2752727502049568, "grad_norm": 109.1477735843145, "learning_rate": 1.6661433732370453e-05, "loss": 30.8411, "step": 17460 }, { "epoch": 0.2754304092829665, "grad_norm": 102.50061706719737, "learning_rate": 1.6657701472880176e-05, "loss": 30.2388, "step": 17470 }, { "epoch": 0.2755880683609762, "grad_norm": 113.17018913800699, "learning_rate": 1.6653967546904207e-05, "loss": 29.9715, "step": 17480 }, { "epoch": 0.27574572743898595, "grad_norm": 104.51419844312575, "learning_rate": 1.6650231955377182e-05, "loss": 30.5449, "step": 17490 }, { "epoch": 0.27590338651699564, "grad_norm": 109.10867326515216, "learning_rate": 1.6646494699234156e-05, "loss": 30.4263, "step": 17500 }, { "epoch": 0.2760610455950054, "grad_norm": 116.23836423482902, "learning_rate": 1.6642755779410603e-05, "loss": 31.2352, "step": 17510 }, { "epoch": 0.27621870467301507, "grad_norm": 103.91690969374181, "learning_rate": 1.6639015196842402e-05, "loss": 29.9544, "step": 17520 }, { "epoch": 0.2763763637510248, "grad_norm": 112.27535702529515, "learning_rate": 1.6635272952465866e-05, "loss": 31.1683, "step": 17530 }, { "epoch": 0.2765340228290345, "grad_norm": 110.74061678058398, "learning_rate": 1.6631529047217704e-05, "loss": 30.8649, "step": 17540 }, { "epoch": 0.2766916819070442, "grad_norm": 109.06615884884982, "learning_rate": 1.6627783482035056e-05, "loss": 31.093, "step": 17550 }, { "epoch": 0.2768493409850539, "grad_norm": 107.87817346912948, "learning_rate": 1.6624036257855475e-05, "loss": 31.326, "step": 17560 }, { "epoch": 0.2770070000630636, "grad_norm": 106.02917925584877, "learning_rate": 1.6620287375616925e-05, "loss": 30.9805, "step": 17570 }, { "epoch": 0.27716465914107336, "grad_norm": 106.45252462074127, "learning_rate": 1.661653683625778e-05, "loss": 31.0798, "step": 17580 }, { "epoch": 0.27732231821908304, "grad_norm": 105.50556638117638, "learning_rate": 1.6612784640716847e-05, "loss": 30.7915, "step": 17590 }, { "epoch": 0.2774799772970928, "grad_norm": 110.91420330369968, "learning_rate": 1.6609030789933326e-05, "loss": 30.0406, "step": 17600 }, { "epoch": 0.27763763637510247, "grad_norm": 101.81799850367754, "learning_rate": 1.660527528484685e-05, "loss": 30.0344, "step": 17610 }, { "epoch": 0.2777952954531122, "grad_norm": 111.65942880799028, "learning_rate": 1.6601518126397447e-05, "loss": 31.9343, "step": 17620 }, { "epoch": 0.2779529545311219, "grad_norm": 109.0099889755388, "learning_rate": 1.659775931552558e-05, "loss": 31.1018, "step": 17630 }, { "epoch": 0.27811061360913164, "grad_norm": 106.73527944638504, "learning_rate": 1.6593998853172112e-05, "loss": 31.1983, "step": 17640 }, { "epoch": 0.2782682726871413, "grad_norm": 111.57495714850678, "learning_rate": 1.659023674027832e-05, "loss": 30.7117, "step": 17650 }, { "epoch": 0.278425931765151, "grad_norm": 111.27924599256127, "learning_rate": 1.658647297778589e-05, "loss": 31.1943, "step": 17660 }, { "epoch": 0.27858359084316076, "grad_norm": 104.98882105186166, "learning_rate": 1.6582707566636943e-05, "loss": 30.668, "step": 17670 }, { "epoch": 0.27874124992117044, "grad_norm": 106.01737775241065, "learning_rate": 1.6578940507773986e-05, "loss": 30.9099, "step": 17680 }, { "epoch": 0.2788989089991802, "grad_norm": 107.57085419433282, "learning_rate": 1.6575171802139955e-05, "loss": 30.8989, "step": 17690 }, { "epoch": 0.27905656807718987, "grad_norm": 111.44410575809884, "learning_rate": 1.6571401450678186e-05, "loss": 30.3942, "step": 17700 }, { "epoch": 0.2792142271551996, "grad_norm": 118.6681609133664, "learning_rate": 1.6567629454332437e-05, "loss": 30.6502, "step": 17710 }, { "epoch": 0.2793718862332093, "grad_norm": 105.56535805716203, "learning_rate": 1.6563855814046875e-05, "loss": 30.751, "step": 17720 }, { "epoch": 0.27952954531121904, "grad_norm": 110.7497830585941, "learning_rate": 1.656008053076608e-05, "loss": 30.5539, "step": 17730 }, { "epoch": 0.2796872043892287, "grad_norm": 103.42047670375948, "learning_rate": 1.6556303605435035e-05, "loss": 29.9159, "step": 17740 }, { "epoch": 0.27984486346723847, "grad_norm": 104.64844287734883, "learning_rate": 1.6552525038999148e-05, "loss": 29.4837, "step": 17750 }, { "epoch": 0.28000252254524816, "grad_norm": 105.35482030863749, "learning_rate": 1.6548744832404224e-05, "loss": 30.0977, "step": 17760 }, { "epoch": 0.28016018162325784, "grad_norm": 103.34234592844008, "learning_rate": 1.6544962986596487e-05, "loss": 31.4249, "step": 17770 }, { "epoch": 0.2803178407012676, "grad_norm": 102.21766479360063, "learning_rate": 1.654117950252257e-05, "loss": 30.3653, "step": 17780 }, { "epoch": 0.28047549977927727, "grad_norm": 111.04843029482268, "learning_rate": 1.6537394381129507e-05, "loss": 31.1803, "step": 17790 }, { "epoch": 0.280633158857287, "grad_norm": 105.12751624001464, "learning_rate": 1.653360762336476e-05, "loss": 29.762, "step": 17800 }, { "epoch": 0.2807908179352967, "grad_norm": 106.73030483393367, "learning_rate": 1.6529819230176186e-05, "loss": 31.2187, "step": 17810 }, { "epoch": 0.28094847701330644, "grad_norm": 106.89654395864484, "learning_rate": 1.652602920251206e-05, "loss": 29.6043, "step": 17820 }, { "epoch": 0.28110613609131613, "grad_norm": 115.0752754216652, "learning_rate": 1.652223754132105e-05, "loss": 31.1488, "step": 17830 }, { "epoch": 0.28126379516932587, "grad_norm": 110.62683575652395, "learning_rate": 1.6518444247552258e-05, "loss": 30.7421, "step": 17840 }, { "epoch": 0.28142145424733556, "grad_norm": 111.39887182988565, "learning_rate": 1.651464932215517e-05, "loss": 30.6134, "step": 17850 }, { "epoch": 0.2815791133253453, "grad_norm": 113.60370583718004, "learning_rate": 1.65108527660797e-05, "loss": 30.0699, "step": 17860 }, { "epoch": 0.281736772403355, "grad_norm": 101.45782345201663, "learning_rate": 1.6507054580276164e-05, "loss": 29.8367, "step": 17870 }, { "epoch": 0.28189443148136467, "grad_norm": 114.53305988869134, "learning_rate": 1.6503254765695273e-05, "loss": 31.5275, "step": 17880 }, { "epoch": 0.2820520905593744, "grad_norm": 102.93802006926771, "learning_rate": 1.6499453323288164e-05, "loss": 30.4086, "step": 17890 }, { "epoch": 0.2822097496373841, "grad_norm": 100.15426727194979, "learning_rate": 1.6495650254006372e-05, "loss": 30.0516, "step": 17900 }, { "epoch": 0.28236740871539384, "grad_norm": 104.29408973059311, "learning_rate": 1.6491845558801842e-05, "loss": 30.4277, "step": 17910 }, { "epoch": 0.28252506779340353, "grad_norm": 111.17356446603715, "learning_rate": 1.6488039238626927e-05, "loss": 30.6189, "step": 17920 }, { "epoch": 0.28268272687141327, "grad_norm": 104.09277835689151, "learning_rate": 1.6484231294434384e-05, "loss": 30.4493, "step": 17930 }, { "epoch": 0.28284038594942296, "grad_norm": 102.9151760196748, "learning_rate": 1.6480421727177375e-05, "loss": 29.8132, "step": 17940 }, { "epoch": 0.2829980450274327, "grad_norm": 119.76051489603213, "learning_rate": 1.6476610537809473e-05, "loss": 31.4081, "step": 17950 }, { "epoch": 0.2831557041054424, "grad_norm": 106.40018446806035, "learning_rate": 1.647279772728465e-05, "loss": 29.9187, "step": 17960 }, { "epoch": 0.2833133631834521, "grad_norm": 109.3770555579015, "learning_rate": 1.6468983296557298e-05, "loss": 30.4837, "step": 17970 }, { "epoch": 0.2834710222614618, "grad_norm": 105.76479070323143, "learning_rate": 1.64651672465822e-05, "loss": 30.0607, "step": 17980 }, { "epoch": 0.2836286813394715, "grad_norm": 123.11332882310907, "learning_rate": 1.6461349578314545e-05, "loss": 30.2038, "step": 17990 }, { "epoch": 0.28378634041748124, "grad_norm": 106.90848778492143, "learning_rate": 1.6457530292709936e-05, "loss": 29.2041, "step": 18000 }, { "epoch": 0.28394399949549093, "grad_norm": 109.92300713468244, "learning_rate": 1.645370939072438e-05, "loss": 29.9889, "step": 18010 }, { "epoch": 0.28410165857350067, "grad_norm": 103.25533940446493, "learning_rate": 1.6449886873314277e-05, "loss": 29.9099, "step": 18020 }, { "epoch": 0.28425931765151036, "grad_norm": 119.52351568069375, "learning_rate": 1.6446062741436448e-05, "loss": 30.3593, "step": 18030 }, { "epoch": 0.2844169767295201, "grad_norm": 109.62478098413493, "learning_rate": 1.64422369960481e-05, "loss": 31.7347, "step": 18040 }, { "epoch": 0.2845746358075298, "grad_norm": 101.58670861741771, "learning_rate": 1.643840963810686e-05, "loss": 29.9664, "step": 18050 }, { "epoch": 0.2847322948855395, "grad_norm": 107.68884153985546, "learning_rate": 1.6434580668570748e-05, "loss": 29.964, "step": 18060 }, { "epoch": 0.2848899539635492, "grad_norm": 107.34447058762461, "learning_rate": 1.6430750088398193e-05, "loss": 30.8012, "step": 18070 }, { "epoch": 0.28504761304155896, "grad_norm": 106.21635414416384, "learning_rate": 1.6426917898548027e-05, "loss": 29.9764, "step": 18080 }, { "epoch": 0.28520527211956864, "grad_norm": 107.8306373515507, "learning_rate": 1.6423084099979485e-05, "loss": 30.4859, "step": 18090 }, { "epoch": 0.28536293119757833, "grad_norm": 112.63296508500589, "learning_rate": 1.6419248693652196e-05, "loss": 29.9512, "step": 18100 }, { "epoch": 0.28552059027558807, "grad_norm": 105.72464368433681, "learning_rate": 1.6415411680526204e-05, "loss": 29.8311, "step": 18110 }, { "epoch": 0.28567824935359776, "grad_norm": 108.6934627688226, "learning_rate": 1.6411573061561948e-05, "loss": 29.9028, "step": 18120 }, { "epoch": 0.2858359084316075, "grad_norm": 107.67026335295502, "learning_rate": 1.640773283772027e-05, "loss": 29.9485, "step": 18130 }, { "epoch": 0.2859935675096172, "grad_norm": 107.80531224885242, "learning_rate": 1.6403891009962415e-05, "loss": 30.2181, "step": 18140 }, { "epoch": 0.28615122658762693, "grad_norm": 102.91379908883317, "learning_rate": 1.640004757925003e-05, "loss": 30.227, "step": 18150 }, { "epoch": 0.2863088856656366, "grad_norm": 99.51566146238318, "learning_rate": 1.6396202546545163e-05, "loss": 30.794, "step": 18160 }, { "epoch": 0.28646654474364636, "grad_norm": 96.65738555017627, "learning_rate": 1.639235591281026e-05, "loss": 29.9076, "step": 18170 }, { "epoch": 0.28662420382165604, "grad_norm": 103.77246153189819, "learning_rate": 1.638850767900817e-05, "loss": 30.0705, "step": 18180 }, { "epoch": 0.2867818628996658, "grad_norm": 121.14226863874934, "learning_rate": 1.6384657846102138e-05, "loss": 30.0126, "step": 18190 }, { "epoch": 0.28693952197767547, "grad_norm": 110.09491092869843, "learning_rate": 1.6380806415055825e-05, "loss": 30.1049, "step": 18200 }, { "epoch": 0.2870971810556852, "grad_norm": 109.40592329583752, "learning_rate": 1.6376953386833273e-05, "loss": 30.578, "step": 18210 }, { "epoch": 0.2872548401336949, "grad_norm": 107.93100530341925, "learning_rate": 1.637309876239893e-05, "loss": 30.1849, "step": 18220 }, { "epoch": 0.2874124992117046, "grad_norm": 109.9930170327524, "learning_rate": 1.6369242542717646e-05, "loss": 30.6299, "step": 18230 }, { "epoch": 0.28757015828971433, "grad_norm": 103.06742616194842, "learning_rate": 1.6365384728754673e-05, "loss": 30.2508, "step": 18240 }, { "epoch": 0.287727817367724, "grad_norm": 103.83439623755997, "learning_rate": 1.6361525321475652e-05, "loss": 29.6962, "step": 18250 }, { "epoch": 0.28788547644573376, "grad_norm": 108.23953864656886, "learning_rate": 1.6357664321846637e-05, "loss": 30.5898, "step": 18260 }, { "epoch": 0.28804313552374344, "grad_norm": 102.75090810725095, "learning_rate": 1.6353801730834066e-05, "loss": 29.7205, "step": 18270 }, { "epoch": 0.2882007946017532, "grad_norm": 109.27525775446163, "learning_rate": 1.6349937549404786e-05, "loss": 29.9586, "step": 18280 }, { "epoch": 0.28835845367976287, "grad_norm": 116.18884285729891, "learning_rate": 1.6346071778526037e-05, "loss": 30.2523, "step": 18290 }, { "epoch": 0.2885161127577726, "grad_norm": 109.8682636366281, "learning_rate": 1.6342204419165457e-05, "loss": 29.5794, "step": 18300 }, { "epoch": 0.2886737718357823, "grad_norm": 107.1811058096124, "learning_rate": 1.6338335472291085e-05, "loss": 30.8509, "step": 18310 }, { "epoch": 0.28883143091379204, "grad_norm": 106.98729558602429, "learning_rate": 1.6334464938871352e-05, "loss": 29.5138, "step": 18320 }, { "epoch": 0.28898908999180173, "grad_norm": 113.36141404054449, "learning_rate": 1.6330592819875093e-05, "loss": 30.3201, "step": 18330 }, { "epoch": 0.2891467490698114, "grad_norm": 99.64198820989947, "learning_rate": 1.632671911627153e-05, "loss": 29.5144, "step": 18340 }, { "epoch": 0.28930440814782116, "grad_norm": 109.09543882146397, "learning_rate": 1.63228438290303e-05, "loss": 30.0958, "step": 18350 }, { "epoch": 0.28946206722583084, "grad_norm": 104.41043806863244, "learning_rate": 1.6318966959121408e-05, "loss": 30.2219, "step": 18360 }, { "epoch": 0.2896197263038406, "grad_norm": 104.53150837577955, "learning_rate": 1.631508850751528e-05, "loss": 30.5794, "step": 18370 }, { "epoch": 0.28977738538185027, "grad_norm": 105.34465085149203, "learning_rate": 1.631120847518273e-05, "loss": 30.2432, "step": 18380 }, { "epoch": 0.28993504445986, "grad_norm": 99.79428826348504, "learning_rate": 1.6307326863094965e-05, "loss": 29.8371, "step": 18390 }, { "epoch": 0.2900927035378697, "grad_norm": 109.26337324962395, "learning_rate": 1.630344367222359e-05, "loss": 29.9601, "step": 18400 }, { "epoch": 0.29025036261587944, "grad_norm": 110.62982721386211, "learning_rate": 1.6299558903540596e-05, "loss": 29.9289, "step": 18410 }, { "epoch": 0.29040802169388913, "grad_norm": 101.20129329806808, "learning_rate": 1.6295672558018394e-05, "loss": 29.8849, "step": 18420 }, { "epoch": 0.29056568077189887, "grad_norm": 113.83510239222949, "learning_rate": 1.629178463662976e-05, "loss": 31.441, "step": 18430 }, { "epoch": 0.29072333984990856, "grad_norm": 111.54550438886142, "learning_rate": 1.628789514034788e-05, "loss": 30.2602, "step": 18440 }, { "epoch": 0.29088099892791824, "grad_norm": 104.80351691838011, "learning_rate": 1.628400407014634e-05, "loss": 29.7579, "step": 18450 }, { "epoch": 0.291038658005928, "grad_norm": 103.49641611463817, "learning_rate": 1.6280111426999097e-05, "loss": 29.859, "step": 18460 }, { "epoch": 0.2911963170839377, "grad_norm": 114.26012497479326, "learning_rate": 1.627621721188053e-05, "loss": 30.079, "step": 18470 }, { "epoch": 0.2913539761619474, "grad_norm": 124.79048424014155, "learning_rate": 1.627232142576539e-05, "loss": 29.6254, "step": 18480 }, { "epoch": 0.2915116352399571, "grad_norm": 109.54842236974157, "learning_rate": 1.626842406962883e-05, "loss": 29.5, "step": 18490 }, { "epoch": 0.29166929431796684, "grad_norm": 108.57935781566958, "learning_rate": 1.62645251444464e-05, "loss": 29.8883, "step": 18500 }, { "epoch": 0.29182695339597653, "grad_norm": 106.2173393377666, "learning_rate": 1.6260624651194028e-05, "loss": 30.3053, "step": 18510 }, { "epoch": 0.29198461247398627, "grad_norm": 164.00452522673558, "learning_rate": 1.6256722590848056e-05, "loss": 29.6668, "step": 18520 }, { "epoch": 0.29214227155199596, "grad_norm": 108.21878910595238, "learning_rate": 1.6252818964385202e-05, "loss": 30.2314, "step": 18530 }, { "epoch": 0.2922999306300057, "grad_norm": 101.8759429499018, "learning_rate": 1.6248913772782576e-05, "loss": 29.9988, "step": 18540 }, { "epoch": 0.2924575897080154, "grad_norm": 114.40385098902534, "learning_rate": 1.6245007017017692e-05, "loss": 30.5619, "step": 18550 }, { "epoch": 0.2926152487860251, "grad_norm": 113.29785659503139, "learning_rate": 1.624109869806844e-05, "loss": 30.23, "step": 18560 }, { "epoch": 0.2927729078640348, "grad_norm": 111.24723787299915, "learning_rate": 1.6237188816913118e-05, "loss": 29.1976, "step": 18570 }, { "epoch": 0.2929305669420445, "grad_norm": 115.66922281738232, "learning_rate": 1.62332773745304e-05, "loss": 29.5465, "step": 18580 }, { "epoch": 0.29308822602005424, "grad_norm": 114.21473669517165, "learning_rate": 1.6229364371899362e-05, "loss": 30.6002, "step": 18590 }, { "epoch": 0.29324588509806393, "grad_norm": 109.17249618042298, "learning_rate": 1.6225449809999457e-05, "loss": 30.2035, "step": 18600 }, { "epoch": 0.29340354417607367, "grad_norm": 105.4750675539166, "learning_rate": 1.6221533689810548e-05, "loss": 30.3337, "step": 18610 }, { "epoch": 0.29356120325408336, "grad_norm": 103.14713507574584, "learning_rate": 1.621761601231287e-05, "loss": 30.0568, "step": 18620 }, { "epoch": 0.2937188623320931, "grad_norm": 111.50193450934599, "learning_rate": 1.6213696778487053e-05, "loss": 30.6887, "step": 18630 }, { "epoch": 0.2938765214101028, "grad_norm": 103.5047761567763, "learning_rate": 1.6209775989314123e-05, "loss": 30.7477, "step": 18640 }, { "epoch": 0.29403418048811253, "grad_norm": 102.80567670309783, "learning_rate": 1.6205853645775488e-05, "loss": 29.9012, "step": 18650 }, { "epoch": 0.2941918395661222, "grad_norm": 102.21927356163484, "learning_rate": 1.6201929748852953e-05, "loss": 30.1243, "step": 18660 }, { "epoch": 0.2943494986441319, "grad_norm": 110.5149173996102, "learning_rate": 1.61980042995287e-05, "loss": 30.2323, "step": 18670 }, { "epoch": 0.29450715772214164, "grad_norm": 101.93060010093122, "learning_rate": 1.6194077298785307e-05, "loss": 29.3317, "step": 18680 }, { "epoch": 0.29466481680015133, "grad_norm": 105.4446347525255, "learning_rate": 1.619014874760574e-05, "loss": 29.933, "step": 18690 }, { "epoch": 0.29482247587816107, "grad_norm": 101.54175628613814, "learning_rate": 1.6186218646973357e-05, "loss": 32.4168, "step": 18700 }, { "epoch": 0.29498013495617076, "grad_norm": 103.69830802037242, "learning_rate": 1.6182286997871894e-05, "loss": 29.2275, "step": 18710 }, { "epoch": 0.2951377940341805, "grad_norm": 113.6786056247825, "learning_rate": 1.6178353801285483e-05, "loss": 29.6209, "step": 18720 }, { "epoch": 0.2952954531121902, "grad_norm": 106.49755866732322, "learning_rate": 1.6174419058198635e-05, "loss": 30.2051, "step": 18730 }, { "epoch": 0.29545311219019993, "grad_norm": 123.01785279923884, "learning_rate": 1.617048276959626e-05, "loss": 29.1329, "step": 18740 }, { "epoch": 0.2956107712682096, "grad_norm": 102.47264706828216, "learning_rate": 1.616654493646365e-05, "loss": 29.4666, "step": 18750 }, { "epoch": 0.29576843034621936, "grad_norm": 109.56427915684702, "learning_rate": 1.6162605559786474e-05, "loss": 30.5996, "step": 18760 }, { "epoch": 0.29592608942422904, "grad_norm": 105.21270243989642, "learning_rate": 1.6158664640550797e-05, "loss": 29.8331, "step": 18770 }, { "epoch": 0.2960837485022388, "grad_norm": 106.69698494756274, "learning_rate": 1.6154722179743078e-05, "loss": 29.8516, "step": 18780 }, { "epoch": 0.2962414075802485, "grad_norm": 111.37288450251145, "learning_rate": 1.6150778178350137e-05, "loss": 30.1422, "step": 18790 }, { "epoch": 0.29639906665825816, "grad_norm": 106.18011668065822, "learning_rate": 1.6146832637359208e-05, "loss": 29.1392, "step": 18800 }, { "epoch": 0.2965567257362679, "grad_norm": 99.06679110962212, "learning_rate": 1.6142885557757887e-05, "loss": 29.5763, "step": 18810 }, { "epoch": 0.2967143848142776, "grad_norm": 101.56540451867261, "learning_rate": 1.613893694053417e-05, "loss": 31.2507, "step": 18820 }, { "epoch": 0.29687204389228733, "grad_norm": 103.53055411829284, "learning_rate": 1.6134986786676438e-05, "loss": 29.9212, "step": 18830 }, { "epoch": 0.297029702970297, "grad_norm": 99.77210512937393, "learning_rate": 1.6131035097173444e-05, "loss": 29.766, "step": 18840 }, { "epoch": 0.29718736204830676, "grad_norm": 111.70823424990617, "learning_rate": 1.612708187301434e-05, "loss": 30.4775, "step": 18850 }, { "epoch": 0.29734502112631644, "grad_norm": 103.14989031677922, "learning_rate": 1.612312711518865e-05, "loss": 30.4223, "step": 18860 }, { "epoch": 0.2975026802043262, "grad_norm": 112.33582337099173, "learning_rate": 1.6119170824686285e-05, "loss": 30.422, "step": 18870 }, { "epoch": 0.2976603392823359, "grad_norm": 101.62527311870156, "learning_rate": 1.6115213002497555e-05, "loss": 29.5905, "step": 18880 }, { "epoch": 0.2978179983603456, "grad_norm": 102.49263169443456, "learning_rate": 1.6111253649613126e-05, "loss": 30.8163, "step": 18890 }, { "epoch": 0.2979756574383553, "grad_norm": 104.19835221451419, "learning_rate": 1.610729276702407e-05, "loss": 29.7353, "step": 18900 }, { "epoch": 0.298133316516365, "grad_norm": 105.69383088438676, "learning_rate": 1.6103330355721828e-05, "loss": 29.7447, "step": 18910 }, { "epoch": 0.29829097559437473, "grad_norm": 127.55138466699228, "learning_rate": 1.6099366416698232e-05, "loss": 29.9207, "step": 18920 }, { "epoch": 0.2984486346723844, "grad_norm": 112.07373894185373, "learning_rate": 1.6095400950945494e-05, "loss": 31.039, "step": 18930 }, { "epoch": 0.29860629375039416, "grad_norm": 102.98366344667379, "learning_rate": 1.6091433959456205e-05, "loss": 29.7935, "step": 18940 }, { "epoch": 0.29876395282840384, "grad_norm": 103.95551660111198, "learning_rate": 1.6087465443223345e-05, "loss": 30.192, "step": 18950 }, { "epoch": 0.2989216119064136, "grad_norm": 101.17108802944419, "learning_rate": 1.6083495403240266e-05, "loss": 30.1033, "step": 18960 }, { "epoch": 0.2990792709844233, "grad_norm": 103.35326168649229, "learning_rate": 1.607952384050071e-05, "loss": 29.9788, "step": 18970 }, { "epoch": 0.299236930062433, "grad_norm": 102.11671833255684, "learning_rate": 1.60755507559988e-05, "loss": 29.708, "step": 18980 }, { "epoch": 0.2993945891404427, "grad_norm": 101.00940587117101, "learning_rate": 1.6071576150729023e-05, "loss": 29.2173, "step": 18990 }, { "epoch": 0.29955224821845244, "grad_norm": 98.4957148846017, "learning_rate": 1.6067600025686276e-05, "loss": 29.2371, "step": 19000 }, { "epoch": 0.29970990729646213, "grad_norm": 101.1277893168839, "learning_rate": 1.6063622381865814e-05, "loss": 30.0443, "step": 19010 }, { "epoch": 0.2998675663744718, "grad_norm": 109.09183205925804, "learning_rate": 1.605964322026328e-05, "loss": 29.7463, "step": 19020 }, { "epoch": 0.30002522545248156, "grad_norm": 107.33916455807856, "learning_rate": 1.6055662541874693e-05, "loss": 30.3874, "step": 19030 }, { "epoch": 0.30018288453049125, "grad_norm": 111.56262005363511, "learning_rate": 1.605168034769646e-05, "loss": 29.7029, "step": 19040 }, { "epoch": 0.300340543608501, "grad_norm": 102.51231797841356, "learning_rate": 1.6047696638725355e-05, "loss": 29.8507, "step": 19050 }, { "epoch": 0.3004982026865107, "grad_norm": 104.0209390243304, "learning_rate": 1.604371141595854e-05, "loss": 29.3676, "step": 19060 }, { "epoch": 0.3006558617645204, "grad_norm": 105.95571005615173, "learning_rate": 1.603972468039356e-05, "loss": 30.472, "step": 19070 }, { "epoch": 0.3008135208425301, "grad_norm": 106.17663778097045, "learning_rate": 1.603573643302833e-05, "loss": 29.5257, "step": 19080 }, { "epoch": 0.30097117992053984, "grad_norm": 98.13657160803521, "learning_rate": 1.6031746674861138e-05, "loss": 30.7635, "step": 19090 }, { "epoch": 0.30112883899854953, "grad_norm": 106.07030571865624, "learning_rate": 1.602775540689067e-05, "loss": 30.6141, "step": 19100 }, { "epoch": 0.3012864980765593, "grad_norm": 100.84868543687612, "learning_rate": 1.6023762630115968e-05, "loss": 30.853, "step": 19110 }, { "epoch": 0.30144415715456896, "grad_norm": 126.70709347485054, "learning_rate": 1.6019768345536467e-05, "loss": 28.9214, "step": 19120 }, { "epoch": 0.30160181623257865, "grad_norm": 104.79416377650632, "learning_rate": 1.6015772554151973e-05, "loss": 30.3646, "step": 19130 }, { "epoch": 0.3017594753105884, "grad_norm": 106.73380166558363, "learning_rate": 1.6011775256962677e-05, "loss": 29.963, "step": 19140 }, { "epoch": 0.3019171343885981, "grad_norm": 101.28399866771589, "learning_rate": 1.6007776454969128e-05, "loss": 29.7816, "step": 19150 }, { "epoch": 0.3020747934666078, "grad_norm": 105.0229371122633, "learning_rate": 1.6003776149172275e-05, "loss": 29.4324, "step": 19160 }, { "epoch": 0.3022324525446175, "grad_norm": 110.82724004527697, "learning_rate": 1.5999774340573426e-05, "loss": 29.5452, "step": 19170 }, { "epoch": 0.30239011162262724, "grad_norm": 105.87698293006625, "learning_rate": 1.5995771030174272e-05, "loss": 29.7459, "step": 19180 }, { "epoch": 0.30254777070063693, "grad_norm": 106.38680963892757, "learning_rate": 1.599176621897688e-05, "loss": 29.5723, "step": 19190 }, { "epoch": 0.3027054297786467, "grad_norm": 190.27323622353128, "learning_rate": 1.5987759907983694e-05, "loss": 29.974, "step": 19200 }, { "epoch": 0.30286308885665636, "grad_norm": 99.96059556562662, "learning_rate": 1.5983752098197532e-05, "loss": 30.1652, "step": 19210 }, { "epoch": 0.3030207479346661, "grad_norm": 104.48769728276825, "learning_rate": 1.5979742790621587e-05, "loss": 30.0194, "step": 19220 }, { "epoch": 0.3031784070126758, "grad_norm": 107.53981874687003, "learning_rate": 1.5975731986259422e-05, "loss": 30.2748, "step": 19230 }, { "epoch": 0.3033360660906855, "grad_norm": 101.55267427842674, "learning_rate": 1.597171968611498e-05, "loss": 30.5985, "step": 19240 }, { "epoch": 0.3034937251686952, "grad_norm": 103.43705525173483, "learning_rate": 1.5967705891192586e-05, "loss": 29.5384, "step": 19250 }, { "epoch": 0.3036513842467049, "grad_norm": 107.66875002279454, "learning_rate": 1.596369060249692e-05, "loss": 30.0557, "step": 19260 }, { "epoch": 0.30380904332471464, "grad_norm": 104.50012446403548, "learning_rate": 1.5959673821033056e-05, "loss": 29.9933, "step": 19270 }, { "epoch": 0.30396670240272433, "grad_norm": 107.67389070890326, "learning_rate": 1.5955655547806426e-05, "loss": 29.4415, "step": 19280 }, { "epoch": 0.3041243614807341, "grad_norm": 99.98663196558114, "learning_rate": 1.5951635783822843e-05, "loss": 29.0952, "step": 19290 }, { "epoch": 0.30428202055874376, "grad_norm": 103.9688938521687, "learning_rate": 1.5947614530088493e-05, "loss": 32.9413, "step": 19300 }, { "epoch": 0.3044396796367535, "grad_norm": 111.26350964040994, "learning_rate": 1.594359178760993e-05, "loss": 28.8486, "step": 19310 }, { "epoch": 0.3045973387147632, "grad_norm": 98.17926728148979, "learning_rate": 1.5939567557394092e-05, "loss": 29.0966, "step": 19320 }, { "epoch": 0.30475499779277293, "grad_norm": 102.99781645931944, "learning_rate": 1.5935541840448278e-05, "loss": 29.0339, "step": 19330 }, { "epoch": 0.3049126568707826, "grad_norm": 100.85677966705016, "learning_rate": 1.5931514637780163e-05, "loss": 29.4419, "step": 19340 }, { "epoch": 0.30507031594879236, "grad_norm": 100.62071005039981, "learning_rate": 1.5927485950397793e-05, "loss": 29.8575, "step": 19350 }, { "epoch": 0.30522797502680205, "grad_norm": 98.95814685197439, "learning_rate": 1.5923455779309588e-05, "loss": 30.5297, "step": 19360 }, { "epoch": 0.30538563410481173, "grad_norm": 97.01032618228061, "learning_rate": 1.5919424125524338e-05, "loss": 29.0567, "step": 19370 }, { "epoch": 0.3055432931828215, "grad_norm": 114.09226094995785, "learning_rate": 1.5915390990051204e-05, "loss": 30.0658, "step": 19380 }, { "epoch": 0.30570095226083116, "grad_norm": 100.16885715685409, "learning_rate": 1.5911356373899714e-05, "loss": 29.6012, "step": 19390 }, { "epoch": 0.3058586113388409, "grad_norm": 107.40667981571791, "learning_rate": 1.590732027807978e-05, "loss": 30.2195, "step": 19400 }, { "epoch": 0.3060162704168506, "grad_norm": 118.88106889528119, "learning_rate": 1.5903282703601668e-05, "loss": 29.8609, "step": 19410 }, { "epoch": 0.30617392949486033, "grad_norm": 104.9831866002174, "learning_rate": 1.589924365147602e-05, "loss": 29.2712, "step": 19420 }, { "epoch": 0.30633158857287, "grad_norm": 110.7380453434704, "learning_rate": 1.5895203122713852e-05, "loss": 29.9343, "step": 19430 }, { "epoch": 0.30648924765087976, "grad_norm": 98.60377113995453, "learning_rate": 1.5891161118326547e-05, "loss": 30.0531, "step": 19440 }, { "epoch": 0.30664690672888945, "grad_norm": 113.07972343356133, "learning_rate": 1.5887117639325858e-05, "loss": 29.1605, "step": 19450 }, { "epoch": 0.3068045658068992, "grad_norm": 103.84360377177214, "learning_rate": 1.58830726867239e-05, "loss": 29.1435, "step": 19460 }, { "epoch": 0.3069622248849089, "grad_norm": 103.23299007549899, "learning_rate": 1.5879026261533165e-05, "loss": 29.213, "step": 19470 }, { "epoch": 0.30711988396291856, "grad_norm": 111.45523410551544, "learning_rate": 1.587497836476652e-05, "loss": 29.4278, "step": 19480 }, { "epoch": 0.3072775430409283, "grad_norm": 100.5783567300768, "learning_rate": 1.587092899743718e-05, "loss": 29.5198, "step": 19490 }, { "epoch": 0.307435202118938, "grad_norm": 97.15041578363571, "learning_rate": 1.5866878160558746e-05, "loss": 28.9159, "step": 19500 }, { "epoch": 0.30759286119694773, "grad_norm": 105.29431650329883, "learning_rate": 1.5862825855145183e-05, "loss": 29.8182, "step": 19510 }, { "epoch": 0.3077505202749574, "grad_norm": 101.11691561035317, "learning_rate": 1.5858772082210817e-05, "loss": 29.9243, "step": 19520 }, { "epoch": 0.30790817935296716, "grad_norm": 101.00936374669706, "learning_rate": 1.5854716842770345e-05, "loss": 28.6387, "step": 19530 }, { "epoch": 0.30806583843097685, "grad_norm": 105.22903820412743, "learning_rate": 1.5850660137838838e-05, "loss": 28.7006, "step": 19540 }, { "epoch": 0.3082234975089866, "grad_norm": 108.8900854954, "learning_rate": 1.584660196843172e-05, "loss": 28.9512, "step": 19550 }, { "epoch": 0.3083811565869963, "grad_norm": 104.26755072452885, "learning_rate": 1.5842542335564795e-05, "loss": 29.4017, "step": 19560 }, { "epoch": 0.308538815665006, "grad_norm": 104.51090042832043, "learning_rate": 1.5838481240254227e-05, "loss": 29.1209, "step": 19570 }, { "epoch": 0.3086964747430157, "grad_norm": 101.69021625017236, "learning_rate": 1.583441868351654e-05, "loss": 29.6288, "step": 19580 }, { "epoch": 0.3088541338210254, "grad_norm": 107.59345281517875, "learning_rate": 1.5830354666368637e-05, "loss": 28.9349, "step": 19590 }, { "epoch": 0.30901179289903513, "grad_norm": 111.51718226348892, "learning_rate": 1.5826289189827782e-05, "loss": 29.5444, "step": 19600 }, { "epoch": 0.3091694519770448, "grad_norm": 106.00330358553587, "learning_rate": 1.5822222254911593e-05, "loss": 28.9766, "step": 19610 }, { "epoch": 0.30932711105505456, "grad_norm": 102.09433659471048, "learning_rate": 1.5818153862638074e-05, "loss": 28.602, "step": 19620 }, { "epoch": 0.30948477013306425, "grad_norm": 112.25664693130607, "learning_rate": 1.5814084014025573e-05, "loss": 29.3348, "step": 19630 }, { "epoch": 0.309642429211074, "grad_norm": 110.81500232899579, "learning_rate": 1.581001271009281e-05, "loss": 29.4714, "step": 19640 }, { "epoch": 0.3098000882890837, "grad_norm": 103.15690323342773, "learning_rate": 1.5805939951858882e-05, "loss": 29.4771, "step": 19650 }, { "epoch": 0.3099577473670934, "grad_norm": 107.21181598891252, "learning_rate": 1.580186574034323e-05, "loss": 30.5383, "step": 19660 }, { "epoch": 0.3101154064451031, "grad_norm": 98.60259510898945, "learning_rate": 1.5797790076565672e-05, "loss": 29.1296, "step": 19670 }, { "epoch": 0.31027306552311285, "grad_norm": 101.174000222156, "learning_rate": 1.5793712961546385e-05, "loss": 28.2834, "step": 19680 }, { "epoch": 0.31043072460112253, "grad_norm": 111.76522202686674, "learning_rate": 1.57896343963059e-05, "loss": 30.2619, "step": 19690 }, { "epoch": 0.3105883836791322, "grad_norm": 103.50000259889596, "learning_rate": 1.5785554381865137e-05, "loss": 29.272, "step": 19700 }, { "epoch": 0.31074604275714196, "grad_norm": 101.1614424967166, "learning_rate": 1.578147291924535e-05, "loss": 28.8331, "step": 19710 }, { "epoch": 0.31090370183515165, "grad_norm": 100.43867455605356, "learning_rate": 1.577739000946817e-05, "loss": 29.3267, "step": 19720 }, { "epoch": 0.3110613609131614, "grad_norm": 105.27789824770659, "learning_rate": 1.577330565355559e-05, "loss": 28.579, "step": 19730 }, { "epoch": 0.3112190199911711, "grad_norm": 105.68055736004996, "learning_rate": 1.576921985252996e-05, "loss": 29.4302, "step": 19740 }, { "epoch": 0.3113766790691808, "grad_norm": 100.79731931972705, "learning_rate": 1.5765132607414e-05, "loss": 29.0404, "step": 19750 }, { "epoch": 0.3115343381471905, "grad_norm": 102.95994467504308, "learning_rate": 1.5761043919230784e-05, "loss": 29.2761, "step": 19760 }, { "epoch": 0.31169199722520025, "grad_norm": 103.0852646233178, "learning_rate": 1.5756953789003746e-05, "loss": 29.4265, "step": 19770 }, { "epoch": 0.31184965630320993, "grad_norm": 97.03848855121186, "learning_rate": 1.575286221775669e-05, "loss": 28.0791, "step": 19780 }, { "epoch": 0.3120073153812197, "grad_norm": 103.03160026820571, "learning_rate": 1.5748769206513767e-05, "loss": 29.753, "step": 19790 }, { "epoch": 0.31216497445922936, "grad_norm": 103.34625754634894, "learning_rate": 1.5744674756299504e-05, "loss": 29.1543, "step": 19800 }, { "epoch": 0.31232263353723905, "grad_norm": 102.08003881807807, "learning_rate": 1.5740578868138775e-05, "loss": 29.127, "step": 19810 }, { "epoch": 0.3124802926152488, "grad_norm": 101.97574065605183, "learning_rate": 1.5736481543056826e-05, "loss": 29.8006, "step": 19820 }, { "epoch": 0.3126379516932585, "grad_norm": 100.94780394558641, "learning_rate": 1.5732382782079252e-05, "loss": 29.7644, "step": 19830 }, { "epoch": 0.3127956107712682, "grad_norm": 108.57422068824037, "learning_rate": 1.572828258623201e-05, "loss": 29.7409, "step": 19840 }, { "epoch": 0.3129532698492779, "grad_norm": 104.5389880282125, "learning_rate": 1.5724180956541422e-05, "loss": 29.7655, "step": 19850 }, { "epoch": 0.31311092892728765, "grad_norm": 106.54071378973003, "learning_rate": 1.572007789403416e-05, "loss": 28.7867, "step": 19860 }, { "epoch": 0.31326858800529733, "grad_norm": 108.36697342340011, "learning_rate": 1.5715973399737267e-05, "loss": 30.0061, "step": 19870 }, { "epoch": 0.3134262470833071, "grad_norm": 104.05464390888955, "learning_rate": 1.5711867474678125e-05, "loss": 30.3542, "step": 19880 }, { "epoch": 0.31358390616131676, "grad_norm": 108.1681770697888, "learning_rate": 1.57077601198845e-05, "loss": 30.585, "step": 19890 }, { "epoch": 0.3137415652393265, "grad_norm": 105.41194882684877, "learning_rate": 1.570365133638449e-05, "loss": 29.6038, "step": 19900 }, { "epoch": 0.3138992243173362, "grad_norm": 96.38117034667226, "learning_rate": 1.569954112520657e-05, "loss": 28.8978, "step": 19910 }, { "epoch": 0.31405688339534593, "grad_norm": 102.73388406223056, "learning_rate": 1.569542948737956e-05, "loss": 29.4729, "step": 19920 }, { "epoch": 0.3142145424733556, "grad_norm": 102.43483484778965, "learning_rate": 1.5691316423932645e-05, "loss": 29.4798, "step": 19930 }, { "epoch": 0.3143722015513653, "grad_norm": 104.70635898293942, "learning_rate": 1.568720193589536e-05, "loss": 28.8511, "step": 19940 }, { "epoch": 0.31452986062937505, "grad_norm": 105.10857552294004, "learning_rate": 1.568308602429761e-05, "loss": 29.7898, "step": 19950 }, { "epoch": 0.31468751970738473, "grad_norm": 104.09614238898044, "learning_rate": 1.5678968690169628e-05, "loss": 28.8078, "step": 19960 }, { "epoch": 0.3148451787853945, "grad_norm": 100.84396473012973, "learning_rate": 1.567484993454204e-05, "loss": 29.5106, "step": 19970 }, { "epoch": 0.31500283786340416, "grad_norm": 100.99849242829752, "learning_rate": 1.56707297584458e-05, "loss": 29.7657, "step": 19980 }, { "epoch": 0.3151604969414139, "grad_norm": 104.1459935569806, "learning_rate": 1.566660816291223e-05, "loss": 29.1709, "step": 19990 }, { "epoch": 0.3153181560194236, "grad_norm": 101.63784812690342, "learning_rate": 1.5662485148973007e-05, "loss": 28.6342, "step": 20000 }, { "epoch": 0.31547581509743333, "grad_norm": 104.27864777263368, "learning_rate": 1.5658360717660153e-05, "loss": 29.6471, "step": 20010 }, { "epoch": 0.315633474175443, "grad_norm": 98.49160769597913, "learning_rate": 1.565423487000606e-05, "loss": 28.372, "step": 20020 }, { "epoch": 0.31579113325345276, "grad_norm": 103.19277644321052, "learning_rate": 1.5650107607043462e-05, "loss": 29.0489, "step": 20030 }, { "epoch": 0.31594879233146245, "grad_norm": 95.77757589732526, "learning_rate": 1.5645978929805453e-05, "loss": 29.4264, "step": 20040 }, { "epoch": 0.31610645140947213, "grad_norm": 103.02049260191137, "learning_rate": 1.564184883932548e-05, "loss": 29.3361, "step": 20050 }, { "epoch": 0.3162641104874819, "grad_norm": 98.81177802398511, "learning_rate": 1.5637717336637345e-05, "loss": 28.3852, "step": 20060 }, { "epoch": 0.31642176956549156, "grad_norm": 98.31638240576635, "learning_rate": 1.56335844227752e-05, "loss": 29.2982, "step": 20070 }, { "epoch": 0.3165794286435013, "grad_norm": 92.70854937465226, "learning_rate": 1.5629450098773555e-05, "loss": 28.5643, "step": 20080 }, { "epoch": 0.316737087721511, "grad_norm": 106.12525730920102, "learning_rate": 1.5625314365667267e-05, "loss": 29.5871, "step": 20090 }, { "epoch": 0.31689474679952073, "grad_norm": 101.45841726802979, "learning_rate": 1.5621177224491554e-05, "loss": 29.349, "step": 20100 }, { "epoch": 0.3170524058775304, "grad_norm": 99.19391785925538, "learning_rate": 1.5617038676281978e-05, "loss": 29.488, "step": 20110 }, { "epoch": 0.31721006495554016, "grad_norm": 103.80808663486125, "learning_rate": 1.5612898722074458e-05, "loss": 29.1559, "step": 20120 }, { "epoch": 0.31736772403354985, "grad_norm": 122.59507948673338, "learning_rate": 1.5608757362905264e-05, "loss": 30.5576, "step": 20130 }, { "epoch": 0.3175253831115596, "grad_norm": 100.98784820502595, "learning_rate": 1.5604614599811014e-05, "loss": 28.8596, "step": 20140 }, { "epoch": 0.3176830421895693, "grad_norm": 112.62675987363501, "learning_rate": 1.5600470433828685e-05, "loss": 30.7738, "step": 20150 }, { "epoch": 0.31784070126757896, "grad_norm": 103.50184044057112, "learning_rate": 1.55963248659956e-05, "loss": 28.9303, "step": 20160 }, { "epoch": 0.3179983603455887, "grad_norm": 101.21268798436097, "learning_rate": 1.5592177897349432e-05, "loss": 29.333, "step": 20170 }, { "epoch": 0.3181560194235984, "grad_norm": 100.64752941524401, "learning_rate": 1.558802952892821e-05, "loss": 29.0723, "step": 20180 }, { "epoch": 0.31831367850160813, "grad_norm": 100.93869100405148, "learning_rate": 1.5583879761770303e-05, "loss": 29.102, "step": 20190 }, { "epoch": 0.3184713375796178, "grad_norm": 104.54578063137062, "learning_rate": 1.5579728596914448e-05, "loss": 29.0437, "step": 20200 }, { "epoch": 0.31862899665762756, "grad_norm": 101.9996444207307, "learning_rate": 1.557557603539971e-05, "loss": 28.9365, "step": 20210 }, { "epoch": 0.31878665573563725, "grad_norm": 103.99905265808546, "learning_rate": 1.5571422078265524e-05, "loss": 29.4161, "step": 20220 }, { "epoch": 0.318944314813647, "grad_norm": 101.03367984088361, "learning_rate": 1.5567266726551655e-05, "loss": 29.6728, "step": 20230 }, { "epoch": 0.3191019738916567, "grad_norm": 101.82386954074393, "learning_rate": 1.556310998129823e-05, "loss": 28.7396, "step": 20240 }, { "epoch": 0.3192596329696664, "grad_norm": 105.99714258144644, "learning_rate": 1.555895184354573e-05, "loss": 28.667, "step": 20250 }, { "epoch": 0.3194172920476761, "grad_norm": 103.38612013171839, "learning_rate": 1.5554792314334964e-05, "loss": 28.7819, "step": 20260 }, { "epoch": 0.3195749511256858, "grad_norm": 106.43024066780744, "learning_rate": 1.5550631394707112e-05, "loss": 28.4805, "step": 20270 }, { "epoch": 0.31973261020369553, "grad_norm": 96.9907712991976, "learning_rate": 1.5546469085703684e-05, "loss": 29.635, "step": 20280 }, { "epoch": 0.3198902692817052, "grad_norm": 103.32460787258928, "learning_rate": 1.554230538836655e-05, "loss": 28.6731, "step": 20290 }, { "epoch": 0.32004792835971496, "grad_norm": 101.19716385839381, "learning_rate": 1.553814030373792e-05, "loss": 28.2664, "step": 20300 }, { "epoch": 0.32020558743772465, "grad_norm": 114.76789620042352, "learning_rate": 1.5533973832860355e-05, "loss": 28.8506, "step": 20310 }, { "epoch": 0.3203632465157344, "grad_norm": 107.5004223332923, "learning_rate": 1.552980597677676e-05, "loss": 28.8324, "step": 20320 }, { "epoch": 0.3205209055937441, "grad_norm": 98.33119734871677, "learning_rate": 1.5525636736530395e-05, "loss": 28.9797, "step": 20330 }, { "epoch": 0.3206785646717538, "grad_norm": 109.66316652392642, "learning_rate": 1.5521466113164853e-05, "loss": 29.435, "step": 20340 }, { "epoch": 0.3208362237497635, "grad_norm": 101.15085457847235, "learning_rate": 1.5517294107724087e-05, "loss": 28.4097, "step": 20350 }, { "epoch": 0.32099388282777325, "grad_norm": 99.74511157096451, "learning_rate": 1.551312072125238e-05, "loss": 29.0237, "step": 20360 }, { "epoch": 0.32115154190578293, "grad_norm": 111.97516197992385, "learning_rate": 1.5508945954794383e-05, "loss": 29.0862, "step": 20370 }, { "epoch": 0.3213092009837926, "grad_norm": 102.96341886060812, "learning_rate": 1.5504769809395068e-05, "loss": 29.3853, "step": 20380 }, { "epoch": 0.32146686006180236, "grad_norm": 95.94941446605816, "learning_rate": 1.550059228609977e-05, "loss": 29.0471, "step": 20390 }, { "epoch": 0.32162451913981205, "grad_norm": 104.77475090811987, "learning_rate": 1.5496413385954162e-05, "loss": 28.6556, "step": 20400 }, { "epoch": 0.3217821782178218, "grad_norm": 109.23367802788204, "learning_rate": 1.549223311000426e-05, "loss": 28.5642, "step": 20410 }, { "epoch": 0.3219398372958315, "grad_norm": 101.39725831326858, "learning_rate": 1.548805145929643e-05, "loss": 29.2745, "step": 20420 }, { "epoch": 0.3220974963738412, "grad_norm": 96.91158249722922, "learning_rate": 1.548386843487738e-05, "loss": 28.5748, "step": 20430 }, { "epoch": 0.3222551554518509, "grad_norm": 101.94773440324822, "learning_rate": 1.5479684037794152e-05, "loss": 29.4288, "step": 20440 }, { "epoch": 0.32241281452986065, "grad_norm": 95.00334653857766, "learning_rate": 1.5475498269094147e-05, "loss": 28.8309, "step": 20450 }, { "epoch": 0.32257047360787033, "grad_norm": 100.39019522937912, "learning_rate": 1.54713111298251e-05, "loss": 28.9747, "step": 20460 }, { "epoch": 0.3227281326858801, "grad_norm": 101.50267558698941, "learning_rate": 1.5467122621035097e-05, "loss": 28.9894, "step": 20470 }, { "epoch": 0.32288579176388976, "grad_norm": 102.01982744395468, "learning_rate": 1.5462932743772555e-05, "loss": 28.8642, "step": 20480 }, { "epoch": 0.3230434508418995, "grad_norm": 96.51553993909005, "learning_rate": 1.5458741499086243e-05, "loss": 28.3953, "step": 20490 }, { "epoch": 0.3232011099199092, "grad_norm": 96.78631652016954, "learning_rate": 1.5454548888025266e-05, "loss": 28.9349, "step": 20500 }, { "epoch": 0.3233587689979189, "grad_norm": 101.60715225066255, "learning_rate": 1.545035491163908e-05, "loss": 28.5357, "step": 20510 }, { "epoch": 0.3235164280759286, "grad_norm": 104.00148647391337, "learning_rate": 1.544615957097747e-05, "loss": 28.9114, "step": 20520 }, { "epoch": 0.3236740871539383, "grad_norm": 98.97669067606199, "learning_rate": 1.5441962867090574e-05, "loss": 29.3004, "step": 20530 }, { "epoch": 0.32383174623194805, "grad_norm": 107.67520876487836, "learning_rate": 1.543776480102886e-05, "loss": 29.401, "step": 20540 }, { "epoch": 0.32398940530995773, "grad_norm": 100.79869007874841, "learning_rate": 1.543356537384316e-05, "loss": 28.8814, "step": 20550 }, { "epoch": 0.3241470643879675, "grad_norm": 113.66570096918453, "learning_rate": 1.5429364586584617e-05, "loss": 28.5974, "step": 20560 }, { "epoch": 0.32430472346597716, "grad_norm": 105.20610347272455, "learning_rate": 1.5425162440304726e-05, "loss": 28.8129, "step": 20570 }, { "epoch": 0.3244623825439869, "grad_norm": 97.66931513013512, "learning_rate": 1.542095893605533e-05, "loss": 28.2835, "step": 20580 }, { "epoch": 0.3246200416219966, "grad_norm": 107.65090802605563, "learning_rate": 1.5416754074888607e-05, "loss": 28.7051, "step": 20590 }, { "epoch": 0.32477770070000633, "grad_norm": 113.09573119353908, "learning_rate": 1.5412547857857073e-05, "loss": 28.6771, "step": 20600 }, { "epoch": 0.324935359778016, "grad_norm": 106.6963257615736, "learning_rate": 1.5408340286013576e-05, "loss": 29.046, "step": 20610 }, { "epoch": 0.3250930188560257, "grad_norm": 101.71643082285316, "learning_rate": 1.5404131360411318e-05, "loss": 28.395, "step": 20620 }, { "epoch": 0.32525067793403545, "grad_norm": 109.64896084817966, "learning_rate": 1.5399921082103834e-05, "loss": 29.3959, "step": 20630 }, { "epoch": 0.32540833701204513, "grad_norm": 101.32099174690516, "learning_rate": 1.5395709452144995e-05, "loss": 29.1506, "step": 20640 }, { "epoch": 0.3255659960900549, "grad_norm": 97.90453565772397, "learning_rate": 1.5391496471589008e-05, "loss": 29.0529, "step": 20650 }, { "epoch": 0.32572365516806456, "grad_norm": 96.30320216159734, "learning_rate": 1.5387282141490428e-05, "loss": 27.973, "step": 20660 }, { "epoch": 0.3258813142460743, "grad_norm": 107.94882363292452, "learning_rate": 1.5383066462904138e-05, "loss": 29.7297, "step": 20670 }, { "epoch": 0.326038973324084, "grad_norm": 101.68085292987338, "learning_rate": 1.5378849436885364e-05, "loss": 29.2692, "step": 20680 }, { "epoch": 0.32619663240209373, "grad_norm": 99.15363305107496, "learning_rate": 1.5374631064489665e-05, "loss": 28.3932, "step": 20690 }, { "epoch": 0.3263542914801034, "grad_norm": 95.64293459795422, "learning_rate": 1.5370411346772942e-05, "loss": 29.2357, "step": 20700 }, { "epoch": 0.32651195055811316, "grad_norm": 109.86080361078344, "learning_rate": 1.536619028479143e-05, "loss": 28.6842, "step": 20710 }, { "epoch": 0.32666960963612285, "grad_norm": 103.222542486621, "learning_rate": 1.53619678796017e-05, "loss": 28.5266, "step": 20720 }, { "epoch": 0.32682726871413253, "grad_norm": 110.30020414362689, "learning_rate": 1.535774413226066e-05, "loss": 28.8886, "step": 20730 }, { "epoch": 0.3269849277921423, "grad_norm": 92.3718601869464, "learning_rate": 1.5353519043825556e-05, "loss": 27.0233, "step": 20740 }, { "epoch": 0.32714258687015196, "grad_norm": 101.85824862858358, "learning_rate": 1.5349292615353963e-05, "loss": 28.2078, "step": 20750 }, { "epoch": 0.3273002459481617, "grad_norm": 108.10823476303138, "learning_rate": 1.53450648479038e-05, "loss": 28.045, "step": 20760 }, { "epoch": 0.3274579050261714, "grad_norm": 97.73911414448376, "learning_rate": 1.5340835742533317e-05, "loss": 28.8696, "step": 20770 }, { "epoch": 0.32761556410418113, "grad_norm": 99.82302892932461, "learning_rate": 1.5336605300301096e-05, "loss": 28.4731, "step": 20780 }, { "epoch": 0.3277732231821908, "grad_norm": 106.81028031106167, "learning_rate": 1.533237352226606e-05, "loss": 28.4201, "step": 20790 }, { "epoch": 0.32793088226020056, "grad_norm": 100.54379854134113, "learning_rate": 1.5328140409487463e-05, "loss": 27.741, "step": 20800 }, { "epoch": 0.32808854133821025, "grad_norm": 109.93630886528959, "learning_rate": 1.532390596302489e-05, "loss": 29.6069, "step": 20810 }, { "epoch": 0.32824620041622, "grad_norm": 99.23240913387689, "learning_rate": 1.5319670183938266e-05, "loss": 29.3491, "step": 20820 }, { "epoch": 0.3284038594942297, "grad_norm": 102.32098104229338, "learning_rate": 1.5315433073287845e-05, "loss": 30.3579, "step": 20830 }, { "epoch": 0.32856151857223936, "grad_norm": 100.20953303986816, "learning_rate": 1.5311194632134218e-05, "loss": 28.2639, "step": 20840 }, { "epoch": 0.3287191776502491, "grad_norm": 101.94395782117718, "learning_rate": 1.5306954861538304e-05, "loss": 28.1008, "step": 20850 }, { "epoch": 0.3288768367282588, "grad_norm": 105.66302201277635, "learning_rate": 1.5302713762561364e-05, "loss": 29.2764, "step": 20860 }, { "epoch": 0.32903449580626853, "grad_norm": 109.27264601295695, "learning_rate": 1.5298471336264972e-05, "loss": 29.3463, "step": 20870 }, { "epoch": 0.3291921548842782, "grad_norm": 100.08474499167184, "learning_rate": 1.5294227583711062e-05, "loss": 28.6513, "step": 20880 }, { "epoch": 0.32934981396228796, "grad_norm": 102.98985516017878, "learning_rate": 1.528998250596188e-05, "loss": 28.9696, "step": 20890 }, { "epoch": 0.32950747304029765, "grad_norm": 119.24675747354381, "learning_rate": 1.528573610408001e-05, "loss": 29.2771, "step": 20900 }, { "epoch": 0.3296651321183074, "grad_norm": 106.46519339080497, "learning_rate": 1.528148837912836e-05, "loss": 30.1898, "step": 20910 }, { "epoch": 0.3298227911963171, "grad_norm": 96.67301165983632, "learning_rate": 1.5277239332170184e-05, "loss": 28.0617, "step": 20920 }, { "epoch": 0.3299804502743268, "grad_norm": 102.45246150107755, "learning_rate": 1.5272988964269054e-05, "loss": 28.3542, "step": 20930 }, { "epoch": 0.3301381093523365, "grad_norm": 98.63980325361608, "learning_rate": 1.5268737276488877e-05, "loss": 29.3389, "step": 20940 }, { "epoch": 0.3302957684303462, "grad_norm": 97.15385741113478, "learning_rate": 1.5264484269893895e-05, "loss": 28.6804, "step": 20950 }, { "epoch": 0.33045342750835593, "grad_norm": 106.06309053342738, "learning_rate": 1.5260229945548675e-05, "loss": 28.5091, "step": 20960 }, { "epoch": 0.3306110865863656, "grad_norm": 100.74676336507508, "learning_rate": 1.525597430451811e-05, "loss": 29.2608, "step": 20970 }, { "epoch": 0.33076874566437536, "grad_norm": 100.836770617603, "learning_rate": 1.5251717347867432e-05, "loss": 28.4178, "step": 20980 }, { "epoch": 0.33092640474238505, "grad_norm": 99.7865847497957, "learning_rate": 1.5247459076662198e-05, "loss": 28.4927, "step": 20990 }, { "epoch": 0.3310840638203948, "grad_norm": 97.32881931672478, "learning_rate": 1.524319949196829e-05, "loss": 28.7142, "step": 21000 }, { "epoch": 0.3312417228984045, "grad_norm": 103.31672738238731, "learning_rate": 1.5238938594851921e-05, "loss": 28.7675, "step": 21010 }, { "epoch": 0.3313993819764142, "grad_norm": 99.32636392160943, "learning_rate": 1.5234676386379636e-05, "loss": 28.8062, "step": 21020 }, { "epoch": 0.3315570410544239, "grad_norm": 106.91430296023054, "learning_rate": 1.523041286761831e-05, "loss": 29.0417, "step": 21030 }, { "epoch": 0.33171470013243365, "grad_norm": 102.23408002609968, "learning_rate": 1.5226148039635141e-05, "loss": 28.735, "step": 21040 }, { "epoch": 0.33187235921044334, "grad_norm": 100.60521557740731, "learning_rate": 1.5221881903497648e-05, "loss": 28.68, "step": 21050 }, { "epoch": 0.332030018288453, "grad_norm": 94.385599171222, "learning_rate": 1.5217614460273695e-05, "loss": 28.4279, "step": 21060 }, { "epoch": 0.33218767736646276, "grad_norm": 102.83908216641126, "learning_rate": 1.5213345711031457e-05, "loss": 28.5208, "step": 21070 }, { "epoch": 0.33234533644447245, "grad_norm": 103.44030092972451, "learning_rate": 1.5209075656839446e-05, "loss": 29.5744, "step": 21080 }, { "epoch": 0.3325029955224822, "grad_norm": 103.82258442412073, "learning_rate": 1.5204804298766493e-05, "loss": 28.5822, "step": 21090 }, { "epoch": 0.3326606546004919, "grad_norm": 101.65511745926871, "learning_rate": 1.5200531637881764e-05, "loss": 29.0244, "step": 21100 }, { "epoch": 0.3328183136785016, "grad_norm": 98.30079652692308, "learning_rate": 1.5196257675254745e-05, "loss": 29.0234, "step": 21110 }, { "epoch": 0.3329759727565113, "grad_norm": 114.55308272776146, "learning_rate": 1.5191982411955248e-05, "loss": 28.9247, "step": 21120 }, { "epoch": 0.33313363183452105, "grad_norm": 122.88392048468566, "learning_rate": 1.5187705849053416e-05, "loss": 29.5433, "step": 21130 }, { "epoch": 0.33329129091253074, "grad_norm": 103.76287976293898, "learning_rate": 1.5183427987619705e-05, "loss": 29.1795, "step": 21140 }, { "epoch": 0.3334489499905405, "grad_norm": 99.87848907626443, "learning_rate": 1.5179148828724911e-05, "loss": 27.3821, "step": 21150 }, { "epoch": 0.33360660906855016, "grad_norm": 108.29218143186583, "learning_rate": 1.5174868373440145e-05, "loss": 28.5104, "step": 21160 }, { "epoch": 0.3337642681465599, "grad_norm": 100.36967075825038, "learning_rate": 1.5170586622836847e-05, "loss": 28.837, "step": 21170 }, { "epoch": 0.3339219272245696, "grad_norm": 775.6653309520228, "learning_rate": 1.5166303577986781e-05, "loss": 28.0584, "step": 21180 }, { "epoch": 0.3340795863025793, "grad_norm": 101.95661292269102, "learning_rate": 1.5162019239962029e-05, "loss": 29.3755, "step": 21190 }, { "epoch": 0.334237245380589, "grad_norm": 107.12140814397995, "learning_rate": 1.5157733609835005e-05, "loss": 28.9328, "step": 21200 }, { "epoch": 0.3343949044585987, "grad_norm": 102.3836610976339, "learning_rate": 1.5153446688678444e-05, "loss": 28.8309, "step": 21210 }, { "epoch": 0.33455256353660845, "grad_norm": 101.68743838205665, "learning_rate": 1.51491584775654e-05, "loss": 28.1053, "step": 21220 }, { "epoch": 0.33471022261461814, "grad_norm": 105.0683878400836, "learning_rate": 1.5144868977569251e-05, "loss": 27.9879, "step": 21230 }, { "epoch": 0.3348678816926279, "grad_norm": 96.58095009340224, "learning_rate": 1.5140578189763703e-05, "loss": 29.351, "step": 21240 }, { "epoch": 0.33502554077063756, "grad_norm": 101.07665623247867, "learning_rate": 1.5136286115222782e-05, "loss": 28.2404, "step": 21250 }, { "epoch": 0.3351831998486473, "grad_norm": 98.07406454010432, "learning_rate": 1.5131992755020834e-05, "loss": 30.3706, "step": 21260 }, { "epoch": 0.335340858926657, "grad_norm": 98.09759048157716, "learning_rate": 1.5127698110232523e-05, "loss": 28.4313, "step": 21270 }, { "epoch": 0.33549851800466673, "grad_norm": 100.00641030250765, "learning_rate": 1.5123402181932845e-05, "loss": 28.0834, "step": 21280 }, { "epoch": 0.3356561770826764, "grad_norm": 103.13862909259004, "learning_rate": 1.5119104971197106e-05, "loss": 28.0689, "step": 21290 }, { "epoch": 0.3358138361606861, "grad_norm": 102.64294114371785, "learning_rate": 1.5114806479100947e-05, "loss": 28.3342, "step": 21300 }, { "epoch": 0.33597149523869585, "grad_norm": 98.05120541215311, "learning_rate": 1.5110506706720313e-05, "loss": 28.0865, "step": 21310 }, { "epoch": 0.33612915431670554, "grad_norm": 99.9305212192819, "learning_rate": 1.5106205655131483e-05, "loss": 28.3849, "step": 21320 }, { "epoch": 0.3362868133947153, "grad_norm": 112.01072283981203, "learning_rate": 1.5101903325411044e-05, "loss": 28.7051, "step": 21330 }, { "epoch": 0.33644447247272496, "grad_norm": 99.08691017096525, "learning_rate": 1.5097599718635919e-05, "loss": 28.5245, "step": 21340 }, { "epoch": 0.3366021315507347, "grad_norm": 104.17108109521887, "learning_rate": 1.5093294835883336e-05, "loss": 28.7685, "step": 21350 }, { "epoch": 0.3367597906287444, "grad_norm": 99.86916481103995, "learning_rate": 1.508898867823085e-05, "loss": 28.5475, "step": 21360 }, { "epoch": 0.33691744970675414, "grad_norm": 98.79332058659705, "learning_rate": 1.508468124675633e-05, "loss": 27.5138, "step": 21370 }, { "epoch": 0.3370751087847638, "grad_norm": 96.50077186466612, "learning_rate": 1.5080372542537966e-05, "loss": 28.6673, "step": 21380 }, { "epoch": 0.33723276786277356, "grad_norm": 104.2721961426429, "learning_rate": 1.5076062566654277e-05, "loss": 27.8755, "step": 21390 }, { "epoch": 0.33739042694078325, "grad_norm": 97.27234549825175, "learning_rate": 1.507175132018408e-05, "loss": 28.0945, "step": 21400 }, { "epoch": 0.33754808601879294, "grad_norm": 96.84450350149065, "learning_rate": 1.5067438804206527e-05, "loss": 27.5767, "step": 21410 }, { "epoch": 0.3377057450968027, "grad_norm": 113.54745384337008, "learning_rate": 1.5063125019801077e-05, "loss": 28.7249, "step": 21420 }, { "epoch": 0.33786340417481237, "grad_norm": 102.41004816630289, "learning_rate": 1.5058809968047515e-05, "loss": 28.6943, "step": 21430 }, { "epoch": 0.3380210632528221, "grad_norm": 101.18115369778451, "learning_rate": 1.5054493650025937e-05, "loss": 28.1616, "step": 21440 }, { "epoch": 0.3381787223308318, "grad_norm": 102.01729667739443, "learning_rate": 1.5050176066816759e-05, "loss": 28.4049, "step": 21450 }, { "epoch": 0.33833638140884154, "grad_norm": 91.18623014608131, "learning_rate": 1.5045857219500714e-05, "loss": 27.7449, "step": 21460 }, { "epoch": 0.3384940404868512, "grad_norm": 100.71447403744686, "learning_rate": 1.504153710915885e-05, "loss": 28.1482, "step": 21470 }, { "epoch": 0.33865169956486096, "grad_norm": 101.73984033311062, "learning_rate": 1.5037215736872526e-05, "loss": 28.1038, "step": 21480 }, { "epoch": 0.33880935864287065, "grad_norm": 105.34669419625263, "learning_rate": 1.5032893103723432e-05, "loss": 28.8871, "step": 21490 }, { "epoch": 0.3389670177208804, "grad_norm": 95.77781288552089, "learning_rate": 1.502856921079356e-05, "loss": 27.7304, "step": 21500 }, { "epoch": 0.3391246767988901, "grad_norm": 102.58661574911558, "learning_rate": 1.5024244059165219e-05, "loss": 27.7741, "step": 21510 }, { "epoch": 0.33928233587689977, "grad_norm": 370.00120852517216, "learning_rate": 1.5019917649921035e-05, "loss": 27.7745, "step": 21520 }, { "epoch": 0.3394399949549095, "grad_norm": 109.75582453031932, "learning_rate": 1.5015589984143955e-05, "loss": 29.1635, "step": 21530 }, { "epoch": 0.3395976540329192, "grad_norm": 179.77870338087084, "learning_rate": 1.5011261062917226e-05, "loss": 28.6474, "step": 21540 }, { "epoch": 0.33975531311092894, "grad_norm": 95.22642409956245, "learning_rate": 1.5006930887324425e-05, "loss": 27.5057, "step": 21550 }, { "epoch": 0.3399129721889386, "grad_norm": 99.61100063163636, "learning_rate": 1.5002599458449435e-05, "loss": 28.2366, "step": 21560 }, { "epoch": 0.34007063126694836, "grad_norm": 96.41814567690464, "learning_rate": 1.499826677737645e-05, "loss": 28.2609, "step": 21570 }, { "epoch": 0.34022829034495805, "grad_norm": 93.2323593069271, "learning_rate": 1.4993932845189985e-05, "loss": 27.6125, "step": 21580 }, { "epoch": 0.3403859494229678, "grad_norm": 100.20318030470878, "learning_rate": 1.4989597662974861e-05, "loss": 28.106, "step": 21590 }, { "epoch": 0.3405436085009775, "grad_norm": 105.96693961789359, "learning_rate": 1.498526123181622e-05, "loss": 28.7564, "step": 21600 }, { "epoch": 0.3407012675789872, "grad_norm": 101.65080613294805, "learning_rate": 1.4980923552799508e-05, "loss": 28.5087, "step": 21610 }, { "epoch": 0.3408589266569969, "grad_norm": 102.94268076444868, "learning_rate": 1.4976584627010487e-05, "loss": 28.0508, "step": 21620 }, { "epoch": 0.3410165857350066, "grad_norm": 99.20062963557496, "learning_rate": 1.4972244455535228e-05, "loss": 28.5171, "step": 21630 }, { "epoch": 0.34117424481301634, "grad_norm": 109.08827473925493, "learning_rate": 1.4967903039460125e-05, "loss": 28.7967, "step": 21640 }, { "epoch": 0.341331903891026, "grad_norm": 94.60882660246938, "learning_rate": 1.4963560379871874e-05, "loss": 28.0727, "step": 21650 }, { "epoch": 0.34148956296903576, "grad_norm": 98.62033520049714, "learning_rate": 1.4959216477857477e-05, "loss": 28.0252, "step": 21660 }, { "epoch": 0.34164722204704545, "grad_norm": 102.54938941944876, "learning_rate": 1.4954871334504257e-05, "loss": 28.175, "step": 21670 }, { "epoch": 0.3418048811250552, "grad_norm": 96.40350872994048, "learning_rate": 1.4950524950899847e-05, "loss": 27.9779, "step": 21680 }, { "epoch": 0.3419625402030649, "grad_norm": 97.50879134878647, "learning_rate": 1.4946177328132188e-05, "loss": 28.9777, "step": 21690 }, { "epoch": 0.3421201992810746, "grad_norm": 98.40788487846942, "learning_rate": 1.4941828467289527e-05, "loss": 28.0612, "step": 21700 }, { "epoch": 0.3422778583590843, "grad_norm": 106.02531332959772, "learning_rate": 1.493747836946043e-05, "loss": 27.9788, "step": 21710 }, { "epoch": 0.34243551743709405, "grad_norm": 99.70980599926736, "learning_rate": 1.4933127035733765e-05, "loss": 28.1714, "step": 21720 }, { "epoch": 0.34259317651510374, "grad_norm": 103.25472709137537, "learning_rate": 1.492877446719871e-05, "loss": 28.3686, "step": 21730 }, { "epoch": 0.3427508355931135, "grad_norm": 99.6913944516665, "learning_rate": 1.4924420664944759e-05, "loss": 28.4548, "step": 21740 }, { "epoch": 0.34290849467112317, "grad_norm": 102.68547152437885, "learning_rate": 1.4920065630061706e-05, "loss": 28.0643, "step": 21750 }, { "epoch": 0.34306615374913285, "grad_norm": 107.85556117071332, "learning_rate": 1.4915709363639658e-05, "loss": 27.4824, "step": 21760 }, { "epoch": 0.3432238128271426, "grad_norm": 101.04386764473485, "learning_rate": 1.4911351866769032e-05, "loss": 28.2085, "step": 21770 }, { "epoch": 0.3433814719051523, "grad_norm": 101.2835640057201, "learning_rate": 1.4906993140540546e-05, "loss": 28.2924, "step": 21780 }, { "epoch": 0.343539130983162, "grad_norm": 101.7907676766499, "learning_rate": 1.4902633186045241e-05, "loss": 28.4618, "step": 21790 }, { "epoch": 0.3436967900611717, "grad_norm": 103.40161016688867, "learning_rate": 1.4898272004374443e-05, "loss": 28.0242, "step": 21800 }, { "epoch": 0.34385444913918145, "grad_norm": 101.35229099364821, "learning_rate": 1.48939095966198e-05, "loss": 27.7285, "step": 21810 }, { "epoch": 0.34401210821719114, "grad_norm": 91.40185352723623, "learning_rate": 1.4889545963873267e-05, "loss": 27.4997, "step": 21820 }, { "epoch": 0.3441697672952009, "grad_norm": 103.67850085749487, "learning_rate": 1.4885181107227101e-05, "loss": 27.9575, "step": 21830 }, { "epoch": 0.34432742637321057, "grad_norm": 106.28659592899244, "learning_rate": 1.4880815027773867e-05, "loss": 28.7399, "step": 21840 }, { "epoch": 0.3444850854512203, "grad_norm": 97.02597172222805, "learning_rate": 1.4876447726606435e-05, "loss": 28.1261, "step": 21850 }, { "epoch": 0.34464274452923, "grad_norm": 100.98770097507159, "learning_rate": 1.4872079204817985e-05, "loss": 28.6109, "step": 21860 }, { "epoch": 0.3448004036072397, "grad_norm": 102.8241644742614, "learning_rate": 1.4867709463501995e-05, "loss": 27.7606, "step": 21870 }, { "epoch": 0.3449580626852494, "grad_norm": 104.58515245130685, "learning_rate": 1.4863338503752255e-05, "loss": 28.7743, "step": 21880 }, { "epoch": 0.3451157217632591, "grad_norm": 95.55414338994859, "learning_rate": 1.4858966326662859e-05, "loss": 27.2369, "step": 21890 }, { "epoch": 0.34527338084126885, "grad_norm": 97.10010275437635, "learning_rate": 1.48545929333282e-05, "loss": 28.6898, "step": 21900 }, { "epoch": 0.34543103991927854, "grad_norm": 94.44542461229501, "learning_rate": 1.4850218324842986e-05, "loss": 28.0067, "step": 21910 }, { "epoch": 0.3455886989972883, "grad_norm": 104.0774646677416, "learning_rate": 1.4845842502302218e-05, "loss": 27.8225, "step": 21920 }, { "epoch": 0.34574635807529797, "grad_norm": 103.47548012811723, "learning_rate": 1.4841465466801207e-05, "loss": 28.3802, "step": 21930 }, { "epoch": 0.3459040171533077, "grad_norm": 94.80495229709857, "learning_rate": 1.4837087219435566e-05, "loss": 27.0635, "step": 21940 }, { "epoch": 0.3460616762313174, "grad_norm": 100.30266111460371, "learning_rate": 1.4832707761301213e-05, "loss": 27.8321, "step": 21950 }, { "epoch": 0.34621933530932714, "grad_norm": 100.7820353662667, "learning_rate": 1.4828327093494366e-05, "loss": 28.9784, "step": 21960 }, { "epoch": 0.3463769943873368, "grad_norm": 104.87786477486372, "learning_rate": 1.4823945217111551e-05, "loss": 28.7088, "step": 21970 }, { "epoch": 0.3465346534653465, "grad_norm": 105.62790803230517, "learning_rate": 1.4819562133249586e-05, "loss": 28.096, "step": 21980 }, { "epoch": 0.34669231254335625, "grad_norm": 103.44845857734866, "learning_rate": 1.4815177843005608e-05, "loss": 28.4636, "step": 21990 }, { "epoch": 0.34684997162136594, "grad_norm": 96.60036599153106, "learning_rate": 1.4810792347477039e-05, "loss": 28.1969, "step": 22000 }, { "epoch": 0.3470076306993757, "grad_norm": 104.39086899059392, "learning_rate": 1.4806405647761614e-05, "loss": 28.693, "step": 22010 }, { "epoch": 0.34716528977738537, "grad_norm": 96.50875000710644, "learning_rate": 1.4802017744957364e-05, "loss": 27.7784, "step": 22020 }, { "epoch": 0.3473229488553951, "grad_norm": 99.87104470824553, "learning_rate": 1.4797628640162623e-05, "loss": 28.7794, "step": 22030 }, { "epoch": 0.3474806079334048, "grad_norm": 98.51725707038626, "learning_rate": 1.4793238334476023e-05, "loss": 28.0291, "step": 22040 }, { "epoch": 0.34763826701141454, "grad_norm": 92.43473594451056, "learning_rate": 1.4788846828996506e-05, "loss": 27.7541, "step": 22050 }, { "epoch": 0.3477959260894242, "grad_norm": 106.57821496769121, "learning_rate": 1.47844541248233e-05, "loss": 28.6264, "step": 22060 }, { "epoch": 0.34795358516743397, "grad_norm": 101.63118255080893, "learning_rate": 1.478006022305594e-05, "loss": 27.9168, "step": 22070 }, { "epoch": 0.34811124424544365, "grad_norm": 102.63045109503928, "learning_rate": 1.4775665124794268e-05, "loss": 28.4565, "step": 22080 }, { "epoch": 0.34826890332345334, "grad_norm": 96.31218983004212, "learning_rate": 1.4771268831138412e-05, "loss": 28.4766, "step": 22090 }, { "epoch": 0.3484265624014631, "grad_norm": 105.12764874893698, "learning_rate": 1.476687134318881e-05, "loss": 27.6555, "step": 22100 }, { "epoch": 0.34858422147947277, "grad_norm": 109.45888499906596, "learning_rate": 1.4762472662046196e-05, "loss": 28.1209, "step": 22110 }, { "epoch": 0.3487418805574825, "grad_norm": 100.6284960091106, "learning_rate": 1.4758072788811597e-05, "loss": 27.6395, "step": 22120 }, { "epoch": 0.3488995396354922, "grad_norm": 98.18641905585388, "learning_rate": 1.4753671724586345e-05, "loss": 28.2206, "step": 22130 }, { "epoch": 0.34905719871350194, "grad_norm": 96.33556446761214, "learning_rate": 1.474926947047207e-05, "loss": 27.1192, "step": 22140 }, { "epoch": 0.3492148577915116, "grad_norm": 99.05855709761087, "learning_rate": 1.4744866027570694e-05, "loss": 27.4062, "step": 22150 }, { "epoch": 0.34937251686952137, "grad_norm": 100.87591307504354, "learning_rate": 1.4740461396984443e-05, "loss": 28.289, "step": 22160 }, { "epoch": 0.34953017594753105, "grad_norm": 98.5005824361895, "learning_rate": 1.4736055579815838e-05, "loss": 27.2549, "step": 22170 }, { "epoch": 0.3496878350255408, "grad_norm": 95.76376402418548, "learning_rate": 1.4731648577167694e-05, "loss": 27.7655, "step": 22180 }, { "epoch": 0.3498454941035505, "grad_norm": 93.9974278312321, "learning_rate": 1.4727240390143131e-05, "loss": 27.5331, "step": 22190 }, { "epoch": 0.35000315318156017, "grad_norm": 102.14462191121302, "learning_rate": 1.4722831019845554e-05, "loss": 28.0492, "step": 22200 }, { "epoch": 0.3501608122595699, "grad_norm": 91.6963701350532, "learning_rate": 1.4718420467378673e-05, "loss": 27.8586, "step": 22210 }, { "epoch": 0.3503184713375796, "grad_norm": 105.51516194109664, "learning_rate": 1.4714008733846488e-05, "loss": 28.4252, "step": 22220 }, { "epoch": 0.35047613041558934, "grad_norm": 97.76404617416978, "learning_rate": 1.4709595820353302e-05, "loss": 28.0898, "step": 22230 }, { "epoch": 0.350633789493599, "grad_norm": 107.71382623174846, "learning_rate": 1.4705181728003708e-05, "loss": 28.404, "step": 22240 }, { "epoch": 0.35079144857160877, "grad_norm": 109.36570839314193, "learning_rate": 1.4700766457902593e-05, "loss": 28.0524, "step": 22250 }, { "epoch": 0.35094910764961845, "grad_norm": 96.78056278425677, "learning_rate": 1.4696350011155145e-05, "loss": 27.2718, "step": 22260 }, { "epoch": 0.3511067667276282, "grad_norm": 103.84948109024974, "learning_rate": 1.4691932388866836e-05, "loss": 28.1412, "step": 22270 }, { "epoch": 0.3512644258056379, "grad_norm": 100.24820697654425, "learning_rate": 1.4687513592143444e-05, "loss": 27.3314, "step": 22280 }, { "epoch": 0.3514220848836476, "grad_norm": 100.79040939598706, "learning_rate": 1.4683093622091033e-05, "loss": 28.23, "step": 22290 }, { "epoch": 0.3515797439616573, "grad_norm": 95.76299236445433, "learning_rate": 1.4678672479815966e-05, "loss": 28.2134, "step": 22300 }, { "epoch": 0.35173740303966705, "grad_norm": 100.51220572817864, "learning_rate": 1.4674250166424892e-05, "loss": 28.4909, "step": 22310 }, { "epoch": 0.35189506211767674, "grad_norm": 106.99737069474598, "learning_rate": 1.4669826683024764e-05, "loss": 28.4465, "step": 22320 }, { "epoch": 0.3520527211956864, "grad_norm": 102.68630882379294, "learning_rate": 1.4665402030722817e-05, "loss": 28.7704, "step": 22330 }, { "epoch": 0.35221038027369617, "grad_norm": 96.99403447676426, "learning_rate": 1.4660976210626586e-05, "loss": 27.4427, "step": 22340 }, { "epoch": 0.35236803935170585, "grad_norm": 100.89235286988004, "learning_rate": 1.4656549223843894e-05, "loss": 27.6502, "step": 22350 }, { "epoch": 0.3525256984297156, "grad_norm": 101.41025024516313, "learning_rate": 1.465212107148286e-05, "loss": 28.6151, "step": 22360 }, { "epoch": 0.3526833575077253, "grad_norm": 109.2902763841089, "learning_rate": 1.4647691754651887e-05, "loss": 28.587, "step": 22370 }, { "epoch": 0.352841016585735, "grad_norm": 93.43454238039001, "learning_rate": 1.4643261274459679e-05, "loss": 27.9031, "step": 22380 }, { "epoch": 0.3529986756637447, "grad_norm": 107.25546060152048, "learning_rate": 1.463882963201523e-05, "loss": 31.192, "step": 22390 }, { "epoch": 0.35315633474175445, "grad_norm": 94.43401190465278, "learning_rate": 1.4634396828427817e-05, "loss": 27.9255, "step": 22400 }, { "epoch": 0.35331399381976414, "grad_norm": 96.34330312998222, "learning_rate": 1.4629962864807015e-05, "loss": 27.809, "step": 22410 }, { "epoch": 0.3534716528977739, "grad_norm": 101.54493680441828, "learning_rate": 1.4625527742262686e-05, "loss": 28.8393, "step": 22420 }, { "epoch": 0.35362931197578357, "grad_norm": 99.21844088116771, "learning_rate": 1.4621091461904984e-05, "loss": 26.6941, "step": 22430 }, { "epoch": 0.35378697105379325, "grad_norm": 95.78601728989273, "learning_rate": 1.4616654024844351e-05, "loss": 27.9691, "step": 22440 }, { "epoch": 0.353944630131803, "grad_norm": 96.92273829291108, "learning_rate": 1.4612215432191526e-05, "loss": 27.6372, "step": 22450 }, { "epoch": 0.3541022892098127, "grad_norm": 107.5590517449944, "learning_rate": 1.4607775685057521e-05, "loss": 29.2334, "step": 22460 }, { "epoch": 0.3542599482878224, "grad_norm": 104.16345619370166, "learning_rate": 1.4603334784553647e-05, "loss": 27.3818, "step": 22470 }, { "epoch": 0.3544176073658321, "grad_norm": 99.71607552731022, "learning_rate": 1.4598892731791513e-05, "loss": 28.1403, "step": 22480 }, { "epoch": 0.35457526644384185, "grad_norm": 92.62315331962802, "learning_rate": 1.4594449527882997e-05, "loss": 28.0926, "step": 22490 }, { "epoch": 0.35473292552185154, "grad_norm": 98.57791558968087, "learning_rate": 1.459000517394028e-05, "loss": 27.5311, "step": 22500 }, { "epoch": 0.3548905845998613, "grad_norm": 99.43370378758291, "learning_rate": 1.4585559671075826e-05, "loss": 27.7779, "step": 22510 }, { "epoch": 0.35504824367787097, "grad_norm": 99.5919284423819, "learning_rate": 1.4581113020402385e-05, "loss": 28.3179, "step": 22520 }, { "epoch": 0.3552059027558807, "grad_norm": 106.2723853823764, "learning_rate": 1.4576665223032992e-05, "loss": 27.9438, "step": 22530 }, { "epoch": 0.3553635618338904, "grad_norm": 96.05661579419406, "learning_rate": 1.4572216280080981e-05, "loss": 28.0822, "step": 22540 }, { "epoch": 0.3555212209119001, "grad_norm": 96.6934249267142, "learning_rate": 1.456776619265996e-05, "loss": 28.0878, "step": 22550 }, { "epoch": 0.3556788799899098, "grad_norm": 97.24220818069257, "learning_rate": 1.456331496188383e-05, "loss": 28.3402, "step": 22560 }, { "epoch": 0.3558365390679195, "grad_norm": 97.94329777548364, "learning_rate": 1.4558862588866771e-05, "loss": 27.6929, "step": 22570 }, { "epoch": 0.35599419814592925, "grad_norm": 99.42129899426804, "learning_rate": 1.4554409074723262e-05, "loss": 27.8818, "step": 22580 }, { "epoch": 0.35615185722393894, "grad_norm": 102.01934349539812, "learning_rate": 1.4549954420568052e-05, "loss": 27.9306, "step": 22590 }, { "epoch": 0.3563095163019487, "grad_norm": 94.37610788258478, "learning_rate": 1.4545498627516188e-05, "loss": 27.7686, "step": 22600 }, { "epoch": 0.35646717537995837, "grad_norm": 103.79487270113528, "learning_rate": 1.4541041696682994e-05, "loss": 27.9314, "step": 22610 }, { "epoch": 0.3566248344579681, "grad_norm": 97.79312083318375, "learning_rate": 1.4536583629184082e-05, "loss": 27.9149, "step": 22620 }, { "epoch": 0.3567824935359778, "grad_norm": 102.31323828444359, "learning_rate": 1.4532124426135354e-05, "loss": 27.5011, "step": 22630 }, { "epoch": 0.35694015261398754, "grad_norm": 97.4623561603604, "learning_rate": 1.4527664088652987e-05, "loss": 28.3883, "step": 22640 }, { "epoch": 0.3570978116919972, "grad_norm": 95.98869938124268, "learning_rate": 1.4523202617853442e-05, "loss": 27.4624, "step": 22650 }, { "epoch": 0.3572554707700069, "grad_norm": 98.05880802995142, "learning_rate": 1.4518740014853475e-05, "loss": 27.8714, "step": 22660 }, { "epoch": 0.35741312984801665, "grad_norm": 92.19516411640234, "learning_rate": 1.4514276280770113e-05, "loss": 28.2924, "step": 22670 }, { "epoch": 0.35757078892602634, "grad_norm": 99.12164467412455, "learning_rate": 1.4509811416720667e-05, "loss": 27.1719, "step": 22680 }, { "epoch": 0.3577284480040361, "grad_norm": 105.78724350089023, "learning_rate": 1.4505345423822742e-05, "loss": 28.0273, "step": 22690 }, { "epoch": 0.35788610708204577, "grad_norm": 93.53165480086673, "learning_rate": 1.4500878303194217e-05, "loss": 27.5226, "step": 22700 }, { "epoch": 0.3580437661600555, "grad_norm": 94.81181618665805, "learning_rate": 1.4496410055953255e-05, "loss": 27.4608, "step": 22710 }, { "epoch": 0.3582014252380652, "grad_norm": 98.58026406101251, "learning_rate": 1.4491940683218294e-05, "loss": 27.3628, "step": 22720 }, { "epoch": 0.35835908431607494, "grad_norm": 96.90303095464755, "learning_rate": 1.4487470186108066e-05, "loss": 27.856, "step": 22730 }, { "epoch": 0.3585167433940846, "grad_norm": 109.5458103786438, "learning_rate": 1.4482998565741579e-05, "loss": 27.9466, "step": 22740 }, { "epoch": 0.35867440247209437, "grad_norm": 101.11875222312595, "learning_rate": 1.447852582323812e-05, "loss": 27.7877, "step": 22750 }, { "epoch": 0.35883206155010405, "grad_norm": 96.53668044444305, "learning_rate": 1.447405195971726e-05, "loss": 27.5866, "step": 22760 }, { "epoch": 0.35898972062811374, "grad_norm": 91.97380950272441, "learning_rate": 1.4469576976298849e-05, "loss": 26.8984, "step": 22770 }, { "epoch": 0.3591473797061235, "grad_norm": 102.66244921677495, "learning_rate": 1.4465100874103017e-05, "loss": 27.4815, "step": 22780 }, { "epoch": 0.35930503878413317, "grad_norm": 98.45843163881034, "learning_rate": 1.4460623654250174e-05, "loss": 27.3949, "step": 22790 }, { "epoch": 0.3594626978621429, "grad_norm": 114.90548722586689, "learning_rate": 1.4456145317861014e-05, "loss": 27.7659, "step": 22800 }, { "epoch": 0.3596203569401526, "grad_norm": 104.35041890860843, "learning_rate": 1.4451665866056506e-05, "loss": 27.4471, "step": 22810 }, { "epoch": 0.35977801601816234, "grad_norm": 91.02074335078169, "learning_rate": 1.4447185299957896e-05, "loss": 27.4049, "step": 22820 }, { "epoch": 0.359935675096172, "grad_norm": 99.18039059913148, "learning_rate": 1.444270362068671e-05, "loss": 27.554, "step": 22830 }, { "epoch": 0.36009333417418177, "grad_norm": 98.14768451647228, "learning_rate": 1.4438220829364766e-05, "loss": 27.6372, "step": 22840 }, { "epoch": 0.36025099325219145, "grad_norm": 94.96200241235726, "learning_rate": 1.443373692711414e-05, "loss": 27.5474, "step": 22850 }, { "epoch": 0.3604086523302012, "grad_norm": 97.43421837923644, "learning_rate": 1.4429251915057196e-05, "loss": 27.6188, "step": 22860 }, { "epoch": 0.3605663114082109, "grad_norm": 97.0101225330494, "learning_rate": 1.4424765794316575e-05, "loss": 27.5095, "step": 22870 }, { "epoch": 0.3607239704862206, "grad_norm": 96.15261548332674, "learning_rate": 1.4420278566015198e-05, "loss": 26.9594, "step": 22880 }, { "epoch": 0.3608816295642303, "grad_norm": 94.9380941471935, "learning_rate": 1.4415790231276259e-05, "loss": 27.6244, "step": 22890 }, { "epoch": 0.36103928864224, "grad_norm": 98.19055677333034, "learning_rate": 1.4411300791223231e-05, "loss": 27.7642, "step": 22900 }, { "epoch": 0.36119694772024974, "grad_norm": 93.72911649833011, "learning_rate": 1.440681024697986e-05, "loss": 26.9589, "step": 22910 }, { "epoch": 0.3613546067982594, "grad_norm": 99.91079841800403, "learning_rate": 1.440231859967018e-05, "loss": 27.9732, "step": 22920 }, { "epoch": 0.36151226587626917, "grad_norm": 102.40893657166113, "learning_rate": 1.4397825850418483e-05, "loss": 27.5988, "step": 22930 }, { "epoch": 0.36166992495427885, "grad_norm": 97.63427586672798, "learning_rate": 1.4393332000349354e-05, "loss": 27.1651, "step": 22940 }, { "epoch": 0.3618275840322886, "grad_norm": 100.27182026430424, "learning_rate": 1.4388837050587645e-05, "loss": 28.8646, "step": 22950 }, { "epoch": 0.3619852431102983, "grad_norm": 103.66140085954846, "learning_rate": 1.4384341002258482e-05, "loss": 27.659, "step": 22960 }, { "epoch": 0.362142902188308, "grad_norm": 98.91688784026277, "learning_rate": 1.437984385648727e-05, "loss": 27.4895, "step": 22970 }, { "epoch": 0.3623005612663177, "grad_norm": 91.73293683188706, "learning_rate": 1.4375345614399688e-05, "loss": 27.4901, "step": 22980 }, { "epoch": 0.36245822034432745, "grad_norm": 100.73627184376763, "learning_rate": 1.4370846277121685e-05, "loss": 27.8605, "step": 22990 }, { "epoch": 0.36261587942233714, "grad_norm": 95.69905950103472, "learning_rate": 1.436634584577949e-05, "loss": 28.1166, "step": 23000 }, { "epoch": 0.3627735385003468, "grad_norm": 98.06442846800658, "learning_rate": 1.4361844321499603e-05, "loss": 27.0556, "step": 23010 }, { "epoch": 0.36293119757835657, "grad_norm": 104.52975771271272, "learning_rate": 1.43573417054088e-05, "loss": 27.8643, "step": 23020 }, { "epoch": 0.36308885665636625, "grad_norm": 93.67590633986711, "learning_rate": 1.4352837998634125e-05, "loss": 27.4652, "step": 23030 }, { "epoch": 0.363246515734376, "grad_norm": 97.03313715929677, "learning_rate": 1.4348333202302903e-05, "loss": 28.0587, "step": 23040 }, { "epoch": 0.3634041748123857, "grad_norm": 101.50775405885356, "learning_rate": 1.4343827317542725e-05, "loss": 28.0551, "step": 23050 }, { "epoch": 0.3635618338903954, "grad_norm": 96.61682935159595, "learning_rate": 1.4339320345481456e-05, "loss": 27.1849, "step": 23060 }, { "epoch": 0.3637194929684051, "grad_norm": 100.66849007371756, "learning_rate": 1.4334812287247233e-05, "loss": 27.5434, "step": 23070 }, { "epoch": 0.36387715204641485, "grad_norm": 96.75262338177549, "learning_rate": 1.4330303143968464e-05, "loss": 27.6474, "step": 23080 }, { "epoch": 0.36403481112442454, "grad_norm": 97.67496647064674, "learning_rate": 1.4325792916773838e-05, "loss": 26.8949, "step": 23090 }, { "epoch": 0.3641924702024343, "grad_norm": 96.63146254366441, "learning_rate": 1.4321281606792302e-05, "loss": 27.2543, "step": 23100 }, { "epoch": 0.36435012928044397, "grad_norm": 94.55108290780662, "learning_rate": 1.4316769215153079e-05, "loss": 27.5807, "step": 23110 }, { "epoch": 0.36450778835845365, "grad_norm": 103.0072380822552, "learning_rate": 1.4312255742985663e-05, "loss": 27.3405, "step": 23120 }, { "epoch": 0.3646654474364634, "grad_norm": 96.52125832122053, "learning_rate": 1.4307741191419821e-05, "loss": 27.6371, "step": 23130 }, { "epoch": 0.3648231065144731, "grad_norm": 100.89941163884555, "learning_rate": 1.4303225561585587e-05, "loss": 27.5112, "step": 23140 }, { "epoch": 0.3649807655924828, "grad_norm": 101.79642638445776, "learning_rate": 1.4298708854613268e-05, "loss": 28.1634, "step": 23150 }, { "epoch": 0.3651384246704925, "grad_norm": 107.51155565702547, "learning_rate": 1.4294191071633437e-05, "loss": 27.4649, "step": 23160 }, { "epoch": 0.36529608374850225, "grad_norm": 96.65549171602049, "learning_rate": 1.4289672213776935e-05, "loss": 27.879, "step": 23170 }, { "epoch": 0.36545374282651194, "grad_norm": 100.13681233867788, "learning_rate": 1.4285152282174876e-05, "loss": 27.3245, "step": 23180 }, { "epoch": 0.3656114019045217, "grad_norm": 98.96092313778321, "learning_rate": 1.4280631277958646e-05, "loss": 28.0279, "step": 23190 }, { "epoch": 0.36576906098253137, "grad_norm": 95.50682496527281, "learning_rate": 1.427610920225989e-05, "loss": 27.5954, "step": 23200 }, { "epoch": 0.3659267200605411, "grad_norm": 94.64120595197589, "learning_rate": 1.4271586056210532e-05, "loss": 27.5467, "step": 23210 }, { "epoch": 0.3660843791385508, "grad_norm": 94.56401066235576, "learning_rate": 1.4267061840942753e-05, "loss": 26.9711, "step": 23220 }, { "epoch": 0.3662420382165605, "grad_norm": 110.71511559682159, "learning_rate": 1.4262536557589007e-05, "loss": 27.5284, "step": 23230 }, { "epoch": 0.3663996972945702, "grad_norm": 103.49365409848218, "learning_rate": 1.4258010207282019e-05, "loss": 27.7917, "step": 23240 }, { "epoch": 0.3665573563725799, "grad_norm": 94.73298250367228, "learning_rate": 1.4253482791154776e-05, "loss": 26.3338, "step": 23250 }, { "epoch": 0.36671501545058965, "grad_norm": 100.41240656701058, "learning_rate": 1.424895431034053e-05, "loss": 27.3066, "step": 23260 }, { "epoch": 0.36687267452859934, "grad_norm": 95.91163148053163, "learning_rate": 1.4244424765972805e-05, "loss": 26.9891, "step": 23270 }, { "epoch": 0.3670303336066091, "grad_norm": 99.59783642100457, "learning_rate": 1.423989415918539e-05, "loss": 27.287, "step": 23280 }, { "epoch": 0.36718799268461877, "grad_norm": 102.35985423513493, "learning_rate": 1.4235362491112337e-05, "loss": 27.4209, "step": 23290 }, { "epoch": 0.3673456517626285, "grad_norm": 96.40350035359313, "learning_rate": 1.4230829762887966e-05, "loss": 27.6141, "step": 23300 }, { "epoch": 0.3675033108406382, "grad_norm": 93.62737383857343, "learning_rate": 1.4226295975646863e-05, "loss": 27.4768, "step": 23310 }, { "epoch": 0.36766096991864794, "grad_norm": 90.04985811604658, "learning_rate": 1.4221761130523874e-05, "loss": 27.3801, "step": 23320 }, { "epoch": 0.3678186289966576, "grad_norm": 94.92879294579201, "learning_rate": 1.4217225228654117e-05, "loss": 26.6334, "step": 23330 }, { "epoch": 0.3679762880746673, "grad_norm": 98.92181811643519, "learning_rate": 1.4212688271172969e-05, "loss": 27.5553, "step": 23340 }, { "epoch": 0.36813394715267705, "grad_norm": 104.84894021496108, "learning_rate": 1.4208150259216079e-05, "loss": 28.0157, "step": 23350 }, { "epoch": 0.36829160623068674, "grad_norm": 105.93123646647899, "learning_rate": 1.420361119391935e-05, "loss": 27.5083, "step": 23360 }, { "epoch": 0.3684492653086965, "grad_norm": 98.8835816771367, "learning_rate": 1.4199071076418948e-05, "loss": 27.6404, "step": 23370 }, { "epoch": 0.36860692438670617, "grad_norm": 96.53295200846453, "learning_rate": 1.4194529907851318e-05, "loss": 27.1996, "step": 23380 }, { "epoch": 0.3687645834647159, "grad_norm": 94.8829985940188, "learning_rate": 1.4189987689353149e-05, "loss": 27.7546, "step": 23390 }, { "epoch": 0.3689222425427256, "grad_norm": 98.66020710625165, "learning_rate": 1.4185444422061405e-05, "loss": 27.073, "step": 23400 }, { "epoch": 0.36907990162073534, "grad_norm": 104.4684140957419, "learning_rate": 1.418090010711331e-05, "loss": 27.4693, "step": 23410 }, { "epoch": 0.369237560698745, "grad_norm": 94.94205839385774, "learning_rate": 1.4176354745646345e-05, "loss": 26.6544, "step": 23420 }, { "epoch": 0.36939521977675477, "grad_norm": 97.63932637905752, "learning_rate": 1.4171808338798258e-05, "loss": 27.8365, "step": 23430 }, { "epoch": 0.36955287885476445, "grad_norm": 95.34473176530281, "learning_rate": 1.4167260887707059e-05, "loss": 27.2615, "step": 23440 }, { "epoch": 0.36971053793277414, "grad_norm": 95.89664363594926, "learning_rate": 1.416271239351102e-05, "loss": 26.8513, "step": 23450 }, { "epoch": 0.3698681970107839, "grad_norm": 100.06702312045975, "learning_rate": 1.4158162857348669e-05, "loss": 26.8637, "step": 23460 }, { "epoch": 0.37002585608879357, "grad_norm": 91.17356402221098, "learning_rate": 1.4153612280358799e-05, "loss": 26.5385, "step": 23470 }, { "epoch": 0.3701835151668033, "grad_norm": 95.9185360490098, "learning_rate": 1.414906066368046e-05, "loss": 27.6397, "step": 23480 }, { "epoch": 0.370341174244813, "grad_norm": 94.36546554905199, "learning_rate": 1.414450800845297e-05, "loss": 26.9067, "step": 23490 }, { "epoch": 0.37049883332282274, "grad_norm": 102.48221203379829, "learning_rate": 1.41399543158159e-05, "loss": 27.2936, "step": 23500 }, { "epoch": 0.3706564924008324, "grad_norm": 95.2140882988385, "learning_rate": 1.4135399586909083e-05, "loss": 26.9907, "step": 23510 }, { "epoch": 0.37081415147884217, "grad_norm": 95.55542259978843, "learning_rate": 1.4130843822872604e-05, "loss": 27.2393, "step": 23520 }, { "epoch": 0.37097181055685186, "grad_norm": 97.86292995795962, "learning_rate": 1.412628702484682e-05, "loss": 27.1285, "step": 23530 }, { "epoch": 0.3711294696348616, "grad_norm": 95.33054987584427, "learning_rate": 1.412172919397234e-05, "loss": 26.7847, "step": 23540 }, { "epoch": 0.3712871287128713, "grad_norm": 98.15099092531398, "learning_rate": 1.4117170331390033e-05, "loss": 26.9996, "step": 23550 }, { "epoch": 0.371444787790881, "grad_norm": 97.34919621242514, "learning_rate": 1.4112610438241023e-05, "loss": 27.322, "step": 23560 }, { "epoch": 0.3716024468688907, "grad_norm": 102.09058230355538, "learning_rate": 1.4108049515666698e-05, "loss": 27.7599, "step": 23570 }, { "epoch": 0.3717601059469004, "grad_norm": 101.32259392329988, "learning_rate": 1.410348756480869e-05, "loss": 27.8745, "step": 23580 }, { "epoch": 0.37191776502491014, "grad_norm": 97.8555466337914, "learning_rate": 1.4098924586808914e-05, "loss": 27.2707, "step": 23590 }, { "epoch": 0.3720754241029198, "grad_norm": 95.71038547110835, "learning_rate": 1.4094360582809516e-05, "loss": 27.6567, "step": 23600 }, { "epoch": 0.37223308318092957, "grad_norm": 88.30657073781397, "learning_rate": 1.4089795553952913e-05, "loss": 26.7478, "step": 23610 }, { "epoch": 0.37239074225893926, "grad_norm": 96.54818673602945, "learning_rate": 1.4085229501381774e-05, "loss": 27.2209, "step": 23620 }, { "epoch": 0.372548401336949, "grad_norm": 97.34900129935083, "learning_rate": 1.4080662426239022e-05, "loss": 27.2966, "step": 23630 }, { "epoch": 0.3727060604149587, "grad_norm": 99.26987564890922, "learning_rate": 1.4076094329667848e-05, "loss": 26.9802, "step": 23640 }, { "epoch": 0.3728637194929684, "grad_norm": 103.51887081123776, "learning_rate": 1.4071525212811682e-05, "loss": 27.016, "step": 23650 }, { "epoch": 0.3730213785709781, "grad_norm": 97.48990606318614, "learning_rate": 1.4066955076814219e-05, "loss": 27.2903, "step": 23660 }, { "epoch": 0.37317903764898785, "grad_norm": 97.90436879603055, "learning_rate": 1.4062383922819407e-05, "loss": 26.771, "step": 23670 }, { "epoch": 0.37333669672699754, "grad_norm": 93.46354723461626, "learning_rate": 1.405781175197145e-05, "loss": 27.2073, "step": 23680 }, { "epoch": 0.3734943558050072, "grad_norm": 93.43490983884105, "learning_rate": 1.4053238565414806e-05, "loss": 26.9786, "step": 23690 }, { "epoch": 0.37365201488301697, "grad_norm": 89.5009546183088, "learning_rate": 1.4048664364294186e-05, "loss": 26.8449, "step": 23700 }, { "epoch": 0.37380967396102666, "grad_norm": 98.35076488725149, "learning_rate": 1.4044089149754556e-05, "loss": 27.5025, "step": 23710 }, { "epoch": 0.3739673330390364, "grad_norm": 100.54955924097271, "learning_rate": 1.4039512922941136e-05, "loss": 27.5068, "step": 23720 }, { "epoch": 0.3741249921170461, "grad_norm": 97.32867034643931, "learning_rate": 1.4034935684999398e-05, "loss": 27.8326, "step": 23730 }, { "epoch": 0.3742826511950558, "grad_norm": 93.94037110868054, "learning_rate": 1.4030357437075068e-05, "loss": 27.2543, "step": 23740 }, { "epoch": 0.3744403102730655, "grad_norm": 100.23594377605748, "learning_rate": 1.4025778180314126e-05, "loss": 26.7616, "step": 23750 }, { "epoch": 0.37459796935107526, "grad_norm": 106.5532296795737, "learning_rate": 1.4021197915862805e-05, "loss": 28.3971, "step": 23760 }, { "epoch": 0.37475562842908494, "grad_norm": 103.50178442125001, "learning_rate": 1.4016616644867585e-05, "loss": 26.7436, "step": 23770 }, { "epoch": 0.3749132875070947, "grad_norm": 98.55778380293809, "learning_rate": 1.40120343684752e-05, "loss": 27.2036, "step": 23780 }, { "epoch": 0.37507094658510437, "grad_norm": 97.55230055463416, "learning_rate": 1.4007451087832641e-05, "loss": 26.7494, "step": 23790 }, { "epoch": 0.37522860566311406, "grad_norm": 96.9246907778845, "learning_rate": 1.4002866804087147e-05, "loss": 27.9287, "step": 23800 }, { "epoch": 0.3753862647411238, "grad_norm": 104.20173561383939, "learning_rate": 1.3998281518386206e-05, "loss": 26.7441, "step": 23810 }, { "epoch": 0.3755439238191335, "grad_norm": 106.05035361042857, "learning_rate": 1.3993695231877555e-05, "loss": 27.1013, "step": 23820 }, { "epoch": 0.3757015828971432, "grad_norm": 96.68645335033034, "learning_rate": 1.3989107945709188e-05, "loss": 26.296, "step": 23830 }, { "epoch": 0.3758592419751529, "grad_norm": 102.2457425582185, "learning_rate": 1.398451966102935e-05, "loss": 27.0219, "step": 23840 }, { "epoch": 0.37601690105316266, "grad_norm": 93.85262155086092, "learning_rate": 1.3979930378986529e-05, "loss": 26.7741, "step": 23850 }, { "epoch": 0.37617456013117234, "grad_norm": 112.79061759860423, "learning_rate": 1.3975340100729462e-05, "loss": 28.0028, "step": 23860 }, { "epoch": 0.3763322192091821, "grad_norm": 90.39794457513074, "learning_rate": 1.3970748827407144e-05, "loss": 26.8434, "step": 23870 }, { "epoch": 0.37648987828719177, "grad_norm": 103.5873414194446, "learning_rate": 1.396615656016881e-05, "loss": 27.3872, "step": 23880 }, { "epoch": 0.3766475373652015, "grad_norm": 98.20229038352913, "learning_rate": 1.3961563300163952e-05, "loss": 27.581, "step": 23890 }, { "epoch": 0.3768051964432112, "grad_norm": 90.48543500286874, "learning_rate": 1.3956969048542309e-05, "loss": 27.1844, "step": 23900 }, { "epoch": 0.3769628555212209, "grad_norm": 115.86626708663925, "learning_rate": 1.3952373806453859e-05, "loss": 27.3881, "step": 23910 }, { "epoch": 0.3771205145992306, "grad_norm": 96.3811218156777, "learning_rate": 1.3947777575048832e-05, "loss": 28.3303, "step": 23920 }, { "epoch": 0.3772781736772403, "grad_norm": 99.54313430310967, "learning_rate": 1.394318035547772e-05, "loss": 26.311, "step": 23930 }, { "epoch": 0.37743583275525006, "grad_norm": 92.16084230569054, "learning_rate": 1.393858214889124e-05, "loss": 26.3323, "step": 23940 }, { "epoch": 0.37759349183325974, "grad_norm": 102.72553836258558, "learning_rate": 1.3933982956440373e-05, "loss": 26.9423, "step": 23950 }, { "epoch": 0.3777511509112695, "grad_norm": 93.43793770550195, "learning_rate": 1.3929382779276337e-05, "loss": 27.008, "step": 23960 }, { "epoch": 0.37790880998927917, "grad_norm": 96.13768619477909, "learning_rate": 1.39247816185506e-05, "loss": 27.0679, "step": 23970 }, { "epoch": 0.3780664690672889, "grad_norm": 98.40960039024884, "learning_rate": 1.3920179475414877e-05, "loss": 27.0182, "step": 23980 }, { "epoch": 0.3782241281452986, "grad_norm": 93.0799985874567, "learning_rate": 1.391557635102113e-05, "loss": 26.8335, "step": 23990 }, { "epoch": 0.37838178722330834, "grad_norm": 99.69608242397061, "learning_rate": 1.391097224652156e-05, "loss": 26.6477, "step": 24000 }, { "epoch": 0.378539446301318, "grad_norm": 101.787342734203, "learning_rate": 1.390636716306862e-05, "loss": 26.9377, "step": 24010 }, { "epoch": 0.3786971053793277, "grad_norm": 102.04607043479638, "learning_rate": 1.390176110181501e-05, "loss": 26.7274, "step": 24020 }, { "epoch": 0.37885476445733746, "grad_norm": 98.28064262982784, "learning_rate": 1.3897154063913664e-05, "loss": 27.3671, "step": 24030 }, { "epoch": 0.37901242353534714, "grad_norm": 93.07750871807295, "learning_rate": 1.3892546050517772e-05, "loss": 26.5832, "step": 24040 }, { "epoch": 0.3791700826133569, "grad_norm": 97.18334374500137, "learning_rate": 1.3887937062780763e-05, "loss": 26.6794, "step": 24050 }, { "epoch": 0.37932774169136657, "grad_norm": 100.0402549506052, "learning_rate": 1.3883327101856307e-05, "loss": 27.8543, "step": 24060 }, { "epoch": 0.3794854007693763, "grad_norm": 92.4967352126256, "learning_rate": 1.3878716168898322e-05, "loss": 26.045, "step": 24070 }, { "epoch": 0.379643059847386, "grad_norm": 89.42948078062766, "learning_rate": 1.3874104265060967e-05, "loss": 26.1494, "step": 24080 }, { "epoch": 0.37980071892539574, "grad_norm": 97.49351710754148, "learning_rate": 1.3869491391498652e-05, "loss": 26.2762, "step": 24090 }, { "epoch": 0.37995837800340543, "grad_norm": 91.79057217055484, "learning_rate": 1.3864877549366015e-05, "loss": 27.1042, "step": 24100 }, { "epoch": 0.38011603708141517, "grad_norm": 101.86690660182637, "learning_rate": 1.3860262739817949e-05, "loss": 27.6267, "step": 24110 }, { "epoch": 0.38027369615942486, "grad_norm": 92.54761764608801, "learning_rate": 1.3855646964009584e-05, "loss": 26.1717, "step": 24120 }, { "epoch": 0.3804313552374346, "grad_norm": 94.92321803943216, "learning_rate": 1.3851030223096288e-05, "loss": 27.3968, "step": 24130 }, { "epoch": 0.3805890143154443, "grad_norm": 92.0132533635473, "learning_rate": 1.3846412518233681e-05, "loss": 26.6894, "step": 24140 }, { "epoch": 0.38074667339345397, "grad_norm": 91.73464819756344, "learning_rate": 1.3841793850577613e-05, "loss": 26.8338, "step": 24150 }, { "epoch": 0.3809043324714637, "grad_norm": 96.95124229253258, "learning_rate": 1.3837174221284184e-05, "loss": 26.6307, "step": 24160 }, { "epoch": 0.3810619915494734, "grad_norm": 100.43930713599134, "learning_rate": 1.383255363150973e-05, "loss": 26.6284, "step": 24170 }, { "epoch": 0.38121965062748314, "grad_norm": 102.93561595963533, "learning_rate": 1.3827932082410828e-05, "loss": 26.8517, "step": 24180 }, { "epoch": 0.38137730970549283, "grad_norm": 95.2940486352579, "learning_rate": 1.3823309575144297e-05, "loss": 26.3677, "step": 24190 }, { "epoch": 0.38153496878350257, "grad_norm": 97.92557319576451, "learning_rate": 1.3818686110867189e-05, "loss": 27.0099, "step": 24200 }, { "epoch": 0.38169262786151226, "grad_norm": 112.25790968940638, "learning_rate": 1.3814061690736809e-05, "loss": 26.8579, "step": 24210 }, { "epoch": 0.381850286939522, "grad_norm": 99.7088689680714, "learning_rate": 1.3809436315910687e-05, "loss": 27.1603, "step": 24220 }, { "epoch": 0.3820079460175317, "grad_norm": 86.10735292578991, "learning_rate": 1.3804809987546597e-05, "loss": 25.9365, "step": 24230 }, { "epoch": 0.3821656050955414, "grad_norm": 90.29075385469781, "learning_rate": 1.3800182706802558e-05, "loss": 26.1101, "step": 24240 }, { "epoch": 0.3823232641735511, "grad_norm": 99.78634684212551, "learning_rate": 1.379555447483682e-05, "loss": 26.8529, "step": 24250 }, { "epoch": 0.3824809232515608, "grad_norm": 94.39377409998231, "learning_rate": 1.3790925292807872e-05, "loss": 26.57, "step": 24260 }, { "epoch": 0.38263858232957054, "grad_norm": 100.80954304664535, "learning_rate": 1.3786295161874446e-05, "loss": 26.3167, "step": 24270 }, { "epoch": 0.38279624140758023, "grad_norm": 96.544300935134, "learning_rate": 1.37816640831955e-05, "loss": 27.2726, "step": 24280 }, { "epoch": 0.38295390048558997, "grad_norm": 94.0096459801086, "learning_rate": 1.3777032057930245e-05, "loss": 26.8293, "step": 24290 }, { "epoch": 0.38311155956359966, "grad_norm": 103.49829578132098, "learning_rate": 1.377239908723812e-05, "loss": 26.633, "step": 24300 }, { "epoch": 0.3832692186416094, "grad_norm": 100.71476081973054, "learning_rate": 1.3767765172278795e-05, "loss": 26.8224, "step": 24310 }, { "epoch": 0.3834268777196191, "grad_norm": 92.79319499542358, "learning_rate": 1.3763130314212185e-05, "loss": 26.3491, "step": 24320 }, { "epoch": 0.38358453679762883, "grad_norm": 97.509139126929, "learning_rate": 1.3758494514198444e-05, "loss": 25.8945, "step": 24330 }, { "epoch": 0.3837421958756385, "grad_norm": 92.48113471438563, "learning_rate": 1.375385777339795e-05, "loss": 26.5947, "step": 24340 }, { "epoch": 0.38389985495364826, "grad_norm": 97.31737745522611, "learning_rate": 1.3749220092971327e-05, "loss": 26.4422, "step": 24350 }, { "epoch": 0.38405751403165794, "grad_norm": 101.91443397722435, "learning_rate": 1.374458147407943e-05, "loss": 26.6156, "step": 24360 }, { "epoch": 0.38421517310966763, "grad_norm": 88.39372782873909, "learning_rate": 1.3739941917883351e-05, "loss": 26.2085, "step": 24370 }, { "epoch": 0.38437283218767737, "grad_norm": 97.50523429470314, "learning_rate": 1.3735301425544408e-05, "loss": 26.6234, "step": 24380 }, { "epoch": 0.38453049126568706, "grad_norm": 92.93047269702882, "learning_rate": 1.3730659998224168e-05, "loss": 26.3151, "step": 24390 }, { "epoch": 0.3846881503436968, "grad_norm": 99.73300400259237, "learning_rate": 1.3726017637084419e-05, "loss": 27.1758, "step": 24400 }, { "epoch": 0.3848458094217065, "grad_norm": 95.74115214651552, "learning_rate": 1.3721374343287191e-05, "loss": 28.6933, "step": 24410 }, { "epoch": 0.38500346849971623, "grad_norm": 94.65994746087745, "learning_rate": 1.3716730117994741e-05, "loss": 26.6764, "step": 24420 }, { "epoch": 0.3851611275777259, "grad_norm": 100.03999988950486, "learning_rate": 1.3712084962369568e-05, "loss": 26.8002, "step": 24430 }, { "epoch": 0.38531878665573566, "grad_norm": 95.37474613317703, "learning_rate": 1.3707438877574393e-05, "loss": 25.85, "step": 24440 }, { "epoch": 0.38547644573374534, "grad_norm": 106.84076391933309, "learning_rate": 1.3702791864772176e-05, "loss": 27.9932, "step": 24450 }, { "epoch": 0.3856341048117551, "grad_norm": 90.92681036713438, "learning_rate": 1.369814392512611e-05, "loss": 26.0903, "step": 24460 }, { "epoch": 0.38579176388976477, "grad_norm": 94.9931205467173, "learning_rate": 1.369349505979962e-05, "loss": 27.1481, "step": 24470 }, { "epoch": 0.38594942296777446, "grad_norm": 101.1810995147447, "learning_rate": 1.3688845269956354e-05, "loss": 26.7407, "step": 24480 }, { "epoch": 0.3861070820457842, "grad_norm": 92.893729175208, "learning_rate": 1.3684194556760207e-05, "loss": 26.5756, "step": 24490 }, { "epoch": 0.3862647411237939, "grad_norm": 87.74532596926717, "learning_rate": 1.3679542921375291e-05, "loss": 26.238, "step": 24500 }, { "epoch": 0.38642240020180363, "grad_norm": 96.52473739395413, "learning_rate": 1.3674890364965956e-05, "loss": 26.4939, "step": 24510 }, { "epoch": 0.3865800592798133, "grad_norm": 96.54173606085834, "learning_rate": 1.3670236888696782e-05, "loss": 26.9342, "step": 24520 }, { "epoch": 0.38673771835782306, "grad_norm": 116.405364020985, "learning_rate": 1.3665582493732575e-05, "loss": 26.7262, "step": 24530 }, { "epoch": 0.38689537743583274, "grad_norm": 102.9913264997024, "learning_rate": 1.3660927181238376e-05, "loss": 27.4868, "step": 24540 }, { "epoch": 0.3870530365138425, "grad_norm": 103.58308790592817, "learning_rate": 1.3656270952379456e-05, "loss": 26.8437, "step": 24550 }, { "epoch": 0.38721069559185217, "grad_norm": 99.55365167955553, "learning_rate": 1.3651613808321313e-05, "loss": 27.3026, "step": 24560 }, { "epoch": 0.3873683546698619, "grad_norm": 91.24979923908397, "learning_rate": 1.3646955750229667e-05, "loss": 27.6315, "step": 24570 }, { "epoch": 0.3875260137478716, "grad_norm": 95.07491223397736, "learning_rate": 1.3642296779270483e-05, "loss": 26.6609, "step": 24580 }, { "epoch": 0.3876836728258813, "grad_norm": 93.15341268805795, "learning_rate": 1.363763689660994e-05, "loss": 25.9621, "step": 24590 }, { "epoch": 0.38784133190389103, "grad_norm": 85.87189215308558, "learning_rate": 1.3632976103414453e-05, "loss": 26.4946, "step": 24600 }, { "epoch": 0.3879989909819007, "grad_norm": 92.75519668234756, "learning_rate": 1.3628314400850664e-05, "loss": 26.6244, "step": 24610 }, { "epoch": 0.38815665005991046, "grad_norm": 105.12245862170262, "learning_rate": 1.3623651790085437e-05, "loss": 27.2373, "step": 24620 }, { "epoch": 0.38831430913792014, "grad_norm": 104.6734128509891, "learning_rate": 1.361898827228587e-05, "loss": 26.8626, "step": 24630 }, { "epoch": 0.3884719682159299, "grad_norm": 93.95209587728233, "learning_rate": 1.3614323848619286e-05, "loss": 27.2737, "step": 24640 }, { "epoch": 0.3886296272939396, "grad_norm": 93.37600702908023, "learning_rate": 1.3609658520253235e-05, "loss": 27.28, "step": 24650 }, { "epoch": 0.3887872863719493, "grad_norm": 91.23311542774744, "learning_rate": 1.3604992288355491e-05, "loss": 26.1805, "step": 24660 }, { "epoch": 0.388944945449959, "grad_norm": 90.81691202273426, "learning_rate": 1.360032515409406e-05, "loss": 26.2648, "step": 24670 }, { "epoch": 0.38910260452796874, "grad_norm": 92.64353078887493, "learning_rate": 1.3595657118637163e-05, "loss": 26.5544, "step": 24680 }, { "epoch": 0.38926026360597843, "grad_norm": 97.76064290822771, "learning_rate": 1.3590988183153261e-05, "loss": 26.4172, "step": 24690 }, { "epoch": 0.38941792268398817, "grad_norm": 99.82871350740646, "learning_rate": 1.3586318348811027e-05, "loss": 26.8238, "step": 24700 }, { "epoch": 0.38957558176199786, "grad_norm": 90.94625846886534, "learning_rate": 1.3581647616779367e-05, "loss": 25.5552, "step": 24710 }, { "epoch": 0.38973324084000754, "grad_norm": 94.22236044311055, "learning_rate": 1.3576975988227408e-05, "loss": 26.2773, "step": 24720 }, { "epoch": 0.3898908999180173, "grad_norm": 90.35655741162613, "learning_rate": 1.3572303464324505e-05, "loss": 27.1506, "step": 24730 }, { "epoch": 0.390048558996027, "grad_norm": 92.1106702394827, "learning_rate": 1.3567630046240234e-05, "loss": 27.0026, "step": 24740 }, { "epoch": 0.3902062180740367, "grad_norm": 94.83949944579842, "learning_rate": 1.3562955735144397e-05, "loss": 27.8982, "step": 24750 }, { "epoch": 0.3903638771520464, "grad_norm": 94.89413919759133, "learning_rate": 1.3558280532207015e-05, "loss": 26.852, "step": 24760 }, { "epoch": 0.39052153623005614, "grad_norm": 88.90146412885923, "learning_rate": 1.355360443859834e-05, "loss": 26.6215, "step": 24770 }, { "epoch": 0.39067919530806583, "grad_norm": 94.976726036835, "learning_rate": 1.3548927455488836e-05, "loss": 26.3704, "step": 24780 }, { "epoch": 0.39083685438607557, "grad_norm": 96.3799830549508, "learning_rate": 1.3544249584049204e-05, "loss": 27.4286, "step": 24790 }, { "epoch": 0.39099451346408526, "grad_norm": 95.59533238778181, "learning_rate": 1.3539570825450356e-05, "loss": 26.0222, "step": 24800 }, { "epoch": 0.391152172542095, "grad_norm": 94.30250039252141, "learning_rate": 1.3534891180863427e-05, "loss": 26.2668, "step": 24810 }, { "epoch": 0.3913098316201047, "grad_norm": 93.28706973864112, "learning_rate": 1.353021065145978e-05, "loss": 25.8266, "step": 24820 }, { "epoch": 0.3914674906981144, "grad_norm": 94.34575602008793, "learning_rate": 1.3525529238410996e-05, "loss": 26.3395, "step": 24830 }, { "epoch": 0.3916251497761241, "grad_norm": 95.92897801897799, "learning_rate": 1.3520846942888874e-05, "loss": 26.7499, "step": 24840 }, { "epoch": 0.3917828088541338, "grad_norm": 91.00797773250204, "learning_rate": 1.351616376606544e-05, "loss": 26.9672, "step": 24850 }, { "epoch": 0.39194046793214354, "grad_norm": 97.60598184465422, "learning_rate": 1.3511479709112934e-05, "loss": 27.139, "step": 24860 }, { "epoch": 0.39209812701015323, "grad_norm": 97.1188366367001, "learning_rate": 1.3506794773203822e-05, "loss": 26.4264, "step": 24870 }, { "epoch": 0.39225578608816297, "grad_norm": 91.9874264682211, "learning_rate": 1.3502108959510785e-05, "loss": 26.741, "step": 24880 }, { "epoch": 0.39241344516617266, "grad_norm": 104.07030290056154, "learning_rate": 1.3497422269206733e-05, "loss": 27.0333, "step": 24890 }, { "epoch": 0.3925711042441824, "grad_norm": 99.20036707686081, "learning_rate": 1.3492734703464783e-05, "loss": 27.3848, "step": 24900 }, { "epoch": 0.3927287633221921, "grad_norm": 89.5272958158275, "learning_rate": 1.3488046263458278e-05, "loss": 26.4096, "step": 24910 }, { "epoch": 0.39288642240020183, "grad_norm": 93.52779066680988, "learning_rate": 1.3483356950360782e-05, "loss": 25.998, "step": 24920 }, { "epoch": 0.3930440814782115, "grad_norm": 95.85637553724474, "learning_rate": 1.347866676534607e-05, "loss": 26.3432, "step": 24930 }, { "epoch": 0.3932017405562212, "grad_norm": 98.85776296044145, "learning_rate": 1.3473975709588143e-05, "loss": 25.9649, "step": 24940 }, { "epoch": 0.39335939963423094, "grad_norm": 86.70140119146063, "learning_rate": 1.3469283784261216e-05, "loss": 25.846, "step": 24950 }, { "epoch": 0.39351705871224063, "grad_norm": 102.16282262581525, "learning_rate": 1.3464590990539725e-05, "loss": 26.6511, "step": 24960 }, { "epoch": 0.3936747177902504, "grad_norm": 90.94251919599706, "learning_rate": 1.3459897329598312e-05, "loss": 26.2509, "step": 24970 }, { "epoch": 0.39383237686826006, "grad_norm": 88.27842573045663, "learning_rate": 1.3455202802611856e-05, "loss": 27.2202, "step": 24980 }, { "epoch": 0.3939900359462698, "grad_norm": 98.5836687959891, "learning_rate": 1.3450507410755431e-05, "loss": 26.9179, "step": 24990 }, { "epoch": 0.3941476950242795, "grad_norm": 93.27182315696518, "learning_rate": 1.3445811155204345e-05, "loss": 27.06, "step": 25000 }, { "epoch": 0.39430535410228923, "grad_norm": 97.68206608747775, "learning_rate": 1.3441114037134115e-05, "loss": 26.1906, "step": 25010 }, { "epoch": 0.3944630131802989, "grad_norm": 93.90817091250698, "learning_rate": 1.3436416057720472e-05, "loss": 26.2868, "step": 25020 }, { "epoch": 0.39462067225830866, "grad_norm": 96.21800358352374, "learning_rate": 1.3431717218139362e-05, "loss": 27.6044, "step": 25030 }, { "epoch": 0.39477833133631834, "grad_norm": 94.78134260900849, "learning_rate": 1.3427017519566952e-05, "loss": 26.2717, "step": 25040 }, { "epoch": 0.39493599041432803, "grad_norm": 93.00957806817384, "learning_rate": 1.3422316963179625e-05, "loss": 26.3458, "step": 25050 }, { "epoch": 0.3950936494923378, "grad_norm": 91.40927309021868, "learning_rate": 1.3417615550153969e-05, "loss": 26.3486, "step": 25060 }, { "epoch": 0.39525130857034746, "grad_norm": 95.92812953950057, "learning_rate": 1.3412913281666792e-05, "loss": 26.5867, "step": 25070 }, { "epoch": 0.3954089676483572, "grad_norm": 99.66217965891317, "learning_rate": 1.3408210158895118e-05, "loss": 25.9647, "step": 25080 }, { "epoch": 0.3955666267263669, "grad_norm": 91.82414943997271, "learning_rate": 1.3403506183016189e-05, "loss": 25.8206, "step": 25090 }, { "epoch": 0.39572428580437663, "grad_norm": 92.09650901282083, "learning_rate": 1.3398801355207445e-05, "loss": 26.7294, "step": 25100 }, { "epoch": 0.3958819448823863, "grad_norm": 91.42888452085656, "learning_rate": 1.3394095676646555e-05, "loss": 25.6534, "step": 25110 }, { "epoch": 0.39603960396039606, "grad_norm": 98.58393587805054, "learning_rate": 1.3389389148511392e-05, "loss": 25.8996, "step": 25120 }, { "epoch": 0.39619726303840574, "grad_norm": 94.09149949561687, "learning_rate": 1.3384681771980043e-05, "loss": 26.1107, "step": 25130 }, { "epoch": 0.3963549221164155, "grad_norm": 100.19108603426167, "learning_rate": 1.3379973548230813e-05, "loss": 26.9235, "step": 25140 }, { "epoch": 0.3965125811944252, "grad_norm": 90.67229270700935, "learning_rate": 1.3375264478442215e-05, "loss": 26.6202, "step": 25150 }, { "epoch": 0.39667024027243486, "grad_norm": 95.65177174826047, "learning_rate": 1.3370554563792973e-05, "loss": 26.7818, "step": 25160 }, { "epoch": 0.3968278993504446, "grad_norm": 94.39717882073113, "learning_rate": 1.336584380546202e-05, "loss": 26.7389, "step": 25170 }, { "epoch": 0.3969855584284543, "grad_norm": 90.74660155080306, "learning_rate": 1.3361132204628504e-05, "loss": 26.397, "step": 25180 }, { "epoch": 0.39714321750646403, "grad_norm": 95.59636880866377, "learning_rate": 1.3356419762471788e-05, "loss": 26.8933, "step": 25190 }, { "epoch": 0.3973008765844737, "grad_norm": 92.93392470631106, "learning_rate": 1.3351706480171439e-05, "loss": 26.751, "step": 25200 }, { "epoch": 0.39745853566248346, "grad_norm": 103.40688016410505, "learning_rate": 1.334699235890723e-05, "loss": 26.3497, "step": 25210 }, { "epoch": 0.39761619474049315, "grad_norm": 103.22624770583504, "learning_rate": 1.334227739985916e-05, "loss": 25.7909, "step": 25220 }, { "epoch": 0.3977738538185029, "grad_norm": 93.70525616861751, "learning_rate": 1.3337561604207423e-05, "loss": 26.6423, "step": 25230 }, { "epoch": 0.3979315128965126, "grad_norm": 89.60375886556274, "learning_rate": 1.3332844973132427e-05, "loss": 27.3367, "step": 25240 }, { "epoch": 0.3980891719745223, "grad_norm": 96.19639517812672, "learning_rate": 1.3328127507814788e-05, "loss": 25.6643, "step": 25250 }, { "epoch": 0.398246831052532, "grad_norm": 97.39509281699145, "learning_rate": 1.3323409209435333e-05, "loss": 26.6227, "step": 25260 }, { "epoch": 0.39840449013054174, "grad_norm": 94.3084502926077, "learning_rate": 1.3318690079175101e-05, "loss": 25.9065, "step": 25270 }, { "epoch": 0.39856214920855143, "grad_norm": 92.47710071252544, "learning_rate": 1.331397011821533e-05, "loss": 26.711, "step": 25280 }, { "epoch": 0.3987198082865611, "grad_norm": 101.02841237620306, "learning_rate": 1.3309249327737473e-05, "loss": 25.1939, "step": 25290 }, { "epoch": 0.39887746736457086, "grad_norm": 96.8004992407032, "learning_rate": 1.3304527708923187e-05, "loss": 27.7117, "step": 25300 }, { "epoch": 0.39903512644258055, "grad_norm": 92.15611583517894, "learning_rate": 1.329980526295434e-05, "loss": 26.2461, "step": 25310 }, { "epoch": 0.3991927855205903, "grad_norm": 96.24068953870211, "learning_rate": 1.3295081991013004e-05, "loss": 26.4642, "step": 25320 }, { "epoch": 0.3993504445986, "grad_norm": 93.87762460413491, "learning_rate": 1.3290357894281458e-05, "loss": 26.252, "step": 25330 }, { "epoch": 0.3995081036766097, "grad_norm": 100.18827752993317, "learning_rate": 1.3285632973942189e-05, "loss": 26.4172, "step": 25340 }, { "epoch": 0.3996657627546194, "grad_norm": 90.53415099394796, "learning_rate": 1.3280907231177888e-05, "loss": 25.2262, "step": 25350 }, { "epoch": 0.39982342183262914, "grad_norm": 96.32810188593335, "learning_rate": 1.327618066717146e-05, "loss": 26.6782, "step": 25360 }, { "epoch": 0.39998108091063883, "grad_norm": 97.62231853276926, "learning_rate": 1.3271453283105995e-05, "loss": 26.0881, "step": 25370 }, { "epoch": 0.4001387399886486, "grad_norm": 97.62419295151842, "learning_rate": 1.3266725080164812e-05, "loss": 26.5131, "step": 25380 }, { "epoch": 0.40029639906665826, "grad_norm": 94.05696280017136, "learning_rate": 1.3261996059531426e-05, "loss": 26.4726, "step": 25390 }, { "epoch": 0.40045405814466795, "grad_norm": 101.48111137714793, "learning_rate": 1.325726622238955e-05, "loss": 26.0487, "step": 25400 }, { "epoch": 0.4006117172226777, "grad_norm": 92.82149311840801, "learning_rate": 1.3252535569923111e-05, "loss": 25.6943, "step": 25410 }, { "epoch": 0.4007693763006874, "grad_norm": 91.6578729078813, "learning_rate": 1.3247804103316234e-05, "loss": 25.8898, "step": 25420 }, { "epoch": 0.4009270353786971, "grad_norm": 96.2573020944607, "learning_rate": 1.3243071823753249e-05, "loss": 25.385, "step": 25430 }, { "epoch": 0.4010846944567068, "grad_norm": 92.41602277539111, "learning_rate": 1.3238338732418692e-05, "loss": 25.6247, "step": 25440 }, { "epoch": 0.40124235353471654, "grad_norm": 100.26985243516643, "learning_rate": 1.3233604830497301e-05, "loss": 26.3488, "step": 25450 }, { "epoch": 0.40140001261272623, "grad_norm": 92.99512958125834, "learning_rate": 1.3228870119174015e-05, "loss": 26.1158, "step": 25460 }, { "epoch": 0.401557671690736, "grad_norm": 99.26931735419106, "learning_rate": 1.322413459963398e-05, "loss": 25.9819, "step": 25470 }, { "epoch": 0.40171533076874566, "grad_norm": 84.96606599111944, "learning_rate": 1.3219398273062533e-05, "loss": 25.4845, "step": 25480 }, { "epoch": 0.4018729898467554, "grad_norm": 90.93293638782275, "learning_rate": 1.3214661140645237e-05, "loss": 26.2083, "step": 25490 }, { "epoch": 0.4020306489247651, "grad_norm": 95.64928024039295, "learning_rate": 1.3209923203567826e-05, "loss": 26.0296, "step": 25500 }, { "epoch": 0.4021883080027748, "grad_norm": 88.87627547300407, "learning_rate": 1.3205184463016259e-05, "loss": 26.0269, "step": 25510 }, { "epoch": 0.4023459670807845, "grad_norm": 107.73056960972514, "learning_rate": 1.3200444920176682e-05, "loss": 27.3772, "step": 25520 }, { "epoch": 0.4025036261587942, "grad_norm": 90.96150775067372, "learning_rate": 1.3195704576235449e-05, "loss": 26.1914, "step": 25530 }, { "epoch": 0.40266128523680395, "grad_norm": 92.05222074783735, "learning_rate": 1.3190963432379117e-05, "loss": 26.089, "step": 25540 }, { "epoch": 0.40281894431481363, "grad_norm": 96.97615043479756, "learning_rate": 1.3186221489794435e-05, "loss": 25.8684, "step": 25550 }, { "epoch": 0.4029766033928234, "grad_norm": 99.24999759756287, "learning_rate": 1.3181478749668357e-05, "loss": 26.2972, "step": 25560 }, { "epoch": 0.40313426247083306, "grad_norm": 91.48875821407108, "learning_rate": 1.3176735213188036e-05, "loss": 25.4842, "step": 25570 }, { "epoch": 0.4032919215488428, "grad_norm": 91.74757280651939, "learning_rate": 1.3171990881540823e-05, "loss": 25.6361, "step": 25580 }, { "epoch": 0.4034495806268525, "grad_norm": 97.6617056778647, "learning_rate": 1.3167245755914274e-05, "loss": 25.9249, "step": 25590 }, { "epoch": 0.40360723970486223, "grad_norm": 90.5383582887212, "learning_rate": 1.3162499837496134e-05, "loss": 25.7821, "step": 25600 }, { "epoch": 0.4037648987828719, "grad_norm": 98.05048288202666, "learning_rate": 1.3157753127474352e-05, "loss": 25.3693, "step": 25610 }, { "epoch": 0.4039225578608816, "grad_norm": 95.29771560952487, "learning_rate": 1.3153005627037074e-05, "loss": 26.0262, "step": 25620 }, { "epoch": 0.40408021693889135, "grad_norm": 90.77579104299089, "learning_rate": 1.3148257337372647e-05, "loss": 25.893, "step": 25630 }, { "epoch": 0.40423787601690103, "grad_norm": 87.6942246887738, "learning_rate": 1.3143508259669611e-05, "loss": 25.5222, "step": 25640 }, { "epoch": 0.4043955350949108, "grad_norm": 92.99220210366903, "learning_rate": 1.3138758395116704e-05, "loss": 25.6849, "step": 25650 }, { "epoch": 0.40455319417292046, "grad_norm": 99.55638124925069, "learning_rate": 1.3134007744902865e-05, "loss": 26.5783, "step": 25660 }, { "epoch": 0.4047108532509302, "grad_norm": 91.62791157928419, "learning_rate": 1.3129256310217224e-05, "loss": 26.0384, "step": 25670 }, { "epoch": 0.4048685123289399, "grad_norm": 84.27876917346751, "learning_rate": 1.3124504092249108e-05, "loss": 26.0944, "step": 25680 }, { "epoch": 0.40502617140694963, "grad_norm": 97.8310681974736, "learning_rate": 1.3119751092188049e-05, "loss": 25.4322, "step": 25690 }, { "epoch": 0.4051838304849593, "grad_norm": 96.3322222137988, "learning_rate": 1.3114997311223763e-05, "loss": 25.4249, "step": 25700 }, { "epoch": 0.40534148956296906, "grad_norm": 98.98784575211225, "learning_rate": 1.3110242750546167e-05, "loss": 25.8189, "step": 25710 }, { "epoch": 0.40549914864097875, "grad_norm": 91.03999450345098, "learning_rate": 1.3105487411345374e-05, "loss": 26.2526, "step": 25720 }, { "epoch": 0.40565680771898843, "grad_norm": 92.86545494506822, "learning_rate": 1.3100731294811689e-05, "loss": 25.6049, "step": 25730 }, { "epoch": 0.4058144667969982, "grad_norm": 93.07877655287686, "learning_rate": 1.3095974402135612e-05, "loss": 26.0333, "step": 25740 }, { "epoch": 0.40597212587500786, "grad_norm": 86.98675090159757, "learning_rate": 1.3091216734507844e-05, "loss": 25.5465, "step": 25750 }, { "epoch": 0.4061297849530176, "grad_norm": 89.87204172116819, "learning_rate": 1.3086458293119266e-05, "loss": 25.8511, "step": 25760 }, { "epoch": 0.4062874440310273, "grad_norm": 97.67399944256395, "learning_rate": 1.3081699079160963e-05, "loss": 26.1027, "step": 25770 }, { "epoch": 0.40644510310903703, "grad_norm": 93.1532261583242, "learning_rate": 1.3076939093824215e-05, "loss": 26.2462, "step": 25780 }, { "epoch": 0.4066027621870467, "grad_norm": 90.08967008181726, "learning_rate": 1.3072178338300492e-05, "loss": 26.0322, "step": 25790 }, { "epoch": 0.40676042126505646, "grad_norm": 90.31283058300976, "learning_rate": 1.3067416813781451e-05, "loss": 26.3833, "step": 25800 }, { "epoch": 0.40691808034306615, "grad_norm": 94.70680675597055, "learning_rate": 1.3062654521458948e-05, "loss": 26.3604, "step": 25810 }, { "epoch": 0.4070757394210759, "grad_norm": 91.08782736643215, "learning_rate": 1.3057891462525034e-05, "loss": 25.316, "step": 25820 }, { "epoch": 0.4072333984990856, "grad_norm": 110.57405129802754, "learning_rate": 1.3053127638171944e-05, "loss": 25.1904, "step": 25830 }, { "epoch": 0.4073910575770953, "grad_norm": 95.46933349745376, "learning_rate": 1.3048363049592108e-05, "loss": 26.6786, "step": 25840 }, { "epoch": 0.407548716655105, "grad_norm": 93.56099317659417, "learning_rate": 1.304359769797815e-05, "loss": 25.757, "step": 25850 }, { "epoch": 0.4077063757331147, "grad_norm": 97.69533492943847, "learning_rate": 1.3038831584522883e-05, "loss": 26.0928, "step": 25860 }, { "epoch": 0.40786403481112443, "grad_norm": 92.97643039140448, "learning_rate": 1.3034064710419305e-05, "loss": 26.1573, "step": 25870 }, { "epoch": 0.4080216938891341, "grad_norm": 189.14997284884058, "learning_rate": 1.3029297076860616e-05, "loss": 26.1253, "step": 25880 }, { "epoch": 0.40817935296714386, "grad_norm": 86.68261233577651, "learning_rate": 1.3024528685040197e-05, "loss": 25.772, "step": 25890 }, { "epoch": 0.40833701204515355, "grad_norm": 92.13094126396591, "learning_rate": 1.3019759536151621e-05, "loss": 25.0786, "step": 25900 }, { "epoch": 0.4084946711231633, "grad_norm": 94.70482830898972, "learning_rate": 1.3014989631388652e-05, "loss": 26.0858, "step": 25910 }, { "epoch": 0.408652330201173, "grad_norm": 100.37684677779558, "learning_rate": 1.3010218971945244e-05, "loss": 26.373, "step": 25920 }, { "epoch": 0.4088099892791827, "grad_norm": 97.17987337287764, "learning_rate": 1.3005447559015532e-05, "loss": 25.7779, "step": 25930 }, { "epoch": 0.4089676483571924, "grad_norm": 92.93496841582292, "learning_rate": 1.3000675393793853e-05, "loss": 25.5459, "step": 25940 }, { "epoch": 0.40912530743520215, "grad_norm": 96.59592249259232, "learning_rate": 1.2995902477474723e-05, "loss": 26.4154, "step": 25950 }, { "epoch": 0.40928296651321183, "grad_norm": 93.4764959077194, "learning_rate": 1.2991128811252847e-05, "loss": 25.4682, "step": 25960 }, { "epoch": 0.4094406255912215, "grad_norm": 88.91353187637088, "learning_rate": 1.298635439632312e-05, "loss": 25.9893, "step": 25970 }, { "epoch": 0.40959828466923126, "grad_norm": 87.97366989361824, "learning_rate": 1.2981579233880619e-05, "loss": 25.8866, "step": 25980 }, { "epoch": 0.40975594374724095, "grad_norm": 101.9287583937747, "learning_rate": 1.297680332512062e-05, "loss": 26.6164, "step": 25990 }, { "epoch": 0.4099136028252507, "grad_norm": 92.44608008599681, "learning_rate": 1.2972026671238574e-05, "loss": 25.8617, "step": 26000 }, { "epoch": 0.4100712619032604, "grad_norm": 90.55368844470507, "learning_rate": 1.296724927343012e-05, "loss": 25.7413, "step": 26010 }, { "epoch": 0.4102289209812701, "grad_norm": 98.776116801261, "learning_rate": 1.296247113289109e-05, "loss": 25.8161, "step": 26020 }, { "epoch": 0.4103865800592798, "grad_norm": 91.4726354370747, "learning_rate": 1.2957692250817497e-05, "loss": 26.0101, "step": 26030 }, { "epoch": 0.41054423913728955, "grad_norm": 99.25834476861132, "learning_rate": 1.2952912628405541e-05, "loss": 25.7086, "step": 26040 }, { "epoch": 0.41070189821529923, "grad_norm": 92.49760890809414, "learning_rate": 1.2948132266851606e-05, "loss": 25.7047, "step": 26050 }, { "epoch": 0.410859557293309, "grad_norm": 94.94946992601898, "learning_rate": 1.2943351167352258e-05, "loss": 26.1433, "step": 26060 }, { "epoch": 0.41101721637131866, "grad_norm": 95.07934963853263, "learning_rate": 1.2938569331104258e-05, "loss": 25.2836, "step": 26070 }, { "epoch": 0.41117487544932835, "grad_norm": 96.92929829052838, "learning_rate": 1.2933786759304539e-05, "loss": 25.1783, "step": 26080 }, { "epoch": 0.4113325345273381, "grad_norm": 95.11818784307066, "learning_rate": 1.2929003453150229e-05, "loss": 26.2922, "step": 26090 }, { "epoch": 0.4114901936053478, "grad_norm": 94.23408290102823, "learning_rate": 1.292421941383863e-05, "loss": 25.5216, "step": 26100 }, { "epoch": 0.4116478526833575, "grad_norm": 89.17878821249782, "learning_rate": 1.2919434642567237e-05, "loss": 25.4154, "step": 26110 }, { "epoch": 0.4118055117613672, "grad_norm": 97.78397504316499, "learning_rate": 1.2914649140533722e-05, "loss": 25.7954, "step": 26120 }, { "epoch": 0.41196317083937695, "grad_norm": 99.93855516252366, "learning_rate": 1.2909862908935937e-05, "loss": 25.3668, "step": 26130 }, { "epoch": 0.41212082991738663, "grad_norm": 87.72262592356958, "learning_rate": 1.2905075948971929e-05, "loss": 26.3795, "step": 26140 }, { "epoch": 0.4122784889953964, "grad_norm": 91.05798440997276, "learning_rate": 1.2900288261839915e-05, "loss": 26.4931, "step": 26150 }, { "epoch": 0.41243614807340606, "grad_norm": 99.49066363188126, "learning_rate": 1.2895499848738298e-05, "loss": 26.1542, "step": 26160 }, { "epoch": 0.4125938071514158, "grad_norm": 89.11927263659481, "learning_rate": 1.2890710710865661e-05, "loss": 25.51, "step": 26170 }, { "epoch": 0.4127514662294255, "grad_norm": 97.61632370128004, "learning_rate": 1.2885920849420775e-05, "loss": 25.8259, "step": 26180 }, { "epoch": 0.4129091253074352, "grad_norm": 88.47351979754922, "learning_rate": 1.2881130265602586e-05, "loss": 25.7896, "step": 26190 }, { "epoch": 0.4130667843854449, "grad_norm": 92.78330497894376, "learning_rate": 1.2876338960610223e-05, "loss": 25.3529, "step": 26200 }, { "epoch": 0.4132244434634546, "grad_norm": 97.18844751334503, "learning_rate": 1.2871546935642993e-05, "loss": 25.4424, "step": 26210 }, { "epoch": 0.41338210254146435, "grad_norm": 93.65385222679741, "learning_rate": 1.2866754191900387e-05, "loss": 25.4433, "step": 26220 }, { "epoch": 0.41353976161947403, "grad_norm": 91.5851190153827, "learning_rate": 1.2861960730582075e-05, "loss": 25.8549, "step": 26230 }, { "epoch": 0.4136974206974838, "grad_norm": 98.94969755150477, "learning_rate": 1.2857166552887903e-05, "loss": 25.9396, "step": 26240 }, { "epoch": 0.41385507977549346, "grad_norm": 94.38956607241595, "learning_rate": 1.2852371660017901e-05, "loss": 25.6903, "step": 26250 }, { "epoch": 0.4140127388535032, "grad_norm": 90.25249980810455, "learning_rate": 1.2847576053172278e-05, "loss": 26.5845, "step": 26260 }, { "epoch": 0.4141703979315129, "grad_norm": 87.47798062256815, "learning_rate": 1.2842779733551414e-05, "loss": 25.8547, "step": 26270 }, { "epoch": 0.41432805700952263, "grad_norm": 94.0988681566339, "learning_rate": 1.2837982702355878e-05, "loss": 25.8728, "step": 26280 }, { "epoch": 0.4144857160875323, "grad_norm": 93.37397289959375, "learning_rate": 1.2833184960786412e-05, "loss": 26.2736, "step": 26290 }, { "epoch": 0.414643375165542, "grad_norm": 91.74237669183651, "learning_rate": 1.2828386510043936e-05, "loss": 26.3893, "step": 26300 }, { "epoch": 0.41480103424355175, "grad_norm": 90.2987389638625, "learning_rate": 1.2823587351329542e-05, "loss": 25.678, "step": 26310 }, { "epoch": 0.41495869332156143, "grad_norm": 93.8927005787828, "learning_rate": 1.2818787485844513e-05, "loss": 25.2163, "step": 26320 }, { "epoch": 0.4151163523995712, "grad_norm": 98.74445376896509, "learning_rate": 1.2813986914790296e-05, "loss": 25.7588, "step": 26330 }, { "epoch": 0.41527401147758086, "grad_norm": 89.15845205078068, "learning_rate": 1.280918563936852e-05, "loss": 25.76, "step": 26340 }, { "epoch": 0.4154316705555906, "grad_norm": 94.49556452048516, "learning_rate": 1.2804383660780993e-05, "loss": 26.2802, "step": 26350 }, { "epoch": 0.4155893296336003, "grad_norm": 88.72693799678106, "learning_rate": 1.2799580980229692e-05, "loss": 25.3963, "step": 26360 }, { "epoch": 0.41574698871161003, "grad_norm": 107.44107917246035, "learning_rate": 1.2794777598916772e-05, "loss": 26.0661, "step": 26370 }, { "epoch": 0.4159046477896197, "grad_norm": 88.15608827601638, "learning_rate": 1.2789973518044567e-05, "loss": 25.8725, "step": 26380 }, { "epoch": 0.41606230686762946, "grad_norm": 92.31065792460988, "learning_rate": 1.2785168738815586e-05, "loss": 26.1649, "step": 26390 }, { "epoch": 0.41621996594563915, "grad_norm": 96.85116001801748, "learning_rate": 1.2780363262432509e-05, "loss": 25.2636, "step": 26400 }, { "epoch": 0.41637762502364883, "grad_norm": 95.73732043125395, "learning_rate": 1.2775557090098194e-05, "loss": 26.1851, "step": 26410 }, { "epoch": 0.4165352841016586, "grad_norm": 93.94562030169865, "learning_rate": 1.2770750223015665e-05, "loss": 25.5725, "step": 26420 }, { "epoch": 0.41669294317966826, "grad_norm": 97.74227056359433, "learning_rate": 1.2765942662388133e-05, "loss": 26.406, "step": 26430 }, { "epoch": 0.416850602257678, "grad_norm": 95.29247663904542, "learning_rate": 1.2761134409418976e-05, "loss": 26.1151, "step": 26440 }, { "epoch": 0.4170082613356877, "grad_norm": 86.22578164682724, "learning_rate": 1.275632546531174e-05, "loss": 25.7931, "step": 26450 }, { "epoch": 0.41716592041369743, "grad_norm": 96.65723905048567, "learning_rate": 1.2751515831270153e-05, "loss": 25.0189, "step": 26460 }, { "epoch": 0.4173235794917071, "grad_norm": 107.01565176372553, "learning_rate": 1.2746705508498112e-05, "loss": 26.571, "step": 26470 }, { "epoch": 0.41748123856971686, "grad_norm": 88.83140386193087, "learning_rate": 1.2741894498199683e-05, "loss": 24.7641, "step": 26480 }, { "epoch": 0.41763889764772655, "grad_norm": 98.14995581302558, "learning_rate": 1.2737082801579112e-05, "loss": 26.5936, "step": 26490 }, { "epoch": 0.4177965567257363, "grad_norm": 99.37973057309702, "learning_rate": 1.273227041984081e-05, "loss": 25.3571, "step": 26500 }, { "epoch": 0.417954215803746, "grad_norm": 88.45856911976746, "learning_rate": 1.2727457354189362e-05, "loss": 25.6498, "step": 26510 }, { "epoch": 0.4181118748817557, "grad_norm": 93.71839632662444, "learning_rate": 1.2722643605829521e-05, "loss": 25.5306, "step": 26520 }, { "epoch": 0.4182695339597654, "grad_norm": 88.21400059750945, "learning_rate": 1.2717829175966216e-05, "loss": 25.6158, "step": 26530 }, { "epoch": 0.4184271930377751, "grad_norm": 87.22900686177415, "learning_rate": 1.271301406580455e-05, "loss": 24.9336, "step": 26540 }, { "epoch": 0.41858485211578483, "grad_norm": 93.13615135653679, "learning_rate": 1.2708198276549783e-05, "loss": 26.0338, "step": 26550 }, { "epoch": 0.4187425111937945, "grad_norm": 93.32471268713172, "learning_rate": 1.2703381809407357e-05, "loss": 25.5792, "step": 26560 }, { "epoch": 0.41890017027180426, "grad_norm": 94.4170176912389, "learning_rate": 1.2698564665582873e-05, "loss": 25.8417, "step": 26570 }, { "epoch": 0.41905782934981395, "grad_norm": 89.47555202367082, "learning_rate": 1.2693746846282111e-05, "loss": 25.5989, "step": 26580 }, { "epoch": 0.4192154884278237, "grad_norm": 86.12139413357097, "learning_rate": 1.268892835271102e-05, "loss": 25.2418, "step": 26590 }, { "epoch": 0.4193731475058334, "grad_norm": 90.79879610109649, "learning_rate": 1.2684109186075714e-05, "loss": 25.8, "step": 26600 }, { "epoch": 0.4195308065838431, "grad_norm": 91.49973243530269, "learning_rate": 1.2679289347582474e-05, "loss": 25.2806, "step": 26610 }, { "epoch": 0.4196884656618528, "grad_norm": 86.56059000029897, "learning_rate": 1.2674468838437753e-05, "loss": 25.2097, "step": 26620 }, { "epoch": 0.41984612473986255, "grad_norm": 91.9883899637137, "learning_rate": 1.2669647659848163e-05, "loss": 26.2475, "step": 26630 }, { "epoch": 0.42000378381787223, "grad_norm": 91.26559828354216, "learning_rate": 1.26648258130205e-05, "loss": 25.7104, "step": 26640 }, { "epoch": 0.4201614428958819, "grad_norm": 92.43355280470732, "learning_rate": 1.2660003299161714e-05, "loss": 25.8799, "step": 26650 }, { "epoch": 0.42031910197389166, "grad_norm": 89.98743197503462, "learning_rate": 1.2655180119478928e-05, "loss": 25.6516, "step": 26660 }, { "epoch": 0.42047676105190135, "grad_norm": 91.32156094843442, "learning_rate": 1.2650356275179425e-05, "loss": 24.8708, "step": 26670 }, { "epoch": 0.4206344201299111, "grad_norm": 95.74058297027037, "learning_rate": 1.2645531767470662e-05, "loss": 25.4792, "step": 26680 }, { "epoch": 0.4207920792079208, "grad_norm": 100.85001511638983, "learning_rate": 1.2640706597560259e-05, "loss": 25.585, "step": 26690 }, { "epoch": 0.4209497382859305, "grad_norm": 90.61990512312406, "learning_rate": 1.2635880766656001e-05, "loss": 25.4589, "step": 26700 }, { "epoch": 0.4211073973639402, "grad_norm": 93.69744841532005, "learning_rate": 1.2631054275965839e-05, "loss": 25.6687, "step": 26710 }, { "epoch": 0.42126505644194995, "grad_norm": 88.9311473084232, "learning_rate": 1.2626227126697889e-05, "loss": 25.4575, "step": 26720 }, { "epoch": 0.42142271551995963, "grad_norm": 89.59032931841767, "learning_rate": 1.2621399320060432e-05, "loss": 25.5964, "step": 26730 }, { "epoch": 0.4215803745979694, "grad_norm": 99.3914126527904, "learning_rate": 1.2616570857261915e-05, "loss": 24.9947, "step": 26740 }, { "epoch": 0.42173803367597906, "grad_norm": 91.32879605961163, "learning_rate": 1.261174173951095e-05, "loss": 25.3283, "step": 26750 }, { "epoch": 0.42189569275398875, "grad_norm": 94.4746747315746, "learning_rate": 1.2606911968016304e-05, "loss": 25.012, "step": 26760 }, { "epoch": 0.4220533518319985, "grad_norm": 91.76050666974074, "learning_rate": 1.2602081543986921e-05, "loss": 25.903, "step": 26770 }, { "epoch": 0.4222110109100082, "grad_norm": 101.0113703111381, "learning_rate": 1.2597250468631898e-05, "loss": 25.0743, "step": 26780 }, { "epoch": 0.4223686699880179, "grad_norm": 90.75956077053998, "learning_rate": 1.2592418743160502e-05, "loss": 25.5219, "step": 26790 }, { "epoch": 0.4225263290660276, "grad_norm": 92.65495265133174, "learning_rate": 1.2587586368782158e-05, "loss": 25.9311, "step": 26800 }, { "epoch": 0.42268398814403735, "grad_norm": 96.511525657345, "learning_rate": 1.2582753346706458e-05, "loss": 25.6989, "step": 26810 }, { "epoch": 0.42284164722204703, "grad_norm": 92.5118727942966, "learning_rate": 1.2577919678143145e-05, "loss": 25.3761, "step": 26820 }, { "epoch": 0.4229993063000568, "grad_norm": 95.25866285598832, "learning_rate": 1.257308536430214e-05, "loss": 25.5238, "step": 26830 }, { "epoch": 0.42315696537806646, "grad_norm": 90.16130638376694, "learning_rate": 1.2568250406393514e-05, "loss": 25.8824, "step": 26840 }, { "epoch": 0.4233146244560762, "grad_norm": 98.00029135531165, "learning_rate": 1.2563414805627506e-05, "loss": 26.0927, "step": 26850 }, { "epoch": 0.4234722835340859, "grad_norm": 98.30915097450443, "learning_rate": 1.2558578563214506e-05, "loss": 24.7692, "step": 26860 }, { "epoch": 0.4236299426120956, "grad_norm": 94.35927288054627, "learning_rate": 1.2553741680365076e-05, "loss": 25.3896, "step": 26870 }, { "epoch": 0.4237876016901053, "grad_norm": 96.27450332081995, "learning_rate": 1.2548904158289935e-05, "loss": 24.9471, "step": 26880 }, { "epoch": 0.423945260768115, "grad_norm": 83.95394905073103, "learning_rate": 1.2544065998199956e-05, "loss": 24.2869, "step": 26890 }, { "epoch": 0.42410291984612475, "grad_norm": 87.72871936919775, "learning_rate": 1.2539227201306181e-05, "loss": 25.5358, "step": 26900 }, { "epoch": 0.42426057892413443, "grad_norm": 89.88525939365381, "learning_rate": 1.2534387768819805e-05, "loss": 25.1107, "step": 26910 }, { "epoch": 0.4244182380021442, "grad_norm": 92.84666858509505, "learning_rate": 1.2529547701952183e-05, "loss": 25.538, "step": 26920 }, { "epoch": 0.42457589708015386, "grad_norm": 93.78293713159611, "learning_rate": 1.2524707001914826e-05, "loss": 24.7435, "step": 26930 }, { "epoch": 0.4247335561581636, "grad_norm": 91.63535208391887, "learning_rate": 1.2519865669919416e-05, "loss": 25.8016, "step": 26940 }, { "epoch": 0.4248912152361733, "grad_norm": 94.20928458195993, "learning_rate": 1.2515023707177778e-05, "loss": 25.593, "step": 26950 }, { "epoch": 0.42504887431418303, "grad_norm": 96.6531271752134, "learning_rate": 1.25101811149019e-05, "loss": 25.389, "step": 26960 }, { "epoch": 0.4252065333921927, "grad_norm": 97.26809377574725, "learning_rate": 1.2505337894303933e-05, "loss": 25.8241, "step": 26970 }, { "epoch": 0.4253641924702024, "grad_norm": 91.84728286973127, "learning_rate": 1.2500494046596174e-05, "loss": 26.6573, "step": 26980 }, { "epoch": 0.42552185154821215, "grad_norm": 98.95209257554174, "learning_rate": 1.2495649572991094e-05, "loss": 25.9158, "step": 26990 }, { "epoch": 0.42567951062622184, "grad_norm": 85.55103321867135, "learning_rate": 1.24908044747013e-05, "loss": 24.7039, "step": 27000 }, { "epoch": 0.4258371697042316, "grad_norm": 94.116920199962, "learning_rate": 1.2485958752939578e-05, "loss": 26.2338, "step": 27010 }, { "epoch": 0.42599482878224126, "grad_norm": 93.47864036539181, "learning_rate": 1.2481112408918848e-05, "loss": 25.0584, "step": 27020 }, { "epoch": 0.426152487860251, "grad_norm": 88.2123345454158, "learning_rate": 1.2476265443852195e-05, "loss": 25.2124, "step": 27030 }, { "epoch": 0.4263101469382607, "grad_norm": 83.7316430455713, "learning_rate": 1.247141785895287e-05, "loss": 25.341, "step": 27040 }, { "epoch": 0.42646780601627043, "grad_norm": 89.51929963584635, "learning_rate": 1.2466569655434263e-05, "loss": 25.167, "step": 27050 }, { "epoch": 0.4266254650942801, "grad_norm": 95.53344711587259, "learning_rate": 1.2461720834509924e-05, "loss": 24.5763, "step": 27060 }, { "epoch": 0.42678312417228986, "grad_norm": 87.65161104224562, "learning_rate": 1.2456871397393559e-05, "loss": 24.7354, "step": 27070 }, { "epoch": 0.42694078325029955, "grad_norm": 93.3214170201727, "learning_rate": 1.2452021345299032e-05, "loss": 25.1141, "step": 27080 }, { "epoch": 0.4270984423283093, "grad_norm": 91.12000101595038, "learning_rate": 1.2447170679440355e-05, "loss": 25.214, "step": 27090 }, { "epoch": 0.427256101406319, "grad_norm": 88.38377743235114, "learning_rate": 1.2442319401031696e-05, "loss": 25.0888, "step": 27100 }, { "epoch": 0.42741376048432866, "grad_norm": 91.13933722664255, "learning_rate": 1.2437467511287373e-05, "loss": 25.4317, "step": 27110 }, { "epoch": 0.4275714195623384, "grad_norm": 93.01739575833739, "learning_rate": 1.2432615011421863e-05, "loss": 25.8025, "step": 27120 }, { "epoch": 0.4277290786403481, "grad_norm": 94.02437253583614, "learning_rate": 1.242776190264979e-05, "loss": 24.4127, "step": 27130 }, { "epoch": 0.42788673771835783, "grad_norm": 90.00800942442298, "learning_rate": 1.2422908186185936e-05, "loss": 25.1027, "step": 27140 }, { "epoch": 0.4280443967963675, "grad_norm": 94.66874772645993, "learning_rate": 1.241805386324523e-05, "loss": 25.1106, "step": 27150 }, { "epoch": 0.42820205587437726, "grad_norm": 91.66016852395403, "learning_rate": 1.2413198935042757e-05, "loss": 25.6203, "step": 27160 }, { "epoch": 0.42835971495238695, "grad_norm": 87.03808528372527, "learning_rate": 1.240834340279375e-05, "loss": 25.2631, "step": 27170 }, { "epoch": 0.4285173740303967, "grad_norm": 87.11450284924491, "learning_rate": 1.2403487267713593e-05, "loss": 24.518, "step": 27180 }, { "epoch": 0.4286750331084064, "grad_norm": 88.29817847236411, "learning_rate": 1.2398630531017825e-05, "loss": 24.7808, "step": 27190 }, { "epoch": 0.4288326921864161, "grad_norm": 93.62285943992018, "learning_rate": 1.2393773193922135e-05, "loss": 25.4445, "step": 27200 }, { "epoch": 0.4289903512644258, "grad_norm": 88.94317359188294, "learning_rate": 1.2388915257642359e-05, "loss": 25.6997, "step": 27210 }, { "epoch": 0.4291480103424355, "grad_norm": 94.01436902883002, "learning_rate": 1.2384056723394482e-05, "loss": 25.4898, "step": 27220 }, { "epoch": 0.42930566942044524, "grad_norm": 88.48548798672866, "learning_rate": 1.237919759239464e-05, "loss": 24.5241, "step": 27230 }, { "epoch": 0.4294633284984549, "grad_norm": 94.95767268012158, "learning_rate": 1.2374337865859128e-05, "loss": 24.7219, "step": 27240 }, { "epoch": 0.42962098757646466, "grad_norm": 87.1066429930278, "learning_rate": 1.236947754500437e-05, "loss": 24.927, "step": 27250 }, { "epoch": 0.42977864665447435, "grad_norm": 101.43808445963775, "learning_rate": 1.236461663104696e-05, "loss": 25.2416, "step": 27260 }, { "epoch": 0.4299363057324841, "grad_norm": 99.64885390367617, "learning_rate": 1.2359755125203625e-05, "loss": 24.3819, "step": 27270 }, { "epoch": 0.4300939648104938, "grad_norm": 140.3790958463309, "learning_rate": 1.2354893028691246e-05, "loss": 25.0992, "step": 27280 }, { "epoch": 0.4302516238885035, "grad_norm": 85.71836235715088, "learning_rate": 1.2350030342726855e-05, "loss": 24.9466, "step": 27290 }, { "epoch": 0.4304092829665132, "grad_norm": 95.85103721780055, "learning_rate": 1.2345167068527628e-05, "loss": 25.3847, "step": 27300 }, { "epoch": 0.43056694204452295, "grad_norm": 93.52776221978728, "learning_rate": 1.2340303207310888e-05, "loss": 25.5292, "step": 27310 }, { "epoch": 0.43072460112253264, "grad_norm": 93.05296273042774, "learning_rate": 1.2335438760294103e-05, "loss": 25.0181, "step": 27320 }, { "epoch": 0.4308822602005423, "grad_norm": 96.48758784176253, "learning_rate": 1.2330573728694892e-05, "loss": 25.6062, "step": 27330 }, { "epoch": 0.43103991927855206, "grad_norm": 86.86971402327593, "learning_rate": 1.232570811373102e-05, "loss": 24.9106, "step": 27340 }, { "epoch": 0.43119757835656175, "grad_norm": 100.41320448524407, "learning_rate": 1.2320841916620396e-05, "loss": 25.1819, "step": 27350 }, { "epoch": 0.4313552374345715, "grad_norm": 90.95572896807103, "learning_rate": 1.231597513858107e-05, "loss": 24.8521, "step": 27360 }, { "epoch": 0.4315128965125812, "grad_norm": 92.04847571311089, "learning_rate": 1.2311107780831251e-05, "loss": 25.5909, "step": 27370 }, { "epoch": 0.4316705555905909, "grad_norm": 86.52281906193873, "learning_rate": 1.2306239844589277e-05, "loss": 24.6335, "step": 27380 }, { "epoch": 0.4318282146686006, "grad_norm": 90.99150611885533, "learning_rate": 1.2301371331073644e-05, "loss": 25.185, "step": 27390 }, { "epoch": 0.43198587374661035, "grad_norm": 94.68111255572809, "learning_rate": 1.2296502241502983e-05, "loss": 25.6269, "step": 27400 }, { "epoch": 0.43214353282462004, "grad_norm": 92.19051796323417, "learning_rate": 1.2291632577096078e-05, "loss": 25.2649, "step": 27410 }, { "epoch": 0.4323011919026298, "grad_norm": 94.5048661948387, "learning_rate": 1.228676233907185e-05, "loss": 25.7028, "step": 27420 }, { "epoch": 0.43245885098063946, "grad_norm": 89.43662059480506, "learning_rate": 1.2281891528649358e-05, "loss": 24.7104, "step": 27430 }, { "epoch": 0.43261651005864915, "grad_norm": 86.6665093043105, "learning_rate": 1.2277020147047828e-05, "loss": 25.0992, "step": 27440 }, { "epoch": 0.4327741691366589, "grad_norm": 93.40731295431148, "learning_rate": 1.2272148195486597e-05, "loss": 24.9002, "step": 27450 }, { "epoch": 0.4329318282146686, "grad_norm": 95.23870341078955, "learning_rate": 1.226727567518517e-05, "loss": 24.9192, "step": 27460 }, { "epoch": 0.4330894872926783, "grad_norm": 90.32563248371237, "learning_rate": 1.2262402587363185e-05, "loss": 24.4104, "step": 27470 }, { "epoch": 0.433247146370688, "grad_norm": 89.2353389198355, "learning_rate": 1.2257528933240417e-05, "loss": 25.4197, "step": 27480 }, { "epoch": 0.43340480544869775, "grad_norm": 90.33749089866001, "learning_rate": 1.2252654714036789e-05, "loss": 25.2408, "step": 27490 }, { "epoch": 0.43356246452670744, "grad_norm": 92.26141635895327, "learning_rate": 1.2247779930972368e-05, "loss": 24.9885, "step": 27500 }, { "epoch": 0.4337201236047172, "grad_norm": 89.33973216657573, "learning_rate": 1.2242904585267354e-05, "loss": 24.8904, "step": 27510 }, { "epoch": 0.43387778268272686, "grad_norm": 89.0970666104242, "learning_rate": 1.2238028678142093e-05, "loss": 25.264, "step": 27520 }, { "epoch": 0.4340354417607366, "grad_norm": 102.300773919373, "learning_rate": 1.2233152210817072e-05, "loss": 25.6051, "step": 27530 }, { "epoch": 0.4341931008387463, "grad_norm": 100.8921455316812, "learning_rate": 1.2228275184512917e-05, "loss": 25.2046, "step": 27540 }, { "epoch": 0.434350759916756, "grad_norm": 93.52356132885835, "learning_rate": 1.2223397600450393e-05, "loss": 25.2841, "step": 27550 }, { "epoch": 0.4345084189947657, "grad_norm": 90.53223104578394, "learning_rate": 1.2218519459850406e-05, "loss": 24.8908, "step": 27560 }, { "epoch": 0.4346660780727754, "grad_norm": 84.76098397442352, "learning_rate": 1.2213640763934001e-05, "loss": 25.0079, "step": 27570 }, { "epoch": 0.43482373715078515, "grad_norm": 89.68351007245982, "learning_rate": 1.2208761513922357e-05, "loss": 26.1053, "step": 27580 }, { "epoch": 0.43498139622879484, "grad_norm": 86.71943195628228, "learning_rate": 1.2203881711036807e-05, "loss": 25.0716, "step": 27590 }, { "epoch": 0.4351390553068046, "grad_norm": 87.067654221337, "learning_rate": 1.2199001356498806e-05, "loss": 24.5721, "step": 27600 }, { "epoch": 0.43529671438481427, "grad_norm": 98.36181380926391, "learning_rate": 1.219412045152995e-05, "loss": 25.1875, "step": 27610 }, { "epoch": 0.435454373462824, "grad_norm": 91.00754955440094, "learning_rate": 1.2189238997351977e-05, "loss": 24.7115, "step": 27620 }, { "epoch": 0.4356120325408337, "grad_norm": 91.76272777026443, "learning_rate": 1.2184356995186761e-05, "loss": 24.7739, "step": 27630 }, { "epoch": 0.43576969161884344, "grad_norm": 91.36627107110851, "learning_rate": 1.2179474446256318e-05, "loss": 25.3377, "step": 27640 }, { "epoch": 0.4359273506968531, "grad_norm": 99.16854564397288, "learning_rate": 1.2174591351782792e-05, "loss": 24.4979, "step": 27650 }, { "epoch": 0.43608500977486286, "grad_norm": 92.12785523592919, "learning_rate": 1.216970771298847e-05, "loss": 25.3442, "step": 27660 }, { "epoch": 0.43624266885287255, "grad_norm": 95.02366151699445, "learning_rate": 1.216482353109577e-05, "loss": 25.1541, "step": 27670 }, { "epoch": 0.43640032793088224, "grad_norm": 90.37501397507786, "learning_rate": 1.2159938807327252e-05, "loss": 25.012, "step": 27680 }, { "epoch": 0.436557987008892, "grad_norm": 91.28698229268471, "learning_rate": 1.2155053542905606e-05, "loss": 24.2891, "step": 27690 }, { "epoch": 0.43671564608690167, "grad_norm": 91.51609624058011, "learning_rate": 1.2150167739053662e-05, "loss": 25.2794, "step": 27700 }, { "epoch": 0.4368733051649114, "grad_norm": 85.03494884002585, "learning_rate": 1.2145281396994381e-05, "loss": 25.5785, "step": 27710 }, { "epoch": 0.4370309642429211, "grad_norm": 96.84763749683142, "learning_rate": 1.2140394517950864e-05, "loss": 25.9673, "step": 27720 }, { "epoch": 0.43718862332093084, "grad_norm": 95.14086271029738, "learning_rate": 1.213550710314634e-05, "loss": 24.7821, "step": 27730 }, { "epoch": 0.4373462823989405, "grad_norm": 87.95812908603828, "learning_rate": 1.2130619153804177e-05, "loss": 25.1747, "step": 27740 }, { "epoch": 0.43750394147695026, "grad_norm": 94.45357953521928, "learning_rate": 1.2125730671147872e-05, "loss": 24.7746, "step": 27750 }, { "epoch": 0.43766160055495995, "grad_norm": 89.77597364454029, "learning_rate": 1.2120841656401064e-05, "loss": 25.2031, "step": 27760 }, { "epoch": 0.4378192596329697, "grad_norm": 91.5368802384781, "learning_rate": 1.2115952110787514e-05, "loss": 25.3054, "step": 27770 }, { "epoch": 0.4379769187109794, "grad_norm": 89.63667564883654, "learning_rate": 1.2111062035531122e-05, "loss": 26.8458, "step": 27780 }, { "epoch": 0.43813457778898907, "grad_norm": 92.53483125518159, "learning_rate": 1.2106171431855925e-05, "loss": 24.2984, "step": 27790 }, { "epoch": 0.4382922368669988, "grad_norm": 86.39992603587153, "learning_rate": 1.2101280300986082e-05, "loss": 24.907, "step": 27800 }, { "epoch": 0.4384498959450085, "grad_norm": 87.85974097107407, "learning_rate": 1.2096388644145894e-05, "loss": 25.2701, "step": 27810 }, { "epoch": 0.43860755502301824, "grad_norm": 99.38565689897601, "learning_rate": 1.2091496462559785e-05, "loss": 25.1732, "step": 27820 }, { "epoch": 0.4387652141010279, "grad_norm": 96.11730051923405, "learning_rate": 1.2086603757452313e-05, "loss": 25.476, "step": 27830 }, { "epoch": 0.43892287317903766, "grad_norm": 91.86536272919496, "learning_rate": 1.2081710530048174e-05, "loss": 24.8135, "step": 27840 }, { "epoch": 0.43908053225704735, "grad_norm": 88.78792411053185, "learning_rate": 1.2076816781572188e-05, "loss": 24.2067, "step": 27850 }, { "epoch": 0.4392381913350571, "grad_norm": 91.72257007309109, "learning_rate": 1.2071922513249303e-05, "loss": 24.7256, "step": 27860 }, { "epoch": 0.4393958504130668, "grad_norm": 97.46162485485871, "learning_rate": 1.2067027726304605e-05, "loss": 24.7467, "step": 27870 }, { "epoch": 0.4395535094910765, "grad_norm": 93.28607562624074, "learning_rate": 1.2062132421963302e-05, "loss": 24.9295, "step": 27880 }, { "epoch": 0.4397111685690862, "grad_norm": 87.16629319016411, "learning_rate": 1.2057236601450737e-05, "loss": 24.8758, "step": 27890 }, { "epoch": 0.4398688276470959, "grad_norm": 89.46669933449253, "learning_rate": 1.205234026599238e-05, "loss": 24.3981, "step": 27900 }, { "epoch": 0.44002648672510564, "grad_norm": 86.71761586168215, "learning_rate": 1.204744341681383e-05, "loss": 24.5021, "step": 27910 }, { "epoch": 0.4401841458031153, "grad_norm": 97.97428507536809, "learning_rate": 1.2042546055140811e-05, "loss": 25.253, "step": 27920 }, { "epoch": 0.44034180488112507, "grad_norm": 87.71126395217638, "learning_rate": 1.2037648182199182e-05, "loss": 25.4177, "step": 27930 }, { "epoch": 0.44049946395913475, "grad_norm": 92.38656670770993, "learning_rate": 1.203274979921493e-05, "loss": 24.6915, "step": 27940 }, { "epoch": 0.4406571230371445, "grad_norm": 89.89085677201864, "learning_rate": 1.2027850907414164e-05, "loss": 24.8011, "step": 27950 }, { "epoch": 0.4408147821151542, "grad_norm": 94.13627633288087, "learning_rate": 1.2022951508023122e-05, "loss": 24.5335, "step": 27960 }, { "epoch": 0.4409724411931639, "grad_norm": 91.48374028012172, "learning_rate": 1.201805160226817e-05, "loss": 25.057, "step": 27970 }, { "epoch": 0.4411301002711736, "grad_norm": 89.90397221296023, "learning_rate": 1.2013151191375803e-05, "loss": 24.8928, "step": 27980 }, { "epoch": 0.44128775934918335, "grad_norm": 90.15009970829668, "learning_rate": 1.2008250276572637e-05, "loss": 25.1365, "step": 27990 }, { "epoch": 0.44144541842719304, "grad_norm": 85.04777112820942, "learning_rate": 1.2003348859085424e-05, "loss": 23.9604, "step": 28000 }, { "epoch": 0.4416030775052027, "grad_norm": 89.0747413152366, "learning_rate": 1.1998446940141029e-05, "loss": 25.1757, "step": 28010 }, { "epoch": 0.44176073658321247, "grad_norm": 90.26390162462138, "learning_rate": 1.1993544520966447e-05, "loss": 25.1669, "step": 28020 }, { "epoch": 0.44191839566122215, "grad_norm": 94.40387202310632, "learning_rate": 1.1988641602788803e-05, "loss": 25.2262, "step": 28030 }, { "epoch": 0.4420760547392319, "grad_norm": 84.89432518851535, "learning_rate": 1.1983738186835345e-05, "loss": 25.0142, "step": 28040 }, { "epoch": 0.4422337138172416, "grad_norm": 93.36339202616477, "learning_rate": 1.1978834274333444e-05, "loss": 24.6141, "step": 28050 }, { "epoch": 0.4423913728952513, "grad_norm": 90.06776195684536, "learning_rate": 1.1973929866510594e-05, "loss": 24.3239, "step": 28060 }, { "epoch": 0.442549031973261, "grad_norm": 90.04321151044972, "learning_rate": 1.1969024964594418e-05, "loss": 25.0647, "step": 28070 }, { "epoch": 0.44270669105127075, "grad_norm": 91.5838746272535, "learning_rate": 1.1964119569812653e-05, "loss": 24.8412, "step": 28080 }, { "epoch": 0.44286435012928044, "grad_norm": 91.49467320802506, "learning_rate": 1.1959213683393171e-05, "loss": 23.6584, "step": 28090 }, { "epoch": 0.4430220092072902, "grad_norm": 86.34106535740538, "learning_rate": 1.1954307306563964e-05, "loss": 24.2555, "step": 28100 }, { "epoch": 0.44317966828529987, "grad_norm": 95.24486821019077, "learning_rate": 1.1949400440553138e-05, "loss": 24.7128, "step": 28110 }, { "epoch": 0.44333732736330955, "grad_norm": 94.14952708653433, "learning_rate": 1.1944493086588934e-05, "loss": 24.8876, "step": 28120 }, { "epoch": 0.4434949864413193, "grad_norm": 86.06692911590439, "learning_rate": 1.1939585245899703e-05, "loss": 24.5761, "step": 28130 }, { "epoch": 0.443652645519329, "grad_norm": 86.83158046307916, "learning_rate": 1.193467691971393e-05, "loss": 24.4301, "step": 28140 }, { "epoch": 0.4438103045973387, "grad_norm": 94.90715101970825, "learning_rate": 1.1929768109260211e-05, "loss": 24.9413, "step": 28150 }, { "epoch": 0.4439679636753484, "grad_norm": 100.78354544578734, "learning_rate": 1.192485881576727e-05, "loss": 24.9407, "step": 28160 }, { "epoch": 0.44412562275335815, "grad_norm": 96.41589030461782, "learning_rate": 1.1919949040463951e-05, "loss": 24.3747, "step": 28170 }, { "epoch": 0.44428328183136784, "grad_norm": 90.07717329903826, "learning_rate": 1.1915038784579212e-05, "loss": 24.2884, "step": 28180 }, { "epoch": 0.4444409409093776, "grad_norm": 95.2285895177711, "learning_rate": 1.1910128049342142e-05, "loss": 24.4488, "step": 28190 }, { "epoch": 0.44459859998738727, "grad_norm": 101.73982800895762, "learning_rate": 1.1905216835981943e-05, "loss": 24.578, "step": 28200 }, { "epoch": 0.444756259065397, "grad_norm": 95.11945940177768, "learning_rate": 1.1900305145727936e-05, "loss": 24.3311, "step": 28210 }, { "epoch": 0.4449139181434067, "grad_norm": 91.72084049228187, "learning_rate": 1.1895392979809568e-05, "loss": 24.5432, "step": 28220 }, { "epoch": 0.44507157722141644, "grad_norm": 88.81385582109944, "learning_rate": 1.1890480339456393e-05, "loss": 24.6584, "step": 28230 }, { "epoch": 0.4452292362994261, "grad_norm": 91.75450697262943, "learning_rate": 1.1885567225898098e-05, "loss": 24.8279, "step": 28240 }, { "epoch": 0.4453868953774358, "grad_norm": 90.64314255141497, "learning_rate": 1.1880653640364482e-05, "loss": 27.1975, "step": 28250 }, { "epoch": 0.44554455445544555, "grad_norm": 88.88605678408305, "learning_rate": 1.1875739584085462e-05, "loss": 24.333, "step": 28260 }, { "epoch": 0.44570221353345524, "grad_norm": 94.75482604937616, "learning_rate": 1.1870825058291066e-05, "loss": 24.6435, "step": 28270 }, { "epoch": 0.445859872611465, "grad_norm": 92.4564341772302, "learning_rate": 1.1865910064211452e-05, "loss": 23.9502, "step": 28280 }, { "epoch": 0.44601753168947467, "grad_norm": 92.76508180357511, "learning_rate": 1.186099460307689e-05, "loss": 24.9787, "step": 28290 }, { "epoch": 0.4461751907674844, "grad_norm": 84.54382041980554, "learning_rate": 1.1856078676117763e-05, "loss": 24.7374, "step": 28300 }, { "epoch": 0.4463328498454941, "grad_norm": 94.80210859375093, "learning_rate": 1.185116228456458e-05, "loss": 24.4629, "step": 28310 }, { "epoch": 0.44649050892350384, "grad_norm": 90.33711404627213, "learning_rate": 1.1846245429647954e-05, "loss": 25.6747, "step": 28320 }, { "epoch": 0.4466481680015135, "grad_norm": 87.55041353872137, "learning_rate": 1.184132811259862e-05, "loss": 23.9589, "step": 28330 }, { "epoch": 0.44680582707952327, "grad_norm": 91.89548569974107, "learning_rate": 1.1836410334647435e-05, "loss": 24.8877, "step": 28340 }, { "epoch": 0.44696348615753295, "grad_norm": 85.09819208028918, "learning_rate": 1.183149209702536e-05, "loss": 25.375, "step": 28350 }, { "epoch": 0.44712114523554264, "grad_norm": 93.6449509529666, "learning_rate": 1.1826573400963482e-05, "loss": 24.6167, "step": 28360 }, { "epoch": 0.4472788043135524, "grad_norm": 88.49762117969296, "learning_rate": 1.1821654247692993e-05, "loss": 24.5525, "step": 28370 }, { "epoch": 0.44743646339156207, "grad_norm": 86.86290343565933, "learning_rate": 1.18167346384452e-05, "loss": 25.0486, "step": 28380 }, { "epoch": 0.4475941224695718, "grad_norm": 84.35512427657912, "learning_rate": 1.1811814574451537e-05, "loss": 24.2083, "step": 28390 }, { "epoch": 0.4477517815475815, "grad_norm": 94.59530836655307, "learning_rate": 1.1806894056943539e-05, "loss": 24.7959, "step": 28400 }, { "epoch": 0.44790944062559124, "grad_norm": 91.48458411336253, "learning_rate": 1.1801973087152854e-05, "loss": 25.0671, "step": 28410 }, { "epoch": 0.4480670997036009, "grad_norm": 92.2896032184164, "learning_rate": 1.1797051666311248e-05, "loss": 24.2916, "step": 28420 }, { "epoch": 0.44822475878161067, "grad_norm": 88.01862279900173, "learning_rate": 1.1792129795650602e-05, "loss": 25.1918, "step": 28430 }, { "epoch": 0.44838241785962035, "grad_norm": 91.49983783964639, "learning_rate": 1.1787207476402909e-05, "loss": 24.2397, "step": 28440 }, { "epoch": 0.4485400769376301, "grad_norm": 88.88612790306824, "learning_rate": 1.1782284709800268e-05, "loss": 24.6771, "step": 28450 }, { "epoch": 0.4486977360156398, "grad_norm": 88.10992909904577, "learning_rate": 1.1777361497074895e-05, "loss": 23.8559, "step": 28460 }, { "epoch": 0.44885539509364947, "grad_norm": 90.74302450875372, "learning_rate": 1.1772437839459116e-05, "loss": 24.4582, "step": 28470 }, { "epoch": 0.4490130541716592, "grad_norm": 89.63703163501307, "learning_rate": 1.176751373818537e-05, "loss": 24.4319, "step": 28480 }, { "epoch": 0.4491707132496689, "grad_norm": 85.54575264226513, "learning_rate": 1.1762589194486206e-05, "loss": 25.3164, "step": 28490 }, { "epoch": 0.44932837232767864, "grad_norm": 94.13715261667828, "learning_rate": 1.1757664209594289e-05, "loss": 24.931, "step": 28500 }, { "epoch": 0.4494860314056883, "grad_norm": 84.56360160898872, "learning_rate": 1.175273878474238e-05, "loss": 24.2795, "step": 28510 }, { "epoch": 0.44964369048369807, "grad_norm": 82.22355456114691, "learning_rate": 1.1747812921163366e-05, "loss": 24.4741, "step": 28520 }, { "epoch": 0.44980134956170775, "grad_norm": 88.8364484238708, "learning_rate": 1.1742886620090234e-05, "loss": 24.6845, "step": 28530 }, { "epoch": 0.4499590086397175, "grad_norm": 89.02199406636541, "learning_rate": 1.1737959882756087e-05, "loss": 25.0799, "step": 28540 }, { "epoch": 0.4501166677177272, "grad_norm": 87.97748138986098, "learning_rate": 1.1733032710394129e-05, "loss": 24.3462, "step": 28550 }, { "epoch": 0.4502743267957369, "grad_norm": 96.46766143897601, "learning_rate": 1.1728105104237683e-05, "loss": 25.2724, "step": 28560 }, { "epoch": 0.4504319858737466, "grad_norm": 89.17996550496885, "learning_rate": 1.1723177065520176e-05, "loss": 25.0871, "step": 28570 }, { "epoch": 0.4505896449517563, "grad_norm": 88.71384497746763, "learning_rate": 1.1718248595475133e-05, "loss": 23.9568, "step": 28580 }, { "epoch": 0.45074730402976604, "grad_norm": 88.73375705390774, "learning_rate": 1.171331969533621e-05, "loss": 24.5554, "step": 28590 }, { "epoch": 0.4509049631077757, "grad_norm": 94.97840715338879, "learning_rate": 1.1708390366337147e-05, "loss": 23.8079, "step": 28600 }, { "epoch": 0.45106262218578547, "grad_norm": 97.1027002046562, "learning_rate": 1.170346060971181e-05, "loss": 24.8987, "step": 28610 }, { "epoch": 0.45122028126379515, "grad_norm": 172.90539538206735, "learning_rate": 1.1698530426694157e-05, "loss": 25.0813, "step": 28620 }, { "epoch": 0.4513779403418049, "grad_norm": 90.59659392459055, "learning_rate": 1.1693599818518259e-05, "loss": 24.1877, "step": 28630 }, { "epoch": 0.4515355994198146, "grad_norm": 100.84617701992377, "learning_rate": 1.1688668786418302e-05, "loss": 24.4288, "step": 28640 }, { "epoch": 0.4516932584978243, "grad_norm": 96.56192739653274, "learning_rate": 1.1683737331628563e-05, "loss": 24.1354, "step": 28650 }, { "epoch": 0.451850917575834, "grad_norm": 93.96208753327831, "learning_rate": 1.1678805455383438e-05, "loss": 24.4572, "step": 28660 }, { "epoch": 0.45200857665384375, "grad_norm": 91.480576085026, "learning_rate": 1.1673873158917414e-05, "loss": 23.8874, "step": 28670 }, { "epoch": 0.45216623573185344, "grad_norm": 91.99518864669777, "learning_rate": 1.1668940443465096e-05, "loss": 24.1667, "step": 28680 }, { "epoch": 0.4523238948098631, "grad_norm": 88.9171697118304, "learning_rate": 1.1664007310261187e-05, "loss": 24.6745, "step": 28690 }, { "epoch": 0.45248155388787287, "grad_norm": 92.84440821354514, "learning_rate": 1.1659073760540503e-05, "loss": 24.3472, "step": 28700 }, { "epoch": 0.45263921296588255, "grad_norm": 99.62143414978691, "learning_rate": 1.1654139795537951e-05, "loss": 23.9341, "step": 28710 }, { "epoch": 0.4527968720438923, "grad_norm": 85.95232945838573, "learning_rate": 1.1649205416488554e-05, "loss": 24.7133, "step": 28720 }, { "epoch": 0.452954531121902, "grad_norm": 89.13604770675771, "learning_rate": 1.164427062462743e-05, "loss": 23.6811, "step": 28730 }, { "epoch": 0.4531121901999117, "grad_norm": 91.4822753593346, "learning_rate": 1.1639335421189806e-05, "loss": 24.2833, "step": 28740 }, { "epoch": 0.4532698492779214, "grad_norm": 91.57512217654919, "learning_rate": 1.1634399807411011e-05, "loss": 24.0862, "step": 28750 }, { "epoch": 0.45342750835593115, "grad_norm": 88.1007474541491, "learning_rate": 1.1629463784526478e-05, "loss": 24.3086, "step": 28760 }, { "epoch": 0.45358516743394084, "grad_norm": 100.38136367926803, "learning_rate": 1.1624527353771736e-05, "loss": 25.3987, "step": 28770 }, { "epoch": 0.4537428265119506, "grad_norm": 94.23424744888327, "learning_rate": 1.1619590516382417e-05, "loss": 24.1004, "step": 28780 }, { "epoch": 0.45390048558996027, "grad_norm": 100.16552366315823, "learning_rate": 1.1614653273594267e-05, "loss": 25.4686, "step": 28790 }, { "epoch": 0.45405814466796995, "grad_norm": 85.77143715025655, "learning_rate": 1.160971562664312e-05, "loss": 24.6494, "step": 28800 }, { "epoch": 0.4542158037459797, "grad_norm": 93.44937826343072, "learning_rate": 1.1604777576764917e-05, "loss": 24.8265, "step": 28810 }, { "epoch": 0.4543734628239894, "grad_norm": 90.26043925540321, "learning_rate": 1.1599839125195698e-05, "loss": 24.0024, "step": 28820 }, { "epoch": 0.4545311219019991, "grad_norm": 86.12733593528678, "learning_rate": 1.1594900273171598e-05, "loss": 23.8567, "step": 28830 }, { "epoch": 0.4546887809800088, "grad_norm": 88.63883795621865, "learning_rate": 1.158996102192887e-05, "loss": 23.7468, "step": 28840 }, { "epoch": 0.45484644005801855, "grad_norm": 86.68850558223689, "learning_rate": 1.1585021372703849e-05, "loss": 24.4773, "step": 28850 }, { "epoch": 0.45500409913602824, "grad_norm": 99.07557944071914, "learning_rate": 1.1580081326732977e-05, "loss": 23.9072, "step": 28860 }, { "epoch": 0.455161758214038, "grad_norm": 92.9716162995403, "learning_rate": 1.1575140885252793e-05, "loss": 24.6326, "step": 28870 }, { "epoch": 0.45531941729204767, "grad_norm": 93.50925925597754, "learning_rate": 1.1570200049499936e-05, "loss": 24.3141, "step": 28880 }, { "epoch": 0.4554770763700574, "grad_norm": 85.24573983066901, "learning_rate": 1.1565258820711147e-05, "loss": 23.8518, "step": 28890 }, { "epoch": 0.4556347354480671, "grad_norm": 92.86109848352437, "learning_rate": 1.1560317200123258e-05, "loss": 24.4205, "step": 28900 }, { "epoch": 0.45579239452607684, "grad_norm": 94.17507904954698, "learning_rate": 1.1555375188973208e-05, "loss": 24.7393, "step": 28910 }, { "epoch": 0.4559500536040865, "grad_norm": 91.3830197535068, "learning_rate": 1.1550432788498027e-05, "loss": 24.2758, "step": 28920 }, { "epoch": 0.4561077126820962, "grad_norm": 91.91604780109476, "learning_rate": 1.1545489999934845e-05, "loss": 24.6986, "step": 28930 }, { "epoch": 0.45626537176010595, "grad_norm": 86.64924294647638, "learning_rate": 1.1540546824520888e-05, "loss": 24.2064, "step": 28940 }, { "epoch": 0.45642303083811564, "grad_norm": 89.39568883315856, "learning_rate": 1.153560326349348e-05, "loss": 24.4362, "step": 28950 }, { "epoch": 0.4565806899161254, "grad_norm": 88.83179545856264, "learning_rate": 1.1530659318090041e-05, "loss": 23.9222, "step": 28960 }, { "epoch": 0.45673834899413507, "grad_norm": 89.04369074456282, "learning_rate": 1.1525714989548088e-05, "loss": 24.0157, "step": 28970 }, { "epoch": 0.4568960080721448, "grad_norm": 88.07608910810646, "learning_rate": 1.152077027910523e-05, "loss": 24.6644, "step": 28980 }, { "epoch": 0.4570536671501545, "grad_norm": 84.13981145993526, "learning_rate": 1.1515825187999179e-05, "loss": 24.0979, "step": 28990 }, { "epoch": 0.45721132622816424, "grad_norm": 83.18460220384578, "learning_rate": 1.1510879717467738e-05, "loss": 23.6066, "step": 29000 }, { "epoch": 0.4573689853061739, "grad_norm": 90.48566229073367, "learning_rate": 1.1505933868748802e-05, "loss": 24.4869, "step": 29010 }, { "epoch": 0.45752664438418367, "grad_norm": 84.96582383542628, "learning_rate": 1.1500987643080366e-05, "loss": 23.4132, "step": 29020 }, { "epoch": 0.45768430346219335, "grad_norm": 95.83071207377795, "learning_rate": 1.1496041041700515e-05, "loss": 23.666, "step": 29030 }, { "epoch": 0.45784196254020304, "grad_norm": 135.34262416382833, "learning_rate": 1.1491094065847433e-05, "loss": 24.5523, "step": 29040 }, { "epoch": 0.4579996216182128, "grad_norm": 84.61068220286901, "learning_rate": 1.1486146716759393e-05, "loss": 24.7525, "step": 29050 }, { "epoch": 0.45815728069622247, "grad_norm": 88.73709192074611, "learning_rate": 1.1481198995674767e-05, "loss": 23.7233, "step": 29060 }, { "epoch": 0.4583149397742322, "grad_norm": 86.20618525673446, "learning_rate": 1.147625090383201e-05, "loss": 23.933, "step": 29070 }, { "epoch": 0.4584725988522419, "grad_norm": 90.15034364239943, "learning_rate": 1.1471302442469678e-05, "loss": 24.1662, "step": 29080 }, { "epoch": 0.45863025793025164, "grad_norm": 90.65158293831152, "learning_rate": 1.1466353612826424e-05, "loss": 23.8595, "step": 29090 }, { "epoch": 0.4587879170082613, "grad_norm": 94.33971434077984, "learning_rate": 1.1461404416140978e-05, "loss": 23.4528, "step": 29100 }, { "epoch": 0.45894557608627107, "grad_norm": 86.57903058661684, "learning_rate": 1.1456454853652176e-05, "loss": 24.1686, "step": 29110 }, { "epoch": 0.45910323516428075, "grad_norm": 92.38624218678335, "learning_rate": 1.145150492659894e-05, "loss": 24.6736, "step": 29120 }, { "epoch": 0.4592608942422905, "grad_norm": 91.0231559693203, "learning_rate": 1.1446554636220281e-05, "loss": 24.2962, "step": 29130 }, { "epoch": 0.4594185533203002, "grad_norm": 81.59238305029253, "learning_rate": 1.1441603983755307e-05, "loss": 24.2335, "step": 29140 }, { "epoch": 0.45957621239830987, "grad_norm": 83.26465696204207, "learning_rate": 1.1436652970443212e-05, "loss": 24.5147, "step": 29150 }, { "epoch": 0.4597338714763196, "grad_norm": 89.71341368774483, "learning_rate": 1.1431701597523279e-05, "loss": 25.3197, "step": 29160 }, { "epoch": 0.4598915305543293, "grad_norm": 85.65900439033854, "learning_rate": 1.1426749866234887e-05, "loss": 24.3254, "step": 29170 }, { "epoch": 0.46004918963233904, "grad_norm": 84.3942809378085, "learning_rate": 1.14217977778175e-05, "loss": 24.2934, "step": 29180 }, { "epoch": 0.4602068487103487, "grad_norm": 93.93412543486635, "learning_rate": 1.1416845333510672e-05, "loss": 24.3671, "step": 29190 }, { "epoch": 0.46036450778835847, "grad_norm": 95.87689490604707, "learning_rate": 1.1411892534554046e-05, "loss": 24.1874, "step": 29200 }, { "epoch": 0.46052216686636815, "grad_norm": 92.26587866538597, "learning_rate": 1.1406939382187357e-05, "loss": 23.74, "step": 29210 }, { "epoch": 0.4606798259443779, "grad_norm": 91.90920602610458, "learning_rate": 1.1401985877650423e-05, "loss": 24.5924, "step": 29220 }, { "epoch": 0.4608374850223876, "grad_norm": 89.31248402791967, "learning_rate": 1.1397032022183153e-05, "loss": 23.6591, "step": 29230 }, { "epoch": 0.4609951441003973, "grad_norm": 85.32910248442388, "learning_rate": 1.1392077817025546e-05, "loss": 24.6095, "step": 29240 }, { "epoch": 0.461152803178407, "grad_norm": 81.37962548650806, "learning_rate": 1.1387123263417688e-05, "loss": 23.6585, "step": 29250 }, { "epoch": 0.4613104622564167, "grad_norm": 86.7396444108399, "learning_rate": 1.1382168362599748e-05, "loss": 23.6031, "step": 29260 }, { "epoch": 0.46146812133442644, "grad_norm": 83.61426494323563, "learning_rate": 1.1377213115811985e-05, "loss": 24.0597, "step": 29270 }, { "epoch": 0.4616257804124361, "grad_norm": 85.2158753533911, "learning_rate": 1.1372257524294741e-05, "loss": 24.6007, "step": 29280 }, { "epoch": 0.46178343949044587, "grad_norm": 92.57006352165675, "learning_rate": 1.1367301589288453e-05, "loss": 24.0912, "step": 29290 }, { "epoch": 0.46194109856845555, "grad_norm": 83.54033203847372, "learning_rate": 1.1362345312033635e-05, "loss": 24.4062, "step": 29300 }, { "epoch": 0.4620987576464653, "grad_norm": 86.05879796716471, "learning_rate": 1.1357388693770895e-05, "loss": 23.718, "step": 29310 }, { "epoch": 0.462256416724475, "grad_norm": 87.9225573673277, "learning_rate": 1.1352431735740917e-05, "loss": 23.9693, "step": 29320 }, { "epoch": 0.4624140758024847, "grad_norm": 89.06968613162783, "learning_rate": 1.1347474439184472e-05, "loss": 24.405, "step": 29330 }, { "epoch": 0.4625717348804944, "grad_norm": 94.37873158158737, "learning_rate": 1.1342516805342423e-05, "loss": 24.0203, "step": 29340 }, { "epoch": 0.46272939395850415, "grad_norm": 90.0250125884993, "learning_rate": 1.133755883545571e-05, "loss": 24.6464, "step": 29350 }, { "epoch": 0.46288705303651384, "grad_norm": 96.68580413945791, "learning_rate": 1.133260053076536e-05, "loss": 24.8743, "step": 29360 }, { "epoch": 0.4630447121145235, "grad_norm": 86.93066081567271, "learning_rate": 1.132764189251249e-05, "loss": 23.8677, "step": 29370 }, { "epoch": 0.46320237119253327, "grad_norm": 94.31705472118887, "learning_rate": 1.132268292193828e-05, "loss": 24.1563, "step": 29380 }, { "epoch": 0.46336003027054296, "grad_norm": 90.23891949819043, "learning_rate": 1.1317723620284021e-05, "loss": 23.9726, "step": 29390 }, { "epoch": 0.4635176893485527, "grad_norm": 89.6048368121063, "learning_rate": 1.1312763988791067e-05, "loss": 24.4389, "step": 29400 }, { "epoch": 0.4636753484265624, "grad_norm": 85.14597394514517, "learning_rate": 1.1307804028700862e-05, "loss": 24.4226, "step": 29410 }, { "epoch": 0.4638330075045721, "grad_norm": 90.39976322455405, "learning_rate": 1.1302843741254931e-05, "loss": 24.2598, "step": 29420 }, { "epoch": 0.4639906665825818, "grad_norm": 85.97785414013077, "learning_rate": 1.1297883127694878e-05, "loss": 23.4542, "step": 29430 }, { "epoch": 0.46414832566059155, "grad_norm": 88.41217882757401, "learning_rate": 1.1292922189262397e-05, "loss": 23.6621, "step": 29440 }, { "epoch": 0.46430598473860124, "grad_norm": 92.46002478597875, "learning_rate": 1.1287960927199256e-05, "loss": 23.8682, "step": 29450 }, { "epoch": 0.464463643816611, "grad_norm": 92.78885250201134, "learning_rate": 1.1282999342747302e-05, "loss": 24.2924, "step": 29460 }, { "epoch": 0.46462130289462067, "grad_norm": 85.31328466891654, "learning_rate": 1.1278037437148472e-05, "loss": 24.3651, "step": 29470 }, { "epoch": 0.4647789619726304, "grad_norm": 87.21630393569089, "learning_rate": 1.127307521164477e-05, "loss": 23.7059, "step": 29480 }, { "epoch": 0.4649366210506401, "grad_norm": 86.58714008999672, "learning_rate": 1.1268112667478298e-05, "loss": 23.335, "step": 29490 }, { "epoch": 0.4650942801286498, "grad_norm": 87.04205806611994, "learning_rate": 1.126314980589122e-05, "loss": 24.5323, "step": 29500 }, { "epoch": 0.4652519392066595, "grad_norm": 88.71631394948437, "learning_rate": 1.1258186628125793e-05, "loss": 24.4618, "step": 29510 }, { "epoch": 0.4654095982846692, "grad_norm": 86.01607126116808, "learning_rate": 1.1253223135424343e-05, "loss": 23.5232, "step": 29520 }, { "epoch": 0.46556725736267895, "grad_norm": 91.98242518680976, "learning_rate": 1.1248259329029279e-05, "loss": 23.6635, "step": 29530 }, { "epoch": 0.46572491644068864, "grad_norm": 100.992136431381, "learning_rate": 1.1243295210183094e-05, "loss": 23.9678, "step": 29540 }, { "epoch": 0.4658825755186984, "grad_norm": 89.54735094641873, "learning_rate": 1.1238330780128348e-05, "loss": 24.0463, "step": 29550 }, { "epoch": 0.46604023459670807, "grad_norm": 83.66541882929099, "learning_rate": 1.123336604010769e-05, "loss": 23.7977, "step": 29560 }, { "epoch": 0.4661978936747178, "grad_norm": 94.63347879391125, "learning_rate": 1.1228400991363837e-05, "loss": 23.8269, "step": 29570 }, { "epoch": 0.4663555527527275, "grad_norm": 100.45580795930414, "learning_rate": 1.122343563513959e-05, "loss": 23.9836, "step": 29580 }, { "epoch": 0.46651321183073724, "grad_norm": 93.45960298404447, "learning_rate": 1.1218469972677823e-05, "loss": 24.8268, "step": 29590 }, { "epoch": 0.4666708709087469, "grad_norm": 86.4612222320948, "learning_rate": 1.121350400522149e-05, "loss": 24.0556, "step": 29600 }, { "epoch": 0.4668285299867566, "grad_norm": 90.07929485966622, "learning_rate": 1.120853773401362e-05, "loss": 23.9928, "step": 29610 }, { "epoch": 0.46698618906476635, "grad_norm": 94.52606414853089, "learning_rate": 1.1203571160297316e-05, "loss": 24.0855, "step": 29620 }, { "epoch": 0.46714384814277604, "grad_norm": 91.80282068364401, "learning_rate": 1.1198604285315754e-05, "loss": 24.2115, "step": 29630 }, { "epoch": 0.4673015072207858, "grad_norm": 79.4095087001645, "learning_rate": 1.1193637110312201e-05, "loss": 23.9595, "step": 29640 }, { "epoch": 0.46745916629879547, "grad_norm": 96.84119453896567, "learning_rate": 1.1188669636529981e-05, "loss": 24.7385, "step": 29650 }, { "epoch": 0.4676168253768052, "grad_norm": 90.10645091084784, "learning_rate": 1.1183701865212499e-05, "loss": 24.2462, "step": 29660 }, { "epoch": 0.4677744844548149, "grad_norm": 89.44653018354533, "learning_rate": 1.117873379760324e-05, "loss": 24.0784, "step": 29670 }, { "epoch": 0.46793214353282464, "grad_norm": 85.74517991661361, "learning_rate": 1.1173765434945749e-05, "loss": 24.2644, "step": 29680 }, { "epoch": 0.4680898026108343, "grad_norm": 85.37337082139608, "learning_rate": 1.1168796778483666e-05, "loss": 23.393, "step": 29690 }, { "epoch": 0.46824746168884407, "grad_norm": 88.05064664551253, "learning_rate": 1.1163827829460685e-05, "loss": 24.1229, "step": 29700 }, { "epoch": 0.46840512076685376, "grad_norm": 91.61286193169136, "learning_rate": 1.1158858589120582e-05, "loss": 23.914, "step": 29710 }, { "epoch": 0.46856277984486344, "grad_norm": 85.8105995997837, "learning_rate": 1.115388905870721e-05, "loss": 23.6692, "step": 29720 }, { "epoch": 0.4687204389228732, "grad_norm": 86.59780596833724, "learning_rate": 1.1148919239464482e-05, "loss": 23.9777, "step": 29730 }, { "epoch": 0.46887809800088287, "grad_norm": 90.18981229341993, "learning_rate": 1.1143949132636394e-05, "loss": 23.0203, "step": 29740 }, { "epoch": 0.4690357570788926, "grad_norm": 94.10854603544284, "learning_rate": 1.1138978739467013e-05, "loss": 23.6697, "step": 29750 }, { "epoch": 0.4691934161569023, "grad_norm": 91.47885973099999, "learning_rate": 1.1134008061200469e-05, "loss": 24.319, "step": 29760 }, { "epoch": 0.46935107523491204, "grad_norm": 426.54027959325794, "learning_rate": 1.1129037099080979e-05, "loss": 25.4445, "step": 29770 }, { "epoch": 0.4695087343129217, "grad_norm": 89.31230516042582, "learning_rate": 1.1124065854352813e-05, "loss": 24.1546, "step": 29780 }, { "epoch": 0.46966639339093147, "grad_norm": 97.35199522911871, "learning_rate": 1.1119094328260328e-05, "loss": 23.8639, "step": 29790 }, { "epoch": 0.46982405246894116, "grad_norm": 92.05969772379828, "learning_rate": 1.1114122522047938e-05, "loss": 23.8185, "step": 29800 }, { "epoch": 0.4699817115469509, "grad_norm": 91.07795968110412, "learning_rate": 1.1109150436960139e-05, "loss": 24.2627, "step": 29810 }, { "epoch": 0.4701393706249606, "grad_norm": 93.5734618515218, "learning_rate": 1.1104178074241485e-05, "loss": 23.7602, "step": 29820 }, { "epoch": 0.47029702970297027, "grad_norm": 93.88507765827825, "learning_rate": 1.109920543513661e-05, "loss": 24.1524, "step": 29830 }, { "epoch": 0.47045468878098, "grad_norm": 85.51725920481304, "learning_rate": 1.109423252089021e-05, "loss": 23.8527, "step": 29840 }, { "epoch": 0.4706123478589897, "grad_norm": 89.99740882007791, "learning_rate": 1.1089259332747056e-05, "loss": 24.0932, "step": 29850 }, { "epoch": 0.47077000693699944, "grad_norm": 84.47052249413696, "learning_rate": 1.1084285871951978e-05, "loss": 23.5463, "step": 29860 }, { "epoch": 0.4709276660150091, "grad_norm": 85.12692039072084, "learning_rate": 1.1079312139749888e-05, "loss": 23.6583, "step": 29870 }, { "epoch": 0.47108532509301887, "grad_norm": 95.81671014648083, "learning_rate": 1.1074338137385748e-05, "loss": 24.0231, "step": 29880 }, { "epoch": 0.47124298417102856, "grad_norm": 93.46846984013084, "learning_rate": 1.1069363866104607e-05, "loss": 24.6641, "step": 29890 }, { "epoch": 0.4714006432490383, "grad_norm": 92.92843779952321, "learning_rate": 1.106438932715157e-05, "loss": 23.9363, "step": 29900 }, { "epoch": 0.471558302327048, "grad_norm": 102.68499609840167, "learning_rate": 1.1059414521771809e-05, "loss": 24.1249, "step": 29910 }, { "epoch": 0.4717159614050577, "grad_norm": 89.59430493259727, "learning_rate": 1.1054439451210569e-05, "loss": 24.571, "step": 29920 }, { "epoch": 0.4718736204830674, "grad_norm": 86.97385847086406, "learning_rate": 1.1049464116713149e-05, "loss": 23.8804, "step": 29930 }, { "epoch": 0.4720312795610771, "grad_norm": 86.46447502986113, "learning_rate": 1.104448851952493e-05, "loss": 23.9011, "step": 29940 }, { "epoch": 0.47218893863908684, "grad_norm": 89.50475690015448, "learning_rate": 1.103951266089135e-05, "loss": 23.5855, "step": 29950 }, { "epoch": 0.47234659771709653, "grad_norm": 85.90420651465696, "learning_rate": 1.1034536542057912e-05, "loss": 23.9374, "step": 29960 }, { "epoch": 0.47250425679510627, "grad_norm": 90.91717981030538, "learning_rate": 1.1029560164270186e-05, "loss": 23.2337, "step": 29970 }, { "epoch": 0.47266191587311596, "grad_norm": 93.04108915999721, "learning_rate": 1.1024583528773805e-05, "loss": 23.8652, "step": 29980 }, { "epoch": 0.4728195749511257, "grad_norm": 83.99339264110095, "learning_rate": 1.101960663681447e-05, "loss": 23.8736, "step": 29990 }, { "epoch": 0.4729772340291354, "grad_norm": 91.20375864344878, "learning_rate": 1.1014629489637945e-05, "loss": 23.7748, "step": 30000 }, { "epoch": 0.4731348931071451, "grad_norm": 93.31958931257802, "learning_rate": 1.1009652088490055e-05, "loss": 23.9181, "step": 30010 }, { "epoch": 0.4732925521851548, "grad_norm": 90.18958692693003, "learning_rate": 1.1004674434616692e-05, "loss": 24.163, "step": 30020 }, { "epoch": 0.47345021126316456, "grad_norm": 88.2840068882534, "learning_rate": 1.0999696529263804e-05, "loss": 23.7423, "step": 30030 }, { "epoch": 0.47360787034117424, "grad_norm": 92.92050373491145, "learning_rate": 1.0994718373677418e-05, "loss": 23.5586, "step": 30040 }, { "epoch": 0.473765529419184, "grad_norm": 92.00515197653186, "learning_rate": 1.0989739969103606e-05, "loss": 24.2073, "step": 30050 }, { "epoch": 0.47392318849719367, "grad_norm": 84.82109511321924, "learning_rate": 1.0984761316788513e-05, "loss": 23.3346, "step": 30060 }, { "epoch": 0.47408084757520336, "grad_norm": 84.25864644887402, "learning_rate": 1.0979782417978342e-05, "loss": 23.6658, "step": 30070 }, { "epoch": 0.4742385066532131, "grad_norm": 87.18544674110515, "learning_rate": 1.0974803273919356e-05, "loss": 23.6506, "step": 30080 }, { "epoch": 0.4743961657312228, "grad_norm": 100.43752313983241, "learning_rate": 1.0969823885857889e-05, "loss": 24.7281, "step": 30090 }, { "epoch": 0.4745538248092325, "grad_norm": 90.43445877855675, "learning_rate": 1.0964844255040322e-05, "loss": 23.2828, "step": 30100 }, { "epoch": 0.4747114838872422, "grad_norm": 89.67558376673882, "learning_rate": 1.0959864382713107e-05, "loss": 23.8833, "step": 30110 }, { "epoch": 0.47486914296525196, "grad_norm": 86.85764767082695, "learning_rate": 1.0954884270122753e-05, "loss": 23.9065, "step": 30120 }, { "epoch": 0.47502680204326164, "grad_norm": 86.88234349571324, "learning_rate": 1.0949903918515823e-05, "loss": 23.8512, "step": 30130 }, { "epoch": 0.4751844611212714, "grad_norm": 88.83125287313443, "learning_rate": 1.0944923329138957e-05, "loss": 23.7406, "step": 30140 }, { "epoch": 0.47534212019928107, "grad_norm": 84.78461030007311, "learning_rate": 1.0939942503238837e-05, "loss": 22.9463, "step": 30150 }, { "epoch": 0.4754997792772908, "grad_norm": 95.81107906341829, "learning_rate": 1.093496144206221e-05, "loss": 23.6462, "step": 30160 }, { "epoch": 0.4756574383553005, "grad_norm": 85.72035334828392, "learning_rate": 1.0929980146855888e-05, "loss": 22.8442, "step": 30170 }, { "epoch": 0.4758150974333102, "grad_norm": 98.62982284804707, "learning_rate": 1.092499861886673e-05, "loss": 23.8507, "step": 30180 }, { "epoch": 0.4759727565113199, "grad_norm": 84.24365478584349, "learning_rate": 1.0920016859341665e-05, "loss": 23.4713, "step": 30190 }, { "epoch": 0.4761304155893296, "grad_norm": 90.796376814513, "learning_rate": 1.091503486952767e-05, "loss": 23.8187, "step": 30200 }, { "epoch": 0.47628807466733936, "grad_norm": 89.3734229531229, "learning_rate": 1.091005265067179e-05, "loss": 23.6766, "step": 30210 }, { "epoch": 0.47644573374534904, "grad_norm": 85.5420960131149, "learning_rate": 1.0905070204021117e-05, "loss": 23.1846, "step": 30220 }, { "epoch": 0.4766033928233588, "grad_norm": 88.84171232598939, "learning_rate": 1.0900087530822806e-05, "loss": 23.4553, "step": 30230 }, { "epoch": 0.47676105190136847, "grad_norm": 91.8541805617093, "learning_rate": 1.0895104632324062e-05, "loss": 23.4908, "step": 30240 }, { "epoch": 0.4769187109793782, "grad_norm": 86.6085118493213, "learning_rate": 1.0890121509772165e-05, "loss": 23.2713, "step": 30250 }, { "epoch": 0.4770763700573879, "grad_norm": 94.77660583476049, "learning_rate": 1.0885138164414425e-05, "loss": 23.4544, "step": 30260 }, { "epoch": 0.47723402913539764, "grad_norm": 86.6536446740295, "learning_rate": 1.0880154597498227e-05, "loss": 23.4171, "step": 30270 }, { "epoch": 0.47739168821340733, "grad_norm": 82.73888375089693, "learning_rate": 1.0875170810271e-05, "loss": 23.7769, "step": 30280 }, { "epoch": 0.477549347291417, "grad_norm": 90.06898173988515, "learning_rate": 1.0870186803980236e-05, "loss": 23.7694, "step": 30290 }, { "epoch": 0.47770700636942676, "grad_norm": 88.40269042446666, "learning_rate": 1.086520257987348e-05, "loss": 23.6943, "step": 30300 }, { "epoch": 0.47786466544743644, "grad_norm": 92.10637468534868, "learning_rate": 1.0860218139198329e-05, "loss": 23.5017, "step": 30310 }, { "epoch": 0.4780223245254462, "grad_norm": 92.21928676438394, "learning_rate": 1.0855233483202436e-05, "loss": 23.4234, "step": 30320 }, { "epoch": 0.47817998360345587, "grad_norm": 87.84809221323769, "learning_rate": 1.0850248613133508e-05, "loss": 23.3149, "step": 30330 }, { "epoch": 0.4783376426814656, "grad_norm": 91.76843854788247, "learning_rate": 1.0845263530239303e-05, "loss": 23.6651, "step": 30340 }, { "epoch": 0.4784953017594753, "grad_norm": 82.62585238615077, "learning_rate": 1.0840278235767638e-05, "loss": 23.6437, "step": 30350 }, { "epoch": 0.47865296083748504, "grad_norm": 84.42069035569114, "learning_rate": 1.0835292730966378e-05, "loss": 23.5207, "step": 30360 }, { "epoch": 0.47881061991549473, "grad_norm": 93.4945340296752, "learning_rate": 1.0830307017083441e-05, "loss": 23.8735, "step": 30370 }, { "epoch": 0.47896827899350447, "grad_norm": 93.25018396501632, "learning_rate": 1.08253210953668e-05, "loss": 23.3799, "step": 30380 }, { "epoch": 0.47912593807151416, "grad_norm": 86.4762762265664, "learning_rate": 1.0820334967064477e-05, "loss": 24.0757, "step": 30390 }, { "epoch": 0.47928359714952384, "grad_norm": 91.8713236964567, "learning_rate": 1.0815348633424547e-05, "loss": 23.5864, "step": 30400 }, { "epoch": 0.4794412562275336, "grad_norm": 89.01052801931299, "learning_rate": 1.0810362095695136e-05, "loss": 23.9551, "step": 30410 }, { "epoch": 0.47959891530554327, "grad_norm": 91.07230428117148, "learning_rate": 1.0805375355124426e-05, "loss": 24.1502, "step": 30420 }, { "epoch": 0.479756574383553, "grad_norm": 88.32224362298206, "learning_rate": 1.0800388412960637e-05, "loss": 23.7381, "step": 30430 }, { "epoch": 0.4799142334615627, "grad_norm": 93.50078603354966, "learning_rate": 1.0795401270452054e-05, "loss": 23.415, "step": 30440 }, { "epoch": 0.48007189253957244, "grad_norm": 105.5943067312731, "learning_rate": 1.0790413928847007e-05, "loss": 24.5624, "step": 30450 }, { "epoch": 0.48022955161758213, "grad_norm": 86.51094246820394, "learning_rate": 1.0785426389393871e-05, "loss": 23.3345, "step": 30460 }, { "epoch": 0.48038721069559187, "grad_norm": 96.39415544759892, "learning_rate": 1.078043865334108e-05, "loss": 23.2452, "step": 30470 }, { "epoch": 0.48054486977360156, "grad_norm": 88.53628084583279, "learning_rate": 1.07754507219371e-05, "loss": 23.4667, "step": 30480 }, { "epoch": 0.4807025288516113, "grad_norm": 83.545394841156, "learning_rate": 1.0770462596430468e-05, "loss": 23.3781, "step": 30490 }, { "epoch": 0.480860187929621, "grad_norm": 89.03842117646522, "learning_rate": 1.0765474278069759e-05, "loss": 23.2504, "step": 30500 }, { "epoch": 0.4810178470076307, "grad_norm": 89.55116615475714, "learning_rate": 1.0760485768103592e-05, "loss": 23.5364, "step": 30510 }, { "epoch": 0.4811755060856404, "grad_norm": 89.33679990954441, "learning_rate": 1.0755497067780638e-05, "loss": 23.8874, "step": 30520 }, { "epoch": 0.4813331651636501, "grad_norm": 91.54546066284672, "learning_rate": 1.0750508178349618e-05, "loss": 23.331, "step": 30530 }, { "epoch": 0.48149082424165984, "grad_norm": 90.4332829142366, "learning_rate": 1.0745519101059297e-05, "loss": 23.3065, "step": 30540 }, { "epoch": 0.48164848331966953, "grad_norm": 86.92904955543449, "learning_rate": 1.0740529837158489e-05, "loss": 23.021, "step": 30550 }, { "epoch": 0.48180614239767927, "grad_norm": 90.59888391585652, "learning_rate": 1.0735540387896055e-05, "loss": 23.6494, "step": 30560 }, { "epoch": 0.48196380147568896, "grad_norm": 86.9026090344768, "learning_rate": 1.0730550754520898e-05, "loss": 23.2126, "step": 30570 }, { "epoch": 0.4821214605536987, "grad_norm": 84.4970943565705, "learning_rate": 1.0725560938281974e-05, "loss": 23.5672, "step": 30580 }, { "epoch": 0.4822791196317084, "grad_norm": 85.67080288786572, "learning_rate": 1.0720570940428279e-05, "loss": 23.9757, "step": 30590 }, { "epoch": 0.48243677870971813, "grad_norm": 91.64015425619657, "learning_rate": 1.0715580762208859e-05, "loss": 23.6755, "step": 30600 }, { "epoch": 0.4825944377877278, "grad_norm": 87.33689374049709, "learning_rate": 1.07105904048728e-05, "loss": 23.4488, "step": 30610 }, { "epoch": 0.48275209686573756, "grad_norm": 84.48367569461921, "learning_rate": 1.0705599869669239e-05, "loss": 23.0796, "step": 30620 }, { "epoch": 0.48290975594374724, "grad_norm": 95.57437754377828, "learning_rate": 1.0700609157847353e-05, "loss": 24.1636, "step": 30630 }, { "epoch": 0.48306741502175693, "grad_norm": 91.8200834066614, "learning_rate": 1.0695618270656363e-05, "loss": 23.942, "step": 30640 }, { "epoch": 0.48322507409976667, "grad_norm": 93.59758951197696, "learning_rate": 1.0690627209345534e-05, "loss": 23.9357, "step": 30650 }, { "epoch": 0.48338273317777636, "grad_norm": 88.2510392283291, "learning_rate": 1.0685635975164179e-05, "loss": 23.7382, "step": 30660 }, { "epoch": 0.4835403922557861, "grad_norm": 94.36304872039612, "learning_rate": 1.0680644569361652e-05, "loss": 23.8219, "step": 30670 }, { "epoch": 0.4836980513337958, "grad_norm": 87.15181563947213, "learning_rate": 1.0675652993187343e-05, "loss": 23.7071, "step": 30680 }, { "epoch": 0.48385571041180553, "grad_norm": 84.55780999152543, "learning_rate": 1.0670661247890697e-05, "loss": 23.6479, "step": 30690 }, { "epoch": 0.4840133694898152, "grad_norm": 85.11330343527104, "learning_rate": 1.0665669334721194e-05, "loss": 23.4268, "step": 30700 }, { "epoch": 0.48417102856782496, "grad_norm": 88.06486168563542, "learning_rate": 1.0660677254928355e-05, "loss": 23.0163, "step": 30710 }, { "epoch": 0.48432868764583464, "grad_norm": 84.58802420354286, "learning_rate": 1.0655685009761747e-05, "loss": 24.194, "step": 30720 }, { "epoch": 0.4844863467238444, "grad_norm": 84.27916404292701, "learning_rate": 1.0650692600470973e-05, "loss": 23.064, "step": 30730 }, { "epoch": 0.48464400580185407, "grad_norm": 83.22106027654118, "learning_rate": 1.0645700028305683e-05, "loss": 23.2139, "step": 30740 }, { "epoch": 0.48480166487986376, "grad_norm": 90.16721805658362, "learning_rate": 1.0640707294515567e-05, "loss": 23.1406, "step": 30750 }, { "epoch": 0.4849593239578735, "grad_norm": 88.45371866163052, "learning_rate": 1.063571440035035e-05, "loss": 23.5798, "step": 30760 }, { "epoch": 0.4851169830358832, "grad_norm": 86.9342872087259, "learning_rate": 1.0630721347059802e-05, "loss": 23.1441, "step": 30770 }, { "epoch": 0.48527464211389293, "grad_norm": 85.69680192716817, "learning_rate": 1.062572813589373e-05, "loss": 23.6833, "step": 30780 }, { "epoch": 0.4854323011919026, "grad_norm": 88.12577143428291, "learning_rate": 1.0620734768101983e-05, "loss": 22.901, "step": 30790 }, { "epoch": 0.48558996026991236, "grad_norm": 101.67178645907454, "learning_rate": 1.061574124493445e-05, "loss": 23.8955, "step": 30800 }, { "epoch": 0.48574761934792204, "grad_norm": 95.76833385731263, "learning_rate": 1.0610747567641056e-05, "loss": 23.2617, "step": 30810 }, { "epoch": 0.4859052784259318, "grad_norm": 91.25933613025148, "learning_rate": 1.0605753737471764e-05, "loss": 23.1073, "step": 30820 }, { "epoch": 0.4860629375039415, "grad_norm": 90.5159200819666, "learning_rate": 1.0600759755676578e-05, "loss": 23.0801, "step": 30830 }, { "epoch": 0.4862205965819512, "grad_norm": 85.62043691155972, "learning_rate": 1.0595765623505539e-05, "loss": 23.6309, "step": 30840 }, { "epoch": 0.4863782556599609, "grad_norm": 92.06441262042024, "learning_rate": 1.0590771342208725e-05, "loss": 23.0059, "step": 30850 }, { "epoch": 0.4865359147379706, "grad_norm": 83.12949298195663, "learning_rate": 1.0585776913036254e-05, "loss": 23.6676, "step": 30860 }, { "epoch": 0.48669357381598033, "grad_norm": 82.2075563792421, "learning_rate": 1.0580782337238278e-05, "loss": 23.2043, "step": 30870 }, { "epoch": 0.48685123289399, "grad_norm": 87.75856656854207, "learning_rate": 1.0575787616064983e-05, "loss": 23.7809, "step": 30880 }, { "epoch": 0.48700889197199976, "grad_norm": 91.42216996654909, "learning_rate": 1.0570792750766601e-05, "loss": 23.2629, "step": 30890 }, { "epoch": 0.48716655105000944, "grad_norm": 87.7154997931125, "learning_rate": 1.0565797742593392e-05, "loss": 23.4624, "step": 30900 }, { "epoch": 0.4873242101280192, "grad_norm": 83.7641748228305, "learning_rate": 1.0560802592795653e-05, "loss": 23.7819, "step": 30910 }, { "epoch": 0.4874818692060289, "grad_norm": 86.15206172420775, "learning_rate": 1.0555807302623715e-05, "loss": 22.7112, "step": 30920 }, { "epoch": 0.4876395282840386, "grad_norm": 91.22980368801989, "learning_rate": 1.055081187332795e-05, "loss": 22.5231, "step": 30930 }, { "epoch": 0.4877971873620483, "grad_norm": 88.4137832588136, "learning_rate": 1.0545816306158758e-05, "loss": 23.5681, "step": 30940 }, { "epoch": 0.48795484644005804, "grad_norm": 85.98435937995393, "learning_rate": 1.054082060236658e-05, "loss": 23.0326, "step": 30950 }, { "epoch": 0.48811250551806773, "grad_norm": 92.17467481892518, "learning_rate": 1.0535824763201885e-05, "loss": 23.388, "step": 30960 }, { "epoch": 0.4882701645960774, "grad_norm": 87.46370547918913, "learning_rate": 1.0530828789915182e-05, "loss": 23.7458, "step": 30970 }, { "epoch": 0.48842782367408716, "grad_norm": 85.65285560570057, "learning_rate": 1.0525832683757007e-05, "loss": 23.3749, "step": 30980 }, { "epoch": 0.48858548275209684, "grad_norm": 82.34779387794396, "learning_rate": 1.0520836445977935e-05, "loss": 23.413, "step": 30990 }, { "epoch": 0.4887431418301066, "grad_norm": 89.9534862990821, "learning_rate": 1.0515840077828572e-05, "loss": 24.7365, "step": 31000 }, { "epoch": 0.4889008009081163, "grad_norm": 85.72410811614034, "learning_rate": 1.0510843580559555e-05, "loss": 22.943, "step": 31010 }, { "epoch": 0.489058459986126, "grad_norm": 88.02540169636283, "learning_rate": 1.0505846955421554e-05, "loss": 23.1334, "step": 31020 }, { "epoch": 0.4892161190641357, "grad_norm": 84.25024910724392, "learning_rate": 1.0500850203665274e-05, "loss": 23.8756, "step": 31030 }, { "epoch": 0.48937377814214544, "grad_norm": 698.295093453091, "learning_rate": 1.0495853326541446e-05, "loss": 23.9814, "step": 31040 }, { "epoch": 0.48953143722015513, "grad_norm": 89.49847254890014, "learning_rate": 1.0490856325300838e-05, "loss": 24.2502, "step": 31050 }, { "epoch": 0.48968909629816487, "grad_norm": 82.49832749220698, "learning_rate": 1.0485859201194245e-05, "loss": 23.2415, "step": 31060 }, { "epoch": 0.48984675537617456, "grad_norm": 85.13519706361134, "learning_rate": 1.0480861955472497e-05, "loss": 22.9535, "step": 31070 }, { "epoch": 0.49000441445418425, "grad_norm": 90.96582319157494, "learning_rate": 1.0475864589386449e-05, "loss": 22.5465, "step": 31080 }, { "epoch": 0.490162073532194, "grad_norm": 87.96605724429438, "learning_rate": 1.047086710418699e-05, "loss": 22.8581, "step": 31090 }, { "epoch": 0.4903197326102037, "grad_norm": 81.41436674991186, "learning_rate": 1.0465869501125038e-05, "loss": 23.2851, "step": 31100 }, { "epoch": 0.4904773916882134, "grad_norm": 87.78541254374045, "learning_rate": 1.0460871781451542e-05, "loss": 23.4183, "step": 31110 }, { "epoch": 0.4906350507662231, "grad_norm": 83.51740676272543, "learning_rate": 1.0455873946417478e-05, "loss": 23.2259, "step": 31120 }, { "epoch": 0.49079270984423284, "grad_norm": 87.0026650153042, "learning_rate": 1.0450875997273846e-05, "loss": 22.9021, "step": 31130 }, { "epoch": 0.49095036892224253, "grad_norm": 86.10617937571024, "learning_rate": 1.044587793527169e-05, "loss": 23.2689, "step": 31140 }, { "epoch": 0.4911080280002523, "grad_norm": 88.51740864428461, "learning_rate": 1.0440879761662065e-05, "loss": 22.5836, "step": 31150 }, { "epoch": 0.49126568707826196, "grad_norm": 88.87561449677219, "learning_rate": 1.0435881477696061e-05, "loss": 23.1319, "step": 31160 }, { "epoch": 0.4914233461562717, "grad_norm": 87.57317886800519, "learning_rate": 1.04308830846248e-05, "loss": 23.2691, "step": 31170 }, { "epoch": 0.4915810052342814, "grad_norm": 82.8273291941598, "learning_rate": 1.0425884583699422e-05, "loss": 23.3137, "step": 31180 }, { "epoch": 0.49173866431229113, "grad_norm": 86.6386389603263, "learning_rate": 1.0420885976171105e-05, "loss": 22.8966, "step": 31190 }, { "epoch": 0.4918963233903008, "grad_norm": 85.21351279795816, "learning_rate": 1.0415887263291043e-05, "loss": 23.0503, "step": 31200 }, { "epoch": 0.4920539824683105, "grad_norm": 89.8031623962104, "learning_rate": 1.0410888446310463e-05, "loss": 23.5099, "step": 31210 }, { "epoch": 0.49221164154632024, "grad_norm": 90.28765301551473, "learning_rate": 1.0405889526480614e-05, "loss": 22.9205, "step": 31220 }, { "epoch": 0.49236930062432993, "grad_norm": 82.48806818017655, "learning_rate": 1.0400890505052776e-05, "loss": 23.4137, "step": 31230 }, { "epoch": 0.4925269597023397, "grad_norm": 93.20158713155286, "learning_rate": 1.0395891383278252e-05, "loss": 23.6415, "step": 31240 }, { "epoch": 0.49268461878034936, "grad_norm": 87.1192149376241, "learning_rate": 1.0390892162408367e-05, "loss": 23.3924, "step": 31250 }, { "epoch": 0.4928422778583591, "grad_norm": 79.1451621812195, "learning_rate": 1.0385892843694472e-05, "loss": 23.2385, "step": 31260 }, { "epoch": 0.4929999369363688, "grad_norm": 87.1176158423193, "learning_rate": 1.0380893428387948e-05, "loss": 23.2423, "step": 31270 }, { "epoch": 0.49315759601437853, "grad_norm": 84.04998855801625, "learning_rate": 1.0375893917740194e-05, "loss": 23.6141, "step": 31280 }, { "epoch": 0.4933152550923882, "grad_norm": 86.12193096947728, "learning_rate": 1.0370894313002631e-05, "loss": 23.002, "step": 31290 }, { "epoch": 0.49347291417039796, "grad_norm": 90.28294355152518, "learning_rate": 1.0365894615426716e-05, "loss": 23.2362, "step": 31300 }, { "epoch": 0.49363057324840764, "grad_norm": 85.93042062696851, "learning_rate": 1.0360894826263914e-05, "loss": 23.1112, "step": 31310 }, { "epoch": 0.49378823232641733, "grad_norm": 89.77908298973216, "learning_rate": 1.035589494676572e-05, "loss": 23.5199, "step": 31320 }, { "epoch": 0.4939458914044271, "grad_norm": 98.12734395863276, "learning_rate": 1.0350894978183654e-05, "loss": 23.1711, "step": 31330 }, { "epoch": 0.49410355048243676, "grad_norm": 86.8089764731417, "learning_rate": 1.0345894921769253e-05, "loss": 22.9998, "step": 31340 }, { "epoch": 0.4942612095604465, "grad_norm": 88.60753100970601, "learning_rate": 1.034089477877408e-05, "loss": 23.9473, "step": 31350 }, { "epoch": 0.4944188686384562, "grad_norm": 88.79779717361855, "learning_rate": 1.0335894550449716e-05, "loss": 23.0773, "step": 31360 }, { "epoch": 0.49457652771646593, "grad_norm": 86.11049086061887, "learning_rate": 1.0330894238047768e-05, "loss": 22.9685, "step": 31370 }, { "epoch": 0.4947341867944756, "grad_norm": 88.57323537446669, "learning_rate": 1.032589384281986e-05, "loss": 22.5897, "step": 31380 }, { "epoch": 0.49489184587248536, "grad_norm": 88.4074737752977, "learning_rate": 1.0320893366017637e-05, "loss": 22.937, "step": 31390 }, { "epoch": 0.49504950495049505, "grad_norm": 85.8633128771128, "learning_rate": 1.0315892808892768e-05, "loss": 23.2143, "step": 31400 }, { "epoch": 0.4952071640285048, "grad_norm": 89.1239711814439, "learning_rate": 1.0310892172696943e-05, "loss": 23.5812, "step": 31410 }, { "epoch": 0.4953648231065145, "grad_norm": 82.7760773411894, "learning_rate": 1.030589145868186e-05, "loss": 22.5583, "step": 31420 }, { "epoch": 0.49552248218452416, "grad_norm": 93.38632476913719, "learning_rate": 1.0300890668099252e-05, "loss": 23.9025, "step": 31430 }, { "epoch": 0.4956801412625339, "grad_norm": 91.83655068101153, "learning_rate": 1.0295889802200863e-05, "loss": 22.9897, "step": 31440 }, { "epoch": 0.4958378003405436, "grad_norm": 88.37975212290456, "learning_rate": 1.0290888862238453e-05, "loss": 22.8935, "step": 31450 }, { "epoch": 0.49599545941855333, "grad_norm": 85.55496452765811, "learning_rate": 1.0285887849463808e-05, "loss": 23.0581, "step": 31460 }, { "epoch": 0.496153118496563, "grad_norm": 85.72253459175091, "learning_rate": 1.0280886765128732e-05, "loss": 22.9375, "step": 31470 }, { "epoch": 0.49631077757457276, "grad_norm": 93.1148928679509, "learning_rate": 1.0275885610485034e-05, "loss": 22.5277, "step": 31480 }, { "epoch": 0.49646843665258245, "grad_norm": 86.34699431059879, "learning_rate": 1.0270884386784561e-05, "loss": 22.6384, "step": 31490 }, { "epoch": 0.4966260957305922, "grad_norm": 85.89304362902848, "learning_rate": 1.0265883095279163e-05, "loss": 22.9964, "step": 31500 }, { "epoch": 0.4967837548086019, "grad_norm": 89.32503839336907, "learning_rate": 1.0260881737220708e-05, "loss": 23.0477, "step": 31510 }, { "epoch": 0.4969414138866116, "grad_norm": 96.06818706216185, "learning_rate": 1.0255880313861084e-05, "loss": 23.846, "step": 31520 }, { "epoch": 0.4970990729646213, "grad_norm": 86.81521100412196, "learning_rate": 1.0250878826452195e-05, "loss": 23.2559, "step": 31530 }, { "epoch": 0.497256732042631, "grad_norm": 85.95826059333952, "learning_rate": 1.0245877276245964e-05, "loss": 22.8228, "step": 31540 }, { "epoch": 0.49741439112064073, "grad_norm": 81.81124803846504, "learning_rate": 1.0240875664494324e-05, "loss": 23.0186, "step": 31550 }, { "epoch": 0.4975720501986504, "grad_norm": 85.67480782942445, "learning_rate": 1.0235873992449223e-05, "loss": 22.6403, "step": 31560 }, { "epoch": 0.49772970927666016, "grad_norm": 88.4895305534166, "learning_rate": 1.0230872261362632e-05, "loss": 23.2435, "step": 31570 }, { "epoch": 0.49788736835466985, "grad_norm": 87.56759471605137, "learning_rate": 1.0225870472486526e-05, "loss": 23.4277, "step": 31580 }, { "epoch": 0.4980450274326796, "grad_norm": 87.2879694500788, "learning_rate": 1.0220868627072904e-05, "loss": 23.5589, "step": 31590 }, { "epoch": 0.4982026865106893, "grad_norm": 84.41117053510888, "learning_rate": 1.0215866726373773e-05, "loss": 23.4882, "step": 31600 }, { "epoch": 0.498360345588699, "grad_norm": 94.1724614239011, "learning_rate": 1.0210864771641158e-05, "loss": 23.3176, "step": 31610 }, { "epoch": 0.4985180046667087, "grad_norm": 96.23890176896865, "learning_rate": 1.0205862764127096e-05, "loss": 22.9692, "step": 31620 }, { "epoch": 0.49867566374471844, "grad_norm": 85.59448780564088, "learning_rate": 1.0200860705083634e-05, "loss": 23.2196, "step": 31630 }, { "epoch": 0.49883332282272813, "grad_norm": 82.27604774240706, "learning_rate": 1.0195858595762838e-05, "loss": 22.8952, "step": 31640 }, { "epoch": 0.4989909819007378, "grad_norm": 88.09403098849974, "learning_rate": 1.0190856437416781e-05, "loss": 22.9111, "step": 31650 }, { "epoch": 0.49914864097874756, "grad_norm": 92.48732306740013, "learning_rate": 1.018585423129755e-05, "loss": 23.2426, "step": 31660 }, { "epoch": 0.49930630005675725, "grad_norm": 85.72071356536598, "learning_rate": 1.018085197865725e-05, "loss": 22.7385, "step": 31670 }, { "epoch": 0.499463959134767, "grad_norm": 89.26219365217356, "learning_rate": 1.0175849680747985e-05, "loss": 24.2035, "step": 31680 }, { "epoch": 0.4996216182127767, "grad_norm": 96.03865875247448, "learning_rate": 1.0170847338821876e-05, "loss": 22.814, "step": 31690 }, { "epoch": 0.4997792772907864, "grad_norm": 84.85973318761182, "learning_rate": 1.016584495413107e-05, "loss": 22.6044, "step": 31700 }, { "epoch": 0.4999369363687961, "grad_norm": 84.32810943275565, "learning_rate": 1.0160842527927697e-05, "loss": 23.0249, "step": 31710 }, { "epoch": 0.5000945954468058, "grad_norm": 89.63962999421757, "learning_rate": 1.0155840061463919e-05, "loss": 23.5941, "step": 31720 }, { "epoch": 0.5002522545248156, "grad_norm": 105.04301558517722, "learning_rate": 1.0150837555991895e-05, "loss": 23.1282, "step": 31730 }, { "epoch": 0.5004099136028253, "grad_norm": 91.6001589543976, "learning_rate": 1.0145835012763807e-05, "loss": 22.9081, "step": 31740 }, { "epoch": 0.500567572680835, "grad_norm": 85.32131615507889, "learning_rate": 1.0140832433031834e-05, "loss": 22.617, "step": 31750 }, { "epoch": 0.5007252317588446, "grad_norm": 86.86408290162426, "learning_rate": 1.013582981804817e-05, "loss": 23.4496, "step": 31760 }, { "epoch": 0.5008828908368543, "grad_norm": 97.19565237785659, "learning_rate": 1.0130827169065017e-05, "loss": 21.9847, "step": 31770 }, { "epoch": 0.5010405499148641, "grad_norm": 94.01174765111647, "learning_rate": 1.0125824487334583e-05, "loss": 23.3396, "step": 31780 }, { "epoch": 0.5011982089928738, "grad_norm": 88.4197477704076, "learning_rate": 1.0120821774109091e-05, "loss": 23.1452, "step": 31790 }, { "epoch": 0.5013558680708835, "grad_norm": 87.51251769443269, "learning_rate": 1.0115819030640766e-05, "loss": 22.8699, "step": 31800 }, { "epoch": 0.5015135271488932, "grad_norm": 84.37904191428096, "learning_rate": 1.0110816258181841e-05, "loss": 22.8251, "step": 31810 }, { "epoch": 0.501671186226903, "grad_norm": 85.89918860927968, "learning_rate": 1.010581345798456e-05, "loss": 22.5664, "step": 31820 }, { "epoch": 0.5018288453049127, "grad_norm": 83.57550353402416, "learning_rate": 1.0100810631301166e-05, "loss": 23.5784, "step": 31830 }, { "epoch": 0.5019865043829224, "grad_norm": 85.90070384375377, "learning_rate": 1.0095807779383917e-05, "loss": 22.7724, "step": 31840 }, { "epoch": 0.502144163460932, "grad_norm": 93.18640748715212, "learning_rate": 1.0090804903485073e-05, "loss": 23.464, "step": 31850 }, { "epoch": 0.5023018225389418, "grad_norm": 86.8338158421668, "learning_rate": 1.0085802004856905e-05, "loss": 23.0623, "step": 31860 }, { "epoch": 0.5024594816169515, "grad_norm": 86.5684135201163, "learning_rate": 1.0080799084751683e-05, "loss": 22.6568, "step": 31870 }, { "epoch": 0.5026171406949612, "grad_norm": 85.99295086246488, "learning_rate": 1.007579614442168e-05, "loss": 22.8621, "step": 31880 }, { "epoch": 0.5027747997729709, "grad_norm": 88.33005944211664, "learning_rate": 1.0070793185119192e-05, "loss": 22.1621, "step": 31890 }, { "epoch": 0.5029324588509806, "grad_norm": 95.9810356985879, "learning_rate": 1.0065790208096495e-05, "loss": 22.6802, "step": 31900 }, { "epoch": 0.5030901179289904, "grad_norm": 90.05089434546417, "learning_rate": 1.006078721460589e-05, "loss": 23.3383, "step": 31910 }, { "epoch": 0.5032477770070001, "grad_norm": 84.57833535106886, "learning_rate": 1.0055784205899666e-05, "loss": 22.2744, "step": 31920 }, { "epoch": 0.5034054360850098, "grad_norm": 83.80167395974998, "learning_rate": 1.0050781183230127e-05, "loss": 22.5422, "step": 31930 }, { "epoch": 0.5035630951630194, "grad_norm": 88.00232220180315, "learning_rate": 1.004577814784958e-05, "loss": 22.6578, "step": 31940 }, { "epoch": 0.5037207542410292, "grad_norm": 91.0777254222407, "learning_rate": 1.004077510101033e-05, "loss": 23.0687, "step": 31950 }, { "epoch": 0.5038784133190389, "grad_norm": 77.53832505245893, "learning_rate": 1.0035772043964686e-05, "loss": 22.0562, "step": 31960 }, { "epoch": 0.5040360723970486, "grad_norm": 87.562086191815, "learning_rate": 1.003076897796496e-05, "loss": 22.8297, "step": 31970 }, { "epoch": 0.5041937314750583, "grad_norm": 82.21689353335151, "learning_rate": 1.0025765904263464e-05, "loss": 22.7652, "step": 31980 }, { "epoch": 0.504351390553068, "grad_norm": 90.70976389670501, "learning_rate": 1.0020762824112522e-05, "loss": 22.8317, "step": 31990 }, { "epoch": 0.5045090496310778, "grad_norm": 97.48027679666532, "learning_rate": 1.0015759738764448e-05, "loss": 22.7365, "step": 32000 }, { "epoch": 0.5046667087090875, "grad_norm": 85.89010205806295, "learning_rate": 1.0010756649471562e-05, "loss": 22.9024, "step": 32010 }, { "epoch": 0.5048243677870972, "grad_norm": 87.11638639941523, "learning_rate": 1.0005753557486186e-05, "loss": 22.6999, "step": 32020 }, { "epoch": 0.5049820268651068, "grad_norm": 89.67213355708775, "learning_rate": 1.0000750464060638e-05, "loss": 22.9097, "step": 32030 }, { "epoch": 0.5051396859431166, "grad_norm": 85.2610586611918, "learning_rate": 9.995747370447242e-06, "loss": 22.9257, "step": 32040 }, { "epoch": 0.5052973450211263, "grad_norm": 88.09420765963695, "learning_rate": 9.990744277898325e-06, "loss": 22.4056, "step": 32050 }, { "epoch": 0.505455004099136, "grad_norm": 117.26478436447414, "learning_rate": 9.985741187666194e-06, "loss": 23.2663, "step": 32060 }, { "epoch": 0.5056126631771457, "grad_norm": 82.84747748717677, "learning_rate": 9.980738101003185e-06, "loss": 23.1882, "step": 32070 }, { "epoch": 0.5057703222551555, "grad_norm": 89.40329758542437, "learning_rate": 9.975735019161609e-06, "loss": 22.8322, "step": 32080 }, { "epoch": 0.5059279813331652, "grad_norm": 85.1370339851041, "learning_rate": 9.97073194339379e-06, "loss": 22.4474, "step": 32090 }, { "epoch": 0.5060856404111749, "grad_norm": 92.80430253905027, "learning_rate": 9.965728874952036e-06, "loss": 22.6037, "step": 32100 }, { "epoch": 0.5062432994891846, "grad_norm": 84.8757070452909, "learning_rate": 9.960725815088676e-06, "loss": 22.5283, "step": 32110 }, { "epoch": 0.5064009585671942, "grad_norm": 84.70154757398171, "learning_rate": 9.955722765056013e-06, "loss": 22.7037, "step": 32120 }, { "epoch": 0.506558617645204, "grad_norm": 90.2243230677286, "learning_rate": 9.950719726106361e-06, "loss": 22.5898, "step": 32130 }, { "epoch": 0.5067162767232137, "grad_norm": 84.88331309150412, "learning_rate": 9.945716699492028e-06, "loss": 22.4787, "step": 32140 }, { "epoch": 0.5068739358012234, "grad_norm": 81.06801410482468, "learning_rate": 9.940713686465321e-06, "loss": 23.1768, "step": 32150 }, { "epoch": 0.5070315948792331, "grad_norm": 300.29354091372505, "learning_rate": 9.93571068827854e-06, "loss": 23.1175, "step": 32160 }, { "epoch": 0.5071892539572429, "grad_norm": 88.08098838684116, "learning_rate": 9.93070770618398e-06, "loss": 23.0224, "step": 32170 }, { "epoch": 0.5073469130352526, "grad_norm": 88.32118891527232, "learning_rate": 9.925704741433936e-06, "loss": 23.1662, "step": 32180 }, { "epoch": 0.5075045721132623, "grad_norm": 84.85083904676331, "learning_rate": 9.920701795280697e-06, "loss": 22.7504, "step": 32190 }, { "epoch": 0.507662231191272, "grad_norm": 86.24547534740003, "learning_rate": 9.915698868976554e-06, "loss": 23.2307, "step": 32200 }, { "epoch": 0.5078198902692818, "grad_norm": 89.3074672518964, "learning_rate": 9.910695963773777e-06, "loss": 22.9338, "step": 32210 }, { "epoch": 0.5079775493472914, "grad_norm": 86.58193507389616, "learning_rate": 9.905693080924652e-06, "loss": 22.3386, "step": 32220 }, { "epoch": 0.5081352084253011, "grad_norm": 87.55070397813236, "learning_rate": 9.900690221681435e-06, "loss": 23.1773, "step": 32230 }, { "epoch": 0.5082928675033108, "grad_norm": 88.75654615410869, "learning_rate": 9.895687387296398e-06, "loss": 23.3205, "step": 32240 }, { "epoch": 0.5084505265813205, "grad_norm": 85.69736448192947, "learning_rate": 9.890684579021793e-06, "loss": 22.6351, "step": 32250 }, { "epoch": 0.5086081856593303, "grad_norm": 84.25747824252024, "learning_rate": 9.885681798109878e-06, "loss": 22.5981, "step": 32260 }, { "epoch": 0.50876584473734, "grad_norm": 91.43375421121651, "learning_rate": 9.880679045812886e-06, "loss": 22.7267, "step": 32270 }, { "epoch": 0.5089235038153497, "grad_norm": 89.01079298041515, "learning_rate": 9.87567632338306e-06, "loss": 22.9809, "step": 32280 }, { "epoch": 0.5090811628933594, "grad_norm": 85.32968139245001, "learning_rate": 9.870673632072629e-06, "loss": 22.8432, "step": 32290 }, { "epoch": 0.5092388219713692, "grad_norm": 86.48028142566315, "learning_rate": 9.865670973133809e-06, "loss": 22.935, "step": 32300 }, { "epoch": 0.5093964810493788, "grad_norm": 87.39539948189675, "learning_rate": 9.860668347818817e-06, "loss": 23.2902, "step": 32310 }, { "epoch": 0.5095541401273885, "grad_norm": 84.45823852077976, "learning_rate": 9.855665757379855e-06, "loss": 22.9747, "step": 32320 }, { "epoch": 0.5097117992053982, "grad_norm": 154.90737262493562, "learning_rate": 9.850663203069121e-06, "loss": 22.5133, "step": 32330 }, { "epoch": 0.5098694582834079, "grad_norm": 82.92832226439965, "learning_rate": 9.845660686138799e-06, "loss": 22.4841, "step": 32340 }, { "epoch": 0.5100271173614177, "grad_norm": 86.88920898704035, "learning_rate": 9.840658207841073e-06, "loss": 21.6369, "step": 32350 }, { "epoch": 0.5101847764394274, "grad_norm": 83.97974004890321, "learning_rate": 9.835655769428099e-06, "loss": 23.1122, "step": 32360 }, { "epoch": 0.5103424355174371, "grad_norm": 90.73736051002419, "learning_rate": 9.830653372152047e-06, "loss": 23.0965, "step": 32370 }, { "epoch": 0.5105000945954468, "grad_norm": 82.3553704745293, "learning_rate": 9.825651017265055e-06, "loss": 22.2035, "step": 32380 }, { "epoch": 0.5106577536734566, "grad_norm": 82.5799856304863, "learning_rate": 9.820648706019265e-06, "loss": 22.879, "step": 32390 }, { "epoch": 0.5108154127514662, "grad_norm": 80.32670396625551, "learning_rate": 9.815646439666799e-06, "loss": 22.7244, "step": 32400 }, { "epoch": 0.5109730718294759, "grad_norm": 80.21461635648187, "learning_rate": 9.81064421945978e-06, "loss": 22.1952, "step": 32410 }, { "epoch": 0.5111307309074856, "grad_norm": 104.1355329870796, "learning_rate": 9.805642046650299e-06, "loss": 23.7502, "step": 32420 }, { "epoch": 0.5112883899854954, "grad_norm": 84.54012548360487, "learning_rate": 9.800639922490456e-06, "loss": 23.1448, "step": 32430 }, { "epoch": 0.5114460490635051, "grad_norm": 87.14920209058816, "learning_rate": 9.795637848232328e-06, "loss": 23.3194, "step": 32440 }, { "epoch": 0.5116037081415148, "grad_norm": 87.37600128590289, "learning_rate": 9.790635825127979e-06, "loss": 23.0056, "step": 32450 }, { "epoch": 0.5117613672195245, "grad_norm": 85.01109060765312, "learning_rate": 9.785633854429468e-06, "loss": 22.5593, "step": 32460 }, { "epoch": 0.5119190262975342, "grad_norm": 90.7718788547782, "learning_rate": 9.780631937388827e-06, "loss": 22.088, "step": 32470 }, { "epoch": 0.512076685375544, "grad_norm": 88.23007631238433, "learning_rate": 9.775630075258092e-06, "loss": 22.1483, "step": 32480 }, { "epoch": 0.5122343444535536, "grad_norm": 85.11582099778236, "learning_rate": 9.77062826928927e-06, "loss": 22.3152, "step": 32490 }, { "epoch": 0.5123920035315633, "grad_norm": 89.6667300242049, "learning_rate": 9.765626520734366e-06, "loss": 22.2537, "step": 32500 }, { "epoch": 0.512549662609573, "grad_norm": 80.00247925851914, "learning_rate": 9.760624830845358e-06, "loss": 22.0442, "step": 32510 }, { "epoch": 0.5127073216875828, "grad_norm": 85.08564508831056, "learning_rate": 9.755623200874227e-06, "loss": 22.5966, "step": 32520 }, { "epoch": 0.5128649807655925, "grad_norm": 83.73104690058199, "learning_rate": 9.750621632072916e-06, "loss": 22.7846, "step": 32530 }, { "epoch": 0.5130226398436022, "grad_norm": 81.54751742985405, "learning_rate": 9.745620125693374e-06, "loss": 22.1925, "step": 32540 }, { "epoch": 0.5131802989216119, "grad_norm": 87.72365619279037, "learning_rate": 9.740618682987518e-06, "loss": 22.5355, "step": 32550 }, { "epoch": 0.5133379579996216, "grad_norm": 83.18384305229459, "learning_rate": 9.735617305207267e-06, "loss": 21.7171, "step": 32560 }, { "epoch": 0.5134956170776314, "grad_norm": 86.80694699570799, "learning_rate": 9.730615993604503e-06, "loss": 22.22, "step": 32570 }, { "epoch": 0.513653276155641, "grad_norm": 88.56282269353727, "learning_rate": 9.725614749431105e-06, "loss": 22.7044, "step": 32580 }, { "epoch": 0.5138109352336507, "grad_norm": 87.62136947177477, "learning_rate": 9.720613573938936e-06, "loss": 23.0559, "step": 32590 }, { "epoch": 0.5139685943116604, "grad_norm": 89.25796676961694, "learning_rate": 9.715612468379828e-06, "loss": 22.9319, "step": 32600 }, { "epoch": 0.5141262533896702, "grad_norm": 78.32112290274556, "learning_rate": 9.710611434005616e-06, "loss": 21.6401, "step": 32610 }, { "epoch": 0.5142839124676799, "grad_norm": 84.15656669014555, "learning_rate": 9.705610472068097e-06, "loss": 22.5579, "step": 32620 }, { "epoch": 0.5144415715456896, "grad_norm": 83.10405042243434, "learning_rate": 9.700609583819064e-06, "loss": 22.7759, "step": 32630 }, { "epoch": 0.5145992306236993, "grad_norm": 80.84169374770214, "learning_rate": 9.695608770510286e-06, "loss": 22.5898, "step": 32640 }, { "epoch": 0.5147568897017091, "grad_norm": 81.51080597292348, "learning_rate": 9.690608033393517e-06, "loss": 22.3408, "step": 32650 }, { "epoch": 0.5149145487797188, "grad_norm": 88.01184753765101, "learning_rate": 9.68560737372048e-06, "loss": 22.3256, "step": 32660 }, { "epoch": 0.5150722078577284, "grad_norm": 407.412876866794, "learning_rate": 9.680606792742899e-06, "loss": 23.3544, "step": 32670 }, { "epoch": 0.5152298669357381, "grad_norm": 88.58767371434426, "learning_rate": 9.675606291712456e-06, "loss": 22.7073, "step": 32680 }, { "epoch": 0.5153875260137478, "grad_norm": 86.57071978815034, "learning_rate": 9.67060587188083e-06, "loss": 22.8923, "step": 32690 }, { "epoch": 0.5155451850917576, "grad_norm": 85.73517595039992, "learning_rate": 9.665605534499675e-06, "loss": 22.2513, "step": 32700 }, { "epoch": 0.5157028441697673, "grad_norm": 84.36535975561507, "learning_rate": 9.660605280820615e-06, "loss": 22.0165, "step": 32710 }, { "epoch": 0.515860503247777, "grad_norm": 87.27238506387303, "learning_rate": 9.655605112095268e-06, "loss": 22.8195, "step": 32720 }, { "epoch": 0.5160181623257867, "grad_norm": 84.57079807512166, "learning_rate": 9.65060502957522e-06, "loss": 22.3938, "step": 32730 }, { "epoch": 0.5161758214037965, "grad_norm": 89.34016910965076, "learning_rate": 9.64560503451204e-06, "loss": 22.5986, "step": 32740 }, { "epoch": 0.5163334804818062, "grad_norm": 83.33347763914178, "learning_rate": 9.640605128157273e-06, "loss": 22.1919, "step": 32750 }, { "epoch": 0.5164911395598158, "grad_norm": 90.42440639277406, "learning_rate": 9.635605311762448e-06, "loss": 22.2382, "step": 32760 }, { "epoch": 0.5166487986378255, "grad_norm": 86.45375560778955, "learning_rate": 9.630605586579058e-06, "loss": 22.0911, "step": 32770 }, { "epoch": 0.5168064577158353, "grad_norm": 90.1948680178134, "learning_rate": 9.625605953858588e-06, "loss": 22.7057, "step": 32780 }, { "epoch": 0.516964116793845, "grad_norm": 80.50868890487777, "learning_rate": 9.620606414852489e-06, "loss": 22.2859, "step": 32790 }, { "epoch": 0.5171217758718547, "grad_norm": 86.11212475629429, "learning_rate": 9.6156069708122e-06, "loss": 22.433, "step": 32800 }, { "epoch": 0.5172794349498644, "grad_norm": 91.03553419918258, "learning_rate": 9.61060762298912e-06, "loss": 22.5157, "step": 32810 }, { "epoch": 0.5174370940278741, "grad_norm": 78.96215552692685, "learning_rate": 9.605608372634642e-06, "loss": 22.9236, "step": 32820 }, { "epoch": 0.5175947531058839, "grad_norm": 81.70901621905753, "learning_rate": 9.600609221000117e-06, "loss": 21.5292, "step": 32830 }, { "epoch": 0.5177524121838936, "grad_norm": 89.1601577060969, "learning_rate": 9.595610169336885e-06, "loss": 21.9983, "step": 32840 }, { "epoch": 0.5179100712619032, "grad_norm": 84.77283948020614, "learning_rate": 9.590611218896256e-06, "loss": 22.1208, "step": 32850 }, { "epoch": 0.5180677303399129, "grad_norm": 84.1374722858226, "learning_rate": 9.585612370929512e-06, "loss": 22.9927, "step": 32860 }, { "epoch": 0.5182253894179227, "grad_norm": 90.54864474456215, "learning_rate": 9.580613626687917e-06, "loss": 22.3202, "step": 32870 }, { "epoch": 0.5183830484959324, "grad_norm": 83.61100228569852, "learning_rate": 9.575614987422697e-06, "loss": 22.7038, "step": 32880 }, { "epoch": 0.5185407075739421, "grad_norm": 89.14175388519321, "learning_rate": 9.570616454385063e-06, "loss": 22.0211, "step": 32890 }, { "epoch": 0.5186983666519518, "grad_norm": 87.2024601079338, "learning_rate": 9.565618028826193e-06, "loss": 23.42, "step": 32900 }, { "epoch": 0.5188560257299615, "grad_norm": 89.10506271102827, "learning_rate": 9.560619711997247e-06, "loss": 22.6998, "step": 32910 }, { "epoch": 0.5190136848079713, "grad_norm": 84.51935945281161, "learning_rate": 9.55562150514934e-06, "loss": 22.6831, "step": 32920 }, { "epoch": 0.519171343885981, "grad_norm": 92.11965616902214, "learning_rate": 9.550623409533578e-06, "loss": 22.3671, "step": 32930 }, { "epoch": 0.5193290029639906, "grad_norm": 84.27247632960712, "learning_rate": 9.545625426401028e-06, "loss": 22.6395, "step": 32940 }, { "epoch": 0.5194866620420003, "grad_norm": 83.93931916814252, "learning_rate": 9.540627557002739e-06, "loss": 22.2854, "step": 32950 }, { "epoch": 0.5196443211200101, "grad_norm": 83.0684066371472, "learning_rate": 9.535629802589721e-06, "loss": 22.1799, "step": 32960 }, { "epoch": 0.5198019801980198, "grad_norm": 85.8520513872881, "learning_rate": 9.530632164412953e-06, "loss": 22.1732, "step": 32970 }, { "epoch": 0.5199596392760295, "grad_norm": 83.0491172683594, "learning_rate": 9.5256346437234e-06, "loss": 22.6279, "step": 32980 }, { "epoch": 0.5201172983540392, "grad_norm": 91.9489031932746, "learning_rate": 9.520637241771983e-06, "loss": 23.1393, "step": 32990 }, { "epoch": 0.520274957432049, "grad_norm": 94.28587199217267, "learning_rate": 9.515639959809607e-06, "loss": 23.0424, "step": 33000 }, { "epoch": 0.5204326165100587, "grad_norm": 89.475435488884, "learning_rate": 9.510642799087127e-06, "loss": 22.5889, "step": 33010 }, { "epoch": 0.5205902755880684, "grad_norm": 96.66753834046253, "learning_rate": 9.50564576085539e-06, "loss": 22.8949, "step": 33020 }, { "epoch": 0.520747934666078, "grad_norm": 92.79613617001407, "learning_rate": 9.500648846365195e-06, "loss": 22.0636, "step": 33030 }, { "epoch": 0.5209055937440877, "grad_norm": 88.24805422574568, "learning_rate": 9.495652056867322e-06, "loss": 22.2595, "step": 33040 }, { "epoch": 0.5210632528220975, "grad_norm": 84.01347272431023, "learning_rate": 9.49065539361251e-06, "loss": 22.2324, "step": 33050 }, { "epoch": 0.5212209119001072, "grad_norm": 87.31836996606968, "learning_rate": 9.485658857851479e-06, "loss": 21.4993, "step": 33060 }, { "epoch": 0.5213785709781169, "grad_norm": 88.17514528854102, "learning_rate": 9.480662450834897e-06, "loss": 22.7211, "step": 33070 }, { "epoch": 0.5215362300561266, "grad_norm": 87.23982865469806, "learning_rate": 9.475666173813424e-06, "loss": 21.7395, "step": 33080 }, { "epoch": 0.5216938891341364, "grad_norm": 97.82913112757963, "learning_rate": 9.470670028037672e-06, "loss": 22.7274, "step": 33090 }, { "epoch": 0.5218515482121461, "grad_norm": 85.35952509145349, "learning_rate": 9.465674014758216e-06, "loss": 22.1092, "step": 33100 }, { "epoch": 0.5220092072901558, "grad_norm": 86.74522377326679, "learning_rate": 9.460678135225619e-06, "loss": 21.9906, "step": 33110 }, { "epoch": 0.5221668663681654, "grad_norm": 85.63053756370395, "learning_rate": 9.455682390690382e-06, "loss": 22.4968, "step": 33120 }, { "epoch": 0.5223245254461751, "grad_norm": 92.5641399929994, "learning_rate": 9.450686782402998e-06, "loss": 22.9062, "step": 33130 }, { "epoch": 0.5224821845241849, "grad_norm": 98.26724896750412, "learning_rate": 9.445691311613908e-06, "loss": 22.3594, "step": 33140 }, { "epoch": 0.5226398436021946, "grad_norm": 86.0176828463108, "learning_rate": 9.440695979573534e-06, "loss": 22.1094, "step": 33150 }, { "epoch": 0.5227975026802043, "grad_norm": 84.74648453480444, "learning_rate": 9.435700787532246e-06, "loss": 22.0012, "step": 33160 }, { "epoch": 0.522955161758214, "grad_norm": 89.8244963175425, "learning_rate": 9.430705736740396e-06, "loss": 22.8557, "step": 33170 }, { "epoch": 0.5231128208362238, "grad_norm": 87.57906461803735, "learning_rate": 9.425710828448283e-06, "loss": 22.181, "step": 33180 }, { "epoch": 0.5232704799142335, "grad_norm": 94.16662636072675, "learning_rate": 9.42071606390619e-06, "loss": 22.427, "step": 33190 }, { "epoch": 0.5234281389922432, "grad_norm": 84.29466155238748, "learning_rate": 9.415721444364343e-06, "loss": 22.1238, "step": 33200 }, { "epoch": 0.5235857980702529, "grad_norm": 90.66709829932437, "learning_rate": 9.410726971072956e-06, "loss": 22.4943, "step": 33210 }, { "epoch": 0.5237434571482626, "grad_norm": 85.71083744378318, "learning_rate": 9.405732645282182e-06, "loss": 21.79, "step": 33220 }, { "epoch": 0.5239011162262723, "grad_norm": 84.04972105110318, "learning_rate": 9.400738468242148e-06, "loss": 22.2286, "step": 33230 }, { "epoch": 0.524058775304282, "grad_norm": 85.45942645760672, "learning_rate": 9.395744441202954e-06, "loss": 22.14, "step": 33240 }, { "epoch": 0.5242164343822917, "grad_norm": 83.96660069976899, "learning_rate": 9.39075056541464e-06, "loss": 22.758, "step": 33250 }, { "epoch": 0.5243740934603014, "grad_norm": 83.9863197947836, "learning_rate": 9.385756842127231e-06, "loss": 22.1546, "step": 33260 }, { "epoch": 0.5245317525383112, "grad_norm": 84.31296528425514, "learning_rate": 9.380763272590693e-06, "loss": 22.2944, "step": 33270 }, { "epoch": 0.5246894116163209, "grad_norm": 84.73835286906204, "learning_rate": 9.37576985805497e-06, "loss": 22.475, "step": 33280 }, { "epoch": 0.5248470706943306, "grad_norm": 88.38854231686398, "learning_rate": 9.370776599769959e-06, "loss": 22.6217, "step": 33290 }, { "epoch": 0.5250047297723403, "grad_norm": 86.1512178833529, "learning_rate": 9.365783498985523e-06, "loss": 22.165, "step": 33300 }, { "epoch": 0.52516238885035, "grad_norm": 83.05027577678959, "learning_rate": 9.360790556951477e-06, "loss": 22.1015, "step": 33310 }, { "epoch": 0.5253200479283597, "grad_norm": 83.39077404482285, "learning_rate": 9.35579777491761e-06, "loss": 22.6273, "step": 33320 }, { "epoch": 0.5254777070063694, "grad_norm": 91.81711209252525, "learning_rate": 9.350805154133651e-06, "loss": 21.9569, "step": 33330 }, { "epoch": 0.5256353660843791, "grad_norm": 91.51625978152943, "learning_rate": 9.345812695849308e-06, "loss": 22.553, "step": 33340 }, { "epoch": 0.5257930251623889, "grad_norm": 92.86168045037836, "learning_rate": 9.340820401314243e-06, "loss": 21.9599, "step": 33350 }, { "epoch": 0.5259506842403986, "grad_norm": 84.01126151057771, "learning_rate": 9.335828271778066e-06, "loss": 23.1253, "step": 33360 }, { "epoch": 0.5261083433184083, "grad_norm": 89.5174643004809, "learning_rate": 9.330836308490359e-06, "loss": 21.6757, "step": 33370 }, { "epoch": 0.526266002396418, "grad_norm": 94.53168508197155, "learning_rate": 9.325844512700657e-06, "loss": 21.9337, "step": 33380 }, { "epoch": 0.5264236614744277, "grad_norm": 89.1354967322699, "learning_rate": 9.320852885658457e-06, "loss": 21.8094, "step": 33390 }, { "epoch": 0.5265813205524374, "grad_norm": 82.09355084614616, "learning_rate": 9.315861428613205e-06, "loss": 22.0779, "step": 33400 }, { "epoch": 0.5267389796304471, "grad_norm": 85.46405109370968, "learning_rate": 9.310870142814316e-06, "loss": 22.1532, "step": 33410 }, { "epoch": 0.5268966387084568, "grad_norm": 86.44604858865854, "learning_rate": 9.30587902951115e-06, "loss": 22.1679, "step": 33420 }, { "epoch": 0.5270542977864665, "grad_norm": 83.1665394598187, "learning_rate": 9.300888089953034e-06, "loss": 21.6325, "step": 33430 }, { "epoch": 0.5272119568644763, "grad_norm": 89.92853986137614, "learning_rate": 9.295897325389242e-06, "loss": 22.7656, "step": 33440 }, { "epoch": 0.527369615942486, "grad_norm": 83.68551837123643, "learning_rate": 9.290906737069018e-06, "loss": 22.1763, "step": 33450 }, { "epoch": 0.5275272750204957, "grad_norm": 81.85904926135726, "learning_rate": 9.285916326241547e-06, "loss": 21.9799, "step": 33460 }, { "epoch": 0.5276849340985054, "grad_norm": 86.14479966395207, "learning_rate": 9.280926094155981e-06, "loss": 22.0071, "step": 33470 }, { "epoch": 0.527842593176515, "grad_norm": 90.28918596864611, "learning_rate": 9.275936042061417e-06, "loss": 22.3785, "step": 33480 }, { "epoch": 0.5280002522545248, "grad_norm": 84.77756290138717, "learning_rate": 9.270946171206914e-06, "loss": 21.8504, "step": 33490 }, { "epoch": 0.5281579113325345, "grad_norm": 85.91601757577877, "learning_rate": 9.265956482841488e-06, "loss": 21.9775, "step": 33500 }, { "epoch": 0.5283155704105442, "grad_norm": 82.03293919265235, "learning_rate": 9.260966978214097e-06, "loss": 22.0417, "step": 33510 }, { "epoch": 0.5284732294885539, "grad_norm": 82.95378303620585, "learning_rate": 9.255977658573671e-06, "loss": 22.7583, "step": 33520 }, { "epoch": 0.5286308885665637, "grad_norm": 81.3674567827811, "learning_rate": 9.250988525169076e-06, "loss": 22.1304, "step": 33530 }, { "epoch": 0.5287885476445734, "grad_norm": 83.84872246332469, "learning_rate": 9.245999579249142e-06, "loss": 22.1231, "step": 33540 }, { "epoch": 0.5289462067225831, "grad_norm": 89.2501814492576, "learning_rate": 9.24101082206265e-06, "loss": 22.1691, "step": 33550 }, { "epoch": 0.5291038658005928, "grad_norm": 81.84070887141489, "learning_rate": 9.236022254858337e-06, "loss": 22.3556, "step": 33560 }, { "epoch": 0.5292615248786026, "grad_norm": 81.17580528224602, "learning_rate": 9.231033878884878e-06, "loss": 21.351, "step": 33570 }, { "epoch": 0.5294191839566122, "grad_norm": 91.68568260091061, "learning_rate": 9.22604569539092e-06, "loss": 23.4725, "step": 33580 }, { "epoch": 0.5295768430346219, "grad_norm": 81.11475865637541, "learning_rate": 9.221057705625048e-06, "loss": 22.0965, "step": 33590 }, { "epoch": 0.5297345021126316, "grad_norm": 84.52725519290075, "learning_rate": 9.216069910835809e-06, "loss": 21.8402, "step": 33600 }, { "epoch": 0.5298921611906413, "grad_norm": 89.0043322137629, "learning_rate": 9.211082312271688e-06, "loss": 22.9509, "step": 33610 }, { "epoch": 0.5300498202686511, "grad_norm": 79.56732863179933, "learning_rate": 9.206094911181134e-06, "loss": 22.7112, "step": 33620 }, { "epoch": 0.5302074793466608, "grad_norm": 89.55249733096291, "learning_rate": 9.201107708812535e-06, "loss": 21.9157, "step": 33630 }, { "epoch": 0.5303651384246705, "grad_norm": 88.11273520630068, "learning_rate": 9.196120706414238e-06, "loss": 21.8177, "step": 33640 }, { "epoch": 0.5305227975026802, "grad_norm": 86.02970562966966, "learning_rate": 9.191133905234539e-06, "loss": 22.2255, "step": 33650 }, { "epoch": 0.53068045658069, "grad_norm": 82.61267341484985, "learning_rate": 9.186147306521676e-06, "loss": 22.0441, "step": 33660 }, { "epoch": 0.5308381156586996, "grad_norm": 87.85580805270442, "learning_rate": 9.181160911523847e-06, "loss": 21.6351, "step": 33670 }, { "epoch": 0.5309957747367093, "grad_norm": 82.91238712070323, "learning_rate": 9.17617472148919e-06, "loss": 22.6094, "step": 33680 }, { "epoch": 0.531153433814719, "grad_norm": 83.96651792374224, "learning_rate": 9.1711887376658e-06, "loss": 22.0367, "step": 33690 }, { "epoch": 0.5313110928927287, "grad_norm": 86.865356921606, "learning_rate": 9.16620296130171e-06, "loss": 21.7637, "step": 33700 }, { "epoch": 0.5314687519707385, "grad_norm": 88.17857300911781, "learning_rate": 9.161217393644914e-06, "loss": 22.5042, "step": 33710 }, { "epoch": 0.5316264110487482, "grad_norm": 87.61996385423278, "learning_rate": 9.15623203594334e-06, "loss": 22.0578, "step": 33720 }, { "epoch": 0.5317840701267579, "grad_norm": 84.60147915249652, "learning_rate": 9.151246889444875e-06, "loss": 21.705, "step": 33730 }, { "epoch": 0.5319417292047676, "grad_norm": 88.55008749732349, "learning_rate": 9.146261955397346e-06, "loss": 22.4792, "step": 33740 }, { "epoch": 0.5320993882827774, "grad_norm": 95.75972124771606, "learning_rate": 9.141277235048533e-06, "loss": 21.6323, "step": 33750 }, { "epoch": 0.532257047360787, "grad_norm": 85.41171816863795, "learning_rate": 9.136292729646157e-06, "loss": 21.628, "step": 33760 }, { "epoch": 0.5324147064387967, "grad_norm": 85.64132972974741, "learning_rate": 9.13130844043788e-06, "loss": 22.0368, "step": 33770 }, { "epoch": 0.5325723655168064, "grad_norm": 89.46403164714683, "learning_rate": 9.126324368671324e-06, "loss": 22.0418, "step": 33780 }, { "epoch": 0.5327300245948162, "grad_norm": 91.34997763629218, "learning_rate": 9.121340515594045e-06, "loss": 22.2726, "step": 33790 }, { "epoch": 0.5328876836728259, "grad_norm": 89.04811199162408, "learning_rate": 9.116356882453559e-06, "loss": 21.5564, "step": 33800 }, { "epoch": 0.5330453427508356, "grad_norm": 91.28346777339061, "learning_rate": 9.111373470497302e-06, "loss": 21.7892, "step": 33810 }, { "epoch": 0.5332030018288453, "grad_norm": 84.2596382787007, "learning_rate": 9.10639028097268e-06, "loss": 21.4462, "step": 33820 }, { "epoch": 0.533360660906855, "grad_norm": 82.54974543507639, "learning_rate": 9.101407315127024e-06, "loss": 22.2862, "step": 33830 }, { "epoch": 0.5335183199848648, "grad_norm": 85.06771877417626, "learning_rate": 9.096424574207625e-06, "loss": 22.6006, "step": 33840 }, { "epoch": 0.5336759790628745, "grad_norm": 98.78745059094067, "learning_rate": 9.091442059461705e-06, "loss": 22.8282, "step": 33850 }, { "epoch": 0.5338336381408841, "grad_norm": 84.18292395346509, "learning_rate": 9.08645977213644e-06, "loss": 22.5177, "step": 33860 }, { "epoch": 0.5339912972188938, "grad_norm": 92.883578922575, "learning_rate": 9.081477713478936e-06, "loss": 22.3224, "step": 33870 }, { "epoch": 0.5341489562969036, "grad_norm": 84.27344521008487, "learning_rate": 9.076495884736259e-06, "loss": 21.7751, "step": 33880 }, { "epoch": 0.5343066153749133, "grad_norm": 84.030574337322, "learning_rate": 9.071514287155402e-06, "loss": 21.6733, "step": 33890 }, { "epoch": 0.534464274452923, "grad_norm": 82.0981451780402, "learning_rate": 9.066532921983303e-06, "loss": 22.0022, "step": 33900 }, { "epoch": 0.5346219335309327, "grad_norm": 84.58546415870416, "learning_rate": 9.061551790466853e-06, "loss": 20.9933, "step": 33910 }, { "epoch": 0.5347795926089425, "grad_norm": 83.66167122805881, "learning_rate": 9.056570893852868e-06, "loss": 21.7499, "step": 33920 }, { "epoch": 0.5349372516869522, "grad_norm": 85.91673341461234, "learning_rate": 9.051590233388118e-06, "loss": 22.0947, "step": 33930 }, { "epoch": 0.5350949107649619, "grad_norm": 85.9082019167498, "learning_rate": 9.04660981031931e-06, "loss": 22.1601, "step": 33940 }, { "epoch": 0.5352525698429715, "grad_norm": 84.53620898561955, "learning_rate": 9.041629625893093e-06, "loss": 21.5591, "step": 33950 }, { "epoch": 0.5354102289209812, "grad_norm": 86.10845348693357, "learning_rate": 9.036649681356047e-06, "loss": 21.5393, "step": 33960 }, { "epoch": 0.535567887998991, "grad_norm": 88.20953570847118, "learning_rate": 9.031669977954707e-06, "loss": 22.0757, "step": 33970 }, { "epoch": 0.5357255470770007, "grad_norm": 80.83143926890085, "learning_rate": 9.026690516935533e-06, "loss": 21.7832, "step": 33980 }, { "epoch": 0.5358832061550104, "grad_norm": 84.70691129940295, "learning_rate": 9.021711299544937e-06, "loss": 22.0165, "step": 33990 }, { "epoch": 0.5360408652330201, "grad_norm": 85.06759147585043, "learning_rate": 9.016732327029259e-06, "loss": 22.1272, "step": 34000 }, { "epoch": 0.5361985243110299, "grad_norm": 90.24049603631838, "learning_rate": 9.011753600634792e-06, "loss": 21.5825, "step": 34010 }, { "epoch": 0.5363561833890396, "grad_norm": 85.89596698944793, "learning_rate": 9.006775121607748e-06, "loss": 21.9695, "step": 34020 }, { "epoch": 0.5365138424670493, "grad_norm": 93.27373243386519, "learning_rate": 9.00179689119429e-06, "loss": 22.0306, "step": 34030 }, { "epoch": 0.5366715015450589, "grad_norm": 108.93274554234942, "learning_rate": 8.996818910640523e-06, "loss": 21.5634, "step": 34040 }, { "epoch": 0.5368291606230686, "grad_norm": 80.31184757321941, "learning_rate": 8.991841181192472e-06, "loss": 21.4583, "step": 34050 }, { "epoch": 0.5369868197010784, "grad_norm": 89.21706823915522, "learning_rate": 8.986863704096121e-06, "loss": 22.6187, "step": 34060 }, { "epoch": 0.5371444787790881, "grad_norm": 81.94205173227552, "learning_rate": 8.981886480597368e-06, "loss": 21.7751, "step": 34070 }, { "epoch": 0.5373021378570978, "grad_norm": 84.73880534376633, "learning_rate": 8.97690951194207e-06, "loss": 21.6451, "step": 34080 }, { "epoch": 0.5374597969351075, "grad_norm": 83.63511987806879, "learning_rate": 8.971932799376e-06, "loss": 22.5689, "step": 34090 }, { "epoch": 0.5376174560131173, "grad_norm": 86.02622843425407, "learning_rate": 8.966956344144884e-06, "loss": 21.8587, "step": 34100 }, { "epoch": 0.537775115091127, "grad_norm": 89.10765757747528, "learning_rate": 8.96198014749437e-06, "loss": 21.3675, "step": 34110 }, { "epoch": 0.5379327741691367, "grad_norm": 87.80628164848612, "learning_rate": 8.957004210670054e-06, "loss": 22.5569, "step": 34120 }, { "epoch": 0.5380904332471463, "grad_norm": 83.60300396731174, "learning_rate": 8.95202853491745e-06, "loss": 22.0598, "step": 34130 }, { "epoch": 0.5382480923251561, "grad_norm": 90.75946632186977, "learning_rate": 8.947053121482027e-06, "loss": 22.3711, "step": 34140 }, { "epoch": 0.5384057514031658, "grad_norm": 87.89189506901204, "learning_rate": 8.942077971609174e-06, "loss": 21.6612, "step": 34150 }, { "epoch": 0.5385634104811755, "grad_norm": 82.50200883607279, "learning_rate": 8.937103086544214e-06, "loss": 21.7929, "step": 34160 }, { "epoch": 0.5387210695591852, "grad_norm": 83.31886515116447, "learning_rate": 8.932128467532412e-06, "loss": 21.6249, "step": 34170 }, { "epoch": 0.5388787286371949, "grad_norm": 83.90005849758532, "learning_rate": 8.927154115818963e-06, "loss": 22.2543, "step": 34180 }, { "epoch": 0.5390363877152047, "grad_norm": 85.18046311113287, "learning_rate": 8.922180032648993e-06, "loss": 21.7084, "step": 34190 }, { "epoch": 0.5391940467932144, "grad_norm": 76.52400916310661, "learning_rate": 8.91720621926756e-06, "loss": 21.5597, "step": 34200 }, { "epoch": 0.539351705871224, "grad_norm": 92.61962424612364, "learning_rate": 8.912232676919663e-06, "loss": 22.7584, "step": 34210 }, { "epoch": 0.5395093649492337, "grad_norm": 82.10759300491368, "learning_rate": 8.907259406850218e-06, "loss": 21.4452, "step": 34220 }, { "epoch": 0.5396670240272435, "grad_norm": 84.22878049561906, "learning_rate": 8.902286410304087e-06, "loss": 21.7592, "step": 34230 }, { "epoch": 0.5398246831052532, "grad_norm": 91.12524178780258, "learning_rate": 8.897313688526057e-06, "loss": 21.5872, "step": 34240 }, { "epoch": 0.5399823421832629, "grad_norm": 86.36722104641424, "learning_rate": 8.89234124276085e-06, "loss": 22.3513, "step": 34250 }, { "epoch": 0.5401400012612726, "grad_norm": 94.87152142562209, "learning_rate": 8.887369074253111e-06, "loss": 21.5157, "step": 34260 }, { "epoch": 0.5402976603392823, "grad_norm": 83.02107824774133, "learning_rate": 8.882397184247425e-06, "loss": 22.0828, "step": 34270 }, { "epoch": 0.5404553194172921, "grad_norm": 88.72690282610272, "learning_rate": 8.877425573988301e-06, "loss": 21.8048, "step": 34280 }, { "epoch": 0.5406129784953018, "grad_norm": 83.79458852406566, "learning_rate": 8.87245424472018e-06, "loss": 21.7538, "step": 34290 }, { "epoch": 0.5407706375733115, "grad_norm": 89.1407935786487, "learning_rate": 8.867483197687435e-06, "loss": 21.7257, "step": 34300 }, { "epoch": 0.5409282966513211, "grad_norm": 88.17227669165698, "learning_rate": 8.862512434134363e-06, "loss": 22.3413, "step": 34310 }, { "epoch": 0.5410859557293309, "grad_norm": 85.94720052675068, "learning_rate": 8.857541955305196e-06, "loss": 21.5486, "step": 34320 }, { "epoch": 0.5412436148073406, "grad_norm": 88.48382162329875, "learning_rate": 8.852571762444088e-06, "loss": 22.4526, "step": 34330 }, { "epoch": 0.5414012738853503, "grad_norm": 89.61827912489984, "learning_rate": 8.847601856795132e-06, "loss": 21.8274, "step": 34340 }, { "epoch": 0.54155893296336, "grad_norm": 83.38488693721372, "learning_rate": 8.842632239602334e-06, "loss": 21.4172, "step": 34350 }, { "epoch": 0.5417165920413698, "grad_norm": 82.04955717110116, "learning_rate": 8.837662912109644e-06, "loss": 21.1161, "step": 34360 }, { "epoch": 0.5418742511193795, "grad_norm": 89.52045474752912, "learning_rate": 8.832693875560926e-06, "loss": 21.6835, "step": 34370 }, { "epoch": 0.5420319101973892, "grad_norm": 84.24311237920746, "learning_rate": 8.82772513119998e-06, "loss": 21.9772, "step": 34380 }, { "epoch": 0.5421895692753989, "grad_norm": 80.95215769333014, "learning_rate": 8.822756680270528e-06, "loss": 21.3766, "step": 34390 }, { "epoch": 0.5423472283534085, "grad_norm": 82.38486015446293, "learning_rate": 8.817788524016224e-06, "loss": 21.2912, "step": 34400 }, { "epoch": 0.5425048874314183, "grad_norm": 83.45749190659333, "learning_rate": 8.812820663680642e-06, "loss": 22.1274, "step": 34410 }, { "epoch": 0.542662546509428, "grad_norm": 102.20797484128481, "learning_rate": 8.80785310050728e-06, "loss": 22.1224, "step": 34420 }, { "epoch": 0.5428202055874377, "grad_norm": 85.55087051537446, "learning_rate": 8.802885835739575e-06, "loss": 21.9628, "step": 34430 }, { "epoch": 0.5429778646654474, "grad_norm": 79.73419128995097, "learning_rate": 8.79791887062087e-06, "loss": 21.0198, "step": 34440 }, { "epoch": 0.5431355237434572, "grad_norm": 81.24446826989943, "learning_rate": 8.792952206394456e-06, "loss": 20.9654, "step": 34450 }, { "epoch": 0.5432931828214669, "grad_norm": 85.31223458320869, "learning_rate": 8.787985844303523e-06, "loss": 21.5747, "step": 34460 }, { "epoch": 0.5434508418994766, "grad_norm": 80.39215093013271, "learning_rate": 8.78301978559121e-06, "loss": 21.8859, "step": 34470 }, { "epoch": 0.5436085009774863, "grad_norm": 85.10652091365652, "learning_rate": 8.778054031500556e-06, "loss": 21.0391, "step": 34480 }, { "epoch": 0.543766160055496, "grad_norm": 85.22577503793781, "learning_rate": 8.773088583274548e-06, "loss": 21.8081, "step": 34490 }, { "epoch": 0.5439238191335057, "grad_norm": 88.60628883961841, "learning_rate": 8.768123442156079e-06, "loss": 22.1524, "step": 34500 }, { "epoch": 0.5440814782115154, "grad_norm": 85.37920105492336, "learning_rate": 8.763158609387974e-06, "loss": 21.7193, "step": 34510 }, { "epoch": 0.5442391372895251, "grad_norm": 84.91955675094485, "learning_rate": 8.75819408621297e-06, "loss": 22.0054, "step": 34520 }, { "epoch": 0.5443967963675348, "grad_norm": 81.7887008240577, "learning_rate": 8.753229873873745e-06, "loss": 21.3586, "step": 34530 }, { "epoch": 0.5445544554455446, "grad_norm": 82.72416367381611, "learning_rate": 8.748265973612883e-06, "loss": 21.5389, "step": 34540 }, { "epoch": 0.5447121145235543, "grad_norm": 84.33894344421253, "learning_rate": 8.74330238667289e-06, "loss": 21.5665, "step": 34550 }, { "epoch": 0.544869773601564, "grad_norm": 87.53732239170338, "learning_rate": 8.738339114296211e-06, "loss": 21.8326, "step": 34560 }, { "epoch": 0.5450274326795737, "grad_norm": 87.23880240928196, "learning_rate": 8.733376157725186e-06, "loss": 21.896, "step": 34570 }, { "epoch": 0.5451850917575835, "grad_norm": 84.33418010055252, "learning_rate": 8.7284135182021e-06, "loss": 21.7405, "step": 34580 }, { "epoch": 0.5453427508355931, "grad_norm": 84.20740306191857, "learning_rate": 8.723451196969145e-06, "loss": 21.763, "step": 34590 }, { "epoch": 0.5455004099136028, "grad_norm": 86.967786524169, "learning_rate": 8.71848919526844e-06, "loss": 21.6264, "step": 34600 }, { "epoch": 0.5456580689916125, "grad_norm": 84.0050916113328, "learning_rate": 8.713527514342013e-06, "loss": 21.9217, "step": 34610 }, { "epoch": 0.5458157280696222, "grad_norm": 88.23746144985043, "learning_rate": 8.708566155431832e-06, "loss": 21.9708, "step": 34620 }, { "epoch": 0.545973387147632, "grad_norm": 85.7196229807868, "learning_rate": 8.70360511977976e-06, "loss": 21.7938, "step": 34630 }, { "epoch": 0.5461310462256417, "grad_norm": 84.633115225495, "learning_rate": 8.6986444086276e-06, "loss": 20.8743, "step": 34640 }, { "epoch": 0.5462887053036514, "grad_norm": 83.764573476364, "learning_rate": 8.693684023217059e-06, "loss": 20.7905, "step": 34650 }, { "epoch": 0.546446364381661, "grad_norm": 83.09549904876035, "learning_rate": 8.688723964789773e-06, "loss": 21.607, "step": 34660 }, { "epoch": 0.5466040234596709, "grad_norm": 81.40921904800123, "learning_rate": 8.683764234587289e-06, "loss": 21.2954, "step": 34670 }, { "epoch": 0.5467616825376805, "grad_norm": 87.53653441929526, "learning_rate": 8.678804833851073e-06, "loss": 21.7246, "step": 34680 }, { "epoch": 0.5469193416156902, "grad_norm": 81.0643718293042, "learning_rate": 8.673845763822516e-06, "loss": 21.4268, "step": 34690 }, { "epoch": 0.5470770006936999, "grad_norm": 83.14406150596152, "learning_rate": 8.668887025742912e-06, "loss": 21.3231, "step": 34700 }, { "epoch": 0.5472346597717097, "grad_norm": 78.38139221616714, "learning_rate": 8.663928620853488e-06, "loss": 22.141, "step": 34710 }, { "epoch": 0.5473923188497194, "grad_norm": 83.300650677365, "learning_rate": 8.658970550395371e-06, "loss": 22.028, "step": 34720 }, { "epoch": 0.5475499779277291, "grad_norm": 82.77105540172367, "learning_rate": 8.654012815609619e-06, "loss": 21.1501, "step": 34730 }, { "epoch": 0.5477076370057388, "grad_norm": 79.75108651196476, "learning_rate": 8.649055417737198e-06, "loss": 21.3093, "step": 34740 }, { "epoch": 0.5478652960837485, "grad_norm": 84.97688223133275, "learning_rate": 8.644098358018996e-06, "loss": 21.229, "step": 34750 }, { "epoch": 0.5480229551617583, "grad_norm": 88.4545416856905, "learning_rate": 8.639141637695806e-06, "loss": 22.0806, "step": 34760 }, { "epoch": 0.5481806142397679, "grad_norm": 86.88008589536291, "learning_rate": 8.634185258008345e-06, "loss": 21.5106, "step": 34770 }, { "epoch": 0.5483382733177776, "grad_norm": 80.84708778396973, "learning_rate": 8.629229220197238e-06, "loss": 21.9412, "step": 34780 }, { "epoch": 0.5484959323957873, "grad_norm": 87.04453733285972, "learning_rate": 8.624273525503034e-06, "loss": 22.2505, "step": 34790 }, { "epoch": 0.5486535914737971, "grad_norm": 79.45703703679006, "learning_rate": 8.619318175166185e-06, "loss": 20.8141, "step": 34800 }, { "epoch": 0.5488112505518068, "grad_norm": 84.24353398972822, "learning_rate": 8.614363170427068e-06, "loss": 21.6226, "step": 34810 }, { "epoch": 0.5489689096298165, "grad_norm": 86.1618433356885, "learning_rate": 8.609408512525963e-06, "loss": 21.5478, "step": 34820 }, { "epoch": 0.5491265687078262, "grad_norm": 81.29785459978844, "learning_rate": 8.604454202703068e-06, "loss": 21.392, "step": 34830 }, { "epoch": 0.5492842277858359, "grad_norm": 80.5287776731951, "learning_rate": 8.599500242198495e-06, "loss": 21.7448, "step": 34840 }, { "epoch": 0.5494418868638457, "grad_norm": 80.90685608941293, "learning_rate": 8.594546632252264e-06, "loss": 21.7498, "step": 34850 }, { "epoch": 0.5495995459418553, "grad_norm": 86.55139401743867, "learning_rate": 8.58959337410432e-06, "loss": 21.1803, "step": 34860 }, { "epoch": 0.549757205019865, "grad_norm": 80.82673188611511, "learning_rate": 8.584640468994496e-06, "loss": 21.7492, "step": 34870 }, { "epoch": 0.5499148640978747, "grad_norm": 82.28744998774518, "learning_rate": 8.579687918162562e-06, "loss": 21.5112, "step": 34880 }, { "epoch": 0.5500725231758845, "grad_norm": 87.21292240172906, "learning_rate": 8.574735722848183e-06, "loss": 22.0181, "step": 34890 }, { "epoch": 0.5502301822538942, "grad_norm": 89.28290555149697, "learning_rate": 8.569783884290946e-06, "loss": 22.0633, "step": 34900 }, { "epoch": 0.5503878413319039, "grad_norm": 91.21323061225813, "learning_rate": 8.564832403730337e-06, "loss": 21.3694, "step": 34910 }, { "epoch": 0.5505455004099136, "grad_norm": 88.01777505269148, "learning_rate": 8.559881282405762e-06, "loss": 20.9705, "step": 34920 }, { "epoch": 0.5507031594879234, "grad_norm": 90.5640314828772, "learning_rate": 8.55493052155653e-06, "loss": 21.6109, "step": 34930 }, { "epoch": 0.550860818565933, "grad_norm": 88.5548879365263, "learning_rate": 8.549980122421868e-06, "loss": 21.3665, "step": 34940 }, { "epoch": 0.5510184776439427, "grad_norm": 83.99500018146954, "learning_rate": 8.545030086240906e-06, "loss": 22.1585, "step": 34950 }, { "epoch": 0.5511761367219524, "grad_norm": 85.58954137219645, "learning_rate": 8.540080414252681e-06, "loss": 21.174, "step": 34960 }, { "epoch": 0.5513337957999621, "grad_norm": 80.16488566798304, "learning_rate": 8.53513110769615e-06, "loss": 20.8985, "step": 34970 }, { "epoch": 0.5514914548779719, "grad_norm": 88.03343629906652, "learning_rate": 8.530182167810164e-06, "loss": 21.9477, "step": 34980 }, { "epoch": 0.5516491139559816, "grad_norm": 86.6380765999656, "learning_rate": 8.525233595833495e-06, "loss": 21.8327, "step": 34990 }, { "epoch": 0.5518067730339913, "grad_norm": 83.76087601328409, "learning_rate": 8.520285393004813e-06, "loss": 21.7682, "step": 35000 }, { "epoch": 0.551964432112001, "grad_norm": 85.41118549894377, "learning_rate": 8.515337560562704e-06, "loss": 21.6176, "step": 35010 }, { "epoch": 0.5521220911900108, "grad_norm": 79.54662175004586, "learning_rate": 8.510390099745653e-06, "loss": 21.129, "step": 35020 }, { "epoch": 0.5522797502680205, "grad_norm": 96.50232749691017, "learning_rate": 8.505443011792062e-06, "loss": 21.7706, "step": 35030 }, { "epoch": 0.5524374093460301, "grad_norm": 84.61418882711276, "learning_rate": 8.500496297940228e-06, "loss": 21.3873, "step": 35040 }, { "epoch": 0.5525950684240398, "grad_norm": 94.69019437018119, "learning_rate": 8.495549959428366e-06, "loss": 21.9705, "step": 35050 }, { "epoch": 0.5527527275020496, "grad_norm": 78.38433702799952, "learning_rate": 8.490603997494584e-06, "loss": 21.651, "step": 35060 }, { "epoch": 0.5529103865800593, "grad_norm": 81.2866809240663, "learning_rate": 8.485658413376914e-06, "loss": 21.5884, "step": 35070 }, { "epoch": 0.553068045658069, "grad_norm": 86.59252444781329, "learning_rate": 8.480713208313271e-06, "loss": 21.2993, "step": 35080 }, { "epoch": 0.5532257047360787, "grad_norm": 84.65042298352327, "learning_rate": 8.475768383541492e-06, "loss": 21.369, "step": 35090 }, { "epoch": 0.5533833638140884, "grad_norm": 86.4813708689467, "learning_rate": 8.470823940299315e-06, "loss": 21.657, "step": 35100 }, { "epoch": 0.5535410228920982, "grad_norm": 82.66039322999794, "learning_rate": 8.465879879824377e-06, "loss": 21.1086, "step": 35110 }, { "epoch": 0.5536986819701079, "grad_norm": 85.15719133712477, "learning_rate": 8.460936203354229e-06, "loss": 20.9987, "step": 35120 }, { "epoch": 0.5538563410481175, "grad_norm": 85.7225992251125, "learning_rate": 8.45599291212631e-06, "loss": 21.4016, "step": 35130 }, { "epoch": 0.5540140001261272, "grad_norm": 84.38496327044585, "learning_rate": 8.451050007377982e-06, "loss": 21.7596, "step": 35140 }, { "epoch": 0.554171659204137, "grad_norm": 88.69513963567482, "learning_rate": 8.446107490346495e-06, "loss": 22.0675, "step": 35150 }, { "epoch": 0.5543293182821467, "grad_norm": 88.43302397727517, "learning_rate": 8.441165362269014e-06, "loss": 21.2898, "step": 35160 }, { "epoch": 0.5544869773601564, "grad_norm": 91.87029292724789, "learning_rate": 8.436223624382595e-06, "loss": 22.116, "step": 35170 }, { "epoch": 0.5546446364381661, "grad_norm": 78.48385803675369, "learning_rate": 8.431282277924203e-06, "loss": 21.3825, "step": 35180 }, { "epoch": 0.5548022955161758, "grad_norm": 89.44767401231579, "learning_rate": 8.426341324130703e-06, "loss": 21.5455, "step": 35190 }, { "epoch": 0.5549599545941856, "grad_norm": 87.84642611427152, "learning_rate": 8.421400764238869e-06, "loss": 21.8654, "step": 35200 }, { "epoch": 0.5551176136721953, "grad_norm": 82.55953162257443, "learning_rate": 8.416460599485363e-06, "loss": 21.2516, "step": 35210 }, { "epoch": 0.5552752727502049, "grad_norm": 84.83347286680583, "learning_rate": 8.411520831106755e-06, "loss": 21.7987, "step": 35220 }, { "epoch": 0.5554329318282146, "grad_norm": 83.00278080612489, "learning_rate": 8.406581460339516e-06, "loss": 21.8705, "step": 35230 }, { "epoch": 0.5555905909062244, "grad_norm": 82.95743300748238, "learning_rate": 8.40164248842002e-06, "loss": 21.6624, "step": 35240 }, { "epoch": 0.5557482499842341, "grad_norm": 83.60873934397637, "learning_rate": 8.39670391658454e-06, "loss": 21.277, "step": 35250 }, { "epoch": 0.5559059090622438, "grad_norm": 81.28189484661819, "learning_rate": 8.39176574606924e-06, "loss": 21.7458, "step": 35260 }, { "epoch": 0.5560635681402535, "grad_norm": 87.0371393699704, "learning_rate": 8.386827978110201e-06, "loss": 21.4333, "step": 35270 }, { "epoch": 0.5562212272182633, "grad_norm": 80.8924371538108, "learning_rate": 8.381890613943381e-06, "loss": 21.1802, "step": 35280 }, { "epoch": 0.556378886296273, "grad_norm": 81.34316563723615, "learning_rate": 8.376953654804659e-06, "loss": 21.1531, "step": 35290 }, { "epoch": 0.5565365453742827, "grad_norm": 90.0107061360151, "learning_rate": 8.372017101929795e-06, "loss": 21.475, "step": 35300 }, { "epoch": 0.5566942044522923, "grad_norm": 91.45352792577613, "learning_rate": 8.367080956554463e-06, "loss": 21.453, "step": 35310 }, { "epoch": 0.556851863530302, "grad_norm": 84.14222047689036, "learning_rate": 8.36214521991422e-06, "loss": 22.1035, "step": 35320 }, { "epoch": 0.5570095226083118, "grad_norm": 90.18981888757507, "learning_rate": 8.35720989324453e-06, "loss": 21.3112, "step": 35330 }, { "epoch": 0.5571671816863215, "grad_norm": 79.74983112617585, "learning_rate": 8.352274977780758e-06, "loss": 21.903, "step": 35340 }, { "epoch": 0.5573248407643312, "grad_norm": 93.11086134121373, "learning_rate": 8.347340474758148e-06, "loss": 21.0317, "step": 35350 }, { "epoch": 0.5574824998423409, "grad_norm": 80.15388154732334, "learning_rate": 8.342406385411864e-06, "loss": 21.2019, "step": 35360 }, { "epoch": 0.5576401589203507, "grad_norm": 86.11812721492852, "learning_rate": 8.337472710976946e-06, "loss": 21.3269, "step": 35370 }, { "epoch": 0.5577978179983604, "grad_norm": 81.58143889445986, "learning_rate": 8.332539452688345e-06, "loss": 21.3992, "step": 35380 }, { "epoch": 0.55795547707637, "grad_norm": 87.16200879910441, "learning_rate": 8.3276066117809e-06, "loss": 21.1521, "step": 35390 }, { "epoch": 0.5581131361543797, "grad_norm": 77.65487208320167, "learning_rate": 8.322674189489352e-06, "loss": 21.1072, "step": 35400 }, { "epoch": 0.5582707952323894, "grad_norm": 83.29247777260704, "learning_rate": 8.317742187048327e-06, "loss": 20.9969, "step": 35410 }, { "epoch": 0.5584284543103992, "grad_norm": 87.91398199747395, "learning_rate": 8.31281060569236e-06, "loss": 20.6174, "step": 35420 }, { "epoch": 0.5585861133884089, "grad_norm": 88.73057799451865, "learning_rate": 8.307879446655858e-06, "loss": 21.5394, "step": 35430 }, { "epoch": 0.5587437724664186, "grad_norm": 82.04917984165985, "learning_rate": 8.302948711173153e-06, "loss": 21.4155, "step": 35440 }, { "epoch": 0.5589014315444283, "grad_norm": 82.82178448483475, "learning_rate": 8.298018400478443e-06, "loss": 21.5099, "step": 35450 }, { "epoch": 0.5590590906224381, "grad_norm": 88.14243521593201, "learning_rate": 8.29308851580584e-06, "loss": 21.2311, "step": 35460 }, { "epoch": 0.5592167497004478, "grad_norm": 83.17782109703153, "learning_rate": 8.288159058389336e-06, "loss": 21.1493, "step": 35470 }, { "epoch": 0.5593744087784575, "grad_norm": 79.21911757005724, "learning_rate": 8.28323002946282e-06, "loss": 20.8692, "step": 35480 }, { "epoch": 0.5595320678564671, "grad_norm": 84.14055784220172, "learning_rate": 8.27830143026008e-06, "loss": 21.384, "step": 35490 }, { "epoch": 0.5596897269344769, "grad_norm": 80.35115351797319, "learning_rate": 8.273373262014784e-06, "loss": 21.2199, "step": 35500 }, { "epoch": 0.5598473860124866, "grad_norm": 84.13547084528912, "learning_rate": 8.268445525960508e-06, "loss": 21.2002, "step": 35510 }, { "epoch": 0.5600050450904963, "grad_norm": 80.21772215866427, "learning_rate": 8.263518223330698e-06, "loss": 20.6136, "step": 35520 }, { "epoch": 0.560162704168506, "grad_norm": 78.21320902578907, "learning_rate": 8.258591355358715e-06, "loss": 21.0633, "step": 35530 }, { "epoch": 0.5603203632465157, "grad_norm": 93.01643417030627, "learning_rate": 8.253664923277797e-06, "loss": 22.0266, "step": 35540 }, { "epoch": 0.5604780223245255, "grad_norm": 87.05536288034679, "learning_rate": 8.24873892832108e-06, "loss": 20.9159, "step": 35550 }, { "epoch": 0.5606356814025352, "grad_norm": 82.40106446989061, "learning_rate": 8.24381337172158e-06, "loss": 20.5181, "step": 35560 }, { "epoch": 0.5607933404805449, "grad_norm": 85.52668321272733, "learning_rate": 8.23888825471222e-06, "loss": 21.526, "step": 35570 }, { "epoch": 0.5609509995585545, "grad_norm": 83.49299776852524, "learning_rate": 8.233963578525795e-06, "loss": 20.8536, "step": 35580 }, { "epoch": 0.5611086586365643, "grad_norm": 86.18656992285426, "learning_rate": 8.229039344395003e-06, "loss": 21.4478, "step": 35590 }, { "epoch": 0.561266317714574, "grad_norm": 82.36020578275304, "learning_rate": 8.224115553552425e-06, "loss": 21.4803, "step": 35600 }, { "epoch": 0.5614239767925837, "grad_norm": 88.05798387366387, "learning_rate": 8.21919220723053e-06, "loss": 21.5715, "step": 35610 }, { "epoch": 0.5615816358705934, "grad_norm": 79.14209863553862, "learning_rate": 8.214269306661682e-06, "loss": 21.0375, "step": 35620 }, { "epoch": 0.5617392949486031, "grad_norm": 80.11416358683283, "learning_rate": 8.209346853078127e-06, "loss": 20.7887, "step": 35630 }, { "epoch": 0.5618969540266129, "grad_norm": 88.66621073578852, "learning_rate": 8.204424847712003e-06, "loss": 20.4653, "step": 35640 }, { "epoch": 0.5620546131046226, "grad_norm": 90.0434469554845, "learning_rate": 8.199503291795334e-06, "loss": 20.9637, "step": 35650 }, { "epoch": 0.5622122721826323, "grad_norm": 80.03241640282064, "learning_rate": 8.194582186560033e-06, "loss": 21.2685, "step": 35660 }, { "epoch": 0.5623699312606419, "grad_norm": 82.67097896057177, "learning_rate": 8.189661533237898e-06, "loss": 21.5815, "step": 35670 }, { "epoch": 0.5625275903386517, "grad_norm": 85.00051941281103, "learning_rate": 8.184741333060616e-06, "loss": 20.9067, "step": 35680 }, { "epoch": 0.5626852494166614, "grad_norm": 83.6858756289558, "learning_rate": 8.179821587259757e-06, "loss": 20.9974, "step": 35690 }, { "epoch": 0.5628429084946711, "grad_norm": 87.80712198996677, "learning_rate": 8.174902297066786e-06, "loss": 21.4609, "step": 35700 }, { "epoch": 0.5630005675726808, "grad_norm": 79.87853514740495, "learning_rate": 8.16998346371304e-06, "loss": 20.9827, "step": 35710 }, { "epoch": 0.5631582266506906, "grad_norm": 84.98548039940309, "learning_rate": 8.165065088429759e-06, "loss": 21.2985, "step": 35720 }, { "epoch": 0.5633158857287003, "grad_norm": 86.93613024401081, "learning_rate": 8.160147172448051e-06, "loss": 21.4262, "step": 35730 }, { "epoch": 0.56347354480671, "grad_norm": 84.10816654930407, "learning_rate": 8.155229716998916e-06, "loss": 21.146, "step": 35740 }, { "epoch": 0.5636312038847197, "grad_norm": 82.02323558667352, "learning_rate": 8.150312723313248e-06, "loss": 20.9617, "step": 35750 }, { "epoch": 0.5637888629627293, "grad_norm": 85.90837528890913, "learning_rate": 8.145396192621808e-06, "loss": 20.7072, "step": 35760 }, { "epoch": 0.5639465220407391, "grad_norm": 79.36651932163595, "learning_rate": 8.140480126155259e-06, "loss": 21.2412, "step": 35770 }, { "epoch": 0.5641041811187488, "grad_norm": 84.16078861728417, "learning_rate": 8.13556452514413e-06, "loss": 20.6231, "step": 35780 }, { "epoch": 0.5642618401967585, "grad_norm": 86.80821343210495, "learning_rate": 8.130649390818849e-06, "loss": 20.845, "step": 35790 }, { "epoch": 0.5644194992747682, "grad_norm": 78.84113985183681, "learning_rate": 8.125734724409716e-06, "loss": 21.0347, "step": 35800 }, { "epoch": 0.564577158352778, "grad_norm": 83.87734023957148, "learning_rate": 8.120820527146924e-06, "loss": 20.8875, "step": 35810 }, { "epoch": 0.5647348174307877, "grad_norm": 82.82262586667834, "learning_rate": 8.115906800260536e-06, "loss": 21.4718, "step": 35820 }, { "epoch": 0.5648924765087974, "grad_norm": 83.74215887790098, "learning_rate": 8.110993544980512e-06, "loss": 20.7425, "step": 35830 }, { "epoch": 0.5650501355868071, "grad_norm": 81.89003902835117, "learning_rate": 8.106080762536678e-06, "loss": 21.8384, "step": 35840 }, { "epoch": 0.5652077946648169, "grad_norm": 83.17793540701223, "learning_rate": 8.101168454158762e-06, "loss": 21.939, "step": 35850 }, { "epoch": 0.5653654537428265, "grad_norm": 80.04281443645456, "learning_rate": 8.096256621076352e-06, "loss": 20.9621, "step": 35860 }, { "epoch": 0.5655231128208362, "grad_norm": 83.56093476410021, "learning_rate": 8.091345264518924e-06, "loss": 20.9573, "step": 35870 }, { "epoch": 0.5656807718988459, "grad_norm": 91.49661231721822, "learning_rate": 8.086434385715844e-06, "loss": 21.4606, "step": 35880 }, { "epoch": 0.5658384309768556, "grad_norm": 83.5207876042562, "learning_rate": 8.081523985896345e-06, "loss": 20.9313, "step": 35890 }, { "epoch": 0.5659960900548654, "grad_norm": 83.13375089871025, "learning_rate": 8.076614066289556e-06, "loss": 21.1499, "step": 35900 }, { "epoch": 0.5661537491328751, "grad_norm": 87.63032337588541, "learning_rate": 8.071704628124465e-06, "loss": 22.0228, "step": 35910 }, { "epoch": 0.5663114082108848, "grad_norm": 84.02749619292159, "learning_rate": 8.066795672629961e-06, "loss": 20.6545, "step": 35920 }, { "epoch": 0.5664690672888945, "grad_norm": 85.08075851622418, "learning_rate": 8.061887201034792e-06, "loss": 20.4019, "step": 35930 }, { "epoch": 0.5666267263669043, "grad_norm": 82.09863421064482, "learning_rate": 8.056979214567605e-06, "loss": 20.9472, "step": 35940 }, { "epoch": 0.5667843854449139, "grad_norm": 86.0683970375064, "learning_rate": 8.052071714456907e-06, "loss": 21.1342, "step": 35950 }, { "epoch": 0.5669420445229236, "grad_norm": 86.12765383916042, "learning_rate": 8.0471647019311e-06, "loss": 20.5092, "step": 35960 }, { "epoch": 0.5670997036009333, "grad_norm": 80.11923528887296, "learning_rate": 8.042258178218447e-06, "loss": 20.1604, "step": 35970 }, { "epoch": 0.567257362678943, "grad_norm": 83.54322817566015, "learning_rate": 8.037352144547103e-06, "loss": 20.666, "step": 35980 }, { "epoch": 0.5674150217569528, "grad_norm": 88.5273322579464, "learning_rate": 8.032446602145096e-06, "loss": 21.3846, "step": 35990 }, { "epoch": 0.5675726808349625, "grad_norm": 78.23398923647416, "learning_rate": 8.027541552240322e-06, "loss": 20.3906, "step": 36000 }, { "epoch": 0.5677303399129722, "grad_norm": 83.2392799466062, "learning_rate": 8.022636996060571e-06, "loss": 20.9087, "step": 36010 }, { "epoch": 0.5678879989909819, "grad_norm": 87.45590524652195, "learning_rate": 8.017732934833491e-06, "loss": 21.1458, "step": 36020 }, { "epoch": 0.5680456580689917, "grad_norm": 84.08355557106174, "learning_rate": 8.01282936978662e-06, "loss": 21.1731, "step": 36030 }, { "epoch": 0.5682033171470013, "grad_norm": 79.71970956034555, "learning_rate": 8.007926302147363e-06, "loss": 20.5294, "step": 36040 }, { "epoch": 0.568360976225011, "grad_norm": 83.03260036605737, "learning_rate": 8.003023733143014e-06, "loss": 20.9549, "step": 36050 }, { "epoch": 0.5685186353030207, "grad_norm": 83.70851523758432, "learning_rate": 7.99812166400072e-06, "loss": 20.8474, "step": 36060 }, { "epoch": 0.5686762943810305, "grad_norm": 80.45292010785826, "learning_rate": 7.993220095947525e-06, "loss": 20.533, "step": 36070 }, { "epoch": 0.5688339534590402, "grad_norm": 79.9003381863366, "learning_rate": 7.98831903021033e-06, "loss": 20.4875, "step": 36080 }, { "epoch": 0.5689916125370499, "grad_norm": 96.47615112404058, "learning_rate": 7.983418468015922e-06, "loss": 21.0215, "step": 36090 }, { "epoch": 0.5691492716150596, "grad_norm": 81.85743205420319, "learning_rate": 7.978518410590957e-06, "loss": 20.7977, "step": 36100 }, { "epoch": 0.5693069306930693, "grad_norm": 79.42706551974025, "learning_rate": 7.97361885916197e-06, "loss": 21.4388, "step": 36110 }, { "epoch": 0.569464589771079, "grad_norm": 83.40345366018587, "learning_rate": 7.968719814955358e-06, "loss": 21.1725, "step": 36120 }, { "epoch": 0.5696222488490887, "grad_norm": 79.48994301521864, "learning_rate": 7.963821279197403e-06, "loss": 21.2501, "step": 36130 }, { "epoch": 0.5697799079270984, "grad_norm": 81.00399302334108, "learning_rate": 7.958923253114257e-06, "loss": 21.1876, "step": 36140 }, { "epoch": 0.5699375670051081, "grad_norm": 82.49416069244268, "learning_rate": 7.954025737931935e-06, "loss": 21.089, "step": 36150 }, { "epoch": 0.5700952260831179, "grad_norm": 83.53802612018475, "learning_rate": 7.949128734876339e-06, "loss": 20.6425, "step": 36160 }, { "epoch": 0.5702528851611276, "grad_norm": 85.64554062558229, "learning_rate": 7.944232245173226e-06, "loss": 21.4045, "step": 36170 }, { "epoch": 0.5704105442391373, "grad_norm": 89.79592345131543, "learning_rate": 7.939336270048243e-06, "loss": 20.6486, "step": 36180 }, { "epoch": 0.570568203317147, "grad_norm": 84.2573168689343, "learning_rate": 7.934440810726893e-06, "loss": 21.2189, "step": 36190 }, { "epoch": 0.5707258623951567, "grad_norm": 81.95933915255115, "learning_rate": 7.929545868434563e-06, "loss": 21.6722, "step": 36200 }, { "epoch": 0.5708835214731665, "grad_norm": 80.53308299118324, "learning_rate": 7.924651444396493e-06, "loss": 21.0338, "step": 36210 }, { "epoch": 0.5710411805511761, "grad_norm": 82.90907681986242, "learning_rate": 7.919757539837813e-06, "loss": 20.46, "step": 36220 }, { "epoch": 0.5711988396291858, "grad_norm": 85.36094823292204, "learning_rate": 7.914864155983507e-06, "loss": 20.601, "step": 36230 }, { "epoch": 0.5713564987071955, "grad_norm": 80.26357708223364, "learning_rate": 7.909971294058438e-06, "loss": 20.6774, "step": 36240 }, { "epoch": 0.5715141577852053, "grad_norm": 88.79684036147768, "learning_rate": 7.905078955287336e-06, "loss": 21.2492, "step": 36250 }, { "epoch": 0.571671816863215, "grad_norm": 84.88748381109001, "learning_rate": 7.9001871408948e-06, "loss": 20.6702, "step": 36260 }, { "epoch": 0.5718294759412247, "grad_norm": 83.68715564609514, "learning_rate": 7.895295852105297e-06, "loss": 20.1925, "step": 36270 }, { "epoch": 0.5719871350192344, "grad_norm": 78.84622946267304, "learning_rate": 7.890405090143162e-06, "loss": 21.7302, "step": 36280 }, { "epoch": 0.5721447940972442, "grad_norm": 81.06471973284968, "learning_rate": 7.885514856232602e-06, "loss": 21.0973, "step": 36290 }, { "epoch": 0.5723024531752539, "grad_norm": 88.97043470115851, "learning_rate": 7.880625151597682e-06, "loss": 20.6385, "step": 36300 }, { "epoch": 0.5724601122532635, "grad_norm": 81.74412235253543, "learning_rate": 7.87573597746235e-06, "loss": 21.2159, "step": 36310 }, { "epoch": 0.5726177713312732, "grad_norm": 83.43550041726473, "learning_rate": 7.870847335050404e-06, "loss": 20.9259, "step": 36320 }, { "epoch": 0.5727754304092829, "grad_norm": 79.68038253040396, "learning_rate": 7.865959225585526e-06, "loss": 20.59, "step": 36330 }, { "epoch": 0.5729330894872927, "grad_norm": 80.19010556280185, "learning_rate": 7.861071650291248e-06, "loss": 20.4027, "step": 36340 }, { "epoch": 0.5730907485653024, "grad_norm": 81.67478076797943, "learning_rate": 7.856184610390985e-06, "loss": 20.8145, "step": 36350 }, { "epoch": 0.5732484076433121, "grad_norm": 79.97762447729198, "learning_rate": 7.851298107108e-06, "loss": 20.303, "step": 36360 }, { "epoch": 0.5734060667213218, "grad_norm": 76.81802677988402, "learning_rate": 7.846412141665442e-06, "loss": 20.5223, "step": 36370 }, { "epoch": 0.5735637257993316, "grad_norm": 84.49953671368226, "learning_rate": 7.841526715286302e-06, "loss": 21.248, "step": 36380 }, { "epoch": 0.5737213848773413, "grad_norm": 83.41967940634238, "learning_rate": 7.836641829193457e-06, "loss": 20.5854, "step": 36390 }, { "epoch": 0.5738790439553509, "grad_norm": 88.83240520623717, "learning_rate": 7.831757484609639e-06, "loss": 21.8891, "step": 36400 }, { "epoch": 0.5740367030333606, "grad_norm": 83.24441921162241, "learning_rate": 7.82687368275744e-06, "loss": 21.0495, "step": 36410 }, { "epoch": 0.5741943621113704, "grad_norm": 82.6802898171807, "learning_rate": 7.82199042485933e-06, "loss": 20.266, "step": 36420 }, { "epoch": 0.5743520211893801, "grad_norm": 82.64669725264862, "learning_rate": 7.81710771213763e-06, "loss": 20.3083, "step": 36430 }, { "epoch": 0.5745096802673898, "grad_norm": 87.09209130087727, "learning_rate": 7.81222554581453e-06, "loss": 20.5991, "step": 36440 }, { "epoch": 0.5746673393453995, "grad_norm": 83.26934078269818, "learning_rate": 7.80734392711208e-06, "loss": 20.858, "step": 36450 }, { "epoch": 0.5748249984234092, "grad_norm": 80.61346516626057, "learning_rate": 7.802462857252203e-06, "loss": 20.9698, "step": 36460 }, { "epoch": 0.574982657501419, "grad_norm": 82.05106082765076, "learning_rate": 7.797582337456668e-06, "loss": 20.8534, "step": 36470 }, { "epoch": 0.5751403165794287, "grad_norm": 92.03556525107419, "learning_rate": 7.792702368947121e-06, "loss": 21.3012, "step": 36480 }, { "epoch": 0.5752979756574383, "grad_norm": 95.46489608015735, "learning_rate": 7.78782295294506e-06, "loss": 21.7786, "step": 36490 }, { "epoch": 0.575455634735448, "grad_norm": 82.27081835321309, "learning_rate": 7.782944090671855e-06, "loss": 20.3598, "step": 36500 }, { "epoch": 0.5756132938134578, "grad_norm": 82.97254241361388, "learning_rate": 7.778065783348725e-06, "loss": 20.9518, "step": 36510 }, { "epoch": 0.5757709528914675, "grad_norm": 83.5113522726242, "learning_rate": 7.773188032196763e-06, "loss": 20.5351, "step": 36520 }, { "epoch": 0.5759286119694772, "grad_norm": 76.9662713908963, "learning_rate": 7.768310838436912e-06, "loss": 20.7727, "step": 36530 }, { "epoch": 0.5760862710474869, "grad_norm": 83.91855014675444, "learning_rate": 7.763434203289976e-06, "loss": 20.9518, "step": 36540 }, { "epoch": 0.5762439301254966, "grad_norm": 83.82117184308169, "learning_rate": 7.758558127976633e-06, "loss": 21.2919, "step": 36550 }, { "epoch": 0.5764015892035064, "grad_norm": 85.36982448955021, "learning_rate": 7.7536826137174e-06, "loss": 21.1901, "step": 36560 }, { "epoch": 0.5765592482815161, "grad_norm": 87.90808284099093, "learning_rate": 7.748807661732676e-06, "loss": 20.64, "step": 36570 }, { "epoch": 0.5767169073595257, "grad_norm": 88.81905035621872, "learning_rate": 7.743933273242694e-06, "loss": 20.6052, "step": 36580 }, { "epoch": 0.5768745664375354, "grad_norm": 81.95593790880898, "learning_rate": 7.73905944946757e-06, "loss": 21.1725, "step": 36590 }, { "epoch": 0.5770322255155452, "grad_norm": 77.4227661780444, "learning_rate": 7.734186191627263e-06, "loss": 20.0937, "step": 36600 }, { "epoch": 0.5771898845935549, "grad_norm": 80.09093984829084, "learning_rate": 7.729313500941603e-06, "loss": 20.9455, "step": 36610 }, { "epoch": 0.5773475436715646, "grad_norm": 90.83420678361014, "learning_rate": 7.72444137863026e-06, "loss": 21.4668, "step": 36620 }, { "epoch": 0.5775052027495743, "grad_norm": 81.47937841746536, "learning_rate": 7.719569825912779e-06, "loss": 20.8408, "step": 36630 }, { "epoch": 0.5776628618275841, "grad_norm": 82.89304898914214, "learning_rate": 7.714698844008552e-06, "loss": 20.8751, "step": 36640 }, { "epoch": 0.5778205209055938, "grad_norm": 79.85609837135782, "learning_rate": 7.709828434136839e-06, "loss": 21.0384, "step": 36650 }, { "epoch": 0.5779781799836035, "grad_norm": 84.40878230732943, "learning_rate": 7.704958597516745e-06, "loss": 21.144, "step": 36660 }, { "epoch": 0.5781358390616131, "grad_norm": 85.10002685379997, "learning_rate": 7.700089335367231e-06, "loss": 20.9174, "step": 36670 }, { "epoch": 0.5782934981396228, "grad_norm": 89.43014406616835, "learning_rate": 7.695220648907127e-06, "loss": 20.9313, "step": 36680 }, { "epoch": 0.5784511572176326, "grad_norm": 80.828318433258, "learning_rate": 7.690352539355106e-06, "loss": 21.2376, "step": 36690 }, { "epoch": 0.5786088162956423, "grad_norm": 84.71752136325026, "learning_rate": 7.68548500792971e-06, "loss": 20.5986, "step": 36700 }, { "epoch": 0.578766475373652, "grad_norm": 84.84930450965865, "learning_rate": 7.680618055849315e-06, "loss": 20.4366, "step": 36710 }, { "epoch": 0.5789241344516617, "grad_norm": 81.63783449666685, "learning_rate": 7.675751684332182e-06, "loss": 20.991, "step": 36720 }, { "epoch": 0.5790817935296715, "grad_norm": 82.3547256416268, "learning_rate": 7.670885894596394e-06, "loss": 21.0218, "step": 36730 }, { "epoch": 0.5792394526076812, "grad_norm": 87.20429272956693, "learning_rate": 7.666020687859912e-06, "loss": 20.7684, "step": 36740 }, { "epoch": 0.5793971116856909, "grad_norm": 79.28891411860971, "learning_rate": 7.661156065340542e-06, "loss": 20.4841, "step": 36750 }, { "epoch": 0.5795547707637005, "grad_norm": 91.22867767962015, "learning_rate": 7.65629202825595e-06, "loss": 20.9042, "step": 36760 }, { "epoch": 0.5797124298417102, "grad_norm": 82.33048096907399, "learning_rate": 7.651428577823641e-06, "loss": 21.2805, "step": 36770 }, { "epoch": 0.57987008891972, "grad_norm": 84.82603320253598, "learning_rate": 7.64656571526099e-06, "loss": 20.5579, "step": 36780 }, { "epoch": 0.5800277479977297, "grad_norm": 82.18860003854462, "learning_rate": 7.641703441785216e-06, "loss": 20.2242, "step": 36790 }, { "epoch": 0.5801854070757394, "grad_norm": 85.05189975965489, "learning_rate": 7.636841758613387e-06, "loss": 21.1358, "step": 36800 }, { "epoch": 0.5803430661537491, "grad_norm": 80.13243783543588, "learning_rate": 7.631980666962438e-06, "loss": 21.2433, "step": 36810 }, { "epoch": 0.5805007252317589, "grad_norm": 82.13622368172024, "learning_rate": 7.627120168049134e-06, "loss": 21.1579, "step": 36820 }, { "epoch": 0.5806583843097686, "grad_norm": 83.0980520284913, "learning_rate": 7.622260263090115e-06, "loss": 20.0824, "step": 36830 }, { "epoch": 0.5808160433877783, "grad_norm": 79.62872968237319, "learning_rate": 7.617400953301854e-06, "loss": 20.5191, "step": 36840 }, { "epoch": 0.580973702465788, "grad_norm": 84.07300206105612, "learning_rate": 7.61254223990069e-06, "loss": 21.603, "step": 36850 }, { "epoch": 0.5811313615437977, "grad_norm": 84.69243297145442, "learning_rate": 7.607684124102795e-06, "loss": 21.2192, "step": 36860 }, { "epoch": 0.5812890206218074, "grad_norm": 82.29167482848125, "learning_rate": 7.60282660712421e-06, "loss": 19.7382, "step": 36870 }, { "epoch": 0.5814466796998171, "grad_norm": 85.04395538759367, "learning_rate": 7.597969690180811e-06, "loss": 21.4513, "step": 36880 }, { "epoch": 0.5816043387778268, "grad_norm": 82.70725189285102, "learning_rate": 7.593113374488334e-06, "loss": 20.2894, "step": 36890 }, { "epoch": 0.5817619978558365, "grad_norm": 85.14129421510569, "learning_rate": 7.588257661262357e-06, "loss": 19.996, "step": 36900 }, { "epoch": 0.5819196569338463, "grad_norm": 85.0770650778721, "learning_rate": 7.583402551718319e-06, "loss": 20.1704, "step": 36910 }, { "epoch": 0.582077316011856, "grad_norm": 79.97287545006347, "learning_rate": 7.578548047071494e-06, "loss": 20.36, "step": 36920 }, { "epoch": 0.5822349750898657, "grad_norm": 85.55336512137134, "learning_rate": 7.57369414853701e-06, "loss": 20.7278, "step": 36930 }, { "epoch": 0.5823926341678753, "grad_norm": 82.24070499635013, "learning_rate": 7.5688408573298445e-06, "loss": 20.0501, "step": 36940 }, { "epoch": 0.5825502932458851, "grad_norm": 110.21068295497014, "learning_rate": 7.563988174664821e-06, "loss": 20.6764, "step": 36950 }, { "epoch": 0.5827079523238948, "grad_norm": 79.51742388631874, "learning_rate": 7.559136101756618e-06, "loss": 21.2092, "step": 36960 }, { "epoch": 0.5828656114019045, "grad_norm": 83.28475794718494, "learning_rate": 7.554284639819746e-06, "loss": 20.6959, "step": 36970 }, { "epoch": 0.5830232704799142, "grad_norm": 80.74390781426905, "learning_rate": 7.549433790068578e-06, "loss": 21.0376, "step": 36980 }, { "epoch": 0.583180929557924, "grad_norm": 82.71530092953758, "learning_rate": 7.5445835537173265e-06, "loss": 21.0503, "step": 36990 }, { "epoch": 0.5833385886359337, "grad_norm": 82.95187078348901, "learning_rate": 7.539733931980055e-06, "loss": 20.6459, "step": 37000 }, { "epoch": 0.5834962477139434, "grad_norm": 80.8265934464125, "learning_rate": 7.5348849260706606e-06, "loss": 20.7254, "step": 37010 }, { "epoch": 0.5836539067919531, "grad_norm": 86.59097762331001, "learning_rate": 7.530036537202905e-06, "loss": 20.8124, "step": 37020 }, { "epoch": 0.5838115658699627, "grad_norm": 87.13735444628324, "learning_rate": 7.525188766590379e-06, "loss": 21.0368, "step": 37030 }, { "epoch": 0.5839692249479725, "grad_norm": 84.08316231037551, "learning_rate": 7.520341615446529e-06, "loss": 20.9104, "step": 37040 }, { "epoch": 0.5841268840259822, "grad_norm": 83.81744794254561, "learning_rate": 7.515495084984644e-06, "loss": 20.7265, "step": 37050 }, { "epoch": 0.5842845431039919, "grad_norm": 82.81480685750053, "learning_rate": 7.51064917641785e-06, "loss": 20.8789, "step": 37060 }, { "epoch": 0.5844422021820016, "grad_norm": 79.77270997782436, "learning_rate": 7.505803890959133e-06, "loss": 20.5081, "step": 37070 }, { "epoch": 0.5845998612600114, "grad_norm": 89.7212719896023, "learning_rate": 7.500959229821305e-06, "loss": 21.4913, "step": 37080 }, { "epoch": 0.5847575203380211, "grad_norm": 83.36836247845142, "learning_rate": 7.496115194217035e-06, "loss": 20.5383, "step": 37090 }, { "epoch": 0.5849151794160308, "grad_norm": 76.35260979554245, "learning_rate": 7.4912717853588305e-06, "loss": 20.0157, "step": 37100 }, { "epoch": 0.5850728384940405, "grad_norm": 80.4312718310057, "learning_rate": 7.486429004459046e-06, "loss": 21.0366, "step": 37110 }, { "epoch": 0.5852304975720501, "grad_norm": 80.92649666059717, "learning_rate": 7.481586852729868e-06, "loss": 20.9609, "step": 37120 }, { "epoch": 0.5853881566500599, "grad_norm": 80.17604566580131, "learning_rate": 7.476745331383337e-06, "loss": 20.1327, "step": 37130 }, { "epoch": 0.5855458157280696, "grad_norm": 82.6124017584851, "learning_rate": 7.47190444163133e-06, "loss": 20.1433, "step": 37140 }, { "epoch": 0.5857034748060793, "grad_norm": 86.2921073930135, "learning_rate": 7.4670641846855755e-06, "loss": 20.676, "step": 37150 }, { "epoch": 0.585861133884089, "grad_norm": 75.52963821317364, "learning_rate": 7.462224561757623e-06, "loss": 20.6245, "step": 37160 }, { "epoch": 0.5860187929620988, "grad_norm": 82.06197469839691, "learning_rate": 7.457385574058886e-06, "loss": 20.1721, "step": 37170 }, { "epoch": 0.5861764520401085, "grad_norm": 82.52701682535465, "learning_rate": 7.4525472228006035e-06, "loss": 20.5445, "step": 37180 }, { "epoch": 0.5863341111181182, "grad_norm": 79.81398188914432, "learning_rate": 7.44770950919386e-06, "loss": 20.796, "step": 37190 }, { "epoch": 0.5864917701961279, "grad_norm": 80.98451415862498, "learning_rate": 7.4428724344495865e-06, "loss": 20.4631, "step": 37200 }, { "epoch": 0.5866494292741377, "grad_norm": 85.40071872535577, "learning_rate": 7.4380359997785415e-06, "loss": 19.9568, "step": 37210 }, { "epoch": 0.5868070883521473, "grad_norm": 78.85616185468764, "learning_rate": 7.433200206391339e-06, "loss": 20.0941, "step": 37220 }, { "epoch": 0.586964747430157, "grad_norm": 84.1006597295273, "learning_rate": 7.428365055498414e-06, "loss": 20.6528, "step": 37230 }, { "epoch": 0.5871224065081667, "grad_norm": 83.21908876787282, "learning_rate": 7.423530548310057e-06, "loss": 20.1826, "step": 37240 }, { "epoch": 0.5872800655861764, "grad_norm": 82.38596429906272, "learning_rate": 7.418696686036388e-06, "loss": 20.6182, "step": 37250 }, { "epoch": 0.5874377246641862, "grad_norm": 78.35021516221582, "learning_rate": 7.4138634698873735e-06, "loss": 20.7159, "step": 37260 }, { "epoch": 0.5875953837421959, "grad_norm": 76.96361977987588, "learning_rate": 7.409030901072806e-06, "loss": 19.8917, "step": 37270 }, { "epoch": 0.5877530428202056, "grad_norm": 76.4001356853278, "learning_rate": 7.4041989808023285e-06, "loss": 19.43, "step": 37280 }, { "epoch": 0.5879107018982153, "grad_norm": 74.10613162532455, "learning_rate": 7.399367710285413e-06, "loss": 20.937, "step": 37290 }, { "epoch": 0.5880683609762251, "grad_norm": 81.16740393685346, "learning_rate": 7.394537090731376e-06, "loss": 21.0573, "step": 37300 }, { "epoch": 0.5882260200542347, "grad_norm": 92.87272541507583, "learning_rate": 7.389707123349363e-06, "loss": 20.4491, "step": 37310 }, { "epoch": 0.5883836791322444, "grad_norm": 83.51570648814221, "learning_rate": 7.384877809348366e-06, "loss": 20.5078, "step": 37320 }, { "epoch": 0.5885413382102541, "grad_norm": 76.21684604939716, "learning_rate": 7.380049149937203e-06, "loss": 20.3601, "step": 37330 }, { "epoch": 0.5886989972882638, "grad_norm": 85.3985167923056, "learning_rate": 7.375221146324533e-06, "loss": 20.5841, "step": 37340 }, { "epoch": 0.5888566563662736, "grad_norm": 83.10091761332431, "learning_rate": 7.370393799718857e-06, "loss": 20.1015, "step": 37350 }, { "epoch": 0.5890143154442833, "grad_norm": 82.71258609027632, "learning_rate": 7.365567111328496e-06, "loss": 20.3908, "step": 37360 }, { "epoch": 0.589171974522293, "grad_norm": 82.68619056040932, "learning_rate": 7.360741082361627e-06, "loss": 20.8072, "step": 37370 }, { "epoch": 0.5893296336003027, "grad_norm": 81.90752188763378, "learning_rate": 7.3559157140262404e-06, "loss": 20.2405, "step": 37380 }, { "epoch": 0.5894872926783125, "grad_norm": 81.54594740253563, "learning_rate": 7.351091007530178e-06, "loss": 20.264, "step": 37390 }, { "epoch": 0.5896449517563221, "grad_norm": 79.9947027747893, "learning_rate": 7.346266964081105e-06, "loss": 20.6225, "step": 37400 }, { "epoch": 0.5898026108343318, "grad_norm": 82.33808891370437, "learning_rate": 7.341443584886532e-06, "loss": 20.1954, "step": 37410 }, { "epoch": 0.5899602699123415, "grad_norm": 79.77861336829194, "learning_rate": 7.336620871153789e-06, "loss": 19.8646, "step": 37420 }, { "epoch": 0.5901179289903513, "grad_norm": 88.94792871085747, "learning_rate": 7.33179882409005e-06, "loss": 20.0381, "step": 37430 }, { "epoch": 0.590275588068361, "grad_norm": 86.95924930466907, "learning_rate": 7.32697744490232e-06, "loss": 20.4779, "step": 37440 }, { "epoch": 0.5904332471463707, "grad_norm": 83.42909055313359, "learning_rate": 7.322156734797437e-06, "loss": 20.9152, "step": 37450 }, { "epoch": 0.5905909062243804, "grad_norm": 76.90255601169618, "learning_rate": 7.317336694982069e-06, "loss": 20.1306, "step": 37460 }, { "epoch": 0.5907485653023901, "grad_norm": 86.27738409671097, "learning_rate": 7.312517326662714e-06, "loss": 20.2865, "step": 37470 }, { "epoch": 0.5909062243803999, "grad_norm": 85.64598654494577, "learning_rate": 7.307698631045709e-06, "loss": 20.4328, "step": 37480 }, { "epoch": 0.5910638834584095, "grad_norm": 80.564603266864, "learning_rate": 7.302880609337216e-06, "loss": 20.2918, "step": 37490 }, { "epoch": 0.5912215425364192, "grad_norm": 79.88230350625246, "learning_rate": 7.298063262743239e-06, "loss": 20.2241, "step": 37500 }, { "epoch": 0.5913792016144289, "grad_norm": 85.17728288151541, "learning_rate": 7.293246592469596e-06, "loss": 20.4456, "step": 37510 }, { "epoch": 0.5915368606924387, "grad_norm": 83.10012983660647, "learning_rate": 7.288430599721954e-06, "loss": 19.7572, "step": 37520 }, { "epoch": 0.5916945197704484, "grad_norm": 81.07674677317563, "learning_rate": 7.283615285705792e-06, "loss": 20.4248, "step": 37530 }, { "epoch": 0.5918521788484581, "grad_norm": 80.09489316360276, "learning_rate": 7.278800651626436e-06, "loss": 20.1431, "step": 37540 }, { "epoch": 0.5920098379264678, "grad_norm": 86.6589227429986, "learning_rate": 7.273986698689032e-06, "loss": 21.325, "step": 37550 }, { "epoch": 0.5921674970044776, "grad_norm": 81.56521392537617, "learning_rate": 7.269173428098561e-06, "loss": 20.1263, "step": 37560 }, { "epoch": 0.5923251560824873, "grad_norm": 81.34296455110439, "learning_rate": 7.264360841059823e-06, "loss": 20.3853, "step": 37570 }, { "epoch": 0.592482815160497, "grad_norm": 77.54470008652734, "learning_rate": 7.259548938777462e-06, "loss": 20.3687, "step": 37580 }, { "epoch": 0.5926404742385066, "grad_norm": 78.54341500171732, "learning_rate": 7.2547377224559405e-06, "loss": 20.0157, "step": 37590 }, { "epoch": 0.5927981333165163, "grad_norm": 82.21697032282475, "learning_rate": 7.249927193299546e-06, "loss": 19.5409, "step": 37600 }, { "epoch": 0.5929557923945261, "grad_norm": 84.94140032877195, "learning_rate": 7.245117352512407e-06, "loss": 20.3521, "step": 37610 }, { "epoch": 0.5931134514725358, "grad_norm": 82.72297858273937, "learning_rate": 7.240308201298467e-06, "loss": 20.2051, "step": 37620 }, { "epoch": 0.5932711105505455, "grad_norm": 81.02179336856933, "learning_rate": 7.235499740861504e-06, "loss": 19.6267, "step": 37630 }, { "epoch": 0.5934287696285552, "grad_norm": 81.92973322929818, "learning_rate": 7.230691972405119e-06, "loss": 20.1856, "step": 37640 }, { "epoch": 0.593586428706565, "grad_norm": 80.29562794809188, "learning_rate": 7.225884897132749e-06, "loss": 19.7473, "step": 37650 }, { "epoch": 0.5937440877845747, "grad_norm": 86.02653292363885, "learning_rate": 7.221078516247642e-06, "loss": 20.8787, "step": 37660 }, { "epoch": 0.5939017468625843, "grad_norm": 76.01182575947406, "learning_rate": 7.216272830952886e-06, "loss": 20.5873, "step": 37670 }, { "epoch": 0.594059405940594, "grad_norm": 81.65893023516631, "learning_rate": 7.211467842451385e-06, "loss": 19.8093, "step": 37680 }, { "epoch": 0.5942170650186037, "grad_norm": 79.8459640355626, "learning_rate": 7.206663551945879e-06, "loss": 20.5374, "step": 37690 }, { "epoch": 0.5943747240966135, "grad_norm": 87.07859402064055, "learning_rate": 7.2018599606389195e-06, "loss": 21.3125, "step": 37700 }, { "epoch": 0.5945323831746232, "grad_norm": 84.14194266868016, "learning_rate": 7.1970570697329004e-06, "loss": 20.8796, "step": 37710 }, { "epoch": 0.5946900422526329, "grad_norm": 85.75670199771666, "learning_rate": 7.1922548804300255e-06, "loss": 20.2629, "step": 37720 }, { "epoch": 0.5948477013306426, "grad_norm": 80.31366509009656, "learning_rate": 7.187453393932327e-06, "loss": 19.8181, "step": 37730 }, { "epoch": 0.5950053604086524, "grad_norm": 81.6438123819646, "learning_rate": 7.1826526114416625e-06, "loss": 20.6207, "step": 37740 }, { "epoch": 0.5951630194866621, "grad_norm": 82.4236485606464, "learning_rate": 7.177852534159715e-06, "loss": 20.5698, "step": 37750 }, { "epoch": 0.5953206785646717, "grad_norm": 87.85830040408825, "learning_rate": 7.173053163287992e-06, "loss": 20.6852, "step": 37760 }, { "epoch": 0.5954783376426814, "grad_norm": 80.46110416757396, "learning_rate": 7.168254500027815e-06, "loss": 20.7489, "step": 37770 }, { "epoch": 0.5956359967206912, "grad_norm": 82.0307509352925, "learning_rate": 7.163456545580341e-06, "loss": 20.5132, "step": 37780 }, { "epoch": 0.5957936557987009, "grad_norm": 77.85224829144165, "learning_rate": 7.158659301146537e-06, "loss": 19.9387, "step": 37790 }, { "epoch": 0.5959513148767106, "grad_norm": 83.12934083866865, "learning_rate": 7.153862767927207e-06, "loss": 20.4063, "step": 37800 }, { "epoch": 0.5961089739547203, "grad_norm": 79.16891788368095, "learning_rate": 7.14906694712296e-06, "loss": 20.7084, "step": 37810 }, { "epoch": 0.59626663303273, "grad_norm": 79.19581513693261, "learning_rate": 7.144271839934245e-06, "loss": 20.2181, "step": 37820 }, { "epoch": 0.5964242921107398, "grad_norm": 81.46185841076296, "learning_rate": 7.139477447561312e-06, "loss": 20.3756, "step": 37830 }, { "epoch": 0.5965819511887495, "grad_norm": 80.16019609221627, "learning_rate": 7.1346837712042495e-06, "loss": 20.2227, "step": 37840 }, { "epoch": 0.5967396102667591, "grad_norm": 86.05155794078657, "learning_rate": 7.12989081206296e-06, "loss": 20.497, "step": 37850 }, { "epoch": 0.5968972693447688, "grad_norm": 82.25413941169639, "learning_rate": 7.125098571337161e-06, "loss": 20.2134, "step": 37860 }, { "epoch": 0.5970549284227786, "grad_norm": 84.67530690127558, "learning_rate": 7.1203070502264045e-06, "loss": 20.3198, "step": 37870 }, { "epoch": 0.5972125875007883, "grad_norm": 83.54787913514448, "learning_rate": 7.115516249930045e-06, "loss": 20.7478, "step": 37880 }, { "epoch": 0.597370246578798, "grad_norm": 90.7673506629121, "learning_rate": 7.110726171647271e-06, "loss": 21.0498, "step": 37890 }, { "epoch": 0.5975279056568077, "grad_norm": 83.90345320237518, "learning_rate": 7.105936816577082e-06, "loss": 20.3228, "step": 37900 }, { "epoch": 0.5976855647348174, "grad_norm": 77.84414560988162, "learning_rate": 7.101148185918301e-06, "loss": 19.9595, "step": 37910 }, { "epoch": 0.5978432238128272, "grad_norm": 75.50639200151704, "learning_rate": 7.096360280869561e-06, "loss": 19.9979, "step": 37920 }, { "epoch": 0.5980008828908369, "grad_norm": 84.13441341888897, "learning_rate": 7.091573102629329e-06, "loss": 20.2413, "step": 37930 }, { "epoch": 0.5981585419688465, "grad_norm": 79.66296907290776, "learning_rate": 7.086786652395874e-06, "loss": 20.3162, "step": 37940 }, { "epoch": 0.5983162010468562, "grad_norm": 88.15144452912887, "learning_rate": 7.082000931367297e-06, "loss": 19.9722, "step": 37950 }, { "epoch": 0.598473860124866, "grad_norm": 82.96905427310833, "learning_rate": 7.0772159407415e-06, "loss": 20.6235, "step": 37960 }, { "epoch": 0.5986315192028757, "grad_norm": 77.31933675105753, "learning_rate": 7.072431681716221e-06, "loss": 20.309, "step": 37970 }, { "epoch": 0.5987891782808854, "grad_norm": 83.49581497997102, "learning_rate": 7.067648155489e-06, "loss": 20.0461, "step": 37980 }, { "epoch": 0.5989468373588951, "grad_norm": 81.72914681225444, "learning_rate": 7.062865363257195e-06, "loss": 19.7626, "step": 37990 }, { "epoch": 0.5991044964369049, "grad_norm": 76.81025143436074, "learning_rate": 7.0580833062179945e-06, "loss": 20.0655, "step": 38000 }, { "epoch": 0.5992621555149146, "grad_norm": 79.96281993494867, "learning_rate": 7.053301985568382e-06, "loss": 20.1145, "step": 38010 }, { "epoch": 0.5994198145929243, "grad_norm": 89.06650917988257, "learning_rate": 7.048521402505177e-06, "loss": 20.8354, "step": 38020 }, { "epoch": 0.599577473670934, "grad_norm": 79.32558770117308, "learning_rate": 7.043741558224995e-06, "loss": 20.0514, "step": 38030 }, { "epoch": 0.5997351327489436, "grad_norm": 84.80464388721549, "learning_rate": 7.038962453924284e-06, "loss": 20.7152, "step": 38040 }, { "epoch": 0.5998927918269534, "grad_norm": 87.94607485481262, "learning_rate": 7.034184090799294e-06, "loss": 20.086, "step": 38050 }, { "epoch": 0.6000504509049631, "grad_norm": 85.1771094304157, "learning_rate": 7.0294064700461006e-06, "loss": 20.4594, "step": 38060 }, { "epoch": 0.6002081099829728, "grad_norm": 79.52123377958817, "learning_rate": 7.024629592860579e-06, "loss": 20.2308, "step": 38070 }, { "epoch": 0.6003657690609825, "grad_norm": 86.40598333135416, "learning_rate": 7.019853460438432e-06, "loss": 20.1351, "step": 38080 }, { "epoch": 0.6005234281389923, "grad_norm": 84.90553742030387, "learning_rate": 7.015078073975171e-06, "loss": 20.123, "step": 38090 }, { "epoch": 0.600681087217002, "grad_norm": 83.83998179297541, "learning_rate": 7.010303434666123e-06, "loss": 20.7033, "step": 38100 }, { "epoch": 0.6008387462950117, "grad_norm": 86.83073863493047, "learning_rate": 7.005529543706422e-06, "loss": 19.5862, "step": 38110 }, { "epoch": 0.6009964053730213, "grad_norm": 77.96488142468442, "learning_rate": 7.0007564022910126e-06, "loss": 20.6807, "step": 38120 }, { "epoch": 0.6011540644510311, "grad_norm": 101.23966846271179, "learning_rate": 6.995984011614665e-06, "loss": 20.6841, "step": 38130 }, { "epoch": 0.6013117235290408, "grad_norm": 81.45848306809843, "learning_rate": 6.99121237287195e-06, "loss": 20.0438, "step": 38140 }, { "epoch": 0.6014693826070505, "grad_norm": 89.46086281347652, "learning_rate": 6.986441487257259e-06, "loss": 20.625, "step": 38150 }, { "epoch": 0.6016270416850602, "grad_norm": 88.3653984314936, "learning_rate": 6.981671355964783e-06, "loss": 20.3548, "step": 38160 }, { "epoch": 0.6017847007630699, "grad_norm": 81.02219231731326, "learning_rate": 6.976901980188538e-06, "loss": 20.0184, "step": 38170 }, { "epoch": 0.6019423598410797, "grad_norm": 79.21803617084808, "learning_rate": 6.972133361122335e-06, "loss": 19.948, "step": 38180 }, { "epoch": 0.6021000189190894, "grad_norm": 84.54337945486319, "learning_rate": 6.967365499959813e-06, "loss": 20.308, "step": 38190 }, { "epoch": 0.6022576779970991, "grad_norm": 80.10032817986264, "learning_rate": 6.962598397894407e-06, "loss": 19.5487, "step": 38200 }, { "epoch": 0.6024153370751087, "grad_norm": 78.16717647030426, "learning_rate": 6.957832056119373e-06, "loss": 19.6303, "step": 38210 }, { "epoch": 0.6025729961531185, "grad_norm": 82.49301829689632, "learning_rate": 6.953066475827768e-06, "loss": 20.2042, "step": 38220 }, { "epoch": 0.6027306552311282, "grad_norm": 79.51239320155332, "learning_rate": 6.948301658212462e-06, "loss": 20.0294, "step": 38230 }, { "epoch": 0.6028883143091379, "grad_norm": 88.51143473477991, "learning_rate": 6.943537604466135e-06, "loss": 20.037, "step": 38240 }, { "epoch": 0.6030459733871476, "grad_norm": 79.89312198255537, "learning_rate": 6.938774315781272e-06, "loss": 20.364, "step": 38250 }, { "epoch": 0.6032036324651573, "grad_norm": 83.55652558047373, "learning_rate": 6.934011793350175e-06, "loss": 21.1509, "step": 38260 }, { "epoch": 0.6033612915431671, "grad_norm": 82.66861737317367, "learning_rate": 6.929250038364941e-06, "loss": 20.3536, "step": 38270 }, { "epoch": 0.6035189506211768, "grad_norm": 82.84042694027892, "learning_rate": 6.924489052017489e-06, "loss": 19.8839, "step": 38280 }, { "epoch": 0.6036766096991865, "grad_norm": 76.4359100850806, "learning_rate": 6.919728835499533e-06, "loss": 19.9731, "step": 38290 }, { "epoch": 0.6038342687771961, "grad_norm": 79.81130955304165, "learning_rate": 6.914969390002608e-06, "loss": 19.5489, "step": 38300 }, { "epoch": 0.603991927855206, "grad_norm": 82.4229849777083, "learning_rate": 6.91021071671804e-06, "loss": 19.7266, "step": 38310 }, { "epoch": 0.6041495869332156, "grad_norm": 98.43470024918867, "learning_rate": 6.905452816836976e-06, "loss": 19.8309, "step": 38320 }, { "epoch": 0.6043072460112253, "grad_norm": 76.39906232596658, "learning_rate": 6.900695691550358e-06, "loss": 20.3654, "step": 38330 }, { "epoch": 0.604464905089235, "grad_norm": 123.57583201208176, "learning_rate": 6.895939342048944e-06, "loss": 20.4015, "step": 38340 }, { "epoch": 0.6046225641672448, "grad_norm": 85.4927048817062, "learning_rate": 6.891183769523291e-06, "loss": 20.4027, "step": 38350 }, { "epoch": 0.6047802232452545, "grad_norm": 89.20807082687831, "learning_rate": 6.886428975163767e-06, "loss": 19.9168, "step": 38360 }, { "epoch": 0.6049378823232642, "grad_norm": 82.67859634133438, "learning_rate": 6.8816749601605385e-06, "loss": 19.9997, "step": 38370 }, { "epoch": 0.6050955414012739, "grad_norm": 82.81201688136537, "learning_rate": 6.87692172570358e-06, "loss": 20.0729, "step": 38380 }, { "epoch": 0.6052532004792835, "grad_norm": 80.7440788142432, "learning_rate": 6.872169272982675e-06, "loss": 19.8828, "step": 38390 }, { "epoch": 0.6054108595572933, "grad_norm": 78.28772575329666, "learning_rate": 6.867417603187402e-06, "loss": 20.2924, "step": 38400 }, { "epoch": 0.605568518635303, "grad_norm": 79.76585165694033, "learning_rate": 6.862666717507156e-06, "loss": 20.104, "step": 38410 }, { "epoch": 0.6057261777133127, "grad_norm": 83.5174110141317, "learning_rate": 6.8579166171311195e-06, "loss": 19.9704, "step": 38420 }, { "epoch": 0.6058838367913224, "grad_norm": 82.29710975167667, "learning_rate": 6.853167303248296e-06, "loss": 20.1765, "step": 38430 }, { "epoch": 0.6060414958693322, "grad_norm": 79.33411147933658, "learning_rate": 6.848418777047479e-06, "loss": 19.3382, "step": 38440 }, { "epoch": 0.6061991549473419, "grad_norm": 80.16607295495753, "learning_rate": 6.8436710397172725e-06, "loss": 20.368, "step": 38450 }, { "epoch": 0.6063568140253516, "grad_norm": 79.51132895106818, "learning_rate": 6.838924092446077e-06, "loss": 19.8716, "step": 38460 }, { "epoch": 0.6065144731033613, "grad_norm": 83.34360156172924, "learning_rate": 6.834177936422102e-06, "loss": 19.8759, "step": 38470 }, { "epoch": 0.606672132181371, "grad_norm": 79.60412907018181, "learning_rate": 6.82943257283335e-06, "loss": 19.7337, "step": 38480 }, { "epoch": 0.6068297912593807, "grad_norm": 79.39353114715098, "learning_rate": 6.824688002867636e-06, "loss": 19.6202, "step": 38490 }, { "epoch": 0.6069874503373904, "grad_norm": 84.32730880572096, "learning_rate": 6.819944227712565e-06, "loss": 19.7944, "step": 38500 }, { "epoch": 0.6071451094154001, "grad_norm": 80.9824472117405, "learning_rate": 6.815201248555556e-06, "loss": 19.9739, "step": 38510 }, { "epoch": 0.6073027684934098, "grad_norm": 85.39491368538177, "learning_rate": 6.81045906658382e-06, "loss": 20.0992, "step": 38520 }, { "epoch": 0.6074604275714196, "grad_norm": 92.84385029888993, "learning_rate": 6.805717682984363e-06, "loss": 20.2659, "step": 38530 }, { "epoch": 0.6076180866494293, "grad_norm": 79.18750619520856, "learning_rate": 6.800977098944004e-06, "loss": 20.1575, "step": 38540 }, { "epoch": 0.607775745727439, "grad_norm": 79.4023297426618, "learning_rate": 6.796237315649353e-06, "loss": 20.3511, "step": 38550 }, { "epoch": 0.6079334048054487, "grad_norm": 84.16819411544536, "learning_rate": 6.791498334286828e-06, "loss": 20.4371, "step": 38560 }, { "epoch": 0.6080910638834585, "grad_norm": 83.50392112306946, "learning_rate": 6.786760156042635e-06, "loss": 19.9514, "step": 38570 }, { "epoch": 0.6082487229614681, "grad_norm": 81.57613460746869, "learning_rate": 6.782022782102789e-06, "loss": 19.9012, "step": 38580 }, { "epoch": 0.6084063820394778, "grad_norm": 78.5535404448373, "learning_rate": 6.777286213653096e-06, "loss": 19.7917, "step": 38590 }, { "epoch": 0.6085640411174875, "grad_norm": 77.97292384152878, "learning_rate": 6.7725504518791695e-06, "loss": 19.8164, "step": 38600 }, { "epoch": 0.6087217001954972, "grad_norm": 81.80087381642481, "learning_rate": 6.767815497966408e-06, "loss": 20.2671, "step": 38610 }, { "epoch": 0.608879359273507, "grad_norm": 82.53083641325169, "learning_rate": 6.763081353100023e-06, "loss": 19.8427, "step": 38620 }, { "epoch": 0.6090370183515167, "grad_norm": 80.96681792454838, "learning_rate": 6.758348018465006e-06, "loss": 19.5705, "step": 38630 }, { "epoch": 0.6091946774295264, "grad_norm": 80.16949940126992, "learning_rate": 6.753615495246166e-06, "loss": 20.103, "step": 38640 }, { "epoch": 0.6093523365075361, "grad_norm": 82.71881860214759, "learning_rate": 6.748883784628094e-06, "loss": 19.7666, "step": 38650 }, { "epoch": 0.6095099955855459, "grad_norm": 80.2134224030042, "learning_rate": 6.744152887795179e-06, "loss": 20.1393, "step": 38660 }, { "epoch": 0.6096676546635555, "grad_norm": 86.94066508833797, "learning_rate": 6.739422805931615e-06, "loss": 20.3751, "step": 38670 }, { "epoch": 0.6098253137415652, "grad_norm": 82.87225248994363, "learning_rate": 6.734693540221379e-06, "loss": 20.8838, "step": 38680 }, { "epoch": 0.6099829728195749, "grad_norm": 83.57879056605361, "learning_rate": 6.729965091848257e-06, "loss": 19.824, "step": 38690 }, { "epoch": 0.6101406318975847, "grad_norm": 78.14292356267308, "learning_rate": 6.725237461995822e-06, "loss": 19.747, "step": 38700 }, { "epoch": 0.6102982909755944, "grad_norm": 81.01760974872815, "learning_rate": 6.720510651847447e-06, "loss": 19.1113, "step": 38710 }, { "epoch": 0.6104559500536041, "grad_norm": 89.13401436803208, "learning_rate": 6.715784662586291e-06, "loss": 20.4614, "step": 38720 }, { "epoch": 0.6106136091316138, "grad_norm": 80.95266565288216, "learning_rate": 6.711059495395322e-06, "loss": 20.1696, "step": 38730 }, { "epoch": 0.6107712682096235, "grad_norm": 79.44459140067323, "learning_rate": 6.7063351514572875e-06, "loss": 20.0646, "step": 38740 }, { "epoch": 0.6109289272876333, "grad_norm": 81.32649036303788, "learning_rate": 6.701611631954742e-06, "loss": 19.7894, "step": 38750 }, { "epoch": 0.611086586365643, "grad_norm": 77.76458513712457, "learning_rate": 6.696888938070019e-06, "loss": 20.1088, "step": 38760 }, { "epoch": 0.6112442454436526, "grad_norm": 80.76568570295271, "learning_rate": 6.692167070985263e-06, "loss": 19.4692, "step": 38770 }, { "epoch": 0.6114019045216623, "grad_norm": 80.02539662520296, "learning_rate": 6.687446031882394e-06, "loss": 20.1941, "step": 38780 }, { "epoch": 0.6115595635996721, "grad_norm": 83.89665810600883, "learning_rate": 6.682725821943134e-06, "loss": 19.4735, "step": 38790 }, { "epoch": 0.6117172226776818, "grad_norm": 78.39744448134498, "learning_rate": 6.678006442349001e-06, "loss": 19.5672, "step": 38800 }, { "epoch": 0.6118748817556915, "grad_norm": 91.95615764416438, "learning_rate": 6.6732878942812925e-06, "loss": 19.6095, "step": 38810 }, { "epoch": 0.6120325408337012, "grad_norm": 74.97898020329241, "learning_rate": 6.668570178921116e-06, "loss": 19.8785, "step": 38820 }, { "epoch": 0.6121901999117109, "grad_norm": 86.33524383176842, "learning_rate": 6.663853297449352e-06, "loss": 19.8672, "step": 38830 }, { "epoch": 0.6123478589897207, "grad_norm": 78.45006202727767, "learning_rate": 6.659137251046683e-06, "loss": 20.1813, "step": 38840 }, { "epoch": 0.6125055180677303, "grad_norm": 82.37084197017496, "learning_rate": 6.65442204089358e-06, "loss": 19.2678, "step": 38850 }, { "epoch": 0.61266317714574, "grad_norm": 79.01843670134085, "learning_rate": 6.649707668170307e-06, "loss": 19.9341, "step": 38860 }, { "epoch": 0.6128208362237497, "grad_norm": 79.06710092425178, "learning_rate": 6.644994134056912e-06, "loss": 19.0927, "step": 38870 }, { "epoch": 0.6129784953017595, "grad_norm": 92.66503876371866, "learning_rate": 6.640281439733241e-06, "loss": 19.8072, "step": 38880 }, { "epoch": 0.6131361543797692, "grad_norm": 77.8635501187071, "learning_rate": 6.635569586378925e-06, "loss": 19.555, "step": 38890 }, { "epoch": 0.6132938134577789, "grad_norm": 85.31512778561195, "learning_rate": 6.630858575173385e-06, "loss": 19.9021, "step": 38900 }, { "epoch": 0.6134514725357886, "grad_norm": 84.27832719181276, "learning_rate": 6.626148407295831e-06, "loss": 20.0053, "step": 38910 }, { "epoch": 0.6136091316137984, "grad_norm": 83.63219082032865, "learning_rate": 6.621439083925263e-06, "loss": 19.8013, "step": 38920 }, { "epoch": 0.6137667906918081, "grad_norm": 79.73951611273223, "learning_rate": 6.616730606240469e-06, "loss": 19.9879, "step": 38930 }, { "epoch": 0.6139244497698177, "grad_norm": 84.48397865052313, "learning_rate": 6.6120229754200246e-06, "loss": 19.5536, "step": 38940 }, { "epoch": 0.6140821088478274, "grad_norm": 84.13809509704708, "learning_rate": 6.607316192642299e-06, "loss": 19.5728, "step": 38950 }, { "epoch": 0.6142397679258371, "grad_norm": 82.09116775051459, "learning_rate": 6.602610259085438e-06, "loss": 20.0058, "step": 38960 }, { "epoch": 0.6143974270038469, "grad_norm": 87.66810713104509, "learning_rate": 6.597905175927388e-06, "loss": 19.9212, "step": 38970 }, { "epoch": 0.6145550860818566, "grad_norm": 83.46189559272975, "learning_rate": 6.593200944345869e-06, "loss": 20.0854, "step": 38980 }, { "epoch": 0.6147127451598663, "grad_norm": 316.7267353775781, "learning_rate": 6.5884975655183995e-06, "loss": 20.4303, "step": 38990 }, { "epoch": 0.614870404237876, "grad_norm": 76.1348753512774, "learning_rate": 6.583795040622276e-06, "loss": 19.4783, "step": 39000 }, { "epoch": 0.6150280633158858, "grad_norm": 78.04136978099635, "learning_rate": 6.579093370834593e-06, "loss": 19.7033, "step": 39010 }, { "epoch": 0.6151857223938955, "grad_norm": 78.7245146084527, "learning_rate": 6.574392557332211e-06, "loss": 20.1169, "step": 39020 }, { "epoch": 0.6153433814719051, "grad_norm": 77.2375892411675, "learning_rate": 6.569692601291799e-06, "loss": 19.4564, "step": 39030 }, { "epoch": 0.6155010405499148, "grad_norm": 84.91287070381317, "learning_rate": 6.564993503889794e-06, "loss": 20.7519, "step": 39040 }, { "epoch": 0.6156586996279245, "grad_norm": 78.17835044035213, "learning_rate": 6.560295266302426e-06, "loss": 19.9676, "step": 39050 }, { "epoch": 0.6158163587059343, "grad_norm": 76.68896742928261, "learning_rate": 6.5555978897057096e-06, "loss": 20.0102, "step": 39060 }, { "epoch": 0.615974017783944, "grad_norm": 77.96285248436979, "learning_rate": 6.550901375275439e-06, "loss": 19.6112, "step": 39070 }, { "epoch": 0.6161316768619537, "grad_norm": 89.46163210935657, "learning_rate": 6.546205724187201e-06, "loss": 19.4013, "step": 39080 }, { "epoch": 0.6162893359399634, "grad_norm": 75.59724811161986, "learning_rate": 6.541510937616358e-06, "loss": 19.8793, "step": 39090 }, { "epoch": 0.6164469950179732, "grad_norm": 84.47597063758461, "learning_rate": 6.536817016738064e-06, "loss": 19.613, "step": 39100 }, { "epoch": 0.6166046540959829, "grad_norm": 83.21743862274184, "learning_rate": 6.5321239627272455e-06, "loss": 19.7086, "step": 39110 }, { "epoch": 0.6167623131739925, "grad_norm": 78.07382427868872, "learning_rate": 6.527431776758626e-06, "loss": 19.9259, "step": 39120 }, { "epoch": 0.6169199722520022, "grad_norm": 85.17582911131805, "learning_rate": 6.522740460006693e-06, "loss": 20.2235, "step": 39130 }, { "epoch": 0.617077631330012, "grad_norm": 78.69372866868196, "learning_rate": 6.518050013645739e-06, "loss": 18.8898, "step": 39140 }, { "epoch": 0.6172352904080217, "grad_norm": 85.19166459963374, "learning_rate": 6.51336043884982e-06, "loss": 20.8107, "step": 39150 }, { "epoch": 0.6173929494860314, "grad_norm": 81.96208331520054, "learning_rate": 6.508671736792786e-06, "loss": 20.0554, "step": 39160 }, { "epoch": 0.6175506085640411, "grad_norm": 90.9530262404953, "learning_rate": 6.503983908648262e-06, "loss": 19.8457, "step": 39170 }, { "epoch": 0.6177082676420508, "grad_norm": 76.40195577412356, "learning_rate": 6.499296955589649e-06, "loss": 19.4459, "step": 39180 }, { "epoch": 0.6178659267200606, "grad_norm": 76.91726683271224, "learning_rate": 6.494610878790144e-06, "loss": 19.6716, "step": 39190 }, { "epoch": 0.6180235857980703, "grad_norm": 83.97903645969168, "learning_rate": 6.489925679422712e-06, "loss": 19.6097, "step": 39200 }, { "epoch": 0.61818124487608, "grad_norm": 81.57822087536564, "learning_rate": 6.4852413586601085e-06, "loss": 19.6496, "step": 39210 }, { "epoch": 0.6183389039540896, "grad_norm": 81.67166786676067, "learning_rate": 6.480557917674853e-06, "loss": 20.0797, "step": 39220 }, { "epoch": 0.6184965630320994, "grad_norm": 84.07015645326962, "learning_rate": 6.4758753576392655e-06, "loss": 19.1932, "step": 39230 }, { "epoch": 0.6186542221101091, "grad_norm": 78.37240809190598, "learning_rate": 6.471193679725427e-06, "loss": 19.946, "step": 39240 }, { "epoch": 0.6188118811881188, "grad_norm": 84.30594763122399, "learning_rate": 6.466512885105216e-06, "loss": 19.4588, "step": 39250 }, { "epoch": 0.6189695402661285, "grad_norm": 83.32145200316141, "learning_rate": 6.461832974950267e-06, "loss": 19.7074, "step": 39260 }, { "epoch": 0.6191271993441383, "grad_norm": 79.38598747915661, "learning_rate": 6.457153950432018e-06, "loss": 19.5521, "step": 39270 }, { "epoch": 0.619284858422148, "grad_norm": 81.50595427337396, "learning_rate": 6.4524758127216615e-06, "loss": 20.0434, "step": 39280 }, { "epoch": 0.6194425175001577, "grad_norm": 80.61620636517408, "learning_rate": 6.447798562990188e-06, "loss": 19.518, "step": 39290 }, { "epoch": 0.6196001765781673, "grad_norm": 83.33502591127792, "learning_rate": 6.443122202408358e-06, "loss": 19.7879, "step": 39300 }, { "epoch": 0.619757835656177, "grad_norm": 870.2977745526458, "learning_rate": 6.438446732146701e-06, "loss": 20.3584, "step": 39310 }, { "epoch": 0.6199154947341868, "grad_norm": 79.3507036431469, "learning_rate": 6.433772153375539e-06, "loss": 19.4461, "step": 39320 }, { "epoch": 0.6200731538121965, "grad_norm": 83.71333944764856, "learning_rate": 6.429098467264957e-06, "loss": 19.5108, "step": 39330 }, { "epoch": 0.6202308128902062, "grad_norm": 82.24762265689098, "learning_rate": 6.424425674984829e-06, "loss": 19.7156, "step": 39340 }, { "epoch": 0.6203884719682159, "grad_norm": 81.11178101545212, "learning_rate": 6.4197537777047935e-06, "loss": 18.8641, "step": 39350 }, { "epoch": 0.6205461310462257, "grad_norm": 78.71140603236505, "learning_rate": 6.415082776594277e-06, "loss": 19.8946, "step": 39360 }, { "epoch": 0.6207037901242354, "grad_norm": 85.00625196403419, "learning_rate": 6.410412672822468e-06, "loss": 20.2613, "step": 39370 }, { "epoch": 0.6208614492022451, "grad_norm": 75.55445319336322, "learning_rate": 6.405743467558344e-06, "loss": 19.3658, "step": 39380 }, { "epoch": 0.6210191082802548, "grad_norm": 83.88981813313323, "learning_rate": 6.401075161970647e-06, "loss": 20.0291, "step": 39390 }, { "epoch": 0.6211767673582644, "grad_norm": 79.64149767268654, "learning_rate": 6.396407757227903e-06, "loss": 21.252, "step": 39400 }, { "epoch": 0.6213344264362742, "grad_norm": 81.51685534837806, "learning_rate": 6.3917412544984e-06, "loss": 20.1235, "step": 39410 }, { "epoch": 0.6214920855142839, "grad_norm": 83.40002669940789, "learning_rate": 6.387075654950217e-06, "loss": 20.168, "step": 39420 }, { "epoch": 0.6216497445922936, "grad_norm": 78.92222467785818, "learning_rate": 6.382410959751191e-06, "loss": 19.4043, "step": 39430 }, { "epoch": 0.6218074036703033, "grad_norm": 77.44652432384608, "learning_rate": 6.377747170068939e-06, "loss": 19.7308, "step": 39440 }, { "epoch": 0.6219650627483131, "grad_norm": 83.11844531306686, "learning_rate": 6.373084287070858e-06, "loss": 19.6155, "step": 39450 }, { "epoch": 0.6221227218263228, "grad_norm": 76.82990137259047, "learning_rate": 6.368422311924103e-06, "loss": 20.2151, "step": 39460 }, { "epoch": 0.6222803809043325, "grad_norm": 79.87652660453759, "learning_rate": 6.363761245795619e-06, "loss": 19.4298, "step": 39470 }, { "epoch": 0.6224380399823422, "grad_norm": 81.3116893721423, "learning_rate": 6.359101089852109e-06, "loss": 19.7377, "step": 39480 }, { "epoch": 0.622595699060352, "grad_norm": 82.0585119614386, "learning_rate": 6.354441845260057e-06, "loss": 20.1882, "step": 39490 }, { "epoch": 0.6227533581383616, "grad_norm": 80.67544579320527, "learning_rate": 6.349783513185712e-06, "loss": 19.3715, "step": 39500 }, { "epoch": 0.6229110172163713, "grad_norm": 80.81931683751786, "learning_rate": 6.345126094795109e-06, "loss": 19.6163, "step": 39510 }, { "epoch": 0.623068676294381, "grad_norm": 82.14961219002241, "learning_rate": 6.34046959125403e-06, "loss": 19.9961, "step": 39520 }, { "epoch": 0.6232263353723907, "grad_norm": 94.9890576405288, "learning_rate": 6.3358140037280515e-06, "loss": 19.3266, "step": 39530 }, { "epoch": 0.6233839944504005, "grad_norm": 79.97666721701407, "learning_rate": 6.331159333382508e-06, "loss": 19.2838, "step": 39540 }, { "epoch": 0.6235416535284102, "grad_norm": 86.26752374390772, "learning_rate": 6.326505581382509e-06, "loss": 20.0095, "step": 39550 }, { "epoch": 0.6236993126064199, "grad_norm": 78.88159324408524, "learning_rate": 6.321852748892934e-06, "loss": 19.2543, "step": 39560 }, { "epoch": 0.6238569716844296, "grad_norm": 80.64649971830521, "learning_rate": 6.317200837078424e-06, "loss": 19.2551, "step": 39570 }, { "epoch": 0.6240146307624393, "grad_norm": 80.47200555811831, "learning_rate": 6.312549847103403e-06, "loss": 19.9935, "step": 39580 }, { "epoch": 0.624172289840449, "grad_norm": 90.20714679877179, "learning_rate": 6.307899780132054e-06, "loss": 20.2072, "step": 39590 }, { "epoch": 0.6243299489184587, "grad_norm": 86.80866628421919, "learning_rate": 6.303250637328339e-06, "loss": 19.5683, "step": 39600 }, { "epoch": 0.6244876079964684, "grad_norm": 83.18564263090349, "learning_rate": 6.298602419855974e-06, "loss": 19.7662, "step": 39610 }, { "epoch": 0.6246452670744781, "grad_norm": 85.76436621962063, "learning_rate": 6.29395512887846e-06, "loss": 19.3477, "step": 39620 }, { "epoch": 0.6248029261524879, "grad_norm": 78.32175839698304, "learning_rate": 6.289308765559049e-06, "loss": 20.0591, "step": 39630 }, { "epoch": 0.6249605852304976, "grad_norm": 82.0477730522588, "learning_rate": 6.284663331060777e-06, "loss": 19.7794, "step": 39640 }, { "epoch": 0.6251182443085073, "grad_norm": 79.77824868445911, "learning_rate": 6.2800188265464365e-06, "loss": 19.5889, "step": 39650 }, { "epoch": 0.625275903386517, "grad_norm": 82.29676760535827, "learning_rate": 6.275375253178596e-06, "loss": 20.1852, "step": 39660 }, { "epoch": 0.6254335624645267, "grad_norm": 88.3454337568108, "learning_rate": 6.270732612119579e-06, "loss": 19.9201, "step": 39670 }, { "epoch": 0.6255912215425364, "grad_norm": 82.37047553799101, "learning_rate": 6.266090904531485e-06, "loss": 19.5125, "step": 39680 }, { "epoch": 0.6257488806205461, "grad_norm": 82.43931267751901, "learning_rate": 6.261450131576181e-06, "loss": 19.5517, "step": 39690 }, { "epoch": 0.6259065396985558, "grad_norm": 76.99956074932135, "learning_rate": 6.256810294415292e-06, "loss": 19.9292, "step": 39700 }, { "epoch": 0.6260641987765656, "grad_norm": 85.56881714110152, "learning_rate": 6.252171394210217e-06, "loss": 19.5046, "step": 39710 }, { "epoch": 0.6262218578545753, "grad_norm": 80.2247592310869, "learning_rate": 6.24753343212211e-06, "loss": 19.3873, "step": 39720 }, { "epoch": 0.626379516932585, "grad_norm": 73.84541106157505, "learning_rate": 6.242896409311903e-06, "loss": 19.2583, "step": 39730 }, { "epoch": 0.6265371760105947, "grad_norm": 77.1152796697136, "learning_rate": 6.238260326940282e-06, "loss": 18.9448, "step": 39740 }, { "epoch": 0.6266948350886044, "grad_norm": 80.75092388322628, "learning_rate": 6.2336251861677096e-06, "loss": 19.9634, "step": 39750 }, { "epoch": 0.6268524941666141, "grad_norm": 80.23592696676997, "learning_rate": 6.228990988154398e-06, "loss": 19.6096, "step": 39760 }, { "epoch": 0.6270101532446238, "grad_norm": 77.4221981106056, "learning_rate": 6.224357734060335e-06, "loss": 19.0559, "step": 39770 }, { "epoch": 0.6271678123226335, "grad_norm": 79.61670325246823, "learning_rate": 6.219725425045265e-06, "loss": 19.656, "step": 39780 }, { "epoch": 0.6273254714006432, "grad_norm": 79.29964060521245, "learning_rate": 6.215094062268701e-06, "loss": 20.2979, "step": 39790 }, { "epoch": 0.627483130478653, "grad_norm": 77.54838671257826, "learning_rate": 6.210463646889914e-06, "loss": 19.3935, "step": 39800 }, { "epoch": 0.6276407895566627, "grad_norm": 75.5644143535271, "learning_rate": 6.205834180067948e-06, "loss": 18.9612, "step": 39810 }, { "epoch": 0.6277984486346724, "grad_norm": 80.28936620146729, "learning_rate": 6.201205662961592e-06, "loss": 19.6669, "step": 39820 }, { "epoch": 0.6279561077126821, "grad_norm": 73.55041641277322, "learning_rate": 6.1965780967294185e-06, "loss": 19.2924, "step": 39830 }, { "epoch": 0.6281137667906919, "grad_norm": 77.48213788055074, "learning_rate": 6.191951482529743e-06, "loss": 18.9297, "step": 39840 }, { "epoch": 0.6282714258687015, "grad_norm": 83.67289814328663, "learning_rate": 6.187325821520653e-06, "loss": 19.6019, "step": 39850 }, { "epoch": 0.6284290849467112, "grad_norm": 73.34028849945364, "learning_rate": 6.182701114859998e-06, "loss": 19.064, "step": 39860 }, { "epoch": 0.6285867440247209, "grad_norm": 83.69513013934322, "learning_rate": 6.178077363705381e-06, "loss": 19.8649, "step": 39870 }, { "epoch": 0.6287444031027306, "grad_norm": 83.68543927720926, "learning_rate": 6.173454569214175e-06, "loss": 19.7043, "step": 39880 }, { "epoch": 0.6289020621807404, "grad_norm": 78.24536298638094, "learning_rate": 6.168832732543504e-06, "loss": 19.5538, "step": 39890 }, { "epoch": 0.6290597212587501, "grad_norm": 77.73742153596652, "learning_rate": 6.164211854850266e-06, "loss": 19.4731, "step": 39900 }, { "epoch": 0.6292173803367598, "grad_norm": 86.36219636703095, "learning_rate": 6.159591937291102e-06, "loss": 19.8891, "step": 39910 }, { "epoch": 0.6293750394147695, "grad_norm": 77.20928671019651, "learning_rate": 6.1549729810224265e-06, "loss": 19.169, "step": 39920 }, { "epoch": 0.6295326984927793, "grad_norm": 77.15156654886404, "learning_rate": 6.1503549872004045e-06, "loss": 19.3308, "step": 39930 }, { "epoch": 0.629690357570789, "grad_norm": 78.83851648018232, "learning_rate": 6.145737956980966e-06, "loss": 19.8782, "step": 39940 }, { "epoch": 0.6298480166487986, "grad_norm": 83.31111914577632, "learning_rate": 6.141121891519795e-06, "loss": 19.7649, "step": 39950 }, { "epoch": 0.6300056757268083, "grad_norm": 80.74224734269029, "learning_rate": 6.13650679197234e-06, "loss": 19.6537, "step": 39960 }, { "epoch": 0.630163334804818, "grad_norm": 78.42532523814744, "learning_rate": 6.1318926594938035e-06, "loss": 18.49, "step": 39970 }, { "epoch": 0.6303209938828278, "grad_norm": 75.53081689330995, "learning_rate": 6.12727949523914e-06, "loss": 19.3316, "step": 39980 }, { "epoch": 0.6304786529608375, "grad_norm": 77.18815424294745, "learning_rate": 6.122667300363074e-06, "loss": 19.9978, "step": 39990 }, { "epoch": 0.6306363120388472, "grad_norm": 81.02114008736812, "learning_rate": 6.11805607602008e-06, "loss": 19.2273, "step": 40000 }, { "epoch": 0.6307939711168569, "grad_norm": 82.4382851212784, "learning_rate": 6.113445823364397e-06, "loss": 19.509, "step": 40010 }, { "epoch": 0.6309516301948667, "grad_norm": 79.92478307829732, "learning_rate": 6.108836543550003e-06, "loss": 19.4631, "step": 40020 }, { "epoch": 0.6311092892728764, "grad_norm": 79.20834674748001, "learning_rate": 6.104228237730654e-06, "loss": 19.1896, "step": 40030 }, { "epoch": 0.631266948350886, "grad_norm": 79.95817424447287, "learning_rate": 6.099620907059848e-06, "loss": 19.3624, "step": 40040 }, { "epoch": 0.6314246074288957, "grad_norm": 80.88958050880096, "learning_rate": 6.095014552690848e-06, "loss": 19.1704, "step": 40050 }, { "epoch": 0.6315822665069055, "grad_norm": 84.89035194382589, "learning_rate": 6.0904091757766615e-06, "loss": 19.4865, "step": 40060 }, { "epoch": 0.6317399255849152, "grad_norm": 83.8534088693665, "learning_rate": 6.085804777470066e-06, "loss": 19.375, "step": 40070 }, { "epoch": 0.6318975846629249, "grad_norm": 82.10538256221645, "learning_rate": 6.081201358923576e-06, "loss": 19.2527, "step": 40080 }, { "epoch": 0.6320552437409346, "grad_norm": 85.04976019690689, "learning_rate": 6.07659892128948e-06, "loss": 19.8943, "step": 40090 }, { "epoch": 0.6322129028189443, "grad_norm": 92.29498888266112, "learning_rate": 6.071997465719808e-06, "loss": 19.5161, "step": 40100 }, { "epoch": 0.6323705618969541, "grad_norm": 78.59117955773173, "learning_rate": 6.067396993366345e-06, "loss": 19.5034, "step": 40110 }, { "epoch": 0.6325282209749638, "grad_norm": 85.36582948181173, "learning_rate": 6.06279750538064e-06, "loss": 19.2171, "step": 40120 }, { "epoch": 0.6326858800529734, "grad_norm": 80.16314513593821, "learning_rate": 6.0581990029139785e-06, "loss": 19.0017, "step": 40130 }, { "epoch": 0.6328435391309831, "grad_norm": 76.93267756354639, "learning_rate": 6.053601487117417e-06, "loss": 19.5031, "step": 40140 }, { "epoch": 0.6330011982089929, "grad_norm": 85.88211732192103, "learning_rate": 6.049004959141753e-06, "loss": 19.1322, "step": 40150 }, { "epoch": 0.6331588572870026, "grad_norm": 78.00437741954521, "learning_rate": 6.044409420137548e-06, "loss": 19.3036, "step": 40160 }, { "epoch": 0.6333165163650123, "grad_norm": 85.98387082451141, "learning_rate": 6.039814871255099e-06, "loss": 19.1657, "step": 40170 }, { "epoch": 0.633474175443022, "grad_norm": 77.94502706060308, "learning_rate": 6.03522131364447e-06, "loss": 19.2527, "step": 40180 }, { "epoch": 0.6336318345210317, "grad_norm": 78.58814173196959, "learning_rate": 6.030628748455472e-06, "loss": 19.0455, "step": 40190 }, { "epoch": 0.6337894935990415, "grad_norm": 81.70414272413271, "learning_rate": 6.026037176837671e-06, "loss": 19.1772, "step": 40200 }, { "epoch": 0.6339471526770512, "grad_norm": 83.51933508097349, "learning_rate": 6.021446599940372e-06, "loss": 19.6505, "step": 40210 }, { "epoch": 0.6341048117550608, "grad_norm": 83.01801347264049, "learning_rate": 6.01685701891265e-06, "loss": 19.5066, "step": 40220 }, { "epoch": 0.6342624708330705, "grad_norm": 76.85734215912407, "learning_rate": 6.012268434903312e-06, "loss": 18.731, "step": 40230 }, { "epoch": 0.6344201299110803, "grad_norm": 81.61714772630066, "learning_rate": 6.007680849060926e-06, "loss": 20.0912, "step": 40240 }, { "epoch": 0.63457778898909, "grad_norm": 80.92860525776295, "learning_rate": 6.003094262533815e-06, "loss": 19.1497, "step": 40250 }, { "epoch": 0.6347354480670997, "grad_norm": 73.20530626528588, "learning_rate": 5.998508676470034e-06, "loss": 19.5016, "step": 40260 }, { "epoch": 0.6348931071451094, "grad_norm": 83.6694148055814, "learning_rate": 5.993924092017409e-06, "loss": 19.6263, "step": 40270 }, { "epoch": 0.6350507662231192, "grad_norm": 80.65690444904088, "learning_rate": 5.989340510323496e-06, "loss": 19.7475, "step": 40280 }, { "epoch": 0.6352084253011289, "grad_norm": 79.43690478547964, "learning_rate": 5.984757932535616e-06, "loss": 19.3994, "step": 40290 }, { "epoch": 0.6353660843791386, "grad_norm": 82.33507043937274, "learning_rate": 5.980176359800826e-06, "loss": 19.5575, "step": 40300 }, { "epoch": 0.6355237434571482, "grad_norm": 80.67727108535857, "learning_rate": 5.975595793265942e-06, "loss": 19.3063, "step": 40310 }, { "epoch": 0.6356814025351579, "grad_norm": 76.18146121963018, "learning_rate": 5.971016234077519e-06, "loss": 19.207, "step": 40320 }, { "epoch": 0.6358390616131677, "grad_norm": 83.05342510033687, "learning_rate": 5.966437683381867e-06, "loss": 19.557, "step": 40330 }, { "epoch": 0.6359967206911774, "grad_norm": 79.01906193532335, "learning_rate": 5.96186014232504e-06, "loss": 19.5081, "step": 40340 }, { "epoch": 0.6361543797691871, "grad_norm": 88.12689686866287, "learning_rate": 5.957283612052838e-06, "loss": 19.5402, "step": 40350 }, { "epoch": 0.6363120388471968, "grad_norm": 82.50026286195994, "learning_rate": 5.952708093710814e-06, "loss": 19.102, "step": 40360 }, { "epoch": 0.6364696979252066, "grad_norm": 84.62830234007421, "learning_rate": 5.948133588444256e-06, "loss": 19.2978, "step": 40370 }, { "epoch": 0.6366273570032163, "grad_norm": 81.30656360567707, "learning_rate": 5.943560097398214e-06, "loss": 19.6225, "step": 40380 }, { "epoch": 0.636785016081226, "grad_norm": 83.66765832660221, "learning_rate": 5.938987621717469e-06, "loss": 19.454, "step": 40390 }, { "epoch": 0.6369426751592356, "grad_norm": 83.93698132944749, "learning_rate": 5.9344161625465635e-06, "loss": 19.9585, "step": 40400 }, { "epoch": 0.6371003342372454, "grad_norm": 82.59025892151718, "learning_rate": 5.929845721029768e-06, "loss": 19.3812, "step": 40410 }, { "epoch": 0.6372579933152551, "grad_norm": 77.30192518281113, "learning_rate": 5.925276298311115e-06, "loss": 18.9842, "step": 40420 }, { "epoch": 0.6374156523932648, "grad_norm": 82.19130568198368, "learning_rate": 5.920707895534368e-06, "loss": 19.3454, "step": 40430 }, { "epoch": 0.6375733114712745, "grad_norm": 85.42472508995348, "learning_rate": 5.916140513843045e-06, "loss": 19.506, "step": 40440 }, { "epoch": 0.6377309705492842, "grad_norm": 78.31737126224864, "learning_rate": 5.911574154380402e-06, "loss": 19.1991, "step": 40450 }, { "epoch": 0.637888629627294, "grad_norm": 75.87765894876294, "learning_rate": 5.907008818289447e-06, "loss": 19.1163, "step": 40460 }, { "epoch": 0.6380462887053037, "grad_norm": 76.54058792243538, "learning_rate": 5.902444506712921e-06, "loss": 19.3304, "step": 40470 }, { "epoch": 0.6382039477833134, "grad_norm": 80.21955602759542, "learning_rate": 5.897881220793321e-06, "loss": 18.4755, "step": 40480 }, { "epoch": 0.638361606861323, "grad_norm": 77.40351815252784, "learning_rate": 5.893318961672875e-06, "loss": 19.017, "step": 40490 }, { "epoch": 0.6385192659393328, "grad_norm": 79.83278181329078, "learning_rate": 5.888757730493559e-06, "loss": 18.7327, "step": 40500 }, { "epoch": 0.6386769250173425, "grad_norm": 77.76798927991966, "learning_rate": 5.8841975283971e-06, "loss": 19.1164, "step": 40510 }, { "epoch": 0.6388345840953522, "grad_norm": 77.11219105390728, "learning_rate": 5.879638356524948e-06, "loss": 18.9745, "step": 40520 }, { "epoch": 0.6389922431733619, "grad_norm": 75.24264304723916, "learning_rate": 5.875080216018318e-06, "loss": 18.2682, "step": 40530 }, { "epoch": 0.6391499022513716, "grad_norm": 80.77605791494969, "learning_rate": 5.870523108018148e-06, "loss": 19.5517, "step": 40540 }, { "epoch": 0.6393075613293814, "grad_norm": 77.2730330098623, "learning_rate": 5.865967033665132e-06, "loss": 19.1042, "step": 40550 }, { "epoch": 0.6394652204073911, "grad_norm": 82.14139978907411, "learning_rate": 5.861411994099693e-06, "loss": 19.5166, "step": 40560 }, { "epoch": 0.6396228794854008, "grad_norm": 89.35240295979149, "learning_rate": 5.856857990462004e-06, "loss": 18.8378, "step": 40570 }, { "epoch": 0.6397805385634104, "grad_norm": 82.37624723851215, "learning_rate": 5.8523050238919706e-06, "loss": 19.6449, "step": 40580 }, { "epoch": 0.6399381976414202, "grad_norm": 80.6482479187578, "learning_rate": 5.847753095529249e-06, "loss": 19.7739, "step": 40590 }, { "epoch": 0.6400958567194299, "grad_norm": 77.92370686342097, "learning_rate": 5.843202206513226e-06, "loss": 19.5837, "step": 40600 }, { "epoch": 0.6402535157974396, "grad_norm": 82.91801389243187, "learning_rate": 5.838652357983035e-06, "loss": 19.4808, "step": 40610 }, { "epoch": 0.6404111748754493, "grad_norm": 86.26107303536914, "learning_rate": 5.834103551077545e-06, "loss": 19.1262, "step": 40620 }, { "epoch": 0.6405688339534591, "grad_norm": 80.08038327658957, "learning_rate": 5.829555786935363e-06, "loss": 18.9799, "step": 40630 }, { "epoch": 0.6407264930314688, "grad_norm": 79.01665877558914, "learning_rate": 5.825009066694842e-06, "loss": 18.2833, "step": 40640 }, { "epoch": 0.6408841521094785, "grad_norm": 82.69924106118872, "learning_rate": 5.8204633914940645e-06, "loss": 19.3017, "step": 40650 }, { "epoch": 0.6410418111874882, "grad_norm": 79.57325225006021, "learning_rate": 5.815918762470863e-06, "loss": 18.9358, "step": 40660 }, { "epoch": 0.6411994702654978, "grad_norm": 84.55277718258101, "learning_rate": 5.8113751807627906e-06, "loss": 18.917, "step": 40670 }, { "epoch": 0.6413571293435076, "grad_norm": 79.18897378817205, "learning_rate": 5.806832647507159e-06, "loss": 18.952, "step": 40680 }, { "epoch": 0.6415147884215173, "grad_norm": 76.86557802039569, "learning_rate": 5.802291163840998e-06, "loss": 18.6827, "step": 40690 }, { "epoch": 0.641672447499527, "grad_norm": 81.70288659944316, "learning_rate": 5.7977507309010935e-06, "loss": 19.1954, "step": 40700 }, { "epoch": 0.6418301065775367, "grad_norm": 81.04933634973246, "learning_rate": 5.793211349823951e-06, "loss": 20.2882, "step": 40710 }, { "epoch": 0.6419877656555465, "grad_norm": 74.79739888893097, "learning_rate": 5.788673021745823e-06, "loss": 18.5633, "step": 40720 }, { "epoch": 0.6421454247335562, "grad_norm": 76.19475295747702, "learning_rate": 5.784135747802697e-06, "loss": 18.9354, "step": 40730 }, { "epoch": 0.6423030838115659, "grad_norm": 76.11896668925831, "learning_rate": 5.7795995291303e-06, "loss": 18.8339, "step": 40740 }, { "epoch": 0.6424607428895756, "grad_norm": 79.4334204318697, "learning_rate": 5.775064366864085e-06, "loss": 19.3927, "step": 40750 }, { "epoch": 0.6426184019675852, "grad_norm": 79.23318619768757, "learning_rate": 5.770530262139242e-06, "loss": 19.2234, "step": 40760 }, { "epoch": 0.642776061045595, "grad_norm": 79.19224588375768, "learning_rate": 5.765997216090711e-06, "loss": 19.4796, "step": 40770 }, { "epoch": 0.6429337201236047, "grad_norm": 82.84745602543373, "learning_rate": 5.761465229853145e-06, "loss": 19.3779, "step": 40780 }, { "epoch": 0.6430913792016144, "grad_norm": 77.01983488096866, "learning_rate": 5.7569343045609525e-06, "loss": 19.0549, "step": 40790 }, { "epoch": 0.6432490382796241, "grad_norm": 82.34435906071948, "learning_rate": 5.752404441348258e-06, "loss": 19.1675, "step": 40800 }, { "epoch": 0.6434066973576339, "grad_norm": 82.5124570360027, "learning_rate": 5.747875641348937e-06, "loss": 19.3538, "step": 40810 }, { "epoch": 0.6435643564356436, "grad_norm": 80.81993526149977, "learning_rate": 5.743347905696586e-06, "loss": 19.3972, "step": 40820 }, { "epoch": 0.6437220155136533, "grad_norm": 79.4282899998076, "learning_rate": 5.738821235524546e-06, "loss": 19.1287, "step": 40830 }, { "epoch": 0.643879674591663, "grad_norm": 269.66035371087577, "learning_rate": 5.7342956319658795e-06, "loss": 19.1662, "step": 40840 }, { "epoch": 0.6440373336696728, "grad_norm": 84.09660884270765, "learning_rate": 5.7297710961533925e-06, "loss": 19.6346, "step": 40850 }, { "epoch": 0.6441949927476824, "grad_norm": 76.82521796398096, "learning_rate": 5.7252476292196125e-06, "loss": 19.2762, "step": 40860 }, { "epoch": 0.6443526518256921, "grad_norm": 76.01233217723136, "learning_rate": 5.720725232296816e-06, "loss": 18.5215, "step": 40870 }, { "epoch": 0.6445103109037018, "grad_norm": 85.97015402485215, "learning_rate": 5.716203906516991e-06, "loss": 19.6696, "step": 40880 }, { "epoch": 0.6446679699817115, "grad_norm": 76.77512238517161, "learning_rate": 5.711683653011877e-06, "loss": 18.9323, "step": 40890 }, { "epoch": 0.6448256290597213, "grad_norm": 72.62381115795853, "learning_rate": 5.707164472912927e-06, "loss": 18.4815, "step": 40900 }, { "epoch": 0.644983288137731, "grad_norm": 78.85269088021676, "learning_rate": 5.702646367351341e-06, "loss": 19.5101, "step": 40910 }, { "epoch": 0.6451409472157407, "grad_norm": 79.56956879650866, "learning_rate": 5.698129337458047e-06, "loss": 19.4741, "step": 40920 }, { "epoch": 0.6452986062937504, "grad_norm": 80.06656964811276, "learning_rate": 5.693613384363692e-06, "loss": 18.7558, "step": 40930 }, { "epoch": 0.6454562653717602, "grad_norm": 82.20753530544467, "learning_rate": 5.68909850919867e-06, "loss": 19.3257, "step": 40940 }, { "epoch": 0.6456139244497698, "grad_norm": 81.29051162722254, "learning_rate": 5.684584713093086e-06, "loss": 19.4404, "step": 40950 }, { "epoch": 0.6457715835277795, "grad_norm": 90.83183045714017, "learning_rate": 5.6800719971767995e-06, "loss": 19.0359, "step": 40960 }, { "epoch": 0.6459292426057892, "grad_norm": 79.67141965193673, "learning_rate": 5.675560362579373e-06, "loss": 19.0632, "step": 40970 }, { "epoch": 0.646086901683799, "grad_norm": 86.3143756655032, "learning_rate": 5.671049810430123e-06, "loss": 19.1303, "step": 40980 }, { "epoch": 0.6462445607618087, "grad_norm": 84.24803249156876, "learning_rate": 5.666540341858074e-06, "loss": 19.1914, "step": 40990 }, { "epoch": 0.6464022198398184, "grad_norm": 81.75143271036312, "learning_rate": 5.662031957991993e-06, "loss": 19.5675, "step": 41000 }, { "epoch": 0.6465598789178281, "grad_norm": 75.80770827116703, "learning_rate": 5.65752465996037e-06, "loss": 19.2589, "step": 41010 }, { "epoch": 0.6467175379958378, "grad_norm": 77.93140167275331, "learning_rate": 5.653018448891423e-06, "loss": 19.2178, "step": 41020 }, { "epoch": 0.6468751970738476, "grad_norm": 82.87059414938605, "learning_rate": 5.648513325913108e-06, "loss": 18.5121, "step": 41030 }, { "epoch": 0.6470328561518572, "grad_norm": 76.29975556514778, "learning_rate": 5.644009292153085e-06, "loss": 19.3779, "step": 41040 }, { "epoch": 0.6471905152298669, "grad_norm": 74.3707491775362, "learning_rate": 5.639506348738772e-06, "loss": 18.9067, "step": 41050 }, { "epoch": 0.6473481743078766, "grad_norm": 81.20608669381333, "learning_rate": 5.635004496797285e-06, "loss": 19.5229, "step": 41060 }, { "epoch": 0.6475058333858864, "grad_norm": 80.66134772340537, "learning_rate": 5.63050373745549e-06, "loss": 19.2591, "step": 41070 }, { "epoch": 0.6476634924638961, "grad_norm": 74.86251315530377, "learning_rate": 5.626004071839961e-06, "loss": 18.3842, "step": 41080 }, { "epoch": 0.6478211515419058, "grad_norm": 79.24398177775586, "learning_rate": 5.621505501077017e-06, "loss": 18.8747, "step": 41090 }, { "epoch": 0.6479788106199155, "grad_norm": 83.63032641740425, "learning_rate": 5.617008026292683e-06, "loss": 19.1242, "step": 41100 }, { "epoch": 0.6481364696979252, "grad_norm": 82.94358678414935, "learning_rate": 5.612511648612723e-06, "loss": 18.6619, "step": 41110 }, { "epoch": 0.648294128775935, "grad_norm": 76.40101902408374, "learning_rate": 5.6080163691626235e-06, "loss": 18.9732, "step": 41120 }, { "epoch": 0.6484517878539446, "grad_norm": 80.18918057110947, "learning_rate": 5.6035221890676006e-06, "loss": 19.2391, "step": 41130 }, { "epoch": 0.6486094469319543, "grad_norm": 80.25303203822523, "learning_rate": 5.59902910945258e-06, "loss": 18.385, "step": 41140 }, { "epoch": 0.648767106009964, "grad_norm": 85.77171480899904, "learning_rate": 5.594537131442232e-06, "loss": 18.8064, "step": 41150 }, { "epoch": 0.6489247650879738, "grad_norm": 87.00262315693024, "learning_rate": 5.590046256160937e-06, "loss": 19.08, "step": 41160 }, { "epoch": 0.6490824241659835, "grad_norm": 76.61654001214626, "learning_rate": 5.585556484732799e-06, "loss": 18.7274, "step": 41170 }, { "epoch": 0.6492400832439932, "grad_norm": 78.17769741125545, "learning_rate": 5.581067818281658e-06, "loss": 19.036, "step": 41180 }, { "epoch": 0.6493977423220029, "grad_norm": 80.45323234899749, "learning_rate": 5.576580257931063e-06, "loss": 18.8169, "step": 41190 }, { "epoch": 0.6495554014000127, "grad_norm": 76.38905522830946, "learning_rate": 5.572093804804298e-06, "loss": 18.9658, "step": 41200 }, { "epoch": 0.6497130604780224, "grad_norm": 74.24506979944817, "learning_rate": 5.5676084600243594e-06, "loss": 19.2195, "step": 41210 }, { "epoch": 0.649870719556032, "grad_norm": 81.06303072688529, "learning_rate": 5.563124224713975e-06, "loss": 19.4023, "step": 41220 }, { "epoch": 0.6500283786340417, "grad_norm": 79.57739285509889, "learning_rate": 5.5586410999955895e-06, "loss": 18.7491, "step": 41230 }, { "epoch": 0.6501860377120514, "grad_norm": 79.55342308780216, "learning_rate": 5.5541590869913785e-06, "loss": 19.2427, "step": 41240 }, { "epoch": 0.6503436967900612, "grad_norm": 82.16989309216281, "learning_rate": 5.549678186823222e-06, "loss": 18.3878, "step": 41250 }, { "epoch": 0.6505013558680709, "grad_norm": 79.50563514730811, "learning_rate": 5.545198400612741e-06, "loss": 19.0552, "step": 41260 }, { "epoch": 0.6506590149460806, "grad_norm": 77.42258253274454, "learning_rate": 5.540719729481259e-06, "loss": 19.0779, "step": 41270 }, { "epoch": 0.6508166740240903, "grad_norm": 80.01782865668214, "learning_rate": 5.53624217454984e-06, "loss": 18.9316, "step": 41280 }, { "epoch": 0.6509743331021001, "grad_norm": 79.35774386186371, "learning_rate": 5.53176573693925e-06, "loss": 18.5533, "step": 41290 }, { "epoch": 0.6511319921801098, "grad_norm": 78.72630393351578, "learning_rate": 5.527290417769985e-06, "loss": 19.1455, "step": 41300 }, { "epoch": 0.6512896512581194, "grad_norm": 78.24932276237655, "learning_rate": 5.522816218162261e-06, "loss": 18.4991, "step": 41310 }, { "epoch": 0.6514473103361291, "grad_norm": 90.26346016913061, "learning_rate": 5.518343139236012e-06, "loss": 19.9346, "step": 41320 }, { "epoch": 0.6516049694141388, "grad_norm": 81.11164480351324, "learning_rate": 5.513871182110897e-06, "loss": 19.3984, "step": 41330 }, { "epoch": 0.6517626284921486, "grad_norm": 78.51940899197541, "learning_rate": 5.509400347906282e-06, "loss": 18.619, "step": 41340 }, { "epoch": 0.6519202875701583, "grad_norm": 80.96400389327947, "learning_rate": 5.504930637741266e-06, "loss": 18.9661, "step": 41350 }, { "epoch": 0.652077946648168, "grad_norm": 86.08539186359019, "learning_rate": 5.500462052734652e-06, "loss": 19.1188, "step": 41360 }, { "epoch": 0.6522356057261777, "grad_norm": 79.97262487156806, "learning_rate": 5.495994594004976e-06, "loss": 19.0195, "step": 41370 }, { "epoch": 0.6523932648041875, "grad_norm": 80.62521082317714, "learning_rate": 5.491528262670479e-06, "loss": 18.906, "step": 41380 }, { "epoch": 0.6525509238821972, "grad_norm": 80.63057179630336, "learning_rate": 5.487063059849134e-06, "loss": 18.725, "step": 41390 }, { "epoch": 0.6527085829602068, "grad_norm": 81.69231018183334, "learning_rate": 5.482598986658614e-06, "loss": 19.1832, "step": 41400 }, { "epoch": 0.6528662420382165, "grad_norm": 73.94568018373288, "learning_rate": 5.478136044216324e-06, "loss": 18.6769, "step": 41410 }, { "epoch": 0.6530239011162263, "grad_norm": 81.82734698015099, "learning_rate": 5.4736742336393835e-06, "loss": 18.8372, "step": 41420 }, { "epoch": 0.653181560194236, "grad_norm": 80.61736723034687, "learning_rate": 5.469213556044619e-06, "loss": 19.182, "step": 41430 }, { "epoch": 0.6533392192722457, "grad_norm": 83.08431528379765, "learning_rate": 5.464754012548588e-06, "loss": 19.7733, "step": 41440 }, { "epoch": 0.6534968783502554, "grad_norm": 80.20400546448684, "learning_rate": 5.4602956042675495e-06, "loss": 19.1419, "step": 41450 }, { "epoch": 0.6536545374282651, "grad_norm": 73.59424536004262, "learning_rate": 5.455838332317491e-06, "loss": 18.5172, "step": 41460 }, { "epoch": 0.6538121965062749, "grad_norm": 85.0976940047731, "learning_rate": 5.451382197814103e-06, "loss": 19.1653, "step": 41470 }, { "epoch": 0.6539698555842846, "grad_norm": 81.66827058521525, "learning_rate": 5.446927201872807e-06, "loss": 18.6728, "step": 41480 }, { "epoch": 0.6541275146622942, "grad_norm": 82.95982543245019, "learning_rate": 5.442473345608722e-06, "loss": 19.408, "step": 41490 }, { "epoch": 0.6542851737403039, "grad_norm": 77.53580318385019, "learning_rate": 5.4380206301366975e-06, "loss": 18.769, "step": 41500 }, { "epoch": 0.6544428328183137, "grad_norm": 78.8577034986417, "learning_rate": 5.433569056571284e-06, "loss": 19.2334, "step": 41510 }, { "epoch": 0.6546004918963234, "grad_norm": 81.11814141612888, "learning_rate": 5.429118626026756e-06, "loss": 18.5905, "step": 41520 }, { "epoch": 0.6547581509743331, "grad_norm": 80.95890956233134, "learning_rate": 5.424669339617097e-06, "loss": 18.5108, "step": 41530 }, { "epoch": 0.6549158100523428, "grad_norm": 81.45889337223656, "learning_rate": 5.420221198456011e-06, "loss": 18.6084, "step": 41540 }, { "epoch": 0.6550734691303525, "grad_norm": 73.12210163889368, "learning_rate": 5.415774203656905e-06, "loss": 18.9587, "step": 41550 }, { "epoch": 0.6552311282083623, "grad_norm": 78.99604183118655, "learning_rate": 5.411328356332901e-06, "loss": 18.8071, "step": 41560 }, { "epoch": 0.655388787286372, "grad_norm": 84.14671775771909, "learning_rate": 5.406883657596843e-06, "loss": 18.7947, "step": 41570 }, { "epoch": 0.6555464463643816, "grad_norm": 81.55240465809482, "learning_rate": 5.402440108561276e-06, "loss": 19.4085, "step": 41580 }, { "epoch": 0.6557041054423913, "grad_norm": 78.00735976651893, "learning_rate": 5.397997710338465e-06, "loss": 18.8912, "step": 41590 }, { "epoch": 0.6558617645204011, "grad_norm": 81.2764105538014, "learning_rate": 5.393556464040381e-06, "loss": 18.672, "step": 41600 }, { "epoch": 0.6560194235984108, "grad_norm": 94.56107926179725, "learning_rate": 5.389116370778715e-06, "loss": 18.985, "step": 41610 }, { "epoch": 0.6561770826764205, "grad_norm": 77.70256622895546, "learning_rate": 5.38467743166486e-06, "loss": 18.4431, "step": 41620 }, { "epoch": 0.6563347417544302, "grad_norm": 72.87991856392522, "learning_rate": 5.3802396478099305e-06, "loss": 19.0939, "step": 41630 }, { "epoch": 0.65649240083244, "grad_norm": 80.93434135772625, "learning_rate": 5.3758030203247395e-06, "loss": 19.0118, "step": 41640 }, { "epoch": 0.6566500599104497, "grad_norm": 77.11196789855913, "learning_rate": 5.371367550319821e-06, "loss": 19.0181, "step": 41650 }, { "epoch": 0.6568077189884594, "grad_norm": 76.78037316411655, "learning_rate": 5.366933238905412e-06, "loss": 18.6364, "step": 41660 }, { "epoch": 0.656965378066469, "grad_norm": 75.76952015055437, "learning_rate": 5.362500087191467e-06, "loss": 18.9627, "step": 41670 }, { "epoch": 0.6571230371444787, "grad_norm": 81.64868676989911, "learning_rate": 5.35806809628764e-06, "loss": 18.676, "step": 41680 }, { "epoch": 0.6572806962224885, "grad_norm": 87.34517358066356, "learning_rate": 5.353637267303302e-06, "loss": 19.148, "step": 41690 }, { "epoch": 0.6574383553004982, "grad_norm": 81.98829708639843, "learning_rate": 5.349207601347533e-06, "loss": 18.975, "step": 41700 }, { "epoch": 0.6575960143785079, "grad_norm": 80.99116406648704, "learning_rate": 5.344779099529118e-06, "loss": 18.3599, "step": 41710 }, { "epoch": 0.6577536734565176, "grad_norm": 79.66587191759373, "learning_rate": 5.340351762956559e-06, "loss": 18.2349, "step": 41720 }, { "epoch": 0.6579113325345274, "grad_norm": 83.80299889018845, "learning_rate": 5.335925592738053e-06, "loss": 18.9686, "step": 41730 }, { "epoch": 0.6580689916125371, "grad_norm": 80.78361338198884, "learning_rate": 5.33150058998152e-06, "loss": 19.0169, "step": 41740 }, { "epoch": 0.6582266506905468, "grad_norm": 78.03296667406404, "learning_rate": 5.327076755794569e-06, "loss": 18.7252, "step": 41750 }, { "epoch": 0.6583843097685564, "grad_norm": 94.3724535252461, "learning_rate": 5.322654091284541e-06, "loss": 19.2096, "step": 41760 }, { "epoch": 0.6585419688465662, "grad_norm": 74.56857457883714, "learning_rate": 5.318232597558457e-06, "loss": 18.7031, "step": 41770 }, { "epoch": 0.6586996279245759, "grad_norm": 75.44112134914226, "learning_rate": 5.313812275723069e-06, "loss": 18.4973, "step": 41780 }, { "epoch": 0.6588572870025856, "grad_norm": 75.00024107538967, "learning_rate": 5.30939312688482e-06, "loss": 19.2952, "step": 41790 }, { "epoch": 0.6590149460805953, "grad_norm": 77.67872292499895, "learning_rate": 5.3049751521498695e-06, "loss": 18.6394, "step": 41800 }, { "epoch": 0.659172605158605, "grad_norm": 74.63880784525976, "learning_rate": 5.300558352624072e-06, "loss": 18.5611, "step": 41810 }, { "epoch": 0.6593302642366148, "grad_norm": 82.85032702148443, "learning_rate": 5.2961427294129975e-06, "loss": 19.3152, "step": 41820 }, { "epoch": 0.6594879233146245, "grad_norm": 79.49636311979955, "learning_rate": 5.291728283621922e-06, "loss": 18.5816, "step": 41830 }, { "epoch": 0.6596455823926342, "grad_norm": 78.81599466944078, "learning_rate": 5.287315016355816e-06, "loss": 19.4641, "step": 41840 }, { "epoch": 0.6598032414706438, "grad_norm": 78.15119895874025, "learning_rate": 5.28290292871937e-06, "loss": 18.7073, "step": 41850 }, { "epoch": 0.6599609005486536, "grad_norm": 78.21303891299571, "learning_rate": 5.278492021816963e-06, "loss": 18.3782, "step": 41860 }, { "epoch": 0.6601185596266633, "grad_norm": 76.10653463833937, "learning_rate": 5.274082296752694e-06, "loss": 18.8959, "step": 41870 }, { "epoch": 0.660276218704673, "grad_norm": 78.03142937420128, "learning_rate": 5.269673754630353e-06, "loss": 18.485, "step": 41880 }, { "epoch": 0.6604338777826827, "grad_norm": 73.34843262278407, "learning_rate": 5.2652663965534444e-06, "loss": 18.1433, "step": 41890 }, { "epoch": 0.6605915368606924, "grad_norm": 76.70613017240647, "learning_rate": 5.260860223625166e-06, "loss": 18.7981, "step": 41900 }, { "epoch": 0.6607491959387022, "grad_norm": 73.21230755179964, "learning_rate": 5.25645523694843e-06, "loss": 18.3789, "step": 41910 }, { "epoch": 0.6609068550167119, "grad_norm": 81.37074128526174, "learning_rate": 5.252051437625842e-06, "loss": 19.2049, "step": 41920 }, { "epoch": 0.6610645140947216, "grad_norm": 84.35269880005163, "learning_rate": 5.247648826759724e-06, "loss": 18.2724, "step": 41930 }, { "epoch": 0.6612221731727312, "grad_norm": 76.66456019760686, "learning_rate": 5.2432474054520835e-06, "loss": 18.7884, "step": 41940 }, { "epoch": 0.661379832250741, "grad_norm": 77.71810972778468, "learning_rate": 5.238847174804633e-06, "loss": 18.8561, "step": 41950 }, { "epoch": 0.6615374913287507, "grad_norm": 77.63498748145803, "learning_rate": 5.234448135918803e-06, "loss": 17.9039, "step": 41960 }, { "epoch": 0.6616951504067604, "grad_norm": 85.71469705881141, "learning_rate": 5.230050289895705e-06, "loss": 18.8512, "step": 41970 }, { "epoch": 0.6618528094847701, "grad_norm": 84.20161350220599, "learning_rate": 5.22565363783617e-06, "loss": 18.3162, "step": 41980 }, { "epoch": 0.6620104685627799, "grad_norm": 79.51191439273641, "learning_rate": 5.221258180840712e-06, "loss": 18.4094, "step": 41990 }, { "epoch": 0.6621681276407896, "grad_norm": 78.1356397335404, "learning_rate": 5.216863920009566e-06, "loss": 19.5299, "step": 42000 }, { "epoch": 0.6623257867187993, "grad_norm": 79.1112891301514, "learning_rate": 5.212470856442647e-06, "loss": 19.209, "step": 42010 }, { "epoch": 0.662483445796809, "grad_norm": 77.74090632125925, "learning_rate": 5.2080789912395865e-06, "loss": 18.3907, "step": 42020 }, { "epoch": 0.6626411048748186, "grad_norm": 75.94564876917094, "learning_rate": 5.203688325499708e-06, "loss": 18.5047, "step": 42030 }, { "epoch": 0.6627987639528284, "grad_norm": 83.16824823540956, "learning_rate": 5.199298860322042e-06, "loss": 18.9305, "step": 42040 }, { "epoch": 0.6629564230308381, "grad_norm": 82.68182399137196, "learning_rate": 5.194910596805303e-06, "loss": 18.8704, "step": 42050 }, { "epoch": 0.6631140821088478, "grad_norm": 81.58055979244254, "learning_rate": 5.1905235360479255e-06, "loss": 18.8625, "step": 42060 }, { "epoch": 0.6632717411868575, "grad_norm": 76.2224367074237, "learning_rate": 5.186137679148027e-06, "loss": 18.7043, "step": 42070 }, { "epoch": 0.6634294002648673, "grad_norm": 82.88571258375565, "learning_rate": 5.181753027203425e-06, "loss": 18.5823, "step": 42080 }, { "epoch": 0.663587059342877, "grad_norm": 87.4137841115876, "learning_rate": 5.177369581311649e-06, "loss": 18.8365, "step": 42090 }, { "epoch": 0.6637447184208867, "grad_norm": 78.9839603456789, "learning_rate": 5.172987342569906e-06, "loss": 18.5903, "step": 42100 }, { "epoch": 0.6639023774988964, "grad_norm": 76.66030257174104, "learning_rate": 5.168606312075118e-06, "loss": 17.8744, "step": 42110 }, { "epoch": 0.664060036576906, "grad_norm": 78.93875919798396, "learning_rate": 5.1642264909238984e-06, "loss": 18.6033, "step": 42120 }, { "epoch": 0.6642176956549158, "grad_norm": 83.04934217200078, "learning_rate": 5.1598478802125606e-06, "loss": 18.6551, "step": 42130 }, { "epoch": 0.6643753547329255, "grad_norm": 80.18448582168699, "learning_rate": 5.155470481037106e-06, "loss": 18.4509, "step": 42140 }, { "epoch": 0.6645330138109352, "grad_norm": 79.44255977251704, "learning_rate": 5.151094294493246e-06, "loss": 18.9694, "step": 42150 }, { "epoch": 0.6646906728889449, "grad_norm": 75.68058026083717, "learning_rate": 5.146719321676372e-06, "loss": 18.0476, "step": 42160 }, { "epoch": 0.6648483319669547, "grad_norm": 77.79370038919187, "learning_rate": 5.142345563681592e-06, "loss": 18.7142, "step": 42170 }, { "epoch": 0.6650059910449644, "grad_norm": 75.87351809447739, "learning_rate": 5.137973021603691e-06, "loss": 18.1455, "step": 42180 }, { "epoch": 0.6651636501229741, "grad_norm": 82.66995393434989, "learning_rate": 5.1336016965371625e-06, "loss": 18.6172, "step": 42190 }, { "epoch": 0.6653213092009838, "grad_norm": 82.06623453919174, "learning_rate": 5.129231589576185e-06, "loss": 19.2684, "step": 42200 }, { "epoch": 0.6654789682789936, "grad_norm": 80.09193689994258, "learning_rate": 5.1248627018146415e-06, "loss": 19.206, "step": 42210 }, { "epoch": 0.6656366273570032, "grad_norm": 79.24771241120197, "learning_rate": 5.1204950343461094e-06, "loss": 18.7043, "step": 42220 }, { "epoch": 0.6657942864350129, "grad_norm": 78.50609136776613, "learning_rate": 5.116128588263849e-06, "loss": 18.4085, "step": 42230 }, { "epoch": 0.6659519455130226, "grad_norm": 73.39714660604778, "learning_rate": 5.111763364660831e-06, "loss": 18.4322, "step": 42240 }, { "epoch": 0.6661096045910323, "grad_norm": 79.15297689511084, "learning_rate": 5.107399364629705e-06, "loss": 19.1179, "step": 42250 }, { "epoch": 0.6662672636690421, "grad_norm": 79.60341532319941, "learning_rate": 5.103036589262829e-06, "loss": 19.0873, "step": 42260 }, { "epoch": 0.6664249227470518, "grad_norm": 84.6769755800866, "learning_rate": 5.098675039652239e-06, "loss": 19.1241, "step": 42270 }, { "epoch": 0.6665825818250615, "grad_norm": 81.32414288824617, "learning_rate": 5.094314716889681e-06, "loss": 18.8612, "step": 42280 }, { "epoch": 0.6667402409030712, "grad_norm": 73.60854095628363, "learning_rate": 5.089955622066576e-06, "loss": 18.273, "step": 42290 }, { "epoch": 0.666897899981081, "grad_norm": 89.74868040936244, "learning_rate": 5.085597756274054e-06, "loss": 19.2473, "step": 42300 }, { "epoch": 0.6670555590590906, "grad_norm": 76.68491040433858, "learning_rate": 5.081241120602925e-06, "loss": 18.3585, "step": 42310 }, { "epoch": 0.6672132181371003, "grad_norm": 78.0770169729294, "learning_rate": 5.0768857161436965e-06, "loss": 18.3272, "step": 42320 }, { "epoch": 0.66737087721511, "grad_norm": 81.56509221745188, "learning_rate": 5.07253154398657e-06, "loss": 19.1963, "step": 42330 }, { "epoch": 0.6675285362931198, "grad_norm": 86.42581298961468, "learning_rate": 5.068178605221438e-06, "loss": 18.4678, "step": 42340 }, { "epoch": 0.6676861953711295, "grad_norm": 80.72647562889242, "learning_rate": 5.06382690093788e-06, "loss": 18.6102, "step": 42350 }, { "epoch": 0.6678438544491392, "grad_norm": 76.3757651051974, "learning_rate": 5.0594764322251656e-06, "loss": 19.3814, "step": 42360 }, { "epoch": 0.6680015135271489, "grad_norm": 80.58648964596391, "learning_rate": 5.055127200172263e-06, "loss": 18.9711, "step": 42370 }, { "epoch": 0.6681591726051586, "grad_norm": 74.33761153912985, "learning_rate": 5.0507792058678215e-06, "loss": 18.3005, "step": 42380 }, { "epoch": 0.6683168316831684, "grad_norm": 75.7889299638129, "learning_rate": 5.046432450400192e-06, "loss": 18.5864, "step": 42390 }, { "epoch": 0.668474490761178, "grad_norm": 76.06946069031808, "learning_rate": 5.042086934857399e-06, "loss": 18.7956, "step": 42400 }, { "epoch": 0.6686321498391877, "grad_norm": 79.28852260614853, "learning_rate": 5.037742660327172e-06, "loss": 18.4403, "step": 42410 }, { "epoch": 0.6687898089171974, "grad_norm": 76.04225282736323, "learning_rate": 5.033399627896922e-06, "loss": 17.6296, "step": 42420 }, { "epoch": 0.6689474679952072, "grad_norm": 79.51539436741518, "learning_rate": 5.029057838653759e-06, "loss": 18.5011, "step": 42430 }, { "epoch": 0.6691051270732169, "grad_norm": 81.67718501576547, "learning_rate": 5.024717293684462e-06, "loss": 19.0109, "step": 42440 }, { "epoch": 0.6692627861512266, "grad_norm": 80.12692915234899, "learning_rate": 5.0203779940755204e-06, "loss": 18.5432, "step": 42450 }, { "epoch": 0.6694204452292363, "grad_norm": 73.42496021425777, "learning_rate": 5.0160399409130934e-06, "loss": 18.4212, "step": 42460 }, { "epoch": 0.669578104307246, "grad_norm": 88.80478546376501, "learning_rate": 5.011703135283046e-06, "loss": 18.7742, "step": 42470 }, { "epoch": 0.6697357633852558, "grad_norm": 81.46497424820433, "learning_rate": 5.0073675782709155e-06, "loss": 18.3648, "step": 42480 }, { "epoch": 0.6698934224632654, "grad_norm": 78.96738053667227, "learning_rate": 5.00303327096193e-06, "loss": 18.8298, "step": 42490 }, { "epoch": 0.6700510815412751, "grad_norm": 73.588651455845, "learning_rate": 4.998700214441012e-06, "loss": 18.0352, "step": 42500 }, { "epoch": 0.6702087406192848, "grad_norm": 78.9946078360591, "learning_rate": 4.994368409792771e-06, "loss": 18.396, "step": 42510 }, { "epoch": 0.6703663996972946, "grad_norm": 79.69108836102731, "learning_rate": 4.990037858101489e-06, "loss": 18.6999, "step": 42520 }, { "epoch": 0.6705240587753043, "grad_norm": 76.02954064899642, "learning_rate": 4.98570856045115e-06, "loss": 18.6117, "step": 42530 }, { "epoch": 0.670681717853314, "grad_norm": 77.0159908550388, "learning_rate": 4.9813805179254206e-06, "loss": 18.5349, "step": 42540 }, { "epoch": 0.6708393769313237, "grad_norm": 74.47758461952988, "learning_rate": 4.977053731607643e-06, "loss": 18.4143, "step": 42550 }, { "epoch": 0.6709970360093335, "grad_norm": 88.4309455350592, "learning_rate": 4.972728202580861e-06, "loss": 18.4196, "step": 42560 }, { "epoch": 0.6711546950873432, "grad_norm": 78.92526792181533, "learning_rate": 4.968403931927788e-06, "loss": 19.0172, "step": 42570 }, { "epoch": 0.6713123541653528, "grad_norm": 75.48142541527749, "learning_rate": 4.964080920730837e-06, "loss": 18.7386, "step": 42580 }, { "epoch": 0.6714700132433625, "grad_norm": 82.05930382072246, "learning_rate": 4.959759170072093e-06, "loss": 18.2081, "step": 42590 }, { "epoch": 0.6716276723213722, "grad_norm": 82.27873371333517, "learning_rate": 4.955438681033336e-06, "loss": 17.7486, "step": 42600 }, { "epoch": 0.671785331399382, "grad_norm": 76.24171586690892, "learning_rate": 4.9511194546960175e-06, "loss": 18.1301, "step": 42610 }, { "epoch": 0.6719429904773917, "grad_norm": 75.68415092664138, "learning_rate": 4.9468014921412865e-06, "loss": 17.9736, "step": 42620 }, { "epoch": 0.6721006495554014, "grad_norm": 78.29473016235744, "learning_rate": 4.942484794449973e-06, "loss": 18.1835, "step": 42630 }, { "epoch": 0.6722583086334111, "grad_norm": 77.71534263471172, "learning_rate": 4.938169362702579e-06, "loss": 18.3591, "step": 42640 }, { "epoch": 0.6724159677114209, "grad_norm": 81.23451572656067, "learning_rate": 4.933855197979307e-06, "loss": 19.142, "step": 42650 }, { "epoch": 0.6725736267894306, "grad_norm": 76.08942960756963, "learning_rate": 4.9295423013600254e-06, "loss": 18.074, "step": 42660 }, { "epoch": 0.6727312858674402, "grad_norm": 79.94494760145578, "learning_rate": 4.925230673924298e-06, "loss": 18.5901, "step": 42670 }, { "epoch": 0.6728889449454499, "grad_norm": 79.68341736724894, "learning_rate": 4.920920316751362e-06, "loss": 18.2339, "step": 42680 }, { "epoch": 0.6730466040234596, "grad_norm": 84.81106334955146, "learning_rate": 4.916611230920146e-06, "loss": 18.5552, "step": 42690 }, { "epoch": 0.6732042631014694, "grad_norm": 84.97063960068057, "learning_rate": 4.9123034175092485e-06, "loss": 18.6535, "step": 42700 }, { "epoch": 0.6733619221794791, "grad_norm": 78.59376531996479, "learning_rate": 4.907996877596959e-06, "loss": 18.7709, "step": 42710 }, { "epoch": 0.6735195812574888, "grad_norm": 76.7612074317487, "learning_rate": 4.903691612261244e-06, "loss": 17.8368, "step": 42720 }, { "epoch": 0.6736772403354985, "grad_norm": 81.98043329121762, "learning_rate": 4.89938762257976e-06, "loss": 18.6534, "step": 42730 }, { "epoch": 0.6738348994135083, "grad_norm": 74.166702574572, "learning_rate": 4.8950849096298284e-06, "loss": 18.6071, "step": 42740 }, { "epoch": 0.673992558491518, "grad_norm": 77.23820755126265, "learning_rate": 4.890783474488457e-06, "loss": 18.3754, "step": 42750 }, { "epoch": 0.6741502175695276, "grad_norm": 75.09299422300435, "learning_rate": 4.886483318232342e-06, "loss": 18.1857, "step": 42760 }, { "epoch": 0.6743078766475373, "grad_norm": 84.19756632403752, "learning_rate": 4.882184441937849e-06, "loss": 18.6915, "step": 42770 }, { "epoch": 0.6744655357255471, "grad_norm": 77.25135482037078, "learning_rate": 4.87788684668103e-06, "loss": 18.2854, "step": 42780 }, { "epoch": 0.6746231948035568, "grad_norm": 73.82705046934733, "learning_rate": 4.87359053353761e-06, "loss": 18.0454, "step": 42790 }, { "epoch": 0.6747808538815665, "grad_norm": 76.88293797352434, "learning_rate": 4.869295503583004e-06, "loss": 18.9895, "step": 42800 }, { "epoch": 0.6749385129595762, "grad_norm": 82.2674580680914, "learning_rate": 4.865001757892289e-06, "loss": 18.3292, "step": 42810 }, { "epoch": 0.6750961720375859, "grad_norm": 84.07849339955776, "learning_rate": 4.860709297540236e-06, "loss": 18.471, "step": 42820 }, { "epoch": 0.6752538311155957, "grad_norm": 80.66007179990682, "learning_rate": 4.856418123601288e-06, "loss": 18.0849, "step": 42830 }, { "epoch": 0.6754114901936054, "grad_norm": 75.16037634043064, "learning_rate": 4.8521282371495694e-06, "loss": 18.729, "step": 42840 }, { "epoch": 0.675569149271615, "grad_norm": 79.49491345455702, "learning_rate": 4.847839639258873e-06, "loss": 18.6341, "step": 42850 }, { "epoch": 0.6757268083496247, "grad_norm": 75.97611064518662, "learning_rate": 4.843552331002682e-06, "loss": 18.7024, "step": 42860 }, { "epoch": 0.6758844674276345, "grad_norm": 77.48973916545413, "learning_rate": 4.839266313454147e-06, "loss": 17.9642, "step": 42870 }, { "epoch": 0.6760421265056442, "grad_norm": 84.53300070403111, "learning_rate": 4.834981587686095e-06, "loss": 18.9021, "step": 42880 }, { "epoch": 0.6761997855836539, "grad_norm": 73.49725432251326, "learning_rate": 4.830698154771041e-06, "loss": 18.334, "step": 42890 }, { "epoch": 0.6763574446616636, "grad_norm": 86.45944845533772, "learning_rate": 4.82641601578116e-06, "loss": 18.7248, "step": 42900 }, { "epoch": 0.6765151037396734, "grad_norm": 74.98705587177743, "learning_rate": 4.822135171788318e-06, "loss": 18.0165, "step": 42910 }, { "epoch": 0.6766727628176831, "grad_norm": 82.9302824124884, "learning_rate": 4.8178556238640495e-06, "loss": 18.8696, "step": 42920 }, { "epoch": 0.6768304218956928, "grad_norm": 75.38469484014865, "learning_rate": 4.8135773730795685e-06, "loss": 18.2479, "step": 42930 }, { "epoch": 0.6769880809737024, "grad_norm": 74.45323440946521, "learning_rate": 4.809300420505757e-06, "loss": 17.9114, "step": 42940 }, { "epoch": 0.6771457400517121, "grad_norm": 79.46641446260345, "learning_rate": 4.805024767213181e-06, "loss": 18.0205, "step": 42950 }, { "epoch": 0.6773033991297219, "grad_norm": 80.55326976057673, "learning_rate": 4.800750414272072e-06, "loss": 18.8001, "step": 42960 }, { "epoch": 0.6774610582077316, "grad_norm": 75.61744281851189, "learning_rate": 4.796477362752346e-06, "loss": 17.9369, "step": 42970 }, { "epoch": 0.6776187172857413, "grad_norm": 77.81393029955396, "learning_rate": 4.792205613723584e-06, "loss": 18.5631, "step": 42980 }, { "epoch": 0.677776376363751, "grad_norm": 78.4343276966239, "learning_rate": 4.78793516825505e-06, "loss": 19.298, "step": 42990 }, { "epoch": 0.6779340354417608, "grad_norm": 78.06524492072614, "learning_rate": 4.783666027415671e-06, "loss": 17.7361, "step": 43000 }, { "epoch": 0.6780916945197705, "grad_norm": 82.98478030166399, "learning_rate": 4.779398192274056e-06, "loss": 18.3694, "step": 43010 }, { "epoch": 0.6782493535977802, "grad_norm": 75.3791589831723, "learning_rate": 4.775131663898489e-06, "loss": 17.8792, "step": 43020 }, { "epoch": 0.6784070126757898, "grad_norm": 79.87852016977375, "learning_rate": 4.770866443356914e-06, "loss": 18.6268, "step": 43030 }, { "epoch": 0.6785646717537995, "grad_norm": 81.64398779587302, "learning_rate": 4.766602531716965e-06, "loss": 18.2975, "step": 43040 }, { "epoch": 0.6787223308318093, "grad_norm": 78.99849057492624, "learning_rate": 4.762339930045932e-06, "loss": 18.1618, "step": 43050 }, { "epoch": 0.678879989909819, "grad_norm": 82.62783520244488, "learning_rate": 4.758078639410789e-06, "loss": 18.159, "step": 43060 }, { "epoch": 0.6790376489878287, "grad_norm": 74.93495402694522, "learning_rate": 4.753818660878174e-06, "loss": 17.9425, "step": 43070 }, { "epoch": 0.6791953080658384, "grad_norm": 74.95835582939314, "learning_rate": 4.749559995514405e-06, "loss": 17.8621, "step": 43080 }, { "epoch": 0.6793529671438482, "grad_norm": 78.39097704197889, "learning_rate": 4.745302644385459e-06, "loss": 18.3927, "step": 43090 }, { "epoch": 0.6795106262218579, "grad_norm": 79.29725285915617, "learning_rate": 4.741046608556999e-06, "loss": 18.6017, "step": 43100 }, { "epoch": 0.6796682852998676, "grad_norm": 78.4397263754039, "learning_rate": 4.736791889094344e-06, "loss": 18.2475, "step": 43110 }, { "epoch": 0.6798259443778772, "grad_norm": 81.7059724243149, "learning_rate": 4.7325384870624945e-06, "loss": 18.6154, "step": 43120 }, { "epoch": 0.679983603455887, "grad_norm": 82.58553881159521, "learning_rate": 4.72828640352612e-06, "loss": 18.2022, "step": 43130 }, { "epoch": 0.6801412625338967, "grad_norm": 80.6091458013106, "learning_rate": 4.72403563954955e-06, "loss": 18.4657, "step": 43140 }, { "epoch": 0.6802989216119064, "grad_norm": 76.5499818527688, "learning_rate": 4.719786196196798e-06, "loss": 17.779, "step": 43150 }, { "epoch": 0.6804565806899161, "grad_norm": 74.86636686587401, "learning_rate": 4.715538074531535e-06, "loss": 18.1508, "step": 43160 }, { "epoch": 0.6806142397679258, "grad_norm": 79.47383711232668, "learning_rate": 4.71129127561711e-06, "loss": 18.6737, "step": 43170 }, { "epoch": 0.6807718988459356, "grad_norm": 78.30175531223529, "learning_rate": 4.707045800516532e-06, "loss": 18.6905, "step": 43180 }, { "epoch": 0.6809295579239453, "grad_norm": 72.71759343722215, "learning_rate": 4.70280165029249e-06, "loss": 18.0892, "step": 43190 }, { "epoch": 0.681087217001955, "grad_norm": 85.50403658068453, "learning_rate": 4.6985588260073266e-06, "loss": 18.2203, "step": 43200 }, { "epoch": 0.6812448760799646, "grad_norm": 76.99046188739516, "learning_rate": 4.694317328723067e-06, "loss": 18.1164, "step": 43210 }, { "epoch": 0.6814025351579744, "grad_norm": 78.35580263368782, "learning_rate": 4.690077159501396e-06, "loss": 17.8379, "step": 43220 }, { "epoch": 0.6815601942359841, "grad_norm": 76.97773996999939, "learning_rate": 4.685838319403674e-06, "loss": 18.1535, "step": 43230 }, { "epoch": 0.6817178533139938, "grad_norm": 78.70376483219363, "learning_rate": 4.681600809490912e-06, "loss": 18.4233, "step": 43240 }, { "epoch": 0.6818755123920035, "grad_norm": 85.94442333913314, "learning_rate": 4.67736463082381e-06, "loss": 18.9504, "step": 43250 }, { "epoch": 0.6820331714700132, "grad_norm": 81.2567767359574, "learning_rate": 4.673129784462718e-06, "loss": 18.5996, "step": 43260 }, { "epoch": 0.682190830548023, "grad_norm": 80.33315261990403, "learning_rate": 4.668896271467654e-06, "loss": 18.5098, "step": 43270 }, { "epoch": 0.6823484896260327, "grad_norm": 80.30785426219877, "learning_rate": 4.664664092898314e-06, "loss": 18.3352, "step": 43280 }, { "epoch": 0.6825061487040424, "grad_norm": 80.2791099748527, "learning_rate": 4.660433249814046e-06, "loss": 18.6397, "step": 43290 }, { "epoch": 0.682663807782052, "grad_norm": 79.93491116729807, "learning_rate": 4.656203743273876e-06, "loss": 17.8783, "step": 43300 }, { "epoch": 0.6828214668600618, "grad_norm": 88.05875069708733, "learning_rate": 4.651975574336483e-06, "loss": 18.5502, "step": 43310 }, { "epoch": 0.6829791259380715, "grad_norm": 78.62799000437101, "learning_rate": 4.647748744060222e-06, "loss": 18.7821, "step": 43320 }, { "epoch": 0.6831367850160812, "grad_norm": 86.43252677899432, "learning_rate": 4.643523253503108e-06, "loss": 18.6692, "step": 43330 }, { "epoch": 0.6832944440940909, "grad_norm": 76.081274849152, "learning_rate": 4.6392991037228245e-06, "loss": 18.1622, "step": 43340 }, { "epoch": 0.6834521031721007, "grad_norm": 78.96139199918736, "learning_rate": 4.635076295776709e-06, "loss": 18.0521, "step": 43350 }, { "epoch": 0.6836097622501104, "grad_norm": 82.24967955235546, "learning_rate": 4.630854830721779e-06, "loss": 18.8637, "step": 43360 }, { "epoch": 0.6837674213281201, "grad_norm": 78.87532900035447, "learning_rate": 4.626634709614697e-06, "loss": 18.6286, "step": 43370 }, { "epoch": 0.6839250804061298, "grad_norm": 86.38942523974517, "learning_rate": 4.622415933511809e-06, "loss": 18.8822, "step": 43380 }, { "epoch": 0.6840827394841394, "grad_norm": 75.70462449382818, "learning_rate": 4.618198503469106e-06, "loss": 18.4794, "step": 43390 }, { "epoch": 0.6842403985621492, "grad_norm": 73.39978643976842, "learning_rate": 4.613982420542259e-06, "loss": 17.9886, "step": 43400 }, { "epoch": 0.6843980576401589, "grad_norm": 80.98546951908038, "learning_rate": 4.609767685786586e-06, "loss": 18.7656, "step": 43410 }, { "epoch": 0.6845557167181686, "grad_norm": 77.596725658844, "learning_rate": 4.6055543002570764e-06, "loss": 18.0559, "step": 43420 }, { "epoch": 0.6847133757961783, "grad_norm": 74.00902876618372, "learning_rate": 4.601342265008388e-06, "loss": 18.3297, "step": 43430 }, { "epoch": 0.6848710348741881, "grad_norm": 77.65748050065442, "learning_rate": 4.597131581094823e-06, "loss": 18.5821, "step": 43440 }, { "epoch": 0.6850286939521978, "grad_norm": 75.53801406877201, "learning_rate": 4.592922249570364e-06, "loss": 18.7789, "step": 43450 }, { "epoch": 0.6851863530302075, "grad_norm": 80.2042988444886, "learning_rate": 4.588714271488639e-06, "loss": 17.8236, "step": 43460 }, { "epoch": 0.6853440121082172, "grad_norm": 83.29412947148006, "learning_rate": 4.58450764790295e-06, "loss": 18.088, "step": 43470 }, { "epoch": 0.685501671186227, "grad_norm": 76.9920062460646, "learning_rate": 4.5803023798662515e-06, "loss": 18.1101, "step": 43480 }, { "epoch": 0.6856593302642366, "grad_norm": 83.51571337968056, "learning_rate": 4.576098468431166e-06, "loss": 18.9172, "step": 43490 }, { "epoch": 0.6858169893422463, "grad_norm": 78.13250678144718, "learning_rate": 4.571895914649965e-06, "loss": 18.3344, "step": 43500 }, { "epoch": 0.685974648420256, "grad_norm": 76.58311555484643, "learning_rate": 4.567694719574594e-06, "loss": 17.974, "step": 43510 }, { "epoch": 0.6861323074982657, "grad_norm": 76.95830098983966, "learning_rate": 4.5634948842566485e-06, "loss": 18.0988, "step": 43520 }, { "epoch": 0.6862899665762755, "grad_norm": 80.30839199415884, "learning_rate": 4.559296409747391e-06, "loss": 18.3108, "step": 43530 }, { "epoch": 0.6864476256542852, "grad_norm": 93.21826382487956, "learning_rate": 4.555099297097739e-06, "loss": 18.8486, "step": 43540 }, { "epoch": 0.6866052847322949, "grad_norm": 76.09315637748726, "learning_rate": 4.550903547358263e-06, "loss": 17.8891, "step": 43550 }, { "epoch": 0.6867629438103046, "grad_norm": 80.24420825435372, "learning_rate": 4.546709161579207e-06, "loss": 17.883, "step": 43560 }, { "epoch": 0.6869206028883144, "grad_norm": 77.8898870798222, "learning_rate": 4.542516140810458e-06, "loss": 18.586, "step": 43570 }, { "epoch": 0.687078261966324, "grad_norm": 73.8126074902395, "learning_rate": 4.5383244861015765e-06, "loss": 18.1092, "step": 43580 }, { "epoch": 0.6872359210443337, "grad_norm": 75.86751007534956, "learning_rate": 4.5341341985017655e-06, "loss": 18.009, "step": 43590 }, { "epoch": 0.6873935801223434, "grad_norm": 79.66946383979734, "learning_rate": 4.5299452790599005e-06, "loss": 18.5071, "step": 43600 }, { "epoch": 0.6875512392003531, "grad_norm": 77.41365593639496, "learning_rate": 4.525757728824502e-06, "loss": 17.8568, "step": 43610 }, { "epoch": 0.6877088982783629, "grad_norm": 77.2460537468097, "learning_rate": 4.521571548843755e-06, "loss": 17.8947, "step": 43620 }, { "epoch": 0.6878665573563726, "grad_norm": 74.87592632474814, "learning_rate": 4.517386740165502e-06, "loss": 17.4571, "step": 43630 }, { "epoch": 0.6880242164343823, "grad_norm": 76.83349939106445, "learning_rate": 4.513203303837242e-06, "loss": 18.0564, "step": 43640 }, { "epoch": 0.688181875512392, "grad_norm": 85.46800720851891, "learning_rate": 4.509021240906124e-06, "loss": 18.1794, "step": 43650 }, { "epoch": 0.6883395345904018, "grad_norm": 78.48519308541977, "learning_rate": 4.504840552418962e-06, "loss": 18.3425, "step": 43660 }, { "epoch": 0.6884971936684114, "grad_norm": 77.23476666704038, "learning_rate": 4.50066123942222e-06, "loss": 17.7626, "step": 43670 }, { "epoch": 0.6886548527464211, "grad_norm": 81.79554688472709, "learning_rate": 4.496483302962017e-06, "loss": 18.3146, "step": 43680 }, { "epoch": 0.6888125118244308, "grad_norm": 78.57811455479826, "learning_rate": 4.492306744084136e-06, "loss": 17.7322, "step": 43690 }, { "epoch": 0.6889701709024406, "grad_norm": 77.81657859402539, "learning_rate": 4.488131563834002e-06, "loss": 18.4083, "step": 43700 }, { "epoch": 0.6891278299804503, "grad_norm": 78.48647547444145, "learning_rate": 4.483957763256705e-06, "loss": 18.3541, "step": 43710 }, { "epoch": 0.68928548905846, "grad_norm": 74.22450539898414, "learning_rate": 4.479785343396989e-06, "loss": 17.821, "step": 43720 }, { "epoch": 0.6894431481364697, "grad_norm": 77.57432939766399, "learning_rate": 4.4756143052992505e-06, "loss": 18.4232, "step": 43730 }, { "epoch": 0.6896008072144794, "grad_norm": 81.33603097696552, "learning_rate": 4.471444650007536e-06, "loss": 18.1194, "step": 43740 }, { "epoch": 0.6897584662924892, "grad_norm": 77.89361253979179, "learning_rate": 4.467276378565555e-06, "loss": 17.9841, "step": 43750 }, { "epoch": 0.6899161253704988, "grad_norm": 80.53546772839144, "learning_rate": 4.463109492016658e-06, "loss": 18.5942, "step": 43760 }, { "epoch": 0.6900737844485085, "grad_norm": 77.94212980983123, "learning_rate": 4.458943991403866e-06, "loss": 17.8993, "step": 43770 }, { "epoch": 0.6902314435265182, "grad_norm": 80.59946726753392, "learning_rate": 4.454779877769832e-06, "loss": 17.7395, "step": 43780 }, { "epoch": 0.690389102604528, "grad_norm": 83.41968089387538, "learning_rate": 4.450617152156882e-06, "loss": 18.3692, "step": 43790 }, { "epoch": 0.6905467616825377, "grad_norm": 81.68523728130457, "learning_rate": 4.44645581560698e-06, "loss": 18.359, "step": 43800 }, { "epoch": 0.6907044207605474, "grad_norm": 76.87785495150906, "learning_rate": 4.442295869161748e-06, "loss": 18.1299, "step": 43810 }, { "epoch": 0.6908620798385571, "grad_norm": 78.61790901776462, "learning_rate": 4.438137313862468e-06, "loss": 18.5806, "step": 43820 }, { "epoch": 0.6910197389165668, "grad_norm": 79.74904587450914, "learning_rate": 4.433980150750055e-06, "loss": 18.3313, "step": 43830 }, { "epoch": 0.6911773979945766, "grad_norm": 79.85431513968453, "learning_rate": 4.4298243808650945e-06, "loss": 18.4729, "step": 43840 }, { "epoch": 0.6913350570725862, "grad_norm": 73.01813219823568, "learning_rate": 4.425670005247807e-06, "loss": 17.7912, "step": 43850 }, { "epoch": 0.6914927161505959, "grad_norm": 75.55694696244113, "learning_rate": 4.421517024938082e-06, "loss": 18.1764, "step": 43860 }, { "epoch": 0.6916503752286056, "grad_norm": 76.31506806636048, "learning_rate": 4.417365440975441e-06, "loss": 18.027, "step": 43870 }, { "epoch": 0.6918080343066154, "grad_norm": 76.26587494169367, "learning_rate": 4.41321525439907e-06, "loss": 17.7327, "step": 43880 }, { "epoch": 0.6919656933846251, "grad_norm": 78.149769111536, "learning_rate": 4.409066466247797e-06, "loss": 18.5932, "step": 43890 }, { "epoch": 0.6921233524626348, "grad_norm": 80.02057721741474, "learning_rate": 4.404919077560106e-06, "loss": 18.162, "step": 43900 }, { "epoch": 0.6922810115406445, "grad_norm": 76.09306850173692, "learning_rate": 4.400773089374123e-06, "loss": 18.3471, "step": 43910 }, { "epoch": 0.6924386706186543, "grad_norm": 78.51917990786686, "learning_rate": 4.39662850272763e-06, "loss": 17.9799, "step": 43920 }, { "epoch": 0.692596329696664, "grad_norm": 81.44143492306732, "learning_rate": 4.392485318658061e-06, "loss": 17.7787, "step": 43930 }, { "epoch": 0.6927539887746736, "grad_norm": 77.54106134904806, "learning_rate": 4.388343538202487e-06, "loss": 17.8929, "step": 43940 }, { "epoch": 0.6929116478526833, "grad_norm": 71.52322031518965, "learning_rate": 4.384203162397641e-06, "loss": 18.2954, "step": 43950 }, { "epoch": 0.693069306930693, "grad_norm": 77.35815030606038, "learning_rate": 4.380064192279892e-06, "loss": 18.2429, "step": 43960 }, { "epoch": 0.6932269660087028, "grad_norm": 75.02557070268635, "learning_rate": 4.375926628885271e-06, "loss": 18.5872, "step": 43970 }, { "epoch": 0.6933846250867125, "grad_norm": 76.98399369043823, "learning_rate": 4.371790473249441e-06, "loss": 18.0385, "step": 43980 }, { "epoch": 0.6935422841647222, "grad_norm": 80.53862184806283, "learning_rate": 4.367655726407729e-06, "loss": 18.4964, "step": 43990 }, { "epoch": 0.6936999432427319, "grad_norm": 78.9721932389912, "learning_rate": 4.363522389395094e-06, "loss": 18.1019, "step": 44000 }, { "epoch": 0.6938576023207417, "grad_norm": 88.34565242553448, "learning_rate": 4.359390463246151e-06, "loss": 18.6213, "step": 44010 }, { "epoch": 0.6940152613987514, "grad_norm": 77.0128211911323, "learning_rate": 4.355259948995163e-06, "loss": 17.9213, "step": 44020 }, { "epoch": 0.694172920476761, "grad_norm": 83.27388769369972, "learning_rate": 4.351130847676039e-06, "loss": 19.0561, "step": 44030 }, { "epoch": 0.6943305795547707, "grad_norm": 76.27395045078121, "learning_rate": 4.347003160322326e-06, "loss": 17.9993, "step": 44040 }, { "epoch": 0.6944882386327805, "grad_norm": 82.25922483711228, "learning_rate": 4.342876887967229e-06, "loss": 17.8089, "step": 44050 }, { "epoch": 0.6946458977107902, "grad_norm": 78.2495725526461, "learning_rate": 4.338752031643589e-06, "loss": 18.2053, "step": 44060 }, { "epoch": 0.6948035567887999, "grad_norm": 77.70532718187924, "learning_rate": 4.334628592383895e-06, "loss": 18.2413, "step": 44070 }, { "epoch": 0.6949612158668096, "grad_norm": 79.85591894465243, "learning_rate": 4.33050657122029e-06, "loss": 18.3559, "step": 44080 }, { "epoch": 0.6951188749448193, "grad_norm": 81.72367906048633, "learning_rate": 4.326385969184546e-06, "loss": 17.7059, "step": 44090 }, { "epoch": 0.6952765340228291, "grad_norm": 73.51170800748794, "learning_rate": 4.3222667873080955e-06, "loss": 18.3437, "step": 44100 }, { "epoch": 0.6954341931008388, "grad_norm": 79.67359527472709, "learning_rate": 4.318149026622004e-06, "loss": 17.7441, "step": 44110 }, { "epoch": 0.6955918521788484, "grad_norm": 79.67886298757097, "learning_rate": 4.314032688156987e-06, "loss": 18.1742, "step": 44120 }, { "epoch": 0.6957495112568581, "grad_norm": 75.03312181965966, "learning_rate": 4.309917772943406e-06, "loss": 17.4269, "step": 44130 }, { "epoch": 0.6959071703348679, "grad_norm": 78.21126540043512, "learning_rate": 4.305804282011263e-06, "loss": 17.5619, "step": 44140 }, { "epoch": 0.6960648294128776, "grad_norm": 74.4830845572072, "learning_rate": 4.3016922163902005e-06, "loss": 17.9318, "step": 44150 }, { "epoch": 0.6962224884908873, "grad_norm": 80.7285279804033, "learning_rate": 4.297581577109512e-06, "loss": 17.7531, "step": 44160 }, { "epoch": 0.696380147568897, "grad_norm": 81.01724992266165, "learning_rate": 4.293472365198123e-06, "loss": 18.1001, "step": 44170 }, { "epoch": 0.6965378066469067, "grad_norm": 77.7409830035104, "learning_rate": 4.289364581684615e-06, "loss": 17.9455, "step": 44180 }, { "epoch": 0.6966954657249165, "grad_norm": 76.95970955752128, "learning_rate": 4.285258227597202e-06, "loss": 17.4794, "step": 44190 }, { "epoch": 0.6968531248029262, "grad_norm": 79.57175096352722, "learning_rate": 4.28115330396374e-06, "loss": 18.5789, "step": 44200 }, { "epoch": 0.6970107838809358, "grad_norm": 75.3976459323838, "learning_rate": 4.277049811811732e-06, "loss": 18.0787, "step": 44210 }, { "epoch": 0.6971684429589455, "grad_norm": 83.35073092686116, "learning_rate": 4.272947752168323e-06, "loss": 18.2985, "step": 44220 }, { "epoch": 0.6973261020369553, "grad_norm": 78.7877935331083, "learning_rate": 4.2688471260603e-06, "loss": 18.1432, "step": 44230 }, { "epoch": 0.697483761114965, "grad_norm": 78.50833466274767, "learning_rate": 4.264747934514082e-06, "loss": 17.6257, "step": 44240 }, { "epoch": 0.6976414201929747, "grad_norm": 77.76981242893841, "learning_rate": 4.260650178555742e-06, "loss": 17.7201, "step": 44250 }, { "epoch": 0.6977990792709844, "grad_norm": 78.18203169081826, "learning_rate": 4.256553859210979e-06, "loss": 17.7144, "step": 44260 }, { "epoch": 0.6979567383489942, "grad_norm": 77.54662150323995, "learning_rate": 4.252458977505149e-06, "loss": 18.3484, "step": 44270 }, { "epoch": 0.6981143974270039, "grad_norm": 86.07771423022963, "learning_rate": 4.248365534463233e-06, "loss": 18.014, "step": 44280 }, { "epoch": 0.6982720565050136, "grad_norm": 76.67795950784979, "learning_rate": 4.244273531109865e-06, "loss": 18.1569, "step": 44290 }, { "epoch": 0.6984297155830232, "grad_norm": 81.18017569217284, "learning_rate": 4.240182968469305e-06, "loss": 17.9129, "step": 44300 }, { "epoch": 0.6985873746610329, "grad_norm": 75.60477112685197, "learning_rate": 4.2360938475654625e-06, "loss": 17.7307, "step": 44310 }, { "epoch": 0.6987450337390427, "grad_norm": 80.06256300048655, "learning_rate": 4.232006169421887e-06, "loss": 17.8978, "step": 44320 }, { "epoch": 0.6989026928170524, "grad_norm": 81.86407073765359, "learning_rate": 4.227919935061757e-06, "loss": 17.8425, "step": 44330 }, { "epoch": 0.6990603518950621, "grad_norm": 80.02874681329631, "learning_rate": 4.2238351455079005e-06, "loss": 18.0537, "step": 44340 }, { "epoch": 0.6992180109730718, "grad_norm": 78.28076190302737, "learning_rate": 4.219751801782775e-06, "loss": 18.3099, "step": 44350 }, { "epoch": 0.6993756700510816, "grad_norm": 76.92574692904982, "learning_rate": 4.215669904908485e-06, "loss": 17.4811, "step": 44360 }, { "epoch": 0.6995333291290913, "grad_norm": 78.06141510420797, "learning_rate": 4.211589455906761e-06, "loss": 17.8035, "step": 44370 }, { "epoch": 0.699690988207101, "grad_norm": 77.98531517567066, "learning_rate": 4.207510455798985e-06, "loss": 18.0124, "step": 44380 }, { "epoch": 0.6998486472851106, "grad_norm": 80.26375693539875, "learning_rate": 4.203432905606164e-06, "loss": 18.1169, "step": 44390 }, { "epoch": 0.7000063063631203, "grad_norm": 86.9259227482584, "learning_rate": 4.199356806348952e-06, "loss": 18.0026, "step": 44400 }, { "epoch": 0.7001639654411301, "grad_norm": 190.28893974147465, "learning_rate": 4.195282159047629e-06, "loss": 18.5602, "step": 44410 }, { "epoch": 0.7003216245191398, "grad_norm": 77.09643358709918, "learning_rate": 4.191208964722122e-06, "loss": 18.0915, "step": 44420 }, { "epoch": 0.7004792835971495, "grad_norm": 81.30523970965304, "learning_rate": 4.18713722439199e-06, "loss": 18.0498, "step": 44430 }, { "epoch": 0.7006369426751592, "grad_norm": 84.65633414044859, "learning_rate": 4.183066939076432e-06, "loss": 18.2186, "step": 44440 }, { "epoch": 0.700794601753169, "grad_norm": 78.90816254316957, "learning_rate": 4.178998109794274e-06, "loss": 17.6231, "step": 44450 }, { "epoch": 0.7009522608311787, "grad_norm": 76.02310300030405, "learning_rate": 4.1749307375639796e-06, "loss": 17.7917, "step": 44460 }, { "epoch": 0.7011099199091884, "grad_norm": 72.95267266424969, "learning_rate": 4.170864823403659e-06, "loss": 17.9584, "step": 44470 }, { "epoch": 0.701267578987198, "grad_norm": 74.63926123127482, "learning_rate": 4.166800368331038e-06, "loss": 17.4536, "step": 44480 }, { "epoch": 0.7014252380652078, "grad_norm": 79.22016439158017, "learning_rate": 4.1627373733635e-06, "loss": 17.707, "step": 44490 }, { "epoch": 0.7015828971432175, "grad_norm": 74.46881355489893, "learning_rate": 4.158675839518042e-06, "loss": 17.6688, "step": 44500 }, { "epoch": 0.7017405562212272, "grad_norm": 83.94322306108042, "learning_rate": 4.154615767811308e-06, "loss": 18.5892, "step": 44510 }, { "epoch": 0.7018982152992369, "grad_norm": 76.7465016793839, "learning_rate": 4.1505571592595705e-06, "loss": 17.6715, "step": 44520 }, { "epoch": 0.7020558743772466, "grad_norm": 75.75998085640246, "learning_rate": 4.1465000148787445e-06, "loss": 17.7418, "step": 44530 }, { "epoch": 0.7022135334552564, "grad_norm": 77.20446381614747, "learning_rate": 4.142444335684363e-06, "loss": 18.3161, "step": 44540 }, { "epoch": 0.7023711925332661, "grad_norm": 98.47060705280012, "learning_rate": 4.138390122691607e-06, "loss": 17.9582, "step": 44550 }, { "epoch": 0.7025288516112758, "grad_norm": 78.55918729856252, "learning_rate": 4.134337376915278e-06, "loss": 17.1306, "step": 44560 }, { "epoch": 0.7026865106892854, "grad_norm": 76.09212683785505, "learning_rate": 4.130286099369825e-06, "loss": 17.5074, "step": 44570 }, { "epoch": 0.7028441697672952, "grad_norm": 75.23974954296624, "learning_rate": 4.1262362910693136e-06, "loss": 17.8888, "step": 44580 }, { "epoch": 0.7030018288453049, "grad_norm": 83.24042468895935, "learning_rate": 4.122187953027455e-06, "loss": 18.444, "step": 44590 }, { "epoch": 0.7031594879233146, "grad_norm": 76.56462852952549, "learning_rate": 4.118141086257579e-06, "loss": 17.6754, "step": 44600 }, { "epoch": 0.7033171470013243, "grad_norm": 76.39344860652699, "learning_rate": 4.114095691772664e-06, "loss": 17.8791, "step": 44610 }, { "epoch": 0.7034748060793341, "grad_norm": 80.60852299878167, "learning_rate": 4.1100517705853015e-06, "loss": 18.3913, "step": 44620 }, { "epoch": 0.7036324651573438, "grad_norm": 74.04325877482111, "learning_rate": 4.106009323707727e-06, "loss": 18.4226, "step": 44630 }, { "epoch": 0.7037901242353535, "grad_norm": 79.30984721719956, "learning_rate": 4.101968352151808e-06, "loss": 17.5921, "step": 44640 }, { "epoch": 0.7039477833133632, "grad_norm": 79.29389816240462, "learning_rate": 4.097928856929031e-06, "loss": 18.1882, "step": 44650 }, { "epoch": 0.7041054423913728, "grad_norm": 77.74029052118323, "learning_rate": 4.093890839050525e-06, "loss": 17.754, "step": 44660 }, { "epoch": 0.7042631014693826, "grad_norm": 87.50000398695906, "learning_rate": 4.089854299527038e-06, "loss": 18.5087, "step": 44670 }, { "epoch": 0.7044207605473923, "grad_norm": 75.53092077643757, "learning_rate": 4.085819239368962e-06, "loss": 17.4692, "step": 44680 }, { "epoch": 0.704578419625402, "grad_norm": 78.0635069151504, "learning_rate": 4.081785659586303e-06, "loss": 17.296, "step": 44690 }, { "epoch": 0.7047360787034117, "grad_norm": 74.19025314530344, "learning_rate": 4.077753561188711e-06, "loss": 17.9986, "step": 44700 }, { "epoch": 0.7048937377814215, "grad_norm": 75.57107521583939, "learning_rate": 4.073722945185451e-06, "loss": 17.9162, "step": 44710 }, { "epoch": 0.7050513968594312, "grad_norm": 78.15770794766954, "learning_rate": 4.069693812585428e-06, "loss": 17.8335, "step": 44720 }, { "epoch": 0.7052090559374409, "grad_norm": 77.76703934874142, "learning_rate": 4.065666164397176e-06, "loss": 18.4792, "step": 44730 }, { "epoch": 0.7053667150154506, "grad_norm": 83.21407706601862, "learning_rate": 4.061640001628847e-06, "loss": 18.8244, "step": 44740 }, { "epoch": 0.7055243740934602, "grad_norm": 74.389529061714, "learning_rate": 4.057615325288235e-06, "loss": 17.801, "step": 44750 }, { "epoch": 0.70568203317147, "grad_norm": 78.71751339392775, "learning_rate": 4.0535921363827444e-06, "loss": 18.071, "step": 44760 }, { "epoch": 0.7058396922494797, "grad_norm": 75.07187069484932, "learning_rate": 4.049570435919428e-06, "loss": 17.6632, "step": 44770 }, { "epoch": 0.7059973513274894, "grad_norm": 80.54554442231816, "learning_rate": 4.045550224904947e-06, "loss": 17.9131, "step": 44780 }, { "epoch": 0.7061550104054991, "grad_norm": 76.65040782743489, "learning_rate": 4.041531504345605e-06, "loss": 17.4943, "step": 44790 }, { "epoch": 0.7063126694835089, "grad_norm": 76.57026978234654, "learning_rate": 4.037514275247319e-06, "loss": 17.8686, "step": 44800 }, { "epoch": 0.7064703285615186, "grad_norm": 76.04760457991677, "learning_rate": 4.033498538615645e-06, "loss": 17.9995, "step": 44810 }, { "epoch": 0.7066279876395283, "grad_norm": 79.05376455076058, "learning_rate": 4.029484295455756e-06, "loss": 18.28, "step": 44820 }, { "epoch": 0.706785646717538, "grad_norm": 75.55779853171369, "learning_rate": 4.025471546772462e-06, "loss": 18.3816, "step": 44830 }, { "epoch": 0.7069433057955478, "grad_norm": 75.62242161887387, "learning_rate": 4.021460293570182e-06, "loss": 17.3259, "step": 44840 }, { "epoch": 0.7071009648735574, "grad_norm": 76.440355259804, "learning_rate": 4.01745053685298e-06, "loss": 18.011, "step": 44850 }, { "epoch": 0.7072586239515671, "grad_norm": 77.41178047358297, "learning_rate": 4.013442277624532e-06, "loss": 17.8877, "step": 44860 }, { "epoch": 0.7074162830295768, "grad_norm": 78.7730675704909, "learning_rate": 4.009435516888139e-06, "loss": 17.6813, "step": 44870 }, { "epoch": 0.7075739421075865, "grad_norm": 76.16444708994753, "learning_rate": 4.005430255646738e-06, "loss": 17.7833, "step": 44880 }, { "epoch": 0.7077316011855963, "grad_norm": 735.0250330915158, "learning_rate": 4.001426494902879e-06, "loss": 18.3259, "step": 44890 }, { "epoch": 0.707889260263606, "grad_norm": 79.53009970090329, "learning_rate": 3.997424235658747e-06, "loss": 17.6943, "step": 44900 }, { "epoch": 0.7080469193416157, "grad_norm": 85.80728433922285, "learning_rate": 3.993423478916136e-06, "loss": 17.8537, "step": 44910 }, { "epoch": 0.7082045784196254, "grad_norm": 73.6716418605857, "learning_rate": 3.989424225676481e-06, "loss": 17.8453, "step": 44920 }, { "epoch": 0.7083622374976352, "grad_norm": 73.32179738744918, "learning_rate": 3.985426476940828e-06, "loss": 17.6657, "step": 44930 }, { "epoch": 0.7085198965756448, "grad_norm": 76.91354663295337, "learning_rate": 3.98143023370986e-06, "loss": 17.6759, "step": 44940 }, { "epoch": 0.7086775556536545, "grad_norm": 78.43135714505509, "learning_rate": 3.977435496983864e-06, "loss": 17.8611, "step": 44950 }, { "epoch": 0.7088352147316642, "grad_norm": 79.55887109884068, "learning_rate": 3.9734422677627685e-06, "loss": 18.1337, "step": 44960 }, { "epoch": 0.7089928738096739, "grad_norm": 72.98447942692361, "learning_rate": 3.96945054704611e-06, "loss": 17.5756, "step": 44970 }, { "epoch": 0.7091505328876837, "grad_norm": 82.02940951781201, "learning_rate": 3.9654603358330605e-06, "loss": 17.2986, "step": 44980 }, { "epoch": 0.7093081919656934, "grad_norm": 77.14814361967788, "learning_rate": 3.961471635122404e-06, "loss": 17.7458, "step": 44990 }, { "epoch": 0.7094658510437031, "grad_norm": 77.34992065076574, "learning_rate": 3.957484445912546e-06, "loss": 17.7005, "step": 45000 }, { "epoch": 0.7096235101217128, "grad_norm": 83.83035186168291, "learning_rate": 3.953498769201522e-06, "loss": 18.0487, "step": 45010 }, { "epoch": 0.7097811691997226, "grad_norm": 77.49473682704561, "learning_rate": 3.949514605986985e-06, "loss": 17.9678, "step": 45020 }, { "epoch": 0.7099388282777322, "grad_norm": 77.66372188159689, "learning_rate": 3.945531957266212e-06, "loss": 17.9943, "step": 45030 }, { "epoch": 0.7100964873557419, "grad_norm": 79.82560183452172, "learning_rate": 3.94155082403609e-06, "loss": 18.1765, "step": 45040 }, { "epoch": 0.7102541464337516, "grad_norm": 77.80284585047164, "learning_rate": 3.9375712072931425e-06, "loss": 17.1922, "step": 45050 }, { "epoch": 0.7104118055117614, "grad_norm": 79.58230177037183, "learning_rate": 3.933593108033497e-06, "loss": 18.5138, "step": 45060 }, { "epoch": 0.7105694645897711, "grad_norm": 77.67331561138361, "learning_rate": 3.929616527252916e-06, "loss": 17.5703, "step": 45070 }, { "epoch": 0.7107271236677808, "grad_norm": 79.17878081564999, "learning_rate": 3.925641465946771e-06, "loss": 17.4822, "step": 45080 }, { "epoch": 0.7108847827457905, "grad_norm": 80.11368557611328, "learning_rate": 3.921667925110062e-06, "loss": 17.9635, "step": 45090 }, { "epoch": 0.7110424418238002, "grad_norm": 79.28530135545645, "learning_rate": 3.917695905737396e-06, "loss": 17.9226, "step": 45100 }, { "epoch": 0.71120010090181, "grad_norm": 79.38054159074855, "learning_rate": 3.913725408823014e-06, "loss": 17.7921, "step": 45110 }, { "epoch": 0.7113577599798196, "grad_norm": 78.0941771899219, "learning_rate": 3.90975643536077e-06, "loss": 17.8014, "step": 45120 }, { "epoch": 0.7115154190578293, "grad_norm": 74.7040277071064, "learning_rate": 3.905788986344129e-06, "loss": 17.7304, "step": 45130 }, { "epoch": 0.711673078135839, "grad_norm": 79.87505804096698, "learning_rate": 3.901823062766189e-06, "loss": 17.7178, "step": 45140 }, { "epoch": 0.7118307372138488, "grad_norm": 76.22344249404979, "learning_rate": 3.897858665619649e-06, "loss": 17.7987, "step": 45150 }, { "epoch": 0.7119883962918585, "grad_norm": 78.41319479711808, "learning_rate": 3.893895795896845e-06, "loss": 17.2214, "step": 45160 }, { "epoch": 0.7121460553698682, "grad_norm": 75.72075250327171, "learning_rate": 3.889934454589711e-06, "loss": 17.7898, "step": 45170 }, { "epoch": 0.7123037144478779, "grad_norm": 73.99789720638098, "learning_rate": 3.885974642689817e-06, "loss": 17.7906, "step": 45180 }, { "epoch": 0.7124613735258877, "grad_norm": 79.33778960936209, "learning_rate": 3.882016361188334e-06, "loss": 18.0201, "step": 45190 }, { "epoch": 0.7126190326038974, "grad_norm": 76.48832722465052, "learning_rate": 3.878059611076065e-06, "loss": 17.701, "step": 45200 }, { "epoch": 0.712776691681907, "grad_norm": 75.65673936644922, "learning_rate": 3.874104393343414e-06, "loss": 17.6968, "step": 45210 }, { "epoch": 0.7129343507599167, "grad_norm": 82.13336865918566, "learning_rate": 3.870150708980413e-06, "loss": 18.0782, "step": 45220 }, { "epoch": 0.7130920098379264, "grad_norm": 80.92322990107573, "learning_rate": 3.866198558976707e-06, "loss": 17.5073, "step": 45230 }, { "epoch": 0.7132496689159362, "grad_norm": 73.79183120152223, "learning_rate": 3.86224794432156e-06, "loss": 17.3851, "step": 45240 }, { "epoch": 0.7134073279939459, "grad_norm": 74.65830540331352, "learning_rate": 3.858298866003844e-06, "loss": 17.6771, "step": 45250 }, { "epoch": 0.7135649870719556, "grad_norm": 74.69791875931955, "learning_rate": 3.854351325012048e-06, "loss": 18.0077, "step": 45260 }, { "epoch": 0.7137226461499653, "grad_norm": 75.24769070468464, "learning_rate": 3.850405322334287e-06, "loss": 17.9397, "step": 45270 }, { "epoch": 0.7138803052279751, "grad_norm": 77.16449201510245, "learning_rate": 3.846460858958272e-06, "loss": 17.4048, "step": 45280 }, { "epoch": 0.7140379643059848, "grad_norm": 73.42202028652265, "learning_rate": 3.842517935871351e-06, "loss": 16.8268, "step": 45290 }, { "epoch": 0.7141956233839944, "grad_norm": 76.83412384621698, "learning_rate": 3.838576554060465e-06, "loss": 18.3545, "step": 45300 }, { "epoch": 0.7143532824620041, "grad_norm": 78.7119367546208, "learning_rate": 3.834636714512184e-06, "loss": 17.5636, "step": 45310 }, { "epoch": 0.7145109415400138, "grad_norm": 83.98870008441418, "learning_rate": 3.830698418212687e-06, "loss": 18.3292, "step": 45320 }, { "epoch": 0.7146686006180236, "grad_norm": 84.71870324272265, "learning_rate": 3.82676166614777e-06, "loss": 17.6019, "step": 45330 }, { "epoch": 0.7148262596960333, "grad_norm": 75.74389524068478, "learning_rate": 3.822826459302832e-06, "loss": 17.735, "step": 45340 }, { "epoch": 0.714983918774043, "grad_norm": 79.19965894615967, "learning_rate": 3.818892798662901e-06, "loss": 17.8603, "step": 45350 }, { "epoch": 0.7151415778520527, "grad_norm": 75.61441234599364, "learning_rate": 3.8149606852126005e-06, "loss": 18.0005, "step": 45360 }, { "epoch": 0.7152992369300625, "grad_norm": 78.31440473691046, "learning_rate": 3.811030119936184e-06, "loss": 17.3486, "step": 45370 }, { "epoch": 0.7154568960080722, "grad_norm": 73.43574969126016, "learning_rate": 3.8071011038175056e-06, "loss": 16.9545, "step": 45380 }, { "epoch": 0.7156145550860818, "grad_norm": 72.9875672267476, "learning_rate": 3.8031736378400308e-06, "loss": 17.1505, "step": 45390 }, { "epoch": 0.7157722141640915, "grad_norm": 79.12714273569958, "learning_rate": 3.7992477229868464e-06, "loss": 17.95, "step": 45400 }, { "epoch": 0.7159298732421013, "grad_norm": 75.07242738280871, "learning_rate": 3.795323360240648e-06, "loss": 17.7568, "step": 45410 }, { "epoch": 0.716087532320111, "grad_norm": 77.90209087276915, "learning_rate": 3.7914005505837348e-06, "loss": 17.7656, "step": 45420 }, { "epoch": 0.7162451913981207, "grad_norm": 75.24186092199058, "learning_rate": 3.7874792949980265e-06, "loss": 17.6669, "step": 45430 }, { "epoch": 0.7164028504761304, "grad_norm": 83.492445962183, "learning_rate": 3.783559594465055e-06, "loss": 17.6812, "step": 45440 }, { "epoch": 0.7165605095541401, "grad_norm": 81.43909570693226, "learning_rate": 3.7796414499659484e-06, "loss": 17.3531, "step": 45450 }, { "epoch": 0.7167181686321499, "grad_norm": 75.40842569829174, "learning_rate": 3.7757248624814657e-06, "loss": 18.0285, "step": 45460 }, { "epoch": 0.7168758277101596, "grad_norm": 75.56190543951858, "learning_rate": 3.7718098329919574e-06, "loss": 17.2678, "step": 45470 }, { "epoch": 0.7170334867881692, "grad_norm": 81.69994892664454, "learning_rate": 3.767896362477399e-06, "loss": 17.4745, "step": 45480 }, { "epoch": 0.7171911458661789, "grad_norm": 76.20582785316407, "learning_rate": 3.763984451917363e-06, "loss": 17.2137, "step": 45490 }, { "epoch": 0.7173488049441887, "grad_norm": 85.06538741551125, "learning_rate": 3.7600741022910436e-06, "loss": 17.9969, "step": 45500 }, { "epoch": 0.7175064640221984, "grad_norm": 73.45056262661379, "learning_rate": 3.7561653145772325e-06, "loss": 17.5912, "step": 45510 }, { "epoch": 0.7176641231002081, "grad_norm": 75.71094534715175, "learning_rate": 3.7522580897543404e-06, "loss": 17.0701, "step": 45520 }, { "epoch": 0.7178217821782178, "grad_norm": 78.58844494203757, "learning_rate": 3.7483524288003837e-06, "loss": 17.3448, "step": 45530 }, { "epoch": 0.7179794412562275, "grad_norm": 75.03211664390847, "learning_rate": 3.74444833269298e-06, "loss": 17.9302, "step": 45540 }, { "epoch": 0.7181371003342373, "grad_norm": 73.79336194829021, "learning_rate": 3.740545802409371e-06, "loss": 17.2152, "step": 45550 }, { "epoch": 0.718294759412247, "grad_norm": 79.45549240305924, "learning_rate": 3.736644838926388e-06, "loss": 17.8319, "step": 45560 }, { "epoch": 0.7184524184902567, "grad_norm": 75.06358980437085, "learning_rate": 3.7327454432204844e-06, "loss": 17.0793, "step": 45570 }, { "epoch": 0.7186100775682663, "grad_norm": 78.35908955732144, "learning_rate": 3.728847616267712e-06, "loss": 17.5338, "step": 45580 }, { "epoch": 0.7187677366462761, "grad_norm": 74.81972191917009, "learning_rate": 3.7249513590437393e-06, "loss": 17.2631, "step": 45590 }, { "epoch": 0.7189253957242858, "grad_norm": 75.32648077174039, "learning_rate": 3.7210566725238295e-06, "loss": 17.7299, "step": 45600 }, { "epoch": 0.7190830548022955, "grad_norm": 82.37202933366488, "learning_rate": 3.7171635576828636e-06, "loss": 17.054, "step": 45610 }, { "epoch": 0.7192407138803052, "grad_norm": 76.01642225729834, "learning_rate": 3.713272015495324e-06, "loss": 17.4827, "step": 45620 }, { "epoch": 0.719398372958315, "grad_norm": 77.71445375595253, "learning_rate": 3.709382046935304e-06, "loss": 17.8101, "step": 45630 }, { "epoch": 0.7195560320363247, "grad_norm": 79.67409052660547, "learning_rate": 3.7054936529764975e-06, "loss": 18.0144, "step": 45640 }, { "epoch": 0.7197136911143344, "grad_norm": 83.74865060643835, "learning_rate": 3.701606834592201e-06, "loss": 17.2809, "step": 45650 }, { "epoch": 0.719871350192344, "grad_norm": 78.6897993791136, "learning_rate": 3.69772159275533e-06, "loss": 18.1649, "step": 45660 }, { "epoch": 0.7200290092703537, "grad_norm": 76.7228860281304, "learning_rate": 3.6938379284383896e-06, "loss": 17.6718, "step": 45670 }, { "epoch": 0.7201866683483635, "grad_norm": 79.57322156247078, "learning_rate": 3.6899558426135063e-06, "loss": 17.6653, "step": 45680 }, { "epoch": 0.7203443274263732, "grad_norm": 81.24890603529398, "learning_rate": 3.6860753362523947e-06, "loss": 17.5242, "step": 45690 }, { "epoch": 0.7205019865043829, "grad_norm": 80.18636258246543, "learning_rate": 3.6821964103263897e-06, "loss": 18.2299, "step": 45700 }, { "epoch": 0.7206596455823926, "grad_norm": 74.97539345776552, "learning_rate": 3.6783190658064148e-06, "loss": 17.2772, "step": 45710 }, { "epoch": 0.7208173046604024, "grad_norm": 79.38098544820637, "learning_rate": 3.6744433036630125e-06, "loss": 17.6648, "step": 45720 }, { "epoch": 0.7209749637384121, "grad_norm": 81.01245169506964, "learning_rate": 3.670569124866319e-06, "loss": 17.7477, "step": 45730 }, { "epoch": 0.7211326228164218, "grad_norm": 78.48546288774637, "learning_rate": 3.6666965303860847e-06, "loss": 17.7146, "step": 45740 }, { "epoch": 0.7212902818944315, "grad_norm": 78.59237903875271, "learning_rate": 3.6628255211916476e-06, "loss": 17.7721, "step": 45750 }, { "epoch": 0.7214479409724412, "grad_norm": 74.80125573199214, "learning_rate": 3.6589560982519654e-06, "loss": 17.6112, "step": 45760 }, { "epoch": 0.7216056000504509, "grad_norm": 94.7617898755612, "learning_rate": 3.6550882625355856e-06, "loss": 17.2214, "step": 45770 }, { "epoch": 0.7217632591284606, "grad_norm": 73.62239249198278, "learning_rate": 3.651222015010669e-06, "loss": 17.5294, "step": 45780 }, { "epoch": 0.7219209182064703, "grad_norm": 74.51111017040971, "learning_rate": 3.6473573566449714e-06, "loss": 18.1366, "step": 45790 }, { "epoch": 0.72207857728448, "grad_norm": 76.80279942643938, "learning_rate": 3.64349428840585e-06, "loss": 17.1415, "step": 45800 }, { "epoch": 0.7222362363624898, "grad_norm": 73.70647645437887, "learning_rate": 3.639632811260272e-06, "loss": 17.2716, "step": 45810 }, { "epoch": 0.7223938954404995, "grad_norm": 74.45199489343354, "learning_rate": 3.6357729261747986e-06, "loss": 17.7408, "step": 45820 }, { "epoch": 0.7225515545185092, "grad_norm": 75.93632023162647, "learning_rate": 3.631914634115602e-06, "loss": 17.5569, "step": 45830 }, { "epoch": 0.7227092135965189, "grad_norm": 74.96635480584375, "learning_rate": 3.6280579360484405e-06, "loss": 17.7728, "step": 45840 }, { "epoch": 0.7228668726745286, "grad_norm": 77.65715110163711, "learning_rate": 3.6242028329386905e-06, "loss": 17.244, "step": 45850 }, { "epoch": 0.7230245317525383, "grad_norm": 79.15382618000719, "learning_rate": 3.6203493257513123e-06, "loss": 17.619, "step": 45860 }, { "epoch": 0.723182190830548, "grad_norm": 79.10051040317295, "learning_rate": 3.6164974154508837e-06, "loss": 17.44, "step": 45870 }, { "epoch": 0.7233398499085577, "grad_norm": 77.09626660658616, "learning_rate": 3.6126471030015664e-06, "loss": 17.028, "step": 45880 }, { "epoch": 0.7234975089865674, "grad_norm": 83.12943706378073, "learning_rate": 3.6087983893671363e-06, "loss": 17.5896, "step": 45890 }, { "epoch": 0.7236551680645772, "grad_norm": 82.52764382587975, "learning_rate": 3.604951275510957e-06, "loss": 17.7068, "step": 45900 }, { "epoch": 0.7238128271425869, "grad_norm": 73.91487165773047, "learning_rate": 3.6011057623960024e-06, "loss": 17.21, "step": 45910 }, { "epoch": 0.7239704862205966, "grad_norm": 75.93039716102638, "learning_rate": 3.597261850984841e-06, "loss": 17.1824, "step": 45920 }, { "epoch": 0.7241281452986063, "grad_norm": 78.62043056346421, "learning_rate": 3.5934195422396355e-06, "loss": 17.3804, "step": 45930 }, { "epoch": 0.724285804376616, "grad_norm": 73.56008807733001, "learning_rate": 3.5895788371221584e-06, "loss": 17.4049, "step": 45940 }, { "epoch": 0.7244434634546257, "grad_norm": 92.39677753946293, "learning_rate": 3.5857397365937684e-06, "loss": 18.2091, "step": 45950 }, { "epoch": 0.7246011225326354, "grad_norm": 75.70996273181109, "learning_rate": 3.5819022416154337e-06, "loss": 16.9994, "step": 45960 }, { "epoch": 0.7247587816106451, "grad_norm": 74.24059494805411, "learning_rate": 3.578066353147711e-06, "loss": 17.6911, "step": 45970 }, { "epoch": 0.7249164406886549, "grad_norm": 76.92920847720636, "learning_rate": 3.574232072150765e-06, "loss": 17.8013, "step": 45980 }, { "epoch": 0.7250740997666646, "grad_norm": 79.0146585955057, "learning_rate": 3.5703993995843454e-06, "loss": 17.5809, "step": 45990 }, { "epoch": 0.7252317588446743, "grad_norm": 74.39716134963992, "learning_rate": 3.5665683364078152e-06, "loss": 17.162, "step": 46000 }, { "epoch": 0.725389417922684, "grad_norm": 74.00769320254064, "learning_rate": 3.5627388835801167e-06, "loss": 17.1251, "step": 46010 }, { "epoch": 0.7255470770006937, "grad_norm": 74.53396140930992, "learning_rate": 3.5589110420598026e-06, "loss": 17.3892, "step": 46020 }, { "epoch": 0.7257047360787034, "grad_norm": 77.06449768157056, "learning_rate": 3.5550848128050176e-06, "loss": 17.1696, "step": 46030 }, { "epoch": 0.7258623951567131, "grad_norm": 69.97506214309627, "learning_rate": 3.5512601967735073e-06, "loss": 16.8796, "step": 46040 }, { "epoch": 0.7260200542347228, "grad_norm": 76.03599118941561, "learning_rate": 3.547437194922605e-06, "loss": 17.1423, "step": 46050 }, { "epoch": 0.7261777133127325, "grad_norm": 74.32838292168286, "learning_rate": 3.543615808209241e-06, "loss": 17.6595, "step": 46060 }, { "epoch": 0.7263353723907423, "grad_norm": 74.1610747337476, "learning_rate": 3.539796037589951e-06, "loss": 17.1651, "step": 46070 }, { "epoch": 0.726493031468752, "grad_norm": 78.5439706984469, "learning_rate": 3.5359778840208547e-06, "loss": 17.5838, "step": 46080 }, { "epoch": 0.7266506905467617, "grad_norm": 76.20918915117863, "learning_rate": 3.5321613484576767e-06, "loss": 17.4235, "step": 46090 }, { "epoch": 0.7268083496247714, "grad_norm": 77.4516582557228, "learning_rate": 3.5283464318557258e-06, "loss": 17.6416, "step": 46100 }, { "epoch": 0.726966008702781, "grad_norm": 82.14612777404699, "learning_rate": 3.524533135169915e-06, "loss": 17.0785, "step": 46110 }, { "epoch": 0.7271236677807908, "grad_norm": 73.86739875774742, "learning_rate": 3.52072145935475e-06, "loss": 17.0867, "step": 46120 }, { "epoch": 0.7272813268588005, "grad_norm": 72.87084852407524, "learning_rate": 3.5169114053643317e-06, "loss": 16.8753, "step": 46130 }, { "epoch": 0.7274389859368102, "grad_norm": 77.71537824705526, "learning_rate": 3.513102974152346e-06, "loss": 16.9121, "step": 46140 }, { "epoch": 0.7275966450148199, "grad_norm": 78.06279055479595, "learning_rate": 3.509296166672085e-06, "loss": 17.4102, "step": 46150 }, { "epoch": 0.7277543040928297, "grad_norm": 75.95157058791771, "learning_rate": 3.505490983876423e-06, "loss": 17.9381, "step": 46160 }, { "epoch": 0.7279119631708394, "grad_norm": 77.16213803282282, "learning_rate": 3.5016874267178403e-06, "loss": 17.6601, "step": 46170 }, { "epoch": 0.7280696222488491, "grad_norm": 75.75405495700323, "learning_rate": 3.497885496148399e-06, "loss": 17.2194, "step": 46180 }, { "epoch": 0.7282272813268588, "grad_norm": 77.40850761495365, "learning_rate": 3.494085193119755e-06, "loss": 17.6896, "step": 46190 }, { "epoch": 0.7283849404048686, "grad_norm": 72.31749802140367, "learning_rate": 3.490286518583168e-06, "loss": 17.5767, "step": 46200 }, { "epoch": 0.7285425994828783, "grad_norm": 73.13046468491132, "learning_rate": 3.486489473489474e-06, "loss": 17.5412, "step": 46210 }, { "epoch": 0.7287002585608879, "grad_norm": 75.7261298814829, "learning_rate": 3.4826940587891132e-06, "loss": 17.6042, "step": 46220 }, { "epoch": 0.7288579176388976, "grad_norm": 78.44827784067046, "learning_rate": 3.4789002754321157e-06, "loss": 17.4987, "step": 46230 }, { "epoch": 0.7290155767169073, "grad_norm": 76.05235462685505, "learning_rate": 3.475108124368101e-06, "loss": 17.6059, "step": 46240 }, { "epoch": 0.7291732357949171, "grad_norm": 77.6470611409966, "learning_rate": 3.471317606546276e-06, "loss": 17.0073, "step": 46250 }, { "epoch": 0.7293308948729268, "grad_norm": 76.93075804402098, "learning_rate": 3.4675287229154495e-06, "loss": 17.6891, "step": 46260 }, { "epoch": 0.7294885539509365, "grad_norm": 75.5646201331903, "learning_rate": 3.4637414744240093e-06, "loss": 18.1243, "step": 46270 }, { "epoch": 0.7296462130289462, "grad_norm": 78.38183324417636, "learning_rate": 3.4599558620199448e-06, "loss": 17.658, "step": 46280 }, { "epoch": 0.729803872106956, "grad_norm": 74.62896934198945, "learning_rate": 3.4561718866508245e-06, "loss": 17.7201, "step": 46290 }, { "epoch": 0.7299615311849657, "grad_norm": 80.14365550916557, "learning_rate": 3.4523895492638193e-06, "loss": 17.3869, "step": 46300 }, { "epoch": 0.7301191902629753, "grad_norm": 75.82504646430313, "learning_rate": 3.4486088508056783e-06, "loss": 17.9231, "step": 46310 }, { "epoch": 0.730276849340985, "grad_norm": 75.88704521579454, "learning_rate": 3.444829792222749e-06, "loss": 17.5943, "step": 46320 }, { "epoch": 0.7304345084189948, "grad_norm": 82.52849720810447, "learning_rate": 3.4410523744609682e-06, "loss": 17.7402, "step": 46330 }, { "epoch": 0.7305921674970045, "grad_norm": 79.30267500361383, "learning_rate": 3.437276598465854e-06, "loss": 17.6396, "step": 46340 }, { "epoch": 0.7307498265750142, "grad_norm": 72.43622018514931, "learning_rate": 3.4335024651825244e-06, "loss": 17.6767, "step": 46350 }, { "epoch": 0.7309074856530239, "grad_norm": 80.22226233096046, "learning_rate": 3.4297299755556756e-06, "loss": 17.758, "step": 46360 }, { "epoch": 0.7310651447310336, "grad_norm": 71.18211955528295, "learning_rate": 3.4259591305296015e-06, "loss": 16.8222, "step": 46370 }, { "epoch": 0.7312228038090434, "grad_norm": 74.11848576699086, "learning_rate": 3.4221899310481767e-06, "loss": 16.8062, "step": 46380 }, { "epoch": 0.731380462887053, "grad_norm": 93.74057358151309, "learning_rate": 3.418422378054871e-06, "loss": 17.1577, "step": 46390 }, { "epoch": 0.7315381219650627, "grad_norm": 75.56487285621988, "learning_rate": 3.4146564724927345e-06, "loss": 17.1703, "step": 46400 }, { "epoch": 0.7316957810430724, "grad_norm": 80.55549782448904, "learning_rate": 3.410892215304411e-06, "loss": 17.0155, "step": 46410 }, { "epoch": 0.7318534401210822, "grad_norm": 76.2237583900672, "learning_rate": 3.4071296074321303e-06, "loss": 17.5889, "step": 46420 }, { "epoch": 0.7320110991990919, "grad_norm": 74.36936643533008, "learning_rate": 3.403368649817712e-06, "loss": 16.6681, "step": 46430 }, { "epoch": 0.7321687582771016, "grad_norm": 73.17968550883195, "learning_rate": 3.3996093434025555e-06, "loss": 17.5952, "step": 46440 }, { "epoch": 0.7323264173551113, "grad_norm": 72.42364336913815, "learning_rate": 3.3958516891276482e-06, "loss": 17.6135, "step": 46450 }, { "epoch": 0.732484076433121, "grad_norm": 79.13942210549205, "learning_rate": 3.3920956879335733e-06, "loss": 16.9123, "step": 46460 }, { "epoch": 0.7326417355111308, "grad_norm": 78.60007434289932, "learning_rate": 3.388341340760486e-06, "loss": 18.0921, "step": 46470 }, { "epoch": 0.7327993945891405, "grad_norm": 72.42601382102873, "learning_rate": 3.3845886485481418e-06, "loss": 16.666, "step": 46480 }, { "epoch": 0.7329570536671501, "grad_norm": 77.1105603051414, "learning_rate": 3.3808376122358677e-06, "loss": 16.8929, "step": 46490 }, { "epoch": 0.7331147127451598, "grad_norm": 71.51677296576801, "learning_rate": 3.377088232762592e-06, "loss": 16.9837, "step": 46500 }, { "epoch": 0.7332723718231696, "grad_norm": 83.02697368507465, "learning_rate": 3.3733405110668115e-06, "loss": 16.8511, "step": 46510 }, { "epoch": 0.7334300309011793, "grad_norm": 73.14356544422886, "learning_rate": 3.3695944480866215e-06, "loss": 17.8105, "step": 46520 }, { "epoch": 0.733587689979189, "grad_norm": 77.39171847281101, "learning_rate": 3.3658500447596965e-06, "loss": 17.5252, "step": 46530 }, { "epoch": 0.7337453490571987, "grad_norm": 81.526108176031, "learning_rate": 3.3621073020232976e-06, "loss": 17.0783, "step": 46540 }, { "epoch": 0.7339030081352085, "grad_norm": 74.68675012451243, "learning_rate": 3.3583662208142644e-06, "loss": 17.6372, "step": 46550 }, { "epoch": 0.7340606672132182, "grad_norm": 76.74696920418738, "learning_rate": 3.3546268020690307e-06, "loss": 16.9094, "step": 46560 }, { "epoch": 0.7342183262912279, "grad_norm": 73.88925526920981, "learning_rate": 3.3508890467236044e-06, "loss": 17.1554, "step": 46570 }, { "epoch": 0.7343759853692375, "grad_norm": 72.38677128279241, "learning_rate": 3.3471529557135783e-06, "loss": 17.2285, "step": 46580 }, { "epoch": 0.7345336444472472, "grad_norm": 80.22764391915227, "learning_rate": 3.3434185299741385e-06, "loss": 17.4835, "step": 46590 }, { "epoch": 0.734691303525257, "grad_norm": 77.07328642265259, "learning_rate": 3.3396857704400398e-06, "loss": 17.4645, "step": 46600 }, { "epoch": 0.7348489626032667, "grad_norm": 77.02477835521987, "learning_rate": 3.335954678045631e-06, "loss": 16.9096, "step": 46610 }, { "epoch": 0.7350066216812764, "grad_norm": 80.48523702033715, "learning_rate": 3.33222525372484e-06, "loss": 17.2785, "step": 46620 }, { "epoch": 0.7351642807592861, "grad_norm": 79.5666409839509, "learning_rate": 3.3284974984111794e-06, "loss": 17.2326, "step": 46630 }, { "epoch": 0.7353219398372959, "grad_norm": 75.92174862482257, "learning_rate": 3.324771413037735e-06, "loss": 16.6366, "step": 46640 }, { "epoch": 0.7354795989153056, "grad_norm": 77.35194883039182, "learning_rate": 3.3210469985371896e-06, "loss": 17.3117, "step": 46650 }, { "epoch": 0.7356372579933153, "grad_norm": 72.1671628319615, "learning_rate": 3.3173242558417906e-06, "loss": 17.4262, "step": 46660 }, { "epoch": 0.7357949170713249, "grad_norm": 77.6129986505995, "learning_rate": 3.3136031858833837e-06, "loss": 16.9964, "step": 46670 }, { "epoch": 0.7359525761493346, "grad_norm": 77.3832938410571, "learning_rate": 3.309883789593381e-06, "loss": 16.9617, "step": 46680 }, { "epoch": 0.7361102352273444, "grad_norm": 75.582226543466, "learning_rate": 3.3061660679027885e-06, "loss": 17.5655, "step": 46690 }, { "epoch": 0.7362678943053541, "grad_norm": 75.15977998110937, "learning_rate": 3.3024500217421817e-06, "loss": 17.2074, "step": 46700 }, { "epoch": 0.7364255533833638, "grad_norm": 72.03711389344272, "learning_rate": 3.298735652041728e-06, "loss": 16.878, "step": 46710 }, { "epoch": 0.7365832124613735, "grad_norm": 76.28338994618197, "learning_rate": 3.2950229597311632e-06, "loss": 17.0348, "step": 46720 }, { "epoch": 0.7367408715393833, "grad_norm": 77.03225771569682, "learning_rate": 3.2913119457398123e-06, "loss": 16.8192, "step": 46730 }, { "epoch": 0.736898530617393, "grad_norm": 75.92453583626128, "learning_rate": 3.287602610996581e-06, "loss": 17.6933, "step": 46740 }, { "epoch": 0.7370561896954027, "grad_norm": 71.93201153368669, "learning_rate": 3.283894956429943e-06, "loss": 17.1386, "step": 46750 }, { "epoch": 0.7372138487734123, "grad_norm": 74.86204252698958, "learning_rate": 3.280188982967967e-06, "loss": 17.1905, "step": 46760 }, { "epoch": 0.7373715078514221, "grad_norm": 81.00170476194752, "learning_rate": 3.276484691538285e-06, "loss": 17.0801, "step": 46770 }, { "epoch": 0.7375291669294318, "grad_norm": 83.04146429467751, "learning_rate": 3.2727820830681266e-06, "loss": 16.83, "step": 46780 }, { "epoch": 0.7376868260074415, "grad_norm": 78.17263697055883, "learning_rate": 3.2690811584842795e-06, "loss": 17.2442, "step": 46790 }, { "epoch": 0.7378444850854512, "grad_norm": 74.80600739580423, "learning_rate": 3.2653819187131276e-06, "loss": 17.529, "step": 46800 }, { "epoch": 0.7380021441634609, "grad_norm": 76.08982916135358, "learning_rate": 3.2616843646806207e-06, "loss": 17.3533, "step": 46810 }, { "epoch": 0.7381598032414707, "grad_norm": 80.66252426752577, "learning_rate": 3.2579884973122922e-06, "loss": 17.547, "step": 46820 }, { "epoch": 0.7383174623194804, "grad_norm": 74.98529752983178, "learning_rate": 3.2542943175332584e-06, "loss": 16.632, "step": 46830 }, { "epoch": 0.73847512139749, "grad_norm": 74.351187463116, "learning_rate": 3.2506018262681993e-06, "loss": 17.3262, "step": 46840 }, { "epoch": 0.7386327804754997, "grad_norm": 78.76475021754261, "learning_rate": 3.246911024441388e-06, "loss": 17.3394, "step": 46850 }, { "epoch": 0.7387904395535095, "grad_norm": 78.61079888850841, "learning_rate": 3.2432219129766595e-06, "loss": 16.8688, "step": 46860 }, { "epoch": 0.7389480986315192, "grad_norm": 76.89463548826365, "learning_rate": 3.2395344927974403e-06, "loss": 17.2006, "step": 46870 }, { "epoch": 0.7391057577095289, "grad_norm": 76.56427273854823, "learning_rate": 3.235848764826721e-06, "loss": 16.8088, "step": 46880 }, { "epoch": 0.7392634167875386, "grad_norm": 76.03842344703918, "learning_rate": 3.2321647299870797e-06, "loss": 16.6844, "step": 46890 }, { "epoch": 0.7394210758655483, "grad_norm": 73.77815652700231, "learning_rate": 3.2284823892006588e-06, "loss": 17.0864, "step": 46900 }, { "epoch": 0.7395787349435581, "grad_norm": 78.89387225966256, "learning_rate": 3.2248017433891863e-06, "loss": 17.4095, "step": 46910 }, { "epoch": 0.7397363940215678, "grad_norm": 74.43181454189707, "learning_rate": 3.221122793473963e-06, "loss": 17.0073, "step": 46920 }, { "epoch": 0.7398940530995775, "grad_norm": 75.23111387871471, "learning_rate": 3.2174455403758674e-06, "loss": 17.1003, "step": 46930 }, { "epoch": 0.7400517121775871, "grad_norm": 77.92787728777651, "learning_rate": 3.2137699850153448e-06, "loss": 17.0581, "step": 46940 }, { "epoch": 0.7402093712555969, "grad_norm": 89.30307455869723, "learning_rate": 3.210096128312429e-06, "loss": 17.6626, "step": 46950 }, { "epoch": 0.7403670303336066, "grad_norm": 78.60413118873288, "learning_rate": 3.206423971186715e-06, "loss": 16.6765, "step": 46960 }, { "epoch": 0.7405246894116163, "grad_norm": 79.36030719877962, "learning_rate": 3.2027535145573784e-06, "loss": 16.7431, "step": 46970 }, { "epoch": 0.740682348489626, "grad_norm": 78.32557837963977, "learning_rate": 3.199084759343174e-06, "loss": 17.1763, "step": 46980 }, { "epoch": 0.7408400075676358, "grad_norm": 70.32597541286934, "learning_rate": 3.1954177064624203e-06, "loss": 16.877, "step": 46990 }, { "epoch": 0.7409976666456455, "grad_norm": 77.38873464524636, "learning_rate": 3.191752356833021e-06, "loss": 17.6896, "step": 47000 }, { "epoch": 0.7411553257236552, "grad_norm": 75.99587043504859, "learning_rate": 3.188088711372441e-06, "loss": 16.7879, "step": 47010 }, { "epoch": 0.7413129848016649, "grad_norm": 76.83656459907381, "learning_rate": 3.1844267709977306e-06, "loss": 16.7805, "step": 47020 }, { "epoch": 0.7414706438796745, "grad_norm": 74.78433846387613, "learning_rate": 3.180766536625507e-06, "loss": 17.5819, "step": 47030 }, { "epoch": 0.7416283029576843, "grad_norm": 73.00920274366419, "learning_rate": 3.177108009171963e-06, "loss": 16.8442, "step": 47040 }, { "epoch": 0.741785962035694, "grad_norm": 73.58655112987793, "learning_rate": 3.1734511895528597e-06, "loss": 16.7467, "step": 47050 }, { "epoch": 0.7419436211137037, "grad_norm": 79.08597976288509, "learning_rate": 3.169796078683537e-06, "loss": 16.8549, "step": 47060 }, { "epoch": 0.7421012801917134, "grad_norm": 78.51429515973777, "learning_rate": 3.1661426774788995e-06, "loss": 17.3726, "step": 47070 }, { "epoch": 0.7422589392697232, "grad_norm": 79.70136945320783, "learning_rate": 3.1624909868534326e-06, "loss": 17.39, "step": 47080 }, { "epoch": 0.7424165983477329, "grad_norm": 77.59616573122945, "learning_rate": 3.158841007721184e-06, "loss": 17.2763, "step": 47090 }, { "epoch": 0.7425742574257426, "grad_norm": 74.4888447424804, "learning_rate": 3.155192740995784e-06, "loss": 16.6853, "step": 47100 }, { "epoch": 0.7427319165037523, "grad_norm": 76.89861782740533, "learning_rate": 3.151546187590422e-06, "loss": 17.0601, "step": 47110 }, { "epoch": 0.742889575581762, "grad_norm": 71.10916980908299, "learning_rate": 3.147901348417868e-06, "loss": 16.7932, "step": 47120 }, { "epoch": 0.7430472346597717, "grad_norm": 68.62252692904607, "learning_rate": 3.144258224390463e-06, "loss": 16.2937, "step": 47130 }, { "epoch": 0.7432048937377814, "grad_norm": 72.4131757422408, "learning_rate": 3.1406168164201077e-06, "loss": 16.7089, "step": 47140 }, { "epoch": 0.7433625528157911, "grad_norm": 77.55339175843623, "learning_rate": 3.13697712541829e-06, "loss": 17.085, "step": 47150 }, { "epoch": 0.7435202118938008, "grad_norm": 77.89277715554036, "learning_rate": 3.133339152296051e-06, "loss": 17.2924, "step": 47160 }, { "epoch": 0.7436778709718106, "grad_norm": 78.05751467410734, "learning_rate": 3.1297028979640164e-06, "loss": 17.3323, "step": 47170 }, { "epoch": 0.7438355300498203, "grad_norm": 80.60944369013552, "learning_rate": 3.1260683633323687e-06, "loss": 17.9535, "step": 47180 }, { "epoch": 0.74399318912783, "grad_norm": 80.9184076874934, "learning_rate": 3.1224355493108717e-06, "loss": 17.034, "step": 47190 }, { "epoch": 0.7441508482058397, "grad_norm": 74.44616234497833, "learning_rate": 3.1188044568088485e-06, "loss": 17.0982, "step": 47200 }, { "epoch": 0.7443085072838495, "grad_norm": 75.50345937291189, "learning_rate": 3.1151750867351982e-06, "loss": 16.8366, "step": 47210 }, { "epoch": 0.7444661663618591, "grad_norm": 79.48853435874572, "learning_rate": 3.1115474399983867e-06, "loss": 16.7642, "step": 47220 }, { "epoch": 0.7446238254398688, "grad_norm": 76.38110413037292, "learning_rate": 3.1079215175064515e-06, "loss": 16.953, "step": 47230 }, { "epoch": 0.7447814845178785, "grad_norm": 75.05261186636119, "learning_rate": 3.1042973201669922e-06, "loss": 17.9125, "step": 47240 }, { "epoch": 0.7449391435958882, "grad_norm": 77.17327332034304, "learning_rate": 3.1006748488871764e-06, "loss": 17.0876, "step": 47250 }, { "epoch": 0.745096802673898, "grad_norm": 80.99921951078838, "learning_rate": 3.097054104573749e-06, "loss": 17.7803, "step": 47260 }, { "epoch": 0.7452544617519077, "grad_norm": 79.6166200133262, "learning_rate": 3.09343508813301e-06, "loss": 16.8463, "step": 47270 }, { "epoch": 0.7454121208299174, "grad_norm": 74.91507797140632, "learning_rate": 3.0898178004708413e-06, "loss": 17.0709, "step": 47280 }, { "epoch": 0.745569779907927, "grad_norm": 75.75569403996181, "learning_rate": 3.0862022424926764e-06, "loss": 17.074, "step": 47290 }, { "epoch": 0.7457274389859369, "grad_norm": 76.99012824482581, "learning_rate": 3.0825884151035302e-06, "loss": 17.157, "step": 47300 }, { "epoch": 0.7458850980639465, "grad_norm": 77.61085073983273, "learning_rate": 3.0789763192079723e-06, "loss": 17.2051, "step": 47310 }, { "epoch": 0.7460427571419562, "grad_norm": 77.81450739172293, "learning_rate": 3.075365955710148e-06, "loss": 17.2751, "step": 47320 }, { "epoch": 0.7462004162199659, "grad_norm": 74.87379834028049, "learning_rate": 3.0717573255137632e-06, "loss": 17.4033, "step": 47330 }, { "epoch": 0.7463580752979757, "grad_norm": 73.79611850441998, "learning_rate": 3.0681504295220977e-06, "loss": 17.0999, "step": 47340 }, { "epoch": 0.7465157343759854, "grad_norm": 73.6545446360894, "learning_rate": 3.064545268637984e-06, "loss": 16.6808, "step": 47350 }, { "epoch": 0.7466733934539951, "grad_norm": 75.10994005833376, "learning_rate": 3.060941843763836e-06, "loss": 17.0937, "step": 47360 }, { "epoch": 0.7468310525320048, "grad_norm": 79.63868167372266, "learning_rate": 3.0573401558016193e-06, "loss": 17.4253, "step": 47370 }, { "epoch": 0.7469887116100145, "grad_norm": 81.75568721373375, "learning_rate": 3.05374020565287e-06, "loss": 17.1618, "step": 47380 }, { "epoch": 0.7471463706880243, "grad_norm": 80.17211828760709, "learning_rate": 3.0501419942186936e-06, "loss": 17.3545, "step": 47390 }, { "epoch": 0.7473040297660339, "grad_norm": 74.57774068741912, "learning_rate": 3.046545522399752e-06, "loss": 17.4533, "step": 47400 }, { "epoch": 0.7474616888440436, "grad_norm": 76.20461097131141, "learning_rate": 3.0429507910962787e-06, "loss": 16.9903, "step": 47410 }, { "epoch": 0.7476193479220533, "grad_norm": 75.73197526221402, "learning_rate": 3.039357801208069e-06, "loss": 16.6555, "step": 47420 }, { "epoch": 0.7477770070000631, "grad_norm": 73.84885470998408, "learning_rate": 3.0357665536344848e-06, "loss": 17.4248, "step": 47430 }, { "epoch": 0.7479346660780728, "grad_norm": 74.57720936165485, "learning_rate": 3.032177049274444e-06, "loss": 17.0794, "step": 47440 }, { "epoch": 0.7480923251560825, "grad_norm": 76.85791947472809, "learning_rate": 3.0285892890264402e-06, "loss": 17.1941, "step": 47450 }, { "epoch": 0.7482499842340922, "grad_norm": 77.15564474695886, "learning_rate": 3.025003273788515e-06, "loss": 17.4262, "step": 47460 }, { "epoch": 0.7484076433121019, "grad_norm": 85.56855758518415, "learning_rate": 3.0214190044582913e-06, "loss": 17.7848, "step": 47470 }, { "epoch": 0.7485653023901117, "grad_norm": 74.78342561310913, "learning_rate": 3.0178364819329376e-06, "loss": 17.5792, "step": 47480 }, { "epoch": 0.7487229614681213, "grad_norm": 76.98680833649927, "learning_rate": 3.0142557071091995e-06, "loss": 16.5768, "step": 47490 }, { "epoch": 0.748880620546131, "grad_norm": 79.80265795844636, "learning_rate": 3.010676680883373e-06, "loss": 17.4261, "step": 47500 }, { "epoch": 0.7490382796241407, "grad_norm": 72.51379851686913, "learning_rate": 3.007099404151328e-06, "loss": 16.8918, "step": 47510 }, { "epoch": 0.7491959387021505, "grad_norm": 72.77106882674974, "learning_rate": 3.003523877808485e-06, "loss": 16.6762, "step": 47520 }, { "epoch": 0.7493535977801602, "grad_norm": 73.6013715692581, "learning_rate": 2.9999501027498356e-06, "loss": 16.9975, "step": 47530 }, { "epoch": 0.7495112568581699, "grad_norm": 75.88590341693863, "learning_rate": 2.9963780798699317e-06, "loss": 17.0907, "step": 47540 }, { "epoch": 0.7496689159361796, "grad_norm": 73.64182526195752, "learning_rate": 2.9928078100628788e-06, "loss": 17.0003, "step": 47550 }, { "epoch": 0.7498265750141894, "grad_norm": 80.39686462195931, "learning_rate": 2.9892392942223548e-06, "loss": 17.0673, "step": 47560 }, { "epoch": 0.749984234092199, "grad_norm": 73.3694228148149, "learning_rate": 2.985672533241588e-06, "loss": 16.7419, "step": 47570 }, { "epoch": 0.7501418931702087, "grad_norm": 69.15636999944277, "learning_rate": 2.982107528013377e-06, "loss": 16.8203, "step": 47580 }, { "epoch": 0.7502995522482184, "grad_norm": 76.15451258360858, "learning_rate": 2.9785442794300722e-06, "loss": 16.733, "step": 47590 }, { "epoch": 0.7504572113262281, "grad_norm": 77.42878062779508, "learning_rate": 2.9749827883835926e-06, "loss": 16.9417, "step": 47600 }, { "epoch": 0.7506148704042379, "grad_norm": 76.79695525498657, "learning_rate": 2.9714230557654077e-06, "loss": 16.5831, "step": 47610 }, { "epoch": 0.7507725294822476, "grad_norm": 70.86371749515708, "learning_rate": 2.9678650824665557e-06, "loss": 16.7936, "step": 47620 }, { "epoch": 0.7509301885602573, "grad_norm": 73.81619340389452, "learning_rate": 2.9643088693776336e-06, "loss": 16.7394, "step": 47630 }, { "epoch": 0.751087847638267, "grad_norm": 72.43234381145123, "learning_rate": 2.960754417388789e-06, "loss": 16.8096, "step": 47640 }, { "epoch": 0.7512455067162768, "grad_norm": 81.93963374542228, "learning_rate": 2.9572017273897426e-06, "loss": 17.4438, "step": 47650 }, { "epoch": 0.7514031657942865, "grad_norm": 79.15951292791854, "learning_rate": 2.9536508002697584e-06, "loss": 17.1695, "step": 47660 }, { "epoch": 0.7515608248722961, "grad_norm": 84.21384799368515, "learning_rate": 2.950101636917673e-06, "loss": 16.8534, "step": 47670 }, { "epoch": 0.7517184839503058, "grad_norm": 79.15491715042278, "learning_rate": 2.9465542382218702e-06, "loss": 17.5277, "step": 47680 }, { "epoch": 0.7518761430283156, "grad_norm": 80.3458621095387, "learning_rate": 2.943008605070303e-06, "loss": 16.977, "step": 47690 }, { "epoch": 0.7520338021063253, "grad_norm": 74.91307182638019, "learning_rate": 2.9394647383504728e-06, "loss": 16.5582, "step": 47700 }, { "epoch": 0.752191461184335, "grad_norm": 75.57755525779447, "learning_rate": 2.9359226389494433e-06, "loss": 16.9054, "step": 47710 }, { "epoch": 0.7523491202623447, "grad_norm": 79.88758924801269, "learning_rate": 2.932382307753838e-06, "loss": 16.6604, "step": 47720 }, { "epoch": 0.7525067793403544, "grad_norm": 73.76923403284877, "learning_rate": 2.9288437456498365e-06, "loss": 16.6805, "step": 47730 }, { "epoch": 0.7526644384183642, "grad_norm": 74.54463308714747, "learning_rate": 2.925306953523168e-06, "loss": 16.9946, "step": 47740 }, { "epoch": 0.7528220974963739, "grad_norm": 89.44310243690909, "learning_rate": 2.9217719322591344e-06, "loss": 17.8897, "step": 47750 }, { "epoch": 0.7529797565743835, "grad_norm": 72.36582685516608, "learning_rate": 2.918238682742578e-06, "loss": 16.7759, "step": 47760 }, { "epoch": 0.7531374156523932, "grad_norm": 73.92193810304715, "learning_rate": 2.914707205857903e-06, "loss": 17.1271, "step": 47770 }, { "epoch": 0.753295074730403, "grad_norm": 77.06350297294159, "learning_rate": 2.9111775024890778e-06, "loss": 17.1011, "step": 47780 }, { "epoch": 0.7534527338084127, "grad_norm": 80.19342484168078, "learning_rate": 2.907649573519614e-06, "loss": 16.6877, "step": 47790 }, { "epoch": 0.7536103928864224, "grad_norm": 71.4738480621693, "learning_rate": 2.904123419832593e-06, "loss": 16.3453, "step": 47800 }, { "epoch": 0.7537680519644321, "grad_norm": 78.28685261769137, "learning_rate": 2.900599042310638e-06, "loss": 17.107, "step": 47810 }, { "epoch": 0.7539257110424418, "grad_norm": 79.25356159449636, "learning_rate": 2.8970764418359353e-06, "loss": 17.1879, "step": 47820 }, { "epoch": 0.7540833701204516, "grad_norm": 74.5911104897828, "learning_rate": 2.893555619290227e-06, "loss": 16.3168, "step": 47830 }, { "epoch": 0.7542410291984613, "grad_norm": 73.40728263914137, "learning_rate": 2.89003657555481e-06, "loss": 16.8549, "step": 47840 }, { "epoch": 0.7543986882764709, "grad_norm": 79.47388220999684, "learning_rate": 2.8865193115105293e-06, "loss": 17.2258, "step": 47850 }, { "epoch": 0.7545563473544806, "grad_norm": 73.96166653329952, "learning_rate": 2.883003828037796e-06, "loss": 16.6872, "step": 47860 }, { "epoch": 0.7547140064324904, "grad_norm": 78.11632162419183, "learning_rate": 2.879490126016561e-06, "loss": 16.9971, "step": 47870 }, { "epoch": 0.7548716655105001, "grad_norm": 77.51705364195462, "learning_rate": 2.875978206326344e-06, "loss": 17.1235, "step": 47880 }, { "epoch": 0.7550293245885098, "grad_norm": 73.19161835326987, "learning_rate": 2.8724680698462093e-06, "loss": 16.5782, "step": 47890 }, { "epoch": 0.7551869836665195, "grad_norm": 78.72287146539173, "learning_rate": 2.8689597174547735e-06, "loss": 17.1145, "step": 47900 }, { "epoch": 0.7553446427445293, "grad_norm": 76.54376335658208, "learning_rate": 2.865453150030213e-06, "loss": 16.9083, "step": 47910 }, { "epoch": 0.755502301822539, "grad_norm": 77.354443952534, "learning_rate": 2.861948368450257e-06, "loss": 16.8077, "step": 47920 }, { "epoch": 0.7556599609005487, "grad_norm": 78.99521284722388, "learning_rate": 2.858445373592186e-06, "loss": 17.1457, "step": 47930 }, { "epoch": 0.7558176199785583, "grad_norm": 77.25409686557127, "learning_rate": 2.854944166332828e-06, "loss": 17.4774, "step": 47940 }, { "epoch": 0.755975279056568, "grad_norm": 76.74487484560318, "learning_rate": 2.851444747548574e-06, "loss": 16.6911, "step": 47950 }, { "epoch": 0.7561329381345778, "grad_norm": 85.86749526248461, "learning_rate": 2.847947118115356e-06, "loss": 17.2797, "step": 47960 }, { "epoch": 0.7562905972125875, "grad_norm": 75.39485252938283, "learning_rate": 2.84445127890867e-06, "loss": 16.6714, "step": 47970 }, { "epoch": 0.7564482562905972, "grad_norm": 74.96907607219639, "learning_rate": 2.8409572308035506e-06, "loss": 16.9751, "step": 47980 }, { "epoch": 0.7566059153686069, "grad_norm": 80.90588760112207, "learning_rate": 2.8374649746745975e-06, "loss": 17.5511, "step": 47990 }, { "epoch": 0.7567635744466167, "grad_norm": 79.59786784525822, "learning_rate": 2.8339745113959492e-06, "loss": 17.0351, "step": 48000 }, { "epoch": 0.7569212335246264, "grad_norm": 75.34805467208552, "learning_rate": 2.830485841841306e-06, "loss": 16.5401, "step": 48010 }, { "epoch": 0.757078892602636, "grad_norm": 76.33169936431213, "learning_rate": 2.826998966883917e-06, "loss": 16.2385, "step": 48020 }, { "epoch": 0.7572365516806457, "grad_norm": 74.46791302823907, "learning_rate": 2.823513887396573e-06, "loss": 17.1675, "step": 48030 }, { "epoch": 0.7573942107586554, "grad_norm": 78.21149720949288, "learning_rate": 2.82003060425163e-06, "loss": 17.0346, "step": 48040 }, { "epoch": 0.7575518698366652, "grad_norm": 76.77978243224277, "learning_rate": 2.8165491183209803e-06, "loss": 16.8147, "step": 48050 }, { "epoch": 0.7577095289146749, "grad_norm": 74.25552539143808, "learning_rate": 2.8130694304760788e-06, "loss": 17.1106, "step": 48060 }, { "epoch": 0.7578671879926846, "grad_norm": 79.63854053849799, "learning_rate": 2.8095915415879182e-06, "loss": 16.7073, "step": 48070 }, { "epoch": 0.7580248470706943, "grad_norm": 74.57121089242298, "learning_rate": 2.806115452527053e-06, "loss": 16.8815, "step": 48080 }, { "epoch": 0.7581825061487041, "grad_norm": 79.87017025137617, "learning_rate": 2.8026411641635742e-06, "loss": 17.2846, "step": 48090 }, { "epoch": 0.7583401652267138, "grad_norm": 80.34507227016746, "learning_rate": 2.7991686773671367e-06, "loss": 16.5417, "step": 48100 }, { "epoch": 0.7584978243047235, "grad_norm": 76.76170316846628, "learning_rate": 2.7956979930069304e-06, "loss": 17.0609, "step": 48110 }, { "epoch": 0.7586554833827331, "grad_norm": 77.52992283716368, "learning_rate": 2.792229111951703e-06, "loss": 16.9957, "step": 48120 }, { "epoch": 0.7588131424607429, "grad_norm": 74.22188938946711, "learning_rate": 2.7887620350697476e-06, "loss": 16.712, "step": 48130 }, { "epoch": 0.7589708015387526, "grad_norm": 73.22965974855366, "learning_rate": 2.7852967632289107e-06, "loss": 16.1391, "step": 48140 }, { "epoch": 0.7591284606167623, "grad_norm": 74.0289867743417, "learning_rate": 2.78183329729658e-06, "loss": 16.7952, "step": 48150 }, { "epoch": 0.759286119694772, "grad_norm": 80.62610810703737, "learning_rate": 2.7783716381396886e-06, "loss": 16.7814, "step": 48160 }, { "epoch": 0.7594437787727817, "grad_norm": 73.49439355523695, "learning_rate": 2.77491178662473e-06, "loss": 16.4249, "step": 48170 }, { "epoch": 0.7596014378507915, "grad_norm": 68.09080731385968, "learning_rate": 2.7714537436177323e-06, "loss": 16.7396, "step": 48180 }, { "epoch": 0.7597590969288012, "grad_norm": 72.60212108201804, "learning_rate": 2.7679975099842817e-06, "loss": 16.3995, "step": 48190 }, { "epoch": 0.7599167560068109, "grad_norm": 71.39142977044096, "learning_rate": 2.7645430865894995e-06, "loss": 16.4314, "step": 48200 }, { "epoch": 0.7600744150848205, "grad_norm": 70.33749846648935, "learning_rate": 2.761090474298065e-06, "loss": 16.0857, "step": 48210 }, { "epoch": 0.7602320741628303, "grad_norm": 72.77355475170904, "learning_rate": 2.7576396739741983e-06, "loss": 16.6582, "step": 48220 }, { "epoch": 0.76038973324084, "grad_norm": 76.51208023587826, "learning_rate": 2.7541906864816713e-06, "loss": 16.4003, "step": 48230 }, { "epoch": 0.7605473923188497, "grad_norm": 74.2684220198953, "learning_rate": 2.7507435126837924e-06, "loss": 17.4611, "step": 48240 }, { "epoch": 0.7607050513968594, "grad_norm": 72.16660784583219, "learning_rate": 2.747298153443427e-06, "loss": 16.7782, "step": 48250 }, { "epoch": 0.7608627104748692, "grad_norm": 76.61583210691866, "learning_rate": 2.7438546096229767e-06, "loss": 16.7407, "step": 48260 }, { "epoch": 0.7610203695528789, "grad_norm": 75.97610735870155, "learning_rate": 2.7404128820843967e-06, "loss": 17.0992, "step": 48270 }, { "epoch": 0.7611780286308886, "grad_norm": 74.84679578685328, "learning_rate": 2.73697297168918e-06, "loss": 16.4947, "step": 48280 }, { "epoch": 0.7613356877088983, "grad_norm": 79.38637630488819, "learning_rate": 2.733534879298374e-06, "loss": 16.8876, "step": 48290 }, { "epoch": 0.7614933467869079, "grad_norm": 77.92637167433007, "learning_rate": 2.730098605772562e-06, "loss": 16.7995, "step": 48300 }, { "epoch": 0.7616510058649177, "grad_norm": 71.20481131486174, "learning_rate": 2.726664151971874e-06, "loss": 16.8731, "step": 48310 }, { "epoch": 0.7618086649429274, "grad_norm": 67.68920361038668, "learning_rate": 2.72323151875599e-06, "loss": 16.5268, "step": 48320 }, { "epoch": 0.7619663240209371, "grad_norm": 79.30216604144424, "learning_rate": 2.719800706984128e-06, "loss": 16.5349, "step": 48330 }, { "epoch": 0.7621239830989468, "grad_norm": 75.7164492917123, "learning_rate": 2.716371717515057e-06, "loss": 16.7418, "step": 48340 }, { "epoch": 0.7622816421769566, "grad_norm": 72.51218110482404, "learning_rate": 2.7129445512070807e-06, "loss": 16.9538, "step": 48350 }, { "epoch": 0.7624393012549663, "grad_norm": 71.54012143600406, "learning_rate": 2.7095192089180544e-06, "loss": 16.5511, "step": 48360 }, { "epoch": 0.762596960332976, "grad_norm": 85.0491082489682, "learning_rate": 2.7060956915053716e-06, "loss": 17.3192, "step": 48370 }, { "epoch": 0.7627546194109857, "grad_norm": 76.86021135273381, "learning_rate": 2.7026739998259734e-06, "loss": 16.6893, "step": 48380 }, { "epoch": 0.7629122784889953, "grad_norm": 78.07131446743605, "learning_rate": 2.6992541347363376e-06, "loss": 17.0359, "step": 48390 }, { "epoch": 0.7630699375670051, "grad_norm": 75.63688720388554, "learning_rate": 2.6958360970924948e-06, "loss": 16.8119, "step": 48400 }, { "epoch": 0.7632275966450148, "grad_norm": 81.74075224099455, "learning_rate": 2.692419887750005e-06, "loss": 17.0514, "step": 48410 }, { "epoch": 0.7633852557230245, "grad_norm": 75.13085617482749, "learning_rate": 2.6890055075639822e-06, "loss": 17.0611, "step": 48420 }, { "epoch": 0.7635429148010342, "grad_norm": 75.51306906140697, "learning_rate": 2.6855929573890803e-06, "loss": 16.8206, "step": 48430 }, { "epoch": 0.763700573879044, "grad_norm": 72.62841464706621, "learning_rate": 2.682182238079487e-06, "loss": 16.6014, "step": 48440 }, { "epoch": 0.7638582329570537, "grad_norm": 83.61865939517908, "learning_rate": 2.6787733504889436e-06, "loss": 17.012, "step": 48450 }, { "epoch": 0.7640158920350634, "grad_norm": 77.25244954700284, "learning_rate": 2.6753662954707204e-06, "loss": 16.3544, "step": 48460 }, { "epoch": 0.7641735511130731, "grad_norm": 75.03291158170336, "learning_rate": 2.6719610738776423e-06, "loss": 16.3571, "step": 48470 }, { "epoch": 0.7643312101910829, "grad_norm": 68.81071801242197, "learning_rate": 2.668557686562062e-06, "loss": 16.7944, "step": 48480 }, { "epoch": 0.7644888692690925, "grad_norm": 75.55715308200469, "learning_rate": 2.665156134375886e-06, "loss": 17.0213, "step": 48490 }, { "epoch": 0.7646465283471022, "grad_norm": 73.69333430822799, "learning_rate": 2.661756418170549e-06, "loss": 16.493, "step": 48500 }, { "epoch": 0.7648041874251119, "grad_norm": 74.11331252521393, "learning_rate": 2.6583585387970347e-06, "loss": 16.8522, "step": 48510 }, { "epoch": 0.7649618465031216, "grad_norm": 78.40615572033192, "learning_rate": 2.6549624971058654e-06, "loss": 16.6814, "step": 48520 }, { "epoch": 0.7651195055811314, "grad_norm": 74.17926866533966, "learning_rate": 2.6515682939471033e-06, "loss": 16.5363, "step": 48530 }, { "epoch": 0.7652771646591411, "grad_norm": 70.33579683248747, "learning_rate": 2.648175930170347e-06, "loss": 16.9374, "step": 48540 }, { "epoch": 0.7654348237371508, "grad_norm": 77.17307043562731, "learning_rate": 2.6447854066247404e-06, "loss": 16.3996, "step": 48550 }, { "epoch": 0.7655924828151605, "grad_norm": 73.40264602957225, "learning_rate": 2.641396724158961e-06, "loss": 16.4503, "step": 48560 }, { "epoch": 0.7657501418931703, "grad_norm": 76.10594927508825, "learning_rate": 2.6380098836212266e-06, "loss": 17.2823, "step": 48570 }, { "epoch": 0.7659078009711799, "grad_norm": 76.10962168886898, "learning_rate": 2.6346248858592994e-06, "loss": 16.7361, "step": 48580 }, { "epoch": 0.7660654600491896, "grad_norm": 74.63422560852425, "learning_rate": 2.631241731720472e-06, "loss": 16.5953, "step": 48590 }, { "epoch": 0.7662231191271993, "grad_norm": 74.97036694813117, "learning_rate": 2.6278604220515857e-06, "loss": 16.7008, "step": 48600 }, { "epoch": 0.766380778205209, "grad_norm": 76.59318242672201, "learning_rate": 2.6244809576990083e-06, "loss": 16.3207, "step": 48610 }, { "epoch": 0.7665384372832188, "grad_norm": 75.24811400396916, "learning_rate": 2.621103339508654e-06, "loss": 16.8849, "step": 48620 }, { "epoch": 0.7666960963612285, "grad_norm": 72.38967676814099, "learning_rate": 2.617727568325973e-06, "loss": 16.5702, "step": 48630 }, { "epoch": 0.7668537554392382, "grad_norm": 75.78599459424703, "learning_rate": 2.614353644995955e-06, "loss": 16.8472, "step": 48640 }, { "epoch": 0.7670114145172479, "grad_norm": 77.89926015788852, "learning_rate": 2.6109815703631204e-06, "loss": 16.8077, "step": 48650 }, { "epoch": 0.7671690735952577, "grad_norm": 73.2717489135544, "learning_rate": 2.607611345271537e-06, "loss": 16.1691, "step": 48660 }, { "epoch": 0.7673267326732673, "grad_norm": 74.26094238860938, "learning_rate": 2.6042429705647976e-06, "loss": 16.3663, "step": 48670 }, { "epoch": 0.767484391751277, "grad_norm": 68.89183495074866, "learning_rate": 2.600876447086044e-06, "loss": 16.8179, "step": 48680 }, { "epoch": 0.7676420508292867, "grad_norm": 72.98841002138843, "learning_rate": 2.5975117756779467e-06, "loss": 17.1743, "step": 48690 }, { "epoch": 0.7677997099072965, "grad_norm": 79.04429141989742, "learning_rate": 2.5941489571827117e-06, "loss": 16.7883, "step": 48700 }, { "epoch": 0.7679573689853062, "grad_norm": 76.83339377373031, "learning_rate": 2.5907879924420863e-06, "loss": 17.3578, "step": 48710 }, { "epoch": 0.7681150280633159, "grad_norm": 127.90300515450475, "learning_rate": 2.5874288822973526e-06, "loss": 16.1187, "step": 48720 }, { "epoch": 0.7682726871413256, "grad_norm": 76.60179638140796, "learning_rate": 2.584071627589331e-06, "loss": 16.6469, "step": 48730 }, { "epoch": 0.7684303462193353, "grad_norm": 72.03612499401845, "learning_rate": 2.5807162291583677e-06, "loss": 16.559, "step": 48740 }, { "epoch": 0.7685880052973451, "grad_norm": 72.87231863354955, "learning_rate": 2.577362687844357e-06, "loss": 15.9054, "step": 48750 }, { "epoch": 0.7687456643753547, "grad_norm": 81.39984639775282, "learning_rate": 2.5740110044867152e-06, "loss": 16.6439, "step": 48760 }, { "epoch": 0.7689033234533644, "grad_norm": 74.9626563527744, "learning_rate": 2.570661179924406e-06, "loss": 16.3865, "step": 48770 }, { "epoch": 0.7690609825313741, "grad_norm": 76.20739200721309, "learning_rate": 2.567313214995919e-06, "loss": 17.109, "step": 48780 }, { "epoch": 0.7692186416093839, "grad_norm": 72.16772125687066, "learning_rate": 2.5639671105392837e-06, "loss": 16.2605, "step": 48790 }, { "epoch": 0.7693763006873936, "grad_norm": 79.99644973498086, "learning_rate": 2.5606228673920587e-06, "loss": 16.7692, "step": 48800 }, { "epoch": 0.7695339597654033, "grad_norm": 75.01260634172344, "learning_rate": 2.5572804863913447e-06, "loss": 17.5983, "step": 48810 }, { "epoch": 0.769691618843413, "grad_norm": 74.1929034692987, "learning_rate": 2.5539399683737642e-06, "loss": 16.3083, "step": 48820 }, { "epoch": 0.7698492779214228, "grad_norm": 71.5369802772437, "learning_rate": 2.550601314175486e-06, "loss": 16.1105, "step": 48830 }, { "epoch": 0.7700069369994325, "grad_norm": 69.10568646354915, "learning_rate": 2.547264524632207e-06, "loss": 16.5465, "step": 48840 }, { "epoch": 0.7701645960774421, "grad_norm": 83.46512129160236, "learning_rate": 2.543929600579154e-06, "loss": 16.5101, "step": 48850 }, { "epoch": 0.7703222551554518, "grad_norm": 76.56335069260146, "learning_rate": 2.5405965428510926e-06, "loss": 16.7867, "step": 48860 }, { "epoch": 0.7704799142334615, "grad_norm": 74.775765593101, "learning_rate": 2.5372653522823156e-06, "loss": 16.4092, "step": 48870 }, { "epoch": 0.7706375733114713, "grad_norm": 82.63107526769977, "learning_rate": 2.5339360297066573e-06, "loss": 17.0185, "step": 48880 }, { "epoch": 0.770795232389481, "grad_norm": 79.33169711821961, "learning_rate": 2.5306085759574706e-06, "loss": 16.675, "step": 48890 }, { "epoch": 0.7709528914674907, "grad_norm": 74.21194216695297, "learning_rate": 2.5272829918676567e-06, "loss": 16.307, "step": 48900 }, { "epoch": 0.7711105505455004, "grad_norm": 75.33128470084868, "learning_rate": 2.523959278269634e-06, "loss": 16.9402, "step": 48910 }, { "epoch": 0.7712682096235102, "grad_norm": 78.35521129712704, "learning_rate": 2.5206374359953632e-06, "loss": 16.8903, "step": 48920 }, { "epoch": 0.7714258687015199, "grad_norm": 74.51729088939884, "learning_rate": 2.517317465876332e-06, "loss": 16.8159, "step": 48930 }, { "epoch": 0.7715835277795295, "grad_norm": 83.09875437487375, "learning_rate": 2.513999368743564e-06, "loss": 16.7933, "step": 48940 }, { "epoch": 0.7717411868575392, "grad_norm": 77.49336506463092, "learning_rate": 2.510683145427606e-06, "loss": 16.7262, "step": 48950 }, { "epoch": 0.7718988459355489, "grad_norm": 74.66766356589216, "learning_rate": 2.507368796758538e-06, "loss": 16.4676, "step": 48960 }, { "epoch": 0.7720565050135587, "grad_norm": 70.57249354885865, "learning_rate": 2.504056323565979e-06, "loss": 16.9267, "step": 48970 }, { "epoch": 0.7722141640915684, "grad_norm": 77.28539527361711, "learning_rate": 2.500745726679067e-06, "loss": 16.3814, "step": 48980 }, { "epoch": 0.7723718231695781, "grad_norm": 78.20530257354304, "learning_rate": 2.4974370069264798e-06, "loss": 16.7553, "step": 48990 }, { "epoch": 0.7725294822475878, "grad_norm": 74.53734882393766, "learning_rate": 2.494130165136417e-06, "loss": 16.9152, "step": 49000 }, { "epoch": 0.7726871413255976, "grad_norm": 72.27554593286096, "learning_rate": 2.4908252021366142e-06, "loss": 17.3969, "step": 49010 }, { "epoch": 0.7728448004036073, "grad_norm": 77.35516259525876, "learning_rate": 2.4875221187543365e-06, "loss": 16.4724, "step": 49020 }, { "epoch": 0.7730024594816169, "grad_norm": 81.90833820412097, "learning_rate": 2.484220915816379e-06, "loss": 16.378, "step": 49030 }, { "epoch": 0.7731601185596266, "grad_norm": 77.63104631866885, "learning_rate": 2.480921594149057e-06, "loss": 16.3851, "step": 49040 }, { "epoch": 0.7733177776376364, "grad_norm": 74.4183237605765, "learning_rate": 2.4776241545782287e-06, "loss": 16.8855, "step": 49050 }, { "epoch": 0.7734754367156461, "grad_norm": 72.43964795856652, "learning_rate": 2.4743285979292697e-06, "loss": 16.8575, "step": 49060 }, { "epoch": 0.7736330957936558, "grad_norm": 76.39444542679358, "learning_rate": 2.4710349250270937e-06, "loss": 16.4383, "step": 49070 }, { "epoch": 0.7737907548716655, "grad_norm": 72.04362625101626, "learning_rate": 2.4677431366961356e-06, "loss": 16.0242, "step": 49080 }, { "epoch": 0.7739484139496752, "grad_norm": 76.09690644774936, "learning_rate": 2.4644532337603577e-06, "loss": 16.5766, "step": 49090 }, { "epoch": 0.774106073027685, "grad_norm": 76.25142064741428, "learning_rate": 2.461165217043261e-06, "loss": 16.7968, "step": 49100 }, { "epoch": 0.7742637321056947, "grad_norm": 96.27482851168963, "learning_rate": 2.4578790873678603e-06, "loss": 16.0158, "step": 49110 }, { "epoch": 0.7744213911837043, "grad_norm": 71.81590152783271, "learning_rate": 2.454594845556707e-06, "loss": 16.6322, "step": 49120 }, { "epoch": 0.774579050261714, "grad_norm": 75.34941241824744, "learning_rate": 2.45131249243188e-06, "loss": 17.5659, "step": 49130 }, { "epoch": 0.7747367093397238, "grad_norm": 76.87684490074892, "learning_rate": 2.448032028814985e-06, "loss": 17.0474, "step": 49140 }, { "epoch": 0.7748943684177335, "grad_norm": 80.07238376219867, "learning_rate": 2.444753455527148e-06, "loss": 16.2831, "step": 49150 }, { "epoch": 0.7750520274957432, "grad_norm": 77.62277385880786, "learning_rate": 2.4414767733890322e-06, "loss": 16.3459, "step": 49160 }, { "epoch": 0.7752096865737529, "grad_norm": 71.0487984643827, "learning_rate": 2.438201983220816e-06, "loss": 16.6559, "step": 49170 }, { "epoch": 0.7753673456517626, "grad_norm": 74.86337010533892, "learning_rate": 2.434929085842217e-06, "loss": 16.919, "step": 49180 }, { "epoch": 0.7755250047297724, "grad_norm": 76.32810285757687, "learning_rate": 2.4316580820724655e-06, "loss": 17.3871, "step": 49190 }, { "epoch": 0.7756826638077821, "grad_norm": 76.84416176187202, "learning_rate": 2.428388972730331e-06, "loss": 16.4912, "step": 49200 }, { "epoch": 0.7758403228857917, "grad_norm": 79.89332308273939, "learning_rate": 2.425121758634098e-06, "loss": 16.6588, "step": 49210 }, { "epoch": 0.7759979819638014, "grad_norm": 74.96685976693763, "learning_rate": 2.4218564406015823e-06, "loss": 16.8263, "step": 49220 }, { "epoch": 0.7761556410418112, "grad_norm": 80.16256630061655, "learning_rate": 2.418593019450127e-06, "loss": 17.1174, "step": 49230 }, { "epoch": 0.7763133001198209, "grad_norm": 77.7539660305279, "learning_rate": 2.4153314959965923e-06, "loss": 17.2127, "step": 49240 }, { "epoch": 0.7764709591978306, "grad_norm": 73.8536769708555, "learning_rate": 2.412071871057374e-06, "loss": 16.1553, "step": 49250 }, { "epoch": 0.7766286182758403, "grad_norm": 75.38585973460428, "learning_rate": 2.4088141454483816e-06, "loss": 16.3318, "step": 49260 }, { "epoch": 0.7767862773538501, "grad_norm": 80.37807564346743, "learning_rate": 2.4055583199850595e-06, "loss": 16.503, "step": 49270 }, { "epoch": 0.7769439364318598, "grad_norm": 75.21637506355579, "learning_rate": 2.4023043954823656e-06, "loss": 16.7214, "step": 49280 }, { "epoch": 0.7771015955098695, "grad_norm": 74.87601949763292, "learning_rate": 2.3990523727547954e-06, "loss": 16.992, "step": 49290 }, { "epoch": 0.7772592545878791, "grad_norm": 73.56079496869943, "learning_rate": 2.395802252616354e-06, "loss": 16.9701, "step": 49300 }, { "epoch": 0.7774169136658888, "grad_norm": 77.0767579954295, "learning_rate": 2.3925540358805797e-06, "loss": 16.5281, "step": 49310 }, { "epoch": 0.7775745727438986, "grad_norm": 76.39624013936914, "learning_rate": 2.389307723360533e-06, "loss": 16.9594, "step": 49320 }, { "epoch": 0.7777322318219083, "grad_norm": 73.44161034397642, "learning_rate": 2.3860633158687995e-06, "loss": 16.431, "step": 49330 }, { "epoch": 0.777889890899918, "grad_norm": 74.44963920308624, "learning_rate": 2.3828208142174814e-06, "loss": 16.6984, "step": 49340 }, { "epoch": 0.7780475499779277, "grad_norm": 76.43973802750682, "learning_rate": 2.379580219218205e-06, "loss": 16.881, "step": 49350 }, { "epoch": 0.7782052090559375, "grad_norm": 75.78136399404137, "learning_rate": 2.376341531682128e-06, "loss": 16.4528, "step": 49360 }, { "epoch": 0.7783628681339472, "grad_norm": 73.28681972649326, "learning_rate": 2.373104752419918e-06, "loss": 16.689, "step": 49370 }, { "epoch": 0.7785205272119569, "grad_norm": 72.48102060147781, "learning_rate": 2.3698698822417777e-06, "loss": 16.672, "step": 49380 }, { "epoch": 0.7786781862899665, "grad_norm": 74.00035267349253, "learning_rate": 2.3666369219574216e-06, "loss": 16.9636, "step": 49390 }, { "epoch": 0.7788358453679763, "grad_norm": 77.6535174780624, "learning_rate": 2.363405872376092e-06, "loss": 16.6206, "step": 49400 }, { "epoch": 0.778993504445986, "grad_norm": 76.73791840653449, "learning_rate": 2.3601767343065497e-06, "loss": 16.3797, "step": 49410 }, { "epoch": 0.7791511635239957, "grad_norm": 78.96373209297359, "learning_rate": 2.3569495085570794e-06, "loss": 16.2785, "step": 49420 }, { "epoch": 0.7793088226020054, "grad_norm": 81.54355949983693, "learning_rate": 2.3537241959354863e-06, "loss": 16.6691, "step": 49430 }, { "epoch": 0.7794664816800151, "grad_norm": 75.28331911073226, "learning_rate": 2.350500797249099e-06, "loss": 16.6636, "step": 49440 }, { "epoch": 0.7796241407580249, "grad_norm": 75.04393158747507, "learning_rate": 2.34727931330476e-06, "loss": 16.8589, "step": 49450 }, { "epoch": 0.7797817998360346, "grad_norm": 75.17448889767577, "learning_rate": 2.3440597449088433e-06, "loss": 16.7615, "step": 49460 }, { "epoch": 0.7799394589140443, "grad_norm": 75.78111885017137, "learning_rate": 2.3408420928672303e-06, "loss": 16.1925, "step": 49470 }, { "epoch": 0.780097117992054, "grad_norm": 79.541977552501, "learning_rate": 2.3376263579853366e-06, "loss": 16.4086, "step": 49480 }, { "epoch": 0.7802547770700637, "grad_norm": 79.1818967399226, "learning_rate": 2.334412541068087e-06, "loss": 16.8773, "step": 49490 }, { "epoch": 0.7804124361480734, "grad_norm": 81.16228304978468, "learning_rate": 2.3312006429199284e-06, "loss": 16.3932, "step": 49500 }, { "epoch": 0.7805700952260831, "grad_norm": 71.55995283159409, "learning_rate": 2.3279906643448314e-06, "loss": 16.2225, "step": 49510 }, { "epoch": 0.7807277543040928, "grad_norm": 77.8892607312785, "learning_rate": 2.324782606146285e-06, "loss": 16.3984, "step": 49520 }, { "epoch": 0.7808854133821025, "grad_norm": 75.64947816826104, "learning_rate": 2.321576469127299e-06, "loss": 16.5351, "step": 49530 }, { "epoch": 0.7810430724601123, "grad_norm": 77.22218731341191, "learning_rate": 2.318372254090394e-06, "loss": 16.4403, "step": 49540 }, { "epoch": 0.781200731538122, "grad_norm": 90.97944818683415, "learning_rate": 2.315169961837621e-06, "loss": 16.833, "step": 49550 }, { "epoch": 0.7813583906161317, "grad_norm": 71.02885745397123, "learning_rate": 2.311969593170539e-06, "loss": 16.4324, "step": 49560 }, { "epoch": 0.7815160496941413, "grad_norm": 117.31447471338019, "learning_rate": 2.308771148890235e-06, "loss": 17.1316, "step": 49570 }, { "epoch": 0.7816737087721511, "grad_norm": 76.47549612036264, "learning_rate": 2.305574629797306e-06, "loss": 16.2305, "step": 49580 }, { "epoch": 0.7818313678501608, "grad_norm": 75.11860690127983, "learning_rate": 2.302380036691875e-06, "loss": 16.4968, "step": 49590 }, { "epoch": 0.7819890269281705, "grad_norm": 71.16127633263477, "learning_rate": 2.2991873703735734e-06, "loss": 16.3037, "step": 49600 }, { "epoch": 0.7821466860061802, "grad_norm": 75.75331189819408, "learning_rate": 2.2959966316415617e-06, "loss": 16.5338, "step": 49610 }, { "epoch": 0.78230434508419, "grad_norm": 73.2950585363388, "learning_rate": 2.292807821294507e-06, "loss": 16.6683, "step": 49620 }, { "epoch": 0.7824620041621997, "grad_norm": 71.6512450760452, "learning_rate": 2.2896209401306004e-06, "loss": 16.0299, "step": 49630 }, { "epoch": 0.7826196632402094, "grad_norm": 72.93063159805708, "learning_rate": 2.286435988947553e-06, "loss": 16.5899, "step": 49640 }, { "epoch": 0.7827773223182191, "grad_norm": 81.37665068322602, "learning_rate": 2.283252968542581e-06, "loss": 17.0821, "step": 49650 }, { "epoch": 0.7829349813962287, "grad_norm": 74.55726520431514, "learning_rate": 2.28007187971243e-06, "loss": 16.6768, "step": 49660 }, { "epoch": 0.7830926404742385, "grad_norm": 78.63442207478059, "learning_rate": 2.2768927232533524e-06, "loss": 16.6575, "step": 49670 }, { "epoch": 0.7832502995522482, "grad_norm": 75.57695466798889, "learning_rate": 2.2737154999611257e-06, "loss": 16.2543, "step": 49680 }, { "epoch": 0.7834079586302579, "grad_norm": 73.83063087212439, "learning_rate": 2.2705402106310337e-06, "loss": 16.3468, "step": 49690 }, { "epoch": 0.7835656177082676, "grad_norm": 73.07432379215464, "learning_rate": 2.2673668560578864e-06, "loss": 16.6839, "step": 49700 }, { "epoch": 0.7837232767862774, "grad_norm": 75.68084456326079, "learning_rate": 2.264195437036e-06, "loss": 16.9225, "step": 49710 }, { "epoch": 0.7838809358642871, "grad_norm": 70.69179165094491, "learning_rate": 2.2610259543592126e-06, "loss": 16.8458, "step": 49720 }, { "epoch": 0.7840385949422968, "grad_norm": 75.28025538821309, "learning_rate": 2.2578584088208765e-06, "loss": 16.7724, "step": 49730 }, { "epoch": 0.7841962540203065, "grad_norm": 68.39794097218632, "learning_rate": 2.2546928012138612e-06, "loss": 16.7908, "step": 49740 }, { "epoch": 0.7843539130983161, "grad_norm": 73.1525992893438, "learning_rate": 2.251529132330544e-06, "loss": 15.9475, "step": 49750 }, { "epoch": 0.7845115721763259, "grad_norm": 79.27743403587735, "learning_rate": 2.248367402962819e-06, "loss": 16.7824, "step": 49760 }, { "epoch": 0.7846692312543356, "grad_norm": 70.94692255818177, "learning_rate": 2.245207613902104e-06, "loss": 16.1703, "step": 49770 }, { "epoch": 0.7848268903323453, "grad_norm": 73.25669873785486, "learning_rate": 2.242049765939317e-06, "loss": 16.6101, "step": 49780 }, { "epoch": 0.784984549410355, "grad_norm": 73.81047508571417, "learning_rate": 2.2388938598649025e-06, "loss": 16.505, "step": 49790 }, { "epoch": 0.7851422084883648, "grad_norm": 75.65600698992479, "learning_rate": 2.2357398964688094e-06, "loss": 15.9539, "step": 49800 }, { "epoch": 0.7852998675663745, "grad_norm": 80.72340114330541, "learning_rate": 2.232587876540506e-06, "loss": 16.5564, "step": 49810 }, { "epoch": 0.7854575266443842, "grad_norm": 76.73333965195873, "learning_rate": 2.229437800868974e-06, "loss": 16.7432, "step": 49820 }, { "epoch": 0.7856151857223939, "grad_norm": 72.80526754319261, "learning_rate": 2.2262896702427094e-06, "loss": 16.3123, "step": 49830 }, { "epoch": 0.7857728448004037, "grad_norm": 71.32174383728416, "learning_rate": 2.2231434854497135e-06, "loss": 16.7853, "step": 49840 }, { "epoch": 0.7859305038784133, "grad_norm": 75.48668244530171, "learning_rate": 2.2199992472775124e-06, "loss": 16.7496, "step": 49850 }, { "epoch": 0.786088162956423, "grad_norm": 74.95864367069949, "learning_rate": 2.2168569565131313e-06, "loss": 16.3569, "step": 49860 }, { "epoch": 0.7862458220344327, "grad_norm": 78.08557349194196, "learning_rate": 2.213716613943123e-06, "loss": 16.6629, "step": 49870 }, { "epoch": 0.7864034811124424, "grad_norm": 75.40296506055127, "learning_rate": 2.210578220353541e-06, "loss": 16.9017, "step": 49880 }, { "epoch": 0.7865611401904522, "grad_norm": 73.06183803058714, "learning_rate": 2.2074417765299514e-06, "loss": 16.1955, "step": 49890 }, { "epoch": 0.7867187992684619, "grad_norm": 82.73690180634941, "learning_rate": 2.2043072832574443e-06, "loss": 16.6391, "step": 49900 }, { "epoch": 0.7868764583464716, "grad_norm": 74.57082289290166, "learning_rate": 2.201174741320604e-06, "loss": 16.7219, "step": 49910 }, { "epoch": 0.7870341174244813, "grad_norm": 72.76991885159359, "learning_rate": 2.19804415150354e-06, "loss": 16.6027, "step": 49920 }, { "epoch": 0.7871917765024911, "grad_norm": 72.67689669925323, "learning_rate": 2.1949155145898683e-06, "loss": 16.468, "step": 49930 }, { "epoch": 0.7873494355805007, "grad_norm": 73.37611331344088, "learning_rate": 2.191788831362718e-06, "loss": 16.8523, "step": 49940 }, { "epoch": 0.7875070946585104, "grad_norm": 76.04796149194398, "learning_rate": 2.188664102604724e-06, "loss": 17.2603, "step": 49950 }, { "epoch": 0.7876647537365201, "grad_norm": 72.04904247404261, "learning_rate": 2.18554132909804e-06, "loss": 15.9915, "step": 49960 }, { "epoch": 0.7878224128145299, "grad_norm": 79.36598852379957, "learning_rate": 2.1824205116243192e-06, "loss": 16.5273, "step": 49970 }, { "epoch": 0.7879800718925396, "grad_norm": 71.99743774638821, "learning_rate": 2.179301650964739e-06, "loss": 16.5927, "step": 49980 }, { "epoch": 0.7881377309705493, "grad_norm": 78.06855103345795, "learning_rate": 2.176184747899974e-06, "loss": 16.3328, "step": 49990 }, { "epoch": 0.788295390048559, "grad_norm": 71.18848559677207, "learning_rate": 2.1730698032102194e-06, "loss": 16.7883, "step": 50000 }, { "epoch": 0.7884530491265687, "grad_norm": 77.46500565080143, "learning_rate": 2.1699568176751695e-06, "loss": 16.335, "step": 50010 }, { "epoch": 0.7886107082045785, "grad_norm": 74.17652420051407, "learning_rate": 2.1668457920740373e-06, "loss": 16.3774, "step": 50020 }, { "epoch": 0.7887683672825881, "grad_norm": 75.91793004696268, "learning_rate": 2.1637367271855447e-06, "loss": 16.2911, "step": 50030 }, { "epoch": 0.7889260263605978, "grad_norm": 73.78733844790612, "learning_rate": 2.1606296237879152e-06, "loss": 16.2387, "step": 50040 }, { "epoch": 0.7890836854386075, "grad_norm": 77.51681369536456, "learning_rate": 2.15752448265889e-06, "loss": 16.4606, "step": 50050 }, { "epoch": 0.7892413445166173, "grad_norm": 79.22491868686943, "learning_rate": 2.1544213045757125e-06, "loss": 16.619, "step": 50060 }, { "epoch": 0.789399003594627, "grad_norm": 75.1435564201437, "learning_rate": 2.1513200903151398e-06, "loss": 16.4647, "step": 50070 }, { "epoch": 0.7895566626726367, "grad_norm": 76.48931018851799, "learning_rate": 2.1482208406534324e-06, "loss": 16.7902, "step": 50080 }, { "epoch": 0.7897143217506464, "grad_norm": 72.24471527585614, "learning_rate": 2.1451235563663654e-06, "loss": 16.3147, "step": 50090 }, { "epoch": 0.7898719808286561, "grad_norm": 78.66463554254757, "learning_rate": 2.1420282382292145e-06, "loss": 16.6401, "step": 50100 }, { "epoch": 0.7900296399066659, "grad_norm": 68.52290050333788, "learning_rate": 2.1389348870167683e-06, "loss": 16.4992, "step": 50110 }, { "epoch": 0.7901872989846755, "grad_norm": 76.56889406457884, "learning_rate": 2.1358435035033255e-06, "loss": 16.7918, "step": 50120 }, { "epoch": 0.7903449580626852, "grad_norm": 76.86111142282356, "learning_rate": 2.132754088462684e-06, "loss": 16.5338, "step": 50130 }, { "epoch": 0.7905026171406949, "grad_norm": 73.33518653171132, "learning_rate": 2.1296666426681566e-06, "loss": 16.1427, "step": 50140 }, { "epoch": 0.7906602762187047, "grad_norm": 73.36992480648657, "learning_rate": 2.1265811668925573e-06, "loss": 16.4064, "step": 50150 }, { "epoch": 0.7908179352967144, "grad_norm": 75.111375687019, "learning_rate": 2.123497661908215e-06, "loss": 16.3658, "step": 50160 }, { "epoch": 0.7909755943747241, "grad_norm": 77.47637395388854, "learning_rate": 2.120416128486953e-06, "loss": 16.5721, "step": 50170 }, { "epoch": 0.7911332534527338, "grad_norm": 77.41240308524641, "learning_rate": 2.1173365674001145e-06, "loss": 16.3752, "step": 50180 }, { "epoch": 0.7912909125307436, "grad_norm": 74.4899020667328, "learning_rate": 2.1142589794185387e-06, "loss": 16.3804, "step": 50190 }, { "epoch": 0.7914485716087533, "grad_norm": 75.99529749613528, "learning_rate": 2.111183365312579e-06, "loss": 16.3352, "step": 50200 }, { "epoch": 0.791606230686763, "grad_norm": 71.67170762643666, "learning_rate": 2.1081097258520857e-06, "loss": 16.5964, "step": 50210 }, { "epoch": 0.7917638897647726, "grad_norm": 77.18561643428315, "learning_rate": 2.1050380618064224e-06, "loss": 17.1964, "step": 50220 }, { "epoch": 0.7919215488427823, "grad_norm": 79.74509980059986, "learning_rate": 2.101968373944455e-06, "loss": 16.8827, "step": 50230 }, { "epoch": 0.7920792079207921, "grad_norm": 79.91755790444326, "learning_rate": 2.098900663034561e-06, "loss": 17.0197, "step": 50240 }, { "epoch": 0.7922368669988018, "grad_norm": 78.53832989539832, "learning_rate": 2.0958349298446078e-06, "loss": 15.9778, "step": 50250 }, { "epoch": 0.7923945260768115, "grad_norm": 77.0003742941144, "learning_rate": 2.0927711751419867e-06, "loss": 16.1004, "step": 50260 }, { "epoch": 0.7925521851548212, "grad_norm": 73.81599252087449, "learning_rate": 2.089709399693579e-06, "loss": 16.5571, "step": 50270 }, { "epoch": 0.792709844232831, "grad_norm": 78.20402450180008, "learning_rate": 2.0866496042657757e-06, "loss": 16.5628, "step": 50280 }, { "epoch": 0.7928675033108407, "grad_norm": 77.66841116126241, "learning_rate": 2.0835917896244763e-06, "loss": 16.4367, "step": 50290 }, { "epoch": 0.7930251623888503, "grad_norm": 74.82131593753665, "learning_rate": 2.0805359565350756e-06, "loss": 16.2729, "step": 50300 }, { "epoch": 0.79318282146686, "grad_norm": 77.36390250445868, "learning_rate": 2.0774821057624806e-06, "loss": 16.1054, "step": 50310 }, { "epoch": 0.7933404805448697, "grad_norm": 76.95433228781134, "learning_rate": 2.074430238071099e-06, "loss": 16.1315, "step": 50320 }, { "epoch": 0.7934981396228795, "grad_norm": 74.08793062093417, "learning_rate": 2.0713803542248443e-06, "loss": 16.2202, "step": 50330 }, { "epoch": 0.7936557987008892, "grad_norm": 73.3711273069228, "learning_rate": 2.0683324549871275e-06, "loss": 16.6628, "step": 50340 }, { "epoch": 0.7938134577788989, "grad_norm": 74.72527986419928, "learning_rate": 2.06528654112087e-06, "loss": 16.9029, "step": 50350 }, { "epoch": 0.7939711168569086, "grad_norm": 78.77046243952549, "learning_rate": 2.062242613388489e-06, "loss": 16.0246, "step": 50360 }, { "epoch": 0.7941287759349184, "grad_norm": 87.33272387669766, "learning_rate": 2.059200672551913e-06, "loss": 16.687, "step": 50370 }, { "epoch": 0.7942864350129281, "grad_norm": 72.89742656888947, "learning_rate": 2.056160719372563e-06, "loss": 16.2862, "step": 50380 }, { "epoch": 0.7944440940909377, "grad_norm": 75.51208240015228, "learning_rate": 2.053122754611374e-06, "loss": 16.5306, "step": 50390 }, { "epoch": 0.7946017531689474, "grad_norm": 75.8463113921979, "learning_rate": 2.0500867790287746e-06, "loss": 16.3492, "step": 50400 }, { "epoch": 0.7947594122469572, "grad_norm": 74.07515664965877, "learning_rate": 2.047052793384696e-06, "loss": 16.5282, "step": 50410 }, { "epoch": 0.7949170713249669, "grad_norm": 75.96081278056441, "learning_rate": 2.044020798438575e-06, "loss": 16.4118, "step": 50420 }, { "epoch": 0.7950747304029766, "grad_norm": 71.89530437689038, "learning_rate": 2.040990794949349e-06, "loss": 16.4514, "step": 50430 }, { "epoch": 0.7952323894809863, "grad_norm": 89.32690783727699, "learning_rate": 2.03796278367546e-06, "loss": 16.5431, "step": 50440 }, { "epoch": 0.795390048558996, "grad_norm": 74.89899397243015, "learning_rate": 2.0349367653748418e-06, "loss": 16.6499, "step": 50450 }, { "epoch": 0.7955477076370058, "grad_norm": 73.93283961640822, "learning_rate": 2.031912740804941e-06, "loss": 16.1357, "step": 50460 }, { "epoch": 0.7957053667150155, "grad_norm": 77.51902882964023, "learning_rate": 2.028890710722694e-06, "loss": 16.5308, "step": 50470 }, { "epoch": 0.7958630257930251, "grad_norm": 80.41007309128027, "learning_rate": 2.0258706758845493e-06, "loss": 16.7238, "step": 50480 }, { "epoch": 0.7960206848710348, "grad_norm": 72.69673022193405, "learning_rate": 2.022852637046445e-06, "loss": 16.3836, "step": 50490 }, { "epoch": 0.7961783439490446, "grad_norm": 83.11962671307944, "learning_rate": 2.01983659496383e-06, "loss": 16.5819, "step": 50500 }, { "epoch": 0.7963360030270543, "grad_norm": 73.47037021823692, "learning_rate": 2.0168225503916415e-06, "loss": 16.0556, "step": 50510 }, { "epoch": 0.796493662105064, "grad_norm": 70.44532930148395, "learning_rate": 2.013810504084328e-06, "loss": 16.2475, "step": 50520 }, { "epoch": 0.7966513211830737, "grad_norm": 80.8952248113774, "learning_rate": 2.010800456795835e-06, "loss": 16.4293, "step": 50530 }, { "epoch": 0.7968089802610835, "grad_norm": 70.36765249465499, "learning_rate": 2.0077924092795996e-06, "loss": 16.3646, "step": 50540 }, { "epoch": 0.7969666393390932, "grad_norm": 74.82118027273707, "learning_rate": 2.004786362288572e-06, "loss": 16.1922, "step": 50550 }, { "epoch": 0.7971242984171029, "grad_norm": 71.29685637841473, "learning_rate": 2.001782316575187e-06, "loss": 16.4167, "step": 50560 }, { "epoch": 0.7972819574951125, "grad_norm": 71.33866273625708, "learning_rate": 1.998780272891392e-06, "loss": 16.3067, "step": 50570 }, { "epoch": 0.7974396165731222, "grad_norm": 76.40953966057543, "learning_rate": 1.995780231988621e-06, "loss": 16.4482, "step": 50580 }, { "epoch": 0.797597275651132, "grad_norm": 74.19704554709263, "learning_rate": 1.9927821946178193e-06, "loss": 16.4752, "step": 50590 }, { "epoch": 0.7977549347291417, "grad_norm": 83.84030706256245, "learning_rate": 1.989786161529417e-06, "loss": 16.0772, "step": 50600 }, { "epoch": 0.7979125938071514, "grad_norm": 79.96842617291244, "learning_rate": 1.986792133473352e-06, "loss": 16.5139, "step": 50610 }, { "epoch": 0.7980702528851611, "grad_norm": 72.3214399375786, "learning_rate": 1.98380011119906e-06, "loss": 16.1659, "step": 50620 }, { "epoch": 0.7982279119631709, "grad_norm": 77.98907583678366, "learning_rate": 1.9808100954554734e-06, "loss": 16.2444, "step": 50630 }, { "epoch": 0.7983855710411806, "grad_norm": 73.0101967816525, "learning_rate": 1.977822086991017e-06, "loss": 16.0953, "step": 50640 }, { "epoch": 0.7985432301191903, "grad_norm": 74.89724655180936, "learning_rate": 1.974836086553622e-06, "loss": 16.8907, "step": 50650 }, { "epoch": 0.7987008891972, "grad_norm": 74.8128730933345, "learning_rate": 1.9718520948907083e-06, "loss": 16.5302, "step": 50660 }, { "epoch": 0.7988585482752096, "grad_norm": 73.26097352707019, "learning_rate": 1.9688701127492014e-06, "loss": 16.2215, "step": 50670 }, { "epoch": 0.7990162073532194, "grad_norm": 75.82331361231087, "learning_rate": 1.965890140875518e-06, "loss": 16.4149, "step": 50680 }, { "epoch": 0.7991738664312291, "grad_norm": 73.09517391094033, "learning_rate": 1.9629121800155714e-06, "loss": 17.165, "step": 50690 }, { "epoch": 0.7993315255092388, "grad_norm": 76.38260788351101, "learning_rate": 1.959936230914776e-06, "loss": 16.1491, "step": 50700 }, { "epoch": 0.7994891845872485, "grad_norm": 72.05876735481746, "learning_rate": 1.9569622943180366e-06, "loss": 16.1628, "step": 50710 }, { "epoch": 0.7996468436652583, "grad_norm": 68.87335640197418, "learning_rate": 1.9539903709697607e-06, "loss": 16.2199, "step": 50720 }, { "epoch": 0.799804502743268, "grad_norm": 73.14871065180608, "learning_rate": 1.9510204616138474e-06, "loss": 15.7979, "step": 50730 }, { "epoch": 0.7999621618212777, "grad_norm": 78.78002368225701, "learning_rate": 1.948052566993697e-06, "loss": 16.0677, "step": 50740 }, { "epoch": 0.8001198208992873, "grad_norm": 81.20176380135246, "learning_rate": 1.9450866878521955e-06, "loss": 16.5907, "step": 50750 }, { "epoch": 0.8002774799772971, "grad_norm": 79.65415398851583, "learning_rate": 1.942122824931737e-06, "loss": 16.5404, "step": 50760 }, { "epoch": 0.8004351390553068, "grad_norm": 80.19948662511108, "learning_rate": 1.9391609789741973e-06, "loss": 16.1002, "step": 50770 }, { "epoch": 0.8005927981333165, "grad_norm": 78.76793913190802, "learning_rate": 1.936201150720962e-06, "loss": 15.9714, "step": 50780 }, { "epoch": 0.8007504572113262, "grad_norm": 75.29032788377852, "learning_rate": 1.933243340912897e-06, "loss": 15.8552, "step": 50790 }, { "epoch": 0.8009081162893359, "grad_norm": 77.44364094510968, "learning_rate": 1.930287550290375e-06, "loss": 16.4672, "step": 50800 }, { "epoch": 0.8010657753673457, "grad_norm": 73.74799620564494, "learning_rate": 1.9273337795932555e-06, "loss": 15.7478, "step": 50810 }, { "epoch": 0.8012234344453554, "grad_norm": 75.61875594357103, "learning_rate": 1.924382029560896e-06, "loss": 16.3993, "step": 50820 }, { "epoch": 0.8013810935233651, "grad_norm": 71.61591105255825, "learning_rate": 1.9214323009321502e-06, "loss": 16.3074, "step": 50830 }, { "epoch": 0.8015387526013747, "grad_norm": 76.51302341820708, "learning_rate": 1.918484594445359e-06, "loss": 16.7521, "step": 50840 }, { "epoch": 0.8016964116793845, "grad_norm": 74.94829569454551, "learning_rate": 1.915538910838365e-06, "loss": 16.2828, "step": 50850 }, { "epoch": 0.8018540707573942, "grad_norm": 119.38575217997865, "learning_rate": 1.912595250848496e-06, "loss": 16.6914, "step": 50860 }, { "epoch": 0.8020117298354039, "grad_norm": 74.3952658933275, "learning_rate": 1.9096536152125846e-06, "loss": 16.3217, "step": 50870 }, { "epoch": 0.8021693889134136, "grad_norm": 74.8603186634588, "learning_rate": 1.906714004666942e-06, "loss": 16.1981, "step": 50880 }, { "epoch": 0.8023270479914233, "grad_norm": 72.78652268422631, "learning_rate": 1.9037764199473886e-06, "loss": 17.0866, "step": 50890 }, { "epoch": 0.8024847070694331, "grad_norm": 74.52291803052799, "learning_rate": 1.9008408617892216e-06, "loss": 16.6902, "step": 50900 }, { "epoch": 0.8026423661474428, "grad_norm": 73.60958115049253, "learning_rate": 1.8979073309272467e-06, "loss": 15.8961, "step": 50910 }, { "epoch": 0.8028000252254525, "grad_norm": 78.2573918517348, "learning_rate": 1.8949758280957487e-06, "loss": 16.3541, "step": 50920 }, { "epoch": 0.8029576843034621, "grad_norm": 73.19060628887716, "learning_rate": 1.8920463540285127e-06, "loss": 16.3548, "step": 50930 }, { "epoch": 0.803115343381472, "grad_norm": 73.09361551380944, "learning_rate": 1.8891189094588159e-06, "loss": 15.9187, "step": 50940 }, { "epoch": 0.8032730024594816, "grad_norm": 69.0307082769917, "learning_rate": 1.8861934951194205e-06, "loss": 15.869, "step": 50950 }, { "epoch": 0.8034306615374913, "grad_norm": 71.54004184825035, "learning_rate": 1.8832701117425912e-06, "loss": 15.7291, "step": 50960 }, { "epoch": 0.803588320615501, "grad_norm": 74.42127102122511, "learning_rate": 1.8803487600600723e-06, "loss": 16.0453, "step": 50970 }, { "epoch": 0.8037459796935108, "grad_norm": 78.27122723048362, "learning_rate": 1.8774294408031125e-06, "loss": 16.4175, "step": 50980 }, { "epoch": 0.8039036387715205, "grad_norm": 68.11687787804209, "learning_rate": 1.8745121547024381e-06, "loss": 16.1356, "step": 50990 }, { "epoch": 0.8040612978495302, "grad_norm": 73.39106297384636, "learning_rate": 1.8715969024882808e-06, "loss": 16.1194, "step": 51000 }, { "epoch": 0.8042189569275399, "grad_norm": 75.51350396055328, "learning_rate": 1.8686836848903478e-06, "loss": 16.694, "step": 51010 }, { "epoch": 0.8043766160055495, "grad_norm": 73.79565164088244, "learning_rate": 1.8657725026378492e-06, "loss": 16.0635, "step": 51020 }, { "epoch": 0.8045342750835593, "grad_norm": 73.26553213961697, "learning_rate": 1.8628633564594822e-06, "loss": 16.2398, "step": 51030 }, { "epoch": 0.804691934161569, "grad_norm": 76.68029838917946, "learning_rate": 1.8599562470834342e-06, "loss": 16.3296, "step": 51040 }, { "epoch": 0.8048495932395787, "grad_norm": 95.85631500687784, "learning_rate": 1.85705117523738e-06, "loss": 16.2613, "step": 51050 }, { "epoch": 0.8050072523175884, "grad_norm": 72.71795889972698, "learning_rate": 1.8541481416484886e-06, "loss": 16.1358, "step": 51060 }, { "epoch": 0.8051649113955982, "grad_norm": 72.64541254198586, "learning_rate": 1.8512471470434157e-06, "loss": 16.0989, "step": 51070 }, { "epoch": 0.8053225704736079, "grad_norm": 74.91156122598795, "learning_rate": 1.848348192148305e-06, "loss": 16.2698, "step": 51080 }, { "epoch": 0.8054802295516176, "grad_norm": 74.72019990672597, "learning_rate": 1.8454512776887978e-06, "loss": 16.6488, "step": 51090 }, { "epoch": 0.8056378886296273, "grad_norm": 72.77360298563782, "learning_rate": 1.8425564043900135e-06, "loss": 16.2321, "step": 51100 }, { "epoch": 0.8057955477076371, "grad_norm": 70.1399866242977, "learning_rate": 1.8396635729765688e-06, "loss": 15.7471, "step": 51110 }, { "epoch": 0.8059532067856467, "grad_norm": 76.10082342353526, "learning_rate": 1.836772784172567e-06, "loss": 16.5619, "step": 51120 }, { "epoch": 0.8061108658636564, "grad_norm": 71.19713128677908, "learning_rate": 1.8338840387016032e-06, "loss": 16.4066, "step": 51130 }, { "epoch": 0.8062685249416661, "grad_norm": 72.53758771622907, "learning_rate": 1.8309973372867517e-06, "loss": 16.1976, "step": 51140 }, { "epoch": 0.8064261840196758, "grad_norm": 74.29744321705635, "learning_rate": 1.8281126806505867e-06, "loss": 16.6126, "step": 51150 }, { "epoch": 0.8065838430976856, "grad_norm": 72.30383700114353, "learning_rate": 1.8252300695151593e-06, "loss": 15.8144, "step": 51160 }, { "epoch": 0.8067415021756953, "grad_norm": 77.67794094396118, "learning_rate": 1.82234950460202e-06, "loss": 16.5118, "step": 51170 }, { "epoch": 0.806899161253705, "grad_norm": 74.85642928952875, "learning_rate": 1.819470986632197e-06, "loss": 16.6706, "step": 51180 }, { "epoch": 0.8070568203317147, "grad_norm": 72.59028422448914, "learning_rate": 1.8165945163262143e-06, "loss": 16.1384, "step": 51190 }, { "epoch": 0.8072144794097245, "grad_norm": 77.08953953942901, "learning_rate": 1.8137200944040778e-06, "loss": 16.4768, "step": 51200 }, { "epoch": 0.8073721384877341, "grad_norm": 74.1080112840133, "learning_rate": 1.8108477215852804e-06, "loss": 16.0263, "step": 51210 }, { "epoch": 0.8075297975657438, "grad_norm": 76.19023708154984, "learning_rate": 1.8079773985888061e-06, "loss": 16.1172, "step": 51220 }, { "epoch": 0.8076874566437535, "grad_norm": 75.36192941107808, "learning_rate": 1.8051091261331243e-06, "loss": 16.5263, "step": 51230 }, { "epoch": 0.8078451157217632, "grad_norm": 71.96886932741641, "learning_rate": 1.8022429049361934e-06, "loss": 16.0201, "step": 51240 }, { "epoch": 0.808002774799773, "grad_norm": 72.855485844855, "learning_rate": 1.7993787357154501e-06, "loss": 16.0467, "step": 51250 }, { "epoch": 0.8081604338777827, "grad_norm": 74.55347452116156, "learning_rate": 1.796516619187828e-06, "loss": 16.5331, "step": 51260 }, { "epoch": 0.8083180929557924, "grad_norm": 75.78124942117981, "learning_rate": 1.793656556069737e-06, "loss": 15.989, "step": 51270 }, { "epoch": 0.8084757520338021, "grad_norm": 72.2071571831185, "learning_rate": 1.7907985470770838e-06, "loss": 15.7725, "step": 51280 }, { "epoch": 0.8086334111118119, "grad_norm": 82.63505784636207, "learning_rate": 1.7879425929252492e-06, "loss": 16.3147, "step": 51290 }, { "epoch": 0.8087910701898215, "grad_norm": 74.18943122625079, "learning_rate": 1.7850886943291112e-06, "loss": 16.0159, "step": 51300 }, { "epoch": 0.8089487292678312, "grad_norm": 69.50060003844426, "learning_rate": 1.782236852003022e-06, "loss": 15.9154, "step": 51310 }, { "epoch": 0.8091063883458409, "grad_norm": 73.12672320199846, "learning_rate": 1.7793870666608282e-06, "loss": 16.1158, "step": 51320 }, { "epoch": 0.8092640474238507, "grad_norm": 76.81365092405056, "learning_rate": 1.7765393390158602e-06, "loss": 16.5744, "step": 51330 }, { "epoch": 0.8094217065018604, "grad_norm": 76.28178788956328, "learning_rate": 1.7736936697809248e-06, "loss": 16.2883, "step": 51340 }, { "epoch": 0.8095793655798701, "grad_norm": 69.96695237361044, "learning_rate": 1.7708500596683264e-06, "loss": 16.0228, "step": 51350 }, { "epoch": 0.8097370246578798, "grad_norm": 125.23321522028164, "learning_rate": 1.7680085093898426e-06, "loss": 16.5846, "step": 51360 }, { "epoch": 0.8098946837358895, "grad_norm": 78.63400641103871, "learning_rate": 1.7651690196567438e-06, "loss": 16.2065, "step": 51370 }, { "epoch": 0.8100523428138993, "grad_norm": 72.8373566952641, "learning_rate": 1.762331591179779e-06, "loss": 15.9836, "step": 51380 }, { "epoch": 0.810210001891909, "grad_norm": 81.5869663160512, "learning_rate": 1.759496224669185e-06, "loss": 15.8865, "step": 51390 }, { "epoch": 0.8103676609699186, "grad_norm": 75.0071953685424, "learning_rate": 1.7566629208346786e-06, "loss": 15.9402, "step": 51400 }, { "epoch": 0.8105253200479283, "grad_norm": 79.83848023163857, "learning_rate": 1.7538316803854627e-06, "loss": 15.9624, "step": 51410 }, { "epoch": 0.8106829791259381, "grad_norm": 74.7927313827252, "learning_rate": 1.751002504030226e-06, "loss": 16.3904, "step": 51420 }, { "epoch": 0.8108406382039478, "grad_norm": 77.10660450664143, "learning_rate": 1.7481753924771393e-06, "loss": 16.3387, "step": 51430 }, { "epoch": 0.8109982972819575, "grad_norm": 81.04349725956628, "learning_rate": 1.7453503464338506e-06, "loss": 16.3255, "step": 51440 }, { "epoch": 0.8111559563599672, "grad_norm": 70.9839966454378, "learning_rate": 1.7425273666075005e-06, "loss": 16.7066, "step": 51450 }, { "epoch": 0.8113136154379769, "grad_norm": 71.78947125371336, "learning_rate": 1.7397064537047048e-06, "loss": 16.2682, "step": 51460 }, { "epoch": 0.8114712745159867, "grad_norm": 75.6034576227708, "learning_rate": 1.7368876084315633e-06, "loss": 16.331, "step": 51470 }, { "epoch": 0.8116289335939963, "grad_norm": 73.89818616176387, "learning_rate": 1.734070831493664e-06, "loss": 16.0774, "step": 51480 }, { "epoch": 0.811786592672006, "grad_norm": 80.0219923006098, "learning_rate": 1.7312561235960668e-06, "loss": 16.5404, "step": 51490 }, { "epoch": 0.8119442517500157, "grad_norm": 77.93222429647109, "learning_rate": 1.7284434854433264e-06, "loss": 16.8057, "step": 51500 }, { "epoch": 0.8121019108280255, "grad_norm": 71.58095331613332, "learning_rate": 1.725632917739467e-06, "loss": 15.4248, "step": 51510 }, { "epoch": 0.8122595699060352, "grad_norm": 75.83641185528163, "learning_rate": 1.722824421188003e-06, "loss": 15.8965, "step": 51520 }, { "epoch": 0.8124172289840449, "grad_norm": 74.16679326800572, "learning_rate": 1.7200179964919272e-06, "loss": 16.3374, "step": 51530 }, { "epoch": 0.8125748880620546, "grad_norm": 75.5557629245846, "learning_rate": 1.717213644353717e-06, "loss": 16.5081, "step": 51540 }, { "epoch": 0.8127325471400644, "grad_norm": 75.36810476837204, "learning_rate": 1.7144113654753236e-06, "loss": 16.0731, "step": 51550 }, { "epoch": 0.8128902062180741, "grad_norm": 75.24215389171617, "learning_rate": 1.7116111605581887e-06, "loss": 16.4126, "step": 51560 }, { "epoch": 0.8130478652960837, "grad_norm": 71.97185178366921, "learning_rate": 1.7088130303032246e-06, "loss": 16.0819, "step": 51570 }, { "epoch": 0.8132055243740934, "grad_norm": 75.51629827000473, "learning_rate": 1.7060169754108359e-06, "loss": 15.9333, "step": 51580 }, { "epoch": 0.8133631834521031, "grad_norm": 74.2035499935769, "learning_rate": 1.7032229965808978e-06, "loss": 16.0776, "step": 51590 }, { "epoch": 0.8135208425301129, "grad_norm": 83.12402560520746, "learning_rate": 1.700431094512769e-06, "loss": 16.2787, "step": 51600 }, { "epoch": 0.8136785016081226, "grad_norm": 69.06200794625497, "learning_rate": 1.697641269905289e-06, "loss": 15.5403, "step": 51610 }, { "epoch": 0.8138361606861323, "grad_norm": 75.05509492358617, "learning_rate": 1.6948535234567798e-06, "loss": 16.3048, "step": 51620 }, { "epoch": 0.813993819764142, "grad_norm": 75.32654409295225, "learning_rate": 1.6920678558650415e-06, "loss": 16.1578, "step": 51630 }, { "epoch": 0.8141514788421518, "grad_norm": 73.72065595440965, "learning_rate": 1.689284267827348e-06, "loss": 16.4539, "step": 51640 }, { "epoch": 0.8143091379201615, "grad_norm": 72.75564103702455, "learning_rate": 1.6865027600404648e-06, "loss": 16.3708, "step": 51650 }, { "epoch": 0.8144667969981711, "grad_norm": 74.22142454310439, "learning_rate": 1.683723333200622e-06, "loss": 15.4301, "step": 51660 }, { "epoch": 0.8146244560761808, "grad_norm": 69.1539729336623, "learning_rate": 1.6809459880035427e-06, "loss": 15.5996, "step": 51670 }, { "epoch": 0.8147821151541906, "grad_norm": 72.03425208412071, "learning_rate": 1.678170725144418e-06, "loss": 15.7079, "step": 51680 }, { "epoch": 0.8149397742322003, "grad_norm": 77.8997280921837, "learning_rate": 1.6753975453179272e-06, "loss": 16.3155, "step": 51690 }, { "epoch": 0.81509743331021, "grad_norm": 71.11660636622202, "learning_rate": 1.6726264492182188e-06, "loss": 16.5187, "step": 51700 }, { "epoch": 0.8152550923882197, "grad_norm": 70.31181350835347, "learning_rate": 1.6698574375389276e-06, "loss": 15.6943, "step": 51710 }, { "epoch": 0.8154127514662294, "grad_norm": 75.25055208608399, "learning_rate": 1.6670905109731606e-06, "loss": 16.1926, "step": 51720 }, { "epoch": 0.8155704105442392, "grad_norm": 74.80240512002656, "learning_rate": 1.664325670213507e-06, "loss": 16.1772, "step": 51730 }, { "epoch": 0.8157280696222489, "grad_norm": 75.11385807584628, "learning_rate": 1.6615629159520353e-06, "loss": 16.2262, "step": 51740 }, { "epoch": 0.8158857287002586, "grad_norm": 76.48438446342377, "learning_rate": 1.6588022488802847e-06, "loss": 16.0763, "step": 51750 }, { "epoch": 0.8160433877782682, "grad_norm": 70.3967784362338, "learning_rate": 1.65604366968928e-06, "loss": 16.0999, "step": 51760 }, { "epoch": 0.816201046856278, "grad_norm": 74.19608232803012, "learning_rate": 1.6532871790695171e-06, "loss": 16.9695, "step": 51770 }, { "epoch": 0.8163587059342877, "grad_norm": 73.124018074832, "learning_rate": 1.6505327777109737e-06, "loss": 16.1356, "step": 51780 }, { "epoch": 0.8165163650122974, "grad_norm": 77.09137305117135, "learning_rate": 1.6477804663030993e-06, "loss": 15.7734, "step": 51790 }, { "epoch": 0.8166740240903071, "grad_norm": 76.24075884439154, "learning_rate": 1.6450302455348277e-06, "loss": 16.2448, "step": 51800 }, { "epoch": 0.8168316831683168, "grad_norm": 75.68088869799614, "learning_rate": 1.6422821160945611e-06, "loss": 15.9448, "step": 51810 }, { "epoch": 0.8169893422463266, "grad_norm": 75.20236047992667, "learning_rate": 1.6395360786701843e-06, "loss": 16.7469, "step": 51820 }, { "epoch": 0.8171470013243363, "grad_norm": 74.7381744460634, "learning_rate": 1.6367921339490567e-06, "loss": 15.8802, "step": 51830 }, { "epoch": 0.817304660402346, "grad_norm": 76.91325938399214, "learning_rate": 1.634050282618015e-06, "loss": 16.4133, "step": 51840 }, { "epoch": 0.8174623194803556, "grad_norm": 76.48076060332869, "learning_rate": 1.6313105253633665e-06, "loss": 16.2241, "step": 51850 }, { "epoch": 0.8176199785583654, "grad_norm": 67.54468473614207, "learning_rate": 1.628572862870904e-06, "loss": 16.2299, "step": 51860 }, { "epoch": 0.8177776376363751, "grad_norm": 75.52116263218016, "learning_rate": 1.6258372958258872e-06, "loss": 15.8078, "step": 51870 }, { "epoch": 0.8179352967143848, "grad_norm": 79.02712456873958, "learning_rate": 1.6231038249130516e-06, "loss": 15.8309, "step": 51880 }, { "epoch": 0.8180929557923945, "grad_norm": 70.99727388682152, "learning_rate": 1.6203724508166151e-06, "loss": 16.1812, "step": 51890 }, { "epoch": 0.8182506148704043, "grad_norm": 73.26735700875001, "learning_rate": 1.6176431742202636e-06, "loss": 16.0212, "step": 51900 }, { "epoch": 0.818408273948414, "grad_norm": 74.07043124314534, "learning_rate": 1.6149159958071626e-06, "loss": 15.998, "step": 51910 }, { "epoch": 0.8185659330264237, "grad_norm": 75.52942887083098, "learning_rate": 1.6121909162599492e-06, "loss": 16.2201, "step": 51920 }, { "epoch": 0.8187235921044334, "grad_norm": 78.15930094375602, "learning_rate": 1.6094679362607402e-06, "loss": 16.4878, "step": 51930 }, { "epoch": 0.818881251182443, "grad_norm": 72.0992461801975, "learning_rate": 1.6067470564911193e-06, "loss": 16.0983, "step": 51940 }, { "epoch": 0.8190389102604528, "grad_norm": 73.82721715475272, "learning_rate": 1.6040282776321515e-06, "loss": 16.9296, "step": 51950 }, { "epoch": 0.8191965693384625, "grad_norm": 73.19914281100448, "learning_rate": 1.6013116003643692e-06, "loss": 16.1207, "step": 51960 }, { "epoch": 0.8193542284164722, "grad_norm": 76.09424465254831, "learning_rate": 1.5985970253677874e-06, "loss": 15.9214, "step": 51970 }, { "epoch": 0.8195118874944819, "grad_norm": 76.07426538540311, "learning_rate": 1.5958845533218848e-06, "loss": 16.4111, "step": 51980 }, { "epoch": 0.8196695465724917, "grad_norm": 76.74879051391903, "learning_rate": 1.5931741849056226e-06, "loss": 15.9218, "step": 51990 }, { "epoch": 0.8198272056505014, "grad_norm": 75.90110410847986, "learning_rate": 1.5904659207974304e-06, "loss": 16.1884, "step": 52000 }, { "epoch": 0.8199848647285111, "grad_norm": 76.9691044769542, "learning_rate": 1.5877597616752105e-06, "loss": 15.3017, "step": 52010 }, { "epoch": 0.8201425238065208, "grad_norm": 74.34234978414638, "learning_rate": 1.5850557082163408e-06, "loss": 15.6856, "step": 52020 }, { "epoch": 0.8203001828845304, "grad_norm": 68.7668290085449, "learning_rate": 1.5823537610976724e-06, "loss": 15.7466, "step": 52030 }, { "epoch": 0.8204578419625402, "grad_norm": 78.34711091648154, "learning_rate": 1.579653920995532e-06, "loss": 16.0793, "step": 52040 }, { "epoch": 0.8206155010405499, "grad_norm": 87.27079542687983, "learning_rate": 1.5769561885857076e-06, "loss": 16.3092, "step": 52050 }, { "epoch": 0.8207731601185596, "grad_norm": 71.90437263717058, "learning_rate": 1.574260564543474e-06, "loss": 15.8016, "step": 52060 }, { "epoch": 0.8209308191965693, "grad_norm": 79.83902855355502, "learning_rate": 1.5715670495435653e-06, "loss": 15.888, "step": 52070 }, { "epoch": 0.8210884782745791, "grad_norm": 78.64999981682116, "learning_rate": 1.5688756442601993e-06, "loss": 15.9319, "step": 52080 }, { "epoch": 0.8212461373525888, "grad_norm": 76.79573784212305, "learning_rate": 1.5661863493670559e-06, "loss": 16.2582, "step": 52090 }, { "epoch": 0.8214037964305985, "grad_norm": 76.24053068527974, "learning_rate": 1.5634991655372954e-06, "loss": 15.6366, "step": 52100 }, { "epoch": 0.8215614555086082, "grad_norm": 74.24011354603844, "learning_rate": 1.56081409344354e-06, "loss": 16.037, "step": 52110 }, { "epoch": 0.821719114586618, "grad_norm": 71.55352814959508, "learning_rate": 1.558131133757892e-06, "loss": 16.1932, "step": 52120 }, { "epoch": 0.8218767736646276, "grad_norm": 74.32962010768996, "learning_rate": 1.5554502871519227e-06, "loss": 15.9673, "step": 52130 }, { "epoch": 0.8220344327426373, "grad_norm": 72.30710498281957, "learning_rate": 1.5527715542966705e-06, "loss": 16.4106, "step": 52140 }, { "epoch": 0.822192091820647, "grad_norm": 74.27643621744508, "learning_rate": 1.5500949358626505e-06, "loss": 15.7904, "step": 52150 }, { "epoch": 0.8223497508986567, "grad_norm": 79.20349793830151, "learning_rate": 1.5474204325198417e-06, "loss": 15.7964, "step": 52160 }, { "epoch": 0.8225074099766665, "grad_norm": 76.6951190273259, "learning_rate": 1.5447480449377028e-06, "loss": 15.9902, "step": 52170 }, { "epoch": 0.8226650690546762, "grad_norm": 69.89149742957143, "learning_rate": 1.5420777737851523e-06, "loss": 16.0271, "step": 52180 }, { "epoch": 0.8228227281326859, "grad_norm": 73.66485513113322, "learning_rate": 1.5394096197305897e-06, "loss": 16.8987, "step": 52190 }, { "epoch": 0.8229803872106956, "grad_norm": 84.01844910812517, "learning_rate": 1.5367435834418732e-06, "loss": 16.2988, "step": 52200 }, { "epoch": 0.8231380462887053, "grad_norm": 75.59231594931255, "learning_rate": 1.53407966558634e-06, "loss": 16.462, "step": 52210 }, { "epoch": 0.823295705366715, "grad_norm": 77.47765856309219, "learning_rate": 1.5314178668307977e-06, "loss": 16.4231, "step": 52220 }, { "epoch": 0.8234533644447247, "grad_norm": 72.02808287986923, "learning_rate": 1.5287581878415125e-06, "loss": 15.9564, "step": 52230 }, { "epoch": 0.8236110235227344, "grad_norm": 73.56678802483576, "learning_rate": 1.5261006292842307e-06, "loss": 15.6404, "step": 52240 }, { "epoch": 0.8237686826007441, "grad_norm": 74.2723214280383, "learning_rate": 1.5234451918241677e-06, "loss": 16.4214, "step": 52250 }, { "epoch": 0.8239263416787539, "grad_norm": 75.38302518084936, "learning_rate": 1.5207918761260009e-06, "loss": 15.7813, "step": 52260 }, { "epoch": 0.8240840007567636, "grad_norm": 79.25406915437027, "learning_rate": 1.5181406828538781e-06, "loss": 16.719, "step": 52270 }, { "epoch": 0.8242416598347733, "grad_norm": 78.79442258330137, "learning_rate": 1.5154916126714237e-06, "loss": 15.8528, "step": 52280 }, { "epoch": 0.824399318912783, "grad_norm": 75.35811324570939, "learning_rate": 1.5128446662417207e-06, "loss": 16.0296, "step": 52290 }, { "epoch": 0.8245569779907927, "grad_norm": 83.23131176907256, "learning_rate": 1.510199844227327e-06, "loss": 15.9256, "step": 52300 }, { "epoch": 0.8247146370688024, "grad_norm": 74.83875429753361, "learning_rate": 1.5075571472902639e-06, "loss": 16.0047, "step": 52310 }, { "epoch": 0.8248722961468121, "grad_norm": 76.30796458887096, "learning_rate": 1.5049165760920259e-06, "loss": 15.8967, "step": 52320 }, { "epoch": 0.8250299552248218, "grad_norm": 78.12942592863689, "learning_rate": 1.5022781312935719e-06, "loss": 15.9231, "step": 52330 }, { "epoch": 0.8251876143028316, "grad_norm": 72.00639459233545, "learning_rate": 1.4996418135553336e-06, "loss": 15.688, "step": 52340 }, { "epoch": 0.8253452733808413, "grad_norm": 72.16419231778097, "learning_rate": 1.4970076235372e-06, "loss": 16.0295, "step": 52350 }, { "epoch": 0.825502932458851, "grad_norm": 74.146938966771, "learning_rate": 1.4943755618985379e-06, "loss": 15.9185, "step": 52360 }, { "epoch": 0.8256605915368607, "grad_norm": 74.07611463707309, "learning_rate": 1.4917456292981747e-06, "loss": 16.0132, "step": 52370 }, { "epoch": 0.8258182506148704, "grad_norm": 69.7817905237141, "learning_rate": 1.4891178263944117e-06, "loss": 16.4862, "step": 52380 }, { "epoch": 0.8259759096928802, "grad_norm": 73.75700132368294, "learning_rate": 1.4864921538450084e-06, "loss": 16.1293, "step": 52390 }, { "epoch": 0.8261335687708898, "grad_norm": 71.77548130940184, "learning_rate": 1.4838686123071955e-06, "loss": 15.7284, "step": 52400 }, { "epoch": 0.8262912278488995, "grad_norm": 72.06824820055445, "learning_rate": 1.4812472024376713e-06, "loss": 16.1107, "step": 52410 }, { "epoch": 0.8264488869269092, "grad_norm": 70.54624901871706, "learning_rate": 1.4786279248926006e-06, "loss": 15.9492, "step": 52420 }, { "epoch": 0.826606546004919, "grad_norm": 82.73770206424686, "learning_rate": 1.4760107803276146e-06, "loss": 15.8273, "step": 52430 }, { "epoch": 0.8267642050829287, "grad_norm": 78.32068792602841, "learning_rate": 1.4733957693978052e-06, "loss": 16.0931, "step": 52440 }, { "epoch": 0.8269218641609384, "grad_norm": 70.11561629445411, "learning_rate": 1.4707828927577383e-06, "loss": 16.353, "step": 52450 }, { "epoch": 0.8270795232389481, "grad_norm": 73.95533922779055, "learning_rate": 1.4681721510614388e-06, "loss": 16.5588, "step": 52460 }, { "epoch": 0.8272371823169579, "grad_norm": 79.85670352079916, "learning_rate": 1.4655635449624027e-06, "loss": 16.1816, "step": 52470 }, { "epoch": 0.8273948413949676, "grad_norm": 74.28627640540599, "learning_rate": 1.4629570751135857e-06, "loss": 16.2726, "step": 52480 }, { "epoch": 0.8275525004729772, "grad_norm": 70.90858495353923, "learning_rate": 1.4603527421674158e-06, "loss": 16.0743, "step": 52490 }, { "epoch": 0.8277101595509869, "grad_norm": 74.4027351007355, "learning_rate": 1.457750546775777e-06, "loss": 16.0246, "step": 52500 }, { "epoch": 0.8278678186289966, "grad_norm": 73.6490444808392, "learning_rate": 1.4551504895900293e-06, "loss": 15.8457, "step": 52510 }, { "epoch": 0.8280254777070064, "grad_norm": 70.82174978969992, "learning_rate": 1.4525525712609855e-06, "loss": 15.9585, "step": 52520 }, { "epoch": 0.8281831367850161, "grad_norm": 74.46933381901101, "learning_rate": 1.4499567924389323e-06, "loss": 15.9398, "step": 52530 }, { "epoch": 0.8283407958630258, "grad_norm": 72.85800482991762, "learning_rate": 1.447363153773621e-06, "loss": 15.8603, "step": 52540 }, { "epoch": 0.8284984549410355, "grad_norm": 76.11730922372742, "learning_rate": 1.4447716559142576e-06, "loss": 16.0235, "step": 52550 }, { "epoch": 0.8286561140190453, "grad_norm": 71.41168992877476, "learning_rate": 1.4421822995095236e-06, "loss": 15.8717, "step": 52560 }, { "epoch": 0.828813773097055, "grad_norm": 71.22061751468621, "learning_rate": 1.4395950852075557e-06, "loss": 15.6203, "step": 52570 }, { "epoch": 0.8289714321750646, "grad_norm": 69.78488062104256, "learning_rate": 1.4370100136559618e-06, "loss": 15.7865, "step": 52580 }, { "epoch": 0.8291290912530743, "grad_norm": 76.78996543040168, "learning_rate": 1.4344270855018061e-06, "loss": 16.192, "step": 52590 }, { "epoch": 0.829286750331084, "grad_norm": 72.22067836508859, "learning_rate": 1.4318463013916229e-06, "loss": 15.9304, "step": 52600 }, { "epoch": 0.8294444094090938, "grad_norm": 72.38980769329541, "learning_rate": 1.4292676619714042e-06, "loss": 16.0996, "step": 52610 }, { "epoch": 0.8296020684871035, "grad_norm": 74.53199750263417, "learning_rate": 1.4266911678866092e-06, "loss": 16.1493, "step": 52620 }, { "epoch": 0.8297597275651132, "grad_norm": 82.97176692564832, "learning_rate": 1.4241168197821587e-06, "loss": 16.2154, "step": 52630 }, { "epoch": 0.8299173866431229, "grad_norm": 79.05539489479742, "learning_rate": 1.4215446183024383e-06, "loss": 15.7697, "step": 52640 }, { "epoch": 0.8300750457211327, "grad_norm": 75.94697914040023, "learning_rate": 1.4189745640912934e-06, "loss": 16.603, "step": 52650 }, { "epoch": 0.8302327047991424, "grad_norm": 75.78425583186034, "learning_rate": 1.4164066577920288e-06, "loss": 15.7644, "step": 52660 }, { "epoch": 0.830390363877152, "grad_norm": 83.26830567322725, "learning_rate": 1.4138409000474229e-06, "loss": 15.8494, "step": 52670 }, { "epoch": 0.8305480229551617, "grad_norm": 73.56080654962766, "learning_rate": 1.4112772914997018e-06, "loss": 15.8534, "step": 52680 }, { "epoch": 0.8307056820331715, "grad_norm": 71.76014097654655, "learning_rate": 1.408715832790567e-06, "loss": 15.777, "step": 52690 }, { "epoch": 0.8308633411111812, "grad_norm": 72.40364658893915, "learning_rate": 1.4061565245611708e-06, "loss": 16.22, "step": 52700 }, { "epoch": 0.8310210001891909, "grad_norm": 74.87126935479101, "learning_rate": 1.4035993674521353e-06, "loss": 16.0891, "step": 52710 }, { "epoch": 0.8311786592672006, "grad_norm": 72.70273743793364, "learning_rate": 1.4010443621035409e-06, "loss": 16.238, "step": 52720 }, { "epoch": 0.8313363183452103, "grad_norm": 78.20048975016377, "learning_rate": 1.398491509154931e-06, "loss": 16.2463, "step": 52730 }, { "epoch": 0.8314939774232201, "grad_norm": 70.36519262964121, "learning_rate": 1.3959408092453054e-06, "loss": 16.1951, "step": 52740 }, { "epoch": 0.8316516365012298, "grad_norm": 72.16904408234312, "learning_rate": 1.3933922630131325e-06, "loss": 16.072, "step": 52750 }, { "epoch": 0.8318092955792394, "grad_norm": 72.0824567967617, "learning_rate": 1.3908458710963324e-06, "loss": 15.5216, "step": 52760 }, { "epoch": 0.8319669546572491, "grad_norm": 76.93471627446203, "learning_rate": 1.3883016341322963e-06, "loss": 16.2829, "step": 52770 }, { "epoch": 0.8321246137352589, "grad_norm": 72.81354781210173, "learning_rate": 1.3857595527578693e-06, "loss": 15.7505, "step": 52780 }, { "epoch": 0.8322822728132686, "grad_norm": 69.42458281670984, "learning_rate": 1.3832196276093545e-06, "loss": 16.2331, "step": 52790 }, { "epoch": 0.8324399318912783, "grad_norm": 73.75893249373397, "learning_rate": 1.3806818593225257e-06, "loss": 15.752, "step": 52800 }, { "epoch": 0.832597590969288, "grad_norm": 73.96129671343697, "learning_rate": 1.3781462485326036e-06, "loss": 16.4096, "step": 52810 }, { "epoch": 0.8327552500472977, "grad_norm": 70.98244395274803, "learning_rate": 1.3756127958742794e-06, "loss": 15.503, "step": 52820 }, { "epoch": 0.8329129091253075, "grad_norm": 69.78370474099546, "learning_rate": 1.373081501981699e-06, "loss": 16.0191, "step": 52830 }, { "epoch": 0.8330705682033172, "grad_norm": 71.25775468278242, "learning_rate": 1.3705523674884735e-06, "loss": 16.1426, "step": 52840 }, { "epoch": 0.8332282272813268, "grad_norm": 72.98705262312264, "learning_rate": 1.3680253930276632e-06, "loss": 15.9655, "step": 52850 }, { "epoch": 0.8333858863593365, "grad_norm": 70.6616117981318, "learning_rate": 1.3655005792317977e-06, "loss": 15.4827, "step": 52860 }, { "epoch": 0.8335435454373463, "grad_norm": 75.1132544623044, "learning_rate": 1.3629779267328591e-06, "loss": 15.8704, "step": 52870 }, { "epoch": 0.833701204515356, "grad_norm": 74.48776580674952, "learning_rate": 1.3604574361622935e-06, "loss": 15.8393, "step": 52880 }, { "epoch": 0.8338588635933657, "grad_norm": 86.42947379534561, "learning_rate": 1.3579391081510007e-06, "loss": 16.2251, "step": 52890 }, { "epoch": 0.8340165226713754, "grad_norm": 72.47094758213967, "learning_rate": 1.3554229433293464e-06, "loss": 16.3812, "step": 52900 }, { "epoch": 0.8341741817493852, "grad_norm": 73.10895412350632, "learning_rate": 1.3529089423271458e-06, "loss": 15.9996, "step": 52910 }, { "epoch": 0.8343318408273949, "grad_norm": 73.87104096633996, "learning_rate": 1.3503971057736786e-06, "loss": 15.852, "step": 52920 }, { "epoch": 0.8344894999054046, "grad_norm": 75.52309545966735, "learning_rate": 1.3478874342976844e-06, "loss": 16.3735, "step": 52930 }, { "epoch": 0.8346471589834142, "grad_norm": 76.57181124336617, "learning_rate": 1.345379928527354e-06, "loss": 15.5897, "step": 52940 }, { "epoch": 0.8348048180614239, "grad_norm": 69.98168508831975, "learning_rate": 1.3428745890903417e-06, "loss": 15.8818, "step": 52950 }, { "epoch": 0.8349624771394337, "grad_norm": 76.28017710147134, "learning_rate": 1.340371416613755e-06, "loss": 15.5816, "step": 52960 }, { "epoch": 0.8351201362174434, "grad_norm": 78.26555300540818, "learning_rate": 1.337870411724167e-06, "loss": 15.9421, "step": 52970 }, { "epoch": 0.8352777952954531, "grad_norm": 73.16309911245948, "learning_rate": 1.335371575047596e-06, "loss": 16.2277, "step": 52980 }, { "epoch": 0.8354354543734628, "grad_norm": 74.19516156399499, "learning_rate": 1.3328749072095315e-06, "loss": 15.9443, "step": 52990 }, { "epoch": 0.8355931134514726, "grad_norm": 72.80909198794023, "learning_rate": 1.3303804088349058e-06, "loss": 15.8571, "step": 53000 }, { "epoch": 0.8357507725294823, "grad_norm": 71.86928943677155, "learning_rate": 1.327888080548123e-06, "loss": 15.6302, "step": 53010 }, { "epoch": 0.835908431607492, "grad_norm": 72.8619022849029, "learning_rate": 1.3253979229730297e-06, "loss": 15.5649, "step": 53020 }, { "epoch": 0.8360660906855016, "grad_norm": 74.70035879060676, "learning_rate": 1.32290993673294e-06, "loss": 15.7957, "step": 53030 }, { "epoch": 0.8362237497635114, "grad_norm": 73.1561507293749, "learning_rate": 1.3204241224506208e-06, "loss": 15.9819, "step": 53040 }, { "epoch": 0.8363814088415211, "grad_norm": 70.67019964502335, "learning_rate": 1.317940480748291e-06, "loss": 15.8627, "step": 53050 }, { "epoch": 0.8365390679195308, "grad_norm": 75.44925606116884, "learning_rate": 1.315459012247634e-06, "loss": 15.9013, "step": 53060 }, { "epoch": 0.8366967269975405, "grad_norm": 75.4922187205115, "learning_rate": 1.3129797175697823e-06, "loss": 15.9599, "step": 53070 }, { "epoch": 0.8368543860755502, "grad_norm": 76.08116490731115, "learning_rate": 1.310502597335328e-06, "loss": 16.1449, "step": 53080 }, { "epoch": 0.83701204515356, "grad_norm": 81.36932984678326, "learning_rate": 1.3080276521643165e-06, "loss": 15.8872, "step": 53090 }, { "epoch": 0.8371697042315697, "grad_norm": 77.63099950138542, "learning_rate": 1.3055548826762521e-06, "loss": 15.9013, "step": 53100 }, { "epoch": 0.8373273633095794, "grad_norm": 77.13040173575274, "learning_rate": 1.3030842894900885e-06, "loss": 15.8653, "step": 53110 }, { "epoch": 0.837485022387589, "grad_norm": 73.13880342601017, "learning_rate": 1.3006158732242424e-06, "loss": 16.3248, "step": 53120 }, { "epoch": 0.8376426814655988, "grad_norm": 76.95191935214928, "learning_rate": 1.2981496344965794e-06, "loss": 15.909, "step": 53130 }, { "epoch": 0.8378003405436085, "grad_norm": 70.38864105243891, "learning_rate": 1.2956855739244257e-06, "loss": 15.8957, "step": 53140 }, { "epoch": 0.8379579996216182, "grad_norm": 77.22665629112454, "learning_rate": 1.2932236921245555e-06, "loss": 16.1013, "step": 53150 }, { "epoch": 0.8381156586996279, "grad_norm": 72.04089200743903, "learning_rate": 1.290763989713204e-06, "loss": 15.8334, "step": 53160 }, { "epoch": 0.8382733177776376, "grad_norm": 81.31100207243902, "learning_rate": 1.288306467306054e-06, "loss": 16.153, "step": 53170 }, { "epoch": 0.8384309768556474, "grad_norm": 73.76964539133435, "learning_rate": 1.2858511255182504e-06, "loss": 15.911, "step": 53180 }, { "epoch": 0.8385886359336571, "grad_norm": 73.65609411817273, "learning_rate": 1.283397964964388e-06, "loss": 15.8499, "step": 53190 }, { "epoch": 0.8387462950116668, "grad_norm": 80.7421659142838, "learning_rate": 1.2809469862585133e-06, "loss": 16.0867, "step": 53200 }, { "epoch": 0.8389039540896764, "grad_norm": 69.28876112492017, "learning_rate": 1.2784981900141303e-06, "loss": 16.2013, "step": 53210 }, { "epoch": 0.8390616131676862, "grad_norm": 73.93808281318377, "learning_rate": 1.2760515768441971e-06, "loss": 16.056, "step": 53220 }, { "epoch": 0.8392192722456959, "grad_norm": 70.24927071258546, "learning_rate": 1.2736071473611267e-06, "loss": 16.0615, "step": 53230 }, { "epoch": 0.8393769313237056, "grad_norm": 73.07163591390265, "learning_rate": 1.271164902176778e-06, "loss": 16.3208, "step": 53240 }, { "epoch": 0.8395345904017153, "grad_norm": 72.78228225389535, "learning_rate": 1.2687248419024723e-06, "loss": 15.6945, "step": 53250 }, { "epoch": 0.8396922494797251, "grad_norm": 80.09072814881658, "learning_rate": 1.2662869671489752e-06, "loss": 16.2939, "step": 53260 }, { "epoch": 0.8398499085577348, "grad_norm": 67.52297269363314, "learning_rate": 1.2638512785265144e-06, "loss": 15.7485, "step": 53270 }, { "epoch": 0.8400075676357445, "grad_norm": 73.75085619347074, "learning_rate": 1.2614177766447621e-06, "loss": 15.8206, "step": 53280 }, { "epoch": 0.8401652267137542, "grad_norm": 69.67360515295071, "learning_rate": 1.2589864621128512e-06, "loss": 15.9799, "step": 53290 }, { "epoch": 0.8403228857917638, "grad_norm": 71.00086063497339, "learning_rate": 1.2565573355393568e-06, "loss": 16.0997, "step": 53300 }, { "epoch": 0.8404805448697736, "grad_norm": 71.5195531359231, "learning_rate": 1.2541303975323183e-06, "loss": 15.913, "step": 53310 }, { "epoch": 0.8406382039477833, "grad_norm": 82.1702708926472, "learning_rate": 1.2517056486992163e-06, "loss": 15.8454, "step": 53320 }, { "epoch": 0.840795863025793, "grad_norm": 71.93441627781903, "learning_rate": 1.2492830896469909e-06, "loss": 15.8801, "step": 53330 }, { "epoch": 0.8409535221038027, "grad_norm": 74.7430946760441, "learning_rate": 1.2468627209820328e-06, "loss": 16.1939, "step": 53340 }, { "epoch": 0.8411111811818125, "grad_norm": 72.71854626396664, "learning_rate": 1.2444445433101792e-06, "loss": 15.9384, "step": 53350 }, { "epoch": 0.8412688402598222, "grad_norm": 75.1520349713288, "learning_rate": 1.2420285572367286e-06, "loss": 16.0208, "step": 53360 }, { "epoch": 0.8414264993378319, "grad_norm": 70.66509638475036, "learning_rate": 1.239614763366419e-06, "loss": 16.1487, "step": 53370 }, { "epoch": 0.8415841584158416, "grad_norm": 71.35949290211406, "learning_rate": 1.2372031623034508e-06, "loss": 15.8619, "step": 53380 }, { "epoch": 0.8417418174938512, "grad_norm": 75.76558232433922, "learning_rate": 1.2347937546514666e-06, "loss": 15.7459, "step": 53390 }, { "epoch": 0.841899476571861, "grad_norm": 73.55857519317976, "learning_rate": 1.2323865410135672e-06, "loss": 15.7391, "step": 53400 }, { "epoch": 0.8420571356498707, "grad_norm": 70.54024782037818, "learning_rate": 1.2299815219922983e-06, "loss": 15.3245, "step": 53410 }, { "epoch": 0.8422147947278804, "grad_norm": 72.7843258910126, "learning_rate": 1.2275786981896597e-06, "loss": 15.4408, "step": 53420 }, { "epoch": 0.8423724538058901, "grad_norm": 81.0349604511338, "learning_rate": 1.2251780702071015e-06, "loss": 15.9643, "step": 53430 }, { "epoch": 0.8425301128838999, "grad_norm": 73.22431236613583, "learning_rate": 1.222779638645526e-06, "loss": 15.9887, "step": 53440 }, { "epoch": 0.8426877719619096, "grad_norm": 74.80132797393323, "learning_rate": 1.2203834041052808e-06, "loss": 15.5938, "step": 53450 }, { "epoch": 0.8428454310399193, "grad_norm": 73.2064708402831, "learning_rate": 1.2179893671861642e-06, "loss": 16.3658, "step": 53460 }, { "epoch": 0.843003090117929, "grad_norm": 71.56024103911933, "learning_rate": 1.21559752848743e-06, "loss": 16.1462, "step": 53470 }, { "epoch": 0.8431607491959388, "grad_norm": 77.44287097688066, "learning_rate": 1.2132078886077746e-06, "loss": 15.9423, "step": 53480 }, { "epoch": 0.8433184082739484, "grad_norm": 76.43729932877025, "learning_rate": 1.2108204481453512e-06, "loss": 15.8888, "step": 53490 }, { "epoch": 0.8434760673519581, "grad_norm": 69.66777371644224, "learning_rate": 1.2084352076977545e-06, "loss": 16.1814, "step": 53500 }, { "epoch": 0.8436337264299678, "grad_norm": 72.81032952547017, "learning_rate": 1.2060521678620352e-06, "loss": 15.6877, "step": 53510 }, { "epoch": 0.8437913855079775, "grad_norm": 76.00659905171754, "learning_rate": 1.2036713292346903e-06, "loss": 15.9586, "step": 53520 }, { "epoch": 0.8439490445859873, "grad_norm": 74.82022706437215, "learning_rate": 1.2012926924116686e-06, "loss": 15.938, "step": 53530 }, { "epoch": 0.844106703663997, "grad_norm": 72.78762434388224, "learning_rate": 1.1989162579883607e-06, "loss": 15.6468, "step": 53540 }, { "epoch": 0.8442643627420067, "grad_norm": 75.57116020128241, "learning_rate": 1.1965420265596161e-06, "loss": 15.6495, "step": 53550 }, { "epoch": 0.8444220218200164, "grad_norm": 69.87924089639544, "learning_rate": 1.194169998719722e-06, "loss": 15.679, "step": 53560 }, { "epoch": 0.8445796808980262, "grad_norm": 73.0719598387355, "learning_rate": 1.1918001750624243e-06, "loss": 15.7329, "step": 53570 }, { "epoch": 0.8447373399760358, "grad_norm": 81.50773032016836, "learning_rate": 1.1894325561809094e-06, "loss": 16.2048, "step": 53580 }, { "epoch": 0.8448949990540455, "grad_norm": 74.799727190435, "learning_rate": 1.1870671426678138e-06, "loss": 16.3576, "step": 53590 }, { "epoch": 0.8450526581320552, "grad_norm": 72.17331231326145, "learning_rate": 1.1847039351152268e-06, "loss": 16.2027, "step": 53600 }, { "epoch": 0.845210317210065, "grad_norm": 71.70419722662815, "learning_rate": 1.1823429341146764e-06, "loss": 15.9941, "step": 53610 }, { "epoch": 0.8453679762880747, "grad_norm": 82.00219745906509, "learning_rate": 1.1799841402571454e-06, "loss": 16.0281, "step": 53620 }, { "epoch": 0.8455256353660844, "grad_norm": 72.23566637811876, "learning_rate": 1.177627554133064e-06, "loss": 15.87, "step": 53630 }, { "epoch": 0.8456832944440941, "grad_norm": 76.23525833569794, "learning_rate": 1.175273176332309e-06, "loss": 15.9682, "step": 53640 }, { "epoch": 0.8458409535221038, "grad_norm": 73.81238131089896, "learning_rate": 1.1729210074441999e-06, "loss": 15.8802, "step": 53650 }, { "epoch": 0.8459986126001136, "grad_norm": 71.89598192919449, "learning_rate": 1.1705710480575094e-06, "loss": 15.8232, "step": 53660 }, { "epoch": 0.8461562716781232, "grad_norm": 70.40316860075009, "learning_rate": 1.1682232987604525e-06, "loss": 15.5661, "step": 53670 }, { "epoch": 0.8463139307561329, "grad_norm": 73.48571758385786, "learning_rate": 1.1658777601406968e-06, "loss": 15.6999, "step": 53680 }, { "epoch": 0.8464715898341426, "grad_norm": 74.77279484664722, "learning_rate": 1.163534432785347e-06, "loss": 15.6483, "step": 53690 }, { "epoch": 0.8466292489121524, "grad_norm": 81.54792945609574, "learning_rate": 1.1611933172809663e-06, "loss": 16.091, "step": 53700 }, { "epoch": 0.8467869079901621, "grad_norm": 71.17385228072591, "learning_rate": 1.1588544142135527e-06, "loss": 15.9913, "step": 53710 }, { "epoch": 0.8469445670681718, "grad_norm": 77.10878743328792, "learning_rate": 1.1565177241685576e-06, "loss": 15.564, "step": 53720 }, { "epoch": 0.8471022261461815, "grad_norm": 71.56271972835614, "learning_rate": 1.1541832477308802e-06, "loss": 15.8512, "step": 53730 }, { "epoch": 0.8472598852241912, "grad_norm": 89.19579084450152, "learning_rate": 1.151850985484856e-06, "loss": 15.988, "step": 53740 }, { "epoch": 0.847417544302201, "grad_norm": 71.27471757883454, "learning_rate": 1.149520938014278e-06, "loss": 15.7794, "step": 53750 }, { "epoch": 0.8475752033802106, "grad_norm": 70.41312776702988, "learning_rate": 1.1471931059023744e-06, "loss": 15.2097, "step": 53760 }, { "epoch": 0.8477328624582203, "grad_norm": 76.37724529772096, "learning_rate": 1.144867489731828e-06, "loss": 15.4868, "step": 53770 }, { "epoch": 0.84789052153623, "grad_norm": 72.4286512139729, "learning_rate": 1.1425440900847585e-06, "loss": 16.0698, "step": 53780 }, { "epoch": 0.8480481806142398, "grad_norm": 73.72798387947991, "learning_rate": 1.1402229075427373e-06, "loss": 15.9267, "step": 53790 }, { "epoch": 0.8482058396922495, "grad_norm": 79.75280109406744, "learning_rate": 1.1379039426867767e-06, "loss": 16.3425, "step": 53800 }, { "epoch": 0.8483634987702592, "grad_norm": 71.71550298939184, "learning_rate": 1.1355871960973386e-06, "loss": 15.6981, "step": 53810 }, { "epoch": 0.8485211578482689, "grad_norm": 72.07807624008278, "learning_rate": 1.1332726683543217e-06, "loss": 15.4255, "step": 53820 }, { "epoch": 0.8486788169262787, "grad_norm": 81.29177105626394, "learning_rate": 1.1309603600370779e-06, "loss": 15.4638, "step": 53830 }, { "epoch": 0.8488364760042884, "grad_norm": 71.54748820586818, "learning_rate": 1.1286502717244008e-06, "loss": 16.2955, "step": 53840 }, { "epoch": 0.848994135082298, "grad_norm": 77.03366288272063, "learning_rate": 1.1263424039945224e-06, "loss": 15.8587, "step": 53850 }, { "epoch": 0.8491517941603077, "grad_norm": 75.25358746477919, "learning_rate": 1.1240367574251298e-06, "loss": 15.7254, "step": 53860 }, { "epoch": 0.8493094532383174, "grad_norm": 81.25756513156176, "learning_rate": 1.1217333325933432e-06, "loss": 16.1101, "step": 53870 }, { "epoch": 0.8494671123163272, "grad_norm": 81.21542177833197, "learning_rate": 1.119432130075736e-06, "loss": 15.903, "step": 53880 }, { "epoch": 0.8496247713943369, "grad_norm": 73.03900774737257, "learning_rate": 1.1171331504483174e-06, "loss": 15.4737, "step": 53890 }, { "epoch": 0.8497824304723466, "grad_norm": 80.01181099667998, "learning_rate": 1.114836394286546e-06, "loss": 15.7169, "step": 53900 }, { "epoch": 0.8499400895503563, "grad_norm": 73.95035330177335, "learning_rate": 1.1125418621653183e-06, "loss": 16.3443, "step": 53910 }, { "epoch": 0.8500977486283661, "grad_norm": 76.81600110324837, "learning_rate": 1.1102495546589808e-06, "loss": 15.6251, "step": 53920 }, { "epoch": 0.8502554077063758, "grad_norm": 81.59643100686364, "learning_rate": 1.1079594723413189e-06, "loss": 15.7894, "step": 53930 }, { "epoch": 0.8504130667843854, "grad_norm": 74.89460509816446, "learning_rate": 1.1056716157855628e-06, "loss": 15.5688, "step": 53940 }, { "epoch": 0.8505707258623951, "grad_norm": 76.3756484528151, "learning_rate": 1.1033859855643825e-06, "loss": 15.5301, "step": 53950 }, { "epoch": 0.8507283849404048, "grad_norm": 78.5573462404921, "learning_rate": 1.1011025822498954e-06, "loss": 15.6535, "step": 53960 }, { "epoch": 0.8508860440184146, "grad_norm": 74.69159885995927, "learning_rate": 1.0988214064136571e-06, "loss": 15.3571, "step": 53970 }, { "epoch": 0.8510437030964243, "grad_norm": 74.13541994269457, "learning_rate": 1.096542458626667e-06, "loss": 15.6766, "step": 53980 }, { "epoch": 0.851201362174434, "grad_norm": 72.9141278858231, "learning_rate": 1.0942657394593692e-06, "loss": 15.7777, "step": 53990 }, { "epoch": 0.8513590212524437, "grad_norm": 75.0243841360995, "learning_rate": 1.0919912494816454e-06, "loss": 16.0695, "step": 54000 }, { "epoch": 0.8515166803304535, "grad_norm": 74.89182606033728, "learning_rate": 1.0897189892628247e-06, "loss": 15.7027, "step": 54010 }, { "epoch": 0.8516743394084632, "grad_norm": 70.54075774381492, "learning_rate": 1.087448959371673e-06, "loss": 15.7088, "step": 54020 }, { "epoch": 0.8518319984864728, "grad_norm": 72.47446238497614, "learning_rate": 1.085181160376404e-06, "loss": 15.9644, "step": 54030 }, { "epoch": 0.8519896575644825, "grad_norm": 75.54310773890374, "learning_rate": 1.0829155928446645e-06, "loss": 16.1848, "step": 54040 }, { "epoch": 0.8521473166424923, "grad_norm": 74.34386581217984, "learning_rate": 1.080652257343553e-06, "loss": 15.4694, "step": 54050 }, { "epoch": 0.852304975720502, "grad_norm": 73.81601177938998, "learning_rate": 1.0783911544395976e-06, "loss": 16.2098, "step": 54060 }, { "epoch": 0.8524626347985117, "grad_norm": 75.556369622043, "learning_rate": 1.0761322846987786e-06, "loss": 15.9269, "step": 54070 }, { "epoch": 0.8526202938765214, "grad_norm": 78.449447861099, "learning_rate": 1.0738756486865087e-06, "loss": 15.1897, "step": 54080 }, { "epoch": 0.8527779529545311, "grad_norm": 73.75966888273354, "learning_rate": 1.071621246967649e-06, "loss": 15.7026, "step": 54090 }, { "epoch": 0.8529356120325409, "grad_norm": 70.78700092659444, "learning_rate": 1.0693690801064972e-06, "loss": 16.1415, "step": 54100 }, { "epoch": 0.8530932711105506, "grad_norm": 75.49891027200074, "learning_rate": 1.0671191486667876e-06, "loss": 15.3948, "step": 54110 }, { "epoch": 0.8532509301885602, "grad_norm": 74.00006981722369, "learning_rate": 1.0648714532117022e-06, "loss": 16.2677, "step": 54120 }, { "epoch": 0.8534085892665699, "grad_norm": 66.77514604621341, "learning_rate": 1.0626259943038598e-06, "loss": 15.7962, "step": 54130 }, { "epoch": 0.8535662483445797, "grad_norm": 75.70770772361998, "learning_rate": 1.060382772505324e-06, "loss": 15.7434, "step": 54140 }, { "epoch": 0.8537239074225894, "grad_norm": 78.45626131984183, "learning_rate": 1.0581417883775891e-06, "loss": 16.3066, "step": 54150 }, { "epoch": 0.8538815665005991, "grad_norm": 80.46664454634295, "learning_rate": 1.0559030424815985e-06, "loss": 15.4321, "step": 54160 }, { "epoch": 0.8540392255786088, "grad_norm": 72.74721577495193, "learning_rate": 1.0536665353777276e-06, "loss": 15.465, "step": 54170 }, { "epoch": 0.8541968846566186, "grad_norm": 75.27903840509002, "learning_rate": 1.0514322676257992e-06, "loss": 15.2925, "step": 54180 }, { "epoch": 0.8543545437346283, "grad_norm": 74.89370298716757, "learning_rate": 1.0492002397850686e-06, "loss": 15.8858, "step": 54190 }, { "epoch": 0.854512202812638, "grad_norm": 70.2160979662377, "learning_rate": 1.0469704524142354e-06, "loss": 15.331, "step": 54200 }, { "epoch": 0.8546698618906476, "grad_norm": 68.46925320652696, "learning_rate": 1.0447429060714353e-06, "loss": 15.9277, "step": 54210 }, { "epoch": 0.8548275209686573, "grad_norm": 74.28521961447007, "learning_rate": 1.0425176013142445e-06, "loss": 15.0736, "step": 54220 }, { "epoch": 0.8549851800466671, "grad_norm": 77.46757403047228, "learning_rate": 1.0402945386996798e-06, "loss": 15.9865, "step": 54230 }, { "epoch": 0.8551428391246768, "grad_norm": 72.23310875810368, "learning_rate": 1.0380737187841916e-06, "loss": 16.1935, "step": 54240 }, { "epoch": 0.8553004982026865, "grad_norm": 73.61905139274694, "learning_rate": 1.035855142123675e-06, "loss": 15.6697, "step": 54250 }, { "epoch": 0.8554581572806962, "grad_norm": 76.29867295366219, "learning_rate": 1.0336388092734585e-06, "loss": 16.0274, "step": 54260 }, { "epoch": 0.855615816358706, "grad_norm": 77.71369391477306, "learning_rate": 1.0314247207883133e-06, "loss": 16.1744, "step": 54270 }, { "epoch": 0.8557734754367157, "grad_norm": 77.7812003381272, "learning_rate": 1.029212877222444e-06, "loss": 16.1977, "step": 54280 }, { "epoch": 0.8559311345147254, "grad_norm": 74.99305263886751, "learning_rate": 1.0270032791294993e-06, "loss": 15.7773, "step": 54290 }, { "epoch": 0.856088793592735, "grad_norm": 72.95396878432732, "learning_rate": 1.0247959270625585e-06, "loss": 15.8358, "step": 54300 }, { "epoch": 0.8562464526707447, "grad_norm": 76.83785724502458, "learning_rate": 1.0225908215741454e-06, "loss": 15.8818, "step": 54310 }, { "epoch": 0.8564041117487545, "grad_norm": 71.56185168374154, "learning_rate": 1.0203879632162194e-06, "loss": 15.722, "step": 54320 }, { "epoch": 0.8565617708267642, "grad_norm": 69.83922700131224, "learning_rate": 1.0181873525401742e-06, "loss": 15.5549, "step": 54330 }, { "epoch": 0.8567194299047739, "grad_norm": 74.18410925570579, "learning_rate": 1.0159889900968457e-06, "loss": 16.042, "step": 54340 }, { "epoch": 0.8568770889827836, "grad_norm": 74.5964107300845, "learning_rate": 1.0137928764365056e-06, "loss": 15.6982, "step": 54350 }, { "epoch": 0.8570347480607934, "grad_norm": 74.3098548097798, "learning_rate": 1.0115990121088581e-06, "loss": 15.6097, "step": 54360 }, { "epoch": 0.8571924071388031, "grad_norm": 74.92035902156603, "learning_rate": 1.0094073976630537e-06, "loss": 15.559, "step": 54370 }, { "epoch": 0.8573500662168128, "grad_norm": 69.69793653850581, "learning_rate": 1.00721803364767e-06, "loss": 15.4262, "step": 54380 }, { "epoch": 0.8575077252948224, "grad_norm": 67.67725226509413, "learning_rate": 1.0050309206107246e-06, "loss": 15.8746, "step": 54390 }, { "epoch": 0.8576653843728322, "grad_norm": 78.82595984784231, "learning_rate": 1.002846059099677e-06, "loss": 16.038, "step": 54400 }, { "epoch": 0.8578230434508419, "grad_norm": 71.94634866187567, "learning_rate": 1.0006634496614142e-06, "loss": 15.5224, "step": 54410 }, { "epoch": 0.8579807025288516, "grad_norm": 77.12025576379777, "learning_rate": 9.984830928422662e-07, "loss": 15.8074, "step": 54420 }, { "epoch": 0.8581383616068613, "grad_norm": 72.78161107693829, "learning_rate": 9.963049891879971e-07, "loss": 15.7018, "step": 54430 }, { "epoch": 0.858296020684871, "grad_norm": 72.1514639819078, "learning_rate": 9.94129139243809e-07, "loss": 15.7748, "step": 54440 }, { "epoch": 0.8584536797628808, "grad_norm": 73.71893383353616, "learning_rate": 9.919555435543326e-07, "loss": 15.243, "step": 54450 }, { "epoch": 0.8586113388408905, "grad_norm": 73.56120216478757, "learning_rate": 9.897842026636439e-07, "loss": 15.6383, "step": 54460 }, { "epoch": 0.8587689979189002, "grad_norm": 74.4332194186988, "learning_rate": 9.876151171152481e-07, "loss": 15.8827, "step": 54470 }, { "epoch": 0.8589266569969098, "grad_norm": 66.72934694924906, "learning_rate": 9.854482874520889e-07, "loss": 15.9248, "step": 54480 }, { "epoch": 0.8590843160749196, "grad_norm": 72.37278511727429, "learning_rate": 9.832837142165418e-07, "loss": 15.6455, "step": 54490 }, { "epoch": 0.8592419751529293, "grad_norm": 71.22907233790508, "learning_rate": 9.81121397950423e-07, "loss": 15.8819, "step": 54500 }, { "epoch": 0.859399634230939, "grad_norm": 70.62580193116143, "learning_rate": 9.789613391949782e-07, "loss": 15.2661, "step": 54510 }, { "epoch": 0.8595572933089487, "grad_norm": 76.89694918597601, "learning_rate": 9.768035384908925e-07, "loss": 16.1052, "step": 54520 }, { "epoch": 0.8597149523869584, "grad_norm": 73.60154853757473, "learning_rate": 9.74647996378284e-07, "loss": 15.7275, "step": 54530 }, { "epoch": 0.8598726114649682, "grad_norm": 70.46346546223961, "learning_rate": 9.724947133967032e-07, "loss": 15.3958, "step": 54540 }, { "epoch": 0.8600302705429779, "grad_norm": 71.59137830625737, "learning_rate": 9.703436900851392e-07, "loss": 15.3193, "step": 54550 }, { "epoch": 0.8601879296209876, "grad_norm": 76.89538566354894, "learning_rate": 9.681949269820124e-07, "loss": 15.5638, "step": 54560 }, { "epoch": 0.8603455886989972, "grad_norm": 73.85468550232987, "learning_rate": 9.6604842462518e-07, "loss": 15.9458, "step": 54570 }, { "epoch": 0.860503247777007, "grad_norm": 74.38593827989155, "learning_rate": 9.639041835519292e-07, "loss": 15.3817, "step": 54580 }, { "epoch": 0.8606609068550167, "grad_norm": 78.913910221898, "learning_rate": 9.61762204298986e-07, "loss": 15.9306, "step": 54590 }, { "epoch": 0.8608185659330264, "grad_norm": 71.43211864739816, "learning_rate": 9.596224874025073e-07, "loss": 15.3589, "step": 54600 }, { "epoch": 0.8609762250110361, "grad_norm": 78.15596608304337, "learning_rate": 9.574850333980855e-07, "loss": 16.0811, "step": 54610 }, { "epoch": 0.8611338840890459, "grad_norm": 67.56653677707654, "learning_rate": 9.553498428207431e-07, "loss": 15.4213, "step": 54620 }, { "epoch": 0.8612915431670556, "grad_norm": 71.770359172407, "learning_rate": 9.532169162049398e-07, "loss": 15.9164, "step": 54630 }, { "epoch": 0.8614492022450653, "grad_norm": 68.49704476903014, "learning_rate": 9.5108625408457e-07, "loss": 16.0632, "step": 54640 }, { "epoch": 0.861606861323075, "grad_norm": 71.07921557778292, "learning_rate": 9.489578569929536e-07, "loss": 15.2038, "step": 54650 }, { "epoch": 0.8617645204010846, "grad_norm": 73.98708054757478, "learning_rate": 9.468317254628534e-07, "loss": 15.9252, "step": 54660 }, { "epoch": 0.8619221794790944, "grad_norm": 79.34944633751259, "learning_rate": 9.447078600264569e-07, "loss": 15.8348, "step": 54670 }, { "epoch": 0.8620798385571041, "grad_norm": 73.28415401019006, "learning_rate": 9.425862612153902e-07, "loss": 16.0377, "step": 54680 }, { "epoch": 0.8622374976351138, "grad_norm": 73.88125680193264, "learning_rate": 9.404669295607061e-07, "loss": 15.4529, "step": 54690 }, { "epoch": 0.8623951567131235, "grad_norm": 73.66683354582892, "learning_rate": 9.383498655928968e-07, "loss": 15.4123, "step": 54700 }, { "epoch": 0.8625528157911333, "grad_norm": 69.67818548391097, "learning_rate": 9.36235069841881e-07, "loss": 15.692, "step": 54710 }, { "epoch": 0.862710474869143, "grad_norm": 74.28669744920028, "learning_rate": 9.341225428370137e-07, "loss": 15.9644, "step": 54720 }, { "epoch": 0.8628681339471527, "grad_norm": 70.22860458887072, "learning_rate": 9.32012285107079e-07, "loss": 15.1112, "step": 54730 }, { "epoch": 0.8630257930251624, "grad_norm": 72.35975941759997, "learning_rate": 9.299042971802974e-07, "loss": 15.5508, "step": 54740 }, { "epoch": 0.8631834521031722, "grad_norm": 73.31008459510954, "learning_rate": 9.277985795843147e-07, "loss": 15.9759, "step": 54750 }, { "epoch": 0.8633411111811818, "grad_norm": 69.70861510324237, "learning_rate": 9.256951328462149e-07, "loss": 15.7202, "step": 54760 }, { "epoch": 0.8634987702591915, "grad_norm": 76.85726315014227, "learning_rate": 9.23593957492509e-07, "loss": 15.2813, "step": 54770 }, { "epoch": 0.8636564293372012, "grad_norm": 75.01268617198944, "learning_rate": 9.214950540491385e-07, "loss": 15.4385, "step": 54780 }, { "epoch": 0.8638140884152109, "grad_norm": 72.22300421355929, "learning_rate": 9.193984230414843e-07, "loss": 15.9176, "step": 54790 }, { "epoch": 0.8639717474932207, "grad_norm": 72.13228779142163, "learning_rate": 9.173040649943487e-07, "loss": 15.5417, "step": 54800 }, { "epoch": 0.8641294065712304, "grad_norm": 73.95585863194826, "learning_rate": 9.152119804319704e-07, "loss": 15.2156, "step": 54810 }, { "epoch": 0.8642870656492401, "grad_norm": 72.82716319594839, "learning_rate": 9.13122169878018e-07, "loss": 15.9434, "step": 54820 }, { "epoch": 0.8644447247272498, "grad_norm": 75.06856745116738, "learning_rate": 9.110346338555931e-07, "loss": 15.3933, "step": 54830 }, { "epoch": 0.8646023838052596, "grad_norm": 76.49166949876985, "learning_rate": 9.089493728872223e-07, "loss": 15.9721, "step": 54840 }, { "epoch": 0.8647600428832692, "grad_norm": 73.44530666518229, "learning_rate": 9.068663874948702e-07, "loss": 15.6792, "step": 54850 }, { "epoch": 0.8649177019612789, "grad_norm": 71.19314599016015, "learning_rate": 9.04785678199922e-07, "loss": 15.6594, "step": 54860 }, { "epoch": 0.8650753610392886, "grad_norm": 70.36333633404831, "learning_rate": 9.027072455232056e-07, "loss": 15.9531, "step": 54870 }, { "epoch": 0.8652330201172983, "grad_norm": 75.87450461714565, "learning_rate": 9.006310899849657e-07, "loss": 16.0301, "step": 54880 }, { "epoch": 0.8653906791953081, "grad_norm": 66.91267805753235, "learning_rate": 8.985572121048891e-07, "loss": 15.7074, "step": 54890 }, { "epoch": 0.8655483382733178, "grad_norm": 74.26920137954119, "learning_rate": 8.96485612402086e-07, "loss": 15.8345, "step": 54900 }, { "epoch": 0.8657059973513275, "grad_norm": 79.17896890924818, "learning_rate": 8.944162913950927e-07, "loss": 15.7132, "step": 54910 }, { "epoch": 0.8658636564293372, "grad_norm": 77.03451053533472, "learning_rate": 8.923492496018848e-07, "loss": 15.8683, "step": 54920 }, { "epoch": 0.866021315507347, "grad_norm": 68.57151751316837, "learning_rate": 8.902844875398608e-07, "loss": 15.6376, "step": 54930 }, { "epoch": 0.8661789745853566, "grad_norm": 77.97371997007623, "learning_rate": 8.882220057258517e-07, "loss": 15.7422, "step": 54940 }, { "epoch": 0.8663366336633663, "grad_norm": 69.81378801491793, "learning_rate": 8.861618046761144e-07, "loss": 15.6647, "step": 54950 }, { "epoch": 0.866494292741376, "grad_norm": 78.42440523848182, "learning_rate": 8.841038849063377e-07, "loss": 15.6912, "step": 54960 }, { "epoch": 0.8666519518193858, "grad_norm": 75.58248546168615, "learning_rate": 8.820482469316371e-07, "loss": 15.5004, "step": 54970 }, { "epoch": 0.8668096108973955, "grad_norm": 69.65780002532556, "learning_rate": 8.799948912665612e-07, "loss": 16.0017, "step": 54980 }, { "epoch": 0.8669672699754052, "grad_norm": 75.3415320098684, "learning_rate": 8.779438184250799e-07, "loss": 15.603, "step": 54990 }, { "epoch": 0.8671249290534149, "grad_norm": 70.76408105291321, "learning_rate": 8.758950289206003e-07, "loss": 15.3968, "step": 55000 }, { "epoch": 0.8672825881314246, "grad_norm": 72.35727772245785, "learning_rate": 8.7384852326595e-07, "loss": 15.3713, "step": 55010 }, { "epoch": 0.8674402472094344, "grad_norm": 72.46181463762393, "learning_rate": 8.718043019733913e-07, "loss": 15.59, "step": 55020 }, { "epoch": 0.867597906287444, "grad_norm": 79.27469529293226, "learning_rate": 8.697623655546139e-07, "loss": 15.5722, "step": 55030 }, { "epoch": 0.8677555653654537, "grad_norm": 74.1711359946481, "learning_rate": 8.67722714520729e-07, "loss": 15.6943, "step": 55040 }, { "epoch": 0.8679132244434634, "grad_norm": 68.02249738125764, "learning_rate": 8.656853493822847e-07, "loss": 15.4791, "step": 55050 }, { "epoch": 0.8680708835214732, "grad_norm": 69.2105492935017, "learning_rate": 8.6365027064925e-07, "loss": 15.3117, "step": 55060 }, { "epoch": 0.8682285425994829, "grad_norm": 76.23161128559168, "learning_rate": 8.616174788310272e-07, "loss": 16.0872, "step": 55070 }, { "epoch": 0.8683862016774926, "grad_norm": 71.13124368380176, "learning_rate": 8.595869744364393e-07, "loss": 15.5209, "step": 55080 }, { "epoch": 0.8685438607555023, "grad_norm": 73.8023763669478, "learning_rate": 8.575587579737454e-07, "loss": 15.7891, "step": 55090 }, { "epoch": 0.868701519833512, "grad_norm": 71.75298425211817, "learning_rate": 8.555328299506238e-07, "loss": 15.5709, "step": 55100 }, { "epoch": 0.8688591789115218, "grad_norm": 72.10264171685046, "learning_rate": 8.535091908741855e-07, "loss": 15.7228, "step": 55110 }, { "epoch": 0.8690168379895314, "grad_norm": 71.66298267310297, "learning_rate": 8.514878412509642e-07, "loss": 16.2248, "step": 55120 }, { "epoch": 0.8691744970675411, "grad_norm": 73.31619222225655, "learning_rate": 8.494687815869251e-07, "loss": 15.4582, "step": 55130 }, { "epoch": 0.8693321561455508, "grad_norm": 71.06999258958284, "learning_rate": 8.474520123874563e-07, "loss": 15.5807, "step": 55140 }, { "epoch": 0.8694898152235606, "grad_norm": 78.30689253938263, "learning_rate": 8.454375341573773e-07, "loss": 15.5587, "step": 55150 }, { "epoch": 0.8696474743015703, "grad_norm": 73.12093810720106, "learning_rate": 8.43425347400928e-07, "loss": 15.7279, "step": 55160 }, { "epoch": 0.86980513337958, "grad_norm": 70.38426998785604, "learning_rate": 8.414154526217777e-07, "loss": 15.9105, "step": 55170 }, { "epoch": 0.8699627924575897, "grad_norm": 78.37768979161501, "learning_rate": 8.394078503230252e-07, "loss": 16.0676, "step": 55180 }, { "epoch": 0.8701204515355995, "grad_norm": 75.07949584861595, "learning_rate": 8.374025410071873e-07, "loss": 16.0255, "step": 55190 }, { "epoch": 0.8702781106136092, "grad_norm": 74.49489356238912, "learning_rate": 8.35399525176217e-07, "loss": 15.6343, "step": 55200 }, { "epoch": 0.8704357696916188, "grad_norm": 72.68145457811033, "learning_rate": 8.333988033314844e-07, "loss": 16.3828, "step": 55210 }, { "epoch": 0.8705934287696285, "grad_norm": 72.59995517855822, "learning_rate": 8.31400375973791e-07, "loss": 15.8889, "step": 55220 }, { "epoch": 0.8707510878476382, "grad_norm": 71.73731095110283, "learning_rate": 8.294042436033611e-07, "loss": 15.3832, "step": 55230 }, { "epoch": 0.870908746925648, "grad_norm": 72.7568276711028, "learning_rate": 8.274104067198474e-07, "loss": 15.4636, "step": 55240 }, { "epoch": 0.8710664060036577, "grad_norm": 70.05118751280068, "learning_rate": 8.254188658223239e-07, "loss": 15.3943, "step": 55250 }, { "epoch": 0.8712240650816674, "grad_norm": 72.16045245658387, "learning_rate": 8.234296214092951e-07, "loss": 15.2681, "step": 55260 }, { "epoch": 0.8713817241596771, "grad_norm": 78.3052679571343, "learning_rate": 8.214426739786841e-07, "loss": 16.1597, "step": 55270 }, { "epoch": 0.8715393832376869, "grad_norm": 74.62694889052231, "learning_rate": 8.194580240278449e-07, "loss": 15.4723, "step": 55280 }, { "epoch": 0.8716970423156966, "grad_norm": 71.55797238401163, "learning_rate": 8.174756720535548e-07, "loss": 15.6487, "step": 55290 }, { "epoch": 0.8718547013937062, "grad_norm": 73.03141624568953, "learning_rate": 8.154956185520113e-07, "loss": 15.7947, "step": 55300 }, { "epoch": 0.8720123604717159, "grad_norm": 72.18856600814108, "learning_rate": 8.135178640188434e-07, "loss": 15.2823, "step": 55310 }, { "epoch": 0.8721700195497257, "grad_norm": 74.84547431840323, "learning_rate": 8.11542408949102e-07, "loss": 15.5098, "step": 55320 }, { "epoch": 0.8723276786277354, "grad_norm": 70.85580136639234, "learning_rate": 8.095692538372635e-07, "loss": 15.4992, "step": 55330 }, { "epoch": 0.8724853377057451, "grad_norm": 71.24829035943522, "learning_rate": 8.075983991772229e-07, "loss": 15.5555, "step": 55340 }, { "epoch": 0.8726429967837548, "grad_norm": 69.68521803727229, "learning_rate": 8.05629845462309e-07, "loss": 15.5291, "step": 55350 }, { "epoch": 0.8728006558617645, "grad_norm": 74.41546184287833, "learning_rate": 8.036635931852655e-07, "loss": 15.723, "step": 55360 }, { "epoch": 0.8729583149397743, "grad_norm": 86.19249619121656, "learning_rate": 8.016996428382662e-07, "loss": 15.2075, "step": 55370 }, { "epoch": 0.873115974017784, "grad_norm": 77.97537094289584, "learning_rate": 7.997379949129047e-07, "loss": 15.4739, "step": 55380 }, { "epoch": 0.8732736330957936, "grad_norm": 70.12764705685736, "learning_rate": 7.977786499002028e-07, "loss": 15.102, "step": 55390 }, { "epoch": 0.8734312921738033, "grad_norm": 72.71424913025523, "learning_rate": 7.95821608290599e-07, "loss": 15.2259, "step": 55400 }, { "epoch": 0.8735889512518131, "grad_norm": 73.02946680312301, "learning_rate": 7.938668705739638e-07, "loss": 15.6918, "step": 55410 }, { "epoch": 0.8737466103298228, "grad_norm": 74.02200210325482, "learning_rate": 7.919144372395826e-07, "loss": 15.1737, "step": 55420 }, { "epoch": 0.8739042694078325, "grad_norm": 71.25367379189922, "learning_rate": 7.899643087761688e-07, "loss": 15.5608, "step": 55430 }, { "epoch": 0.8740619284858422, "grad_norm": 70.93683148479289, "learning_rate": 7.88016485671862e-07, "loss": 15.3904, "step": 55440 }, { "epoch": 0.8742195875638519, "grad_norm": 70.42061914544453, "learning_rate": 7.860709684142153e-07, "loss": 15.4718, "step": 55450 }, { "epoch": 0.8743772466418617, "grad_norm": 80.36231580344483, "learning_rate": 7.84127757490214e-07, "loss": 15.7743, "step": 55460 }, { "epoch": 0.8745349057198714, "grad_norm": 72.6060336497137, "learning_rate": 7.821868533862586e-07, "loss": 15.1379, "step": 55470 }, { "epoch": 0.874692564797881, "grad_norm": 67.63709677315856, "learning_rate": 7.802482565881808e-07, "loss": 15.4811, "step": 55480 }, { "epoch": 0.8748502238758907, "grad_norm": 76.35675338426762, "learning_rate": 7.783119675812234e-07, "loss": 15.5337, "step": 55490 }, { "epoch": 0.8750078829539005, "grad_norm": 75.0564723949823, "learning_rate": 7.763779868500642e-07, "loss": 15.9809, "step": 55500 }, { "epoch": 0.8751655420319102, "grad_norm": 70.2227386803904, "learning_rate": 7.744463148787917e-07, "loss": 15.6811, "step": 55510 }, { "epoch": 0.8753232011099199, "grad_norm": 69.69915494817226, "learning_rate": 7.725169521509235e-07, "loss": 15.1393, "step": 55520 }, { "epoch": 0.8754808601879296, "grad_norm": 71.70066071621055, "learning_rate": 7.705898991493988e-07, "loss": 15.2997, "step": 55530 }, { "epoch": 0.8756385192659394, "grad_norm": 72.35747537096408, "learning_rate": 7.686651563565762e-07, "loss": 15.267, "step": 55540 }, { "epoch": 0.8757961783439491, "grad_norm": 68.45998289192866, "learning_rate": 7.667427242542369e-07, "loss": 15.2124, "step": 55550 }, { "epoch": 0.8759538374219588, "grad_norm": 73.44791034568756, "learning_rate": 7.64822603323585e-07, "loss": 15.6306, "step": 55560 }, { "epoch": 0.8761114964999684, "grad_norm": 77.79636587328228, "learning_rate": 7.629047940452427e-07, "loss": 15.4734, "step": 55570 }, { "epoch": 0.8762691555779781, "grad_norm": 75.20186717709457, "learning_rate": 7.609892968992571e-07, "loss": 15.792, "step": 55580 }, { "epoch": 0.8764268146559879, "grad_norm": 75.04338491816974, "learning_rate": 7.590761123650958e-07, "loss": 15.4815, "step": 55590 }, { "epoch": 0.8765844737339976, "grad_norm": 74.49466236578444, "learning_rate": 7.571652409216445e-07, "loss": 15.903, "step": 55600 }, { "epoch": 0.8767421328120073, "grad_norm": 75.73691303377072, "learning_rate": 7.552566830472152e-07, "loss": 16.0623, "step": 55610 }, { "epoch": 0.876899791890017, "grad_norm": 210.15983230926363, "learning_rate": 7.533504392195357e-07, "loss": 15.9374, "step": 55620 }, { "epoch": 0.8770574509680268, "grad_norm": 74.43205229547716, "learning_rate": 7.514465099157608e-07, "loss": 15.2899, "step": 55630 }, { "epoch": 0.8772151100460365, "grad_norm": 70.75165379273227, "learning_rate": 7.495448956124573e-07, "loss": 15.4322, "step": 55640 }, { "epoch": 0.8773727691240462, "grad_norm": 71.78979013308047, "learning_rate": 7.476455967856211e-07, "loss": 15.8739, "step": 55650 }, { "epoch": 0.8775304282020558, "grad_norm": 74.54903013203523, "learning_rate": 7.457486139106618e-07, "loss": 15.9958, "step": 55660 }, { "epoch": 0.8776880872800655, "grad_norm": 69.47911021269785, "learning_rate": 7.438539474624151e-07, "loss": 16.0684, "step": 55670 }, { "epoch": 0.8778457463580753, "grad_norm": 69.12682020690418, "learning_rate": 7.419615979151296e-07, "loss": 15.4302, "step": 55680 }, { "epoch": 0.878003405436085, "grad_norm": 71.31520399933353, "learning_rate": 7.400715657424828e-07, "loss": 15.5002, "step": 55690 }, { "epoch": 0.8781610645140947, "grad_norm": 72.93181158546805, "learning_rate": 7.381838514175665e-07, "loss": 15.3054, "step": 55700 }, { "epoch": 0.8783187235921044, "grad_norm": 73.55768876357571, "learning_rate": 7.362984554128905e-07, "loss": 15.4185, "step": 55710 }, { "epoch": 0.8784763826701142, "grad_norm": 71.89069530614077, "learning_rate": 7.344153782003894e-07, "loss": 15.721, "step": 55720 }, { "epoch": 0.8786340417481239, "grad_norm": 80.47715547105554, "learning_rate": 7.325346202514149e-07, "loss": 15.6494, "step": 55730 }, { "epoch": 0.8787917008261336, "grad_norm": 72.71240443617516, "learning_rate": 7.306561820367408e-07, "loss": 15.7567, "step": 55740 }, { "epoch": 0.8789493599041432, "grad_norm": 74.29456690283001, "learning_rate": 7.287800640265552e-07, "loss": 15.4114, "step": 55750 }, { "epoch": 0.879107018982153, "grad_norm": 70.23122690574222, "learning_rate": 7.269062666904703e-07, "loss": 16.0438, "step": 55760 }, { "epoch": 0.8792646780601627, "grad_norm": 71.8159415029164, "learning_rate": 7.250347904975119e-07, "loss": 15.2933, "step": 55770 }, { "epoch": 0.8794223371381724, "grad_norm": 72.33493313548513, "learning_rate": 7.231656359161321e-07, "loss": 15.453, "step": 55780 }, { "epoch": 0.8795799962161821, "grad_norm": 74.5746435360231, "learning_rate": 7.212988034141955e-07, "loss": 16.2793, "step": 55790 }, { "epoch": 0.8797376552941918, "grad_norm": 76.78706392555976, "learning_rate": 7.194342934589893e-07, "loss": 15.5478, "step": 55800 }, { "epoch": 0.8798953143722016, "grad_norm": 75.4923185206007, "learning_rate": 7.175721065172158e-07, "loss": 15.5444, "step": 55810 }, { "epoch": 0.8800529734502113, "grad_norm": 82.0864584169336, "learning_rate": 7.157122430550001e-07, "loss": 15.7579, "step": 55820 }, { "epoch": 0.880210632528221, "grad_norm": 76.30719944645197, "learning_rate": 7.138547035378852e-07, "loss": 15.8509, "step": 55830 }, { "epoch": 0.8803682916062306, "grad_norm": 73.74648061119751, "learning_rate": 7.119994884308268e-07, "loss": 15.6363, "step": 55840 }, { "epoch": 0.8805259506842404, "grad_norm": 73.35656798660017, "learning_rate": 7.101465981982059e-07, "loss": 15.1578, "step": 55850 }, { "epoch": 0.8806836097622501, "grad_norm": 70.22572829487726, "learning_rate": 7.082960333038158e-07, "loss": 15.1038, "step": 55860 }, { "epoch": 0.8808412688402598, "grad_norm": 80.63975569342507, "learning_rate": 7.064477942108738e-07, "loss": 16.0019, "step": 55870 }, { "epoch": 0.8809989279182695, "grad_norm": 67.83819919205109, "learning_rate": 7.046018813820077e-07, "loss": 15.2605, "step": 55880 }, { "epoch": 0.8811565869962793, "grad_norm": 64.90534438189829, "learning_rate": 7.027582952792711e-07, "loss": 15.6755, "step": 55890 }, { "epoch": 0.881314246074289, "grad_norm": 75.82018009973876, "learning_rate": 7.009170363641282e-07, "loss": 15.7865, "step": 55900 }, { "epoch": 0.8814719051522987, "grad_norm": 71.92837990826682, "learning_rate": 6.990781050974649e-07, "loss": 15.5436, "step": 55910 }, { "epoch": 0.8816295642303084, "grad_norm": 72.06707623376546, "learning_rate": 6.972415019395807e-07, "loss": 15.4982, "step": 55920 }, { "epoch": 0.881787223308318, "grad_norm": 69.40160721566254, "learning_rate": 6.954072273501966e-07, "loss": 15.7734, "step": 55930 }, { "epoch": 0.8819448823863278, "grad_norm": 72.09259612971348, "learning_rate": 6.935752817884489e-07, "loss": 15.4618, "step": 55940 }, { "epoch": 0.8821025414643375, "grad_norm": 76.43431855273492, "learning_rate": 6.917456657128929e-07, "loss": 15.8071, "step": 55950 }, { "epoch": 0.8822602005423472, "grad_norm": 73.01236830981733, "learning_rate": 6.899183795814967e-07, "loss": 15.4581, "step": 55960 }, { "epoch": 0.8824178596203569, "grad_norm": 72.78079801514643, "learning_rate": 6.880934238516457e-07, "loss": 16.4362, "step": 55970 }, { "epoch": 0.8825755186983667, "grad_norm": 76.04554605383174, "learning_rate": 6.862707989801465e-07, "loss": 15.5671, "step": 55980 }, { "epoch": 0.8827331777763764, "grad_norm": 68.75798357963161, "learning_rate": 6.844505054232176e-07, "loss": 16.009, "step": 55990 }, { "epoch": 0.8828908368543861, "grad_norm": 69.04129322167734, "learning_rate": 6.826325436364967e-07, "loss": 15.5262, "step": 56000 }, { "epoch": 0.8830484959323958, "grad_norm": 80.20579509989787, "learning_rate": 6.808169140750353e-07, "loss": 15.6529, "step": 56010 }, { "epoch": 0.8832061550104054, "grad_norm": 70.1546683108538, "learning_rate": 6.790036171933035e-07, "loss": 15.986, "step": 56020 }, { "epoch": 0.8833638140884152, "grad_norm": 72.827358820161, "learning_rate": 6.771926534451878e-07, "loss": 16.0466, "step": 56030 }, { "epoch": 0.8835214731664249, "grad_norm": 73.36814105545176, "learning_rate": 6.753840232839892e-07, "loss": 16.1864, "step": 56040 }, { "epoch": 0.8836791322444346, "grad_norm": 72.67997056186184, "learning_rate": 6.735777271624233e-07, "loss": 15.4883, "step": 56050 }, { "epoch": 0.8838367913224443, "grad_norm": 73.7714442911318, "learning_rate": 6.717737655326262e-07, "loss": 14.9495, "step": 56060 }, { "epoch": 0.8839944504004541, "grad_norm": 78.05330783557072, "learning_rate": 6.699721388461422e-07, "loss": 15.6419, "step": 56070 }, { "epoch": 0.8841521094784638, "grad_norm": 79.06124712833005, "learning_rate": 6.681728475539406e-07, "loss": 15.8585, "step": 56080 }, { "epoch": 0.8843097685564735, "grad_norm": 77.0999571273823, "learning_rate": 6.663758921063978e-07, "loss": 15.634, "step": 56090 }, { "epoch": 0.8844674276344832, "grad_norm": 74.9858340791467, "learning_rate": 6.645812729533074e-07, "loss": 15.1178, "step": 56100 }, { "epoch": 0.884625086712493, "grad_norm": 71.670152057579, "learning_rate": 6.627889905438822e-07, "loss": 15.0988, "step": 56110 }, { "epoch": 0.8847827457905026, "grad_norm": 68.82315474037046, "learning_rate": 6.609990453267456e-07, "loss": 16.1395, "step": 56120 }, { "epoch": 0.8849404048685123, "grad_norm": 73.91690485572012, "learning_rate": 6.592114377499414e-07, "loss": 15.8894, "step": 56130 }, { "epoch": 0.885098063946522, "grad_norm": 72.656189061241, "learning_rate": 6.574261682609207e-07, "loss": 15.7848, "step": 56140 }, { "epoch": 0.8852557230245317, "grad_norm": 75.38039661767668, "learning_rate": 6.556432373065568e-07, "loss": 15.628, "step": 56150 }, { "epoch": 0.8854133821025415, "grad_norm": 72.76592823769496, "learning_rate": 6.538626453331298e-07, "loss": 15.1245, "step": 56160 }, { "epoch": 0.8855710411805512, "grad_norm": 75.60766227798743, "learning_rate": 6.520843927863441e-07, "loss": 15.8644, "step": 56170 }, { "epoch": 0.8857287002585609, "grad_norm": 68.77429300526993, "learning_rate": 6.50308480111308e-07, "loss": 15.5756, "step": 56180 }, { "epoch": 0.8858863593365706, "grad_norm": 78.26707022031546, "learning_rate": 6.485349077525527e-07, "loss": 16.1735, "step": 56190 }, { "epoch": 0.8860440184145804, "grad_norm": 73.76359316465259, "learning_rate": 6.467636761540174e-07, "loss": 15.6006, "step": 56200 }, { "epoch": 0.88620167749259, "grad_norm": 76.11776982071238, "learning_rate": 6.449947857590621e-07, "loss": 15.7347, "step": 56210 }, { "epoch": 0.8863593365705997, "grad_norm": 80.07337665114974, "learning_rate": 6.432282370104526e-07, "loss": 16.8877, "step": 56220 }, { "epoch": 0.8865169956486094, "grad_norm": 68.27532669214023, "learning_rate": 6.414640303503739e-07, "loss": 15.181, "step": 56230 }, { "epoch": 0.8866746547266191, "grad_norm": 69.01107346711275, "learning_rate": 6.397021662204272e-07, "loss": 15.7477, "step": 56240 }, { "epoch": 0.8868323138046289, "grad_norm": 91.04297935861317, "learning_rate": 6.379426450616177e-07, "loss": 16.1667, "step": 56250 }, { "epoch": 0.8869899728826386, "grad_norm": 74.92201759555553, "learning_rate": 6.361854673143763e-07, "loss": 15.614, "step": 56260 }, { "epoch": 0.8871476319606483, "grad_norm": 70.21188112240753, "learning_rate": 6.344306334185357e-07, "loss": 15.6151, "step": 56270 }, { "epoch": 0.887305291038658, "grad_norm": 70.1859731145135, "learning_rate": 6.326781438133523e-07, "loss": 15.7304, "step": 56280 }, { "epoch": 0.8874629501166678, "grad_norm": 75.1844956044162, "learning_rate": 6.309279989374873e-07, "loss": 15.3064, "step": 56290 }, { "epoch": 0.8876206091946774, "grad_norm": 71.11889177547631, "learning_rate": 6.291801992290202e-07, "loss": 15.3393, "step": 56300 }, { "epoch": 0.8877782682726871, "grad_norm": 70.93151559856805, "learning_rate": 6.274347451254414e-07, "loss": 14.9246, "step": 56310 }, { "epoch": 0.8879359273506968, "grad_norm": 74.81368356464176, "learning_rate": 6.256916370636534e-07, "loss": 15.8067, "step": 56320 }, { "epoch": 0.8880935864287066, "grad_norm": 71.19065514378909, "learning_rate": 6.239508754799739e-07, "loss": 15.7215, "step": 56330 }, { "epoch": 0.8882512455067163, "grad_norm": 73.60254203458422, "learning_rate": 6.222124608101343e-07, "loss": 15.8886, "step": 56340 }, { "epoch": 0.888408904584726, "grad_norm": 70.50161579135772, "learning_rate": 6.204763934892733e-07, "loss": 15.6781, "step": 56350 }, { "epoch": 0.8885665636627357, "grad_norm": 75.13701205520105, "learning_rate": 6.187426739519442e-07, "loss": 15.8402, "step": 56360 }, { "epoch": 0.8887242227407454, "grad_norm": 70.24365217598066, "learning_rate": 6.170113026321156e-07, "loss": 15.6778, "step": 56370 }, { "epoch": 0.8888818818187552, "grad_norm": 82.98788153432271, "learning_rate": 6.152822799631642e-07, "loss": 15.4842, "step": 56380 }, { "epoch": 0.8890395408967648, "grad_norm": 68.07301630790472, "learning_rate": 6.135556063778836e-07, "loss": 15.4522, "step": 56390 }, { "epoch": 0.8891971999747745, "grad_norm": 71.20956491802964, "learning_rate": 6.118312823084738e-07, "loss": 15.2968, "step": 56400 }, { "epoch": 0.8893548590527842, "grad_norm": 73.55398347633498, "learning_rate": 6.101093081865495e-07, "loss": 15.629, "step": 56410 }, { "epoch": 0.889512518130794, "grad_norm": 68.33519184190082, "learning_rate": 6.083896844431402e-07, "loss": 15.6583, "step": 56420 }, { "epoch": 0.8896701772088037, "grad_norm": 71.68293576022087, "learning_rate": 6.066724115086797e-07, "loss": 15.1642, "step": 56430 }, { "epoch": 0.8898278362868134, "grad_norm": 70.1148968794131, "learning_rate": 6.049574898130206e-07, "loss": 16.0178, "step": 56440 }, { "epoch": 0.8899854953648231, "grad_norm": 71.39574572708146, "learning_rate": 6.03244919785424e-07, "loss": 15.3475, "step": 56450 }, { "epoch": 0.8901431544428329, "grad_norm": 78.31996639210223, "learning_rate": 6.015347018545603e-07, "loss": 15.4679, "step": 56460 }, { "epoch": 0.8903008135208426, "grad_norm": 77.14638818937313, "learning_rate": 5.99826836448516e-07, "loss": 15.4256, "step": 56470 }, { "epoch": 0.8904584725988522, "grad_norm": 73.14147841275268, "learning_rate": 5.981213239947847e-07, "loss": 15.1338, "step": 56480 }, { "epoch": 0.8906161316768619, "grad_norm": 75.22920044486133, "learning_rate": 5.964181649202705e-07, "loss": 15.4243, "step": 56490 }, { "epoch": 0.8907737907548716, "grad_norm": 73.11685612487078, "learning_rate": 5.947173596512934e-07, "loss": 16.0186, "step": 56500 }, { "epoch": 0.8909314498328814, "grad_norm": 72.34138499670397, "learning_rate": 5.930189086135784e-07, "loss": 15.7597, "step": 56510 }, { "epoch": 0.8910891089108911, "grad_norm": 74.98527236494294, "learning_rate": 5.913228122322645e-07, "loss": 15.2141, "step": 56520 }, { "epoch": 0.8912467679889008, "grad_norm": 73.78477759319362, "learning_rate": 5.896290709319019e-07, "loss": 15.8113, "step": 56530 }, { "epoch": 0.8914044270669105, "grad_norm": 73.8632497740955, "learning_rate": 5.879376851364505e-07, "loss": 15.1533, "step": 56540 }, { "epoch": 0.8915620861449203, "grad_norm": 75.73951910468489, "learning_rate": 5.86248655269277e-07, "loss": 16.2239, "step": 56550 }, { "epoch": 0.89171974522293, "grad_norm": 72.72524238962437, "learning_rate": 5.845619817531656e-07, "loss": 15.5565, "step": 56560 }, { "epoch": 0.8918774043009396, "grad_norm": 70.2867486502653, "learning_rate": 5.828776650103029e-07, "loss": 15.4351, "step": 56570 }, { "epoch": 0.8920350633789493, "grad_norm": 75.01674064861554, "learning_rate": 5.811957054622929e-07, "loss": 15.8698, "step": 56580 }, { "epoch": 0.892192722456959, "grad_norm": 71.80180711209445, "learning_rate": 5.795161035301422e-07, "loss": 15.9611, "step": 56590 }, { "epoch": 0.8923503815349688, "grad_norm": 72.09596183305125, "learning_rate": 5.778388596342755e-07, "loss": 15.7868, "step": 56600 }, { "epoch": 0.8925080406129785, "grad_norm": 77.1045311286869, "learning_rate": 5.76163974194518e-07, "loss": 15.3949, "step": 56610 }, { "epoch": 0.8926656996909882, "grad_norm": 75.19044013696511, "learning_rate": 5.744914476301122e-07, "loss": 15.7306, "step": 56620 }, { "epoch": 0.8928233587689979, "grad_norm": 75.88514224004585, "learning_rate": 5.728212803597077e-07, "loss": 15.5453, "step": 56630 }, { "epoch": 0.8929810178470077, "grad_norm": 77.59798370956665, "learning_rate": 5.711534728013613e-07, "loss": 15.5235, "step": 56640 }, { "epoch": 0.8931386769250174, "grad_norm": 79.37835671665405, "learning_rate": 5.694880253725432e-07, "loss": 15.5046, "step": 56650 }, { "epoch": 0.893296336003027, "grad_norm": 69.18467441823327, "learning_rate": 5.67824938490128e-07, "loss": 15.7582, "step": 56660 }, { "epoch": 0.8934539950810367, "grad_norm": 70.66993410899042, "learning_rate": 5.661642125704059e-07, "loss": 15.6151, "step": 56670 }, { "epoch": 0.8936116541590465, "grad_norm": 67.97807455927102, "learning_rate": 5.645058480290677e-07, "loss": 15.0144, "step": 56680 }, { "epoch": 0.8937693132370562, "grad_norm": 74.79709031436306, "learning_rate": 5.628498452812214e-07, "loss": 15.2773, "step": 56690 }, { "epoch": 0.8939269723150659, "grad_norm": 68.76300114135934, "learning_rate": 5.611962047413777e-07, "loss": 15.0752, "step": 56700 }, { "epoch": 0.8940846313930756, "grad_norm": 70.36496793917775, "learning_rate": 5.59544926823461e-07, "loss": 15.1332, "step": 56710 }, { "epoch": 0.8942422904710853, "grad_norm": 78.18852330192503, "learning_rate": 5.578960119407984e-07, "loss": 15.4928, "step": 56720 }, { "epoch": 0.8943999495490951, "grad_norm": 75.04703722455463, "learning_rate": 5.56249460506132e-07, "loss": 15.3482, "step": 56730 }, { "epoch": 0.8945576086271048, "grad_norm": 70.66486413797371, "learning_rate": 5.546052729316076e-07, "loss": 15.4648, "step": 56740 }, { "epoch": 0.8947152677051144, "grad_norm": 71.21686008099444, "learning_rate": 5.529634496287827e-07, "loss": 15.493, "step": 56750 }, { "epoch": 0.8948729267831241, "grad_norm": 69.79493588469256, "learning_rate": 5.513239910086198e-07, "loss": 15.6552, "step": 56760 }, { "epoch": 0.8950305858611339, "grad_norm": 70.85653173364096, "learning_rate": 5.496868974814895e-07, "loss": 15.5284, "step": 56770 }, { "epoch": 0.8951882449391436, "grad_norm": 73.66565294698462, "learning_rate": 5.480521694571749e-07, "loss": 15.1815, "step": 56780 }, { "epoch": 0.8953459040171533, "grad_norm": 71.34851695401879, "learning_rate": 5.464198073448612e-07, "loss": 15.8583, "step": 56790 }, { "epoch": 0.895503563095163, "grad_norm": 73.11009788434473, "learning_rate": 5.447898115531458e-07, "loss": 15.7108, "step": 56800 }, { "epoch": 0.8956612221731727, "grad_norm": 68.91723343863272, "learning_rate": 5.431621824900313e-07, "loss": 15.2211, "step": 56810 }, { "epoch": 0.8958188812511825, "grad_norm": 69.0743941747788, "learning_rate": 5.415369205629273e-07, "loss": 14.9842, "step": 56820 }, { "epoch": 0.8959765403291922, "grad_norm": 69.61200978620083, "learning_rate": 5.399140261786551e-07, "loss": 15.5374, "step": 56830 }, { "epoch": 0.8961341994072018, "grad_norm": 74.14680688271075, "learning_rate": 5.382934997434397e-07, "loss": 15.5203, "step": 56840 }, { "epoch": 0.8962918584852115, "grad_norm": 73.93334996124244, "learning_rate": 5.366753416629134e-07, "loss": 15.4091, "step": 56850 }, { "epoch": 0.8964495175632213, "grad_norm": 75.75811596456582, "learning_rate": 5.350595523421176e-07, "loss": 15.9927, "step": 56860 }, { "epoch": 0.896607176641231, "grad_norm": 68.89563323901507, "learning_rate": 5.334461321854967e-07, "loss": 15.4678, "step": 56870 }, { "epoch": 0.8967648357192407, "grad_norm": 74.90381498923874, "learning_rate": 5.318350815969098e-07, "loss": 16.0545, "step": 56880 }, { "epoch": 0.8969224947972504, "grad_norm": 73.14565343109679, "learning_rate": 5.302264009796143e-07, "loss": 16.0236, "step": 56890 }, { "epoch": 0.8970801538752602, "grad_norm": 85.8896286310813, "learning_rate": 5.286200907362793e-07, "loss": 15.9277, "step": 56900 }, { "epoch": 0.8972378129532699, "grad_norm": 77.31940467125467, "learning_rate": 5.270161512689786e-07, "loss": 15.8644, "step": 56910 }, { "epoch": 0.8973954720312796, "grad_norm": 68.77397344211188, "learning_rate": 5.254145829791946e-07, "loss": 15.2277, "step": 56920 }, { "epoch": 0.8975531311092892, "grad_norm": 71.29558433750229, "learning_rate": 5.238153862678163e-07, "loss": 15.457, "step": 56930 }, { "epoch": 0.8977107901872989, "grad_norm": 68.22715572334353, "learning_rate": 5.222185615351349e-07, "loss": 15.4542, "step": 56940 }, { "epoch": 0.8978684492653087, "grad_norm": 78.73261970905762, "learning_rate": 5.206241091808528e-07, "loss": 15.9558, "step": 56950 }, { "epoch": 0.8980261083433184, "grad_norm": 69.49485862150644, "learning_rate": 5.190320296040752e-07, "loss": 14.8852, "step": 56960 }, { "epoch": 0.8981837674213281, "grad_norm": 69.55287409862342, "learning_rate": 5.174423232033165e-07, "loss": 15.4229, "step": 56970 }, { "epoch": 0.8983414264993378, "grad_norm": 73.70699914974952, "learning_rate": 5.158549903764931e-07, "loss": 15.5763, "step": 56980 }, { "epoch": 0.8984990855773476, "grad_norm": 71.23142203758674, "learning_rate": 5.142700315209304e-07, "loss": 15.2701, "step": 56990 }, { "epoch": 0.8986567446553573, "grad_norm": 78.9223956460877, "learning_rate": 5.126874470333587e-07, "loss": 16.3429, "step": 57000 }, { "epoch": 0.898814403733367, "grad_norm": 67.83971494551461, "learning_rate": 5.111072373099146e-07, "loss": 15.4439, "step": 57010 }, { "epoch": 0.8989720628113766, "grad_norm": 77.29316525739485, "learning_rate": 5.095294027461372e-07, "loss": 15.3307, "step": 57020 }, { "epoch": 0.8991297218893864, "grad_norm": 71.46267040390775, "learning_rate": 5.079539437369752e-07, "loss": 15.7357, "step": 57030 }, { "epoch": 0.8992873809673961, "grad_norm": 71.63205640044653, "learning_rate": 5.063808606767817e-07, "loss": 15.7221, "step": 57040 }, { "epoch": 0.8994450400454058, "grad_norm": 72.786658846452, "learning_rate": 5.04810153959312e-07, "loss": 15.1991, "step": 57050 }, { "epoch": 0.8996026991234155, "grad_norm": 67.14820699241605, "learning_rate": 5.032418239777303e-07, "loss": 15.203, "step": 57060 }, { "epoch": 0.8997603582014252, "grad_norm": 70.66733901578233, "learning_rate": 5.01675871124605e-07, "loss": 15.5976, "step": 57070 }, { "epoch": 0.899918017279435, "grad_norm": 73.77905654977585, "learning_rate": 5.00112295791908e-07, "loss": 15.6527, "step": 57080 }, { "epoch": 0.9000756763574447, "grad_norm": 71.99147795094626, "learning_rate": 4.985510983710162e-07, "loss": 16.0, "step": 57090 }, { "epoch": 0.9002333354354544, "grad_norm": 72.7218190592175, "learning_rate": 4.969922792527138e-07, "loss": 15.3697, "step": 57100 }, { "epoch": 0.900390994513464, "grad_norm": 66.8201974757855, "learning_rate": 4.954358388271863e-07, "loss": 15.1741, "step": 57110 }, { "epoch": 0.9005486535914738, "grad_norm": 71.02145356574141, "learning_rate": 4.938817774840265e-07, "loss": 15.3795, "step": 57120 }, { "epoch": 0.9007063126694835, "grad_norm": 80.04369366667902, "learning_rate": 4.923300956122312e-07, "loss": 16.1783, "step": 57130 }, { "epoch": 0.9008639717474932, "grad_norm": 76.67695160938472, "learning_rate": 4.907807936002007e-07, "loss": 15.485, "step": 57140 }, { "epoch": 0.9010216308255029, "grad_norm": 70.69205083826724, "learning_rate": 4.892338718357404e-07, "loss": 15.4546, "step": 57150 }, { "epoch": 0.9011792899035126, "grad_norm": 68.68462461651079, "learning_rate": 4.876893307060571e-07, "loss": 15.2135, "step": 57160 }, { "epoch": 0.9013369489815224, "grad_norm": 79.1758166037522, "learning_rate": 4.861471705977661e-07, "loss": 15.7434, "step": 57170 }, { "epoch": 0.9014946080595321, "grad_norm": 80.53602981530946, "learning_rate": 4.846073918968841e-07, "loss": 15.7502, "step": 57180 }, { "epoch": 0.9016522671375418, "grad_norm": 72.51555419451682, "learning_rate": 4.830699949888329e-07, "loss": 15.6118, "step": 57190 }, { "epoch": 0.9018099262155514, "grad_norm": 71.47041961746632, "learning_rate": 4.815349802584369e-07, "loss": 15.3857, "step": 57200 }, { "epoch": 0.9019675852935612, "grad_norm": 72.16978095026084, "learning_rate": 4.800023480899241e-07, "loss": 15.4562, "step": 57210 }, { "epoch": 0.9021252443715709, "grad_norm": 69.6592927539237, "learning_rate": 4.78472098866929e-07, "loss": 15.2206, "step": 57220 }, { "epoch": 0.9022829034495806, "grad_norm": 73.431768260184, "learning_rate": 4.76944232972485e-07, "loss": 15.464, "step": 57230 }, { "epoch": 0.9024405625275903, "grad_norm": 72.78355410123413, "learning_rate": 4.7541875078903175e-07, "loss": 15.6772, "step": 57240 }, { "epoch": 0.9025982216056001, "grad_norm": 76.37705767732925, "learning_rate": 4.7389565269841507e-07, "loss": 15.2634, "step": 57250 }, { "epoch": 0.9027558806836098, "grad_norm": 71.32638170643592, "learning_rate": 4.723749390818766e-07, "loss": 15.3878, "step": 57260 }, { "epoch": 0.9029135397616195, "grad_norm": 71.6049297462179, "learning_rate": 4.7085661032006845e-07, "loss": 15.3188, "step": 57270 }, { "epoch": 0.9030711988396292, "grad_norm": 79.08323672481717, "learning_rate": 4.6934066679304113e-07, "loss": 15.8306, "step": 57280 }, { "epoch": 0.9032288579176388, "grad_norm": 76.84720462055238, "learning_rate": 4.6782710888024883e-07, "loss": 15.7848, "step": 57290 }, { "epoch": 0.9033865169956486, "grad_norm": 78.85373159355632, "learning_rate": 4.66315936960553e-07, "loss": 15.5217, "step": 57300 }, { "epoch": 0.9035441760736583, "grad_norm": 76.58558723385089, "learning_rate": 4.648071514122088e-07, "loss": 15.5973, "step": 57310 }, { "epoch": 0.903701835151668, "grad_norm": 77.42000278663242, "learning_rate": 4.6330075261288297e-07, "loss": 15.5828, "step": 57320 }, { "epoch": 0.9038594942296777, "grad_norm": 70.3380866402462, "learning_rate": 4.6179674093964176e-07, "loss": 15.3999, "step": 57330 }, { "epoch": 0.9040171533076875, "grad_norm": 73.88383260896114, "learning_rate": 4.602951167689529e-07, "loss": 15.224, "step": 57340 }, { "epoch": 0.9041748123856972, "grad_norm": 71.03269199984635, "learning_rate": 4.587958804766868e-07, "loss": 14.8934, "step": 57350 }, { "epoch": 0.9043324714637069, "grad_norm": 71.80844333877948, "learning_rate": 4.5729903243811655e-07, "loss": 15.0233, "step": 57360 }, { "epoch": 0.9044901305417166, "grad_norm": 71.48731900372741, "learning_rate": 4.5580457302791705e-07, "loss": 16.0791, "step": 57370 }, { "epoch": 0.9046477896197263, "grad_norm": 75.186161384449, "learning_rate": 4.543125026201678e-07, "loss": 15.7335, "step": 57380 }, { "epoch": 0.904805448697736, "grad_norm": 73.64594044038755, "learning_rate": 4.5282282158834455e-07, "loss": 15.0679, "step": 57390 }, { "epoch": 0.9049631077757457, "grad_norm": 77.75011174603563, "learning_rate": 4.513355303053313e-07, "loss": 15.9334, "step": 57400 }, { "epoch": 0.9051207668537554, "grad_norm": 76.35044031348724, "learning_rate": 4.4985062914340906e-07, "loss": 15.4731, "step": 57410 }, { "epoch": 0.9052784259317651, "grad_norm": 72.51257366518331, "learning_rate": 4.4836811847426496e-07, "loss": 15.4983, "step": 57420 }, { "epoch": 0.9054360850097749, "grad_norm": 73.59589092563343, "learning_rate": 4.468879986689845e-07, "loss": 15.5273, "step": 57430 }, { "epoch": 0.9055937440877846, "grad_norm": 69.08297101396924, "learning_rate": 4.454102700980545e-07, "loss": 15.4065, "step": 57440 }, { "epoch": 0.9057514031657943, "grad_norm": 74.71143674195636, "learning_rate": 4.4393493313136695e-07, "loss": 15.4316, "step": 57450 }, { "epoch": 0.905909062243804, "grad_norm": 68.53438951118841, "learning_rate": 4.4246198813820864e-07, "loss": 15.5174, "step": 57460 }, { "epoch": 0.9060667213218138, "grad_norm": 79.34835359166509, "learning_rate": 4.409914354872769e-07, "loss": 15.8949, "step": 57470 }, { "epoch": 0.9062243803998234, "grad_norm": 73.89715687751732, "learning_rate": 4.3952327554666053e-07, "loss": 15.5532, "step": 57480 }, { "epoch": 0.9063820394778331, "grad_norm": 76.06245334028118, "learning_rate": 4.3805750868385566e-07, "loss": 15.4904, "step": 57490 }, { "epoch": 0.9065396985558428, "grad_norm": 74.36945163909601, "learning_rate": 4.365941352657577e-07, "loss": 15.8268, "step": 57500 }, { "epoch": 0.9066973576338525, "grad_norm": 77.88024917610385, "learning_rate": 4.3513315565866353e-07, "loss": 15.0958, "step": 57510 }, { "epoch": 0.9068550167118623, "grad_norm": 73.37772746197601, "learning_rate": 4.3367457022826744e-07, "loss": 15.9088, "step": 57520 }, { "epoch": 0.907012675789872, "grad_norm": 76.41359925388369, "learning_rate": 4.3221837933966946e-07, "loss": 15.7976, "step": 57530 }, { "epoch": 0.9071703348678817, "grad_norm": 68.2452592994581, "learning_rate": 4.3076458335736815e-07, "loss": 15.2574, "step": 57540 }, { "epoch": 0.9073279939458914, "grad_norm": 76.8430096370303, "learning_rate": 4.2931318264526125e-07, "loss": 15.4193, "step": 57550 }, { "epoch": 0.9074856530239012, "grad_norm": 69.735105364164, "learning_rate": 4.2786417756664925e-07, "loss": 15.7866, "step": 57560 }, { "epoch": 0.9076433121019108, "grad_norm": 76.32207151750556, "learning_rate": 4.264175684842309e-07, "loss": 15.5505, "step": 57570 }, { "epoch": 0.9078009711799205, "grad_norm": 69.62928272025952, "learning_rate": 4.249733557601066e-07, "loss": 15.5076, "step": 57580 }, { "epoch": 0.9079586302579302, "grad_norm": 70.25019834798564, "learning_rate": 4.235315397557749e-07, "loss": 15.3487, "step": 57590 }, { "epoch": 0.9081162893359399, "grad_norm": 70.30207157440134, "learning_rate": 4.2209212083213936e-07, "loss": 15.423, "step": 57600 }, { "epoch": 0.9082739484139497, "grad_norm": 73.6180437687013, "learning_rate": 4.2065509934949735e-07, "loss": 15.6645, "step": 57610 }, { "epoch": 0.9084316074919594, "grad_norm": 67.69825352380701, "learning_rate": 4.192204756675489e-07, "loss": 15.2782, "step": 57620 }, { "epoch": 0.9085892665699691, "grad_norm": 68.41737278805763, "learning_rate": 4.177882501453956e-07, "loss": 15.4281, "step": 57630 }, { "epoch": 0.9087469256479788, "grad_norm": 70.92922326396499, "learning_rate": 4.163584231415363e-07, "loss": 15.0981, "step": 57640 }, { "epoch": 0.9089045847259886, "grad_norm": 73.59811138541613, "learning_rate": 4.1493099501387025e-07, "loss": 15.1729, "step": 57650 }, { "epoch": 0.9090622438039982, "grad_norm": 76.96368891047727, "learning_rate": 4.1350596611969606e-07, "loss": 15.6935, "step": 57660 }, { "epoch": 0.9092199028820079, "grad_norm": 72.99104160833649, "learning_rate": 4.120833368157129e-07, "loss": 15.4646, "step": 57670 }, { "epoch": 0.9093775619600176, "grad_norm": 72.65948985442436, "learning_rate": 4.1066310745801585e-07, "loss": 15.1345, "step": 57680 }, { "epoch": 0.9095352210380274, "grad_norm": 73.0825693786896, "learning_rate": 4.0924527840210503e-07, "loss": 15.3851, "step": 57690 }, { "epoch": 0.9096928801160371, "grad_norm": 80.02738413372487, "learning_rate": 4.0782985000287325e-07, "loss": 15.775, "step": 57700 }, { "epoch": 0.9098505391940468, "grad_norm": 70.26854711871856, "learning_rate": 4.0641682261461703e-07, "loss": 15.2258, "step": 57710 }, { "epoch": 0.9100081982720565, "grad_norm": 65.72235109756843, "learning_rate": 4.0500619659103123e-07, "loss": 15.4147, "step": 57720 }, { "epoch": 0.9101658573500662, "grad_norm": 70.71588094407132, "learning_rate": 4.03597972285209e-07, "loss": 15.1303, "step": 57730 }, { "epoch": 0.910323516428076, "grad_norm": 70.49125701675193, "learning_rate": 4.021921500496406e-07, "loss": 15.4732, "step": 57740 }, { "epoch": 0.9104811755060856, "grad_norm": 73.05132316378929, "learning_rate": 4.00788730236219e-07, "loss": 15.6658, "step": 57750 }, { "epoch": 0.9106388345840953, "grad_norm": 82.32358114709355, "learning_rate": 3.9938771319623095e-07, "loss": 15.5787, "step": 57760 }, { "epoch": 0.910796493662105, "grad_norm": 67.96356966747636, "learning_rate": 3.979890992803659e-07, "loss": 15.0644, "step": 57770 }, { "epoch": 0.9109541527401148, "grad_norm": 72.60165096652665, "learning_rate": 3.965928888387094e-07, "loss": 15.4904, "step": 57780 }, { "epoch": 0.9111118118181245, "grad_norm": 72.52918688382204, "learning_rate": 3.951990822207474e-07, "loss": 15.9568, "step": 57790 }, { "epoch": 0.9112694708961342, "grad_norm": 71.44151081483784, "learning_rate": 3.9380767977536094e-07, "loss": 15.1312, "step": 57800 }, { "epoch": 0.9114271299741439, "grad_norm": 65.73378057930113, "learning_rate": 3.924186818508313e-07, "loss": 15.1267, "step": 57810 }, { "epoch": 0.9115847890521537, "grad_norm": 79.03107679212262, "learning_rate": 3.9103208879483934e-07, "loss": 15.9304, "step": 57820 }, { "epoch": 0.9117424481301634, "grad_norm": 68.46094832392538, "learning_rate": 3.896479009544607e-07, "loss": 15.2874, "step": 57830 }, { "epoch": 0.911900107208173, "grad_norm": 74.59266174303286, "learning_rate": 3.882661186761738e-07, "loss": 15.4628, "step": 57840 }, { "epoch": 0.9120577662861827, "grad_norm": 79.41119480265243, "learning_rate": 3.8688674230584865e-07, "loss": 15.3407, "step": 57850 }, { "epoch": 0.9122154253641924, "grad_norm": 74.83664106060395, "learning_rate": 3.8550977218875794e-07, "loss": 15.2034, "step": 57860 }, { "epoch": 0.9123730844422022, "grad_norm": 73.91211909583105, "learning_rate": 3.841352086695682e-07, "loss": 15.3334, "step": 57870 }, { "epoch": 0.9125307435202119, "grad_norm": 71.46343397722248, "learning_rate": 3.8276305209234864e-07, "loss": 15.1191, "step": 57880 }, { "epoch": 0.9126884025982216, "grad_norm": 74.8410078590757, "learning_rate": 3.813933028005612e-07, "loss": 15.2742, "step": 57890 }, { "epoch": 0.9128460616762313, "grad_norm": 73.83642473048951, "learning_rate": 3.8002596113706826e-07, "loss": 15.3395, "step": 57900 }, { "epoch": 0.9130037207542411, "grad_norm": 71.20892069697705, "learning_rate": 3.7866102744412605e-07, "loss": 15.2406, "step": 57910 }, { "epoch": 0.9131613798322508, "grad_norm": 73.31214773595082, "learning_rate": 3.7729850206339236e-07, "loss": 15.4975, "step": 57920 }, { "epoch": 0.9133190389102605, "grad_norm": 68.76858825122498, "learning_rate": 3.7593838533591996e-07, "loss": 15.0079, "step": 57930 }, { "epoch": 0.9134766979882701, "grad_norm": 78.60419044593074, "learning_rate": 3.7458067760215877e-07, "loss": 15.8212, "step": 57940 }, { "epoch": 0.9136343570662798, "grad_norm": 153.44870867144354, "learning_rate": 3.732253792019569e-07, "loss": 15.5115, "step": 57950 }, { "epoch": 0.9137920161442896, "grad_norm": 70.18310064686281, "learning_rate": 3.718724904745552e-07, "loss": 15.3941, "step": 57960 }, { "epoch": 0.9139496752222993, "grad_norm": 74.18308000477779, "learning_rate": 3.705220117585995e-07, "loss": 15.6399, "step": 57970 }, { "epoch": 0.914107334300309, "grad_norm": 74.17061374115218, "learning_rate": 3.6917394339212154e-07, "loss": 15.5643, "step": 57980 }, { "epoch": 0.9142649933783187, "grad_norm": 70.39612091649826, "learning_rate": 3.6782828571256144e-07, "loss": 16.2431, "step": 57990 }, { "epoch": 0.9144226524563285, "grad_norm": 72.09399770371395, "learning_rate": 3.664850390567454e-07, "loss": 15.5062, "step": 58000 }, { "epoch": 0.9145803115343382, "grad_norm": 74.41369080171762, "learning_rate": 3.651442037609032e-07, "loss": 15.5695, "step": 58010 }, { "epoch": 0.9147379706123479, "grad_norm": 75.75941700216728, "learning_rate": 3.638057801606576e-07, "loss": 15.7066, "step": 58020 }, { "epoch": 0.9148956296903575, "grad_norm": 66.92007203559938, "learning_rate": 3.6246976859102835e-07, "loss": 14.4933, "step": 58030 }, { "epoch": 0.9150532887683673, "grad_norm": 71.52770888186242, "learning_rate": 3.6113616938643235e-07, "loss": 15.6149, "step": 58040 }, { "epoch": 0.915210947846377, "grad_norm": 75.83605493168125, "learning_rate": 3.598049828806849e-07, "loss": 15.8426, "step": 58050 }, { "epoch": 0.9153686069243867, "grad_norm": 72.63543624594237, "learning_rate": 3.5847620940698936e-07, "loss": 15.552, "step": 58060 }, { "epoch": 0.9155262660023964, "grad_norm": 75.21966410148636, "learning_rate": 3.5714984929795525e-07, "loss": 15.5473, "step": 58070 }, { "epoch": 0.9156839250804061, "grad_norm": 71.88590423665616, "learning_rate": 3.558259028855804e-07, "loss": 15.5305, "step": 58080 }, { "epoch": 0.9158415841584159, "grad_norm": 75.61223037454691, "learning_rate": 3.545043705012596e-07, "loss": 15.3521, "step": 58090 }, { "epoch": 0.9159992432364256, "grad_norm": 69.94216956454814, "learning_rate": 3.531852524757884e-07, "loss": 15.7298, "step": 58100 }, { "epoch": 0.9161569023144353, "grad_norm": 71.10733926273487, "learning_rate": 3.5186854913935144e-07, "loss": 15.499, "step": 58110 }, { "epoch": 0.9163145613924449, "grad_norm": 74.5825412643883, "learning_rate": 3.50554260821534e-07, "loss": 15.5309, "step": 58120 }, { "epoch": 0.9164722204704547, "grad_norm": 75.69370622240714, "learning_rate": 3.49242387851314e-07, "loss": 15.4641, "step": 58130 }, { "epoch": 0.9166298795484644, "grad_norm": 71.73632722267888, "learning_rate": 3.479329305570678e-07, "loss": 15.3771, "step": 58140 }, { "epoch": 0.9167875386264741, "grad_norm": 73.4933631593654, "learning_rate": 3.4662588926656084e-07, "loss": 15.4639, "step": 58150 }, { "epoch": 0.9169451977044838, "grad_norm": 77.22337799348624, "learning_rate": 3.4532126430696257e-07, "loss": 15.2798, "step": 58160 }, { "epoch": 0.9171028567824935, "grad_norm": 71.45146758724766, "learning_rate": 3.4401905600482957e-07, "loss": 16.165, "step": 58170 }, { "epoch": 0.9172605158605033, "grad_norm": 79.2192347832131, "learning_rate": 3.427192646861177e-07, "loss": 15.6063, "step": 58180 }, { "epoch": 0.917418174938513, "grad_norm": 74.47972446590046, "learning_rate": 3.414218906761779e-07, "loss": 15.3571, "step": 58190 }, { "epoch": 0.9175758340165227, "grad_norm": 68.43095416582113, "learning_rate": 3.4012693429975486e-07, "loss": 15.1107, "step": 58200 }, { "epoch": 0.9177334930945323, "grad_norm": 77.28398051635374, "learning_rate": 3.38834395880987e-07, "loss": 16.064, "step": 58210 }, { "epoch": 0.9178911521725421, "grad_norm": 69.0779204569236, "learning_rate": 3.3754427574341e-07, "loss": 15.5874, "step": 58220 }, { "epoch": 0.9180488112505518, "grad_norm": 74.21323586788432, "learning_rate": 3.362565742099544e-07, "loss": 15.5798, "step": 58230 }, { "epoch": 0.9182064703285615, "grad_norm": 73.42373091685384, "learning_rate": 3.349712916029424e-07, "loss": 15.5603, "step": 58240 }, { "epoch": 0.9183641294065712, "grad_norm": 76.55096546946075, "learning_rate": 3.3368842824409333e-07, "loss": 15.8813, "step": 58250 }, { "epoch": 0.918521788484581, "grad_norm": 75.31828147566783, "learning_rate": 3.3240798445451914e-07, "loss": 15.2319, "step": 58260 }, { "epoch": 0.9186794475625907, "grad_norm": 73.48226775916538, "learning_rate": 3.31129960554728e-07, "loss": 15.3279, "step": 58270 }, { "epoch": 0.9188371066406004, "grad_norm": 72.25819304623731, "learning_rate": 3.2985435686461955e-07, "loss": 15.4124, "step": 58280 }, { "epoch": 0.91899476571861, "grad_norm": 73.08505038430089, "learning_rate": 3.2858117370349275e-07, "loss": 15.5222, "step": 58290 }, { "epoch": 0.9191524247966197, "grad_norm": 71.2598030217685, "learning_rate": 3.2731041139003384e-07, "loss": 15.7479, "step": 58300 }, { "epoch": 0.9193100838746295, "grad_norm": 76.22641725739065, "learning_rate": 3.260420702423295e-07, "loss": 15.6245, "step": 58310 }, { "epoch": 0.9194677429526392, "grad_norm": 73.60331648920412, "learning_rate": 3.247761505778546e-07, "loss": 15.9266, "step": 58320 }, { "epoch": 0.9196254020306489, "grad_norm": 77.61717499695253, "learning_rate": 3.2351265271348243e-07, "loss": 15.9205, "step": 58330 }, { "epoch": 0.9197830611086586, "grad_norm": 77.36949786736153, "learning_rate": 3.2225157696548105e-07, "loss": 15.2906, "step": 58340 }, { "epoch": 0.9199407201866684, "grad_norm": 69.93541187196232, "learning_rate": 3.2099292364950463e-07, "loss": 15.2557, "step": 58350 }, { "epoch": 0.9200983792646781, "grad_norm": 72.90669016414903, "learning_rate": 3.1973669308061006e-07, "loss": 14.9435, "step": 58360 }, { "epoch": 0.9202560383426878, "grad_norm": 73.17712010743364, "learning_rate": 3.1848288557324137e-07, "loss": 15.5077, "step": 58370 }, { "epoch": 0.9204136974206975, "grad_norm": 73.87866516276152, "learning_rate": 3.172315014412397e-07, "loss": 15.7151, "step": 58380 }, { "epoch": 0.9205713564987072, "grad_norm": 76.84901833995563, "learning_rate": 3.1598254099783674e-07, "loss": 15.1814, "step": 58390 }, { "epoch": 0.9207290155767169, "grad_norm": 75.82925471372951, "learning_rate": 3.1473600455566244e-07, "loss": 15.6431, "step": 58400 }, { "epoch": 0.9208866746547266, "grad_norm": 68.74554334800405, "learning_rate": 3.1349189242673273e-07, "loss": 15.1958, "step": 58410 }, { "epoch": 0.9210443337327363, "grad_norm": 70.7533880539776, "learning_rate": 3.122502049224618e-07, "loss": 15.9939, "step": 58420 }, { "epoch": 0.921201992810746, "grad_norm": 75.0206633835267, "learning_rate": 3.110109423536578e-07, "loss": 14.6917, "step": 58430 }, { "epoch": 0.9213596518887558, "grad_norm": 72.33311252827193, "learning_rate": 3.097741050305181e-07, "loss": 15.3984, "step": 58440 }, { "epoch": 0.9215173109667655, "grad_norm": 70.87857291809863, "learning_rate": 3.08539693262635e-07, "loss": 15.6418, "step": 58450 }, { "epoch": 0.9216749700447752, "grad_norm": 69.57931859311763, "learning_rate": 3.0730770735899476e-07, "loss": 15.2785, "step": 58460 }, { "epoch": 0.9218326291227849, "grad_norm": 75.53466752089808, "learning_rate": 3.060781476279739e-07, "loss": 15.0121, "step": 58470 }, { "epoch": 0.9219902882007946, "grad_norm": 76.72806874637533, "learning_rate": 3.04851014377342e-07, "loss": 15.1934, "step": 58480 }, { "epoch": 0.9221479472788043, "grad_norm": 75.1069199175059, "learning_rate": 3.036263079142632e-07, "loss": 15.0346, "step": 58490 }, { "epoch": 0.922305606356814, "grad_norm": 70.22851345394582, "learning_rate": 3.024040285452934e-07, "loss": 14.8287, "step": 58500 }, { "epoch": 0.9224632654348237, "grad_norm": 71.71260343873689, "learning_rate": 3.01184176576379e-07, "loss": 15.4382, "step": 58510 }, { "epoch": 0.9226209245128334, "grad_norm": 78.58354135382014, "learning_rate": 2.9996675231286356e-07, "loss": 15.4584, "step": 58520 }, { "epoch": 0.9227785835908432, "grad_norm": 68.78291584105813, "learning_rate": 2.987517560594766e-07, "loss": 16.1155, "step": 58530 }, { "epoch": 0.9229362426688529, "grad_norm": 87.41065079709131, "learning_rate": 2.975391881203449e-07, "loss": 15.7025, "step": 58540 }, { "epoch": 0.9230939017468626, "grad_norm": 74.24566605781463, "learning_rate": 2.963290487989867e-07, "loss": 16.8247, "step": 58550 }, { "epoch": 0.9232515608248723, "grad_norm": 75.92949924653418, "learning_rate": 2.951213383983087e-07, "loss": 15.4873, "step": 58560 }, { "epoch": 0.923409219902882, "grad_norm": 69.73962035087644, "learning_rate": 2.9391605722061457e-07, "loss": 15.2428, "step": 58570 }, { "epoch": 0.9235668789808917, "grad_norm": 344.47116611646373, "learning_rate": 2.927132055675963e-07, "loss": 17.443, "step": 58580 }, { "epoch": 0.9237245380589014, "grad_norm": 75.19641200274783, "learning_rate": 2.9151278374033976e-07, "loss": 15.8338, "step": 58590 }, { "epoch": 0.9238821971369111, "grad_norm": 69.83059762876056, "learning_rate": 2.903147920393223e-07, "loss": 15.2055, "step": 58600 }, { "epoch": 0.9240398562149209, "grad_norm": 82.08959901211193, "learning_rate": 2.8911923076440975e-07, "loss": 15.56, "step": 58610 }, { "epoch": 0.9241975152929306, "grad_norm": 72.4595010031017, "learning_rate": 2.879261002148659e-07, "loss": 14.9087, "step": 58620 }, { "epoch": 0.9243551743709403, "grad_norm": 66.47859671291197, "learning_rate": 2.867354006893397e-07, "loss": 15.3992, "step": 58630 }, { "epoch": 0.92451283344895, "grad_norm": 73.41028954115947, "learning_rate": 2.855471324858783e-07, "loss": 14.9733, "step": 58640 }, { "epoch": 0.9246704925269597, "grad_norm": 67.52994580652843, "learning_rate": 2.8436129590191266e-07, "loss": 15.4342, "step": 58650 }, { "epoch": 0.9248281516049695, "grad_norm": 73.4456827013121, "learning_rate": 2.831778912342709e-07, "loss": 15.256, "step": 58660 }, { "epoch": 0.9249858106829791, "grad_norm": 72.55935697392646, "learning_rate": 2.8199691877916936e-07, "loss": 14.914, "step": 58670 }, { "epoch": 0.9251434697609888, "grad_norm": 71.42089796690571, "learning_rate": 2.808183788322194e-07, "loss": 15.3038, "step": 58680 }, { "epoch": 0.9253011288389985, "grad_norm": 73.75434499474808, "learning_rate": 2.796422716884162e-07, "loss": 14.9945, "step": 58690 }, { "epoch": 0.9254587879170083, "grad_norm": 71.49809072599541, "learning_rate": 2.784685976421542e-07, "loss": 15.3691, "step": 58700 }, { "epoch": 0.925616446995018, "grad_norm": 73.48525110132437, "learning_rate": 2.7729735698721284e-07, "loss": 14.8547, "step": 58710 }, { "epoch": 0.9257741060730277, "grad_norm": 78.65028873224082, "learning_rate": 2.7612855001676543e-07, "loss": 16.012, "step": 58720 }, { "epoch": 0.9259317651510374, "grad_norm": 69.78330903664927, "learning_rate": 2.749621770233768e-07, "loss": 15.5122, "step": 58730 }, { "epoch": 0.926089424229047, "grad_norm": 76.36156484188365, "learning_rate": 2.7379823829899897e-07, "loss": 15.5939, "step": 58740 }, { "epoch": 0.9262470833070569, "grad_norm": 68.9754702960997, "learning_rate": 2.7263673413497893e-07, "loss": 15.1765, "step": 58750 }, { "epoch": 0.9264047423850665, "grad_norm": 66.16512798731246, "learning_rate": 2.7147766482204965e-07, "loss": 15.299, "step": 58760 }, { "epoch": 0.9265624014630762, "grad_norm": 65.73555157162707, "learning_rate": 2.703210306503401e-07, "loss": 15.0078, "step": 58770 }, { "epoch": 0.9267200605410859, "grad_norm": 68.24657293912367, "learning_rate": 2.691668319093643e-07, "loss": 15.4285, "step": 58780 }, { "epoch": 0.9268777196190957, "grad_norm": 70.87725563378412, "learning_rate": 2.680150688880312e-07, "loss": 15.2973, "step": 58790 }, { "epoch": 0.9270353786971054, "grad_norm": 73.79023992041701, "learning_rate": 2.6686574187463565e-07, "loss": 15.2253, "step": 58800 }, { "epoch": 0.9271930377751151, "grad_norm": 72.20040044614338, "learning_rate": 2.657188511568676e-07, "loss": 15.511, "step": 58810 }, { "epoch": 0.9273506968531248, "grad_norm": 70.29056800707049, "learning_rate": 2.645743970218018e-07, "loss": 15.1983, "step": 58820 }, { "epoch": 0.9275083559311346, "grad_norm": 75.58626731497247, "learning_rate": 2.6343237975590795e-07, "loss": 15.2053, "step": 58830 }, { "epoch": 0.9276660150091443, "grad_norm": 71.18137003693924, "learning_rate": 2.6229279964504306e-07, "loss": 15.5671, "step": 58840 }, { "epoch": 0.9278236740871539, "grad_norm": 70.21923443751304, "learning_rate": 2.6115565697445556e-07, "loss": 16.1544, "step": 58850 }, { "epoch": 0.9279813331651636, "grad_norm": 79.68060463993916, "learning_rate": 2.600209520287833e-07, "loss": 15.4933, "step": 58860 }, { "epoch": 0.9281389922431733, "grad_norm": 76.95982582362839, "learning_rate": 2.588886850920525e-07, "loss": 15.6898, "step": 58870 }, { "epoch": 0.9282966513211831, "grad_norm": 73.11801160292855, "learning_rate": 2.577588564476807e-07, "loss": 15.8726, "step": 58880 }, { "epoch": 0.9284543103991928, "grad_norm": 74.30854337972065, "learning_rate": 2.56631466378473e-07, "loss": 15.04, "step": 58890 }, { "epoch": 0.9286119694772025, "grad_norm": 71.19545257182426, "learning_rate": 2.555065151666303e-07, "loss": 15.6212, "step": 58900 }, { "epoch": 0.9287696285552122, "grad_norm": 73.5882473652562, "learning_rate": 2.543840030937339e-07, "loss": 15.2525, "step": 58910 }, { "epoch": 0.928927287633222, "grad_norm": 76.57984330448932, "learning_rate": 2.5326393044076026e-07, "loss": 15.5159, "step": 58920 }, { "epoch": 0.9290849467112317, "grad_norm": 71.29478805544494, "learning_rate": 2.5214629748807617e-07, "loss": 15.6078, "step": 58930 }, { "epoch": 0.9292426057892413, "grad_norm": 80.36968062837619, "learning_rate": 2.5103110451543567e-07, "loss": 15.8036, "step": 58940 }, { "epoch": 0.929400264867251, "grad_norm": 69.65132087930387, "learning_rate": 2.4991835180197874e-07, "loss": 15.1564, "step": 58950 }, { "epoch": 0.9295579239452608, "grad_norm": 69.05898317602875, "learning_rate": 2.488080396262416e-07, "loss": 15.1464, "step": 58960 }, { "epoch": 0.9297155830232705, "grad_norm": 69.22768808590055, "learning_rate": 2.4770016826614395e-07, "loss": 14.8041, "step": 58970 }, { "epoch": 0.9298732421012802, "grad_norm": 67.27737338200521, "learning_rate": 2.4659473799899745e-07, "loss": 15.1196, "step": 58980 }, { "epoch": 0.9300309011792899, "grad_norm": 76.44509053396516, "learning_rate": 2.4549174910150187e-07, "loss": 15.7146, "step": 58990 }, { "epoch": 0.9301885602572996, "grad_norm": 72.98794212951512, "learning_rate": 2.4439120184974297e-07, "loss": 15.0711, "step": 59000 }, { "epoch": 0.9303462193353094, "grad_norm": 76.98982575570919, "learning_rate": 2.432930965192015e-07, "loss": 15.6532, "step": 59010 }, { "epoch": 0.930503878413319, "grad_norm": 68.62721521162958, "learning_rate": 2.421974333847421e-07, "loss": 15.1883, "step": 59020 }, { "epoch": 0.9306615374913287, "grad_norm": 70.22696904747136, "learning_rate": 2.411042127206209e-07, "loss": 15.4079, "step": 59030 }, { "epoch": 0.9308191965693384, "grad_norm": 68.81023832542411, "learning_rate": 2.40013434800479e-07, "loss": 15.8753, "step": 59040 }, { "epoch": 0.9309768556473482, "grad_norm": 70.58257368916968, "learning_rate": 2.3892509989735247e-07, "loss": 15.3619, "step": 59050 }, { "epoch": 0.9311345147253579, "grad_norm": 80.00038702230975, "learning_rate": 2.378392082836567e-07, "loss": 15.5902, "step": 59060 }, { "epoch": 0.9312921738033676, "grad_norm": 72.80821727956508, "learning_rate": 2.3675576023120427e-07, "loss": 14.8474, "step": 59070 }, { "epoch": 0.9314498328813773, "grad_norm": 72.59479294168226, "learning_rate": 2.3567475601119162e-07, "loss": 15.6069, "step": 59080 }, { "epoch": 0.931607491959387, "grad_norm": 72.21341502334401, "learning_rate": 2.3459619589420446e-07, "loss": 15.3529, "step": 59090 }, { "epoch": 0.9317651510373968, "grad_norm": 73.0140832029889, "learning_rate": 2.3352008015021466e-07, "loss": 15.25, "step": 59100 }, { "epoch": 0.9319228101154065, "grad_norm": 72.71283170277209, "learning_rate": 2.3244640904858784e-07, "loss": 15.6312, "step": 59110 }, { "epoch": 0.9320804691934161, "grad_norm": 72.99962709775002, "learning_rate": 2.3137518285807015e-07, "loss": 14.9116, "step": 59120 }, { "epoch": 0.9322381282714258, "grad_norm": 70.36167823051625, "learning_rate": 2.3030640184680152e-07, "loss": 15.4134, "step": 59130 }, { "epoch": 0.9323957873494356, "grad_norm": 70.69250655378461, "learning_rate": 2.292400662823091e-07, "loss": 15.6924, "step": 59140 }, { "epoch": 0.9325534464274453, "grad_norm": 76.00208286221577, "learning_rate": 2.2817617643150492e-07, "loss": 16.5328, "step": 59150 }, { "epoch": 0.932711105505455, "grad_norm": 73.79848915734408, "learning_rate": 2.271147325606926e-07, "loss": 15.5382, "step": 59160 }, { "epoch": 0.9328687645834647, "grad_norm": 71.34280470947546, "learning_rate": 2.2605573493555854e-07, "loss": 15.3236, "step": 59170 }, { "epoch": 0.9330264236614745, "grad_norm": 72.98672639929683, "learning_rate": 2.2499918382118292e-07, "loss": 15.0429, "step": 59180 }, { "epoch": 0.9331840827394842, "grad_norm": 73.27567343261097, "learning_rate": 2.239450794820286e-07, "loss": 15.5495, "step": 59190 }, { "epoch": 0.9333417418174939, "grad_norm": 72.36224549820011, "learning_rate": 2.2289342218194898e-07, "loss": 15.5698, "step": 59200 }, { "epoch": 0.9334994008955035, "grad_norm": 84.1429097493316, "learning_rate": 2.218442121841824e-07, "loss": 15.8406, "step": 59210 }, { "epoch": 0.9336570599735132, "grad_norm": 84.00624994147907, "learning_rate": 2.2079744975135764e-07, "loss": 15.8373, "step": 59220 }, { "epoch": 0.933814719051523, "grad_norm": 77.90614947092223, "learning_rate": 2.1975313514548734e-07, "loss": 15.4224, "step": 59230 }, { "epoch": 0.9339723781295327, "grad_norm": 71.59458990864991, "learning_rate": 2.1871126862797686e-07, "loss": 15.3994, "step": 59240 }, { "epoch": 0.9341300372075424, "grad_norm": 78.11761886488407, "learning_rate": 2.1767185045961205e-07, "loss": 15.758, "step": 59250 }, { "epoch": 0.9342876962855521, "grad_norm": 69.49171616456977, "learning_rate": 2.1663488090057027e-07, "loss": 15.3156, "step": 59260 }, { "epoch": 0.9344453553635619, "grad_norm": 70.3386257091939, "learning_rate": 2.156003602104151e-07, "loss": 15.4768, "step": 59270 }, { "epoch": 0.9346030144415716, "grad_norm": 69.09136981301718, "learning_rate": 2.1456828864809488e-07, "loss": 15.1086, "step": 59280 }, { "epoch": 0.9347606735195813, "grad_norm": 75.92289739335747, "learning_rate": 2.1353866647194966e-07, "loss": 15.3179, "step": 59290 }, { "epoch": 0.9349183325975909, "grad_norm": 71.40888531890808, "learning_rate": 2.1251149393970106e-07, "loss": 14.9803, "step": 59300 }, { "epoch": 0.9350759916756006, "grad_norm": 66.92563588387414, "learning_rate": 2.114867713084612e-07, "loss": 15.1863, "step": 59310 }, { "epoch": 0.9352336507536104, "grad_norm": 71.4003642483912, "learning_rate": 2.104644988347293e-07, "loss": 15.2531, "step": 59320 }, { "epoch": 0.9353913098316201, "grad_norm": 72.31577194568901, "learning_rate": 2.094446767743885e-07, "loss": 15.175, "step": 59330 }, { "epoch": 0.9355489689096298, "grad_norm": 71.34166707601916, "learning_rate": 2.0842730538270906e-07, "loss": 15.3429, "step": 59340 }, { "epoch": 0.9357066279876395, "grad_norm": 70.26231394075896, "learning_rate": 2.0741238491435057e-07, "loss": 15.2257, "step": 59350 }, { "epoch": 0.9358642870656493, "grad_norm": 71.69298039459395, "learning_rate": 2.0639991562335537e-07, "loss": 15.8155, "step": 59360 }, { "epoch": 0.936021946143659, "grad_norm": 72.66586720593212, "learning_rate": 2.053898977631552e-07, "loss": 15.4933, "step": 59370 }, { "epoch": 0.9361796052216687, "grad_norm": 70.72924984380374, "learning_rate": 2.0438233158656674e-07, "loss": 15.697, "step": 59380 }, { "epoch": 0.9363372642996783, "grad_norm": 75.52831374071212, "learning_rate": 2.0337721734579485e-07, "loss": 16.1374, "step": 59390 }, { "epoch": 0.9364949233776881, "grad_norm": 72.17750725831536, "learning_rate": 2.0237455529242723e-07, "loss": 15.8722, "step": 59400 }, { "epoch": 0.9366525824556978, "grad_norm": 73.3883008328265, "learning_rate": 2.0137434567743863e-07, "loss": 15.3494, "step": 59410 }, { "epoch": 0.9368102415337075, "grad_norm": 67.6061432958398, "learning_rate": 2.0037658875119327e-07, "loss": 15.0665, "step": 59420 }, { "epoch": 0.9369679006117172, "grad_norm": 73.04564525681687, "learning_rate": 1.99381284763438e-07, "loss": 15.2881, "step": 59430 }, { "epoch": 0.9371255596897269, "grad_norm": 77.1515049063786, "learning_rate": 1.98388433963308e-07, "loss": 15.5913, "step": 59440 }, { "epoch": 0.9372832187677367, "grad_norm": 73.14441772108714, "learning_rate": 1.9739803659932222e-07, "loss": 15.9395, "step": 59450 }, { "epoch": 0.9374408778457464, "grad_norm": 71.45317030538249, "learning_rate": 1.9641009291938794e-07, "loss": 15.0206, "step": 59460 }, { "epoch": 0.937598536923756, "grad_norm": 72.57379937679886, "learning_rate": 1.9542460317079404e-07, "loss": 15.4267, "step": 59470 }, { "epoch": 0.9377561960017657, "grad_norm": 72.55425110743285, "learning_rate": 1.944415676002198e-07, "loss": 15.4068, "step": 59480 }, { "epoch": 0.9379138550797755, "grad_norm": 72.98922037188963, "learning_rate": 1.9346098645372846e-07, "loss": 15.2935, "step": 59490 }, { "epoch": 0.9380715141577852, "grad_norm": 75.36630802812665, "learning_rate": 1.9248285997676918e-07, "loss": 15.1205, "step": 59500 }, { "epoch": 0.9382291732357949, "grad_norm": 70.66712424453152, "learning_rate": 1.9150718841417393e-07, "loss": 15.5292, "step": 59510 }, { "epoch": 0.9383868323138046, "grad_norm": 73.47007578218442, "learning_rate": 1.9053397201016398e-07, "loss": 15.7818, "step": 59520 }, { "epoch": 0.9385444913918144, "grad_norm": 70.07727869760515, "learning_rate": 1.8956321100834562e-07, "loss": 15.6906, "step": 59530 }, { "epoch": 0.9387021504698241, "grad_norm": 77.66747975971103, "learning_rate": 1.8859490565170779e-07, "loss": 15.6418, "step": 59540 }, { "epoch": 0.9388598095478338, "grad_norm": 78.24444370935159, "learning_rate": 1.8762905618262773e-07, "loss": 16.0065, "step": 59550 }, { "epoch": 0.9390174686258435, "grad_norm": 66.84355975502507, "learning_rate": 1.8666566284286648e-07, "loss": 15.2222, "step": 59560 }, { "epoch": 0.9391751277038531, "grad_norm": 73.87101868901121, "learning_rate": 1.8570472587357003e-07, "loss": 15.2857, "step": 59570 }, { "epoch": 0.9393327867818629, "grad_norm": 72.13653315063063, "learning_rate": 1.8474624551527044e-07, "loss": 15.2279, "step": 59580 }, { "epoch": 0.9394904458598726, "grad_norm": 77.9806626867923, "learning_rate": 1.8379022200788465e-07, "loss": 15.2449, "step": 59590 }, { "epoch": 0.9396481049378823, "grad_norm": 69.5091582885135, "learning_rate": 1.828366555907124e-07, "loss": 15.5675, "step": 59600 }, { "epoch": 0.939805764015892, "grad_norm": 80.28074104864329, "learning_rate": 1.8188554650244384e-07, "loss": 15.9715, "step": 59610 }, { "epoch": 0.9399634230939018, "grad_norm": 75.15269439314888, "learning_rate": 1.8093689498114742e-07, "loss": 15.1613, "step": 59620 }, { "epoch": 0.9401210821719115, "grad_norm": 77.35889506635944, "learning_rate": 1.7999070126427988e-07, "loss": 14.9771, "step": 59630 }, { "epoch": 0.9402787412499212, "grad_norm": 72.92687571686639, "learning_rate": 1.7904696558868507e-07, "loss": 15.1177, "step": 59640 }, { "epoch": 0.9404364003279309, "grad_norm": 70.45411607835253, "learning_rate": 1.781056881905874e-07, "loss": 15.6235, "step": 59650 }, { "epoch": 0.9405940594059405, "grad_norm": 74.17227062131475, "learning_rate": 1.7716686930559722e-07, "loss": 15.2414, "step": 59660 }, { "epoch": 0.9407517184839503, "grad_norm": 73.0003083979018, "learning_rate": 1.7623050916871e-07, "loss": 14.7837, "step": 59670 }, { "epoch": 0.94090937756196, "grad_norm": 74.85963752208056, "learning_rate": 1.7529660801430482e-07, "loss": 15.0291, "step": 59680 }, { "epoch": 0.9410670366399697, "grad_norm": 77.85858896752794, "learning_rate": 1.7436516607614695e-07, "loss": 15.912, "step": 59690 }, { "epoch": 0.9412246957179794, "grad_norm": 77.66130689225864, "learning_rate": 1.7343618358738656e-07, "loss": 15.5594, "step": 59700 }, { "epoch": 0.9413823547959892, "grad_norm": 70.84491524017275, "learning_rate": 1.725096607805532e-07, "loss": 15.3711, "step": 59710 }, { "epoch": 0.9415400138739989, "grad_norm": 74.83799847593104, "learning_rate": 1.7158559788756578e-07, "loss": 14.8403, "step": 59720 }, { "epoch": 0.9416976729520086, "grad_norm": 71.16198889746923, "learning_rate": 1.706639951397271e-07, "loss": 15.4934, "step": 59730 }, { "epoch": 0.9418553320300183, "grad_norm": 74.18230600784376, "learning_rate": 1.6974485276772367e-07, "loss": 15.5296, "step": 59740 }, { "epoch": 0.942012991108028, "grad_norm": 70.83159037137953, "learning_rate": 1.6882817100162264e-07, "loss": 15.3636, "step": 59750 }, { "epoch": 0.9421706501860377, "grad_norm": 70.48928718594819, "learning_rate": 1.679139500708804e-07, "loss": 15.6636, "step": 59760 }, { "epoch": 0.9423283092640474, "grad_norm": 75.93150412732048, "learning_rate": 1.6700219020433394e-07, "loss": 15.0758, "step": 59770 }, { "epoch": 0.9424859683420571, "grad_norm": 74.79613011330342, "learning_rate": 1.6609289163020626e-07, "loss": 15.7373, "step": 59780 }, { "epoch": 0.9426436274200668, "grad_norm": 73.82995336899516, "learning_rate": 1.6518605457610305e-07, "loss": 14.9451, "step": 59790 }, { "epoch": 0.9428012864980766, "grad_norm": 77.74680670410767, "learning_rate": 1.6428167926901384e-07, "loss": 15.5695, "step": 59800 }, { "epoch": 0.9429589455760863, "grad_norm": 81.13842863715344, "learning_rate": 1.63379765935312e-07, "loss": 15.6041, "step": 59810 }, { "epoch": 0.943116604654096, "grad_norm": 75.67182812728984, "learning_rate": 1.6248031480075587e-07, "loss": 15.2291, "step": 59820 }, { "epoch": 0.9432742637321057, "grad_norm": 69.53011644967772, "learning_rate": 1.6158332609048643e-07, "loss": 15.654, "step": 59830 }, { "epoch": 0.9434319228101155, "grad_norm": 77.6879972799813, "learning_rate": 1.6068880002902854e-07, "loss": 15.6412, "step": 59840 }, { "epoch": 0.9435895818881251, "grad_norm": 74.65458416417658, "learning_rate": 1.5979673684029084e-07, "loss": 15.7548, "step": 59850 }, { "epoch": 0.9437472409661348, "grad_norm": 72.41156549269576, "learning_rate": 1.5890713674756364e-07, "loss": 15.1959, "step": 59860 }, { "epoch": 0.9439049000441445, "grad_norm": 79.36921274936893, "learning_rate": 1.5801999997352435e-07, "loss": 15.5305, "step": 59870 }, { "epoch": 0.9440625591221542, "grad_norm": 72.56336095890359, "learning_rate": 1.571353267402298e-07, "loss": 15.5317, "step": 59880 }, { "epoch": 0.944220218200164, "grad_norm": 73.75929237261059, "learning_rate": 1.5625311726912396e-07, "loss": 15.2034, "step": 59890 }, { "epoch": 0.9443778772781737, "grad_norm": 71.52695850121994, "learning_rate": 1.5537337178102906e-07, "loss": 15.2643, "step": 59900 }, { "epoch": 0.9445355363561834, "grad_norm": 72.162951344912, "learning_rate": 1.5449609049615788e-07, "loss": 15.0013, "step": 59910 }, { "epoch": 0.9446931954341931, "grad_norm": 67.20475997620632, "learning_rate": 1.5362127363409917e-07, "loss": 15.3603, "step": 59920 }, { "epoch": 0.9448508545122029, "grad_norm": 73.38046829450207, "learning_rate": 1.5274892141382892e-07, "loss": 15.3601, "step": 59930 }, { "epoch": 0.9450085135902125, "grad_norm": 71.42703630755277, "learning_rate": 1.5187903405370575e-07, "loss": 15.2126, "step": 59940 }, { "epoch": 0.9451661726682222, "grad_norm": 74.79471928159842, "learning_rate": 1.5101161177146995e-07, "loss": 15.0122, "step": 59950 }, { "epoch": 0.9453238317462319, "grad_norm": 73.69035599172179, "learning_rate": 1.5014665478424561e-07, "loss": 15.1146, "step": 59960 }, { "epoch": 0.9454814908242417, "grad_norm": 69.8178904353764, "learning_rate": 1.4928416330853846e-07, "loss": 16.2192, "step": 59970 }, { "epoch": 0.9456391499022514, "grad_norm": 71.41314475761239, "learning_rate": 1.4842413756024133e-07, "loss": 15.3, "step": 59980 }, { "epoch": 0.9457968089802611, "grad_norm": 75.93889795016379, "learning_rate": 1.4756657775462312e-07, "loss": 15.4733, "step": 59990 }, { "epoch": 0.9459544680582708, "grad_norm": 73.56691272937525, "learning_rate": 1.4671148410634327e-07, "loss": 15.1316, "step": 60000 }, { "epoch": 0.9461121271362805, "grad_norm": 74.02969905847976, "learning_rate": 1.45858856829435e-07, "loss": 15.6418, "step": 60010 }, { "epoch": 0.9462697862142903, "grad_norm": 71.88413394750846, "learning_rate": 1.4500869613732316e-07, "loss": 15.8427, "step": 60020 }, { "epoch": 0.9464274452922999, "grad_norm": 75.27456219420243, "learning_rate": 1.441610022428086e-07, "loss": 15.7482, "step": 60030 }, { "epoch": 0.9465851043703096, "grad_norm": 75.22281806032807, "learning_rate": 1.4331577535807827e-07, "loss": 15.8142, "step": 60040 }, { "epoch": 0.9467427634483193, "grad_norm": 70.92672280420885, "learning_rate": 1.4247301569470073e-07, "loss": 15.4862, "step": 60050 }, { "epoch": 0.9469004225263291, "grad_norm": 72.2656617359024, "learning_rate": 1.4163272346362611e-07, "loss": 15.135, "step": 60060 }, { "epoch": 0.9470580816043388, "grad_norm": 71.14081346632909, "learning_rate": 1.4079489887518728e-07, "loss": 15.7496, "step": 60070 }, { "epoch": 0.9472157406823485, "grad_norm": 74.00111043479612, "learning_rate": 1.399595421390987e-07, "loss": 15.0542, "step": 60080 }, { "epoch": 0.9473733997603582, "grad_norm": 71.6215088325452, "learning_rate": 1.3912665346446087e-07, "loss": 15.0525, "step": 60090 }, { "epoch": 0.947531058838368, "grad_norm": 70.53182931732117, "learning_rate": 1.3829623305975037e-07, "loss": 15.0899, "step": 60100 }, { "epoch": 0.9476887179163777, "grad_norm": 81.21885741608604, "learning_rate": 1.3746828113283317e-07, "loss": 15.5196, "step": 60110 }, { "epoch": 0.9478463769943873, "grad_norm": 71.84352445066182, "learning_rate": 1.3664279789094903e-07, "loss": 15.0129, "step": 60120 }, { "epoch": 0.948004036072397, "grad_norm": 74.25446441527806, "learning_rate": 1.358197835407271e-07, "loss": 15.4146, "step": 60130 }, { "epoch": 0.9481616951504067, "grad_norm": 69.92350171312495, "learning_rate": 1.3499923828817484e-07, "loss": 15.6116, "step": 60140 }, { "epoch": 0.9483193542284165, "grad_norm": 71.78900894884183, "learning_rate": 1.3418116233868462e-07, "loss": 15.4486, "step": 60150 }, { "epoch": 0.9484770133064262, "grad_norm": 78.44905697741036, "learning_rate": 1.333655558970248e-07, "loss": 15.3829, "step": 60160 }, { "epoch": 0.9486346723844359, "grad_norm": 80.530248865635, "learning_rate": 1.3255241916735218e-07, "loss": 15.2694, "step": 60170 }, { "epoch": 0.9487923314624456, "grad_norm": 71.54896565572626, "learning_rate": 1.3174175235320164e-07, "loss": 15.6533, "step": 60180 }, { "epoch": 0.9489499905404554, "grad_norm": 74.51763050052534, "learning_rate": 1.3093355565748976e-07, "loss": 15.0498, "step": 60190 }, { "epoch": 0.949107649618465, "grad_norm": 76.83159519936554, "learning_rate": 1.301278292825181e-07, "loss": 15.1212, "step": 60200 }, { "epoch": 0.9492653086964747, "grad_norm": 68.13092797050481, "learning_rate": 1.293245734299653e-07, "loss": 15.478, "step": 60210 }, { "epoch": 0.9494229677744844, "grad_norm": 70.58671612629986, "learning_rate": 1.2852378830089495e-07, "loss": 15.3413, "step": 60220 }, { "epoch": 0.9495806268524941, "grad_norm": 90.51518021150167, "learning_rate": 1.277254740957512e-07, "loss": 15.4758, "step": 60230 }, { "epoch": 0.9497382859305039, "grad_norm": 75.99416752655877, "learning_rate": 1.2692963101436083e-07, "loss": 15.7614, "step": 60240 }, { "epoch": 0.9498959450085136, "grad_norm": 70.97630312030947, "learning_rate": 1.2613625925592788e-07, "loss": 15.2988, "step": 60250 }, { "epoch": 0.9500536040865233, "grad_norm": 76.70520673049965, "learning_rate": 1.2534535901904455e-07, "loss": 15.4144, "step": 60260 }, { "epoch": 0.950211263164533, "grad_norm": 74.26153558967799, "learning_rate": 1.2455693050167695e-07, "loss": 15.1026, "step": 60270 }, { "epoch": 0.9503689222425428, "grad_norm": 69.03555227096389, "learning_rate": 1.2377097390117943e-07, "loss": 14.8195, "step": 60280 }, { "epoch": 0.9505265813205525, "grad_norm": 72.28203096721198, "learning_rate": 1.229874894142813e-07, "loss": 15.3464, "step": 60290 }, { "epoch": 0.9506842403985621, "grad_norm": 70.94599220722992, "learning_rate": 1.2220647723709899e-07, "loss": 15.4261, "step": 60300 }, { "epoch": 0.9508418994765718, "grad_norm": 68.44333423885881, "learning_rate": 1.2142793756512506e-07, "loss": 15.3807, "step": 60310 }, { "epoch": 0.9509995585545816, "grad_norm": 76.24133223437316, "learning_rate": 1.206518705932369e-07, "loss": 16.077, "step": 60320 }, { "epoch": 0.9511572176325913, "grad_norm": 73.55670421410906, "learning_rate": 1.1987827651569029e-07, "loss": 15.8464, "step": 60330 }, { "epoch": 0.951314876710601, "grad_norm": 78.76655674606384, "learning_rate": 1.1910715552612363e-07, "loss": 15.1221, "step": 60340 }, { "epoch": 0.9514725357886107, "grad_norm": 69.72937535881763, "learning_rate": 1.1833850781755695e-07, "loss": 15.706, "step": 60350 }, { "epoch": 0.9516301948666204, "grad_norm": 70.27529928631125, "learning_rate": 1.1757233358238751e-07, "loss": 15.2665, "step": 60360 }, { "epoch": 0.9517878539446302, "grad_norm": 65.43373385771726, "learning_rate": 1.1680863301239853e-07, "loss": 14.7694, "step": 60370 }, { "epoch": 0.9519455130226399, "grad_norm": 78.591409036458, "learning_rate": 1.1604740629874934e-07, "loss": 15.7596, "step": 60380 }, { "epoch": 0.9521031721006495, "grad_norm": 70.95711025307195, "learning_rate": 1.1528865363198417e-07, "loss": 15.2566, "step": 60390 }, { "epoch": 0.9522608311786592, "grad_norm": 71.85964445387582, "learning_rate": 1.1453237520202443e-07, "loss": 15.6586, "step": 60400 }, { "epoch": 0.952418490256669, "grad_norm": 72.97058655130745, "learning_rate": 1.1377857119817426e-07, "loss": 15.3108, "step": 60410 }, { "epoch": 0.9525761493346787, "grad_norm": 71.96342443592745, "learning_rate": 1.1302724180911828e-07, "loss": 15.7577, "step": 60420 }, { "epoch": 0.9527338084126884, "grad_norm": 75.41449370430858, "learning_rate": 1.122783872229205e-07, "loss": 15.4344, "step": 60430 }, { "epoch": 0.9528914674906981, "grad_norm": 71.65697250861295, "learning_rate": 1.1153200762702654e-07, "loss": 15.3873, "step": 60440 }, { "epoch": 0.9530491265687078, "grad_norm": 71.99158796850138, "learning_rate": 1.107881032082636e-07, "loss": 15.4614, "step": 60450 }, { "epoch": 0.9532067856467176, "grad_norm": 68.54387046648237, "learning_rate": 1.1004667415283721e-07, "loss": 15.0325, "step": 60460 }, { "epoch": 0.9533644447247273, "grad_norm": 81.02302662035065, "learning_rate": 1.0930772064633333e-07, "loss": 16.427, "step": 60470 }, { "epoch": 0.9535221038027369, "grad_norm": 72.53414050532905, "learning_rate": 1.0857124287372067e-07, "loss": 15.4258, "step": 60480 }, { "epoch": 0.9536797628807466, "grad_norm": 71.17805249091383, "learning_rate": 1.0783724101934401e-07, "loss": 14.879, "step": 60490 }, { "epoch": 0.9538374219587564, "grad_norm": 71.76321751579911, "learning_rate": 1.0710571526693414e-07, "loss": 14.8406, "step": 60500 }, { "epoch": 0.9539950810367661, "grad_norm": 69.39163333828623, "learning_rate": 1.0637666579959572e-07, "loss": 15.1079, "step": 60510 }, { "epoch": 0.9541527401147758, "grad_norm": 72.24395079879729, "learning_rate": 1.0565009279981831e-07, "loss": 15.0269, "step": 60520 }, { "epoch": 0.9543103991927855, "grad_norm": 69.11174808941419, "learning_rate": 1.0492599644946977e-07, "loss": 15.4156, "step": 60530 }, { "epoch": 0.9544680582707953, "grad_norm": 73.12275985121225, "learning_rate": 1.0420437692979957e-07, "loss": 15.2508, "step": 60540 }, { "epoch": 0.954625717348805, "grad_norm": 75.39179330903539, "learning_rate": 1.0348523442143433e-07, "loss": 15.6128, "step": 60550 }, { "epoch": 0.9547833764268147, "grad_norm": 74.62080003179018, "learning_rate": 1.0276856910438226e-07, "loss": 15.317, "step": 60560 }, { "epoch": 0.9549410355048243, "grad_norm": 69.55587754601099, "learning_rate": 1.0205438115803213e-07, "loss": 15.5314, "step": 60570 }, { "epoch": 0.955098694582834, "grad_norm": 73.73505752201524, "learning_rate": 1.01342670761152e-07, "loss": 15.9666, "step": 60580 }, { "epoch": 0.9552563536608438, "grad_norm": 76.49679193702242, "learning_rate": 1.0063343809188942e-07, "loss": 15.2132, "step": 60590 }, { "epoch": 0.9554140127388535, "grad_norm": 76.57474375217083, "learning_rate": 9.992668332777122e-08, "loss": 15.3565, "step": 60600 }, { "epoch": 0.9555716718168632, "grad_norm": 71.52938813489997, "learning_rate": 9.922240664570592e-08, "loss": 14.954, "step": 60610 }, { "epoch": 0.9557293308948729, "grad_norm": 70.82508058510119, "learning_rate": 9.852060822198029e-08, "loss": 15.2667, "step": 60620 }, { "epoch": 0.9558869899728827, "grad_norm": 67.46910510377381, "learning_rate": 9.782128823226155e-08, "loss": 15.3671, "step": 60630 }, { "epoch": 0.9560446490508924, "grad_norm": 72.01182781552194, "learning_rate": 9.712444685159417e-08, "loss": 15.3686, "step": 60640 }, { "epoch": 0.9562023081289021, "grad_norm": 71.85592043581529, "learning_rate": 9.643008425440747e-08, "loss": 15.0773, "step": 60650 }, { "epoch": 0.9563599672069117, "grad_norm": 73.41723321476553, "learning_rate": 9.573820061450467e-08, "loss": 16.1103, "step": 60660 }, { "epoch": 0.9565176262849215, "grad_norm": 69.11415433783964, "learning_rate": 9.504879610507168e-08, "loss": 15.6648, "step": 60670 }, { "epoch": 0.9566752853629312, "grad_norm": 77.23724309482513, "learning_rate": 9.436187089867154e-08, "loss": 15.452, "step": 60680 }, { "epoch": 0.9568329444409409, "grad_norm": 71.64580745623617, "learning_rate": 9.367742516725009e-08, "loss": 15.2916, "step": 60690 }, { "epoch": 0.9569906035189506, "grad_norm": 69.80972038818402, "learning_rate": 9.299545908212915e-08, "loss": 15.5534, "step": 60700 }, { "epoch": 0.9571482625969603, "grad_norm": 76.76862858419736, "learning_rate": 9.231597281401217e-08, "loss": 15.1114, "step": 60710 }, { "epoch": 0.9573059216749701, "grad_norm": 72.09386410502799, "learning_rate": 9.163896653298088e-08, "loss": 15.5136, "step": 60720 }, { "epoch": 0.9574635807529798, "grad_norm": 69.40785346889389, "learning_rate": 9.096444040849529e-08, "loss": 15.5677, "step": 60730 }, { "epoch": 0.9576212398309895, "grad_norm": 78.35884878379595, "learning_rate": 9.029239460939698e-08, "loss": 15.4704, "step": 60740 }, { "epoch": 0.9577788989089991, "grad_norm": 69.23454814100137, "learning_rate": 8.962282930390475e-08, "loss": 15.3144, "step": 60750 }, { "epoch": 0.9579365579870089, "grad_norm": 68.49229525380932, "learning_rate": 8.895574465961787e-08, "loss": 15.3057, "step": 60760 }, { "epoch": 0.9580942170650186, "grad_norm": 68.23146543167202, "learning_rate": 8.829114084351276e-08, "loss": 15.35, "step": 60770 }, { "epoch": 0.9582518761430283, "grad_norm": 68.05180703683072, "learning_rate": 8.762901802194745e-08, "loss": 15.2236, "step": 60780 }, { "epoch": 0.958409535221038, "grad_norm": 71.73397269529312, "learning_rate": 8.696937636065606e-08, "loss": 15.522, "step": 60790 }, { "epoch": 0.9585671942990477, "grad_norm": 71.3823953148473, "learning_rate": 8.631221602475426e-08, "loss": 15.3142, "step": 60800 }, { "epoch": 0.9587248533770575, "grad_norm": 72.93438867653362, "learning_rate": 8.565753717873382e-08, "loss": 15.2069, "step": 60810 }, { "epoch": 0.9588825124550672, "grad_norm": 72.88741515681929, "learning_rate": 8.50053399864692e-08, "loss": 15.4355, "step": 60820 }, { "epoch": 0.9590401715330769, "grad_norm": 71.54750339746899, "learning_rate": 8.435562461120983e-08, "loss": 15.6696, "step": 60830 }, { "epoch": 0.9591978306110865, "grad_norm": 68.64464954341757, "learning_rate": 8.370839121558783e-08, "loss": 15.0453, "step": 60840 }, { "epoch": 0.9593554896890963, "grad_norm": 72.50902313913461, "learning_rate": 8.306363996161027e-08, "loss": 15.4679, "step": 60850 }, { "epoch": 0.959513148767106, "grad_norm": 71.12771763359241, "learning_rate": 8.24213710106636e-08, "loss": 15.0043, "step": 60860 }, { "epoch": 0.9596708078451157, "grad_norm": 75.9090052914444, "learning_rate": 8.178158452351592e-08, "loss": 15.2414, "step": 60870 }, { "epoch": 0.9598284669231254, "grad_norm": 77.5939179778238, "learning_rate": 8.114428066031021e-08, "loss": 15.585, "step": 60880 }, { "epoch": 0.9599861260011352, "grad_norm": 81.42609730263375, "learning_rate": 8.050945958057e-08, "loss": 15.2223, "step": 60890 }, { "epoch": 0.9601437850791449, "grad_norm": 68.23466927018322, "learning_rate": 7.987712144319704e-08, "loss": 15.2301, "step": 60900 }, { "epoch": 0.9603014441571546, "grad_norm": 72.11145239198875, "learning_rate": 7.924726640647251e-08, "loss": 15.3354, "step": 60910 }, { "epoch": 0.9604591032351643, "grad_norm": 71.19143376323879, "learning_rate": 7.861989462805475e-08, "loss": 15.5006, "step": 60920 }, { "epoch": 0.9606167623131739, "grad_norm": 69.61729761307552, "learning_rate": 7.799500626497925e-08, "loss": 15.8173, "step": 60930 }, { "epoch": 0.9607744213911837, "grad_norm": 72.42407239651814, "learning_rate": 7.73726014736631e-08, "loss": 15.2331, "step": 60940 }, { "epoch": 0.9609320804691934, "grad_norm": 70.82289715974376, "learning_rate": 7.675268040989947e-08, "loss": 15.1397, "step": 60950 }, { "epoch": 0.9610897395472031, "grad_norm": 66.75111084171778, "learning_rate": 7.61352432288609e-08, "loss": 15.0804, "step": 60960 }, { "epoch": 0.9612473986252128, "grad_norm": 68.25983218466264, "learning_rate": 7.552029008509821e-08, "loss": 15.2846, "step": 60970 }, { "epoch": 0.9614050577032226, "grad_norm": 71.41962013642159, "learning_rate": 7.490782113253825e-08, "loss": 15.7295, "step": 60980 }, { "epoch": 0.9615627167812323, "grad_norm": 105.06631746744159, "learning_rate": 7.429783652448952e-08, "loss": 15.4539, "step": 60990 }, { "epoch": 0.961720375859242, "grad_norm": 74.95669136404732, "learning_rate": 7.369033641363654e-08, "loss": 15.8053, "step": 61000 }, { "epoch": 0.9618780349372517, "grad_norm": 67.9778089859913, "learning_rate": 7.308532095204102e-08, "loss": 15.2499, "step": 61010 }, { "epoch": 0.9620356940152613, "grad_norm": 71.2475969170957, "learning_rate": 7.248279029114625e-08, "loss": 15.6491, "step": 61020 }, { "epoch": 0.9621933530932711, "grad_norm": 69.92059513626128, "learning_rate": 7.188274458176935e-08, "loss": 15.5689, "step": 61030 }, { "epoch": 0.9623510121712808, "grad_norm": 76.35201250823201, "learning_rate": 7.128518397410911e-08, "loss": 15.1541, "step": 61040 }, { "epoch": 0.9625086712492905, "grad_norm": 78.96505818320175, "learning_rate": 7.069010861773917e-08, "loss": 15.8214, "step": 61050 }, { "epoch": 0.9626663303273002, "grad_norm": 78.36435706170263, "learning_rate": 7.009751866161374e-08, "loss": 15.3128, "step": 61060 }, { "epoch": 0.96282398940531, "grad_norm": 72.55730000155262, "learning_rate": 6.950741425406193e-08, "loss": 15.2766, "step": 61070 }, { "epoch": 0.9629816484833197, "grad_norm": 69.89100747068092, "learning_rate": 6.89197955427956e-08, "loss": 15.6721, "step": 61080 }, { "epoch": 0.9631393075613294, "grad_norm": 69.79710423308877, "learning_rate": 6.833466267489818e-08, "loss": 15.4126, "step": 61090 }, { "epoch": 0.9632969666393391, "grad_norm": 72.09530358084577, "learning_rate": 6.775201579683477e-08, "loss": 14.71, "step": 61100 }, { "epoch": 0.9634546257173489, "grad_norm": 71.86526531348838, "learning_rate": 6.717185505444867e-08, "loss": 15.5745, "step": 61110 }, { "epoch": 0.9636122847953585, "grad_norm": 69.64409486680721, "learning_rate": 6.659418059295708e-08, "loss": 15.9193, "step": 61120 }, { "epoch": 0.9637699438733682, "grad_norm": 69.501594868545, "learning_rate": 6.601899255696098e-08, "loss": 14.8509, "step": 61130 }, { "epoch": 0.9639276029513779, "grad_norm": 69.55735214435529, "learning_rate": 6.544629109043188e-08, "loss": 15.0647, "step": 61140 }, { "epoch": 0.9640852620293876, "grad_norm": 71.76369395154119, "learning_rate": 6.48760763367251e-08, "loss": 15.4644, "step": 61150 }, { "epoch": 0.9642429211073974, "grad_norm": 71.34744704461218, "learning_rate": 6.43083484385687e-08, "loss": 15.4316, "step": 61160 }, { "epoch": 0.9644005801854071, "grad_norm": 72.23899547157716, "learning_rate": 6.374310753807234e-08, "loss": 15.6604, "step": 61170 }, { "epoch": 0.9645582392634168, "grad_norm": 71.31892170904752, "learning_rate": 6.318035377671838e-08, "loss": 15.0099, "step": 61180 }, { "epoch": 0.9647158983414265, "grad_norm": 75.57605379573579, "learning_rate": 6.262008729537194e-08, "loss": 15.5505, "step": 61190 }, { "epoch": 0.9648735574194363, "grad_norm": 73.95022226483664, "learning_rate": 6.206230823427194e-08, "loss": 15.3988, "step": 61200 }, { "epoch": 0.9650312164974459, "grad_norm": 69.20856148941735, "learning_rate": 6.150701673303671e-08, "loss": 15.5278, "step": 61210 }, { "epoch": 0.9651888755754556, "grad_norm": 71.06240265116612, "learning_rate": 6.095421293065951e-08, "loss": 15.2992, "step": 61220 }, { "epoch": 0.9653465346534653, "grad_norm": 71.55812531291849, "learning_rate": 6.040389696551296e-08, "loss": 15.4389, "step": 61230 }, { "epoch": 0.9655041937314751, "grad_norm": 74.23408397435193, "learning_rate": 5.985606897534691e-08, "loss": 15.7462, "step": 61240 }, { "epoch": 0.9656618528094848, "grad_norm": 68.69448348396637, "learning_rate": 5.931072909728719e-08, "loss": 15.3703, "step": 61250 }, { "epoch": 0.9658195118874945, "grad_norm": 72.84251485234601, "learning_rate": 5.8767877467837967e-08, "loss": 15.4312, "step": 61260 }, { "epoch": 0.9659771709655042, "grad_norm": 72.7454142148841, "learning_rate": 5.8227514222880535e-08, "loss": 15.45, "step": 61270 }, { "epoch": 0.9661348300435139, "grad_norm": 73.83983247341664, "learning_rate": 5.768963949767226e-08, "loss": 15.3287, "step": 61280 }, { "epoch": 0.9662924891215237, "grad_norm": 80.17993492511341, "learning_rate": 5.7154253426847664e-08, "loss": 15.837, "step": 61290 }, { "epoch": 0.9664501481995333, "grad_norm": 68.1844660066275, "learning_rate": 5.662135614441955e-08, "loss": 15.0847, "step": 61300 }, { "epoch": 0.966607807277543, "grad_norm": 72.8014156538086, "learning_rate": 5.6090947783777885e-08, "loss": 14.8427, "step": 61310 }, { "epoch": 0.9667654663555527, "grad_norm": 75.25788416745215, "learning_rate": 5.5563028477687574e-08, "loss": 15.4652, "step": 61320 }, { "epoch": 0.9669231254335625, "grad_norm": 73.90557690486848, "learning_rate": 5.503759835829292e-08, "loss": 15.3758, "step": 61330 }, { "epoch": 0.9670807845115722, "grad_norm": 69.70705502199073, "learning_rate": 5.451465755711427e-08, "loss": 15.5676, "step": 61340 }, { "epoch": 0.9672384435895819, "grad_norm": 70.1004753058241, "learning_rate": 5.3994206205046917e-08, "loss": 15.1081, "step": 61350 }, { "epoch": 0.9673961026675916, "grad_norm": 70.26786263455175, "learning_rate": 5.347624443236776e-08, "loss": 15.8468, "step": 61360 }, { "epoch": 0.9675537617456013, "grad_norm": 71.0502278701614, "learning_rate": 5.296077236872421e-08, "loss": 15.5351, "step": 61370 }, { "epoch": 0.9677114208236111, "grad_norm": 73.65384210338382, "learning_rate": 5.244779014314527e-08, "loss": 14.6488, "step": 61380 }, { "epoch": 0.9678690799016207, "grad_norm": 69.53243779014683, "learning_rate": 5.193729788403601e-08, "loss": 15.3001, "step": 61390 }, { "epoch": 0.9680267389796304, "grad_norm": 76.44092144528173, "learning_rate": 5.142929571917643e-08, "loss": 15.6734, "step": 61400 }, { "epoch": 0.9681843980576401, "grad_norm": 69.58410257007618, "learning_rate": 5.092378377572371e-08, "loss": 15.5025, "step": 61410 }, { "epoch": 0.9683420571356499, "grad_norm": 70.0548257438723, "learning_rate": 5.042076218021441e-08, "loss": 15.2266, "step": 61420 }, { "epoch": 0.9684997162136596, "grad_norm": 72.4687183362224, "learning_rate": 4.9920231058557813e-08, "loss": 15.2906, "step": 61430 }, { "epoch": 0.9686573752916693, "grad_norm": 76.97228398958731, "learning_rate": 4.942219053604147e-08, "loss": 15.243, "step": 61440 }, { "epoch": 0.968815034369679, "grad_norm": 66.64119744733756, "learning_rate": 4.892664073733122e-08, "loss": 15.508, "step": 61450 }, { "epoch": 0.9689726934476888, "grad_norm": 74.47457517280705, "learning_rate": 4.843358178646562e-08, "loss": 15.3617, "step": 61460 }, { "epoch": 0.9691303525256985, "grad_norm": 76.26892733892248, "learning_rate": 4.7943013806864834e-08, "loss": 15.3245, "step": 61470 }, { "epoch": 0.9692880116037081, "grad_norm": 70.79337261585538, "learning_rate": 4.7454936921319527e-08, "loss": 14.8916, "step": 61480 }, { "epoch": 0.9694456706817178, "grad_norm": 72.55923498043347, "learning_rate": 4.696935125200308e-08, "loss": 15.7301, "step": 61490 }, { "epoch": 0.9696033297597275, "grad_norm": 73.67600487004133, "learning_rate": 4.648625692046049e-08, "loss": 15.4906, "step": 61500 }, { "epoch": 0.9697609888377373, "grad_norm": 71.3702068374697, "learning_rate": 4.600565404761392e-08, "loss": 15.4003, "step": 61510 }, { "epoch": 0.969918647915747, "grad_norm": 73.93685976858234, "learning_rate": 4.552754275376492e-08, "loss": 15.6989, "step": 61520 }, { "epoch": 0.9700763069937567, "grad_norm": 82.4770324675775, "learning_rate": 4.505192315858664e-08, "loss": 15.7566, "step": 61530 }, { "epoch": 0.9702339660717664, "grad_norm": 74.36639228039337, "learning_rate": 4.4578795381134964e-08, "loss": 15.6429, "step": 61540 }, { "epoch": 0.9703916251497762, "grad_norm": 71.15932659111955, "learning_rate": 4.410815953983516e-08, "loss": 15.342, "step": 61550 }, { "epoch": 0.9705492842277859, "grad_norm": 73.27875621158961, "learning_rate": 4.364001575249188e-08, "loss": 15.3202, "step": 61560 }, { "epoch": 0.9707069433057955, "grad_norm": 70.30386323831954, "learning_rate": 4.317436413628806e-08, "loss": 15.225, "step": 61570 }, { "epoch": 0.9708646023838052, "grad_norm": 75.97010840884985, "learning_rate": 4.271120480777935e-08, "loss": 15.326, "step": 61580 }, { "epoch": 0.9710222614618149, "grad_norm": 77.23696905890475, "learning_rate": 4.2250537882898566e-08, "loss": 15.674, "step": 61590 }, { "epoch": 0.9711799205398247, "grad_norm": 68.86665495883615, "learning_rate": 4.17923634769557e-08, "loss": 15.2181, "step": 61600 }, { "epoch": 0.9713375796178344, "grad_norm": 72.06289348978387, "learning_rate": 4.133668170463567e-08, "loss": 14.9887, "step": 61610 }, { "epoch": 0.9714952386958441, "grad_norm": 70.53610527636167, "learning_rate": 4.088349267999947e-08, "loss": 15.0583, "step": 61620 }, { "epoch": 0.9716528977738538, "grad_norm": 68.89857035620804, "learning_rate": 4.043279651648635e-08, "loss": 15.4164, "step": 61630 }, { "epoch": 0.9718105568518636, "grad_norm": 64.04241696003047, "learning_rate": 3.998459332690829e-08, "loss": 15.4309, "step": 61640 }, { "epoch": 0.9719682159298733, "grad_norm": 70.69778963909687, "learning_rate": 3.9538883223455557e-08, "loss": 14.9525, "step": 61650 }, { "epoch": 0.972125875007883, "grad_norm": 74.15091542580804, "learning_rate": 3.9095666317693346e-08, "loss": 15.7171, "step": 61660 }, { "epoch": 0.9722835340858926, "grad_norm": 71.66146918863566, "learning_rate": 3.8654942720562914e-08, "loss": 15.5908, "step": 61670 }, { "epoch": 0.9724411931639024, "grad_norm": 66.27602422062562, "learning_rate": 3.821671254238157e-08, "loss": 15.7786, "step": 61680 }, { "epoch": 0.9725988522419121, "grad_norm": 71.07063135668479, "learning_rate": 3.778097589284269e-08, "loss": 15.4388, "step": 61690 }, { "epoch": 0.9727565113199218, "grad_norm": 72.81657937807671, "learning_rate": 3.7347732881014564e-08, "loss": 15.4521, "step": 61700 }, { "epoch": 0.9729141703979315, "grad_norm": 71.08750737875101, "learning_rate": 3.691698361534379e-08, "loss": 15.4817, "step": 61710 }, { "epoch": 0.9730718294759412, "grad_norm": 66.16032018602668, "learning_rate": 3.6488728203648574e-08, "loss": 14.9878, "step": 61720 }, { "epoch": 0.973229488553951, "grad_norm": 71.53146996027874, "learning_rate": 3.606296675312759e-08, "loss": 15.2961, "step": 61730 }, { "epoch": 0.9733871476319607, "grad_norm": 79.89644636318795, "learning_rate": 3.563969937035117e-08, "loss": 15.5133, "step": 61740 }, { "epoch": 0.9735448067099703, "grad_norm": 76.30117297451051, "learning_rate": 3.5218926161267876e-08, "loss": 15.7344, "step": 61750 }, { "epoch": 0.97370246578798, "grad_norm": 80.42242733579106, "learning_rate": 3.480064723120125e-08, "loss": 15.6988, "step": 61760 }, { "epoch": 0.9738601248659898, "grad_norm": 78.87017981380245, "learning_rate": 3.438486268485086e-08, "loss": 15.8601, "step": 61770 }, { "epoch": 0.9740177839439995, "grad_norm": 75.80455141861279, "learning_rate": 3.397157262629125e-08, "loss": 15.0435, "step": 61780 }, { "epoch": 0.9741754430220092, "grad_norm": 74.76579692639868, "learning_rate": 3.356077715897188e-08, "loss": 15.597, "step": 61790 }, { "epoch": 0.9743331021000189, "grad_norm": 74.53766995216512, "learning_rate": 3.31524763857205e-08, "loss": 15.2014, "step": 61800 }, { "epoch": 0.9744907611780287, "grad_norm": 68.81783577980364, "learning_rate": 3.274667040873647e-08, "loss": 14.8864, "step": 61810 }, { "epoch": 0.9746484202560384, "grad_norm": 74.58840742022, "learning_rate": 3.234335932959853e-08, "loss": 15.9351, "step": 61820 }, { "epoch": 0.9748060793340481, "grad_norm": 79.75129369300177, "learning_rate": 3.194254324925927e-08, "loss": 15.7228, "step": 61830 }, { "epoch": 0.9749637384120577, "grad_norm": 72.20578579774086, "learning_rate": 3.154422226804621e-08, "loss": 15.3256, "step": 61840 }, { "epoch": 0.9751213974900674, "grad_norm": 75.91181904576924, "learning_rate": 3.1148396485661814e-08, "loss": 15.305, "step": 61850 }, { "epoch": 0.9752790565680772, "grad_norm": 71.27174119221186, "learning_rate": 3.075506600118794e-08, "loss": 15.3498, "step": 61860 }, { "epoch": 0.9754367156460869, "grad_norm": 75.24489570270453, "learning_rate": 3.036423091307583e-08, "loss": 15.0009, "step": 61870 }, { "epoch": 0.9755943747240966, "grad_norm": 77.15063189675759, "learning_rate": 2.9975891319157235e-08, "loss": 15.4808, "step": 61880 }, { "epoch": 0.9757520338021063, "grad_norm": 70.48164055238054, "learning_rate": 2.9590047316636616e-08, "loss": 15.1408, "step": 61890 }, { "epoch": 0.9759096928801161, "grad_norm": 76.47789758023772, "learning_rate": 2.9206699002093387e-08, "loss": 15.7721, "step": 61900 }, { "epoch": 0.9760673519581258, "grad_norm": 73.39992164714506, "learning_rate": 2.882584647148523e-08, "loss": 15.9085, "step": 61910 }, { "epoch": 0.9762250110361355, "grad_norm": 71.1502435475034, "learning_rate": 2.8447489820141448e-08, "loss": 15.0977, "step": 61920 }, { "epoch": 0.9763826701141451, "grad_norm": 67.86756105203588, "learning_rate": 2.8071629142768507e-08, "loss": 15.245, "step": 61930 }, { "epoch": 0.9765403291921548, "grad_norm": 68.77429748964984, "learning_rate": 2.7698264533448925e-08, "loss": 15.0381, "step": 61940 }, { "epoch": 0.9766979882701646, "grad_norm": 72.14430545696844, "learning_rate": 2.732739608563906e-08, "loss": 14.9896, "step": 61950 }, { "epoch": 0.9768556473481743, "grad_norm": 72.82148173789905, "learning_rate": 2.69590238921702e-08, "loss": 15.1676, "step": 61960 }, { "epoch": 0.977013306426184, "grad_norm": 78.18474374900967, "learning_rate": 2.6593148045249707e-08, "loss": 15.7373, "step": 61970 }, { "epoch": 0.9771709655041937, "grad_norm": 70.82600571333558, "learning_rate": 2.6229768636459874e-08, "loss": 15.2516, "step": 61980 }, { "epoch": 0.9773286245822035, "grad_norm": 73.38151053676427, "learning_rate": 2.5868885756756835e-08, "loss": 15.3346, "step": 61990 }, { "epoch": 0.9774862836602132, "grad_norm": 61.12745322419389, "learning_rate": 2.5510499496474993e-08, "loss": 14.9884, "step": 62000 }, { "epoch": 0.9776439427382229, "grad_norm": 68.55623833386264, "learning_rate": 2.5154609945321483e-08, "loss": 15.3179, "step": 62010 }, { "epoch": 0.9778016018162325, "grad_norm": 68.41939031082282, "learning_rate": 2.480121719237727e-08, "loss": 15.2594, "step": 62020 }, { "epoch": 0.9779592608942423, "grad_norm": 72.6898262848257, "learning_rate": 2.445032132610048e-08, "loss": 15.7684, "step": 62030 }, { "epoch": 0.978116919972252, "grad_norm": 73.11783822956447, "learning_rate": 2.4101922434325297e-08, "loss": 14.9922, "step": 62040 }, { "epoch": 0.9782745790502617, "grad_norm": 71.35517049062217, "learning_rate": 2.3756020604256414e-08, "loss": 15.1648, "step": 62050 }, { "epoch": 0.9784322381282714, "grad_norm": 71.63180072153165, "learning_rate": 2.3412615922479008e-08, "loss": 15.5936, "step": 62060 }, { "epoch": 0.9785898972062811, "grad_norm": 70.91787805434781, "learning_rate": 2.3071708474949882e-08, "loss": 14.9703, "step": 62070 }, { "epoch": 0.9787475562842909, "grad_norm": 74.10212646618768, "learning_rate": 2.2733298346999665e-08, "loss": 15.5603, "step": 62080 }, { "epoch": 0.9789052153623006, "grad_norm": 64.61291906286954, "learning_rate": 2.2397385623338374e-08, "loss": 14.7457, "step": 62090 }, { "epoch": 0.9790628744403103, "grad_norm": 74.71813725101863, "learning_rate": 2.206397038804542e-08, "loss": 14.9924, "step": 62100 }, { "epoch": 0.97922053351832, "grad_norm": 651.8606388013304, "learning_rate": 2.1733052724579595e-08, "loss": 15.7981, "step": 62110 }, { "epoch": 0.9793781925963297, "grad_norm": 70.87978718827641, "learning_rate": 2.1404632715771313e-08, "loss": 15.0194, "step": 62120 }, { "epoch": 0.9795358516743394, "grad_norm": 74.53600696678488, "learning_rate": 2.1078710443829253e-08, "loss": 15.7099, "step": 62130 }, { "epoch": 0.9796935107523491, "grad_norm": 71.71322840585701, "learning_rate": 2.0755285990333718e-08, "loss": 15.0399, "step": 62140 }, { "epoch": 0.9798511698303588, "grad_norm": 72.1695099705509, "learning_rate": 2.043435943623995e-08, "loss": 15.4294, "step": 62150 }, { "epoch": 0.9800088289083685, "grad_norm": 72.44993920898989, "learning_rate": 2.011593086188035e-08, "loss": 15.2788, "step": 62160 }, { "epoch": 0.9801664879863783, "grad_norm": 72.65581528241597, "learning_rate": 1.9800000346960056e-08, "loss": 14.6045, "step": 62170 }, { "epoch": 0.980324147064388, "grad_norm": 76.06394604678519, "learning_rate": 1.9486567970559145e-08, "loss": 15.4429, "step": 62180 }, { "epoch": 0.9804818061423977, "grad_norm": 70.60262844391697, "learning_rate": 1.917563381113263e-08, "loss": 14.8448, "step": 62190 }, { "epoch": 0.9806394652204073, "grad_norm": 77.46577227968206, "learning_rate": 1.8867197946511596e-08, "loss": 15.5458, "step": 62200 }, { "epoch": 0.9807971242984171, "grad_norm": 75.95472943376812, "learning_rate": 1.856126045389872e-08, "loss": 14.8014, "step": 62210 }, { "epoch": 0.9809547833764268, "grad_norm": 68.78080359286561, "learning_rate": 1.8257821409874975e-08, "loss": 15.1267, "step": 62220 }, { "epoch": 0.9811124424544365, "grad_norm": 73.35293832614488, "learning_rate": 1.7956880890391824e-08, "loss": 15.4314, "step": 62230 }, { "epoch": 0.9812701015324462, "grad_norm": 70.0436195559438, "learning_rate": 1.7658438970779014e-08, "loss": 15.7701, "step": 62240 }, { "epoch": 0.981427760610456, "grad_norm": 74.19220934527445, "learning_rate": 1.7362495725739003e-08, "loss": 15.9348, "step": 62250 }, { "epoch": 0.9815854196884657, "grad_norm": 73.78324712862097, "learning_rate": 1.706905122934921e-08, "loss": 15.557, "step": 62260 }, { "epoch": 0.9817430787664754, "grad_norm": 68.100525338959, "learning_rate": 1.6778105555061984e-08, "loss": 15.3041, "step": 62270 }, { "epoch": 0.9819007378444851, "grad_norm": 73.26517663066325, "learning_rate": 1.648965877570241e-08, "loss": 15.4691, "step": 62280 }, { "epoch": 0.9820583969224947, "grad_norm": 71.23182645935876, "learning_rate": 1.620371096347273e-08, "loss": 15.6598, "step": 62290 }, { "epoch": 0.9822160560005045, "grad_norm": 77.34471217020877, "learning_rate": 1.592026218994791e-08, "loss": 15.1322, "step": 62300 }, { "epoch": 0.9823737150785142, "grad_norm": 76.4438407413961, "learning_rate": 1.5639312526078977e-08, "loss": 15.5873, "step": 62310 }, { "epoch": 0.9825313741565239, "grad_norm": 71.78022988067579, "learning_rate": 1.5360862042187452e-08, "loss": 15.244, "step": 62320 }, { "epoch": 0.9826890332345336, "grad_norm": 73.14588249402479, "learning_rate": 1.508491080797536e-08, "loss": 15.0172, "step": 62330 }, { "epoch": 0.9828466923125434, "grad_norm": 80.51703535895321, "learning_rate": 1.4811458892514118e-08, "loss": 15.5016, "step": 62340 }, { "epoch": 0.9830043513905531, "grad_norm": 77.04747654866493, "learning_rate": 1.4540506364251195e-08, "loss": 15.9011, "step": 62350 }, { "epoch": 0.9831620104685628, "grad_norm": 82.4471309851767, "learning_rate": 1.4272053291010113e-08, "loss": 15.5546, "step": 62360 }, { "epoch": 0.9833196695465725, "grad_norm": 76.1571617325384, "learning_rate": 1.4006099739986012e-08, "loss": 15.7693, "step": 62370 }, { "epoch": 0.9834773286245823, "grad_norm": 78.01524678824208, "learning_rate": 1.3742645777748974e-08, "loss": 15.5539, "step": 62380 }, { "epoch": 0.983634987702592, "grad_norm": 73.19732343409643, "learning_rate": 1.348169147024514e-08, "loss": 15.5234, "step": 62390 }, { "epoch": 0.9837926467806016, "grad_norm": 241.544767280832, "learning_rate": 1.3223236882794478e-08, "loss": 15.7947, "step": 62400 }, { "epoch": 0.9839503058586113, "grad_norm": 71.85439061380936, "learning_rate": 1.2967282080088572e-08, "loss": 15.3829, "step": 62410 }, { "epoch": 0.984107964936621, "grad_norm": 71.96671530541622, "learning_rate": 1.2713827126197286e-08, "loss": 15.1434, "step": 62420 }, { "epoch": 0.9842656240146308, "grad_norm": 67.86559952184719, "learning_rate": 1.2462872084560984e-08, "loss": 15.5828, "step": 62430 }, { "epoch": 0.9844232830926405, "grad_norm": 77.94806023437175, "learning_rate": 1.2214417017997194e-08, "loss": 15.345, "step": 62440 }, { "epoch": 0.9845809421706502, "grad_norm": 71.29947952009479, "learning_rate": 1.1968461988697277e-08, "loss": 15.6525, "step": 62450 }, { "epoch": 0.9847386012486599, "grad_norm": 71.42704983298349, "learning_rate": 1.1725007058224214e-08, "loss": 15.1142, "step": 62460 }, { "epoch": 0.9848962603266697, "grad_norm": 64.55869370420301, "learning_rate": 1.1484052287518143e-08, "loss": 14.9849, "step": 62470 }, { "epoch": 0.9850539194046793, "grad_norm": 69.90724072363236, "learning_rate": 1.1245597736893044e-08, "loss": 15.4947, "step": 62480 }, { "epoch": 0.985211578482689, "grad_norm": 70.46809946832815, "learning_rate": 1.1009643466035613e-08, "loss": 14.9278, "step": 62490 }, { "epoch": 0.9853692375606987, "grad_norm": 67.82273204199528, "learning_rate": 1.0776189534006387e-08, "loss": 14.9954, "step": 62500 }, { "epoch": 0.9855268966387084, "grad_norm": 68.21657281767851, "learning_rate": 1.0545235999243064e-08, "loss": 15.1489, "step": 62510 }, { "epoch": 0.9856845557167182, "grad_norm": 70.57625369462967, "learning_rate": 1.0316782919553848e-08, "loss": 15.5267, "step": 62520 }, { "epoch": 0.9858422147947279, "grad_norm": 71.97091576823173, "learning_rate": 1.0090830352124104e-08, "loss": 15.4662, "step": 62530 }, { "epoch": 0.9859998738727376, "grad_norm": 69.40611653835984, "learning_rate": 9.867378353510815e-09, "loss": 15.5463, "step": 62540 }, { "epoch": 0.9861575329507473, "grad_norm": 69.37192044517155, "learning_rate": 9.646426979647017e-09, "loss": 15.1612, "step": 62550 }, { "epoch": 0.9863151920287571, "grad_norm": 72.21936816362691, "learning_rate": 9.427976285837358e-09, "loss": 14.9484, "step": 62560 }, { "epoch": 0.9864728511067667, "grad_norm": 74.37812842665492, "learning_rate": 9.212026326763657e-09, "loss": 15.3549, "step": 62570 }, { "epoch": 0.9866305101847764, "grad_norm": 73.50595348011662, "learning_rate": 8.998577156480447e-09, "loss": 15.1288, "step": 62580 }, { "epoch": 0.9867881692627861, "grad_norm": 73.66061908066159, "learning_rate": 8.787628828414996e-09, "loss": 14.8347, "step": 62590 }, { "epoch": 0.9869458283407959, "grad_norm": 72.01991931740298, "learning_rate": 8.579181395369508e-09, "loss": 15.2947, "step": 62600 }, { "epoch": 0.9871034874188056, "grad_norm": 74.0351852945476, "learning_rate": 8.373234909522244e-09, "loss": 15.4963, "step": 62610 }, { "epoch": 0.9872611464968153, "grad_norm": 68.58075980893325, "learning_rate": 8.16978942242086e-09, "loss": 14.7492, "step": 62620 }, { "epoch": 0.987418805574825, "grad_norm": 70.98953938742109, "learning_rate": 7.968844984991286e-09, "loss": 15.5612, "step": 62630 }, { "epoch": 0.9875764646528347, "grad_norm": 71.93517022334841, "learning_rate": 7.770401647532177e-09, "loss": 15.4595, "step": 62640 }, { "epoch": 0.9877341237308445, "grad_norm": 72.32573206061669, "learning_rate": 7.574459459714911e-09, "loss": 15.5671, "step": 62650 }, { "epoch": 0.9878917828088541, "grad_norm": 75.6454161941524, "learning_rate": 7.38101847058581e-09, "loss": 15.3223, "step": 62660 }, { "epoch": 0.9880494418868638, "grad_norm": 72.17028316271914, "learning_rate": 7.190078728565031e-09, "loss": 15.3378, "step": 62670 }, { "epoch": 0.9882071009648735, "grad_norm": 71.11304517486238, "learning_rate": 7.001640281446565e-09, "loss": 15.2163, "step": 62680 }, { "epoch": 0.9883647600428833, "grad_norm": 72.72380551510115, "learning_rate": 6.815703176399346e-09, "loss": 16.1113, "step": 62690 }, { "epoch": 0.988522419120893, "grad_norm": 68.05359949716458, "learning_rate": 6.632267459963926e-09, "loss": 15.172, "step": 62700 }, { "epoch": 0.9886800781989027, "grad_norm": 68.24559076270955, "learning_rate": 6.451333178055796e-09, "loss": 15.5305, "step": 62710 }, { "epoch": 0.9888377372769124, "grad_norm": 70.86800215575751, "learning_rate": 6.272900375965396e-09, "loss": 15.4727, "step": 62720 }, { "epoch": 0.9889953963549221, "grad_norm": 70.64499975287592, "learning_rate": 6.096969098355887e-09, "loss": 15.3651, "step": 62730 }, { "epoch": 0.9891530554329319, "grad_norm": 70.59645369526193, "learning_rate": 5.9235393892642654e-09, "loss": 14.9737, "step": 62740 }, { "epoch": 0.9893107145109415, "grad_norm": 69.7871770975933, "learning_rate": 5.752611292102472e-09, "loss": 14.6091, "step": 62750 }, { "epoch": 0.9894683735889512, "grad_norm": 74.04040140009751, "learning_rate": 5.5841848496540614e-09, "loss": 15.1848, "step": 62760 }, { "epoch": 0.9896260326669609, "grad_norm": 72.94115770726322, "learning_rate": 5.4182601040797535e-09, "loss": 15.4029, "step": 62770 }, { "epoch": 0.9897836917449707, "grad_norm": 70.55939691589651, "learning_rate": 5.25483709690966e-09, "loss": 15.5597, "step": 62780 }, { "epoch": 0.9899413508229804, "grad_norm": 78.74632514917103, "learning_rate": 5.0939158690510585e-09, "loss": 15.3071, "step": 62790 }, { "epoch": 0.9900990099009901, "grad_norm": 80.2896081985593, "learning_rate": 4.935496460785061e-09, "loss": 15.6594, "step": 62800 }, { "epoch": 0.9902566689789998, "grad_norm": 74.1774601957332, "learning_rate": 4.779578911764393e-09, "loss": 15.8744, "step": 62810 }, { "epoch": 0.9904143280570096, "grad_norm": 69.87879396766968, "learning_rate": 4.626163261017835e-09, "loss": 14.9496, "step": 62820 }, { "epoch": 0.9905719871350193, "grad_norm": 74.55235503525408, "learning_rate": 4.475249546945782e-09, "loss": 15.7348, "step": 62830 }, { "epoch": 0.990729646213029, "grad_norm": 76.84724927689935, "learning_rate": 4.326837807322459e-09, "loss": 15.5373, "step": 62840 }, { "epoch": 0.9908873052910386, "grad_norm": 74.88102798868039, "learning_rate": 4.180928079299262e-09, "loss": 15.2629, "step": 62850 }, { "epoch": 0.9910449643690483, "grad_norm": 71.94761466855526, "learning_rate": 4.037520399396977e-09, "loss": 15.3963, "step": 62860 }, { "epoch": 0.9912026234470581, "grad_norm": 74.05945158540416, "learning_rate": 3.896614803512444e-09, "loss": 15.0633, "step": 62870 }, { "epoch": 0.9913602825250678, "grad_norm": 71.43981048738159, "learning_rate": 3.758211326915229e-09, "loss": 15.6829, "step": 62880 }, { "epoch": 0.9915179416030775, "grad_norm": 73.1123900220917, "learning_rate": 3.62231000424873e-09, "loss": 15.4489, "step": 62890 }, { "epoch": 0.9916756006810872, "grad_norm": 77.13597181797898, "learning_rate": 3.4889108695324027e-09, "loss": 14.9156, "step": 62900 }, { "epoch": 0.991833259759097, "grad_norm": 69.4219520032701, "learning_rate": 3.3580139561539827e-09, "loss": 15.5516, "step": 62910 }, { "epoch": 0.9919909188371067, "grad_norm": 68.68334196933203, "learning_rate": 3.2296192968817033e-09, "loss": 15.0967, "step": 62920 }, { "epoch": 0.9921485779151163, "grad_norm": 72.09162976576863, "learning_rate": 3.1037269238520795e-09, "loss": 14.8565, "step": 62930 }, { "epoch": 0.992306236993126, "grad_norm": 76.61075450517653, "learning_rate": 2.9803368685765723e-09, "loss": 15.2925, "step": 62940 }, { "epoch": 0.9924638960711357, "grad_norm": 69.63333239206078, "learning_rate": 2.8594491619426958e-09, "loss": 15.5155, "step": 62950 }, { "epoch": 0.9926215551491455, "grad_norm": 70.60980586067652, "learning_rate": 2.7410638342084683e-09, "loss": 15.4655, "step": 62960 }, { "epoch": 0.9927792142271552, "grad_norm": 77.7042548053405, "learning_rate": 2.625180915006853e-09, "loss": 15.4241, "step": 62970 }, { "epoch": 0.9929368733051649, "grad_norm": 75.75546134676064, "learning_rate": 2.5118004333446465e-09, "loss": 15.1969, "step": 62980 }, { "epoch": 0.9930945323831746, "grad_norm": 615.215483813308, "learning_rate": 2.4009224176035904e-09, "loss": 15.8885, "step": 62990 }, { "epoch": 0.9932521914611844, "grad_norm": 71.97935621859891, "learning_rate": 2.2925468955348197e-09, "loss": 15.7049, "step": 63000 }, { "epoch": 0.9934098505391941, "grad_norm": 73.0965146196684, "learning_rate": 2.1866738942677433e-09, "loss": 15.3721, "step": 63010 }, { "epoch": 0.9935675096172037, "grad_norm": 70.95254798903862, "learning_rate": 2.0833034403022754e-09, "loss": 15.3558, "step": 63020 }, { "epoch": 0.9937251686952134, "grad_norm": 72.08970396388264, "learning_rate": 1.982435559513274e-09, "loss": 15.8436, "step": 63030 }, { "epoch": 0.9938828277732232, "grad_norm": 73.36560803584712, "learning_rate": 1.8840702771494302e-09, "loss": 15.4949, "step": 63040 }, { "epoch": 0.9940404868512329, "grad_norm": 70.48673576188322, "learning_rate": 1.7882076178321605e-09, "loss": 15.2227, "step": 63050 }, { "epoch": 0.9941981459292426, "grad_norm": 72.64594464908934, "learning_rate": 1.6948476055567153e-09, "loss": 15.3318, "step": 63060 }, { "epoch": 0.9943558050072523, "grad_norm": 68.44886579458789, "learning_rate": 1.6039902636921789e-09, "loss": 15.0776, "step": 63070 }, { "epoch": 0.994513464085262, "grad_norm": 68.61973401288945, "learning_rate": 1.5156356149803598e-09, "loss": 15.6926, "step": 63080 }, { "epoch": 0.9946711231632718, "grad_norm": 69.86884150436214, "learning_rate": 1.4297836815380107e-09, "loss": 15.631, "step": 63090 }, { "epoch": 0.9948287822412815, "grad_norm": 70.69445999195418, "learning_rate": 1.3464344848546084e-09, "loss": 14.892, "step": 63100 }, { "epoch": 0.9949864413192911, "grad_norm": 75.11853830221014, "learning_rate": 1.2655880457934644e-09, "loss": 15.7249, "step": 63110 }, { "epoch": 0.9951441003973008, "grad_norm": 73.68571582566663, "learning_rate": 1.1872443845906133e-09, "loss": 16.1694, "step": 63120 }, { "epoch": 0.9953017594753106, "grad_norm": 78.67120917914129, "learning_rate": 1.1114035208559248e-09, "loss": 15.7473, "step": 63130 }, { "epoch": 0.9954594185533203, "grad_norm": 68.35339760972902, "learning_rate": 1.0380654735742125e-09, "loss": 15.1043, "step": 63140 }, { "epoch": 0.99561707763133, "grad_norm": 70.52827197685788, "learning_rate": 9.672302611019035e-10, "loss": 15.4719, "step": 63150 }, { "epoch": 0.9957747367093397, "grad_norm": 72.45323748608985, "learning_rate": 8.988979011703702e-10, "loss": 15.7843, "step": 63160 }, { "epoch": 0.9959323957873495, "grad_norm": 68.78049091110107, "learning_rate": 8.330684108825981e-10, "loss": 14.7463, "step": 63170 }, { "epoch": 0.9960900548653592, "grad_norm": 75.6255629916906, "learning_rate": 7.697418067176277e-10, "loss": 14.912, "step": 63180 }, { "epoch": 0.9962477139433689, "grad_norm": 72.82335185429014, "learning_rate": 7.089181045250026e-10, "loss": 15.6915, "step": 63190 }, { "epoch": 0.9964053730213785, "grad_norm": 70.8100468178881, "learning_rate": 6.505973195325421e-10, "loss": 15.9435, "step": 63200 }, { "epoch": 0.9965630320993882, "grad_norm": 70.18584900237275, "learning_rate": 5.947794663352379e-10, "loss": 15.2242, "step": 63210 }, { "epoch": 0.996720691177398, "grad_norm": 69.61107125246326, "learning_rate": 5.414645589063572e-10, "loss": 15.3501, "step": 63220 }, { "epoch": 0.9968783502554077, "grad_norm": 68.80035969110904, "learning_rate": 4.906526105918907e-10, "loss": 14.9567, "step": 63230 }, { "epoch": 0.9970360093334174, "grad_norm": 69.20373926718062, "learning_rate": 4.423436341083331e-10, "loss": 15.3955, "step": 63240 }, { "epoch": 0.9971936684114271, "grad_norm": 78.70237062839213, "learning_rate": 3.9653764155045404e-10, "loss": 15.4897, "step": 63250 }, { "epoch": 0.9973513274894369, "grad_norm": 74.52765717579484, "learning_rate": 3.5323464438130617e-10, "loss": 14.9913, "step": 63260 }, { "epoch": 0.9975089865674466, "grad_norm": 85.36007176781993, "learning_rate": 3.1243465344221736e-10, "loss": 15.6091, "step": 63270 }, { "epoch": 0.9976666456454563, "grad_norm": 71.56185823325832, "learning_rate": 2.7413767894501897e-10, "loss": 15.0829, "step": 63280 }, { "epoch": 0.997824304723466, "grad_norm": 74.72556068145401, "learning_rate": 2.383437304753766e-10, "loss": 15.3861, "step": 63290 }, { "epoch": 0.9979819638014756, "grad_norm": 68.65402802520713, "learning_rate": 2.0505281699390034e-10, "loss": 15.2687, "step": 63300 }, { "epoch": 0.9981396228794854, "grad_norm": 74.97187753369066, "learning_rate": 1.7426494683281392e-10, "loss": 15.6109, "step": 63310 }, { "epoch": 0.9982972819574951, "grad_norm": 75.453209537725, "learning_rate": 1.4598012769817538e-10, "loss": 15.2297, "step": 63320 }, { "epoch": 0.9984549410355048, "grad_norm": 68.6096527421223, "learning_rate": 1.2019836667098716e-10, "loss": 15.4915, "step": 63330 }, { "epoch": 0.9986126001135145, "grad_norm": 75.81335441623455, "learning_rate": 9.69196702038655e-11, "loss": 15.5448, "step": 63340 }, { "epoch": 0.9987702591915243, "grad_norm": 69.6779214437594, "learning_rate": 7.614404412326082e-11, "loss": 15.3389, "step": 63350 }, { "epoch": 0.998927918269534, "grad_norm": 72.00084680533689, "learning_rate": 5.787149363167821e-11, "loss": 15.1847, "step": 63360 }, { "epoch": 0.9990855773475437, "grad_norm": 69.72179923518124, "learning_rate": 4.210202330101609e-11, "loss": 15.2297, "step": 63370 }, { "epoch": 0.9992432364255533, "grad_norm": 72.09147321065417, "learning_rate": 2.8835637079227542e-11, "loss": 15.4038, "step": 63380 }, { "epoch": 0.9994008955035631, "grad_norm": 75.78889774883733, "learning_rate": 1.8072338285879398e-11, "loss": 15.448, "step": 63390 }, { "epoch": 0.9995585545815728, "grad_norm": 70.92609324259224, "learning_rate": 9.812129616593168e-12, "loss": 15.4415, "step": 63400 }, { "epoch": 0.9997162136595825, "grad_norm": 68.04848820158087, "learning_rate": 4.0550131386041245e-12, "loss": 15.6331, "step": 63410 }, { "epoch": 0.9998738727375922, "grad_norm": 76.56220408017451, "learning_rate": 8.009902929817514e-13, "loss": 15.5272, "step": 63420 }, { "epoch": 1.0, "eval_loss": 14.704147338867188, "eval_runtime": 16.6026, "eval_samples_per_second": 51.558, "eval_steps_per_second": 0.843, "step": 63428 }, { "epoch": 1.0, "step": 63428, "total_flos": 8.846878004006093e+16, "train_loss": 25.182933448444604, "train_runtime": 193152.484, "train_samples_per_second": 21.016, "train_steps_per_second": 0.328 } ], "logging_steps": 10, "max_steps": 63428, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 4000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 8.846878004006093e+16, "train_batch_size": 8, "trial_name": null, "trial_params": null }