{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.6809669731018045, "eval_steps": 1000, "global_step": 4000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00017024174327545113, "grad_norm": 5.9435296058654785, "learning_rate": 0.0, "loss": 0.9299, "step": 1 }, { "epoch": 0.00034048348655090226, "grad_norm": 4.946147918701172, "learning_rate": 1.3391173292339812e-07, "loss": 0.8753, "step": 2 }, { "epoch": 0.0005107252298263534, "grad_norm": 4.654350280761719, "learning_rate": 2.1224507509017273e-07, "loss": 0.8961, "step": 3 }, { "epoch": 0.0006809669731018045, "grad_norm": 4.624459266662598, "learning_rate": 2.6782346584679625e-07, "loss": 0.8702, "step": 4 }, { "epoch": 0.0008512087163772557, "grad_norm": 4.840507984161377, "learning_rate": 3.109334149098911e-07, "loss": 0.9089, "step": 5 }, { "epoch": 0.0010214504596527069, "grad_norm": 4.609108924865723, "learning_rate": 3.4615680801357083e-07, "loss": 0.8922, "step": 6 }, { "epoch": 0.001191692202928158, "grad_norm": 4.153110980987549, "learning_rate": 3.759377625437651e-07, "loss": 0.8303, "step": 7 }, { "epoch": 0.001361933946203609, "grad_norm": 3.97541880607605, "learning_rate": 4.017351987701944e-07, "loss": 0.8569, "step": 8 }, { "epoch": 0.0015321756894790602, "grad_norm": 3.4270944595336914, "learning_rate": 4.2449015018034546e-07, "loss": 0.7777, "step": 9 }, { "epoch": 0.0017024174327545114, "grad_norm": 3.244053840637207, "learning_rate": 4.4484514783328935e-07, "loss": 0.7431, "step": 10 }, { "epoch": 0.0018726591760299626, "grad_norm": 3.140742778778076, "learning_rate": 4.632584829817167e-07, "loss": 0.7605, "step": 11 }, { "epoch": 0.0020429009193054137, "grad_norm": 3.1321463584899902, "learning_rate": 4.80068540936969e-07, "loss": 0.7243, "step": 12 }, { "epoch": 0.0022131426625808647, "grad_norm": 2.890293836593628, "learning_rate": 4.955322952348447e-07, "loss": 0.6453, "step": 13 }, { "epoch": 0.002383384405856316, "grad_norm": 2.773414134979248, "learning_rate": 5.098494954671633e-07, "loss": 0.6187, "step": 14 }, { "epoch": 0.002553626149131767, "grad_norm": 2.7988698482513428, "learning_rate": 5.231784900000639e-07, "loss": 0.6197, "step": 15 }, { "epoch": 0.002723867892407218, "grad_norm": 2.7417197227478027, "learning_rate": 5.356469316935925e-07, "loss": 0.6139, "step": 16 }, { "epoch": 0.0028941096356826694, "grad_norm": 2.771998167037964, "learning_rate": 5.473592323318297e-07, "loss": 0.6009, "step": 17 }, { "epoch": 0.0030643513789581204, "grad_norm": 3.031147003173828, "learning_rate": 5.584018831037436e-07, "loss": 0.562, "step": 18 }, { "epoch": 0.003234593122233572, "grad_norm": 2.6269404888153076, "learning_rate": 5.688473346582122e-07, "loss": 0.4996, "step": 19 }, { "epoch": 0.0034048348655090228, "grad_norm": 2.2291667461395264, "learning_rate": 5.787568807566874e-07, "loss": 0.5131, "step": 20 }, { "epoch": 0.003575076608784474, "grad_norm": 2.112966299057007, "learning_rate": 5.881828376339378e-07, "loss": 0.4314, "step": 21 }, { "epoch": 0.003745318352059925, "grad_norm": 2.252114772796631, "learning_rate": 5.971702159051149e-07, "loss": 0.4716, "step": 22 }, { "epoch": 0.003915560095335376, "grad_norm": 1.981942057609558, "learning_rate": 6.057580205219512e-07, "loss": 0.4206, "step": 23 }, { "epoch": 0.0040858018386108275, "grad_norm": 1.7979234457015991, "learning_rate": 6.139802738603672e-07, "loss": 0.4061, "step": 24 }, { "epoch": 0.004256043581886279, "grad_norm": 1.8551833629608154, "learning_rate": 6.218668298197822e-07, "loss": 0.427, "step": 25 }, { "epoch": 0.004426285325161729, "grad_norm": 1.7879689931869507, "learning_rate": 6.294440281582428e-07, "loss": 0.4368, "step": 26 }, { "epoch": 0.004596527068437181, "grad_norm": 1.7607426643371582, "learning_rate": 6.367352252705181e-07, "loss": 0.3931, "step": 27 }, { "epoch": 0.004766768811712632, "grad_norm": 1.5092447996139526, "learning_rate": 6.437612283905613e-07, "loss": 0.3544, "step": 28 }, { "epoch": 0.004937010554988083, "grad_norm": 2.1652700901031494, "learning_rate": 6.505406535664675e-07, "loss": 0.3799, "step": 29 }, { "epoch": 0.005107252298263534, "grad_norm": 1.570780634880066, "learning_rate": 6.57090222923462e-07, "loss": 0.3741, "step": 30 }, { "epoch": 0.0052774940415389856, "grad_norm": 1.539757251739502, "learning_rate": 6.634250131666118e-07, "loss": 0.3894, "step": 31 }, { "epoch": 0.005447735784814436, "grad_norm": 1.5071591138839722, "learning_rate": 6.695586646169908e-07, "loss": 0.3559, "step": 32 }, { "epoch": 0.0056179775280898875, "grad_norm": 1.2861617803573608, "learning_rate": 6.755035580718894e-07, "loss": 0.2831, "step": 33 }, { "epoch": 0.005788219271365339, "grad_norm": 1.2855749130249023, "learning_rate": 6.812709652552277e-07, "loss": 0.3348, "step": 34 }, { "epoch": 0.00595846101464079, "grad_norm": 1.649814248085022, "learning_rate": 6.868711774536562e-07, "loss": 0.3381, "step": 35 }, { "epoch": 0.006128702757916241, "grad_norm": 1.4062033891677856, "learning_rate": 6.923136160271417e-07, "loss": 0.343, "step": 36 }, { "epoch": 0.006298944501191692, "grad_norm": 1.3435696363449097, "learning_rate": 6.976069277750015e-07, "loss": 0.3239, "step": 37 }, { "epoch": 0.006469186244467144, "grad_norm": 1.419482946395874, "learning_rate": 7.027590675816104e-07, "loss": 0.3034, "step": 38 }, { "epoch": 0.006639427987742594, "grad_norm": 1.3096425533294678, "learning_rate": 7.077773703250174e-07, "loss": 0.3282, "step": 39 }, { "epoch": 0.0068096697310180455, "grad_norm": 1.3494988679885864, "learning_rate": 7.126686136800855e-07, "loss": 0.3264, "step": 40 }, { "epoch": 0.006979911474293497, "grad_norm": 1.2407349348068237, "learning_rate": 7.174390731656332e-07, "loss": 0.3306, "step": 41 }, { "epoch": 0.007150153217568948, "grad_norm": 1.4115320444107056, "learning_rate": 7.220945705573361e-07, "loss": 0.3093, "step": 42 }, { "epoch": 0.007320394960844399, "grad_norm": 1.4195406436920166, "learning_rate": 7.266405166033159e-07, "loss": 0.3102, "step": 43 }, { "epoch": 0.00749063670411985, "grad_norm": 1.1780062913894653, "learning_rate": 7.31081948828513e-07, "loss": 0.3072, "step": 44 }, { "epoch": 0.007660878447395302, "grad_norm": 1.3273332118988037, "learning_rate": 7.354235650902366e-07, "loss": 0.3161, "step": 45 }, { "epoch": 0.007831120190670752, "grad_norm": 1.2345938682556152, "learning_rate": 7.396697534453494e-07, "loss": 0.312, "step": 46 }, { "epoch": 0.008001361933946204, "grad_norm": 1.327564001083374, "learning_rate": 7.438246188051406e-07, "loss": 0.3177, "step": 47 }, { "epoch": 0.008171603677221655, "grad_norm": 1.195912480354309, "learning_rate": 7.478920067837654e-07, "loss": 0.3031, "step": 48 }, { "epoch": 0.008341845420497106, "grad_norm": 1.3030645847320557, "learning_rate": 7.518755250875302e-07, "loss": 0.3009, "step": 49 }, { "epoch": 0.008512087163772558, "grad_norm": 1.2128993272781372, "learning_rate": 7.557785627431804e-07, "loss": 0.3139, "step": 50 }, { "epoch": 0.008682328907048007, "grad_norm": 1.1837964057922363, "learning_rate": 7.596043074220024e-07, "loss": 0.2816, "step": 51 }, { "epoch": 0.008852570650323459, "grad_norm": 1.1090530157089233, "learning_rate": 7.633557610816411e-07, "loss": 0.2451, "step": 52 }, { "epoch": 0.00902281239359891, "grad_norm": 1.2700027227401733, "learning_rate": 7.670357541179365e-07, "loss": 0.2702, "step": 53 }, { "epoch": 0.009193054136874362, "grad_norm": 1.1058295965194702, "learning_rate": 7.706469581939163e-07, "loss": 0.2632, "step": 54 }, { "epoch": 0.009363295880149813, "grad_norm": 1.152600884437561, "learning_rate": 7.741918978916079e-07, "loss": 0.2693, "step": 55 }, { "epoch": 0.009533537623425264, "grad_norm": 1.1038074493408203, "learning_rate": 7.776729613139597e-07, "loss": 0.2361, "step": 56 }, { "epoch": 0.009703779366700716, "grad_norm": 1.298447847366333, "learning_rate": 7.81092409748385e-07, "loss": 0.2814, "step": 57 }, { "epoch": 0.009874021109976166, "grad_norm": 1.1311403512954712, "learning_rate": 7.844523864898656e-07, "loss": 0.2706, "step": 58 }, { "epoch": 0.010044262853251617, "grad_norm": 1.2915065288543701, "learning_rate": 7.877549249098274e-07, "loss": 0.2774, "step": 59 }, { "epoch": 0.010214504596527068, "grad_norm": 1.0150870084762573, "learning_rate": 7.910019558468602e-07, "loss": 0.2325, "step": 60 }, { "epoch": 0.01038474633980252, "grad_norm": 1.2452411651611328, "learning_rate": 7.941953143865467e-07, "loss": 0.2873, "step": 61 }, { "epoch": 0.010554988083077971, "grad_norm": 1.3862929344177246, "learning_rate": 7.973367460900099e-07, "loss": 0.3041, "step": 62 }, { "epoch": 0.010725229826353423, "grad_norm": 1.0656795501708984, "learning_rate": 8.004279127241105e-07, "loss": 0.2215, "step": 63 }, { "epoch": 0.010895471569628872, "grad_norm": 1.3162792921066284, "learning_rate": 8.034703975403888e-07, "loss": 0.3019, "step": 64 }, { "epoch": 0.011065713312904324, "grad_norm": 1.0575990676879883, "learning_rate": 8.064657101447357e-07, "loss": 0.2394, "step": 65 }, { "epoch": 0.011235955056179775, "grad_norm": 1.0472103357315063, "learning_rate": 8.094152909952874e-07, "loss": 0.246, "step": 66 }, { "epoch": 0.011406196799455226, "grad_norm": 1.1870509386062622, "learning_rate": 8.123205155620937e-07, "loss": 0.2323, "step": 67 }, { "epoch": 0.011576438542730678, "grad_norm": 1.0447919368743896, "learning_rate": 8.15182698178626e-07, "loss": 0.2372, "step": 68 }, { "epoch": 0.01174668028600613, "grad_norm": 1.3799355030059814, "learning_rate": 8.18003095612124e-07, "loss": 0.2981, "step": 69 }, { "epoch": 0.01191692202928158, "grad_norm": 1.3175947666168213, "learning_rate": 8.207829103770545e-07, "loss": 0.2427, "step": 70 }, { "epoch": 0.01208716377255703, "grad_norm": 1.8826605081558228, "learning_rate": 8.235232938135481e-07, "loss": 0.2829, "step": 71 }, { "epoch": 0.012257405515832482, "grad_norm": 1.0391643047332764, "learning_rate": 8.262253489505398e-07, "loss": 0.2228, "step": 72 }, { "epoch": 0.012427647259107933, "grad_norm": 0.9970806241035461, "learning_rate": 8.288901331714316e-07, "loss": 0.2154, "step": 73 }, { "epoch": 0.012597889002383384, "grad_norm": 1.1637834310531616, "learning_rate": 8.315186606983998e-07, "loss": 0.2596, "step": 74 }, { "epoch": 0.012768130745658836, "grad_norm": 1.374963402748108, "learning_rate": 8.34111904909955e-07, "loss": 0.2749, "step": 75 }, { "epoch": 0.012938372488934287, "grad_norm": 1.0730386972427368, "learning_rate": 8.366708005050085e-07, "loss": 0.2227, "step": 76 }, { "epoch": 0.013108614232209739, "grad_norm": 1.1448426246643066, "learning_rate": 8.391962455254819e-07, "loss": 0.2361, "step": 77 }, { "epoch": 0.013278855975485188, "grad_norm": 1.2940086126327515, "learning_rate": 8.416891032484155e-07, "loss": 0.3044, "step": 78 }, { "epoch": 0.01344909771876064, "grad_norm": 1.3453335762023926, "learning_rate": 8.441502039575513e-07, "loss": 0.3157, "step": 79 }, { "epoch": 0.013619339462036091, "grad_norm": 1.1563291549682617, "learning_rate": 8.465803466034837e-07, "loss": 0.2501, "step": 80 }, { "epoch": 0.013789581205311542, "grad_norm": 1.1329809427261353, "learning_rate": 8.489803003606909e-07, "loss": 0.2429, "step": 81 }, { "epoch": 0.013959822948586994, "grad_norm": 1.0661827325820923, "learning_rate": 8.513508060890314e-07, "loss": 0.2232, "step": 82 }, { "epoch": 0.014130064691862445, "grad_norm": 1.1219416856765747, "learning_rate": 8.536925777066614e-07, "loss": 0.2142, "step": 83 }, { "epoch": 0.014300306435137897, "grad_norm": 1.2325749397277832, "learning_rate": 8.560063034807341e-07, "loss": 0.2122, "step": 84 }, { "epoch": 0.014470548178413346, "grad_norm": 1.0120998620986938, "learning_rate": 8.582926472417208e-07, "loss": 0.2147, "step": 85 }, { "epoch": 0.014640789921688798, "grad_norm": 1.0740280151367188, "learning_rate": 8.605522495267141e-07, "loss": 0.2271, "step": 86 }, { "epoch": 0.01481103166496425, "grad_norm": 1.309480905532837, "learning_rate": 8.627857286566401e-07, "loss": 0.2563, "step": 87 }, { "epoch": 0.0149812734082397, "grad_norm": 1.108237385749817, "learning_rate": 8.649936817519112e-07, "loss": 0.2231, "step": 88 }, { "epoch": 0.015151515151515152, "grad_norm": 1.1064398288726807, "learning_rate": 8.671766856906931e-07, "loss": 0.2516, "step": 89 }, { "epoch": 0.015321756894790603, "grad_norm": 1.2427948713302612, "learning_rate": 8.693352980136347e-07, "loss": 0.2604, "step": 90 }, { "epoch": 0.015491998638066053, "grad_norm": 1.1325477361679077, "learning_rate": 8.714700577786097e-07, "loss": 0.2314, "step": 91 }, { "epoch": 0.015662240381341504, "grad_norm": 0.9738770127296448, "learning_rate": 8.735814863687475e-07, "loss": 0.2126, "step": 92 }, { "epoch": 0.015832482124616958, "grad_norm": 1.1820428371429443, "learning_rate": 8.756700882567846e-07, "loss": 0.2116, "step": 93 }, { "epoch": 0.016002723867892407, "grad_norm": 1.0578908920288086, "learning_rate": 8.777363517285388e-07, "loss": 0.2284, "step": 94 }, { "epoch": 0.016172965611167857, "grad_norm": 1.2253124713897705, "learning_rate": 8.797807495681034e-07, "loss": 0.2492, "step": 95 }, { "epoch": 0.01634320735444331, "grad_norm": 1.4362291097640991, "learning_rate": 8.818037397071634e-07, "loss": 0.2468, "step": 96 }, { "epoch": 0.01651344909771876, "grad_norm": 1.2666908502578735, "learning_rate": 8.838057658406682e-07, "loss": 0.2349, "step": 97 }, { "epoch": 0.016683690840994213, "grad_norm": 1.0576752424240112, "learning_rate": 8.857872580109284e-07, "loss": 0.2142, "step": 98 }, { "epoch": 0.016853932584269662, "grad_norm": 0.937912106513977, "learning_rate": 8.877486331620622e-07, "loss": 0.1829, "step": 99 }, { "epoch": 0.017024174327545116, "grad_norm": 1.2614257335662842, "learning_rate": 8.896902956665787e-07, "loss": 0.2405, "step": 100 }, { "epoch": 0.017194416070820565, "grad_norm": 1.2120734453201294, "learning_rate": 8.916126378257612e-07, "loss": 0.2347, "step": 101 }, { "epoch": 0.017364657814096015, "grad_norm": 1.108526349067688, "learning_rate": 8.935160403454004e-07, "loss": 0.1874, "step": 102 }, { "epoch": 0.017534899557371468, "grad_norm": 1.0109800100326538, "learning_rate": 8.954008727883201e-07, "loss": 0.2053, "step": 103 }, { "epoch": 0.017705141300646918, "grad_norm": 1.0737659931182861, "learning_rate": 8.972674940050391e-07, "loss": 0.2015, "step": 104 }, { "epoch": 0.01787538304392237, "grad_norm": 1.0084854364395142, "learning_rate": 8.991162525438289e-07, "loss": 0.178, "step": 105 }, { "epoch": 0.01804562478719782, "grad_norm": 1.0313851833343506, "learning_rate": 9.009474870413346e-07, "loss": 0.1977, "step": 106 }, { "epoch": 0.018215866530473274, "grad_norm": 1.1986593008041382, "learning_rate": 9.02761526594856e-07, "loss": 0.2481, "step": 107 }, { "epoch": 0.018386108273748723, "grad_norm": 1.0787945985794067, "learning_rate": 9.045586911173146e-07, "loss": 0.2128, "step": 108 }, { "epoch": 0.018556350017024173, "grad_norm": 1.36089289188385, "learning_rate": 9.063392916758576e-07, "loss": 0.2232, "step": 109 }, { "epoch": 0.018726591760299626, "grad_norm": 1.1465134620666504, "learning_rate": 9.081036308150061e-07, "loss": 0.2174, "step": 110 }, { "epoch": 0.018896833503575076, "grad_norm": 1.1831120252609253, "learning_rate": 9.098520028651742e-07, "loss": 0.2266, "step": 111 }, { "epoch": 0.01906707524685053, "grad_norm": 1.2039859294891357, "learning_rate": 9.115846942373576e-07, "loss": 0.1919, "step": 112 }, { "epoch": 0.01923731699012598, "grad_norm": 1.119031548500061, "learning_rate": 9.133019837047214e-07, "loss": 0.2067, "step": 113 }, { "epoch": 0.01940755873340143, "grad_norm": 1.1545226573944092, "learning_rate": 9.150041426717831e-07, "loss": 0.2066, "step": 114 }, { "epoch": 0.01957780047667688, "grad_norm": 1.141434669494629, "learning_rate": 9.166914354318424e-07, "loss": 0.2301, "step": 115 }, { "epoch": 0.01974804221995233, "grad_norm": 0.9759832620620728, "learning_rate": 9.183641194132636e-07, "loss": 0.1774, "step": 116 }, { "epoch": 0.019918283963227784, "grad_norm": 1.1361424922943115, "learning_rate": 9.200224454151901e-07, "loss": 0.2169, "step": 117 }, { "epoch": 0.020088525706503234, "grad_norm": 1.0914556980133057, "learning_rate": 9.216666578332256e-07, "loss": 0.188, "step": 118 }, { "epoch": 0.020258767449778687, "grad_norm": 1.4587888717651367, "learning_rate": 9.232969948755948e-07, "loss": 0.2582, "step": 119 }, { "epoch": 0.020429009193054137, "grad_norm": 1.1200069189071655, "learning_rate": 9.249136887702583e-07, "loss": 0.2058, "step": 120 }, { "epoch": 0.020599250936329586, "grad_norm": 1.232216715812683, "learning_rate": 9.265169659634334e-07, "loss": 0.2383, "step": 121 }, { "epoch": 0.02076949267960504, "grad_norm": 1.1444164514541626, "learning_rate": 9.281070473099448e-07, "loss": 0.2153, "step": 122 }, { "epoch": 0.02093973442288049, "grad_norm": 1.0245447158813477, "learning_rate": 9.296841482558059e-07, "loss": 0.1701, "step": 123 }, { "epoch": 0.021109976166155942, "grad_norm": 1.1672013998031616, "learning_rate": 9.312484790134081e-07, "loss": 0.2171, "step": 124 }, { "epoch": 0.021280217909431392, "grad_norm": 1.001262903213501, "learning_rate": 9.328002447296736e-07, "loss": 0.1813, "step": 125 }, { "epoch": 0.021450459652706845, "grad_norm": 1.345151662826538, "learning_rate": 9.343396456475086e-07, "loss": 0.2214, "step": 126 }, { "epoch": 0.021620701395982295, "grad_norm": 1.1059603691101074, "learning_rate": 9.358668772608768e-07, "loss": 0.2258, "step": 127 }, { "epoch": 0.021790943139257744, "grad_norm": 1.2178932428359985, "learning_rate": 9.37382130463787e-07, "loss": 0.1938, "step": 128 }, { "epoch": 0.021961184882533197, "grad_norm": 1.2222176790237427, "learning_rate": 9.388855916934886e-07, "loss": 0.2357, "step": 129 }, { "epoch": 0.022131426625808647, "grad_norm": 1.1123160123825073, "learning_rate": 9.403774430681339e-07, "loss": 0.1898, "step": 130 }, { "epoch": 0.0223016683690841, "grad_norm": 1.3277643918991089, "learning_rate": 9.418578625191684e-07, "loss": 0.2397, "step": 131 }, { "epoch": 0.02247191011235955, "grad_norm": 1.0972932577133179, "learning_rate": 9.433270239186857e-07, "loss": 0.187, "step": 132 }, { "epoch": 0.022642151855635003, "grad_norm": 1.1459786891937256, "learning_rate": 9.447850972019773e-07, "loss": 0.1931, "step": 133 }, { "epoch": 0.022812393598910453, "grad_norm": 1.1958963871002197, "learning_rate": 9.462322484854918e-07, "loss": 0.2066, "step": 134 }, { "epoch": 0.022982635342185902, "grad_norm": 1.410596251487732, "learning_rate": 9.476686401804093e-07, "loss": 0.1863, "step": 135 }, { "epoch": 0.023152877085461356, "grad_norm": 1.1559172868728638, "learning_rate": 9.490944311020241e-07, "loss": 0.1947, "step": 136 }, { "epoch": 0.023323118828736805, "grad_norm": 1.2141169309616089, "learning_rate": 9.505097765751215e-07, "loss": 0.2312, "step": 137 }, { "epoch": 0.02349336057201226, "grad_norm": 1.081640362739563, "learning_rate": 9.519148285355222e-07, "loss": 0.204, "step": 138 }, { "epoch": 0.023663602315287708, "grad_norm": 1.086338996887207, "learning_rate": 9.533097356279598e-07, "loss": 0.1895, "step": 139 }, { "epoch": 0.02383384405856316, "grad_norm": 1.0459181070327759, "learning_rate": 9.546946433004524e-07, "loss": 0.1944, "step": 140 }, { "epoch": 0.02400408580183861, "grad_norm": 1.192455530166626, "learning_rate": 9.560696938953133e-07, "loss": 0.1916, "step": 141 }, { "epoch": 0.02417432754511406, "grad_norm": 1.0824813842773438, "learning_rate": 9.574350267369461e-07, "loss": 0.1585, "step": 142 }, { "epoch": 0.024344569288389514, "grad_norm": 1.211179494857788, "learning_rate": 9.587907782165614e-07, "loss": 0.1878, "step": 143 }, { "epoch": 0.024514811031664963, "grad_norm": 1.272672176361084, "learning_rate": 9.60137081873938e-07, "loss": 0.1899, "step": 144 }, { "epoch": 0.024685052774940416, "grad_norm": 1.2296862602233887, "learning_rate": 9.614740684763584e-07, "loss": 0.1851, "step": 145 }, { "epoch": 0.024855294518215866, "grad_norm": 1.2672345638275146, "learning_rate": 9.628018660948297e-07, "loss": 0.2202, "step": 146 }, { "epoch": 0.02502553626149132, "grad_norm": 1.336018443107605, "learning_rate": 9.641206001777028e-07, "loss": 0.2084, "step": 147 }, { "epoch": 0.02519577800476677, "grad_norm": 1.0895048379898071, "learning_rate": 9.654303936217977e-07, "loss": 0.1939, "step": 148 }, { "epoch": 0.02536601974804222, "grad_norm": 1.124503493309021, "learning_rate": 9.667313668411324e-07, "loss": 0.186, "step": 149 }, { "epoch": 0.02553626149131767, "grad_norm": 1.1752759218215942, "learning_rate": 9.680236378333531e-07, "loss": 0.1991, "step": 150 }, { "epoch": 0.02570650323459312, "grad_norm": 1.1944782733917236, "learning_rate": 9.693073222439593e-07, "loss": 0.2166, "step": 151 }, { "epoch": 0.025876744977868574, "grad_norm": 1.1967934370040894, "learning_rate": 9.705825334284067e-07, "loss": 0.2066, "step": 152 }, { "epoch": 0.026046986721144024, "grad_norm": 1.2898709774017334, "learning_rate": 9.71849382512175e-07, "loss": 0.2167, "step": 153 }, { "epoch": 0.026217228464419477, "grad_norm": 1.0959877967834473, "learning_rate": 9.7310797844888e-07, "loss": 0.1884, "step": 154 }, { "epoch": 0.026387470207694927, "grad_norm": 1.192084789276123, "learning_rate": 9.743584280765029e-07, "loss": 0.2111, "step": 155 }, { "epoch": 0.026557711950970377, "grad_norm": 1.4502787590026855, "learning_rate": 9.756008361718137e-07, "loss": 0.2387, "step": 156 }, { "epoch": 0.02672795369424583, "grad_norm": 1.3559253215789795, "learning_rate": 9.76835305503054e-07, "loss": 0.2201, "step": 157 }, { "epoch": 0.02689819543752128, "grad_norm": 1.1862843036651611, "learning_rate": 9.780619368809492e-07, "loss": 0.1881, "step": 158 }, { "epoch": 0.027068437180796732, "grad_norm": 1.2608420848846436, "learning_rate": 9.792808292081091e-07, "loss": 0.2009, "step": 159 }, { "epoch": 0.027238678924072182, "grad_norm": 1.3061561584472656, "learning_rate": 9.804920795268817e-07, "loss": 0.2324, "step": 160 }, { "epoch": 0.027408920667347635, "grad_norm": 1.3358960151672363, "learning_rate": 9.816957830657163e-07, "loss": 0.2116, "step": 161 }, { "epoch": 0.027579162410623085, "grad_norm": 1.0722428560256958, "learning_rate": 9.828920332840889e-07, "loss": 0.1641, "step": 162 }, { "epoch": 0.027749404153898535, "grad_norm": 1.2872867584228516, "learning_rate": 9.840809219160487e-07, "loss": 0.1849, "step": 163 }, { "epoch": 0.027919645897173988, "grad_norm": 1.3819077014923096, "learning_rate": 9.852625390124294e-07, "loss": 0.1933, "step": 164 }, { "epoch": 0.028089887640449437, "grad_norm": 1.278441309928894, "learning_rate": 9.864369729817805e-07, "loss": 0.1835, "step": 165 }, { "epoch": 0.02826012938372489, "grad_norm": 1.1808550357818604, "learning_rate": 9.876043106300596e-07, "loss": 0.166, "step": 166 }, { "epoch": 0.02843037112700034, "grad_norm": 1.233665108680725, "learning_rate": 9.887646371991337e-07, "loss": 0.1918, "step": 167 }, { "epoch": 0.028600612870275793, "grad_norm": 1.1117044687271118, "learning_rate": 9.899180364041324e-07, "loss": 0.1699, "step": 168 }, { "epoch": 0.028770854613551243, "grad_norm": 1.112967848777771, "learning_rate": 9.910645904696893e-07, "loss": 0.1629, "step": 169 }, { "epoch": 0.028941096356826693, "grad_norm": 1.1570054292678833, "learning_rate": 9.92204380165119e-07, "loss": 0.1729, "step": 170 }, { "epoch": 0.029111338100102146, "grad_norm": 1.1414086818695068, "learning_rate": 9.933374848385576e-07, "loss": 0.1659, "step": 171 }, { "epoch": 0.029281579843377595, "grad_norm": 1.2004859447479248, "learning_rate": 9.944639824501122e-07, "loss": 0.1835, "step": 172 }, { "epoch": 0.02945182158665305, "grad_norm": 1.2365108728408813, "learning_rate": 9.95583949604046e-07, "loss": 0.2133, "step": 173 }, { "epoch": 0.0296220633299285, "grad_norm": 1.2216784954071045, "learning_rate": 9.966974615800383e-07, "loss": 0.1848, "step": 174 }, { "epoch": 0.029792305073203948, "grad_norm": 1.2305302619934082, "learning_rate": 9.978045923635475e-07, "loss": 0.2026, "step": 175 }, { "epoch": 0.0299625468164794, "grad_norm": 1.3286123275756836, "learning_rate": 9.989054146753091e-07, "loss": 0.1817, "step": 176 }, { "epoch": 0.03013278855975485, "grad_norm": 1.3320600986480713, "learning_rate": 1e-06, "loss": 0.2147, "step": 177 }, { "epoch": 0.030303030303030304, "grad_norm": 1.2844750881195068, "learning_rate": 1e-06, "loss": 0.1893, "step": 178 }, { "epoch": 0.030473272046305754, "grad_norm": 1.1248033046722412, "learning_rate": 1e-06, "loss": 0.1617, "step": 179 }, { "epoch": 0.030643513789581207, "grad_norm": 1.3026221990585327, "learning_rate": 1e-06, "loss": 0.1815, "step": 180 }, { "epoch": 0.030813755532856656, "grad_norm": 1.2536060810089111, "learning_rate": 1e-06, "loss": 0.1852, "step": 181 }, { "epoch": 0.030983997276132106, "grad_norm": 1.1810585260391235, "learning_rate": 1e-06, "loss": 0.1712, "step": 182 }, { "epoch": 0.03115423901940756, "grad_norm": 1.4999784231185913, "learning_rate": 1e-06, "loss": 0.2461, "step": 183 }, { "epoch": 0.03132448076268301, "grad_norm": 1.1074563264846802, "learning_rate": 1e-06, "loss": 0.1796, "step": 184 }, { "epoch": 0.03149472250595846, "grad_norm": 1.0583021640777588, "learning_rate": 1e-06, "loss": 0.1603, "step": 185 }, { "epoch": 0.031664964249233915, "grad_norm": 1.1220263242721558, "learning_rate": 1e-06, "loss": 0.1732, "step": 186 }, { "epoch": 0.031835205992509365, "grad_norm": 1.1038565635681152, "learning_rate": 1e-06, "loss": 0.1766, "step": 187 }, { "epoch": 0.032005447735784814, "grad_norm": 1.2731438875198364, "learning_rate": 1e-06, "loss": 0.1993, "step": 188 }, { "epoch": 0.032175689479060264, "grad_norm": 1.569389820098877, "learning_rate": 1e-06, "loss": 0.1862, "step": 189 }, { "epoch": 0.032345931222335714, "grad_norm": 1.1800955533981323, "learning_rate": 1e-06, "loss": 0.1411, "step": 190 }, { "epoch": 0.03251617296561117, "grad_norm": 1.2948437929153442, "learning_rate": 1e-06, "loss": 0.186, "step": 191 }, { "epoch": 0.03268641470888662, "grad_norm": 1.4020929336547852, "learning_rate": 1e-06, "loss": 0.1837, "step": 192 }, { "epoch": 0.03285665645216207, "grad_norm": 1.25234055519104, "learning_rate": 1e-06, "loss": 0.1645, "step": 193 }, { "epoch": 0.03302689819543752, "grad_norm": 1.349990725517273, "learning_rate": 1e-06, "loss": 0.2217, "step": 194 }, { "epoch": 0.03319713993871297, "grad_norm": 1.2276535034179688, "learning_rate": 1e-06, "loss": 0.1825, "step": 195 }, { "epoch": 0.033367381681988426, "grad_norm": 1.2954577207565308, "learning_rate": 1e-06, "loss": 0.1811, "step": 196 }, { "epoch": 0.033537623425263875, "grad_norm": 1.2191767692565918, "learning_rate": 1e-06, "loss": 0.195, "step": 197 }, { "epoch": 0.033707865168539325, "grad_norm": 1.1188750267028809, "learning_rate": 1e-06, "loss": 0.1505, "step": 198 }, { "epoch": 0.033878106911814775, "grad_norm": 1.246447205543518, "learning_rate": 1e-06, "loss": 0.1615, "step": 199 }, { "epoch": 0.03404834865509023, "grad_norm": 1.1524237394332886, "learning_rate": 1e-06, "loss": 0.1858, "step": 200 }, { "epoch": 0.03421859039836568, "grad_norm": 1.1863471269607544, "learning_rate": 1e-06, "loss": 0.142, "step": 201 }, { "epoch": 0.03438883214164113, "grad_norm": 1.2720561027526855, "learning_rate": 1e-06, "loss": 0.1689, "step": 202 }, { "epoch": 0.03455907388491658, "grad_norm": 1.3305280208587646, "learning_rate": 1e-06, "loss": 0.1679, "step": 203 }, { "epoch": 0.03472931562819203, "grad_norm": 1.2643027305603027, "learning_rate": 1e-06, "loss": 0.1906, "step": 204 }, { "epoch": 0.034899557371467486, "grad_norm": 1.2647238969802856, "learning_rate": 1e-06, "loss": 0.192, "step": 205 }, { "epoch": 0.035069799114742936, "grad_norm": 1.4146193265914917, "learning_rate": 1e-06, "loss": 0.1747, "step": 206 }, { "epoch": 0.035240040858018386, "grad_norm": 1.366545557975769, "learning_rate": 1e-06, "loss": 0.182, "step": 207 }, { "epoch": 0.035410282601293835, "grad_norm": 1.150009036064148, "learning_rate": 1e-06, "loss": 0.1536, "step": 208 }, { "epoch": 0.035580524344569285, "grad_norm": 1.2122458219528198, "learning_rate": 1e-06, "loss": 0.1717, "step": 209 }, { "epoch": 0.03575076608784474, "grad_norm": 1.1708459854125977, "learning_rate": 1e-06, "loss": 0.1685, "step": 210 }, { "epoch": 0.03592100783112019, "grad_norm": 1.1359509229660034, "learning_rate": 1e-06, "loss": 0.1628, "step": 211 }, { "epoch": 0.03609124957439564, "grad_norm": 2.6132564544677734, "learning_rate": 1e-06, "loss": 0.2673, "step": 212 }, { "epoch": 0.03626149131767109, "grad_norm": 1.2932703495025635, "learning_rate": 1e-06, "loss": 0.1568, "step": 213 }, { "epoch": 0.03643173306094655, "grad_norm": 1.6617991924285889, "learning_rate": 1e-06, "loss": 0.1944, "step": 214 }, { "epoch": 0.036601974804222, "grad_norm": 2.0360565185546875, "learning_rate": 1e-06, "loss": 0.2282, "step": 215 }, { "epoch": 0.03677221654749745, "grad_norm": 1.231723666191101, "learning_rate": 1e-06, "loss": 0.1452, "step": 216 }, { "epoch": 0.036942458290772896, "grad_norm": 1.3259527683258057, "learning_rate": 1e-06, "loss": 0.167, "step": 217 }, { "epoch": 0.037112700034048346, "grad_norm": 1.054499626159668, "learning_rate": 1e-06, "loss": 0.1342, "step": 218 }, { "epoch": 0.0372829417773238, "grad_norm": 1.4665144681930542, "learning_rate": 1e-06, "loss": 0.1978, "step": 219 }, { "epoch": 0.03745318352059925, "grad_norm": 1.3940861225128174, "learning_rate": 1e-06, "loss": 0.1971, "step": 220 }, { "epoch": 0.0376234252638747, "grad_norm": 1.164638638496399, "learning_rate": 1e-06, "loss": 0.1727, "step": 221 }, { "epoch": 0.03779366700715015, "grad_norm": 1.1719894409179688, "learning_rate": 1e-06, "loss": 0.1426, "step": 222 }, { "epoch": 0.0379639087504256, "grad_norm": 1.1395282745361328, "learning_rate": 1e-06, "loss": 0.1544, "step": 223 }, { "epoch": 0.03813415049370106, "grad_norm": 1.2896770238876343, "learning_rate": 1e-06, "loss": 0.1709, "step": 224 }, { "epoch": 0.03830439223697651, "grad_norm": 1.1964484453201294, "learning_rate": 1e-06, "loss": 0.1723, "step": 225 }, { "epoch": 0.03847463398025196, "grad_norm": 1.2355952262878418, "learning_rate": 1e-06, "loss": 0.1388, "step": 226 }, { "epoch": 0.03864487572352741, "grad_norm": 1.3132867813110352, "learning_rate": 1e-06, "loss": 0.174, "step": 227 }, { "epoch": 0.03881511746680286, "grad_norm": 1.175132393836975, "learning_rate": 1e-06, "loss": 0.1533, "step": 228 }, { "epoch": 0.03898535921007831, "grad_norm": 1.4419723749160767, "learning_rate": 1e-06, "loss": 0.1839, "step": 229 }, { "epoch": 0.03915560095335376, "grad_norm": 1.1911609172821045, "learning_rate": 1e-06, "loss": 0.1473, "step": 230 }, { "epoch": 0.03932584269662921, "grad_norm": 1.047421932220459, "learning_rate": 1e-06, "loss": 0.1437, "step": 231 }, { "epoch": 0.03949608443990466, "grad_norm": 1.2604156732559204, "learning_rate": 1e-06, "loss": 0.1651, "step": 232 }, { "epoch": 0.03966632618318012, "grad_norm": 1.2047826051712036, "learning_rate": 1e-06, "loss": 0.1587, "step": 233 }, { "epoch": 0.03983656792645557, "grad_norm": 1.0610707998275757, "learning_rate": 1e-06, "loss": 0.1317, "step": 234 }, { "epoch": 0.04000680966973102, "grad_norm": 1.1626613140106201, "learning_rate": 1e-06, "loss": 0.1543, "step": 235 }, { "epoch": 0.04017705141300647, "grad_norm": 1.0833077430725098, "learning_rate": 1e-06, "loss": 0.1316, "step": 236 }, { "epoch": 0.04034729315628192, "grad_norm": 1.4806079864501953, "learning_rate": 1e-06, "loss": 0.1699, "step": 237 }, { "epoch": 0.040517534899557374, "grad_norm": 1.360552191734314, "learning_rate": 1e-06, "loss": 0.1559, "step": 238 }, { "epoch": 0.040687776642832824, "grad_norm": 1.2109061479568481, "learning_rate": 1e-06, "loss": 0.1507, "step": 239 }, { "epoch": 0.04085801838610827, "grad_norm": 1.1271363496780396, "learning_rate": 1e-06, "loss": 0.1522, "step": 240 }, { "epoch": 0.04102826012938372, "grad_norm": 1.3026715517044067, "learning_rate": 1e-06, "loss": 0.1586, "step": 241 }, { "epoch": 0.04119850187265917, "grad_norm": 1.589646816253662, "learning_rate": 1e-06, "loss": 0.2006, "step": 242 }, { "epoch": 0.04136874361593463, "grad_norm": 1.1894760131835938, "learning_rate": 1e-06, "loss": 0.1547, "step": 243 }, { "epoch": 0.04153898535921008, "grad_norm": 1.1556522846221924, "learning_rate": 1e-06, "loss": 0.1509, "step": 244 }, { "epoch": 0.04170922710248553, "grad_norm": 2.4897043704986572, "learning_rate": 1e-06, "loss": 0.2587, "step": 245 }, { "epoch": 0.04187946884576098, "grad_norm": 1.3815762996673584, "learning_rate": 1e-06, "loss": 0.1604, "step": 246 }, { "epoch": 0.042049710589036435, "grad_norm": 1.4068046808242798, "learning_rate": 1e-06, "loss": 0.1628, "step": 247 }, { "epoch": 0.042219952332311884, "grad_norm": 1.4070250988006592, "learning_rate": 1e-06, "loss": 0.1631, "step": 248 }, { "epoch": 0.042390194075587334, "grad_norm": 1.1561863422393799, "learning_rate": 1e-06, "loss": 0.1488, "step": 249 }, { "epoch": 0.042560435818862784, "grad_norm": 1.0689971446990967, "learning_rate": 1e-06, "loss": 0.1318, "step": 250 }, { "epoch": 0.04273067756213823, "grad_norm": 1.1282958984375, "learning_rate": 1e-06, "loss": 0.1243, "step": 251 }, { "epoch": 0.04290091930541369, "grad_norm": 1.2809712886810303, "learning_rate": 1e-06, "loss": 0.1609, "step": 252 }, { "epoch": 0.04307116104868914, "grad_norm": 1.1898773908615112, "learning_rate": 1e-06, "loss": 0.1653, "step": 253 }, { "epoch": 0.04324140279196459, "grad_norm": 1.215122938156128, "learning_rate": 1e-06, "loss": 0.1459, "step": 254 }, { "epoch": 0.04341164453524004, "grad_norm": 1.4629710912704468, "learning_rate": 1e-06, "loss": 0.1945, "step": 255 }, { "epoch": 0.04358188627851549, "grad_norm": 1.206394910812378, "learning_rate": 1e-06, "loss": 0.149, "step": 256 }, { "epoch": 0.043752128021790945, "grad_norm": 1.3022671937942505, "learning_rate": 1e-06, "loss": 0.1529, "step": 257 }, { "epoch": 0.043922369765066395, "grad_norm": 1.2597123384475708, "learning_rate": 1e-06, "loss": 0.1411, "step": 258 }, { "epoch": 0.044092611508341845, "grad_norm": 1.3716976642608643, "learning_rate": 1e-06, "loss": 0.1552, "step": 259 }, { "epoch": 0.044262853251617294, "grad_norm": 1.2082346677780151, "learning_rate": 1e-06, "loss": 0.1583, "step": 260 }, { "epoch": 0.04443309499489275, "grad_norm": 1.1413241624832153, "learning_rate": 1e-06, "loss": 0.1197, "step": 261 }, { "epoch": 0.0446033367381682, "grad_norm": 1.346548080444336, "learning_rate": 1e-06, "loss": 0.175, "step": 262 }, { "epoch": 0.04477357848144365, "grad_norm": 1.326627254486084, "learning_rate": 1e-06, "loss": 0.168, "step": 263 }, { "epoch": 0.0449438202247191, "grad_norm": 1.259684681892395, "learning_rate": 1e-06, "loss": 0.1541, "step": 264 }, { "epoch": 0.04511406196799455, "grad_norm": 1.3728045225143433, "learning_rate": 1e-06, "loss": 0.1653, "step": 265 }, { "epoch": 0.045284303711270006, "grad_norm": 1.1758908033370972, "learning_rate": 1e-06, "loss": 0.1519, "step": 266 }, { "epoch": 0.045454545454545456, "grad_norm": 1.4522403478622437, "learning_rate": 1e-06, "loss": 0.1541, "step": 267 }, { "epoch": 0.045624787197820905, "grad_norm": 1.3297243118286133, "learning_rate": 1e-06, "loss": 0.1647, "step": 268 }, { "epoch": 0.045795028941096355, "grad_norm": 1.1865285634994507, "learning_rate": 1e-06, "loss": 0.1415, "step": 269 }, { "epoch": 0.045965270684371805, "grad_norm": 1.2091939449310303, "learning_rate": 1e-06, "loss": 0.1386, "step": 270 }, { "epoch": 0.04613551242764726, "grad_norm": 1.2446978092193604, "learning_rate": 1e-06, "loss": 0.1285, "step": 271 }, { "epoch": 0.04630575417092271, "grad_norm": 1.1907007694244385, "learning_rate": 1e-06, "loss": 0.1607, "step": 272 }, { "epoch": 0.04647599591419816, "grad_norm": 1.2765071392059326, "learning_rate": 1e-06, "loss": 0.1539, "step": 273 }, { "epoch": 0.04664623765747361, "grad_norm": 1.364173173904419, "learning_rate": 1e-06, "loss": 0.1434, "step": 274 }, { "epoch": 0.04681647940074907, "grad_norm": 1.212433934211731, "learning_rate": 1e-06, "loss": 0.1221, "step": 275 }, { "epoch": 0.04698672114402452, "grad_norm": 1.203960657119751, "learning_rate": 1e-06, "loss": 0.1292, "step": 276 }, { "epoch": 0.047156962887299966, "grad_norm": 1.4043571949005127, "learning_rate": 1e-06, "loss": 0.1496, "step": 277 }, { "epoch": 0.047327204630575416, "grad_norm": 1.4482816457748413, "learning_rate": 1e-06, "loss": 0.1747, "step": 278 }, { "epoch": 0.047497446373850866, "grad_norm": 1.1347603797912598, "learning_rate": 1e-06, "loss": 0.1268, "step": 279 }, { "epoch": 0.04766768811712632, "grad_norm": 1.1415354013442993, "learning_rate": 1e-06, "loss": 0.1448, "step": 280 }, { "epoch": 0.04783792986040177, "grad_norm": 1.4771915674209595, "learning_rate": 1e-06, "loss": 0.1596, "step": 281 }, { "epoch": 0.04800817160367722, "grad_norm": 1.176045298576355, "learning_rate": 1e-06, "loss": 0.1207, "step": 282 }, { "epoch": 0.04817841334695267, "grad_norm": 1.3680100440979004, "learning_rate": 1e-06, "loss": 0.1413, "step": 283 }, { "epoch": 0.04834865509022812, "grad_norm": 1.3888130187988281, "learning_rate": 1e-06, "loss": 0.1395, "step": 284 }, { "epoch": 0.04851889683350358, "grad_norm": 1.4310017824172974, "learning_rate": 1e-06, "loss": 0.1395, "step": 285 }, { "epoch": 0.04868913857677903, "grad_norm": 1.4663360118865967, "learning_rate": 1e-06, "loss": 0.1674, "step": 286 }, { "epoch": 0.04885938032005448, "grad_norm": 1.2074915170669556, "learning_rate": 1e-06, "loss": 0.1384, "step": 287 }, { "epoch": 0.049029622063329927, "grad_norm": 1.3467768430709839, "learning_rate": 1e-06, "loss": 0.1702, "step": 288 }, { "epoch": 0.049199863806605376, "grad_norm": 1.192862629890442, "learning_rate": 1e-06, "loss": 0.1347, "step": 289 }, { "epoch": 0.04937010554988083, "grad_norm": 1.304807424545288, "learning_rate": 1e-06, "loss": 0.1459, "step": 290 }, { "epoch": 0.04954034729315628, "grad_norm": 1.309431791305542, "learning_rate": 1e-06, "loss": 0.1651, "step": 291 }, { "epoch": 0.04971058903643173, "grad_norm": 1.534616231918335, "learning_rate": 1e-06, "loss": 0.1481, "step": 292 }, { "epoch": 0.04988083077970718, "grad_norm": 1.2383191585540771, "learning_rate": 1e-06, "loss": 0.1238, "step": 293 }, { "epoch": 0.05005107252298264, "grad_norm": 1.1695579290390015, "learning_rate": 1e-06, "loss": 0.1144, "step": 294 }, { "epoch": 0.05022131426625809, "grad_norm": 1.3590017557144165, "learning_rate": 1e-06, "loss": 0.1376, "step": 295 }, { "epoch": 0.05039155600953354, "grad_norm": 1.186447024345398, "learning_rate": 1e-06, "loss": 0.1336, "step": 296 }, { "epoch": 0.05056179775280899, "grad_norm": 1.2510392665863037, "learning_rate": 1e-06, "loss": 0.1363, "step": 297 }, { "epoch": 0.05073203949608444, "grad_norm": 1.3094967603683472, "learning_rate": 1e-06, "loss": 0.1339, "step": 298 }, { "epoch": 0.050902281239359894, "grad_norm": 1.4106348752975464, "learning_rate": 1e-06, "loss": 0.1527, "step": 299 }, { "epoch": 0.05107252298263534, "grad_norm": 1.2612876892089844, "learning_rate": 1e-06, "loss": 0.1343, "step": 300 }, { "epoch": 0.05124276472591079, "grad_norm": 1.3075928688049316, "learning_rate": 1e-06, "loss": 0.1572, "step": 301 }, { "epoch": 0.05141300646918624, "grad_norm": 1.3252586126327515, "learning_rate": 1e-06, "loss": 0.1491, "step": 302 }, { "epoch": 0.05158324821246169, "grad_norm": 1.3578503131866455, "learning_rate": 1e-06, "loss": 0.1526, "step": 303 }, { "epoch": 0.05175348995573715, "grad_norm": 1.383754849433899, "learning_rate": 1e-06, "loss": 0.1547, "step": 304 }, { "epoch": 0.0519237316990126, "grad_norm": 1.463248372077942, "learning_rate": 1e-06, "loss": 0.171, "step": 305 }, { "epoch": 0.05209397344228805, "grad_norm": 1.3403490781784058, "learning_rate": 1e-06, "loss": 0.1738, "step": 306 }, { "epoch": 0.0522642151855635, "grad_norm": 1.3475086688995361, "learning_rate": 1e-06, "loss": 0.1434, "step": 307 }, { "epoch": 0.052434456928838954, "grad_norm": 1.1605323553085327, "learning_rate": 1e-06, "loss": 0.1362, "step": 308 }, { "epoch": 0.052604698672114404, "grad_norm": 1.2388291358947754, "learning_rate": 1e-06, "loss": 0.1377, "step": 309 }, { "epoch": 0.052774940415389854, "grad_norm": 1.6835265159606934, "learning_rate": 1e-06, "loss": 0.1976, "step": 310 }, { "epoch": 0.052945182158665303, "grad_norm": 1.2333765029907227, "learning_rate": 1e-06, "loss": 0.1284, "step": 311 }, { "epoch": 0.05311542390194075, "grad_norm": 1.581054925918579, "learning_rate": 1e-06, "loss": 0.1709, "step": 312 }, { "epoch": 0.05328566564521621, "grad_norm": 1.165010690689087, "learning_rate": 1e-06, "loss": 0.1258, "step": 313 }, { "epoch": 0.05345590738849166, "grad_norm": 1.209064245223999, "learning_rate": 1e-06, "loss": 0.1223, "step": 314 }, { "epoch": 0.05362614913176711, "grad_norm": 1.3305888175964355, "learning_rate": 1e-06, "loss": 0.1425, "step": 315 }, { "epoch": 0.05379639087504256, "grad_norm": 1.828088402748108, "learning_rate": 1e-06, "loss": 0.1634, "step": 316 }, { "epoch": 0.05396663261831801, "grad_norm": 1.332383394241333, "learning_rate": 1e-06, "loss": 0.1268, "step": 317 }, { "epoch": 0.054136874361593465, "grad_norm": 1.339270830154419, "learning_rate": 1e-06, "loss": 0.1428, "step": 318 }, { "epoch": 0.054307116104868915, "grad_norm": 1.2108736038208008, "learning_rate": 1e-06, "loss": 0.1345, "step": 319 }, { "epoch": 0.054477357848144364, "grad_norm": 1.2508689165115356, "learning_rate": 1e-06, "loss": 0.1251, "step": 320 }, { "epoch": 0.054647599591419814, "grad_norm": 1.0894383192062378, "learning_rate": 1e-06, "loss": 0.1272, "step": 321 }, { "epoch": 0.05481784133469527, "grad_norm": 1.2453547716140747, "learning_rate": 1e-06, "loss": 0.1315, "step": 322 }, { "epoch": 0.05498808307797072, "grad_norm": 1.3076040744781494, "learning_rate": 1e-06, "loss": 0.1178, "step": 323 }, { "epoch": 0.05515832482124617, "grad_norm": 2.6135568618774414, "learning_rate": 1e-06, "loss": 0.1613, "step": 324 }, { "epoch": 0.05532856656452162, "grad_norm": 1.5091561079025269, "learning_rate": 1e-06, "loss": 0.1126, "step": 325 }, { "epoch": 0.05549880830779707, "grad_norm": 1.7307822704315186, "learning_rate": 1e-06, "loss": 0.171, "step": 326 }, { "epoch": 0.055669050051072526, "grad_norm": 1.2100858688354492, "learning_rate": 1e-06, "loss": 0.1094, "step": 327 }, { "epoch": 0.055839291794347976, "grad_norm": 1.2510110139846802, "learning_rate": 1e-06, "loss": 0.1162, "step": 328 }, { "epoch": 0.056009533537623425, "grad_norm": 1.3461787700653076, "learning_rate": 1e-06, "loss": 0.1427, "step": 329 }, { "epoch": 0.056179775280898875, "grad_norm": 1.6088210344314575, "learning_rate": 1e-06, "loss": 0.1648, "step": 330 }, { "epoch": 0.056350017024174325, "grad_norm": 1.4088263511657715, "learning_rate": 1e-06, "loss": 0.1334, "step": 331 }, { "epoch": 0.05652025876744978, "grad_norm": 1.4224536418914795, "learning_rate": 1e-06, "loss": 0.1414, "step": 332 }, { "epoch": 0.05669050051072523, "grad_norm": 1.1860085725784302, "learning_rate": 1e-06, "loss": 0.132, "step": 333 }, { "epoch": 0.05686074225400068, "grad_norm": 1.3678163290023804, "learning_rate": 1e-06, "loss": 0.1496, "step": 334 }, { "epoch": 0.05703098399727613, "grad_norm": 1.4010056257247925, "learning_rate": 1e-06, "loss": 0.1444, "step": 335 }, { "epoch": 0.05720122574055159, "grad_norm": 1.2456624507904053, "learning_rate": 1e-06, "loss": 0.1222, "step": 336 }, { "epoch": 0.057371467483827036, "grad_norm": 1.5460395812988281, "learning_rate": 1e-06, "loss": 0.1385, "step": 337 }, { "epoch": 0.057541709227102486, "grad_norm": 1.2900766134262085, "learning_rate": 1e-06, "loss": 0.1096, "step": 338 }, { "epoch": 0.057711950970377936, "grad_norm": 1.5039972066879272, "learning_rate": 1e-06, "loss": 0.1393, "step": 339 }, { "epoch": 0.057882192713653385, "grad_norm": 1.2665271759033203, "learning_rate": 1e-06, "loss": 0.1264, "step": 340 }, { "epoch": 0.05805243445692884, "grad_norm": 1.4474594593048096, "learning_rate": 1e-06, "loss": 0.1308, "step": 341 }, { "epoch": 0.05822267620020429, "grad_norm": 1.3482589721679688, "learning_rate": 1e-06, "loss": 0.1473, "step": 342 }, { "epoch": 0.05839291794347974, "grad_norm": 1.3561499118804932, "learning_rate": 1e-06, "loss": 0.1227, "step": 343 }, { "epoch": 0.05856315968675519, "grad_norm": 1.194254755973816, "learning_rate": 1e-06, "loss": 0.116, "step": 344 }, { "epoch": 0.05873340143003064, "grad_norm": 1.2828115224838257, "learning_rate": 1e-06, "loss": 0.1218, "step": 345 }, { "epoch": 0.0589036431733061, "grad_norm": 2.596560001373291, "learning_rate": 1e-06, "loss": 0.2237, "step": 346 }, { "epoch": 0.05907388491658155, "grad_norm": 1.3315943479537964, "learning_rate": 1e-06, "loss": 0.1271, "step": 347 }, { "epoch": 0.059244126659857, "grad_norm": 1.2427936792373657, "learning_rate": 1e-06, "loss": 0.1181, "step": 348 }, { "epoch": 0.059414368403132446, "grad_norm": 1.223074197769165, "learning_rate": 1e-06, "loss": 0.1073, "step": 349 }, { "epoch": 0.059584610146407896, "grad_norm": 1.4834375381469727, "learning_rate": 1e-06, "loss": 0.1435, "step": 350 }, { "epoch": 0.05975485188968335, "grad_norm": 1.4834375381469727, "learning_rate": 1e-06, "loss": 0.1392, "step": 351 }, { "epoch": 0.0599250936329588, "grad_norm": 1.2152388095855713, "learning_rate": 1e-06, "loss": 0.1158, "step": 352 }, { "epoch": 0.06009533537623425, "grad_norm": 1.3129734992980957, "learning_rate": 1e-06, "loss": 0.1272, "step": 353 }, { "epoch": 0.0602655771195097, "grad_norm": 1.3754154443740845, "learning_rate": 1e-06, "loss": 0.1416, "step": 354 }, { "epoch": 0.06043581886278516, "grad_norm": 1.2157810926437378, "learning_rate": 1e-06, "loss": 0.1207, "step": 355 }, { "epoch": 0.06060606060606061, "grad_norm": 1.3933988809585571, "learning_rate": 1e-06, "loss": 0.1504, "step": 356 }, { "epoch": 0.06077630234933606, "grad_norm": 1.3696870803833008, "learning_rate": 1e-06, "loss": 0.1303, "step": 357 }, { "epoch": 0.06094654409261151, "grad_norm": 1.324859380722046, "learning_rate": 1e-06, "loss": 0.1344, "step": 358 }, { "epoch": 0.06111678583588696, "grad_norm": 1.1997361183166504, "learning_rate": 1e-06, "loss": 0.1165, "step": 359 }, { "epoch": 0.06128702757916241, "grad_norm": 1.3134782314300537, "learning_rate": 1e-06, "loss": 0.1269, "step": 360 }, { "epoch": 0.06145726932243786, "grad_norm": 2.7401535511016846, "learning_rate": 1e-06, "loss": 0.253, "step": 361 }, { "epoch": 0.06162751106571331, "grad_norm": 1.3712646961212158, "learning_rate": 1e-06, "loss": 0.138, "step": 362 }, { "epoch": 0.06179775280898876, "grad_norm": 1.2538585662841797, "learning_rate": 1e-06, "loss": 0.1225, "step": 363 }, { "epoch": 0.06196799455226421, "grad_norm": 1.2842880487442017, "learning_rate": 1e-06, "loss": 0.1301, "step": 364 }, { "epoch": 0.06213823629553967, "grad_norm": 1.1870968341827393, "learning_rate": 1e-06, "loss": 0.1116, "step": 365 }, { "epoch": 0.06230847803881512, "grad_norm": 1.3063998222351074, "learning_rate": 1e-06, "loss": 0.124, "step": 366 }, { "epoch": 0.06247871978209057, "grad_norm": 1.3379669189453125, "learning_rate": 1e-06, "loss": 0.1396, "step": 367 }, { "epoch": 0.06264896152536602, "grad_norm": 1.3882386684417725, "learning_rate": 1e-06, "loss": 0.1274, "step": 368 }, { "epoch": 0.06281920326864147, "grad_norm": 1.38962984085083, "learning_rate": 1e-06, "loss": 0.1192, "step": 369 }, { "epoch": 0.06298944501191692, "grad_norm": 1.5751211643218994, "learning_rate": 1e-06, "loss": 0.1465, "step": 370 }, { "epoch": 0.06315968675519237, "grad_norm": 1.4705424308776855, "learning_rate": 1e-06, "loss": 0.1428, "step": 371 }, { "epoch": 0.06332992849846783, "grad_norm": 1.27107834815979, "learning_rate": 1e-06, "loss": 0.1309, "step": 372 }, { "epoch": 0.06350017024174327, "grad_norm": 1.3142486810684204, "learning_rate": 1e-06, "loss": 0.1407, "step": 373 }, { "epoch": 0.06367041198501873, "grad_norm": 1.4944632053375244, "learning_rate": 1e-06, "loss": 0.139, "step": 374 }, { "epoch": 0.06384065372829417, "grad_norm": 1.5329262018203735, "learning_rate": 1e-06, "loss": 0.1533, "step": 375 }, { "epoch": 0.06401089547156963, "grad_norm": 1.682967185974121, "learning_rate": 1e-06, "loss": 0.1446, "step": 376 }, { "epoch": 0.06418113721484509, "grad_norm": 1.3599567413330078, "learning_rate": 1e-06, "loss": 0.117, "step": 377 }, { "epoch": 0.06435137895812053, "grad_norm": 1.41896653175354, "learning_rate": 1e-06, "loss": 0.1316, "step": 378 }, { "epoch": 0.06452162070139598, "grad_norm": 1.2187316417694092, "learning_rate": 1e-06, "loss": 0.1104, "step": 379 }, { "epoch": 0.06469186244467143, "grad_norm": 1.3118098974227905, "learning_rate": 1e-06, "loss": 0.1173, "step": 380 }, { "epoch": 0.06486210418794688, "grad_norm": 1.2350298166275024, "learning_rate": 1e-06, "loss": 0.1188, "step": 381 }, { "epoch": 0.06503234593122234, "grad_norm": 1.3306002616882324, "learning_rate": 1e-06, "loss": 0.1153, "step": 382 }, { "epoch": 0.06520258767449778, "grad_norm": 2.9044129848480225, "learning_rate": 1e-06, "loss": 0.1784, "step": 383 }, { "epoch": 0.06537282941777324, "grad_norm": 1.6895009279251099, "learning_rate": 1e-06, "loss": 0.1522, "step": 384 }, { "epoch": 0.06554307116104868, "grad_norm": 1.2503180503845215, "learning_rate": 1e-06, "loss": 0.1048, "step": 385 }, { "epoch": 0.06571331290432414, "grad_norm": 1.4215972423553467, "learning_rate": 1e-06, "loss": 0.135, "step": 386 }, { "epoch": 0.0658835546475996, "grad_norm": 1.4079679250717163, "learning_rate": 1e-06, "loss": 0.1266, "step": 387 }, { "epoch": 0.06605379639087504, "grad_norm": 1.29921293258667, "learning_rate": 1e-06, "loss": 0.1122, "step": 388 }, { "epoch": 0.0662240381341505, "grad_norm": 1.442528486251831, "learning_rate": 1e-06, "loss": 0.1333, "step": 389 }, { "epoch": 0.06639427987742594, "grad_norm": 1.8958500623703003, "learning_rate": 1e-06, "loss": 0.1928, "step": 390 }, { "epoch": 0.0665645216207014, "grad_norm": 1.1162981986999512, "learning_rate": 1e-06, "loss": 0.0949, "step": 391 }, { "epoch": 0.06673476336397685, "grad_norm": 1.3155403137207031, "learning_rate": 1e-06, "loss": 0.1148, "step": 392 }, { "epoch": 0.0669050051072523, "grad_norm": 1.2109743356704712, "learning_rate": 1e-06, "loss": 0.0965, "step": 393 }, { "epoch": 0.06707524685052775, "grad_norm": 1.2797634601593018, "learning_rate": 1e-06, "loss": 0.108, "step": 394 }, { "epoch": 0.06724548859380321, "grad_norm": 1.2622194290161133, "learning_rate": 1e-06, "loss": 0.1205, "step": 395 }, { "epoch": 0.06741573033707865, "grad_norm": 1.4360229969024658, "learning_rate": 1e-06, "loss": 0.1337, "step": 396 }, { "epoch": 0.0675859720803541, "grad_norm": 1.1915457248687744, "learning_rate": 1e-06, "loss": 0.1186, "step": 397 }, { "epoch": 0.06775621382362955, "grad_norm": 1.2633016109466553, "learning_rate": 1e-06, "loss": 0.1055, "step": 398 }, { "epoch": 0.067926455566905, "grad_norm": 1.318787932395935, "learning_rate": 1e-06, "loss": 0.1247, "step": 399 }, { "epoch": 0.06809669731018046, "grad_norm": 1.0899081230163574, "learning_rate": 1e-06, "loss": 0.0921, "step": 400 }, { "epoch": 0.0682669390534559, "grad_norm": 1.4745516777038574, "learning_rate": 1e-06, "loss": 0.134, "step": 401 }, { "epoch": 0.06843718079673136, "grad_norm": 1.2535583972930908, "learning_rate": 1e-06, "loss": 0.1183, "step": 402 }, { "epoch": 0.0686074225400068, "grad_norm": 1.5067999362945557, "learning_rate": 1e-06, "loss": 0.1411, "step": 403 }, { "epoch": 0.06877766428328226, "grad_norm": 1.5428158044815063, "learning_rate": 1e-06, "loss": 0.1325, "step": 404 }, { "epoch": 0.06894790602655772, "grad_norm": 1.7841521501541138, "learning_rate": 1e-06, "loss": 0.1571, "step": 405 }, { "epoch": 0.06911814776983316, "grad_norm": 1.1209527254104614, "learning_rate": 1e-06, "loss": 0.094, "step": 406 }, { "epoch": 0.06928838951310862, "grad_norm": 1.2920805215835571, "learning_rate": 1e-06, "loss": 0.1211, "step": 407 }, { "epoch": 0.06945863125638406, "grad_norm": 1.410902500152588, "learning_rate": 1e-06, "loss": 0.1163, "step": 408 }, { "epoch": 0.06962887299965952, "grad_norm": 1.3287694454193115, "learning_rate": 1e-06, "loss": 0.1061, "step": 409 }, { "epoch": 0.06979911474293497, "grad_norm": 1.3526452779769897, "learning_rate": 1e-06, "loss": 0.1176, "step": 410 }, { "epoch": 0.06996935648621042, "grad_norm": 1.6113089323043823, "learning_rate": 1e-06, "loss": 0.1318, "step": 411 }, { "epoch": 0.07013959822948587, "grad_norm": 1.4161698818206787, "learning_rate": 1e-06, "loss": 0.1344, "step": 412 }, { "epoch": 0.07030983997276131, "grad_norm": 1.4419130086898804, "learning_rate": 1e-06, "loss": 0.137, "step": 413 }, { "epoch": 0.07048008171603677, "grad_norm": 1.2640091180801392, "learning_rate": 1e-06, "loss": 0.1227, "step": 414 }, { "epoch": 0.07065032345931223, "grad_norm": 1.23671555519104, "learning_rate": 1e-06, "loss": 0.121, "step": 415 }, { "epoch": 0.07082056520258767, "grad_norm": 1.3423279523849487, "learning_rate": 1e-06, "loss": 0.0975, "step": 416 }, { "epoch": 0.07099080694586313, "grad_norm": 1.2852997779846191, "learning_rate": 1e-06, "loss": 0.1246, "step": 417 }, { "epoch": 0.07116104868913857, "grad_norm": 1.4518605470657349, "learning_rate": 1e-06, "loss": 0.1341, "step": 418 }, { "epoch": 0.07133129043241403, "grad_norm": 1.4686856269836426, "learning_rate": 1e-06, "loss": 0.1259, "step": 419 }, { "epoch": 0.07150153217568948, "grad_norm": 1.3495961427688599, "learning_rate": 1e-06, "loss": 0.122, "step": 420 }, { "epoch": 0.07167177391896493, "grad_norm": 1.5383343696594238, "learning_rate": 1e-06, "loss": 0.1229, "step": 421 }, { "epoch": 0.07184201566224038, "grad_norm": 1.2680972814559937, "learning_rate": 1e-06, "loss": 0.105, "step": 422 }, { "epoch": 0.07201225740551584, "grad_norm": 1.4354145526885986, "learning_rate": 1e-06, "loss": 0.1247, "step": 423 }, { "epoch": 0.07218249914879128, "grad_norm": 1.4354145526885986, "learning_rate": 1e-06, "loss": 0.2006, "step": 424 }, { "epoch": 0.07235274089206674, "grad_norm": 1.7208505868911743, "learning_rate": 1e-06, "loss": 0.1541, "step": 425 }, { "epoch": 0.07252298263534218, "grad_norm": 1.2794008255004883, "learning_rate": 1e-06, "loss": 0.1088, "step": 426 }, { "epoch": 0.07269322437861764, "grad_norm": 1.3522515296936035, "learning_rate": 1e-06, "loss": 0.1139, "step": 427 }, { "epoch": 0.0728634661218931, "grad_norm": 1.4536526203155518, "learning_rate": 1e-06, "loss": 0.1293, "step": 428 }, { "epoch": 0.07303370786516854, "grad_norm": 1.2702877521514893, "learning_rate": 1e-06, "loss": 0.0999, "step": 429 }, { "epoch": 0.073203949608444, "grad_norm": 1.3044495582580566, "learning_rate": 1e-06, "loss": 0.1225, "step": 430 }, { "epoch": 0.07337419135171944, "grad_norm": 1.5265694856643677, "learning_rate": 1e-06, "loss": 0.1321, "step": 431 }, { "epoch": 0.0735444330949949, "grad_norm": 1.3504886627197266, "learning_rate": 1e-06, "loss": 0.0999, "step": 432 }, { "epoch": 0.07371467483827035, "grad_norm": 1.9963023662567139, "learning_rate": 1e-06, "loss": 0.1559, "step": 433 }, { "epoch": 0.07388491658154579, "grad_norm": 1.6677991151809692, "learning_rate": 1e-06, "loss": 0.1331, "step": 434 }, { "epoch": 0.07405515832482125, "grad_norm": 1.486445665359497, "learning_rate": 1e-06, "loss": 0.1224, "step": 435 }, { "epoch": 0.07422540006809669, "grad_norm": 1.301155686378479, "learning_rate": 1e-06, "loss": 0.1045, "step": 436 }, { "epoch": 0.07439564181137215, "grad_norm": 1.8198158740997314, "learning_rate": 1e-06, "loss": 0.1324, "step": 437 }, { "epoch": 0.0745658835546476, "grad_norm": 1.3100183010101318, "learning_rate": 1e-06, "loss": 0.1107, "step": 438 }, { "epoch": 0.07473612529792305, "grad_norm": 1.3502755165100098, "learning_rate": 1e-06, "loss": 0.1082, "step": 439 }, { "epoch": 0.0749063670411985, "grad_norm": 1.5028979778289795, "learning_rate": 1e-06, "loss": 0.1325, "step": 440 }, { "epoch": 0.07507660878447395, "grad_norm": 1.3118284940719604, "learning_rate": 1e-06, "loss": 0.0964, "step": 441 }, { "epoch": 0.0752468505277494, "grad_norm": 1.4086387157440186, "learning_rate": 1e-06, "loss": 0.094, "step": 442 }, { "epoch": 0.07541709227102486, "grad_norm": 1.5664148330688477, "learning_rate": 1e-06, "loss": 0.1195, "step": 443 }, { "epoch": 0.0755873340143003, "grad_norm": 1.4086772203445435, "learning_rate": 1e-06, "loss": 0.1104, "step": 444 }, { "epoch": 0.07575757575757576, "grad_norm": 1.4743391275405884, "learning_rate": 1e-06, "loss": 0.1163, "step": 445 }, { "epoch": 0.0759278175008512, "grad_norm": 1.600606083869934, "learning_rate": 1e-06, "loss": 0.1199, "step": 446 }, { "epoch": 0.07609805924412666, "grad_norm": 1.3386887311935425, "learning_rate": 1e-06, "loss": 0.1372, "step": 447 }, { "epoch": 0.07626830098740212, "grad_norm": 1.4845508337020874, "learning_rate": 1e-06, "loss": 0.1201, "step": 448 }, { "epoch": 0.07643854273067756, "grad_norm": 1.503592610359192, "learning_rate": 1e-06, "loss": 0.1243, "step": 449 }, { "epoch": 0.07660878447395301, "grad_norm": 1.3089518547058105, "learning_rate": 1e-06, "loss": 0.0884, "step": 450 }, { "epoch": 0.07677902621722846, "grad_norm": 1.3585859537124634, "learning_rate": 1e-06, "loss": 0.0991, "step": 451 }, { "epoch": 0.07694926796050391, "grad_norm": 3.5047550201416016, "learning_rate": 1e-06, "loss": 0.2257, "step": 452 }, { "epoch": 0.07711950970377937, "grad_norm": 2.3448007106781006, "learning_rate": 1e-06, "loss": 0.1543, "step": 453 }, { "epoch": 0.07728975144705481, "grad_norm": 1.8489949703216553, "learning_rate": 1e-06, "loss": 0.1335, "step": 454 }, { "epoch": 0.07745999319033027, "grad_norm": 1.5891618728637695, "learning_rate": 1e-06, "loss": 0.108, "step": 455 }, { "epoch": 0.07763023493360573, "grad_norm": 1.9977794885635376, "learning_rate": 1e-06, "loss": 0.1622, "step": 456 }, { "epoch": 0.07780047667688117, "grad_norm": 1.131685495376587, "learning_rate": 1e-06, "loss": 0.0874, "step": 457 }, { "epoch": 0.07797071842015663, "grad_norm": 1.4111319780349731, "learning_rate": 1e-06, "loss": 0.122, "step": 458 }, { "epoch": 0.07814096016343207, "grad_norm": 1.3373247385025024, "learning_rate": 1e-06, "loss": 0.0998, "step": 459 }, { "epoch": 0.07831120190670753, "grad_norm": 1.3671199083328247, "learning_rate": 1e-06, "loss": 0.1006, "step": 460 }, { "epoch": 0.07848144364998298, "grad_norm": 1.4334213733673096, "learning_rate": 1e-06, "loss": 0.1115, "step": 461 }, { "epoch": 0.07865168539325842, "grad_norm": 1.6302123069763184, "learning_rate": 1e-06, "loss": 0.1225, "step": 462 }, { "epoch": 0.07882192713653388, "grad_norm": 1.5899648666381836, "learning_rate": 1e-06, "loss": 0.1196, "step": 463 }, { "epoch": 0.07899216887980932, "grad_norm": 1.3022098541259766, "learning_rate": 1e-06, "loss": 0.0863, "step": 464 }, { "epoch": 0.07916241062308478, "grad_norm": 1.5681778192520142, "learning_rate": 1e-06, "loss": 0.1, "step": 465 }, { "epoch": 0.07933265236636024, "grad_norm": 1.5576342344284058, "learning_rate": 1e-06, "loss": 0.1098, "step": 466 }, { "epoch": 0.07950289410963568, "grad_norm": 1.1382691860198975, "learning_rate": 1e-06, "loss": 0.0896, "step": 467 }, { "epoch": 0.07967313585291114, "grad_norm": 1.551830768585205, "learning_rate": 1e-06, "loss": 0.1475, "step": 468 }, { "epoch": 0.07984337759618658, "grad_norm": 1.4834508895874023, "learning_rate": 1e-06, "loss": 0.1331, "step": 469 }, { "epoch": 0.08001361933946204, "grad_norm": 1.7695828676223755, "learning_rate": 1e-06, "loss": 0.1544, "step": 470 }, { "epoch": 0.08018386108273749, "grad_norm": 2.0113699436187744, "learning_rate": 1e-06, "loss": 0.1643, "step": 471 }, { "epoch": 0.08035410282601294, "grad_norm": 1.7527496814727783, "learning_rate": 1e-06, "loss": 0.1529, "step": 472 }, { "epoch": 0.08052434456928839, "grad_norm": 1.2186193466186523, "learning_rate": 1e-06, "loss": 0.0892, "step": 473 }, { "epoch": 0.08069458631256383, "grad_norm": 1.2762824296951294, "learning_rate": 1e-06, "loss": 0.0839, "step": 474 }, { "epoch": 0.08086482805583929, "grad_norm": 1.3743529319763184, "learning_rate": 1e-06, "loss": 0.1033, "step": 475 }, { "epoch": 0.08103506979911475, "grad_norm": 1.5724990367889404, "learning_rate": 1e-06, "loss": 0.104, "step": 476 }, { "epoch": 0.08120531154239019, "grad_norm": 1.5919311046600342, "learning_rate": 1e-06, "loss": 0.0943, "step": 477 }, { "epoch": 0.08137555328566565, "grad_norm": 1.5781210660934448, "learning_rate": 1e-06, "loss": 0.1012, "step": 478 }, { "epoch": 0.08154579502894109, "grad_norm": 1.8504365682601929, "learning_rate": 1e-06, "loss": 0.1361, "step": 479 }, { "epoch": 0.08171603677221655, "grad_norm": 1.4634361267089844, "learning_rate": 1e-06, "loss": 0.1075, "step": 480 }, { "epoch": 0.081886278515492, "grad_norm": 1.2929086685180664, "learning_rate": 1e-06, "loss": 0.0966, "step": 481 }, { "epoch": 0.08205652025876745, "grad_norm": 3.413325786590576, "learning_rate": 1e-06, "loss": 0.2179, "step": 482 }, { "epoch": 0.0822267620020429, "grad_norm": 1.2101235389709473, "learning_rate": 1e-06, "loss": 0.0915, "step": 483 }, { "epoch": 0.08239700374531835, "grad_norm": 1.7361688613891602, "learning_rate": 1e-06, "loss": 0.121, "step": 484 }, { "epoch": 0.0825672454885938, "grad_norm": 1.4495158195495605, "learning_rate": 1e-06, "loss": 0.1387, "step": 485 }, { "epoch": 0.08273748723186926, "grad_norm": 1.3097033500671387, "learning_rate": 1e-06, "loss": 0.0994, "step": 486 }, { "epoch": 0.0829077289751447, "grad_norm": 1.6425195932388306, "learning_rate": 1e-06, "loss": 0.1233, "step": 487 }, { "epoch": 0.08307797071842016, "grad_norm": 1.344718098640442, "learning_rate": 1e-06, "loss": 0.107, "step": 488 }, { "epoch": 0.08324821246169561, "grad_norm": 1.1989573240280151, "learning_rate": 1e-06, "loss": 0.0888, "step": 489 }, { "epoch": 0.08341845420497106, "grad_norm": 1.4948607683181763, "learning_rate": 1e-06, "loss": 0.1105, "step": 490 }, { "epoch": 0.08358869594824651, "grad_norm": 1.4378379583358765, "learning_rate": 1e-06, "loss": 0.106, "step": 491 }, { "epoch": 0.08375893769152196, "grad_norm": 1.2525907754898071, "learning_rate": 1e-06, "loss": 0.0968, "step": 492 }, { "epoch": 0.08392917943479741, "grad_norm": 1.479597806930542, "learning_rate": 1e-06, "loss": 0.1217, "step": 493 }, { "epoch": 0.08409942117807287, "grad_norm": 1.3180419206619263, "learning_rate": 1e-06, "loss": 0.1011, "step": 494 }, { "epoch": 0.08426966292134831, "grad_norm": 1.3772739171981812, "learning_rate": 1e-06, "loss": 0.1049, "step": 495 }, { "epoch": 0.08443990466462377, "grad_norm": 1.814520001411438, "learning_rate": 1e-06, "loss": 0.1293, "step": 496 }, { "epoch": 0.08461014640789921, "grad_norm": 1.5137838125228882, "learning_rate": 1e-06, "loss": 0.0999, "step": 497 }, { "epoch": 0.08478038815117467, "grad_norm": 1.4192203283309937, "learning_rate": 1e-06, "loss": 0.114, "step": 498 }, { "epoch": 0.08495062989445012, "grad_norm": 1.3078948259353638, "learning_rate": 1e-06, "loss": 0.1158, "step": 499 }, { "epoch": 0.08512087163772557, "grad_norm": 1.5848225355148315, "learning_rate": 1e-06, "loss": 0.1163, "step": 500 }, { "epoch": 0.08529111338100102, "grad_norm": 1.3920193910598755, "learning_rate": 1e-06, "loss": 0.0995, "step": 501 }, { "epoch": 0.08546135512427647, "grad_norm": 1.3783040046691895, "learning_rate": 1e-06, "loss": 0.0955, "step": 502 }, { "epoch": 0.08563159686755192, "grad_norm": 1.8510295152664185, "learning_rate": 1e-06, "loss": 0.1328, "step": 503 }, { "epoch": 0.08580183861082738, "grad_norm": 3.0699658393859863, "learning_rate": 1e-06, "loss": 0.1774, "step": 504 }, { "epoch": 0.08597208035410282, "grad_norm": 1.5914885997772217, "learning_rate": 1e-06, "loss": 0.1055, "step": 505 }, { "epoch": 0.08614232209737828, "grad_norm": 1.7265233993530273, "learning_rate": 1e-06, "loss": 0.1271, "step": 506 }, { "epoch": 0.08631256384065372, "grad_norm": 1.5482544898986816, "learning_rate": 1e-06, "loss": 0.1279, "step": 507 }, { "epoch": 0.08648280558392918, "grad_norm": 1.321733832359314, "learning_rate": 1e-06, "loss": 0.0937, "step": 508 }, { "epoch": 0.08665304732720464, "grad_norm": 1.4876199960708618, "learning_rate": 1e-06, "loss": 0.1192, "step": 509 }, { "epoch": 0.08682328907048008, "grad_norm": 1.8378245830535889, "learning_rate": 1e-06, "loss": 0.1157, "step": 510 }, { "epoch": 0.08699353081375553, "grad_norm": 1.625184178352356, "learning_rate": 1e-06, "loss": 0.1206, "step": 511 }, { "epoch": 0.08716377255703098, "grad_norm": 1.4779977798461914, "learning_rate": 1e-06, "loss": 0.1103, "step": 512 }, { "epoch": 0.08733401430030643, "grad_norm": 1.4066017866134644, "learning_rate": 1e-06, "loss": 0.1011, "step": 513 }, { "epoch": 0.08750425604358189, "grad_norm": 1.525978922843933, "learning_rate": 1e-06, "loss": 0.096, "step": 514 }, { "epoch": 0.08767449778685733, "grad_norm": 1.49380362033844, "learning_rate": 1e-06, "loss": 0.1135, "step": 515 }, { "epoch": 0.08784473953013279, "grad_norm": 1.74233078956604, "learning_rate": 1e-06, "loss": 0.1114, "step": 516 }, { "epoch": 0.08801498127340825, "grad_norm": 1.3099770545959473, "learning_rate": 1e-06, "loss": 0.0907, "step": 517 }, { "epoch": 0.08818522301668369, "grad_norm": 1.7866302728652954, "learning_rate": 1e-06, "loss": 0.1178, "step": 518 }, { "epoch": 0.08835546475995915, "grad_norm": 1.6818584203720093, "learning_rate": 1e-06, "loss": 0.1252, "step": 519 }, { "epoch": 0.08852570650323459, "grad_norm": 1.628466248512268, "learning_rate": 1e-06, "loss": 0.1105, "step": 520 }, { "epoch": 0.08869594824651005, "grad_norm": 1.681505799293518, "learning_rate": 1e-06, "loss": 0.1177, "step": 521 }, { "epoch": 0.0888661899897855, "grad_norm": 1.2766761779785156, "learning_rate": 1e-06, "loss": 0.0799, "step": 522 }, { "epoch": 0.08903643173306094, "grad_norm": 1.575498342514038, "learning_rate": 1e-06, "loss": 0.1068, "step": 523 }, { "epoch": 0.0892066734763364, "grad_norm": 1.4087481498718262, "learning_rate": 1e-06, "loss": 0.0935, "step": 524 }, { "epoch": 0.08937691521961184, "grad_norm": 1.4377367496490479, "learning_rate": 1e-06, "loss": 0.0968, "step": 525 }, { "epoch": 0.0895471569628873, "grad_norm": 1.7580372095108032, "learning_rate": 1e-06, "loss": 0.1101, "step": 526 }, { "epoch": 0.08971739870616276, "grad_norm": 1.5135084390640259, "learning_rate": 1e-06, "loss": 0.1005, "step": 527 }, { "epoch": 0.0898876404494382, "grad_norm": 1.4128092527389526, "learning_rate": 1e-06, "loss": 0.0858, "step": 528 }, { "epoch": 0.09005788219271366, "grad_norm": 1.4026193618774414, "learning_rate": 1e-06, "loss": 0.1002, "step": 529 }, { "epoch": 0.0902281239359891, "grad_norm": 1.3924407958984375, "learning_rate": 1e-06, "loss": 0.0895, "step": 530 }, { "epoch": 0.09039836567926456, "grad_norm": 1.4779433012008667, "learning_rate": 1e-06, "loss": 0.1033, "step": 531 }, { "epoch": 0.09056860742254001, "grad_norm": 1.9175746440887451, "learning_rate": 1e-06, "loss": 0.1302, "step": 532 }, { "epoch": 0.09073884916581546, "grad_norm": 1.7925680875778198, "learning_rate": 1e-06, "loss": 0.1162, "step": 533 }, { "epoch": 0.09090909090909091, "grad_norm": 1.3707115650177002, "learning_rate": 1e-06, "loss": 0.0872, "step": 534 }, { "epoch": 0.09107933265236635, "grad_norm": 1.3779429197311401, "learning_rate": 1e-06, "loss": 0.0982, "step": 535 }, { "epoch": 0.09124957439564181, "grad_norm": 1.5113381147384644, "learning_rate": 1e-06, "loss": 0.1036, "step": 536 }, { "epoch": 0.09141981613891727, "grad_norm": 1.5571949481964111, "learning_rate": 1e-06, "loss": 0.0933, "step": 537 }, { "epoch": 0.09159005788219271, "grad_norm": 1.3230814933776855, "learning_rate": 1e-06, "loss": 0.1019, "step": 538 }, { "epoch": 0.09176029962546817, "grad_norm": 1.257027506828308, "learning_rate": 1e-06, "loss": 0.0775, "step": 539 }, { "epoch": 0.09193054136874361, "grad_norm": 1.3978290557861328, "learning_rate": 1e-06, "loss": 0.0942, "step": 540 }, { "epoch": 0.09210078311201907, "grad_norm": 1.290602207183838, "learning_rate": 1e-06, "loss": 0.0809, "step": 541 }, { "epoch": 0.09227102485529452, "grad_norm": 1.3179094791412354, "learning_rate": 1e-06, "loss": 0.0963, "step": 542 }, { "epoch": 0.09244126659856997, "grad_norm": 1.6563512086868286, "learning_rate": 1e-06, "loss": 0.111, "step": 543 }, { "epoch": 0.09261150834184542, "grad_norm": 1.4956837892532349, "learning_rate": 1e-06, "loss": 0.1165, "step": 544 }, { "epoch": 0.09278175008512086, "grad_norm": 1.3757632970809937, "learning_rate": 1e-06, "loss": 0.0897, "step": 545 }, { "epoch": 0.09295199182839632, "grad_norm": 1.205857276916504, "learning_rate": 1e-06, "loss": 0.0787, "step": 546 }, { "epoch": 0.09312223357167178, "grad_norm": 1.7391396760940552, "learning_rate": 1e-06, "loss": 0.1286, "step": 547 }, { "epoch": 0.09329247531494722, "grad_norm": 1.492804765701294, "learning_rate": 1e-06, "loss": 0.0916, "step": 548 }, { "epoch": 0.09346271705822268, "grad_norm": 1.6261539459228516, "learning_rate": 1e-06, "loss": 0.0995, "step": 549 }, { "epoch": 0.09363295880149813, "grad_norm": 1.5201900005340576, "learning_rate": 1e-06, "loss": 0.0987, "step": 550 }, { "epoch": 0.09380320054477358, "grad_norm": 1.3725306987762451, "learning_rate": 1e-06, "loss": 0.0958, "step": 551 }, { "epoch": 0.09397344228804903, "grad_norm": 1.601542592048645, "learning_rate": 1e-06, "loss": 0.1168, "step": 552 }, { "epoch": 0.09414368403132448, "grad_norm": 1.4362895488739014, "learning_rate": 1e-06, "loss": 0.0896, "step": 553 }, { "epoch": 0.09431392577459993, "grad_norm": 1.3668473958969116, "learning_rate": 1e-06, "loss": 0.1029, "step": 554 }, { "epoch": 0.09448416751787539, "grad_norm": 1.6272999048233032, "learning_rate": 1e-06, "loss": 0.0938, "step": 555 }, { "epoch": 0.09465440926115083, "grad_norm": 1.8201227188110352, "learning_rate": 1e-06, "loss": 0.1186, "step": 556 }, { "epoch": 0.09482465100442629, "grad_norm": 1.4684869050979614, "learning_rate": 1e-06, "loss": 0.1029, "step": 557 }, { "epoch": 0.09499489274770173, "grad_norm": 1.4213141202926636, "learning_rate": 1e-06, "loss": 0.0875, "step": 558 }, { "epoch": 0.09516513449097719, "grad_norm": 1.4483574628829956, "learning_rate": 1e-06, "loss": 0.0996, "step": 559 }, { "epoch": 0.09533537623425264, "grad_norm": 1.4041225910186768, "learning_rate": 1e-06, "loss": 0.0769, "step": 560 }, { "epoch": 0.09550561797752809, "grad_norm": 1.3655953407287598, "learning_rate": 1e-06, "loss": 0.0927, "step": 561 }, { "epoch": 0.09567585972080354, "grad_norm": 1.2655277252197266, "learning_rate": 1e-06, "loss": 0.078, "step": 562 }, { "epoch": 0.09584610146407899, "grad_norm": 1.6605805158615112, "learning_rate": 1e-06, "loss": 0.1101, "step": 563 }, { "epoch": 0.09601634320735444, "grad_norm": 1.6565370559692383, "learning_rate": 1e-06, "loss": 0.1107, "step": 564 }, { "epoch": 0.0961865849506299, "grad_norm": 1.5182013511657715, "learning_rate": 1e-06, "loss": 0.0992, "step": 565 }, { "epoch": 0.09635682669390534, "grad_norm": 1.3900082111358643, "learning_rate": 1e-06, "loss": 0.0999, "step": 566 }, { "epoch": 0.0965270684371808, "grad_norm": 1.2898858785629272, "learning_rate": 1e-06, "loss": 0.0906, "step": 567 }, { "epoch": 0.09669731018045624, "grad_norm": 3.67594838142395, "learning_rate": 1e-06, "loss": 0.1686, "step": 568 }, { "epoch": 0.0968675519237317, "grad_norm": 1.313101887702942, "learning_rate": 1e-06, "loss": 0.0799, "step": 569 }, { "epoch": 0.09703779366700716, "grad_norm": 3.672680139541626, "learning_rate": 1e-06, "loss": 0.2146, "step": 570 }, { "epoch": 0.0972080354102826, "grad_norm": 1.770923376083374, "learning_rate": 1e-06, "loss": 0.0999, "step": 571 }, { "epoch": 0.09737827715355805, "grad_norm": 1.533212661743164, "learning_rate": 1e-06, "loss": 0.1004, "step": 572 }, { "epoch": 0.0975485188968335, "grad_norm": 1.6698973178863525, "learning_rate": 1e-06, "loss": 0.0945, "step": 573 }, { "epoch": 0.09771876064010895, "grad_norm": 1.4515860080718994, "learning_rate": 1e-06, "loss": 0.1065, "step": 574 }, { "epoch": 0.09788900238338441, "grad_norm": 1.332733154296875, "learning_rate": 1e-06, "loss": 0.0875, "step": 575 }, { "epoch": 0.09805924412665985, "grad_norm": 1.3322087526321411, "learning_rate": 1e-06, "loss": 0.0872, "step": 576 }, { "epoch": 0.09822948586993531, "grad_norm": 1.320599913597107, "learning_rate": 1e-06, "loss": 0.0832, "step": 577 }, { "epoch": 0.09839972761321075, "grad_norm": 1.4648524522781372, "learning_rate": 1e-06, "loss": 0.0963, "step": 578 }, { "epoch": 0.09856996935648621, "grad_norm": 1.6072378158569336, "learning_rate": 1e-06, "loss": 0.09, "step": 579 }, { "epoch": 0.09874021109976167, "grad_norm": 2.066091537475586, "learning_rate": 1e-06, "loss": 0.1273, "step": 580 }, { "epoch": 0.09891045284303711, "grad_norm": 1.3935089111328125, "learning_rate": 1e-06, "loss": 0.0938, "step": 581 }, { "epoch": 0.09908069458631256, "grad_norm": 1.4537439346313477, "learning_rate": 1e-06, "loss": 0.0908, "step": 582 }, { "epoch": 0.09925093632958802, "grad_norm": 1.4111661911010742, "learning_rate": 1e-06, "loss": 0.0826, "step": 583 }, { "epoch": 0.09942117807286346, "grad_norm": 1.417158603668213, "learning_rate": 1e-06, "loss": 0.0916, "step": 584 }, { "epoch": 0.09959141981613892, "grad_norm": 1.5110013484954834, "learning_rate": 1e-06, "loss": 0.0842, "step": 585 }, { "epoch": 0.09976166155941436, "grad_norm": 1.3743090629577637, "learning_rate": 1e-06, "loss": 0.0891, "step": 586 }, { "epoch": 0.09993190330268982, "grad_norm": 1.4649263620376587, "learning_rate": 1e-06, "loss": 0.0791, "step": 587 }, { "epoch": 0.10010214504596528, "grad_norm": 1.6102653741836548, "learning_rate": 1e-06, "loss": 0.0856, "step": 588 }, { "epoch": 0.10027238678924072, "grad_norm": 1.7673293352127075, "learning_rate": 1e-06, "loss": 0.1013, "step": 589 }, { "epoch": 0.10044262853251618, "grad_norm": 1.4860748052597046, "learning_rate": 1e-06, "loss": 0.0854, "step": 590 }, { "epoch": 0.10061287027579162, "grad_norm": 1.5368369817733765, "learning_rate": 1e-06, "loss": 0.089, "step": 591 }, { "epoch": 0.10078311201906708, "grad_norm": 1.6211143732070923, "learning_rate": 1e-06, "loss": 0.1087, "step": 592 }, { "epoch": 0.10095335376234253, "grad_norm": 1.432572603225708, "learning_rate": 1e-06, "loss": 0.0934, "step": 593 }, { "epoch": 0.10112359550561797, "grad_norm": 1.351190209388733, "learning_rate": 1e-06, "loss": 0.0816, "step": 594 }, { "epoch": 0.10129383724889343, "grad_norm": 1.5220497846603394, "learning_rate": 1e-06, "loss": 0.1014, "step": 595 }, { "epoch": 0.10146407899216887, "grad_norm": 3.933367967605591, "learning_rate": 1e-06, "loss": 0.2032, "step": 596 }, { "epoch": 0.10163432073544433, "grad_norm": 1.5819555521011353, "learning_rate": 1e-06, "loss": 0.1125, "step": 597 }, { "epoch": 0.10180456247871979, "grad_norm": 1.5007481575012207, "learning_rate": 1e-06, "loss": 0.0991, "step": 598 }, { "epoch": 0.10197480422199523, "grad_norm": 1.6869536638259888, "learning_rate": 1e-06, "loss": 0.1113, "step": 599 }, { "epoch": 0.10214504596527069, "grad_norm": 1.5548107624053955, "learning_rate": 1e-06, "loss": 0.1018, "step": 600 }, { "epoch": 0.10231528770854613, "grad_norm": 1.7281802892684937, "learning_rate": 1e-06, "loss": 0.0962, "step": 601 }, { "epoch": 0.10248552945182159, "grad_norm": 1.603119969367981, "learning_rate": 1e-06, "loss": 0.0764, "step": 602 }, { "epoch": 0.10265577119509704, "grad_norm": 1.6384819746017456, "learning_rate": 1e-06, "loss": 0.0842, "step": 603 }, { "epoch": 0.10282601293837249, "grad_norm": 1.245998501777649, "learning_rate": 1e-06, "loss": 0.086, "step": 604 }, { "epoch": 0.10299625468164794, "grad_norm": 1.5214192867279053, "learning_rate": 1e-06, "loss": 0.0834, "step": 605 }, { "epoch": 0.10316649642492338, "grad_norm": 1.345001459121704, "learning_rate": 1e-06, "loss": 0.0869, "step": 606 }, { "epoch": 0.10333673816819884, "grad_norm": 3.7370526790618896, "learning_rate": 1e-06, "loss": 0.1794, "step": 607 }, { "epoch": 0.1035069799114743, "grad_norm": 1.3892709016799927, "learning_rate": 1e-06, "loss": 0.0912, "step": 608 }, { "epoch": 0.10367722165474974, "grad_norm": 1.4373610019683838, "learning_rate": 1e-06, "loss": 0.0914, "step": 609 }, { "epoch": 0.1038474633980252, "grad_norm": 1.4336578845977783, "learning_rate": 1e-06, "loss": 0.0732, "step": 610 }, { "epoch": 0.10401770514130065, "grad_norm": 1.7176496982574463, "learning_rate": 1e-06, "loss": 0.1008, "step": 611 }, { "epoch": 0.1041879468845761, "grad_norm": 1.9851466417312622, "learning_rate": 1e-06, "loss": 0.1067, "step": 612 }, { "epoch": 0.10435818862785155, "grad_norm": 1.8875466585159302, "learning_rate": 1e-06, "loss": 0.1106, "step": 613 }, { "epoch": 0.104528430371127, "grad_norm": 1.458789348602295, "learning_rate": 1e-06, "loss": 0.0843, "step": 614 }, { "epoch": 0.10469867211440245, "grad_norm": 1.6216390132904053, "learning_rate": 1e-06, "loss": 0.09, "step": 615 }, { "epoch": 0.10486891385767791, "grad_norm": 1.2169405221939087, "learning_rate": 1e-06, "loss": 0.0626, "step": 616 }, { "epoch": 0.10503915560095335, "grad_norm": 1.3988337516784668, "learning_rate": 1e-06, "loss": 0.0891, "step": 617 }, { "epoch": 0.10520939734422881, "grad_norm": 1.5023198127746582, "learning_rate": 1e-06, "loss": 0.0968, "step": 618 }, { "epoch": 0.10537963908750425, "grad_norm": 1.4495683908462524, "learning_rate": 1e-06, "loss": 0.0865, "step": 619 }, { "epoch": 0.10554988083077971, "grad_norm": 1.256415843963623, "learning_rate": 1e-06, "loss": 0.0713, "step": 620 }, { "epoch": 0.10572012257405516, "grad_norm": 1.7719178199768066, "learning_rate": 1e-06, "loss": 0.0998, "step": 621 }, { "epoch": 0.10589036431733061, "grad_norm": 1.5151079893112183, "learning_rate": 1e-06, "loss": 0.0812, "step": 622 }, { "epoch": 0.10606060606060606, "grad_norm": 4.284844875335693, "learning_rate": 1e-06, "loss": 0.178, "step": 623 }, { "epoch": 0.1062308478038815, "grad_norm": 1.978816032409668, "learning_rate": 1e-06, "loss": 0.0872, "step": 624 }, { "epoch": 0.10640108954715696, "grad_norm": 1.5746681690216064, "learning_rate": 1e-06, "loss": 0.0849, "step": 625 }, { "epoch": 0.10657133129043242, "grad_norm": 1.4485297203063965, "learning_rate": 1e-06, "loss": 0.081, "step": 626 }, { "epoch": 0.10674157303370786, "grad_norm": 1.581984281539917, "learning_rate": 1e-06, "loss": 0.1037, "step": 627 }, { "epoch": 0.10691181477698332, "grad_norm": 1.3287845849990845, "learning_rate": 1e-06, "loss": 0.0722, "step": 628 }, { "epoch": 0.10708205652025876, "grad_norm": 1.2169119119644165, "learning_rate": 1e-06, "loss": 0.0588, "step": 629 }, { "epoch": 0.10725229826353422, "grad_norm": 1.2675771713256836, "learning_rate": 1e-06, "loss": 0.0792, "step": 630 }, { "epoch": 0.10742254000680967, "grad_norm": 1.3391927480697632, "learning_rate": 1e-06, "loss": 0.0884, "step": 631 }, { "epoch": 0.10759278175008512, "grad_norm": 1.7200751304626465, "learning_rate": 1e-06, "loss": 0.0922, "step": 632 }, { "epoch": 0.10776302349336057, "grad_norm": 1.4246052503585815, "learning_rate": 1e-06, "loss": 0.08, "step": 633 }, { "epoch": 0.10793326523663602, "grad_norm": 1.4974547624588013, "learning_rate": 1e-06, "loss": 0.0738, "step": 634 }, { "epoch": 0.10810350697991147, "grad_norm": 1.7298548221588135, "learning_rate": 1e-06, "loss": 0.0869, "step": 635 }, { "epoch": 0.10827374872318693, "grad_norm": 1.4626245498657227, "learning_rate": 1e-06, "loss": 0.0745, "step": 636 }, { "epoch": 0.10844399046646237, "grad_norm": 1.5561753511428833, "learning_rate": 1e-06, "loss": 0.0807, "step": 637 }, { "epoch": 0.10861423220973783, "grad_norm": 1.6584053039550781, "learning_rate": 1e-06, "loss": 0.0951, "step": 638 }, { "epoch": 0.10878447395301327, "grad_norm": 1.3954371213912964, "learning_rate": 1e-06, "loss": 0.0754, "step": 639 }, { "epoch": 0.10895471569628873, "grad_norm": 3.7084250450134277, "learning_rate": 1e-06, "loss": 0.1604, "step": 640 }, { "epoch": 0.10912495743956419, "grad_norm": 1.594537377357483, "learning_rate": 1e-06, "loss": 0.1014, "step": 641 }, { "epoch": 0.10929519918283963, "grad_norm": 3.0633435249328613, "learning_rate": 1e-06, "loss": 0.1342, "step": 642 }, { "epoch": 0.10946544092611508, "grad_norm": 1.6951473951339722, "learning_rate": 1e-06, "loss": 0.0925, "step": 643 }, { "epoch": 0.10963568266939054, "grad_norm": 1.4243108034133911, "learning_rate": 1e-06, "loss": 0.0696, "step": 644 }, { "epoch": 0.10980592441266598, "grad_norm": 1.480381965637207, "learning_rate": 1e-06, "loss": 0.0768, "step": 645 }, { "epoch": 0.10997616615594144, "grad_norm": 1.3805941343307495, "learning_rate": 1e-06, "loss": 0.0848, "step": 646 }, { "epoch": 0.11014640789921688, "grad_norm": 1.4682902097702026, "learning_rate": 1e-06, "loss": 0.0722, "step": 647 }, { "epoch": 0.11031664964249234, "grad_norm": 1.408152461051941, "learning_rate": 1e-06, "loss": 0.0734, "step": 648 }, { "epoch": 0.1104868913857678, "grad_norm": 1.8382225036621094, "learning_rate": 1e-06, "loss": 0.1047, "step": 649 }, { "epoch": 0.11065713312904324, "grad_norm": 1.4985864162445068, "learning_rate": 1e-06, "loss": 0.0845, "step": 650 }, { "epoch": 0.1108273748723187, "grad_norm": 1.340008020401001, "learning_rate": 1e-06, "loss": 0.0741, "step": 651 }, { "epoch": 0.11099761661559414, "grad_norm": 1.65091073513031, "learning_rate": 1e-06, "loss": 0.0914, "step": 652 }, { "epoch": 0.1111678583588696, "grad_norm": 1.6151150465011597, "learning_rate": 1e-06, "loss": 0.084, "step": 653 }, { "epoch": 0.11133810010214505, "grad_norm": 1.8154164552688599, "learning_rate": 1e-06, "loss": 0.0988, "step": 654 }, { "epoch": 0.1115083418454205, "grad_norm": 1.4415311813354492, "learning_rate": 1e-06, "loss": 0.0841, "step": 655 }, { "epoch": 0.11167858358869595, "grad_norm": 2.2858686447143555, "learning_rate": 1e-06, "loss": 0.1037, "step": 656 }, { "epoch": 0.1118488253319714, "grad_norm": 1.6448941230773926, "learning_rate": 1e-06, "loss": 0.082, "step": 657 }, { "epoch": 0.11201906707524685, "grad_norm": 1.4810398817062378, "learning_rate": 1e-06, "loss": 0.0806, "step": 658 }, { "epoch": 0.11218930881852231, "grad_norm": 1.6105705499649048, "learning_rate": 1e-06, "loss": 0.0962, "step": 659 }, { "epoch": 0.11235955056179775, "grad_norm": 1.5040578842163086, "learning_rate": 1e-06, "loss": 0.0842, "step": 660 }, { "epoch": 0.1125297923050732, "grad_norm": 1.6204015016555786, "learning_rate": 1e-06, "loss": 0.082, "step": 661 }, { "epoch": 0.11270003404834865, "grad_norm": 1.3563892841339111, "learning_rate": 1e-06, "loss": 0.061, "step": 662 }, { "epoch": 0.1128702757916241, "grad_norm": 2.079106330871582, "learning_rate": 1e-06, "loss": 0.0947, "step": 663 }, { "epoch": 0.11304051753489956, "grad_norm": 1.3969217538833618, "learning_rate": 1e-06, "loss": 0.0723, "step": 664 }, { "epoch": 0.113210759278175, "grad_norm": 1.3509082794189453, "learning_rate": 1e-06, "loss": 0.0811, "step": 665 }, { "epoch": 0.11338100102145046, "grad_norm": 1.4950233697891235, "learning_rate": 1e-06, "loss": 0.089, "step": 666 }, { "epoch": 0.1135512427647259, "grad_norm": 1.6716737747192383, "learning_rate": 1e-06, "loss": 0.0861, "step": 667 }, { "epoch": 0.11372148450800136, "grad_norm": 1.8707127571105957, "learning_rate": 1e-06, "loss": 0.0924, "step": 668 }, { "epoch": 0.11389172625127682, "grad_norm": 1.5277262926101685, "learning_rate": 1e-06, "loss": 0.079, "step": 669 }, { "epoch": 0.11406196799455226, "grad_norm": 1.7219480276107788, "learning_rate": 1e-06, "loss": 0.0928, "step": 670 }, { "epoch": 0.11423220973782772, "grad_norm": 1.4405816793441772, "learning_rate": 1e-06, "loss": 0.0767, "step": 671 }, { "epoch": 0.11440245148110317, "grad_norm": 1.7040899991989136, "learning_rate": 1e-06, "loss": 0.0855, "step": 672 }, { "epoch": 0.11457269322437862, "grad_norm": 1.588266134262085, "learning_rate": 1e-06, "loss": 0.0772, "step": 673 }, { "epoch": 0.11474293496765407, "grad_norm": 1.8362873792648315, "learning_rate": 1e-06, "loss": 0.0978, "step": 674 }, { "epoch": 0.11491317671092952, "grad_norm": 1.6758869886398315, "learning_rate": 1e-06, "loss": 0.0928, "step": 675 }, { "epoch": 0.11508341845420497, "grad_norm": 1.7752368450164795, "learning_rate": 1e-06, "loss": 0.0879, "step": 676 }, { "epoch": 0.11525366019748043, "grad_norm": 1.6252251863479614, "learning_rate": 1e-06, "loss": 0.096, "step": 677 }, { "epoch": 0.11542390194075587, "grad_norm": 1.7671598196029663, "learning_rate": 1e-06, "loss": 0.0975, "step": 678 }, { "epoch": 0.11559414368403133, "grad_norm": 1.3313266038894653, "learning_rate": 1e-06, "loss": 0.0782, "step": 679 }, { "epoch": 0.11576438542730677, "grad_norm": 1.298072338104248, "learning_rate": 1e-06, "loss": 0.0788, "step": 680 }, { "epoch": 0.11593462717058223, "grad_norm": 1.4549241065979004, "learning_rate": 1e-06, "loss": 0.0827, "step": 681 }, { "epoch": 0.11610486891385768, "grad_norm": 1.5572149753570557, "learning_rate": 1e-06, "loss": 0.0866, "step": 682 }, { "epoch": 0.11627511065713313, "grad_norm": 1.3730863332748413, "learning_rate": 1e-06, "loss": 0.0714, "step": 683 }, { "epoch": 0.11644535240040858, "grad_norm": 1.7244811058044434, "learning_rate": 1e-06, "loss": 0.0768, "step": 684 }, { "epoch": 0.11661559414368403, "grad_norm": 1.879199743270874, "learning_rate": 1e-06, "loss": 0.0863, "step": 685 }, { "epoch": 0.11678583588695948, "grad_norm": 1.3578203916549683, "learning_rate": 1e-06, "loss": 0.0623, "step": 686 }, { "epoch": 0.11695607763023494, "grad_norm": 1.5125126838684082, "learning_rate": 1e-06, "loss": 0.0872, "step": 687 }, { "epoch": 0.11712631937351038, "grad_norm": 2.28521728515625, "learning_rate": 1e-06, "loss": 0.1136, "step": 688 }, { "epoch": 0.11729656111678584, "grad_norm": 1.5524616241455078, "learning_rate": 1e-06, "loss": 0.0815, "step": 689 }, { "epoch": 0.11746680286006128, "grad_norm": 1.5951274633407593, "learning_rate": 1e-06, "loss": 0.0779, "step": 690 }, { "epoch": 0.11763704460333674, "grad_norm": 1.1863946914672852, "learning_rate": 1e-06, "loss": 0.0611, "step": 691 }, { "epoch": 0.1178072863466122, "grad_norm": 1.413318395614624, "learning_rate": 1e-06, "loss": 0.0735, "step": 692 }, { "epoch": 0.11797752808988764, "grad_norm": 1.5461151599884033, "learning_rate": 1e-06, "loss": 0.0794, "step": 693 }, { "epoch": 0.1181477698331631, "grad_norm": 1.5801036357879639, "learning_rate": 1e-06, "loss": 0.0805, "step": 694 }, { "epoch": 0.11831801157643854, "grad_norm": 1.341295838356018, "learning_rate": 1e-06, "loss": 0.07, "step": 695 }, { "epoch": 0.118488253319714, "grad_norm": 1.561644196510315, "learning_rate": 1e-06, "loss": 0.0756, "step": 696 }, { "epoch": 0.11865849506298945, "grad_norm": 1.3806345462799072, "learning_rate": 1e-06, "loss": 0.0646, "step": 697 }, { "epoch": 0.11882873680626489, "grad_norm": 1.5367329120635986, "learning_rate": 1e-06, "loss": 0.0775, "step": 698 }, { "epoch": 0.11899897854954035, "grad_norm": 1.8795855045318604, "learning_rate": 1e-06, "loss": 0.0936, "step": 699 }, { "epoch": 0.11916922029281579, "grad_norm": 1.3554261922836304, "learning_rate": 1e-06, "loss": 0.0624, "step": 700 }, { "epoch": 0.11933946203609125, "grad_norm": 1.4295129776000977, "learning_rate": 1e-06, "loss": 0.0817, "step": 701 }, { "epoch": 0.1195097037793667, "grad_norm": 2.1465654373168945, "learning_rate": 1e-06, "loss": 0.1037, "step": 702 }, { "epoch": 0.11967994552264215, "grad_norm": 1.2775219678878784, "learning_rate": 1e-06, "loss": 0.0698, "step": 703 }, { "epoch": 0.1198501872659176, "grad_norm": 1.6362903118133545, "learning_rate": 1e-06, "loss": 0.0867, "step": 704 }, { "epoch": 0.12002042900919306, "grad_norm": 1.531864047050476, "learning_rate": 1e-06, "loss": 0.075, "step": 705 }, { "epoch": 0.1201906707524685, "grad_norm": 1.4992605447769165, "learning_rate": 1e-06, "loss": 0.0909, "step": 706 }, { "epoch": 0.12036091249574396, "grad_norm": 1.376063346862793, "learning_rate": 1e-06, "loss": 0.0639, "step": 707 }, { "epoch": 0.1205311542390194, "grad_norm": 1.4128174781799316, "learning_rate": 1e-06, "loss": 0.0725, "step": 708 }, { "epoch": 0.12070139598229486, "grad_norm": 1.9290549755096436, "learning_rate": 1e-06, "loss": 0.1059, "step": 709 }, { "epoch": 0.12087163772557032, "grad_norm": 1.6471437215805054, "learning_rate": 1e-06, "loss": 0.0768, "step": 710 }, { "epoch": 0.12104187946884576, "grad_norm": 1.5400367975234985, "learning_rate": 1e-06, "loss": 0.0828, "step": 711 }, { "epoch": 0.12121212121212122, "grad_norm": 1.3457374572753906, "learning_rate": 1e-06, "loss": 0.0738, "step": 712 }, { "epoch": 0.12138236295539666, "grad_norm": 1.8186038732528687, "learning_rate": 1e-06, "loss": 0.0968, "step": 713 }, { "epoch": 0.12155260469867211, "grad_norm": 1.650877833366394, "learning_rate": 1e-06, "loss": 0.076, "step": 714 }, { "epoch": 0.12172284644194757, "grad_norm": 1.4798765182495117, "learning_rate": 1e-06, "loss": 0.0787, "step": 715 }, { "epoch": 0.12189308818522301, "grad_norm": 1.2624869346618652, "learning_rate": 1e-06, "loss": 0.0592, "step": 716 }, { "epoch": 0.12206332992849847, "grad_norm": 1.596134901046753, "learning_rate": 1e-06, "loss": 0.0761, "step": 717 }, { "epoch": 0.12223357167177391, "grad_norm": 1.538567304611206, "learning_rate": 1e-06, "loss": 0.0723, "step": 718 }, { "epoch": 0.12240381341504937, "grad_norm": 5.429737567901611, "learning_rate": 1e-06, "loss": 0.1379, "step": 719 }, { "epoch": 0.12257405515832483, "grad_norm": 1.6454511880874634, "learning_rate": 1e-06, "loss": 0.0871, "step": 720 }, { "epoch": 0.12274429690160027, "grad_norm": 1.4540003538131714, "learning_rate": 1e-06, "loss": 0.0788, "step": 721 }, { "epoch": 0.12291453864487573, "grad_norm": 1.774742603302002, "learning_rate": 1e-06, "loss": 0.0824, "step": 722 }, { "epoch": 0.12308478038815117, "grad_norm": 1.881834626197815, "learning_rate": 1e-06, "loss": 0.1062, "step": 723 }, { "epoch": 0.12325502213142663, "grad_norm": 1.543564796447754, "learning_rate": 1e-06, "loss": 0.0949, "step": 724 }, { "epoch": 0.12342526387470208, "grad_norm": 1.6501184701919556, "learning_rate": 1e-06, "loss": 0.0775, "step": 725 }, { "epoch": 0.12359550561797752, "grad_norm": 1.3438044786453247, "learning_rate": 1e-06, "loss": 0.0764, "step": 726 }, { "epoch": 0.12376574736125298, "grad_norm": 1.2549158334732056, "learning_rate": 1e-06, "loss": 0.0575, "step": 727 }, { "epoch": 0.12393598910452842, "grad_norm": 1.8060505390167236, "learning_rate": 1e-06, "loss": 0.0829, "step": 728 }, { "epoch": 0.12410623084780388, "grad_norm": 1.6881535053253174, "learning_rate": 1e-06, "loss": 0.0849, "step": 729 }, { "epoch": 0.12427647259107934, "grad_norm": 1.4681129455566406, "learning_rate": 1e-06, "loss": 0.054, "step": 730 }, { "epoch": 0.12444671433435478, "grad_norm": 1.806591510772705, "learning_rate": 1e-06, "loss": 0.081, "step": 731 }, { "epoch": 0.12461695607763024, "grad_norm": 1.6089766025543213, "learning_rate": 1e-06, "loss": 0.0773, "step": 732 }, { "epoch": 0.12478719782090568, "grad_norm": 1.6091641187667847, "learning_rate": 1e-06, "loss": 0.0804, "step": 733 }, { "epoch": 0.12495743956418114, "grad_norm": 1.669281005859375, "learning_rate": 1e-06, "loss": 0.0771, "step": 734 }, { "epoch": 0.12512768130745658, "grad_norm": 1.7492468357086182, "learning_rate": 1e-06, "loss": 0.0777, "step": 735 }, { "epoch": 0.12529792305073204, "grad_norm": 1.7161555290222168, "learning_rate": 1e-06, "loss": 0.0898, "step": 736 }, { "epoch": 0.1254681647940075, "grad_norm": 1.7574137449264526, "learning_rate": 1e-06, "loss": 0.107, "step": 737 }, { "epoch": 0.12563840653728295, "grad_norm": 1.3838268518447876, "learning_rate": 1e-06, "loss": 0.0699, "step": 738 }, { "epoch": 0.1258086482805584, "grad_norm": 1.6737573146820068, "learning_rate": 1e-06, "loss": 0.0846, "step": 739 }, { "epoch": 0.12597889002383383, "grad_norm": 2.306833028793335, "learning_rate": 1e-06, "loss": 0.1018, "step": 740 }, { "epoch": 0.1261491317671093, "grad_norm": 2.311218738555908, "learning_rate": 1e-06, "loss": 0.1155, "step": 741 }, { "epoch": 0.12631937351038475, "grad_norm": 1.696584701538086, "learning_rate": 1e-06, "loss": 0.069, "step": 742 }, { "epoch": 0.1264896152536602, "grad_norm": 1.3152414560317993, "learning_rate": 1e-06, "loss": 0.0606, "step": 743 }, { "epoch": 0.12665985699693566, "grad_norm": 1.6591945886611938, "learning_rate": 1e-06, "loss": 0.0859, "step": 744 }, { "epoch": 0.1268300987402111, "grad_norm": 1.4438961744308472, "learning_rate": 1e-06, "loss": 0.0702, "step": 745 }, { "epoch": 0.12700034048348655, "grad_norm": 1.289958119392395, "learning_rate": 1e-06, "loss": 0.067, "step": 746 }, { "epoch": 0.127170582226762, "grad_norm": 2.529083251953125, "learning_rate": 1e-06, "loss": 0.1006, "step": 747 }, { "epoch": 0.12734082397003746, "grad_norm": 1.5549131631851196, "learning_rate": 1e-06, "loss": 0.0815, "step": 748 }, { "epoch": 0.12751106571331292, "grad_norm": 1.5461150407791138, "learning_rate": 1e-06, "loss": 0.0835, "step": 749 }, { "epoch": 0.12768130745658834, "grad_norm": 1.4177753925323486, "learning_rate": 1e-06, "loss": 0.072, "step": 750 }, { "epoch": 0.1278515491998638, "grad_norm": 1.6254005432128906, "learning_rate": 1e-06, "loss": 0.0788, "step": 751 }, { "epoch": 0.12802179094313926, "grad_norm": 1.7862988710403442, "learning_rate": 1e-06, "loss": 0.0813, "step": 752 }, { "epoch": 0.12819203268641471, "grad_norm": 1.7907599210739136, "learning_rate": 1e-06, "loss": 0.0889, "step": 753 }, { "epoch": 0.12836227442969017, "grad_norm": 1.9308052062988281, "learning_rate": 1e-06, "loss": 0.0851, "step": 754 }, { "epoch": 0.1285325161729656, "grad_norm": 1.512603998184204, "learning_rate": 1e-06, "loss": 0.0737, "step": 755 }, { "epoch": 0.12870275791624106, "grad_norm": 1.6706660985946655, "learning_rate": 1e-06, "loss": 0.0857, "step": 756 }, { "epoch": 0.1288729996595165, "grad_norm": 1.6297451257705688, "learning_rate": 1e-06, "loss": 0.0796, "step": 757 }, { "epoch": 0.12904324140279197, "grad_norm": 1.4156213998794556, "learning_rate": 1e-06, "loss": 0.0762, "step": 758 }, { "epoch": 0.12921348314606743, "grad_norm": 1.7496767044067383, "learning_rate": 1e-06, "loss": 0.0802, "step": 759 }, { "epoch": 0.12938372488934285, "grad_norm": 1.380969524383545, "learning_rate": 1e-06, "loss": 0.0757, "step": 760 }, { "epoch": 0.1295539666326183, "grad_norm": 1.8817622661590576, "learning_rate": 1e-06, "loss": 0.0812, "step": 761 }, { "epoch": 0.12972420837589377, "grad_norm": 1.3597335815429688, "learning_rate": 1e-06, "loss": 0.063, "step": 762 }, { "epoch": 0.12989445011916922, "grad_norm": 1.5356440544128418, "learning_rate": 1e-06, "loss": 0.077, "step": 763 }, { "epoch": 0.13006469186244468, "grad_norm": 1.3353973627090454, "learning_rate": 1e-06, "loss": 0.0552, "step": 764 }, { "epoch": 0.1302349336057201, "grad_norm": 1.738538384437561, "learning_rate": 1e-06, "loss": 0.066, "step": 765 }, { "epoch": 0.13040517534899557, "grad_norm": 1.3719581365585327, "learning_rate": 1e-06, "loss": 0.0606, "step": 766 }, { "epoch": 0.13057541709227102, "grad_norm": 2.158234119415283, "learning_rate": 1e-06, "loss": 0.0843, "step": 767 }, { "epoch": 0.13074565883554648, "grad_norm": 1.713789701461792, "learning_rate": 1e-06, "loss": 0.0808, "step": 768 }, { "epoch": 0.13091590057882194, "grad_norm": 1.5063124895095825, "learning_rate": 1e-06, "loss": 0.0769, "step": 769 }, { "epoch": 0.13108614232209737, "grad_norm": 1.3353931903839111, "learning_rate": 1e-06, "loss": 0.0698, "step": 770 }, { "epoch": 0.13125638406537282, "grad_norm": 1.8870093822479248, "learning_rate": 1e-06, "loss": 0.091, "step": 771 }, { "epoch": 0.13142662580864828, "grad_norm": 1.8463548421859741, "learning_rate": 1e-06, "loss": 0.0668, "step": 772 }, { "epoch": 0.13159686755192374, "grad_norm": 1.3892488479614258, "learning_rate": 1e-06, "loss": 0.0718, "step": 773 }, { "epoch": 0.1317671092951992, "grad_norm": 1.5559097528457642, "learning_rate": 1e-06, "loss": 0.0706, "step": 774 }, { "epoch": 0.13193735103847462, "grad_norm": 2.101165533065796, "learning_rate": 1e-06, "loss": 0.1133, "step": 775 }, { "epoch": 0.13210759278175008, "grad_norm": 1.7307274341583252, "learning_rate": 1e-06, "loss": 0.0663, "step": 776 }, { "epoch": 0.13227783452502553, "grad_norm": 1.909165382385254, "learning_rate": 1e-06, "loss": 0.0817, "step": 777 }, { "epoch": 0.132448076268301, "grad_norm": 1.6024906635284424, "learning_rate": 1e-06, "loss": 0.0757, "step": 778 }, { "epoch": 0.13261831801157645, "grad_norm": 1.6666183471679688, "learning_rate": 1e-06, "loss": 0.0841, "step": 779 }, { "epoch": 0.13278855975485188, "grad_norm": 2.6020991802215576, "learning_rate": 1e-06, "loss": 0.1077, "step": 780 }, { "epoch": 0.13295880149812733, "grad_norm": 1.5541698932647705, "learning_rate": 1e-06, "loss": 0.0802, "step": 781 }, { "epoch": 0.1331290432414028, "grad_norm": 1.538192868232727, "learning_rate": 1e-06, "loss": 0.0685, "step": 782 }, { "epoch": 0.13329928498467825, "grad_norm": 1.704047679901123, "learning_rate": 1e-06, "loss": 0.0652, "step": 783 }, { "epoch": 0.1334695267279537, "grad_norm": 1.5509108304977417, "learning_rate": 1e-06, "loss": 0.0736, "step": 784 }, { "epoch": 0.13363976847122916, "grad_norm": 1.4850906133651733, "learning_rate": 1e-06, "loss": 0.0599, "step": 785 }, { "epoch": 0.1338100102145046, "grad_norm": 1.46503484249115, "learning_rate": 1e-06, "loss": 0.0594, "step": 786 }, { "epoch": 0.13398025195778004, "grad_norm": 1.633912205696106, "learning_rate": 1e-06, "loss": 0.0699, "step": 787 }, { "epoch": 0.1341504937010555, "grad_norm": 1.7929328680038452, "learning_rate": 1e-06, "loss": 0.0616, "step": 788 }, { "epoch": 0.13432073544433096, "grad_norm": 1.710610270500183, "learning_rate": 1e-06, "loss": 0.0724, "step": 789 }, { "epoch": 0.13449097718760641, "grad_norm": 1.7763170003890991, "learning_rate": 1e-06, "loss": 0.0858, "step": 790 }, { "epoch": 0.13466121893088184, "grad_norm": 1.5168254375457764, "learning_rate": 1e-06, "loss": 0.0769, "step": 791 }, { "epoch": 0.1348314606741573, "grad_norm": 1.3891587257385254, "learning_rate": 1e-06, "loss": 0.0615, "step": 792 }, { "epoch": 0.13500170241743276, "grad_norm": 1.804847002029419, "learning_rate": 1e-06, "loss": 0.0925, "step": 793 }, { "epoch": 0.1351719441607082, "grad_norm": 1.5551631450653076, "learning_rate": 1e-06, "loss": 0.0702, "step": 794 }, { "epoch": 0.13534218590398367, "grad_norm": 1.4586671590805054, "learning_rate": 1e-06, "loss": 0.0745, "step": 795 }, { "epoch": 0.1355124276472591, "grad_norm": 1.7194534540176392, "learning_rate": 1e-06, "loss": 0.0944, "step": 796 }, { "epoch": 0.13568266939053455, "grad_norm": 1.5851624011993408, "learning_rate": 1e-06, "loss": 0.0713, "step": 797 }, { "epoch": 0.13585291113381, "grad_norm": 2.052922010421753, "learning_rate": 1e-06, "loss": 0.0922, "step": 798 }, { "epoch": 0.13602315287708547, "grad_norm": 1.4071455001831055, "learning_rate": 1e-06, "loss": 0.0655, "step": 799 }, { "epoch": 0.13619339462036092, "grad_norm": 1.8842096328735352, "learning_rate": 1e-06, "loss": 0.0725, "step": 800 }, { "epoch": 0.13636363636363635, "grad_norm": 1.692454218864441, "learning_rate": 1e-06, "loss": 0.0718, "step": 801 }, { "epoch": 0.1365338781069118, "grad_norm": 1.4987190961837769, "learning_rate": 1e-06, "loss": 0.0563, "step": 802 }, { "epoch": 0.13670411985018727, "grad_norm": 1.3979241847991943, "learning_rate": 1e-06, "loss": 0.0658, "step": 803 }, { "epoch": 0.13687436159346272, "grad_norm": 1.5798490047454834, "learning_rate": 1e-06, "loss": 0.0739, "step": 804 }, { "epoch": 0.13704460333673818, "grad_norm": 1.990831732749939, "learning_rate": 1e-06, "loss": 0.0759, "step": 805 }, { "epoch": 0.1372148450800136, "grad_norm": 1.9039353132247925, "learning_rate": 1e-06, "loss": 0.0583, "step": 806 }, { "epoch": 0.13738508682328907, "grad_norm": 1.76191246509552, "learning_rate": 1e-06, "loss": 0.0836, "step": 807 }, { "epoch": 0.13755532856656452, "grad_norm": 1.5676465034484863, "learning_rate": 1e-06, "loss": 0.062, "step": 808 }, { "epoch": 0.13772557030983998, "grad_norm": 1.9201511144638062, "learning_rate": 1e-06, "loss": 0.0705, "step": 809 }, { "epoch": 0.13789581205311544, "grad_norm": 1.5441217422485352, "learning_rate": 1e-06, "loss": 0.0594, "step": 810 }, { "epoch": 0.13806605379639086, "grad_norm": 1.9137436151504517, "learning_rate": 1e-06, "loss": 0.0745, "step": 811 }, { "epoch": 0.13823629553966632, "grad_norm": 1.3507276773452759, "learning_rate": 1e-06, "loss": 0.0656, "step": 812 }, { "epoch": 0.13840653728294178, "grad_norm": 1.6629807949066162, "learning_rate": 1e-06, "loss": 0.0721, "step": 813 }, { "epoch": 0.13857677902621723, "grad_norm": 1.6247719526290894, "learning_rate": 1e-06, "loss": 0.0717, "step": 814 }, { "epoch": 0.1387470207694927, "grad_norm": 1.6012877225875854, "learning_rate": 1e-06, "loss": 0.072, "step": 815 }, { "epoch": 0.13891726251276812, "grad_norm": 1.6676996946334839, "learning_rate": 1e-06, "loss": 0.0812, "step": 816 }, { "epoch": 0.13908750425604358, "grad_norm": 1.6991463899612427, "learning_rate": 1e-06, "loss": 0.0729, "step": 817 }, { "epoch": 0.13925774599931903, "grad_norm": 1.4762986898422241, "learning_rate": 1e-06, "loss": 0.0686, "step": 818 }, { "epoch": 0.1394279877425945, "grad_norm": 1.65053391456604, "learning_rate": 1e-06, "loss": 0.0795, "step": 819 }, { "epoch": 0.13959822948586995, "grad_norm": 1.437187910079956, "learning_rate": 1e-06, "loss": 0.0638, "step": 820 }, { "epoch": 0.13976847122914537, "grad_norm": 1.4831326007843018, "learning_rate": 1e-06, "loss": 0.075, "step": 821 }, { "epoch": 0.13993871297242083, "grad_norm": 1.3216532468795776, "learning_rate": 1e-06, "loss": 0.0532, "step": 822 }, { "epoch": 0.1401089547156963, "grad_norm": 1.9568979740142822, "learning_rate": 1e-06, "loss": 0.0817, "step": 823 }, { "epoch": 0.14027919645897174, "grad_norm": 1.7128760814666748, "learning_rate": 1e-06, "loss": 0.0717, "step": 824 }, { "epoch": 0.1404494382022472, "grad_norm": 1.5816516876220703, "learning_rate": 1e-06, "loss": 0.0662, "step": 825 }, { "epoch": 0.14061967994552263, "grad_norm": 1.5961077213287354, "learning_rate": 1e-06, "loss": 0.0748, "step": 826 }, { "epoch": 0.1407899216887981, "grad_norm": 1.8750134706497192, "learning_rate": 1e-06, "loss": 0.0588, "step": 827 }, { "epoch": 0.14096016343207354, "grad_norm": 1.8028770685195923, "learning_rate": 1e-06, "loss": 0.0777, "step": 828 }, { "epoch": 0.141130405175349, "grad_norm": 1.7617617845535278, "learning_rate": 1e-06, "loss": 0.0753, "step": 829 }, { "epoch": 0.14130064691862446, "grad_norm": 1.6493316888809204, "learning_rate": 1e-06, "loss": 0.0638, "step": 830 }, { "epoch": 0.14147088866189989, "grad_norm": 4.191693305969238, "learning_rate": 1e-06, "loss": 0.1091, "step": 831 }, { "epoch": 0.14164113040517534, "grad_norm": 1.787387728691101, "learning_rate": 1e-06, "loss": 0.0766, "step": 832 }, { "epoch": 0.1418113721484508, "grad_norm": 1.9080184698104858, "learning_rate": 1e-06, "loss": 0.0744, "step": 833 }, { "epoch": 0.14198161389172625, "grad_norm": 2.04909086227417, "learning_rate": 1e-06, "loss": 0.0637, "step": 834 }, { "epoch": 0.1421518556350017, "grad_norm": 1.588629126548767, "learning_rate": 1e-06, "loss": 0.0513, "step": 835 }, { "epoch": 0.14232209737827714, "grad_norm": 1.3033459186553955, "learning_rate": 1e-06, "loss": 0.0605, "step": 836 }, { "epoch": 0.1424923391215526, "grad_norm": 1.4227380752563477, "learning_rate": 1e-06, "loss": 0.0583, "step": 837 }, { "epoch": 0.14266258086482805, "grad_norm": 2.39058780670166, "learning_rate": 1e-06, "loss": 0.0931, "step": 838 }, { "epoch": 0.1428328226081035, "grad_norm": 1.6543277502059937, "learning_rate": 1e-06, "loss": 0.0752, "step": 839 }, { "epoch": 0.14300306435137897, "grad_norm": 1.3659149408340454, "learning_rate": 1e-06, "loss": 0.0439, "step": 840 }, { "epoch": 0.1431733060946544, "grad_norm": 1.9133633375167847, "learning_rate": 1e-06, "loss": 0.0874, "step": 841 }, { "epoch": 0.14334354783792985, "grad_norm": 2.2252583503723145, "learning_rate": 1e-06, "loss": 0.0994, "step": 842 }, { "epoch": 0.1435137895812053, "grad_norm": 1.5215318202972412, "learning_rate": 1e-06, "loss": 0.065, "step": 843 }, { "epoch": 0.14368403132448077, "grad_norm": 1.6508617401123047, "learning_rate": 1e-06, "loss": 0.0641, "step": 844 }, { "epoch": 0.14385427306775622, "grad_norm": 1.4455782175064087, "learning_rate": 1e-06, "loss": 0.0459, "step": 845 }, { "epoch": 0.14402451481103168, "grad_norm": 1.5651944875717163, "learning_rate": 1e-06, "loss": 0.0639, "step": 846 }, { "epoch": 0.1441947565543071, "grad_norm": 1.6415917873382568, "learning_rate": 1e-06, "loss": 0.057, "step": 847 }, { "epoch": 0.14436499829758256, "grad_norm": 1.7259793281555176, "learning_rate": 1e-06, "loss": 0.0627, "step": 848 }, { "epoch": 0.14453524004085802, "grad_norm": 1.8741511106491089, "learning_rate": 1e-06, "loss": 0.0695, "step": 849 }, { "epoch": 0.14470548178413348, "grad_norm": 1.6153074502944946, "learning_rate": 1e-06, "loss": 0.0753, "step": 850 }, { "epoch": 0.14487572352740893, "grad_norm": 1.6454153060913086, "learning_rate": 1e-06, "loss": 0.0708, "step": 851 }, { "epoch": 0.14504596527068436, "grad_norm": 2.058832883834839, "learning_rate": 1e-06, "loss": 0.0723, "step": 852 }, { "epoch": 0.14521620701395982, "grad_norm": 1.3613487482070923, "learning_rate": 1e-06, "loss": 0.0551, "step": 853 }, { "epoch": 0.14538644875723528, "grad_norm": 1.6068713665008545, "learning_rate": 1e-06, "loss": 0.0554, "step": 854 }, { "epoch": 0.14555669050051073, "grad_norm": 1.7198082208633423, "learning_rate": 1e-06, "loss": 0.071, "step": 855 }, { "epoch": 0.1457269322437862, "grad_norm": 1.3624064922332764, "learning_rate": 1e-06, "loss": 0.0504, "step": 856 }, { "epoch": 0.14589717398706162, "grad_norm": 1.5122796297073364, "learning_rate": 1e-06, "loss": 0.0606, "step": 857 }, { "epoch": 0.14606741573033707, "grad_norm": 1.4191750288009644, "learning_rate": 1e-06, "loss": 0.0609, "step": 858 }, { "epoch": 0.14623765747361253, "grad_norm": 1.983462929725647, "learning_rate": 1e-06, "loss": 0.0647, "step": 859 }, { "epoch": 0.146407899216888, "grad_norm": 1.5732680559158325, "learning_rate": 1e-06, "loss": 0.0594, "step": 860 }, { "epoch": 0.14657814096016344, "grad_norm": 1.5888450145721436, "learning_rate": 1e-06, "loss": 0.0609, "step": 861 }, { "epoch": 0.14674838270343887, "grad_norm": 1.727030873298645, "learning_rate": 1e-06, "loss": 0.0762, "step": 862 }, { "epoch": 0.14691862444671433, "grad_norm": 1.599660038948059, "learning_rate": 1e-06, "loss": 0.0706, "step": 863 }, { "epoch": 0.1470888661899898, "grad_norm": 1.5314996242523193, "learning_rate": 1e-06, "loss": 0.0706, "step": 864 }, { "epoch": 0.14725910793326524, "grad_norm": 1.532114863395691, "learning_rate": 1e-06, "loss": 0.0531, "step": 865 }, { "epoch": 0.1474293496765407, "grad_norm": 1.5699235200881958, "learning_rate": 1e-06, "loss": 0.0575, "step": 866 }, { "epoch": 0.14759959141981613, "grad_norm": 1.4962087869644165, "learning_rate": 1e-06, "loss": 0.048, "step": 867 }, { "epoch": 0.14776983316309159, "grad_norm": 1.6223524808883667, "learning_rate": 1e-06, "loss": 0.0557, "step": 868 }, { "epoch": 0.14794007490636704, "grad_norm": 1.379461407661438, "learning_rate": 1e-06, "loss": 0.0488, "step": 869 }, { "epoch": 0.1481103166496425, "grad_norm": 1.6299303770065308, "learning_rate": 1e-06, "loss": 0.0712, "step": 870 }, { "epoch": 0.14828055839291795, "grad_norm": 1.5951275825500488, "learning_rate": 1e-06, "loss": 0.0633, "step": 871 }, { "epoch": 0.14845080013619338, "grad_norm": 1.6737635135650635, "learning_rate": 1e-06, "loss": 0.0576, "step": 872 }, { "epoch": 0.14862104187946884, "grad_norm": 2.7894175052642822, "learning_rate": 1e-06, "loss": 0.0916, "step": 873 }, { "epoch": 0.1487912836227443, "grad_norm": 1.7583144903182983, "learning_rate": 1e-06, "loss": 0.0556, "step": 874 }, { "epoch": 0.14896152536601975, "grad_norm": 1.6645101308822632, "learning_rate": 1e-06, "loss": 0.0673, "step": 875 }, { "epoch": 0.1491317671092952, "grad_norm": 1.6808711290359497, "learning_rate": 1e-06, "loss": 0.0541, "step": 876 }, { "epoch": 0.14930200885257064, "grad_norm": 1.5654128789901733, "learning_rate": 1e-06, "loss": 0.0636, "step": 877 }, { "epoch": 0.1494722505958461, "grad_norm": 1.5253329277038574, "learning_rate": 1e-06, "loss": 0.0561, "step": 878 }, { "epoch": 0.14964249233912155, "grad_norm": 1.4865022897720337, "learning_rate": 1e-06, "loss": 0.0564, "step": 879 }, { "epoch": 0.149812734082397, "grad_norm": 1.4917443990707397, "learning_rate": 1e-06, "loss": 0.0575, "step": 880 }, { "epoch": 0.14998297582567247, "grad_norm": 1.7758963108062744, "learning_rate": 1e-06, "loss": 0.0681, "step": 881 }, { "epoch": 0.1501532175689479, "grad_norm": 1.369234323501587, "learning_rate": 1e-06, "loss": 0.0596, "step": 882 }, { "epoch": 0.15032345931222335, "grad_norm": 2.3506531715393066, "learning_rate": 1e-06, "loss": 0.083, "step": 883 }, { "epoch": 0.1504937010554988, "grad_norm": 1.8270442485809326, "learning_rate": 1e-06, "loss": 0.0722, "step": 884 }, { "epoch": 0.15066394279877426, "grad_norm": 1.7770593166351318, "learning_rate": 1e-06, "loss": 0.0672, "step": 885 }, { "epoch": 0.15083418454204972, "grad_norm": 1.7954052686691284, "learning_rate": 1e-06, "loss": 0.068, "step": 886 }, { "epoch": 0.15100442628532515, "grad_norm": 1.5579285621643066, "learning_rate": 1e-06, "loss": 0.0665, "step": 887 }, { "epoch": 0.1511746680286006, "grad_norm": 1.6267443895339966, "learning_rate": 1e-06, "loss": 0.0616, "step": 888 }, { "epoch": 0.15134490977187606, "grad_norm": 1.9561134576797485, "learning_rate": 1e-06, "loss": 0.0672, "step": 889 }, { "epoch": 0.15151515151515152, "grad_norm": 1.592724084854126, "learning_rate": 1e-06, "loss": 0.0641, "step": 890 }, { "epoch": 0.15168539325842698, "grad_norm": 1.448255181312561, "learning_rate": 1e-06, "loss": 0.0499, "step": 891 }, { "epoch": 0.1518556350017024, "grad_norm": 1.7166950702667236, "learning_rate": 1e-06, "loss": 0.0724, "step": 892 }, { "epoch": 0.15202587674497786, "grad_norm": 2.7402710914611816, "learning_rate": 1e-06, "loss": 0.0981, "step": 893 }, { "epoch": 0.15219611848825332, "grad_norm": 1.9034618139266968, "learning_rate": 1e-06, "loss": 0.0655, "step": 894 }, { "epoch": 0.15236636023152877, "grad_norm": 1.6078972816467285, "learning_rate": 1e-06, "loss": 0.0545, "step": 895 }, { "epoch": 0.15253660197480423, "grad_norm": 3.4988811016082764, "learning_rate": 1e-06, "loss": 0.0725, "step": 896 }, { "epoch": 0.15270684371807966, "grad_norm": 1.6912193298339844, "learning_rate": 1e-06, "loss": 0.0617, "step": 897 }, { "epoch": 0.15287708546135512, "grad_norm": 1.5782009363174438, "learning_rate": 1e-06, "loss": 0.0454, "step": 898 }, { "epoch": 0.15304732720463057, "grad_norm": 2.1376571655273438, "learning_rate": 1e-06, "loss": 0.1007, "step": 899 }, { "epoch": 0.15321756894790603, "grad_norm": 1.8803651332855225, "learning_rate": 1e-06, "loss": 0.0655, "step": 900 }, { "epoch": 0.1533878106911815, "grad_norm": 1.6837856769561768, "learning_rate": 1e-06, "loss": 0.07, "step": 901 }, { "epoch": 0.15355805243445692, "grad_norm": 1.5627678632736206, "learning_rate": 1e-06, "loss": 0.0583, "step": 902 }, { "epoch": 0.15372829417773237, "grad_norm": 1.413833498954773, "learning_rate": 1e-06, "loss": 0.05, "step": 903 }, { "epoch": 0.15389853592100783, "grad_norm": 2.4595329761505127, "learning_rate": 1e-06, "loss": 0.1166, "step": 904 }, { "epoch": 0.15406877766428329, "grad_norm": 4.622979640960693, "learning_rate": 1e-06, "loss": 0.102, "step": 905 }, { "epoch": 0.15423901940755874, "grad_norm": 1.9842865467071533, "learning_rate": 1e-06, "loss": 0.0704, "step": 906 }, { "epoch": 0.1544092611508342, "grad_norm": 1.6348425149917603, "learning_rate": 1e-06, "loss": 0.0519, "step": 907 }, { "epoch": 0.15457950289410963, "grad_norm": 1.920792579650879, "learning_rate": 1e-06, "loss": 0.0644, "step": 908 }, { "epoch": 0.15474974463738508, "grad_norm": 2.1553070545196533, "learning_rate": 1e-06, "loss": 0.0698, "step": 909 }, { "epoch": 0.15491998638066054, "grad_norm": 1.422676920890808, "learning_rate": 1e-06, "loss": 0.0542, "step": 910 }, { "epoch": 0.155090228123936, "grad_norm": 1.8286123275756836, "learning_rate": 1e-06, "loss": 0.0608, "step": 911 }, { "epoch": 0.15526046986721145, "grad_norm": 1.6634122133255005, "learning_rate": 1e-06, "loss": 0.0581, "step": 912 }, { "epoch": 0.15543071161048688, "grad_norm": 1.5610778331756592, "learning_rate": 1e-06, "loss": 0.0667, "step": 913 }, { "epoch": 0.15560095335376234, "grad_norm": 1.7075679302215576, "learning_rate": 1e-06, "loss": 0.0587, "step": 914 }, { "epoch": 0.1557711950970378, "grad_norm": 1.6121772527694702, "learning_rate": 1e-06, "loss": 0.0511, "step": 915 }, { "epoch": 0.15594143684031325, "grad_norm": 1.7952123880386353, "learning_rate": 1e-06, "loss": 0.0716, "step": 916 }, { "epoch": 0.1561116785835887, "grad_norm": 1.8294349908828735, "learning_rate": 1e-06, "loss": 0.0611, "step": 917 }, { "epoch": 0.15628192032686414, "grad_norm": 1.8930115699768066, "learning_rate": 1e-06, "loss": 0.083, "step": 918 }, { "epoch": 0.1564521620701396, "grad_norm": 1.7695558071136475, "learning_rate": 1e-06, "loss": 0.0718, "step": 919 }, { "epoch": 0.15662240381341505, "grad_norm": 1.6812986135482788, "learning_rate": 1e-06, "loss": 0.0709, "step": 920 }, { "epoch": 0.1567926455566905, "grad_norm": 1.6664036512374878, "learning_rate": 1e-06, "loss": 0.0609, "step": 921 }, { "epoch": 0.15696288729996596, "grad_norm": 1.7315361499786377, "learning_rate": 1e-06, "loss": 0.0638, "step": 922 }, { "epoch": 0.1571331290432414, "grad_norm": 1.693820595741272, "learning_rate": 1e-06, "loss": 0.0604, "step": 923 }, { "epoch": 0.15730337078651685, "grad_norm": 1.9926408529281616, "learning_rate": 1e-06, "loss": 0.065, "step": 924 }, { "epoch": 0.1574736125297923, "grad_norm": 1.3008970022201538, "learning_rate": 1e-06, "loss": 0.0424, "step": 925 }, { "epoch": 0.15764385427306776, "grad_norm": 1.7588164806365967, "learning_rate": 1e-06, "loss": 0.0569, "step": 926 }, { "epoch": 0.15781409601634322, "grad_norm": 1.7521356344223022, "learning_rate": 1e-06, "loss": 0.057, "step": 927 }, { "epoch": 0.15798433775961865, "grad_norm": 1.6399370431900024, "learning_rate": 1e-06, "loss": 0.055, "step": 928 }, { "epoch": 0.1581545795028941, "grad_norm": 1.6132686138153076, "learning_rate": 1e-06, "loss": 0.062, "step": 929 }, { "epoch": 0.15832482124616956, "grad_norm": 1.8911019563674927, "learning_rate": 1e-06, "loss": 0.0673, "step": 930 }, { "epoch": 0.15849506298944502, "grad_norm": 1.8688396215438843, "learning_rate": 1e-06, "loss": 0.0593, "step": 931 }, { "epoch": 0.15866530473272047, "grad_norm": 1.948891282081604, "learning_rate": 1e-06, "loss": 0.0717, "step": 932 }, { "epoch": 0.1588355464759959, "grad_norm": 1.5135903358459473, "learning_rate": 1e-06, "loss": 0.0538, "step": 933 }, { "epoch": 0.15900578821927136, "grad_norm": 1.983309030532837, "learning_rate": 1e-06, "loss": 0.0656, "step": 934 }, { "epoch": 0.15917602996254682, "grad_norm": 2.004861354827881, "learning_rate": 1e-06, "loss": 0.0662, "step": 935 }, { "epoch": 0.15934627170582227, "grad_norm": 1.3474713563919067, "learning_rate": 1e-06, "loss": 0.0607, "step": 936 }, { "epoch": 0.15951651344909773, "grad_norm": 1.5540618896484375, "learning_rate": 1e-06, "loss": 0.0607, "step": 937 }, { "epoch": 0.15968675519237316, "grad_norm": 1.8663222789764404, "learning_rate": 1e-06, "loss": 0.0694, "step": 938 }, { "epoch": 0.15985699693564862, "grad_norm": 1.5832575559616089, "learning_rate": 1e-06, "loss": 0.0722, "step": 939 }, { "epoch": 0.16002723867892407, "grad_norm": 1.6708149909973145, "learning_rate": 1e-06, "loss": 0.0715, "step": 940 }, { "epoch": 0.16019748042219953, "grad_norm": 1.7680753469467163, "learning_rate": 1e-06, "loss": 0.0586, "step": 941 }, { "epoch": 0.16036772216547499, "grad_norm": 1.8743613958358765, "learning_rate": 1e-06, "loss": 0.0568, "step": 942 }, { "epoch": 0.16053796390875041, "grad_norm": 4.353957176208496, "learning_rate": 1e-06, "loss": 0.1204, "step": 943 }, { "epoch": 0.16070820565202587, "grad_norm": 1.36697518825531, "learning_rate": 1e-06, "loss": 0.0462, "step": 944 }, { "epoch": 0.16087844739530133, "grad_norm": 1.474086046218872, "learning_rate": 1e-06, "loss": 0.0508, "step": 945 }, { "epoch": 0.16104868913857678, "grad_norm": 1.7142692804336548, "learning_rate": 1e-06, "loss": 0.0528, "step": 946 }, { "epoch": 0.16121893088185224, "grad_norm": 2.086660623550415, "learning_rate": 1e-06, "loss": 0.055, "step": 947 }, { "epoch": 0.16138917262512767, "grad_norm": 2.088574171066284, "learning_rate": 1e-06, "loss": 0.0755, "step": 948 }, { "epoch": 0.16155941436840313, "grad_norm": 1.7473136186599731, "learning_rate": 1e-06, "loss": 0.0598, "step": 949 }, { "epoch": 0.16172965611167858, "grad_norm": 1.3327255249023438, "learning_rate": 1e-06, "loss": 0.049, "step": 950 }, { "epoch": 0.16189989785495404, "grad_norm": 3.631272792816162, "learning_rate": 1e-06, "loss": 0.084, "step": 951 }, { "epoch": 0.1620701395982295, "grad_norm": 1.5777565240859985, "learning_rate": 1e-06, "loss": 0.0448, "step": 952 }, { "epoch": 0.16224038134150492, "grad_norm": 2.02554988861084, "learning_rate": 1e-06, "loss": 0.0661, "step": 953 }, { "epoch": 0.16241062308478038, "grad_norm": 1.5341172218322754, "learning_rate": 1e-06, "loss": 0.0508, "step": 954 }, { "epoch": 0.16258086482805584, "grad_norm": 1.34352707862854, "learning_rate": 1e-06, "loss": 0.0397, "step": 955 }, { "epoch": 0.1627511065713313, "grad_norm": 1.5781581401824951, "learning_rate": 1e-06, "loss": 0.0613, "step": 956 }, { "epoch": 0.16292134831460675, "grad_norm": 1.7862120866775513, "learning_rate": 1e-06, "loss": 0.0512, "step": 957 }, { "epoch": 0.16309159005788218, "grad_norm": 2.01899790763855, "learning_rate": 1e-06, "loss": 0.0689, "step": 958 }, { "epoch": 0.16326183180115764, "grad_norm": 1.6229596138000488, "learning_rate": 1e-06, "loss": 0.0539, "step": 959 }, { "epoch": 0.1634320735444331, "grad_norm": 1.663812279701233, "learning_rate": 1e-06, "loss": 0.0629, "step": 960 }, { "epoch": 0.16360231528770855, "grad_norm": 1.2906622886657715, "learning_rate": 1e-06, "loss": 0.0541, "step": 961 }, { "epoch": 0.163772557030984, "grad_norm": 1.674870491027832, "learning_rate": 1e-06, "loss": 0.0665, "step": 962 }, { "epoch": 0.16394279877425944, "grad_norm": 1.584000587463379, "learning_rate": 1e-06, "loss": 0.0559, "step": 963 }, { "epoch": 0.1641130405175349, "grad_norm": 1.8097106218338013, "learning_rate": 1e-06, "loss": 0.0663, "step": 964 }, { "epoch": 0.16428328226081035, "grad_norm": 1.6824593544006348, "learning_rate": 1e-06, "loss": 0.0592, "step": 965 }, { "epoch": 0.1644535240040858, "grad_norm": 1.6757420301437378, "learning_rate": 1e-06, "loss": 0.0565, "step": 966 }, { "epoch": 0.16462376574736126, "grad_norm": 1.91703462600708, "learning_rate": 1e-06, "loss": 0.0617, "step": 967 }, { "epoch": 0.1647940074906367, "grad_norm": 1.8160761594772339, "learning_rate": 1e-06, "loss": 0.0579, "step": 968 }, { "epoch": 0.16496424923391215, "grad_norm": 1.3976374864578247, "learning_rate": 1e-06, "loss": 0.0428, "step": 969 }, { "epoch": 0.1651344909771876, "grad_norm": 2.016111373901367, "learning_rate": 1e-06, "loss": 0.054, "step": 970 }, { "epoch": 0.16530473272046306, "grad_norm": 1.7259122133255005, "learning_rate": 1e-06, "loss": 0.0488, "step": 971 }, { "epoch": 0.16547497446373852, "grad_norm": 1.5745218992233276, "learning_rate": 1e-06, "loss": 0.0569, "step": 972 }, { "epoch": 0.16564521620701397, "grad_norm": 1.3785955905914307, "learning_rate": 1e-06, "loss": 0.0412, "step": 973 }, { "epoch": 0.1658154579502894, "grad_norm": 1.76412034034729, "learning_rate": 1e-06, "loss": 0.0533, "step": 974 }, { "epoch": 0.16598569969356486, "grad_norm": 1.6779992580413818, "learning_rate": 1e-06, "loss": 0.0636, "step": 975 }, { "epoch": 0.16615594143684032, "grad_norm": 1.5417025089263916, "learning_rate": 1e-06, "loss": 0.0477, "step": 976 }, { "epoch": 0.16632618318011577, "grad_norm": 1.7664419412612915, "learning_rate": 1e-06, "loss": 0.0628, "step": 977 }, { "epoch": 0.16649642492339123, "grad_norm": 1.905269980430603, "learning_rate": 1e-06, "loss": 0.0582, "step": 978 }, { "epoch": 0.16666666666666666, "grad_norm": 2.259796619415283, "learning_rate": 1e-06, "loss": 0.0721, "step": 979 }, { "epoch": 0.16683690840994211, "grad_norm": 1.6967008113861084, "learning_rate": 1e-06, "loss": 0.0517, "step": 980 }, { "epoch": 0.16700715015321757, "grad_norm": 2.18546986579895, "learning_rate": 1e-06, "loss": 0.0645, "step": 981 }, { "epoch": 0.16717739189649303, "grad_norm": 1.5567885637283325, "learning_rate": 1e-06, "loss": 0.0471, "step": 982 }, { "epoch": 0.16734763363976848, "grad_norm": 1.516837477684021, "learning_rate": 1e-06, "loss": 0.0442, "step": 983 }, { "epoch": 0.1675178753830439, "grad_norm": 1.4908742904663086, "learning_rate": 1e-06, "loss": 0.0524, "step": 984 }, { "epoch": 0.16768811712631937, "grad_norm": 1.570236086845398, "learning_rate": 1e-06, "loss": 0.054, "step": 985 }, { "epoch": 0.16785835886959483, "grad_norm": 1.672652006149292, "learning_rate": 1e-06, "loss": 0.0613, "step": 986 }, { "epoch": 0.16802860061287028, "grad_norm": 1.6871075630187988, "learning_rate": 1e-06, "loss": 0.0683, "step": 987 }, { "epoch": 0.16819884235614574, "grad_norm": 1.6366732120513916, "learning_rate": 1e-06, "loss": 0.0542, "step": 988 }, { "epoch": 0.16836908409942117, "grad_norm": 1.6381199359893799, "learning_rate": 1e-06, "loss": 0.0522, "step": 989 }, { "epoch": 0.16853932584269662, "grad_norm": 1.6033238172531128, "learning_rate": 1e-06, "loss": 0.0519, "step": 990 }, { "epoch": 0.16870956758597208, "grad_norm": 2.074885606765747, "learning_rate": 1e-06, "loss": 0.0668, "step": 991 }, { "epoch": 0.16887980932924754, "grad_norm": 1.8530831336975098, "learning_rate": 1e-06, "loss": 0.0569, "step": 992 }, { "epoch": 0.169050051072523, "grad_norm": 1.8304858207702637, "learning_rate": 1e-06, "loss": 0.0613, "step": 993 }, { "epoch": 0.16922029281579842, "grad_norm": 2.9935953617095947, "learning_rate": 1e-06, "loss": 0.0868, "step": 994 }, { "epoch": 0.16939053455907388, "grad_norm": 1.593682050704956, "learning_rate": 1e-06, "loss": 0.0525, "step": 995 }, { "epoch": 0.16956077630234934, "grad_norm": 1.4647144079208374, "learning_rate": 1e-06, "loss": 0.0539, "step": 996 }, { "epoch": 0.1697310180456248, "grad_norm": 1.6193076372146606, "learning_rate": 1e-06, "loss": 0.0556, "step": 997 }, { "epoch": 0.16990125978890025, "grad_norm": 1.3825421333312988, "learning_rate": 1e-06, "loss": 0.0485, "step": 998 }, { "epoch": 0.17007150153217568, "grad_norm": 1.6311777830123901, "learning_rate": 1e-06, "loss": 0.0604, "step": 999 }, { "epoch": 0.17024174327545114, "grad_norm": 1.859731912612915, "learning_rate": 1e-06, "loss": 0.0722, "step": 1000 }, { "epoch": 0.17024174327545114, "eval_loss": 0.262949138879776, "eval_runtime": 21.3587, "eval_samples_per_second": 14.046, "eval_steps_per_second": 0.375, "step": 1000 }, { "epoch": 0.1704119850187266, "grad_norm": 1.5789490938186646, "learning_rate": 1e-06, "loss": 0.0589, "step": 1001 }, { "epoch": 0.17058222676200205, "grad_norm": 1.7059580087661743, "learning_rate": 1e-06, "loss": 0.0505, "step": 1002 }, { "epoch": 0.1707524685052775, "grad_norm": 1.5325723886489868, "learning_rate": 1e-06, "loss": 0.0557, "step": 1003 }, { "epoch": 0.17092271024855293, "grad_norm": 1.3633655309677124, "learning_rate": 1e-06, "loss": 0.0467, "step": 1004 }, { "epoch": 0.1710929519918284, "grad_norm": 2.3447840213775635, "learning_rate": 1e-06, "loss": 0.0671, "step": 1005 }, { "epoch": 0.17126319373510385, "grad_norm": 1.9885766506195068, "learning_rate": 1e-06, "loss": 0.0615, "step": 1006 }, { "epoch": 0.1714334354783793, "grad_norm": 1.5275179147720337, "learning_rate": 1e-06, "loss": 0.0505, "step": 1007 }, { "epoch": 0.17160367722165476, "grad_norm": 1.6165765523910522, "learning_rate": 1e-06, "loss": 0.0465, "step": 1008 }, { "epoch": 0.1717739189649302, "grad_norm": 2.4540200233459473, "learning_rate": 1e-06, "loss": 0.0682, "step": 1009 }, { "epoch": 0.17194416070820565, "grad_norm": 1.5132991075515747, "learning_rate": 1e-06, "loss": 0.0498, "step": 1010 }, { "epoch": 0.1721144024514811, "grad_norm": 1.4574193954467773, "learning_rate": 1e-06, "loss": 0.0515, "step": 1011 }, { "epoch": 0.17228464419475656, "grad_norm": 2.0926284790039062, "learning_rate": 1e-06, "loss": 0.0676, "step": 1012 }, { "epoch": 0.17245488593803202, "grad_norm": 1.316934585571289, "learning_rate": 1e-06, "loss": 0.049, "step": 1013 }, { "epoch": 0.17262512768130744, "grad_norm": 1.4470518827438354, "learning_rate": 1e-06, "loss": 0.0527, "step": 1014 }, { "epoch": 0.1727953694245829, "grad_norm": 1.254742980003357, "learning_rate": 1e-06, "loss": 0.04, "step": 1015 }, { "epoch": 0.17296561116785836, "grad_norm": 1.6093502044677734, "learning_rate": 1e-06, "loss": 0.0575, "step": 1016 }, { "epoch": 0.17313585291113381, "grad_norm": 1.9037551879882812, "learning_rate": 1e-06, "loss": 0.0602, "step": 1017 }, { "epoch": 0.17330609465440927, "grad_norm": 1.4383158683776855, "learning_rate": 1e-06, "loss": 0.0481, "step": 1018 }, { "epoch": 0.1734763363976847, "grad_norm": 1.7174170017242432, "learning_rate": 1e-06, "loss": 0.0677, "step": 1019 }, { "epoch": 0.17364657814096016, "grad_norm": 1.838952660560608, "learning_rate": 1e-06, "loss": 0.0555, "step": 1020 }, { "epoch": 0.1738168198842356, "grad_norm": 1.9162826538085938, "learning_rate": 1e-06, "loss": 0.0619, "step": 1021 }, { "epoch": 0.17398706162751107, "grad_norm": 1.5161962509155273, "learning_rate": 1e-06, "loss": 0.048, "step": 1022 }, { "epoch": 0.17415730337078653, "grad_norm": 1.8026893138885498, "learning_rate": 1e-06, "loss": 0.0615, "step": 1023 }, { "epoch": 0.17432754511406195, "grad_norm": 1.637364387512207, "learning_rate": 1e-06, "loss": 0.0497, "step": 1024 }, { "epoch": 0.1744977868573374, "grad_norm": 1.7427301406860352, "learning_rate": 1e-06, "loss": 0.0467, "step": 1025 }, { "epoch": 0.17466802860061287, "grad_norm": 1.6015710830688477, "learning_rate": 1e-06, "loss": 0.0456, "step": 1026 }, { "epoch": 0.17483827034388832, "grad_norm": 1.5106914043426514, "learning_rate": 1e-06, "loss": 0.0583, "step": 1027 }, { "epoch": 0.17500851208716378, "grad_norm": 1.7485862970352173, "learning_rate": 1e-06, "loss": 0.0743, "step": 1028 }, { "epoch": 0.1751787538304392, "grad_norm": 1.5311837196350098, "learning_rate": 1e-06, "loss": 0.0555, "step": 1029 }, { "epoch": 0.17534899557371467, "grad_norm": 1.6525341272354126, "learning_rate": 1e-06, "loss": 0.0566, "step": 1030 }, { "epoch": 0.17551923731699012, "grad_norm": 1.3050777912139893, "learning_rate": 1e-06, "loss": 0.0454, "step": 1031 }, { "epoch": 0.17568947906026558, "grad_norm": 1.8630820512771606, "learning_rate": 1e-06, "loss": 0.0615, "step": 1032 }, { "epoch": 0.17585972080354104, "grad_norm": 1.974867582321167, "learning_rate": 1e-06, "loss": 0.0574, "step": 1033 }, { "epoch": 0.1760299625468165, "grad_norm": 1.6042343378067017, "learning_rate": 1e-06, "loss": 0.0469, "step": 1034 }, { "epoch": 0.17620020429009192, "grad_norm": 1.5605472326278687, "learning_rate": 1e-06, "loss": 0.0607, "step": 1035 }, { "epoch": 0.17637044603336738, "grad_norm": 1.6991108655929565, "learning_rate": 1e-06, "loss": 0.0458, "step": 1036 }, { "epoch": 0.17654068777664284, "grad_norm": 2.3888437747955322, "learning_rate": 1e-06, "loss": 0.0646, "step": 1037 }, { "epoch": 0.1767109295199183, "grad_norm": 1.9789478778839111, "learning_rate": 1e-06, "loss": 0.0657, "step": 1038 }, { "epoch": 0.17688117126319375, "grad_norm": 1.5386762619018555, "learning_rate": 1e-06, "loss": 0.0591, "step": 1039 }, { "epoch": 0.17705141300646918, "grad_norm": 1.657543659210205, "learning_rate": 1e-06, "loss": 0.0504, "step": 1040 }, { "epoch": 0.17722165474974463, "grad_norm": 1.5504556894302368, "learning_rate": 1e-06, "loss": 0.0467, "step": 1041 }, { "epoch": 0.1773918964930201, "grad_norm": 1.718420147895813, "learning_rate": 1e-06, "loss": 0.0559, "step": 1042 }, { "epoch": 0.17756213823629555, "grad_norm": 2.331953525543213, "learning_rate": 1e-06, "loss": 0.056, "step": 1043 }, { "epoch": 0.177732379979571, "grad_norm": 1.6991020441055298, "learning_rate": 1e-06, "loss": 0.0474, "step": 1044 }, { "epoch": 0.17790262172284643, "grad_norm": 1.8949966430664062, "learning_rate": 1e-06, "loss": 0.043, "step": 1045 }, { "epoch": 0.1780728634661219, "grad_norm": 1.8064547777175903, "learning_rate": 1e-06, "loss": 0.0436, "step": 1046 }, { "epoch": 0.17824310520939735, "grad_norm": 1.4450627565383911, "learning_rate": 1e-06, "loss": 0.0453, "step": 1047 }, { "epoch": 0.1784133469526728, "grad_norm": 1.9606854915618896, "learning_rate": 1e-06, "loss": 0.052, "step": 1048 }, { "epoch": 0.17858358869594826, "grad_norm": 1.8185573816299438, "learning_rate": 1e-06, "loss": 0.0438, "step": 1049 }, { "epoch": 0.1787538304392237, "grad_norm": 1.8913239240646362, "learning_rate": 1e-06, "loss": 0.0456, "step": 1050 }, { "epoch": 0.17892407218249914, "grad_norm": 1.6773005723953247, "learning_rate": 1e-06, "loss": 0.0502, "step": 1051 }, { "epoch": 0.1790943139257746, "grad_norm": 1.6672918796539307, "learning_rate": 1e-06, "loss": 0.0471, "step": 1052 }, { "epoch": 0.17926455566905006, "grad_norm": 1.8581830263137817, "learning_rate": 1e-06, "loss": 0.0539, "step": 1053 }, { "epoch": 0.17943479741232551, "grad_norm": 1.4864681959152222, "learning_rate": 1e-06, "loss": 0.0546, "step": 1054 }, { "epoch": 0.17960503915560094, "grad_norm": 1.527038812637329, "learning_rate": 1e-06, "loss": 0.0417, "step": 1055 }, { "epoch": 0.1797752808988764, "grad_norm": 1.6635370254516602, "learning_rate": 1e-06, "loss": 0.0457, "step": 1056 }, { "epoch": 0.17994552264215186, "grad_norm": 1.5412622690200806, "learning_rate": 1e-06, "loss": 0.0437, "step": 1057 }, { "epoch": 0.1801157643854273, "grad_norm": 1.4829204082489014, "learning_rate": 1e-06, "loss": 0.0493, "step": 1058 }, { "epoch": 0.18028600612870277, "grad_norm": 1.4343328475952148, "learning_rate": 1e-06, "loss": 0.0405, "step": 1059 }, { "epoch": 0.1804562478719782, "grad_norm": 1.7797609567642212, "learning_rate": 1e-06, "loss": 0.0464, "step": 1060 }, { "epoch": 0.18062648961525365, "grad_norm": 1.488383412361145, "learning_rate": 1e-06, "loss": 0.0355, "step": 1061 }, { "epoch": 0.1807967313585291, "grad_norm": 1.3695396184921265, "learning_rate": 1e-06, "loss": 0.048, "step": 1062 }, { "epoch": 0.18096697310180457, "grad_norm": 1.8657076358795166, "learning_rate": 1e-06, "loss": 0.0478, "step": 1063 }, { "epoch": 0.18113721484508002, "grad_norm": 1.7366667985916138, "learning_rate": 1e-06, "loss": 0.0429, "step": 1064 }, { "epoch": 0.18130745658835545, "grad_norm": 1.4432430267333984, "learning_rate": 1e-06, "loss": 0.0451, "step": 1065 }, { "epoch": 0.1814776983316309, "grad_norm": 1.6333496570587158, "learning_rate": 1e-06, "loss": 0.0612, "step": 1066 }, { "epoch": 0.18164794007490637, "grad_norm": 2.0347399711608887, "learning_rate": 1e-06, "loss": 0.0544, "step": 1067 }, { "epoch": 0.18181818181818182, "grad_norm": 2.103520393371582, "learning_rate": 1e-06, "loss": 0.0528, "step": 1068 }, { "epoch": 0.18198842356145728, "grad_norm": 2.289806604385376, "learning_rate": 1e-06, "loss": 0.0714, "step": 1069 }, { "epoch": 0.1821586653047327, "grad_norm": 1.8326396942138672, "learning_rate": 1e-06, "loss": 0.0477, "step": 1070 }, { "epoch": 0.18232890704800817, "grad_norm": 1.6226252317428589, "learning_rate": 1e-06, "loss": 0.0596, "step": 1071 }, { "epoch": 0.18249914879128362, "grad_norm": 1.6972850561141968, "learning_rate": 1e-06, "loss": 0.0555, "step": 1072 }, { "epoch": 0.18266939053455908, "grad_norm": 1.7216845750808716, "learning_rate": 1e-06, "loss": 0.0727, "step": 1073 }, { "epoch": 0.18283963227783454, "grad_norm": 1.396399974822998, "learning_rate": 1e-06, "loss": 0.0451, "step": 1074 }, { "epoch": 0.18300987402110996, "grad_norm": 1.5802775621414185, "learning_rate": 1e-06, "loss": 0.0523, "step": 1075 }, { "epoch": 0.18318011576438542, "grad_norm": 1.5000909566879272, "learning_rate": 1e-06, "loss": 0.0503, "step": 1076 }, { "epoch": 0.18335035750766088, "grad_norm": 1.5903329849243164, "learning_rate": 1e-06, "loss": 0.0453, "step": 1077 }, { "epoch": 0.18352059925093633, "grad_norm": 1.3907889127731323, "learning_rate": 1e-06, "loss": 0.0482, "step": 1078 }, { "epoch": 0.1836908409942118, "grad_norm": 1.4356728792190552, "learning_rate": 1e-06, "loss": 0.0358, "step": 1079 }, { "epoch": 0.18386108273748722, "grad_norm": 1.425837755203247, "learning_rate": 1e-06, "loss": 0.0481, "step": 1080 }, { "epoch": 0.18403132448076268, "grad_norm": 1.793095588684082, "learning_rate": 1e-06, "loss": 0.0577, "step": 1081 }, { "epoch": 0.18420156622403813, "grad_norm": 1.5674705505371094, "learning_rate": 1e-06, "loss": 0.0405, "step": 1082 }, { "epoch": 0.1843718079673136, "grad_norm": 1.527916431427002, "learning_rate": 1e-06, "loss": 0.0394, "step": 1083 }, { "epoch": 0.18454204971058905, "grad_norm": 1.8252404928207397, "learning_rate": 1e-06, "loss": 0.0506, "step": 1084 }, { "epoch": 0.18471229145386447, "grad_norm": 1.6396743059158325, "learning_rate": 1e-06, "loss": 0.0462, "step": 1085 }, { "epoch": 0.18488253319713993, "grad_norm": 1.7540291547775269, "learning_rate": 1e-06, "loss": 0.051, "step": 1086 }, { "epoch": 0.1850527749404154, "grad_norm": 1.7107782363891602, "learning_rate": 1e-06, "loss": 0.0552, "step": 1087 }, { "epoch": 0.18522301668369084, "grad_norm": 1.7277930974960327, "learning_rate": 1e-06, "loss": 0.0558, "step": 1088 }, { "epoch": 0.1853932584269663, "grad_norm": 1.531584620475769, "learning_rate": 1e-06, "loss": 0.0436, "step": 1089 }, { "epoch": 0.18556350017024173, "grad_norm": 1.708498477935791, "learning_rate": 1e-06, "loss": 0.0599, "step": 1090 }, { "epoch": 0.1857337419135172, "grad_norm": 1.5215576887130737, "learning_rate": 1e-06, "loss": 0.0413, "step": 1091 }, { "epoch": 0.18590398365679264, "grad_norm": 1.6072683334350586, "learning_rate": 1e-06, "loss": 0.0472, "step": 1092 }, { "epoch": 0.1860742254000681, "grad_norm": 2.161273717880249, "learning_rate": 1e-06, "loss": 0.053, "step": 1093 }, { "epoch": 0.18624446714334356, "grad_norm": 2.1874208450317383, "learning_rate": 1e-06, "loss": 0.0676, "step": 1094 }, { "epoch": 0.186414708886619, "grad_norm": 1.7869967222213745, "learning_rate": 1e-06, "loss": 0.0566, "step": 1095 }, { "epoch": 0.18658495062989444, "grad_norm": 1.7522093057632446, "learning_rate": 1e-06, "loss": 0.0482, "step": 1096 }, { "epoch": 0.1867551923731699, "grad_norm": 1.5887916088104248, "learning_rate": 1e-06, "loss": 0.0439, "step": 1097 }, { "epoch": 0.18692543411644535, "grad_norm": 1.3974252939224243, "learning_rate": 1e-06, "loss": 0.0413, "step": 1098 }, { "epoch": 0.1870956758597208, "grad_norm": 1.798572301864624, "learning_rate": 1e-06, "loss": 0.0612, "step": 1099 }, { "epoch": 0.18726591760299627, "grad_norm": 2.0376672744750977, "learning_rate": 1e-06, "loss": 0.0512, "step": 1100 }, { "epoch": 0.1874361593462717, "grad_norm": 1.7600435018539429, "learning_rate": 1e-06, "loss": 0.0516, "step": 1101 }, { "epoch": 0.18760640108954715, "grad_norm": 1.638627529144287, "learning_rate": 1e-06, "loss": 0.0438, "step": 1102 }, { "epoch": 0.1877766428328226, "grad_norm": 1.4002454280853271, "learning_rate": 1e-06, "loss": 0.0406, "step": 1103 }, { "epoch": 0.18794688457609807, "grad_norm": 1.3637229204177856, "learning_rate": 1e-06, "loss": 0.0363, "step": 1104 }, { "epoch": 0.18811712631937352, "grad_norm": 1.7648390531539917, "learning_rate": 1e-06, "loss": 0.0491, "step": 1105 }, { "epoch": 0.18828736806264895, "grad_norm": 1.8793604373931885, "learning_rate": 1e-06, "loss": 0.0658, "step": 1106 }, { "epoch": 0.1884576098059244, "grad_norm": 1.87686288356781, "learning_rate": 1e-06, "loss": 0.0495, "step": 1107 }, { "epoch": 0.18862785154919987, "grad_norm": 1.5583778619766235, "learning_rate": 1e-06, "loss": 0.047, "step": 1108 }, { "epoch": 0.18879809329247532, "grad_norm": 1.7888959646224976, "learning_rate": 1e-06, "loss": 0.0373, "step": 1109 }, { "epoch": 0.18896833503575078, "grad_norm": 1.628745436668396, "learning_rate": 1e-06, "loss": 0.0405, "step": 1110 }, { "epoch": 0.1891385767790262, "grad_norm": 2.113985776901245, "learning_rate": 1e-06, "loss": 0.0488, "step": 1111 }, { "epoch": 0.18930881852230166, "grad_norm": 2.1266591548919678, "learning_rate": 1e-06, "loss": 0.058, "step": 1112 }, { "epoch": 0.18947906026557712, "grad_norm": 1.9797148704528809, "learning_rate": 1e-06, "loss": 0.0585, "step": 1113 }, { "epoch": 0.18964930200885258, "grad_norm": 1.6259199380874634, "learning_rate": 1e-06, "loss": 0.0519, "step": 1114 }, { "epoch": 0.18981954375212803, "grad_norm": 1.7262063026428223, "learning_rate": 1e-06, "loss": 0.038, "step": 1115 }, { "epoch": 0.18998978549540346, "grad_norm": 1.5812803506851196, "learning_rate": 1e-06, "loss": 0.0487, "step": 1116 }, { "epoch": 0.19016002723867892, "grad_norm": 2.021449327468872, "learning_rate": 1e-06, "loss": 0.057, "step": 1117 }, { "epoch": 0.19033026898195438, "grad_norm": 1.7292096614837646, "learning_rate": 1e-06, "loss": 0.0379, "step": 1118 }, { "epoch": 0.19050051072522983, "grad_norm": 1.4616914987564087, "learning_rate": 1e-06, "loss": 0.0401, "step": 1119 }, { "epoch": 0.1906707524685053, "grad_norm": 1.4097241163253784, "learning_rate": 1e-06, "loss": 0.0463, "step": 1120 }, { "epoch": 0.19084099421178072, "grad_norm": 1.8328516483306885, "learning_rate": 1e-06, "loss": 0.0454, "step": 1121 }, { "epoch": 0.19101123595505617, "grad_norm": 2.055018663406372, "learning_rate": 1e-06, "loss": 0.0606, "step": 1122 }, { "epoch": 0.19118147769833163, "grad_norm": 1.4977216720581055, "learning_rate": 1e-06, "loss": 0.0347, "step": 1123 }, { "epoch": 0.1913517194416071, "grad_norm": 1.8649815320968628, "learning_rate": 1e-06, "loss": 0.0484, "step": 1124 }, { "epoch": 0.19152196118488254, "grad_norm": 1.5965502262115479, "learning_rate": 1e-06, "loss": 0.0413, "step": 1125 }, { "epoch": 0.19169220292815797, "grad_norm": 1.7801200151443481, "learning_rate": 1e-06, "loss": 0.0462, "step": 1126 }, { "epoch": 0.19186244467143343, "grad_norm": 5.706264495849609, "learning_rate": 1e-06, "loss": 0.1093, "step": 1127 }, { "epoch": 0.1920326864147089, "grad_norm": 1.6867637634277344, "learning_rate": 1e-06, "loss": 0.0417, "step": 1128 }, { "epoch": 0.19220292815798434, "grad_norm": 1.7014727592468262, "learning_rate": 1e-06, "loss": 0.0461, "step": 1129 }, { "epoch": 0.1923731699012598, "grad_norm": 1.6920650005340576, "learning_rate": 1e-06, "loss": 0.0513, "step": 1130 }, { "epoch": 0.19254341164453523, "grad_norm": 1.7450604438781738, "learning_rate": 1e-06, "loss": 0.0562, "step": 1131 }, { "epoch": 0.19271365338781068, "grad_norm": 1.501325011253357, "learning_rate": 1e-06, "loss": 0.0447, "step": 1132 }, { "epoch": 0.19288389513108614, "grad_norm": 1.8626445531845093, "learning_rate": 1e-06, "loss": 0.0755, "step": 1133 }, { "epoch": 0.1930541368743616, "grad_norm": 1.3214149475097656, "learning_rate": 1e-06, "loss": 0.0405, "step": 1134 }, { "epoch": 0.19322437861763705, "grad_norm": 1.639206886291504, "learning_rate": 1e-06, "loss": 0.0486, "step": 1135 }, { "epoch": 0.19339462036091248, "grad_norm": 1.6951420307159424, "learning_rate": 1e-06, "loss": 0.0458, "step": 1136 }, { "epoch": 0.19356486210418794, "grad_norm": 1.5768959522247314, "learning_rate": 1e-06, "loss": 0.0466, "step": 1137 }, { "epoch": 0.1937351038474634, "grad_norm": 1.6302675008773804, "learning_rate": 1e-06, "loss": 0.0413, "step": 1138 }, { "epoch": 0.19390534559073885, "grad_norm": 1.4018832445144653, "learning_rate": 1e-06, "loss": 0.0429, "step": 1139 }, { "epoch": 0.1940755873340143, "grad_norm": 1.482530951499939, "learning_rate": 1e-06, "loss": 0.0342, "step": 1140 }, { "epoch": 0.19424582907728974, "grad_norm": 1.4372907876968384, "learning_rate": 1e-06, "loss": 0.0384, "step": 1141 }, { "epoch": 0.1944160708205652, "grad_norm": 2.328382730484009, "learning_rate": 1e-06, "loss": 0.0549, "step": 1142 }, { "epoch": 0.19458631256384065, "grad_norm": 1.780719518661499, "learning_rate": 1e-06, "loss": 0.0468, "step": 1143 }, { "epoch": 0.1947565543071161, "grad_norm": 1.8703736066818237, "learning_rate": 1e-06, "loss": 0.044, "step": 1144 }, { "epoch": 0.19492679605039157, "grad_norm": 1.92543625831604, "learning_rate": 1e-06, "loss": 0.0583, "step": 1145 }, { "epoch": 0.195097037793667, "grad_norm": 1.7063385248184204, "learning_rate": 1e-06, "loss": 0.0481, "step": 1146 }, { "epoch": 0.19526727953694245, "grad_norm": 1.2378170490264893, "learning_rate": 1e-06, "loss": 0.0309, "step": 1147 }, { "epoch": 0.1954375212802179, "grad_norm": 1.4159069061279297, "learning_rate": 1e-06, "loss": 0.0413, "step": 1148 }, { "epoch": 0.19560776302349336, "grad_norm": 1.816462516784668, "learning_rate": 1e-06, "loss": 0.0421, "step": 1149 }, { "epoch": 0.19577800476676882, "grad_norm": 1.6749504804611206, "learning_rate": 1e-06, "loss": 0.0514, "step": 1150 }, { "epoch": 0.19594824651004425, "grad_norm": 1.813492774963379, "learning_rate": 1e-06, "loss": 0.0387, "step": 1151 }, { "epoch": 0.1961184882533197, "grad_norm": 1.8329745531082153, "learning_rate": 1e-06, "loss": 0.0493, "step": 1152 }, { "epoch": 0.19628872999659516, "grad_norm": 2.6087090969085693, "learning_rate": 1e-06, "loss": 0.0479, "step": 1153 }, { "epoch": 0.19645897173987062, "grad_norm": 2.178959369659424, "learning_rate": 1e-06, "loss": 0.0515, "step": 1154 }, { "epoch": 0.19662921348314608, "grad_norm": 1.7682230472564697, "learning_rate": 1e-06, "loss": 0.0426, "step": 1155 }, { "epoch": 0.1967994552264215, "grad_norm": 1.9333738088607788, "learning_rate": 1e-06, "loss": 0.0395, "step": 1156 }, { "epoch": 0.19696969696969696, "grad_norm": 2.0627245903015137, "learning_rate": 1e-06, "loss": 0.0678, "step": 1157 }, { "epoch": 0.19713993871297242, "grad_norm": 1.649121642112732, "learning_rate": 1e-06, "loss": 0.0548, "step": 1158 }, { "epoch": 0.19731018045624787, "grad_norm": 1.3847073316574097, "learning_rate": 1e-06, "loss": 0.0336, "step": 1159 }, { "epoch": 0.19748042219952333, "grad_norm": 1.4557738304138184, "learning_rate": 1e-06, "loss": 0.0334, "step": 1160 }, { "epoch": 0.1976506639427988, "grad_norm": 1.7297767400741577, "learning_rate": 1e-06, "loss": 0.0404, "step": 1161 }, { "epoch": 0.19782090568607422, "grad_norm": 1.6031403541564941, "learning_rate": 1e-06, "loss": 0.0457, "step": 1162 }, { "epoch": 0.19799114742934967, "grad_norm": 1.4427376985549927, "learning_rate": 1e-06, "loss": 0.0394, "step": 1163 }, { "epoch": 0.19816138917262513, "grad_norm": 1.751774787902832, "learning_rate": 1e-06, "loss": 0.0394, "step": 1164 }, { "epoch": 0.1983316309159006, "grad_norm": 1.5437021255493164, "learning_rate": 1e-06, "loss": 0.0405, "step": 1165 }, { "epoch": 0.19850187265917604, "grad_norm": 1.793131947517395, "learning_rate": 1e-06, "loss": 0.0521, "step": 1166 }, { "epoch": 0.19867211440245147, "grad_norm": 1.4936705827713013, "learning_rate": 1e-06, "loss": 0.0365, "step": 1167 }, { "epoch": 0.19884235614572693, "grad_norm": 1.961836814880371, "learning_rate": 1e-06, "loss": 0.0466, "step": 1168 }, { "epoch": 0.19901259788900239, "grad_norm": 1.4718447923660278, "learning_rate": 1e-06, "loss": 0.0377, "step": 1169 }, { "epoch": 0.19918283963227784, "grad_norm": 1.747491717338562, "learning_rate": 1e-06, "loss": 0.0542, "step": 1170 }, { "epoch": 0.1993530813755533, "grad_norm": 1.4570775032043457, "learning_rate": 1e-06, "loss": 0.0378, "step": 1171 }, { "epoch": 0.19952332311882873, "grad_norm": 1.637116551399231, "learning_rate": 1e-06, "loss": 0.0397, "step": 1172 }, { "epoch": 0.19969356486210418, "grad_norm": 1.496801733970642, "learning_rate": 1e-06, "loss": 0.0526, "step": 1173 }, { "epoch": 0.19986380660537964, "grad_norm": 1.7981535196304321, "learning_rate": 1e-06, "loss": 0.0393, "step": 1174 }, { "epoch": 0.2000340483486551, "grad_norm": 1.7196568250656128, "learning_rate": 1e-06, "loss": 0.0409, "step": 1175 }, { "epoch": 0.20020429009193055, "grad_norm": 2.1747894287109375, "learning_rate": 1e-06, "loss": 0.0571, "step": 1176 }, { "epoch": 0.20037453183520598, "grad_norm": 1.8108248710632324, "learning_rate": 1e-06, "loss": 0.0495, "step": 1177 }, { "epoch": 0.20054477357848144, "grad_norm": 1.820467233657837, "learning_rate": 1e-06, "loss": 0.0341, "step": 1178 }, { "epoch": 0.2007150153217569, "grad_norm": 1.7773289680480957, "learning_rate": 1e-06, "loss": 0.0464, "step": 1179 }, { "epoch": 0.20088525706503235, "grad_norm": 1.433434247970581, "learning_rate": 1e-06, "loss": 0.0351, "step": 1180 }, { "epoch": 0.2010554988083078, "grad_norm": 1.836634874343872, "learning_rate": 1e-06, "loss": 0.0588, "step": 1181 }, { "epoch": 0.20122574055158324, "grad_norm": 1.8555299043655396, "learning_rate": 1e-06, "loss": 0.0517, "step": 1182 }, { "epoch": 0.2013959822948587, "grad_norm": 1.853201985359192, "learning_rate": 1e-06, "loss": 0.0396, "step": 1183 }, { "epoch": 0.20156622403813415, "grad_norm": 1.4348528385162354, "learning_rate": 1e-06, "loss": 0.0393, "step": 1184 }, { "epoch": 0.2017364657814096, "grad_norm": 1.7525590658187866, "learning_rate": 1e-06, "loss": 0.043, "step": 1185 }, { "epoch": 0.20190670752468506, "grad_norm": 1.9075602293014526, "learning_rate": 1e-06, "loss": 0.0399, "step": 1186 }, { "epoch": 0.2020769492679605, "grad_norm": 1.6843738555908203, "learning_rate": 1e-06, "loss": 0.0434, "step": 1187 }, { "epoch": 0.20224719101123595, "grad_norm": 1.3292150497436523, "learning_rate": 1e-06, "loss": 0.0267, "step": 1188 }, { "epoch": 0.2024174327545114, "grad_norm": 1.8276677131652832, "learning_rate": 1e-06, "loss": 0.0394, "step": 1189 }, { "epoch": 0.20258767449778686, "grad_norm": 1.8090132474899292, "learning_rate": 1e-06, "loss": 0.0448, "step": 1190 }, { "epoch": 0.20275791624106232, "grad_norm": 1.500030755996704, "learning_rate": 1e-06, "loss": 0.0251, "step": 1191 }, { "epoch": 0.20292815798433775, "grad_norm": 1.9649564027786255, "learning_rate": 1e-06, "loss": 0.0413, "step": 1192 }, { "epoch": 0.2030983997276132, "grad_norm": 1.6460673809051514, "learning_rate": 1e-06, "loss": 0.0322, "step": 1193 }, { "epoch": 0.20326864147088866, "grad_norm": 2.098881959915161, "learning_rate": 1e-06, "loss": 0.0381, "step": 1194 }, { "epoch": 0.20343888321416412, "grad_norm": 1.7853596210479736, "learning_rate": 1e-06, "loss": 0.0428, "step": 1195 }, { "epoch": 0.20360912495743957, "grad_norm": 2.36584210395813, "learning_rate": 1e-06, "loss": 0.0648, "step": 1196 }, { "epoch": 0.203779366700715, "grad_norm": 2.340277671813965, "learning_rate": 1e-06, "loss": 0.0431, "step": 1197 }, { "epoch": 0.20394960844399046, "grad_norm": 1.4898146390914917, "learning_rate": 1e-06, "loss": 0.0441, "step": 1198 }, { "epoch": 0.20411985018726592, "grad_norm": 1.8527204990386963, "learning_rate": 1e-06, "loss": 0.0483, "step": 1199 }, { "epoch": 0.20429009193054137, "grad_norm": 1.7014769315719604, "learning_rate": 1e-06, "loss": 0.0329, "step": 1200 }, { "epoch": 0.20446033367381683, "grad_norm": 1.4059642553329468, "learning_rate": 1e-06, "loss": 0.0355, "step": 1201 }, { "epoch": 0.20463057541709226, "grad_norm": 2.0433590412139893, "learning_rate": 1e-06, "loss": 0.0547, "step": 1202 }, { "epoch": 0.20480081716036772, "grad_norm": 1.3744386434555054, "learning_rate": 1e-06, "loss": 0.0304, "step": 1203 }, { "epoch": 0.20497105890364317, "grad_norm": 1.6091663837432861, "learning_rate": 1e-06, "loss": 0.0469, "step": 1204 }, { "epoch": 0.20514130064691863, "grad_norm": 1.5374170541763306, "learning_rate": 1e-06, "loss": 0.0378, "step": 1205 }, { "epoch": 0.20531154239019409, "grad_norm": 1.6638165712356567, "learning_rate": 1e-06, "loss": 0.04, "step": 1206 }, { "epoch": 0.2054817841334695, "grad_norm": 1.4747909307479858, "learning_rate": 1e-06, "loss": 0.05, "step": 1207 }, { "epoch": 0.20565202587674497, "grad_norm": 1.9081274271011353, "learning_rate": 1e-06, "loss": 0.0428, "step": 1208 }, { "epoch": 0.20582226762002043, "grad_norm": 1.4791818857192993, "learning_rate": 1e-06, "loss": 0.0359, "step": 1209 }, { "epoch": 0.20599250936329588, "grad_norm": 1.411188006401062, "learning_rate": 1e-06, "loss": 0.0325, "step": 1210 }, { "epoch": 0.20616275110657134, "grad_norm": 2.2119061946868896, "learning_rate": 1e-06, "loss": 0.0473, "step": 1211 }, { "epoch": 0.20633299284984677, "grad_norm": 1.4180293083190918, "learning_rate": 1e-06, "loss": 0.0399, "step": 1212 }, { "epoch": 0.20650323459312223, "grad_norm": 1.3908727169036865, "learning_rate": 1e-06, "loss": 0.0362, "step": 1213 }, { "epoch": 0.20667347633639768, "grad_norm": 2.12986421585083, "learning_rate": 1e-06, "loss": 0.0596, "step": 1214 }, { "epoch": 0.20684371807967314, "grad_norm": 1.529612421989441, "learning_rate": 1e-06, "loss": 0.0486, "step": 1215 }, { "epoch": 0.2070139598229486, "grad_norm": 1.533721685409546, "learning_rate": 1e-06, "loss": 0.0343, "step": 1216 }, { "epoch": 0.20718420156622402, "grad_norm": 1.1491042375564575, "learning_rate": 1e-06, "loss": 0.0301, "step": 1217 }, { "epoch": 0.20735444330949948, "grad_norm": 1.4477553367614746, "learning_rate": 1e-06, "loss": 0.0401, "step": 1218 }, { "epoch": 0.20752468505277494, "grad_norm": 1.9513334035873413, "learning_rate": 1e-06, "loss": 0.0453, "step": 1219 }, { "epoch": 0.2076949267960504, "grad_norm": 1.6228469610214233, "learning_rate": 1e-06, "loss": 0.0335, "step": 1220 }, { "epoch": 0.20786516853932585, "grad_norm": 1.736630916595459, "learning_rate": 1e-06, "loss": 0.0431, "step": 1221 }, { "epoch": 0.2080354102826013, "grad_norm": 1.7524003982543945, "learning_rate": 1e-06, "loss": 0.0354, "step": 1222 }, { "epoch": 0.20820565202587674, "grad_norm": 1.9606053829193115, "learning_rate": 1e-06, "loss": 0.0413, "step": 1223 }, { "epoch": 0.2083758937691522, "grad_norm": 1.4940640926361084, "learning_rate": 1e-06, "loss": 0.0351, "step": 1224 }, { "epoch": 0.20854613551242765, "grad_norm": 1.542036533355713, "learning_rate": 1e-06, "loss": 0.0445, "step": 1225 }, { "epoch": 0.2087163772557031, "grad_norm": 1.5856529474258423, "learning_rate": 1e-06, "loss": 0.0389, "step": 1226 }, { "epoch": 0.20888661899897856, "grad_norm": 1.537645697593689, "learning_rate": 1e-06, "loss": 0.0402, "step": 1227 }, { "epoch": 0.209056860742254, "grad_norm": 1.4162458181381226, "learning_rate": 1e-06, "loss": 0.0421, "step": 1228 }, { "epoch": 0.20922710248552945, "grad_norm": 2.1916866302490234, "learning_rate": 1e-06, "loss": 0.038, "step": 1229 }, { "epoch": 0.2093973442288049, "grad_norm": 1.5257288217544556, "learning_rate": 1e-06, "loss": 0.0417, "step": 1230 }, { "epoch": 0.20956758597208036, "grad_norm": 1.9720427989959717, "learning_rate": 1e-06, "loss": 0.0439, "step": 1231 }, { "epoch": 0.20973782771535582, "grad_norm": 1.6400502920150757, "learning_rate": 1e-06, "loss": 0.0373, "step": 1232 }, { "epoch": 0.20990806945863125, "grad_norm": 1.5405869483947754, "learning_rate": 1e-06, "loss": 0.0459, "step": 1233 }, { "epoch": 0.2100783112019067, "grad_norm": 1.7419536113739014, "learning_rate": 1e-06, "loss": 0.0407, "step": 1234 }, { "epoch": 0.21024855294518216, "grad_norm": 1.5766996145248413, "learning_rate": 1e-06, "loss": 0.0347, "step": 1235 }, { "epoch": 0.21041879468845762, "grad_norm": 2.0112199783325195, "learning_rate": 1e-06, "loss": 0.058, "step": 1236 }, { "epoch": 0.21058903643173307, "grad_norm": 2.1589255332946777, "learning_rate": 1e-06, "loss": 0.0524, "step": 1237 }, { "epoch": 0.2107592781750085, "grad_norm": 1.5398106575012207, "learning_rate": 1e-06, "loss": 0.0405, "step": 1238 }, { "epoch": 0.21092951991828396, "grad_norm": 5.148733139038086, "learning_rate": 1e-06, "loss": 0.0627, "step": 1239 }, { "epoch": 0.21109976166155942, "grad_norm": 1.5265625715255737, "learning_rate": 1e-06, "loss": 0.0405, "step": 1240 }, { "epoch": 0.21127000340483487, "grad_norm": 2.0388882160186768, "learning_rate": 1e-06, "loss": 0.0422, "step": 1241 }, { "epoch": 0.21144024514811033, "grad_norm": 1.5845919847488403, "learning_rate": 1e-06, "loss": 0.0388, "step": 1242 }, { "epoch": 0.21161048689138576, "grad_norm": 1.984523057937622, "learning_rate": 1e-06, "loss": 0.0507, "step": 1243 }, { "epoch": 0.21178072863466121, "grad_norm": 1.4896509647369385, "learning_rate": 1e-06, "loss": 0.0357, "step": 1244 }, { "epoch": 0.21195097037793667, "grad_norm": 1.240064024925232, "learning_rate": 1e-06, "loss": 0.0345, "step": 1245 }, { "epoch": 0.21212121212121213, "grad_norm": 1.753086805343628, "learning_rate": 1e-06, "loss": 0.0492, "step": 1246 }, { "epoch": 0.21229145386448758, "grad_norm": 1.9514403343200684, "learning_rate": 1e-06, "loss": 0.038, "step": 1247 }, { "epoch": 0.212461695607763, "grad_norm": 1.8083099126815796, "learning_rate": 1e-06, "loss": 0.0425, "step": 1248 }, { "epoch": 0.21263193735103847, "grad_norm": 1.6861008405685425, "learning_rate": 1e-06, "loss": 0.043, "step": 1249 }, { "epoch": 0.21280217909431393, "grad_norm": 1.4214813709259033, "learning_rate": 1e-06, "loss": 0.037, "step": 1250 }, { "epoch": 0.21297242083758938, "grad_norm": 1.8322423696517944, "learning_rate": 1e-06, "loss": 0.0427, "step": 1251 }, { "epoch": 0.21314266258086484, "grad_norm": 4.470928192138672, "learning_rate": 1e-06, "loss": 0.0992, "step": 1252 }, { "epoch": 0.21331290432414027, "grad_norm": 1.9335957765579224, "learning_rate": 1e-06, "loss": 0.0402, "step": 1253 }, { "epoch": 0.21348314606741572, "grad_norm": 1.5598244667053223, "learning_rate": 1e-06, "loss": 0.0402, "step": 1254 }, { "epoch": 0.21365338781069118, "grad_norm": 1.5040446519851685, "learning_rate": 1e-06, "loss": 0.0361, "step": 1255 }, { "epoch": 0.21382362955396664, "grad_norm": 2.0225491523742676, "learning_rate": 1e-06, "loss": 0.0518, "step": 1256 }, { "epoch": 0.2139938712972421, "grad_norm": 1.7718695402145386, "learning_rate": 1e-06, "loss": 0.0487, "step": 1257 }, { "epoch": 0.21416411304051752, "grad_norm": 2.1986560821533203, "learning_rate": 1e-06, "loss": 0.0538, "step": 1258 }, { "epoch": 0.21433435478379298, "grad_norm": 1.7088544368743896, "learning_rate": 1e-06, "loss": 0.0452, "step": 1259 }, { "epoch": 0.21450459652706844, "grad_norm": 1.5693634748458862, "learning_rate": 1e-06, "loss": 0.036, "step": 1260 }, { "epoch": 0.2146748382703439, "grad_norm": 1.8983958959579468, "learning_rate": 1e-06, "loss": 0.0543, "step": 1261 }, { "epoch": 0.21484508001361935, "grad_norm": 1.3244619369506836, "learning_rate": 1e-06, "loss": 0.032, "step": 1262 }, { "epoch": 0.21501532175689478, "grad_norm": 1.7973686456680298, "learning_rate": 1e-06, "loss": 0.0458, "step": 1263 }, { "epoch": 0.21518556350017023, "grad_norm": 1.7030102014541626, "learning_rate": 1e-06, "loss": 0.041, "step": 1264 }, { "epoch": 0.2153558052434457, "grad_norm": 1.5380405187606812, "learning_rate": 1e-06, "loss": 0.0382, "step": 1265 }, { "epoch": 0.21552604698672115, "grad_norm": 2.168999433517456, "learning_rate": 1e-06, "loss": 0.0585, "step": 1266 }, { "epoch": 0.2156962887299966, "grad_norm": 2.1720099449157715, "learning_rate": 1e-06, "loss": 0.0363, "step": 1267 }, { "epoch": 0.21586653047327203, "grad_norm": 2.3076796531677246, "learning_rate": 1e-06, "loss": 0.0679, "step": 1268 }, { "epoch": 0.2160367722165475, "grad_norm": 1.696959376335144, "learning_rate": 1e-06, "loss": 0.0331, "step": 1269 }, { "epoch": 0.21620701395982295, "grad_norm": 1.8012551069259644, "learning_rate": 1e-06, "loss": 0.0399, "step": 1270 }, { "epoch": 0.2163772557030984, "grad_norm": 1.706807017326355, "learning_rate": 1e-06, "loss": 0.044, "step": 1271 }, { "epoch": 0.21654749744637386, "grad_norm": 1.3523311614990234, "learning_rate": 1e-06, "loss": 0.04, "step": 1272 }, { "epoch": 0.2167177391896493, "grad_norm": 1.5000497102737427, "learning_rate": 1e-06, "loss": 0.0383, "step": 1273 }, { "epoch": 0.21688798093292475, "grad_norm": 1.4352927207946777, "learning_rate": 1e-06, "loss": 0.0327, "step": 1274 }, { "epoch": 0.2170582226762002, "grad_norm": 1.4274100065231323, "learning_rate": 1e-06, "loss": 0.0302, "step": 1275 }, { "epoch": 0.21722846441947566, "grad_norm": 1.4734169244766235, "learning_rate": 1e-06, "loss": 0.0291, "step": 1276 }, { "epoch": 0.21739870616275112, "grad_norm": 1.532433271408081, "learning_rate": 1e-06, "loss": 0.0424, "step": 1277 }, { "epoch": 0.21756894790602654, "grad_norm": 1.4756892919540405, "learning_rate": 1e-06, "loss": 0.0291, "step": 1278 }, { "epoch": 0.217739189649302, "grad_norm": 1.4791312217712402, "learning_rate": 1e-06, "loss": 0.0428, "step": 1279 }, { "epoch": 0.21790943139257746, "grad_norm": 1.201405644416809, "learning_rate": 1e-06, "loss": 0.0272, "step": 1280 }, { "epoch": 0.21807967313585291, "grad_norm": 3.065424680709839, "learning_rate": 1e-06, "loss": 0.0406, "step": 1281 }, { "epoch": 0.21824991487912837, "grad_norm": 1.7154473066329956, "learning_rate": 1e-06, "loss": 0.0307, "step": 1282 }, { "epoch": 0.21842015662240383, "grad_norm": 1.5745826959609985, "learning_rate": 1e-06, "loss": 0.0295, "step": 1283 }, { "epoch": 0.21859039836567926, "grad_norm": 1.9603149890899658, "learning_rate": 1e-06, "loss": 0.0469, "step": 1284 }, { "epoch": 0.2187606401089547, "grad_norm": 1.6602104902267456, "learning_rate": 1e-06, "loss": 0.0433, "step": 1285 }, { "epoch": 0.21893088185223017, "grad_norm": 1.7477741241455078, "learning_rate": 1e-06, "loss": 0.0291, "step": 1286 }, { "epoch": 0.21910112359550563, "grad_norm": 1.8926868438720703, "learning_rate": 1e-06, "loss": 0.0392, "step": 1287 }, { "epoch": 0.21927136533878108, "grad_norm": 1.508943796157837, "learning_rate": 1e-06, "loss": 0.0286, "step": 1288 }, { "epoch": 0.2194416070820565, "grad_norm": 1.6563001871109009, "learning_rate": 1e-06, "loss": 0.0359, "step": 1289 }, { "epoch": 0.21961184882533197, "grad_norm": 1.6094985008239746, "learning_rate": 1e-06, "loss": 0.0357, "step": 1290 }, { "epoch": 0.21978209056860742, "grad_norm": 1.9460690021514893, "learning_rate": 1e-06, "loss": 0.0515, "step": 1291 }, { "epoch": 0.21995233231188288, "grad_norm": 1.3542574644088745, "learning_rate": 1e-06, "loss": 0.0389, "step": 1292 }, { "epoch": 0.22012257405515834, "grad_norm": 1.3899677991867065, "learning_rate": 1e-06, "loss": 0.0318, "step": 1293 }, { "epoch": 0.22029281579843377, "grad_norm": 1.9105228185653687, "learning_rate": 1e-06, "loss": 0.049, "step": 1294 }, { "epoch": 0.22046305754170922, "grad_norm": 1.5152599811553955, "learning_rate": 1e-06, "loss": 0.0416, "step": 1295 }, { "epoch": 0.22063329928498468, "grad_norm": 1.2295457124710083, "learning_rate": 1e-06, "loss": 0.031, "step": 1296 }, { "epoch": 0.22080354102826014, "grad_norm": 1.8313056230545044, "learning_rate": 1e-06, "loss": 0.0451, "step": 1297 }, { "epoch": 0.2209737827715356, "grad_norm": 1.5762004852294922, "learning_rate": 1e-06, "loss": 0.0437, "step": 1298 }, { "epoch": 0.22114402451481102, "grad_norm": 1.603684663772583, "learning_rate": 1e-06, "loss": 0.0372, "step": 1299 }, { "epoch": 0.22131426625808648, "grad_norm": 1.809328317642212, "learning_rate": 1e-06, "loss": 0.0328, "step": 1300 }, { "epoch": 0.22148450800136193, "grad_norm": 1.6629222631454468, "learning_rate": 1e-06, "loss": 0.0421, "step": 1301 }, { "epoch": 0.2216547497446374, "grad_norm": 1.5856209993362427, "learning_rate": 1e-06, "loss": 0.0377, "step": 1302 }, { "epoch": 0.22182499148791285, "grad_norm": 1.8281983137130737, "learning_rate": 1e-06, "loss": 0.0339, "step": 1303 }, { "epoch": 0.22199523323118828, "grad_norm": 1.8064937591552734, "learning_rate": 1e-06, "loss": 0.0359, "step": 1304 }, { "epoch": 0.22216547497446373, "grad_norm": 1.6382505893707275, "learning_rate": 1e-06, "loss": 0.0376, "step": 1305 }, { "epoch": 0.2223357167177392, "grad_norm": 1.8759084939956665, "learning_rate": 1e-06, "loss": 0.0397, "step": 1306 }, { "epoch": 0.22250595846101465, "grad_norm": 1.587537407875061, "learning_rate": 1e-06, "loss": 0.0361, "step": 1307 }, { "epoch": 0.2226762002042901, "grad_norm": 1.6119834184646606, "learning_rate": 1e-06, "loss": 0.0368, "step": 1308 }, { "epoch": 0.22284644194756553, "grad_norm": 1.4815311431884766, "learning_rate": 1e-06, "loss": 0.0318, "step": 1309 }, { "epoch": 0.223016683690841, "grad_norm": 1.4200611114501953, "learning_rate": 1e-06, "loss": 0.0282, "step": 1310 }, { "epoch": 0.22318692543411645, "grad_norm": 1.3545717000961304, "learning_rate": 1e-06, "loss": 0.0374, "step": 1311 }, { "epoch": 0.2233571671773919, "grad_norm": 1.5961992740631104, "learning_rate": 1e-06, "loss": 0.0402, "step": 1312 }, { "epoch": 0.22352740892066736, "grad_norm": 1.420921802520752, "learning_rate": 1e-06, "loss": 0.0443, "step": 1313 }, { "epoch": 0.2236976506639428, "grad_norm": 1.7957236766815186, "learning_rate": 1e-06, "loss": 0.0354, "step": 1314 }, { "epoch": 0.22386789240721824, "grad_norm": 1.4843519926071167, "learning_rate": 1e-06, "loss": 0.0337, "step": 1315 }, { "epoch": 0.2240381341504937, "grad_norm": 1.5852254629135132, "learning_rate": 1e-06, "loss": 0.0366, "step": 1316 }, { "epoch": 0.22420837589376916, "grad_norm": 1.654274344444275, "learning_rate": 1e-06, "loss": 0.0407, "step": 1317 }, { "epoch": 0.22437861763704461, "grad_norm": 1.884421467781067, "learning_rate": 1e-06, "loss": 0.0379, "step": 1318 }, { "epoch": 0.22454885938032004, "grad_norm": 1.8321963548660278, "learning_rate": 1e-06, "loss": 0.0404, "step": 1319 }, { "epoch": 0.2247191011235955, "grad_norm": 1.735929012298584, "learning_rate": 1e-06, "loss": 0.0477, "step": 1320 }, { "epoch": 0.22488934286687096, "grad_norm": 1.5793464183807373, "learning_rate": 1e-06, "loss": 0.0268, "step": 1321 }, { "epoch": 0.2250595846101464, "grad_norm": 1.6962897777557373, "learning_rate": 1e-06, "loss": 0.0387, "step": 1322 }, { "epoch": 0.22522982635342187, "grad_norm": 1.4622056484222412, "learning_rate": 1e-06, "loss": 0.0315, "step": 1323 }, { "epoch": 0.2254000680966973, "grad_norm": 1.4477473497390747, "learning_rate": 1e-06, "loss": 0.0419, "step": 1324 }, { "epoch": 0.22557030983997275, "grad_norm": 1.508447289466858, "learning_rate": 1e-06, "loss": 0.0317, "step": 1325 }, { "epoch": 0.2257405515832482, "grad_norm": 1.3146283626556396, "learning_rate": 1e-06, "loss": 0.0271, "step": 1326 }, { "epoch": 0.22591079332652367, "grad_norm": 1.3582558631896973, "learning_rate": 1e-06, "loss": 0.0304, "step": 1327 }, { "epoch": 0.22608103506979912, "grad_norm": 2.6392621994018555, "learning_rate": 1e-06, "loss": 0.0521, "step": 1328 }, { "epoch": 0.22625127681307455, "grad_norm": 1.992223858833313, "learning_rate": 1e-06, "loss": 0.0608, "step": 1329 }, { "epoch": 0.22642151855635, "grad_norm": 1.6414374113082886, "learning_rate": 1e-06, "loss": 0.0394, "step": 1330 }, { "epoch": 0.22659176029962547, "grad_norm": 1.3846744298934937, "learning_rate": 1e-06, "loss": 0.0291, "step": 1331 }, { "epoch": 0.22676200204290092, "grad_norm": 1.5988634824752808, "learning_rate": 1e-06, "loss": 0.0307, "step": 1332 }, { "epoch": 0.22693224378617638, "grad_norm": 1.4296313524246216, "learning_rate": 1e-06, "loss": 0.0322, "step": 1333 }, { "epoch": 0.2271024855294518, "grad_norm": 1.8813896179199219, "learning_rate": 1e-06, "loss": 0.0487, "step": 1334 }, { "epoch": 0.22727272727272727, "grad_norm": 1.5750256776809692, "learning_rate": 1e-06, "loss": 0.0335, "step": 1335 }, { "epoch": 0.22744296901600272, "grad_norm": 1.7783371210098267, "learning_rate": 1e-06, "loss": 0.0428, "step": 1336 }, { "epoch": 0.22761321075927818, "grad_norm": 1.2634700536727905, "learning_rate": 1e-06, "loss": 0.0248, "step": 1337 }, { "epoch": 0.22778345250255363, "grad_norm": 1.2207298278808594, "learning_rate": 1e-06, "loss": 0.0283, "step": 1338 }, { "epoch": 0.22795369424582906, "grad_norm": 1.94607412815094, "learning_rate": 1e-06, "loss": 0.0345, "step": 1339 }, { "epoch": 0.22812393598910452, "grad_norm": 3.4443957805633545, "learning_rate": 1e-06, "loss": 0.0546, "step": 1340 }, { "epoch": 0.22829417773237998, "grad_norm": 2.1095688343048096, "learning_rate": 1e-06, "loss": 0.0488, "step": 1341 }, { "epoch": 0.22846441947565543, "grad_norm": 5.005306720733643, "learning_rate": 1e-06, "loss": 0.079, "step": 1342 }, { "epoch": 0.2286346612189309, "grad_norm": 1.6204818487167358, "learning_rate": 1e-06, "loss": 0.034, "step": 1343 }, { "epoch": 0.22880490296220635, "grad_norm": 1.8814785480499268, "learning_rate": 1e-06, "loss": 0.0365, "step": 1344 }, { "epoch": 0.22897514470548178, "grad_norm": 1.5905134677886963, "learning_rate": 1e-06, "loss": 0.0328, "step": 1345 }, { "epoch": 0.22914538644875723, "grad_norm": 1.4241366386413574, "learning_rate": 1e-06, "loss": 0.0307, "step": 1346 }, { "epoch": 0.2293156281920327, "grad_norm": 1.5406595468521118, "learning_rate": 1e-06, "loss": 0.032, "step": 1347 }, { "epoch": 0.22948586993530815, "grad_norm": 1.3714312314987183, "learning_rate": 1e-06, "loss": 0.0271, "step": 1348 }, { "epoch": 0.2296561116785836, "grad_norm": 1.919778823852539, "learning_rate": 1e-06, "loss": 0.0372, "step": 1349 }, { "epoch": 0.22982635342185903, "grad_norm": 1.4447976350784302, "learning_rate": 1e-06, "loss": 0.0289, "step": 1350 }, { "epoch": 0.2299965951651345, "grad_norm": 2.128904104232788, "learning_rate": 1e-06, "loss": 0.0554, "step": 1351 }, { "epoch": 0.23016683690840994, "grad_norm": 1.2440881729125977, "learning_rate": 1e-06, "loss": 0.0365, "step": 1352 }, { "epoch": 0.2303370786516854, "grad_norm": 2.688145637512207, "learning_rate": 1e-06, "loss": 0.0558, "step": 1353 }, { "epoch": 0.23050732039496086, "grad_norm": 1.7107890844345093, "learning_rate": 1e-06, "loss": 0.0364, "step": 1354 }, { "epoch": 0.2306775621382363, "grad_norm": 1.3972139358520508, "learning_rate": 1e-06, "loss": 0.0306, "step": 1355 }, { "epoch": 0.23084780388151174, "grad_norm": 1.6243966817855835, "learning_rate": 1e-06, "loss": 0.0385, "step": 1356 }, { "epoch": 0.2310180456247872, "grad_norm": 1.5047791004180908, "learning_rate": 1e-06, "loss": 0.0344, "step": 1357 }, { "epoch": 0.23118828736806266, "grad_norm": 1.5549938678741455, "learning_rate": 1e-06, "loss": 0.0321, "step": 1358 }, { "epoch": 0.2313585291113381, "grad_norm": 1.5315052270889282, "learning_rate": 1e-06, "loss": 0.0272, "step": 1359 }, { "epoch": 0.23152877085461354, "grad_norm": 1.6653809547424316, "learning_rate": 1e-06, "loss": 0.0239, "step": 1360 }, { "epoch": 0.231699012597889, "grad_norm": 1.8215115070343018, "learning_rate": 1e-06, "loss": 0.034, "step": 1361 }, { "epoch": 0.23186925434116445, "grad_norm": 1.3523900508880615, "learning_rate": 1e-06, "loss": 0.0282, "step": 1362 }, { "epoch": 0.2320394960844399, "grad_norm": 1.6606805324554443, "learning_rate": 1e-06, "loss": 0.0462, "step": 1363 }, { "epoch": 0.23220973782771537, "grad_norm": 1.6801470518112183, "learning_rate": 1e-06, "loss": 0.0428, "step": 1364 }, { "epoch": 0.2323799795709908, "grad_norm": 1.6056513786315918, "learning_rate": 1e-06, "loss": 0.037, "step": 1365 }, { "epoch": 0.23255022131426625, "grad_norm": 1.549023151397705, "learning_rate": 1e-06, "loss": 0.0362, "step": 1366 }, { "epoch": 0.2327204630575417, "grad_norm": 1.8244285583496094, "learning_rate": 1e-06, "loss": 0.0469, "step": 1367 }, { "epoch": 0.23289070480081717, "grad_norm": 1.7030590772628784, "learning_rate": 1e-06, "loss": 0.0396, "step": 1368 }, { "epoch": 0.23306094654409262, "grad_norm": 1.9018099308013916, "learning_rate": 1e-06, "loss": 0.0462, "step": 1369 }, { "epoch": 0.23323118828736805, "grad_norm": 1.8366174697875977, "learning_rate": 1e-06, "loss": 0.0431, "step": 1370 }, { "epoch": 0.2334014300306435, "grad_norm": 1.7807953357696533, "learning_rate": 1e-06, "loss": 0.0397, "step": 1371 }, { "epoch": 0.23357167177391897, "grad_norm": 3.430758476257324, "learning_rate": 1e-06, "loss": 0.0504, "step": 1372 }, { "epoch": 0.23374191351719442, "grad_norm": 1.6039738655090332, "learning_rate": 1e-06, "loss": 0.0326, "step": 1373 }, { "epoch": 0.23391215526046988, "grad_norm": 2.001668691635132, "learning_rate": 1e-06, "loss": 0.0322, "step": 1374 }, { "epoch": 0.2340823970037453, "grad_norm": 1.9030237197875977, "learning_rate": 1e-06, "loss": 0.0322, "step": 1375 }, { "epoch": 0.23425263874702076, "grad_norm": 1.4003009796142578, "learning_rate": 1e-06, "loss": 0.0317, "step": 1376 }, { "epoch": 0.23442288049029622, "grad_norm": 1.6500396728515625, "learning_rate": 1e-06, "loss": 0.0332, "step": 1377 }, { "epoch": 0.23459312223357168, "grad_norm": 4.6141438484191895, "learning_rate": 1e-06, "loss": 0.0547, "step": 1378 }, { "epoch": 0.23476336397684713, "grad_norm": 1.4108147621154785, "learning_rate": 1e-06, "loss": 0.0233, "step": 1379 }, { "epoch": 0.23493360572012256, "grad_norm": 1.68300461769104, "learning_rate": 1e-06, "loss": 0.0265, "step": 1380 }, { "epoch": 0.23510384746339802, "grad_norm": 1.4702105522155762, "learning_rate": 1e-06, "loss": 0.0311, "step": 1381 }, { "epoch": 0.23527408920667348, "grad_norm": 1.669421911239624, "learning_rate": 1e-06, "loss": 0.0459, "step": 1382 }, { "epoch": 0.23544433094994893, "grad_norm": 1.987108588218689, "learning_rate": 1e-06, "loss": 0.0317, "step": 1383 }, { "epoch": 0.2356145726932244, "grad_norm": 1.5817410945892334, "learning_rate": 1e-06, "loss": 0.0297, "step": 1384 }, { "epoch": 0.23578481443649982, "grad_norm": 1.524600863456726, "learning_rate": 1e-06, "loss": 0.027, "step": 1385 }, { "epoch": 0.23595505617977527, "grad_norm": 1.9512219429016113, "learning_rate": 1e-06, "loss": 0.0426, "step": 1386 }, { "epoch": 0.23612529792305073, "grad_norm": 1.6807881593704224, "learning_rate": 1e-06, "loss": 0.0332, "step": 1387 }, { "epoch": 0.2362955396663262, "grad_norm": 1.5827664136886597, "learning_rate": 1e-06, "loss": 0.0393, "step": 1388 }, { "epoch": 0.23646578140960164, "grad_norm": 1.4349877834320068, "learning_rate": 1e-06, "loss": 0.0316, "step": 1389 }, { "epoch": 0.23663602315287707, "grad_norm": 1.8205857276916504, "learning_rate": 1e-06, "loss": 0.0422, "step": 1390 }, { "epoch": 0.23680626489615253, "grad_norm": 2.0506768226623535, "learning_rate": 1e-06, "loss": 0.0371, "step": 1391 }, { "epoch": 0.236976506639428, "grad_norm": 3.028653860092163, "learning_rate": 1e-06, "loss": 0.043, "step": 1392 }, { "epoch": 0.23714674838270344, "grad_norm": 1.7093989849090576, "learning_rate": 1e-06, "loss": 0.0338, "step": 1393 }, { "epoch": 0.2373169901259789, "grad_norm": 1.727501630783081, "learning_rate": 1e-06, "loss": 0.0279, "step": 1394 }, { "epoch": 0.23748723186925433, "grad_norm": 1.9987183809280396, "learning_rate": 1e-06, "loss": 0.0358, "step": 1395 }, { "epoch": 0.23765747361252978, "grad_norm": 1.4499446153640747, "learning_rate": 1e-06, "loss": 0.0228, "step": 1396 }, { "epoch": 0.23782771535580524, "grad_norm": 2.0619688034057617, "learning_rate": 1e-06, "loss": 0.0451, "step": 1397 }, { "epoch": 0.2379979570990807, "grad_norm": 1.4072680473327637, "learning_rate": 1e-06, "loss": 0.0331, "step": 1398 }, { "epoch": 0.23816819884235615, "grad_norm": 1.9472742080688477, "learning_rate": 1e-06, "loss": 0.0357, "step": 1399 }, { "epoch": 0.23833844058563158, "grad_norm": 1.682454228401184, "learning_rate": 1e-06, "loss": 0.0349, "step": 1400 }, { "epoch": 0.23850868232890704, "grad_norm": 1.4352442026138306, "learning_rate": 1e-06, "loss": 0.0222, "step": 1401 }, { "epoch": 0.2386789240721825, "grad_norm": 1.4091311693191528, "learning_rate": 1e-06, "loss": 0.0328, "step": 1402 }, { "epoch": 0.23884916581545795, "grad_norm": 1.3856940269470215, "learning_rate": 1e-06, "loss": 0.0245, "step": 1403 }, { "epoch": 0.2390194075587334, "grad_norm": 3.6680867671966553, "learning_rate": 1e-06, "loss": 0.049, "step": 1404 }, { "epoch": 0.23918964930200884, "grad_norm": 1.7768093347549438, "learning_rate": 1e-06, "loss": 0.0396, "step": 1405 }, { "epoch": 0.2393598910452843, "grad_norm": 1.83253014087677, "learning_rate": 1e-06, "loss": 0.0303, "step": 1406 }, { "epoch": 0.23953013278855975, "grad_norm": 3.3626549243927, "learning_rate": 1e-06, "loss": 0.0739, "step": 1407 }, { "epoch": 0.2397003745318352, "grad_norm": 1.9892669916152954, "learning_rate": 1e-06, "loss": 0.0423, "step": 1408 }, { "epoch": 0.23987061627511067, "grad_norm": 1.340533971786499, "learning_rate": 1e-06, "loss": 0.0245, "step": 1409 }, { "epoch": 0.24004085801838612, "grad_norm": 1.6560157537460327, "learning_rate": 1e-06, "loss": 0.0401, "step": 1410 }, { "epoch": 0.24021109976166155, "grad_norm": 1.6459312438964844, "learning_rate": 1e-06, "loss": 0.0447, "step": 1411 }, { "epoch": 0.240381341504937, "grad_norm": 2.4004392623901367, "learning_rate": 1e-06, "loss": 0.0525, "step": 1412 }, { "epoch": 0.24055158324821246, "grad_norm": 1.7633525133132935, "learning_rate": 1e-06, "loss": 0.0334, "step": 1413 }, { "epoch": 0.24072182499148792, "grad_norm": 1.6249415874481201, "learning_rate": 1e-06, "loss": 0.039, "step": 1414 }, { "epoch": 0.24089206673476338, "grad_norm": 1.817467212677002, "learning_rate": 1e-06, "loss": 0.0329, "step": 1415 }, { "epoch": 0.2410623084780388, "grad_norm": 1.592246174812317, "learning_rate": 1e-06, "loss": 0.0493, "step": 1416 }, { "epoch": 0.24123255022131426, "grad_norm": 2.0346999168395996, "learning_rate": 1e-06, "loss": 0.0353, "step": 1417 }, { "epoch": 0.24140279196458972, "grad_norm": 1.3118454217910767, "learning_rate": 1e-06, "loss": 0.0321, "step": 1418 }, { "epoch": 0.24157303370786518, "grad_norm": 1.5990475416183472, "learning_rate": 1e-06, "loss": 0.0401, "step": 1419 }, { "epoch": 0.24174327545114063, "grad_norm": 1.3188730478286743, "learning_rate": 1e-06, "loss": 0.0188, "step": 1420 }, { "epoch": 0.24191351719441606, "grad_norm": 1.5186814069747925, "learning_rate": 1e-06, "loss": 0.0286, "step": 1421 }, { "epoch": 0.24208375893769152, "grad_norm": 3.969571352005005, "learning_rate": 1e-06, "loss": 0.0573, "step": 1422 }, { "epoch": 0.24225400068096697, "grad_norm": 1.6175631284713745, "learning_rate": 1e-06, "loss": 0.0308, "step": 1423 }, { "epoch": 0.24242424242424243, "grad_norm": 1.3874751329421997, "learning_rate": 1e-06, "loss": 0.0298, "step": 1424 }, { "epoch": 0.2425944841675179, "grad_norm": 2.3302395343780518, "learning_rate": 1e-06, "loss": 0.0483, "step": 1425 }, { "epoch": 0.24276472591079332, "grad_norm": 2.0167653560638428, "learning_rate": 1e-06, "loss": 0.0356, "step": 1426 }, { "epoch": 0.24293496765406877, "grad_norm": 1.5584750175476074, "learning_rate": 1e-06, "loss": 0.0331, "step": 1427 }, { "epoch": 0.24310520939734423, "grad_norm": 1.7811205387115479, "learning_rate": 1e-06, "loss": 0.0296, "step": 1428 }, { "epoch": 0.2432754511406197, "grad_norm": 1.6993045806884766, "learning_rate": 1e-06, "loss": 0.0341, "step": 1429 }, { "epoch": 0.24344569288389514, "grad_norm": 1.6028584241867065, "learning_rate": 1e-06, "loss": 0.03, "step": 1430 }, { "epoch": 0.24361593462717057, "grad_norm": 2.2071762084960938, "learning_rate": 1e-06, "loss": 0.0418, "step": 1431 }, { "epoch": 0.24378617637044603, "grad_norm": 1.5499199628829956, "learning_rate": 1e-06, "loss": 0.037, "step": 1432 }, { "epoch": 0.24395641811372148, "grad_norm": 1.59584641456604, "learning_rate": 1e-06, "loss": 0.0312, "step": 1433 }, { "epoch": 0.24412665985699694, "grad_norm": 1.5070613622665405, "learning_rate": 1e-06, "loss": 0.0274, "step": 1434 }, { "epoch": 0.2442969016002724, "grad_norm": 1.6811244487762451, "learning_rate": 1e-06, "loss": 0.0277, "step": 1435 }, { "epoch": 0.24446714334354783, "grad_norm": 1.631415843963623, "learning_rate": 1e-06, "loss": 0.0363, "step": 1436 }, { "epoch": 0.24463738508682328, "grad_norm": 1.6105327606201172, "learning_rate": 1e-06, "loss": 0.0279, "step": 1437 }, { "epoch": 0.24480762683009874, "grad_norm": 1.791298508644104, "learning_rate": 1e-06, "loss": 0.0494, "step": 1438 }, { "epoch": 0.2449778685733742, "grad_norm": 1.1787437200546265, "learning_rate": 1e-06, "loss": 0.0278, "step": 1439 }, { "epoch": 0.24514811031664965, "grad_norm": 1.3481965065002441, "learning_rate": 1e-06, "loss": 0.031, "step": 1440 }, { "epoch": 0.24531835205992508, "grad_norm": 1.495886206626892, "learning_rate": 1e-06, "loss": 0.0419, "step": 1441 }, { "epoch": 0.24548859380320054, "grad_norm": 1.6901342868804932, "learning_rate": 1e-06, "loss": 0.0329, "step": 1442 }, { "epoch": 0.245658835546476, "grad_norm": 1.443678379058838, "learning_rate": 1e-06, "loss": 0.0412, "step": 1443 }, { "epoch": 0.24582907728975145, "grad_norm": 1.8046194314956665, "learning_rate": 1e-06, "loss": 0.0384, "step": 1444 }, { "epoch": 0.2459993190330269, "grad_norm": 1.5077475309371948, "learning_rate": 1e-06, "loss": 0.0321, "step": 1445 }, { "epoch": 0.24616956077630234, "grad_norm": 1.7492755651474, "learning_rate": 1e-06, "loss": 0.0361, "step": 1446 }, { "epoch": 0.2463398025195778, "grad_norm": 1.5502599477767944, "learning_rate": 1e-06, "loss": 0.0273, "step": 1447 }, { "epoch": 0.24651004426285325, "grad_norm": 1.9191864728927612, "learning_rate": 1e-06, "loss": 0.0369, "step": 1448 }, { "epoch": 0.2466802860061287, "grad_norm": 1.9377306699752808, "learning_rate": 1e-06, "loss": 0.0401, "step": 1449 }, { "epoch": 0.24685052774940416, "grad_norm": 2.082916259765625, "learning_rate": 1e-06, "loss": 0.0402, "step": 1450 }, { "epoch": 0.2470207694926796, "grad_norm": 1.3470356464385986, "learning_rate": 1e-06, "loss": 0.0214, "step": 1451 }, { "epoch": 0.24719101123595505, "grad_norm": 1.5550161600112915, "learning_rate": 1e-06, "loss": 0.0257, "step": 1452 }, { "epoch": 0.2473612529792305, "grad_norm": 1.9106121063232422, "learning_rate": 1e-06, "loss": 0.0399, "step": 1453 }, { "epoch": 0.24753149472250596, "grad_norm": 1.2977889776229858, "learning_rate": 1e-06, "loss": 0.0238, "step": 1454 }, { "epoch": 0.24770173646578142, "grad_norm": 1.8015820980072021, "learning_rate": 1e-06, "loss": 0.0337, "step": 1455 }, { "epoch": 0.24787197820905685, "grad_norm": 1.515289545059204, "learning_rate": 1e-06, "loss": 0.0266, "step": 1456 }, { "epoch": 0.2480422199523323, "grad_norm": 2.0699691772460938, "learning_rate": 1e-06, "loss": 0.0408, "step": 1457 }, { "epoch": 0.24821246169560776, "grad_norm": 1.9157500267028809, "learning_rate": 1e-06, "loss": 0.0431, "step": 1458 }, { "epoch": 0.24838270343888322, "grad_norm": 1.7912602424621582, "learning_rate": 1e-06, "loss": 0.0362, "step": 1459 }, { "epoch": 0.24855294518215867, "grad_norm": 1.5445823669433594, "learning_rate": 1e-06, "loss": 0.0337, "step": 1460 }, { "epoch": 0.2487231869254341, "grad_norm": 1.5842821598052979, "learning_rate": 1e-06, "loss": 0.0283, "step": 1461 }, { "epoch": 0.24889342866870956, "grad_norm": 2.117060899734497, "learning_rate": 1e-06, "loss": 0.033, "step": 1462 }, { "epoch": 0.24906367041198502, "grad_norm": 1.1993916034698486, "learning_rate": 1e-06, "loss": 0.0265, "step": 1463 }, { "epoch": 0.24923391215526047, "grad_norm": 1.9758274555206299, "learning_rate": 1e-06, "loss": 0.0448, "step": 1464 }, { "epoch": 0.24940415389853593, "grad_norm": 1.6750165224075317, "learning_rate": 1e-06, "loss": 0.0371, "step": 1465 }, { "epoch": 0.24957439564181136, "grad_norm": 1.9693377017974854, "learning_rate": 1e-06, "loss": 0.0462, "step": 1466 }, { "epoch": 0.24974463738508682, "grad_norm": 1.2334027290344238, "learning_rate": 1e-06, "loss": 0.0242, "step": 1467 }, { "epoch": 0.24991487912836227, "grad_norm": 1.2368199825286865, "learning_rate": 1e-06, "loss": 0.0276, "step": 1468 }, { "epoch": 0.2500851208716377, "grad_norm": 1.7857489585876465, "learning_rate": 1e-06, "loss": 0.0408, "step": 1469 }, { "epoch": 0.25025536261491316, "grad_norm": 1.4412367343902588, "learning_rate": 1e-06, "loss": 0.0289, "step": 1470 }, { "epoch": 0.2504256043581886, "grad_norm": 1.598808765411377, "learning_rate": 1e-06, "loss": 0.0255, "step": 1471 }, { "epoch": 0.25059584610146407, "grad_norm": 1.440071940422058, "learning_rate": 1e-06, "loss": 0.0343, "step": 1472 }, { "epoch": 0.2507660878447395, "grad_norm": 1.5133793354034424, "learning_rate": 1e-06, "loss": 0.0295, "step": 1473 }, { "epoch": 0.250936329588015, "grad_norm": 1.5876449346542358, "learning_rate": 1e-06, "loss": 0.0282, "step": 1474 }, { "epoch": 0.25110657133129044, "grad_norm": 1.636086106300354, "learning_rate": 1e-06, "loss": 0.0178, "step": 1475 }, { "epoch": 0.2512768130745659, "grad_norm": 1.7871562242507935, "learning_rate": 1e-06, "loss": 0.0362, "step": 1476 }, { "epoch": 0.25144705481784135, "grad_norm": 2.1147830486297607, "learning_rate": 1e-06, "loss": 0.043, "step": 1477 }, { "epoch": 0.2516172965611168, "grad_norm": 2.0674445629119873, "learning_rate": 1e-06, "loss": 0.0411, "step": 1478 }, { "epoch": 0.2517875383043922, "grad_norm": 1.684304118156433, "learning_rate": 1e-06, "loss": 0.0261, "step": 1479 }, { "epoch": 0.25195778004766767, "grad_norm": 1.8230916261672974, "learning_rate": 1e-06, "loss": 0.0328, "step": 1480 }, { "epoch": 0.2521280217909431, "grad_norm": 1.3470852375030518, "learning_rate": 1e-06, "loss": 0.0276, "step": 1481 }, { "epoch": 0.2522982635342186, "grad_norm": 1.640721321105957, "learning_rate": 1e-06, "loss": 0.037, "step": 1482 }, { "epoch": 0.25246850527749404, "grad_norm": 1.4035189151763916, "learning_rate": 1e-06, "loss": 0.0286, "step": 1483 }, { "epoch": 0.2526387470207695, "grad_norm": 1.6055939197540283, "learning_rate": 1e-06, "loss": 0.0269, "step": 1484 }, { "epoch": 0.25280898876404495, "grad_norm": 1.5646830797195435, "learning_rate": 1e-06, "loss": 0.0357, "step": 1485 }, { "epoch": 0.2529792305073204, "grad_norm": 1.7002915143966675, "learning_rate": 1e-06, "loss": 0.0351, "step": 1486 }, { "epoch": 0.25314947225059586, "grad_norm": 1.1237139701843262, "learning_rate": 1e-06, "loss": 0.0229, "step": 1487 }, { "epoch": 0.2533197139938713, "grad_norm": 1.8360260725021362, "learning_rate": 1e-06, "loss": 0.0251, "step": 1488 }, { "epoch": 0.2534899557371467, "grad_norm": 1.483035683631897, "learning_rate": 1e-06, "loss": 0.0387, "step": 1489 }, { "epoch": 0.2536601974804222, "grad_norm": 1.544597864151001, "learning_rate": 1e-06, "loss": 0.0308, "step": 1490 }, { "epoch": 0.25383043922369763, "grad_norm": 1.5810545682907104, "learning_rate": 1e-06, "loss": 0.0358, "step": 1491 }, { "epoch": 0.2540006809669731, "grad_norm": 1.6289693117141724, "learning_rate": 1e-06, "loss": 0.032, "step": 1492 }, { "epoch": 0.25417092271024855, "grad_norm": 1.4735667705535889, "learning_rate": 1e-06, "loss": 0.035, "step": 1493 }, { "epoch": 0.254341164453524, "grad_norm": 1.8274134397506714, "learning_rate": 1e-06, "loss": 0.026, "step": 1494 }, { "epoch": 0.25451140619679946, "grad_norm": 1.5539690256118774, "learning_rate": 1e-06, "loss": 0.0302, "step": 1495 }, { "epoch": 0.2546816479400749, "grad_norm": 1.251301884651184, "learning_rate": 1e-06, "loss": 0.0325, "step": 1496 }, { "epoch": 0.2548518896833504, "grad_norm": 2.4516842365264893, "learning_rate": 1e-06, "loss": 0.033, "step": 1497 }, { "epoch": 0.25502213142662583, "grad_norm": 1.677709698677063, "learning_rate": 1e-06, "loss": 0.0331, "step": 1498 }, { "epoch": 0.25519237316990123, "grad_norm": 1.771200180053711, "learning_rate": 1e-06, "loss": 0.0336, "step": 1499 }, { "epoch": 0.2553626149131767, "grad_norm": 1.574262022972107, "learning_rate": 1e-06, "loss": 0.0306, "step": 1500 }, { "epoch": 0.25553285665645215, "grad_norm": 1.3920859098434448, "learning_rate": 1e-06, "loss": 0.0261, "step": 1501 }, { "epoch": 0.2557030983997276, "grad_norm": 2.0418004989624023, "learning_rate": 1e-06, "loss": 0.036, "step": 1502 }, { "epoch": 0.25587334014300306, "grad_norm": 1.639630913734436, "learning_rate": 1e-06, "loss": 0.0239, "step": 1503 }, { "epoch": 0.2560435818862785, "grad_norm": 3.079418659210205, "learning_rate": 1e-06, "loss": 0.0548, "step": 1504 }, { "epoch": 0.25621382362955397, "grad_norm": 1.5250223875045776, "learning_rate": 1e-06, "loss": 0.025, "step": 1505 }, { "epoch": 0.25638406537282943, "grad_norm": 1.6824452877044678, "learning_rate": 1e-06, "loss": 0.034, "step": 1506 }, { "epoch": 0.2565543071161049, "grad_norm": 1.9331268072128296, "learning_rate": 1e-06, "loss": 0.0261, "step": 1507 }, { "epoch": 0.25672454885938034, "grad_norm": 1.964712142944336, "learning_rate": 1e-06, "loss": 0.0385, "step": 1508 }, { "epoch": 0.2568947906026558, "grad_norm": 1.5585107803344727, "learning_rate": 1e-06, "loss": 0.0267, "step": 1509 }, { "epoch": 0.2570650323459312, "grad_norm": 1.9522138833999634, "learning_rate": 1e-06, "loss": 0.0481, "step": 1510 }, { "epoch": 0.25723527408920666, "grad_norm": 2.522395372390747, "learning_rate": 1e-06, "loss": 0.0503, "step": 1511 }, { "epoch": 0.2574055158324821, "grad_norm": 1.4549627304077148, "learning_rate": 1e-06, "loss": 0.0349, "step": 1512 }, { "epoch": 0.25757575757575757, "grad_norm": 1.0665477514266968, "learning_rate": 1e-06, "loss": 0.0216, "step": 1513 }, { "epoch": 0.257745999319033, "grad_norm": 1.1988400220870972, "learning_rate": 1e-06, "loss": 0.0261, "step": 1514 }, { "epoch": 0.2579162410623085, "grad_norm": 1.5212563276290894, "learning_rate": 1e-06, "loss": 0.0319, "step": 1515 }, { "epoch": 0.25808648280558394, "grad_norm": 1.6229534149169922, "learning_rate": 1e-06, "loss": 0.0387, "step": 1516 }, { "epoch": 0.2582567245488594, "grad_norm": 1.9084266424179077, "learning_rate": 1e-06, "loss": 0.0354, "step": 1517 }, { "epoch": 0.25842696629213485, "grad_norm": 1.2355334758758545, "learning_rate": 1e-06, "loss": 0.0266, "step": 1518 }, { "epoch": 0.2585972080354103, "grad_norm": 1.6511914730072021, "learning_rate": 1e-06, "loss": 0.0238, "step": 1519 }, { "epoch": 0.2587674497786857, "grad_norm": 1.7273814678192139, "learning_rate": 1e-06, "loss": 0.0369, "step": 1520 }, { "epoch": 0.25893769152196117, "grad_norm": 1.715128779411316, "learning_rate": 1e-06, "loss": 0.0337, "step": 1521 }, { "epoch": 0.2591079332652366, "grad_norm": 1.704223871231079, "learning_rate": 1e-06, "loss": 0.029, "step": 1522 }, { "epoch": 0.2592781750085121, "grad_norm": 1.452068567276001, "learning_rate": 1e-06, "loss": 0.0258, "step": 1523 }, { "epoch": 0.25944841675178754, "grad_norm": 2.0453882217407227, "learning_rate": 1e-06, "loss": 0.0342, "step": 1524 }, { "epoch": 0.259618658495063, "grad_norm": 2.1579129695892334, "learning_rate": 1e-06, "loss": 0.0409, "step": 1525 }, { "epoch": 0.25978890023833845, "grad_norm": 1.6939774751663208, "learning_rate": 1e-06, "loss": 0.0228, "step": 1526 }, { "epoch": 0.2599591419816139, "grad_norm": 1.9167999029159546, "learning_rate": 1e-06, "loss": 0.0269, "step": 1527 }, { "epoch": 0.26012938372488936, "grad_norm": 1.4578943252563477, "learning_rate": 1e-06, "loss": 0.0269, "step": 1528 }, { "epoch": 0.2602996254681648, "grad_norm": 1.426521897315979, "learning_rate": 1e-06, "loss": 0.0268, "step": 1529 }, { "epoch": 0.2604698672114402, "grad_norm": 1.7971490621566772, "learning_rate": 1e-06, "loss": 0.0337, "step": 1530 }, { "epoch": 0.2606401089547157, "grad_norm": 1.6107807159423828, "learning_rate": 1e-06, "loss": 0.0434, "step": 1531 }, { "epoch": 0.26081035069799113, "grad_norm": 1.8623952865600586, "learning_rate": 1e-06, "loss": 0.0319, "step": 1532 }, { "epoch": 0.2609805924412666, "grad_norm": 1.3411558866500854, "learning_rate": 1e-06, "loss": 0.0244, "step": 1533 }, { "epoch": 0.26115083418454205, "grad_norm": 1.5986648797988892, "learning_rate": 1e-06, "loss": 0.023, "step": 1534 }, { "epoch": 0.2613210759278175, "grad_norm": 1.803185224533081, "learning_rate": 1e-06, "loss": 0.034, "step": 1535 }, { "epoch": 0.26149131767109296, "grad_norm": 1.4397742748260498, "learning_rate": 1e-06, "loss": 0.0408, "step": 1536 }, { "epoch": 0.2616615594143684, "grad_norm": 1.5375397205352783, "learning_rate": 1e-06, "loss": 0.0277, "step": 1537 }, { "epoch": 0.2618318011576439, "grad_norm": 1.6096251010894775, "learning_rate": 1e-06, "loss": 0.0284, "step": 1538 }, { "epoch": 0.26200204290091933, "grad_norm": 1.9255061149597168, "learning_rate": 1e-06, "loss": 0.0266, "step": 1539 }, { "epoch": 0.26217228464419473, "grad_norm": 1.6099532842636108, "learning_rate": 1e-06, "loss": 0.0248, "step": 1540 }, { "epoch": 0.2623425263874702, "grad_norm": 2.104907989501953, "learning_rate": 1e-06, "loss": 0.0265, "step": 1541 }, { "epoch": 0.26251276813074564, "grad_norm": 1.6234943866729736, "learning_rate": 1e-06, "loss": 0.0365, "step": 1542 }, { "epoch": 0.2626830098740211, "grad_norm": 1.7157036066055298, "learning_rate": 1e-06, "loss": 0.0321, "step": 1543 }, { "epoch": 0.26285325161729656, "grad_norm": 1.219995141029358, "learning_rate": 1e-06, "loss": 0.0201, "step": 1544 }, { "epoch": 0.263023493360572, "grad_norm": 1.5472625494003296, "learning_rate": 1e-06, "loss": 0.0327, "step": 1545 }, { "epoch": 0.26319373510384747, "grad_norm": 1.6542850732803345, "learning_rate": 1e-06, "loss": 0.0285, "step": 1546 }, { "epoch": 0.2633639768471229, "grad_norm": 1.4218086004257202, "learning_rate": 1e-06, "loss": 0.0267, "step": 1547 }, { "epoch": 0.2635342185903984, "grad_norm": 1.3034826517105103, "learning_rate": 1e-06, "loss": 0.0257, "step": 1548 }, { "epoch": 0.26370446033367384, "grad_norm": 2.216113805770874, "learning_rate": 1e-06, "loss": 0.0568, "step": 1549 }, { "epoch": 0.26387470207694924, "grad_norm": 1.4958362579345703, "learning_rate": 1e-06, "loss": 0.0278, "step": 1550 }, { "epoch": 0.2640449438202247, "grad_norm": 1.4572584629058838, "learning_rate": 1e-06, "loss": 0.0256, "step": 1551 }, { "epoch": 0.26421518556350015, "grad_norm": 1.7182279825210571, "learning_rate": 1e-06, "loss": 0.0406, "step": 1552 }, { "epoch": 0.2643854273067756, "grad_norm": 2.1565287113189697, "learning_rate": 1e-06, "loss": 0.0321, "step": 1553 }, { "epoch": 0.26455566905005107, "grad_norm": 1.421190619468689, "learning_rate": 1e-06, "loss": 0.031, "step": 1554 }, { "epoch": 0.2647259107933265, "grad_norm": 1.4493217468261719, "learning_rate": 1e-06, "loss": 0.0334, "step": 1555 }, { "epoch": 0.264896152536602, "grad_norm": 1.5228818655014038, "learning_rate": 1e-06, "loss": 0.0305, "step": 1556 }, { "epoch": 0.26506639427987744, "grad_norm": 1.5019917488098145, "learning_rate": 1e-06, "loss": 0.0254, "step": 1557 }, { "epoch": 0.2652366360231529, "grad_norm": 1.8647435903549194, "learning_rate": 1e-06, "loss": 0.0295, "step": 1558 }, { "epoch": 0.26540687776642835, "grad_norm": 1.5852400064468384, "learning_rate": 1e-06, "loss": 0.0235, "step": 1559 }, { "epoch": 0.26557711950970375, "grad_norm": 1.5699732303619385, "learning_rate": 1e-06, "loss": 0.0369, "step": 1560 }, { "epoch": 0.2657473612529792, "grad_norm": 1.5314867496490479, "learning_rate": 1e-06, "loss": 0.0231, "step": 1561 }, { "epoch": 0.26591760299625467, "grad_norm": 1.3057242631912231, "learning_rate": 1e-06, "loss": 0.0248, "step": 1562 }, { "epoch": 0.2660878447395301, "grad_norm": 1.5217634439468384, "learning_rate": 1e-06, "loss": 0.0383, "step": 1563 }, { "epoch": 0.2662580864828056, "grad_norm": 1.9693313837051392, "learning_rate": 1e-06, "loss": 0.0274, "step": 1564 }, { "epoch": 0.26642832822608103, "grad_norm": 2.246293544769287, "learning_rate": 1e-06, "loss": 0.0421, "step": 1565 }, { "epoch": 0.2665985699693565, "grad_norm": 1.439656376838684, "learning_rate": 1e-06, "loss": 0.0319, "step": 1566 }, { "epoch": 0.26676881171263195, "grad_norm": 1.522289752960205, "learning_rate": 1e-06, "loss": 0.0309, "step": 1567 }, { "epoch": 0.2669390534559074, "grad_norm": 1.6061466932296753, "learning_rate": 1e-06, "loss": 0.0264, "step": 1568 }, { "epoch": 0.26710929519918286, "grad_norm": 1.4138331413269043, "learning_rate": 1e-06, "loss": 0.026, "step": 1569 }, { "epoch": 0.2672795369424583, "grad_norm": 1.924535870552063, "learning_rate": 1e-06, "loss": 0.0253, "step": 1570 }, { "epoch": 0.2674497786857337, "grad_norm": 1.5255926847457886, "learning_rate": 1e-06, "loss": 0.0285, "step": 1571 }, { "epoch": 0.2676200204290092, "grad_norm": 1.752744197845459, "learning_rate": 1e-06, "loss": 0.0223, "step": 1572 }, { "epoch": 0.26779026217228463, "grad_norm": 1.2975589036941528, "learning_rate": 1e-06, "loss": 0.029, "step": 1573 }, { "epoch": 0.2679605039155601, "grad_norm": 1.4539504051208496, "learning_rate": 1e-06, "loss": 0.0303, "step": 1574 }, { "epoch": 0.26813074565883555, "grad_norm": 1.4913675785064697, "learning_rate": 1e-06, "loss": 0.0337, "step": 1575 }, { "epoch": 0.268300987402111, "grad_norm": 1.5786329507827759, "learning_rate": 1e-06, "loss": 0.0317, "step": 1576 }, { "epoch": 0.26847122914538646, "grad_norm": 1.719687581062317, "learning_rate": 1e-06, "loss": 0.0419, "step": 1577 }, { "epoch": 0.2686414708886619, "grad_norm": 1.5540446043014526, "learning_rate": 1e-06, "loss": 0.0295, "step": 1578 }, { "epoch": 0.26881171263193737, "grad_norm": 1.3455661535263062, "learning_rate": 1e-06, "loss": 0.0216, "step": 1579 }, { "epoch": 0.26898195437521283, "grad_norm": 1.514863133430481, "learning_rate": 1e-06, "loss": 0.0309, "step": 1580 }, { "epoch": 0.26915219611848823, "grad_norm": 1.63846755027771, "learning_rate": 1e-06, "loss": 0.0317, "step": 1581 }, { "epoch": 0.2693224378617637, "grad_norm": 1.4479107856750488, "learning_rate": 1e-06, "loss": 0.03, "step": 1582 }, { "epoch": 0.26949267960503914, "grad_norm": 1.504742980003357, "learning_rate": 1e-06, "loss": 0.0204, "step": 1583 }, { "epoch": 0.2696629213483146, "grad_norm": 1.895027995109558, "learning_rate": 1e-06, "loss": 0.0383, "step": 1584 }, { "epoch": 0.26983316309159006, "grad_norm": 1.5659292936325073, "learning_rate": 1e-06, "loss": 0.0269, "step": 1585 }, { "epoch": 0.2700034048348655, "grad_norm": 1.855080246925354, "learning_rate": 1e-06, "loss": 0.0341, "step": 1586 }, { "epoch": 0.27017364657814097, "grad_norm": 2.0592076778411865, "learning_rate": 1e-06, "loss": 0.0367, "step": 1587 }, { "epoch": 0.2703438883214164, "grad_norm": 1.626511812210083, "learning_rate": 1e-06, "loss": 0.0286, "step": 1588 }, { "epoch": 0.2705141300646919, "grad_norm": 1.715602159500122, "learning_rate": 1e-06, "loss": 0.0267, "step": 1589 }, { "epoch": 0.27068437180796734, "grad_norm": 1.7395516633987427, "learning_rate": 1e-06, "loss": 0.0306, "step": 1590 }, { "epoch": 0.27085461355124274, "grad_norm": 2.016375780105591, "learning_rate": 1e-06, "loss": 0.0316, "step": 1591 }, { "epoch": 0.2710248552945182, "grad_norm": 1.7519330978393555, "learning_rate": 1e-06, "loss": 0.0323, "step": 1592 }, { "epoch": 0.27119509703779365, "grad_norm": 1.3459405899047852, "learning_rate": 1e-06, "loss": 0.025, "step": 1593 }, { "epoch": 0.2713653387810691, "grad_norm": 1.7252540588378906, "learning_rate": 1e-06, "loss": 0.0313, "step": 1594 }, { "epoch": 0.27153558052434457, "grad_norm": 1.3951072692871094, "learning_rate": 1e-06, "loss": 0.0341, "step": 1595 }, { "epoch": 0.27170582226762, "grad_norm": 1.615648627281189, "learning_rate": 1e-06, "loss": 0.0302, "step": 1596 }, { "epoch": 0.2718760640108955, "grad_norm": 1.938193678855896, "learning_rate": 1e-06, "loss": 0.0267, "step": 1597 }, { "epoch": 0.27204630575417094, "grad_norm": 1.4162940979003906, "learning_rate": 1e-06, "loss": 0.0267, "step": 1598 }, { "epoch": 0.2722165474974464, "grad_norm": 1.7857416868209839, "learning_rate": 1e-06, "loss": 0.0287, "step": 1599 }, { "epoch": 0.27238678924072185, "grad_norm": 2.2122995853424072, "learning_rate": 1e-06, "loss": 0.0339, "step": 1600 }, { "epoch": 0.27255703098399725, "grad_norm": 1.2226827144622803, "learning_rate": 1e-06, "loss": 0.0224, "step": 1601 }, { "epoch": 0.2727272727272727, "grad_norm": 1.366214394569397, "learning_rate": 1e-06, "loss": 0.0268, "step": 1602 }, { "epoch": 0.27289751447054816, "grad_norm": 1.472756266593933, "learning_rate": 1e-06, "loss": 0.0251, "step": 1603 }, { "epoch": 0.2730677562138236, "grad_norm": 1.4528722763061523, "learning_rate": 1e-06, "loss": 0.0245, "step": 1604 }, { "epoch": 0.2732379979570991, "grad_norm": 1.2808316946029663, "learning_rate": 1e-06, "loss": 0.0231, "step": 1605 }, { "epoch": 0.27340823970037453, "grad_norm": 1.5016905069351196, "learning_rate": 1e-06, "loss": 0.0338, "step": 1606 }, { "epoch": 0.27357848144365, "grad_norm": 1.743894100189209, "learning_rate": 1e-06, "loss": 0.029, "step": 1607 }, { "epoch": 0.27374872318692545, "grad_norm": 1.6557116508483887, "learning_rate": 1e-06, "loss": 0.0288, "step": 1608 }, { "epoch": 0.2739189649302009, "grad_norm": 1.2677494287490845, "learning_rate": 1e-06, "loss": 0.0229, "step": 1609 }, { "epoch": 0.27408920667347636, "grad_norm": 1.6573220491409302, "learning_rate": 1e-06, "loss": 0.0307, "step": 1610 }, { "epoch": 0.27425944841675176, "grad_norm": 1.8028841018676758, "learning_rate": 1e-06, "loss": 0.0289, "step": 1611 }, { "epoch": 0.2744296901600272, "grad_norm": 1.5891400575637817, "learning_rate": 1e-06, "loss": 0.0344, "step": 1612 }, { "epoch": 0.2745999319033027, "grad_norm": 1.492483377456665, "learning_rate": 1e-06, "loss": 0.0214, "step": 1613 }, { "epoch": 0.27477017364657813, "grad_norm": 1.5204499959945679, "learning_rate": 1e-06, "loss": 0.0257, "step": 1614 }, { "epoch": 0.2749404153898536, "grad_norm": 2.0463969707489014, "learning_rate": 1e-06, "loss": 0.0287, "step": 1615 }, { "epoch": 0.27511065713312904, "grad_norm": 1.5479378700256348, "learning_rate": 1e-06, "loss": 0.0291, "step": 1616 }, { "epoch": 0.2752808988764045, "grad_norm": 2.649272918701172, "learning_rate": 1e-06, "loss": 0.0455, "step": 1617 }, { "epoch": 0.27545114061967996, "grad_norm": 1.8164010047912598, "learning_rate": 1e-06, "loss": 0.0291, "step": 1618 }, { "epoch": 0.2756213823629554, "grad_norm": 1.2867664098739624, "learning_rate": 1e-06, "loss": 0.0197, "step": 1619 }, { "epoch": 0.27579162410623087, "grad_norm": 1.3096405267715454, "learning_rate": 1e-06, "loss": 0.0262, "step": 1620 }, { "epoch": 0.27596186584950627, "grad_norm": 1.3673827648162842, "learning_rate": 1e-06, "loss": 0.0177, "step": 1621 }, { "epoch": 0.27613210759278173, "grad_norm": 1.6427737474441528, "learning_rate": 1e-06, "loss": 0.0302, "step": 1622 }, { "epoch": 0.2763023493360572, "grad_norm": 1.4813086986541748, "learning_rate": 1e-06, "loss": 0.0307, "step": 1623 }, { "epoch": 0.27647259107933264, "grad_norm": 1.318463921546936, "learning_rate": 1e-06, "loss": 0.0221, "step": 1624 }, { "epoch": 0.2766428328226081, "grad_norm": 1.6629798412322998, "learning_rate": 1e-06, "loss": 0.0289, "step": 1625 }, { "epoch": 0.27681307456588355, "grad_norm": 1.4706048965454102, "learning_rate": 1e-06, "loss": 0.0241, "step": 1626 }, { "epoch": 0.276983316309159, "grad_norm": 1.6562780141830444, "learning_rate": 1e-06, "loss": 0.027, "step": 1627 }, { "epoch": 0.27715355805243447, "grad_norm": 1.7971525192260742, "learning_rate": 1e-06, "loss": 0.0322, "step": 1628 }, { "epoch": 0.2773237997957099, "grad_norm": 1.9127774238586426, "learning_rate": 1e-06, "loss": 0.0382, "step": 1629 }, { "epoch": 0.2774940415389854, "grad_norm": 1.661810278892517, "learning_rate": 1e-06, "loss": 0.0334, "step": 1630 }, { "epoch": 0.27766428328226084, "grad_norm": 2.6759703159332275, "learning_rate": 1e-06, "loss": 0.0297, "step": 1631 }, { "epoch": 0.27783452502553624, "grad_norm": 1.6530686616897583, "learning_rate": 1e-06, "loss": 0.0353, "step": 1632 }, { "epoch": 0.2780047667688117, "grad_norm": 1.8694590330123901, "learning_rate": 1e-06, "loss": 0.0461, "step": 1633 }, { "epoch": 0.27817500851208715, "grad_norm": 1.3104816675186157, "learning_rate": 1e-06, "loss": 0.0233, "step": 1634 }, { "epoch": 0.2783452502553626, "grad_norm": 1.3213906288146973, "learning_rate": 1e-06, "loss": 0.0212, "step": 1635 }, { "epoch": 0.27851549199863807, "grad_norm": 1.9300750494003296, "learning_rate": 1e-06, "loss": 0.0359, "step": 1636 }, { "epoch": 0.2786857337419135, "grad_norm": 1.7006559371948242, "learning_rate": 1e-06, "loss": 0.0276, "step": 1637 }, { "epoch": 0.278855975485189, "grad_norm": 1.3280925750732422, "learning_rate": 1e-06, "loss": 0.0228, "step": 1638 }, { "epoch": 0.27902621722846443, "grad_norm": 1.5057625770568848, "learning_rate": 1e-06, "loss": 0.0228, "step": 1639 }, { "epoch": 0.2791964589717399, "grad_norm": 1.3928744792938232, "learning_rate": 1e-06, "loss": 0.0194, "step": 1640 }, { "epoch": 0.27936670071501535, "grad_norm": 1.3840206861495972, "learning_rate": 1e-06, "loss": 0.0209, "step": 1641 }, { "epoch": 0.27953694245829075, "grad_norm": 1.7165454626083374, "learning_rate": 1e-06, "loss": 0.0301, "step": 1642 }, { "epoch": 0.2797071842015662, "grad_norm": 2.1626944541931152, "learning_rate": 1e-06, "loss": 0.0354, "step": 1643 }, { "epoch": 0.27987742594484166, "grad_norm": 1.480241060256958, "learning_rate": 1e-06, "loss": 0.0251, "step": 1644 }, { "epoch": 0.2800476676881171, "grad_norm": 1.88970947265625, "learning_rate": 1e-06, "loss": 0.0319, "step": 1645 }, { "epoch": 0.2802179094313926, "grad_norm": 1.82145357131958, "learning_rate": 1e-06, "loss": 0.033, "step": 1646 }, { "epoch": 0.28038815117466803, "grad_norm": 1.4362519979476929, "learning_rate": 1e-06, "loss": 0.0279, "step": 1647 }, { "epoch": 0.2805583929179435, "grad_norm": 1.5925521850585938, "learning_rate": 1e-06, "loss": 0.0274, "step": 1648 }, { "epoch": 0.28072863466121895, "grad_norm": 1.46236252784729, "learning_rate": 1e-06, "loss": 0.0245, "step": 1649 }, { "epoch": 0.2808988764044944, "grad_norm": 2.9916434288024902, "learning_rate": 1e-06, "loss": 0.0428, "step": 1650 }, { "epoch": 0.28106911814776986, "grad_norm": 1.4459913969039917, "learning_rate": 1e-06, "loss": 0.0264, "step": 1651 }, { "epoch": 0.28123935989104526, "grad_norm": 1.4686808586120605, "learning_rate": 1e-06, "loss": 0.0319, "step": 1652 }, { "epoch": 0.2814096016343207, "grad_norm": 1.5071327686309814, "learning_rate": 1e-06, "loss": 0.0234, "step": 1653 }, { "epoch": 0.2815798433775962, "grad_norm": 1.4609589576721191, "learning_rate": 1e-06, "loss": 0.0255, "step": 1654 }, { "epoch": 0.28175008512087163, "grad_norm": 1.7668788433074951, "learning_rate": 1e-06, "loss": 0.0235, "step": 1655 }, { "epoch": 0.2819203268641471, "grad_norm": 1.9806582927703857, "learning_rate": 1e-06, "loss": 0.0307, "step": 1656 }, { "epoch": 0.28209056860742254, "grad_norm": 1.548424243927002, "learning_rate": 1e-06, "loss": 0.0177, "step": 1657 }, { "epoch": 0.282260810350698, "grad_norm": 1.318804144859314, "learning_rate": 1e-06, "loss": 0.0148, "step": 1658 }, { "epoch": 0.28243105209397346, "grad_norm": 1.7616188526153564, "learning_rate": 1e-06, "loss": 0.0308, "step": 1659 }, { "epoch": 0.2826012938372489, "grad_norm": 1.4536751508712769, "learning_rate": 1e-06, "loss": 0.0226, "step": 1660 }, { "epoch": 0.28277153558052437, "grad_norm": 1.8000961542129517, "learning_rate": 1e-06, "loss": 0.0287, "step": 1661 }, { "epoch": 0.28294177732379977, "grad_norm": 1.4188201427459717, "learning_rate": 1e-06, "loss": 0.0202, "step": 1662 }, { "epoch": 0.2831120190670752, "grad_norm": 1.6086786985397339, "learning_rate": 1e-06, "loss": 0.0248, "step": 1663 }, { "epoch": 0.2832822608103507, "grad_norm": 1.3086551427841187, "learning_rate": 1e-06, "loss": 0.0232, "step": 1664 }, { "epoch": 0.28345250255362614, "grad_norm": 1.5026829242706299, "learning_rate": 1e-06, "loss": 0.0203, "step": 1665 }, { "epoch": 0.2836227442969016, "grad_norm": 1.9546946287155151, "learning_rate": 1e-06, "loss": 0.0257, "step": 1666 }, { "epoch": 0.28379298604017705, "grad_norm": 1.1366575956344604, "learning_rate": 1e-06, "loss": 0.0228, "step": 1667 }, { "epoch": 0.2839632277834525, "grad_norm": 1.8793129920959473, "learning_rate": 1e-06, "loss": 0.0443, "step": 1668 }, { "epoch": 0.28413346952672797, "grad_norm": 1.4484946727752686, "learning_rate": 1e-06, "loss": 0.0312, "step": 1669 }, { "epoch": 0.2843037112700034, "grad_norm": 1.7814072370529175, "learning_rate": 1e-06, "loss": 0.0346, "step": 1670 }, { "epoch": 0.2844739530132789, "grad_norm": 1.5171910524368286, "learning_rate": 1e-06, "loss": 0.0231, "step": 1671 }, { "epoch": 0.2846441947565543, "grad_norm": 1.7551907300949097, "learning_rate": 1e-06, "loss": 0.0196, "step": 1672 }, { "epoch": 0.28481443649982974, "grad_norm": 1.8677160739898682, "learning_rate": 1e-06, "loss": 0.0276, "step": 1673 }, { "epoch": 0.2849846782431052, "grad_norm": 1.342197299003601, "learning_rate": 1e-06, "loss": 0.0183, "step": 1674 }, { "epoch": 0.28515491998638065, "grad_norm": 1.656657338142395, "learning_rate": 1e-06, "loss": 0.0262, "step": 1675 }, { "epoch": 0.2853251617296561, "grad_norm": 1.6236470937728882, "learning_rate": 1e-06, "loss": 0.0234, "step": 1676 }, { "epoch": 0.28549540347293156, "grad_norm": 1.7253212928771973, "learning_rate": 1e-06, "loss": 0.0274, "step": 1677 }, { "epoch": 0.285665645216207, "grad_norm": 1.9276634454727173, "learning_rate": 1e-06, "loss": 0.0295, "step": 1678 }, { "epoch": 0.2858358869594825, "grad_norm": 1.4296467304229736, "learning_rate": 1e-06, "loss": 0.0208, "step": 1679 }, { "epoch": 0.28600612870275793, "grad_norm": 2.423842430114746, "learning_rate": 1e-06, "loss": 0.0329, "step": 1680 }, { "epoch": 0.2861763704460334, "grad_norm": 1.48721444606781, "learning_rate": 1e-06, "loss": 0.0237, "step": 1681 }, { "epoch": 0.2863466121893088, "grad_norm": 1.7897123098373413, "learning_rate": 1e-06, "loss": 0.0281, "step": 1682 }, { "epoch": 0.28651685393258425, "grad_norm": 1.6738888025283813, "learning_rate": 1e-06, "loss": 0.0332, "step": 1683 }, { "epoch": 0.2866870956758597, "grad_norm": 1.615144968032837, "learning_rate": 1e-06, "loss": 0.025, "step": 1684 }, { "epoch": 0.28685733741913516, "grad_norm": 1.4232816696166992, "learning_rate": 1e-06, "loss": 0.0209, "step": 1685 }, { "epoch": 0.2870275791624106, "grad_norm": 1.6573175191879272, "learning_rate": 1e-06, "loss": 0.0269, "step": 1686 }, { "epoch": 0.2871978209056861, "grad_norm": 1.7181310653686523, "learning_rate": 1e-06, "loss": 0.0267, "step": 1687 }, { "epoch": 0.28736806264896153, "grad_norm": 1.8144760131835938, "learning_rate": 1e-06, "loss": 0.0373, "step": 1688 }, { "epoch": 0.287538304392237, "grad_norm": 1.360830545425415, "learning_rate": 1e-06, "loss": 0.0241, "step": 1689 }, { "epoch": 0.28770854613551244, "grad_norm": 1.4611048698425293, "learning_rate": 1e-06, "loss": 0.0262, "step": 1690 }, { "epoch": 0.2878787878787879, "grad_norm": 1.525647759437561, "learning_rate": 1e-06, "loss": 0.0283, "step": 1691 }, { "epoch": 0.28804902962206336, "grad_norm": 1.4439351558685303, "learning_rate": 1e-06, "loss": 0.0274, "step": 1692 }, { "epoch": 0.28821927136533876, "grad_norm": 1.6593124866485596, "learning_rate": 1e-06, "loss": 0.0256, "step": 1693 }, { "epoch": 0.2883895131086142, "grad_norm": 1.6211251020431519, "learning_rate": 1e-06, "loss": 0.0388, "step": 1694 }, { "epoch": 0.28855975485188967, "grad_norm": 1.6730520725250244, "learning_rate": 1e-06, "loss": 0.0229, "step": 1695 }, { "epoch": 0.28872999659516513, "grad_norm": 1.850048542022705, "learning_rate": 1e-06, "loss": 0.0202, "step": 1696 }, { "epoch": 0.2889002383384406, "grad_norm": 1.4730114936828613, "learning_rate": 1e-06, "loss": 0.0235, "step": 1697 }, { "epoch": 0.28907048008171604, "grad_norm": 1.8073022365570068, "learning_rate": 1e-06, "loss": 0.04, "step": 1698 }, { "epoch": 0.2892407218249915, "grad_norm": 1.8779622316360474, "learning_rate": 1e-06, "loss": 0.0437, "step": 1699 }, { "epoch": 0.28941096356826695, "grad_norm": 1.5742496252059937, "learning_rate": 1e-06, "loss": 0.0293, "step": 1700 }, { "epoch": 0.2895812053115424, "grad_norm": 1.4416614770889282, "learning_rate": 1e-06, "loss": 0.023, "step": 1701 }, { "epoch": 0.28975144705481787, "grad_norm": 1.4189447164535522, "learning_rate": 1e-06, "loss": 0.0281, "step": 1702 }, { "epoch": 0.28992168879809327, "grad_norm": 1.4489812850952148, "learning_rate": 1e-06, "loss": 0.0269, "step": 1703 }, { "epoch": 0.2900919305413687, "grad_norm": 1.590919852256775, "learning_rate": 1e-06, "loss": 0.0261, "step": 1704 }, { "epoch": 0.2902621722846442, "grad_norm": 1.528601884841919, "learning_rate": 1e-06, "loss": 0.0246, "step": 1705 }, { "epoch": 0.29043241402791964, "grad_norm": 1.3978060483932495, "learning_rate": 1e-06, "loss": 0.0202, "step": 1706 }, { "epoch": 0.2906026557711951, "grad_norm": 1.4895422458648682, "learning_rate": 1e-06, "loss": 0.0263, "step": 1707 }, { "epoch": 0.29077289751447055, "grad_norm": 1.5982104539871216, "learning_rate": 1e-06, "loss": 0.0143, "step": 1708 }, { "epoch": 0.290943139257746, "grad_norm": 1.5358942747116089, "learning_rate": 1e-06, "loss": 0.0223, "step": 1709 }, { "epoch": 0.29111338100102147, "grad_norm": 1.491877794265747, "learning_rate": 1e-06, "loss": 0.0252, "step": 1710 }, { "epoch": 0.2912836227442969, "grad_norm": 1.6114052534103394, "learning_rate": 1e-06, "loss": 0.0226, "step": 1711 }, { "epoch": 0.2914538644875724, "grad_norm": 1.6193110942840576, "learning_rate": 1e-06, "loss": 0.0221, "step": 1712 }, { "epoch": 0.2916241062308478, "grad_norm": 1.40981924533844, "learning_rate": 1e-06, "loss": 0.0209, "step": 1713 }, { "epoch": 0.29179434797412324, "grad_norm": 2.308316469192505, "learning_rate": 1e-06, "loss": 0.0314, "step": 1714 }, { "epoch": 0.2919645897173987, "grad_norm": 1.631408929824829, "learning_rate": 1e-06, "loss": 0.0307, "step": 1715 }, { "epoch": 0.29213483146067415, "grad_norm": 1.4540927410125732, "learning_rate": 1e-06, "loss": 0.0258, "step": 1716 }, { "epoch": 0.2923050732039496, "grad_norm": 1.7663581371307373, "learning_rate": 1e-06, "loss": 0.0332, "step": 1717 }, { "epoch": 0.29247531494722506, "grad_norm": 1.2516728639602661, "learning_rate": 1e-06, "loss": 0.021, "step": 1718 }, { "epoch": 0.2926455566905005, "grad_norm": 1.9072999954223633, "learning_rate": 1e-06, "loss": 0.0308, "step": 1719 }, { "epoch": 0.292815798433776, "grad_norm": 1.8111470937728882, "learning_rate": 1e-06, "loss": 0.0261, "step": 1720 }, { "epoch": 0.29298604017705143, "grad_norm": 1.5507558584213257, "learning_rate": 1e-06, "loss": 0.022, "step": 1721 }, { "epoch": 0.2931562819203269, "grad_norm": 1.2865614891052246, "learning_rate": 1e-06, "loss": 0.0196, "step": 1722 }, { "epoch": 0.2933265236636023, "grad_norm": 1.6672788858413696, "learning_rate": 1e-06, "loss": 0.0191, "step": 1723 }, { "epoch": 0.29349676540687775, "grad_norm": 1.670101284980774, "learning_rate": 1e-06, "loss": 0.03, "step": 1724 }, { "epoch": 0.2936670071501532, "grad_norm": 1.2135088443756104, "learning_rate": 1e-06, "loss": 0.0219, "step": 1725 }, { "epoch": 0.29383724889342866, "grad_norm": 1.2077770233154297, "learning_rate": 1e-06, "loss": 0.0179, "step": 1726 }, { "epoch": 0.2940074906367041, "grad_norm": 1.3592512607574463, "learning_rate": 1e-06, "loss": 0.0235, "step": 1727 }, { "epoch": 0.2941777323799796, "grad_norm": 1.661252737045288, "learning_rate": 1e-06, "loss": 0.0247, "step": 1728 }, { "epoch": 0.29434797412325503, "grad_norm": 1.8278785943984985, "learning_rate": 1e-06, "loss": 0.0403, "step": 1729 }, { "epoch": 0.2945182158665305, "grad_norm": 1.3359884023666382, "learning_rate": 1e-06, "loss": 0.0206, "step": 1730 }, { "epoch": 0.29468845760980594, "grad_norm": 1.8488993644714355, "learning_rate": 1e-06, "loss": 0.0247, "step": 1731 }, { "epoch": 0.2948586993530814, "grad_norm": 1.6495951414108276, "learning_rate": 1e-06, "loss": 0.0268, "step": 1732 }, { "epoch": 0.2950289410963568, "grad_norm": 2.0835161209106445, "learning_rate": 1e-06, "loss": 0.0373, "step": 1733 }, { "epoch": 0.29519918283963226, "grad_norm": 1.2408726215362549, "learning_rate": 1e-06, "loss": 0.0212, "step": 1734 }, { "epoch": 0.2953694245829077, "grad_norm": 1.3629958629608154, "learning_rate": 1e-06, "loss": 0.0263, "step": 1735 }, { "epoch": 0.29553966632618317, "grad_norm": 1.1778775453567505, "learning_rate": 1e-06, "loss": 0.0185, "step": 1736 }, { "epoch": 0.2957099080694586, "grad_norm": 1.1877045631408691, "learning_rate": 1e-06, "loss": 0.0216, "step": 1737 }, { "epoch": 0.2958801498127341, "grad_norm": 1.8210134506225586, "learning_rate": 1e-06, "loss": 0.0264, "step": 1738 }, { "epoch": 0.29605039155600954, "grad_norm": 1.5456937551498413, "learning_rate": 1e-06, "loss": 0.0188, "step": 1739 }, { "epoch": 0.296220633299285, "grad_norm": 1.523476004600525, "learning_rate": 1e-06, "loss": 0.0195, "step": 1740 }, { "epoch": 0.29639087504256045, "grad_norm": 1.5979241132736206, "learning_rate": 1e-06, "loss": 0.0226, "step": 1741 }, { "epoch": 0.2965611167858359, "grad_norm": 1.4582843780517578, "learning_rate": 1e-06, "loss": 0.0227, "step": 1742 }, { "epoch": 0.2967313585291113, "grad_norm": 1.4900776147842407, "learning_rate": 1e-06, "loss": 0.0283, "step": 1743 }, { "epoch": 0.29690160027238677, "grad_norm": 1.4199949502944946, "learning_rate": 1e-06, "loss": 0.031, "step": 1744 }, { "epoch": 0.2970718420156622, "grad_norm": 1.9383891820907593, "learning_rate": 1e-06, "loss": 0.0351, "step": 1745 }, { "epoch": 0.2972420837589377, "grad_norm": 1.6513484716415405, "learning_rate": 1e-06, "loss": 0.0283, "step": 1746 }, { "epoch": 0.29741232550221314, "grad_norm": 1.4408584833145142, "learning_rate": 1e-06, "loss": 0.0253, "step": 1747 }, { "epoch": 0.2975825672454886, "grad_norm": 1.4601348638534546, "learning_rate": 1e-06, "loss": 0.0246, "step": 1748 }, { "epoch": 0.29775280898876405, "grad_norm": 2.014059066772461, "learning_rate": 1e-06, "loss": 0.0239, "step": 1749 }, { "epoch": 0.2979230507320395, "grad_norm": 1.4093255996704102, "learning_rate": 1e-06, "loss": 0.0242, "step": 1750 }, { "epoch": 0.29809329247531496, "grad_norm": 1.8597747087478638, "learning_rate": 1e-06, "loss": 0.0305, "step": 1751 }, { "epoch": 0.2982635342185904, "grad_norm": 1.4685695171356201, "learning_rate": 1e-06, "loss": 0.0226, "step": 1752 }, { "epoch": 0.2984337759618659, "grad_norm": 1.4107317924499512, "learning_rate": 1e-06, "loss": 0.0278, "step": 1753 }, { "epoch": 0.2986040177051413, "grad_norm": 1.1390619277954102, "learning_rate": 1e-06, "loss": 0.0272, "step": 1754 }, { "epoch": 0.29877425944841673, "grad_norm": 1.5630130767822266, "learning_rate": 1e-06, "loss": 0.03, "step": 1755 }, { "epoch": 0.2989445011916922, "grad_norm": 1.6469208002090454, "learning_rate": 1e-06, "loss": 0.02, "step": 1756 }, { "epoch": 0.29911474293496765, "grad_norm": 1.4508461952209473, "learning_rate": 1e-06, "loss": 0.028, "step": 1757 }, { "epoch": 0.2992849846782431, "grad_norm": 1.506699800491333, "learning_rate": 1e-06, "loss": 0.0292, "step": 1758 }, { "epoch": 0.29945522642151856, "grad_norm": 1.0725475549697876, "learning_rate": 1e-06, "loss": 0.0173, "step": 1759 }, { "epoch": 0.299625468164794, "grad_norm": 1.733446717262268, "learning_rate": 1e-06, "loss": 0.0375, "step": 1760 }, { "epoch": 0.2997957099080695, "grad_norm": 1.4227663278579712, "learning_rate": 1e-06, "loss": 0.0203, "step": 1761 }, { "epoch": 0.29996595165134493, "grad_norm": 1.4566940069198608, "learning_rate": 1e-06, "loss": 0.0264, "step": 1762 }, { "epoch": 0.3001361933946204, "grad_norm": 1.6807788610458374, "learning_rate": 1e-06, "loss": 0.0281, "step": 1763 }, { "epoch": 0.3003064351378958, "grad_norm": 1.8362802267074585, "learning_rate": 1e-06, "loss": 0.0251, "step": 1764 }, { "epoch": 0.30047667688117125, "grad_norm": 1.8242303133010864, "learning_rate": 1e-06, "loss": 0.0274, "step": 1765 }, { "epoch": 0.3006469186244467, "grad_norm": 1.639448881149292, "learning_rate": 1e-06, "loss": 0.0282, "step": 1766 }, { "epoch": 0.30081716036772216, "grad_norm": 1.406978726387024, "learning_rate": 1e-06, "loss": 0.0238, "step": 1767 }, { "epoch": 0.3009874021109976, "grad_norm": 1.5948950052261353, "learning_rate": 1e-06, "loss": 0.0266, "step": 1768 }, { "epoch": 0.30115764385427307, "grad_norm": 1.1911594867706299, "learning_rate": 1e-06, "loss": 0.016, "step": 1769 }, { "epoch": 0.30132788559754853, "grad_norm": 1.3455018997192383, "learning_rate": 1e-06, "loss": 0.0196, "step": 1770 }, { "epoch": 0.301498127340824, "grad_norm": 1.5457820892333984, "learning_rate": 1e-06, "loss": 0.021, "step": 1771 }, { "epoch": 0.30166836908409944, "grad_norm": 1.7167928218841553, "learning_rate": 1e-06, "loss": 0.0282, "step": 1772 }, { "epoch": 0.3018386108273749, "grad_norm": 1.1246789693832397, "learning_rate": 1e-06, "loss": 0.0225, "step": 1773 }, { "epoch": 0.3020088525706503, "grad_norm": 1.7185958623886108, "learning_rate": 1e-06, "loss": 0.0319, "step": 1774 }, { "epoch": 0.30217909431392576, "grad_norm": 1.2617838382720947, "learning_rate": 1e-06, "loss": 0.0219, "step": 1775 }, { "epoch": 0.3023493360572012, "grad_norm": 1.5254855155944824, "learning_rate": 1e-06, "loss": 0.0202, "step": 1776 }, { "epoch": 0.30251957780047667, "grad_norm": 1.1870532035827637, "learning_rate": 1e-06, "loss": 0.0131, "step": 1777 }, { "epoch": 0.3026898195437521, "grad_norm": 1.7242926359176636, "learning_rate": 1e-06, "loss": 0.0253, "step": 1778 }, { "epoch": 0.3028600612870276, "grad_norm": 1.8268846273422241, "learning_rate": 1e-06, "loss": 0.0249, "step": 1779 }, { "epoch": 0.30303030303030304, "grad_norm": 1.4591509103775024, "learning_rate": 1e-06, "loss": 0.0193, "step": 1780 }, { "epoch": 0.3032005447735785, "grad_norm": 1.7261550426483154, "learning_rate": 1e-06, "loss": 0.0299, "step": 1781 }, { "epoch": 0.30337078651685395, "grad_norm": 1.9499040842056274, "learning_rate": 1e-06, "loss": 0.0374, "step": 1782 }, { "epoch": 0.3035410282601294, "grad_norm": 1.2761621475219727, "learning_rate": 1e-06, "loss": 0.0215, "step": 1783 }, { "epoch": 0.3037112700034048, "grad_norm": 1.402917742729187, "learning_rate": 1e-06, "loss": 0.0223, "step": 1784 }, { "epoch": 0.30388151174668027, "grad_norm": 3.2693288326263428, "learning_rate": 1e-06, "loss": 0.0334, "step": 1785 }, { "epoch": 0.3040517534899557, "grad_norm": 1.3207221031188965, "learning_rate": 1e-06, "loss": 0.0174, "step": 1786 }, { "epoch": 0.3042219952332312, "grad_norm": 2.6473004817962646, "learning_rate": 1e-06, "loss": 0.0406, "step": 1787 }, { "epoch": 0.30439223697650664, "grad_norm": 1.6894813776016235, "learning_rate": 1e-06, "loss": 0.0283, "step": 1788 }, { "epoch": 0.3045624787197821, "grad_norm": 1.3546245098114014, "learning_rate": 1e-06, "loss": 0.0238, "step": 1789 }, { "epoch": 0.30473272046305755, "grad_norm": 1.6987773180007935, "learning_rate": 1e-06, "loss": 0.0301, "step": 1790 }, { "epoch": 0.304902962206333, "grad_norm": 1.6079025268554688, "learning_rate": 1e-06, "loss": 0.0222, "step": 1791 }, { "epoch": 0.30507320394960846, "grad_norm": 1.5735406875610352, "learning_rate": 1e-06, "loss": 0.0222, "step": 1792 }, { "epoch": 0.3052434456928839, "grad_norm": 1.862619400024414, "learning_rate": 1e-06, "loss": 0.0432, "step": 1793 }, { "epoch": 0.3054136874361593, "grad_norm": 1.18687903881073, "learning_rate": 1e-06, "loss": 0.0145, "step": 1794 }, { "epoch": 0.3055839291794348, "grad_norm": 1.4821864366531372, "learning_rate": 1e-06, "loss": 0.0234, "step": 1795 }, { "epoch": 0.30575417092271023, "grad_norm": 1.6538077592849731, "learning_rate": 1e-06, "loss": 0.0225, "step": 1796 }, { "epoch": 0.3059244126659857, "grad_norm": 1.546225666999817, "learning_rate": 1e-06, "loss": 0.0261, "step": 1797 }, { "epoch": 0.30609465440926115, "grad_norm": 1.4453809261322021, "learning_rate": 1e-06, "loss": 0.0216, "step": 1798 }, { "epoch": 0.3062648961525366, "grad_norm": 2.1485629081726074, "learning_rate": 1e-06, "loss": 0.0324, "step": 1799 }, { "epoch": 0.30643513789581206, "grad_norm": 1.6263507604599, "learning_rate": 1e-06, "loss": 0.0212, "step": 1800 }, { "epoch": 0.3066053796390875, "grad_norm": 1.6911835670471191, "learning_rate": 1e-06, "loss": 0.0241, "step": 1801 }, { "epoch": 0.306775621382363, "grad_norm": 1.4971544742584229, "learning_rate": 1e-06, "loss": 0.0215, "step": 1802 }, { "epoch": 0.30694586312563843, "grad_norm": 1.3353301286697388, "learning_rate": 1e-06, "loss": 0.0167, "step": 1803 }, { "epoch": 0.30711610486891383, "grad_norm": 1.5353763103485107, "learning_rate": 1e-06, "loss": 0.021, "step": 1804 }, { "epoch": 0.3072863466121893, "grad_norm": 1.2348135709762573, "learning_rate": 1e-06, "loss": 0.0203, "step": 1805 }, { "epoch": 0.30745658835546474, "grad_norm": 1.4471068382263184, "learning_rate": 1e-06, "loss": 0.0204, "step": 1806 }, { "epoch": 0.3076268300987402, "grad_norm": 1.7940800189971924, "learning_rate": 1e-06, "loss": 0.0204, "step": 1807 }, { "epoch": 0.30779707184201566, "grad_norm": 1.4490859508514404, "learning_rate": 1e-06, "loss": 0.0186, "step": 1808 }, { "epoch": 0.3079673135852911, "grad_norm": 1.3849128484725952, "learning_rate": 1e-06, "loss": 0.0229, "step": 1809 }, { "epoch": 0.30813755532856657, "grad_norm": 1.6811866760253906, "learning_rate": 1e-06, "loss": 0.0159, "step": 1810 }, { "epoch": 0.308307797071842, "grad_norm": 1.5585503578186035, "learning_rate": 1e-06, "loss": 0.0178, "step": 1811 }, { "epoch": 0.3084780388151175, "grad_norm": 1.498989462852478, "learning_rate": 1e-06, "loss": 0.0215, "step": 1812 }, { "epoch": 0.30864828055839294, "grad_norm": 1.5518637895584106, "learning_rate": 1e-06, "loss": 0.0145, "step": 1813 }, { "epoch": 0.3088185223016684, "grad_norm": 1.9985408782958984, "learning_rate": 1e-06, "loss": 0.0384, "step": 1814 }, { "epoch": 0.3089887640449438, "grad_norm": 1.9101368188858032, "learning_rate": 1e-06, "loss": 0.022, "step": 1815 }, { "epoch": 0.30915900578821925, "grad_norm": 1.438533902168274, "learning_rate": 1e-06, "loss": 0.0206, "step": 1816 }, { "epoch": 0.3093292475314947, "grad_norm": 1.6751413345336914, "learning_rate": 1e-06, "loss": 0.0245, "step": 1817 }, { "epoch": 0.30949948927477017, "grad_norm": 1.1888127326965332, "learning_rate": 1e-06, "loss": 0.0207, "step": 1818 }, { "epoch": 0.3096697310180456, "grad_norm": 2.00555157661438, "learning_rate": 1e-06, "loss": 0.0188, "step": 1819 }, { "epoch": 0.3098399727613211, "grad_norm": 1.2455949783325195, "learning_rate": 1e-06, "loss": 0.021, "step": 1820 }, { "epoch": 0.31001021450459654, "grad_norm": 1.2500693798065186, "learning_rate": 1e-06, "loss": 0.0188, "step": 1821 }, { "epoch": 0.310180456247872, "grad_norm": 1.3195619583129883, "learning_rate": 1e-06, "loss": 0.022, "step": 1822 }, { "epoch": 0.31035069799114745, "grad_norm": 1.429261326789856, "learning_rate": 1e-06, "loss": 0.0294, "step": 1823 }, { "epoch": 0.3105209397344229, "grad_norm": 1.3425854444503784, "learning_rate": 1e-06, "loss": 0.0209, "step": 1824 }, { "epoch": 0.3106911814776983, "grad_norm": 1.5613363981246948, "learning_rate": 1e-06, "loss": 0.0249, "step": 1825 }, { "epoch": 0.31086142322097376, "grad_norm": 1.9861626625061035, "learning_rate": 1e-06, "loss": 0.0237, "step": 1826 }, { "epoch": 0.3110316649642492, "grad_norm": 1.774733304977417, "learning_rate": 1e-06, "loss": 0.0295, "step": 1827 }, { "epoch": 0.3112019067075247, "grad_norm": 1.4516446590423584, "learning_rate": 1e-06, "loss": 0.0247, "step": 1828 }, { "epoch": 0.31137214845080013, "grad_norm": 1.5618529319763184, "learning_rate": 1e-06, "loss": 0.0227, "step": 1829 }, { "epoch": 0.3115423901940756, "grad_norm": 1.410374641418457, "learning_rate": 1e-06, "loss": 0.019, "step": 1830 }, { "epoch": 0.31171263193735105, "grad_norm": 1.3269094228744507, "learning_rate": 1e-06, "loss": 0.0232, "step": 1831 }, { "epoch": 0.3118828736806265, "grad_norm": 2.6260929107666016, "learning_rate": 1e-06, "loss": 0.0334, "step": 1832 }, { "epoch": 0.31205311542390196, "grad_norm": 1.4052633047103882, "learning_rate": 1e-06, "loss": 0.0147, "step": 1833 }, { "epoch": 0.3122233571671774, "grad_norm": 1.3996262550354004, "learning_rate": 1e-06, "loss": 0.0231, "step": 1834 }, { "epoch": 0.3123935989104528, "grad_norm": 1.7314420938491821, "learning_rate": 1e-06, "loss": 0.0289, "step": 1835 }, { "epoch": 0.3125638406537283, "grad_norm": 1.2537227869033813, "learning_rate": 1e-06, "loss": 0.0209, "step": 1836 }, { "epoch": 0.31273408239700373, "grad_norm": 1.3801499605178833, "learning_rate": 1e-06, "loss": 0.023, "step": 1837 }, { "epoch": 0.3129043241402792, "grad_norm": 1.6672334671020508, "learning_rate": 1e-06, "loss": 0.0261, "step": 1838 }, { "epoch": 0.31307456588355465, "grad_norm": 1.7810288667678833, "learning_rate": 1e-06, "loss": 0.031, "step": 1839 }, { "epoch": 0.3132448076268301, "grad_norm": 1.435265064239502, "learning_rate": 1e-06, "loss": 0.0178, "step": 1840 }, { "epoch": 0.31341504937010556, "grad_norm": 1.537177562713623, "learning_rate": 1e-06, "loss": 0.0229, "step": 1841 }, { "epoch": 0.313585291113381, "grad_norm": 1.7948952913284302, "learning_rate": 1e-06, "loss": 0.0289, "step": 1842 }, { "epoch": 0.31375553285665647, "grad_norm": 1.2194470167160034, "learning_rate": 1e-06, "loss": 0.0172, "step": 1843 }, { "epoch": 0.31392577459993193, "grad_norm": 1.6376681327819824, "learning_rate": 1e-06, "loss": 0.0291, "step": 1844 }, { "epoch": 0.31409601634320733, "grad_norm": 1.389352798461914, "learning_rate": 1e-06, "loss": 0.015, "step": 1845 }, { "epoch": 0.3142662580864828, "grad_norm": 1.5804452896118164, "learning_rate": 1e-06, "loss": 0.0386, "step": 1846 }, { "epoch": 0.31443649982975824, "grad_norm": 1.9733364582061768, "learning_rate": 1e-06, "loss": 0.0261, "step": 1847 }, { "epoch": 0.3146067415730337, "grad_norm": 1.4925074577331543, "learning_rate": 1e-06, "loss": 0.0318, "step": 1848 }, { "epoch": 0.31477698331630916, "grad_norm": 1.4996342658996582, "learning_rate": 1e-06, "loss": 0.0203, "step": 1849 }, { "epoch": 0.3149472250595846, "grad_norm": 1.4963217973709106, "learning_rate": 1e-06, "loss": 0.0268, "step": 1850 }, { "epoch": 0.31511746680286007, "grad_norm": 1.6725646257400513, "learning_rate": 1e-06, "loss": 0.0176, "step": 1851 }, { "epoch": 0.3152877085461355, "grad_norm": 1.3835301399230957, "learning_rate": 1e-06, "loss": 0.0256, "step": 1852 }, { "epoch": 0.315457950289411, "grad_norm": 1.7713634967803955, "learning_rate": 1e-06, "loss": 0.0255, "step": 1853 }, { "epoch": 0.31562819203268644, "grad_norm": 1.1334712505340576, "learning_rate": 1e-06, "loss": 0.0175, "step": 1854 }, { "epoch": 0.31579843377596184, "grad_norm": 1.6713346242904663, "learning_rate": 1e-06, "loss": 0.0192, "step": 1855 }, { "epoch": 0.3159686755192373, "grad_norm": 1.463097095489502, "learning_rate": 1e-06, "loss": 0.0243, "step": 1856 }, { "epoch": 0.31613891726251275, "grad_norm": 1.6081440448760986, "learning_rate": 1e-06, "loss": 0.0262, "step": 1857 }, { "epoch": 0.3163091590057882, "grad_norm": 1.7677350044250488, "learning_rate": 1e-06, "loss": 0.022, "step": 1858 }, { "epoch": 0.31647940074906367, "grad_norm": 2.0143535137176514, "learning_rate": 1e-06, "loss": 0.0252, "step": 1859 }, { "epoch": 0.3166496424923391, "grad_norm": 1.4883443117141724, "learning_rate": 1e-06, "loss": 0.0303, "step": 1860 }, { "epoch": 0.3168198842356146, "grad_norm": 1.5226455926895142, "learning_rate": 1e-06, "loss": 0.0199, "step": 1861 }, { "epoch": 0.31699012597889004, "grad_norm": 1.593501091003418, "learning_rate": 1e-06, "loss": 0.0232, "step": 1862 }, { "epoch": 0.3171603677221655, "grad_norm": 1.3263167142868042, "learning_rate": 1e-06, "loss": 0.024, "step": 1863 }, { "epoch": 0.31733060946544095, "grad_norm": 1.4621020555496216, "learning_rate": 1e-06, "loss": 0.024, "step": 1864 }, { "epoch": 0.31750085120871635, "grad_norm": 1.8614875078201294, "learning_rate": 1e-06, "loss": 0.0263, "step": 1865 }, { "epoch": 0.3176710929519918, "grad_norm": 1.5570732355117798, "learning_rate": 1e-06, "loss": 0.0178, "step": 1866 }, { "epoch": 0.31784133469526726, "grad_norm": 1.5920957326889038, "learning_rate": 1e-06, "loss": 0.0283, "step": 1867 }, { "epoch": 0.3180115764385427, "grad_norm": 1.3949881792068481, "learning_rate": 1e-06, "loss": 0.0262, "step": 1868 }, { "epoch": 0.3181818181818182, "grad_norm": 1.364362359046936, "learning_rate": 1e-06, "loss": 0.0292, "step": 1869 }, { "epoch": 0.31835205992509363, "grad_norm": 1.4598324298858643, "learning_rate": 1e-06, "loss": 0.0244, "step": 1870 }, { "epoch": 0.3185223016683691, "grad_norm": 1.320860505104065, "learning_rate": 1e-06, "loss": 0.0198, "step": 1871 }, { "epoch": 0.31869254341164455, "grad_norm": 1.9381211996078491, "learning_rate": 1e-06, "loss": 0.0263, "step": 1872 }, { "epoch": 0.31886278515492, "grad_norm": 1.4324305057525635, "learning_rate": 1e-06, "loss": 0.029, "step": 1873 }, { "epoch": 0.31903302689819546, "grad_norm": 1.2118055820465088, "learning_rate": 1e-06, "loss": 0.0144, "step": 1874 }, { "epoch": 0.3192032686414709, "grad_norm": 1.200643539428711, "learning_rate": 1e-06, "loss": 0.0174, "step": 1875 }, { "epoch": 0.3193735103847463, "grad_norm": 1.5699355602264404, "learning_rate": 1e-06, "loss": 0.0288, "step": 1876 }, { "epoch": 0.3195437521280218, "grad_norm": 1.7179347276687622, "learning_rate": 1e-06, "loss": 0.0267, "step": 1877 }, { "epoch": 0.31971399387129723, "grad_norm": 1.8065325021743774, "learning_rate": 1e-06, "loss": 0.0256, "step": 1878 }, { "epoch": 0.3198842356145727, "grad_norm": 2.4595632553100586, "learning_rate": 1e-06, "loss": 0.0239, "step": 1879 }, { "epoch": 0.32005447735784814, "grad_norm": 1.140232801437378, "learning_rate": 1e-06, "loss": 0.0174, "step": 1880 }, { "epoch": 0.3202247191011236, "grad_norm": 1.3284320831298828, "learning_rate": 1e-06, "loss": 0.0212, "step": 1881 }, { "epoch": 0.32039496084439906, "grad_norm": 1.7007908821105957, "learning_rate": 1e-06, "loss": 0.0252, "step": 1882 }, { "epoch": 0.3205652025876745, "grad_norm": 1.8130091428756714, "learning_rate": 1e-06, "loss": 0.0322, "step": 1883 }, { "epoch": 0.32073544433094997, "grad_norm": 1.434629201889038, "learning_rate": 1e-06, "loss": 0.0176, "step": 1884 }, { "epoch": 0.3209056860742254, "grad_norm": 1.0741052627563477, "learning_rate": 1e-06, "loss": 0.0174, "step": 1885 }, { "epoch": 0.32107592781750083, "grad_norm": 1.5076146125793457, "learning_rate": 1e-06, "loss": 0.0187, "step": 1886 }, { "epoch": 0.3212461695607763, "grad_norm": 1.420843243598938, "learning_rate": 1e-06, "loss": 0.0247, "step": 1887 }, { "epoch": 0.32141641130405174, "grad_norm": 1.4577027559280396, "learning_rate": 1e-06, "loss": 0.019, "step": 1888 }, { "epoch": 0.3215866530473272, "grad_norm": 1.7815074920654297, "learning_rate": 1e-06, "loss": 0.0256, "step": 1889 }, { "epoch": 0.32175689479060265, "grad_norm": 1.2624595165252686, "learning_rate": 1e-06, "loss": 0.023, "step": 1890 }, { "epoch": 0.3219271365338781, "grad_norm": 1.2014528512954712, "learning_rate": 1e-06, "loss": 0.0191, "step": 1891 }, { "epoch": 0.32209737827715357, "grad_norm": 1.948778510093689, "learning_rate": 1e-06, "loss": 0.0239, "step": 1892 }, { "epoch": 0.322267620020429, "grad_norm": 1.3787339925765991, "learning_rate": 1e-06, "loss": 0.0231, "step": 1893 }, { "epoch": 0.3224378617637045, "grad_norm": 1.5238713026046753, "learning_rate": 1e-06, "loss": 0.0195, "step": 1894 }, { "epoch": 0.32260810350697994, "grad_norm": 1.623833417892456, "learning_rate": 1e-06, "loss": 0.0222, "step": 1895 }, { "epoch": 0.32277834525025534, "grad_norm": 1.7165459394454956, "learning_rate": 1e-06, "loss": 0.0294, "step": 1896 }, { "epoch": 0.3229485869935308, "grad_norm": 1.7165459394454956, "learning_rate": 1e-06, "loss": 0.0261, "step": 1897 }, { "epoch": 0.32311882873680625, "grad_norm": 1.6914831399917603, "learning_rate": 1e-06, "loss": 0.0212, "step": 1898 }, { "epoch": 0.3232890704800817, "grad_norm": 1.4278634786605835, "learning_rate": 1e-06, "loss": 0.0173, "step": 1899 }, { "epoch": 0.32345931222335716, "grad_norm": 1.3899396657943726, "learning_rate": 1e-06, "loss": 0.0216, "step": 1900 }, { "epoch": 0.3236295539666326, "grad_norm": 1.8707932233810425, "learning_rate": 1e-06, "loss": 0.0296, "step": 1901 }, { "epoch": 0.3237997957099081, "grad_norm": 1.5746642351150513, "learning_rate": 1e-06, "loss": 0.0202, "step": 1902 }, { "epoch": 0.32397003745318353, "grad_norm": 1.5746642351150513, "learning_rate": 1e-06, "loss": 0.0459, "step": 1903 }, { "epoch": 0.324140279196459, "grad_norm": 1.444451928138733, "learning_rate": 1e-06, "loss": 0.0239, "step": 1904 }, { "epoch": 0.32431052093973445, "grad_norm": 1.6318027973175049, "learning_rate": 1e-06, "loss": 0.0168, "step": 1905 }, { "epoch": 0.32448076268300985, "grad_norm": 1.5491737127304077, "learning_rate": 1e-06, "loss": 0.0229, "step": 1906 }, { "epoch": 0.3246510044262853, "grad_norm": 1.1921838521957397, "learning_rate": 1e-06, "loss": 0.0217, "step": 1907 }, { "epoch": 0.32482124616956076, "grad_norm": 1.1826146841049194, "learning_rate": 1e-06, "loss": 0.0153, "step": 1908 }, { "epoch": 0.3249914879128362, "grad_norm": 1.4094597101211548, "learning_rate": 1e-06, "loss": 0.019, "step": 1909 }, { "epoch": 0.3251617296561117, "grad_norm": 1.6654062271118164, "learning_rate": 1e-06, "loss": 0.0239, "step": 1910 }, { "epoch": 0.32533197139938713, "grad_norm": 1.497878909111023, "learning_rate": 1e-06, "loss": 0.0234, "step": 1911 }, { "epoch": 0.3255022131426626, "grad_norm": 1.8206701278686523, "learning_rate": 1e-06, "loss": 0.0277, "step": 1912 }, { "epoch": 0.32567245488593805, "grad_norm": 1.2228989601135254, "learning_rate": 1e-06, "loss": 0.017, "step": 1913 }, { "epoch": 0.3258426966292135, "grad_norm": 1.4604421854019165, "learning_rate": 1e-06, "loss": 0.0266, "step": 1914 }, { "epoch": 0.32601293837248896, "grad_norm": 1.377022624015808, "learning_rate": 1e-06, "loss": 0.0206, "step": 1915 }, { "epoch": 0.32618318011576436, "grad_norm": 3.5428032875061035, "learning_rate": 1e-06, "loss": 0.0489, "step": 1916 }, { "epoch": 0.3263534218590398, "grad_norm": 1.4174351692199707, "learning_rate": 1e-06, "loss": 0.0177, "step": 1917 }, { "epoch": 0.3265236636023153, "grad_norm": 1.2907001972198486, "learning_rate": 1e-06, "loss": 0.0171, "step": 1918 }, { "epoch": 0.32669390534559073, "grad_norm": 1.2209053039550781, "learning_rate": 1e-06, "loss": 0.019, "step": 1919 }, { "epoch": 0.3268641470888662, "grad_norm": 1.1538865566253662, "learning_rate": 1e-06, "loss": 0.0144, "step": 1920 }, { "epoch": 0.32703438883214164, "grad_norm": 1.5341880321502686, "learning_rate": 1e-06, "loss": 0.019, "step": 1921 }, { "epoch": 0.3272046305754171, "grad_norm": 1.3872793912887573, "learning_rate": 1e-06, "loss": 0.0181, "step": 1922 }, { "epoch": 0.32737487231869256, "grad_norm": 1.6427528858184814, "learning_rate": 1e-06, "loss": 0.0374, "step": 1923 }, { "epoch": 0.327545114061968, "grad_norm": 1.2254210710525513, "learning_rate": 1e-06, "loss": 0.0155, "step": 1924 }, { "epoch": 0.32771535580524347, "grad_norm": 1.4030553102493286, "learning_rate": 1e-06, "loss": 0.0207, "step": 1925 }, { "epoch": 0.32788559754851887, "grad_norm": 1.9961392879486084, "learning_rate": 1e-06, "loss": 0.0262, "step": 1926 }, { "epoch": 0.3280558392917943, "grad_norm": 1.2701325416564941, "learning_rate": 1e-06, "loss": 0.0194, "step": 1927 }, { "epoch": 0.3282260810350698, "grad_norm": 1.761595606803894, "learning_rate": 1e-06, "loss": 0.0242, "step": 1928 }, { "epoch": 0.32839632277834524, "grad_norm": 1.5316126346588135, "learning_rate": 1e-06, "loss": 0.0224, "step": 1929 }, { "epoch": 0.3285665645216207, "grad_norm": 1.808524489402771, "learning_rate": 1e-06, "loss": 0.0289, "step": 1930 }, { "epoch": 0.32873680626489615, "grad_norm": 1.5288134813308716, "learning_rate": 1e-06, "loss": 0.023, "step": 1931 }, { "epoch": 0.3289070480081716, "grad_norm": 1.089560627937317, "learning_rate": 1e-06, "loss": 0.0148, "step": 1932 }, { "epoch": 0.32907728975144707, "grad_norm": 1.5515779256820679, "learning_rate": 1e-06, "loss": 0.0187, "step": 1933 }, { "epoch": 0.3292475314947225, "grad_norm": 1.5481963157653809, "learning_rate": 1e-06, "loss": 0.0249, "step": 1934 }, { "epoch": 0.329417773237998, "grad_norm": 1.1329618692398071, "learning_rate": 1e-06, "loss": 0.0175, "step": 1935 }, { "epoch": 0.3295880149812734, "grad_norm": 1.2369213104248047, "learning_rate": 1e-06, "loss": 0.0196, "step": 1936 }, { "epoch": 0.32975825672454884, "grad_norm": 1.7791813611984253, "learning_rate": 1e-06, "loss": 0.028, "step": 1937 }, { "epoch": 0.3299284984678243, "grad_norm": 1.6558544635772705, "learning_rate": 1e-06, "loss": 0.0208, "step": 1938 }, { "epoch": 0.33009874021109975, "grad_norm": 1.5044019222259521, "learning_rate": 1e-06, "loss": 0.0194, "step": 1939 }, { "epoch": 0.3302689819543752, "grad_norm": 1.4375008344650269, "learning_rate": 1e-06, "loss": 0.021, "step": 1940 }, { "epoch": 0.33043922369765066, "grad_norm": 1.3709217309951782, "learning_rate": 1e-06, "loss": 0.0155, "step": 1941 }, { "epoch": 0.3306094654409261, "grad_norm": 1.812625765800476, "learning_rate": 1e-06, "loss": 0.0261, "step": 1942 }, { "epoch": 0.3307797071842016, "grad_norm": 1.2849189043045044, "learning_rate": 1e-06, "loss": 0.0187, "step": 1943 }, { "epoch": 0.33094994892747703, "grad_norm": 1.6139867305755615, "learning_rate": 1e-06, "loss": 0.0269, "step": 1944 }, { "epoch": 0.3311201906707525, "grad_norm": 1.5526776313781738, "learning_rate": 1e-06, "loss": 0.0236, "step": 1945 }, { "epoch": 0.33129043241402795, "grad_norm": 1.4651141166687012, "learning_rate": 1e-06, "loss": 0.0247, "step": 1946 }, { "epoch": 0.33146067415730335, "grad_norm": 2.1171090602874756, "learning_rate": 1e-06, "loss": 0.0345, "step": 1947 }, { "epoch": 0.3316309159005788, "grad_norm": 1.532902717590332, "learning_rate": 1e-06, "loss": 0.0234, "step": 1948 }, { "epoch": 0.33180115764385426, "grad_norm": 1.4364941120147705, "learning_rate": 1e-06, "loss": 0.0224, "step": 1949 }, { "epoch": 0.3319713993871297, "grad_norm": 1.2812119722366333, "learning_rate": 1e-06, "loss": 0.0178, "step": 1950 }, { "epoch": 0.3321416411304052, "grad_norm": 1.3794339895248413, "learning_rate": 1e-06, "loss": 0.02, "step": 1951 }, { "epoch": 0.33231188287368063, "grad_norm": 1.5946048498153687, "learning_rate": 1e-06, "loss": 0.0179, "step": 1952 }, { "epoch": 0.3324821246169561, "grad_norm": 2.0443038940429688, "learning_rate": 1e-06, "loss": 0.0215, "step": 1953 }, { "epoch": 0.33265236636023154, "grad_norm": 1.368597149848938, "learning_rate": 1e-06, "loss": 0.0158, "step": 1954 }, { "epoch": 0.332822608103507, "grad_norm": 1.441415786743164, "learning_rate": 1e-06, "loss": 0.0197, "step": 1955 }, { "epoch": 0.33299284984678246, "grad_norm": 1.6092345714569092, "learning_rate": 1e-06, "loss": 0.025, "step": 1956 }, { "epoch": 0.33316309159005786, "grad_norm": 1.4012231826782227, "learning_rate": 1e-06, "loss": 0.0217, "step": 1957 }, { "epoch": 0.3333333333333333, "grad_norm": 1.6141374111175537, "learning_rate": 1e-06, "loss": 0.0241, "step": 1958 }, { "epoch": 0.33350357507660877, "grad_norm": 1.4171791076660156, "learning_rate": 1e-06, "loss": 0.0178, "step": 1959 }, { "epoch": 0.33367381681988423, "grad_norm": 1.7821341753005981, "learning_rate": 1e-06, "loss": 0.0288, "step": 1960 }, { "epoch": 0.3338440585631597, "grad_norm": 1.6299926042556763, "learning_rate": 1e-06, "loss": 0.0269, "step": 1961 }, { "epoch": 0.33401430030643514, "grad_norm": 1.5912959575653076, "learning_rate": 1e-06, "loss": 0.0196, "step": 1962 }, { "epoch": 0.3341845420497106, "grad_norm": 1.282433271408081, "learning_rate": 1e-06, "loss": 0.0169, "step": 1963 }, { "epoch": 0.33435478379298605, "grad_norm": 1.5464577674865723, "learning_rate": 1e-06, "loss": 0.027, "step": 1964 }, { "epoch": 0.3345250255362615, "grad_norm": 1.4779020547866821, "learning_rate": 1e-06, "loss": 0.0301, "step": 1965 }, { "epoch": 0.33469526727953697, "grad_norm": 1.4697922468185425, "learning_rate": 1e-06, "loss": 0.0234, "step": 1966 }, { "epoch": 0.33486550902281237, "grad_norm": 1.5022433996200562, "learning_rate": 1e-06, "loss": 0.0189, "step": 1967 }, { "epoch": 0.3350357507660878, "grad_norm": 1.5426301956176758, "learning_rate": 1e-06, "loss": 0.0221, "step": 1968 }, { "epoch": 0.3352059925093633, "grad_norm": 1.9386368989944458, "learning_rate": 1e-06, "loss": 0.0269, "step": 1969 }, { "epoch": 0.33537623425263874, "grad_norm": 1.8475077152252197, "learning_rate": 1e-06, "loss": 0.0194, "step": 1970 }, { "epoch": 0.3355464759959142, "grad_norm": 1.6919015645980835, "learning_rate": 1e-06, "loss": 0.0259, "step": 1971 }, { "epoch": 0.33571671773918965, "grad_norm": 1.3801902532577515, "learning_rate": 1e-06, "loss": 0.0143, "step": 1972 }, { "epoch": 0.3358869594824651, "grad_norm": 1.8841779232025146, "learning_rate": 1e-06, "loss": 0.0382, "step": 1973 }, { "epoch": 0.33605720122574056, "grad_norm": 1.4700556993484497, "learning_rate": 1e-06, "loss": 0.0222, "step": 1974 }, { "epoch": 0.336227442969016, "grad_norm": 1.4399062395095825, "learning_rate": 1e-06, "loss": 0.0233, "step": 1975 }, { "epoch": 0.3363976847122915, "grad_norm": 1.2615573406219482, "learning_rate": 1e-06, "loss": 0.0149, "step": 1976 }, { "epoch": 0.3365679264555669, "grad_norm": 1.5497113466262817, "learning_rate": 1e-06, "loss": 0.0234, "step": 1977 }, { "epoch": 0.33673816819884234, "grad_norm": 1.5951131582260132, "learning_rate": 1e-06, "loss": 0.0189, "step": 1978 }, { "epoch": 0.3369084099421178, "grad_norm": 1.7845149040222168, "learning_rate": 1e-06, "loss": 0.026, "step": 1979 }, { "epoch": 0.33707865168539325, "grad_norm": 1.608124017715454, "learning_rate": 1e-06, "loss": 0.0247, "step": 1980 }, { "epoch": 0.3372488934286687, "grad_norm": 1.1790499687194824, "learning_rate": 1e-06, "loss": 0.0163, "step": 1981 }, { "epoch": 0.33741913517194416, "grad_norm": 1.192211627960205, "learning_rate": 1e-06, "loss": 0.0215, "step": 1982 }, { "epoch": 0.3375893769152196, "grad_norm": 1.3114814758300781, "learning_rate": 1e-06, "loss": 0.0229, "step": 1983 }, { "epoch": 0.3377596186584951, "grad_norm": 1.1542383432388306, "learning_rate": 1e-06, "loss": 0.0124, "step": 1984 }, { "epoch": 0.33792986040177053, "grad_norm": 1.2791386842727661, "learning_rate": 1e-06, "loss": 0.0155, "step": 1985 }, { "epoch": 0.338100102145046, "grad_norm": 1.5622920989990234, "learning_rate": 1e-06, "loss": 0.0191, "step": 1986 }, { "epoch": 0.3382703438883214, "grad_norm": 1.3029829263687134, "learning_rate": 1e-06, "loss": 0.0188, "step": 1987 }, { "epoch": 0.33844058563159685, "grad_norm": 1.5938515663146973, "learning_rate": 1e-06, "loss": 0.0261, "step": 1988 }, { "epoch": 0.3386108273748723, "grad_norm": 1.4531913995742798, "learning_rate": 1e-06, "loss": 0.0189, "step": 1989 }, { "epoch": 0.33878106911814776, "grad_norm": 1.8602267503738403, "learning_rate": 1e-06, "loss": 0.021, "step": 1990 }, { "epoch": 0.3389513108614232, "grad_norm": 1.388229489326477, "learning_rate": 1e-06, "loss": 0.0118, "step": 1991 }, { "epoch": 0.3391215526046987, "grad_norm": 1.3557769060134888, "learning_rate": 1e-06, "loss": 0.0178, "step": 1992 }, { "epoch": 0.33929179434797413, "grad_norm": 1.5094513893127441, "learning_rate": 1e-06, "loss": 0.0186, "step": 1993 }, { "epoch": 0.3394620360912496, "grad_norm": 1.5209040641784668, "learning_rate": 1e-06, "loss": 0.0234, "step": 1994 }, { "epoch": 0.33963227783452504, "grad_norm": 2.0958139896392822, "learning_rate": 1e-06, "loss": 0.024, "step": 1995 }, { "epoch": 0.3398025195778005, "grad_norm": 1.7948811054229736, "learning_rate": 1e-06, "loss": 0.0195, "step": 1996 }, { "epoch": 0.3399727613210759, "grad_norm": 1.6881929636001587, "learning_rate": 1e-06, "loss": 0.0259, "step": 1997 }, { "epoch": 0.34014300306435136, "grad_norm": 1.1193768978118896, "learning_rate": 1e-06, "loss": 0.0157, "step": 1998 }, { "epoch": 0.3403132448076268, "grad_norm": 2.3566431999206543, "learning_rate": 1e-06, "loss": 0.0536, "step": 1999 }, { "epoch": 0.34048348655090227, "grad_norm": 1.4195446968078613, "learning_rate": 1e-06, "loss": 0.0161, "step": 2000 }, { "epoch": 0.34048348655090227, "eval_loss": 0.27453577518463135, "eval_runtime": 21.0487, "eval_samples_per_second": 14.253, "eval_steps_per_second": 0.38, "step": 2000 }, { "epoch": 0.3406537282941777, "grad_norm": 1.61619234085083, "learning_rate": 1e-06, "loss": 0.0193, "step": 2001 }, { "epoch": 0.3408239700374532, "grad_norm": 1.2892980575561523, "learning_rate": 1e-06, "loss": 0.0173, "step": 2002 }, { "epoch": 0.34099421178072864, "grad_norm": 1.3476999998092651, "learning_rate": 1e-06, "loss": 0.0194, "step": 2003 }, { "epoch": 0.3411644535240041, "grad_norm": 1.6146029233932495, "learning_rate": 1e-06, "loss": 0.0154, "step": 2004 }, { "epoch": 0.34133469526727955, "grad_norm": 1.2237156629562378, "learning_rate": 1e-06, "loss": 0.0159, "step": 2005 }, { "epoch": 0.341504937010555, "grad_norm": 1.6599453687667847, "learning_rate": 1e-06, "loss": 0.0228, "step": 2006 }, { "epoch": 0.34167517875383047, "grad_norm": 4.322335720062256, "learning_rate": 1e-06, "loss": 0.0276, "step": 2007 }, { "epoch": 0.34184542049710587, "grad_norm": 1.4886971712112427, "learning_rate": 1e-06, "loss": 0.0307, "step": 2008 }, { "epoch": 0.3420156622403813, "grad_norm": 1.504891037940979, "learning_rate": 1e-06, "loss": 0.0144, "step": 2009 }, { "epoch": 0.3421859039836568, "grad_norm": 1.3250677585601807, "learning_rate": 1e-06, "loss": 0.0178, "step": 2010 }, { "epoch": 0.34235614572693224, "grad_norm": 1.3704164028167725, "learning_rate": 1e-06, "loss": 0.0175, "step": 2011 }, { "epoch": 0.3425263874702077, "grad_norm": 1.270844578742981, "learning_rate": 1e-06, "loss": 0.0188, "step": 2012 }, { "epoch": 0.34269662921348315, "grad_norm": 1.2841618061065674, "learning_rate": 1e-06, "loss": 0.0162, "step": 2013 }, { "epoch": 0.3428668709567586, "grad_norm": 2.089329242706299, "learning_rate": 1e-06, "loss": 0.0362, "step": 2014 }, { "epoch": 0.34303711270003406, "grad_norm": 2.0143887996673584, "learning_rate": 1e-06, "loss": 0.0259, "step": 2015 }, { "epoch": 0.3432073544433095, "grad_norm": 1.7846940755844116, "learning_rate": 1e-06, "loss": 0.0268, "step": 2016 }, { "epoch": 0.343377596186585, "grad_norm": 1.5742454528808594, "learning_rate": 1e-06, "loss": 0.0262, "step": 2017 }, { "epoch": 0.3435478379298604, "grad_norm": 1.1249366998672485, "learning_rate": 1e-06, "loss": 0.0133, "step": 2018 }, { "epoch": 0.34371807967313583, "grad_norm": 1.3523153066635132, "learning_rate": 1e-06, "loss": 0.0234, "step": 2019 }, { "epoch": 0.3438883214164113, "grad_norm": 1.611651062965393, "learning_rate": 1e-06, "loss": 0.0179, "step": 2020 }, { "epoch": 0.34405856315968675, "grad_norm": 1.476097583770752, "learning_rate": 1e-06, "loss": 0.017, "step": 2021 }, { "epoch": 0.3442288049029622, "grad_norm": 1.2952362298965454, "learning_rate": 1e-06, "loss": 0.0255, "step": 2022 }, { "epoch": 0.34439904664623766, "grad_norm": 1.3099634647369385, "learning_rate": 1e-06, "loss": 0.0153, "step": 2023 }, { "epoch": 0.3445692883895131, "grad_norm": 1.4053715467453003, "learning_rate": 1e-06, "loss": 0.0159, "step": 2024 }, { "epoch": 0.3447395301327886, "grad_norm": 1.7385302782058716, "learning_rate": 1e-06, "loss": 0.022, "step": 2025 }, { "epoch": 0.34490977187606403, "grad_norm": 1.365162968635559, "learning_rate": 1e-06, "loss": 0.0171, "step": 2026 }, { "epoch": 0.3450800136193395, "grad_norm": 1.7785168886184692, "learning_rate": 1e-06, "loss": 0.0202, "step": 2027 }, { "epoch": 0.3452502553626149, "grad_norm": 1.1140302419662476, "learning_rate": 1e-06, "loss": 0.0127, "step": 2028 }, { "epoch": 0.34542049710589035, "grad_norm": 1.29301917552948, "learning_rate": 1e-06, "loss": 0.0212, "step": 2029 }, { "epoch": 0.3455907388491658, "grad_norm": 1.9448950290679932, "learning_rate": 1e-06, "loss": 0.0285, "step": 2030 }, { "epoch": 0.34576098059244126, "grad_norm": 1.5493046045303345, "learning_rate": 1e-06, "loss": 0.0268, "step": 2031 }, { "epoch": 0.3459312223357167, "grad_norm": 1.561806321144104, "learning_rate": 1e-06, "loss": 0.0258, "step": 2032 }, { "epoch": 0.34610146407899217, "grad_norm": 1.5834723711013794, "learning_rate": 1e-06, "loss": 0.0336, "step": 2033 }, { "epoch": 0.34627170582226763, "grad_norm": 1.1366455554962158, "learning_rate": 1e-06, "loss": 0.0182, "step": 2034 }, { "epoch": 0.3464419475655431, "grad_norm": 1.152408242225647, "learning_rate": 1e-06, "loss": 0.0166, "step": 2035 }, { "epoch": 0.34661218930881854, "grad_norm": 1.2690989971160889, "learning_rate": 1e-06, "loss": 0.014, "step": 2036 }, { "epoch": 0.346782431052094, "grad_norm": 1.0005087852478027, "learning_rate": 1e-06, "loss": 0.0135, "step": 2037 }, { "epoch": 0.3469526727953694, "grad_norm": 1.1189073324203491, "learning_rate": 1e-06, "loss": 0.0143, "step": 2038 }, { "epoch": 0.34712291453864486, "grad_norm": 1.6101375818252563, "learning_rate": 1e-06, "loss": 0.024, "step": 2039 }, { "epoch": 0.3472931562819203, "grad_norm": 1.4370726346969604, "learning_rate": 1e-06, "loss": 0.0193, "step": 2040 }, { "epoch": 0.34746339802519577, "grad_norm": 1.6193634271621704, "learning_rate": 1e-06, "loss": 0.0249, "step": 2041 }, { "epoch": 0.3476336397684712, "grad_norm": 1.6027393341064453, "learning_rate": 1e-06, "loss": 0.0178, "step": 2042 }, { "epoch": 0.3478038815117467, "grad_norm": 1.3268249034881592, "learning_rate": 1e-06, "loss": 0.0194, "step": 2043 }, { "epoch": 0.34797412325502214, "grad_norm": 1.6447010040283203, "learning_rate": 1e-06, "loss": 0.016, "step": 2044 }, { "epoch": 0.3481443649982976, "grad_norm": 1.3561208248138428, "learning_rate": 1e-06, "loss": 0.0166, "step": 2045 }, { "epoch": 0.34831460674157305, "grad_norm": 1.494844913482666, "learning_rate": 1e-06, "loss": 0.0288, "step": 2046 }, { "epoch": 0.3484848484848485, "grad_norm": 1.2338416576385498, "learning_rate": 1e-06, "loss": 0.0196, "step": 2047 }, { "epoch": 0.3486550902281239, "grad_norm": 1.3532445430755615, "learning_rate": 1e-06, "loss": 0.0208, "step": 2048 }, { "epoch": 0.34882533197139937, "grad_norm": 1.3149325847625732, "learning_rate": 1e-06, "loss": 0.02, "step": 2049 }, { "epoch": 0.3489955737146748, "grad_norm": 1.2256778478622437, "learning_rate": 1e-06, "loss": 0.0188, "step": 2050 }, { "epoch": 0.3491658154579503, "grad_norm": 1.6831778287887573, "learning_rate": 1e-06, "loss": 0.0164, "step": 2051 }, { "epoch": 0.34933605720122574, "grad_norm": 1.2866861820220947, "learning_rate": 1e-06, "loss": 0.0197, "step": 2052 }, { "epoch": 0.3495062989445012, "grad_norm": 1.3010034561157227, "learning_rate": 1e-06, "loss": 0.0218, "step": 2053 }, { "epoch": 0.34967654068777665, "grad_norm": 1.460037350654602, "learning_rate": 1e-06, "loss": 0.021, "step": 2054 }, { "epoch": 0.3498467824310521, "grad_norm": 1.2930116653442383, "learning_rate": 1e-06, "loss": 0.0189, "step": 2055 }, { "epoch": 0.35001702417432756, "grad_norm": 1.4779536724090576, "learning_rate": 1e-06, "loss": 0.0195, "step": 2056 }, { "epoch": 0.350187265917603, "grad_norm": 1.1668182611465454, "learning_rate": 1e-06, "loss": 0.0168, "step": 2057 }, { "epoch": 0.3503575076608784, "grad_norm": 1.5473500490188599, "learning_rate": 1e-06, "loss": 0.0245, "step": 2058 }, { "epoch": 0.3505277494041539, "grad_norm": 1.5026984214782715, "learning_rate": 1e-06, "loss": 0.0263, "step": 2059 }, { "epoch": 0.35069799114742933, "grad_norm": 1.4979209899902344, "learning_rate": 1e-06, "loss": 0.0192, "step": 2060 }, { "epoch": 0.3508682328907048, "grad_norm": 1.6945948600769043, "learning_rate": 1e-06, "loss": 0.0215, "step": 2061 }, { "epoch": 0.35103847463398025, "grad_norm": 1.4094072580337524, "learning_rate": 1e-06, "loss": 0.0159, "step": 2062 }, { "epoch": 0.3512087163772557, "grad_norm": 1.5919791460037231, "learning_rate": 1e-06, "loss": 0.017, "step": 2063 }, { "epoch": 0.35137895812053116, "grad_norm": 1.1257418394088745, "learning_rate": 1e-06, "loss": 0.0117, "step": 2064 }, { "epoch": 0.3515491998638066, "grad_norm": 1.4687309265136719, "learning_rate": 1e-06, "loss": 0.0161, "step": 2065 }, { "epoch": 0.3517194416070821, "grad_norm": 1.5042623281478882, "learning_rate": 1e-06, "loss": 0.0206, "step": 2066 }, { "epoch": 0.35188968335035753, "grad_norm": 1.5676453113555908, "learning_rate": 1e-06, "loss": 0.0191, "step": 2067 }, { "epoch": 0.352059925093633, "grad_norm": 1.368834137916565, "learning_rate": 1e-06, "loss": 0.0177, "step": 2068 }, { "epoch": 0.3522301668369084, "grad_norm": 1.1905148029327393, "learning_rate": 1e-06, "loss": 0.0159, "step": 2069 }, { "epoch": 0.35240040858018384, "grad_norm": 1.3578230142593384, "learning_rate": 1e-06, "loss": 0.0146, "step": 2070 }, { "epoch": 0.3525706503234593, "grad_norm": 1.6715102195739746, "learning_rate": 1e-06, "loss": 0.0239, "step": 2071 }, { "epoch": 0.35274089206673476, "grad_norm": 1.2917587757110596, "learning_rate": 1e-06, "loss": 0.0231, "step": 2072 }, { "epoch": 0.3529111338100102, "grad_norm": 1.7715479135513306, "learning_rate": 1e-06, "loss": 0.0151, "step": 2073 }, { "epoch": 0.35308137555328567, "grad_norm": 1.1060619354248047, "learning_rate": 1e-06, "loss": 0.0133, "step": 2074 }, { "epoch": 0.3532516172965611, "grad_norm": 2.2120845317840576, "learning_rate": 1e-06, "loss": 0.0233, "step": 2075 }, { "epoch": 0.3534218590398366, "grad_norm": 1.4091529846191406, "learning_rate": 1e-06, "loss": 0.027, "step": 2076 }, { "epoch": 0.35359210078311204, "grad_norm": 1.3464224338531494, "learning_rate": 1e-06, "loss": 0.0183, "step": 2077 }, { "epoch": 0.3537623425263875, "grad_norm": 1.6281346082687378, "learning_rate": 1e-06, "loss": 0.0253, "step": 2078 }, { "epoch": 0.3539325842696629, "grad_norm": 1.2976142168045044, "learning_rate": 1e-06, "loss": 0.0157, "step": 2079 }, { "epoch": 0.35410282601293835, "grad_norm": 1.4444133043289185, "learning_rate": 1e-06, "loss": 0.0186, "step": 2080 }, { "epoch": 0.3542730677562138, "grad_norm": 1.5797218084335327, "learning_rate": 1e-06, "loss": 0.0183, "step": 2081 }, { "epoch": 0.35444330949948927, "grad_norm": 1.4098842144012451, "learning_rate": 1e-06, "loss": 0.0211, "step": 2082 }, { "epoch": 0.3546135512427647, "grad_norm": 1.34474778175354, "learning_rate": 1e-06, "loss": 0.0163, "step": 2083 }, { "epoch": 0.3547837929860402, "grad_norm": 1.0983127355575562, "learning_rate": 1e-06, "loss": 0.0163, "step": 2084 }, { "epoch": 0.35495403472931564, "grad_norm": 1.8528269529342651, "learning_rate": 1e-06, "loss": 0.0211, "step": 2085 }, { "epoch": 0.3551242764725911, "grad_norm": 1.4813817739486694, "learning_rate": 1e-06, "loss": 0.0148, "step": 2086 }, { "epoch": 0.35529451821586655, "grad_norm": 1.7108453512191772, "learning_rate": 1e-06, "loss": 0.0221, "step": 2087 }, { "epoch": 0.355464759959142, "grad_norm": 1.5667725801467896, "learning_rate": 1e-06, "loss": 0.0196, "step": 2088 }, { "epoch": 0.3556350017024174, "grad_norm": 1.5677913427352905, "learning_rate": 1e-06, "loss": 0.0221, "step": 2089 }, { "epoch": 0.35580524344569286, "grad_norm": 0.9959020614624023, "learning_rate": 1e-06, "loss": 0.0119, "step": 2090 }, { "epoch": 0.3559754851889683, "grad_norm": 1.4427545070648193, "learning_rate": 1e-06, "loss": 0.018, "step": 2091 }, { "epoch": 0.3561457269322438, "grad_norm": 1.162044644355774, "learning_rate": 1e-06, "loss": 0.0193, "step": 2092 }, { "epoch": 0.35631596867551923, "grad_norm": 1.3956273794174194, "learning_rate": 1e-06, "loss": 0.0219, "step": 2093 }, { "epoch": 0.3564862104187947, "grad_norm": 1.4350906610488892, "learning_rate": 1e-06, "loss": 0.0204, "step": 2094 }, { "epoch": 0.35665645216207015, "grad_norm": 1.2975064516067505, "learning_rate": 1e-06, "loss": 0.0147, "step": 2095 }, { "epoch": 0.3568266939053456, "grad_norm": 2.6082117557525635, "learning_rate": 1e-06, "loss": 0.0259, "step": 2096 }, { "epoch": 0.35699693564862106, "grad_norm": 1.2373225688934326, "learning_rate": 1e-06, "loss": 0.0158, "step": 2097 }, { "epoch": 0.3571671773918965, "grad_norm": 1.4951549768447876, "learning_rate": 1e-06, "loss": 0.0154, "step": 2098 }, { "epoch": 0.3573374191351719, "grad_norm": 1.339722752571106, "learning_rate": 1e-06, "loss": 0.012, "step": 2099 }, { "epoch": 0.3575076608784474, "grad_norm": 1.1775256395339966, "learning_rate": 1e-06, "loss": 0.0141, "step": 2100 }, { "epoch": 0.35767790262172283, "grad_norm": 1.7469888925552368, "learning_rate": 1e-06, "loss": 0.0147, "step": 2101 }, { "epoch": 0.3578481443649983, "grad_norm": 1.4239497184753418, "learning_rate": 1e-06, "loss": 0.0139, "step": 2102 }, { "epoch": 0.35801838610827375, "grad_norm": 1.3292256593704224, "learning_rate": 1e-06, "loss": 0.0162, "step": 2103 }, { "epoch": 0.3581886278515492, "grad_norm": 1.902808427810669, "learning_rate": 1e-06, "loss": 0.0274, "step": 2104 }, { "epoch": 0.35835886959482466, "grad_norm": 1.5837531089782715, "learning_rate": 1e-06, "loss": 0.0222, "step": 2105 }, { "epoch": 0.3585291113381001, "grad_norm": 1.3720051050186157, "learning_rate": 1e-06, "loss": 0.0207, "step": 2106 }, { "epoch": 0.35869935308137557, "grad_norm": 1.7276686429977417, "learning_rate": 1e-06, "loss": 0.0251, "step": 2107 }, { "epoch": 0.35886959482465103, "grad_norm": 1.7324738502502441, "learning_rate": 1e-06, "loss": 0.0256, "step": 2108 }, { "epoch": 0.35903983656792643, "grad_norm": 1.5119861364364624, "learning_rate": 1e-06, "loss": 0.0177, "step": 2109 }, { "epoch": 0.3592100783112019, "grad_norm": 1.672288417816162, "learning_rate": 1e-06, "loss": 0.0249, "step": 2110 }, { "epoch": 0.35938032005447734, "grad_norm": 3.63991379737854, "learning_rate": 1e-06, "loss": 0.0522, "step": 2111 }, { "epoch": 0.3595505617977528, "grad_norm": 1.3657325506210327, "learning_rate": 1e-06, "loss": 0.0194, "step": 2112 }, { "epoch": 0.35972080354102826, "grad_norm": 1.196782112121582, "learning_rate": 1e-06, "loss": 0.0213, "step": 2113 }, { "epoch": 0.3598910452843037, "grad_norm": 1.248388648033142, "learning_rate": 1e-06, "loss": 0.0125, "step": 2114 }, { "epoch": 0.36006128702757917, "grad_norm": 1.267297387123108, "learning_rate": 1e-06, "loss": 0.0182, "step": 2115 }, { "epoch": 0.3602315287708546, "grad_norm": 1.4637548923492432, "learning_rate": 1e-06, "loss": 0.0212, "step": 2116 }, { "epoch": 0.3604017705141301, "grad_norm": 1.5229331254959106, "learning_rate": 1e-06, "loss": 0.0254, "step": 2117 }, { "epoch": 0.36057201225740554, "grad_norm": 1.511386752128601, "learning_rate": 1e-06, "loss": 0.0193, "step": 2118 }, { "epoch": 0.36074225400068094, "grad_norm": 1.6698894500732422, "learning_rate": 1e-06, "loss": 0.0272, "step": 2119 }, { "epoch": 0.3609124957439564, "grad_norm": 2.0563039779663086, "learning_rate": 1e-06, "loss": 0.0234, "step": 2120 }, { "epoch": 0.36108273748723185, "grad_norm": 2.0152013301849365, "learning_rate": 1e-06, "loss": 0.0169, "step": 2121 }, { "epoch": 0.3612529792305073, "grad_norm": 1.2313683032989502, "learning_rate": 1e-06, "loss": 0.0146, "step": 2122 }, { "epoch": 0.36142322097378277, "grad_norm": 1.654374361038208, "learning_rate": 1e-06, "loss": 0.0238, "step": 2123 }, { "epoch": 0.3615934627170582, "grad_norm": 1.3135615587234497, "learning_rate": 1e-06, "loss": 0.013, "step": 2124 }, { "epoch": 0.3617637044603337, "grad_norm": 1.285529613494873, "learning_rate": 1e-06, "loss": 0.0205, "step": 2125 }, { "epoch": 0.36193394620360914, "grad_norm": 1.392561912536621, "learning_rate": 1e-06, "loss": 0.0174, "step": 2126 }, { "epoch": 0.3621041879468846, "grad_norm": 1.8578132390975952, "learning_rate": 1e-06, "loss": 0.0177, "step": 2127 }, { "epoch": 0.36227442969016005, "grad_norm": 1.368056297302246, "learning_rate": 1e-06, "loss": 0.0215, "step": 2128 }, { "epoch": 0.3624446714334355, "grad_norm": 1.1833387613296509, "learning_rate": 1e-06, "loss": 0.0135, "step": 2129 }, { "epoch": 0.3626149131767109, "grad_norm": 1.4502012729644775, "learning_rate": 1e-06, "loss": 0.0139, "step": 2130 }, { "epoch": 0.36278515491998636, "grad_norm": 1.522831678390503, "learning_rate": 1e-06, "loss": 0.0261, "step": 2131 }, { "epoch": 0.3629553966632618, "grad_norm": 1.4958367347717285, "learning_rate": 1e-06, "loss": 0.0168, "step": 2132 }, { "epoch": 0.3631256384065373, "grad_norm": 1.1259353160858154, "learning_rate": 1e-06, "loss": 0.0168, "step": 2133 }, { "epoch": 0.36329588014981273, "grad_norm": 2.0761754512786865, "learning_rate": 1e-06, "loss": 0.0311, "step": 2134 }, { "epoch": 0.3634661218930882, "grad_norm": 1.4105058908462524, "learning_rate": 1e-06, "loss": 0.0211, "step": 2135 }, { "epoch": 0.36363636363636365, "grad_norm": 1.2559282779693604, "learning_rate": 1e-06, "loss": 0.0168, "step": 2136 }, { "epoch": 0.3638066053796391, "grad_norm": 1.5729042291641235, "learning_rate": 1e-06, "loss": 0.0292, "step": 2137 }, { "epoch": 0.36397684712291456, "grad_norm": 1.7037521600723267, "learning_rate": 1e-06, "loss": 0.0228, "step": 2138 }, { "epoch": 0.36414708886619, "grad_norm": 1.3164565563201904, "learning_rate": 1e-06, "loss": 0.0178, "step": 2139 }, { "epoch": 0.3643173306094654, "grad_norm": 1.6225502490997314, "learning_rate": 1e-06, "loss": 0.0195, "step": 2140 }, { "epoch": 0.3644875723527409, "grad_norm": 1.2100952863693237, "learning_rate": 1e-06, "loss": 0.0165, "step": 2141 }, { "epoch": 0.36465781409601633, "grad_norm": 1.2184245586395264, "learning_rate": 1e-06, "loss": 0.0136, "step": 2142 }, { "epoch": 0.3648280558392918, "grad_norm": 1.6735187768936157, "learning_rate": 1e-06, "loss": 0.0227, "step": 2143 }, { "epoch": 0.36499829758256724, "grad_norm": 1.3809057474136353, "learning_rate": 1e-06, "loss": 0.0211, "step": 2144 }, { "epoch": 0.3651685393258427, "grad_norm": 1.2712934017181396, "learning_rate": 1e-06, "loss": 0.0139, "step": 2145 }, { "epoch": 0.36533878106911816, "grad_norm": 1.435024619102478, "learning_rate": 1e-06, "loss": 0.0146, "step": 2146 }, { "epoch": 0.3655090228123936, "grad_norm": 1.45426344871521, "learning_rate": 1e-06, "loss": 0.0159, "step": 2147 }, { "epoch": 0.36567926455566907, "grad_norm": 1.9181464910507202, "learning_rate": 1e-06, "loss": 0.0285, "step": 2148 }, { "epoch": 0.3658495062989445, "grad_norm": 1.4425302743911743, "learning_rate": 1e-06, "loss": 0.0196, "step": 2149 }, { "epoch": 0.36601974804221993, "grad_norm": 1.8058220148086548, "learning_rate": 1e-06, "loss": 0.0204, "step": 2150 }, { "epoch": 0.3661899897854954, "grad_norm": 1.3857768774032593, "learning_rate": 1e-06, "loss": 0.0152, "step": 2151 }, { "epoch": 0.36636023152877084, "grad_norm": 1.4955687522888184, "learning_rate": 1e-06, "loss": 0.0243, "step": 2152 }, { "epoch": 0.3665304732720463, "grad_norm": 1.6004747152328491, "learning_rate": 1e-06, "loss": 0.0177, "step": 2153 }, { "epoch": 0.36670071501532175, "grad_norm": 1.3243181705474854, "learning_rate": 1e-06, "loss": 0.0145, "step": 2154 }, { "epoch": 0.3668709567585972, "grad_norm": 1.8293629884719849, "learning_rate": 1e-06, "loss": 0.0236, "step": 2155 }, { "epoch": 0.36704119850187267, "grad_norm": 1.2558562755584717, "learning_rate": 1e-06, "loss": 0.0163, "step": 2156 }, { "epoch": 0.3672114402451481, "grad_norm": 1.2633802890777588, "learning_rate": 1e-06, "loss": 0.0144, "step": 2157 }, { "epoch": 0.3673816819884236, "grad_norm": 1.4996285438537598, "learning_rate": 1e-06, "loss": 0.0229, "step": 2158 }, { "epoch": 0.36755192373169904, "grad_norm": 1.1702207326889038, "learning_rate": 1e-06, "loss": 0.0154, "step": 2159 }, { "epoch": 0.36772216547497444, "grad_norm": 1.2386761903762817, "learning_rate": 1e-06, "loss": 0.0167, "step": 2160 }, { "epoch": 0.3678924072182499, "grad_norm": 1.7300621271133423, "learning_rate": 1e-06, "loss": 0.0228, "step": 2161 }, { "epoch": 0.36806264896152535, "grad_norm": 1.3940644264221191, "learning_rate": 1e-06, "loss": 0.022, "step": 2162 }, { "epoch": 0.3682328907048008, "grad_norm": 1.4714255332946777, "learning_rate": 1e-06, "loss": 0.019, "step": 2163 }, { "epoch": 0.36840313244807626, "grad_norm": 1.099491834640503, "learning_rate": 1e-06, "loss": 0.012, "step": 2164 }, { "epoch": 0.3685733741913517, "grad_norm": 1.4436107873916626, "learning_rate": 1e-06, "loss": 0.0184, "step": 2165 }, { "epoch": 0.3687436159346272, "grad_norm": 1.4421783685684204, "learning_rate": 1e-06, "loss": 0.0158, "step": 2166 }, { "epoch": 0.36891385767790263, "grad_norm": 1.221912145614624, "learning_rate": 1e-06, "loss": 0.0153, "step": 2167 }, { "epoch": 0.3690840994211781, "grad_norm": 1.3377691507339478, "learning_rate": 1e-06, "loss": 0.0196, "step": 2168 }, { "epoch": 0.36925434116445355, "grad_norm": 1.5522762537002563, "learning_rate": 1e-06, "loss": 0.0217, "step": 2169 }, { "epoch": 0.36942458290772895, "grad_norm": 1.452081561088562, "learning_rate": 1e-06, "loss": 0.0174, "step": 2170 }, { "epoch": 0.3695948246510044, "grad_norm": 1.293049931526184, "learning_rate": 1e-06, "loss": 0.0131, "step": 2171 }, { "epoch": 0.36976506639427986, "grad_norm": 1.621479868888855, "learning_rate": 1e-06, "loss": 0.0177, "step": 2172 }, { "epoch": 0.3699353081375553, "grad_norm": 1.6249638795852661, "learning_rate": 1e-06, "loss": 0.0203, "step": 2173 }, { "epoch": 0.3701055498808308, "grad_norm": 1.5299254655838013, "learning_rate": 1e-06, "loss": 0.0205, "step": 2174 }, { "epoch": 0.37027579162410623, "grad_norm": 1.5919313430786133, "learning_rate": 1e-06, "loss": 0.0191, "step": 2175 }, { "epoch": 0.3704460333673817, "grad_norm": 1.651296615600586, "learning_rate": 1e-06, "loss": 0.0235, "step": 2176 }, { "epoch": 0.37061627511065715, "grad_norm": 1.3494011163711548, "learning_rate": 1e-06, "loss": 0.0202, "step": 2177 }, { "epoch": 0.3707865168539326, "grad_norm": 1.4936208724975586, "learning_rate": 1e-06, "loss": 0.0174, "step": 2178 }, { "epoch": 0.37095675859720806, "grad_norm": 1.3437141180038452, "learning_rate": 1e-06, "loss": 0.0166, "step": 2179 }, { "epoch": 0.37112700034048346, "grad_norm": 1.137948751449585, "learning_rate": 1e-06, "loss": 0.0134, "step": 2180 }, { "epoch": 0.3712972420837589, "grad_norm": 1.409578800201416, "learning_rate": 1e-06, "loss": 0.0187, "step": 2181 }, { "epoch": 0.3714674838270344, "grad_norm": 1.4696379899978638, "learning_rate": 1e-06, "loss": 0.0212, "step": 2182 }, { "epoch": 0.37163772557030983, "grad_norm": 1.8238104581832886, "learning_rate": 1e-06, "loss": 0.0156, "step": 2183 }, { "epoch": 0.3718079673135853, "grad_norm": 1.643831729888916, "learning_rate": 1e-06, "loss": 0.0176, "step": 2184 }, { "epoch": 0.37197820905686074, "grad_norm": 1.536838173866272, "learning_rate": 1e-06, "loss": 0.017, "step": 2185 }, { "epoch": 0.3721484508001362, "grad_norm": 1.572147011756897, "learning_rate": 1e-06, "loss": 0.0185, "step": 2186 }, { "epoch": 0.37231869254341166, "grad_norm": 1.0589184761047363, "learning_rate": 1e-06, "loss": 0.0139, "step": 2187 }, { "epoch": 0.3724889342866871, "grad_norm": 2.404597043991089, "learning_rate": 1e-06, "loss": 0.0245, "step": 2188 }, { "epoch": 0.37265917602996257, "grad_norm": 1.3876649141311646, "learning_rate": 1e-06, "loss": 0.0139, "step": 2189 }, { "epoch": 0.372829417773238, "grad_norm": 1.3182275295257568, "learning_rate": 1e-06, "loss": 0.0153, "step": 2190 }, { "epoch": 0.3729996595165134, "grad_norm": 1.2899410724639893, "learning_rate": 1e-06, "loss": 0.0233, "step": 2191 }, { "epoch": 0.3731699012597889, "grad_norm": 1.5908676385879517, "learning_rate": 1e-06, "loss": 0.0156, "step": 2192 }, { "epoch": 0.37334014300306434, "grad_norm": 1.2450308799743652, "learning_rate": 1e-06, "loss": 0.018, "step": 2193 }, { "epoch": 0.3735103847463398, "grad_norm": 1.247621774673462, "learning_rate": 1e-06, "loss": 0.0134, "step": 2194 }, { "epoch": 0.37368062648961525, "grad_norm": 1.1101661920547485, "learning_rate": 1e-06, "loss": 0.013, "step": 2195 }, { "epoch": 0.3738508682328907, "grad_norm": 1.9619181156158447, "learning_rate": 1e-06, "loss": 0.0247, "step": 2196 }, { "epoch": 0.37402110997616617, "grad_norm": 1.3228130340576172, "learning_rate": 1e-06, "loss": 0.0272, "step": 2197 }, { "epoch": 0.3741913517194416, "grad_norm": 1.2315998077392578, "learning_rate": 1e-06, "loss": 0.0153, "step": 2198 }, { "epoch": 0.3743615934627171, "grad_norm": 1.2987167835235596, "learning_rate": 1e-06, "loss": 0.0151, "step": 2199 }, { "epoch": 0.37453183520599254, "grad_norm": 1.2436332702636719, "learning_rate": 1e-06, "loss": 0.0175, "step": 2200 }, { "epoch": 0.37470207694926794, "grad_norm": 1.6267950534820557, "learning_rate": 1e-06, "loss": 0.0196, "step": 2201 }, { "epoch": 0.3748723186925434, "grad_norm": 1.679490327835083, "learning_rate": 1e-06, "loss": 0.0191, "step": 2202 }, { "epoch": 0.37504256043581885, "grad_norm": 1.6547735929489136, "learning_rate": 1e-06, "loss": 0.0159, "step": 2203 }, { "epoch": 0.3752128021790943, "grad_norm": 1.4193183183670044, "learning_rate": 1e-06, "loss": 0.0184, "step": 2204 }, { "epoch": 0.37538304392236976, "grad_norm": 1.39267098903656, "learning_rate": 1e-06, "loss": 0.025, "step": 2205 }, { "epoch": 0.3755532856656452, "grad_norm": 1.2222113609313965, "learning_rate": 1e-06, "loss": 0.0129, "step": 2206 }, { "epoch": 0.3757235274089207, "grad_norm": 1.31162428855896, "learning_rate": 1e-06, "loss": 0.0147, "step": 2207 }, { "epoch": 0.37589376915219613, "grad_norm": 1.1816409826278687, "learning_rate": 1e-06, "loss": 0.0152, "step": 2208 }, { "epoch": 0.3760640108954716, "grad_norm": 1.3292574882507324, "learning_rate": 1e-06, "loss": 0.0146, "step": 2209 }, { "epoch": 0.37623425263874705, "grad_norm": 1.1428711414337158, "learning_rate": 1e-06, "loss": 0.013, "step": 2210 }, { "epoch": 0.37640449438202245, "grad_norm": 1.3439229726791382, "learning_rate": 1e-06, "loss": 0.0159, "step": 2211 }, { "epoch": 0.3765747361252979, "grad_norm": 1.5189659595489502, "learning_rate": 1e-06, "loss": 0.0231, "step": 2212 }, { "epoch": 0.37674497786857336, "grad_norm": 1.3098925352096558, "learning_rate": 1e-06, "loss": 0.015, "step": 2213 }, { "epoch": 0.3769152196118488, "grad_norm": 1.3993158340454102, "learning_rate": 1e-06, "loss": 0.0225, "step": 2214 }, { "epoch": 0.3770854613551243, "grad_norm": 1.4516868591308594, "learning_rate": 1e-06, "loss": 0.0204, "step": 2215 }, { "epoch": 0.37725570309839973, "grad_norm": 1.124986171722412, "learning_rate": 1e-06, "loss": 0.0119, "step": 2216 }, { "epoch": 0.3774259448416752, "grad_norm": 1.4332947731018066, "learning_rate": 1e-06, "loss": 0.023, "step": 2217 }, { "epoch": 0.37759618658495064, "grad_norm": 1.2420907020568848, "learning_rate": 1e-06, "loss": 0.015, "step": 2218 }, { "epoch": 0.3777664283282261, "grad_norm": 1.521194577217102, "learning_rate": 1e-06, "loss": 0.0173, "step": 2219 }, { "epoch": 0.37793667007150156, "grad_norm": 1.1087653636932373, "learning_rate": 1e-06, "loss": 0.0148, "step": 2220 }, { "epoch": 0.37810691181477696, "grad_norm": 1.2441989183425903, "learning_rate": 1e-06, "loss": 0.0139, "step": 2221 }, { "epoch": 0.3782771535580524, "grad_norm": 1.500753402709961, "learning_rate": 1e-06, "loss": 0.0223, "step": 2222 }, { "epoch": 0.37844739530132787, "grad_norm": 1.2610942125320435, "learning_rate": 1e-06, "loss": 0.0132, "step": 2223 }, { "epoch": 0.37861763704460333, "grad_norm": 1.7212361097335815, "learning_rate": 1e-06, "loss": 0.0143, "step": 2224 }, { "epoch": 0.3787878787878788, "grad_norm": 5.513698101043701, "learning_rate": 1e-06, "loss": 0.0828, "step": 2225 }, { "epoch": 0.37895812053115424, "grad_norm": 1.726049542427063, "learning_rate": 1e-06, "loss": 0.018, "step": 2226 }, { "epoch": 0.3791283622744297, "grad_norm": 1.936808466911316, "learning_rate": 1e-06, "loss": 0.0195, "step": 2227 }, { "epoch": 0.37929860401770515, "grad_norm": 1.879072666168213, "learning_rate": 1e-06, "loss": 0.0226, "step": 2228 }, { "epoch": 0.3794688457609806, "grad_norm": 1.6716440916061401, "learning_rate": 1e-06, "loss": 0.0294, "step": 2229 }, { "epoch": 0.37963908750425607, "grad_norm": 1.3907865285873413, "learning_rate": 1e-06, "loss": 0.0205, "step": 2230 }, { "epoch": 0.37980932924753147, "grad_norm": 1.4366625547409058, "learning_rate": 1e-06, "loss": 0.022, "step": 2231 }, { "epoch": 0.3799795709908069, "grad_norm": 1.3679766654968262, "learning_rate": 1e-06, "loss": 0.0135, "step": 2232 }, { "epoch": 0.3801498127340824, "grad_norm": 1.318230390548706, "learning_rate": 1e-06, "loss": 0.0253, "step": 2233 }, { "epoch": 0.38032005447735784, "grad_norm": 1.0809030532836914, "learning_rate": 1e-06, "loss": 0.0159, "step": 2234 }, { "epoch": 0.3804902962206333, "grad_norm": 1.47706937789917, "learning_rate": 1e-06, "loss": 0.0191, "step": 2235 }, { "epoch": 0.38066053796390875, "grad_norm": 1.1768239736557007, "learning_rate": 1e-06, "loss": 0.0145, "step": 2236 }, { "epoch": 0.3808307797071842, "grad_norm": 1.2666648626327515, "learning_rate": 1e-06, "loss": 0.0175, "step": 2237 }, { "epoch": 0.38100102145045966, "grad_norm": 1.3947467803955078, "learning_rate": 1e-06, "loss": 0.0177, "step": 2238 }, { "epoch": 0.3811712631937351, "grad_norm": 1.308648943901062, "learning_rate": 1e-06, "loss": 0.0174, "step": 2239 }, { "epoch": 0.3813415049370106, "grad_norm": 1.5134379863739014, "learning_rate": 1e-06, "loss": 0.0133, "step": 2240 }, { "epoch": 0.381511746680286, "grad_norm": 1.6946591138839722, "learning_rate": 1e-06, "loss": 0.0219, "step": 2241 }, { "epoch": 0.38168198842356144, "grad_norm": 1.2524915933609009, "learning_rate": 1e-06, "loss": 0.0117, "step": 2242 }, { "epoch": 0.3818522301668369, "grad_norm": 1.4766790866851807, "learning_rate": 1e-06, "loss": 0.0228, "step": 2243 }, { "epoch": 0.38202247191011235, "grad_norm": 1.1624605655670166, "learning_rate": 1e-06, "loss": 0.0111, "step": 2244 }, { "epoch": 0.3821927136533878, "grad_norm": 1.6786658763885498, "learning_rate": 1e-06, "loss": 0.019, "step": 2245 }, { "epoch": 0.38236295539666326, "grad_norm": 1.1682196855545044, "learning_rate": 1e-06, "loss": 0.0136, "step": 2246 }, { "epoch": 0.3825331971399387, "grad_norm": 1.470716953277588, "learning_rate": 1e-06, "loss": 0.016, "step": 2247 }, { "epoch": 0.3827034388832142, "grad_norm": 1.309889793395996, "learning_rate": 1e-06, "loss": 0.0106, "step": 2248 }, { "epoch": 0.38287368062648963, "grad_norm": 1.0708969831466675, "learning_rate": 1e-06, "loss": 0.0138, "step": 2249 }, { "epoch": 0.3830439223697651, "grad_norm": 1.1626148223876953, "learning_rate": 1e-06, "loss": 0.0162, "step": 2250 }, { "epoch": 0.38321416411304055, "grad_norm": 1.4477424621582031, "learning_rate": 1e-06, "loss": 0.0168, "step": 2251 }, { "epoch": 0.38338440585631595, "grad_norm": 1.3030651807785034, "learning_rate": 1e-06, "loss": 0.0153, "step": 2252 }, { "epoch": 0.3835546475995914, "grad_norm": 1.7228481769561768, "learning_rate": 1e-06, "loss": 0.0206, "step": 2253 }, { "epoch": 0.38372488934286686, "grad_norm": 1.1536338329315186, "learning_rate": 1e-06, "loss": 0.0184, "step": 2254 }, { "epoch": 0.3838951310861423, "grad_norm": 1.347286581993103, "learning_rate": 1e-06, "loss": 0.0106, "step": 2255 }, { "epoch": 0.3840653728294178, "grad_norm": 1.1855061054229736, "learning_rate": 1e-06, "loss": 0.0186, "step": 2256 }, { "epoch": 0.38423561457269323, "grad_norm": 1.283276915550232, "learning_rate": 1e-06, "loss": 0.0133, "step": 2257 }, { "epoch": 0.3844058563159687, "grad_norm": 1.4037493467330933, "learning_rate": 1e-06, "loss": 0.0184, "step": 2258 }, { "epoch": 0.38457609805924414, "grad_norm": 1.4328378438949585, "learning_rate": 1e-06, "loss": 0.022, "step": 2259 }, { "epoch": 0.3847463398025196, "grad_norm": 1.7087554931640625, "learning_rate": 1e-06, "loss": 0.02, "step": 2260 }, { "epoch": 0.38491658154579506, "grad_norm": 1.0174990892410278, "learning_rate": 1e-06, "loss": 0.012, "step": 2261 }, { "epoch": 0.38508682328907046, "grad_norm": 1.4762239456176758, "learning_rate": 1e-06, "loss": 0.0168, "step": 2262 }, { "epoch": 0.3852570650323459, "grad_norm": 1.040967345237732, "learning_rate": 1e-06, "loss": 0.0114, "step": 2263 }, { "epoch": 0.38542730677562137, "grad_norm": 1.2842293977737427, "learning_rate": 1e-06, "loss": 0.0144, "step": 2264 }, { "epoch": 0.3855975485188968, "grad_norm": 1.1241962909698486, "learning_rate": 1e-06, "loss": 0.0127, "step": 2265 }, { "epoch": 0.3857677902621723, "grad_norm": 1.4264283180236816, "learning_rate": 1e-06, "loss": 0.0184, "step": 2266 }, { "epoch": 0.38593803200544774, "grad_norm": 1.4640333652496338, "learning_rate": 1e-06, "loss": 0.0226, "step": 2267 }, { "epoch": 0.3861082737487232, "grad_norm": 1.8459786176681519, "learning_rate": 1e-06, "loss": 0.0183, "step": 2268 }, { "epoch": 0.38627851549199865, "grad_norm": 1.619437575340271, "learning_rate": 1e-06, "loss": 0.0322, "step": 2269 }, { "epoch": 0.3864487572352741, "grad_norm": 1.2061455249786377, "learning_rate": 1e-06, "loss": 0.0152, "step": 2270 }, { "epoch": 0.38661899897854957, "grad_norm": 2.0068230628967285, "learning_rate": 1e-06, "loss": 0.0161, "step": 2271 }, { "epoch": 0.38678924072182497, "grad_norm": 1.225205659866333, "learning_rate": 1e-06, "loss": 0.0123, "step": 2272 }, { "epoch": 0.3869594824651004, "grad_norm": 1.4834448099136353, "learning_rate": 1e-06, "loss": 0.0135, "step": 2273 }, { "epoch": 0.3871297242083759, "grad_norm": 1.3730425834655762, "learning_rate": 1e-06, "loss": 0.0127, "step": 2274 }, { "epoch": 0.38729996595165134, "grad_norm": 1.3718072175979614, "learning_rate": 1e-06, "loss": 0.0144, "step": 2275 }, { "epoch": 0.3874702076949268, "grad_norm": 1.5890707969665527, "learning_rate": 1e-06, "loss": 0.018, "step": 2276 }, { "epoch": 0.38764044943820225, "grad_norm": 1.437637209892273, "learning_rate": 1e-06, "loss": 0.0266, "step": 2277 }, { "epoch": 0.3878106911814777, "grad_norm": 1.2185838222503662, "learning_rate": 1e-06, "loss": 0.0118, "step": 2278 }, { "epoch": 0.38798093292475316, "grad_norm": 1.7744706869125366, "learning_rate": 1e-06, "loss": 0.0246, "step": 2279 }, { "epoch": 0.3881511746680286, "grad_norm": 1.0997908115386963, "learning_rate": 1e-06, "loss": 0.0128, "step": 2280 }, { "epoch": 0.3883214164113041, "grad_norm": 1.7149277925491333, "learning_rate": 1e-06, "loss": 0.0189, "step": 2281 }, { "epoch": 0.3884916581545795, "grad_norm": 4.9614338874816895, "learning_rate": 1e-06, "loss": 0.0606, "step": 2282 }, { "epoch": 0.38866189989785493, "grad_norm": 1.2618991136550903, "learning_rate": 1e-06, "loss": 0.0162, "step": 2283 }, { "epoch": 0.3888321416411304, "grad_norm": 1.7077900171279907, "learning_rate": 1e-06, "loss": 0.0212, "step": 2284 }, { "epoch": 0.38900238338440585, "grad_norm": 1.330156683921814, "learning_rate": 1e-06, "loss": 0.0153, "step": 2285 }, { "epoch": 0.3891726251276813, "grad_norm": 1.5688471794128418, "learning_rate": 1e-06, "loss": 0.0196, "step": 2286 }, { "epoch": 0.38934286687095676, "grad_norm": 1.4680328369140625, "learning_rate": 1e-06, "loss": 0.0162, "step": 2287 }, { "epoch": 0.3895131086142322, "grad_norm": 1.4984068870544434, "learning_rate": 1e-06, "loss": 0.0226, "step": 2288 }, { "epoch": 0.3896833503575077, "grad_norm": 1.8151462078094482, "learning_rate": 1e-06, "loss": 0.0177, "step": 2289 }, { "epoch": 0.38985359210078313, "grad_norm": 1.1406170129776, "learning_rate": 1e-06, "loss": 0.0151, "step": 2290 }, { "epoch": 0.3900238338440586, "grad_norm": 1.425500512123108, "learning_rate": 1e-06, "loss": 0.022, "step": 2291 }, { "epoch": 0.390194075587334, "grad_norm": 1.5256712436676025, "learning_rate": 1e-06, "loss": 0.0152, "step": 2292 }, { "epoch": 0.39036431733060944, "grad_norm": 1.2895482778549194, "learning_rate": 1e-06, "loss": 0.0116, "step": 2293 }, { "epoch": 0.3905345590738849, "grad_norm": 1.3082205057144165, "learning_rate": 1e-06, "loss": 0.0127, "step": 2294 }, { "epoch": 0.39070480081716036, "grad_norm": 1.8497793674468994, "learning_rate": 1e-06, "loss": 0.0213, "step": 2295 }, { "epoch": 0.3908750425604358, "grad_norm": 1.3272291421890259, "learning_rate": 1e-06, "loss": 0.0154, "step": 2296 }, { "epoch": 0.39104528430371127, "grad_norm": 1.159996747970581, "learning_rate": 1e-06, "loss": 0.0127, "step": 2297 }, { "epoch": 0.39121552604698673, "grad_norm": 1.58273184299469, "learning_rate": 1e-06, "loss": 0.0124, "step": 2298 }, { "epoch": 0.3913857677902622, "grad_norm": 1.2419120073318481, "learning_rate": 1e-06, "loss": 0.0163, "step": 2299 }, { "epoch": 0.39155600953353764, "grad_norm": 1.4475661516189575, "learning_rate": 1e-06, "loss": 0.0133, "step": 2300 }, { "epoch": 0.3917262512768131, "grad_norm": 1.270992636680603, "learning_rate": 1e-06, "loss": 0.0209, "step": 2301 }, { "epoch": 0.3918964930200885, "grad_norm": 1.5242903232574463, "learning_rate": 1e-06, "loss": 0.0181, "step": 2302 }, { "epoch": 0.39206673476336396, "grad_norm": 1.34809410572052, "learning_rate": 1e-06, "loss": 0.0148, "step": 2303 }, { "epoch": 0.3922369765066394, "grad_norm": 1.6840362548828125, "learning_rate": 1e-06, "loss": 0.019, "step": 2304 }, { "epoch": 0.39240721824991487, "grad_norm": 1.350677728652954, "learning_rate": 1e-06, "loss": 0.0169, "step": 2305 }, { "epoch": 0.3925774599931903, "grad_norm": 1.4453243017196655, "learning_rate": 1e-06, "loss": 0.0215, "step": 2306 }, { "epoch": 0.3927477017364658, "grad_norm": 1.3742339611053467, "learning_rate": 1e-06, "loss": 0.0211, "step": 2307 }, { "epoch": 0.39291794347974124, "grad_norm": 1.471127986907959, "learning_rate": 1e-06, "loss": 0.0149, "step": 2308 }, { "epoch": 0.3930881852230167, "grad_norm": 1.0254688262939453, "learning_rate": 1e-06, "loss": 0.0131, "step": 2309 }, { "epoch": 0.39325842696629215, "grad_norm": 1.232068419456482, "learning_rate": 1e-06, "loss": 0.0158, "step": 2310 }, { "epoch": 0.3934286687095676, "grad_norm": 1.4690749645233154, "learning_rate": 1e-06, "loss": 0.02, "step": 2311 }, { "epoch": 0.393598910452843, "grad_norm": 1.1098586320877075, "learning_rate": 1e-06, "loss": 0.0141, "step": 2312 }, { "epoch": 0.39376915219611847, "grad_norm": 1.5079904794692993, "learning_rate": 1e-06, "loss": 0.0203, "step": 2313 }, { "epoch": 0.3939393939393939, "grad_norm": 0.9211795926094055, "learning_rate": 1e-06, "loss": 0.0108, "step": 2314 }, { "epoch": 0.3941096356826694, "grad_norm": 1.6116832494735718, "learning_rate": 1e-06, "loss": 0.0187, "step": 2315 }, { "epoch": 0.39427987742594484, "grad_norm": 1.5355298519134521, "learning_rate": 1e-06, "loss": 0.0154, "step": 2316 }, { "epoch": 0.3944501191692203, "grad_norm": 1.1545178890228271, "learning_rate": 1e-06, "loss": 0.0164, "step": 2317 }, { "epoch": 0.39462036091249575, "grad_norm": 1.8506436347961426, "learning_rate": 1e-06, "loss": 0.0176, "step": 2318 }, { "epoch": 0.3947906026557712, "grad_norm": 2.0721189975738525, "learning_rate": 1e-06, "loss": 0.0176, "step": 2319 }, { "epoch": 0.39496084439904666, "grad_norm": 1.3172030448913574, "learning_rate": 1e-06, "loss": 0.0173, "step": 2320 }, { "epoch": 0.3951310861423221, "grad_norm": 1.3635270595550537, "learning_rate": 1e-06, "loss": 0.0142, "step": 2321 }, { "epoch": 0.3953013278855976, "grad_norm": 1.1138346195220947, "learning_rate": 1e-06, "loss": 0.0189, "step": 2322 }, { "epoch": 0.395471569628873, "grad_norm": 1.483282446861267, "learning_rate": 1e-06, "loss": 0.0185, "step": 2323 }, { "epoch": 0.39564181137214843, "grad_norm": 1.542601466178894, "learning_rate": 1e-06, "loss": 0.023, "step": 2324 }, { "epoch": 0.3958120531154239, "grad_norm": 1.5426020622253418, "learning_rate": 1e-06, "loss": 0.0302, "step": 2325 }, { "epoch": 0.39598229485869935, "grad_norm": 1.148687720298767, "learning_rate": 1e-06, "loss": 0.0096, "step": 2326 }, { "epoch": 0.3961525366019748, "grad_norm": 1.3703385591506958, "learning_rate": 1e-06, "loss": 0.0179, "step": 2327 }, { "epoch": 0.39632277834525026, "grad_norm": 1.4654871225357056, "learning_rate": 1e-06, "loss": 0.0146, "step": 2328 }, { "epoch": 0.3964930200885257, "grad_norm": 2.1327321529388428, "learning_rate": 1e-06, "loss": 0.028, "step": 2329 }, { "epoch": 0.3966632618318012, "grad_norm": 1.7279863357543945, "learning_rate": 1e-06, "loss": 0.0238, "step": 2330 }, { "epoch": 0.39683350357507663, "grad_norm": 1.1947439908981323, "learning_rate": 1e-06, "loss": 0.0125, "step": 2331 }, { "epoch": 0.3970037453183521, "grad_norm": 1.0099948644638062, "learning_rate": 1e-06, "loss": 0.0108, "step": 2332 }, { "epoch": 0.3971739870616275, "grad_norm": 1.3921846151351929, "learning_rate": 1e-06, "loss": 0.0162, "step": 2333 }, { "epoch": 0.39734422880490294, "grad_norm": 1.3162715435028076, "learning_rate": 1e-06, "loss": 0.0158, "step": 2334 }, { "epoch": 0.3975144705481784, "grad_norm": 1.7758504152297974, "learning_rate": 1e-06, "loss": 0.018, "step": 2335 }, { "epoch": 0.39768471229145386, "grad_norm": 1.2216438055038452, "learning_rate": 1e-06, "loss": 0.0212, "step": 2336 }, { "epoch": 0.3978549540347293, "grad_norm": 1.6880277395248413, "learning_rate": 1e-06, "loss": 0.0149, "step": 2337 }, { "epoch": 0.39802519577800477, "grad_norm": 1.6719337701797485, "learning_rate": 1e-06, "loss": 0.0175, "step": 2338 }, { "epoch": 0.3981954375212802, "grad_norm": 1.5387353897094727, "learning_rate": 1e-06, "loss": 0.0261, "step": 2339 }, { "epoch": 0.3983656792645557, "grad_norm": 1.6653772592544556, "learning_rate": 1e-06, "loss": 0.0161, "step": 2340 }, { "epoch": 0.39853592100783114, "grad_norm": 1.325972318649292, "learning_rate": 1e-06, "loss": 0.0202, "step": 2341 }, { "epoch": 0.3987061627511066, "grad_norm": 1.40695321559906, "learning_rate": 1e-06, "loss": 0.0182, "step": 2342 }, { "epoch": 0.398876404494382, "grad_norm": 1.6472898721694946, "learning_rate": 1e-06, "loss": 0.0167, "step": 2343 }, { "epoch": 0.39904664623765745, "grad_norm": 1.2183064222335815, "learning_rate": 1e-06, "loss": 0.0104, "step": 2344 }, { "epoch": 0.3992168879809329, "grad_norm": 1.193183183670044, "learning_rate": 1e-06, "loss": 0.0102, "step": 2345 }, { "epoch": 0.39938712972420837, "grad_norm": 1.4041932821273804, "learning_rate": 1e-06, "loss": 0.0156, "step": 2346 }, { "epoch": 0.3995573714674838, "grad_norm": 1.3237617015838623, "learning_rate": 1e-06, "loss": 0.0157, "step": 2347 }, { "epoch": 0.3997276132107593, "grad_norm": 1.5075606107711792, "learning_rate": 1e-06, "loss": 0.0167, "step": 2348 }, { "epoch": 0.39989785495403474, "grad_norm": 1.4453775882720947, "learning_rate": 1e-06, "loss": 0.0241, "step": 2349 }, { "epoch": 0.4000680966973102, "grad_norm": 1.4384607076644897, "learning_rate": 1e-06, "loss": 0.0171, "step": 2350 }, { "epoch": 0.40023833844058565, "grad_norm": 1.3807544708251953, "learning_rate": 1e-06, "loss": 0.0131, "step": 2351 }, { "epoch": 0.4004085801838611, "grad_norm": 1.7280919551849365, "learning_rate": 1e-06, "loss": 0.0254, "step": 2352 }, { "epoch": 0.4005788219271365, "grad_norm": 1.2561084032058716, "learning_rate": 1e-06, "loss": 0.0089, "step": 2353 }, { "epoch": 0.40074906367041196, "grad_norm": 1.4211997985839844, "learning_rate": 1e-06, "loss": 0.0174, "step": 2354 }, { "epoch": 0.4009193054136874, "grad_norm": 1.79752516746521, "learning_rate": 1e-06, "loss": 0.0177, "step": 2355 }, { "epoch": 0.4010895471569629, "grad_norm": 1.746137022972107, "learning_rate": 1e-06, "loss": 0.0208, "step": 2356 }, { "epoch": 0.40125978890023833, "grad_norm": 3.5860726833343506, "learning_rate": 1e-06, "loss": 0.0709, "step": 2357 }, { "epoch": 0.4014300306435138, "grad_norm": 1.5278226137161255, "learning_rate": 1e-06, "loss": 0.0161, "step": 2358 }, { "epoch": 0.40160027238678925, "grad_norm": 1.536190390586853, "learning_rate": 1e-06, "loss": 0.0217, "step": 2359 }, { "epoch": 0.4017705141300647, "grad_norm": 1.5676721334457397, "learning_rate": 1e-06, "loss": 0.0166, "step": 2360 }, { "epoch": 0.40194075587334016, "grad_norm": 1.004481554031372, "learning_rate": 1e-06, "loss": 0.0115, "step": 2361 }, { "epoch": 0.4021109976166156, "grad_norm": 1.576690673828125, "learning_rate": 1e-06, "loss": 0.0267, "step": 2362 }, { "epoch": 0.402281239359891, "grad_norm": 1.8953304290771484, "learning_rate": 1e-06, "loss": 0.0205, "step": 2363 }, { "epoch": 0.4024514811031665, "grad_norm": 1.3143856525421143, "learning_rate": 1e-06, "loss": 0.013, "step": 2364 }, { "epoch": 0.40262172284644193, "grad_norm": 1.4957778453826904, "learning_rate": 1e-06, "loss": 0.0285, "step": 2365 }, { "epoch": 0.4027919645897174, "grad_norm": 1.0621047019958496, "learning_rate": 1e-06, "loss": 0.0117, "step": 2366 }, { "epoch": 0.40296220633299284, "grad_norm": 1.4584861993789673, "learning_rate": 1e-06, "loss": 0.0127, "step": 2367 }, { "epoch": 0.4031324480762683, "grad_norm": 1.5141526460647583, "learning_rate": 1e-06, "loss": 0.0149, "step": 2368 }, { "epoch": 0.40330268981954376, "grad_norm": 1.0880855321884155, "learning_rate": 1e-06, "loss": 0.0116, "step": 2369 }, { "epoch": 0.4034729315628192, "grad_norm": 1.1734309196472168, "learning_rate": 1e-06, "loss": 0.0158, "step": 2370 }, { "epoch": 0.40364317330609467, "grad_norm": 1.3398406505584717, "learning_rate": 1e-06, "loss": 0.0172, "step": 2371 }, { "epoch": 0.40381341504937013, "grad_norm": 1.1609718799591064, "learning_rate": 1e-06, "loss": 0.0157, "step": 2372 }, { "epoch": 0.40398365679264553, "grad_norm": 4.927487373352051, "learning_rate": 1e-06, "loss": 0.0723, "step": 2373 }, { "epoch": 0.404153898535921, "grad_norm": 1.258873462677002, "learning_rate": 1e-06, "loss": 0.018, "step": 2374 }, { "epoch": 0.40432414027919644, "grad_norm": 1.1295454502105713, "learning_rate": 1e-06, "loss": 0.0153, "step": 2375 }, { "epoch": 0.4044943820224719, "grad_norm": 1.213400959968567, "learning_rate": 1e-06, "loss": 0.0137, "step": 2376 }, { "epoch": 0.40466462376574736, "grad_norm": 1.0463217496871948, "learning_rate": 1e-06, "loss": 0.013, "step": 2377 }, { "epoch": 0.4048348655090228, "grad_norm": 1.1880186796188354, "learning_rate": 1e-06, "loss": 0.0145, "step": 2378 }, { "epoch": 0.40500510725229827, "grad_norm": 1.3919751644134521, "learning_rate": 1e-06, "loss": 0.015, "step": 2379 }, { "epoch": 0.4051753489955737, "grad_norm": 1.2092021703720093, "learning_rate": 1e-06, "loss": 0.0109, "step": 2380 }, { "epoch": 0.4053455907388492, "grad_norm": 1.5249980688095093, "learning_rate": 1e-06, "loss": 0.016, "step": 2381 }, { "epoch": 0.40551583248212464, "grad_norm": 1.3709051609039307, "learning_rate": 1e-06, "loss": 0.0167, "step": 2382 }, { "epoch": 0.4056860742254001, "grad_norm": 1.3142911195755005, "learning_rate": 1e-06, "loss": 0.0135, "step": 2383 }, { "epoch": 0.4058563159686755, "grad_norm": 1.423117995262146, "learning_rate": 1e-06, "loss": 0.0144, "step": 2384 }, { "epoch": 0.40602655771195095, "grad_norm": 1.7531969547271729, "learning_rate": 1e-06, "loss": 0.0164, "step": 2385 }, { "epoch": 0.4061967994552264, "grad_norm": 1.1912846565246582, "learning_rate": 1e-06, "loss": 0.0177, "step": 2386 }, { "epoch": 0.40636704119850187, "grad_norm": 1.3569473028182983, "learning_rate": 1e-06, "loss": 0.0152, "step": 2387 }, { "epoch": 0.4065372829417773, "grad_norm": 1.3196473121643066, "learning_rate": 1e-06, "loss": 0.0183, "step": 2388 }, { "epoch": 0.4067075246850528, "grad_norm": 1.8479810953140259, "learning_rate": 1e-06, "loss": 0.0224, "step": 2389 }, { "epoch": 0.40687776642832824, "grad_norm": 1.048545479774475, "learning_rate": 1e-06, "loss": 0.0083, "step": 2390 }, { "epoch": 0.4070480081716037, "grad_norm": 1.090034008026123, "learning_rate": 1e-06, "loss": 0.0124, "step": 2391 }, { "epoch": 0.40721824991487915, "grad_norm": 1.0449838638305664, "learning_rate": 1e-06, "loss": 0.0124, "step": 2392 }, { "epoch": 0.4073884916581546, "grad_norm": 1.3128901720046997, "learning_rate": 1e-06, "loss": 0.0118, "step": 2393 }, { "epoch": 0.40755873340143, "grad_norm": 1.3702256679534912, "learning_rate": 1e-06, "loss": 0.0171, "step": 2394 }, { "epoch": 0.40772897514470546, "grad_norm": 1.4796867370605469, "learning_rate": 1e-06, "loss": 0.0204, "step": 2395 }, { "epoch": 0.4078992168879809, "grad_norm": 1.1662784814834595, "learning_rate": 1e-06, "loss": 0.0115, "step": 2396 }, { "epoch": 0.4080694586312564, "grad_norm": 2.216245651245117, "learning_rate": 1e-06, "loss": 0.0212, "step": 2397 }, { "epoch": 0.40823970037453183, "grad_norm": 1.5245473384857178, "learning_rate": 1e-06, "loss": 0.0111, "step": 2398 }, { "epoch": 0.4084099421178073, "grad_norm": 1.6484607458114624, "learning_rate": 1e-06, "loss": 0.0132, "step": 2399 }, { "epoch": 0.40858018386108275, "grad_norm": 1.326431155204773, "learning_rate": 1e-06, "loss": 0.0111, "step": 2400 }, { "epoch": 0.4087504256043582, "grad_norm": 1.8550671339035034, "learning_rate": 1e-06, "loss": 0.0198, "step": 2401 }, { "epoch": 0.40892066734763366, "grad_norm": 1.1143696308135986, "learning_rate": 1e-06, "loss": 0.0141, "step": 2402 }, { "epoch": 0.4090909090909091, "grad_norm": 1.190245509147644, "learning_rate": 1e-06, "loss": 0.012, "step": 2403 }, { "epoch": 0.4092611508341845, "grad_norm": 1.6202400922775269, "learning_rate": 1e-06, "loss": 0.019, "step": 2404 }, { "epoch": 0.40943139257746, "grad_norm": 1.4784173965454102, "learning_rate": 1e-06, "loss": 0.0213, "step": 2405 }, { "epoch": 0.40960163432073543, "grad_norm": 1.2796027660369873, "learning_rate": 1e-06, "loss": 0.0161, "step": 2406 }, { "epoch": 0.4097718760640109, "grad_norm": 1.0544267892837524, "learning_rate": 1e-06, "loss": 0.0147, "step": 2407 }, { "epoch": 0.40994211780728634, "grad_norm": 1.6966776847839355, "learning_rate": 1e-06, "loss": 0.0223, "step": 2408 }, { "epoch": 0.4101123595505618, "grad_norm": 1.3701565265655518, "learning_rate": 1e-06, "loss": 0.0169, "step": 2409 }, { "epoch": 0.41028260129383726, "grad_norm": 1.4099042415618896, "learning_rate": 1e-06, "loss": 0.0157, "step": 2410 }, { "epoch": 0.4104528430371127, "grad_norm": 1.2321785688400269, "learning_rate": 1e-06, "loss": 0.0113, "step": 2411 }, { "epoch": 0.41062308478038817, "grad_norm": 1.3855117559432983, "learning_rate": 1e-06, "loss": 0.0216, "step": 2412 }, { "epoch": 0.4107933265236636, "grad_norm": 1.1128058433532715, "learning_rate": 1e-06, "loss": 0.0124, "step": 2413 }, { "epoch": 0.410963568266939, "grad_norm": 1.4113258123397827, "learning_rate": 1e-06, "loss": 0.0144, "step": 2414 }, { "epoch": 0.4111338100102145, "grad_norm": 1.4246609210968018, "learning_rate": 1e-06, "loss": 0.0156, "step": 2415 }, { "epoch": 0.41130405175348994, "grad_norm": 1.2959715127944946, "learning_rate": 1e-06, "loss": 0.017, "step": 2416 }, { "epoch": 0.4114742934967654, "grad_norm": 1.3845857381820679, "learning_rate": 1e-06, "loss": 0.0153, "step": 2417 }, { "epoch": 0.41164453524004085, "grad_norm": 1.1727241277694702, "learning_rate": 1e-06, "loss": 0.0127, "step": 2418 }, { "epoch": 0.4118147769833163, "grad_norm": 1.3593071699142456, "learning_rate": 1e-06, "loss": 0.0189, "step": 2419 }, { "epoch": 0.41198501872659177, "grad_norm": 1.650676965713501, "learning_rate": 1e-06, "loss": 0.0179, "step": 2420 }, { "epoch": 0.4121552604698672, "grad_norm": 1.345276117324829, "learning_rate": 1e-06, "loss": 0.015, "step": 2421 }, { "epoch": 0.4123255022131427, "grad_norm": 1.3404996395111084, "learning_rate": 1e-06, "loss": 0.0145, "step": 2422 }, { "epoch": 0.41249574395641814, "grad_norm": 1.4839191436767578, "learning_rate": 1e-06, "loss": 0.0176, "step": 2423 }, { "epoch": 0.41266598569969354, "grad_norm": 1.3546916246414185, "learning_rate": 1e-06, "loss": 0.0196, "step": 2424 }, { "epoch": 0.412836227442969, "grad_norm": 1.5656921863555908, "learning_rate": 1e-06, "loss": 0.0242, "step": 2425 }, { "epoch": 0.41300646918624445, "grad_norm": 1.348214030265808, "learning_rate": 1e-06, "loss": 0.0189, "step": 2426 }, { "epoch": 0.4131767109295199, "grad_norm": 1.2620658874511719, "learning_rate": 1e-06, "loss": 0.0154, "step": 2427 }, { "epoch": 0.41334695267279536, "grad_norm": 1.1423382759094238, "learning_rate": 1e-06, "loss": 0.0155, "step": 2428 }, { "epoch": 0.4135171944160708, "grad_norm": 1.179055094718933, "learning_rate": 1e-06, "loss": 0.0144, "step": 2429 }, { "epoch": 0.4136874361593463, "grad_norm": 1.2674864530563354, "learning_rate": 1e-06, "loss": 0.0133, "step": 2430 }, { "epoch": 0.41385767790262173, "grad_norm": 1.2309482097625732, "learning_rate": 1e-06, "loss": 0.0177, "step": 2431 }, { "epoch": 0.4140279196458972, "grad_norm": 1.4680554866790771, "learning_rate": 1e-06, "loss": 0.0156, "step": 2432 }, { "epoch": 0.41419816138917265, "grad_norm": 1.4172992706298828, "learning_rate": 1e-06, "loss": 0.021, "step": 2433 }, { "epoch": 0.41436840313244805, "grad_norm": 1.537314534187317, "learning_rate": 1e-06, "loss": 0.0196, "step": 2434 }, { "epoch": 0.4145386448757235, "grad_norm": 1.3401296138763428, "learning_rate": 1e-06, "loss": 0.0106, "step": 2435 }, { "epoch": 0.41470888661899896, "grad_norm": 1.5095027685165405, "learning_rate": 1e-06, "loss": 0.0266, "step": 2436 }, { "epoch": 0.4148791283622744, "grad_norm": 1.1366171836853027, "learning_rate": 1e-06, "loss": 0.0137, "step": 2437 }, { "epoch": 0.4150493701055499, "grad_norm": 1.1384146213531494, "learning_rate": 1e-06, "loss": 0.0137, "step": 2438 }, { "epoch": 0.41521961184882533, "grad_norm": 0.9028575420379639, "learning_rate": 1e-06, "loss": 0.0084, "step": 2439 }, { "epoch": 0.4153898535921008, "grad_norm": 1.199471116065979, "learning_rate": 1e-06, "loss": 0.0122, "step": 2440 }, { "epoch": 0.41556009533537625, "grad_norm": 1.3193436861038208, "learning_rate": 1e-06, "loss": 0.017, "step": 2441 }, { "epoch": 0.4157303370786517, "grad_norm": 1.3699177503585815, "learning_rate": 1e-06, "loss": 0.016, "step": 2442 }, { "epoch": 0.41590057882192716, "grad_norm": 1.3745206594467163, "learning_rate": 1e-06, "loss": 0.0154, "step": 2443 }, { "epoch": 0.4160708205652026, "grad_norm": 1.289341926574707, "learning_rate": 1e-06, "loss": 0.0152, "step": 2444 }, { "epoch": 0.416241062308478, "grad_norm": 1.4632568359375, "learning_rate": 1e-06, "loss": 0.0169, "step": 2445 }, { "epoch": 0.4164113040517535, "grad_norm": 1.4885531663894653, "learning_rate": 1e-06, "loss": 0.0147, "step": 2446 }, { "epoch": 0.41658154579502893, "grad_norm": 1.2330007553100586, "learning_rate": 1e-06, "loss": 0.0126, "step": 2447 }, { "epoch": 0.4167517875383044, "grad_norm": 2.102605104446411, "learning_rate": 1e-06, "loss": 0.0458, "step": 2448 }, { "epoch": 0.41692202928157984, "grad_norm": 1.2682759761810303, "learning_rate": 1e-06, "loss": 0.0087, "step": 2449 }, { "epoch": 0.4170922710248553, "grad_norm": 1.3345555067062378, "learning_rate": 1e-06, "loss": 0.013, "step": 2450 }, { "epoch": 0.41726251276813076, "grad_norm": 1.390181064605713, "learning_rate": 1e-06, "loss": 0.0203, "step": 2451 }, { "epoch": 0.4174327545114062, "grad_norm": 1.3475115299224854, "learning_rate": 1e-06, "loss": 0.0199, "step": 2452 }, { "epoch": 0.41760299625468167, "grad_norm": 1.5461567640304565, "learning_rate": 1e-06, "loss": 0.0179, "step": 2453 }, { "epoch": 0.4177732379979571, "grad_norm": 1.3287405967712402, "learning_rate": 1e-06, "loss": 0.0156, "step": 2454 }, { "epoch": 0.4179434797412325, "grad_norm": 1.3914709091186523, "learning_rate": 1e-06, "loss": 0.0141, "step": 2455 }, { "epoch": 0.418113721484508, "grad_norm": 1.5743072032928467, "learning_rate": 1e-06, "loss": 0.0166, "step": 2456 }, { "epoch": 0.41828396322778344, "grad_norm": 1.3226776123046875, "learning_rate": 1e-06, "loss": 0.0176, "step": 2457 }, { "epoch": 0.4184542049710589, "grad_norm": 1.3625043630599976, "learning_rate": 1e-06, "loss": 0.0209, "step": 2458 }, { "epoch": 0.41862444671433435, "grad_norm": 0.9081085920333862, "learning_rate": 1e-06, "loss": 0.0076, "step": 2459 }, { "epoch": 0.4187946884576098, "grad_norm": 1.608961820602417, "learning_rate": 1e-06, "loss": 0.0182, "step": 2460 }, { "epoch": 0.41896493020088527, "grad_norm": 1.2214875221252441, "learning_rate": 1e-06, "loss": 0.0147, "step": 2461 }, { "epoch": 0.4191351719441607, "grad_norm": 1.1907516717910767, "learning_rate": 1e-06, "loss": 0.0166, "step": 2462 }, { "epoch": 0.4193054136874362, "grad_norm": 1.0932464599609375, "learning_rate": 1e-06, "loss": 0.0102, "step": 2463 }, { "epoch": 0.41947565543071164, "grad_norm": 1.2685447931289673, "learning_rate": 1e-06, "loss": 0.0141, "step": 2464 }, { "epoch": 0.41964589717398704, "grad_norm": 1.4846066236495972, "learning_rate": 1e-06, "loss": 0.0146, "step": 2465 }, { "epoch": 0.4198161389172625, "grad_norm": 1.1014434099197388, "learning_rate": 1e-06, "loss": 0.0135, "step": 2466 }, { "epoch": 0.41998638066053795, "grad_norm": 1.5888346433639526, "learning_rate": 1e-06, "loss": 0.0195, "step": 2467 }, { "epoch": 0.4201566224038134, "grad_norm": 1.507070779800415, "learning_rate": 1e-06, "loss": 0.0171, "step": 2468 }, { "epoch": 0.42032686414708886, "grad_norm": 1.330121636390686, "learning_rate": 1e-06, "loss": 0.013, "step": 2469 }, { "epoch": 0.4204971058903643, "grad_norm": 1.4843699932098389, "learning_rate": 1e-06, "loss": 0.0232, "step": 2470 }, { "epoch": 0.4206673476336398, "grad_norm": 1.1940176486968994, "learning_rate": 1e-06, "loss": 0.0112, "step": 2471 }, { "epoch": 0.42083758937691523, "grad_norm": 1.3040800094604492, "learning_rate": 1e-06, "loss": 0.0164, "step": 2472 }, { "epoch": 0.4210078311201907, "grad_norm": 1.4341161251068115, "learning_rate": 1e-06, "loss": 0.0146, "step": 2473 }, { "epoch": 0.42117807286346615, "grad_norm": 1.1169599294662476, "learning_rate": 1e-06, "loss": 0.0105, "step": 2474 }, { "epoch": 0.42134831460674155, "grad_norm": 1.2243717908859253, "learning_rate": 1e-06, "loss": 0.0121, "step": 2475 }, { "epoch": 0.421518556350017, "grad_norm": 1.5474375486373901, "learning_rate": 1e-06, "loss": 0.0188, "step": 2476 }, { "epoch": 0.42168879809329246, "grad_norm": 1.487414002418518, "learning_rate": 1e-06, "loss": 0.0116, "step": 2477 }, { "epoch": 0.4218590398365679, "grad_norm": 1.5315251350402832, "learning_rate": 1e-06, "loss": 0.0153, "step": 2478 }, { "epoch": 0.4220292815798434, "grad_norm": 4.279947280883789, "learning_rate": 1e-06, "loss": 0.0537, "step": 2479 }, { "epoch": 0.42219952332311883, "grad_norm": 1.396422266960144, "learning_rate": 1e-06, "loss": 0.0219, "step": 2480 }, { "epoch": 0.4223697650663943, "grad_norm": 1.583313226699829, "learning_rate": 1e-06, "loss": 0.0171, "step": 2481 }, { "epoch": 0.42254000680966974, "grad_norm": 1.1320841312408447, "learning_rate": 1e-06, "loss": 0.013, "step": 2482 }, { "epoch": 0.4227102485529452, "grad_norm": 1.1860630512237549, "learning_rate": 1e-06, "loss": 0.015, "step": 2483 }, { "epoch": 0.42288049029622066, "grad_norm": 1.2758294343948364, "learning_rate": 1e-06, "loss": 0.0168, "step": 2484 }, { "epoch": 0.42305073203949606, "grad_norm": 1.084784984588623, "learning_rate": 1e-06, "loss": 0.0108, "step": 2485 }, { "epoch": 0.4232209737827715, "grad_norm": 1.0579909086227417, "learning_rate": 1e-06, "loss": 0.0103, "step": 2486 }, { "epoch": 0.42339121552604697, "grad_norm": 1.5031887292861938, "learning_rate": 1e-06, "loss": 0.0159, "step": 2487 }, { "epoch": 0.42356145726932243, "grad_norm": 1.2599958181381226, "learning_rate": 1e-06, "loss": 0.0139, "step": 2488 }, { "epoch": 0.4237316990125979, "grad_norm": 1.1589609384536743, "learning_rate": 1e-06, "loss": 0.01, "step": 2489 }, { "epoch": 0.42390194075587334, "grad_norm": 1.139289140701294, "learning_rate": 1e-06, "loss": 0.0113, "step": 2490 }, { "epoch": 0.4240721824991488, "grad_norm": 1.1821390390396118, "learning_rate": 1e-06, "loss": 0.0129, "step": 2491 }, { "epoch": 0.42424242424242425, "grad_norm": 1.2557470798492432, "learning_rate": 1e-06, "loss": 0.0114, "step": 2492 }, { "epoch": 0.4244126659856997, "grad_norm": 1.3615427017211914, "learning_rate": 1e-06, "loss": 0.0142, "step": 2493 }, { "epoch": 0.42458290772897517, "grad_norm": 1.4991490840911865, "learning_rate": 1e-06, "loss": 0.0142, "step": 2494 }, { "epoch": 0.42475314947225057, "grad_norm": 1.1081904172897339, "learning_rate": 1e-06, "loss": 0.0096, "step": 2495 }, { "epoch": 0.424923391215526, "grad_norm": 1.3824467658996582, "learning_rate": 1e-06, "loss": 0.0111, "step": 2496 }, { "epoch": 0.4250936329588015, "grad_norm": 1.4526976346969604, "learning_rate": 1e-06, "loss": 0.0229, "step": 2497 }, { "epoch": 0.42526387470207694, "grad_norm": 1.1925878524780273, "learning_rate": 1e-06, "loss": 0.014, "step": 2498 }, { "epoch": 0.4254341164453524, "grad_norm": 1.7539995908737183, "learning_rate": 1e-06, "loss": 0.0295, "step": 2499 }, { "epoch": 0.42560435818862785, "grad_norm": 1.2335952520370483, "learning_rate": 1e-06, "loss": 0.016, "step": 2500 }, { "epoch": 0.4257745999319033, "grad_norm": 1.1143063306808472, "learning_rate": 1e-06, "loss": 0.0108, "step": 2501 }, { "epoch": 0.42594484167517876, "grad_norm": 1.0467058420181274, "learning_rate": 1e-06, "loss": 0.0096, "step": 2502 }, { "epoch": 0.4261150834184542, "grad_norm": 1.118386149406433, "learning_rate": 1e-06, "loss": 0.0124, "step": 2503 }, { "epoch": 0.4262853251617297, "grad_norm": 1.4646353721618652, "learning_rate": 1e-06, "loss": 0.0148, "step": 2504 }, { "epoch": 0.42645556690500513, "grad_norm": 1.2250324487686157, "learning_rate": 1e-06, "loss": 0.0132, "step": 2505 }, { "epoch": 0.42662580864828054, "grad_norm": 1.757466435432434, "learning_rate": 1e-06, "loss": 0.0219, "step": 2506 }, { "epoch": 0.426796050391556, "grad_norm": 1.2650184631347656, "learning_rate": 1e-06, "loss": 0.0137, "step": 2507 }, { "epoch": 0.42696629213483145, "grad_norm": 1.2739180326461792, "learning_rate": 1e-06, "loss": 0.011, "step": 2508 }, { "epoch": 0.4271365338781069, "grad_norm": 1.3488551378250122, "learning_rate": 1e-06, "loss": 0.0144, "step": 2509 }, { "epoch": 0.42730677562138236, "grad_norm": 1.4845232963562012, "learning_rate": 1e-06, "loss": 0.0196, "step": 2510 }, { "epoch": 0.4274770173646578, "grad_norm": 1.5139720439910889, "learning_rate": 1e-06, "loss": 0.0178, "step": 2511 }, { "epoch": 0.4276472591079333, "grad_norm": 1.3799965381622314, "learning_rate": 1e-06, "loss": 0.0142, "step": 2512 }, { "epoch": 0.42781750085120873, "grad_norm": 1.0673723220825195, "learning_rate": 1e-06, "loss": 0.0093, "step": 2513 }, { "epoch": 0.4279877425944842, "grad_norm": 1.4284578561782837, "learning_rate": 1e-06, "loss": 0.0185, "step": 2514 }, { "epoch": 0.42815798433775965, "grad_norm": 0.9315198659896851, "learning_rate": 1e-06, "loss": 0.0117, "step": 2515 }, { "epoch": 0.42832822608103505, "grad_norm": 1.5946029424667358, "learning_rate": 1e-06, "loss": 0.0183, "step": 2516 }, { "epoch": 0.4284984678243105, "grad_norm": 1.6120253801345825, "learning_rate": 1e-06, "loss": 0.0203, "step": 2517 }, { "epoch": 0.42866870956758596, "grad_norm": 1.2495324611663818, "learning_rate": 1e-06, "loss": 0.0138, "step": 2518 }, { "epoch": 0.4288389513108614, "grad_norm": 1.4058576822280884, "learning_rate": 1e-06, "loss": 0.0137, "step": 2519 }, { "epoch": 0.4290091930541369, "grad_norm": 1.436219573020935, "learning_rate": 1e-06, "loss": 0.0151, "step": 2520 }, { "epoch": 0.42917943479741233, "grad_norm": 1.9257749319076538, "learning_rate": 1e-06, "loss": 0.0199, "step": 2521 }, { "epoch": 0.4293496765406878, "grad_norm": 1.195271611213684, "learning_rate": 1e-06, "loss": 0.0126, "step": 2522 }, { "epoch": 0.42951991828396324, "grad_norm": 2.288235664367676, "learning_rate": 1e-06, "loss": 0.0229, "step": 2523 }, { "epoch": 0.4296901600272387, "grad_norm": 1.20006263256073, "learning_rate": 1e-06, "loss": 0.011, "step": 2524 }, { "epoch": 0.42986040177051416, "grad_norm": 1.8085074424743652, "learning_rate": 1e-06, "loss": 0.0246, "step": 2525 }, { "epoch": 0.43003064351378956, "grad_norm": 1.3439559936523438, "learning_rate": 1e-06, "loss": 0.0139, "step": 2526 }, { "epoch": 0.430200885257065, "grad_norm": 1.2932634353637695, "learning_rate": 1e-06, "loss": 0.0139, "step": 2527 }, { "epoch": 0.43037112700034047, "grad_norm": 1.3444640636444092, "learning_rate": 1e-06, "loss": 0.0134, "step": 2528 }, { "epoch": 0.4305413687436159, "grad_norm": 1.4149837493896484, "learning_rate": 1e-06, "loss": 0.0173, "step": 2529 }, { "epoch": 0.4307116104868914, "grad_norm": 1.1546047925949097, "learning_rate": 1e-06, "loss": 0.0105, "step": 2530 }, { "epoch": 0.43088185223016684, "grad_norm": 1.6338205337524414, "learning_rate": 1e-06, "loss": 0.0204, "step": 2531 }, { "epoch": 0.4310520939734423, "grad_norm": 1.8675543069839478, "learning_rate": 1e-06, "loss": 0.0107, "step": 2532 }, { "epoch": 0.43122233571671775, "grad_norm": 1.2775949239730835, "learning_rate": 1e-06, "loss": 0.0137, "step": 2533 }, { "epoch": 0.4313925774599932, "grad_norm": 1.2907770872116089, "learning_rate": 1e-06, "loss": 0.0156, "step": 2534 }, { "epoch": 0.43156281920326867, "grad_norm": 1.519423484802246, "learning_rate": 1e-06, "loss": 0.0142, "step": 2535 }, { "epoch": 0.43173306094654407, "grad_norm": 1.4875999689102173, "learning_rate": 1e-06, "loss": 0.0177, "step": 2536 }, { "epoch": 0.4319033026898195, "grad_norm": 1.0595074892044067, "learning_rate": 1e-06, "loss": 0.013, "step": 2537 }, { "epoch": 0.432073544433095, "grad_norm": 1.461539626121521, "learning_rate": 1e-06, "loss": 0.0192, "step": 2538 }, { "epoch": 0.43224378617637044, "grad_norm": 1.2863537073135376, "learning_rate": 1e-06, "loss": 0.0142, "step": 2539 }, { "epoch": 0.4324140279196459, "grad_norm": 1.447748064994812, "learning_rate": 1e-06, "loss": 0.0147, "step": 2540 }, { "epoch": 0.43258426966292135, "grad_norm": 1.0105310678482056, "learning_rate": 1e-06, "loss": 0.012, "step": 2541 }, { "epoch": 0.4327545114061968, "grad_norm": 1.367261290550232, "learning_rate": 1e-06, "loss": 0.0161, "step": 2542 }, { "epoch": 0.43292475314947226, "grad_norm": 1.2634276151657104, "learning_rate": 1e-06, "loss": 0.0121, "step": 2543 }, { "epoch": 0.4330949948927477, "grad_norm": 1.2317240238189697, "learning_rate": 1e-06, "loss": 0.0136, "step": 2544 }, { "epoch": 0.4332652366360232, "grad_norm": 1.2238634824752808, "learning_rate": 1e-06, "loss": 0.0106, "step": 2545 }, { "epoch": 0.4334354783792986, "grad_norm": 1.3249047994613647, "learning_rate": 1e-06, "loss": 0.0133, "step": 2546 }, { "epoch": 0.43360572012257403, "grad_norm": 1.049531102180481, "learning_rate": 1e-06, "loss": 0.0107, "step": 2547 }, { "epoch": 0.4337759618658495, "grad_norm": 1.623108983039856, "learning_rate": 1e-06, "loss": 0.0204, "step": 2548 }, { "epoch": 0.43394620360912495, "grad_norm": 1.2683053016662598, "learning_rate": 1e-06, "loss": 0.0173, "step": 2549 }, { "epoch": 0.4341164453524004, "grad_norm": 1.6255425214767456, "learning_rate": 1e-06, "loss": 0.0163, "step": 2550 }, { "epoch": 0.43428668709567586, "grad_norm": 1.1563000679016113, "learning_rate": 1e-06, "loss": 0.0145, "step": 2551 }, { "epoch": 0.4344569288389513, "grad_norm": 1.3939322233200073, "learning_rate": 1e-06, "loss": 0.02, "step": 2552 }, { "epoch": 0.4346271705822268, "grad_norm": 1.2949892282485962, "learning_rate": 1e-06, "loss": 0.0103, "step": 2553 }, { "epoch": 0.43479741232550223, "grad_norm": 1.4135187864303589, "learning_rate": 1e-06, "loss": 0.017, "step": 2554 }, { "epoch": 0.4349676540687777, "grad_norm": 1.1989250183105469, "learning_rate": 1e-06, "loss": 0.0108, "step": 2555 }, { "epoch": 0.4351378958120531, "grad_norm": 1.180803894996643, "learning_rate": 1e-06, "loss": 0.0145, "step": 2556 }, { "epoch": 0.43530813755532854, "grad_norm": 0.8694745302200317, "learning_rate": 1e-06, "loss": 0.0084, "step": 2557 }, { "epoch": 0.435478379298604, "grad_norm": 1.489912986755371, "learning_rate": 1e-06, "loss": 0.0158, "step": 2558 }, { "epoch": 0.43564862104187946, "grad_norm": 1.1703554391860962, "learning_rate": 1e-06, "loss": 0.011, "step": 2559 }, { "epoch": 0.4358188627851549, "grad_norm": 1.3352656364440918, "learning_rate": 1e-06, "loss": 0.0115, "step": 2560 }, { "epoch": 0.43598910452843037, "grad_norm": 2.0167603492736816, "learning_rate": 1e-06, "loss": 0.0132, "step": 2561 }, { "epoch": 0.43615934627170583, "grad_norm": 1.5169191360473633, "learning_rate": 1e-06, "loss": 0.0266, "step": 2562 }, { "epoch": 0.4363295880149813, "grad_norm": 1.1504062414169312, "learning_rate": 1e-06, "loss": 0.0124, "step": 2563 }, { "epoch": 0.43649982975825674, "grad_norm": 1.3212984800338745, "learning_rate": 1e-06, "loss": 0.0156, "step": 2564 }, { "epoch": 0.4366700715015322, "grad_norm": 1.149043321609497, "learning_rate": 1e-06, "loss": 0.0115, "step": 2565 }, { "epoch": 0.43684031324480765, "grad_norm": 1.2085059881210327, "learning_rate": 1e-06, "loss": 0.012, "step": 2566 }, { "epoch": 0.43701055498808306, "grad_norm": 1.2474228143692017, "learning_rate": 1e-06, "loss": 0.0123, "step": 2567 }, { "epoch": 0.4371807967313585, "grad_norm": 1.2562228441238403, "learning_rate": 1e-06, "loss": 0.0118, "step": 2568 }, { "epoch": 0.43735103847463397, "grad_norm": 1.5228354930877686, "learning_rate": 1e-06, "loss": 0.0136, "step": 2569 }, { "epoch": 0.4375212802179094, "grad_norm": 1.6287275552749634, "learning_rate": 1e-06, "loss": 0.0179, "step": 2570 }, { "epoch": 0.4376915219611849, "grad_norm": 1.3553366661071777, "learning_rate": 1e-06, "loss": 0.0154, "step": 2571 }, { "epoch": 0.43786176370446034, "grad_norm": 1.4978290796279907, "learning_rate": 1e-06, "loss": 0.0142, "step": 2572 }, { "epoch": 0.4380320054477358, "grad_norm": 1.4585553407669067, "learning_rate": 1e-06, "loss": 0.0153, "step": 2573 }, { "epoch": 0.43820224719101125, "grad_norm": 1.175268530845642, "learning_rate": 1e-06, "loss": 0.0151, "step": 2574 }, { "epoch": 0.4383724889342867, "grad_norm": 1.2472771406173706, "learning_rate": 1e-06, "loss": 0.0172, "step": 2575 }, { "epoch": 0.43854273067756216, "grad_norm": 1.109674096107483, "learning_rate": 1e-06, "loss": 0.0094, "step": 2576 }, { "epoch": 0.43871297242083757, "grad_norm": 0.974772572517395, "learning_rate": 1e-06, "loss": 0.0109, "step": 2577 }, { "epoch": 0.438883214164113, "grad_norm": 1.1081093549728394, "learning_rate": 1e-06, "loss": 0.0109, "step": 2578 }, { "epoch": 0.4390534559073885, "grad_norm": 1.1672372817993164, "learning_rate": 1e-06, "loss": 0.0125, "step": 2579 }, { "epoch": 0.43922369765066394, "grad_norm": 1.2188431024551392, "learning_rate": 1e-06, "loss": 0.0147, "step": 2580 }, { "epoch": 0.4393939393939394, "grad_norm": 1.0513874292373657, "learning_rate": 1e-06, "loss": 0.0107, "step": 2581 }, { "epoch": 0.43956418113721485, "grad_norm": 1.2327890396118164, "learning_rate": 1e-06, "loss": 0.0129, "step": 2582 }, { "epoch": 0.4397344228804903, "grad_norm": 2.443394422531128, "learning_rate": 1e-06, "loss": 0.0233, "step": 2583 }, { "epoch": 0.43990466462376576, "grad_norm": 1.189650535583496, "learning_rate": 1e-06, "loss": 0.0211, "step": 2584 }, { "epoch": 0.4400749063670412, "grad_norm": 1.22821843624115, "learning_rate": 1e-06, "loss": 0.0095, "step": 2585 }, { "epoch": 0.4402451481103167, "grad_norm": 1.223922848701477, "learning_rate": 1e-06, "loss": 0.0112, "step": 2586 }, { "epoch": 0.4404153898535921, "grad_norm": 1.3211407661437988, "learning_rate": 1e-06, "loss": 0.0141, "step": 2587 }, { "epoch": 0.44058563159686753, "grad_norm": 1.2087403535842896, "learning_rate": 1e-06, "loss": 0.0152, "step": 2588 }, { "epoch": 0.440755873340143, "grad_norm": 1.6649106740951538, "learning_rate": 1e-06, "loss": 0.0225, "step": 2589 }, { "epoch": 0.44092611508341845, "grad_norm": 1.302064061164856, "learning_rate": 1e-06, "loss": 0.0116, "step": 2590 }, { "epoch": 0.4410963568266939, "grad_norm": 1.2485460042953491, "learning_rate": 1e-06, "loss": 0.014, "step": 2591 }, { "epoch": 0.44126659856996936, "grad_norm": 1.1560567617416382, "learning_rate": 1e-06, "loss": 0.0134, "step": 2592 }, { "epoch": 0.4414368403132448, "grad_norm": 1.436091661453247, "learning_rate": 1e-06, "loss": 0.0132, "step": 2593 }, { "epoch": 0.4416070820565203, "grad_norm": 1.1101467609405518, "learning_rate": 1e-06, "loss": 0.0131, "step": 2594 }, { "epoch": 0.44177732379979573, "grad_norm": 1.2781106233596802, "learning_rate": 1e-06, "loss": 0.0217, "step": 2595 }, { "epoch": 0.4419475655430712, "grad_norm": 1.1032869815826416, "learning_rate": 1e-06, "loss": 0.0083, "step": 2596 }, { "epoch": 0.4421178072863466, "grad_norm": 1.4114645719528198, "learning_rate": 1e-06, "loss": 0.0118, "step": 2597 }, { "epoch": 0.44228804902962204, "grad_norm": 1.307861566543579, "learning_rate": 1e-06, "loss": 0.0106, "step": 2598 }, { "epoch": 0.4424582907728975, "grad_norm": 1.3546010255813599, "learning_rate": 1e-06, "loss": 0.0124, "step": 2599 }, { "epoch": 0.44262853251617296, "grad_norm": 1.1858961582183838, "learning_rate": 1e-06, "loss": 0.0181, "step": 2600 }, { "epoch": 0.4427987742594484, "grad_norm": 1.7051303386688232, "learning_rate": 1e-06, "loss": 0.0155, "step": 2601 }, { "epoch": 0.44296901600272387, "grad_norm": 1.2881419658660889, "learning_rate": 1e-06, "loss": 0.015, "step": 2602 }, { "epoch": 0.4431392577459993, "grad_norm": 1.2599304914474487, "learning_rate": 1e-06, "loss": 0.0117, "step": 2603 }, { "epoch": 0.4433094994892748, "grad_norm": 1.2731688022613525, "learning_rate": 1e-06, "loss": 0.0135, "step": 2604 }, { "epoch": 0.44347974123255024, "grad_norm": 1.2084245681762695, "learning_rate": 1e-06, "loss": 0.0129, "step": 2605 }, { "epoch": 0.4436499829758257, "grad_norm": 1.2445482015609741, "learning_rate": 1e-06, "loss": 0.0115, "step": 2606 }, { "epoch": 0.4438202247191011, "grad_norm": 1.8854734897613525, "learning_rate": 1e-06, "loss": 0.0193, "step": 2607 }, { "epoch": 0.44399046646237655, "grad_norm": 1.4180908203125, "learning_rate": 1e-06, "loss": 0.0139, "step": 2608 }, { "epoch": 0.444160708205652, "grad_norm": 1.7596639394760132, "learning_rate": 1e-06, "loss": 0.0215, "step": 2609 }, { "epoch": 0.44433094994892747, "grad_norm": 1.8333537578582764, "learning_rate": 1e-06, "loss": 0.0171, "step": 2610 }, { "epoch": 0.4445011916922029, "grad_norm": 1.5384416580200195, "learning_rate": 1e-06, "loss": 0.0121, "step": 2611 }, { "epoch": 0.4446714334354784, "grad_norm": 1.6996245384216309, "learning_rate": 1e-06, "loss": 0.0296, "step": 2612 }, { "epoch": 0.44484167517875384, "grad_norm": 1.2037662267684937, "learning_rate": 1e-06, "loss": 0.0107, "step": 2613 }, { "epoch": 0.4450119169220293, "grad_norm": 1.2231796979904175, "learning_rate": 1e-06, "loss": 0.0118, "step": 2614 }, { "epoch": 0.44518215866530475, "grad_norm": 1.1597298383712769, "learning_rate": 1e-06, "loss": 0.0147, "step": 2615 }, { "epoch": 0.4453524004085802, "grad_norm": 1.1221568584442139, "learning_rate": 1e-06, "loss": 0.0108, "step": 2616 }, { "epoch": 0.4455226421518556, "grad_norm": 3.223909378051758, "learning_rate": 1e-06, "loss": 0.0231, "step": 2617 }, { "epoch": 0.44569288389513106, "grad_norm": 1.6201415061950684, "learning_rate": 1e-06, "loss": 0.0174, "step": 2618 }, { "epoch": 0.4458631256384065, "grad_norm": 1.248681902885437, "learning_rate": 1e-06, "loss": 0.0129, "step": 2619 }, { "epoch": 0.446033367381682, "grad_norm": 1.1186449527740479, "learning_rate": 1e-06, "loss": 0.0093, "step": 2620 }, { "epoch": 0.44620360912495743, "grad_norm": 2.316514253616333, "learning_rate": 1e-06, "loss": 0.0223, "step": 2621 }, { "epoch": 0.4463738508682329, "grad_norm": 1.214141607284546, "learning_rate": 1e-06, "loss": 0.01, "step": 2622 }, { "epoch": 0.44654409261150835, "grad_norm": 1.4486373662948608, "learning_rate": 1e-06, "loss": 0.0292, "step": 2623 }, { "epoch": 0.4467143343547838, "grad_norm": 1.2913445234298706, "learning_rate": 1e-06, "loss": 0.0123, "step": 2624 }, { "epoch": 0.44688457609805926, "grad_norm": 1.2563960552215576, "learning_rate": 1e-06, "loss": 0.0222, "step": 2625 }, { "epoch": 0.4470548178413347, "grad_norm": 1.4476072788238525, "learning_rate": 1e-06, "loss": 0.0188, "step": 2626 }, { "epoch": 0.4472250595846102, "grad_norm": 1.5944808721542358, "learning_rate": 1e-06, "loss": 0.0161, "step": 2627 }, { "epoch": 0.4473953013278856, "grad_norm": 1.1989669799804688, "learning_rate": 1e-06, "loss": 0.0134, "step": 2628 }, { "epoch": 0.44756554307116103, "grad_norm": 1.5682440996170044, "learning_rate": 1e-06, "loss": 0.021, "step": 2629 }, { "epoch": 0.4477357848144365, "grad_norm": 1.1739362478256226, "learning_rate": 1e-06, "loss": 0.0105, "step": 2630 }, { "epoch": 0.44790602655771194, "grad_norm": 1.2502210140228271, "learning_rate": 1e-06, "loss": 0.019, "step": 2631 }, { "epoch": 0.4480762683009874, "grad_norm": 1.1751649379730225, "learning_rate": 1e-06, "loss": 0.0098, "step": 2632 }, { "epoch": 0.44824651004426286, "grad_norm": 1.1609954833984375, "learning_rate": 1e-06, "loss": 0.0186, "step": 2633 }, { "epoch": 0.4484167517875383, "grad_norm": 1.4711358547210693, "learning_rate": 1e-06, "loss": 0.0197, "step": 2634 }, { "epoch": 0.44858699353081377, "grad_norm": 1.154991865158081, "learning_rate": 1e-06, "loss": 0.0182, "step": 2635 }, { "epoch": 0.44875723527408923, "grad_norm": 1.0405231714248657, "learning_rate": 1e-06, "loss": 0.0104, "step": 2636 }, { "epoch": 0.4489274770173647, "grad_norm": 1.2849845886230469, "learning_rate": 1e-06, "loss": 0.0148, "step": 2637 }, { "epoch": 0.4490977187606401, "grad_norm": 1.4233804941177368, "learning_rate": 1e-06, "loss": 0.0149, "step": 2638 }, { "epoch": 0.44926796050391554, "grad_norm": 1.2203373908996582, "learning_rate": 1e-06, "loss": 0.0156, "step": 2639 }, { "epoch": 0.449438202247191, "grad_norm": 1.1618257761001587, "learning_rate": 1e-06, "loss": 0.0114, "step": 2640 }, { "epoch": 0.44960844399046646, "grad_norm": 1.307241439819336, "learning_rate": 1e-06, "loss": 0.0121, "step": 2641 }, { "epoch": 0.4497786857337419, "grad_norm": 1.3096569776535034, "learning_rate": 1e-06, "loss": 0.016, "step": 2642 }, { "epoch": 0.44994892747701737, "grad_norm": 2.0565989017486572, "learning_rate": 1e-06, "loss": 0.0164, "step": 2643 }, { "epoch": 0.4501191692202928, "grad_norm": 1.3968826532363892, "learning_rate": 1e-06, "loss": 0.0118, "step": 2644 }, { "epoch": 0.4502894109635683, "grad_norm": 1.3210657835006714, "learning_rate": 1e-06, "loss": 0.0126, "step": 2645 }, { "epoch": 0.45045965270684374, "grad_norm": 1.1253306865692139, "learning_rate": 1e-06, "loss": 0.0113, "step": 2646 }, { "epoch": 0.4506298944501192, "grad_norm": 1.6668893098831177, "learning_rate": 1e-06, "loss": 0.0139, "step": 2647 }, { "epoch": 0.4508001361933946, "grad_norm": 1.3010493516921997, "learning_rate": 1e-06, "loss": 0.0151, "step": 2648 }, { "epoch": 0.45097037793667005, "grad_norm": 1.4834797382354736, "learning_rate": 1e-06, "loss": 0.0105, "step": 2649 }, { "epoch": 0.4511406196799455, "grad_norm": 1.3294862508773804, "learning_rate": 1e-06, "loss": 0.0119, "step": 2650 }, { "epoch": 0.45131086142322097, "grad_norm": 1.2867186069488525, "learning_rate": 1e-06, "loss": 0.0124, "step": 2651 }, { "epoch": 0.4514811031664964, "grad_norm": 1.1677923202514648, "learning_rate": 1e-06, "loss": 0.0116, "step": 2652 }, { "epoch": 0.4516513449097719, "grad_norm": 1.1350395679473877, "learning_rate": 1e-06, "loss": 0.0131, "step": 2653 }, { "epoch": 0.45182158665304734, "grad_norm": 1.4789958000183105, "learning_rate": 1e-06, "loss": 0.0141, "step": 2654 }, { "epoch": 0.4519918283963228, "grad_norm": 1.325280785560608, "learning_rate": 1e-06, "loss": 0.0194, "step": 2655 }, { "epoch": 0.45216207013959825, "grad_norm": 1.2727978229522705, "learning_rate": 1e-06, "loss": 0.0098, "step": 2656 }, { "epoch": 0.4523323118828737, "grad_norm": 1.313781976699829, "learning_rate": 1e-06, "loss": 0.0177, "step": 2657 }, { "epoch": 0.4525025536261491, "grad_norm": 1.6075435876846313, "learning_rate": 1e-06, "loss": 0.0196, "step": 2658 }, { "epoch": 0.45267279536942456, "grad_norm": 1.2357505559921265, "learning_rate": 1e-06, "loss": 0.0122, "step": 2659 }, { "epoch": 0.4528430371127, "grad_norm": 1.182496428489685, "learning_rate": 1e-06, "loss": 0.0181, "step": 2660 }, { "epoch": 0.4530132788559755, "grad_norm": 0.940984845161438, "learning_rate": 1e-06, "loss": 0.0086, "step": 2661 }, { "epoch": 0.45318352059925093, "grad_norm": 1.304612636566162, "learning_rate": 1e-06, "loss": 0.0117, "step": 2662 }, { "epoch": 0.4533537623425264, "grad_norm": 1.482349157333374, "learning_rate": 1e-06, "loss": 0.0139, "step": 2663 }, { "epoch": 0.45352400408580185, "grad_norm": 1.1997534036636353, "learning_rate": 1e-06, "loss": 0.0157, "step": 2664 }, { "epoch": 0.4536942458290773, "grad_norm": 1.2524585723876953, "learning_rate": 1e-06, "loss": 0.0124, "step": 2665 }, { "epoch": 0.45386448757235276, "grad_norm": 1.139270544052124, "learning_rate": 1e-06, "loss": 0.0141, "step": 2666 }, { "epoch": 0.4540347293156282, "grad_norm": 1.2429019212722778, "learning_rate": 1e-06, "loss": 0.0135, "step": 2667 }, { "epoch": 0.4542049710589036, "grad_norm": 1.0963373184204102, "learning_rate": 1e-06, "loss": 0.0112, "step": 2668 }, { "epoch": 0.4543752128021791, "grad_norm": 1.0817755460739136, "learning_rate": 1e-06, "loss": 0.0088, "step": 2669 }, { "epoch": 0.45454545454545453, "grad_norm": 1.246390700340271, "learning_rate": 1e-06, "loss": 0.013, "step": 2670 }, { "epoch": 0.45471569628873, "grad_norm": 1.3719927072525024, "learning_rate": 1e-06, "loss": 0.0196, "step": 2671 }, { "epoch": 0.45488593803200544, "grad_norm": 1.0424156188964844, "learning_rate": 1e-06, "loss": 0.0122, "step": 2672 }, { "epoch": 0.4550561797752809, "grad_norm": 1.1549937725067139, "learning_rate": 1e-06, "loss": 0.015, "step": 2673 }, { "epoch": 0.45522642151855636, "grad_norm": 1.3112993240356445, "learning_rate": 1e-06, "loss": 0.0137, "step": 2674 }, { "epoch": 0.4553966632618318, "grad_norm": 1.9749765396118164, "learning_rate": 1e-06, "loss": 0.0143, "step": 2675 }, { "epoch": 0.45556690500510727, "grad_norm": 1.4129223823547363, "learning_rate": 1e-06, "loss": 0.0171, "step": 2676 }, { "epoch": 0.4557371467483827, "grad_norm": 1.2779748439788818, "learning_rate": 1e-06, "loss": 0.0153, "step": 2677 }, { "epoch": 0.4559073884916581, "grad_norm": 4.037950038909912, "learning_rate": 1e-06, "loss": 0.0177, "step": 2678 }, { "epoch": 0.4560776302349336, "grad_norm": 1.1884809732437134, "learning_rate": 1e-06, "loss": 0.0096, "step": 2679 }, { "epoch": 0.45624787197820904, "grad_norm": 1.5136353969573975, "learning_rate": 1e-06, "loss": 0.0176, "step": 2680 }, { "epoch": 0.4564181137214845, "grad_norm": 5.290273189544678, "learning_rate": 1e-06, "loss": 0.0812, "step": 2681 }, { "epoch": 0.45658835546475995, "grad_norm": 1.0113693475723267, "learning_rate": 1e-06, "loss": 0.0086, "step": 2682 }, { "epoch": 0.4567585972080354, "grad_norm": 1.279430627822876, "learning_rate": 1e-06, "loss": 0.0182, "step": 2683 }, { "epoch": 0.45692883895131087, "grad_norm": 1.4505813121795654, "learning_rate": 1e-06, "loss": 0.0156, "step": 2684 }, { "epoch": 0.4570990806945863, "grad_norm": 1.6365201473236084, "learning_rate": 1e-06, "loss": 0.017, "step": 2685 }, { "epoch": 0.4572693224378618, "grad_norm": 1.1942694187164307, "learning_rate": 1e-06, "loss": 0.0119, "step": 2686 }, { "epoch": 0.45743956418113724, "grad_norm": 1.1934460401535034, "learning_rate": 1e-06, "loss": 0.0172, "step": 2687 }, { "epoch": 0.4576098059244127, "grad_norm": 1.3042709827423096, "learning_rate": 1e-06, "loss": 0.0135, "step": 2688 }, { "epoch": 0.4577800476676881, "grad_norm": 1.2046012878417969, "learning_rate": 1e-06, "loss": 0.0092, "step": 2689 }, { "epoch": 0.45795028941096355, "grad_norm": 1.5803378820419312, "learning_rate": 1e-06, "loss": 0.0146, "step": 2690 }, { "epoch": 0.458120531154239, "grad_norm": 1.2354884147644043, "learning_rate": 1e-06, "loss": 0.0117, "step": 2691 }, { "epoch": 0.45829077289751446, "grad_norm": 2.588909864425659, "learning_rate": 1e-06, "loss": 0.026, "step": 2692 }, { "epoch": 0.4584610146407899, "grad_norm": 1.2800123691558838, "learning_rate": 1e-06, "loss": 0.012, "step": 2693 }, { "epoch": 0.4586312563840654, "grad_norm": 1.6324779987335205, "learning_rate": 1e-06, "loss": 0.0157, "step": 2694 }, { "epoch": 0.45880149812734083, "grad_norm": 1.0287292003631592, "learning_rate": 1e-06, "loss": 0.0115, "step": 2695 }, { "epoch": 0.4589717398706163, "grad_norm": 1.4863289594650269, "learning_rate": 1e-06, "loss": 0.0166, "step": 2696 }, { "epoch": 0.45914198161389175, "grad_norm": 1.7314716577529907, "learning_rate": 1e-06, "loss": 0.0222, "step": 2697 }, { "epoch": 0.4593122233571672, "grad_norm": 1.315182089805603, "learning_rate": 1e-06, "loss": 0.0103, "step": 2698 }, { "epoch": 0.4594824651004426, "grad_norm": 1.8149510622024536, "learning_rate": 1e-06, "loss": 0.0246, "step": 2699 }, { "epoch": 0.45965270684371806, "grad_norm": 1.528834581375122, "learning_rate": 1e-06, "loss": 0.0249, "step": 2700 }, { "epoch": 0.4598229485869935, "grad_norm": 1.3489001989364624, "learning_rate": 1e-06, "loss": 0.021, "step": 2701 }, { "epoch": 0.459993190330269, "grad_norm": 1.1386735439300537, "learning_rate": 1e-06, "loss": 0.0146, "step": 2702 }, { "epoch": 0.46016343207354443, "grad_norm": 0.9807332754135132, "learning_rate": 1e-06, "loss": 0.0097, "step": 2703 }, { "epoch": 0.4603336738168199, "grad_norm": 1.237453579902649, "learning_rate": 1e-06, "loss": 0.018, "step": 2704 }, { "epoch": 0.46050391556009534, "grad_norm": 1.3836872577667236, "learning_rate": 1e-06, "loss": 0.0127, "step": 2705 }, { "epoch": 0.4606741573033708, "grad_norm": 1.1257848739624023, "learning_rate": 1e-06, "loss": 0.0132, "step": 2706 }, { "epoch": 0.46084439904664626, "grad_norm": 1.412534475326538, "learning_rate": 1e-06, "loss": 0.0153, "step": 2707 }, { "epoch": 0.4610146407899217, "grad_norm": 1.4209263324737549, "learning_rate": 1e-06, "loss": 0.0144, "step": 2708 }, { "epoch": 0.4611848825331971, "grad_norm": 1.466731309890747, "learning_rate": 1e-06, "loss": 0.0163, "step": 2709 }, { "epoch": 0.4613551242764726, "grad_norm": 2.445279121398926, "learning_rate": 1e-06, "loss": 0.0225, "step": 2710 }, { "epoch": 0.46152536601974803, "grad_norm": 1.6007717847824097, "learning_rate": 1e-06, "loss": 0.0136, "step": 2711 }, { "epoch": 0.4616956077630235, "grad_norm": 4.2332844734191895, "learning_rate": 1e-06, "loss": 0.0242, "step": 2712 }, { "epoch": 0.46186584950629894, "grad_norm": 1.627395749092102, "learning_rate": 1e-06, "loss": 0.0187, "step": 2713 }, { "epoch": 0.4620360912495744, "grad_norm": 1.2684941291809082, "learning_rate": 1e-06, "loss": 0.0114, "step": 2714 }, { "epoch": 0.46220633299284986, "grad_norm": 1.287532925605774, "learning_rate": 1e-06, "loss": 0.012, "step": 2715 }, { "epoch": 0.4623765747361253, "grad_norm": 1.6071957349777222, "learning_rate": 1e-06, "loss": 0.0171, "step": 2716 }, { "epoch": 0.46254681647940077, "grad_norm": 1.3653993606567383, "learning_rate": 1e-06, "loss": 0.0153, "step": 2717 }, { "epoch": 0.4627170582226762, "grad_norm": 0.9938444495201111, "learning_rate": 1e-06, "loss": 0.0084, "step": 2718 }, { "epoch": 0.4628872999659516, "grad_norm": 1.3033769130706787, "learning_rate": 1e-06, "loss": 0.0105, "step": 2719 }, { "epoch": 0.4630575417092271, "grad_norm": 1.2116801738739014, "learning_rate": 1e-06, "loss": 0.0147, "step": 2720 }, { "epoch": 0.46322778345250254, "grad_norm": 0.969497561454773, "learning_rate": 1e-06, "loss": 0.0091, "step": 2721 }, { "epoch": 0.463398025195778, "grad_norm": 1.1007535457611084, "learning_rate": 1e-06, "loss": 0.0151, "step": 2722 }, { "epoch": 0.46356826693905345, "grad_norm": 1.2838865518569946, "learning_rate": 1e-06, "loss": 0.0115, "step": 2723 }, { "epoch": 0.4637385086823289, "grad_norm": 1.1375126838684082, "learning_rate": 1e-06, "loss": 0.0142, "step": 2724 }, { "epoch": 0.46390875042560437, "grad_norm": 0.9952174425125122, "learning_rate": 1e-06, "loss": 0.0102, "step": 2725 }, { "epoch": 0.4640789921688798, "grad_norm": 1.0825743675231934, "learning_rate": 1e-06, "loss": 0.0108, "step": 2726 }, { "epoch": 0.4642492339121553, "grad_norm": 1.658443808555603, "learning_rate": 1e-06, "loss": 0.0286, "step": 2727 }, { "epoch": 0.46441947565543074, "grad_norm": 1.2224160432815552, "learning_rate": 1e-06, "loss": 0.0171, "step": 2728 }, { "epoch": 0.46458971739870614, "grad_norm": 1.210042119026184, "learning_rate": 1e-06, "loss": 0.0128, "step": 2729 }, { "epoch": 0.4647599591419816, "grad_norm": 1.4320639371871948, "learning_rate": 1e-06, "loss": 0.0099, "step": 2730 }, { "epoch": 0.46493020088525705, "grad_norm": 1.2222720384597778, "learning_rate": 1e-06, "loss": 0.0134, "step": 2731 }, { "epoch": 0.4651004426285325, "grad_norm": 1.5248254537582397, "learning_rate": 1e-06, "loss": 0.0136, "step": 2732 }, { "epoch": 0.46527068437180796, "grad_norm": 1.599933385848999, "learning_rate": 1e-06, "loss": 0.0173, "step": 2733 }, { "epoch": 0.4654409261150834, "grad_norm": 1.824179768562317, "learning_rate": 1e-06, "loss": 0.0158, "step": 2734 }, { "epoch": 0.4656111678583589, "grad_norm": 1.516343355178833, "learning_rate": 1e-06, "loss": 0.0209, "step": 2735 }, { "epoch": 0.46578140960163433, "grad_norm": 1.768630862236023, "learning_rate": 1e-06, "loss": 0.0191, "step": 2736 }, { "epoch": 0.4659516513449098, "grad_norm": 1.7268173694610596, "learning_rate": 1e-06, "loss": 0.0312, "step": 2737 }, { "epoch": 0.46612189308818525, "grad_norm": 1.7336803674697876, "learning_rate": 1e-06, "loss": 0.0117, "step": 2738 }, { "epoch": 0.46629213483146065, "grad_norm": 1.4071462154388428, "learning_rate": 1e-06, "loss": 0.0165, "step": 2739 }, { "epoch": 0.4664623765747361, "grad_norm": 1.6221487522125244, "learning_rate": 1e-06, "loss": 0.0204, "step": 2740 }, { "epoch": 0.46663261831801156, "grad_norm": 1.0625100135803223, "learning_rate": 1e-06, "loss": 0.018, "step": 2741 }, { "epoch": 0.466802860061287, "grad_norm": 3.000713348388672, "learning_rate": 1e-06, "loss": 0.0445, "step": 2742 }, { "epoch": 0.4669731018045625, "grad_norm": 1.5852463245391846, "learning_rate": 1e-06, "loss": 0.0152, "step": 2743 }, { "epoch": 0.46714334354783793, "grad_norm": 1.2370667457580566, "learning_rate": 1e-06, "loss": 0.0136, "step": 2744 }, { "epoch": 0.4673135852911134, "grad_norm": 1.1240363121032715, "learning_rate": 1e-06, "loss": 0.0113, "step": 2745 }, { "epoch": 0.46748382703438884, "grad_norm": 1.7612371444702148, "learning_rate": 1e-06, "loss": 0.0312, "step": 2746 }, { "epoch": 0.4676540687776643, "grad_norm": 1.0981045961380005, "learning_rate": 1e-06, "loss": 0.016, "step": 2747 }, { "epoch": 0.46782431052093976, "grad_norm": 1.3208484649658203, "learning_rate": 1e-06, "loss": 0.0167, "step": 2748 }, { "epoch": 0.46799455226421516, "grad_norm": 1.1048396825790405, "learning_rate": 1e-06, "loss": 0.0101, "step": 2749 }, { "epoch": 0.4681647940074906, "grad_norm": 1.2207963466644287, "learning_rate": 1e-06, "loss": 0.0188, "step": 2750 }, { "epoch": 0.46833503575076607, "grad_norm": 1.3159321546554565, "learning_rate": 1e-06, "loss": 0.0136, "step": 2751 }, { "epoch": 0.4685052774940415, "grad_norm": 1.4273492097854614, "learning_rate": 1e-06, "loss": 0.013, "step": 2752 }, { "epoch": 0.468675519237317, "grad_norm": 1.4401754140853882, "learning_rate": 1e-06, "loss": 0.0151, "step": 2753 }, { "epoch": 0.46884576098059244, "grad_norm": 1.262450098991394, "learning_rate": 1e-06, "loss": 0.0145, "step": 2754 }, { "epoch": 0.4690160027238679, "grad_norm": 1.0684226751327515, "learning_rate": 1e-06, "loss": 0.0086, "step": 2755 }, { "epoch": 0.46918624446714335, "grad_norm": 1.1081675291061401, "learning_rate": 1e-06, "loss": 0.0085, "step": 2756 }, { "epoch": 0.4693564862104188, "grad_norm": 1.0820542573928833, "learning_rate": 1e-06, "loss": 0.0112, "step": 2757 }, { "epoch": 0.46952672795369427, "grad_norm": 1.4552348852157593, "learning_rate": 1e-06, "loss": 0.0123, "step": 2758 }, { "epoch": 0.4696969696969697, "grad_norm": 1.2042700052261353, "learning_rate": 1e-06, "loss": 0.0088, "step": 2759 }, { "epoch": 0.4698672114402451, "grad_norm": 1.2401795387268066, "learning_rate": 1e-06, "loss": 0.0119, "step": 2760 }, { "epoch": 0.4700374531835206, "grad_norm": 1.3959617614746094, "learning_rate": 1e-06, "loss": 0.0092, "step": 2761 }, { "epoch": 0.47020769492679604, "grad_norm": 1.220746636390686, "learning_rate": 1e-06, "loss": 0.0172, "step": 2762 }, { "epoch": 0.4703779366700715, "grad_norm": 1.241463303565979, "learning_rate": 1e-06, "loss": 0.0131, "step": 2763 }, { "epoch": 0.47054817841334695, "grad_norm": 1.2252508401870728, "learning_rate": 1e-06, "loss": 0.013, "step": 2764 }, { "epoch": 0.4707184201566224, "grad_norm": 1.9142897129058838, "learning_rate": 1e-06, "loss": 0.028, "step": 2765 }, { "epoch": 0.47088866189989786, "grad_norm": 1.1304916143417358, "learning_rate": 1e-06, "loss": 0.0119, "step": 2766 }, { "epoch": 0.4710589036431733, "grad_norm": 1.247910499572754, "learning_rate": 1e-06, "loss": 0.0179, "step": 2767 }, { "epoch": 0.4712291453864488, "grad_norm": 1.3291040658950806, "learning_rate": 1e-06, "loss": 0.0139, "step": 2768 }, { "epoch": 0.47139938712972423, "grad_norm": 1.139250636100769, "learning_rate": 1e-06, "loss": 0.0146, "step": 2769 }, { "epoch": 0.47156962887299964, "grad_norm": 1.6703858375549316, "learning_rate": 1e-06, "loss": 0.0184, "step": 2770 }, { "epoch": 0.4717398706162751, "grad_norm": 1.6940549612045288, "learning_rate": 1e-06, "loss": 0.0164, "step": 2771 }, { "epoch": 0.47191011235955055, "grad_norm": 1.24887216091156, "learning_rate": 1e-06, "loss": 0.0135, "step": 2772 }, { "epoch": 0.472080354102826, "grad_norm": 1.0762343406677246, "learning_rate": 1e-06, "loss": 0.0095, "step": 2773 }, { "epoch": 0.47225059584610146, "grad_norm": 1.1544651985168457, "learning_rate": 1e-06, "loss": 0.0129, "step": 2774 }, { "epoch": 0.4724208375893769, "grad_norm": 1.3130018711090088, "learning_rate": 1e-06, "loss": 0.0144, "step": 2775 }, { "epoch": 0.4725910793326524, "grad_norm": 1.620080590248108, "learning_rate": 1e-06, "loss": 0.0181, "step": 2776 }, { "epoch": 0.47276132107592783, "grad_norm": 1.6613308191299438, "learning_rate": 1e-06, "loss": 0.0118, "step": 2777 }, { "epoch": 0.4729315628192033, "grad_norm": 1.066480040550232, "learning_rate": 1e-06, "loss": 0.0077, "step": 2778 }, { "epoch": 0.47310180456247874, "grad_norm": 1.109659194946289, "learning_rate": 1e-06, "loss": 0.0123, "step": 2779 }, { "epoch": 0.47327204630575415, "grad_norm": 1.551084041595459, "learning_rate": 1e-06, "loss": 0.0107, "step": 2780 }, { "epoch": 0.4734422880490296, "grad_norm": 1.2061805725097656, "learning_rate": 1e-06, "loss": 0.0166, "step": 2781 }, { "epoch": 0.47361252979230506, "grad_norm": 1.9850165843963623, "learning_rate": 1e-06, "loss": 0.0128, "step": 2782 }, { "epoch": 0.4737827715355805, "grad_norm": 1.3873836994171143, "learning_rate": 1e-06, "loss": 0.0146, "step": 2783 }, { "epoch": 0.473953013278856, "grad_norm": 1.0683759450912476, "learning_rate": 1e-06, "loss": 0.0106, "step": 2784 }, { "epoch": 0.47412325502213143, "grad_norm": 0.9684468507766724, "learning_rate": 1e-06, "loss": 0.0101, "step": 2785 }, { "epoch": 0.4742934967654069, "grad_norm": 1.323331594467163, "learning_rate": 1e-06, "loss": 0.0156, "step": 2786 }, { "epoch": 0.47446373850868234, "grad_norm": 1.1693027019500732, "learning_rate": 1e-06, "loss": 0.0109, "step": 2787 }, { "epoch": 0.4746339802519578, "grad_norm": 0.9231655597686768, "learning_rate": 1e-06, "loss": 0.0085, "step": 2788 }, { "epoch": 0.47480422199523326, "grad_norm": 1.9782781600952148, "learning_rate": 1e-06, "loss": 0.0199, "step": 2789 }, { "epoch": 0.47497446373850866, "grad_norm": 1.5112477540969849, "learning_rate": 1e-06, "loss": 0.0166, "step": 2790 }, { "epoch": 0.4751447054817841, "grad_norm": 1.178739309310913, "learning_rate": 1e-06, "loss": 0.012, "step": 2791 }, { "epoch": 0.47531494722505957, "grad_norm": 2.3978826999664307, "learning_rate": 1e-06, "loss": 0.0155, "step": 2792 }, { "epoch": 0.475485188968335, "grad_norm": 1.3973369598388672, "learning_rate": 1e-06, "loss": 0.0115, "step": 2793 }, { "epoch": 0.4756554307116105, "grad_norm": 1.1762423515319824, "learning_rate": 1e-06, "loss": 0.0124, "step": 2794 }, { "epoch": 0.47582567245488594, "grad_norm": 1.2299197912216187, "learning_rate": 1e-06, "loss": 0.0118, "step": 2795 }, { "epoch": 0.4759959141981614, "grad_norm": 1.3088619709014893, "learning_rate": 1e-06, "loss": 0.0174, "step": 2796 }, { "epoch": 0.47616615594143685, "grad_norm": 1.0132900476455688, "learning_rate": 1e-06, "loss": 0.0109, "step": 2797 }, { "epoch": 0.4763363976847123, "grad_norm": 1.129879355430603, "learning_rate": 1e-06, "loss": 0.0085, "step": 2798 }, { "epoch": 0.47650663942798777, "grad_norm": 0.9757645130157471, "learning_rate": 1e-06, "loss": 0.0132, "step": 2799 }, { "epoch": 0.47667688117126317, "grad_norm": 1.4585630893707275, "learning_rate": 1e-06, "loss": 0.017, "step": 2800 }, { "epoch": 0.4768471229145386, "grad_norm": 1.0814754962921143, "learning_rate": 1e-06, "loss": 0.0162, "step": 2801 }, { "epoch": 0.4770173646578141, "grad_norm": 1.095276117324829, "learning_rate": 1e-06, "loss": 0.0098, "step": 2802 }, { "epoch": 0.47718760640108954, "grad_norm": 1.1605823040008545, "learning_rate": 1e-06, "loss": 0.0111, "step": 2803 }, { "epoch": 0.477357848144365, "grad_norm": 0.9512302279472351, "learning_rate": 1e-06, "loss": 0.0078, "step": 2804 }, { "epoch": 0.47752808988764045, "grad_norm": 0.8988835215568542, "learning_rate": 1e-06, "loss": 0.0097, "step": 2805 }, { "epoch": 0.4776983316309159, "grad_norm": 1.4442946910858154, "learning_rate": 1e-06, "loss": 0.017, "step": 2806 }, { "epoch": 0.47786857337419136, "grad_norm": 1.1615439653396606, "learning_rate": 1e-06, "loss": 0.0118, "step": 2807 }, { "epoch": 0.4780388151174668, "grad_norm": 1.2446208000183105, "learning_rate": 1e-06, "loss": 0.0108, "step": 2808 }, { "epoch": 0.4782090568607423, "grad_norm": 1.1819733381271362, "learning_rate": 1e-06, "loss": 0.0126, "step": 2809 }, { "epoch": 0.4783792986040177, "grad_norm": 1.3760933876037598, "learning_rate": 1e-06, "loss": 0.011, "step": 2810 }, { "epoch": 0.47854954034729313, "grad_norm": 1.2199891805648804, "learning_rate": 1e-06, "loss": 0.011, "step": 2811 }, { "epoch": 0.4787197820905686, "grad_norm": 1.2610541582107544, "learning_rate": 1e-06, "loss": 0.0119, "step": 2812 }, { "epoch": 0.47889002383384405, "grad_norm": 1.4175294637680054, "learning_rate": 1e-06, "loss": 0.0105, "step": 2813 }, { "epoch": 0.4790602655771195, "grad_norm": 1.0820069313049316, "learning_rate": 1e-06, "loss": 0.0083, "step": 2814 }, { "epoch": 0.47923050732039496, "grad_norm": 1.236606478691101, "learning_rate": 1e-06, "loss": 0.015, "step": 2815 }, { "epoch": 0.4794007490636704, "grad_norm": 1.2256829738616943, "learning_rate": 1e-06, "loss": 0.0121, "step": 2816 }, { "epoch": 0.4795709908069459, "grad_norm": 1.4265694618225098, "learning_rate": 1e-06, "loss": 0.0137, "step": 2817 }, { "epoch": 0.47974123255022133, "grad_norm": 1.2129721641540527, "learning_rate": 1e-06, "loss": 0.0104, "step": 2818 }, { "epoch": 0.4799114742934968, "grad_norm": 1.3204402923583984, "learning_rate": 1e-06, "loss": 0.0143, "step": 2819 }, { "epoch": 0.48008171603677224, "grad_norm": 2.682424783706665, "learning_rate": 1e-06, "loss": 0.0244, "step": 2820 }, { "epoch": 0.48025195778004764, "grad_norm": 1.4030735492706299, "learning_rate": 1e-06, "loss": 0.0109, "step": 2821 }, { "epoch": 0.4804221995233231, "grad_norm": 2.13521671295166, "learning_rate": 1e-06, "loss": 0.02, "step": 2822 }, { "epoch": 0.48059244126659856, "grad_norm": 1.1667373180389404, "learning_rate": 1e-06, "loss": 0.0134, "step": 2823 }, { "epoch": 0.480762683009874, "grad_norm": 1.3074268102645874, "learning_rate": 1e-06, "loss": 0.0114, "step": 2824 }, { "epoch": 0.48093292475314947, "grad_norm": 1.321334719657898, "learning_rate": 1e-06, "loss": 0.0118, "step": 2825 }, { "epoch": 0.4811031664964249, "grad_norm": 0.9559478759765625, "learning_rate": 1e-06, "loss": 0.0091, "step": 2826 }, { "epoch": 0.4812734082397004, "grad_norm": 0.8174387812614441, "learning_rate": 1e-06, "loss": 0.007, "step": 2827 }, { "epoch": 0.48144364998297584, "grad_norm": 1.3920475244522095, "learning_rate": 1e-06, "loss": 0.0136, "step": 2828 }, { "epoch": 0.4816138917262513, "grad_norm": 1.5686707496643066, "learning_rate": 1e-06, "loss": 0.0187, "step": 2829 }, { "epoch": 0.48178413346952675, "grad_norm": 1.0519627332687378, "learning_rate": 1e-06, "loss": 0.0105, "step": 2830 }, { "epoch": 0.48195437521280216, "grad_norm": 1.2499573230743408, "learning_rate": 1e-06, "loss": 0.0158, "step": 2831 }, { "epoch": 0.4821246169560776, "grad_norm": 1.9623509645462036, "learning_rate": 1e-06, "loss": 0.0192, "step": 2832 }, { "epoch": 0.48229485869935307, "grad_norm": 1.0774935483932495, "learning_rate": 1e-06, "loss": 0.0096, "step": 2833 }, { "epoch": 0.4824651004426285, "grad_norm": 1.0941082239151, "learning_rate": 1e-06, "loss": 0.0097, "step": 2834 }, { "epoch": 0.482635342185904, "grad_norm": 1.3845523595809937, "learning_rate": 1e-06, "loss": 0.0154, "step": 2835 }, { "epoch": 0.48280558392917944, "grad_norm": 1.569908857345581, "learning_rate": 1e-06, "loss": 0.018, "step": 2836 }, { "epoch": 0.4829758256724549, "grad_norm": 1.1683586835861206, "learning_rate": 1e-06, "loss": 0.0151, "step": 2837 }, { "epoch": 0.48314606741573035, "grad_norm": 1.277343511581421, "learning_rate": 1e-06, "loss": 0.0166, "step": 2838 }, { "epoch": 0.4833163091590058, "grad_norm": 1.2092841863632202, "learning_rate": 1e-06, "loss": 0.0105, "step": 2839 }, { "epoch": 0.48348655090228126, "grad_norm": 1.1916158199310303, "learning_rate": 1e-06, "loss": 0.0096, "step": 2840 }, { "epoch": 0.48365679264555667, "grad_norm": 1.280978798866272, "learning_rate": 1e-06, "loss": 0.0123, "step": 2841 }, { "epoch": 0.4838270343888321, "grad_norm": 1.2461068630218506, "learning_rate": 1e-06, "loss": 0.0098, "step": 2842 }, { "epoch": 0.4839972761321076, "grad_norm": 1.471252202987671, "learning_rate": 1e-06, "loss": 0.0135, "step": 2843 }, { "epoch": 0.48416751787538304, "grad_norm": 1.4099452495574951, "learning_rate": 1e-06, "loss": 0.0151, "step": 2844 }, { "epoch": 0.4843377596186585, "grad_norm": 1.3784700632095337, "learning_rate": 1e-06, "loss": 0.0086, "step": 2845 }, { "epoch": 0.48450800136193395, "grad_norm": 1.322296142578125, "learning_rate": 1e-06, "loss": 0.0154, "step": 2846 }, { "epoch": 0.4846782431052094, "grad_norm": 1.3607927560806274, "learning_rate": 1e-06, "loss": 0.0103, "step": 2847 }, { "epoch": 0.48484848484848486, "grad_norm": 0.7826800346374512, "learning_rate": 1e-06, "loss": 0.0089, "step": 2848 }, { "epoch": 0.4850187265917603, "grad_norm": 1.2559735774993896, "learning_rate": 1e-06, "loss": 0.0118, "step": 2849 }, { "epoch": 0.4851889683350358, "grad_norm": 1.423887848854065, "learning_rate": 1e-06, "loss": 0.0127, "step": 2850 }, { "epoch": 0.4853592100783112, "grad_norm": 0.9544010758399963, "learning_rate": 1e-06, "loss": 0.0088, "step": 2851 }, { "epoch": 0.48552945182158663, "grad_norm": 1.8954381942749023, "learning_rate": 1e-06, "loss": 0.0208, "step": 2852 }, { "epoch": 0.4856996935648621, "grad_norm": 1.5164066553115845, "learning_rate": 1e-06, "loss": 0.0115, "step": 2853 }, { "epoch": 0.48586993530813755, "grad_norm": 1.3428789377212524, "learning_rate": 1e-06, "loss": 0.0127, "step": 2854 }, { "epoch": 0.486040177051413, "grad_norm": 1.2199963331222534, "learning_rate": 1e-06, "loss": 0.0168, "step": 2855 }, { "epoch": 0.48621041879468846, "grad_norm": 1.103547215461731, "learning_rate": 1e-06, "loss": 0.0117, "step": 2856 }, { "epoch": 0.4863806605379639, "grad_norm": 1.19355309009552, "learning_rate": 1e-06, "loss": 0.0137, "step": 2857 }, { "epoch": 0.4865509022812394, "grad_norm": 1.5257407426834106, "learning_rate": 1e-06, "loss": 0.0125, "step": 2858 }, { "epoch": 0.48672114402451483, "grad_norm": 1.4707660675048828, "learning_rate": 1e-06, "loss": 0.0142, "step": 2859 }, { "epoch": 0.4868913857677903, "grad_norm": 1.3224432468414307, "learning_rate": 1e-06, "loss": 0.0141, "step": 2860 }, { "epoch": 0.4870616275110657, "grad_norm": 1.5693126916885376, "learning_rate": 1e-06, "loss": 0.0144, "step": 2861 }, { "epoch": 0.48723186925434114, "grad_norm": 1.315860629081726, "learning_rate": 1e-06, "loss": 0.0107, "step": 2862 }, { "epoch": 0.4874021109976166, "grad_norm": 1.2941914796829224, "learning_rate": 1e-06, "loss": 0.0125, "step": 2863 }, { "epoch": 0.48757235274089206, "grad_norm": 1.343846321105957, "learning_rate": 1e-06, "loss": 0.0135, "step": 2864 }, { "epoch": 0.4877425944841675, "grad_norm": 1.0334171056747437, "learning_rate": 1e-06, "loss": 0.0086, "step": 2865 }, { "epoch": 0.48791283622744297, "grad_norm": 1.295588493347168, "learning_rate": 1e-06, "loss": 0.0115, "step": 2866 }, { "epoch": 0.4880830779707184, "grad_norm": 1.4225810766220093, "learning_rate": 1e-06, "loss": 0.0131, "step": 2867 }, { "epoch": 0.4882533197139939, "grad_norm": 1.4328070878982544, "learning_rate": 1e-06, "loss": 0.0186, "step": 2868 }, { "epoch": 0.48842356145726934, "grad_norm": 1.6347346305847168, "learning_rate": 1e-06, "loss": 0.0111, "step": 2869 }, { "epoch": 0.4885938032005448, "grad_norm": 1.3069541454315186, "learning_rate": 1e-06, "loss": 0.0124, "step": 2870 }, { "epoch": 0.4887640449438202, "grad_norm": 1.1802916526794434, "learning_rate": 1e-06, "loss": 0.0111, "step": 2871 }, { "epoch": 0.48893428668709565, "grad_norm": 1.2617202997207642, "learning_rate": 1e-06, "loss": 0.0106, "step": 2872 }, { "epoch": 0.4891045284303711, "grad_norm": 1.1735727787017822, "learning_rate": 1e-06, "loss": 0.0113, "step": 2873 }, { "epoch": 0.48927477017364657, "grad_norm": 1.4026036262512207, "learning_rate": 1e-06, "loss": 0.0163, "step": 2874 }, { "epoch": 0.489445011916922, "grad_norm": 1.503218173980713, "learning_rate": 1e-06, "loss": 0.0135, "step": 2875 }, { "epoch": 0.4896152536601975, "grad_norm": 1.212363362312317, "learning_rate": 1e-06, "loss": 0.0138, "step": 2876 }, { "epoch": 0.48978549540347294, "grad_norm": 1.4121812582015991, "learning_rate": 1e-06, "loss": 0.0141, "step": 2877 }, { "epoch": 0.4899557371467484, "grad_norm": 1.3481481075286865, "learning_rate": 1e-06, "loss": 0.0136, "step": 2878 }, { "epoch": 0.49012597889002385, "grad_norm": 1.1686102151870728, "learning_rate": 1e-06, "loss": 0.0135, "step": 2879 }, { "epoch": 0.4902962206332993, "grad_norm": 1.311177372932434, "learning_rate": 1e-06, "loss": 0.0189, "step": 2880 }, { "epoch": 0.49046646237657476, "grad_norm": 1.1763383150100708, "learning_rate": 1e-06, "loss": 0.0133, "step": 2881 }, { "epoch": 0.49063670411985016, "grad_norm": 1.164952039718628, "learning_rate": 1e-06, "loss": 0.0095, "step": 2882 }, { "epoch": 0.4908069458631256, "grad_norm": 1.2506550550460815, "learning_rate": 1e-06, "loss": 0.0115, "step": 2883 }, { "epoch": 0.4909771876064011, "grad_norm": 1.4701337814331055, "learning_rate": 1e-06, "loss": 0.0109, "step": 2884 }, { "epoch": 0.49114742934967653, "grad_norm": 1.318747639656067, "learning_rate": 1e-06, "loss": 0.0109, "step": 2885 }, { "epoch": 0.491317671092952, "grad_norm": 1.1562262773513794, "learning_rate": 1e-06, "loss": 0.0121, "step": 2886 }, { "epoch": 0.49148791283622745, "grad_norm": 1.6450200080871582, "learning_rate": 1e-06, "loss": 0.0184, "step": 2887 }, { "epoch": 0.4916581545795029, "grad_norm": 1.0631816387176514, "learning_rate": 1e-06, "loss": 0.0121, "step": 2888 }, { "epoch": 0.49182839632277836, "grad_norm": 0.9874283671379089, "learning_rate": 1e-06, "loss": 0.0116, "step": 2889 }, { "epoch": 0.4919986380660538, "grad_norm": 1.4185420274734497, "learning_rate": 1e-06, "loss": 0.0162, "step": 2890 }, { "epoch": 0.4921688798093293, "grad_norm": 0.9347524642944336, "learning_rate": 1e-06, "loss": 0.0069, "step": 2891 }, { "epoch": 0.4923391215526047, "grad_norm": 0.9645169973373413, "learning_rate": 1e-06, "loss": 0.0113, "step": 2892 }, { "epoch": 0.49250936329588013, "grad_norm": 1.236021876335144, "learning_rate": 1e-06, "loss": 0.0136, "step": 2893 }, { "epoch": 0.4926796050391556, "grad_norm": 1.1450927257537842, "learning_rate": 1e-06, "loss": 0.0149, "step": 2894 }, { "epoch": 0.49284984678243104, "grad_norm": 1.5991291999816895, "learning_rate": 1e-06, "loss": 0.0155, "step": 2895 }, { "epoch": 0.4930200885257065, "grad_norm": 1.1790274381637573, "learning_rate": 1e-06, "loss": 0.0134, "step": 2896 }, { "epoch": 0.49319033026898196, "grad_norm": 1.1705747842788696, "learning_rate": 1e-06, "loss": 0.0102, "step": 2897 }, { "epoch": 0.4933605720122574, "grad_norm": 0.9381265640258789, "learning_rate": 1e-06, "loss": 0.0078, "step": 2898 }, { "epoch": 0.49353081375553287, "grad_norm": 1.261146903038025, "learning_rate": 1e-06, "loss": 0.014, "step": 2899 }, { "epoch": 0.4937010554988083, "grad_norm": 1.0365086793899536, "learning_rate": 1e-06, "loss": 0.0125, "step": 2900 }, { "epoch": 0.4938712972420838, "grad_norm": 1.20415198802948, "learning_rate": 1e-06, "loss": 0.0063, "step": 2901 }, { "epoch": 0.4940415389853592, "grad_norm": 1.1824898719787598, "learning_rate": 1e-06, "loss": 0.0101, "step": 2902 }, { "epoch": 0.49421178072863464, "grad_norm": 1.3033336400985718, "learning_rate": 1e-06, "loss": 0.0153, "step": 2903 }, { "epoch": 0.4943820224719101, "grad_norm": 1.490906834602356, "learning_rate": 1e-06, "loss": 0.0129, "step": 2904 }, { "epoch": 0.49455226421518556, "grad_norm": 1.298890471458435, "learning_rate": 1e-06, "loss": 0.0121, "step": 2905 }, { "epoch": 0.494722505958461, "grad_norm": 1.805892825126648, "learning_rate": 1e-06, "loss": 0.028, "step": 2906 }, { "epoch": 0.49489274770173647, "grad_norm": 1.0890419483184814, "learning_rate": 1e-06, "loss": 0.0098, "step": 2907 }, { "epoch": 0.4950629894450119, "grad_norm": 1.0342925786972046, "learning_rate": 1e-06, "loss": 0.0088, "step": 2908 }, { "epoch": 0.4952332311882874, "grad_norm": 1.5081183910369873, "learning_rate": 1e-06, "loss": 0.0134, "step": 2909 }, { "epoch": 0.49540347293156284, "grad_norm": 2.6579811573028564, "learning_rate": 1e-06, "loss": 0.0148, "step": 2910 }, { "epoch": 0.4955737146748383, "grad_norm": 1.3652353286743164, "learning_rate": 1e-06, "loss": 0.0125, "step": 2911 }, { "epoch": 0.4957439564181137, "grad_norm": 1.6202207803726196, "learning_rate": 1e-06, "loss": 0.0186, "step": 2912 }, { "epoch": 0.49591419816138915, "grad_norm": 1.1754422187805176, "learning_rate": 1e-06, "loss": 0.0154, "step": 2913 }, { "epoch": 0.4960844399046646, "grad_norm": 1.2409707307815552, "learning_rate": 1e-06, "loss": 0.0135, "step": 2914 }, { "epoch": 0.49625468164794007, "grad_norm": 1.7688099145889282, "learning_rate": 1e-06, "loss": 0.0294, "step": 2915 }, { "epoch": 0.4964249233912155, "grad_norm": 1.4038777351379395, "learning_rate": 1e-06, "loss": 0.0121, "step": 2916 }, { "epoch": 0.496595165134491, "grad_norm": 0.8886030316352844, "learning_rate": 1e-06, "loss": 0.0087, "step": 2917 }, { "epoch": 0.49676540687776644, "grad_norm": 2.1861026287078857, "learning_rate": 1e-06, "loss": 0.0101, "step": 2918 }, { "epoch": 0.4969356486210419, "grad_norm": 1.338416576385498, "learning_rate": 1e-06, "loss": 0.0111, "step": 2919 }, { "epoch": 0.49710589036431735, "grad_norm": 1.1682435274124146, "learning_rate": 1e-06, "loss": 0.0095, "step": 2920 }, { "epoch": 0.4972761321075928, "grad_norm": 1.538461446762085, "learning_rate": 1e-06, "loss": 0.0213, "step": 2921 }, { "epoch": 0.4974463738508682, "grad_norm": 1.5773953199386597, "learning_rate": 1e-06, "loss": 0.0114, "step": 2922 }, { "epoch": 0.49761661559414366, "grad_norm": 1.3020201921463013, "learning_rate": 1e-06, "loss": 0.0145, "step": 2923 }, { "epoch": 0.4977868573374191, "grad_norm": 1.1434447765350342, "learning_rate": 1e-06, "loss": 0.01, "step": 2924 }, { "epoch": 0.4979570990806946, "grad_norm": 1.2094712257385254, "learning_rate": 1e-06, "loss": 0.011, "step": 2925 }, { "epoch": 0.49812734082397003, "grad_norm": 1.0923223495483398, "learning_rate": 1e-06, "loss": 0.0066, "step": 2926 }, { "epoch": 0.4982975825672455, "grad_norm": 1.6678919792175293, "learning_rate": 1e-06, "loss": 0.0123, "step": 2927 }, { "epoch": 0.49846782431052095, "grad_norm": 1.2051362991333008, "learning_rate": 1e-06, "loss": 0.0095, "step": 2928 }, { "epoch": 0.4986380660537964, "grad_norm": 1.1779026985168457, "learning_rate": 1e-06, "loss": 0.0148, "step": 2929 }, { "epoch": 0.49880830779707186, "grad_norm": 1.1191192865371704, "learning_rate": 1e-06, "loss": 0.0091, "step": 2930 }, { "epoch": 0.4989785495403473, "grad_norm": 1.4694077968597412, "learning_rate": 1e-06, "loss": 0.0117, "step": 2931 }, { "epoch": 0.4991487912836227, "grad_norm": 1.1187613010406494, "learning_rate": 1e-06, "loss": 0.0126, "step": 2932 }, { "epoch": 0.4993190330268982, "grad_norm": 1.3665251731872559, "learning_rate": 1e-06, "loss": 0.0116, "step": 2933 }, { "epoch": 0.49948927477017363, "grad_norm": 2.8328680992126465, "learning_rate": 1e-06, "loss": 0.0335, "step": 2934 }, { "epoch": 0.4996595165134491, "grad_norm": 1.5214513540267944, "learning_rate": 1e-06, "loss": 0.0125, "step": 2935 }, { "epoch": 0.49982975825672454, "grad_norm": 1.1730064153671265, "learning_rate": 1e-06, "loss": 0.013, "step": 2936 }, { "epoch": 0.5, "grad_norm": 1.2666690349578857, "learning_rate": 1e-06, "loss": 0.0125, "step": 2937 }, { "epoch": 0.5001702417432754, "grad_norm": 1.3443002700805664, "learning_rate": 1e-06, "loss": 0.0124, "step": 2938 }, { "epoch": 0.5003404834865509, "grad_norm": 1.225641131401062, "learning_rate": 1e-06, "loss": 0.0089, "step": 2939 }, { "epoch": 0.5005107252298263, "grad_norm": 1.4913207292556763, "learning_rate": 1e-06, "loss": 0.0155, "step": 2940 }, { "epoch": 0.5006809669731018, "grad_norm": 1.65010666847229, "learning_rate": 1e-06, "loss": 0.0235, "step": 2941 }, { "epoch": 0.5008512087163772, "grad_norm": 0.9611451625823975, "learning_rate": 1e-06, "loss": 0.0096, "step": 2942 }, { "epoch": 0.5010214504596527, "grad_norm": 1.4015928506851196, "learning_rate": 1e-06, "loss": 0.0099, "step": 2943 }, { "epoch": 0.5011916922029281, "grad_norm": 0.9811087250709534, "learning_rate": 1e-06, "loss": 0.0103, "step": 2944 }, { "epoch": 0.5013619339462037, "grad_norm": 1.1719774007797241, "learning_rate": 1e-06, "loss": 0.0122, "step": 2945 }, { "epoch": 0.501532175689479, "grad_norm": 1.2584707736968994, "learning_rate": 1e-06, "loss": 0.011, "step": 2946 }, { "epoch": 0.5017024174327546, "grad_norm": 2.9327192306518555, "learning_rate": 1e-06, "loss": 0.0499, "step": 2947 }, { "epoch": 0.50187265917603, "grad_norm": 1.3452236652374268, "learning_rate": 1e-06, "loss": 0.0139, "step": 2948 }, { "epoch": 0.5020429009193054, "grad_norm": 1.2244794368743896, "learning_rate": 1e-06, "loss": 0.0104, "step": 2949 }, { "epoch": 0.5022131426625809, "grad_norm": 1.3074755668640137, "learning_rate": 1e-06, "loss": 0.0099, "step": 2950 }, { "epoch": 0.5023833844058563, "grad_norm": 1.3225749731063843, "learning_rate": 1e-06, "loss": 0.0242, "step": 2951 }, { "epoch": 0.5025536261491318, "grad_norm": 1.5591984987258911, "learning_rate": 1e-06, "loss": 0.0122, "step": 2952 }, { "epoch": 0.5027238678924072, "grad_norm": 1.4079065322875977, "learning_rate": 1e-06, "loss": 0.01, "step": 2953 }, { "epoch": 0.5028941096356827, "grad_norm": 1.699751377105713, "learning_rate": 1e-06, "loss": 0.0138, "step": 2954 }, { "epoch": 0.5030643513789581, "grad_norm": 1.5341709852218628, "learning_rate": 1e-06, "loss": 0.0247, "step": 2955 }, { "epoch": 0.5032345931222336, "grad_norm": 1.3966187238693237, "learning_rate": 1e-06, "loss": 0.0099, "step": 2956 }, { "epoch": 0.503404834865509, "grad_norm": 1.1383638381958008, "learning_rate": 1e-06, "loss": 0.0119, "step": 2957 }, { "epoch": 0.5035750766087844, "grad_norm": 1.2133922576904297, "learning_rate": 1e-06, "loss": 0.0134, "step": 2958 }, { "epoch": 0.5037453183520599, "grad_norm": 1.335353970527649, "learning_rate": 1e-06, "loss": 0.013, "step": 2959 }, { "epoch": 0.5039155600953353, "grad_norm": 1.509978175163269, "learning_rate": 1e-06, "loss": 0.0143, "step": 2960 }, { "epoch": 0.5040858018386108, "grad_norm": 1.110413670539856, "learning_rate": 1e-06, "loss": 0.0104, "step": 2961 }, { "epoch": 0.5042560435818862, "grad_norm": 1.325568437576294, "learning_rate": 1e-06, "loss": 0.0109, "step": 2962 }, { "epoch": 0.5044262853251618, "grad_norm": 1.420128345489502, "learning_rate": 1e-06, "loss": 0.015, "step": 2963 }, { "epoch": 0.5045965270684372, "grad_norm": 0.9586927890777588, "learning_rate": 1e-06, "loss": 0.0074, "step": 2964 }, { "epoch": 0.5047667688117127, "grad_norm": 1.4876079559326172, "learning_rate": 1e-06, "loss": 0.0169, "step": 2965 }, { "epoch": 0.5049370105549881, "grad_norm": 1.4042977094650269, "learning_rate": 1e-06, "loss": 0.0127, "step": 2966 }, { "epoch": 0.5051072522982636, "grad_norm": 1.723920226097107, "learning_rate": 1e-06, "loss": 0.0132, "step": 2967 }, { "epoch": 0.505277494041539, "grad_norm": 1.1755249500274658, "learning_rate": 1e-06, "loss": 0.0139, "step": 2968 }, { "epoch": 0.5054477357848144, "grad_norm": 1.0757211446762085, "learning_rate": 1e-06, "loss": 0.0095, "step": 2969 }, { "epoch": 0.5056179775280899, "grad_norm": 1.336470603942871, "learning_rate": 1e-06, "loss": 0.0131, "step": 2970 }, { "epoch": 0.5057882192713653, "grad_norm": 1.0691769123077393, "learning_rate": 1e-06, "loss": 0.0112, "step": 2971 }, { "epoch": 0.5059584610146408, "grad_norm": 1.1425819396972656, "learning_rate": 1e-06, "loss": 0.0089, "step": 2972 }, { "epoch": 0.5061287027579162, "grad_norm": 1.1680139303207397, "learning_rate": 1e-06, "loss": 0.0128, "step": 2973 }, { "epoch": 0.5062989445011917, "grad_norm": 0.9332813024520874, "learning_rate": 1e-06, "loss": 0.0088, "step": 2974 }, { "epoch": 0.5064691862444671, "grad_norm": 0.983639121055603, "learning_rate": 1e-06, "loss": 0.0096, "step": 2975 }, { "epoch": 0.5066394279877426, "grad_norm": 1.32431161403656, "learning_rate": 1e-06, "loss": 0.0165, "step": 2976 }, { "epoch": 0.506809669731018, "grad_norm": 1.2261784076690674, "learning_rate": 1e-06, "loss": 0.0108, "step": 2977 }, { "epoch": 0.5069799114742934, "grad_norm": 3.250417470932007, "learning_rate": 1e-06, "loss": 0.0345, "step": 2978 }, { "epoch": 0.507150153217569, "grad_norm": 1.4886819124221802, "learning_rate": 1e-06, "loss": 0.0108, "step": 2979 }, { "epoch": 0.5073203949608444, "grad_norm": 1.391330361366272, "learning_rate": 1e-06, "loss": 0.0135, "step": 2980 }, { "epoch": 0.5074906367041199, "grad_norm": 1.0295233726501465, "learning_rate": 1e-06, "loss": 0.0098, "step": 2981 }, { "epoch": 0.5076608784473953, "grad_norm": 1.6181305646896362, "learning_rate": 1e-06, "loss": 0.0126, "step": 2982 }, { "epoch": 0.5078311201906708, "grad_norm": 1.2847509384155273, "learning_rate": 1e-06, "loss": 0.0105, "step": 2983 }, { "epoch": 0.5080013619339462, "grad_norm": 1.1932363510131836, "learning_rate": 1e-06, "loss": 0.0119, "step": 2984 }, { "epoch": 0.5081716036772217, "grad_norm": 1.5506092309951782, "learning_rate": 1e-06, "loss": 0.0108, "step": 2985 }, { "epoch": 0.5083418454204971, "grad_norm": 1.11466646194458, "learning_rate": 1e-06, "loss": 0.0098, "step": 2986 }, { "epoch": 0.5085120871637726, "grad_norm": 1.4007675647735596, "learning_rate": 1e-06, "loss": 0.0114, "step": 2987 }, { "epoch": 0.508682328907048, "grad_norm": 1.0601606369018555, "learning_rate": 1e-06, "loss": 0.0084, "step": 2988 }, { "epoch": 0.5088525706503234, "grad_norm": 1.1195155382156372, "learning_rate": 1e-06, "loss": 0.0104, "step": 2989 }, { "epoch": 0.5090228123935989, "grad_norm": 1.2109330892562866, "learning_rate": 1e-06, "loss": 0.0176, "step": 2990 }, { "epoch": 0.5091930541368743, "grad_norm": 1.20180082321167, "learning_rate": 1e-06, "loss": 0.0119, "step": 2991 }, { "epoch": 0.5093632958801498, "grad_norm": 1.3377587795257568, "learning_rate": 1e-06, "loss": 0.0121, "step": 2992 }, { "epoch": 0.5095335376234252, "grad_norm": 1.0854805707931519, "learning_rate": 1e-06, "loss": 0.0081, "step": 2993 }, { "epoch": 0.5097037793667007, "grad_norm": 1.8077919483184814, "learning_rate": 1e-06, "loss": 0.0248, "step": 2994 }, { "epoch": 0.5098740211099761, "grad_norm": 1.4350749254226685, "learning_rate": 1e-06, "loss": 0.0113, "step": 2995 }, { "epoch": 0.5100442628532517, "grad_norm": 1.6307591199874878, "learning_rate": 1e-06, "loss": 0.0142, "step": 2996 }, { "epoch": 0.5102145045965271, "grad_norm": 1.2001097202301025, "learning_rate": 1e-06, "loss": 0.0098, "step": 2997 }, { "epoch": 0.5103847463398025, "grad_norm": 0.9367316365242004, "learning_rate": 1e-06, "loss": 0.0073, "step": 2998 }, { "epoch": 0.510554988083078, "grad_norm": 1.4046683311462402, "learning_rate": 1e-06, "loss": 0.0142, "step": 2999 }, { "epoch": 0.5107252298263534, "grad_norm": 1.2139997482299805, "learning_rate": 1e-06, "loss": 0.0213, "step": 3000 }, { "epoch": 0.5107252298263534, "eval_loss": 0.31468451023101807, "eval_runtime": 21.1588, "eval_samples_per_second": 14.178, "eval_steps_per_second": 0.378, "step": 3000 }, { "epoch": 0.5108954715696289, "grad_norm": 1.300648808479309, "learning_rate": 1e-06, "loss": 0.0112, "step": 3001 }, { "epoch": 0.5110657133129043, "grad_norm": 1.0096200704574585, "learning_rate": 1e-06, "loss": 0.0087, "step": 3002 }, { "epoch": 0.5112359550561798, "grad_norm": 1.1899932622909546, "learning_rate": 1e-06, "loss": 0.0109, "step": 3003 }, { "epoch": 0.5114061967994552, "grad_norm": 0.9526229500770569, "learning_rate": 1e-06, "loss": 0.0092, "step": 3004 }, { "epoch": 0.5115764385427307, "grad_norm": 1.8259762525558472, "learning_rate": 1e-06, "loss": 0.0231, "step": 3005 }, { "epoch": 0.5117466802860061, "grad_norm": 1.3105390071868896, "learning_rate": 1e-06, "loss": 0.0119, "step": 3006 }, { "epoch": 0.5119169220292816, "grad_norm": 1.3401422500610352, "learning_rate": 1e-06, "loss": 0.0109, "step": 3007 }, { "epoch": 0.512087163772557, "grad_norm": 1.4094361066818237, "learning_rate": 1e-06, "loss": 0.0152, "step": 3008 }, { "epoch": 0.5122574055158324, "grad_norm": 1.5171374082565308, "learning_rate": 1e-06, "loss": 0.0166, "step": 3009 }, { "epoch": 0.5124276472591079, "grad_norm": 1.069426417350769, "learning_rate": 1e-06, "loss": 0.0162, "step": 3010 }, { "epoch": 0.5125978890023833, "grad_norm": 1.3579994440078735, "learning_rate": 1e-06, "loss": 0.0136, "step": 3011 }, { "epoch": 0.5127681307456589, "grad_norm": 0.9840942621231079, "learning_rate": 1e-06, "loss": 0.0114, "step": 3012 }, { "epoch": 0.5129383724889343, "grad_norm": 1.523397445678711, "learning_rate": 1e-06, "loss": 0.0139, "step": 3013 }, { "epoch": 0.5131086142322098, "grad_norm": 1.2539904117584229, "learning_rate": 1e-06, "loss": 0.0112, "step": 3014 }, { "epoch": 0.5132788559754852, "grad_norm": 1.0365042686462402, "learning_rate": 1e-06, "loss": 0.0086, "step": 3015 }, { "epoch": 0.5134490977187607, "grad_norm": 0.8410796523094177, "learning_rate": 1e-06, "loss": 0.0089, "step": 3016 }, { "epoch": 0.5136193394620361, "grad_norm": 1.3191943168640137, "learning_rate": 1e-06, "loss": 0.0182, "step": 3017 }, { "epoch": 0.5137895812053116, "grad_norm": 0.8363417387008667, "learning_rate": 1e-06, "loss": 0.0068, "step": 3018 }, { "epoch": 0.513959822948587, "grad_norm": 1.1895376443862915, "learning_rate": 1e-06, "loss": 0.0141, "step": 3019 }, { "epoch": 0.5141300646918624, "grad_norm": 1.4793716669082642, "learning_rate": 1e-06, "loss": 0.0191, "step": 3020 }, { "epoch": 0.5143003064351379, "grad_norm": 1.583125114440918, "learning_rate": 1e-06, "loss": 0.0159, "step": 3021 }, { "epoch": 0.5144705481784133, "grad_norm": 1.3003768920898438, "learning_rate": 1e-06, "loss": 0.0138, "step": 3022 }, { "epoch": 0.5146407899216888, "grad_norm": 1.2707653045654297, "learning_rate": 1e-06, "loss": 0.0095, "step": 3023 }, { "epoch": 0.5148110316649642, "grad_norm": 1.094768762588501, "learning_rate": 1e-06, "loss": 0.0086, "step": 3024 }, { "epoch": 0.5149812734082397, "grad_norm": 0.997310221195221, "learning_rate": 1e-06, "loss": 0.0073, "step": 3025 }, { "epoch": 0.5151515151515151, "grad_norm": 1.2261766195297241, "learning_rate": 1e-06, "loss": 0.0144, "step": 3026 }, { "epoch": 0.5153217568947907, "grad_norm": 1.513581395149231, "learning_rate": 1e-06, "loss": 0.0145, "step": 3027 }, { "epoch": 0.515491998638066, "grad_norm": 1.1396069526672363, "learning_rate": 1e-06, "loss": 0.0107, "step": 3028 }, { "epoch": 0.5156622403813415, "grad_norm": 1.0320284366607666, "learning_rate": 1e-06, "loss": 0.0074, "step": 3029 }, { "epoch": 0.515832482124617, "grad_norm": 1.4781116247177124, "learning_rate": 1e-06, "loss": 0.0167, "step": 3030 }, { "epoch": 0.5160027238678924, "grad_norm": 1.4949243068695068, "learning_rate": 1e-06, "loss": 0.0176, "step": 3031 }, { "epoch": 0.5161729656111679, "grad_norm": 1.2848188877105713, "learning_rate": 1e-06, "loss": 0.0142, "step": 3032 }, { "epoch": 0.5163432073544433, "grad_norm": 1.240247368812561, "learning_rate": 1e-06, "loss": 0.0103, "step": 3033 }, { "epoch": 0.5165134490977188, "grad_norm": 0.9559020400047302, "learning_rate": 1e-06, "loss": 0.0091, "step": 3034 }, { "epoch": 0.5166836908409942, "grad_norm": 1.2412127256393433, "learning_rate": 1e-06, "loss": 0.012, "step": 3035 }, { "epoch": 0.5168539325842697, "grad_norm": 1.1315244436264038, "learning_rate": 1e-06, "loss": 0.0145, "step": 3036 }, { "epoch": 0.5170241743275451, "grad_norm": 1.2756315469741821, "learning_rate": 1e-06, "loss": 0.0113, "step": 3037 }, { "epoch": 0.5171944160708206, "grad_norm": 1.620142936706543, "learning_rate": 1e-06, "loss": 0.0264, "step": 3038 }, { "epoch": 0.517364657814096, "grad_norm": 1.0165133476257324, "learning_rate": 1e-06, "loss": 0.0086, "step": 3039 }, { "epoch": 0.5175348995573714, "grad_norm": 1.0977479219436646, "learning_rate": 1e-06, "loss": 0.0075, "step": 3040 }, { "epoch": 0.5177051413006469, "grad_norm": 1.153247356414795, "learning_rate": 1e-06, "loss": 0.0092, "step": 3041 }, { "epoch": 0.5178753830439223, "grad_norm": 1.3911783695220947, "learning_rate": 1e-06, "loss": 0.0167, "step": 3042 }, { "epoch": 0.5180456247871978, "grad_norm": 1.2800657749176025, "learning_rate": 1e-06, "loss": 0.0115, "step": 3043 }, { "epoch": 0.5182158665304732, "grad_norm": 1.216147780418396, "learning_rate": 1e-06, "loss": 0.0126, "step": 3044 }, { "epoch": 0.5183861082737488, "grad_norm": 1.263554573059082, "learning_rate": 1e-06, "loss": 0.0114, "step": 3045 }, { "epoch": 0.5185563500170242, "grad_norm": 1.123915672302246, "learning_rate": 1e-06, "loss": 0.0082, "step": 3046 }, { "epoch": 0.5187265917602997, "grad_norm": 1.1444989442825317, "learning_rate": 1e-06, "loss": 0.0131, "step": 3047 }, { "epoch": 0.5188968335035751, "grad_norm": 1.2586398124694824, "learning_rate": 1e-06, "loss": 0.0084, "step": 3048 }, { "epoch": 0.5190670752468505, "grad_norm": 1.3242063522338867, "learning_rate": 1e-06, "loss": 0.0113, "step": 3049 }, { "epoch": 0.519237316990126, "grad_norm": 1.1678466796875, "learning_rate": 1e-06, "loss": 0.0103, "step": 3050 }, { "epoch": 0.5194075587334014, "grad_norm": 1.157733678817749, "learning_rate": 1e-06, "loss": 0.0108, "step": 3051 }, { "epoch": 0.5195778004766769, "grad_norm": 1.495124340057373, "learning_rate": 1e-06, "loss": 0.0091, "step": 3052 }, { "epoch": 0.5197480422199523, "grad_norm": 1.0295337438583374, "learning_rate": 1e-06, "loss": 0.0097, "step": 3053 }, { "epoch": 0.5199182839632278, "grad_norm": 0.9237085580825806, "learning_rate": 1e-06, "loss": 0.0085, "step": 3054 }, { "epoch": 0.5200885257065032, "grad_norm": 1.2110072374343872, "learning_rate": 1e-06, "loss": 0.0122, "step": 3055 }, { "epoch": 0.5202587674497787, "grad_norm": 1.4104846715927124, "learning_rate": 1e-06, "loss": 0.0115, "step": 3056 }, { "epoch": 0.5204290091930541, "grad_norm": 1.0618224143981934, "learning_rate": 1e-06, "loss": 0.0115, "step": 3057 }, { "epoch": 0.5205992509363296, "grad_norm": 1.23426353931427, "learning_rate": 1e-06, "loss": 0.011, "step": 3058 }, { "epoch": 0.520769492679605, "grad_norm": 1.1079343557357788, "learning_rate": 1e-06, "loss": 0.0089, "step": 3059 }, { "epoch": 0.5209397344228804, "grad_norm": 1.472641110420227, "learning_rate": 1e-06, "loss": 0.0139, "step": 3060 }, { "epoch": 0.521109976166156, "grad_norm": 1.1031280755996704, "learning_rate": 1e-06, "loss": 0.0103, "step": 3061 }, { "epoch": 0.5212802179094314, "grad_norm": 1.4007097482681274, "learning_rate": 1e-06, "loss": 0.0093, "step": 3062 }, { "epoch": 0.5214504596527069, "grad_norm": 1.2048598527908325, "learning_rate": 1e-06, "loss": 0.0141, "step": 3063 }, { "epoch": 0.5216207013959823, "grad_norm": 1.1933079957962036, "learning_rate": 1e-06, "loss": 0.012, "step": 3064 }, { "epoch": 0.5217909431392578, "grad_norm": 1.139629602432251, "learning_rate": 1e-06, "loss": 0.0107, "step": 3065 }, { "epoch": 0.5219611848825332, "grad_norm": 1.330549716949463, "learning_rate": 1e-06, "loss": 0.0087, "step": 3066 }, { "epoch": 0.5221314266258087, "grad_norm": 0.8479977250099182, "learning_rate": 1e-06, "loss": 0.009, "step": 3067 }, { "epoch": 0.5223016683690841, "grad_norm": 1.093963623046875, "learning_rate": 1e-06, "loss": 0.0102, "step": 3068 }, { "epoch": 0.5224719101123596, "grad_norm": 1.0407942533493042, "learning_rate": 1e-06, "loss": 0.0104, "step": 3069 }, { "epoch": 0.522642151855635, "grad_norm": 1.4160226583480835, "learning_rate": 1e-06, "loss": 0.0178, "step": 3070 }, { "epoch": 0.5228123935989104, "grad_norm": 1.3179391622543335, "learning_rate": 1e-06, "loss": 0.01, "step": 3071 }, { "epoch": 0.5229826353421859, "grad_norm": 1.8564910888671875, "learning_rate": 1e-06, "loss": 0.0199, "step": 3072 }, { "epoch": 0.5231528770854613, "grad_norm": 1.2088946104049683, "learning_rate": 1e-06, "loss": 0.0129, "step": 3073 }, { "epoch": 0.5233231188287368, "grad_norm": 1.1301778554916382, "learning_rate": 1e-06, "loss": 0.0109, "step": 3074 }, { "epoch": 0.5234933605720122, "grad_norm": 1.4394501447677612, "learning_rate": 1e-06, "loss": 0.0135, "step": 3075 }, { "epoch": 0.5236636023152877, "grad_norm": 1.0562634468078613, "learning_rate": 1e-06, "loss": 0.0095, "step": 3076 }, { "epoch": 0.5238338440585631, "grad_norm": 1.2691398859024048, "learning_rate": 1e-06, "loss": 0.0072, "step": 3077 }, { "epoch": 0.5240040858018387, "grad_norm": 1.257546305656433, "learning_rate": 1e-06, "loss": 0.013, "step": 3078 }, { "epoch": 0.5241743275451141, "grad_norm": 1.5691494941711426, "learning_rate": 1e-06, "loss": 0.0152, "step": 3079 }, { "epoch": 0.5243445692883895, "grad_norm": 1.1722148656845093, "learning_rate": 1e-06, "loss": 0.0078, "step": 3080 }, { "epoch": 0.524514811031665, "grad_norm": 0.8400589823722839, "learning_rate": 1e-06, "loss": 0.008, "step": 3081 }, { "epoch": 0.5246850527749404, "grad_norm": 1.016576886177063, "learning_rate": 1e-06, "loss": 0.0127, "step": 3082 }, { "epoch": 0.5248552945182159, "grad_norm": 1.129569411277771, "learning_rate": 1e-06, "loss": 0.0083, "step": 3083 }, { "epoch": 0.5250255362614913, "grad_norm": 1.5929679870605469, "learning_rate": 1e-06, "loss": 0.0109, "step": 3084 }, { "epoch": 0.5251957780047668, "grad_norm": 1.9398027658462524, "learning_rate": 1e-06, "loss": 0.0098, "step": 3085 }, { "epoch": 0.5253660197480422, "grad_norm": 1.6181927919387817, "learning_rate": 1e-06, "loss": 0.0182, "step": 3086 }, { "epoch": 0.5255362614913177, "grad_norm": 1.1063342094421387, "learning_rate": 1e-06, "loss": 0.0103, "step": 3087 }, { "epoch": 0.5257065032345931, "grad_norm": 1.2465970516204834, "learning_rate": 1e-06, "loss": 0.0136, "step": 3088 }, { "epoch": 0.5258767449778686, "grad_norm": 0.9947730302810669, "learning_rate": 1e-06, "loss": 0.0081, "step": 3089 }, { "epoch": 0.526046986721144, "grad_norm": 1.0790003538131714, "learning_rate": 1e-06, "loss": 0.0095, "step": 3090 }, { "epoch": 0.5262172284644194, "grad_norm": 1.3154680728912354, "learning_rate": 1e-06, "loss": 0.0102, "step": 3091 }, { "epoch": 0.5263874702076949, "grad_norm": 1.074992060661316, "learning_rate": 1e-06, "loss": 0.0096, "step": 3092 }, { "epoch": 0.5265577119509703, "grad_norm": 1.3531382083892822, "learning_rate": 1e-06, "loss": 0.0127, "step": 3093 }, { "epoch": 0.5267279536942459, "grad_norm": 1.8065028190612793, "learning_rate": 1e-06, "loss": 0.0149, "step": 3094 }, { "epoch": 0.5268981954375213, "grad_norm": 1.652100920677185, "learning_rate": 1e-06, "loss": 0.015, "step": 3095 }, { "epoch": 0.5270684371807968, "grad_norm": 1.073090672492981, "learning_rate": 1e-06, "loss": 0.0123, "step": 3096 }, { "epoch": 0.5272386789240722, "grad_norm": 1.0769790410995483, "learning_rate": 1e-06, "loss": 0.01, "step": 3097 }, { "epoch": 0.5274089206673477, "grad_norm": 1.2385014295578003, "learning_rate": 1e-06, "loss": 0.0091, "step": 3098 }, { "epoch": 0.5275791624106231, "grad_norm": 1.9769033193588257, "learning_rate": 1e-06, "loss": 0.0126, "step": 3099 }, { "epoch": 0.5277494041538985, "grad_norm": 1.2743234634399414, "learning_rate": 1e-06, "loss": 0.0085, "step": 3100 }, { "epoch": 0.527919645897174, "grad_norm": 0.9275697469711304, "learning_rate": 1e-06, "loss": 0.0085, "step": 3101 }, { "epoch": 0.5280898876404494, "grad_norm": 1.1067229509353638, "learning_rate": 1e-06, "loss": 0.0112, "step": 3102 }, { "epoch": 0.5282601293837249, "grad_norm": 1.1381607055664062, "learning_rate": 1e-06, "loss": 0.0103, "step": 3103 }, { "epoch": 0.5284303711270003, "grad_norm": 1.1659680604934692, "learning_rate": 1e-06, "loss": 0.0104, "step": 3104 }, { "epoch": 0.5286006128702758, "grad_norm": 1.089293122291565, "learning_rate": 1e-06, "loss": 0.0101, "step": 3105 }, { "epoch": 0.5287708546135512, "grad_norm": 1.252436876296997, "learning_rate": 1e-06, "loss": 0.0097, "step": 3106 }, { "epoch": 0.5289410963568267, "grad_norm": 1.4552236795425415, "learning_rate": 1e-06, "loss": 0.0137, "step": 3107 }, { "epoch": 0.5291113381001021, "grad_norm": 1.2212755680084229, "learning_rate": 1e-06, "loss": 0.0107, "step": 3108 }, { "epoch": 0.5292815798433776, "grad_norm": 1.3947482109069824, "learning_rate": 1e-06, "loss": 0.0149, "step": 3109 }, { "epoch": 0.529451821586653, "grad_norm": 1.2081656455993652, "learning_rate": 1e-06, "loss": 0.0095, "step": 3110 }, { "epoch": 0.5296220633299284, "grad_norm": 1.2081656455993652, "learning_rate": 1e-06, "loss": 0.0395, "step": 3111 }, { "epoch": 0.529792305073204, "grad_norm": 1.6731754541397095, "learning_rate": 1e-06, "loss": 0.0147, "step": 3112 }, { "epoch": 0.5299625468164794, "grad_norm": 1.3174570798873901, "learning_rate": 1e-06, "loss": 0.0143, "step": 3113 }, { "epoch": 0.5301327885597549, "grad_norm": 1.2845019102096558, "learning_rate": 1e-06, "loss": 0.0094, "step": 3114 }, { "epoch": 0.5303030303030303, "grad_norm": 1.1987799406051636, "learning_rate": 1e-06, "loss": 0.0086, "step": 3115 }, { "epoch": 0.5304732720463058, "grad_norm": 0.9767221808433533, "learning_rate": 1e-06, "loss": 0.0066, "step": 3116 }, { "epoch": 0.5306435137895812, "grad_norm": 1.1443259716033936, "learning_rate": 1e-06, "loss": 0.0109, "step": 3117 }, { "epoch": 0.5308137555328567, "grad_norm": 1.4838154315948486, "learning_rate": 1e-06, "loss": 0.0178, "step": 3118 }, { "epoch": 0.5309839972761321, "grad_norm": 1.5887151956558228, "learning_rate": 1e-06, "loss": 0.0122, "step": 3119 }, { "epoch": 0.5311542390194075, "grad_norm": 1.2178723812103271, "learning_rate": 1e-06, "loss": 0.0101, "step": 3120 }, { "epoch": 0.531324480762683, "grad_norm": 1.0301523208618164, "learning_rate": 1e-06, "loss": 0.0078, "step": 3121 }, { "epoch": 0.5314947225059584, "grad_norm": 1.1088286638259888, "learning_rate": 1e-06, "loss": 0.0107, "step": 3122 }, { "epoch": 0.5316649642492339, "grad_norm": 1.2651501893997192, "learning_rate": 1e-06, "loss": 0.0141, "step": 3123 }, { "epoch": 0.5318352059925093, "grad_norm": 1.6318732500076294, "learning_rate": 1e-06, "loss": 0.0198, "step": 3124 }, { "epoch": 0.5320054477357848, "grad_norm": 1.1797488927841187, "learning_rate": 1e-06, "loss": 0.0127, "step": 3125 }, { "epoch": 0.5321756894790602, "grad_norm": 1.480616569519043, "learning_rate": 1e-06, "loss": 0.0091, "step": 3126 }, { "epoch": 0.5323459312223358, "grad_norm": 1.1608449220657349, "learning_rate": 1e-06, "loss": 0.0095, "step": 3127 }, { "epoch": 0.5325161729656112, "grad_norm": 1.0857185125350952, "learning_rate": 1e-06, "loss": 0.0111, "step": 3128 }, { "epoch": 0.5326864147088867, "grad_norm": 1.1040613651275635, "learning_rate": 1e-06, "loss": 0.0082, "step": 3129 }, { "epoch": 0.5328566564521621, "grad_norm": 1.3238484859466553, "learning_rate": 1e-06, "loss": 0.01, "step": 3130 }, { "epoch": 0.5330268981954375, "grad_norm": 1.1844618320465088, "learning_rate": 1e-06, "loss": 0.0153, "step": 3131 }, { "epoch": 0.533197139938713, "grad_norm": 1.6046870946884155, "learning_rate": 1e-06, "loss": 0.0159, "step": 3132 }, { "epoch": 0.5333673816819884, "grad_norm": 1.2018859386444092, "learning_rate": 1e-06, "loss": 0.01, "step": 3133 }, { "epoch": 0.5335376234252639, "grad_norm": 1.0841635465621948, "learning_rate": 1e-06, "loss": 0.0111, "step": 3134 }, { "epoch": 0.5337078651685393, "grad_norm": 1.3309770822525024, "learning_rate": 1e-06, "loss": 0.0162, "step": 3135 }, { "epoch": 0.5338781069118148, "grad_norm": 1.043618083000183, "learning_rate": 1e-06, "loss": 0.0085, "step": 3136 }, { "epoch": 0.5340483486550902, "grad_norm": 1.2823837995529175, "learning_rate": 1e-06, "loss": 0.0108, "step": 3137 }, { "epoch": 0.5342185903983657, "grad_norm": 1.3309781551361084, "learning_rate": 1e-06, "loss": 0.011, "step": 3138 }, { "epoch": 0.5343888321416411, "grad_norm": 1.053794264793396, "learning_rate": 1e-06, "loss": 0.0119, "step": 3139 }, { "epoch": 0.5345590738849166, "grad_norm": 1.136751413345337, "learning_rate": 1e-06, "loss": 0.0087, "step": 3140 }, { "epoch": 0.534729315628192, "grad_norm": 1.0801527500152588, "learning_rate": 1e-06, "loss": 0.0071, "step": 3141 }, { "epoch": 0.5348995573714674, "grad_norm": 1.2816129922866821, "learning_rate": 1e-06, "loss": 0.0147, "step": 3142 }, { "epoch": 0.535069799114743, "grad_norm": 1.2644041776657104, "learning_rate": 1e-06, "loss": 0.0076, "step": 3143 }, { "epoch": 0.5352400408580184, "grad_norm": 1.4873162508010864, "learning_rate": 1e-06, "loss": 0.0158, "step": 3144 }, { "epoch": 0.5354102826012939, "grad_norm": 1.1576414108276367, "learning_rate": 1e-06, "loss": 0.0135, "step": 3145 }, { "epoch": 0.5355805243445693, "grad_norm": 1.1078191995620728, "learning_rate": 1e-06, "loss": 0.0112, "step": 3146 }, { "epoch": 0.5357507660878448, "grad_norm": 1.2426562309265137, "learning_rate": 1e-06, "loss": 0.0109, "step": 3147 }, { "epoch": 0.5359210078311202, "grad_norm": 1.977630615234375, "learning_rate": 1e-06, "loss": 0.0157, "step": 3148 }, { "epoch": 0.5360912495743957, "grad_norm": 1.5125465393066406, "learning_rate": 1e-06, "loss": 0.0126, "step": 3149 }, { "epoch": 0.5362614913176711, "grad_norm": 1.4426789283752441, "learning_rate": 1e-06, "loss": 0.0115, "step": 3150 }, { "epoch": 0.5364317330609465, "grad_norm": 1.0922977924346924, "learning_rate": 1e-06, "loss": 0.0116, "step": 3151 }, { "epoch": 0.536601974804222, "grad_norm": 1.118429183959961, "learning_rate": 1e-06, "loss": 0.0098, "step": 3152 }, { "epoch": 0.5367722165474974, "grad_norm": 1.0450609922409058, "learning_rate": 1e-06, "loss": 0.009, "step": 3153 }, { "epoch": 0.5369424582907729, "grad_norm": 1.1401301622390747, "learning_rate": 1e-06, "loss": 0.0093, "step": 3154 }, { "epoch": 0.5371127000340483, "grad_norm": 0.9356278777122498, "learning_rate": 1e-06, "loss": 0.0086, "step": 3155 }, { "epoch": 0.5372829417773238, "grad_norm": 0.9514819979667664, "learning_rate": 1e-06, "loss": 0.0097, "step": 3156 }, { "epoch": 0.5374531835205992, "grad_norm": 1.2086161375045776, "learning_rate": 1e-06, "loss": 0.0111, "step": 3157 }, { "epoch": 0.5376234252638747, "grad_norm": 1.0937926769256592, "learning_rate": 1e-06, "loss": 0.0095, "step": 3158 }, { "epoch": 0.5377936670071501, "grad_norm": 1.2354090213775635, "learning_rate": 1e-06, "loss": 0.0121, "step": 3159 }, { "epoch": 0.5379639087504257, "grad_norm": 1.7234115600585938, "learning_rate": 1e-06, "loss": 0.0144, "step": 3160 }, { "epoch": 0.5381341504937011, "grad_norm": 1.390219807624817, "learning_rate": 1e-06, "loss": 0.0095, "step": 3161 }, { "epoch": 0.5383043922369765, "grad_norm": 1.2615468502044678, "learning_rate": 1e-06, "loss": 0.0102, "step": 3162 }, { "epoch": 0.538474633980252, "grad_norm": 1.4214125871658325, "learning_rate": 1e-06, "loss": 0.0086, "step": 3163 }, { "epoch": 0.5386448757235274, "grad_norm": 1.3602176904678345, "learning_rate": 1e-06, "loss": 0.0139, "step": 3164 }, { "epoch": 0.5388151174668029, "grad_norm": 1.4177559614181519, "learning_rate": 1e-06, "loss": 0.0143, "step": 3165 }, { "epoch": 0.5389853592100783, "grad_norm": 0.868809163570404, "learning_rate": 1e-06, "loss": 0.0094, "step": 3166 }, { "epoch": 0.5391556009533538, "grad_norm": 1.1446378231048584, "learning_rate": 1e-06, "loss": 0.0098, "step": 3167 }, { "epoch": 0.5393258426966292, "grad_norm": 1.3364777565002441, "learning_rate": 1e-06, "loss": 0.0112, "step": 3168 }, { "epoch": 0.5394960844399047, "grad_norm": 1.2700159549713135, "learning_rate": 1e-06, "loss": 0.0101, "step": 3169 }, { "epoch": 0.5396663261831801, "grad_norm": 1.1539145708084106, "learning_rate": 1e-06, "loss": 0.0092, "step": 3170 }, { "epoch": 0.5398365679264555, "grad_norm": 1.2744473218917847, "learning_rate": 1e-06, "loss": 0.0146, "step": 3171 }, { "epoch": 0.540006809669731, "grad_norm": 1.206419587135315, "learning_rate": 1e-06, "loss": 0.0127, "step": 3172 }, { "epoch": 0.5401770514130064, "grad_norm": 1.7593352794647217, "learning_rate": 1e-06, "loss": 0.0106, "step": 3173 }, { "epoch": 0.5403472931562819, "grad_norm": 1.1174089908599854, "learning_rate": 1e-06, "loss": 0.013, "step": 3174 }, { "epoch": 0.5405175348995573, "grad_norm": 1.2210553884506226, "learning_rate": 1e-06, "loss": 0.012, "step": 3175 }, { "epoch": 0.5406877766428329, "grad_norm": 1.1429721117019653, "learning_rate": 1e-06, "loss": 0.0091, "step": 3176 }, { "epoch": 0.5408580183861083, "grad_norm": 1.086243748664856, "learning_rate": 1e-06, "loss": 0.0079, "step": 3177 }, { "epoch": 0.5410282601293838, "grad_norm": 1.1069355010986328, "learning_rate": 1e-06, "loss": 0.0148, "step": 3178 }, { "epoch": 0.5411985018726592, "grad_norm": 1.1497927904129028, "learning_rate": 1e-06, "loss": 0.0127, "step": 3179 }, { "epoch": 0.5413687436159347, "grad_norm": 1.2102113962173462, "learning_rate": 1e-06, "loss": 0.0113, "step": 3180 }, { "epoch": 0.5415389853592101, "grad_norm": 2.604693651199341, "learning_rate": 1e-06, "loss": 0.0258, "step": 3181 }, { "epoch": 0.5417092271024855, "grad_norm": 1.2018678188323975, "learning_rate": 1e-06, "loss": 0.0117, "step": 3182 }, { "epoch": 0.541879468845761, "grad_norm": 1.0666275024414062, "learning_rate": 1e-06, "loss": 0.0081, "step": 3183 }, { "epoch": 0.5420497105890364, "grad_norm": 1.0146986246109009, "learning_rate": 1e-06, "loss": 0.0117, "step": 3184 }, { "epoch": 0.5422199523323119, "grad_norm": 1.2989884614944458, "learning_rate": 1e-06, "loss": 0.0113, "step": 3185 }, { "epoch": 0.5423901940755873, "grad_norm": 1.071435570716858, "learning_rate": 1e-06, "loss": 0.0133, "step": 3186 }, { "epoch": 0.5425604358188628, "grad_norm": 1.090047836303711, "learning_rate": 1e-06, "loss": 0.0087, "step": 3187 }, { "epoch": 0.5427306775621382, "grad_norm": 1.1832104921340942, "learning_rate": 1e-06, "loss": 0.0131, "step": 3188 }, { "epoch": 0.5429009193054137, "grad_norm": 1.1112650632858276, "learning_rate": 1e-06, "loss": 0.0112, "step": 3189 }, { "epoch": 0.5430711610486891, "grad_norm": 1.0457887649536133, "learning_rate": 1e-06, "loss": 0.0158, "step": 3190 }, { "epoch": 0.5432414027919646, "grad_norm": 1.2125804424285889, "learning_rate": 1e-06, "loss": 0.0165, "step": 3191 }, { "epoch": 0.54341164453524, "grad_norm": 1.0135105848312378, "learning_rate": 1e-06, "loss": 0.0115, "step": 3192 }, { "epoch": 0.5435818862785154, "grad_norm": 1.0347403287887573, "learning_rate": 1e-06, "loss": 0.0094, "step": 3193 }, { "epoch": 0.543752128021791, "grad_norm": 1.2791105508804321, "learning_rate": 1e-06, "loss": 0.0093, "step": 3194 }, { "epoch": 0.5439223697650664, "grad_norm": 1.1392316818237305, "learning_rate": 1e-06, "loss": 0.0121, "step": 3195 }, { "epoch": 0.5440926115083419, "grad_norm": 1.273457646369934, "learning_rate": 1e-06, "loss": 0.0074, "step": 3196 }, { "epoch": 0.5442628532516173, "grad_norm": 1.5065120458602905, "learning_rate": 1e-06, "loss": 0.0144, "step": 3197 }, { "epoch": 0.5444330949948928, "grad_norm": 1.0680369138717651, "learning_rate": 1e-06, "loss": 0.0082, "step": 3198 }, { "epoch": 0.5446033367381682, "grad_norm": 1.3402657508850098, "learning_rate": 1e-06, "loss": 0.0111, "step": 3199 }, { "epoch": 0.5447735784814437, "grad_norm": 1.0954920053482056, "learning_rate": 1e-06, "loss": 0.0108, "step": 3200 }, { "epoch": 0.5449438202247191, "grad_norm": 1.4809321165084839, "learning_rate": 1e-06, "loss": 0.0127, "step": 3201 }, { "epoch": 0.5451140619679945, "grad_norm": 1.1664936542510986, "learning_rate": 1e-06, "loss": 0.0101, "step": 3202 }, { "epoch": 0.54528430371127, "grad_norm": 1.4334688186645508, "learning_rate": 1e-06, "loss": 0.0119, "step": 3203 }, { "epoch": 0.5454545454545454, "grad_norm": 1.2824522256851196, "learning_rate": 1e-06, "loss": 0.0103, "step": 3204 }, { "epoch": 0.5456247871978209, "grad_norm": 1.2238925695419312, "learning_rate": 1e-06, "loss": 0.0101, "step": 3205 }, { "epoch": 0.5457950289410963, "grad_norm": 1.2942602634429932, "learning_rate": 1e-06, "loss": 0.0142, "step": 3206 }, { "epoch": 0.5459652706843718, "grad_norm": 0.9721873998641968, "learning_rate": 1e-06, "loss": 0.0082, "step": 3207 }, { "epoch": 0.5461355124276472, "grad_norm": 1.3409883975982666, "learning_rate": 1e-06, "loss": 0.0122, "step": 3208 }, { "epoch": 0.5463057541709228, "grad_norm": 1.283263087272644, "learning_rate": 1e-06, "loss": 0.0108, "step": 3209 }, { "epoch": 0.5464759959141982, "grad_norm": 0.9081169366836548, "learning_rate": 1e-06, "loss": 0.0071, "step": 3210 }, { "epoch": 0.5466462376574737, "grad_norm": 1.0881484746932983, "learning_rate": 1e-06, "loss": 0.008, "step": 3211 }, { "epoch": 0.5468164794007491, "grad_norm": 1.2378934621810913, "learning_rate": 1e-06, "loss": 0.0078, "step": 3212 }, { "epoch": 0.5469867211440245, "grad_norm": 1.277190089225769, "learning_rate": 1e-06, "loss": 0.0152, "step": 3213 }, { "epoch": 0.5471569628873, "grad_norm": 1.6451539993286133, "learning_rate": 1e-06, "loss": 0.017, "step": 3214 }, { "epoch": 0.5473272046305754, "grad_norm": 1.5039424896240234, "learning_rate": 1e-06, "loss": 0.0096, "step": 3215 }, { "epoch": 0.5474974463738509, "grad_norm": 0.7711275219917297, "learning_rate": 1e-06, "loss": 0.007, "step": 3216 }, { "epoch": 0.5476676881171263, "grad_norm": 1.2757947444915771, "learning_rate": 1e-06, "loss": 0.0137, "step": 3217 }, { "epoch": 0.5478379298604018, "grad_norm": 1.2242053747177124, "learning_rate": 1e-06, "loss": 0.0102, "step": 3218 }, { "epoch": 0.5480081716036772, "grad_norm": 1.3338735103607178, "learning_rate": 1e-06, "loss": 0.01, "step": 3219 }, { "epoch": 0.5481784133469527, "grad_norm": 0.9244385361671448, "learning_rate": 1e-06, "loss": 0.0084, "step": 3220 }, { "epoch": 0.5483486550902281, "grad_norm": 1.0538643598556519, "learning_rate": 1e-06, "loss": 0.0088, "step": 3221 }, { "epoch": 0.5485188968335035, "grad_norm": 1.0866847038269043, "learning_rate": 1e-06, "loss": 0.0086, "step": 3222 }, { "epoch": 0.548689138576779, "grad_norm": 0.8596549034118652, "learning_rate": 1e-06, "loss": 0.0083, "step": 3223 }, { "epoch": 0.5488593803200544, "grad_norm": 1.1239334344863892, "learning_rate": 1e-06, "loss": 0.0101, "step": 3224 }, { "epoch": 0.54902962206333, "grad_norm": 1.1236169338226318, "learning_rate": 1e-06, "loss": 0.0094, "step": 3225 }, { "epoch": 0.5491998638066053, "grad_norm": 1.6141034364700317, "learning_rate": 1e-06, "loss": 0.0139, "step": 3226 }, { "epoch": 0.5493701055498809, "grad_norm": 1.1685870885849, "learning_rate": 1e-06, "loss": 0.0082, "step": 3227 }, { "epoch": 0.5495403472931563, "grad_norm": 1.2277820110321045, "learning_rate": 1e-06, "loss": 0.0146, "step": 3228 }, { "epoch": 0.5497105890364318, "grad_norm": 1.1030385494232178, "learning_rate": 1e-06, "loss": 0.0116, "step": 3229 }, { "epoch": 0.5498808307797072, "grad_norm": 1.246597409248352, "learning_rate": 1e-06, "loss": 0.0091, "step": 3230 }, { "epoch": 0.5500510725229827, "grad_norm": 1.3571773767471313, "learning_rate": 1e-06, "loss": 0.0101, "step": 3231 }, { "epoch": 0.5502213142662581, "grad_norm": 1.283843755722046, "learning_rate": 1e-06, "loss": 0.011, "step": 3232 }, { "epoch": 0.5503915560095335, "grad_norm": 1.2857335805892944, "learning_rate": 1e-06, "loss": 0.0113, "step": 3233 }, { "epoch": 0.550561797752809, "grad_norm": 1.0135763883590698, "learning_rate": 1e-06, "loss": 0.0102, "step": 3234 }, { "epoch": 0.5507320394960844, "grad_norm": 1.3282479047775269, "learning_rate": 1e-06, "loss": 0.0117, "step": 3235 }, { "epoch": 0.5509022812393599, "grad_norm": 1.251345157623291, "learning_rate": 1e-06, "loss": 0.0118, "step": 3236 }, { "epoch": 0.5510725229826353, "grad_norm": 1.0486385822296143, "learning_rate": 1e-06, "loss": 0.0125, "step": 3237 }, { "epoch": 0.5512427647259108, "grad_norm": 0.894183874130249, "learning_rate": 1e-06, "loss": 0.007, "step": 3238 }, { "epoch": 0.5514130064691862, "grad_norm": 1.545824646949768, "learning_rate": 1e-06, "loss": 0.0141, "step": 3239 }, { "epoch": 0.5515832482124617, "grad_norm": 1.3999972343444824, "learning_rate": 1e-06, "loss": 0.0137, "step": 3240 }, { "epoch": 0.5517534899557371, "grad_norm": 1.1850388050079346, "learning_rate": 1e-06, "loss": 0.0081, "step": 3241 }, { "epoch": 0.5519237316990125, "grad_norm": 1.2595492601394653, "learning_rate": 1e-06, "loss": 0.0143, "step": 3242 }, { "epoch": 0.552093973442288, "grad_norm": 1.0089160203933716, "learning_rate": 1e-06, "loss": 0.008, "step": 3243 }, { "epoch": 0.5522642151855635, "grad_norm": 2.293635845184326, "learning_rate": 1e-06, "loss": 0.0285, "step": 3244 }, { "epoch": 0.552434456928839, "grad_norm": 1.250726342201233, "learning_rate": 1e-06, "loss": 0.0168, "step": 3245 }, { "epoch": 0.5526046986721144, "grad_norm": 1.1201728582382202, "learning_rate": 1e-06, "loss": 0.0085, "step": 3246 }, { "epoch": 0.5527749404153899, "grad_norm": 1.013512134552002, "learning_rate": 1e-06, "loss": 0.0078, "step": 3247 }, { "epoch": 0.5529451821586653, "grad_norm": 1.111093282699585, "learning_rate": 1e-06, "loss": 0.0113, "step": 3248 }, { "epoch": 0.5531154239019408, "grad_norm": 0.9368535876274109, "learning_rate": 1e-06, "loss": 0.0149, "step": 3249 }, { "epoch": 0.5532856656452162, "grad_norm": 1.0745536088943481, "learning_rate": 1e-06, "loss": 0.0134, "step": 3250 }, { "epoch": 0.5534559073884917, "grad_norm": 1.2841395139694214, "learning_rate": 1e-06, "loss": 0.0088, "step": 3251 }, { "epoch": 0.5536261491317671, "grad_norm": 1.3827086687088013, "learning_rate": 1e-06, "loss": 0.0094, "step": 3252 }, { "epoch": 0.5537963908750425, "grad_norm": 1.475622534751892, "learning_rate": 1e-06, "loss": 0.0131, "step": 3253 }, { "epoch": 0.553966632618318, "grad_norm": 0.9399949312210083, "learning_rate": 1e-06, "loss": 0.0104, "step": 3254 }, { "epoch": 0.5541368743615934, "grad_norm": 1.1346850395202637, "learning_rate": 1e-06, "loss": 0.0087, "step": 3255 }, { "epoch": 0.5543071161048689, "grad_norm": 1.2634351253509521, "learning_rate": 1e-06, "loss": 0.0105, "step": 3256 }, { "epoch": 0.5544773578481443, "grad_norm": 0.9699029326438904, "learning_rate": 1e-06, "loss": 0.0081, "step": 3257 }, { "epoch": 0.5546475995914198, "grad_norm": 1.6691503524780273, "learning_rate": 1e-06, "loss": 0.0188, "step": 3258 }, { "epoch": 0.5548178413346952, "grad_norm": 1.39431893825531, "learning_rate": 1e-06, "loss": 0.0139, "step": 3259 }, { "epoch": 0.5549880830779708, "grad_norm": 1.5263134241104126, "learning_rate": 1e-06, "loss": 0.0186, "step": 3260 }, { "epoch": 0.5551583248212462, "grad_norm": 1.1584216356277466, "learning_rate": 1e-06, "loss": 0.0164, "step": 3261 }, { "epoch": 0.5553285665645217, "grad_norm": 1.0202586650848389, "learning_rate": 1e-06, "loss": 0.0107, "step": 3262 }, { "epoch": 0.5554988083077971, "grad_norm": 1.393701434135437, "learning_rate": 1e-06, "loss": 0.0097, "step": 3263 }, { "epoch": 0.5556690500510725, "grad_norm": 1.0818877220153809, "learning_rate": 1e-06, "loss": 0.0102, "step": 3264 }, { "epoch": 0.555839291794348, "grad_norm": 1.3524203300476074, "learning_rate": 1e-06, "loss": 0.0103, "step": 3265 }, { "epoch": 0.5560095335376234, "grad_norm": 1.1028423309326172, "learning_rate": 1e-06, "loss": 0.008, "step": 3266 }, { "epoch": 0.5561797752808989, "grad_norm": 1.4970299005508423, "learning_rate": 1e-06, "loss": 0.0173, "step": 3267 }, { "epoch": 0.5563500170241743, "grad_norm": 1.0553059577941895, "learning_rate": 1e-06, "loss": 0.0178, "step": 3268 }, { "epoch": 0.5565202587674498, "grad_norm": 1.0315494537353516, "learning_rate": 1e-06, "loss": 0.0082, "step": 3269 }, { "epoch": 0.5566905005107252, "grad_norm": 1.201059341430664, "learning_rate": 1e-06, "loss": 0.0134, "step": 3270 }, { "epoch": 0.5568607422540007, "grad_norm": 1.472936987876892, "learning_rate": 1e-06, "loss": 0.0179, "step": 3271 }, { "epoch": 0.5570309839972761, "grad_norm": 1.2015950679779053, "learning_rate": 1e-06, "loss": 0.0102, "step": 3272 }, { "epoch": 0.5572012257405515, "grad_norm": 1.715442180633545, "learning_rate": 1e-06, "loss": 0.0126, "step": 3273 }, { "epoch": 0.557371467483827, "grad_norm": 0.8229348659515381, "learning_rate": 1e-06, "loss": 0.0067, "step": 3274 }, { "epoch": 0.5575417092271024, "grad_norm": 1.4160443544387817, "learning_rate": 1e-06, "loss": 0.0123, "step": 3275 }, { "epoch": 0.557711950970378, "grad_norm": 1.3626257181167603, "learning_rate": 1e-06, "loss": 0.0116, "step": 3276 }, { "epoch": 0.5578821927136534, "grad_norm": 0.9082250595092773, "learning_rate": 1e-06, "loss": 0.0081, "step": 3277 }, { "epoch": 0.5580524344569289, "grad_norm": 1.0986019372940063, "learning_rate": 1e-06, "loss": 0.0139, "step": 3278 }, { "epoch": 0.5582226762002043, "grad_norm": 1.828899621963501, "learning_rate": 1e-06, "loss": 0.0307, "step": 3279 }, { "epoch": 0.5583929179434798, "grad_norm": 0.9443734288215637, "learning_rate": 1e-06, "loss": 0.0076, "step": 3280 }, { "epoch": 0.5585631596867552, "grad_norm": 0.9179772138595581, "learning_rate": 1e-06, "loss": 0.0095, "step": 3281 }, { "epoch": 0.5587334014300307, "grad_norm": 1.0771028995513916, "learning_rate": 1e-06, "loss": 0.0071, "step": 3282 }, { "epoch": 0.5589036431733061, "grad_norm": 1.1734918355941772, "learning_rate": 1e-06, "loss": 0.0092, "step": 3283 }, { "epoch": 0.5590738849165815, "grad_norm": 1.24713134765625, "learning_rate": 1e-06, "loss": 0.0139, "step": 3284 }, { "epoch": 0.559244126659857, "grad_norm": 1.0508451461791992, "learning_rate": 1e-06, "loss": 0.0083, "step": 3285 }, { "epoch": 0.5594143684031324, "grad_norm": 1.0896153450012207, "learning_rate": 1e-06, "loss": 0.011, "step": 3286 }, { "epoch": 0.5595846101464079, "grad_norm": 1.1460729837417603, "learning_rate": 1e-06, "loss": 0.0112, "step": 3287 }, { "epoch": 0.5597548518896833, "grad_norm": 1.5633426904678345, "learning_rate": 1e-06, "loss": 0.0148, "step": 3288 }, { "epoch": 0.5599250936329588, "grad_norm": 1.5234661102294922, "learning_rate": 1e-06, "loss": 0.0132, "step": 3289 }, { "epoch": 0.5600953353762342, "grad_norm": 0.8329904079437256, "learning_rate": 1e-06, "loss": 0.0059, "step": 3290 }, { "epoch": 0.5602655771195098, "grad_norm": 1.244648814201355, "learning_rate": 1e-06, "loss": 0.0106, "step": 3291 }, { "epoch": 0.5604358188627852, "grad_norm": 1.2756433486938477, "learning_rate": 1e-06, "loss": 0.0113, "step": 3292 }, { "epoch": 0.5606060606060606, "grad_norm": 0.9432902932167053, "learning_rate": 1e-06, "loss": 0.0078, "step": 3293 }, { "epoch": 0.5607763023493361, "grad_norm": 1.3495780229568481, "learning_rate": 1e-06, "loss": 0.0122, "step": 3294 }, { "epoch": 0.5609465440926115, "grad_norm": 1.3346593379974365, "learning_rate": 1e-06, "loss": 0.0121, "step": 3295 }, { "epoch": 0.561116785835887, "grad_norm": 1.12068510055542, "learning_rate": 1e-06, "loss": 0.0087, "step": 3296 }, { "epoch": 0.5612870275791624, "grad_norm": 1.0519905090332031, "learning_rate": 1e-06, "loss": 0.009, "step": 3297 }, { "epoch": 0.5614572693224379, "grad_norm": 1.1385189294815063, "learning_rate": 1e-06, "loss": 0.0111, "step": 3298 }, { "epoch": 0.5616275110657133, "grad_norm": 1.3988077640533447, "learning_rate": 1e-06, "loss": 0.0183, "step": 3299 }, { "epoch": 0.5617977528089888, "grad_norm": 1.1090185642242432, "learning_rate": 1e-06, "loss": 0.0072, "step": 3300 }, { "epoch": 0.5619679945522642, "grad_norm": 1.2562520503997803, "learning_rate": 1e-06, "loss": 0.0116, "step": 3301 }, { "epoch": 0.5621382362955397, "grad_norm": 1.1855425834655762, "learning_rate": 1e-06, "loss": 0.0132, "step": 3302 }, { "epoch": 0.5623084780388151, "grad_norm": 1.492267370223999, "learning_rate": 1e-06, "loss": 0.0105, "step": 3303 }, { "epoch": 0.5624787197820905, "grad_norm": 1.1446338891983032, "learning_rate": 1e-06, "loss": 0.0112, "step": 3304 }, { "epoch": 0.562648961525366, "grad_norm": 0.9587407112121582, "learning_rate": 1e-06, "loss": 0.0091, "step": 3305 }, { "epoch": 0.5628192032686414, "grad_norm": 1.0618356466293335, "learning_rate": 1e-06, "loss": 0.0101, "step": 3306 }, { "epoch": 0.562989445011917, "grad_norm": 1.6512682437896729, "learning_rate": 1e-06, "loss": 0.0229, "step": 3307 }, { "epoch": 0.5631596867551923, "grad_norm": 1.2739150524139404, "learning_rate": 1e-06, "loss": 0.0156, "step": 3308 }, { "epoch": 0.5633299284984679, "grad_norm": 1.4853968620300293, "learning_rate": 1e-06, "loss": 0.0197, "step": 3309 }, { "epoch": 0.5635001702417433, "grad_norm": 1.269023060798645, "learning_rate": 1e-06, "loss": 0.0109, "step": 3310 }, { "epoch": 0.5636704119850188, "grad_norm": 1.162980556488037, "learning_rate": 1e-06, "loss": 0.0103, "step": 3311 }, { "epoch": 0.5638406537282942, "grad_norm": 2.1068434715270996, "learning_rate": 1e-06, "loss": 0.0151, "step": 3312 }, { "epoch": 0.5640108954715697, "grad_norm": 1.2134860754013062, "learning_rate": 1e-06, "loss": 0.0142, "step": 3313 }, { "epoch": 0.5641811372148451, "grad_norm": 1.2850688695907593, "learning_rate": 1e-06, "loss": 0.015, "step": 3314 }, { "epoch": 0.5643513789581205, "grad_norm": 1.0942927598953247, "learning_rate": 1e-06, "loss": 0.0068, "step": 3315 }, { "epoch": 0.564521620701396, "grad_norm": 1.20088791847229, "learning_rate": 1e-06, "loss": 0.0092, "step": 3316 }, { "epoch": 0.5646918624446714, "grad_norm": 1.0646437406539917, "learning_rate": 1e-06, "loss": 0.0091, "step": 3317 }, { "epoch": 0.5648621041879469, "grad_norm": 0.8980295062065125, "learning_rate": 1e-06, "loss": 0.0075, "step": 3318 }, { "epoch": 0.5650323459312223, "grad_norm": 1.4501545429229736, "learning_rate": 1e-06, "loss": 0.0101, "step": 3319 }, { "epoch": 0.5652025876744978, "grad_norm": 1.4403294324874878, "learning_rate": 1e-06, "loss": 0.0204, "step": 3320 }, { "epoch": 0.5653728294177732, "grad_norm": 1.6485029458999634, "learning_rate": 1e-06, "loss": 0.0119, "step": 3321 }, { "epoch": 0.5655430711610487, "grad_norm": 1.0820796489715576, "learning_rate": 1e-06, "loss": 0.0068, "step": 3322 }, { "epoch": 0.5657133129043241, "grad_norm": 1.0240590572357178, "learning_rate": 1e-06, "loss": 0.0135, "step": 3323 }, { "epoch": 0.5658835546475995, "grad_norm": 1.1677789688110352, "learning_rate": 1e-06, "loss": 0.0088, "step": 3324 }, { "epoch": 0.566053796390875, "grad_norm": 1.3193532228469849, "learning_rate": 1e-06, "loss": 0.0113, "step": 3325 }, { "epoch": 0.5662240381341505, "grad_norm": 0.9879570007324219, "learning_rate": 1e-06, "loss": 0.009, "step": 3326 }, { "epoch": 0.566394279877426, "grad_norm": 1.6975972652435303, "learning_rate": 1e-06, "loss": 0.0098, "step": 3327 }, { "epoch": 0.5665645216207014, "grad_norm": 1.4392386674880981, "learning_rate": 1e-06, "loss": 0.0126, "step": 3328 }, { "epoch": 0.5667347633639769, "grad_norm": 1.341951847076416, "learning_rate": 1e-06, "loss": 0.0139, "step": 3329 }, { "epoch": 0.5669050051072523, "grad_norm": 1.0540547370910645, "learning_rate": 1e-06, "loss": 0.0086, "step": 3330 }, { "epoch": 0.5670752468505278, "grad_norm": 1.0311988592147827, "learning_rate": 1e-06, "loss": 0.0147, "step": 3331 }, { "epoch": 0.5672454885938032, "grad_norm": 1.1205745935440063, "learning_rate": 1e-06, "loss": 0.0094, "step": 3332 }, { "epoch": 0.5674157303370787, "grad_norm": 0.9916560649871826, "learning_rate": 1e-06, "loss": 0.01, "step": 3333 }, { "epoch": 0.5675859720803541, "grad_norm": 1.0641469955444336, "learning_rate": 1e-06, "loss": 0.0116, "step": 3334 }, { "epoch": 0.5677562138236295, "grad_norm": 1.4012588262557983, "learning_rate": 1e-06, "loss": 0.0185, "step": 3335 }, { "epoch": 0.567926455566905, "grad_norm": 1.0425010919570923, "learning_rate": 1e-06, "loss": 0.0109, "step": 3336 }, { "epoch": 0.5680966973101804, "grad_norm": 1.2166441679000854, "learning_rate": 1e-06, "loss": 0.0123, "step": 3337 }, { "epoch": 0.5682669390534559, "grad_norm": 1.4279252290725708, "learning_rate": 1e-06, "loss": 0.0118, "step": 3338 }, { "epoch": 0.5684371807967313, "grad_norm": 1.7417263984680176, "learning_rate": 1e-06, "loss": 0.0167, "step": 3339 }, { "epoch": 0.5686074225400068, "grad_norm": 1.190510869026184, "learning_rate": 1e-06, "loss": 0.0112, "step": 3340 }, { "epoch": 0.5687776642832822, "grad_norm": 1.2967720031738281, "learning_rate": 1e-06, "loss": 0.0087, "step": 3341 }, { "epoch": 0.5689479060265578, "grad_norm": 1.2348685264587402, "learning_rate": 1e-06, "loss": 0.0084, "step": 3342 }, { "epoch": 0.5691181477698332, "grad_norm": 1.24281907081604, "learning_rate": 1e-06, "loss": 0.0116, "step": 3343 }, { "epoch": 0.5692883895131086, "grad_norm": 1.3598650693893433, "learning_rate": 1e-06, "loss": 0.0173, "step": 3344 }, { "epoch": 0.5694586312563841, "grad_norm": 1.173242211341858, "learning_rate": 1e-06, "loss": 0.0103, "step": 3345 }, { "epoch": 0.5696288729996595, "grad_norm": 1.2969194650650024, "learning_rate": 1e-06, "loss": 0.0114, "step": 3346 }, { "epoch": 0.569799114742935, "grad_norm": 0.953393280506134, "learning_rate": 1e-06, "loss": 0.0078, "step": 3347 }, { "epoch": 0.5699693564862104, "grad_norm": 1.2837368249893188, "learning_rate": 1e-06, "loss": 0.0075, "step": 3348 }, { "epoch": 0.5701395982294859, "grad_norm": 0.9645003080368042, "learning_rate": 1e-06, "loss": 0.0086, "step": 3349 }, { "epoch": 0.5703098399727613, "grad_norm": 0.9485358595848083, "learning_rate": 1e-06, "loss": 0.0088, "step": 3350 }, { "epoch": 0.5704800817160368, "grad_norm": 1.000497817993164, "learning_rate": 1e-06, "loss": 0.0109, "step": 3351 }, { "epoch": 0.5706503234593122, "grad_norm": 1.262580394744873, "learning_rate": 1e-06, "loss": 0.0091, "step": 3352 }, { "epoch": 0.5708205652025877, "grad_norm": 1.2770668268203735, "learning_rate": 1e-06, "loss": 0.0116, "step": 3353 }, { "epoch": 0.5709908069458631, "grad_norm": 0.9978349804878235, "learning_rate": 1e-06, "loss": 0.0078, "step": 3354 }, { "epoch": 0.5711610486891385, "grad_norm": 1.1348481178283691, "learning_rate": 1e-06, "loss": 0.0114, "step": 3355 }, { "epoch": 0.571331290432414, "grad_norm": 1.1138725280761719, "learning_rate": 1e-06, "loss": 0.0077, "step": 3356 }, { "epoch": 0.5715015321756894, "grad_norm": 1.243876576423645, "learning_rate": 1e-06, "loss": 0.0103, "step": 3357 }, { "epoch": 0.571671773918965, "grad_norm": 2.1010210514068604, "learning_rate": 1e-06, "loss": 0.0229, "step": 3358 }, { "epoch": 0.5718420156622404, "grad_norm": 1.5689268112182617, "learning_rate": 1e-06, "loss": 0.0199, "step": 3359 }, { "epoch": 0.5720122574055159, "grad_norm": 1.4927451610565186, "learning_rate": 1e-06, "loss": 0.0145, "step": 3360 }, { "epoch": 0.5721824991487913, "grad_norm": 1.2088425159454346, "learning_rate": 1e-06, "loss": 0.0084, "step": 3361 }, { "epoch": 0.5723527408920668, "grad_norm": 3.114051580429077, "learning_rate": 1e-06, "loss": 0.0122, "step": 3362 }, { "epoch": 0.5725229826353422, "grad_norm": 0.9580630660057068, "learning_rate": 1e-06, "loss": 0.0076, "step": 3363 }, { "epoch": 0.5726932243786176, "grad_norm": 0.9830418229103088, "learning_rate": 1e-06, "loss": 0.0097, "step": 3364 }, { "epoch": 0.5728634661218931, "grad_norm": 1.2389250993728638, "learning_rate": 1e-06, "loss": 0.0099, "step": 3365 }, { "epoch": 0.5730337078651685, "grad_norm": 1.159663438796997, "learning_rate": 1e-06, "loss": 0.0141, "step": 3366 }, { "epoch": 0.573203949608444, "grad_norm": 1.5765507221221924, "learning_rate": 1e-06, "loss": 0.0123, "step": 3367 }, { "epoch": 0.5733741913517194, "grad_norm": 1.1403703689575195, "learning_rate": 1e-06, "loss": 0.0091, "step": 3368 }, { "epoch": 0.5735444330949949, "grad_norm": 1.376774787902832, "learning_rate": 1e-06, "loss": 0.0111, "step": 3369 }, { "epoch": 0.5737146748382703, "grad_norm": 1.386721134185791, "learning_rate": 1e-06, "loss": 0.0139, "step": 3370 }, { "epoch": 0.5738849165815458, "grad_norm": 1.6140387058258057, "learning_rate": 1e-06, "loss": 0.014, "step": 3371 }, { "epoch": 0.5740551583248212, "grad_norm": 0.9297884702682495, "learning_rate": 1e-06, "loss": 0.0089, "step": 3372 }, { "epoch": 0.5742254000680967, "grad_norm": 1.196844220161438, "learning_rate": 1e-06, "loss": 0.0103, "step": 3373 }, { "epoch": 0.5743956418113721, "grad_norm": 0.999453604221344, "learning_rate": 1e-06, "loss": 0.0097, "step": 3374 }, { "epoch": 0.5745658835546475, "grad_norm": 1.378943920135498, "learning_rate": 1e-06, "loss": 0.0135, "step": 3375 }, { "epoch": 0.5747361252979231, "grad_norm": 1.1324682235717773, "learning_rate": 1e-06, "loss": 0.0102, "step": 3376 }, { "epoch": 0.5749063670411985, "grad_norm": 1.0931276082992554, "learning_rate": 1e-06, "loss": 0.0078, "step": 3377 }, { "epoch": 0.575076608784474, "grad_norm": 1.0368367433547974, "learning_rate": 1e-06, "loss": 0.0073, "step": 3378 }, { "epoch": 0.5752468505277494, "grad_norm": 1.0412521362304688, "learning_rate": 1e-06, "loss": 0.0096, "step": 3379 }, { "epoch": 0.5754170922710249, "grad_norm": 1.5202747583389282, "learning_rate": 1e-06, "loss": 0.0134, "step": 3380 }, { "epoch": 0.5755873340143003, "grad_norm": 0.9431686401367188, "learning_rate": 1e-06, "loss": 0.0062, "step": 3381 }, { "epoch": 0.5757575757575758, "grad_norm": 1.391595482826233, "learning_rate": 1e-06, "loss": 0.0187, "step": 3382 }, { "epoch": 0.5759278175008512, "grad_norm": 1.1645992994308472, "learning_rate": 1e-06, "loss": 0.0091, "step": 3383 }, { "epoch": 0.5760980592441267, "grad_norm": 1.1957941055297852, "learning_rate": 1e-06, "loss": 0.0107, "step": 3384 }, { "epoch": 0.5762683009874021, "grad_norm": 0.9163592457771301, "learning_rate": 1e-06, "loss": 0.0083, "step": 3385 }, { "epoch": 0.5764385427306775, "grad_norm": 1.4192230701446533, "learning_rate": 1e-06, "loss": 0.0132, "step": 3386 }, { "epoch": 0.576608784473953, "grad_norm": 1.128556728363037, "learning_rate": 1e-06, "loss": 0.0094, "step": 3387 }, { "epoch": 0.5767790262172284, "grad_norm": 1.0899877548217773, "learning_rate": 1e-06, "loss": 0.0099, "step": 3388 }, { "epoch": 0.5769492679605039, "grad_norm": 1.5892912149429321, "learning_rate": 1e-06, "loss": 0.0116, "step": 3389 }, { "epoch": 0.5771195097037793, "grad_norm": 0.9154031276702881, "learning_rate": 1e-06, "loss": 0.0088, "step": 3390 }, { "epoch": 0.5772897514470549, "grad_norm": 1.380571722984314, "learning_rate": 1e-06, "loss": 0.0131, "step": 3391 }, { "epoch": 0.5774599931903303, "grad_norm": 1.0964083671569824, "learning_rate": 1e-06, "loss": 0.0084, "step": 3392 }, { "epoch": 0.5776302349336058, "grad_norm": 1.2165638208389282, "learning_rate": 1e-06, "loss": 0.0109, "step": 3393 }, { "epoch": 0.5778004766768812, "grad_norm": 1.234350323677063, "learning_rate": 1e-06, "loss": 0.0085, "step": 3394 }, { "epoch": 0.5779707184201566, "grad_norm": 1.2220001220703125, "learning_rate": 1e-06, "loss": 0.0143, "step": 3395 }, { "epoch": 0.5781409601634321, "grad_norm": 1.1645594835281372, "learning_rate": 1e-06, "loss": 0.0086, "step": 3396 }, { "epoch": 0.5783112019067075, "grad_norm": 1.3842546939849854, "learning_rate": 1e-06, "loss": 0.0127, "step": 3397 }, { "epoch": 0.578481443649983, "grad_norm": 0.8232072591781616, "learning_rate": 1e-06, "loss": 0.0083, "step": 3398 }, { "epoch": 0.5786516853932584, "grad_norm": 1.1387802362442017, "learning_rate": 1e-06, "loss": 0.0103, "step": 3399 }, { "epoch": 0.5788219271365339, "grad_norm": 1.1812922954559326, "learning_rate": 1e-06, "loss": 0.0097, "step": 3400 }, { "epoch": 0.5789921688798093, "grad_norm": 1.0787347555160522, "learning_rate": 1e-06, "loss": 0.0167, "step": 3401 }, { "epoch": 0.5791624106230848, "grad_norm": 1.2608331441879272, "learning_rate": 1e-06, "loss": 0.0122, "step": 3402 }, { "epoch": 0.5793326523663602, "grad_norm": 1.1550672054290771, "learning_rate": 1e-06, "loss": 0.008, "step": 3403 }, { "epoch": 0.5795028941096357, "grad_norm": 1.1368712186813354, "learning_rate": 1e-06, "loss": 0.0098, "step": 3404 }, { "epoch": 0.5796731358529111, "grad_norm": 1.4843052625656128, "learning_rate": 1e-06, "loss": 0.0103, "step": 3405 }, { "epoch": 0.5798433775961865, "grad_norm": 1.0383031368255615, "learning_rate": 1e-06, "loss": 0.0069, "step": 3406 }, { "epoch": 0.580013619339462, "grad_norm": 1.9053559303283691, "learning_rate": 1e-06, "loss": 0.0148, "step": 3407 }, { "epoch": 0.5801838610827375, "grad_norm": 1.11162531375885, "learning_rate": 1e-06, "loss": 0.0111, "step": 3408 }, { "epoch": 0.580354102826013, "grad_norm": 1.5268166065216064, "learning_rate": 1e-06, "loss": 0.0105, "step": 3409 }, { "epoch": 0.5805243445692884, "grad_norm": 0.8884620070457458, "learning_rate": 1e-06, "loss": 0.0074, "step": 3410 }, { "epoch": 0.5806945863125639, "grad_norm": 1.1013774871826172, "learning_rate": 1e-06, "loss": 0.0109, "step": 3411 }, { "epoch": 0.5808648280558393, "grad_norm": 0.822043776512146, "learning_rate": 1e-06, "loss": 0.0082, "step": 3412 }, { "epoch": 0.5810350697991148, "grad_norm": 0.9275667667388916, "learning_rate": 1e-06, "loss": 0.0093, "step": 3413 }, { "epoch": 0.5812053115423902, "grad_norm": 1.002583384513855, "learning_rate": 1e-06, "loss": 0.0125, "step": 3414 }, { "epoch": 0.5813755532856656, "grad_norm": 0.8230078816413879, "learning_rate": 1e-06, "loss": 0.0078, "step": 3415 }, { "epoch": 0.5815457950289411, "grad_norm": 1.2379169464111328, "learning_rate": 1e-06, "loss": 0.0102, "step": 3416 }, { "epoch": 0.5817160367722165, "grad_norm": 0.9411685466766357, "learning_rate": 1e-06, "loss": 0.0071, "step": 3417 }, { "epoch": 0.581886278515492, "grad_norm": 1.2261695861816406, "learning_rate": 1e-06, "loss": 0.0106, "step": 3418 }, { "epoch": 0.5820565202587674, "grad_norm": 1.1066349744796753, "learning_rate": 1e-06, "loss": 0.0084, "step": 3419 }, { "epoch": 0.5822267620020429, "grad_norm": 1.058130145072937, "learning_rate": 1e-06, "loss": 0.0095, "step": 3420 }, { "epoch": 0.5823970037453183, "grad_norm": 2.5851633548736572, "learning_rate": 1e-06, "loss": 0.0253, "step": 3421 }, { "epoch": 0.5825672454885938, "grad_norm": 1.2730610370635986, "learning_rate": 1e-06, "loss": 0.0077, "step": 3422 }, { "epoch": 0.5827374872318692, "grad_norm": 1.3514959812164307, "learning_rate": 1e-06, "loss": 0.0102, "step": 3423 }, { "epoch": 0.5829077289751448, "grad_norm": 1.4177110195159912, "learning_rate": 1e-06, "loss": 0.0097, "step": 3424 }, { "epoch": 0.5830779707184202, "grad_norm": 1.2021658420562744, "learning_rate": 1e-06, "loss": 0.0118, "step": 3425 }, { "epoch": 0.5832482124616956, "grad_norm": 1.4584933519363403, "learning_rate": 1e-06, "loss": 0.0109, "step": 3426 }, { "epoch": 0.5834184542049711, "grad_norm": 1.345293402671814, "learning_rate": 1e-06, "loss": 0.0128, "step": 3427 }, { "epoch": 0.5835886959482465, "grad_norm": 1.2940164804458618, "learning_rate": 1e-06, "loss": 0.0113, "step": 3428 }, { "epoch": 0.583758937691522, "grad_norm": 1.1534115076065063, "learning_rate": 1e-06, "loss": 0.0109, "step": 3429 }, { "epoch": 0.5839291794347974, "grad_norm": 0.9437724947929382, "learning_rate": 1e-06, "loss": 0.0083, "step": 3430 }, { "epoch": 0.5840994211780729, "grad_norm": 1.2992208003997803, "learning_rate": 1e-06, "loss": 0.0164, "step": 3431 }, { "epoch": 0.5842696629213483, "grad_norm": 0.95462566614151, "learning_rate": 1e-06, "loss": 0.0091, "step": 3432 }, { "epoch": 0.5844399046646238, "grad_norm": 1.0334690809249878, "learning_rate": 1e-06, "loss": 0.0078, "step": 3433 }, { "epoch": 0.5846101464078992, "grad_norm": 1.707252025604248, "learning_rate": 1e-06, "loss": 0.0143, "step": 3434 }, { "epoch": 0.5847803881511746, "grad_norm": 0.9804726839065552, "learning_rate": 1e-06, "loss": 0.0077, "step": 3435 }, { "epoch": 0.5849506298944501, "grad_norm": 1.5533514022827148, "learning_rate": 1e-06, "loss": 0.0143, "step": 3436 }, { "epoch": 0.5851208716377255, "grad_norm": 1.154433012008667, "learning_rate": 1e-06, "loss": 0.0111, "step": 3437 }, { "epoch": 0.585291113381001, "grad_norm": 1.2386829853057861, "learning_rate": 1e-06, "loss": 0.0107, "step": 3438 }, { "epoch": 0.5854613551242764, "grad_norm": 0.9458797574043274, "learning_rate": 1e-06, "loss": 0.0082, "step": 3439 }, { "epoch": 0.585631596867552, "grad_norm": 1.1015353202819824, "learning_rate": 1e-06, "loss": 0.0093, "step": 3440 }, { "epoch": 0.5858018386108274, "grad_norm": 1.14461350440979, "learning_rate": 1e-06, "loss": 0.0105, "step": 3441 }, { "epoch": 0.5859720803541029, "grad_norm": 1.4417644739151, "learning_rate": 1e-06, "loss": 0.0104, "step": 3442 }, { "epoch": 0.5861423220973783, "grad_norm": 1.3879674673080444, "learning_rate": 1e-06, "loss": 0.0098, "step": 3443 }, { "epoch": 0.5863125638406538, "grad_norm": 1.0989384651184082, "learning_rate": 1e-06, "loss": 0.008, "step": 3444 }, { "epoch": 0.5864828055839292, "grad_norm": 1.2886449098587036, "learning_rate": 1e-06, "loss": 0.0084, "step": 3445 }, { "epoch": 0.5866530473272046, "grad_norm": 0.9253196716308594, "learning_rate": 1e-06, "loss": 0.0088, "step": 3446 }, { "epoch": 0.5868232890704801, "grad_norm": 1.2055374383926392, "learning_rate": 1e-06, "loss": 0.01, "step": 3447 }, { "epoch": 0.5869935308137555, "grad_norm": 0.9988220930099487, "learning_rate": 1e-06, "loss": 0.0085, "step": 3448 }, { "epoch": 0.587163772557031, "grad_norm": 0.9941924214363098, "learning_rate": 1e-06, "loss": 0.0072, "step": 3449 }, { "epoch": 0.5873340143003064, "grad_norm": 1.3294003009796143, "learning_rate": 1e-06, "loss": 0.0099, "step": 3450 }, { "epoch": 0.5875042560435819, "grad_norm": 1.015142560005188, "learning_rate": 1e-06, "loss": 0.0106, "step": 3451 }, { "epoch": 0.5876744977868573, "grad_norm": 1.1107553243637085, "learning_rate": 1e-06, "loss": 0.0083, "step": 3452 }, { "epoch": 0.5878447395301328, "grad_norm": 1.1980547904968262, "learning_rate": 1e-06, "loss": 0.0087, "step": 3453 }, { "epoch": 0.5880149812734082, "grad_norm": 1.1775740385055542, "learning_rate": 1e-06, "loss": 0.0095, "step": 3454 }, { "epoch": 0.5881852230166837, "grad_norm": 1.5608429908752441, "learning_rate": 1e-06, "loss": 0.0131, "step": 3455 }, { "epoch": 0.5883554647599591, "grad_norm": 1.3156591653823853, "learning_rate": 1e-06, "loss": 0.011, "step": 3456 }, { "epoch": 0.5885257065032345, "grad_norm": 1.0954924821853638, "learning_rate": 1e-06, "loss": 0.0077, "step": 3457 }, { "epoch": 0.5886959482465101, "grad_norm": 0.908723771572113, "learning_rate": 1e-06, "loss": 0.0067, "step": 3458 }, { "epoch": 0.5888661899897855, "grad_norm": 0.9518954753875732, "learning_rate": 1e-06, "loss": 0.0065, "step": 3459 }, { "epoch": 0.589036431733061, "grad_norm": 1.0838017463684082, "learning_rate": 1e-06, "loss": 0.0116, "step": 3460 }, { "epoch": 0.5892066734763364, "grad_norm": 1.235297441482544, "learning_rate": 1e-06, "loss": 0.0149, "step": 3461 }, { "epoch": 0.5893769152196119, "grad_norm": 1.1786282062530518, "learning_rate": 1e-06, "loss": 0.0094, "step": 3462 }, { "epoch": 0.5895471569628873, "grad_norm": 1.1594127416610718, "learning_rate": 1e-06, "loss": 0.0101, "step": 3463 }, { "epoch": 0.5897173987061628, "grad_norm": 0.8134921193122864, "learning_rate": 1e-06, "loss": 0.0079, "step": 3464 }, { "epoch": 0.5898876404494382, "grad_norm": 1.253074288368225, "learning_rate": 1e-06, "loss": 0.0132, "step": 3465 }, { "epoch": 0.5900578821927136, "grad_norm": 0.9157419800758362, "learning_rate": 1e-06, "loss": 0.0062, "step": 3466 }, { "epoch": 0.5902281239359891, "grad_norm": 0.8979124426841736, "learning_rate": 1e-06, "loss": 0.0077, "step": 3467 }, { "epoch": 0.5903983656792645, "grad_norm": 0.9070771336555481, "learning_rate": 1e-06, "loss": 0.0078, "step": 3468 }, { "epoch": 0.59056860742254, "grad_norm": 1.6368584632873535, "learning_rate": 1e-06, "loss": 0.0089, "step": 3469 }, { "epoch": 0.5907388491658154, "grad_norm": 1.0019363164901733, "learning_rate": 1e-06, "loss": 0.0077, "step": 3470 }, { "epoch": 0.5909090909090909, "grad_norm": 1.3202464580535889, "learning_rate": 1e-06, "loss": 0.0146, "step": 3471 }, { "epoch": 0.5910793326523663, "grad_norm": 1.0647854804992676, "learning_rate": 1e-06, "loss": 0.0101, "step": 3472 }, { "epoch": 0.5912495743956419, "grad_norm": 1.138110637664795, "learning_rate": 1e-06, "loss": 0.0112, "step": 3473 }, { "epoch": 0.5914198161389173, "grad_norm": 0.9572812914848328, "learning_rate": 1e-06, "loss": 0.0073, "step": 3474 }, { "epoch": 0.5915900578821928, "grad_norm": 1.1067378520965576, "learning_rate": 1e-06, "loss": 0.0086, "step": 3475 }, { "epoch": 0.5917602996254682, "grad_norm": 1.1624847650527954, "learning_rate": 1e-06, "loss": 0.015, "step": 3476 }, { "epoch": 0.5919305413687436, "grad_norm": 1.1068007946014404, "learning_rate": 1e-06, "loss": 0.0098, "step": 3477 }, { "epoch": 0.5921007831120191, "grad_norm": 0.9079290628433228, "learning_rate": 1e-06, "loss": 0.0082, "step": 3478 }, { "epoch": 0.5922710248552945, "grad_norm": 1.3529939651489258, "learning_rate": 1e-06, "loss": 0.0145, "step": 3479 }, { "epoch": 0.59244126659857, "grad_norm": 0.914438009262085, "learning_rate": 1e-06, "loss": 0.0073, "step": 3480 }, { "epoch": 0.5926115083418454, "grad_norm": 0.9735174775123596, "learning_rate": 1e-06, "loss": 0.0068, "step": 3481 }, { "epoch": 0.5927817500851209, "grad_norm": 1.064338207244873, "learning_rate": 1e-06, "loss": 0.0061, "step": 3482 }, { "epoch": 0.5929519918283963, "grad_norm": 1.145073413848877, "learning_rate": 1e-06, "loss": 0.0086, "step": 3483 }, { "epoch": 0.5931222335716718, "grad_norm": 0.9396929144859314, "learning_rate": 1e-06, "loss": 0.008, "step": 3484 }, { "epoch": 0.5932924753149472, "grad_norm": 0.8736780881881714, "learning_rate": 1e-06, "loss": 0.0063, "step": 3485 }, { "epoch": 0.5934627170582226, "grad_norm": 1.157373070716858, "learning_rate": 1e-06, "loss": 0.0094, "step": 3486 }, { "epoch": 0.5936329588014981, "grad_norm": 1.1017827987670898, "learning_rate": 1e-06, "loss": 0.0085, "step": 3487 }, { "epoch": 0.5938032005447735, "grad_norm": 1.0480536222457886, "learning_rate": 1e-06, "loss": 0.0093, "step": 3488 }, { "epoch": 0.593973442288049, "grad_norm": 0.9790979027748108, "learning_rate": 1e-06, "loss": 0.0075, "step": 3489 }, { "epoch": 0.5941436840313244, "grad_norm": 0.9106320142745972, "learning_rate": 1e-06, "loss": 0.0096, "step": 3490 }, { "epoch": 0.5943139257746, "grad_norm": 0.946974515914917, "learning_rate": 1e-06, "loss": 0.0075, "step": 3491 }, { "epoch": 0.5944841675178754, "grad_norm": 1.0634249448776245, "learning_rate": 1e-06, "loss": 0.0102, "step": 3492 }, { "epoch": 0.5946544092611509, "grad_norm": 1.1991939544677734, "learning_rate": 1e-06, "loss": 0.0111, "step": 3493 }, { "epoch": 0.5948246510044263, "grad_norm": 0.9980155229568481, "learning_rate": 1e-06, "loss": 0.0077, "step": 3494 }, { "epoch": 0.5949948927477018, "grad_norm": 1.0674749612808228, "learning_rate": 1e-06, "loss": 0.0088, "step": 3495 }, { "epoch": 0.5951651344909772, "grad_norm": 0.9656354784965515, "learning_rate": 1e-06, "loss": 0.0067, "step": 3496 }, { "epoch": 0.5953353762342526, "grad_norm": 1.1175800561904907, "learning_rate": 1e-06, "loss": 0.0084, "step": 3497 }, { "epoch": 0.5955056179775281, "grad_norm": 0.8521952629089355, "learning_rate": 1e-06, "loss": 0.0076, "step": 3498 }, { "epoch": 0.5956758597208035, "grad_norm": 1.266019344329834, "learning_rate": 1e-06, "loss": 0.0088, "step": 3499 }, { "epoch": 0.595846101464079, "grad_norm": 1.155662178993225, "learning_rate": 1e-06, "loss": 0.0075, "step": 3500 }, { "epoch": 0.5960163432073544, "grad_norm": 1.200804352760315, "learning_rate": 1e-06, "loss": 0.0098, "step": 3501 }, { "epoch": 0.5961865849506299, "grad_norm": 1.0332434177398682, "learning_rate": 1e-06, "loss": 0.0093, "step": 3502 }, { "epoch": 0.5963568266939053, "grad_norm": 1.114933729171753, "learning_rate": 1e-06, "loss": 0.0083, "step": 3503 }, { "epoch": 0.5965270684371808, "grad_norm": 1.211915135383606, "learning_rate": 1e-06, "loss": 0.0081, "step": 3504 }, { "epoch": 0.5966973101804562, "grad_norm": 1.0030415058135986, "learning_rate": 1e-06, "loss": 0.0086, "step": 3505 }, { "epoch": 0.5968675519237318, "grad_norm": 1.073610782623291, "learning_rate": 1e-06, "loss": 0.0077, "step": 3506 }, { "epoch": 0.5970377936670072, "grad_norm": 0.9142771363258362, "learning_rate": 1e-06, "loss": 0.0065, "step": 3507 }, { "epoch": 0.5972080354102826, "grad_norm": 1.1004252433776855, "learning_rate": 1e-06, "loss": 0.0083, "step": 3508 }, { "epoch": 0.5973782771535581, "grad_norm": 1.1081154346466064, "learning_rate": 1e-06, "loss": 0.0078, "step": 3509 }, { "epoch": 0.5975485188968335, "grad_norm": 1.1693594455718994, "learning_rate": 1e-06, "loss": 0.0106, "step": 3510 }, { "epoch": 0.597718760640109, "grad_norm": 1.2381548881530762, "learning_rate": 1e-06, "loss": 0.009, "step": 3511 }, { "epoch": 0.5978890023833844, "grad_norm": 1.1379530429840088, "learning_rate": 1e-06, "loss": 0.0114, "step": 3512 }, { "epoch": 0.5980592441266599, "grad_norm": 1.3537330627441406, "learning_rate": 1e-06, "loss": 0.0115, "step": 3513 }, { "epoch": 0.5982294858699353, "grad_norm": 0.8830147981643677, "learning_rate": 1e-06, "loss": 0.009, "step": 3514 }, { "epoch": 0.5983997276132108, "grad_norm": 1.09042489528656, "learning_rate": 1e-06, "loss": 0.0083, "step": 3515 }, { "epoch": 0.5985699693564862, "grad_norm": 1.1855099201202393, "learning_rate": 1e-06, "loss": 0.0092, "step": 3516 }, { "epoch": 0.5987402110997616, "grad_norm": 0.7881447672843933, "learning_rate": 1e-06, "loss": 0.0068, "step": 3517 }, { "epoch": 0.5989104528430371, "grad_norm": 0.8492823243141174, "learning_rate": 1e-06, "loss": 0.0068, "step": 3518 }, { "epoch": 0.5990806945863125, "grad_norm": 1.2348202466964722, "learning_rate": 1e-06, "loss": 0.0102, "step": 3519 }, { "epoch": 0.599250936329588, "grad_norm": 1.514280080795288, "learning_rate": 1e-06, "loss": 0.0134, "step": 3520 }, { "epoch": 0.5994211780728634, "grad_norm": 1.5777965784072876, "learning_rate": 1e-06, "loss": 0.0104, "step": 3521 }, { "epoch": 0.599591419816139, "grad_norm": 1.3330321311950684, "learning_rate": 1e-06, "loss": 0.0086, "step": 3522 }, { "epoch": 0.5997616615594143, "grad_norm": 1.630621314048767, "learning_rate": 1e-06, "loss": 0.0122, "step": 3523 }, { "epoch": 0.5999319033026899, "grad_norm": 1.2280832529067993, "learning_rate": 1e-06, "loss": 0.0097, "step": 3524 }, { "epoch": 0.6001021450459653, "grad_norm": 1.187753677368164, "learning_rate": 1e-06, "loss": 0.0087, "step": 3525 }, { "epoch": 0.6002723867892408, "grad_norm": 1.6510494947433472, "learning_rate": 1e-06, "loss": 0.0101, "step": 3526 }, { "epoch": 0.6004426285325162, "grad_norm": 1.1509414911270142, "learning_rate": 1e-06, "loss": 0.0115, "step": 3527 }, { "epoch": 0.6006128702757916, "grad_norm": 1.300317406654358, "learning_rate": 1e-06, "loss": 0.0103, "step": 3528 }, { "epoch": 0.6007831120190671, "grad_norm": 1.013220191001892, "learning_rate": 1e-06, "loss": 0.0108, "step": 3529 }, { "epoch": 0.6009533537623425, "grad_norm": 0.9694526195526123, "learning_rate": 1e-06, "loss": 0.0076, "step": 3530 }, { "epoch": 0.601123595505618, "grad_norm": 1.1134835481643677, "learning_rate": 1e-06, "loss": 0.011, "step": 3531 }, { "epoch": 0.6012938372488934, "grad_norm": 1.0825552940368652, "learning_rate": 1e-06, "loss": 0.0088, "step": 3532 }, { "epoch": 0.6014640789921689, "grad_norm": 1.277435064315796, "learning_rate": 1e-06, "loss": 0.0117, "step": 3533 }, { "epoch": 0.6016343207354443, "grad_norm": 1.1507532596588135, "learning_rate": 1e-06, "loss": 0.009, "step": 3534 }, { "epoch": 0.6018045624787198, "grad_norm": 1.923765778541565, "learning_rate": 1e-06, "loss": 0.0134, "step": 3535 }, { "epoch": 0.6019748042219952, "grad_norm": 1.156172275543213, "learning_rate": 1e-06, "loss": 0.0123, "step": 3536 }, { "epoch": 0.6021450459652706, "grad_norm": 0.9838459491729736, "learning_rate": 1e-06, "loss": 0.0108, "step": 3537 }, { "epoch": 0.6023152877085461, "grad_norm": 1.3179664611816406, "learning_rate": 1e-06, "loss": 0.0119, "step": 3538 }, { "epoch": 0.6024855294518215, "grad_norm": 0.9081220626831055, "learning_rate": 1e-06, "loss": 0.0076, "step": 3539 }, { "epoch": 0.6026557711950971, "grad_norm": 1.1752504110336304, "learning_rate": 1e-06, "loss": 0.0106, "step": 3540 }, { "epoch": 0.6028260129383725, "grad_norm": 0.9486705660820007, "learning_rate": 1e-06, "loss": 0.0107, "step": 3541 }, { "epoch": 0.602996254681648, "grad_norm": 0.8370993733406067, "learning_rate": 1e-06, "loss": 0.0059, "step": 3542 }, { "epoch": 0.6031664964249234, "grad_norm": 0.8764846920967102, "learning_rate": 1e-06, "loss": 0.0091, "step": 3543 }, { "epoch": 0.6033367381681989, "grad_norm": 1.19331955909729, "learning_rate": 1e-06, "loss": 0.0097, "step": 3544 }, { "epoch": 0.6035069799114743, "grad_norm": 0.8320559859275818, "learning_rate": 1e-06, "loss": 0.0074, "step": 3545 }, { "epoch": 0.6036772216547498, "grad_norm": 1.54590904712677, "learning_rate": 1e-06, "loss": 0.0126, "step": 3546 }, { "epoch": 0.6038474633980252, "grad_norm": 0.878666877746582, "learning_rate": 1e-06, "loss": 0.0078, "step": 3547 }, { "epoch": 0.6040177051413006, "grad_norm": 1.5745292901992798, "learning_rate": 1e-06, "loss": 0.0122, "step": 3548 }, { "epoch": 0.6041879468845761, "grad_norm": 0.9554889798164368, "learning_rate": 1e-06, "loss": 0.0072, "step": 3549 }, { "epoch": 0.6043581886278515, "grad_norm": 1.1246119737625122, "learning_rate": 1e-06, "loss": 0.0102, "step": 3550 }, { "epoch": 0.604528430371127, "grad_norm": 1.0322120189666748, "learning_rate": 1e-06, "loss": 0.0063, "step": 3551 }, { "epoch": 0.6046986721144024, "grad_norm": 0.9092923998832703, "learning_rate": 1e-06, "loss": 0.0064, "step": 3552 }, { "epoch": 0.6048689138576779, "grad_norm": 1.2549725770950317, "learning_rate": 1e-06, "loss": 0.0141, "step": 3553 }, { "epoch": 0.6050391556009533, "grad_norm": 1.0988869667053223, "learning_rate": 1e-06, "loss": 0.0135, "step": 3554 }, { "epoch": 0.6052093973442288, "grad_norm": 1.0146656036376953, "learning_rate": 1e-06, "loss": 0.0082, "step": 3555 }, { "epoch": 0.6053796390875043, "grad_norm": 1.2625722885131836, "learning_rate": 1e-06, "loss": 0.0091, "step": 3556 }, { "epoch": 0.6055498808307797, "grad_norm": 1.0923871994018555, "learning_rate": 1e-06, "loss": 0.0078, "step": 3557 }, { "epoch": 0.6057201225740552, "grad_norm": 0.8863467574119568, "learning_rate": 1e-06, "loss": 0.006, "step": 3558 }, { "epoch": 0.6058903643173306, "grad_norm": 1.0210617780685425, "learning_rate": 1e-06, "loss": 0.0088, "step": 3559 }, { "epoch": 0.6060606060606061, "grad_norm": 2.0322105884552, "learning_rate": 1e-06, "loss": 0.02, "step": 3560 }, { "epoch": 0.6062308478038815, "grad_norm": 1.0655431747436523, "learning_rate": 1e-06, "loss": 0.0088, "step": 3561 }, { "epoch": 0.606401089547157, "grad_norm": 1.0927060842514038, "learning_rate": 1e-06, "loss": 0.0085, "step": 3562 }, { "epoch": 0.6065713312904324, "grad_norm": 1.0595030784606934, "learning_rate": 1e-06, "loss": 0.0082, "step": 3563 }, { "epoch": 0.6067415730337079, "grad_norm": 1.1079046726226807, "learning_rate": 1e-06, "loss": 0.0088, "step": 3564 }, { "epoch": 0.6069118147769833, "grad_norm": 1.0296462774276733, "learning_rate": 1e-06, "loss": 0.0108, "step": 3565 }, { "epoch": 0.6070820565202588, "grad_norm": 0.868334949016571, "learning_rate": 1e-06, "loss": 0.0065, "step": 3566 }, { "epoch": 0.6072522982635342, "grad_norm": 1.4144976139068604, "learning_rate": 1e-06, "loss": 0.0141, "step": 3567 }, { "epoch": 0.6074225400068096, "grad_norm": 1.1668305397033691, "learning_rate": 1e-06, "loss": 0.0101, "step": 3568 }, { "epoch": 0.6075927817500851, "grad_norm": 1.2079553604125977, "learning_rate": 1e-06, "loss": 0.0091, "step": 3569 }, { "epoch": 0.6077630234933605, "grad_norm": 1.1177006959915161, "learning_rate": 1e-06, "loss": 0.0117, "step": 3570 }, { "epoch": 0.607933265236636, "grad_norm": 1.1665843725204468, "learning_rate": 1e-06, "loss": 0.0096, "step": 3571 }, { "epoch": 0.6081035069799114, "grad_norm": 1.0102547407150269, "learning_rate": 1e-06, "loss": 0.0075, "step": 3572 }, { "epoch": 0.608273748723187, "grad_norm": 0.9635432362556458, "learning_rate": 1e-06, "loss": 0.0075, "step": 3573 }, { "epoch": 0.6084439904664624, "grad_norm": 1.3346338272094727, "learning_rate": 1e-06, "loss": 0.0108, "step": 3574 }, { "epoch": 0.6086142322097379, "grad_norm": 1.608765721321106, "learning_rate": 1e-06, "loss": 0.0268, "step": 3575 }, { "epoch": 0.6087844739530133, "grad_norm": 0.9008107781410217, "learning_rate": 1e-06, "loss": 0.0071, "step": 3576 }, { "epoch": 0.6089547156962888, "grad_norm": 1.0293365716934204, "learning_rate": 1e-06, "loss": 0.0061, "step": 3577 }, { "epoch": 0.6091249574395642, "grad_norm": 1.082380771636963, "learning_rate": 1e-06, "loss": 0.011, "step": 3578 }, { "epoch": 0.6092951991828396, "grad_norm": 0.7760728597640991, "learning_rate": 1e-06, "loss": 0.0069, "step": 3579 }, { "epoch": 0.6094654409261151, "grad_norm": 0.9977663159370422, "learning_rate": 1e-06, "loss": 0.0057, "step": 3580 }, { "epoch": 0.6096356826693905, "grad_norm": 0.7965632081031799, "learning_rate": 1e-06, "loss": 0.0066, "step": 3581 }, { "epoch": 0.609805924412666, "grad_norm": 1.3011534214019775, "learning_rate": 1e-06, "loss": 0.0158, "step": 3582 }, { "epoch": 0.6099761661559414, "grad_norm": 1.1788239479064941, "learning_rate": 1e-06, "loss": 0.0141, "step": 3583 }, { "epoch": 0.6101464078992169, "grad_norm": 1.0740551948547363, "learning_rate": 1e-06, "loss": 0.0111, "step": 3584 }, { "epoch": 0.6103166496424923, "grad_norm": 1.2038638591766357, "learning_rate": 1e-06, "loss": 0.0105, "step": 3585 }, { "epoch": 0.6104868913857678, "grad_norm": 1.0219711065292358, "learning_rate": 1e-06, "loss": 0.0098, "step": 3586 }, { "epoch": 0.6106571331290432, "grad_norm": 1.2618956565856934, "learning_rate": 1e-06, "loss": 0.0109, "step": 3587 }, { "epoch": 0.6108273748723186, "grad_norm": 1.1306350231170654, "learning_rate": 1e-06, "loss": 0.007, "step": 3588 }, { "epoch": 0.6109976166155942, "grad_norm": 0.8603691458702087, "learning_rate": 1e-06, "loss": 0.008, "step": 3589 }, { "epoch": 0.6111678583588696, "grad_norm": 1.2200685739517212, "learning_rate": 1e-06, "loss": 0.0105, "step": 3590 }, { "epoch": 0.6113381001021451, "grad_norm": 1.4106483459472656, "learning_rate": 1e-06, "loss": 0.0089, "step": 3591 }, { "epoch": 0.6115083418454205, "grad_norm": 1.3192464113235474, "learning_rate": 1e-06, "loss": 0.0123, "step": 3592 }, { "epoch": 0.611678583588696, "grad_norm": 1.1287734508514404, "learning_rate": 1e-06, "loss": 0.0102, "step": 3593 }, { "epoch": 0.6118488253319714, "grad_norm": 1.193594217300415, "learning_rate": 1e-06, "loss": 0.0087, "step": 3594 }, { "epoch": 0.6120190670752469, "grad_norm": 1.3210780620574951, "learning_rate": 1e-06, "loss": 0.0098, "step": 3595 }, { "epoch": 0.6121893088185223, "grad_norm": 1.2266240119934082, "learning_rate": 1e-06, "loss": 0.0075, "step": 3596 }, { "epoch": 0.6123595505617978, "grad_norm": 1.1135960817337036, "learning_rate": 1e-06, "loss": 0.0078, "step": 3597 }, { "epoch": 0.6125297923050732, "grad_norm": 1.2418755292892456, "learning_rate": 1e-06, "loss": 0.0104, "step": 3598 }, { "epoch": 0.6127000340483486, "grad_norm": 1.2024139165878296, "learning_rate": 1e-06, "loss": 0.0107, "step": 3599 }, { "epoch": 0.6128702757916241, "grad_norm": 0.8348981142044067, "learning_rate": 1e-06, "loss": 0.0071, "step": 3600 }, { "epoch": 0.6130405175348995, "grad_norm": 1.1524181365966797, "learning_rate": 1e-06, "loss": 0.0096, "step": 3601 }, { "epoch": 0.613210759278175, "grad_norm": 1.0412416458129883, "learning_rate": 1e-06, "loss": 0.0096, "step": 3602 }, { "epoch": 0.6133810010214504, "grad_norm": 0.7285078763961792, "learning_rate": 1e-06, "loss": 0.0063, "step": 3603 }, { "epoch": 0.613551242764726, "grad_norm": 0.9448837041854858, "learning_rate": 1e-06, "loss": 0.0064, "step": 3604 }, { "epoch": 0.6137214845080013, "grad_norm": 0.8977110385894775, "learning_rate": 1e-06, "loss": 0.007, "step": 3605 }, { "epoch": 0.6138917262512769, "grad_norm": 1.5285866260528564, "learning_rate": 1e-06, "loss": 0.0143, "step": 3606 }, { "epoch": 0.6140619679945523, "grad_norm": 1.3463897705078125, "learning_rate": 1e-06, "loss": 0.0138, "step": 3607 }, { "epoch": 0.6142322097378277, "grad_norm": 1.2851765155792236, "learning_rate": 1e-06, "loss": 0.0084, "step": 3608 }, { "epoch": 0.6144024514811032, "grad_norm": 1.5624653100967407, "learning_rate": 1e-06, "loss": 0.0094, "step": 3609 }, { "epoch": 0.6145726932243786, "grad_norm": 1.6467691659927368, "learning_rate": 1e-06, "loss": 0.0092, "step": 3610 }, { "epoch": 0.6147429349676541, "grad_norm": 1.128787636756897, "learning_rate": 1e-06, "loss": 0.0075, "step": 3611 }, { "epoch": 0.6149131767109295, "grad_norm": 1.4503066539764404, "learning_rate": 1e-06, "loss": 0.02, "step": 3612 }, { "epoch": 0.615083418454205, "grad_norm": 0.8577298521995544, "learning_rate": 1e-06, "loss": 0.0079, "step": 3613 }, { "epoch": 0.6152536601974804, "grad_norm": 1.0738738775253296, "learning_rate": 1e-06, "loss": 0.008, "step": 3614 }, { "epoch": 0.6154239019407559, "grad_norm": 0.769759476184845, "learning_rate": 1e-06, "loss": 0.0058, "step": 3615 }, { "epoch": 0.6155941436840313, "grad_norm": 0.8557948470115662, "learning_rate": 1e-06, "loss": 0.0085, "step": 3616 }, { "epoch": 0.6157643854273068, "grad_norm": 0.8239853382110596, "learning_rate": 1e-06, "loss": 0.0079, "step": 3617 }, { "epoch": 0.6159346271705822, "grad_norm": 0.9858947396278381, "learning_rate": 1e-06, "loss": 0.007, "step": 3618 }, { "epoch": 0.6161048689138576, "grad_norm": 1.341850996017456, "learning_rate": 1e-06, "loss": 0.0122, "step": 3619 }, { "epoch": 0.6162751106571331, "grad_norm": 0.8130425214767456, "learning_rate": 1e-06, "loss": 0.0072, "step": 3620 }, { "epoch": 0.6164453524004085, "grad_norm": 0.826560378074646, "learning_rate": 1e-06, "loss": 0.0053, "step": 3621 }, { "epoch": 0.616615594143684, "grad_norm": 1.0723116397857666, "learning_rate": 1e-06, "loss": 0.008, "step": 3622 }, { "epoch": 0.6167858358869595, "grad_norm": 0.9984582662582397, "learning_rate": 1e-06, "loss": 0.0082, "step": 3623 }, { "epoch": 0.616956077630235, "grad_norm": 1.1177350282669067, "learning_rate": 1e-06, "loss": 0.0114, "step": 3624 }, { "epoch": 0.6171263193735104, "grad_norm": 1.006271243095398, "learning_rate": 1e-06, "loss": 0.0061, "step": 3625 }, { "epoch": 0.6172965611167859, "grad_norm": 1.1038841009140015, "learning_rate": 1e-06, "loss": 0.0068, "step": 3626 }, { "epoch": 0.6174668028600613, "grad_norm": 1.2689024209976196, "learning_rate": 1e-06, "loss": 0.0097, "step": 3627 }, { "epoch": 0.6176370446033368, "grad_norm": 1.8578965663909912, "learning_rate": 1e-06, "loss": 0.0109, "step": 3628 }, { "epoch": 0.6178072863466122, "grad_norm": 1.2967028617858887, "learning_rate": 1e-06, "loss": 0.0091, "step": 3629 }, { "epoch": 0.6179775280898876, "grad_norm": 0.9618591666221619, "learning_rate": 1e-06, "loss": 0.0069, "step": 3630 }, { "epoch": 0.6181477698331631, "grad_norm": 0.8689693212509155, "learning_rate": 1e-06, "loss": 0.0073, "step": 3631 }, { "epoch": 0.6183180115764385, "grad_norm": 1.112473487854004, "learning_rate": 1e-06, "loss": 0.0087, "step": 3632 }, { "epoch": 0.618488253319714, "grad_norm": 1.1240233182907104, "learning_rate": 1e-06, "loss": 0.0078, "step": 3633 }, { "epoch": 0.6186584950629894, "grad_norm": 0.7019220590591431, "learning_rate": 1e-06, "loss": 0.0084, "step": 3634 }, { "epoch": 0.6188287368062649, "grad_norm": 0.8149914145469666, "learning_rate": 1e-06, "loss": 0.008, "step": 3635 }, { "epoch": 0.6189989785495403, "grad_norm": 1.152665376663208, "learning_rate": 1e-06, "loss": 0.0085, "step": 3636 }, { "epoch": 0.6191692202928158, "grad_norm": 1.2790850400924683, "learning_rate": 1e-06, "loss": 0.0134, "step": 3637 }, { "epoch": 0.6193394620360912, "grad_norm": 1.127205491065979, "learning_rate": 1e-06, "loss": 0.0093, "step": 3638 }, { "epoch": 0.6195097037793666, "grad_norm": 1.7708073854446411, "learning_rate": 1e-06, "loss": 0.0252, "step": 3639 }, { "epoch": 0.6196799455226422, "grad_norm": 1.142716646194458, "learning_rate": 1e-06, "loss": 0.0147, "step": 3640 }, { "epoch": 0.6198501872659176, "grad_norm": 0.881137490272522, "learning_rate": 1e-06, "loss": 0.0069, "step": 3641 }, { "epoch": 0.6200204290091931, "grad_norm": 1.1176531314849854, "learning_rate": 1e-06, "loss": 0.009, "step": 3642 }, { "epoch": 0.6201906707524685, "grad_norm": 1.3249547481536865, "learning_rate": 1e-06, "loss": 0.0089, "step": 3643 }, { "epoch": 0.620360912495744, "grad_norm": 2.152984142303467, "learning_rate": 1e-06, "loss": 0.0188, "step": 3644 }, { "epoch": 0.6205311542390194, "grad_norm": 1.2621212005615234, "learning_rate": 1e-06, "loss": 0.0153, "step": 3645 }, { "epoch": 0.6207013959822949, "grad_norm": 1.1430959701538086, "learning_rate": 1e-06, "loss": 0.0107, "step": 3646 }, { "epoch": 0.6208716377255703, "grad_norm": 1.2035990953445435, "learning_rate": 1e-06, "loss": 0.0088, "step": 3647 }, { "epoch": 0.6210418794688458, "grad_norm": 1.1001369953155518, "learning_rate": 1e-06, "loss": 0.0081, "step": 3648 }, { "epoch": 0.6212121212121212, "grad_norm": 1.1591325998306274, "learning_rate": 1e-06, "loss": 0.008, "step": 3649 }, { "epoch": 0.6213823629553966, "grad_norm": 0.8609371781349182, "learning_rate": 1e-06, "loss": 0.0077, "step": 3650 }, { "epoch": 0.6215526046986721, "grad_norm": 1.3064498901367188, "learning_rate": 1e-06, "loss": 0.0189, "step": 3651 }, { "epoch": 0.6217228464419475, "grad_norm": 1.1729905605316162, "learning_rate": 1e-06, "loss": 0.0092, "step": 3652 }, { "epoch": 0.621893088185223, "grad_norm": 1.064233660697937, "learning_rate": 1e-06, "loss": 0.0138, "step": 3653 }, { "epoch": 0.6220633299284984, "grad_norm": 1.0935028791427612, "learning_rate": 1e-06, "loss": 0.0127, "step": 3654 }, { "epoch": 0.622233571671774, "grad_norm": 0.9840522408485413, "learning_rate": 1e-06, "loss": 0.0074, "step": 3655 }, { "epoch": 0.6224038134150494, "grad_norm": 1.152642846107483, "learning_rate": 1e-06, "loss": 0.0076, "step": 3656 }, { "epoch": 0.6225740551583249, "grad_norm": 1.3375604152679443, "learning_rate": 1e-06, "loss": 0.011, "step": 3657 }, { "epoch": 0.6227442969016003, "grad_norm": 1.8616725206375122, "learning_rate": 1e-06, "loss": 0.0119, "step": 3658 }, { "epoch": 0.6229145386448757, "grad_norm": 1.0592864751815796, "learning_rate": 1e-06, "loss": 0.0077, "step": 3659 }, { "epoch": 0.6230847803881512, "grad_norm": 1.135156512260437, "learning_rate": 1e-06, "loss": 0.0082, "step": 3660 }, { "epoch": 0.6232550221314266, "grad_norm": 1.1434532403945923, "learning_rate": 1e-06, "loss": 0.0079, "step": 3661 }, { "epoch": 0.6234252638747021, "grad_norm": 1.0572514533996582, "learning_rate": 1e-06, "loss": 0.0076, "step": 3662 }, { "epoch": 0.6235955056179775, "grad_norm": 1.5850212574005127, "learning_rate": 1e-06, "loss": 0.0124, "step": 3663 }, { "epoch": 0.623765747361253, "grad_norm": 1.4172906875610352, "learning_rate": 1e-06, "loss": 0.0128, "step": 3664 }, { "epoch": 0.6239359891045284, "grad_norm": 1.1192266941070557, "learning_rate": 1e-06, "loss": 0.0114, "step": 3665 }, { "epoch": 0.6241062308478039, "grad_norm": 1.0807698965072632, "learning_rate": 1e-06, "loss": 0.0068, "step": 3666 }, { "epoch": 0.6242764725910793, "grad_norm": 1.0555567741394043, "learning_rate": 1e-06, "loss": 0.0083, "step": 3667 }, { "epoch": 0.6244467143343548, "grad_norm": 1.2566262483596802, "learning_rate": 1e-06, "loss": 0.0088, "step": 3668 }, { "epoch": 0.6246169560776302, "grad_norm": 0.9095777273178101, "learning_rate": 1e-06, "loss": 0.0074, "step": 3669 }, { "epoch": 0.6247871978209056, "grad_norm": 0.8502013683319092, "learning_rate": 1e-06, "loss": 0.0067, "step": 3670 }, { "epoch": 0.6249574395641811, "grad_norm": 1.1455646753311157, "learning_rate": 1e-06, "loss": 0.0079, "step": 3671 }, { "epoch": 0.6251276813074566, "grad_norm": 1.1766411066055298, "learning_rate": 1e-06, "loss": 0.0074, "step": 3672 }, { "epoch": 0.6252979230507321, "grad_norm": 1.0214406251907349, "learning_rate": 1e-06, "loss": 0.007, "step": 3673 }, { "epoch": 0.6254681647940075, "grad_norm": 1.4715005159378052, "learning_rate": 1e-06, "loss": 0.0187, "step": 3674 }, { "epoch": 0.625638406537283, "grad_norm": 0.9039010405540466, "learning_rate": 1e-06, "loss": 0.0059, "step": 3675 }, { "epoch": 0.6258086482805584, "grad_norm": 1.0967904329299927, "learning_rate": 1e-06, "loss": 0.0069, "step": 3676 }, { "epoch": 0.6259788900238339, "grad_norm": 1.013299822807312, "learning_rate": 1e-06, "loss": 0.0072, "step": 3677 }, { "epoch": 0.6261491317671093, "grad_norm": 1.229809045791626, "learning_rate": 1e-06, "loss": 0.0094, "step": 3678 }, { "epoch": 0.6263193735103847, "grad_norm": 1.207761287689209, "learning_rate": 1e-06, "loss": 0.0063, "step": 3679 }, { "epoch": 0.6264896152536602, "grad_norm": 1.036696195602417, "learning_rate": 1e-06, "loss": 0.0091, "step": 3680 }, { "epoch": 0.6266598569969356, "grad_norm": 1.182930827140808, "learning_rate": 1e-06, "loss": 0.0099, "step": 3681 }, { "epoch": 0.6268300987402111, "grad_norm": 1.3283963203430176, "learning_rate": 1e-06, "loss": 0.0164, "step": 3682 }, { "epoch": 0.6270003404834865, "grad_norm": 1.021653175354004, "learning_rate": 1e-06, "loss": 0.0093, "step": 3683 }, { "epoch": 0.627170582226762, "grad_norm": 1.5096207857131958, "learning_rate": 1e-06, "loss": 0.0142, "step": 3684 }, { "epoch": 0.6273408239700374, "grad_norm": 1.1412389278411865, "learning_rate": 1e-06, "loss": 0.0098, "step": 3685 }, { "epoch": 0.6275110657133129, "grad_norm": 1.141716718673706, "learning_rate": 1e-06, "loss": 0.0082, "step": 3686 }, { "epoch": 0.6276813074565883, "grad_norm": 1.1129909753799438, "learning_rate": 1e-06, "loss": 0.0096, "step": 3687 }, { "epoch": 0.6278515491998639, "grad_norm": 0.8182403445243835, "learning_rate": 1e-06, "loss": 0.0064, "step": 3688 }, { "epoch": 0.6280217909431393, "grad_norm": 1.2895714044570923, "learning_rate": 1e-06, "loss": 0.0184, "step": 3689 }, { "epoch": 0.6281920326864147, "grad_norm": 0.8942720293998718, "learning_rate": 1e-06, "loss": 0.0069, "step": 3690 }, { "epoch": 0.6283622744296902, "grad_norm": 1.0091437101364136, "learning_rate": 1e-06, "loss": 0.0076, "step": 3691 }, { "epoch": 0.6285325161729656, "grad_norm": 1.1240427494049072, "learning_rate": 1e-06, "loss": 0.0086, "step": 3692 }, { "epoch": 0.6287027579162411, "grad_norm": 1.328053593635559, "learning_rate": 1e-06, "loss": 0.0125, "step": 3693 }, { "epoch": 0.6288729996595165, "grad_norm": 0.9419410824775696, "learning_rate": 1e-06, "loss": 0.0087, "step": 3694 }, { "epoch": 0.629043241402792, "grad_norm": 0.7652695178985596, "learning_rate": 1e-06, "loss": 0.0057, "step": 3695 }, { "epoch": 0.6292134831460674, "grad_norm": 1.2899373769760132, "learning_rate": 1e-06, "loss": 0.0091, "step": 3696 }, { "epoch": 0.6293837248893429, "grad_norm": 1.0259873867034912, "learning_rate": 1e-06, "loss": 0.0086, "step": 3697 }, { "epoch": 0.6295539666326183, "grad_norm": 1.1988884210586548, "learning_rate": 1e-06, "loss": 0.0072, "step": 3698 }, { "epoch": 0.6297242083758938, "grad_norm": 0.9181260466575623, "learning_rate": 1e-06, "loss": 0.0071, "step": 3699 }, { "epoch": 0.6298944501191692, "grad_norm": 0.9410200119018555, "learning_rate": 1e-06, "loss": 0.0074, "step": 3700 }, { "epoch": 0.6300646918624446, "grad_norm": 1.4462769031524658, "learning_rate": 1e-06, "loss": 0.0113, "step": 3701 }, { "epoch": 0.6302349336057201, "grad_norm": 1.0774480104446411, "learning_rate": 1e-06, "loss": 0.0078, "step": 3702 }, { "epoch": 0.6304051753489955, "grad_norm": 1.004542589187622, "learning_rate": 1e-06, "loss": 0.0117, "step": 3703 }, { "epoch": 0.630575417092271, "grad_norm": 1.9113359451293945, "learning_rate": 1e-06, "loss": 0.0168, "step": 3704 }, { "epoch": 0.6307456588355465, "grad_norm": 1.03765869140625, "learning_rate": 1e-06, "loss": 0.0076, "step": 3705 }, { "epoch": 0.630915900578822, "grad_norm": 1.1582138538360596, "learning_rate": 1e-06, "loss": 0.0072, "step": 3706 }, { "epoch": 0.6310861423220974, "grad_norm": 1.195564866065979, "learning_rate": 1e-06, "loss": 0.0094, "step": 3707 }, { "epoch": 0.6312563840653729, "grad_norm": 0.9630500674247742, "learning_rate": 1e-06, "loss": 0.0084, "step": 3708 }, { "epoch": 0.6314266258086483, "grad_norm": 1.4461884498596191, "learning_rate": 1e-06, "loss": 0.0123, "step": 3709 }, { "epoch": 0.6315968675519237, "grad_norm": 0.9124757051467896, "learning_rate": 1e-06, "loss": 0.0071, "step": 3710 }, { "epoch": 0.6317671092951992, "grad_norm": 1.134900450706482, "learning_rate": 1e-06, "loss": 0.0111, "step": 3711 }, { "epoch": 0.6319373510384746, "grad_norm": 1.1023823022842407, "learning_rate": 1e-06, "loss": 0.0074, "step": 3712 }, { "epoch": 0.6321075927817501, "grad_norm": 1.0747976303100586, "learning_rate": 1e-06, "loss": 0.0111, "step": 3713 }, { "epoch": 0.6322778345250255, "grad_norm": 0.8265736103057861, "learning_rate": 1e-06, "loss": 0.007, "step": 3714 }, { "epoch": 0.632448076268301, "grad_norm": 1.0795259475708008, "learning_rate": 1e-06, "loss": 0.0088, "step": 3715 }, { "epoch": 0.6326183180115764, "grad_norm": 1.0878645181655884, "learning_rate": 1e-06, "loss": 0.0074, "step": 3716 }, { "epoch": 0.6327885597548519, "grad_norm": 0.9701595306396484, "learning_rate": 1e-06, "loss": 0.0072, "step": 3717 }, { "epoch": 0.6329588014981273, "grad_norm": 1.2201334238052368, "learning_rate": 1e-06, "loss": 0.0087, "step": 3718 }, { "epoch": 0.6331290432414028, "grad_norm": 1.0813226699829102, "learning_rate": 1e-06, "loss": 0.0087, "step": 3719 }, { "epoch": 0.6332992849846782, "grad_norm": 0.7975080609321594, "learning_rate": 1e-06, "loss": 0.0056, "step": 3720 }, { "epoch": 0.6334695267279536, "grad_norm": 1.270108938217163, "learning_rate": 1e-06, "loss": 0.0127, "step": 3721 }, { "epoch": 0.6336397684712292, "grad_norm": 1.3048897981643677, "learning_rate": 1e-06, "loss": 0.008, "step": 3722 }, { "epoch": 0.6338100102145046, "grad_norm": 2.3595523834228516, "learning_rate": 1e-06, "loss": 0.0136, "step": 3723 }, { "epoch": 0.6339802519577801, "grad_norm": 1.565253734588623, "learning_rate": 1e-06, "loss": 0.0177, "step": 3724 }, { "epoch": 0.6341504937010555, "grad_norm": 0.9214725494384766, "learning_rate": 1e-06, "loss": 0.0073, "step": 3725 }, { "epoch": 0.634320735444331, "grad_norm": 0.9782243967056274, "learning_rate": 1e-06, "loss": 0.0088, "step": 3726 }, { "epoch": 0.6344909771876064, "grad_norm": 0.9782243967056274, "learning_rate": 1e-06, "loss": 0.0266, "step": 3727 }, { "epoch": 0.6346612189308819, "grad_norm": 1.1668390035629272, "learning_rate": 1e-06, "loss": 0.0101, "step": 3728 }, { "epoch": 0.6348314606741573, "grad_norm": 1.1563256978988647, "learning_rate": 1e-06, "loss": 0.0085, "step": 3729 }, { "epoch": 0.6350017024174327, "grad_norm": 0.9238855242729187, "learning_rate": 1e-06, "loss": 0.0089, "step": 3730 }, { "epoch": 0.6351719441607082, "grad_norm": 0.9837780594825745, "learning_rate": 1e-06, "loss": 0.0054, "step": 3731 }, { "epoch": 0.6353421859039836, "grad_norm": 1.0795369148254395, "learning_rate": 1e-06, "loss": 0.0092, "step": 3732 }, { "epoch": 0.6355124276472591, "grad_norm": 0.8233152627944946, "learning_rate": 1e-06, "loss": 0.0056, "step": 3733 }, { "epoch": 0.6356826693905345, "grad_norm": 0.9861051440238953, "learning_rate": 1e-06, "loss": 0.0087, "step": 3734 }, { "epoch": 0.63585291113381, "grad_norm": 1.1408077478408813, "learning_rate": 1e-06, "loss": 0.0087, "step": 3735 }, { "epoch": 0.6360231528770854, "grad_norm": 1.1144981384277344, "learning_rate": 1e-06, "loss": 0.0089, "step": 3736 }, { "epoch": 0.636193394620361, "grad_norm": 1.3315359354019165, "learning_rate": 1e-06, "loss": 0.0085, "step": 3737 }, { "epoch": 0.6363636363636364, "grad_norm": 0.9346138834953308, "learning_rate": 1e-06, "loss": 0.0102, "step": 3738 }, { "epoch": 0.6365338781069119, "grad_norm": 1.3445444107055664, "learning_rate": 1e-06, "loss": 0.0218, "step": 3739 }, { "epoch": 0.6367041198501873, "grad_norm": 1.3266631364822388, "learning_rate": 1e-06, "loss": 0.0081, "step": 3740 }, { "epoch": 0.6368743615934627, "grad_norm": 0.7501433491706848, "learning_rate": 1e-06, "loss": 0.0066, "step": 3741 }, { "epoch": 0.6370446033367382, "grad_norm": 0.9168145060539246, "learning_rate": 1e-06, "loss": 0.0071, "step": 3742 }, { "epoch": 0.6372148450800136, "grad_norm": 0.8834933042526245, "learning_rate": 1e-06, "loss": 0.006, "step": 3743 }, { "epoch": 0.6373850868232891, "grad_norm": 1.115196943283081, "learning_rate": 1e-06, "loss": 0.009, "step": 3744 }, { "epoch": 0.6375553285665645, "grad_norm": 1.0395985841751099, "learning_rate": 1e-06, "loss": 0.0068, "step": 3745 }, { "epoch": 0.63772557030984, "grad_norm": 1.1208879947662354, "learning_rate": 1e-06, "loss": 0.0079, "step": 3746 }, { "epoch": 0.6378958120531154, "grad_norm": 1.482139229774475, "learning_rate": 1e-06, "loss": 0.0107, "step": 3747 }, { "epoch": 0.6380660537963909, "grad_norm": 1.3802002668380737, "learning_rate": 1e-06, "loss": 0.0109, "step": 3748 }, { "epoch": 0.6382362955396663, "grad_norm": 1.1360260248184204, "learning_rate": 1e-06, "loss": 0.0093, "step": 3749 }, { "epoch": 0.6384065372829418, "grad_norm": 1.353955864906311, "learning_rate": 1e-06, "loss": 0.0098, "step": 3750 }, { "epoch": 0.6385767790262172, "grad_norm": 1.0488437414169312, "learning_rate": 1e-06, "loss": 0.0069, "step": 3751 }, { "epoch": 0.6387470207694926, "grad_norm": 1.547584891319275, "learning_rate": 1e-06, "loss": 0.0152, "step": 3752 }, { "epoch": 0.6389172625127681, "grad_norm": 1.0610159635543823, "learning_rate": 1e-06, "loss": 0.0074, "step": 3753 }, { "epoch": 0.6390875042560435, "grad_norm": 1.1896774768829346, "learning_rate": 1e-06, "loss": 0.0072, "step": 3754 }, { "epoch": 0.6392577459993191, "grad_norm": 1.3751659393310547, "learning_rate": 1e-06, "loss": 0.0152, "step": 3755 }, { "epoch": 0.6394279877425945, "grad_norm": 1.0783185958862305, "learning_rate": 1e-06, "loss": 0.0087, "step": 3756 }, { "epoch": 0.63959822948587, "grad_norm": 1.002148985862732, "learning_rate": 1e-06, "loss": 0.0065, "step": 3757 }, { "epoch": 0.6397684712291454, "grad_norm": 0.9721300005912781, "learning_rate": 1e-06, "loss": 0.0083, "step": 3758 }, { "epoch": 0.6399387129724209, "grad_norm": 1.0367679595947266, "learning_rate": 1e-06, "loss": 0.0075, "step": 3759 }, { "epoch": 0.6401089547156963, "grad_norm": 1.3424270153045654, "learning_rate": 1e-06, "loss": 0.0119, "step": 3760 }, { "epoch": 0.6402791964589717, "grad_norm": 1.1471754312515259, "learning_rate": 1e-06, "loss": 0.0085, "step": 3761 }, { "epoch": 0.6404494382022472, "grad_norm": 1.1951358318328857, "learning_rate": 1e-06, "loss": 0.0096, "step": 3762 }, { "epoch": 0.6406196799455226, "grad_norm": 1.077262282371521, "learning_rate": 1e-06, "loss": 0.0077, "step": 3763 }, { "epoch": 0.6407899216887981, "grad_norm": 2.4920833110809326, "learning_rate": 1e-06, "loss": 0.0159, "step": 3764 }, { "epoch": 0.6409601634320735, "grad_norm": 1.1345974206924438, "learning_rate": 1e-06, "loss": 0.01, "step": 3765 }, { "epoch": 0.641130405175349, "grad_norm": 0.8907074928283691, "learning_rate": 1e-06, "loss": 0.007, "step": 3766 }, { "epoch": 0.6413006469186244, "grad_norm": 1.1611642837524414, "learning_rate": 1e-06, "loss": 0.0095, "step": 3767 }, { "epoch": 0.6414708886618999, "grad_norm": 1.0396606922149658, "learning_rate": 1e-06, "loss": 0.0104, "step": 3768 }, { "epoch": 0.6416411304051753, "grad_norm": 0.9746201634407043, "learning_rate": 1e-06, "loss": 0.0078, "step": 3769 }, { "epoch": 0.6418113721484509, "grad_norm": 0.9750457406044006, "learning_rate": 1e-06, "loss": 0.0108, "step": 3770 }, { "epoch": 0.6419816138917263, "grad_norm": 1.3506128787994385, "learning_rate": 1e-06, "loss": 0.0099, "step": 3771 }, { "epoch": 0.6421518556350017, "grad_norm": 1.9846067428588867, "learning_rate": 1e-06, "loss": 0.0243, "step": 3772 }, { "epoch": 0.6423220973782772, "grad_norm": 0.9080547094345093, "learning_rate": 1e-06, "loss": 0.008, "step": 3773 }, { "epoch": 0.6424923391215526, "grad_norm": 1.4013069868087769, "learning_rate": 1e-06, "loss": 0.0107, "step": 3774 }, { "epoch": 0.6426625808648281, "grad_norm": 1.1114245653152466, "learning_rate": 1e-06, "loss": 0.0146, "step": 3775 }, { "epoch": 0.6428328226081035, "grad_norm": 0.9145612120628357, "learning_rate": 1e-06, "loss": 0.0092, "step": 3776 }, { "epoch": 0.643003064351379, "grad_norm": 0.9096139073371887, "learning_rate": 1e-06, "loss": 0.0089, "step": 3777 }, { "epoch": 0.6431733060946544, "grad_norm": 2.0348570346832275, "learning_rate": 1e-06, "loss": 0.0102, "step": 3778 }, { "epoch": 0.6433435478379299, "grad_norm": 1.3212345838546753, "learning_rate": 1e-06, "loss": 0.0129, "step": 3779 }, { "epoch": 0.6435137895812053, "grad_norm": 0.8319016098976135, "learning_rate": 1e-06, "loss": 0.0072, "step": 3780 }, { "epoch": 0.6436840313244807, "grad_norm": 0.9304143786430359, "learning_rate": 1e-06, "loss": 0.0067, "step": 3781 }, { "epoch": 0.6438542730677562, "grad_norm": 1.048282504081726, "learning_rate": 1e-06, "loss": 0.0089, "step": 3782 }, { "epoch": 0.6440245148110316, "grad_norm": 1.1110150814056396, "learning_rate": 1e-06, "loss": 0.0103, "step": 3783 }, { "epoch": 0.6441947565543071, "grad_norm": 1.329409122467041, "learning_rate": 1e-06, "loss": 0.0162, "step": 3784 }, { "epoch": 0.6443649982975825, "grad_norm": 0.838369607925415, "learning_rate": 1e-06, "loss": 0.0064, "step": 3785 }, { "epoch": 0.644535240040858, "grad_norm": 1.109646201133728, "learning_rate": 1e-06, "loss": 0.0122, "step": 3786 }, { "epoch": 0.6447054817841334, "grad_norm": 1.0034140348434448, "learning_rate": 1e-06, "loss": 0.0072, "step": 3787 }, { "epoch": 0.644875723527409, "grad_norm": 1.1564267873764038, "learning_rate": 1e-06, "loss": 0.0101, "step": 3788 }, { "epoch": 0.6450459652706844, "grad_norm": 1.3622796535491943, "learning_rate": 1e-06, "loss": 0.0152, "step": 3789 }, { "epoch": 0.6452162070139599, "grad_norm": 0.9166209697723389, "learning_rate": 1e-06, "loss": 0.0066, "step": 3790 }, { "epoch": 0.6453864487572353, "grad_norm": 1.0101425647735596, "learning_rate": 1e-06, "loss": 0.0113, "step": 3791 }, { "epoch": 0.6455566905005107, "grad_norm": 1.1960997581481934, "learning_rate": 1e-06, "loss": 0.0143, "step": 3792 }, { "epoch": 0.6457269322437862, "grad_norm": 1.1836118698120117, "learning_rate": 1e-06, "loss": 0.0083, "step": 3793 }, { "epoch": 0.6458971739870616, "grad_norm": 1.131833553314209, "learning_rate": 1e-06, "loss": 0.0075, "step": 3794 }, { "epoch": 0.6460674157303371, "grad_norm": 1.1071782112121582, "learning_rate": 1e-06, "loss": 0.0067, "step": 3795 }, { "epoch": 0.6462376574736125, "grad_norm": 1.1466583013534546, "learning_rate": 1e-06, "loss": 0.0078, "step": 3796 }, { "epoch": 0.646407899216888, "grad_norm": 1.2023851871490479, "learning_rate": 1e-06, "loss": 0.0079, "step": 3797 }, { "epoch": 0.6465781409601634, "grad_norm": 1.4373587369918823, "learning_rate": 1e-06, "loss": 0.0123, "step": 3798 }, { "epoch": 0.6467483827034389, "grad_norm": 0.9874997138977051, "learning_rate": 1e-06, "loss": 0.007, "step": 3799 }, { "epoch": 0.6469186244467143, "grad_norm": 1.1962223052978516, "learning_rate": 1e-06, "loss": 0.0114, "step": 3800 }, { "epoch": 0.6470888661899897, "grad_norm": 1.095088005065918, "learning_rate": 1e-06, "loss": 0.0089, "step": 3801 }, { "epoch": 0.6472591079332652, "grad_norm": 1.0901947021484375, "learning_rate": 1e-06, "loss": 0.0074, "step": 3802 }, { "epoch": 0.6474293496765406, "grad_norm": 1.262510895729065, "learning_rate": 1e-06, "loss": 0.0067, "step": 3803 }, { "epoch": 0.6475995914198162, "grad_norm": 1.2522022724151611, "learning_rate": 1e-06, "loss": 0.0106, "step": 3804 }, { "epoch": 0.6477698331630916, "grad_norm": 1.0772358179092407, "learning_rate": 1e-06, "loss": 0.0074, "step": 3805 }, { "epoch": 0.6479400749063671, "grad_norm": 1.433408498764038, "learning_rate": 1e-06, "loss": 0.011, "step": 3806 }, { "epoch": 0.6481103166496425, "grad_norm": 1.124001383781433, "learning_rate": 1e-06, "loss": 0.0076, "step": 3807 }, { "epoch": 0.648280558392918, "grad_norm": 1.0808985233306885, "learning_rate": 1e-06, "loss": 0.0074, "step": 3808 }, { "epoch": 0.6484508001361934, "grad_norm": 1.0477313995361328, "learning_rate": 1e-06, "loss": 0.0101, "step": 3809 }, { "epoch": 0.6486210418794689, "grad_norm": 2.4625258445739746, "learning_rate": 1e-06, "loss": 0.0164, "step": 3810 }, { "epoch": 0.6487912836227443, "grad_norm": 0.9473697543144226, "learning_rate": 1e-06, "loss": 0.0101, "step": 3811 }, { "epoch": 0.6489615253660197, "grad_norm": 0.9401687383651733, "learning_rate": 1e-06, "loss": 0.0076, "step": 3812 }, { "epoch": 0.6491317671092952, "grad_norm": 1.2510778903961182, "learning_rate": 1e-06, "loss": 0.0086, "step": 3813 }, { "epoch": 0.6493020088525706, "grad_norm": 0.9812240600585938, "learning_rate": 1e-06, "loss": 0.01, "step": 3814 }, { "epoch": 0.6494722505958461, "grad_norm": 0.99561607837677, "learning_rate": 1e-06, "loss": 0.0077, "step": 3815 }, { "epoch": 0.6496424923391215, "grad_norm": 1.579348087310791, "learning_rate": 1e-06, "loss": 0.024, "step": 3816 }, { "epoch": 0.649812734082397, "grad_norm": 0.7100584506988525, "learning_rate": 1e-06, "loss": 0.0075, "step": 3817 }, { "epoch": 0.6499829758256724, "grad_norm": 1.0388001203536987, "learning_rate": 1e-06, "loss": 0.0071, "step": 3818 }, { "epoch": 0.650153217568948, "grad_norm": 1.2186797857284546, "learning_rate": 1e-06, "loss": 0.0075, "step": 3819 }, { "epoch": 0.6503234593122234, "grad_norm": 0.8475635647773743, "learning_rate": 1e-06, "loss": 0.0078, "step": 3820 }, { "epoch": 0.6504937010554989, "grad_norm": 1.1312545537948608, "learning_rate": 1e-06, "loss": 0.0067, "step": 3821 }, { "epoch": 0.6506639427987743, "grad_norm": 1.2596800327301025, "learning_rate": 1e-06, "loss": 0.0117, "step": 3822 }, { "epoch": 0.6508341845420497, "grad_norm": 0.8742133378982544, "learning_rate": 1e-06, "loss": 0.01, "step": 3823 }, { "epoch": 0.6510044262853252, "grad_norm": 0.891171395778656, "learning_rate": 1e-06, "loss": 0.0092, "step": 3824 }, { "epoch": 0.6511746680286006, "grad_norm": 1.0801339149475098, "learning_rate": 1e-06, "loss": 0.0096, "step": 3825 }, { "epoch": 0.6513449097718761, "grad_norm": 0.9043775200843811, "learning_rate": 1e-06, "loss": 0.0094, "step": 3826 }, { "epoch": 0.6515151515151515, "grad_norm": 1.2618519067764282, "learning_rate": 1e-06, "loss": 0.009, "step": 3827 }, { "epoch": 0.651685393258427, "grad_norm": 0.9808958172798157, "learning_rate": 1e-06, "loss": 0.0068, "step": 3828 }, { "epoch": 0.6518556350017024, "grad_norm": 0.9210644364356995, "learning_rate": 1e-06, "loss": 0.0059, "step": 3829 }, { "epoch": 0.6520258767449779, "grad_norm": 1.0287270545959473, "learning_rate": 1e-06, "loss": 0.0109, "step": 3830 }, { "epoch": 0.6521961184882533, "grad_norm": 0.9316836595535278, "learning_rate": 1e-06, "loss": 0.0077, "step": 3831 }, { "epoch": 0.6523663602315287, "grad_norm": 0.9281513094902039, "learning_rate": 1e-06, "loss": 0.0077, "step": 3832 }, { "epoch": 0.6525366019748042, "grad_norm": 1.0260971784591675, "learning_rate": 1e-06, "loss": 0.0075, "step": 3833 }, { "epoch": 0.6527068437180796, "grad_norm": 1.012954831123352, "learning_rate": 1e-06, "loss": 0.0099, "step": 3834 }, { "epoch": 0.6528770854613551, "grad_norm": 1.0251330137252808, "learning_rate": 1e-06, "loss": 0.0068, "step": 3835 }, { "epoch": 0.6530473272046305, "grad_norm": 1.228165864944458, "learning_rate": 1e-06, "loss": 0.0075, "step": 3836 }, { "epoch": 0.6532175689479061, "grad_norm": 0.8660666346549988, "learning_rate": 1e-06, "loss": 0.0088, "step": 3837 }, { "epoch": 0.6533878106911815, "grad_norm": 0.8670229911804199, "learning_rate": 1e-06, "loss": 0.0059, "step": 3838 }, { "epoch": 0.653558052434457, "grad_norm": 1.1961077451705933, "learning_rate": 1e-06, "loss": 0.0134, "step": 3839 }, { "epoch": 0.6537282941777324, "grad_norm": 0.9217979311943054, "learning_rate": 1e-06, "loss": 0.0065, "step": 3840 }, { "epoch": 0.6538985359210079, "grad_norm": 0.9868664741516113, "learning_rate": 1e-06, "loss": 0.0073, "step": 3841 }, { "epoch": 0.6540687776642833, "grad_norm": 1.3531166315078735, "learning_rate": 1e-06, "loss": 0.0118, "step": 3842 }, { "epoch": 0.6542390194075587, "grad_norm": 1.6801015138626099, "learning_rate": 1e-06, "loss": 0.012, "step": 3843 }, { "epoch": 0.6544092611508342, "grad_norm": 1.0095585584640503, "learning_rate": 1e-06, "loss": 0.0065, "step": 3844 }, { "epoch": 0.6545795028941096, "grad_norm": 0.8943735361099243, "learning_rate": 1e-06, "loss": 0.006, "step": 3845 }, { "epoch": 0.6547497446373851, "grad_norm": 1.3049546480178833, "learning_rate": 1e-06, "loss": 0.0092, "step": 3846 }, { "epoch": 0.6549199863806605, "grad_norm": 1.1015139818191528, "learning_rate": 1e-06, "loss": 0.0078, "step": 3847 }, { "epoch": 0.655090228123936, "grad_norm": 1.2883460521697998, "learning_rate": 1e-06, "loss": 0.0076, "step": 3848 }, { "epoch": 0.6552604698672114, "grad_norm": 0.9433921575546265, "learning_rate": 1e-06, "loss": 0.007, "step": 3849 }, { "epoch": 0.6554307116104869, "grad_norm": 1.1686891317367554, "learning_rate": 1e-06, "loss": 0.0092, "step": 3850 }, { "epoch": 0.6556009533537623, "grad_norm": 1.2734119892120361, "learning_rate": 1e-06, "loss": 0.0115, "step": 3851 }, { "epoch": 0.6557711950970377, "grad_norm": 1.1788426637649536, "learning_rate": 1e-06, "loss": 0.007, "step": 3852 }, { "epoch": 0.6559414368403133, "grad_norm": 1.1873812675476074, "learning_rate": 1e-06, "loss": 0.0148, "step": 3853 }, { "epoch": 0.6561116785835887, "grad_norm": 1.2980464696884155, "learning_rate": 1e-06, "loss": 0.0079, "step": 3854 }, { "epoch": 0.6562819203268642, "grad_norm": 1.1819038391113281, "learning_rate": 1e-06, "loss": 0.0116, "step": 3855 }, { "epoch": 0.6564521620701396, "grad_norm": 1.5700839757919312, "learning_rate": 1e-06, "loss": 0.0128, "step": 3856 }, { "epoch": 0.6566224038134151, "grad_norm": 0.907390296459198, "learning_rate": 1e-06, "loss": 0.0081, "step": 3857 }, { "epoch": 0.6567926455566905, "grad_norm": 1.2444868087768555, "learning_rate": 1e-06, "loss": 0.0096, "step": 3858 }, { "epoch": 0.656962887299966, "grad_norm": 0.8420805931091309, "learning_rate": 1e-06, "loss": 0.007, "step": 3859 }, { "epoch": 0.6571331290432414, "grad_norm": 0.9762222170829773, "learning_rate": 1e-06, "loss": 0.0091, "step": 3860 }, { "epoch": 0.6573033707865169, "grad_norm": 1.3171446323394775, "learning_rate": 1e-06, "loss": 0.0071, "step": 3861 }, { "epoch": 0.6574736125297923, "grad_norm": 0.9845103025436401, "learning_rate": 1e-06, "loss": 0.0092, "step": 3862 }, { "epoch": 0.6576438542730677, "grad_norm": 1.1617891788482666, "learning_rate": 1e-06, "loss": 0.0102, "step": 3863 }, { "epoch": 0.6578140960163432, "grad_norm": 0.9630821943283081, "learning_rate": 1e-06, "loss": 0.0073, "step": 3864 }, { "epoch": 0.6579843377596186, "grad_norm": 1.274949312210083, "learning_rate": 1e-06, "loss": 0.0079, "step": 3865 }, { "epoch": 0.6581545795028941, "grad_norm": 1.40157151222229, "learning_rate": 1e-06, "loss": 0.0071, "step": 3866 }, { "epoch": 0.6583248212461695, "grad_norm": 1.0424250364303589, "learning_rate": 1e-06, "loss": 0.0094, "step": 3867 }, { "epoch": 0.658495062989445, "grad_norm": 1.3154240846633911, "learning_rate": 1e-06, "loss": 0.0126, "step": 3868 }, { "epoch": 0.6586653047327204, "grad_norm": 1.1080321073532104, "learning_rate": 1e-06, "loss": 0.0082, "step": 3869 }, { "epoch": 0.658835546475996, "grad_norm": 1.0510450601577759, "learning_rate": 1e-06, "loss": 0.0075, "step": 3870 }, { "epoch": 0.6590057882192714, "grad_norm": 1.3665117025375366, "learning_rate": 1e-06, "loss": 0.0115, "step": 3871 }, { "epoch": 0.6591760299625468, "grad_norm": 0.914477527141571, "learning_rate": 1e-06, "loss": 0.0088, "step": 3872 }, { "epoch": 0.6593462717058223, "grad_norm": 1.2469263076782227, "learning_rate": 1e-06, "loss": 0.0094, "step": 3873 }, { "epoch": 0.6595165134490977, "grad_norm": 1.1164896488189697, "learning_rate": 1e-06, "loss": 0.0101, "step": 3874 }, { "epoch": 0.6596867551923732, "grad_norm": 1.092367172241211, "learning_rate": 1e-06, "loss": 0.0078, "step": 3875 }, { "epoch": 0.6598569969356486, "grad_norm": 1.0838600397109985, "learning_rate": 1e-06, "loss": 0.0116, "step": 3876 }, { "epoch": 0.6600272386789241, "grad_norm": 1.1364141702651978, "learning_rate": 1e-06, "loss": 0.0063, "step": 3877 }, { "epoch": 0.6601974804221995, "grad_norm": 1.0023877620697021, "learning_rate": 1e-06, "loss": 0.0073, "step": 3878 }, { "epoch": 0.660367722165475, "grad_norm": 0.9350517392158508, "learning_rate": 1e-06, "loss": 0.0076, "step": 3879 }, { "epoch": 0.6605379639087504, "grad_norm": 0.807680606842041, "learning_rate": 1e-06, "loss": 0.0075, "step": 3880 }, { "epoch": 0.6607082056520259, "grad_norm": 0.7163336277008057, "learning_rate": 1e-06, "loss": 0.0057, "step": 3881 }, { "epoch": 0.6608784473953013, "grad_norm": 1.0191099643707275, "learning_rate": 1e-06, "loss": 0.0085, "step": 3882 }, { "epoch": 0.6610486891385767, "grad_norm": 0.8287161588668823, "learning_rate": 1e-06, "loss": 0.0083, "step": 3883 }, { "epoch": 0.6612189308818522, "grad_norm": 1.2603466510772705, "learning_rate": 1e-06, "loss": 0.0077, "step": 3884 }, { "epoch": 0.6613891726251276, "grad_norm": 1.042250633239746, "learning_rate": 1e-06, "loss": 0.0123, "step": 3885 }, { "epoch": 0.6615594143684032, "grad_norm": 1.064152479171753, "learning_rate": 1e-06, "loss": 0.011, "step": 3886 }, { "epoch": 0.6617296561116786, "grad_norm": 1.2731701135635376, "learning_rate": 1e-06, "loss": 0.0217, "step": 3887 }, { "epoch": 0.6618998978549541, "grad_norm": 1.1830432415008545, "learning_rate": 1e-06, "loss": 0.0084, "step": 3888 }, { "epoch": 0.6620701395982295, "grad_norm": 1.008324384689331, "learning_rate": 1e-06, "loss": 0.0064, "step": 3889 }, { "epoch": 0.662240381341505, "grad_norm": 0.9876424074172974, "learning_rate": 1e-06, "loss": 0.0075, "step": 3890 }, { "epoch": 0.6624106230847804, "grad_norm": 1.2349107265472412, "learning_rate": 1e-06, "loss": 0.0095, "step": 3891 }, { "epoch": 0.6625808648280559, "grad_norm": 1.1617462635040283, "learning_rate": 1e-06, "loss": 0.0142, "step": 3892 }, { "epoch": 0.6627511065713313, "grad_norm": 0.8870407938957214, "learning_rate": 1e-06, "loss": 0.0073, "step": 3893 }, { "epoch": 0.6629213483146067, "grad_norm": 1.0737135410308838, "learning_rate": 1e-06, "loss": 0.0088, "step": 3894 }, { "epoch": 0.6630915900578822, "grad_norm": 1.0294846296310425, "learning_rate": 1e-06, "loss": 0.0088, "step": 3895 }, { "epoch": 0.6632618318011576, "grad_norm": 0.8994992971420288, "learning_rate": 1e-06, "loss": 0.0068, "step": 3896 }, { "epoch": 0.6634320735444331, "grad_norm": 1.0250681638717651, "learning_rate": 1e-06, "loss": 0.0105, "step": 3897 }, { "epoch": 0.6636023152877085, "grad_norm": 1.2135937213897705, "learning_rate": 1e-06, "loss": 0.0092, "step": 3898 }, { "epoch": 0.663772557030984, "grad_norm": 0.907864511013031, "learning_rate": 1e-06, "loss": 0.0098, "step": 3899 }, { "epoch": 0.6639427987742594, "grad_norm": 1.0360740423202515, "learning_rate": 1e-06, "loss": 0.0065, "step": 3900 }, { "epoch": 0.664113040517535, "grad_norm": 1.1412363052368164, "learning_rate": 1e-06, "loss": 0.0126, "step": 3901 }, { "epoch": 0.6642832822608103, "grad_norm": 1.2569540739059448, "learning_rate": 1e-06, "loss": 0.0121, "step": 3902 }, { "epoch": 0.6644535240040857, "grad_norm": 1.2806369066238403, "learning_rate": 1e-06, "loss": 0.0082, "step": 3903 }, { "epoch": 0.6646237657473613, "grad_norm": 1.199313759803772, "learning_rate": 1e-06, "loss": 0.0137, "step": 3904 }, { "epoch": 0.6647940074906367, "grad_norm": 1.0673506259918213, "learning_rate": 1e-06, "loss": 0.0132, "step": 3905 }, { "epoch": 0.6649642492339122, "grad_norm": 1.1850353479385376, "learning_rate": 1e-06, "loss": 0.0095, "step": 3906 }, { "epoch": 0.6651344909771876, "grad_norm": 1.9815788269042969, "learning_rate": 1e-06, "loss": 0.0108, "step": 3907 }, { "epoch": 0.6653047327204631, "grad_norm": 1.181032657623291, "learning_rate": 1e-06, "loss": 0.007, "step": 3908 }, { "epoch": 0.6654749744637385, "grad_norm": 1.2337028980255127, "learning_rate": 1e-06, "loss": 0.008, "step": 3909 }, { "epoch": 0.665645216207014, "grad_norm": 1.0751506090164185, "learning_rate": 1e-06, "loss": 0.0065, "step": 3910 }, { "epoch": 0.6658154579502894, "grad_norm": 1.4897148609161377, "learning_rate": 1e-06, "loss": 0.0101, "step": 3911 }, { "epoch": 0.6659856996935649, "grad_norm": 1.2287198305130005, "learning_rate": 1e-06, "loss": 0.0085, "step": 3912 }, { "epoch": 0.6661559414368403, "grad_norm": 1.3234024047851562, "learning_rate": 1e-06, "loss": 0.008, "step": 3913 }, { "epoch": 0.6663261831801157, "grad_norm": 1.0900822877883911, "learning_rate": 1e-06, "loss": 0.0114, "step": 3914 }, { "epoch": 0.6664964249233912, "grad_norm": 0.9296737313270569, "learning_rate": 1e-06, "loss": 0.0063, "step": 3915 }, { "epoch": 0.6666666666666666, "grad_norm": 1.1464951038360596, "learning_rate": 1e-06, "loss": 0.0083, "step": 3916 }, { "epoch": 0.6668369084099421, "grad_norm": 1.5186680555343628, "learning_rate": 1e-06, "loss": 0.013, "step": 3917 }, { "epoch": 0.6670071501532175, "grad_norm": 1.0801197290420532, "learning_rate": 1e-06, "loss": 0.0095, "step": 3918 }, { "epoch": 0.667177391896493, "grad_norm": 1.058134913444519, "learning_rate": 1e-06, "loss": 0.0066, "step": 3919 }, { "epoch": 0.6673476336397685, "grad_norm": 0.9633292555809021, "learning_rate": 1e-06, "loss": 0.0066, "step": 3920 }, { "epoch": 0.667517875383044, "grad_norm": 1.1550352573394775, "learning_rate": 1e-06, "loss": 0.0091, "step": 3921 }, { "epoch": 0.6676881171263194, "grad_norm": 1.437263011932373, "learning_rate": 1e-06, "loss": 0.0086, "step": 3922 }, { "epoch": 0.6678583588695948, "grad_norm": 0.9223193526268005, "learning_rate": 1e-06, "loss": 0.0059, "step": 3923 }, { "epoch": 0.6680286006128703, "grad_norm": 0.9309805631637573, "learning_rate": 1e-06, "loss": 0.0076, "step": 3924 }, { "epoch": 0.6681988423561457, "grad_norm": 1.1240042448043823, "learning_rate": 1e-06, "loss": 0.0081, "step": 3925 }, { "epoch": 0.6683690840994212, "grad_norm": 1.074847936630249, "learning_rate": 1e-06, "loss": 0.0116, "step": 3926 }, { "epoch": 0.6685393258426966, "grad_norm": 0.9684469103813171, "learning_rate": 1e-06, "loss": 0.0073, "step": 3927 }, { "epoch": 0.6687095675859721, "grad_norm": 1.2416397333145142, "learning_rate": 1e-06, "loss": 0.0099, "step": 3928 }, { "epoch": 0.6688798093292475, "grad_norm": 0.9240944981575012, "learning_rate": 1e-06, "loss": 0.0067, "step": 3929 }, { "epoch": 0.669050051072523, "grad_norm": 0.9790809154510498, "learning_rate": 1e-06, "loss": 0.0092, "step": 3930 }, { "epoch": 0.6692202928157984, "grad_norm": 0.9496670961380005, "learning_rate": 1e-06, "loss": 0.0069, "step": 3931 }, { "epoch": 0.6693905345590739, "grad_norm": 0.8220587968826294, "learning_rate": 1e-06, "loss": 0.008, "step": 3932 }, { "epoch": 0.6695607763023493, "grad_norm": 1.191679835319519, "learning_rate": 1e-06, "loss": 0.0107, "step": 3933 }, { "epoch": 0.6697310180456247, "grad_norm": 1.0436556339263916, "learning_rate": 1e-06, "loss": 0.0058, "step": 3934 }, { "epoch": 0.6699012597889002, "grad_norm": 1.3039947748184204, "learning_rate": 1e-06, "loss": 0.0082, "step": 3935 }, { "epoch": 0.6700715015321757, "grad_norm": 1.2773553133010864, "learning_rate": 1e-06, "loss": 0.008, "step": 3936 }, { "epoch": 0.6702417432754512, "grad_norm": 1.6559447050094604, "learning_rate": 1e-06, "loss": 0.0166, "step": 3937 }, { "epoch": 0.6704119850187266, "grad_norm": 1.3342622518539429, "learning_rate": 1e-06, "loss": 0.0092, "step": 3938 }, { "epoch": 0.6705822267620021, "grad_norm": 1.0363655090332031, "learning_rate": 1e-06, "loss": 0.0063, "step": 3939 }, { "epoch": 0.6707524685052775, "grad_norm": 1.0175873041152954, "learning_rate": 1e-06, "loss": 0.0106, "step": 3940 }, { "epoch": 0.670922710248553, "grad_norm": 0.8500238656997681, "learning_rate": 1e-06, "loss": 0.0099, "step": 3941 }, { "epoch": 0.6710929519918284, "grad_norm": 4.019010543823242, "learning_rate": 1e-06, "loss": 0.0548, "step": 3942 }, { "epoch": 0.6712631937351039, "grad_norm": 0.9771483540534973, "learning_rate": 1e-06, "loss": 0.006, "step": 3943 }, { "epoch": 0.6714334354783793, "grad_norm": 1.0605002641677856, "learning_rate": 1e-06, "loss": 0.0065, "step": 3944 }, { "epoch": 0.6716036772216547, "grad_norm": 1.0014209747314453, "learning_rate": 1e-06, "loss": 0.0066, "step": 3945 }, { "epoch": 0.6717739189649302, "grad_norm": 1.0529136657714844, "learning_rate": 1e-06, "loss": 0.0082, "step": 3946 }, { "epoch": 0.6719441607082056, "grad_norm": 1.1662652492523193, "learning_rate": 1e-06, "loss": 0.0168, "step": 3947 }, { "epoch": 0.6721144024514811, "grad_norm": 1.425837755203247, "learning_rate": 1e-06, "loss": 0.0101, "step": 3948 }, { "epoch": 0.6722846441947565, "grad_norm": 0.8996866345405579, "learning_rate": 1e-06, "loss": 0.0063, "step": 3949 }, { "epoch": 0.672454885938032, "grad_norm": 0.9349283576011658, "learning_rate": 1e-06, "loss": 0.007, "step": 3950 }, { "epoch": 0.6726251276813074, "grad_norm": 0.9103063344955444, "learning_rate": 1e-06, "loss": 0.0073, "step": 3951 }, { "epoch": 0.672795369424583, "grad_norm": 1.297703504562378, "learning_rate": 1e-06, "loss": 0.016, "step": 3952 }, { "epoch": 0.6729656111678584, "grad_norm": 1.0639750957489014, "learning_rate": 1e-06, "loss": 0.0079, "step": 3953 }, { "epoch": 0.6731358529111338, "grad_norm": 1.0544763803482056, "learning_rate": 1e-06, "loss": 0.0083, "step": 3954 }, { "epoch": 0.6733060946544093, "grad_norm": 1.192960500717163, "learning_rate": 1e-06, "loss": 0.0064, "step": 3955 }, { "epoch": 0.6734763363976847, "grad_norm": 0.8247693777084351, "learning_rate": 1e-06, "loss": 0.0074, "step": 3956 }, { "epoch": 0.6736465781409602, "grad_norm": 0.7883464694023132, "learning_rate": 1e-06, "loss": 0.0067, "step": 3957 }, { "epoch": 0.6738168198842356, "grad_norm": 0.9810028672218323, "learning_rate": 1e-06, "loss": 0.0069, "step": 3958 }, { "epoch": 0.6739870616275111, "grad_norm": 0.9418751001358032, "learning_rate": 1e-06, "loss": 0.0064, "step": 3959 }, { "epoch": 0.6741573033707865, "grad_norm": 1.339374303817749, "learning_rate": 1e-06, "loss": 0.0092, "step": 3960 }, { "epoch": 0.674327545114062, "grad_norm": 1.082369089126587, "learning_rate": 1e-06, "loss": 0.0089, "step": 3961 }, { "epoch": 0.6744977868573374, "grad_norm": 1.2776871919631958, "learning_rate": 1e-06, "loss": 0.0102, "step": 3962 }, { "epoch": 0.6746680286006129, "grad_norm": 1.1005768775939941, "learning_rate": 1e-06, "loss": 0.0081, "step": 3963 }, { "epoch": 0.6748382703438883, "grad_norm": 1.0241467952728271, "learning_rate": 1e-06, "loss": 0.0056, "step": 3964 }, { "epoch": 0.6750085120871637, "grad_norm": 0.992150068283081, "learning_rate": 1e-06, "loss": 0.0069, "step": 3965 }, { "epoch": 0.6751787538304392, "grad_norm": 0.8683270215988159, "learning_rate": 1e-06, "loss": 0.0072, "step": 3966 }, { "epoch": 0.6753489955737146, "grad_norm": 1.0942341089248657, "learning_rate": 1e-06, "loss": 0.0096, "step": 3967 }, { "epoch": 0.6755192373169902, "grad_norm": 1.0581514835357666, "learning_rate": 1e-06, "loss": 0.0063, "step": 3968 }, { "epoch": 0.6756894790602656, "grad_norm": 1.0217764377593994, "learning_rate": 1e-06, "loss": 0.0096, "step": 3969 }, { "epoch": 0.6758597208035411, "grad_norm": 1.3109872341156006, "learning_rate": 1e-06, "loss": 0.0068, "step": 3970 }, { "epoch": 0.6760299625468165, "grad_norm": 0.9485663771629333, "learning_rate": 1e-06, "loss": 0.0077, "step": 3971 }, { "epoch": 0.676200204290092, "grad_norm": 0.8911463618278503, "learning_rate": 1e-06, "loss": 0.0069, "step": 3972 }, { "epoch": 0.6763704460333674, "grad_norm": 0.908300518989563, "learning_rate": 1e-06, "loss": 0.0073, "step": 3973 }, { "epoch": 0.6765406877766428, "grad_norm": 0.9515292644500732, "learning_rate": 1e-06, "loss": 0.0078, "step": 3974 }, { "epoch": 0.6767109295199183, "grad_norm": 1.1426764726638794, "learning_rate": 1e-06, "loss": 0.0085, "step": 3975 }, { "epoch": 0.6768811712631937, "grad_norm": 1.1612918376922607, "learning_rate": 1e-06, "loss": 0.0117, "step": 3976 }, { "epoch": 0.6770514130064692, "grad_norm": 0.757738471031189, "learning_rate": 1e-06, "loss": 0.0056, "step": 3977 }, { "epoch": 0.6772216547497446, "grad_norm": 1.1565226316452026, "learning_rate": 1e-06, "loss": 0.0062, "step": 3978 }, { "epoch": 0.6773918964930201, "grad_norm": 1.3450309038162231, "learning_rate": 1e-06, "loss": 0.0113, "step": 3979 }, { "epoch": 0.6775621382362955, "grad_norm": 1.1107460260391235, "learning_rate": 1e-06, "loss": 0.0102, "step": 3980 }, { "epoch": 0.677732379979571, "grad_norm": 1.5839945077896118, "learning_rate": 1e-06, "loss": 0.0118, "step": 3981 }, { "epoch": 0.6779026217228464, "grad_norm": 1.143868088722229, "learning_rate": 1e-06, "loss": 0.0079, "step": 3982 }, { "epoch": 0.678072863466122, "grad_norm": 1.0564833879470825, "learning_rate": 1e-06, "loss": 0.0064, "step": 3983 }, { "epoch": 0.6782431052093973, "grad_norm": 1.1876205205917358, "learning_rate": 1e-06, "loss": 0.0073, "step": 3984 }, { "epoch": 0.6784133469526727, "grad_norm": 1.2426191568374634, "learning_rate": 1e-06, "loss": 0.0135, "step": 3985 }, { "epoch": 0.6785835886959483, "grad_norm": 1.0779424905776978, "learning_rate": 1e-06, "loss": 0.0081, "step": 3986 }, { "epoch": 0.6787538304392237, "grad_norm": 1.3028185367584229, "learning_rate": 1e-06, "loss": 0.0068, "step": 3987 }, { "epoch": 0.6789240721824992, "grad_norm": 1.04373300075531, "learning_rate": 1e-06, "loss": 0.0089, "step": 3988 }, { "epoch": 0.6790943139257746, "grad_norm": 0.9343636631965637, "learning_rate": 1e-06, "loss": 0.0084, "step": 3989 }, { "epoch": 0.6792645556690501, "grad_norm": 1.0329524278640747, "learning_rate": 1e-06, "loss": 0.0081, "step": 3990 }, { "epoch": 0.6794347974123255, "grad_norm": 0.9664596319198608, "learning_rate": 1e-06, "loss": 0.0077, "step": 3991 }, { "epoch": 0.679605039155601, "grad_norm": 0.9661188721656799, "learning_rate": 1e-06, "loss": 0.0071, "step": 3992 }, { "epoch": 0.6797752808988764, "grad_norm": 1.0348271131515503, "learning_rate": 1e-06, "loss": 0.008, "step": 3993 }, { "epoch": 0.6799455226421518, "grad_norm": 0.9838294386863708, "learning_rate": 1e-06, "loss": 0.0056, "step": 3994 }, { "epoch": 0.6801157643854273, "grad_norm": 1.4004563093185425, "learning_rate": 1e-06, "loss": 0.01, "step": 3995 }, { "epoch": 0.6802860061287027, "grad_norm": 0.8205869793891907, "learning_rate": 1e-06, "loss": 0.0053, "step": 3996 }, { "epoch": 0.6804562478719782, "grad_norm": 0.8853178024291992, "learning_rate": 1e-06, "loss": 0.007, "step": 3997 }, { "epoch": 0.6806264896152536, "grad_norm": 0.8219318985939026, "learning_rate": 1e-06, "loss": 0.0076, "step": 3998 }, { "epoch": 0.6807967313585291, "grad_norm": 1.133307695388794, "learning_rate": 1e-06, "loss": 0.0068, "step": 3999 }, { "epoch": 0.6809669731018045, "grad_norm": 0.9907300472259521, "learning_rate": 1e-06, "loss": 0.0083, "step": 4000 }, { "epoch": 0.6809669731018045, "eval_loss": 0.34189823269844055, "eval_runtime": 23.8668, "eval_samples_per_second": 12.57, "eval_steps_per_second": 0.335, "step": 4000 } ], "logging_steps": 1.0, "max_steps": 17622, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 1000, "total_flos": 2.3577906965334983e+19, "train_batch_size": 5, "trial_name": null, "trial_params": null }