{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 6630, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00015082956259426848, "grad_norm": 5.274559497833252, "learning_rate": 1.0000000000000002e-06, "loss": 5.9964, "step": 1 }, { "epoch": 0.00030165912518853697, "grad_norm": 6.214627265930176, "learning_rate": 2.0000000000000003e-06, "loss": 6.5417, "step": 2 }, { "epoch": 0.00045248868778280545, "grad_norm": 6.134393692016602, "learning_rate": 3e-06, "loss": 6.5564, "step": 3 }, { "epoch": 0.0006033182503770739, "grad_norm": 5.790661334991455, "learning_rate": 4.000000000000001e-06, "loss": 6.1761, "step": 4 }, { "epoch": 0.0007541478129713424, "grad_norm": 6.244629859924316, "learning_rate": 5e-06, "loss": 6.5931, "step": 5 }, { "epoch": 0.0009049773755656109, "grad_norm": 5.828901767730713, "learning_rate": 6e-06, "loss": 5.8984, "step": 6 }, { "epoch": 0.0010558069381598793, "grad_norm": 6.0552449226379395, "learning_rate": 7.000000000000001e-06, "loss": 6.382, "step": 7 }, { "epoch": 0.0012066365007541479, "grad_norm": 6.195773124694824, "learning_rate": 8.000000000000001e-06, "loss": 6.7115, "step": 8 }, { "epoch": 0.0013574660633484162, "grad_norm": 6.3292999267578125, "learning_rate": 9e-06, "loss": 6.4155, "step": 9 }, { "epoch": 0.0015082956259426848, "grad_norm": 6.532902240753174, "learning_rate": 1e-05, "loss": 6.6551, "step": 10 }, { "epoch": 0.0016591251885369532, "grad_norm": 6.543457508087158, "learning_rate": 1.1000000000000001e-05, "loss": 5.9875, "step": 11 }, { "epoch": 0.0018099547511312218, "grad_norm": 6.736660957336426, "learning_rate": 1.2e-05, "loss": 6.8337, "step": 12 }, { "epoch": 0.00196078431372549, "grad_norm": 6.12073278427124, "learning_rate": 1.3000000000000001e-05, "loss": 6.2802, "step": 13 }, { "epoch": 0.0021116138763197585, "grad_norm": 6.283128261566162, "learning_rate": 1.4000000000000001e-05, "loss": 6.274, "step": 14 }, { "epoch": 0.0022624434389140274, "grad_norm": 6.165809154510498, "learning_rate": 1.5e-05, "loss": 5.7747, "step": 15 }, { "epoch": 0.0024132730015082957, "grad_norm": 6.3602094650268555, "learning_rate": 1.6000000000000003e-05, "loss": 5.9182, "step": 16 }, { "epoch": 0.002564102564102564, "grad_norm": 5.831494331359863, "learning_rate": 1.7000000000000003e-05, "loss": 5.7483, "step": 17 }, { "epoch": 0.0027149321266968325, "grad_norm": 5.68951416015625, "learning_rate": 1.8e-05, "loss": 5.4224, "step": 18 }, { "epoch": 0.002865761689291101, "grad_norm": 5.434991359710693, "learning_rate": 1.9e-05, "loss": 4.8335, "step": 19 }, { "epoch": 0.0030165912518853697, "grad_norm": 5.204574108123779, "learning_rate": 2e-05, "loss": 5.0465, "step": 20 }, { "epoch": 0.003167420814479638, "grad_norm": 4.821778297424316, "learning_rate": 2.1e-05, "loss": 4.8574, "step": 21 }, { "epoch": 0.0033182503770739064, "grad_norm": 4.472037315368652, "learning_rate": 2.2000000000000003e-05, "loss": 4.5344, "step": 22 }, { "epoch": 0.0034690799396681748, "grad_norm": 4.2375264167785645, "learning_rate": 2.3000000000000003e-05, "loss": 3.662, "step": 23 }, { "epoch": 0.0036199095022624436, "grad_norm": 4.155740261077881, "learning_rate": 2.4e-05, "loss": 4.0496, "step": 24 }, { "epoch": 0.003770739064856712, "grad_norm": 3.8859496116638184, "learning_rate": 2.5e-05, "loss": 3.7169, "step": 25 }, { "epoch": 0.00392156862745098, "grad_norm": 4.007806777954102, "learning_rate": 2.6000000000000002e-05, "loss": 4.2186, "step": 26 }, { "epoch": 0.004072398190045249, "grad_norm": 3.8781261444091797, "learning_rate": 2.7000000000000002e-05, "loss": 3.6404, "step": 27 }, { "epoch": 0.004223227752639517, "grad_norm": 4.123991966247559, "learning_rate": 2.8000000000000003e-05, "loss": 3.3886, "step": 28 }, { "epoch": 0.004374057315233786, "grad_norm": 3.7035181522369385, "learning_rate": 2.9e-05, "loss": 3.2269, "step": 29 }, { "epoch": 0.004524886877828055, "grad_norm": 3.790928602218628, "learning_rate": 3e-05, "loss": 3.0236, "step": 30 }, { "epoch": 0.004675716440422323, "grad_norm": 4.267747402191162, "learning_rate": 3.1e-05, "loss": 3.0719, "step": 31 }, { "epoch": 0.0048265460030165915, "grad_norm": 4.406635761260986, "learning_rate": 3.2000000000000005e-05, "loss": 3.394, "step": 32 }, { "epoch": 0.004977375565610859, "grad_norm": 3.7168877124786377, "learning_rate": 3.3e-05, "loss": 2.6942, "step": 33 }, { "epoch": 0.005128205128205128, "grad_norm": 4.05290412902832, "learning_rate": 3.4000000000000007e-05, "loss": 2.6303, "step": 34 }, { "epoch": 0.005279034690799397, "grad_norm": 4.259477615356445, "learning_rate": 3.5e-05, "loss": 2.6707, "step": 35 }, { "epoch": 0.005429864253393665, "grad_norm": 3.788215160369873, "learning_rate": 3.6e-05, "loss": 2.3549, "step": 36 }, { "epoch": 0.005580693815987934, "grad_norm": 4.48261022567749, "learning_rate": 3.7e-05, "loss": 3.0293, "step": 37 }, { "epoch": 0.005731523378582202, "grad_norm": 4.056379318237305, "learning_rate": 3.8e-05, "loss": 2.4605, "step": 38 }, { "epoch": 0.0058823529411764705, "grad_norm": 3.307687282562256, "learning_rate": 3.9000000000000006e-05, "loss": 2.1057, "step": 39 }, { "epoch": 0.006033182503770739, "grad_norm": 3.773874044418335, "learning_rate": 4e-05, "loss": 2.4257, "step": 40 }, { "epoch": 0.006184012066365007, "grad_norm": 3.7316606044769287, "learning_rate": 4.1e-05, "loss": 2.4968, "step": 41 }, { "epoch": 0.006334841628959276, "grad_norm": 3.976229667663574, "learning_rate": 4.2e-05, "loss": 2.6619, "step": 42 }, { "epoch": 0.006485671191553545, "grad_norm": 3.7572786808013916, "learning_rate": 4.3e-05, "loss": 2.0168, "step": 43 }, { "epoch": 0.006636500754147813, "grad_norm": 3.526514768600464, "learning_rate": 4.4000000000000006e-05, "loss": 2.167, "step": 44 }, { "epoch": 0.006787330316742082, "grad_norm": 3.8320441246032715, "learning_rate": 4.5e-05, "loss": 2.1766, "step": 45 }, { "epoch": 0.0069381598793363496, "grad_norm": 5.8103837966918945, "learning_rate": 4.600000000000001e-05, "loss": 2.2991, "step": 46 }, { "epoch": 0.007088989441930618, "grad_norm": 7.584037780761719, "learning_rate": 4.7e-05, "loss": 1.9716, "step": 47 }, { "epoch": 0.007239819004524887, "grad_norm": 5.774935722351074, "learning_rate": 4.8e-05, "loss": 2.2881, "step": 48 }, { "epoch": 0.007390648567119155, "grad_norm": 3.179358959197998, "learning_rate": 4.9e-05, "loss": 1.5412, "step": 49 }, { "epoch": 0.007541478129713424, "grad_norm": 3.4190738201141357, "learning_rate": 5e-05, "loss": 2.0032, "step": 50 }, { "epoch": 0.007692307692307693, "grad_norm": 3.5771982669830322, "learning_rate": 5.1000000000000006e-05, "loss": 2.71, "step": 51 }, { "epoch": 0.00784313725490196, "grad_norm": 2.8963332176208496, "learning_rate": 5.2000000000000004e-05, "loss": 1.9256, "step": 52 }, { "epoch": 0.007993966817496229, "grad_norm": 2.9542369842529297, "learning_rate": 5.300000000000001e-05, "loss": 2.6429, "step": 53 }, { "epoch": 0.008144796380090498, "grad_norm": 3.0134377479553223, "learning_rate": 5.4000000000000005e-05, "loss": 2.0889, "step": 54 }, { "epoch": 0.008295625942684766, "grad_norm": 3.232659339904785, "learning_rate": 5.500000000000001e-05, "loss": 2.2611, "step": 55 }, { "epoch": 0.008446455505279034, "grad_norm": 3.289823293685913, "learning_rate": 5.6000000000000006e-05, "loss": 2.1106, "step": 56 }, { "epoch": 0.008597285067873304, "grad_norm": 3.1317858695983887, "learning_rate": 5.6999999999999996e-05, "loss": 2.505, "step": 57 }, { "epoch": 0.008748114630467572, "grad_norm": 2.8002500534057617, "learning_rate": 5.8e-05, "loss": 2.4077, "step": 58 }, { "epoch": 0.00889894419306184, "grad_norm": 2.576694965362549, "learning_rate": 5.9e-05, "loss": 2.1261, "step": 59 }, { "epoch": 0.00904977375565611, "grad_norm": 2.770052194595337, "learning_rate": 6e-05, "loss": 2.5213, "step": 60 }, { "epoch": 0.009200603318250377, "grad_norm": 2.617337465286255, "learning_rate": 6.1e-05, "loss": 2.0172, "step": 61 }, { "epoch": 0.009351432880844645, "grad_norm": 3.009204626083374, "learning_rate": 6.2e-05, "loss": 2.4191, "step": 62 }, { "epoch": 0.009502262443438913, "grad_norm": 2.585763692855835, "learning_rate": 6.3e-05, "loss": 1.8121, "step": 63 }, { "epoch": 0.009653092006033183, "grad_norm": 2.8127567768096924, "learning_rate": 6.400000000000001e-05, "loss": 1.9557, "step": 64 }, { "epoch": 0.00980392156862745, "grad_norm": 3.087707757949829, "learning_rate": 6.500000000000001e-05, "loss": 2.4427, "step": 65 }, { "epoch": 0.009954751131221719, "grad_norm": 2.914344072341919, "learning_rate": 6.6e-05, "loss": 2.6976, "step": 66 }, { "epoch": 0.010105580693815988, "grad_norm": 2.4512031078338623, "learning_rate": 6.7e-05, "loss": 1.7943, "step": 67 }, { "epoch": 0.010256410256410256, "grad_norm": 2.200735092163086, "learning_rate": 6.800000000000001e-05, "loss": 1.4525, "step": 68 }, { "epoch": 0.010407239819004524, "grad_norm": 2.5984487533569336, "learning_rate": 6.9e-05, "loss": 1.5063, "step": 69 }, { "epoch": 0.010558069381598794, "grad_norm": 3.276723623275757, "learning_rate": 7e-05, "loss": 1.9314, "step": 70 }, { "epoch": 0.010708898944193062, "grad_norm": 2.949026107788086, "learning_rate": 7.1e-05, "loss": 2.3046, "step": 71 }, { "epoch": 0.01085972850678733, "grad_norm": 2.9635751247406006, "learning_rate": 7.2e-05, "loss": 1.8196, "step": 72 }, { "epoch": 0.0110105580693816, "grad_norm": 3.7988672256469727, "learning_rate": 7.3e-05, "loss": 2.2269, "step": 73 }, { "epoch": 0.011161387631975868, "grad_norm": 2.638552188873291, "learning_rate": 7.4e-05, "loss": 1.53, "step": 74 }, { "epoch": 0.011312217194570135, "grad_norm": 2.4735448360443115, "learning_rate": 7.500000000000001e-05, "loss": 1.5379, "step": 75 }, { "epoch": 0.011463046757164403, "grad_norm": 2.941606044769287, "learning_rate": 7.6e-05, "loss": 1.8521, "step": 76 }, { "epoch": 0.011613876319758673, "grad_norm": 2.710932493209839, "learning_rate": 7.7e-05, "loss": 1.773, "step": 77 }, { "epoch": 0.011764705882352941, "grad_norm": 2.8382749557495117, "learning_rate": 7.800000000000001e-05, "loss": 1.8868, "step": 78 }, { "epoch": 0.011915535444947209, "grad_norm": 2.7556474208831787, "learning_rate": 7.900000000000001e-05, "loss": 1.9486, "step": 79 }, { "epoch": 0.012066365007541479, "grad_norm": 3.050816774368286, "learning_rate": 8e-05, "loss": 2.034, "step": 80 }, { "epoch": 0.012217194570135747, "grad_norm": 2.713569164276123, "learning_rate": 8.1e-05, "loss": 2.0982, "step": 81 }, { "epoch": 0.012368024132730015, "grad_norm": 2.801042318344116, "learning_rate": 8.2e-05, "loss": 1.8621, "step": 82 }, { "epoch": 0.012518853695324284, "grad_norm": 2.4068610668182373, "learning_rate": 8.3e-05, "loss": 1.5094, "step": 83 }, { "epoch": 0.012669683257918552, "grad_norm": 2.6680634021759033, "learning_rate": 8.4e-05, "loss": 2.0363, "step": 84 }, { "epoch": 0.01282051282051282, "grad_norm": 2.6560962200164795, "learning_rate": 8.5e-05, "loss": 1.948, "step": 85 }, { "epoch": 0.01297134238310709, "grad_norm": 2.5324976444244385, "learning_rate": 8.6e-05, "loss": 1.5887, "step": 86 }, { "epoch": 0.013122171945701358, "grad_norm": 2.8241560459136963, "learning_rate": 8.7e-05, "loss": 1.2917, "step": 87 }, { "epoch": 0.013273001508295626, "grad_norm": 3.0698764324188232, "learning_rate": 8.800000000000001e-05, "loss": 1.9339, "step": 88 }, { "epoch": 0.013423831070889894, "grad_norm": 3.1944687366485596, "learning_rate": 8.900000000000001e-05, "loss": 2.1897, "step": 89 }, { "epoch": 0.013574660633484163, "grad_norm": 3.135181427001953, "learning_rate": 9e-05, "loss": 2.0402, "step": 90 }, { "epoch": 0.013725490196078431, "grad_norm": 3.0618715286254883, "learning_rate": 9.1e-05, "loss": 1.7574, "step": 91 }, { "epoch": 0.013876319758672699, "grad_norm": 3.2244045734405518, "learning_rate": 9.200000000000001e-05, "loss": 2.1998, "step": 92 }, { "epoch": 0.014027149321266969, "grad_norm": 2.9624483585357666, "learning_rate": 9.300000000000001e-05, "loss": 1.6039, "step": 93 }, { "epoch": 0.014177978883861237, "grad_norm": 2.7207741737365723, "learning_rate": 9.4e-05, "loss": 1.676, "step": 94 }, { "epoch": 0.014328808446455505, "grad_norm": 2.7867345809936523, "learning_rate": 9.5e-05, "loss": 1.521, "step": 95 }, { "epoch": 0.014479638009049774, "grad_norm": 3.4219319820404053, "learning_rate": 9.6e-05, "loss": 1.4554, "step": 96 }, { "epoch": 0.014630467571644042, "grad_norm": 6.275210857391357, "learning_rate": 9.7e-05, "loss": 1.9413, "step": 97 }, { "epoch": 0.01478129713423831, "grad_norm": 2.9849274158477783, "learning_rate": 9.8e-05, "loss": 1.3216, "step": 98 }, { "epoch": 0.01493212669683258, "grad_norm": 3.1904163360595703, "learning_rate": 9.900000000000001e-05, "loss": 2.0743, "step": 99 }, { "epoch": 0.015082956259426848, "grad_norm": 2.812879800796509, "learning_rate": 0.0001, "loss": 1.3856, "step": 100 }, { "epoch": 0.015233785822021116, "grad_norm": 2.3076891899108887, "learning_rate": 9.99999942135343e-05, "loss": 1.8725, "step": 101 }, { "epoch": 0.015384615384615385, "grad_norm": 2.5048635005950928, "learning_rate": 9.999997685413854e-05, "loss": 1.671, "step": 102 }, { "epoch": 0.015535444947209653, "grad_norm": 2.869196653366089, "learning_rate": 9.999994792181674e-05, "loss": 2.1263, "step": 103 }, { "epoch": 0.01568627450980392, "grad_norm": 2.2507481575012207, "learning_rate": 9.999990741657558e-05, "loss": 1.9334, "step": 104 }, { "epoch": 0.01583710407239819, "grad_norm": 2.201056718826294, "learning_rate": 9.999985533842447e-05, "loss": 1.932, "step": 105 }, { "epoch": 0.015987933634992457, "grad_norm": 2.2397043704986572, "learning_rate": 9.999979168737544e-05, "loss": 1.846, "step": 106 }, { "epoch": 0.01613876319758673, "grad_norm": 2.7901651859283447, "learning_rate": 9.999971646344322e-05, "loss": 2.4966, "step": 107 }, { "epoch": 0.016289592760180997, "grad_norm": 2.2456893920898438, "learning_rate": 9.999962966664522e-05, "loss": 1.7409, "step": 108 }, { "epoch": 0.016440422322775265, "grad_norm": 2.255948781967163, "learning_rate": 9.999953129700155e-05, "loss": 1.681, "step": 109 }, { "epoch": 0.016591251885369532, "grad_norm": 2.2496795654296875, "learning_rate": 9.999942135453495e-05, "loss": 1.5265, "step": 110 }, { "epoch": 0.0167420814479638, "grad_norm": 2.0057170391082764, "learning_rate": 9.999929983927089e-05, "loss": 1.5787, "step": 111 }, { "epoch": 0.01689291101055807, "grad_norm": 2.294450044631958, "learning_rate": 9.999916675123748e-05, "loss": 1.7564, "step": 112 }, { "epoch": 0.017043740573152336, "grad_norm": 2.409651041030884, "learning_rate": 9.999902209046554e-05, "loss": 2.103, "step": 113 }, { "epoch": 0.017194570135746608, "grad_norm": 2.354245185852051, "learning_rate": 9.999886585698854e-05, "loss": 1.6491, "step": 114 }, { "epoch": 0.017345399698340876, "grad_norm": 2.1762161254882812, "learning_rate": 9.999869805084266e-05, "loss": 1.4992, "step": 115 }, { "epoch": 0.017496229260935144, "grad_norm": 2.4508519172668457, "learning_rate": 9.999851867206674e-05, "loss": 1.8044, "step": 116 }, { "epoch": 0.01764705882352941, "grad_norm": 2.3584420680999756, "learning_rate": 9.999832772070226e-05, "loss": 1.2041, "step": 117 }, { "epoch": 0.01779788838612368, "grad_norm": 2.590296983718872, "learning_rate": 9.999812519679343e-05, "loss": 1.7463, "step": 118 }, { "epoch": 0.017948717948717947, "grad_norm": 2.2447705268859863, "learning_rate": 9.999791110038716e-05, "loss": 1.5905, "step": 119 }, { "epoch": 0.01809954751131222, "grad_norm": 2.485983371734619, "learning_rate": 9.9997685431533e-05, "loss": 1.938, "step": 120 }, { "epoch": 0.018250377073906487, "grad_norm": 2.4634485244750977, "learning_rate": 9.999744819028314e-05, "loss": 1.7497, "step": 121 }, { "epoch": 0.018401206636500755, "grad_norm": 2.468576431274414, "learning_rate": 9.999719937669254e-05, "loss": 1.6973, "step": 122 }, { "epoch": 0.018552036199095023, "grad_norm": 2.4422619342803955, "learning_rate": 9.999693899081874e-05, "loss": 2.3023, "step": 123 }, { "epoch": 0.01870286576168929, "grad_norm": 2.2130205631256104, "learning_rate": 9.999666703272206e-05, "loss": 1.2381, "step": 124 }, { "epoch": 0.01885369532428356, "grad_norm": 2.5542964935302734, "learning_rate": 9.999638350246543e-05, "loss": 2.1145, "step": 125 }, { "epoch": 0.019004524886877826, "grad_norm": 2.357135772705078, "learning_rate": 9.999608840011445e-05, "loss": 1.8523, "step": 126 }, { "epoch": 0.019155354449472098, "grad_norm": 2.1701672077178955, "learning_rate": 9.999578172573745e-05, "loss": 1.7645, "step": 127 }, { "epoch": 0.019306184012066366, "grad_norm": 2.4496967792510986, "learning_rate": 9.99954634794054e-05, "loss": 1.7748, "step": 128 }, { "epoch": 0.019457013574660634, "grad_norm": 2.200291156768799, "learning_rate": 9.999513366119197e-05, "loss": 1.5115, "step": 129 }, { "epoch": 0.0196078431372549, "grad_norm": 2.76247239112854, "learning_rate": 9.99947922711735e-05, "loss": 1.6852, "step": 130 }, { "epoch": 0.01975867269984917, "grad_norm": 2.147261619567871, "learning_rate": 9.999443930942902e-05, "loss": 1.5494, "step": 131 }, { "epoch": 0.019909502262443438, "grad_norm": 2.534330129623413, "learning_rate": 9.99940747760402e-05, "loss": 1.9683, "step": 132 }, { "epoch": 0.02006033182503771, "grad_norm": 2.7945022583007812, "learning_rate": 9.99936986710914e-05, "loss": 2.0405, "step": 133 }, { "epoch": 0.020211161387631977, "grad_norm": 2.2911410331726074, "learning_rate": 9.999331099466972e-05, "loss": 1.6036, "step": 134 }, { "epoch": 0.020361990950226245, "grad_norm": 2.1557750701904297, "learning_rate": 9.999291174686486e-05, "loss": 1.2333, "step": 135 }, { "epoch": 0.020512820512820513, "grad_norm": 2.4809930324554443, "learning_rate": 9.999250092776924e-05, "loss": 1.8804, "step": 136 }, { "epoch": 0.02066365007541478, "grad_norm": 2.8033816814422607, "learning_rate": 9.999207853747794e-05, "loss": 2.1672, "step": 137 }, { "epoch": 0.02081447963800905, "grad_norm": 2.4443202018737793, "learning_rate": 9.999164457608874e-05, "loss": 1.5243, "step": 138 }, { "epoch": 0.020965309200603317, "grad_norm": 2.269228219985962, "learning_rate": 9.999119904370206e-05, "loss": 1.4527, "step": 139 }, { "epoch": 0.021116138763197588, "grad_norm": 2.2829506397247314, "learning_rate": 9.999074194042106e-05, "loss": 1.3404, "step": 140 }, { "epoch": 0.021266968325791856, "grad_norm": 2.5040829181671143, "learning_rate": 9.99902732663515e-05, "loss": 1.8195, "step": 141 }, { "epoch": 0.021417797888386124, "grad_norm": 2.70528244972229, "learning_rate": 9.998979302160188e-05, "loss": 1.643, "step": 142 }, { "epoch": 0.021568627450980392, "grad_norm": 3.1368956565856934, "learning_rate": 9.998930120628335e-05, "loss": 2.255, "step": 143 }, { "epoch": 0.02171945701357466, "grad_norm": 2.7998604774475098, "learning_rate": 9.998879782050976e-05, "loss": 1.5081, "step": 144 }, { "epoch": 0.021870286576168928, "grad_norm": 3.3889834880828857, "learning_rate": 9.998828286439761e-05, "loss": 1.4402, "step": 145 }, { "epoch": 0.0220211161387632, "grad_norm": 4.711235523223877, "learning_rate": 9.998775633806607e-05, "loss": 1.4027, "step": 146 }, { "epoch": 0.022171945701357467, "grad_norm": 2.937638521194458, "learning_rate": 9.998721824163706e-05, "loss": 1.4237, "step": 147 }, { "epoch": 0.022322775263951735, "grad_norm": 2.155961751937866, "learning_rate": 9.998666857523509e-05, "loss": 1.2546, "step": 148 }, { "epoch": 0.022473604826546003, "grad_norm": 2.2661139965057373, "learning_rate": 9.998610733898739e-05, "loss": 1.297, "step": 149 }, { "epoch": 0.02262443438914027, "grad_norm": 3.1641366481781006, "learning_rate": 9.998553453302387e-05, "loss": 1.7331, "step": 150 }, { "epoch": 0.02277526395173454, "grad_norm": 2.662306070327759, "learning_rate": 9.998495015747708e-05, "loss": 1.7831, "step": 151 }, { "epoch": 0.022926093514328807, "grad_norm": 2.4125866889953613, "learning_rate": 9.998435421248234e-05, "loss": 1.4689, "step": 152 }, { "epoch": 0.023076923076923078, "grad_norm": 2.411210536956787, "learning_rate": 9.998374669817754e-05, "loss": 1.8493, "step": 153 }, { "epoch": 0.023227752639517346, "grad_norm": 2.533069610595703, "learning_rate": 9.99831276147033e-05, "loss": 1.6749, "step": 154 }, { "epoch": 0.023378582202111614, "grad_norm": 3.169226884841919, "learning_rate": 9.998249696220292e-05, "loss": 2.4669, "step": 155 }, { "epoch": 0.023529411764705882, "grad_norm": 2.9307048320770264, "learning_rate": 9.998185474082238e-05, "loss": 2.3159, "step": 156 }, { "epoch": 0.02368024132730015, "grad_norm": 2.2841804027557373, "learning_rate": 9.998120095071031e-05, "loss": 1.9272, "step": 157 }, { "epoch": 0.023831070889894418, "grad_norm": 2.242295503616333, "learning_rate": 9.998053559201806e-05, "loss": 1.8297, "step": 158 }, { "epoch": 0.02398190045248869, "grad_norm": 2.1146342754364014, "learning_rate": 9.997985866489959e-05, "loss": 1.6142, "step": 159 }, { "epoch": 0.024132730015082957, "grad_norm": 2.2620275020599365, "learning_rate": 9.997917016951162e-05, "loss": 1.6243, "step": 160 }, { "epoch": 0.024283559577677225, "grad_norm": 2.8224666118621826, "learning_rate": 9.99784701060135e-05, "loss": 1.8909, "step": 161 }, { "epoch": 0.024434389140271493, "grad_norm": 2.3511710166931152, "learning_rate": 9.997775847456723e-05, "loss": 1.8913, "step": 162 }, { "epoch": 0.02458521870286576, "grad_norm": 1.8456021547317505, "learning_rate": 9.997703527533758e-05, "loss": 1.1458, "step": 163 }, { "epoch": 0.02473604826546003, "grad_norm": 2.316385269165039, "learning_rate": 9.997630050849189e-05, "loss": 1.7794, "step": 164 }, { "epoch": 0.024886877828054297, "grad_norm": 2.0768957138061523, "learning_rate": 9.997555417420028e-05, "loss": 1.67, "step": 165 }, { "epoch": 0.02503770739064857, "grad_norm": 2.0877885818481445, "learning_rate": 9.997479627263544e-05, "loss": 1.5265, "step": 166 }, { "epoch": 0.025188536953242836, "grad_norm": 2.2749664783477783, "learning_rate": 9.997402680397285e-05, "loss": 1.6557, "step": 167 }, { "epoch": 0.025339366515837104, "grad_norm": 1.9519391059875488, "learning_rate": 9.997324576839056e-05, "loss": 1.4024, "step": 168 }, { "epoch": 0.025490196078431372, "grad_norm": 2.412795305252075, "learning_rate": 9.997245316606936e-05, "loss": 1.9962, "step": 169 }, { "epoch": 0.02564102564102564, "grad_norm": 1.8434703350067139, "learning_rate": 9.997164899719273e-05, "loss": 1.2101, "step": 170 }, { "epoch": 0.025791855203619908, "grad_norm": 2.29972767829895, "learning_rate": 9.997083326194678e-05, "loss": 1.7666, "step": 171 }, { "epoch": 0.02594268476621418, "grad_norm": 2.411770820617676, "learning_rate": 9.997000596052033e-05, "loss": 1.6606, "step": 172 }, { "epoch": 0.026093514328808447, "grad_norm": 2.402708053588867, "learning_rate": 9.996916709310485e-05, "loss": 1.6377, "step": 173 }, { "epoch": 0.026244343891402715, "grad_norm": 2.3925929069519043, "learning_rate": 9.99683166598945e-05, "loss": 1.5897, "step": 174 }, { "epoch": 0.026395173453996983, "grad_norm": 2.4539906978607178, "learning_rate": 9.996745466108615e-05, "loss": 1.7966, "step": 175 }, { "epoch": 0.02654600301659125, "grad_norm": 2.370915651321411, "learning_rate": 9.99665810968793e-05, "loss": 1.6598, "step": 176 }, { "epoch": 0.02669683257918552, "grad_norm": 2.046102285385132, "learning_rate": 9.996569596747614e-05, "loss": 1.4218, "step": 177 }, { "epoch": 0.026847662141779787, "grad_norm": 2.3283424377441406, "learning_rate": 9.996479927308154e-05, "loss": 1.4797, "step": 178 }, { "epoch": 0.02699849170437406, "grad_norm": 2.4158549308776855, "learning_rate": 9.996389101390306e-05, "loss": 1.6722, "step": 179 }, { "epoch": 0.027149321266968326, "grad_norm": 2.464944362640381, "learning_rate": 9.99629711901509e-05, "loss": 1.9407, "step": 180 }, { "epoch": 0.027300150829562594, "grad_norm": 2.6098389625549316, "learning_rate": 9.9962039802038e-05, "loss": 1.6917, "step": 181 }, { "epoch": 0.027450980392156862, "grad_norm": 2.3715851306915283, "learning_rate": 9.99610968497799e-05, "loss": 1.4895, "step": 182 }, { "epoch": 0.02760180995475113, "grad_norm": 2.2832272052764893, "learning_rate": 9.996014233359487e-05, "loss": 1.3671, "step": 183 }, { "epoch": 0.027752639517345398, "grad_norm": 2.825448513031006, "learning_rate": 9.995917625370385e-05, "loss": 1.9499, "step": 184 }, { "epoch": 0.02790346907993967, "grad_norm": 2.229189872741699, "learning_rate": 9.995819861033042e-05, "loss": 1.3514, "step": 185 }, { "epoch": 0.028054298642533938, "grad_norm": 2.785944700241089, "learning_rate": 9.99572094037009e-05, "loss": 1.8477, "step": 186 }, { "epoch": 0.028205128205128206, "grad_norm": 2.806279420852661, "learning_rate": 9.995620863404422e-05, "loss": 1.6443, "step": 187 }, { "epoch": 0.028355957767722473, "grad_norm": 2.6953282356262207, "learning_rate": 9.995519630159205e-05, "loss": 1.8321, "step": 188 }, { "epoch": 0.02850678733031674, "grad_norm": 2.493748188018799, "learning_rate": 9.995417240657868e-05, "loss": 1.6282, "step": 189 }, { "epoch": 0.02865761689291101, "grad_norm": 2.371903657913208, "learning_rate": 9.995313694924107e-05, "loss": 1.6715, "step": 190 }, { "epoch": 0.028808446455505277, "grad_norm": 2.5447800159454346, "learning_rate": 9.995208992981894e-05, "loss": 1.7113, "step": 191 }, { "epoch": 0.02895927601809955, "grad_norm": 2.1391799449920654, "learning_rate": 9.995103134855462e-05, "loss": 1.3489, "step": 192 }, { "epoch": 0.029110105580693817, "grad_norm": 2.256561279296875, "learning_rate": 9.99499612056931e-05, "loss": 1.4702, "step": 193 }, { "epoch": 0.029260935143288085, "grad_norm": 3.1700937747955322, "learning_rate": 9.99488795014821e-05, "loss": 2.396, "step": 194 }, { "epoch": 0.029411764705882353, "grad_norm": 2.259786605834961, "learning_rate": 9.994778623617196e-05, "loss": 1.2666, "step": 195 }, { "epoch": 0.02956259426847662, "grad_norm": 3.8225016593933105, "learning_rate": 9.994668141001577e-05, "loss": 1.5071, "step": 196 }, { "epoch": 0.02971342383107089, "grad_norm": 4.042316913604736, "learning_rate": 9.994556502326921e-05, "loss": 1.6841, "step": 197 }, { "epoch": 0.02986425339366516, "grad_norm": 2.5446815490722656, "learning_rate": 9.994443707619071e-05, "loss": 1.4123, "step": 198 }, { "epoch": 0.030015082956259428, "grad_norm": 2.1477596759796143, "learning_rate": 9.994329756904132e-05, "loss": 1.3106, "step": 199 }, { "epoch": 0.030165912518853696, "grad_norm": 2.026958703994751, "learning_rate": 9.99421465020848e-05, "loss": 1.3727, "step": 200 }, { "epoch": 0.030316742081447964, "grad_norm": 1.8753694295883179, "learning_rate": 9.994098387558756e-05, "loss": 1.7661, "step": 201 }, { "epoch": 0.03046757164404223, "grad_norm": 1.9686955213546753, "learning_rate": 9.993980968981873e-05, "loss": 1.4189, "step": 202 }, { "epoch": 0.0306184012066365, "grad_norm": 2.2756576538085938, "learning_rate": 9.993862394505006e-05, "loss": 1.811, "step": 203 }, { "epoch": 0.03076923076923077, "grad_norm": 2.0661325454711914, "learning_rate": 9.9937426641556e-05, "loss": 1.5252, "step": 204 }, { "epoch": 0.03092006033182504, "grad_norm": 2.5643160343170166, "learning_rate": 9.99362177796137e-05, "loss": 1.5346, "step": 205 }, { "epoch": 0.031070889894419307, "grad_norm": 2.062894344329834, "learning_rate": 9.993499735950294e-05, "loss": 1.6086, "step": 206 }, { "epoch": 0.031221719457013575, "grad_norm": 2.1178181171417236, "learning_rate": 9.993376538150619e-05, "loss": 1.8177, "step": 207 }, { "epoch": 0.03137254901960784, "grad_norm": 2.1209819316864014, "learning_rate": 9.993252184590863e-05, "loss": 1.8188, "step": 208 }, { "epoch": 0.03152337858220211, "grad_norm": 2.4639313220977783, "learning_rate": 9.993126675299806e-05, "loss": 2.3022, "step": 209 }, { "epoch": 0.03167420814479638, "grad_norm": 2.0030486583709717, "learning_rate": 9.993000010306499e-05, "loss": 1.4001, "step": 210 }, { "epoch": 0.031825037707390647, "grad_norm": 1.8976800441741943, "learning_rate": 9.99287218964026e-05, "loss": 1.4954, "step": 211 }, { "epoch": 0.031975867269984914, "grad_norm": 1.9887937307357788, "learning_rate": 9.992743213330675e-05, "loss": 1.1259, "step": 212 }, { "epoch": 0.03212669683257918, "grad_norm": 1.9479683637619019, "learning_rate": 9.992613081407595e-05, "loss": 1.3624, "step": 213 }, { "epoch": 0.03227752639517346, "grad_norm": 1.8424012660980225, "learning_rate": 9.992481793901142e-05, "loss": 1.2693, "step": 214 }, { "epoch": 0.032428355957767725, "grad_norm": 2.082228660583496, "learning_rate": 9.992349350841702e-05, "loss": 1.7469, "step": 215 }, { "epoch": 0.03257918552036199, "grad_norm": 2.0636680126190186, "learning_rate": 9.99221575225993e-05, "loss": 1.2391, "step": 216 }, { "epoch": 0.03273001508295626, "grad_norm": 2.3256919384002686, "learning_rate": 9.99208099818675e-05, "loss": 1.658, "step": 217 }, { "epoch": 0.03288084464555053, "grad_norm": 2.269465208053589, "learning_rate": 9.991945088653353e-05, "loss": 1.511, "step": 218 }, { "epoch": 0.0330316742081448, "grad_norm": 2.432229518890381, "learning_rate": 9.991808023691192e-05, "loss": 1.6171, "step": 219 }, { "epoch": 0.033182503770739065, "grad_norm": 2.469449996948242, "learning_rate": 9.991669803331997e-05, "loss": 1.729, "step": 220 }, { "epoch": 0.03333333333333333, "grad_norm": 2.5987584590911865, "learning_rate": 9.991530427607755e-05, "loss": 2.033, "step": 221 }, { "epoch": 0.0334841628959276, "grad_norm": 2.2188456058502197, "learning_rate": 9.991389896550732e-05, "loss": 1.5355, "step": 222 }, { "epoch": 0.03363499245852187, "grad_norm": 2.586286783218384, "learning_rate": 9.99124821019345e-05, "loss": 1.7695, "step": 223 }, { "epoch": 0.03378582202111614, "grad_norm": 2.3551723957061768, "learning_rate": 9.991105368568705e-05, "loss": 1.7357, "step": 224 }, { "epoch": 0.033936651583710405, "grad_norm": 2.14882755279541, "learning_rate": 9.99096137170956e-05, "loss": 1.7452, "step": 225 }, { "epoch": 0.03408748114630467, "grad_norm": 2.0189082622528076, "learning_rate": 9.990816219649343e-05, "loss": 1.2922, "step": 226 }, { "epoch": 0.03423831070889895, "grad_norm": 2.00338077545166, "learning_rate": 9.99066991242165e-05, "loss": 1.3752, "step": 227 }, { "epoch": 0.034389140271493215, "grad_norm": 2.31833553314209, "learning_rate": 9.990522450060349e-05, "loss": 1.8198, "step": 228 }, { "epoch": 0.03453996983408748, "grad_norm": 2.1115901470184326, "learning_rate": 9.990373832599566e-05, "loss": 1.7142, "step": 229 }, { "epoch": 0.03469079939668175, "grad_norm": 2.288539409637451, "learning_rate": 9.990224060073705e-05, "loss": 1.6972, "step": 230 }, { "epoch": 0.03484162895927602, "grad_norm": 2.262002468109131, "learning_rate": 9.990073132517428e-05, "loss": 1.5728, "step": 231 }, { "epoch": 0.03499245852187029, "grad_norm": 2.2425923347473145, "learning_rate": 9.98992104996567e-05, "loss": 1.6509, "step": 232 }, { "epoch": 0.035143288084464555, "grad_norm": 1.7508853673934937, "learning_rate": 9.989767812453632e-05, "loss": 1.1619, "step": 233 }, { "epoch": 0.03529411764705882, "grad_norm": 2.1534414291381836, "learning_rate": 9.989613420016783e-05, "loss": 1.6655, "step": 234 }, { "epoch": 0.03544494720965309, "grad_norm": 2.1205594539642334, "learning_rate": 9.989457872690857e-05, "loss": 1.528, "step": 235 }, { "epoch": 0.03559577677224736, "grad_norm": 1.9857796430587769, "learning_rate": 9.989301170511858e-05, "loss": 1.3221, "step": 236 }, { "epoch": 0.03574660633484163, "grad_norm": 2.4939494132995605, "learning_rate": 9.989143313516053e-05, "loss": 1.8978, "step": 237 }, { "epoch": 0.035897435897435895, "grad_norm": 2.0178327560424805, "learning_rate": 9.988984301739984e-05, "loss": 1.3298, "step": 238 }, { "epoch": 0.03604826546003016, "grad_norm": 1.9922305345535278, "learning_rate": 9.988824135220454e-05, "loss": 1.4038, "step": 239 }, { "epoch": 0.03619909502262444, "grad_norm": 2.2438390254974365, "learning_rate": 9.988662813994533e-05, "loss": 1.3927, "step": 240 }, { "epoch": 0.036349924585218706, "grad_norm": 2.1822104454040527, "learning_rate": 9.988500338099561e-05, "loss": 1.6238, "step": 241 }, { "epoch": 0.036500754147812974, "grad_norm": 2.2070152759552, "learning_rate": 9.988336707573146e-05, "loss": 1.5517, "step": 242 }, { "epoch": 0.03665158371040724, "grad_norm": 2.22825288772583, "learning_rate": 9.98817192245316e-05, "loss": 1.4282, "step": 243 }, { "epoch": 0.03680241327300151, "grad_norm": 2.668522596359253, "learning_rate": 9.988005982777746e-05, "loss": 1.6218, "step": 244 }, { "epoch": 0.03695324283559578, "grad_norm": 2.4360294342041016, "learning_rate": 9.98783888858531e-05, "loss": 1.6916, "step": 245 }, { "epoch": 0.037104072398190045, "grad_norm": 3.6566147804260254, "learning_rate": 9.987670639914526e-05, "loss": 1.6338, "step": 246 }, { "epoch": 0.03725490196078431, "grad_norm": 2.4161393642425537, "learning_rate": 9.987501236804341e-05, "loss": 1.5312, "step": 247 }, { "epoch": 0.03740573152337858, "grad_norm": 2.10469388961792, "learning_rate": 9.987330679293963e-05, "loss": 1.4075, "step": 248 }, { "epoch": 0.03755656108597285, "grad_norm": 1.7964826822280884, "learning_rate": 9.987158967422867e-05, "loss": 1.2318, "step": 249 }, { "epoch": 0.03770739064856712, "grad_norm": 1.7593894004821777, "learning_rate": 9.9869861012308e-05, "loss": 1.0904, "step": 250 }, { "epoch": 0.037858220211161385, "grad_norm": 2.0524063110351562, "learning_rate": 9.986812080757771e-05, "loss": 1.8408, "step": 251 }, { "epoch": 0.03800904977375565, "grad_norm": 2.04956316947937, "learning_rate": 9.986636906044061e-05, "loss": 1.5629, "step": 252 }, { "epoch": 0.03815987933634993, "grad_norm": 1.8530405759811401, "learning_rate": 9.986460577130214e-05, "loss": 1.3675, "step": 253 }, { "epoch": 0.038310708898944196, "grad_norm": 2.195443868637085, "learning_rate": 9.986283094057043e-05, "loss": 1.6049, "step": 254 }, { "epoch": 0.038461538461538464, "grad_norm": 2.109321117401123, "learning_rate": 9.986104456865629e-05, "loss": 1.6686, "step": 255 }, { "epoch": 0.03861236802413273, "grad_norm": 2.249310255050659, "learning_rate": 9.985924665597317e-05, "loss": 1.7184, "step": 256 }, { "epoch": 0.038763197586727, "grad_norm": 2.1419944763183594, "learning_rate": 9.985743720293723e-05, "loss": 1.8925, "step": 257 }, { "epoch": 0.03891402714932127, "grad_norm": 1.8457478284835815, "learning_rate": 9.985561620996729e-05, "loss": 1.5769, "step": 258 }, { "epoch": 0.039064856711915535, "grad_norm": 1.882033109664917, "learning_rate": 9.98537836774848e-05, "loss": 1.249, "step": 259 }, { "epoch": 0.0392156862745098, "grad_norm": 2.0022733211517334, "learning_rate": 9.985193960591395e-05, "loss": 1.3297, "step": 260 }, { "epoch": 0.03936651583710407, "grad_norm": 2.054608106613159, "learning_rate": 9.985008399568156e-05, "loss": 1.7728, "step": 261 }, { "epoch": 0.03951734539969834, "grad_norm": 2.2128407955169678, "learning_rate": 9.984821684721713e-05, "loss": 1.8249, "step": 262 }, { "epoch": 0.03966817496229261, "grad_norm": 1.973047137260437, "learning_rate": 9.984633816095282e-05, "loss": 1.7756, "step": 263 }, { "epoch": 0.039819004524886875, "grad_norm": 1.9727122783660889, "learning_rate": 9.984444793732347e-05, "loss": 1.6329, "step": 264 }, { "epoch": 0.03996983408748114, "grad_norm": 1.9350301027297974, "learning_rate": 9.984254617676656e-05, "loss": 1.4524, "step": 265 }, { "epoch": 0.04012066365007542, "grad_norm": 1.8080114126205444, "learning_rate": 9.984063287972232e-05, "loss": 1.4432, "step": 266 }, { "epoch": 0.040271493212669686, "grad_norm": 2.0837016105651855, "learning_rate": 9.983870804663356e-05, "loss": 1.4688, "step": 267 }, { "epoch": 0.040422322775263954, "grad_norm": 2.3831660747528076, "learning_rate": 9.983677167794582e-05, "loss": 1.6382, "step": 268 }, { "epoch": 0.04057315233785822, "grad_norm": 1.7946358919143677, "learning_rate": 9.98348237741073e-05, "loss": 1.4788, "step": 269 }, { "epoch": 0.04072398190045249, "grad_norm": 2.3515286445617676, "learning_rate": 9.983286433556882e-05, "loss": 1.9936, "step": 270 }, { "epoch": 0.04087481146304676, "grad_norm": 1.8859704732894897, "learning_rate": 9.983089336278395e-05, "loss": 1.2745, "step": 271 }, { "epoch": 0.041025641025641026, "grad_norm": 2.1666197776794434, "learning_rate": 9.982891085620885e-05, "loss": 1.6192, "step": 272 }, { "epoch": 0.041176470588235294, "grad_norm": 2.275212287902832, "learning_rate": 9.982691681630243e-05, "loss": 1.681, "step": 273 }, { "epoch": 0.04132730015082956, "grad_norm": 2.3809397220611572, "learning_rate": 9.98249112435262e-05, "loss": 1.9978, "step": 274 }, { "epoch": 0.04147812971342383, "grad_norm": 2.532116651535034, "learning_rate": 9.982289413834436e-05, "loss": 1.982, "step": 275 }, { "epoch": 0.0416289592760181, "grad_norm": 2.045194149017334, "learning_rate": 9.982086550122382e-05, "loss": 1.5152, "step": 276 }, { "epoch": 0.041779788838612365, "grad_norm": 1.9967626333236694, "learning_rate": 9.981882533263409e-05, "loss": 1.6552, "step": 277 }, { "epoch": 0.04193061840120663, "grad_norm": 2.464695453643799, "learning_rate": 9.98167736330474e-05, "loss": 1.7826, "step": 278 }, { "epoch": 0.04208144796380091, "grad_norm": 2.157590389251709, "learning_rate": 9.981471040293864e-05, "loss": 1.4969, "step": 279 }, { "epoch": 0.042232277526395176, "grad_norm": 2.257296323776245, "learning_rate": 9.981263564278535e-05, "loss": 1.6786, "step": 280 }, { "epoch": 0.042383107088989444, "grad_norm": 1.82217276096344, "learning_rate": 9.981054935306776e-05, "loss": 1.3693, "step": 281 }, { "epoch": 0.04253393665158371, "grad_norm": 2.057436227798462, "learning_rate": 9.980845153426876e-05, "loss": 1.2905, "step": 282 }, { "epoch": 0.04268476621417798, "grad_norm": 2.2824275493621826, "learning_rate": 9.98063421868739e-05, "loss": 1.8705, "step": 283 }, { "epoch": 0.04283559577677225, "grad_norm": 2.1706347465515137, "learning_rate": 9.980422131137142e-05, "loss": 1.6427, "step": 284 }, { "epoch": 0.042986425339366516, "grad_norm": 2.1961731910705566, "learning_rate": 9.98020889082522e-05, "loss": 1.7293, "step": 285 }, { "epoch": 0.043137254901960784, "grad_norm": 2.193164825439453, "learning_rate": 9.97999449780098e-05, "loss": 1.547, "step": 286 }, { "epoch": 0.04328808446455505, "grad_norm": 2.2035481929779053, "learning_rate": 9.979778952114048e-05, "loss": 1.5167, "step": 287 }, { "epoch": 0.04343891402714932, "grad_norm": 2.114410877227783, "learning_rate": 9.979562253814311e-05, "loss": 1.3479, "step": 288 }, { "epoch": 0.04358974358974359, "grad_norm": 2.239751100540161, "learning_rate": 9.979344402951927e-05, "loss": 1.6793, "step": 289 }, { "epoch": 0.043740573152337855, "grad_norm": 2.2798495292663574, "learning_rate": 9.979125399577318e-05, "loss": 1.6349, "step": 290 }, { "epoch": 0.04389140271493212, "grad_norm": 2.346857786178589, "learning_rate": 9.978905243741177e-05, "loss": 1.6526, "step": 291 }, { "epoch": 0.0440422322775264, "grad_norm": 2.115262269973755, "learning_rate": 9.978683935494458e-05, "loss": 1.3788, "step": 292 }, { "epoch": 0.044193061840120666, "grad_norm": 2.1350300312042236, "learning_rate": 9.978461474888387e-05, "loss": 1.6952, "step": 293 }, { "epoch": 0.044343891402714934, "grad_norm": 2.4989709854125977, "learning_rate": 9.978237861974453e-05, "loss": 1.7617, "step": 294 }, { "epoch": 0.0444947209653092, "grad_norm": 2.652993679046631, "learning_rate": 9.978013096804413e-05, "loss": 1.8402, "step": 295 }, { "epoch": 0.04464555052790347, "grad_norm": 3.693023920059204, "learning_rate": 9.977787179430293e-05, "loss": 1.6718, "step": 296 }, { "epoch": 0.04479638009049774, "grad_norm": 3.6798834800720215, "learning_rate": 9.977560109904382e-05, "loss": 1.6634, "step": 297 }, { "epoch": 0.044947209653092006, "grad_norm": 1.9653760194778442, "learning_rate": 9.977331888279236e-05, "loss": 1.2653, "step": 298 }, { "epoch": 0.045098039215686274, "grad_norm": 1.884011149406433, "learning_rate": 9.97710251460768e-05, "loss": 1.2247, "step": 299 }, { "epoch": 0.04524886877828054, "grad_norm": 2.0321784019470215, "learning_rate": 9.976871988942804e-05, "loss": 1.4001, "step": 300 }, { "epoch": 0.04539969834087481, "grad_norm": 2.1035354137420654, "learning_rate": 9.976640311337968e-05, "loss": 1.7061, "step": 301 }, { "epoch": 0.04555052790346908, "grad_norm": 1.8764148950576782, "learning_rate": 9.976407481846791e-05, "loss": 1.5579, "step": 302 }, { "epoch": 0.045701357466063346, "grad_norm": 1.9994333982467651, "learning_rate": 9.976173500523167e-05, "loss": 1.9199, "step": 303 }, { "epoch": 0.045852187028657614, "grad_norm": 1.8820325136184692, "learning_rate": 9.975938367421252e-05, "loss": 1.6876, "step": 304 }, { "epoch": 0.04600301659125189, "grad_norm": 1.8958543539047241, "learning_rate": 9.975702082595469e-05, "loss": 1.5751, "step": 305 }, { "epoch": 0.046153846153846156, "grad_norm": 1.7075029611587524, "learning_rate": 9.97546464610051e-05, "loss": 1.3663, "step": 306 }, { "epoch": 0.046304675716440424, "grad_norm": 2.1296162605285645, "learning_rate": 9.97522605799133e-05, "loss": 1.7286, "step": 307 }, { "epoch": 0.04645550527903469, "grad_norm": 2.0746421813964844, "learning_rate": 9.974986318323151e-05, "loss": 1.8505, "step": 308 }, { "epoch": 0.04660633484162896, "grad_norm": 1.9328157901763916, "learning_rate": 9.974745427151466e-05, "loss": 1.9587, "step": 309 }, { "epoch": 0.04675716440422323, "grad_norm": 1.7952712774276733, "learning_rate": 9.974503384532028e-05, "loss": 1.4155, "step": 310 }, { "epoch": 0.046907993966817496, "grad_norm": 1.7803106307983398, "learning_rate": 9.974260190520863e-05, "loss": 1.0929, "step": 311 }, { "epoch": 0.047058823529411764, "grad_norm": 1.8547481298446655, "learning_rate": 9.974015845174259e-05, "loss": 1.5684, "step": 312 }, { "epoch": 0.04720965309200603, "grad_norm": 2.030813694000244, "learning_rate": 9.973770348548772e-05, "loss": 1.6853, "step": 313 }, { "epoch": 0.0473604826546003, "grad_norm": 2.1407673358917236, "learning_rate": 9.973523700701222e-05, "loss": 1.786, "step": 314 }, { "epoch": 0.04751131221719457, "grad_norm": 2.07851243019104, "learning_rate": 9.973275901688702e-05, "loss": 1.5293, "step": 315 }, { "epoch": 0.047662141779788836, "grad_norm": 2.4997401237487793, "learning_rate": 9.973026951568564e-05, "loss": 2.2111, "step": 316 }, { "epoch": 0.047812971342383104, "grad_norm": 1.8094096183776855, "learning_rate": 9.972776850398433e-05, "loss": 1.2745, "step": 317 }, { "epoch": 0.04796380090497738, "grad_norm": 1.7426528930664062, "learning_rate": 9.972525598236193e-05, "loss": 1.2796, "step": 318 }, { "epoch": 0.04811463046757165, "grad_norm": 2.381237745285034, "learning_rate": 9.972273195140001e-05, "loss": 1.8086, "step": 319 }, { "epoch": 0.048265460030165915, "grad_norm": 2.088801383972168, "learning_rate": 9.972019641168276e-05, "loss": 1.629, "step": 320 }, { "epoch": 0.04841628959276018, "grad_norm": 2.1674294471740723, "learning_rate": 9.971764936379709e-05, "loss": 1.5405, "step": 321 }, { "epoch": 0.04856711915535445, "grad_norm": 2.07413387298584, "learning_rate": 9.971509080833248e-05, "loss": 1.553, "step": 322 }, { "epoch": 0.04871794871794872, "grad_norm": 1.8469160795211792, "learning_rate": 9.971252074588117e-05, "loss": 1.1604, "step": 323 }, { "epoch": 0.048868778280542986, "grad_norm": 2.0366363525390625, "learning_rate": 9.970993917703802e-05, "loss": 1.3069, "step": 324 }, { "epoch": 0.049019607843137254, "grad_norm": 2.712502956390381, "learning_rate": 9.970734610240055e-05, "loss": 2.0853, "step": 325 }, { "epoch": 0.04917043740573152, "grad_norm": 2.4430429935455322, "learning_rate": 9.970474152256895e-05, "loss": 1.7796, "step": 326 }, { "epoch": 0.04932126696832579, "grad_norm": 2.2171597480773926, "learning_rate": 9.970212543814608e-05, "loss": 1.4401, "step": 327 }, { "epoch": 0.04947209653092006, "grad_norm": 2.3697609901428223, "learning_rate": 9.969949784973744e-05, "loss": 1.7159, "step": 328 }, { "epoch": 0.049622926093514326, "grad_norm": 1.9806801080703735, "learning_rate": 9.96968587579512e-05, "loss": 1.4012, "step": 329 }, { "epoch": 0.049773755656108594, "grad_norm": 1.9970709085464478, "learning_rate": 9.969420816339823e-05, "loss": 1.2601, "step": 330 }, { "epoch": 0.04992458521870287, "grad_norm": 1.8729099035263062, "learning_rate": 9.969154606669202e-05, "loss": 1.5025, "step": 331 }, { "epoch": 0.05007541478129714, "grad_norm": 2.369875907897949, "learning_rate": 9.968887246844874e-05, "loss": 1.973, "step": 332 }, { "epoch": 0.050226244343891405, "grad_norm": 2.043605089187622, "learning_rate": 9.96861873692872e-05, "loss": 1.4592, "step": 333 }, { "epoch": 0.05037707390648567, "grad_norm": 1.992164969444275, "learning_rate": 9.96834907698289e-05, "loss": 1.4897, "step": 334 }, { "epoch": 0.05052790346907994, "grad_norm": 1.9267593622207642, "learning_rate": 9.968078267069801e-05, "loss": 1.2809, "step": 335 }, { "epoch": 0.05067873303167421, "grad_norm": 2.274538278579712, "learning_rate": 9.96780630725213e-05, "loss": 1.8064, "step": 336 }, { "epoch": 0.050829562594268476, "grad_norm": 2.017228841781616, "learning_rate": 9.967533197592828e-05, "loss": 1.5342, "step": 337 }, { "epoch": 0.050980392156862744, "grad_norm": 2.17130708694458, "learning_rate": 9.967258938155108e-05, "loss": 1.5917, "step": 338 }, { "epoch": 0.05113122171945701, "grad_norm": 1.8505017757415771, "learning_rate": 9.96698352900245e-05, "loss": 1.2049, "step": 339 }, { "epoch": 0.05128205128205128, "grad_norm": 2.207291603088379, "learning_rate": 9.966706970198596e-05, "loss": 1.5648, "step": 340 }, { "epoch": 0.05143288084464555, "grad_norm": 2.6361517906188965, "learning_rate": 9.966429261807564e-05, "loss": 1.5765, "step": 341 }, { "epoch": 0.051583710407239816, "grad_norm": 2.135483741760254, "learning_rate": 9.966150403893629e-05, "loss": 1.4182, "step": 342 }, { "epoch": 0.051734539969834084, "grad_norm": 2.4497997760772705, "learning_rate": 9.965870396521333e-05, "loss": 1.7864, "step": 343 }, { "epoch": 0.05188536953242836, "grad_norm": 2.1382393836975098, "learning_rate": 9.965589239755488e-05, "loss": 1.2529, "step": 344 }, { "epoch": 0.05203619909502263, "grad_norm": 1.9038982391357422, "learning_rate": 9.965306933661173e-05, "loss": 1.2129, "step": 345 }, { "epoch": 0.052187028657616895, "grad_norm": 3.3968660831451416, "learning_rate": 9.965023478303726e-05, "loss": 1.6877, "step": 346 }, { "epoch": 0.05233785822021116, "grad_norm": 3.349261522293091, "learning_rate": 9.964738873748756e-05, "loss": 1.6679, "step": 347 }, { "epoch": 0.05248868778280543, "grad_norm": 2.1745152473449707, "learning_rate": 9.964453120062138e-05, "loss": 1.2243, "step": 348 }, { "epoch": 0.0526395173453997, "grad_norm": 1.5899947881698608, "learning_rate": 9.964166217310014e-05, "loss": 0.9986, "step": 349 }, { "epoch": 0.05279034690799397, "grad_norm": 2.140247106552124, "learning_rate": 9.963878165558787e-05, "loss": 1.2827, "step": 350 }, { "epoch": 0.052941176470588235, "grad_norm": 1.8857852220535278, "learning_rate": 9.96358896487513e-05, "loss": 1.6065, "step": 351 }, { "epoch": 0.0530920060331825, "grad_norm": 1.6521553993225098, "learning_rate": 9.963298615325983e-05, "loss": 1.4432, "step": 352 }, { "epoch": 0.05324283559577677, "grad_norm": 1.842698574066162, "learning_rate": 9.963007116978544e-05, "loss": 1.5673, "step": 353 }, { "epoch": 0.05339366515837104, "grad_norm": 1.8827754259109497, "learning_rate": 9.96271446990029e-05, "loss": 1.5212, "step": 354 }, { "epoch": 0.053544494720965306, "grad_norm": 2.433640956878662, "learning_rate": 9.962420674158954e-05, "loss": 2.0699, "step": 355 }, { "epoch": 0.053695324283559574, "grad_norm": 2.2321066856384277, "learning_rate": 9.962125729822536e-05, "loss": 1.9544, "step": 356 }, { "epoch": 0.05384615384615385, "grad_norm": 2.044400453567505, "learning_rate": 9.961829636959306e-05, "loss": 1.7248, "step": 357 }, { "epoch": 0.05399698340874812, "grad_norm": 2.2629992961883545, "learning_rate": 9.961532395637793e-05, "loss": 1.9474, "step": 358 }, { "epoch": 0.054147812971342385, "grad_norm": 1.9467332363128662, "learning_rate": 9.961234005926801e-05, "loss": 1.6516, "step": 359 }, { "epoch": 0.05429864253393665, "grad_norm": 1.658907175064087, "learning_rate": 9.960934467895393e-05, "loss": 1.4263, "step": 360 }, { "epoch": 0.05444947209653092, "grad_norm": 1.8661919832229614, "learning_rate": 9.960633781612899e-05, "loss": 1.6139, "step": 361 }, { "epoch": 0.05460030165912519, "grad_norm": 1.6447954177856445, "learning_rate": 9.960331947148915e-05, "loss": 1.3563, "step": 362 }, { "epoch": 0.05475113122171946, "grad_norm": 1.9990676641464233, "learning_rate": 9.960028964573305e-05, "loss": 2.0053, "step": 363 }, { "epoch": 0.054901960784313725, "grad_norm": 2.188812017440796, "learning_rate": 9.959724833956197e-05, "loss": 1.6219, "step": 364 }, { "epoch": 0.05505279034690799, "grad_norm": 1.783074975013733, "learning_rate": 9.959419555367983e-05, "loss": 1.2415, "step": 365 }, { "epoch": 0.05520361990950226, "grad_norm": 1.7075802087783813, "learning_rate": 9.959113128879322e-05, "loss": 1.0908, "step": 366 }, { "epoch": 0.05535444947209653, "grad_norm": 1.8681516647338867, "learning_rate": 9.958805554561141e-05, "loss": 1.159, "step": 367 }, { "epoch": 0.055505279034690796, "grad_norm": 2.082524299621582, "learning_rate": 9.958496832484629e-05, "loss": 1.6063, "step": 368 }, { "epoch": 0.055656108597285064, "grad_norm": 1.9591269493103027, "learning_rate": 9.958186962721244e-05, "loss": 1.4038, "step": 369 }, { "epoch": 0.05580693815987934, "grad_norm": 2.2012362480163574, "learning_rate": 9.957875945342707e-05, "loss": 1.1894, "step": 370 }, { "epoch": 0.05595776772247361, "grad_norm": 1.9299888610839844, "learning_rate": 9.957563780421006e-05, "loss": 1.1123, "step": 371 }, { "epoch": 0.056108597285067875, "grad_norm": 2.5027194023132324, "learning_rate": 9.957250468028393e-05, "loss": 1.5206, "step": 372 }, { "epoch": 0.05625942684766214, "grad_norm": 2.2416751384735107, "learning_rate": 9.956936008237389e-05, "loss": 1.6303, "step": 373 }, { "epoch": 0.05641025641025641, "grad_norm": 2.1342170238494873, "learning_rate": 9.956620401120779e-05, "loss": 1.6929, "step": 374 }, { "epoch": 0.05656108597285068, "grad_norm": 2.159839630126953, "learning_rate": 9.956303646751611e-05, "loss": 1.4568, "step": 375 }, { "epoch": 0.05671191553544495, "grad_norm": 1.9007152318954468, "learning_rate": 9.955985745203198e-05, "loss": 1.0179, "step": 376 }, { "epoch": 0.056862745098039215, "grad_norm": 1.8742084503173828, "learning_rate": 9.955666696549127e-05, "loss": 1.2822, "step": 377 }, { "epoch": 0.05701357466063348, "grad_norm": 2.125220537185669, "learning_rate": 9.95534650086324e-05, "loss": 1.2591, "step": 378 }, { "epoch": 0.05716440422322775, "grad_norm": 2.103370428085327, "learning_rate": 9.95502515821965e-05, "loss": 1.5081, "step": 379 }, { "epoch": 0.05731523378582202, "grad_norm": 2.2881221771240234, "learning_rate": 9.954702668692737e-05, "loss": 1.701, "step": 380 }, { "epoch": 0.05746606334841629, "grad_norm": 2.005524158477783, "learning_rate": 9.954379032357142e-05, "loss": 1.5146, "step": 381 }, { "epoch": 0.057616892911010555, "grad_norm": 2.3529951572418213, "learning_rate": 9.954054249287774e-05, "loss": 1.8107, "step": 382 }, { "epoch": 0.05776772247360483, "grad_norm": 2.443455457687378, "learning_rate": 9.953728319559805e-05, "loss": 1.8367, "step": 383 }, { "epoch": 0.0579185520361991, "grad_norm": 2.1198055744171143, "learning_rate": 9.953401243248676e-05, "loss": 1.3515, "step": 384 }, { "epoch": 0.058069381598793365, "grad_norm": 2.277155637741089, "learning_rate": 9.953073020430092e-05, "loss": 1.7541, "step": 385 }, { "epoch": 0.05822021116138763, "grad_norm": 2.0460877418518066, "learning_rate": 9.952743651180021e-05, "loss": 1.4029, "step": 386 }, { "epoch": 0.0583710407239819, "grad_norm": 1.9922071695327759, "learning_rate": 9.9524131355747e-05, "loss": 1.2497, "step": 387 }, { "epoch": 0.05852187028657617, "grad_norm": 2.109055995941162, "learning_rate": 9.952081473690631e-05, "loss": 1.4075, "step": 388 }, { "epoch": 0.05867269984917044, "grad_norm": 2.288250207901001, "learning_rate": 9.951748665604576e-05, "loss": 1.354, "step": 389 }, { "epoch": 0.058823529411764705, "grad_norm": 2.251235246658325, "learning_rate": 9.95141471139357e-05, "loss": 1.5289, "step": 390 }, { "epoch": 0.05897435897435897, "grad_norm": 1.9700275659561157, "learning_rate": 9.951079611134909e-05, "loss": 1.2715, "step": 391 }, { "epoch": 0.05912518853695324, "grad_norm": 2.411818504333496, "learning_rate": 9.950743364906152e-05, "loss": 1.5106, "step": 392 }, { "epoch": 0.05927601809954751, "grad_norm": 2.4477343559265137, "learning_rate": 9.95040597278513e-05, "loss": 1.6444, "step": 393 }, { "epoch": 0.05942684766214178, "grad_norm": 2.349750280380249, "learning_rate": 9.950067434849933e-05, "loss": 1.5246, "step": 394 }, { "epoch": 0.05957767722473605, "grad_norm": 2.7155117988586426, "learning_rate": 9.949727751178918e-05, "loss": 1.4065, "step": 395 }, { "epoch": 0.05972850678733032, "grad_norm": 2.3150546550750732, "learning_rate": 9.94938692185071e-05, "loss": 1.2642, "step": 396 }, { "epoch": 0.05987933634992459, "grad_norm": 2.4782910346984863, "learning_rate": 9.949044946944194e-05, "loss": 1.4239, "step": 397 }, { "epoch": 0.060030165912518856, "grad_norm": 2.365849018096924, "learning_rate": 9.948701826538526e-05, "loss": 1.3902, "step": 398 }, { "epoch": 0.060180995475113123, "grad_norm": 2.052434206008911, "learning_rate": 9.948357560713124e-05, "loss": 1.1237, "step": 399 }, { "epoch": 0.06033182503770739, "grad_norm": 2.121835231781006, "learning_rate": 9.948012149547667e-05, "loss": 1.2673, "step": 400 }, { "epoch": 0.06048265460030166, "grad_norm": 2.3905692100524902, "learning_rate": 9.94766559312211e-05, "loss": 1.751, "step": 401 }, { "epoch": 0.06063348416289593, "grad_norm": 2.0945801734924316, "learning_rate": 9.94731789151666e-05, "loss": 1.7549, "step": 402 }, { "epoch": 0.060784313725490195, "grad_norm": 2.069336414337158, "learning_rate": 9.9469690448118e-05, "loss": 1.5039, "step": 403 }, { "epoch": 0.06093514328808446, "grad_norm": 2.138472080230713, "learning_rate": 9.946619053088272e-05, "loss": 1.5041, "step": 404 }, { "epoch": 0.06108597285067873, "grad_norm": 2.4060845375061035, "learning_rate": 9.946267916427086e-05, "loss": 2.0509, "step": 405 }, { "epoch": 0.061236802413273, "grad_norm": 2.0061821937561035, "learning_rate": 9.945915634909513e-05, "loss": 1.4339, "step": 406 }, { "epoch": 0.06138763197586727, "grad_norm": 2.027470350265503, "learning_rate": 9.945562208617092e-05, "loss": 1.4034, "step": 407 }, { "epoch": 0.06153846153846154, "grad_norm": 1.9938838481903076, "learning_rate": 9.945207637631628e-05, "loss": 1.474, "step": 408 }, { "epoch": 0.06168929110105581, "grad_norm": 2.2502546310424805, "learning_rate": 9.94485192203519e-05, "loss": 1.8248, "step": 409 }, { "epoch": 0.06184012066365008, "grad_norm": 1.754813551902771, "learning_rate": 9.94449506191011e-05, "loss": 1.37, "step": 410 }, { "epoch": 0.061990950226244346, "grad_norm": 1.7429776191711426, "learning_rate": 9.944137057338987e-05, "loss": 1.3636, "step": 411 }, { "epoch": 0.062141779788838614, "grad_norm": 2.3067684173583984, "learning_rate": 9.943777908404685e-05, "loss": 1.8181, "step": 412 }, { "epoch": 0.06229260935143288, "grad_norm": 2.048661231994629, "learning_rate": 9.94341761519033e-05, "loss": 1.6485, "step": 413 }, { "epoch": 0.06244343891402715, "grad_norm": 2.3448455333709717, "learning_rate": 9.943056177779315e-05, "loss": 1.9119, "step": 414 }, { "epoch": 0.06259426847662142, "grad_norm": 1.9397746324539185, "learning_rate": 9.942693596255302e-05, "loss": 1.5047, "step": 415 }, { "epoch": 0.06274509803921569, "grad_norm": 2.144831657409668, "learning_rate": 9.942329870702208e-05, "loss": 1.5845, "step": 416 }, { "epoch": 0.06289592760180995, "grad_norm": 2.106671094894409, "learning_rate": 9.941965001204223e-05, "loss": 1.5394, "step": 417 }, { "epoch": 0.06304675716440422, "grad_norm": 2.007460832595825, "learning_rate": 9.941598987845799e-05, "loss": 1.6971, "step": 418 }, { "epoch": 0.06319758672699849, "grad_norm": 1.9224721193313599, "learning_rate": 9.941231830711655e-05, "loss": 1.3712, "step": 419 }, { "epoch": 0.06334841628959276, "grad_norm": 2.043853282928467, "learning_rate": 9.94086352988677e-05, "loss": 1.5176, "step": 420 }, { "epoch": 0.06349924585218703, "grad_norm": 2.2324841022491455, "learning_rate": 9.940494085456391e-05, "loss": 1.8371, "step": 421 }, { "epoch": 0.06365007541478129, "grad_norm": 2.2512619495391846, "learning_rate": 9.94012349750603e-05, "loss": 1.4618, "step": 422 }, { "epoch": 0.06380090497737556, "grad_norm": 2.2154650688171387, "learning_rate": 9.939751766121462e-05, "loss": 1.7325, "step": 423 }, { "epoch": 0.06395173453996983, "grad_norm": 2.3972463607788086, "learning_rate": 9.939378891388726e-05, "loss": 1.6924, "step": 424 }, { "epoch": 0.0641025641025641, "grad_norm": 2.105886220932007, "learning_rate": 9.93900487339413e-05, "loss": 1.3109, "step": 425 }, { "epoch": 0.06425339366515836, "grad_norm": 2.384768486022949, "learning_rate": 9.938629712224242e-05, "loss": 1.5599, "step": 426 }, { "epoch": 0.06440422322775263, "grad_norm": 2.3110196590423584, "learning_rate": 9.938253407965896e-05, "loss": 1.6303, "step": 427 }, { "epoch": 0.06455505279034691, "grad_norm": 2.5721442699432373, "learning_rate": 9.937875960706194e-05, "loss": 1.7884, "step": 428 }, { "epoch": 0.06470588235294118, "grad_norm": 2.1358489990234375, "learning_rate": 9.937497370532493e-05, "loss": 1.5566, "step": 429 }, { "epoch": 0.06485671191553545, "grad_norm": 2.1993465423583984, "learning_rate": 9.937117637532427e-05, "loss": 1.4529, "step": 430 }, { "epoch": 0.06500754147812972, "grad_norm": 1.9674843549728394, "learning_rate": 9.936736761793888e-05, "loss": 1.3587, "step": 431 }, { "epoch": 0.06515837104072399, "grad_norm": 2.2609212398529053, "learning_rate": 9.936354743405028e-05, "loss": 1.6467, "step": 432 }, { "epoch": 0.06530920060331825, "grad_norm": 2.0433859825134277, "learning_rate": 9.935971582454273e-05, "loss": 1.4377, "step": 433 }, { "epoch": 0.06546003016591252, "grad_norm": 2.0998268127441406, "learning_rate": 9.93558727903031e-05, "loss": 1.2392, "step": 434 }, { "epoch": 0.06561085972850679, "grad_norm": 2.453070878982544, "learning_rate": 9.935201833222083e-05, "loss": 1.6045, "step": 435 }, { "epoch": 0.06576168929110106, "grad_norm": 2.418815851211548, "learning_rate": 9.934815245118813e-05, "loss": 1.5059, "step": 436 }, { "epoch": 0.06591251885369533, "grad_norm": 2.4651410579681396, "learning_rate": 9.934427514809977e-05, "loss": 1.3626, "step": 437 }, { "epoch": 0.0660633484162896, "grad_norm": 2.5019607543945312, "learning_rate": 9.934038642385318e-05, "loss": 1.5253, "step": 438 }, { "epoch": 0.06621417797888386, "grad_norm": 2.545278549194336, "learning_rate": 9.933648627934845e-05, "loss": 1.4551, "step": 439 }, { "epoch": 0.06636500754147813, "grad_norm": 2.7286593914031982, "learning_rate": 9.93325747154883e-05, "loss": 1.8207, "step": 440 }, { "epoch": 0.0665158371040724, "grad_norm": 2.408266544342041, "learning_rate": 9.932865173317807e-05, "loss": 1.4999, "step": 441 }, { "epoch": 0.06666666666666667, "grad_norm": 2.1894545555114746, "learning_rate": 9.93247173333258e-05, "loss": 1.2625, "step": 442 }, { "epoch": 0.06681749622926093, "grad_norm": 2.332308292388916, "learning_rate": 9.932077151684214e-05, "loss": 1.2804, "step": 443 }, { "epoch": 0.0669683257918552, "grad_norm": 2.8180978298187256, "learning_rate": 9.931681428464035e-05, "loss": 1.4684, "step": 444 }, { "epoch": 0.06711915535444947, "grad_norm": 2.9690754413604736, "learning_rate": 9.931284563763641e-05, "loss": 1.3634, "step": 445 }, { "epoch": 0.06726998491704374, "grad_norm": 3.205641746520996, "learning_rate": 9.930886557674886e-05, "loss": 1.2904, "step": 446 }, { "epoch": 0.067420814479638, "grad_norm": 2.591036081314087, "learning_rate": 9.930487410289893e-05, "loss": 1.3005, "step": 447 }, { "epoch": 0.06757164404223227, "grad_norm": 2.1119163036346436, "learning_rate": 9.930087121701051e-05, "loss": 1.0085, "step": 448 }, { "epoch": 0.06772247360482654, "grad_norm": 2.1135833263397217, "learning_rate": 9.929685692001007e-05, "loss": 1.2414, "step": 449 }, { "epoch": 0.06787330316742081, "grad_norm": 2.301307201385498, "learning_rate": 9.929283121282676e-05, "loss": 1.1405, "step": 450 }, { "epoch": 0.06802413273001508, "grad_norm": 2.659304141998291, "learning_rate": 9.928879409639235e-05, "loss": 1.7189, "step": 451 }, { "epoch": 0.06817496229260935, "grad_norm": 2.2451791763305664, "learning_rate": 9.928474557164131e-05, "loss": 1.3915, "step": 452 }, { "epoch": 0.06832579185520361, "grad_norm": 2.604858160018921, "learning_rate": 9.928068563951067e-05, "loss": 1.674, "step": 453 }, { "epoch": 0.0684766214177979, "grad_norm": 2.382298707962036, "learning_rate": 9.927661430094013e-05, "loss": 1.5041, "step": 454 }, { "epoch": 0.06862745098039216, "grad_norm": 2.320906639099121, "learning_rate": 9.927253155687207e-05, "loss": 1.7029, "step": 455 }, { "epoch": 0.06877828054298643, "grad_norm": 2.3916819095611572, "learning_rate": 9.926843740825145e-05, "loss": 1.6025, "step": 456 }, { "epoch": 0.0689291101055807, "grad_norm": 2.320341110229492, "learning_rate": 9.92643318560259e-05, "loss": 1.8042, "step": 457 }, { "epoch": 0.06907993966817497, "grad_norm": 2.2245514392852783, "learning_rate": 9.92602149011457e-05, "loss": 1.7677, "step": 458 }, { "epoch": 0.06923076923076923, "grad_norm": 1.9618114233016968, "learning_rate": 9.925608654456374e-05, "loss": 1.2543, "step": 459 }, { "epoch": 0.0693815987933635, "grad_norm": 1.8921643495559692, "learning_rate": 9.925194678723557e-05, "loss": 1.5725, "step": 460 }, { "epoch": 0.06953242835595777, "grad_norm": 1.7456059455871582, "learning_rate": 9.924779563011937e-05, "loss": 1.2919, "step": 461 }, { "epoch": 0.06968325791855204, "grad_norm": 1.9638371467590332, "learning_rate": 9.924363307417595e-05, "loss": 1.4764, "step": 462 }, { "epoch": 0.0698340874811463, "grad_norm": 2.1433091163635254, "learning_rate": 9.923945912036879e-05, "loss": 1.6761, "step": 463 }, { "epoch": 0.06998491704374057, "grad_norm": 1.8884183168411255, "learning_rate": 9.923527376966397e-05, "loss": 1.5327, "step": 464 }, { "epoch": 0.07013574660633484, "grad_norm": 2.051868438720703, "learning_rate": 9.923107702303025e-05, "loss": 1.6128, "step": 465 }, { "epoch": 0.07028657616892911, "grad_norm": 2.0410938262939453, "learning_rate": 9.922686888143897e-05, "loss": 1.5613, "step": 466 }, { "epoch": 0.07043740573152338, "grad_norm": 2.0305609703063965, "learning_rate": 9.922264934586418e-05, "loss": 1.4471, "step": 467 }, { "epoch": 0.07058823529411765, "grad_norm": 2.009371757507324, "learning_rate": 9.921841841728248e-05, "loss": 1.6647, "step": 468 }, { "epoch": 0.07073906485671191, "grad_norm": 1.8768951892852783, "learning_rate": 9.92141760966732e-05, "loss": 1.3799, "step": 469 }, { "epoch": 0.07088989441930618, "grad_norm": 1.9296144247055054, "learning_rate": 9.920992238501822e-05, "loss": 1.1921, "step": 470 }, { "epoch": 0.07104072398190045, "grad_norm": 1.9924461841583252, "learning_rate": 9.920565728330215e-05, "loss": 1.3742, "step": 471 }, { "epoch": 0.07119155354449472, "grad_norm": 1.8936887979507446, "learning_rate": 9.920138079251215e-05, "loss": 1.0881, "step": 472 }, { "epoch": 0.07134238310708899, "grad_norm": 2.1397571563720703, "learning_rate": 9.919709291363804e-05, "loss": 1.6866, "step": 473 }, { "epoch": 0.07149321266968325, "grad_norm": 2.129784345626831, "learning_rate": 9.919279364767232e-05, "loss": 1.3215, "step": 474 }, { "epoch": 0.07164404223227752, "grad_norm": 2.336887836456299, "learning_rate": 9.918848299561006e-05, "loss": 1.3591, "step": 475 }, { "epoch": 0.07179487179487179, "grad_norm": 2.2237837314605713, "learning_rate": 9.918416095844901e-05, "loss": 1.3641, "step": 476 }, { "epoch": 0.07194570135746606, "grad_norm": 1.8997187614440918, "learning_rate": 9.917982753718955e-05, "loss": 1.3294, "step": 477 }, { "epoch": 0.07209653092006033, "grad_norm": 2.386195659637451, "learning_rate": 9.917548273283469e-05, "loss": 1.5141, "step": 478 }, { "epoch": 0.0722473604826546, "grad_norm": 1.768800139427185, "learning_rate": 9.917112654639007e-05, "loss": 1.1309, "step": 479 }, { "epoch": 0.07239819004524888, "grad_norm": 1.998670220375061, "learning_rate": 9.916675897886393e-05, "loss": 1.3321, "step": 480 }, { "epoch": 0.07254901960784314, "grad_norm": 1.9141145944595337, "learning_rate": 9.916238003126725e-05, "loss": 1.2183, "step": 481 }, { "epoch": 0.07269984917043741, "grad_norm": 2.446007013320923, "learning_rate": 9.915798970461352e-05, "loss": 1.7773, "step": 482 }, { "epoch": 0.07285067873303168, "grad_norm": 2.2703816890716553, "learning_rate": 9.915358799991896e-05, "loss": 1.3987, "step": 483 }, { "epoch": 0.07300150829562595, "grad_norm": 2.317413091659546, "learning_rate": 9.914917491820233e-05, "loss": 1.6743, "step": 484 }, { "epoch": 0.07315233785822021, "grad_norm": 2.484086751937866, "learning_rate": 9.914475046048512e-05, "loss": 1.4346, "step": 485 }, { "epoch": 0.07330316742081448, "grad_norm": 2.1060049533843994, "learning_rate": 9.914031462779139e-05, "loss": 1.2784, "step": 486 }, { "epoch": 0.07345399698340875, "grad_norm": 2.4134058952331543, "learning_rate": 9.913586742114787e-05, "loss": 1.4102, "step": 487 }, { "epoch": 0.07360482654600302, "grad_norm": 2.011125087738037, "learning_rate": 9.913140884158388e-05, "loss": 1.2556, "step": 488 }, { "epoch": 0.07375565610859729, "grad_norm": 2.3142900466918945, "learning_rate": 9.91269388901314e-05, "loss": 1.3796, "step": 489 }, { "epoch": 0.07390648567119155, "grad_norm": 2.5536510944366455, "learning_rate": 9.912245756782506e-05, "loss": 1.5487, "step": 490 }, { "epoch": 0.07405731523378582, "grad_norm": 2.164118766784668, "learning_rate": 9.911796487570209e-05, "loss": 1.2171, "step": 491 }, { "epoch": 0.07420814479638009, "grad_norm": 2.5822372436523438, "learning_rate": 9.911346081480234e-05, "loss": 1.5807, "step": 492 }, { "epoch": 0.07435897435897436, "grad_norm": 2.475553512573242, "learning_rate": 9.910894538616836e-05, "loss": 1.2122, "step": 493 }, { "epoch": 0.07450980392156863, "grad_norm": 2.7050111293792725, "learning_rate": 9.910441859084525e-05, "loss": 1.4851, "step": 494 }, { "epoch": 0.0746606334841629, "grad_norm": 2.584451913833618, "learning_rate": 9.909988042988078e-05, "loss": 1.5824, "step": 495 }, { "epoch": 0.07481146304675716, "grad_norm": 3.208932876586914, "learning_rate": 9.909533090432534e-05, "loss": 1.3789, "step": 496 }, { "epoch": 0.07496229260935143, "grad_norm": 2.654555082321167, "learning_rate": 9.909077001523198e-05, "loss": 0.9702, "step": 497 }, { "epoch": 0.0751131221719457, "grad_norm": 2.867201805114746, "learning_rate": 9.908619776365636e-05, "loss": 1.5254, "step": 498 }, { "epoch": 0.07526395173453997, "grad_norm": 2.3314249515533447, "learning_rate": 9.908161415065672e-05, "loss": 1.0644, "step": 499 }, { "epoch": 0.07541478129713423, "grad_norm": 2.498211145401001, "learning_rate": 9.907701917729402e-05, "loss": 1.4767, "step": 500 }, { "epoch": 0.0755656108597285, "grad_norm": 3.3147830963134766, "learning_rate": 9.90724128446318e-05, "loss": 1.6958, "step": 501 }, { "epoch": 0.07571644042232277, "grad_norm": 2.3740317821502686, "learning_rate": 9.906779515373624e-05, "loss": 1.6983, "step": 502 }, { "epoch": 0.07586726998491704, "grad_norm": 2.4016666412353516, "learning_rate": 9.906316610567611e-05, "loss": 1.5589, "step": 503 }, { "epoch": 0.0760180995475113, "grad_norm": 2.108949899673462, "learning_rate": 9.905852570152288e-05, "loss": 1.3802, "step": 504 }, { "epoch": 0.07616892911010557, "grad_norm": 2.3924717903137207, "learning_rate": 9.905387394235059e-05, "loss": 1.6924, "step": 505 }, { "epoch": 0.07631975867269986, "grad_norm": 2.3926901817321777, "learning_rate": 9.904921082923597e-05, "loss": 1.6722, "step": 506 }, { "epoch": 0.07647058823529412, "grad_norm": 2.878296136856079, "learning_rate": 9.904453636325827e-05, "loss": 1.9239, "step": 507 }, { "epoch": 0.07662141779788839, "grad_norm": 2.761507987976074, "learning_rate": 9.90398505454995e-05, "loss": 1.6538, "step": 508 }, { "epoch": 0.07677224736048266, "grad_norm": 2.6997220516204834, "learning_rate": 9.903515337704418e-05, "loss": 1.7932, "step": 509 }, { "epoch": 0.07692307692307693, "grad_norm": 2.818328380584717, "learning_rate": 9.903044485897956e-05, "loss": 1.6452, "step": 510 }, { "epoch": 0.0770739064856712, "grad_norm": 2.298360824584961, "learning_rate": 9.902572499239543e-05, "loss": 1.6422, "step": 511 }, { "epoch": 0.07722473604826546, "grad_norm": 2.1006827354431152, "learning_rate": 9.902099377838425e-05, "loss": 1.433, "step": 512 }, { "epoch": 0.07737556561085973, "grad_norm": 1.9711023569107056, "learning_rate": 9.901625121804112e-05, "loss": 1.506, "step": 513 }, { "epoch": 0.077526395173454, "grad_norm": 2.10446834564209, "learning_rate": 9.901149731246373e-05, "loss": 1.4356, "step": 514 }, { "epoch": 0.07767722473604827, "grad_norm": 1.772251844406128, "learning_rate": 9.900673206275241e-05, "loss": 1.0009, "step": 515 }, { "epoch": 0.07782805429864253, "grad_norm": 2.1258292198181152, "learning_rate": 9.900195547001014e-05, "loss": 1.7313, "step": 516 }, { "epoch": 0.0779788838612368, "grad_norm": 2.298868417739868, "learning_rate": 9.899716753534247e-05, "loss": 1.8089, "step": 517 }, { "epoch": 0.07812971342383107, "grad_norm": 2.0090651512145996, "learning_rate": 9.899236825985763e-05, "loss": 1.3601, "step": 518 }, { "epoch": 0.07828054298642534, "grad_norm": 2.0547776222229004, "learning_rate": 9.898755764466645e-05, "loss": 1.4232, "step": 519 }, { "epoch": 0.0784313725490196, "grad_norm": 1.9851412773132324, "learning_rate": 9.89827356908824e-05, "loss": 1.3847, "step": 520 }, { "epoch": 0.07858220211161387, "grad_norm": 2.354459762573242, "learning_rate": 9.897790239962155e-05, "loss": 1.9653, "step": 521 }, { "epoch": 0.07873303167420814, "grad_norm": 1.9165771007537842, "learning_rate": 9.897305777200258e-05, "loss": 1.3331, "step": 522 }, { "epoch": 0.07888386123680241, "grad_norm": 2.1581804752349854, "learning_rate": 9.896820180914687e-05, "loss": 1.3693, "step": 523 }, { "epoch": 0.07903469079939668, "grad_norm": 2.250979423522949, "learning_rate": 9.896333451217837e-05, "loss": 1.5235, "step": 524 }, { "epoch": 0.07918552036199095, "grad_norm": 1.861362099647522, "learning_rate": 9.895845588222361e-05, "loss": 1.3256, "step": 525 }, { "epoch": 0.07933634992458521, "grad_norm": 1.8232605457305908, "learning_rate": 9.895356592041185e-05, "loss": 1.2202, "step": 526 }, { "epoch": 0.07948717948717948, "grad_norm": 1.7856285572052002, "learning_rate": 9.894866462787486e-05, "loss": 1.3716, "step": 527 }, { "epoch": 0.07963800904977375, "grad_norm": 2.1264638900756836, "learning_rate": 9.894375200574712e-05, "loss": 1.4825, "step": 528 }, { "epoch": 0.07978883861236802, "grad_norm": 2.1568119525909424, "learning_rate": 9.893882805516572e-05, "loss": 1.6922, "step": 529 }, { "epoch": 0.07993966817496229, "grad_norm": 2.0701980590820312, "learning_rate": 9.89338927772703e-05, "loss": 1.483, "step": 530 }, { "epoch": 0.08009049773755657, "grad_norm": 2.0261435508728027, "learning_rate": 9.892894617320318e-05, "loss": 1.1977, "step": 531 }, { "epoch": 0.08024132730015084, "grad_norm": 1.6572974920272827, "learning_rate": 9.892398824410934e-05, "loss": 0.9564, "step": 532 }, { "epoch": 0.0803921568627451, "grad_norm": 2.072892904281616, "learning_rate": 9.891901899113628e-05, "loss": 1.3395, "step": 533 }, { "epoch": 0.08054298642533937, "grad_norm": 1.9855315685272217, "learning_rate": 9.891403841543421e-05, "loss": 1.3476, "step": 534 }, { "epoch": 0.08069381598793364, "grad_norm": 2.231200933456421, "learning_rate": 9.890904651815592e-05, "loss": 1.536, "step": 535 }, { "epoch": 0.08084464555052791, "grad_norm": 2.0439870357513428, "learning_rate": 9.890404330045682e-05, "loss": 1.2836, "step": 536 }, { "epoch": 0.08099547511312218, "grad_norm": 2.008301019668579, "learning_rate": 9.889902876349497e-05, "loss": 1.3125, "step": 537 }, { "epoch": 0.08114630467571644, "grad_norm": 2.232987880706787, "learning_rate": 9.889400290843099e-05, "loss": 1.6771, "step": 538 }, { "epoch": 0.08129713423831071, "grad_norm": 2.2124712467193604, "learning_rate": 9.888896573642818e-05, "loss": 1.6153, "step": 539 }, { "epoch": 0.08144796380090498, "grad_norm": 2.120187520980835, "learning_rate": 9.888391724865246e-05, "loss": 1.3752, "step": 540 }, { "epoch": 0.08159879336349925, "grad_norm": 2.1582772731781006, "learning_rate": 9.88788574462723e-05, "loss": 1.3491, "step": 541 }, { "epoch": 0.08174962292609352, "grad_norm": 2.3547956943511963, "learning_rate": 9.887378633045888e-05, "loss": 1.5045, "step": 542 }, { "epoch": 0.08190045248868778, "grad_norm": 2.2569007873535156, "learning_rate": 9.886870390238591e-05, "loss": 1.5001, "step": 543 }, { "epoch": 0.08205128205128205, "grad_norm": 2.370293140411377, "learning_rate": 9.886361016322978e-05, "loss": 1.2656, "step": 544 }, { "epoch": 0.08220211161387632, "grad_norm": 2.271300792694092, "learning_rate": 9.88585051141695e-05, "loss": 1.5054, "step": 545 }, { "epoch": 0.08235294117647059, "grad_norm": 2.484107732772827, "learning_rate": 9.885338875638666e-05, "loss": 1.4349, "step": 546 }, { "epoch": 0.08250377073906486, "grad_norm": 4.551506042480469, "learning_rate": 9.884826109106546e-05, "loss": 2.0325, "step": 547 }, { "epoch": 0.08265460030165912, "grad_norm": 3.2043097019195557, "learning_rate": 9.884312211939279e-05, "loss": 1.5591, "step": 548 }, { "epoch": 0.08280542986425339, "grad_norm": 2.1631574630737305, "learning_rate": 9.883797184255808e-05, "loss": 1.3747, "step": 549 }, { "epoch": 0.08295625942684766, "grad_norm": 1.9252657890319824, "learning_rate": 9.883281026175342e-05, "loss": 0.8518, "step": 550 }, { "epoch": 0.08310708898944193, "grad_norm": 3.4278464317321777, "learning_rate": 9.882763737817348e-05, "loss": 1.8638, "step": 551 }, { "epoch": 0.0832579185520362, "grad_norm": 2.8765599727630615, "learning_rate": 9.88224531930156e-05, "loss": 2.2347, "step": 552 }, { "epoch": 0.08340874811463046, "grad_norm": 2.188877820968628, "learning_rate": 9.881725770747968e-05, "loss": 1.6593, "step": 553 }, { "epoch": 0.08355957767722473, "grad_norm": 2.1948931217193604, "learning_rate": 9.881205092276826e-05, "loss": 1.6455, "step": 554 }, { "epoch": 0.083710407239819, "grad_norm": 2.0407495498657227, "learning_rate": 9.880683284008652e-05, "loss": 1.6019, "step": 555 }, { "epoch": 0.08386123680241327, "grad_norm": 2.706714630126953, "learning_rate": 9.88016034606422e-05, "loss": 2.1843, "step": 556 }, { "epoch": 0.08401206636500755, "grad_norm": 1.911562442779541, "learning_rate": 9.879636278564573e-05, "loss": 1.2663, "step": 557 }, { "epoch": 0.08416289592760182, "grad_norm": 2.1969919204711914, "learning_rate": 9.879111081631004e-05, "loss": 1.5868, "step": 558 }, { "epoch": 0.08431372549019608, "grad_norm": 2.426746129989624, "learning_rate": 9.87858475538508e-05, "loss": 1.4926, "step": 559 }, { "epoch": 0.08446455505279035, "grad_norm": 2.4105238914489746, "learning_rate": 9.878057299948621e-05, "loss": 1.8006, "step": 560 }, { "epoch": 0.08461538461538462, "grad_norm": 2.113612651824951, "learning_rate": 9.877528715443714e-05, "loss": 1.7256, "step": 561 }, { "epoch": 0.08476621417797889, "grad_norm": 1.8413364887237549, "learning_rate": 9.876999001992699e-05, "loss": 1.3626, "step": 562 }, { "epoch": 0.08491704374057316, "grad_norm": 2.0110812187194824, "learning_rate": 9.876468159718188e-05, "loss": 1.5043, "step": 563 }, { "epoch": 0.08506787330316742, "grad_norm": 1.9871180057525635, "learning_rate": 9.875936188743048e-05, "loss": 1.3572, "step": 564 }, { "epoch": 0.08521870286576169, "grad_norm": 1.9818801879882812, "learning_rate": 9.875403089190408e-05, "loss": 1.3328, "step": 565 }, { "epoch": 0.08536953242835596, "grad_norm": 2.07511830329895, "learning_rate": 9.874868861183658e-05, "loss": 1.3518, "step": 566 }, { "epoch": 0.08552036199095023, "grad_norm": 2.142296552658081, "learning_rate": 9.874333504846448e-05, "loss": 1.7494, "step": 567 }, { "epoch": 0.0856711915535445, "grad_norm": 1.7099543809890747, "learning_rate": 9.873797020302693e-05, "loss": 1.057, "step": 568 }, { "epoch": 0.08582202111613876, "grad_norm": 2.287100076675415, "learning_rate": 9.873259407676568e-05, "loss": 1.7002, "step": 569 }, { "epoch": 0.08597285067873303, "grad_norm": 1.9302998781204224, "learning_rate": 9.872720667092505e-05, "loss": 1.3704, "step": 570 }, { "epoch": 0.0861236802413273, "grad_norm": 2.389709711074829, "learning_rate": 9.872180798675201e-05, "loss": 1.8739, "step": 571 }, { "epoch": 0.08627450980392157, "grad_norm": 2.147240400314331, "learning_rate": 9.871639802549616e-05, "loss": 1.4211, "step": 572 }, { "epoch": 0.08642533936651584, "grad_norm": 2.212937593460083, "learning_rate": 9.871097678840966e-05, "loss": 1.5427, "step": 573 }, { "epoch": 0.0865761689291101, "grad_norm": 2.3460443019866943, "learning_rate": 9.87055442767473e-05, "loss": 1.786, "step": 574 }, { "epoch": 0.08672699849170437, "grad_norm": 2.4007625579833984, "learning_rate": 9.870010049176648e-05, "loss": 1.3728, "step": 575 }, { "epoch": 0.08687782805429864, "grad_norm": 2.0630557537078857, "learning_rate": 9.869464543472722e-05, "loss": 1.3995, "step": 576 }, { "epoch": 0.08702865761689291, "grad_norm": 2.2057836055755615, "learning_rate": 9.868917910689214e-05, "loss": 1.3055, "step": 577 }, { "epoch": 0.08717948717948718, "grad_norm": 2.3438024520874023, "learning_rate": 9.868370150952646e-05, "loss": 1.878, "step": 578 }, { "epoch": 0.08733031674208144, "grad_norm": 2.0782670974731445, "learning_rate": 9.867821264389802e-05, "loss": 1.4796, "step": 579 }, { "epoch": 0.08748114630467571, "grad_norm": 2.2018582820892334, "learning_rate": 9.867271251127728e-05, "loss": 1.5518, "step": 580 }, { "epoch": 0.08763197586726998, "grad_norm": 2.109649658203125, "learning_rate": 9.866720111293728e-05, "loss": 1.497, "step": 581 }, { "epoch": 0.08778280542986425, "grad_norm": 2.3510866165161133, "learning_rate": 9.866167845015367e-05, "loss": 2.0501, "step": 582 }, { "epoch": 0.08793363499245853, "grad_norm": 1.9672144651412964, "learning_rate": 9.865614452420474e-05, "loss": 1.2701, "step": 583 }, { "epoch": 0.0880844645550528, "grad_norm": 2.0804853439331055, "learning_rate": 9.865059933637136e-05, "loss": 1.5648, "step": 584 }, { "epoch": 0.08823529411764706, "grad_norm": 2.2581827640533447, "learning_rate": 9.864504288793699e-05, "loss": 1.7468, "step": 585 }, { "epoch": 0.08838612368024133, "grad_norm": 1.677463173866272, "learning_rate": 9.863947518018773e-05, "loss": 1.1212, "step": 586 }, { "epoch": 0.0885369532428356, "grad_norm": 2.323305130004883, "learning_rate": 9.86338962144123e-05, "loss": 1.6776, "step": 587 }, { "epoch": 0.08868778280542987, "grad_norm": 1.656463861465454, "learning_rate": 9.862830599190196e-05, "loss": 1.0769, "step": 588 }, { "epoch": 0.08883861236802414, "grad_norm": 2.189354658126831, "learning_rate": 9.862270451395064e-05, "loss": 1.5036, "step": 589 }, { "epoch": 0.0889894419306184, "grad_norm": 2.1354801654815674, "learning_rate": 9.861709178185484e-05, "loss": 1.3051, "step": 590 }, { "epoch": 0.08914027149321267, "grad_norm": 2.028475522994995, "learning_rate": 9.861146779691368e-05, "loss": 1.2316, "step": 591 }, { "epoch": 0.08929110105580694, "grad_norm": 2.2836828231811523, "learning_rate": 9.860583256042888e-05, "loss": 1.4728, "step": 592 }, { "epoch": 0.08944193061840121, "grad_norm": 2.5102667808532715, "learning_rate": 9.860018607370477e-05, "loss": 1.5306, "step": 593 }, { "epoch": 0.08959276018099548, "grad_norm": 2.669907331466675, "learning_rate": 9.859452833804826e-05, "loss": 1.6984, "step": 594 }, { "epoch": 0.08974358974358974, "grad_norm": 2.2926864624023438, "learning_rate": 9.858885935476889e-05, "loss": 1.2505, "step": 595 }, { "epoch": 0.08989441930618401, "grad_norm": 2.1844966411590576, "learning_rate": 9.858317912517882e-05, "loss": 1.0822, "step": 596 }, { "epoch": 0.09004524886877828, "grad_norm": 2.541600465774536, "learning_rate": 9.857748765059273e-05, "loss": 1.1185, "step": 597 }, { "epoch": 0.09019607843137255, "grad_norm": 2.485133647918701, "learning_rate": 9.857178493232804e-05, "loss": 1.1744, "step": 598 }, { "epoch": 0.09034690799396682, "grad_norm": 2.3249192237854004, "learning_rate": 9.856607097170462e-05, "loss": 1.4469, "step": 599 }, { "epoch": 0.09049773755656108, "grad_norm": 2.0013108253479004, "learning_rate": 9.856034577004505e-05, "loss": 1.055, "step": 600 }, { "epoch": 0.09064856711915535, "grad_norm": 2.5659003257751465, "learning_rate": 9.855460932867448e-05, "loss": 1.7103, "step": 601 }, { "epoch": 0.09079939668174962, "grad_norm": 1.9800317287445068, "learning_rate": 9.854886164892065e-05, "loss": 1.4324, "step": 602 }, { "epoch": 0.09095022624434389, "grad_norm": 2.181774377822876, "learning_rate": 9.854310273211391e-05, "loss": 1.5512, "step": 603 }, { "epoch": 0.09110105580693816, "grad_norm": 1.8020169734954834, "learning_rate": 9.853733257958721e-05, "loss": 1.3495, "step": 604 }, { "epoch": 0.09125188536953242, "grad_norm": 2.103862762451172, "learning_rate": 9.853155119267609e-05, "loss": 1.7367, "step": 605 }, { "epoch": 0.09140271493212669, "grad_norm": 2.162837028503418, "learning_rate": 9.852575857271874e-05, "loss": 2.0539, "step": 606 }, { "epoch": 0.09155354449472096, "grad_norm": 1.9912182092666626, "learning_rate": 9.851995472105588e-05, "loss": 1.4467, "step": 607 }, { "epoch": 0.09170437405731523, "grad_norm": 2.04294490814209, "learning_rate": 9.851413963903086e-05, "loss": 1.7088, "step": 608 }, { "epoch": 0.09185520361990951, "grad_norm": 2.061035633087158, "learning_rate": 9.850831332798967e-05, "loss": 1.7087, "step": 609 }, { "epoch": 0.09200603318250378, "grad_norm": 1.9700995683670044, "learning_rate": 9.85024757892808e-05, "loss": 1.6341, "step": 610 }, { "epoch": 0.09215686274509804, "grad_norm": 1.7970138788223267, "learning_rate": 9.849662702425543e-05, "loss": 1.3412, "step": 611 }, { "epoch": 0.09230769230769231, "grad_norm": 1.865281105041504, "learning_rate": 9.849076703426732e-05, "loss": 1.532, "step": 612 }, { "epoch": 0.09245852187028658, "grad_norm": 1.8525031805038452, "learning_rate": 9.84848958206728e-05, "loss": 1.3598, "step": 613 }, { "epoch": 0.09260935143288085, "grad_norm": 1.8883912563323975, "learning_rate": 9.84790133848308e-05, "loss": 1.2958, "step": 614 }, { "epoch": 0.09276018099547512, "grad_norm": 2.0695533752441406, "learning_rate": 9.84731197281029e-05, "loss": 1.4082, "step": 615 }, { "epoch": 0.09291101055806938, "grad_norm": 2.1228621006011963, "learning_rate": 9.846721485185319e-05, "loss": 1.4784, "step": 616 }, { "epoch": 0.09306184012066365, "grad_norm": 2.210078239440918, "learning_rate": 9.846129875744845e-05, "loss": 1.9423, "step": 617 }, { "epoch": 0.09321266968325792, "grad_norm": 2.4706459045410156, "learning_rate": 9.845537144625798e-05, "loss": 1.5875, "step": 618 }, { "epoch": 0.09336349924585219, "grad_norm": 1.9344203472137451, "learning_rate": 9.84494329196537e-05, "loss": 1.4311, "step": 619 }, { "epoch": 0.09351432880844646, "grad_norm": 2.179300308227539, "learning_rate": 9.844348317901017e-05, "loss": 1.8064, "step": 620 }, { "epoch": 0.09366515837104072, "grad_norm": 1.943617582321167, "learning_rate": 9.843752222570449e-05, "loss": 1.367, "step": 621 }, { "epoch": 0.09381598793363499, "grad_norm": 2.2165043354034424, "learning_rate": 9.843155006111635e-05, "loss": 1.4549, "step": 622 }, { "epoch": 0.09396681749622926, "grad_norm": 2.149899959564209, "learning_rate": 9.842556668662808e-05, "loss": 1.8619, "step": 623 }, { "epoch": 0.09411764705882353, "grad_norm": 2.0328519344329834, "learning_rate": 9.84195721036246e-05, "loss": 1.3389, "step": 624 }, { "epoch": 0.0942684766214178, "grad_norm": 2.318492889404297, "learning_rate": 9.84135663134934e-05, "loss": 1.6371, "step": 625 }, { "epoch": 0.09441930618401206, "grad_norm": 2.2614850997924805, "learning_rate": 9.840754931762454e-05, "loss": 1.2314, "step": 626 }, { "epoch": 0.09457013574660633, "grad_norm": 2.0057296752929688, "learning_rate": 9.840152111741075e-05, "loss": 1.1824, "step": 627 }, { "epoch": 0.0947209653092006, "grad_norm": 2.076726198196411, "learning_rate": 9.839548171424729e-05, "loss": 1.3436, "step": 628 }, { "epoch": 0.09487179487179487, "grad_norm": 2.3527371883392334, "learning_rate": 9.838943110953201e-05, "loss": 1.5851, "step": 629 }, { "epoch": 0.09502262443438914, "grad_norm": 2.003953218460083, "learning_rate": 9.83833693046654e-05, "loss": 1.4849, "step": 630 }, { "epoch": 0.0951734539969834, "grad_norm": 2.143641948699951, "learning_rate": 9.837729630105052e-05, "loss": 1.496, "step": 631 }, { "epoch": 0.09532428355957767, "grad_norm": 2.2946596145629883, "learning_rate": 9.837121210009302e-05, "loss": 1.3702, "step": 632 }, { "epoch": 0.09547511312217194, "grad_norm": 2.6816534996032715, "learning_rate": 9.836511670320112e-05, "loss": 1.6296, "step": 633 }, { "epoch": 0.09562594268476621, "grad_norm": 1.988086462020874, "learning_rate": 9.835901011178567e-05, "loss": 1.3161, "step": 634 }, { "epoch": 0.09577677224736049, "grad_norm": 2.058480739593506, "learning_rate": 9.835289232726009e-05, "loss": 1.4348, "step": 635 }, { "epoch": 0.09592760180995476, "grad_norm": 2.2156879901885986, "learning_rate": 9.834676335104039e-05, "loss": 1.715, "step": 636 }, { "epoch": 0.09607843137254903, "grad_norm": 1.9767754077911377, "learning_rate": 9.834062318454518e-05, "loss": 1.1158, "step": 637 }, { "epoch": 0.0962292609351433, "grad_norm": 1.7689787149429321, "learning_rate": 9.833447182919565e-05, "loss": 1.3351, "step": 638 }, { "epoch": 0.09638009049773756, "grad_norm": 2.280195713043213, "learning_rate": 9.832830928641558e-05, "loss": 1.6247, "step": 639 }, { "epoch": 0.09653092006033183, "grad_norm": 2.018157482147217, "learning_rate": 9.832213555763136e-05, "loss": 1.4022, "step": 640 }, { "epoch": 0.0966817496229261, "grad_norm": 1.968766689300537, "learning_rate": 9.831595064427193e-05, "loss": 1.3849, "step": 641 }, { "epoch": 0.09683257918552036, "grad_norm": 2.3065130710601807, "learning_rate": 9.830975454776885e-05, "loss": 1.4016, "step": 642 }, { "epoch": 0.09698340874811463, "grad_norm": 2.0207769870758057, "learning_rate": 9.830354726955629e-05, "loss": 1.2432, "step": 643 }, { "epoch": 0.0971342383107089, "grad_norm": 1.9676011800765991, "learning_rate": 9.829732881107092e-05, "loss": 1.2164, "step": 644 }, { "epoch": 0.09728506787330317, "grad_norm": 2.6048552989959717, "learning_rate": 9.82910991737521e-05, "loss": 1.5537, "step": 645 }, { "epoch": 0.09743589743589744, "grad_norm": 2.018747091293335, "learning_rate": 9.82848583590417e-05, "loss": 0.9388, "step": 646 }, { "epoch": 0.0975867269984917, "grad_norm": 2.9909493923187256, "learning_rate": 9.827860636838427e-05, "loss": 1.4277, "step": 647 }, { "epoch": 0.09773755656108597, "grad_norm": 2.240370988845825, "learning_rate": 9.827234320322682e-05, "loss": 1.4442, "step": 648 }, { "epoch": 0.09788838612368024, "grad_norm": 1.828084111213684, "learning_rate": 9.826606886501906e-05, "loss": 1.1156, "step": 649 }, { "epoch": 0.09803921568627451, "grad_norm": 2.1443729400634766, "learning_rate": 9.825978335521321e-05, "loss": 1.3978, "step": 650 }, { "epoch": 0.09819004524886878, "grad_norm": 2.4449050426483154, "learning_rate": 9.825348667526412e-05, "loss": 2.1165, "step": 651 }, { "epoch": 0.09834087481146304, "grad_norm": 2.0376052856445312, "learning_rate": 9.82471788266292e-05, "loss": 1.6367, "step": 652 }, { "epoch": 0.09849170437405731, "grad_norm": 1.6563533544540405, "learning_rate": 9.824085981076848e-05, "loss": 1.1249, "step": 653 }, { "epoch": 0.09864253393665158, "grad_norm": 1.8417364358901978, "learning_rate": 9.823452962914452e-05, "loss": 1.4868, "step": 654 }, { "epoch": 0.09879336349924585, "grad_norm": 1.7958792448043823, "learning_rate": 9.82281882832225e-05, "loss": 1.2791, "step": 655 }, { "epoch": 0.09894419306184012, "grad_norm": 1.9717044830322266, "learning_rate": 9.822183577447021e-05, "loss": 1.9364, "step": 656 }, { "epoch": 0.09909502262443438, "grad_norm": 1.937212347984314, "learning_rate": 9.821547210435797e-05, "loss": 1.4169, "step": 657 }, { "epoch": 0.09924585218702865, "grad_norm": 1.9123809337615967, "learning_rate": 9.820909727435871e-05, "loss": 1.4583, "step": 658 }, { "epoch": 0.09939668174962292, "grad_norm": 1.8934465646743774, "learning_rate": 9.820271128594794e-05, "loss": 1.4086, "step": 659 }, { "epoch": 0.09954751131221719, "grad_norm": 1.738917589187622, "learning_rate": 9.819631414060373e-05, "loss": 1.1535, "step": 660 }, { "epoch": 0.09969834087481147, "grad_norm": 1.7804843187332153, "learning_rate": 9.81899058398068e-05, "loss": 1.3678, "step": 661 }, { "epoch": 0.09984917043740574, "grad_norm": 2.058985471725464, "learning_rate": 9.818348638504036e-05, "loss": 1.7324, "step": 662 }, { "epoch": 0.1, "grad_norm": 2.009982109069824, "learning_rate": 9.817705577779028e-05, "loss": 1.8562, "step": 663 }, { "epoch": 0.10015082956259427, "grad_norm": 1.8494656085968018, "learning_rate": 9.817061401954496e-05, "loss": 1.3701, "step": 664 }, { "epoch": 0.10030165912518854, "grad_norm": 1.790815830230713, "learning_rate": 9.816416111179542e-05, "loss": 1.4324, "step": 665 }, { "epoch": 0.10045248868778281, "grad_norm": 2.056887149810791, "learning_rate": 9.815769705603521e-05, "loss": 1.396, "step": 666 }, { "epoch": 0.10060331825037708, "grad_norm": 1.693608045578003, "learning_rate": 9.815122185376053e-05, "loss": 1.2947, "step": 667 }, { "epoch": 0.10075414781297135, "grad_norm": 1.7255276441574097, "learning_rate": 9.81447355064701e-05, "loss": 1.206, "step": 668 }, { "epoch": 0.10090497737556561, "grad_norm": 1.6875814199447632, "learning_rate": 9.813823801566524e-05, "loss": 1.1127, "step": 669 }, { "epoch": 0.10105580693815988, "grad_norm": 1.922959804534912, "learning_rate": 9.813172938284987e-05, "loss": 1.3411, "step": 670 }, { "epoch": 0.10120663650075415, "grad_norm": 2.0158820152282715, "learning_rate": 9.812520960953043e-05, "loss": 1.345, "step": 671 }, { "epoch": 0.10135746606334842, "grad_norm": 1.8558765649795532, "learning_rate": 9.811867869721602e-05, "loss": 1.1339, "step": 672 }, { "epoch": 0.10150829562594268, "grad_norm": 1.8779488801956177, "learning_rate": 9.811213664741825e-05, "loss": 1.2642, "step": 673 }, { "epoch": 0.10165912518853695, "grad_norm": 2.013618230819702, "learning_rate": 9.810558346165135e-05, "loss": 1.3127, "step": 674 }, { "epoch": 0.10180995475113122, "grad_norm": 1.982958436012268, "learning_rate": 9.809901914143209e-05, "loss": 1.199, "step": 675 }, { "epoch": 0.10196078431372549, "grad_norm": 1.7226979732513428, "learning_rate": 9.809244368827985e-05, "loss": 1.0038, "step": 676 }, { "epoch": 0.10211161387631976, "grad_norm": 2.268807888031006, "learning_rate": 9.808585710371658e-05, "loss": 1.5968, "step": 677 }, { "epoch": 0.10226244343891402, "grad_norm": 2.3648650646209717, "learning_rate": 9.807925938926681e-05, "loss": 1.6752, "step": 678 }, { "epoch": 0.10241327300150829, "grad_norm": 2.616579294204712, "learning_rate": 9.807265054645761e-05, "loss": 2.0672, "step": 679 }, { "epoch": 0.10256410256410256, "grad_norm": 2.1461124420166016, "learning_rate": 9.806603057681868e-05, "loss": 1.4793, "step": 680 }, { "epoch": 0.10271493212669683, "grad_norm": 2.1779255867004395, "learning_rate": 9.805939948188226e-05, "loss": 1.5446, "step": 681 }, { "epoch": 0.1028657616892911, "grad_norm": 2.138575792312622, "learning_rate": 9.805275726318317e-05, "loss": 1.3107, "step": 682 }, { "epoch": 0.10301659125188536, "grad_norm": 2.21596097946167, "learning_rate": 9.804610392225881e-05, "loss": 1.6852, "step": 683 }, { "epoch": 0.10316742081447963, "grad_norm": 2.329537868499756, "learning_rate": 9.803943946064915e-05, "loss": 1.7629, "step": 684 }, { "epoch": 0.1033182503770739, "grad_norm": 2.1015031337738037, "learning_rate": 9.803276387989675e-05, "loss": 1.6391, "step": 685 }, { "epoch": 0.10346907993966817, "grad_norm": 2.160978317260742, "learning_rate": 9.802607718154673e-05, "loss": 1.3196, "step": 686 }, { "epoch": 0.10361990950226245, "grad_norm": 1.8772436380386353, "learning_rate": 9.801937936714678e-05, "loss": 1.14, "step": 687 }, { "epoch": 0.10377073906485672, "grad_norm": 2.068040132522583, "learning_rate": 9.801267043824715e-05, "loss": 1.3262, "step": 688 }, { "epoch": 0.10392156862745099, "grad_norm": 1.894688367843628, "learning_rate": 9.80059503964007e-05, "loss": 1.1724, "step": 689 }, { "epoch": 0.10407239819004525, "grad_norm": 1.9089521169662476, "learning_rate": 9.799921924316283e-05, "loss": 0.8657, "step": 690 }, { "epoch": 0.10422322775263952, "grad_norm": 2.3364973068237305, "learning_rate": 9.799247698009153e-05, "loss": 1.5973, "step": 691 }, { "epoch": 0.10437405731523379, "grad_norm": 2.5619301795959473, "learning_rate": 9.798572360874736e-05, "loss": 1.3442, "step": 692 }, { "epoch": 0.10452488687782806, "grad_norm": 2.5922067165374756, "learning_rate": 9.797895913069344e-05, "loss": 1.459, "step": 693 }, { "epoch": 0.10467571644042233, "grad_norm": 2.5765655040740967, "learning_rate": 9.797218354749547e-05, "loss": 1.3734, "step": 694 }, { "epoch": 0.1048265460030166, "grad_norm": 1.9034115076065063, "learning_rate": 9.796539686072171e-05, "loss": 0.9481, "step": 695 }, { "epoch": 0.10497737556561086, "grad_norm": 2.9135286808013916, "learning_rate": 9.795859907194299e-05, "loss": 1.4942, "step": 696 }, { "epoch": 0.10512820512820513, "grad_norm": 2.65156888961792, "learning_rate": 9.795179018273273e-05, "loss": 1.6737, "step": 697 }, { "epoch": 0.1052790346907994, "grad_norm": 2.326845645904541, "learning_rate": 9.794497019466692e-05, "loss": 1.247, "step": 698 }, { "epoch": 0.10542986425339367, "grad_norm": 2.5117335319519043, "learning_rate": 9.793813910932407e-05, "loss": 1.7415, "step": 699 }, { "epoch": 0.10558069381598793, "grad_norm": 1.669466495513916, "learning_rate": 9.793129692828533e-05, "loss": 0.7584, "step": 700 }, { "epoch": 0.1057315233785822, "grad_norm": 2.915030002593994, "learning_rate": 9.792444365313435e-05, "loss": 1.9866, "step": 701 }, { "epoch": 0.10588235294117647, "grad_norm": 2.4733641147613525, "learning_rate": 9.791757928545742e-05, "loss": 1.8836, "step": 702 }, { "epoch": 0.10603318250377074, "grad_norm": 2.2590954303741455, "learning_rate": 9.791070382684331e-05, "loss": 1.7923, "step": 703 }, { "epoch": 0.106184012066365, "grad_norm": 2.2017927169799805, "learning_rate": 9.790381727888342e-05, "loss": 1.5241, "step": 704 }, { "epoch": 0.10633484162895927, "grad_norm": 1.8193823099136353, "learning_rate": 9.789691964317171e-05, "loss": 1.2317, "step": 705 }, { "epoch": 0.10648567119155354, "grad_norm": 1.6650581359863281, "learning_rate": 9.789001092130471e-05, "loss": 1.0624, "step": 706 }, { "epoch": 0.10663650075414781, "grad_norm": 1.8699396848678589, "learning_rate": 9.788309111488148e-05, "loss": 1.4744, "step": 707 }, { "epoch": 0.10678733031674208, "grad_norm": 2.26715350151062, "learning_rate": 9.787616022550366e-05, "loss": 1.7118, "step": 708 }, { "epoch": 0.10693815987933634, "grad_norm": 2.3962268829345703, "learning_rate": 9.786921825477551e-05, "loss": 2.0528, "step": 709 }, { "epoch": 0.10708898944193061, "grad_norm": 1.8923518657684326, "learning_rate": 9.786226520430375e-05, "loss": 1.4254, "step": 710 }, { "epoch": 0.10723981900452488, "grad_norm": 1.8281782865524292, "learning_rate": 9.785530107569777e-05, "loss": 1.299, "step": 711 }, { "epoch": 0.10739064856711915, "grad_norm": 2.491332769393921, "learning_rate": 9.784832587056945e-05, "loss": 2.2497, "step": 712 }, { "epoch": 0.10754147812971343, "grad_norm": 1.830319881439209, "learning_rate": 9.784133959053328e-05, "loss": 1.2758, "step": 713 }, { "epoch": 0.1076923076923077, "grad_norm": 1.996274471282959, "learning_rate": 9.783434223720629e-05, "loss": 1.3187, "step": 714 }, { "epoch": 0.10784313725490197, "grad_norm": 2.3327176570892334, "learning_rate": 9.782733381220807e-05, "loss": 1.8242, "step": 715 }, { "epoch": 0.10799396681749623, "grad_norm": 1.8498339653015137, "learning_rate": 9.782031431716078e-05, "loss": 1.3179, "step": 716 }, { "epoch": 0.1081447963800905, "grad_norm": 1.888105869293213, "learning_rate": 9.781328375368914e-05, "loss": 1.4949, "step": 717 }, { "epoch": 0.10829562594268477, "grad_norm": 1.7863876819610596, "learning_rate": 9.780624212342047e-05, "loss": 1.2038, "step": 718 }, { "epoch": 0.10844645550527904, "grad_norm": 2.0943331718444824, "learning_rate": 9.779918942798456e-05, "loss": 1.5434, "step": 719 }, { "epoch": 0.1085972850678733, "grad_norm": 1.9994456768035889, "learning_rate": 9.779212566901385e-05, "loss": 1.4947, "step": 720 }, { "epoch": 0.10874811463046757, "grad_norm": 1.9945685863494873, "learning_rate": 9.778505084814332e-05, "loss": 1.2882, "step": 721 }, { "epoch": 0.10889894419306184, "grad_norm": 1.958489179611206, "learning_rate": 9.777796496701047e-05, "loss": 1.3583, "step": 722 }, { "epoch": 0.10904977375565611, "grad_norm": 1.9830950498580933, "learning_rate": 9.777086802725541e-05, "loss": 1.2309, "step": 723 }, { "epoch": 0.10920060331825038, "grad_norm": 2.1284217834472656, "learning_rate": 9.776376003052077e-05, "loss": 1.4011, "step": 724 }, { "epoch": 0.10935143288084465, "grad_norm": 2.145578384399414, "learning_rate": 9.775664097845176e-05, "loss": 1.2868, "step": 725 }, { "epoch": 0.10950226244343891, "grad_norm": 1.8859941959381104, "learning_rate": 9.774951087269615e-05, "loss": 1.2189, "step": 726 }, { "epoch": 0.10965309200603318, "grad_norm": 2.069518804550171, "learning_rate": 9.774236971490428e-05, "loss": 1.5654, "step": 727 }, { "epoch": 0.10980392156862745, "grad_norm": 1.8040270805358887, "learning_rate": 9.7735217506729e-05, "loss": 1.0688, "step": 728 }, { "epoch": 0.10995475113122172, "grad_norm": 2.1686718463897705, "learning_rate": 9.772805424982578e-05, "loss": 1.4408, "step": 729 }, { "epoch": 0.11010558069381599, "grad_norm": 2.242349863052368, "learning_rate": 9.772087994585261e-05, "loss": 1.3567, "step": 730 }, { "epoch": 0.11025641025641025, "grad_norm": 2.446911096572876, "learning_rate": 9.771369459647004e-05, "loss": 1.7169, "step": 731 }, { "epoch": 0.11040723981900452, "grad_norm": 1.9658212661743164, "learning_rate": 9.770649820334117e-05, "loss": 1.2972, "step": 732 }, { "epoch": 0.11055806938159879, "grad_norm": 2.190000295639038, "learning_rate": 9.769929076813169e-05, "loss": 1.244, "step": 733 }, { "epoch": 0.11070889894419306, "grad_norm": 2.2551960945129395, "learning_rate": 9.76920722925098e-05, "loss": 1.43, "step": 734 }, { "epoch": 0.11085972850678733, "grad_norm": 1.9090838432312012, "learning_rate": 9.768484277814631e-05, "loss": 1.229, "step": 735 }, { "epoch": 0.11101055806938159, "grad_norm": 2.1648261547088623, "learning_rate": 9.767760222671452e-05, "loss": 1.2593, "step": 736 }, { "epoch": 0.11116138763197586, "grad_norm": 2.0040831565856934, "learning_rate": 9.767035063989034e-05, "loss": 1.4684, "step": 737 }, { "epoch": 0.11131221719457013, "grad_norm": 1.9773333072662354, "learning_rate": 9.76630880193522e-05, "loss": 1.3125, "step": 738 }, { "epoch": 0.11146304675716441, "grad_norm": 1.898787498474121, "learning_rate": 9.765581436678111e-05, "loss": 1.1586, "step": 739 }, { "epoch": 0.11161387631975868, "grad_norm": 2.3469107151031494, "learning_rate": 9.76485296838606e-05, "loss": 1.5807, "step": 740 }, { "epoch": 0.11176470588235295, "grad_norm": 2.1334564685821533, "learning_rate": 9.764123397227678e-05, "loss": 1.382, "step": 741 }, { "epoch": 0.11191553544494721, "grad_norm": 2.049365282058716, "learning_rate": 9.763392723371832e-05, "loss": 1.1462, "step": 742 }, { "epoch": 0.11206636500754148, "grad_norm": 2.593294382095337, "learning_rate": 9.762660946987641e-05, "loss": 1.7628, "step": 743 }, { "epoch": 0.11221719457013575, "grad_norm": 2.541346311569214, "learning_rate": 9.761928068244482e-05, "loss": 1.3405, "step": 744 }, { "epoch": 0.11236802413273002, "grad_norm": 2.2519469261169434, "learning_rate": 9.761194087311985e-05, "loss": 1.4051, "step": 745 }, { "epoch": 0.11251885369532429, "grad_norm": 2.3047966957092285, "learning_rate": 9.760459004360036e-05, "loss": 1.3947, "step": 746 }, { "epoch": 0.11266968325791855, "grad_norm": 2.1622793674468994, "learning_rate": 9.759722819558779e-05, "loss": 1.2649, "step": 747 }, { "epoch": 0.11282051282051282, "grad_norm": 1.9681200981140137, "learning_rate": 9.758985533078609e-05, "loss": 1.2116, "step": 748 }, { "epoch": 0.11297134238310709, "grad_norm": 1.9276795387268066, "learning_rate": 9.758247145090173e-05, "loss": 1.1885, "step": 749 }, { "epoch": 0.11312217194570136, "grad_norm": 2.049804210662842, "learning_rate": 9.757507655764385e-05, "loss": 1.1154, "step": 750 }, { "epoch": 0.11327300150829563, "grad_norm": 2.4763283729553223, "learning_rate": 9.756767065272401e-05, "loss": 1.7269, "step": 751 }, { "epoch": 0.1134238310708899, "grad_norm": 2.167797803878784, "learning_rate": 9.756025373785639e-05, "loss": 1.8723, "step": 752 }, { "epoch": 0.11357466063348416, "grad_norm": 1.9352751970291138, "learning_rate": 9.755282581475769e-05, "loss": 1.4467, "step": 753 }, { "epoch": 0.11372549019607843, "grad_norm": 2.0437910556793213, "learning_rate": 9.754538688514716e-05, "loss": 1.8025, "step": 754 }, { "epoch": 0.1138763197586727, "grad_norm": 2.1338541507720947, "learning_rate": 9.753793695074663e-05, "loss": 1.617, "step": 755 }, { "epoch": 0.11402714932126697, "grad_norm": 1.906224012374878, "learning_rate": 9.753047601328041e-05, "loss": 1.631, "step": 756 }, { "epoch": 0.11417797888386123, "grad_norm": 1.8936704397201538, "learning_rate": 9.752300407447544e-05, "loss": 1.5337, "step": 757 }, { "epoch": 0.1143288084464555, "grad_norm": 2.0061473846435547, "learning_rate": 9.751552113606115e-05, "loss": 1.7083, "step": 758 }, { "epoch": 0.11447963800904977, "grad_norm": 1.9538122415542603, "learning_rate": 9.750802719976952e-05, "loss": 1.6795, "step": 759 }, { "epoch": 0.11463046757164404, "grad_norm": 1.8154133558273315, "learning_rate": 9.75005222673351e-05, "loss": 1.1303, "step": 760 }, { "epoch": 0.1147812971342383, "grad_norm": 2.118807077407837, "learning_rate": 9.749300634049498e-05, "loss": 1.7623, "step": 761 }, { "epoch": 0.11493212669683257, "grad_norm": 1.510846734046936, "learning_rate": 9.748547942098875e-05, "loss": 1.1512, "step": 762 }, { "epoch": 0.11508295625942684, "grad_norm": 1.9070452451705933, "learning_rate": 9.747794151055862e-05, "loss": 1.3845, "step": 763 }, { "epoch": 0.11523378582202111, "grad_norm": 1.9719150066375732, "learning_rate": 9.747039261094929e-05, "loss": 1.5365, "step": 764 }, { "epoch": 0.11538461538461539, "grad_norm": 1.77341628074646, "learning_rate": 9.7462832723908e-05, "loss": 1.3438, "step": 765 }, { "epoch": 0.11553544494720966, "grad_norm": 2.1729040145874023, "learning_rate": 9.745526185118458e-05, "loss": 1.5697, "step": 766 }, { "epoch": 0.11568627450980393, "grad_norm": 1.8950449228286743, "learning_rate": 9.744767999453135e-05, "loss": 1.2604, "step": 767 }, { "epoch": 0.1158371040723982, "grad_norm": 2.1599841117858887, "learning_rate": 9.744008715570322e-05, "loss": 1.6351, "step": 768 }, { "epoch": 0.11598793363499246, "grad_norm": 1.7477604150772095, "learning_rate": 9.74324833364576e-05, "loss": 1.1606, "step": 769 }, { "epoch": 0.11613876319758673, "grad_norm": 2.010547637939453, "learning_rate": 9.742486853855445e-05, "loss": 1.4331, "step": 770 }, { "epoch": 0.116289592760181, "grad_norm": 2.002239465713501, "learning_rate": 9.741724276375631e-05, "loss": 1.6287, "step": 771 }, { "epoch": 0.11644042232277527, "grad_norm": 2.2884528636932373, "learning_rate": 9.740960601382822e-05, "loss": 1.5531, "step": 772 }, { "epoch": 0.11659125188536953, "grad_norm": 2.026289224624634, "learning_rate": 9.740195829053775e-05, "loss": 1.5058, "step": 773 }, { "epoch": 0.1167420814479638, "grad_norm": 2.07962965965271, "learning_rate": 9.739429959565507e-05, "loss": 1.388, "step": 774 }, { "epoch": 0.11689291101055807, "grad_norm": 1.8911181688308716, "learning_rate": 9.738662993095281e-05, "loss": 1.2673, "step": 775 }, { "epoch": 0.11704374057315234, "grad_norm": 2.2263457775115967, "learning_rate": 9.737894929820621e-05, "loss": 1.6304, "step": 776 }, { "epoch": 0.1171945701357466, "grad_norm": 2.040027618408203, "learning_rate": 9.737125769919303e-05, "loss": 1.4402, "step": 777 }, { "epoch": 0.11734539969834087, "grad_norm": 2.013152599334717, "learning_rate": 9.736355513569351e-05, "loss": 1.358, "step": 778 }, { "epoch": 0.11749622926093514, "grad_norm": 1.9204072952270508, "learning_rate": 9.735584160949049e-05, "loss": 1.2702, "step": 779 }, { "epoch": 0.11764705882352941, "grad_norm": 2.01863956451416, "learning_rate": 9.734811712236937e-05, "loss": 1.2179, "step": 780 }, { "epoch": 0.11779788838612368, "grad_norm": 2.3715755939483643, "learning_rate": 9.734038167611801e-05, "loss": 1.873, "step": 781 }, { "epoch": 0.11794871794871795, "grad_norm": 2.252241849899292, "learning_rate": 9.733263527252687e-05, "loss": 1.6852, "step": 782 }, { "epoch": 0.11809954751131221, "grad_norm": 2.102360963821411, "learning_rate": 9.73248779133889e-05, "loss": 1.2861, "step": 783 }, { "epoch": 0.11825037707390648, "grad_norm": 2.1654603481292725, "learning_rate": 9.731710960049962e-05, "loss": 1.4408, "step": 784 }, { "epoch": 0.11840120663650075, "grad_norm": 2.1258981227874756, "learning_rate": 9.730933033565706e-05, "loss": 1.5483, "step": 785 }, { "epoch": 0.11855203619909502, "grad_norm": 2.164949417114258, "learning_rate": 9.730154012066181e-05, "loss": 1.4501, "step": 786 }, { "epoch": 0.11870286576168929, "grad_norm": 2.321657657623291, "learning_rate": 9.7293738957317e-05, "loss": 1.7376, "step": 787 }, { "epoch": 0.11885369532428355, "grad_norm": 1.9069411754608154, "learning_rate": 9.728592684742825e-05, "loss": 1.3862, "step": 788 }, { "epoch": 0.11900452488687782, "grad_norm": 2.2773919105529785, "learning_rate": 9.727810379280375e-05, "loss": 1.5284, "step": 789 }, { "epoch": 0.1191553544494721, "grad_norm": 2.2799999713897705, "learning_rate": 9.72702697952542e-05, "loss": 1.3762, "step": 790 }, { "epoch": 0.11930618401206637, "grad_norm": 2.0497937202453613, "learning_rate": 9.726242485659288e-05, "loss": 1.249, "step": 791 }, { "epoch": 0.11945701357466064, "grad_norm": 1.843137502670288, "learning_rate": 9.725456897863552e-05, "loss": 1.0434, "step": 792 }, { "epoch": 0.11960784313725491, "grad_norm": 2.2501907348632812, "learning_rate": 9.724670216320048e-05, "loss": 1.3849, "step": 793 }, { "epoch": 0.11975867269984918, "grad_norm": 3.1499533653259277, "learning_rate": 9.723882441210856e-05, "loss": 2.3374, "step": 794 }, { "epoch": 0.11990950226244344, "grad_norm": 2.28291916847229, "learning_rate": 9.723093572718316e-05, "loss": 1.3148, "step": 795 }, { "epoch": 0.12006033182503771, "grad_norm": 2.070971727371216, "learning_rate": 9.72230361102502e-05, "loss": 1.1045, "step": 796 }, { "epoch": 0.12021116138763198, "grad_norm": 2.5492336750030518, "learning_rate": 9.721512556313805e-05, "loss": 1.2047, "step": 797 }, { "epoch": 0.12036199095022625, "grad_norm": 2.344729423522949, "learning_rate": 9.720720408767775e-05, "loss": 1.4672, "step": 798 }, { "epoch": 0.12051282051282051, "grad_norm": 2.1794044971466064, "learning_rate": 9.719927168570276e-05, "loss": 1.3597, "step": 799 }, { "epoch": 0.12066365007541478, "grad_norm": 1.9934016466140747, "learning_rate": 9.719132835904907e-05, "loss": 0.983, "step": 800 }, { "epoch": 0.12081447963800905, "grad_norm": 2.718465805053711, "learning_rate": 9.718337410955529e-05, "loss": 1.5304, "step": 801 }, { "epoch": 0.12096530920060332, "grad_norm": 2.1472959518432617, "learning_rate": 9.717540893906247e-05, "loss": 1.4895, "step": 802 }, { "epoch": 0.12111613876319759, "grad_norm": 2.120173215866089, "learning_rate": 9.716743284941423e-05, "loss": 1.6605, "step": 803 }, { "epoch": 0.12126696832579185, "grad_norm": 1.9663909673690796, "learning_rate": 9.715944584245669e-05, "loss": 1.5929, "step": 804 }, { "epoch": 0.12141779788838612, "grad_norm": 2.0677783489227295, "learning_rate": 9.715144792003852e-05, "loss": 1.6462, "step": 805 }, { "epoch": 0.12156862745098039, "grad_norm": 2.1090519428253174, "learning_rate": 9.714343908401089e-05, "loss": 1.5886, "step": 806 }, { "epoch": 0.12171945701357466, "grad_norm": 1.653702735900879, "learning_rate": 9.713541933622754e-05, "loss": 1.108, "step": 807 }, { "epoch": 0.12187028657616893, "grad_norm": 2.051887273788452, "learning_rate": 9.71273886785447e-05, "loss": 1.6686, "step": 808 }, { "epoch": 0.1220211161387632, "grad_norm": 2.3014259338378906, "learning_rate": 9.711934711282114e-05, "loss": 2.0333, "step": 809 }, { "epoch": 0.12217194570135746, "grad_norm": 1.672851324081421, "learning_rate": 9.711129464091815e-05, "loss": 1.1034, "step": 810 }, { "epoch": 0.12232277526395173, "grad_norm": 1.7933355569839478, "learning_rate": 9.710323126469954e-05, "loss": 1.2269, "step": 811 }, { "epoch": 0.122473604826546, "grad_norm": 1.7691394090652466, "learning_rate": 9.709515698603163e-05, "loss": 1.1924, "step": 812 }, { "epoch": 0.12262443438914027, "grad_norm": 1.8649042844772339, "learning_rate": 9.708707180678331e-05, "loss": 1.3984, "step": 813 }, { "epoch": 0.12277526395173453, "grad_norm": 2.0087668895721436, "learning_rate": 9.707897572882595e-05, "loss": 1.6077, "step": 814 }, { "epoch": 0.1229260935143288, "grad_norm": 1.7827634811401367, "learning_rate": 9.707086875403346e-05, "loss": 1.2146, "step": 815 }, { "epoch": 0.12307692307692308, "grad_norm": 1.8732385635375977, "learning_rate": 9.706275088428227e-05, "loss": 1.4204, "step": 816 }, { "epoch": 0.12322775263951735, "grad_norm": 1.8897415399551392, "learning_rate": 9.705462212145133e-05, "loss": 1.257, "step": 817 }, { "epoch": 0.12337858220211162, "grad_norm": 2.623896837234497, "learning_rate": 9.704648246742211e-05, "loss": 1.4505, "step": 818 }, { "epoch": 0.12352941176470589, "grad_norm": 1.843873381614685, "learning_rate": 9.703833192407862e-05, "loss": 1.1238, "step": 819 }, { "epoch": 0.12368024132730016, "grad_norm": 2.2622385025024414, "learning_rate": 9.703017049330734e-05, "loss": 1.6068, "step": 820 }, { "epoch": 0.12383107088989442, "grad_norm": 2.3389852046966553, "learning_rate": 9.702199817699733e-05, "loss": 1.7755, "step": 821 }, { "epoch": 0.12398190045248869, "grad_norm": 2.185539484024048, "learning_rate": 9.701381497704015e-05, "loss": 1.6309, "step": 822 }, { "epoch": 0.12413273001508296, "grad_norm": 2.436089038848877, "learning_rate": 9.700562089532984e-05, "loss": 1.5012, "step": 823 }, { "epoch": 0.12428355957767723, "grad_norm": 2.0534682273864746, "learning_rate": 9.699741593376301e-05, "loss": 1.2885, "step": 824 }, { "epoch": 0.1244343891402715, "grad_norm": 1.9727692604064941, "learning_rate": 9.698920009423877e-05, "loss": 1.2596, "step": 825 }, { "epoch": 0.12458521870286576, "grad_norm": 1.7550346851348877, "learning_rate": 9.698097337865876e-05, "loss": 1.1829, "step": 826 }, { "epoch": 0.12473604826546003, "grad_norm": 1.996570348739624, "learning_rate": 9.69727357889271e-05, "loss": 1.4516, "step": 827 }, { "epoch": 0.1248868778280543, "grad_norm": 1.9991282224655151, "learning_rate": 9.696448732695044e-05, "loss": 1.3295, "step": 828 }, { "epoch": 0.12503770739064857, "grad_norm": 2.096342086791992, "learning_rate": 9.695622799463802e-05, "loss": 1.5133, "step": 829 }, { "epoch": 0.12518853695324283, "grad_norm": 2.096299648284912, "learning_rate": 9.694795779390146e-05, "loss": 1.4642, "step": 830 }, { "epoch": 0.1253393665158371, "grad_norm": 1.8609261512756348, "learning_rate": 9.693967672665502e-05, "loss": 1.3241, "step": 831 }, { "epoch": 0.12549019607843137, "grad_norm": 1.962562918663025, "learning_rate": 9.69313847948154e-05, "loss": 1.4986, "step": 832 }, { "epoch": 0.12564102564102564, "grad_norm": 1.786353588104248, "learning_rate": 9.692308200030183e-05, "loss": 1.2244, "step": 833 }, { "epoch": 0.1257918552036199, "grad_norm": 2.0545425415039062, "learning_rate": 9.69147683450361e-05, "loss": 1.286, "step": 834 }, { "epoch": 0.12594268476621417, "grad_norm": 2.1928775310516357, "learning_rate": 9.690644383094245e-05, "loss": 1.5679, "step": 835 }, { "epoch": 0.12609351432880844, "grad_norm": 2.379899024963379, "learning_rate": 9.689810845994767e-05, "loss": 1.8559, "step": 836 }, { "epoch": 0.1262443438914027, "grad_norm": 2.191215753555298, "learning_rate": 9.688976223398103e-05, "loss": 1.4595, "step": 837 }, { "epoch": 0.12639517345399698, "grad_norm": 1.8007265329360962, "learning_rate": 9.688140515497437e-05, "loss": 1.158, "step": 838 }, { "epoch": 0.12654600301659125, "grad_norm": 1.9509371519088745, "learning_rate": 9.6873037224862e-05, "loss": 1.2436, "step": 839 }, { "epoch": 0.12669683257918551, "grad_norm": 1.8939921855926514, "learning_rate": 9.686465844558073e-05, "loss": 1.0824, "step": 840 }, { "epoch": 0.12684766214177978, "grad_norm": 2.311962604522705, "learning_rate": 9.685626881906993e-05, "loss": 1.4909, "step": 841 }, { "epoch": 0.12699849170437405, "grad_norm": 2.1536026000976562, "learning_rate": 9.684786834727142e-05, "loss": 1.5314, "step": 842 }, { "epoch": 0.12714932126696832, "grad_norm": 2.279118061065674, "learning_rate": 9.68394570321296e-05, "loss": 1.3425, "step": 843 }, { "epoch": 0.12730015082956259, "grad_norm": 2.274172067642212, "learning_rate": 9.68310348755913e-05, "loss": 1.3067, "step": 844 }, { "epoch": 0.12745098039215685, "grad_norm": 2.4245054721832275, "learning_rate": 9.682260187960592e-05, "loss": 1.1618, "step": 845 }, { "epoch": 0.12760180995475112, "grad_norm": 2.6458516120910645, "learning_rate": 9.681415804612535e-05, "loss": 1.4042, "step": 846 }, { "epoch": 0.1277526395173454, "grad_norm": 1.8311444520950317, "learning_rate": 9.680570337710399e-05, "loss": 0.977, "step": 847 }, { "epoch": 0.12790346907993966, "grad_norm": 2.366166591644287, "learning_rate": 9.679723787449875e-05, "loss": 1.6101, "step": 848 }, { "epoch": 0.12805429864253393, "grad_norm": 2.501387357711792, "learning_rate": 9.678876154026903e-05, "loss": 1.3189, "step": 849 }, { "epoch": 0.1282051282051282, "grad_norm": 1.6906558275222778, "learning_rate": 9.678027437637677e-05, "loss": 0.8767, "step": 850 }, { "epoch": 0.12835595776772246, "grad_norm": 2.6260392665863037, "learning_rate": 9.677177638478639e-05, "loss": 1.5554, "step": 851 }, { "epoch": 0.12850678733031673, "grad_norm": 2.0962626934051514, "learning_rate": 9.676326756746479e-05, "loss": 1.6209, "step": 852 }, { "epoch": 0.128657616892911, "grad_norm": 1.9503521919250488, "learning_rate": 9.675474792638146e-05, "loss": 1.4088, "step": 853 }, { "epoch": 0.12880844645550527, "grad_norm": 1.8349584341049194, "learning_rate": 9.674621746350831e-05, "loss": 1.4212, "step": 854 }, { "epoch": 0.12895927601809956, "grad_norm": 1.8227194547653198, "learning_rate": 9.673767618081981e-05, "loss": 1.1775, "step": 855 }, { "epoch": 0.12911010558069383, "grad_norm": 1.9799035787582397, "learning_rate": 9.67291240802929e-05, "loss": 1.5978, "step": 856 }, { "epoch": 0.1292609351432881, "grad_norm": 2.1331517696380615, "learning_rate": 9.672056116390705e-05, "loss": 1.63, "step": 857 }, { "epoch": 0.12941176470588237, "grad_norm": 2.045865535736084, "learning_rate": 9.67119874336442e-05, "loss": 1.3391, "step": 858 }, { "epoch": 0.12956259426847663, "grad_norm": 1.8868646621704102, "learning_rate": 9.670340289148883e-05, "loss": 1.3354, "step": 859 }, { "epoch": 0.1297134238310709, "grad_norm": 1.9739452600479126, "learning_rate": 9.669480753942793e-05, "loss": 1.2448, "step": 860 }, { "epoch": 0.12986425339366517, "grad_norm": 1.948276162147522, "learning_rate": 9.668620137945092e-05, "loss": 1.35, "step": 861 }, { "epoch": 0.13001508295625944, "grad_norm": 1.7897614240646362, "learning_rate": 9.667758441354979e-05, "loss": 1.2707, "step": 862 }, { "epoch": 0.1301659125188537, "grad_norm": 1.8041011095046997, "learning_rate": 9.666895664371901e-05, "loss": 1.2232, "step": 863 }, { "epoch": 0.13031674208144797, "grad_norm": 1.7004255056381226, "learning_rate": 9.666031807195557e-05, "loss": 1.0755, "step": 864 }, { "epoch": 0.13046757164404224, "grad_norm": 1.9903745651245117, "learning_rate": 9.665166870025891e-05, "loss": 1.5736, "step": 865 }, { "epoch": 0.1306184012066365, "grad_norm": 1.8403515815734863, "learning_rate": 9.664300853063104e-05, "loss": 1.5551, "step": 866 }, { "epoch": 0.13076923076923078, "grad_norm": 1.8968864679336548, "learning_rate": 9.663433756507638e-05, "loss": 1.2374, "step": 867 }, { "epoch": 0.13092006033182504, "grad_norm": 2.045746088027954, "learning_rate": 9.662565580560195e-05, "loss": 1.3823, "step": 868 }, { "epoch": 0.1310708898944193, "grad_norm": 2.1413216590881348, "learning_rate": 9.66169632542172e-05, "loss": 1.3365, "step": 869 }, { "epoch": 0.13122171945701358, "grad_norm": 2.0690109729766846, "learning_rate": 9.660825991293409e-05, "loss": 1.4161, "step": 870 }, { "epoch": 0.13137254901960785, "grad_norm": 2.458144187927246, "learning_rate": 9.659954578376709e-05, "loss": 1.7404, "step": 871 }, { "epoch": 0.13152337858220212, "grad_norm": 1.8325227499008179, "learning_rate": 9.659082086873314e-05, "loss": 1.1229, "step": 872 }, { "epoch": 0.13167420814479638, "grad_norm": 2.011420249938965, "learning_rate": 9.658208516985174e-05, "loss": 1.3386, "step": 873 }, { "epoch": 0.13182503770739065, "grad_norm": 2.2637939453125, "learning_rate": 9.65733386891448e-05, "loss": 1.7395, "step": 874 }, { "epoch": 0.13197586726998492, "grad_norm": 1.9083267450332642, "learning_rate": 9.65645814286368e-05, "loss": 1.2961, "step": 875 }, { "epoch": 0.1321266968325792, "grad_norm": 1.960263967514038, "learning_rate": 9.655581339035465e-05, "loss": 1.3906, "step": 876 }, { "epoch": 0.13227752639517346, "grad_norm": 1.801590085029602, "learning_rate": 9.654703457632781e-05, "loss": 1.3079, "step": 877 }, { "epoch": 0.13242835595776772, "grad_norm": 2.0689918994903564, "learning_rate": 9.653824498858822e-05, "loss": 1.3171, "step": 878 }, { "epoch": 0.132579185520362, "grad_norm": 1.9107067584991455, "learning_rate": 9.65294446291703e-05, "loss": 1.4119, "step": 879 }, { "epoch": 0.13273001508295626, "grad_norm": 2.520681381225586, "learning_rate": 9.652063350011095e-05, "loss": 1.5881, "step": 880 }, { "epoch": 0.13288084464555053, "grad_norm": 1.8461648225784302, "learning_rate": 9.651181160344959e-05, "loss": 1.0317, "step": 881 }, { "epoch": 0.1330316742081448, "grad_norm": 2.005880117416382, "learning_rate": 9.650297894122815e-05, "loss": 1.3781, "step": 882 }, { "epoch": 0.13318250377073906, "grad_norm": 1.7483874559402466, "learning_rate": 9.6494135515491e-05, "loss": 1.1113, "step": 883 }, { "epoch": 0.13333333333333333, "grad_norm": 2.0637574195861816, "learning_rate": 9.648528132828503e-05, "loss": 1.3866, "step": 884 }, { "epoch": 0.1334841628959276, "grad_norm": 2.174586772918701, "learning_rate": 9.647641638165962e-05, "loss": 1.4664, "step": 885 }, { "epoch": 0.13363499245852187, "grad_norm": 2.211858034133911, "learning_rate": 9.646754067766666e-05, "loss": 1.6162, "step": 886 }, { "epoch": 0.13378582202111614, "grad_norm": 2.389209747314453, "learning_rate": 9.645865421836047e-05, "loss": 1.686, "step": 887 }, { "epoch": 0.1339366515837104, "grad_norm": 2.2435269355773926, "learning_rate": 9.644975700579792e-05, "loss": 1.568, "step": 888 }, { "epoch": 0.13408748114630467, "grad_norm": 2.338548421859741, "learning_rate": 9.644084904203834e-05, "loss": 1.3728, "step": 889 }, { "epoch": 0.13423831070889894, "grad_norm": 2.1208319664001465, "learning_rate": 9.643193032914356e-05, "loss": 1.2786, "step": 890 }, { "epoch": 0.1343891402714932, "grad_norm": 2.530845880508423, "learning_rate": 9.642300086917788e-05, "loss": 1.8626, "step": 891 }, { "epoch": 0.13453996983408748, "grad_norm": 2.378835916519165, "learning_rate": 9.641406066420812e-05, "loss": 1.3599, "step": 892 }, { "epoch": 0.13469079939668174, "grad_norm": 2.080965995788574, "learning_rate": 9.640510971630356e-05, "loss": 1.2137, "step": 893 }, { "epoch": 0.134841628959276, "grad_norm": 2.3406405448913574, "learning_rate": 9.639614802753597e-05, "loss": 1.193, "step": 894 }, { "epoch": 0.13499245852187028, "grad_norm": 2.2735369205474854, "learning_rate": 9.638717559997961e-05, "loss": 1.369, "step": 895 }, { "epoch": 0.13514328808446455, "grad_norm": 2.238830089569092, "learning_rate": 9.637819243571124e-05, "loss": 1.1937, "step": 896 }, { "epoch": 0.13529411764705881, "grad_norm": 2.6795542240142822, "learning_rate": 9.636919853681005e-05, "loss": 1.3221, "step": 897 }, { "epoch": 0.13544494720965308, "grad_norm": 2.523437261581421, "learning_rate": 9.636019390535783e-05, "loss": 1.1234, "step": 898 }, { "epoch": 0.13559577677224735, "grad_norm": 2.0610969066619873, "learning_rate": 9.63511785434387e-05, "loss": 1.2978, "step": 899 }, { "epoch": 0.13574660633484162, "grad_norm": 2.2167792320251465, "learning_rate": 9.63421524531394e-05, "loss": 1.1833, "step": 900 }, { "epoch": 0.1358974358974359, "grad_norm": 2.6505234241485596, "learning_rate": 9.633311563654906e-05, "loss": 1.6609, "step": 901 }, { "epoch": 0.13604826546003015, "grad_norm": 2.5852766036987305, "learning_rate": 9.632406809575936e-05, "loss": 1.697, "step": 902 }, { "epoch": 0.13619909502262442, "grad_norm": 2.1287894248962402, "learning_rate": 9.631500983286438e-05, "loss": 1.326, "step": 903 }, { "epoch": 0.1363499245852187, "grad_norm": 1.8902140855789185, "learning_rate": 9.630594084996081e-05, "loss": 1.2603, "step": 904 }, { "epoch": 0.13650075414781296, "grad_norm": 2.30253005027771, "learning_rate": 9.629686114914768e-05, "loss": 1.5484, "step": 905 }, { "epoch": 0.13665158371040723, "grad_norm": 1.9453856945037842, "learning_rate": 9.62877707325266e-05, "loss": 1.3424, "step": 906 }, { "epoch": 0.13680241327300152, "grad_norm": 2.0721945762634277, "learning_rate": 9.62786696022016e-05, "loss": 1.3928, "step": 907 }, { "epoch": 0.1369532428355958, "grad_norm": 2.32631254196167, "learning_rate": 9.626955776027924e-05, "loss": 1.941, "step": 908 }, { "epoch": 0.13710407239819006, "grad_norm": 2.179380178451538, "learning_rate": 9.62604352088685e-05, "loss": 1.7047, "step": 909 }, { "epoch": 0.13725490196078433, "grad_norm": 2.176330804824829, "learning_rate": 9.625130195008092e-05, "loss": 1.4519, "step": 910 }, { "epoch": 0.1374057315233786, "grad_norm": 1.9162336587905884, "learning_rate": 9.624215798603044e-05, "loss": 1.6705, "step": 911 }, { "epoch": 0.13755656108597286, "grad_norm": 1.9776102304458618, "learning_rate": 9.623300331883351e-05, "loss": 1.5363, "step": 912 }, { "epoch": 0.13770739064856713, "grad_norm": 1.9269723892211914, "learning_rate": 9.622383795060908e-05, "loss": 1.4055, "step": 913 }, { "epoch": 0.1378582202111614, "grad_norm": 1.8170199394226074, "learning_rate": 9.621466188347852e-05, "loss": 1.4603, "step": 914 }, { "epoch": 0.13800904977375567, "grad_norm": 1.824129343032837, "learning_rate": 9.620547511956574e-05, "loss": 1.4671, "step": 915 }, { "epoch": 0.13815987933634993, "grad_norm": 1.8963289260864258, "learning_rate": 9.619627766099707e-05, "loss": 1.4502, "step": 916 }, { "epoch": 0.1383107088989442, "grad_norm": 1.9497238397598267, "learning_rate": 9.618706950990137e-05, "loss": 1.2624, "step": 917 }, { "epoch": 0.13846153846153847, "grad_norm": 1.7677209377288818, "learning_rate": 9.617785066840992e-05, "loss": 1.3596, "step": 918 }, { "epoch": 0.13861236802413274, "grad_norm": 1.8210183382034302, "learning_rate": 9.61686211386565e-05, "loss": 1.4042, "step": 919 }, { "epoch": 0.138763197586727, "grad_norm": 1.7157173156738281, "learning_rate": 9.615938092277738e-05, "loss": 0.9901, "step": 920 }, { "epoch": 0.13891402714932127, "grad_norm": 2.0528416633605957, "learning_rate": 9.61501300229113e-05, "loss": 1.4345, "step": 921 }, { "epoch": 0.13906485671191554, "grad_norm": 1.9616551399230957, "learning_rate": 9.614086844119943e-05, "loss": 1.2643, "step": 922 }, { "epoch": 0.1392156862745098, "grad_norm": 2.0131654739379883, "learning_rate": 9.613159617978545e-05, "loss": 1.3041, "step": 923 }, { "epoch": 0.13936651583710408, "grad_norm": 1.9867621660232544, "learning_rate": 9.612231324081551e-05, "loss": 1.3783, "step": 924 }, { "epoch": 0.13951734539969834, "grad_norm": 2.137282609939575, "learning_rate": 9.611301962643824e-05, "loss": 1.4895, "step": 925 }, { "epoch": 0.1396681749622926, "grad_norm": 2.1574854850769043, "learning_rate": 9.610371533880471e-05, "loss": 1.4799, "step": 926 }, { "epoch": 0.13981900452488688, "grad_norm": 1.887077808380127, "learning_rate": 9.609440038006847e-05, "loss": 1.3353, "step": 927 }, { "epoch": 0.13996983408748115, "grad_norm": 2.2352898120880127, "learning_rate": 9.608507475238557e-05, "loss": 1.0618, "step": 928 }, { "epoch": 0.14012066365007542, "grad_norm": 1.911362886428833, "learning_rate": 9.60757384579145e-05, "loss": 1.1964, "step": 929 }, { "epoch": 0.14027149321266968, "grad_norm": 1.9468247890472412, "learning_rate": 9.606639149881622e-05, "loss": 1.2023, "step": 930 }, { "epoch": 0.14042232277526395, "grad_norm": 1.8941102027893066, "learning_rate": 9.605703387725417e-05, "loss": 1.0161, "step": 931 }, { "epoch": 0.14057315233785822, "grad_norm": 2.228132724761963, "learning_rate": 9.604766559539424e-05, "loss": 1.3803, "step": 932 }, { "epoch": 0.1407239819004525, "grad_norm": 2.0321106910705566, "learning_rate": 9.603828665540482e-05, "loss": 1.2208, "step": 933 }, { "epoch": 0.14087481146304676, "grad_norm": 2.0694713592529297, "learning_rate": 9.602889705945673e-05, "loss": 1.2739, "step": 934 }, { "epoch": 0.14102564102564102, "grad_norm": 2.0509352684020996, "learning_rate": 9.601949680972326e-05, "loss": 1.1794, "step": 935 }, { "epoch": 0.1411764705882353, "grad_norm": 2.191495418548584, "learning_rate": 9.60100859083802e-05, "loss": 1.4483, "step": 936 }, { "epoch": 0.14132730015082956, "grad_norm": 1.996182918548584, "learning_rate": 9.600066435760582e-05, "loss": 1.3294, "step": 937 }, { "epoch": 0.14147812971342383, "grad_norm": 2.173114776611328, "learning_rate": 9.599123215958074e-05, "loss": 1.2773, "step": 938 }, { "epoch": 0.1416289592760181, "grad_norm": 2.340319871902466, "learning_rate": 9.598178931648818e-05, "loss": 1.3075, "step": 939 }, { "epoch": 0.14177978883861236, "grad_norm": 1.8796918392181396, "learning_rate": 9.597233583051376e-05, "loss": 1.2595, "step": 940 }, { "epoch": 0.14193061840120663, "grad_norm": 1.885307788848877, "learning_rate": 9.596287170384554e-05, "loss": 1.1711, "step": 941 }, { "epoch": 0.1420814479638009, "grad_norm": 2.2280311584472656, "learning_rate": 9.595339693867412e-05, "loss": 1.4487, "step": 942 }, { "epoch": 0.14223227752639517, "grad_norm": 2.1306185722351074, "learning_rate": 9.594391153719249e-05, "loss": 1.2924, "step": 943 }, { "epoch": 0.14238310708898944, "grad_norm": 2.3707525730133057, "learning_rate": 9.593441550159612e-05, "loss": 1.5612, "step": 944 }, { "epoch": 0.1425339366515837, "grad_norm": 2.043574810028076, "learning_rate": 9.592490883408295e-05, "loss": 1.1069, "step": 945 }, { "epoch": 0.14268476621417797, "grad_norm": 2.3667831420898438, "learning_rate": 9.591539153685341e-05, "loss": 1.324, "step": 946 }, { "epoch": 0.14283559577677224, "grad_norm": 2.2436506748199463, "learning_rate": 9.590586361211033e-05, "loss": 1.0822, "step": 947 }, { "epoch": 0.1429864253393665, "grad_norm": 2.065676689147949, "learning_rate": 9.589632506205906e-05, "loss": 1.0652, "step": 948 }, { "epoch": 0.14313725490196078, "grad_norm": 1.9564242362976074, "learning_rate": 9.588677588890735e-05, "loss": 1.0055, "step": 949 }, { "epoch": 0.14328808446455504, "grad_norm": 2.306912899017334, "learning_rate": 9.587721609486543e-05, "loss": 1.4067, "step": 950 }, { "epoch": 0.1434389140271493, "grad_norm": 2.4047138690948486, "learning_rate": 9.586764568214604e-05, "loss": 1.4046, "step": 951 }, { "epoch": 0.14358974358974358, "grad_norm": 2.2606968879699707, "learning_rate": 9.58580646529643e-05, "loss": 1.5082, "step": 952 }, { "epoch": 0.14374057315233785, "grad_norm": 1.70610511302948, "learning_rate": 9.584847300953784e-05, "loss": 0.9767, "step": 953 }, { "epoch": 0.14389140271493212, "grad_norm": 2.3571531772613525, "learning_rate": 9.583887075408671e-05, "loss": 1.8926, "step": 954 }, { "epoch": 0.14404223227752638, "grad_norm": 1.7737387418746948, "learning_rate": 9.582925788883345e-05, "loss": 1.1489, "step": 955 }, { "epoch": 0.14419306184012065, "grad_norm": 2.2213399410247803, "learning_rate": 9.581963441600306e-05, "loss": 1.7282, "step": 956 }, { "epoch": 0.14434389140271492, "grad_norm": 2.216583013534546, "learning_rate": 9.581000033782293e-05, "loss": 1.6621, "step": 957 }, { "epoch": 0.1444947209653092, "grad_norm": 2.403583288192749, "learning_rate": 9.580035565652296e-05, "loss": 1.9226, "step": 958 }, { "epoch": 0.14464555052790348, "grad_norm": 1.7124239206314087, "learning_rate": 9.579070037433553e-05, "loss": 1.1444, "step": 959 }, { "epoch": 0.14479638009049775, "grad_norm": 2.086493968963623, "learning_rate": 9.57810344934954e-05, "loss": 1.5514, "step": 960 }, { "epoch": 0.14494720965309202, "grad_norm": 1.8340226411819458, "learning_rate": 9.577135801623985e-05, "loss": 1.3562, "step": 961 }, { "epoch": 0.1450980392156863, "grad_norm": 1.8091669082641602, "learning_rate": 9.576167094480856e-05, "loss": 1.3694, "step": 962 }, { "epoch": 0.14524886877828055, "grad_norm": 2.0270955562591553, "learning_rate": 9.575197328144372e-05, "loss": 1.2763, "step": 963 }, { "epoch": 0.14539969834087482, "grad_norm": 1.851887822151184, "learning_rate": 9.57422650283899e-05, "loss": 1.3215, "step": 964 }, { "epoch": 0.1455505279034691, "grad_norm": 1.6895865201950073, "learning_rate": 9.573254618789417e-05, "loss": 1.2341, "step": 965 }, { "epoch": 0.14570135746606336, "grad_norm": 1.6245213747024536, "learning_rate": 9.572281676220608e-05, "loss": 1.1628, "step": 966 }, { "epoch": 0.14585218702865763, "grad_norm": 2.0248637199401855, "learning_rate": 9.571307675357752e-05, "loss": 1.5016, "step": 967 }, { "epoch": 0.1460030165912519, "grad_norm": 2.0918617248535156, "learning_rate": 9.570332616426293e-05, "loss": 1.3727, "step": 968 }, { "epoch": 0.14615384615384616, "grad_norm": 1.973069667816162, "learning_rate": 9.569356499651918e-05, "loss": 1.1689, "step": 969 }, { "epoch": 0.14630467571644043, "grad_norm": 2.0802700519561768, "learning_rate": 9.568379325260557e-05, "loss": 1.6109, "step": 970 }, { "epoch": 0.1464555052790347, "grad_norm": 2.225330114364624, "learning_rate": 9.567401093478386e-05, "loss": 1.501, "step": 971 }, { "epoch": 0.14660633484162897, "grad_norm": 1.9735524654388428, "learning_rate": 9.566421804531822e-05, "loss": 1.3759, "step": 972 }, { "epoch": 0.14675716440422323, "grad_norm": 1.9069606065750122, "learning_rate": 9.565441458647533e-05, "loss": 1.1062, "step": 973 }, { "epoch": 0.1469079939668175, "grad_norm": 1.801305890083313, "learning_rate": 9.564460056052429e-05, "loss": 1.1415, "step": 974 }, { "epoch": 0.14705882352941177, "grad_norm": 2.3485240936279297, "learning_rate": 9.563477596973663e-05, "loss": 1.3556, "step": 975 }, { "epoch": 0.14720965309200604, "grad_norm": 1.9664510488510132, "learning_rate": 9.562494081638631e-05, "loss": 1.1269, "step": 976 }, { "epoch": 0.1473604826546003, "grad_norm": 2.0578181743621826, "learning_rate": 9.561509510274981e-05, "loss": 1.1862, "step": 977 }, { "epoch": 0.14751131221719457, "grad_norm": 2.2146785259246826, "learning_rate": 9.560523883110596e-05, "loss": 1.6101, "step": 978 }, { "epoch": 0.14766214177978884, "grad_norm": 2.2980546951293945, "learning_rate": 9.559537200373613e-05, "loss": 1.5983, "step": 979 }, { "epoch": 0.1478129713423831, "grad_norm": 1.9175201654434204, "learning_rate": 9.558549462292403e-05, "loss": 1.1039, "step": 980 }, { "epoch": 0.14796380090497738, "grad_norm": 2.1425094604492188, "learning_rate": 9.55756066909559e-05, "loss": 1.3894, "step": 981 }, { "epoch": 0.14811463046757165, "grad_norm": 1.8712708950042725, "learning_rate": 9.556570821012036e-05, "loss": 1.0027, "step": 982 }, { "epoch": 0.1482654600301659, "grad_norm": 2.9026615619659424, "learning_rate": 9.555579918270854e-05, "loss": 1.6844, "step": 983 }, { "epoch": 0.14841628959276018, "grad_norm": 2.086268901824951, "learning_rate": 9.554587961101392e-05, "loss": 1.3568, "step": 984 }, { "epoch": 0.14856711915535445, "grad_norm": 2.1442112922668457, "learning_rate": 9.55359494973325e-05, "loss": 1.374, "step": 985 }, { "epoch": 0.14871794871794872, "grad_norm": 2.389625310897827, "learning_rate": 9.552600884396269e-05, "loss": 1.3383, "step": 986 }, { "epoch": 0.14886877828054298, "grad_norm": 2.1203243732452393, "learning_rate": 9.551605765320533e-05, "loss": 1.3944, "step": 987 }, { "epoch": 0.14901960784313725, "grad_norm": 2.025132417678833, "learning_rate": 9.550609592736371e-05, "loss": 1.3428, "step": 988 }, { "epoch": 0.14917043740573152, "grad_norm": 2.1210391521453857, "learning_rate": 9.549612366874356e-05, "loss": 1.2705, "step": 989 }, { "epoch": 0.1493212669683258, "grad_norm": 1.8094677925109863, "learning_rate": 9.548614087965306e-05, "loss": 0.965, "step": 990 }, { "epoch": 0.14947209653092006, "grad_norm": 2.285565137863159, "learning_rate": 9.54761475624028e-05, "loss": 1.3889, "step": 991 }, { "epoch": 0.14962292609351432, "grad_norm": 2.2486350536346436, "learning_rate": 9.54661437193058e-05, "loss": 1.2073, "step": 992 }, { "epoch": 0.1497737556561086, "grad_norm": 2.6402995586395264, "learning_rate": 9.545612935267757e-05, "loss": 1.4972, "step": 993 }, { "epoch": 0.14992458521870286, "grad_norm": 2.2208542823791504, "learning_rate": 9.5446104464836e-05, "loss": 1.1736, "step": 994 }, { "epoch": 0.15007541478129713, "grad_norm": 2.3594796657562256, "learning_rate": 9.543606905810143e-05, "loss": 1.3771, "step": 995 }, { "epoch": 0.1502262443438914, "grad_norm": 2.616582155227661, "learning_rate": 9.542602313479667e-05, "loss": 1.3632, "step": 996 }, { "epoch": 0.15037707390648566, "grad_norm": 2.917656660079956, "learning_rate": 9.541596669724692e-05, "loss": 1.1047, "step": 997 }, { "epoch": 0.15052790346907993, "grad_norm": 2.136172294616699, "learning_rate": 9.540589974777982e-05, "loss": 1.0542, "step": 998 }, { "epoch": 0.1506787330316742, "grad_norm": 2.057155132293701, "learning_rate": 9.539582228872546e-05, "loss": 1.1051, "step": 999 }, { "epoch": 0.15082956259426847, "grad_norm": 1.9447458982467651, "learning_rate": 9.538573432241637e-05, "loss": 0.8995, "step": 1000 }, { "epoch": 0.15098039215686274, "grad_norm": 3.3232572078704834, "learning_rate": 9.537563585118747e-05, "loss": 2.1547, "step": 1001 }, { "epoch": 0.151131221719457, "grad_norm": 2.3210809230804443, "learning_rate": 9.536552687737617e-05, "loss": 1.4586, "step": 1002 }, { "epoch": 0.15128205128205127, "grad_norm": 2.194894790649414, "learning_rate": 9.535540740332223e-05, "loss": 1.6184, "step": 1003 }, { "epoch": 0.15143288084464554, "grad_norm": 2.3214166164398193, "learning_rate": 9.534527743136793e-05, "loss": 1.755, "step": 1004 }, { "epoch": 0.1515837104072398, "grad_norm": 2.00956654548645, "learning_rate": 9.533513696385795e-05, "loss": 1.3514, "step": 1005 }, { "epoch": 0.15173453996983408, "grad_norm": 2.159562587738037, "learning_rate": 9.532498600313935e-05, "loss": 1.6914, "step": 1006 }, { "epoch": 0.15188536953242834, "grad_norm": 2.012486219406128, "learning_rate": 9.531482455156169e-05, "loss": 1.291, "step": 1007 }, { "epoch": 0.1520361990950226, "grad_norm": 2.649683952331543, "learning_rate": 9.53046526114769e-05, "loss": 2.0392, "step": 1008 }, { "epoch": 0.15218702865761688, "grad_norm": 2.397692918777466, "learning_rate": 9.529447018523936e-05, "loss": 1.9161, "step": 1009 }, { "epoch": 0.15233785822021115, "grad_norm": 2.576223373413086, "learning_rate": 9.528427727520592e-05, "loss": 1.7361, "step": 1010 }, { "epoch": 0.15248868778280544, "grad_norm": 2.285437822341919, "learning_rate": 9.527407388373578e-05, "loss": 1.6762, "step": 1011 }, { "epoch": 0.1526395173453997, "grad_norm": 2.2392020225524902, "learning_rate": 9.526386001319061e-05, "loss": 1.7672, "step": 1012 }, { "epoch": 0.15279034690799398, "grad_norm": 1.9882025718688965, "learning_rate": 9.525363566593451e-05, "loss": 1.4397, "step": 1013 }, { "epoch": 0.15294117647058825, "grad_norm": 1.699373722076416, "learning_rate": 9.524340084433399e-05, "loss": 1.126, "step": 1014 }, { "epoch": 0.15309200603318251, "grad_norm": 1.7424490451812744, "learning_rate": 9.523315555075799e-05, "loss": 1.3879, "step": 1015 }, { "epoch": 0.15324283559577678, "grad_norm": 1.7331626415252686, "learning_rate": 9.522289978757785e-05, "loss": 1.4594, "step": 1016 }, { "epoch": 0.15339366515837105, "grad_norm": 1.923264741897583, "learning_rate": 9.521263355716739e-05, "loss": 1.3513, "step": 1017 }, { "epoch": 0.15354449472096532, "grad_norm": 1.6555724143981934, "learning_rate": 9.520235686190278e-05, "loss": 1.2099, "step": 1018 }, { "epoch": 0.1536953242835596, "grad_norm": 1.815916657447815, "learning_rate": 9.519206970416268e-05, "loss": 1.3513, "step": 1019 }, { "epoch": 0.15384615384615385, "grad_norm": 1.8921176195144653, "learning_rate": 9.518177208632812e-05, "loss": 1.4915, "step": 1020 }, { "epoch": 0.15399698340874812, "grad_norm": 1.7158149480819702, "learning_rate": 9.517146401078258e-05, "loss": 1.2106, "step": 1021 }, { "epoch": 0.1541478129713424, "grad_norm": 2.107874631881714, "learning_rate": 9.516114547991198e-05, "loss": 1.4158, "step": 1022 }, { "epoch": 0.15429864253393666, "grad_norm": 1.60274076461792, "learning_rate": 9.515081649610458e-05, "loss": 0.9541, "step": 1023 }, { "epoch": 0.15444947209653093, "grad_norm": 1.9151545763015747, "learning_rate": 9.514047706175116e-05, "loss": 1.4183, "step": 1024 }, { "epoch": 0.1546003016591252, "grad_norm": 2.0128321647644043, "learning_rate": 9.513012717924483e-05, "loss": 1.2574, "step": 1025 }, { "epoch": 0.15475113122171946, "grad_norm": 2.3244400024414062, "learning_rate": 9.51197668509812e-05, "loss": 1.6248, "step": 1026 }, { "epoch": 0.15490196078431373, "grad_norm": 2.1567137241363525, "learning_rate": 9.510939607935822e-05, "loss": 1.3263, "step": 1027 }, { "epoch": 0.155052790346908, "grad_norm": 1.9266432523727417, "learning_rate": 9.509901486677634e-05, "loss": 1.2895, "step": 1028 }, { "epoch": 0.15520361990950227, "grad_norm": 2.3747339248657227, "learning_rate": 9.508862321563834e-05, "loss": 1.3976, "step": 1029 }, { "epoch": 0.15535444947209653, "grad_norm": 1.8803409337997437, "learning_rate": 9.507822112834948e-05, "loss": 0.9132, "step": 1030 }, { "epoch": 0.1555052790346908, "grad_norm": 2.1156516075134277, "learning_rate": 9.506780860731739e-05, "loss": 1.6226, "step": 1031 }, { "epoch": 0.15565610859728507, "grad_norm": 1.6440308094024658, "learning_rate": 9.505738565495217e-05, "loss": 0.8654, "step": 1032 }, { "epoch": 0.15580693815987934, "grad_norm": 2.470644474029541, "learning_rate": 9.504695227366626e-05, "loss": 1.7949, "step": 1033 }, { "epoch": 0.1559577677224736, "grad_norm": 2.200721263885498, "learning_rate": 9.50365084658746e-05, "loss": 1.5654, "step": 1034 }, { "epoch": 0.15610859728506787, "grad_norm": 2.0585007667541504, "learning_rate": 9.502605423399449e-05, "loss": 1.2962, "step": 1035 }, { "epoch": 0.15625942684766214, "grad_norm": 2.00528621673584, "learning_rate": 9.501558958044563e-05, "loss": 1.2753, "step": 1036 }, { "epoch": 0.1564102564102564, "grad_norm": 2.3784217834472656, "learning_rate": 9.500511450765018e-05, "loss": 1.4538, "step": 1037 }, { "epoch": 0.15656108597285068, "grad_norm": 2.1793370246887207, "learning_rate": 9.499462901803264e-05, "loss": 1.354, "step": 1038 }, { "epoch": 0.15671191553544495, "grad_norm": 1.8960219621658325, "learning_rate": 9.498413311402003e-05, "loss": 1.1406, "step": 1039 }, { "epoch": 0.1568627450980392, "grad_norm": 2.4864325523376465, "learning_rate": 9.497362679804168e-05, "loss": 1.4852, "step": 1040 }, { "epoch": 0.15701357466063348, "grad_norm": 2.0590357780456543, "learning_rate": 9.496311007252938e-05, "loss": 1.2245, "step": 1041 }, { "epoch": 0.15716440422322775, "grad_norm": 1.9832812547683716, "learning_rate": 9.49525829399173e-05, "loss": 1.2161, "step": 1042 }, { "epoch": 0.15731523378582202, "grad_norm": 2.1165900230407715, "learning_rate": 9.494204540264205e-05, "loss": 1.2495, "step": 1043 }, { "epoch": 0.15746606334841629, "grad_norm": 2.100119113922119, "learning_rate": 9.493149746314265e-05, "loss": 1.3625, "step": 1044 }, { "epoch": 0.15761689291101055, "grad_norm": 2.227144956588745, "learning_rate": 9.492093912386047e-05, "loss": 1.3159, "step": 1045 }, { "epoch": 0.15776772247360482, "grad_norm": 2.440833330154419, "learning_rate": 9.491037038723935e-05, "loss": 1.3078, "step": 1046 }, { "epoch": 0.1579185520361991, "grad_norm": 1.928432583808899, "learning_rate": 9.489979125572555e-05, "loss": 0.9683, "step": 1047 }, { "epoch": 0.15806938159879336, "grad_norm": 2.115945339202881, "learning_rate": 9.488920173176764e-05, "loss": 0.8715, "step": 1048 }, { "epoch": 0.15822021116138762, "grad_norm": 1.7256051301956177, "learning_rate": 9.487860181781669e-05, "loss": 0.761, "step": 1049 }, { "epoch": 0.1583710407239819, "grad_norm": 2.530266761779785, "learning_rate": 9.486799151632613e-05, "loss": 1.2577, "step": 1050 }, { "epoch": 0.15852187028657616, "grad_norm": 2.5231919288635254, "learning_rate": 9.485737082975182e-05, "loss": 1.5003, "step": 1051 }, { "epoch": 0.15867269984917043, "grad_norm": 2.7261199951171875, "learning_rate": 9.4846739760552e-05, "loss": 1.9214, "step": 1052 }, { "epoch": 0.1588235294117647, "grad_norm": 2.59985089302063, "learning_rate": 9.483609831118734e-05, "loss": 1.8506, "step": 1053 }, { "epoch": 0.15897435897435896, "grad_norm": 1.9226500988006592, "learning_rate": 9.482544648412086e-05, "loss": 1.0556, "step": 1054 }, { "epoch": 0.15912518853695323, "grad_norm": 1.836128830909729, "learning_rate": 9.481478428181804e-05, "loss": 0.9541, "step": 1055 }, { "epoch": 0.1592760180995475, "grad_norm": 2.3025894165039062, "learning_rate": 9.480411170674674e-05, "loss": 1.687, "step": 1056 }, { "epoch": 0.15942684766214177, "grad_norm": 2.151245355606079, "learning_rate": 9.479342876137722e-05, "loss": 1.3714, "step": 1057 }, { "epoch": 0.15957767722473604, "grad_norm": 2.0001742839813232, "learning_rate": 9.478273544818212e-05, "loss": 1.1993, "step": 1058 }, { "epoch": 0.1597285067873303, "grad_norm": 2.0212972164154053, "learning_rate": 9.477203176963654e-05, "loss": 1.3084, "step": 1059 }, { "epoch": 0.15987933634992457, "grad_norm": 2.1967344284057617, "learning_rate": 9.47613177282179e-05, "loss": 1.5343, "step": 1060 }, { "epoch": 0.16003016591251884, "grad_norm": 1.899034023284912, "learning_rate": 9.475059332640607e-05, "loss": 1.1676, "step": 1061 }, { "epoch": 0.16018099547511314, "grad_norm": 1.8246182203292847, "learning_rate": 9.47398585666833e-05, "loss": 1.0482, "step": 1062 }, { "epoch": 0.1603318250377074, "grad_norm": 1.928686499595642, "learning_rate": 9.472911345153425e-05, "loss": 1.2751, "step": 1063 }, { "epoch": 0.16048265460030167, "grad_norm": 1.7922472953796387, "learning_rate": 9.4718357983446e-05, "loss": 1.1792, "step": 1064 }, { "epoch": 0.16063348416289594, "grad_norm": 1.9879646301269531, "learning_rate": 9.470759216490792e-05, "loss": 1.5015, "step": 1065 }, { "epoch": 0.1607843137254902, "grad_norm": 1.843225359916687, "learning_rate": 9.469681599841192e-05, "loss": 1.3823, "step": 1066 }, { "epoch": 0.16093514328808448, "grad_norm": 1.9352290630340576, "learning_rate": 9.468602948645221e-05, "loss": 1.4481, "step": 1067 }, { "epoch": 0.16108597285067874, "grad_norm": 1.773650884628296, "learning_rate": 9.467523263152542e-05, "loss": 1.2963, "step": 1068 }, { "epoch": 0.161236802413273, "grad_norm": 2.0818982124328613, "learning_rate": 9.466442543613059e-05, "loss": 1.5011, "step": 1069 }, { "epoch": 0.16138763197586728, "grad_norm": 2.00089955329895, "learning_rate": 9.465360790276911e-05, "loss": 1.2628, "step": 1070 }, { "epoch": 0.16153846153846155, "grad_norm": 2.2019450664520264, "learning_rate": 9.464278003394483e-05, "loss": 1.519, "step": 1071 }, { "epoch": 0.16168929110105582, "grad_norm": 1.9879052639007568, "learning_rate": 9.463194183216393e-05, "loss": 1.162, "step": 1072 }, { "epoch": 0.16184012066365008, "grad_norm": 2.0933687686920166, "learning_rate": 9.4621093299935e-05, "loss": 1.3917, "step": 1073 }, { "epoch": 0.16199095022624435, "grad_norm": 1.9903498888015747, "learning_rate": 9.461023443976905e-05, "loss": 1.4392, "step": 1074 }, { "epoch": 0.16214177978883862, "grad_norm": 2.320218563079834, "learning_rate": 9.459936525417942e-05, "loss": 1.6919, "step": 1075 }, { "epoch": 0.1622926093514329, "grad_norm": 2.076493978500366, "learning_rate": 9.458848574568191e-05, "loss": 1.2942, "step": 1076 }, { "epoch": 0.16244343891402716, "grad_norm": 1.810027837753296, "learning_rate": 9.457759591679466e-05, "loss": 1.195, "step": 1077 }, { "epoch": 0.16259426847662142, "grad_norm": 2.5718019008636475, "learning_rate": 9.456669577003821e-05, "loss": 1.9954, "step": 1078 }, { "epoch": 0.1627450980392157, "grad_norm": 2.480525255203247, "learning_rate": 9.455578530793552e-05, "loss": 1.3774, "step": 1079 }, { "epoch": 0.16289592760180996, "grad_norm": 2.395385980606079, "learning_rate": 9.454486453301189e-05, "loss": 1.3741, "step": 1080 }, { "epoch": 0.16304675716440423, "grad_norm": 2.210343360900879, "learning_rate": 9.453393344779502e-05, "loss": 1.2543, "step": 1081 }, { "epoch": 0.1631975867269985, "grad_norm": 2.113731622695923, "learning_rate": 9.452299205481503e-05, "loss": 1.4941, "step": 1082 }, { "epoch": 0.16334841628959276, "grad_norm": 1.9620264768600464, "learning_rate": 9.451204035660438e-05, "loss": 1.3437, "step": 1083 }, { "epoch": 0.16349924585218703, "grad_norm": 2.152198076248169, "learning_rate": 9.450107835569794e-05, "loss": 1.496, "step": 1084 }, { "epoch": 0.1636500754147813, "grad_norm": 2.211247682571411, "learning_rate": 9.449010605463296e-05, "loss": 1.4809, "step": 1085 }, { "epoch": 0.16380090497737557, "grad_norm": 2.074950695037842, "learning_rate": 9.447912345594906e-05, "loss": 1.3435, "step": 1086 }, { "epoch": 0.16395173453996983, "grad_norm": 1.9204068183898926, "learning_rate": 9.446813056218829e-05, "loss": 1.175, "step": 1087 }, { "epoch": 0.1641025641025641, "grad_norm": 2.8162665367126465, "learning_rate": 9.445712737589501e-05, "loss": 1.3982, "step": 1088 }, { "epoch": 0.16425339366515837, "grad_norm": 1.949252963066101, "learning_rate": 9.444611389961604e-05, "loss": 0.953, "step": 1089 }, { "epoch": 0.16440422322775264, "grad_norm": 2.107999324798584, "learning_rate": 9.44350901359005e-05, "loss": 1.2766, "step": 1090 }, { "epoch": 0.1645550527903469, "grad_norm": 1.924381971359253, "learning_rate": 9.44240560873e-05, "loss": 1.179, "step": 1091 }, { "epoch": 0.16470588235294117, "grad_norm": 2.210275888442993, "learning_rate": 9.441301175636841e-05, "loss": 1.232, "step": 1092 }, { "epoch": 0.16485671191553544, "grad_norm": 2.2899484634399414, "learning_rate": 9.440195714566205e-05, "loss": 1.4492, "step": 1093 }, { "epoch": 0.1650075414781297, "grad_norm": 2.0316169261932373, "learning_rate": 9.43908922577396e-05, "loss": 1.0843, "step": 1094 }, { "epoch": 0.16515837104072398, "grad_norm": 2.2261252403259277, "learning_rate": 9.437981709516215e-05, "loss": 1.2065, "step": 1095 }, { "epoch": 0.16530920060331825, "grad_norm": 1.8929057121276855, "learning_rate": 9.436873166049311e-05, "loss": 1.025, "step": 1096 }, { "epoch": 0.16546003016591251, "grad_norm": 2.2586159706115723, "learning_rate": 9.435763595629833e-05, "loss": 1.1391, "step": 1097 }, { "epoch": 0.16561085972850678, "grad_norm": 2.4292056560516357, "learning_rate": 9.434652998514599e-05, "loss": 1.295, "step": 1098 }, { "epoch": 0.16576168929110105, "grad_norm": 2.2045655250549316, "learning_rate": 9.433541374960665e-05, "loss": 1.0136, "step": 1099 }, { "epoch": 0.16591251885369532, "grad_norm": 2.1582672595977783, "learning_rate": 9.432428725225327e-05, "loss": 1.3307, "step": 1100 }, { "epoch": 0.16606334841628959, "grad_norm": 2.8083748817443848, "learning_rate": 9.431315049566118e-05, "loss": 1.5782, "step": 1101 }, { "epoch": 0.16621417797888385, "grad_norm": 2.305295467376709, "learning_rate": 9.430200348240808e-05, "loss": 1.4609, "step": 1102 }, { "epoch": 0.16636500754147812, "grad_norm": 2.08916974067688, "learning_rate": 9.429084621507404e-05, "loss": 1.3391, "step": 1103 }, { "epoch": 0.1665158371040724, "grad_norm": 2.1611876487731934, "learning_rate": 9.427967869624148e-05, "loss": 1.6248, "step": 1104 }, { "epoch": 0.16666666666666666, "grad_norm": 2.413980007171631, "learning_rate": 9.426850092849526e-05, "loss": 1.7107, "step": 1105 }, { "epoch": 0.16681749622926093, "grad_norm": 2.166827440261841, "learning_rate": 9.425731291442255e-05, "loss": 1.5438, "step": 1106 }, { "epoch": 0.1669683257918552, "grad_norm": 2.3342337608337402, "learning_rate": 9.42461146566129e-05, "loss": 1.6591, "step": 1107 }, { "epoch": 0.16711915535444946, "grad_norm": 2.04266619682312, "learning_rate": 9.423490615765827e-05, "loss": 1.1304, "step": 1108 }, { "epoch": 0.16726998491704373, "grad_norm": 2.4701836109161377, "learning_rate": 9.422368742015295e-05, "loss": 1.7563, "step": 1109 }, { "epoch": 0.167420814479638, "grad_norm": 2.0741963386535645, "learning_rate": 9.421245844669362e-05, "loss": 1.4108, "step": 1110 }, { "epoch": 0.16757164404223227, "grad_norm": 2.418666362762451, "learning_rate": 9.42012192398793e-05, "loss": 1.8021, "step": 1111 }, { "epoch": 0.16772247360482653, "grad_norm": 2.3456459045410156, "learning_rate": 9.418996980231144e-05, "loss": 1.4557, "step": 1112 }, { "epoch": 0.1678733031674208, "grad_norm": 1.9622342586517334, "learning_rate": 9.417871013659378e-05, "loss": 1.1849, "step": 1113 }, { "epoch": 0.1680241327300151, "grad_norm": 1.8213087320327759, "learning_rate": 9.41674402453325e-05, "loss": 1.1016, "step": 1114 }, { "epoch": 0.16817496229260936, "grad_norm": 1.9858585596084595, "learning_rate": 9.415616013113608e-05, "loss": 1.1069, "step": 1115 }, { "epoch": 0.16832579185520363, "grad_norm": 2.156146287918091, "learning_rate": 9.414486979661543e-05, "loss": 1.4725, "step": 1116 }, { "epoch": 0.1684766214177979, "grad_norm": 1.8659228086471558, "learning_rate": 9.41335692443838e-05, "loss": 1.1878, "step": 1117 }, { "epoch": 0.16862745098039217, "grad_norm": 1.8273420333862305, "learning_rate": 9.412225847705675e-05, "loss": 1.112, "step": 1118 }, { "epoch": 0.16877828054298644, "grad_norm": 1.7170915603637695, "learning_rate": 9.41109374972523e-05, "loss": 1.0974, "step": 1119 }, { "epoch": 0.1689291101055807, "grad_norm": 2.261781930923462, "learning_rate": 9.40996063075908e-05, "loss": 1.6065, "step": 1120 }, { "epoch": 0.16907993966817497, "grad_norm": 2.0867013931274414, "learning_rate": 9.40882649106949e-05, "loss": 1.5232, "step": 1121 }, { "epoch": 0.16923076923076924, "grad_norm": 1.918204426765442, "learning_rate": 9.407691330918971e-05, "loss": 1.4172, "step": 1122 }, { "epoch": 0.1693815987933635, "grad_norm": 2.0358152389526367, "learning_rate": 9.406555150570263e-05, "loss": 1.3164, "step": 1123 }, { "epoch": 0.16953242835595778, "grad_norm": 1.6925209760665894, "learning_rate": 9.405417950286346e-05, "loss": 1.066, "step": 1124 }, { "epoch": 0.16968325791855204, "grad_norm": 1.9159318208694458, "learning_rate": 9.404279730330434e-05, "loss": 1.4073, "step": 1125 }, { "epoch": 0.1698340874811463, "grad_norm": 2.123150110244751, "learning_rate": 9.403140490965978e-05, "loss": 1.5945, "step": 1126 }, { "epoch": 0.16998491704374058, "grad_norm": 2.2580065727233887, "learning_rate": 9.402000232456667e-05, "loss": 1.4534, "step": 1127 }, { "epoch": 0.17013574660633485, "grad_norm": 2.2390294075012207, "learning_rate": 9.40085895506642e-05, "loss": 1.4769, "step": 1128 }, { "epoch": 0.17028657616892912, "grad_norm": 1.7377204895019531, "learning_rate": 9.399716659059397e-05, "loss": 1.1134, "step": 1129 }, { "epoch": 0.17043740573152338, "grad_norm": 1.9897478818893433, "learning_rate": 9.398573344699994e-05, "loss": 1.5278, "step": 1130 }, { "epoch": 0.17058823529411765, "grad_norm": 2.034961700439453, "learning_rate": 9.397429012252838e-05, "loss": 1.5105, "step": 1131 }, { "epoch": 0.17073906485671192, "grad_norm": 2.0086429119110107, "learning_rate": 9.396283661982796e-05, "loss": 1.5782, "step": 1132 }, { "epoch": 0.1708898944193062, "grad_norm": 2.079279661178589, "learning_rate": 9.395137294154971e-05, "loss": 1.5043, "step": 1133 }, { "epoch": 0.17104072398190046, "grad_norm": 1.9180268049240112, "learning_rate": 9.393989909034697e-05, "loss": 1.5202, "step": 1134 }, { "epoch": 0.17119155354449472, "grad_norm": 2.0650200843811035, "learning_rate": 9.392841506887547e-05, "loss": 1.653, "step": 1135 }, { "epoch": 0.171342383107089, "grad_norm": 1.9913122653961182, "learning_rate": 9.391692087979329e-05, "loss": 1.3019, "step": 1136 }, { "epoch": 0.17149321266968326, "grad_norm": 1.8521068096160889, "learning_rate": 9.390541652576085e-05, "loss": 1.3973, "step": 1137 }, { "epoch": 0.17164404223227753, "grad_norm": 2.226893186569214, "learning_rate": 9.389390200944095e-05, "loss": 1.5288, "step": 1138 }, { "epoch": 0.1717948717948718, "grad_norm": 1.7998656034469604, "learning_rate": 9.388237733349872e-05, "loss": 1.2346, "step": 1139 }, { "epoch": 0.17194570135746606, "grad_norm": 1.9661458730697632, "learning_rate": 9.387084250060163e-05, "loss": 1.2745, "step": 1140 }, { "epoch": 0.17209653092006033, "grad_norm": 2.150909662246704, "learning_rate": 9.385929751341952e-05, "loss": 1.4046, "step": 1141 }, { "epoch": 0.1722473604826546, "grad_norm": 2.1541781425476074, "learning_rate": 9.38477423746246e-05, "loss": 1.401, "step": 1142 }, { "epoch": 0.17239819004524887, "grad_norm": 2.107025146484375, "learning_rate": 9.383617708689136e-05, "loss": 1.1952, "step": 1143 }, { "epoch": 0.17254901960784313, "grad_norm": 1.9124258756637573, "learning_rate": 9.382460165289672e-05, "loss": 1.1528, "step": 1144 }, { "epoch": 0.1726998491704374, "grad_norm": 2.3970818519592285, "learning_rate": 9.381301607531994e-05, "loss": 1.5803, "step": 1145 }, { "epoch": 0.17285067873303167, "grad_norm": 2.4584882259368896, "learning_rate": 9.380142035684253e-05, "loss": 1.217, "step": 1146 }, { "epoch": 0.17300150829562594, "grad_norm": 1.9755421876907349, "learning_rate": 9.37898145001485e-05, "loss": 1.2276, "step": 1147 }, { "epoch": 0.1731523378582202, "grad_norm": 1.948481798171997, "learning_rate": 9.377819850792407e-05, "loss": 1.0642, "step": 1148 }, { "epoch": 0.17330316742081447, "grad_norm": 1.8882261514663696, "learning_rate": 9.376657238285786e-05, "loss": 1.1004, "step": 1149 }, { "epoch": 0.17345399698340874, "grad_norm": 1.7930375337600708, "learning_rate": 9.375493612764086e-05, "loss": 0.876, "step": 1150 }, { "epoch": 0.173604826546003, "grad_norm": 2.9658010005950928, "learning_rate": 9.374328974496638e-05, "loss": 2.1143, "step": 1151 }, { "epoch": 0.17375565610859728, "grad_norm": 2.239036798477173, "learning_rate": 9.373163323753008e-05, "loss": 1.2594, "step": 1152 }, { "epoch": 0.17390648567119155, "grad_norm": 2.1840670108795166, "learning_rate": 9.371996660802995e-05, "loss": 1.6277, "step": 1153 }, { "epoch": 0.17405731523378581, "grad_norm": 1.9760925769805908, "learning_rate": 9.370828985916632e-05, "loss": 1.453, "step": 1154 }, { "epoch": 0.17420814479638008, "grad_norm": 2.250620126724243, "learning_rate": 9.36966029936419e-05, "loss": 1.7418, "step": 1155 }, { "epoch": 0.17435897435897435, "grad_norm": 1.9868831634521484, "learning_rate": 9.368490601416169e-05, "loss": 1.462, "step": 1156 }, { "epoch": 0.17450980392156862, "grad_norm": 1.8471624851226807, "learning_rate": 9.367319892343307e-05, "loss": 1.3904, "step": 1157 }, { "epoch": 0.17466063348416289, "grad_norm": 2.0114991664886475, "learning_rate": 9.366148172416576e-05, "loss": 1.3369, "step": 1158 }, { "epoch": 0.17481146304675715, "grad_norm": 1.9214061498641968, "learning_rate": 9.364975441907178e-05, "loss": 1.46, "step": 1159 }, { "epoch": 0.17496229260935142, "grad_norm": 1.7859654426574707, "learning_rate": 9.363801701086554e-05, "loss": 1.0942, "step": 1160 }, { "epoch": 0.1751131221719457, "grad_norm": 1.9107918739318848, "learning_rate": 9.362626950226375e-05, "loss": 1.3425, "step": 1161 }, { "epoch": 0.17526395173453996, "grad_norm": 1.79483163356781, "learning_rate": 9.361451189598547e-05, "loss": 1.1112, "step": 1162 }, { "epoch": 0.17541478129713423, "grad_norm": 2.0642123222351074, "learning_rate": 9.360274419475212e-05, "loss": 1.6381, "step": 1163 }, { "epoch": 0.1755656108597285, "grad_norm": 1.5205408334732056, "learning_rate": 9.359096640128742e-05, "loss": 0.9407, "step": 1164 }, { "epoch": 0.17571644042232276, "grad_norm": 1.8815926313400269, "learning_rate": 9.357917851831743e-05, "loss": 1.4031, "step": 1165 }, { "epoch": 0.17586726998491706, "grad_norm": 2.006647825241089, "learning_rate": 9.356738054857057e-05, "loss": 1.5604, "step": 1166 }, { "epoch": 0.17601809954751133, "grad_norm": 1.9575740098953247, "learning_rate": 9.35555724947776e-05, "loss": 1.402, "step": 1167 }, { "epoch": 0.1761689291101056, "grad_norm": 1.8240772485733032, "learning_rate": 9.354375435967157e-05, "loss": 1.2637, "step": 1168 }, { "epoch": 0.17631975867269986, "grad_norm": 1.8540453910827637, "learning_rate": 9.353192614598789e-05, "loss": 1.2069, "step": 1169 }, { "epoch": 0.17647058823529413, "grad_norm": 2.212239980697632, "learning_rate": 9.352008785646432e-05, "loss": 1.1319, "step": 1170 }, { "epoch": 0.1766214177978884, "grad_norm": 1.8122636079788208, "learning_rate": 9.350823949384091e-05, "loss": 1.143, "step": 1171 }, { "epoch": 0.17677224736048266, "grad_norm": 1.9338431358337402, "learning_rate": 9.34963810608601e-05, "loss": 1.3896, "step": 1172 }, { "epoch": 0.17692307692307693, "grad_norm": 1.8173787593841553, "learning_rate": 9.34845125602666e-05, "loss": 1.0803, "step": 1173 }, { "epoch": 0.1770739064856712, "grad_norm": 1.9273878335952759, "learning_rate": 9.347263399480747e-05, "loss": 1.204, "step": 1174 }, { "epoch": 0.17722473604826547, "grad_norm": 1.885274052619934, "learning_rate": 9.346074536723214e-05, "loss": 1.0363, "step": 1175 }, { "epoch": 0.17737556561085974, "grad_norm": 2.3609778881073, "learning_rate": 9.34488466802923e-05, "loss": 1.5554, "step": 1176 }, { "epoch": 0.177526395173454, "grad_norm": 2.224374532699585, "learning_rate": 9.343693793674203e-05, "loss": 1.369, "step": 1177 }, { "epoch": 0.17767722473604827, "grad_norm": 2.3981502056121826, "learning_rate": 9.34250191393377e-05, "loss": 1.3917, "step": 1178 }, { "epoch": 0.17782805429864254, "grad_norm": 2.2244973182678223, "learning_rate": 9.3413090290838e-05, "loss": 1.2242, "step": 1179 }, { "epoch": 0.1779788838612368, "grad_norm": 2.362309694290161, "learning_rate": 9.3401151394004e-05, "loss": 1.2918, "step": 1180 }, { "epoch": 0.17812971342383108, "grad_norm": 2.4505841732025146, "learning_rate": 9.338920245159906e-05, "loss": 1.616, "step": 1181 }, { "epoch": 0.17828054298642534, "grad_norm": 2.011510133743286, "learning_rate": 9.337724346638882e-05, "loss": 1.1245, "step": 1182 }, { "epoch": 0.1784313725490196, "grad_norm": 2.19594144821167, "learning_rate": 9.336527444114133e-05, "loss": 1.5255, "step": 1183 }, { "epoch": 0.17858220211161388, "grad_norm": 2.0368034839630127, "learning_rate": 9.335329537862694e-05, "loss": 1.384, "step": 1184 }, { "epoch": 0.17873303167420815, "grad_norm": 2.0538136959075928, "learning_rate": 9.334130628161826e-05, "loss": 1.3082, "step": 1185 }, { "epoch": 0.17888386123680242, "grad_norm": 1.8186177015304565, "learning_rate": 9.33293071528903e-05, "loss": 1.043, "step": 1186 }, { "epoch": 0.17903469079939668, "grad_norm": 1.8794890642166138, "learning_rate": 9.331729799522034e-05, "loss": 1.0017, "step": 1187 }, { "epoch": 0.17918552036199095, "grad_norm": 2.2389791011810303, "learning_rate": 9.330527881138801e-05, "loss": 1.4221, "step": 1188 }, { "epoch": 0.17933634992458522, "grad_norm": 2.072192907333374, "learning_rate": 9.329324960417529e-05, "loss": 1.4265, "step": 1189 }, { "epoch": 0.1794871794871795, "grad_norm": 2.2176218032836914, "learning_rate": 9.328121037636641e-05, "loss": 1.3646, "step": 1190 }, { "epoch": 0.17963800904977376, "grad_norm": 2.075070858001709, "learning_rate": 9.326916113074795e-05, "loss": 1.1609, "step": 1191 }, { "epoch": 0.17978883861236802, "grad_norm": 2.4087135791778564, "learning_rate": 9.325710187010882e-05, "loss": 1.177, "step": 1192 }, { "epoch": 0.1799396681749623, "grad_norm": 1.8682174682617188, "learning_rate": 9.324503259724025e-05, "loss": 0.9574, "step": 1193 }, { "epoch": 0.18009049773755656, "grad_norm": 2.1071815490722656, "learning_rate": 9.323295331493575e-05, "loss": 1.3624, "step": 1194 }, { "epoch": 0.18024132730015083, "grad_norm": 3.1041884422302246, "learning_rate": 9.32208640259912e-05, "loss": 1.6031, "step": 1195 }, { "epoch": 0.1803921568627451, "grad_norm": 1.7738794088363647, "learning_rate": 9.320876473320475e-05, "loss": 0.989, "step": 1196 }, { "epoch": 0.18054298642533936, "grad_norm": 2.2537429332733154, "learning_rate": 9.31966554393769e-05, "loss": 1.0351, "step": 1197 }, { "epoch": 0.18069381598793363, "grad_norm": 1.886830449104309, "learning_rate": 9.318453614731046e-05, "loss": 0.7185, "step": 1198 }, { "epoch": 0.1808446455505279, "grad_norm": 2.2066776752471924, "learning_rate": 9.317240685981054e-05, "loss": 1.1036, "step": 1199 }, { "epoch": 0.18099547511312217, "grad_norm": 2.2928333282470703, "learning_rate": 9.316026757968455e-05, "loss": 1.3501, "step": 1200 }, { "epoch": 0.18114630467571644, "grad_norm": 2.405466079711914, "learning_rate": 9.314811830974224e-05, "loss": 1.4885, "step": 1201 }, { "epoch": 0.1812971342383107, "grad_norm": 1.8908004760742188, "learning_rate": 9.313595905279566e-05, "loss": 1.2148, "step": 1202 }, { "epoch": 0.18144796380090497, "grad_norm": 2.640021562576294, "learning_rate": 9.312378981165919e-05, "loss": 1.9808, "step": 1203 }, { "epoch": 0.18159879336349924, "grad_norm": 1.9114588499069214, "learning_rate": 9.311161058914948e-05, "loss": 1.5874, "step": 1204 }, { "epoch": 0.1817496229260935, "grad_norm": 2.221534490585327, "learning_rate": 9.309942138808556e-05, "loss": 2.1272, "step": 1205 }, { "epoch": 0.18190045248868777, "grad_norm": 1.9181766510009766, "learning_rate": 9.308722221128867e-05, "loss": 1.1701, "step": 1206 }, { "epoch": 0.18205128205128204, "grad_norm": 1.9168063402175903, "learning_rate": 9.307501306158247e-05, "loss": 1.3224, "step": 1207 }, { "epoch": 0.1822021116138763, "grad_norm": 1.932343602180481, "learning_rate": 9.306279394179281e-05, "loss": 1.3194, "step": 1208 }, { "epoch": 0.18235294117647058, "grad_norm": 1.9432015419006348, "learning_rate": 9.305056485474796e-05, "loss": 1.4825, "step": 1209 }, { "epoch": 0.18250377073906485, "grad_norm": 2.0103373527526855, "learning_rate": 9.303832580327845e-05, "loss": 1.7307, "step": 1210 }, { "epoch": 0.18265460030165911, "grad_norm": 1.9620616436004639, "learning_rate": 9.302607679021709e-05, "loss": 1.3287, "step": 1211 }, { "epoch": 0.18280542986425338, "grad_norm": 1.8710129261016846, "learning_rate": 9.301381781839902e-05, "loss": 1.3579, "step": 1212 }, { "epoch": 0.18295625942684765, "grad_norm": 1.724479079246521, "learning_rate": 9.300154889066171e-05, "loss": 1.0606, "step": 1213 }, { "epoch": 0.18310708898944192, "grad_norm": 2.000087261199951, "learning_rate": 9.298927000984487e-05, "loss": 1.629, "step": 1214 }, { "epoch": 0.1832579185520362, "grad_norm": 1.835522174835205, "learning_rate": 9.297698117879059e-05, "loss": 1.2763, "step": 1215 }, { "epoch": 0.18340874811463045, "grad_norm": 1.9344607591629028, "learning_rate": 9.296468240034322e-05, "loss": 1.5219, "step": 1216 }, { "epoch": 0.18355957767722472, "grad_norm": 1.6562713384628296, "learning_rate": 9.295237367734939e-05, "loss": 1.1191, "step": 1217 }, { "epoch": 0.18371040723981902, "grad_norm": 1.7753430604934692, "learning_rate": 9.294005501265807e-05, "loss": 1.3991, "step": 1218 }, { "epoch": 0.18386123680241329, "grad_norm": 1.7133748531341553, "learning_rate": 9.292772640912056e-05, "loss": 1.284, "step": 1219 }, { "epoch": 0.18401206636500755, "grad_norm": 1.873835802078247, "learning_rate": 9.291538786959037e-05, "loss": 1.4072, "step": 1220 }, { "epoch": 0.18416289592760182, "grad_norm": 1.687179446220398, "learning_rate": 9.29030393969234e-05, "loss": 1.0076, "step": 1221 }, { "epoch": 0.1843137254901961, "grad_norm": 1.6083852052688599, "learning_rate": 9.289068099397777e-05, "loss": 1.0172, "step": 1222 }, { "epoch": 0.18446455505279036, "grad_norm": 1.7729251384735107, "learning_rate": 9.287831266361398e-05, "loss": 1.3397, "step": 1223 }, { "epoch": 0.18461538461538463, "grad_norm": 2.049114227294922, "learning_rate": 9.286593440869477e-05, "loss": 1.3639, "step": 1224 }, { "epoch": 0.1847662141779789, "grad_norm": 1.8267741203308105, "learning_rate": 9.28535462320852e-05, "loss": 1.0971, "step": 1225 }, { "epoch": 0.18491704374057316, "grad_norm": 1.9117971658706665, "learning_rate": 9.28411481366526e-05, "loss": 1.3084, "step": 1226 }, { "epoch": 0.18506787330316743, "grad_norm": 2.065711259841919, "learning_rate": 9.282874012526663e-05, "loss": 1.4686, "step": 1227 }, { "epoch": 0.1852187028657617, "grad_norm": 1.977129340171814, "learning_rate": 9.281632220079924e-05, "loss": 1.4043, "step": 1228 }, { "epoch": 0.18536953242835597, "grad_norm": 1.9103113412857056, "learning_rate": 9.280389436612467e-05, "loss": 1.2816, "step": 1229 }, { "epoch": 0.18552036199095023, "grad_norm": 2.062885046005249, "learning_rate": 9.279145662411942e-05, "loss": 1.5639, "step": 1230 }, { "epoch": 0.1856711915535445, "grad_norm": 2.0764904022216797, "learning_rate": 9.277900897766232e-05, "loss": 1.5043, "step": 1231 }, { "epoch": 0.18582202111613877, "grad_norm": 2.1024768352508545, "learning_rate": 9.276655142963451e-05, "loss": 1.4143, "step": 1232 }, { "epoch": 0.18597285067873304, "grad_norm": 1.8804652690887451, "learning_rate": 9.275408398291939e-05, "loss": 1.4052, "step": 1233 }, { "epoch": 0.1861236802413273, "grad_norm": 1.9897946119308472, "learning_rate": 9.274160664040264e-05, "loss": 1.3936, "step": 1234 }, { "epoch": 0.18627450980392157, "grad_norm": 1.8933037519454956, "learning_rate": 9.272911940497224e-05, "loss": 1.2842, "step": 1235 }, { "epoch": 0.18642533936651584, "grad_norm": 1.9031405448913574, "learning_rate": 9.271662227951852e-05, "loss": 1.212, "step": 1236 }, { "epoch": 0.1865761689291101, "grad_norm": 2.0967700481414795, "learning_rate": 9.270411526693399e-05, "loss": 1.3909, "step": 1237 }, { "epoch": 0.18672699849170438, "grad_norm": 2.3143086433410645, "learning_rate": 9.269159837011355e-05, "loss": 1.6575, "step": 1238 }, { "epoch": 0.18687782805429864, "grad_norm": 1.974550724029541, "learning_rate": 9.26790715919543e-05, "loss": 1.1171, "step": 1239 }, { "epoch": 0.1870286576168929, "grad_norm": 2.7651207447052, "learning_rate": 9.26665349353557e-05, "loss": 1.2815, "step": 1240 }, { "epoch": 0.18717948717948718, "grad_norm": 1.9609638452529907, "learning_rate": 9.265398840321948e-05, "loss": 1.1197, "step": 1241 }, { "epoch": 0.18733031674208145, "grad_norm": 1.9713983535766602, "learning_rate": 9.264143199844961e-05, "loss": 1.4081, "step": 1242 }, { "epoch": 0.18748114630467572, "grad_norm": 2.2063581943511963, "learning_rate": 9.26288657239524e-05, "loss": 1.5358, "step": 1243 }, { "epoch": 0.18763197586726998, "grad_norm": 1.8809722661972046, "learning_rate": 9.261628958263642e-05, "loss": 1.1405, "step": 1244 }, { "epoch": 0.18778280542986425, "grad_norm": 2.0900070667266846, "learning_rate": 9.26037035774125e-05, "loss": 1.1299, "step": 1245 }, { "epoch": 0.18793363499245852, "grad_norm": 2.4252164363861084, "learning_rate": 9.259110771119384e-05, "loss": 1.2934, "step": 1246 }, { "epoch": 0.1880844645550528, "grad_norm": 2.369295120239258, "learning_rate": 9.25785019868958e-05, "loss": 1.1366, "step": 1247 }, { "epoch": 0.18823529411764706, "grad_norm": 1.8856267929077148, "learning_rate": 9.25658864074361e-05, "loss": 0.8623, "step": 1248 }, { "epoch": 0.18838612368024132, "grad_norm": 1.9104337692260742, "learning_rate": 9.255326097573476e-05, "loss": 0.9013, "step": 1249 }, { "epoch": 0.1885369532428356, "grad_norm": 1.876307487487793, "learning_rate": 9.2540625694714e-05, "loss": 0.9112, "step": 1250 }, { "epoch": 0.18868778280542986, "grad_norm": 3.4011120796203613, "learning_rate": 9.252798056729838e-05, "loss": 1.722, "step": 1251 }, { "epoch": 0.18883861236802413, "grad_norm": 2.367579460144043, "learning_rate": 9.251532559641474e-05, "loss": 1.4167, "step": 1252 }, { "epoch": 0.1889894419306184, "grad_norm": 2.4090607166290283, "learning_rate": 9.250266078499214e-05, "loss": 1.6584, "step": 1253 }, { "epoch": 0.18914027149321266, "grad_norm": 1.8441895246505737, "learning_rate": 9.248998613596202e-05, "loss": 1.2076, "step": 1254 }, { "epoch": 0.18929110105580693, "grad_norm": 1.9453390836715698, "learning_rate": 9.247730165225798e-05, "loss": 1.4361, "step": 1255 }, { "epoch": 0.1894419306184012, "grad_norm": 2.146521806716919, "learning_rate": 9.246460733681599e-05, "loss": 1.5415, "step": 1256 }, { "epoch": 0.18959276018099547, "grad_norm": 2.1151444911956787, "learning_rate": 9.245190319257427e-05, "loss": 1.3227, "step": 1257 }, { "epoch": 0.18974358974358974, "grad_norm": 1.997647762298584, "learning_rate": 9.243918922247325e-05, "loss": 1.2515, "step": 1258 }, { "epoch": 0.189894419306184, "grad_norm": 2.1349799633026123, "learning_rate": 9.242646542945572e-05, "loss": 1.586, "step": 1259 }, { "epoch": 0.19004524886877827, "grad_norm": 2.24044132232666, "learning_rate": 9.241373181646672e-05, "loss": 1.4807, "step": 1260 }, { "epoch": 0.19019607843137254, "grad_norm": 2.3528175354003906, "learning_rate": 9.240098838645354e-05, "loss": 1.8743, "step": 1261 }, { "epoch": 0.1903469079939668, "grad_norm": 2.208540201187134, "learning_rate": 9.238823514236578e-05, "loss": 1.8989, "step": 1262 }, { "epoch": 0.19049773755656108, "grad_norm": 2.1658318042755127, "learning_rate": 9.237547208715525e-05, "loss": 1.4505, "step": 1263 }, { "epoch": 0.19064856711915534, "grad_norm": 2.208954095840454, "learning_rate": 9.23626992237761e-05, "loss": 1.7371, "step": 1264 }, { "epoch": 0.1907993966817496, "grad_norm": 2.1060221195220947, "learning_rate": 9.23499165551847e-05, "loss": 1.5373, "step": 1265 }, { "epoch": 0.19095022624434388, "grad_norm": 1.7845115661621094, "learning_rate": 9.233712408433972e-05, "loss": 1.2029, "step": 1266 }, { "epoch": 0.19110105580693815, "grad_norm": 1.9380519390106201, "learning_rate": 9.23243218142021e-05, "loss": 1.5386, "step": 1267 }, { "epoch": 0.19125188536953242, "grad_norm": 1.7178587913513184, "learning_rate": 9.231150974773502e-05, "loss": 1.3271, "step": 1268 }, { "epoch": 0.19140271493212668, "grad_norm": 1.8461300134658813, "learning_rate": 9.229868788790392e-05, "loss": 1.3052, "step": 1269 }, { "epoch": 0.19155354449472098, "grad_norm": 1.7390819787979126, "learning_rate": 9.228585623767659e-05, "loss": 1.2393, "step": 1270 }, { "epoch": 0.19170437405731525, "grad_norm": 1.9406623840332031, "learning_rate": 9.227301480002297e-05, "loss": 1.3742, "step": 1271 }, { "epoch": 0.19185520361990951, "grad_norm": 1.8771127462387085, "learning_rate": 9.226016357791533e-05, "loss": 1.4515, "step": 1272 }, { "epoch": 0.19200603318250378, "grad_norm": 1.824664831161499, "learning_rate": 9.224730257432824e-05, "loss": 1.1359, "step": 1273 }, { "epoch": 0.19215686274509805, "grad_norm": 1.8379006385803223, "learning_rate": 9.223443179223845e-05, "loss": 1.2472, "step": 1274 }, { "epoch": 0.19230769230769232, "grad_norm": 1.9288454055786133, "learning_rate": 9.222155123462501e-05, "loss": 1.3422, "step": 1275 }, { "epoch": 0.1924585218702866, "grad_norm": 2.1230061054229736, "learning_rate": 9.220866090446926e-05, "loss": 1.1743, "step": 1276 }, { "epoch": 0.19260935143288085, "grad_norm": 2.1075727939605713, "learning_rate": 9.219576080475476e-05, "loss": 1.6557, "step": 1277 }, { "epoch": 0.19276018099547512, "grad_norm": 1.9544392824172974, "learning_rate": 9.218285093846737e-05, "loss": 1.3832, "step": 1278 }, { "epoch": 0.1929110105580694, "grad_norm": 2.168405055999756, "learning_rate": 9.216993130859517e-05, "loss": 1.4814, "step": 1279 }, { "epoch": 0.19306184012066366, "grad_norm": 1.7613312005996704, "learning_rate": 9.215700191812852e-05, "loss": 1.0812, "step": 1280 }, { "epoch": 0.19321266968325793, "grad_norm": 2.040526866912842, "learning_rate": 9.214406277006004e-05, "loss": 1.2978, "step": 1281 }, { "epoch": 0.1933634992458522, "grad_norm": 2.3921589851379395, "learning_rate": 9.213111386738463e-05, "loss": 1.7258, "step": 1282 }, { "epoch": 0.19351432880844646, "grad_norm": 2.2433652877807617, "learning_rate": 9.21181552130994e-05, "loss": 1.3435, "step": 1283 }, { "epoch": 0.19366515837104073, "grad_norm": 2.2411489486694336, "learning_rate": 9.210518681020375e-05, "loss": 1.3256, "step": 1284 }, { "epoch": 0.193815987933635, "grad_norm": 1.80022394657135, "learning_rate": 9.209220866169934e-05, "loss": 1.0623, "step": 1285 }, { "epoch": 0.19396681749622927, "grad_norm": 2.1455681324005127, "learning_rate": 9.207922077059005e-05, "loss": 1.4094, "step": 1286 }, { "epoch": 0.19411764705882353, "grad_norm": 1.8876397609710693, "learning_rate": 9.206622313988206e-05, "loss": 1.2581, "step": 1287 }, { "epoch": 0.1942684766214178, "grad_norm": 1.9607913494110107, "learning_rate": 9.205321577258377e-05, "loss": 1.1725, "step": 1288 }, { "epoch": 0.19441930618401207, "grad_norm": 1.9715403318405151, "learning_rate": 9.204019867170586e-05, "loss": 1.2022, "step": 1289 }, { "epoch": 0.19457013574660634, "grad_norm": 2.2546043395996094, "learning_rate": 9.202717184026124e-05, "loss": 1.4422, "step": 1290 }, { "epoch": 0.1947209653092006, "grad_norm": 2.2663564682006836, "learning_rate": 9.201413528126509e-05, "loss": 1.5798, "step": 1291 }, { "epoch": 0.19487179487179487, "grad_norm": 1.9558860063552856, "learning_rate": 9.200108899773483e-05, "loss": 0.9696, "step": 1292 }, { "epoch": 0.19502262443438914, "grad_norm": 2.4042537212371826, "learning_rate": 9.198803299269014e-05, "loss": 1.6484, "step": 1293 }, { "epoch": 0.1951734539969834, "grad_norm": 2.4813220500946045, "learning_rate": 9.197496726915293e-05, "loss": 1.8323, "step": 1294 }, { "epoch": 0.19532428355957768, "grad_norm": 2.2392983436584473, "learning_rate": 9.19618918301474e-05, "loss": 1.3392, "step": 1295 }, { "epoch": 0.19547511312217195, "grad_norm": 2.137173652648926, "learning_rate": 9.194880667869996e-05, "loss": 1.361, "step": 1296 }, { "epoch": 0.1956259426847662, "grad_norm": 2.137274980545044, "learning_rate": 9.193571181783927e-05, "loss": 1.1892, "step": 1297 }, { "epoch": 0.19577677224736048, "grad_norm": 1.905195713043213, "learning_rate": 9.192260725059626e-05, "loss": 1.0655, "step": 1298 }, { "epoch": 0.19592760180995475, "grad_norm": 1.9559085369110107, "learning_rate": 9.190949298000409e-05, "loss": 1.2064, "step": 1299 }, { "epoch": 0.19607843137254902, "grad_norm": 1.7755825519561768, "learning_rate": 9.189636900909818e-05, "loss": 0.9018, "step": 1300 }, { "epoch": 0.19622926093514328, "grad_norm": 2.2560386657714844, "learning_rate": 9.188323534091619e-05, "loss": 1.6201, "step": 1301 }, { "epoch": 0.19638009049773755, "grad_norm": 1.9495882987976074, "learning_rate": 9.187009197849798e-05, "loss": 1.3781, "step": 1302 }, { "epoch": 0.19653092006033182, "grad_norm": 1.9242854118347168, "learning_rate": 9.185693892488577e-05, "loss": 1.3979, "step": 1303 }, { "epoch": 0.1966817496229261, "grad_norm": 2.116360664367676, "learning_rate": 9.184377618312387e-05, "loss": 1.6685, "step": 1304 }, { "epoch": 0.19683257918552036, "grad_norm": 1.865724802017212, "learning_rate": 9.183060375625896e-05, "loss": 1.0775, "step": 1305 }, { "epoch": 0.19698340874811462, "grad_norm": 1.8675156831741333, "learning_rate": 9.181742164733988e-05, "loss": 1.1811, "step": 1306 }, { "epoch": 0.1971342383107089, "grad_norm": 2.1517856121063232, "learning_rate": 9.180422985941779e-05, "loss": 1.3838, "step": 1307 }, { "epoch": 0.19728506787330316, "grad_norm": 1.7082343101501465, "learning_rate": 9.179102839554598e-05, "loss": 1.0208, "step": 1308 }, { "epoch": 0.19743589743589743, "grad_norm": 2.1055867671966553, "learning_rate": 9.17778172587801e-05, "loss": 1.3304, "step": 1309 }, { "epoch": 0.1975867269984917, "grad_norm": 1.8640735149383545, "learning_rate": 9.176459645217794e-05, "loss": 1.0941, "step": 1310 }, { "epoch": 0.19773755656108596, "grad_norm": 1.968670129776001, "learning_rate": 9.175136597879959e-05, "loss": 1.1034, "step": 1311 }, { "epoch": 0.19788838612368023, "grad_norm": 2.0363237857818604, "learning_rate": 9.173812584170736e-05, "loss": 1.4402, "step": 1312 }, { "epoch": 0.1980392156862745, "grad_norm": 2.098939895629883, "learning_rate": 9.172487604396578e-05, "loss": 1.3782, "step": 1313 }, { "epoch": 0.19819004524886877, "grad_norm": 1.8784483671188354, "learning_rate": 9.171161658864164e-05, "loss": 1.3668, "step": 1314 }, { "epoch": 0.19834087481146304, "grad_norm": 2.162252902984619, "learning_rate": 9.169834747880395e-05, "loss": 1.4266, "step": 1315 }, { "epoch": 0.1984917043740573, "grad_norm": 1.7071750164031982, "learning_rate": 9.168506871752395e-05, "loss": 0.9645, "step": 1316 }, { "epoch": 0.19864253393665157, "grad_norm": 1.830863356590271, "learning_rate": 9.167178030787516e-05, "loss": 1.3185, "step": 1317 }, { "epoch": 0.19879336349924584, "grad_norm": 1.9019253253936768, "learning_rate": 9.165848225293327e-05, "loss": 1.2933, "step": 1318 }, { "epoch": 0.1989441930618401, "grad_norm": 1.9650695323944092, "learning_rate": 9.164517455577623e-05, "loss": 1.3498, "step": 1319 }, { "epoch": 0.19909502262443438, "grad_norm": 1.9208468198776245, "learning_rate": 9.163185721948422e-05, "loss": 1.3816, "step": 1320 }, { "epoch": 0.19924585218702867, "grad_norm": 1.993523120880127, "learning_rate": 9.161853024713967e-05, "loss": 1.2947, "step": 1321 }, { "epoch": 0.19939668174962294, "grad_norm": 1.8388339281082153, "learning_rate": 9.160519364182718e-05, "loss": 1.0708, "step": 1322 }, { "epoch": 0.1995475113122172, "grad_norm": 1.91354501247406, "learning_rate": 9.159184740663369e-05, "loss": 1.1182, "step": 1323 }, { "epoch": 0.19969834087481148, "grad_norm": 1.858434796333313, "learning_rate": 9.157849154464824e-05, "loss": 1.3787, "step": 1324 }, { "epoch": 0.19984917043740574, "grad_norm": 1.934219241142273, "learning_rate": 9.156512605896217e-05, "loss": 1.353, "step": 1325 }, { "epoch": 0.2, "grad_norm": 2.044290065765381, "learning_rate": 9.155175095266909e-05, "loss": 1.3619, "step": 1326 }, { "epoch": 0.20015082956259428, "grad_norm": 1.7877196073532104, "learning_rate": 9.15383662288647e-05, "loss": 1.0849, "step": 1327 }, { "epoch": 0.20030165912518855, "grad_norm": 2.201510429382324, "learning_rate": 9.152497189064708e-05, "loss": 1.493, "step": 1328 }, { "epoch": 0.20045248868778281, "grad_norm": 2.322266101837158, "learning_rate": 9.151156794111643e-05, "loss": 1.6111, "step": 1329 }, { "epoch": 0.20060331825037708, "grad_norm": 2.0824172496795654, "learning_rate": 9.149815438337521e-05, "loss": 1.456, "step": 1330 }, { "epoch": 0.20075414781297135, "grad_norm": 2.0181267261505127, "learning_rate": 9.148473122052813e-05, "loss": 1.4143, "step": 1331 }, { "epoch": 0.20090497737556562, "grad_norm": 2.0329978466033936, "learning_rate": 9.147129845568208e-05, "loss": 1.4036, "step": 1332 }, { "epoch": 0.2010558069381599, "grad_norm": 2.1368300914764404, "learning_rate": 9.145785609194617e-05, "loss": 1.5004, "step": 1333 }, { "epoch": 0.20120663650075415, "grad_norm": 2.4404523372650146, "learning_rate": 9.144440413243178e-05, "loss": 1.5912, "step": 1334 }, { "epoch": 0.20135746606334842, "grad_norm": 2.091318368911743, "learning_rate": 9.143094258025247e-05, "loss": 1.3131, "step": 1335 }, { "epoch": 0.2015082956259427, "grad_norm": 2.1151645183563232, "learning_rate": 9.141747143852402e-05, "loss": 1.4673, "step": 1336 }, { "epoch": 0.20165912518853696, "grad_norm": 1.909485936164856, "learning_rate": 9.140399071036447e-05, "loss": 1.1937, "step": 1337 }, { "epoch": 0.20180995475113123, "grad_norm": 2.2577810287475586, "learning_rate": 9.139050039889402e-05, "loss": 1.5856, "step": 1338 }, { "epoch": 0.2019607843137255, "grad_norm": 1.888202428817749, "learning_rate": 9.137700050723517e-05, "loss": 1.0817, "step": 1339 }, { "epoch": 0.20211161387631976, "grad_norm": 1.9649828672409058, "learning_rate": 9.136349103851253e-05, "loss": 1.1127, "step": 1340 }, { "epoch": 0.20226244343891403, "grad_norm": 1.9600636959075928, "learning_rate": 9.1349971995853e-05, "loss": 1.2481, "step": 1341 }, { "epoch": 0.2024132730015083, "grad_norm": 2.1791532039642334, "learning_rate": 9.133644338238569e-05, "loss": 1.2328, "step": 1342 }, { "epoch": 0.20256410256410257, "grad_norm": 2.0222721099853516, "learning_rate": 9.132290520124192e-05, "loss": 1.1625, "step": 1343 }, { "epoch": 0.20271493212669683, "grad_norm": 1.900904655456543, "learning_rate": 9.130935745555519e-05, "loss": 1.0816, "step": 1344 }, { "epoch": 0.2028657616892911, "grad_norm": 2.112949848175049, "learning_rate": 9.129580014846127e-05, "loss": 1.0329, "step": 1345 }, { "epoch": 0.20301659125188537, "grad_norm": 1.9854365587234497, "learning_rate": 9.128223328309809e-05, "loss": 1.2442, "step": 1346 }, { "epoch": 0.20316742081447964, "grad_norm": 1.6198221445083618, "learning_rate": 9.126865686260585e-05, "loss": 0.7662, "step": 1347 }, { "epoch": 0.2033182503770739, "grad_norm": 1.7187918424606323, "learning_rate": 9.12550708901269e-05, "loss": 0.889, "step": 1348 }, { "epoch": 0.20346907993966817, "grad_norm": 1.9818456172943115, "learning_rate": 9.124147536880585e-05, "loss": 1.0947, "step": 1349 }, { "epoch": 0.20361990950226244, "grad_norm": 2.003612756729126, "learning_rate": 9.12278703017895e-05, "loss": 1.1104, "step": 1350 }, { "epoch": 0.2037707390648567, "grad_norm": 2.292498826980591, "learning_rate": 9.121425569222686e-05, "loss": 1.5969, "step": 1351 }, { "epoch": 0.20392156862745098, "grad_norm": 1.9597994089126587, "learning_rate": 9.120063154326912e-05, "loss": 1.1791, "step": 1352 }, { "epoch": 0.20407239819004525, "grad_norm": 1.9945870637893677, "learning_rate": 9.118699785806975e-05, "loss": 1.3726, "step": 1353 }, { "epoch": 0.2042232277526395, "grad_norm": 1.870679259300232, "learning_rate": 9.117335463978435e-05, "loss": 1.4206, "step": 1354 }, { "epoch": 0.20437405731523378, "grad_norm": 2.120237350463867, "learning_rate": 9.11597018915708e-05, "loss": 1.6961, "step": 1355 }, { "epoch": 0.20452488687782805, "grad_norm": 2.309626817703247, "learning_rate": 9.11460396165891e-05, "loss": 1.7567, "step": 1356 }, { "epoch": 0.20467571644042232, "grad_norm": 1.9403882026672363, "learning_rate": 9.113236781800151e-05, "loss": 1.4675, "step": 1357 }, { "epoch": 0.20482654600301659, "grad_norm": 2.1890225410461426, "learning_rate": 9.111868649897253e-05, "loss": 1.6659, "step": 1358 }, { "epoch": 0.20497737556561085, "grad_norm": 2.1127943992614746, "learning_rate": 9.110499566266877e-05, "loss": 1.6549, "step": 1359 }, { "epoch": 0.20512820512820512, "grad_norm": 2.0958640575408936, "learning_rate": 9.10912953122591e-05, "loss": 1.6762, "step": 1360 }, { "epoch": 0.2052790346907994, "grad_norm": 1.8460781574249268, "learning_rate": 9.107758545091463e-05, "loss": 1.4504, "step": 1361 }, { "epoch": 0.20542986425339366, "grad_norm": 1.9882128238677979, "learning_rate": 9.106386608180856e-05, "loss": 1.3995, "step": 1362 }, { "epoch": 0.20558069381598792, "grad_norm": 1.926417350769043, "learning_rate": 9.105013720811639e-05, "loss": 1.5362, "step": 1363 }, { "epoch": 0.2057315233785822, "grad_norm": 2.151364326477051, "learning_rate": 9.103639883301578e-05, "loss": 1.5728, "step": 1364 }, { "epoch": 0.20588235294117646, "grad_norm": 1.939749836921692, "learning_rate": 9.102265095968661e-05, "loss": 1.5054, "step": 1365 }, { "epoch": 0.20603318250377073, "grad_norm": 1.6817288398742676, "learning_rate": 9.100889359131093e-05, "loss": 1.0784, "step": 1366 }, { "epoch": 0.206184012066365, "grad_norm": 1.731872320175171, "learning_rate": 9.0995126731073e-05, "loss": 1.2873, "step": 1367 }, { "epoch": 0.20633484162895926, "grad_norm": 1.6237006187438965, "learning_rate": 9.098135038215927e-05, "loss": 1.0277, "step": 1368 }, { "epoch": 0.20648567119155353, "grad_norm": 1.8862062692642212, "learning_rate": 9.096756454775843e-05, "loss": 1.6495, "step": 1369 }, { "epoch": 0.2066365007541478, "grad_norm": 2.050292730331421, "learning_rate": 9.095376923106129e-05, "loss": 1.5485, "step": 1370 }, { "epoch": 0.20678733031674207, "grad_norm": 1.6843444108963013, "learning_rate": 9.093996443526092e-05, "loss": 1.0686, "step": 1371 }, { "epoch": 0.20693815987933634, "grad_norm": 1.934694766998291, "learning_rate": 9.092615016355255e-05, "loss": 1.2415, "step": 1372 }, { "epoch": 0.20708898944193063, "grad_norm": 1.840377926826477, "learning_rate": 9.091232641913361e-05, "loss": 1.0508, "step": 1373 }, { "epoch": 0.2072398190045249, "grad_norm": 1.9314602613449097, "learning_rate": 9.089849320520374e-05, "loss": 1.4031, "step": 1374 }, { "epoch": 0.20739064856711917, "grad_norm": 2.203970193862915, "learning_rate": 9.088465052496473e-05, "loss": 1.5704, "step": 1375 }, { "epoch": 0.20754147812971344, "grad_norm": 2.079960823059082, "learning_rate": 9.087079838162061e-05, "loss": 1.3823, "step": 1376 }, { "epoch": 0.2076923076923077, "grad_norm": 1.9350496530532837, "learning_rate": 9.085693677837757e-05, "loss": 1.2848, "step": 1377 }, { "epoch": 0.20784313725490197, "grad_norm": 2.0181422233581543, "learning_rate": 9.0843065718444e-05, "loss": 1.4994, "step": 1378 }, { "epoch": 0.20799396681749624, "grad_norm": 2.045013666152954, "learning_rate": 9.082918520503048e-05, "loss": 1.2055, "step": 1379 }, { "epoch": 0.2081447963800905, "grad_norm": 2.094616651535034, "learning_rate": 9.081529524134976e-05, "loss": 1.4664, "step": 1380 }, { "epoch": 0.20829562594268478, "grad_norm": 1.99825119972229, "learning_rate": 9.080139583061682e-05, "loss": 1.3183, "step": 1381 }, { "epoch": 0.20844645550527904, "grad_norm": 2.088332176208496, "learning_rate": 9.078748697604877e-05, "loss": 1.1254, "step": 1382 }, { "epoch": 0.2085972850678733, "grad_norm": 2.1560606956481934, "learning_rate": 9.077356868086492e-05, "loss": 1.5352, "step": 1383 }, { "epoch": 0.20874811463046758, "grad_norm": 2.20355486869812, "learning_rate": 9.075964094828684e-05, "loss": 1.3208, "step": 1384 }, { "epoch": 0.20889894419306185, "grad_norm": 1.8082834482192993, "learning_rate": 9.074570378153817e-05, "loss": 1.1374, "step": 1385 }, { "epoch": 0.20904977375565612, "grad_norm": 2.107736110687256, "learning_rate": 9.073175718384482e-05, "loss": 1.5122, "step": 1386 }, { "epoch": 0.20920060331825038, "grad_norm": 2.2412798404693604, "learning_rate": 9.071780115843482e-05, "loss": 1.4253, "step": 1387 }, { "epoch": 0.20935143288084465, "grad_norm": 2.191100597381592, "learning_rate": 9.070383570853844e-05, "loss": 1.5112, "step": 1388 }, { "epoch": 0.20950226244343892, "grad_norm": 1.9157055616378784, "learning_rate": 9.068986083738809e-05, "loss": 1.2802, "step": 1389 }, { "epoch": 0.2096530920060332, "grad_norm": 2.291934013366699, "learning_rate": 9.067587654821839e-05, "loss": 1.742, "step": 1390 }, { "epoch": 0.20980392156862746, "grad_norm": 2.340492010116577, "learning_rate": 9.06618828442661e-05, "loss": 1.4703, "step": 1391 }, { "epoch": 0.20995475113122172, "grad_norm": 2.251694917678833, "learning_rate": 9.064787972877018e-05, "loss": 1.6051, "step": 1392 }, { "epoch": 0.210105580693816, "grad_norm": 1.7096487283706665, "learning_rate": 9.06338672049718e-05, "loss": 0.9237, "step": 1393 }, { "epoch": 0.21025641025641026, "grad_norm": 2.29158616065979, "learning_rate": 9.061984527611427e-05, "loss": 1.3917, "step": 1394 }, { "epoch": 0.21040723981900453, "grad_norm": 2.1721572875976562, "learning_rate": 9.060581394544307e-05, "loss": 1.1765, "step": 1395 }, { "epoch": 0.2105580693815988, "grad_norm": 2.2922005653381348, "learning_rate": 9.05917732162059e-05, "loss": 1.2647, "step": 1396 }, { "epoch": 0.21070889894419306, "grad_norm": 1.9698129892349243, "learning_rate": 9.057772309165259e-05, "loss": 0.9861, "step": 1397 }, { "epoch": 0.21085972850678733, "grad_norm": 2.204995632171631, "learning_rate": 9.056366357503516e-05, "loss": 1.2127, "step": 1398 }, { "epoch": 0.2110105580693816, "grad_norm": 2.1891632080078125, "learning_rate": 9.054959466960782e-05, "loss": 1.3071, "step": 1399 }, { "epoch": 0.21116138763197587, "grad_norm": 2.1918492317199707, "learning_rate": 9.053551637862692e-05, "loss": 1.0223, "step": 1400 }, { "epoch": 0.21131221719457013, "grad_norm": 3.0221829414367676, "learning_rate": 9.052142870535103e-05, "loss": 1.3434, "step": 1401 }, { "epoch": 0.2114630467571644, "grad_norm": 2.7427587509155273, "learning_rate": 9.050733165304084e-05, "loss": 1.6331, "step": 1402 }, { "epoch": 0.21161387631975867, "grad_norm": 2.146721601486206, "learning_rate": 9.049322522495925e-05, "loss": 1.4011, "step": 1403 }, { "epoch": 0.21176470588235294, "grad_norm": 1.9571388959884644, "learning_rate": 9.047910942437128e-05, "loss": 1.2196, "step": 1404 }, { "epoch": 0.2119155354449472, "grad_norm": 2.259096622467041, "learning_rate": 9.04649842545442e-05, "loss": 1.6446, "step": 1405 }, { "epoch": 0.21206636500754147, "grad_norm": 1.9372862577438354, "learning_rate": 9.045084971874738e-05, "loss": 0.9493, "step": 1406 }, { "epoch": 0.21221719457013574, "grad_norm": 2.4948477745056152, "learning_rate": 9.043670582025238e-05, "loss": 1.783, "step": 1407 }, { "epoch": 0.21236802413273, "grad_norm": 2.0068395137786865, "learning_rate": 9.042255256233292e-05, "loss": 1.3387, "step": 1408 }, { "epoch": 0.21251885369532428, "grad_norm": 1.9806522130966187, "learning_rate": 9.040838994826493e-05, "loss": 1.3463, "step": 1409 }, { "epoch": 0.21266968325791855, "grad_norm": 2.2105000019073486, "learning_rate": 9.039421798132642e-05, "loss": 1.3089, "step": 1410 }, { "epoch": 0.2128205128205128, "grad_norm": 2.190873146057129, "learning_rate": 9.038003666479763e-05, "loss": 1.5049, "step": 1411 }, { "epoch": 0.21297134238310708, "grad_norm": 2.322499990463257, "learning_rate": 9.036584600196098e-05, "loss": 1.5819, "step": 1412 }, { "epoch": 0.21312217194570135, "grad_norm": 1.6029049158096313, "learning_rate": 9.035164599610099e-05, "loss": 0.8715, "step": 1413 }, { "epoch": 0.21327300150829562, "grad_norm": 2.055644989013672, "learning_rate": 9.033743665050438e-05, "loss": 1.6423, "step": 1414 }, { "epoch": 0.21342383107088989, "grad_norm": 1.98372483253479, "learning_rate": 9.032321796846002e-05, "loss": 1.4236, "step": 1415 }, { "epoch": 0.21357466063348415, "grad_norm": 1.8755744695663452, "learning_rate": 9.030898995325895e-05, "loss": 1.2815, "step": 1416 }, { "epoch": 0.21372549019607842, "grad_norm": 2.079458475112915, "learning_rate": 9.029475260819438e-05, "loss": 1.7743, "step": 1417 }, { "epoch": 0.2138763197586727, "grad_norm": 1.761240839958191, "learning_rate": 9.028050593656164e-05, "loss": 1.1149, "step": 1418 }, { "epoch": 0.21402714932126696, "grad_norm": 1.8196676969528198, "learning_rate": 9.026624994165826e-05, "loss": 1.2211, "step": 1419 }, { "epoch": 0.21417797888386123, "grad_norm": 1.8897814750671387, "learning_rate": 9.025198462678393e-05, "loss": 1.2356, "step": 1420 }, { "epoch": 0.2143288084464555, "grad_norm": 1.5664094686508179, "learning_rate": 9.023770999524047e-05, "loss": 0.9341, "step": 1421 }, { "epoch": 0.21447963800904976, "grad_norm": 1.8813600540161133, "learning_rate": 9.022342605033183e-05, "loss": 1.118, "step": 1422 }, { "epoch": 0.21463046757164403, "grad_norm": 2.1076366901397705, "learning_rate": 9.02091327953642e-05, "loss": 1.5728, "step": 1423 }, { "epoch": 0.2147812971342383, "grad_norm": 2.081864356994629, "learning_rate": 9.019483023364588e-05, "loss": 1.4663, "step": 1424 }, { "epoch": 0.2149321266968326, "grad_norm": 1.9934933185577393, "learning_rate": 9.018051836848727e-05, "loss": 1.2759, "step": 1425 }, { "epoch": 0.21508295625942686, "grad_norm": 2.0541794300079346, "learning_rate": 9.016619720320102e-05, "loss": 1.5334, "step": 1426 }, { "epoch": 0.21523378582202113, "grad_norm": 1.784936785697937, "learning_rate": 9.015186674110188e-05, "loss": 1.1767, "step": 1427 }, { "epoch": 0.2153846153846154, "grad_norm": 1.936484932899475, "learning_rate": 9.013752698550674e-05, "loss": 1.0501, "step": 1428 }, { "epoch": 0.21553544494720966, "grad_norm": 2.298659563064575, "learning_rate": 9.012317793973469e-05, "loss": 1.329, "step": 1429 }, { "epoch": 0.21568627450980393, "grad_norm": 1.8768755197525024, "learning_rate": 9.010881960710689e-05, "loss": 1.1491, "step": 1430 }, { "epoch": 0.2158371040723982, "grad_norm": 2.234511137008667, "learning_rate": 9.009445199094675e-05, "loss": 1.4732, "step": 1431 }, { "epoch": 0.21598793363499247, "grad_norm": 1.9655760526657104, "learning_rate": 9.008007509457977e-05, "loss": 1.2231, "step": 1432 }, { "epoch": 0.21613876319758674, "grad_norm": 2.18068790435791, "learning_rate": 9.00656889213336e-05, "loss": 1.5013, "step": 1433 }, { "epoch": 0.216289592760181, "grad_norm": 2.14027738571167, "learning_rate": 9.005129347453803e-05, "loss": 1.6304, "step": 1434 }, { "epoch": 0.21644042232277527, "grad_norm": 2.1916842460632324, "learning_rate": 9.003688875752503e-05, "loss": 1.3969, "step": 1435 }, { "epoch": 0.21659125188536954, "grad_norm": 2.0524742603302, "learning_rate": 9.002247477362868e-05, "loss": 1.0878, "step": 1436 }, { "epoch": 0.2167420814479638, "grad_norm": 2.191272497177124, "learning_rate": 9.000805152618524e-05, "loss": 1.4454, "step": 1437 }, { "epoch": 0.21689291101055808, "grad_norm": 2.046478033065796, "learning_rate": 8.999361901853308e-05, "loss": 1.1677, "step": 1438 }, { "epoch": 0.21704374057315234, "grad_norm": 2.056230306625366, "learning_rate": 8.997917725401274e-05, "loss": 1.3375, "step": 1439 }, { "epoch": 0.2171945701357466, "grad_norm": 2.4020018577575684, "learning_rate": 8.996472623596688e-05, "loss": 1.3857, "step": 1440 }, { "epoch": 0.21734539969834088, "grad_norm": 2.19354510307312, "learning_rate": 8.995026596774031e-05, "loss": 1.2401, "step": 1441 }, { "epoch": 0.21749622926093515, "grad_norm": 2.1322340965270996, "learning_rate": 8.993579645268e-05, "loss": 1.3783, "step": 1442 }, { "epoch": 0.21764705882352942, "grad_norm": 2.248856782913208, "learning_rate": 8.992131769413503e-05, "loss": 1.2831, "step": 1443 }, { "epoch": 0.21779788838612368, "grad_norm": 1.9719327688217163, "learning_rate": 8.990682969545663e-05, "loss": 1.1371, "step": 1444 }, { "epoch": 0.21794871794871795, "grad_norm": 1.9230453968048096, "learning_rate": 8.98923324599982e-05, "loss": 1.012, "step": 1445 }, { "epoch": 0.21809954751131222, "grad_norm": 2.200929880142212, "learning_rate": 8.987782599111523e-05, "loss": 1.2279, "step": 1446 }, { "epoch": 0.2182503770739065, "grad_norm": 2.12271785736084, "learning_rate": 8.986331029216535e-05, "loss": 1.0994, "step": 1447 }, { "epoch": 0.21840120663650076, "grad_norm": 1.863816261291504, "learning_rate": 8.984878536650837e-05, "loss": 0.8855, "step": 1448 }, { "epoch": 0.21855203619909502, "grad_norm": 1.9469228982925415, "learning_rate": 8.983425121750621e-05, "loss": 1.2154, "step": 1449 }, { "epoch": 0.2187028657616893, "grad_norm": 1.9962732791900635, "learning_rate": 8.981970784852291e-05, "loss": 1.088, "step": 1450 }, { "epoch": 0.21885369532428356, "grad_norm": 2.5722055435180664, "learning_rate": 8.980515526292465e-05, "loss": 1.5978, "step": 1451 }, { "epoch": 0.21900452488687783, "grad_norm": 2.482862949371338, "learning_rate": 8.979059346407979e-05, "loss": 1.5566, "step": 1452 }, { "epoch": 0.2191553544494721, "grad_norm": 2.1840219497680664, "learning_rate": 8.977602245535874e-05, "loss": 1.5059, "step": 1453 }, { "epoch": 0.21930618401206636, "grad_norm": 1.9417396783828735, "learning_rate": 8.976144224013412e-05, "loss": 1.353, "step": 1454 }, { "epoch": 0.21945701357466063, "grad_norm": 2.0393378734588623, "learning_rate": 8.974685282178062e-05, "loss": 1.6259, "step": 1455 }, { "epoch": 0.2196078431372549, "grad_norm": 1.9727704524993896, "learning_rate": 8.97322542036751e-05, "loss": 1.3545, "step": 1456 }, { "epoch": 0.21975867269984917, "grad_norm": 1.8196444511413574, "learning_rate": 8.971764638919656e-05, "loss": 1.2803, "step": 1457 }, { "epoch": 0.21990950226244343, "grad_norm": 2.066009759902954, "learning_rate": 8.970302938172606e-05, "loss": 1.4453, "step": 1458 }, { "epoch": 0.2200603318250377, "grad_norm": 1.9280444383621216, "learning_rate": 8.968840318464684e-05, "loss": 1.3098, "step": 1459 }, { "epoch": 0.22021116138763197, "grad_norm": 2.0023579597473145, "learning_rate": 8.967376780134428e-05, "loss": 1.6374, "step": 1460 }, { "epoch": 0.22036199095022624, "grad_norm": 1.814986228942871, "learning_rate": 8.965912323520586e-05, "loss": 1.2152, "step": 1461 }, { "epoch": 0.2205128205128205, "grad_norm": 1.956112027168274, "learning_rate": 8.964446948962117e-05, "loss": 1.3685, "step": 1462 }, { "epoch": 0.22066365007541477, "grad_norm": 1.904112696647644, "learning_rate": 8.962980656798199e-05, "loss": 1.3635, "step": 1463 }, { "epoch": 0.22081447963800904, "grad_norm": 1.7448221445083618, "learning_rate": 8.961513447368215e-05, "loss": 1.2607, "step": 1464 }, { "epoch": 0.2209653092006033, "grad_norm": 1.833314061164856, "learning_rate": 8.960045321011762e-05, "loss": 1.6061, "step": 1465 }, { "epoch": 0.22111613876319758, "grad_norm": 1.924702763557434, "learning_rate": 8.958576278068655e-05, "loss": 1.6239, "step": 1466 }, { "epoch": 0.22126696832579185, "grad_norm": 2.1179938316345215, "learning_rate": 8.957106318878912e-05, "loss": 1.4806, "step": 1467 }, { "epoch": 0.22141779788838611, "grad_norm": 1.7633947134017944, "learning_rate": 8.955635443782769e-05, "loss": 1.166, "step": 1468 }, { "epoch": 0.22156862745098038, "grad_norm": 1.7920347452163696, "learning_rate": 8.954163653120675e-05, "loss": 1.3628, "step": 1469 }, { "epoch": 0.22171945701357465, "grad_norm": 1.9144536256790161, "learning_rate": 8.952690947233285e-05, "loss": 1.3632, "step": 1470 }, { "epoch": 0.22187028657616892, "grad_norm": 1.8046661615371704, "learning_rate": 8.951217326461471e-05, "loss": 1.2768, "step": 1471 }, { "epoch": 0.22202111613876319, "grad_norm": 1.9228757619857788, "learning_rate": 8.949742791146318e-05, "loss": 1.3429, "step": 1472 }, { "epoch": 0.22217194570135745, "grad_norm": 1.82856023311615, "learning_rate": 8.948267341629118e-05, "loss": 1.1848, "step": 1473 }, { "epoch": 0.22232277526395172, "grad_norm": 2.06038498878479, "learning_rate": 8.946790978251373e-05, "loss": 1.2458, "step": 1474 }, { "epoch": 0.222473604826546, "grad_norm": 1.9259973764419556, "learning_rate": 8.945313701354806e-05, "loss": 1.1874, "step": 1475 }, { "epoch": 0.22262443438914026, "grad_norm": 2.013064384460449, "learning_rate": 8.943835511281342e-05, "loss": 1.4764, "step": 1476 }, { "epoch": 0.22277526395173455, "grad_norm": 1.6677402257919312, "learning_rate": 8.942356408373121e-05, "loss": 1.0536, "step": 1477 }, { "epoch": 0.22292609351432882, "grad_norm": 2.0562076568603516, "learning_rate": 8.940876392972495e-05, "loss": 1.3773, "step": 1478 }, { "epoch": 0.2230769230769231, "grad_norm": 2.1425089836120605, "learning_rate": 8.939395465422025e-05, "loss": 1.3682, "step": 1479 }, { "epoch": 0.22322775263951736, "grad_norm": 1.9653915166854858, "learning_rate": 8.937913626064487e-05, "loss": 1.2015, "step": 1480 }, { "epoch": 0.22337858220211163, "grad_norm": 2.5217978954315186, "learning_rate": 8.936430875242863e-05, "loss": 1.3908, "step": 1481 }, { "epoch": 0.2235294117647059, "grad_norm": 2.246840000152588, "learning_rate": 8.934947213300348e-05, "loss": 1.2836, "step": 1482 }, { "epoch": 0.22368024132730016, "grad_norm": 1.805379867553711, "learning_rate": 8.933462640580353e-05, "loss": 0.8514, "step": 1483 }, { "epoch": 0.22383107088989443, "grad_norm": 1.8961191177368164, "learning_rate": 8.93197715742649e-05, "loss": 1.0949, "step": 1484 }, { "epoch": 0.2239819004524887, "grad_norm": 2.207665205001831, "learning_rate": 8.930490764182589e-05, "loss": 1.1656, "step": 1485 }, { "epoch": 0.22413273001508296, "grad_norm": 1.893350601196289, "learning_rate": 8.929003461192688e-05, "loss": 1.0569, "step": 1486 }, { "epoch": 0.22428355957767723, "grad_norm": 2.3251140117645264, "learning_rate": 8.927515248801037e-05, "loss": 1.3191, "step": 1487 }, { "epoch": 0.2244343891402715, "grad_norm": 2.4959113597869873, "learning_rate": 8.926026127352093e-05, "loss": 1.4163, "step": 1488 }, { "epoch": 0.22458521870286577, "grad_norm": 2.218398332595825, "learning_rate": 8.924536097190532e-05, "loss": 1.3041, "step": 1489 }, { "epoch": 0.22473604826546004, "grad_norm": 2.368290901184082, "learning_rate": 8.923045158661227e-05, "loss": 1.1004, "step": 1490 }, { "epoch": 0.2248868778280543, "grad_norm": 2.283897638320923, "learning_rate": 8.921553312109274e-05, "loss": 1.2793, "step": 1491 }, { "epoch": 0.22503770739064857, "grad_norm": 2.421985387802124, "learning_rate": 8.92006055787997e-05, "loss": 1.1811, "step": 1492 }, { "epoch": 0.22518853695324284, "grad_norm": 2.3970065116882324, "learning_rate": 8.918566896318829e-05, "loss": 1.4665, "step": 1493 }, { "epoch": 0.2253393665158371, "grad_norm": 2.242093563079834, "learning_rate": 8.91707232777157e-05, "loss": 1.1127, "step": 1494 }, { "epoch": 0.22549019607843138, "grad_norm": 2.1407928466796875, "learning_rate": 8.915576852584124e-05, "loss": 1.3983, "step": 1495 }, { "epoch": 0.22564102564102564, "grad_norm": 2.300218343734741, "learning_rate": 8.914080471102632e-05, "loss": 1.3236, "step": 1496 }, { "epoch": 0.2257918552036199, "grad_norm": 2.2749948501586914, "learning_rate": 8.912583183673446e-05, "loss": 1.066, "step": 1497 }, { "epoch": 0.22594268476621418, "grad_norm": 2.343904495239258, "learning_rate": 8.911084990643122e-05, "loss": 1.1579, "step": 1498 }, { "epoch": 0.22609351432880845, "grad_norm": 2.1201090812683105, "learning_rate": 8.909585892358434e-05, "loss": 1.2105, "step": 1499 }, { "epoch": 0.22624434389140272, "grad_norm": 1.9724844694137573, "learning_rate": 8.908085889166358e-05, "loss": 1.1398, "step": 1500 }, { "epoch": 0.22639517345399698, "grad_norm": 2.5250325202941895, "learning_rate": 8.906584981414084e-05, "loss": 1.0947, "step": 1501 }, { "epoch": 0.22654600301659125, "grad_norm": 2.251833915710449, "learning_rate": 8.90508316944901e-05, "loss": 1.2316, "step": 1502 }, { "epoch": 0.22669683257918552, "grad_norm": 2.371737241744995, "learning_rate": 8.903580453618743e-05, "loss": 1.6331, "step": 1503 }, { "epoch": 0.2268476621417798, "grad_norm": 1.8648347854614258, "learning_rate": 8.9020768342711e-05, "loss": 1.1568, "step": 1504 }, { "epoch": 0.22699849170437406, "grad_norm": 2.0729479789733887, "learning_rate": 8.900572311754109e-05, "loss": 1.3934, "step": 1505 }, { "epoch": 0.22714932126696832, "grad_norm": 2.197603940963745, "learning_rate": 8.899066886416e-05, "loss": 1.7968, "step": 1506 }, { "epoch": 0.2273001508295626, "grad_norm": 1.7364863157272339, "learning_rate": 8.897560558605219e-05, "loss": 0.923, "step": 1507 }, { "epoch": 0.22745098039215686, "grad_norm": 2.0912363529205322, "learning_rate": 8.89605332867042e-05, "loss": 1.2373, "step": 1508 }, { "epoch": 0.22760180995475113, "grad_norm": 2.057990550994873, "learning_rate": 8.894545196960462e-05, "loss": 1.4536, "step": 1509 }, { "epoch": 0.2277526395173454, "grad_norm": 2.1483116149902344, "learning_rate": 8.893036163824415e-05, "loss": 1.6313, "step": 1510 }, { "epoch": 0.22790346907993966, "grad_norm": 2.0251407623291016, "learning_rate": 8.89152622961156e-05, "loss": 1.2522, "step": 1511 }, { "epoch": 0.22805429864253393, "grad_norm": 1.9454152584075928, "learning_rate": 8.890015394671382e-05, "loss": 1.2275, "step": 1512 }, { "epoch": 0.2282051282051282, "grad_norm": 1.921204686164856, "learning_rate": 8.888503659353579e-05, "loss": 1.1972, "step": 1513 }, { "epoch": 0.22835595776772247, "grad_norm": 1.729824185371399, "learning_rate": 8.886991024008054e-05, "loss": 1.1716, "step": 1514 }, { "epoch": 0.22850678733031674, "grad_norm": 1.8938820362091064, "learning_rate": 8.885477488984919e-05, "loss": 1.5513, "step": 1515 }, { "epoch": 0.228657616892911, "grad_norm": 1.8480533361434937, "learning_rate": 8.883963054634495e-05, "loss": 1.0794, "step": 1516 }, { "epoch": 0.22880844645550527, "grad_norm": 1.559557318687439, "learning_rate": 8.882447721307312e-05, "loss": 0.9959, "step": 1517 }, { "epoch": 0.22895927601809954, "grad_norm": 1.8606772422790527, "learning_rate": 8.880931489354105e-05, "loss": 1.1896, "step": 1518 }, { "epoch": 0.2291101055806938, "grad_norm": 1.767792820930481, "learning_rate": 8.879414359125822e-05, "loss": 1.2198, "step": 1519 }, { "epoch": 0.22926093514328807, "grad_norm": 1.7562978267669678, "learning_rate": 8.877896330973612e-05, "loss": 1.1474, "step": 1520 }, { "epoch": 0.22941176470588234, "grad_norm": 2.1839122772216797, "learning_rate": 8.87637740524884e-05, "loss": 1.5856, "step": 1521 }, { "epoch": 0.2295625942684766, "grad_norm": 1.9293506145477295, "learning_rate": 8.87485758230307e-05, "loss": 1.3077, "step": 1522 }, { "epoch": 0.22971342383107088, "grad_norm": 2.036752700805664, "learning_rate": 8.87333686248808e-05, "loss": 1.0183, "step": 1523 }, { "epoch": 0.22986425339366515, "grad_norm": 2.1952016353607178, "learning_rate": 8.871815246155854e-05, "loss": 1.3183, "step": 1524 }, { "epoch": 0.23001508295625941, "grad_norm": 1.9662429094314575, "learning_rate": 8.870292733658585e-05, "loss": 1.2869, "step": 1525 }, { "epoch": 0.23016591251885368, "grad_norm": 2.067446708679199, "learning_rate": 8.86876932534867e-05, "loss": 1.2027, "step": 1526 }, { "epoch": 0.23031674208144795, "grad_norm": 1.9492758512496948, "learning_rate": 8.867245021578714e-05, "loss": 1.3416, "step": 1527 }, { "epoch": 0.23046757164404222, "grad_norm": 1.8131574392318726, "learning_rate": 8.86571982270153e-05, "loss": 1.0827, "step": 1528 }, { "epoch": 0.23061840120663651, "grad_norm": 2.3384923934936523, "learning_rate": 8.864193729070141e-05, "loss": 1.4772, "step": 1529 }, { "epoch": 0.23076923076923078, "grad_norm": 1.8453158140182495, "learning_rate": 8.862666741037772e-05, "loss": 0.9903, "step": 1530 }, { "epoch": 0.23092006033182505, "grad_norm": 2.236769914627075, "learning_rate": 8.86113885895786e-05, "loss": 1.5606, "step": 1531 }, { "epoch": 0.23107088989441932, "grad_norm": 2.1324853897094727, "learning_rate": 8.859610083184045e-05, "loss": 1.3939, "step": 1532 }, { "epoch": 0.23122171945701359, "grad_norm": 1.9575307369232178, "learning_rate": 8.858080414070176e-05, "loss": 1.0659, "step": 1533 }, { "epoch": 0.23137254901960785, "grad_norm": 2.153789758682251, "learning_rate": 8.856549851970307e-05, "loss": 1.5236, "step": 1534 }, { "epoch": 0.23152337858220212, "grad_norm": 2.7134368419647217, "learning_rate": 8.855018397238702e-05, "loss": 1.5808, "step": 1535 }, { "epoch": 0.2316742081447964, "grad_norm": 2.1839146614074707, "learning_rate": 8.853486050229824e-05, "loss": 1.3748, "step": 1536 }, { "epoch": 0.23182503770739066, "grad_norm": 2.152637004852295, "learning_rate": 8.851952811298356e-05, "loss": 1.28, "step": 1537 }, { "epoch": 0.23197586726998493, "grad_norm": 2.061213493347168, "learning_rate": 8.850418680799173e-05, "loss": 1.1557, "step": 1538 }, { "epoch": 0.2321266968325792, "grad_norm": 2.5356838703155518, "learning_rate": 8.848883659087364e-05, "loss": 1.5293, "step": 1539 }, { "epoch": 0.23227752639517346, "grad_norm": 2.469954252243042, "learning_rate": 8.847347746518226e-05, "loss": 1.4567, "step": 1540 }, { "epoch": 0.23242835595776773, "grad_norm": 2.2634201049804688, "learning_rate": 8.845810943447256e-05, "loss": 1.4408, "step": 1541 }, { "epoch": 0.232579185520362, "grad_norm": 2.5795977115631104, "learning_rate": 8.844273250230162e-05, "loss": 1.8345, "step": 1542 }, { "epoch": 0.23273001508295627, "grad_norm": 2.3470804691314697, "learning_rate": 8.842734667222856e-05, "loss": 1.3469, "step": 1543 }, { "epoch": 0.23288084464555053, "grad_norm": 2.0746166706085205, "learning_rate": 8.841195194781455e-05, "loss": 1.4511, "step": 1544 }, { "epoch": 0.2330316742081448, "grad_norm": 2.435683488845825, "learning_rate": 8.839654833262285e-05, "loss": 1.4569, "step": 1545 }, { "epoch": 0.23318250377073907, "grad_norm": 2.4197633266448975, "learning_rate": 8.838113583021877e-05, "loss": 1.3094, "step": 1546 }, { "epoch": 0.23333333333333334, "grad_norm": 1.9691776037216187, "learning_rate": 8.836571444416962e-05, "loss": 1.01, "step": 1547 }, { "epoch": 0.2334841628959276, "grad_norm": 2.14076566696167, "learning_rate": 8.835028417804484e-05, "loss": 1.1138, "step": 1548 }, { "epoch": 0.23363499245852187, "grad_norm": 2.3846726417541504, "learning_rate": 8.833484503541593e-05, "loss": 1.248, "step": 1549 }, { "epoch": 0.23378582202111614, "grad_norm": 1.7068798542022705, "learning_rate": 8.831939701985637e-05, "loss": 0.7525, "step": 1550 }, { "epoch": 0.2339366515837104, "grad_norm": 2.5826425552368164, "learning_rate": 8.830394013494174e-05, "loss": 1.5785, "step": 1551 }, { "epoch": 0.23408748114630468, "grad_norm": 2.3295257091522217, "learning_rate": 8.82884743842497e-05, "loss": 1.6219, "step": 1552 }, { "epoch": 0.23423831070889894, "grad_norm": 2.0497474670410156, "learning_rate": 8.82729997713599e-05, "loss": 1.2191, "step": 1553 }, { "epoch": 0.2343891402714932, "grad_norm": 2.1023871898651123, "learning_rate": 8.825751629985409e-05, "loss": 1.4049, "step": 1554 }, { "epoch": 0.23453996983408748, "grad_norm": 2.0562241077423096, "learning_rate": 8.824202397331604e-05, "loss": 1.272, "step": 1555 }, { "epoch": 0.23469079939668175, "grad_norm": 1.9464681148529053, "learning_rate": 8.82265227953316e-05, "loss": 1.2622, "step": 1556 }, { "epoch": 0.23484162895927602, "grad_norm": 2.027942657470703, "learning_rate": 8.821101276948862e-05, "loss": 1.3877, "step": 1557 }, { "epoch": 0.23499245852187028, "grad_norm": 1.8898385763168335, "learning_rate": 8.819549389937706e-05, "loss": 1.012, "step": 1558 }, { "epoch": 0.23514328808446455, "grad_norm": 1.8579227924346924, "learning_rate": 8.817996618858888e-05, "loss": 1.1524, "step": 1559 }, { "epoch": 0.23529411764705882, "grad_norm": 1.8580265045166016, "learning_rate": 8.816442964071812e-05, "loss": 1.0953, "step": 1560 }, { "epoch": 0.2354449472096531, "grad_norm": 1.8299130201339722, "learning_rate": 8.814888425936083e-05, "loss": 1.07, "step": 1561 }, { "epoch": 0.23559577677224736, "grad_norm": 1.8751325607299805, "learning_rate": 8.813333004811513e-05, "loss": 1.2646, "step": 1562 }, { "epoch": 0.23574660633484162, "grad_norm": 1.948305606842041, "learning_rate": 8.811776701058117e-05, "loss": 1.2865, "step": 1563 }, { "epoch": 0.2358974358974359, "grad_norm": 2.0197417736053467, "learning_rate": 8.810219515036117e-05, "loss": 1.3217, "step": 1564 }, { "epoch": 0.23604826546003016, "grad_norm": 2.1098923683166504, "learning_rate": 8.808661447105934e-05, "loss": 1.3426, "step": 1565 }, { "epoch": 0.23619909502262443, "grad_norm": 1.8118036985397339, "learning_rate": 8.807102497628199e-05, "loss": 0.9946, "step": 1566 }, { "epoch": 0.2363499245852187, "grad_norm": 1.8626090288162231, "learning_rate": 8.805542666963744e-05, "loss": 1.3441, "step": 1567 }, { "epoch": 0.23650075414781296, "grad_norm": 1.639060139656067, "learning_rate": 8.803981955473604e-05, "loss": 1.1714, "step": 1568 }, { "epoch": 0.23665158371040723, "grad_norm": 1.8230570554733276, "learning_rate": 8.80242036351902e-05, "loss": 1.2838, "step": 1569 }, { "epoch": 0.2368024132730015, "grad_norm": 1.8147037029266357, "learning_rate": 8.800857891461434e-05, "loss": 1.2769, "step": 1570 }, { "epoch": 0.23695324283559577, "grad_norm": 1.865813136100769, "learning_rate": 8.799294539662495e-05, "loss": 1.33, "step": 1571 }, { "epoch": 0.23710407239819004, "grad_norm": 1.9967467784881592, "learning_rate": 8.797730308484055e-05, "loss": 1.6348, "step": 1572 }, { "epoch": 0.2372549019607843, "grad_norm": 2.0340375900268555, "learning_rate": 8.79616519828817e-05, "loss": 1.447, "step": 1573 }, { "epoch": 0.23740573152337857, "grad_norm": 1.9811543226242065, "learning_rate": 8.794599209437096e-05, "loss": 1.3687, "step": 1574 }, { "epoch": 0.23755656108597284, "grad_norm": 2.0284652709960938, "learning_rate": 8.793032342293293e-05, "loss": 1.2159, "step": 1575 }, { "epoch": 0.2377073906485671, "grad_norm": 2.0309066772460938, "learning_rate": 8.79146459721943e-05, "loss": 1.4363, "step": 1576 }, { "epoch": 0.23785822021116138, "grad_norm": 1.7274417877197266, "learning_rate": 8.789895974578373e-05, "loss": 1.0676, "step": 1577 }, { "epoch": 0.23800904977375564, "grad_norm": 1.9293369054794312, "learning_rate": 8.788326474733193e-05, "loss": 1.2195, "step": 1578 }, { "epoch": 0.2381598793363499, "grad_norm": 2.0224716663360596, "learning_rate": 8.786756098047165e-05, "loss": 1.4437, "step": 1579 }, { "epoch": 0.2383107088989442, "grad_norm": 1.7573405504226685, "learning_rate": 8.785184844883767e-05, "loss": 1.0708, "step": 1580 }, { "epoch": 0.23846153846153847, "grad_norm": 2.0014076232910156, "learning_rate": 8.783612715606677e-05, "loss": 1.3237, "step": 1581 }, { "epoch": 0.23861236802413274, "grad_norm": 1.9142881631851196, "learning_rate": 8.782039710579779e-05, "loss": 1.0566, "step": 1582 }, { "epoch": 0.238763197586727, "grad_norm": 2.053525447845459, "learning_rate": 8.780465830167159e-05, "loss": 1.2396, "step": 1583 }, { "epoch": 0.23891402714932128, "grad_norm": 2.108991861343384, "learning_rate": 8.778891074733103e-05, "loss": 1.3323, "step": 1584 }, { "epoch": 0.23906485671191555, "grad_norm": 2.0759384632110596, "learning_rate": 8.777315444642104e-05, "loss": 1.424, "step": 1585 }, { "epoch": 0.23921568627450981, "grad_norm": 2.2755789756774902, "learning_rate": 8.775738940258856e-05, "loss": 1.5641, "step": 1586 }, { "epoch": 0.23936651583710408, "grad_norm": 2.1879098415374756, "learning_rate": 8.774161561948253e-05, "loss": 1.3755, "step": 1587 }, { "epoch": 0.23951734539969835, "grad_norm": 2.08903169631958, "learning_rate": 8.772583310075393e-05, "loss": 1.2698, "step": 1588 }, { "epoch": 0.23966817496229262, "grad_norm": 1.903348684310913, "learning_rate": 8.771004185005575e-05, "loss": 1.2197, "step": 1589 }, { "epoch": 0.2398190045248869, "grad_norm": 2.0090150833129883, "learning_rate": 8.769424187104303e-05, "loss": 1.2774, "step": 1590 }, { "epoch": 0.23996983408748115, "grad_norm": 2.0252773761749268, "learning_rate": 8.76784331673728e-05, "loss": 1.2067, "step": 1591 }, { "epoch": 0.24012066365007542, "grad_norm": 2.356809377670288, "learning_rate": 8.766261574270414e-05, "loss": 1.6132, "step": 1592 }, { "epoch": 0.2402714932126697, "grad_norm": 2.618988513946533, "learning_rate": 8.764678960069809e-05, "loss": 1.6196, "step": 1593 }, { "epoch": 0.24042232277526396, "grad_norm": 2.4427056312561035, "learning_rate": 8.763095474501779e-05, "loss": 1.3369, "step": 1594 }, { "epoch": 0.24057315233785823, "grad_norm": 2.153461456298828, "learning_rate": 8.761511117932831e-05, "loss": 1.169, "step": 1595 }, { "epoch": 0.2407239819004525, "grad_norm": 2.1247127056121826, "learning_rate": 8.75992589072968e-05, "loss": 1.0688, "step": 1596 }, { "epoch": 0.24087481146304676, "grad_norm": 1.871667742729187, "learning_rate": 8.758339793259245e-05, "loss": 1.0011, "step": 1597 }, { "epoch": 0.24102564102564103, "grad_norm": 1.8301746845245361, "learning_rate": 8.756752825888634e-05, "loss": 1.0577, "step": 1598 }, { "epoch": 0.2411764705882353, "grad_norm": 2.1867599487304688, "learning_rate": 8.755164988985171e-05, "loss": 1.0639, "step": 1599 }, { "epoch": 0.24132730015082957, "grad_norm": 2.1920924186706543, "learning_rate": 8.753576282916371e-05, "loss": 0.9012, "step": 1600 }, { "epoch": 0.24147812971342383, "grad_norm": 3.5003163814544678, "learning_rate": 8.751986708049952e-05, "loss": 2.6309, "step": 1601 }, { "epoch": 0.2416289592760181, "grad_norm": 2.2395520210266113, "learning_rate": 8.75039626475384e-05, "loss": 1.0392, "step": 1602 }, { "epoch": 0.24177978883861237, "grad_norm": 1.9503103494644165, "learning_rate": 8.748804953396154e-05, "loss": 1.0682, "step": 1603 }, { "epoch": 0.24193061840120664, "grad_norm": 2.313530683517456, "learning_rate": 8.747212774345216e-05, "loss": 1.5709, "step": 1604 }, { "epoch": 0.2420814479638009, "grad_norm": 1.906840205192566, "learning_rate": 8.745619727969551e-05, "loss": 1.3473, "step": 1605 }, { "epoch": 0.24223227752639517, "grad_norm": 2.0075979232788086, "learning_rate": 8.744025814637882e-05, "loss": 1.4134, "step": 1606 }, { "epoch": 0.24238310708898944, "grad_norm": 2.09586501121521, "learning_rate": 8.742431034719135e-05, "loss": 1.356, "step": 1607 }, { "epoch": 0.2425339366515837, "grad_norm": 1.9307790994644165, "learning_rate": 8.740835388582436e-05, "loss": 1.2669, "step": 1608 }, { "epoch": 0.24268476621417798, "grad_norm": 2.342639923095703, "learning_rate": 8.73923887659711e-05, "loss": 1.3977, "step": 1609 }, { "epoch": 0.24283559577677225, "grad_norm": 1.8217182159423828, "learning_rate": 8.737641499132682e-05, "loss": 1.1644, "step": 1610 }, { "epoch": 0.2429864253393665, "grad_norm": 2.025130033493042, "learning_rate": 8.736043256558883e-05, "loss": 1.2568, "step": 1611 }, { "epoch": 0.24313725490196078, "grad_norm": 2.0927116870880127, "learning_rate": 8.734444149245637e-05, "loss": 1.2706, "step": 1612 }, { "epoch": 0.24328808446455505, "grad_norm": 1.9039268493652344, "learning_rate": 8.732844177563073e-05, "loss": 1.0722, "step": 1613 }, { "epoch": 0.24343891402714932, "grad_norm": 1.8429970741271973, "learning_rate": 8.731243341881514e-05, "loss": 1.3029, "step": 1614 }, { "epoch": 0.24358974358974358, "grad_norm": 2.128506660461426, "learning_rate": 8.729641642571493e-05, "loss": 1.5055, "step": 1615 }, { "epoch": 0.24374057315233785, "grad_norm": 2.0047271251678467, "learning_rate": 8.728039080003734e-05, "loss": 1.503, "step": 1616 }, { "epoch": 0.24389140271493212, "grad_norm": 2.103487014770508, "learning_rate": 8.726435654549165e-05, "loss": 1.2707, "step": 1617 }, { "epoch": 0.2440422322775264, "grad_norm": 1.7351815700531006, "learning_rate": 8.724831366578912e-05, "loss": 0.9177, "step": 1618 }, { "epoch": 0.24419306184012066, "grad_norm": 1.5638823509216309, "learning_rate": 8.7232262164643e-05, "loss": 0.8384, "step": 1619 }, { "epoch": 0.24434389140271492, "grad_norm": 1.9381847381591797, "learning_rate": 8.721620204576857e-05, "loss": 1.2465, "step": 1620 }, { "epoch": 0.2444947209653092, "grad_norm": 2.009727954864502, "learning_rate": 8.720013331288306e-05, "loss": 1.2813, "step": 1621 }, { "epoch": 0.24464555052790346, "grad_norm": 1.8187170028686523, "learning_rate": 8.718405596970576e-05, "loss": 1.1503, "step": 1622 }, { "epoch": 0.24479638009049773, "grad_norm": 1.937670350074768, "learning_rate": 8.716797001995786e-05, "loss": 0.9367, "step": 1623 }, { "epoch": 0.244947209653092, "grad_norm": 2.2900192737579346, "learning_rate": 8.715187546736262e-05, "loss": 1.5647, "step": 1624 }, { "epoch": 0.24509803921568626, "grad_norm": 1.792128562927246, "learning_rate": 8.713577231564526e-05, "loss": 1.0316, "step": 1625 }, { "epoch": 0.24524886877828053, "grad_norm": 2.327512741088867, "learning_rate": 8.711966056853298e-05, "loss": 1.4471, "step": 1626 }, { "epoch": 0.2453996983408748, "grad_norm": 2.1237335205078125, "learning_rate": 8.710354022975501e-05, "loss": 1.2796, "step": 1627 }, { "epoch": 0.24555052790346907, "grad_norm": 2.0049214363098145, "learning_rate": 8.708741130304252e-05, "loss": 1.2618, "step": 1628 }, { "epoch": 0.24570135746606334, "grad_norm": 2.0558953285217285, "learning_rate": 8.707127379212868e-05, "loss": 1.3498, "step": 1629 }, { "epoch": 0.2458521870286576, "grad_norm": 1.8655160665512085, "learning_rate": 8.705512770074868e-05, "loss": 1.1202, "step": 1630 }, { "epoch": 0.24600301659125187, "grad_norm": 2.00508713722229, "learning_rate": 8.703897303263965e-05, "loss": 1.1969, "step": 1631 }, { "epoch": 0.24615384615384617, "grad_norm": 2.238654375076294, "learning_rate": 8.702280979154076e-05, "loss": 1.4335, "step": 1632 }, { "epoch": 0.24630467571644044, "grad_norm": 1.878458857536316, "learning_rate": 8.70066379811931e-05, "loss": 1.1448, "step": 1633 }, { "epoch": 0.2464555052790347, "grad_norm": 1.6219549179077148, "learning_rate": 8.699045760533979e-05, "loss": 0.7762, "step": 1634 }, { "epoch": 0.24660633484162897, "grad_norm": 1.9893156290054321, "learning_rate": 8.697426866772591e-05, "loss": 1.3111, "step": 1635 }, { "epoch": 0.24675716440422324, "grad_norm": 2.0122084617614746, "learning_rate": 8.695807117209853e-05, "loss": 1.2381, "step": 1636 }, { "epoch": 0.2469079939668175, "grad_norm": 2.1094706058502197, "learning_rate": 8.69418651222067e-05, "loss": 1.2586, "step": 1637 }, { "epoch": 0.24705882352941178, "grad_norm": 1.9445891380310059, "learning_rate": 8.692565052180146e-05, "loss": 1.1162, "step": 1638 }, { "epoch": 0.24720965309200604, "grad_norm": 2.1635138988494873, "learning_rate": 8.690942737463581e-05, "loss": 1.1594, "step": 1639 }, { "epoch": 0.2473604826546003, "grad_norm": 2.190124034881592, "learning_rate": 8.689319568446475e-05, "loss": 1.5239, "step": 1640 }, { "epoch": 0.24751131221719458, "grad_norm": 2.232959270477295, "learning_rate": 8.687695545504521e-05, "loss": 1.2181, "step": 1641 }, { "epoch": 0.24766214177978885, "grad_norm": 2.1134564876556396, "learning_rate": 8.686070669013618e-05, "loss": 1.1851, "step": 1642 }, { "epoch": 0.24781297134238311, "grad_norm": 2.3410351276397705, "learning_rate": 8.684444939349854e-05, "loss": 1.5767, "step": 1643 }, { "epoch": 0.24796380090497738, "grad_norm": 1.9487030506134033, "learning_rate": 8.682818356889521e-05, "loss": 1.0484, "step": 1644 }, { "epoch": 0.24811463046757165, "grad_norm": 2.021014928817749, "learning_rate": 8.681190922009101e-05, "loss": 0.8725, "step": 1645 }, { "epoch": 0.24826546003016592, "grad_norm": 1.9957084655761719, "learning_rate": 8.679562635085283e-05, "loss": 1.0572, "step": 1646 }, { "epoch": 0.2484162895927602, "grad_norm": 1.9742887020111084, "learning_rate": 8.677933496494946e-05, "loss": 0.9607, "step": 1647 }, { "epoch": 0.24856711915535445, "grad_norm": 1.7382841110229492, "learning_rate": 8.676303506615167e-05, "loss": 0.776, "step": 1648 }, { "epoch": 0.24871794871794872, "grad_norm": 1.866708755493164, "learning_rate": 8.674672665823223e-05, "loss": 0.7761, "step": 1649 }, { "epoch": 0.248868778280543, "grad_norm": 1.9779770374298096, "learning_rate": 8.673040974496584e-05, "loss": 1.0255, "step": 1650 }, { "epoch": 0.24901960784313726, "grad_norm": 2.5890915393829346, "learning_rate": 8.671408433012922e-05, "loss": 1.6895, "step": 1651 }, { "epoch": 0.24917043740573153, "grad_norm": 2.1492977142333984, "learning_rate": 8.6697750417501e-05, "loss": 1.1241, "step": 1652 }, { "epoch": 0.2493212669683258, "grad_norm": 2.2635326385498047, "learning_rate": 8.668140801086182e-05, "loss": 1.569, "step": 1653 }, { "epoch": 0.24947209653092006, "grad_norm": 2.11495041847229, "learning_rate": 8.666505711399429e-05, "loss": 1.3615, "step": 1654 }, { "epoch": 0.24962292609351433, "grad_norm": 2.2493021488189697, "learning_rate": 8.664869773068291e-05, "loss": 1.7702, "step": 1655 }, { "epoch": 0.2497737556561086, "grad_norm": 2.3917930126190186, "learning_rate": 8.663232986471426e-05, "loss": 1.7913, "step": 1656 }, { "epoch": 0.24992458521870287, "grad_norm": 2.0683367252349854, "learning_rate": 8.66159535198768e-05, "loss": 1.3467, "step": 1657 }, { "epoch": 0.25007541478129713, "grad_norm": 1.692660927772522, "learning_rate": 8.659956869996096e-05, "loss": 1.0292, "step": 1658 }, { "epoch": 0.2502262443438914, "grad_norm": 2.1042561531066895, "learning_rate": 8.658317540875918e-05, "loss": 1.4564, "step": 1659 }, { "epoch": 0.25037707390648567, "grad_norm": 2.123304605484009, "learning_rate": 8.656677365006579e-05, "loss": 1.7128, "step": 1660 }, { "epoch": 0.25052790346907994, "grad_norm": 1.757774829864502, "learning_rate": 8.655036342767716e-05, "loss": 1.0944, "step": 1661 }, { "epoch": 0.2506787330316742, "grad_norm": 1.9367386102676392, "learning_rate": 8.653394474539154e-05, "loss": 1.3446, "step": 1662 }, { "epoch": 0.2508295625942685, "grad_norm": 2.0554773807525635, "learning_rate": 8.651751760700921e-05, "loss": 1.4363, "step": 1663 }, { "epoch": 0.25098039215686274, "grad_norm": 1.9044411182403564, "learning_rate": 8.650108201633236e-05, "loss": 1.2321, "step": 1664 }, { "epoch": 0.251131221719457, "grad_norm": 2.181232452392578, "learning_rate": 8.648463797716512e-05, "loss": 1.6281, "step": 1665 }, { "epoch": 0.2512820512820513, "grad_norm": 1.9495446681976318, "learning_rate": 8.646818549331366e-05, "loss": 1.5498, "step": 1666 }, { "epoch": 0.25143288084464555, "grad_norm": 2.0607523918151855, "learning_rate": 8.645172456858601e-05, "loss": 1.2386, "step": 1667 }, { "epoch": 0.2515837104072398, "grad_norm": 1.7833044528961182, "learning_rate": 8.64352552067922e-05, "loss": 1.1368, "step": 1668 }, { "epoch": 0.2517345399698341, "grad_norm": 1.9121464490890503, "learning_rate": 8.64187774117442e-05, "loss": 1.1607, "step": 1669 }, { "epoch": 0.25188536953242835, "grad_norm": 1.7391852140426636, "learning_rate": 8.640229118725595e-05, "loss": 1.1658, "step": 1670 }, { "epoch": 0.2520361990950226, "grad_norm": 1.8700181245803833, "learning_rate": 8.638579653714335e-05, "loss": 1.2557, "step": 1671 }, { "epoch": 0.2521870286576169, "grad_norm": 1.9725980758666992, "learning_rate": 8.636929346522418e-05, "loss": 1.425, "step": 1672 }, { "epoch": 0.25233785822021115, "grad_norm": 2.0844554901123047, "learning_rate": 8.635278197531827e-05, "loss": 1.6664, "step": 1673 }, { "epoch": 0.2524886877828054, "grad_norm": 1.8714632987976074, "learning_rate": 8.63362620712473e-05, "loss": 1.1537, "step": 1674 }, { "epoch": 0.2526395173453997, "grad_norm": 2.3312389850616455, "learning_rate": 8.631973375683495e-05, "loss": 1.9106, "step": 1675 }, { "epoch": 0.25279034690799396, "grad_norm": 2.077479124069214, "learning_rate": 8.630319703590688e-05, "loss": 1.2966, "step": 1676 }, { "epoch": 0.2529411764705882, "grad_norm": 1.8629156351089478, "learning_rate": 8.628665191229062e-05, "loss": 1.2511, "step": 1677 }, { "epoch": 0.2530920060331825, "grad_norm": 2.1629750728607178, "learning_rate": 8.62700983898157e-05, "loss": 1.6146, "step": 1678 }, { "epoch": 0.25324283559577676, "grad_norm": 2.092108964920044, "learning_rate": 8.625353647231356e-05, "loss": 1.2196, "step": 1679 }, { "epoch": 0.25339366515837103, "grad_norm": 1.8568159341812134, "learning_rate": 8.623696616361761e-05, "loss": 1.1954, "step": 1680 }, { "epoch": 0.2535444947209653, "grad_norm": 1.9321937561035156, "learning_rate": 8.622038746756319e-05, "loss": 1.1786, "step": 1681 }, { "epoch": 0.25369532428355956, "grad_norm": 1.9487347602844238, "learning_rate": 8.620380038798759e-05, "loss": 1.3938, "step": 1682 }, { "epoch": 0.25384615384615383, "grad_norm": 2.177527666091919, "learning_rate": 8.618720492873e-05, "loss": 1.1967, "step": 1683 }, { "epoch": 0.2539969834087481, "grad_norm": 1.7819695472717285, "learning_rate": 8.617060109363163e-05, "loss": 1.0692, "step": 1684 }, { "epoch": 0.25414781297134237, "grad_norm": 1.999214768409729, "learning_rate": 8.615398888653555e-05, "loss": 1.362, "step": 1685 }, { "epoch": 0.25429864253393664, "grad_norm": 1.887911081314087, "learning_rate": 8.61373683112868e-05, "loss": 1.0883, "step": 1686 }, { "epoch": 0.2544494720965309, "grad_norm": 1.9062210321426392, "learning_rate": 8.612073937173237e-05, "loss": 1.167, "step": 1687 }, { "epoch": 0.25460030165912517, "grad_norm": 2.307865858078003, "learning_rate": 8.610410207172117e-05, "loss": 1.4992, "step": 1688 }, { "epoch": 0.25475113122171944, "grad_norm": 1.8345000743865967, "learning_rate": 8.608745641510403e-05, "loss": 0.951, "step": 1689 }, { "epoch": 0.2549019607843137, "grad_norm": 2.1507961750030518, "learning_rate": 8.607080240573373e-05, "loss": 1.2504, "step": 1690 }, { "epoch": 0.255052790346908, "grad_norm": 2.006972074508667, "learning_rate": 8.6054140047465e-05, "loss": 1.2873, "step": 1691 }, { "epoch": 0.25520361990950224, "grad_norm": 2.1385738849639893, "learning_rate": 8.603746934415449e-05, "loss": 1.318, "step": 1692 }, { "epoch": 0.2553544494720965, "grad_norm": 2.2225115299224854, "learning_rate": 8.602079029966076e-05, "loss": 1.3764, "step": 1693 }, { "epoch": 0.2555052790346908, "grad_norm": 2.1547582149505615, "learning_rate": 8.600410291784433e-05, "loss": 1.2978, "step": 1694 }, { "epoch": 0.25565610859728505, "grad_norm": 2.296815872192383, "learning_rate": 8.598740720256763e-05, "loss": 1.3161, "step": 1695 }, { "epoch": 0.2558069381598793, "grad_norm": 2.261298418045044, "learning_rate": 8.597070315769504e-05, "loss": 1.3243, "step": 1696 }, { "epoch": 0.2559577677224736, "grad_norm": 2.2688779830932617, "learning_rate": 8.595399078709285e-05, "loss": 1.307, "step": 1697 }, { "epoch": 0.25610859728506785, "grad_norm": 1.654139757156372, "learning_rate": 8.593727009462928e-05, "loss": 0.7872, "step": 1698 }, { "epoch": 0.2562594268476621, "grad_norm": 1.706728219985962, "learning_rate": 8.592054108417449e-05, "loss": 0.6803, "step": 1699 }, { "epoch": 0.2564102564102564, "grad_norm": 1.688580870628357, "learning_rate": 8.590380375960054e-05, "loss": 0.9869, "step": 1700 }, { "epoch": 0.25656108597285066, "grad_norm": 2.40749454498291, "learning_rate": 8.588705812478142e-05, "loss": 1.6796, "step": 1701 }, { "epoch": 0.2567119155354449, "grad_norm": 2.359017848968506, "learning_rate": 8.587030418359307e-05, "loss": 1.6124, "step": 1702 }, { "epoch": 0.2568627450980392, "grad_norm": 2.228362798690796, "learning_rate": 8.585354193991333e-05, "loss": 1.3808, "step": 1703 }, { "epoch": 0.25701357466063346, "grad_norm": 1.9198143482208252, "learning_rate": 8.583677139762195e-05, "loss": 1.2234, "step": 1704 }, { "epoch": 0.2571644042232277, "grad_norm": 2.0590853691101074, "learning_rate": 8.581999256060064e-05, "loss": 1.5322, "step": 1705 }, { "epoch": 0.257315233785822, "grad_norm": 2.1521620750427246, "learning_rate": 8.5803205432733e-05, "loss": 1.3291, "step": 1706 }, { "epoch": 0.25746606334841626, "grad_norm": 1.9085427522659302, "learning_rate": 8.578641001790455e-05, "loss": 1.3797, "step": 1707 }, { "epoch": 0.25761689291101053, "grad_norm": 2.3034329414367676, "learning_rate": 8.576960632000273e-05, "loss": 1.4197, "step": 1708 }, { "epoch": 0.2577677224736048, "grad_norm": 1.8611953258514404, "learning_rate": 8.575279434291693e-05, "loss": 1.0734, "step": 1709 }, { "epoch": 0.2579185520361991, "grad_norm": 1.8516241312026978, "learning_rate": 8.573597409053838e-05, "loss": 1.0297, "step": 1710 }, { "epoch": 0.2580693815987934, "grad_norm": 1.9145904779434204, "learning_rate": 8.57191455667603e-05, "loss": 1.2722, "step": 1711 }, { "epoch": 0.25822021116138766, "grad_norm": 2.1757001876831055, "learning_rate": 8.57023087754778e-05, "loss": 1.4919, "step": 1712 }, { "epoch": 0.2583710407239819, "grad_norm": 1.7386876344680786, "learning_rate": 8.568546372058789e-05, "loss": 1.1745, "step": 1713 }, { "epoch": 0.2585218702865762, "grad_norm": 1.9458181858062744, "learning_rate": 8.566861040598951e-05, "loss": 1.335, "step": 1714 }, { "epoch": 0.25867269984917046, "grad_norm": 1.9183921813964844, "learning_rate": 8.565174883558352e-05, "loss": 1.346, "step": 1715 }, { "epoch": 0.25882352941176473, "grad_norm": 1.971706509590149, "learning_rate": 8.563487901327262e-05, "loss": 1.5172, "step": 1716 }, { "epoch": 0.258974358974359, "grad_norm": 1.7054967880249023, "learning_rate": 8.561800094296154e-05, "loss": 1.0898, "step": 1717 }, { "epoch": 0.25912518853695327, "grad_norm": 1.9653642177581787, "learning_rate": 8.560111462855682e-05, "loss": 1.3865, "step": 1718 }, { "epoch": 0.25927601809954753, "grad_norm": 1.8593882322311401, "learning_rate": 8.558422007396696e-05, "loss": 1.2542, "step": 1719 }, { "epoch": 0.2594268476621418, "grad_norm": 1.7923235893249512, "learning_rate": 8.556731728310234e-05, "loss": 1.1304, "step": 1720 }, { "epoch": 0.25957767722473607, "grad_norm": 2.026214122772217, "learning_rate": 8.555040625987527e-05, "loss": 1.47, "step": 1721 }, { "epoch": 0.25972850678733034, "grad_norm": 1.7049280405044556, "learning_rate": 8.553348700819994e-05, "loss": 1.1701, "step": 1722 }, { "epoch": 0.2598793363499246, "grad_norm": 1.9804025888442993, "learning_rate": 8.551655953199245e-05, "loss": 1.337, "step": 1723 }, { "epoch": 0.2600301659125189, "grad_norm": 1.9181625843048096, "learning_rate": 8.549962383517083e-05, "loss": 1.4181, "step": 1724 }, { "epoch": 0.26018099547511314, "grad_norm": 1.9684301614761353, "learning_rate": 8.548267992165497e-05, "loss": 1.5878, "step": 1725 }, { "epoch": 0.2603318250377074, "grad_norm": 1.8727178573608398, "learning_rate": 8.54657277953667e-05, "loss": 1.1864, "step": 1726 }, { "epoch": 0.2604826546003017, "grad_norm": 1.9245989322662354, "learning_rate": 8.544876746022974e-05, "loss": 1.2136, "step": 1727 }, { "epoch": 0.26063348416289595, "grad_norm": 2.0022096633911133, "learning_rate": 8.543179892016969e-05, "loss": 1.054, "step": 1728 }, { "epoch": 0.2607843137254902, "grad_norm": 1.6546602249145508, "learning_rate": 8.541482217911408e-05, "loss": 0.9192, "step": 1729 }, { "epoch": 0.2609351432880845, "grad_norm": 1.9985918998718262, "learning_rate": 8.539783724099231e-05, "loss": 1.1822, "step": 1730 }, { "epoch": 0.26108597285067875, "grad_norm": 2.2371222972869873, "learning_rate": 8.538084410973571e-05, "loss": 1.5245, "step": 1731 }, { "epoch": 0.261236802413273, "grad_norm": 2.111189126968384, "learning_rate": 8.536384278927747e-05, "loss": 1.1343, "step": 1732 }, { "epoch": 0.2613876319758673, "grad_norm": 1.9571455717086792, "learning_rate": 8.534683328355269e-05, "loss": 1.1297, "step": 1733 }, { "epoch": 0.26153846153846155, "grad_norm": 1.9911381006240845, "learning_rate": 8.532981559649839e-05, "loss": 1.2272, "step": 1734 }, { "epoch": 0.2616892911010558, "grad_norm": 2.275474786758423, "learning_rate": 8.531278973205343e-05, "loss": 1.4373, "step": 1735 }, { "epoch": 0.2618401206636501, "grad_norm": 2.20463228225708, "learning_rate": 8.529575569415862e-05, "loss": 1.2887, "step": 1736 }, { "epoch": 0.26199095022624436, "grad_norm": 2.003204107284546, "learning_rate": 8.527871348675661e-05, "loss": 1.1599, "step": 1737 }, { "epoch": 0.2621417797888386, "grad_norm": 2.7638907432556152, "learning_rate": 8.5261663113792e-05, "loss": 1.7713, "step": 1738 }, { "epoch": 0.2622926093514329, "grad_norm": 2.2376015186309814, "learning_rate": 8.52446045792112e-05, "loss": 1.4551, "step": 1739 }, { "epoch": 0.26244343891402716, "grad_norm": 2.116881847381592, "learning_rate": 8.522753788696258e-05, "loss": 1.1355, "step": 1740 }, { "epoch": 0.26259426847662143, "grad_norm": 2.2198848724365234, "learning_rate": 8.521046304099638e-05, "loss": 1.29, "step": 1741 }, { "epoch": 0.2627450980392157, "grad_norm": 2.4494173526763916, "learning_rate": 8.519338004526472e-05, "loss": 1.4939, "step": 1742 }, { "epoch": 0.26289592760180996, "grad_norm": 2.334120273590088, "learning_rate": 8.51762889037216e-05, "loss": 1.4015, "step": 1743 }, { "epoch": 0.26304675716440423, "grad_norm": 2.8597159385681152, "learning_rate": 8.51591896203229e-05, "loss": 1.4801, "step": 1744 }, { "epoch": 0.2631975867269985, "grad_norm": 2.1932003498077393, "learning_rate": 8.514208219902641e-05, "loss": 1.3021, "step": 1745 }, { "epoch": 0.26334841628959277, "grad_norm": 2.0252768993377686, "learning_rate": 8.512496664379182e-05, "loss": 1.0371, "step": 1746 }, { "epoch": 0.26349924585218704, "grad_norm": 1.7073770761489868, "learning_rate": 8.51078429585806e-05, "loss": 0.8198, "step": 1747 }, { "epoch": 0.2636500754147813, "grad_norm": 1.993157982826233, "learning_rate": 8.509071114735623e-05, "loss": 0.8919, "step": 1748 }, { "epoch": 0.26380090497737557, "grad_norm": 2.1012964248657227, "learning_rate": 8.507357121408401e-05, "loss": 1.127, "step": 1749 }, { "epoch": 0.26395173453996984, "grad_norm": 2.2206454277038574, "learning_rate": 8.505642316273113e-05, "loss": 1.2511, "step": 1750 }, { "epoch": 0.2641025641025641, "grad_norm": 2.536593437194824, "learning_rate": 8.50392669972666e-05, "loss": 1.645, "step": 1751 }, { "epoch": 0.2642533936651584, "grad_norm": 2.021211862564087, "learning_rate": 8.502210272166145e-05, "loss": 1.1436, "step": 1752 }, { "epoch": 0.26440422322775264, "grad_norm": 2.0649125576019287, "learning_rate": 8.500493033988844e-05, "loss": 1.3838, "step": 1753 }, { "epoch": 0.2645550527903469, "grad_norm": 2.0226950645446777, "learning_rate": 8.498774985592226e-05, "loss": 1.2671, "step": 1754 }, { "epoch": 0.2647058823529412, "grad_norm": 2.0565273761749268, "learning_rate": 8.497056127373953e-05, "loss": 1.5337, "step": 1755 }, { "epoch": 0.26485671191553545, "grad_norm": 1.8130093812942505, "learning_rate": 8.495336459731864e-05, "loss": 1.1038, "step": 1756 }, { "epoch": 0.2650075414781297, "grad_norm": 2.1372110843658447, "learning_rate": 8.493615983063996e-05, "loss": 1.5498, "step": 1757 }, { "epoch": 0.265158371040724, "grad_norm": 2.2411484718322754, "learning_rate": 8.491894697768564e-05, "loss": 1.7803, "step": 1758 }, { "epoch": 0.26530920060331825, "grad_norm": 2.16078782081604, "learning_rate": 8.490172604243977e-05, "loss": 1.5047, "step": 1759 }, { "epoch": 0.2654600301659125, "grad_norm": 1.9883795976638794, "learning_rate": 8.488449702888827e-05, "loss": 1.4161, "step": 1760 }, { "epoch": 0.2656108597285068, "grad_norm": 1.9035414457321167, "learning_rate": 8.486725994101896e-05, "loss": 1.3497, "step": 1761 }, { "epoch": 0.26576168929110106, "grad_norm": 1.7272480726242065, "learning_rate": 8.485001478282151e-05, "loss": 1.2197, "step": 1762 }, { "epoch": 0.2659125188536953, "grad_norm": 1.8524956703186035, "learning_rate": 8.483276155828744e-05, "loss": 1.2134, "step": 1763 }, { "epoch": 0.2660633484162896, "grad_norm": 2.382441759109497, "learning_rate": 8.481550027141016e-05, "loss": 1.6544, "step": 1764 }, { "epoch": 0.26621417797888386, "grad_norm": 2.1464788913726807, "learning_rate": 8.479823092618496e-05, "loss": 1.5823, "step": 1765 }, { "epoch": 0.2663650075414781, "grad_norm": 1.7801110744476318, "learning_rate": 8.478095352660897e-05, "loss": 1.3661, "step": 1766 }, { "epoch": 0.2665158371040724, "grad_norm": 1.9557703733444214, "learning_rate": 8.476366807668122e-05, "loss": 1.1612, "step": 1767 }, { "epoch": 0.26666666666666666, "grad_norm": 1.7250722646713257, "learning_rate": 8.474637458040253e-05, "loss": 1.3119, "step": 1768 }, { "epoch": 0.26681749622926093, "grad_norm": 1.8538395166397095, "learning_rate": 8.472907304177565e-05, "loss": 1.2401, "step": 1769 }, { "epoch": 0.2669683257918552, "grad_norm": 1.6720640659332275, "learning_rate": 8.471176346480518e-05, "loss": 1.1605, "step": 1770 }, { "epoch": 0.26711915535444947, "grad_norm": 2.033076286315918, "learning_rate": 8.469444585349757e-05, "loss": 1.4984, "step": 1771 }, { "epoch": 0.26726998491704373, "grad_norm": 2.017662286758423, "learning_rate": 8.467712021186111e-05, "loss": 1.7678, "step": 1772 }, { "epoch": 0.267420814479638, "grad_norm": 1.8662738800048828, "learning_rate": 8.465978654390598e-05, "loss": 1.0951, "step": 1773 }, { "epoch": 0.26757164404223227, "grad_norm": 1.8508961200714111, "learning_rate": 8.464244485364422e-05, "loss": 1.266, "step": 1774 }, { "epoch": 0.26772247360482654, "grad_norm": 1.7888880968093872, "learning_rate": 8.46250951450897e-05, "loss": 1.2651, "step": 1775 }, { "epoch": 0.2678733031674208, "grad_norm": 2.297257900238037, "learning_rate": 8.460773742225816e-05, "loss": 1.6538, "step": 1776 }, { "epoch": 0.2680241327300151, "grad_norm": 1.991653561592102, "learning_rate": 8.459037168916721e-05, "loss": 1.5407, "step": 1777 }, { "epoch": 0.26817496229260934, "grad_norm": 1.8159890174865723, "learning_rate": 8.457299794983628e-05, "loss": 1.2379, "step": 1778 }, { "epoch": 0.2683257918552036, "grad_norm": 2.2436141967773438, "learning_rate": 8.455561620828667e-05, "loss": 1.8238, "step": 1779 }, { "epoch": 0.2684766214177979, "grad_norm": 2.1632773876190186, "learning_rate": 8.453822646854155e-05, "loss": 1.3926, "step": 1780 }, { "epoch": 0.26862745098039215, "grad_norm": 1.9314738512039185, "learning_rate": 8.452082873462592e-05, "loss": 1.3799, "step": 1781 }, { "epoch": 0.2687782805429864, "grad_norm": 2.081183433532715, "learning_rate": 8.450342301056664e-05, "loss": 1.3956, "step": 1782 }, { "epoch": 0.2689291101055807, "grad_norm": 1.8697906732559204, "learning_rate": 8.44860093003924e-05, "loss": 1.2001, "step": 1783 }, { "epoch": 0.26907993966817495, "grad_norm": 1.8104020357131958, "learning_rate": 8.446858760813376e-05, "loss": 1.2725, "step": 1784 }, { "epoch": 0.2692307692307692, "grad_norm": 1.8735888004302979, "learning_rate": 8.445115793782312e-05, "loss": 1.0933, "step": 1785 }, { "epoch": 0.2693815987933635, "grad_norm": 1.9050918817520142, "learning_rate": 8.443372029349474e-05, "loss": 1.2145, "step": 1786 }, { "epoch": 0.26953242835595775, "grad_norm": 1.8834900856018066, "learning_rate": 8.44162746791847e-05, "loss": 1.1302, "step": 1787 }, { "epoch": 0.269683257918552, "grad_norm": 1.933315396308899, "learning_rate": 8.439882109893094e-05, "loss": 1.0384, "step": 1788 }, { "epoch": 0.2698340874811463, "grad_norm": 2.2339189052581787, "learning_rate": 8.438135955677325e-05, "loss": 1.5134, "step": 1789 }, { "epoch": 0.26998491704374056, "grad_norm": 2.1519527435302734, "learning_rate": 8.436389005675325e-05, "loss": 1.3229, "step": 1790 }, { "epoch": 0.2701357466063348, "grad_norm": 2.1145355701446533, "learning_rate": 8.434641260291441e-05, "loss": 1.2153, "step": 1791 }, { "epoch": 0.2702865761689291, "grad_norm": 2.2097833156585693, "learning_rate": 8.4328927199302e-05, "loss": 1.2387, "step": 1792 }, { "epoch": 0.27043740573152336, "grad_norm": 2.0797996520996094, "learning_rate": 8.431143384996322e-05, "loss": 1.3311, "step": 1793 }, { "epoch": 0.27058823529411763, "grad_norm": 2.558091402053833, "learning_rate": 8.429393255894704e-05, "loss": 1.5521, "step": 1794 }, { "epoch": 0.2707390648567119, "grad_norm": 2.2153680324554443, "learning_rate": 8.427642333030427e-05, "loss": 1.1057, "step": 1795 }, { "epoch": 0.27088989441930617, "grad_norm": 1.6978236436843872, "learning_rate": 8.425890616808761e-05, "loss": 0.7566, "step": 1796 }, { "epoch": 0.27104072398190043, "grad_norm": 1.4796879291534424, "learning_rate": 8.424138107635151e-05, "loss": 0.6584, "step": 1797 }, { "epoch": 0.2711915535444947, "grad_norm": 1.9742679595947266, "learning_rate": 8.422384805915232e-05, "loss": 0.9266, "step": 1798 }, { "epoch": 0.27134238310708897, "grad_norm": 1.7691909074783325, "learning_rate": 8.420630712054822e-05, "loss": 0.8636, "step": 1799 }, { "epoch": 0.27149321266968324, "grad_norm": 1.8918894529342651, "learning_rate": 8.41887582645992e-05, "loss": 1.0782, "step": 1800 }, { "epoch": 0.2716440422322775, "grad_norm": 3.047360420227051, "learning_rate": 8.417120149536712e-05, "loss": 1.97, "step": 1801 }, { "epoch": 0.2717948717948718, "grad_norm": 2.5801033973693848, "learning_rate": 8.41536368169156e-05, "loss": 1.5839, "step": 1802 }, { "epoch": 0.27194570135746604, "grad_norm": 2.2631614208221436, "learning_rate": 8.413606423331018e-05, "loss": 1.3656, "step": 1803 }, { "epoch": 0.2720965309200603, "grad_norm": 1.9312583208084106, "learning_rate": 8.411848374861815e-05, "loss": 1.1101, "step": 1804 }, { "epoch": 0.2722473604826546, "grad_norm": 2.072216033935547, "learning_rate": 8.410089536690868e-05, "loss": 1.4676, "step": 1805 }, { "epoch": 0.27239819004524884, "grad_norm": 1.936889410018921, "learning_rate": 8.408329909225278e-05, "loss": 1.4214, "step": 1806 }, { "epoch": 0.2725490196078431, "grad_norm": 1.9845346212387085, "learning_rate": 8.406569492872322e-05, "loss": 1.3905, "step": 1807 }, { "epoch": 0.2726998491704374, "grad_norm": 1.4838321208953857, "learning_rate": 8.404808288039465e-05, "loss": 0.9603, "step": 1808 }, { "epoch": 0.27285067873303165, "grad_norm": 2.535266399383545, "learning_rate": 8.403046295134354e-05, "loss": 1.8992, "step": 1809 }, { "epoch": 0.2730015082956259, "grad_norm": 2.08539080619812, "learning_rate": 8.401283514564815e-05, "loss": 1.3989, "step": 1810 }, { "epoch": 0.2731523378582202, "grad_norm": 1.7666457891464233, "learning_rate": 8.399519946738861e-05, "loss": 0.9054, "step": 1811 }, { "epoch": 0.27330316742081445, "grad_norm": 1.6327643394470215, "learning_rate": 8.397755592064684e-05, "loss": 0.9758, "step": 1812 }, { "epoch": 0.2734539969834088, "grad_norm": 1.893851637840271, "learning_rate": 8.395990450950659e-05, "loss": 1.3036, "step": 1813 }, { "epoch": 0.27360482654600304, "grad_norm": 1.8213824033737183, "learning_rate": 8.394224523805343e-05, "loss": 1.0344, "step": 1814 }, { "epoch": 0.2737556561085973, "grad_norm": 1.9491199254989624, "learning_rate": 8.392457811037478e-05, "loss": 1.2548, "step": 1815 }, { "epoch": 0.2739064856711916, "grad_norm": 2.1402478218078613, "learning_rate": 8.39069031305598e-05, "loss": 1.6338, "step": 1816 }, { "epoch": 0.27405731523378585, "grad_norm": 1.8611786365509033, "learning_rate": 8.388922030269955e-05, "loss": 1.2656, "step": 1817 }, { "epoch": 0.2742081447963801, "grad_norm": 1.982978343963623, "learning_rate": 8.387152963088687e-05, "loss": 1.4326, "step": 1818 }, { "epoch": 0.2743589743589744, "grad_norm": 1.9269381761550903, "learning_rate": 8.38538311192164e-05, "loss": 1.3509, "step": 1819 }, { "epoch": 0.27450980392156865, "grad_norm": 1.9634844064712524, "learning_rate": 8.383612477178463e-05, "loss": 1.3365, "step": 1820 }, { "epoch": 0.2746606334841629, "grad_norm": 1.8501307964324951, "learning_rate": 8.381841059268987e-05, "loss": 1.2513, "step": 1821 }, { "epoch": 0.2748114630467572, "grad_norm": 1.8846673965454102, "learning_rate": 8.380068858603216e-05, "loss": 1.131, "step": 1822 }, { "epoch": 0.27496229260935146, "grad_norm": 1.8553606271743774, "learning_rate": 8.378295875591347e-05, "loss": 1.237, "step": 1823 }, { "epoch": 0.2751131221719457, "grad_norm": 1.8840142488479614, "learning_rate": 8.376522110643749e-05, "loss": 1.214, "step": 1824 }, { "epoch": 0.27526395173454, "grad_norm": 2.016960382461548, "learning_rate": 8.374747564170974e-05, "loss": 1.2949, "step": 1825 }, { "epoch": 0.27541478129713426, "grad_norm": 1.7327892780303955, "learning_rate": 8.372972236583761e-05, "loss": 1.0036, "step": 1826 }, { "epoch": 0.2755656108597285, "grad_norm": 1.8761262893676758, "learning_rate": 8.371196128293018e-05, "loss": 1.4026, "step": 1827 }, { "epoch": 0.2757164404223228, "grad_norm": 1.9945921897888184, "learning_rate": 8.369419239709847e-05, "loss": 1.5756, "step": 1828 }, { "epoch": 0.27586726998491706, "grad_norm": 2.251904249191284, "learning_rate": 8.367641571245521e-05, "loss": 1.2687, "step": 1829 }, { "epoch": 0.27601809954751133, "grad_norm": 1.9971143007278442, "learning_rate": 8.365863123311498e-05, "loss": 1.1205, "step": 1830 }, { "epoch": 0.2761689291101056, "grad_norm": 2.296811580657959, "learning_rate": 8.364083896319413e-05, "loss": 1.5748, "step": 1831 }, { "epoch": 0.27631975867269987, "grad_norm": 2.165687322616577, "learning_rate": 8.362303890681087e-05, "loss": 1.3314, "step": 1832 }, { "epoch": 0.27647058823529413, "grad_norm": 1.8314911127090454, "learning_rate": 8.360523106808513e-05, "loss": 1.2421, "step": 1833 }, { "epoch": 0.2766214177978884, "grad_norm": 2.1203720569610596, "learning_rate": 8.358741545113873e-05, "loss": 1.3599, "step": 1834 }, { "epoch": 0.27677224736048267, "grad_norm": 2.327512741088867, "learning_rate": 8.356959206009523e-05, "loss": 1.4982, "step": 1835 }, { "epoch": 0.27692307692307694, "grad_norm": 1.8770159482955933, "learning_rate": 8.355176089908002e-05, "loss": 1.1194, "step": 1836 }, { "epoch": 0.2770739064856712, "grad_norm": 2.0236198902130127, "learning_rate": 8.353392197222024e-05, "loss": 1.1778, "step": 1837 }, { "epoch": 0.2772247360482655, "grad_norm": 1.9943221807479858, "learning_rate": 8.35160752836449e-05, "loss": 1.1153, "step": 1838 }, { "epoch": 0.27737556561085974, "grad_norm": 1.8838574886322021, "learning_rate": 8.349822083748475e-05, "loss": 1.1266, "step": 1839 }, { "epoch": 0.277526395173454, "grad_norm": 2.2954235076904297, "learning_rate": 8.348035863787238e-05, "loss": 1.4456, "step": 1840 }, { "epoch": 0.2776772247360483, "grad_norm": 2.043288230895996, "learning_rate": 8.346248868894213e-05, "loss": 1.2227, "step": 1841 }, { "epoch": 0.27782805429864255, "grad_norm": 2.1289315223693848, "learning_rate": 8.344461099483017e-05, "loss": 1.5634, "step": 1842 }, { "epoch": 0.2779788838612368, "grad_norm": 1.92012357711792, "learning_rate": 8.342672555967442e-05, "loss": 1.2807, "step": 1843 }, { "epoch": 0.2781297134238311, "grad_norm": 2.238909959793091, "learning_rate": 8.340883238761463e-05, "loss": 1.1618, "step": 1844 }, { "epoch": 0.27828054298642535, "grad_norm": 2.253582000732422, "learning_rate": 8.339093148279233e-05, "loss": 1.2655, "step": 1845 }, { "epoch": 0.2784313725490196, "grad_norm": 2.005913734436035, "learning_rate": 8.337302284935086e-05, "loss": 0.9982, "step": 1846 }, { "epoch": 0.2785822021116139, "grad_norm": 1.7619290351867676, "learning_rate": 8.33551064914353e-05, "loss": 1.1483, "step": 1847 }, { "epoch": 0.27873303167420815, "grad_norm": 1.834871530532837, "learning_rate": 8.333718241319255e-05, "loss": 0.9027, "step": 1848 }, { "epoch": 0.2788838612368024, "grad_norm": 2.0664381980895996, "learning_rate": 8.331925061877131e-05, "loss": 1.1563, "step": 1849 }, { "epoch": 0.2790346907993967, "grad_norm": 1.7221072912216187, "learning_rate": 8.330131111232203e-05, "loss": 0.859, "step": 1850 }, { "epoch": 0.27918552036199096, "grad_norm": 2.3939151763916016, "learning_rate": 8.328336389799696e-05, "loss": 1.7602, "step": 1851 }, { "epoch": 0.2793363499245852, "grad_norm": 2.44992995262146, "learning_rate": 8.326540897995015e-05, "loss": 1.6157, "step": 1852 }, { "epoch": 0.2794871794871795, "grad_norm": 2.2985663414001465, "learning_rate": 8.324744636233742e-05, "loss": 1.5827, "step": 1853 }, { "epoch": 0.27963800904977376, "grad_norm": 2.0403196811676025, "learning_rate": 8.322947604931637e-05, "loss": 1.4919, "step": 1854 }, { "epoch": 0.27978883861236803, "grad_norm": 2.158809185028076, "learning_rate": 8.321149804504638e-05, "loss": 1.4543, "step": 1855 }, { "epoch": 0.2799396681749623, "grad_norm": 1.8818317651748657, "learning_rate": 8.319351235368862e-05, "loss": 1.3887, "step": 1856 }, { "epoch": 0.28009049773755657, "grad_norm": 1.826813817024231, "learning_rate": 8.317551897940604e-05, "loss": 1.2025, "step": 1857 }, { "epoch": 0.28024132730015083, "grad_norm": 1.8183190822601318, "learning_rate": 8.315751792636335e-05, "loss": 1.2194, "step": 1858 }, { "epoch": 0.2803921568627451, "grad_norm": 2.5023345947265625, "learning_rate": 8.313950919872707e-05, "loss": 1.6034, "step": 1859 }, { "epoch": 0.28054298642533937, "grad_norm": 1.9086394309997559, "learning_rate": 8.312149280066542e-05, "loss": 1.0225, "step": 1860 }, { "epoch": 0.28069381598793364, "grad_norm": 1.766782283782959, "learning_rate": 8.310346873634852e-05, "loss": 0.8961, "step": 1861 }, { "epoch": 0.2808446455505279, "grad_norm": 1.6554056406021118, "learning_rate": 8.308543700994815e-05, "loss": 1.2149, "step": 1862 }, { "epoch": 0.2809954751131222, "grad_norm": 1.7944473028182983, "learning_rate": 8.306739762563794e-05, "loss": 1.1892, "step": 1863 }, { "epoch": 0.28114630467571644, "grad_norm": 1.89187753200531, "learning_rate": 8.304935058759323e-05, "loss": 1.1247, "step": 1864 }, { "epoch": 0.2812971342383107, "grad_norm": 2.2053380012512207, "learning_rate": 8.303129589999117e-05, "loss": 1.2969, "step": 1865 }, { "epoch": 0.281447963800905, "grad_norm": 1.970859169960022, "learning_rate": 8.301323356701069e-05, "loss": 1.2473, "step": 1866 }, { "epoch": 0.28159879336349924, "grad_norm": 2.272434949874878, "learning_rate": 8.299516359283246e-05, "loss": 1.7036, "step": 1867 }, { "epoch": 0.2817496229260935, "grad_norm": 1.8075746297836304, "learning_rate": 8.297708598163894e-05, "loss": 1.0446, "step": 1868 }, { "epoch": 0.2819004524886878, "grad_norm": 1.803596019744873, "learning_rate": 8.295900073761433e-05, "loss": 0.8184, "step": 1869 }, { "epoch": 0.28205128205128205, "grad_norm": 1.6999430656433105, "learning_rate": 8.294090786494464e-05, "loss": 1.0165, "step": 1870 }, { "epoch": 0.2822021116138763, "grad_norm": 1.9448697566986084, "learning_rate": 8.29228073678176e-05, "loss": 1.5273, "step": 1871 }, { "epoch": 0.2823529411764706, "grad_norm": 2.230556011199951, "learning_rate": 8.290469925042275e-05, "loss": 1.72, "step": 1872 }, { "epoch": 0.28250377073906485, "grad_norm": 2.161909818649292, "learning_rate": 8.288658351695134e-05, "loss": 1.6374, "step": 1873 }, { "epoch": 0.2826546003016591, "grad_norm": 1.9787851572036743, "learning_rate": 8.286846017159645e-05, "loss": 1.3147, "step": 1874 }, { "epoch": 0.2828054298642534, "grad_norm": 2.0310702323913574, "learning_rate": 8.285032921855284e-05, "loss": 1.1076, "step": 1875 }, { "epoch": 0.28295625942684766, "grad_norm": 2.039301872253418, "learning_rate": 8.283219066201712e-05, "loss": 1.2642, "step": 1876 }, { "epoch": 0.2831070889894419, "grad_norm": 1.9144707918167114, "learning_rate": 8.281404450618759e-05, "loss": 1.2147, "step": 1877 }, { "epoch": 0.2832579185520362, "grad_norm": 1.997266411781311, "learning_rate": 8.279589075526434e-05, "loss": 1.294, "step": 1878 }, { "epoch": 0.28340874811463046, "grad_norm": 1.7925808429718018, "learning_rate": 8.277772941344921e-05, "loss": 1.2438, "step": 1879 }, { "epoch": 0.28355957767722473, "grad_norm": 1.9929962158203125, "learning_rate": 8.27595604849458e-05, "loss": 1.3113, "step": 1880 }, { "epoch": 0.283710407239819, "grad_norm": 2.2836763858795166, "learning_rate": 8.274138397395948e-05, "loss": 1.55, "step": 1881 }, { "epoch": 0.28386123680241326, "grad_norm": 2.027259111404419, "learning_rate": 8.272319988469732e-05, "loss": 1.2465, "step": 1882 }, { "epoch": 0.28401206636500753, "grad_norm": 2.0516750812530518, "learning_rate": 8.270500822136822e-05, "loss": 1.1551, "step": 1883 }, { "epoch": 0.2841628959276018, "grad_norm": 2.0886988639831543, "learning_rate": 8.26868089881828e-05, "loss": 1.3198, "step": 1884 }, { "epoch": 0.28431372549019607, "grad_norm": 2.155057907104492, "learning_rate": 8.266860218935341e-05, "loss": 1.264, "step": 1885 }, { "epoch": 0.28446455505279034, "grad_norm": 2.129568338394165, "learning_rate": 8.265038782909417e-05, "loss": 1.3893, "step": 1886 }, { "epoch": 0.2846153846153846, "grad_norm": 2.1467883586883545, "learning_rate": 8.263216591162097e-05, "loss": 1.2009, "step": 1887 }, { "epoch": 0.28476621417797887, "grad_norm": 1.8793869018554688, "learning_rate": 8.261393644115143e-05, "loss": 1.1861, "step": 1888 }, { "epoch": 0.28491704374057314, "grad_norm": 1.8690520524978638, "learning_rate": 8.259569942190489e-05, "loss": 1.1833, "step": 1889 }, { "epoch": 0.2850678733031674, "grad_norm": 2.1215128898620605, "learning_rate": 8.25774548581025e-05, "loss": 1.2592, "step": 1890 }, { "epoch": 0.2852187028657617, "grad_norm": 2.1997530460357666, "learning_rate": 8.255920275396709e-05, "loss": 1.2801, "step": 1891 }, { "epoch": 0.28536953242835594, "grad_norm": 1.9920107126235962, "learning_rate": 8.254094311372329e-05, "loss": 1.2331, "step": 1892 }, { "epoch": 0.2855203619909502, "grad_norm": 1.9829208850860596, "learning_rate": 8.252267594159743e-05, "loss": 1.2702, "step": 1893 }, { "epoch": 0.2856711915535445, "grad_norm": 2.11250376701355, "learning_rate": 8.250440124181763e-05, "loss": 1.1984, "step": 1894 }, { "epoch": 0.28582202111613875, "grad_norm": 1.7648823261260986, "learning_rate": 8.248611901861371e-05, "loss": 0.8747, "step": 1895 }, { "epoch": 0.285972850678733, "grad_norm": 1.9433587789535522, "learning_rate": 8.246782927621726e-05, "loss": 1.0968, "step": 1896 }, { "epoch": 0.2861236802413273, "grad_norm": 2.0180156230926514, "learning_rate": 8.244953201886158e-05, "loss": 1.2933, "step": 1897 }, { "epoch": 0.28627450980392155, "grad_norm": 1.7674845457077026, "learning_rate": 8.243122725078173e-05, "loss": 1.0804, "step": 1898 }, { "epoch": 0.2864253393665158, "grad_norm": 2.291440725326538, "learning_rate": 8.241291497621456e-05, "loss": 1.3096, "step": 1899 }, { "epoch": 0.2865761689291101, "grad_norm": 1.7212042808532715, "learning_rate": 8.239459519939851e-05, "loss": 0.7358, "step": 1900 }, { "epoch": 0.28672699849170435, "grad_norm": 2.1608753204345703, "learning_rate": 8.237626792457394e-05, "loss": 1.3198, "step": 1901 }, { "epoch": 0.2868778280542986, "grad_norm": 2.308379650115967, "learning_rate": 8.23579331559828e-05, "loss": 1.5265, "step": 1902 }, { "epoch": 0.2870286576168929, "grad_norm": 2.0121045112609863, "learning_rate": 8.233959089786883e-05, "loss": 1.3337, "step": 1903 }, { "epoch": 0.28717948717948716, "grad_norm": 2.0366461277008057, "learning_rate": 8.232124115447753e-05, "loss": 1.2459, "step": 1904 }, { "epoch": 0.2873303167420814, "grad_norm": 1.9962174892425537, "learning_rate": 8.23028839300561e-05, "loss": 1.3538, "step": 1905 }, { "epoch": 0.2874811463046757, "grad_norm": 1.9082911014556885, "learning_rate": 8.228451922885349e-05, "loss": 1.3147, "step": 1906 }, { "epoch": 0.28763197586726996, "grad_norm": 2.252793788909912, "learning_rate": 8.226614705512033e-05, "loss": 1.5029, "step": 1907 }, { "epoch": 0.28778280542986423, "grad_norm": 1.9102108478546143, "learning_rate": 8.224776741310907e-05, "loss": 1.1311, "step": 1908 }, { "epoch": 0.2879336349924585, "grad_norm": 2.267390012741089, "learning_rate": 8.222938030707378e-05, "loss": 1.6925, "step": 1909 }, { "epoch": 0.28808446455505277, "grad_norm": 2.095104694366455, "learning_rate": 8.221098574127035e-05, "loss": 1.3787, "step": 1910 }, { "epoch": 0.28823529411764703, "grad_norm": 1.9677194356918335, "learning_rate": 8.219258371995638e-05, "loss": 1.2177, "step": 1911 }, { "epoch": 0.2883861236802413, "grad_norm": 2.0356385707855225, "learning_rate": 8.217417424739111e-05, "loss": 1.3076, "step": 1912 }, { "epoch": 0.28853695324283557, "grad_norm": 2.0324718952178955, "learning_rate": 8.215575732783564e-05, "loss": 1.3675, "step": 1913 }, { "epoch": 0.28868778280542984, "grad_norm": 1.8609644174575806, "learning_rate": 8.213733296555269e-05, "loss": 1.2699, "step": 1914 }, { "epoch": 0.2888386123680241, "grad_norm": 2.1286535263061523, "learning_rate": 8.211890116480673e-05, "loss": 1.6943, "step": 1915 }, { "epoch": 0.2889894419306184, "grad_norm": 1.9638196229934692, "learning_rate": 8.210046192986398e-05, "loss": 1.2255, "step": 1916 }, { "epoch": 0.2891402714932127, "grad_norm": 1.9107683897018433, "learning_rate": 8.208201526499236e-05, "loss": 1.1488, "step": 1917 }, { "epoch": 0.28929110105580697, "grad_norm": 1.844429612159729, "learning_rate": 8.20635611744615e-05, "loss": 1.1946, "step": 1918 }, { "epoch": 0.28944193061840123, "grad_norm": 2.030169725418091, "learning_rate": 8.204509966254276e-05, "loss": 1.3926, "step": 1919 }, { "epoch": 0.2895927601809955, "grad_norm": 1.8712544441223145, "learning_rate": 8.202663073350922e-05, "loss": 1.2496, "step": 1920 }, { "epoch": 0.28974358974358977, "grad_norm": 1.7529350519180298, "learning_rate": 8.200815439163567e-05, "loss": 1.0544, "step": 1921 }, { "epoch": 0.28989441930618404, "grad_norm": 1.8936941623687744, "learning_rate": 8.198967064119864e-05, "loss": 1.2785, "step": 1922 }, { "epoch": 0.2900452488687783, "grad_norm": 1.7409107685089111, "learning_rate": 8.197117948647632e-05, "loss": 1.1001, "step": 1923 }, { "epoch": 0.2901960784313726, "grad_norm": 2.009305715560913, "learning_rate": 8.195268093174864e-05, "loss": 1.2818, "step": 1924 }, { "epoch": 0.29034690799396684, "grad_norm": 1.9904239177703857, "learning_rate": 8.193417498129729e-05, "loss": 1.3734, "step": 1925 }, { "epoch": 0.2904977375565611, "grad_norm": 1.9091339111328125, "learning_rate": 8.191566163940561e-05, "loss": 1.108, "step": 1926 }, { "epoch": 0.2906485671191554, "grad_norm": 1.9187074899673462, "learning_rate": 8.189714091035868e-05, "loss": 1.2794, "step": 1927 }, { "epoch": 0.29079939668174964, "grad_norm": 2.098842144012451, "learning_rate": 8.187861279844326e-05, "loss": 1.3579, "step": 1928 }, { "epoch": 0.2909502262443439, "grad_norm": 2.1402039527893066, "learning_rate": 8.186007730794786e-05, "loss": 1.4686, "step": 1929 }, { "epoch": 0.2911010558069382, "grad_norm": 1.8592971563339233, "learning_rate": 8.184153444316269e-05, "loss": 1.199, "step": 1930 }, { "epoch": 0.29125188536953245, "grad_norm": 2.1896369457244873, "learning_rate": 8.182298420837965e-05, "loss": 1.3281, "step": 1931 }, { "epoch": 0.2914027149321267, "grad_norm": 2.0501885414123535, "learning_rate": 8.180442660789234e-05, "loss": 1.2577, "step": 1932 }, { "epoch": 0.291553544494721, "grad_norm": 2.3815717697143555, "learning_rate": 8.17858616459961e-05, "loss": 1.4616, "step": 1933 }, { "epoch": 0.29170437405731525, "grad_norm": 1.895424485206604, "learning_rate": 8.17672893269879e-05, "loss": 0.9649, "step": 1934 }, { "epoch": 0.2918552036199095, "grad_norm": 2.1486666202545166, "learning_rate": 8.174870965516652e-05, "loss": 1.5448, "step": 1935 }, { "epoch": 0.2920060331825038, "grad_norm": 2.187565326690674, "learning_rate": 8.173012263483235e-05, "loss": 1.534, "step": 1936 }, { "epoch": 0.29215686274509806, "grad_norm": 2.1776623725891113, "learning_rate": 8.171152827028754e-05, "loss": 1.2765, "step": 1937 }, { "epoch": 0.2923076923076923, "grad_norm": 2.2120256423950195, "learning_rate": 8.169292656583589e-05, "loss": 1.4203, "step": 1938 }, { "epoch": 0.2924585218702866, "grad_norm": 1.886023998260498, "learning_rate": 8.167431752578295e-05, "loss": 1.3618, "step": 1939 }, { "epoch": 0.29260935143288086, "grad_norm": 1.9709806442260742, "learning_rate": 8.165570115443592e-05, "loss": 1.2291, "step": 1940 }, { "epoch": 0.2927601809954751, "grad_norm": 1.9867538213729858, "learning_rate": 8.163707745610376e-05, "loss": 1.1549, "step": 1941 }, { "epoch": 0.2929110105580694, "grad_norm": 2.1118643283843994, "learning_rate": 8.161844643509704e-05, "loss": 1.3054, "step": 1942 }, { "epoch": 0.29306184012066366, "grad_norm": 2.3084850311279297, "learning_rate": 8.159980809572809e-05, "loss": 1.5924, "step": 1943 }, { "epoch": 0.29321266968325793, "grad_norm": 2.0950472354888916, "learning_rate": 8.158116244231091e-05, "loss": 1.2377, "step": 1944 }, { "epoch": 0.2933634992458522, "grad_norm": 1.954380750656128, "learning_rate": 8.15625094791612e-05, "loss": 1.1336, "step": 1945 }, { "epoch": 0.29351432880844647, "grad_norm": 1.7222503423690796, "learning_rate": 8.154384921059636e-05, "loss": 1.0592, "step": 1946 }, { "epoch": 0.29366515837104074, "grad_norm": 1.6936205625534058, "learning_rate": 8.152518164093545e-05, "loss": 0.9132, "step": 1947 }, { "epoch": 0.293815987933635, "grad_norm": 1.7453097105026245, "learning_rate": 8.150650677449925e-05, "loss": 1.0629, "step": 1948 }, { "epoch": 0.29396681749622927, "grad_norm": 1.6292860507965088, "learning_rate": 8.148782461561023e-05, "loss": 0.7914, "step": 1949 }, { "epoch": 0.29411764705882354, "grad_norm": 1.7947287559509277, "learning_rate": 8.146913516859251e-05, "loss": 1.0742, "step": 1950 }, { "epoch": 0.2942684766214178, "grad_norm": 2.5155184268951416, "learning_rate": 8.145043843777194e-05, "loss": 2.0123, "step": 1951 }, { "epoch": 0.2944193061840121, "grad_norm": 2.1364591121673584, "learning_rate": 8.143173442747604e-05, "loss": 1.3352, "step": 1952 }, { "epoch": 0.29457013574660634, "grad_norm": 2.106806755065918, "learning_rate": 8.141302314203403e-05, "loss": 1.53, "step": 1953 }, { "epoch": 0.2947209653092006, "grad_norm": 1.724652886390686, "learning_rate": 8.139430458577677e-05, "loss": 0.9832, "step": 1954 }, { "epoch": 0.2948717948717949, "grad_norm": 2.1583516597747803, "learning_rate": 8.137557876303686e-05, "loss": 1.6467, "step": 1955 }, { "epoch": 0.29502262443438915, "grad_norm": 2.0201945304870605, "learning_rate": 8.135684567814852e-05, "loss": 1.5359, "step": 1956 }, { "epoch": 0.2951734539969834, "grad_norm": 1.7157902717590332, "learning_rate": 8.133810533544771e-05, "loss": 0.9758, "step": 1957 }, { "epoch": 0.2953242835595777, "grad_norm": 1.8875629901885986, "learning_rate": 8.131935773927201e-05, "loss": 1.1142, "step": 1958 }, { "epoch": 0.29547511312217195, "grad_norm": 1.990832805633545, "learning_rate": 8.130060289396076e-05, "loss": 1.5569, "step": 1959 }, { "epoch": 0.2956259426847662, "grad_norm": 1.9186888933181763, "learning_rate": 8.128184080385491e-05, "loss": 1.3032, "step": 1960 }, { "epoch": 0.2957767722473605, "grad_norm": 1.7460001707077026, "learning_rate": 8.12630714732971e-05, "loss": 1.3166, "step": 1961 }, { "epoch": 0.29592760180995475, "grad_norm": 2.033179998397827, "learning_rate": 8.124429490663166e-05, "loss": 1.4041, "step": 1962 }, { "epoch": 0.296078431372549, "grad_norm": 1.8218116760253906, "learning_rate": 8.122551110820458e-05, "loss": 1.1679, "step": 1963 }, { "epoch": 0.2962292609351433, "grad_norm": 2.024066925048828, "learning_rate": 8.120672008236356e-05, "loss": 1.5173, "step": 1964 }, { "epoch": 0.29638009049773756, "grad_norm": 2.228179693222046, "learning_rate": 8.118792183345791e-05, "loss": 1.3269, "step": 1965 }, { "epoch": 0.2965309200603318, "grad_norm": 1.6904361248016357, "learning_rate": 8.116911636583866e-05, "loss": 1.0681, "step": 1966 }, { "epoch": 0.2966817496229261, "grad_norm": 1.6346503496170044, "learning_rate": 8.115030368385852e-05, "loss": 1.0682, "step": 1967 }, { "epoch": 0.29683257918552036, "grad_norm": 1.936173915863037, "learning_rate": 8.11314837918718e-05, "loss": 1.1825, "step": 1968 }, { "epoch": 0.29698340874811463, "grad_norm": 1.7327868938446045, "learning_rate": 8.111265669423457e-05, "loss": 1.0606, "step": 1969 }, { "epoch": 0.2971342383107089, "grad_norm": 1.80511474609375, "learning_rate": 8.109382239530452e-05, "loss": 1.1069, "step": 1970 }, { "epoch": 0.29728506787330317, "grad_norm": 2.0137836933135986, "learning_rate": 8.107498089944098e-05, "loss": 1.3536, "step": 1971 }, { "epoch": 0.29743589743589743, "grad_norm": 2.047759532928467, "learning_rate": 8.105613221100499e-05, "loss": 1.5348, "step": 1972 }, { "epoch": 0.2975867269984917, "grad_norm": 2.0782363414764404, "learning_rate": 8.103727633435927e-05, "loss": 1.2406, "step": 1973 }, { "epoch": 0.29773755656108597, "grad_norm": 1.8973119258880615, "learning_rate": 8.101841327386815e-05, "loss": 1.2795, "step": 1974 }, { "epoch": 0.29788838612368024, "grad_norm": 1.898832082748413, "learning_rate": 8.099954303389764e-05, "loss": 1.4201, "step": 1975 }, { "epoch": 0.2980392156862745, "grad_norm": 1.8488231897354126, "learning_rate": 8.098066561881542e-05, "loss": 1.3166, "step": 1976 }, { "epoch": 0.2981900452488688, "grad_norm": 1.9157990217208862, "learning_rate": 8.096178103299086e-05, "loss": 1.1757, "step": 1977 }, { "epoch": 0.29834087481146304, "grad_norm": 1.7759112119674683, "learning_rate": 8.094288928079492e-05, "loss": 1.1755, "step": 1978 }, { "epoch": 0.2984917043740573, "grad_norm": 1.7383739948272705, "learning_rate": 8.092399036660032e-05, "loss": 1.0669, "step": 1979 }, { "epoch": 0.2986425339366516, "grad_norm": 1.8631985187530518, "learning_rate": 8.090508429478129e-05, "loss": 1.0424, "step": 1980 }, { "epoch": 0.29879336349924585, "grad_norm": 2.1173548698425293, "learning_rate": 8.088617106971388e-05, "loss": 1.2046, "step": 1981 }, { "epoch": 0.2989441930618401, "grad_norm": 1.8946735858917236, "learning_rate": 8.086725069577569e-05, "loss": 1.1767, "step": 1982 }, { "epoch": 0.2990950226244344, "grad_norm": 1.836971402168274, "learning_rate": 8.0848323177346e-05, "loss": 1.1593, "step": 1983 }, { "epoch": 0.29924585218702865, "grad_norm": 1.8348891735076904, "learning_rate": 8.082938851880575e-05, "loss": 1.0917, "step": 1984 }, { "epoch": 0.2993966817496229, "grad_norm": 2.299510955810547, "learning_rate": 8.081044672453752e-05, "loss": 1.1519, "step": 1985 }, { "epoch": 0.2995475113122172, "grad_norm": 1.9288758039474487, "learning_rate": 8.079149779892558e-05, "loss": 1.1503, "step": 1986 }, { "epoch": 0.29969834087481145, "grad_norm": 2.1668975353240967, "learning_rate": 8.07725417463558e-05, "loss": 1.1019, "step": 1987 }, { "epoch": 0.2998491704374057, "grad_norm": 1.9475492238998413, "learning_rate": 8.075357857121572e-05, "loss": 1.2359, "step": 1988 }, { "epoch": 0.3, "grad_norm": 2.3879201412200928, "learning_rate": 8.073460827789456e-05, "loss": 1.5098, "step": 1989 }, { "epoch": 0.30015082956259426, "grad_norm": 2.135897159576416, "learning_rate": 8.07156308707831e-05, "loss": 1.29, "step": 1990 }, { "epoch": 0.3003016591251885, "grad_norm": 2.129786491394043, "learning_rate": 8.06966463542739e-05, "loss": 1.3788, "step": 1991 }, { "epoch": 0.3004524886877828, "grad_norm": 2.3773648738861084, "learning_rate": 8.067765473276102e-05, "loss": 1.4874, "step": 1992 }, { "epoch": 0.30060331825037706, "grad_norm": 2.0571134090423584, "learning_rate": 8.065865601064029e-05, "loss": 1.0983, "step": 1993 }, { "epoch": 0.30075414781297133, "grad_norm": 1.8859487771987915, "learning_rate": 8.063965019230908e-05, "loss": 1.0548, "step": 1994 }, { "epoch": 0.3009049773755656, "grad_norm": 2.426760673522949, "learning_rate": 8.062063728216649e-05, "loss": 1.2896, "step": 1995 }, { "epoch": 0.30105580693815986, "grad_norm": 2.065840244293213, "learning_rate": 8.060161728461319e-05, "loss": 1.114, "step": 1996 }, { "epoch": 0.30120663650075413, "grad_norm": 2.172605037689209, "learning_rate": 8.058259020405156e-05, "loss": 1.2485, "step": 1997 }, { "epoch": 0.3013574660633484, "grad_norm": 2.0910117626190186, "learning_rate": 8.056355604488554e-05, "loss": 0.883, "step": 1998 }, { "epoch": 0.30150829562594267, "grad_norm": 1.8993933200836182, "learning_rate": 8.054451481152079e-05, "loss": 0.9466, "step": 1999 }, { "epoch": 0.30165912518853694, "grad_norm": 1.8264797925949097, "learning_rate": 8.052546650836454e-05, "loss": 0.979, "step": 2000 }, { "epoch": 0.3018099547511312, "grad_norm": 2.4789340496063232, "learning_rate": 8.05064111398257e-05, "loss": 1.7755, "step": 2001 }, { "epoch": 0.30196078431372547, "grad_norm": 2.4046576023101807, "learning_rate": 8.048734871031478e-05, "loss": 1.7952, "step": 2002 }, { "epoch": 0.30211161387631974, "grad_norm": 2.4665300846099854, "learning_rate": 8.046827922424396e-05, "loss": 1.7865, "step": 2003 }, { "epoch": 0.302262443438914, "grad_norm": 2.1047542095184326, "learning_rate": 8.044920268602702e-05, "loss": 1.4802, "step": 2004 }, { "epoch": 0.3024132730015083, "grad_norm": 2.0567305088043213, "learning_rate": 8.043011910007942e-05, "loss": 1.4472, "step": 2005 }, { "epoch": 0.30256410256410254, "grad_norm": 2.2316954135894775, "learning_rate": 8.04110284708182e-05, "loss": 1.4702, "step": 2006 }, { "epoch": 0.3027149321266968, "grad_norm": 1.9619947671890259, "learning_rate": 8.039193080266206e-05, "loss": 1.4107, "step": 2007 }, { "epoch": 0.3028657616892911, "grad_norm": 1.56526780128479, "learning_rate": 8.037282610003131e-05, "loss": 0.9764, "step": 2008 }, { "epoch": 0.30301659125188535, "grad_norm": 2.0272648334503174, "learning_rate": 8.03537143673479e-05, "loss": 1.3599, "step": 2009 }, { "epoch": 0.3031674208144796, "grad_norm": 1.8451874256134033, "learning_rate": 8.03345956090354e-05, "loss": 1.0917, "step": 2010 }, { "epoch": 0.3033182503770739, "grad_norm": 1.6818028688430786, "learning_rate": 8.031546982951903e-05, "loss": 1.0257, "step": 2011 }, { "epoch": 0.30346907993966815, "grad_norm": 1.9353454113006592, "learning_rate": 8.02963370332256e-05, "loss": 1.3029, "step": 2012 }, { "epoch": 0.3036199095022624, "grad_norm": 2.242112636566162, "learning_rate": 8.027719722458358e-05, "loss": 1.4667, "step": 2013 }, { "epoch": 0.3037707390648567, "grad_norm": 1.7774497270584106, "learning_rate": 8.025805040802301e-05, "loss": 1.0864, "step": 2014 }, { "epoch": 0.30392156862745096, "grad_norm": 2.0368564128875732, "learning_rate": 8.023889658797563e-05, "loss": 1.2761, "step": 2015 }, { "epoch": 0.3040723981900452, "grad_norm": 1.6400374174118042, "learning_rate": 8.021973576887471e-05, "loss": 1.1044, "step": 2016 }, { "epoch": 0.3042232277526395, "grad_norm": 1.9477553367614746, "learning_rate": 8.020056795515522e-05, "loss": 1.5696, "step": 2017 }, { "epoch": 0.30437405731523376, "grad_norm": 1.9152183532714844, "learning_rate": 8.018139315125372e-05, "loss": 1.4054, "step": 2018 }, { "epoch": 0.304524886877828, "grad_norm": 1.9425663948059082, "learning_rate": 8.016221136160835e-05, "loss": 1.2022, "step": 2019 }, { "epoch": 0.3046757164404223, "grad_norm": 1.7328767776489258, "learning_rate": 8.014302259065894e-05, "loss": 1.0915, "step": 2020 }, { "epoch": 0.3048265460030166, "grad_norm": 2.0148394107818604, "learning_rate": 8.012382684284685e-05, "loss": 1.0628, "step": 2021 }, { "epoch": 0.3049773755656109, "grad_norm": 2.2491300106048584, "learning_rate": 8.010462412261515e-05, "loss": 1.5587, "step": 2022 }, { "epoch": 0.30512820512820515, "grad_norm": 1.9212427139282227, "learning_rate": 8.008541443440845e-05, "loss": 1.1632, "step": 2023 }, { "epoch": 0.3052790346907994, "grad_norm": 1.7928673028945923, "learning_rate": 8.0066197782673e-05, "loss": 1.1511, "step": 2024 }, { "epoch": 0.3054298642533937, "grad_norm": 1.7729668617248535, "learning_rate": 8.004697417185667e-05, "loss": 1.1154, "step": 2025 }, { "epoch": 0.30558069381598796, "grad_norm": 1.660508394241333, "learning_rate": 8.002774360640892e-05, "loss": 0.7742, "step": 2026 }, { "epoch": 0.3057315233785822, "grad_norm": 1.7599925994873047, "learning_rate": 8.000850609078083e-05, "loss": 1.3382, "step": 2027 }, { "epoch": 0.3058823529411765, "grad_norm": 1.8248248100280762, "learning_rate": 7.998926162942508e-05, "loss": 1.2555, "step": 2028 }, { "epoch": 0.30603318250377076, "grad_norm": 2.000857353210449, "learning_rate": 7.9970010226796e-05, "loss": 1.3275, "step": 2029 }, { "epoch": 0.30618401206636503, "grad_norm": 1.9200164079666138, "learning_rate": 7.995075188734947e-05, "loss": 1.4172, "step": 2030 }, { "epoch": 0.3063348416289593, "grad_norm": 1.9978601932525635, "learning_rate": 7.993148661554298e-05, "loss": 1.153, "step": 2031 }, { "epoch": 0.30648567119155357, "grad_norm": 1.9519500732421875, "learning_rate": 7.991221441583568e-05, "loss": 1.2587, "step": 2032 }, { "epoch": 0.30663650075414783, "grad_norm": 2.1030404567718506, "learning_rate": 7.989293529268826e-05, "loss": 1.2126, "step": 2033 }, { "epoch": 0.3067873303167421, "grad_norm": 2.006040334701538, "learning_rate": 7.987364925056305e-05, "loss": 1.0537, "step": 2034 }, { "epoch": 0.30693815987933637, "grad_norm": 2.0718767642974854, "learning_rate": 7.985435629392398e-05, "loss": 1.1908, "step": 2035 }, { "epoch": 0.30708898944193064, "grad_norm": 2.2207224369049072, "learning_rate": 7.983505642723656e-05, "loss": 1.6384, "step": 2036 }, { "epoch": 0.3072398190045249, "grad_norm": 2.4193897247314453, "learning_rate": 7.981574965496791e-05, "loss": 1.3922, "step": 2037 }, { "epoch": 0.3073906485671192, "grad_norm": 1.997031569480896, "learning_rate": 7.979643598158675e-05, "loss": 1.2275, "step": 2038 }, { "epoch": 0.30754147812971344, "grad_norm": 2.0422425270080566, "learning_rate": 7.977711541156341e-05, "loss": 1.2248, "step": 2039 }, { "epoch": 0.3076923076923077, "grad_norm": 1.9494401216506958, "learning_rate": 7.975778794936979e-05, "loss": 0.9951, "step": 2040 }, { "epoch": 0.307843137254902, "grad_norm": 2.085408926010132, "learning_rate": 7.973845359947939e-05, "loss": 1.1109, "step": 2041 }, { "epoch": 0.30799396681749625, "grad_norm": 1.9734898805618286, "learning_rate": 7.971911236636732e-05, "loss": 1.1572, "step": 2042 }, { "epoch": 0.3081447963800905, "grad_norm": 2.3416223526000977, "learning_rate": 7.969976425451027e-05, "loss": 0.829, "step": 2043 }, { "epoch": 0.3082956259426848, "grad_norm": 2.4501566886901855, "learning_rate": 7.968040926838655e-05, "loss": 1.352, "step": 2044 }, { "epoch": 0.30844645550527905, "grad_norm": 2.4470129013061523, "learning_rate": 7.966104741247603e-05, "loss": 1.31, "step": 2045 }, { "epoch": 0.3085972850678733, "grad_norm": 2.2977850437164307, "learning_rate": 7.964167869126015e-05, "loss": 1.2896, "step": 2046 }, { "epoch": 0.3087481146304676, "grad_norm": 1.7011759281158447, "learning_rate": 7.9622303109222e-05, "loss": 0.8211, "step": 2047 }, { "epoch": 0.30889894419306185, "grad_norm": 1.96943199634552, "learning_rate": 7.96029206708462e-05, "loss": 1.1111, "step": 2048 }, { "epoch": 0.3090497737556561, "grad_norm": 1.464977741241455, "learning_rate": 7.958353138061902e-05, "loss": 0.6284, "step": 2049 }, { "epoch": 0.3092006033182504, "grad_norm": 1.9553420543670654, "learning_rate": 7.956413524302824e-05, "loss": 0.9515, "step": 2050 }, { "epoch": 0.30935143288084466, "grad_norm": 2.3986318111419678, "learning_rate": 7.954473226256327e-05, "loss": 1.4044, "step": 2051 }, { "epoch": 0.3095022624434389, "grad_norm": 2.4363291263580322, "learning_rate": 7.952532244371512e-05, "loss": 1.6782, "step": 2052 }, { "epoch": 0.3096530920060332, "grad_norm": 2.6046526432037354, "learning_rate": 7.950590579097634e-05, "loss": 1.4255, "step": 2053 }, { "epoch": 0.30980392156862746, "grad_norm": 1.7490018606185913, "learning_rate": 7.948648230884109e-05, "loss": 0.8549, "step": 2054 }, { "epoch": 0.30995475113122173, "grad_norm": 2.3781471252441406, "learning_rate": 7.946705200180511e-05, "loss": 1.8855, "step": 2055 }, { "epoch": 0.310105580693816, "grad_norm": 2.0032827854156494, "learning_rate": 7.944761487436568e-05, "loss": 1.5962, "step": 2056 }, { "epoch": 0.31025641025641026, "grad_norm": 2.12138032913208, "learning_rate": 7.942817093102172e-05, "loss": 1.3649, "step": 2057 }, { "epoch": 0.31040723981900453, "grad_norm": 1.8067811727523804, "learning_rate": 7.940872017627371e-05, "loss": 1.2192, "step": 2058 }, { "epoch": 0.3105580693815988, "grad_norm": 2.10638165473938, "learning_rate": 7.938926261462366e-05, "loss": 1.3389, "step": 2059 }, { "epoch": 0.31070889894419307, "grad_norm": 2.3365352153778076, "learning_rate": 7.936979825057521e-05, "loss": 1.698, "step": 2060 }, { "epoch": 0.31085972850678734, "grad_norm": 2.156334400177002, "learning_rate": 7.935032708863355e-05, "loss": 1.3741, "step": 2061 }, { "epoch": 0.3110105580693816, "grad_norm": 1.7790610790252686, "learning_rate": 7.933084913330546e-05, "loss": 1.1393, "step": 2062 }, { "epoch": 0.31116138763197587, "grad_norm": 1.9712390899658203, "learning_rate": 7.931136438909927e-05, "loss": 1.5487, "step": 2063 }, { "epoch": 0.31131221719457014, "grad_norm": 2.1226658821105957, "learning_rate": 7.929187286052489e-05, "loss": 1.594, "step": 2064 }, { "epoch": 0.3114630467571644, "grad_norm": 1.8629928827285767, "learning_rate": 7.92723745520938e-05, "loss": 1.1334, "step": 2065 }, { "epoch": 0.3116138763197587, "grad_norm": 1.9002476930618286, "learning_rate": 7.925286946831907e-05, "loss": 1.1725, "step": 2066 }, { "epoch": 0.31176470588235294, "grad_norm": 1.852799415588379, "learning_rate": 7.923335761371531e-05, "loss": 1.1477, "step": 2067 }, { "epoch": 0.3119155354449472, "grad_norm": 1.7344245910644531, "learning_rate": 7.921383899279869e-05, "loss": 1.0533, "step": 2068 }, { "epoch": 0.3120663650075415, "grad_norm": 1.8888721466064453, "learning_rate": 7.919431361008698e-05, "loss": 1.2537, "step": 2069 }, { "epoch": 0.31221719457013575, "grad_norm": 2.20353627204895, "learning_rate": 7.91747814700995e-05, "loss": 1.6639, "step": 2070 }, { "epoch": 0.31236802413273, "grad_norm": 1.9597432613372803, "learning_rate": 7.915524257735712e-05, "loss": 1.2295, "step": 2071 }, { "epoch": 0.3125188536953243, "grad_norm": 1.9797371625900269, "learning_rate": 7.913569693638232e-05, "loss": 1.4515, "step": 2072 }, { "epoch": 0.31266968325791855, "grad_norm": 1.9970898628234863, "learning_rate": 7.911614455169905e-05, "loss": 1.1913, "step": 2073 }, { "epoch": 0.3128205128205128, "grad_norm": 1.620439052581787, "learning_rate": 7.909658542783294e-05, "loss": 1.0541, "step": 2074 }, { "epoch": 0.3129713423831071, "grad_norm": 1.9726227521896362, "learning_rate": 7.907701956931106e-05, "loss": 1.4592, "step": 2075 }, { "epoch": 0.31312217194570136, "grad_norm": 1.8297202587127686, "learning_rate": 7.905744698066213e-05, "loss": 1.3745, "step": 2076 }, { "epoch": 0.3132730015082956, "grad_norm": 1.9842840433120728, "learning_rate": 7.903786766641641e-05, "loss": 1.2828, "step": 2077 }, { "epoch": 0.3134238310708899, "grad_norm": 2.0969135761260986, "learning_rate": 7.901828163110565e-05, "loss": 1.3771, "step": 2078 }, { "epoch": 0.31357466063348416, "grad_norm": 1.8513256311416626, "learning_rate": 7.899868887926325e-05, "loss": 1.3633, "step": 2079 }, { "epoch": 0.3137254901960784, "grad_norm": 2.044416904449463, "learning_rate": 7.897908941542411e-05, "loss": 1.4513, "step": 2080 }, { "epoch": 0.3138763197586727, "grad_norm": 1.857562780380249, "learning_rate": 7.89594832441247e-05, "loss": 1.317, "step": 2081 }, { "epoch": 0.31402714932126696, "grad_norm": 1.9112484455108643, "learning_rate": 7.893987036990302e-05, "loss": 1.3063, "step": 2082 }, { "epoch": 0.31417797888386123, "grad_norm": 1.6574615240097046, "learning_rate": 7.892025079729866e-05, "loss": 0.9935, "step": 2083 }, { "epoch": 0.3143288084464555, "grad_norm": 2.129579544067383, "learning_rate": 7.890062453085271e-05, "loss": 1.3162, "step": 2084 }, { "epoch": 0.31447963800904977, "grad_norm": 1.7706995010375977, "learning_rate": 7.888099157510786e-05, "loss": 1.0456, "step": 2085 }, { "epoch": 0.31463046757164403, "grad_norm": 2.265636444091797, "learning_rate": 7.886135193460836e-05, "loss": 1.4888, "step": 2086 }, { "epoch": 0.3147812971342383, "grad_norm": 1.9507603645324707, "learning_rate": 7.88417056138999e-05, "loss": 1.0928, "step": 2087 }, { "epoch": 0.31493212669683257, "grad_norm": 2.1440205574035645, "learning_rate": 7.882205261752984e-05, "loss": 1.399, "step": 2088 }, { "epoch": 0.31508295625942684, "grad_norm": 1.8957526683807373, "learning_rate": 7.880239295004704e-05, "loss": 1.2216, "step": 2089 }, { "epoch": 0.3152337858220211, "grad_norm": 2.0498299598693848, "learning_rate": 7.878272661600186e-05, "loss": 1.1416, "step": 2090 }, { "epoch": 0.3153846153846154, "grad_norm": 2.268223762512207, "learning_rate": 7.87630536199463e-05, "loss": 1.2707, "step": 2091 }, { "epoch": 0.31553544494720964, "grad_norm": 1.6065419912338257, "learning_rate": 7.874337396643378e-05, "loss": 0.9645, "step": 2092 }, { "epoch": 0.3156862745098039, "grad_norm": 2.293581485748291, "learning_rate": 7.872368766001937e-05, "loss": 1.5479, "step": 2093 }, { "epoch": 0.3158371040723982, "grad_norm": 2.2901246547698975, "learning_rate": 7.870399470525962e-05, "loss": 1.4005, "step": 2094 }, { "epoch": 0.31598793363499245, "grad_norm": 2.35381817817688, "learning_rate": 7.868429510671262e-05, "loss": 1.2948, "step": 2095 }, { "epoch": 0.3161387631975867, "grad_norm": 1.792441725730896, "learning_rate": 7.866458886893806e-05, "loss": 1.0363, "step": 2096 }, { "epoch": 0.316289592760181, "grad_norm": 1.3926469087600708, "learning_rate": 7.864487599649708e-05, "loss": 0.65, "step": 2097 }, { "epoch": 0.31644042232277525, "grad_norm": 1.6517584323883057, "learning_rate": 7.862515649395239e-05, "loss": 0.7041, "step": 2098 }, { "epoch": 0.3165912518853695, "grad_norm": 1.5307166576385498, "learning_rate": 7.860543036586827e-05, "loss": 0.7241, "step": 2099 }, { "epoch": 0.3167420814479638, "grad_norm": 1.6841472387313843, "learning_rate": 7.858569761681048e-05, "loss": 0.7703, "step": 2100 }, { "epoch": 0.31689291101055805, "grad_norm": 2.3318681716918945, "learning_rate": 7.856595825134633e-05, "loss": 1.477, "step": 2101 }, { "epoch": 0.3170437405731523, "grad_norm": 2.4762024879455566, "learning_rate": 7.854621227404469e-05, "loss": 1.641, "step": 2102 }, { "epoch": 0.3171945701357466, "grad_norm": 2.0471670627593994, "learning_rate": 7.85264596894759e-05, "loss": 1.2986, "step": 2103 }, { "epoch": 0.31734539969834086, "grad_norm": 2.5108072757720947, "learning_rate": 7.85067005022119e-05, "loss": 1.9562, "step": 2104 }, { "epoch": 0.3174962292609351, "grad_norm": 2.466811180114746, "learning_rate": 7.848693471682611e-05, "loss": 1.6952, "step": 2105 }, { "epoch": 0.3176470588235294, "grad_norm": 2.030205249786377, "learning_rate": 7.846716233789351e-05, "loss": 1.3612, "step": 2106 }, { "epoch": 0.31779788838612366, "grad_norm": 1.9520014524459839, "learning_rate": 7.844738336999055e-05, "loss": 1.2055, "step": 2107 }, { "epoch": 0.31794871794871793, "grad_norm": 1.989987850189209, "learning_rate": 7.842759781769528e-05, "loss": 1.3649, "step": 2108 }, { "epoch": 0.3180995475113122, "grad_norm": 1.6327499151229858, "learning_rate": 7.840780568558721e-05, "loss": 0.9069, "step": 2109 }, { "epoch": 0.31825037707390647, "grad_norm": 2.465270757675171, "learning_rate": 7.838800697824743e-05, "loss": 1.7166, "step": 2110 }, { "epoch": 0.31840120663650073, "grad_norm": 1.8699637651443481, "learning_rate": 7.83682017002585e-05, "loss": 1.2286, "step": 2111 }, { "epoch": 0.318552036199095, "grad_norm": 2.0082993507385254, "learning_rate": 7.83483898562045e-05, "loss": 1.3202, "step": 2112 }, { "epoch": 0.31870286576168927, "grad_norm": 2.0627291202545166, "learning_rate": 7.83285714506711e-05, "loss": 1.5311, "step": 2113 }, { "epoch": 0.31885369532428354, "grad_norm": 1.590405821800232, "learning_rate": 7.830874648824543e-05, "loss": 0.7811, "step": 2114 }, { "epoch": 0.3190045248868778, "grad_norm": 1.886236310005188, "learning_rate": 7.828891497351611e-05, "loss": 1.1364, "step": 2115 }, { "epoch": 0.3191553544494721, "grad_norm": 1.8510969877243042, "learning_rate": 7.826907691107334e-05, "loss": 1.2644, "step": 2116 }, { "epoch": 0.31930618401206634, "grad_norm": 1.8986903429031372, "learning_rate": 7.824923230550884e-05, "loss": 1.0862, "step": 2117 }, { "epoch": 0.3194570135746606, "grad_norm": 2.0460221767425537, "learning_rate": 7.822938116141578e-05, "loss": 1.3635, "step": 2118 }, { "epoch": 0.3196078431372549, "grad_norm": 2.063119411468506, "learning_rate": 7.820952348338888e-05, "loss": 1.4818, "step": 2119 }, { "epoch": 0.31975867269984914, "grad_norm": 1.9195157289505005, "learning_rate": 7.818965927602438e-05, "loss": 1.2484, "step": 2120 }, { "epoch": 0.3199095022624434, "grad_norm": 1.7175984382629395, "learning_rate": 7.816978854392002e-05, "loss": 1.2006, "step": 2121 }, { "epoch": 0.3200603318250377, "grad_norm": 2.1072256565093994, "learning_rate": 7.814991129167506e-05, "loss": 1.2768, "step": 2122 }, { "epoch": 0.32021116138763195, "grad_norm": 1.9124327898025513, "learning_rate": 7.813002752389024e-05, "loss": 1.2862, "step": 2123 }, { "epoch": 0.32036199095022627, "grad_norm": 2.0215513706207275, "learning_rate": 7.811013724516786e-05, "loss": 1.4676, "step": 2124 }, { "epoch": 0.32051282051282054, "grad_norm": 1.5508869886398315, "learning_rate": 7.809024046011167e-05, "loss": 0.8619, "step": 2125 }, { "epoch": 0.3206636500754148, "grad_norm": 1.730616569519043, "learning_rate": 7.807033717332697e-05, "loss": 1.0347, "step": 2126 }, { "epoch": 0.3208144796380091, "grad_norm": 2.037172794342041, "learning_rate": 7.805042738942053e-05, "loss": 1.4433, "step": 2127 }, { "epoch": 0.32096530920060334, "grad_norm": 2.0702357292175293, "learning_rate": 7.803051111300066e-05, "loss": 1.3632, "step": 2128 }, { "epoch": 0.3211161387631976, "grad_norm": 1.776043176651001, "learning_rate": 7.801058834867714e-05, "loss": 1.0914, "step": 2129 }, { "epoch": 0.3212669683257919, "grad_norm": 2.0812079906463623, "learning_rate": 7.799065910106128e-05, "loss": 1.1997, "step": 2130 }, { "epoch": 0.32141779788838615, "grad_norm": 1.9302732944488525, "learning_rate": 7.797072337476585e-05, "loss": 1.2459, "step": 2131 }, { "epoch": 0.3215686274509804, "grad_norm": 1.5608243942260742, "learning_rate": 7.795078117440518e-05, "loss": 0.9178, "step": 2132 }, { "epoch": 0.3217194570135747, "grad_norm": 1.9089075326919556, "learning_rate": 7.793083250459504e-05, "loss": 1.3125, "step": 2133 }, { "epoch": 0.32187028657616895, "grad_norm": 2.1311984062194824, "learning_rate": 7.791087736995272e-05, "loss": 1.4574, "step": 2134 }, { "epoch": 0.3220211161387632, "grad_norm": 2.3200409412384033, "learning_rate": 7.789091577509705e-05, "loss": 1.5852, "step": 2135 }, { "epoch": 0.3221719457013575, "grad_norm": 2.1799349784851074, "learning_rate": 7.787094772464824e-05, "loss": 1.2439, "step": 2136 }, { "epoch": 0.32232277526395176, "grad_norm": 2.1456146240234375, "learning_rate": 7.785097322322814e-05, "loss": 1.3374, "step": 2137 }, { "epoch": 0.322473604826546, "grad_norm": 2.196166515350342, "learning_rate": 7.783099227545998e-05, "loss": 1.5045, "step": 2138 }, { "epoch": 0.3226244343891403, "grad_norm": 1.9885152578353882, "learning_rate": 7.781100488596853e-05, "loss": 1.2864, "step": 2139 }, { "epoch": 0.32277526395173456, "grad_norm": 2.437120199203491, "learning_rate": 7.779101105938004e-05, "loss": 1.489, "step": 2140 }, { "epoch": 0.3229260935143288, "grad_norm": 2.1418490409851074, "learning_rate": 7.777101080032228e-05, "loss": 1.2077, "step": 2141 }, { "epoch": 0.3230769230769231, "grad_norm": 2.219167947769165, "learning_rate": 7.775100411342444e-05, "loss": 1.4023, "step": 2142 }, { "epoch": 0.32322775263951736, "grad_norm": 2.393301248550415, "learning_rate": 7.773099100331727e-05, "loss": 1.3356, "step": 2143 }, { "epoch": 0.32337858220211163, "grad_norm": 2.030027389526367, "learning_rate": 7.771097147463297e-05, "loss": 1.0568, "step": 2144 }, { "epoch": 0.3235294117647059, "grad_norm": 1.922145128250122, "learning_rate": 7.769094553200523e-05, "loss": 0.9375, "step": 2145 }, { "epoch": 0.32368024132730017, "grad_norm": 1.9440650939941406, "learning_rate": 7.767091318006922e-05, "loss": 0.98, "step": 2146 }, { "epoch": 0.32383107088989443, "grad_norm": 1.9338539838790894, "learning_rate": 7.765087442346163e-05, "loss": 1.0471, "step": 2147 }, { "epoch": 0.3239819004524887, "grad_norm": 1.8205417394638062, "learning_rate": 7.763082926682058e-05, "loss": 0.9277, "step": 2148 }, { "epoch": 0.32413273001508297, "grad_norm": 2.101309061050415, "learning_rate": 7.761077771478569e-05, "loss": 0.9224, "step": 2149 }, { "epoch": 0.32428355957767724, "grad_norm": 2.1898229122161865, "learning_rate": 7.759071977199806e-05, "loss": 1.1812, "step": 2150 }, { "epoch": 0.3244343891402715, "grad_norm": 2.315579414367676, "learning_rate": 7.75706554431003e-05, "loss": 1.5397, "step": 2151 }, { "epoch": 0.3245852187028658, "grad_norm": 2.0580050945281982, "learning_rate": 7.755058473273646e-05, "loss": 1.266, "step": 2152 }, { "epoch": 0.32473604826546004, "grad_norm": 2.2977848052978516, "learning_rate": 7.753050764555207e-05, "loss": 1.599, "step": 2153 }, { "epoch": 0.3248868778280543, "grad_norm": 2.078383445739746, "learning_rate": 7.751042418619417e-05, "loss": 1.3589, "step": 2154 }, { "epoch": 0.3250377073906486, "grad_norm": 2.066185712814331, "learning_rate": 7.749033435931121e-05, "loss": 1.1455, "step": 2155 }, { "epoch": 0.32518853695324285, "grad_norm": 2.251288890838623, "learning_rate": 7.747023816955319e-05, "loss": 1.4771, "step": 2156 }, { "epoch": 0.3253393665158371, "grad_norm": 2.909471035003662, "learning_rate": 7.745013562157153e-05, "loss": 1.3618, "step": 2157 }, { "epoch": 0.3254901960784314, "grad_norm": 2.0360212326049805, "learning_rate": 7.743002672001914e-05, "loss": 1.2775, "step": 2158 }, { "epoch": 0.32564102564102565, "grad_norm": 2.3610775470733643, "learning_rate": 7.740991146955039e-05, "loss": 1.5839, "step": 2159 }, { "epoch": 0.3257918552036199, "grad_norm": 2.0022690296173096, "learning_rate": 7.738978987482112e-05, "loss": 1.24, "step": 2160 }, { "epoch": 0.3259426847662142, "grad_norm": 2.281221866607666, "learning_rate": 7.736966194048871e-05, "loss": 1.4877, "step": 2161 }, { "epoch": 0.32609351432880845, "grad_norm": 1.7508684396743774, "learning_rate": 7.734952767121188e-05, "loss": 1.0949, "step": 2162 }, { "epoch": 0.3262443438914027, "grad_norm": 1.9903571605682373, "learning_rate": 7.73293870716509e-05, "loss": 1.2232, "step": 2163 }, { "epoch": 0.326395173453997, "grad_norm": 1.9289876222610474, "learning_rate": 7.730924014646748e-05, "loss": 1.2734, "step": 2164 }, { "epoch": 0.32654600301659126, "grad_norm": 1.8334612846374512, "learning_rate": 7.72890869003248e-05, "loss": 1.1307, "step": 2165 }, { "epoch": 0.3266968325791855, "grad_norm": 1.8999308347702026, "learning_rate": 7.72689273378875e-05, "loss": 1.066, "step": 2166 }, { "epoch": 0.3268476621417798, "grad_norm": 1.882737636566162, "learning_rate": 7.72487614638217e-05, "loss": 1.1352, "step": 2167 }, { "epoch": 0.32699849170437406, "grad_norm": 2.197422981262207, "learning_rate": 7.722858928279496e-05, "loss": 1.3772, "step": 2168 }, { "epoch": 0.32714932126696833, "grad_norm": 1.96427583694458, "learning_rate": 7.720841079947629e-05, "loss": 1.3929, "step": 2169 }, { "epoch": 0.3273001508295626, "grad_norm": 1.9181370735168457, "learning_rate": 7.718822601853621e-05, "loss": 1.4001, "step": 2170 }, { "epoch": 0.32745098039215687, "grad_norm": 1.9122101068496704, "learning_rate": 7.716803494464663e-05, "loss": 1.2029, "step": 2171 }, { "epoch": 0.32760180995475113, "grad_norm": 1.6675642728805542, "learning_rate": 7.714783758248094e-05, "loss": 1.3027, "step": 2172 }, { "epoch": 0.3277526395173454, "grad_norm": 2.014936923980713, "learning_rate": 7.712763393671403e-05, "loss": 1.4802, "step": 2173 }, { "epoch": 0.32790346907993967, "grad_norm": 1.759763240814209, "learning_rate": 7.710742401202218e-05, "loss": 0.9158, "step": 2174 }, { "epoch": 0.32805429864253394, "grad_norm": 1.942934274673462, "learning_rate": 7.708720781308316e-05, "loss": 1.3264, "step": 2175 }, { "epoch": 0.3282051282051282, "grad_norm": 1.8620729446411133, "learning_rate": 7.706698534457618e-05, "loss": 1.1459, "step": 2176 }, { "epoch": 0.3283559577677225, "grad_norm": 1.9933490753173828, "learning_rate": 7.70467566111819e-05, "loss": 1.3263, "step": 2177 }, { "epoch": 0.32850678733031674, "grad_norm": 1.710165023803711, "learning_rate": 7.702652161758246e-05, "loss": 1.0023, "step": 2178 }, { "epoch": 0.328657616892911, "grad_norm": 1.9362845420837402, "learning_rate": 7.70062803684614e-05, "loss": 1.3859, "step": 2179 }, { "epoch": 0.3288084464555053, "grad_norm": 1.8772884607315063, "learning_rate": 7.698603286850375e-05, "loss": 1.3222, "step": 2180 }, { "epoch": 0.32895927601809954, "grad_norm": 1.9854366779327393, "learning_rate": 7.696577912239593e-05, "loss": 1.3309, "step": 2181 }, { "epoch": 0.3291101055806938, "grad_norm": 1.823313593864441, "learning_rate": 7.69455191348259e-05, "loss": 1.0338, "step": 2182 }, { "epoch": 0.3292609351432881, "grad_norm": 1.7979207038879395, "learning_rate": 7.692525291048296e-05, "loss": 1.1327, "step": 2183 }, { "epoch": 0.32941176470588235, "grad_norm": 2.1780972480773926, "learning_rate": 7.690498045405794e-05, "loss": 1.2026, "step": 2184 }, { "epoch": 0.3295625942684766, "grad_norm": 1.7461633682250977, "learning_rate": 7.688470177024305e-05, "loss": 1.1157, "step": 2185 }, { "epoch": 0.3297134238310709, "grad_norm": 2.0876290798187256, "learning_rate": 7.686441686373196e-05, "loss": 1.4756, "step": 2186 }, { "epoch": 0.32986425339366515, "grad_norm": 2.4564716815948486, "learning_rate": 7.684412573921982e-05, "loss": 1.3083, "step": 2187 }, { "epoch": 0.3300150829562594, "grad_norm": 2.0837290287017822, "learning_rate": 7.682382840140316e-05, "loss": 1.2002, "step": 2188 }, { "epoch": 0.3301659125188537, "grad_norm": 1.9494925737380981, "learning_rate": 7.680352485497997e-05, "loss": 1.0256, "step": 2189 }, { "epoch": 0.33031674208144796, "grad_norm": 2.009239673614502, "learning_rate": 7.678321510464971e-05, "loss": 1.2663, "step": 2190 }, { "epoch": 0.3304675716440422, "grad_norm": 2.2096879482269287, "learning_rate": 7.676289915511321e-05, "loss": 1.4817, "step": 2191 }, { "epoch": 0.3306184012066365, "grad_norm": 2.4408605098724365, "learning_rate": 7.67425770110728e-05, "loss": 1.4074, "step": 2192 }, { "epoch": 0.33076923076923076, "grad_norm": 2.276932716369629, "learning_rate": 7.672224867723221e-05, "loss": 1.3102, "step": 2193 }, { "epoch": 0.33092006033182503, "grad_norm": 1.939516544342041, "learning_rate": 7.67019141582966e-05, "loss": 0.8378, "step": 2194 }, { "epoch": 0.3310708898944193, "grad_norm": 2.0520780086517334, "learning_rate": 7.668157345897256e-05, "loss": 1.0607, "step": 2195 }, { "epoch": 0.33122171945701356, "grad_norm": 1.878717303276062, "learning_rate": 7.666122658396814e-05, "loss": 0.9987, "step": 2196 }, { "epoch": 0.33137254901960783, "grad_norm": 1.9568232297897339, "learning_rate": 7.66408735379928e-05, "loss": 0.7918, "step": 2197 }, { "epoch": 0.3315233785822021, "grad_norm": 1.6444367170333862, "learning_rate": 7.662051432575745e-05, "loss": 0.6857, "step": 2198 }, { "epoch": 0.33167420814479637, "grad_norm": 1.9435197114944458, "learning_rate": 7.660014895197434e-05, "loss": 0.9324, "step": 2199 }, { "epoch": 0.33182503770739064, "grad_norm": 2.0838968753814697, "learning_rate": 7.657977742135726e-05, "loss": 0.9995, "step": 2200 }, { "epoch": 0.3319758672699849, "grad_norm": 1.8517134189605713, "learning_rate": 7.655939973862136e-05, "loss": 1.0365, "step": 2201 }, { "epoch": 0.33212669683257917, "grad_norm": 1.9543771743774414, "learning_rate": 7.653901590848325e-05, "loss": 1.1003, "step": 2202 }, { "epoch": 0.33227752639517344, "grad_norm": 2.204191207885742, "learning_rate": 7.651862593566093e-05, "loss": 1.7115, "step": 2203 }, { "epoch": 0.3324283559577677, "grad_norm": 2.1187376976013184, "learning_rate": 7.649822982487382e-05, "loss": 1.3629, "step": 2204 }, { "epoch": 0.332579185520362, "grad_norm": 2.072145938873291, "learning_rate": 7.647782758084278e-05, "loss": 1.4312, "step": 2205 }, { "epoch": 0.33273001508295624, "grad_norm": 1.8358123302459717, "learning_rate": 7.645741920829013e-05, "loss": 1.1309, "step": 2206 }, { "epoch": 0.3328808446455505, "grad_norm": 1.935274362564087, "learning_rate": 7.64370047119395e-05, "loss": 1.3316, "step": 2207 }, { "epoch": 0.3330316742081448, "grad_norm": 1.9558253288269043, "learning_rate": 7.641658409651604e-05, "loss": 1.0719, "step": 2208 }, { "epoch": 0.33318250377073905, "grad_norm": 2.0524258613586426, "learning_rate": 7.639615736674628e-05, "loss": 1.3606, "step": 2209 }, { "epoch": 0.3333333333333333, "grad_norm": 2.084775447845459, "learning_rate": 7.637572452735814e-05, "loss": 1.345, "step": 2210 }, { "epoch": 0.3334841628959276, "grad_norm": 1.956209421157837, "learning_rate": 7.635528558308098e-05, "loss": 1.3527, "step": 2211 }, { "epoch": 0.33363499245852185, "grad_norm": 1.8987846374511719, "learning_rate": 7.633484053864561e-05, "loss": 1.1296, "step": 2212 }, { "epoch": 0.3337858220211161, "grad_norm": 1.7817195653915405, "learning_rate": 7.631438939878415e-05, "loss": 1.0559, "step": 2213 }, { "epoch": 0.3339366515837104, "grad_norm": 1.899654507637024, "learning_rate": 7.629393216823023e-05, "loss": 1.3979, "step": 2214 }, { "epoch": 0.33408748114630465, "grad_norm": 1.9521822929382324, "learning_rate": 7.627346885171885e-05, "loss": 1.1931, "step": 2215 }, { "epoch": 0.3342383107088989, "grad_norm": 1.8783096075057983, "learning_rate": 7.625299945398641e-05, "loss": 1.1635, "step": 2216 }, { "epoch": 0.3343891402714932, "grad_norm": 1.9451441764831543, "learning_rate": 7.623252397977074e-05, "loss": 1.3159, "step": 2217 }, { "epoch": 0.33453996983408746, "grad_norm": 1.8786613941192627, "learning_rate": 7.621204243381106e-05, "loss": 1.4374, "step": 2218 }, { "epoch": 0.3346907993966817, "grad_norm": 2.0722830295562744, "learning_rate": 7.619155482084799e-05, "loss": 1.3235, "step": 2219 }, { "epoch": 0.334841628959276, "grad_norm": 1.7930673360824585, "learning_rate": 7.617106114562358e-05, "loss": 1.202, "step": 2220 }, { "epoch": 0.33499245852187026, "grad_norm": 1.7867363691329956, "learning_rate": 7.615056141288128e-05, "loss": 1.0764, "step": 2221 }, { "epoch": 0.33514328808446453, "grad_norm": 1.8160027265548706, "learning_rate": 7.613005562736592e-05, "loss": 1.0726, "step": 2222 }, { "epoch": 0.3352941176470588, "grad_norm": 1.937736988067627, "learning_rate": 7.610954379382372e-05, "loss": 1.3284, "step": 2223 }, { "epoch": 0.33544494720965307, "grad_norm": 1.9707967042922974, "learning_rate": 7.608902591700233e-05, "loss": 1.1131, "step": 2224 }, { "epoch": 0.33559577677224733, "grad_norm": 2.1105775833129883, "learning_rate": 7.60685020016508e-05, "loss": 1.6707, "step": 2225 }, { "epoch": 0.3357466063348416, "grad_norm": 2.0093870162963867, "learning_rate": 7.604797205251957e-05, "loss": 1.2149, "step": 2226 }, { "epoch": 0.33589743589743587, "grad_norm": 2.069889783859253, "learning_rate": 7.602743607436045e-05, "loss": 1.3985, "step": 2227 }, { "epoch": 0.3360482654600302, "grad_norm": 2.0532679557800293, "learning_rate": 7.60068940719267e-05, "loss": 1.2154, "step": 2228 }, { "epoch": 0.33619909502262446, "grad_norm": 2.3079540729522705, "learning_rate": 7.598634604997292e-05, "loss": 1.5263, "step": 2229 }, { "epoch": 0.33634992458521873, "grad_norm": 1.7749857902526855, "learning_rate": 7.596579201325516e-05, "loss": 0.8901, "step": 2230 }, { "epoch": 0.336500754147813, "grad_norm": 2.021812915802002, "learning_rate": 7.594523196653079e-05, "loss": 1.2007, "step": 2231 }, { "epoch": 0.33665158371040727, "grad_norm": 1.9192273616790771, "learning_rate": 7.592466591455863e-05, "loss": 1.0223, "step": 2232 }, { "epoch": 0.33680241327300153, "grad_norm": 2.409867525100708, "learning_rate": 7.590409386209885e-05, "loss": 1.3169, "step": 2233 }, { "epoch": 0.3369532428355958, "grad_norm": 2.201551914215088, "learning_rate": 7.588351581391304e-05, "loss": 1.5649, "step": 2234 }, { "epoch": 0.33710407239819007, "grad_norm": 2.0027241706848145, "learning_rate": 7.58629317747642e-05, "loss": 1.3738, "step": 2235 }, { "epoch": 0.33725490196078434, "grad_norm": 1.6591501235961914, "learning_rate": 7.584234174941664e-05, "loss": 0.9847, "step": 2236 }, { "epoch": 0.3374057315233786, "grad_norm": 1.8831413984298706, "learning_rate": 7.582174574263612e-05, "loss": 1.1064, "step": 2237 }, { "epoch": 0.3375565610859729, "grad_norm": 2.07102370262146, "learning_rate": 7.580114375918975e-05, "loss": 1.2404, "step": 2238 }, { "epoch": 0.33770739064856714, "grad_norm": 1.708683729171753, "learning_rate": 7.578053580384606e-05, "loss": 0.9443, "step": 2239 }, { "epoch": 0.3378582202111614, "grad_norm": 1.9187439680099487, "learning_rate": 7.575992188137491e-05, "loss": 0.878, "step": 2240 }, { "epoch": 0.3380090497737557, "grad_norm": 2.5119245052337646, "learning_rate": 7.573930199654758e-05, "loss": 1.1124, "step": 2241 }, { "epoch": 0.33815987933634994, "grad_norm": 1.9526264667510986, "learning_rate": 7.571867615413673e-05, "loss": 1.1663, "step": 2242 }, { "epoch": 0.3383107088989442, "grad_norm": 2.2972488403320312, "learning_rate": 7.569804435891638e-05, "loss": 1.0842, "step": 2243 }, { "epoch": 0.3384615384615385, "grad_norm": 2.2375898361206055, "learning_rate": 7.567740661566195e-05, "loss": 1.3138, "step": 2244 }, { "epoch": 0.33861236802413275, "grad_norm": 2.297070026397705, "learning_rate": 7.56567629291502e-05, "loss": 1.2867, "step": 2245 }, { "epoch": 0.338763197586727, "grad_norm": 1.8776723146438599, "learning_rate": 7.563611330415931e-05, "loss": 0.8706, "step": 2246 }, { "epoch": 0.3389140271493213, "grad_norm": 1.4462754726409912, "learning_rate": 7.561545774546881e-05, "loss": 0.638, "step": 2247 }, { "epoch": 0.33906485671191555, "grad_norm": 1.8826711177825928, "learning_rate": 7.559479625785961e-05, "loss": 0.9438, "step": 2248 }, { "epoch": 0.3392156862745098, "grad_norm": 1.756062626838684, "learning_rate": 7.557412884611397e-05, "loss": 0.7521, "step": 2249 }, { "epoch": 0.3393665158371041, "grad_norm": 1.7070274353027344, "learning_rate": 7.555345551501559e-05, "loss": 0.7247, "step": 2250 }, { "epoch": 0.33951734539969836, "grad_norm": 2.089998483657837, "learning_rate": 7.553277626934941e-05, "loss": 1.3055, "step": 2251 }, { "epoch": 0.3396681749622926, "grad_norm": 2.244295835494995, "learning_rate": 7.55120911139019e-05, "loss": 1.6128, "step": 2252 }, { "epoch": 0.3398190045248869, "grad_norm": 2.1364426612854004, "learning_rate": 7.549140005346076e-05, "loss": 1.1633, "step": 2253 }, { "epoch": 0.33996983408748116, "grad_norm": 1.8823668956756592, "learning_rate": 7.547070309281514e-05, "loss": 0.8651, "step": 2254 }, { "epoch": 0.3401206636500754, "grad_norm": 2.2294719219207764, "learning_rate": 7.545000023675555e-05, "loss": 1.3573, "step": 2255 }, { "epoch": 0.3402714932126697, "grad_norm": 2.050429344177246, "learning_rate": 7.542929149007379e-05, "loss": 1.285, "step": 2256 }, { "epoch": 0.34042232277526396, "grad_norm": 2.298964738845825, "learning_rate": 7.540857685756313e-05, "loss": 1.6373, "step": 2257 }, { "epoch": 0.34057315233785823, "grad_norm": 1.911263346672058, "learning_rate": 7.538785634401813e-05, "loss": 1.1129, "step": 2258 }, { "epoch": 0.3407239819004525, "grad_norm": 2.001763343811035, "learning_rate": 7.536712995423474e-05, "loss": 1.2119, "step": 2259 }, { "epoch": 0.34087481146304677, "grad_norm": 2.0650904178619385, "learning_rate": 7.534639769301024e-05, "loss": 1.2629, "step": 2260 }, { "epoch": 0.34102564102564104, "grad_norm": 2.0376245975494385, "learning_rate": 7.532565956514331e-05, "loss": 1.3587, "step": 2261 }, { "epoch": 0.3411764705882353, "grad_norm": 1.945311427116394, "learning_rate": 7.530491557543396e-05, "loss": 1.1364, "step": 2262 }, { "epoch": 0.34132730015082957, "grad_norm": 2.0679149627685547, "learning_rate": 7.528416572868358e-05, "loss": 1.4402, "step": 2263 }, { "epoch": 0.34147812971342384, "grad_norm": 1.9883077144622803, "learning_rate": 7.526341002969488e-05, "loss": 1.3967, "step": 2264 }, { "epoch": 0.3416289592760181, "grad_norm": 1.9844133853912354, "learning_rate": 7.524264848327196e-05, "loss": 1.1791, "step": 2265 }, { "epoch": 0.3417797888386124, "grad_norm": 1.7692663669586182, "learning_rate": 7.522188109422025e-05, "loss": 1.2392, "step": 2266 }, { "epoch": 0.34193061840120664, "grad_norm": 1.6607944965362549, "learning_rate": 7.520110786734655e-05, "loss": 1.1741, "step": 2267 }, { "epoch": 0.3420814479638009, "grad_norm": 1.7081873416900635, "learning_rate": 7.5180328807459e-05, "loss": 1.1646, "step": 2268 }, { "epoch": 0.3422322775263952, "grad_norm": 1.9927161931991577, "learning_rate": 7.515954391936711e-05, "loss": 1.5022, "step": 2269 }, { "epoch": 0.34238310708898945, "grad_norm": 2.045468807220459, "learning_rate": 7.513875320788166e-05, "loss": 1.4222, "step": 2270 }, { "epoch": 0.3425339366515837, "grad_norm": 1.6737682819366455, "learning_rate": 7.511795667781492e-05, "loss": 1.0186, "step": 2271 }, { "epoch": 0.342684766214178, "grad_norm": 1.7843754291534424, "learning_rate": 7.509715433398037e-05, "loss": 1.1809, "step": 2272 }, { "epoch": 0.34283559577677225, "grad_norm": 2.0645439624786377, "learning_rate": 7.507634618119292e-05, "loss": 1.2963, "step": 2273 }, { "epoch": 0.3429864253393665, "grad_norm": 1.6013661623001099, "learning_rate": 7.50555322242688e-05, "loss": 0.8989, "step": 2274 }, { "epoch": 0.3431372549019608, "grad_norm": 1.720360517501831, "learning_rate": 7.503471246802554e-05, "loss": 1.0767, "step": 2275 }, { "epoch": 0.34328808446455505, "grad_norm": 1.9313515424728394, "learning_rate": 7.50138869172821e-05, "loss": 1.3045, "step": 2276 }, { "epoch": 0.3434389140271493, "grad_norm": 1.7261441946029663, "learning_rate": 7.49930555768587e-05, "loss": 1.0328, "step": 2277 }, { "epoch": 0.3435897435897436, "grad_norm": 1.7472411394119263, "learning_rate": 7.497221845157696e-05, "loss": 0.9747, "step": 2278 }, { "epoch": 0.34374057315233786, "grad_norm": 1.9296356439590454, "learning_rate": 7.495137554625979e-05, "loss": 1.2685, "step": 2279 }, { "epoch": 0.3438914027149321, "grad_norm": 1.9970730543136597, "learning_rate": 7.493052686573148e-05, "loss": 1.0093, "step": 2280 }, { "epoch": 0.3440422322775264, "grad_norm": 2.230480909347534, "learning_rate": 7.490967241481762e-05, "loss": 1.6599, "step": 2281 }, { "epoch": 0.34419306184012066, "grad_norm": 2.192351818084717, "learning_rate": 7.488881219834514e-05, "loss": 1.3045, "step": 2282 }, { "epoch": 0.34434389140271493, "grad_norm": 1.8328914642333984, "learning_rate": 7.486794622114236e-05, "loss": 1.1766, "step": 2283 }, { "epoch": 0.3444947209653092, "grad_norm": 2.350728988647461, "learning_rate": 7.484707448803886e-05, "loss": 1.3889, "step": 2284 }, { "epoch": 0.34464555052790347, "grad_norm": 2.1453452110290527, "learning_rate": 7.482619700386558e-05, "loss": 1.4443, "step": 2285 }, { "epoch": 0.34479638009049773, "grad_norm": 1.9765609502792358, "learning_rate": 7.480531377345481e-05, "loss": 1.2881, "step": 2286 }, { "epoch": 0.344947209653092, "grad_norm": 1.759034514427185, "learning_rate": 7.478442480164015e-05, "loss": 1.0697, "step": 2287 }, { "epoch": 0.34509803921568627, "grad_norm": 1.9857261180877686, "learning_rate": 7.476353009325652e-05, "loss": 1.3522, "step": 2288 }, { "epoch": 0.34524886877828054, "grad_norm": 2.029978036880493, "learning_rate": 7.474262965314021e-05, "loss": 1.2803, "step": 2289 }, { "epoch": 0.3453996983408748, "grad_norm": 2.2259232997894287, "learning_rate": 7.472172348612877e-05, "loss": 1.4567, "step": 2290 }, { "epoch": 0.3455505279034691, "grad_norm": 2.0439534187316895, "learning_rate": 7.470081159706112e-05, "loss": 1.1236, "step": 2291 }, { "epoch": 0.34570135746606334, "grad_norm": 1.7691459655761719, "learning_rate": 7.467989399077753e-05, "loss": 0.8909, "step": 2292 }, { "epoch": 0.3458521870286576, "grad_norm": 2.047611951828003, "learning_rate": 7.465897067211955e-05, "loss": 1.1107, "step": 2293 }, { "epoch": 0.3460030165912519, "grad_norm": 2.021305799484253, "learning_rate": 7.463804164593002e-05, "loss": 1.4632, "step": 2294 }, { "epoch": 0.34615384615384615, "grad_norm": 2.4192168712615967, "learning_rate": 7.461710691705318e-05, "loss": 1.4692, "step": 2295 }, { "epoch": 0.3463046757164404, "grad_norm": 1.8810949325561523, "learning_rate": 7.459616649033456e-05, "loss": 0.994, "step": 2296 }, { "epoch": 0.3464555052790347, "grad_norm": 1.9804407358169556, "learning_rate": 7.457522037062099e-05, "loss": 1.1472, "step": 2297 }, { "epoch": 0.34660633484162895, "grad_norm": 1.7141746282577515, "learning_rate": 7.45542685627606e-05, "loss": 0.8735, "step": 2298 }, { "epoch": 0.3467571644042232, "grad_norm": 1.6841880083084106, "learning_rate": 7.453331107160292e-05, "loss": 0.9805, "step": 2299 }, { "epoch": 0.3469079939668175, "grad_norm": 2.2924270629882812, "learning_rate": 7.451234790199872e-05, "loss": 1.4014, "step": 2300 }, { "epoch": 0.34705882352941175, "grad_norm": 2.3225743770599365, "learning_rate": 7.449137905880012e-05, "loss": 1.648, "step": 2301 }, { "epoch": 0.347209653092006, "grad_norm": 2.2067480087280273, "learning_rate": 7.447040454686051e-05, "loss": 1.3106, "step": 2302 }, { "epoch": 0.3473604826546003, "grad_norm": 2.1487810611724854, "learning_rate": 7.444942437103464e-05, "loss": 1.4099, "step": 2303 }, { "epoch": 0.34751131221719456, "grad_norm": 1.7744041681289673, "learning_rate": 7.442843853617853e-05, "loss": 1.1581, "step": 2304 }, { "epoch": 0.3476621417797888, "grad_norm": 2.2377047538757324, "learning_rate": 7.440744704714956e-05, "loss": 1.604, "step": 2305 }, { "epoch": 0.3478129713423831, "grad_norm": 1.8856343030929565, "learning_rate": 7.43864499088064e-05, "loss": 1.2388, "step": 2306 }, { "epoch": 0.34796380090497736, "grad_norm": 2.532244920730591, "learning_rate": 7.4365447126009e-05, "loss": 1.4583, "step": 2307 }, { "epoch": 0.34811463046757163, "grad_norm": 2.331462860107422, "learning_rate": 7.434443870361863e-05, "loss": 1.4647, "step": 2308 }, { "epoch": 0.3482654600301659, "grad_norm": 2.0838348865509033, "learning_rate": 7.432342464649788e-05, "loss": 1.6309, "step": 2309 }, { "epoch": 0.34841628959276016, "grad_norm": 1.927541732788086, "learning_rate": 7.430240495951063e-05, "loss": 1.0943, "step": 2310 }, { "epoch": 0.34856711915535443, "grad_norm": 1.8954256772994995, "learning_rate": 7.428137964752208e-05, "loss": 1.2849, "step": 2311 }, { "epoch": 0.3487179487179487, "grad_norm": 2.1100780963897705, "learning_rate": 7.42603487153987e-05, "loss": 1.4823, "step": 2312 }, { "epoch": 0.34886877828054297, "grad_norm": 2.013159990310669, "learning_rate": 7.423931216800829e-05, "loss": 1.2978, "step": 2313 }, { "epoch": 0.34901960784313724, "grad_norm": 1.8414592742919922, "learning_rate": 7.421827001021995e-05, "loss": 1.1491, "step": 2314 }, { "epoch": 0.3491704374057315, "grad_norm": 2.055443525314331, "learning_rate": 7.419722224690406e-05, "loss": 1.6166, "step": 2315 }, { "epoch": 0.34932126696832577, "grad_norm": 2.1403768062591553, "learning_rate": 7.417616888293231e-05, "loss": 1.512, "step": 2316 }, { "epoch": 0.34947209653092004, "grad_norm": 2.1928815841674805, "learning_rate": 7.415510992317767e-05, "loss": 1.6524, "step": 2317 }, { "epoch": 0.3496229260935143, "grad_norm": 2.0321037769317627, "learning_rate": 7.413404537251443e-05, "loss": 1.3811, "step": 2318 }, { "epoch": 0.3497737556561086, "grad_norm": 2.05094575881958, "learning_rate": 7.411297523581817e-05, "loss": 1.4242, "step": 2319 }, { "epoch": 0.34992458521870284, "grad_norm": 1.596950650215149, "learning_rate": 7.409189951796575e-05, "loss": 1.0775, "step": 2320 }, { "epoch": 0.3500754147812971, "grad_norm": 1.7472995519638062, "learning_rate": 7.40708182238353e-05, "loss": 1.2426, "step": 2321 }, { "epoch": 0.3502262443438914, "grad_norm": 1.8369619846343994, "learning_rate": 7.40497313583063e-05, "loss": 1.342, "step": 2322 }, { "epoch": 0.35037707390648565, "grad_norm": 1.506338119506836, "learning_rate": 7.402863892625948e-05, "loss": 0.9129, "step": 2323 }, { "epoch": 0.3505279034690799, "grad_norm": 2.0480709075927734, "learning_rate": 7.400754093257685e-05, "loss": 1.562, "step": 2324 }, { "epoch": 0.3506787330316742, "grad_norm": 1.9476085901260376, "learning_rate": 7.398643738214175e-05, "loss": 1.3124, "step": 2325 }, { "epoch": 0.35082956259426845, "grad_norm": 2.096043109893799, "learning_rate": 7.396532827983875e-05, "loss": 1.4592, "step": 2326 }, { "epoch": 0.3509803921568627, "grad_norm": 1.7416229248046875, "learning_rate": 7.394421363055375e-05, "loss": 1.1527, "step": 2327 }, { "epoch": 0.351131221719457, "grad_norm": 2.022958278656006, "learning_rate": 7.392309343917391e-05, "loss": 1.4492, "step": 2328 }, { "epoch": 0.35128205128205126, "grad_norm": 2.2271676063537598, "learning_rate": 7.390196771058771e-05, "loss": 1.5584, "step": 2329 }, { "epoch": 0.3514328808446455, "grad_norm": 1.7098373174667358, "learning_rate": 7.388083644968482e-05, "loss": 1.0741, "step": 2330 }, { "epoch": 0.35158371040723985, "grad_norm": 2.1339151859283447, "learning_rate": 7.38596996613563e-05, "loss": 1.3401, "step": 2331 }, { "epoch": 0.3517345399698341, "grad_norm": 1.4728320837020874, "learning_rate": 7.383855735049446e-05, "loss": 0.7087, "step": 2332 }, { "epoch": 0.3518853695324284, "grad_norm": 2.2029800415039062, "learning_rate": 7.38174095219928e-05, "loss": 1.2324, "step": 2333 }, { "epoch": 0.35203619909502265, "grad_norm": 2.099498748779297, "learning_rate": 7.379625618074624e-05, "loss": 1.3093, "step": 2334 }, { "epoch": 0.3521870286576169, "grad_norm": 1.8276475667953491, "learning_rate": 7.377509733165085e-05, "loss": 1.0708, "step": 2335 }, { "epoch": 0.3523378582202112, "grad_norm": 1.7912293672561646, "learning_rate": 7.375393297960407e-05, "loss": 0.9462, "step": 2336 }, { "epoch": 0.35248868778280545, "grad_norm": 1.7835010290145874, "learning_rate": 7.373276312950454e-05, "loss": 0.958, "step": 2337 }, { "epoch": 0.3526395173453997, "grad_norm": 2.1449973583221436, "learning_rate": 7.371158778625225e-05, "loss": 1.3525, "step": 2338 }, { "epoch": 0.352790346907994, "grad_norm": 2.0952136516571045, "learning_rate": 7.369040695474835e-05, "loss": 1.347, "step": 2339 }, { "epoch": 0.35294117647058826, "grad_norm": 2.372382879257202, "learning_rate": 7.366922063989537e-05, "loss": 1.191, "step": 2340 }, { "epoch": 0.3530920060331825, "grad_norm": 2.2610905170440674, "learning_rate": 7.364802884659704e-05, "loss": 1.3754, "step": 2341 }, { "epoch": 0.3532428355957768, "grad_norm": 2.067652702331543, "learning_rate": 7.362683157975841e-05, "loss": 1.2298, "step": 2342 }, { "epoch": 0.35339366515837106, "grad_norm": 2.7515716552734375, "learning_rate": 7.360562884428576e-05, "loss": 1.4184, "step": 2343 }, { "epoch": 0.35354449472096533, "grad_norm": 2.164839506149292, "learning_rate": 7.358442064508665e-05, "loss": 1.3826, "step": 2344 }, { "epoch": 0.3536953242835596, "grad_norm": 2.695064067840576, "learning_rate": 7.356320698706989e-05, "loss": 1.2268, "step": 2345 }, { "epoch": 0.35384615384615387, "grad_norm": 2.0360052585601807, "learning_rate": 7.354198787514557e-05, "loss": 1.1261, "step": 2346 }, { "epoch": 0.35399698340874813, "grad_norm": 1.534145474433899, "learning_rate": 7.352076331422503e-05, "loss": 0.6976, "step": 2347 }, { "epoch": 0.3541478129713424, "grad_norm": 1.8815832138061523, "learning_rate": 7.349953330922089e-05, "loss": 0.8886, "step": 2348 }, { "epoch": 0.35429864253393667, "grad_norm": 1.975032091140747, "learning_rate": 7.3478297865047e-05, "loss": 0.8913, "step": 2349 }, { "epoch": 0.35444947209653094, "grad_norm": 2.109985828399658, "learning_rate": 7.345705698661852e-05, "loss": 1.2473, "step": 2350 }, { "epoch": 0.3546003016591252, "grad_norm": 2.247023344039917, "learning_rate": 7.34358106788518e-05, "loss": 1.489, "step": 2351 }, { "epoch": 0.3547511312217195, "grad_norm": 1.903556227684021, "learning_rate": 7.341455894666449e-05, "loss": 1.3591, "step": 2352 }, { "epoch": 0.35490196078431374, "grad_norm": 2.0889101028442383, "learning_rate": 7.33933017949755e-05, "loss": 1.641, "step": 2353 }, { "epoch": 0.355052790346908, "grad_norm": 1.9119189977645874, "learning_rate": 7.337203922870498e-05, "loss": 1.1404, "step": 2354 }, { "epoch": 0.3552036199095023, "grad_norm": 2.0555646419525146, "learning_rate": 7.335077125277431e-05, "loss": 1.4621, "step": 2355 }, { "epoch": 0.35535444947209655, "grad_norm": 1.9514905214309692, "learning_rate": 7.332949787210616e-05, "loss": 1.1202, "step": 2356 }, { "epoch": 0.3555052790346908, "grad_norm": 1.7376586198806763, "learning_rate": 7.330821909162445e-05, "loss": 1.0375, "step": 2357 }, { "epoch": 0.3556561085972851, "grad_norm": 1.7276861667633057, "learning_rate": 7.328693491625433e-05, "loss": 1.1869, "step": 2358 }, { "epoch": 0.35580693815987935, "grad_norm": 1.9056283235549927, "learning_rate": 7.326564535092221e-05, "loss": 1.2635, "step": 2359 }, { "epoch": 0.3559577677224736, "grad_norm": 1.9361528158187866, "learning_rate": 7.324435040055572e-05, "loss": 1.3737, "step": 2360 }, { "epoch": 0.3561085972850679, "grad_norm": 2.0962777137756348, "learning_rate": 7.322305007008378e-05, "loss": 1.3785, "step": 2361 }, { "epoch": 0.35625942684766215, "grad_norm": 2.3330395221710205, "learning_rate": 7.320174436443654e-05, "loss": 1.8768, "step": 2362 }, { "epoch": 0.3564102564102564, "grad_norm": 1.9897713661193848, "learning_rate": 7.318043328854538e-05, "loss": 1.3987, "step": 2363 }, { "epoch": 0.3565610859728507, "grad_norm": 1.8547635078430176, "learning_rate": 7.315911684734294e-05, "loss": 0.9934, "step": 2364 }, { "epoch": 0.35671191553544496, "grad_norm": 1.9924299716949463, "learning_rate": 7.313779504576309e-05, "loss": 1.3507, "step": 2365 }, { "epoch": 0.3568627450980392, "grad_norm": 1.8534613847732544, "learning_rate": 7.311646788874093e-05, "loss": 1.2912, "step": 2366 }, { "epoch": 0.3570135746606335, "grad_norm": 1.802147626876831, "learning_rate": 7.309513538121284e-05, "loss": 1.1949, "step": 2367 }, { "epoch": 0.35716440422322776, "grad_norm": 1.8739521503448486, "learning_rate": 7.307379752811639e-05, "loss": 1.2863, "step": 2368 }, { "epoch": 0.35731523378582203, "grad_norm": 1.5669344663619995, "learning_rate": 7.305245433439043e-05, "loss": 0.8948, "step": 2369 }, { "epoch": 0.3574660633484163, "grad_norm": 1.8840255737304688, "learning_rate": 7.303110580497501e-05, "loss": 1.175, "step": 2370 }, { "epoch": 0.35761689291101056, "grad_norm": 1.5509527921676636, "learning_rate": 7.300975194481145e-05, "loss": 1.0012, "step": 2371 }, { "epoch": 0.35776772247360483, "grad_norm": 1.7175570726394653, "learning_rate": 7.298839275884227e-05, "loss": 1.0091, "step": 2372 }, { "epoch": 0.3579185520361991, "grad_norm": 2.0445358753204346, "learning_rate": 7.296702825201125e-05, "loss": 1.1653, "step": 2373 }, { "epoch": 0.35806938159879337, "grad_norm": 1.7760766744613647, "learning_rate": 7.294565842926337e-05, "loss": 1.1617, "step": 2374 }, { "epoch": 0.35822021116138764, "grad_norm": 1.6779544353485107, "learning_rate": 7.292428329554487e-05, "loss": 1.0406, "step": 2375 }, { "epoch": 0.3583710407239819, "grad_norm": 1.6946232318878174, "learning_rate": 7.290290285580322e-05, "loss": 1.0536, "step": 2376 }, { "epoch": 0.35852187028657617, "grad_norm": 2.127642869949341, "learning_rate": 7.28815171149871e-05, "loss": 1.0676, "step": 2377 }, { "epoch": 0.35867269984917044, "grad_norm": 2.1614084243774414, "learning_rate": 7.28601260780464e-05, "loss": 1.2977, "step": 2378 }, { "epoch": 0.3588235294117647, "grad_norm": 1.987667202949524, "learning_rate": 7.28387297499323e-05, "loss": 1.0557, "step": 2379 }, { "epoch": 0.358974358974359, "grad_norm": 1.545460820198059, "learning_rate": 7.281732813559714e-05, "loss": 0.8195, "step": 2380 }, { "epoch": 0.35912518853695324, "grad_norm": 2.2628893852233887, "learning_rate": 7.279592123999452e-05, "loss": 1.5361, "step": 2381 }, { "epoch": 0.3592760180995475, "grad_norm": 1.7971796989440918, "learning_rate": 7.277450906807925e-05, "loss": 0.9722, "step": 2382 }, { "epoch": 0.3594268476621418, "grad_norm": 1.8155694007873535, "learning_rate": 7.275309162480735e-05, "loss": 0.9713, "step": 2383 }, { "epoch": 0.35957767722473605, "grad_norm": 2.219168186187744, "learning_rate": 7.273166891513607e-05, "loss": 1.3503, "step": 2384 }, { "epoch": 0.3597285067873303, "grad_norm": 2.0893828868865967, "learning_rate": 7.271024094402392e-05, "loss": 1.342, "step": 2385 }, { "epoch": 0.3598793363499246, "grad_norm": 2.1907238960266113, "learning_rate": 7.268880771643053e-05, "loss": 1.2007, "step": 2386 }, { "epoch": 0.36003016591251885, "grad_norm": 2.1367082595825195, "learning_rate": 7.266736923731686e-05, "loss": 1.1833, "step": 2387 }, { "epoch": 0.3601809954751131, "grad_norm": 1.912649154663086, "learning_rate": 7.264592551164498e-05, "loss": 1.0899, "step": 2388 }, { "epoch": 0.3603318250377074, "grad_norm": 2.347161054611206, "learning_rate": 7.262447654437826e-05, "loss": 1.2988, "step": 2389 }, { "epoch": 0.36048265460030166, "grad_norm": 1.8670896291732788, "learning_rate": 7.260302234048125e-05, "loss": 1.0165, "step": 2390 }, { "epoch": 0.3606334841628959, "grad_norm": 2.172441244125366, "learning_rate": 7.25815629049197e-05, "loss": 1.2734, "step": 2391 }, { "epoch": 0.3607843137254902, "grad_norm": 2.5849685668945312, "learning_rate": 7.256009824266057e-05, "loss": 1.4957, "step": 2392 }, { "epoch": 0.36093514328808446, "grad_norm": 2.1665077209472656, "learning_rate": 7.253862835867205e-05, "loss": 1.2444, "step": 2393 }, { "epoch": 0.3610859728506787, "grad_norm": 2.23895001411438, "learning_rate": 7.251715325792355e-05, "loss": 1.2426, "step": 2394 }, { "epoch": 0.361236802413273, "grad_norm": 2.1842455863952637, "learning_rate": 7.249567294538566e-05, "loss": 1.1407, "step": 2395 }, { "epoch": 0.36138763197586726, "grad_norm": 2.1964778900146484, "learning_rate": 7.247418742603015e-05, "loss": 1.3085, "step": 2396 }, { "epoch": 0.36153846153846153, "grad_norm": 1.694628119468689, "learning_rate": 7.245269670483006e-05, "loss": 0.8773, "step": 2397 }, { "epoch": 0.3616892911010558, "grad_norm": 1.494042992591858, "learning_rate": 7.243120078675959e-05, "loss": 0.6835, "step": 2398 }, { "epoch": 0.36184012066365007, "grad_norm": 1.4933854341506958, "learning_rate": 7.240969967679417e-05, "loss": 0.5897, "step": 2399 }, { "epoch": 0.36199095022624433, "grad_norm": 1.9709001779556274, "learning_rate": 7.23881933799104e-05, "loss": 1.0437, "step": 2400 }, { "epoch": 0.3621417797888386, "grad_norm": 2.494091510772705, "learning_rate": 7.236668190108613e-05, "loss": 1.9258, "step": 2401 }, { "epoch": 0.36229260935143287, "grad_norm": 2.1404917240142822, "learning_rate": 7.234516524530033e-05, "loss": 1.2575, "step": 2402 }, { "epoch": 0.36244343891402714, "grad_norm": 2.0850231647491455, "learning_rate": 7.232364341753323e-05, "loss": 1.4039, "step": 2403 }, { "epoch": 0.3625942684766214, "grad_norm": 2.0382814407348633, "learning_rate": 7.230211642276628e-05, "loss": 1.3796, "step": 2404 }, { "epoch": 0.3627450980392157, "grad_norm": 1.986515998840332, "learning_rate": 7.228058426598205e-05, "loss": 1.4826, "step": 2405 }, { "epoch": 0.36289592760180994, "grad_norm": 2.18184494972229, "learning_rate": 7.225904695216437e-05, "loss": 1.4701, "step": 2406 }, { "epoch": 0.3630467571644042, "grad_norm": 1.8596422672271729, "learning_rate": 7.22375044862982e-05, "loss": 1.1293, "step": 2407 }, { "epoch": 0.3631975867269985, "grad_norm": 2.2695066928863525, "learning_rate": 7.221595687336975e-05, "loss": 1.4897, "step": 2408 }, { "epoch": 0.36334841628959275, "grad_norm": 2.024878978729248, "learning_rate": 7.219440411836642e-05, "loss": 1.4027, "step": 2409 }, { "epoch": 0.363499245852187, "grad_norm": 1.8025548458099365, "learning_rate": 7.217284622627674e-05, "loss": 1.1206, "step": 2410 }, { "epoch": 0.3636500754147813, "grad_norm": 2.056196928024292, "learning_rate": 7.215128320209052e-05, "loss": 1.0985, "step": 2411 }, { "epoch": 0.36380090497737555, "grad_norm": 2.561940908432007, "learning_rate": 7.212971505079867e-05, "loss": 1.4534, "step": 2412 }, { "epoch": 0.3639517345399698, "grad_norm": 1.731815218925476, "learning_rate": 7.210814177739333e-05, "loss": 1.0704, "step": 2413 }, { "epoch": 0.3641025641025641, "grad_norm": 2.2688865661621094, "learning_rate": 7.208656338686784e-05, "loss": 1.5761, "step": 2414 }, { "epoch": 0.36425339366515835, "grad_norm": 1.9325155019760132, "learning_rate": 7.206497988421668e-05, "loss": 1.0377, "step": 2415 }, { "epoch": 0.3644042232277526, "grad_norm": 2.1206870079040527, "learning_rate": 7.204339127443556e-05, "loss": 1.3525, "step": 2416 }, { "epoch": 0.3645550527903469, "grad_norm": 2.0337514877319336, "learning_rate": 7.202179756252132e-05, "loss": 1.3316, "step": 2417 }, { "epoch": 0.36470588235294116, "grad_norm": 1.5754536390304565, "learning_rate": 7.200019875347206e-05, "loss": 0.8318, "step": 2418 }, { "epoch": 0.3648567119155354, "grad_norm": 2.075679063796997, "learning_rate": 7.197859485228696e-05, "loss": 1.2904, "step": 2419 }, { "epoch": 0.3650075414781297, "grad_norm": 2.0816526412963867, "learning_rate": 7.195698586396646e-05, "loss": 1.1519, "step": 2420 }, { "epoch": 0.36515837104072396, "grad_norm": 1.9093371629714966, "learning_rate": 7.193537179351212e-05, "loss": 1.2248, "step": 2421 }, { "epoch": 0.36530920060331823, "grad_norm": 1.8182028532028198, "learning_rate": 7.191375264592673e-05, "loss": 1.1027, "step": 2422 }, { "epoch": 0.3654600301659125, "grad_norm": 1.8110241889953613, "learning_rate": 7.189212842621423e-05, "loss": 1.1933, "step": 2423 }, { "epoch": 0.36561085972850677, "grad_norm": 1.8070435523986816, "learning_rate": 7.18704991393797e-05, "loss": 1.2807, "step": 2424 }, { "epoch": 0.36576168929110103, "grad_norm": 1.7439028024673462, "learning_rate": 7.184886479042945e-05, "loss": 0.9579, "step": 2425 }, { "epoch": 0.3659125188536953, "grad_norm": 1.84995698928833, "learning_rate": 7.182722538437094e-05, "loss": 1.0076, "step": 2426 }, { "epoch": 0.36606334841628957, "grad_norm": 2.1285464763641357, "learning_rate": 7.18055809262128e-05, "loss": 1.4051, "step": 2427 }, { "epoch": 0.36621417797888384, "grad_norm": 1.8858191967010498, "learning_rate": 7.17839314209648e-05, "loss": 1.1843, "step": 2428 }, { "epoch": 0.3663650075414781, "grad_norm": 1.8740215301513672, "learning_rate": 7.176227687363792e-05, "loss": 1.1789, "step": 2429 }, { "epoch": 0.3665158371040724, "grad_norm": 1.977115511894226, "learning_rate": 7.174061728924428e-05, "loss": 1.12, "step": 2430 }, { "epoch": 0.36666666666666664, "grad_norm": 2.0389347076416016, "learning_rate": 7.171895267279721e-05, "loss": 1.3112, "step": 2431 }, { "epoch": 0.3668174962292609, "grad_norm": 1.997484564781189, "learning_rate": 7.169728302931116e-05, "loss": 1.2487, "step": 2432 }, { "epoch": 0.3669683257918552, "grad_norm": 2.012660026550293, "learning_rate": 7.167560836380173e-05, "loss": 1.2786, "step": 2433 }, { "epoch": 0.36711915535444944, "grad_norm": 2.2063143253326416, "learning_rate": 7.165392868128574e-05, "loss": 1.4139, "step": 2434 }, { "epoch": 0.36726998491704377, "grad_norm": 1.9184118509292603, "learning_rate": 7.163224398678112e-05, "loss": 0.9774, "step": 2435 }, { "epoch": 0.36742081447963804, "grad_norm": 2.046160936355591, "learning_rate": 7.161055428530697e-05, "loss": 1.1429, "step": 2436 }, { "epoch": 0.3675716440422323, "grad_norm": 2.0970704555511475, "learning_rate": 7.158885958188361e-05, "loss": 1.3126, "step": 2437 }, { "epoch": 0.36772247360482657, "grad_norm": 2.0939252376556396, "learning_rate": 7.156715988153241e-05, "loss": 1.417, "step": 2438 }, { "epoch": 0.36787330316742084, "grad_norm": 1.9063082933425903, "learning_rate": 7.154545518927598e-05, "loss": 1.1905, "step": 2439 }, { "epoch": 0.3680241327300151, "grad_norm": 2.2680747509002686, "learning_rate": 7.152374551013804e-05, "loss": 1.4692, "step": 2440 }, { "epoch": 0.3681749622926094, "grad_norm": 1.9095778465270996, "learning_rate": 7.15020308491435e-05, "loss": 0.98, "step": 2441 }, { "epoch": 0.36832579185520364, "grad_norm": 1.9860514402389526, "learning_rate": 7.148031121131842e-05, "loss": 1.0191, "step": 2442 }, { "epoch": 0.3684766214177979, "grad_norm": 1.8829677104949951, "learning_rate": 7.145858660168996e-05, "loss": 0.9317, "step": 2443 }, { "epoch": 0.3686274509803922, "grad_norm": 2.1206109523773193, "learning_rate": 7.143685702528648e-05, "loss": 1.0848, "step": 2444 }, { "epoch": 0.36877828054298645, "grad_norm": 1.6453744173049927, "learning_rate": 7.14151224871375e-05, "loss": 0.7027, "step": 2445 }, { "epoch": 0.3689291101055807, "grad_norm": 2.1705117225646973, "learning_rate": 7.139338299227365e-05, "loss": 1.3364, "step": 2446 }, { "epoch": 0.369079939668175, "grad_norm": 1.9286288022994995, "learning_rate": 7.137163854572672e-05, "loss": 0.8883, "step": 2447 }, { "epoch": 0.36923076923076925, "grad_norm": 1.9306164979934692, "learning_rate": 7.134988915252965e-05, "loss": 1.1343, "step": 2448 }, { "epoch": 0.3693815987933635, "grad_norm": 1.6540474891662598, "learning_rate": 7.132813481771651e-05, "loss": 0.6977, "step": 2449 }, { "epoch": 0.3695324283559578, "grad_norm": 1.9735183715820312, "learning_rate": 7.130637554632258e-05, "loss": 0.9893, "step": 2450 }, { "epoch": 0.36968325791855206, "grad_norm": 2.488708972930908, "learning_rate": 7.12846113433842e-05, "loss": 1.857, "step": 2451 }, { "epoch": 0.3698340874811463, "grad_norm": 2.3507988452911377, "learning_rate": 7.126284221393886e-05, "loss": 1.7916, "step": 2452 }, { "epoch": 0.3699849170437406, "grad_norm": 2.445390224456787, "learning_rate": 7.124106816302524e-05, "loss": 1.7084, "step": 2453 }, { "epoch": 0.37013574660633486, "grad_norm": 2.331740379333496, "learning_rate": 7.121928919568313e-05, "loss": 1.9012, "step": 2454 }, { "epoch": 0.3702865761689291, "grad_norm": 1.95292329788208, "learning_rate": 7.119750531695344e-05, "loss": 1.2013, "step": 2455 }, { "epoch": 0.3704374057315234, "grad_norm": 1.9887620210647583, "learning_rate": 7.117571653187827e-05, "loss": 1.1254, "step": 2456 }, { "epoch": 0.37058823529411766, "grad_norm": 1.8293858766555786, "learning_rate": 7.11539228455008e-05, "loss": 1.1908, "step": 2457 }, { "epoch": 0.37073906485671193, "grad_norm": 2.0632123947143555, "learning_rate": 7.113212426286537e-05, "loss": 1.3997, "step": 2458 }, { "epoch": 0.3708898944193062, "grad_norm": 2.0067272186279297, "learning_rate": 7.111032078901745e-05, "loss": 1.2658, "step": 2459 }, { "epoch": 0.37104072398190047, "grad_norm": 1.7523579597473145, "learning_rate": 7.108851242900366e-05, "loss": 1.2048, "step": 2460 }, { "epoch": 0.37119155354449473, "grad_norm": 2.0724599361419678, "learning_rate": 7.10666991878717e-05, "loss": 1.3201, "step": 2461 }, { "epoch": 0.371342383107089, "grad_norm": 1.9547722339630127, "learning_rate": 7.104488107067045e-05, "loss": 1.3133, "step": 2462 }, { "epoch": 0.37149321266968327, "grad_norm": 1.6978693008422852, "learning_rate": 7.102305808244989e-05, "loss": 0.9956, "step": 2463 }, { "epoch": 0.37164404223227754, "grad_norm": 2.1613686084747314, "learning_rate": 7.100123022826116e-05, "loss": 1.3826, "step": 2464 }, { "epoch": 0.3717948717948718, "grad_norm": 1.923836588859558, "learning_rate": 7.09793975131565e-05, "loss": 0.9651, "step": 2465 }, { "epoch": 0.3719457013574661, "grad_norm": 1.6593579053878784, "learning_rate": 7.095755994218929e-05, "loss": 0.8778, "step": 2466 }, { "epoch": 0.37209653092006034, "grad_norm": 1.865933895111084, "learning_rate": 7.093571752041399e-05, "loss": 1.0086, "step": 2467 }, { "epoch": 0.3722473604826546, "grad_norm": 2.0929365158081055, "learning_rate": 7.091387025288622e-05, "loss": 1.4938, "step": 2468 }, { "epoch": 0.3723981900452489, "grad_norm": 2.077238082885742, "learning_rate": 7.089201814466276e-05, "loss": 1.3839, "step": 2469 }, { "epoch": 0.37254901960784315, "grad_norm": 1.708426833152771, "learning_rate": 7.087016120080145e-05, "loss": 0.9823, "step": 2470 }, { "epoch": 0.3726998491704374, "grad_norm": 2.0948245525360107, "learning_rate": 7.084829942636124e-05, "loss": 1.139, "step": 2471 }, { "epoch": 0.3728506787330317, "grad_norm": 2.181133985519409, "learning_rate": 7.082643282640227e-05, "loss": 1.3865, "step": 2472 }, { "epoch": 0.37300150829562595, "grad_norm": 2.093658208847046, "learning_rate": 7.080456140598571e-05, "loss": 1.3535, "step": 2473 }, { "epoch": 0.3731523378582202, "grad_norm": 2.137423515319824, "learning_rate": 7.078268517017393e-05, "loss": 1.1888, "step": 2474 }, { "epoch": 0.3733031674208145, "grad_norm": 2.286235809326172, "learning_rate": 7.076080412403035e-05, "loss": 1.3886, "step": 2475 }, { "epoch": 0.37345399698340875, "grad_norm": 1.8436098098754883, "learning_rate": 7.073891827261952e-05, "loss": 1.2031, "step": 2476 }, { "epoch": 0.373604826546003, "grad_norm": 2.0970165729522705, "learning_rate": 7.071702762100713e-05, "loss": 1.318, "step": 2477 }, { "epoch": 0.3737556561085973, "grad_norm": 2.010972023010254, "learning_rate": 7.069513217425995e-05, "loss": 1.3482, "step": 2478 }, { "epoch": 0.37390648567119156, "grad_norm": 1.9952596426010132, "learning_rate": 7.067323193744586e-05, "loss": 1.0355, "step": 2479 }, { "epoch": 0.3740573152337858, "grad_norm": 2.0795116424560547, "learning_rate": 7.065132691563388e-05, "loss": 1.3991, "step": 2480 }, { "epoch": 0.3742081447963801, "grad_norm": 2.2211523056030273, "learning_rate": 7.06294171138941e-05, "loss": 1.3215, "step": 2481 }, { "epoch": 0.37435897435897436, "grad_norm": 2.4253602027893066, "learning_rate": 7.060750253729774e-05, "loss": 1.6236, "step": 2482 }, { "epoch": 0.37450980392156863, "grad_norm": 1.8192031383514404, "learning_rate": 7.058558319091712e-05, "loss": 1.0421, "step": 2483 }, { "epoch": 0.3746606334841629, "grad_norm": 1.946062684059143, "learning_rate": 7.056365907982568e-05, "loss": 1.3732, "step": 2484 }, { "epoch": 0.37481146304675717, "grad_norm": 2.1307430267333984, "learning_rate": 7.054173020909789e-05, "loss": 1.4839, "step": 2485 }, { "epoch": 0.37496229260935143, "grad_norm": 1.798385739326477, "learning_rate": 7.051979658380942e-05, "loss": 0.8582, "step": 2486 }, { "epoch": 0.3751131221719457, "grad_norm": 1.934238314628601, "learning_rate": 7.049785820903698e-05, "loss": 1.1522, "step": 2487 }, { "epoch": 0.37526395173453997, "grad_norm": 2.1408348083496094, "learning_rate": 7.047591508985841e-05, "loss": 1.3254, "step": 2488 }, { "epoch": 0.37541478129713424, "grad_norm": 1.807942509651184, "learning_rate": 7.045396723135263e-05, "loss": 1.0359, "step": 2489 }, { "epoch": 0.3755656108597285, "grad_norm": 1.9899126291275024, "learning_rate": 7.043201463859964e-05, "loss": 1.115, "step": 2490 }, { "epoch": 0.3757164404223228, "grad_norm": 2.1206085681915283, "learning_rate": 7.041005731668057e-05, "loss": 1.1937, "step": 2491 }, { "epoch": 0.37586726998491704, "grad_norm": 1.9888256788253784, "learning_rate": 7.038809527067766e-05, "loss": 1.2277, "step": 2492 }, { "epoch": 0.3760180995475113, "grad_norm": 1.901181697845459, "learning_rate": 7.036612850567418e-05, "loss": 0.9946, "step": 2493 }, { "epoch": 0.3761689291101056, "grad_norm": 2.1443917751312256, "learning_rate": 7.034415702675454e-05, "loss": 1.0734, "step": 2494 }, { "epoch": 0.37631975867269984, "grad_norm": 2.2028348445892334, "learning_rate": 7.032218083900423e-05, "loss": 1.1708, "step": 2495 }, { "epoch": 0.3764705882352941, "grad_norm": 2.0165109634399414, "learning_rate": 7.030019994750983e-05, "loss": 1.0821, "step": 2496 }, { "epoch": 0.3766214177978884, "grad_norm": 1.7434908151626587, "learning_rate": 7.027821435735898e-05, "loss": 0.8385, "step": 2497 }, { "epoch": 0.37677224736048265, "grad_norm": 2.106576919555664, "learning_rate": 7.025622407364049e-05, "loss": 1.0926, "step": 2498 }, { "epoch": 0.3769230769230769, "grad_norm": 2.277604579925537, "learning_rate": 7.023422910144413e-05, "loss": 1.1298, "step": 2499 }, { "epoch": 0.3770739064856712, "grad_norm": 1.9283312559127808, "learning_rate": 7.021222944586088e-05, "loss": 1.0183, "step": 2500 }, { "epoch": 0.37722473604826545, "grad_norm": 2.3869149684906006, "learning_rate": 7.019022511198274e-05, "loss": 1.7024, "step": 2501 }, { "epoch": 0.3773755656108597, "grad_norm": 2.338660478591919, "learning_rate": 7.01682161049028e-05, "loss": 1.5961, "step": 2502 }, { "epoch": 0.377526395173454, "grad_norm": 2.5335779190063477, "learning_rate": 7.014620242971522e-05, "loss": 1.665, "step": 2503 }, { "epoch": 0.37767722473604826, "grad_norm": 2.112314224243164, "learning_rate": 7.012418409151526e-05, "loss": 1.597, "step": 2504 }, { "epoch": 0.3778280542986425, "grad_norm": 1.993316888809204, "learning_rate": 7.010216109539927e-05, "loss": 1.3646, "step": 2505 }, { "epoch": 0.3779788838612368, "grad_norm": 1.8795030117034912, "learning_rate": 7.008013344646465e-05, "loss": 1.3316, "step": 2506 }, { "epoch": 0.37812971342383106, "grad_norm": 1.7489924430847168, "learning_rate": 7.005810114980991e-05, "loss": 1.0525, "step": 2507 }, { "epoch": 0.3782805429864253, "grad_norm": 1.8315353393554688, "learning_rate": 7.003606421053457e-05, "loss": 0.9918, "step": 2508 }, { "epoch": 0.3784313725490196, "grad_norm": 2.012483596801758, "learning_rate": 7.001402263373931e-05, "loss": 1.364, "step": 2509 }, { "epoch": 0.37858220211161386, "grad_norm": 1.9914672374725342, "learning_rate": 6.999197642452583e-05, "loss": 1.3511, "step": 2510 }, { "epoch": 0.37873303167420813, "grad_norm": 1.7541953325271606, "learning_rate": 6.99699255879969e-05, "loss": 1.0867, "step": 2511 }, { "epoch": 0.3788838612368024, "grad_norm": 1.7081209421157837, "learning_rate": 6.994787012925643e-05, "loss": 1.0199, "step": 2512 }, { "epoch": 0.37903469079939667, "grad_norm": 1.8453751802444458, "learning_rate": 6.992581005340928e-05, "loss": 1.1746, "step": 2513 }, { "epoch": 0.37918552036199094, "grad_norm": 1.8641068935394287, "learning_rate": 6.990374536556148e-05, "loss": 1.1222, "step": 2514 }, { "epoch": 0.3793363499245852, "grad_norm": 1.974021553993225, "learning_rate": 6.988167607082009e-05, "loss": 1.4436, "step": 2515 }, { "epoch": 0.37948717948717947, "grad_norm": 1.8714640140533447, "learning_rate": 6.985960217429323e-05, "loss": 1.1841, "step": 2516 }, { "epoch": 0.37963800904977374, "grad_norm": 1.965423345565796, "learning_rate": 6.98375236810901e-05, "loss": 1.1896, "step": 2517 }, { "epoch": 0.379788838612368, "grad_norm": 1.8604180812835693, "learning_rate": 6.981544059632095e-05, "loss": 1.0987, "step": 2518 }, { "epoch": 0.3799396681749623, "grad_norm": 1.950122594833374, "learning_rate": 6.97933529250971e-05, "loss": 1.3379, "step": 2519 }, { "epoch": 0.38009049773755654, "grad_norm": 2.0621063709259033, "learning_rate": 6.977126067253096e-05, "loss": 1.4844, "step": 2520 }, { "epoch": 0.3802413273001508, "grad_norm": 2.0624144077301025, "learning_rate": 6.974916384373594e-05, "loss": 1.2969, "step": 2521 }, { "epoch": 0.3803921568627451, "grad_norm": 1.6318641901016235, "learning_rate": 6.972706244382655e-05, "loss": 0.9229, "step": 2522 }, { "epoch": 0.38054298642533935, "grad_norm": 1.674439549446106, "learning_rate": 6.970495647791836e-05, "loss": 0.9989, "step": 2523 }, { "epoch": 0.3806938159879336, "grad_norm": 1.9600105285644531, "learning_rate": 6.968284595112797e-05, "loss": 1.1959, "step": 2524 }, { "epoch": 0.3808446455505279, "grad_norm": 1.8055658340454102, "learning_rate": 6.966073086857305e-05, "loss": 0.9809, "step": 2525 }, { "epoch": 0.38099547511312215, "grad_norm": 1.8366382122039795, "learning_rate": 6.963861123537236e-05, "loss": 1.0629, "step": 2526 }, { "epoch": 0.3811463046757164, "grad_norm": 2.0379841327667236, "learning_rate": 6.961648705664566e-05, "loss": 1.3393, "step": 2527 }, { "epoch": 0.3812971342383107, "grad_norm": 2.186140298843384, "learning_rate": 6.959435833751375e-05, "loss": 1.3574, "step": 2528 }, { "epoch": 0.38144796380090495, "grad_norm": 1.7631288766860962, "learning_rate": 6.957222508309857e-05, "loss": 1.0774, "step": 2529 }, { "epoch": 0.3815987933634992, "grad_norm": 2.0925354957580566, "learning_rate": 6.955008729852301e-05, "loss": 1.1192, "step": 2530 }, { "epoch": 0.3817496229260935, "grad_norm": 2.0266642570495605, "learning_rate": 6.952794498891107e-05, "loss": 1.148, "step": 2531 }, { "epoch": 0.38190045248868776, "grad_norm": 1.8688682317733765, "learning_rate": 6.950579815938778e-05, "loss": 1.1411, "step": 2532 }, { "epoch": 0.382051282051282, "grad_norm": 1.8874269723892212, "learning_rate": 6.948364681507921e-05, "loss": 1.2611, "step": 2533 }, { "epoch": 0.3822021116138763, "grad_norm": 2.359656810760498, "learning_rate": 6.946149096111247e-05, "loss": 1.4742, "step": 2534 }, { "epoch": 0.38235294117647056, "grad_norm": 1.8739774227142334, "learning_rate": 6.943933060261576e-05, "loss": 1.0494, "step": 2535 }, { "epoch": 0.38250377073906483, "grad_norm": 1.8639079332351685, "learning_rate": 6.941716574471823e-05, "loss": 1.0827, "step": 2536 }, { "epoch": 0.3826546003016591, "grad_norm": 2.567532777786255, "learning_rate": 6.939499639255017e-05, "loss": 1.9228, "step": 2537 }, { "epoch": 0.38280542986425337, "grad_norm": 2.1746270656585693, "learning_rate": 6.937282255124286e-05, "loss": 1.2794, "step": 2538 }, { "epoch": 0.3829562594268477, "grad_norm": 2.408900022506714, "learning_rate": 6.93506442259286e-05, "loss": 1.6125, "step": 2539 }, { "epoch": 0.38310708898944196, "grad_norm": 1.7325302362442017, "learning_rate": 6.93284614217408e-05, "loss": 0.9016, "step": 2540 }, { "epoch": 0.3832579185520362, "grad_norm": 2.0310189723968506, "learning_rate": 6.930627414381383e-05, "loss": 1.1201, "step": 2541 }, { "epoch": 0.3834087481146305, "grad_norm": 1.9837300777435303, "learning_rate": 6.928408239728314e-05, "loss": 1.1996, "step": 2542 }, { "epoch": 0.38355957767722476, "grad_norm": 1.6560924053192139, "learning_rate": 6.926188618728519e-05, "loss": 0.7987, "step": 2543 }, { "epoch": 0.38371040723981903, "grad_norm": 2.4077885150909424, "learning_rate": 6.923968551895747e-05, "loss": 1.3757, "step": 2544 }, { "epoch": 0.3838612368024133, "grad_norm": 2.040168285369873, "learning_rate": 6.921748039743858e-05, "loss": 1.0227, "step": 2545 }, { "epoch": 0.38401206636500756, "grad_norm": 2.055284261703491, "learning_rate": 6.919527082786802e-05, "loss": 0.8139, "step": 2546 }, { "epoch": 0.38416289592760183, "grad_norm": 1.5688884258270264, "learning_rate": 6.91730568153864e-05, "loss": 0.6564, "step": 2547 }, { "epoch": 0.3843137254901961, "grad_norm": 2.030174732208252, "learning_rate": 6.915083836513536e-05, "loss": 0.8854, "step": 2548 }, { "epoch": 0.38446455505279037, "grad_norm": 1.7057172060012817, "learning_rate": 6.912861548225755e-05, "loss": 0.6599, "step": 2549 }, { "epoch": 0.38461538461538464, "grad_norm": 2.113065004348755, "learning_rate": 6.910638817189666e-05, "loss": 1.0981, "step": 2550 }, { "epoch": 0.3847662141779789, "grad_norm": 2.386643648147583, "learning_rate": 6.908415643919736e-05, "loss": 1.7435, "step": 2551 }, { "epoch": 0.3849170437405732, "grad_norm": 2.4367661476135254, "learning_rate": 6.90619202893054e-05, "loss": 1.6021, "step": 2552 }, { "epoch": 0.38506787330316744, "grad_norm": 1.8641273975372314, "learning_rate": 6.903967972736752e-05, "loss": 1.044, "step": 2553 }, { "epoch": 0.3852187028657617, "grad_norm": 2.1890385150909424, "learning_rate": 6.90174347585315e-05, "loss": 1.4597, "step": 2554 }, { "epoch": 0.385369532428356, "grad_norm": 2.3145968914031982, "learning_rate": 6.899518538794611e-05, "loss": 1.6921, "step": 2555 }, { "epoch": 0.38552036199095024, "grad_norm": 2.27569317817688, "learning_rate": 6.897293162076118e-05, "loss": 1.2726, "step": 2556 }, { "epoch": 0.3856711915535445, "grad_norm": 2.014270544052124, "learning_rate": 6.895067346212752e-05, "loss": 1.266, "step": 2557 }, { "epoch": 0.3858220211161388, "grad_norm": 2.11973237991333, "learning_rate": 6.8928410917197e-05, "loss": 1.3344, "step": 2558 }, { "epoch": 0.38597285067873305, "grad_norm": 2.055311679840088, "learning_rate": 6.890614399112245e-05, "loss": 1.3266, "step": 2559 }, { "epoch": 0.3861236802413273, "grad_norm": 2.0286660194396973, "learning_rate": 6.888387268905773e-05, "loss": 1.1978, "step": 2560 }, { "epoch": 0.3862745098039216, "grad_norm": 1.9377142190933228, "learning_rate": 6.886159701615777e-05, "loss": 1.2293, "step": 2561 }, { "epoch": 0.38642533936651585, "grad_norm": 2.1626415252685547, "learning_rate": 6.883931697757844e-05, "loss": 1.371, "step": 2562 }, { "epoch": 0.3865761689291101, "grad_norm": 2.0159502029418945, "learning_rate": 6.881703257847665e-05, "loss": 1.1708, "step": 2563 }, { "epoch": 0.3867269984917044, "grad_norm": 1.7816298007965088, "learning_rate": 6.879474382401031e-05, "loss": 0.9514, "step": 2564 }, { "epoch": 0.38687782805429866, "grad_norm": 2.2429261207580566, "learning_rate": 6.877245071933836e-05, "loss": 1.4573, "step": 2565 }, { "epoch": 0.3870286576168929, "grad_norm": 1.9230639934539795, "learning_rate": 6.875015326962071e-05, "loss": 1.1363, "step": 2566 }, { "epoch": 0.3871794871794872, "grad_norm": 2.0780699253082275, "learning_rate": 6.872785148001831e-05, "loss": 1.3524, "step": 2567 }, { "epoch": 0.38733031674208146, "grad_norm": 1.9140557050704956, "learning_rate": 6.870554535569311e-05, "loss": 1.1538, "step": 2568 }, { "epoch": 0.3874811463046757, "grad_norm": 1.9624210596084595, "learning_rate": 6.868323490180804e-05, "loss": 1.4554, "step": 2569 }, { "epoch": 0.38763197586727, "grad_norm": 2.1547279357910156, "learning_rate": 6.866092012352706e-05, "loss": 1.3239, "step": 2570 }, { "epoch": 0.38778280542986426, "grad_norm": 2.010479211807251, "learning_rate": 6.86386010260151e-05, "loss": 0.9221, "step": 2571 }, { "epoch": 0.38793363499245853, "grad_norm": 1.9042800664901733, "learning_rate": 6.861627761443813e-05, "loss": 1.155, "step": 2572 }, { "epoch": 0.3880844645550528, "grad_norm": 1.9643280506134033, "learning_rate": 6.859394989396307e-05, "loss": 1.1617, "step": 2573 }, { "epoch": 0.38823529411764707, "grad_norm": 1.8143097162246704, "learning_rate": 6.857161786975788e-05, "loss": 1.0259, "step": 2574 }, { "epoch": 0.38838612368024134, "grad_norm": 1.9691681861877441, "learning_rate": 6.854928154699151e-05, "loss": 1.3832, "step": 2575 }, { "epoch": 0.3885369532428356, "grad_norm": 1.5354695320129395, "learning_rate": 6.852694093083389e-05, "loss": 0.8642, "step": 2576 }, { "epoch": 0.38868778280542987, "grad_norm": 1.8112379312515259, "learning_rate": 6.850459602645594e-05, "loss": 1.1473, "step": 2577 }, { "epoch": 0.38883861236802414, "grad_norm": 2.2512474060058594, "learning_rate": 6.848224683902956e-05, "loss": 1.3467, "step": 2578 }, { "epoch": 0.3889894419306184, "grad_norm": 1.9121710062026978, "learning_rate": 6.845989337372769e-05, "loss": 1.2293, "step": 2579 }, { "epoch": 0.3891402714932127, "grad_norm": 2.1618075370788574, "learning_rate": 6.843753563572423e-05, "loss": 1.2493, "step": 2580 }, { "epoch": 0.38929110105580694, "grad_norm": 1.9280670881271362, "learning_rate": 6.841517363019407e-05, "loss": 1.3629, "step": 2581 }, { "epoch": 0.3894419306184012, "grad_norm": 1.8057018518447876, "learning_rate": 6.83928073623131e-05, "loss": 1.0205, "step": 2582 }, { "epoch": 0.3895927601809955, "grad_norm": 1.9001094102859497, "learning_rate": 6.837043683725815e-05, "loss": 1.1912, "step": 2583 }, { "epoch": 0.38974358974358975, "grad_norm": 1.621059775352478, "learning_rate": 6.834806206020712e-05, "loss": 0.8056, "step": 2584 }, { "epoch": 0.389894419306184, "grad_norm": 2.3446195125579834, "learning_rate": 6.83256830363388e-05, "loss": 1.4065, "step": 2585 }, { "epoch": 0.3900452488687783, "grad_norm": 1.9667116403579712, "learning_rate": 6.830329977083303e-05, "loss": 1.2265, "step": 2586 }, { "epoch": 0.39019607843137255, "grad_norm": 2.032541275024414, "learning_rate": 6.828091226887061e-05, "loss": 1.1119, "step": 2587 }, { "epoch": 0.3903469079939668, "grad_norm": 1.996535301208496, "learning_rate": 6.825852053563331e-05, "loss": 1.1462, "step": 2588 }, { "epoch": 0.3904977375565611, "grad_norm": 2.2289507389068604, "learning_rate": 6.82361245763039e-05, "loss": 1.2264, "step": 2589 }, { "epoch": 0.39064856711915535, "grad_norm": 2.1390058994293213, "learning_rate": 6.821372439606613e-05, "loss": 1.1398, "step": 2590 }, { "epoch": 0.3907993966817496, "grad_norm": 1.870766043663025, "learning_rate": 6.819132000010469e-05, "loss": 1.111, "step": 2591 }, { "epoch": 0.3909502262443439, "grad_norm": 2.0597574710845947, "learning_rate": 6.816891139360528e-05, "loss": 1.087, "step": 2592 }, { "epoch": 0.39110105580693816, "grad_norm": 2.145270824432373, "learning_rate": 6.814649858175457e-05, "loss": 1.5399, "step": 2593 }, { "epoch": 0.3912518853695324, "grad_norm": 2.1077022552490234, "learning_rate": 6.812408156974018e-05, "loss": 1.2244, "step": 2594 }, { "epoch": 0.3914027149321267, "grad_norm": 1.4308724403381348, "learning_rate": 6.810166036275075e-05, "loss": 0.6696, "step": 2595 }, { "epoch": 0.39155354449472096, "grad_norm": 1.8217495679855347, "learning_rate": 6.807923496597584e-05, "loss": 0.9506, "step": 2596 }, { "epoch": 0.39170437405731523, "grad_norm": 1.6813448667526245, "learning_rate": 6.805680538460601e-05, "loss": 0.8474, "step": 2597 }, { "epoch": 0.3918552036199095, "grad_norm": 1.7697834968566895, "learning_rate": 6.803437162383279e-05, "loss": 0.7625, "step": 2598 }, { "epoch": 0.39200603318250377, "grad_norm": 2.166346311569214, "learning_rate": 6.801193368884866e-05, "loss": 1.1464, "step": 2599 }, { "epoch": 0.39215686274509803, "grad_norm": 1.8601603507995605, "learning_rate": 6.798949158484705e-05, "loss": 1.0113, "step": 2600 }, { "epoch": 0.3923076923076923, "grad_norm": 2.014188051223755, "learning_rate": 6.796704531702243e-05, "loss": 1.3546, "step": 2601 }, { "epoch": 0.39245852187028657, "grad_norm": 1.7226791381835938, "learning_rate": 6.794459489057013e-05, "loss": 0.7979, "step": 2602 }, { "epoch": 0.39260935143288084, "grad_norm": 2.0911614894866943, "learning_rate": 6.792214031068652e-05, "loss": 1.2459, "step": 2603 }, { "epoch": 0.3927601809954751, "grad_norm": 1.9533742666244507, "learning_rate": 6.78996815825689e-05, "loss": 1.3554, "step": 2604 }, { "epoch": 0.3929110105580694, "grad_norm": 2.059835195541382, "learning_rate": 6.787721871141554e-05, "loss": 1.274, "step": 2605 }, { "epoch": 0.39306184012066364, "grad_norm": 1.8437566757202148, "learning_rate": 6.785475170242568e-05, "loss": 1.1178, "step": 2606 }, { "epoch": 0.3932126696832579, "grad_norm": 1.8338737487792969, "learning_rate": 6.783228056079947e-05, "loss": 0.858, "step": 2607 }, { "epoch": 0.3933634992458522, "grad_norm": 1.8517431020736694, "learning_rate": 6.780980529173807e-05, "loss": 1.1378, "step": 2608 }, { "epoch": 0.39351432880844645, "grad_norm": 2.0894675254821777, "learning_rate": 6.778732590044357e-05, "loss": 1.3109, "step": 2609 }, { "epoch": 0.3936651583710407, "grad_norm": 2.120586633682251, "learning_rate": 6.776484239211904e-05, "loss": 1.3928, "step": 2610 }, { "epoch": 0.393815987933635, "grad_norm": 2.257617235183716, "learning_rate": 6.774235477196844e-05, "loss": 1.579, "step": 2611 }, { "epoch": 0.39396681749622925, "grad_norm": 1.9821752309799194, "learning_rate": 6.771986304519676e-05, "loss": 1.1906, "step": 2612 }, { "epoch": 0.3941176470588235, "grad_norm": 1.9296094179153442, "learning_rate": 6.769736721700989e-05, "loss": 1.2769, "step": 2613 }, { "epoch": 0.3942684766214178, "grad_norm": 2.1100258827209473, "learning_rate": 6.767486729261467e-05, "loss": 1.3991, "step": 2614 }, { "epoch": 0.39441930618401205, "grad_norm": 1.989810585975647, "learning_rate": 6.765236327721894e-05, "loss": 1.2577, "step": 2615 }, { "epoch": 0.3945701357466063, "grad_norm": 1.8405725955963135, "learning_rate": 6.762985517603142e-05, "loss": 1.1603, "step": 2616 }, { "epoch": 0.3947209653092006, "grad_norm": 2.02946400642395, "learning_rate": 6.76073429942618e-05, "loss": 1.3146, "step": 2617 }, { "epoch": 0.39487179487179486, "grad_norm": 1.7022359371185303, "learning_rate": 6.758482673712072e-05, "loss": 0.8331, "step": 2618 }, { "epoch": 0.3950226244343891, "grad_norm": 2.3115081787109375, "learning_rate": 6.756230640981979e-05, "loss": 1.3079, "step": 2619 }, { "epoch": 0.3951734539969834, "grad_norm": 2.02268385887146, "learning_rate": 6.75397820175715e-05, "loss": 1.4851, "step": 2620 }, { "epoch": 0.39532428355957766, "grad_norm": 1.7962958812713623, "learning_rate": 6.751725356558934e-05, "loss": 1.1028, "step": 2621 }, { "epoch": 0.39547511312217193, "grad_norm": 1.7929502725601196, "learning_rate": 6.749472105908768e-05, "loss": 1.3278, "step": 2622 }, { "epoch": 0.3956259426847662, "grad_norm": 1.972873330116272, "learning_rate": 6.74721845032819e-05, "loss": 1.2636, "step": 2623 }, { "epoch": 0.39577677224736046, "grad_norm": 2.0597586631774902, "learning_rate": 6.744964390338829e-05, "loss": 1.3396, "step": 2624 }, { "epoch": 0.39592760180995473, "grad_norm": 1.8006325960159302, "learning_rate": 6.7427099264624e-05, "loss": 1.2595, "step": 2625 }, { "epoch": 0.396078431372549, "grad_norm": 1.7516613006591797, "learning_rate": 6.740455059220725e-05, "loss": 0.9981, "step": 2626 }, { "epoch": 0.39622926093514327, "grad_norm": 2.0178840160369873, "learning_rate": 6.73819978913571e-05, "loss": 1.2874, "step": 2627 }, { "epoch": 0.39638009049773754, "grad_norm": 2.109095811843872, "learning_rate": 6.735944116729356e-05, "loss": 1.0952, "step": 2628 }, { "epoch": 0.3965309200603318, "grad_norm": 1.851090908050537, "learning_rate": 6.733688042523759e-05, "loss": 1.1055, "step": 2629 }, { "epoch": 0.39668174962292607, "grad_norm": 2.161010265350342, "learning_rate": 6.731431567041106e-05, "loss": 1.5796, "step": 2630 }, { "epoch": 0.39683257918552034, "grad_norm": 1.7434622049331665, "learning_rate": 6.729174690803678e-05, "loss": 1.055, "step": 2631 }, { "epoch": 0.3969834087481146, "grad_norm": 2.0520665645599365, "learning_rate": 6.726917414333847e-05, "loss": 1.1205, "step": 2632 }, { "epoch": 0.3971342383107089, "grad_norm": 2.0461955070495605, "learning_rate": 6.724659738154084e-05, "loss": 1.3492, "step": 2633 }, { "epoch": 0.39728506787330314, "grad_norm": 1.9782538414001465, "learning_rate": 6.722401662786942e-05, "loss": 1.3461, "step": 2634 }, { "epoch": 0.3974358974358974, "grad_norm": 1.724668025970459, "learning_rate": 6.720143188755074e-05, "loss": 1.1688, "step": 2635 }, { "epoch": 0.3975867269984917, "grad_norm": 2.1678504943847656, "learning_rate": 6.717884316581225e-05, "loss": 1.4058, "step": 2636 }, { "epoch": 0.39773755656108595, "grad_norm": 1.9905818700790405, "learning_rate": 6.715625046788228e-05, "loss": 1.3983, "step": 2637 }, { "epoch": 0.3978883861236802, "grad_norm": 1.9268549680709839, "learning_rate": 6.713365379899012e-05, "loss": 1.0173, "step": 2638 }, { "epoch": 0.3980392156862745, "grad_norm": 1.9744762182235718, "learning_rate": 6.711105316436595e-05, "loss": 1.0152, "step": 2639 }, { "epoch": 0.39819004524886875, "grad_norm": 2.298093795776367, "learning_rate": 6.70884485692409e-05, "loss": 1.3509, "step": 2640 }, { "epoch": 0.398340874811463, "grad_norm": 1.7891029119491577, "learning_rate": 6.706584001884697e-05, "loss": 0.9567, "step": 2641 }, { "epoch": 0.39849170437405734, "grad_norm": 2.0799646377563477, "learning_rate": 6.704322751841714e-05, "loss": 1.3149, "step": 2642 }, { "epoch": 0.3986425339366516, "grad_norm": 2.288794755935669, "learning_rate": 6.702061107318525e-05, "loss": 1.0528, "step": 2643 }, { "epoch": 0.3987933634992459, "grad_norm": 2.140857219696045, "learning_rate": 6.699799068838609e-05, "loss": 1.3338, "step": 2644 }, { "epoch": 0.39894419306184015, "grad_norm": 2.408792734146118, "learning_rate": 6.69753663692553e-05, "loss": 1.3682, "step": 2645 }, { "epoch": 0.3990950226244344, "grad_norm": 2.2024271488189697, "learning_rate": 6.69527381210295e-05, "loss": 1.0513, "step": 2646 }, { "epoch": 0.3992458521870287, "grad_norm": 1.7288520336151123, "learning_rate": 6.693010594894622e-05, "loss": 0.7427, "step": 2647 }, { "epoch": 0.39939668174962295, "grad_norm": 1.8335210084915161, "learning_rate": 6.690746985824383e-05, "loss": 0.8164, "step": 2648 }, { "epoch": 0.3995475113122172, "grad_norm": 1.844871163368225, "learning_rate": 6.688482985416166e-05, "loss": 0.9322, "step": 2649 }, { "epoch": 0.3996983408748115, "grad_norm": 2.0186030864715576, "learning_rate": 6.686218594193993e-05, "loss": 1.0857, "step": 2650 }, { "epoch": 0.39984917043740575, "grad_norm": 2.322162628173828, "learning_rate": 6.683953812681979e-05, "loss": 1.6409, "step": 2651 }, { "epoch": 0.4, "grad_norm": 1.9377840757369995, "learning_rate": 6.681688641404327e-05, "loss": 1.0516, "step": 2652 }, { "epoch": 0.4001508295625943, "grad_norm": 1.855031967163086, "learning_rate": 6.679423080885327e-05, "loss": 1.0048, "step": 2653 }, { "epoch": 0.40030165912518856, "grad_norm": 2.118563413619995, "learning_rate": 6.677157131649367e-05, "loss": 1.4895, "step": 2654 }, { "epoch": 0.4004524886877828, "grad_norm": 2.202331066131592, "learning_rate": 6.674890794220916e-05, "loss": 1.4856, "step": 2655 }, { "epoch": 0.4006033182503771, "grad_norm": 2.1290743350982666, "learning_rate": 6.672624069124542e-05, "loss": 1.6974, "step": 2656 }, { "epoch": 0.40075414781297136, "grad_norm": 1.8632421493530273, "learning_rate": 6.670356956884895e-05, "loss": 1.2216, "step": 2657 }, { "epoch": 0.40090497737556563, "grad_norm": 1.5878190994262695, "learning_rate": 6.668089458026718e-05, "loss": 0.8068, "step": 2658 }, { "epoch": 0.4010558069381599, "grad_norm": 2.1977851390838623, "learning_rate": 6.665821573074843e-05, "loss": 1.5192, "step": 2659 }, { "epoch": 0.40120663650075417, "grad_norm": 2.252800464630127, "learning_rate": 6.663553302554195e-05, "loss": 1.3789, "step": 2660 }, { "epoch": 0.40135746606334843, "grad_norm": 2.0761117935180664, "learning_rate": 6.661284646989779e-05, "loss": 1.4052, "step": 2661 }, { "epoch": 0.4015082956259427, "grad_norm": 1.8737459182739258, "learning_rate": 6.6590156069067e-05, "loss": 1.2266, "step": 2662 }, { "epoch": 0.40165912518853697, "grad_norm": 1.9868550300598145, "learning_rate": 6.656746182830143e-05, "loss": 1.2074, "step": 2663 }, { "epoch": 0.40180995475113124, "grad_norm": 1.7318787574768066, "learning_rate": 6.65447637528539e-05, "loss": 1.0716, "step": 2664 }, { "epoch": 0.4019607843137255, "grad_norm": 1.6505789756774902, "learning_rate": 6.652206184797802e-05, "loss": 0.8898, "step": 2665 }, { "epoch": 0.4021116138763198, "grad_norm": 1.831339716911316, "learning_rate": 6.649935611892839e-05, "loss": 1.1156, "step": 2666 }, { "epoch": 0.40226244343891404, "grad_norm": 1.874295949935913, "learning_rate": 6.647664657096042e-05, "loss": 1.1775, "step": 2667 }, { "epoch": 0.4024132730015083, "grad_norm": 1.7088813781738281, "learning_rate": 6.645393320933046e-05, "loss": 0.9921, "step": 2668 }, { "epoch": 0.4025641025641026, "grad_norm": 1.7923787832260132, "learning_rate": 6.643121603929568e-05, "loss": 0.8583, "step": 2669 }, { "epoch": 0.40271493212669685, "grad_norm": 2.196300506591797, "learning_rate": 6.640849506611418e-05, "loss": 1.6857, "step": 2670 }, { "epoch": 0.4028657616892911, "grad_norm": 1.835006594657898, "learning_rate": 6.638577029504492e-05, "loss": 1.0712, "step": 2671 }, { "epoch": 0.4030165912518854, "grad_norm": 1.722980260848999, "learning_rate": 6.636304173134776e-05, "loss": 0.9573, "step": 2672 }, { "epoch": 0.40316742081447965, "grad_norm": 1.7671836614608765, "learning_rate": 6.63403093802834e-05, "loss": 0.8821, "step": 2673 }, { "epoch": 0.4033182503770739, "grad_norm": 2.231600046157837, "learning_rate": 6.631757324711346e-05, "loss": 1.4247, "step": 2674 }, { "epoch": 0.4034690799396682, "grad_norm": 1.7666263580322266, "learning_rate": 6.629483333710039e-05, "loss": 1.138, "step": 2675 }, { "epoch": 0.40361990950226245, "grad_norm": 1.9056721925735474, "learning_rate": 6.627208965550758e-05, "loss": 1.1367, "step": 2676 }, { "epoch": 0.4037707390648567, "grad_norm": 1.7723126411437988, "learning_rate": 6.62493422075992e-05, "loss": 1.1873, "step": 2677 }, { "epoch": 0.403921568627451, "grad_norm": 2.236968755722046, "learning_rate": 6.622659099864037e-05, "loss": 1.4738, "step": 2678 }, { "epoch": 0.40407239819004526, "grad_norm": 2.1398849487304688, "learning_rate": 6.620383603389705e-05, "loss": 1.2898, "step": 2679 }, { "epoch": 0.4042232277526395, "grad_norm": 2.0881850719451904, "learning_rate": 6.618107731863608e-05, "loss": 1.2645, "step": 2680 }, { "epoch": 0.4043740573152338, "grad_norm": 1.9718806743621826, "learning_rate": 6.615831485812515e-05, "loss": 1.2715, "step": 2681 }, { "epoch": 0.40452488687782806, "grad_norm": 2.177408456802368, "learning_rate": 6.613554865763284e-05, "loss": 1.3829, "step": 2682 }, { "epoch": 0.40467571644042233, "grad_norm": 2.3238956928253174, "learning_rate": 6.611277872242856e-05, "loss": 1.5409, "step": 2683 }, { "epoch": 0.4048265460030166, "grad_norm": 1.9984525442123413, "learning_rate": 6.609000505778263e-05, "loss": 1.0949, "step": 2684 }, { "epoch": 0.40497737556561086, "grad_norm": 2.0751354694366455, "learning_rate": 6.606722766896622e-05, "loss": 1.1533, "step": 2685 }, { "epoch": 0.40512820512820513, "grad_norm": 2.001845598220825, "learning_rate": 6.604444656125132e-05, "loss": 1.0139, "step": 2686 }, { "epoch": 0.4052790346907994, "grad_norm": 2.906510591506958, "learning_rate": 6.602166173991083e-05, "loss": 1.2243, "step": 2687 }, { "epoch": 0.40542986425339367, "grad_norm": 2.0637574195861816, "learning_rate": 6.59988732102185e-05, "loss": 1.2956, "step": 2688 }, { "epoch": 0.40558069381598794, "grad_norm": 2.348588705062866, "learning_rate": 6.597608097744893e-05, "loss": 1.1898, "step": 2689 }, { "epoch": 0.4057315233785822, "grad_norm": 1.8921582698822021, "learning_rate": 6.595328504687758e-05, "loss": 0.9565, "step": 2690 }, { "epoch": 0.40588235294117647, "grad_norm": 2.1882147789001465, "learning_rate": 6.593048542378075e-05, "loss": 1.365, "step": 2691 }, { "epoch": 0.40603318250377074, "grad_norm": 2.5717673301696777, "learning_rate": 6.590768211343562e-05, "loss": 1.354, "step": 2692 }, { "epoch": 0.406184012066365, "grad_norm": 2.3142945766448975, "learning_rate": 6.588487512112023e-05, "loss": 1.3402, "step": 2693 }, { "epoch": 0.4063348416289593, "grad_norm": 2.2006869316101074, "learning_rate": 6.586206445211342e-05, "loss": 1.3033, "step": 2694 }, { "epoch": 0.40648567119155354, "grad_norm": 1.9459795951843262, "learning_rate": 6.583925011169495e-05, "loss": 1.0401, "step": 2695 }, { "epoch": 0.4066365007541478, "grad_norm": 1.822084903717041, "learning_rate": 6.581643210514536e-05, "loss": 0.8368, "step": 2696 }, { "epoch": 0.4067873303167421, "grad_norm": 1.8946164846420288, "learning_rate": 6.579361043774613e-05, "loss": 1.036, "step": 2697 }, { "epoch": 0.40693815987933635, "grad_norm": 2.0425612926483154, "learning_rate": 6.577078511477945e-05, "loss": 1.0734, "step": 2698 }, { "epoch": 0.4070889894419306, "grad_norm": 1.9128081798553467, "learning_rate": 6.574795614152853e-05, "loss": 0.8298, "step": 2699 }, { "epoch": 0.4072398190045249, "grad_norm": 2.140578031539917, "learning_rate": 6.572512352327727e-05, "loss": 1.1627, "step": 2700 }, { "epoch": 0.40739064856711915, "grad_norm": 2.004373788833618, "learning_rate": 6.570228726531049e-05, "loss": 1.6551, "step": 2701 }, { "epoch": 0.4075414781297134, "grad_norm": 2.2464518547058105, "learning_rate": 6.567944737291384e-05, "loss": 1.7322, "step": 2702 }, { "epoch": 0.4076923076923077, "grad_norm": 2.008788824081421, "learning_rate": 6.565660385137382e-05, "loss": 1.5549, "step": 2703 }, { "epoch": 0.40784313725490196, "grad_norm": 2.234952449798584, "learning_rate": 6.563375670597776e-05, "loss": 1.8503, "step": 2704 }, { "epoch": 0.4079939668174962, "grad_norm": 2.0310752391815186, "learning_rate": 6.561090594201381e-05, "loss": 1.2535, "step": 2705 }, { "epoch": 0.4081447963800905, "grad_norm": 2.061178207397461, "learning_rate": 6.558805156477099e-05, "loss": 1.5362, "step": 2706 }, { "epoch": 0.40829562594268476, "grad_norm": 2.1164286136627197, "learning_rate": 6.556519357953914e-05, "loss": 1.6077, "step": 2707 }, { "epoch": 0.408446455505279, "grad_norm": 1.722432017326355, "learning_rate": 6.554233199160896e-05, "loss": 1.0598, "step": 2708 }, { "epoch": 0.4085972850678733, "grad_norm": 2.0625057220458984, "learning_rate": 6.551946680627192e-05, "loss": 1.2537, "step": 2709 }, { "epoch": 0.40874811463046756, "grad_norm": 1.875905990600586, "learning_rate": 6.549659802882039e-05, "loss": 1.1066, "step": 2710 }, { "epoch": 0.40889894419306183, "grad_norm": 1.6067169904708862, "learning_rate": 6.547372566454754e-05, "loss": 0.9757, "step": 2711 }, { "epoch": 0.4090497737556561, "grad_norm": 1.760211706161499, "learning_rate": 6.545084971874738e-05, "loss": 0.9674, "step": 2712 }, { "epoch": 0.40920060331825037, "grad_norm": 1.9359079599380493, "learning_rate": 6.542797019671474e-05, "loss": 1.324, "step": 2713 }, { "epoch": 0.40935143288084463, "grad_norm": 2.165308952331543, "learning_rate": 6.540508710374529e-05, "loss": 1.3991, "step": 2714 }, { "epoch": 0.4095022624434389, "grad_norm": 2.3355894088745117, "learning_rate": 6.53822004451355e-05, "loss": 1.3984, "step": 2715 }, { "epoch": 0.40965309200603317, "grad_norm": 2.365321159362793, "learning_rate": 6.535931022618271e-05, "loss": 1.1099, "step": 2716 }, { "epoch": 0.40980392156862744, "grad_norm": 1.937690258026123, "learning_rate": 6.533641645218506e-05, "loss": 1.0623, "step": 2717 }, { "epoch": 0.4099547511312217, "grad_norm": 2.186981439590454, "learning_rate": 6.531351912844149e-05, "loss": 1.4069, "step": 2718 }, { "epoch": 0.410105580693816, "grad_norm": 2.448000192642212, "learning_rate": 6.529061826025178e-05, "loss": 1.594, "step": 2719 }, { "epoch": 0.41025641025641024, "grad_norm": 2.146296977996826, "learning_rate": 6.526771385291657e-05, "loss": 1.2535, "step": 2720 }, { "epoch": 0.4104072398190045, "grad_norm": 2.0685575008392334, "learning_rate": 6.524480591173725e-05, "loss": 1.1738, "step": 2721 }, { "epoch": 0.4105580693815988, "grad_norm": 2.1229248046875, "learning_rate": 6.522189444201608e-05, "loss": 1.3829, "step": 2722 }, { "epoch": 0.41070889894419305, "grad_norm": 1.99869966506958, "learning_rate": 6.519897944905609e-05, "loss": 1.2458, "step": 2723 }, { "epoch": 0.4108597285067873, "grad_norm": 1.99562668800354, "learning_rate": 6.517606093816116e-05, "loss": 1.2373, "step": 2724 }, { "epoch": 0.4110105580693816, "grad_norm": 2.110825300216675, "learning_rate": 6.5153138914636e-05, "loss": 1.4709, "step": 2725 }, { "epoch": 0.41116138763197585, "grad_norm": 1.9057080745697021, "learning_rate": 6.51302133837861e-05, "loss": 1.134, "step": 2726 }, { "epoch": 0.4113122171945701, "grad_norm": 2.1121714115142822, "learning_rate": 6.510728435091777e-05, "loss": 1.3428, "step": 2727 }, { "epoch": 0.4114630467571644, "grad_norm": 1.9966111183166504, "learning_rate": 6.508435182133812e-05, "loss": 1.2286, "step": 2728 }, { "epoch": 0.41161387631975865, "grad_norm": 1.9155805110931396, "learning_rate": 6.506141580035512e-05, "loss": 1.1099, "step": 2729 }, { "epoch": 0.4117647058823529, "grad_norm": 2.147678852081299, "learning_rate": 6.503847629327745e-05, "loss": 1.2967, "step": 2730 }, { "epoch": 0.4119155354449472, "grad_norm": 1.8277705907821655, "learning_rate": 6.50155333054147e-05, "loss": 1.176, "step": 2731 }, { "epoch": 0.41206636500754146, "grad_norm": 1.9834309816360474, "learning_rate": 6.499258684207723e-05, "loss": 1.2076, "step": 2732 }, { "epoch": 0.4122171945701357, "grad_norm": 1.9439367055892944, "learning_rate": 6.496963690857615e-05, "loss": 1.2272, "step": 2733 }, { "epoch": 0.41236802413273, "grad_norm": 2.1126301288604736, "learning_rate": 6.494668351022349e-05, "loss": 1.2297, "step": 2734 }, { "epoch": 0.41251885369532426, "grad_norm": 2.129175901412964, "learning_rate": 6.492372665233192e-05, "loss": 0.9854, "step": 2735 }, { "epoch": 0.41266968325791853, "grad_norm": 2.0370118618011475, "learning_rate": 6.490076634021509e-05, "loss": 1.223, "step": 2736 }, { "epoch": 0.4128205128205128, "grad_norm": 2.077777147293091, "learning_rate": 6.487780257918732e-05, "loss": 1.2472, "step": 2737 }, { "epoch": 0.41297134238310707, "grad_norm": 2.011540412902832, "learning_rate": 6.485483537456377e-05, "loss": 1.2447, "step": 2738 }, { "epoch": 0.41312217194570133, "grad_norm": 2.146353244781494, "learning_rate": 6.483186473166041e-05, "loss": 1.3664, "step": 2739 }, { "epoch": 0.4132730015082956, "grad_norm": 1.9017188549041748, "learning_rate": 6.480889065579398e-05, "loss": 1.0843, "step": 2740 }, { "epoch": 0.41342383107088987, "grad_norm": 2.22025203704834, "learning_rate": 6.478591315228206e-05, "loss": 1.1971, "step": 2741 }, { "epoch": 0.41357466063348414, "grad_norm": 2.1650569438934326, "learning_rate": 6.476293222644295e-05, "loss": 0.904, "step": 2742 }, { "epoch": 0.4137254901960784, "grad_norm": 2.1381373405456543, "learning_rate": 6.473994788359581e-05, "loss": 1.3786, "step": 2743 }, { "epoch": 0.4138763197586727, "grad_norm": 1.6839525699615479, "learning_rate": 6.471696012906055e-05, "loss": 0.6954, "step": 2744 }, { "epoch": 0.41402714932126694, "grad_norm": 2.077660322189331, "learning_rate": 6.469396896815788e-05, "loss": 0.9619, "step": 2745 }, { "epoch": 0.41417797888386126, "grad_norm": 1.9662799835205078, "learning_rate": 6.467097440620934e-05, "loss": 1.0626, "step": 2746 }, { "epoch": 0.41432880844645553, "grad_norm": 1.6570369005203247, "learning_rate": 6.464797644853717e-05, "loss": 0.8372, "step": 2747 }, { "epoch": 0.4144796380090498, "grad_norm": 1.693311095237732, "learning_rate": 6.462497510046448e-05, "loss": 0.7841, "step": 2748 }, { "epoch": 0.41463046757164407, "grad_norm": 1.8975443840026855, "learning_rate": 6.46019703673151e-05, "loss": 0.8465, "step": 2749 }, { "epoch": 0.41478129713423834, "grad_norm": 2.2894816398620605, "learning_rate": 6.457896225441373e-05, "loss": 1.0411, "step": 2750 }, { "epoch": 0.4149321266968326, "grad_norm": 2.428711175918579, "learning_rate": 6.455595076708573e-05, "loss": 1.5081, "step": 2751 }, { "epoch": 0.41508295625942687, "grad_norm": 2.3841662406921387, "learning_rate": 6.453293591065733e-05, "loss": 1.7102, "step": 2752 }, { "epoch": 0.41523378582202114, "grad_norm": 2.135828733444214, "learning_rate": 6.450991769045553e-05, "loss": 1.2262, "step": 2753 }, { "epoch": 0.4153846153846154, "grad_norm": 2.259650468826294, "learning_rate": 6.44868961118081e-05, "loss": 1.5031, "step": 2754 }, { "epoch": 0.4155354449472097, "grad_norm": 2.2646007537841797, "learning_rate": 6.446387118004357e-05, "loss": 1.4591, "step": 2755 }, { "epoch": 0.41568627450980394, "grad_norm": 2.0325136184692383, "learning_rate": 6.444084290049127e-05, "loss": 1.3013, "step": 2756 }, { "epoch": 0.4158371040723982, "grad_norm": 1.7546310424804688, "learning_rate": 6.441781127848126e-05, "loss": 1.0481, "step": 2757 }, { "epoch": 0.4159879336349925, "grad_norm": 2.053043842315674, "learning_rate": 6.439477631934444e-05, "loss": 1.379, "step": 2758 }, { "epoch": 0.41613876319758675, "grad_norm": 1.8677970170974731, "learning_rate": 6.437173802841244e-05, "loss": 1.152, "step": 2759 }, { "epoch": 0.416289592760181, "grad_norm": 1.7475125789642334, "learning_rate": 6.434869641101768e-05, "loss": 1.0391, "step": 2760 }, { "epoch": 0.4164404223227753, "grad_norm": 1.9766830205917358, "learning_rate": 6.432565147249335e-05, "loss": 1.2742, "step": 2761 }, { "epoch": 0.41659125188536955, "grad_norm": 1.7530118227005005, "learning_rate": 6.430260321817336e-05, "loss": 0.9156, "step": 2762 }, { "epoch": 0.4167420814479638, "grad_norm": 1.8336827754974365, "learning_rate": 6.427955165339247e-05, "loss": 1.0455, "step": 2763 }, { "epoch": 0.4168929110105581, "grad_norm": 2.2341561317443848, "learning_rate": 6.425649678348614e-05, "loss": 1.2512, "step": 2764 }, { "epoch": 0.41704374057315236, "grad_norm": 2.2067108154296875, "learning_rate": 6.423343861379063e-05, "loss": 1.1677, "step": 2765 }, { "epoch": 0.4171945701357466, "grad_norm": 1.7967073917388916, "learning_rate": 6.421037714964293e-05, "loss": 0.8051, "step": 2766 }, { "epoch": 0.4173453996983409, "grad_norm": 2.425626039505005, "learning_rate": 6.418731239638083e-05, "loss": 1.2929, "step": 2767 }, { "epoch": 0.41749622926093516, "grad_norm": 2.0256762504577637, "learning_rate": 6.416424435934289e-05, "loss": 1.2503, "step": 2768 }, { "epoch": 0.4176470588235294, "grad_norm": 1.6871517896652222, "learning_rate": 6.414117304386837e-05, "loss": 0.791, "step": 2769 }, { "epoch": 0.4177978883861237, "grad_norm": 1.9966320991516113, "learning_rate": 6.411809845529735e-05, "loss": 1.302, "step": 2770 }, { "epoch": 0.41794871794871796, "grad_norm": 2.3911185264587402, "learning_rate": 6.409502059897061e-05, "loss": 1.3493, "step": 2771 }, { "epoch": 0.41809954751131223, "grad_norm": 1.9732463359832764, "learning_rate": 6.407193948022975e-05, "loss": 1.1865, "step": 2772 }, { "epoch": 0.4182503770739065, "grad_norm": 2.071732997894287, "learning_rate": 6.404885510441709e-05, "loss": 1.3296, "step": 2773 }, { "epoch": 0.41840120663650077, "grad_norm": 2.1904828548431396, "learning_rate": 6.402576747687572e-05, "loss": 1.21, "step": 2774 }, { "epoch": 0.41855203619909503, "grad_norm": 2.2029149532318115, "learning_rate": 6.400267660294942e-05, "loss": 1.1446, "step": 2775 }, { "epoch": 0.4187028657616893, "grad_norm": 2.030404567718506, "learning_rate": 6.397958248798282e-05, "loss": 1.2321, "step": 2776 }, { "epoch": 0.41885369532428357, "grad_norm": 1.885515570640564, "learning_rate": 6.395648513732125e-05, "loss": 0.9618, "step": 2777 }, { "epoch": 0.41900452488687784, "grad_norm": 1.808356761932373, "learning_rate": 6.393338455631077e-05, "loss": 0.9644, "step": 2778 }, { "epoch": 0.4191553544494721, "grad_norm": 1.9324374198913574, "learning_rate": 6.391028075029822e-05, "loss": 1.1962, "step": 2779 }, { "epoch": 0.4193061840120664, "grad_norm": 2.328951597213745, "learning_rate": 6.388717372463115e-05, "loss": 1.1906, "step": 2780 }, { "epoch": 0.41945701357466064, "grad_norm": 2.008695363998413, "learning_rate": 6.386406348465792e-05, "loss": 1.2852, "step": 2781 }, { "epoch": 0.4196078431372549, "grad_norm": 1.8685520887374878, "learning_rate": 6.384095003572758e-05, "loss": 0.8466, "step": 2782 }, { "epoch": 0.4197586726998492, "grad_norm": 2.090329885482788, "learning_rate": 6.381783338318993e-05, "loss": 1.0613, "step": 2783 }, { "epoch": 0.41990950226244345, "grad_norm": 2.006880044937134, "learning_rate": 6.379471353239553e-05, "loss": 1.3454, "step": 2784 }, { "epoch": 0.4200603318250377, "grad_norm": 2.176910161972046, "learning_rate": 6.377159048869564e-05, "loss": 1.0764, "step": 2785 }, { "epoch": 0.420211161387632, "grad_norm": 2.008969306945801, "learning_rate": 6.374846425744233e-05, "loss": 1.2837, "step": 2786 }, { "epoch": 0.42036199095022625, "grad_norm": 2.207596778869629, "learning_rate": 6.372533484398834e-05, "loss": 1.4658, "step": 2787 }, { "epoch": 0.4205128205128205, "grad_norm": 1.8495585918426514, "learning_rate": 6.370220225368717e-05, "loss": 1.056, "step": 2788 }, { "epoch": 0.4206636500754148, "grad_norm": 1.978991150856018, "learning_rate": 6.367906649189308e-05, "loss": 1.1435, "step": 2789 }, { "epoch": 0.42081447963800905, "grad_norm": 2.159968852996826, "learning_rate": 6.365592756396101e-05, "loss": 1.1823, "step": 2790 }, { "epoch": 0.4209653092006033, "grad_norm": 2.041513204574585, "learning_rate": 6.36327854752467e-05, "loss": 1.0438, "step": 2791 }, { "epoch": 0.4211161387631976, "grad_norm": 2.076815605163574, "learning_rate": 6.360964023110657e-05, "loss": 1.0762, "step": 2792 }, { "epoch": 0.42126696832579186, "grad_norm": 1.9797546863555908, "learning_rate": 6.358649183689777e-05, "loss": 0.9937, "step": 2793 }, { "epoch": 0.4214177978883861, "grad_norm": 2.214568853378296, "learning_rate": 6.356334029797822e-05, "loss": 1.1865, "step": 2794 }, { "epoch": 0.4215686274509804, "grad_norm": 1.8349162340164185, "learning_rate": 6.354018561970654e-05, "loss": 0.9273, "step": 2795 }, { "epoch": 0.42171945701357466, "grad_norm": 1.741288661956787, "learning_rate": 6.351702780744205e-05, "loss": 0.7666, "step": 2796 }, { "epoch": 0.42187028657616893, "grad_norm": 1.7897742986679077, "learning_rate": 6.349386686654488e-05, "loss": 0.8732, "step": 2797 }, { "epoch": 0.4220211161387632, "grad_norm": 1.6462560892105103, "learning_rate": 6.347070280237579e-05, "loss": 0.8284, "step": 2798 }, { "epoch": 0.42217194570135747, "grad_norm": 1.5053852796554565, "learning_rate": 6.344753562029629e-05, "loss": 0.6126, "step": 2799 }, { "epoch": 0.42232277526395173, "grad_norm": 1.9441418647766113, "learning_rate": 6.342436532566865e-05, "loss": 0.699, "step": 2800 }, { "epoch": 0.422473604826546, "grad_norm": 2.627267599105835, "learning_rate": 6.340119192385583e-05, "loss": 1.5759, "step": 2801 }, { "epoch": 0.42262443438914027, "grad_norm": 2.0830185413360596, "learning_rate": 6.337801542022155e-05, "loss": 1.1677, "step": 2802 }, { "epoch": 0.42277526395173454, "grad_norm": 2.2712857723236084, "learning_rate": 6.335483582013014e-05, "loss": 1.5018, "step": 2803 }, { "epoch": 0.4229260935143288, "grad_norm": 2.2649953365325928, "learning_rate": 6.333165312894675e-05, "loss": 1.5677, "step": 2804 }, { "epoch": 0.4230769230769231, "grad_norm": 2.3456218242645264, "learning_rate": 6.330846735203724e-05, "loss": 1.5533, "step": 2805 }, { "epoch": 0.42322775263951734, "grad_norm": 2.0057284832000732, "learning_rate": 6.328527849476811e-05, "loss": 1.2981, "step": 2806 }, { "epoch": 0.4233785822021116, "grad_norm": 1.8295812606811523, "learning_rate": 6.326208656250668e-05, "loss": 1.1976, "step": 2807 }, { "epoch": 0.4235294117647059, "grad_norm": 2.0203964710235596, "learning_rate": 6.323889156062087e-05, "loss": 1.3864, "step": 2808 }, { "epoch": 0.42368024132730014, "grad_norm": 2.173598051071167, "learning_rate": 6.321569349447938e-05, "loss": 1.3016, "step": 2809 }, { "epoch": 0.4238310708898944, "grad_norm": 2.476740837097168, "learning_rate": 6.319249236945162e-05, "loss": 2.1854, "step": 2810 }, { "epoch": 0.4239819004524887, "grad_norm": 2.113797426223755, "learning_rate": 6.316928819090767e-05, "loss": 1.0882, "step": 2811 }, { "epoch": 0.42413273001508295, "grad_norm": 2.0698344707489014, "learning_rate": 6.314608096421833e-05, "loss": 1.3624, "step": 2812 }, { "epoch": 0.4242835595776772, "grad_norm": 2.3796372413635254, "learning_rate": 6.312287069475513e-05, "loss": 1.4515, "step": 2813 }, { "epoch": 0.4244343891402715, "grad_norm": 1.9347970485687256, "learning_rate": 6.309965738789029e-05, "loss": 1.1221, "step": 2814 }, { "epoch": 0.42458521870286575, "grad_norm": 1.956421136856079, "learning_rate": 6.307644104899673e-05, "loss": 1.2376, "step": 2815 }, { "epoch": 0.42473604826546, "grad_norm": 1.9439444541931152, "learning_rate": 6.305322168344805e-05, "loss": 1.032, "step": 2816 }, { "epoch": 0.4248868778280543, "grad_norm": 1.8301547765731812, "learning_rate": 6.302999929661861e-05, "loss": 1.0572, "step": 2817 }, { "epoch": 0.42503770739064856, "grad_norm": 1.9464551210403442, "learning_rate": 6.30067738938834e-05, "loss": 1.2686, "step": 2818 }, { "epoch": 0.4251885369532428, "grad_norm": 1.9293017387390137, "learning_rate": 6.298354548061814e-05, "loss": 1.2919, "step": 2819 }, { "epoch": 0.4253393665158371, "grad_norm": 2.0347659587860107, "learning_rate": 6.296031406219927e-05, "loss": 1.203, "step": 2820 }, { "epoch": 0.42549019607843136, "grad_norm": 2.008296012878418, "learning_rate": 6.293707964400389e-05, "loss": 1.2729, "step": 2821 }, { "epoch": 0.4256410256410256, "grad_norm": 2.0835814476013184, "learning_rate": 6.291384223140979e-05, "loss": 1.3074, "step": 2822 }, { "epoch": 0.4257918552036199, "grad_norm": 2.3613128662109375, "learning_rate": 6.289060182979549e-05, "loss": 1.5107, "step": 2823 }, { "epoch": 0.42594268476621416, "grad_norm": 1.9428207874298096, "learning_rate": 6.286735844454018e-05, "loss": 1.2589, "step": 2824 }, { "epoch": 0.42609351432880843, "grad_norm": 1.9004138708114624, "learning_rate": 6.284411208102374e-05, "loss": 1.1324, "step": 2825 }, { "epoch": 0.4262443438914027, "grad_norm": 2.014880418777466, "learning_rate": 6.282086274462674e-05, "loss": 1.3026, "step": 2826 }, { "epoch": 0.42639517345399697, "grad_norm": 2.301804780960083, "learning_rate": 6.279761044073044e-05, "loss": 1.3847, "step": 2827 }, { "epoch": 0.42654600301659124, "grad_norm": 1.9066057205200195, "learning_rate": 6.277435517471678e-05, "loss": 1.1218, "step": 2828 }, { "epoch": 0.4266968325791855, "grad_norm": 1.9419662952423096, "learning_rate": 6.275109695196841e-05, "loss": 1.1778, "step": 2829 }, { "epoch": 0.42684766214177977, "grad_norm": 2.249624013900757, "learning_rate": 6.272783577786862e-05, "loss": 1.4176, "step": 2830 }, { "epoch": 0.42699849170437404, "grad_norm": 2.051431655883789, "learning_rate": 6.270457165780142e-05, "loss": 1.096, "step": 2831 }, { "epoch": 0.4271493212669683, "grad_norm": 2.0919408798217773, "learning_rate": 6.268130459715152e-05, "loss": 1.2612, "step": 2832 }, { "epoch": 0.4273001508295626, "grad_norm": 1.9640942811965942, "learning_rate": 6.265803460130423e-05, "loss": 1.11, "step": 2833 }, { "epoch": 0.42745098039215684, "grad_norm": 2.076467752456665, "learning_rate": 6.263476167564562e-05, "loss": 1.1609, "step": 2834 }, { "epoch": 0.4276018099547511, "grad_norm": 2.077592611312866, "learning_rate": 6.261148582556242e-05, "loss": 1.1905, "step": 2835 }, { "epoch": 0.4277526395173454, "grad_norm": 2.0116591453552246, "learning_rate": 6.258820705644201e-05, "loss": 1.0095, "step": 2836 }, { "epoch": 0.42790346907993965, "grad_norm": 1.9714961051940918, "learning_rate": 6.256492537367246e-05, "loss": 1.1024, "step": 2837 }, { "epoch": 0.4280542986425339, "grad_norm": 2.1512722969055176, "learning_rate": 6.254164078264252e-05, "loss": 1.1866, "step": 2838 }, { "epoch": 0.4282051282051282, "grad_norm": 2.299185037612915, "learning_rate": 6.251835328874163e-05, "loss": 1.1831, "step": 2839 }, { "epoch": 0.42835595776772245, "grad_norm": 2.531789541244507, "learning_rate": 6.249506289735985e-05, "loss": 1.8316, "step": 2840 }, { "epoch": 0.4285067873303167, "grad_norm": 2.3749351501464844, "learning_rate": 6.247176961388795e-05, "loss": 1.5348, "step": 2841 }, { "epoch": 0.428657616892911, "grad_norm": 2.1463191509246826, "learning_rate": 6.244847344371737e-05, "loss": 1.1862, "step": 2842 }, { "epoch": 0.42880844645550525, "grad_norm": 1.840556263923645, "learning_rate": 6.24251743922402e-05, "loss": 1.1208, "step": 2843 }, { "epoch": 0.4289592760180995, "grad_norm": 2.2424192428588867, "learning_rate": 6.240187246484923e-05, "loss": 1.302, "step": 2844 }, { "epoch": 0.4291101055806938, "grad_norm": 2.127702236175537, "learning_rate": 6.237856766693788e-05, "loss": 1.0472, "step": 2845 }, { "epoch": 0.42926093514328806, "grad_norm": 1.7243934869766235, "learning_rate": 6.235526000390021e-05, "loss": 0.6685, "step": 2846 }, { "epoch": 0.4294117647058823, "grad_norm": 1.9161992073059082, "learning_rate": 6.233194948113103e-05, "loss": 0.9406, "step": 2847 }, { "epoch": 0.4295625942684766, "grad_norm": 1.9276727437973022, "learning_rate": 6.230863610402574e-05, "loss": 0.9276, "step": 2848 }, { "epoch": 0.4297134238310709, "grad_norm": 2.1955838203430176, "learning_rate": 6.228531987798043e-05, "loss": 1.4185, "step": 2849 }, { "epoch": 0.4298642533936652, "grad_norm": 1.7315608263015747, "learning_rate": 6.226200080839183e-05, "loss": 0.8335, "step": 2850 }, { "epoch": 0.43001508295625945, "grad_norm": 2.112722158432007, "learning_rate": 6.223867890065735e-05, "loss": 1.4479, "step": 2851 }, { "epoch": 0.4301659125188537, "grad_norm": 1.9658900499343872, "learning_rate": 6.221535416017504e-05, "loss": 1.3894, "step": 2852 }, { "epoch": 0.430316742081448, "grad_norm": 2.1800925731658936, "learning_rate": 6.219202659234362e-05, "loss": 1.5717, "step": 2853 }, { "epoch": 0.43046757164404226, "grad_norm": 1.9386200904846191, "learning_rate": 6.216869620256244e-05, "loss": 1.3166, "step": 2854 }, { "epoch": 0.4306184012066365, "grad_norm": 1.8229913711547852, "learning_rate": 6.214536299623155e-05, "loss": 1.0617, "step": 2855 }, { "epoch": 0.4307692307692308, "grad_norm": 2.050565004348755, "learning_rate": 6.212202697875159e-05, "loss": 1.3892, "step": 2856 }, { "epoch": 0.43092006033182506, "grad_norm": 1.654355764389038, "learning_rate": 6.209868815552391e-05, "loss": 1.0107, "step": 2857 }, { "epoch": 0.43107088989441933, "grad_norm": 1.8448518514633179, "learning_rate": 6.207534653195045e-05, "loss": 1.2999, "step": 2858 }, { "epoch": 0.4312217194570136, "grad_norm": 1.8625530004501343, "learning_rate": 6.205200211343387e-05, "loss": 0.9691, "step": 2859 }, { "epoch": 0.43137254901960786, "grad_norm": 1.9009959697723389, "learning_rate": 6.202865490537739e-05, "loss": 1.1955, "step": 2860 }, { "epoch": 0.43152337858220213, "grad_norm": 1.817966103553772, "learning_rate": 6.200530491318497e-05, "loss": 1.0503, "step": 2861 }, { "epoch": 0.4316742081447964, "grad_norm": 1.893959879875183, "learning_rate": 6.198195214226114e-05, "loss": 1.0875, "step": 2862 }, { "epoch": 0.43182503770739067, "grad_norm": 1.8453878164291382, "learning_rate": 6.195859659801112e-05, "loss": 0.9839, "step": 2863 }, { "epoch": 0.43197586726998494, "grad_norm": 2.2231011390686035, "learning_rate": 6.193523828584072e-05, "loss": 1.2824, "step": 2864 }, { "epoch": 0.4321266968325792, "grad_norm": 1.8394495248794556, "learning_rate": 6.191187721115644e-05, "loss": 1.2507, "step": 2865 }, { "epoch": 0.4322775263951735, "grad_norm": 2.143310070037842, "learning_rate": 6.188851337936541e-05, "loss": 1.3909, "step": 2866 }, { "epoch": 0.43242835595776774, "grad_norm": 2.219456195831299, "learning_rate": 6.18651467958754e-05, "loss": 1.1419, "step": 2867 }, { "epoch": 0.432579185520362, "grad_norm": 2.310849905014038, "learning_rate": 6.184177746609478e-05, "loss": 1.3647, "step": 2868 }, { "epoch": 0.4327300150829563, "grad_norm": 2.256617546081543, "learning_rate": 6.181840539543259e-05, "loss": 1.5062, "step": 2869 }, { "epoch": 0.43288084464555054, "grad_norm": 2.3318982124328613, "learning_rate": 6.17950305892985e-05, "loss": 1.666, "step": 2870 }, { "epoch": 0.4330316742081448, "grad_norm": 2.1620755195617676, "learning_rate": 6.177165305310282e-05, "loss": 1.2927, "step": 2871 }, { "epoch": 0.4331825037707391, "grad_norm": 2.257291793823242, "learning_rate": 6.174827279225648e-05, "loss": 0.9907, "step": 2872 }, { "epoch": 0.43333333333333335, "grad_norm": 2.1101062297821045, "learning_rate": 6.172488981217103e-05, "loss": 1.2205, "step": 2873 }, { "epoch": 0.4334841628959276, "grad_norm": 2.020550489425659, "learning_rate": 6.170150411825866e-05, "loss": 0.9199, "step": 2874 }, { "epoch": 0.4336349924585219, "grad_norm": 1.9758199453353882, "learning_rate": 6.167811571593221e-05, "loss": 1.1658, "step": 2875 }, { "epoch": 0.43378582202111615, "grad_norm": 1.837363600730896, "learning_rate": 6.165472461060514e-05, "loss": 0.9711, "step": 2876 }, { "epoch": 0.4339366515837104, "grad_norm": 1.8062270879745483, "learning_rate": 6.163133080769148e-05, "loss": 0.8877, "step": 2877 }, { "epoch": 0.4340874811463047, "grad_norm": 2.0454230308532715, "learning_rate": 6.160793431260594e-05, "loss": 1.4141, "step": 2878 }, { "epoch": 0.43423831070889896, "grad_norm": 1.9783329963684082, "learning_rate": 6.158453513076386e-05, "loss": 1.1657, "step": 2879 }, { "epoch": 0.4343891402714932, "grad_norm": 2.0302493572235107, "learning_rate": 6.156113326758118e-05, "loss": 1.219, "step": 2880 }, { "epoch": 0.4345399698340875, "grad_norm": 1.9237884283065796, "learning_rate": 6.153772872847446e-05, "loss": 1.2365, "step": 2881 }, { "epoch": 0.43469079939668176, "grad_norm": 1.9789142608642578, "learning_rate": 6.151432151886085e-05, "loss": 1.2751, "step": 2882 }, { "epoch": 0.434841628959276, "grad_norm": 2.028818130493164, "learning_rate": 6.14909116441582e-05, "loss": 1.1739, "step": 2883 }, { "epoch": 0.4349924585218703, "grad_norm": 1.9682210683822632, "learning_rate": 6.14674991097849e-05, "loss": 1.177, "step": 2884 }, { "epoch": 0.43514328808446456, "grad_norm": 2.3160383701324463, "learning_rate": 6.144408392116e-05, "loss": 1.3099, "step": 2885 }, { "epoch": 0.43529411764705883, "grad_norm": 1.906753659248352, "learning_rate": 6.142066608370313e-05, "loss": 0.999, "step": 2886 }, { "epoch": 0.4354449472096531, "grad_norm": 1.7614167928695679, "learning_rate": 6.139724560283456e-05, "loss": 0.8877, "step": 2887 }, { "epoch": 0.43559577677224737, "grad_norm": 1.985700249671936, "learning_rate": 6.137382248397516e-05, "loss": 1.2933, "step": 2888 }, { "epoch": 0.43574660633484164, "grad_norm": 2.2762093544006348, "learning_rate": 6.13503967325464e-05, "loss": 1.4099, "step": 2889 }, { "epoch": 0.4358974358974359, "grad_norm": 2.0707013607025146, "learning_rate": 6.132696835397039e-05, "loss": 1.3189, "step": 2890 }, { "epoch": 0.43604826546003017, "grad_norm": 2.83544921875, "learning_rate": 6.130353735366985e-05, "loss": 1.1884, "step": 2891 }, { "epoch": 0.43619909502262444, "grad_norm": 2.2677717208862305, "learning_rate": 6.128010373706804e-05, "loss": 1.258, "step": 2892 }, { "epoch": 0.4363499245852187, "grad_norm": 2.1900970935821533, "learning_rate": 6.125666750958889e-05, "loss": 1.1381, "step": 2893 }, { "epoch": 0.436500754147813, "grad_norm": 2.0720808506011963, "learning_rate": 6.123322867665693e-05, "loss": 1.1226, "step": 2894 }, { "epoch": 0.43665158371040724, "grad_norm": 1.788298487663269, "learning_rate": 6.120978724369728e-05, "loss": 0.9144, "step": 2895 }, { "epoch": 0.4368024132730015, "grad_norm": 1.5853902101516724, "learning_rate": 6.118634321613565e-05, "loss": 0.7845, "step": 2896 }, { "epoch": 0.4369532428355958, "grad_norm": 1.6724823713302612, "learning_rate": 6.116289659939837e-05, "loss": 0.729, "step": 2897 }, { "epoch": 0.43710407239819005, "grad_norm": 1.7245433330535889, "learning_rate": 6.113944739891234e-05, "loss": 0.9146, "step": 2898 }, { "epoch": 0.4372549019607843, "grad_norm": 1.840879201889038, "learning_rate": 6.111599562010512e-05, "loss": 1.1205, "step": 2899 }, { "epoch": 0.4374057315233786, "grad_norm": 1.9024556875228882, "learning_rate": 6.10925412684048e-05, "loss": 0.8782, "step": 2900 }, { "epoch": 0.43755656108597285, "grad_norm": 2.235851526260376, "learning_rate": 6.10690843492401e-05, "loss": 1.1027, "step": 2901 }, { "epoch": 0.4377073906485671, "grad_norm": 2.1296613216400146, "learning_rate": 6.104562486804031e-05, "loss": 1.6357, "step": 2902 }, { "epoch": 0.4378582202111614, "grad_norm": 2.134019374847412, "learning_rate": 6.102216283023534e-05, "loss": 1.5201, "step": 2903 }, { "epoch": 0.43800904977375565, "grad_norm": 2.0726771354675293, "learning_rate": 6.099869824125569e-05, "loss": 1.4661, "step": 2904 }, { "epoch": 0.4381598793363499, "grad_norm": 1.7530078887939453, "learning_rate": 6.0975231106532435e-05, "loss": 1.0881, "step": 2905 }, { "epoch": 0.4383107088989442, "grad_norm": 2.2295236587524414, "learning_rate": 6.095176143149725e-05, "loss": 1.675, "step": 2906 }, { "epoch": 0.43846153846153846, "grad_norm": 2.174976110458374, "learning_rate": 6.092828922158239e-05, "loss": 1.4167, "step": 2907 }, { "epoch": 0.4386123680241327, "grad_norm": 1.88423490524292, "learning_rate": 6.0904814482220693e-05, "loss": 1.1805, "step": 2908 }, { "epoch": 0.438763197586727, "grad_norm": 2.3528478145599365, "learning_rate": 6.0881337218845616e-05, "loss": 1.365, "step": 2909 }, { "epoch": 0.43891402714932126, "grad_norm": 1.8062489032745361, "learning_rate": 6.085785743689114e-05, "loss": 1.0392, "step": 2910 }, { "epoch": 0.43906485671191553, "grad_norm": 1.548813819885254, "learning_rate": 6.0834375141791885e-05, "loss": 0.8447, "step": 2911 }, { "epoch": 0.4392156862745098, "grad_norm": 1.9762433767318726, "learning_rate": 6.0810890338983016e-05, "loss": 1.3376, "step": 2912 }, { "epoch": 0.43936651583710407, "grad_norm": 1.9063622951507568, "learning_rate": 6.0787403033900313e-05, "loss": 1.1721, "step": 2913 }, { "epoch": 0.43951734539969833, "grad_norm": 1.8436044454574585, "learning_rate": 6.0763913231980096e-05, "loss": 1.0356, "step": 2914 }, { "epoch": 0.4396681749622926, "grad_norm": 1.9796805381774902, "learning_rate": 6.074042093865929e-05, "loss": 1.1621, "step": 2915 }, { "epoch": 0.43981900452488687, "grad_norm": 2.0933351516723633, "learning_rate": 6.07169261593754e-05, "loss": 1.558, "step": 2916 }, { "epoch": 0.43996983408748114, "grad_norm": 1.8411680459976196, "learning_rate": 6.069342889956647e-05, "loss": 1.2576, "step": 2917 }, { "epoch": 0.4401206636500754, "grad_norm": 2.1013433933258057, "learning_rate": 6.0669929164671166e-05, "loss": 1.3453, "step": 2918 }, { "epoch": 0.4402714932126697, "grad_norm": 1.8182259798049927, "learning_rate": 6.06464269601287e-05, "loss": 1.0938, "step": 2919 }, { "epoch": 0.44042232277526394, "grad_norm": 1.5691766738891602, "learning_rate": 6.062292229137886e-05, "loss": 0.7101, "step": 2920 }, { "epoch": 0.4405731523378582, "grad_norm": 1.8774539232254028, "learning_rate": 6.059941516386198e-05, "loss": 1.0416, "step": 2921 }, { "epoch": 0.4407239819004525, "grad_norm": 1.845726728439331, "learning_rate": 6.057590558301902e-05, "loss": 1.0747, "step": 2922 }, { "epoch": 0.44087481146304675, "grad_norm": 1.7080789804458618, "learning_rate": 6.0552393554291483e-05, "loss": 0.9552, "step": 2923 }, { "epoch": 0.441025641025641, "grad_norm": 2.098067045211792, "learning_rate": 6.0528879083121395e-05, "loss": 0.9558, "step": 2924 }, { "epoch": 0.4411764705882353, "grad_norm": 1.8147761821746826, "learning_rate": 6.05053621749514e-05, "loss": 1.0109, "step": 2925 }, { "epoch": 0.44132730015082955, "grad_norm": 1.8140023946762085, "learning_rate": 6.048184283522468e-05, "loss": 1.072, "step": 2926 }, { "epoch": 0.4414781297134238, "grad_norm": 2.308245897293091, "learning_rate": 6.0458321069385e-05, "loss": 1.4826, "step": 2927 }, { "epoch": 0.4416289592760181, "grad_norm": 2.0087547302246094, "learning_rate": 6.0434796882876687e-05, "loss": 1.0261, "step": 2928 }, { "epoch": 0.44177978883861235, "grad_norm": 1.8477953672409058, "learning_rate": 6.0411270281144594e-05, "loss": 1.0003, "step": 2929 }, { "epoch": 0.4419306184012066, "grad_norm": 2.2859201431274414, "learning_rate": 6.0387741269634165e-05, "loss": 1.2874, "step": 2930 }, { "epoch": 0.4420814479638009, "grad_norm": 2.1965765953063965, "learning_rate": 6.036420985379141e-05, "loss": 1.3148, "step": 2931 }, { "epoch": 0.44223227752639516, "grad_norm": 1.9737344980239868, "learning_rate": 6.0340676039062846e-05, "loss": 0.8102, "step": 2932 }, { "epoch": 0.4423831070889894, "grad_norm": 1.9626588821411133, "learning_rate": 6.0317139830895606e-05, "loss": 1.1563, "step": 2933 }, { "epoch": 0.4425339366515837, "grad_norm": 1.9669253826141357, "learning_rate": 6.029360123473733e-05, "loss": 1.16, "step": 2934 }, { "epoch": 0.44268476621417796, "grad_norm": 2.0727782249450684, "learning_rate": 6.027006025603623e-05, "loss": 1.1727, "step": 2935 }, { "epoch": 0.44283559577677223, "grad_norm": 2.297727346420288, "learning_rate": 6.024651690024108e-05, "loss": 1.5977, "step": 2936 }, { "epoch": 0.4429864253393665, "grad_norm": 2.0084822177886963, "learning_rate": 6.022297117280119e-05, "loss": 1.1934, "step": 2937 }, { "epoch": 0.44313725490196076, "grad_norm": 2.288133144378662, "learning_rate": 6.0199423079166404e-05, "loss": 1.1489, "step": 2938 }, { "epoch": 0.44328808446455503, "grad_norm": 1.980177879333496, "learning_rate": 6.017587262478715e-05, "loss": 1.1033, "step": 2939 }, { "epoch": 0.4434389140271493, "grad_norm": 2.0666604042053223, "learning_rate": 6.015231981511439e-05, "loss": 1.2491, "step": 2940 }, { "epoch": 0.44358974358974357, "grad_norm": 1.9918434619903564, "learning_rate": 6.01287646555996e-05, "loss": 0.9228, "step": 2941 }, { "epoch": 0.44374057315233784, "grad_norm": 2.037714719772339, "learning_rate": 6.0105207151694854e-05, "loss": 1.2237, "step": 2942 }, { "epoch": 0.4438914027149321, "grad_norm": 2.056065797805786, "learning_rate": 6.0081647308852715e-05, "loss": 1.2733, "step": 2943 }, { "epoch": 0.44404223227752637, "grad_norm": 2.2046995162963867, "learning_rate": 6.0058085132526334e-05, "loss": 1.2646, "step": 2944 }, { "epoch": 0.44419306184012064, "grad_norm": 2.2664430141448975, "learning_rate": 6.0034520628169355e-05, "loss": 1.1134, "step": 2945 }, { "epoch": 0.4443438914027149, "grad_norm": 1.9337106943130493, "learning_rate": 6.0010953801236e-05, "loss": 1.0487, "step": 2946 }, { "epoch": 0.4444947209653092, "grad_norm": 1.6106865406036377, "learning_rate": 5.9987384657181026e-05, "loss": 0.836, "step": 2947 }, { "epoch": 0.44464555052790344, "grad_norm": 1.7709181308746338, "learning_rate": 5.9963813201459684e-05, "loss": 0.8105, "step": 2948 }, { "epoch": 0.4447963800904977, "grad_norm": 2.075230121612549, "learning_rate": 5.994023943952782e-05, "loss": 1.2657, "step": 2949 }, { "epoch": 0.444947209653092, "grad_norm": 2.2233726978302, "learning_rate": 5.991666337684177e-05, "loss": 1.1246, "step": 2950 }, { "epoch": 0.44509803921568625, "grad_norm": 2.0646188259124756, "learning_rate": 5.989308501885842e-05, "loss": 1.3807, "step": 2951 }, { "epoch": 0.4452488687782805, "grad_norm": 2.015010356903076, "learning_rate": 5.986950437103518e-05, "loss": 1.3324, "step": 2952 }, { "epoch": 0.44539969834087484, "grad_norm": 1.9854192733764648, "learning_rate": 5.984592143883e-05, "loss": 1.28, "step": 2953 }, { "epoch": 0.4455505279034691, "grad_norm": 2.050630569458008, "learning_rate": 5.982233622770136e-05, "loss": 1.1773, "step": 2954 }, { "epoch": 0.4457013574660634, "grad_norm": 1.882399320602417, "learning_rate": 5.9798748743108244e-05, "loss": 1.1676, "step": 2955 }, { "epoch": 0.44585218702865764, "grad_norm": 1.870511770248413, "learning_rate": 5.9775158990510206e-05, "loss": 0.9952, "step": 2956 }, { "epoch": 0.4460030165912519, "grad_norm": 1.9878190755844116, "learning_rate": 5.975156697536727e-05, "loss": 1.2618, "step": 2957 }, { "epoch": 0.4461538461538462, "grad_norm": 1.9319453239440918, "learning_rate": 5.972797270314001e-05, "loss": 0.9424, "step": 2958 }, { "epoch": 0.44630467571644045, "grad_norm": 1.982954978942871, "learning_rate": 5.9704376179289544e-05, "loss": 1.0929, "step": 2959 }, { "epoch": 0.4464555052790347, "grad_norm": 2.0766940116882324, "learning_rate": 5.968077740927748e-05, "loss": 1.3619, "step": 2960 }, { "epoch": 0.446606334841629, "grad_norm": 2.4009692668914795, "learning_rate": 5.965717639856596e-05, "loss": 1.503, "step": 2961 }, { "epoch": 0.44675716440422325, "grad_norm": 1.9359006881713867, "learning_rate": 5.963357315261764e-05, "loss": 1.3451, "step": 2962 }, { "epoch": 0.4469079939668175, "grad_norm": 2.009873151779175, "learning_rate": 5.96099676768957e-05, "loss": 1.3392, "step": 2963 }, { "epoch": 0.4470588235294118, "grad_norm": 2.020941734313965, "learning_rate": 5.958635997686382e-05, "loss": 1.5573, "step": 2964 }, { "epoch": 0.44720965309200605, "grad_norm": 1.925164818763733, "learning_rate": 5.956275005798622e-05, "loss": 1.2834, "step": 2965 }, { "epoch": 0.4473604826546003, "grad_norm": 2.063746213912964, "learning_rate": 5.953913792572761e-05, "loss": 1.1319, "step": 2966 }, { "epoch": 0.4475113122171946, "grad_norm": 1.6664730310440063, "learning_rate": 5.951552358555321e-05, "loss": 0.9398, "step": 2967 }, { "epoch": 0.44766214177978886, "grad_norm": 1.9982300996780396, "learning_rate": 5.9491907042928786e-05, "loss": 1.2701, "step": 2968 }, { "epoch": 0.4478129713423831, "grad_norm": 1.744247555732727, "learning_rate": 5.946828830332058e-05, "loss": 1.0078, "step": 2969 }, { "epoch": 0.4479638009049774, "grad_norm": 2.0212817192077637, "learning_rate": 5.9444667372195364e-05, "loss": 1.1316, "step": 2970 }, { "epoch": 0.44811463046757166, "grad_norm": 2.181373357772827, "learning_rate": 5.942104425502038e-05, "loss": 1.1861, "step": 2971 }, { "epoch": 0.44826546003016593, "grad_norm": 1.7548621892929077, "learning_rate": 5.939741895726343e-05, "loss": 0.8793, "step": 2972 }, { "epoch": 0.4484162895927602, "grad_norm": 1.9749959707260132, "learning_rate": 5.937379148439277e-05, "loss": 1.1164, "step": 2973 }, { "epoch": 0.44856711915535447, "grad_norm": 1.4374895095825195, "learning_rate": 5.935016184187719e-05, "loss": 0.6478, "step": 2974 }, { "epoch": 0.44871794871794873, "grad_norm": 1.583638310432434, "learning_rate": 5.9326530035186003e-05, "loss": 0.9719, "step": 2975 }, { "epoch": 0.448868778280543, "grad_norm": 1.8791584968566895, "learning_rate": 5.930289606978895e-05, "loss": 1.1327, "step": 2976 }, { "epoch": 0.44901960784313727, "grad_norm": 2.2332797050476074, "learning_rate": 5.927925995115633e-05, "loss": 1.5896, "step": 2977 }, { "epoch": 0.44917043740573154, "grad_norm": 1.9965503215789795, "learning_rate": 5.9255621684758944e-05, "loss": 1.2173, "step": 2978 }, { "epoch": 0.4493212669683258, "grad_norm": 2.0521183013916016, "learning_rate": 5.923198127606806e-05, "loss": 1.1051, "step": 2979 }, { "epoch": 0.4494720965309201, "grad_norm": 2.0951385498046875, "learning_rate": 5.9208338730555466e-05, "loss": 1.3723, "step": 2980 }, { "epoch": 0.44962292609351434, "grad_norm": 1.919703722000122, "learning_rate": 5.9184694053693425e-05, "loss": 0.9608, "step": 2981 }, { "epoch": 0.4497737556561086, "grad_norm": 2.2394537925720215, "learning_rate": 5.916104725095468e-05, "loss": 1.4144, "step": 2982 }, { "epoch": 0.4499245852187029, "grad_norm": 1.8898155689239502, "learning_rate": 5.913739832781252e-05, "loss": 1.1053, "step": 2983 }, { "epoch": 0.45007541478129715, "grad_norm": 2.356942653656006, "learning_rate": 5.911374728974068e-05, "loss": 1.5677, "step": 2984 }, { "epoch": 0.4502262443438914, "grad_norm": 2.0991547107696533, "learning_rate": 5.909009414221339e-05, "loss": 1.1752, "step": 2985 }, { "epoch": 0.4503770739064857, "grad_norm": 1.833966851234436, "learning_rate": 5.906643889070539e-05, "loss": 0.9466, "step": 2986 }, { "epoch": 0.45052790346907995, "grad_norm": 1.9910413026809692, "learning_rate": 5.904278154069187e-05, "loss": 1.085, "step": 2987 }, { "epoch": 0.4506787330316742, "grad_norm": 2.129300832748413, "learning_rate": 5.901912209764855e-05, "loss": 1.2448, "step": 2988 }, { "epoch": 0.4508295625942685, "grad_norm": 2.131591558456421, "learning_rate": 5.899546056705162e-05, "loss": 1.2501, "step": 2989 }, { "epoch": 0.45098039215686275, "grad_norm": 2.1076929569244385, "learning_rate": 5.89717969543777e-05, "loss": 1.2569, "step": 2990 }, { "epoch": 0.451131221719457, "grad_norm": 2.260589361190796, "learning_rate": 5.8948131265103976e-05, "loss": 1.2223, "step": 2991 }, { "epoch": 0.4512820512820513, "grad_norm": 2.176215171813965, "learning_rate": 5.892446350470807e-05, "loss": 1.1327, "step": 2992 }, { "epoch": 0.45143288084464556, "grad_norm": 1.9010145664215088, "learning_rate": 5.8900793678668084e-05, "loss": 0.9891, "step": 2993 }, { "epoch": 0.4515837104072398, "grad_norm": 2.476580858230591, "learning_rate": 5.887712179246261e-05, "loss": 1.5111, "step": 2994 }, { "epoch": 0.4517345399698341, "grad_norm": 1.943680763244629, "learning_rate": 5.8853447851570696e-05, "loss": 1.0818, "step": 2995 }, { "epoch": 0.45188536953242836, "grad_norm": 1.502707839012146, "learning_rate": 5.8829771861471895e-05, "loss": 0.7963, "step": 2996 }, { "epoch": 0.45203619909502263, "grad_norm": 1.7717297077178955, "learning_rate": 5.880609382764621e-05, "loss": 0.9131, "step": 2997 }, { "epoch": 0.4521870286576169, "grad_norm": 1.621617078781128, "learning_rate": 5.878241375557413e-05, "loss": 0.6809, "step": 2998 }, { "epoch": 0.45233785822021116, "grad_norm": 1.9815871715545654, "learning_rate": 5.875873165073661e-05, "loss": 0.975, "step": 2999 }, { "epoch": 0.45248868778280543, "grad_norm": 2.1412315368652344, "learning_rate": 5.873504751861508e-05, "loss": 1.1603, "step": 3000 }, { "epoch": 0.4526395173453997, "grad_norm": 2.209851026535034, "learning_rate": 5.871136136469144e-05, "loss": 1.8435, "step": 3001 }, { "epoch": 0.45279034690799397, "grad_norm": 2.10321044921875, "learning_rate": 5.8687673194448045e-05, "loss": 1.6439, "step": 3002 }, { "epoch": 0.45294117647058824, "grad_norm": 1.768019676208496, "learning_rate": 5.866398301336775e-05, "loss": 0.9249, "step": 3003 }, { "epoch": 0.4530920060331825, "grad_norm": 2.2213475704193115, "learning_rate": 5.8640290826933805e-05, "loss": 1.3177, "step": 3004 }, { "epoch": 0.45324283559577677, "grad_norm": 2.1244289875030518, "learning_rate": 5.861659664063002e-05, "loss": 1.4062, "step": 3005 }, { "epoch": 0.45339366515837104, "grad_norm": 1.8269492387771606, "learning_rate": 5.859290045994059e-05, "loss": 0.9998, "step": 3006 }, { "epoch": 0.4535444947209653, "grad_norm": 2.0541980266571045, "learning_rate": 5.856920229035021e-05, "loss": 1.2458, "step": 3007 }, { "epoch": 0.4536953242835596, "grad_norm": 2.1866719722747803, "learning_rate": 5.854550213734403e-05, "loss": 1.588, "step": 3008 }, { "epoch": 0.45384615384615384, "grad_norm": 2.0508596897125244, "learning_rate": 5.852180000640765e-05, "loss": 1.2309, "step": 3009 }, { "epoch": 0.4539969834087481, "grad_norm": 2.032155752182007, "learning_rate": 5.8498095903027125e-05, "loss": 1.3313, "step": 3010 }, { "epoch": 0.4541478129713424, "grad_norm": 1.9068013429641724, "learning_rate": 5.847438983268898e-05, "loss": 1.0549, "step": 3011 }, { "epoch": 0.45429864253393665, "grad_norm": 1.751044511795044, "learning_rate": 5.8450681800880216e-05, "loss": 1.1172, "step": 3012 }, { "epoch": 0.4544494720965309, "grad_norm": 2.088129997253418, "learning_rate": 5.8426971813088206e-05, "loss": 1.3362, "step": 3013 }, { "epoch": 0.4546003016591252, "grad_norm": 2.213381767272949, "learning_rate": 5.8403259874800874e-05, "loss": 1.5008, "step": 3014 }, { "epoch": 0.45475113122171945, "grad_norm": 2.057218313217163, "learning_rate": 5.8379545991506535e-05, "loss": 1.4182, "step": 3015 }, { "epoch": 0.4549019607843137, "grad_norm": 1.6921429634094238, "learning_rate": 5.835583016869398e-05, "loss": 0.9318, "step": 3016 }, { "epoch": 0.455052790346908, "grad_norm": 2.1360998153686523, "learning_rate": 5.833211241185245e-05, "loss": 1.5829, "step": 3017 }, { "epoch": 0.45520361990950226, "grad_norm": 1.9580217599868774, "learning_rate": 5.830839272647161e-05, "loss": 1.3188, "step": 3018 }, { "epoch": 0.4553544494720965, "grad_norm": 1.9652215242385864, "learning_rate": 5.8284671118041565e-05, "loss": 1.2702, "step": 3019 }, { "epoch": 0.4555052790346908, "grad_norm": 1.7035040855407715, "learning_rate": 5.826094759205293e-05, "loss": 1.1332, "step": 3020 }, { "epoch": 0.45565610859728506, "grad_norm": 1.7170456647872925, "learning_rate": 5.8237222153996694e-05, "loss": 0.91, "step": 3021 }, { "epoch": 0.4558069381598793, "grad_norm": 1.9305657148361206, "learning_rate": 5.821349480936432e-05, "loss": 1.2117, "step": 3022 }, { "epoch": 0.4559577677224736, "grad_norm": 1.947059154510498, "learning_rate": 5.81897655636477e-05, "loss": 0.8891, "step": 3023 }, { "epoch": 0.45610859728506786, "grad_norm": 1.8038357496261597, "learning_rate": 5.816603442233919e-05, "loss": 1.0744, "step": 3024 }, { "epoch": 0.45625942684766213, "grad_norm": 2.284907102584839, "learning_rate": 5.814230139093155e-05, "loss": 1.669, "step": 3025 }, { "epoch": 0.4564102564102564, "grad_norm": 1.9211556911468506, "learning_rate": 5.8118566474918014e-05, "loss": 1.1593, "step": 3026 }, { "epoch": 0.45656108597285067, "grad_norm": 2.0990073680877686, "learning_rate": 5.8094829679792205e-05, "loss": 1.4439, "step": 3027 }, { "epoch": 0.45671191553544493, "grad_norm": 1.9338178634643555, "learning_rate": 5.807109101104824e-05, "loss": 1.0922, "step": 3028 }, { "epoch": 0.4568627450980392, "grad_norm": 1.451488971710205, "learning_rate": 5.80473504741806e-05, "loss": 0.727, "step": 3029 }, { "epoch": 0.45701357466063347, "grad_norm": 2.1277501583099365, "learning_rate": 5.802360807468428e-05, "loss": 1.4527, "step": 3030 }, { "epoch": 0.45716440422322774, "grad_norm": 2.0594005584716797, "learning_rate": 5.799986381805465e-05, "loss": 1.0352, "step": 3031 }, { "epoch": 0.457315233785822, "grad_norm": 2.27569317817688, "learning_rate": 5.797611770978749e-05, "loss": 1.442, "step": 3032 }, { "epoch": 0.4574660633484163, "grad_norm": 2.076523780822754, "learning_rate": 5.795236975537909e-05, "loss": 1.1894, "step": 3033 }, { "epoch": 0.45761689291101054, "grad_norm": 2.3631677627563477, "learning_rate": 5.792861996032608e-05, "loss": 1.3314, "step": 3034 }, { "epoch": 0.4577677224736048, "grad_norm": 1.9254887104034424, "learning_rate": 5.790486833012558e-05, "loss": 1.1658, "step": 3035 }, { "epoch": 0.4579185520361991, "grad_norm": 1.8009214401245117, "learning_rate": 5.788111487027511e-05, "loss": 0.9596, "step": 3036 }, { "epoch": 0.45806938159879335, "grad_norm": 2.0693392753601074, "learning_rate": 5.7857359586272595e-05, "loss": 1.1734, "step": 3037 }, { "epoch": 0.4582202111613876, "grad_norm": 2.08595609664917, "learning_rate": 5.783360248361641e-05, "loss": 1.1316, "step": 3038 }, { "epoch": 0.4583710407239819, "grad_norm": 2.144890069961548, "learning_rate": 5.7809843567805346e-05, "loss": 1.2529, "step": 3039 }, { "epoch": 0.45852187028657615, "grad_norm": 2.100008726119995, "learning_rate": 5.778608284433862e-05, "loss": 1.077, "step": 3040 }, { "epoch": 0.4586726998491704, "grad_norm": 2.4382829666137695, "learning_rate": 5.7762320318715813e-05, "loss": 1.2941, "step": 3041 }, { "epoch": 0.4588235294117647, "grad_norm": 2.4513330459594727, "learning_rate": 5.7738555996437e-05, "loss": 1.4754, "step": 3042 }, { "epoch": 0.45897435897435895, "grad_norm": 3.0466840267181396, "learning_rate": 5.771478988300265e-05, "loss": 1.2946, "step": 3043 }, { "epoch": 0.4591251885369532, "grad_norm": 2.030565023422241, "learning_rate": 5.7691021983913605e-05, "loss": 1.064, "step": 3044 }, { "epoch": 0.4592760180995475, "grad_norm": 1.7167774438858032, "learning_rate": 5.766725230467116e-05, "loss": 0.7461, "step": 3045 }, { "epoch": 0.45942684766214176, "grad_norm": 2.2877635955810547, "learning_rate": 5.764348085077702e-05, "loss": 0.791, "step": 3046 }, { "epoch": 0.459577677224736, "grad_norm": 1.7092499732971191, "learning_rate": 5.76197076277333e-05, "loss": 0.8124, "step": 3047 }, { "epoch": 0.4597285067873303, "grad_norm": 1.796270489692688, "learning_rate": 5.759593264104249e-05, "loss": 1.0399, "step": 3048 }, { "epoch": 0.45987933634992456, "grad_norm": 2.081171989440918, "learning_rate": 5.757215589620755e-05, "loss": 1.0012, "step": 3049 }, { "epoch": 0.46003016591251883, "grad_norm": 2.268544912338257, "learning_rate": 5.754837739873179e-05, "loss": 1.3584, "step": 3050 }, { "epoch": 0.4601809954751131, "grad_norm": 2.0904104709625244, "learning_rate": 5.752459715411894e-05, "loss": 1.3404, "step": 3051 }, { "epoch": 0.46033182503770737, "grad_norm": 1.8922903537750244, "learning_rate": 5.7500815167873166e-05, "loss": 1.0451, "step": 3052 }, { "epoch": 0.46048265460030163, "grad_norm": 2.101043462753296, "learning_rate": 5.747703144549901e-05, "loss": 1.5123, "step": 3053 }, { "epoch": 0.4606334841628959, "grad_norm": 1.9561312198638916, "learning_rate": 5.7453245992501414e-05, "loss": 1.0566, "step": 3054 }, { "epoch": 0.46078431372549017, "grad_norm": 1.8918583393096924, "learning_rate": 5.742945881438573e-05, "loss": 1.1433, "step": 3055 }, { "epoch": 0.46093514328808444, "grad_norm": 1.9173082113265991, "learning_rate": 5.740566991665769e-05, "loss": 1.0934, "step": 3056 }, { "epoch": 0.46108597285067876, "grad_norm": 2.101879596710205, "learning_rate": 5.738187930482345e-05, "loss": 1.2999, "step": 3057 }, { "epoch": 0.46123680241327303, "grad_norm": 2.0791397094726562, "learning_rate": 5.7358086984389556e-05, "loss": 1.217, "step": 3058 }, { "epoch": 0.4613876319758673, "grad_norm": 1.8658796548843384, "learning_rate": 5.7334292960862956e-05, "loss": 1.1091, "step": 3059 }, { "epoch": 0.46153846153846156, "grad_norm": 1.964824914932251, "learning_rate": 5.731049723975096e-05, "loss": 1.1379, "step": 3060 }, { "epoch": 0.46168929110105583, "grad_norm": 2.095642328262329, "learning_rate": 5.7286699826561305e-05, "loss": 1.3887, "step": 3061 }, { "epoch": 0.4618401206636501, "grad_norm": 2.1390275955200195, "learning_rate": 5.7262900726802106e-05, "loss": 1.3622, "step": 3062 }, { "epoch": 0.46199095022624437, "grad_norm": 1.4467761516571045, "learning_rate": 5.723909994598187e-05, "loss": 0.7757, "step": 3063 }, { "epoch": 0.46214177978883864, "grad_norm": 1.7861415147781372, "learning_rate": 5.7215297489609485e-05, "loss": 1.0384, "step": 3064 }, { "epoch": 0.4622926093514329, "grad_norm": 1.966065526008606, "learning_rate": 5.719149336319425e-05, "loss": 1.2887, "step": 3065 }, { "epoch": 0.46244343891402717, "grad_norm": 2.16975998878479, "learning_rate": 5.716768757224583e-05, "loss": 1.4579, "step": 3066 }, { "epoch": 0.46259426847662144, "grad_norm": 1.8525549173355103, "learning_rate": 5.714388012227427e-05, "loss": 1.0169, "step": 3067 }, { "epoch": 0.4627450980392157, "grad_norm": 1.8898024559020996, "learning_rate": 5.712007101879002e-05, "loss": 1.0146, "step": 3068 }, { "epoch": 0.46289592760181, "grad_norm": 2.0693061351776123, "learning_rate": 5.70962602673039e-05, "loss": 1.3931, "step": 3069 }, { "epoch": 0.46304675716440424, "grad_norm": 2.0602948665618896, "learning_rate": 5.7072447873327114e-05, "loss": 1.2456, "step": 3070 }, { "epoch": 0.4631975867269985, "grad_norm": 1.706660509109497, "learning_rate": 5.7048633842371256e-05, "loss": 1.1137, "step": 3071 }, { "epoch": 0.4633484162895928, "grad_norm": 1.6892670392990112, "learning_rate": 5.702481817994827e-05, "loss": 0.8567, "step": 3072 }, { "epoch": 0.46349924585218705, "grad_norm": 1.6112780570983887, "learning_rate": 5.700100089157052e-05, "loss": 0.8265, "step": 3073 }, { "epoch": 0.4636500754147813, "grad_norm": 1.8563029766082764, "learning_rate": 5.6977181982750694e-05, "loss": 1.1756, "step": 3074 }, { "epoch": 0.4638009049773756, "grad_norm": 1.9787973165512085, "learning_rate": 5.6953361459001905e-05, "loss": 1.1509, "step": 3075 }, { "epoch": 0.46395173453996985, "grad_norm": 1.852583646774292, "learning_rate": 5.692953932583761e-05, "loss": 1.1563, "step": 3076 }, { "epoch": 0.4641025641025641, "grad_norm": 2.1013665199279785, "learning_rate": 5.690571558877166e-05, "loss": 1.294, "step": 3077 }, { "epoch": 0.4642533936651584, "grad_norm": 2.0065133571624756, "learning_rate": 5.688189025331825e-05, "loss": 1.2604, "step": 3078 }, { "epoch": 0.46440422322775265, "grad_norm": 1.7860686779022217, "learning_rate": 5.685806332499196e-05, "loss": 1.0165, "step": 3079 }, { "epoch": 0.4645550527903469, "grad_norm": 2.2401421070098877, "learning_rate": 5.683423480930774e-05, "loss": 1.4884, "step": 3080 }, { "epoch": 0.4647058823529412, "grad_norm": 2.1118552684783936, "learning_rate": 5.6810404711780907e-05, "loss": 1.5108, "step": 3081 }, { "epoch": 0.46485671191553546, "grad_norm": 1.9597967863082886, "learning_rate": 5.678657303792715e-05, "loss": 1.0663, "step": 3082 }, { "epoch": 0.4650075414781297, "grad_norm": 1.8824141025543213, "learning_rate": 5.676273979326251e-05, "loss": 1.1414, "step": 3083 }, { "epoch": 0.465158371040724, "grad_norm": 1.9519803524017334, "learning_rate": 5.673890498330339e-05, "loss": 1.2684, "step": 3084 }, { "epoch": 0.46530920060331826, "grad_norm": 2.067005157470703, "learning_rate": 5.671506861356657e-05, "loss": 1.2159, "step": 3085 }, { "epoch": 0.46546003016591253, "grad_norm": 1.9644209146499634, "learning_rate": 5.6691230689569176e-05, "loss": 1.4499, "step": 3086 }, { "epoch": 0.4656108597285068, "grad_norm": 2.1351263523101807, "learning_rate": 5.6667391216828726e-05, "loss": 1.2919, "step": 3087 }, { "epoch": 0.46576168929110107, "grad_norm": 2.0406301021575928, "learning_rate": 5.6643550200863026e-05, "loss": 1.348, "step": 3088 }, { "epoch": 0.46591251885369533, "grad_norm": 1.9450726509094238, "learning_rate": 5.661970764719033e-05, "loss": 1.2907, "step": 3089 }, { "epoch": 0.4660633484162896, "grad_norm": 2.1070194244384766, "learning_rate": 5.659586356132918e-05, "loss": 1.1662, "step": 3090 }, { "epoch": 0.46621417797888387, "grad_norm": 2.040290594100952, "learning_rate": 5.6572017948798494e-05, "loss": 1.2381, "step": 3091 }, { "epoch": 0.46636500754147814, "grad_norm": 2.0674917697906494, "learning_rate": 5.654817081511755e-05, "loss": 1.2575, "step": 3092 }, { "epoch": 0.4665158371040724, "grad_norm": 2.0069117546081543, "learning_rate": 5.652432216580598e-05, "loss": 1.1526, "step": 3093 }, { "epoch": 0.4666666666666667, "grad_norm": 2.0675883293151855, "learning_rate": 5.6500472006383744e-05, "loss": 1.0998, "step": 3094 }, { "epoch": 0.46681749622926094, "grad_norm": 2.146172046661377, "learning_rate": 5.647662034237118e-05, "loss": 1.4476, "step": 3095 }, { "epoch": 0.4669683257918552, "grad_norm": 1.9102356433868408, "learning_rate": 5.6452767179288956e-05, "loss": 1.1289, "step": 3096 }, { "epoch": 0.4671191553544495, "grad_norm": 1.8289722204208374, "learning_rate": 5.642891252265812e-05, "loss": 1.0005, "step": 3097 }, { "epoch": 0.46726998491704375, "grad_norm": 1.33940851688385, "learning_rate": 5.640505637799998e-05, "loss": 0.5893, "step": 3098 }, { "epoch": 0.467420814479638, "grad_norm": 1.5650482177734375, "learning_rate": 5.6381198750836286e-05, "loss": 0.6135, "step": 3099 }, { "epoch": 0.4675716440422323, "grad_norm": 1.9277353286743164, "learning_rate": 5.6357339646689086e-05, "loss": 1.1308, "step": 3100 }, { "epoch": 0.46772247360482655, "grad_norm": 2.0331003665924072, "learning_rate": 5.6333479071080784e-05, "loss": 1.3932, "step": 3101 }, { "epoch": 0.4678733031674208, "grad_norm": 1.9140945672988892, "learning_rate": 5.630961702953409e-05, "loss": 1.2311, "step": 3102 }, { "epoch": 0.4680241327300151, "grad_norm": 1.9435855150222778, "learning_rate": 5.628575352757211e-05, "loss": 1.2537, "step": 3103 }, { "epoch": 0.46817496229260935, "grad_norm": 1.7208058834075928, "learning_rate": 5.626188857071822e-05, "loss": 0.9432, "step": 3104 }, { "epoch": 0.4683257918552036, "grad_norm": 1.7752649784088135, "learning_rate": 5.623802216449621e-05, "loss": 1.0899, "step": 3105 }, { "epoch": 0.4684766214177979, "grad_norm": 2.330484628677368, "learning_rate": 5.621415431443016e-05, "loss": 1.5218, "step": 3106 }, { "epoch": 0.46862745098039216, "grad_norm": 2.3208189010620117, "learning_rate": 5.6190285026044456e-05, "loss": 1.7412, "step": 3107 }, { "epoch": 0.4687782805429864, "grad_norm": 2.1189563274383545, "learning_rate": 5.616641430486388e-05, "loss": 1.3475, "step": 3108 }, { "epoch": 0.4689291101055807, "grad_norm": 2.0012354850769043, "learning_rate": 5.6142542156413505e-05, "loss": 1.3609, "step": 3109 }, { "epoch": 0.46907993966817496, "grad_norm": 1.9910681247711182, "learning_rate": 5.611866858621875e-05, "loss": 1.4148, "step": 3110 }, { "epoch": 0.46923076923076923, "grad_norm": 2.030245780944824, "learning_rate": 5.609479359980537e-05, "loss": 1.2607, "step": 3111 }, { "epoch": 0.4693815987933635, "grad_norm": 1.8505526781082153, "learning_rate": 5.607091720269942e-05, "loss": 1.116, "step": 3112 }, { "epoch": 0.46953242835595776, "grad_norm": 1.727562427520752, "learning_rate": 5.6047039400427305e-05, "loss": 0.8282, "step": 3113 }, { "epoch": 0.46968325791855203, "grad_norm": 1.937791109085083, "learning_rate": 5.602316019851574e-05, "loss": 1.2433, "step": 3114 }, { "epoch": 0.4698340874811463, "grad_norm": 2.017515182495117, "learning_rate": 5.5999279602491784e-05, "loss": 1.4606, "step": 3115 }, { "epoch": 0.46998491704374057, "grad_norm": 1.6565909385681152, "learning_rate": 5.59753976178828e-05, "loss": 1.0141, "step": 3116 }, { "epoch": 0.47013574660633484, "grad_norm": 1.8242199420928955, "learning_rate": 5.595151425021647e-05, "loss": 1.0261, "step": 3117 }, { "epoch": 0.4702865761689291, "grad_norm": 1.504959225654602, "learning_rate": 5.5927629505020825e-05, "loss": 0.721, "step": 3118 }, { "epoch": 0.4704374057315234, "grad_norm": 1.837146520614624, "learning_rate": 5.59037433878242e-05, "loss": 1.1908, "step": 3119 }, { "epoch": 0.47058823529411764, "grad_norm": 1.854205846786499, "learning_rate": 5.5879855904155233e-05, "loss": 1.1194, "step": 3120 }, { "epoch": 0.4707390648567119, "grad_norm": 1.7920652627944946, "learning_rate": 5.585596705954287e-05, "loss": 0.9521, "step": 3121 }, { "epoch": 0.4708898944193062, "grad_norm": 1.9816592931747437, "learning_rate": 5.5832076859516415e-05, "loss": 1.1142, "step": 3122 }, { "epoch": 0.47104072398190044, "grad_norm": 1.6433911323547363, "learning_rate": 5.580818530960544e-05, "loss": 0.9701, "step": 3123 }, { "epoch": 0.4711915535444947, "grad_norm": 1.9043782949447632, "learning_rate": 5.578429241533987e-05, "loss": 1.2066, "step": 3124 }, { "epoch": 0.471342383107089, "grad_norm": 1.9815117120742798, "learning_rate": 5.576039818224993e-05, "loss": 1.1446, "step": 3125 }, { "epoch": 0.47149321266968325, "grad_norm": 1.929847002029419, "learning_rate": 5.5736502615866105e-05, "loss": 1.0851, "step": 3126 }, { "epoch": 0.4716440422322775, "grad_norm": 1.9358450174331665, "learning_rate": 5.571260572171927e-05, "loss": 1.0174, "step": 3127 }, { "epoch": 0.4717948717948718, "grad_norm": 1.7563176155090332, "learning_rate": 5.568870750534054e-05, "loss": 1.048, "step": 3128 }, { "epoch": 0.47194570135746605, "grad_norm": 2.029773473739624, "learning_rate": 5.566480797226139e-05, "loss": 1.2483, "step": 3129 }, { "epoch": 0.4720965309200603, "grad_norm": 1.9039345979690552, "learning_rate": 5.564090712801355e-05, "loss": 1.1726, "step": 3130 }, { "epoch": 0.4722473604826546, "grad_norm": 1.7702053785324097, "learning_rate": 5.561700497812908e-05, "loss": 0.9183, "step": 3131 }, { "epoch": 0.47239819004524886, "grad_norm": 1.863633632659912, "learning_rate": 5.559310152814035e-05, "loss": 1.3647, "step": 3132 }, { "epoch": 0.4725490196078431, "grad_norm": 1.9658821821212769, "learning_rate": 5.5569196783580014e-05, "loss": 1.252, "step": 3133 }, { "epoch": 0.4726998491704374, "grad_norm": 2.1547935009002686, "learning_rate": 5.554529074998104e-05, "loss": 1.2255, "step": 3134 }, { "epoch": 0.47285067873303166, "grad_norm": 1.8863904476165771, "learning_rate": 5.5521383432876676e-05, "loss": 0.9555, "step": 3135 }, { "epoch": 0.4730015082956259, "grad_norm": 1.8325077295303345, "learning_rate": 5.5497474837800455e-05, "loss": 1.1775, "step": 3136 }, { "epoch": 0.4731523378582202, "grad_norm": 1.8762943744659424, "learning_rate": 5.5473564970286275e-05, "loss": 1.1132, "step": 3137 }, { "epoch": 0.47330316742081446, "grad_norm": 1.8356549739837646, "learning_rate": 5.544965383586824e-05, "loss": 1.1182, "step": 3138 }, { "epoch": 0.47345399698340873, "grad_norm": 2.0654070377349854, "learning_rate": 5.542574144008081e-05, "loss": 1.2982, "step": 3139 }, { "epoch": 0.473604826546003, "grad_norm": 2.1852126121520996, "learning_rate": 5.540182778845872e-05, "loss": 1.3542, "step": 3140 }, { "epoch": 0.47375565610859727, "grad_norm": 2.1818692684173584, "learning_rate": 5.537791288653696e-05, "loss": 1.1052, "step": 3141 }, { "epoch": 0.47390648567119154, "grad_norm": 1.9194327592849731, "learning_rate": 5.535399673985089e-05, "loss": 0.9731, "step": 3142 }, { "epoch": 0.4740573152337858, "grad_norm": 2.2810544967651367, "learning_rate": 5.533007935393607e-05, "loss": 1.2249, "step": 3143 }, { "epoch": 0.47420814479638007, "grad_norm": 1.6781193017959595, "learning_rate": 5.5306160734328396e-05, "loss": 0.8843, "step": 3144 }, { "epoch": 0.47435897435897434, "grad_norm": 1.86876380443573, "learning_rate": 5.528224088656403e-05, "loss": 0.9202, "step": 3145 }, { "epoch": 0.4745098039215686, "grad_norm": 1.8685144186019897, "learning_rate": 5.525831981617944e-05, "loss": 1.0841, "step": 3146 }, { "epoch": 0.4746606334841629, "grad_norm": 1.9164763689041138, "learning_rate": 5.523439752871136e-05, "loss": 0.9059, "step": 3147 }, { "epoch": 0.47481146304675714, "grad_norm": 1.8726049661636353, "learning_rate": 5.521047402969681e-05, "loss": 0.966, "step": 3148 }, { "epoch": 0.4749622926093514, "grad_norm": 2.0713486671447754, "learning_rate": 5.51865493246731e-05, "loss": 1.0099, "step": 3149 }, { "epoch": 0.4751131221719457, "grad_norm": 1.9347867965698242, "learning_rate": 5.5162623419177783e-05, "loss": 0.8152, "step": 3150 }, { "epoch": 0.47526395173453995, "grad_norm": 2.175199270248413, "learning_rate": 5.513869631874874e-05, "loss": 1.6227, "step": 3151 }, { "epoch": 0.4754147812971342, "grad_norm": 2.113898277282715, "learning_rate": 5.511476802892409e-05, "loss": 1.3886, "step": 3152 }, { "epoch": 0.4755656108597285, "grad_norm": 2.0272274017333984, "learning_rate": 5.509083855524226e-05, "loss": 1.0776, "step": 3153 }, { "epoch": 0.47571644042232275, "grad_norm": 2.4371376037597656, "learning_rate": 5.506690790324192e-05, "loss": 1.6139, "step": 3154 }, { "epoch": 0.475867269984917, "grad_norm": 1.6315704584121704, "learning_rate": 5.5042976078462026e-05, "loss": 0.9458, "step": 3155 }, { "epoch": 0.4760180995475113, "grad_norm": 1.8564924001693726, "learning_rate": 5.50190430864418e-05, "loss": 1.1599, "step": 3156 }, { "epoch": 0.47616892911010555, "grad_norm": 1.955262303352356, "learning_rate": 5.499510893272077e-05, "loss": 1.11, "step": 3157 }, { "epoch": 0.4763197586726998, "grad_norm": 2.205500364303589, "learning_rate": 5.497117362283867e-05, "loss": 1.327, "step": 3158 }, { "epoch": 0.4764705882352941, "grad_norm": 1.936598777770996, "learning_rate": 5.494723716233553e-05, "loss": 0.9361, "step": 3159 }, { "epoch": 0.4766214177978884, "grad_norm": 1.8889321088790894, "learning_rate": 5.4923299556751673e-05, "loss": 1.0346, "step": 3160 }, { "epoch": 0.4767722473604827, "grad_norm": 1.805065393447876, "learning_rate": 5.489936081162766e-05, "loss": 0.8308, "step": 3161 }, { "epoch": 0.47692307692307695, "grad_norm": 1.6282885074615479, "learning_rate": 5.4875420932504306e-05, "loss": 0.9199, "step": 3162 }, { "epoch": 0.4770739064856712, "grad_norm": 1.716908574104309, "learning_rate": 5.485147992492271e-05, "loss": 0.9806, "step": 3163 }, { "epoch": 0.4772247360482655, "grad_norm": 1.721126675605774, "learning_rate": 5.4827537794424244e-05, "loss": 0.9811, "step": 3164 }, { "epoch": 0.47737556561085975, "grad_norm": 2.0741562843322754, "learning_rate": 5.48035945465505e-05, "loss": 1.2689, "step": 3165 }, { "epoch": 0.477526395173454, "grad_norm": 1.9985960721969604, "learning_rate": 5.4779650186843335e-05, "loss": 1.444, "step": 3166 }, { "epoch": 0.4776772247360483, "grad_norm": 1.9682432413101196, "learning_rate": 5.475570472084491e-05, "loss": 1.3247, "step": 3167 }, { "epoch": 0.47782805429864256, "grad_norm": 2.3881618976593018, "learning_rate": 5.473175815409759e-05, "loss": 1.4192, "step": 3168 }, { "epoch": 0.4779788838612368, "grad_norm": 2.1143503189086914, "learning_rate": 5.470781049214402e-05, "loss": 1.2449, "step": 3169 }, { "epoch": 0.4781297134238311, "grad_norm": 1.9898343086242676, "learning_rate": 5.4683861740527085e-05, "loss": 1.0175, "step": 3170 }, { "epoch": 0.47828054298642536, "grad_norm": 1.548792839050293, "learning_rate": 5.465991190478996e-05, "loss": 0.8106, "step": 3171 }, { "epoch": 0.47843137254901963, "grad_norm": 1.9238406419754028, "learning_rate": 5.463596099047601e-05, "loss": 1.1201, "step": 3172 }, { "epoch": 0.4785822021116139, "grad_norm": 1.9281147718429565, "learning_rate": 5.461200900312888e-05, "loss": 1.2845, "step": 3173 }, { "epoch": 0.47873303167420816, "grad_norm": 1.9917998313903809, "learning_rate": 5.458805594829248e-05, "loss": 1.2271, "step": 3174 }, { "epoch": 0.47888386123680243, "grad_norm": 2.0988333225250244, "learning_rate": 5.456410183151094e-05, "loss": 1.1434, "step": 3175 }, { "epoch": 0.4790346907993967, "grad_norm": 2.3428614139556885, "learning_rate": 5.454014665832866e-05, "loss": 1.7689, "step": 3176 }, { "epoch": 0.47918552036199097, "grad_norm": 1.8909062147140503, "learning_rate": 5.451619043429026e-05, "loss": 1.2339, "step": 3177 }, { "epoch": 0.47933634992458524, "grad_norm": 2.1328070163726807, "learning_rate": 5.449223316494062e-05, "loss": 1.3083, "step": 3178 }, { "epoch": 0.4794871794871795, "grad_norm": 2.132441282272339, "learning_rate": 5.4468274855824855e-05, "loss": 1.5014, "step": 3179 }, { "epoch": 0.4796380090497738, "grad_norm": 1.8037899732589722, "learning_rate": 5.444431551248832e-05, "loss": 1.0036, "step": 3180 }, { "epoch": 0.47978883861236804, "grad_norm": 1.9464470148086548, "learning_rate": 5.442035514047662e-05, "loss": 1.2159, "step": 3181 }, { "epoch": 0.4799396681749623, "grad_norm": 2.0315113067626953, "learning_rate": 5.439639374533557e-05, "loss": 1.3364, "step": 3182 }, { "epoch": 0.4800904977375566, "grad_norm": 1.8734036684036255, "learning_rate": 5.4372431332611264e-05, "loss": 1.2234, "step": 3183 }, { "epoch": 0.48024132730015084, "grad_norm": 1.942132592201233, "learning_rate": 5.434846790785e-05, "loss": 1.1752, "step": 3184 }, { "epoch": 0.4803921568627451, "grad_norm": 1.982226014137268, "learning_rate": 5.4324503476598334e-05, "loss": 0.8192, "step": 3185 }, { "epoch": 0.4805429864253394, "grad_norm": 2.27180814743042, "learning_rate": 5.4300538044403e-05, "loss": 1.3185, "step": 3186 }, { "epoch": 0.48069381598793365, "grad_norm": 2.32281231880188, "learning_rate": 5.4276571616811054e-05, "loss": 1.4051, "step": 3187 }, { "epoch": 0.4808446455505279, "grad_norm": 1.802240252494812, "learning_rate": 5.4252604199369696e-05, "loss": 1.0089, "step": 3188 }, { "epoch": 0.4809954751131222, "grad_norm": 2.1337621212005615, "learning_rate": 5.42286357976264e-05, "loss": 1.211, "step": 3189 }, { "epoch": 0.48114630467571645, "grad_norm": 2.070204973220825, "learning_rate": 5.420466641712887e-05, "loss": 1.276, "step": 3190 }, { "epoch": 0.4812971342383107, "grad_norm": 2.0097341537475586, "learning_rate": 5.418069606342502e-05, "loss": 1.0855, "step": 3191 }, { "epoch": 0.481447963800905, "grad_norm": 2.0808277130126953, "learning_rate": 5.4156724742063e-05, "loss": 1.2693, "step": 3192 }, { "epoch": 0.48159879336349926, "grad_norm": 2.160407781600952, "learning_rate": 5.4132752458591166e-05, "loss": 1.22, "step": 3193 }, { "epoch": 0.4817496229260935, "grad_norm": 2.1191699504852295, "learning_rate": 5.4108779218558124e-05, "loss": 1.2511, "step": 3194 }, { "epoch": 0.4819004524886878, "grad_norm": 2.1446127891540527, "learning_rate": 5.408480502751269e-05, "loss": 1.1739, "step": 3195 }, { "epoch": 0.48205128205128206, "grad_norm": 1.9813650846481323, "learning_rate": 5.406082989100387e-05, "loss": 1.0183, "step": 3196 }, { "epoch": 0.4822021116138763, "grad_norm": 1.6664319038391113, "learning_rate": 5.403685381458094e-05, "loss": 0.82, "step": 3197 }, { "epoch": 0.4823529411764706, "grad_norm": 1.6570916175842285, "learning_rate": 5.401287680379338e-05, "loss": 0.8498, "step": 3198 }, { "epoch": 0.48250377073906486, "grad_norm": 1.6332212686538696, "learning_rate": 5.398889886419085e-05, "loss": 0.8251, "step": 3199 }, { "epoch": 0.48265460030165913, "grad_norm": 1.952623724937439, "learning_rate": 5.396492000132326e-05, "loss": 1.0799, "step": 3200 }, { "epoch": 0.4828054298642534, "grad_norm": 1.9904412031173706, "learning_rate": 5.394094022074072e-05, "loss": 1.2079, "step": 3201 }, { "epoch": 0.48295625942684767, "grad_norm": 1.948791265487671, "learning_rate": 5.391695952799357e-05, "loss": 1.38, "step": 3202 }, { "epoch": 0.48310708898944194, "grad_norm": 1.7585110664367676, "learning_rate": 5.389297792863235e-05, "loss": 1.2728, "step": 3203 }, { "epoch": 0.4832579185520362, "grad_norm": 1.8920320272445679, "learning_rate": 5.386899542820779e-05, "loss": 1.1272, "step": 3204 }, { "epoch": 0.48340874811463047, "grad_norm": 2.2437527179718018, "learning_rate": 5.384501203227086e-05, "loss": 1.4841, "step": 3205 }, { "epoch": 0.48355957767722474, "grad_norm": 2.0041534900665283, "learning_rate": 5.382102774637272e-05, "loss": 1.1243, "step": 3206 }, { "epoch": 0.483710407239819, "grad_norm": 2.1837105751037598, "learning_rate": 5.379704257606474e-05, "loss": 1.6038, "step": 3207 }, { "epoch": 0.4838612368024133, "grad_norm": 2.222090244293213, "learning_rate": 5.377305652689849e-05, "loss": 1.482, "step": 3208 }, { "epoch": 0.48401206636500754, "grad_norm": 1.9280670881271362, "learning_rate": 5.374906960442576e-05, "loss": 0.9872, "step": 3209 }, { "epoch": 0.4841628959276018, "grad_norm": 2.005507469177246, "learning_rate": 5.372508181419852e-05, "loss": 1.394, "step": 3210 }, { "epoch": 0.4843137254901961, "grad_norm": 1.9137048721313477, "learning_rate": 5.3701093161768955e-05, "loss": 1.1036, "step": 3211 }, { "epoch": 0.48446455505279035, "grad_norm": 2.0502278804779053, "learning_rate": 5.367710365268944e-05, "loss": 1.0504, "step": 3212 }, { "epoch": 0.4846153846153846, "grad_norm": 2.0895094871520996, "learning_rate": 5.365311329251256e-05, "loss": 1.3877, "step": 3213 }, { "epoch": 0.4847662141779789, "grad_norm": 1.7411394119262695, "learning_rate": 5.3629122086791093e-05, "loss": 0.8608, "step": 3214 }, { "epoch": 0.48491704374057315, "grad_norm": 1.7803622484207153, "learning_rate": 5.3605130041078e-05, "loss": 0.9758, "step": 3215 }, { "epoch": 0.4850678733031674, "grad_norm": 2.219048023223877, "learning_rate": 5.3581137160926464e-05, "loss": 1.4368, "step": 3216 }, { "epoch": 0.4852187028657617, "grad_norm": 1.721985936164856, "learning_rate": 5.355714345188982e-05, "loss": 1.0022, "step": 3217 }, { "epoch": 0.48536953242835595, "grad_norm": 2.159724473953247, "learning_rate": 5.3533148919521656e-05, "loss": 1.4267, "step": 3218 }, { "epoch": 0.4855203619909502, "grad_norm": 1.7123401165008545, "learning_rate": 5.3509153569375666e-05, "loss": 0.856, "step": 3219 }, { "epoch": 0.4856711915535445, "grad_norm": 2.167013645172119, "learning_rate": 5.3485157407005826e-05, "loss": 1.3367, "step": 3220 }, { "epoch": 0.48582202111613876, "grad_norm": 1.8615204095840454, "learning_rate": 5.346116043796622e-05, "loss": 1.2496, "step": 3221 }, { "epoch": 0.485972850678733, "grad_norm": 2.0863802433013916, "learning_rate": 5.343716266781117e-05, "loss": 1.3225, "step": 3222 }, { "epoch": 0.4861236802413273, "grad_norm": 1.9512511491775513, "learning_rate": 5.341316410209518e-05, "loss": 1.2492, "step": 3223 }, { "epoch": 0.48627450980392156, "grad_norm": 2.2378532886505127, "learning_rate": 5.338916474637289e-05, "loss": 1.2012, "step": 3224 }, { "epoch": 0.48642533936651583, "grad_norm": 2.255401611328125, "learning_rate": 5.336516460619918e-05, "loss": 1.3413, "step": 3225 }, { "epoch": 0.4865761689291101, "grad_norm": 1.76991868019104, "learning_rate": 5.3341163687129094e-05, "loss": 0.8279, "step": 3226 }, { "epoch": 0.48672699849170437, "grad_norm": 2.087656021118164, "learning_rate": 5.331716199471784e-05, "loss": 1.1274, "step": 3227 }, { "epoch": 0.48687782805429863, "grad_norm": 2.0421957969665527, "learning_rate": 5.3293159534520834e-05, "loss": 1.2424, "step": 3228 }, { "epoch": 0.4870286576168929, "grad_norm": 2.1989026069641113, "learning_rate": 5.326915631209364e-05, "loss": 1.4907, "step": 3229 }, { "epoch": 0.48717948717948717, "grad_norm": 2.1662380695343018, "learning_rate": 5.324515233299199e-05, "loss": 1.2857, "step": 3230 }, { "epoch": 0.48733031674208144, "grad_norm": 2.0114245414733887, "learning_rate": 5.322114760277185e-05, "loss": 1.3249, "step": 3231 }, { "epoch": 0.4874811463046757, "grad_norm": 2.0627007484436035, "learning_rate": 5.319714212698931e-05, "loss": 1.2073, "step": 3232 }, { "epoch": 0.48763197586727, "grad_norm": 2.0062766075134277, "learning_rate": 5.317313591120062e-05, "loss": 1.5445, "step": 3233 }, { "epoch": 0.48778280542986424, "grad_norm": 1.6910678148269653, "learning_rate": 5.314912896096226e-05, "loss": 0.8502, "step": 3234 }, { "epoch": 0.4879336349924585, "grad_norm": 2.107973575592041, "learning_rate": 5.312512128183083e-05, "loss": 1.3068, "step": 3235 }, { "epoch": 0.4880844645550528, "grad_norm": 1.8708592653274536, "learning_rate": 5.3101112879363116e-05, "loss": 1.1266, "step": 3236 }, { "epoch": 0.48823529411764705, "grad_norm": 1.9603568315505981, "learning_rate": 5.307710375911605e-05, "loss": 1.2288, "step": 3237 }, { "epoch": 0.4883861236802413, "grad_norm": 1.8326711654663086, "learning_rate": 5.305309392664679e-05, "loss": 1.0653, "step": 3238 }, { "epoch": 0.4885369532428356, "grad_norm": 1.7489628791809082, "learning_rate": 5.302908338751259e-05, "loss": 0.8972, "step": 3239 }, { "epoch": 0.48868778280542985, "grad_norm": 1.902839183807373, "learning_rate": 5.300507214727092e-05, "loss": 1.2279, "step": 3240 }, { "epoch": 0.4888386123680241, "grad_norm": 2.131943464279175, "learning_rate": 5.2981060211479364e-05, "loss": 1.1052, "step": 3241 }, { "epoch": 0.4889894419306184, "grad_norm": 2.18989896774292, "learning_rate": 5.295704758569571e-05, "loss": 1.0891, "step": 3242 }, { "epoch": 0.48914027149321265, "grad_norm": 2.0796988010406494, "learning_rate": 5.2933034275477865e-05, "loss": 1.1235, "step": 3243 }, { "epoch": 0.4892911010558069, "grad_norm": 1.7939469814300537, "learning_rate": 5.290902028638394e-05, "loss": 0.8535, "step": 3244 }, { "epoch": 0.4894419306184012, "grad_norm": 2.0148701667785645, "learning_rate": 5.288500562397216e-05, "loss": 0.9746, "step": 3245 }, { "epoch": 0.48959276018099546, "grad_norm": 2.0896146297454834, "learning_rate": 5.286099029380095e-05, "loss": 1.129, "step": 3246 }, { "epoch": 0.4897435897435897, "grad_norm": 1.9987130165100098, "learning_rate": 5.283697430142884e-05, "loss": 1.057, "step": 3247 }, { "epoch": 0.489894419306184, "grad_norm": 1.8667711019515991, "learning_rate": 5.281295765241455e-05, "loss": 0.8366, "step": 3248 }, { "epoch": 0.49004524886877826, "grad_norm": 1.6956944465637207, "learning_rate": 5.278894035231695e-05, "loss": 0.8294, "step": 3249 }, { "epoch": 0.49019607843137253, "grad_norm": 1.6423479318618774, "learning_rate": 5.276492240669504e-05, "loss": 0.6935, "step": 3250 }, { "epoch": 0.4903469079939668, "grad_norm": 1.9833804368972778, "learning_rate": 5.274090382110799e-05, "loss": 1.4334, "step": 3251 }, { "epoch": 0.49049773755656106, "grad_norm": 1.6972874402999878, "learning_rate": 5.271688460111509e-05, "loss": 0.9098, "step": 3252 }, { "epoch": 0.49064856711915533, "grad_norm": 1.8692502975463867, "learning_rate": 5.269286475227582e-05, "loss": 1.1162, "step": 3253 }, { "epoch": 0.4907993966817496, "grad_norm": 1.9171364307403564, "learning_rate": 5.266884428014976e-05, "loss": 1.3272, "step": 3254 }, { "epoch": 0.49095022624434387, "grad_norm": 1.9148989915847778, "learning_rate": 5.2644823190296655e-05, "loss": 1.0768, "step": 3255 }, { "epoch": 0.49110105580693814, "grad_norm": 2.0859313011169434, "learning_rate": 5.262080148827643e-05, "loss": 1.5826, "step": 3256 }, { "epoch": 0.4912518853695324, "grad_norm": 1.8426395654678345, "learning_rate": 5.259677917964907e-05, "loss": 1.2405, "step": 3257 }, { "epoch": 0.49140271493212667, "grad_norm": 2.358083724975586, "learning_rate": 5.257275626997476e-05, "loss": 1.5459, "step": 3258 }, { "epoch": 0.49155354449472094, "grad_norm": 1.6924525499343872, "learning_rate": 5.2548732764813825e-05, "loss": 1.0514, "step": 3259 }, { "epoch": 0.4917043740573152, "grad_norm": 1.7259705066680908, "learning_rate": 5.252470866972669e-05, "loss": 0.9357, "step": 3260 }, { "epoch": 0.4918552036199095, "grad_norm": 2.4524753093719482, "learning_rate": 5.250068399027396e-05, "loss": 1.5539, "step": 3261 }, { "epoch": 0.49200603318250374, "grad_norm": 1.8356348276138306, "learning_rate": 5.247665873201634e-05, "loss": 0.999, "step": 3262 }, { "epoch": 0.492156862745098, "grad_norm": 2.047808885574341, "learning_rate": 5.2452632900514676e-05, "loss": 1.1674, "step": 3263 }, { "epoch": 0.49230769230769234, "grad_norm": 2.135498285293579, "learning_rate": 5.242860650132998e-05, "loss": 1.2007, "step": 3264 }, { "epoch": 0.4924585218702866, "grad_norm": 2.0121548175811768, "learning_rate": 5.240457954002337e-05, "loss": 1.1568, "step": 3265 }, { "epoch": 0.49260935143288087, "grad_norm": 1.9750007390975952, "learning_rate": 5.238055202215606e-05, "loss": 0.9903, "step": 3266 }, { "epoch": 0.49276018099547514, "grad_norm": 1.7739484310150146, "learning_rate": 5.235652395328944e-05, "loss": 0.8376, "step": 3267 }, { "epoch": 0.4929110105580694, "grad_norm": 2.27482008934021, "learning_rate": 5.233249533898502e-05, "loss": 1.2978, "step": 3268 }, { "epoch": 0.4930618401206637, "grad_norm": 2.0342624187469482, "learning_rate": 5.230846618480444e-05, "loss": 1.1069, "step": 3269 }, { "epoch": 0.49321266968325794, "grad_norm": 1.85487961769104, "learning_rate": 5.228443649630945e-05, "loss": 1.025, "step": 3270 }, { "epoch": 0.4933634992458522, "grad_norm": 2.009345769882202, "learning_rate": 5.2260406279061904e-05, "loss": 1.0932, "step": 3271 }, { "epoch": 0.4935143288084465, "grad_norm": 2.140887975692749, "learning_rate": 5.2236375538623825e-05, "loss": 1.1107, "step": 3272 }, { "epoch": 0.49366515837104075, "grad_norm": 2.3295538425445557, "learning_rate": 5.2212344280557345e-05, "loss": 1.4015, "step": 3273 }, { "epoch": 0.493815987933635, "grad_norm": 1.9145679473876953, "learning_rate": 5.21883125104247e-05, "loss": 0.9521, "step": 3274 }, { "epoch": 0.4939668174962293, "grad_norm": 2.0674242973327637, "learning_rate": 5.216428023378822e-05, "loss": 1.3592, "step": 3275 }, { "epoch": 0.49411764705882355, "grad_norm": 2.340428352355957, "learning_rate": 5.214024745621041e-05, "loss": 1.4874, "step": 3276 }, { "epoch": 0.4942684766214178, "grad_norm": 2.076951742172241, "learning_rate": 5.211621418325386e-05, "loss": 1.1914, "step": 3277 }, { "epoch": 0.4944193061840121, "grad_norm": 2.0274250507354736, "learning_rate": 5.20921804204813e-05, "loss": 1.1586, "step": 3278 }, { "epoch": 0.49457013574660635, "grad_norm": 1.8812642097473145, "learning_rate": 5.206814617345551e-05, "loss": 0.9376, "step": 3279 }, { "epoch": 0.4947209653092006, "grad_norm": 1.9721226692199707, "learning_rate": 5.204411144773945e-05, "loss": 0.9959, "step": 3280 }, { "epoch": 0.4948717948717949, "grad_norm": 1.9183399677276611, "learning_rate": 5.2020076248896154e-05, "loss": 0.9551, "step": 3281 }, { "epoch": 0.49502262443438916, "grad_norm": 1.899163842201233, "learning_rate": 5.1996040582488794e-05, "loss": 0.9645, "step": 3282 }, { "epoch": 0.4951734539969834, "grad_norm": 2.2612433433532715, "learning_rate": 5.19720044540806e-05, "loss": 1.2011, "step": 3283 }, { "epoch": 0.4953242835595777, "grad_norm": 2.1064417362213135, "learning_rate": 5.194796786923497e-05, "loss": 0.8929, "step": 3284 }, { "epoch": 0.49547511312217196, "grad_norm": 1.9224557876586914, "learning_rate": 5.192393083351537e-05, "loss": 1.1624, "step": 3285 }, { "epoch": 0.49562594268476623, "grad_norm": 2.198005199432373, "learning_rate": 5.189989335248538e-05, "loss": 1.2245, "step": 3286 }, { "epoch": 0.4957767722473605, "grad_norm": 1.8646551370620728, "learning_rate": 5.187585543170868e-05, "loss": 1.179, "step": 3287 }, { "epoch": 0.49592760180995477, "grad_norm": 2.075382947921753, "learning_rate": 5.185181707674906e-05, "loss": 1.0543, "step": 3288 }, { "epoch": 0.49607843137254903, "grad_norm": 1.5087312459945679, "learning_rate": 5.1827778293170394e-05, "loss": 0.7278, "step": 3289 }, { "epoch": 0.4962292609351433, "grad_norm": 2.385061502456665, "learning_rate": 5.1803739086536674e-05, "loss": 1.593, "step": 3290 }, { "epoch": 0.49638009049773757, "grad_norm": 1.991692304611206, "learning_rate": 5.177969946241198e-05, "loss": 0.953, "step": 3291 }, { "epoch": 0.49653092006033184, "grad_norm": 2.124134063720703, "learning_rate": 5.175565942636048e-05, "loss": 1.0831, "step": 3292 }, { "epoch": 0.4966817496229261, "grad_norm": 2.060319185256958, "learning_rate": 5.1731618983946486e-05, "loss": 1.1107, "step": 3293 }, { "epoch": 0.4968325791855204, "grad_norm": 2.58099102973938, "learning_rate": 5.170757814073431e-05, "loss": 1.5246, "step": 3294 }, { "epoch": 0.49698340874811464, "grad_norm": 1.915647029876709, "learning_rate": 5.1683536902288456e-05, "loss": 0.9418, "step": 3295 }, { "epoch": 0.4971342383107089, "grad_norm": 1.6240559816360474, "learning_rate": 5.1659495274173454e-05, "loss": 0.645, "step": 3296 }, { "epoch": 0.4972850678733032, "grad_norm": 1.8489500284194946, "learning_rate": 5.1635453261953947e-05, "loss": 1.0722, "step": 3297 }, { "epoch": 0.49743589743589745, "grad_norm": 1.8843498229980469, "learning_rate": 5.161141087119469e-05, "loss": 1.0002, "step": 3298 }, { "epoch": 0.4975867269984917, "grad_norm": 1.7522529363632202, "learning_rate": 5.158736810746047e-05, "loss": 0.9804, "step": 3299 }, { "epoch": 0.497737556561086, "grad_norm": 2.0942931175231934, "learning_rate": 5.156332497631621e-05, "loss": 1.1494, "step": 3300 }, { "epoch": 0.49788838612368025, "grad_norm": 2.0003907680511475, "learning_rate": 5.1539281483326884e-05, "loss": 1.3936, "step": 3301 }, { "epoch": 0.4980392156862745, "grad_norm": 1.6228581666946411, "learning_rate": 5.15152376340576e-05, "loss": 1.0931, "step": 3302 }, { "epoch": 0.4981900452488688, "grad_norm": 1.802562952041626, "learning_rate": 5.149119343407347e-05, "loss": 1.0585, "step": 3303 }, { "epoch": 0.49834087481146305, "grad_norm": 2.2002322673797607, "learning_rate": 5.1467148888939765e-05, "loss": 1.411, "step": 3304 }, { "epoch": 0.4984917043740573, "grad_norm": 1.9289953708648682, "learning_rate": 5.144310400422179e-05, "loss": 1.2978, "step": 3305 }, { "epoch": 0.4986425339366516, "grad_norm": 2.947585344314575, "learning_rate": 5.141905878548494e-05, "loss": 1.0798, "step": 3306 }, { "epoch": 0.49879336349924586, "grad_norm": 1.881165623664856, "learning_rate": 5.139501323829469e-05, "loss": 1.2317, "step": 3307 }, { "epoch": 0.4989441930618401, "grad_norm": 1.7283804416656494, "learning_rate": 5.137096736821658e-05, "loss": 1.0215, "step": 3308 }, { "epoch": 0.4990950226244344, "grad_norm": 2.074725389480591, "learning_rate": 5.134692118081626e-05, "loss": 1.2789, "step": 3309 }, { "epoch": 0.49924585218702866, "grad_norm": 1.9955615997314453, "learning_rate": 5.1322874681659405e-05, "loss": 1.3223, "step": 3310 }, { "epoch": 0.49939668174962293, "grad_norm": 1.6074857711791992, "learning_rate": 5.12988278763118e-05, "loss": 0.8012, "step": 3311 }, { "epoch": 0.4995475113122172, "grad_norm": 2.0709049701690674, "learning_rate": 5.127478077033927e-05, "loss": 1.4475, "step": 3312 }, { "epoch": 0.49969834087481146, "grad_norm": 1.9154144525527954, "learning_rate": 5.1250733369307724e-05, "loss": 1.1395, "step": 3313 }, { "epoch": 0.49984917043740573, "grad_norm": 1.9365462064743042, "learning_rate": 5.122668567878316e-05, "loss": 1.068, "step": 3314 }, { "epoch": 0.5, "grad_norm": 2.2784550189971924, "learning_rate": 5.1202637704331604e-05, "loss": 1.4479, "step": 3315 }, { "epoch": 0.5001508295625943, "grad_norm": 2.016960859298706, "learning_rate": 5.117858945151919e-05, "loss": 1.0877, "step": 3316 }, { "epoch": 0.5003016591251885, "grad_norm": 1.9028633832931519, "learning_rate": 5.1154540925912066e-05, "loss": 1.0093, "step": 3317 }, { "epoch": 0.5004524886877828, "grad_norm": 2.042022705078125, "learning_rate": 5.113049213307648e-05, "loss": 1.2336, "step": 3318 }, { "epoch": 0.5006033182503771, "grad_norm": 1.8977651596069336, "learning_rate": 5.110644307857874e-05, "loss": 1.2126, "step": 3319 }, { "epoch": 0.5007541478129713, "grad_norm": 1.948185920715332, "learning_rate": 5.10823937679852e-05, "loss": 0.9868, "step": 3320 }, { "epoch": 0.5009049773755656, "grad_norm": 2.02095890045166, "learning_rate": 5.10583442068623e-05, "loss": 1.3541, "step": 3321 }, { "epoch": 0.5010558069381599, "grad_norm": 1.8055042028427124, "learning_rate": 5.103429440077648e-05, "loss": 1.0843, "step": 3322 }, { "epoch": 0.5012066365007541, "grad_norm": 2.141511917114258, "learning_rate": 5.1010244355294314e-05, "loss": 1.1954, "step": 3323 }, { "epoch": 0.5013574660633484, "grad_norm": 2.0156402587890625, "learning_rate": 5.0986194075982366e-05, "loss": 1.0272, "step": 3324 }, { "epoch": 0.5015082956259427, "grad_norm": 1.83160400390625, "learning_rate": 5.096214356840728e-05, "loss": 1.1777, "step": 3325 }, { "epoch": 0.501659125188537, "grad_norm": 1.608211636543274, "learning_rate": 5.0938092838135785e-05, "loss": 0.9139, "step": 3326 }, { "epoch": 0.5018099547511312, "grad_norm": 1.9318221807479858, "learning_rate": 5.0914041890734595e-05, "loss": 1.2743, "step": 3327 }, { "epoch": 0.5019607843137255, "grad_norm": 1.966196894645691, "learning_rate": 5.088999073177053e-05, "loss": 1.0803, "step": 3328 }, { "epoch": 0.5021116138763198, "grad_norm": 1.722032904624939, "learning_rate": 5.0865939366810434e-05, "loss": 0.9555, "step": 3329 }, { "epoch": 0.502262443438914, "grad_norm": 1.821544885635376, "learning_rate": 5.084188780142118e-05, "loss": 0.9775, "step": 3330 }, { "epoch": 0.5024132730015083, "grad_norm": 2.227196455001831, "learning_rate": 5.081783604116975e-05, "loss": 1.4868, "step": 3331 }, { "epoch": 0.5025641025641026, "grad_norm": 2.434945583343506, "learning_rate": 5.079378409162311e-05, "loss": 1.5375, "step": 3332 }, { "epoch": 0.5027149321266968, "grad_norm": 1.827649474143982, "learning_rate": 5.076973195834829e-05, "loss": 0.9935, "step": 3333 }, { "epoch": 0.5028657616892911, "grad_norm": 1.5964699983596802, "learning_rate": 5.074567964691236e-05, "loss": 0.7606, "step": 3334 }, { "epoch": 0.5030165912518854, "grad_norm": 2.535710573196411, "learning_rate": 5.072162716288246e-05, "loss": 1.5042, "step": 3335 }, { "epoch": 0.5031674208144796, "grad_norm": 2.3185982704162598, "learning_rate": 5.069757451182572e-05, "loss": 1.3948, "step": 3336 }, { "epoch": 0.5033182503770739, "grad_norm": 2.1141011714935303, "learning_rate": 5.067352169930934e-05, "loss": 1.1557, "step": 3337 }, { "epoch": 0.5034690799396682, "grad_norm": 1.8145285844802856, "learning_rate": 5.064946873090054e-05, "loss": 0.8936, "step": 3338 }, { "epoch": 0.5036199095022624, "grad_norm": 2.011888027191162, "learning_rate": 5.062541561216661e-05, "loss": 0.9875, "step": 3339 }, { "epoch": 0.5037707390648567, "grad_norm": 2.4530515670776367, "learning_rate": 5.060136234867484e-05, "loss": 1.4484, "step": 3340 }, { "epoch": 0.503921568627451, "grad_norm": 2.0995843410491943, "learning_rate": 5.057730894599256e-05, "loss": 1.0576, "step": 3341 }, { "epoch": 0.5040723981900452, "grad_norm": 2.005825996398926, "learning_rate": 5.055325540968715e-05, "loss": 1.0396, "step": 3342 }, { "epoch": 0.5042232277526395, "grad_norm": 2.329840898513794, "learning_rate": 5.0529201745325995e-05, "loss": 1.4937, "step": 3343 }, { "epoch": 0.5043740573152338, "grad_norm": 2.345376968383789, "learning_rate": 5.050514795847654e-05, "loss": 1.1025, "step": 3344 }, { "epoch": 0.504524886877828, "grad_norm": 1.3902584314346313, "learning_rate": 5.048109405470622e-05, "loss": 0.5767, "step": 3345 }, { "epoch": 0.5046757164404223, "grad_norm": 1.5703389644622803, "learning_rate": 5.045704003958254e-05, "loss": 0.7362, "step": 3346 }, { "epoch": 0.5048265460030166, "grad_norm": 1.9831242561340332, "learning_rate": 5.043298591867299e-05, "loss": 0.9297, "step": 3347 }, { "epoch": 0.5049773755656108, "grad_norm": 1.499377965927124, "learning_rate": 5.040893169754511e-05, "loss": 0.7906, "step": 3348 }, { "epoch": 0.5051282051282051, "grad_norm": 1.9531241655349731, "learning_rate": 5.038487738176648e-05, "loss": 0.8435, "step": 3349 }, { "epoch": 0.5052790346907994, "grad_norm": 2.0241005420684814, "learning_rate": 5.0360822976904654e-05, "loss": 1.0688, "step": 3350 }, { "epoch": 0.5054298642533936, "grad_norm": 2.055194139480591, "learning_rate": 5.033676848852723e-05, "loss": 1.6784, "step": 3351 }, { "epoch": 0.5055806938159879, "grad_norm": 2.003324508666992, "learning_rate": 5.031271392220183e-05, "loss": 1.2764, "step": 3352 }, { "epoch": 0.5057315233785822, "grad_norm": 1.7979236841201782, "learning_rate": 5.02886592834961e-05, "loss": 1.102, "step": 3353 }, { "epoch": 0.5058823529411764, "grad_norm": 2.294301986694336, "learning_rate": 5.026460457797768e-05, "loss": 1.5236, "step": 3354 }, { "epoch": 0.5060331825037707, "grad_norm": 2.066312551498413, "learning_rate": 5.024054981121426e-05, "loss": 1.3142, "step": 3355 }, { "epoch": 0.506184012066365, "grad_norm": 2.1324691772460938, "learning_rate": 5.02164949887735e-05, "loss": 1.3604, "step": 3356 }, { "epoch": 0.5063348416289593, "grad_norm": 1.7560207843780518, "learning_rate": 5.019244011622312e-05, "loss": 0.978, "step": 3357 }, { "epoch": 0.5064856711915535, "grad_norm": 1.8775548934936523, "learning_rate": 5.016838519913081e-05, "loss": 1.0841, "step": 3358 }, { "epoch": 0.5066365007541478, "grad_norm": 1.7833598852157593, "learning_rate": 5.0144330243064286e-05, "loss": 1.0033, "step": 3359 }, { "epoch": 0.5067873303167421, "grad_norm": 2.1636157035827637, "learning_rate": 5.012027525359129e-05, "loss": 1.5688, "step": 3360 }, { "epoch": 0.5069381598793363, "grad_norm": 2.017244577407837, "learning_rate": 5.0096220236279534e-05, "loss": 0.9758, "step": 3361 }, { "epoch": 0.5070889894419306, "grad_norm": 2.090182304382324, "learning_rate": 5.007216519669679e-05, "loss": 1.4839, "step": 3362 }, { "epoch": 0.5072398190045249, "grad_norm": 1.9695630073547363, "learning_rate": 5.004811014041079e-05, "loss": 1.2491, "step": 3363 }, { "epoch": 0.5073906485671191, "grad_norm": 1.639790654182434, "learning_rate": 5.0024055072989274e-05, "loss": 0.9756, "step": 3364 }, { "epoch": 0.5075414781297134, "grad_norm": 1.9498252868652344, "learning_rate": 5e-05, "loss": 1.1222, "step": 3365 }, { "epoch": 0.5076923076923077, "grad_norm": 2.037968158721924, "learning_rate": 4.997594492701074e-05, "loss": 1.0116, "step": 3366 }, { "epoch": 0.5078431372549019, "grad_norm": 2.149688482284546, "learning_rate": 4.9951889859589216e-05, "loss": 1.1923, "step": 3367 }, { "epoch": 0.5079939668174962, "grad_norm": 2.056096076965332, "learning_rate": 4.992783480330321e-05, "loss": 1.0902, "step": 3368 }, { "epoch": 0.5081447963800905, "grad_norm": 1.7145709991455078, "learning_rate": 4.990377976372046e-05, "loss": 0.6955, "step": 3369 }, { "epoch": 0.5082956259426847, "grad_norm": 1.8806049823760986, "learning_rate": 4.9879724746408735e-05, "loss": 1.04, "step": 3370 }, { "epoch": 0.508446455505279, "grad_norm": 1.8756335973739624, "learning_rate": 4.985566975693573e-05, "loss": 0.9918, "step": 3371 }, { "epoch": 0.5085972850678733, "grad_norm": 2.206523895263672, "learning_rate": 4.9831614800869215e-05, "loss": 1.193, "step": 3372 }, { "epoch": 0.5087481146304675, "grad_norm": 1.710902214050293, "learning_rate": 4.98075598837769e-05, "loss": 0.9101, "step": 3373 }, { "epoch": 0.5088989441930618, "grad_norm": 2.302554130554199, "learning_rate": 4.978350501122651e-05, "loss": 1.3449, "step": 3374 }, { "epoch": 0.5090497737556561, "grad_norm": 1.7121431827545166, "learning_rate": 4.9759450188785755e-05, "loss": 0.8948, "step": 3375 }, { "epoch": 0.5092006033182503, "grad_norm": 1.8769952058792114, "learning_rate": 4.973539542202233e-05, "loss": 1.0202, "step": 3376 }, { "epoch": 0.5093514328808446, "grad_norm": 2.120204448699951, "learning_rate": 4.971134071650391e-05, "loss": 1.0954, "step": 3377 }, { "epoch": 0.5095022624434389, "grad_norm": 1.9705021381378174, "learning_rate": 4.9687286077798177e-05, "loss": 0.9565, "step": 3378 }, { "epoch": 0.5096530920060331, "grad_norm": 1.9342830181121826, "learning_rate": 4.966323151147278e-05, "loss": 1.0384, "step": 3379 }, { "epoch": 0.5098039215686274, "grad_norm": 2.0101492404937744, "learning_rate": 4.963917702309537e-05, "loss": 1.126, "step": 3380 }, { "epoch": 0.5099547511312217, "grad_norm": 1.7355793714523315, "learning_rate": 4.961512261823353e-05, "loss": 0.9398, "step": 3381 }, { "epoch": 0.510105580693816, "grad_norm": 1.9788336753845215, "learning_rate": 4.959106830245489e-05, "loss": 1.1817, "step": 3382 }, { "epoch": 0.5102564102564102, "grad_norm": 1.8913638591766357, "learning_rate": 4.9567014081327015e-05, "loss": 1.0367, "step": 3383 }, { "epoch": 0.5104072398190045, "grad_norm": 1.9035699367523193, "learning_rate": 4.954295996041747e-05, "loss": 1.0651, "step": 3384 }, { "epoch": 0.5105580693815988, "grad_norm": 1.7082078456878662, "learning_rate": 4.951890594529379e-05, "loss": 0.9107, "step": 3385 }, { "epoch": 0.510708898944193, "grad_norm": 2.4878342151641846, "learning_rate": 4.949485204152347e-05, "loss": 1.541, "step": 3386 }, { "epoch": 0.5108597285067873, "grad_norm": 2.062018871307373, "learning_rate": 4.9470798254674e-05, "loss": 1.0323, "step": 3387 }, { "epoch": 0.5110105580693816, "grad_norm": 1.9152967929840088, "learning_rate": 4.9446744590312856e-05, "loss": 0.917, "step": 3388 }, { "epoch": 0.5111613876319758, "grad_norm": 1.8478883504867554, "learning_rate": 4.9422691054007463e-05, "loss": 1.0121, "step": 3389 }, { "epoch": 0.5113122171945701, "grad_norm": 2.0721046924591064, "learning_rate": 4.9398637651325185e-05, "loss": 1.0689, "step": 3390 }, { "epoch": 0.5114630467571644, "grad_norm": 1.9444692134857178, "learning_rate": 4.937458438783341e-05, "loss": 0.8792, "step": 3391 }, { "epoch": 0.5116138763197586, "grad_norm": 1.715564489364624, "learning_rate": 4.935053126909947e-05, "loss": 0.77, "step": 3392 }, { "epoch": 0.5117647058823529, "grad_norm": 2.2533576488494873, "learning_rate": 4.932647830069068e-05, "loss": 1.1413, "step": 3393 }, { "epoch": 0.5119155354449472, "grad_norm": 1.942355990409851, "learning_rate": 4.93024254881743e-05, "loss": 1.0728, "step": 3394 }, { "epoch": 0.5120663650075414, "grad_norm": 1.8835346698760986, "learning_rate": 4.9278372837117555e-05, "loss": 0.9518, "step": 3395 }, { "epoch": 0.5122171945701357, "grad_norm": 2.1421611309051514, "learning_rate": 4.925432035308764e-05, "loss": 1.0525, "step": 3396 }, { "epoch": 0.51236802413273, "grad_norm": 1.6642241477966309, "learning_rate": 4.9230268041651715e-05, "loss": 0.8094, "step": 3397 }, { "epoch": 0.5125188536953242, "grad_norm": 1.6739556789398193, "learning_rate": 4.920621590837691e-05, "loss": 0.8439, "step": 3398 }, { "epoch": 0.5126696832579185, "grad_norm": 1.7903692722320557, "learning_rate": 4.918216395883026e-05, "loss": 0.881, "step": 3399 }, { "epoch": 0.5128205128205128, "grad_norm": 1.7841300964355469, "learning_rate": 4.915811219857882e-05, "loss": 0.9263, "step": 3400 }, { "epoch": 0.512971342383107, "grad_norm": 1.7981131076812744, "learning_rate": 4.9134060633189585e-05, "loss": 1.1566, "step": 3401 }, { "epoch": 0.5131221719457013, "grad_norm": 2.0049092769622803, "learning_rate": 4.9110009268229477e-05, "loss": 1.2851, "step": 3402 }, { "epoch": 0.5132730015082956, "grad_norm": 2.059823989868164, "learning_rate": 4.908595810926541e-05, "loss": 1.3751, "step": 3403 }, { "epoch": 0.5134238310708898, "grad_norm": 2.0205817222595215, "learning_rate": 4.9061907161864226e-05, "loss": 1.1717, "step": 3404 }, { "epoch": 0.5135746606334841, "grad_norm": 2.141433000564575, "learning_rate": 4.9037856431592716e-05, "loss": 1.4792, "step": 3405 }, { "epoch": 0.5137254901960784, "grad_norm": 1.5827782154083252, "learning_rate": 4.901380592401764e-05, "loss": 0.8835, "step": 3406 }, { "epoch": 0.5138763197586727, "grad_norm": 1.812775731086731, "learning_rate": 4.898975564470569e-05, "loss": 1.1053, "step": 3407 }, { "epoch": 0.5140271493212669, "grad_norm": 1.6884154081344604, "learning_rate": 4.8965705599223536e-05, "loss": 0.9528, "step": 3408 }, { "epoch": 0.5141779788838612, "grad_norm": 1.82872474193573, "learning_rate": 4.894165579313773e-05, "loss": 1.1072, "step": 3409 }, { "epoch": 0.5143288084464555, "grad_norm": 1.925179123878479, "learning_rate": 4.891760623201481e-05, "loss": 1.0889, "step": 3410 }, { "epoch": 0.5144796380090497, "grad_norm": 1.9543579816818237, "learning_rate": 4.889355692142127e-05, "loss": 1.2644, "step": 3411 }, { "epoch": 0.514630467571644, "grad_norm": 1.8816975355148315, "learning_rate": 4.886950786692353e-05, "loss": 1.1803, "step": 3412 }, { "epoch": 0.5147812971342383, "grad_norm": 1.942017912864685, "learning_rate": 4.8845459074087945e-05, "loss": 1.3585, "step": 3413 }, { "epoch": 0.5149321266968325, "grad_norm": 2.074310541152954, "learning_rate": 4.8821410548480824e-05, "loss": 1.1959, "step": 3414 }, { "epoch": 0.5150829562594268, "grad_norm": 2.071761131286621, "learning_rate": 4.8797362295668394e-05, "loss": 1.2389, "step": 3415 }, { "epoch": 0.5152337858220211, "grad_norm": 2.04622220993042, "learning_rate": 4.877331432121684e-05, "loss": 1.1807, "step": 3416 }, { "epoch": 0.5153846153846153, "grad_norm": 1.9244906902313232, "learning_rate": 4.8749266630692294e-05, "loss": 1.1823, "step": 3417 }, { "epoch": 0.5155354449472096, "grad_norm": 1.7708419561386108, "learning_rate": 4.872521922966075e-05, "loss": 0.9994, "step": 3418 }, { "epoch": 0.515686274509804, "grad_norm": 2.0685486793518066, "learning_rate": 4.870117212368822e-05, "loss": 1.1496, "step": 3419 }, { "epoch": 0.5158371040723982, "grad_norm": 1.5190072059631348, "learning_rate": 4.8677125318340606e-05, "loss": 0.834, "step": 3420 }, { "epoch": 0.5159879336349925, "grad_norm": 2.188469409942627, "learning_rate": 4.865307881918375e-05, "loss": 1.436, "step": 3421 }, { "epoch": 0.5161387631975868, "grad_norm": 2.0852010250091553, "learning_rate": 4.862903263178342e-05, "loss": 1.1011, "step": 3422 }, { "epoch": 0.516289592760181, "grad_norm": 1.6148992776870728, "learning_rate": 4.860498676170532e-05, "loss": 0.8329, "step": 3423 }, { "epoch": 0.5164404223227753, "grad_norm": 1.7761046886444092, "learning_rate": 4.858094121451507e-05, "loss": 0.9837, "step": 3424 }, { "epoch": 0.5165912518853696, "grad_norm": 1.656729817390442, "learning_rate": 4.8556895995778223e-05, "loss": 0.8401, "step": 3425 }, { "epoch": 0.5167420814479639, "grad_norm": 1.936323881149292, "learning_rate": 4.853285111106026e-05, "loss": 0.9712, "step": 3426 }, { "epoch": 0.5168929110105581, "grad_norm": 1.7529481649398804, "learning_rate": 4.8508806565926546e-05, "loss": 0.9715, "step": 3427 }, { "epoch": 0.5170437405731524, "grad_norm": 1.9206653833389282, "learning_rate": 4.848476236594243e-05, "loss": 1.0945, "step": 3428 }, { "epoch": 0.5171945701357467, "grad_norm": 2.369285821914673, "learning_rate": 4.846071851667313e-05, "loss": 1.5085, "step": 3429 }, { "epoch": 0.5173453996983409, "grad_norm": 2.152615547180176, "learning_rate": 4.843667502368381e-05, "loss": 1.3945, "step": 3430 }, { "epoch": 0.5174962292609352, "grad_norm": 2.233842134475708, "learning_rate": 4.8412631892539544e-05, "loss": 1.5261, "step": 3431 }, { "epoch": 0.5176470588235295, "grad_norm": 2.1864984035491943, "learning_rate": 4.838858912880532e-05, "loss": 1.3289, "step": 3432 }, { "epoch": 0.5177978883861237, "grad_norm": 1.8778278827667236, "learning_rate": 4.836454673804605e-05, "loss": 0.9118, "step": 3433 }, { "epoch": 0.517948717948718, "grad_norm": 2.345118522644043, "learning_rate": 4.834050472582655e-05, "loss": 1.4775, "step": 3434 }, { "epoch": 0.5180995475113123, "grad_norm": 1.807883858680725, "learning_rate": 4.831646309771155e-05, "loss": 1.0804, "step": 3435 }, { "epoch": 0.5182503770739065, "grad_norm": 2.016303062438965, "learning_rate": 4.8292421859265705e-05, "loss": 1.2156, "step": 3436 }, { "epoch": 0.5184012066365008, "grad_norm": 1.805872917175293, "learning_rate": 4.826838101605354e-05, "loss": 0.9448, "step": 3437 }, { "epoch": 0.5185520361990951, "grad_norm": 2.105135917663574, "learning_rate": 4.824434057363952e-05, "loss": 1.3692, "step": 3438 }, { "epoch": 0.5187028657616893, "grad_norm": 2.1049206256866455, "learning_rate": 4.822030053758803e-05, "loss": 1.112, "step": 3439 }, { "epoch": 0.5188536953242836, "grad_norm": 1.988727331161499, "learning_rate": 4.819626091346334e-05, "loss": 1.1241, "step": 3440 }, { "epoch": 0.5190045248868779, "grad_norm": 1.951704978942871, "learning_rate": 4.817222170682962e-05, "loss": 1.3187, "step": 3441 }, { "epoch": 0.5191553544494721, "grad_norm": 1.9170702695846558, "learning_rate": 4.814818292325095e-05, "loss": 0.9338, "step": 3442 }, { "epoch": 0.5193061840120664, "grad_norm": 1.8935556411743164, "learning_rate": 4.812414456829132e-05, "loss": 1.046, "step": 3443 }, { "epoch": 0.5194570135746607, "grad_norm": 2.186962366104126, "learning_rate": 4.810010664751463e-05, "loss": 1.5406, "step": 3444 }, { "epoch": 0.5196078431372549, "grad_norm": 2.107091188430786, "learning_rate": 4.8076069166484645e-05, "loss": 0.8844, "step": 3445 }, { "epoch": 0.5197586726998492, "grad_norm": 1.7987252473831177, "learning_rate": 4.805203213076504e-05, "loss": 0.943, "step": 3446 }, { "epoch": 0.5199095022624435, "grad_norm": 1.6914503574371338, "learning_rate": 4.802799554591941e-05, "loss": 0.8545, "step": 3447 }, { "epoch": 0.5200603318250377, "grad_norm": 1.8760881423950195, "learning_rate": 4.8003959417511225e-05, "loss": 0.957, "step": 3448 }, { "epoch": 0.520211161387632, "grad_norm": 1.8759722709655762, "learning_rate": 4.797992375110385e-05, "loss": 1.0206, "step": 3449 }, { "epoch": 0.5203619909502263, "grad_norm": 1.5164190530776978, "learning_rate": 4.7955888552260555e-05, "loss": 0.6916, "step": 3450 }, { "epoch": 0.5205128205128206, "grad_norm": 1.9913809299468994, "learning_rate": 4.79318538265445e-05, "loss": 1.382, "step": 3451 }, { "epoch": 0.5206636500754148, "grad_norm": 1.757570743560791, "learning_rate": 4.790781957951871e-05, "loss": 0.9117, "step": 3452 }, { "epoch": 0.5208144796380091, "grad_norm": 1.8439915180206299, "learning_rate": 4.788378581674613e-05, "loss": 1.3093, "step": 3453 }, { "epoch": 0.5209653092006034, "grad_norm": 1.9384721517562866, "learning_rate": 4.785975254378961e-05, "loss": 1.1855, "step": 3454 }, { "epoch": 0.5211161387631976, "grad_norm": 1.8777159452438354, "learning_rate": 4.78357197662118e-05, "loss": 1.1067, "step": 3455 }, { "epoch": 0.5212669683257919, "grad_norm": 2.065002202987671, "learning_rate": 4.781168748957533e-05, "loss": 1.2223, "step": 3456 }, { "epoch": 0.5214177978883862, "grad_norm": 1.9392448663711548, "learning_rate": 4.778765571944267e-05, "loss": 1.2919, "step": 3457 }, { "epoch": 0.5215686274509804, "grad_norm": 1.9451721906661987, "learning_rate": 4.776362446137618e-05, "loss": 1.2253, "step": 3458 }, { "epoch": 0.5217194570135747, "grad_norm": 1.753855586051941, "learning_rate": 4.77395937209381e-05, "loss": 1.0326, "step": 3459 }, { "epoch": 0.521870286576169, "grad_norm": 1.6362558603286743, "learning_rate": 4.771556350369056e-05, "loss": 0.9764, "step": 3460 }, { "epoch": 0.5220211161387632, "grad_norm": 1.824249029159546, "learning_rate": 4.769153381519556e-05, "loss": 0.9631, "step": 3461 }, { "epoch": 0.5221719457013575, "grad_norm": 2.020247220993042, "learning_rate": 4.766750466101498e-05, "loss": 1.2179, "step": 3462 }, { "epoch": 0.5223227752639518, "grad_norm": 1.7954250574111938, "learning_rate": 4.764347604671056e-05, "loss": 0.9063, "step": 3463 }, { "epoch": 0.522473604826546, "grad_norm": 2.0842320919036865, "learning_rate": 4.761944797784397e-05, "loss": 1.4381, "step": 3464 }, { "epoch": 0.5226244343891403, "grad_norm": 1.8721038103103638, "learning_rate": 4.759542045997666e-05, "loss": 1.1994, "step": 3465 }, { "epoch": 0.5227752639517346, "grad_norm": 1.8553043603897095, "learning_rate": 4.757139349867003e-05, "loss": 1.4945, "step": 3466 }, { "epoch": 0.5229260935143288, "grad_norm": 2.213884115219116, "learning_rate": 4.754736709948533e-05, "loss": 1.6526, "step": 3467 }, { "epoch": 0.5230769230769231, "grad_norm": 1.7306240797042847, "learning_rate": 4.752334126798368e-05, "loss": 1.0236, "step": 3468 }, { "epoch": 0.5232277526395174, "grad_norm": 2.298701524734497, "learning_rate": 4.749931600972606e-05, "loss": 1.4841, "step": 3469 }, { "epoch": 0.5233785822021116, "grad_norm": 1.6937445402145386, "learning_rate": 4.7475291330273316e-05, "loss": 1.0734, "step": 3470 }, { "epoch": 0.5235294117647059, "grad_norm": 1.7619284391403198, "learning_rate": 4.745126723518619e-05, "loss": 0.9316, "step": 3471 }, { "epoch": 0.5236802413273002, "grad_norm": 1.6455206871032715, "learning_rate": 4.742724373002525e-05, "loss": 1.0098, "step": 3472 }, { "epoch": 0.5238310708898944, "grad_norm": 1.8646241426467896, "learning_rate": 4.740322082035095e-05, "loss": 0.9803, "step": 3473 }, { "epoch": 0.5239819004524887, "grad_norm": 2.0718510150909424, "learning_rate": 4.7379198511723595e-05, "loss": 0.9716, "step": 3474 }, { "epoch": 0.524132730015083, "grad_norm": 2.1144886016845703, "learning_rate": 4.735517680970335e-05, "loss": 1.4592, "step": 3475 }, { "epoch": 0.5242835595776772, "grad_norm": 2.066756248474121, "learning_rate": 4.7331155719850254e-05, "loss": 1.326, "step": 3476 }, { "epoch": 0.5244343891402715, "grad_norm": 1.8565542697906494, "learning_rate": 4.730713524772419e-05, "loss": 1.1402, "step": 3477 }, { "epoch": 0.5245852187028658, "grad_norm": 1.989497184753418, "learning_rate": 4.7283115398884914e-05, "loss": 1.1851, "step": 3478 }, { "epoch": 0.52473604826546, "grad_norm": 2.0513405799865723, "learning_rate": 4.725909617889202e-05, "loss": 1.3362, "step": 3479 }, { "epoch": 0.5248868778280543, "grad_norm": 1.7999367713928223, "learning_rate": 4.723507759330496e-05, "loss": 0.9038, "step": 3480 }, { "epoch": 0.5250377073906486, "grad_norm": 2.241624355316162, "learning_rate": 4.7211059647683046e-05, "loss": 1.3571, "step": 3481 }, { "epoch": 0.5251885369532429, "grad_norm": 2.044605255126953, "learning_rate": 4.718704234758546e-05, "loss": 1.4212, "step": 3482 }, { "epoch": 0.5253393665158371, "grad_norm": 1.934442162513733, "learning_rate": 4.7163025698571176e-05, "loss": 1.0532, "step": 3483 }, { "epoch": 0.5254901960784314, "grad_norm": 1.9619300365447998, "learning_rate": 4.713900970619907e-05, "loss": 0.9418, "step": 3484 }, { "epoch": 0.5256410256410257, "grad_norm": 1.8931814432144165, "learning_rate": 4.711499437602785e-05, "loss": 1.1089, "step": 3485 }, { "epoch": 0.5257918552036199, "grad_norm": 2.2265396118164062, "learning_rate": 4.709097971361608e-05, "loss": 1.4206, "step": 3486 }, { "epoch": 0.5259426847662142, "grad_norm": 1.7293939590454102, "learning_rate": 4.7066965724522147e-05, "loss": 0.861, "step": 3487 }, { "epoch": 0.5260935143288085, "grad_norm": 1.9415054321289062, "learning_rate": 4.704295241430431e-05, "loss": 1.1438, "step": 3488 }, { "epoch": 0.5262443438914027, "grad_norm": 2.3136658668518066, "learning_rate": 4.701893978852064e-05, "loss": 1.4835, "step": 3489 }, { "epoch": 0.526395173453997, "grad_norm": 2.0033867359161377, "learning_rate": 4.699492785272909e-05, "loss": 1.2419, "step": 3490 }, { "epoch": 0.5265460030165913, "grad_norm": 2.336920976638794, "learning_rate": 4.69709166124874e-05, "loss": 1.3883, "step": 3491 }, { "epoch": 0.5266968325791855, "grad_norm": 2.1203083992004395, "learning_rate": 4.694690607335322e-05, "loss": 1.2348, "step": 3492 }, { "epoch": 0.5268476621417798, "grad_norm": 1.9526053667068481, "learning_rate": 4.692289624088395e-05, "loss": 1.1836, "step": 3493 }, { "epoch": 0.5269984917043741, "grad_norm": 2.1543760299682617, "learning_rate": 4.68988871206369e-05, "loss": 1.1768, "step": 3494 }, { "epoch": 0.5271493212669683, "grad_norm": 2.2491378784179688, "learning_rate": 4.6874878718169184e-05, "loss": 1.3654, "step": 3495 }, { "epoch": 0.5273001508295626, "grad_norm": 2.055936336517334, "learning_rate": 4.685087103903775e-05, "loss": 1.1985, "step": 3496 }, { "epoch": 0.5274509803921569, "grad_norm": 2.1467831134796143, "learning_rate": 4.682686408879938e-05, "loss": 1.1947, "step": 3497 }, { "epoch": 0.5276018099547511, "grad_norm": 1.6856695413589478, "learning_rate": 4.6802857873010704e-05, "loss": 0.8678, "step": 3498 }, { "epoch": 0.5277526395173454, "grad_norm": 1.5933407545089722, "learning_rate": 4.677885239722815e-05, "loss": 0.7282, "step": 3499 }, { "epoch": 0.5279034690799397, "grad_norm": 1.8304611444473267, "learning_rate": 4.6754847667008004e-05, "loss": 1.0266, "step": 3500 }, { "epoch": 0.528054298642534, "grad_norm": 1.8734949827194214, "learning_rate": 4.673084368790639e-05, "loss": 1.3287, "step": 3501 }, { "epoch": 0.5282051282051282, "grad_norm": 1.9382309913635254, "learning_rate": 4.6706840465479185e-05, "loss": 1.448, "step": 3502 }, { "epoch": 0.5283559577677225, "grad_norm": 2.048452138900757, "learning_rate": 4.668283800528217e-05, "loss": 1.3972, "step": 3503 }, { "epoch": 0.5285067873303168, "grad_norm": 1.510329008102417, "learning_rate": 4.665883631287091e-05, "loss": 0.7542, "step": 3504 }, { "epoch": 0.528657616892911, "grad_norm": 1.6389919519424438, "learning_rate": 4.663483539380082e-05, "loss": 0.9103, "step": 3505 }, { "epoch": 0.5288084464555053, "grad_norm": 1.9852782487869263, "learning_rate": 4.6610835253627116e-05, "loss": 1.3454, "step": 3506 }, { "epoch": 0.5289592760180996, "grad_norm": 2.178499460220337, "learning_rate": 4.658683589790483e-05, "loss": 1.5201, "step": 3507 }, { "epoch": 0.5291101055806938, "grad_norm": 2.0230252742767334, "learning_rate": 4.656283733218883e-05, "loss": 1.1352, "step": 3508 }, { "epoch": 0.5292609351432881, "grad_norm": 2.089252233505249, "learning_rate": 4.653883956203378e-05, "loss": 1.4483, "step": 3509 }, { "epoch": 0.5294117647058824, "grad_norm": 1.6875437498092651, "learning_rate": 4.651484259299418e-05, "loss": 1.0411, "step": 3510 }, { "epoch": 0.5295625942684766, "grad_norm": 2.015944719314575, "learning_rate": 4.6490846430624345e-05, "loss": 1.2396, "step": 3511 }, { "epoch": 0.5297134238310709, "grad_norm": 2.0096445083618164, "learning_rate": 4.646685108047837e-05, "loss": 1.2249, "step": 3512 }, { "epoch": 0.5298642533936652, "grad_norm": 2.4056224822998047, "learning_rate": 4.6442856548110184e-05, "loss": 1.8625, "step": 3513 }, { "epoch": 0.5300150829562594, "grad_norm": 1.7186540365219116, "learning_rate": 4.6418862839073555e-05, "loss": 0.8623, "step": 3514 }, { "epoch": 0.5301659125188537, "grad_norm": 2.042248010635376, "learning_rate": 4.639486995892201e-05, "loss": 1.007, "step": 3515 }, { "epoch": 0.530316742081448, "grad_norm": 2.1237471103668213, "learning_rate": 4.637087791320892e-05, "loss": 1.4721, "step": 3516 }, { "epoch": 0.5304675716440422, "grad_norm": 1.973709225654602, "learning_rate": 4.6346886707487444e-05, "loss": 1.0777, "step": 3517 }, { "epoch": 0.5306184012066365, "grad_norm": 1.9835397005081177, "learning_rate": 4.6322896347310565e-05, "loss": 1.2138, "step": 3518 }, { "epoch": 0.5307692307692308, "grad_norm": 1.8855249881744385, "learning_rate": 4.6298906838231056e-05, "loss": 1.08, "step": 3519 }, { "epoch": 0.530920060331825, "grad_norm": 1.8809267282485962, "learning_rate": 4.62749181858015e-05, "loss": 1.1034, "step": 3520 }, { "epoch": 0.5310708898944193, "grad_norm": 1.9159802198410034, "learning_rate": 4.625093039557426e-05, "loss": 1.2136, "step": 3521 }, { "epoch": 0.5312217194570136, "grad_norm": 1.8870458602905273, "learning_rate": 4.6226943473101525e-05, "loss": 1.195, "step": 3522 }, { "epoch": 0.5313725490196078, "grad_norm": 1.8412127494812012, "learning_rate": 4.620295742393528e-05, "loss": 0.8649, "step": 3523 }, { "epoch": 0.5315233785822021, "grad_norm": 1.975739598274231, "learning_rate": 4.6178972253627295e-05, "loss": 1.0694, "step": 3524 }, { "epoch": 0.5316742081447964, "grad_norm": 1.918983817100525, "learning_rate": 4.6154987967729144e-05, "loss": 1.1151, "step": 3525 }, { "epoch": 0.5318250377073906, "grad_norm": 2.2882513999938965, "learning_rate": 4.6131004571792214e-05, "loss": 1.2272, "step": 3526 }, { "epoch": 0.5319758672699849, "grad_norm": 2.1939027309417725, "learning_rate": 4.610702207136765e-05, "loss": 1.2391, "step": 3527 }, { "epoch": 0.5321266968325792, "grad_norm": 1.7766733169555664, "learning_rate": 4.608304047200643e-05, "loss": 0.8321, "step": 3528 }, { "epoch": 0.5322775263951735, "grad_norm": 2.227503538131714, "learning_rate": 4.6059059779259295e-05, "loss": 1.5139, "step": 3529 }, { "epoch": 0.5324283559577677, "grad_norm": 2.3098931312561035, "learning_rate": 4.603507999867676e-05, "loss": 1.6025, "step": 3530 }, { "epoch": 0.532579185520362, "grad_norm": 1.860466480255127, "learning_rate": 4.601110113580917e-05, "loss": 1.1119, "step": 3531 }, { "epoch": 0.5327300150829563, "grad_norm": 2.365417957305908, "learning_rate": 4.598712319620664e-05, "loss": 1.4613, "step": 3532 }, { "epoch": 0.5328808446455505, "grad_norm": 2.0001473426818848, "learning_rate": 4.596314618541906e-05, "loss": 1.0536, "step": 3533 }, { "epoch": 0.5330316742081448, "grad_norm": 1.7783737182617188, "learning_rate": 4.593917010899614e-05, "loss": 1.0065, "step": 3534 }, { "epoch": 0.5331825037707391, "grad_norm": 2.0880351066589355, "learning_rate": 4.591519497248732e-05, "loss": 1.2737, "step": 3535 }, { "epoch": 0.5333333333333333, "grad_norm": 2.196953296661377, "learning_rate": 4.589122078144188e-05, "loss": 1.4049, "step": 3536 }, { "epoch": 0.5334841628959276, "grad_norm": 2.0626373291015625, "learning_rate": 4.586724754140883e-05, "loss": 1.1687, "step": 3537 }, { "epoch": 0.5336349924585219, "grad_norm": 2.0523622035980225, "learning_rate": 4.5843275257937e-05, "loss": 1.1464, "step": 3538 }, { "epoch": 0.5337858220211161, "grad_norm": 2.4195241928100586, "learning_rate": 4.581930393657499e-05, "loss": 1.3409, "step": 3539 }, { "epoch": 0.5339366515837104, "grad_norm": 1.8011910915374756, "learning_rate": 4.5795333582871135e-05, "loss": 1.0894, "step": 3540 }, { "epoch": 0.5340874811463047, "grad_norm": 2.0362563133239746, "learning_rate": 4.577136420237361e-05, "loss": 0.9706, "step": 3541 }, { "epoch": 0.5342383107088989, "grad_norm": 1.7968207597732544, "learning_rate": 4.5747395800630315e-05, "loss": 1.144, "step": 3542 }, { "epoch": 0.5343891402714932, "grad_norm": 1.8627300262451172, "learning_rate": 4.572342838318896e-05, "loss": 1.0396, "step": 3543 }, { "epoch": 0.5345399698340875, "grad_norm": 2.189988136291504, "learning_rate": 4.5699461955597e-05, "loss": 1.3444, "step": 3544 }, { "epoch": 0.5346907993966817, "grad_norm": 2.121567726135254, "learning_rate": 4.567549652340168e-05, "loss": 1.2062, "step": 3545 }, { "epoch": 0.534841628959276, "grad_norm": 2.030698537826538, "learning_rate": 4.565153209215e-05, "loss": 1.145, "step": 3546 }, { "epoch": 0.5349924585218703, "grad_norm": 1.4867711067199707, "learning_rate": 4.562756866738874e-05, "loss": 0.599, "step": 3547 }, { "epoch": 0.5351432880844645, "grad_norm": 1.663171410560608, "learning_rate": 4.560360625466445e-05, "loss": 0.7067, "step": 3548 }, { "epoch": 0.5352941176470588, "grad_norm": 1.5531384944915771, "learning_rate": 4.5579644859523404e-05, "loss": 0.7781, "step": 3549 }, { "epoch": 0.5354449472096531, "grad_norm": 1.8373225927352905, "learning_rate": 4.5555684487511694e-05, "loss": 0.8193, "step": 3550 }, { "epoch": 0.5355957767722473, "grad_norm": 2.181082248687744, "learning_rate": 4.553172514417516e-05, "loss": 1.7229, "step": 3551 }, { "epoch": 0.5357466063348416, "grad_norm": 1.8571056127548218, "learning_rate": 4.550776683505939e-05, "loss": 1.0789, "step": 3552 }, { "epoch": 0.5358974358974359, "grad_norm": 2.102952718734741, "learning_rate": 4.548380956570974e-05, "loss": 1.3833, "step": 3553 }, { "epoch": 0.5360482654600301, "grad_norm": 1.7685072422027588, "learning_rate": 4.5459853341671346e-05, "loss": 1.0767, "step": 3554 }, { "epoch": 0.5361990950226244, "grad_norm": 1.6819690465927124, "learning_rate": 4.543589816848906e-05, "loss": 0.9017, "step": 3555 }, { "epoch": 0.5363499245852187, "grad_norm": 2.165426254272461, "learning_rate": 4.5411944051707524e-05, "loss": 1.4583, "step": 3556 }, { "epoch": 0.536500754147813, "grad_norm": 1.699973702430725, "learning_rate": 4.5387990996871143e-05, "loss": 0.9175, "step": 3557 }, { "epoch": 0.5366515837104072, "grad_norm": 1.993802785873413, "learning_rate": 4.536403900952402e-05, "loss": 1.2034, "step": 3558 }, { "epoch": 0.5368024132730015, "grad_norm": 2.1659581661224365, "learning_rate": 4.534008809521006e-05, "loss": 1.2248, "step": 3559 }, { "epoch": 0.5369532428355958, "grad_norm": 1.8297818899154663, "learning_rate": 4.531613825947292e-05, "loss": 1.176, "step": 3560 }, { "epoch": 0.53710407239819, "grad_norm": 1.9437909126281738, "learning_rate": 4.5292189507855995e-05, "loss": 1.1871, "step": 3561 }, { "epoch": 0.5372549019607843, "grad_norm": 1.9103668928146362, "learning_rate": 4.526824184590242e-05, "loss": 1.316, "step": 3562 }, { "epoch": 0.5374057315233786, "grad_norm": 1.998435139656067, "learning_rate": 4.52442952791551e-05, "loss": 1.2017, "step": 3563 }, { "epoch": 0.5375565610859728, "grad_norm": 1.8006601333618164, "learning_rate": 4.522034981315667e-05, "loss": 1.1969, "step": 3564 }, { "epoch": 0.5377073906485671, "grad_norm": 1.9052091836929321, "learning_rate": 4.5196405453449514e-05, "loss": 1.0849, "step": 3565 }, { "epoch": 0.5378582202111614, "grad_norm": 1.9962084293365479, "learning_rate": 4.517246220557577e-05, "loss": 1.0951, "step": 3566 }, { "epoch": 0.5380090497737556, "grad_norm": 2.1128275394439697, "learning_rate": 4.51485200750773e-05, "loss": 1.2078, "step": 3567 }, { "epoch": 0.5381598793363499, "grad_norm": 1.806293249130249, "learning_rate": 4.5124579067495706e-05, "loss": 0.9625, "step": 3568 }, { "epoch": 0.5383107088989442, "grad_norm": 1.5936121940612793, "learning_rate": 4.5100639188372355e-05, "loss": 0.6959, "step": 3569 }, { "epoch": 0.5384615384615384, "grad_norm": 2.307835102081299, "learning_rate": 4.507670044324834e-05, "loss": 1.3647, "step": 3570 }, { "epoch": 0.5386123680241327, "grad_norm": 1.6666710376739502, "learning_rate": 4.505276283766448e-05, "loss": 0.9927, "step": 3571 }, { "epoch": 0.538763197586727, "grad_norm": 2.0026986598968506, "learning_rate": 4.502882637716135e-05, "loss": 1.1555, "step": 3572 }, { "epoch": 0.5389140271493212, "grad_norm": 1.694486379623413, "learning_rate": 4.500489106727924e-05, "loss": 0.8793, "step": 3573 }, { "epoch": 0.5390648567119155, "grad_norm": 1.7861931324005127, "learning_rate": 4.498095691355819e-05, "loss": 0.9105, "step": 3574 }, { "epoch": 0.5392156862745098, "grad_norm": 1.9892933368682861, "learning_rate": 4.495702392153798e-05, "loss": 1.2645, "step": 3575 }, { "epoch": 0.539366515837104, "grad_norm": 2.2032032012939453, "learning_rate": 4.49330920967581e-05, "loss": 1.3824, "step": 3576 }, { "epoch": 0.5395173453996983, "grad_norm": 2.215413808822632, "learning_rate": 4.4909161444757756e-05, "loss": 1.4105, "step": 3577 }, { "epoch": 0.5396681749622926, "grad_norm": 2.2153854370117188, "learning_rate": 4.488523197107593e-05, "loss": 1.1502, "step": 3578 }, { "epoch": 0.5398190045248868, "grad_norm": 2.008617401123047, "learning_rate": 4.486130368125128e-05, "loss": 1.1904, "step": 3579 }, { "epoch": 0.5399698340874811, "grad_norm": 2.23500394821167, "learning_rate": 4.4837376580822235e-05, "loss": 1.4125, "step": 3580 }, { "epoch": 0.5401206636500754, "grad_norm": 2.0818474292755127, "learning_rate": 4.481345067532692e-05, "loss": 1.1974, "step": 3581 }, { "epoch": 0.5402714932126697, "grad_norm": 1.9874356985092163, "learning_rate": 4.47895259703032e-05, "loss": 1.2024, "step": 3582 }, { "epoch": 0.5404223227752639, "grad_norm": 2.125408887863159, "learning_rate": 4.476560247128864e-05, "loss": 1.3559, "step": 3583 }, { "epoch": 0.5405731523378582, "grad_norm": 2.3588383197784424, "learning_rate": 4.474168018382056e-05, "loss": 1.3463, "step": 3584 }, { "epoch": 0.5407239819004525, "grad_norm": 2.4385619163513184, "learning_rate": 4.471775911343598e-05, "loss": 1.5263, "step": 3585 }, { "epoch": 0.5408748114630467, "grad_norm": 2.1558332443237305, "learning_rate": 4.469383926567162e-05, "loss": 1.3965, "step": 3586 }, { "epoch": 0.541025641025641, "grad_norm": 2.3310928344726562, "learning_rate": 4.466992064606395e-05, "loss": 1.485, "step": 3587 }, { "epoch": 0.5411764705882353, "grad_norm": 2.127803087234497, "learning_rate": 4.464600326014913e-05, "loss": 1.3233, "step": 3588 }, { "epoch": 0.5413273001508295, "grad_norm": 1.5157750844955444, "learning_rate": 4.4622087113463043e-05, "loss": 0.8132, "step": 3589 }, { "epoch": 0.5414781297134238, "grad_norm": 1.8330291509628296, "learning_rate": 4.4598172211541294e-05, "loss": 1.1729, "step": 3590 }, { "epoch": 0.5416289592760181, "grad_norm": 2.0278689861297607, "learning_rate": 4.45742585599192e-05, "loss": 0.9169, "step": 3591 }, { "epoch": 0.5417797888386123, "grad_norm": 1.8470678329467773, "learning_rate": 4.455034616413177e-05, "loss": 0.9989, "step": 3592 }, { "epoch": 0.5419306184012066, "grad_norm": 2.065004587173462, "learning_rate": 4.452643502971374e-05, "loss": 1.2205, "step": 3593 }, { "epoch": 0.5420814479638009, "grad_norm": 1.8813830614089966, "learning_rate": 4.450252516219955e-05, "loss": 0.9902, "step": 3594 }, { "epoch": 0.5422322775263951, "grad_norm": 1.9677950143814087, "learning_rate": 4.4478616567123356e-05, "loss": 1.2472, "step": 3595 }, { "epoch": 0.5423831070889894, "grad_norm": 2.0186057090759277, "learning_rate": 4.4454709250018984e-05, "loss": 0.9152, "step": 3596 }, { "epoch": 0.5425339366515837, "grad_norm": 1.9013831615447998, "learning_rate": 4.443080321642e-05, "loss": 1.0992, "step": 3597 }, { "epoch": 0.5426847662141779, "grad_norm": 1.7425974607467651, "learning_rate": 4.440689847185966e-05, "loss": 0.7751, "step": 3598 }, { "epoch": 0.5428355957767722, "grad_norm": 1.5162702798843384, "learning_rate": 4.4382995021870924e-05, "loss": 0.683, "step": 3599 }, { "epoch": 0.5429864253393665, "grad_norm": 2.1455678939819336, "learning_rate": 4.435909287198646e-05, "loss": 1.1007, "step": 3600 }, { "epoch": 0.5431372549019607, "grad_norm": 1.8116439580917358, "learning_rate": 4.433519202773862e-05, "loss": 1.4508, "step": 3601 }, { "epoch": 0.543288084464555, "grad_norm": 1.7668204307556152, "learning_rate": 4.4311292494659464e-05, "loss": 1.058, "step": 3602 }, { "epoch": 0.5434389140271493, "grad_norm": 1.7224217653274536, "learning_rate": 4.428739427828074e-05, "loss": 0.9795, "step": 3603 }, { "epoch": 0.5435897435897435, "grad_norm": 1.8062832355499268, "learning_rate": 4.426349738413392e-05, "loss": 1.2359, "step": 3604 }, { "epoch": 0.5437405731523378, "grad_norm": 1.928071141242981, "learning_rate": 4.4239601817750095e-05, "loss": 1.5215, "step": 3605 }, { "epoch": 0.5438914027149321, "grad_norm": 1.9333680868148804, "learning_rate": 4.421570758466014e-05, "loss": 1.1865, "step": 3606 }, { "epoch": 0.5440422322775263, "grad_norm": 1.8194891214370728, "learning_rate": 4.419181469039457e-05, "loss": 1.2093, "step": 3607 }, { "epoch": 0.5441930618401206, "grad_norm": 1.8567965030670166, "learning_rate": 4.4167923140483596e-05, "loss": 1.3394, "step": 3608 }, { "epoch": 0.5443438914027149, "grad_norm": 1.841776967048645, "learning_rate": 4.4144032940457134e-05, "loss": 1.1173, "step": 3609 }, { "epoch": 0.5444947209653092, "grad_norm": 2.1076226234436035, "learning_rate": 4.412014409584478e-05, "loss": 1.1482, "step": 3610 }, { "epoch": 0.5446455505279034, "grad_norm": 1.9208612442016602, "learning_rate": 4.40962566121758e-05, "loss": 1.0657, "step": 3611 }, { "epoch": 0.5447963800904977, "grad_norm": 2.0134856700897217, "learning_rate": 4.4072370494979166e-05, "loss": 1.1282, "step": 3612 }, { "epoch": 0.544947209653092, "grad_norm": 1.9932762384414673, "learning_rate": 4.404848574978354e-05, "loss": 1.3524, "step": 3613 }, { "epoch": 0.5450980392156862, "grad_norm": 2.1824147701263428, "learning_rate": 4.402460238211722e-05, "loss": 1.42, "step": 3614 }, { "epoch": 0.5452488687782805, "grad_norm": 1.650315761566162, "learning_rate": 4.4000720397508234e-05, "loss": 0.864, "step": 3615 }, { "epoch": 0.5453996983408748, "grad_norm": 2.013115644454956, "learning_rate": 4.397683980148427e-05, "loss": 1.1501, "step": 3616 }, { "epoch": 0.545550527903469, "grad_norm": 1.698461651802063, "learning_rate": 4.3952960599572714e-05, "loss": 0.8494, "step": 3617 }, { "epoch": 0.5457013574660633, "grad_norm": 2.01017427444458, "learning_rate": 4.392908279730059e-05, "loss": 1.2304, "step": 3618 }, { "epoch": 0.5458521870286576, "grad_norm": 1.7123278379440308, "learning_rate": 4.390520640019464e-05, "loss": 1.0044, "step": 3619 }, { "epoch": 0.5460030165912518, "grad_norm": 1.7410070896148682, "learning_rate": 4.3881331413781254e-05, "loss": 0.9886, "step": 3620 }, { "epoch": 0.5461538461538461, "grad_norm": 2.248065233230591, "learning_rate": 4.38574578435865e-05, "loss": 1.3983, "step": 3621 }, { "epoch": 0.5463046757164404, "grad_norm": 1.8236321210861206, "learning_rate": 4.383358569513613e-05, "loss": 1.0712, "step": 3622 }, { "epoch": 0.5464555052790346, "grad_norm": 2.124253988265991, "learning_rate": 4.380971497395557e-05, "loss": 1.3011, "step": 3623 }, { "epoch": 0.5466063348416289, "grad_norm": 2.0949811935424805, "learning_rate": 4.378584568556987e-05, "loss": 1.3477, "step": 3624 }, { "epoch": 0.5467571644042232, "grad_norm": 2.0685477256774902, "learning_rate": 4.37619778355038e-05, "loss": 1.2053, "step": 3625 }, { "epoch": 0.5469079939668176, "grad_norm": 1.9367709159851074, "learning_rate": 4.3738111429281784e-05, "loss": 1.1138, "step": 3626 }, { "epoch": 0.5470588235294118, "grad_norm": 2.1425888538360596, "learning_rate": 4.371424647242791e-05, "loss": 1.2837, "step": 3627 }, { "epoch": 0.5472096530920061, "grad_norm": 1.819670557975769, "learning_rate": 4.369038297046592e-05, "loss": 0.8283, "step": 3628 }, { "epoch": 0.5473604826546004, "grad_norm": 1.9996459484100342, "learning_rate": 4.366652092891923e-05, "loss": 1.2188, "step": 3629 }, { "epoch": 0.5475113122171946, "grad_norm": 1.7832447290420532, "learning_rate": 4.364266035331092e-05, "loss": 0.8973, "step": 3630 }, { "epoch": 0.5476621417797889, "grad_norm": 1.8961189985275269, "learning_rate": 4.361880124916371e-05, "loss": 1.2223, "step": 3631 }, { "epoch": 0.5478129713423832, "grad_norm": 2.0101687908172607, "learning_rate": 4.359494362200004e-05, "loss": 1.1115, "step": 3632 }, { "epoch": 0.5479638009049774, "grad_norm": 1.9084994792938232, "learning_rate": 4.357108747734191e-05, "loss": 1.2448, "step": 3633 }, { "epoch": 0.5481146304675717, "grad_norm": 2.076115846633911, "learning_rate": 4.354723282071105e-05, "loss": 1.4202, "step": 3634 }, { "epoch": 0.548265460030166, "grad_norm": 1.946301817893982, "learning_rate": 4.3523379657628825e-05, "loss": 1.0315, "step": 3635 }, { "epoch": 0.5484162895927602, "grad_norm": 2.077080726623535, "learning_rate": 4.349952799361626e-05, "loss": 1.1596, "step": 3636 }, { "epoch": 0.5485671191553545, "grad_norm": 2.0820751190185547, "learning_rate": 4.3475677834194033e-05, "loss": 1.394, "step": 3637 }, { "epoch": 0.5487179487179488, "grad_norm": 2.20198917388916, "learning_rate": 4.345182918488246e-05, "loss": 1.4119, "step": 3638 }, { "epoch": 0.548868778280543, "grad_norm": 1.9794163703918457, "learning_rate": 4.3427982051201525e-05, "loss": 1.0633, "step": 3639 }, { "epoch": 0.5490196078431373, "grad_norm": 1.9918262958526611, "learning_rate": 4.3404136438670833e-05, "loss": 1.1976, "step": 3640 }, { "epoch": 0.5491704374057316, "grad_norm": 1.728222370147705, "learning_rate": 4.3380292352809694e-05, "loss": 0.8271, "step": 3641 }, { "epoch": 0.5493212669683258, "grad_norm": 2.181032419204712, "learning_rate": 4.3356449799136986e-05, "loss": 1.3145, "step": 3642 }, { "epoch": 0.5494720965309201, "grad_norm": 2.0563418865203857, "learning_rate": 4.33326087831713e-05, "loss": 1.2212, "step": 3643 }, { "epoch": 0.5496229260935144, "grad_norm": 2.129120111465454, "learning_rate": 4.3308769310430836e-05, "loss": 0.9873, "step": 3644 }, { "epoch": 0.5497737556561086, "grad_norm": 2.186619281768799, "learning_rate": 4.328493138643345e-05, "loss": 1.3957, "step": 3645 }, { "epoch": 0.5499245852187029, "grad_norm": 1.7065813541412354, "learning_rate": 4.326109501669662e-05, "loss": 0.8313, "step": 3646 }, { "epoch": 0.5500754147812972, "grad_norm": 1.571581244468689, "learning_rate": 4.3237260206737504e-05, "loss": 0.7195, "step": 3647 }, { "epoch": 0.5502262443438914, "grad_norm": 2.078268051147461, "learning_rate": 4.3213426962072856e-05, "loss": 1.3179, "step": 3648 }, { "epoch": 0.5503770739064857, "grad_norm": 1.7759270668029785, "learning_rate": 4.318959528821909e-05, "loss": 0.8485, "step": 3649 }, { "epoch": 0.55052790346908, "grad_norm": 1.94783616065979, "learning_rate": 4.316576519069227e-05, "loss": 1.0022, "step": 3650 }, { "epoch": 0.5506787330316743, "grad_norm": 1.8166431188583374, "learning_rate": 4.314193667500806e-05, "loss": 1.3605, "step": 3651 }, { "epoch": 0.5508295625942685, "grad_norm": 1.7996186017990112, "learning_rate": 4.311810974668177e-05, "loss": 1.2446, "step": 3652 }, { "epoch": 0.5509803921568628, "grad_norm": 1.6951183080673218, "learning_rate": 4.309428441122836e-05, "loss": 0.9964, "step": 3653 }, { "epoch": 0.551131221719457, "grad_norm": 2.0056533813476562, "learning_rate": 4.30704606741624e-05, "loss": 1.1819, "step": 3654 }, { "epoch": 0.5512820512820513, "grad_norm": 2.0797009468078613, "learning_rate": 4.30466385409981e-05, "loss": 1.4998, "step": 3655 }, { "epoch": 0.5514328808446456, "grad_norm": 1.885399580001831, "learning_rate": 4.302281801724931e-05, "loss": 1.1222, "step": 3656 }, { "epoch": 0.5515837104072399, "grad_norm": 2.032731533050537, "learning_rate": 4.299899910842949e-05, "loss": 1.2825, "step": 3657 }, { "epoch": 0.5517345399698341, "grad_norm": 2.141953468322754, "learning_rate": 4.297518182005173e-05, "loss": 1.6788, "step": 3658 }, { "epoch": 0.5518853695324284, "grad_norm": 2.141239643096924, "learning_rate": 4.295136615762875e-05, "loss": 1.3441, "step": 3659 }, { "epoch": 0.5520361990950227, "grad_norm": 1.7707017660140991, "learning_rate": 4.292755212667289e-05, "loss": 1.0767, "step": 3660 }, { "epoch": 0.5521870286576169, "grad_norm": 1.867909550666809, "learning_rate": 4.290373973269611e-05, "loss": 1.1228, "step": 3661 }, { "epoch": 0.5523378582202112, "grad_norm": 1.7877081632614136, "learning_rate": 4.2879928981209985e-05, "loss": 1.1679, "step": 3662 }, { "epoch": 0.5524886877828055, "grad_norm": 1.8077682256698608, "learning_rate": 4.2856119877725745e-05, "loss": 1.0864, "step": 3663 }, { "epoch": 0.5526395173453997, "grad_norm": 1.842250943183899, "learning_rate": 4.2832312427754186e-05, "loss": 0.9802, "step": 3664 }, { "epoch": 0.552790346907994, "grad_norm": 1.8458845615386963, "learning_rate": 4.280850663680576e-05, "loss": 1.1098, "step": 3665 }, { "epoch": 0.5529411764705883, "grad_norm": 1.8727163076400757, "learning_rate": 4.278470251039052e-05, "loss": 0.9778, "step": 3666 }, { "epoch": 0.5530920060331825, "grad_norm": 2.0156102180480957, "learning_rate": 4.2760900054018136e-05, "loss": 1.1282, "step": 3667 }, { "epoch": 0.5532428355957768, "grad_norm": 1.7127286195755005, "learning_rate": 4.27370992731979e-05, "loss": 0.9095, "step": 3668 }, { "epoch": 0.5533936651583711, "grad_norm": 1.9398804903030396, "learning_rate": 4.27133001734387e-05, "loss": 0.9293, "step": 3669 }, { "epoch": 0.5535444947209653, "grad_norm": 1.8816441297531128, "learning_rate": 4.268950276024906e-05, "loss": 1.0594, "step": 3670 }, { "epoch": 0.5536953242835596, "grad_norm": 2.0224595069885254, "learning_rate": 4.266570703913706e-05, "loss": 1.443, "step": 3671 }, { "epoch": 0.5538461538461539, "grad_norm": 2.4659786224365234, "learning_rate": 4.2641913015610456e-05, "loss": 1.4661, "step": 3672 }, { "epoch": 0.5539969834087481, "grad_norm": 1.614249587059021, "learning_rate": 4.261812069517656e-05, "loss": 0.7901, "step": 3673 }, { "epoch": 0.5541478129713424, "grad_norm": 2.0786821842193604, "learning_rate": 4.2594330083342325e-05, "loss": 1.1592, "step": 3674 }, { "epoch": 0.5542986425339367, "grad_norm": 2.18681001663208, "learning_rate": 4.257054118561429e-05, "loss": 1.3154, "step": 3675 }, { "epoch": 0.554449472096531, "grad_norm": 1.9523979425430298, "learning_rate": 4.254675400749859e-05, "loss": 0.908, "step": 3676 }, { "epoch": 0.5546003016591252, "grad_norm": 2.1649928092956543, "learning_rate": 4.252296855450099e-05, "loss": 1.4123, "step": 3677 }, { "epoch": 0.5547511312217195, "grad_norm": 1.8109549283981323, "learning_rate": 4.2499184832126825e-05, "loss": 1.1465, "step": 3678 }, { "epoch": 0.5549019607843138, "grad_norm": 1.709310531616211, "learning_rate": 4.247540284588107e-05, "loss": 0.9383, "step": 3679 }, { "epoch": 0.555052790346908, "grad_norm": 2.0603697299957275, "learning_rate": 4.245162260126824e-05, "loss": 1.2975, "step": 3680 }, { "epoch": 0.5552036199095023, "grad_norm": 2.0596415996551514, "learning_rate": 4.242784410379247e-05, "loss": 1.2218, "step": 3681 }, { "epoch": 0.5553544494720966, "grad_norm": 1.8440126180648804, "learning_rate": 4.240406735895752e-05, "loss": 0.9072, "step": 3682 }, { "epoch": 0.5555052790346908, "grad_norm": 2.0689144134521484, "learning_rate": 4.2380292372266714e-05, "loss": 0.8844, "step": 3683 }, { "epoch": 0.5556561085972851, "grad_norm": 1.9025776386260986, "learning_rate": 4.2356519149222986e-05, "loss": 0.9825, "step": 3684 }, { "epoch": 0.5558069381598794, "grad_norm": 2.585792064666748, "learning_rate": 4.233274769532884e-05, "loss": 1.4404, "step": 3685 }, { "epoch": 0.5559577677224736, "grad_norm": 2.3224804401397705, "learning_rate": 4.230897801608641e-05, "loss": 1.5425, "step": 3686 }, { "epoch": 0.5561085972850679, "grad_norm": 2.144929885864258, "learning_rate": 4.228521011699737e-05, "loss": 1.3121, "step": 3687 }, { "epoch": 0.5562594268476622, "grad_norm": 1.8324192762374878, "learning_rate": 4.2261444003563016e-05, "loss": 0.964, "step": 3688 }, { "epoch": 0.5564102564102564, "grad_norm": 2.1895179748535156, "learning_rate": 4.2237679681284205e-05, "loss": 1.3368, "step": 3689 }, { "epoch": 0.5565610859728507, "grad_norm": 2.0485446453094482, "learning_rate": 4.221391715566141e-05, "loss": 0.9577, "step": 3690 }, { "epoch": 0.556711915535445, "grad_norm": 2.3645472526550293, "learning_rate": 4.2190156432194666e-05, "loss": 1.3596, "step": 3691 }, { "epoch": 0.5568627450980392, "grad_norm": 2.2413523197174072, "learning_rate": 4.2166397516383596e-05, "loss": 1.2509, "step": 3692 }, { "epoch": 0.5570135746606335, "grad_norm": 1.917912483215332, "learning_rate": 4.214264041372741e-05, "loss": 0.9455, "step": 3693 }, { "epoch": 0.5571644042232278, "grad_norm": 2.332165241241455, "learning_rate": 4.21188851297249e-05, "loss": 1.4075, "step": 3694 }, { "epoch": 0.557315233785822, "grad_norm": 1.7111363410949707, "learning_rate": 4.209513166987442e-05, "loss": 0.8248, "step": 3695 }, { "epoch": 0.5574660633484163, "grad_norm": 1.525354027748108, "learning_rate": 4.2071380039673916e-05, "loss": 0.7253, "step": 3696 }, { "epoch": 0.5576168929110106, "grad_norm": 1.765557885169983, "learning_rate": 4.2047630244620915e-05, "loss": 0.8792, "step": 3697 }, { "epoch": 0.5577677224736048, "grad_norm": 1.916614294052124, "learning_rate": 4.202388229021252e-05, "loss": 1.0859, "step": 3698 }, { "epoch": 0.5579185520361991, "grad_norm": 1.539377212524414, "learning_rate": 4.200013618194538e-05, "loss": 0.6826, "step": 3699 }, { "epoch": 0.5580693815987934, "grad_norm": 2.024653434753418, "learning_rate": 4.1976391925315734e-05, "loss": 0.9591, "step": 3700 }, { "epoch": 0.5582202111613876, "grad_norm": 2.4223721027374268, "learning_rate": 4.19526495258194e-05, "loss": 1.929, "step": 3701 }, { "epoch": 0.5583710407239819, "grad_norm": 1.9612743854522705, "learning_rate": 4.192890898895178e-05, "loss": 1.2416, "step": 3702 }, { "epoch": 0.5585218702865762, "grad_norm": 1.6777098178863525, "learning_rate": 4.19051703202078e-05, "loss": 1.0662, "step": 3703 }, { "epoch": 0.5586726998491705, "grad_norm": 2.312670946121216, "learning_rate": 4.188143352508199e-05, "loss": 1.7597, "step": 3704 }, { "epoch": 0.5588235294117647, "grad_norm": 1.5869169235229492, "learning_rate": 4.1857698609068445e-05, "loss": 0.7778, "step": 3705 }, { "epoch": 0.558974358974359, "grad_norm": 1.8320130109786987, "learning_rate": 4.183396557766081e-05, "loss": 1.0968, "step": 3706 }, { "epoch": 0.5591251885369533, "grad_norm": 1.8819389343261719, "learning_rate": 4.1810234436352304e-05, "loss": 1.2084, "step": 3707 }, { "epoch": 0.5592760180995475, "grad_norm": 1.9622294902801514, "learning_rate": 4.1786505190635686e-05, "loss": 1.1145, "step": 3708 }, { "epoch": 0.5594268476621418, "grad_norm": 1.9404561519622803, "learning_rate": 4.176277784600332e-05, "loss": 1.2053, "step": 3709 }, { "epoch": 0.5595776772247361, "grad_norm": 1.892250418663025, "learning_rate": 4.1739052407947077e-05, "loss": 1.1811, "step": 3710 }, { "epoch": 0.5597285067873303, "grad_norm": 1.8156415224075317, "learning_rate": 4.171532888195844e-05, "loss": 1.0845, "step": 3711 }, { "epoch": 0.5598793363499246, "grad_norm": 2.145080089569092, "learning_rate": 4.169160727352841e-05, "loss": 1.5416, "step": 3712 }, { "epoch": 0.5600301659125189, "grad_norm": 2.0821151733398438, "learning_rate": 4.166788758814756e-05, "loss": 1.2679, "step": 3713 }, { "epoch": 0.5601809954751131, "grad_norm": 1.8016338348388672, "learning_rate": 4.164416983130602e-05, "loss": 0.9915, "step": 3714 }, { "epoch": 0.5603318250377074, "grad_norm": 1.7696868181228638, "learning_rate": 4.1620454008493464e-05, "loss": 0.9117, "step": 3715 }, { "epoch": 0.5604826546003017, "grad_norm": 1.9447799921035767, "learning_rate": 4.159674012519915e-05, "loss": 0.9549, "step": 3716 }, { "epoch": 0.5606334841628959, "grad_norm": 1.7005735635757446, "learning_rate": 4.157302818691181e-05, "loss": 0.7901, "step": 3717 }, { "epoch": 0.5607843137254902, "grad_norm": 2.210742950439453, "learning_rate": 4.1549318199119815e-05, "loss": 1.4675, "step": 3718 }, { "epoch": 0.5609351432880845, "grad_norm": 1.62502920627594, "learning_rate": 4.1525610167311025e-05, "loss": 0.9199, "step": 3719 }, { "epoch": 0.5610859728506787, "grad_norm": 1.7855981588363647, "learning_rate": 4.1501904096972886e-05, "loss": 1.0393, "step": 3720 }, { "epoch": 0.561236802413273, "grad_norm": 2.2324459552764893, "learning_rate": 4.147819999359236e-05, "loss": 1.3315, "step": 3721 }, { "epoch": 0.5613876319758673, "grad_norm": 2.054593563079834, "learning_rate": 4.1454497862655975e-05, "loss": 1.1862, "step": 3722 }, { "epoch": 0.5615384615384615, "grad_norm": 2.112711191177368, "learning_rate": 4.143079770964979e-05, "loss": 1.2155, "step": 3723 }, { "epoch": 0.5616892911010558, "grad_norm": 2.362238645553589, "learning_rate": 4.140709954005941e-05, "loss": 1.1136, "step": 3724 }, { "epoch": 0.5618401206636501, "grad_norm": 2.047379732131958, "learning_rate": 4.138340335936998e-05, "loss": 1.34, "step": 3725 }, { "epoch": 0.5619909502262443, "grad_norm": 2.1222033500671387, "learning_rate": 4.1359709173066207e-05, "loss": 1.2044, "step": 3726 }, { "epoch": 0.5621417797888386, "grad_norm": 1.8908255100250244, "learning_rate": 4.1336016986632274e-05, "loss": 1.1451, "step": 3727 }, { "epoch": 0.5622926093514329, "grad_norm": 2.0411009788513184, "learning_rate": 4.131232680555197e-05, "loss": 1.2747, "step": 3728 }, { "epoch": 0.5624434389140271, "grad_norm": 2.4350738525390625, "learning_rate": 4.128863863530857e-05, "loss": 1.6594, "step": 3729 }, { "epoch": 0.5625942684766214, "grad_norm": 2.128171682357788, "learning_rate": 4.1264952481384925e-05, "loss": 1.1742, "step": 3730 }, { "epoch": 0.5627450980392157, "grad_norm": 1.9978854656219482, "learning_rate": 4.12412683492634e-05, "loss": 1.0414, "step": 3731 }, { "epoch": 0.56289592760181, "grad_norm": 2.065382957458496, "learning_rate": 4.121758624442588e-05, "loss": 1.2312, "step": 3732 }, { "epoch": 0.5630467571644042, "grad_norm": 2.166901111602783, "learning_rate": 4.11939061723538e-05, "loss": 1.1669, "step": 3733 }, { "epoch": 0.5631975867269985, "grad_norm": 2.1244285106658936, "learning_rate": 4.117022813852811e-05, "loss": 1.0844, "step": 3734 }, { "epoch": 0.5633484162895928, "grad_norm": 1.8784126043319702, "learning_rate": 4.1146552148429316e-05, "loss": 1.116, "step": 3735 }, { "epoch": 0.563499245852187, "grad_norm": 1.9204376935958862, "learning_rate": 4.112287820753741e-05, "loss": 0.9978, "step": 3736 }, { "epoch": 0.5636500754147813, "grad_norm": 2.5264368057250977, "learning_rate": 4.109920632133193e-05, "loss": 1.6025, "step": 3737 }, { "epoch": 0.5638009049773756, "grad_norm": 2.2950844764709473, "learning_rate": 4.107553649529194e-05, "loss": 1.3989, "step": 3738 }, { "epoch": 0.5639517345399698, "grad_norm": 2.0132076740264893, "learning_rate": 4.105186873489603e-05, "loss": 0.9658, "step": 3739 }, { "epoch": 0.5641025641025641, "grad_norm": 2.230646848678589, "learning_rate": 4.10282030456223e-05, "loss": 1.3227, "step": 3740 }, { "epoch": 0.5642533936651584, "grad_norm": 1.8460323810577393, "learning_rate": 4.10045394329484e-05, "loss": 0.865, "step": 3741 }, { "epoch": 0.5644042232277526, "grad_norm": 1.7469884157180786, "learning_rate": 4.0980877902351446e-05, "loss": 0.851, "step": 3742 }, { "epoch": 0.5645550527903469, "grad_norm": 1.848232388496399, "learning_rate": 4.095721845930812e-05, "loss": 0.917, "step": 3743 }, { "epoch": 0.5647058823529412, "grad_norm": 2.290393829345703, "learning_rate": 4.093356110929463e-05, "loss": 1.0796, "step": 3744 }, { "epoch": 0.5648567119155354, "grad_norm": 2.4179670810699463, "learning_rate": 4.0909905857786627e-05, "loss": 1.2963, "step": 3745 }, { "epoch": 0.5650075414781297, "grad_norm": 1.6191885471343994, "learning_rate": 4.0886252710259336e-05, "loss": 0.6441, "step": 3746 }, { "epoch": 0.565158371040724, "grad_norm": 1.7936654090881348, "learning_rate": 4.086260167218749e-05, "loss": 0.8488, "step": 3747 }, { "epoch": 0.5653092006033182, "grad_norm": 1.8516613245010376, "learning_rate": 4.0838952749045325e-05, "loss": 0.7905, "step": 3748 }, { "epoch": 0.5654600301659125, "grad_norm": 1.7119678258895874, "learning_rate": 4.0815305946306593e-05, "loss": 0.7943, "step": 3749 }, { "epoch": 0.5656108597285068, "grad_norm": 1.9361222982406616, "learning_rate": 4.079166126944454e-05, "loss": 1.0623, "step": 3750 }, { "epoch": 0.565761689291101, "grad_norm": 1.9653840065002441, "learning_rate": 4.076801872393193e-05, "loss": 1.4224, "step": 3751 }, { "epoch": 0.5659125188536953, "grad_norm": 1.9422831535339355, "learning_rate": 4.074437831524105e-05, "loss": 1.2876, "step": 3752 }, { "epoch": 0.5660633484162896, "grad_norm": 1.6621524095535278, "learning_rate": 4.072074004884366e-05, "loss": 0.7344, "step": 3753 }, { "epoch": 0.5662141779788838, "grad_norm": 1.8988804817199707, "learning_rate": 4.069710393021107e-05, "loss": 1.0853, "step": 3754 }, { "epoch": 0.5663650075414781, "grad_norm": 2.1940195560455322, "learning_rate": 4.0673469964814015e-05, "loss": 1.548, "step": 3755 }, { "epoch": 0.5665158371040724, "grad_norm": 1.9263852834701538, "learning_rate": 4.064983815812281e-05, "loss": 1.2385, "step": 3756 }, { "epoch": 0.5666666666666667, "grad_norm": 2.1344120502471924, "learning_rate": 4.062620851560724e-05, "loss": 0.9881, "step": 3757 }, { "epoch": 0.5668174962292609, "grad_norm": 2.1682348251342773, "learning_rate": 4.0602581042736584e-05, "loss": 1.1648, "step": 3758 }, { "epoch": 0.5669683257918552, "grad_norm": 2.276914358139038, "learning_rate": 4.057895574497963e-05, "loss": 1.482, "step": 3759 }, { "epoch": 0.5671191553544495, "grad_norm": 1.798677921295166, "learning_rate": 4.055533262780465e-05, "loss": 1.0426, "step": 3760 }, { "epoch": 0.5672699849170437, "grad_norm": 1.7040866613388062, "learning_rate": 4.053171169667942e-05, "loss": 0.8467, "step": 3761 }, { "epoch": 0.567420814479638, "grad_norm": 1.996989130973816, "learning_rate": 4.050809295707121e-05, "loss": 1.1996, "step": 3762 }, { "epoch": 0.5675716440422323, "grad_norm": 2.09346079826355, "learning_rate": 4.048447641444681e-05, "loss": 1.1757, "step": 3763 }, { "epoch": 0.5677224736048265, "grad_norm": 2.0511131286621094, "learning_rate": 4.0460862074272416e-05, "loss": 1.2283, "step": 3764 }, { "epoch": 0.5678733031674208, "grad_norm": 1.7575315237045288, "learning_rate": 4.04372499420138e-05, "loss": 1.037, "step": 3765 }, { "epoch": 0.5680241327300151, "grad_norm": 2.087954044342041, "learning_rate": 4.041364002313619e-05, "loss": 1.4217, "step": 3766 }, { "epoch": 0.5681749622926093, "grad_norm": 1.8446522951126099, "learning_rate": 4.039003232310431e-05, "loss": 1.0601, "step": 3767 }, { "epoch": 0.5683257918552036, "grad_norm": 2.0083117485046387, "learning_rate": 4.036642684738236e-05, "loss": 1.2558, "step": 3768 }, { "epoch": 0.5684766214177979, "grad_norm": 2.5872888565063477, "learning_rate": 4.034282360143404e-05, "loss": 1.3398, "step": 3769 }, { "epoch": 0.5686274509803921, "grad_norm": 2.1471989154815674, "learning_rate": 4.0319222590722524e-05, "loss": 1.4932, "step": 3770 }, { "epoch": 0.5687782805429864, "grad_norm": 1.861656665802002, "learning_rate": 4.029562382071046e-05, "loss": 0.9698, "step": 3771 }, { "epoch": 0.5689291101055807, "grad_norm": 2.034170150756836, "learning_rate": 4.027202729686001e-05, "loss": 1.1328, "step": 3772 }, { "epoch": 0.5690799396681749, "grad_norm": 2.0665347576141357, "learning_rate": 4.024843302463276e-05, "loss": 1.2937, "step": 3773 }, { "epoch": 0.5692307692307692, "grad_norm": 2.100402355194092, "learning_rate": 4.022484100948982e-05, "loss": 1.3467, "step": 3774 }, { "epoch": 0.5693815987933635, "grad_norm": 2.089700222015381, "learning_rate": 4.020125125689177e-05, "loss": 1.3924, "step": 3775 }, { "epoch": 0.5695324283559577, "grad_norm": 1.8664213418960571, "learning_rate": 4.017766377229865e-05, "loss": 0.9355, "step": 3776 }, { "epoch": 0.569683257918552, "grad_norm": 2.1071009635925293, "learning_rate": 4.015407856117e-05, "loss": 1.1843, "step": 3777 }, { "epoch": 0.5698340874811463, "grad_norm": 2.0238847732543945, "learning_rate": 4.0130495628964834e-05, "loss": 1.1075, "step": 3778 }, { "epoch": 0.5699849170437405, "grad_norm": 2.062262773513794, "learning_rate": 4.01069149811416e-05, "loss": 1.1408, "step": 3779 }, { "epoch": 0.5701357466063348, "grad_norm": 1.860342264175415, "learning_rate": 4.008333662315824e-05, "loss": 1.2263, "step": 3780 }, { "epoch": 0.5702865761689291, "grad_norm": 1.920046329498291, "learning_rate": 4.005976056047219e-05, "loss": 1.2015, "step": 3781 }, { "epoch": 0.5704374057315234, "grad_norm": 1.701170563697815, "learning_rate": 4.0036186798540334e-05, "loss": 0.8187, "step": 3782 }, { "epoch": 0.5705882352941176, "grad_norm": 1.7304587364196777, "learning_rate": 4.0012615342819e-05, "loss": 0.8975, "step": 3783 }, { "epoch": 0.5707390648567119, "grad_norm": 1.8187429904937744, "learning_rate": 3.998904619876401e-05, "loss": 1.1848, "step": 3784 }, { "epoch": 0.5708898944193062, "grad_norm": 1.984991192817688, "learning_rate": 3.9965479371830663e-05, "loss": 1.0959, "step": 3785 }, { "epoch": 0.5710407239819004, "grad_norm": 2.08815598487854, "learning_rate": 3.9941914867473685e-05, "loss": 1.3299, "step": 3786 }, { "epoch": 0.5711915535444947, "grad_norm": 2.108060359954834, "learning_rate": 3.991835269114729e-05, "loss": 1.0227, "step": 3787 }, { "epoch": 0.571342383107089, "grad_norm": 2.1650636196136475, "learning_rate": 3.989479284830515e-05, "loss": 1.3499, "step": 3788 }, { "epoch": 0.5714932126696832, "grad_norm": 2.311306953430176, "learning_rate": 3.98712353444004e-05, "loss": 1.2789, "step": 3789 }, { "epoch": 0.5716440422322775, "grad_norm": 1.8747297525405884, "learning_rate": 3.9847680184885615e-05, "loss": 0.9996, "step": 3790 }, { "epoch": 0.5717948717948718, "grad_norm": 2.116684675216675, "learning_rate": 3.9824127375212866e-05, "loss": 0.9628, "step": 3791 }, { "epoch": 0.571945701357466, "grad_norm": 2.3029167652130127, "learning_rate": 3.9800576920833614e-05, "loss": 1.521, "step": 3792 }, { "epoch": 0.5720965309200603, "grad_norm": 1.7979470491409302, "learning_rate": 3.9777028827198836e-05, "loss": 0.9702, "step": 3793 }, { "epoch": 0.5722473604826546, "grad_norm": 2.0977284908294678, "learning_rate": 3.975348309975894e-05, "loss": 1.1852, "step": 3794 }, { "epoch": 0.5723981900452488, "grad_norm": 1.813122272491455, "learning_rate": 3.972993974396378e-05, "loss": 0.7808, "step": 3795 }, { "epoch": 0.5725490196078431, "grad_norm": 1.7628694772720337, "learning_rate": 3.970639876526269e-05, "loss": 0.7649, "step": 3796 }, { "epoch": 0.5726998491704374, "grad_norm": 1.839743733406067, "learning_rate": 3.9682860169104405e-05, "loss": 1.0145, "step": 3797 }, { "epoch": 0.5728506787330316, "grad_norm": 1.475430965423584, "learning_rate": 3.965932396093716e-05, "loss": 0.678, "step": 3798 }, { "epoch": 0.5730015082956259, "grad_norm": 1.747683048248291, "learning_rate": 3.96357901462086e-05, "loss": 0.6775, "step": 3799 }, { "epoch": 0.5731523378582202, "grad_norm": 1.7387402057647705, "learning_rate": 3.9612258730365826e-05, "loss": 0.9108, "step": 3800 }, { "epoch": 0.5733031674208144, "grad_norm": 1.9479564428329468, "learning_rate": 3.958872971885541e-05, "loss": 1.3201, "step": 3801 }, { "epoch": 0.5734539969834087, "grad_norm": 1.7495300769805908, "learning_rate": 3.9565203117123325e-05, "loss": 0.9764, "step": 3802 }, { "epoch": 0.573604826546003, "grad_norm": 1.8585385084152222, "learning_rate": 3.9541678930615e-05, "loss": 1.3941, "step": 3803 }, { "epoch": 0.5737556561085972, "grad_norm": 1.8154933452606201, "learning_rate": 3.9518157164775325e-05, "loss": 1.2392, "step": 3804 }, { "epoch": 0.5739064856711915, "grad_norm": 2.0302700996398926, "learning_rate": 3.9494637825048615e-05, "loss": 1.5323, "step": 3805 }, { "epoch": 0.5740573152337858, "grad_norm": 1.6944444179534912, "learning_rate": 3.947112091687862e-05, "loss": 0.9007, "step": 3806 }, { "epoch": 0.57420814479638, "grad_norm": 2.0628957748413086, "learning_rate": 3.944760644570853e-05, "loss": 1.3652, "step": 3807 }, { "epoch": 0.5743589743589743, "grad_norm": 2.1790435314178467, "learning_rate": 3.9424094416980975e-05, "loss": 1.3759, "step": 3808 }, { "epoch": 0.5745098039215686, "grad_norm": 1.8119908571243286, "learning_rate": 3.940058483613801e-05, "loss": 0.8726, "step": 3809 }, { "epoch": 0.5746606334841629, "grad_norm": 2.3447301387786865, "learning_rate": 3.937707770862117e-05, "loss": 1.5676, "step": 3810 }, { "epoch": 0.5748114630467571, "grad_norm": 1.649901032447815, "learning_rate": 3.935357303987132e-05, "loss": 0.8121, "step": 3811 }, { "epoch": 0.5749622926093514, "grad_norm": 2.144688367843628, "learning_rate": 3.9330070835328845e-05, "loss": 1.3274, "step": 3812 }, { "epoch": 0.5751131221719457, "grad_norm": 2.233408212661743, "learning_rate": 3.930657110043354e-05, "loss": 1.4063, "step": 3813 }, { "epoch": 0.5752639517345399, "grad_norm": 1.984216570854187, "learning_rate": 3.928307384062461e-05, "loss": 1.2849, "step": 3814 }, { "epoch": 0.5754147812971342, "grad_norm": 1.6670180559158325, "learning_rate": 3.925957906134071e-05, "loss": 0.9012, "step": 3815 }, { "epoch": 0.5755656108597285, "grad_norm": 1.7813409566879272, "learning_rate": 3.923608676801991e-05, "loss": 0.9633, "step": 3816 }, { "epoch": 0.5757164404223227, "grad_norm": 2.1469907760620117, "learning_rate": 3.921259696609969e-05, "loss": 1.3557, "step": 3817 }, { "epoch": 0.575867269984917, "grad_norm": 1.7404171228408813, "learning_rate": 3.918910966101698e-05, "loss": 1.1651, "step": 3818 }, { "epoch": 0.5760180995475113, "grad_norm": 1.9195870161056519, "learning_rate": 3.916562485820814e-05, "loss": 1.137, "step": 3819 }, { "epoch": 0.5761689291101055, "grad_norm": 2.180469274520874, "learning_rate": 3.914214256310887e-05, "loss": 1.2386, "step": 3820 }, { "epoch": 0.5763197586726998, "grad_norm": 1.7790589332580566, "learning_rate": 3.91186627811544e-05, "loss": 1.0115, "step": 3821 }, { "epoch": 0.5764705882352941, "grad_norm": 1.8493021726608276, "learning_rate": 3.909518551777931e-05, "loss": 1.0881, "step": 3822 }, { "epoch": 0.5766214177978883, "grad_norm": 1.7308415174484253, "learning_rate": 3.907171077841762e-05, "loss": 0.7198, "step": 3823 }, { "epoch": 0.5767722473604826, "grad_norm": 2.2413573265075684, "learning_rate": 3.904823856850276e-05, "loss": 1.4132, "step": 3824 }, { "epoch": 0.5769230769230769, "grad_norm": 2.0828633308410645, "learning_rate": 3.902476889346757e-05, "loss": 1.1614, "step": 3825 }, { "epoch": 0.5770739064856711, "grad_norm": 2.1088972091674805, "learning_rate": 3.9001301758744316e-05, "loss": 1.0706, "step": 3826 }, { "epoch": 0.5772247360482654, "grad_norm": 1.9242440462112427, "learning_rate": 3.897783716976467e-05, "loss": 1.1146, "step": 3827 }, { "epoch": 0.5773755656108597, "grad_norm": 1.7562739849090576, "learning_rate": 3.895437513195971e-05, "loss": 0.8898, "step": 3828 }, { "epoch": 0.5775263951734539, "grad_norm": 1.9286617040634155, "learning_rate": 3.893091565075993e-05, "loss": 0.995, "step": 3829 }, { "epoch": 0.5776772247360482, "grad_norm": 1.861310362815857, "learning_rate": 3.890745873159522e-05, "loss": 1.1096, "step": 3830 }, { "epoch": 0.5778280542986425, "grad_norm": 1.9347667694091797, "learning_rate": 3.8884004379894893e-05, "loss": 1.0934, "step": 3831 }, { "epoch": 0.5779788838612367, "grad_norm": 1.9113729000091553, "learning_rate": 3.886055260108767e-05, "loss": 0.9703, "step": 3832 }, { "epoch": 0.5781297134238311, "grad_norm": 1.9082084894180298, "learning_rate": 3.883710340060165e-05, "loss": 1.0291, "step": 3833 }, { "epoch": 0.5782805429864254, "grad_norm": 2.3025405406951904, "learning_rate": 3.881365678386436e-05, "loss": 1.4826, "step": 3834 }, { "epoch": 0.5784313725490197, "grad_norm": 2.128587484359741, "learning_rate": 3.879021275630272e-05, "loss": 1.0226, "step": 3835 }, { "epoch": 0.5785822021116139, "grad_norm": 2.083890676498413, "learning_rate": 3.876677132334307e-05, "loss": 0.9549, "step": 3836 }, { "epoch": 0.5787330316742082, "grad_norm": 2.0119521617889404, "learning_rate": 3.8743332490411104e-05, "loss": 1.1593, "step": 3837 }, { "epoch": 0.5788838612368025, "grad_norm": 2.0062952041625977, "learning_rate": 3.871989626293199e-05, "loss": 1.0355, "step": 3838 }, { "epoch": 0.5790346907993967, "grad_norm": 1.849158763885498, "learning_rate": 3.869646264633018e-05, "loss": 0.8294, "step": 3839 }, { "epoch": 0.579185520361991, "grad_norm": 2.0078728199005127, "learning_rate": 3.867303164602961e-05, "loss": 1.1058, "step": 3840 }, { "epoch": 0.5793363499245853, "grad_norm": 1.9507700204849243, "learning_rate": 3.864960326745361e-05, "loss": 0.9708, "step": 3841 }, { "epoch": 0.5794871794871795, "grad_norm": 2.2363967895507812, "learning_rate": 3.8626177516024855e-05, "loss": 1.0661, "step": 3842 }, { "epoch": 0.5796380090497738, "grad_norm": 2.1223902702331543, "learning_rate": 3.860275439716545e-05, "loss": 1.2028, "step": 3843 }, { "epoch": 0.5797888386123681, "grad_norm": 2.0905911922454834, "learning_rate": 3.857933391629688e-05, "loss": 1.0526, "step": 3844 }, { "epoch": 0.5799396681749623, "grad_norm": 1.8848520517349243, "learning_rate": 3.855591607884e-05, "loss": 0.9663, "step": 3845 }, { "epoch": 0.5800904977375566, "grad_norm": 1.948407769203186, "learning_rate": 3.85325008902151e-05, "loss": 1.0578, "step": 3846 }, { "epoch": 0.5802413273001509, "grad_norm": 2.0566036701202393, "learning_rate": 3.850908835584181e-05, "loss": 1.1545, "step": 3847 }, { "epoch": 0.5803921568627451, "grad_norm": 2.0464518070220947, "learning_rate": 3.848567848113915e-05, "loss": 0.9451, "step": 3848 }, { "epoch": 0.5805429864253394, "grad_norm": 1.895623803138733, "learning_rate": 3.8462271271525566e-05, "loss": 0.9108, "step": 3849 }, { "epoch": 0.5806938159879337, "grad_norm": 1.975078821182251, "learning_rate": 3.8438866732418835e-05, "loss": 1.0743, "step": 3850 }, { "epoch": 0.580844645550528, "grad_norm": 1.6269946098327637, "learning_rate": 3.841546486923615e-05, "loss": 0.8547, "step": 3851 }, { "epoch": 0.5809954751131222, "grad_norm": 1.5943189859390259, "learning_rate": 3.839206568739406e-05, "loss": 0.85, "step": 3852 }, { "epoch": 0.5811463046757165, "grad_norm": 1.9571070671081543, "learning_rate": 3.836866919230854e-05, "loss": 1.3784, "step": 3853 }, { "epoch": 0.5812971342383108, "grad_norm": 2.003793239593506, "learning_rate": 3.8345275389394875e-05, "loss": 1.4722, "step": 3854 }, { "epoch": 0.581447963800905, "grad_norm": 1.6111950874328613, "learning_rate": 3.8321884284067785e-05, "loss": 0.9498, "step": 3855 }, { "epoch": 0.5815987933634993, "grad_norm": 1.8582446575164795, "learning_rate": 3.8298495881741334e-05, "loss": 1.2233, "step": 3856 }, { "epoch": 0.5817496229260936, "grad_norm": 2.1196680068969727, "learning_rate": 3.8275110187828997e-05, "loss": 1.5097, "step": 3857 }, { "epoch": 0.5819004524886878, "grad_norm": 1.4446624517440796, "learning_rate": 3.8251727207743543e-05, "loss": 0.6713, "step": 3858 }, { "epoch": 0.5820512820512821, "grad_norm": 2.0594499111175537, "learning_rate": 3.822834694689719e-05, "loss": 1.1636, "step": 3859 }, { "epoch": 0.5822021116138764, "grad_norm": 1.8353052139282227, "learning_rate": 3.820496941070151e-05, "loss": 0.9831, "step": 3860 }, { "epoch": 0.5823529411764706, "grad_norm": 1.6891440153121948, "learning_rate": 3.818159460456742e-05, "loss": 0.9513, "step": 3861 }, { "epoch": 0.5825037707390649, "grad_norm": 1.9636777639389038, "learning_rate": 3.815822253390523e-05, "loss": 1.3306, "step": 3862 }, { "epoch": 0.5826546003016592, "grad_norm": 1.7918481826782227, "learning_rate": 3.813485320412461e-05, "loss": 0.965, "step": 3863 }, { "epoch": 0.5828054298642534, "grad_norm": 1.9752830266952515, "learning_rate": 3.8111486620634584e-05, "loss": 1.445, "step": 3864 }, { "epoch": 0.5829562594268477, "grad_norm": 2.073298692703247, "learning_rate": 3.808812278884355e-05, "loss": 1.3099, "step": 3865 }, { "epoch": 0.583107088989442, "grad_norm": 1.9217450618743896, "learning_rate": 3.80647617141593e-05, "loss": 1.0046, "step": 3866 }, { "epoch": 0.5832579185520362, "grad_norm": 2.120866060256958, "learning_rate": 3.804140340198891e-05, "loss": 1.3342, "step": 3867 }, { "epoch": 0.5834087481146305, "grad_norm": 1.7239270210266113, "learning_rate": 3.801804785773887e-05, "loss": 0.9563, "step": 3868 }, { "epoch": 0.5835595776772248, "grad_norm": 1.8902339935302734, "learning_rate": 3.799469508681504e-05, "loss": 1.13, "step": 3869 }, { "epoch": 0.583710407239819, "grad_norm": 1.7027430534362793, "learning_rate": 3.797134509462261e-05, "loss": 0.9695, "step": 3870 }, { "epoch": 0.5838612368024133, "grad_norm": 1.9443086385726929, "learning_rate": 3.794799788656615e-05, "loss": 1.04, "step": 3871 }, { "epoch": 0.5840120663650076, "grad_norm": 1.549987554550171, "learning_rate": 3.7924653468049554e-05, "loss": 0.7138, "step": 3872 }, { "epoch": 0.5841628959276018, "grad_norm": 1.9716720581054688, "learning_rate": 3.7901311844476106e-05, "loss": 0.9773, "step": 3873 }, { "epoch": 0.5843137254901961, "grad_norm": 1.961449384689331, "learning_rate": 3.7877973021248416e-05, "loss": 0.9367, "step": 3874 }, { "epoch": 0.5844645550527904, "grad_norm": 2.6652557849884033, "learning_rate": 3.785463700376847e-05, "loss": 1.2622, "step": 3875 }, { "epoch": 0.5846153846153846, "grad_norm": 2.1901206970214844, "learning_rate": 3.7831303797437575e-05, "loss": 1.0954, "step": 3876 }, { "epoch": 0.5847662141779789, "grad_norm": 2.161951780319214, "learning_rate": 3.78079734076564e-05, "loss": 1.2945, "step": 3877 }, { "epoch": 0.5849170437405732, "grad_norm": 1.8873648643493652, "learning_rate": 3.7784645839824975e-05, "loss": 0.9502, "step": 3878 }, { "epoch": 0.5850678733031675, "grad_norm": 1.7692062854766846, "learning_rate": 3.7761321099342665e-05, "loss": 0.8258, "step": 3879 }, { "epoch": 0.5852187028657617, "grad_norm": 2.0206427574157715, "learning_rate": 3.773799919160818e-05, "loss": 1.1914, "step": 3880 }, { "epoch": 0.585369532428356, "grad_norm": 1.7653228044509888, "learning_rate": 3.7714680122019574e-05, "loss": 0.9876, "step": 3881 }, { "epoch": 0.5855203619909503, "grad_norm": 2.7259747982025146, "learning_rate": 3.769136389597426e-05, "loss": 1.0824, "step": 3882 }, { "epoch": 0.5856711915535445, "grad_norm": 2.1274468898773193, "learning_rate": 3.766805051886897e-05, "loss": 1.3259, "step": 3883 }, { "epoch": 0.5858220211161388, "grad_norm": 2.0085883140563965, "learning_rate": 3.7644739996099784e-05, "loss": 1.156, "step": 3884 }, { "epoch": 0.5859728506787331, "grad_norm": 2.3968701362609863, "learning_rate": 3.762143233306215e-05, "loss": 1.5695, "step": 3885 }, { "epoch": 0.5861236802413273, "grad_norm": 1.9137704372406006, "learning_rate": 3.7598127535150783e-05, "loss": 1.094, "step": 3886 }, { "epoch": 0.5862745098039216, "grad_norm": 1.908901333808899, "learning_rate": 3.75748256077598e-05, "loss": 1.0012, "step": 3887 }, { "epoch": 0.5864253393665159, "grad_norm": 1.937550663948059, "learning_rate": 3.755152655628264e-05, "loss": 0.9724, "step": 3888 }, { "epoch": 0.5865761689291101, "grad_norm": 2.030938148498535, "learning_rate": 3.752823038611206e-05, "loss": 1.1033, "step": 3889 }, { "epoch": 0.5867269984917044, "grad_norm": 1.8451001644134521, "learning_rate": 3.750493710264016e-05, "loss": 1.1605, "step": 3890 }, { "epoch": 0.5868778280542987, "grad_norm": 2.3761889934539795, "learning_rate": 3.748164671125839e-05, "loss": 1.0641, "step": 3891 }, { "epoch": 0.5870286576168929, "grad_norm": 2.0553181171417236, "learning_rate": 3.745835921735748e-05, "loss": 1.18, "step": 3892 }, { "epoch": 0.5871794871794872, "grad_norm": 2.203770399093628, "learning_rate": 3.743507462632755e-05, "loss": 1.4579, "step": 3893 }, { "epoch": 0.5873303167420815, "grad_norm": 2.355835437774658, "learning_rate": 3.741179294355801e-05, "loss": 1.2132, "step": 3894 }, { "epoch": 0.5874811463046757, "grad_norm": 1.9941486120224, "learning_rate": 3.7388514174437594e-05, "loss": 0.779, "step": 3895 }, { "epoch": 0.58763197586727, "grad_norm": 1.8373384475708008, "learning_rate": 3.736523832435439e-05, "loss": 0.7943, "step": 3896 }, { "epoch": 0.5877828054298643, "grad_norm": 1.59861421585083, "learning_rate": 3.7341965398695786e-05, "loss": 0.7275, "step": 3897 }, { "epoch": 0.5879336349924585, "grad_norm": 1.744720697402954, "learning_rate": 3.73186954028485e-05, "loss": 0.8237, "step": 3898 }, { "epoch": 0.5880844645550528, "grad_norm": 1.779641032218933, "learning_rate": 3.729542834219858e-05, "loss": 0.7259, "step": 3899 }, { "epoch": 0.5882352941176471, "grad_norm": 1.9322764873504639, "learning_rate": 3.727216422213139e-05, "loss": 0.8777, "step": 3900 }, { "epoch": 0.5883861236802413, "grad_norm": 2.327582359313965, "learning_rate": 3.72489030480316e-05, "loss": 1.6351, "step": 3901 }, { "epoch": 0.5885369532428356, "grad_norm": 1.9666976928710938, "learning_rate": 3.722564482528323e-05, "loss": 1.2998, "step": 3902 }, { "epoch": 0.5886877828054299, "grad_norm": 2.0830228328704834, "learning_rate": 3.720238955926958e-05, "loss": 1.7034, "step": 3903 }, { "epoch": 0.5888386123680242, "grad_norm": 2.0553481578826904, "learning_rate": 3.717913725537328e-05, "loss": 1.4121, "step": 3904 }, { "epoch": 0.5889894419306184, "grad_norm": 2.048902750015259, "learning_rate": 3.715588791897627e-05, "loss": 1.4758, "step": 3905 }, { "epoch": 0.5891402714932127, "grad_norm": 1.4155818223953247, "learning_rate": 3.713264155545983e-05, "loss": 0.6176, "step": 3906 }, { "epoch": 0.589291101055807, "grad_norm": 1.8274027109146118, "learning_rate": 3.710939817020452e-05, "loss": 1.1516, "step": 3907 }, { "epoch": 0.5894419306184012, "grad_norm": 1.9115885496139526, "learning_rate": 3.7086157768590215e-05, "loss": 0.9656, "step": 3908 }, { "epoch": 0.5895927601809955, "grad_norm": 2.3763747215270996, "learning_rate": 3.7062920355996125e-05, "loss": 1.5621, "step": 3909 }, { "epoch": 0.5897435897435898, "grad_norm": 2.352717638015747, "learning_rate": 3.7039685937800736e-05, "loss": 1.5633, "step": 3910 }, { "epoch": 0.589894419306184, "grad_norm": 1.8512084484100342, "learning_rate": 3.701645451938186e-05, "loss": 1.0923, "step": 3911 }, { "epoch": 0.5900452488687783, "grad_norm": 1.9730018377304077, "learning_rate": 3.699322610611661e-05, "loss": 1.151, "step": 3912 }, { "epoch": 0.5901960784313726, "grad_norm": 1.9053795337677002, "learning_rate": 3.697000070338141e-05, "loss": 1.1752, "step": 3913 }, { "epoch": 0.5903469079939668, "grad_norm": 1.729095697402954, "learning_rate": 3.694677831655196e-05, "loss": 0.9221, "step": 3914 }, { "epoch": 0.5904977375565611, "grad_norm": 1.965542197227478, "learning_rate": 3.692355895100329e-05, "loss": 1.345, "step": 3915 }, { "epoch": 0.5906485671191554, "grad_norm": 2.0446412563323975, "learning_rate": 3.690034261210972e-05, "loss": 1.3044, "step": 3916 }, { "epoch": 0.5907993966817496, "grad_norm": 1.625805377960205, "learning_rate": 3.6877129305244876e-05, "loss": 0.9032, "step": 3917 }, { "epoch": 0.5909502262443439, "grad_norm": 2.162475109100342, "learning_rate": 3.685391903578168e-05, "loss": 1.8448, "step": 3918 }, { "epoch": 0.5911010558069382, "grad_norm": 1.9024873971939087, "learning_rate": 3.683071180909235e-05, "loss": 1.2221, "step": 3919 }, { "epoch": 0.5912518853695324, "grad_norm": 1.815177083015442, "learning_rate": 3.68075076305484e-05, "loss": 0.9921, "step": 3920 }, { "epoch": 0.5914027149321267, "grad_norm": 1.6885398626327515, "learning_rate": 3.678430650552063e-05, "loss": 0.9204, "step": 3921 }, { "epoch": 0.591553544494721, "grad_norm": 1.7024505138397217, "learning_rate": 3.6761108439379156e-05, "loss": 1.0485, "step": 3922 }, { "epoch": 0.5917043740573152, "grad_norm": 1.757189393043518, "learning_rate": 3.6737913437493346e-05, "loss": 0.7913, "step": 3923 }, { "epoch": 0.5918552036199095, "grad_norm": 1.8933260440826416, "learning_rate": 3.671472150523189e-05, "loss": 1.1704, "step": 3924 }, { "epoch": 0.5920060331825038, "grad_norm": 2.026594400405884, "learning_rate": 3.6691532647962776e-05, "loss": 0.9594, "step": 3925 }, { "epoch": 0.592156862745098, "grad_norm": 1.6546918153762817, "learning_rate": 3.666834687105325e-05, "loss": 0.8958, "step": 3926 }, { "epoch": 0.5923076923076923, "grad_norm": 2.1133580207824707, "learning_rate": 3.664516417986987e-05, "loss": 1.2567, "step": 3927 }, { "epoch": 0.5924585218702866, "grad_norm": 2.146718978881836, "learning_rate": 3.6621984579778466e-05, "loss": 1.4442, "step": 3928 }, { "epoch": 0.5926093514328808, "grad_norm": 2.0095598697662354, "learning_rate": 3.659880807614416e-05, "loss": 1.1812, "step": 3929 }, { "epoch": 0.5927601809954751, "grad_norm": 1.7624636888504028, "learning_rate": 3.657563467433134e-05, "loss": 0.971, "step": 3930 }, { "epoch": 0.5929110105580694, "grad_norm": 1.7456715106964111, "learning_rate": 3.655246437970373e-05, "loss": 0.8428, "step": 3931 }, { "epoch": 0.5930618401206637, "grad_norm": 2.2691783905029297, "learning_rate": 3.6529297197624246e-05, "loss": 1.5178, "step": 3932 }, { "epoch": 0.5932126696832579, "grad_norm": 1.9859094619750977, "learning_rate": 3.6506133133455146e-05, "loss": 1.0268, "step": 3933 }, { "epoch": 0.5933634992458522, "grad_norm": 2.167849063873291, "learning_rate": 3.648297219255795e-05, "loss": 1.1524, "step": 3934 }, { "epoch": 0.5935143288084465, "grad_norm": 2.0024635791778564, "learning_rate": 3.645981438029348e-05, "loss": 1.2918, "step": 3935 }, { "epoch": 0.5936651583710407, "grad_norm": 1.963091254234314, "learning_rate": 3.6436659702021785e-05, "loss": 1.1235, "step": 3936 }, { "epoch": 0.593815987933635, "grad_norm": 1.9152246713638306, "learning_rate": 3.6413508163102226e-05, "loss": 0.9386, "step": 3937 }, { "epoch": 0.5939668174962293, "grad_norm": 2.154026508331299, "learning_rate": 3.639035976889344e-05, "loss": 1.4202, "step": 3938 }, { "epoch": 0.5941176470588235, "grad_norm": 1.7826855182647705, "learning_rate": 3.63672145247533e-05, "loss": 1.0203, "step": 3939 }, { "epoch": 0.5942684766214178, "grad_norm": 1.9185582399368286, "learning_rate": 3.634407243603898e-05, "loss": 1.0757, "step": 3940 }, { "epoch": 0.5944193061840121, "grad_norm": 2.2654175758361816, "learning_rate": 3.6320933508106933e-05, "loss": 1.2896, "step": 3941 }, { "epoch": 0.5945701357466063, "grad_norm": 2.1807973384857178, "learning_rate": 3.629779774631283e-05, "loss": 1.1419, "step": 3942 }, { "epoch": 0.5947209653092006, "grad_norm": 1.912052869796753, "learning_rate": 3.627466515601167e-05, "loss": 0.9177, "step": 3943 }, { "epoch": 0.5948717948717949, "grad_norm": 1.9397125244140625, "learning_rate": 3.625153574255769e-05, "loss": 0.978, "step": 3944 }, { "epoch": 0.5950226244343891, "grad_norm": 1.9888482093811035, "learning_rate": 3.622840951130436e-05, "loss": 0.9686, "step": 3945 }, { "epoch": 0.5951734539969834, "grad_norm": 2.1971898078918457, "learning_rate": 3.620528646760449e-05, "loss": 1.1749, "step": 3946 }, { "epoch": 0.5953242835595777, "grad_norm": 1.6674482822418213, "learning_rate": 3.6182166616810084e-05, "loss": 0.6828, "step": 3947 }, { "epoch": 0.5954751131221719, "grad_norm": 1.9170610904693604, "learning_rate": 3.615904996427243e-05, "loss": 0.9239, "step": 3948 }, { "epoch": 0.5956259426847662, "grad_norm": 1.6503795385360718, "learning_rate": 3.613593651534208e-05, "loss": 0.8252, "step": 3949 }, { "epoch": 0.5957767722473605, "grad_norm": 1.8990890979766846, "learning_rate": 3.6112826275368875e-05, "loss": 0.944, "step": 3950 }, { "epoch": 0.5959276018099547, "grad_norm": 2.082523822784424, "learning_rate": 3.6089719249701816e-05, "loss": 1.6046, "step": 3951 }, { "epoch": 0.596078431372549, "grad_norm": 1.5975029468536377, "learning_rate": 3.606661544368925e-05, "loss": 0.7555, "step": 3952 }, { "epoch": 0.5962292609351433, "grad_norm": 1.834357738494873, "learning_rate": 3.604351486267877e-05, "loss": 1.088, "step": 3953 }, { "epoch": 0.5963800904977375, "grad_norm": 1.7460297346115112, "learning_rate": 3.602041751201718e-05, "loss": 1.0181, "step": 3954 }, { "epoch": 0.5965309200603318, "grad_norm": 2.16597056388855, "learning_rate": 3.599732339705058e-05, "loss": 1.4312, "step": 3955 }, { "epoch": 0.5966817496229261, "grad_norm": 2.0575509071350098, "learning_rate": 3.5974232523124295e-05, "loss": 1.2746, "step": 3956 }, { "epoch": 0.5968325791855204, "grad_norm": 1.928029179573059, "learning_rate": 3.59511448955829e-05, "loss": 1.2272, "step": 3957 }, { "epoch": 0.5969834087481146, "grad_norm": 1.9778990745544434, "learning_rate": 3.592806051977024e-05, "loss": 1.1748, "step": 3958 }, { "epoch": 0.5971342383107089, "grad_norm": 2.020461320877075, "learning_rate": 3.590497940102938e-05, "loss": 1.1551, "step": 3959 }, { "epoch": 0.5972850678733032, "grad_norm": 1.974583625793457, "learning_rate": 3.588190154470268e-05, "loss": 1.2811, "step": 3960 }, { "epoch": 0.5974358974358974, "grad_norm": 1.6737124919891357, "learning_rate": 3.5858826956131645e-05, "loss": 0.7707, "step": 3961 }, { "epoch": 0.5975867269984917, "grad_norm": 2.1272268295288086, "learning_rate": 3.5835755640657134e-05, "loss": 1.3862, "step": 3962 }, { "epoch": 0.597737556561086, "grad_norm": 1.7698798179626465, "learning_rate": 3.581268760361918e-05, "loss": 1.0079, "step": 3963 }, { "epoch": 0.5978883861236802, "grad_norm": 2.150222063064575, "learning_rate": 3.5789622850357085e-05, "loss": 1.4275, "step": 3964 }, { "epoch": 0.5980392156862745, "grad_norm": 2.1057803630828857, "learning_rate": 3.57665613862094e-05, "loss": 1.0622, "step": 3965 }, { "epoch": 0.5981900452488688, "grad_norm": 1.9020404815673828, "learning_rate": 3.5743503216513875e-05, "loss": 0.9541, "step": 3966 }, { "epoch": 0.598340874811463, "grad_norm": 1.8399683237075806, "learning_rate": 3.5720448346607545e-05, "loss": 0.8929, "step": 3967 }, { "epoch": 0.5984917043740573, "grad_norm": 2.069932460784912, "learning_rate": 3.569739678182664e-05, "loss": 1.2037, "step": 3968 }, { "epoch": 0.5986425339366516, "grad_norm": 2.19966459274292, "learning_rate": 3.567434852750667e-05, "loss": 1.2723, "step": 3969 }, { "epoch": 0.5987933634992458, "grad_norm": 1.920042872428894, "learning_rate": 3.565130358898233e-05, "loss": 0.9204, "step": 3970 }, { "epoch": 0.5989441930618401, "grad_norm": 1.844545841217041, "learning_rate": 3.562826197158757e-05, "loss": 0.864, "step": 3971 }, { "epoch": 0.5990950226244344, "grad_norm": 2.317274570465088, "learning_rate": 3.560522368065558e-05, "loss": 1.1287, "step": 3972 }, { "epoch": 0.5992458521870286, "grad_norm": 1.7556495666503906, "learning_rate": 3.558218872151876e-05, "loss": 0.8879, "step": 3973 }, { "epoch": 0.5993966817496229, "grad_norm": 2.0100631713867188, "learning_rate": 3.555915709950875e-05, "loss": 1.1234, "step": 3974 }, { "epoch": 0.5995475113122172, "grad_norm": 1.7695196866989136, "learning_rate": 3.553612881995644e-05, "loss": 1.0686, "step": 3975 }, { "epoch": 0.5996983408748114, "grad_norm": 2.062626361846924, "learning_rate": 3.55131038881919e-05, "loss": 1.3119, "step": 3976 }, { "epoch": 0.5998491704374057, "grad_norm": 1.7377500534057617, "learning_rate": 3.549008230954446e-05, "loss": 0.9485, "step": 3977 }, { "epoch": 0.6, "grad_norm": 2.0161612033843994, "learning_rate": 3.546706408934269e-05, "loss": 1.0955, "step": 3978 }, { "epoch": 0.6001508295625942, "grad_norm": 1.915877103805542, "learning_rate": 3.54440492329143e-05, "loss": 1.0522, "step": 3979 }, { "epoch": 0.6003016591251885, "grad_norm": 1.7869607210159302, "learning_rate": 3.5421037745586305e-05, "loss": 0.9661, "step": 3980 }, { "epoch": 0.6004524886877828, "grad_norm": 1.9352627992630005, "learning_rate": 3.5398029632684905e-05, "loss": 1.1415, "step": 3981 }, { "epoch": 0.600603318250377, "grad_norm": 2.179194450378418, "learning_rate": 3.537502489953554e-05, "loss": 1.2939, "step": 3982 }, { "epoch": 0.6007541478129713, "grad_norm": 2.274010181427002, "learning_rate": 3.5352023551462846e-05, "loss": 1.0762, "step": 3983 }, { "epoch": 0.6009049773755656, "grad_norm": 2.3407459259033203, "learning_rate": 3.532902559379068e-05, "loss": 1.3995, "step": 3984 }, { "epoch": 0.6010558069381599, "grad_norm": 2.435645580291748, "learning_rate": 3.530603103184211e-05, "loss": 1.0833, "step": 3985 }, { "epoch": 0.6012066365007541, "grad_norm": 2.0708301067352295, "learning_rate": 3.528303987093946e-05, "loss": 1.1968, "step": 3986 }, { "epoch": 0.6013574660633484, "grad_norm": 1.950560450553894, "learning_rate": 3.5260052116404194e-05, "loss": 0.97, "step": 3987 }, { "epoch": 0.6015082956259427, "grad_norm": 1.9760770797729492, "learning_rate": 3.523706777355706e-05, "loss": 1.0631, "step": 3988 }, { "epoch": 0.6016591251885369, "grad_norm": 2.151787281036377, "learning_rate": 3.5214086847717954e-05, "loss": 1.3102, "step": 3989 }, { "epoch": 0.6018099547511312, "grad_norm": 2.1190555095672607, "learning_rate": 3.519110934420602e-05, "loss": 1.1669, "step": 3990 }, { "epoch": 0.6019607843137255, "grad_norm": 2.161095380783081, "learning_rate": 3.51681352683396e-05, "loss": 1.2916, "step": 3991 }, { "epoch": 0.6021116138763197, "grad_norm": 2.0549397468566895, "learning_rate": 3.514516462543624e-05, "loss": 0.9517, "step": 3992 }, { "epoch": 0.602262443438914, "grad_norm": 2.028581380844116, "learning_rate": 3.512219742081269e-05, "loss": 0.9794, "step": 3993 }, { "epoch": 0.6024132730015083, "grad_norm": 2.120790481567383, "learning_rate": 3.509923365978492e-05, "loss": 1.1084, "step": 3994 }, { "epoch": 0.6025641025641025, "grad_norm": 2.033998489379883, "learning_rate": 3.5076273347668076e-05, "loss": 0.9398, "step": 3995 }, { "epoch": 0.6027149321266968, "grad_norm": 2.4048311710357666, "learning_rate": 3.5053316489776536e-05, "loss": 1.1708, "step": 3996 }, { "epoch": 0.6028657616892911, "grad_norm": 1.882212519645691, "learning_rate": 3.503036309142386e-05, "loss": 0.9259, "step": 3997 }, { "epoch": 0.6030165912518853, "grad_norm": 1.7932173013687134, "learning_rate": 3.500741315792278e-05, "loss": 0.9327, "step": 3998 }, { "epoch": 0.6031674208144796, "grad_norm": 2.1224122047424316, "learning_rate": 3.49844666945853e-05, "loss": 1.0629, "step": 3999 }, { "epoch": 0.6033182503770739, "grad_norm": 1.5714775323867798, "learning_rate": 3.496152370672255e-05, "loss": 0.7594, "step": 4000 }, { "epoch": 0.6034690799396681, "grad_norm": 2.0087456703186035, "learning_rate": 3.4938584199644894e-05, "loss": 1.105, "step": 4001 }, { "epoch": 0.6036199095022624, "grad_norm": 1.9552452564239502, "learning_rate": 3.4915648178661874e-05, "loss": 1.0441, "step": 4002 }, { "epoch": 0.6037707390648567, "grad_norm": 2.107801914215088, "learning_rate": 3.489271564908223e-05, "loss": 1.0966, "step": 4003 }, { "epoch": 0.6039215686274509, "grad_norm": 1.9709044694900513, "learning_rate": 3.486978661621389e-05, "loss": 1.2046, "step": 4004 }, { "epoch": 0.6040723981900452, "grad_norm": 1.94219970703125, "learning_rate": 3.4846861085363994e-05, "loss": 1.1369, "step": 4005 }, { "epoch": 0.6042232277526395, "grad_norm": 1.8935497999191284, "learning_rate": 3.482393906183885e-05, "loss": 1.0314, "step": 4006 }, { "epoch": 0.6043740573152337, "grad_norm": 1.763367772102356, "learning_rate": 3.4801020550943936e-05, "loss": 0.9101, "step": 4007 }, { "epoch": 0.604524886877828, "grad_norm": 1.8040186166763306, "learning_rate": 3.477810555798395e-05, "loss": 0.9701, "step": 4008 }, { "epoch": 0.6046757164404223, "grad_norm": 1.8566020727157593, "learning_rate": 3.4755194088262763e-05, "loss": 1.1734, "step": 4009 }, { "epoch": 0.6048265460030166, "grad_norm": 1.9791457653045654, "learning_rate": 3.473228614708344e-05, "loss": 1.1616, "step": 4010 }, { "epoch": 0.6049773755656108, "grad_norm": 1.892521858215332, "learning_rate": 3.470938173974821e-05, "loss": 0.9665, "step": 4011 }, { "epoch": 0.6051282051282051, "grad_norm": 1.9835515022277832, "learning_rate": 3.4686480871558514e-05, "loss": 1.0483, "step": 4012 }, { "epoch": 0.6052790346907994, "grad_norm": 2.05202317237854, "learning_rate": 3.466358354781495e-05, "loss": 1.2938, "step": 4013 }, { "epoch": 0.6054298642533936, "grad_norm": 1.9429121017456055, "learning_rate": 3.464068977381728e-05, "loss": 1.1055, "step": 4014 }, { "epoch": 0.6055806938159879, "grad_norm": 2.2131335735321045, "learning_rate": 3.4617799554864494e-05, "loss": 0.9769, "step": 4015 }, { "epoch": 0.6057315233785822, "grad_norm": 2.1614556312561035, "learning_rate": 3.4594912896254726e-05, "loss": 1.3808, "step": 4016 }, { "epoch": 0.6058823529411764, "grad_norm": 2.102891683578491, "learning_rate": 3.4572029803285274e-05, "loss": 1.2046, "step": 4017 }, { "epoch": 0.6060331825037707, "grad_norm": 1.8325203657150269, "learning_rate": 3.4549150281252636e-05, "loss": 1.0586, "step": 4018 }, { "epoch": 0.606184012066365, "grad_norm": 2.270690679550171, "learning_rate": 3.452627433545247e-05, "loss": 1.3598, "step": 4019 }, { "epoch": 0.6063348416289592, "grad_norm": 2.051109790802002, "learning_rate": 3.450340197117962e-05, "loss": 1.1273, "step": 4020 }, { "epoch": 0.6064856711915535, "grad_norm": 2.0354955196380615, "learning_rate": 3.448053319372809e-05, "loss": 1.2588, "step": 4021 }, { "epoch": 0.6066365007541478, "grad_norm": 1.9789419174194336, "learning_rate": 3.4457668008391054e-05, "loss": 0.9884, "step": 4022 }, { "epoch": 0.606787330316742, "grad_norm": 1.6501291990280151, "learning_rate": 3.443480642046086e-05, "loss": 0.8446, "step": 4023 }, { "epoch": 0.6069381598793363, "grad_norm": 2.0869297981262207, "learning_rate": 3.4411948435229014e-05, "loss": 1.2073, "step": 4024 }, { "epoch": 0.6070889894419306, "grad_norm": 1.8116329908370972, "learning_rate": 3.438909405798622e-05, "loss": 0.843, "step": 4025 }, { "epoch": 0.6072398190045248, "grad_norm": 2.0527615547180176, "learning_rate": 3.436624329402227e-05, "loss": 1.0803, "step": 4026 }, { "epoch": 0.6073906485671191, "grad_norm": 1.8030986785888672, "learning_rate": 3.43433961486262e-05, "loss": 0.8768, "step": 4027 }, { "epoch": 0.6075414781297134, "grad_norm": 2.122394323348999, "learning_rate": 3.432055262708617e-05, "loss": 1.3514, "step": 4028 }, { "epoch": 0.6076923076923076, "grad_norm": 2.0777695178985596, "learning_rate": 3.4297712734689526e-05, "loss": 1.2578, "step": 4029 }, { "epoch": 0.6078431372549019, "grad_norm": 1.7542365789413452, "learning_rate": 3.427487647672275e-05, "loss": 0.8013, "step": 4030 }, { "epoch": 0.6079939668174962, "grad_norm": 1.7899192571640015, "learning_rate": 3.4252043858471484e-05, "loss": 0.8972, "step": 4031 }, { "epoch": 0.6081447963800904, "grad_norm": 2.3391523361206055, "learning_rate": 3.422921488522054e-05, "loss": 1.0657, "step": 4032 }, { "epoch": 0.6082956259426847, "grad_norm": 2.519115924835205, "learning_rate": 3.4206389562253885e-05, "loss": 1.6697, "step": 4033 }, { "epoch": 0.608446455505279, "grad_norm": 1.6659750938415527, "learning_rate": 3.418356789485465e-05, "loss": 0.7753, "step": 4034 }, { "epoch": 0.6085972850678733, "grad_norm": 1.9265978336334229, "learning_rate": 3.4160749888305075e-05, "loss": 1.0772, "step": 4035 }, { "epoch": 0.6087481146304675, "grad_norm": 2.3834903240203857, "learning_rate": 3.413793554788659e-05, "loss": 1.2211, "step": 4036 }, { "epoch": 0.6088989441930618, "grad_norm": 1.868340253829956, "learning_rate": 3.411512487887979e-05, "loss": 1.0739, "step": 4037 }, { "epoch": 0.609049773755656, "grad_norm": 2.0673558712005615, "learning_rate": 3.409231788656439e-05, "loss": 1.249, "step": 4038 }, { "epoch": 0.6092006033182503, "grad_norm": 2.104642391204834, "learning_rate": 3.406951457621926e-05, "loss": 1.1246, "step": 4039 }, { "epoch": 0.6093514328808446, "grad_norm": 2.5059192180633545, "learning_rate": 3.404671495312244e-05, "loss": 1.5383, "step": 4040 }, { "epoch": 0.609502262443439, "grad_norm": 1.8835904598236084, "learning_rate": 3.4023919022551076e-05, "loss": 0.9153, "step": 4041 }, { "epoch": 0.6096530920060332, "grad_norm": 2.376406192779541, "learning_rate": 3.400112678978151e-05, "loss": 1.1883, "step": 4042 }, { "epoch": 0.6098039215686275, "grad_norm": 1.8355250358581543, "learning_rate": 3.3978338260089175e-05, "loss": 1.0954, "step": 4043 }, { "epoch": 0.6099547511312218, "grad_norm": 2.1949899196624756, "learning_rate": 3.3955553438748696e-05, "loss": 1.0911, "step": 4044 }, { "epoch": 0.610105580693816, "grad_norm": 1.6959978342056274, "learning_rate": 3.39327723310338e-05, "loss": 0.8083, "step": 4045 }, { "epoch": 0.6102564102564103, "grad_norm": 1.8835595846176147, "learning_rate": 3.390999494221738e-05, "loss": 0.9201, "step": 4046 }, { "epoch": 0.6104072398190046, "grad_norm": 1.9305739402770996, "learning_rate": 3.3887221277571444e-05, "loss": 1.0029, "step": 4047 }, { "epoch": 0.6105580693815988, "grad_norm": 2.291267156600952, "learning_rate": 3.386445134236717e-05, "loss": 1.1479, "step": 4048 }, { "epoch": 0.6107088989441931, "grad_norm": 1.6723989248275757, "learning_rate": 3.384168514187486e-05, "loss": 0.7414, "step": 4049 }, { "epoch": 0.6108597285067874, "grad_norm": 1.8244835138320923, "learning_rate": 3.3818922681363924e-05, "loss": 0.7554, "step": 4050 }, { "epoch": 0.6110105580693816, "grad_norm": 1.7369440793991089, "learning_rate": 3.379616396610294e-05, "loss": 1.2025, "step": 4051 }, { "epoch": 0.6111613876319759, "grad_norm": 2.080164909362793, "learning_rate": 3.377340900135963e-05, "loss": 1.2969, "step": 4052 }, { "epoch": 0.6113122171945702, "grad_norm": 2.0392329692840576, "learning_rate": 3.375065779240082e-05, "loss": 1.2934, "step": 4053 }, { "epoch": 0.6114630467571645, "grad_norm": 1.8000450134277344, "learning_rate": 3.372791034449244e-05, "loss": 0.9455, "step": 4054 }, { "epoch": 0.6116138763197587, "grad_norm": 2.0292422771453857, "learning_rate": 3.370516666289961e-05, "loss": 1.4038, "step": 4055 }, { "epoch": 0.611764705882353, "grad_norm": 1.7157974243164062, "learning_rate": 3.3682426752886556e-05, "loss": 0.8513, "step": 4056 }, { "epoch": 0.6119155354449473, "grad_norm": 1.859204888343811, "learning_rate": 3.3659690619716604e-05, "loss": 0.9819, "step": 4057 }, { "epoch": 0.6120663650075415, "grad_norm": 2.1529510021209717, "learning_rate": 3.363695826865225e-05, "loss": 1.5978, "step": 4058 }, { "epoch": 0.6122171945701358, "grad_norm": 2.0190141201019287, "learning_rate": 3.361422970495508e-05, "loss": 1.1742, "step": 4059 }, { "epoch": 0.6123680241327301, "grad_norm": 1.9270621538162231, "learning_rate": 3.359150493388583e-05, "loss": 1.1763, "step": 4060 }, { "epoch": 0.6125188536953243, "grad_norm": 2.02591609954834, "learning_rate": 3.356878396070433e-05, "loss": 1.2887, "step": 4061 }, { "epoch": 0.6126696832579186, "grad_norm": 2.066586494445801, "learning_rate": 3.354606679066956e-05, "loss": 1.0733, "step": 4062 }, { "epoch": 0.6128205128205129, "grad_norm": 2.223280191421509, "learning_rate": 3.3523353429039586e-05, "loss": 1.5112, "step": 4063 }, { "epoch": 0.6129713423831071, "grad_norm": 1.6628400087356567, "learning_rate": 3.3500643881071624e-05, "loss": 0.9547, "step": 4064 }, { "epoch": 0.6131221719457014, "grad_norm": 2.039923667907715, "learning_rate": 3.3477938152021994e-05, "loss": 1.0715, "step": 4065 }, { "epoch": 0.6132730015082957, "grad_norm": 1.8121058940887451, "learning_rate": 3.3455236247146124e-05, "loss": 1.0471, "step": 4066 }, { "epoch": 0.6134238310708899, "grad_norm": 1.8359087705612183, "learning_rate": 3.343253817169857e-05, "loss": 0.8566, "step": 4067 }, { "epoch": 0.6135746606334842, "grad_norm": 2.357978343963623, "learning_rate": 3.340984393093301e-05, "loss": 1.5686, "step": 4068 }, { "epoch": 0.6137254901960785, "grad_norm": 2.0989975929260254, "learning_rate": 3.338715353010221e-05, "loss": 1.1096, "step": 4069 }, { "epoch": 0.6138763197586727, "grad_norm": 1.7977399826049805, "learning_rate": 3.336446697445806e-05, "loss": 0.903, "step": 4070 }, { "epoch": 0.614027149321267, "grad_norm": 2.1370317935943604, "learning_rate": 3.3341784269251556e-05, "loss": 1.3357, "step": 4071 }, { "epoch": 0.6141779788838613, "grad_norm": 2.0843966007232666, "learning_rate": 3.3319105419732836e-05, "loss": 1.1737, "step": 4072 }, { "epoch": 0.6143288084464555, "grad_norm": 1.7150628566741943, "learning_rate": 3.329643043115107e-05, "loss": 1.1111, "step": 4073 }, { "epoch": 0.6144796380090498, "grad_norm": 2.0279176235198975, "learning_rate": 3.3273759308754596e-05, "loss": 1.1444, "step": 4074 }, { "epoch": 0.6146304675716441, "grad_norm": 2.0138814449310303, "learning_rate": 3.3251092057790844e-05, "loss": 1.2242, "step": 4075 }, { "epoch": 0.6147812971342383, "grad_norm": 1.8077012300491333, "learning_rate": 3.3228428683506344e-05, "loss": 1.0648, "step": 4076 }, { "epoch": 0.6149321266968326, "grad_norm": 2.0460381507873535, "learning_rate": 3.320576919114673e-05, "loss": 1.292, "step": 4077 }, { "epoch": 0.6150829562594269, "grad_norm": 2.479356527328491, "learning_rate": 3.318311358595674e-05, "loss": 1.3355, "step": 4078 }, { "epoch": 0.6152337858220212, "grad_norm": 2.177966594696045, "learning_rate": 3.3160461873180206e-05, "loss": 1.3173, "step": 4079 }, { "epoch": 0.6153846153846154, "grad_norm": 1.9509291648864746, "learning_rate": 3.313781405806006e-05, "loss": 1.2293, "step": 4080 }, { "epoch": 0.6155354449472097, "grad_norm": 2.0308034420013428, "learning_rate": 3.311517014583837e-05, "loss": 1.1521, "step": 4081 }, { "epoch": 0.615686274509804, "grad_norm": 2.148648977279663, "learning_rate": 3.309253014175619e-05, "loss": 1.2405, "step": 4082 }, { "epoch": 0.6158371040723982, "grad_norm": 2.0457499027252197, "learning_rate": 3.306989405105381e-05, "loss": 1.1895, "step": 4083 }, { "epoch": 0.6159879336349925, "grad_norm": 3.911609172821045, "learning_rate": 3.3047261878970505e-05, "loss": 1.311, "step": 4084 }, { "epoch": 0.6161387631975868, "grad_norm": 1.9162880182266235, "learning_rate": 3.3024633630744716e-05, "loss": 0.9827, "step": 4085 }, { "epoch": 0.616289592760181, "grad_norm": 1.9421119689941406, "learning_rate": 3.300200931161393e-05, "loss": 1.1191, "step": 4086 }, { "epoch": 0.6164404223227753, "grad_norm": 2.059380292892456, "learning_rate": 3.2979388926814755e-05, "loss": 1.1302, "step": 4087 }, { "epoch": 0.6165912518853696, "grad_norm": 2.1559836864471436, "learning_rate": 3.2956772481582865e-05, "loss": 1.0762, "step": 4088 }, { "epoch": 0.6167420814479638, "grad_norm": 1.7537373304367065, "learning_rate": 3.2934159981153034e-05, "loss": 0.8445, "step": 4089 }, { "epoch": 0.6168929110105581, "grad_norm": 2.1979899406433105, "learning_rate": 3.291155143075912e-05, "loss": 1.2016, "step": 4090 }, { "epoch": 0.6170437405731524, "grad_norm": 1.752422571182251, "learning_rate": 3.2888946835634075e-05, "loss": 1.0702, "step": 4091 }, { "epoch": 0.6171945701357466, "grad_norm": 2.0493950843811035, "learning_rate": 3.2866346201009904e-05, "loss": 1.0161, "step": 4092 }, { "epoch": 0.6173453996983409, "grad_norm": 1.904589295387268, "learning_rate": 3.284374953211774e-05, "loss": 0.9005, "step": 4093 }, { "epoch": 0.6174962292609352, "grad_norm": 1.9221986532211304, "learning_rate": 3.282115683418777e-05, "loss": 1.0796, "step": 4094 }, { "epoch": 0.6176470588235294, "grad_norm": 1.9167890548706055, "learning_rate": 3.279856811244927e-05, "loss": 1.1095, "step": 4095 }, { "epoch": 0.6177978883861237, "grad_norm": 1.6140084266662598, "learning_rate": 3.277598337213059e-05, "loss": 0.6964, "step": 4096 }, { "epoch": 0.617948717948718, "grad_norm": 1.9732178449630737, "learning_rate": 3.275340261845917e-05, "loss": 1.0681, "step": 4097 }, { "epoch": 0.6180995475113122, "grad_norm": 1.8216191530227661, "learning_rate": 3.273082585666152e-05, "loss": 0.9929, "step": 4098 }, { "epoch": 0.6182503770739065, "grad_norm": 1.6939674615859985, "learning_rate": 3.2708253091963226e-05, "loss": 0.7926, "step": 4099 }, { "epoch": 0.6184012066365008, "grad_norm": 1.915073037147522, "learning_rate": 3.2685684329588964e-05, "loss": 0.89, "step": 4100 }, { "epoch": 0.618552036199095, "grad_norm": 1.8101600408554077, "learning_rate": 3.266311957476243e-05, "loss": 1.0509, "step": 4101 }, { "epoch": 0.6187028657616893, "grad_norm": 1.7240500450134277, "learning_rate": 3.2640558832706455e-05, "loss": 0.847, "step": 4102 }, { "epoch": 0.6188536953242836, "grad_norm": 1.8285021781921387, "learning_rate": 3.261800210864291e-05, "loss": 1.234, "step": 4103 }, { "epoch": 0.6190045248868778, "grad_norm": 2.098858594894409, "learning_rate": 3.2595449407792754e-05, "loss": 1.2779, "step": 4104 }, { "epoch": 0.6191553544494721, "grad_norm": 1.9831771850585938, "learning_rate": 3.2572900735376e-05, "loss": 1.1294, "step": 4105 }, { "epoch": 0.6193061840120664, "grad_norm": 1.9215821027755737, "learning_rate": 3.2550356096611737e-05, "loss": 1.1548, "step": 4106 }, { "epoch": 0.6194570135746607, "grad_norm": 2.003269672393799, "learning_rate": 3.2527815496718096e-05, "loss": 1.1414, "step": 4107 }, { "epoch": 0.6196078431372549, "grad_norm": 2.036864757537842, "learning_rate": 3.250527894091232e-05, "loss": 1.5063, "step": 4108 }, { "epoch": 0.6197586726998492, "grad_norm": 1.99592125415802, "learning_rate": 3.2482746434410685e-05, "loss": 1.3078, "step": 4109 }, { "epoch": 0.6199095022624435, "grad_norm": 2.052480697631836, "learning_rate": 3.246021798242851e-05, "loss": 1.3084, "step": 4110 }, { "epoch": 0.6200603318250377, "grad_norm": 1.9609205722808838, "learning_rate": 3.243769359018023e-05, "loss": 1.2153, "step": 4111 }, { "epoch": 0.620211161387632, "grad_norm": 1.970360279083252, "learning_rate": 3.2415173262879284e-05, "loss": 1.2065, "step": 4112 }, { "epoch": 0.6203619909502263, "grad_norm": 1.8717528581619263, "learning_rate": 3.2392657005738216e-05, "loss": 1.1694, "step": 4113 }, { "epoch": 0.6205128205128205, "grad_norm": 1.84800124168396, "learning_rate": 3.2370144823968597e-05, "loss": 0.7999, "step": 4114 }, { "epoch": 0.6206636500754148, "grad_norm": 1.9470328092575073, "learning_rate": 3.234763672278107e-05, "loss": 1.1901, "step": 4115 }, { "epoch": 0.6208144796380091, "grad_norm": 1.973477840423584, "learning_rate": 3.2325132707385316e-05, "loss": 1.2292, "step": 4116 }, { "epoch": 0.6209653092006033, "grad_norm": 1.799973487854004, "learning_rate": 3.230263278299012e-05, "loss": 0.9647, "step": 4117 }, { "epoch": 0.6211161387631976, "grad_norm": 1.875845193862915, "learning_rate": 3.228013695480324e-05, "loss": 1.0041, "step": 4118 }, { "epoch": 0.6212669683257919, "grad_norm": 1.7588155269622803, "learning_rate": 3.2257645228031573e-05, "loss": 0.9925, "step": 4119 }, { "epoch": 0.6214177978883861, "grad_norm": 2.1523571014404297, "learning_rate": 3.223515760788098e-05, "loss": 0.9937, "step": 4120 }, { "epoch": 0.6215686274509804, "grad_norm": 2.121896982192993, "learning_rate": 3.221267409955643e-05, "loss": 1.4297, "step": 4121 }, { "epoch": 0.6217194570135747, "grad_norm": 1.864038348197937, "learning_rate": 3.219019470826193e-05, "loss": 0.9371, "step": 4122 }, { "epoch": 0.6218702865761689, "grad_norm": 2.241852283477783, "learning_rate": 3.216771943920054e-05, "loss": 1.2224, "step": 4123 }, { "epoch": 0.6220211161387632, "grad_norm": 2.225027084350586, "learning_rate": 3.214524829757433e-05, "loss": 1.2681, "step": 4124 }, { "epoch": 0.6221719457013575, "grad_norm": 1.848245620727539, "learning_rate": 3.2122781288584455e-05, "loss": 1.0708, "step": 4125 }, { "epoch": 0.6223227752639517, "grad_norm": 2.0567965507507324, "learning_rate": 3.2100318417431095e-05, "loss": 1.268, "step": 4126 }, { "epoch": 0.622473604826546, "grad_norm": 1.8369029760360718, "learning_rate": 3.207785968931348e-05, "loss": 1.0612, "step": 4127 }, { "epoch": 0.6226244343891403, "grad_norm": 1.5539590120315552, "learning_rate": 3.205540510942989e-05, "loss": 0.7877, "step": 4128 }, { "epoch": 0.6227752639517345, "grad_norm": 2.191901445388794, "learning_rate": 3.203295468297759e-05, "loss": 1.4338, "step": 4129 }, { "epoch": 0.6229260935143288, "grad_norm": 2.0323266983032227, "learning_rate": 3.201050841515295e-05, "loss": 1.1813, "step": 4130 }, { "epoch": 0.6230769230769231, "grad_norm": 2.2755632400512695, "learning_rate": 3.198806631115135e-05, "loss": 1.4353, "step": 4131 }, { "epoch": 0.6232277526395174, "grad_norm": 1.7936792373657227, "learning_rate": 3.196562837616722e-05, "loss": 0.9077, "step": 4132 }, { "epoch": 0.6233785822021116, "grad_norm": 2.0458149909973145, "learning_rate": 3.194319461539399e-05, "loss": 1.0928, "step": 4133 }, { "epoch": 0.6235294117647059, "grad_norm": 1.8093148469924927, "learning_rate": 3.192076503402417e-05, "loss": 0.8875, "step": 4134 }, { "epoch": 0.6236802413273002, "grad_norm": 1.9889484643936157, "learning_rate": 3.189833963724926e-05, "loss": 1.0467, "step": 4135 }, { "epoch": 0.6238310708898944, "grad_norm": 2.490994453430176, "learning_rate": 3.187591843025982e-05, "loss": 0.8717, "step": 4136 }, { "epoch": 0.6239819004524887, "grad_norm": 1.9786401987075806, "learning_rate": 3.1853501418245454e-05, "loss": 1.1926, "step": 4137 }, { "epoch": 0.624132730015083, "grad_norm": 2.0421969890594482, "learning_rate": 3.1831088606394744e-05, "loss": 0.9857, "step": 4138 }, { "epoch": 0.6242835595776772, "grad_norm": 2.0536365509033203, "learning_rate": 3.180867999989533e-05, "loss": 1.1461, "step": 4139 }, { "epoch": 0.6244343891402715, "grad_norm": 1.8732315301895142, "learning_rate": 3.178627560393389e-05, "loss": 1.1479, "step": 4140 }, { "epoch": 0.6245852187028658, "grad_norm": 2.221444845199585, "learning_rate": 3.1763875423696106e-05, "loss": 1.2888, "step": 4141 }, { "epoch": 0.62473604826546, "grad_norm": 2.1363635063171387, "learning_rate": 3.1741479464366694e-05, "loss": 1.2716, "step": 4142 }, { "epoch": 0.6248868778280543, "grad_norm": 2.0983526706695557, "learning_rate": 3.17190877311294e-05, "loss": 1.219, "step": 4143 }, { "epoch": 0.6250377073906486, "grad_norm": 2.2995691299438477, "learning_rate": 3.169670022916698e-05, "loss": 0.9566, "step": 4144 }, { "epoch": 0.6251885369532428, "grad_norm": 2.121694803237915, "learning_rate": 3.1674316963661205e-05, "loss": 1.1261, "step": 4145 }, { "epoch": 0.6253393665158371, "grad_norm": 1.875169277191162, "learning_rate": 3.165193793979288e-05, "loss": 0.8812, "step": 4146 }, { "epoch": 0.6254901960784314, "grad_norm": 1.8302184343338013, "learning_rate": 3.162956316274186e-05, "loss": 0.8087, "step": 4147 }, { "epoch": 0.6256410256410256, "grad_norm": 1.80866277217865, "learning_rate": 3.160719263768692e-05, "loss": 0.874, "step": 4148 }, { "epoch": 0.6257918552036199, "grad_norm": 1.997422218322754, "learning_rate": 3.158482636980594e-05, "loss": 1.0356, "step": 4149 }, { "epoch": 0.6259426847662142, "grad_norm": 1.640008807182312, "learning_rate": 3.156246436427578e-05, "loss": 0.7505, "step": 4150 }, { "epoch": 0.6260935143288084, "grad_norm": 1.8258100748062134, "learning_rate": 3.154010662627231e-05, "loss": 1.0395, "step": 4151 }, { "epoch": 0.6262443438914027, "grad_norm": 1.8314194679260254, "learning_rate": 3.1517753160970456e-05, "loss": 1.114, "step": 4152 }, { "epoch": 0.626395173453997, "grad_norm": 2.2881128787994385, "learning_rate": 3.149540397354408e-05, "loss": 1.5199, "step": 4153 }, { "epoch": 0.6265460030165912, "grad_norm": 2.1978940963745117, "learning_rate": 3.1473059069166125e-05, "loss": 1.6365, "step": 4154 }, { "epoch": 0.6266968325791855, "grad_norm": 1.8292009830474854, "learning_rate": 3.145071845300849e-05, "loss": 1.0268, "step": 4155 }, { "epoch": 0.6268476621417798, "grad_norm": 1.9458435773849487, "learning_rate": 3.142838213024212e-05, "loss": 1.2232, "step": 4156 }, { "epoch": 0.626998491704374, "grad_norm": 1.6323527097702026, "learning_rate": 3.140605010603693e-05, "loss": 0.8962, "step": 4157 }, { "epoch": 0.6271493212669683, "grad_norm": 2.033097267150879, "learning_rate": 3.1383722385561885e-05, "loss": 1.3677, "step": 4158 }, { "epoch": 0.6273001508295626, "grad_norm": 2.0813498497009277, "learning_rate": 3.1361398973984906e-05, "loss": 1.4273, "step": 4159 }, { "epoch": 0.6274509803921569, "grad_norm": 1.3763247728347778, "learning_rate": 3.1339079876472955e-05, "loss": 0.6154, "step": 4160 }, { "epoch": 0.6276018099547511, "grad_norm": 1.6900060176849365, "learning_rate": 3.1316765098191965e-05, "loss": 0.9449, "step": 4161 }, { "epoch": 0.6277526395173454, "grad_norm": 1.8782049417495728, "learning_rate": 3.129445464430689e-05, "loss": 0.9906, "step": 4162 }, { "epoch": 0.6279034690799397, "grad_norm": 1.6884255409240723, "learning_rate": 3.1272148519981695e-05, "loss": 0.835, "step": 4163 }, { "epoch": 0.6280542986425339, "grad_norm": 1.7860504388809204, "learning_rate": 3.12498467303793e-05, "loss": 1.2175, "step": 4164 }, { "epoch": 0.6282051282051282, "grad_norm": 1.6587984561920166, "learning_rate": 3.122754928066167e-05, "loss": 0.9586, "step": 4165 }, { "epoch": 0.6283559577677225, "grad_norm": 2.0883383750915527, "learning_rate": 3.1205256175989706e-05, "loss": 1.3089, "step": 4166 }, { "epoch": 0.6285067873303167, "grad_norm": 1.8549760580062866, "learning_rate": 3.118296742152337e-05, "loss": 0.9936, "step": 4167 }, { "epoch": 0.628657616892911, "grad_norm": 2.249657392501831, "learning_rate": 3.1160683022421573e-05, "loss": 1.4041, "step": 4168 }, { "epoch": 0.6288084464555053, "grad_norm": 1.8997793197631836, "learning_rate": 3.113840298384224e-05, "loss": 1.1933, "step": 4169 }, { "epoch": 0.6289592760180995, "grad_norm": 2.2107577323913574, "learning_rate": 3.111612731094227e-05, "loss": 1.1276, "step": 4170 }, { "epoch": 0.6291101055806938, "grad_norm": 1.5076310634613037, "learning_rate": 3.1093856008877566e-05, "loss": 0.7975, "step": 4171 }, { "epoch": 0.6292609351432881, "grad_norm": 2.212472915649414, "learning_rate": 3.107158908280301e-05, "loss": 1.4912, "step": 4172 }, { "epoch": 0.6294117647058823, "grad_norm": 1.6318174600601196, "learning_rate": 3.104932653787247e-05, "loss": 0.8257, "step": 4173 }, { "epoch": 0.6295625942684766, "grad_norm": 1.9729692935943604, "learning_rate": 3.102706837923882e-05, "loss": 1.1271, "step": 4174 }, { "epoch": 0.6297134238310709, "grad_norm": 2.0626285076141357, "learning_rate": 3.100481461205391e-05, "loss": 1.3105, "step": 4175 }, { "epoch": 0.6298642533936651, "grad_norm": 1.8247520923614502, "learning_rate": 3.098256524146852e-05, "loss": 0.9712, "step": 4176 }, { "epoch": 0.6300150829562594, "grad_norm": 2.115445852279663, "learning_rate": 3.096032027263249e-05, "loss": 1.2779, "step": 4177 }, { "epoch": 0.6301659125188537, "grad_norm": 1.9781287908554077, "learning_rate": 3.093807971069461e-05, "loss": 1.1156, "step": 4178 }, { "epoch": 0.630316742081448, "grad_norm": 2.082458734512329, "learning_rate": 3.091584356080265e-05, "loss": 1.0946, "step": 4179 }, { "epoch": 0.6304675716440422, "grad_norm": 2.1613845825195312, "learning_rate": 3.0893611828103355e-05, "loss": 1.3963, "step": 4180 }, { "epoch": 0.6306184012066365, "grad_norm": 2.0180821418762207, "learning_rate": 3.087138451774245e-05, "loss": 1.011, "step": 4181 }, { "epoch": 0.6307692307692307, "grad_norm": 2.2814180850982666, "learning_rate": 3.084916163486464e-05, "loss": 1.1109, "step": 4182 }, { "epoch": 0.630920060331825, "grad_norm": 2.045764923095703, "learning_rate": 3.082694318461361e-05, "loss": 1.1652, "step": 4183 }, { "epoch": 0.6310708898944193, "grad_norm": 2.051539659500122, "learning_rate": 3.080472917213201e-05, "loss": 1.1825, "step": 4184 }, { "epoch": 0.6312217194570136, "grad_norm": 1.909887433052063, "learning_rate": 3.078251960256144e-05, "loss": 1.197, "step": 4185 }, { "epoch": 0.6313725490196078, "grad_norm": 2.3189001083374023, "learning_rate": 3.076031448104253e-05, "loss": 1.3615, "step": 4186 }, { "epoch": 0.6315233785822021, "grad_norm": 1.7613399028778076, "learning_rate": 3.073811381271483e-05, "loss": 0.8605, "step": 4187 }, { "epoch": 0.6316742081447964, "grad_norm": 2.4054336547851562, "learning_rate": 3.071591760271687e-05, "loss": 1.2078, "step": 4188 }, { "epoch": 0.6318250377073906, "grad_norm": 1.951547384262085, "learning_rate": 3.069372585618618e-05, "loss": 0.971, "step": 4189 }, { "epoch": 0.6319758672699849, "grad_norm": 1.8021390438079834, "learning_rate": 3.0671538578259205e-05, "loss": 1.0471, "step": 4190 }, { "epoch": 0.6321266968325792, "grad_norm": 2.1819238662719727, "learning_rate": 3.0649355774071395e-05, "loss": 1.0215, "step": 4191 }, { "epoch": 0.6322775263951734, "grad_norm": 2.5673365592956543, "learning_rate": 3.062717744875715e-05, "loss": 1.1644, "step": 4192 }, { "epoch": 0.6324283559577677, "grad_norm": 2.4157252311706543, "learning_rate": 3.0605003607449845e-05, "loss": 1.4542, "step": 4193 }, { "epoch": 0.632579185520362, "grad_norm": 1.7722249031066895, "learning_rate": 3.0582834255281786e-05, "loss": 0.7866, "step": 4194 }, { "epoch": 0.6327300150829562, "grad_norm": 2.0440282821655273, "learning_rate": 3.056066939738427e-05, "loss": 1.0273, "step": 4195 }, { "epoch": 0.6328808446455505, "grad_norm": 1.7164654731750488, "learning_rate": 3.053850903888753e-05, "loss": 0.7982, "step": 4196 }, { "epoch": 0.6330316742081448, "grad_norm": 1.9142847061157227, "learning_rate": 3.0516353184920797e-05, "loss": 0.9243, "step": 4197 }, { "epoch": 0.633182503770739, "grad_norm": 2.2113189697265625, "learning_rate": 3.0494201840612224e-05, "loss": 1.2819, "step": 4198 }, { "epoch": 0.6333333333333333, "grad_norm": 1.873897671699524, "learning_rate": 3.0472055011088928e-05, "loss": 0.8311, "step": 4199 }, { "epoch": 0.6334841628959276, "grad_norm": 2.124514579772949, "learning_rate": 3.044991270147699e-05, "loss": 1.0634, "step": 4200 }, { "epoch": 0.6336349924585218, "grad_norm": 1.8144724369049072, "learning_rate": 3.042777491690144e-05, "loss": 1.103, "step": 4201 }, { "epoch": 0.6337858220211161, "grad_norm": 2.0371062755584717, "learning_rate": 3.040564166248625e-05, "loss": 1.4887, "step": 4202 }, { "epoch": 0.6339366515837104, "grad_norm": 2.0759525299072266, "learning_rate": 3.0383512943354364e-05, "loss": 1.3508, "step": 4203 }, { "epoch": 0.6340874811463046, "grad_norm": 2.168422222137451, "learning_rate": 3.036138876462765e-05, "loss": 1.6124, "step": 4204 }, { "epoch": 0.6342383107088989, "grad_norm": 2.2327098846435547, "learning_rate": 3.033926913142695e-05, "loss": 1.8047, "step": 4205 }, { "epoch": 0.6343891402714932, "grad_norm": 2.030261993408203, "learning_rate": 3.0317154048872042e-05, "loss": 1.4175, "step": 4206 }, { "epoch": 0.6345399698340874, "grad_norm": 2.154801607131958, "learning_rate": 3.0295043522081657e-05, "loss": 1.3514, "step": 4207 }, { "epoch": 0.6346907993966817, "grad_norm": 1.9126750230789185, "learning_rate": 3.027293755617346e-05, "loss": 1.1257, "step": 4208 }, { "epoch": 0.634841628959276, "grad_norm": 1.8124282360076904, "learning_rate": 3.0250836156264067e-05, "loss": 1.1665, "step": 4209 }, { "epoch": 0.6349924585218703, "grad_norm": 2.281590700149536, "learning_rate": 3.0228739327469047e-05, "loss": 1.5867, "step": 4210 }, { "epoch": 0.6351432880844645, "grad_norm": 1.6189051866531372, "learning_rate": 3.0206647074902895e-05, "loss": 0.856, "step": 4211 }, { "epoch": 0.6352941176470588, "grad_norm": 1.7163923978805542, "learning_rate": 3.018455940367907e-05, "loss": 1.057, "step": 4212 }, { "epoch": 0.635444947209653, "grad_norm": 1.9865096807479858, "learning_rate": 3.0162476318909923e-05, "loss": 1.2267, "step": 4213 }, { "epoch": 0.6355957767722473, "grad_norm": 1.7751446962356567, "learning_rate": 3.014039782570679e-05, "loss": 0.9841, "step": 4214 }, { "epoch": 0.6357466063348416, "grad_norm": 1.5663546323776245, "learning_rate": 3.0118323929179926e-05, "loss": 0.8342, "step": 4215 }, { "epoch": 0.6358974358974359, "grad_norm": 2.187666893005371, "learning_rate": 3.0096254634438526e-05, "loss": 1.5071, "step": 4216 }, { "epoch": 0.6360482654600301, "grad_norm": 2.0091283321380615, "learning_rate": 3.0074189946590724e-05, "loss": 1.3635, "step": 4217 }, { "epoch": 0.6361990950226244, "grad_norm": 1.6676578521728516, "learning_rate": 3.0052129870743577e-05, "loss": 1.0493, "step": 4218 }, { "epoch": 0.6363499245852187, "grad_norm": 1.6475070714950562, "learning_rate": 3.0030074412003085e-05, "loss": 0.9489, "step": 4219 }, { "epoch": 0.6365007541478129, "grad_norm": 1.8594186305999756, "learning_rate": 3.0008023575474172e-05, "loss": 1.0761, "step": 4220 }, { "epoch": 0.6366515837104072, "grad_norm": 2.03241229057312, "learning_rate": 2.9985977366260686e-05, "loss": 1.3091, "step": 4221 }, { "epoch": 0.6368024132730015, "grad_norm": 1.8296679258346558, "learning_rate": 2.9963935789465446e-05, "loss": 0.9492, "step": 4222 }, { "epoch": 0.6369532428355957, "grad_norm": 1.8466787338256836, "learning_rate": 2.994189885019012e-05, "loss": 1.072, "step": 4223 }, { "epoch": 0.63710407239819, "grad_norm": 2.076136350631714, "learning_rate": 2.991986655353536e-05, "loss": 1.2623, "step": 4224 }, { "epoch": 0.6372549019607843, "grad_norm": 1.8045437335968018, "learning_rate": 2.989783890460074e-05, "loss": 0.8858, "step": 4225 }, { "epoch": 0.6374057315233785, "grad_norm": 1.6164228916168213, "learning_rate": 2.987581590848475e-05, "loss": 0.7653, "step": 4226 }, { "epoch": 0.6375565610859728, "grad_norm": 2.1668052673339844, "learning_rate": 2.985379757028479e-05, "loss": 1.4534, "step": 4227 }, { "epoch": 0.6377073906485671, "grad_norm": 2.0459043979644775, "learning_rate": 2.9831783895097217e-05, "loss": 1.0754, "step": 4228 }, { "epoch": 0.6378582202111613, "grad_norm": 1.9405592679977417, "learning_rate": 2.9809774888017266e-05, "loss": 0.983, "step": 4229 }, { "epoch": 0.6380090497737556, "grad_norm": 2.019425868988037, "learning_rate": 2.9787770554139116e-05, "loss": 1.2763, "step": 4230 }, { "epoch": 0.6381598793363499, "grad_norm": 1.5386815071105957, "learning_rate": 2.9765770898555878e-05, "loss": 0.7723, "step": 4231 }, { "epoch": 0.6383107088989441, "grad_norm": 1.9770363569259644, "learning_rate": 2.9743775926359534e-05, "loss": 0.9068, "step": 4232 }, { "epoch": 0.6384615384615384, "grad_norm": 2.0960278511047363, "learning_rate": 2.972178564264102e-05, "loss": 1.0293, "step": 4233 }, { "epoch": 0.6386123680241327, "grad_norm": 2.3621792793273926, "learning_rate": 2.969980005249019e-05, "loss": 1.3062, "step": 4234 }, { "epoch": 0.638763197586727, "grad_norm": 2.3824892044067383, "learning_rate": 2.967781916099578e-05, "loss": 1.501, "step": 4235 }, { "epoch": 0.6389140271493212, "grad_norm": 2.5578866004943848, "learning_rate": 2.9655842973245464e-05, "loss": 1.4461, "step": 4236 }, { "epoch": 0.6390648567119155, "grad_norm": 2.002455472946167, "learning_rate": 2.9633871494325814e-05, "loss": 1.1094, "step": 4237 }, { "epoch": 0.6392156862745098, "grad_norm": 2.1413679122924805, "learning_rate": 2.9611904729322338e-05, "loss": 1.1629, "step": 4238 }, { "epoch": 0.639366515837104, "grad_norm": 2.1001882553100586, "learning_rate": 2.9589942683319416e-05, "loss": 1.4282, "step": 4239 }, { "epoch": 0.6395173453996983, "grad_norm": 2.669832944869995, "learning_rate": 2.956798536140038e-05, "loss": 1.4635, "step": 4240 }, { "epoch": 0.6396681749622926, "grad_norm": 1.941057801246643, "learning_rate": 2.9546032768647402e-05, "loss": 0.9993, "step": 4241 }, { "epoch": 0.6398190045248868, "grad_norm": 2.263823986053467, "learning_rate": 2.9524084910141615e-05, "loss": 1.3422, "step": 4242 }, { "epoch": 0.6399698340874811, "grad_norm": 2.037031888961792, "learning_rate": 2.9502141790963034e-05, "loss": 0.9586, "step": 4243 }, { "epoch": 0.6401206636500754, "grad_norm": 2.0615642070770264, "learning_rate": 2.94802034161906e-05, "loss": 1.2711, "step": 4244 }, { "epoch": 0.6402714932126696, "grad_norm": 2.1307594776153564, "learning_rate": 2.9458269790902128e-05, "loss": 1.1356, "step": 4245 }, { "epoch": 0.6404223227752639, "grad_norm": 1.9904541969299316, "learning_rate": 2.9436340920174345e-05, "loss": 1.027, "step": 4246 }, { "epoch": 0.6405731523378582, "grad_norm": 1.9379582405090332, "learning_rate": 2.9414416809082877e-05, "loss": 0.8326, "step": 4247 }, { "epoch": 0.6407239819004525, "grad_norm": 1.4040699005126953, "learning_rate": 2.939249746270225e-05, "loss": 0.6227, "step": 4248 }, { "epoch": 0.6408748114630468, "grad_norm": 1.646571159362793, "learning_rate": 2.9370582886105897e-05, "loss": 0.6961, "step": 4249 }, { "epoch": 0.6410256410256411, "grad_norm": 1.557146668434143, "learning_rate": 2.934867308436613e-05, "loss": 0.7248, "step": 4250 }, { "epoch": 0.6411764705882353, "grad_norm": 1.7021080255508423, "learning_rate": 2.9326768062554143e-05, "loss": 0.9324, "step": 4251 }, { "epoch": 0.6413273001508296, "grad_norm": 1.995469570159912, "learning_rate": 2.930486782574006e-05, "loss": 1.3485, "step": 4252 }, { "epoch": 0.6414781297134239, "grad_norm": 1.8303143978118896, "learning_rate": 2.928297237899288e-05, "loss": 1.0589, "step": 4253 }, { "epoch": 0.6416289592760182, "grad_norm": 1.8311840295791626, "learning_rate": 2.9261081727380485e-05, "loss": 1.0808, "step": 4254 }, { "epoch": 0.6417797888386124, "grad_norm": 1.9535244703292847, "learning_rate": 2.9239195875969662e-05, "loss": 1.3477, "step": 4255 }, { "epoch": 0.6419306184012067, "grad_norm": 1.5242146253585815, "learning_rate": 2.9217314829826077e-05, "loss": 0.7582, "step": 4256 }, { "epoch": 0.642081447963801, "grad_norm": 1.7812702655792236, "learning_rate": 2.9195438594014284e-05, "loss": 0.8854, "step": 4257 }, { "epoch": 0.6422322775263952, "grad_norm": 2.307931661605835, "learning_rate": 2.9173567173597737e-05, "loss": 1.2961, "step": 4258 }, { "epoch": 0.6423831070889895, "grad_norm": 1.740419626235962, "learning_rate": 2.9151700573638773e-05, "loss": 0.9823, "step": 4259 }, { "epoch": 0.6425339366515838, "grad_norm": 1.6996687650680542, "learning_rate": 2.912983879919857e-05, "loss": 0.8426, "step": 4260 }, { "epoch": 0.642684766214178, "grad_norm": 2.270007610321045, "learning_rate": 2.9107981855337246e-05, "loss": 1.1648, "step": 4261 }, { "epoch": 0.6428355957767723, "grad_norm": 2.1406545639038086, "learning_rate": 2.9086129747113778e-05, "loss": 1.2161, "step": 4262 }, { "epoch": 0.6429864253393666, "grad_norm": 1.8169647455215454, "learning_rate": 2.9064282479586026e-05, "loss": 1.1628, "step": 4263 }, { "epoch": 0.6431372549019608, "grad_norm": 2.1595945358276367, "learning_rate": 2.9042440057810728e-05, "loss": 1.2033, "step": 4264 }, { "epoch": 0.6432880844645551, "grad_norm": 1.827949047088623, "learning_rate": 2.9020602486843496e-05, "loss": 0.9761, "step": 4265 }, { "epoch": 0.6434389140271494, "grad_norm": 1.733146071434021, "learning_rate": 2.8998769771738833e-05, "loss": 1.0196, "step": 4266 }, { "epoch": 0.6435897435897436, "grad_norm": 2.281442642211914, "learning_rate": 2.8976941917550103e-05, "loss": 1.4002, "step": 4267 }, { "epoch": 0.6437405731523379, "grad_norm": 2.371664524078369, "learning_rate": 2.8955118929329577e-05, "loss": 1.8352, "step": 4268 }, { "epoch": 0.6438914027149322, "grad_norm": 2.0034492015838623, "learning_rate": 2.893330081212833e-05, "loss": 1.111, "step": 4269 }, { "epoch": 0.6440422322775264, "grad_norm": 2.0831093788146973, "learning_rate": 2.8911487570996365e-05, "loss": 1.0374, "step": 4270 }, { "epoch": 0.6441930618401207, "grad_norm": 2.2700893878936768, "learning_rate": 2.8889679210982563e-05, "loss": 1.1885, "step": 4271 }, { "epoch": 0.644343891402715, "grad_norm": 2.106363296508789, "learning_rate": 2.886787573713464e-05, "loss": 1.1697, "step": 4272 }, { "epoch": 0.6444947209653092, "grad_norm": 1.7629790306091309, "learning_rate": 2.884607715449921e-05, "loss": 0.7769, "step": 4273 }, { "epoch": 0.6446455505279035, "grad_norm": 1.949423909187317, "learning_rate": 2.8824283468121738e-05, "loss": 1.1442, "step": 4274 }, { "epoch": 0.6447963800904978, "grad_norm": 2.0169317722320557, "learning_rate": 2.8802494683046567e-05, "loss": 1.0132, "step": 4275 }, { "epoch": 0.644947209653092, "grad_norm": 2.005075454711914, "learning_rate": 2.8780710804316884e-05, "loss": 1.1321, "step": 4276 }, { "epoch": 0.6450980392156863, "grad_norm": 1.674033761024475, "learning_rate": 2.8758931836974768e-05, "loss": 0.8399, "step": 4277 }, { "epoch": 0.6452488687782806, "grad_norm": 1.9275245666503906, "learning_rate": 2.873715778606117e-05, "loss": 0.9128, "step": 4278 }, { "epoch": 0.6453996983408749, "grad_norm": 1.9272633790969849, "learning_rate": 2.8715388656615838e-05, "loss": 1.0323, "step": 4279 }, { "epoch": 0.6455505279034691, "grad_norm": 2.090581178665161, "learning_rate": 2.8693624453677438e-05, "loss": 1.2166, "step": 4280 }, { "epoch": 0.6457013574660634, "grad_norm": 2.036947727203369, "learning_rate": 2.8671865182283496e-05, "loss": 0.9267, "step": 4281 }, { "epoch": 0.6458521870286577, "grad_norm": 2.0430567264556885, "learning_rate": 2.8650110847470378e-05, "loss": 1.2861, "step": 4282 }, { "epoch": 0.6460030165912519, "grad_norm": 2.0791804790496826, "learning_rate": 2.8628361454273306e-05, "loss": 1.1263, "step": 4283 }, { "epoch": 0.6461538461538462, "grad_norm": 2.090322732925415, "learning_rate": 2.860661700772638e-05, "loss": 1.1863, "step": 4284 }, { "epoch": 0.6463046757164405, "grad_norm": 1.5754964351654053, "learning_rate": 2.8584877512862517e-05, "loss": 0.7915, "step": 4285 }, { "epoch": 0.6464555052790347, "grad_norm": 1.951542615890503, "learning_rate": 2.856314297471353e-05, "loss": 0.956, "step": 4286 }, { "epoch": 0.646606334841629, "grad_norm": 2.207848072052002, "learning_rate": 2.8541413398310057e-05, "loss": 1.2819, "step": 4287 }, { "epoch": 0.6467571644042233, "grad_norm": 1.9823555946350098, "learning_rate": 2.8519688788681598e-05, "loss": 1.0926, "step": 4288 }, { "epoch": 0.6469079939668175, "grad_norm": 2.260096549987793, "learning_rate": 2.8497969150856496e-05, "loss": 1.1451, "step": 4289 }, { "epoch": 0.6470588235294118, "grad_norm": 2.154817581176758, "learning_rate": 2.8476254489861964e-05, "loss": 1.2571, "step": 4290 }, { "epoch": 0.6472096530920061, "grad_norm": 1.996903419494629, "learning_rate": 2.8454544810724027e-05, "loss": 0.9858, "step": 4291 }, { "epoch": 0.6473604826546003, "grad_norm": 2.232395648956299, "learning_rate": 2.8432840118467596e-05, "loss": 1.2456, "step": 4292 }, { "epoch": 0.6475113122171946, "grad_norm": 2.0480306148529053, "learning_rate": 2.8411140418116394e-05, "loss": 1.0277, "step": 4293 }, { "epoch": 0.6476621417797889, "grad_norm": 2.0657477378845215, "learning_rate": 2.838944571469302e-05, "loss": 1.1483, "step": 4294 }, { "epoch": 0.6478129713423831, "grad_norm": 2.2139177322387695, "learning_rate": 2.8367756013218884e-05, "loss": 1.0445, "step": 4295 }, { "epoch": 0.6479638009049774, "grad_norm": 1.8812731504440308, "learning_rate": 2.8346071318714283e-05, "loss": 0.8432, "step": 4296 }, { "epoch": 0.6481146304675717, "grad_norm": 1.8499181270599365, "learning_rate": 2.8324391636198283e-05, "loss": 0.8758, "step": 4297 }, { "epoch": 0.6482654600301659, "grad_norm": 2.1865739822387695, "learning_rate": 2.8302716970688858e-05, "loss": 0.9518, "step": 4298 }, { "epoch": 0.6484162895927602, "grad_norm": 2.1535258293151855, "learning_rate": 2.8281047327202798e-05, "loss": 0.9325, "step": 4299 }, { "epoch": 0.6485671191553545, "grad_norm": 2.374119758605957, "learning_rate": 2.825938271075572e-05, "loss": 1.1599, "step": 4300 }, { "epoch": 0.6487179487179487, "grad_norm": 1.8881893157958984, "learning_rate": 2.823772312636209e-05, "loss": 1.3817, "step": 4301 }, { "epoch": 0.648868778280543, "grad_norm": 1.7471024990081787, "learning_rate": 2.8216068579035214e-05, "loss": 0.8114, "step": 4302 }, { "epoch": 0.6490196078431373, "grad_norm": 2.0855958461761475, "learning_rate": 2.8194419073787216e-05, "loss": 1.2383, "step": 4303 }, { "epoch": 0.6491704374057315, "grad_norm": 1.6998381614685059, "learning_rate": 2.817277461562906e-05, "loss": 0.8921, "step": 4304 }, { "epoch": 0.6493212669683258, "grad_norm": 1.8165861368179321, "learning_rate": 2.8151135209570545e-05, "loss": 0.9081, "step": 4305 }, { "epoch": 0.6494720965309201, "grad_norm": 2.1854095458984375, "learning_rate": 2.8129500860620317e-05, "loss": 1.3686, "step": 4306 }, { "epoch": 0.6496229260935144, "grad_norm": 2.453355073928833, "learning_rate": 2.8107871573785793e-05, "loss": 1.5426, "step": 4307 }, { "epoch": 0.6497737556561086, "grad_norm": 1.8244335651397705, "learning_rate": 2.808624735407328e-05, "loss": 0.903, "step": 4308 }, { "epoch": 0.6499245852187029, "grad_norm": 2.1809511184692383, "learning_rate": 2.806462820648789e-05, "loss": 1.4771, "step": 4309 }, { "epoch": 0.6500754147812972, "grad_norm": 2.232210636138916, "learning_rate": 2.8043014136033563e-05, "loss": 1.2428, "step": 4310 }, { "epoch": 0.6502262443438914, "grad_norm": 1.9895273447036743, "learning_rate": 2.8021405147713052e-05, "loss": 0.9247, "step": 4311 }, { "epoch": 0.6503770739064857, "grad_norm": 1.991914987564087, "learning_rate": 2.7999801246527955e-05, "loss": 1.2254, "step": 4312 }, { "epoch": 0.65052790346908, "grad_norm": 2.2909624576568604, "learning_rate": 2.7978202437478675e-05, "loss": 1.1944, "step": 4313 }, { "epoch": 0.6506787330316742, "grad_norm": 1.8935598134994507, "learning_rate": 2.795660872556445e-05, "loss": 1.0466, "step": 4314 }, { "epoch": 0.6508295625942685, "grad_norm": 1.7987301349639893, "learning_rate": 2.793502011578334e-05, "loss": 0.9762, "step": 4315 }, { "epoch": 0.6509803921568628, "grad_norm": 1.820001244544983, "learning_rate": 2.7913436613132183e-05, "loss": 1.14, "step": 4316 }, { "epoch": 0.651131221719457, "grad_norm": 2.211435317993164, "learning_rate": 2.789185822260668e-05, "loss": 1.4515, "step": 4317 }, { "epoch": 0.6512820512820513, "grad_norm": 2.015305519104004, "learning_rate": 2.7870284949201342e-05, "loss": 1.0674, "step": 4318 }, { "epoch": 0.6514328808446456, "grad_norm": 1.9374717473983765, "learning_rate": 2.7848716797909492e-05, "loss": 1.228, "step": 4319 }, { "epoch": 0.6515837104072398, "grad_norm": 2.1030166149139404, "learning_rate": 2.7827153773723257e-05, "loss": 1.0957, "step": 4320 }, { "epoch": 0.6517345399698341, "grad_norm": 1.9124196767807007, "learning_rate": 2.7805595881633596e-05, "loss": 1.0039, "step": 4321 }, { "epoch": 0.6518853695324284, "grad_norm": 2.219850540161133, "learning_rate": 2.7784043126630255e-05, "loss": 1.2518, "step": 4322 }, { "epoch": 0.6520361990950226, "grad_norm": 2.0444538593292236, "learning_rate": 2.7762495513701813e-05, "loss": 1.0938, "step": 4323 }, { "epoch": 0.6521870286576169, "grad_norm": 2.1170871257781982, "learning_rate": 2.7740953047835666e-05, "loss": 1.4428, "step": 4324 }, { "epoch": 0.6523378582202112, "grad_norm": 2.17271089553833, "learning_rate": 2.771941573401797e-05, "loss": 1.0962, "step": 4325 }, { "epoch": 0.6524886877828054, "grad_norm": 2.181549310684204, "learning_rate": 2.769788357723374e-05, "loss": 1.2463, "step": 4326 }, { "epoch": 0.6526395173453997, "grad_norm": 1.9101901054382324, "learning_rate": 2.7676356582466778e-05, "loss": 1.0808, "step": 4327 }, { "epoch": 0.652790346907994, "grad_norm": 2.042769432067871, "learning_rate": 2.7654834754699693e-05, "loss": 1.1781, "step": 4328 }, { "epoch": 0.6529411764705882, "grad_norm": 2.087529182434082, "learning_rate": 2.7633318098913896e-05, "loss": 1.2773, "step": 4329 }, { "epoch": 0.6530920060331825, "grad_norm": 1.7922618389129639, "learning_rate": 2.7611806620089607e-05, "loss": 0.9999, "step": 4330 }, { "epoch": 0.6532428355957768, "grad_norm": 1.9095731973648071, "learning_rate": 2.7590300323205844e-05, "loss": 1.0852, "step": 4331 }, { "epoch": 0.653393665158371, "grad_norm": 1.9506059885025024, "learning_rate": 2.756879921324042e-05, "loss": 1.1505, "step": 4332 }, { "epoch": 0.6535444947209653, "grad_norm": 1.7650158405303955, "learning_rate": 2.7547303295169957e-05, "loss": 0.762, "step": 4333 }, { "epoch": 0.6536953242835596, "grad_norm": 1.5633152723312378, "learning_rate": 2.7525812573969867e-05, "loss": 0.724, "step": 4334 }, { "epoch": 0.6538461538461539, "grad_norm": 1.9011088609695435, "learning_rate": 2.750432705461436e-05, "loss": 0.9308, "step": 4335 }, { "epoch": 0.6539969834087481, "grad_norm": 2.2035138607025146, "learning_rate": 2.748284674207645e-05, "loss": 1.142, "step": 4336 }, { "epoch": 0.6541478129713424, "grad_norm": 1.8959178924560547, "learning_rate": 2.7461371641327938e-05, "loss": 0.9465, "step": 4337 }, { "epoch": 0.6542986425339367, "grad_norm": 1.983120322227478, "learning_rate": 2.7439901757339426e-05, "loss": 1.0877, "step": 4338 }, { "epoch": 0.6544494720965309, "grad_norm": 2.0747601985931396, "learning_rate": 2.741843709508031e-05, "loss": 1.1706, "step": 4339 }, { "epoch": 0.6546003016591252, "grad_norm": 2.1220078468322754, "learning_rate": 2.7396977659518748e-05, "loss": 1.0444, "step": 4340 }, { "epoch": 0.6547511312217195, "grad_norm": 2.1055171489715576, "learning_rate": 2.737552345562173e-05, "loss": 1.1489, "step": 4341 }, { "epoch": 0.6549019607843137, "grad_norm": 1.788300633430481, "learning_rate": 2.7354074488355008e-05, "loss": 0.8925, "step": 4342 }, { "epoch": 0.655052790346908, "grad_norm": 2.1498630046844482, "learning_rate": 2.7332630762683165e-05, "loss": 1.2735, "step": 4343 }, { "epoch": 0.6552036199095023, "grad_norm": 2.416926860809326, "learning_rate": 2.7311192283569475e-05, "loss": 1.0822, "step": 4344 }, { "epoch": 0.6553544494720965, "grad_norm": 2.1345763206481934, "learning_rate": 2.72897590559761e-05, "loss": 1.1316, "step": 4345 }, { "epoch": 0.6555052790346908, "grad_norm": 1.7388973236083984, "learning_rate": 2.726833108486393e-05, "loss": 0.7596, "step": 4346 }, { "epoch": 0.6556561085972851, "grad_norm": 1.9436395168304443, "learning_rate": 2.7246908375192658e-05, "loss": 0.8839, "step": 4347 }, { "epoch": 0.6558069381598793, "grad_norm": 2.1143417358398438, "learning_rate": 2.722549093192076e-05, "loss": 1.0623, "step": 4348 }, { "epoch": 0.6559577677224736, "grad_norm": 1.5826070308685303, "learning_rate": 2.720407876000548e-05, "loss": 0.7243, "step": 4349 }, { "epoch": 0.6561085972850679, "grad_norm": 1.9750350713729858, "learning_rate": 2.718267186440286e-05, "loss": 0.9855, "step": 4350 }, { "epoch": 0.6562594268476621, "grad_norm": 1.9834917783737183, "learning_rate": 2.7161270250067706e-05, "loss": 1.4092, "step": 4351 }, { "epoch": 0.6564102564102564, "grad_norm": 1.7406214475631714, "learning_rate": 2.7139873921953618e-05, "loss": 1.1256, "step": 4352 }, { "epoch": 0.6565610859728507, "grad_norm": 1.8588886260986328, "learning_rate": 2.711848288501293e-05, "loss": 1.0508, "step": 4353 }, { "epoch": 0.656711915535445, "grad_norm": 2.0893218517303467, "learning_rate": 2.7097097144196802e-05, "loss": 1.358, "step": 4354 }, { "epoch": 0.6568627450980392, "grad_norm": 1.870761752128601, "learning_rate": 2.7075716704455147e-05, "loss": 1.0292, "step": 4355 }, { "epoch": 0.6570135746606335, "grad_norm": 1.854346752166748, "learning_rate": 2.7054341570736646e-05, "loss": 0.9393, "step": 4356 }, { "epoch": 0.6571644042232277, "grad_norm": 1.7644259929656982, "learning_rate": 2.7032971747988766e-05, "loss": 1.0066, "step": 4357 }, { "epoch": 0.657315233785822, "grad_norm": 1.731934905052185, "learning_rate": 2.7011607241157737e-05, "loss": 0.7984, "step": 4358 }, { "epoch": 0.6574660633484163, "grad_norm": 1.9067120552062988, "learning_rate": 2.6990248055188556e-05, "loss": 1.149, "step": 4359 }, { "epoch": 0.6576168929110106, "grad_norm": 2.005640983581543, "learning_rate": 2.6968894195024985e-05, "loss": 1.0495, "step": 4360 }, { "epoch": 0.6577677224736048, "grad_norm": 2.256908655166626, "learning_rate": 2.6947545665609575e-05, "loss": 1.1335, "step": 4361 }, { "epoch": 0.6579185520361991, "grad_norm": 1.7261273860931396, "learning_rate": 2.6926202471883632e-05, "loss": 0.9146, "step": 4362 }, { "epoch": 0.6580693815987934, "grad_norm": 1.888320803642273, "learning_rate": 2.6904864618787186e-05, "loss": 1.0127, "step": 4363 }, { "epoch": 0.6582202111613876, "grad_norm": 1.8069711923599243, "learning_rate": 2.688353211125909e-05, "loss": 1.0317, "step": 4364 }, { "epoch": 0.6583710407239819, "grad_norm": 1.876613974571228, "learning_rate": 2.6862204954236936e-05, "loss": 1.1009, "step": 4365 }, { "epoch": 0.6585218702865762, "grad_norm": 1.7066594362258911, "learning_rate": 2.6840883152657075e-05, "loss": 0.9074, "step": 4366 }, { "epoch": 0.6586726998491704, "grad_norm": 2.243077278137207, "learning_rate": 2.681956671145463e-05, "loss": 1.341, "step": 4367 }, { "epoch": 0.6588235294117647, "grad_norm": 1.990283489227295, "learning_rate": 2.6798255635563468e-05, "loss": 1.1684, "step": 4368 }, { "epoch": 0.658974358974359, "grad_norm": 1.9646904468536377, "learning_rate": 2.6776949929916227e-05, "loss": 1.0393, "step": 4369 }, { "epoch": 0.6591251885369532, "grad_norm": 1.814702033996582, "learning_rate": 2.6755649599444288e-05, "loss": 1.04, "step": 4370 }, { "epoch": 0.6592760180995475, "grad_norm": 2.047797918319702, "learning_rate": 2.6734354649077824e-05, "loss": 1.2371, "step": 4371 }, { "epoch": 0.6594268476621418, "grad_norm": 2.056192398071289, "learning_rate": 2.671306508374569e-05, "loss": 1.1497, "step": 4372 }, { "epoch": 0.659577677224736, "grad_norm": 1.9356954097747803, "learning_rate": 2.6691780908375567e-05, "loss": 0.9889, "step": 4373 }, { "epoch": 0.6597285067873303, "grad_norm": 2.0646581649780273, "learning_rate": 2.667050212789386e-05, "loss": 1.3521, "step": 4374 }, { "epoch": 0.6598793363499246, "grad_norm": 2.1623876094818115, "learning_rate": 2.6649228747225712e-05, "loss": 1.2523, "step": 4375 }, { "epoch": 0.6600301659125188, "grad_norm": 2.1613292694091797, "learning_rate": 2.6627960771295046e-05, "loss": 1.1268, "step": 4376 }, { "epoch": 0.6601809954751131, "grad_norm": 2.1104178428649902, "learning_rate": 2.6606698205024516e-05, "loss": 1.1038, "step": 4377 }, { "epoch": 0.6603318250377074, "grad_norm": 2.9439752101898193, "learning_rate": 2.6585441053335513e-05, "loss": 1.324, "step": 4378 }, { "epoch": 0.6604826546003016, "grad_norm": 2.216477632522583, "learning_rate": 2.6564189321148214e-05, "loss": 1.3502, "step": 4379 }, { "epoch": 0.6606334841628959, "grad_norm": 1.8979535102844238, "learning_rate": 2.6542943013381495e-05, "loss": 1.1529, "step": 4380 }, { "epoch": 0.6607843137254902, "grad_norm": 1.8004547357559204, "learning_rate": 2.6521702134953e-05, "loss": 0.9419, "step": 4381 }, { "epoch": 0.6609351432880844, "grad_norm": 2.0123109817504883, "learning_rate": 2.6500466690779114e-05, "loss": 1.0441, "step": 4382 }, { "epoch": 0.6610859728506787, "grad_norm": 2.152487277984619, "learning_rate": 2.647923668577497e-05, "loss": 1.1807, "step": 4383 }, { "epoch": 0.661236802413273, "grad_norm": 2.0333118438720703, "learning_rate": 2.6458012124854438e-05, "loss": 0.9951, "step": 4384 }, { "epoch": 0.6613876319758673, "grad_norm": 2.1248157024383545, "learning_rate": 2.6436793012930112e-05, "loss": 1.2244, "step": 4385 }, { "epoch": 0.6615384615384615, "grad_norm": 1.7582635879516602, "learning_rate": 2.641557935491335e-05, "loss": 0.7653, "step": 4386 }, { "epoch": 0.6616892911010558, "grad_norm": 2.024310827255249, "learning_rate": 2.639437115571423e-05, "loss": 1.0496, "step": 4387 }, { "epoch": 0.6618401206636501, "grad_norm": 1.9917088747024536, "learning_rate": 2.637316842024158e-05, "loss": 1.1479, "step": 4388 }, { "epoch": 0.6619909502262443, "grad_norm": 2.0068159103393555, "learning_rate": 2.635197115340295e-05, "loss": 1.0627, "step": 4389 }, { "epoch": 0.6621417797888386, "grad_norm": 2.035628318786621, "learning_rate": 2.633077936010465e-05, "loss": 1.185, "step": 4390 }, { "epoch": 0.6622926093514329, "grad_norm": 1.9230365753173828, "learning_rate": 2.630959304525166e-05, "loss": 0.9443, "step": 4391 }, { "epoch": 0.6624434389140271, "grad_norm": 2.253100872039795, "learning_rate": 2.628841221374777e-05, "loss": 1.3142, "step": 4392 }, { "epoch": 0.6625942684766214, "grad_norm": 2.2333714962005615, "learning_rate": 2.6267236870495455e-05, "loss": 1.4314, "step": 4393 }, { "epoch": 0.6627450980392157, "grad_norm": 2.016254425048828, "learning_rate": 2.6246067020395926e-05, "loss": 1.0582, "step": 4394 }, { "epoch": 0.6628959276018099, "grad_norm": 2.7785065174102783, "learning_rate": 2.6224902668349137e-05, "loss": 1.1544, "step": 4395 }, { "epoch": 0.6630467571644042, "grad_norm": 1.7980607748031616, "learning_rate": 2.620374381925377e-05, "loss": 0.9295, "step": 4396 }, { "epoch": 0.6631975867269985, "grad_norm": 1.5250808000564575, "learning_rate": 2.6182590478007196e-05, "loss": 0.5961, "step": 4397 }, { "epoch": 0.6633484162895927, "grad_norm": 1.9021562337875366, "learning_rate": 2.6161442649505553e-05, "loss": 0.9749, "step": 4398 }, { "epoch": 0.663499245852187, "grad_norm": 2.0192177295684814, "learning_rate": 2.614030033864371e-05, "loss": 0.9961, "step": 4399 }, { "epoch": 0.6636500754147813, "grad_norm": 2.0536341667175293, "learning_rate": 2.6119163550315196e-05, "loss": 1.042, "step": 4400 }, { "epoch": 0.6638009049773755, "grad_norm": 1.7113912105560303, "learning_rate": 2.609803228941232e-05, "loss": 1.1319, "step": 4401 }, { "epoch": 0.6639517345399698, "grad_norm": 2.1686599254608154, "learning_rate": 2.60769065608261e-05, "loss": 1.5529, "step": 4402 }, { "epoch": 0.6641025641025641, "grad_norm": 2.0911624431610107, "learning_rate": 2.605578636944626e-05, "loss": 1.3591, "step": 4403 }, { "epoch": 0.6642533936651583, "grad_norm": 1.9239060878753662, "learning_rate": 2.6034671720161263e-05, "loss": 1.251, "step": 4404 }, { "epoch": 0.6644042232277526, "grad_norm": 2.0061123371124268, "learning_rate": 2.6013562617858267e-05, "loss": 1.2663, "step": 4405 }, { "epoch": 0.6645550527903469, "grad_norm": 1.980891466140747, "learning_rate": 2.5992459067423152e-05, "loss": 1.2818, "step": 4406 }, { "epoch": 0.6647058823529411, "grad_norm": 1.8450405597686768, "learning_rate": 2.597136107374053e-05, "loss": 1.2368, "step": 4407 }, { "epoch": 0.6648567119155354, "grad_norm": 1.8758810758590698, "learning_rate": 2.59502686416937e-05, "loss": 1.0504, "step": 4408 }, { "epoch": 0.6650075414781297, "grad_norm": 1.756330966949463, "learning_rate": 2.592918177616472e-05, "loss": 0.9496, "step": 4409 }, { "epoch": 0.665158371040724, "grad_norm": 1.9200359582901, "learning_rate": 2.590810048203428e-05, "loss": 1.2318, "step": 4410 }, { "epoch": 0.6653092006033182, "grad_norm": 1.6079480648040771, "learning_rate": 2.5887024764181843e-05, "loss": 0.7825, "step": 4411 }, { "epoch": 0.6654600301659125, "grad_norm": 1.9778140783309937, "learning_rate": 2.586595462748558e-05, "loss": 1.2036, "step": 4412 }, { "epoch": 0.6656108597285068, "grad_norm": 1.7957698106765747, "learning_rate": 2.5844890076822337e-05, "loss": 1.0836, "step": 4413 }, { "epoch": 0.665761689291101, "grad_norm": 2.0054361820220947, "learning_rate": 2.5823831117067705e-05, "loss": 1.2288, "step": 4414 }, { "epoch": 0.6659125188536953, "grad_norm": 1.9821476936340332, "learning_rate": 2.5802777753095947e-05, "loss": 1.1124, "step": 4415 }, { "epoch": 0.6660633484162896, "grad_norm": 2.01965594291687, "learning_rate": 2.5781729989780056e-05, "loss": 1.16, "step": 4416 }, { "epoch": 0.6662141779788838, "grad_norm": 1.6553148031234741, "learning_rate": 2.5760687831991716e-05, "loss": 0.8104, "step": 4417 }, { "epoch": 0.6663650075414781, "grad_norm": 2.242192029953003, "learning_rate": 2.5739651284601325e-05, "loss": 1.4659, "step": 4418 }, { "epoch": 0.6665158371040724, "grad_norm": 1.6983839273452759, "learning_rate": 2.5718620352477944e-05, "loss": 0.8911, "step": 4419 }, { "epoch": 0.6666666666666666, "grad_norm": 1.8820899724960327, "learning_rate": 2.5697595040489387e-05, "loss": 1.0377, "step": 4420 }, { "epoch": 0.6668174962292609, "grad_norm": 1.9682042598724365, "learning_rate": 2.5676575353502142e-05, "loss": 1.1231, "step": 4421 }, { "epoch": 0.6669683257918552, "grad_norm": 2.173283576965332, "learning_rate": 2.5655561296381387e-05, "loss": 1.2047, "step": 4422 }, { "epoch": 0.6671191553544494, "grad_norm": 1.8841465711593628, "learning_rate": 2.5634552873991013e-05, "loss": 1.0699, "step": 4423 }, { "epoch": 0.6672699849170437, "grad_norm": 1.945205569267273, "learning_rate": 2.5613550091193605e-05, "loss": 0.9892, "step": 4424 }, { "epoch": 0.667420814479638, "grad_norm": 2.010098457336426, "learning_rate": 2.5592552952850434e-05, "loss": 1.0693, "step": 4425 }, { "epoch": 0.6675716440422322, "grad_norm": 2.247731924057007, "learning_rate": 2.5571561463821473e-05, "loss": 1.2558, "step": 4426 }, { "epoch": 0.6677224736048265, "grad_norm": 2.006993055343628, "learning_rate": 2.555057562896538e-05, "loss": 1.2157, "step": 4427 }, { "epoch": 0.6678733031674208, "grad_norm": 2.3283536434173584, "learning_rate": 2.55295954531395e-05, "loss": 1.4411, "step": 4428 }, { "epoch": 0.668024132730015, "grad_norm": 2.0957813262939453, "learning_rate": 2.550862094119989e-05, "loss": 1.072, "step": 4429 }, { "epoch": 0.6681749622926093, "grad_norm": 1.9993664026260376, "learning_rate": 2.548765209800127e-05, "loss": 1.2403, "step": 4430 }, { "epoch": 0.6683257918552036, "grad_norm": 1.911807894706726, "learning_rate": 2.546668892839707e-05, "loss": 1.142, "step": 4431 }, { "epoch": 0.6684766214177978, "grad_norm": 1.8039921522140503, "learning_rate": 2.5445731437239384e-05, "loss": 1.142, "step": 4432 }, { "epoch": 0.6686274509803921, "grad_norm": 2.134629964828491, "learning_rate": 2.5424779629379016e-05, "loss": 1.2333, "step": 4433 }, { "epoch": 0.6687782805429864, "grad_norm": 2.0560686588287354, "learning_rate": 2.5403833509665436e-05, "loss": 1.2225, "step": 4434 }, { "epoch": 0.6689291101055806, "grad_norm": 1.9249696731567383, "learning_rate": 2.5382893082946814e-05, "loss": 0.9713, "step": 4435 }, { "epoch": 0.6690799396681749, "grad_norm": 1.8240432739257812, "learning_rate": 2.536195835406997e-05, "loss": 0.9581, "step": 4436 }, { "epoch": 0.6692307692307692, "grad_norm": 1.9896678924560547, "learning_rate": 2.5341029327880473e-05, "loss": 1.1584, "step": 4437 }, { "epoch": 0.6693815987933635, "grad_norm": 2.000964641571045, "learning_rate": 2.5320106009222466e-05, "loss": 0.9684, "step": 4438 }, { "epoch": 0.6695324283559577, "grad_norm": 2.116213083267212, "learning_rate": 2.5299188402938866e-05, "loss": 1.2253, "step": 4439 }, { "epoch": 0.669683257918552, "grad_norm": 2.161775827407837, "learning_rate": 2.5278276513871236e-05, "loss": 1.2364, "step": 4440 }, { "epoch": 0.6698340874811463, "grad_norm": 1.9633859395980835, "learning_rate": 2.52573703468598e-05, "loss": 1.1587, "step": 4441 }, { "epoch": 0.6699849170437405, "grad_norm": 2.2068185806274414, "learning_rate": 2.5236469906743478e-05, "loss": 1.2731, "step": 4442 }, { "epoch": 0.6701357466063348, "grad_norm": 2.0926973819732666, "learning_rate": 2.5215575198359847e-05, "loss": 1.1474, "step": 4443 }, { "epoch": 0.6702865761689291, "grad_norm": 1.8900254964828491, "learning_rate": 2.5194686226545182e-05, "loss": 0.8156, "step": 4444 }, { "epoch": 0.6704374057315233, "grad_norm": 1.8935285806655884, "learning_rate": 2.517380299613441e-05, "loss": 0.9299, "step": 4445 }, { "epoch": 0.6705882352941176, "grad_norm": 1.702004075050354, "learning_rate": 2.5152925511961156e-05, "loss": 0.7349, "step": 4446 }, { "epoch": 0.6707390648567119, "grad_norm": 2.1219069957733154, "learning_rate": 2.5132053778857655e-05, "loss": 1.1642, "step": 4447 }, { "epoch": 0.6708898944193061, "grad_norm": 1.60079026222229, "learning_rate": 2.5111187801654867e-05, "loss": 0.6556, "step": 4448 }, { "epoch": 0.6710407239819004, "grad_norm": 1.950226068496704, "learning_rate": 2.5090327585182405e-05, "loss": 1.0356, "step": 4449 }, { "epoch": 0.6711915535444947, "grad_norm": 1.6950665712356567, "learning_rate": 2.506947313426854e-05, "loss": 0.7692, "step": 4450 }, { "epoch": 0.6713423831070889, "grad_norm": 2.0412843227386475, "learning_rate": 2.504862445374022e-05, "loss": 1.395, "step": 4451 }, { "epoch": 0.6714932126696832, "grad_norm": 2.02947998046875, "learning_rate": 2.5027781548423045e-05, "loss": 1.1453, "step": 4452 }, { "epoch": 0.6716440422322775, "grad_norm": 1.9622522592544556, "learning_rate": 2.50069444231413e-05, "loss": 1.4223, "step": 4453 }, { "epoch": 0.6717948717948717, "grad_norm": 2.062838554382324, "learning_rate": 2.4986113082717904e-05, "loss": 1.4679, "step": 4454 }, { "epoch": 0.6719457013574661, "grad_norm": 1.8021085262298584, "learning_rate": 2.4965287531974473e-05, "loss": 0.9905, "step": 4455 }, { "epoch": 0.6720965309200604, "grad_norm": 2.1407132148742676, "learning_rate": 2.494446777573123e-05, "loss": 1.3315, "step": 4456 }, { "epoch": 0.6722473604826547, "grad_norm": 1.8628510236740112, "learning_rate": 2.492365381880709e-05, "loss": 1.3089, "step": 4457 }, { "epoch": 0.6723981900452489, "grad_norm": 1.78757643699646, "learning_rate": 2.4902845666019636e-05, "loss": 1.1867, "step": 4458 }, { "epoch": 0.6725490196078432, "grad_norm": 2.011810541152954, "learning_rate": 2.4882043322185093e-05, "loss": 1.3064, "step": 4459 }, { "epoch": 0.6726998491704375, "grad_norm": 2.1120662689208984, "learning_rate": 2.4861246792118342e-05, "loss": 1.1565, "step": 4460 }, { "epoch": 0.6728506787330317, "grad_norm": 1.9057120084762573, "learning_rate": 2.4840456080632912e-05, "loss": 0.9523, "step": 4461 }, { "epoch": 0.673001508295626, "grad_norm": 1.6348810195922852, "learning_rate": 2.4819671192541004e-05, "loss": 0.9188, "step": 4462 }, { "epoch": 0.6731523378582203, "grad_norm": 1.6139795780181885, "learning_rate": 2.479889213265345e-05, "loss": 0.8301, "step": 4463 }, { "epoch": 0.6733031674208145, "grad_norm": 2.096783399581909, "learning_rate": 2.4778118905779753e-05, "loss": 1.3966, "step": 4464 }, { "epoch": 0.6734539969834088, "grad_norm": 1.8629156351089478, "learning_rate": 2.4757351516728066e-05, "loss": 1.0482, "step": 4465 }, { "epoch": 0.6736048265460031, "grad_norm": 1.87657630443573, "learning_rate": 2.473658997030514e-05, "loss": 1.1855, "step": 4466 }, { "epoch": 0.6737556561085973, "grad_norm": 1.9434707164764404, "learning_rate": 2.4715834271316445e-05, "loss": 0.9464, "step": 4467 }, { "epoch": 0.6739064856711916, "grad_norm": 2.011904239654541, "learning_rate": 2.4695084424566056e-05, "loss": 1.2216, "step": 4468 }, { "epoch": 0.6740573152337859, "grad_norm": 1.8969014883041382, "learning_rate": 2.4674340434856708e-05, "loss": 1.0795, "step": 4469 }, { "epoch": 0.6742081447963801, "grad_norm": 1.6169764995574951, "learning_rate": 2.465360230698978e-05, "loss": 0.8155, "step": 4470 }, { "epoch": 0.6743589743589744, "grad_norm": 1.9329882860183716, "learning_rate": 2.4632870045765282e-05, "loss": 1.1702, "step": 4471 }, { "epoch": 0.6745098039215687, "grad_norm": 1.8523646593093872, "learning_rate": 2.4612143655981883e-05, "loss": 0.8901, "step": 4472 }, { "epoch": 0.6746606334841629, "grad_norm": 1.741517186164856, "learning_rate": 2.459142314243688e-05, "loss": 0.8527, "step": 4473 }, { "epoch": 0.6748114630467572, "grad_norm": 1.8274139165878296, "learning_rate": 2.4570708509926215e-05, "loss": 0.95, "step": 4474 }, { "epoch": 0.6749622926093515, "grad_norm": 2.1170694828033447, "learning_rate": 2.454999976324447e-05, "loss": 1.4197, "step": 4475 }, { "epoch": 0.6751131221719457, "grad_norm": 1.6479849815368652, "learning_rate": 2.452929690718486e-05, "loss": 0.7433, "step": 4476 }, { "epoch": 0.67526395173454, "grad_norm": 1.6668832302093506, "learning_rate": 2.4508599946539246e-05, "loss": 0.7519, "step": 4477 }, { "epoch": 0.6754147812971343, "grad_norm": 1.9320100545883179, "learning_rate": 2.448790888609811e-05, "loss": 0.8658, "step": 4478 }, { "epoch": 0.6755656108597285, "grad_norm": 1.7332044839859009, "learning_rate": 2.4467223730650584e-05, "loss": 1.0368, "step": 4479 }, { "epoch": 0.6757164404223228, "grad_norm": 1.955726981163025, "learning_rate": 2.4446544484984422e-05, "loss": 1.0439, "step": 4480 }, { "epoch": 0.6758672699849171, "grad_norm": 2.118703842163086, "learning_rate": 2.4425871153886015e-05, "loss": 1.0092, "step": 4481 }, { "epoch": 0.6760180995475114, "grad_norm": 1.6598149538040161, "learning_rate": 2.440520374214038e-05, "loss": 0.8613, "step": 4482 }, { "epoch": 0.6761689291101056, "grad_norm": 1.8521422147750854, "learning_rate": 2.4384542254531197e-05, "loss": 0.9529, "step": 4483 }, { "epoch": 0.6763197586726999, "grad_norm": 2.171435832977295, "learning_rate": 2.4363886695840694e-05, "loss": 1.1426, "step": 4484 }, { "epoch": 0.6764705882352942, "grad_norm": 2.042433261871338, "learning_rate": 2.43432370708498e-05, "loss": 1.2093, "step": 4485 }, { "epoch": 0.6766214177978884, "grad_norm": 1.9355489015579224, "learning_rate": 2.432259338433806e-05, "loss": 0.8443, "step": 4486 }, { "epoch": 0.6767722473604827, "grad_norm": 1.7876540422439575, "learning_rate": 2.4301955641083624e-05, "loss": 1.0477, "step": 4487 }, { "epoch": 0.676923076923077, "grad_norm": 2.0827796459198, "learning_rate": 2.4281323845863273e-05, "loss": 0.9487, "step": 4488 }, { "epoch": 0.6770739064856712, "grad_norm": 2.112048625946045, "learning_rate": 2.4260698003452426e-05, "loss": 1.4831, "step": 4489 }, { "epoch": 0.6772247360482655, "grad_norm": 2.0722193717956543, "learning_rate": 2.42400781186251e-05, "loss": 1.1224, "step": 4490 }, { "epoch": 0.6773755656108598, "grad_norm": 1.9622279405593872, "learning_rate": 2.421946419615395e-05, "loss": 0.9344, "step": 4491 }, { "epoch": 0.677526395173454, "grad_norm": 2.3910367488861084, "learning_rate": 2.4198856240810246e-05, "loss": 1.3776, "step": 4492 }, { "epoch": 0.6776772247360483, "grad_norm": 2.0743908882141113, "learning_rate": 2.4178254257363903e-05, "loss": 1.0038, "step": 4493 }, { "epoch": 0.6778280542986426, "grad_norm": 2.334857225418091, "learning_rate": 2.4157658250583377e-05, "loss": 1.137, "step": 4494 }, { "epoch": 0.6779788838612368, "grad_norm": 1.9632031917572021, "learning_rate": 2.4137068225235814e-05, "loss": 0.8683, "step": 4495 }, { "epoch": 0.6781297134238311, "grad_norm": 1.7969934940338135, "learning_rate": 2.4116484186086963e-05, "loss": 0.9448, "step": 4496 }, { "epoch": 0.6782805429864254, "grad_norm": 1.7520010471343994, "learning_rate": 2.4095906137901166e-05, "loss": 0.932, "step": 4497 }, { "epoch": 0.6784313725490196, "grad_norm": 1.9412981271743774, "learning_rate": 2.4075334085441392e-05, "loss": 0.8673, "step": 4498 }, { "epoch": 0.6785822021116139, "grad_norm": 2.021216630935669, "learning_rate": 2.4054768033469227e-05, "loss": 1.1273, "step": 4499 }, { "epoch": 0.6787330316742082, "grad_norm": 1.639169454574585, "learning_rate": 2.403420798674485e-05, "loss": 0.6978, "step": 4500 }, { "epoch": 0.6788838612368024, "grad_norm": 1.662379503250122, "learning_rate": 2.401365395002707e-05, "loss": 1.1959, "step": 4501 }, { "epoch": 0.6790346907993967, "grad_norm": 1.8111042976379395, "learning_rate": 2.399310592807332e-05, "loss": 1.1858, "step": 4502 }, { "epoch": 0.679185520361991, "grad_norm": 1.937150239944458, "learning_rate": 2.3972563925639563e-05, "loss": 1.1491, "step": 4503 }, { "epoch": 0.6793363499245852, "grad_norm": 1.9161791801452637, "learning_rate": 2.3952027947480447e-05, "loss": 1.2453, "step": 4504 }, { "epoch": 0.6794871794871795, "grad_norm": 1.9551761150360107, "learning_rate": 2.393149799834921e-05, "loss": 1.2439, "step": 4505 }, { "epoch": 0.6796380090497738, "grad_norm": 1.7764298915863037, "learning_rate": 2.3910974082997683e-05, "loss": 1.0459, "step": 4506 }, { "epoch": 0.679788838612368, "grad_norm": 1.7454358339309692, "learning_rate": 2.3890456206176297e-05, "loss": 1.0182, "step": 4507 }, { "epoch": 0.6799396681749623, "grad_norm": 2.2341389656066895, "learning_rate": 2.3869944372634097e-05, "loss": 1.4677, "step": 4508 }, { "epoch": 0.6800904977375566, "grad_norm": 1.5878716707229614, "learning_rate": 2.3849438587118715e-05, "loss": 0.9596, "step": 4509 }, { "epoch": 0.6802413273001509, "grad_norm": 1.7403442859649658, "learning_rate": 2.382893885437641e-05, "loss": 0.9782, "step": 4510 }, { "epoch": 0.6803921568627451, "grad_norm": 1.7426447868347168, "learning_rate": 2.3808445179152005e-05, "loss": 0.9538, "step": 4511 }, { "epoch": 0.6805429864253394, "grad_norm": 2.0419554710388184, "learning_rate": 2.378795756618896e-05, "loss": 1.2487, "step": 4512 }, { "epoch": 0.6806938159879337, "grad_norm": 1.8041220903396606, "learning_rate": 2.3767476020229285e-05, "loss": 1.0347, "step": 4513 }, { "epoch": 0.6808446455505279, "grad_norm": 1.7528855800628662, "learning_rate": 2.3747000546013607e-05, "loss": 1.0063, "step": 4514 }, { "epoch": 0.6809954751131222, "grad_norm": 1.9290194511413574, "learning_rate": 2.3726531148281173e-05, "loss": 0.9938, "step": 4515 }, { "epoch": 0.6811463046757165, "grad_norm": 2.1026668548583984, "learning_rate": 2.370606783176979e-05, "loss": 1.4129, "step": 4516 }, { "epoch": 0.6812971342383107, "grad_norm": 1.6011643409729004, "learning_rate": 2.3685610601215868e-05, "loss": 0.9136, "step": 4517 }, { "epoch": 0.681447963800905, "grad_norm": 2.2286760807037354, "learning_rate": 2.366515946135442e-05, "loss": 1.3185, "step": 4518 }, { "epoch": 0.6815987933634993, "grad_norm": 2.1434264183044434, "learning_rate": 2.364471441691902e-05, "loss": 1.1643, "step": 4519 }, { "epoch": 0.6817496229260935, "grad_norm": 1.9442858695983887, "learning_rate": 2.3624275472641872e-05, "loss": 1.2119, "step": 4520 }, { "epoch": 0.6819004524886878, "grad_norm": 2.2295138835906982, "learning_rate": 2.360384263325373e-05, "loss": 1.4671, "step": 4521 }, { "epoch": 0.6820512820512821, "grad_norm": 1.9383876323699951, "learning_rate": 2.3583415903483962e-05, "loss": 1.0572, "step": 4522 }, { "epoch": 0.6822021116138763, "grad_norm": 1.7140932083129883, "learning_rate": 2.35629952880605e-05, "loss": 0.8949, "step": 4523 }, { "epoch": 0.6823529411764706, "grad_norm": 1.5414679050445557, "learning_rate": 2.3542580791709877e-05, "loss": 0.8353, "step": 4524 }, { "epoch": 0.6825037707390649, "grad_norm": 1.8597333431243896, "learning_rate": 2.3522172419157212e-05, "loss": 1.1174, "step": 4525 }, { "epoch": 0.6826546003016591, "grad_norm": 1.7747485637664795, "learning_rate": 2.3501770175126187e-05, "loss": 1.0841, "step": 4526 }, { "epoch": 0.6828054298642534, "grad_norm": 2.096862316131592, "learning_rate": 2.348137406433908e-05, "loss": 1.0326, "step": 4527 }, { "epoch": 0.6829562594268477, "grad_norm": 1.8275998830795288, "learning_rate": 2.3460984091516747e-05, "loss": 0.9494, "step": 4528 }, { "epoch": 0.683107088989442, "grad_norm": 1.9876608848571777, "learning_rate": 2.3440600261378627e-05, "loss": 1.2008, "step": 4529 }, { "epoch": 0.6832579185520362, "grad_norm": 1.7452807426452637, "learning_rate": 2.3420222578642748e-05, "loss": 0.823, "step": 4530 }, { "epoch": 0.6834087481146305, "grad_norm": 1.8783314228057861, "learning_rate": 2.3399851048025666e-05, "loss": 0.9996, "step": 4531 }, { "epoch": 0.6835595776772248, "grad_norm": 1.899132490158081, "learning_rate": 2.3379485674242567e-05, "loss": 0.9897, "step": 4532 }, { "epoch": 0.683710407239819, "grad_norm": 2.063966751098633, "learning_rate": 2.335912646200719e-05, "loss": 1.1158, "step": 4533 }, { "epoch": 0.6838612368024133, "grad_norm": 1.9861629009246826, "learning_rate": 2.333877341603185e-05, "loss": 1.3331, "step": 4534 }, { "epoch": 0.6840120663650076, "grad_norm": 1.7164462804794312, "learning_rate": 2.331842654102744e-05, "loss": 0.802, "step": 4535 }, { "epoch": 0.6841628959276018, "grad_norm": 2.121925115585327, "learning_rate": 2.329808584170341e-05, "loss": 1.0509, "step": 4536 }, { "epoch": 0.6843137254901961, "grad_norm": 2.3628766536712646, "learning_rate": 2.3277751322767795e-05, "loss": 1.2186, "step": 4537 }, { "epoch": 0.6844645550527904, "grad_norm": 2.191727638244629, "learning_rate": 2.3257422988927197e-05, "loss": 1.2529, "step": 4538 }, { "epoch": 0.6846153846153846, "grad_norm": 2.251492500305176, "learning_rate": 2.3237100844886788e-05, "loss": 0.9825, "step": 4539 }, { "epoch": 0.6847662141779789, "grad_norm": 1.8493767976760864, "learning_rate": 2.3216784895350313e-05, "loss": 1.0218, "step": 4540 }, { "epoch": 0.6849170437405732, "grad_norm": 2.1659085750579834, "learning_rate": 2.3196475145020042e-05, "loss": 1.14, "step": 4541 }, { "epoch": 0.6850678733031674, "grad_norm": 1.9409279823303223, "learning_rate": 2.3176171598596856e-05, "loss": 1.0154, "step": 4542 }, { "epoch": 0.6852187028657617, "grad_norm": 1.99635648727417, "learning_rate": 2.3155874260780196e-05, "loss": 0.8563, "step": 4543 }, { "epoch": 0.685369532428356, "grad_norm": 2.0226526260375977, "learning_rate": 2.3135583136268048e-05, "loss": 1.0368, "step": 4544 }, { "epoch": 0.6855203619909502, "grad_norm": 2.400123119354248, "learning_rate": 2.3115298229756965e-05, "loss": 1.2406, "step": 4545 }, { "epoch": 0.6856711915535445, "grad_norm": 1.6404377222061157, "learning_rate": 2.3095019545942076e-05, "loss": 0.7472, "step": 4546 }, { "epoch": 0.6858220211161388, "grad_norm": 1.8689852952957153, "learning_rate": 2.307474708951704e-05, "loss": 0.8707, "step": 4547 }, { "epoch": 0.685972850678733, "grad_norm": 2.0692548751831055, "learning_rate": 2.3054480865174106e-05, "loss": 1.0275, "step": 4548 }, { "epoch": 0.6861236802413273, "grad_norm": 1.6252740621566772, "learning_rate": 2.303422087760408e-05, "loss": 0.7479, "step": 4549 }, { "epoch": 0.6862745098039216, "grad_norm": 1.697693943977356, "learning_rate": 2.301396713149627e-05, "loss": 0.7639, "step": 4550 }, { "epoch": 0.6864253393665158, "grad_norm": 2.291041851043701, "learning_rate": 2.2993719631538613e-05, "loss": 1.8556, "step": 4551 }, { "epoch": 0.6865761689291101, "grad_norm": 2.0478310585021973, "learning_rate": 2.297347838241755e-05, "loss": 1.4031, "step": 4552 }, { "epoch": 0.6867269984917044, "grad_norm": 1.725343108177185, "learning_rate": 2.29532433888181e-05, "loss": 1.11, "step": 4553 }, { "epoch": 0.6868778280542986, "grad_norm": 1.7346992492675781, "learning_rate": 2.293301465542383e-05, "loss": 1.0103, "step": 4554 }, { "epoch": 0.6870286576168929, "grad_norm": 1.7897839546203613, "learning_rate": 2.2912792186916848e-05, "loss": 1.1456, "step": 4555 }, { "epoch": 0.6871794871794872, "grad_norm": 1.9877675771713257, "learning_rate": 2.2892575987977827e-05, "loss": 1.0828, "step": 4556 }, { "epoch": 0.6873303167420814, "grad_norm": 1.8586440086364746, "learning_rate": 2.2872366063285977e-05, "loss": 1.2974, "step": 4557 }, { "epoch": 0.6874811463046757, "grad_norm": 1.860966444015503, "learning_rate": 2.2852162417519074e-05, "loss": 1.1575, "step": 4558 }, { "epoch": 0.68763197586727, "grad_norm": 1.8555736541748047, "learning_rate": 2.2831965055353395e-05, "loss": 1.2312, "step": 4559 }, { "epoch": 0.6877828054298643, "grad_norm": 1.9086530208587646, "learning_rate": 2.281177398146381e-05, "loss": 1.3763, "step": 4560 }, { "epoch": 0.6879336349924585, "grad_norm": 2.091766834259033, "learning_rate": 2.279158920052371e-05, "loss": 1.1121, "step": 4561 }, { "epoch": 0.6880844645550528, "grad_norm": 1.8663558959960938, "learning_rate": 2.277141071720505e-05, "loss": 1.2019, "step": 4562 }, { "epoch": 0.6882352941176471, "grad_norm": 2.0251734256744385, "learning_rate": 2.275123853617831e-05, "loss": 1.0093, "step": 4563 }, { "epoch": 0.6883861236802413, "grad_norm": 1.9014619588851929, "learning_rate": 2.2731072662112506e-05, "loss": 1.0996, "step": 4564 }, { "epoch": 0.6885369532428356, "grad_norm": 1.9211238622665405, "learning_rate": 2.2710913099675217e-05, "loss": 1.2378, "step": 4565 }, { "epoch": 0.6886877828054299, "grad_norm": 2.313538074493408, "learning_rate": 2.269075985353254e-05, "loss": 1.3009, "step": 4566 }, { "epoch": 0.6888386123680241, "grad_norm": 1.8864688873291016, "learning_rate": 2.2670612928349115e-05, "loss": 0.9513, "step": 4567 }, { "epoch": 0.6889894419306184, "grad_norm": 2.16385555267334, "learning_rate": 2.265047232878813e-05, "loss": 1.328, "step": 4568 }, { "epoch": 0.6891402714932127, "grad_norm": 1.7994483709335327, "learning_rate": 2.2630338059511298e-05, "loss": 0.8778, "step": 4569 }, { "epoch": 0.6892911010558069, "grad_norm": 2.0631918907165527, "learning_rate": 2.2610210125178866e-05, "loss": 1.1719, "step": 4570 }, { "epoch": 0.6894419306184012, "grad_norm": 2.04980206489563, "learning_rate": 2.2590088530449616e-05, "loss": 1.3181, "step": 4571 }, { "epoch": 0.6895927601809955, "grad_norm": 2.2527554035186768, "learning_rate": 2.256997327998087e-05, "loss": 1.4162, "step": 4572 }, { "epoch": 0.6897435897435897, "grad_norm": 1.728791356086731, "learning_rate": 2.2549864378428477e-05, "loss": 0.8022, "step": 4573 }, { "epoch": 0.689894419306184, "grad_norm": 2.5230369567871094, "learning_rate": 2.252976183044681e-05, "loss": 1.047, "step": 4574 }, { "epoch": 0.6900452488687783, "grad_norm": 1.7018266916275024, "learning_rate": 2.2509665640688803e-05, "loss": 0.8434, "step": 4575 }, { "epoch": 0.6901960784313725, "grad_norm": 2.0672879219055176, "learning_rate": 2.248957581380585e-05, "loss": 1.3786, "step": 4576 }, { "epoch": 0.6903469079939668, "grad_norm": 1.8049570322036743, "learning_rate": 2.2469492354447934e-05, "loss": 0.9719, "step": 4577 }, { "epoch": 0.6904977375565611, "grad_norm": 2.039966106414795, "learning_rate": 2.2449415267263547e-05, "loss": 1.2256, "step": 4578 }, { "epoch": 0.6906485671191553, "grad_norm": 1.8552099466323853, "learning_rate": 2.24293445568997e-05, "loss": 0.9303, "step": 4579 }, { "epoch": 0.6907993966817496, "grad_norm": 1.9115890264511108, "learning_rate": 2.2409280228001938e-05, "loss": 1.0623, "step": 4580 }, { "epoch": 0.6909502262443439, "grad_norm": 1.7886425256729126, "learning_rate": 2.238922228521432e-05, "loss": 0.8998, "step": 4581 }, { "epoch": 0.6911010558069381, "grad_norm": 1.7563514709472656, "learning_rate": 2.2369170733179428e-05, "loss": 0.8484, "step": 4582 }, { "epoch": 0.6912518853695324, "grad_norm": 2.125962257385254, "learning_rate": 2.234912557653837e-05, "loss": 1.1353, "step": 4583 }, { "epoch": 0.6914027149321267, "grad_norm": 1.6059387922286987, "learning_rate": 2.2329086819930767e-05, "loss": 0.6717, "step": 4584 }, { "epoch": 0.691553544494721, "grad_norm": 2.2254068851470947, "learning_rate": 2.2309054467994767e-05, "loss": 1.1222, "step": 4585 }, { "epoch": 0.6917043740573152, "grad_norm": 2.2176573276519775, "learning_rate": 2.2289028525367045e-05, "loss": 1.2969, "step": 4586 }, { "epoch": 0.6918552036199095, "grad_norm": 2.265866279602051, "learning_rate": 2.2269008996682746e-05, "loss": 1.3178, "step": 4587 }, { "epoch": 0.6920060331825038, "grad_norm": 2.0458288192749023, "learning_rate": 2.2248995886575575e-05, "loss": 1.1214, "step": 4588 }, { "epoch": 0.692156862745098, "grad_norm": 2.3845739364624023, "learning_rate": 2.222898919967774e-05, "loss": 1.152, "step": 4589 }, { "epoch": 0.6923076923076923, "grad_norm": 2.3499369621276855, "learning_rate": 2.220898894061996e-05, "loss": 1.2661, "step": 4590 }, { "epoch": 0.6924585218702866, "grad_norm": 2.276214122772217, "learning_rate": 2.218899511403148e-05, "loss": 1.1566, "step": 4591 }, { "epoch": 0.6926093514328808, "grad_norm": 2.099652051925659, "learning_rate": 2.2169007724540026e-05, "loss": 1.2269, "step": 4592 }, { "epoch": 0.6927601809954751, "grad_norm": 2.0007481575012207, "learning_rate": 2.2149026776771864e-05, "loss": 1.0888, "step": 4593 }, { "epoch": 0.6929110105580694, "grad_norm": 2.258118152618408, "learning_rate": 2.2129052275351753e-05, "loss": 1.2797, "step": 4594 }, { "epoch": 0.6930618401206636, "grad_norm": 1.9797393083572388, "learning_rate": 2.210908422490296e-05, "loss": 0.9134, "step": 4595 }, { "epoch": 0.6932126696832579, "grad_norm": 1.6254955530166626, "learning_rate": 2.208912263004729e-05, "loss": 0.7684, "step": 4596 }, { "epoch": 0.6933634992458522, "grad_norm": 1.7604800462722778, "learning_rate": 2.2069167495404978e-05, "loss": 0.7027, "step": 4597 }, { "epoch": 0.6935143288084464, "grad_norm": 3.1391637325286865, "learning_rate": 2.2049218825594843e-05, "loss": 0.7616, "step": 4598 }, { "epoch": 0.6936651583710407, "grad_norm": 2.2382397651672363, "learning_rate": 2.2029276625234162e-05, "loss": 1.1027, "step": 4599 }, { "epoch": 0.693815987933635, "grad_norm": 1.7915929555892944, "learning_rate": 2.2009340898938742e-05, "loss": 0.8236, "step": 4600 }, { "epoch": 0.6939668174962292, "grad_norm": 1.863381028175354, "learning_rate": 2.1989411651322878e-05, "loss": 1.3327, "step": 4601 }, { "epoch": 0.6941176470588235, "grad_norm": 1.956134557723999, "learning_rate": 2.1969488886999357e-05, "loss": 1.2951, "step": 4602 }, { "epoch": 0.6942684766214178, "grad_norm": 1.8145833015441895, "learning_rate": 2.1949572610579483e-05, "loss": 1.0066, "step": 4603 }, { "epoch": 0.694419306184012, "grad_norm": 1.8386484384536743, "learning_rate": 2.192966282667304e-05, "loss": 1.2154, "step": 4604 }, { "epoch": 0.6945701357466063, "grad_norm": 2.228360891342163, "learning_rate": 2.1909759539888358e-05, "loss": 1.3232, "step": 4605 }, { "epoch": 0.6947209653092006, "grad_norm": 1.6445335149765015, "learning_rate": 2.188986275483217e-05, "loss": 0.927, "step": 4606 }, { "epoch": 0.6948717948717948, "grad_norm": 1.4933658838272095, "learning_rate": 2.186997247610978e-05, "loss": 0.7212, "step": 4607 }, { "epoch": 0.6950226244343891, "grad_norm": 1.9351658821105957, "learning_rate": 2.1850088708324963e-05, "loss": 1.3312, "step": 4608 }, { "epoch": 0.6951734539969834, "grad_norm": 1.8469159603118896, "learning_rate": 2.1830211456079998e-05, "loss": 1.1487, "step": 4609 }, { "epoch": 0.6953242835595776, "grad_norm": 2.029816150665283, "learning_rate": 2.1810340723975638e-05, "loss": 1.2666, "step": 4610 }, { "epoch": 0.6954751131221719, "grad_norm": 1.746862769126892, "learning_rate": 2.1790476516611136e-05, "loss": 0.8114, "step": 4611 }, { "epoch": 0.6956259426847662, "grad_norm": 1.981194019317627, "learning_rate": 2.1770618838584235e-05, "loss": 1.0829, "step": 4612 }, { "epoch": 0.6957767722473605, "grad_norm": 1.7488001585006714, "learning_rate": 2.1750767694491168e-05, "loss": 1.0397, "step": 4613 }, { "epoch": 0.6959276018099547, "grad_norm": 1.8406634330749512, "learning_rate": 2.1730923088926657e-05, "loss": 1.162, "step": 4614 }, { "epoch": 0.696078431372549, "grad_norm": 2.2736337184906006, "learning_rate": 2.1711085026483917e-05, "loss": 1.3286, "step": 4615 }, { "epoch": 0.6962292609351433, "grad_norm": 1.806166172027588, "learning_rate": 2.1691253511754604e-05, "loss": 0.9205, "step": 4616 }, { "epoch": 0.6963800904977375, "grad_norm": 2.034961462020874, "learning_rate": 2.167142854932892e-05, "loss": 0.9919, "step": 4617 }, { "epoch": 0.6965309200603318, "grad_norm": 1.7779138088226318, "learning_rate": 2.1651610143795513e-05, "loss": 0.9995, "step": 4618 }, { "epoch": 0.6966817496229261, "grad_norm": 1.94465172290802, "learning_rate": 2.163179829974153e-05, "loss": 1.0789, "step": 4619 }, { "epoch": 0.6968325791855203, "grad_norm": 1.7606016397476196, "learning_rate": 2.1611993021752592e-05, "loss": 0.9071, "step": 4620 }, { "epoch": 0.6969834087481146, "grad_norm": 1.8901212215423584, "learning_rate": 2.15921943144128e-05, "loss": 1.0273, "step": 4621 }, { "epoch": 0.6971342383107089, "grad_norm": 1.7092458009719849, "learning_rate": 2.1572402182304735e-05, "loss": 0.8282, "step": 4622 }, { "epoch": 0.6972850678733031, "grad_norm": 1.7956867218017578, "learning_rate": 2.155261663000946e-05, "loss": 1.0925, "step": 4623 }, { "epoch": 0.6974358974358974, "grad_norm": 2.1103568077087402, "learning_rate": 2.1532837662106508e-05, "loss": 1.0229, "step": 4624 }, { "epoch": 0.6975867269984917, "grad_norm": 2.0585215091705322, "learning_rate": 2.1513065283173895e-05, "loss": 1.0709, "step": 4625 }, { "epoch": 0.6977375565610859, "grad_norm": 1.8693363666534424, "learning_rate": 2.1493299497788104e-05, "loss": 1.094, "step": 4626 }, { "epoch": 0.6978883861236802, "grad_norm": 2.111178159713745, "learning_rate": 2.1473540310524104e-05, "loss": 1.1309, "step": 4627 }, { "epoch": 0.6980392156862745, "grad_norm": 1.9383981227874756, "learning_rate": 2.1453787725955326e-05, "loss": 1.1851, "step": 4628 }, { "epoch": 0.6981900452488687, "grad_norm": 1.8491449356079102, "learning_rate": 2.1434041748653676e-05, "loss": 0.958, "step": 4629 }, { "epoch": 0.698340874811463, "grad_norm": 1.8705974817276, "learning_rate": 2.1414302383189528e-05, "loss": 1.0549, "step": 4630 }, { "epoch": 0.6984917043740573, "grad_norm": 1.976745843887329, "learning_rate": 2.1394569634131733e-05, "loss": 0.9806, "step": 4631 }, { "epoch": 0.6986425339366515, "grad_norm": 1.8239089250564575, "learning_rate": 2.1374843506047605e-05, "loss": 0.8415, "step": 4632 }, { "epoch": 0.6987933634992458, "grad_norm": 1.7784229516983032, "learning_rate": 2.1355124003502936e-05, "loss": 0.9101, "step": 4633 }, { "epoch": 0.6989441930618401, "grad_norm": 1.9251046180725098, "learning_rate": 2.1335411131061956e-05, "loss": 1.0617, "step": 4634 }, { "epoch": 0.6990950226244343, "grad_norm": 2.257554531097412, "learning_rate": 2.131570489328738e-05, "loss": 1.3049, "step": 4635 }, { "epoch": 0.6992458521870286, "grad_norm": 1.9564756155014038, "learning_rate": 2.1296005294740396e-05, "loss": 1.0254, "step": 4636 }, { "epoch": 0.6993966817496229, "grad_norm": 2.17734956741333, "learning_rate": 2.127631233998065e-05, "loss": 1.2318, "step": 4637 }, { "epoch": 0.6995475113122172, "grad_norm": 1.9063318967819214, "learning_rate": 2.1256626033566235e-05, "loss": 0.8562, "step": 4638 }, { "epoch": 0.6996983408748114, "grad_norm": 1.7762688398361206, "learning_rate": 2.1236946380053725e-05, "loss": 0.9558, "step": 4639 }, { "epoch": 0.6998491704374057, "grad_norm": 1.9744398593902588, "learning_rate": 2.1217273383998142e-05, "loss": 0.992, "step": 4640 }, { "epoch": 0.7, "grad_norm": 2.0337867736816406, "learning_rate": 2.1197607049952973e-05, "loss": 1.1152, "step": 4641 }, { "epoch": 0.7001508295625942, "grad_norm": 2.009249448776245, "learning_rate": 2.1177947382470176e-05, "loss": 1.1322, "step": 4642 }, { "epoch": 0.7003016591251885, "grad_norm": 1.689821481704712, "learning_rate": 2.115829438610012e-05, "loss": 0.8288, "step": 4643 }, { "epoch": 0.7004524886877828, "grad_norm": 2.199486017227173, "learning_rate": 2.113864806539167e-05, "loss": 1.202, "step": 4644 }, { "epoch": 0.700603318250377, "grad_norm": 2.073523998260498, "learning_rate": 2.111900842489214e-05, "loss": 1.1463, "step": 4645 }, { "epoch": 0.7007541478129713, "grad_norm": 1.8540726900100708, "learning_rate": 2.10993754691473e-05, "loss": 0.865, "step": 4646 }, { "epoch": 0.7009049773755656, "grad_norm": 1.7330979108810425, "learning_rate": 2.107974920270136e-05, "loss": 0.8588, "step": 4647 }, { "epoch": 0.7010558069381598, "grad_norm": 1.7831276655197144, "learning_rate": 2.1060129630096992e-05, "loss": 0.6987, "step": 4648 }, { "epoch": 0.7012066365007541, "grad_norm": 1.646828293800354, "learning_rate": 2.1040516755875312e-05, "loss": 0.7984, "step": 4649 }, { "epoch": 0.7013574660633484, "grad_norm": 2.0080490112304688, "learning_rate": 2.1020910584575894e-05, "loss": 1.006, "step": 4650 }, { "epoch": 0.7015082956259426, "grad_norm": 1.8286958932876587, "learning_rate": 2.100131112073675e-05, "loss": 1.0105, "step": 4651 }, { "epoch": 0.7016591251885369, "grad_norm": 1.4830034971237183, "learning_rate": 2.0981718368894367e-05, "loss": 0.7366, "step": 4652 }, { "epoch": 0.7018099547511312, "grad_norm": 1.896770715713501, "learning_rate": 2.0962132333583616e-05, "loss": 1.1034, "step": 4653 }, { "epoch": 0.7019607843137254, "grad_norm": 1.9869725704193115, "learning_rate": 2.094255301933788e-05, "loss": 1.2517, "step": 4654 }, { "epoch": 0.7021116138763197, "grad_norm": 2.0550742149353027, "learning_rate": 2.0922980430688954e-05, "loss": 1.2096, "step": 4655 }, { "epoch": 0.702262443438914, "grad_norm": 1.9662516117095947, "learning_rate": 2.0903414572167085e-05, "loss": 1.172, "step": 4656 }, { "epoch": 0.7024132730015082, "grad_norm": 1.7257182598114014, "learning_rate": 2.088385544830096e-05, "loss": 1.0599, "step": 4657 }, { "epoch": 0.7025641025641025, "grad_norm": 1.7056766748428345, "learning_rate": 2.08643030636177e-05, "loss": 0.9065, "step": 4658 }, { "epoch": 0.7027149321266968, "grad_norm": 1.9254727363586426, "learning_rate": 2.084475742264288e-05, "loss": 1.1428, "step": 4659 }, { "epoch": 0.702865761689291, "grad_norm": 1.9736605882644653, "learning_rate": 2.082521852990051e-05, "loss": 1.1935, "step": 4660 }, { "epoch": 0.7030165912518853, "grad_norm": 1.9907866716384888, "learning_rate": 2.080568638991305e-05, "loss": 1.1646, "step": 4661 }, { "epoch": 0.7031674208144797, "grad_norm": 2.0433058738708496, "learning_rate": 2.078616100720134e-05, "loss": 1.2131, "step": 4662 }, { "epoch": 0.703318250377074, "grad_norm": 1.8208763599395752, "learning_rate": 2.0766642386284724e-05, "loss": 0.8494, "step": 4663 }, { "epoch": 0.7034690799396682, "grad_norm": 1.831207275390625, "learning_rate": 2.0747130531680953e-05, "loss": 0.9344, "step": 4664 }, { "epoch": 0.7036199095022625, "grad_norm": 1.8730148077011108, "learning_rate": 2.0727625447906218e-05, "loss": 0.9057, "step": 4665 }, { "epoch": 0.7037707390648568, "grad_norm": 2.1487796306610107, "learning_rate": 2.070812713947513e-05, "loss": 1.4285, "step": 4666 }, { "epoch": 0.703921568627451, "grad_norm": 1.919661283493042, "learning_rate": 2.0688635610900747e-05, "loss": 1.1229, "step": 4667 }, { "epoch": 0.7040723981900453, "grad_norm": 1.784183382987976, "learning_rate": 2.0669150866694552e-05, "loss": 1.0167, "step": 4668 }, { "epoch": 0.7042232277526396, "grad_norm": 1.8138583898544312, "learning_rate": 2.0649672911366458e-05, "loss": 1.2976, "step": 4669 }, { "epoch": 0.7043740573152338, "grad_norm": 1.9801744222640991, "learning_rate": 2.0630201749424798e-05, "loss": 1.2006, "step": 4670 }, { "epoch": 0.7045248868778281, "grad_norm": 1.9677213430404663, "learning_rate": 2.061073738537635e-05, "loss": 1.176, "step": 4671 }, { "epoch": 0.7046757164404224, "grad_norm": 1.9266053438186646, "learning_rate": 2.05912798237263e-05, "loss": 1.2549, "step": 4672 }, { "epoch": 0.7048265460030166, "grad_norm": 1.920556664466858, "learning_rate": 2.0571829068978278e-05, "loss": 1.4388, "step": 4673 }, { "epoch": 0.7049773755656109, "grad_norm": 2.087610960006714, "learning_rate": 2.055238512563432e-05, "loss": 1.1488, "step": 4674 }, { "epoch": 0.7051282051282052, "grad_norm": 2.061352491378784, "learning_rate": 2.05329479981949e-05, "loss": 1.1935, "step": 4675 }, { "epoch": 0.7052790346907994, "grad_norm": 1.6803497076034546, "learning_rate": 2.051351769115891e-05, "loss": 0.9432, "step": 4676 }, { "epoch": 0.7054298642533937, "grad_norm": 1.9175409078598022, "learning_rate": 2.0494094209023656e-05, "loss": 1.0574, "step": 4677 }, { "epoch": 0.705580693815988, "grad_norm": 1.7252395153045654, "learning_rate": 2.0474677556284876e-05, "loss": 0.8085, "step": 4678 }, { "epoch": 0.7057315233785822, "grad_norm": 1.9746429920196533, "learning_rate": 2.0455267737436723e-05, "loss": 1.0218, "step": 4679 }, { "epoch": 0.7058823529411765, "grad_norm": 1.905798316001892, "learning_rate": 2.043586475697178e-05, "loss": 1.1949, "step": 4680 }, { "epoch": 0.7060331825037708, "grad_norm": 1.9092507362365723, "learning_rate": 2.0416468619381e-05, "loss": 0.9451, "step": 4681 }, { "epoch": 0.706184012066365, "grad_norm": 1.9139386415481567, "learning_rate": 2.0397079329153806e-05, "loss": 0.9195, "step": 4682 }, { "epoch": 0.7063348416289593, "grad_norm": 2.041433811187744, "learning_rate": 2.0377696890778013e-05, "loss": 1.1968, "step": 4683 }, { "epoch": 0.7064856711915536, "grad_norm": 1.538837194442749, "learning_rate": 2.035832130873986e-05, "loss": 0.6925, "step": 4684 }, { "epoch": 0.7066365007541479, "grad_norm": 2.2720346450805664, "learning_rate": 2.0338952587523985e-05, "loss": 1.0973, "step": 4685 }, { "epoch": 0.7067873303167421, "grad_norm": 2.2108120918273926, "learning_rate": 2.0319590731613446e-05, "loss": 1.3642, "step": 4686 }, { "epoch": 0.7069381598793364, "grad_norm": 1.961637020111084, "learning_rate": 2.030023574548972e-05, "loss": 1.1966, "step": 4687 }, { "epoch": 0.7070889894419307, "grad_norm": 1.84486985206604, "learning_rate": 2.0280887633632678e-05, "loss": 0.768, "step": 4688 }, { "epoch": 0.7072398190045249, "grad_norm": 2.294198751449585, "learning_rate": 2.0261546400520632e-05, "loss": 1.476, "step": 4689 }, { "epoch": 0.7073906485671192, "grad_norm": 1.8932732343673706, "learning_rate": 2.0242212050630233e-05, "loss": 1.0396, "step": 4690 }, { "epoch": 0.7075414781297135, "grad_norm": 1.73587167263031, "learning_rate": 2.0222884588436603e-05, "loss": 0.778, "step": 4691 }, { "epoch": 0.7076923076923077, "grad_norm": 2.0096917152404785, "learning_rate": 2.0203564018413255e-05, "loss": 1.1701, "step": 4692 }, { "epoch": 0.707843137254902, "grad_norm": 2.2010226249694824, "learning_rate": 2.01842503450321e-05, "loss": 1.1521, "step": 4693 }, { "epoch": 0.7079939668174963, "grad_norm": 2.4370157718658447, "learning_rate": 2.016494357276345e-05, "loss": 1.0608, "step": 4694 }, { "epoch": 0.7081447963800905, "grad_norm": 1.700277328491211, "learning_rate": 2.014564370607603e-05, "loss": 0.8609, "step": 4695 }, { "epoch": 0.7082956259426848, "grad_norm": 1.7533080577850342, "learning_rate": 2.0126350749436955e-05, "loss": 0.9032, "step": 4696 }, { "epoch": 0.7084464555052791, "grad_norm": 1.6027007102966309, "learning_rate": 2.010706470731175e-05, "loss": 0.7268, "step": 4697 }, { "epoch": 0.7085972850678733, "grad_norm": 2.0187020301818848, "learning_rate": 2.0087785584164332e-05, "loss": 1.0231, "step": 4698 }, { "epoch": 0.7087481146304676, "grad_norm": 2.212148666381836, "learning_rate": 2.0068513384457045e-05, "loss": 1.1862, "step": 4699 }, { "epoch": 0.7088989441930619, "grad_norm": 1.9510937929153442, "learning_rate": 2.0049248112650566e-05, "loss": 0.9656, "step": 4700 }, { "epoch": 0.7090497737556561, "grad_norm": 1.8830779790878296, "learning_rate": 2.0029989773204024e-05, "loss": 1.2589, "step": 4701 }, { "epoch": 0.7092006033182504, "grad_norm": 1.5967463254928589, "learning_rate": 2.0010738370574934e-05, "loss": 0.9066, "step": 4702 }, { "epoch": 0.7093514328808447, "grad_norm": 1.8664106130599976, "learning_rate": 1.999149390921919e-05, "loss": 1.1742, "step": 4703 }, { "epoch": 0.709502262443439, "grad_norm": 1.9601449966430664, "learning_rate": 1.99722563935911e-05, "loss": 1.4098, "step": 4704 }, { "epoch": 0.7096530920060332, "grad_norm": 2.063666582107544, "learning_rate": 1.995302582814334e-05, "loss": 0.9689, "step": 4705 }, { "epoch": 0.7098039215686275, "grad_norm": 1.879581093788147, "learning_rate": 1.9933802217327e-05, "loss": 1.0645, "step": 4706 }, { "epoch": 0.7099547511312218, "grad_norm": 2.230823040008545, "learning_rate": 1.9914585565591552e-05, "loss": 1.3748, "step": 4707 }, { "epoch": 0.710105580693816, "grad_norm": 1.8448981046676636, "learning_rate": 1.989537587738487e-05, "loss": 1.2598, "step": 4708 }, { "epoch": 0.7102564102564103, "grad_norm": 1.8379600048065186, "learning_rate": 1.987617315715316e-05, "loss": 1.1571, "step": 4709 }, { "epoch": 0.7104072398190046, "grad_norm": 1.7263849973678589, "learning_rate": 1.9856977409341086e-05, "loss": 1.0862, "step": 4710 }, { "epoch": 0.7105580693815988, "grad_norm": 1.8661818504333496, "learning_rate": 1.9837788638391662e-05, "loss": 1.1301, "step": 4711 }, { "epoch": 0.7107088989441931, "grad_norm": 2.108295202255249, "learning_rate": 1.9818606848746302e-05, "loss": 1.4039, "step": 4712 }, { "epoch": 0.7108597285067874, "grad_norm": 2.142303466796875, "learning_rate": 1.9799432044844784e-05, "loss": 1.4166, "step": 4713 }, { "epoch": 0.7110105580693816, "grad_norm": 1.8046369552612305, "learning_rate": 1.9780264231125295e-05, "loss": 1.0429, "step": 4714 }, { "epoch": 0.7111613876319759, "grad_norm": 2.0721189975738525, "learning_rate": 1.9761103412024386e-05, "loss": 1.4043, "step": 4715 }, { "epoch": 0.7113122171945702, "grad_norm": 2.270005464553833, "learning_rate": 1.9741949591976994e-05, "loss": 1.3961, "step": 4716 }, { "epoch": 0.7114630467571644, "grad_norm": 1.8235307931900024, "learning_rate": 1.9722802775416433e-05, "loss": 1.1985, "step": 4717 }, { "epoch": 0.7116138763197587, "grad_norm": 1.9048746824264526, "learning_rate": 1.97036629667744e-05, "loss": 0.9985, "step": 4718 }, { "epoch": 0.711764705882353, "grad_norm": 2.0765225887298584, "learning_rate": 1.9684530170480976e-05, "loss": 1.2987, "step": 4719 }, { "epoch": 0.7119155354449472, "grad_norm": 1.857518196105957, "learning_rate": 1.96654043909646e-05, "loss": 1.0164, "step": 4720 }, { "epoch": 0.7120663650075415, "grad_norm": 1.7040748596191406, "learning_rate": 1.9646285632652108e-05, "loss": 0.9366, "step": 4721 }, { "epoch": 0.7122171945701358, "grad_norm": 1.7884801626205444, "learning_rate": 1.962717389996869e-05, "loss": 0.97, "step": 4722 }, { "epoch": 0.71236802413273, "grad_norm": 1.6999517679214478, "learning_rate": 1.9608069197337943e-05, "loss": 0.8994, "step": 4723 }, { "epoch": 0.7125188536953243, "grad_norm": 2.0052361488342285, "learning_rate": 1.9588971529181793e-05, "loss": 1.177, "step": 4724 }, { "epoch": 0.7126696832579186, "grad_norm": 1.8715509176254272, "learning_rate": 1.9569880899920572e-05, "loss": 1.0375, "step": 4725 }, { "epoch": 0.7128205128205128, "grad_norm": 1.887596607208252, "learning_rate": 1.955079731397297e-05, "loss": 0.9737, "step": 4726 }, { "epoch": 0.7129713423831071, "grad_norm": 1.6888465881347656, "learning_rate": 1.9531720775756056e-05, "loss": 1.0485, "step": 4727 }, { "epoch": 0.7131221719457014, "grad_norm": 1.9778084754943848, "learning_rate": 1.9512651289685234e-05, "loss": 1.2368, "step": 4728 }, { "epoch": 0.7132730015082956, "grad_norm": 1.8924285173416138, "learning_rate": 1.949358886017432e-05, "loss": 1.0348, "step": 4729 }, { "epoch": 0.7134238310708899, "grad_norm": 1.9314289093017578, "learning_rate": 1.947453349163547e-05, "loss": 0.9969, "step": 4730 }, { "epoch": 0.7135746606334842, "grad_norm": 1.969964623451233, "learning_rate": 1.9455485188479216e-05, "loss": 1.1958, "step": 4731 }, { "epoch": 0.7137254901960784, "grad_norm": 1.9602099657058716, "learning_rate": 1.943644395511446e-05, "loss": 1.2372, "step": 4732 }, { "epoch": 0.7138763197586727, "grad_norm": 2.141599655151367, "learning_rate": 1.9417409795948444e-05, "loss": 1.1516, "step": 4733 }, { "epoch": 0.714027149321267, "grad_norm": 2.1001601219177246, "learning_rate": 1.9398382715386803e-05, "loss": 1.3957, "step": 4734 }, { "epoch": 0.7141779788838613, "grad_norm": 1.7423449754714966, "learning_rate": 1.937936271783351e-05, "loss": 0.7895, "step": 4735 }, { "epoch": 0.7143288084464555, "grad_norm": 1.8107064962387085, "learning_rate": 1.9360349807690932e-05, "loss": 1.0056, "step": 4736 }, { "epoch": 0.7144796380090498, "grad_norm": 1.8268147706985474, "learning_rate": 1.9341343989359732e-05, "loss": 0.9939, "step": 4737 }, { "epoch": 0.7146304675716441, "grad_norm": 2.1489553451538086, "learning_rate": 1.9322345267238982e-05, "loss": 1.2823, "step": 4738 }, { "epoch": 0.7147812971342383, "grad_norm": 1.7798676490783691, "learning_rate": 1.9303353645726118e-05, "loss": 1.0549, "step": 4739 }, { "epoch": 0.7149321266968326, "grad_norm": 2.1323862075805664, "learning_rate": 1.9284369129216895e-05, "loss": 0.9714, "step": 4740 }, { "epoch": 0.7150829562594269, "grad_norm": 2.2148053646087646, "learning_rate": 1.9265391722105453e-05, "loss": 1.1681, "step": 4741 }, { "epoch": 0.7152337858220211, "grad_norm": 2.6935877799987793, "learning_rate": 1.9246421428784277e-05, "loss": 1.5334, "step": 4742 }, { "epoch": 0.7153846153846154, "grad_norm": 2.1641807556152344, "learning_rate": 1.9227458253644203e-05, "loss": 1.0778, "step": 4743 }, { "epoch": 0.7155354449472097, "grad_norm": 2.0378687381744385, "learning_rate": 1.920850220107442e-05, "loss": 1.0659, "step": 4744 }, { "epoch": 0.7156862745098039, "grad_norm": 1.952075481414795, "learning_rate": 1.9189553275462492e-05, "loss": 1.0001, "step": 4745 }, { "epoch": 0.7158371040723982, "grad_norm": 2.0656321048736572, "learning_rate": 1.9170611481194273e-05, "loss": 1.1062, "step": 4746 }, { "epoch": 0.7159879336349925, "grad_norm": 1.7030006647109985, "learning_rate": 1.915167682265402e-05, "loss": 0.8154, "step": 4747 }, { "epoch": 0.7161387631975867, "grad_norm": 1.7365107536315918, "learning_rate": 1.9132749304224322e-05, "loss": 0.8383, "step": 4748 }, { "epoch": 0.716289592760181, "grad_norm": 1.619140863418579, "learning_rate": 1.9113828930286125e-05, "loss": 0.699, "step": 4749 }, { "epoch": 0.7164404223227753, "grad_norm": 1.6841168403625488, "learning_rate": 1.9094915705218714e-05, "loss": 0.7867, "step": 4750 }, { "epoch": 0.7165912518853695, "grad_norm": 1.9107561111450195, "learning_rate": 1.9076009633399704e-05, "loss": 1.4521, "step": 4751 }, { "epoch": 0.7167420814479638, "grad_norm": 2.07964825630188, "learning_rate": 1.905711071920508e-05, "loss": 1.3814, "step": 4752 }, { "epoch": 0.7168929110105581, "grad_norm": 1.8338582515716553, "learning_rate": 1.9038218967009153e-05, "loss": 1.0695, "step": 4753 }, { "epoch": 0.7170437405731523, "grad_norm": 1.8121578693389893, "learning_rate": 1.9019334381184585e-05, "loss": 1.1989, "step": 4754 }, { "epoch": 0.7171945701357466, "grad_norm": 1.9752155542373657, "learning_rate": 1.9000456966102393e-05, "loss": 1.3638, "step": 4755 }, { "epoch": 0.7173453996983409, "grad_norm": 1.7736616134643555, "learning_rate": 1.8981586726131882e-05, "loss": 1.058, "step": 4756 }, { "epoch": 0.7174962292609351, "grad_norm": 1.952149748802185, "learning_rate": 1.8962723665640754e-05, "loss": 1.1891, "step": 4757 }, { "epoch": 0.7176470588235294, "grad_norm": 1.837135910987854, "learning_rate": 1.8943867788995022e-05, "loss": 1.1117, "step": 4758 }, { "epoch": 0.7177978883861237, "grad_norm": 1.755486011505127, "learning_rate": 1.8925019100559038e-05, "loss": 0.9483, "step": 4759 }, { "epoch": 0.717948717948718, "grad_norm": 1.8415342569351196, "learning_rate": 1.8906177604695502e-05, "loss": 1.1491, "step": 4760 }, { "epoch": 0.7180995475113122, "grad_norm": 2.191547393798828, "learning_rate": 1.888734330576544e-05, "loss": 1.2659, "step": 4761 }, { "epoch": 0.7182503770739065, "grad_norm": 1.7941595315933228, "learning_rate": 1.8868516208128207e-05, "loss": 1.164, "step": 4762 }, { "epoch": 0.7184012066365008, "grad_norm": 2.156869649887085, "learning_rate": 1.8849696316141498e-05, "loss": 1.4915, "step": 4763 }, { "epoch": 0.718552036199095, "grad_norm": 1.7574915885925293, "learning_rate": 1.8830883634161346e-05, "loss": 0.9797, "step": 4764 }, { "epoch": 0.7187028657616893, "grad_norm": 1.960387110710144, "learning_rate": 1.8812078166542096e-05, "loss": 1.1661, "step": 4765 }, { "epoch": 0.7188536953242836, "grad_norm": 1.804801106452942, "learning_rate": 1.8793279917636448e-05, "loss": 1.1269, "step": 4766 }, { "epoch": 0.7190045248868778, "grad_norm": 1.4549438953399658, "learning_rate": 1.8774488891795415e-05, "loss": 0.7482, "step": 4767 }, { "epoch": 0.7191553544494721, "grad_norm": 2.167745590209961, "learning_rate": 1.875570509336834e-05, "loss": 1.164, "step": 4768 }, { "epoch": 0.7193061840120664, "grad_norm": 1.637089729309082, "learning_rate": 1.87369285267029e-05, "loss": 0.7873, "step": 4769 }, { "epoch": 0.7194570135746606, "grad_norm": 1.797652006149292, "learning_rate": 1.871815919614509e-05, "loss": 0.9256, "step": 4770 }, { "epoch": 0.7196078431372549, "grad_norm": 1.9391461610794067, "learning_rate": 1.8699397106039236e-05, "loss": 1.0956, "step": 4771 }, { "epoch": 0.7197586726998492, "grad_norm": 1.9888544082641602, "learning_rate": 1.8680642260727983e-05, "loss": 1.0665, "step": 4772 }, { "epoch": 0.7199095022624434, "grad_norm": 1.9632562398910522, "learning_rate": 1.8661894664552315e-05, "loss": 1.1297, "step": 4773 }, { "epoch": 0.7200603318250377, "grad_norm": 1.9895713329315186, "learning_rate": 1.8643154321851495e-05, "loss": 1.1555, "step": 4774 }, { "epoch": 0.720211161387632, "grad_norm": 1.938749074935913, "learning_rate": 1.8624421236963157e-05, "loss": 0.9858, "step": 4775 }, { "epoch": 0.7203619909502262, "grad_norm": 1.986496925354004, "learning_rate": 1.8605695414223235e-05, "loss": 1.299, "step": 4776 }, { "epoch": 0.7205128205128205, "grad_norm": 1.6421059370040894, "learning_rate": 1.8586976857965972e-05, "loss": 0.7973, "step": 4777 }, { "epoch": 0.7206636500754148, "grad_norm": 1.4433473348617554, "learning_rate": 1.8568265572523952e-05, "loss": 0.6636, "step": 4778 }, { "epoch": 0.720814479638009, "grad_norm": 1.7644453048706055, "learning_rate": 1.8549561562228056e-05, "loss": 0.8607, "step": 4779 }, { "epoch": 0.7209653092006033, "grad_norm": 2.094026803970337, "learning_rate": 1.853086483140749e-05, "loss": 1.1301, "step": 4780 }, { "epoch": 0.7211161387631976, "grad_norm": 1.8163297176361084, "learning_rate": 1.851217538438978e-05, "loss": 0.9565, "step": 4781 }, { "epoch": 0.7212669683257918, "grad_norm": 2.1483590602874756, "learning_rate": 1.8493493225500746e-05, "loss": 1.0173, "step": 4782 }, { "epoch": 0.7214177978883861, "grad_norm": 2.1042118072509766, "learning_rate": 1.8474818359064565e-05, "loss": 1.2059, "step": 4783 }, { "epoch": 0.7215686274509804, "grad_norm": 1.864789605140686, "learning_rate": 1.845615078940366e-05, "loss": 1.0341, "step": 4784 }, { "epoch": 0.7217194570135747, "grad_norm": 2.0685956478118896, "learning_rate": 1.843749052083881e-05, "loss": 1.2553, "step": 4785 }, { "epoch": 0.7218702865761689, "grad_norm": 2.186635732650757, "learning_rate": 1.84188375576891e-05, "loss": 1.2673, "step": 4786 }, { "epoch": 0.7220211161387632, "grad_norm": 1.9370559453964233, "learning_rate": 1.8400191904271923e-05, "loss": 1.1151, "step": 4787 }, { "epoch": 0.7221719457013575, "grad_norm": 2.049875020980835, "learning_rate": 1.8381553564902974e-05, "loss": 1.0245, "step": 4788 }, { "epoch": 0.7223227752639517, "grad_norm": 1.9033180475234985, "learning_rate": 1.8362922543896254e-05, "loss": 1.0084, "step": 4789 }, { "epoch": 0.722473604826546, "grad_norm": 2.051478624343872, "learning_rate": 1.8344298845564075e-05, "loss": 1.1533, "step": 4790 }, { "epoch": 0.7226244343891403, "grad_norm": 2.164081335067749, "learning_rate": 1.8325682474217054e-05, "loss": 1.1553, "step": 4791 }, { "epoch": 0.7227752639517345, "grad_norm": 1.6995793581008911, "learning_rate": 1.830707343416413e-05, "loss": 0.7408, "step": 4792 }, { "epoch": 0.7229260935143288, "grad_norm": 2.292006254196167, "learning_rate": 1.8288471729712487e-05, "loss": 1.3749, "step": 4793 }, { "epoch": 0.7230769230769231, "grad_norm": 2.1492135524749756, "learning_rate": 1.826987736516767e-05, "loss": 1.0909, "step": 4794 }, { "epoch": 0.7232277526395173, "grad_norm": 2.086118698120117, "learning_rate": 1.8251290344833498e-05, "loss": 1.2566, "step": 4795 }, { "epoch": 0.7233785822021116, "grad_norm": 1.630105972290039, "learning_rate": 1.8232710673012115e-05, "loss": 0.7052, "step": 4796 }, { "epoch": 0.7235294117647059, "grad_norm": 1.629761815071106, "learning_rate": 1.821413835400393e-05, "loss": 0.7525, "step": 4797 }, { "epoch": 0.7236802413273001, "grad_norm": 1.5539203882217407, "learning_rate": 1.8195573392107667e-05, "loss": 0.6668, "step": 4798 }, { "epoch": 0.7238310708898944, "grad_norm": 1.8216133117675781, "learning_rate": 1.8177015791620357e-05, "loss": 0.8638, "step": 4799 }, { "epoch": 0.7239819004524887, "grad_norm": 2.0074093341827393, "learning_rate": 1.8158465556837305e-05, "loss": 1.1121, "step": 4800 }, { "epoch": 0.7241327300150829, "grad_norm": 1.7724107503890991, "learning_rate": 1.8139922692052134e-05, "loss": 1.3952, "step": 4801 }, { "epoch": 0.7242835595776772, "grad_norm": 2.126448631286621, "learning_rate": 1.8121387201556762e-05, "loss": 1.4481, "step": 4802 }, { "epoch": 0.7244343891402715, "grad_norm": 1.8554126024246216, "learning_rate": 1.8102859089641348e-05, "loss": 1.2876, "step": 4803 }, { "epoch": 0.7245852187028657, "grad_norm": 1.7991535663604736, "learning_rate": 1.808433836059441e-05, "loss": 1.232, "step": 4804 }, { "epoch": 0.72473604826546, "grad_norm": 1.8884533643722534, "learning_rate": 1.8065825018702732e-05, "loss": 1.2576, "step": 4805 }, { "epoch": 0.7248868778280543, "grad_norm": 2.0248684883117676, "learning_rate": 1.804731906825137e-05, "loss": 1.1705, "step": 4806 }, { "epoch": 0.7250377073906485, "grad_norm": 1.8839731216430664, "learning_rate": 1.802882051352371e-05, "loss": 1.0923, "step": 4807 }, { "epoch": 0.7251885369532428, "grad_norm": 1.7766762971878052, "learning_rate": 1.801032935880138e-05, "loss": 0.9041, "step": 4808 }, { "epoch": 0.7253393665158371, "grad_norm": 1.7719634771347046, "learning_rate": 1.7991845608364334e-05, "loss": 1.016, "step": 4809 }, { "epoch": 0.7254901960784313, "grad_norm": 2.140211820602417, "learning_rate": 1.797336926649078e-05, "loss": 1.4523, "step": 4810 }, { "epoch": 0.7256410256410256, "grad_norm": 1.856956124305725, "learning_rate": 1.795490033745724e-05, "loss": 1.1116, "step": 4811 }, { "epoch": 0.7257918552036199, "grad_norm": 1.8282325267791748, "learning_rate": 1.7936438825538504e-05, "loss": 1.0742, "step": 4812 }, { "epoch": 0.7259426847662142, "grad_norm": 2.1738719940185547, "learning_rate": 1.7917984735007642e-05, "loss": 1.1572, "step": 4813 }, { "epoch": 0.7260935143288084, "grad_norm": 1.9374338388442993, "learning_rate": 1.789953807013602e-05, "loss": 1.097, "step": 4814 }, { "epoch": 0.7262443438914027, "grad_norm": 1.9549113512039185, "learning_rate": 1.788109883519327e-05, "loss": 0.9847, "step": 4815 }, { "epoch": 0.726395173453997, "grad_norm": 1.676953911781311, "learning_rate": 1.7862667034447323e-05, "loss": 0.9071, "step": 4816 }, { "epoch": 0.7265460030165912, "grad_norm": 1.8052066564559937, "learning_rate": 1.784424267216437e-05, "loss": 0.9638, "step": 4817 }, { "epoch": 0.7266968325791855, "grad_norm": 1.5305002927780151, "learning_rate": 1.7825825752608882e-05, "loss": 0.7532, "step": 4818 }, { "epoch": 0.7268476621417798, "grad_norm": 1.7620089054107666, "learning_rate": 1.7807416280043633e-05, "loss": 1.0366, "step": 4819 }, { "epoch": 0.726998491704374, "grad_norm": 1.9162039756774902, "learning_rate": 1.778901425872966e-05, "loss": 0.9513, "step": 4820 }, { "epoch": 0.7271493212669683, "grad_norm": 2.1095848083496094, "learning_rate": 1.7770619692926227e-05, "loss": 1.1588, "step": 4821 }, { "epoch": 0.7273001508295626, "grad_norm": 2.0806643962860107, "learning_rate": 1.775223258689095e-05, "loss": 1.4605, "step": 4822 }, { "epoch": 0.7274509803921568, "grad_norm": 1.7466709613800049, "learning_rate": 1.773385294487967e-05, "loss": 0.9972, "step": 4823 }, { "epoch": 0.7276018099547511, "grad_norm": 1.932240605354309, "learning_rate": 1.771548077114652e-05, "loss": 1.0581, "step": 4824 }, { "epoch": 0.7277526395173454, "grad_norm": 1.8386212587356567, "learning_rate": 1.76971160699439e-05, "loss": 0.9207, "step": 4825 }, { "epoch": 0.7279034690799396, "grad_norm": 2.353241443634033, "learning_rate": 1.767875884552247e-05, "loss": 1.1636, "step": 4826 }, { "epoch": 0.7280542986425339, "grad_norm": 1.9590187072753906, "learning_rate": 1.766040910213117e-05, "loss": 1.1624, "step": 4827 }, { "epoch": 0.7282051282051282, "grad_norm": 1.8263847827911377, "learning_rate": 1.7642066844017214e-05, "loss": 0.9605, "step": 4828 }, { "epoch": 0.7283559577677224, "grad_norm": 1.8221766948699951, "learning_rate": 1.7623732075426074e-05, "loss": 1.086, "step": 4829 }, { "epoch": 0.7285067873303167, "grad_norm": 2.0725719928741455, "learning_rate": 1.7605404800601498e-05, "loss": 1.3137, "step": 4830 }, { "epoch": 0.728657616892911, "grad_norm": 1.8284882307052612, "learning_rate": 1.758708502378547e-05, "loss": 1.0235, "step": 4831 }, { "epoch": 0.7288084464555052, "grad_norm": 1.9570002555847168, "learning_rate": 1.756877274921827e-05, "loss": 1.0599, "step": 4832 }, { "epoch": 0.7289592760180995, "grad_norm": 1.9772818088531494, "learning_rate": 1.755046798113843e-05, "loss": 1.0823, "step": 4833 }, { "epoch": 0.7291101055806938, "grad_norm": 2.2867209911346436, "learning_rate": 1.7532170723782754e-05, "loss": 1.2958, "step": 4834 }, { "epoch": 0.729260935143288, "grad_norm": 2.10429310798645, "learning_rate": 1.75138809813863e-05, "loss": 1.4339, "step": 4835 }, { "epoch": 0.7294117647058823, "grad_norm": 1.722807765007019, "learning_rate": 1.749559875818238e-05, "loss": 0.9093, "step": 4836 }, { "epoch": 0.7295625942684766, "grad_norm": 2.129967212677002, "learning_rate": 1.747732405840257e-05, "loss": 1.4409, "step": 4837 }, { "epoch": 0.7297134238310709, "grad_norm": 2.1248631477355957, "learning_rate": 1.745905688627672e-05, "loss": 1.1785, "step": 4838 }, { "epoch": 0.7298642533936651, "grad_norm": 2.1706814765930176, "learning_rate": 1.744079724603293e-05, "loss": 1.4719, "step": 4839 }, { "epoch": 0.7300150829562594, "grad_norm": 2.163557767868042, "learning_rate": 1.7422545141897522e-05, "loss": 1.163, "step": 4840 }, { "epoch": 0.7301659125188537, "grad_norm": 2.0990960597991943, "learning_rate": 1.7404300578095123e-05, "loss": 1.0487, "step": 4841 }, { "epoch": 0.7303167420814479, "grad_norm": 2.1790239810943604, "learning_rate": 1.7386063558848586e-05, "loss": 1.1078, "step": 4842 }, { "epoch": 0.7304675716440422, "grad_norm": 2.124635934829712, "learning_rate": 1.736783408837903e-05, "loss": 1.2208, "step": 4843 }, { "epoch": 0.7306184012066365, "grad_norm": 2.131441831588745, "learning_rate": 1.734961217090583e-05, "loss": 1.2728, "step": 4844 }, { "epoch": 0.7307692307692307, "grad_norm": 1.8008496761322021, "learning_rate": 1.73313978106466e-05, "loss": 0.8915, "step": 4845 }, { "epoch": 0.730920060331825, "grad_norm": 1.5782508850097656, "learning_rate": 1.7313191011817208e-05, "loss": 0.767, "step": 4846 }, { "epoch": 0.7310708898944193, "grad_norm": 1.2700613737106323, "learning_rate": 1.729499177863178e-05, "loss": 0.4994, "step": 4847 }, { "epoch": 0.7312217194570135, "grad_norm": 1.7661645412445068, "learning_rate": 1.72768001153027e-05, "loss": 0.9226, "step": 4848 }, { "epoch": 0.7313725490196078, "grad_norm": 2.062297821044922, "learning_rate": 1.7258616026040552e-05, "loss": 1.1291, "step": 4849 }, { "epoch": 0.7315233785822021, "grad_norm": 2.1212666034698486, "learning_rate": 1.7240439515054218e-05, "loss": 1.1032, "step": 4850 }, { "epoch": 0.7316742081447963, "grad_norm": 1.759946584701538, "learning_rate": 1.7222270586550808e-05, "loss": 1.3952, "step": 4851 }, { "epoch": 0.7318250377073906, "grad_norm": 2.1994497776031494, "learning_rate": 1.7204109244735677e-05, "loss": 1.6379, "step": 4852 }, { "epoch": 0.7319758672699849, "grad_norm": 1.982363224029541, "learning_rate": 1.7185955493812424e-05, "loss": 1.4685, "step": 4853 }, { "epoch": 0.7321266968325791, "grad_norm": 1.8298766613006592, "learning_rate": 1.7167809337982893e-05, "loss": 1.1205, "step": 4854 }, { "epoch": 0.7322775263951734, "grad_norm": 1.937989592552185, "learning_rate": 1.7149670781447163e-05, "loss": 1.1978, "step": 4855 }, { "epoch": 0.7324283559577677, "grad_norm": 2.0450944900512695, "learning_rate": 1.713153982840357e-05, "loss": 1.4498, "step": 4856 }, { "epoch": 0.7325791855203619, "grad_norm": 2.0751776695251465, "learning_rate": 1.7113416483048668e-05, "loss": 1.2629, "step": 4857 }, { "epoch": 0.7327300150829562, "grad_norm": 2.0930843353271484, "learning_rate": 1.7095300749577265e-05, "loss": 1.382, "step": 4858 }, { "epoch": 0.7328808446455505, "grad_norm": 1.930856466293335, "learning_rate": 1.7077192632182408e-05, "loss": 1.2962, "step": 4859 }, { "epoch": 0.7330316742081447, "grad_norm": 1.9677740335464478, "learning_rate": 1.705909213505537e-05, "loss": 1.1352, "step": 4860 }, { "epoch": 0.733182503770739, "grad_norm": 1.7303441762924194, "learning_rate": 1.7040999262385676e-05, "loss": 0.9529, "step": 4861 }, { "epoch": 0.7333333333333333, "grad_norm": 1.9710752964019775, "learning_rate": 1.7022914018361073e-05, "loss": 1.1784, "step": 4862 }, { "epoch": 0.7334841628959275, "grad_norm": 1.7445967197418213, "learning_rate": 1.7004836407167547e-05, "loss": 1.0041, "step": 4863 }, { "epoch": 0.7336349924585218, "grad_norm": 1.8812105655670166, "learning_rate": 1.6986766432989316e-05, "loss": 1.0714, "step": 4864 }, { "epoch": 0.7337858220211161, "grad_norm": 2.2443578243255615, "learning_rate": 1.696870410000883e-05, "loss": 1.3777, "step": 4865 }, { "epoch": 0.7339366515837104, "grad_norm": 1.906666874885559, "learning_rate": 1.6950649412406777e-05, "loss": 1.0856, "step": 4866 }, { "epoch": 0.7340874811463046, "grad_norm": 1.8164887428283691, "learning_rate": 1.6932602374362084e-05, "loss": 1.0161, "step": 4867 }, { "epoch": 0.7342383107088989, "grad_norm": 1.6790679693222046, "learning_rate": 1.6914562990051863e-05, "loss": 0.8452, "step": 4868 }, { "epoch": 0.7343891402714933, "grad_norm": 2.04697585105896, "learning_rate": 1.6896531263651495e-05, "loss": 1.2111, "step": 4869 }, { "epoch": 0.7345399698340875, "grad_norm": 1.9416601657867432, "learning_rate": 1.687850719933458e-05, "loss": 1.0148, "step": 4870 }, { "epoch": 0.7346907993966818, "grad_norm": 1.8429254293441772, "learning_rate": 1.6860490801272956e-05, "loss": 1.198, "step": 4871 }, { "epoch": 0.7348416289592761, "grad_norm": 1.8190077543258667, "learning_rate": 1.684248207363665e-05, "loss": 0.9995, "step": 4872 }, { "epoch": 0.7349924585218703, "grad_norm": 2.133213520050049, "learning_rate": 1.6824481020593962e-05, "loss": 0.9788, "step": 4873 }, { "epoch": 0.7351432880844646, "grad_norm": 1.8261045217514038, "learning_rate": 1.6806487646311374e-05, "loss": 1.085, "step": 4874 }, { "epoch": 0.7352941176470589, "grad_norm": 1.8499606847763062, "learning_rate": 1.6788501954953618e-05, "loss": 1.1184, "step": 4875 }, { "epoch": 0.7354449472096531, "grad_norm": 1.775574803352356, "learning_rate": 1.677052395068365e-05, "loss": 0.9276, "step": 4876 }, { "epoch": 0.7355957767722474, "grad_norm": 1.558165431022644, "learning_rate": 1.67525536376626e-05, "loss": 0.7022, "step": 4877 }, { "epoch": 0.7357466063348417, "grad_norm": 1.891477108001709, "learning_rate": 1.6734591020049866e-05, "loss": 0.9208, "step": 4878 }, { "epoch": 0.735897435897436, "grad_norm": 1.895741581916809, "learning_rate": 1.671663610200306e-05, "loss": 0.9533, "step": 4879 }, { "epoch": 0.7360482654600302, "grad_norm": 2.3489160537719727, "learning_rate": 1.6698688887677993e-05, "loss": 1.368, "step": 4880 }, { "epoch": 0.7361990950226245, "grad_norm": 1.661695957183838, "learning_rate": 1.668074938122871e-05, "loss": 0.7946, "step": 4881 }, { "epoch": 0.7363499245852188, "grad_norm": 1.9571871757507324, "learning_rate": 1.6662817586807457e-05, "loss": 1.1332, "step": 4882 }, { "epoch": 0.736500754147813, "grad_norm": 1.7624675035476685, "learning_rate": 1.664489350856471e-05, "loss": 0.8498, "step": 4883 }, { "epoch": 0.7366515837104073, "grad_norm": 2.0866923332214355, "learning_rate": 1.662697715064915e-05, "loss": 1.2649, "step": 4884 }, { "epoch": 0.7368024132730016, "grad_norm": 1.979429841041565, "learning_rate": 1.6609068517207664e-05, "loss": 1.1418, "step": 4885 }, { "epoch": 0.7369532428355958, "grad_norm": 2.136944532394409, "learning_rate": 1.6591167612385388e-05, "loss": 1.2424, "step": 4886 }, { "epoch": 0.7371040723981901, "grad_norm": 2.1381490230560303, "learning_rate": 1.6573274440325604e-05, "loss": 1.0101, "step": 4887 }, { "epoch": 0.7372549019607844, "grad_norm": 1.8637332916259766, "learning_rate": 1.655538900516986e-05, "loss": 0.7729, "step": 4888 }, { "epoch": 0.7374057315233786, "grad_norm": 1.9317816495895386, "learning_rate": 1.653751131105788e-05, "loss": 1.0867, "step": 4889 }, { "epoch": 0.7375565610859729, "grad_norm": 2.1188855171203613, "learning_rate": 1.651964136212763e-05, "loss": 1.202, "step": 4890 }, { "epoch": 0.7377073906485672, "grad_norm": 2.3408398628234863, "learning_rate": 1.6501779162515252e-05, "loss": 1.4021, "step": 4891 }, { "epoch": 0.7378582202111614, "grad_norm": 2.330956220626831, "learning_rate": 1.648392471635511e-05, "loss": 1.2171, "step": 4892 }, { "epoch": 0.7380090497737557, "grad_norm": 2.376668930053711, "learning_rate": 1.6466078027779768e-05, "loss": 1.276, "step": 4893 }, { "epoch": 0.73815987933635, "grad_norm": 2.252420663833618, "learning_rate": 1.6448239100919998e-05, "loss": 1.0838, "step": 4894 }, { "epoch": 0.7383107088989442, "grad_norm": 2.440389394760132, "learning_rate": 1.643040793990479e-05, "loss": 1.2461, "step": 4895 }, { "epoch": 0.7384615384615385, "grad_norm": 2.010274887084961, "learning_rate": 1.6412584548861286e-05, "loss": 0.9812, "step": 4896 }, { "epoch": 0.7386123680241328, "grad_norm": 1.634930968284607, "learning_rate": 1.6394768931914885e-05, "loss": 0.6824, "step": 4897 }, { "epoch": 0.738763197586727, "grad_norm": 1.4546655416488647, "learning_rate": 1.637696109318915e-05, "loss": 0.6817, "step": 4898 }, { "epoch": 0.7389140271493213, "grad_norm": 1.6289114952087402, "learning_rate": 1.635916103680588e-05, "loss": 0.5859, "step": 4899 }, { "epoch": 0.7390648567119156, "grad_norm": 1.871317744255066, "learning_rate": 1.634136876688504e-05, "loss": 0.8462, "step": 4900 }, { "epoch": 0.7392156862745098, "grad_norm": 2.2586629390716553, "learning_rate": 1.6323584287544802e-05, "loss": 1.4082, "step": 4901 }, { "epoch": 0.7393665158371041, "grad_norm": 2.004472255706787, "learning_rate": 1.630580760290154e-05, "loss": 1.2294, "step": 4902 }, { "epoch": 0.7395173453996984, "grad_norm": 1.6189942359924316, "learning_rate": 1.6288038717069827e-05, "loss": 0.8801, "step": 4903 }, { "epoch": 0.7396681749622926, "grad_norm": 1.7926902770996094, "learning_rate": 1.6270277634162413e-05, "loss": 1.0646, "step": 4904 }, { "epoch": 0.7398190045248869, "grad_norm": 2.009446620941162, "learning_rate": 1.625252435829026e-05, "loss": 1.1965, "step": 4905 }, { "epoch": 0.7399698340874812, "grad_norm": 1.6455755233764648, "learning_rate": 1.6234778893562526e-05, "loss": 0.9883, "step": 4906 }, { "epoch": 0.7401206636500754, "grad_norm": 1.8315411806106567, "learning_rate": 1.621704124408654e-05, "loss": 1.0938, "step": 4907 }, { "epoch": 0.7402714932126697, "grad_norm": 1.4795883893966675, "learning_rate": 1.6199311413967833e-05, "loss": 0.6583, "step": 4908 }, { "epoch": 0.740422322775264, "grad_norm": 1.9206700325012207, "learning_rate": 1.6181589407310132e-05, "loss": 1.1233, "step": 4909 }, { "epoch": 0.7405731523378583, "grad_norm": 2.071958065032959, "learning_rate": 1.6163875228215353e-05, "loss": 1.2006, "step": 4910 }, { "epoch": 0.7407239819004525, "grad_norm": 2.000150203704834, "learning_rate": 1.6146168880783592e-05, "loss": 1.3674, "step": 4911 }, { "epoch": 0.7408748114630468, "grad_norm": 1.7523813247680664, "learning_rate": 1.612847036911313e-05, "loss": 1.0429, "step": 4912 }, { "epoch": 0.7410256410256411, "grad_norm": 2.21526837348938, "learning_rate": 1.6110779697300444e-05, "loss": 1.4381, "step": 4913 }, { "epoch": 0.7411764705882353, "grad_norm": 1.7881379127502441, "learning_rate": 1.609309686944021e-05, "loss": 1.0427, "step": 4914 }, { "epoch": 0.7413273001508296, "grad_norm": 1.9808193445205688, "learning_rate": 1.6075421889625243e-05, "loss": 1.1109, "step": 4915 }, { "epoch": 0.7414781297134239, "grad_norm": 1.7392592430114746, "learning_rate": 1.6057754761946575e-05, "loss": 0.927, "step": 4916 }, { "epoch": 0.7416289592760181, "grad_norm": 1.8602436780929565, "learning_rate": 1.6040095490493424e-05, "loss": 1.1138, "step": 4917 }, { "epoch": 0.7417797888386124, "grad_norm": 1.9554855823516846, "learning_rate": 1.602244407935317e-05, "loss": 1.068, "step": 4918 }, { "epoch": 0.7419306184012067, "grad_norm": 2.0433645248413086, "learning_rate": 1.6004800532611403e-05, "loss": 1.097, "step": 4919 }, { "epoch": 0.7420814479638009, "grad_norm": 1.9246633052825928, "learning_rate": 1.598716485435186e-05, "loss": 1.0844, "step": 4920 }, { "epoch": 0.7422322775263952, "grad_norm": 2.077012300491333, "learning_rate": 1.5969537048656474e-05, "loss": 1.2105, "step": 4921 }, { "epoch": 0.7423831070889895, "grad_norm": 1.8168002367019653, "learning_rate": 1.5951917119605354e-05, "loss": 1.0783, "step": 4922 }, { "epoch": 0.7425339366515837, "grad_norm": 1.8099409341812134, "learning_rate": 1.5934305071276794e-05, "loss": 0.8227, "step": 4923 }, { "epoch": 0.742684766214178, "grad_norm": 1.9858304262161255, "learning_rate": 1.5916700907747235e-05, "loss": 1.2052, "step": 4924 }, { "epoch": 0.7428355957767723, "grad_norm": 2.2734742164611816, "learning_rate": 1.5899104633091316e-05, "loss": 1.1724, "step": 4925 }, { "epoch": 0.7429864253393665, "grad_norm": 1.9134784936904907, "learning_rate": 1.5881516251381856e-05, "loss": 0.9968, "step": 4926 }, { "epoch": 0.7431372549019608, "grad_norm": 1.886278510093689, "learning_rate": 1.5863935766689837e-05, "loss": 1.0421, "step": 4927 }, { "epoch": 0.7432880844645551, "grad_norm": 1.7122905254364014, "learning_rate": 1.5846363183084406e-05, "loss": 0.9161, "step": 4928 }, { "epoch": 0.7434389140271493, "grad_norm": 2.1915364265441895, "learning_rate": 1.582879850463289e-05, "loss": 1.2031, "step": 4929 }, { "epoch": 0.7435897435897436, "grad_norm": 1.8439404964447021, "learning_rate": 1.5811241735400795e-05, "loss": 1.026, "step": 4930 }, { "epoch": 0.7437405731523379, "grad_norm": 2.3225557804107666, "learning_rate": 1.5793692879451784e-05, "loss": 1.1675, "step": 4931 }, { "epoch": 0.7438914027149321, "grad_norm": 1.7642433643341064, "learning_rate": 1.5776151940847683e-05, "loss": 0.9014, "step": 4932 }, { "epoch": 0.7440422322775264, "grad_norm": 1.9423654079437256, "learning_rate": 1.5758618923648515e-05, "loss": 0.9934, "step": 4933 }, { "epoch": 0.7441930618401207, "grad_norm": 2.103456974029541, "learning_rate": 1.5741093831912418e-05, "loss": 1.1893, "step": 4934 }, { "epoch": 0.744343891402715, "grad_norm": 2.110506057739258, "learning_rate": 1.5723576669695738e-05, "loss": 1.2146, "step": 4935 }, { "epoch": 0.7444947209653092, "grad_norm": 2.0451860427856445, "learning_rate": 1.5706067441052975e-05, "loss": 0.9598, "step": 4936 }, { "epoch": 0.7446455505279035, "grad_norm": 1.7031582593917847, "learning_rate": 1.5688566150036787e-05, "loss": 0.8535, "step": 4937 }, { "epoch": 0.7447963800904978, "grad_norm": 2.132134437561035, "learning_rate": 1.5671072800698005e-05, "loss": 1.0653, "step": 4938 }, { "epoch": 0.744947209653092, "grad_norm": 1.5440778732299805, "learning_rate": 1.5653587397085617e-05, "loss": 0.7815, "step": 4939 }, { "epoch": 0.7450980392156863, "grad_norm": 2.132903575897217, "learning_rate": 1.563610994324676e-05, "loss": 1.1432, "step": 4940 }, { "epoch": 0.7452488687782806, "grad_norm": 2.1139414310455322, "learning_rate": 1.5618640443226757e-05, "loss": 1.0026, "step": 4941 }, { "epoch": 0.7453996983408748, "grad_norm": 1.9735926389694214, "learning_rate": 1.560117890106908e-05, "loss": 0.8853, "step": 4942 }, { "epoch": 0.7455505279034691, "grad_norm": 1.8868249654769897, "learning_rate": 1.5583725320815317e-05, "loss": 0.8197, "step": 4943 }, { "epoch": 0.7457013574660634, "grad_norm": 2.272181987762451, "learning_rate": 1.5566279706505278e-05, "loss": 1.2424, "step": 4944 }, { "epoch": 0.7458521870286576, "grad_norm": 2.04722261428833, "learning_rate": 1.5548842062176895e-05, "loss": 0.8385, "step": 4945 }, { "epoch": 0.7460030165912519, "grad_norm": 1.779091715812683, "learning_rate": 1.5531412391866255e-05, "loss": 0.9462, "step": 4946 }, { "epoch": 0.7461538461538462, "grad_norm": 1.9990626573562622, "learning_rate": 1.5513990699607615e-05, "loss": 1.0861, "step": 4947 }, { "epoch": 0.7463046757164404, "grad_norm": 1.9089215993881226, "learning_rate": 1.5496576989433376e-05, "loss": 0.8622, "step": 4948 }, { "epoch": 0.7464555052790347, "grad_norm": 1.7954344749450684, "learning_rate": 1.5479171265374087e-05, "loss": 0.7592, "step": 4949 }, { "epoch": 0.746606334841629, "grad_norm": 1.7749454975128174, "learning_rate": 1.5461773531458457e-05, "loss": 0.8037, "step": 4950 }, { "epoch": 0.7467571644042232, "grad_norm": 1.7315545082092285, "learning_rate": 1.5444383791713335e-05, "loss": 1.1528, "step": 4951 }, { "epoch": 0.7469079939668175, "grad_norm": 2.0746207237243652, "learning_rate": 1.542700205016373e-05, "loss": 1.3107, "step": 4952 }, { "epoch": 0.7470588235294118, "grad_norm": 2.067880153656006, "learning_rate": 1.5409628310832796e-05, "loss": 1.1175, "step": 4953 }, { "epoch": 0.747209653092006, "grad_norm": 1.9108774662017822, "learning_rate": 1.539226257774184e-05, "loss": 1.1475, "step": 4954 }, { "epoch": 0.7473604826546003, "grad_norm": 2.0815038681030273, "learning_rate": 1.5374904854910305e-05, "loss": 1.3037, "step": 4955 }, { "epoch": 0.7475113122171946, "grad_norm": 2.060472011566162, "learning_rate": 1.5357555146355784e-05, "loss": 1.2787, "step": 4956 }, { "epoch": 0.7476621417797888, "grad_norm": 1.9154640436172485, "learning_rate": 1.5340213456094016e-05, "loss": 1.0715, "step": 4957 }, { "epoch": 0.7478129713423831, "grad_norm": 1.5503606796264648, "learning_rate": 1.5322879788138893e-05, "loss": 0.7132, "step": 4958 }, { "epoch": 0.7479638009049774, "grad_norm": 1.9811079502105713, "learning_rate": 1.5305554146502438e-05, "loss": 1.0692, "step": 4959 }, { "epoch": 0.7481146304675717, "grad_norm": 1.8908621072769165, "learning_rate": 1.5288236535194816e-05, "loss": 1.0602, "step": 4960 }, { "epoch": 0.7482654600301659, "grad_norm": 2.0025556087493896, "learning_rate": 1.5270926958224362e-05, "loss": 1.4586, "step": 4961 }, { "epoch": 0.7484162895927602, "grad_norm": 1.8453936576843262, "learning_rate": 1.5253625419597483e-05, "loss": 1.1406, "step": 4962 }, { "epoch": 0.7485671191553545, "grad_norm": 2.027070999145508, "learning_rate": 1.5236331923318798e-05, "loss": 1.1324, "step": 4963 }, { "epoch": 0.7487179487179487, "grad_norm": 2.0184102058410645, "learning_rate": 1.521904647339103e-05, "loss": 1.2107, "step": 4964 }, { "epoch": 0.748868778280543, "grad_norm": 1.851191520690918, "learning_rate": 1.5201769073815048e-05, "loss": 1.0877, "step": 4965 }, { "epoch": 0.7490196078431373, "grad_norm": 1.7799954414367676, "learning_rate": 1.5184499728589846e-05, "loss": 0.9289, "step": 4966 }, { "epoch": 0.7491704374057315, "grad_norm": 2.210563898086548, "learning_rate": 1.5167238441712573e-05, "loss": 1.2192, "step": 4967 }, { "epoch": 0.7493212669683258, "grad_norm": 2.1202516555786133, "learning_rate": 1.5149985217178504e-05, "loss": 1.1629, "step": 4968 }, { "epoch": 0.7494720965309201, "grad_norm": 1.795179009437561, "learning_rate": 1.5132740058981038e-05, "loss": 0.9205, "step": 4969 }, { "epoch": 0.7496229260935143, "grad_norm": 1.7705212831497192, "learning_rate": 1.5115502971111734e-05, "loss": 1.033, "step": 4970 }, { "epoch": 0.7497737556561086, "grad_norm": 2.00350022315979, "learning_rate": 1.5098273957560239e-05, "loss": 1.1486, "step": 4971 }, { "epoch": 0.7499245852187029, "grad_norm": 1.9059998989105225, "learning_rate": 1.5081053022314368e-05, "loss": 1.0549, "step": 4972 }, { "epoch": 0.7500754147812971, "grad_norm": 1.6564347743988037, "learning_rate": 1.5063840169360054e-05, "loss": 0.861, "step": 4973 }, { "epoch": 0.7502262443438914, "grad_norm": 1.5233951807022095, "learning_rate": 1.5046635402681364e-05, "loss": 0.6739, "step": 4974 }, { "epoch": 0.7503770739064857, "grad_norm": 1.7979446649551392, "learning_rate": 1.5029438726260487e-05, "loss": 0.8687, "step": 4975 }, { "epoch": 0.7505279034690799, "grad_norm": 1.9140712022781372, "learning_rate": 1.5012250144077738e-05, "loss": 0.9986, "step": 4976 }, { "epoch": 0.7506787330316742, "grad_norm": 1.9370784759521484, "learning_rate": 1.4995069660111577e-05, "loss": 1.039, "step": 4977 }, { "epoch": 0.7508295625942685, "grad_norm": 1.8144028186798096, "learning_rate": 1.497789727833856e-05, "loss": 0.9378, "step": 4978 }, { "epoch": 0.7509803921568627, "grad_norm": 1.8304857015609741, "learning_rate": 1.4960733002733407e-05, "loss": 0.8561, "step": 4979 }, { "epoch": 0.751131221719457, "grad_norm": 1.8856070041656494, "learning_rate": 1.4943576837268897e-05, "loss": 1.0029, "step": 4980 }, { "epoch": 0.7512820512820513, "grad_norm": 1.8032786846160889, "learning_rate": 1.4926428785916003e-05, "loss": 0.9704, "step": 4981 }, { "epoch": 0.7514328808446455, "grad_norm": 1.7411922216415405, "learning_rate": 1.4909288852643777e-05, "loss": 0.9161, "step": 4982 }, { "epoch": 0.7515837104072398, "grad_norm": 1.5837706327438354, "learning_rate": 1.4892157041419408e-05, "loss": 0.8035, "step": 4983 }, { "epoch": 0.7517345399698341, "grad_norm": 2.195920944213867, "learning_rate": 1.4875033356208206e-05, "loss": 1.3502, "step": 4984 }, { "epoch": 0.7518853695324283, "grad_norm": 2.0343034267425537, "learning_rate": 1.4857917800973591e-05, "loss": 1.2164, "step": 4985 }, { "epoch": 0.7520361990950226, "grad_norm": 1.7179968357086182, "learning_rate": 1.4840810379677105e-05, "loss": 0.8779, "step": 4986 }, { "epoch": 0.7521870286576169, "grad_norm": 2.0550124645233154, "learning_rate": 1.4823711096278408e-05, "loss": 1.1685, "step": 4987 }, { "epoch": 0.7523378582202112, "grad_norm": 2.318574905395508, "learning_rate": 1.4806619954735284e-05, "loss": 1.3482, "step": 4988 }, { "epoch": 0.7524886877828054, "grad_norm": 2.1781351566314697, "learning_rate": 1.4789536959003637e-05, "loss": 1.2167, "step": 4989 }, { "epoch": 0.7526395173453997, "grad_norm": 2.2642064094543457, "learning_rate": 1.4772462113037433e-05, "loss": 1.3261, "step": 4990 }, { "epoch": 0.752790346907994, "grad_norm": 1.7861729860305786, "learning_rate": 1.475539542078882e-05, "loss": 0.9119, "step": 4991 }, { "epoch": 0.7529411764705882, "grad_norm": 1.9445960521697998, "learning_rate": 1.4738336886208032e-05, "loss": 0.9696, "step": 4992 }, { "epoch": 0.7530920060331825, "grad_norm": 2.173095226287842, "learning_rate": 1.4721286513243405e-05, "loss": 1.2829, "step": 4993 }, { "epoch": 0.7532428355957768, "grad_norm": 2.0386602878570557, "learning_rate": 1.4704244305841397e-05, "loss": 1.1252, "step": 4994 }, { "epoch": 0.753393665158371, "grad_norm": 1.8552398681640625, "learning_rate": 1.4687210267946577e-05, "loss": 0.9301, "step": 4995 }, { "epoch": 0.7535444947209653, "grad_norm": 1.685457706451416, "learning_rate": 1.4670184403501624e-05, "loss": 0.7606, "step": 4996 }, { "epoch": 0.7536953242835596, "grad_norm": 2.091202735900879, "learning_rate": 1.4653166716447314e-05, "loss": 0.8093, "step": 4997 }, { "epoch": 0.7538461538461538, "grad_norm": 1.4462225437164307, "learning_rate": 1.463615721072254e-05, "loss": 0.5504, "step": 4998 }, { "epoch": 0.7539969834087481, "grad_norm": 1.6906861066818237, "learning_rate": 1.4619155890264297e-05, "loss": 0.6236, "step": 4999 }, { "epoch": 0.7541478129713424, "grad_norm": 2.3006789684295654, "learning_rate": 1.4602162759007693e-05, "loss": 1.1507, "step": 5000 }, { "epoch": 0.7542986425339366, "grad_norm": 1.8798370361328125, "learning_rate": 1.4585177820885925e-05, "loss": 1.1631, "step": 5001 }, { "epoch": 0.7544494720965309, "grad_norm": 1.4941554069519043, "learning_rate": 1.4568201079830312e-05, "loss": 0.6717, "step": 5002 }, { "epoch": 0.7546003016591252, "grad_norm": 1.7841168642044067, "learning_rate": 1.4551232539770272e-05, "loss": 0.8939, "step": 5003 }, { "epoch": 0.7547511312217194, "grad_norm": 1.810080885887146, "learning_rate": 1.4534272204633303e-05, "loss": 0.9581, "step": 5004 }, { "epoch": 0.7549019607843137, "grad_norm": 1.7913134098052979, "learning_rate": 1.4517320078345037e-05, "loss": 1.2099, "step": 5005 }, { "epoch": 0.755052790346908, "grad_norm": 1.7774066925048828, "learning_rate": 1.4500376164829176e-05, "loss": 0.9375, "step": 5006 }, { "epoch": 0.7552036199095022, "grad_norm": 1.837273120880127, "learning_rate": 1.4483440468007564e-05, "loss": 1.097, "step": 5007 }, { "epoch": 0.7553544494720965, "grad_norm": 2.005321502685547, "learning_rate": 1.4466512991800074e-05, "loss": 1.1743, "step": 5008 }, { "epoch": 0.7555052790346908, "grad_norm": 2.0340325832366943, "learning_rate": 1.444959374012474e-05, "loss": 1.2474, "step": 5009 }, { "epoch": 0.755656108597285, "grad_norm": 1.947788953781128, "learning_rate": 1.443268271689766e-05, "loss": 1.1259, "step": 5010 }, { "epoch": 0.7558069381598793, "grad_norm": 1.8164417743682861, "learning_rate": 1.4415779926033041e-05, "loss": 1.1528, "step": 5011 }, { "epoch": 0.7559577677224736, "grad_norm": 1.9596507549285889, "learning_rate": 1.4398885371443178e-05, "loss": 1.0832, "step": 5012 }, { "epoch": 0.7561085972850679, "grad_norm": 2.0223069190979004, "learning_rate": 1.438199905703846e-05, "loss": 1.0417, "step": 5013 }, { "epoch": 0.7562594268476621, "grad_norm": 1.8023712635040283, "learning_rate": 1.4365120986727376e-05, "loss": 0.9072, "step": 5014 }, { "epoch": 0.7564102564102564, "grad_norm": 2.186389207839966, "learning_rate": 1.4348251164416494e-05, "loss": 1.2136, "step": 5015 }, { "epoch": 0.7565610859728507, "grad_norm": 1.7082821130752563, "learning_rate": 1.4331389594010485e-05, "loss": 0.9077, "step": 5016 }, { "epoch": 0.7567119155354449, "grad_norm": 1.7732874155044556, "learning_rate": 1.4314536279412122e-05, "loss": 1.1596, "step": 5017 }, { "epoch": 0.7568627450980392, "grad_norm": 2.118908166885376, "learning_rate": 1.4297691224522214e-05, "loss": 1.2949, "step": 5018 }, { "epoch": 0.7570135746606335, "grad_norm": 1.6402966976165771, "learning_rate": 1.428085443323971e-05, "loss": 0.6548, "step": 5019 }, { "epoch": 0.7571644042232277, "grad_norm": 1.8856145143508911, "learning_rate": 1.4264025909461631e-05, "loss": 1.0485, "step": 5020 }, { "epoch": 0.757315233785822, "grad_norm": 2.139296770095825, "learning_rate": 1.4247205657083085e-05, "loss": 1.166, "step": 5021 }, { "epoch": 0.7574660633484163, "grad_norm": 1.769866943359375, "learning_rate": 1.4230393679997267e-05, "loss": 0.9358, "step": 5022 }, { "epoch": 0.7576168929110105, "grad_norm": 1.7914714813232422, "learning_rate": 1.4213589982095455e-05, "loss": 0.9166, "step": 5023 }, { "epoch": 0.7577677224736048, "grad_norm": 1.8545969724655151, "learning_rate": 1.4196794567267002e-05, "loss": 1.0589, "step": 5024 }, { "epoch": 0.7579185520361991, "grad_norm": 2.086965799331665, "learning_rate": 1.4180007439399362e-05, "loss": 1.2589, "step": 5025 }, { "epoch": 0.7580693815987933, "grad_norm": 1.8343355655670166, "learning_rate": 1.4163228602378065e-05, "loss": 1.0902, "step": 5026 }, { "epoch": 0.7582202111613876, "grad_norm": 1.6478501558303833, "learning_rate": 1.4146458060086692e-05, "loss": 0.7811, "step": 5027 }, { "epoch": 0.7583710407239819, "grad_norm": 1.6302605867385864, "learning_rate": 1.4129695816406946e-05, "loss": 0.7799, "step": 5028 }, { "epoch": 0.7585218702865761, "grad_norm": 2.0075368881225586, "learning_rate": 1.4112941875218594e-05, "loss": 0.9281, "step": 5029 }, { "epoch": 0.7586726998491704, "grad_norm": 1.8992165327072144, "learning_rate": 1.4096196240399478e-05, "loss": 0.9868, "step": 5030 }, { "epoch": 0.7588235294117647, "grad_norm": 1.9664273262023926, "learning_rate": 1.4079458915825522e-05, "loss": 1.1992, "step": 5031 }, { "epoch": 0.7589743589743589, "grad_norm": 1.7188866138458252, "learning_rate": 1.406272990537072e-05, "loss": 0.9841, "step": 5032 }, { "epoch": 0.7591251885369532, "grad_norm": 1.8893064260482788, "learning_rate": 1.4046009212907153e-05, "loss": 0.8785, "step": 5033 }, { "epoch": 0.7592760180995475, "grad_norm": 1.910568356513977, "learning_rate": 1.4029296842304957e-05, "loss": 1.0003, "step": 5034 }, { "epoch": 0.7594268476621417, "grad_norm": 1.9350330829620361, "learning_rate": 1.4012592797432384e-05, "loss": 0.918, "step": 5035 }, { "epoch": 0.759577677224736, "grad_norm": 1.8524309396743774, "learning_rate": 1.399589708215569e-05, "loss": 0.9575, "step": 5036 }, { "epoch": 0.7597285067873303, "grad_norm": 2.175436496734619, "learning_rate": 1.3979209700339258e-05, "loss": 1.248, "step": 5037 }, { "epoch": 0.7598793363499246, "grad_norm": 1.9905592203140259, "learning_rate": 1.396253065584553e-05, "loss": 0.9995, "step": 5038 }, { "epoch": 0.7600301659125188, "grad_norm": 2.0819664001464844, "learning_rate": 1.3945859952535012e-05, "loss": 1.1658, "step": 5039 }, { "epoch": 0.7601809954751131, "grad_norm": 1.9747505187988281, "learning_rate": 1.392919759426628e-05, "loss": 0.9263, "step": 5040 }, { "epoch": 0.7603318250377074, "grad_norm": 2.064882755279541, "learning_rate": 1.3912543584895988e-05, "loss": 1.0606, "step": 5041 }, { "epoch": 0.7604826546003016, "grad_norm": 2.079087972640991, "learning_rate": 1.3895897928278845e-05, "loss": 1.0263, "step": 5042 }, { "epoch": 0.7606334841628959, "grad_norm": 2.1950886249542236, "learning_rate": 1.3879260628267637e-05, "loss": 0.9906, "step": 5043 }, { "epoch": 0.7607843137254902, "grad_norm": 2.2908031940460205, "learning_rate": 1.38626316887132e-05, "loss": 1.2283, "step": 5044 }, { "epoch": 0.7609351432880844, "grad_norm": 1.8470714092254639, "learning_rate": 1.3846011113464452e-05, "loss": 0.8819, "step": 5045 }, { "epoch": 0.7610859728506787, "grad_norm": 1.8598811626434326, "learning_rate": 1.3829398906368374e-05, "loss": 0.8302, "step": 5046 }, { "epoch": 0.761236802413273, "grad_norm": 2.3393940925598145, "learning_rate": 1.3812795071269997e-05, "loss": 1.0829, "step": 5047 }, { "epoch": 0.7613876319758672, "grad_norm": 1.8763322830200195, "learning_rate": 1.3796199612012423e-05, "loss": 0.7426, "step": 5048 }, { "epoch": 0.7615384615384615, "grad_norm": 1.9812830686569214, "learning_rate": 1.3779612532436814e-05, "loss": 1.0687, "step": 5049 }, { "epoch": 0.7616892911010558, "grad_norm": 1.814353346824646, "learning_rate": 1.3763033836382394e-05, "loss": 1.0351, "step": 5050 }, { "epoch": 0.76184012066365, "grad_norm": 2.108339548110962, "learning_rate": 1.3746463527686442e-05, "loss": 1.7117, "step": 5051 }, { "epoch": 0.7619909502262443, "grad_norm": 1.7234351634979248, "learning_rate": 1.3729901610184309e-05, "loss": 0.9871, "step": 5052 }, { "epoch": 0.7621417797888386, "grad_norm": 2.0552351474761963, "learning_rate": 1.3713348087709382e-05, "loss": 1.3627, "step": 5053 }, { "epoch": 0.7622926093514328, "grad_norm": 1.8986037969589233, "learning_rate": 1.3696802964093142e-05, "loss": 1.0656, "step": 5054 }, { "epoch": 0.7624434389140271, "grad_norm": 2.4476871490478516, "learning_rate": 1.3680266243165057e-05, "loss": 1.7817, "step": 5055 }, { "epoch": 0.7625942684766214, "grad_norm": 1.8422142267227173, "learning_rate": 1.3663737928752723e-05, "loss": 1.2418, "step": 5056 }, { "epoch": 0.7627450980392156, "grad_norm": 2.000885248184204, "learning_rate": 1.3647218024681752e-05, "loss": 1.1937, "step": 5057 }, { "epoch": 0.7628959276018099, "grad_norm": 1.7627336978912354, "learning_rate": 1.3630706534775823e-05, "loss": 0.8834, "step": 5058 }, { "epoch": 0.7630467571644042, "grad_norm": 2.120499849319458, "learning_rate": 1.3614203462856655e-05, "loss": 1.2397, "step": 5059 }, { "epoch": 0.7631975867269984, "grad_norm": 1.855229139328003, "learning_rate": 1.3597708812744036e-05, "loss": 1.0852, "step": 5060 }, { "epoch": 0.7633484162895927, "grad_norm": 2.0069210529327393, "learning_rate": 1.3581222588255793e-05, "loss": 1.3977, "step": 5061 }, { "epoch": 0.763499245852187, "grad_norm": 1.7074490785598755, "learning_rate": 1.3564744793207806e-05, "loss": 1.0396, "step": 5062 }, { "epoch": 0.7636500754147812, "grad_norm": 2.266019582748413, "learning_rate": 1.354827543141401e-05, "loss": 1.4181, "step": 5063 }, { "epoch": 0.7638009049773755, "grad_norm": 1.8110193014144897, "learning_rate": 1.3531814506686357e-05, "loss": 1.0995, "step": 5064 }, { "epoch": 0.7639517345399698, "grad_norm": 2.1053028106689453, "learning_rate": 1.3515362022834882e-05, "loss": 1.2769, "step": 5065 }, { "epoch": 0.764102564102564, "grad_norm": 1.8450783491134644, "learning_rate": 1.3498917983667658e-05, "loss": 0.9687, "step": 5066 }, { "epoch": 0.7642533936651583, "grad_norm": 1.9476534128189087, "learning_rate": 1.3482482392990798e-05, "loss": 1.2028, "step": 5067 }, { "epoch": 0.7644042232277526, "grad_norm": 1.8603235483169556, "learning_rate": 1.3466055254608461e-05, "loss": 1.0116, "step": 5068 }, { "epoch": 0.7645550527903469, "grad_norm": 1.7028840780258179, "learning_rate": 1.3449636572322855e-05, "loss": 0.8619, "step": 5069 }, { "epoch": 0.7647058823529411, "grad_norm": 1.7645436525344849, "learning_rate": 1.3433226349934213e-05, "loss": 1.0391, "step": 5070 }, { "epoch": 0.7648567119155354, "grad_norm": 2.318783760070801, "learning_rate": 1.3416824591240834e-05, "loss": 1.4558, "step": 5071 }, { "epoch": 0.7650075414781297, "grad_norm": 2.1994388103485107, "learning_rate": 1.3400431300039046e-05, "loss": 1.2295, "step": 5072 }, { "epoch": 0.7651583710407239, "grad_norm": 2.319474697113037, "learning_rate": 1.3384046480123225e-05, "loss": 1.3831, "step": 5073 }, { "epoch": 0.7653092006033182, "grad_norm": 1.6514859199523926, "learning_rate": 1.336767013528576e-05, "loss": 0.8797, "step": 5074 }, { "epoch": 0.7654600301659125, "grad_norm": 1.7821881771087646, "learning_rate": 1.3351302269317101e-05, "loss": 0.9274, "step": 5075 }, { "epoch": 0.7656108597285067, "grad_norm": 1.8379124402999878, "learning_rate": 1.3334942886005736e-05, "loss": 1.0148, "step": 5076 }, { "epoch": 0.7657616892911011, "grad_norm": 2.0166919231414795, "learning_rate": 1.3318591989138184e-05, "loss": 1.444, "step": 5077 }, { "epoch": 0.7659125188536954, "grad_norm": 1.9524749517440796, "learning_rate": 1.330224958249901e-05, "loss": 1.2016, "step": 5078 }, { "epoch": 0.7660633484162896, "grad_norm": 2.0518953800201416, "learning_rate": 1.3285915669870796e-05, "loss": 1.233, "step": 5079 }, { "epoch": 0.7662141779788839, "grad_norm": 1.5783653259277344, "learning_rate": 1.3269590255034165e-05, "loss": 0.6895, "step": 5080 }, { "epoch": 0.7663650075414782, "grad_norm": 2.0429208278656006, "learning_rate": 1.3253273341767786e-05, "loss": 1.2179, "step": 5081 }, { "epoch": 0.7665158371040725, "grad_norm": 1.7753398418426514, "learning_rate": 1.3236964933848356e-05, "loss": 0.9931, "step": 5082 }, { "epoch": 0.7666666666666667, "grad_norm": 1.64165198802948, "learning_rate": 1.3220665035050567e-05, "loss": 0.6641, "step": 5083 }, { "epoch": 0.766817496229261, "grad_norm": 1.961643934249878, "learning_rate": 1.3204373649147184e-05, "loss": 0.9943, "step": 5084 }, { "epoch": 0.7669683257918553, "grad_norm": 2.047258138656616, "learning_rate": 1.3188090779908996e-05, "loss": 1.2054, "step": 5085 }, { "epoch": 0.7671191553544495, "grad_norm": 1.874069094657898, "learning_rate": 1.3171816431104812e-05, "loss": 0.9794, "step": 5086 }, { "epoch": 0.7672699849170438, "grad_norm": 2.0978949069976807, "learning_rate": 1.315555060650147e-05, "loss": 1.185, "step": 5087 }, { "epoch": 0.7674208144796381, "grad_norm": 1.9320833683013916, "learning_rate": 1.3139293309863831e-05, "loss": 1.1605, "step": 5088 }, { "epoch": 0.7675716440422323, "grad_norm": 2.2877955436706543, "learning_rate": 1.3123044544954788e-05, "loss": 1.1566, "step": 5089 }, { "epoch": 0.7677224736048266, "grad_norm": 2.1102240085601807, "learning_rate": 1.3106804315535266e-05, "loss": 0.9954, "step": 5090 }, { "epoch": 0.7678733031674209, "grad_norm": 1.7558175325393677, "learning_rate": 1.3090572625364194e-05, "loss": 0.8679, "step": 5091 }, { "epoch": 0.7680241327300151, "grad_norm": 2.1969473361968994, "learning_rate": 1.3074349478198544e-05, "loss": 1.2614, "step": 5092 }, { "epoch": 0.7681749622926094, "grad_norm": 2.1518924236297607, "learning_rate": 1.3058134877793299e-05, "loss": 1.272, "step": 5093 }, { "epoch": 0.7683257918552037, "grad_norm": 2.1715614795684814, "learning_rate": 1.3041928827901473e-05, "loss": 1.2892, "step": 5094 }, { "epoch": 0.7684766214177979, "grad_norm": 1.8471331596374512, "learning_rate": 1.30257313322741e-05, "loss": 0.8601, "step": 5095 }, { "epoch": 0.7686274509803922, "grad_norm": 1.9925836324691772, "learning_rate": 1.3009542394660217e-05, "loss": 1.0311, "step": 5096 }, { "epoch": 0.7687782805429865, "grad_norm": 1.8289563655853271, "learning_rate": 1.2993362018806904e-05, "loss": 0.7435, "step": 5097 }, { "epoch": 0.7689291101055807, "grad_norm": 1.9799333810806274, "learning_rate": 1.297719020845924e-05, "loss": 1.0162, "step": 5098 }, { "epoch": 0.769079939668175, "grad_norm": 1.5894358158111572, "learning_rate": 1.296102696736034e-05, "loss": 0.6095, "step": 5099 }, { "epoch": 0.7692307692307693, "grad_norm": 1.9894354343414307, "learning_rate": 1.294487229925132e-05, "loss": 1.0523, "step": 5100 }, { "epoch": 0.7693815987933635, "grad_norm": 1.9440562725067139, "learning_rate": 1.2928726207871333e-05, "loss": 1.2413, "step": 5101 }, { "epoch": 0.7695324283559578, "grad_norm": 1.8417720794677734, "learning_rate": 1.29125886969575e-05, "loss": 1.3039, "step": 5102 }, { "epoch": 0.7696832579185521, "grad_norm": 1.7194154262542725, "learning_rate": 1.2896459770245001e-05, "loss": 0.9685, "step": 5103 }, { "epoch": 0.7698340874811463, "grad_norm": 2.083836555480957, "learning_rate": 1.288033943146702e-05, "loss": 1.3279, "step": 5104 }, { "epoch": 0.7699849170437406, "grad_norm": 1.8559422492980957, "learning_rate": 1.2864227684354746e-05, "loss": 0.9471, "step": 5105 }, { "epoch": 0.7701357466063349, "grad_norm": 2.2788822650909424, "learning_rate": 1.2848124532637384e-05, "loss": 1.323, "step": 5106 }, { "epoch": 0.7702865761689291, "grad_norm": 2.2673707008361816, "learning_rate": 1.2832029980042142e-05, "loss": 1.563, "step": 5107 }, { "epoch": 0.7704374057315234, "grad_norm": 1.7314002513885498, "learning_rate": 1.2815944030294248e-05, "loss": 1.1353, "step": 5108 }, { "epoch": 0.7705882352941177, "grad_norm": 1.8258804082870483, "learning_rate": 1.2799866687116935e-05, "loss": 1.0111, "step": 5109 }, { "epoch": 0.770739064856712, "grad_norm": 1.612963318824768, "learning_rate": 1.2783797954231453e-05, "loss": 0.784, "step": 5110 }, { "epoch": 0.7708898944193062, "grad_norm": 1.7601886987686157, "learning_rate": 1.276773783535702e-05, "loss": 0.8698, "step": 5111 }, { "epoch": 0.7710407239819005, "grad_norm": 1.7188761234283447, "learning_rate": 1.2751686334210905e-05, "loss": 0.9293, "step": 5112 }, { "epoch": 0.7711915535444948, "grad_norm": 1.576468825340271, "learning_rate": 1.2735643454508367e-05, "loss": 0.7294, "step": 5113 }, { "epoch": 0.771342383107089, "grad_norm": 1.801650881767273, "learning_rate": 1.2719609199962668e-05, "loss": 1.1219, "step": 5114 }, { "epoch": 0.7714932126696833, "grad_norm": 1.6717381477355957, "learning_rate": 1.2703583574285072e-05, "loss": 0.7652, "step": 5115 }, { "epoch": 0.7716440422322776, "grad_norm": 1.8411163091659546, "learning_rate": 1.2687566581184856e-05, "loss": 1.1695, "step": 5116 }, { "epoch": 0.7717948717948718, "grad_norm": 2.0246384143829346, "learning_rate": 1.2671558224369284e-05, "loss": 1.0338, "step": 5117 }, { "epoch": 0.7719457013574661, "grad_norm": 1.9427626132965088, "learning_rate": 1.265555850754363e-05, "loss": 1.2385, "step": 5118 }, { "epoch": 0.7720965309200604, "grad_norm": 2.0877020359039307, "learning_rate": 1.263956743441117e-05, "loss": 0.9762, "step": 5119 }, { "epoch": 0.7722473604826546, "grad_norm": 2.1288390159606934, "learning_rate": 1.2623585008673183e-05, "loss": 1.2133, "step": 5120 }, { "epoch": 0.7723981900452489, "grad_norm": 1.6994426250457764, "learning_rate": 1.2607611234028921e-05, "loss": 0.932, "step": 5121 }, { "epoch": 0.7725490196078432, "grad_norm": 1.6609673500061035, "learning_rate": 1.259164611417566e-05, "loss": 0.7478, "step": 5122 }, { "epoch": 0.7726998491704374, "grad_norm": 1.5616357326507568, "learning_rate": 1.2575689652808659e-05, "loss": 0.6924, "step": 5123 }, { "epoch": 0.7728506787330317, "grad_norm": 2.1105096340179443, "learning_rate": 1.2559741853621193e-05, "loss": 1.1541, "step": 5124 }, { "epoch": 0.773001508295626, "grad_norm": 2.039879083633423, "learning_rate": 1.2543802720304503e-05, "loss": 1.1361, "step": 5125 }, { "epoch": 0.7731523378582202, "grad_norm": 1.635565161705017, "learning_rate": 1.2527872256547846e-05, "loss": 0.7156, "step": 5126 }, { "epoch": 0.7733031674208145, "grad_norm": 1.9462071657180786, "learning_rate": 1.251195046603847e-05, "loss": 1.0849, "step": 5127 }, { "epoch": 0.7734539969834088, "grad_norm": 1.927531123161316, "learning_rate": 1.2496037352461598e-05, "loss": 0.9511, "step": 5128 }, { "epoch": 0.773604826546003, "grad_norm": 1.8496819734573364, "learning_rate": 1.2480132919500487e-05, "loss": 0.8103, "step": 5129 }, { "epoch": 0.7737556561085973, "grad_norm": 2.474874258041382, "learning_rate": 1.2464237170836313e-05, "loss": 1.3946, "step": 5130 }, { "epoch": 0.7739064856711916, "grad_norm": 1.9031410217285156, "learning_rate": 1.2448350110148311e-05, "loss": 1.0159, "step": 5131 }, { "epoch": 0.7740573152337858, "grad_norm": 2.096234083175659, "learning_rate": 1.2432471741113667e-05, "loss": 1.3049, "step": 5132 }, { "epoch": 0.7742081447963801, "grad_norm": 2.3778343200683594, "learning_rate": 1.2416602067407568e-05, "loss": 1.3591, "step": 5133 }, { "epoch": 0.7743589743589744, "grad_norm": 1.7604421377182007, "learning_rate": 1.2400741092703194e-05, "loss": 0.6992, "step": 5134 }, { "epoch": 0.7745098039215687, "grad_norm": 1.728628396987915, "learning_rate": 1.2384888820671703e-05, "loss": 0.8236, "step": 5135 }, { "epoch": 0.7746606334841629, "grad_norm": 1.9464471340179443, "learning_rate": 1.2369045254982236e-05, "loss": 1.2271, "step": 5136 }, { "epoch": 0.7748114630467572, "grad_norm": 1.9091099500656128, "learning_rate": 1.2353210399301924e-05, "loss": 1.1634, "step": 5137 }, { "epoch": 0.7749622926093515, "grad_norm": 2.2218000888824463, "learning_rate": 1.2337384257295881e-05, "loss": 1.1612, "step": 5138 }, { "epoch": 0.7751131221719457, "grad_norm": 1.8105884790420532, "learning_rate": 1.2321566832627202e-05, "loss": 0.9895, "step": 5139 }, { "epoch": 0.77526395173454, "grad_norm": 1.9485459327697754, "learning_rate": 1.2305758128956973e-05, "loss": 0.9221, "step": 5140 }, { "epoch": 0.7754147812971343, "grad_norm": 2.0581893920898438, "learning_rate": 1.2289958149944253e-05, "loss": 1.046, "step": 5141 }, { "epoch": 0.7755656108597285, "grad_norm": 2.127960443496704, "learning_rate": 1.2274166899246076e-05, "loss": 0.9936, "step": 5142 }, { "epoch": 0.7757164404223228, "grad_norm": 2.5606555938720703, "learning_rate": 1.2258384380517474e-05, "loss": 1.4668, "step": 5143 }, { "epoch": 0.7758672699849171, "grad_norm": 2.1445491313934326, "learning_rate": 1.2242610597411436e-05, "loss": 0.9916, "step": 5144 }, { "epoch": 0.7760180995475113, "grad_norm": 1.9441783428192139, "learning_rate": 1.2226845553578948e-05, "loss": 0.8381, "step": 5145 }, { "epoch": 0.7761689291101056, "grad_norm": 1.8978365659713745, "learning_rate": 1.2211089252668967e-05, "loss": 0.7237, "step": 5146 }, { "epoch": 0.7763197586726999, "grad_norm": 1.7877137660980225, "learning_rate": 1.2195341698328416e-05, "loss": 0.8591, "step": 5147 }, { "epoch": 0.7764705882352941, "grad_norm": 1.895365595817566, "learning_rate": 1.2179602894202225e-05, "loss": 0.8761, "step": 5148 }, { "epoch": 0.7766214177978884, "grad_norm": 1.6744133234024048, "learning_rate": 1.2163872843933244e-05, "loss": 0.7047, "step": 5149 }, { "epoch": 0.7767722473604827, "grad_norm": 1.5015414953231812, "learning_rate": 1.2148151551162346e-05, "loss": 0.5832, "step": 5150 }, { "epoch": 0.7769230769230769, "grad_norm": 2.131598711013794, "learning_rate": 1.2132439019528352e-05, "loss": 1.601, "step": 5151 }, { "epoch": 0.7770739064856712, "grad_norm": 1.7952930927276611, "learning_rate": 1.2116735252668071e-05, "loss": 1.1255, "step": 5152 }, { "epoch": 0.7772247360482655, "grad_norm": 1.5246185064315796, "learning_rate": 1.2101040254216272e-05, "loss": 0.8411, "step": 5153 }, { "epoch": 0.7773755656108597, "grad_norm": 2.055288791656494, "learning_rate": 1.2085354027805702e-05, "loss": 1.3082, "step": 5154 }, { "epoch": 0.777526395173454, "grad_norm": 1.5221548080444336, "learning_rate": 1.2069676577067062e-05, "loss": 0.7953, "step": 5155 }, { "epoch": 0.7776772247360483, "grad_norm": 1.886616826057434, "learning_rate": 1.205400790562905e-05, "loss": 0.9456, "step": 5156 }, { "epoch": 0.7778280542986425, "grad_norm": 1.8719031810760498, "learning_rate": 1.2038348017118317e-05, "loss": 1.2375, "step": 5157 }, { "epoch": 0.7779788838612368, "grad_norm": 1.9204763174057007, "learning_rate": 1.2022696915159453e-05, "loss": 1.2098, "step": 5158 }, { "epoch": 0.7781297134238311, "grad_norm": 2.139126777648926, "learning_rate": 1.2007054603375056e-05, "loss": 1.4303, "step": 5159 }, { "epoch": 0.7782805429864253, "grad_norm": 1.931198000907898, "learning_rate": 1.1991421085385674e-05, "loss": 0.8818, "step": 5160 }, { "epoch": 0.7784313725490196, "grad_norm": 1.8533599376678467, "learning_rate": 1.1975796364809822e-05, "loss": 0.9961, "step": 5161 }, { "epoch": 0.7785822021116139, "grad_norm": 2.267897605895996, "learning_rate": 1.1960180445263975e-05, "loss": 1.4988, "step": 5162 }, { "epoch": 0.7787330316742082, "grad_norm": 1.7990275621414185, "learning_rate": 1.1944573330362569e-05, "loss": 0.8868, "step": 5163 }, { "epoch": 0.7788838612368024, "grad_norm": 2.394930601119995, "learning_rate": 1.1928975023718009e-05, "loss": 1.2004, "step": 5164 }, { "epoch": 0.7790346907993967, "grad_norm": 1.8338685035705566, "learning_rate": 1.1913385528940657e-05, "loss": 0.9827, "step": 5165 }, { "epoch": 0.779185520361991, "grad_norm": 1.8290482759475708, "learning_rate": 1.189780484963885e-05, "loss": 0.9479, "step": 5166 }, { "epoch": 0.7793363499245852, "grad_norm": 1.9712389707565308, "learning_rate": 1.188223298941884e-05, "loss": 1.0877, "step": 5167 }, { "epoch": 0.7794871794871795, "grad_norm": 1.8171411752700806, "learning_rate": 1.1866669951884885e-05, "loss": 1.0444, "step": 5168 }, { "epoch": 0.7796380090497738, "grad_norm": 1.8102953433990479, "learning_rate": 1.1851115740639185e-05, "loss": 0.9897, "step": 5169 }, { "epoch": 0.779788838612368, "grad_norm": 1.7867064476013184, "learning_rate": 1.1835570359281895e-05, "loss": 0.9112, "step": 5170 }, { "epoch": 0.7799396681749623, "grad_norm": 2.4399123191833496, "learning_rate": 1.182003381141113e-05, "loss": 1.2523, "step": 5171 }, { "epoch": 0.7800904977375566, "grad_norm": 1.858912706375122, "learning_rate": 1.1804506100622954e-05, "loss": 1.0155, "step": 5172 }, { "epoch": 0.7802413273001508, "grad_norm": 2.028560161590576, "learning_rate": 1.1788987230511395e-05, "loss": 1.228, "step": 5173 }, { "epoch": 0.7803921568627451, "grad_norm": 2.225137233734131, "learning_rate": 1.177347720466842e-05, "loss": 1.2489, "step": 5174 }, { "epoch": 0.7805429864253394, "grad_norm": 2.0499343872070312, "learning_rate": 1.175797602668397e-05, "loss": 1.0516, "step": 5175 }, { "epoch": 0.7806938159879336, "grad_norm": 2.0633394718170166, "learning_rate": 1.1742483700145935e-05, "loss": 0.9832, "step": 5176 }, { "epoch": 0.7808446455505279, "grad_norm": 1.9452874660491943, "learning_rate": 1.1727000228640116e-05, "loss": 0.9617, "step": 5177 }, { "epoch": 0.7809954751131222, "grad_norm": 2.2042813301086426, "learning_rate": 1.1711525615750313e-05, "loss": 1.1821, "step": 5178 }, { "epoch": 0.7811463046757164, "grad_norm": 1.887713074684143, "learning_rate": 1.1696059865058262e-05, "loss": 1.0511, "step": 5179 }, { "epoch": 0.7812971342383107, "grad_norm": 1.6074469089508057, "learning_rate": 1.168060298014364e-05, "loss": 0.8041, "step": 5180 }, { "epoch": 0.781447963800905, "grad_norm": 1.7020577192306519, "learning_rate": 1.166515496458408e-05, "loss": 0.9084, "step": 5181 }, { "epoch": 0.7815987933634992, "grad_norm": 2.208688497543335, "learning_rate": 1.1649715821955153e-05, "loss": 1.2502, "step": 5182 }, { "epoch": 0.7817496229260935, "grad_norm": 2.008653402328491, "learning_rate": 1.1634285555830388e-05, "loss": 1.0919, "step": 5183 }, { "epoch": 0.7819004524886878, "grad_norm": 1.860418677330017, "learning_rate": 1.1618864169781252e-05, "loss": 0.7747, "step": 5184 }, { "epoch": 0.782051282051282, "grad_norm": 1.8590457439422607, "learning_rate": 1.1603451667377152e-05, "loss": 0.9623, "step": 5185 }, { "epoch": 0.7822021116138763, "grad_norm": 1.9407075643539429, "learning_rate": 1.1588048052185452e-05, "loss": 0.9089, "step": 5186 }, { "epoch": 0.7823529411764706, "grad_norm": 2.315188407897949, "learning_rate": 1.1572653327771449e-05, "loss": 1.279, "step": 5187 }, { "epoch": 0.7825037707390649, "grad_norm": 2.1982195377349854, "learning_rate": 1.1557267497698388e-05, "loss": 1.1139, "step": 5188 }, { "epoch": 0.7826546003016591, "grad_norm": 1.9448050260543823, "learning_rate": 1.154189056552744e-05, "loss": 1.1092, "step": 5189 }, { "epoch": 0.7828054298642534, "grad_norm": 2.35791015625, "learning_rate": 1.152652253481774e-05, "loss": 1.3444, "step": 5190 }, { "epoch": 0.7829562594268477, "grad_norm": 2.0024311542510986, "learning_rate": 1.1511163409126352e-05, "loss": 1.0862, "step": 5191 }, { "epoch": 0.7831070889894419, "grad_norm": 1.8200010061264038, "learning_rate": 1.1495813192008276e-05, "loss": 0.89, "step": 5192 }, { "epoch": 0.7832579185520362, "grad_norm": 2.3305983543395996, "learning_rate": 1.1480471887016447e-05, "loss": 1.4726, "step": 5193 }, { "epoch": 0.7834087481146305, "grad_norm": 2.3014442920684814, "learning_rate": 1.146513949770176e-05, "loss": 1.1157, "step": 5194 }, { "epoch": 0.7835595776772247, "grad_norm": 2.1501917839050293, "learning_rate": 1.1449816027613004e-05, "loss": 1.0818, "step": 5195 }, { "epoch": 0.783710407239819, "grad_norm": 1.6372913122177124, "learning_rate": 1.1434501480296932e-05, "loss": 0.8234, "step": 5196 }, { "epoch": 0.7838612368024133, "grad_norm": 1.7303146123886108, "learning_rate": 1.1419195859298242e-05, "loss": 0.7756, "step": 5197 }, { "epoch": 0.7840120663650075, "grad_norm": 1.7059133052825928, "learning_rate": 1.1403899168159549e-05, "loss": 0.7951, "step": 5198 }, { "epoch": 0.7841628959276018, "grad_norm": 1.7122666835784912, "learning_rate": 1.1388611410421397e-05, "loss": 0.6876, "step": 5199 }, { "epoch": 0.7843137254901961, "grad_norm": 1.9368599653244019, "learning_rate": 1.1373332589622271e-05, "loss": 0.9755, "step": 5200 }, { "epoch": 0.7844645550527903, "grad_norm": 1.9459848403930664, "learning_rate": 1.135806270929859e-05, "loss": 1.3098, "step": 5201 }, { "epoch": 0.7846153846153846, "grad_norm": 1.9017606973648071, "learning_rate": 1.1342801772984695e-05, "loss": 1.1247, "step": 5202 }, { "epoch": 0.7847662141779789, "grad_norm": 1.7037550210952759, "learning_rate": 1.1327549784212866e-05, "loss": 0.9512, "step": 5203 }, { "epoch": 0.7849170437405731, "grad_norm": 1.9683678150177002, "learning_rate": 1.1312306746513318e-05, "loss": 1.3863, "step": 5204 }, { "epoch": 0.7850678733031674, "grad_norm": 1.7913116216659546, "learning_rate": 1.129707266341416e-05, "loss": 0.8669, "step": 5205 }, { "epoch": 0.7852187028657617, "grad_norm": 1.890148639678955, "learning_rate": 1.128184753844146e-05, "loss": 1.021, "step": 5206 }, { "epoch": 0.7853695324283559, "grad_norm": 2.035776376724243, "learning_rate": 1.1266631375119208e-05, "loss": 1.3015, "step": 5207 }, { "epoch": 0.7855203619909502, "grad_norm": 1.7087281942367554, "learning_rate": 1.1251424176969316e-05, "loss": 0.899, "step": 5208 }, { "epoch": 0.7856711915535445, "grad_norm": 1.9386011362075806, "learning_rate": 1.1236225947511625e-05, "loss": 1.1604, "step": 5209 }, { "epoch": 0.7858220211161387, "grad_norm": 1.9529072046279907, "learning_rate": 1.1221036690263886e-05, "loss": 1.2031, "step": 5210 }, { "epoch": 0.785972850678733, "grad_norm": 2.0272843837738037, "learning_rate": 1.1205856408741793e-05, "loss": 1.0758, "step": 5211 }, { "epoch": 0.7861236802413273, "grad_norm": 1.713652491569519, "learning_rate": 1.1190685106458953e-05, "loss": 1.0183, "step": 5212 }, { "epoch": 0.7862745098039216, "grad_norm": 2.453809976577759, "learning_rate": 1.1175522786926902e-05, "loss": 1.5148, "step": 5213 }, { "epoch": 0.7864253393665158, "grad_norm": 2.2180378437042236, "learning_rate": 1.1160369453655067e-05, "loss": 1.3987, "step": 5214 }, { "epoch": 0.7865761689291101, "grad_norm": 1.7686799764633179, "learning_rate": 1.114522511015083e-05, "loss": 1.0964, "step": 5215 }, { "epoch": 0.7867269984917044, "grad_norm": 2.3119823932647705, "learning_rate": 1.1130089759919482e-05, "loss": 1.3655, "step": 5216 }, { "epoch": 0.7868778280542986, "grad_norm": 1.6139402389526367, "learning_rate": 1.1114963406464223e-05, "loss": 0.7683, "step": 5217 }, { "epoch": 0.7870286576168929, "grad_norm": 1.7889829874038696, "learning_rate": 1.109984605328619e-05, "loss": 0.7543, "step": 5218 }, { "epoch": 0.7871794871794872, "grad_norm": 2.0540103912353516, "learning_rate": 1.1084737703884412e-05, "loss": 1.3843, "step": 5219 }, { "epoch": 0.7873303167420814, "grad_norm": 1.7661221027374268, "learning_rate": 1.1069638361755858e-05, "loss": 1.0405, "step": 5220 }, { "epoch": 0.7874811463046757, "grad_norm": 1.8561850786209106, "learning_rate": 1.1054548030395395e-05, "loss": 0.9759, "step": 5221 }, { "epoch": 0.78763197586727, "grad_norm": 1.9142900705337524, "learning_rate": 1.1039466713295815e-05, "loss": 1.124, "step": 5222 }, { "epoch": 0.7877828054298642, "grad_norm": 1.7952560186386108, "learning_rate": 1.1024394413947825e-05, "loss": 0.983, "step": 5223 }, { "epoch": 0.7879336349924585, "grad_norm": 1.7534905672073364, "learning_rate": 1.1009331135840017e-05, "loss": 0.9268, "step": 5224 }, { "epoch": 0.7880844645550528, "grad_norm": 1.7025178670883179, "learning_rate": 1.0994276882458931e-05, "loss": 0.9266, "step": 5225 }, { "epoch": 0.788235294117647, "grad_norm": 2.0440895557403564, "learning_rate": 1.0979231657289002e-05, "loss": 1.206, "step": 5226 }, { "epoch": 0.7883861236802413, "grad_norm": 1.8195592164993286, "learning_rate": 1.0964195463812576e-05, "loss": 1.0451, "step": 5227 }, { "epoch": 0.7885369532428356, "grad_norm": 2.0598394870758057, "learning_rate": 1.0949168305509911e-05, "loss": 1.1026, "step": 5228 }, { "epoch": 0.7886877828054298, "grad_norm": 2.017521381378174, "learning_rate": 1.093415018585917e-05, "loss": 0.8944, "step": 5229 }, { "epoch": 0.7888386123680241, "grad_norm": 1.9497545957565308, "learning_rate": 1.0919141108336433e-05, "loss": 1.0955, "step": 5230 }, { "epoch": 0.7889894419306184, "grad_norm": 1.8865833282470703, "learning_rate": 1.0904141076415675e-05, "loss": 0.8834, "step": 5231 }, { "epoch": 0.7891402714932126, "grad_norm": 1.8033993244171143, "learning_rate": 1.0889150093568784e-05, "loss": 0.8976, "step": 5232 }, { "epoch": 0.7892911010558069, "grad_norm": 1.8703923225402832, "learning_rate": 1.0874168163265547e-05, "loss": 0.9642, "step": 5233 }, { "epoch": 0.7894419306184012, "grad_norm": 2.174034833908081, "learning_rate": 1.0859195288973673e-05, "loss": 1.279, "step": 5234 }, { "epoch": 0.7895927601809954, "grad_norm": 2.1243996620178223, "learning_rate": 1.084423147415875e-05, "loss": 1.2578, "step": 5235 }, { "epoch": 0.7897435897435897, "grad_norm": 1.741836667060852, "learning_rate": 1.0829276722284292e-05, "loss": 0.8415, "step": 5236 }, { "epoch": 0.789894419306184, "grad_norm": 2.0664327144622803, "learning_rate": 1.0814331036811704e-05, "loss": 1.006, "step": 5237 }, { "epoch": 0.7900452488687782, "grad_norm": 2.1299610137939453, "learning_rate": 1.0799394421200288e-05, "loss": 1.0648, "step": 5238 }, { "epoch": 0.7901960784313725, "grad_norm": 2.287323236465454, "learning_rate": 1.0784466878907256e-05, "loss": 1.3355, "step": 5239 }, { "epoch": 0.7903469079939668, "grad_norm": 2.159843683242798, "learning_rate": 1.076954841338772e-05, "loss": 1.3782, "step": 5240 }, { "epoch": 0.790497737556561, "grad_norm": 2.010453701019287, "learning_rate": 1.07546390280947e-05, "loss": 1.24, "step": 5241 }, { "epoch": 0.7906485671191553, "grad_norm": 1.975743293762207, "learning_rate": 1.0739738726479065e-05, "loss": 1.0166, "step": 5242 }, { "epoch": 0.7907993966817496, "grad_norm": 2.2808003425598145, "learning_rate": 1.0724847511989644e-05, "loss": 1.4455, "step": 5243 }, { "epoch": 0.7909502262443439, "grad_norm": 2.152498722076416, "learning_rate": 1.0709965388073129e-05, "loss": 1.053, "step": 5244 }, { "epoch": 0.7911010558069381, "grad_norm": 2.2432637214660645, "learning_rate": 1.069509235817412e-05, "loss": 1.1697, "step": 5245 }, { "epoch": 0.7912518853695324, "grad_norm": 1.7683600187301636, "learning_rate": 1.0680228425735112e-05, "loss": 0.7923, "step": 5246 }, { "epoch": 0.7914027149321267, "grad_norm": 1.702346920967102, "learning_rate": 1.0665373594196481e-05, "loss": 0.8033, "step": 5247 }, { "epoch": 0.7915535444947209, "grad_norm": 1.7892452478408813, "learning_rate": 1.0650527866996512e-05, "loss": 0.7103, "step": 5248 }, { "epoch": 0.7917043740573152, "grad_norm": 2.0563089847564697, "learning_rate": 1.0635691247571372e-05, "loss": 0.8468, "step": 5249 }, { "epoch": 0.7918552036199095, "grad_norm": 1.7875243425369263, "learning_rate": 1.0620863739355136e-05, "loss": 0.8371, "step": 5250 }, { "epoch": 0.7920060331825037, "grad_norm": 1.8244872093200684, "learning_rate": 1.0606045345779758e-05, "loss": 1.2305, "step": 5251 }, { "epoch": 0.792156862745098, "grad_norm": 1.7883158922195435, "learning_rate": 1.0591236070275062e-05, "loss": 1.113, "step": 5252 }, { "epoch": 0.7923076923076923, "grad_norm": 1.6514043807983398, "learning_rate": 1.05764359162688e-05, "loss": 0.8889, "step": 5253 }, { "epoch": 0.7924585218702865, "grad_norm": 2.0555672645568848, "learning_rate": 1.056164488718659e-05, "loss": 1.3725, "step": 5254 }, { "epoch": 0.7926093514328808, "grad_norm": 1.8430290222167969, "learning_rate": 1.0546862986451945e-05, "loss": 1.023, "step": 5255 }, { "epoch": 0.7927601809954751, "grad_norm": 1.9301599264144897, "learning_rate": 1.0532090217486268e-05, "loss": 1.1534, "step": 5256 }, { "epoch": 0.7929110105580693, "grad_norm": 2.006223201751709, "learning_rate": 1.0517326583708832e-05, "loss": 1.2134, "step": 5257 }, { "epoch": 0.7930618401206636, "grad_norm": 2.0449318885803223, "learning_rate": 1.0502572088536817e-05, "loss": 1.3449, "step": 5258 }, { "epoch": 0.7932126696832579, "grad_norm": 2.0405032634735107, "learning_rate": 1.0487826735385281e-05, "loss": 1.2901, "step": 5259 }, { "epoch": 0.7933634992458521, "grad_norm": 1.9872326850891113, "learning_rate": 1.0473090527667168e-05, "loss": 1.3638, "step": 5260 }, { "epoch": 0.7935143288084464, "grad_norm": 1.983089566230774, "learning_rate": 1.0458363468793275e-05, "loss": 1.2096, "step": 5261 }, { "epoch": 0.7936651583710407, "grad_norm": 1.7139614820480347, "learning_rate": 1.0443645562172322e-05, "loss": 0.9659, "step": 5262 }, { "epoch": 0.793815987933635, "grad_norm": 1.8966206312179565, "learning_rate": 1.0428936811210899e-05, "loss": 1.07, "step": 5263 }, { "epoch": 0.7939668174962292, "grad_norm": 1.8870989084243774, "learning_rate": 1.0414237219313466e-05, "loss": 1.0312, "step": 5264 }, { "epoch": 0.7941176470588235, "grad_norm": 1.8161216974258423, "learning_rate": 1.039954678988238e-05, "loss": 1.0593, "step": 5265 }, { "epoch": 0.7942684766214178, "grad_norm": 1.9425678253173828, "learning_rate": 1.0384865526317861e-05, "loss": 0.9933, "step": 5266 }, { "epoch": 0.794419306184012, "grad_norm": 2.1194653511047363, "learning_rate": 1.0370193432018017e-05, "loss": 1.2404, "step": 5267 }, { "epoch": 0.7945701357466063, "grad_norm": 1.848604679107666, "learning_rate": 1.0355530510378825e-05, "loss": 1.0511, "step": 5268 }, { "epoch": 0.7947209653092006, "grad_norm": 2.1082918643951416, "learning_rate": 1.034087676479416e-05, "loss": 1.2914, "step": 5269 }, { "epoch": 0.7948717948717948, "grad_norm": 2.0592355728149414, "learning_rate": 1.0326232198655739e-05, "loss": 1.1916, "step": 5270 }, { "epoch": 0.7950226244343891, "grad_norm": 2.036313772201538, "learning_rate": 1.031159681535318e-05, "loss": 1.1828, "step": 5271 }, { "epoch": 0.7951734539969834, "grad_norm": 1.8324285745620728, "learning_rate": 1.0296970618273966e-05, "loss": 0.9854, "step": 5272 }, { "epoch": 0.7953242835595776, "grad_norm": 1.7325353622436523, "learning_rate": 1.0282353610803464e-05, "loss": 0.8598, "step": 5273 }, { "epoch": 0.7954751131221719, "grad_norm": 1.8350906372070312, "learning_rate": 1.0267745796324901e-05, "loss": 1.0078, "step": 5274 }, { "epoch": 0.7956259426847662, "grad_norm": 1.9948002099990845, "learning_rate": 1.0253147178219385e-05, "loss": 1.0134, "step": 5275 }, { "epoch": 0.7957767722473604, "grad_norm": 1.6394400596618652, "learning_rate": 1.023855775986589e-05, "loss": 0.795, "step": 5276 }, { "epoch": 0.7959276018099547, "grad_norm": 1.9293296337127686, "learning_rate": 1.0223977544641261e-05, "loss": 1.0766, "step": 5277 }, { "epoch": 0.796078431372549, "grad_norm": 1.7817665338516235, "learning_rate": 1.0209406535920218e-05, "loss": 0.8911, "step": 5278 }, { "epoch": 0.7962292609351432, "grad_norm": 1.9960997104644775, "learning_rate": 1.0194844737075348e-05, "loss": 1.0396, "step": 5279 }, { "epoch": 0.7963800904977375, "grad_norm": 1.834675669670105, "learning_rate": 1.01802921514771e-05, "loss": 1.075, "step": 5280 }, { "epoch": 0.7965309200603318, "grad_norm": 1.6561415195465088, "learning_rate": 1.01657487824938e-05, "loss": 0.7104, "step": 5281 }, { "epoch": 0.796681749622926, "grad_norm": 2.176407814025879, "learning_rate": 1.0151214633491628e-05, "loss": 1.2965, "step": 5282 }, { "epoch": 0.7968325791855203, "grad_norm": 1.7624300718307495, "learning_rate": 1.0136689707834652e-05, "loss": 0.8689, "step": 5283 }, { "epoch": 0.7969834087481147, "grad_norm": 2.0132689476013184, "learning_rate": 1.0122174008884783e-05, "loss": 0.9744, "step": 5284 }, { "epoch": 0.797134238310709, "grad_norm": 1.9451532363891602, "learning_rate": 1.0107667540001803e-05, "loss": 1.0744, "step": 5285 }, { "epoch": 0.7972850678733032, "grad_norm": 2.041696548461914, "learning_rate": 1.0093170304543359e-05, "loss": 0.9769, "step": 5286 }, { "epoch": 0.7974358974358975, "grad_norm": 1.9827631711959839, "learning_rate": 1.0078682305864973e-05, "loss": 1.1179, "step": 5287 }, { "epoch": 0.7975867269984918, "grad_norm": 2.072096347808838, "learning_rate": 1.0064203547320011e-05, "loss": 1.1623, "step": 5288 }, { "epoch": 0.797737556561086, "grad_norm": 2.049410104751587, "learning_rate": 1.00497340322597e-05, "loss": 0.9704, "step": 5289 }, { "epoch": 0.7978883861236803, "grad_norm": 2.4092702865600586, "learning_rate": 1.0035273764033132e-05, "loss": 1.0559, "step": 5290 }, { "epoch": 0.7980392156862746, "grad_norm": 1.9707903861999512, "learning_rate": 1.0020822745987269e-05, "loss": 1.1566, "step": 5291 }, { "epoch": 0.7981900452488688, "grad_norm": 2.558241128921509, "learning_rate": 1.0006380981466923e-05, "loss": 1.6425, "step": 5292 }, { "epoch": 0.7983408748114631, "grad_norm": 2.029670238494873, "learning_rate": 9.991948473814767e-06, "loss": 1.1245, "step": 5293 }, { "epoch": 0.7984917043740574, "grad_norm": 2.0207602977752686, "learning_rate": 9.977525226371321e-06, "loss": 1.0203, "step": 5294 }, { "epoch": 0.7986425339366516, "grad_norm": 1.9039123058319092, "learning_rate": 9.963111242474977e-06, "loss": 0.8431, "step": 5295 }, { "epoch": 0.7987933634992459, "grad_norm": 1.4931424856185913, "learning_rate": 9.948706525461971e-06, "loss": 0.5922, "step": 5296 }, { "epoch": 0.7989441930618402, "grad_norm": 1.8009635210037231, "learning_rate": 9.934311078666419e-06, "loss": 0.8016, "step": 5297 }, { "epoch": 0.7990950226244344, "grad_norm": 1.5560402870178223, "learning_rate": 9.919924905420242e-06, "loss": 0.6677, "step": 5298 }, { "epoch": 0.7992458521870287, "grad_norm": 1.6616990566253662, "learning_rate": 9.905548009053256e-06, "loss": 0.6443, "step": 5299 }, { "epoch": 0.799396681749623, "grad_norm": 1.8447338342666626, "learning_rate": 9.891180392893119e-06, "loss": 0.8087, "step": 5300 }, { "epoch": 0.7995475113122172, "grad_norm": 2.0707383155822754, "learning_rate": 9.876822060265334e-06, "loss": 1.3811, "step": 5301 }, { "epoch": 0.7996983408748115, "grad_norm": 1.6356968879699707, "learning_rate": 9.862473014493268e-06, "loss": 1.0428, "step": 5302 }, { "epoch": 0.7998491704374058, "grad_norm": 1.8068865537643433, "learning_rate": 9.848133258898134e-06, "loss": 1.2524, "step": 5303 }, { "epoch": 0.8, "grad_norm": 1.8336164951324463, "learning_rate": 9.833802796798985e-06, "loss": 1.0252, "step": 5304 }, { "epoch": 0.8001508295625943, "grad_norm": 1.7061680555343628, "learning_rate": 9.819481631512728e-06, "loss": 1.0712, "step": 5305 }, { "epoch": 0.8003016591251886, "grad_norm": 1.730584740638733, "learning_rate": 9.805169766354134e-06, "loss": 0.8894, "step": 5306 }, { "epoch": 0.8004524886877828, "grad_norm": 1.779383659362793, "learning_rate": 9.790867204635806e-06, "loss": 0.9589, "step": 5307 }, { "epoch": 0.8006033182503771, "grad_norm": 1.6586554050445557, "learning_rate": 9.776573949668172e-06, "loss": 0.8761, "step": 5308 }, { "epoch": 0.8007541478129714, "grad_norm": 1.8410760164260864, "learning_rate": 9.76229000475955e-06, "loss": 0.983, "step": 5309 }, { "epoch": 0.8009049773755657, "grad_norm": 1.9232213497161865, "learning_rate": 9.74801537321608e-06, "loss": 1.0504, "step": 5310 }, { "epoch": 0.8010558069381599, "grad_norm": 2.0194826126098633, "learning_rate": 9.73375005834174e-06, "loss": 1.2772, "step": 5311 }, { "epoch": 0.8012066365007542, "grad_norm": 1.9024852514266968, "learning_rate": 9.719494063438368e-06, "loss": 0.9792, "step": 5312 }, { "epoch": 0.8013574660633485, "grad_norm": 1.6959342956542969, "learning_rate": 9.705247391805634e-06, "loss": 0.8261, "step": 5313 }, { "epoch": 0.8015082956259427, "grad_norm": 1.984748363494873, "learning_rate": 9.691010046741055e-06, "loss": 1.0933, "step": 5314 }, { "epoch": 0.801659125188537, "grad_norm": 2.0949642658233643, "learning_rate": 9.676782031539988e-06, "loss": 1.2588, "step": 5315 }, { "epoch": 0.8018099547511313, "grad_norm": 1.919958233833313, "learning_rate": 9.662563349495645e-06, "loss": 0.914, "step": 5316 }, { "epoch": 0.8019607843137255, "grad_norm": 1.8903716802597046, "learning_rate": 9.64835400389903e-06, "loss": 1.1442, "step": 5317 }, { "epoch": 0.8021116138763198, "grad_norm": 1.7853647470474243, "learning_rate": 9.63415399803903e-06, "loss": 1.0063, "step": 5318 }, { "epoch": 0.8022624434389141, "grad_norm": 1.6748014688491821, "learning_rate": 9.61996333520237e-06, "loss": 0.9114, "step": 5319 }, { "epoch": 0.8024132730015083, "grad_norm": 1.5291383266448975, "learning_rate": 9.605782018673593e-06, "loss": 0.7843, "step": 5320 }, { "epoch": 0.8025641025641026, "grad_norm": 1.6431665420532227, "learning_rate": 9.591610051735089e-06, "loss": 0.7463, "step": 5321 }, { "epoch": 0.8027149321266969, "grad_norm": 2.052990198135376, "learning_rate": 9.577447437667082e-06, "loss": 1.1098, "step": 5322 }, { "epoch": 0.8028657616892911, "grad_norm": 1.722938895225525, "learning_rate": 9.563294179747634e-06, "loss": 0.9437, "step": 5323 }, { "epoch": 0.8030165912518854, "grad_norm": 1.7766436338424683, "learning_rate": 9.549150281252633e-06, "loss": 0.7506, "step": 5324 }, { "epoch": 0.8031674208144797, "grad_norm": 2.102569580078125, "learning_rate": 9.535015745455811e-06, "loss": 1.3887, "step": 5325 }, { "epoch": 0.8033182503770739, "grad_norm": 1.9229612350463867, "learning_rate": 9.520890575628722e-06, "loss": 1.0362, "step": 5326 }, { "epoch": 0.8034690799396682, "grad_norm": 1.8364300727844238, "learning_rate": 9.506774775040767e-06, "loss": 0.9619, "step": 5327 }, { "epoch": 0.8036199095022625, "grad_norm": 1.7381640672683716, "learning_rate": 9.492668346959166e-06, "loss": 0.7775, "step": 5328 }, { "epoch": 0.8037707390648567, "grad_norm": 1.9851635694503784, "learning_rate": 9.478571294648974e-06, "loss": 1.212, "step": 5329 }, { "epoch": 0.803921568627451, "grad_norm": 1.9643309116363525, "learning_rate": 9.464483621373078e-06, "loss": 1.113, "step": 5330 }, { "epoch": 0.8040723981900453, "grad_norm": 1.7613937854766846, "learning_rate": 9.450405330392181e-06, "loss": 0.9043, "step": 5331 }, { "epoch": 0.8042232277526395, "grad_norm": 2.5394070148468018, "learning_rate": 9.436336424964837e-06, "loss": 1.0983, "step": 5332 }, { "epoch": 0.8043740573152338, "grad_norm": 1.8354568481445312, "learning_rate": 9.422276908347405e-06, "loss": 0.9301, "step": 5333 }, { "epoch": 0.8045248868778281, "grad_norm": 1.9672948122024536, "learning_rate": 9.408226783794094e-06, "loss": 0.9431, "step": 5334 }, { "epoch": 0.8046757164404224, "grad_norm": 1.6694124937057495, "learning_rate": 9.39418605455693e-06, "loss": 0.7044, "step": 5335 }, { "epoch": 0.8048265460030166, "grad_norm": 1.8359830379486084, "learning_rate": 9.380154723885737e-06, "loss": 0.9078, "step": 5336 }, { "epoch": 0.8049773755656109, "grad_norm": 2.005419969558716, "learning_rate": 9.366132795028203e-06, "loss": 0.8537, "step": 5337 }, { "epoch": 0.8051282051282052, "grad_norm": 1.8661540746688843, "learning_rate": 9.352120271229819e-06, "loss": 1.1259, "step": 5338 }, { "epoch": 0.8052790346907994, "grad_norm": 2.269028425216675, "learning_rate": 9.33811715573391e-06, "loss": 1.3518, "step": 5339 }, { "epoch": 0.8054298642533937, "grad_norm": 2.1171562671661377, "learning_rate": 9.324123451781618e-06, "loss": 1.0486, "step": 5340 }, { "epoch": 0.805580693815988, "grad_norm": 2.249465227127075, "learning_rate": 9.310139162611902e-06, "loss": 1.171, "step": 5341 }, { "epoch": 0.8057315233785822, "grad_norm": 2.0587258338928223, "learning_rate": 9.296164291461551e-06, "loss": 1.2256, "step": 5342 }, { "epoch": 0.8058823529411765, "grad_norm": 2.2906482219696045, "learning_rate": 9.28219884156517e-06, "loss": 1.1995, "step": 5343 }, { "epoch": 0.8060331825037708, "grad_norm": 2.4010226726531982, "learning_rate": 9.268242816155187e-06, "loss": 1.1022, "step": 5344 }, { "epoch": 0.806184012066365, "grad_norm": 2.3840599060058594, "learning_rate": 9.254296218461833e-06, "loss": 1.327, "step": 5345 }, { "epoch": 0.8063348416289593, "grad_norm": 1.6013177633285522, "learning_rate": 9.240359051713166e-06, "loss": 0.6849, "step": 5346 }, { "epoch": 0.8064856711915536, "grad_norm": 1.6539803743362427, "learning_rate": 9.22643131913507e-06, "loss": 0.6999, "step": 5347 }, { "epoch": 0.8066365007541478, "grad_norm": 1.7981321811676025, "learning_rate": 9.212513023951247e-06, "loss": 0.7895, "step": 5348 }, { "epoch": 0.8067873303167421, "grad_norm": 1.5624638795852661, "learning_rate": 9.19860416938319e-06, "loss": 0.5464, "step": 5349 }, { "epoch": 0.8069381598793364, "grad_norm": 1.7716702222824097, "learning_rate": 9.184704758650243e-06, "loss": 0.7544, "step": 5350 }, { "epoch": 0.8070889894419306, "grad_norm": 2.0965089797973633, "learning_rate": 9.170814794969524e-06, "loss": 1.4886, "step": 5351 }, { "epoch": 0.8072398190045249, "grad_norm": 1.884462833404541, "learning_rate": 9.156934281556001e-06, "loss": 1.3011, "step": 5352 }, { "epoch": 0.8073906485671192, "grad_norm": 1.6607698202133179, "learning_rate": 9.143063221622428e-06, "loss": 1.0212, "step": 5353 }, { "epoch": 0.8075414781297134, "grad_norm": 1.9619632959365845, "learning_rate": 9.129201618379402e-06, "loss": 1.2957, "step": 5354 }, { "epoch": 0.8076923076923077, "grad_norm": 1.8262299299240112, "learning_rate": 9.115349475035284e-06, "loss": 0.9941, "step": 5355 }, { "epoch": 0.807843137254902, "grad_norm": 1.789289116859436, "learning_rate": 9.101506794796277e-06, "loss": 0.9122, "step": 5356 }, { "epoch": 0.8079939668174962, "grad_norm": 1.8560453653335571, "learning_rate": 9.087673580866402e-06, "loss": 1.2448, "step": 5357 }, { "epoch": 0.8081447963800905, "grad_norm": 1.8972336053848267, "learning_rate": 9.073849836447461e-06, "loss": 1.0677, "step": 5358 }, { "epoch": 0.8082956259426848, "grad_norm": 1.9950562715530396, "learning_rate": 9.060035564739089e-06, "loss": 1.1253, "step": 5359 }, { "epoch": 0.808446455505279, "grad_norm": 1.753962755203247, "learning_rate": 9.046230768938719e-06, "loss": 0.8768, "step": 5360 }, { "epoch": 0.8085972850678733, "grad_norm": 1.9821527004241943, "learning_rate": 9.032435452241583e-06, "loss": 1.2591, "step": 5361 }, { "epoch": 0.8087481146304676, "grad_norm": 1.7108322381973267, "learning_rate": 9.01864961784073e-06, "loss": 0.87, "step": 5362 }, { "epoch": 0.8088989441930619, "grad_norm": 1.9553203582763672, "learning_rate": 9.004873268927022e-06, "loss": 1.2016, "step": 5363 }, { "epoch": 0.8090497737556561, "grad_norm": 1.753669261932373, "learning_rate": 8.991106408689086e-06, "loss": 0.9455, "step": 5364 }, { "epoch": 0.8092006033182504, "grad_norm": 1.6604578495025635, "learning_rate": 8.977349040313403e-06, "loss": 0.8583, "step": 5365 }, { "epoch": 0.8093514328808447, "grad_norm": 2.202800750732422, "learning_rate": 8.963601166984225e-06, "loss": 1.2689, "step": 5366 }, { "epoch": 0.8095022624434389, "grad_norm": 1.8151285648345947, "learning_rate": 8.949862791883623e-06, "loss": 0.9382, "step": 5367 }, { "epoch": 0.8096530920060332, "grad_norm": 1.8921010494232178, "learning_rate": 8.936133918191453e-06, "loss": 1.0094, "step": 5368 }, { "epoch": 0.8098039215686275, "grad_norm": 1.8719514608383179, "learning_rate": 8.922414549085389e-06, "loss": 0.8916, "step": 5369 }, { "epoch": 0.8099547511312217, "grad_norm": 1.9346952438354492, "learning_rate": 8.9087046877409e-06, "loss": 1.0363, "step": 5370 }, { "epoch": 0.810105580693816, "grad_norm": 2.124605894088745, "learning_rate": 8.895004337331237e-06, "loss": 1.3475, "step": 5371 }, { "epoch": 0.8102564102564103, "grad_norm": 1.8317384719848633, "learning_rate": 8.881313501027477e-06, "loss": 1.0506, "step": 5372 }, { "epoch": 0.8104072398190045, "grad_norm": 1.7993335723876953, "learning_rate": 8.867632181998487e-06, "loss": 0.9511, "step": 5373 }, { "epoch": 0.8105580693815988, "grad_norm": 2.169856309890747, "learning_rate": 8.853960383410908e-06, "loss": 1.3184, "step": 5374 }, { "epoch": 0.8107088989441931, "grad_norm": 2.0029046535491943, "learning_rate": 8.840298108429213e-06, "loss": 1.1835, "step": 5375 }, { "epoch": 0.8108597285067873, "grad_norm": 1.900715947151184, "learning_rate": 8.826645360215646e-06, "loss": 1.1995, "step": 5376 }, { "epoch": 0.8110105580693816, "grad_norm": 2.1883766651153564, "learning_rate": 8.813002141930249e-06, "loss": 1.4585, "step": 5377 }, { "epoch": 0.8111613876319759, "grad_norm": 1.9314310550689697, "learning_rate": 8.799368456730872e-06, "loss": 0.7868, "step": 5378 }, { "epoch": 0.8113122171945701, "grad_norm": 1.7820298671722412, "learning_rate": 8.785744307773147e-06, "loss": 0.9269, "step": 5379 }, { "epoch": 0.8114630467571644, "grad_norm": 2.1482815742492676, "learning_rate": 8.772129698210496e-06, "loss": 1.1144, "step": 5380 }, { "epoch": 0.8116138763197587, "grad_norm": 2.0797035694122314, "learning_rate": 8.758524631194155e-06, "loss": 0.9179, "step": 5381 }, { "epoch": 0.8117647058823529, "grad_norm": 1.974092721939087, "learning_rate": 8.744929109873102e-06, "loss": 1.3036, "step": 5382 }, { "epoch": 0.8119155354449472, "grad_norm": 1.9769659042358398, "learning_rate": 8.73134313739416e-06, "loss": 1.1961, "step": 5383 }, { "epoch": 0.8120663650075415, "grad_norm": 1.5367333889007568, "learning_rate": 8.71776671690191e-06, "loss": 0.8173, "step": 5384 }, { "epoch": 0.8122171945701357, "grad_norm": 2.1588568687438965, "learning_rate": 8.704199851538742e-06, "loss": 1.0176, "step": 5385 }, { "epoch": 0.81236802413273, "grad_norm": 1.8441039323806763, "learning_rate": 8.690642544444816e-06, "loss": 1.0677, "step": 5386 }, { "epoch": 0.8125188536953243, "grad_norm": 1.8755221366882324, "learning_rate": 8.67709479875809e-06, "loss": 1.1645, "step": 5387 }, { "epoch": 0.8126696832579186, "grad_norm": 2.167508125305176, "learning_rate": 8.663556617614305e-06, "loss": 1.343, "step": 5388 }, { "epoch": 0.8128205128205128, "grad_norm": 1.990826964378357, "learning_rate": 8.650028004146999e-06, "loss": 1.2193, "step": 5389 }, { "epoch": 0.8129713423831071, "grad_norm": 1.889369010925293, "learning_rate": 8.636508961487471e-06, "loss": 1.1168, "step": 5390 }, { "epoch": 0.8131221719457014, "grad_norm": 2.075881242752075, "learning_rate": 8.622999492764843e-06, "loss": 0.9813, "step": 5391 }, { "epoch": 0.8132730015082956, "grad_norm": 2.1152939796447754, "learning_rate": 8.609499601105974e-06, "loss": 1.2691, "step": 5392 }, { "epoch": 0.8134238310708899, "grad_norm": 2.075906753540039, "learning_rate": 8.596009289635537e-06, "loss": 0.9348, "step": 5393 }, { "epoch": 0.8135746606334842, "grad_norm": 2.200687885284424, "learning_rate": 8.582528561475984e-06, "loss": 1.1687, "step": 5394 }, { "epoch": 0.8137254901960784, "grad_norm": 2.324723958969116, "learning_rate": 8.569057419747544e-06, "loss": 1.2164, "step": 5395 }, { "epoch": 0.8138763197586727, "grad_norm": 1.7494066953659058, "learning_rate": 8.555595867568234e-06, "loss": 0.7405, "step": 5396 }, { "epoch": 0.814027149321267, "grad_norm": 1.4704443216323853, "learning_rate": 8.542143908053834e-06, "loss": 0.6228, "step": 5397 }, { "epoch": 0.8141779788838612, "grad_norm": 1.5529978275299072, "learning_rate": 8.528701544317935e-06, "loss": 0.6842, "step": 5398 }, { "epoch": 0.8143288084464555, "grad_norm": 1.6603937149047852, "learning_rate": 8.515268779471875e-06, "loss": 0.5847, "step": 5399 }, { "epoch": 0.8144796380090498, "grad_norm": 2.1366686820983887, "learning_rate": 8.501845616624799e-06, "loss": 0.9724, "step": 5400 }, { "epoch": 0.814630467571644, "grad_norm": 1.9924811124801636, "learning_rate": 8.488432058883583e-06, "loss": 1.2368, "step": 5401 }, { "epoch": 0.8147812971342383, "grad_norm": 1.723261833190918, "learning_rate": 8.475028109352934e-06, "loss": 0.8764, "step": 5402 }, { "epoch": 0.8149321266968326, "grad_norm": 1.9842509031295776, "learning_rate": 8.461633771135308e-06, "loss": 1.1994, "step": 5403 }, { "epoch": 0.8150829562594268, "grad_norm": 1.7919799089431763, "learning_rate": 8.448249047330936e-06, "loss": 1.0278, "step": 5404 }, { "epoch": 0.8152337858220211, "grad_norm": 2.0688140392303467, "learning_rate": 8.43487394103783e-06, "loss": 1.3682, "step": 5405 }, { "epoch": 0.8153846153846154, "grad_norm": 1.5614007711410522, "learning_rate": 8.42150845535178e-06, "loss": 0.8219, "step": 5406 }, { "epoch": 0.8155354449472096, "grad_norm": 1.8434463739395142, "learning_rate": 8.408152593366331e-06, "loss": 1.096, "step": 5407 }, { "epoch": 0.8156862745098039, "grad_norm": 1.5596860647201538, "learning_rate": 8.39480635817282e-06, "loss": 0.7286, "step": 5408 }, { "epoch": 0.8158371040723982, "grad_norm": 1.9699653387069702, "learning_rate": 8.381469752860349e-06, "loss": 1.0393, "step": 5409 }, { "epoch": 0.8159879336349924, "grad_norm": 2.2835376262664795, "learning_rate": 8.368142780515797e-06, "loss": 1.2889, "step": 5410 }, { "epoch": 0.8161387631975867, "grad_norm": 1.7739356756210327, "learning_rate": 8.35482544422379e-06, "loss": 0.8305, "step": 5411 }, { "epoch": 0.816289592760181, "grad_norm": 1.9487102031707764, "learning_rate": 8.341517747066747e-06, "loss": 1.2991, "step": 5412 }, { "epoch": 0.8164404223227752, "grad_norm": 1.7588680982589722, "learning_rate": 8.32821969212485e-06, "loss": 0.8616, "step": 5413 }, { "epoch": 0.8165912518853695, "grad_norm": 2.244819402694702, "learning_rate": 8.314931282476046e-06, "loss": 1.094, "step": 5414 }, { "epoch": 0.8167420814479638, "grad_norm": 1.792413592338562, "learning_rate": 8.301652521196063e-06, "loss": 0.7346, "step": 5415 }, { "epoch": 0.816892911010558, "grad_norm": 2.0373101234436035, "learning_rate": 8.288383411358375e-06, "loss": 1.0317, "step": 5416 }, { "epoch": 0.8170437405731523, "grad_norm": 1.9479233026504517, "learning_rate": 8.27512395603423e-06, "loss": 1.1577, "step": 5417 }, { "epoch": 0.8171945701357466, "grad_norm": 1.6179245710372925, "learning_rate": 8.261874158292653e-06, "loss": 0.777, "step": 5418 }, { "epoch": 0.8173453996983409, "grad_norm": 2.018097400665283, "learning_rate": 8.248634021200414e-06, "loss": 1.1003, "step": 5419 }, { "epoch": 0.8174962292609351, "grad_norm": 1.6551551818847656, "learning_rate": 8.235403547822063e-06, "loss": 0.8997, "step": 5420 }, { "epoch": 0.8176470588235294, "grad_norm": 1.7220540046691895, "learning_rate": 8.22218274121992e-06, "loss": 0.8715, "step": 5421 }, { "epoch": 0.8177978883861237, "grad_norm": 2.2042396068573, "learning_rate": 8.20897160445403e-06, "loss": 1.3912, "step": 5422 }, { "epoch": 0.8179487179487179, "grad_norm": 1.965556263923645, "learning_rate": 8.195770140582231e-06, "loss": 1.0877, "step": 5423 }, { "epoch": 0.8180995475113122, "grad_norm": 2.1407129764556885, "learning_rate": 8.18257835266012e-06, "loss": 1.1377, "step": 5424 }, { "epoch": 0.8182503770739065, "grad_norm": 1.6800580024719238, "learning_rate": 8.169396243741051e-06, "loss": 0.7773, "step": 5425 }, { "epoch": 0.8184012066365007, "grad_norm": 1.736815333366394, "learning_rate": 8.156223816876141e-06, "loss": 0.9432, "step": 5426 }, { "epoch": 0.818552036199095, "grad_norm": 1.8233391046524048, "learning_rate": 8.14306107511425e-06, "loss": 0.9104, "step": 5427 }, { "epoch": 0.8187028657616893, "grad_norm": 1.9960111379623413, "learning_rate": 8.12990802150202e-06, "loss": 0.9641, "step": 5428 }, { "epoch": 0.8188536953242835, "grad_norm": 2.4332821369171143, "learning_rate": 8.116764659083826e-06, "loss": 1.4925, "step": 5429 }, { "epoch": 0.8190045248868778, "grad_norm": 1.9992955923080444, "learning_rate": 8.103630990901829e-06, "loss": 1.1167, "step": 5430 }, { "epoch": 0.8191553544494721, "grad_norm": 1.9114079475402832, "learning_rate": 8.090507019995913e-06, "loss": 0.9104, "step": 5431 }, { "epoch": 0.8193061840120663, "grad_norm": 1.7345335483551025, "learning_rate": 8.077392749403745e-06, "loss": 0.9058, "step": 5432 }, { "epoch": 0.8194570135746606, "grad_norm": 2.434889078140259, "learning_rate": 8.06428818216074e-06, "loss": 1.2579, "step": 5433 }, { "epoch": 0.8196078431372549, "grad_norm": 2.101712703704834, "learning_rate": 8.051193321300049e-06, "loss": 1.2081, "step": 5434 }, { "epoch": 0.8197586726998491, "grad_norm": 1.9670722484588623, "learning_rate": 8.038108169852598e-06, "loss": 1.062, "step": 5435 }, { "epoch": 0.8199095022624434, "grad_norm": 1.8148819208145142, "learning_rate": 8.025032730847065e-06, "loss": 0.9557, "step": 5436 }, { "epoch": 0.8200603318250377, "grad_norm": 2.219857931137085, "learning_rate": 8.011967007309862e-06, "loss": 1.2074, "step": 5437 }, { "epoch": 0.820211161387632, "grad_norm": 2.0780956745147705, "learning_rate": 7.99891100226518e-06, "loss": 1.014, "step": 5438 }, { "epoch": 0.8203619909502262, "grad_norm": 2.152963399887085, "learning_rate": 7.98586471873492e-06, "loss": 1.0992, "step": 5439 }, { "epoch": 0.8205128205128205, "grad_norm": 1.8189295530319214, "learning_rate": 7.972828159738765e-06, "loss": 0.8285, "step": 5440 }, { "epoch": 0.8206636500754148, "grad_norm": 2.460634231567383, "learning_rate": 7.959801328294147e-06, "loss": 1.2425, "step": 5441 }, { "epoch": 0.820814479638009, "grad_norm": 1.7440303564071655, "learning_rate": 7.946784227416237e-06, "loss": 0.7693, "step": 5442 }, { "epoch": 0.8209653092006033, "grad_norm": 2.064901828765869, "learning_rate": 7.933776860117952e-06, "loss": 1.0374, "step": 5443 }, { "epoch": 0.8211161387631976, "grad_norm": 2.3053548336029053, "learning_rate": 7.920779229409958e-06, "loss": 1.176, "step": 5444 }, { "epoch": 0.8212669683257918, "grad_norm": 2.387575387954712, "learning_rate": 7.90779133830067e-06, "loss": 0.8846, "step": 5445 }, { "epoch": 0.8214177978883861, "grad_norm": 1.8777786493301392, "learning_rate": 7.894813189796251e-06, "loss": 1.0308, "step": 5446 }, { "epoch": 0.8215686274509804, "grad_norm": 1.6900545358657837, "learning_rate": 7.881844786900611e-06, "loss": 0.7349, "step": 5447 }, { "epoch": 0.8217194570135746, "grad_norm": 1.7314860820770264, "learning_rate": 7.868886132615383e-06, "loss": 0.7764, "step": 5448 }, { "epoch": 0.8218702865761689, "grad_norm": 1.7128175497055054, "learning_rate": 7.855937229939963e-06, "loss": 0.7689, "step": 5449 }, { "epoch": 0.8220211161387632, "grad_norm": 1.7675073146820068, "learning_rate": 7.842998081871495e-06, "loss": 0.7815, "step": 5450 }, { "epoch": 0.8221719457013574, "grad_norm": 1.66483473777771, "learning_rate": 7.830068691404847e-06, "loss": 1.074, "step": 5451 }, { "epoch": 0.8223227752639517, "grad_norm": 1.6862764358520508, "learning_rate": 7.817149061532641e-06, "loss": 0.9327, "step": 5452 }, { "epoch": 0.822473604826546, "grad_norm": 1.9222733974456787, "learning_rate": 7.804239195245244e-06, "loss": 1.1921, "step": 5453 }, { "epoch": 0.8226244343891402, "grad_norm": 1.6387635469436646, "learning_rate": 7.791339095530747e-06, "loss": 0.8099, "step": 5454 }, { "epoch": 0.8227752639517345, "grad_norm": 1.8643745183944702, "learning_rate": 7.778448765374996e-06, "loss": 1.1404, "step": 5455 }, { "epoch": 0.8229260935143288, "grad_norm": 1.8442468643188477, "learning_rate": 7.765568207761576e-06, "loss": 1.058, "step": 5456 }, { "epoch": 0.823076923076923, "grad_norm": 1.6654711961746216, "learning_rate": 7.752697425671779e-06, "loss": 0.9528, "step": 5457 }, { "epoch": 0.8232277526395173, "grad_norm": 2.0676045417785645, "learning_rate": 7.739836422084673e-06, "loss": 1.1321, "step": 5458 }, { "epoch": 0.8233785822021116, "grad_norm": 1.7883180379867554, "learning_rate": 7.72698519997705e-06, "loss": 0.8264, "step": 5459 }, { "epoch": 0.8235294117647058, "grad_norm": 1.8603087663650513, "learning_rate": 7.714143762323434e-06, "loss": 1.0152, "step": 5460 }, { "epoch": 0.8236802413273001, "grad_norm": 1.7909752130508423, "learning_rate": 7.701312112096083e-06, "loss": 1.002, "step": 5461 }, { "epoch": 0.8238310708898944, "grad_norm": 2.0984034538269043, "learning_rate": 7.688490252265001e-06, "loss": 1.3896, "step": 5462 }, { "epoch": 0.8239819004524886, "grad_norm": 1.9270302057266235, "learning_rate": 7.675678185797914e-06, "loss": 1.1951, "step": 5463 }, { "epoch": 0.8241327300150829, "grad_norm": 1.662935495376587, "learning_rate": 7.662875915660277e-06, "loss": 0.9568, "step": 5464 }, { "epoch": 0.8242835595776772, "grad_norm": 1.7076441049575806, "learning_rate": 7.650083444815303e-06, "loss": 0.9029, "step": 5465 }, { "epoch": 0.8244343891402715, "grad_norm": 1.9622083902359009, "learning_rate": 7.63730077622392e-06, "loss": 1.2832, "step": 5466 }, { "epoch": 0.8245852187028657, "grad_norm": 1.9910181760787964, "learning_rate": 7.624527912844765e-06, "loss": 1.1427, "step": 5467 }, { "epoch": 0.82473604826546, "grad_norm": 1.9950573444366455, "learning_rate": 7.61176485763424e-06, "loss": 1.1963, "step": 5468 }, { "epoch": 0.8248868778280543, "grad_norm": 1.7381895780563354, "learning_rate": 7.599011613546469e-06, "loss": 0.9103, "step": 5469 }, { "epoch": 0.8250377073906485, "grad_norm": 1.7454792261123657, "learning_rate": 7.58626818353329e-06, "loss": 0.9764, "step": 5470 }, { "epoch": 0.8251885369532428, "grad_norm": 1.8211841583251953, "learning_rate": 7.573534570544288e-06, "loss": 0.8908, "step": 5471 }, { "epoch": 0.8253393665158371, "grad_norm": 1.6086565256118774, "learning_rate": 7.5608107775267665e-06, "loss": 0.8128, "step": 5472 }, { "epoch": 0.8254901960784313, "grad_norm": 2.001931667327881, "learning_rate": 7.548096807425753e-06, "loss": 1.1505, "step": 5473 }, { "epoch": 0.8256410256410256, "grad_norm": 1.680903673171997, "learning_rate": 7.53539266318401e-06, "loss": 0.9285, "step": 5474 }, { "epoch": 0.8257918552036199, "grad_norm": 2.0169754028320312, "learning_rate": 7.522698347742025e-06, "loss": 1.1477, "step": 5475 }, { "epoch": 0.8259426847662141, "grad_norm": 1.8875977993011475, "learning_rate": 7.510013864037996e-06, "loss": 0.9499, "step": 5476 }, { "epoch": 0.8260935143288084, "grad_norm": 1.7348730564117432, "learning_rate": 7.4973392150078595e-06, "loss": 0.9357, "step": 5477 }, { "epoch": 0.8262443438914027, "grad_norm": 2.0150208473205566, "learning_rate": 7.484674403585279e-06, "loss": 1.0805, "step": 5478 }, { "epoch": 0.8263951734539969, "grad_norm": 2.1274824142456055, "learning_rate": 7.472019432701627e-06, "loss": 1.0719, "step": 5479 }, { "epoch": 0.8265460030165912, "grad_norm": 2.1294009685516357, "learning_rate": 7.4593743052860095e-06, "loss": 1.1324, "step": 5480 }, { "epoch": 0.8266968325791855, "grad_norm": 1.8614637851715088, "learning_rate": 7.446739024265254e-06, "loss": 0.9451, "step": 5481 }, { "epoch": 0.8268476621417797, "grad_norm": 1.8949862718582153, "learning_rate": 7.434113592563896e-06, "loss": 1.1505, "step": 5482 }, { "epoch": 0.826998491704374, "grad_norm": 1.8136004209518433, "learning_rate": 7.421498013104206e-06, "loss": 0.9847, "step": 5483 }, { "epoch": 0.8271493212669683, "grad_norm": 1.8847849369049072, "learning_rate": 7.408892288806179e-06, "loss": 0.9289, "step": 5484 }, { "epoch": 0.8273001508295625, "grad_norm": 2.1866061687469482, "learning_rate": 7.396296422587501e-06, "loss": 1.1042, "step": 5485 }, { "epoch": 0.8274509803921568, "grad_norm": 1.82841157913208, "learning_rate": 7.383710417363598e-06, "loss": 1.1011, "step": 5486 }, { "epoch": 0.8276018099547511, "grad_norm": 2.3467183113098145, "learning_rate": 7.371134276047609e-06, "loss": 1.4192, "step": 5487 }, { "epoch": 0.8277526395173453, "grad_norm": 1.8054245710372925, "learning_rate": 7.358568001550397e-06, "loss": 0.8171, "step": 5488 }, { "epoch": 0.8279034690799396, "grad_norm": 2.2280561923980713, "learning_rate": 7.346011596780533e-06, "loss": 1.4734, "step": 5489 }, { "epoch": 0.8280542986425339, "grad_norm": 1.6886333227157593, "learning_rate": 7.333465064644301e-06, "loss": 0.888, "step": 5490 }, { "epoch": 0.8282051282051283, "grad_norm": 1.8929355144500732, "learning_rate": 7.320928408045708e-06, "loss": 1.0043, "step": 5491 }, { "epoch": 0.8283559577677225, "grad_norm": 2.056823492050171, "learning_rate": 7.308401629886469e-06, "loss": 0.997, "step": 5492 }, { "epoch": 0.8285067873303168, "grad_norm": 2.0539588928222656, "learning_rate": 7.295884733066016e-06, "loss": 1.2605, "step": 5493 }, { "epoch": 0.8286576168929111, "grad_norm": 2.558398723602295, "learning_rate": 7.2833777204815044e-06, "loss": 0.9783, "step": 5494 }, { "epoch": 0.8288084464555053, "grad_norm": 2.116084098815918, "learning_rate": 7.2708805950277684e-06, "loss": 1.0842, "step": 5495 }, { "epoch": 0.8289592760180996, "grad_norm": 2.0246479511260986, "learning_rate": 7.258393359597382e-06, "loss": 0.9018, "step": 5496 }, { "epoch": 0.8291101055806939, "grad_norm": 1.7457963228225708, "learning_rate": 7.2459160170806286e-06, "loss": 0.8366, "step": 5497 }, { "epoch": 0.8292609351432881, "grad_norm": 2.1776962280273438, "learning_rate": 7.233448570365497e-06, "loss": 1.0851, "step": 5498 }, { "epoch": 0.8294117647058824, "grad_norm": 1.6494146585464478, "learning_rate": 7.2209910223376805e-06, "loss": 0.6713, "step": 5499 }, { "epoch": 0.8295625942684767, "grad_norm": 1.830108642578125, "learning_rate": 7.208543375880594e-06, "loss": 0.8407, "step": 5500 }, { "epoch": 0.8297134238310709, "grad_norm": 1.8206536769866943, "learning_rate": 7.196105633875344e-06, "loss": 1.1208, "step": 5501 }, { "epoch": 0.8298642533936652, "grad_norm": 1.568690538406372, "learning_rate": 7.183677799200761e-06, "loss": 0.9255, "step": 5502 }, { "epoch": 0.8300150829562595, "grad_norm": 1.9780510663986206, "learning_rate": 7.171259874733377e-06, "loss": 1.3021, "step": 5503 }, { "epoch": 0.8301659125188537, "grad_norm": 1.7433146238327026, "learning_rate": 7.158851863347415e-06, "loss": 1.027, "step": 5504 }, { "epoch": 0.830316742081448, "grad_norm": 1.8642061948776245, "learning_rate": 7.146453767914818e-06, "loss": 0.9262, "step": 5505 }, { "epoch": 0.8304675716440423, "grad_norm": 2.2333462238311768, "learning_rate": 7.134065591305239e-06, "loss": 1.3649, "step": 5506 }, { "epoch": 0.8306184012066365, "grad_norm": 1.8207292556762695, "learning_rate": 7.121687336386024e-06, "loss": 0.982, "step": 5507 }, { "epoch": 0.8307692307692308, "grad_norm": 1.8620260953903198, "learning_rate": 7.10931900602223e-06, "loss": 1.2071, "step": 5508 }, { "epoch": 0.8309200603318251, "grad_norm": 1.7083748579025269, "learning_rate": 7.096960603076619e-06, "loss": 1.1036, "step": 5509 }, { "epoch": 0.8310708898944194, "grad_norm": 2.0109496116638184, "learning_rate": 7.084612130409635e-06, "loss": 1.2648, "step": 5510 }, { "epoch": 0.8312217194570136, "grad_norm": 2.0177342891693115, "learning_rate": 7.0722735908794524e-06, "loss": 1.2731, "step": 5511 }, { "epoch": 0.8313725490196079, "grad_norm": 1.930759072303772, "learning_rate": 7.059944987341927e-06, "loss": 1.1557, "step": 5512 }, { "epoch": 0.8315233785822022, "grad_norm": 2.2057390213012695, "learning_rate": 7.047626322650635e-06, "loss": 1.2505, "step": 5513 }, { "epoch": 0.8316742081447964, "grad_norm": 2.2655954360961914, "learning_rate": 7.03531759965681e-06, "loss": 1.6723, "step": 5514 }, { "epoch": 0.8318250377073907, "grad_norm": 1.9529166221618652, "learning_rate": 7.02301882120942e-06, "loss": 1.0289, "step": 5515 }, { "epoch": 0.831975867269985, "grad_norm": 2.191986560821533, "learning_rate": 7.010729990155135e-06, "loss": 1.0805, "step": 5516 }, { "epoch": 0.8321266968325792, "grad_norm": 1.951036810874939, "learning_rate": 6.998451109338305e-06, "loss": 1.1922, "step": 5517 }, { "epoch": 0.8322775263951735, "grad_norm": 2.042438268661499, "learning_rate": 6.9861821816009866e-06, "loss": 1.1466, "step": 5518 }, { "epoch": 0.8324283559577678, "grad_norm": 1.7720205783843994, "learning_rate": 6.973923209782924e-06, "loss": 0.9338, "step": 5519 }, { "epoch": 0.832579185520362, "grad_norm": 1.8521287441253662, "learning_rate": 6.9616741967215575e-06, "loss": 0.957, "step": 5520 }, { "epoch": 0.8327300150829563, "grad_norm": 1.9683830738067627, "learning_rate": 6.949435145252037e-06, "loss": 1.1628, "step": 5521 }, { "epoch": 0.8328808446455506, "grad_norm": 2.1338233947753906, "learning_rate": 6.937206058207191e-06, "loss": 1.2156, "step": 5522 }, { "epoch": 0.8330316742081448, "grad_norm": 1.8829494714736938, "learning_rate": 6.92498693841755e-06, "loss": 1.0363, "step": 5523 }, { "epoch": 0.8331825037707391, "grad_norm": 2.122664213180542, "learning_rate": 6.912777788711333e-06, "loss": 1.2363, "step": 5524 }, { "epoch": 0.8333333333333334, "grad_norm": 2.072742462158203, "learning_rate": 6.900578611914454e-06, "loss": 1.0524, "step": 5525 }, { "epoch": 0.8334841628959276, "grad_norm": 2.0280590057373047, "learning_rate": 6.8883894108505205e-06, "loss": 1.0103, "step": 5526 }, { "epoch": 0.8336349924585219, "grad_norm": 2.117727041244507, "learning_rate": 6.87621018834082e-06, "loss": 1.1894, "step": 5527 }, { "epoch": 0.8337858220211162, "grad_norm": 2.3212852478027344, "learning_rate": 6.864040947204342e-06, "loss": 1.3966, "step": 5528 }, { "epoch": 0.8339366515837104, "grad_norm": 2.2138328552246094, "learning_rate": 6.851881690257766e-06, "loss": 1.2154, "step": 5529 }, { "epoch": 0.8340874811463047, "grad_norm": 1.9478979110717773, "learning_rate": 6.839732420315459e-06, "loss": 1.1405, "step": 5530 }, { "epoch": 0.834238310708899, "grad_norm": 2.0471882820129395, "learning_rate": 6.827593140189475e-06, "loss": 1.0521, "step": 5531 }, { "epoch": 0.8343891402714932, "grad_norm": 2.409430980682373, "learning_rate": 6.815463852689541e-06, "loss": 1.365, "step": 5532 }, { "epoch": 0.8345399698340875, "grad_norm": 1.9277198314666748, "learning_rate": 6.803344560623098e-06, "loss": 0.9182, "step": 5533 }, { "epoch": 0.8346907993966818, "grad_norm": 1.9446583986282349, "learning_rate": 6.791235266795254e-06, "loss": 1.008, "step": 5534 }, { "epoch": 0.834841628959276, "grad_norm": 1.9156359434127808, "learning_rate": 6.779135974008815e-06, "loss": 0.9815, "step": 5535 }, { "epoch": 0.8349924585218703, "grad_norm": 2.068669080734253, "learning_rate": 6.76704668506426e-06, "loss": 1.135, "step": 5536 }, { "epoch": 0.8351432880844646, "grad_norm": 2.1510820388793945, "learning_rate": 6.7549674027597655e-06, "loss": 1.0723, "step": 5537 }, { "epoch": 0.8352941176470589, "grad_norm": 2.0037155151367188, "learning_rate": 6.742898129891189e-06, "loss": 1.2048, "step": 5538 }, { "epoch": 0.8354449472096531, "grad_norm": 2.1139962673187256, "learning_rate": 6.730838869252054e-06, "loss": 0.972, "step": 5539 }, { "epoch": 0.8355957767722474, "grad_norm": 2.1349291801452637, "learning_rate": 6.718789623633598e-06, "loss": 1.3453, "step": 5540 }, { "epoch": 0.8357466063348417, "grad_norm": 2.313953161239624, "learning_rate": 6.706750395824718e-06, "loss": 1.0711, "step": 5541 }, { "epoch": 0.8358974358974359, "grad_norm": 1.9526065587997437, "learning_rate": 6.694721188611985e-06, "loss": 0.8691, "step": 5542 }, { "epoch": 0.8360482654600302, "grad_norm": 1.6814707517623901, "learning_rate": 6.682702004779673e-06, "loss": 0.7591, "step": 5543 }, { "epoch": 0.8361990950226245, "grad_norm": 1.9831913709640503, "learning_rate": 6.6706928471097165e-06, "loss": 1.0416, "step": 5544 }, { "epoch": 0.8363499245852187, "grad_norm": 2.194910764694214, "learning_rate": 6.658693718381753e-06, "loss": 0.968, "step": 5545 }, { "epoch": 0.836500754147813, "grad_norm": 1.5789858102798462, "learning_rate": 6.646704621373073e-06, "loss": 0.6056, "step": 5546 }, { "epoch": 0.8366515837104073, "grad_norm": 2.1647257804870605, "learning_rate": 6.634725558858662e-06, "loss": 1.1879, "step": 5547 }, { "epoch": 0.8368024132730015, "grad_norm": 1.4959051609039307, "learning_rate": 6.622756533611179e-06, "loss": 0.6248, "step": 5548 }, { "epoch": 0.8369532428355958, "grad_norm": 1.7243115901947021, "learning_rate": 6.6107975484009484e-06, "loss": 0.8911, "step": 5549 }, { "epoch": 0.8371040723981901, "grad_norm": 2.083686351776123, "learning_rate": 6.598848605996005e-06, "loss": 0.7785, "step": 5550 }, { "epoch": 0.8372549019607843, "grad_norm": 1.9487073421478271, "learning_rate": 6.5869097091620035e-06, "loss": 1.3127, "step": 5551 }, { "epoch": 0.8374057315233786, "grad_norm": 1.7258858680725098, "learning_rate": 6.574980860662316e-06, "loss": 0.8989, "step": 5552 }, { "epoch": 0.8375565610859729, "grad_norm": 1.9679633378982544, "learning_rate": 6.563062063257985e-06, "loss": 1.145, "step": 5553 }, { "epoch": 0.8377073906485671, "grad_norm": 2.029630184173584, "learning_rate": 6.551153319707709e-06, "loss": 1.112, "step": 5554 }, { "epoch": 0.8378582202111614, "grad_norm": 2.1549150943756104, "learning_rate": 6.539254632767872e-06, "loss": 1.4004, "step": 5555 }, { "epoch": 0.8380090497737557, "grad_norm": 1.906857967376709, "learning_rate": 6.527366005192531e-06, "loss": 1.2019, "step": 5556 }, { "epoch": 0.8381598793363499, "grad_norm": 1.977435827255249, "learning_rate": 6.515487439733414e-06, "loss": 0.991, "step": 5557 }, { "epoch": 0.8383107088989442, "grad_norm": 1.9470657110214233, "learning_rate": 6.5036189391399104e-06, "loss": 1.204, "step": 5558 }, { "epoch": 0.8384615384615385, "grad_norm": 1.8289368152618408, "learning_rate": 6.491760506159101e-06, "loss": 0.9542, "step": 5559 }, { "epoch": 0.8386123680241327, "grad_norm": 1.796347975730896, "learning_rate": 6.479912143535699e-06, "loss": 0.9189, "step": 5560 }, { "epoch": 0.838763197586727, "grad_norm": 2.1334760189056396, "learning_rate": 6.468073854012125e-06, "loss": 1.4124, "step": 5561 }, { "epoch": 0.8389140271493213, "grad_norm": 1.8766220808029175, "learning_rate": 6.456245640328451e-06, "loss": 1.0877, "step": 5562 }, { "epoch": 0.8390648567119156, "grad_norm": 1.9803199768066406, "learning_rate": 6.444427505222417e-06, "loss": 0.9396, "step": 5563 }, { "epoch": 0.8392156862745098, "grad_norm": 2.1593003273010254, "learning_rate": 6.432619451429439e-06, "loss": 1.2747, "step": 5564 }, { "epoch": 0.8393665158371041, "grad_norm": 2.2477760314941406, "learning_rate": 6.420821481682581e-06, "loss": 1.6642, "step": 5565 }, { "epoch": 0.8395173453996984, "grad_norm": 1.5576802492141724, "learning_rate": 6.4090335987126e-06, "loss": 0.7748, "step": 5566 }, { "epoch": 0.8396681749622926, "grad_norm": 1.9919483661651611, "learning_rate": 6.397255805247893e-06, "loss": 0.9638, "step": 5567 }, { "epoch": 0.8398190045248869, "grad_norm": 1.9162858724594116, "learning_rate": 6.385488104014531e-06, "loss": 1.1672, "step": 5568 }, { "epoch": 0.8399698340874812, "grad_norm": 2.0695624351501465, "learning_rate": 6.373730497736258e-06, "loss": 1.1474, "step": 5569 }, { "epoch": 0.8401206636500754, "grad_norm": 1.9179046154022217, "learning_rate": 6.3619829891344684e-06, "loss": 1.0905, "step": 5570 }, { "epoch": 0.8402714932126697, "grad_norm": 1.8506765365600586, "learning_rate": 6.3502455809282214e-06, "loss": 1.0177, "step": 5571 }, { "epoch": 0.840422322775264, "grad_norm": 1.9894875288009644, "learning_rate": 6.3385182758342485e-06, "loss": 0.9377, "step": 5572 }, { "epoch": 0.8405731523378582, "grad_norm": 2.0413002967834473, "learning_rate": 6.3268010765669295e-06, "loss": 1.058, "step": 5573 }, { "epoch": 0.8407239819004525, "grad_norm": 1.8701375722885132, "learning_rate": 6.315093985838311e-06, "loss": 1.1267, "step": 5574 }, { "epoch": 0.8408748114630468, "grad_norm": 1.9019427299499512, "learning_rate": 6.303397006358109e-06, "loss": 0.9406, "step": 5575 }, { "epoch": 0.841025641025641, "grad_norm": 1.9701522588729858, "learning_rate": 6.291710140833679e-06, "loss": 1.1075, "step": 5576 }, { "epoch": 0.8411764705882353, "grad_norm": 1.7889502048492432, "learning_rate": 6.280033391970058e-06, "loss": 1.0467, "step": 5577 }, { "epoch": 0.8413273001508296, "grad_norm": 2.1474647521972656, "learning_rate": 6.268366762469929e-06, "loss": 1.1837, "step": 5578 }, { "epoch": 0.8414781297134238, "grad_norm": 1.9649975299835205, "learning_rate": 6.256710255033621e-06, "loss": 0.9487, "step": 5579 }, { "epoch": 0.8416289592760181, "grad_norm": 2.1389734745025635, "learning_rate": 6.245063872359141e-06, "loss": 1.1356, "step": 5580 }, { "epoch": 0.8417797888386124, "grad_norm": 1.6781351566314697, "learning_rate": 6.233427617142146e-06, "loss": 0.8115, "step": 5581 }, { "epoch": 0.8419306184012066, "grad_norm": 2.0316593647003174, "learning_rate": 6.221801492075952e-06, "loss": 1.1533, "step": 5582 }, { "epoch": 0.8420814479638009, "grad_norm": 1.9056313037872314, "learning_rate": 6.2101854998515115e-06, "loss": 0.9804, "step": 5583 }, { "epoch": 0.8422322775263952, "grad_norm": 1.6533184051513672, "learning_rate": 6.198579643157459e-06, "loss": 0.8668, "step": 5584 }, { "epoch": 0.8423831070889894, "grad_norm": 1.7879443168640137, "learning_rate": 6.186983924680073e-06, "loss": 1.0244, "step": 5585 }, { "epoch": 0.8425339366515837, "grad_norm": 2.4195351600646973, "learning_rate": 6.175398347103267e-06, "loss": 1.4309, "step": 5586 }, { "epoch": 0.842684766214178, "grad_norm": 1.8181878328323364, "learning_rate": 6.163822913108652e-06, "loss": 0.9464, "step": 5587 }, { "epoch": 0.8428355957767723, "grad_norm": 1.788222074508667, "learning_rate": 6.152257625375424e-06, "loss": 0.9393, "step": 5588 }, { "epoch": 0.8429864253393665, "grad_norm": 2.0080509185791016, "learning_rate": 6.1407024865804906e-06, "loss": 0.959, "step": 5589 }, { "epoch": 0.8431372549019608, "grad_norm": 2.1736979484558105, "learning_rate": 6.129157499398386e-06, "loss": 1.1636, "step": 5590 }, { "epoch": 0.843288084464555, "grad_norm": 1.859155535697937, "learning_rate": 6.117622666501294e-06, "loss": 0.9233, "step": 5591 }, { "epoch": 0.8434389140271493, "grad_norm": 2.180818796157837, "learning_rate": 6.106097990559051e-06, "loss": 1.1771, "step": 5592 }, { "epoch": 0.8435897435897436, "grad_norm": 2.368051290512085, "learning_rate": 6.094583474239146e-06, "loss": 1.3566, "step": 5593 }, { "epoch": 0.8437405731523379, "grad_norm": 1.806279182434082, "learning_rate": 6.083079120206714e-06, "loss": 0.7897, "step": 5594 }, { "epoch": 0.8438914027149321, "grad_norm": 1.7711763381958008, "learning_rate": 6.071584931124535e-06, "loss": 0.8121, "step": 5595 }, { "epoch": 0.8440422322775264, "grad_norm": 1.239006519317627, "learning_rate": 6.060100909653032e-06, "loss": 0.4455, "step": 5596 }, { "epoch": 0.8441930618401207, "grad_norm": 1.3940811157226562, "learning_rate": 6.048627058450307e-06, "loss": 0.4808, "step": 5597 }, { "epoch": 0.8443438914027149, "grad_norm": 1.9646434783935547, "learning_rate": 6.037163380172045e-06, "loss": 1.0395, "step": 5598 }, { "epoch": 0.8444947209653092, "grad_norm": 1.8274394273757935, "learning_rate": 6.02570987747163e-06, "loss": 0.9761, "step": 5599 }, { "epoch": 0.8446455505279035, "grad_norm": 2.00116229057312, "learning_rate": 6.014266553000075e-06, "loss": 1.1578, "step": 5600 }, { "epoch": 0.8447963800904977, "grad_norm": 1.5707238912582397, "learning_rate": 6.002833409406039e-06, "loss": 0.9754, "step": 5601 }, { "epoch": 0.844947209653092, "grad_norm": 1.839948058128357, "learning_rate": 5.9914104493358105e-06, "loss": 1.0698, "step": 5602 }, { "epoch": 0.8450980392156863, "grad_norm": 2.067633628845215, "learning_rate": 5.979997675433347e-06, "loss": 1.391, "step": 5603 }, { "epoch": 0.8452488687782805, "grad_norm": 1.9193109273910522, "learning_rate": 5.968595090340223e-06, "loss": 1.2613, "step": 5604 }, { "epoch": 0.8453996983408748, "grad_norm": 2.0559580326080322, "learning_rate": 5.957202696695668e-06, "loss": 1.3449, "step": 5605 }, { "epoch": 0.8455505279034691, "grad_norm": 1.925458312034607, "learning_rate": 5.945820497136562e-06, "loss": 1.1148, "step": 5606 }, { "epoch": 0.8457013574660633, "grad_norm": 1.9200596809387207, "learning_rate": 5.93444849429739e-06, "loss": 1.0853, "step": 5607 }, { "epoch": 0.8458521870286576, "grad_norm": 2.030641555786133, "learning_rate": 5.923086690810309e-06, "loss": 1.397, "step": 5608 }, { "epoch": 0.8460030165912519, "grad_norm": 1.9053107500076294, "learning_rate": 5.911735089305115e-06, "loss": 1.1817, "step": 5609 }, { "epoch": 0.8461538461538461, "grad_norm": 1.8945577144622803, "learning_rate": 5.900393692409223e-06, "loss": 1.0761, "step": 5610 }, { "epoch": 0.8463046757164404, "grad_norm": 2.1278436183929443, "learning_rate": 5.889062502747705e-06, "loss": 1.4088, "step": 5611 }, { "epoch": 0.8464555052790347, "grad_norm": 1.5493136644363403, "learning_rate": 5.8777415229432586e-06, "loss": 0.7617, "step": 5612 }, { "epoch": 0.846606334841629, "grad_norm": 1.8716434240341187, "learning_rate": 5.86643075561622e-06, "loss": 1.0362, "step": 5613 }, { "epoch": 0.8467571644042232, "grad_norm": 1.7003974914550781, "learning_rate": 5.855130203384574e-06, "loss": 0.8062, "step": 5614 }, { "epoch": 0.8469079939668175, "grad_norm": 1.66433846950531, "learning_rate": 5.84383986886392e-06, "loss": 0.7763, "step": 5615 }, { "epoch": 0.8470588235294118, "grad_norm": 1.8128176927566528, "learning_rate": 5.8325597546675104e-06, "loss": 1.0224, "step": 5616 }, { "epoch": 0.847209653092006, "grad_norm": 2.020784616470337, "learning_rate": 5.821289863406221e-06, "loss": 1.11, "step": 5617 }, { "epoch": 0.8473604826546003, "grad_norm": 1.7697383165359497, "learning_rate": 5.8100301976885705e-06, "loss": 0.9718, "step": 5618 }, { "epoch": 0.8475113122171946, "grad_norm": 1.9689781665802002, "learning_rate": 5.798780760120698e-06, "loss": 1.0792, "step": 5619 }, { "epoch": 0.8476621417797888, "grad_norm": 2.195549726486206, "learning_rate": 5.787541553306386e-06, "loss": 1.3147, "step": 5620 }, { "epoch": 0.8478129713423831, "grad_norm": 1.7548563480377197, "learning_rate": 5.776312579847048e-06, "loss": 0.9528, "step": 5621 }, { "epoch": 0.8479638009049774, "grad_norm": 1.7462940216064453, "learning_rate": 5.765093842341723e-06, "loss": 0.8971, "step": 5622 }, { "epoch": 0.8481146304675716, "grad_norm": 1.9339689016342163, "learning_rate": 5.753885343387094e-06, "loss": 0.9877, "step": 5623 }, { "epoch": 0.8482654600301659, "grad_norm": 1.8955289125442505, "learning_rate": 5.742687085577453e-06, "loss": 0.9787, "step": 5624 }, { "epoch": 0.8484162895927602, "grad_norm": 1.948049545288086, "learning_rate": 5.731499071504748e-06, "loss": 1.1421, "step": 5625 }, { "epoch": 0.8485671191553544, "grad_norm": 2.133249521255493, "learning_rate": 5.720321303758519e-06, "loss": 1.2067, "step": 5626 }, { "epoch": 0.8487179487179487, "grad_norm": 1.7371869087219238, "learning_rate": 5.709153784925974e-06, "loss": 0.7479, "step": 5627 }, { "epoch": 0.848868778280543, "grad_norm": 2.299769639968872, "learning_rate": 5.697996517591925e-06, "loss": 1.3187, "step": 5628 }, { "epoch": 0.8490196078431372, "grad_norm": 1.736015796661377, "learning_rate": 5.68684950433882e-06, "loss": 0.972, "step": 5629 }, { "epoch": 0.8491704374057315, "grad_norm": 1.9687845706939697, "learning_rate": 5.675712747746731e-06, "loss": 1.1958, "step": 5630 }, { "epoch": 0.8493212669683258, "grad_norm": 1.8159267902374268, "learning_rate": 5.664586250393355e-06, "loss": 0.9529, "step": 5631 }, { "epoch": 0.84947209653092, "grad_norm": 1.918392539024353, "learning_rate": 5.653470014854018e-06, "loss": 1.0601, "step": 5632 }, { "epoch": 0.8496229260935143, "grad_norm": 2.0483970642089844, "learning_rate": 5.64236404370167e-06, "loss": 1.2442, "step": 5633 }, { "epoch": 0.8497737556561086, "grad_norm": 1.9656226634979248, "learning_rate": 5.63126833950689e-06, "loss": 0.9342, "step": 5634 }, { "epoch": 0.8499245852187028, "grad_norm": 2.011932849884033, "learning_rate": 5.620182904837856e-06, "loss": 1.004, "step": 5635 }, { "epoch": 0.8500754147812971, "grad_norm": 1.8361783027648926, "learning_rate": 5.6091077422604e-06, "loss": 0.9732, "step": 5636 }, { "epoch": 0.8502262443438914, "grad_norm": 1.896653175354004, "learning_rate": 5.59804285433796e-06, "loss": 0.788, "step": 5637 }, { "epoch": 0.8503770739064856, "grad_norm": 1.8385467529296875, "learning_rate": 5.586988243631603e-06, "loss": 0.8885, "step": 5638 }, { "epoch": 0.8505279034690799, "grad_norm": 1.755094051361084, "learning_rate": 5.57594391270001e-06, "loss": 0.8217, "step": 5639 }, { "epoch": 0.8506787330316742, "grad_norm": 1.9699161052703857, "learning_rate": 5.564909864099493e-06, "loss": 1.0866, "step": 5640 }, { "epoch": 0.8508295625942685, "grad_norm": 1.8227176666259766, "learning_rate": 5.5538861003839705e-06, "loss": 0.8607, "step": 5641 }, { "epoch": 0.8509803921568627, "grad_norm": 2.312657594680786, "learning_rate": 5.5428726241049945e-06, "loss": 1.3649, "step": 5642 }, { "epoch": 0.851131221719457, "grad_norm": 2.00703763961792, "learning_rate": 5.5318694378117205e-06, "loss": 1.0863, "step": 5643 }, { "epoch": 0.8512820512820513, "grad_norm": 2.0080652236938477, "learning_rate": 5.520876544050951e-06, "loss": 1.0493, "step": 5644 }, { "epoch": 0.8514328808446455, "grad_norm": 1.6152480840682983, "learning_rate": 5.509893945367062e-06, "loss": 0.8152, "step": 5645 }, { "epoch": 0.8515837104072398, "grad_norm": 1.8678443431854248, "learning_rate": 5.498921644302074e-06, "loss": 0.746, "step": 5646 }, { "epoch": 0.8517345399698341, "grad_norm": 1.8625869750976562, "learning_rate": 5.48795964339563e-06, "loss": 0.7955, "step": 5647 }, { "epoch": 0.8518853695324283, "grad_norm": 1.6935317516326904, "learning_rate": 5.477007945184976e-06, "loss": 0.7537, "step": 5648 }, { "epoch": 0.8520361990950226, "grad_norm": 1.5343499183654785, "learning_rate": 5.466066552204979e-06, "loss": 0.7379, "step": 5649 }, { "epoch": 0.8521870286576169, "grad_norm": 1.6383146047592163, "learning_rate": 5.455135466988115e-06, "loss": 0.603, "step": 5650 }, { "epoch": 0.8523378582202111, "grad_norm": 1.696823239326477, "learning_rate": 5.444214692064486e-06, "loss": 1.1464, "step": 5651 }, { "epoch": 0.8524886877828054, "grad_norm": 1.8519818782806396, "learning_rate": 5.433304229961788e-06, "loss": 0.9901, "step": 5652 }, { "epoch": 0.8526395173453997, "grad_norm": 1.8885829448699951, "learning_rate": 5.422404083205357e-06, "loss": 1.1309, "step": 5653 }, { "epoch": 0.8527903469079939, "grad_norm": 2.1067984104156494, "learning_rate": 5.411514254318112e-06, "loss": 1.147, "step": 5654 }, { "epoch": 0.8529411764705882, "grad_norm": 1.6003172397613525, "learning_rate": 5.4006347458205985e-06, "loss": 0.7227, "step": 5655 }, { "epoch": 0.8530920060331825, "grad_norm": 1.8867164850234985, "learning_rate": 5.389765560230975e-06, "loss": 1.0349, "step": 5656 }, { "epoch": 0.8532428355957767, "grad_norm": 1.9130290746688843, "learning_rate": 5.378906700065012e-06, "loss": 1.1676, "step": 5657 }, { "epoch": 0.853393665158371, "grad_norm": 1.537973165512085, "learning_rate": 5.3680581678360875e-06, "loss": 0.709, "step": 5658 }, { "epoch": 0.8535444947209653, "grad_norm": 1.8500640392303467, "learning_rate": 5.357219966055177e-06, "loss": 1.0231, "step": 5659 }, { "epoch": 0.8536953242835595, "grad_norm": 1.8888369798660278, "learning_rate": 5.34639209723089e-06, "loss": 1.0752, "step": 5660 }, { "epoch": 0.8538461538461538, "grad_norm": 1.7443588972091675, "learning_rate": 5.335574563869416e-06, "loss": 1.0395, "step": 5661 }, { "epoch": 0.8539969834087481, "grad_norm": 2.164952278137207, "learning_rate": 5.324767368474581e-06, "loss": 1.4712, "step": 5662 }, { "epoch": 0.8541478129713423, "grad_norm": 1.6477583646774292, "learning_rate": 5.313970513547794e-06, "loss": 0.9582, "step": 5663 }, { "epoch": 0.8542986425339366, "grad_norm": 2.1709041595458984, "learning_rate": 5.303184001588085e-06, "loss": 1.1881, "step": 5664 }, { "epoch": 0.8544494720965309, "grad_norm": 1.673644781112671, "learning_rate": 5.292407835092078e-06, "loss": 0.8027, "step": 5665 }, { "epoch": 0.8546003016591251, "grad_norm": 2.0416486263275146, "learning_rate": 5.281642016554017e-06, "loss": 1.2118, "step": 5666 }, { "epoch": 0.8547511312217194, "grad_norm": 1.719322681427002, "learning_rate": 5.2708865484657425e-06, "loss": 0.8098, "step": 5667 }, { "epoch": 0.8549019607843137, "grad_norm": 1.858963966369629, "learning_rate": 5.260141433316701e-06, "loss": 1.0345, "step": 5668 }, { "epoch": 0.855052790346908, "grad_norm": 2.131526470184326, "learning_rate": 5.249406673593937e-06, "loss": 1.1452, "step": 5669 }, { "epoch": 0.8552036199095022, "grad_norm": 1.8129717111587524, "learning_rate": 5.238682271782103e-06, "loss": 0.7989, "step": 5670 }, { "epoch": 0.8553544494720965, "grad_norm": 1.8939110040664673, "learning_rate": 5.227968230363467e-06, "loss": 0.862, "step": 5671 }, { "epoch": 0.8555052790346908, "grad_norm": 1.8369245529174805, "learning_rate": 5.21726455181788e-06, "loss": 1.0432, "step": 5672 }, { "epoch": 0.855656108597285, "grad_norm": 1.635174036026001, "learning_rate": 5.206571238622787e-06, "loss": 0.7756, "step": 5673 }, { "epoch": 0.8558069381598793, "grad_norm": 1.9286019802093506, "learning_rate": 5.195888293253265e-06, "loss": 1.0885, "step": 5674 }, { "epoch": 0.8559577677224736, "grad_norm": 2.0594444274902344, "learning_rate": 5.1852157181819626e-06, "loss": 1.0401, "step": 5675 }, { "epoch": 0.8561085972850678, "grad_norm": 2.4820873737335205, "learning_rate": 5.1745535158791495e-06, "loss": 1.6417, "step": 5676 }, { "epoch": 0.8562594268476621, "grad_norm": 1.7239503860473633, "learning_rate": 5.163901688812672e-06, "loss": 0.651, "step": 5677 }, { "epoch": 0.8564102564102564, "grad_norm": 1.9410868883132935, "learning_rate": 5.153260239447999e-06, "loss": 0.9893, "step": 5678 }, { "epoch": 0.8565610859728506, "grad_norm": 2.2860066890716553, "learning_rate": 5.142629170248181e-06, "loss": 1.4228, "step": 5679 }, { "epoch": 0.8567119155354449, "grad_norm": 2.1011180877685547, "learning_rate": 5.132008483673873e-06, "loss": 1.2098, "step": 5680 }, { "epoch": 0.8568627450980392, "grad_norm": 1.8845988512039185, "learning_rate": 5.121398182183329e-06, "loss": 0.9872, "step": 5681 }, { "epoch": 0.8570135746606334, "grad_norm": 2.2555136680603027, "learning_rate": 5.110798268232375e-06, "loss": 1.2135, "step": 5682 }, { "epoch": 0.8571644042232277, "grad_norm": 1.742474913597107, "learning_rate": 5.10020874427447e-06, "loss": 0.8788, "step": 5683 }, { "epoch": 0.857315233785822, "grad_norm": 1.7881877422332764, "learning_rate": 5.089629612760649e-06, "loss": 0.9624, "step": 5684 }, { "epoch": 0.8574660633484162, "grad_norm": 2.554389238357544, "learning_rate": 5.079060876139541e-06, "loss": 1.5076, "step": 5685 }, { "epoch": 0.8576168929110105, "grad_norm": 2.1587963104248047, "learning_rate": 5.0685025368573665e-06, "loss": 1.325, "step": 5686 }, { "epoch": 0.8577677224736048, "grad_norm": 1.8681139945983887, "learning_rate": 5.05795459735795e-06, "loss": 0.8932, "step": 5687 }, { "epoch": 0.857918552036199, "grad_norm": 1.7009758949279785, "learning_rate": 5.047417060082704e-06, "loss": 0.6943, "step": 5688 }, { "epoch": 0.8580693815987933, "grad_norm": 2.4475057125091553, "learning_rate": 5.036889927470628e-06, "loss": 1.3974, "step": 5689 }, { "epoch": 0.8582202111613876, "grad_norm": 1.975375771522522, "learning_rate": 5.026373201958334e-06, "loss": 1.0036, "step": 5690 }, { "epoch": 0.8583710407239818, "grad_norm": 1.9572029113769531, "learning_rate": 5.0158668859799804e-06, "loss": 1.099, "step": 5691 }, { "epoch": 0.8585218702865761, "grad_norm": 2.1581666469573975, "learning_rate": 5.005370981967361e-06, "loss": 0.9868, "step": 5692 }, { "epoch": 0.8586726998491704, "grad_norm": 2.212657928466797, "learning_rate": 4.994885492349844e-06, "loss": 1.1277, "step": 5693 }, { "epoch": 0.8588235294117647, "grad_norm": 2.3052384853363037, "learning_rate": 4.98441041955438e-06, "loss": 1.267, "step": 5694 }, { "epoch": 0.8589743589743589, "grad_norm": 2.0908963680267334, "learning_rate": 4.973945766005522e-06, "loss": 0.958, "step": 5695 }, { "epoch": 0.8591251885369532, "grad_norm": 1.9706727266311646, "learning_rate": 4.963491534125397e-06, "loss": 0.9088, "step": 5696 }, { "epoch": 0.8592760180995475, "grad_norm": 1.9289089441299438, "learning_rate": 4.953047726333737e-06, "loss": 0.8879, "step": 5697 }, { "epoch": 0.8594268476621418, "grad_norm": 1.880107045173645, "learning_rate": 4.942614345047841e-06, "loss": 1.0729, "step": 5698 }, { "epoch": 0.8595776772247361, "grad_norm": 2.072394609451294, "learning_rate": 4.932191392682611e-06, "loss": 1.0555, "step": 5699 }, { "epoch": 0.8597285067873304, "grad_norm": 1.953511118888855, "learning_rate": 4.92177887165054e-06, "loss": 0.8168, "step": 5700 }, { "epoch": 0.8598793363499246, "grad_norm": 1.9078805446624756, "learning_rate": 4.911376784361671e-06, "loss": 1.0755, "step": 5701 }, { "epoch": 0.8600301659125189, "grad_norm": 1.737362265586853, "learning_rate": 4.900985133223673e-06, "loss": 0.9128, "step": 5702 }, { "epoch": 0.8601809954751132, "grad_norm": 2.0313398838043213, "learning_rate": 4.890603920641778e-06, "loss": 1.4688, "step": 5703 }, { "epoch": 0.8603318250377074, "grad_norm": 1.9347751140594482, "learning_rate": 4.8802331490188145e-06, "loss": 1.1594, "step": 5704 }, { "epoch": 0.8604826546003017, "grad_norm": 1.8824583292007446, "learning_rate": 4.869872820755178e-06, "loss": 1.0473, "step": 5705 }, { "epoch": 0.860633484162896, "grad_norm": 2.027435064315796, "learning_rate": 4.8595229382488615e-06, "loss": 1.4752, "step": 5706 }, { "epoch": 0.8607843137254902, "grad_norm": 1.693039059638977, "learning_rate": 4.8491835038954325e-06, "loss": 0.9233, "step": 5707 }, { "epoch": 0.8609351432880845, "grad_norm": 1.850961685180664, "learning_rate": 4.838854520088043e-06, "loss": 0.9055, "step": 5708 }, { "epoch": 0.8610859728506788, "grad_norm": 1.951121211051941, "learning_rate": 4.828535989217425e-06, "loss": 1.2731, "step": 5709 }, { "epoch": 0.861236802413273, "grad_norm": 2.145052671432495, "learning_rate": 4.818227913671891e-06, "loss": 1.4053, "step": 5710 }, { "epoch": 0.8613876319758673, "grad_norm": 1.711506724357605, "learning_rate": 4.8079302958373315e-06, "loss": 1.0404, "step": 5711 }, { "epoch": 0.8615384615384616, "grad_norm": 1.6250860691070557, "learning_rate": 4.79764313809723e-06, "loss": 0.7764, "step": 5712 }, { "epoch": 0.8616892911010559, "grad_norm": 1.8096845149993896, "learning_rate": 4.787366442832625e-06, "loss": 0.9501, "step": 5713 }, { "epoch": 0.8618401206636501, "grad_norm": 1.9882382154464722, "learning_rate": 4.7771002124221535e-06, "loss": 1.2563, "step": 5714 }, { "epoch": 0.8619909502262444, "grad_norm": 1.9545793533325195, "learning_rate": 4.76684444924202e-06, "loss": 1.1281, "step": 5715 }, { "epoch": 0.8621417797888387, "grad_norm": 1.987815499305725, "learning_rate": 4.756599155666008e-06, "loss": 1.0079, "step": 5716 }, { "epoch": 0.8622926093514329, "grad_norm": 1.7733705043792725, "learning_rate": 4.746364334065484e-06, "loss": 1.0038, "step": 5717 }, { "epoch": 0.8624434389140272, "grad_norm": 1.976012110710144, "learning_rate": 4.736139986809396e-06, "loss": 1.1856, "step": 5718 }, { "epoch": 0.8625942684766215, "grad_norm": 1.8400614261627197, "learning_rate": 4.725926116264229e-06, "loss": 0.8203, "step": 5719 }, { "epoch": 0.8627450980392157, "grad_norm": 1.7314363718032837, "learning_rate": 4.715722724794092e-06, "loss": 0.9568, "step": 5720 }, { "epoch": 0.86289592760181, "grad_norm": 1.8765857219696045, "learning_rate": 4.70552981476064e-06, "loss": 1.0251, "step": 5721 }, { "epoch": 0.8630467571644043, "grad_norm": 2.0951194763183594, "learning_rate": 4.695347388523113e-06, "loss": 1.1363, "step": 5722 }, { "epoch": 0.8631975867269985, "grad_norm": 1.628471851348877, "learning_rate": 4.685175448438323e-06, "loss": 0.8843, "step": 5723 }, { "epoch": 0.8633484162895928, "grad_norm": 1.7305693626403809, "learning_rate": 4.6750139968606495e-06, "loss": 0.8867, "step": 5724 }, { "epoch": 0.8634992458521871, "grad_norm": 1.9542447328567505, "learning_rate": 4.6648630361420535e-06, "loss": 1.0589, "step": 5725 }, { "epoch": 0.8636500754147813, "grad_norm": 1.8059319257736206, "learning_rate": 4.654722568632059e-06, "loss": 0.964, "step": 5726 }, { "epoch": 0.8638009049773756, "grad_norm": 1.9875363111495972, "learning_rate": 4.644592596677766e-06, "loss": 1.0046, "step": 5727 }, { "epoch": 0.8639517345399699, "grad_norm": 1.8944685459136963, "learning_rate": 4.63447312262385e-06, "loss": 0.9542, "step": 5728 }, { "epoch": 0.8641025641025641, "grad_norm": 2.1150381565093994, "learning_rate": 4.624364148812538e-06, "loss": 1.1926, "step": 5729 }, { "epoch": 0.8642533936651584, "grad_norm": 1.9295871257781982, "learning_rate": 4.61426567758364e-06, "loss": 0.8106, "step": 5730 }, { "epoch": 0.8644042232277527, "grad_norm": 1.8193904161453247, "learning_rate": 4.604177711274543e-06, "loss": 0.876, "step": 5731 }, { "epoch": 0.864555052790347, "grad_norm": 2.0116233825683594, "learning_rate": 4.594100252220185e-06, "loss": 0.993, "step": 5732 }, { "epoch": 0.8647058823529412, "grad_norm": 2.109434127807617, "learning_rate": 4.584033302753088e-06, "loss": 1.1749, "step": 5733 }, { "epoch": 0.8648567119155355, "grad_norm": 2.18986177444458, "learning_rate": 4.573976865203334e-06, "loss": 1.0964, "step": 5734 }, { "epoch": 0.8650075414781297, "grad_norm": 2.1005072593688965, "learning_rate": 4.56393094189857e-06, "loss": 1.1586, "step": 5735 }, { "epoch": 0.865158371040724, "grad_norm": 1.9242606163024902, "learning_rate": 4.55389553516401e-06, "loss": 1.0215, "step": 5736 }, { "epoch": 0.8653092006033183, "grad_norm": 1.7494606971740723, "learning_rate": 4.543870647322446e-06, "loss": 0.8635, "step": 5737 }, { "epoch": 0.8654600301659126, "grad_norm": 2.020489454269409, "learning_rate": 4.53385628069421e-06, "loss": 1.156, "step": 5738 }, { "epoch": 0.8656108597285068, "grad_norm": 2.0114216804504395, "learning_rate": 4.52385243759722e-06, "loss": 1.1892, "step": 5739 }, { "epoch": 0.8657616892911011, "grad_norm": 2.173281669616699, "learning_rate": 4.513859120346947e-06, "loss": 1.1446, "step": 5740 }, { "epoch": 0.8659125188536954, "grad_norm": 2.258355140686035, "learning_rate": 4.50387633125644e-06, "loss": 1.2399, "step": 5741 }, { "epoch": 0.8660633484162896, "grad_norm": 1.6867848634719849, "learning_rate": 4.493904072636296e-06, "loss": 0.8834, "step": 5742 }, { "epoch": 0.8662141779788839, "grad_norm": 1.8515194654464722, "learning_rate": 4.483942346794678e-06, "loss": 0.8548, "step": 5743 }, { "epoch": 0.8663650075414782, "grad_norm": 2.19108510017395, "learning_rate": 4.4739911560373215e-06, "loss": 1.0601, "step": 5744 }, { "epoch": 0.8665158371040724, "grad_norm": 1.743483304977417, "learning_rate": 4.464050502667505e-06, "loss": 0.8583, "step": 5745 }, { "epoch": 0.8666666666666667, "grad_norm": 1.818824052810669, "learning_rate": 4.454120388986094e-06, "loss": 0.7881, "step": 5746 }, { "epoch": 0.866817496229261, "grad_norm": 1.8078246116638184, "learning_rate": 4.444200817291483e-06, "loss": 0.8491, "step": 5747 }, { "epoch": 0.8669683257918552, "grad_norm": 1.7801952362060547, "learning_rate": 4.434291789879647e-06, "loss": 0.8228, "step": 5748 }, { "epoch": 0.8671191553544495, "grad_norm": 1.9403178691864014, "learning_rate": 4.4243933090441115e-06, "loss": 0.9444, "step": 5749 }, { "epoch": 0.8672699849170438, "grad_norm": 1.7786520719528198, "learning_rate": 4.414505377075978e-06, "loss": 0.8137, "step": 5750 }, { "epoch": 0.867420814479638, "grad_norm": 1.6894053220748901, "learning_rate": 4.404627996263883e-06, "loss": 1.0612, "step": 5751 }, { "epoch": 0.8675716440422323, "grad_norm": 1.7900571823120117, "learning_rate": 4.39476116889404e-06, "loss": 0.8956, "step": 5752 }, { "epoch": 0.8677224736048266, "grad_norm": 1.8312336206436157, "learning_rate": 4.384904897250203e-06, "loss": 1.0937, "step": 5753 }, { "epoch": 0.8678733031674208, "grad_norm": 1.9141660928726196, "learning_rate": 4.3750591836136945e-06, "loss": 1.0494, "step": 5754 }, { "epoch": 0.8680241327300151, "grad_norm": 1.9202622175216675, "learning_rate": 4.365224030263388e-06, "loss": 1.1157, "step": 5755 }, { "epoch": 0.8681749622926094, "grad_norm": 2.0084421634674072, "learning_rate": 4.355399439475722e-06, "loss": 1.1599, "step": 5756 }, { "epoch": 0.8683257918552036, "grad_norm": 1.9229459762573242, "learning_rate": 4.345585413524672e-06, "loss": 1.1553, "step": 5757 }, { "epoch": 0.8684766214177979, "grad_norm": 1.632943034172058, "learning_rate": 4.335781954681789e-06, "loss": 0.8348, "step": 5758 }, { "epoch": 0.8686274509803922, "grad_norm": 1.7401405572891235, "learning_rate": 4.325989065216157e-06, "loss": 0.955, "step": 5759 }, { "epoch": 0.8687782805429864, "grad_norm": 2.0184574127197266, "learning_rate": 4.316206747394436e-06, "loss": 1.3721, "step": 5760 }, { "epoch": 0.8689291101055807, "grad_norm": 2.1352744102478027, "learning_rate": 4.306435003480824e-06, "loss": 1.4009, "step": 5761 }, { "epoch": 0.869079939668175, "grad_norm": 2.0127317905426025, "learning_rate": 4.296673835737075e-06, "loss": 1.4029, "step": 5762 }, { "epoch": 0.8692307692307693, "grad_norm": 2.104182004928589, "learning_rate": 4.286923246422492e-06, "loss": 1.2576, "step": 5763 }, { "epoch": 0.8693815987933635, "grad_norm": 1.7732878923416138, "learning_rate": 4.277183237793936e-06, "loss": 0.8757, "step": 5764 }, { "epoch": 0.8695324283559578, "grad_norm": 1.7321666479110718, "learning_rate": 4.267453812105826e-06, "loss": 0.8284, "step": 5765 }, { "epoch": 0.869683257918552, "grad_norm": 2.3055622577667236, "learning_rate": 4.257734971610106e-06, "loss": 1.3445, "step": 5766 }, { "epoch": 0.8698340874811463, "grad_norm": 2.1415951251983643, "learning_rate": 4.248026718556286e-06, "loss": 1.0487, "step": 5767 }, { "epoch": 0.8699849170437406, "grad_norm": 2.149322748184204, "learning_rate": 4.238329055191431e-06, "loss": 1.2881, "step": 5768 }, { "epoch": 0.8701357466063349, "grad_norm": 2.043663740158081, "learning_rate": 4.228641983760151e-06, "loss": 1.221, "step": 5769 }, { "epoch": 0.8702865761689291, "grad_norm": 2.19248366355896, "learning_rate": 4.218965506504596e-06, "loss": 1.138, "step": 5770 }, { "epoch": 0.8704374057315234, "grad_norm": 2.061392068862915, "learning_rate": 4.209299625664476e-06, "loss": 1.1735, "step": 5771 }, { "epoch": 0.8705882352941177, "grad_norm": 2.2126522064208984, "learning_rate": 4.199644343477038e-06, "loss": 1.4561, "step": 5772 }, { "epoch": 0.8707390648567119, "grad_norm": 2.2144217491149902, "learning_rate": 4.18999966217708e-06, "loss": 1.4499, "step": 5773 }, { "epoch": 0.8708898944193062, "grad_norm": 1.7779138088226318, "learning_rate": 4.1803655839969595e-06, "loss": 0.9549, "step": 5774 }, { "epoch": 0.8710407239819005, "grad_norm": 1.9193519353866577, "learning_rate": 4.1707421111665476e-06, "loss": 1.0102, "step": 5775 }, { "epoch": 0.8711915535444947, "grad_norm": 1.6548126935958862, "learning_rate": 4.161129245913292e-06, "loss": 0.8249, "step": 5776 }, { "epoch": 0.871342383107089, "grad_norm": 1.6883188486099243, "learning_rate": 4.151526990462168e-06, "loss": 0.8322, "step": 5777 }, { "epoch": 0.8714932126696833, "grad_norm": 1.6275461912155151, "learning_rate": 4.141935347035702e-06, "loss": 0.7968, "step": 5778 }, { "epoch": 0.8716440422322775, "grad_norm": 1.8778917789459229, "learning_rate": 4.132354317853965e-06, "loss": 0.8175, "step": 5779 }, { "epoch": 0.8717948717948718, "grad_norm": 2.043480157852173, "learning_rate": 4.122783905134564e-06, "loss": 1.2129, "step": 5780 }, { "epoch": 0.8719457013574661, "grad_norm": 1.9000080823898315, "learning_rate": 4.113224111092662e-06, "loss": 1.0526, "step": 5781 }, { "epoch": 0.8720965309200603, "grad_norm": 1.8623214960098267, "learning_rate": 4.103674937940949e-06, "loss": 0.9829, "step": 5782 }, { "epoch": 0.8722473604826546, "grad_norm": 1.8951624631881714, "learning_rate": 4.0941363878896636e-06, "loss": 0.9261, "step": 5783 }, { "epoch": 0.8723981900452489, "grad_norm": 1.9021334648132324, "learning_rate": 4.084608463146594e-06, "loss": 0.9966, "step": 5784 }, { "epoch": 0.8725490196078431, "grad_norm": 1.9148492813110352, "learning_rate": 4.07509116591705e-06, "loss": 1.0905, "step": 5785 }, { "epoch": 0.8726998491704374, "grad_norm": 2.4946489334106445, "learning_rate": 4.065584498403891e-06, "loss": 1.4204, "step": 5786 }, { "epoch": 0.8728506787330317, "grad_norm": 3.340550422668457, "learning_rate": 4.056088462807522e-06, "loss": 1.1373, "step": 5787 }, { "epoch": 0.873001508295626, "grad_norm": 2.228955030441284, "learning_rate": 4.046603061325888e-06, "loss": 1.2423, "step": 5788 }, { "epoch": 0.8731523378582202, "grad_norm": 2.119100332260132, "learning_rate": 4.037128296154457e-06, "loss": 1.0967, "step": 5789 }, { "epoch": 0.8733031674208145, "grad_norm": 2.711280107498169, "learning_rate": 4.027664169486251e-06, "loss": 1.6756, "step": 5790 }, { "epoch": 0.8734539969834088, "grad_norm": 2.0928170680999756, "learning_rate": 4.0182106835118195e-06, "loss": 1.0134, "step": 5791 }, { "epoch": 0.873604826546003, "grad_norm": 2.411716938018799, "learning_rate": 4.008767840419258e-06, "loss": 1.2547, "step": 5792 }, { "epoch": 0.8737556561085973, "grad_norm": 2.1515462398529053, "learning_rate": 3.999335642394203e-06, "loss": 1.0368, "step": 5793 }, { "epoch": 0.8739064856711916, "grad_norm": 2.0638837814331055, "learning_rate": 3.989914091619795e-06, "loss": 1.1602, "step": 5794 }, { "epoch": 0.8740573152337858, "grad_norm": 1.916962742805481, "learning_rate": 3.980503190276746e-06, "loss": 1.0384, "step": 5795 }, { "epoch": 0.8742081447963801, "grad_norm": 2.0601465702056885, "learning_rate": 3.971102940543292e-06, "loss": 1.0467, "step": 5796 }, { "epoch": 0.8743589743589744, "grad_norm": 1.564326286315918, "learning_rate": 3.961713344595197e-06, "loss": 0.7165, "step": 5797 }, { "epoch": 0.8745098039215686, "grad_norm": 1.706498384475708, "learning_rate": 3.952334404605773e-06, "loss": 0.7194, "step": 5798 }, { "epoch": 0.8746606334841629, "grad_norm": 1.8325831890106201, "learning_rate": 3.942966122745845e-06, "loss": 0.9116, "step": 5799 }, { "epoch": 0.8748114630467572, "grad_norm": 1.782947301864624, "learning_rate": 3.9336085011837885e-06, "loss": 0.7536, "step": 5800 }, { "epoch": 0.8749622926093514, "grad_norm": 2.164787530899048, "learning_rate": 3.924261542085506e-06, "loss": 1.611, "step": 5801 }, { "epoch": 0.8751131221719457, "grad_norm": 1.9572316408157349, "learning_rate": 3.914925247614432e-06, "loss": 1.241, "step": 5802 }, { "epoch": 0.87526395173454, "grad_norm": 1.5270518064498901, "learning_rate": 3.905599619931527e-06, "loss": 0.7094, "step": 5803 }, { "epoch": 0.8754147812971342, "grad_norm": 1.5828275680541992, "learning_rate": 3.896284661195298e-06, "loss": 0.9022, "step": 5804 }, { "epoch": 0.8755656108597285, "grad_norm": 1.6250180006027222, "learning_rate": 3.886980373561766e-06, "loss": 0.6949, "step": 5805 }, { "epoch": 0.8757164404223228, "grad_norm": 2.176971673965454, "learning_rate": 3.87768675918449e-06, "loss": 1.3465, "step": 5806 }, { "epoch": 0.875867269984917, "grad_norm": 1.775646448135376, "learning_rate": 3.868403820214556e-06, "loss": 0.9441, "step": 5807 }, { "epoch": 0.8760180995475113, "grad_norm": 1.9649572372436523, "learning_rate": 3.859131558800577e-06, "loss": 1.3612, "step": 5808 }, { "epoch": 0.8761689291101056, "grad_norm": 1.9862998723983765, "learning_rate": 3.849869977088711e-06, "loss": 1.1963, "step": 5809 }, { "epoch": 0.8763197586726998, "grad_norm": 1.8422207832336426, "learning_rate": 3.840619077222613e-06, "loss": 1.0931, "step": 5810 }, { "epoch": 0.8764705882352941, "grad_norm": 2.022975444793701, "learning_rate": 3.831378861343499e-06, "loss": 1.3669, "step": 5811 }, { "epoch": 0.8766214177978884, "grad_norm": 1.821733832359314, "learning_rate": 3.822149331590097e-06, "loss": 0.9734, "step": 5812 }, { "epoch": 0.8767722473604826, "grad_norm": 2.0091969966888428, "learning_rate": 3.8129304900986486e-06, "loss": 1.2386, "step": 5813 }, { "epoch": 0.8769230769230769, "grad_norm": 1.999679446220398, "learning_rate": 3.8037223390029353e-06, "loss": 1.0395, "step": 5814 }, { "epoch": 0.8770739064856712, "grad_norm": 1.5448946952819824, "learning_rate": 3.7945248804342694e-06, "loss": 0.7157, "step": 5815 }, { "epoch": 0.8772247360482655, "grad_norm": 1.9859123229980469, "learning_rate": 3.785338116521486e-06, "loss": 1.0307, "step": 5816 }, { "epoch": 0.8773755656108597, "grad_norm": 1.529547095298767, "learning_rate": 3.77616204939093e-06, "loss": 0.6796, "step": 5817 }, { "epoch": 0.877526395173454, "grad_norm": 1.9754270315170288, "learning_rate": 3.7669966811664905e-06, "loss": 1.0652, "step": 5818 }, { "epoch": 0.8776772247360483, "grad_norm": 1.9937409162521362, "learning_rate": 3.757842013969565e-06, "loss": 0.8844, "step": 5819 }, { "epoch": 0.8778280542986425, "grad_norm": 1.8071457147598267, "learning_rate": 3.7486980499190805e-06, "loss": 0.9054, "step": 5820 }, { "epoch": 0.8779788838612368, "grad_norm": 1.8110870122909546, "learning_rate": 3.7395647911315e-06, "loss": 1.0198, "step": 5821 }, { "epoch": 0.8781297134238311, "grad_norm": 2.1905901432037354, "learning_rate": 3.730442239720777e-06, "loss": 1.2583, "step": 5822 }, { "epoch": 0.8782805429864253, "grad_norm": 2.086935043334961, "learning_rate": 3.7213303977984115e-06, "loss": 1.2643, "step": 5823 }, { "epoch": 0.8784313725490196, "grad_norm": 1.8567173480987549, "learning_rate": 3.7122292674734162e-06, "loss": 0.8633, "step": 5824 }, { "epoch": 0.8785822021116139, "grad_norm": 1.8386602401733398, "learning_rate": 3.703138850852328e-06, "loss": 0.7873, "step": 5825 }, { "epoch": 0.8787330316742081, "grad_norm": 1.7417104244232178, "learning_rate": 3.694059150039203e-06, "loss": 1.0283, "step": 5826 }, { "epoch": 0.8788838612368024, "grad_norm": 2.219000816345215, "learning_rate": 3.6849901671356158e-06, "loss": 1.2213, "step": 5827 }, { "epoch": 0.8790346907993967, "grad_norm": 1.9336496591567993, "learning_rate": 3.6759319042406594e-06, "loss": 1.1145, "step": 5828 }, { "epoch": 0.8791855203619909, "grad_norm": 1.6486159563064575, "learning_rate": 3.6668843634509452e-06, "loss": 0.8647, "step": 5829 }, { "epoch": 0.8793363499245852, "grad_norm": 2.1549086570739746, "learning_rate": 3.65784754686061e-06, "loss": 1.1222, "step": 5830 }, { "epoch": 0.8794871794871795, "grad_norm": 2.13627552986145, "learning_rate": 3.6488214565613076e-06, "loss": 1.1785, "step": 5831 }, { "epoch": 0.8796380090497737, "grad_norm": 1.7999184131622314, "learning_rate": 3.6398060946421896e-06, "loss": 0.9205, "step": 5832 }, { "epoch": 0.879788838612368, "grad_norm": 1.8619191646575928, "learning_rate": 3.630801463189948e-06, "loss": 1.1609, "step": 5833 }, { "epoch": 0.8799396681749623, "grad_norm": 1.8087142705917358, "learning_rate": 3.621807564288776e-06, "loss": 0.8975, "step": 5834 }, { "epoch": 0.8800904977375565, "grad_norm": 1.8421745300292969, "learning_rate": 3.612824400020398e-06, "loss": 0.9344, "step": 5835 }, { "epoch": 0.8802413273001508, "grad_norm": 1.8136096000671387, "learning_rate": 3.603851972464045e-06, "loss": 0.9471, "step": 5836 }, { "epoch": 0.8803921568627451, "grad_norm": 2.088264226913452, "learning_rate": 3.5948902836964505e-06, "loss": 1.1176, "step": 5837 }, { "epoch": 0.8805429864253393, "grad_norm": 2.0409884452819824, "learning_rate": 3.585939335791888e-06, "loss": 1.0124, "step": 5838 }, { "epoch": 0.8806938159879336, "grad_norm": 2.2946279048919678, "learning_rate": 3.5769991308221283e-06, "loss": 1.2715, "step": 5839 }, { "epoch": 0.8808446455505279, "grad_norm": 2.318822145462036, "learning_rate": 3.5680696708564664e-06, "loss": 1.3702, "step": 5840 }, { "epoch": 0.8809954751131222, "grad_norm": 1.9334570169448853, "learning_rate": 3.5591509579616823e-06, "loss": 0.9636, "step": 5841 }, { "epoch": 0.8811463046757164, "grad_norm": 2.002397298812866, "learning_rate": 3.550242994202102e-06, "loss": 1.0028, "step": 5842 }, { "epoch": 0.8812971342383107, "grad_norm": 1.8111767768859863, "learning_rate": 3.541345781639549e-06, "loss": 0.852, "step": 5843 }, { "epoch": 0.881447963800905, "grad_norm": 2.1802539825439453, "learning_rate": 3.5324593223333592e-06, "loss": 1.1774, "step": 5844 }, { "epoch": 0.8815987933634992, "grad_norm": 2.44977068901062, "learning_rate": 3.5235836183403816e-06, "loss": 1.1313, "step": 5845 }, { "epoch": 0.8817496229260935, "grad_norm": 1.4292312860488892, "learning_rate": 3.5147186717149725e-06, "loss": 0.5103, "step": 5846 }, { "epoch": 0.8819004524886878, "grad_norm": 1.6681098937988281, "learning_rate": 3.5058644845090084e-06, "loss": 0.7572, "step": 5847 }, { "epoch": 0.882051282051282, "grad_norm": 1.7460039854049683, "learning_rate": 3.497021058771854e-06, "loss": 0.7396, "step": 5848 }, { "epoch": 0.8822021116138763, "grad_norm": 1.8563368320465088, "learning_rate": 3.488188396550407e-06, "loss": 1.0029, "step": 5849 }, { "epoch": 0.8823529411764706, "grad_norm": 1.4774785041809082, "learning_rate": 3.4793664998890583e-06, "loss": 0.5951, "step": 5850 }, { "epoch": 0.8825037707390648, "grad_norm": 2.0099728107452393, "learning_rate": 3.4705553708297145e-06, "loss": 1.1974, "step": 5851 }, { "epoch": 0.8826546003016591, "grad_norm": 1.9147669076919556, "learning_rate": 3.4617550114117825e-06, "loss": 1.2096, "step": 5852 }, { "epoch": 0.8828054298642534, "grad_norm": 1.830119013786316, "learning_rate": 3.4529654236721887e-06, "loss": 1.1438, "step": 5853 }, { "epoch": 0.8829562594268476, "grad_norm": 1.4454504251480103, "learning_rate": 3.4441866096453557e-06, "loss": 0.7904, "step": 5854 }, { "epoch": 0.8831070889894419, "grad_norm": 1.7575222253799438, "learning_rate": 3.4354185713632135e-06, "loss": 0.8192, "step": 5855 }, { "epoch": 0.8832579185520362, "grad_norm": 1.648569941520691, "learning_rate": 3.4266613108552002e-06, "loss": 0.8457, "step": 5856 }, { "epoch": 0.8834087481146304, "grad_norm": 2.177745819091797, "learning_rate": 3.4179148301482665e-06, "loss": 1.5357, "step": 5857 }, { "epoch": 0.8835595776772247, "grad_norm": 1.8743804693222046, "learning_rate": 3.4091791312668485e-06, "loss": 1.0708, "step": 5858 }, { "epoch": 0.883710407239819, "grad_norm": 1.9154866933822632, "learning_rate": 3.4004542162329233e-06, "loss": 1.0708, "step": 5859 }, { "epoch": 0.8838612368024132, "grad_norm": 1.7358355522155762, "learning_rate": 3.3917400870659143e-06, "loss": 1.0398, "step": 5860 }, { "epoch": 0.8840120663650075, "grad_norm": 1.6006370782852173, "learning_rate": 3.3830367457828027e-06, "loss": 0.8179, "step": 5861 }, { "epoch": 0.8841628959276018, "grad_norm": 1.7984477281570435, "learning_rate": 3.374344194398049e-06, "loss": 0.994, "step": 5862 }, { "epoch": 0.884313725490196, "grad_norm": 2.0586228370666504, "learning_rate": 3.3656624349236167e-06, "loss": 1.3392, "step": 5863 }, { "epoch": 0.8844645550527903, "grad_norm": 1.9251234531402588, "learning_rate": 3.3569914693689697e-06, "loss": 1.1022, "step": 5864 }, { "epoch": 0.8846153846153846, "grad_norm": 2.0793144702911377, "learning_rate": 3.3483312997410864e-06, "loss": 1.3479, "step": 5865 }, { "epoch": 0.8847662141779788, "grad_norm": 2.132091760635376, "learning_rate": 3.339681928044436e-06, "loss": 1.1755, "step": 5866 }, { "epoch": 0.8849170437405731, "grad_norm": 1.5928733348846436, "learning_rate": 3.3310433562809885e-06, "loss": 0.6825, "step": 5867 }, { "epoch": 0.8850678733031674, "grad_norm": 2.260685920715332, "learning_rate": 3.3224155864502227e-06, "loss": 1.2804, "step": 5868 }, { "epoch": 0.8852187028657617, "grad_norm": 1.6078786849975586, "learning_rate": 3.3137986205490967e-06, "loss": 0.7517, "step": 5869 }, { "epoch": 0.8853695324283559, "grad_norm": 1.911964774131775, "learning_rate": 3.3051924605720872e-06, "loss": 1.2747, "step": 5870 }, { "epoch": 0.8855203619909502, "grad_norm": 1.755420207977295, "learning_rate": 3.296597108511168e-06, "loss": 0.9762, "step": 5871 }, { "epoch": 0.8856711915535445, "grad_norm": 1.6787285804748535, "learning_rate": 3.2880125663558027e-06, "loss": 0.811, "step": 5872 }, { "epoch": 0.8858220211161387, "grad_norm": 1.892607569694519, "learning_rate": 3.279438836092963e-06, "loss": 0.9491, "step": 5873 }, { "epoch": 0.885972850678733, "grad_norm": 2.2054569721221924, "learning_rate": 3.270875919707106e-06, "loss": 1.1228, "step": 5874 }, { "epoch": 0.8861236802413273, "grad_norm": 2.097320318222046, "learning_rate": 3.2623238191801963e-06, "loss": 1.0445, "step": 5875 }, { "epoch": 0.8862745098039215, "grad_norm": 1.8569016456604004, "learning_rate": 3.2537825364916953e-06, "loss": 0.8814, "step": 5876 }, { "epoch": 0.8864253393665158, "grad_norm": 1.9991923570632935, "learning_rate": 3.24525207361856e-06, "loss": 1.0596, "step": 5877 }, { "epoch": 0.8865761689291101, "grad_norm": 2.171919345855713, "learning_rate": 3.236732432535222e-06, "loss": 1.2531, "step": 5878 }, { "epoch": 0.8867269984917043, "grad_norm": 2.336987018585205, "learning_rate": 3.2282236152136324e-06, "loss": 1.3548, "step": 5879 }, { "epoch": 0.8868778280542986, "grad_norm": 2.2119078636169434, "learning_rate": 3.219725623623243e-06, "loss": 1.0395, "step": 5880 }, { "epoch": 0.8870286576168929, "grad_norm": 1.8112682104110718, "learning_rate": 3.2112384597309697e-06, "loss": 0.7991, "step": 5881 }, { "epoch": 0.8871794871794871, "grad_norm": 2.001300811767578, "learning_rate": 3.202762125501252e-06, "loss": 0.9686, "step": 5882 }, { "epoch": 0.8873303167420814, "grad_norm": 2.1132426261901855, "learning_rate": 3.194296622896009e-06, "loss": 1.2535, "step": 5883 }, { "epoch": 0.8874811463046757, "grad_norm": 2.506736993789673, "learning_rate": 3.1858419538746463e-06, "loss": 1.4304, "step": 5884 }, { "epoch": 0.8876319758672699, "grad_norm": 2.3702526092529297, "learning_rate": 3.177398120394082e-06, "loss": 1.3404, "step": 5885 }, { "epoch": 0.8877828054298642, "grad_norm": 1.8490196466445923, "learning_rate": 3.168965124408707e-06, "loss": 0.8852, "step": 5886 }, { "epoch": 0.8879336349924585, "grad_norm": 2.382082939147949, "learning_rate": 3.1605429678704167e-06, "loss": 1.0772, "step": 5887 }, { "epoch": 0.8880844645550527, "grad_norm": 2.1188549995422363, "learning_rate": 3.152131652728585e-06, "loss": 1.1517, "step": 5888 }, { "epoch": 0.888235294117647, "grad_norm": 2.522059202194214, "learning_rate": 3.1437311809300817e-06, "loss": 1.4579, "step": 5889 }, { "epoch": 0.8883861236802413, "grad_norm": 2.101911783218384, "learning_rate": 3.135341554419274e-06, "loss": 1.1894, "step": 5890 }, { "epoch": 0.8885369532428355, "grad_norm": 1.8840824365615845, "learning_rate": 3.1269627751380092e-06, "loss": 1.0072, "step": 5891 }, { "epoch": 0.8886877828054298, "grad_norm": 2.4710307121276855, "learning_rate": 3.1185948450256354e-06, "loss": 1.4285, "step": 5892 }, { "epoch": 0.8888386123680241, "grad_norm": 1.9362757205963135, "learning_rate": 3.110237766018975e-06, "loss": 0.915, "step": 5893 }, { "epoch": 0.8889894419306184, "grad_norm": 1.7300277948379517, "learning_rate": 3.1018915400523485e-06, "loss": 0.8421, "step": 5894 }, { "epoch": 0.8891402714932126, "grad_norm": 1.7710139751434326, "learning_rate": 3.0935561690575597e-06, "loss": 0.8754, "step": 5895 }, { "epoch": 0.8892911010558069, "grad_norm": 1.7190974950790405, "learning_rate": 3.0852316549639103e-06, "loss": 0.7837, "step": 5896 }, { "epoch": 0.8894419306184012, "grad_norm": 1.316733479499817, "learning_rate": 3.07691799969817e-06, "loss": 0.4062, "step": 5897 }, { "epoch": 0.8895927601809954, "grad_norm": 1.779817819595337, "learning_rate": 3.0686152051846105e-06, "loss": 0.8489, "step": 5898 }, { "epoch": 0.8897435897435897, "grad_norm": 1.4735060930252075, "learning_rate": 3.0603232733449895e-06, "loss": 0.6073, "step": 5899 }, { "epoch": 0.889894419306184, "grad_norm": 1.7601490020751953, "learning_rate": 3.0520422060985376e-06, "loss": 0.7289, "step": 5900 }, { "epoch": 0.8900452488687782, "grad_norm": 1.7373229265213013, "learning_rate": 3.043772005361989e-06, "loss": 1.1828, "step": 5901 }, { "epoch": 0.8901960784313725, "grad_norm": 1.8099887371063232, "learning_rate": 3.035512673049545e-06, "loss": 1.0441, "step": 5902 }, { "epoch": 0.8903469079939668, "grad_norm": 1.636582851409912, "learning_rate": 3.0272642110729055e-06, "loss": 1.0118, "step": 5903 }, { "epoch": 0.890497737556561, "grad_norm": 1.9576443433761597, "learning_rate": 3.019026621341242e-06, "loss": 1.213, "step": 5904 }, { "epoch": 0.8906485671191554, "grad_norm": 1.5533088445663452, "learning_rate": 3.0107999057612302e-06, "loss": 0.7676, "step": 5905 }, { "epoch": 0.8907993966817497, "grad_norm": 1.9108792543411255, "learning_rate": 3.0025840662369908e-06, "loss": 1.0381, "step": 5906 }, { "epoch": 0.890950226244344, "grad_norm": 1.7056530714035034, "learning_rate": 2.9943791046701696e-06, "loss": 0.9396, "step": 5907 }, { "epoch": 0.8911010558069382, "grad_norm": 1.6396571397781372, "learning_rate": 2.986185022959864e-06, "loss": 0.861, "step": 5908 }, { "epoch": 0.8912518853695325, "grad_norm": 1.839339256286621, "learning_rate": 2.9780018230026686e-06, "loss": 1.1471, "step": 5909 }, { "epoch": 0.8914027149321267, "grad_norm": 2.1436092853546143, "learning_rate": 2.9698295066926616e-06, "loss": 1.4675, "step": 5910 }, { "epoch": 0.891553544494721, "grad_norm": 1.8446934223175049, "learning_rate": 2.9616680759213866e-06, "loss": 1.0357, "step": 5911 }, { "epoch": 0.8917043740573153, "grad_norm": 1.7145240306854248, "learning_rate": 2.9535175325778876e-06, "loss": 0.9903, "step": 5912 }, { "epoch": 0.8918552036199096, "grad_norm": 1.8614665269851685, "learning_rate": 2.9453778785486717e-06, "loss": 1.1127, "step": 5913 }, { "epoch": 0.8920060331825038, "grad_norm": 2.120891571044922, "learning_rate": 2.9372491157177274e-06, "loss": 1.0116, "step": 5914 }, { "epoch": 0.8921568627450981, "grad_norm": 1.9520103931427002, "learning_rate": 2.9291312459665486e-06, "loss": 1.1975, "step": 5915 }, { "epoch": 0.8923076923076924, "grad_norm": 2.010277032852173, "learning_rate": 2.9210242711740552e-06, "loss": 1.2194, "step": 5916 }, { "epoch": 0.8924585218702866, "grad_norm": 1.9487946033477783, "learning_rate": 2.912928193216696e-06, "loss": 1.061, "step": 5917 }, { "epoch": 0.8926093514328809, "grad_norm": 2.107334613800049, "learning_rate": 2.9048430139683723e-06, "loss": 1.1598, "step": 5918 }, { "epoch": 0.8927601809954752, "grad_norm": 1.6532789468765259, "learning_rate": 2.8967687353004758e-06, "loss": 0.8295, "step": 5919 }, { "epoch": 0.8929110105580694, "grad_norm": 2.2774388790130615, "learning_rate": 2.8887053590818556e-06, "loss": 1.4585, "step": 5920 }, { "epoch": 0.8930618401206637, "grad_norm": 2.131739616394043, "learning_rate": 2.8806528871788583e-06, "loss": 1.2755, "step": 5921 }, { "epoch": 0.893212669683258, "grad_norm": 1.64482843875885, "learning_rate": 2.872611321455293e-06, "loss": 0.7142, "step": 5922 }, { "epoch": 0.8933634992458522, "grad_norm": 1.913920283317566, "learning_rate": 2.8645806637724592e-06, "loss": 1.0212, "step": 5923 }, { "epoch": 0.8935143288084465, "grad_norm": 1.762816309928894, "learning_rate": 2.8565609159891148e-06, "loss": 0.836, "step": 5924 }, { "epoch": 0.8936651583710408, "grad_norm": 1.8569289445877075, "learning_rate": 2.8485520799614972e-06, "loss": 0.9015, "step": 5925 }, { "epoch": 0.893815987933635, "grad_norm": 1.913170337677002, "learning_rate": 2.8405541575433238e-06, "loss": 0.8256, "step": 5926 }, { "epoch": 0.8939668174962293, "grad_norm": 1.9055383205413818, "learning_rate": 2.83256715058578e-06, "loss": 0.7629, "step": 5927 }, { "epoch": 0.8941176470588236, "grad_norm": 2.084530830383301, "learning_rate": 2.8245910609375314e-06, "loss": 1.1871, "step": 5928 }, { "epoch": 0.8942684766214178, "grad_norm": 2.6745471954345703, "learning_rate": 2.816625890444713e-06, "loss": 0.9501, "step": 5929 }, { "epoch": 0.8944193061840121, "grad_norm": 1.9465138912200928, "learning_rate": 2.8086716409509274e-06, "loss": 0.9358, "step": 5930 }, { "epoch": 0.8945701357466064, "grad_norm": 1.9015905857086182, "learning_rate": 2.8007283142972575e-06, "loss": 1.1296, "step": 5931 }, { "epoch": 0.8947209653092006, "grad_norm": 2.0386741161346436, "learning_rate": 2.79279591232226e-06, "loss": 0.9056, "step": 5932 }, { "epoch": 0.8948717948717949, "grad_norm": 2.1774866580963135, "learning_rate": 2.784874436861945e-06, "loss": 1.2665, "step": 5933 }, { "epoch": 0.8950226244343892, "grad_norm": 2.227470874786377, "learning_rate": 2.776963889749823e-06, "loss": 1.2851, "step": 5934 }, { "epoch": 0.8951734539969834, "grad_norm": 1.784650206565857, "learning_rate": 2.769064272816846e-06, "loss": 0.801, "step": 5935 }, { "epoch": 0.8953242835595777, "grad_norm": 1.8678969144821167, "learning_rate": 2.761175587891446e-06, "loss": 0.9952, "step": 5936 }, { "epoch": 0.895475113122172, "grad_norm": 2.0443527698516846, "learning_rate": 2.7532978367995345e-06, "loss": 0.9567, "step": 5937 }, { "epoch": 0.8956259426847663, "grad_norm": 2.0557098388671875, "learning_rate": 2.745431021364486e-06, "loss": 1.2392, "step": 5938 }, { "epoch": 0.8957767722473605, "grad_norm": 2.2098686695098877, "learning_rate": 2.7375751434071395e-06, "loss": 1.1615, "step": 5939 }, { "epoch": 0.8959276018099548, "grad_norm": 2.084488868713379, "learning_rate": 2.7297302047458063e-06, "loss": 1.1241, "step": 5940 }, { "epoch": 0.8960784313725491, "grad_norm": 1.9972290992736816, "learning_rate": 2.721896207196262e-06, "loss": 0.9154, "step": 5941 }, { "epoch": 0.8962292609351433, "grad_norm": 1.555584192276001, "learning_rate": 2.7140731525717557e-06, "loss": 0.6984, "step": 5942 }, { "epoch": 0.8963800904977376, "grad_norm": 1.8113542795181274, "learning_rate": 2.706261042683006e-06, "loss": 0.769, "step": 5943 }, { "epoch": 0.8965309200603319, "grad_norm": 2.0983452796936035, "learning_rate": 2.6984598793381833e-06, "loss": 0.9263, "step": 5944 }, { "epoch": 0.8966817496229261, "grad_norm": 1.7979704141616821, "learning_rate": 2.690669664342943e-06, "loss": 0.8036, "step": 5945 }, { "epoch": 0.8968325791855204, "grad_norm": 1.8164193630218506, "learning_rate": 2.682890399500393e-06, "loss": 1.1062, "step": 5946 }, { "epoch": 0.8969834087481147, "grad_norm": 1.4976903200149536, "learning_rate": 2.675122086611109e-06, "loss": 0.5919, "step": 5947 }, { "epoch": 0.8971342383107089, "grad_norm": 1.764483094215393, "learning_rate": 2.667364727473143e-06, "loss": 0.9695, "step": 5948 }, { "epoch": 0.8972850678733032, "grad_norm": 1.8134326934814453, "learning_rate": 2.6596183238819904e-06, "loss": 0.7785, "step": 5949 }, { "epoch": 0.8974358974358975, "grad_norm": 1.7741295099258423, "learning_rate": 2.6518828776306347e-06, "loss": 0.7654, "step": 5950 }, { "epoch": 0.8975867269984917, "grad_norm": 2.0477488040924072, "learning_rate": 2.6441583905095048e-06, "loss": 1.6178, "step": 5951 }, { "epoch": 0.897737556561086, "grad_norm": 1.817973017692566, "learning_rate": 2.6364448643065086e-06, "loss": 0.992, "step": 5952 }, { "epoch": 0.8978883861236803, "grad_norm": 1.7912812232971191, "learning_rate": 2.628742300806991e-06, "loss": 0.9702, "step": 5953 }, { "epoch": 0.8980392156862745, "grad_norm": 1.7496105432510376, "learning_rate": 2.6210507017937923e-06, "loss": 1.0854, "step": 5954 }, { "epoch": 0.8981900452488688, "grad_norm": 2.043968677520752, "learning_rate": 2.613370069047194e-06, "loss": 1.0736, "step": 5955 }, { "epoch": 0.8983408748114631, "grad_norm": 1.9022880792617798, "learning_rate": 2.6057004043449407e-06, "loss": 1.1042, "step": 5956 }, { "epoch": 0.8984917043740573, "grad_norm": 2.3824005126953125, "learning_rate": 2.5980417094622512e-06, "loss": 1.7076, "step": 5957 }, { "epoch": 0.8986425339366516, "grad_norm": 1.9025477170944214, "learning_rate": 2.5903939861717908e-06, "loss": 1.0961, "step": 5958 }, { "epoch": 0.8987933634992459, "grad_norm": 1.9340075254440308, "learning_rate": 2.5827572362436934e-06, "loss": 1.323, "step": 5959 }, { "epoch": 0.8989441930618401, "grad_norm": 1.6985923051834106, "learning_rate": 2.575131461445546e-06, "loss": 1.0129, "step": 5960 }, { "epoch": 0.8990950226244344, "grad_norm": 2.00496768951416, "learning_rate": 2.5675166635424077e-06, "loss": 1.0739, "step": 5961 }, { "epoch": 0.8992458521870287, "grad_norm": 1.7166461944580078, "learning_rate": 2.5599128442967913e-06, "loss": 0.8199, "step": 5962 }, { "epoch": 0.899396681749623, "grad_norm": 2.069605588912964, "learning_rate": 2.5523200054686504e-06, "loss": 1.2094, "step": 5963 }, { "epoch": 0.8995475113122172, "grad_norm": 2.041762113571167, "learning_rate": 2.5447381488154287e-06, "loss": 1.2334, "step": 5964 }, { "epoch": 0.8996983408748115, "grad_norm": 1.4955508708953857, "learning_rate": 2.537167276092001e-06, "loss": 0.6744, "step": 5965 }, { "epoch": 0.8998491704374058, "grad_norm": 2.112485885620117, "learning_rate": 2.52960738905072e-06, "loss": 1.4387, "step": 5966 }, { "epoch": 0.9, "grad_norm": 1.6923630237579346, "learning_rate": 2.5220584894413813e-06, "loss": 0.9206, "step": 5967 }, { "epoch": 0.9001508295625943, "grad_norm": 2.0821428298950195, "learning_rate": 2.5145205790112427e-06, "loss": 0.9025, "step": 5968 }, { "epoch": 0.9003016591251886, "grad_norm": 1.6140247583389282, "learning_rate": 2.5069936595050246e-06, "loss": 0.7872, "step": 5969 }, { "epoch": 0.9004524886877828, "grad_norm": 2.1050243377685547, "learning_rate": 2.4994777326648955e-06, "loss": 1.1712, "step": 5970 }, { "epoch": 0.9006033182503771, "grad_norm": 1.9915176630020142, "learning_rate": 2.49197280023048e-06, "loss": 1.1618, "step": 5971 }, { "epoch": 0.9007541478129714, "grad_norm": 1.9258143901824951, "learning_rate": 2.484478863938855e-06, "loss": 1.0644, "step": 5972 }, { "epoch": 0.9009049773755656, "grad_norm": 1.858255386352539, "learning_rate": 2.4769959255245668e-06, "loss": 0.8995, "step": 5973 }, { "epoch": 0.9010558069381599, "grad_norm": 2.172921895980835, "learning_rate": 2.4695239867195907e-06, "loss": 1.3644, "step": 5974 }, { "epoch": 0.9012066365007542, "grad_norm": 1.9577503204345703, "learning_rate": 2.462063049253388e-06, "loss": 1.0618, "step": 5975 }, { "epoch": 0.9013574660633484, "grad_norm": 2.0706348419189453, "learning_rate": 2.4546131148528496e-06, "loss": 1.1409, "step": 5976 }, { "epoch": 0.9015082956259427, "grad_norm": 1.8136779069900513, "learning_rate": 2.4471741852423237e-06, "loss": 0.8684, "step": 5977 }, { "epoch": 0.901659125188537, "grad_norm": 2.078585386276245, "learning_rate": 2.4397462621436216e-06, "loss": 1.0559, "step": 5978 }, { "epoch": 0.9018099547511312, "grad_norm": 1.934810996055603, "learning_rate": 2.432329347275991e-06, "loss": 0.9825, "step": 5979 }, { "epoch": 0.9019607843137255, "grad_norm": 1.8848844766616821, "learning_rate": 2.4249234423561583e-06, "loss": 1.0272, "step": 5980 }, { "epoch": 0.9021116138763198, "grad_norm": 1.8616167306900024, "learning_rate": 2.4175285490982692e-06, "loss": 0.7804, "step": 5981 }, { "epoch": 0.902262443438914, "grad_norm": 1.8983227014541626, "learning_rate": 2.410144669213932e-06, "loss": 1.1632, "step": 5982 }, { "epoch": 0.9024132730015083, "grad_norm": 2.0963635444641113, "learning_rate": 2.4027718044122237e-06, "loss": 1.0801, "step": 5983 }, { "epoch": 0.9025641025641026, "grad_norm": 1.9760980606079102, "learning_rate": 2.3954099563996467e-06, "loss": 1.0634, "step": 5984 }, { "epoch": 0.9027149321266968, "grad_norm": 2.0215253829956055, "learning_rate": 2.3880591268801657e-06, "loss": 1.1256, "step": 5985 }, { "epoch": 0.9028657616892911, "grad_norm": 2.180921792984009, "learning_rate": 2.3807193175551966e-06, "loss": 1.4302, "step": 5986 }, { "epoch": 0.9030165912518854, "grad_norm": 2.012826681137085, "learning_rate": 2.373390530123604e-06, "loss": 1.1051, "step": 5987 }, { "epoch": 0.9031674208144796, "grad_norm": 2.3907721042633057, "learning_rate": 2.3660727662816907e-06, "loss": 1.2095, "step": 5988 }, { "epoch": 0.9033182503770739, "grad_norm": 1.7915292978286743, "learning_rate": 2.358766027723225e-06, "loss": 0.9241, "step": 5989 }, { "epoch": 0.9034690799396682, "grad_norm": 1.7415132522583008, "learning_rate": 2.351470316139409e-06, "loss": 0.8354, "step": 5990 }, { "epoch": 0.9036199095022625, "grad_norm": 2.1751768589019775, "learning_rate": 2.344185633218898e-06, "loss": 1.0623, "step": 5991 }, { "epoch": 0.9037707390648567, "grad_norm": 1.9542099237442017, "learning_rate": 2.3369119806477992e-06, "loss": 0.9326, "step": 5992 }, { "epoch": 0.903921568627451, "grad_norm": 1.8173662424087524, "learning_rate": 2.329649360109659e-06, "loss": 0.9052, "step": 5993 }, { "epoch": 0.9040723981900453, "grad_norm": 2.1205360889434814, "learning_rate": 2.3223977732854783e-06, "loss": 1.0038, "step": 5994 }, { "epoch": 0.9042232277526395, "grad_norm": 2.163048505783081, "learning_rate": 2.315157221853692e-06, "loss": 1.0929, "step": 5995 }, { "epoch": 0.9043740573152338, "grad_norm": 1.723755121231079, "learning_rate": 2.3079277074901875e-06, "loss": 0.6773, "step": 5996 }, { "epoch": 0.9045248868778281, "grad_norm": 1.5504698753356934, "learning_rate": 2.3007092318683087e-06, "loss": 0.6883, "step": 5997 }, { "epoch": 0.9046757164404223, "grad_norm": 1.4261337518692017, "learning_rate": 2.2935017966588255e-06, "loss": 0.5368, "step": 5998 }, { "epoch": 0.9048265460030166, "grad_norm": 1.9430757761001587, "learning_rate": 2.28630540352997e-06, "loss": 0.8454, "step": 5999 }, { "epoch": 0.9049773755656109, "grad_norm": 1.8556188344955444, "learning_rate": 2.2791200541473935e-06, "loss": 0.8655, "step": 6000 }, { "epoch": 0.9051282051282051, "grad_norm": 1.739741325378418, "learning_rate": 2.2719457501742202e-06, "loss": 0.9857, "step": 6001 }, { "epoch": 0.9052790346907994, "grad_norm": 2.11395263671875, "learning_rate": 2.264782493270995e-06, "loss": 1.2725, "step": 6002 }, { "epoch": 0.9054298642533937, "grad_norm": 1.951161503791809, "learning_rate": 2.2576302850957243e-06, "loss": 1.0836, "step": 6003 }, { "epoch": 0.9055806938159879, "grad_norm": 1.989766240119934, "learning_rate": 2.2504891273038455e-06, "loss": 1.2357, "step": 6004 }, { "epoch": 0.9057315233785822, "grad_norm": 1.8707345724105835, "learning_rate": 2.2433590215482415e-06, "loss": 1.0432, "step": 6005 }, { "epoch": 0.9058823529411765, "grad_norm": 1.8827862739562988, "learning_rate": 2.2362399694792367e-06, "loss": 1.1298, "step": 6006 }, { "epoch": 0.9060331825037707, "grad_norm": 1.6894561052322388, "learning_rate": 2.229131972744597e-06, "loss": 1.0114, "step": 6007 }, { "epoch": 0.906184012066365, "grad_norm": 1.8513940572738647, "learning_rate": 2.222035032989533e-06, "loss": 1.1222, "step": 6008 }, { "epoch": 0.9063348416289593, "grad_norm": 1.9649217128753662, "learning_rate": 2.2149491518566877e-06, "loss": 0.9211, "step": 6009 }, { "epoch": 0.9064856711915535, "grad_norm": 1.9286850690841675, "learning_rate": 2.2078743309861483e-06, "loss": 1.0362, "step": 6010 }, { "epoch": 0.9066365007541478, "grad_norm": 2.1225497722625732, "learning_rate": 2.2008105720154435e-06, "loss": 1.2624, "step": 6011 }, { "epoch": 0.9067873303167421, "grad_norm": 1.8829160928726196, "learning_rate": 2.1937578765795443e-06, "loss": 1.0398, "step": 6012 }, { "epoch": 0.9069381598793363, "grad_norm": 2.2695188522338867, "learning_rate": 2.1867162463108548e-06, "loss": 1.3452, "step": 6013 }, { "epoch": 0.9070889894419306, "grad_norm": 2.207731008529663, "learning_rate": 2.1796856828392276e-06, "loss": 1.4408, "step": 6014 }, { "epoch": 0.9072398190045249, "grad_norm": 1.9050668478012085, "learning_rate": 2.1726661877919383e-06, "loss": 1.1248, "step": 6015 }, { "epoch": 0.9073906485671192, "grad_norm": 1.8045799732208252, "learning_rate": 2.1656577627937158e-06, "loss": 1.0847, "step": 6016 }, { "epoch": 0.9075414781297134, "grad_norm": 1.855959415435791, "learning_rate": 2.158660409466723e-06, "loss": 0.8744, "step": 6017 }, { "epoch": 0.9076923076923077, "grad_norm": 1.895899772644043, "learning_rate": 2.151674129430553e-06, "loss": 1.161, "step": 6018 }, { "epoch": 0.907843137254902, "grad_norm": 1.9964271783828735, "learning_rate": 2.1446989243022407e-06, "loss": 1.3049, "step": 6019 }, { "epoch": 0.9079939668174962, "grad_norm": 2.1163601875305176, "learning_rate": 2.1377347956962556e-06, "loss": 1.429, "step": 6020 }, { "epoch": 0.9081447963800905, "grad_norm": 2.017669439315796, "learning_rate": 2.1307817452245084e-06, "loss": 1.246, "step": 6021 }, { "epoch": 0.9082956259426848, "grad_norm": 1.8070636987686157, "learning_rate": 2.123839774496339e-06, "loss": 0.9492, "step": 6022 }, { "epoch": 0.908446455505279, "grad_norm": 1.920025110244751, "learning_rate": 2.116908885118529e-06, "loss": 0.9085, "step": 6023 }, { "epoch": 0.9085972850678733, "grad_norm": 1.5635712146759033, "learning_rate": 2.109989078695296e-06, "loss": 0.6895, "step": 6024 }, { "epoch": 0.9087481146304676, "grad_norm": 1.8463526964187622, "learning_rate": 2.1030803568282853e-06, "loss": 1.084, "step": 6025 }, { "epoch": 0.9088989441930618, "grad_norm": 1.9979783296585083, "learning_rate": 2.0961827211165795e-06, "loss": 1.1639, "step": 6026 }, { "epoch": 0.9090497737556561, "grad_norm": 1.5263233184814453, "learning_rate": 2.0892961731567075e-06, "loss": 0.7659, "step": 6027 }, { "epoch": 0.9092006033182504, "grad_norm": 2.0111637115478516, "learning_rate": 2.082420714542599e-06, "loss": 1.1674, "step": 6028 }, { "epoch": 0.9093514328808446, "grad_norm": 1.7540020942687988, "learning_rate": 2.0755563468656536e-06, "loss": 1.0847, "step": 6029 }, { "epoch": 0.9095022624434389, "grad_norm": 1.9565657377243042, "learning_rate": 2.068703071714678e-06, "loss": 1.0351, "step": 6030 }, { "epoch": 0.9096530920060332, "grad_norm": 1.8195117712020874, "learning_rate": 2.0618608906759318e-06, "loss": 1.0144, "step": 6031 }, { "epoch": 0.9098039215686274, "grad_norm": 2.0431888103485107, "learning_rate": 2.0550298053330917e-06, "loss": 1.1437, "step": 6032 }, { "epoch": 0.9099547511312217, "grad_norm": 2.1053390502929688, "learning_rate": 2.0482098172672716e-06, "loss": 1.0869, "step": 6033 }, { "epoch": 0.910105580693816, "grad_norm": 2.069014072418213, "learning_rate": 2.0414009280570134e-06, "loss": 0.9371, "step": 6034 }, { "epoch": 0.9102564102564102, "grad_norm": 1.7647862434387207, "learning_rate": 2.0346031392783016e-06, "loss": 0.835, "step": 6035 }, { "epoch": 0.9104072398190045, "grad_norm": 2.2147107124328613, "learning_rate": 2.0278164525045384e-06, "loss": 1.2721, "step": 6036 }, { "epoch": 0.9105580693815988, "grad_norm": 1.72138249874115, "learning_rate": 2.0210408693065617e-06, "loss": 0.8296, "step": 6037 }, { "epoch": 0.910708898944193, "grad_norm": 1.9998971223831177, "learning_rate": 2.01427639125264e-06, "loss": 1.1145, "step": 6038 }, { "epoch": 0.9108597285067873, "grad_norm": 1.9560002088546753, "learning_rate": 2.0075230199084695e-06, "loss": 1.091, "step": 6039 }, { "epoch": 0.9110105580693816, "grad_norm": 1.9205883741378784, "learning_rate": 2.0007807568371727e-06, "loss": 1.0336, "step": 6040 }, { "epoch": 0.9111613876319758, "grad_norm": 1.9203702211380005, "learning_rate": 1.9940496035993016e-06, "loss": 0.8746, "step": 6041 }, { "epoch": 0.9113122171945701, "grad_norm": 2.2302846908569336, "learning_rate": 1.9873295617528543e-06, "loss": 0.9193, "step": 6042 }, { "epoch": 0.9114630467571644, "grad_norm": 1.8374782800674438, "learning_rate": 1.980620632853225e-06, "loss": 0.9562, "step": 6043 }, { "epoch": 0.9116138763197587, "grad_norm": 2.1728641986846924, "learning_rate": 1.9739228184532666e-06, "loss": 1.2462, "step": 6044 }, { "epoch": 0.9117647058823529, "grad_norm": 1.7620199918746948, "learning_rate": 1.967236120103244e-06, "loss": 0.7843, "step": 6045 }, { "epoch": 0.9119155354449472, "grad_norm": 1.8594876527786255, "learning_rate": 1.9605605393508474e-06, "loss": 0.7521, "step": 6046 }, { "epoch": 0.9120663650075415, "grad_norm": 1.992716670036316, "learning_rate": 1.9538960777411963e-06, "loss": 0.8874, "step": 6047 }, { "epoch": 0.9122171945701357, "grad_norm": 1.9644246101379395, "learning_rate": 1.9472427368168343e-06, "loss": 0.8527, "step": 6048 }, { "epoch": 0.91236802413273, "grad_norm": 2.1554572582244873, "learning_rate": 1.9406005181177466e-06, "loss": 0.9052, "step": 6049 }, { "epoch": 0.9125188536953243, "grad_norm": 1.7025587558746338, "learning_rate": 1.9339694231813254e-06, "loss": 0.7821, "step": 6050 }, { "epoch": 0.9126696832579185, "grad_norm": 1.8636488914489746, "learning_rate": 1.927349453542393e-06, "loss": 1.2251, "step": 6051 }, { "epoch": 0.9128205128205128, "grad_norm": 1.5141013860702515, "learning_rate": 1.920740610733196e-06, "loss": 0.8309, "step": 6052 }, { "epoch": 0.9129713423831071, "grad_norm": 1.6740176677703857, "learning_rate": 1.914142896283416e-06, "loss": 1.0232, "step": 6053 }, { "epoch": 0.9131221719457013, "grad_norm": 1.7774393558502197, "learning_rate": 1.907556311720149e-06, "loss": 0.9077, "step": 6054 }, { "epoch": 0.9132730015082956, "grad_norm": 2.3353066444396973, "learning_rate": 1.9009808585679245e-06, "loss": 1.732, "step": 6055 }, { "epoch": 0.9134238310708899, "grad_norm": 2.110024929046631, "learning_rate": 1.8944165383486645e-06, "loss": 1.3481, "step": 6056 }, { "epoch": 0.9135746606334841, "grad_norm": 1.739798665046692, "learning_rate": 1.887863352581759e-06, "loss": 0.976, "step": 6057 }, { "epoch": 0.9137254901960784, "grad_norm": 1.940887212753296, "learning_rate": 1.8813213027839894e-06, "loss": 1.2067, "step": 6058 }, { "epoch": 0.9138763197586727, "grad_norm": 1.9067021608352661, "learning_rate": 1.8747903904695719e-06, "loss": 1.2115, "step": 6059 }, { "epoch": 0.9140271493212669, "grad_norm": 1.6153011322021484, "learning_rate": 1.8682706171501419e-06, "loss": 0.7681, "step": 6060 }, { "epoch": 0.9141779788838612, "grad_norm": 1.634977102279663, "learning_rate": 1.861761984334759e-06, "loss": 0.8482, "step": 6061 }, { "epoch": 0.9143288084464555, "grad_norm": 2.0818588733673096, "learning_rate": 1.8552644935299014e-06, "loss": 1.0215, "step": 6062 }, { "epoch": 0.9144796380090497, "grad_norm": 1.969977855682373, "learning_rate": 1.8487781462394716e-06, "loss": 1.1314, "step": 6063 }, { "epoch": 0.914630467571644, "grad_norm": 1.877731204032898, "learning_rate": 1.8423029439647855e-06, "loss": 1.0044, "step": 6064 }, { "epoch": 0.9147812971342383, "grad_norm": 2.0573790073394775, "learning_rate": 1.8358388882045941e-06, "loss": 1.1602, "step": 6065 }, { "epoch": 0.9149321266968325, "grad_norm": 1.9530761241912842, "learning_rate": 1.8293859804550506e-06, "loss": 0.9205, "step": 6066 }, { "epoch": 0.9150829562594268, "grad_norm": 2.1931064128875732, "learning_rate": 1.8229442222097326e-06, "loss": 1.4128, "step": 6067 }, { "epoch": 0.9152337858220211, "grad_norm": 1.8459879159927368, "learning_rate": 1.8165136149596474e-06, "loss": 0.9612, "step": 6068 }, { "epoch": 0.9153846153846154, "grad_norm": 2.058382511138916, "learning_rate": 1.8100941601932152e-06, "loss": 1.3003, "step": 6069 }, { "epoch": 0.9155354449472096, "grad_norm": 2.2638463973999023, "learning_rate": 1.8036858593962702e-06, "loss": 1.4287, "step": 6070 }, { "epoch": 0.9156862745098039, "grad_norm": 1.7145456075668335, "learning_rate": 1.79728871405207e-06, "loss": 0.9259, "step": 6071 }, { "epoch": 0.9158371040723982, "grad_norm": 2.0483133792877197, "learning_rate": 1.7909027256412913e-06, "loss": 1.172, "step": 6072 }, { "epoch": 0.9159879336349924, "grad_norm": 1.7859383821487427, "learning_rate": 1.7845278956420297e-06, "loss": 0.9512, "step": 6073 }, { "epoch": 0.9161387631975867, "grad_norm": 1.6583622694015503, "learning_rate": 1.778164225529788e-06, "loss": 0.7401, "step": 6074 }, { "epoch": 0.916289592760181, "grad_norm": 1.7779827117919922, "learning_rate": 1.771811716777494e-06, "loss": 1.028, "step": 6075 }, { "epoch": 0.9164404223227752, "grad_norm": 2.049680471420288, "learning_rate": 1.7654703708554876e-06, "loss": 1.2298, "step": 6076 }, { "epoch": 0.9165912518853695, "grad_norm": 2.1654999256134033, "learning_rate": 1.759140189231534e-06, "loss": 1.0122, "step": 6077 }, { "epoch": 0.9167420814479638, "grad_norm": 2.285860300064087, "learning_rate": 1.7528211733708056e-06, "loss": 0.9315, "step": 6078 }, { "epoch": 0.916892911010558, "grad_norm": 1.9840625524520874, "learning_rate": 1.7465133247358878e-06, "loss": 1.0156, "step": 6079 }, { "epoch": 0.9170437405731523, "grad_norm": 2.019742250442505, "learning_rate": 1.7402166447867963e-06, "loss": 1.1121, "step": 6080 }, { "epoch": 0.9171945701357466, "grad_norm": 2.016984462738037, "learning_rate": 1.7339311349809483e-06, "loss": 1.0991, "step": 6081 }, { "epoch": 0.9173453996983408, "grad_norm": 1.929485559463501, "learning_rate": 1.7276567967731805e-06, "loss": 0.8671, "step": 6082 }, { "epoch": 0.9174962292609351, "grad_norm": 2.0503761768341064, "learning_rate": 1.7213936316157364e-06, "loss": 1.0559, "step": 6083 }, { "epoch": 0.9176470588235294, "grad_norm": 1.9083929061889648, "learning_rate": 1.7151416409582898e-06, "loss": 0.9114, "step": 6084 }, { "epoch": 0.9177978883861236, "grad_norm": 1.7430659532546997, "learning_rate": 1.7089008262479057e-06, "loss": 0.7875, "step": 6085 }, { "epoch": 0.9179487179487179, "grad_norm": 1.8982425928115845, "learning_rate": 1.7026711889290837e-06, "loss": 0.9657, "step": 6086 }, { "epoch": 0.9180995475113122, "grad_norm": 1.8346010446548462, "learning_rate": 1.696452730443726e-06, "loss": 0.8928, "step": 6087 }, { "epoch": 0.9182503770739064, "grad_norm": 2.3585307598114014, "learning_rate": 1.6902454522311484e-06, "loss": 1.4146, "step": 6088 }, { "epoch": 0.9184012066365007, "grad_norm": 2.4726104736328125, "learning_rate": 1.6840493557280734e-06, "loss": 1.193, "step": 6089 }, { "epoch": 0.918552036199095, "grad_norm": 2.272958517074585, "learning_rate": 1.6778644423686485e-06, "loss": 1.4221, "step": 6090 }, { "epoch": 0.9187028657616892, "grad_norm": 1.9309147596359253, "learning_rate": 1.6716907135844173e-06, "loss": 0.9397, "step": 6091 }, { "epoch": 0.9188536953242835, "grad_norm": 1.7593473196029663, "learning_rate": 1.6655281708043536e-06, "loss": 0.9341, "step": 6092 }, { "epoch": 0.9190045248868778, "grad_norm": 2.027787923812866, "learning_rate": 1.6593768154548218e-06, "loss": 1.0671, "step": 6093 }, { "epoch": 0.919155354449472, "grad_norm": 2.2654662132263184, "learning_rate": 1.6532366489596107e-06, "loss": 1.0179, "step": 6094 }, { "epoch": 0.9193061840120663, "grad_norm": 1.8471527099609375, "learning_rate": 1.6471076727399116e-06, "loss": 0.7945, "step": 6095 }, { "epoch": 0.9194570135746606, "grad_norm": 2.0354926586151123, "learning_rate": 1.6409898882143281e-06, "loss": 1.0658, "step": 6096 }, { "epoch": 0.9196078431372549, "grad_norm": 1.7693188190460205, "learning_rate": 1.634883296798878e-06, "loss": 0.6918, "step": 6097 }, { "epoch": 0.9197586726998491, "grad_norm": 2.0768635272979736, "learning_rate": 1.6287878999069805e-06, "loss": 0.8484, "step": 6098 }, { "epoch": 0.9199095022624434, "grad_norm": 2.0135343074798584, "learning_rate": 1.6227036989494738e-06, "loss": 1.0869, "step": 6099 }, { "epoch": 0.9200603318250377, "grad_norm": 1.7810611724853516, "learning_rate": 1.6166306953345922e-06, "loss": 0.7831, "step": 6100 }, { "epoch": 0.9202111613876319, "grad_norm": 1.8530064821243286, "learning_rate": 1.610568890467995e-06, "loss": 1.2646, "step": 6101 }, { "epoch": 0.9203619909502262, "grad_norm": 1.8101675510406494, "learning_rate": 1.604518285752732e-06, "loss": 1.1518, "step": 6102 }, { "epoch": 0.9205128205128205, "grad_norm": 1.9796537160873413, "learning_rate": 1.5984788825892606e-06, "loss": 1.4833, "step": 6103 }, { "epoch": 0.9206636500754147, "grad_norm": 2.0288891792297363, "learning_rate": 1.5924506823754626e-06, "loss": 1.1593, "step": 6104 }, { "epoch": 0.920814479638009, "grad_norm": 2.0104260444641113, "learning_rate": 1.5864336865066165e-06, "loss": 1.2195, "step": 6105 }, { "epoch": 0.9209653092006033, "grad_norm": 1.9347336292266846, "learning_rate": 1.5804278963754026e-06, "loss": 1.2504, "step": 6106 }, { "epoch": 0.9211161387631975, "grad_norm": 1.9467333555221558, "learning_rate": 1.5744333133719203e-06, "loss": 1.2748, "step": 6107 }, { "epoch": 0.9212669683257918, "grad_norm": 2.0754785537719727, "learning_rate": 1.5684499388836593e-06, "loss": 1.4359, "step": 6108 }, { "epoch": 0.9214177978883861, "grad_norm": 2.2468783855438232, "learning_rate": 1.5624777742955289e-06, "loss": 1.4141, "step": 6109 }, { "epoch": 0.9215686274509803, "grad_norm": 2.050715923309326, "learning_rate": 1.5565168209898396e-06, "loss": 1.1298, "step": 6110 }, { "epoch": 0.9217194570135746, "grad_norm": 1.7624192237854004, "learning_rate": 1.5505670803463046e-06, "loss": 0.982, "step": 6111 }, { "epoch": 0.9218702865761689, "grad_norm": 1.9148942232131958, "learning_rate": 1.5446285537420336e-06, "loss": 0.9654, "step": 6112 }, { "epoch": 0.9220211161387633, "grad_norm": 1.7222880125045776, "learning_rate": 1.5387012425515602e-06, "loss": 1.0878, "step": 6113 }, { "epoch": 0.9221719457013575, "grad_norm": 1.8550453186035156, "learning_rate": 1.5327851481468036e-06, "loss": 1.107, "step": 6114 }, { "epoch": 0.9223227752639518, "grad_norm": 2.2148823738098145, "learning_rate": 1.5268802718971077e-06, "loss": 1.3719, "step": 6115 }, { "epoch": 0.9224736048265461, "grad_norm": 1.873578429222107, "learning_rate": 1.52098661516919e-06, "loss": 1.2048, "step": 6116 }, { "epoch": 0.9226244343891403, "grad_norm": 2.0291497707366943, "learning_rate": 1.5151041793272037e-06, "loss": 1.2883, "step": 6117 }, { "epoch": 0.9227752639517346, "grad_norm": 1.7526885271072388, "learning_rate": 1.509232965732682e-06, "loss": 0.9075, "step": 6118 }, { "epoch": 0.9229260935143289, "grad_norm": 2.0419230461120605, "learning_rate": 1.5033729757445659e-06, "loss": 1.1401, "step": 6119 }, { "epoch": 0.9230769230769231, "grad_norm": 1.8950403928756714, "learning_rate": 1.4975242107192034e-06, "loss": 0.9627, "step": 6120 }, { "epoch": 0.9232277526395174, "grad_norm": 1.8482105731964111, "learning_rate": 1.4916866720103505e-06, "loss": 1.0237, "step": 6121 }, { "epoch": 0.9233785822021117, "grad_norm": 2.107534408569336, "learning_rate": 1.4858603609691379e-06, "loss": 1.3883, "step": 6122 }, { "epoch": 0.9235294117647059, "grad_norm": 1.947818398475647, "learning_rate": 1.480045278944131e-06, "loss": 1.0373, "step": 6123 }, { "epoch": 0.9236802413273002, "grad_norm": 1.5710196495056152, "learning_rate": 1.4742414272812698e-06, "loss": 0.7835, "step": 6124 }, { "epoch": 0.9238310708898945, "grad_norm": 1.7612252235412598, "learning_rate": 1.4684488073239078e-06, "loss": 0.7359, "step": 6125 }, { "epoch": 0.9239819004524887, "grad_norm": 1.8296582698822021, "learning_rate": 1.4626674204128055e-06, "loss": 0.8575, "step": 6126 }, { "epoch": 0.924132730015083, "grad_norm": 2.1524436473846436, "learning_rate": 1.4568972678861037e-06, "loss": 1.1862, "step": 6127 }, { "epoch": 0.9242835595776773, "grad_norm": 1.8486026525497437, "learning_rate": 1.451138351079362e-06, "loss": 1.1801, "step": 6128 }, { "epoch": 0.9244343891402715, "grad_norm": 2.2142937183380127, "learning_rate": 1.4453906713255305e-06, "loss": 1.36, "step": 6129 }, { "epoch": 0.9245852187028658, "grad_norm": 2.1411685943603516, "learning_rate": 1.4396542299549564e-06, "loss": 1.1282, "step": 6130 }, { "epoch": 0.9247360482654601, "grad_norm": 1.9450503587722778, "learning_rate": 1.4339290282953888e-06, "loss": 0.918, "step": 6131 }, { "epoch": 0.9248868778280543, "grad_norm": 2.1294782161712646, "learning_rate": 1.428215067671973e-06, "loss": 1.1222, "step": 6132 }, { "epoch": 0.9250377073906486, "grad_norm": 1.807011604309082, "learning_rate": 1.4225123494072623e-06, "loss": 0.9512, "step": 6133 }, { "epoch": 0.9251885369532429, "grad_norm": 1.9434272050857544, "learning_rate": 1.4168208748211898e-06, "loss": 1.0982, "step": 6134 }, { "epoch": 0.9253393665158371, "grad_norm": 2.2980172634124756, "learning_rate": 1.4111406452311016e-06, "loss": 1.1932, "step": 6135 }, { "epoch": 0.9254901960784314, "grad_norm": 2.1382246017456055, "learning_rate": 1.4054716619517406e-06, "loss": 1.1192, "step": 6136 }, { "epoch": 0.9256410256410257, "grad_norm": 2.177013397216797, "learning_rate": 1.3998139262952349e-06, "loss": 1.1901, "step": 6137 }, { "epoch": 0.92579185520362, "grad_norm": 2.302063226699829, "learning_rate": 1.3941674395711146e-06, "loss": 1.37, "step": 6138 }, { "epoch": 0.9259426847662142, "grad_norm": 2.399528741836548, "learning_rate": 1.3885322030863234e-06, "loss": 1.4177, "step": 6139 }, { "epoch": 0.9260935143288085, "grad_norm": 2.1822352409362793, "learning_rate": 1.3829082181451625e-06, "loss": 1.4518, "step": 6140 }, { "epoch": 0.9262443438914028, "grad_norm": 2.1651687622070312, "learning_rate": 1.3772954860493681e-06, "loss": 1.2527, "step": 6141 }, { "epoch": 0.926395173453997, "grad_norm": 2.5904695987701416, "learning_rate": 1.3716940080980455e-06, "loss": 1.5967, "step": 6142 }, { "epoch": 0.9265460030165913, "grad_norm": 2.030517101287842, "learning_rate": 1.3661037855877134e-06, "loss": 1.2583, "step": 6143 }, { "epoch": 0.9266968325791856, "grad_norm": 2.0761780738830566, "learning_rate": 1.36052481981227e-06, "loss": 1.0073, "step": 6144 }, { "epoch": 0.9268476621417798, "grad_norm": 2.226195812225342, "learning_rate": 1.3549571120630212e-06, "loss": 1.0862, "step": 6145 }, { "epoch": 0.9269984917043741, "grad_norm": 1.7457538843154907, "learning_rate": 1.3494006636286582e-06, "loss": 0.7194, "step": 6146 }, { "epoch": 0.9271493212669684, "grad_norm": 1.7029643058776855, "learning_rate": 1.3438554757952692e-06, "loss": 0.6465, "step": 6147 }, { "epoch": 0.9273001508295626, "grad_norm": 1.8745883703231812, "learning_rate": 1.338321549846333e-06, "loss": 0.9464, "step": 6148 }, { "epoch": 0.9274509803921569, "grad_norm": 1.8796849250793457, "learning_rate": 1.3327988870627362e-06, "loss": 0.8588, "step": 6149 }, { "epoch": 0.9276018099547512, "grad_norm": 1.458657145500183, "learning_rate": 1.3272874887227282e-06, "loss": 0.5578, "step": 6150 }, { "epoch": 0.9277526395173454, "grad_norm": 1.8978378772735596, "learning_rate": 1.3217873561019833e-06, "loss": 1.2724, "step": 6151 }, { "epoch": 0.9279034690799397, "grad_norm": 2.0466532707214355, "learning_rate": 1.3162984904735442e-06, "loss": 1.3908, "step": 6152 }, { "epoch": 0.928054298642534, "grad_norm": 2.0911641120910645, "learning_rate": 1.310820893107867e-06, "loss": 1.3953, "step": 6153 }, { "epoch": 0.9282051282051282, "grad_norm": 1.7481964826583862, "learning_rate": 1.3053545652727872e-06, "loss": 0.9822, "step": 6154 }, { "epoch": 0.9283559577677225, "grad_norm": 1.7156928777694702, "learning_rate": 1.2998995082335263e-06, "loss": 0.9959, "step": 6155 }, { "epoch": 0.9285067873303168, "grad_norm": 1.8935059309005737, "learning_rate": 1.2944557232527077e-06, "loss": 0.9927, "step": 6156 }, { "epoch": 0.928657616892911, "grad_norm": 2.193845510482788, "learning_rate": 1.2890232115903456e-06, "loss": 1.4844, "step": 6157 }, { "epoch": 0.9288084464555053, "grad_norm": 1.8832337856292725, "learning_rate": 1.2836019745038453e-06, "loss": 1.0342, "step": 6158 }, { "epoch": 0.9289592760180996, "grad_norm": 1.8237751722335815, "learning_rate": 1.2781920132479863e-06, "loss": 0.9957, "step": 6159 }, { "epoch": 0.9291101055806938, "grad_norm": 2.099750518798828, "learning_rate": 1.2727933290749615e-06, "loss": 1.2345, "step": 6160 }, { "epoch": 0.9292609351432881, "grad_norm": 1.8682037591934204, "learning_rate": 1.2674059232343384e-06, "loss": 1.2369, "step": 6161 }, { "epoch": 0.9294117647058824, "grad_norm": 1.7026097774505615, "learning_rate": 1.2620297969730742e-06, "loss": 0.9399, "step": 6162 }, { "epoch": 0.9295625942684766, "grad_norm": 1.4051376581192017, "learning_rate": 1.25666495153553e-06, "loss": 0.6484, "step": 6163 }, { "epoch": 0.9297134238310709, "grad_norm": 1.9237513542175293, "learning_rate": 1.2513113881634342e-06, "loss": 1.0859, "step": 6164 }, { "epoch": 0.9298642533936652, "grad_norm": 1.7081749439239502, "learning_rate": 1.245969108095929e-06, "loss": 0.9124, "step": 6165 }, { "epoch": 0.9300150829562595, "grad_norm": 2.219341278076172, "learning_rate": 1.24063811256952e-06, "loss": 1.3422, "step": 6166 }, { "epoch": 0.9301659125188537, "grad_norm": 1.8979054689407349, "learning_rate": 1.2353184028181198e-06, "loss": 1.0811, "step": 6167 }, { "epoch": 0.930316742081448, "grad_norm": 1.7843252420425415, "learning_rate": 1.2300099800730103e-06, "loss": 0.869, "step": 6168 }, { "epoch": 0.9304675716440423, "grad_norm": 2.4256253242492676, "learning_rate": 1.224712845562881e-06, "loss": 1.527, "step": 6169 }, { "epoch": 0.9306184012066365, "grad_norm": 1.895707130432129, "learning_rate": 1.2194270005137954e-06, "loss": 1.0045, "step": 6170 }, { "epoch": 0.9307692307692308, "grad_norm": 2.2016494274139404, "learning_rate": 1.2141524461492138e-06, "loss": 1.4074, "step": 6171 }, { "epoch": 0.9309200603318251, "grad_norm": 1.9944689273834229, "learning_rate": 1.2088891836899651e-06, "loss": 1.2759, "step": 6172 }, { "epoch": 0.9310708898944193, "grad_norm": 2.2912027835845947, "learning_rate": 1.2036372143542918e-06, "loss": 1.0835, "step": 6173 }, { "epoch": 0.9312217194570136, "grad_norm": 2.331023931503296, "learning_rate": 1.1983965393577989e-06, "loss": 1.2006, "step": 6174 }, { "epoch": 0.9313725490196079, "grad_norm": 1.4866410493850708, "learning_rate": 1.193167159913483e-06, "loss": 0.7089, "step": 6175 }, { "epoch": 0.9315233785822021, "grad_norm": 1.7109466791152954, "learning_rate": 1.187949077231737e-06, "loss": 0.8484, "step": 6176 }, { "epoch": 0.9316742081447964, "grad_norm": 1.9140154123306274, "learning_rate": 1.182742292520328e-06, "loss": 0.9046, "step": 6177 }, { "epoch": 0.9318250377073907, "grad_norm": 2.2138772010803223, "learning_rate": 1.1775468069844087e-06, "loss": 1.4801, "step": 6178 }, { "epoch": 0.9319758672699849, "grad_norm": 1.736183762550354, "learning_rate": 1.1723626218265227e-06, "loss": 0.7715, "step": 6179 }, { "epoch": 0.9321266968325792, "grad_norm": 2.1296472549438477, "learning_rate": 1.1671897382465879e-06, "loss": 1.3639, "step": 6180 }, { "epoch": 0.9322775263951735, "grad_norm": 1.8430805206298828, "learning_rate": 1.1620281574419244e-06, "loss": 0.9444, "step": 6181 }, { "epoch": 0.9324283559577677, "grad_norm": 1.9328181743621826, "learning_rate": 1.1568778806072156e-06, "loss": 1.0316, "step": 6182 }, { "epoch": 0.932579185520362, "grad_norm": 2.0901663303375244, "learning_rate": 1.1517389089345355e-06, "loss": 1.2009, "step": 6183 }, { "epoch": 0.9327300150829563, "grad_norm": 1.777727723121643, "learning_rate": 1.1466112436133547e-06, "loss": 0.9303, "step": 6184 }, { "epoch": 0.9328808446455505, "grad_norm": 2.0370821952819824, "learning_rate": 1.1414948858305075e-06, "loss": 1.0333, "step": 6185 }, { "epoch": 0.9330316742081448, "grad_norm": 2.5084969997406006, "learning_rate": 1.1363898367702185e-06, "loss": 1.2568, "step": 6186 }, { "epoch": 0.9331825037707391, "grad_norm": 1.9156336784362793, "learning_rate": 1.1312960976140985e-06, "loss": 1.0286, "step": 6187 }, { "epoch": 0.9333333333333333, "grad_norm": 2.3384692668914795, "learning_rate": 1.1262136695411374e-06, "loss": 1.6258, "step": 6188 }, { "epoch": 0.9334841628959276, "grad_norm": 1.9798719882965088, "learning_rate": 1.1211425537276998e-06, "loss": 0.9992, "step": 6189 }, { "epoch": 0.9336349924585219, "grad_norm": 2.1479921340942383, "learning_rate": 1.116082751347547e-06, "loss": 1.1868, "step": 6190 }, { "epoch": 0.9337858220211162, "grad_norm": 1.9493094682693481, "learning_rate": 1.111034263571814e-06, "loss": 1.0812, "step": 6191 }, { "epoch": 0.9339366515837104, "grad_norm": 1.9551457166671753, "learning_rate": 1.105997091569011e-06, "loss": 1.0654, "step": 6192 }, { "epoch": 0.9340874811463047, "grad_norm": 1.956107497215271, "learning_rate": 1.100971236505044e-06, "loss": 0.9965, "step": 6193 }, { "epoch": 0.934238310708899, "grad_norm": 2.0442137718200684, "learning_rate": 1.0959566995431824e-06, "loss": 1.11, "step": 6194 }, { "epoch": 0.9343891402714932, "grad_norm": 1.8906688690185547, "learning_rate": 1.090953481844087e-06, "loss": 0.9496, "step": 6195 }, { "epoch": 0.9345399698340875, "grad_norm": 1.787360668182373, "learning_rate": 1.0859615845657977e-06, "loss": 0.8303, "step": 6196 }, { "epoch": 0.9346907993966818, "grad_norm": 1.9264756441116333, "learning_rate": 1.0809810088637295e-06, "loss": 0.774, "step": 6197 }, { "epoch": 0.934841628959276, "grad_norm": 1.8232239484786987, "learning_rate": 1.0760117558906769e-06, "loss": 0.75, "step": 6198 }, { "epoch": 0.9349924585218703, "grad_norm": 1.8583582639694214, "learning_rate": 1.0710538267968196e-06, "loss": 0.9553, "step": 6199 }, { "epoch": 0.9351432880844646, "grad_norm": 1.9488117694854736, "learning_rate": 1.0661072227297119e-06, "loss": 0.9768, "step": 6200 }, { "epoch": 0.9352941176470588, "grad_norm": 1.6636779308319092, "learning_rate": 1.0611719448342938e-06, "loss": 1.1144, "step": 6201 }, { "epoch": 0.9354449472096531, "grad_norm": 1.9934664964675903, "learning_rate": 1.0562479942528736e-06, "loss": 1.2789, "step": 6202 }, { "epoch": 0.9355957767722474, "grad_norm": 1.5512678623199463, "learning_rate": 1.051335372125134e-06, "loss": 0.7112, "step": 6203 }, { "epoch": 0.9357466063348416, "grad_norm": 1.9760634899139404, "learning_rate": 1.0464340795881599e-06, "loss": 1.1728, "step": 6204 }, { "epoch": 0.9358974358974359, "grad_norm": 1.3425536155700684, "learning_rate": 1.0415441177763884e-06, "loss": 0.5767, "step": 6205 }, { "epoch": 0.9360482654600302, "grad_norm": 1.8505069017410278, "learning_rate": 1.0366654878216419e-06, "loss": 0.9243, "step": 6206 }, { "epoch": 0.9361990950226244, "grad_norm": 1.855046033859253, "learning_rate": 1.0317981908531282e-06, "loss": 1.1052, "step": 6207 }, { "epoch": 0.9363499245852187, "grad_norm": 1.9297765493392944, "learning_rate": 1.0269422279974184e-06, "loss": 1.0852, "step": 6208 }, { "epoch": 0.936500754147813, "grad_norm": 2.0530364513397217, "learning_rate": 1.0220976003784688e-06, "loss": 1.3843, "step": 6209 }, { "epoch": 0.9366515837104072, "grad_norm": 1.783528208732605, "learning_rate": 1.0172643091176103e-06, "loss": 0.8898, "step": 6210 }, { "epoch": 0.9368024132730015, "grad_norm": 1.907161831855774, "learning_rate": 1.0124423553335483e-06, "loss": 1.0087, "step": 6211 }, { "epoch": 0.9369532428355958, "grad_norm": 1.7338639497756958, "learning_rate": 1.007631740142373e-06, "loss": 0.9762, "step": 6212 }, { "epoch": 0.93710407239819, "grad_norm": 2.0924158096313477, "learning_rate": 1.0028324646575326e-06, "loss": 1.3359, "step": 6213 }, { "epoch": 0.9372549019607843, "grad_norm": 1.932849407196045, "learning_rate": 9.980445299898722e-07, "loss": 1.305, "step": 6214 }, { "epoch": 0.9374057315233786, "grad_norm": 1.9081813097000122, "learning_rate": 9.932679372475883e-07, "loss": 0.9995, "step": 6215 }, { "epoch": 0.9375565610859729, "grad_norm": 1.9167311191558838, "learning_rate": 9.885026875362746e-07, "loss": 0.9232, "step": 6216 }, { "epoch": 0.9377073906485671, "grad_norm": 1.6560578346252441, "learning_rate": 9.83748781958882e-07, "loss": 0.9508, "step": 6217 }, { "epoch": 0.9378582202111614, "grad_norm": 1.9292479753494263, "learning_rate": 9.790062216157469e-07, "loss": 1.1442, "step": 6218 }, { "epoch": 0.9380090497737557, "grad_norm": 2.204526901245117, "learning_rate": 9.742750076045749e-07, "loss": 1.1502, "step": 6219 }, { "epoch": 0.9381598793363499, "grad_norm": 2.1179401874542236, "learning_rate": 9.695551410204506e-07, "loss": 1.2448, "step": 6220 }, { "epoch": 0.9383107088989442, "grad_norm": 2.0917677879333496, "learning_rate": 9.648466229558174e-07, "loss": 1.2901, "step": 6221 }, { "epoch": 0.9384615384615385, "grad_norm": 1.9553565979003906, "learning_rate": 9.601494545005085e-07, "loss": 0.9956, "step": 6222 }, { "epoch": 0.9386123680241327, "grad_norm": 2.0436511039733887, "learning_rate": 9.554636367417269e-07, "loss": 0.9519, "step": 6223 }, { "epoch": 0.938763197586727, "grad_norm": 1.8288614749908447, "learning_rate": 9.507891707640437e-07, "loss": 1.0703, "step": 6224 }, { "epoch": 0.9389140271493213, "grad_norm": 1.8449335098266602, "learning_rate": 9.461260576494046e-07, "loss": 0.983, "step": 6225 }, { "epoch": 0.9390648567119155, "grad_norm": 1.7894610166549683, "learning_rate": 9.414742984771241e-07, "loss": 0.8819, "step": 6226 }, { "epoch": 0.9392156862745098, "grad_norm": 2.2926836013793945, "learning_rate": 9.368338943238964e-07, "loss": 1.1588, "step": 6227 }, { "epoch": 0.9393665158371041, "grad_norm": 2.1777687072753906, "learning_rate": 9.322048462637789e-07, "loss": 1.3449, "step": 6228 }, { "epoch": 0.9395173453996983, "grad_norm": 1.7373837232589722, "learning_rate": 9.275871553682092e-07, "loss": 0.938, "step": 6229 }, { "epoch": 0.9396681749622926, "grad_norm": 1.9276561737060547, "learning_rate": 9.229808227059878e-07, "loss": 0.9975, "step": 6230 }, { "epoch": 0.9398190045248869, "grad_norm": 1.7900891304016113, "learning_rate": 9.183858493432895e-07, "loss": 0.8548, "step": 6231 }, { "epoch": 0.9399698340874811, "grad_norm": 2.07910418510437, "learning_rate": 9.138022363436638e-07, "loss": 1.1805, "step": 6232 }, { "epoch": 0.9401206636500754, "grad_norm": 1.7643636465072632, "learning_rate": 9.09229984768023e-07, "loss": 0.991, "step": 6233 }, { "epoch": 0.9402714932126697, "grad_norm": 1.8613131046295166, "learning_rate": 9.046690956746595e-07, "loss": 1.0485, "step": 6234 }, { "epoch": 0.9404223227752639, "grad_norm": 2.2653021812438965, "learning_rate": 9.001195701192289e-07, "loss": 1.2899, "step": 6235 }, { "epoch": 0.9405731523378582, "grad_norm": 2.3277347087860107, "learning_rate": 8.955814091547609e-07, "loss": 1.4375, "step": 6236 }, { "epoch": 0.9407239819004525, "grad_norm": 2.1147515773773193, "learning_rate": 8.910546138316433e-07, "loss": 1.1043, "step": 6237 }, { "epoch": 0.9408748114630467, "grad_norm": 1.4688125848770142, "learning_rate": 8.865391851976491e-07, "loss": 0.6265, "step": 6238 }, { "epoch": 0.941025641025641, "grad_norm": 1.7932592630386353, "learning_rate": 8.820351242979141e-07, "loss": 0.8438, "step": 6239 }, { "epoch": 0.9411764705882353, "grad_norm": 2.096954107284546, "learning_rate": 8.775424321749382e-07, "loss": 1.0978, "step": 6240 }, { "epoch": 0.9413273001508295, "grad_norm": 2.0427722930908203, "learning_rate": 8.730611098685948e-07, "loss": 1.0328, "step": 6241 }, { "epoch": 0.9414781297134238, "grad_norm": 1.7261981964111328, "learning_rate": 8.685911584161266e-07, "loss": 0.7771, "step": 6242 }, { "epoch": 0.9416289592760181, "grad_norm": 2.103815793991089, "learning_rate": 8.641325788521393e-07, "loss": 0.998, "step": 6243 }, { "epoch": 0.9417797888386124, "grad_norm": 2.4655301570892334, "learning_rate": 8.596853722086074e-07, "loss": 1.237, "step": 6244 }, { "epoch": 0.9419306184012066, "grad_norm": 2.7287237644195557, "learning_rate": 8.552495395148852e-07, "loss": 1.4164, "step": 6245 }, { "epoch": 0.9420814479638009, "grad_norm": 1.7122939825057983, "learning_rate": 8.508250817976737e-07, "loss": 0.9301, "step": 6246 }, { "epoch": 0.9422322775263952, "grad_norm": 2.004281997680664, "learning_rate": 8.464120000810538e-07, "loss": 1.1161, "step": 6247 }, { "epoch": 0.9423831070889894, "grad_norm": 1.7664074897766113, "learning_rate": 8.420102953864806e-07, "loss": 0.8845, "step": 6248 }, { "epoch": 0.9425339366515837, "grad_norm": 1.9992055892944336, "learning_rate": 8.376199687327558e-07, "loss": 0.8977, "step": 6249 }, { "epoch": 0.942684766214178, "grad_norm": 1.8883174657821655, "learning_rate": 8.332410211360609e-07, "loss": 0.8384, "step": 6250 }, { "epoch": 0.9428355957767722, "grad_norm": 1.8478468656539917, "learning_rate": 8.288734536099408e-07, "loss": 1.2085, "step": 6251 }, { "epoch": 0.9429864253393665, "grad_norm": 1.9000264406204224, "learning_rate": 8.245172671653145e-07, "loss": 1.1676, "step": 6252 }, { "epoch": 0.9431372549019608, "grad_norm": 1.8996500968933105, "learning_rate": 8.201724628104535e-07, "loss": 1.1095, "step": 6253 }, { "epoch": 0.943288084464555, "grad_norm": 1.6948037147521973, "learning_rate": 8.15839041550992e-07, "loss": 0.9271, "step": 6254 }, { "epoch": 0.9434389140271493, "grad_norm": 1.878197193145752, "learning_rate": 8.115170043899501e-07, "loss": 0.9737, "step": 6255 }, { "epoch": 0.9435897435897436, "grad_norm": 2.2108347415924072, "learning_rate": 8.07206352327694e-07, "loss": 1.4355, "step": 6256 }, { "epoch": 0.9437405731523378, "grad_norm": 1.8024992942810059, "learning_rate": 8.029070863619648e-07, "loss": 1.1714, "step": 6257 }, { "epoch": 0.9438914027149321, "grad_norm": 1.9239073991775513, "learning_rate": 7.986192074878607e-07, "loss": 0.9482, "step": 6258 }, { "epoch": 0.9440422322775264, "grad_norm": 1.612963318824768, "learning_rate": 7.943427166978546e-07, "loss": 0.9701, "step": 6259 }, { "epoch": 0.9441930618401206, "grad_norm": 2.360172748565674, "learning_rate": 7.900776149817713e-07, "loss": 1.3821, "step": 6260 }, { "epoch": 0.9443438914027149, "grad_norm": 1.8003296852111816, "learning_rate": 7.8582390332681e-07, "loss": 0.9967, "step": 6261 }, { "epoch": 0.9444947209653092, "grad_norm": 1.9901286363601685, "learning_rate": 7.815815827175221e-07, "loss": 1.2425, "step": 6262 }, { "epoch": 0.9446455505279034, "grad_norm": 2.1474413871765137, "learning_rate": 7.773506541358333e-07, "loss": 1.2224, "step": 6263 }, { "epoch": 0.9447963800904977, "grad_norm": 1.9672069549560547, "learning_rate": 7.731311185610268e-07, "loss": 1.1663, "step": 6264 }, { "epoch": 0.944947209653092, "grad_norm": 1.8437694311141968, "learning_rate": 7.689229769697604e-07, "loss": 0.9525, "step": 6265 }, { "epoch": 0.9450980392156862, "grad_norm": 1.9961206912994385, "learning_rate": 7.64726230336027e-07, "loss": 1.0964, "step": 6266 }, { "epoch": 0.9452488687782805, "grad_norm": 1.8415944576263428, "learning_rate": 7.605408796312164e-07, "loss": 1.0519, "step": 6267 }, { "epoch": 0.9453996983408748, "grad_norm": 1.9274911880493164, "learning_rate": 7.563669258240535e-07, "loss": 1.1, "step": 6268 }, { "epoch": 0.945550527903469, "grad_norm": 1.9660152196884155, "learning_rate": 7.522043698806436e-07, "loss": 1.1443, "step": 6269 }, { "epoch": 0.9457013574660633, "grad_norm": 1.7958331108093262, "learning_rate": 7.480532127644435e-07, "loss": 0.9059, "step": 6270 }, { "epoch": 0.9458521870286576, "grad_norm": 2.053165912628174, "learning_rate": 7.439134554362681e-07, "loss": 1.2559, "step": 6271 }, { "epoch": 0.9460030165912519, "grad_norm": 1.9057128429412842, "learning_rate": 7.397850988543065e-07, "loss": 1.0629, "step": 6272 }, { "epoch": 0.9461538461538461, "grad_norm": 1.7405214309692383, "learning_rate": 7.356681439740998e-07, "loss": 0.958, "step": 6273 }, { "epoch": 0.9463046757164404, "grad_norm": 1.8470877408981323, "learning_rate": 7.315625917485525e-07, "loss": 0.9393, "step": 6274 }, { "epoch": 0.9464555052790347, "grad_norm": 1.862983226776123, "learning_rate": 7.274684431279378e-07, "loss": 0.8262, "step": 6275 }, { "epoch": 0.9466063348416289, "grad_norm": 2.055889844894409, "learning_rate": 7.233856990598697e-07, "loss": 1.1398, "step": 6276 }, { "epoch": 0.9467571644042232, "grad_norm": 1.97694730758667, "learning_rate": 7.193143604893426e-07, "loss": 1.0823, "step": 6277 }, { "epoch": 0.9469079939668175, "grad_norm": 1.692892074584961, "learning_rate": 7.152544283586971e-07, "loss": 0.7331, "step": 6278 }, { "epoch": 0.9470588235294117, "grad_norm": 1.8486021757125854, "learning_rate": 7.11205903607648e-07, "loss": 0.8909, "step": 6279 }, { "epoch": 0.947209653092006, "grad_norm": 2.050459146499634, "learning_rate": 7.071687871732513e-07, "loss": 1.0519, "step": 6280 }, { "epoch": 0.9473604826546003, "grad_norm": 2.027438163757324, "learning_rate": 7.03143079989943e-07, "loss": 1.1911, "step": 6281 }, { "epoch": 0.9475113122171945, "grad_norm": 2.2774291038513184, "learning_rate": 6.991287829894999e-07, "loss": 1.0686, "step": 6282 }, { "epoch": 0.9476621417797888, "grad_norm": 2.0271224975585938, "learning_rate": 6.951258971010677e-07, "loss": 0.9588, "step": 6283 }, { "epoch": 0.9478129713423831, "grad_norm": 2.0257952213287354, "learning_rate": 6.911344232511496e-07, "loss": 1.0368, "step": 6284 }, { "epoch": 0.9479638009049773, "grad_norm": 1.8290969133377075, "learning_rate": 6.871543623636012e-07, "loss": 0.9618, "step": 6285 }, { "epoch": 0.9481146304675716, "grad_norm": 1.9377784729003906, "learning_rate": 6.831857153596521e-07, "loss": 1.1191, "step": 6286 }, { "epoch": 0.9482654600301659, "grad_norm": 2.043879270553589, "learning_rate": 6.792284831578733e-07, "loss": 1.1668, "step": 6287 }, { "epoch": 0.9484162895927601, "grad_norm": 1.8056532144546509, "learning_rate": 6.752826666742041e-07, "loss": 0.7392, "step": 6288 }, { "epoch": 0.9485671191553544, "grad_norm": 2.201486349105835, "learning_rate": 6.713482668219362e-07, "loss": 1.4299, "step": 6289 }, { "epoch": 0.9487179487179487, "grad_norm": 2.0214931964874268, "learning_rate": 6.674252845117191e-07, "loss": 1.1641, "step": 6290 }, { "epoch": 0.948868778280543, "grad_norm": 2.255383253097534, "learning_rate": 6.63513720651554e-07, "loss": 1.1026, "step": 6291 }, { "epoch": 0.9490196078431372, "grad_norm": 2.57189679145813, "learning_rate": 6.596135761468225e-07, "loss": 1.478, "step": 6292 }, { "epoch": 0.9491704374057315, "grad_norm": 2.4986143112182617, "learning_rate": 6.557248519002302e-07, "loss": 1.6896, "step": 6293 }, { "epoch": 0.9493212669683257, "grad_norm": 2.127760410308838, "learning_rate": 6.51847548811868e-07, "loss": 1.3076, "step": 6294 }, { "epoch": 0.94947209653092, "grad_norm": 2.108933925628662, "learning_rate": 6.479816677791683e-07, "loss": 1.0977, "step": 6295 }, { "epoch": 0.9496229260935143, "grad_norm": 1.5710909366607666, "learning_rate": 6.441272096969153e-07, "loss": 0.6389, "step": 6296 }, { "epoch": 0.9497737556561086, "grad_norm": 1.3058656454086304, "learning_rate": 6.402841754572675e-07, "loss": 0.5123, "step": 6297 }, { "epoch": 0.9499245852187028, "grad_norm": 1.7123138904571533, "learning_rate": 6.364525659497189e-07, "loss": 0.7761, "step": 6298 }, { "epoch": 0.9500754147812971, "grad_norm": 1.6542720794677734, "learning_rate": 6.326323820611379e-07, "loss": 0.7791, "step": 6299 }, { "epoch": 0.9502262443438914, "grad_norm": 2.116077184677124, "learning_rate": 6.288236246757284e-07, "loss": 1.0661, "step": 6300 }, { "epoch": 0.9503770739064856, "grad_norm": 1.7709579467773438, "learning_rate": 6.250262946750685e-07, "loss": 1.0497, "step": 6301 }, { "epoch": 0.9505279034690799, "grad_norm": 1.7317759990692139, "learning_rate": 6.212403929380772e-07, "loss": 1.007, "step": 6302 }, { "epoch": 0.9506787330316742, "grad_norm": 1.5356085300445557, "learning_rate": 6.174659203410371e-07, "loss": 0.8161, "step": 6303 }, { "epoch": 0.9508295625942684, "grad_norm": 1.9997459650039673, "learning_rate": 6.137028777575826e-07, "loss": 1.3262, "step": 6304 }, { "epoch": 0.9509803921568627, "grad_norm": 1.7967150211334229, "learning_rate": 6.099512660587059e-07, "loss": 1.1945, "step": 6305 }, { "epoch": 0.951131221719457, "grad_norm": 1.8450052738189697, "learning_rate": 6.062110861127402e-07, "loss": 1.1252, "step": 6306 }, { "epoch": 0.9512820512820512, "grad_norm": 1.8019013404846191, "learning_rate": 6.024823387853928e-07, "loss": 0.8986, "step": 6307 }, { "epoch": 0.9514328808446455, "grad_norm": 1.6633667945861816, "learning_rate": 5.987650249397125e-07, "loss": 0.9361, "step": 6308 }, { "epoch": 0.9515837104072398, "grad_norm": 1.9914895296096802, "learning_rate": 5.950591454360943e-07, "loss": 1.2805, "step": 6309 }, { "epoch": 0.951734539969834, "grad_norm": 1.905820608139038, "learning_rate": 5.913647011323075e-07, "loss": 1.1174, "step": 6310 }, { "epoch": 0.9518853695324283, "grad_norm": 1.832431435585022, "learning_rate": 5.876816928834572e-07, "loss": 1.1125, "step": 6311 }, { "epoch": 0.9520361990950226, "grad_norm": 1.762558102607727, "learning_rate": 5.840101215420057e-07, "loss": 1.0359, "step": 6312 }, { "epoch": 0.9521870286576168, "grad_norm": 1.7604293823242188, "learning_rate": 5.803499879577734e-07, "loss": 1.0938, "step": 6313 }, { "epoch": 0.9523378582202111, "grad_norm": 2.041506767272949, "learning_rate": 5.767012929779325e-07, "loss": 1.0922, "step": 6314 }, { "epoch": 0.9524886877828054, "grad_norm": 1.8552873134613037, "learning_rate": 5.73064037447002e-07, "loss": 0.9393, "step": 6315 }, { "epoch": 0.9526395173453996, "grad_norm": 1.5460180044174194, "learning_rate": 5.694382222068528e-07, "loss": 0.8098, "step": 6316 }, { "epoch": 0.9527903469079939, "grad_norm": 1.542466640472412, "learning_rate": 5.658238480967137e-07, "loss": 0.7045, "step": 6317 }, { "epoch": 0.9529411764705882, "grad_norm": 1.8291473388671875, "learning_rate": 5.622209159531655e-07, "loss": 1.1387, "step": 6318 }, { "epoch": 0.9530920060331824, "grad_norm": 1.950031042098999, "learning_rate": 5.586294266101355e-07, "loss": 1.0013, "step": 6319 }, { "epoch": 0.9532428355957768, "grad_norm": 1.5500978231430054, "learning_rate": 5.550493808989032e-07, "loss": 0.873, "step": 6320 }, { "epoch": 0.9533936651583711, "grad_norm": 2.1455116271972656, "learning_rate": 5.514807796481003e-07, "loss": 1.3643, "step": 6321 }, { "epoch": 0.9535444947209654, "grad_norm": 1.77354896068573, "learning_rate": 5.47923623683716e-07, "loss": 0.8939, "step": 6322 }, { "epoch": 0.9536953242835596, "grad_norm": 1.7942193746566772, "learning_rate": 5.443779138290806e-07, "loss": 0.8917, "step": 6323 }, { "epoch": 0.9538461538461539, "grad_norm": 1.7935692071914673, "learning_rate": 5.408436509048819e-07, "loss": 0.839, "step": 6324 }, { "epoch": 0.9539969834087482, "grad_norm": 1.6746853590011597, "learning_rate": 5.373208357291493e-07, "loss": 0.7705, "step": 6325 }, { "epoch": 0.9541478129713424, "grad_norm": 1.8481478691101074, "learning_rate": 5.338094691172801e-07, "loss": 0.925, "step": 6326 }, { "epoch": 0.9542986425339367, "grad_norm": 1.7488939762115479, "learning_rate": 5.303095518819967e-07, "loss": 0.8289, "step": 6327 }, { "epoch": 0.954449472096531, "grad_norm": 2.2121334075927734, "learning_rate": 5.268210848333954e-07, "loss": 1.4356, "step": 6328 }, { "epoch": 0.9546003016591252, "grad_norm": 1.872408151626587, "learning_rate": 5.233440687789082e-07, "loss": 0.9361, "step": 6329 }, { "epoch": 0.9547511312217195, "grad_norm": 2.0881173610687256, "learning_rate": 5.198785045233245e-07, "loss": 1.451, "step": 6330 }, { "epoch": 0.9549019607843138, "grad_norm": 1.886342167854309, "learning_rate": 5.164243928687695e-07, "loss": 0.8034, "step": 6331 }, { "epoch": 0.955052790346908, "grad_norm": 2.0379743576049805, "learning_rate": 5.129817346147369e-07, "loss": 1.2724, "step": 6332 }, { "epoch": 0.9552036199095023, "grad_norm": 1.8097858428955078, "learning_rate": 5.095505305580561e-07, "loss": 0.9383, "step": 6333 }, { "epoch": 0.9553544494720966, "grad_norm": 1.8055602312088013, "learning_rate": 5.061307814929028e-07, "loss": 0.9776, "step": 6334 }, { "epoch": 0.9555052790346908, "grad_norm": 1.750784993171692, "learning_rate": 5.027224882108216e-07, "loss": 0.6974, "step": 6335 }, { "epoch": 0.9556561085972851, "grad_norm": 1.8835489749908447, "learning_rate": 4.993256515006817e-07, "loss": 0.9747, "step": 6336 }, { "epoch": 0.9558069381598794, "grad_norm": 2.237809658050537, "learning_rate": 4.959402721487094e-07, "loss": 1.1552, "step": 6337 }, { "epoch": 0.9559577677224737, "grad_norm": 2.2509021759033203, "learning_rate": 4.925663509384782e-07, "loss": 1.2622, "step": 6338 }, { "epoch": 0.9561085972850679, "grad_norm": 1.9245855808258057, "learning_rate": 4.892038886509242e-07, "loss": 0.9188, "step": 6339 }, { "epoch": 0.9562594268476622, "grad_norm": 2.145233392715454, "learning_rate": 4.85852886064303e-07, "loss": 1.0986, "step": 6340 }, { "epoch": 0.9564102564102565, "grad_norm": 1.804188847541809, "learning_rate": 4.825133439542385e-07, "loss": 0.923, "step": 6341 }, { "epoch": 0.9565610859728507, "grad_norm": 1.800316572189331, "learning_rate": 4.79185263093701e-07, "loss": 0.9414, "step": 6342 }, { "epoch": 0.956711915535445, "grad_norm": 2.021705389022827, "learning_rate": 4.758686442529969e-07, "loss": 0.9034, "step": 6343 }, { "epoch": 0.9568627450980393, "grad_norm": 1.8886138200759888, "learning_rate": 4.725634881997898e-07, "loss": 0.9878, "step": 6344 }, { "epoch": 0.9570135746606335, "grad_norm": 2.104884386062622, "learning_rate": 4.692697956990899e-07, "loss": 1.074, "step": 6345 }, { "epoch": 0.9571644042232278, "grad_norm": 2.120182991027832, "learning_rate": 4.6598756751324857e-07, "loss": 0.9175, "step": 6346 }, { "epoch": 0.9573152337858221, "grad_norm": 1.8088730573654175, "learning_rate": 4.6271680440195806e-07, "loss": 0.8787, "step": 6347 }, { "epoch": 0.9574660633484163, "grad_norm": 1.7701196670532227, "learning_rate": 4.59457507122274e-07, "loss": 0.7213, "step": 6348 }, { "epoch": 0.9576168929110106, "grad_norm": 2.178802967071533, "learning_rate": 4.562096764285817e-07, "loss": 0.9971, "step": 6349 }, { "epoch": 0.9577677224736049, "grad_norm": 1.5018692016601562, "learning_rate": 4.529733130726299e-07, "loss": 0.5859, "step": 6350 }, { "epoch": 0.9579185520361991, "grad_norm": 1.998622179031372, "learning_rate": 4.4974841780349163e-07, "loss": 1.3461, "step": 6351 }, { "epoch": 0.9580693815987934, "grad_norm": 1.9802663326263428, "learning_rate": 4.465349913676031e-07, "loss": 1.187, "step": 6352 }, { "epoch": 0.9582202111613877, "grad_norm": 1.7344826459884644, "learning_rate": 4.4333303450873607e-07, "loss": 0.9543, "step": 6353 }, { "epoch": 0.9583710407239819, "grad_norm": 1.9702990055084229, "learning_rate": 4.401425479680199e-07, "loss": 1.2531, "step": 6354 }, { "epoch": 0.9585218702865762, "grad_norm": 2.1435110569000244, "learning_rate": 4.369635324839083e-07, "loss": 1.2078, "step": 6355 }, { "epoch": 0.9586726998491705, "grad_norm": 1.9066894054412842, "learning_rate": 4.3379598879221825e-07, "loss": 1.3334, "step": 6356 }, { "epoch": 0.9588235294117647, "grad_norm": 1.7345263957977295, "learning_rate": 4.306399176261022e-07, "loss": 0.9146, "step": 6357 }, { "epoch": 0.958974358974359, "grad_norm": 1.7452151775360107, "learning_rate": 4.274953197160647e-07, "loss": 1.0763, "step": 6358 }, { "epoch": 0.9591251885369533, "grad_norm": 1.8526966571807861, "learning_rate": 4.243621957899457e-07, "loss": 1.201, "step": 6359 }, { "epoch": 0.9592760180995475, "grad_norm": 1.969795823097229, "learning_rate": 4.2124054657293187e-07, "loss": 1.0364, "step": 6360 }, { "epoch": 0.9594268476621418, "grad_norm": 1.8643343448638916, "learning_rate": 4.1813037278756184e-07, "loss": 0.9283, "step": 6361 }, { "epoch": 0.9595776772247361, "grad_norm": 1.9766918420791626, "learning_rate": 4.1503167515370976e-07, "loss": 1.1157, "step": 6362 }, { "epoch": 0.9597285067873303, "grad_norm": 1.9669005870819092, "learning_rate": 4.119444543885964e-07, "loss": 1.1146, "step": 6363 }, { "epoch": 0.9598793363499246, "grad_norm": 1.802460789680481, "learning_rate": 4.0886871120678903e-07, "loss": 1.0177, "step": 6364 }, { "epoch": 0.9600301659125189, "grad_norm": 1.8280365467071533, "learning_rate": 4.058044463201849e-07, "loss": 1.0281, "step": 6365 }, { "epoch": 0.9601809954751132, "grad_norm": 1.9677125215530396, "learning_rate": 4.027516604380388e-07, "loss": 1.0923, "step": 6366 }, { "epoch": 0.9603318250377074, "grad_norm": 2.0572264194488525, "learning_rate": 3.9971035426695226e-07, "loss": 1.2239, "step": 6367 }, { "epoch": 0.9604826546003017, "grad_norm": 2.10935115814209, "learning_rate": 3.96680528510851e-07, "loss": 1.2413, "step": 6368 }, { "epoch": 0.960633484162896, "grad_norm": 1.6900520324707031, "learning_rate": 3.936621838710186e-07, "loss": 0.8827, "step": 6369 }, { "epoch": 0.9607843137254902, "grad_norm": 2.0149905681610107, "learning_rate": 3.906553210460795e-07, "loss": 0.9325, "step": 6370 }, { "epoch": 0.9609351432880845, "grad_norm": 1.7919515371322632, "learning_rate": 3.8765994073199366e-07, "loss": 0.8165, "step": 6371 }, { "epoch": 0.9610859728506788, "grad_norm": 1.882252812385559, "learning_rate": 3.8467604362206753e-07, "loss": 1.0458, "step": 6372 }, { "epoch": 0.961236802413273, "grad_norm": 1.703015923500061, "learning_rate": 3.8170363040695435e-07, "loss": 0.8059, "step": 6373 }, { "epoch": 0.9613876319758673, "grad_norm": 1.6983740329742432, "learning_rate": 3.787427017746481e-07, "loss": 0.7853, "step": 6374 }, { "epoch": 0.9615384615384616, "grad_norm": 1.5398797988891602, "learning_rate": 3.757932584104673e-07, "loss": 0.6919, "step": 6375 }, { "epoch": 0.9616892911010558, "grad_norm": 1.4278013706207275, "learning_rate": 3.728553009970992e-07, "loss": 0.638, "step": 6376 }, { "epoch": 0.9618401206636501, "grad_norm": 1.8496487140655518, "learning_rate": 3.6992883021455537e-07, "loss": 0.8303, "step": 6377 }, { "epoch": 0.9619909502262444, "grad_norm": 2.1084578037261963, "learning_rate": 3.6701384674018845e-07, "loss": 1.1322, "step": 6378 }, { "epoch": 0.9621417797888386, "grad_norm": 1.3949884176254272, "learning_rate": 3.6411035124870317e-07, "loss": 0.6075, "step": 6379 }, { "epoch": 0.9622926093514329, "grad_norm": 1.922567367553711, "learning_rate": 3.612183444121342e-07, "loss": 1.0072, "step": 6380 }, { "epoch": 0.9624434389140272, "grad_norm": 1.9455474615097046, "learning_rate": 3.583378268998683e-07, "loss": 0.8225, "step": 6381 }, { "epoch": 0.9625942684766214, "grad_norm": 2.082988739013672, "learning_rate": 3.55468799378611e-07, "loss": 1.2573, "step": 6382 }, { "epoch": 0.9627450980392157, "grad_norm": 1.762777328491211, "learning_rate": 3.5261126251244223e-07, "loss": 0.7789, "step": 6383 }, { "epoch": 0.96289592760181, "grad_norm": 2.2491986751556396, "learning_rate": 3.4976521696274966e-07, "loss": 1.1503, "step": 6384 }, { "epoch": 0.9630467571644042, "grad_norm": 2.2879385948181152, "learning_rate": 3.4693066338828405e-07, "loss": 1.1606, "step": 6385 }, { "epoch": 0.9631975867269985, "grad_norm": 1.879817008972168, "learning_rate": 3.441076024451151e-07, "loss": 0.9548, "step": 6386 }, { "epoch": 0.9633484162895928, "grad_norm": 1.963283658027649, "learning_rate": 3.4129603478668114e-07, "loss": 1.0147, "step": 6387 }, { "epoch": 0.963499245852187, "grad_norm": 1.8836108446121216, "learning_rate": 3.384959610637284e-07, "loss": 0.9685, "step": 6388 }, { "epoch": 0.9636500754147813, "grad_norm": 2.1015186309814453, "learning_rate": 3.357073819243661e-07, "loss": 1.3716, "step": 6389 }, { "epoch": 0.9638009049773756, "grad_norm": 2.008430004119873, "learning_rate": 3.329302980140392e-07, "loss": 1.1204, "step": 6390 }, { "epoch": 0.9639517345399699, "grad_norm": 1.9310039281845093, "learning_rate": 3.3016470997551675e-07, "loss": 0.9668, "step": 6391 }, { "epoch": 0.9641025641025641, "grad_norm": 1.8051788806915283, "learning_rate": 3.274106184489312e-07, "loss": 0.8055, "step": 6392 }, { "epoch": 0.9642533936651584, "grad_norm": 1.9417747259140015, "learning_rate": 3.246680240717226e-07, "loss": 1.1606, "step": 6393 }, { "epoch": 0.9644042232277527, "grad_norm": 2.2688350677490234, "learning_rate": 3.219369274787054e-07, "loss": 1.18, "step": 6394 }, { "epoch": 0.9645550527903469, "grad_norm": 2.1807358264923096, "learning_rate": 3.1921732930200734e-07, "loss": 0.9813, "step": 6395 }, { "epoch": 0.9647058823529412, "grad_norm": 1.7102917432785034, "learning_rate": 3.1650923017110254e-07, "loss": 0.7327, "step": 6396 }, { "epoch": 0.9648567119155355, "grad_norm": 1.8029088973999023, "learning_rate": 3.1381263071280643e-07, "loss": 0.8494, "step": 6397 }, { "epoch": 0.9650075414781297, "grad_norm": 1.7781802415847778, "learning_rate": 3.1112753155126963e-07, "loss": 0.9113, "step": 6398 }, { "epoch": 0.965158371040724, "grad_norm": 1.8984512090682983, "learning_rate": 3.084539333079839e-07, "loss": 0.8357, "step": 6399 }, { "epoch": 0.9653092006033183, "grad_norm": 1.5885837078094482, "learning_rate": 3.0579183660177093e-07, "loss": 0.6474, "step": 6400 }, { "epoch": 0.9654600301659125, "grad_norm": 1.7469663619995117, "learning_rate": 3.0314124204880446e-07, "loss": 1.1415, "step": 6401 }, { "epoch": 0.9656108597285068, "grad_norm": 1.5996235609054565, "learning_rate": 3.0050215026257713e-07, "loss": 0.9154, "step": 6402 }, { "epoch": 0.9657616892911011, "grad_norm": 1.8189271688461304, "learning_rate": 2.978745618539336e-07, "loss": 1.0192, "step": 6403 }, { "epoch": 0.9659125188536953, "grad_norm": 1.7454121112823486, "learning_rate": 2.9525847743105405e-07, "loss": 1.0345, "step": 6404 }, { "epoch": 0.9660633484162896, "grad_norm": 1.6599011421203613, "learning_rate": 2.926538975994486e-07, "loss": 1.0556, "step": 6405 }, { "epoch": 0.9662141779788839, "grad_norm": 1.872502088546753, "learning_rate": 2.9006082296197946e-07, "loss": 1.0526, "step": 6406 }, { "epoch": 0.9663650075414781, "grad_norm": 1.8260844945907593, "learning_rate": 2.874792541188276e-07, "loss": 1.0962, "step": 6407 }, { "epoch": 0.9665158371040724, "grad_norm": 2.0990805625915527, "learning_rate": 2.849091916675206e-07, "loss": 1.28, "step": 6408 }, { "epoch": 0.9666666666666667, "grad_norm": 1.6568230390548706, "learning_rate": 2.8235063620292714e-07, "loss": 0.9448, "step": 6409 }, { "epoch": 0.9668174962292609, "grad_norm": 1.853630781173706, "learning_rate": 2.798035883172401e-07, "loss": 0.9706, "step": 6410 }, { "epoch": 0.9669683257918552, "grad_norm": 1.8414771556854248, "learning_rate": 2.7726804859999897e-07, "loss": 0.9215, "step": 6411 }, { "epoch": 0.9671191553544495, "grad_norm": 1.9662418365478516, "learning_rate": 2.7474401763807886e-07, "loss": 0.86, "step": 6412 }, { "epoch": 0.9672699849170437, "grad_norm": 1.7639135122299194, "learning_rate": 2.722314960156791e-07, "loss": 0.8866, "step": 6413 }, { "epoch": 0.967420814479638, "grad_norm": 1.749525785446167, "learning_rate": 2.6973048431435667e-07, "loss": 1.022, "step": 6414 }, { "epoch": 0.9675716440422323, "grad_norm": 1.6495894193649292, "learning_rate": 2.6724098311298183e-07, "loss": 0.7061, "step": 6415 }, { "epoch": 0.9677224736048265, "grad_norm": 1.8045361042022705, "learning_rate": 2.6476299298777705e-07, "loss": 1.0393, "step": 6416 }, { "epoch": 0.9678733031674208, "grad_norm": 1.9385876655578613, "learning_rate": 2.6229651451229465e-07, "loss": 1.0813, "step": 6417 }, { "epoch": 0.9680241327300151, "grad_norm": 2.213212490081787, "learning_rate": 2.5984154825742235e-07, "loss": 1.3767, "step": 6418 }, { "epoch": 0.9681749622926094, "grad_norm": 2.0189006328582764, "learning_rate": 2.573980947913779e-07, "loss": 1.4085, "step": 6419 }, { "epoch": 0.9683257918552036, "grad_norm": 2.1515588760375977, "learning_rate": 2.549661546797255e-07, "loss": 1.3382, "step": 6420 }, { "epoch": 0.9684766214177979, "grad_norm": 2.0453109741210938, "learning_rate": 2.5254572848535383e-07, "loss": 1.1791, "step": 6421 }, { "epoch": 0.9686274509803922, "grad_norm": 2.314772844314575, "learning_rate": 2.501368167684981e-07, "loss": 1.2573, "step": 6422 }, { "epoch": 0.9687782805429864, "grad_norm": 1.948198914527893, "learning_rate": 2.477394200867178e-07, "loss": 1.0669, "step": 6423 }, { "epoch": 0.9689291101055807, "grad_norm": 2.3818795680999756, "learning_rate": 2.45353538994908e-07, "loss": 1.5041, "step": 6424 }, { "epoch": 0.969079939668175, "grad_norm": 1.7617933750152588, "learning_rate": 2.429791740453102e-07, "loss": 0.9809, "step": 6425 }, { "epoch": 0.9692307692307692, "grad_norm": 1.5112937688827515, "learning_rate": 2.406163257874794e-07, "loss": 0.6922, "step": 6426 }, { "epoch": 0.9693815987933635, "grad_norm": 1.7321679592132568, "learning_rate": 2.3826499476832797e-07, "loss": 0.8135, "step": 6427 }, { "epoch": 0.9695324283559578, "grad_norm": 2.0587239265441895, "learning_rate": 2.3592518153208732e-07, "loss": 0.9306, "step": 6428 }, { "epoch": 0.969683257918552, "grad_norm": 2.0357539653778076, "learning_rate": 2.3359688662032975e-07, "loss": 1.1696, "step": 6429 }, { "epoch": 0.9698340874811463, "grad_norm": 1.9683305025100708, "learning_rate": 2.3128011057195753e-07, "loss": 1.0745, "step": 6430 }, { "epoch": 0.9699849170437406, "grad_norm": 2.4005391597747803, "learning_rate": 2.289748539232084e-07, "loss": 1.2111, "step": 6431 }, { "epoch": 0.9701357466063348, "grad_norm": 1.8384374380111694, "learning_rate": 2.2668111720764996e-07, "loss": 0.9713, "step": 6432 }, { "epoch": 0.9702865761689291, "grad_norm": 2.119652271270752, "learning_rate": 2.2439890095619641e-07, "loss": 1.1779, "step": 6433 }, { "epoch": 0.9704374057315234, "grad_norm": 2.0867671966552734, "learning_rate": 2.2212820569707526e-07, "loss": 1.1961, "step": 6434 }, { "epoch": 0.9705882352941176, "grad_norm": 1.7994745969772339, "learning_rate": 2.1986903195586607e-07, "loss": 0.9142, "step": 6435 }, { "epoch": 0.9707390648567119, "grad_norm": 2.197286367416382, "learning_rate": 2.1762138025547275e-07, "loss": 1.2425, "step": 6436 }, { "epoch": 0.9708898944193062, "grad_norm": 1.8814419507980347, "learning_rate": 2.1538525111613473e-07, "loss": 1.0905, "step": 6437 }, { "epoch": 0.9710407239819004, "grad_norm": 2.010801315307617, "learning_rate": 2.1316064505542133e-07, "loss": 1.0614, "step": 6438 }, { "epoch": 0.9711915535444947, "grad_norm": 1.5283746719360352, "learning_rate": 2.1094756258823735e-07, "loss": 0.6536, "step": 6439 }, { "epoch": 0.971342383107089, "grad_norm": 2.065852165222168, "learning_rate": 2.08746004226823e-07, "loss": 1.2922, "step": 6440 }, { "epoch": 0.9714932126696832, "grad_norm": 1.914737582206726, "learning_rate": 2.0655597048074293e-07, "loss": 1.1637, "step": 6441 }, { "epoch": 0.9716440422322775, "grad_norm": 2.0344982147216797, "learning_rate": 2.043774618568972e-07, "loss": 1.0028, "step": 6442 }, { "epoch": 0.9717948717948718, "grad_norm": 2.1902503967285156, "learning_rate": 2.0221047885953248e-07, "loss": 1.1506, "step": 6443 }, { "epoch": 0.971945701357466, "grad_norm": 2.2948451042175293, "learning_rate": 2.0005502199020309e-07, "loss": 1.2524, "step": 6444 }, { "epoch": 0.9720965309200603, "grad_norm": 2.206702709197998, "learning_rate": 1.9791109174780996e-07, "loss": 1.012, "step": 6445 }, { "epoch": 0.9722473604826546, "grad_norm": 1.6949716806411743, "learning_rate": 1.957786886285895e-07, "loss": 0.734, "step": 6446 }, { "epoch": 0.9723981900452489, "grad_norm": 1.4771981239318848, "learning_rate": 1.9365781312610244e-07, "loss": 0.6859, "step": 6447 }, { "epoch": 0.9725490196078431, "grad_norm": 1.7075660228729248, "learning_rate": 1.915484657312505e-07, "loss": 0.7901, "step": 6448 }, { "epoch": 0.9726998491704374, "grad_norm": 2.1574273109436035, "learning_rate": 1.8945064693224322e-07, "loss": 1.1075, "step": 6449 }, { "epoch": 0.9728506787330317, "grad_norm": 1.7101869583129883, "learning_rate": 1.873643572146533e-07, "loss": 0.8212, "step": 6450 }, { "epoch": 0.9730015082956259, "grad_norm": 2.248549461364746, "learning_rate": 1.8528959706136662e-07, "loss": 1.9165, "step": 6451 }, { "epoch": 0.9731523378582202, "grad_norm": 2.1564855575561523, "learning_rate": 1.8322636695260465e-07, "loss": 1.3283, "step": 6452 }, { "epoch": 0.9733031674208145, "grad_norm": 1.9545012712478638, "learning_rate": 1.811746673659187e-07, "loss": 1.0547, "step": 6453 }, { "epoch": 0.9734539969834087, "grad_norm": 2.080857992172241, "learning_rate": 1.7913449877619558e-07, "loss": 1.5701, "step": 6454 }, { "epoch": 0.973604826546003, "grad_norm": 1.761729121208191, "learning_rate": 1.7710586165564093e-07, "loss": 1.0911, "step": 6455 }, { "epoch": 0.9737556561085973, "grad_norm": 1.5841683149337769, "learning_rate": 1.750887564738124e-07, "loss": 0.8346, "step": 6456 }, { "epoch": 0.9739064856711915, "grad_norm": 2.080693483352661, "learning_rate": 1.7308318369757548e-07, "loss": 1.078, "step": 6457 }, { "epoch": 0.9740573152337858, "grad_norm": 1.6733683347702026, "learning_rate": 1.7108914379114772e-07, "loss": 0.9827, "step": 6458 }, { "epoch": 0.9742081447963801, "grad_norm": 1.8590120077133179, "learning_rate": 1.691066372160599e-07, "loss": 0.9611, "step": 6459 }, { "epoch": 0.9743589743589743, "grad_norm": 2.000999927520752, "learning_rate": 1.6713566443117833e-07, "loss": 1.2743, "step": 6460 }, { "epoch": 0.9745098039215686, "grad_norm": 1.6481130123138428, "learning_rate": 1.651762258927103e-07, "loss": 0.8385, "step": 6461 }, { "epoch": 0.9746606334841629, "grad_norm": 1.4775646924972534, "learning_rate": 1.6322832205417637e-07, "loss": 0.6422, "step": 6462 }, { "epoch": 0.9748114630467571, "grad_norm": 1.965220332145691, "learning_rate": 1.612919533664381e-07, "loss": 1.1127, "step": 6463 }, { "epoch": 0.9749622926093514, "grad_norm": 1.770487904548645, "learning_rate": 1.5936712027768695e-07, "loss": 0.9725, "step": 6464 }, { "epoch": 0.9751131221719457, "grad_norm": 1.8315738439559937, "learning_rate": 1.5745382323343883e-07, "loss": 0.9438, "step": 6465 }, { "epoch": 0.97526395173454, "grad_norm": 1.8729884624481201, "learning_rate": 1.5555206267655055e-07, "loss": 0.919, "step": 6466 }, { "epoch": 0.9754147812971342, "grad_norm": 2.053765058517456, "learning_rate": 1.5366183904719222e-07, "loss": 1.1069, "step": 6467 }, { "epoch": 0.9755656108597285, "grad_norm": 1.6839706897735596, "learning_rate": 1.5178315278287502e-07, "loss": 0.975, "step": 6468 }, { "epoch": 0.9757164404223228, "grad_norm": 1.818885087966919, "learning_rate": 1.4991600431843443e-07, "loss": 0.914, "step": 6469 }, { "epoch": 0.975867269984917, "grad_norm": 1.8105655908584595, "learning_rate": 1.48060394086047e-07, "loss": 0.9206, "step": 6470 }, { "epoch": 0.9760180995475113, "grad_norm": 2.1190874576568604, "learning_rate": 1.462163225151969e-07, "loss": 1.3382, "step": 6471 }, { "epoch": 0.9761689291101056, "grad_norm": 1.9208011627197266, "learning_rate": 1.4438379003272605e-07, "loss": 0.9099, "step": 6472 }, { "epoch": 0.9763197586726998, "grad_norm": 1.6620832681655884, "learning_rate": 1.4256279706277299e-07, "loss": 0.8732, "step": 6473 }, { "epoch": 0.9764705882352941, "grad_norm": 2.034463405609131, "learning_rate": 1.4075334402683937e-07, "loss": 0.9726, "step": 6474 }, { "epoch": 0.9766214177978884, "grad_norm": 1.8176192045211792, "learning_rate": 1.3895543134372358e-07, "loss": 1.0147, "step": 6475 }, { "epoch": 0.9767722473604826, "grad_norm": 1.8768322467803955, "learning_rate": 1.3716905942957602e-07, "loss": 0.9693, "step": 6476 }, { "epoch": 0.9769230769230769, "grad_norm": 1.8389067649841309, "learning_rate": 1.35394228697866e-07, "loss": 1.0102, "step": 6477 }, { "epoch": 0.9770739064856712, "grad_norm": 2.1269052028656006, "learning_rate": 1.3363093955939266e-07, "loss": 1.0953, "step": 6478 }, { "epoch": 0.9772247360482654, "grad_norm": 2.317656993865967, "learning_rate": 1.3187919242229063e-07, "loss": 1.18, "step": 6479 }, { "epoch": 0.9773755656108597, "grad_norm": 1.6912294626235962, "learning_rate": 1.3013898769200784e-07, "loss": 0.8096, "step": 6480 }, { "epoch": 0.977526395173454, "grad_norm": 1.921573281288147, "learning_rate": 1.2841032577133317e-07, "loss": 1.0757, "step": 6481 }, { "epoch": 0.9776772247360482, "grad_norm": 1.7856738567352295, "learning_rate": 1.2669320706037991e-07, "loss": 0.9704, "step": 6482 }, { "epoch": 0.9778280542986425, "grad_norm": 2.447734832763672, "learning_rate": 1.2498763195659125e-07, "loss": 1.2028, "step": 6483 }, { "epoch": 0.9779788838612368, "grad_norm": 1.6459358930587769, "learning_rate": 1.2329360085473472e-07, "loss": 0.7704, "step": 6484 }, { "epoch": 0.978129713423831, "grad_norm": 2.0553648471832275, "learning_rate": 1.2161111414691896e-07, "loss": 1.124, "step": 6485 }, { "epoch": 0.9782805429864253, "grad_norm": 1.5967365503311157, "learning_rate": 1.1994017222255461e-07, "loss": 0.8144, "step": 6486 }, { "epoch": 0.9784313725490196, "grad_norm": 1.9038810729980469, "learning_rate": 1.1828077546840455e-07, "loss": 0.9251, "step": 6487 }, { "epoch": 0.9785822021116138, "grad_norm": 1.8847215175628662, "learning_rate": 1.1663292426854489e-07, "loss": 0.9066, "step": 6488 }, { "epoch": 0.9787330316742081, "grad_norm": 2.5748424530029297, "learning_rate": 1.1499661900439274e-07, "loss": 1.0472, "step": 6489 }, { "epoch": 0.9788838612368024, "grad_norm": 2.0933051109313965, "learning_rate": 1.1337186005467848e-07, "loss": 1.2256, "step": 6490 }, { "epoch": 0.9790346907993966, "grad_norm": 2.030015707015991, "learning_rate": 1.1175864779547351e-07, "loss": 0.8935, "step": 6491 }, { "epoch": 0.9791855203619909, "grad_norm": 2.0891942977905273, "learning_rate": 1.101569826001625e-07, "loss": 1.0113, "step": 6492 }, { "epoch": 0.9793363499245852, "grad_norm": 2.1114308834075928, "learning_rate": 1.0856686483946555e-07, "loss": 1.0275, "step": 6493 }, { "epoch": 0.9794871794871794, "grad_norm": 2.236339569091797, "learning_rate": 1.0698829488143269e-07, "loss": 1.2783, "step": 6494 }, { "epoch": 0.9796380090497737, "grad_norm": 2.22591233253479, "learning_rate": 1.0542127309143834e-07, "loss": 1.1781, "step": 6495 }, { "epoch": 0.979788838612368, "grad_norm": 1.9555222988128662, "learning_rate": 1.0386579983217571e-07, "loss": 0.8283, "step": 6496 }, { "epoch": 0.9799396681749623, "grad_norm": 1.7273701429367065, "learning_rate": 1.02321875463679e-07, "loss": 0.6755, "step": 6497 }, { "epoch": 0.9800904977375565, "grad_norm": 1.6880528926849365, "learning_rate": 1.0078950034330681e-07, "loss": 0.7478, "step": 6498 }, { "epoch": 0.9802413273001508, "grad_norm": 1.6948614120483398, "learning_rate": 9.926867482573099e-08, "loss": 0.8543, "step": 6499 }, { "epoch": 0.9803921568627451, "grad_norm": 1.8066236972808838, "learning_rate": 9.77593992629644e-08, "loss": 0.7576, "step": 6500 }, { "epoch": 0.9805429864253393, "grad_norm": 1.5170810222625732, "learning_rate": 9.626167400433872e-08, "loss": 0.8879, "step": 6501 }, { "epoch": 0.9806938159879336, "grad_norm": 1.8861565589904785, "learning_rate": 9.477549939652108e-08, "loss": 1.2489, "step": 6502 }, { "epoch": 0.9808446455505279, "grad_norm": 2.005974054336548, "learning_rate": 9.330087578349745e-08, "loss": 1.0192, "step": 6503 }, { "epoch": 0.9809954751131221, "grad_norm": 1.9359047412872314, "learning_rate": 9.183780350657812e-08, "loss": 1.1492, "step": 6504 }, { "epoch": 0.9811463046757164, "grad_norm": 1.7721647024154663, "learning_rate": 9.038628290440887e-08, "loss": 0.941, "step": 6505 }, { "epoch": 0.9812971342383107, "grad_norm": 1.5403116941452026, "learning_rate": 8.89463143129543e-08, "loss": 0.8233, "step": 6506 }, { "epoch": 0.9814479638009049, "grad_norm": 1.6215004920959473, "learning_rate": 8.751789806550892e-08, "loss": 0.7563, "step": 6507 }, { "epoch": 0.9815987933634992, "grad_norm": 2.063256025314331, "learning_rate": 8.610103449268603e-08, "loss": 1.3814, "step": 6508 }, { "epoch": 0.9817496229260935, "grad_norm": 1.958470344543457, "learning_rate": 8.469572392243996e-08, "loss": 1.3061, "step": 6509 }, { "epoch": 0.9819004524886877, "grad_norm": 1.8253540992736816, "learning_rate": 8.330196668003831e-08, "loss": 1.1178, "step": 6510 }, { "epoch": 0.982051282051282, "grad_norm": 2.160386323928833, "learning_rate": 8.191976308807858e-08, "loss": 1.3507, "step": 6511 }, { "epoch": 0.9822021116138763, "grad_norm": 1.7865500450134277, "learning_rate": 8.054911346647709e-08, "loss": 1.0007, "step": 6512 }, { "epoch": 0.9823529411764705, "grad_norm": 1.9779447317123413, "learning_rate": 7.919001813249671e-08, "loss": 1.1617, "step": 6513 }, { "epoch": 0.9825037707390648, "grad_norm": 1.7435057163238525, "learning_rate": 7.784247740069694e-08, "loss": 0.8858, "step": 6514 }, { "epoch": 0.9826546003016591, "grad_norm": 1.9248754978179932, "learning_rate": 7.650649158298384e-08, "loss": 1.0385, "step": 6515 }, { "epoch": 0.9828054298642533, "grad_norm": 1.8311914205551147, "learning_rate": 7.518206098858782e-08, "loss": 0.8934, "step": 6516 }, { "epoch": 0.9829562594268476, "grad_norm": 1.7870116233825684, "learning_rate": 7.386918592405256e-08, "loss": 1.0486, "step": 6517 }, { "epoch": 0.9831070889894419, "grad_norm": 2.1831729412078857, "learning_rate": 7.256786669325721e-08, "loss": 1.2173, "step": 6518 }, { "epoch": 0.9832579185520361, "grad_norm": 1.9115618467330933, "learning_rate": 7.127810359740527e-08, "loss": 0.8471, "step": 6519 }, { "epoch": 0.9834087481146304, "grad_norm": 2.1784827709198, "learning_rate": 6.999989693501908e-08, "loss": 1.3449, "step": 6520 }, { "epoch": 0.9835595776772247, "grad_norm": 1.8847562074661255, "learning_rate": 6.873324700195083e-08, "loss": 1.0211, "step": 6521 }, { "epoch": 0.983710407239819, "grad_norm": 2.0372416973114014, "learning_rate": 6.74781540913827e-08, "loss": 1.1225, "step": 6522 }, { "epoch": 0.9838612368024132, "grad_norm": 1.8074803352355957, "learning_rate": 6.623461849381563e-08, "loss": 0.942, "step": 6523 }, { "epoch": 0.9840120663650075, "grad_norm": 1.759004831314087, "learning_rate": 6.50026404970694e-08, "loss": 1.0119, "step": 6524 }, { "epoch": 0.9841628959276018, "grad_norm": 1.897386074066162, "learning_rate": 6.378222038630477e-08, "loss": 1.1226, "step": 6525 }, { "epoch": 0.984313725490196, "grad_norm": 1.8542094230651855, "learning_rate": 6.257335844399581e-08, "loss": 0.9392, "step": 6526 }, { "epoch": 0.9844645550527904, "grad_norm": 1.5449867248535156, "learning_rate": 6.137605494994092e-08, "loss": 0.6639, "step": 6527 }, { "epoch": 0.9846153846153847, "grad_norm": 1.8442318439483643, "learning_rate": 6.019031018126841e-08, "loss": 0.9855, "step": 6528 }, { "epoch": 0.9847662141779789, "grad_norm": 2.0847725868225098, "learning_rate": 5.9016124412430987e-08, "loss": 1.1598, "step": 6529 }, { "epoch": 0.9849170437405732, "grad_norm": 1.7849246263504028, "learning_rate": 5.785349791520012e-08, "loss": 0.9352, "step": 6530 }, { "epoch": 0.9850678733031675, "grad_norm": 1.9082951545715332, "learning_rate": 5.670243095867722e-08, "loss": 0.911, "step": 6531 }, { "epoch": 0.9852187028657617, "grad_norm": 1.6478004455566406, "learning_rate": 5.5562923809293624e-08, "loss": 0.7075, "step": 6532 }, { "epoch": 0.985369532428356, "grad_norm": 2.0368950366973877, "learning_rate": 5.4434976730788346e-08, "loss": 1.2577, "step": 6533 }, { "epoch": 0.9855203619909503, "grad_norm": 2.204418897628784, "learning_rate": 5.331858998423589e-08, "loss": 1.4991, "step": 6534 }, { "epoch": 0.9856711915535445, "grad_norm": 1.966247797012329, "learning_rate": 5.221376382803511e-08, "loss": 1.0373, "step": 6535 }, { "epoch": 0.9858220211161388, "grad_norm": 1.7758862972259521, "learning_rate": 5.1120498517914785e-08, "loss": 0.9066, "step": 6536 }, { "epoch": 0.9859728506787331, "grad_norm": 1.944427251815796, "learning_rate": 5.0038794306905834e-08, "loss": 1.1688, "step": 6537 }, { "epoch": 0.9861236802413273, "grad_norm": 1.944295048713684, "learning_rate": 4.896865144539131e-08, "loss": 0.981, "step": 6538 }, { "epoch": 0.9862745098039216, "grad_norm": 1.916852593421936, "learning_rate": 4.7910070181061974e-08, "loss": 1.1284, "step": 6539 }, { "epoch": 0.9864253393665159, "grad_norm": 2.07956862449646, "learning_rate": 4.686305075892738e-08, "loss": 1.198, "step": 6540 }, { "epoch": 0.9865761689291102, "grad_norm": 2.1623141765594482, "learning_rate": 4.5827593421338134e-08, "loss": 1.3789, "step": 6541 }, { "epoch": 0.9867269984917044, "grad_norm": 2.1320431232452393, "learning_rate": 4.480369840795806e-08, "loss": 1.2926, "step": 6542 }, { "epoch": 0.9868778280542987, "grad_norm": 2.1627719402313232, "learning_rate": 4.379136595577537e-08, "loss": 1.1043, "step": 6543 }, { "epoch": 0.987028657616893, "grad_norm": 2.2448055744171143, "learning_rate": 4.2790596299102646e-08, "loss": 1.14, "step": 6544 }, { "epoch": 0.9871794871794872, "grad_norm": 1.8780425786972046, "learning_rate": 4.1801389669576805e-08, "loss": 0.9128, "step": 6545 }, { "epoch": 0.9873303167420815, "grad_norm": 2.0032410621643066, "learning_rate": 4.082374629615915e-08, "loss": 0.9716, "step": 6546 }, { "epoch": 0.9874811463046758, "grad_norm": 1.782638669013977, "learning_rate": 3.985766640513533e-08, "loss": 0.8816, "step": 6547 }, { "epoch": 0.98763197586727, "grad_norm": 1.7118865251541138, "learning_rate": 3.890315022010982e-08, "loss": 0.7538, "step": 6548 }, { "epoch": 0.9877828054298643, "grad_norm": 1.3487775325775146, "learning_rate": 3.7960197962011447e-08, "loss": 0.5478, "step": 6549 }, { "epoch": 0.9879336349924586, "grad_norm": 1.5120551586151123, "learning_rate": 3.7028809849098955e-08, "loss": 0.578, "step": 6550 }, { "epoch": 0.9880844645550528, "grad_norm": 1.6499603986740112, "learning_rate": 3.610898609694991e-08, "loss": 0.9084, "step": 6551 }, { "epoch": 0.9882352941176471, "grad_norm": 2.073840379714966, "learning_rate": 3.520072691846621e-08, "loss": 1.3946, "step": 6552 }, { "epoch": 0.9883861236802414, "grad_norm": 2.301109552383423, "learning_rate": 3.43040325238686e-08, "loss": 1.5888, "step": 6553 }, { "epoch": 0.9885369532428356, "grad_norm": 2.0262253284454346, "learning_rate": 3.341890312070772e-08, "loss": 1.198, "step": 6554 }, { "epoch": 0.9886877828054299, "grad_norm": 1.9711953401565552, "learning_rate": 3.254533891385303e-08, "loss": 0.998, "step": 6555 }, { "epoch": 0.9888386123680242, "grad_norm": 1.9102327823638916, "learning_rate": 3.168334010549834e-08, "loss": 1.16, "step": 6556 }, { "epoch": 0.9889894419306184, "grad_norm": 1.8268523216247559, "learning_rate": 3.083290689516183e-08, "loss": 1.085, "step": 6557 }, { "epoch": 0.9891402714932127, "grad_norm": 1.8766851425170898, "learning_rate": 2.999403947968049e-08, "loss": 1.0967, "step": 6558 }, { "epoch": 0.989291101055807, "grad_norm": 1.867449164390564, "learning_rate": 2.9166738053221232e-08, "loss": 0.9528, "step": 6559 }, { "epoch": 0.9894419306184012, "grad_norm": 2.329340696334839, "learning_rate": 2.8351002807269767e-08, "loss": 1.5427, "step": 6560 }, { "epoch": 0.9895927601809955, "grad_norm": 1.7027629613876343, "learning_rate": 2.7546833930636173e-08, "loss": 0.8837, "step": 6561 }, { "epoch": 0.9897435897435898, "grad_norm": 1.9053348302841187, "learning_rate": 2.6754231609449344e-08, "loss": 1.0772, "step": 6562 }, { "epoch": 0.989894419306184, "grad_norm": 1.7392765283584595, "learning_rate": 2.5973196027162527e-08, "loss": 0.8626, "step": 6563 }, { "epoch": 0.9900452488687783, "grad_norm": 1.7653361558914185, "learning_rate": 2.5203727364558892e-08, "loss": 1.1656, "step": 6564 }, { "epoch": 0.9901960784313726, "grad_norm": 2.0128557682037354, "learning_rate": 2.4445825799729317e-08, "loss": 1.1097, "step": 6565 }, { "epoch": 0.9903469079939669, "grad_norm": 1.5651352405548096, "learning_rate": 2.3699491508105687e-08, "loss": 0.7791, "step": 6566 }, { "epoch": 0.9904977375565611, "grad_norm": 1.7918542623519897, "learning_rate": 2.2964724662433156e-08, "loss": 1.0578, "step": 6567 }, { "epoch": 0.9906485671191554, "grad_norm": 2.0644359588623047, "learning_rate": 2.224152543277569e-08, "loss": 1.1738, "step": 6568 }, { "epoch": 0.9907993966817497, "grad_norm": 1.8133044242858887, "learning_rate": 2.152989398652161e-08, "loss": 1.0495, "step": 6569 }, { "epoch": 0.9909502262443439, "grad_norm": 1.6463404893875122, "learning_rate": 2.0829830488389156e-08, "loss": 0.726, "step": 6570 }, { "epoch": 0.9911010558069382, "grad_norm": 1.8328197002410889, "learning_rate": 2.014133510041538e-08, "loss": 0.976, "step": 6571 }, { "epoch": 0.9912518853695325, "grad_norm": 1.6490834951400757, "learning_rate": 1.9464407981956146e-08, "loss": 0.8504, "step": 6572 }, { "epoch": 0.9914027149321267, "grad_norm": 1.933363914489746, "learning_rate": 1.879904928969167e-08, "loss": 0.9774, "step": 6573 }, { "epoch": 0.991553544494721, "grad_norm": 1.8430428504943848, "learning_rate": 1.8145259177621e-08, "loss": 0.8791, "step": 6574 }, { "epoch": 0.9917043740573153, "grad_norm": 2.158477544784546, "learning_rate": 1.7503037797078626e-08, "loss": 1.3405, "step": 6575 }, { "epoch": 0.9918552036199095, "grad_norm": 2.0298755168914795, "learning_rate": 1.687238529670121e-08, "loss": 1.2048, "step": 6576 }, { "epoch": 0.9920060331825038, "grad_norm": 2.078059196472168, "learning_rate": 1.6253301822466428e-08, "loss": 1.1711, "step": 6577 }, { "epoch": 0.9921568627450981, "grad_norm": 1.9220235347747803, "learning_rate": 1.5645787517670762e-08, "loss": 0.8961, "step": 6578 }, { "epoch": 0.9923076923076923, "grad_norm": 2.269637107849121, "learning_rate": 1.5049842522918412e-08, "loss": 1.4525, "step": 6579 }, { "epoch": 0.9924585218702866, "grad_norm": 2.0240731239318848, "learning_rate": 1.4465466976149034e-08, "loss": 1.2533, "step": 6580 }, { "epoch": 0.9926093514328809, "grad_norm": 2.1804757118225098, "learning_rate": 1.38926610126211e-08, "loss": 1.1277, "step": 6581 }, { "epoch": 0.9927601809954751, "grad_norm": 2.2509958744049072, "learning_rate": 1.3331424764922994e-08, "loss": 1.1611, "step": 6582 }, { "epoch": 0.9929110105580694, "grad_norm": 1.9428391456604004, "learning_rate": 1.2781758362945262e-08, "loss": 1.0481, "step": 6583 }, { "epoch": 0.9930618401206637, "grad_norm": 1.855837106704712, "learning_rate": 1.224366193392501e-08, "loss": 0.8939, "step": 6584 }, { "epoch": 0.9932126696832579, "grad_norm": 1.8608096837997437, "learning_rate": 1.1717135602401507e-08, "loss": 0.9874, "step": 6585 }, { "epoch": 0.9933634992458522, "grad_norm": 1.6831547021865845, "learning_rate": 1.1202179490243937e-08, "loss": 0.9085, "step": 6586 }, { "epoch": 0.9935143288084465, "grad_norm": 2.1930525302886963, "learning_rate": 1.069879371664584e-08, "loss": 1.0266, "step": 6587 }, { "epoch": 0.9936651583710407, "grad_norm": 1.843945026397705, "learning_rate": 1.0206978398119572e-08, "loss": 1.0207, "step": 6588 }, { "epoch": 0.993815987933635, "grad_norm": 1.9906690120697021, "learning_rate": 9.72673364850185e-09, "loss": 1.1523, "step": 6589 }, { "epoch": 0.9939668174962293, "grad_norm": 2.588918685913086, "learning_rate": 9.258059578948209e-09, "loss": 1.4171, "step": 6590 }, { "epoch": 0.9941176470588236, "grad_norm": 2.232747793197632, "learning_rate": 8.800956297932983e-09, "loss": 1.1359, "step": 6591 }, { "epoch": 0.9942684766214178, "grad_norm": 1.9260307550430298, "learning_rate": 8.35542391126598e-09, "loss": 0.8841, "step": 6592 }, { "epoch": 0.9944193061840121, "grad_norm": 2.2145347595214844, "learning_rate": 7.921462522059164e-09, "loss": 1.0699, "step": 6593 }, { "epoch": 0.9945701357466064, "grad_norm": 2.3072757720947266, "learning_rate": 7.499072230765514e-09, "loss": 1.0223, "step": 6594 }, { "epoch": 0.9947209653092006, "grad_norm": 1.863654613494873, "learning_rate": 7.088253135145717e-09, "loss": 0.9445, "step": 6595 }, { "epoch": 0.9948717948717949, "grad_norm": 2.0308523178100586, "learning_rate": 6.6890053302848255e-09, "loss": 1.0649, "step": 6596 }, { "epoch": 0.9950226244343892, "grad_norm": 1.6063568592071533, "learning_rate": 6.301328908597803e-09, "loss": 0.7339, "step": 6597 }, { "epoch": 0.9951734539969834, "grad_norm": 1.5589513778686523, "learning_rate": 5.925223959818427e-09, "loss": 0.5174, "step": 6598 }, { "epoch": 0.9953242835595777, "grad_norm": 1.47749924659729, "learning_rate": 5.560690570988181e-09, "loss": 0.6071, "step": 6599 }, { "epoch": 0.995475113122172, "grad_norm": 2.1695616245269775, "learning_rate": 5.207728826495118e-09, "loss": 1.0439, "step": 6600 }, { "epoch": 0.9956259426847662, "grad_norm": 1.7432292699813843, "learning_rate": 4.866338808023896e-09, "loss": 1.0061, "step": 6601 }, { "epoch": 0.9957767722473605, "grad_norm": 1.63108491897583, "learning_rate": 4.53652059459464e-09, "loss": 0.9774, "step": 6602 }, { "epoch": 0.9959276018099548, "grad_norm": 1.8450249433517456, "learning_rate": 4.218274262551835e-09, "loss": 1.0761, "step": 6603 }, { "epoch": 0.996078431372549, "grad_norm": 1.7824820280075073, "learning_rate": 3.91159988555323e-09, "loss": 1.0144, "step": 6604 }, { "epoch": 0.9962292609351433, "grad_norm": 1.6914958953857422, "learning_rate": 3.6164975345809316e-09, "loss": 0.8875, "step": 6605 }, { "epoch": 0.9963800904977376, "grad_norm": 1.807647943496704, "learning_rate": 3.3329672779414124e-09, "loss": 0.9614, "step": 6606 }, { "epoch": 0.9965309200603318, "grad_norm": 1.8976490497589111, "learning_rate": 3.061009181254404e-09, "loss": 1.1459, "step": 6607 }, { "epoch": 0.9966817496229261, "grad_norm": 1.571880578994751, "learning_rate": 2.800623307469552e-09, "loss": 0.7526, "step": 6608 }, { "epoch": 0.9968325791855204, "grad_norm": 2.0932419300079346, "learning_rate": 2.5518097168608646e-09, "loss": 1.3696, "step": 6609 }, { "epoch": 0.9969834087481146, "grad_norm": 1.9494574069976807, "learning_rate": 2.3145684670100587e-09, "loss": 1.2108, "step": 6610 }, { "epoch": 0.9971342383107089, "grad_norm": 1.837791919708252, "learning_rate": 2.0888996128343164e-09, "loss": 1.0885, "step": 6611 }, { "epoch": 0.9972850678733032, "grad_norm": 1.9261425733566284, "learning_rate": 1.8748032065640797e-09, "loss": 1.0092, "step": 6612 }, { "epoch": 0.9974358974358974, "grad_norm": 1.8976205587387085, "learning_rate": 1.6722792977541535e-09, "loss": 0.9953, "step": 6613 }, { "epoch": 0.9975867269984917, "grad_norm": 2.369472026824951, "learning_rate": 1.4813279332781538e-09, "loss": 1.2183, "step": 6614 }, { "epoch": 0.997737556561086, "grad_norm": 2.169970750808716, "learning_rate": 1.3019491573396104e-09, "loss": 1.3011, "step": 6615 }, { "epoch": 0.9978883861236802, "grad_norm": 1.7789149284362793, "learning_rate": 1.1341430114553132e-09, "loss": 0.8241, "step": 6616 }, { "epoch": 0.9980392156862745, "grad_norm": 1.9637186527252197, "learning_rate": 9.779095344608636e-10, "loss": 0.9467, "step": 6617 }, { "epoch": 0.9981900452488688, "grad_norm": 1.9668041467666626, "learning_rate": 8.332487625217767e-10, "loss": 1.0528, "step": 6618 }, { "epoch": 0.998340874811463, "grad_norm": 1.6691468954086304, "learning_rate": 7.001607291168278e-10, "loss": 0.8094, "step": 6619 }, { "epoch": 0.9984917043740573, "grad_norm": 2.0753369331359863, "learning_rate": 5.786454650602568e-10, "loss": 1.1893, "step": 6620 }, { "epoch": 0.9986425339366516, "grad_norm": 2.0562314987182617, "learning_rate": 4.68702998462911e-10, "loss": 1.2021, "step": 6621 }, { "epoch": 0.9987933634992459, "grad_norm": 2.3388001918792725, "learning_rate": 3.7033335478775523e-10, "loss": 1.2424, "step": 6622 }, { "epoch": 0.9989441930618401, "grad_norm": 1.9954073429107666, "learning_rate": 2.8353655679436154e-10, "loss": 0.9312, "step": 6623 }, { "epoch": 0.9990950226244344, "grad_norm": 2.308021068572998, "learning_rate": 2.0831262457221557e-10, "loss": 1.4345, "step": 6624 }, { "epoch": 0.9992458521870287, "grad_norm": 1.8140825033187866, "learning_rate": 1.4466157553516547e-10, "loss": 0.8634, "step": 6625 }, { "epoch": 0.9993966817496229, "grad_norm": 1.8196262121200562, "learning_rate": 9.258342441587076e-11, "loss": 0.8215, "step": 6626 }, { "epoch": 0.9995475113122172, "grad_norm": 1.757359504699707, "learning_rate": 5.207818326580238e-11, "loss": 0.8753, "step": 6627 }, { "epoch": 0.9996983408748115, "grad_norm": 2.1403098106384277, "learning_rate": 2.3145861460793782e-11, "loss": 1.122, "step": 6628 }, { "epoch": 0.9998491704374057, "grad_norm": 1.536190390586853, "learning_rate": 5.78646570104091e-12, "loss": 0.7415, "step": 6629 }, { "epoch": 1.0, "grad_norm": 2.804316520690918, "learning_rate": 0.0, "loss": 1.0438, "step": 6630 } ], "logging_steps": 1, "max_steps": 6630, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 239, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 7.369274315715379e+18, "train_batch_size": 4, "trial_name": null, "trial_params": null }