{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 15901, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 12.560257448818305, "learning_rate": 2.092050209205021e-08, "loss": 1.3878, "step": 1 }, { "epoch": 0.0, "grad_norm": 12.05368035791786, "learning_rate": 4.184100418410042e-08, "loss": 1.4129, "step": 2 }, { "epoch": 0.0, "grad_norm": 12.118477403459769, "learning_rate": 6.276150627615063e-08, "loss": 1.4392, "step": 3 }, { "epoch": 0.0, "grad_norm": 12.818196701992624, "learning_rate": 8.368200836820084e-08, "loss": 1.4417, "step": 4 }, { "epoch": 0.0, "grad_norm": 12.52241286190301, "learning_rate": 1.0460251046025104e-07, "loss": 1.4274, "step": 5 }, { "epoch": 0.0, "grad_norm": 12.166198039046842, "learning_rate": 1.2552301255230126e-07, "loss": 1.4287, "step": 6 }, { "epoch": 0.0, "grad_norm": 12.152000828367362, "learning_rate": 1.4644351464435148e-07, "loss": 1.3883, "step": 7 }, { "epoch": 0.0, "grad_norm": 12.282759445642426, "learning_rate": 1.6736401673640168e-07, "loss": 1.5214, "step": 8 }, { "epoch": 0.0, "grad_norm": 12.187048651757747, "learning_rate": 1.882845188284519e-07, "loss": 1.3853, "step": 9 }, { "epoch": 0.0, "grad_norm": 11.755813758937292, "learning_rate": 2.092050209205021e-07, "loss": 1.3802, "step": 10 }, { "epoch": 0.0, "grad_norm": 12.471968662690486, "learning_rate": 2.3012552301255234e-07, "loss": 1.3958, "step": 11 }, { "epoch": 0.0, "grad_norm": 12.023481948686502, "learning_rate": 2.5104602510460253e-07, "loss": 1.3625, "step": 12 }, { "epoch": 0.0, "grad_norm": 11.407879675934606, "learning_rate": 2.7196652719665275e-07, "loss": 1.3987, "step": 13 }, { "epoch": 0.0, "grad_norm": 11.002417532604325, "learning_rate": 2.9288702928870297e-07, "loss": 1.3685, "step": 14 }, { "epoch": 0.0, "grad_norm": 12.046397935114733, "learning_rate": 3.1380753138075313e-07, "loss": 1.3761, "step": 15 }, { "epoch": 0.0, "grad_norm": 9.74413617531794, "learning_rate": 3.3472803347280335e-07, "loss": 1.3515, "step": 16 }, { "epoch": 0.0, "grad_norm": 9.668487707646594, "learning_rate": 3.5564853556485363e-07, "loss": 1.3325, "step": 17 }, { "epoch": 0.0, "grad_norm": 9.55703442979226, "learning_rate": 3.765690376569038e-07, "loss": 1.3312, "step": 18 }, { "epoch": 0.0, "grad_norm": 8.86102637540386, "learning_rate": 3.97489539748954e-07, "loss": 1.3139, "step": 19 }, { "epoch": 0.0, "grad_norm": 8.885150017781749, "learning_rate": 4.184100418410042e-07, "loss": 1.313, "step": 20 }, { "epoch": 0.0, "grad_norm": 6.508917024905127, "learning_rate": 4.393305439330544e-07, "loss": 1.182, "step": 21 }, { "epoch": 0.0, "grad_norm": 12.155384192157996, "learning_rate": 4.6025104602510467e-07, "loss": 1.1837, "step": 22 }, { "epoch": 0.0, "grad_norm": 6.159131473054676, "learning_rate": 4.811715481171549e-07, "loss": 1.2192, "step": 23 }, { "epoch": 0.0, "grad_norm": 5.591628396174414, "learning_rate": 5.020920502092051e-07, "loss": 1.2123, "step": 24 }, { "epoch": 0.0, "grad_norm": 5.2459320704076555, "learning_rate": 5.230125523012552e-07, "loss": 1.1723, "step": 25 }, { "epoch": 0.0, "grad_norm": 4.686533379285876, "learning_rate": 5.439330543933055e-07, "loss": 1.2776, "step": 26 }, { "epoch": 0.0, "grad_norm": 4.744258441539658, "learning_rate": 5.648535564853557e-07, "loss": 1.1493, "step": 27 }, { "epoch": 0.0, "grad_norm": 4.774128304322188, "learning_rate": 5.857740585774059e-07, "loss": 1.1565, "step": 28 }, { "epoch": 0.0, "grad_norm": 3.7154225724871135, "learning_rate": 6.066945606694561e-07, "loss": 1.0396, "step": 29 }, { "epoch": 0.0, "grad_norm": 3.5116914798097727, "learning_rate": 6.276150627615063e-07, "loss": 1.0286, "step": 30 }, { "epoch": 0.0, "grad_norm": 3.3769413257117877, "learning_rate": 6.485355648535565e-07, "loss": 1.0221, "step": 31 }, { "epoch": 0.0, "grad_norm": 5.715927901242416, "learning_rate": 6.694560669456067e-07, "loss": 0.7152, "step": 32 }, { "epoch": 0.0, "grad_norm": 3.0664296690038766, "learning_rate": 6.90376569037657e-07, "loss": 1.0258, "step": 33 }, { "epoch": 0.0, "grad_norm": 3.8471000703188283, "learning_rate": 7.112970711297073e-07, "loss": 1.0315, "step": 34 }, { "epoch": 0.0, "grad_norm": 3.04394481220455, "learning_rate": 7.322175732217573e-07, "loss": 1.0299, "step": 35 }, { "epoch": 0.0, "grad_norm": 2.9290763986647264, "learning_rate": 7.531380753138076e-07, "loss": 1.0159, "step": 36 }, { "epoch": 0.0, "grad_norm": 2.560701757054639, "learning_rate": 7.740585774058578e-07, "loss": 0.9823, "step": 37 }, { "epoch": 0.0, "grad_norm": 2.4094589147970704, "learning_rate": 7.94979079497908e-07, "loss": 0.9859, "step": 38 }, { "epoch": 0.0, "grad_norm": 2.304616304731331, "learning_rate": 8.158995815899583e-07, "loss": 0.9283, "step": 39 }, { "epoch": 0.0, "grad_norm": 2.336308342883063, "learning_rate": 8.368200836820084e-07, "loss": 0.9183, "step": 40 }, { "epoch": 0.0, "grad_norm": 2.6741125301434066, "learning_rate": 8.577405857740586e-07, "loss": 0.9356, "step": 41 }, { "epoch": 0.0, "grad_norm": 2.5818907495124943, "learning_rate": 8.786610878661088e-07, "loss": 0.909, "step": 42 }, { "epoch": 0.0, "grad_norm": 2.5428484367199093, "learning_rate": 8.995815899581591e-07, "loss": 0.9047, "step": 43 }, { "epoch": 0.0, "grad_norm": 2.563139490584407, "learning_rate": 9.205020920502093e-07, "loss": 0.9273, "step": 44 }, { "epoch": 0.0, "grad_norm": 2.559972040338691, "learning_rate": 9.414225941422594e-07, "loss": 0.9096, "step": 45 }, { "epoch": 0.0, "grad_norm": 2.176763259124502, "learning_rate": 9.623430962343098e-07, "loss": 0.8882, "step": 46 }, { "epoch": 0.0, "grad_norm": 2.147504876876301, "learning_rate": 9.8326359832636e-07, "loss": 0.8603, "step": 47 }, { "epoch": 0.0, "grad_norm": 1.9553524513603393, "learning_rate": 1.0041841004184101e-06, "loss": 0.8538, "step": 48 }, { "epoch": 0.0, "grad_norm": 1.7258382720756227, "learning_rate": 1.0251046025104603e-06, "loss": 0.8622, "step": 49 }, { "epoch": 0.0, "grad_norm": 2.227266937437163, "learning_rate": 1.0460251046025104e-06, "loss": 0.9289, "step": 50 }, { "epoch": 0.0, "grad_norm": 1.6795622373939423, "learning_rate": 1.0669456066945608e-06, "loss": 0.8651, "step": 51 }, { "epoch": 0.0, "grad_norm": 3.254037734651701, "learning_rate": 1.087866108786611e-06, "loss": 0.8048, "step": 52 }, { "epoch": 0.0, "grad_norm": 1.7583196392527818, "learning_rate": 1.1087866108786612e-06, "loss": 0.8187, "step": 53 }, { "epoch": 0.0, "grad_norm": 1.6957034862495282, "learning_rate": 1.1297071129707113e-06, "loss": 0.8168, "step": 54 }, { "epoch": 0.0, "grad_norm": 1.6473460490856577, "learning_rate": 1.1506276150627615e-06, "loss": 0.7906, "step": 55 }, { "epoch": 0.0, "grad_norm": 1.614593057786042, "learning_rate": 1.1715481171548119e-06, "loss": 0.8337, "step": 56 }, { "epoch": 0.0, "grad_norm": 1.572763056170728, "learning_rate": 1.192468619246862e-06, "loss": 0.8324, "step": 57 }, { "epoch": 0.0, "grad_norm": 2.0357680203943334, "learning_rate": 1.2133891213389122e-06, "loss": 0.8232, "step": 58 }, { "epoch": 0.0, "grad_norm": 1.814797864213227, "learning_rate": 1.2343096234309624e-06, "loss": 0.8144, "step": 59 }, { "epoch": 0.0, "grad_norm": 1.6438167910451045, "learning_rate": 1.2552301255230125e-06, "loss": 0.7745, "step": 60 }, { "epoch": 0.0, "grad_norm": 1.8748382016688478, "learning_rate": 1.276150627615063e-06, "loss": 0.8187, "step": 61 }, { "epoch": 0.0, "grad_norm": 1.783334835407333, "learning_rate": 1.297071129707113e-06, "loss": 0.7347, "step": 62 }, { "epoch": 0.0, "grad_norm": 1.6367872868851776, "learning_rate": 1.3179916317991635e-06, "loss": 0.7966, "step": 63 }, { "epoch": 0.0, "grad_norm": 1.7926974408303888, "learning_rate": 1.3389121338912134e-06, "loss": 0.7534, "step": 64 }, { "epoch": 0.0, "grad_norm": 1.6606628925876652, "learning_rate": 1.3598326359832636e-06, "loss": 0.7751, "step": 65 }, { "epoch": 0.0, "grad_norm": 2.3262653732005614, "learning_rate": 1.380753138075314e-06, "loss": 0.7957, "step": 66 }, { "epoch": 0.0, "grad_norm": 1.9727336258636625, "learning_rate": 1.4016736401673641e-06, "loss": 0.7589, "step": 67 }, { "epoch": 0.0, "grad_norm": 1.5456704266811179, "learning_rate": 1.4225941422594145e-06, "loss": 0.7719, "step": 68 }, { "epoch": 0.0, "grad_norm": 2.696764818970671, "learning_rate": 1.4435146443514645e-06, "loss": 0.7103, "step": 69 }, { "epoch": 0.0, "grad_norm": 1.8835448556085705, "learning_rate": 1.4644351464435146e-06, "loss": 0.7208, "step": 70 }, { "epoch": 0.0, "grad_norm": 1.5943338456870952, "learning_rate": 1.485355648535565e-06, "loss": 0.7296, "step": 71 }, { "epoch": 0.0, "grad_norm": 2.142273603062082, "learning_rate": 1.5062761506276152e-06, "loss": 0.7343, "step": 72 }, { "epoch": 0.0, "grad_norm": 1.9454596449518564, "learning_rate": 1.5271966527196656e-06, "loss": 0.7321, "step": 73 }, { "epoch": 0.0, "grad_norm": 2.384812557384574, "learning_rate": 1.5481171548117155e-06, "loss": 0.76, "step": 74 }, { "epoch": 0.0, "grad_norm": 1.7705882044117423, "learning_rate": 1.5690376569037657e-06, "loss": 0.7091, "step": 75 }, { "epoch": 0.0, "grad_norm": 1.6371238362918907, "learning_rate": 1.589958158995816e-06, "loss": 0.709, "step": 76 }, { "epoch": 0.0, "grad_norm": 2.931138171116971, "learning_rate": 1.6108786610878662e-06, "loss": 0.7118, "step": 77 }, { "epoch": 0.0, "grad_norm": 1.790129601746314, "learning_rate": 1.6317991631799166e-06, "loss": 0.7066, "step": 78 }, { "epoch": 0.0, "grad_norm": 1.561249405813863, "learning_rate": 1.6527196652719666e-06, "loss": 0.6805, "step": 79 }, { "epoch": 0.01, "grad_norm": 2.136737674376449, "learning_rate": 1.6736401673640167e-06, "loss": 0.6977, "step": 80 }, { "epoch": 0.01, "grad_norm": 1.7710100366758756, "learning_rate": 1.694560669456067e-06, "loss": 0.722, "step": 81 }, { "epoch": 0.01, "grad_norm": 1.5677734826636633, "learning_rate": 1.7154811715481173e-06, "loss": 0.678, "step": 82 }, { "epoch": 0.01, "grad_norm": 2.876098097240444, "learning_rate": 1.7364016736401676e-06, "loss": 0.6647, "step": 83 }, { "epoch": 0.01, "grad_norm": 6.079061521700847, "learning_rate": 1.7573221757322176e-06, "loss": 0.6866, "step": 84 }, { "epoch": 0.01, "grad_norm": 1.7513118734948716, "learning_rate": 1.7782426778242678e-06, "loss": 0.6748, "step": 85 }, { "epoch": 0.01, "grad_norm": 1.5300678728195052, "learning_rate": 1.7991631799163181e-06, "loss": 0.6544, "step": 86 }, { "epoch": 0.01, "grad_norm": 1.5018200347221655, "learning_rate": 1.8200836820083683e-06, "loss": 0.6691, "step": 87 }, { "epoch": 0.01, "grad_norm": 1.6743180612769364, "learning_rate": 1.8410041841004187e-06, "loss": 0.6736, "step": 88 }, { "epoch": 0.01, "grad_norm": 2.8169151334734304, "learning_rate": 1.8619246861924686e-06, "loss": 0.5151, "step": 89 }, { "epoch": 0.01, "grad_norm": 2.103169025054942, "learning_rate": 1.8828451882845188e-06, "loss": 0.6861, "step": 90 }, { "epoch": 0.01, "grad_norm": 2.7336268979816554, "learning_rate": 1.9037656903765692e-06, "loss": 0.6725, "step": 91 }, { "epoch": 0.01, "grad_norm": 1.9197089606721394, "learning_rate": 1.9246861924686196e-06, "loss": 0.6709, "step": 92 }, { "epoch": 0.01, "grad_norm": 1.6507475729497776, "learning_rate": 1.9456066945606697e-06, "loss": 0.6621, "step": 93 }, { "epoch": 0.01, "grad_norm": 1.5919123304273, "learning_rate": 1.96652719665272e-06, "loss": 0.6644, "step": 94 }, { "epoch": 0.01, "grad_norm": 1.6086503140886639, "learning_rate": 1.98744769874477e-06, "loss": 0.6779, "step": 95 }, { "epoch": 0.01, "grad_norm": 1.6852890600303034, "learning_rate": 2.0083682008368202e-06, "loss": 0.6649, "step": 96 }, { "epoch": 0.01, "grad_norm": 1.709169433281033, "learning_rate": 2.0292887029288704e-06, "loss": 0.6657, "step": 97 }, { "epoch": 0.01, "grad_norm": 2.255857063906131, "learning_rate": 2.0502092050209206e-06, "loss": 0.6827, "step": 98 }, { "epoch": 0.01, "grad_norm": 2.2727597545863, "learning_rate": 2.071129707112971e-06, "loss": 0.6371, "step": 99 }, { "epoch": 0.01, "grad_norm": 1.7926032772173868, "learning_rate": 2.092050209205021e-06, "loss": 0.6583, "step": 100 }, { "epoch": 0.01, "grad_norm": 2.099978743369382, "learning_rate": 2.112970711297071e-06, "loss": 0.6969, "step": 101 }, { "epoch": 0.01, "grad_norm": 2.0217607651610785, "learning_rate": 2.1338912133891217e-06, "loss": 0.6691, "step": 102 }, { "epoch": 0.01, "grad_norm": 1.8662523120478176, "learning_rate": 2.154811715481172e-06, "loss": 0.6395, "step": 103 }, { "epoch": 0.01, "grad_norm": 1.4402038425145702, "learning_rate": 2.175732217573222e-06, "loss": 0.6115, "step": 104 }, { "epoch": 0.01, "grad_norm": 1.7596873929734194, "learning_rate": 2.196652719665272e-06, "loss": 0.6733, "step": 105 }, { "epoch": 0.01, "grad_norm": 1.6899417778676438, "learning_rate": 2.2175732217573223e-06, "loss": 0.6507, "step": 106 }, { "epoch": 0.01, "grad_norm": 1.920649295828148, "learning_rate": 2.2384937238493725e-06, "loss": 0.6683, "step": 107 }, { "epoch": 0.01, "grad_norm": 1.7786419616325029, "learning_rate": 2.2594142259414227e-06, "loss": 0.6756, "step": 108 }, { "epoch": 0.01, "grad_norm": 2.516926447022803, "learning_rate": 2.2803347280334732e-06, "loss": 0.6141, "step": 109 }, { "epoch": 0.01, "grad_norm": 1.8608800321743426, "learning_rate": 2.301255230125523e-06, "loss": 0.64, "step": 110 }, { "epoch": 0.01, "grad_norm": 2.547921974374885, "learning_rate": 2.322175732217573e-06, "loss": 0.6765, "step": 111 }, { "epoch": 0.01, "grad_norm": 1.5918671185078295, "learning_rate": 2.3430962343096237e-06, "loss": 0.6028, "step": 112 }, { "epoch": 0.01, "grad_norm": 2.542704232862084, "learning_rate": 2.364016736401674e-06, "loss": 0.6367, "step": 113 }, { "epoch": 0.01, "grad_norm": 1.9542813058768365, "learning_rate": 2.384937238493724e-06, "loss": 0.6052, "step": 114 }, { "epoch": 0.01, "grad_norm": 2.1428940037238218, "learning_rate": 2.4058577405857742e-06, "loss": 0.6141, "step": 115 }, { "epoch": 0.01, "grad_norm": 1.5871575608949053, "learning_rate": 2.4267782426778244e-06, "loss": 0.6421, "step": 116 }, { "epoch": 0.01, "grad_norm": 1.6380030671795447, "learning_rate": 2.4476987447698746e-06, "loss": 0.6206, "step": 117 }, { "epoch": 0.01, "grad_norm": 1.768553551606372, "learning_rate": 2.4686192468619247e-06, "loss": 0.6136, "step": 118 }, { "epoch": 0.01, "grad_norm": 1.6324952719566632, "learning_rate": 2.4895397489539753e-06, "loss": 0.6165, "step": 119 }, { "epoch": 0.01, "grad_norm": 1.6563571805125223, "learning_rate": 2.510460251046025e-06, "loss": 0.6085, "step": 120 }, { "epoch": 0.01, "grad_norm": 1.7670514673682962, "learning_rate": 2.5313807531380757e-06, "loss": 0.6067, "step": 121 }, { "epoch": 0.01, "grad_norm": 1.996143798308078, "learning_rate": 2.552301255230126e-06, "loss": 0.6103, "step": 122 }, { "epoch": 0.01, "grad_norm": 2.245567993212247, "learning_rate": 2.5732217573221756e-06, "loss": 0.6164, "step": 123 }, { "epoch": 0.01, "grad_norm": 1.7354389724223707, "learning_rate": 2.594142259414226e-06, "loss": 0.6175, "step": 124 }, { "epoch": 0.01, "grad_norm": 1.7223077760804373, "learning_rate": 2.6150627615062763e-06, "loss": 0.6069, "step": 125 }, { "epoch": 0.01, "grad_norm": 1.7790619431990171, "learning_rate": 2.635983263598327e-06, "loss": 0.6347, "step": 126 }, { "epoch": 0.01, "grad_norm": 1.9358395244420905, "learning_rate": 2.6569037656903767e-06, "loss": 0.5972, "step": 127 }, { "epoch": 0.01, "grad_norm": 2.876919263803925, "learning_rate": 2.677824267782427e-06, "loss": 0.6134, "step": 128 }, { "epoch": 0.01, "grad_norm": 1.8284656253275764, "learning_rate": 2.6987447698744774e-06, "loss": 0.5963, "step": 129 }, { "epoch": 0.01, "grad_norm": 2.0927052573703575, "learning_rate": 2.719665271966527e-06, "loss": 0.6374, "step": 130 }, { "epoch": 0.01, "grad_norm": 2.043065973554129, "learning_rate": 2.7405857740585778e-06, "loss": 0.6321, "step": 131 }, { "epoch": 0.01, "grad_norm": 1.8387676107474462, "learning_rate": 2.761506276150628e-06, "loss": 0.5928, "step": 132 }, { "epoch": 0.01, "grad_norm": 1.6784610566868574, "learning_rate": 2.7824267782426777e-06, "loss": 0.6031, "step": 133 }, { "epoch": 0.01, "grad_norm": 1.7524319607012582, "learning_rate": 2.8033472803347283e-06, "loss": 0.5967, "step": 134 }, { "epoch": 0.01, "grad_norm": 1.6078432497800754, "learning_rate": 2.8242677824267784e-06, "loss": 0.5957, "step": 135 }, { "epoch": 0.01, "grad_norm": 2.5260696980523862, "learning_rate": 2.845188284518829e-06, "loss": 0.6297, "step": 136 }, { "epoch": 0.01, "grad_norm": 1.9377468490909344, "learning_rate": 2.8661087866108788e-06, "loss": 0.5889, "step": 137 }, { "epoch": 0.01, "grad_norm": 4.5307926184565455, "learning_rate": 2.887029288702929e-06, "loss": 0.6061, "step": 138 }, { "epoch": 0.01, "grad_norm": 1.9059167405758104, "learning_rate": 2.9079497907949795e-06, "loss": 0.6077, "step": 139 }, { "epoch": 0.01, "grad_norm": 1.9100292543552058, "learning_rate": 2.9288702928870293e-06, "loss": 0.5915, "step": 140 }, { "epoch": 0.01, "grad_norm": 1.732542946589111, "learning_rate": 2.94979079497908e-06, "loss": 0.5694, "step": 141 }, { "epoch": 0.01, "grad_norm": 2.5538540167703245, "learning_rate": 2.97071129707113e-06, "loss": 0.6137, "step": 142 }, { "epoch": 0.01, "grad_norm": 3.3343185435662486, "learning_rate": 2.9916317991631798e-06, "loss": 0.623, "step": 143 }, { "epoch": 0.01, "grad_norm": 2.0435238439705445, "learning_rate": 3.0125523012552303e-06, "loss": 0.5857, "step": 144 }, { "epoch": 0.01, "grad_norm": 1.9264488409367186, "learning_rate": 3.0334728033472805e-06, "loss": 0.5854, "step": 145 }, { "epoch": 0.01, "grad_norm": 1.8358898581063288, "learning_rate": 3.054393305439331e-06, "loss": 0.573, "step": 146 }, { "epoch": 0.01, "grad_norm": 2.0776269822206817, "learning_rate": 3.075313807531381e-06, "loss": 0.6185, "step": 147 }, { "epoch": 0.01, "grad_norm": 1.758152422938119, "learning_rate": 3.096234309623431e-06, "loss": 0.5661, "step": 148 }, { "epoch": 0.01, "grad_norm": 2.2387002347198237, "learning_rate": 3.1171548117154816e-06, "loss": 0.6174, "step": 149 }, { "epoch": 0.01, "grad_norm": 1.8649137544286947, "learning_rate": 3.1380753138075313e-06, "loss": 0.5836, "step": 150 }, { "epoch": 0.01, "grad_norm": 2.1614961662618675, "learning_rate": 3.158995815899582e-06, "loss": 0.6087, "step": 151 }, { "epoch": 0.01, "grad_norm": 2.963025160385293, "learning_rate": 3.179916317991632e-06, "loss": 0.6015, "step": 152 }, { "epoch": 0.01, "grad_norm": 2.127885928484131, "learning_rate": 3.200836820083682e-06, "loss": 0.5827, "step": 153 }, { "epoch": 0.01, "grad_norm": 1.8926972991118225, "learning_rate": 3.2217573221757324e-06, "loss": 0.5639, "step": 154 }, { "epoch": 0.01, "grad_norm": 1.9778972234978238, "learning_rate": 3.2426778242677826e-06, "loss": 0.5472, "step": 155 }, { "epoch": 0.01, "grad_norm": 3.742073166608714, "learning_rate": 3.263598326359833e-06, "loss": 0.62, "step": 156 }, { "epoch": 0.01, "grad_norm": 1.9624844320438002, "learning_rate": 3.284518828451883e-06, "loss": 0.576, "step": 157 }, { "epoch": 0.01, "grad_norm": 2.0381244644824084, "learning_rate": 3.305439330543933e-06, "loss": 0.5507, "step": 158 }, { "epoch": 0.01, "grad_norm": 1.2545084375913989, "learning_rate": 3.3263598326359837e-06, "loss": 0.4356, "step": 159 }, { "epoch": 0.01, "grad_norm": 2.1873318854202206, "learning_rate": 3.3472803347280334e-06, "loss": 0.5827, "step": 160 }, { "epoch": 0.01, "grad_norm": 2.2282400425869424, "learning_rate": 3.368200836820084e-06, "loss": 0.6281, "step": 161 }, { "epoch": 0.01, "grad_norm": 1.897405087693351, "learning_rate": 3.389121338912134e-06, "loss": 0.5442, "step": 162 }, { "epoch": 0.01, "grad_norm": 6.088965814031687, "learning_rate": 3.410041841004184e-06, "loss": 0.5998, "step": 163 }, { "epoch": 0.01, "grad_norm": 2.026569481356135, "learning_rate": 3.4309623430962345e-06, "loss": 0.5726, "step": 164 }, { "epoch": 0.01, "grad_norm": 6.103211691829716, "learning_rate": 3.4518828451882847e-06, "loss": 0.4828, "step": 165 }, { "epoch": 0.01, "grad_norm": 2.9849847813201476, "learning_rate": 3.4728033472803353e-06, "loss": 0.5415, "step": 166 }, { "epoch": 0.01, "grad_norm": 2.1630055011267317, "learning_rate": 3.493723849372385e-06, "loss": 0.5518, "step": 167 }, { "epoch": 0.01, "grad_norm": 2.4194171052375233, "learning_rate": 3.514644351464435e-06, "loss": 0.5851, "step": 168 }, { "epoch": 0.01, "grad_norm": 1.8093937261849506, "learning_rate": 3.5355648535564858e-06, "loss": 0.5697, "step": 169 }, { "epoch": 0.01, "grad_norm": 1.9064649752754856, "learning_rate": 3.5564853556485355e-06, "loss": 0.6195, "step": 170 }, { "epoch": 0.01, "grad_norm": 2.846338624409405, "learning_rate": 3.577405857740586e-06, "loss": 0.576, "step": 171 }, { "epoch": 0.01, "grad_norm": 1.829929989511185, "learning_rate": 3.5983263598326363e-06, "loss": 0.5492, "step": 172 }, { "epoch": 0.01, "grad_norm": 1.9034714933492618, "learning_rate": 3.619246861924686e-06, "loss": 0.5418, "step": 173 }, { "epoch": 0.01, "grad_norm": 1.962239415108419, "learning_rate": 3.6401673640167366e-06, "loss": 0.5805, "step": 174 }, { "epoch": 0.01, "grad_norm": 1.6926327780212296, "learning_rate": 3.6610878661087868e-06, "loss": 0.5272, "step": 175 }, { "epoch": 0.01, "grad_norm": 0.8187430316592739, "learning_rate": 3.6820083682008374e-06, "loss": 0.4574, "step": 176 }, { "epoch": 0.01, "grad_norm": 35.24986259368454, "learning_rate": 3.702928870292887e-06, "loss": 0.5984, "step": 177 }, { "epoch": 0.01, "grad_norm": 2.2422838567432923, "learning_rate": 3.7238493723849373e-06, "loss": 0.5598, "step": 178 }, { "epoch": 0.01, "grad_norm": 1.8202869130922215, "learning_rate": 3.744769874476988e-06, "loss": 0.5432, "step": 179 }, { "epoch": 0.01, "grad_norm": 3.2684918363837006, "learning_rate": 3.7656903765690376e-06, "loss": 0.5954, "step": 180 }, { "epoch": 0.01, "grad_norm": 1.840220183982628, "learning_rate": 3.786610878661088e-06, "loss": 0.5471, "step": 181 }, { "epoch": 0.01, "grad_norm": 1.7375381450379523, "learning_rate": 3.8075313807531384e-06, "loss": 0.5398, "step": 182 }, { "epoch": 0.01, "grad_norm": 1.5833188190590233, "learning_rate": 3.8284518828451885e-06, "loss": 0.5593, "step": 183 }, { "epoch": 0.01, "grad_norm": 2.4203088015902696, "learning_rate": 3.849372384937239e-06, "loss": 0.5341, "step": 184 }, { "epoch": 0.01, "grad_norm": 2.4037462352952135, "learning_rate": 3.870292887029289e-06, "loss": 0.5241, "step": 185 }, { "epoch": 0.01, "grad_norm": 1.9018114196493612, "learning_rate": 3.8912133891213395e-06, "loss": 0.5541, "step": 186 }, { "epoch": 0.01, "grad_norm": 1.8346681758313153, "learning_rate": 3.912133891213389e-06, "loss": 0.5884, "step": 187 }, { "epoch": 0.01, "grad_norm": 6.6060256359553735, "learning_rate": 3.93305439330544e-06, "loss": 0.5501, "step": 188 }, { "epoch": 0.01, "grad_norm": 2.1610451908072212, "learning_rate": 3.9539748953974895e-06, "loss": 0.5964, "step": 189 }, { "epoch": 0.01, "grad_norm": 3.0619098539338365, "learning_rate": 3.97489539748954e-06, "loss": 0.5723, "step": 190 }, { "epoch": 0.01, "grad_norm": 2.487926344923484, "learning_rate": 3.995815899581591e-06, "loss": 0.5874, "step": 191 }, { "epoch": 0.01, "grad_norm": 1.8608181899237632, "learning_rate": 4.0167364016736405e-06, "loss": 0.5233, "step": 192 }, { "epoch": 0.01, "grad_norm": 2.4239428611178764, "learning_rate": 4.037656903765691e-06, "loss": 0.5519, "step": 193 }, { "epoch": 0.01, "grad_norm": 2.5520657816429004, "learning_rate": 4.058577405857741e-06, "loss": 0.6036, "step": 194 }, { "epoch": 0.01, "grad_norm": 1.956756479512331, "learning_rate": 4.0794979079497905e-06, "loss": 0.5487, "step": 195 }, { "epoch": 0.01, "grad_norm": 2.179677188583239, "learning_rate": 4.100418410041841e-06, "loss": 0.5362, "step": 196 }, { "epoch": 0.01, "grad_norm": 2.58054581350178, "learning_rate": 4.121338912133892e-06, "loss": 0.5526, "step": 197 }, { "epoch": 0.01, "grad_norm": 2.0882473223940354, "learning_rate": 4.142259414225942e-06, "loss": 0.5691, "step": 198 }, { "epoch": 0.01, "grad_norm": 2.225018522453562, "learning_rate": 4.163179916317992e-06, "loss": 0.5446, "step": 199 }, { "epoch": 0.01, "grad_norm": 2.2548977860892365, "learning_rate": 4.184100418410042e-06, "loss": 0.5493, "step": 200 }, { "epoch": 0.01, "grad_norm": 2.0015897504392632, "learning_rate": 4.205020920502092e-06, "loss": 0.5727, "step": 201 }, { "epoch": 0.01, "grad_norm": 0.8549759738478968, "learning_rate": 4.225941422594142e-06, "loss": 0.4758, "step": 202 }, { "epoch": 0.01, "grad_norm": 1.7218688114187135, "learning_rate": 4.246861924686193e-06, "loss": 0.5437, "step": 203 }, { "epoch": 0.01, "grad_norm": 3.4956100779292383, "learning_rate": 4.267782426778243e-06, "loss": 0.5583, "step": 204 }, { "epoch": 0.01, "grad_norm": 2.3497272517836394, "learning_rate": 4.288702928870293e-06, "loss": 0.561, "step": 205 }, { "epoch": 0.01, "grad_norm": 2.1347763158373505, "learning_rate": 4.309623430962344e-06, "loss": 0.5512, "step": 206 }, { "epoch": 0.01, "grad_norm": 2.017505861291735, "learning_rate": 4.330543933054393e-06, "loss": 0.545, "step": 207 }, { "epoch": 0.01, "grad_norm": 2.944054774884085, "learning_rate": 4.351464435146444e-06, "loss": 0.6029, "step": 208 }, { "epoch": 0.01, "grad_norm": 2.05288008370838, "learning_rate": 4.372384937238494e-06, "loss": 0.5386, "step": 209 }, { "epoch": 0.01, "grad_norm": 1.965382181850134, "learning_rate": 4.393305439330544e-06, "loss": 0.5262, "step": 210 }, { "epoch": 0.01, "grad_norm": 2.5857770698218623, "learning_rate": 4.414225941422595e-06, "loss": 0.5269, "step": 211 }, { "epoch": 0.01, "grad_norm": 2.834596608275258, "learning_rate": 4.435146443514645e-06, "loss": 0.5422, "step": 212 }, { "epoch": 0.01, "grad_norm": 2.057630453079115, "learning_rate": 4.456066945606695e-06, "loss": 0.5334, "step": 213 }, { "epoch": 0.01, "grad_norm": 2.6220490371524603, "learning_rate": 4.476987447698745e-06, "loss": 0.5374, "step": 214 }, { "epoch": 0.01, "grad_norm": 2.286435524178221, "learning_rate": 4.4979079497907956e-06, "loss": 0.5227, "step": 215 }, { "epoch": 0.01, "grad_norm": 2.227202842739513, "learning_rate": 4.518828451882845e-06, "loss": 0.5751, "step": 216 }, { "epoch": 0.01, "grad_norm": 1.829600359946089, "learning_rate": 4.539748953974896e-06, "loss": 0.5109, "step": 217 }, { "epoch": 0.01, "grad_norm": 2.1186628117992368, "learning_rate": 4.5606694560669465e-06, "loss": 0.5155, "step": 218 }, { "epoch": 0.01, "grad_norm": 1.757460139156587, "learning_rate": 4.581589958158996e-06, "loss": 0.5528, "step": 219 }, { "epoch": 0.01, "grad_norm": 1.913175574549681, "learning_rate": 4.602510460251046e-06, "loss": 0.5263, "step": 220 }, { "epoch": 0.01, "grad_norm": 1.8302369226695772, "learning_rate": 4.6234309623430966e-06, "loss": 0.548, "step": 221 }, { "epoch": 0.01, "grad_norm": 2.472263108281696, "learning_rate": 4.644351464435146e-06, "loss": 0.5217, "step": 222 }, { "epoch": 0.01, "grad_norm": 2.901098226008417, "learning_rate": 4.665271966527197e-06, "loss": 0.5094, "step": 223 }, { "epoch": 0.01, "grad_norm": 2.23530258377056, "learning_rate": 4.6861924686192475e-06, "loss": 0.4972, "step": 224 }, { "epoch": 0.01, "grad_norm": 2.901061367814879, "learning_rate": 4.707112970711297e-06, "loss": 0.5148, "step": 225 }, { "epoch": 0.01, "grad_norm": 2.5767890434567686, "learning_rate": 4.728033472803348e-06, "loss": 0.5601, "step": 226 }, { "epoch": 0.01, "grad_norm": 2.314143929513127, "learning_rate": 4.7489539748953976e-06, "loss": 0.5433, "step": 227 }, { "epoch": 0.01, "grad_norm": 2.1529240549180026, "learning_rate": 4.769874476987448e-06, "loss": 0.5143, "step": 228 }, { "epoch": 0.01, "grad_norm": 3.1515149718204225, "learning_rate": 4.790794979079498e-06, "loss": 0.5434, "step": 229 }, { "epoch": 0.01, "grad_norm": 2.20706496494061, "learning_rate": 4.8117154811715485e-06, "loss": 0.4952, "step": 230 }, { "epoch": 0.01, "grad_norm": 2.154685233114359, "learning_rate": 4.832635983263599e-06, "loss": 0.5263, "step": 231 }, { "epoch": 0.01, "grad_norm": 3.338673730530193, "learning_rate": 4.853556485355649e-06, "loss": 0.5515, "step": 232 }, { "epoch": 0.01, "grad_norm": 2.332063111503053, "learning_rate": 4.874476987447699e-06, "loss": 0.5559, "step": 233 }, { "epoch": 0.01, "grad_norm": 1.7427624508109318, "learning_rate": 4.895397489539749e-06, "loss": 0.5355, "step": 234 }, { "epoch": 0.01, "grad_norm": 2.2863543898558905, "learning_rate": 4.9163179916318e-06, "loss": 0.5394, "step": 235 }, { "epoch": 0.01, "grad_norm": 2.183148941750467, "learning_rate": 4.9372384937238495e-06, "loss": 0.493, "step": 236 }, { "epoch": 0.01, "grad_norm": 1.8999604880172074, "learning_rate": 4.9581589958159e-06, "loss": 0.5232, "step": 237 }, { "epoch": 0.01, "grad_norm": 2.024650317117212, "learning_rate": 4.979079497907951e-06, "loss": 0.5463, "step": 238 }, { "epoch": 0.02, "grad_norm": 4.519833654324174, "learning_rate": 5e-06, "loss": 0.5053, "step": 239 }, { "epoch": 0.02, "grad_norm": 2.1145062860634294, "learning_rate": 5.02092050209205e-06, "loss": 0.5421, "step": 240 }, { "epoch": 0.02, "grad_norm": 1.8230791631428602, "learning_rate": 5.041841004184101e-06, "loss": 0.5175, "step": 241 }, { "epoch": 0.02, "grad_norm": 2.2292071552421118, "learning_rate": 5.062761506276151e-06, "loss": 0.5188, "step": 242 }, { "epoch": 0.02, "grad_norm": 2.060899197962938, "learning_rate": 5.083682008368201e-06, "loss": 0.5064, "step": 243 }, { "epoch": 0.02, "grad_norm": 2.287968133304338, "learning_rate": 5.104602510460252e-06, "loss": 0.5456, "step": 244 }, { "epoch": 0.02, "grad_norm": 0.8504400266281082, "learning_rate": 5.125523012552301e-06, "loss": 0.4325, "step": 245 }, { "epoch": 0.02, "grad_norm": 2.1361266792185174, "learning_rate": 5.146443514644351e-06, "loss": 0.5422, "step": 246 }, { "epoch": 0.02, "grad_norm": 2.4283629300625877, "learning_rate": 5.167364016736403e-06, "loss": 0.5613, "step": 247 }, { "epoch": 0.02, "grad_norm": 2.004467872375992, "learning_rate": 5.188284518828452e-06, "loss": 0.5277, "step": 248 }, { "epoch": 0.02, "grad_norm": 3.1897735043913804, "learning_rate": 5.209205020920503e-06, "loss": 0.5529, "step": 249 }, { "epoch": 0.02, "grad_norm": 3.0391670505960278, "learning_rate": 5.230125523012553e-06, "loss": 0.5311, "step": 250 }, { "epoch": 0.02, "grad_norm": 2.0097009295986283, "learning_rate": 5.251046025104602e-06, "loss": 0.5372, "step": 251 }, { "epoch": 0.02, "grad_norm": 3.1829047992119124, "learning_rate": 5.271966527196654e-06, "loss": 0.5075, "step": 252 }, { "epoch": 0.02, "grad_norm": 1.8215535134615874, "learning_rate": 5.292887029288704e-06, "loss": 0.5389, "step": 253 }, { "epoch": 0.02, "grad_norm": 2.054601898394612, "learning_rate": 5.313807531380753e-06, "loss": 0.549, "step": 254 }, { "epoch": 0.02, "grad_norm": 5.329629461954194, "learning_rate": 5.334728033472804e-06, "loss": 0.5311, "step": 255 }, { "epoch": 0.02, "grad_norm": 1.8914673884674622, "learning_rate": 5.355648535564854e-06, "loss": 0.52, "step": 256 }, { "epoch": 0.02, "grad_norm": 1.0859952944448994, "learning_rate": 5.376569037656904e-06, "loss": 0.4608, "step": 257 }, { "epoch": 0.02, "grad_norm": 2.503935817868153, "learning_rate": 5.397489539748955e-06, "loss": 0.5126, "step": 258 }, { "epoch": 0.02, "grad_norm": 3.6643267775576076, "learning_rate": 5.418410041841005e-06, "loss": 0.5213, "step": 259 }, { "epoch": 0.02, "grad_norm": 2.65610581106084, "learning_rate": 5.439330543933054e-06, "loss": 0.5778, "step": 260 }, { "epoch": 0.02, "grad_norm": 3.5025442905122772, "learning_rate": 5.460251046025105e-06, "loss": 0.5002, "step": 261 }, { "epoch": 0.02, "grad_norm": 2.520819133078781, "learning_rate": 5.4811715481171555e-06, "loss": 0.5489, "step": 262 }, { "epoch": 0.02, "grad_norm": 2.272951313335712, "learning_rate": 5.502092050209205e-06, "loss": 0.563, "step": 263 }, { "epoch": 0.02, "grad_norm": 2.413045100161984, "learning_rate": 5.523012552301256e-06, "loss": 0.5402, "step": 264 }, { "epoch": 0.02, "grad_norm": 0.8929401036698851, "learning_rate": 5.543933054393306e-06, "loss": 0.4515, "step": 265 }, { "epoch": 0.02, "grad_norm": 3.325230474399367, "learning_rate": 5.564853556485355e-06, "loss": 0.55, "step": 266 }, { "epoch": 0.02, "grad_norm": 5.049356261368777, "learning_rate": 5.585774058577407e-06, "loss": 0.5259, "step": 267 }, { "epoch": 0.02, "grad_norm": 2.621157778314054, "learning_rate": 5.6066945606694565e-06, "loss": 0.5395, "step": 268 }, { "epoch": 0.02, "grad_norm": 96.63248774085258, "learning_rate": 5.627615062761507e-06, "loss": 0.5568, "step": 269 }, { "epoch": 0.02, "grad_norm": 6.91911523630248, "learning_rate": 5.648535564853557e-06, "loss": 0.5073, "step": 270 }, { "epoch": 0.02, "grad_norm": 4.378666811200259, "learning_rate": 5.669456066945607e-06, "loss": 0.5402, "step": 271 }, { "epoch": 0.02, "grad_norm": 5.561323053565177, "learning_rate": 5.690376569037658e-06, "loss": 0.5315, "step": 272 }, { "epoch": 0.02, "grad_norm": 2.6279745240004773, "learning_rate": 5.711297071129708e-06, "loss": 0.4996, "step": 273 }, { "epoch": 0.02, "grad_norm": 2.221717607952609, "learning_rate": 5.7322175732217575e-06, "loss": 0.5017, "step": 274 }, { "epoch": 0.02, "grad_norm": 2.705689661615929, "learning_rate": 5.753138075313808e-06, "loss": 0.5159, "step": 275 }, { "epoch": 0.02, "grad_norm": 1.9104386727427634, "learning_rate": 5.774058577405858e-06, "loss": 0.5142, "step": 276 }, { "epoch": 0.02, "grad_norm": 2.8517039944287434, "learning_rate": 5.7949790794979084e-06, "loss": 0.5703, "step": 277 }, { "epoch": 0.02, "grad_norm": 1.7616299815648704, "learning_rate": 5.815899581589959e-06, "loss": 0.5456, "step": 278 }, { "epoch": 0.02, "grad_norm": 2.6609993986366334, "learning_rate": 5.836820083682009e-06, "loss": 0.5327, "step": 279 }, { "epoch": 0.02, "grad_norm": 3.6076800181703126, "learning_rate": 5.8577405857740585e-06, "loss": 0.505, "step": 280 }, { "epoch": 0.02, "grad_norm": 2.3287901339772996, "learning_rate": 5.878661087866109e-06, "loss": 0.4926, "step": 281 }, { "epoch": 0.02, "grad_norm": 1.9176150332150999, "learning_rate": 5.89958158995816e-06, "loss": 0.4637, "step": 282 }, { "epoch": 0.02, "grad_norm": 2.836208971568334, "learning_rate": 5.92050209205021e-06, "loss": 0.539, "step": 283 }, { "epoch": 0.02, "grad_norm": 3.56516241457902, "learning_rate": 5.94142259414226e-06, "loss": 0.5162, "step": 284 }, { "epoch": 0.02, "grad_norm": 2.234022868978037, "learning_rate": 5.96234309623431e-06, "loss": 0.5201, "step": 285 }, { "epoch": 0.02, "grad_norm": 7.193157674838078, "learning_rate": 5.9832635983263595e-06, "loss": 0.5133, "step": 286 }, { "epoch": 0.02, "grad_norm": 3.6235735117222805, "learning_rate": 6.004184100418411e-06, "loss": 0.5325, "step": 287 }, { "epoch": 0.02, "grad_norm": 2.0685788179195264, "learning_rate": 6.025104602510461e-06, "loss": 0.5088, "step": 288 }, { "epoch": 0.02, "grad_norm": 2.8676759066530564, "learning_rate": 6.046025104602511e-06, "loss": 0.5303, "step": 289 }, { "epoch": 0.02, "grad_norm": 2.0962004596269437, "learning_rate": 6.066945606694561e-06, "loss": 0.4912, "step": 290 }, { "epoch": 0.02, "grad_norm": 2.3877027702814257, "learning_rate": 6.087866108786611e-06, "loss": 0.5036, "step": 291 }, { "epoch": 0.02, "grad_norm": 2.7071208206826958, "learning_rate": 6.108786610878662e-06, "loss": 0.4957, "step": 292 }, { "epoch": 0.02, "grad_norm": 2.2847240073491752, "learning_rate": 6.129707112970712e-06, "loss": 0.5106, "step": 293 }, { "epoch": 0.02, "grad_norm": 3.3774041487743665, "learning_rate": 6.150627615062762e-06, "loss": 0.4879, "step": 294 }, { "epoch": 0.02, "grad_norm": 2.6449473691246475, "learning_rate": 6.171548117154812e-06, "loss": 0.5333, "step": 295 }, { "epoch": 0.02, "grad_norm": 2.4353294447704243, "learning_rate": 6.192468619246862e-06, "loss": 0.4978, "step": 296 }, { "epoch": 0.02, "grad_norm": 2.020778042253329, "learning_rate": 6.213389121338913e-06, "loss": 0.5214, "step": 297 }, { "epoch": 0.02, "grad_norm": 3.0942726866993144, "learning_rate": 6.234309623430963e-06, "loss": 0.5359, "step": 298 }, { "epoch": 0.02, "grad_norm": 3.6716584551967406, "learning_rate": 6.255230125523013e-06, "loss": 0.4982, "step": 299 }, { "epoch": 0.02, "grad_norm": 2.0646153430845535, "learning_rate": 6.276150627615063e-06, "loss": 0.5266, "step": 300 }, { "epoch": 0.02, "grad_norm": 1.94715980004556, "learning_rate": 6.297071129707113e-06, "loss": 0.5234, "step": 301 }, { "epoch": 0.02, "grad_norm": 1.1027040732914546, "learning_rate": 6.317991631799164e-06, "loss": 0.4561, "step": 302 }, { "epoch": 0.02, "grad_norm": 2.206545821119485, "learning_rate": 6.3389121338912145e-06, "loss": 0.4935, "step": 303 }, { "epoch": 0.02, "grad_norm": 2.8473207773595686, "learning_rate": 6.359832635983264e-06, "loss": 0.5527, "step": 304 }, { "epoch": 0.02, "grad_norm": 6.023532167096835, "learning_rate": 6.380753138075314e-06, "loss": 0.4862, "step": 305 }, { "epoch": 0.02, "grad_norm": 4.292990098601755, "learning_rate": 6.401673640167364e-06, "loss": 0.4951, "step": 306 }, { "epoch": 0.02, "grad_norm": 2.8132000573019575, "learning_rate": 6.422594142259415e-06, "loss": 0.5869, "step": 307 }, { "epoch": 0.02, "grad_norm": 10.973838233332685, "learning_rate": 6.443514644351465e-06, "loss": 0.4958, "step": 308 }, { "epoch": 0.02, "grad_norm": 3.539826559036644, "learning_rate": 6.4644351464435155e-06, "loss": 0.5604, "step": 309 }, { "epoch": 0.02, "grad_norm": 3.0504923999789053, "learning_rate": 6.485355648535565e-06, "loss": 0.4794, "step": 310 }, { "epoch": 0.02, "grad_norm": 3.0248026253469127, "learning_rate": 6.506276150627615e-06, "loss": 0.4841, "step": 311 }, { "epoch": 0.02, "grad_norm": 4.060222864652489, "learning_rate": 6.527196652719666e-06, "loss": 0.5296, "step": 312 }, { "epoch": 0.02, "grad_norm": 2.5536836715360005, "learning_rate": 6.548117154811716e-06, "loss": 0.5435, "step": 313 }, { "epoch": 0.02, "grad_norm": 2.8216575901049348, "learning_rate": 6.569037656903766e-06, "loss": 0.4525, "step": 314 }, { "epoch": 0.02, "grad_norm": 2.2872488197895846, "learning_rate": 6.5899581589958165e-06, "loss": 0.4808, "step": 315 }, { "epoch": 0.02, "grad_norm": 2.0154227553774837, "learning_rate": 6.610878661087866e-06, "loss": 0.5075, "step": 316 }, { "epoch": 0.02, "grad_norm": 14.689464889095058, "learning_rate": 6.631799163179918e-06, "loss": 0.5294, "step": 317 }, { "epoch": 0.02, "grad_norm": 3.511514014884, "learning_rate": 6.652719665271967e-06, "loss": 0.5001, "step": 318 }, { "epoch": 0.02, "grad_norm": 5.462426397042369, "learning_rate": 6.673640167364017e-06, "loss": 0.5201, "step": 319 }, { "epoch": 0.02, "grad_norm": 2.309178481355211, "learning_rate": 6.694560669456067e-06, "loss": 0.4914, "step": 320 }, { "epoch": 0.02, "grad_norm": 3.00046890267073, "learning_rate": 6.7154811715481175e-06, "loss": 0.4846, "step": 321 }, { "epoch": 0.02, "grad_norm": 2.771069502433317, "learning_rate": 6.736401673640168e-06, "loss": 0.5383, "step": 322 }, { "epoch": 0.02, "grad_norm": 2.7459182369145854, "learning_rate": 6.757322175732219e-06, "loss": 0.5196, "step": 323 }, { "epoch": 0.02, "grad_norm": 3.0963134515292685, "learning_rate": 6.778242677824268e-06, "loss": 0.4883, "step": 324 }, { "epoch": 0.02, "grad_norm": 21.49385232659443, "learning_rate": 6.799163179916318e-06, "loss": 0.522, "step": 325 }, { "epoch": 0.02, "grad_norm": 1.6575515829395526, "learning_rate": 6.820083682008368e-06, "loss": 0.4965, "step": 326 }, { "epoch": 0.02, "grad_norm": 2.7250280279432313, "learning_rate": 6.841004184100419e-06, "loss": 0.5186, "step": 327 }, { "epoch": 0.02, "grad_norm": 2.0326723726262537, "learning_rate": 6.861924686192469e-06, "loss": 0.5194, "step": 328 }, { "epoch": 0.02, "grad_norm": 2.874319558287596, "learning_rate": 6.88284518828452e-06, "loss": 0.4986, "step": 329 }, { "epoch": 0.02, "grad_norm": 13.5858321447557, "learning_rate": 6.903765690376569e-06, "loss": 0.4747, "step": 330 }, { "epoch": 0.02, "grad_norm": 2.85784679462047, "learning_rate": 6.924686192468619e-06, "loss": 0.5098, "step": 331 }, { "epoch": 0.02, "grad_norm": 2.824972859212857, "learning_rate": 6.9456066945606706e-06, "loss": 0.5029, "step": 332 }, { "epoch": 0.02, "grad_norm": 3.1117225820256342, "learning_rate": 6.96652719665272e-06, "loss": 0.5309, "step": 333 }, { "epoch": 0.02, "grad_norm": 4.061982984691296, "learning_rate": 6.98744769874477e-06, "loss": 0.5131, "step": 334 }, { "epoch": 0.02, "grad_norm": 4.536437490932801, "learning_rate": 7.008368200836821e-06, "loss": 0.5196, "step": 335 }, { "epoch": 0.02, "grad_norm": 2.309975716765149, "learning_rate": 7.02928870292887e-06, "loss": 0.4742, "step": 336 }, { "epoch": 0.02, "grad_norm": 3.570204434848717, "learning_rate": 7.050209205020922e-06, "loss": 0.5131, "step": 337 }, { "epoch": 0.02, "grad_norm": 4.707895248879186, "learning_rate": 7.0711297071129716e-06, "loss": 0.5506, "step": 338 }, { "epoch": 0.02, "grad_norm": 2.673949592438475, "learning_rate": 7.092050209205021e-06, "loss": 0.4867, "step": 339 }, { "epoch": 0.02, "grad_norm": 1.7316392716115172, "learning_rate": 7.112970711297071e-06, "loss": 0.4904, "step": 340 }, { "epoch": 0.02, "grad_norm": 2.3951559468838584, "learning_rate": 7.133891213389122e-06, "loss": 0.4647, "step": 341 }, { "epoch": 0.02, "grad_norm": 2.448340965348166, "learning_rate": 7.154811715481172e-06, "loss": 0.4623, "step": 342 }, { "epoch": 0.02, "grad_norm": 2.6649219852341175, "learning_rate": 7.175732217573223e-06, "loss": 0.5209, "step": 343 }, { "epoch": 0.02, "grad_norm": 4.585114417633767, "learning_rate": 7.1966527196652726e-06, "loss": 0.5184, "step": 344 }, { "epoch": 0.02, "grad_norm": 5.452898799199118, "learning_rate": 7.217573221757322e-06, "loss": 0.4877, "step": 345 }, { "epoch": 0.02, "grad_norm": 2.833547011782983, "learning_rate": 7.238493723849372e-06, "loss": 0.478, "step": 346 }, { "epoch": 0.02, "grad_norm": 2.7953763531070988, "learning_rate": 7.2594142259414235e-06, "loss": 0.5042, "step": 347 }, { "epoch": 0.02, "grad_norm": 6.734182011955151, "learning_rate": 7.280334728033473e-06, "loss": 0.4758, "step": 348 }, { "epoch": 0.02, "grad_norm": 1.9624647174911558, "learning_rate": 7.301255230125524e-06, "loss": 0.5058, "step": 349 }, { "epoch": 0.02, "grad_norm": 2.3363783927689292, "learning_rate": 7.3221757322175736e-06, "loss": 0.479, "step": 350 }, { "epoch": 0.02, "grad_norm": 1.9336687093258327, "learning_rate": 7.343096234309623e-06, "loss": 0.5096, "step": 351 }, { "epoch": 0.02, "grad_norm": 2.266888841938558, "learning_rate": 7.364016736401675e-06, "loss": 0.5157, "step": 352 }, { "epoch": 0.02, "grad_norm": 3.5073325128843336, "learning_rate": 7.3849372384937245e-06, "loss": 0.4959, "step": 353 }, { "epoch": 0.02, "grad_norm": 1.8126613086801358, "learning_rate": 7.405857740585774e-06, "loss": 0.5085, "step": 354 }, { "epoch": 0.02, "grad_norm": 3.60687001755244, "learning_rate": 7.426778242677825e-06, "loss": 0.5306, "step": 355 }, { "epoch": 0.02, "grad_norm": 6.341686498326082, "learning_rate": 7.4476987447698746e-06, "loss": 0.4989, "step": 356 }, { "epoch": 0.02, "grad_norm": 1.0904637210259118, "learning_rate": 7.468619246861926e-06, "loss": 0.4616, "step": 357 }, { "epoch": 0.02, "grad_norm": 3.133117015555545, "learning_rate": 7.489539748953976e-06, "loss": 0.5255, "step": 358 }, { "epoch": 0.02, "grad_norm": 5.006984132530126, "learning_rate": 7.5104602510460255e-06, "loss": 0.5218, "step": 359 }, { "epoch": 0.02, "grad_norm": 3.3938720688264263, "learning_rate": 7.531380753138075e-06, "loss": 0.5064, "step": 360 }, { "epoch": 0.02, "grad_norm": 2.870676793551916, "learning_rate": 7.552301255230127e-06, "loss": 0.4847, "step": 361 }, { "epoch": 0.02, "grad_norm": 0.8390125025598012, "learning_rate": 7.573221757322176e-06, "loss": 0.4413, "step": 362 }, { "epoch": 0.02, "grad_norm": 3.327764398915044, "learning_rate": 7.594142259414227e-06, "loss": 0.5116, "step": 363 }, { "epoch": 0.02, "grad_norm": 2.1647685751562404, "learning_rate": 7.615062761506277e-06, "loss": 0.5016, "step": 364 }, { "epoch": 0.02, "grad_norm": 2.3640020871193026, "learning_rate": 7.635983263598326e-06, "loss": 0.5247, "step": 365 }, { "epoch": 0.02, "grad_norm": 3.700581385258537, "learning_rate": 7.656903765690377e-06, "loss": 0.4815, "step": 366 }, { "epoch": 0.02, "grad_norm": 4.767697256841907, "learning_rate": 7.677824267782428e-06, "loss": 0.4991, "step": 367 }, { "epoch": 0.02, "grad_norm": 2.4927873233606883, "learning_rate": 7.698744769874478e-06, "loss": 0.5097, "step": 368 }, { "epoch": 0.02, "grad_norm": 4.828020437858698, "learning_rate": 7.719665271966527e-06, "loss": 0.4657, "step": 369 }, { "epoch": 0.02, "grad_norm": 2.6332343549170316, "learning_rate": 7.740585774058578e-06, "loss": 0.502, "step": 370 }, { "epoch": 0.02, "grad_norm": 2.4241080414330596, "learning_rate": 7.761506276150628e-06, "loss": 0.4871, "step": 371 }, { "epoch": 0.02, "grad_norm": 2.4052948087097104, "learning_rate": 7.782426778242679e-06, "loss": 0.5261, "step": 372 }, { "epoch": 0.02, "grad_norm": 2.4405844871804074, "learning_rate": 7.80334728033473e-06, "loss": 0.4534, "step": 373 }, { "epoch": 0.02, "grad_norm": 4.648179101276863, "learning_rate": 7.824267782426778e-06, "loss": 0.4664, "step": 374 }, { "epoch": 0.02, "grad_norm": 2.7911410791896376, "learning_rate": 7.845188284518829e-06, "loss": 0.5241, "step": 375 }, { "epoch": 0.02, "grad_norm": 1.8902105884147422, "learning_rate": 7.86610878661088e-06, "loss": 0.4666, "step": 376 }, { "epoch": 0.02, "grad_norm": 3.0296579259691927, "learning_rate": 7.88702928870293e-06, "loss": 0.4474, "step": 377 }, { "epoch": 0.02, "grad_norm": 11.222181680520652, "learning_rate": 7.907949790794979e-06, "loss": 0.4888, "step": 378 }, { "epoch": 0.02, "grad_norm": 3.0631162291398097, "learning_rate": 7.92887029288703e-06, "loss": 0.4995, "step": 379 }, { "epoch": 0.02, "grad_norm": 6.209846140694516, "learning_rate": 7.94979079497908e-06, "loss": 0.537, "step": 380 }, { "epoch": 0.02, "grad_norm": 2.463680463708773, "learning_rate": 7.97071129707113e-06, "loss": 0.5255, "step": 381 }, { "epoch": 0.02, "grad_norm": 2.891988986891982, "learning_rate": 7.991631799163181e-06, "loss": 0.4869, "step": 382 }, { "epoch": 0.02, "grad_norm": 2.867591904677801, "learning_rate": 8.01255230125523e-06, "loss": 0.4612, "step": 383 }, { "epoch": 0.02, "grad_norm": 2.430308944036147, "learning_rate": 8.033472803347281e-06, "loss": 0.5192, "step": 384 }, { "epoch": 0.02, "grad_norm": 2.747116918822297, "learning_rate": 8.054393305439332e-06, "loss": 0.5017, "step": 385 }, { "epoch": 0.02, "grad_norm": 2.085915787920984, "learning_rate": 8.075313807531382e-06, "loss": 0.473, "step": 386 }, { "epoch": 0.02, "grad_norm": 3.3728873767492407, "learning_rate": 8.096234309623433e-06, "loss": 0.4877, "step": 387 }, { "epoch": 0.02, "grad_norm": 3.5845719704457917, "learning_rate": 8.117154811715482e-06, "loss": 0.5671, "step": 388 }, { "epoch": 0.02, "grad_norm": 1.9173579598015733, "learning_rate": 8.138075313807532e-06, "loss": 0.4929, "step": 389 }, { "epoch": 0.02, "grad_norm": 1.883561647904503, "learning_rate": 8.158995815899581e-06, "loss": 0.4941, "step": 390 }, { "epoch": 0.02, "grad_norm": 2.940398886539253, "learning_rate": 8.179916317991633e-06, "loss": 0.4837, "step": 391 }, { "epoch": 0.02, "grad_norm": 1.961712747262717, "learning_rate": 8.200836820083682e-06, "loss": 0.4857, "step": 392 }, { "epoch": 0.02, "grad_norm": 2.7196777020309737, "learning_rate": 8.221757322175733e-06, "loss": 0.5571, "step": 393 }, { "epoch": 0.02, "grad_norm": 2.3298620477337977, "learning_rate": 8.242677824267783e-06, "loss": 0.5027, "step": 394 }, { "epoch": 0.02, "grad_norm": 2.6061366433259003, "learning_rate": 8.263598326359832e-06, "loss": 0.4978, "step": 395 }, { "epoch": 0.02, "grad_norm": 2.0405769078662233, "learning_rate": 8.284518828451885e-06, "loss": 0.4555, "step": 396 }, { "epoch": 0.02, "grad_norm": 2.8937000549595946, "learning_rate": 8.305439330543934e-06, "loss": 0.4902, "step": 397 }, { "epoch": 0.03, "grad_norm": 1.972537027473449, "learning_rate": 8.326359832635984e-06, "loss": 0.4724, "step": 398 }, { "epoch": 0.03, "grad_norm": 6.187539954145826, "learning_rate": 8.347280334728035e-06, "loss": 0.4782, "step": 399 }, { "epoch": 0.03, "grad_norm": 3.5411039017670323, "learning_rate": 8.368200836820084e-06, "loss": 0.5113, "step": 400 }, { "epoch": 0.03, "grad_norm": 2.5085262051947974, "learning_rate": 8.389121338912136e-06, "loss": 0.4803, "step": 401 }, { "epoch": 0.03, "grad_norm": 2.848215888898236, "learning_rate": 8.410041841004185e-06, "loss": 0.4969, "step": 402 }, { "epoch": 0.03, "grad_norm": 3.679526385946745, "learning_rate": 8.430962343096235e-06, "loss": 0.469, "step": 403 }, { "epoch": 0.03, "grad_norm": 2.1049472628050294, "learning_rate": 8.451882845188284e-06, "loss": 0.4728, "step": 404 }, { "epoch": 0.03, "grad_norm": 2.378765128456705, "learning_rate": 8.472803347280335e-06, "loss": 0.4759, "step": 405 }, { "epoch": 0.03, "grad_norm": 2.8512740769910905, "learning_rate": 8.493723849372385e-06, "loss": 0.5282, "step": 406 }, { "epoch": 0.03, "grad_norm": 2.277942641095943, "learning_rate": 8.514644351464436e-06, "loss": 0.4712, "step": 407 }, { "epoch": 0.03, "grad_norm": 15.443730718018829, "learning_rate": 8.535564853556487e-06, "loss": 0.4821, "step": 408 }, { "epoch": 0.03, "grad_norm": 2.321172925160859, "learning_rate": 8.556485355648536e-06, "loss": 0.4636, "step": 409 }, { "epoch": 0.03, "grad_norm": 1.8545919242223279, "learning_rate": 8.577405857740586e-06, "loss": 0.4965, "step": 410 }, { "epoch": 0.03, "grad_norm": 3.4878827595054522, "learning_rate": 8.598326359832637e-06, "loss": 0.4793, "step": 411 }, { "epoch": 0.03, "grad_norm": 4.266346876657598, "learning_rate": 8.619246861924687e-06, "loss": 0.4922, "step": 412 }, { "epoch": 0.03, "grad_norm": 2.5792097367050015, "learning_rate": 8.640167364016738e-06, "loss": 0.5004, "step": 413 }, { "epoch": 0.03, "grad_norm": 2.009757320195426, "learning_rate": 8.661087866108787e-06, "loss": 0.5171, "step": 414 }, { "epoch": 0.03, "grad_norm": 2.2238853932080604, "learning_rate": 8.682008368200837e-06, "loss": 0.5391, "step": 415 }, { "epoch": 0.03, "grad_norm": 2.2712477846853827, "learning_rate": 8.702928870292888e-06, "loss": 0.5026, "step": 416 }, { "epoch": 0.03, "grad_norm": 2.1988046779112986, "learning_rate": 8.723849372384939e-06, "loss": 0.4858, "step": 417 }, { "epoch": 0.03, "grad_norm": 2.406534027788669, "learning_rate": 8.744769874476987e-06, "loss": 0.4656, "step": 418 }, { "epoch": 0.03, "grad_norm": 2.163961435074915, "learning_rate": 8.765690376569038e-06, "loss": 0.5427, "step": 419 }, { "epoch": 0.03, "grad_norm": 2.3597195024144497, "learning_rate": 8.786610878661089e-06, "loss": 0.4897, "step": 420 }, { "epoch": 0.03, "grad_norm": 2.168965457878025, "learning_rate": 8.80753138075314e-06, "loss": 0.479, "step": 421 }, { "epoch": 0.03, "grad_norm": 2.5988091819064887, "learning_rate": 8.82845188284519e-06, "loss": 0.5255, "step": 422 }, { "epoch": 0.03, "grad_norm": 3.7920088032537924, "learning_rate": 8.849372384937239e-06, "loss": 0.4933, "step": 423 }, { "epoch": 0.03, "grad_norm": 1.983311565053605, "learning_rate": 8.87029288702929e-06, "loss": 0.4882, "step": 424 }, { "epoch": 0.03, "grad_norm": 2.6590108562139867, "learning_rate": 8.89121338912134e-06, "loss": 0.5219, "step": 425 }, { "epoch": 0.03, "grad_norm": 2.1317040286033073, "learning_rate": 8.91213389121339e-06, "loss": 0.4815, "step": 426 }, { "epoch": 0.03, "grad_norm": 4.274968257780614, "learning_rate": 8.933054393305441e-06, "loss": 0.4822, "step": 427 }, { "epoch": 0.03, "grad_norm": 3.686968625635076, "learning_rate": 8.95397489539749e-06, "loss": 0.4893, "step": 428 }, { "epoch": 0.03, "grad_norm": 2.5596408404374777, "learning_rate": 8.97489539748954e-06, "loss": 0.4605, "step": 429 }, { "epoch": 0.03, "grad_norm": 17.2740122622564, "learning_rate": 8.995815899581591e-06, "loss": 0.5216, "step": 430 }, { "epoch": 0.03, "grad_norm": 2.921884062267339, "learning_rate": 9.016736401673642e-06, "loss": 0.4743, "step": 431 }, { "epoch": 0.03, "grad_norm": 2.4626916917319113, "learning_rate": 9.03765690376569e-06, "loss": 0.4867, "step": 432 }, { "epoch": 0.03, "grad_norm": 2.073554583318843, "learning_rate": 9.058577405857741e-06, "loss": 0.4803, "step": 433 }, { "epoch": 0.03, "grad_norm": 1.6827488402593347, "learning_rate": 9.079497907949792e-06, "loss": 0.4818, "step": 434 }, { "epoch": 0.03, "grad_norm": 4.575804476795184, "learning_rate": 9.10041841004184e-06, "loss": 0.4579, "step": 435 }, { "epoch": 0.03, "grad_norm": 1.9555600587994975, "learning_rate": 9.121338912133893e-06, "loss": 0.4641, "step": 436 }, { "epoch": 0.03, "grad_norm": 1.9832609347832768, "learning_rate": 9.142259414225942e-06, "loss": 0.4641, "step": 437 }, { "epoch": 0.03, "grad_norm": 2.5867908607748533, "learning_rate": 9.163179916317992e-06, "loss": 0.4515, "step": 438 }, { "epoch": 0.03, "grad_norm": 2.206710679839185, "learning_rate": 9.184100418410043e-06, "loss": 0.5006, "step": 439 }, { "epoch": 0.03, "grad_norm": 1.877142684214976, "learning_rate": 9.205020920502092e-06, "loss": 0.4721, "step": 440 }, { "epoch": 0.03, "grad_norm": 1.8918168834279614, "learning_rate": 9.225941422594144e-06, "loss": 0.4877, "step": 441 }, { "epoch": 0.03, "grad_norm": 2.2351645066296113, "learning_rate": 9.246861924686193e-06, "loss": 0.4819, "step": 442 }, { "epoch": 0.03, "grad_norm": 24.542472415486223, "learning_rate": 9.267782426778244e-06, "loss": 0.5727, "step": 443 }, { "epoch": 0.03, "grad_norm": 1.9593789970001019, "learning_rate": 9.288702928870293e-06, "loss": 0.4963, "step": 444 }, { "epoch": 0.03, "grad_norm": 1.7693233306816372, "learning_rate": 9.309623430962343e-06, "loss": 0.5029, "step": 445 }, { "epoch": 0.03, "grad_norm": 2.3267296066825596, "learning_rate": 9.330543933054394e-06, "loss": 0.476, "step": 446 }, { "epoch": 0.03, "grad_norm": 1.9083774240017217, "learning_rate": 9.351464435146444e-06, "loss": 0.5121, "step": 447 }, { "epoch": 0.03, "grad_norm": 5.886945427880566, "learning_rate": 9.372384937238495e-06, "loss": 0.4735, "step": 448 }, { "epoch": 0.03, "grad_norm": 1.8086180482800336, "learning_rate": 9.393305439330544e-06, "loss": 0.4796, "step": 449 }, { "epoch": 0.03, "grad_norm": 2.5469885536000434, "learning_rate": 9.414225941422594e-06, "loss": 0.5053, "step": 450 }, { "epoch": 0.03, "grad_norm": 1.7621867233967463, "learning_rate": 9.435146443514645e-06, "loss": 0.4892, "step": 451 }, { "epoch": 0.03, "grad_norm": 2.3418689487785005, "learning_rate": 9.456066945606696e-06, "loss": 0.4976, "step": 452 }, { "epoch": 0.03, "grad_norm": 1.7565291667069471, "learning_rate": 9.476987447698746e-06, "loss": 0.4702, "step": 453 }, { "epoch": 0.03, "grad_norm": 2.7055592954521366, "learning_rate": 9.497907949790795e-06, "loss": 0.4975, "step": 454 }, { "epoch": 0.03, "grad_norm": 2.125122571133811, "learning_rate": 9.518828451882846e-06, "loss": 0.4854, "step": 455 }, { "epoch": 0.03, "grad_norm": 1.916873860259975, "learning_rate": 9.539748953974896e-06, "loss": 0.4663, "step": 456 }, { "epoch": 0.03, "grad_norm": 3.0397319347096645, "learning_rate": 9.560669456066947e-06, "loss": 0.4953, "step": 457 }, { "epoch": 0.03, "grad_norm": 2.7578682358617486, "learning_rate": 9.581589958158996e-06, "loss": 0.501, "step": 458 }, { "epoch": 0.03, "grad_norm": 2.93713540725994, "learning_rate": 9.602510460251046e-06, "loss": 0.5161, "step": 459 }, { "epoch": 0.03, "grad_norm": 2.481061522409949, "learning_rate": 9.623430962343097e-06, "loss": 0.4816, "step": 460 }, { "epoch": 0.03, "grad_norm": 3.1183860924267104, "learning_rate": 9.644351464435148e-06, "loss": 0.488, "step": 461 }, { "epoch": 0.03, "grad_norm": 2.5309859152257093, "learning_rate": 9.665271966527198e-06, "loss": 0.4803, "step": 462 }, { "epoch": 0.03, "grad_norm": 1.6623926328087257, "learning_rate": 9.686192468619247e-06, "loss": 0.4625, "step": 463 }, { "epoch": 0.03, "grad_norm": 3.226731031134517, "learning_rate": 9.707112970711298e-06, "loss": 0.4777, "step": 464 }, { "epoch": 0.03, "grad_norm": 1.9974017132717794, "learning_rate": 9.728033472803348e-06, "loss": 0.4999, "step": 465 }, { "epoch": 0.03, "grad_norm": 1.9782826327473388, "learning_rate": 9.748953974895399e-06, "loss": 0.4746, "step": 466 }, { "epoch": 0.03, "grad_norm": 4.280861114366562, "learning_rate": 9.76987447698745e-06, "loss": 0.4949, "step": 467 }, { "epoch": 0.03, "grad_norm": 13.561532321958314, "learning_rate": 9.790794979079498e-06, "loss": 0.5523, "step": 468 }, { "epoch": 0.03, "grad_norm": 3.6482834591286095, "learning_rate": 9.811715481171549e-06, "loss": 0.4785, "step": 469 }, { "epoch": 0.03, "grad_norm": 1.788555807619979, "learning_rate": 9.8326359832636e-06, "loss": 0.4918, "step": 470 }, { "epoch": 0.03, "grad_norm": 1.7239375482508028, "learning_rate": 9.85355648535565e-06, "loss": 0.4521, "step": 471 }, { "epoch": 0.03, "grad_norm": 2.075375377188315, "learning_rate": 9.874476987447699e-06, "loss": 0.4852, "step": 472 }, { "epoch": 0.03, "grad_norm": 2.5660880924744864, "learning_rate": 9.89539748953975e-06, "loss": 0.4766, "step": 473 }, { "epoch": 0.03, "grad_norm": 1.0997494916527482, "learning_rate": 9.9163179916318e-06, "loss": 0.4651, "step": 474 }, { "epoch": 0.03, "grad_norm": 3.197086697835969, "learning_rate": 9.937238493723849e-06, "loss": 0.5007, "step": 475 }, { "epoch": 0.03, "grad_norm": 2.509517393333771, "learning_rate": 9.958158995815901e-06, "loss": 0.4993, "step": 476 }, { "epoch": 0.03, "grad_norm": 1.7187968677337175, "learning_rate": 9.97907949790795e-06, "loss": 0.4827, "step": 477 }, { "epoch": 0.03, "grad_norm": 2.282998909013323, "learning_rate": 1e-05, "loss": 0.5512, "step": 478 }, { "epoch": 0.03, "grad_norm": 2.7084881852837643, "learning_rate": 9.99999989627056e-06, "loss": 0.4992, "step": 479 }, { "epoch": 0.03, "grad_norm": 2.8868923475095896, "learning_rate": 9.999999585082243e-06, "loss": 0.5299, "step": 480 }, { "epoch": 0.03, "grad_norm": 2.6316744119836013, "learning_rate": 9.999999066435062e-06, "loss": 0.5353, "step": 481 }, { "epoch": 0.03, "grad_norm": 1.980509745621231, "learning_rate": 9.99999834032904e-06, "loss": 0.4819, "step": 482 }, { "epoch": 0.03, "grad_norm": 2.7208825151127956, "learning_rate": 9.999997406764204e-06, "loss": 0.4674, "step": 483 }, { "epoch": 0.03, "grad_norm": 2.930274641225573, "learning_rate": 9.999996265740595e-06, "loss": 0.474, "step": 484 }, { "epoch": 0.03, "grad_norm": 3.46637256655527, "learning_rate": 9.99999491725826e-06, "loss": 0.4848, "step": 485 }, { "epoch": 0.03, "grad_norm": 4.962705189780874, "learning_rate": 9.999993361317255e-06, "loss": 0.4832, "step": 486 }, { "epoch": 0.03, "grad_norm": 3.9013581764155414, "learning_rate": 9.999991597917645e-06, "loss": 0.4701, "step": 487 }, { "epoch": 0.03, "grad_norm": 3.392350399151913, "learning_rate": 9.999989627059501e-06, "loss": 0.4972, "step": 488 }, { "epoch": 0.03, "grad_norm": 1.7267714061114867, "learning_rate": 9.999987448742909e-06, "loss": 0.4847, "step": 489 }, { "epoch": 0.03, "grad_norm": 2.632008755771487, "learning_rate": 9.999985062967955e-06, "loss": 0.4746, "step": 490 }, { "epoch": 0.03, "grad_norm": 1.6553610090016668, "learning_rate": 9.99998246973474e-06, "loss": 0.4669, "step": 491 }, { "epoch": 0.03, "grad_norm": 1.8526190594946903, "learning_rate": 9.999979669043371e-06, "loss": 0.4777, "step": 492 }, { "epoch": 0.03, "grad_norm": 1.9881684812188742, "learning_rate": 9.999976660893964e-06, "loss": 0.496, "step": 493 }, { "epoch": 0.03, "grad_norm": 2.192218782535668, "learning_rate": 9.999973445286645e-06, "loss": 0.5122, "step": 494 }, { "epoch": 0.03, "grad_norm": 1.8663884967123074, "learning_rate": 9.999970022221547e-06, "loss": 0.4729, "step": 495 }, { "epoch": 0.03, "grad_norm": 1.7233098792878956, "learning_rate": 9.999966391698814e-06, "loss": 0.5244, "step": 496 }, { "epoch": 0.03, "grad_norm": 3.1689089902666683, "learning_rate": 9.999962553718592e-06, "loss": 0.5363, "step": 497 }, { "epoch": 0.03, "grad_norm": 2.2179232638230273, "learning_rate": 9.999958508281042e-06, "loss": 0.4806, "step": 498 }, { "epoch": 0.03, "grad_norm": 7.830937218899892, "learning_rate": 9.999954255386336e-06, "loss": 0.5072, "step": 499 }, { "epoch": 0.03, "grad_norm": 2.5490979182484326, "learning_rate": 9.999949795034643e-06, "loss": 0.4835, "step": 500 }, { "epoch": 0.03, "grad_norm": 1.6043327569945123, "learning_rate": 9.999945127226153e-06, "loss": 0.473, "step": 501 }, { "epoch": 0.03, "grad_norm": 2.0698345439739447, "learning_rate": 9.999940251961062e-06, "loss": 0.4923, "step": 502 }, { "epoch": 0.03, "grad_norm": 3.404757938812203, "learning_rate": 9.999935169239564e-06, "loss": 0.4903, "step": 503 }, { "epoch": 0.03, "grad_norm": 2.681536235612271, "learning_rate": 9.999929879061879e-06, "loss": 0.4953, "step": 504 }, { "epoch": 0.03, "grad_norm": 2.832575757357449, "learning_rate": 9.999924381428221e-06, "loss": 0.4759, "step": 505 }, { "epoch": 0.03, "grad_norm": 3.6505871660183593, "learning_rate": 9.99991867633882e-06, "loss": 0.4857, "step": 506 }, { "epoch": 0.03, "grad_norm": 2.2055095409109557, "learning_rate": 9.999912763793912e-06, "loss": 0.5042, "step": 507 }, { "epoch": 0.03, "grad_norm": 2.707154179903497, "learning_rate": 9.999906643793741e-06, "loss": 0.4825, "step": 508 }, { "epoch": 0.03, "grad_norm": 2.032646923161125, "learning_rate": 9.999900316338566e-06, "loss": 0.4628, "step": 509 }, { "epoch": 0.03, "grad_norm": 2.3106848272052103, "learning_rate": 9.999893781428643e-06, "loss": 0.4963, "step": 510 }, { "epoch": 0.03, "grad_norm": 2.1321540785077566, "learning_rate": 9.999887039064248e-06, "loss": 0.4897, "step": 511 }, { "epoch": 0.03, "grad_norm": 1.9729172626842642, "learning_rate": 9.999880089245659e-06, "loss": 0.5195, "step": 512 }, { "epoch": 0.03, "grad_norm": 1.9921180065923847, "learning_rate": 9.999872931973163e-06, "loss": 0.4993, "step": 513 }, { "epoch": 0.03, "grad_norm": 1.4222433489211916, "learning_rate": 9.999865567247058e-06, "loss": 0.4661, "step": 514 }, { "epoch": 0.03, "grad_norm": 2.4729050680514097, "learning_rate": 9.999857995067652e-06, "loss": 0.4864, "step": 515 }, { "epoch": 0.03, "grad_norm": 3.9891660124937793, "learning_rate": 9.999850215435255e-06, "loss": 0.4939, "step": 516 }, { "epoch": 0.03, "grad_norm": 2.6145310260308054, "learning_rate": 9.999842228350191e-06, "loss": 0.5025, "step": 517 }, { "epoch": 0.03, "grad_norm": 1.6840151848583955, "learning_rate": 9.999834033812795e-06, "loss": 0.4761, "step": 518 }, { "epoch": 0.03, "grad_norm": 1.9479719370056958, "learning_rate": 9.999825631823404e-06, "loss": 0.445, "step": 519 }, { "epoch": 0.03, "grad_norm": 1.9412623736620975, "learning_rate": 9.999817022382365e-06, "loss": 0.424, "step": 520 }, { "epoch": 0.03, "grad_norm": 8.256004528716746, "learning_rate": 9.999808205490038e-06, "loss": 0.525, "step": 521 }, { "epoch": 0.03, "grad_norm": 2.0329283510561034, "learning_rate": 9.999799181146787e-06, "loss": 0.4958, "step": 522 }, { "epoch": 0.03, "grad_norm": 3.29614027965801, "learning_rate": 9.999789949352988e-06, "loss": 0.4886, "step": 523 }, { "epoch": 0.03, "grad_norm": 2.8802866180224957, "learning_rate": 9.999780510109023e-06, "loss": 0.4997, "step": 524 }, { "epoch": 0.03, "grad_norm": 2.958253765695981, "learning_rate": 9.999770863415286e-06, "loss": 0.5277, "step": 525 }, { "epoch": 0.03, "grad_norm": 2.2046301239602006, "learning_rate": 9.999761009272174e-06, "loss": 0.4725, "step": 526 }, { "epoch": 0.03, "grad_norm": 2.5465710295017514, "learning_rate": 9.999750947680096e-06, "loss": 0.4969, "step": 527 }, { "epoch": 0.03, "grad_norm": 2.031761355371023, "learning_rate": 9.999740678639471e-06, "loss": 0.5108, "step": 528 }, { "epoch": 0.03, "grad_norm": 2.0828099016664163, "learning_rate": 9.999730202150726e-06, "loss": 0.4825, "step": 529 }, { "epoch": 0.03, "grad_norm": 1.9273603673172606, "learning_rate": 9.999719518214293e-06, "loss": 0.4704, "step": 530 }, { "epoch": 0.03, "grad_norm": 2.5018699726099425, "learning_rate": 9.999708626830617e-06, "loss": 0.4905, "step": 531 }, { "epoch": 0.03, "grad_norm": 4.504510016209695, "learning_rate": 9.999697528000151e-06, "loss": 0.5068, "step": 532 }, { "epoch": 0.03, "grad_norm": 2.626186179716801, "learning_rate": 9.999686221723353e-06, "loss": 0.4726, "step": 533 }, { "epoch": 0.03, "grad_norm": 1.9030201544718344, "learning_rate": 9.999674708000692e-06, "loss": 0.4925, "step": 534 }, { "epoch": 0.03, "grad_norm": 2.297824608304791, "learning_rate": 9.99966298683265e-06, "loss": 0.5128, "step": 535 }, { "epoch": 0.03, "grad_norm": 2.4063063427872162, "learning_rate": 9.999651058219708e-06, "loss": 0.4323, "step": 536 }, { "epoch": 0.03, "grad_norm": 1.9257458540615224, "learning_rate": 9.999638922162363e-06, "loss": 0.463, "step": 537 }, { "epoch": 0.03, "grad_norm": 3.9692464760787463, "learning_rate": 9.99962657866112e-06, "loss": 0.4524, "step": 538 }, { "epoch": 0.03, "grad_norm": 2.5905931244414884, "learning_rate": 9.999614027716488e-06, "loss": 0.5167, "step": 539 }, { "epoch": 0.03, "grad_norm": 6.410373295565918, "learning_rate": 9.999601269328994e-06, "loss": 0.4753, "step": 540 }, { "epoch": 0.03, "grad_norm": 2.5532275741843966, "learning_rate": 9.99958830349916e-06, "loss": 0.463, "step": 541 }, { "epoch": 0.03, "grad_norm": 2.8810677571179277, "learning_rate": 9.999575130227526e-06, "loss": 0.4694, "step": 542 }, { "epoch": 0.03, "grad_norm": 2.9790666546297664, "learning_rate": 9.999561749514642e-06, "loss": 0.4966, "step": 543 }, { "epoch": 0.03, "grad_norm": 2.9908036371688174, "learning_rate": 9.999548161361058e-06, "loss": 0.457, "step": 544 }, { "epoch": 0.03, "grad_norm": 2.997095918538681, "learning_rate": 9.999534365767342e-06, "loss": 0.5354, "step": 545 }, { "epoch": 0.03, "grad_norm": 2.08301974810039, "learning_rate": 9.999520362734065e-06, "loss": 0.4981, "step": 546 }, { "epoch": 0.03, "grad_norm": 2.231020304541261, "learning_rate": 9.999506152261809e-06, "loss": 0.4632, "step": 547 }, { "epoch": 0.03, "grad_norm": 3.551668641095965, "learning_rate": 9.999491734351162e-06, "loss": 0.4733, "step": 548 }, { "epoch": 0.03, "grad_norm": 9.180686411917383, "learning_rate": 9.999477109002722e-06, "loss": 0.4842, "step": 549 }, { "epoch": 0.03, "grad_norm": 6.72808462322627, "learning_rate": 9.999462276217096e-06, "loss": 0.4543, "step": 550 }, { "epoch": 0.03, "grad_norm": 2.2164396078924913, "learning_rate": 9.999447235994902e-06, "loss": 0.4276, "step": 551 }, { "epoch": 0.03, "grad_norm": 2.0792152173641463, "learning_rate": 9.999431988336762e-06, "loss": 0.4595, "step": 552 }, { "epoch": 0.03, "grad_norm": 3.4026681979538793, "learning_rate": 9.999416533243309e-06, "loss": 0.465, "step": 553 }, { "epoch": 0.03, "grad_norm": 4.006484644283491, "learning_rate": 9.999400870715182e-06, "loss": 0.4442, "step": 554 }, { "epoch": 0.03, "grad_norm": 2.6070388377684557, "learning_rate": 9.999385000753034e-06, "loss": 0.4676, "step": 555 }, { "epoch": 0.03, "grad_norm": 2.040094846791098, "learning_rate": 9.999368923357525e-06, "loss": 0.4711, "step": 556 }, { "epoch": 0.04, "grad_norm": 3.0506551892026024, "learning_rate": 9.999352638529316e-06, "loss": 0.4602, "step": 557 }, { "epoch": 0.04, "grad_norm": 4.770924960955531, "learning_rate": 9.999336146269088e-06, "loss": 0.4597, "step": 558 }, { "epoch": 0.04, "grad_norm": 2.444707020009515, "learning_rate": 9.999319446577523e-06, "loss": 0.4944, "step": 559 }, { "epoch": 0.04, "grad_norm": 2.0794848971406736, "learning_rate": 9.999302539455314e-06, "loss": 0.4328, "step": 560 }, { "epoch": 0.04, "grad_norm": 1.968220333607118, "learning_rate": 9.999285424903163e-06, "loss": 0.4923, "step": 561 }, { "epoch": 0.04, "grad_norm": 2.2070225116189857, "learning_rate": 9.99926810292178e-06, "loss": 0.4732, "step": 562 }, { "epoch": 0.04, "grad_norm": 1.9672062941744224, "learning_rate": 9.999250573511883e-06, "loss": 0.4457, "step": 563 }, { "epoch": 0.04, "grad_norm": 3.7397800191770814, "learning_rate": 9.999232836674202e-06, "loss": 0.42, "step": 564 }, { "epoch": 0.04, "grad_norm": 2.955989009342422, "learning_rate": 9.99921489240947e-06, "loss": 0.4446, "step": 565 }, { "epoch": 0.04, "grad_norm": 2.9205981841495485, "learning_rate": 9.999196740718432e-06, "loss": 0.438, "step": 566 }, { "epoch": 0.04, "grad_norm": 10.522660114624852, "learning_rate": 9.999178381601842e-06, "loss": 0.4845, "step": 567 }, { "epoch": 0.04, "grad_norm": 2.4620507763502344, "learning_rate": 9.999159815060462e-06, "loss": 0.4804, "step": 568 }, { "epoch": 0.04, "grad_norm": 2.337410846560435, "learning_rate": 9.99914104109506e-06, "loss": 0.4626, "step": 569 }, { "epoch": 0.04, "grad_norm": 3.8850082114866145, "learning_rate": 9.999122059706418e-06, "loss": 0.4671, "step": 570 }, { "epoch": 0.04, "grad_norm": 1.8278181600093955, "learning_rate": 9.999102870895323e-06, "loss": 0.4578, "step": 571 }, { "epoch": 0.04, "grad_norm": 3.489333267750027, "learning_rate": 9.99908347466257e-06, "loss": 0.4449, "step": 572 }, { "epoch": 0.04, "grad_norm": 2.618328682479948, "learning_rate": 9.999063871008963e-06, "loss": 0.4658, "step": 573 }, { "epoch": 0.04, "grad_norm": 2.9802982929547728, "learning_rate": 9.999044059935319e-06, "loss": 0.4391, "step": 574 }, { "epoch": 0.04, "grad_norm": 1.8110250724553485, "learning_rate": 9.999024041442455e-06, "loss": 0.4685, "step": 575 }, { "epoch": 0.04, "grad_norm": 3.2616464759803057, "learning_rate": 9.999003815531206e-06, "loss": 0.457, "step": 576 }, { "epoch": 0.04, "grad_norm": 2.187140130650438, "learning_rate": 9.998983382202408e-06, "loss": 0.4411, "step": 577 }, { "epoch": 0.04, "grad_norm": 2.3819418136366752, "learning_rate": 9.998962741456912e-06, "loss": 0.449, "step": 578 }, { "epoch": 0.04, "grad_norm": 2.7313485182956, "learning_rate": 9.998941893295572e-06, "loss": 0.4712, "step": 579 }, { "epoch": 0.04, "grad_norm": 2.063108026944435, "learning_rate": 9.998920837719254e-06, "loss": 0.46, "step": 580 }, { "epoch": 0.04, "grad_norm": 2.088599257715193, "learning_rate": 9.998899574728832e-06, "loss": 0.4623, "step": 581 }, { "epoch": 0.04, "grad_norm": 2.208467206475676, "learning_rate": 9.998878104325186e-06, "loss": 0.5056, "step": 582 }, { "epoch": 0.04, "grad_norm": 3.566290177886027, "learning_rate": 9.998856426509208e-06, "loss": 0.4744, "step": 583 }, { "epoch": 0.04, "grad_norm": 2.133215283066703, "learning_rate": 9.998834541281798e-06, "loss": 0.4992, "step": 584 }, { "epoch": 0.04, "grad_norm": 3.530779373872894, "learning_rate": 9.998812448643866e-06, "loss": 0.4671, "step": 585 }, { "epoch": 0.04, "grad_norm": 2.6625767048203386, "learning_rate": 9.998790148596326e-06, "loss": 0.4315, "step": 586 }, { "epoch": 0.04, "grad_norm": 1.8671590408093837, "learning_rate": 9.998767641140103e-06, "loss": 0.4998, "step": 587 }, { "epoch": 0.04, "grad_norm": 3.0928109248997626, "learning_rate": 9.998744926276132e-06, "loss": 0.4703, "step": 588 }, { "epoch": 0.04, "grad_norm": 3.2506176259906834, "learning_rate": 9.998722004005356e-06, "loss": 0.4782, "step": 589 }, { "epoch": 0.04, "grad_norm": 1.9063269538020393, "learning_rate": 9.998698874328725e-06, "loss": 0.4609, "step": 590 }, { "epoch": 0.04, "grad_norm": 2.027797630615375, "learning_rate": 9.998675537247199e-06, "loss": 0.4696, "step": 591 }, { "epoch": 0.04, "grad_norm": 3.0533268864376435, "learning_rate": 9.998651992761746e-06, "loss": 0.4694, "step": 592 }, { "epoch": 0.04, "grad_norm": 2.4804998394103683, "learning_rate": 9.998628240873342e-06, "loss": 0.4419, "step": 593 }, { "epoch": 0.04, "grad_norm": 2.795088503320507, "learning_rate": 9.998604281582975e-06, "loss": 0.4994, "step": 594 }, { "epoch": 0.04, "grad_norm": 2.9220824194919697, "learning_rate": 9.998580114891638e-06, "loss": 0.4541, "step": 595 }, { "epoch": 0.04, "grad_norm": 1.8091027763660537, "learning_rate": 9.998555740800335e-06, "loss": 0.4528, "step": 596 }, { "epoch": 0.04, "grad_norm": 20.594109389576467, "learning_rate": 9.998531159310074e-06, "loss": 0.462, "step": 597 }, { "epoch": 0.04, "grad_norm": 2.0457223813804037, "learning_rate": 9.998506370421876e-06, "loss": 0.4449, "step": 598 }, { "epoch": 0.04, "grad_norm": 2.364388799628461, "learning_rate": 9.998481374136773e-06, "loss": 0.4438, "step": 599 }, { "epoch": 0.04, "grad_norm": 2.225475904704627, "learning_rate": 9.998456170455796e-06, "loss": 0.4612, "step": 600 }, { "epoch": 0.04, "grad_norm": 4.540161650441191, "learning_rate": 9.998430759379999e-06, "loss": 0.458, "step": 601 }, { "epoch": 0.04, "grad_norm": 4.602362536891887, "learning_rate": 9.998405140910427e-06, "loss": 0.4778, "step": 602 }, { "epoch": 0.04, "grad_norm": 2.5625773310302615, "learning_rate": 9.99837931504815e-06, "loss": 0.4648, "step": 603 }, { "epoch": 0.04, "grad_norm": 1.3451024621950423, "learning_rate": 9.998353281794235e-06, "loss": 0.4826, "step": 604 }, { "epoch": 0.04, "grad_norm": 2.599579513154246, "learning_rate": 9.998327041149766e-06, "loss": 0.4693, "step": 605 }, { "epoch": 0.04, "grad_norm": 2.9164031153911307, "learning_rate": 9.998300593115831e-06, "loss": 0.4559, "step": 606 }, { "epoch": 0.04, "grad_norm": 1.9919266447646287, "learning_rate": 9.998273937693525e-06, "loss": 0.4275, "step": 607 }, { "epoch": 0.04, "grad_norm": 2.858787089836137, "learning_rate": 9.998247074883956e-06, "loss": 0.4338, "step": 608 }, { "epoch": 0.04, "grad_norm": 2.952562833224354, "learning_rate": 9.998220004688238e-06, "loss": 0.5, "step": 609 }, { "epoch": 0.04, "grad_norm": 1.9821066259376316, "learning_rate": 9.998192727107492e-06, "loss": 0.4838, "step": 610 }, { "epoch": 0.04, "grad_norm": 1.8465287076621513, "learning_rate": 9.998165242142855e-06, "loss": 0.4511, "step": 611 }, { "epoch": 0.04, "grad_norm": 1.8975261047111254, "learning_rate": 9.998137549795462e-06, "loss": 0.4217, "step": 612 }, { "epoch": 0.04, "grad_norm": 4.664265661419895, "learning_rate": 9.998109650066465e-06, "loss": 0.4556, "step": 613 }, { "epoch": 0.04, "grad_norm": 2.024994077208901, "learning_rate": 9.998081542957021e-06, "loss": 0.4598, "step": 614 }, { "epoch": 0.04, "grad_norm": 2.332872265825148, "learning_rate": 9.998053228468296e-06, "loss": 0.4686, "step": 615 }, { "epoch": 0.04, "grad_norm": 1.6914635597004524, "learning_rate": 9.998024706601467e-06, "loss": 0.4297, "step": 616 }, { "epoch": 0.04, "grad_norm": 1.9640858067208848, "learning_rate": 9.997995977357712e-06, "loss": 0.4164, "step": 617 }, { "epoch": 0.04, "grad_norm": 1.731815997672232, "learning_rate": 9.99796704073823e-06, "loss": 0.4402, "step": 618 }, { "epoch": 0.04, "grad_norm": 1.4062689009583236, "learning_rate": 9.997937896744216e-06, "loss": 0.4367, "step": 619 }, { "epoch": 0.04, "grad_norm": 1.8611460221301015, "learning_rate": 9.997908545376883e-06, "loss": 0.4446, "step": 620 }, { "epoch": 0.04, "grad_norm": 1.9255996259506856, "learning_rate": 9.997878986637446e-06, "loss": 0.4364, "step": 621 }, { "epoch": 0.04, "grad_norm": 1.832861131931658, "learning_rate": 9.997849220527132e-06, "loss": 0.461, "step": 622 }, { "epoch": 0.04, "grad_norm": 2.2911431679187215, "learning_rate": 9.997819247047177e-06, "loss": 0.439, "step": 623 }, { "epoch": 0.04, "grad_norm": 2.1394718060547646, "learning_rate": 9.997789066198824e-06, "loss": 0.459, "step": 624 }, { "epoch": 0.04, "grad_norm": 1.768603478672137, "learning_rate": 9.997758677983327e-06, "loss": 0.5346, "step": 625 }, { "epoch": 0.04, "grad_norm": 2.9595764953684403, "learning_rate": 9.997728082401946e-06, "loss": 0.4677, "step": 626 }, { "epoch": 0.04, "grad_norm": 2.689702247590821, "learning_rate": 9.997697279455947e-06, "loss": 0.4479, "step": 627 }, { "epoch": 0.04, "grad_norm": 1.8691749958551407, "learning_rate": 9.997666269146612e-06, "loss": 0.4684, "step": 628 }, { "epoch": 0.04, "grad_norm": 1.588557160746207, "learning_rate": 9.997635051475227e-06, "loss": 0.4452, "step": 629 }, { "epoch": 0.04, "grad_norm": 2.332900061870853, "learning_rate": 9.997603626443088e-06, "loss": 0.4617, "step": 630 }, { "epoch": 0.04, "grad_norm": 1.7491579573049527, "learning_rate": 9.997571994051497e-06, "loss": 0.4861, "step": 631 }, { "epoch": 0.04, "grad_norm": 5.600134242711322, "learning_rate": 9.997540154301766e-06, "loss": 0.4686, "step": 632 }, { "epoch": 0.04, "grad_norm": 2.2919323119550588, "learning_rate": 9.99750810719522e-06, "loss": 0.4816, "step": 633 }, { "epoch": 0.04, "grad_norm": 2.3933176511759915, "learning_rate": 9.997475852733183e-06, "loss": 0.4721, "step": 634 }, { "epoch": 0.04, "grad_norm": 1.7704070087461385, "learning_rate": 9.997443390916999e-06, "loss": 0.426, "step": 635 }, { "epoch": 0.04, "grad_norm": 1.6750412033533468, "learning_rate": 9.99741072174801e-06, "loss": 0.4595, "step": 636 }, { "epoch": 0.04, "grad_norm": 2.7392006563073865, "learning_rate": 9.997377845227577e-06, "loss": 0.4473, "step": 637 }, { "epoch": 0.04, "grad_norm": 1.982753584421421, "learning_rate": 9.997344761357057e-06, "loss": 0.4834, "step": 638 }, { "epoch": 0.04, "grad_norm": 2.542893874456173, "learning_rate": 9.997311470137828e-06, "loss": 0.4372, "step": 639 }, { "epoch": 0.04, "grad_norm": 2.3783093034128138, "learning_rate": 9.99727797157127e-06, "loss": 0.4505, "step": 640 }, { "epoch": 0.04, "grad_norm": 1.863024260620193, "learning_rate": 9.997244265658774e-06, "loss": 0.4273, "step": 641 }, { "epoch": 0.04, "grad_norm": 2.010156445925229, "learning_rate": 9.997210352401736e-06, "loss": 0.4213, "step": 642 }, { "epoch": 0.04, "grad_norm": 1.9513288428258124, "learning_rate": 9.997176231801565e-06, "loss": 0.4659, "step": 643 }, { "epoch": 0.04, "grad_norm": 1.5284652329277972, "learning_rate": 9.997141903859675e-06, "loss": 0.5051, "step": 644 }, { "epoch": 0.04, "grad_norm": 2.767869147714192, "learning_rate": 9.997107368577492e-06, "loss": 0.4745, "step": 645 }, { "epoch": 0.04, "grad_norm": 2.1508694750132054, "learning_rate": 9.99707262595645e-06, "loss": 0.4139, "step": 646 }, { "epoch": 0.04, "grad_norm": 2.11583835038122, "learning_rate": 9.997037675997987e-06, "loss": 0.4698, "step": 647 }, { "epoch": 0.04, "grad_norm": 2.515650691332356, "learning_rate": 9.997002518703556e-06, "loss": 0.455, "step": 648 }, { "epoch": 0.04, "grad_norm": 1.9175224977998657, "learning_rate": 9.996967154074615e-06, "loss": 0.4673, "step": 649 }, { "epoch": 0.04, "grad_norm": 1.9242834891154732, "learning_rate": 9.99693158211263e-06, "loss": 0.4762, "step": 650 }, { "epoch": 0.04, "grad_norm": 1.9173214362306026, "learning_rate": 9.99689580281908e-06, "loss": 0.4902, "step": 651 }, { "epoch": 0.04, "grad_norm": 2.2670351609030837, "learning_rate": 9.996859816195446e-06, "loss": 0.4644, "step": 652 }, { "epoch": 0.04, "grad_norm": 1.8798348386074981, "learning_rate": 9.996823622243224e-06, "loss": 0.4319, "step": 653 }, { "epoch": 0.04, "grad_norm": 2.8049037930911456, "learning_rate": 9.996787220963915e-06, "loss": 0.4488, "step": 654 }, { "epoch": 0.04, "grad_norm": 2.365755338439417, "learning_rate": 9.996750612359026e-06, "loss": 0.4799, "step": 655 }, { "epoch": 0.04, "grad_norm": 6.477361241354372, "learning_rate": 9.99671379643008e-06, "loss": 0.4389, "step": 656 }, { "epoch": 0.04, "grad_norm": 1.7752975651097125, "learning_rate": 9.996676773178604e-06, "loss": 0.4557, "step": 657 }, { "epoch": 0.04, "grad_norm": 1.6547541148359164, "learning_rate": 9.996639542606135e-06, "loss": 0.4828, "step": 658 }, { "epoch": 0.04, "grad_norm": 2.2922398431103956, "learning_rate": 9.996602104714215e-06, "loss": 0.4689, "step": 659 }, { "epoch": 0.04, "grad_norm": 1.6653628345815956, "learning_rate": 9.996564459504398e-06, "loss": 0.4443, "step": 660 }, { "epoch": 0.04, "grad_norm": 3.5935263181365933, "learning_rate": 9.996526606978248e-06, "loss": 0.4537, "step": 661 }, { "epoch": 0.04, "grad_norm": 2.212712442498005, "learning_rate": 9.996488547137335e-06, "loss": 0.4419, "step": 662 }, { "epoch": 0.04, "grad_norm": 1.9478331010182452, "learning_rate": 9.996450279983236e-06, "loss": 0.4565, "step": 663 }, { "epoch": 0.04, "grad_norm": 2.5278859392506945, "learning_rate": 9.99641180551754e-06, "loss": 0.4433, "step": 664 }, { "epoch": 0.04, "grad_norm": 2.027332182723469, "learning_rate": 9.996373123741843e-06, "loss": 0.4834, "step": 665 }, { "epoch": 0.04, "grad_norm": 2.073061441534379, "learning_rate": 9.996334234657751e-06, "loss": 0.4703, "step": 666 }, { "epoch": 0.04, "grad_norm": 2.1932471783137433, "learning_rate": 9.996295138266877e-06, "loss": 0.4501, "step": 667 }, { "epoch": 0.04, "grad_norm": 2.318954452112392, "learning_rate": 9.996255834570844e-06, "loss": 0.4785, "step": 668 }, { "epoch": 0.04, "grad_norm": 8.867713605240658, "learning_rate": 9.996216323571283e-06, "loss": 0.446, "step": 669 }, { "epoch": 0.04, "grad_norm": 1.7452091963528424, "learning_rate": 9.996176605269832e-06, "loss": 0.5069, "step": 670 }, { "epoch": 0.04, "grad_norm": 2.681731236826786, "learning_rate": 9.996136679668138e-06, "loss": 0.4774, "step": 671 }, { "epoch": 0.04, "grad_norm": 2.5679787343301403, "learning_rate": 9.99609654676786e-06, "loss": 0.4378, "step": 672 }, { "epoch": 0.04, "grad_norm": 6.285230500434291, "learning_rate": 9.996056206570662e-06, "loss": 0.4563, "step": 673 }, { "epoch": 0.04, "grad_norm": 2.549352866928325, "learning_rate": 9.996015659078218e-06, "loss": 0.4421, "step": 674 }, { "epoch": 0.04, "grad_norm": 1.6743044639961289, "learning_rate": 9.995974904292211e-06, "loss": 0.4519, "step": 675 }, { "epoch": 0.04, "grad_norm": 2.944103245358746, "learning_rate": 9.995933942214331e-06, "loss": 0.439, "step": 676 }, { "epoch": 0.04, "grad_norm": 2.178578626812516, "learning_rate": 9.995892772846276e-06, "loss": 0.4657, "step": 677 }, { "epoch": 0.04, "grad_norm": 1.8834378335862514, "learning_rate": 9.995851396189759e-06, "loss": 0.4766, "step": 678 }, { "epoch": 0.04, "grad_norm": 2.3943764794326166, "learning_rate": 9.995809812246493e-06, "loss": 0.4434, "step": 679 }, { "epoch": 0.04, "grad_norm": 1.0256693185920414, "learning_rate": 9.995768021018204e-06, "loss": 0.4597, "step": 680 }, { "epoch": 0.04, "grad_norm": 3.066097959989994, "learning_rate": 9.995726022506627e-06, "loss": 0.4364, "step": 681 }, { "epoch": 0.04, "grad_norm": 4.765308871003166, "learning_rate": 9.995683816713503e-06, "loss": 0.4723, "step": 682 }, { "epoch": 0.04, "grad_norm": 2.847147381689037, "learning_rate": 9.995641403640585e-06, "loss": 0.4761, "step": 683 }, { "epoch": 0.04, "grad_norm": 2.4559454985726097, "learning_rate": 9.995598783289631e-06, "loss": 0.4727, "step": 684 }, { "epoch": 0.04, "grad_norm": 3.113195011765131, "learning_rate": 9.99555595566241e-06, "loss": 0.436, "step": 685 }, { "epoch": 0.04, "grad_norm": 3.1883784100307166, "learning_rate": 9.9955129207607e-06, "loss": 0.4867, "step": 686 }, { "epoch": 0.04, "grad_norm": 3.553270239468023, "learning_rate": 9.995469678586286e-06, "loss": 0.5049, "step": 687 }, { "epoch": 0.04, "grad_norm": 2.5839827658726273, "learning_rate": 9.995426229140963e-06, "loss": 0.4729, "step": 688 }, { "epoch": 0.04, "grad_norm": 3.6757771629648346, "learning_rate": 9.995382572426531e-06, "loss": 0.425, "step": 689 }, { "epoch": 0.04, "grad_norm": 1.8114969729388422, "learning_rate": 9.995338708444804e-06, "loss": 0.4355, "step": 690 }, { "epoch": 0.04, "grad_norm": 2.459675253701771, "learning_rate": 9.995294637197602e-06, "loss": 0.4854, "step": 691 }, { "epoch": 0.04, "grad_norm": 2.7511074507439903, "learning_rate": 9.995250358686753e-06, "loss": 0.4154, "step": 692 }, { "epoch": 0.04, "grad_norm": 3.316640599823098, "learning_rate": 9.995205872914094e-06, "loss": 0.4188, "step": 693 }, { "epoch": 0.04, "grad_norm": 2.359760985100658, "learning_rate": 9.995161179881469e-06, "loss": 0.442, "step": 694 }, { "epoch": 0.04, "grad_norm": 2.788610007610766, "learning_rate": 9.995116279590735e-06, "loss": 0.4302, "step": 695 }, { "epoch": 0.04, "grad_norm": 2.0605553313415297, "learning_rate": 9.995071172043755e-06, "loss": 0.4065, "step": 696 }, { "epoch": 0.04, "grad_norm": 3.709277814057277, "learning_rate": 9.9950258572424e-06, "loss": 0.4429, "step": 697 }, { "epoch": 0.04, "grad_norm": 1.8173910399442992, "learning_rate": 9.994980335188549e-06, "loss": 0.4684, "step": 698 }, { "epoch": 0.04, "grad_norm": 1.8807990544036073, "learning_rate": 9.994934605884093e-06, "loss": 0.4423, "step": 699 }, { "epoch": 0.04, "grad_norm": 2.07708957760124, "learning_rate": 9.994888669330927e-06, "loss": 0.4473, "step": 700 }, { "epoch": 0.04, "grad_norm": 3.5759907952586523, "learning_rate": 9.99484252553096e-06, "loss": 0.4489, "step": 701 }, { "epoch": 0.04, "grad_norm": 2.401807864783039, "learning_rate": 9.9947961744861e-06, "loss": 0.458, "step": 702 }, { "epoch": 0.04, "grad_norm": 0.8853034430839495, "learning_rate": 9.99474961619828e-06, "loss": 0.4362, "step": 703 }, { "epoch": 0.04, "grad_norm": 2.4146421279508816, "learning_rate": 9.994702850669426e-06, "loss": 0.4408, "step": 704 }, { "epoch": 0.04, "grad_norm": 1.6001923652749257, "learning_rate": 9.994655877901479e-06, "loss": 0.4403, "step": 705 }, { "epoch": 0.04, "grad_norm": 2.7221897854324726, "learning_rate": 9.994608697896386e-06, "loss": 0.4425, "step": 706 }, { "epoch": 0.04, "grad_norm": 1.9029907396395576, "learning_rate": 9.994561310656107e-06, "loss": 0.4155, "step": 707 }, { "epoch": 0.04, "grad_norm": 2.219955758206292, "learning_rate": 9.99451371618261e-06, "loss": 0.4613, "step": 708 }, { "epoch": 0.04, "grad_norm": 2.9205200134365303, "learning_rate": 9.994465914477866e-06, "loss": 0.4491, "step": 709 }, { "epoch": 0.04, "grad_norm": 4.3594627345880355, "learning_rate": 9.994417905543863e-06, "loss": 0.4623, "step": 710 }, { "epoch": 0.04, "grad_norm": 3.7961548354447747, "learning_rate": 9.994369689382586e-06, "loss": 0.4737, "step": 711 }, { "epoch": 0.04, "grad_norm": 5.153950418069649, "learning_rate": 9.994321265996043e-06, "loss": 0.4557, "step": 712 }, { "epoch": 0.04, "grad_norm": 1.7803043259836775, "learning_rate": 9.994272635386238e-06, "loss": 0.4391, "step": 713 }, { "epoch": 0.04, "grad_norm": 2.105188709639193, "learning_rate": 9.99422379755519e-06, "loss": 0.4386, "step": 714 }, { "epoch": 0.04, "grad_norm": 3.4916189281607455, "learning_rate": 9.994174752504926e-06, "loss": 0.4487, "step": 715 }, { "epoch": 0.05, "grad_norm": 1.8734568379953702, "learning_rate": 9.994125500237482e-06, "loss": 0.4182, "step": 716 }, { "epoch": 0.05, "grad_norm": 2.291847542999621, "learning_rate": 9.994076040754898e-06, "loss": 0.4724, "step": 717 }, { "epoch": 0.05, "grad_norm": 4.647137404633726, "learning_rate": 9.994026374059232e-06, "loss": 0.4919, "step": 718 }, { "epoch": 0.05, "grad_norm": 2.3424631174226507, "learning_rate": 9.99397650015254e-06, "loss": 0.4566, "step": 719 }, { "epoch": 0.05, "grad_norm": 0.928470910158565, "learning_rate": 9.993926419036893e-06, "loss": 0.4845, "step": 720 }, { "epoch": 0.05, "grad_norm": 3.3142700080597045, "learning_rate": 9.993876130714367e-06, "loss": 0.4495, "step": 721 }, { "epoch": 0.05, "grad_norm": 4.046292210969857, "learning_rate": 9.993825635187052e-06, "loss": 0.4376, "step": 722 }, { "epoch": 0.05, "grad_norm": 2.7066143872941533, "learning_rate": 9.99377493245704e-06, "loss": 0.442, "step": 723 }, { "epoch": 0.05, "grad_norm": 3.0008424541582586, "learning_rate": 9.993724022526436e-06, "loss": 0.4551, "step": 724 }, { "epoch": 0.05, "grad_norm": 0.7770143116553527, "learning_rate": 9.993672905397353e-06, "loss": 0.4866, "step": 725 }, { "epoch": 0.05, "grad_norm": 2.3870371251726765, "learning_rate": 9.993621581071913e-06, "loss": 0.4225, "step": 726 }, { "epoch": 0.05, "grad_norm": 2.239640924878757, "learning_rate": 9.993570049552242e-06, "loss": 0.4549, "step": 727 }, { "epoch": 0.05, "grad_norm": 2.186189260968605, "learning_rate": 9.993518310840481e-06, "loss": 0.4115, "step": 728 }, { "epoch": 0.05, "grad_norm": 1.8149784852264021, "learning_rate": 9.993466364938776e-06, "loss": 0.4411, "step": 729 }, { "epoch": 0.05, "grad_norm": 3.4569245728822007, "learning_rate": 9.993414211849281e-06, "loss": 0.428, "step": 730 }, { "epoch": 0.05, "grad_norm": 2.769733089295754, "learning_rate": 9.993361851574162e-06, "loss": 0.431, "step": 731 }, { "epoch": 0.05, "grad_norm": 2.3420693054026698, "learning_rate": 9.99330928411559e-06, "loss": 0.4612, "step": 732 }, { "epoch": 0.05, "grad_norm": 2.2476509824120647, "learning_rate": 9.993256509475746e-06, "loss": 0.4488, "step": 733 }, { "epoch": 0.05, "grad_norm": 3.840328309573542, "learning_rate": 9.993203527656822e-06, "loss": 0.437, "step": 734 }, { "epoch": 0.05, "grad_norm": 1.792612558054085, "learning_rate": 9.993150338661014e-06, "loss": 0.4652, "step": 735 }, { "epoch": 0.05, "grad_norm": 6.49519143632042, "learning_rate": 9.99309694249053e-06, "loss": 0.4302, "step": 736 }, { "epoch": 0.05, "grad_norm": 2.260931105451376, "learning_rate": 9.993043339147584e-06, "loss": 0.4485, "step": 737 }, { "epoch": 0.05, "grad_norm": 1.9443299972263728, "learning_rate": 9.992989528634403e-06, "loss": 0.4193, "step": 738 }, { "epoch": 0.05, "grad_norm": 0.8855198794919493, "learning_rate": 9.992935510953216e-06, "loss": 0.4948, "step": 739 }, { "epoch": 0.05, "grad_norm": 1.852162438384942, "learning_rate": 9.992881286106268e-06, "loss": 0.4686, "step": 740 }, { "epoch": 0.05, "grad_norm": 1.6678759582183533, "learning_rate": 9.992826854095806e-06, "loss": 0.4802, "step": 741 }, { "epoch": 0.05, "grad_norm": 2.4981249056088424, "learning_rate": 9.992772214924089e-06, "loss": 0.4366, "step": 742 }, { "epoch": 0.05, "grad_norm": 1.9979541013190596, "learning_rate": 9.992717368593385e-06, "loss": 0.4504, "step": 743 }, { "epoch": 0.05, "grad_norm": 2.0834487058102034, "learning_rate": 9.99266231510597e-06, "loss": 0.4484, "step": 744 }, { "epoch": 0.05, "grad_norm": 1.8185227599352154, "learning_rate": 9.992607054464128e-06, "loss": 0.4698, "step": 745 }, { "epoch": 0.05, "grad_norm": 0.7366592197539074, "learning_rate": 9.99255158667015e-06, "loss": 0.4665, "step": 746 }, { "epoch": 0.05, "grad_norm": 1.8722742353505182, "learning_rate": 9.99249591172634e-06, "loss": 0.4252, "step": 747 }, { "epoch": 0.05, "grad_norm": 3.049576533626131, "learning_rate": 9.992440029635007e-06, "loss": 0.4645, "step": 748 }, { "epoch": 0.05, "grad_norm": 1.5672077229256496, "learning_rate": 9.99238394039847e-06, "loss": 0.4056, "step": 749 }, { "epoch": 0.05, "grad_norm": 1.5925498825462083, "learning_rate": 9.992327644019053e-06, "loss": 0.434, "step": 750 }, { "epoch": 0.05, "grad_norm": 1.8303015713255473, "learning_rate": 9.992271140499096e-06, "loss": 0.4563, "step": 751 }, { "epoch": 0.05, "grad_norm": 1.4236142884002383, "learning_rate": 9.992214429840944e-06, "loss": 0.459, "step": 752 }, { "epoch": 0.05, "grad_norm": 1.752914195951192, "learning_rate": 9.992157512046947e-06, "loss": 0.4489, "step": 753 }, { "epoch": 0.05, "grad_norm": 2.115485119650311, "learning_rate": 9.992100387119468e-06, "loss": 0.4452, "step": 754 }, { "epoch": 0.05, "grad_norm": 1.8854156175407226, "learning_rate": 9.992043055060876e-06, "loss": 0.4402, "step": 755 }, { "epoch": 0.05, "grad_norm": 3.7833261170781056, "learning_rate": 9.991985515873552e-06, "loss": 0.4629, "step": 756 }, { "epoch": 0.05, "grad_norm": 2.9153728045052216, "learning_rate": 9.991927769559882e-06, "loss": 0.4416, "step": 757 }, { "epoch": 0.05, "grad_norm": 4.783200603276375, "learning_rate": 9.991869816122262e-06, "loss": 0.4675, "step": 758 }, { "epoch": 0.05, "grad_norm": 1.4475071637171941, "learning_rate": 9.991811655563096e-06, "loss": 0.4638, "step": 759 }, { "epoch": 0.05, "grad_norm": 1.8403988649058063, "learning_rate": 9.991753287884797e-06, "loss": 0.4626, "step": 760 }, { "epoch": 0.05, "grad_norm": 2.055641677306497, "learning_rate": 9.99169471308979e-06, "loss": 0.4386, "step": 761 }, { "epoch": 0.05, "grad_norm": 2.063193682721805, "learning_rate": 9.991635931180504e-06, "loss": 0.4363, "step": 762 }, { "epoch": 0.05, "grad_norm": 2.7133765269253347, "learning_rate": 9.991576942159374e-06, "loss": 0.4595, "step": 763 }, { "epoch": 0.05, "grad_norm": 1.9417508822029572, "learning_rate": 9.991517746028851e-06, "loss": 0.5405, "step": 764 }, { "epoch": 0.05, "grad_norm": 2.548362709307829, "learning_rate": 9.991458342791394e-06, "loss": 0.4753, "step": 765 }, { "epoch": 0.05, "grad_norm": 1.7744718817859155, "learning_rate": 9.991398732449461e-06, "loss": 0.4488, "step": 766 }, { "epoch": 0.05, "grad_norm": 2.506629010419364, "learning_rate": 9.991338915005531e-06, "loss": 0.4439, "step": 767 }, { "epoch": 0.05, "grad_norm": 2.746150729627741, "learning_rate": 9.991278890462083e-06, "loss": 0.4498, "step": 768 }, { "epoch": 0.05, "grad_norm": 2.1926730472137432, "learning_rate": 9.991218658821609e-06, "loss": 0.4984, "step": 769 }, { "epoch": 0.05, "grad_norm": 1.846841983617649, "learning_rate": 9.991158220086606e-06, "loss": 0.4453, "step": 770 }, { "epoch": 0.05, "grad_norm": 1.907955257438044, "learning_rate": 9.991097574259583e-06, "loss": 0.446, "step": 771 }, { "epoch": 0.05, "grad_norm": 2.5396188166622897, "learning_rate": 9.991036721343058e-06, "loss": 0.4506, "step": 772 }, { "epoch": 0.05, "grad_norm": 1.9119711306037535, "learning_rate": 9.990975661339554e-06, "loss": 0.4549, "step": 773 }, { "epoch": 0.05, "grad_norm": 2.1540435742691395, "learning_rate": 9.990914394251605e-06, "loss": 0.434, "step": 774 }, { "epoch": 0.05, "grad_norm": 2.2126268391679975, "learning_rate": 9.990852920081753e-06, "loss": 0.4532, "step": 775 }, { "epoch": 0.05, "grad_norm": 3.0377088083931834, "learning_rate": 9.990791238832547e-06, "loss": 0.4291, "step": 776 }, { "epoch": 0.05, "grad_norm": 1.8288013685619777, "learning_rate": 9.99072935050655e-06, "loss": 0.4408, "step": 777 }, { "epoch": 0.05, "grad_norm": 1.781560694504767, "learning_rate": 9.990667255106326e-06, "loss": 0.4358, "step": 778 }, { "epoch": 0.05, "grad_norm": 1.9714217622609607, "learning_rate": 9.990604952634452e-06, "loss": 0.4844, "step": 779 }, { "epoch": 0.05, "grad_norm": 1.6237259170251106, "learning_rate": 9.990542443093518e-06, "loss": 0.4287, "step": 780 }, { "epoch": 0.05, "grad_norm": 2.0796091992235115, "learning_rate": 9.990479726486111e-06, "loss": 0.4779, "step": 781 }, { "epoch": 0.05, "grad_norm": 1.5816331887509902, "learning_rate": 9.990416802814838e-06, "loss": 0.4233, "step": 782 }, { "epoch": 0.05, "grad_norm": 2.198578168807564, "learning_rate": 9.990353672082307e-06, "loss": 0.4779, "step": 783 }, { "epoch": 0.05, "grad_norm": 2.31231238158569, "learning_rate": 9.990290334291137e-06, "loss": 0.4262, "step": 784 }, { "epoch": 0.05, "grad_norm": 1.7891061349189314, "learning_rate": 9.990226789443959e-06, "loss": 0.4686, "step": 785 }, { "epoch": 0.05, "grad_norm": 1.9574795554447961, "learning_rate": 9.990163037543407e-06, "loss": 0.451, "step": 786 }, { "epoch": 0.05, "grad_norm": 1.9366174622724797, "learning_rate": 9.990099078592128e-06, "loss": 0.4428, "step": 787 }, { "epoch": 0.05, "grad_norm": 3.047931041875674, "learning_rate": 9.990034912592774e-06, "loss": 0.4289, "step": 788 }, { "epoch": 0.05, "grad_norm": 3.2617071894422747, "learning_rate": 9.98997053954801e-06, "loss": 0.514, "step": 789 }, { "epoch": 0.05, "grad_norm": 1.6326084843093283, "learning_rate": 9.989905959460502e-06, "loss": 0.4517, "step": 790 }, { "epoch": 0.05, "grad_norm": 1.8286255469673063, "learning_rate": 9.989841172332932e-06, "loss": 0.4562, "step": 791 }, { "epoch": 0.05, "grad_norm": 1.398080419481011, "learning_rate": 9.989776178167991e-06, "loss": 0.422, "step": 792 }, { "epoch": 0.05, "grad_norm": 6.451760998367041, "learning_rate": 9.989710976968375e-06, "loss": 0.4161, "step": 793 }, { "epoch": 0.05, "grad_norm": 1.8979467286199225, "learning_rate": 9.989645568736786e-06, "loss": 0.4751, "step": 794 }, { "epoch": 0.05, "grad_norm": 3.3096835223998924, "learning_rate": 9.98957995347594e-06, "loss": 0.4297, "step": 795 }, { "epoch": 0.05, "grad_norm": 4.255040445356041, "learning_rate": 9.98951413118856e-06, "loss": 0.4463, "step": 796 }, { "epoch": 0.05, "grad_norm": 2.3613276402916803, "learning_rate": 9.989448101877375e-06, "loss": 0.466, "step": 797 }, { "epoch": 0.05, "grad_norm": 3.3231044170074253, "learning_rate": 9.989381865545128e-06, "loss": 0.4539, "step": 798 }, { "epoch": 0.05, "grad_norm": 1.7764446748922416, "learning_rate": 9.989315422194562e-06, "loss": 0.4217, "step": 799 }, { "epoch": 0.05, "grad_norm": 2.944426116231744, "learning_rate": 9.98924877182844e-06, "loss": 0.426, "step": 800 }, { "epoch": 0.05, "grad_norm": 1.413016157089324, "learning_rate": 9.989181914449523e-06, "loss": 0.4499, "step": 801 }, { "epoch": 0.05, "grad_norm": 1.8796354863110096, "learning_rate": 9.989114850060586e-06, "loss": 0.4537, "step": 802 }, { "epoch": 0.05, "grad_norm": 2.30681700893175, "learning_rate": 9.989047578664414e-06, "loss": 0.4339, "step": 803 }, { "epoch": 0.05, "grad_norm": 0.8686538396109308, "learning_rate": 9.988980100263796e-06, "loss": 0.468, "step": 804 }, { "epoch": 0.05, "grad_norm": 2.3910489222265263, "learning_rate": 9.988912414861531e-06, "loss": 0.4618, "step": 805 }, { "epoch": 0.05, "grad_norm": 2.0476596917988434, "learning_rate": 9.98884452246043e-06, "loss": 0.4335, "step": 806 }, { "epoch": 0.05, "grad_norm": 1.9450486515086292, "learning_rate": 9.988776423063309e-06, "loss": 0.4389, "step": 807 }, { "epoch": 0.05, "grad_norm": 2.002990635676293, "learning_rate": 9.988708116672991e-06, "loss": 0.4735, "step": 808 }, { "epoch": 0.05, "grad_norm": 3.92066119516322, "learning_rate": 9.988639603292315e-06, "loss": 0.3943, "step": 809 }, { "epoch": 0.05, "grad_norm": 1.7646101488911214, "learning_rate": 9.988570882924118e-06, "loss": 0.4199, "step": 810 }, { "epoch": 0.05, "grad_norm": 1.6045767909297195, "learning_rate": 9.988501955571257e-06, "loss": 0.4504, "step": 811 }, { "epoch": 0.05, "grad_norm": 1.7229917539663575, "learning_rate": 9.988432821236588e-06, "loss": 0.473, "step": 812 }, { "epoch": 0.05, "grad_norm": 2.726949425061288, "learning_rate": 9.98836347992298e-06, "loss": 0.4415, "step": 813 }, { "epoch": 0.05, "grad_norm": 2.338904531000487, "learning_rate": 9.988293931633312e-06, "loss": 0.4535, "step": 814 }, { "epoch": 0.05, "grad_norm": 2.609909544928862, "learning_rate": 9.988224176370468e-06, "loss": 0.4924, "step": 815 }, { "epoch": 0.05, "grad_norm": 2.219390158335007, "learning_rate": 9.988154214137345e-06, "loss": 0.4382, "step": 816 }, { "epoch": 0.05, "grad_norm": 2.407382476139027, "learning_rate": 9.98808404493684e-06, "loss": 0.4437, "step": 817 }, { "epoch": 0.05, "grad_norm": 1.7609613674124398, "learning_rate": 9.988013668771872e-06, "loss": 0.4226, "step": 818 }, { "epoch": 0.05, "grad_norm": 0.8812049522958381, "learning_rate": 9.987943085645355e-06, "loss": 0.5146, "step": 819 }, { "epoch": 0.05, "grad_norm": 1.9054287777952734, "learning_rate": 9.98787229556022e-06, "loss": 0.4275, "step": 820 }, { "epoch": 0.05, "grad_norm": 2.3132004263071244, "learning_rate": 9.987801298519404e-06, "loss": 0.4744, "step": 821 }, { "epoch": 0.05, "grad_norm": 2.808889943407105, "learning_rate": 9.987730094525854e-06, "loss": 0.4171, "step": 822 }, { "epoch": 0.05, "grad_norm": 4.472432442961767, "learning_rate": 9.987658683582522e-06, "loss": 0.4471, "step": 823 }, { "epoch": 0.05, "grad_norm": 1.818475863719265, "learning_rate": 9.98758706569237e-06, "loss": 0.4252, "step": 824 }, { "epoch": 0.05, "grad_norm": 2.308317465855414, "learning_rate": 9.987515240858375e-06, "loss": 0.4322, "step": 825 }, { "epoch": 0.05, "grad_norm": 1.9523186706404483, "learning_rate": 9.987443209083513e-06, "loss": 0.4434, "step": 826 }, { "epoch": 0.05, "grad_norm": 1.841784401817416, "learning_rate": 9.987370970370773e-06, "loss": 0.4176, "step": 827 }, { "epoch": 0.05, "grad_norm": 5.95961024478376, "learning_rate": 9.987298524723153e-06, "loss": 0.4287, "step": 828 }, { "epoch": 0.05, "grad_norm": 2.4942654397408663, "learning_rate": 9.98722587214366e-06, "loss": 0.4205, "step": 829 }, { "epoch": 0.05, "grad_norm": 2.350426410910016, "learning_rate": 9.987153012635305e-06, "loss": 0.4199, "step": 830 }, { "epoch": 0.05, "grad_norm": 4.12802650657785, "learning_rate": 9.987079946201114e-06, "loss": 0.4457, "step": 831 }, { "epoch": 0.05, "grad_norm": 1.8872701395081841, "learning_rate": 9.987006672844119e-06, "loss": 0.453, "step": 832 }, { "epoch": 0.05, "grad_norm": 18.343687856141752, "learning_rate": 9.98693319256736e-06, "loss": 0.3863, "step": 833 }, { "epoch": 0.05, "grad_norm": 2.1537149097587385, "learning_rate": 9.986859505373882e-06, "loss": 0.4922, "step": 834 }, { "epoch": 0.05, "grad_norm": 1.8026116562799057, "learning_rate": 9.986785611266749e-06, "loss": 0.4246, "step": 835 }, { "epoch": 0.05, "grad_norm": 1.8120638282725512, "learning_rate": 9.986711510249021e-06, "loss": 0.4441, "step": 836 }, { "epoch": 0.05, "grad_norm": 2.0593819798683164, "learning_rate": 9.986637202323777e-06, "loss": 0.3827, "step": 837 }, { "epoch": 0.05, "grad_norm": 3.4497291118023137, "learning_rate": 9.986562687494096e-06, "loss": 0.4348, "step": 838 }, { "epoch": 0.05, "grad_norm": 2.510626177549304, "learning_rate": 9.986487965763073e-06, "loss": 0.4411, "step": 839 }, { "epoch": 0.05, "grad_norm": 1.9839747214140684, "learning_rate": 9.986413037133806e-06, "loss": 0.4852, "step": 840 }, { "epoch": 0.05, "grad_norm": 6.030492457333062, "learning_rate": 9.986337901609407e-06, "loss": 0.4245, "step": 841 }, { "epoch": 0.05, "grad_norm": 0.9773631136619981, "learning_rate": 9.986262559192992e-06, "loss": 0.4883, "step": 842 }, { "epoch": 0.05, "grad_norm": 3.7266203884732154, "learning_rate": 9.986187009887685e-06, "loss": 0.4491, "step": 843 }, { "epoch": 0.05, "grad_norm": 2.7682562026424797, "learning_rate": 9.986111253696625e-06, "loss": 0.4583, "step": 844 }, { "epoch": 0.05, "grad_norm": 2.088482057876728, "learning_rate": 9.98603529062295e-06, "loss": 0.4289, "step": 845 }, { "epoch": 0.05, "grad_norm": 2.249553220163194, "learning_rate": 9.985959120669816e-06, "loss": 0.4257, "step": 846 }, { "epoch": 0.05, "grad_norm": 2.1260195024993584, "learning_rate": 9.985882743840383e-06, "loss": 0.412, "step": 847 }, { "epoch": 0.05, "grad_norm": 1.6891161433123363, "learning_rate": 9.985806160137816e-06, "loss": 0.4203, "step": 848 }, { "epoch": 0.05, "grad_norm": 1.8324367024281996, "learning_rate": 9.985729369565299e-06, "loss": 0.4353, "step": 849 }, { "epoch": 0.05, "grad_norm": 8.344083468436015, "learning_rate": 9.985652372126013e-06, "loss": 0.4629, "step": 850 }, { "epoch": 0.05, "grad_norm": 1.7443403482437772, "learning_rate": 9.985575167823154e-06, "loss": 0.4648, "step": 851 }, { "epoch": 0.05, "grad_norm": 2.2278136312942904, "learning_rate": 9.985497756659927e-06, "loss": 0.4308, "step": 852 }, { "epoch": 0.05, "grad_norm": 2.054966974169479, "learning_rate": 9.985420138639543e-06, "loss": 0.4502, "step": 853 }, { "epoch": 0.05, "grad_norm": 2.7331177037284515, "learning_rate": 9.985342313765223e-06, "loss": 0.4313, "step": 854 }, { "epoch": 0.05, "grad_norm": 1.8738889044291418, "learning_rate": 9.985264282040195e-06, "loss": 0.4138, "step": 855 }, { "epoch": 0.05, "grad_norm": 4.393667567747854, "learning_rate": 9.985186043467697e-06, "loss": 0.431, "step": 856 }, { "epoch": 0.05, "grad_norm": 2.335268424978182, "learning_rate": 9.985107598050973e-06, "loss": 0.4578, "step": 857 }, { "epoch": 0.05, "grad_norm": 2.31628435357004, "learning_rate": 9.985028945793282e-06, "loss": 0.4262, "step": 858 }, { "epoch": 0.05, "grad_norm": 2.6258180779200866, "learning_rate": 9.984950086697886e-06, "loss": 0.4547, "step": 859 }, { "epoch": 0.05, "grad_norm": 4.423719826873862, "learning_rate": 9.984871020768056e-06, "loss": 0.4663, "step": 860 }, { "epoch": 0.05, "grad_norm": 1.8648510078726412, "learning_rate": 9.984791748007074e-06, "loss": 0.4288, "step": 861 }, { "epoch": 0.05, "grad_norm": 1.9127701198918514, "learning_rate": 9.984712268418228e-06, "loss": 0.4648, "step": 862 }, { "epoch": 0.05, "grad_norm": 7.992699198148132, "learning_rate": 9.984632582004817e-06, "loss": 0.4029, "step": 863 }, { "epoch": 0.05, "grad_norm": 3.4562612420210286, "learning_rate": 9.984552688770145e-06, "loss": 0.4631, "step": 864 }, { "epoch": 0.05, "grad_norm": 3.197226952886568, "learning_rate": 9.984472588717528e-06, "loss": 0.4876, "step": 865 }, { "epoch": 0.05, "grad_norm": 2.444824121345561, "learning_rate": 9.984392281850293e-06, "loss": 0.4299, "step": 866 }, { "epoch": 0.05, "grad_norm": 2.1793522233262013, "learning_rate": 9.984311768171766e-06, "loss": 0.4564, "step": 867 }, { "epoch": 0.05, "grad_norm": 2.162687932448794, "learning_rate": 9.984231047685292e-06, "loss": 0.4551, "step": 868 }, { "epoch": 0.05, "grad_norm": 2.997032886043869, "learning_rate": 9.984150120394219e-06, "loss": 0.4321, "step": 869 }, { "epoch": 0.05, "grad_norm": 1.7485735587581321, "learning_rate": 9.984068986301902e-06, "loss": 0.4042, "step": 870 }, { "epoch": 0.05, "grad_norm": 2.026706024737377, "learning_rate": 9.983987645411712e-06, "loss": 0.4477, "step": 871 }, { "epoch": 0.05, "grad_norm": 2.288813397372106, "learning_rate": 9.983906097727023e-06, "loss": 0.4386, "step": 872 }, { "epoch": 0.05, "grad_norm": 1.6025153100994587, "learning_rate": 9.983824343251216e-06, "loss": 0.4004, "step": 873 }, { "epoch": 0.05, "grad_norm": 2.700468949812591, "learning_rate": 9.983742381987685e-06, "loss": 0.4663, "step": 874 }, { "epoch": 0.06, "grad_norm": 3.6293938910155337, "learning_rate": 9.983660213939832e-06, "loss": 0.4464, "step": 875 }, { "epoch": 0.06, "grad_norm": 2.527513601607223, "learning_rate": 9.983577839111062e-06, "loss": 0.4658, "step": 876 }, { "epoch": 0.06, "grad_norm": 1.9417977450619628, "learning_rate": 9.983495257504799e-06, "loss": 0.4022, "step": 877 }, { "epoch": 0.06, "grad_norm": 2.195997308949818, "learning_rate": 9.983412469124462e-06, "loss": 0.4557, "step": 878 }, { "epoch": 0.06, "grad_norm": 1.3393453275261773, "learning_rate": 9.983329473973494e-06, "loss": 0.5276, "step": 879 }, { "epoch": 0.06, "grad_norm": 2.1995698085811766, "learning_rate": 9.983246272055333e-06, "loss": 0.4331, "step": 880 }, { "epoch": 0.06, "grad_norm": 1.8356954093128823, "learning_rate": 9.983162863373433e-06, "loss": 0.4788, "step": 881 }, { "epoch": 0.06, "grad_norm": 2.297221437287468, "learning_rate": 9.983079247931255e-06, "loss": 0.4292, "step": 882 }, { "epoch": 0.06, "grad_norm": 2.57184333886103, "learning_rate": 9.982995425732267e-06, "loss": 0.4302, "step": 883 }, { "epoch": 0.06, "grad_norm": 26.614849061559507, "learning_rate": 9.982911396779949e-06, "loss": 0.4599, "step": 884 }, { "epoch": 0.06, "grad_norm": 1.812496817272605, "learning_rate": 9.982827161077787e-06, "loss": 0.436, "step": 885 }, { "epoch": 0.06, "grad_norm": 1.850573999627721, "learning_rate": 9.982742718629275e-06, "loss": 0.4297, "step": 886 }, { "epoch": 0.06, "grad_norm": 2.1018236009745332, "learning_rate": 9.982658069437916e-06, "loss": 0.409, "step": 887 }, { "epoch": 0.06, "grad_norm": 4.761668117737086, "learning_rate": 9.982573213507225e-06, "loss": 0.4194, "step": 888 }, { "epoch": 0.06, "grad_norm": 2.1370168746072733, "learning_rate": 9.982488150840722e-06, "loss": 0.4014, "step": 889 }, { "epoch": 0.06, "grad_norm": 1.9043261773199873, "learning_rate": 9.982402881441933e-06, "loss": 0.3951, "step": 890 }, { "epoch": 0.06, "grad_norm": 1.6126271997581667, "learning_rate": 9.9823174053144e-06, "loss": 0.4129, "step": 891 }, { "epoch": 0.06, "grad_norm": 1.9991985625527473, "learning_rate": 9.982231722461669e-06, "loss": 0.4392, "step": 892 }, { "epoch": 0.06, "grad_norm": 2.043952618343807, "learning_rate": 9.982145832887294e-06, "loss": 0.5015, "step": 893 }, { "epoch": 0.06, "grad_norm": 1.939395607295308, "learning_rate": 9.98205973659484e-06, "loss": 0.4329, "step": 894 }, { "epoch": 0.06, "grad_norm": 1.6488668660224204, "learning_rate": 9.981973433587876e-06, "loss": 0.426, "step": 895 }, { "epoch": 0.06, "grad_norm": 1.5073712420631002, "learning_rate": 9.981886923869988e-06, "loss": 0.3905, "step": 896 }, { "epoch": 0.06, "grad_norm": 1.8621661400270306, "learning_rate": 9.981800207444762e-06, "loss": 0.4081, "step": 897 }, { "epoch": 0.06, "grad_norm": 2.1146298803252797, "learning_rate": 9.981713284315796e-06, "loss": 0.4397, "step": 898 }, { "epoch": 0.06, "grad_norm": 1.2228346828692689, "learning_rate": 9.981626154486696e-06, "loss": 0.5177, "step": 899 }, { "epoch": 0.06, "grad_norm": 3.4933527790971017, "learning_rate": 9.981538817961082e-06, "loss": 0.4235, "step": 900 }, { "epoch": 0.06, "grad_norm": 0.8595059330336282, "learning_rate": 9.981451274742572e-06, "loss": 0.4814, "step": 901 }, { "epoch": 0.06, "grad_norm": 1.7674160092007893, "learning_rate": 9.9813635248348e-06, "loss": 0.4082, "step": 902 }, { "epoch": 0.06, "grad_norm": 3.783556821691551, "learning_rate": 9.98127556824141e-06, "loss": 0.4332, "step": 903 }, { "epoch": 0.06, "grad_norm": 1.6950369875321318, "learning_rate": 9.981187404966047e-06, "loss": 0.426, "step": 904 }, { "epoch": 0.06, "grad_norm": 2.4850559204818143, "learning_rate": 9.98109903501237e-06, "loss": 0.4538, "step": 905 }, { "epoch": 0.06, "grad_norm": 3.6481703676970185, "learning_rate": 9.981010458384048e-06, "loss": 0.4888, "step": 906 }, { "epoch": 0.06, "grad_norm": 13.853290704029964, "learning_rate": 9.980921675084755e-06, "loss": 0.448, "step": 907 }, { "epoch": 0.06, "grad_norm": 3.5318247712162347, "learning_rate": 9.980832685118173e-06, "loss": 0.4558, "step": 908 }, { "epoch": 0.06, "grad_norm": 1.614367986623324, "learning_rate": 9.980743488487999e-06, "loss": 0.4599, "step": 909 }, { "epoch": 0.06, "grad_norm": 4.1062925944638415, "learning_rate": 9.980654085197928e-06, "loss": 0.4321, "step": 910 }, { "epoch": 0.06, "grad_norm": 1.7061385243800808, "learning_rate": 9.980564475251673e-06, "loss": 0.4315, "step": 911 }, { "epoch": 0.06, "grad_norm": 2.262690557515717, "learning_rate": 9.980474658652953e-06, "loss": 0.4286, "step": 912 }, { "epoch": 0.06, "grad_norm": 2.435930689025621, "learning_rate": 9.98038463540549e-06, "loss": 0.426, "step": 913 }, { "epoch": 0.06, "grad_norm": 1.8404883744451876, "learning_rate": 9.980294405513024e-06, "loss": 0.4525, "step": 914 }, { "epoch": 0.06, "grad_norm": 2.6347995801011797, "learning_rate": 9.980203968979298e-06, "loss": 0.4543, "step": 915 }, { "epoch": 0.06, "grad_norm": 1.9011771571903395, "learning_rate": 9.980113325808062e-06, "loss": 0.4417, "step": 916 }, { "epoch": 0.06, "grad_norm": 1.625756947977176, "learning_rate": 9.980022476003081e-06, "loss": 0.4301, "step": 917 }, { "epoch": 0.06, "grad_norm": 2.4732928132745444, "learning_rate": 9.97993141956812e-06, "loss": 0.4579, "step": 918 }, { "epoch": 0.06, "grad_norm": 3.1499894763709215, "learning_rate": 9.979840156506959e-06, "loss": 0.4579, "step": 919 }, { "epoch": 0.06, "grad_norm": 1.8541051688817138, "learning_rate": 9.979748686823386e-06, "loss": 0.4396, "step": 920 }, { "epoch": 0.06, "grad_norm": 1.4980284504653731, "learning_rate": 9.979657010521194e-06, "loss": 0.4325, "step": 921 }, { "epoch": 0.06, "grad_norm": 2.016309885443545, "learning_rate": 9.979565127604186e-06, "loss": 0.4334, "step": 922 }, { "epoch": 0.06, "grad_norm": 2.6505891850016337, "learning_rate": 9.97947303807618e-06, "loss": 0.4258, "step": 923 }, { "epoch": 0.06, "grad_norm": 1.7367054042675574, "learning_rate": 9.97938074194099e-06, "loss": 0.5969, "step": 924 }, { "epoch": 0.06, "grad_norm": 2.0760989197594975, "learning_rate": 9.97928823920245e-06, "loss": 0.4242, "step": 925 }, { "epoch": 0.06, "grad_norm": 2.921672511976158, "learning_rate": 9.979195529864397e-06, "loss": 0.4731, "step": 926 }, { "epoch": 0.06, "grad_norm": 4.157068536120661, "learning_rate": 9.979102613930676e-06, "loss": 0.469, "step": 927 }, { "epoch": 0.06, "grad_norm": 1.628234387008205, "learning_rate": 9.979009491405145e-06, "loss": 0.4292, "step": 928 }, { "epoch": 0.06, "grad_norm": 1.6049554572463292, "learning_rate": 9.978916162291666e-06, "loss": 0.4482, "step": 929 }, { "epoch": 0.06, "grad_norm": 2.037473235289058, "learning_rate": 9.978822626594112e-06, "loss": 0.4495, "step": 930 }, { "epoch": 0.06, "grad_norm": 10.896781965707907, "learning_rate": 9.978728884316363e-06, "loss": 0.4556, "step": 931 }, { "epoch": 0.06, "grad_norm": 1.8908137079365919, "learning_rate": 9.97863493546231e-06, "loss": 0.4368, "step": 932 }, { "epoch": 0.06, "grad_norm": 2.5531223267122365, "learning_rate": 9.97854078003585e-06, "loss": 0.429, "step": 933 }, { "epoch": 0.06, "grad_norm": 7.149434927746202, "learning_rate": 9.97844641804089e-06, "loss": 0.4543, "step": 934 }, { "epoch": 0.06, "grad_norm": 2.4435713127937873, "learning_rate": 9.978351849481347e-06, "loss": 0.4593, "step": 935 }, { "epoch": 0.06, "grad_norm": 2.406142026912929, "learning_rate": 9.978257074361142e-06, "loss": 0.4534, "step": 936 }, { "epoch": 0.06, "grad_norm": 2.09372384551579, "learning_rate": 9.97816209268421e-06, "loss": 0.4175, "step": 937 }, { "epoch": 0.06, "grad_norm": 2.0932196752914467, "learning_rate": 9.978066904454489e-06, "loss": 0.4157, "step": 938 }, { "epoch": 0.06, "grad_norm": 2.9765605268754234, "learning_rate": 9.97797150967593e-06, "loss": 0.4274, "step": 939 }, { "epoch": 0.06, "grad_norm": 2.193357439145338, "learning_rate": 9.977875908352493e-06, "loss": 0.4479, "step": 940 }, { "epoch": 0.06, "grad_norm": 2.1720783248148368, "learning_rate": 9.977780100488142e-06, "loss": 0.4759, "step": 941 }, { "epoch": 0.06, "grad_norm": 10.947354107383864, "learning_rate": 9.977684086086853e-06, "loss": 0.4512, "step": 942 }, { "epoch": 0.06, "grad_norm": 3.8346989509604015, "learning_rate": 9.977587865152609e-06, "loss": 0.4239, "step": 943 }, { "epoch": 0.06, "grad_norm": 2.7483032110461862, "learning_rate": 9.977491437689403e-06, "loss": 0.4594, "step": 944 }, { "epoch": 0.06, "grad_norm": 2.0645624753941836, "learning_rate": 9.977394803701238e-06, "loss": 0.4333, "step": 945 }, { "epoch": 0.06, "grad_norm": 1.7286216204365414, "learning_rate": 9.97729796319212e-06, "loss": 0.4436, "step": 946 }, { "epoch": 0.06, "grad_norm": 10.362368679846375, "learning_rate": 9.97720091616607e-06, "loss": 0.4532, "step": 947 }, { "epoch": 0.06, "grad_norm": 2.3733509548490734, "learning_rate": 9.977103662627112e-06, "loss": 0.4085, "step": 948 }, { "epoch": 0.06, "grad_norm": 7.704821619867478, "learning_rate": 9.977006202579284e-06, "loss": 0.4379, "step": 949 }, { "epoch": 0.06, "grad_norm": 5.910476077056555, "learning_rate": 9.976908536026628e-06, "loss": 0.4538, "step": 950 }, { "epoch": 0.06, "grad_norm": 2.498882365552425, "learning_rate": 9.976810662973198e-06, "loss": 0.4737, "step": 951 }, { "epoch": 0.06, "grad_norm": 2.8904724739380527, "learning_rate": 9.976712583423053e-06, "loss": 0.4469, "step": 952 }, { "epoch": 0.06, "grad_norm": 2.05992217119996, "learning_rate": 9.976614297380264e-06, "loss": 0.4107, "step": 953 }, { "epoch": 0.06, "grad_norm": 2.9441060804384573, "learning_rate": 9.976515804848907e-06, "loss": 0.4377, "step": 954 }, { "epoch": 0.06, "grad_norm": 2.1253849536855154, "learning_rate": 9.97641710583307e-06, "loss": 0.4028, "step": 955 }, { "epoch": 0.06, "grad_norm": 2.6826962564161625, "learning_rate": 9.97631820033685e-06, "loss": 0.4501, "step": 956 }, { "epoch": 0.06, "grad_norm": 1.9882186886883442, "learning_rate": 9.976219088364347e-06, "loss": 0.4102, "step": 957 }, { "epoch": 0.06, "grad_norm": 2.3046200112231214, "learning_rate": 9.976119769919677e-06, "loss": 0.4329, "step": 958 }, { "epoch": 0.06, "grad_norm": 2.461627477595595, "learning_rate": 9.976020245006957e-06, "loss": 0.4451, "step": 959 }, { "epoch": 0.06, "grad_norm": 0.8276651557840978, "learning_rate": 9.97592051363032e-06, "loss": 0.5161, "step": 960 }, { "epoch": 0.06, "grad_norm": 2.5219751753702964, "learning_rate": 9.975820575793902e-06, "loss": 0.4213, "step": 961 }, { "epoch": 0.06, "grad_norm": 2.6107616287879676, "learning_rate": 9.975720431501851e-06, "loss": 0.427, "step": 962 }, { "epoch": 0.06, "grad_norm": 1.973225956565384, "learning_rate": 9.975620080758321e-06, "loss": 0.431, "step": 963 }, { "epoch": 0.06, "grad_norm": 3.6719909709611906, "learning_rate": 9.975519523567477e-06, "loss": 0.4253, "step": 964 }, { "epoch": 0.06, "grad_norm": 2.450332923063095, "learning_rate": 9.97541875993349e-06, "loss": 0.4618, "step": 965 }, { "epoch": 0.06, "grad_norm": 2.2104242610244804, "learning_rate": 9.97531778986054e-06, "loss": 0.4643, "step": 966 }, { "epoch": 0.06, "grad_norm": 9.771495474982805, "learning_rate": 9.975216613352818e-06, "loss": 0.44, "step": 967 }, { "epoch": 0.06, "grad_norm": 1.79462285334802, "learning_rate": 9.975115230414524e-06, "loss": 0.4188, "step": 968 }, { "epoch": 0.06, "grad_norm": 4.115747433755278, "learning_rate": 9.97501364104986e-06, "loss": 0.4444, "step": 969 }, { "epoch": 0.06, "grad_norm": 2.6015233518485714, "learning_rate": 9.974911845263045e-06, "loss": 0.4033, "step": 970 }, { "epoch": 0.06, "grad_norm": 2.4811623886152963, "learning_rate": 9.974809843058299e-06, "loss": 0.4037, "step": 971 }, { "epoch": 0.06, "grad_norm": 2.290643090711748, "learning_rate": 9.974707634439858e-06, "loss": 0.4017, "step": 972 }, { "epoch": 0.06, "grad_norm": 1.8200089712297691, "learning_rate": 9.974605219411962e-06, "loss": 0.4622, "step": 973 }, { "epoch": 0.06, "grad_norm": 1.3624872532897834, "learning_rate": 9.974502597978858e-06, "loss": 0.4331, "step": 974 }, { "epoch": 0.06, "grad_norm": 5.27210189356096, "learning_rate": 9.974399770144807e-06, "loss": 0.4147, "step": 975 }, { "epoch": 0.06, "grad_norm": 2.4383478828633045, "learning_rate": 9.974296735914072e-06, "loss": 0.4111, "step": 976 }, { "epoch": 0.06, "grad_norm": 2.056648793814065, "learning_rate": 9.974193495290931e-06, "loss": 0.44, "step": 977 }, { "epoch": 0.06, "grad_norm": 1.879102672129695, "learning_rate": 9.974090048279666e-06, "loss": 0.4472, "step": 978 }, { "epoch": 0.06, "grad_norm": 1.8466662539655996, "learning_rate": 9.973986394884571e-06, "loss": 0.4116, "step": 979 }, { "epoch": 0.06, "grad_norm": 2.037496370016901, "learning_rate": 9.973882535109944e-06, "loss": 0.4803, "step": 980 }, { "epoch": 0.06, "grad_norm": 1.9611846114870142, "learning_rate": 9.973778468960099e-06, "loss": 0.4179, "step": 981 }, { "epoch": 0.06, "grad_norm": 3.0710228622316924, "learning_rate": 9.973674196439349e-06, "loss": 0.4056, "step": 982 }, { "epoch": 0.06, "grad_norm": 1.8868523869531129, "learning_rate": 9.973569717552022e-06, "loss": 0.4231, "step": 983 }, { "epoch": 0.06, "grad_norm": 3.5643636961505556, "learning_rate": 9.973465032302455e-06, "loss": 0.4403, "step": 984 }, { "epoch": 0.06, "grad_norm": 1.8418721685888957, "learning_rate": 9.973360140694987e-06, "loss": 0.4365, "step": 985 }, { "epoch": 0.06, "grad_norm": 5.678896806142568, "learning_rate": 9.973255042733976e-06, "loss": 0.3718, "step": 986 }, { "epoch": 0.06, "grad_norm": 2.2221814726497073, "learning_rate": 9.973149738423779e-06, "loss": 0.405, "step": 987 }, { "epoch": 0.06, "grad_norm": 2.2032963475640974, "learning_rate": 9.973044227768765e-06, "loss": 0.4615, "step": 988 }, { "epoch": 0.06, "grad_norm": 1.6657263537472482, "learning_rate": 9.972938510773313e-06, "loss": 0.4407, "step": 989 }, { "epoch": 0.06, "grad_norm": 3.7025416284652275, "learning_rate": 9.972832587441811e-06, "loss": 0.4401, "step": 990 }, { "epoch": 0.06, "grad_norm": 2.0383827337350766, "learning_rate": 9.972726457778651e-06, "loss": 0.4169, "step": 991 }, { "epoch": 0.06, "grad_norm": 2.36485014621816, "learning_rate": 9.972620121788238e-06, "loss": 0.4141, "step": 992 }, { "epoch": 0.06, "grad_norm": 1.4310161874528062, "learning_rate": 9.972513579474982e-06, "loss": 0.4076, "step": 993 }, { "epoch": 0.06, "grad_norm": 4.305654451233364, "learning_rate": 9.97240683084331e-06, "loss": 0.431, "step": 994 }, { "epoch": 0.06, "grad_norm": 2.1923634812750334, "learning_rate": 9.972299875897641e-06, "loss": 0.3946, "step": 995 }, { "epoch": 0.06, "grad_norm": 1.7901803929292885, "learning_rate": 9.972192714642422e-06, "loss": 0.4345, "step": 996 }, { "epoch": 0.06, "grad_norm": 1.465272511542452, "learning_rate": 9.972085347082094e-06, "loss": 0.4202, "step": 997 }, { "epoch": 0.06, "grad_norm": 1.5262679006939772, "learning_rate": 9.971977773221115e-06, "loss": 0.4095, "step": 998 }, { "epoch": 0.06, "grad_norm": 1.4850430263095706, "learning_rate": 9.971869993063947e-06, "loss": 0.4032, "step": 999 }, { "epoch": 0.06, "grad_norm": 1.4255973979231296, "learning_rate": 9.97176200661506e-06, "loss": 0.4027, "step": 1000 }, { "epoch": 0.06, "grad_norm": 0.9043882780359147, "learning_rate": 9.971653813878937e-06, "loss": 0.4484, "step": 1001 }, { "epoch": 0.06, "grad_norm": 2.1930700474658518, "learning_rate": 9.971545414860067e-06, "loss": 0.4517, "step": 1002 }, { "epoch": 0.06, "grad_norm": 2.1065434574616355, "learning_rate": 9.971436809562948e-06, "loss": 0.4328, "step": 1003 }, { "epoch": 0.06, "grad_norm": 1.7849230866270132, "learning_rate": 9.971327997992085e-06, "loss": 0.4287, "step": 1004 }, { "epoch": 0.06, "grad_norm": 2.079922648594418, "learning_rate": 9.971218980151993e-06, "loss": 0.4515, "step": 1005 }, { "epoch": 0.06, "grad_norm": 2.026995958465551, "learning_rate": 9.971109756047197e-06, "loss": 0.4135, "step": 1006 }, { "epoch": 0.06, "grad_norm": 2.3688983241560737, "learning_rate": 9.971000325682225e-06, "loss": 0.4248, "step": 1007 }, { "epoch": 0.06, "grad_norm": 1.4013386275445587, "learning_rate": 9.970890689061622e-06, "loss": 0.4466, "step": 1008 }, { "epoch": 0.06, "grad_norm": 1.4509037141099472, "learning_rate": 9.970780846189934e-06, "loss": 0.4171, "step": 1009 }, { "epoch": 0.06, "grad_norm": 1.894729520330879, "learning_rate": 9.970670797071719e-06, "loss": 0.4144, "step": 1010 }, { "epoch": 0.06, "grad_norm": 4.0500302607849745, "learning_rate": 9.970560541711547e-06, "loss": 0.4362, "step": 1011 }, { "epoch": 0.06, "grad_norm": 1.8937281240678299, "learning_rate": 9.970450080113988e-06, "loss": 0.4361, "step": 1012 }, { "epoch": 0.06, "grad_norm": 2.432129687251775, "learning_rate": 9.970339412283624e-06, "loss": 0.4471, "step": 1013 }, { "epoch": 0.06, "grad_norm": 1.7322069181436954, "learning_rate": 9.970228538225051e-06, "loss": 0.4365, "step": 1014 }, { "epoch": 0.06, "grad_norm": 1.5036771104025033, "learning_rate": 9.97011745794287e-06, "loss": 0.4034, "step": 1015 }, { "epoch": 0.06, "grad_norm": 2.4627252751655098, "learning_rate": 9.970006171441684e-06, "loss": 0.4404, "step": 1016 }, { "epoch": 0.06, "grad_norm": 2.5231895430952793, "learning_rate": 9.969894678726118e-06, "loss": 0.3948, "step": 1017 }, { "epoch": 0.06, "grad_norm": 1.5009359351294047, "learning_rate": 9.969782979800791e-06, "loss": 0.428, "step": 1018 }, { "epoch": 0.06, "grad_norm": 1.6515350137500342, "learning_rate": 9.969671074670341e-06, "loss": 0.4681, "step": 1019 }, { "epoch": 0.06, "grad_norm": 2.1735423046277487, "learning_rate": 9.969558963339414e-06, "loss": 0.4073, "step": 1020 }, { "epoch": 0.06, "grad_norm": 1.6301082636370194, "learning_rate": 9.969446645812656e-06, "loss": 0.4251, "step": 1021 }, { "epoch": 0.06, "grad_norm": 3.353792866562552, "learning_rate": 9.969334122094732e-06, "loss": 0.4101, "step": 1022 }, { "epoch": 0.06, "grad_norm": 2.4371540032356265, "learning_rate": 9.969221392190307e-06, "loss": 0.497, "step": 1023 }, { "epoch": 0.06, "grad_norm": 1.8390282407206398, "learning_rate": 9.96910845610406e-06, "loss": 0.4329, "step": 1024 }, { "epoch": 0.06, "grad_norm": 2.9311063342477017, "learning_rate": 9.968995313840678e-06, "loss": 0.4064, "step": 1025 }, { "epoch": 0.06, "grad_norm": 2.249935201981566, "learning_rate": 9.968881965404855e-06, "loss": 0.3997, "step": 1026 }, { "epoch": 0.06, "grad_norm": 1.5217798343803648, "learning_rate": 9.968768410801292e-06, "loss": 0.4276, "step": 1027 }, { "epoch": 0.06, "grad_norm": 1.3442816268055013, "learning_rate": 9.968654650034702e-06, "loss": 0.4113, "step": 1028 }, { "epoch": 0.06, "grad_norm": 1.8157652085876863, "learning_rate": 9.968540683109806e-06, "loss": 0.4587, "step": 1029 }, { "epoch": 0.06, "grad_norm": 1.5606612589384359, "learning_rate": 9.968426510031333e-06, "loss": 0.397, "step": 1030 }, { "epoch": 0.06, "grad_norm": 3.1053549705834635, "learning_rate": 9.968312130804019e-06, "loss": 0.395, "step": 1031 }, { "epoch": 0.06, "grad_norm": 2.1756119353760828, "learning_rate": 9.968197545432608e-06, "loss": 0.4166, "step": 1032 }, { "epoch": 0.06, "grad_norm": 2.0672867495576734, "learning_rate": 9.968082753921857e-06, "loss": 0.4351, "step": 1033 }, { "epoch": 0.07, "grad_norm": 2.497903565945721, "learning_rate": 9.967967756276528e-06, "loss": 0.4164, "step": 1034 }, { "epoch": 0.07, "grad_norm": 1.6953064491487724, "learning_rate": 9.967852552501394e-06, "loss": 0.453, "step": 1035 }, { "epoch": 0.07, "grad_norm": 3.212831139459771, "learning_rate": 9.967737142601233e-06, "loss": 0.4428, "step": 1036 }, { "epoch": 0.07, "grad_norm": 9.885711416467117, "learning_rate": 9.967621526580836e-06, "loss": 0.442, "step": 1037 }, { "epoch": 0.07, "grad_norm": 4.419841719673243, "learning_rate": 9.967505704444996e-06, "loss": 0.4371, "step": 1038 }, { "epoch": 0.07, "grad_norm": 2.1092049370758943, "learning_rate": 9.967389676198522e-06, "loss": 0.4586, "step": 1039 }, { "epoch": 0.07, "grad_norm": 1.5785517336382928, "learning_rate": 9.967273441846227e-06, "loss": 0.4596, "step": 1040 }, { "epoch": 0.07, "grad_norm": 2.1897340497462054, "learning_rate": 9.967157001392933e-06, "loss": 0.4071, "step": 1041 }, { "epoch": 0.07, "grad_norm": 1.7182912408548263, "learning_rate": 9.967040354843473e-06, "loss": 0.4325, "step": 1042 }, { "epoch": 0.07, "grad_norm": 1.9268781515312112, "learning_rate": 9.966923502202688e-06, "loss": 0.4316, "step": 1043 }, { "epoch": 0.07, "grad_norm": 2.8581954392547293, "learning_rate": 9.966806443475423e-06, "loss": 0.4576, "step": 1044 }, { "epoch": 0.07, "grad_norm": 2.2620286989720024, "learning_rate": 9.966689178666537e-06, "loss": 0.4151, "step": 1045 }, { "epoch": 0.07, "grad_norm": 1.9879301501050124, "learning_rate": 9.966571707780894e-06, "loss": 0.3877, "step": 1046 }, { "epoch": 0.07, "grad_norm": 1.5951202653168173, "learning_rate": 9.96645403082337e-06, "loss": 0.4672, "step": 1047 }, { "epoch": 0.07, "grad_norm": 1.892616323634653, "learning_rate": 9.966336147798848e-06, "loss": 0.436, "step": 1048 }, { "epoch": 0.07, "grad_norm": 3.5057467579778137, "learning_rate": 9.966218058712218e-06, "loss": 0.4144, "step": 1049 }, { "epoch": 0.07, "grad_norm": 2.5211997236715327, "learning_rate": 9.966099763568377e-06, "loss": 0.3761, "step": 1050 }, { "epoch": 0.07, "grad_norm": 2.036891383244653, "learning_rate": 9.96598126237224e-06, "loss": 0.4391, "step": 1051 }, { "epoch": 0.07, "grad_norm": 3.3484219984213954, "learning_rate": 9.965862555128717e-06, "loss": 0.3985, "step": 1052 }, { "epoch": 0.07, "grad_norm": 3.5805476026012735, "learning_rate": 9.965743641842737e-06, "loss": 0.4209, "step": 1053 }, { "epoch": 0.07, "grad_norm": 8.843236713409933, "learning_rate": 9.965624522519233e-06, "loss": 0.4373, "step": 1054 }, { "epoch": 0.07, "grad_norm": 2.0949125824287727, "learning_rate": 9.965505197163148e-06, "loss": 0.4292, "step": 1055 }, { "epoch": 0.07, "grad_norm": 2.22939719849107, "learning_rate": 9.965385665779432e-06, "loss": 0.4244, "step": 1056 }, { "epoch": 0.07, "grad_norm": 1.6796170096341314, "learning_rate": 9.965265928373045e-06, "loss": 0.3717, "step": 1057 }, { "epoch": 0.07, "grad_norm": 1.8208949409810051, "learning_rate": 9.965145984948956e-06, "loss": 0.4295, "step": 1058 }, { "epoch": 0.07, "grad_norm": 1.6048724862682364, "learning_rate": 9.96502583551214e-06, "loss": 0.4128, "step": 1059 }, { "epoch": 0.07, "grad_norm": 2.182544860851816, "learning_rate": 9.964905480067585e-06, "loss": 0.4421, "step": 1060 }, { "epoch": 0.07, "grad_norm": 2.8770526281876805, "learning_rate": 9.964784918620284e-06, "loss": 0.4144, "step": 1061 }, { "epoch": 0.07, "grad_norm": 2.0502433368725024, "learning_rate": 9.964664151175235e-06, "loss": 0.4149, "step": 1062 }, { "epoch": 0.07, "grad_norm": 1.6852493174725927, "learning_rate": 9.964543177737453e-06, "loss": 0.4279, "step": 1063 }, { "epoch": 0.07, "grad_norm": 2.3863314076720705, "learning_rate": 9.964421998311957e-06, "loss": 0.4309, "step": 1064 }, { "epoch": 0.07, "grad_norm": 1.9774735470449674, "learning_rate": 9.964300612903775e-06, "loss": 0.3857, "step": 1065 }, { "epoch": 0.07, "grad_norm": 1.7865301839072318, "learning_rate": 9.964179021517943e-06, "loss": 0.4374, "step": 1066 }, { "epoch": 0.07, "grad_norm": 1.9105237189090487, "learning_rate": 9.964057224159505e-06, "loss": 0.434, "step": 1067 }, { "epoch": 0.07, "grad_norm": 1.0308961812961612, "learning_rate": 9.963935220833516e-06, "loss": 0.5032, "step": 1068 }, { "epoch": 0.07, "grad_norm": 6.567470420085035, "learning_rate": 9.963813011545039e-06, "loss": 0.4449, "step": 1069 }, { "epoch": 0.07, "grad_norm": 1.9238137775653517, "learning_rate": 9.963690596299142e-06, "loss": 0.4169, "step": 1070 }, { "epoch": 0.07, "grad_norm": 2.224238352628365, "learning_rate": 9.963567975100906e-06, "loss": 0.4024, "step": 1071 }, { "epoch": 0.07, "grad_norm": 1.8992630344406685, "learning_rate": 9.963445147955417e-06, "loss": 0.4147, "step": 1072 }, { "epoch": 0.07, "grad_norm": 2.146992246234404, "learning_rate": 9.963322114867775e-06, "loss": 0.4716, "step": 1073 }, { "epoch": 0.07, "grad_norm": 5.068810859692916, "learning_rate": 9.963198875843082e-06, "loss": 0.4104, "step": 1074 }, { "epoch": 0.07, "grad_norm": 2.121934838520355, "learning_rate": 9.963075430886451e-06, "loss": 0.4189, "step": 1075 }, { "epoch": 0.07, "grad_norm": 7.8409386562456875, "learning_rate": 9.962951780003005e-06, "loss": 0.4545, "step": 1076 }, { "epoch": 0.07, "grad_norm": 2.44691412556006, "learning_rate": 9.962827923197875e-06, "loss": 0.4159, "step": 1077 }, { "epoch": 0.07, "grad_norm": 1.8674712042238808, "learning_rate": 9.9627038604762e-06, "loss": 0.4232, "step": 1078 }, { "epoch": 0.07, "grad_norm": 2.7475315454587137, "learning_rate": 9.962579591843126e-06, "loss": 0.4344, "step": 1079 }, { "epoch": 0.07, "grad_norm": 1.9700184410379884, "learning_rate": 9.962455117303813e-06, "loss": 0.385, "step": 1080 }, { "epoch": 0.07, "grad_norm": 3.734268009587939, "learning_rate": 9.96233043686342e-06, "loss": 0.4488, "step": 1081 }, { "epoch": 0.07, "grad_norm": 2.3792889161990405, "learning_rate": 9.962205550527124e-06, "loss": 0.4301, "step": 1082 }, { "epoch": 0.07, "grad_norm": 3.816078388293769, "learning_rate": 9.962080458300105e-06, "loss": 0.4468, "step": 1083 }, { "epoch": 0.07, "grad_norm": 1.786762985149165, "learning_rate": 9.961955160187555e-06, "loss": 0.4022, "step": 1084 }, { "epoch": 0.07, "grad_norm": 1.6825212039123876, "learning_rate": 9.961829656194672e-06, "loss": 0.4142, "step": 1085 }, { "epoch": 0.07, "grad_norm": 3.4717471390365557, "learning_rate": 9.961703946326664e-06, "loss": 0.4195, "step": 1086 }, { "epoch": 0.07, "grad_norm": 1.7048649803787788, "learning_rate": 9.961578030588746e-06, "loss": 0.3955, "step": 1087 }, { "epoch": 0.07, "grad_norm": 3.1645420543212817, "learning_rate": 9.961451908986142e-06, "loss": 0.4688, "step": 1088 }, { "epoch": 0.07, "grad_norm": 2.104394571412978, "learning_rate": 9.961325581524086e-06, "loss": 0.4157, "step": 1089 }, { "epoch": 0.07, "grad_norm": 1.7553422306302857, "learning_rate": 9.961199048207819e-06, "loss": 0.4036, "step": 1090 }, { "epoch": 0.07, "grad_norm": 1.3138483785122332, "learning_rate": 9.961072309042592e-06, "loss": 0.5112, "step": 1091 }, { "epoch": 0.07, "grad_norm": 2.5564848942173555, "learning_rate": 9.960945364033662e-06, "loss": 0.4448, "step": 1092 }, { "epoch": 0.07, "grad_norm": 0.9577214807056948, "learning_rate": 9.9608182131863e-06, "loss": 0.4981, "step": 1093 }, { "epoch": 0.07, "grad_norm": 2.209846204917101, "learning_rate": 9.960690856505774e-06, "loss": 0.4315, "step": 1094 }, { "epoch": 0.07, "grad_norm": 3.3829740417530734, "learning_rate": 9.960563293997377e-06, "loss": 0.4202, "step": 1095 }, { "epoch": 0.07, "grad_norm": 3.6975487269010596, "learning_rate": 9.960435525666397e-06, "loss": 0.4224, "step": 1096 }, { "epoch": 0.07, "grad_norm": 3.1979383948438187, "learning_rate": 9.960307551518135e-06, "loss": 0.4143, "step": 1097 }, { "epoch": 0.07, "grad_norm": 1.6130717949337823, "learning_rate": 9.960179371557905e-06, "loss": 0.4302, "step": 1098 }, { "epoch": 0.07, "grad_norm": 13.193657055310098, "learning_rate": 9.960050985791021e-06, "loss": 0.4207, "step": 1099 }, { "epoch": 0.07, "grad_norm": 1.8108961622421267, "learning_rate": 9.959922394222811e-06, "loss": 0.4081, "step": 1100 }, { "epoch": 0.07, "grad_norm": 1.8118247307835853, "learning_rate": 9.959793596858614e-06, "loss": 0.4129, "step": 1101 }, { "epoch": 0.07, "grad_norm": 1.982502282567653, "learning_rate": 9.959664593703769e-06, "loss": 0.4397, "step": 1102 }, { "epoch": 0.07, "grad_norm": 1.6872548116434403, "learning_rate": 9.95953538476363e-06, "loss": 0.4436, "step": 1103 }, { "epoch": 0.07, "grad_norm": 1.8915972803886736, "learning_rate": 9.959405970043558e-06, "loss": 0.4035, "step": 1104 }, { "epoch": 0.07, "grad_norm": 2.005443454230022, "learning_rate": 9.959276349548926e-06, "loss": 0.3938, "step": 1105 }, { "epoch": 0.07, "grad_norm": 2.1111697114362995, "learning_rate": 9.959146523285108e-06, "loss": 0.4682, "step": 1106 }, { "epoch": 0.07, "grad_norm": 4.906603648905359, "learning_rate": 9.959016491257491e-06, "loss": 0.4771, "step": 1107 }, { "epoch": 0.07, "grad_norm": 1.8274519497356905, "learning_rate": 9.958886253471474e-06, "loss": 0.4155, "step": 1108 }, { "epoch": 0.07, "grad_norm": 2.3098968910760096, "learning_rate": 9.958755809932457e-06, "loss": 0.4178, "step": 1109 }, { "epoch": 0.07, "grad_norm": 1.9589235520258326, "learning_rate": 9.958625160645855e-06, "loss": 0.4624, "step": 1110 }, { "epoch": 0.07, "grad_norm": 3.923709855815237, "learning_rate": 9.958494305617087e-06, "loss": 0.3995, "step": 1111 }, { "epoch": 0.07, "grad_norm": 6.103399335480662, "learning_rate": 9.958363244851584e-06, "loss": 0.4158, "step": 1112 }, { "epoch": 0.07, "grad_norm": 5.800277095827046, "learning_rate": 9.95823197835478e-06, "loss": 0.4377, "step": 1113 }, { "epoch": 0.07, "grad_norm": 1.8306837780654002, "learning_rate": 9.958100506132127e-06, "loss": 0.4303, "step": 1114 }, { "epoch": 0.07, "grad_norm": 4.907673716085669, "learning_rate": 9.957968828189076e-06, "loss": 0.4096, "step": 1115 }, { "epoch": 0.07, "grad_norm": 2.076372980055298, "learning_rate": 9.957836944531091e-06, "loss": 0.4238, "step": 1116 }, { "epoch": 0.07, "grad_norm": 1.5765367603892482, "learning_rate": 9.957704855163648e-06, "loss": 0.4335, "step": 1117 }, { "epoch": 0.07, "grad_norm": 4.268987081114445, "learning_rate": 9.957572560092223e-06, "loss": 0.424, "step": 1118 }, { "epoch": 0.07, "grad_norm": 2.5252904351103718, "learning_rate": 9.957440059322308e-06, "loss": 0.4293, "step": 1119 }, { "epoch": 0.07, "grad_norm": 3.5133172714908736, "learning_rate": 9.957307352859397e-06, "loss": 0.4297, "step": 1120 }, { "epoch": 0.07, "grad_norm": 1.894375399108354, "learning_rate": 9.957174440709e-06, "loss": 0.4009, "step": 1121 }, { "epoch": 0.07, "grad_norm": 5.095415619491392, "learning_rate": 9.957041322876632e-06, "loss": 0.4252, "step": 1122 }, { "epoch": 0.07, "grad_norm": 2.1931537983196945, "learning_rate": 9.956907999367815e-06, "loss": 0.4436, "step": 1123 }, { "epoch": 0.07, "grad_norm": 2.1922907199920667, "learning_rate": 9.956774470188079e-06, "loss": 0.427, "step": 1124 }, { "epoch": 0.07, "grad_norm": 3.1401379075208133, "learning_rate": 9.956640735342966e-06, "loss": 0.4364, "step": 1125 }, { "epoch": 0.07, "grad_norm": 2.34329941398725, "learning_rate": 9.956506794838025e-06, "loss": 0.4483, "step": 1126 }, { "epoch": 0.07, "grad_norm": 2.9979221997681402, "learning_rate": 9.956372648678814e-06, "loss": 0.4196, "step": 1127 }, { "epoch": 0.07, "grad_norm": 2.8312909577519454, "learning_rate": 9.956238296870898e-06, "loss": 0.4273, "step": 1128 }, { "epoch": 0.07, "grad_norm": 2.130057516734946, "learning_rate": 9.956103739419852e-06, "loss": 0.3799, "step": 1129 }, { "epoch": 0.07, "grad_norm": 4.3755194095153955, "learning_rate": 9.955968976331258e-06, "loss": 0.4219, "step": 1130 }, { "epoch": 0.07, "grad_norm": 2.2437667514805772, "learning_rate": 9.95583400761071e-06, "loss": 0.4259, "step": 1131 }, { "epoch": 0.07, "grad_norm": 2.1191427593579175, "learning_rate": 9.955698833263805e-06, "loss": 0.4436, "step": 1132 }, { "epoch": 0.07, "grad_norm": 2.3600928176122453, "learning_rate": 9.955563453296154e-06, "loss": 0.4326, "step": 1133 }, { "epoch": 0.07, "grad_norm": 5.174961780325701, "learning_rate": 9.955427867713372e-06, "loss": 0.4094, "step": 1134 }, { "epoch": 0.07, "grad_norm": 2.103957737047937, "learning_rate": 9.955292076521088e-06, "loss": 0.4169, "step": 1135 }, { "epoch": 0.07, "grad_norm": 2.719030182825631, "learning_rate": 9.955156079724932e-06, "loss": 0.4353, "step": 1136 }, { "epoch": 0.07, "grad_norm": 6.67841001777247, "learning_rate": 9.955019877330549e-06, "loss": 0.3822, "step": 1137 }, { "epoch": 0.07, "grad_norm": 1.6936061193769136, "learning_rate": 9.95488346934359e-06, "loss": 0.4211, "step": 1138 }, { "epoch": 0.07, "grad_norm": 1.8270868845291233, "learning_rate": 9.954746855769717e-06, "loss": 0.4136, "step": 1139 }, { "epoch": 0.07, "grad_norm": 3.942249183739099, "learning_rate": 9.954610036614595e-06, "loss": 0.4024, "step": 1140 }, { "epoch": 0.07, "grad_norm": 5.315704927234245, "learning_rate": 9.954473011883903e-06, "loss": 0.4155, "step": 1141 }, { "epoch": 0.07, "grad_norm": 1.843726684819333, "learning_rate": 9.954335781583326e-06, "loss": 0.4286, "step": 1142 }, { "epoch": 0.07, "grad_norm": 1.640572597785077, "learning_rate": 9.954198345718556e-06, "loss": 0.4089, "step": 1143 }, { "epoch": 0.07, "grad_norm": 2.0486714901349705, "learning_rate": 9.954060704295298e-06, "loss": 0.3875, "step": 1144 }, { "epoch": 0.07, "grad_norm": 2.6723275491556393, "learning_rate": 9.953922857319261e-06, "loss": 0.4276, "step": 1145 }, { "epoch": 0.07, "grad_norm": 6.724395943504821, "learning_rate": 9.953784804796167e-06, "loss": 0.401, "step": 1146 }, { "epoch": 0.07, "grad_norm": 2.2957490366865714, "learning_rate": 9.95364654673174e-06, "loss": 0.4555, "step": 1147 }, { "epoch": 0.07, "grad_norm": 2.170708342495129, "learning_rate": 9.953508083131722e-06, "loss": 0.4553, "step": 1148 }, { "epoch": 0.07, "grad_norm": 2.8269200120497504, "learning_rate": 9.953369414001853e-06, "loss": 0.4604, "step": 1149 }, { "epoch": 0.07, "grad_norm": 1.901983238895689, "learning_rate": 9.95323053934789e-06, "loss": 0.4097, "step": 1150 }, { "epoch": 0.07, "grad_norm": 1.4885513730789306, "learning_rate": 9.953091459175595e-06, "loss": 0.426, "step": 1151 }, { "epoch": 0.07, "grad_norm": 12.654939040249722, "learning_rate": 9.952952173490735e-06, "loss": 0.3987, "step": 1152 }, { "epoch": 0.07, "grad_norm": 2.620085677907411, "learning_rate": 9.952812682299093e-06, "loss": 0.4093, "step": 1153 }, { "epoch": 0.07, "grad_norm": 2.807227735743264, "learning_rate": 9.952672985606457e-06, "loss": 0.3993, "step": 1154 }, { "epoch": 0.07, "grad_norm": 2.077883064036308, "learning_rate": 9.95253308341862e-06, "loss": 0.4111, "step": 1155 }, { "epoch": 0.07, "grad_norm": 1.7373548437655704, "learning_rate": 9.952392975741389e-06, "loss": 0.3972, "step": 1156 }, { "epoch": 0.07, "grad_norm": 2.4599863742025554, "learning_rate": 9.95225266258058e-06, "loss": 0.4592, "step": 1157 }, { "epoch": 0.07, "grad_norm": 2.8306714239615562, "learning_rate": 9.952112143942008e-06, "loss": 0.4112, "step": 1158 }, { "epoch": 0.07, "grad_norm": 2.4581276301566324, "learning_rate": 9.951971419831509e-06, "loss": 0.4061, "step": 1159 }, { "epoch": 0.07, "grad_norm": 2.24926755423996, "learning_rate": 9.951830490254918e-06, "loss": 0.4054, "step": 1160 }, { "epoch": 0.07, "grad_norm": 3.8121588491986924, "learning_rate": 9.951689355218088e-06, "loss": 0.4246, "step": 1161 }, { "epoch": 0.07, "grad_norm": 1.8822317861508915, "learning_rate": 9.951548014726868e-06, "loss": 0.4248, "step": 1162 }, { "epoch": 0.07, "grad_norm": 3.457734983831385, "learning_rate": 9.951406468787128e-06, "loss": 0.4024, "step": 1163 }, { "epoch": 0.07, "grad_norm": 1.6538322895061792, "learning_rate": 9.95126471740474e-06, "loss": 0.4229, "step": 1164 }, { "epoch": 0.07, "grad_norm": 2.180062949779296, "learning_rate": 9.951122760585582e-06, "loss": 0.3761, "step": 1165 }, { "epoch": 0.07, "grad_norm": 2.985139784360479, "learning_rate": 9.950980598335548e-06, "loss": 0.4176, "step": 1166 }, { "epoch": 0.07, "grad_norm": 3.098600871684249, "learning_rate": 9.950838230660535e-06, "loss": 0.4318, "step": 1167 }, { "epoch": 0.07, "grad_norm": 2.4070008307892183, "learning_rate": 9.95069565756645e-06, "loss": 0.4293, "step": 1168 }, { "epoch": 0.07, "grad_norm": 2.2088169739092502, "learning_rate": 9.950552879059208e-06, "loss": 0.3926, "step": 1169 }, { "epoch": 0.07, "grad_norm": 2.45918126709504, "learning_rate": 9.950409895144732e-06, "loss": 0.4406, "step": 1170 }, { "epoch": 0.07, "grad_norm": 2.6032233842578774, "learning_rate": 9.95026670582896e-06, "loss": 0.3942, "step": 1171 }, { "epoch": 0.07, "grad_norm": 2.1697066862160463, "learning_rate": 9.950123311117828e-06, "loss": 0.4011, "step": 1172 }, { "epoch": 0.07, "grad_norm": 3.169470340729822, "learning_rate": 9.949979711017288e-06, "loss": 0.4097, "step": 1173 }, { "epoch": 0.07, "grad_norm": 1.9054993124774207, "learning_rate": 9.949835905533298e-06, "loss": 0.4122, "step": 1174 }, { "epoch": 0.07, "grad_norm": 4.157218184394324, "learning_rate": 9.949691894671824e-06, "loss": 0.4312, "step": 1175 }, { "epoch": 0.07, "grad_norm": 2.020711252280423, "learning_rate": 9.949547678438842e-06, "loss": 0.4044, "step": 1176 }, { "epoch": 0.07, "grad_norm": 2.324584809973775, "learning_rate": 9.949403256840334e-06, "loss": 0.3903, "step": 1177 }, { "epoch": 0.07, "grad_norm": 2.0822794369792312, "learning_rate": 9.949258629882295e-06, "loss": 0.4084, "step": 1178 }, { "epoch": 0.07, "grad_norm": 1.833722949501575, "learning_rate": 9.949113797570724e-06, "loss": 0.3842, "step": 1179 }, { "epoch": 0.07, "grad_norm": 2.186114115992921, "learning_rate": 9.948968759911633e-06, "loss": 0.4584, "step": 1180 }, { "epoch": 0.07, "grad_norm": 5.030559565827121, "learning_rate": 9.948823516911034e-06, "loss": 0.4066, "step": 1181 }, { "epoch": 0.07, "grad_norm": 1.8127397266979124, "learning_rate": 9.948678068574959e-06, "loss": 0.4168, "step": 1182 }, { "epoch": 0.07, "grad_norm": 1.9327679928843606, "learning_rate": 9.948532414909442e-06, "loss": 0.4453, "step": 1183 }, { "epoch": 0.07, "grad_norm": 1.5909469409027934, "learning_rate": 9.948386555920525e-06, "loss": 0.4246, "step": 1184 }, { "epoch": 0.07, "grad_norm": 1.654456156371072, "learning_rate": 9.94824049161426e-06, "loss": 0.3741, "step": 1185 }, { "epoch": 0.07, "grad_norm": 3.50992240356473, "learning_rate": 9.94809422199671e-06, "loss": 0.432, "step": 1186 }, { "epoch": 0.07, "grad_norm": 3.8768086792182466, "learning_rate": 9.947947747073939e-06, "loss": 0.3945, "step": 1187 }, { "epoch": 0.07, "grad_norm": 4.157471389093322, "learning_rate": 9.947801066852029e-06, "loss": 0.4125, "step": 1188 }, { "epoch": 0.07, "grad_norm": 3.194982551871048, "learning_rate": 9.947654181337063e-06, "loss": 0.4154, "step": 1189 }, { "epoch": 0.07, "grad_norm": 2.6524354804752717, "learning_rate": 9.947507090535139e-06, "loss": 0.4158, "step": 1190 }, { "epoch": 0.07, "grad_norm": 3.4113212327423694, "learning_rate": 9.947359794452356e-06, "loss": 0.4155, "step": 1191 }, { "epoch": 0.07, "grad_norm": 3.1654870426499024, "learning_rate": 9.94721229309483e-06, "loss": 0.4089, "step": 1192 }, { "epoch": 0.08, "grad_norm": 2.3239226693757096, "learning_rate": 9.947064586468677e-06, "loss": 0.4077, "step": 1193 }, { "epoch": 0.08, "grad_norm": 3.045717960251605, "learning_rate": 9.946916674580028e-06, "loss": 0.4048, "step": 1194 }, { "epoch": 0.08, "grad_norm": 3.94778507161363, "learning_rate": 9.946768557435019e-06, "loss": 0.3821, "step": 1195 }, { "epoch": 0.08, "grad_norm": 4.863795468418417, "learning_rate": 9.946620235039797e-06, "loss": 0.4062, "step": 1196 }, { "epoch": 0.08, "grad_norm": 2.1985856913958752, "learning_rate": 9.946471707400514e-06, "loss": 0.4419, "step": 1197 }, { "epoch": 0.08, "grad_norm": 2.0466579467587525, "learning_rate": 9.946322974523336e-06, "loss": 0.4051, "step": 1198 }, { "epoch": 0.08, "grad_norm": 1.9695428330223819, "learning_rate": 9.94617403641443e-06, "loss": 0.4178, "step": 1199 }, { "epoch": 0.08, "grad_norm": 2.3333009429864418, "learning_rate": 9.946024893079977e-06, "loss": 0.4116, "step": 1200 }, { "epoch": 0.08, "grad_norm": 2.266405677726087, "learning_rate": 9.945875544526168e-06, "loss": 0.3841, "step": 1201 }, { "epoch": 0.08, "grad_norm": 1.924999768845305, "learning_rate": 9.945725990759197e-06, "loss": 0.4005, "step": 1202 }, { "epoch": 0.08, "grad_norm": 3.431551423993694, "learning_rate": 9.94557623178527e-06, "loss": 0.3906, "step": 1203 }, { "epoch": 0.08, "grad_norm": 2.394086168640765, "learning_rate": 9.945426267610603e-06, "loss": 0.4211, "step": 1204 }, { "epoch": 0.08, "grad_norm": 3.124702489165753, "learning_rate": 9.945276098241413e-06, "loss": 0.4062, "step": 1205 }, { "epoch": 0.08, "grad_norm": 2.0348838838892327, "learning_rate": 9.945125723683934e-06, "loss": 0.4045, "step": 1206 }, { "epoch": 0.08, "grad_norm": 2.079576986386569, "learning_rate": 9.944975143944407e-06, "loss": 0.404, "step": 1207 }, { "epoch": 0.08, "grad_norm": 3.741809985860796, "learning_rate": 9.944824359029078e-06, "loss": 0.4253, "step": 1208 }, { "epoch": 0.08, "grad_norm": 3.7501764025367916, "learning_rate": 9.944673368944202e-06, "loss": 0.397, "step": 1209 }, { "epoch": 0.08, "grad_norm": 2.159395742750831, "learning_rate": 9.944522173696047e-06, "loss": 0.4116, "step": 1210 }, { "epoch": 0.08, "grad_norm": 2.466016323510857, "learning_rate": 9.944370773290883e-06, "loss": 0.3907, "step": 1211 }, { "epoch": 0.08, "grad_norm": 3.971238187842781, "learning_rate": 9.944219167734994e-06, "loss": 0.4272, "step": 1212 }, { "epoch": 0.08, "grad_norm": 2.0298059740919654, "learning_rate": 9.94406735703467e-06, "loss": 0.406, "step": 1213 }, { "epoch": 0.08, "grad_norm": 2.4026259661066875, "learning_rate": 9.943915341196209e-06, "loss": 0.3856, "step": 1214 }, { "epoch": 0.08, "grad_norm": 2.624219757935532, "learning_rate": 9.94376312022592e-06, "loss": 0.405, "step": 1215 }, { "epoch": 0.08, "grad_norm": 2.4718592693709014, "learning_rate": 9.943610694130117e-06, "loss": 0.408, "step": 1216 }, { "epoch": 0.08, "grad_norm": 6.6270700810101255, "learning_rate": 9.943458062915126e-06, "loss": 0.4193, "step": 1217 }, { "epoch": 0.08, "grad_norm": 1.9171228652969508, "learning_rate": 9.94330522658728e-06, "loss": 0.3958, "step": 1218 }, { "epoch": 0.08, "grad_norm": 2.02687721984281, "learning_rate": 9.94315218515292e-06, "loss": 0.3846, "step": 1219 }, { "epoch": 0.08, "grad_norm": 2.270919265580942, "learning_rate": 9.942998938618394e-06, "loss": 0.4047, "step": 1220 }, { "epoch": 0.08, "grad_norm": 1.6915723864732783, "learning_rate": 9.942845486990064e-06, "loss": 0.5804, "step": 1221 }, { "epoch": 0.08, "grad_norm": 3.5053069801244665, "learning_rate": 9.942691830274293e-06, "loss": 0.3884, "step": 1222 }, { "epoch": 0.08, "grad_norm": 2.195396899547578, "learning_rate": 9.942537968477461e-06, "loss": 0.3948, "step": 1223 }, { "epoch": 0.08, "grad_norm": 1.8535971708343282, "learning_rate": 9.94238390160595e-06, "loss": 0.4287, "step": 1224 }, { "epoch": 0.08, "grad_norm": 3.925765481633316, "learning_rate": 9.942229629666152e-06, "loss": 0.3974, "step": 1225 }, { "epoch": 0.08, "grad_norm": 2.254763746431971, "learning_rate": 9.942075152664467e-06, "loss": 0.3776, "step": 1226 }, { "epoch": 0.08, "grad_norm": 2.6502737838639416, "learning_rate": 9.941920470607306e-06, "loss": 0.4086, "step": 1227 }, { "epoch": 0.08, "grad_norm": 1.8380067080178115, "learning_rate": 9.941765583501088e-06, "loss": 0.4707, "step": 1228 }, { "epoch": 0.08, "grad_norm": 2.995217067426309, "learning_rate": 9.941610491352238e-06, "loss": 0.405, "step": 1229 }, { "epoch": 0.08, "grad_norm": 1.9206367129209652, "learning_rate": 9.94145519416719e-06, "loss": 0.3878, "step": 1230 }, { "epoch": 0.08, "grad_norm": 3.0937950878177984, "learning_rate": 9.94129969195239e-06, "loss": 0.3835, "step": 1231 }, { "epoch": 0.08, "grad_norm": 1.681959798676605, "learning_rate": 9.94114398471429e-06, "loss": 0.3898, "step": 1232 }, { "epoch": 0.08, "grad_norm": 2.38673455622357, "learning_rate": 9.94098807245935e-06, "loss": 0.4006, "step": 1233 }, { "epoch": 0.08, "grad_norm": 1.7452893009158057, "learning_rate": 9.940831955194036e-06, "loss": 0.3908, "step": 1234 }, { "epoch": 0.08, "grad_norm": 1.893389830279717, "learning_rate": 9.94067563292483e-06, "loss": 0.4397, "step": 1235 }, { "epoch": 0.08, "grad_norm": 2.4763339006276053, "learning_rate": 9.940519105658217e-06, "loss": 0.4231, "step": 1236 }, { "epoch": 0.08, "grad_norm": 1.9206715546328543, "learning_rate": 9.94036237340069e-06, "loss": 0.4188, "step": 1237 }, { "epoch": 0.08, "grad_norm": 2.226804987608132, "learning_rate": 9.940205436158753e-06, "loss": 0.4057, "step": 1238 }, { "epoch": 0.08, "grad_norm": 2.2201140495236245, "learning_rate": 9.940048293938918e-06, "loss": 0.398, "step": 1239 }, { "epoch": 0.08, "grad_norm": 23.169631949745703, "learning_rate": 9.939890946747703e-06, "loss": 0.4126, "step": 1240 }, { "epoch": 0.08, "grad_norm": 1.8901675996120868, "learning_rate": 9.93973339459164e-06, "loss": 0.4422, "step": 1241 }, { "epoch": 0.08, "grad_norm": 2.0173839554868, "learning_rate": 9.939575637477266e-06, "loss": 0.3825, "step": 1242 }, { "epoch": 0.08, "grad_norm": 2.3312057219372173, "learning_rate": 9.939417675411123e-06, "loss": 0.3937, "step": 1243 }, { "epoch": 0.08, "grad_norm": 2.41994531044071, "learning_rate": 9.939259508399767e-06, "loss": 0.3645, "step": 1244 }, { "epoch": 0.08, "grad_norm": 5.130732966256115, "learning_rate": 9.939101136449763e-06, "loss": 0.4355, "step": 1245 }, { "epoch": 0.08, "grad_norm": 0.9407138873887051, "learning_rate": 9.938942559567677e-06, "loss": 0.5359, "step": 1246 }, { "epoch": 0.08, "grad_norm": 2.1873362989230545, "learning_rate": 9.938783777760095e-06, "loss": 0.3971, "step": 1247 }, { "epoch": 0.08, "grad_norm": 0.6647545068782758, "learning_rate": 9.938624791033599e-06, "loss": 0.4553, "step": 1248 }, { "epoch": 0.08, "grad_norm": 3.726292817792635, "learning_rate": 9.93846559939479e-06, "loss": 0.4015, "step": 1249 }, { "epoch": 0.08, "grad_norm": 0.6179117598872832, "learning_rate": 9.938306202850272e-06, "loss": 0.4852, "step": 1250 }, { "epoch": 0.08, "grad_norm": 13.38919296381999, "learning_rate": 9.938146601406657e-06, "loss": 0.4089, "step": 1251 }, { "epoch": 0.08, "grad_norm": 1.947093361026362, "learning_rate": 9.937986795070568e-06, "loss": 0.4459, "step": 1252 }, { "epoch": 0.08, "grad_norm": 2.2415261426460065, "learning_rate": 9.937826783848636e-06, "loss": 0.4175, "step": 1253 }, { "epoch": 0.08, "grad_norm": 3.0143115187190905, "learning_rate": 9.9376665677475e-06, "loss": 0.3941, "step": 1254 }, { "epoch": 0.08, "grad_norm": 1.884912048352357, "learning_rate": 9.93750614677381e-06, "loss": 0.4685, "step": 1255 }, { "epoch": 0.08, "grad_norm": 10.615011997154566, "learning_rate": 9.93734552093422e-06, "loss": 0.3747, "step": 1256 }, { "epoch": 0.08, "grad_norm": 2.4576862985160397, "learning_rate": 9.937184690235393e-06, "loss": 0.3972, "step": 1257 }, { "epoch": 0.08, "grad_norm": 2.301713426062173, "learning_rate": 9.937023654684004e-06, "loss": 0.3979, "step": 1258 }, { "epoch": 0.08, "grad_norm": 2.3974970802635305, "learning_rate": 9.936862414286734e-06, "loss": 0.4505, "step": 1259 }, { "epoch": 0.08, "grad_norm": 1.901122120553632, "learning_rate": 9.936700969050275e-06, "loss": 0.4301, "step": 1260 }, { "epoch": 0.08, "grad_norm": 2.5712528297749246, "learning_rate": 9.936539318981323e-06, "loss": 0.4034, "step": 1261 }, { "epoch": 0.08, "grad_norm": 2.5061825489424003, "learning_rate": 9.936377464086586e-06, "loss": 0.4692, "step": 1262 }, { "epoch": 0.08, "grad_norm": 3.062917052282411, "learning_rate": 9.936215404372783e-06, "loss": 0.4135, "step": 1263 }, { "epoch": 0.08, "grad_norm": 2.024460585702295, "learning_rate": 9.936053139846631e-06, "loss": 0.5626, "step": 1264 }, { "epoch": 0.08, "grad_norm": 2.647596296601089, "learning_rate": 9.93589067051487e-06, "loss": 0.3908, "step": 1265 }, { "epoch": 0.08, "grad_norm": 1.8476435586334785, "learning_rate": 9.935727996384237e-06, "loss": 0.4074, "step": 1266 }, { "epoch": 0.08, "grad_norm": 6.1113709153265035, "learning_rate": 9.93556511746148e-06, "loss": 0.401, "step": 1267 }, { "epoch": 0.08, "grad_norm": 2.0001077011543593, "learning_rate": 9.935402033753364e-06, "loss": 0.3933, "step": 1268 }, { "epoch": 0.08, "grad_norm": 4.4100065995475255, "learning_rate": 9.935238745266648e-06, "loss": 0.408, "step": 1269 }, { "epoch": 0.08, "grad_norm": 1.9698265874125425, "learning_rate": 9.935075252008113e-06, "loss": 0.4248, "step": 1270 }, { "epoch": 0.08, "grad_norm": 1.6037970756036097, "learning_rate": 9.934911553984539e-06, "loss": 0.3888, "step": 1271 }, { "epoch": 0.08, "grad_norm": 0.7803594986696203, "learning_rate": 9.934747651202718e-06, "loss": 0.484, "step": 1272 }, { "epoch": 0.08, "grad_norm": 1.9670363109487332, "learning_rate": 9.934583543669454e-06, "loss": 0.4171, "step": 1273 }, { "epoch": 0.08, "grad_norm": 2.392762481259209, "learning_rate": 9.934419231391554e-06, "loss": 0.3777, "step": 1274 }, { "epoch": 0.08, "grad_norm": 2.6645278071234206, "learning_rate": 9.934254714375834e-06, "loss": 0.4242, "step": 1275 }, { "epoch": 0.08, "grad_norm": 2.2565647136530593, "learning_rate": 9.934089992629122e-06, "loss": 0.3958, "step": 1276 }, { "epoch": 0.08, "grad_norm": 2.5859524170815797, "learning_rate": 9.933925066158254e-06, "loss": 0.4164, "step": 1277 }, { "epoch": 0.08, "grad_norm": 2.3127556176510717, "learning_rate": 9.933759934970069e-06, "loss": 0.4036, "step": 1278 }, { "epoch": 0.08, "grad_norm": 1.8300530787503981, "learning_rate": 9.93359459907142e-06, "loss": 0.3761, "step": 1279 }, { "epoch": 0.08, "grad_norm": 3.703908562364397, "learning_rate": 9.933429058469171e-06, "loss": 0.3914, "step": 1280 }, { "epoch": 0.08, "grad_norm": 4.140590584560028, "learning_rate": 9.933263313170187e-06, "loss": 0.4299, "step": 1281 }, { "epoch": 0.08, "grad_norm": 2.9763097183198512, "learning_rate": 9.933097363181346e-06, "loss": 0.4018, "step": 1282 }, { "epoch": 0.08, "grad_norm": 2.0209916346738606, "learning_rate": 9.932931208509533e-06, "loss": 0.3795, "step": 1283 }, { "epoch": 0.08, "grad_norm": 1.6783799119102611, "learning_rate": 9.93276484916164e-06, "loss": 0.4336, "step": 1284 }, { "epoch": 0.08, "grad_norm": 2.7725227031042743, "learning_rate": 9.932598285144575e-06, "loss": 0.3956, "step": 1285 }, { "epoch": 0.08, "grad_norm": 2.7385715317313024, "learning_rate": 9.932431516465244e-06, "loss": 0.4128, "step": 1286 }, { "epoch": 0.08, "grad_norm": 1.744366053658705, "learning_rate": 9.932264543130568e-06, "loss": 0.4132, "step": 1287 }, { "epoch": 0.08, "grad_norm": 3.683341016651291, "learning_rate": 9.932097365147477e-06, "loss": 0.3865, "step": 1288 }, { "epoch": 0.08, "grad_norm": 2.334883256421227, "learning_rate": 9.931929982522906e-06, "loss": 0.4004, "step": 1289 }, { "epoch": 0.08, "grad_norm": 2.0309873328446724, "learning_rate": 9.931762395263798e-06, "loss": 0.3918, "step": 1290 }, { "epoch": 0.08, "grad_norm": 76.55529773301637, "learning_rate": 9.93159460337711e-06, "loss": 0.4079, "step": 1291 }, { "epoch": 0.08, "grad_norm": 4.8661975404978115, "learning_rate": 9.931426606869802e-06, "loss": 0.4158, "step": 1292 }, { "epoch": 0.08, "grad_norm": 2.954796512963682, "learning_rate": 9.931258405748846e-06, "loss": 0.3868, "step": 1293 }, { "epoch": 0.08, "grad_norm": 3.3411782414968396, "learning_rate": 9.931090000021218e-06, "loss": 0.364, "step": 1294 }, { "epoch": 0.08, "grad_norm": 2.117366431507436, "learning_rate": 9.930921389693907e-06, "loss": 0.4244, "step": 1295 }, { "epoch": 0.08, "grad_norm": 1.6985165345579143, "learning_rate": 9.93075257477391e-06, "loss": 0.4044, "step": 1296 }, { "epoch": 0.08, "grad_norm": 2.2261173854773073, "learning_rate": 9.930583555268232e-06, "loss": 0.3767, "step": 1297 }, { "epoch": 0.08, "grad_norm": 0.8436602713913757, "learning_rate": 9.930414331183883e-06, "loss": 0.4865, "step": 1298 }, { "epoch": 0.08, "grad_norm": 1.8361347906103542, "learning_rate": 9.930244902527885e-06, "loss": 0.4039, "step": 1299 }, { "epoch": 0.08, "grad_norm": 2.714770980754513, "learning_rate": 9.930075269307271e-06, "loss": 0.4118, "step": 1300 }, { "epoch": 0.08, "grad_norm": 3.5300074767503293, "learning_rate": 9.929905431529077e-06, "loss": 0.3933, "step": 1301 }, { "epoch": 0.08, "grad_norm": 187.2722523416001, "learning_rate": 9.92973538920035e-06, "loss": 0.42, "step": 1302 }, { "epoch": 0.08, "grad_norm": 5.621836919561685, "learning_rate": 9.929565142328145e-06, "loss": 0.419, "step": 1303 }, { "epoch": 0.08, "grad_norm": 3.4384579060836136, "learning_rate": 9.929394690919527e-06, "loss": 0.3842, "step": 1304 }, { "epoch": 0.08, "grad_norm": 4.729187611472591, "learning_rate": 9.929224034981568e-06, "loss": 0.3754, "step": 1305 }, { "epoch": 0.08, "grad_norm": 1.9428666565866435, "learning_rate": 9.929053174521348e-06, "loss": 0.4653, "step": 1306 }, { "epoch": 0.08, "grad_norm": 2.3237029777076876, "learning_rate": 9.928882109545956e-06, "loss": 0.389, "step": 1307 }, { "epoch": 0.08, "grad_norm": 1.655425575128045, "learning_rate": 9.928710840062492e-06, "loss": 0.4253, "step": 1308 }, { "epoch": 0.08, "grad_norm": 2.7001924494572114, "learning_rate": 9.92853936607806e-06, "loss": 0.4164, "step": 1309 }, { "epoch": 0.08, "grad_norm": 1.9253618092665186, "learning_rate": 9.928367687599775e-06, "loss": 0.3999, "step": 1310 }, { "epoch": 0.08, "grad_norm": 2.356637593843065, "learning_rate": 9.928195804634761e-06, "loss": 0.4199, "step": 1311 }, { "epoch": 0.08, "grad_norm": 1.6567326503830144, "learning_rate": 9.928023717190152e-06, "loss": 0.4097, "step": 1312 }, { "epoch": 0.08, "grad_norm": 1.8788831437638487, "learning_rate": 9.927851425273082e-06, "loss": 0.389, "step": 1313 }, { "epoch": 0.08, "grad_norm": 3.371769965405906, "learning_rate": 9.927678928890707e-06, "loss": 0.3723, "step": 1314 }, { "epoch": 0.08, "grad_norm": 2.353042406999752, "learning_rate": 9.92750622805018e-06, "loss": 0.3935, "step": 1315 }, { "epoch": 0.08, "grad_norm": 2.760828452052688, "learning_rate": 9.927333322758665e-06, "loss": 0.4225, "step": 1316 }, { "epoch": 0.08, "grad_norm": 2.7590947394628937, "learning_rate": 9.92716021302334e-06, "loss": 0.4391, "step": 1317 }, { "epoch": 0.08, "grad_norm": 2.405071562879864, "learning_rate": 9.926986898851387e-06, "loss": 0.4159, "step": 1318 }, { "epoch": 0.08, "grad_norm": 3.0184503357446797, "learning_rate": 9.926813380249995e-06, "loss": 0.3992, "step": 1319 }, { "epoch": 0.08, "grad_norm": 3.0824959041518154, "learning_rate": 9.926639657226366e-06, "loss": 0.4168, "step": 1320 }, { "epoch": 0.08, "grad_norm": 3.7458826976092214, "learning_rate": 9.926465729787707e-06, "loss": 0.378, "step": 1321 }, { "epoch": 0.08, "grad_norm": 1.5773154796096445, "learning_rate": 9.926291597941234e-06, "loss": 0.4083, "step": 1322 }, { "epoch": 0.08, "grad_norm": 3.11089775571572, "learning_rate": 9.926117261694171e-06, "loss": 0.375, "step": 1323 }, { "epoch": 0.08, "grad_norm": 2.5481490222725935, "learning_rate": 9.925942721053755e-06, "loss": 0.3894, "step": 1324 }, { "epoch": 0.08, "grad_norm": 3.283882783001254, "learning_rate": 9.925767976027226e-06, "loss": 0.4141, "step": 1325 }, { "epoch": 0.08, "grad_norm": 1.7701549571254545, "learning_rate": 9.925593026621833e-06, "loss": 0.3949, "step": 1326 }, { "epoch": 0.08, "grad_norm": 2.7391856293318613, "learning_rate": 9.925417872844838e-06, "loss": 0.3814, "step": 1327 }, { "epoch": 0.08, "grad_norm": 3.7757977417711746, "learning_rate": 9.925242514703505e-06, "loss": 0.4038, "step": 1328 }, { "epoch": 0.08, "grad_norm": 2.0179859633576873, "learning_rate": 9.925066952205113e-06, "loss": 0.4149, "step": 1329 }, { "epoch": 0.08, "grad_norm": 2.27224864723054, "learning_rate": 9.924891185356946e-06, "loss": 0.3984, "step": 1330 }, { "epoch": 0.08, "grad_norm": 31.31739209436363, "learning_rate": 9.924715214166297e-06, "loss": 0.401, "step": 1331 }, { "epoch": 0.08, "grad_norm": 2.6021015902600118, "learning_rate": 9.924539038640464e-06, "loss": 0.426, "step": 1332 }, { "epoch": 0.08, "grad_norm": 2.4224725330505392, "learning_rate": 9.92436265878676e-06, "loss": 0.4028, "step": 1333 }, { "epoch": 0.08, "grad_norm": 1.5472933057935412, "learning_rate": 9.924186074612502e-06, "loss": 0.3756, "step": 1334 }, { "epoch": 0.08, "grad_norm": 1.758829975204866, "learning_rate": 9.92400928612502e-06, "loss": 0.3987, "step": 1335 }, { "epoch": 0.08, "grad_norm": 5.868957251687971, "learning_rate": 9.923832293331645e-06, "loss": 0.3917, "step": 1336 }, { "epoch": 0.08, "grad_norm": 2.3265652216559642, "learning_rate": 9.923655096239722e-06, "loss": 0.4568, "step": 1337 }, { "epoch": 0.08, "grad_norm": 1.8043481729106445, "learning_rate": 9.923477694856605e-06, "loss": 0.4653, "step": 1338 }, { "epoch": 0.08, "grad_norm": 1.755369831885525, "learning_rate": 9.923300089189653e-06, "loss": 0.4351, "step": 1339 }, { "epoch": 0.08, "grad_norm": 1.6445669739920281, "learning_rate": 9.923122279246234e-06, "loss": 0.3942, "step": 1340 }, { "epoch": 0.08, "grad_norm": 6.275747062318478, "learning_rate": 9.922944265033729e-06, "loss": 0.3919, "step": 1341 }, { "epoch": 0.08, "grad_norm": 2.812507379854062, "learning_rate": 9.922766046559522e-06, "loss": 0.3981, "step": 1342 }, { "epoch": 0.08, "grad_norm": 2.6450415429396688, "learning_rate": 9.922587623831007e-06, "loss": 0.3976, "step": 1343 }, { "epoch": 0.08, "grad_norm": 2.4341460590153163, "learning_rate": 9.922408996855588e-06, "loss": 0.4085, "step": 1344 }, { "epoch": 0.08, "grad_norm": 1.877096783037955, "learning_rate": 9.922230165640678e-06, "loss": 0.4003, "step": 1345 }, { "epoch": 0.08, "grad_norm": 1.8133136042326141, "learning_rate": 9.922051130193694e-06, "loss": 0.3784, "step": 1346 }, { "epoch": 0.08, "grad_norm": 2.181148964621575, "learning_rate": 9.921871890522066e-06, "loss": 0.3794, "step": 1347 }, { "epoch": 0.08, "grad_norm": 2.6226953846995316, "learning_rate": 9.921692446633233e-06, "loss": 0.3905, "step": 1348 }, { "epoch": 0.08, "grad_norm": 2.190614017631786, "learning_rate": 9.921512798534637e-06, "loss": 0.4035, "step": 1349 }, { "epoch": 0.08, "grad_norm": 0.8681966725964875, "learning_rate": 9.921332946233733e-06, "loss": 0.4883, "step": 1350 }, { "epoch": 0.08, "grad_norm": 2.2761549349465735, "learning_rate": 9.921152889737985e-06, "loss": 0.4457, "step": 1351 }, { "epoch": 0.09, "grad_norm": 6.682495836410096, "learning_rate": 9.920972629054862e-06, "loss": 0.4148, "step": 1352 }, { "epoch": 0.09, "grad_norm": 1.799774426097016, "learning_rate": 9.920792164191844e-06, "loss": 0.4202, "step": 1353 }, { "epoch": 0.09, "grad_norm": 2.7054108308219185, "learning_rate": 9.920611495156418e-06, "loss": 0.409, "step": 1354 }, { "epoch": 0.09, "grad_norm": 0.7246505563609941, "learning_rate": 9.920430621956082e-06, "loss": 0.5196, "step": 1355 }, { "epoch": 0.09, "grad_norm": 1.5568292174747451, "learning_rate": 9.92024954459834e-06, "loss": 0.4426, "step": 1356 }, { "epoch": 0.09, "grad_norm": 1.8441637875609818, "learning_rate": 9.920068263090706e-06, "loss": 0.4272, "step": 1357 }, { "epoch": 0.09, "grad_norm": 2.087098202640914, "learning_rate": 9.9198867774407e-06, "loss": 0.4004, "step": 1358 }, { "epoch": 0.09, "grad_norm": 0.7063518045266224, "learning_rate": 9.919705087655851e-06, "loss": 0.4794, "step": 1359 }, { "epoch": 0.09, "grad_norm": 3.3065403119255796, "learning_rate": 9.919523193743701e-06, "loss": 0.4401, "step": 1360 }, { "epoch": 0.09, "grad_norm": 2.032868476159452, "learning_rate": 9.919341095711796e-06, "loss": 0.3968, "step": 1361 }, { "epoch": 0.09, "grad_norm": 5.026238979965795, "learning_rate": 9.91915879356769e-06, "loss": 0.4, "step": 1362 }, { "epoch": 0.09, "grad_norm": 3.0441710274220073, "learning_rate": 9.918976287318948e-06, "loss": 0.4487, "step": 1363 }, { "epoch": 0.09, "grad_norm": 2.460569208947006, "learning_rate": 9.918793576973145e-06, "loss": 0.4442, "step": 1364 }, { "epoch": 0.09, "grad_norm": 3.7484439196754873, "learning_rate": 9.91861066253786e-06, "loss": 0.3806, "step": 1365 }, { "epoch": 0.09, "grad_norm": 3.4863483898466465, "learning_rate": 9.918427544020678e-06, "loss": 0.4098, "step": 1366 }, { "epoch": 0.09, "grad_norm": 2.800766859707209, "learning_rate": 9.918244221429205e-06, "loss": 0.4336, "step": 1367 }, { "epoch": 0.09, "grad_norm": 1.6647415061590345, "learning_rate": 9.918060694771043e-06, "loss": 0.3888, "step": 1368 }, { "epoch": 0.09, "grad_norm": 4.901222269363619, "learning_rate": 9.917876964053806e-06, "loss": 0.4253, "step": 1369 }, { "epoch": 0.09, "grad_norm": 2.2585778539588093, "learning_rate": 9.91769302928512e-06, "loss": 0.4311, "step": 1370 }, { "epoch": 0.09, "grad_norm": 1.5765041214152822, "learning_rate": 9.917508890472613e-06, "loss": 0.4164, "step": 1371 }, { "epoch": 0.09, "grad_norm": 2.11036074889742, "learning_rate": 9.91732454762393e-06, "loss": 0.3994, "step": 1372 }, { "epoch": 0.09, "grad_norm": 1.676435633402841, "learning_rate": 9.917140000746717e-06, "loss": 0.4027, "step": 1373 }, { "epoch": 0.09, "grad_norm": 1.4989157433821845, "learning_rate": 9.916955249848631e-06, "loss": 0.3891, "step": 1374 }, { "epoch": 0.09, "grad_norm": 2.1544482520646615, "learning_rate": 9.916770294937339e-06, "loss": 0.3936, "step": 1375 }, { "epoch": 0.09, "grad_norm": 1.5951856923610135, "learning_rate": 9.916585136020513e-06, "loss": 0.3946, "step": 1376 }, { "epoch": 0.09, "grad_norm": 6.132462663210312, "learning_rate": 9.916399773105839e-06, "loss": 0.3855, "step": 1377 }, { "epoch": 0.09, "grad_norm": 1.869095672777846, "learning_rate": 9.916214206201003e-06, "loss": 0.4398, "step": 1378 }, { "epoch": 0.09, "grad_norm": 2.0817447810542613, "learning_rate": 9.91602843531371e-06, "loss": 0.418, "step": 1379 }, { "epoch": 0.09, "grad_norm": 2.5619346912924112, "learning_rate": 9.915842460451663e-06, "loss": 0.4257, "step": 1380 }, { "epoch": 0.09, "grad_norm": 1.6600325364141273, "learning_rate": 9.915656281622584e-06, "loss": 0.3744, "step": 1381 }, { "epoch": 0.09, "grad_norm": 1.8280151388735506, "learning_rate": 9.915469898834191e-06, "loss": 0.4255, "step": 1382 }, { "epoch": 0.09, "grad_norm": 2.4442944631790735, "learning_rate": 9.915283312094222e-06, "loss": 0.4083, "step": 1383 }, { "epoch": 0.09, "grad_norm": 1.6272636478051172, "learning_rate": 9.91509652141042e-06, "loss": 0.3985, "step": 1384 }, { "epoch": 0.09, "grad_norm": 2.187900171096838, "learning_rate": 9.91490952679053e-06, "loss": 0.4175, "step": 1385 }, { "epoch": 0.09, "grad_norm": 2.751596329155221, "learning_rate": 9.914722328242316e-06, "loss": 0.395, "step": 1386 }, { "epoch": 0.09, "grad_norm": 5.015177979893566, "learning_rate": 9.914534925773543e-06, "loss": 0.4644, "step": 1387 }, { "epoch": 0.09, "grad_norm": 2.5039838659252927, "learning_rate": 9.914347319391987e-06, "loss": 0.4122, "step": 1388 }, { "epoch": 0.09, "grad_norm": 1.7764630100827854, "learning_rate": 9.914159509105431e-06, "loss": 0.3779, "step": 1389 }, { "epoch": 0.09, "grad_norm": 1.9194266369463542, "learning_rate": 9.913971494921669e-06, "loss": 0.3932, "step": 1390 }, { "epoch": 0.09, "grad_norm": 1.6374283687756888, "learning_rate": 9.9137832768485e-06, "loss": 0.4016, "step": 1391 }, { "epoch": 0.09, "grad_norm": 1.6075015121871845, "learning_rate": 9.913594854893738e-06, "loss": 0.42, "step": 1392 }, { "epoch": 0.09, "grad_norm": 1.5945031378142487, "learning_rate": 9.913406229065196e-06, "loss": 0.4224, "step": 1393 }, { "epoch": 0.09, "grad_norm": 3.3031052827672984, "learning_rate": 9.913217399370702e-06, "loss": 0.3812, "step": 1394 }, { "epoch": 0.09, "grad_norm": 1.2678496994549624, "learning_rate": 9.913028365818092e-06, "loss": 0.3828, "step": 1395 }, { "epoch": 0.09, "grad_norm": 2.4283198790855907, "learning_rate": 9.912839128415209e-06, "loss": 0.4556, "step": 1396 }, { "epoch": 0.09, "grad_norm": 1.5556333121747936, "learning_rate": 9.912649687169901e-06, "loss": 0.4559, "step": 1397 }, { "epoch": 0.09, "grad_norm": 1.8638758042854766, "learning_rate": 9.912460042090035e-06, "loss": 0.3946, "step": 1398 }, { "epoch": 0.09, "grad_norm": 2.2184827489764256, "learning_rate": 9.912270193183476e-06, "loss": 0.3992, "step": 1399 }, { "epoch": 0.09, "grad_norm": 2.2500568925914206, "learning_rate": 9.912080140458102e-06, "loss": 0.3921, "step": 1400 }, { "epoch": 0.09, "grad_norm": 2.2813042713793945, "learning_rate": 9.911889883921797e-06, "loss": 0.4086, "step": 1401 }, { "epoch": 0.09, "grad_norm": 1.940302937643413, "learning_rate": 9.911699423582457e-06, "loss": 0.3584, "step": 1402 }, { "epoch": 0.09, "grad_norm": 1.8593565231209148, "learning_rate": 9.911508759447984e-06, "loss": 0.4025, "step": 1403 }, { "epoch": 0.09, "grad_norm": 1.6361669972872965, "learning_rate": 9.911317891526286e-06, "loss": 0.4029, "step": 1404 }, { "epoch": 0.09, "grad_norm": 2.0361185029938778, "learning_rate": 9.911126819825287e-06, "loss": 0.4166, "step": 1405 }, { "epoch": 0.09, "grad_norm": 1.6620122593043212, "learning_rate": 9.910935544352914e-06, "loss": 0.4295, "step": 1406 }, { "epoch": 0.09, "grad_norm": 1.9094808654016637, "learning_rate": 9.910744065117101e-06, "loss": 0.4207, "step": 1407 }, { "epoch": 0.09, "grad_norm": 5.115139760675823, "learning_rate": 9.910552382125797e-06, "loss": 0.3801, "step": 1408 }, { "epoch": 0.09, "grad_norm": 6.750216896874437, "learning_rate": 9.91036049538695e-06, "loss": 0.3902, "step": 1409 }, { "epoch": 0.09, "grad_norm": 2.764468872418187, "learning_rate": 9.910168404908525e-06, "loss": 0.409, "step": 1410 }, { "epoch": 0.09, "grad_norm": 1.8805666909647074, "learning_rate": 9.909976110698491e-06, "loss": 0.414, "step": 1411 }, { "epoch": 0.09, "grad_norm": 1.8817569820293847, "learning_rate": 9.909783612764827e-06, "loss": 0.408, "step": 1412 }, { "epoch": 0.09, "grad_norm": 1.8955912629798846, "learning_rate": 9.909590911115521e-06, "loss": 0.421, "step": 1413 }, { "epoch": 0.09, "grad_norm": 0.8233016733210808, "learning_rate": 9.909398005758567e-06, "loss": 0.5172, "step": 1414 }, { "epoch": 0.09, "grad_norm": 2.9509445029106045, "learning_rate": 9.909204896701969e-06, "loss": 0.399, "step": 1415 }, { "epoch": 0.09, "grad_norm": 3.0487482740909604, "learning_rate": 9.909011583953743e-06, "loss": 0.4208, "step": 1416 }, { "epoch": 0.09, "grad_norm": 1.9694569347932842, "learning_rate": 9.908818067521904e-06, "loss": 0.4085, "step": 1417 }, { "epoch": 0.09, "grad_norm": 3.0491318225826527, "learning_rate": 9.908624347414486e-06, "loss": 0.423, "step": 1418 }, { "epoch": 0.09, "grad_norm": 1.4949193002474532, "learning_rate": 9.908430423639524e-06, "loss": 0.4163, "step": 1419 }, { "epoch": 0.09, "grad_norm": 4.605398834105979, "learning_rate": 9.908236296205066e-06, "loss": 0.4119, "step": 1420 }, { "epoch": 0.09, "grad_norm": 1.4125172978253353, "learning_rate": 9.908041965119167e-06, "loss": 0.3951, "step": 1421 }, { "epoch": 0.09, "grad_norm": 2.185038540239043, "learning_rate": 9.907847430389887e-06, "loss": 0.3815, "step": 1422 }, { "epoch": 0.09, "grad_norm": 1.4387868815608758, "learning_rate": 9.9076526920253e-06, "loss": 0.3979, "step": 1423 }, { "epoch": 0.09, "grad_norm": 1.4015483848228638, "learning_rate": 9.907457750033487e-06, "loss": 0.3604, "step": 1424 }, { "epoch": 0.09, "grad_norm": 2.357191981985771, "learning_rate": 9.907262604422537e-06, "loss": 0.3918, "step": 1425 }, { "epoch": 0.09, "grad_norm": 1.5894567076827006, "learning_rate": 9.907067255200543e-06, "loss": 0.404, "step": 1426 }, { "epoch": 0.09, "grad_norm": 1.9072082104415833, "learning_rate": 9.906871702375611e-06, "loss": 0.3818, "step": 1427 }, { "epoch": 0.09, "grad_norm": 3.2901148439758, "learning_rate": 9.90667594595586e-06, "loss": 0.4283, "step": 1428 }, { "epoch": 0.09, "grad_norm": 2.327012966175589, "learning_rate": 9.906479985949407e-06, "loss": 0.3966, "step": 1429 }, { "epoch": 0.09, "grad_norm": 3.459879041421819, "learning_rate": 9.906283822364384e-06, "loss": 0.4073, "step": 1430 }, { "epoch": 0.09, "grad_norm": 1.7313958299477028, "learning_rate": 9.90608745520893e-06, "loss": 0.4236, "step": 1431 }, { "epoch": 0.09, "grad_norm": 4.530695929689069, "learning_rate": 9.905890884491196e-06, "loss": 0.4107, "step": 1432 }, { "epoch": 0.09, "grad_norm": 2.4345884043627475, "learning_rate": 9.905694110219335e-06, "loss": 0.4173, "step": 1433 }, { "epoch": 0.09, "grad_norm": 3.250153863573317, "learning_rate": 9.90549713240151e-06, "loss": 0.4001, "step": 1434 }, { "epoch": 0.09, "grad_norm": 1.9437826118184052, "learning_rate": 9.905299951045897e-06, "loss": 0.4219, "step": 1435 }, { "epoch": 0.09, "grad_norm": 1.5513169384343328, "learning_rate": 9.905102566160676e-06, "loss": 0.4077, "step": 1436 }, { "epoch": 0.09, "grad_norm": 2.1322290329463036, "learning_rate": 9.904904977754038e-06, "loss": 0.4017, "step": 1437 }, { "epoch": 0.09, "grad_norm": 3.6814866941618773, "learning_rate": 9.904707185834178e-06, "loss": 0.4068, "step": 1438 }, { "epoch": 0.09, "grad_norm": 15.386708142763933, "learning_rate": 9.904509190409306e-06, "loss": 0.4049, "step": 1439 }, { "epoch": 0.09, "grad_norm": 1.9453388681624317, "learning_rate": 9.904310991487638e-06, "loss": 0.4025, "step": 1440 }, { "epoch": 0.09, "grad_norm": 1.8745260712880345, "learning_rate": 9.904112589077395e-06, "loss": 0.4031, "step": 1441 }, { "epoch": 0.09, "grad_norm": 1.5975200337702475, "learning_rate": 9.90391398318681e-06, "loss": 0.4017, "step": 1442 }, { "epoch": 0.09, "grad_norm": 2.324353289238748, "learning_rate": 9.903715173824123e-06, "loss": 0.393, "step": 1443 }, { "epoch": 0.09, "grad_norm": 3.1388241598048117, "learning_rate": 9.903516160997583e-06, "loss": 0.3989, "step": 1444 }, { "epoch": 0.09, "grad_norm": 2.059743073559714, "learning_rate": 9.903316944715449e-06, "loss": 0.3701, "step": 1445 }, { "epoch": 0.09, "grad_norm": 3.886145433535198, "learning_rate": 9.903117524985986e-06, "loss": 0.3989, "step": 1446 }, { "epoch": 0.09, "grad_norm": 0.9183789478214723, "learning_rate": 9.902917901817466e-06, "loss": 0.519, "step": 1447 }, { "epoch": 0.09, "grad_norm": 1.6682113945767112, "learning_rate": 9.902718075218176e-06, "loss": 0.4025, "step": 1448 }, { "epoch": 0.09, "grad_norm": 2.043910702886714, "learning_rate": 9.902518045196404e-06, "loss": 0.4205, "step": 1449 }, { "epoch": 0.09, "grad_norm": 1.6660949741835804, "learning_rate": 9.902317811760449e-06, "loss": 0.3856, "step": 1450 }, { "epoch": 0.09, "grad_norm": 0.6677608029610768, "learning_rate": 9.902117374918623e-06, "loss": 0.4778, "step": 1451 }, { "epoch": 0.09, "grad_norm": 2.945677472625155, "learning_rate": 9.901916734679237e-06, "loss": 0.4131, "step": 1452 }, { "epoch": 0.09, "grad_norm": 2.673107488354125, "learning_rate": 9.901715891050622e-06, "loss": 0.4297, "step": 1453 }, { "epoch": 0.09, "grad_norm": 3.023626774814282, "learning_rate": 9.901514844041107e-06, "loss": 0.3997, "step": 1454 }, { "epoch": 0.09, "grad_norm": 1.7819456139503294, "learning_rate": 9.901313593659035e-06, "loss": 0.3899, "step": 1455 }, { "epoch": 0.09, "grad_norm": 13.755152624488014, "learning_rate": 9.901112139912757e-06, "loss": 0.4234, "step": 1456 }, { "epoch": 0.09, "grad_norm": 2.3820210120304215, "learning_rate": 9.90091048281063e-06, "loss": 0.3778, "step": 1457 }, { "epoch": 0.09, "grad_norm": 3.201837689877226, "learning_rate": 9.90070862236102e-06, "loss": 0.3819, "step": 1458 }, { "epoch": 0.09, "grad_norm": 14.46930200786034, "learning_rate": 9.900506558572309e-06, "loss": 0.4427, "step": 1459 }, { "epoch": 0.09, "grad_norm": 3.4506886847437297, "learning_rate": 9.900304291452873e-06, "loss": 0.3964, "step": 1460 }, { "epoch": 0.09, "grad_norm": 2.7782056866041125, "learning_rate": 9.90010182101111e-06, "loss": 0.38, "step": 1461 }, { "epoch": 0.09, "grad_norm": 3.1578013332547443, "learning_rate": 9.899899147255418e-06, "loss": 0.3963, "step": 1462 }, { "epoch": 0.09, "grad_norm": 1.6429588857787716, "learning_rate": 9.899696270194208e-06, "loss": 0.4039, "step": 1463 }, { "epoch": 0.09, "grad_norm": 1.782755101220264, "learning_rate": 9.899493189835896e-06, "loss": 0.4021, "step": 1464 }, { "epoch": 0.09, "grad_norm": 5.805700991513922, "learning_rate": 9.899289906188909e-06, "loss": 0.3774, "step": 1465 }, { "epoch": 0.09, "grad_norm": 1.1592057386846453, "learning_rate": 9.899086419261683e-06, "loss": 0.4994, "step": 1466 }, { "epoch": 0.09, "grad_norm": 2.2501526015494115, "learning_rate": 9.89888272906266e-06, "loss": 0.4112, "step": 1467 }, { "epoch": 0.09, "grad_norm": 2.562735614642925, "learning_rate": 9.89867883560029e-06, "loss": 0.4091, "step": 1468 }, { "epoch": 0.09, "grad_norm": 1.7883783901104329, "learning_rate": 9.898474738883033e-06, "loss": 0.4354, "step": 1469 }, { "epoch": 0.09, "grad_norm": 1.86557666058772, "learning_rate": 9.898270438919359e-06, "loss": 0.4203, "step": 1470 }, { "epoch": 0.09, "grad_norm": 1.3268077301449126, "learning_rate": 9.898065935717746e-06, "loss": 0.3679, "step": 1471 }, { "epoch": 0.09, "grad_norm": 2.0982417650484964, "learning_rate": 9.897861229286676e-06, "loss": 0.3848, "step": 1472 }, { "epoch": 0.09, "grad_norm": 9.484283339522934, "learning_rate": 9.897656319634643e-06, "loss": 0.412, "step": 1473 }, { "epoch": 0.09, "grad_norm": 2.871856135984185, "learning_rate": 9.897451206770152e-06, "loss": 0.3826, "step": 1474 }, { "epoch": 0.09, "grad_norm": 0.7713368662430186, "learning_rate": 9.897245890701713e-06, "loss": 0.4779, "step": 1475 }, { "epoch": 0.09, "grad_norm": 2.1635938435196946, "learning_rate": 9.89704037143784e-06, "loss": 0.4546, "step": 1476 }, { "epoch": 0.09, "grad_norm": 1.5875702231075441, "learning_rate": 9.896834648987065e-06, "loss": 0.4266, "step": 1477 }, { "epoch": 0.09, "grad_norm": 1.968822406602112, "learning_rate": 9.896628723357923e-06, "loss": 0.3995, "step": 1478 }, { "epoch": 0.09, "grad_norm": 4.271946906731635, "learning_rate": 9.896422594558957e-06, "loss": 0.4044, "step": 1479 }, { "epoch": 0.09, "grad_norm": 1.7425755441872492, "learning_rate": 9.896216262598722e-06, "loss": 0.3874, "step": 1480 }, { "epoch": 0.09, "grad_norm": 1.951237218746135, "learning_rate": 9.896009727485778e-06, "loss": 0.4111, "step": 1481 }, { "epoch": 0.09, "grad_norm": 0.664163980488461, "learning_rate": 9.895802989228691e-06, "loss": 0.4666, "step": 1482 }, { "epoch": 0.09, "grad_norm": 2.1918348287420946, "learning_rate": 9.895596047836045e-06, "loss": 0.416, "step": 1483 }, { "epoch": 0.09, "grad_norm": 2.302188351677537, "learning_rate": 9.895388903316424e-06, "loss": 0.3995, "step": 1484 }, { "epoch": 0.09, "grad_norm": 1.4967731946965595, "learning_rate": 9.895181555678419e-06, "loss": 0.429, "step": 1485 }, { "epoch": 0.09, "grad_norm": 1.4109589221032037, "learning_rate": 9.894974004930638e-06, "loss": 0.3997, "step": 1486 }, { "epoch": 0.09, "grad_norm": 1.6575259792296093, "learning_rate": 9.894766251081691e-06, "loss": 0.424, "step": 1487 }, { "epoch": 0.09, "grad_norm": 1.621454676112057, "learning_rate": 9.894558294140199e-06, "loss": 0.4205, "step": 1488 }, { "epoch": 0.09, "grad_norm": 1.4304440388356046, "learning_rate": 9.894350134114788e-06, "loss": 0.4239, "step": 1489 }, { "epoch": 0.09, "grad_norm": 2.149051462032158, "learning_rate": 9.894141771014098e-06, "loss": 0.439, "step": 1490 }, { "epoch": 0.09, "grad_norm": 2.675273168917547, "learning_rate": 9.893933204846772e-06, "loss": 0.395, "step": 1491 }, { "epoch": 0.09, "grad_norm": 2.9996833286022, "learning_rate": 9.893724435621466e-06, "loss": 0.4184, "step": 1492 }, { "epoch": 0.09, "grad_norm": 2.554857719359423, "learning_rate": 9.893515463346841e-06, "loss": 0.3705, "step": 1493 }, { "epoch": 0.09, "grad_norm": 10.521627959182956, "learning_rate": 9.893306288031565e-06, "loss": 0.4201, "step": 1494 }, { "epoch": 0.09, "grad_norm": 16.78095549023195, "learning_rate": 9.893096909684323e-06, "loss": 0.4135, "step": 1495 }, { "epoch": 0.09, "grad_norm": 2.2207693140787756, "learning_rate": 9.892887328313796e-06, "loss": 0.4111, "step": 1496 }, { "epoch": 0.09, "grad_norm": 1.956904065585402, "learning_rate": 9.892677543928687e-06, "loss": 0.4336, "step": 1497 }, { "epoch": 0.09, "grad_norm": 9.407424917915186, "learning_rate": 9.892467556537692e-06, "loss": 0.4286, "step": 1498 }, { "epoch": 0.09, "grad_norm": 1.5868108314301557, "learning_rate": 9.89225736614953e-06, "loss": 0.384, "step": 1499 }, { "epoch": 0.09, "grad_norm": 1.7259959262127642, "learning_rate": 9.89204697277292e-06, "loss": 0.3981, "step": 1500 }, { "epoch": 0.09, "grad_norm": 2.074368901182226, "learning_rate": 9.891836376416593e-06, "loss": 0.3943, "step": 1501 }, { "epoch": 0.09, "grad_norm": 1.731554332407603, "learning_rate": 9.891625577089285e-06, "loss": 0.3714, "step": 1502 }, { "epoch": 0.09, "grad_norm": 1.6119072743723795, "learning_rate": 9.891414574799743e-06, "loss": 0.413, "step": 1503 }, { "epoch": 0.09, "grad_norm": 3.301487286293184, "learning_rate": 9.891203369556722e-06, "loss": 0.3408, "step": 1504 }, { "epoch": 0.09, "grad_norm": 1.5721496853568406, "learning_rate": 9.890991961368986e-06, "loss": 0.3876, "step": 1505 }, { "epoch": 0.09, "grad_norm": 1.4907831396596125, "learning_rate": 9.890780350245305e-06, "loss": 0.3909, "step": 1506 }, { "epoch": 0.09, "grad_norm": 0.7123762050986583, "learning_rate": 9.890568536194462e-06, "loss": 0.4781, "step": 1507 }, { "epoch": 0.09, "grad_norm": 6.167422005271638, "learning_rate": 9.890356519225244e-06, "loss": 0.3659, "step": 1508 }, { "epoch": 0.09, "grad_norm": 4.645429350292824, "learning_rate": 9.890144299346445e-06, "loss": 0.3883, "step": 1509 }, { "epoch": 0.09, "grad_norm": 2.52323903959301, "learning_rate": 9.889931876566877e-06, "loss": 0.4334, "step": 1510 }, { "epoch": 0.1, "grad_norm": 2.3712718822586965, "learning_rate": 9.889719250895347e-06, "loss": 0.4106, "step": 1511 }, { "epoch": 0.1, "grad_norm": 2.3503318056051734, "learning_rate": 9.88950642234068e-06, "loss": 0.3916, "step": 1512 }, { "epoch": 0.1, "grad_norm": 0.6745853673430179, "learning_rate": 9.889293390911708e-06, "loss": 0.4582, "step": 1513 }, { "epoch": 0.1, "grad_norm": 3.1754403974935093, "learning_rate": 9.88908015661727e-06, "loss": 0.4488, "step": 1514 }, { "epoch": 0.1, "grad_norm": 1.9143321788962888, "learning_rate": 9.88886671946621e-06, "loss": 0.4121, "step": 1515 }, { "epoch": 0.1, "grad_norm": 1.9017372274543405, "learning_rate": 9.888653079467388e-06, "loss": 0.3977, "step": 1516 }, { "epoch": 0.1, "grad_norm": 1.5638253592781546, "learning_rate": 9.888439236629665e-06, "loss": 0.4016, "step": 1517 }, { "epoch": 0.1, "grad_norm": 2.771085353001597, "learning_rate": 9.888225190961916e-06, "loss": 0.4062, "step": 1518 }, { "epoch": 0.1, "grad_norm": 0.6580835384058848, "learning_rate": 9.888010942473021e-06, "loss": 0.4446, "step": 1519 }, { "epoch": 0.1, "grad_norm": 3.725761692401521, "learning_rate": 9.887796491171871e-06, "loss": 0.3749, "step": 1520 }, { "epoch": 0.1, "grad_norm": 2.135603827933371, "learning_rate": 9.887581837067362e-06, "loss": 0.4283, "step": 1521 }, { "epoch": 0.1, "grad_norm": 2.1041515451167183, "learning_rate": 9.8873669801684e-06, "loss": 0.4041, "step": 1522 }, { "epoch": 0.1, "grad_norm": 4.1887339764795035, "learning_rate": 9.887151920483904e-06, "loss": 0.4079, "step": 1523 }, { "epoch": 0.1, "grad_norm": 3.1493065584900863, "learning_rate": 9.886936658022792e-06, "loss": 0.42, "step": 1524 }, { "epoch": 0.1, "grad_norm": 2.0796373023459873, "learning_rate": 9.886721192793998e-06, "loss": 0.4442, "step": 1525 }, { "epoch": 0.1, "grad_norm": 1.7252849265660066, "learning_rate": 9.886505524806462e-06, "loss": 0.4001, "step": 1526 }, { "epoch": 0.1, "grad_norm": 2.115412855532158, "learning_rate": 9.886289654069134e-06, "loss": 0.3988, "step": 1527 }, { "epoch": 0.1, "grad_norm": 2.0289058711966885, "learning_rate": 9.886073580590968e-06, "loss": 0.4022, "step": 1528 }, { "epoch": 0.1, "grad_norm": 3.639479006211727, "learning_rate": 9.88585730438093e-06, "loss": 0.4213, "step": 1529 }, { "epoch": 0.1, "grad_norm": 3.285274508998952, "learning_rate": 9.885640825447995e-06, "loss": 0.4253, "step": 1530 }, { "epoch": 0.1, "grad_norm": 4.332633692302388, "learning_rate": 9.885424143801144e-06, "loss": 0.426, "step": 1531 }, { "epoch": 0.1, "grad_norm": 2.067381084276767, "learning_rate": 9.885207259449367e-06, "loss": 0.3872, "step": 1532 }, { "epoch": 0.1, "grad_norm": 2.012950955899202, "learning_rate": 9.884990172401664e-06, "loss": 0.3851, "step": 1533 }, { "epoch": 0.1, "grad_norm": 2.509710170829564, "learning_rate": 9.884772882667045e-06, "loss": 0.4018, "step": 1534 }, { "epoch": 0.1, "grad_norm": 2.557531037945831, "learning_rate": 9.88455539025452e-06, "loss": 0.4018, "step": 1535 }, { "epoch": 0.1, "grad_norm": 1.8023998549226363, "learning_rate": 9.884337695173115e-06, "loss": 0.3882, "step": 1536 }, { "epoch": 0.1, "grad_norm": 1.602399167302603, "learning_rate": 9.884119797431864e-06, "loss": 0.357, "step": 1537 }, { "epoch": 0.1, "grad_norm": 1.3876713227208, "learning_rate": 9.883901697039809e-06, "loss": 0.3714, "step": 1538 }, { "epoch": 0.1, "grad_norm": 2.6313126107514955, "learning_rate": 9.883683394005997e-06, "loss": 0.4047, "step": 1539 }, { "epoch": 0.1, "grad_norm": 1.3148597499003523, "learning_rate": 9.883464888339487e-06, "loss": 0.3718, "step": 1540 }, { "epoch": 0.1, "grad_norm": 1.8969185070682832, "learning_rate": 9.883246180049345e-06, "loss": 0.3797, "step": 1541 }, { "epoch": 0.1, "grad_norm": 0.7229688177338228, "learning_rate": 9.883027269144643e-06, "loss": 0.4941, "step": 1542 }, { "epoch": 0.1, "grad_norm": 0.7406262608377298, "learning_rate": 9.882808155634469e-06, "loss": 0.4786, "step": 1543 }, { "epoch": 0.1, "grad_norm": 4.130647243420708, "learning_rate": 9.88258883952791e-06, "loss": 0.3824, "step": 1544 }, { "epoch": 0.1, "grad_norm": 3.61938707034376, "learning_rate": 9.882369320834068e-06, "loss": 0.4437, "step": 1545 }, { "epoch": 0.1, "grad_norm": 2.2788536790741447, "learning_rate": 9.882149599562052e-06, "loss": 0.4287, "step": 1546 }, { "epoch": 0.1, "grad_norm": 11.4476349201232, "learning_rate": 9.881929675720976e-06, "loss": 0.4043, "step": 1547 }, { "epoch": 0.1, "grad_norm": 2.2189743944428164, "learning_rate": 9.881709549319967e-06, "loss": 0.4272, "step": 1548 }, { "epoch": 0.1, "grad_norm": 2.4537071417017278, "learning_rate": 9.881489220368159e-06, "loss": 0.424, "step": 1549 }, { "epoch": 0.1, "grad_norm": 1.9156323739614187, "learning_rate": 9.881268688874692e-06, "loss": 0.4018, "step": 1550 }, { "epoch": 0.1, "grad_norm": 1.8939609430497777, "learning_rate": 9.881047954848716e-06, "loss": 0.4109, "step": 1551 }, { "epoch": 0.1, "grad_norm": 2.203243748382527, "learning_rate": 9.880827018299392e-06, "loss": 0.3821, "step": 1552 }, { "epoch": 0.1, "grad_norm": 1.6113795149291312, "learning_rate": 9.880605879235885e-06, "loss": 0.4107, "step": 1553 }, { "epoch": 0.1, "grad_norm": 1.55470526097955, "learning_rate": 9.880384537667371e-06, "loss": 0.3983, "step": 1554 }, { "epoch": 0.1, "grad_norm": 3.5228408730582013, "learning_rate": 9.880162993603032e-06, "loss": 0.3949, "step": 1555 }, { "epoch": 0.1, "grad_norm": 1.716271503272738, "learning_rate": 9.879941247052066e-06, "loss": 0.3875, "step": 1556 }, { "epoch": 0.1, "grad_norm": 5.048444172986198, "learning_rate": 9.879719298023669e-06, "loss": 0.3819, "step": 1557 }, { "epoch": 0.1, "grad_norm": 1.7746808164630226, "learning_rate": 9.87949714652705e-06, "loss": 0.3964, "step": 1558 }, { "epoch": 0.1, "grad_norm": 2.0100644144483955, "learning_rate": 9.879274792571427e-06, "loss": 0.4053, "step": 1559 }, { "epoch": 0.1, "grad_norm": 2.0803703390667034, "learning_rate": 9.879052236166029e-06, "loss": 0.4091, "step": 1560 }, { "epoch": 0.1, "grad_norm": 2.0160432550163137, "learning_rate": 9.878829477320085e-06, "loss": 0.3961, "step": 1561 }, { "epoch": 0.1, "grad_norm": 5.542126576885845, "learning_rate": 9.87860651604284e-06, "loss": 0.4289, "step": 1562 }, { "epoch": 0.1, "grad_norm": 1.8848961311721117, "learning_rate": 9.878383352343546e-06, "loss": 0.4093, "step": 1563 }, { "epoch": 0.1, "grad_norm": 1.6335960775558451, "learning_rate": 9.878159986231461e-06, "loss": 0.3961, "step": 1564 }, { "epoch": 0.1, "grad_norm": 1.5331882824257386, "learning_rate": 9.877936417715856e-06, "loss": 0.3945, "step": 1565 }, { "epoch": 0.1, "grad_norm": 1.216200431204067, "learning_rate": 9.877712646806003e-06, "loss": 0.39, "step": 1566 }, { "epoch": 0.1, "grad_norm": 13.218505149285058, "learning_rate": 9.87748867351119e-06, "loss": 0.3896, "step": 1567 }, { "epoch": 0.1, "grad_norm": 2.1156303943815904, "learning_rate": 9.877264497840707e-06, "loss": 0.4114, "step": 1568 }, { "epoch": 0.1, "grad_norm": 1.875347790840662, "learning_rate": 9.877040119803855e-06, "loss": 0.403, "step": 1569 }, { "epoch": 0.1, "grad_norm": 2.9594594653352964, "learning_rate": 9.87681553940995e-06, "loss": 0.4144, "step": 1570 }, { "epoch": 0.1, "grad_norm": 1.7172301747489058, "learning_rate": 9.876590756668303e-06, "loss": 0.3885, "step": 1571 }, { "epoch": 0.1, "grad_norm": 1.5643726014145547, "learning_rate": 9.876365771588246e-06, "loss": 0.3906, "step": 1572 }, { "epoch": 0.1, "grad_norm": 1.379988230516699, "learning_rate": 9.876140584179111e-06, "loss": 0.4027, "step": 1573 }, { "epoch": 0.1, "grad_norm": 1.4426545999875537, "learning_rate": 9.87591519445024e-06, "loss": 0.3911, "step": 1574 }, { "epoch": 0.1, "grad_norm": 2.4506249836419176, "learning_rate": 9.87568960241099e-06, "loss": 0.3985, "step": 1575 }, { "epoch": 0.1, "grad_norm": 1.3969053387991355, "learning_rate": 9.875463808070715e-06, "loss": 0.395, "step": 1576 }, { "epoch": 0.1, "grad_norm": 3.8140566787652515, "learning_rate": 9.87523781143879e-06, "loss": 0.3764, "step": 1577 }, { "epoch": 0.1, "grad_norm": 1.6049219044618324, "learning_rate": 9.875011612524588e-06, "loss": 0.4054, "step": 1578 }, { "epoch": 0.1, "grad_norm": 1.6594386474651648, "learning_rate": 9.874785211337495e-06, "loss": 0.4294, "step": 1579 }, { "epoch": 0.1, "grad_norm": 1.8489354266790934, "learning_rate": 9.874558607886903e-06, "loss": 0.4106, "step": 1580 }, { "epoch": 0.1, "grad_norm": 1.9675224339666788, "learning_rate": 9.874331802182218e-06, "loss": 0.4027, "step": 1581 }, { "epoch": 0.1, "grad_norm": 2.438333569282693, "learning_rate": 9.874104794232849e-06, "loss": 0.3833, "step": 1582 }, { "epoch": 0.1, "grad_norm": 11.758263080262827, "learning_rate": 9.873877584048215e-06, "loss": 0.4013, "step": 1583 }, { "epoch": 0.1, "grad_norm": 2.3092880221130323, "learning_rate": 9.873650171637742e-06, "loss": 0.4122, "step": 1584 }, { "epoch": 0.1, "grad_norm": 2.797009473110623, "learning_rate": 9.873422557010868e-06, "loss": 0.3947, "step": 1585 }, { "epoch": 0.1, "grad_norm": 33.2149911845115, "learning_rate": 9.873194740177035e-06, "loss": 0.399, "step": 1586 }, { "epoch": 0.1, "grad_norm": 1.8303575239244754, "learning_rate": 9.872966721145696e-06, "loss": 0.4253, "step": 1587 }, { "epoch": 0.1, "grad_norm": 1.6917698522229314, "learning_rate": 9.872738499926313e-06, "loss": 0.4217, "step": 1588 }, { "epoch": 0.1, "grad_norm": 1.867497128558469, "learning_rate": 9.872510076528354e-06, "loss": 0.39, "step": 1589 }, { "epoch": 0.1, "grad_norm": 1.8831302942748656, "learning_rate": 9.872281450961298e-06, "loss": 0.3866, "step": 1590 }, { "epoch": 0.1, "grad_norm": 1.6060489849243156, "learning_rate": 9.872052623234632e-06, "loss": 0.4132, "step": 1591 }, { "epoch": 0.1, "grad_norm": 2.1373426595867966, "learning_rate": 9.871823593357847e-06, "loss": 0.4262, "step": 1592 }, { "epoch": 0.1, "grad_norm": 1.4616934618411233, "learning_rate": 9.871594361340448e-06, "loss": 0.377, "step": 1593 }, { "epoch": 0.1, "grad_norm": 2.1246185516809977, "learning_rate": 9.871364927191946e-06, "loss": 0.4494, "step": 1594 }, { "epoch": 0.1, "grad_norm": 1.6667229764513893, "learning_rate": 9.87113529092186e-06, "loss": 0.381, "step": 1595 }, { "epoch": 0.1, "grad_norm": 1.9040267498341337, "learning_rate": 9.870905452539721e-06, "loss": 0.3983, "step": 1596 }, { "epoch": 0.1, "grad_norm": 1.445000136769461, "learning_rate": 9.870675412055061e-06, "loss": 0.3836, "step": 1597 }, { "epoch": 0.1, "grad_norm": 0.98771284005042, "learning_rate": 9.870445169477428e-06, "loss": 0.5325, "step": 1598 }, { "epoch": 0.1, "grad_norm": 2.22609115830435, "learning_rate": 9.870214724816373e-06, "loss": 0.3934, "step": 1599 }, { "epoch": 0.1, "grad_norm": 1.9701962859123494, "learning_rate": 9.869984078081459e-06, "loss": 0.4125, "step": 1600 }, { "epoch": 0.1, "grad_norm": 2.816912168703317, "learning_rate": 9.869753229282256e-06, "loss": 0.3964, "step": 1601 }, { "epoch": 0.1, "grad_norm": 1.8402158447295298, "learning_rate": 9.869522178428342e-06, "loss": 0.3973, "step": 1602 }, { "epoch": 0.1, "grad_norm": 1.8551094047563308, "learning_rate": 9.869290925529303e-06, "loss": 0.4264, "step": 1603 }, { "epoch": 0.1, "grad_norm": 4.053508950539129, "learning_rate": 9.869059470594734e-06, "loss": 0.3979, "step": 1604 }, { "epoch": 0.1, "grad_norm": 1.370219472752349, "learning_rate": 9.86882781363424e-06, "loss": 0.3683, "step": 1605 }, { "epoch": 0.1, "grad_norm": 2.420278752012178, "learning_rate": 9.868595954657432e-06, "loss": 0.3844, "step": 1606 }, { "epoch": 0.1, "grad_norm": 7.094662872243442, "learning_rate": 9.86836389367393e-06, "loss": 0.4059, "step": 1607 }, { "epoch": 0.1, "grad_norm": 3.17522914834035, "learning_rate": 9.868131630693363e-06, "loss": 0.3919, "step": 1608 }, { "epoch": 0.1, "grad_norm": 2.0180806861260696, "learning_rate": 9.867899165725367e-06, "loss": 0.3979, "step": 1609 }, { "epoch": 0.1, "grad_norm": 1.8407740171109623, "learning_rate": 9.867666498779589e-06, "loss": 0.3942, "step": 1610 }, { "epoch": 0.1, "grad_norm": 1.7393380060798258, "learning_rate": 9.867433629865682e-06, "loss": 0.3783, "step": 1611 }, { "epoch": 0.1, "grad_norm": 1.653835467011482, "learning_rate": 9.867200558993308e-06, "loss": 0.4261, "step": 1612 }, { "epoch": 0.1, "grad_norm": 1.3885446718149328, "learning_rate": 9.866967286172138e-06, "loss": 0.3663, "step": 1613 }, { "epoch": 0.1, "grad_norm": 4.768975395850437, "learning_rate": 9.866733811411851e-06, "loss": 0.3793, "step": 1614 }, { "epoch": 0.1, "grad_norm": 1.6931236979606916, "learning_rate": 9.866500134722135e-06, "loss": 0.3825, "step": 1615 }, { "epoch": 0.1, "grad_norm": 1.6536041558537826, "learning_rate": 9.866266256112683e-06, "loss": 0.3672, "step": 1616 }, { "epoch": 0.1, "grad_norm": 2.2573399623354495, "learning_rate": 9.8660321755932e-06, "loss": 0.4195, "step": 1617 }, { "epoch": 0.1, "grad_norm": 1.4421460492372233, "learning_rate": 9.865797893173398e-06, "loss": 0.3736, "step": 1618 }, { "epoch": 0.1, "grad_norm": 2.634157856818414, "learning_rate": 9.865563408863001e-06, "loss": 0.439, "step": 1619 }, { "epoch": 0.1, "grad_norm": 0.8969091112606098, "learning_rate": 9.865328722671736e-06, "loss": 0.4869, "step": 1620 }, { "epoch": 0.1, "grad_norm": 3.780938074845408, "learning_rate": 9.86509383460934e-06, "loss": 0.3823, "step": 1621 }, { "epoch": 0.1, "grad_norm": 3.2094474894144644, "learning_rate": 9.86485874468556e-06, "loss": 0.4181, "step": 1622 }, { "epoch": 0.1, "grad_norm": 1.9213414327350735, "learning_rate": 9.864623452910147e-06, "loss": 0.3897, "step": 1623 }, { "epoch": 0.1, "grad_norm": 1.595731521198233, "learning_rate": 9.86438795929287e-06, "loss": 0.4508, "step": 1624 }, { "epoch": 0.1, "grad_norm": 1.2749829925861649, "learning_rate": 9.864152263843494e-06, "loss": 0.3683, "step": 1625 }, { "epoch": 0.1, "grad_norm": 1.931634651898662, "learning_rate": 9.863916366571801e-06, "loss": 0.3681, "step": 1626 }, { "epoch": 0.1, "grad_norm": 2.047504369413371, "learning_rate": 9.863680267487579e-06, "loss": 0.3851, "step": 1627 }, { "epoch": 0.1, "grad_norm": 1.562617848727528, "learning_rate": 9.863443966600625e-06, "loss": 0.43, "step": 1628 }, { "epoch": 0.1, "grad_norm": 2.3665948079790065, "learning_rate": 9.863207463920741e-06, "loss": 0.4091, "step": 1629 }, { "epoch": 0.1, "grad_norm": 1.9577528694811124, "learning_rate": 9.862970759457741e-06, "loss": 0.3883, "step": 1630 }, { "epoch": 0.1, "grad_norm": 1.7101952810443333, "learning_rate": 9.86273385322145e-06, "loss": 0.3971, "step": 1631 }, { "epoch": 0.1, "grad_norm": 1.8747489452894384, "learning_rate": 9.862496745221691e-06, "loss": 0.3899, "step": 1632 }, { "epoch": 0.1, "grad_norm": 4.399927821968412, "learning_rate": 9.862259435468305e-06, "loss": 0.4037, "step": 1633 }, { "epoch": 0.1, "grad_norm": 3.434823447356248, "learning_rate": 9.862021923971139e-06, "loss": 0.4224, "step": 1634 }, { "epoch": 0.1, "grad_norm": 1.7375833342698581, "learning_rate": 9.861784210740048e-06, "loss": 0.3817, "step": 1635 }, { "epoch": 0.1, "grad_norm": 1.8843841630356726, "learning_rate": 9.861546295784896e-06, "loss": 0.3869, "step": 1636 }, { "epoch": 0.1, "grad_norm": 1.8641709311826786, "learning_rate": 9.86130817911555e-06, "loss": 0.3636, "step": 1637 }, { "epoch": 0.1, "grad_norm": 2.42880075483143, "learning_rate": 9.861069860741896e-06, "loss": 0.4223, "step": 1638 }, { "epoch": 0.1, "grad_norm": 1.5093568068595866, "learning_rate": 9.860831340673818e-06, "loss": 0.3893, "step": 1639 }, { "epoch": 0.1, "grad_norm": 1.479179518708439, "learning_rate": 9.860592618921213e-06, "loss": 0.3596, "step": 1640 }, { "epoch": 0.1, "grad_norm": 1.8754934456139913, "learning_rate": 9.860353695493987e-06, "loss": 0.411, "step": 1641 }, { "epoch": 0.1, "grad_norm": 2.0307806127913177, "learning_rate": 9.860114570402055e-06, "loss": 0.4007, "step": 1642 }, { "epoch": 0.1, "grad_norm": 0.9901093263597351, "learning_rate": 9.859875243655336e-06, "loss": 0.5332, "step": 1643 }, { "epoch": 0.1, "grad_norm": 1.6527672581452526, "learning_rate": 9.85963571526376e-06, "loss": 0.4029, "step": 1644 }, { "epoch": 0.1, "grad_norm": 1.9962457409802663, "learning_rate": 9.859395985237268e-06, "loss": 0.4079, "step": 1645 }, { "epoch": 0.1, "grad_norm": 1.8445980379975375, "learning_rate": 9.859156053585805e-06, "loss": 0.4168, "step": 1646 }, { "epoch": 0.1, "grad_norm": 1.6673165701862798, "learning_rate": 9.858915920319325e-06, "loss": 0.3907, "step": 1647 }, { "epoch": 0.1, "grad_norm": 2.1450276935779082, "learning_rate": 9.858675585447795e-06, "loss": 0.3825, "step": 1648 }, { "epoch": 0.1, "grad_norm": 1.9224143513215968, "learning_rate": 9.858435048981184e-06, "loss": 0.4104, "step": 1649 }, { "epoch": 0.1, "grad_norm": 11.099476366730014, "learning_rate": 9.858194310929474e-06, "loss": 0.4048, "step": 1650 }, { "epoch": 0.1, "grad_norm": 1.7980374577923077, "learning_rate": 9.857953371302651e-06, "loss": 0.3801, "step": 1651 }, { "epoch": 0.1, "grad_norm": 1.4202534694887032, "learning_rate": 9.857712230110717e-06, "loss": 0.37, "step": 1652 }, { "epoch": 0.1, "grad_norm": 1.7026733049341634, "learning_rate": 9.857470887363672e-06, "loss": 0.3849, "step": 1653 }, { "epoch": 0.1, "grad_norm": 1.9840497547579354, "learning_rate": 9.857229343071532e-06, "loss": 0.4376, "step": 1654 }, { "epoch": 0.1, "grad_norm": 2.3932046299332677, "learning_rate": 9.85698759724432e-06, "loss": 0.3797, "step": 1655 }, { "epoch": 0.1, "grad_norm": 1.6382322967133962, "learning_rate": 9.856745649892066e-06, "loss": 0.3596, "step": 1656 }, { "epoch": 0.1, "grad_norm": 3.638171228039363, "learning_rate": 9.856503501024807e-06, "loss": 0.4371, "step": 1657 }, { "epoch": 0.1, "grad_norm": 2.0245138275986685, "learning_rate": 9.856261150652593e-06, "loss": 0.3624, "step": 1658 }, { "epoch": 0.1, "grad_norm": 1.5752020038104408, "learning_rate": 9.856018598785477e-06, "loss": 0.4202, "step": 1659 }, { "epoch": 0.1, "grad_norm": 1.6923476571685003, "learning_rate": 9.855775845433527e-06, "loss": 0.4174, "step": 1660 }, { "epoch": 0.1, "grad_norm": 1.9751350900252784, "learning_rate": 9.855532890606809e-06, "loss": 0.4053, "step": 1661 }, { "epoch": 0.1, "grad_norm": 1.6875648058751105, "learning_rate": 9.855289734315407e-06, "loss": 0.4246, "step": 1662 }, { "epoch": 0.1, "grad_norm": 1.7199870567275772, "learning_rate": 9.855046376569412e-06, "loss": 0.3807, "step": 1663 }, { "epoch": 0.1, "grad_norm": 2.969451586330859, "learning_rate": 9.854802817378918e-06, "loss": 0.3796, "step": 1664 }, { "epoch": 0.1, "grad_norm": 2.6592376380297793, "learning_rate": 9.854559056754031e-06, "loss": 0.4015, "step": 1665 }, { "epoch": 0.1, "grad_norm": 1.5959033763493164, "learning_rate": 9.85431509470487e-06, "loss": 0.3982, "step": 1666 }, { "epoch": 0.1, "grad_norm": 1.7046308592515858, "learning_rate": 9.85407093124155e-06, "loss": 0.3678, "step": 1667 }, { "epoch": 0.1, "grad_norm": 2.102849780940891, "learning_rate": 9.853826566374206e-06, "loss": 0.4168, "step": 1668 }, { "epoch": 0.1, "grad_norm": 1.734985635647761, "learning_rate": 9.853582000112976e-06, "loss": 0.3746, "step": 1669 }, { "epoch": 0.11, "grad_norm": 1.045023648234381, "learning_rate": 9.853337232468008e-06, "loss": 0.5443, "step": 1670 }, { "epoch": 0.11, "grad_norm": 2.4610084570737127, "learning_rate": 9.85309226344946e-06, "loss": 0.3926, "step": 1671 }, { "epoch": 0.11, "grad_norm": 1.8481416710293914, "learning_rate": 9.85284709306749e-06, "loss": 0.4055, "step": 1672 }, { "epoch": 0.11, "grad_norm": 5.965990410125677, "learning_rate": 9.852601721332278e-06, "loss": 0.3732, "step": 1673 }, { "epoch": 0.11, "grad_norm": 11.52394801893013, "learning_rate": 9.852356148253999e-06, "loss": 0.4066, "step": 1674 }, { "epoch": 0.11, "grad_norm": 5.366918402226468, "learning_rate": 9.852110373842846e-06, "loss": 0.3858, "step": 1675 }, { "epoch": 0.11, "grad_norm": 1.9034868674091014, "learning_rate": 9.851864398109015e-06, "loss": 0.4044, "step": 1676 }, { "epoch": 0.11, "grad_norm": 1.471418975171063, "learning_rate": 9.851618221062712e-06, "loss": 0.3832, "step": 1677 }, { "epoch": 0.11, "grad_norm": 1.7960215718083363, "learning_rate": 9.851371842714152e-06, "loss": 0.3868, "step": 1678 }, { "epoch": 0.11, "grad_norm": 2.1817685990868085, "learning_rate": 9.851125263073556e-06, "loss": 0.41, "step": 1679 }, { "epoch": 0.11, "grad_norm": 1.971487075980344, "learning_rate": 9.850878482151158e-06, "loss": 0.4223, "step": 1680 }, { "epoch": 0.11, "grad_norm": 1.410151505915163, "learning_rate": 9.850631499957196e-06, "loss": 0.3754, "step": 1681 }, { "epoch": 0.11, "grad_norm": 2.067354857684912, "learning_rate": 9.850384316501915e-06, "loss": 0.3854, "step": 1682 }, { "epoch": 0.11, "grad_norm": 1.8751145476407016, "learning_rate": 9.850136931795576e-06, "loss": 0.3807, "step": 1683 }, { "epoch": 0.11, "grad_norm": 1.7303156103006214, "learning_rate": 9.849889345848438e-06, "loss": 0.4143, "step": 1684 }, { "epoch": 0.11, "grad_norm": 1.4127764967959593, "learning_rate": 9.849641558670779e-06, "loss": 0.3912, "step": 1685 }, { "epoch": 0.11, "grad_norm": 1.9093287900042089, "learning_rate": 9.849393570272875e-06, "loss": 0.4027, "step": 1686 }, { "epoch": 0.11, "grad_norm": 1.5926673449856141, "learning_rate": 9.849145380665021e-06, "loss": 0.4059, "step": 1687 }, { "epoch": 0.11, "grad_norm": 2.00550667338651, "learning_rate": 9.848896989857512e-06, "loss": 0.3613, "step": 1688 }, { "epoch": 0.11, "grad_norm": 1.7627412396817441, "learning_rate": 9.848648397860654e-06, "loss": 0.3947, "step": 1689 }, { "epoch": 0.11, "grad_norm": 0.8667088767380863, "learning_rate": 9.848399604684762e-06, "loss": 0.486, "step": 1690 }, { "epoch": 0.11, "grad_norm": 3.3910336632697318, "learning_rate": 9.848150610340158e-06, "loss": 0.3763, "step": 1691 }, { "epoch": 0.11, "grad_norm": 1.9004403833278962, "learning_rate": 9.847901414837173e-06, "loss": 0.4266, "step": 1692 }, { "epoch": 0.11, "grad_norm": 2.020778797956488, "learning_rate": 9.847652018186149e-06, "loss": 0.3843, "step": 1693 }, { "epoch": 0.11, "grad_norm": 1.7097985113804344, "learning_rate": 9.847402420397431e-06, "loss": 0.4501, "step": 1694 }, { "epoch": 0.11, "grad_norm": 2.795067892387273, "learning_rate": 9.847152621481378e-06, "loss": 0.3937, "step": 1695 }, { "epoch": 0.11, "grad_norm": 2.137775892968101, "learning_rate": 9.846902621448354e-06, "loss": 0.4002, "step": 1696 }, { "epoch": 0.11, "grad_norm": 3.2106505072535425, "learning_rate": 9.846652420308728e-06, "loss": 0.4096, "step": 1697 }, { "epoch": 0.11, "grad_norm": 2.252113147118114, "learning_rate": 9.846402018072888e-06, "loss": 0.4264, "step": 1698 }, { "epoch": 0.11, "grad_norm": 1.8598361009513775, "learning_rate": 9.846151414751217e-06, "loss": 0.3912, "step": 1699 }, { "epoch": 0.11, "grad_norm": 0.6481025561732026, "learning_rate": 9.845900610354117e-06, "loss": 0.4814, "step": 1700 }, { "epoch": 0.11, "grad_norm": 1.8954598861269756, "learning_rate": 9.845649604891996e-06, "loss": 0.3972, "step": 1701 }, { "epoch": 0.11, "grad_norm": 1.6747124277930796, "learning_rate": 9.845398398375264e-06, "loss": 0.386, "step": 1702 }, { "epoch": 0.11, "grad_norm": 1.9833024773849135, "learning_rate": 9.845146990814345e-06, "loss": 0.3548, "step": 1703 }, { "epoch": 0.11, "grad_norm": 2.7982434928052577, "learning_rate": 9.844895382219673e-06, "loss": 0.3917, "step": 1704 }, { "epoch": 0.11, "grad_norm": 2.620190892161271, "learning_rate": 9.844643572601685e-06, "loss": 0.4271, "step": 1705 }, { "epoch": 0.11, "grad_norm": 1.9066001799296057, "learning_rate": 9.844391561970831e-06, "loss": 0.379, "step": 1706 }, { "epoch": 0.11, "grad_norm": 2.0798176899619896, "learning_rate": 9.844139350337566e-06, "loss": 0.3768, "step": 1707 }, { "epoch": 0.11, "grad_norm": 4.166746859557011, "learning_rate": 9.843886937712355e-06, "loss": 0.3948, "step": 1708 }, { "epoch": 0.11, "grad_norm": 2.285812184538659, "learning_rate": 9.843634324105671e-06, "loss": 0.4038, "step": 1709 }, { "epoch": 0.11, "grad_norm": 2.289329268104687, "learning_rate": 9.843381509527997e-06, "loss": 0.3844, "step": 1710 }, { "epoch": 0.11, "grad_norm": 1.8602726742533615, "learning_rate": 9.84312849398982e-06, "loss": 0.3818, "step": 1711 }, { "epoch": 0.11, "grad_norm": 0.6256248754386069, "learning_rate": 9.84287527750164e-06, "loss": 0.4762, "step": 1712 }, { "epoch": 0.11, "grad_norm": 1.8693918376721483, "learning_rate": 9.842621860073963e-06, "loss": 0.3712, "step": 1713 }, { "epoch": 0.11, "grad_norm": 3.3307457721439757, "learning_rate": 9.842368241717304e-06, "loss": 0.3741, "step": 1714 }, { "epoch": 0.11, "grad_norm": 2.146545990434858, "learning_rate": 9.842114422442184e-06, "loss": 0.3798, "step": 1715 }, { "epoch": 0.11, "grad_norm": 0.6478512616698305, "learning_rate": 9.841860402259139e-06, "loss": 0.4784, "step": 1716 }, { "epoch": 0.11, "grad_norm": 2.7282611243319868, "learning_rate": 9.841606181178703e-06, "loss": 0.4248, "step": 1717 }, { "epoch": 0.11, "grad_norm": 2.2365112124158246, "learning_rate": 9.841351759211426e-06, "loss": 0.4063, "step": 1718 }, { "epoch": 0.11, "grad_norm": 1.9877658777221152, "learning_rate": 9.841097136367868e-06, "loss": 0.3733, "step": 1719 }, { "epoch": 0.11, "grad_norm": 1.70774960444382, "learning_rate": 9.84084231265859e-06, "loss": 0.3845, "step": 1720 }, { "epoch": 0.11, "grad_norm": 1.967471005270002, "learning_rate": 9.840587288094165e-06, "loss": 0.3592, "step": 1721 }, { "epoch": 0.11, "grad_norm": 4.7789218726089935, "learning_rate": 9.840332062685179e-06, "loss": 0.3853, "step": 1722 }, { "epoch": 0.11, "grad_norm": 3.882678991935357, "learning_rate": 9.840076636442215e-06, "loss": 0.4165, "step": 1723 }, { "epoch": 0.11, "grad_norm": 8.273843033979599, "learning_rate": 9.839821009375876e-06, "loss": 0.3865, "step": 1724 }, { "epoch": 0.11, "grad_norm": 2.138827529030236, "learning_rate": 9.839565181496766e-06, "loss": 0.4044, "step": 1725 }, { "epoch": 0.11, "grad_norm": 1.5108712898976502, "learning_rate": 9.839309152815501e-06, "loss": 0.4047, "step": 1726 }, { "epoch": 0.11, "grad_norm": 2.570049755105157, "learning_rate": 9.839052923342704e-06, "loss": 0.3974, "step": 1727 }, { "epoch": 0.11, "grad_norm": 2.2654150722194646, "learning_rate": 9.838796493089004e-06, "loss": 0.4003, "step": 1728 }, { "epoch": 0.11, "grad_norm": 2.524340356891728, "learning_rate": 9.838539862065047e-06, "loss": 0.3893, "step": 1729 }, { "epoch": 0.11, "grad_norm": 0.7232827143685543, "learning_rate": 9.838283030281472e-06, "loss": 0.4895, "step": 1730 }, { "epoch": 0.11, "grad_norm": 1.608871570310679, "learning_rate": 9.838025997748943e-06, "loss": 0.3789, "step": 1731 }, { "epoch": 0.11, "grad_norm": 2.332670753315059, "learning_rate": 9.837768764478121e-06, "loss": 0.3698, "step": 1732 }, { "epoch": 0.11, "grad_norm": 1.937897982725912, "learning_rate": 9.837511330479683e-06, "loss": 0.4052, "step": 1733 }, { "epoch": 0.11, "grad_norm": 2.0249010151356908, "learning_rate": 9.837253695764304e-06, "loss": 0.3733, "step": 1734 }, { "epoch": 0.11, "grad_norm": 2.201770709371206, "learning_rate": 9.83699586034268e-06, "loss": 0.3763, "step": 1735 }, { "epoch": 0.11, "grad_norm": 0.6129306947845914, "learning_rate": 9.836737824225504e-06, "loss": 0.4478, "step": 1736 }, { "epoch": 0.11, "grad_norm": 2.872094004617166, "learning_rate": 9.836479587423487e-06, "loss": 0.4344, "step": 1737 }, { "epoch": 0.11, "grad_norm": 3.1390342220198892, "learning_rate": 9.836221149947339e-06, "loss": 0.3946, "step": 1738 }, { "epoch": 0.11, "grad_norm": 1.943947248325319, "learning_rate": 9.835962511807786e-06, "loss": 0.3732, "step": 1739 }, { "epoch": 0.11, "grad_norm": 1.724561629835258, "learning_rate": 9.835703673015559e-06, "loss": 0.3871, "step": 1740 }, { "epoch": 0.11, "grad_norm": 5.4942667460140875, "learning_rate": 9.835444633581398e-06, "loss": 0.3933, "step": 1741 }, { "epoch": 0.11, "grad_norm": 2.564816540790656, "learning_rate": 9.83518539351605e-06, "loss": 0.4036, "step": 1742 }, { "epoch": 0.11, "grad_norm": 2.6389411447725055, "learning_rate": 9.834925952830272e-06, "loss": 0.3921, "step": 1743 }, { "epoch": 0.11, "grad_norm": 6.692429649224205, "learning_rate": 9.834666311534828e-06, "loss": 0.3958, "step": 1744 }, { "epoch": 0.11, "grad_norm": 1.749031893229669, "learning_rate": 9.834406469640492e-06, "loss": 0.3612, "step": 1745 }, { "epoch": 0.11, "grad_norm": 1.7740671276584536, "learning_rate": 9.834146427158043e-06, "loss": 0.4254, "step": 1746 }, { "epoch": 0.11, "grad_norm": 0.7175704351884112, "learning_rate": 9.833886184098273e-06, "loss": 0.4693, "step": 1747 }, { "epoch": 0.11, "grad_norm": 1.7262648669789762, "learning_rate": 9.833625740471981e-06, "loss": 0.3786, "step": 1748 }, { "epoch": 0.11, "grad_norm": 1.563889080426523, "learning_rate": 9.833365096289971e-06, "loss": 0.3518, "step": 1749 }, { "epoch": 0.11, "grad_norm": 3.300867437494952, "learning_rate": 9.833104251563058e-06, "loss": 0.411, "step": 1750 }, { "epoch": 0.11, "grad_norm": 7.5285836738716725, "learning_rate": 9.832843206302063e-06, "loss": 0.373, "step": 1751 }, { "epoch": 0.11, "grad_norm": 1.806762271498663, "learning_rate": 9.832581960517821e-06, "loss": 0.3953, "step": 1752 }, { "epoch": 0.11, "grad_norm": 3.281968199466739, "learning_rate": 9.83232051422117e-06, "loss": 0.359, "step": 1753 }, { "epoch": 0.11, "grad_norm": 1.617781802791413, "learning_rate": 9.832058867422959e-06, "loss": 0.3946, "step": 1754 }, { "epoch": 0.11, "grad_norm": 2.94586607218607, "learning_rate": 9.831797020134039e-06, "loss": 0.4012, "step": 1755 }, { "epoch": 0.11, "grad_norm": 2.3379790367971496, "learning_rate": 9.831534972365282e-06, "loss": 0.3921, "step": 1756 }, { "epoch": 0.11, "grad_norm": 1.5255728322019193, "learning_rate": 9.831272724127555e-06, "loss": 0.3752, "step": 1757 }, { "epoch": 0.11, "grad_norm": 2.2969952038498853, "learning_rate": 9.831010275431743e-06, "loss": 0.3881, "step": 1758 }, { "epoch": 0.11, "grad_norm": 0.9045826114081688, "learning_rate": 9.830747626288732e-06, "loss": 0.4727, "step": 1759 }, { "epoch": 0.11, "grad_norm": 1.9483183425740722, "learning_rate": 9.830484776709424e-06, "loss": 0.3546, "step": 1760 }, { "epoch": 0.11, "grad_norm": 2.637340387906478, "learning_rate": 9.830221726704721e-06, "loss": 0.3681, "step": 1761 }, { "epoch": 0.11, "grad_norm": 2.1048434018018094, "learning_rate": 9.82995847628554e-06, "loss": 0.3739, "step": 1762 }, { "epoch": 0.11, "grad_norm": 2.3747090243167563, "learning_rate": 9.829695025462803e-06, "loss": 0.3902, "step": 1763 }, { "epoch": 0.11, "grad_norm": 1.6126801855514128, "learning_rate": 9.82943137424744e-06, "loss": 0.3728, "step": 1764 }, { "epoch": 0.11, "grad_norm": 1.7642456284124133, "learning_rate": 9.82916752265039e-06, "loss": 0.3927, "step": 1765 }, { "epoch": 0.11, "grad_norm": 11.211968680808807, "learning_rate": 9.828903470682604e-06, "loss": 0.3728, "step": 1766 }, { "epoch": 0.11, "grad_norm": 4.0141784081619045, "learning_rate": 9.828639218355036e-06, "loss": 0.3732, "step": 1767 }, { "epoch": 0.11, "grad_norm": 2.972918918329857, "learning_rate": 9.82837476567865e-06, "loss": 0.3737, "step": 1768 }, { "epoch": 0.11, "grad_norm": 2.4618233814111172, "learning_rate": 9.828110112664417e-06, "loss": 0.3696, "step": 1769 }, { "epoch": 0.11, "grad_norm": 1.7775168603784308, "learning_rate": 9.82784525932332e-06, "loss": 0.3962, "step": 1770 }, { "epoch": 0.11, "grad_norm": 2.6929858049923627, "learning_rate": 9.82758020566635e-06, "loss": 0.3979, "step": 1771 }, { "epoch": 0.11, "grad_norm": 1.6325018358882113, "learning_rate": 9.827314951704501e-06, "loss": 0.3647, "step": 1772 }, { "epoch": 0.11, "grad_norm": 1.457398218064355, "learning_rate": 9.827049497448782e-06, "loss": 0.3767, "step": 1773 }, { "epoch": 0.11, "grad_norm": 3.421800879401619, "learning_rate": 9.826783842910203e-06, "loss": 0.3973, "step": 1774 }, { "epoch": 0.11, "grad_norm": 2.3614445207751285, "learning_rate": 9.826517988099793e-06, "loss": 0.3866, "step": 1775 }, { "epoch": 0.11, "grad_norm": 2.1527589195533974, "learning_rate": 9.826251933028574e-06, "loss": 0.3845, "step": 1776 }, { "epoch": 0.11, "grad_norm": 2.9785993283026837, "learning_rate": 9.825985677707593e-06, "loss": 0.3833, "step": 1777 }, { "epoch": 0.11, "grad_norm": 2.5470676531409917, "learning_rate": 9.825719222147894e-06, "loss": 0.3563, "step": 1778 }, { "epoch": 0.11, "grad_norm": 1.671811416386511, "learning_rate": 9.825452566360533e-06, "loss": 0.5295, "step": 1779 }, { "epoch": 0.11, "grad_norm": 1.9049135842674518, "learning_rate": 9.825185710356573e-06, "loss": 0.3834, "step": 1780 }, { "epoch": 0.11, "grad_norm": 1.9408021248551481, "learning_rate": 9.824918654147088e-06, "loss": 0.3884, "step": 1781 }, { "epoch": 0.11, "grad_norm": 5.364538884082852, "learning_rate": 9.824651397743159e-06, "loss": 0.3815, "step": 1782 }, { "epoch": 0.11, "grad_norm": 2.100834262112139, "learning_rate": 9.824383941155872e-06, "loss": 0.3906, "step": 1783 }, { "epoch": 0.11, "grad_norm": 3.679919936795614, "learning_rate": 9.824116284396328e-06, "loss": 0.3599, "step": 1784 }, { "epoch": 0.11, "grad_norm": 3.890702532650259, "learning_rate": 9.82384842747563e-06, "loss": 0.4019, "step": 1785 }, { "epoch": 0.11, "grad_norm": 2.3214655606383343, "learning_rate": 9.823580370404893e-06, "loss": 0.4022, "step": 1786 }, { "epoch": 0.11, "grad_norm": 2.200897581694426, "learning_rate": 9.823312113195238e-06, "loss": 0.3882, "step": 1787 }, { "epoch": 0.11, "grad_norm": 2.072186629286691, "learning_rate": 9.823043655857796e-06, "loss": 0.3611, "step": 1788 }, { "epoch": 0.11, "grad_norm": 1.6297660956814268, "learning_rate": 9.822774998403707e-06, "loss": 0.3635, "step": 1789 }, { "epoch": 0.11, "grad_norm": 2.292905578531731, "learning_rate": 9.822506140844118e-06, "loss": 0.3811, "step": 1790 }, { "epoch": 0.11, "grad_norm": 1.7087918382651415, "learning_rate": 9.82223708319018e-06, "loss": 0.37, "step": 1791 }, { "epoch": 0.11, "grad_norm": 2.146685421380077, "learning_rate": 9.821967825453063e-06, "loss": 0.38, "step": 1792 }, { "epoch": 0.11, "grad_norm": 2.2668390265253864, "learning_rate": 9.821698367643936e-06, "loss": 0.3637, "step": 1793 }, { "epoch": 0.11, "grad_norm": 1.5321677385444092, "learning_rate": 9.82142870977398e-06, "loss": 0.3579, "step": 1794 }, { "epoch": 0.11, "grad_norm": 2.4785359192400516, "learning_rate": 9.821158851854381e-06, "loss": 0.3679, "step": 1795 }, { "epoch": 0.11, "grad_norm": 2.7017927566259976, "learning_rate": 9.82088879389634e-06, "loss": 0.3792, "step": 1796 }, { "epoch": 0.11, "grad_norm": 5.231011743947335, "learning_rate": 9.820618535911057e-06, "loss": 0.3807, "step": 1797 }, { "epoch": 0.11, "grad_norm": 5.535052581022571, "learning_rate": 9.820348077909751e-06, "loss": 0.3752, "step": 1798 }, { "epoch": 0.11, "grad_norm": 2.2642552869800614, "learning_rate": 9.820077419903642e-06, "loss": 0.3643, "step": 1799 }, { "epoch": 0.11, "grad_norm": 1.856920118871183, "learning_rate": 9.819806561903958e-06, "loss": 0.3782, "step": 1800 }, { "epoch": 0.11, "grad_norm": 1.4157727619762503, "learning_rate": 9.819535503921939e-06, "loss": 0.3795, "step": 1801 }, { "epoch": 0.11, "grad_norm": 1.6028140208829351, "learning_rate": 9.819264245968831e-06, "loss": 0.36, "step": 1802 }, { "epoch": 0.11, "grad_norm": 2.1175706556514085, "learning_rate": 9.81899278805589e-06, "loss": 0.379, "step": 1803 }, { "epoch": 0.11, "grad_norm": 1.7873196904004678, "learning_rate": 9.81872113019438e-06, "loss": 0.3611, "step": 1804 }, { "epoch": 0.11, "grad_norm": 3.47258866737647, "learning_rate": 9.818449272395569e-06, "loss": 0.4067, "step": 1805 }, { "epoch": 0.11, "grad_norm": 1.6120758077434205, "learning_rate": 9.818177214670742e-06, "loss": 0.3958, "step": 1806 }, { "epoch": 0.11, "grad_norm": 2.1997375866342472, "learning_rate": 9.81790495703118e-06, "loss": 0.3687, "step": 1807 }, { "epoch": 0.11, "grad_norm": 1.8691936911868914, "learning_rate": 9.817632499488188e-06, "loss": 0.3963, "step": 1808 }, { "epoch": 0.11, "grad_norm": 2.93149599238276, "learning_rate": 9.817359842053064e-06, "loss": 0.3514, "step": 1809 }, { "epoch": 0.11, "grad_norm": 2.8647426600058616, "learning_rate": 9.817086984737126e-06, "loss": 0.378, "step": 1810 }, { "epoch": 0.11, "grad_norm": 1.7732346592905521, "learning_rate": 9.81681392755169e-06, "loss": 0.3994, "step": 1811 }, { "epoch": 0.11, "grad_norm": 0.7418135546211012, "learning_rate": 9.81654067050809e-06, "loss": 0.491, "step": 1812 }, { "epoch": 0.11, "grad_norm": 1.9501015746712647, "learning_rate": 9.816267213617663e-06, "loss": 0.3689, "step": 1813 }, { "epoch": 0.11, "grad_norm": 2.1195063900736173, "learning_rate": 9.815993556891753e-06, "loss": 0.3815, "step": 1814 }, { "epoch": 0.11, "grad_norm": 2.2836222840638096, "learning_rate": 9.815719700341717e-06, "loss": 0.3979, "step": 1815 }, { "epoch": 0.11, "grad_norm": 1.610703393567781, "learning_rate": 9.815445643978918e-06, "loss": 0.3927, "step": 1816 }, { "epoch": 0.11, "grad_norm": 2.370038951133397, "learning_rate": 9.815171387814726e-06, "loss": 0.411, "step": 1817 }, { "epoch": 0.11, "grad_norm": 2.3157353744370717, "learning_rate": 9.814896931860518e-06, "loss": 0.3883, "step": 1818 }, { "epoch": 0.11, "grad_norm": 1.9855552743737201, "learning_rate": 9.814622276127685e-06, "loss": 0.3855, "step": 1819 }, { "epoch": 0.11, "grad_norm": 2.4350674817463283, "learning_rate": 9.814347420627624e-06, "loss": 0.3797, "step": 1820 }, { "epoch": 0.11, "grad_norm": 2.1328802884017537, "learning_rate": 9.814072365371736e-06, "loss": 0.4107, "step": 1821 }, { "epoch": 0.11, "grad_norm": 1.8727191776776886, "learning_rate": 9.813797110371435e-06, "loss": 0.4059, "step": 1822 }, { "epoch": 0.11, "grad_norm": 2.4758822076628575, "learning_rate": 9.81352165563814e-06, "loss": 0.3964, "step": 1823 }, { "epoch": 0.11, "grad_norm": 1.7728905932904224, "learning_rate": 9.813246001183283e-06, "loss": 0.3744, "step": 1824 }, { "epoch": 0.11, "grad_norm": 3.3548693281117927, "learning_rate": 9.812970147018301e-06, "loss": 0.387, "step": 1825 }, { "epoch": 0.11, "grad_norm": 0.7098037795919612, "learning_rate": 9.812694093154637e-06, "loss": 0.4919, "step": 1826 }, { "epoch": 0.11, "grad_norm": 1.8816550590224619, "learning_rate": 9.812417839603748e-06, "loss": 0.3631, "step": 1827 }, { "epoch": 0.11, "grad_norm": 2.3025814611521094, "learning_rate": 9.812141386377095e-06, "loss": 0.3602, "step": 1828 }, { "epoch": 0.12, "grad_norm": 3.8788408297975687, "learning_rate": 9.811864733486148e-06, "loss": 0.3771, "step": 1829 }, { "epoch": 0.12, "grad_norm": 1.3724140881944669, "learning_rate": 9.811587880942387e-06, "loss": 0.3961, "step": 1830 }, { "epoch": 0.12, "grad_norm": 2.4392367378303383, "learning_rate": 9.8113108287573e-06, "loss": 0.3919, "step": 1831 }, { "epoch": 0.12, "grad_norm": 1.6599125668086123, "learning_rate": 9.811033576942377e-06, "loss": 0.3523, "step": 1832 }, { "epoch": 0.12, "grad_norm": 3.176707774711695, "learning_rate": 9.81075612550913e-06, "loss": 0.3869, "step": 1833 }, { "epoch": 0.12, "grad_norm": 2.0351763040173565, "learning_rate": 9.810478474469063e-06, "loss": 0.3822, "step": 1834 }, { "epoch": 0.12, "grad_norm": 1.6228436799116237, "learning_rate": 9.8102006238337e-06, "loss": 0.3755, "step": 1835 }, { "epoch": 0.12, "grad_norm": 1.5019206471209272, "learning_rate": 9.80992257361457e-06, "loss": 0.3835, "step": 1836 }, { "epoch": 0.12, "grad_norm": 2.0701113391645256, "learning_rate": 9.809644323823208e-06, "loss": 0.3685, "step": 1837 }, { "epoch": 0.12, "grad_norm": 1.8122415268301864, "learning_rate": 9.809365874471162e-06, "loss": 0.3677, "step": 1838 }, { "epoch": 0.12, "grad_norm": 3.392152207416042, "learning_rate": 9.809087225569982e-06, "loss": 0.3762, "step": 1839 }, { "epoch": 0.12, "grad_norm": 3.973822000004366, "learning_rate": 9.808808377131232e-06, "loss": 0.3985, "step": 1840 }, { "epoch": 0.12, "grad_norm": 2.1141198723828953, "learning_rate": 9.80852932916648e-06, "loss": 0.3799, "step": 1841 }, { "epoch": 0.12, "grad_norm": 1.536958406072693, "learning_rate": 9.808250081687307e-06, "loss": 0.3838, "step": 1842 }, { "epoch": 0.12, "grad_norm": 1.6693594269266874, "learning_rate": 9.807970634705297e-06, "loss": 0.3677, "step": 1843 }, { "epoch": 0.12, "grad_norm": 3.879146621583926, "learning_rate": 9.807690988232046e-06, "loss": 0.3748, "step": 1844 }, { "epoch": 0.12, "grad_norm": 1.7360582201283752, "learning_rate": 9.807411142279155e-06, "loss": 0.3736, "step": 1845 }, { "epoch": 0.12, "grad_norm": 0.7292268293298563, "learning_rate": 9.807131096858237e-06, "loss": 0.4865, "step": 1846 }, { "epoch": 0.12, "grad_norm": 3.4239736621705856, "learning_rate": 9.806850851980913e-06, "loss": 0.4336, "step": 1847 }, { "epoch": 0.12, "grad_norm": 1.6510268875876362, "learning_rate": 9.806570407658807e-06, "loss": 0.3613, "step": 1848 }, { "epoch": 0.12, "grad_norm": 1.5781351338102192, "learning_rate": 9.806289763903558e-06, "loss": 0.4012, "step": 1849 }, { "epoch": 0.12, "grad_norm": 1.6703472805886768, "learning_rate": 9.80600892072681e-06, "loss": 0.3647, "step": 1850 }, { "epoch": 0.12, "grad_norm": 1.4835899672197248, "learning_rate": 9.805727878140216e-06, "loss": 0.3848, "step": 1851 }, { "epoch": 0.12, "grad_norm": 1.3772710502274328, "learning_rate": 9.805446636155435e-06, "loss": 0.3511, "step": 1852 }, { "epoch": 0.12, "grad_norm": 1.8725052006963179, "learning_rate": 9.80516519478414e-06, "loss": 0.3736, "step": 1853 }, { "epoch": 0.12, "grad_norm": 1.783161350617083, "learning_rate": 9.804883554038005e-06, "loss": 0.3717, "step": 1854 }, { "epoch": 0.12, "grad_norm": 5.443154753730953, "learning_rate": 9.804601713928716e-06, "loss": 0.3866, "step": 1855 }, { "epoch": 0.12, "grad_norm": 1.4668533782426225, "learning_rate": 9.804319674467969e-06, "loss": 0.3919, "step": 1856 }, { "epoch": 0.12, "grad_norm": 2.026507821088116, "learning_rate": 9.804037435667465e-06, "loss": 0.406, "step": 1857 }, { "epoch": 0.12, "grad_norm": 1.9596348999579245, "learning_rate": 9.803754997538915e-06, "loss": 0.3937, "step": 1858 }, { "epoch": 0.12, "grad_norm": 0.821964444594066, "learning_rate": 9.803472360094037e-06, "loss": 0.5198, "step": 1859 }, { "epoch": 0.12, "grad_norm": 1.844427766970074, "learning_rate": 9.80318952334456e-06, "loss": 0.3886, "step": 1860 }, { "epoch": 0.12, "grad_norm": 2.5945862940221964, "learning_rate": 9.802906487302217e-06, "loss": 0.4021, "step": 1861 }, { "epoch": 0.12, "grad_norm": 0.7011221792938005, "learning_rate": 9.802623251978754e-06, "loss": 0.4896, "step": 1862 }, { "epoch": 0.12, "grad_norm": 2.18323762157779, "learning_rate": 9.802339817385921e-06, "loss": 0.3756, "step": 1863 }, { "epoch": 0.12, "grad_norm": 2.2646470276799664, "learning_rate": 9.80205618353548e-06, "loss": 0.3962, "step": 1864 }, { "epoch": 0.12, "grad_norm": 1.8768123406953179, "learning_rate": 9.801772350439197e-06, "loss": 0.3497, "step": 1865 }, { "epoch": 0.12, "grad_norm": 1.8224164058034686, "learning_rate": 9.80148831810885e-06, "loss": 0.3835, "step": 1866 }, { "epoch": 0.12, "grad_norm": 1.4532793718077375, "learning_rate": 9.801204086556226e-06, "loss": 0.3818, "step": 1867 }, { "epoch": 0.12, "grad_norm": 2.242903254828831, "learning_rate": 9.800919655793118e-06, "loss": 0.4071, "step": 1868 }, { "epoch": 0.12, "grad_norm": 1.7055773310523081, "learning_rate": 9.800635025831323e-06, "loss": 0.3604, "step": 1869 }, { "epoch": 0.12, "grad_norm": 4.368379984554867, "learning_rate": 9.800350196682655e-06, "loss": 0.3688, "step": 1870 }, { "epoch": 0.12, "grad_norm": 0.8032910357834164, "learning_rate": 9.800065168358932e-06, "loss": 0.5017, "step": 1871 }, { "epoch": 0.12, "grad_norm": 5.270233253613688, "learning_rate": 9.799779940871978e-06, "loss": 0.3551, "step": 1872 }, { "epoch": 0.12, "grad_norm": 2.424770210701576, "learning_rate": 9.79949451423363e-06, "loss": 0.4159, "step": 1873 }, { "epoch": 0.12, "grad_norm": 1.5062834576744304, "learning_rate": 9.799208888455728e-06, "loss": 0.3857, "step": 1874 }, { "epoch": 0.12, "grad_norm": 1.9901631549421632, "learning_rate": 9.798923063550126e-06, "loss": 0.394, "step": 1875 }, { "epoch": 0.12, "grad_norm": 1.6528928084315886, "learning_rate": 9.798637039528682e-06, "loss": 0.3741, "step": 1876 }, { "epoch": 0.12, "grad_norm": 1.851195106538528, "learning_rate": 9.798350816403264e-06, "loss": 0.3903, "step": 1877 }, { "epoch": 0.12, "grad_norm": 1.6243796938564128, "learning_rate": 9.798064394185747e-06, "loss": 0.3806, "step": 1878 }, { "epoch": 0.12, "grad_norm": 2.0692094744188174, "learning_rate": 9.797777772888018e-06, "loss": 0.375, "step": 1879 }, { "epoch": 0.12, "grad_norm": 2.9722223855442804, "learning_rate": 9.797490952521965e-06, "loss": 0.404, "step": 1880 }, { "epoch": 0.12, "grad_norm": 4.323189381334899, "learning_rate": 9.797203933099492e-06, "loss": 0.4047, "step": 1881 }, { "epoch": 0.12, "grad_norm": 1.8726616653905463, "learning_rate": 9.796916714632507e-06, "loss": 0.4161, "step": 1882 }, { "epoch": 0.12, "grad_norm": 1.4112081624351538, "learning_rate": 9.796629297132927e-06, "loss": 0.3643, "step": 1883 }, { "epoch": 0.12, "grad_norm": 2.1861272231532065, "learning_rate": 9.79634168061268e-06, "loss": 0.3979, "step": 1884 }, { "epoch": 0.12, "grad_norm": 2.003887017501671, "learning_rate": 9.796053865083694e-06, "loss": 0.3589, "step": 1885 }, { "epoch": 0.12, "grad_norm": 1.6608401463733822, "learning_rate": 9.795765850557917e-06, "loss": 0.3634, "step": 1886 }, { "epoch": 0.12, "grad_norm": 3.719302499213828, "learning_rate": 9.795477637047295e-06, "loss": 0.3713, "step": 1887 }, { "epoch": 0.12, "grad_norm": 2.5173584641802416, "learning_rate": 9.795189224563788e-06, "loss": 0.3754, "step": 1888 }, { "epoch": 0.12, "grad_norm": 4.5218269577982015, "learning_rate": 9.794900613119364e-06, "loss": 0.3594, "step": 1889 }, { "epoch": 0.12, "grad_norm": 2.8582866394907955, "learning_rate": 9.794611802725997e-06, "loss": 0.3571, "step": 1890 }, { "epoch": 0.12, "grad_norm": 2.072569259131529, "learning_rate": 9.794322793395669e-06, "loss": 0.367, "step": 1891 }, { "epoch": 0.12, "grad_norm": 0.812020311598828, "learning_rate": 9.794033585140372e-06, "loss": 0.524, "step": 1892 }, { "epoch": 0.12, "grad_norm": 2.3552908356253797, "learning_rate": 9.793744177972108e-06, "loss": 0.3782, "step": 1893 }, { "epoch": 0.12, "grad_norm": 13.422310012617706, "learning_rate": 9.793454571902883e-06, "loss": 0.3586, "step": 1894 }, { "epoch": 0.12, "grad_norm": 3.1116590567977065, "learning_rate": 9.793164766944714e-06, "loss": 0.4087, "step": 1895 }, { "epoch": 0.12, "grad_norm": 1.5817790936508511, "learning_rate": 9.792874763109624e-06, "loss": 0.401, "step": 1896 }, { "epoch": 0.12, "grad_norm": 3.317936493490959, "learning_rate": 9.792584560409649e-06, "loss": 0.3802, "step": 1897 }, { "epoch": 0.12, "grad_norm": 0.6673907285813097, "learning_rate": 9.792294158856826e-06, "loss": 0.5049, "step": 1898 }, { "epoch": 0.12, "grad_norm": 1.5708418207204, "learning_rate": 9.792003558463207e-06, "loss": 0.3849, "step": 1899 }, { "epoch": 0.12, "grad_norm": 1.8808633162336108, "learning_rate": 9.79171275924085e-06, "loss": 0.3635, "step": 1900 }, { "epoch": 0.12, "grad_norm": 2.1897425596392917, "learning_rate": 9.791421761201816e-06, "loss": 0.3867, "step": 1901 }, { "epoch": 0.12, "grad_norm": 1.4173800838434034, "learning_rate": 9.791130564358187e-06, "loss": 0.3594, "step": 1902 }, { "epoch": 0.12, "grad_norm": 1.4725636900039665, "learning_rate": 9.790839168722037e-06, "loss": 0.3565, "step": 1903 }, { "epoch": 0.12, "grad_norm": 2.1064058612467136, "learning_rate": 9.790547574305463e-06, "loss": 0.3769, "step": 1904 }, { "epoch": 0.12, "grad_norm": 1.8853211503689868, "learning_rate": 9.79025578112056e-06, "loss": 0.3813, "step": 1905 }, { "epoch": 0.12, "grad_norm": 2.227915359719842, "learning_rate": 9.789963789179438e-06, "loss": 0.3948, "step": 1906 }, { "epoch": 0.12, "grad_norm": 4.595272909569428, "learning_rate": 9.789671598494208e-06, "loss": 0.372, "step": 1907 }, { "epoch": 0.12, "grad_norm": 2.7529055558688227, "learning_rate": 9.789379209076997e-06, "loss": 0.4011, "step": 1908 }, { "epoch": 0.12, "grad_norm": 3.2201098265594053, "learning_rate": 9.789086620939936e-06, "loss": 0.4142, "step": 1909 }, { "epoch": 0.12, "grad_norm": 1.6230325565626995, "learning_rate": 9.788793834095165e-06, "loss": 0.3868, "step": 1910 }, { "epoch": 0.12, "grad_norm": 1.8220788291104209, "learning_rate": 9.788500848554831e-06, "loss": 0.4005, "step": 1911 }, { "epoch": 0.12, "grad_norm": 1.86975313357887, "learning_rate": 9.788207664331093e-06, "loss": 0.3862, "step": 1912 }, { "epoch": 0.12, "grad_norm": 1.5087622500622635, "learning_rate": 9.787914281436112e-06, "loss": 0.4086, "step": 1913 }, { "epoch": 0.12, "grad_norm": 2.7034975329118294, "learning_rate": 9.787620699882064e-06, "loss": 0.3916, "step": 1914 }, { "epoch": 0.12, "grad_norm": 2.8567750650849524, "learning_rate": 9.78732691968113e-06, "loss": 0.3843, "step": 1915 }, { "epoch": 0.12, "grad_norm": 1.9640602234460758, "learning_rate": 9.787032940845499e-06, "loss": 0.3707, "step": 1916 }, { "epoch": 0.12, "grad_norm": 1.655332001580452, "learning_rate": 9.786738763387368e-06, "loss": 0.3766, "step": 1917 }, { "epoch": 0.12, "grad_norm": 2.033882687241404, "learning_rate": 9.786444387318943e-06, "loss": 0.3894, "step": 1918 }, { "epoch": 0.12, "grad_norm": 2.074227083942343, "learning_rate": 9.78614981265244e-06, "loss": 0.3678, "step": 1919 }, { "epoch": 0.12, "grad_norm": 0.8630051286114978, "learning_rate": 9.785855039400079e-06, "loss": 0.5252, "step": 1920 }, { "epoch": 0.12, "grad_norm": 2.519566927333756, "learning_rate": 9.785560067574092e-06, "loss": 0.3803, "step": 1921 }, { "epoch": 0.12, "grad_norm": 1.6710749180215918, "learning_rate": 9.785264897186718e-06, "loss": 0.38, "step": 1922 }, { "epoch": 0.12, "grad_norm": 2.0016881036651193, "learning_rate": 9.784969528250204e-06, "loss": 0.3609, "step": 1923 }, { "epoch": 0.12, "grad_norm": 1.9523850550089148, "learning_rate": 9.784673960776805e-06, "loss": 0.39, "step": 1924 }, { "epoch": 0.12, "grad_norm": 1.696981077145076, "learning_rate": 9.784378194778786e-06, "loss": 0.3593, "step": 1925 }, { "epoch": 0.12, "grad_norm": 2.303193951213647, "learning_rate": 9.784082230268415e-06, "loss": 0.3688, "step": 1926 }, { "epoch": 0.12, "grad_norm": 1.83381667490055, "learning_rate": 9.783786067257976e-06, "loss": 0.3837, "step": 1927 }, { "epoch": 0.12, "grad_norm": 1.7024276635852529, "learning_rate": 9.783489705759756e-06, "loss": 0.3759, "step": 1928 }, { "epoch": 0.12, "grad_norm": 3.0647040389760325, "learning_rate": 9.783193145786053e-06, "loss": 0.3857, "step": 1929 }, { "epoch": 0.12, "grad_norm": 2.017366550215066, "learning_rate": 9.78289638734917e-06, "loss": 0.3885, "step": 1930 }, { "epoch": 0.12, "grad_norm": 2.2335354084717443, "learning_rate": 9.78259943046142e-06, "loss": 0.358, "step": 1931 }, { "epoch": 0.12, "grad_norm": 1.7159171008819794, "learning_rate": 9.782302275135124e-06, "loss": 0.4002, "step": 1932 }, { "epoch": 0.12, "grad_norm": 2.280026934917581, "learning_rate": 9.782004921382612e-06, "loss": 0.3735, "step": 1933 }, { "epoch": 0.12, "grad_norm": 2.1313823366662983, "learning_rate": 9.781707369216224e-06, "loss": 0.3797, "step": 1934 }, { "epoch": 0.12, "grad_norm": 21.506648057275566, "learning_rate": 9.781409618648303e-06, "loss": 0.3678, "step": 1935 }, { "epoch": 0.12, "grad_norm": 1.6622627212392396, "learning_rate": 9.781111669691203e-06, "loss": 0.4027, "step": 1936 }, { "epoch": 0.12, "grad_norm": 2.1157157261458726, "learning_rate": 9.78081352235729e-06, "loss": 0.386, "step": 1937 }, { "epoch": 0.12, "grad_norm": 1.771600485678825, "learning_rate": 9.780515176658931e-06, "loss": 0.3738, "step": 1938 }, { "epoch": 0.12, "grad_norm": 2.0231255387124607, "learning_rate": 9.780216632608505e-06, "loss": 0.3615, "step": 1939 }, { "epoch": 0.12, "grad_norm": 1.5574669321774992, "learning_rate": 9.779917890218403e-06, "loss": 0.3783, "step": 1940 }, { "epoch": 0.12, "grad_norm": 5.11765915699987, "learning_rate": 9.779618949501016e-06, "loss": 0.3744, "step": 1941 }, { "epoch": 0.12, "grad_norm": 1.200780852177338, "learning_rate": 9.77931981046875e-06, "loss": 0.3644, "step": 1942 }, { "epoch": 0.12, "grad_norm": 2.2477496902459144, "learning_rate": 9.779020473134016e-06, "loss": 0.379, "step": 1943 }, { "epoch": 0.12, "grad_norm": 2.546558164427262, "learning_rate": 9.778720937509232e-06, "loss": 0.3738, "step": 1944 }, { "epoch": 0.12, "grad_norm": 1.606784032500254, "learning_rate": 9.77842120360683e-06, "loss": 0.3791, "step": 1945 }, { "epoch": 0.12, "grad_norm": 1.544110328603507, "learning_rate": 9.778121271439244e-06, "loss": 0.4039, "step": 1946 }, { "epoch": 0.12, "grad_norm": 1.8406293720086553, "learning_rate": 9.777821141018921e-06, "loss": 0.3507, "step": 1947 }, { "epoch": 0.12, "grad_norm": 2.305839609769871, "learning_rate": 9.777520812358312e-06, "loss": 0.3889, "step": 1948 }, { "epoch": 0.12, "grad_norm": 2.029526921175561, "learning_rate": 9.77722028546988e-06, "loss": 0.3825, "step": 1949 }, { "epoch": 0.12, "grad_norm": 1.9220591347031635, "learning_rate": 9.776919560366091e-06, "loss": 0.3596, "step": 1950 }, { "epoch": 0.12, "grad_norm": 1.4369895353485262, "learning_rate": 9.776618637059426e-06, "loss": 0.3793, "step": 1951 }, { "epoch": 0.12, "grad_norm": 2.1288381073443885, "learning_rate": 9.776317515562368e-06, "loss": 0.3773, "step": 1952 }, { "epoch": 0.12, "grad_norm": 0.907481904334835, "learning_rate": 9.776016195887412e-06, "loss": 0.51, "step": 1953 }, { "epoch": 0.12, "grad_norm": 1.881392823035069, "learning_rate": 9.775714678047062e-06, "loss": 0.3707, "step": 1954 }, { "epoch": 0.12, "grad_norm": 1.9875774410229394, "learning_rate": 9.775412962053827e-06, "loss": 0.3848, "step": 1955 }, { "epoch": 0.12, "grad_norm": 1.6703353259398892, "learning_rate": 9.775111047920227e-06, "loss": 0.3707, "step": 1956 }, { "epoch": 0.12, "grad_norm": 2.3450663523316644, "learning_rate": 9.774808935658789e-06, "loss": 0.3824, "step": 1957 }, { "epoch": 0.12, "grad_norm": 3.2576297293049863, "learning_rate": 9.774506625282045e-06, "loss": 0.3924, "step": 1958 }, { "epoch": 0.12, "grad_norm": 3.1682736851014184, "learning_rate": 9.774204116802541e-06, "loss": 0.3861, "step": 1959 }, { "epoch": 0.12, "grad_norm": 2.176652831134348, "learning_rate": 9.77390141023283e-06, "loss": 0.3936, "step": 1960 }, { "epoch": 0.12, "grad_norm": 2.462140381311946, "learning_rate": 9.773598505585469e-06, "loss": 0.3875, "step": 1961 }, { "epoch": 0.12, "grad_norm": 2.948576032937403, "learning_rate": 9.773295402873027e-06, "loss": 0.3846, "step": 1962 }, { "epoch": 0.12, "grad_norm": 2.40336693857439, "learning_rate": 9.772992102108081e-06, "loss": 0.372, "step": 1963 }, { "epoch": 0.12, "grad_norm": 1.4358865188548455, "learning_rate": 9.772688603303212e-06, "loss": 0.3784, "step": 1964 }, { "epoch": 0.12, "grad_norm": 1.1518668362808788, "learning_rate": 9.772384906471019e-06, "loss": 0.3786, "step": 1965 }, { "epoch": 0.12, "grad_norm": 1.74123761591902, "learning_rate": 9.7720810116241e-06, "loss": 0.3787, "step": 1966 }, { "epoch": 0.12, "grad_norm": 2.0715609386547476, "learning_rate": 9.771776918775062e-06, "loss": 0.3665, "step": 1967 }, { "epoch": 0.12, "grad_norm": 3.0775998085029475, "learning_rate": 9.771472627936523e-06, "loss": 0.3756, "step": 1968 }, { "epoch": 0.12, "grad_norm": 1.7415421794158161, "learning_rate": 9.771168139121112e-06, "loss": 0.3795, "step": 1969 }, { "epoch": 0.12, "grad_norm": 1.561194303588185, "learning_rate": 9.77086345234146e-06, "loss": 0.3796, "step": 1970 }, { "epoch": 0.12, "grad_norm": 1.8302956151937808, "learning_rate": 9.770558567610206e-06, "loss": 0.3749, "step": 1971 }, { "epoch": 0.12, "grad_norm": 5.810312018015889, "learning_rate": 9.770253484940006e-06, "loss": 0.3764, "step": 1972 }, { "epoch": 0.12, "grad_norm": 1.5425732988747862, "learning_rate": 9.769948204343516e-06, "loss": 0.3728, "step": 1973 }, { "epoch": 0.12, "grad_norm": 3.647153725909506, "learning_rate": 9.769642725833404e-06, "loss": 0.383, "step": 1974 }, { "epoch": 0.12, "grad_norm": 2.149574646060756, "learning_rate": 9.769337049422342e-06, "loss": 0.3739, "step": 1975 }, { "epoch": 0.12, "grad_norm": 2.0619356098848436, "learning_rate": 9.769031175123014e-06, "loss": 0.3921, "step": 1976 }, { "epoch": 0.12, "grad_norm": 4.1116561985412945, "learning_rate": 9.768725102948114e-06, "loss": 0.3822, "step": 1977 }, { "epoch": 0.12, "grad_norm": 1.8387468500880593, "learning_rate": 9.768418832910335e-06, "loss": 0.3498, "step": 1978 }, { "epoch": 0.12, "grad_norm": 1.466877314186575, "learning_rate": 9.768112365022393e-06, "loss": 0.3734, "step": 1979 }, { "epoch": 0.12, "grad_norm": 1.6249384200281445, "learning_rate": 9.767805699297e-06, "loss": 0.3763, "step": 1980 }, { "epoch": 0.12, "grad_norm": 2.317160429032891, "learning_rate": 9.767498835746877e-06, "loss": 0.3709, "step": 1981 }, { "epoch": 0.12, "grad_norm": 1.546149542105723, "learning_rate": 9.767191774384762e-06, "loss": 0.3775, "step": 1982 }, { "epoch": 0.12, "grad_norm": 2.392984475719745, "learning_rate": 9.76688451522339e-06, "loss": 0.3851, "step": 1983 }, { "epoch": 0.12, "grad_norm": 2.3679476559327224, "learning_rate": 9.766577058275515e-06, "loss": 0.3711, "step": 1984 }, { "epoch": 0.12, "grad_norm": 1.4697593364660515, "learning_rate": 9.76626940355389e-06, "loss": 0.3875, "step": 1985 }, { "epoch": 0.12, "grad_norm": 1.6129796454028429, "learning_rate": 9.765961551071281e-06, "loss": 0.3509, "step": 1986 }, { "epoch": 0.12, "grad_norm": 1.4820353944038447, "learning_rate": 9.765653500840465e-06, "loss": 0.3905, "step": 1987 }, { "epoch": 0.13, "grad_norm": 2.7673014753661827, "learning_rate": 9.765345252874218e-06, "loss": 0.3686, "step": 1988 }, { "epoch": 0.13, "grad_norm": 6.89545528951867, "learning_rate": 9.765036807185333e-06, "loss": 0.3747, "step": 1989 }, { "epoch": 0.13, "grad_norm": 1.8510555016509191, "learning_rate": 9.764728163786607e-06, "loss": 0.3469, "step": 1990 }, { "epoch": 0.13, "grad_norm": 2.5776007745789054, "learning_rate": 9.764419322690846e-06, "loss": 0.3916, "step": 1991 }, { "epoch": 0.13, "grad_norm": 3.6629761168978403, "learning_rate": 9.764110283910864e-06, "loss": 0.3613, "step": 1992 }, { "epoch": 0.13, "grad_norm": 1.8771437108275397, "learning_rate": 9.763801047459487e-06, "loss": 0.3824, "step": 1993 }, { "epoch": 0.13, "grad_norm": 1.8611967018793132, "learning_rate": 9.76349161334954e-06, "loss": 0.383, "step": 1994 }, { "epoch": 0.13, "grad_norm": 1.5387062747787261, "learning_rate": 9.763181981593868e-06, "loss": 0.3686, "step": 1995 }, { "epoch": 0.13, "grad_norm": 1.5395254340364815, "learning_rate": 9.762872152205313e-06, "loss": 0.3639, "step": 1996 }, { "epoch": 0.13, "grad_norm": 6.288502229213549, "learning_rate": 9.762562125196734e-06, "loss": 0.3671, "step": 1997 }, { "epoch": 0.13, "grad_norm": 2.357514527059941, "learning_rate": 9.762251900580992e-06, "loss": 0.3594, "step": 1998 }, { "epoch": 0.13, "grad_norm": 1.7716245546139546, "learning_rate": 9.76194147837096e-06, "loss": 0.3733, "step": 1999 }, { "epoch": 0.13, "grad_norm": 1.893383549361695, "learning_rate": 9.76163085857952e-06, "loss": 0.3726, "step": 2000 }, { "epoch": 0.13, "grad_norm": 0.8859889597204025, "learning_rate": 9.761320041219555e-06, "loss": 0.4947, "step": 2001 }, { "epoch": 0.13, "grad_norm": 2.221491499199223, "learning_rate": 9.761009026303968e-06, "loss": 0.3807, "step": 2002 }, { "epoch": 0.13, "grad_norm": 0.693554606557953, "learning_rate": 9.760697813845656e-06, "loss": 0.5143, "step": 2003 }, { "epoch": 0.13, "grad_norm": 1.7756681160884638, "learning_rate": 9.760386403857538e-06, "loss": 0.3808, "step": 2004 }, { "epoch": 0.13, "grad_norm": 2.765243284947136, "learning_rate": 9.760074796352532e-06, "loss": 0.3849, "step": 2005 }, { "epoch": 0.13, "grad_norm": 1.9655707569516148, "learning_rate": 9.759762991343569e-06, "loss": 0.386, "step": 2006 }, { "epoch": 0.13, "grad_norm": 1.769282624130448, "learning_rate": 9.759450988843585e-06, "loss": 0.3801, "step": 2007 }, { "epoch": 0.13, "grad_norm": 1.712213566302072, "learning_rate": 9.759138788865524e-06, "loss": 0.3818, "step": 2008 }, { "epoch": 0.13, "grad_norm": 1.82289568923332, "learning_rate": 9.758826391422343e-06, "loss": 0.3733, "step": 2009 }, { "epoch": 0.13, "grad_norm": 2.032705911899892, "learning_rate": 9.758513796527002e-06, "loss": 0.3661, "step": 2010 }, { "epoch": 0.13, "grad_norm": 1.6822890353301314, "learning_rate": 9.758201004192471e-06, "loss": 0.3604, "step": 2011 }, { "epoch": 0.13, "grad_norm": 2.190475743965018, "learning_rate": 9.75788801443173e-06, "loss": 0.3906, "step": 2012 }, { "epoch": 0.13, "grad_norm": 10.261698410710128, "learning_rate": 9.757574827257764e-06, "loss": 0.3629, "step": 2013 }, { "epoch": 0.13, "grad_norm": 1.0359058427099048, "learning_rate": 9.757261442683568e-06, "loss": 0.5211, "step": 2014 }, { "epoch": 0.13, "grad_norm": 1.6760454853543507, "learning_rate": 9.756947860722143e-06, "loss": 0.3784, "step": 2015 }, { "epoch": 0.13, "grad_norm": 2.8343607510351445, "learning_rate": 9.756634081386504e-06, "loss": 0.3767, "step": 2016 }, { "epoch": 0.13, "grad_norm": 5.226232322669994, "learning_rate": 9.756320104689667e-06, "loss": 0.3815, "step": 2017 }, { "epoch": 0.13, "grad_norm": 2.906365266928955, "learning_rate": 9.756005930644662e-06, "loss": 0.3899, "step": 2018 }, { "epoch": 0.13, "grad_norm": 6.127075075399167, "learning_rate": 9.755691559264522e-06, "loss": 0.4045, "step": 2019 }, { "epoch": 0.13, "grad_norm": 3.6405042644879044, "learning_rate": 9.755376990562295e-06, "loss": 0.3806, "step": 2020 }, { "epoch": 0.13, "grad_norm": 4.314911126990063, "learning_rate": 9.755062224551026e-06, "loss": 0.4185, "step": 2021 }, { "epoch": 0.13, "grad_norm": 1.9300617164925287, "learning_rate": 9.754747261243782e-06, "loss": 0.389, "step": 2022 }, { "epoch": 0.13, "grad_norm": 2.4834698528387698, "learning_rate": 9.754432100653628e-06, "loss": 0.3853, "step": 2023 }, { "epoch": 0.13, "grad_norm": 3.236187734459461, "learning_rate": 9.754116742793643e-06, "loss": 0.3711, "step": 2024 }, { "epoch": 0.13, "grad_norm": 3.210971605398184, "learning_rate": 9.753801187676908e-06, "loss": 0.3749, "step": 2025 }, { "epoch": 0.13, "grad_norm": 2.9677015445758617, "learning_rate": 9.753485435316518e-06, "loss": 0.349, "step": 2026 }, { "epoch": 0.13, "grad_norm": 1.8476003512204442, "learning_rate": 9.753169485725575e-06, "loss": 0.3663, "step": 2027 }, { "epoch": 0.13, "grad_norm": 8.368513933261431, "learning_rate": 9.752853338917187e-06, "loss": 0.3689, "step": 2028 }, { "epoch": 0.13, "grad_norm": 2.0499042881887637, "learning_rate": 9.752536994904473e-06, "loss": 0.3622, "step": 2029 }, { "epoch": 0.13, "grad_norm": 2.0956243531214254, "learning_rate": 9.752220453700556e-06, "loss": 0.351, "step": 2030 }, { "epoch": 0.13, "grad_norm": 3.314003420331242, "learning_rate": 9.751903715318572e-06, "loss": 0.3467, "step": 2031 }, { "epoch": 0.13, "grad_norm": 3.785946372863839, "learning_rate": 9.751586779771663e-06, "loss": 0.3788, "step": 2032 }, { "epoch": 0.13, "grad_norm": 2.4905383618900525, "learning_rate": 9.751269647072978e-06, "loss": 0.3885, "step": 2033 }, { "epoch": 0.13, "grad_norm": 3.6776188208779383, "learning_rate": 9.750952317235678e-06, "loss": 0.389, "step": 2034 }, { "epoch": 0.13, "grad_norm": 2.164191962494118, "learning_rate": 9.750634790272926e-06, "loss": 0.3705, "step": 2035 }, { "epoch": 0.13, "grad_norm": 4.562466612448937, "learning_rate": 9.750317066197899e-06, "loss": 0.3635, "step": 2036 }, { "epoch": 0.13, "grad_norm": 0.6978323981070979, "learning_rate": 9.74999914502378e-06, "loss": 0.4722, "step": 2037 }, { "epoch": 0.13, "grad_norm": 1.7676229652145903, "learning_rate": 9.749681026763758e-06, "loss": 0.3735, "step": 2038 }, { "epoch": 0.13, "grad_norm": 2.7321059484900543, "learning_rate": 9.749362711431034e-06, "loss": 0.3826, "step": 2039 }, { "epoch": 0.13, "grad_norm": 2.5780720677869677, "learning_rate": 9.749044199038817e-06, "loss": 0.3683, "step": 2040 }, { "epoch": 0.13, "grad_norm": 1.7770761131145016, "learning_rate": 9.74872548960032e-06, "loss": 0.3505, "step": 2041 }, { "epoch": 0.13, "grad_norm": 1.9147841645237889, "learning_rate": 9.748406583128766e-06, "loss": 0.3468, "step": 2042 }, { "epoch": 0.13, "grad_norm": 3.0539811371487953, "learning_rate": 9.748087479637392e-06, "loss": 0.3869, "step": 2043 }, { "epoch": 0.13, "grad_norm": 2.1851613542010764, "learning_rate": 9.747768179139433e-06, "loss": 0.3993, "step": 2044 }, { "epoch": 0.13, "grad_norm": 2.5271231616726504, "learning_rate": 9.74744868164814e-06, "loss": 0.3727, "step": 2045 }, { "epoch": 0.13, "grad_norm": 1.7743759366833747, "learning_rate": 9.747128987176768e-06, "loss": 0.3638, "step": 2046 }, { "epoch": 0.13, "grad_norm": 1.9534302961238634, "learning_rate": 9.746809095738581e-06, "loss": 0.3793, "step": 2047 }, { "epoch": 0.13, "grad_norm": 2.2704836228978835, "learning_rate": 9.746489007346856e-06, "loss": 0.3682, "step": 2048 }, { "epoch": 0.13, "grad_norm": 2.0785440079876905, "learning_rate": 9.746168722014871e-06, "loss": 0.3638, "step": 2049 }, { "epoch": 0.13, "grad_norm": 8.380900743695415, "learning_rate": 9.745848239755915e-06, "loss": 0.3559, "step": 2050 }, { "epoch": 0.13, "grad_norm": 2.1384483816761786, "learning_rate": 9.745527560583285e-06, "loss": 0.3768, "step": 2051 }, { "epoch": 0.13, "grad_norm": 2.75043907337137, "learning_rate": 9.745206684510289e-06, "loss": 0.3926, "step": 2052 }, { "epoch": 0.13, "grad_norm": 10.179888990727422, "learning_rate": 9.744885611550239e-06, "loss": 0.3968, "step": 2053 }, { "epoch": 0.13, "grad_norm": 2.9498528642622612, "learning_rate": 9.744564341716455e-06, "loss": 0.3305, "step": 2054 }, { "epoch": 0.13, "grad_norm": 2.414228701125571, "learning_rate": 9.744242875022272e-06, "loss": 0.3717, "step": 2055 }, { "epoch": 0.13, "grad_norm": 3.4024879159291146, "learning_rate": 9.743921211481024e-06, "loss": 0.3693, "step": 2056 }, { "epoch": 0.13, "grad_norm": 8.637135472120715, "learning_rate": 9.74359935110606e-06, "loss": 0.3602, "step": 2057 }, { "epoch": 0.13, "grad_norm": 2.3519461879250865, "learning_rate": 9.743277293910733e-06, "loss": 0.3867, "step": 2058 }, { "epoch": 0.13, "grad_norm": 2.7779436887641977, "learning_rate": 9.742955039908406e-06, "loss": 0.3596, "step": 2059 }, { "epoch": 0.13, "grad_norm": 2.943526674621215, "learning_rate": 9.742632589112448e-06, "loss": 0.3486, "step": 2060 }, { "epoch": 0.13, "grad_norm": 5.677983464515481, "learning_rate": 9.742309941536243e-06, "loss": 0.368, "step": 2061 }, { "epoch": 0.13, "grad_norm": 3.1149395646695925, "learning_rate": 9.741987097193174e-06, "loss": 0.3807, "step": 2062 }, { "epoch": 0.13, "grad_norm": 2.9735113730032214, "learning_rate": 9.741664056096637e-06, "loss": 0.3833, "step": 2063 }, { "epoch": 0.13, "grad_norm": 1.7984818734853594, "learning_rate": 9.741340818260036e-06, "loss": 0.3558, "step": 2064 }, { "epoch": 0.13, "grad_norm": 2.0996088945515368, "learning_rate": 9.741017383696784e-06, "loss": 0.3514, "step": 2065 }, { "epoch": 0.13, "grad_norm": 5.512897907985271, "learning_rate": 9.7406937524203e-06, "loss": 0.3621, "step": 2066 }, { "epoch": 0.13, "grad_norm": 1.969422018816204, "learning_rate": 9.740369924444011e-06, "loss": 0.3863, "step": 2067 }, { "epoch": 0.13, "grad_norm": 2.242100778849942, "learning_rate": 9.740045899781353e-06, "loss": 0.3789, "step": 2068 }, { "epoch": 0.13, "grad_norm": 3.046530406817976, "learning_rate": 9.739721678445772e-06, "loss": 0.3726, "step": 2069 }, { "epoch": 0.13, "grad_norm": 2.2390276927521047, "learning_rate": 9.73939726045072e-06, "loss": 0.3672, "step": 2070 }, { "epoch": 0.13, "grad_norm": 2.730884607148511, "learning_rate": 9.739072645809659e-06, "loss": 0.3649, "step": 2071 }, { "epoch": 0.13, "grad_norm": 7.137534335295253, "learning_rate": 9.738747834536056e-06, "loss": 0.3827, "step": 2072 }, { "epoch": 0.13, "grad_norm": 2.1657666343437287, "learning_rate": 9.738422826643385e-06, "loss": 0.3657, "step": 2073 }, { "epoch": 0.13, "grad_norm": 2.3417265608812627, "learning_rate": 9.738097622145138e-06, "loss": 0.3587, "step": 2074 }, { "epoch": 0.13, "grad_norm": 2.3546133096058433, "learning_rate": 9.737772221054805e-06, "loss": 0.3822, "step": 2075 }, { "epoch": 0.13, "grad_norm": 2.162846204439659, "learning_rate": 9.737446623385885e-06, "loss": 0.3858, "step": 2076 }, { "epoch": 0.13, "grad_norm": 1.91068662472612, "learning_rate": 9.737120829151892e-06, "loss": 0.369, "step": 2077 }, { "epoch": 0.13, "grad_norm": 1.8461118959609997, "learning_rate": 9.736794838366342e-06, "loss": 0.3679, "step": 2078 }, { "epoch": 0.13, "grad_norm": 2.5800370867292317, "learning_rate": 9.736468651042759e-06, "loss": 0.3669, "step": 2079 }, { "epoch": 0.13, "grad_norm": 2.39789416614131, "learning_rate": 9.73614226719468e-06, "loss": 0.362, "step": 2080 }, { "epoch": 0.13, "grad_norm": 3.6228173046928687, "learning_rate": 9.735815686835644e-06, "loss": 0.3846, "step": 2081 }, { "epoch": 0.13, "grad_norm": 1.861306080305837, "learning_rate": 9.735488909979205e-06, "loss": 0.3842, "step": 2082 }, { "epoch": 0.13, "grad_norm": 2.112625158009115, "learning_rate": 9.735161936638919e-06, "loss": 0.3508, "step": 2083 }, { "epoch": 0.13, "grad_norm": 1.706042145528432, "learning_rate": 9.734834766828355e-06, "loss": 0.3668, "step": 2084 }, { "epoch": 0.13, "grad_norm": 2.1558621862255363, "learning_rate": 9.734507400561087e-06, "loss": 0.3602, "step": 2085 }, { "epoch": 0.13, "grad_norm": 4.1062051333568, "learning_rate": 9.734179837850695e-06, "loss": 0.3581, "step": 2086 }, { "epoch": 0.13, "grad_norm": 1.6518722280133271, "learning_rate": 9.733852078710774e-06, "loss": 0.3534, "step": 2087 }, { "epoch": 0.13, "grad_norm": 3.3927538505710038, "learning_rate": 9.733524123154923e-06, "loss": 0.3999, "step": 2088 }, { "epoch": 0.13, "grad_norm": 4.83621811927603, "learning_rate": 9.733195971196747e-06, "loss": 0.3594, "step": 2089 }, { "epoch": 0.13, "grad_norm": 2.0494235345449296, "learning_rate": 9.732867622849863e-06, "loss": 0.3865, "step": 2090 }, { "epoch": 0.13, "grad_norm": 2.8664351300158333, "learning_rate": 9.732539078127895e-06, "loss": 0.3907, "step": 2091 }, { "epoch": 0.13, "grad_norm": 8.274292271643615, "learning_rate": 9.732210337044475e-06, "loss": 0.3833, "step": 2092 }, { "epoch": 0.13, "grad_norm": 3.3152591139212695, "learning_rate": 9.731881399613243e-06, "loss": 0.3815, "step": 2093 }, { "epoch": 0.13, "grad_norm": 2.6219381423340673, "learning_rate": 9.731552265847847e-06, "loss": 0.3649, "step": 2094 }, { "epoch": 0.13, "grad_norm": 1.9066997475563694, "learning_rate": 9.731222935761943e-06, "loss": 0.3697, "step": 2095 }, { "epoch": 0.13, "grad_norm": 1.9432450165133641, "learning_rate": 9.730893409369197e-06, "loss": 0.3576, "step": 2096 }, { "epoch": 0.13, "grad_norm": 2.014531214349162, "learning_rate": 9.730563686683278e-06, "loss": 0.3605, "step": 2097 }, { "epoch": 0.13, "grad_norm": 2.4821851517309583, "learning_rate": 9.730233767717872e-06, "loss": 0.3792, "step": 2098 }, { "epoch": 0.13, "grad_norm": 2.5019454300568817, "learning_rate": 9.729903652486664e-06, "loss": 0.352, "step": 2099 }, { "epoch": 0.13, "grad_norm": 2.2452438071156124, "learning_rate": 9.72957334100335e-06, "loss": 0.3746, "step": 2100 }, { "epoch": 0.13, "grad_norm": 2.347600971039136, "learning_rate": 9.72924283328164e-06, "loss": 0.3582, "step": 2101 }, { "epoch": 0.13, "grad_norm": 3.5775369431503403, "learning_rate": 9.728912129335243e-06, "loss": 0.3446, "step": 2102 }, { "epoch": 0.13, "grad_norm": 0.758077460841644, "learning_rate": 9.728581229177884e-06, "loss": 0.5081, "step": 2103 }, { "epoch": 0.13, "grad_norm": 1.7767864893793226, "learning_rate": 9.72825013282329e-06, "loss": 0.3555, "step": 2104 }, { "epoch": 0.13, "grad_norm": 3.174707671827372, "learning_rate": 9.7279188402852e-06, "loss": 0.349, "step": 2105 }, { "epoch": 0.13, "grad_norm": 3.286915943993544, "learning_rate": 9.72758735157736e-06, "loss": 0.3704, "step": 2106 }, { "epoch": 0.13, "grad_norm": 3.095936787943565, "learning_rate": 9.727255666713524e-06, "loss": 0.3737, "step": 2107 }, { "epoch": 0.13, "grad_norm": 2.4735221401501195, "learning_rate": 9.726923785707451e-06, "loss": 0.366, "step": 2108 }, { "epoch": 0.13, "grad_norm": 1.9593873396119232, "learning_rate": 9.726591708572916e-06, "loss": 0.3597, "step": 2109 }, { "epoch": 0.13, "grad_norm": 2.164677544404085, "learning_rate": 9.726259435323698e-06, "loss": 0.3707, "step": 2110 }, { "epoch": 0.13, "grad_norm": 3.9366566055794823, "learning_rate": 9.725926965973579e-06, "loss": 0.3761, "step": 2111 }, { "epoch": 0.13, "grad_norm": 3.6178898052831814, "learning_rate": 9.725594300536355e-06, "loss": 0.3591, "step": 2112 }, { "epoch": 0.13, "grad_norm": 1.830151832650409, "learning_rate": 9.72526143902583e-06, "loss": 0.3657, "step": 2113 }, { "epoch": 0.13, "grad_norm": 3.153213668681711, "learning_rate": 9.724928381455817e-06, "loss": 0.3519, "step": 2114 }, { "epoch": 0.13, "grad_norm": 2.0002205611991255, "learning_rate": 9.724595127840131e-06, "loss": 0.3581, "step": 2115 }, { "epoch": 0.13, "grad_norm": 2.641998053257897, "learning_rate": 9.724261678192602e-06, "loss": 0.3505, "step": 2116 }, { "epoch": 0.13, "grad_norm": 1.9981520047442376, "learning_rate": 9.723928032527066e-06, "loss": 0.3714, "step": 2117 }, { "epoch": 0.13, "grad_norm": 2.5665521763553363, "learning_rate": 9.723594190857363e-06, "loss": 0.3701, "step": 2118 }, { "epoch": 0.13, "grad_norm": 2.0869060682613885, "learning_rate": 9.72326015319735e-06, "loss": 0.353, "step": 2119 }, { "epoch": 0.13, "grad_norm": 2.2564321768009163, "learning_rate": 9.72292591956088e-06, "loss": 0.3621, "step": 2120 }, { "epoch": 0.13, "grad_norm": 2.8311405247028496, "learning_rate": 9.722591489961829e-06, "loss": 0.3469, "step": 2121 }, { "epoch": 0.13, "grad_norm": 1.9637519786487785, "learning_rate": 9.722256864414065e-06, "loss": 0.3475, "step": 2122 }, { "epoch": 0.13, "grad_norm": 3.885360005957735, "learning_rate": 9.721922042931478e-06, "loss": 0.3653, "step": 2123 }, { "epoch": 0.13, "grad_norm": 5.228766380795061, "learning_rate": 9.721587025527957e-06, "loss": 0.3675, "step": 2124 }, { "epoch": 0.13, "grad_norm": 1.8081692880168567, "learning_rate": 9.721251812217405e-06, "loss": 0.3461, "step": 2125 }, { "epoch": 0.13, "grad_norm": 10.592096217280288, "learning_rate": 9.720916403013729e-06, "loss": 0.3507, "step": 2126 }, { "epoch": 0.13, "grad_norm": 1.9566894768797642, "learning_rate": 9.720580797930845e-06, "loss": 0.3679, "step": 2127 }, { "epoch": 0.13, "grad_norm": 5.810191299703153, "learning_rate": 9.720244996982683e-06, "loss": 0.3338, "step": 2128 }, { "epoch": 0.13, "grad_norm": 2.345368455037903, "learning_rate": 9.719909000183167e-06, "loss": 0.3882, "step": 2129 }, { "epoch": 0.13, "grad_norm": 1.8941777637996409, "learning_rate": 9.719572807546246e-06, "loss": 0.3543, "step": 2130 }, { "epoch": 0.13, "grad_norm": 3.094741052343396, "learning_rate": 9.719236419085866e-06, "loss": 0.3667, "step": 2131 }, { "epoch": 0.13, "grad_norm": 2.6390743301002137, "learning_rate": 9.718899834815984e-06, "loss": 0.3394, "step": 2132 }, { "epoch": 0.13, "grad_norm": 3.913875356846735, "learning_rate": 9.718563054750566e-06, "loss": 0.3695, "step": 2133 }, { "epoch": 0.13, "grad_norm": 2.482946254818429, "learning_rate": 9.718226078903586e-06, "loss": 0.3589, "step": 2134 }, { "epoch": 0.13, "grad_norm": 1.7318896115244018, "learning_rate": 9.717888907289026e-06, "loss": 0.3977, "step": 2135 }, { "epoch": 0.13, "grad_norm": 1.6801088251335015, "learning_rate": 9.717551539920875e-06, "loss": 0.3653, "step": 2136 }, { "epoch": 0.13, "grad_norm": 6.584021393663589, "learning_rate": 9.717213976813131e-06, "loss": 0.386, "step": 2137 }, { "epoch": 0.13, "grad_norm": 3.6385926170889076, "learning_rate": 9.7168762179798e-06, "loss": 0.3489, "step": 2138 }, { "epoch": 0.13, "grad_norm": 1.9959221926480015, "learning_rate": 9.716538263434899e-06, "loss": 0.3412, "step": 2139 }, { "epoch": 0.13, "grad_norm": 2.781919752728311, "learning_rate": 9.716200113192445e-06, "loss": 0.3427, "step": 2140 }, { "epoch": 0.13, "grad_norm": 2.660943842445923, "learning_rate": 9.715861767266472e-06, "loss": 0.3367, "step": 2141 }, { "epoch": 0.13, "grad_norm": 3.2532444999278076, "learning_rate": 9.715523225671019e-06, "loss": 0.3718, "step": 2142 }, { "epoch": 0.13, "grad_norm": 2.5405682742241553, "learning_rate": 9.715184488420132e-06, "loss": 0.3913, "step": 2143 }, { "epoch": 0.13, "grad_norm": 2.3165325107589307, "learning_rate": 9.714845555527865e-06, "loss": 0.3552, "step": 2144 }, { "epoch": 0.13, "grad_norm": 4.514725721717478, "learning_rate": 9.714506427008282e-06, "loss": 0.3434, "step": 2145 }, { "epoch": 0.13, "grad_norm": 1.9133066620117767, "learning_rate": 9.714167102875452e-06, "loss": 0.3465, "step": 2146 }, { "epoch": 0.14, "grad_norm": 1.8019517890381582, "learning_rate": 9.713827583143455e-06, "loss": 0.3542, "step": 2147 }, { "epoch": 0.14, "grad_norm": 6.231724150089947, "learning_rate": 9.71348786782638e-06, "loss": 0.3729, "step": 2148 }, { "epoch": 0.14, "grad_norm": 3.2836644862699473, "learning_rate": 9.713147956938322e-06, "loss": 0.3694, "step": 2149 }, { "epoch": 0.14, "grad_norm": 3.874461379345722, "learning_rate": 9.712807850493382e-06, "loss": 0.345, "step": 2150 }, { "epoch": 0.14, "grad_norm": 3.0606832864004367, "learning_rate": 9.712467548505675e-06, "loss": 0.3876, "step": 2151 }, { "epoch": 0.14, "grad_norm": 1.5345752718763965, "learning_rate": 9.712127050989319e-06, "loss": 0.3635, "step": 2152 }, { "epoch": 0.14, "grad_norm": 1.781510323719983, "learning_rate": 9.711786357958442e-06, "loss": 0.3499, "step": 2153 }, { "epoch": 0.14, "grad_norm": 3.2632539701353243, "learning_rate": 9.711445469427179e-06, "loss": 0.3659, "step": 2154 }, { "epoch": 0.14, "grad_norm": 3.497717047183541, "learning_rate": 9.711104385409676e-06, "loss": 0.4012, "step": 2155 }, { "epoch": 0.14, "grad_norm": 3.013333543854991, "learning_rate": 9.710763105920082e-06, "loss": 0.3674, "step": 2156 }, { "epoch": 0.14, "grad_norm": 0.8311470053069792, "learning_rate": 9.710421630972563e-06, "loss": 0.4983, "step": 2157 }, { "epoch": 0.14, "grad_norm": 3.534554032587132, "learning_rate": 9.71007996058128e-06, "loss": 0.368, "step": 2158 }, { "epoch": 0.14, "grad_norm": 3.7369760274841606, "learning_rate": 9.709738094760415e-06, "loss": 0.3525, "step": 2159 }, { "epoch": 0.14, "grad_norm": 2.8680377874275687, "learning_rate": 9.709396033524153e-06, "loss": 0.3587, "step": 2160 }, { "epoch": 0.14, "grad_norm": 1.8702498262496199, "learning_rate": 9.709053776886683e-06, "loss": 0.3524, "step": 2161 }, { "epoch": 0.14, "grad_norm": 4.874239426872966, "learning_rate": 9.708711324862208e-06, "loss": 0.3459, "step": 2162 }, { "epoch": 0.14, "grad_norm": 1.7926988622182338, "learning_rate": 9.708368677464936e-06, "loss": 0.3364, "step": 2163 }, { "epoch": 0.14, "grad_norm": 3.068595156773728, "learning_rate": 9.708025834709085e-06, "loss": 0.3882, "step": 2164 }, { "epoch": 0.14, "grad_norm": 4.090259503267306, "learning_rate": 9.707682796608879e-06, "loss": 0.3667, "step": 2165 }, { "epoch": 0.14, "grad_norm": 2.543607895466217, "learning_rate": 9.707339563178554e-06, "loss": 0.3809, "step": 2166 }, { "epoch": 0.14, "grad_norm": 2.355073725762526, "learning_rate": 9.706996134432346e-06, "loss": 0.3698, "step": 2167 }, { "epoch": 0.14, "grad_norm": 9.039427524367994, "learning_rate": 9.70665251038451e-06, "loss": 0.3572, "step": 2168 }, { "epoch": 0.14, "grad_norm": 2.3888312239997838, "learning_rate": 9.706308691049302e-06, "loss": 0.3552, "step": 2169 }, { "epoch": 0.14, "grad_norm": 5.188509015657696, "learning_rate": 9.705964676440984e-06, "loss": 0.3437, "step": 2170 }, { "epoch": 0.14, "grad_norm": 4.681347300694205, "learning_rate": 9.705620466573837e-06, "loss": 0.3547, "step": 2171 }, { "epoch": 0.14, "grad_norm": 2.4445920081855586, "learning_rate": 9.705276061462135e-06, "loss": 0.3979, "step": 2172 }, { "epoch": 0.14, "grad_norm": 3.107750685951658, "learning_rate": 9.704931461120173e-06, "loss": 0.3716, "step": 2173 }, { "epoch": 0.14, "grad_norm": 2.6290719881406557, "learning_rate": 9.70458666556225e-06, "loss": 0.371, "step": 2174 }, { "epoch": 0.14, "grad_norm": 3.3135326378460883, "learning_rate": 9.704241674802668e-06, "loss": 0.3849, "step": 2175 }, { "epoch": 0.14, "grad_norm": 1.9109486876018453, "learning_rate": 9.703896488855742e-06, "loss": 0.3744, "step": 2176 }, { "epoch": 0.14, "grad_norm": 2.09958236067567, "learning_rate": 9.703551107735798e-06, "loss": 0.3471, "step": 2177 }, { "epoch": 0.14, "grad_norm": 0.8349254683229255, "learning_rate": 9.703205531457163e-06, "loss": 0.4838, "step": 2178 }, { "epoch": 0.14, "grad_norm": 4.459660001768668, "learning_rate": 9.702859760034177e-06, "loss": 0.3497, "step": 2179 }, { "epoch": 0.14, "grad_norm": 3.090096725810043, "learning_rate": 9.702513793481186e-06, "loss": 0.3532, "step": 2180 }, { "epoch": 0.14, "grad_norm": 6.560318138826427, "learning_rate": 9.702167631812544e-06, "loss": 0.3704, "step": 2181 }, { "epoch": 0.14, "grad_norm": 4.5780875389561455, "learning_rate": 9.701821275042618e-06, "loss": 0.3443, "step": 2182 }, { "epoch": 0.14, "grad_norm": 4.047438538653314, "learning_rate": 9.701474723185774e-06, "loss": 0.3573, "step": 2183 }, { "epoch": 0.14, "grad_norm": 2.001446806845423, "learning_rate": 9.701127976256392e-06, "loss": 0.3436, "step": 2184 }, { "epoch": 0.14, "grad_norm": 2.6806415294516435, "learning_rate": 9.700781034268861e-06, "loss": 0.3663, "step": 2185 }, { "epoch": 0.14, "grad_norm": 2.992265753593225, "learning_rate": 9.700433897237576e-06, "loss": 0.3769, "step": 2186 }, { "epoch": 0.14, "grad_norm": 2.383394810883486, "learning_rate": 9.70008656517694e-06, "loss": 0.3643, "step": 2187 }, { "epoch": 0.14, "grad_norm": 9.130913273587383, "learning_rate": 9.699739038101363e-06, "loss": 0.3518, "step": 2188 }, { "epoch": 0.14, "grad_norm": 2.142319729777803, "learning_rate": 9.699391316025266e-06, "loss": 0.3516, "step": 2189 }, { "epoch": 0.14, "grad_norm": 1.9270841797880551, "learning_rate": 9.699043398963075e-06, "loss": 0.3514, "step": 2190 }, { "epoch": 0.14, "grad_norm": 2.5876699053379104, "learning_rate": 9.69869528692923e-06, "loss": 0.37, "step": 2191 }, { "epoch": 0.14, "grad_norm": 3.541893933981502, "learning_rate": 9.698346979938169e-06, "loss": 0.3654, "step": 2192 }, { "epoch": 0.14, "grad_norm": 6.134315845942576, "learning_rate": 9.697998478004347e-06, "loss": 0.3506, "step": 2193 }, { "epoch": 0.14, "grad_norm": 2.6481892197476746, "learning_rate": 9.697649781142225e-06, "loss": 0.3597, "step": 2194 }, { "epoch": 0.14, "grad_norm": 2.612137458004614, "learning_rate": 9.697300889366268e-06, "loss": 0.3576, "step": 2195 }, { "epoch": 0.14, "grad_norm": 0.9316800321828856, "learning_rate": 9.696951802690955e-06, "loss": 0.5251, "step": 2196 }, { "epoch": 0.14, "grad_norm": 6.172566635186495, "learning_rate": 9.696602521130768e-06, "loss": 0.3546, "step": 2197 }, { "epoch": 0.14, "grad_norm": 5.960716584993781, "learning_rate": 9.6962530447002e-06, "loss": 0.3566, "step": 2198 }, { "epoch": 0.14, "grad_norm": 6.32410744425751, "learning_rate": 9.695903373413753e-06, "loss": 0.3782, "step": 2199 }, { "epoch": 0.14, "grad_norm": 3.665659509685868, "learning_rate": 9.695553507285934e-06, "loss": 0.3784, "step": 2200 }, { "epoch": 0.14, "grad_norm": 2.5038958624637533, "learning_rate": 9.695203446331258e-06, "loss": 0.3638, "step": 2201 }, { "epoch": 0.14, "grad_norm": 2.5654988510252177, "learning_rate": 9.694853190564253e-06, "loss": 0.3836, "step": 2202 }, { "epoch": 0.14, "grad_norm": 5.274045728265552, "learning_rate": 9.694502739999449e-06, "loss": 0.3323, "step": 2203 }, { "epoch": 0.14, "grad_norm": 3.3439694332278393, "learning_rate": 9.69415209465139e-06, "loss": 0.3605, "step": 2204 }, { "epoch": 0.14, "grad_norm": 3.560716690652859, "learning_rate": 9.693801254534622e-06, "loss": 0.3624, "step": 2205 }, { "epoch": 0.14, "grad_norm": 4.409334241591523, "learning_rate": 9.693450219663703e-06, "loss": 0.3699, "step": 2206 }, { "epoch": 0.14, "grad_norm": 1.9714270733155934, "learning_rate": 9.693098990053197e-06, "loss": 0.3708, "step": 2207 }, { "epoch": 0.14, "grad_norm": 2.0923282592001042, "learning_rate": 9.692747565717677e-06, "loss": 0.3682, "step": 2208 }, { "epoch": 0.14, "grad_norm": 3.3440626411450176, "learning_rate": 9.692395946671727e-06, "loss": 0.3488, "step": 2209 }, { "epoch": 0.14, "grad_norm": 2.5333082990512574, "learning_rate": 9.692044132929934e-06, "loss": 0.3774, "step": 2210 }, { "epoch": 0.14, "grad_norm": 3.0748073568895737, "learning_rate": 9.691692124506896e-06, "loss": 0.3603, "step": 2211 }, { "epoch": 0.14, "grad_norm": 2.2534062520128604, "learning_rate": 9.691339921417219e-06, "loss": 0.3377, "step": 2212 }, { "epoch": 0.14, "grad_norm": 2.5344521511625326, "learning_rate": 9.690987523675514e-06, "loss": 0.3578, "step": 2213 }, { "epoch": 0.14, "grad_norm": 4.335457593172534, "learning_rate": 9.690634931296408e-06, "loss": 0.3637, "step": 2214 }, { "epoch": 0.14, "grad_norm": 2.446251914770414, "learning_rate": 9.690282144294524e-06, "loss": 0.333, "step": 2215 }, { "epoch": 0.14, "grad_norm": 3.2504853190219287, "learning_rate": 9.689929162684503e-06, "loss": 0.3596, "step": 2216 }, { "epoch": 0.14, "grad_norm": 2.9979319485051117, "learning_rate": 9.689575986480992e-06, "loss": 0.3674, "step": 2217 }, { "epoch": 0.14, "grad_norm": 3.147606422679075, "learning_rate": 9.689222615698643e-06, "loss": 0.3513, "step": 2218 }, { "epoch": 0.14, "grad_norm": 3.0713409710626527, "learning_rate": 9.688869050352119e-06, "loss": 0.3708, "step": 2219 }, { "epoch": 0.14, "grad_norm": 6.697857059745171, "learning_rate": 9.688515290456092e-06, "loss": 0.3462, "step": 2220 }, { "epoch": 0.14, "grad_norm": 2.905978614169605, "learning_rate": 9.688161336025234e-06, "loss": 0.3429, "step": 2221 }, { "epoch": 0.14, "grad_norm": 2.909014222345689, "learning_rate": 9.687807187074238e-06, "loss": 0.3435, "step": 2222 }, { "epoch": 0.14, "grad_norm": 3.615511745018682, "learning_rate": 9.687452843617792e-06, "loss": 0.3758, "step": 2223 }, { "epoch": 0.14, "grad_norm": 1.8141319252177568, "learning_rate": 9.687098305670606e-06, "loss": 0.3673, "step": 2224 }, { "epoch": 0.14, "grad_norm": 2.3395920323086177, "learning_rate": 9.686743573247383e-06, "loss": 0.3605, "step": 2225 }, { "epoch": 0.14, "grad_norm": 2.6540624458603483, "learning_rate": 9.686388646362846e-06, "loss": 0.3421, "step": 2226 }, { "epoch": 0.14, "grad_norm": 4.120099167484428, "learning_rate": 9.68603352503172e-06, "loss": 0.3772, "step": 2227 }, { "epoch": 0.14, "grad_norm": 2.8111315477744756, "learning_rate": 9.685678209268738e-06, "loss": 0.3679, "step": 2228 }, { "epoch": 0.14, "grad_norm": 6.121181310744283, "learning_rate": 9.685322699088647e-06, "loss": 0.372, "step": 2229 }, { "epoch": 0.14, "grad_norm": 3.3190016218223635, "learning_rate": 9.684966994506193e-06, "loss": 0.3582, "step": 2230 }, { "epoch": 0.14, "grad_norm": 1.0008797873312196, "learning_rate": 9.684611095536137e-06, "loss": 0.513, "step": 2231 }, { "epoch": 0.14, "grad_norm": 3.085802656987168, "learning_rate": 9.684255002193246e-06, "loss": 0.3413, "step": 2232 }, { "epoch": 0.14, "grad_norm": 2.842011045685869, "learning_rate": 9.683898714492296e-06, "loss": 0.3636, "step": 2233 }, { "epoch": 0.14, "grad_norm": 2.345497910221064, "learning_rate": 9.683542232448068e-06, "loss": 0.356, "step": 2234 }, { "epoch": 0.14, "grad_norm": 3.3031539572997266, "learning_rate": 9.683185556075354e-06, "loss": 0.3674, "step": 2235 }, { "epoch": 0.14, "grad_norm": 3.5441288379330023, "learning_rate": 9.682828685388954e-06, "loss": 0.3426, "step": 2236 }, { "epoch": 0.14, "grad_norm": 6.07700230917055, "learning_rate": 9.682471620403673e-06, "loss": 0.3548, "step": 2237 }, { "epoch": 0.14, "grad_norm": 2.4081556992150213, "learning_rate": 9.682114361134327e-06, "loss": 0.3308, "step": 2238 }, { "epoch": 0.14, "grad_norm": 2.7716307869559373, "learning_rate": 9.681756907595741e-06, "loss": 0.3497, "step": 2239 }, { "epoch": 0.14, "grad_norm": 2.1025799805934735, "learning_rate": 9.681399259802744e-06, "loss": 0.3453, "step": 2240 }, { "epoch": 0.14, "grad_norm": 2.5764295638939267, "learning_rate": 9.681041417770176e-06, "loss": 0.3626, "step": 2241 }, { "epoch": 0.14, "grad_norm": 4.239281158943012, "learning_rate": 9.680683381512888e-06, "loss": 0.3478, "step": 2242 }, { "epoch": 0.14, "grad_norm": 2.2584329750144967, "learning_rate": 9.68032515104573e-06, "loss": 0.3585, "step": 2243 }, { "epoch": 0.14, "grad_norm": 2.780756543442347, "learning_rate": 9.679966726383569e-06, "loss": 0.382, "step": 2244 }, { "epoch": 0.14, "grad_norm": 12.641132909899936, "learning_rate": 9.679608107541278e-06, "loss": 0.376, "step": 2245 }, { "epoch": 0.14, "grad_norm": 2.9373484888075962, "learning_rate": 9.679249294533733e-06, "loss": 0.3515, "step": 2246 }, { "epoch": 0.14, "grad_norm": 2.7470182000533456, "learning_rate": 9.678890287375823e-06, "loss": 0.349, "step": 2247 }, { "epoch": 0.14, "grad_norm": 4.075101042853636, "learning_rate": 9.678531086082444e-06, "loss": 0.3598, "step": 2248 }, { "epoch": 0.14, "grad_norm": 4.545818294862518, "learning_rate": 9.6781716906685e-06, "loss": 0.3414, "step": 2249 }, { "epoch": 0.14, "grad_norm": 1.72612675981877, "learning_rate": 9.677812101148906e-06, "loss": 0.3542, "step": 2250 }, { "epoch": 0.14, "grad_norm": 11.599238197206029, "learning_rate": 9.677452317538576e-06, "loss": 0.3667, "step": 2251 }, { "epoch": 0.14, "grad_norm": 6.552295386551471, "learning_rate": 9.677092339852443e-06, "loss": 0.3757, "step": 2252 }, { "epoch": 0.14, "grad_norm": 3.2749637261026314, "learning_rate": 9.676732168105443e-06, "loss": 0.3502, "step": 2253 }, { "epoch": 0.14, "grad_norm": 2.8524830075707692, "learning_rate": 9.676371802312515e-06, "loss": 0.361, "step": 2254 }, { "epoch": 0.14, "grad_norm": 3.5399726269706826, "learning_rate": 9.676011242488616e-06, "loss": 0.3845, "step": 2255 }, { "epoch": 0.14, "grad_norm": 2.354759199309399, "learning_rate": 9.675650488648707e-06, "loss": 0.3341, "step": 2256 }, { "epoch": 0.14, "grad_norm": 3.603553884786818, "learning_rate": 9.675289540807752e-06, "loss": 0.3333, "step": 2257 }, { "epoch": 0.14, "grad_norm": 3.773587318579438, "learning_rate": 9.674928398980729e-06, "loss": 0.3362, "step": 2258 }, { "epoch": 0.14, "grad_norm": 2.67176691595643, "learning_rate": 9.674567063182626e-06, "loss": 0.3625, "step": 2259 }, { "epoch": 0.14, "grad_norm": 4.908384262448215, "learning_rate": 9.674205533428431e-06, "loss": 0.3691, "step": 2260 }, { "epoch": 0.14, "grad_norm": 2.307049662295108, "learning_rate": 9.673843809733145e-06, "loss": 0.3554, "step": 2261 }, { "epoch": 0.14, "grad_norm": 2.1999270013192764, "learning_rate": 9.67348189211178e-06, "loss": 0.3623, "step": 2262 }, { "epoch": 0.14, "grad_norm": 2.79505183529914, "learning_rate": 9.67311978057935e-06, "loss": 0.3797, "step": 2263 }, { "epoch": 0.14, "grad_norm": 2.568944247674684, "learning_rate": 9.672757475150878e-06, "loss": 0.3605, "step": 2264 }, { "epoch": 0.14, "grad_norm": 67.47337375924475, "learning_rate": 9.6723949758414e-06, "loss": 0.3194, "step": 2265 }, { "epoch": 0.14, "grad_norm": 3.1499024496376413, "learning_rate": 9.672032282665954e-06, "loss": 0.3772, "step": 2266 }, { "epoch": 0.14, "grad_norm": 2.4091814983918054, "learning_rate": 9.67166939563959e-06, "loss": 0.3843, "step": 2267 }, { "epoch": 0.14, "grad_norm": 2.3335453182494446, "learning_rate": 9.671306314777367e-06, "loss": 0.345, "step": 2268 }, { "epoch": 0.14, "grad_norm": 5.160702802736921, "learning_rate": 9.670943040094347e-06, "loss": 0.3515, "step": 2269 }, { "epoch": 0.14, "grad_norm": 3.4360182720795938, "learning_rate": 9.670579571605605e-06, "loss": 0.3471, "step": 2270 }, { "epoch": 0.14, "grad_norm": 2.653755317861475, "learning_rate": 9.670215909326219e-06, "loss": 0.3304, "step": 2271 }, { "epoch": 0.14, "grad_norm": 3.1601480854090656, "learning_rate": 9.669852053271278e-06, "loss": 0.3534, "step": 2272 }, { "epoch": 0.14, "grad_norm": 2.951268372737922, "learning_rate": 9.669488003455884e-06, "loss": 0.3548, "step": 2273 }, { "epoch": 0.14, "grad_norm": 2.1361984311785482, "learning_rate": 9.669123759895137e-06, "loss": 0.372, "step": 2274 }, { "epoch": 0.14, "grad_norm": 2.46221679531022, "learning_rate": 9.668759322604154e-06, "loss": 0.354, "step": 2275 }, { "epoch": 0.14, "grad_norm": 1.9824662415726244, "learning_rate": 9.66839469159805e-06, "loss": 0.3596, "step": 2276 }, { "epoch": 0.14, "grad_norm": 3.800468707625609, "learning_rate": 9.668029866891962e-06, "loss": 0.3483, "step": 2277 }, { "epoch": 0.14, "grad_norm": 1.0650565663973084, "learning_rate": 9.667664848501022e-06, "loss": 0.5287, "step": 2278 }, { "epoch": 0.14, "grad_norm": 2.8209529394633437, "learning_rate": 9.667299636440377e-06, "loss": 0.3604, "step": 2279 }, { "epoch": 0.14, "grad_norm": 3.6744587582900383, "learning_rate": 9.66693423072518e-06, "loss": 0.3607, "step": 2280 }, { "epoch": 0.14, "grad_norm": 3.5962459095432746, "learning_rate": 9.666568631370592e-06, "loss": 0.3614, "step": 2281 }, { "epoch": 0.14, "grad_norm": 0.6639755639194774, "learning_rate": 9.666202838391783e-06, "loss": 0.4927, "step": 2282 }, { "epoch": 0.14, "grad_norm": 3.2951901857763573, "learning_rate": 9.66583685180393e-06, "loss": 0.3445, "step": 2283 }, { "epoch": 0.14, "grad_norm": 4.511161265817613, "learning_rate": 9.66547067162222e-06, "loss": 0.3773, "step": 2284 }, { "epoch": 0.14, "grad_norm": 4.901182692439293, "learning_rate": 9.665104297861842e-06, "loss": 0.3601, "step": 2285 }, { "epoch": 0.14, "grad_norm": 2.5004267910064106, "learning_rate": 9.664737730538003e-06, "loss": 0.3425, "step": 2286 }, { "epoch": 0.14, "grad_norm": 2.699872788028884, "learning_rate": 9.66437096966591e-06, "loss": 0.3646, "step": 2287 }, { "epoch": 0.14, "grad_norm": 3.466253276055321, "learning_rate": 9.66400401526078e-06, "loss": 0.3685, "step": 2288 }, { "epoch": 0.14, "grad_norm": 2.974486155869181, "learning_rate": 9.663636867337838e-06, "loss": 0.348, "step": 2289 }, { "epoch": 0.14, "grad_norm": 2.009561889838298, "learning_rate": 9.66326952591232e-06, "loss": 0.3456, "step": 2290 }, { "epoch": 0.14, "grad_norm": 2.3282365927121345, "learning_rate": 9.662901990999468e-06, "loss": 0.3474, "step": 2291 }, { "epoch": 0.14, "grad_norm": 3.7540195970671437, "learning_rate": 9.662534262614528e-06, "loss": 0.3526, "step": 2292 }, { "epoch": 0.14, "grad_norm": 3.031315959580994, "learning_rate": 9.66216634077276e-06, "loss": 0.3614, "step": 2293 }, { "epoch": 0.14, "grad_norm": 2.290872175706192, "learning_rate": 9.66179822548943e-06, "loss": 0.3535, "step": 2294 }, { "epoch": 0.14, "grad_norm": 13.08464816871641, "learning_rate": 9.661429916779812e-06, "loss": 0.3481, "step": 2295 }, { "epoch": 0.14, "grad_norm": 10.618922565460359, "learning_rate": 9.661061414659185e-06, "loss": 0.3652, "step": 2296 }, { "epoch": 0.14, "grad_norm": 1.8224276494957194, "learning_rate": 9.660692719142843e-06, "loss": 0.3428, "step": 2297 }, { "epoch": 0.14, "grad_norm": 2.68191708088931, "learning_rate": 9.660323830246082e-06, "loss": 0.3411, "step": 2298 }, { "epoch": 0.14, "grad_norm": 4.328870414668892, "learning_rate": 9.659954747984206e-06, "loss": 0.3403, "step": 2299 }, { "epoch": 0.14, "grad_norm": 2.38353695337482, "learning_rate": 9.65958547237253e-06, "loss": 0.3736, "step": 2300 }, { "epoch": 0.14, "grad_norm": 6.749184432940049, "learning_rate": 9.659216003426378e-06, "loss": 0.3624, "step": 2301 }, { "epoch": 0.14, "grad_norm": 4.169476479975519, "learning_rate": 9.658846341161079e-06, "loss": 0.3761, "step": 2302 }, { "epoch": 0.14, "grad_norm": 2.365985932567745, "learning_rate": 9.658476485591968e-06, "loss": 0.3781, "step": 2303 }, { "epoch": 0.14, "grad_norm": 2.8827023001781615, "learning_rate": 9.658106436734395e-06, "loss": 0.3669, "step": 2304 }, { "epoch": 0.14, "grad_norm": 3.3501941336783827, "learning_rate": 9.657736194603711e-06, "loss": 0.3702, "step": 2305 }, { "epoch": 0.15, "grad_norm": 2.8538506973369806, "learning_rate": 9.657365759215281e-06, "loss": 0.3449, "step": 2306 }, { "epoch": 0.15, "grad_norm": 2.7980927854541333, "learning_rate": 9.656995130584473e-06, "loss": 0.3454, "step": 2307 }, { "epoch": 0.15, "grad_norm": 11.245172410954305, "learning_rate": 9.656624308726662e-06, "loss": 0.3634, "step": 2308 }, { "epoch": 0.15, "grad_norm": 2.389572450307798, "learning_rate": 9.65625329365724e-06, "loss": 0.3397, "step": 2309 }, { "epoch": 0.15, "grad_norm": 2.3100204934540214, "learning_rate": 9.6558820853916e-06, "loss": 0.3358, "step": 2310 }, { "epoch": 0.15, "grad_norm": 2.0980354364598153, "learning_rate": 9.655510683945139e-06, "loss": 0.3616, "step": 2311 }, { "epoch": 0.15, "grad_norm": 5.034610519930271, "learning_rate": 9.655139089333272e-06, "loss": 0.3441, "step": 2312 }, { "epoch": 0.15, "grad_norm": 2.5320154288031897, "learning_rate": 9.654767301571418e-06, "loss": 0.3595, "step": 2313 }, { "epoch": 0.15, "grad_norm": 1.9739325212177743, "learning_rate": 9.654395320674998e-06, "loss": 0.3398, "step": 2314 }, { "epoch": 0.15, "grad_norm": 2.2320474183680483, "learning_rate": 9.65402314665945e-06, "loss": 0.3235, "step": 2315 }, { "epoch": 0.15, "grad_norm": 2.2347126569903, "learning_rate": 9.653650779540214e-06, "loss": 0.3631, "step": 2316 }, { "epoch": 0.15, "grad_norm": 1.8442530371937012, "learning_rate": 9.653278219332742e-06, "loss": 0.3616, "step": 2317 }, { "epoch": 0.15, "grad_norm": 5.60449634011267, "learning_rate": 9.652905466052492e-06, "loss": 0.3279, "step": 2318 }, { "epoch": 0.15, "grad_norm": 2.8604555700909606, "learning_rate": 9.65253251971493e-06, "loss": 0.357, "step": 2319 }, { "epoch": 0.15, "grad_norm": 2.248440473110048, "learning_rate": 9.65215938033553e-06, "loss": 0.4123, "step": 2320 }, { "epoch": 0.15, "grad_norm": 3.2557147594249787, "learning_rate": 9.651786047929772e-06, "loss": 0.3519, "step": 2321 }, { "epoch": 0.15, "grad_norm": 2.337233149359728, "learning_rate": 9.651412522513151e-06, "loss": 0.3498, "step": 2322 }, { "epoch": 0.15, "grad_norm": 7.08644851302118, "learning_rate": 9.651038804101162e-06, "loss": 0.368, "step": 2323 }, { "epoch": 0.15, "grad_norm": 2.201117362555605, "learning_rate": 9.650664892709311e-06, "loss": 0.3862, "step": 2324 }, { "epoch": 0.15, "grad_norm": 2.785244373287499, "learning_rate": 9.650290788353114e-06, "loss": 0.3688, "step": 2325 }, { "epoch": 0.15, "grad_norm": 2.887624913253619, "learning_rate": 9.649916491048092e-06, "loss": 0.3399, "step": 2326 }, { "epoch": 0.15, "grad_norm": 3.0446341964073884, "learning_rate": 9.649542000809775e-06, "loss": 0.3469, "step": 2327 }, { "epoch": 0.15, "grad_norm": 1.2234333392104157, "learning_rate": 9.649167317653703e-06, "loss": 0.5292, "step": 2328 }, { "epoch": 0.15, "grad_norm": 2.0099881681755503, "learning_rate": 9.64879244159542e-06, "loss": 0.3513, "step": 2329 }, { "epoch": 0.15, "grad_norm": 2.855983876377842, "learning_rate": 9.648417372650482e-06, "loss": 0.3457, "step": 2330 }, { "epoch": 0.15, "grad_norm": 1.7475013771571395, "learning_rate": 9.64804211083445e-06, "loss": 0.3537, "step": 2331 }, { "epoch": 0.15, "grad_norm": 3.1653810440796653, "learning_rate": 9.647666656162898e-06, "loss": 0.3517, "step": 2332 }, { "epoch": 0.15, "grad_norm": 7.02294797819566, "learning_rate": 9.647291008651398e-06, "loss": 0.3651, "step": 2333 }, { "epoch": 0.15, "grad_norm": 5.048615989165704, "learning_rate": 9.64691516831554e-06, "loss": 0.3682, "step": 2334 }, { "epoch": 0.15, "grad_norm": 2.654696700868606, "learning_rate": 9.646539135170919e-06, "loss": 0.366, "step": 2335 }, { "epoch": 0.15, "grad_norm": 1.9975112521803116, "learning_rate": 9.646162909233135e-06, "loss": 0.3726, "step": 2336 }, { "epoch": 0.15, "grad_norm": 2.1020659732556655, "learning_rate": 9.6457864905178e-06, "loss": 0.3499, "step": 2337 }, { "epoch": 0.15, "grad_norm": 2.4810057879316187, "learning_rate": 9.64540987904053e-06, "loss": 0.3301, "step": 2338 }, { "epoch": 0.15, "grad_norm": 1.880311443001001, "learning_rate": 9.645033074816955e-06, "loss": 0.3508, "step": 2339 }, { "epoch": 0.15, "grad_norm": 1.9626301859520199, "learning_rate": 9.644656077862706e-06, "loss": 0.3628, "step": 2340 }, { "epoch": 0.15, "grad_norm": 2.6082585080628293, "learning_rate": 9.644278888193427e-06, "loss": 0.3801, "step": 2341 }, { "epoch": 0.15, "grad_norm": 9.508594621300968, "learning_rate": 9.64390150582477e-06, "loss": 0.3827, "step": 2342 }, { "epoch": 0.15, "grad_norm": 3.162779175873289, "learning_rate": 9.643523930772388e-06, "loss": 0.3826, "step": 2343 }, { "epoch": 0.15, "grad_norm": 3.9706900900386937, "learning_rate": 9.64314616305195e-06, "loss": 0.372, "step": 2344 }, { "epoch": 0.15, "grad_norm": 2.306415379539258, "learning_rate": 9.64276820267913e-06, "loss": 0.3443, "step": 2345 }, { "epoch": 0.15, "grad_norm": 4.345837477259276, "learning_rate": 9.642390049669614e-06, "loss": 0.345, "step": 2346 }, { "epoch": 0.15, "grad_norm": 3.0960833003109123, "learning_rate": 9.642011704039087e-06, "loss": 0.3802, "step": 2347 }, { "epoch": 0.15, "grad_norm": 2.1174944807098237, "learning_rate": 9.64163316580325e-06, "loss": 0.3522, "step": 2348 }, { "epoch": 0.15, "grad_norm": 2.5986618743569525, "learning_rate": 9.64125443497781e-06, "loss": 0.3747, "step": 2349 }, { "epoch": 0.15, "grad_norm": 4.178705716855747, "learning_rate": 9.640875511578475e-06, "loss": 0.3789, "step": 2350 }, { "epoch": 0.15, "grad_norm": 1.7108012864525584, "learning_rate": 9.640496395620976e-06, "loss": 0.3437, "step": 2351 }, { "epoch": 0.15, "grad_norm": 1.9976088571999921, "learning_rate": 9.640117087121038e-06, "loss": 0.3555, "step": 2352 }, { "epoch": 0.15, "grad_norm": 8.332575910936503, "learning_rate": 9.6397375860944e-06, "loss": 0.3602, "step": 2353 }, { "epoch": 0.15, "grad_norm": 2.0413358711022536, "learning_rate": 9.63935789255681e-06, "loss": 0.3297, "step": 2354 }, { "epoch": 0.15, "grad_norm": 2.9357488207033153, "learning_rate": 9.638978006524017e-06, "loss": 0.3331, "step": 2355 }, { "epoch": 0.15, "grad_norm": 2.794251389142506, "learning_rate": 9.63859792801179e-06, "loss": 0.3618, "step": 2356 }, { "epoch": 0.15, "grad_norm": 2.0938584713205217, "learning_rate": 9.638217657035895e-06, "loss": 0.3472, "step": 2357 }, { "epoch": 0.15, "grad_norm": 2.1667692379618164, "learning_rate": 9.637837193612112e-06, "loss": 0.3462, "step": 2358 }, { "epoch": 0.15, "grad_norm": 2.2927168392100215, "learning_rate": 9.637456537756224e-06, "loss": 0.353, "step": 2359 }, { "epoch": 0.15, "grad_norm": 1.7197863118466372, "learning_rate": 9.637075689484027e-06, "loss": 0.3371, "step": 2360 }, { "epoch": 0.15, "grad_norm": 9.817247900294978, "learning_rate": 9.636694648811326e-06, "loss": 0.3552, "step": 2361 }, { "epoch": 0.15, "grad_norm": 2.543037062907734, "learning_rate": 9.636313415753927e-06, "loss": 0.3741, "step": 2362 }, { "epoch": 0.15, "grad_norm": 3.536416547673591, "learning_rate": 9.635931990327649e-06, "loss": 0.3268, "step": 2363 }, { "epoch": 0.15, "grad_norm": 3.6625163197933777, "learning_rate": 9.635550372548317e-06, "loss": 0.3682, "step": 2364 }, { "epoch": 0.15, "grad_norm": 2.3342808485981523, "learning_rate": 9.635168562431769e-06, "loss": 0.3543, "step": 2365 }, { "epoch": 0.15, "grad_norm": 2.7700670597111325, "learning_rate": 9.634786559993842e-06, "loss": 0.3591, "step": 2366 }, { "epoch": 0.15, "grad_norm": 2.35645116878552, "learning_rate": 9.634404365250391e-06, "loss": 0.3506, "step": 2367 }, { "epoch": 0.15, "grad_norm": 4.707460131914564, "learning_rate": 9.63402197821727e-06, "loss": 0.3716, "step": 2368 }, { "epoch": 0.15, "grad_norm": 1.5427111309804353, "learning_rate": 9.633639398910346e-06, "loss": 0.3174, "step": 2369 }, { "epoch": 0.15, "grad_norm": 3.715698775498241, "learning_rate": 9.633256627345494e-06, "loss": 0.366, "step": 2370 }, { "epoch": 0.15, "grad_norm": 7.0358262155339855, "learning_rate": 9.632873663538594e-06, "loss": 0.3549, "step": 2371 }, { "epoch": 0.15, "grad_norm": 2.52722156177181, "learning_rate": 9.632490507505536e-06, "loss": 0.3644, "step": 2372 }, { "epoch": 0.15, "grad_norm": 2.0780316152770864, "learning_rate": 9.632107159262218e-06, "loss": 0.3586, "step": 2373 }, { "epoch": 0.15, "grad_norm": 2.64635097432633, "learning_rate": 9.631723618824549e-06, "loss": 0.3482, "step": 2374 }, { "epoch": 0.15, "grad_norm": 6.197373002378108, "learning_rate": 9.63133988620844e-06, "loss": 0.3509, "step": 2375 }, { "epoch": 0.15, "grad_norm": 1.87685837698477, "learning_rate": 9.63095596142981e-06, "loss": 0.3944, "step": 2376 }, { "epoch": 0.15, "grad_norm": 4.500786724373431, "learning_rate": 9.630571844504594e-06, "loss": 0.3502, "step": 2377 }, { "epoch": 0.15, "grad_norm": 2.7455177113536733, "learning_rate": 9.630187535448727e-06, "loss": 0.3756, "step": 2378 }, { "epoch": 0.15, "grad_norm": 5.23001573548013, "learning_rate": 9.629803034278155e-06, "loss": 0.4042, "step": 2379 }, { "epoch": 0.15, "grad_norm": 1.94647314913899, "learning_rate": 9.629418341008831e-06, "loss": 0.3496, "step": 2380 }, { "epoch": 0.15, "grad_norm": 2.537475468068543, "learning_rate": 9.62903345565672e-06, "loss": 0.3839, "step": 2381 }, { "epoch": 0.15, "grad_norm": 2.4068648716576693, "learning_rate": 9.628648378237786e-06, "loss": 0.3421, "step": 2382 }, { "epoch": 0.15, "grad_norm": 1.9439316442881507, "learning_rate": 9.62826310876801e-06, "loss": 0.3703, "step": 2383 }, { "epoch": 0.15, "grad_norm": 2.9368852642831618, "learning_rate": 9.627877647263378e-06, "loss": 0.3556, "step": 2384 }, { "epoch": 0.15, "grad_norm": 7.335718471294046, "learning_rate": 9.62749199373988e-06, "loss": 0.3522, "step": 2385 }, { "epoch": 0.15, "grad_norm": 4.029347325183008, "learning_rate": 9.627106148213521e-06, "loss": 0.3527, "step": 2386 }, { "epoch": 0.15, "grad_norm": 4.49920180217814, "learning_rate": 9.62672011070031e-06, "loss": 0.3331, "step": 2387 }, { "epoch": 0.15, "grad_norm": 3.666386157382629, "learning_rate": 9.626333881216263e-06, "loss": 0.3868, "step": 2388 }, { "epoch": 0.15, "grad_norm": 3.453130005653644, "learning_rate": 9.625947459777408e-06, "loss": 0.3699, "step": 2389 }, { "epoch": 0.15, "grad_norm": 2.799579787938819, "learning_rate": 9.625560846399774e-06, "loss": 0.3519, "step": 2390 }, { "epoch": 0.15, "grad_norm": 3.0096918035158344, "learning_rate": 9.625174041099403e-06, "loss": 0.3553, "step": 2391 }, { "epoch": 0.15, "grad_norm": 3.00932756878222, "learning_rate": 9.624787043892349e-06, "loss": 0.381, "step": 2392 }, { "epoch": 0.15, "grad_norm": 2.2037870386656717, "learning_rate": 9.624399854794664e-06, "loss": 0.3331, "step": 2393 }, { "epoch": 0.15, "grad_norm": 5.667139834720969, "learning_rate": 9.624012473822417e-06, "loss": 0.3477, "step": 2394 }, { "epoch": 0.15, "grad_norm": 2.955719936010595, "learning_rate": 9.623624900991676e-06, "loss": 0.3683, "step": 2395 }, { "epoch": 0.15, "grad_norm": 3.5910109694969887, "learning_rate": 9.623237136318529e-06, "loss": 0.3476, "step": 2396 }, { "epoch": 0.15, "grad_norm": 4.44330547353958, "learning_rate": 9.62284917981906e-06, "loss": 0.3623, "step": 2397 }, { "epoch": 0.15, "grad_norm": 2.7897407289814233, "learning_rate": 9.622461031509366e-06, "loss": 0.3634, "step": 2398 }, { "epoch": 0.15, "grad_norm": 2.556571649402105, "learning_rate": 9.622072691405557e-06, "loss": 0.3448, "step": 2399 }, { "epoch": 0.15, "grad_norm": 2.9887895139227743, "learning_rate": 9.621684159523739e-06, "loss": 0.3528, "step": 2400 }, { "epoch": 0.15, "grad_norm": 4.848350239825674, "learning_rate": 9.621295435880038e-06, "loss": 0.3416, "step": 2401 }, { "epoch": 0.15, "grad_norm": 4.979998864781763, "learning_rate": 9.62090652049058e-06, "loss": 0.3527, "step": 2402 }, { "epoch": 0.15, "grad_norm": 2.741250462625021, "learning_rate": 9.620517413371503e-06, "loss": 0.3682, "step": 2403 }, { "epoch": 0.15, "grad_norm": 2.4301657781684147, "learning_rate": 9.62012811453895e-06, "loss": 0.3721, "step": 2404 }, { "epoch": 0.15, "grad_norm": 2.741096371782502, "learning_rate": 9.619738624009078e-06, "loss": 0.359, "step": 2405 }, { "epoch": 0.15, "grad_norm": 6.18560320803686, "learning_rate": 9.619348941798044e-06, "loss": 0.345, "step": 2406 }, { "epoch": 0.15, "grad_norm": 3.1573392761835413, "learning_rate": 9.618959067922019e-06, "loss": 0.3554, "step": 2407 }, { "epoch": 0.15, "grad_norm": 5.900530906012863, "learning_rate": 9.618569002397176e-06, "loss": 0.3414, "step": 2408 }, { "epoch": 0.15, "grad_norm": 2.8469436713944822, "learning_rate": 9.618178745239701e-06, "loss": 0.3523, "step": 2409 }, { "epoch": 0.15, "grad_norm": 8.61986651265615, "learning_rate": 9.617788296465789e-06, "loss": 0.3599, "step": 2410 }, { "epoch": 0.15, "grad_norm": 4.172438301853682, "learning_rate": 9.617397656091637e-06, "loss": 0.3371, "step": 2411 }, { "epoch": 0.15, "grad_norm": 2.6323418059838244, "learning_rate": 9.617006824133455e-06, "loss": 0.3428, "step": 2412 }, { "epoch": 0.15, "grad_norm": 2.8478925125461014, "learning_rate": 9.61661580060746e-06, "loss": 0.366, "step": 2413 }, { "epoch": 0.15, "grad_norm": 9.964172206535194, "learning_rate": 9.616224585529873e-06, "loss": 0.3687, "step": 2414 }, { "epoch": 0.15, "grad_norm": 3.1357494876913283, "learning_rate": 9.615833178916932e-06, "loss": 0.3491, "step": 2415 }, { "epoch": 0.15, "grad_norm": 7.510423121848369, "learning_rate": 9.615441580784873e-06, "loss": 0.3324, "step": 2416 }, { "epoch": 0.15, "grad_norm": 3.4890764516931316, "learning_rate": 9.615049791149944e-06, "loss": 0.3411, "step": 2417 }, { "epoch": 0.15, "grad_norm": 3.176409776466373, "learning_rate": 9.614657810028402e-06, "loss": 0.3902, "step": 2418 }, { "epoch": 0.15, "grad_norm": 5.276996482228377, "learning_rate": 9.614265637436511e-06, "loss": 0.3526, "step": 2419 }, { "epoch": 0.15, "grad_norm": 2.3215123930986823, "learning_rate": 9.613873273390544e-06, "loss": 0.3386, "step": 2420 }, { "epoch": 0.15, "grad_norm": 3.603729331200219, "learning_rate": 9.613480717906778e-06, "loss": 0.3224, "step": 2421 }, { "epoch": 0.15, "grad_norm": 2.5575212249276613, "learning_rate": 9.613087971001502e-06, "loss": 0.3416, "step": 2422 }, { "epoch": 0.15, "grad_norm": 4.24107590249232, "learning_rate": 9.612695032691013e-06, "loss": 0.3392, "step": 2423 }, { "epoch": 0.15, "grad_norm": 4.012508516067043, "learning_rate": 9.612301902991615e-06, "loss": 0.3495, "step": 2424 }, { "epoch": 0.15, "grad_norm": 2.909970317601092, "learning_rate": 9.611908581919618e-06, "loss": 0.3626, "step": 2425 }, { "epoch": 0.15, "grad_norm": 3.230590477505716, "learning_rate": 9.611515069491342e-06, "loss": 0.37, "step": 2426 }, { "epoch": 0.15, "grad_norm": 3.9191284574500584, "learning_rate": 9.611121365723115e-06, "loss": 0.3696, "step": 2427 }, { "epoch": 0.15, "grad_norm": 1.8037932763176585, "learning_rate": 9.610727470631273e-06, "loss": 0.3561, "step": 2428 }, { "epoch": 0.15, "grad_norm": 2.8363081561004386, "learning_rate": 9.610333384232158e-06, "loss": 0.3554, "step": 2429 }, { "epoch": 0.15, "grad_norm": 7.137207765860285, "learning_rate": 9.609939106542123e-06, "loss": 0.3614, "step": 2430 }, { "epoch": 0.15, "grad_norm": 3.7826653475158007, "learning_rate": 9.609544637577524e-06, "loss": 0.3588, "step": 2431 }, { "epoch": 0.15, "grad_norm": 3.958046642291602, "learning_rate": 9.609149977354733e-06, "loss": 0.3699, "step": 2432 }, { "epoch": 0.15, "grad_norm": 3.2142013270687104, "learning_rate": 9.608755125890121e-06, "loss": 0.3577, "step": 2433 }, { "epoch": 0.15, "grad_norm": 2.7252581614357747, "learning_rate": 9.608360083200074e-06, "loss": 0.3535, "step": 2434 }, { "epoch": 0.15, "grad_norm": 4.9804888275379895, "learning_rate": 9.607964849300981e-06, "loss": 0.3517, "step": 2435 }, { "epoch": 0.15, "grad_norm": 3.1916292446784316, "learning_rate": 9.607569424209243e-06, "loss": 0.3332, "step": 2436 }, { "epoch": 0.15, "grad_norm": 5.127344557367979, "learning_rate": 9.607173807941263e-06, "loss": 0.3447, "step": 2437 }, { "epoch": 0.15, "grad_norm": 2.290307544206036, "learning_rate": 9.60677800051346e-06, "loss": 0.3452, "step": 2438 }, { "epoch": 0.15, "grad_norm": 2.7397610296303663, "learning_rate": 9.606382001942256e-06, "loss": 0.3292, "step": 2439 }, { "epoch": 0.15, "grad_norm": 6.187356183486216, "learning_rate": 9.605985812244079e-06, "loss": 0.3511, "step": 2440 }, { "epoch": 0.15, "grad_norm": 2.865261940931568, "learning_rate": 9.605589431435371e-06, "loss": 0.3339, "step": 2441 }, { "epoch": 0.15, "grad_norm": 3.4530789950310563, "learning_rate": 9.605192859532577e-06, "loss": 0.3564, "step": 2442 }, { "epoch": 0.15, "grad_norm": 3.7145727865835654, "learning_rate": 9.604796096552151e-06, "loss": 0.3763, "step": 2443 }, { "epoch": 0.15, "grad_norm": 2.854123911001881, "learning_rate": 9.604399142510557e-06, "loss": 0.3717, "step": 2444 }, { "epoch": 0.15, "grad_norm": 3.4863620391474894, "learning_rate": 9.604001997424261e-06, "loss": 0.3624, "step": 2445 }, { "epoch": 0.15, "grad_norm": 1.8969110954008894, "learning_rate": 9.603604661309747e-06, "loss": 0.3415, "step": 2446 }, { "epoch": 0.15, "grad_norm": 21.640684111149046, "learning_rate": 9.6032071341835e-06, "loss": 0.3714, "step": 2447 }, { "epoch": 0.15, "grad_norm": 4.682823158355087, "learning_rate": 9.602809416062011e-06, "loss": 0.3616, "step": 2448 }, { "epoch": 0.15, "grad_norm": 2.4358330308884466, "learning_rate": 9.602411506961784e-06, "loss": 0.3422, "step": 2449 }, { "epoch": 0.15, "grad_norm": 4.193203650915292, "learning_rate": 9.602013406899328e-06, "loss": 0.364, "step": 2450 }, { "epoch": 0.15, "grad_norm": 3.2529084116181983, "learning_rate": 9.601615115891164e-06, "loss": 0.3626, "step": 2451 }, { "epoch": 0.15, "grad_norm": 6.456621999420369, "learning_rate": 9.601216633953813e-06, "loss": 0.3517, "step": 2452 }, { "epoch": 0.15, "grad_norm": 2.8859910782301066, "learning_rate": 9.600817961103812e-06, "loss": 0.383, "step": 2453 }, { "epoch": 0.15, "grad_norm": 2.699870473647478, "learning_rate": 9.600419097357703e-06, "loss": 0.3199, "step": 2454 }, { "epoch": 0.15, "grad_norm": 3.5162567919215273, "learning_rate": 9.600020042732032e-06, "loss": 0.3282, "step": 2455 }, { "epoch": 0.15, "grad_norm": 1.7765206890967875, "learning_rate": 9.599620797243361e-06, "loss": 0.3416, "step": 2456 }, { "epoch": 0.15, "grad_norm": 3.122703075758099, "learning_rate": 9.599221360908252e-06, "loss": 0.3719, "step": 2457 }, { "epoch": 0.15, "grad_norm": 2.4466795969818937, "learning_rate": 9.59882173374328e-06, "loss": 0.3518, "step": 2458 }, { "epoch": 0.15, "grad_norm": 3.2239881683850915, "learning_rate": 9.598421915765026e-06, "loss": 0.3514, "step": 2459 }, { "epoch": 0.15, "grad_norm": 2.8689568773882375, "learning_rate": 9.598021906990079e-06, "loss": 0.3669, "step": 2460 }, { "epoch": 0.15, "grad_norm": 2.9236755291344516, "learning_rate": 9.597621707435036e-06, "loss": 0.3709, "step": 2461 }, { "epoch": 0.15, "grad_norm": 2.3783941980244863, "learning_rate": 9.5972213171165e-06, "loss": 0.3667, "step": 2462 }, { "epoch": 0.15, "grad_norm": 2.2096218671045524, "learning_rate": 9.59682073605109e-06, "loss": 0.3389, "step": 2463 }, { "epoch": 0.15, "grad_norm": 2.333596723331247, "learning_rate": 9.59641996425542e-06, "loss": 0.3414, "step": 2464 }, { "epoch": 0.16, "grad_norm": 4.812144092220335, "learning_rate": 9.596019001746122e-06, "loss": 0.3302, "step": 2465 }, { "epoch": 0.16, "grad_norm": 4.86242633104935, "learning_rate": 9.595617848539834e-06, "loss": 0.3439, "step": 2466 }, { "epoch": 0.16, "grad_norm": 2.066804945858839, "learning_rate": 9.595216504653197e-06, "loss": 0.347, "step": 2467 }, { "epoch": 0.16, "grad_norm": 3.222549658608177, "learning_rate": 9.594814970102865e-06, "loss": 0.3337, "step": 2468 }, { "epoch": 0.16, "grad_norm": 2.1083775393708573, "learning_rate": 9.594413244905499e-06, "loss": 0.3313, "step": 2469 }, { "epoch": 0.16, "grad_norm": 2.299993580756389, "learning_rate": 9.594011329077765e-06, "loss": 0.343, "step": 2470 }, { "epoch": 0.16, "grad_norm": 5.6414229614092255, "learning_rate": 9.593609222636344e-06, "loss": 0.3643, "step": 2471 }, { "epoch": 0.16, "grad_norm": 2.23002238737333, "learning_rate": 9.593206925597916e-06, "loss": 0.3566, "step": 2472 }, { "epoch": 0.16, "grad_norm": 2.3584540697125704, "learning_rate": 9.592804437979175e-06, "loss": 0.3536, "step": 2473 }, { "epoch": 0.16, "grad_norm": 2.5745857917975177, "learning_rate": 9.592401759796818e-06, "loss": 0.3418, "step": 2474 }, { "epoch": 0.16, "grad_norm": 3.021634240799391, "learning_rate": 9.591998891067558e-06, "loss": 0.3403, "step": 2475 }, { "epoch": 0.16, "grad_norm": 3.69197132094815, "learning_rate": 9.591595831808105e-06, "loss": 0.334, "step": 2476 }, { "epoch": 0.16, "grad_norm": 2.1549057972336096, "learning_rate": 9.591192582035187e-06, "loss": 0.346, "step": 2477 }, { "epoch": 0.16, "grad_norm": 3.1599262429004344, "learning_rate": 9.590789141765534e-06, "loss": 0.3469, "step": 2478 }, { "epoch": 0.16, "grad_norm": 3.8986599195289973, "learning_rate": 9.590385511015885e-06, "loss": 0.3643, "step": 2479 }, { "epoch": 0.16, "grad_norm": 2.043228553739762, "learning_rate": 9.589981689802988e-06, "loss": 0.3316, "step": 2480 }, { "epoch": 0.16, "grad_norm": 2.9831795768658242, "learning_rate": 9.589577678143596e-06, "loss": 0.3271, "step": 2481 }, { "epoch": 0.16, "grad_norm": 2.4642250151960425, "learning_rate": 9.589173476054476e-06, "loss": 0.3533, "step": 2482 }, { "epoch": 0.16, "grad_norm": 7.002826439586185, "learning_rate": 9.588769083552396e-06, "loss": 0.3752, "step": 2483 }, { "epoch": 0.16, "grad_norm": 2.1579029014664104, "learning_rate": 9.588364500654137e-06, "loss": 0.3288, "step": 2484 }, { "epoch": 0.16, "grad_norm": 0.905411452132041, "learning_rate": 9.587959727376485e-06, "loss": 0.4955, "step": 2485 }, { "epoch": 0.16, "grad_norm": 3.367883538218573, "learning_rate": 9.587554763736236e-06, "loss": 0.3361, "step": 2486 }, { "epoch": 0.16, "grad_norm": 2.728250185300285, "learning_rate": 9.58714960975019e-06, "loss": 0.3318, "step": 2487 }, { "epoch": 0.16, "grad_norm": 1.701796077702341, "learning_rate": 9.586744265435158e-06, "loss": 0.3359, "step": 2488 }, { "epoch": 0.16, "grad_norm": 3.930769574319089, "learning_rate": 9.58633873080796e-06, "loss": 0.3655, "step": 2489 }, { "epoch": 0.16, "grad_norm": 1.8947631935626261, "learning_rate": 9.585933005885423e-06, "loss": 0.3234, "step": 2490 }, { "epoch": 0.16, "grad_norm": 3.8732111665478097, "learning_rate": 9.58552709068438e-06, "loss": 0.3421, "step": 2491 }, { "epoch": 0.16, "grad_norm": 2.5178262424855333, "learning_rate": 9.585120985221672e-06, "loss": 0.3552, "step": 2492 }, { "epoch": 0.16, "grad_norm": 4.138368178767361, "learning_rate": 9.58471468951415e-06, "loss": 0.373, "step": 2493 }, { "epoch": 0.16, "grad_norm": 2.180941829419746, "learning_rate": 9.584308203578674e-06, "loss": 0.3418, "step": 2494 }, { "epoch": 0.16, "grad_norm": 1.64143411198683, "learning_rate": 9.583901527432106e-06, "loss": 0.3507, "step": 2495 }, { "epoch": 0.16, "grad_norm": 3.0856897197685846, "learning_rate": 9.583494661091324e-06, "loss": 0.3384, "step": 2496 }, { "epoch": 0.16, "grad_norm": 3.774938131234399, "learning_rate": 9.583087604573206e-06, "loss": 0.3668, "step": 2497 }, { "epoch": 0.16, "grad_norm": 2.7117203023785232, "learning_rate": 9.582680357894643e-06, "loss": 0.3745, "step": 2498 }, { "epoch": 0.16, "grad_norm": 1.8687654925308135, "learning_rate": 9.58227292107253e-06, "loss": 0.3309, "step": 2499 }, { "epoch": 0.16, "grad_norm": 3.0499200104652617, "learning_rate": 9.581865294123778e-06, "loss": 0.331, "step": 2500 }, { "epoch": 0.16, "grad_norm": 2.4279129559113004, "learning_rate": 9.581457477065294e-06, "loss": 0.3434, "step": 2501 }, { "epoch": 0.16, "grad_norm": 1.934042091373923, "learning_rate": 9.581049469914004e-06, "loss": 0.3533, "step": 2502 }, { "epoch": 0.16, "grad_norm": 1.9453778743274504, "learning_rate": 9.580641272686833e-06, "loss": 0.3558, "step": 2503 }, { "epoch": 0.16, "grad_norm": 0.8543505270470636, "learning_rate": 9.58023288540072e-06, "loss": 0.5233, "step": 2504 }, { "epoch": 0.16, "grad_norm": 2.2962050006236105, "learning_rate": 9.57982430807261e-06, "loss": 0.3444, "step": 2505 }, { "epoch": 0.16, "grad_norm": 5.39168303113425, "learning_rate": 9.579415540719453e-06, "loss": 0.36, "step": 2506 }, { "epoch": 0.16, "grad_norm": 1.6872424447087488, "learning_rate": 9.579006583358212e-06, "loss": 0.3264, "step": 2507 }, { "epoch": 0.16, "grad_norm": 2.727876161396312, "learning_rate": 9.578597436005854e-06, "loss": 0.3498, "step": 2508 }, { "epoch": 0.16, "grad_norm": 1.6006231337472312, "learning_rate": 9.578188098679357e-06, "loss": 0.3436, "step": 2509 }, { "epoch": 0.16, "grad_norm": 2.2575156671701886, "learning_rate": 9.577778571395704e-06, "loss": 0.3469, "step": 2510 }, { "epoch": 0.16, "grad_norm": 2.1869701324813695, "learning_rate": 9.577368854171887e-06, "loss": 0.365, "step": 2511 }, { "epoch": 0.16, "grad_norm": 2.1311656418139058, "learning_rate": 9.576958947024906e-06, "loss": 0.347, "step": 2512 }, { "epoch": 0.16, "grad_norm": 2.3141995316655706, "learning_rate": 9.576548849971767e-06, "loss": 0.3475, "step": 2513 }, { "epoch": 0.16, "grad_norm": 2.3770366881392695, "learning_rate": 9.576138563029489e-06, "loss": 0.3466, "step": 2514 }, { "epoch": 0.16, "grad_norm": 1.7840172310317712, "learning_rate": 9.575728086215093e-06, "loss": 0.3618, "step": 2515 }, { "epoch": 0.16, "grad_norm": 4.222578822306946, "learning_rate": 9.575317419545611e-06, "loss": 0.3115, "step": 2516 }, { "epoch": 0.16, "grad_norm": 2.216072415252696, "learning_rate": 9.574906563038084e-06, "loss": 0.3426, "step": 2517 }, { "epoch": 0.16, "grad_norm": 1.4519382156869867, "learning_rate": 9.574495516709557e-06, "loss": 0.336, "step": 2518 }, { "epoch": 0.16, "grad_norm": 3.205717967051008, "learning_rate": 9.574084280577085e-06, "loss": 0.34, "step": 2519 }, { "epoch": 0.16, "grad_norm": 1.9249991143022704, "learning_rate": 9.573672854657734e-06, "loss": 0.3676, "step": 2520 }, { "epoch": 0.16, "grad_norm": 1.6796598181338873, "learning_rate": 9.57326123896857e-06, "loss": 0.3558, "step": 2521 }, { "epoch": 0.16, "grad_norm": 1.9809568005933895, "learning_rate": 9.572849433526677e-06, "loss": 0.3659, "step": 2522 }, { "epoch": 0.16, "grad_norm": 3.563760658607657, "learning_rate": 9.572437438349136e-06, "loss": 0.3477, "step": 2523 }, { "epoch": 0.16, "grad_norm": 1.9663139792349593, "learning_rate": 9.572025253453045e-06, "loss": 0.3672, "step": 2524 }, { "epoch": 0.16, "grad_norm": 3.103341722871928, "learning_rate": 9.571612878855505e-06, "loss": 0.339, "step": 2525 }, { "epoch": 0.16, "grad_norm": 3.9121819361216263, "learning_rate": 9.571200314573628e-06, "loss": 0.3688, "step": 2526 }, { "epoch": 0.16, "grad_norm": 2.0710557211190554, "learning_rate": 9.57078756062453e-06, "loss": 0.3456, "step": 2527 }, { "epoch": 0.16, "grad_norm": 3.092516506635837, "learning_rate": 9.570374617025336e-06, "loss": 0.3876, "step": 2528 }, { "epoch": 0.16, "grad_norm": 2.5375152893594675, "learning_rate": 9.569961483793183e-06, "loss": 0.3618, "step": 2529 }, { "epoch": 0.16, "grad_norm": 1.9619645359300384, "learning_rate": 9.56954816094521e-06, "loss": 0.3596, "step": 2530 }, { "epoch": 0.16, "grad_norm": 3.156338035702917, "learning_rate": 9.569134648498568e-06, "loss": 0.3652, "step": 2531 }, { "epoch": 0.16, "grad_norm": 2.1336542173655793, "learning_rate": 9.568720946470414e-06, "loss": 0.3316, "step": 2532 }, { "epoch": 0.16, "grad_norm": 1.9472305196009703, "learning_rate": 9.568307054877911e-06, "loss": 0.3587, "step": 2533 }, { "epoch": 0.16, "grad_norm": 1.5438049737756938, "learning_rate": 9.567892973738236e-06, "loss": 0.3428, "step": 2534 }, { "epoch": 0.16, "grad_norm": 1.768489667835759, "learning_rate": 9.567478703068567e-06, "loss": 0.3454, "step": 2535 }, { "epoch": 0.16, "grad_norm": 1.7685372092996738, "learning_rate": 9.567064242886095e-06, "loss": 0.3357, "step": 2536 }, { "epoch": 0.16, "grad_norm": 2.305059977430131, "learning_rate": 9.566649593208015e-06, "loss": 0.3534, "step": 2537 }, { "epoch": 0.16, "grad_norm": 2.666618246425068, "learning_rate": 9.566234754051531e-06, "loss": 0.3724, "step": 2538 }, { "epoch": 0.16, "grad_norm": 3.009465063885724, "learning_rate": 9.565819725433857e-06, "loss": 0.3502, "step": 2539 }, { "epoch": 0.16, "grad_norm": 1.48785389289798, "learning_rate": 9.565404507372213e-06, "loss": 0.33, "step": 2540 }, { "epoch": 0.16, "grad_norm": 9.241466167236867, "learning_rate": 9.564989099883828e-06, "loss": 0.3404, "step": 2541 }, { "epoch": 0.16, "grad_norm": 2.588467178710788, "learning_rate": 9.564573502985936e-06, "loss": 0.3305, "step": 2542 }, { "epoch": 0.16, "grad_norm": 2.8390386030689703, "learning_rate": 9.564157716695783e-06, "loss": 0.3479, "step": 2543 }, { "epoch": 0.16, "grad_norm": 1.7269386059475975, "learning_rate": 9.563741741030616e-06, "loss": 0.3628, "step": 2544 }, { "epoch": 0.16, "grad_norm": 2.156759376096128, "learning_rate": 9.563325576007702e-06, "loss": 0.3594, "step": 2545 }, { "epoch": 0.16, "grad_norm": 1.6332586990458544, "learning_rate": 9.562909221644303e-06, "loss": 0.3368, "step": 2546 }, { "epoch": 0.16, "grad_norm": 3.44282817679179, "learning_rate": 9.562492677957695e-06, "loss": 0.3506, "step": 2547 }, { "epoch": 0.16, "grad_norm": 2.377567667525139, "learning_rate": 9.562075944965163e-06, "loss": 0.3544, "step": 2548 }, { "epoch": 0.16, "grad_norm": 5.488996642594074, "learning_rate": 9.561659022683996e-06, "loss": 0.3544, "step": 2549 }, { "epoch": 0.16, "grad_norm": 2.429938374982071, "learning_rate": 9.561241911131494e-06, "loss": 0.3443, "step": 2550 }, { "epoch": 0.16, "grad_norm": 1.8974550576283076, "learning_rate": 9.560824610324964e-06, "loss": 0.3361, "step": 2551 }, { "epoch": 0.16, "grad_norm": 3.1326873919114324, "learning_rate": 9.560407120281718e-06, "loss": 0.359, "step": 2552 }, { "epoch": 0.16, "grad_norm": 2.242227199761406, "learning_rate": 9.559989441019081e-06, "loss": 0.3543, "step": 2553 }, { "epoch": 0.16, "grad_norm": 3.0913298144477195, "learning_rate": 9.559571572554385e-06, "loss": 0.3724, "step": 2554 }, { "epoch": 0.16, "grad_norm": 1.890813144751611, "learning_rate": 9.559153514904964e-06, "loss": 0.3664, "step": 2555 }, { "epoch": 0.16, "grad_norm": 3.425751552170047, "learning_rate": 9.558735268088167e-06, "loss": 0.3593, "step": 2556 }, { "epoch": 0.16, "grad_norm": 2.7633058242306126, "learning_rate": 9.558316832121346e-06, "loss": 0.3535, "step": 2557 }, { "epoch": 0.16, "grad_norm": 2.5866897233691692, "learning_rate": 9.55789820702186e-06, "loss": 0.3379, "step": 2558 }, { "epoch": 0.16, "grad_norm": 1.733393093973504, "learning_rate": 9.557479392807085e-06, "loss": 0.3447, "step": 2559 }, { "epoch": 0.16, "grad_norm": 2.073121911444511, "learning_rate": 9.557060389494395e-06, "loss": 0.3395, "step": 2560 }, { "epoch": 0.16, "grad_norm": 13.08060753241703, "learning_rate": 9.556641197101173e-06, "loss": 0.3654, "step": 2561 }, { "epoch": 0.16, "grad_norm": 2.8624715183558385, "learning_rate": 9.556221815644818e-06, "loss": 0.3428, "step": 2562 }, { "epoch": 0.16, "grad_norm": 2.209289265708224, "learning_rate": 9.555802245142724e-06, "loss": 0.3388, "step": 2563 }, { "epoch": 0.16, "grad_norm": 1.6628985536653489, "learning_rate": 9.555382485612304e-06, "loss": 0.3393, "step": 2564 }, { "epoch": 0.16, "grad_norm": 2.5411508399725578, "learning_rate": 9.554962537070973e-06, "loss": 0.3606, "step": 2565 }, { "epoch": 0.16, "grad_norm": 6.727935169955974, "learning_rate": 9.554542399536156e-06, "loss": 0.3413, "step": 2566 }, { "epoch": 0.16, "grad_norm": 2.3995116338867413, "learning_rate": 9.554122073025284e-06, "loss": 0.338, "step": 2567 }, { "epoch": 0.16, "grad_norm": 2.866370812255361, "learning_rate": 9.5537015575558e-06, "loss": 0.3582, "step": 2568 }, { "epoch": 0.16, "grad_norm": 3.7772709331670047, "learning_rate": 9.553280853145148e-06, "loss": 0.3868, "step": 2569 }, { "epoch": 0.16, "grad_norm": 7.133986095637687, "learning_rate": 9.552859959810787e-06, "loss": 0.3481, "step": 2570 }, { "epoch": 0.16, "grad_norm": 2.649019599880264, "learning_rate": 9.55243887757018e-06, "loss": 0.3532, "step": 2571 }, { "epoch": 0.16, "grad_norm": 3.4470511545477005, "learning_rate": 9.552017606440798e-06, "loss": 0.3649, "step": 2572 }, { "epoch": 0.16, "grad_norm": 2.2246959892360683, "learning_rate": 9.551596146440119e-06, "loss": 0.3537, "step": 2573 }, { "epoch": 0.16, "grad_norm": 3.2384490679719873, "learning_rate": 9.551174497585632e-06, "loss": 0.3638, "step": 2574 }, { "epoch": 0.16, "grad_norm": 0.7628758447553016, "learning_rate": 9.550752659894831e-06, "loss": 0.4955, "step": 2575 }, { "epoch": 0.16, "grad_norm": 1.6820409910663758, "learning_rate": 9.550330633385218e-06, "loss": 0.3521, "step": 2576 }, { "epoch": 0.16, "grad_norm": 8.116288676880126, "learning_rate": 9.549908418074307e-06, "loss": 0.3675, "step": 2577 }, { "epoch": 0.16, "grad_norm": 2.8147406939372055, "learning_rate": 9.549486013979614e-06, "loss": 0.355, "step": 2578 }, { "epoch": 0.16, "grad_norm": 2.07027267824064, "learning_rate": 9.549063421118664e-06, "loss": 0.3554, "step": 2579 }, { "epoch": 0.16, "grad_norm": 3.179566494282651, "learning_rate": 9.548640639508994e-06, "loss": 0.3263, "step": 2580 }, { "epoch": 0.16, "grad_norm": 3.950898328802244, "learning_rate": 9.548217669168144e-06, "loss": 0.3655, "step": 2581 }, { "epoch": 0.16, "grad_norm": 1.6431588628993141, "learning_rate": 9.547794510113663e-06, "loss": 0.3521, "step": 2582 }, { "epoch": 0.16, "grad_norm": 3.463704793114517, "learning_rate": 9.547371162363112e-06, "loss": 0.3729, "step": 2583 }, { "epoch": 0.16, "grad_norm": 2.530652400296561, "learning_rate": 9.546947625934055e-06, "loss": 0.3831, "step": 2584 }, { "epoch": 0.16, "grad_norm": 2.4571569208762076, "learning_rate": 9.546523900844063e-06, "loss": 0.3703, "step": 2585 }, { "epoch": 0.16, "grad_norm": 2.049199584590343, "learning_rate": 9.54609998711072e-06, "loss": 0.3654, "step": 2586 }, { "epoch": 0.16, "grad_norm": 2.087287113882566, "learning_rate": 9.545675884751611e-06, "loss": 0.3658, "step": 2587 }, { "epoch": 0.16, "grad_norm": 2.5926436517104823, "learning_rate": 9.545251593784339e-06, "loss": 0.3583, "step": 2588 }, { "epoch": 0.16, "grad_norm": 2.2027156831411783, "learning_rate": 9.544827114226502e-06, "loss": 0.372, "step": 2589 }, { "epoch": 0.16, "grad_norm": 0.8152647557611538, "learning_rate": 9.544402446095718e-06, "loss": 0.5014, "step": 2590 }, { "epoch": 0.16, "grad_norm": 1.9393017128013679, "learning_rate": 9.543977589409603e-06, "loss": 0.3533, "step": 2591 }, { "epoch": 0.16, "grad_norm": 2.3638423578958885, "learning_rate": 9.543552544185788e-06, "loss": 0.3458, "step": 2592 }, { "epoch": 0.16, "grad_norm": 3.1573274962314226, "learning_rate": 9.54312731044191e-06, "loss": 0.3417, "step": 2593 }, { "epoch": 0.16, "grad_norm": 5.6799178455487676, "learning_rate": 9.542701888195606e-06, "loss": 0.3437, "step": 2594 }, { "epoch": 0.16, "grad_norm": 1.6581331359518916, "learning_rate": 9.542276277464534e-06, "loss": 0.3349, "step": 2595 }, { "epoch": 0.16, "grad_norm": 6.012567633353498, "learning_rate": 9.54185047826635e-06, "loss": 0.3543, "step": 2596 }, { "epoch": 0.16, "grad_norm": 1.7440730857683153, "learning_rate": 9.541424490618724e-06, "loss": 0.3475, "step": 2597 }, { "epoch": 0.16, "grad_norm": 2.0455347129235357, "learning_rate": 9.540998314539327e-06, "loss": 0.3222, "step": 2598 }, { "epoch": 0.16, "grad_norm": 3.5701933075545593, "learning_rate": 9.540571950045847e-06, "loss": 0.3627, "step": 2599 }, { "epoch": 0.16, "grad_norm": 4.105918678269197, "learning_rate": 9.540145397155972e-06, "loss": 0.3538, "step": 2600 }, { "epoch": 0.16, "grad_norm": 2.1305296537632055, "learning_rate": 9.539718655887398e-06, "loss": 0.3482, "step": 2601 }, { "epoch": 0.16, "grad_norm": 4.3103267626157065, "learning_rate": 9.539291726257835e-06, "loss": 0.3614, "step": 2602 }, { "epoch": 0.16, "grad_norm": 3.0582360901291676, "learning_rate": 9.538864608284994e-06, "loss": 0.3414, "step": 2603 }, { "epoch": 0.16, "grad_norm": 1.8434645933427474, "learning_rate": 9.5384373019866e-06, "loss": 0.3451, "step": 2604 }, { "epoch": 0.16, "grad_norm": 2.1976945757225415, "learning_rate": 9.538009807380381e-06, "loss": 0.3503, "step": 2605 }, { "epoch": 0.16, "grad_norm": 3.7150175096102385, "learning_rate": 9.537582124484074e-06, "loss": 0.348, "step": 2606 }, { "epoch": 0.16, "grad_norm": 1.6675833627117203, "learning_rate": 9.537154253315426e-06, "loss": 0.3355, "step": 2607 }, { "epoch": 0.16, "grad_norm": 4.916166557382315, "learning_rate": 9.53672619389219e-06, "loss": 0.3631, "step": 2608 }, { "epoch": 0.16, "grad_norm": 2.8562654869946944, "learning_rate": 9.536297946232124e-06, "loss": 0.3449, "step": 2609 }, { "epoch": 0.16, "grad_norm": 2.148763267097097, "learning_rate": 9.535869510353e-06, "loss": 0.3324, "step": 2610 }, { "epoch": 0.16, "grad_norm": 2.035107214695197, "learning_rate": 9.535440886272592e-06, "loss": 0.3491, "step": 2611 }, { "epoch": 0.16, "grad_norm": 7.074690254764154, "learning_rate": 9.535012074008688e-06, "loss": 0.3475, "step": 2612 }, { "epoch": 0.16, "grad_norm": 3.2345198467273626, "learning_rate": 9.534583073579076e-06, "loss": 0.3642, "step": 2613 }, { "epoch": 0.16, "grad_norm": 5.0103357340379375, "learning_rate": 9.534153885001557e-06, "loss": 0.3656, "step": 2614 }, { "epoch": 0.16, "grad_norm": 3.4933549283783316, "learning_rate": 9.533724508293942e-06, "loss": 0.3364, "step": 2615 }, { "epoch": 0.16, "grad_norm": 5.846978499674889, "learning_rate": 9.533294943474044e-06, "loss": 0.3725, "step": 2616 }, { "epoch": 0.16, "grad_norm": 4.751904603471427, "learning_rate": 9.532865190559686e-06, "loss": 0.3667, "step": 2617 }, { "epoch": 0.16, "grad_norm": 2.5832888425694582, "learning_rate": 9.532435249568701e-06, "loss": 0.3582, "step": 2618 }, { "epoch": 0.16, "grad_norm": 1.8667365545365433, "learning_rate": 9.532005120518927e-06, "loss": 0.3301, "step": 2619 }, { "epoch": 0.16, "grad_norm": 3.050198970183284, "learning_rate": 9.53157480342821e-06, "loss": 0.328, "step": 2620 }, { "epoch": 0.16, "grad_norm": 1.8338078372465996, "learning_rate": 9.531144298314406e-06, "loss": 0.3569, "step": 2621 }, { "epoch": 0.16, "grad_norm": 2.0190805522209225, "learning_rate": 9.530713605195377e-06, "loss": 0.3611, "step": 2622 }, { "epoch": 0.16, "grad_norm": 2.744709556009685, "learning_rate": 9.530282724088991e-06, "loss": 0.3411, "step": 2623 }, { "epoch": 0.17, "grad_norm": 2.7538256351208994, "learning_rate": 9.52985165501313e-06, "loss": 0.3534, "step": 2624 }, { "epoch": 0.17, "grad_norm": 5.853596636209183, "learning_rate": 9.529420397985678e-06, "loss": 0.3498, "step": 2625 }, { "epoch": 0.17, "grad_norm": 0.8416926794641617, "learning_rate": 9.528988953024528e-06, "loss": 0.5177, "step": 2626 }, { "epoch": 0.17, "grad_norm": 2.037586542004035, "learning_rate": 9.528557320147583e-06, "loss": 0.3797, "step": 2627 }, { "epoch": 0.17, "grad_norm": 1.91942641742022, "learning_rate": 9.52812549937275e-06, "loss": 0.3596, "step": 2628 }, { "epoch": 0.17, "grad_norm": 1.939099554594765, "learning_rate": 9.527693490717946e-06, "loss": 0.3371, "step": 2629 }, { "epoch": 0.17, "grad_norm": 2.1548338000960006, "learning_rate": 9.527261294201098e-06, "loss": 0.3316, "step": 2630 }, { "epoch": 0.17, "grad_norm": 3.740173607989743, "learning_rate": 9.52682890984014e-06, "loss": 0.3512, "step": 2631 }, { "epoch": 0.17, "grad_norm": 3.152701064283289, "learning_rate": 9.526396337653008e-06, "loss": 0.3423, "step": 2632 }, { "epoch": 0.17, "grad_norm": 3.988591564166526, "learning_rate": 9.525963577657651e-06, "loss": 0.3387, "step": 2633 }, { "epoch": 0.17, "grad_norm": 2.5276796406272264, "learning_rate": 9.525530629872027e-06, "loss": 0.3593, "step": 2634 }, { "epoch": 0.17, "grad_norm": 2.950624673873196, "learning_rate": 9.5250974943141e-06, "loss": 0.3651, "step": 2635 }, { "epoch": 0.17, "grad_norm": 2.785631448046799, "learning_rate": 9.524664171001839e-06, "loss": 0.3434, "step": 2636 }, { "epoch": 0.17, "grad_norm": 4.6251489728593995, "learning_rate": 9.524230659953227e-06, "loss": 0.34, "step": 2637 }, { "epoch": 0.17, "grad_norm": 2.6580689955724477, "learning_rate": 9.523796961186247e-06, "loss": 0.3601, "step": 2638 }, { "epoch": 0.17, "grad_norm": 2.3618623929564544, "learning_rate": 9.523363074718895e-06, "loss": 0.3278, "step": 2639 }, { "epoch": 0.17, "grad_norm": 3.126048441695036, "learning_rate": 9.522929000569177e-06, "loss": 0.3733, "step": 2640 }, { "epoch": 0.17, "grad_norm": 2.632442609393323, "learning_rate": 9.522494738755099e-06, "loss": 0.349, "step": 2641 }, { "epoch": 0.17, "grad_norm": 2.849686572228049, "learning_rate": 9.522060289294683e-06, "loss": 0.3543, "step": 2642 }, { "epoch": 0.17, "grad_norm": 1.8878210352938096, "learning_rate": 9.521625652205954e-06, "loss": 0.3485, "step": 2643 }, { "epoch": 0.17, "grad_norm": 1.7771626486961767, "learning_rate": 9.521190827506944e-06, "loss": 0.3576, "step": 2644 }, { "epoch": 0.17, "grad_norm": 1.8613438572064482, "learning_rate": 9.520755815215697e-06, "loss": 0.3407, "step": 2645 }, { "epoch": 0.17, "grad_norm": 2.6765864240324504, "learning_rate": 9.52032061535026e-06, "loss": 0.3447, "step": 2646 }, { "epoch": 0.17, "grad_norm": 2.5914867880010433, "learning_rate": 9.519885227928693e-06, "loss": 0.3449, "step": 2647 }, { "epoch": 0.17, "grad_norm": 1.9024737081700733, "learning_rate": 9.519449652969059e-06, "loss": 0.3508, "step": 2648 }, { "epoch": 0.17, "grad_norm": 1.8534869736610464, "learning_rate": 9.519013890489433e-06, "loss": 0.3361, "step": 2649 }, { "epoch": 0.17, "grad_norm": 4.054828248159896, "learning_rate": 9.518577940507893e-06, "loss": 0.3442, "step": 2650 }, { "epoch": 0.17, "grad_norm": 2.422687525071993, "learning_rate": 9.518141803042528e-06, "loss": 0.3325, "step": 2651 }, { "epoch": 0.17, "grad_norm": 5.20343090953775, "learning_rate": 9.517705478111434e-06, "loss": 0.3451, "step": 2652 }, { "epoch": 0.17, "grad_norm": 2.7751071814947603, "learning_rate": 9.517268965732716e-06, "loss": 0.3689, "step": 2653 }, { "epoch": 0.17, "grad_norm": 3.3940516617122967, "learning_rate": 9.516832265924485e-06, "loss": 0.365, "step": 2654 }, { "epoch": 0.17, "grad_norm": 2.354668871640598, "learning_rate": 9.516395378704862e-06, "loss": 0.3412, "step": 2655 }, { "epoch": 0.17, "grad_norm": 2.353812747703441, "learning_rate": 9.51595830409197e-06, "loss": 0.3643, "step": 2656 }, { "epoch": 0.17, "grad_norm": 2.076640095928003, "learning_rate": 9.515521042103948e-06, "loss": 0.3303, "step": 2657 }, { "epoch": 0.17, "grad_norm": 4.880974707584823, "learning_rate": 9.515083592758939e-06, "loss": 0.3404, "step": 2658 }, { "epoch": 0.17, "grad_norm": 12.592343016773818, "learning_rate": 9.514645956075088e-06, "loss": 0.3489, "step": 2659 }, { "epoch": 0.17, "grad_norm": 3.3784402938981795, "learning_rate": 9.514208132070561e-06, "loss": 0.3714, "step": 2660 }, { "epoch": 0.17, "grad_norm": 2.4593005782718267, "learning_rate": 9.513770120763518e-06, "loss": 0.3369, "step": 2661 }, { "epoch": 0.17, "grad_norm": 2.361727502900006, "learning_rate": 9.513331922172138e-06, "loss": 0.3442, "step": 2662 }, { "epoch": 0.17, "grad_norm": 3.366551211810116, "learning_rate": 9.512893536314598e-06, "loss": 0.3334, "step": 2663 }, { "epoch": 0.17, "grad_norm": 2.0336864622878514, "learning_rate": 9.51245496320909e-06, "loss": 0.3557, "step": 2664 }, { "epoch": 0.17, "grad_norm": 3.3405514413183313, "learning_rate": 9.51201620287381e-06, "loss": 0.3605, "step": 2665 }, { "epoch": 0.17, "grad_norm": 1.6840921552846995, "learning_rate": 9.511577255326965e-06, "loss": 0.3413, "step": 2666 }, { "epoch": 0.17, "grad_norm": 2.749522367654022, "learning_rate": 9.511138120586765e-06, "loss": 0.3463, "step": 2667 }, { "epoch": 0.17, "grad_norm": 1.6912371977457925, "learning_rate": 9.510698798671432e-06, "loss": 0.3545, "step": 2668 }, { "epoch": 0.17, "grad_norm": 2.436818083944917, "learning_rate": 9.510259289599195e-06, "loss": 0.3544, "step": 2669 }, { "epoch": 0.17, "grad_norm": 2.82972619419692, "learning_rate": 9.509819593388288e-06, "loss": 0.3525, "step": 2670 }, { "epoch": 0.17, "grad_norm": 7.082100565723151, "learning_rate": 9.509379710056954e-06, "loss": 0.3475, "step": 2671 }, { "epoch": 0.17, "grad_norm": 0.864480199932987, "learning_rate": 9.508939639623448e-06, "loss": 0.5275, "step": 2672 }, { "epoch": 0.17, "grad_norm": 2.9609948489310076, "learning_rate": 9.508499382106028e-06, "loss": 0.3582, "step": 2673 }, { "epoch": 0.17, "grad_norm": 3.7104948444024886, "learning_rate": 9.508058937522959e-06, "loss": 0.3544, "step": 2674 }, { "epoch": 0.17, "grad_norm": 2.8965084469653606, "learning_rate": 9.50761830589252e-06, "loss": 0.3704, "step": 2675 }, { "epoch": 0.17, "grad_norm": 2.239225738722652, "learning_rate": 9.507177487232989e-06, "loss": 0.3597, "step": 2676 }, { "epoch": 0.17, "grad_norm": 3.542830523225034, "learning_rate": 9.506736481562659e-06, "loss": 0.3436, "step": 2677 }, { "epoch": 0.17, "grad_norm": 2.401912160955188, "learning_rate": 9.506295288899827e-06, "loss": 0.3635, "step": 2678 }, { "epoch": 0.17, "grad_norm": 2.600686461753694, "learning_rate": 9.5058539092628e-06, "loss": 0.3507, "step": 2679 }, { "epoch": 0.17, "grad_norm": 0.6601039617645533, "learning_rate": 9.505412342669891e-06, "loss": 0.4829, "step": 2680 }, { "epoch": 0.17, "grad_norm": 2.66433923637712, "learning_rate": 9.504970589139422e-06, "loss": 0.3541, "step": 2681 }, { "epoch": 0.17, "grad_norm": 3.709006116533518, "learning_rate": 9.50452864868972e-06, "loss": 0.3532, "step": 2682 }, { "epoch": 0.17, "grad_norm": 4.122040693803943, "learning_rate": 9.504086521339124e-06, "loss": 0.342, "step": 2683 }, { "epoch": 0.17, "grad_norm": 2.271705062000997, "learning_rate": 9.50364420710598e-06, "loss": 0.3539, "step": 2684 }, { "epoch": 0.17, "grad_norm": 3.0816573387018766, "learning_rate": 9.503201706008636e-06, "loss": 0.3534, "step": 2685 }, { "epoch": 0.17, "grad_norm": 4.8107531371915115, "learning_rate": 9.502759018065455e-06, "loss": 0.3736, "step": 2686 }, { "epoch": 0.17, "grad_norm": 2.9865703063707727, "learning_rate": 9.502316143294806e-06, "loss": 0.3456, "step": 2687 }, { "epoch": 0.17, "grad_norm": 7.389427019947758, "learning_rate": 9.501873081715062e-06, "loss": 0.3439, "step": 2688 }, { "epoch": 0.17, "grad_norm": 3.81338473799617, "learning_rate": 9.501429833344608e-06, "loss": 0.3494, "step": 2689 }, { "epoch": 0.17, "grad_norm": 5.352215648096498, "learning_rate": 9.500986398201835e-06, "loss": 0.3599, "step": 2690 }, { "epoch": 0.17, "grad_norm": 5.099168036639196, "learning_rate": 9.50054277630514e-06, "loss": 0.3332, "step": 2691 }, { "epoch": 0.17, "grad_norm": 2.577477281100561, "learning_rate": 9.500098967672933e-06, "loss": 0.3376, "step": 2692 }, { "epoch": 0.17, "grad_norm": 2.00425205734045, "learning_rate": 9.499654972323627e-06, "loss": 0.3238, "step": 2693 }, { "epoch": 0.17, "grad_norm": 2.4047158583404316, "learning_rate": 9.499210790275642e-06, "loss": 0.3317, "step": 2694 }, { "epoch": 0.17, "grad_norm": 6.481222274156482, "learning_rate": 9.498766421547412e-06, "loss": 0.3344, "step": 2695 }, { "epoch": 0.17, "grad_norm": 8.760444148294617, "learning_rate": 9.498321866157372e-06, "loss": 0.3257, "step": 2696 }, { "epoch": 0.17, "grad_norm": 3.342022172020348, "learning_rate": 9.497877124123967e-06, "loss": 0.3182, "step": 2697 }, { "epoch": 0.17, "grad_norm": 2.1425706342728876, "learning_rate": 9.497432195465652e-06, "loss": 0.3347, "step": 2698 }, { "epoch": 0.17, "grad_norm": 4.284001068161913, "learning_rate": 9.496987080200886e-06, "loss": 0.3541, "step": 2699 }, { "epoch": 0.17, "grad_norm": 2.1254002365975726, "learning_rate": 9.496541778348138e-06, "loss": 0.344, "step": 2700 }, { "epoch": 0.17, "grad_norm": 3.9013143896783475, "learning_rate": 9.496096289925886e-06, "loss": 0.3475, "step": 2701 }, { "epoch": 0.17, "grad_norm": 2.3387279893590986, "learning_rate": 9.495650614952612e-06, "loss": 0.332, "step": 2702 }, { "epoch": 0.17, "grad_norm": 13.953452241927364, "learning_rate": 9.495204753446809e-06, "loss": 0.3452, "step": 2703 }, { "epoch": 0.17, "grad_norm": 9.111188700003503, "learning_rate": 9.494758705426978e-06, "loss": 0.3236, "step": 2704 }, { "epoch": 0.17, "grad_norm": 7.7961239694588595, "learning_rate": 9.494312470911622e-06, "loss": 0.3573, "step": 2705 }, { "epoch": 0.17, "grad_norm": 2.9370592498352033, "learning_rate": 9.493866049919261e-06, "loss": 0.3477, "step": 2706 }, { "epoch": 0.17, "grad_norm": 7.006058893233341, "learning_rate": 9.493419442468414e-06, "loss": 0.3378, "step": 2707 }, { "epoch": 0.17, "grad_norm": 3.65946956367097, "learning_rate": 9.492972648577616e-06, "loss": 0.3736, "step": 2708 }, { "epoch": 0.17, "grad_norm": 4.3844249311446575, "learning_rate": 9.4925256682654e-06, "loss": 0.3471, "step": 2709 }, { "epoch": 0.17, "grad_norm": 4.856291878012339, "learning_rate": 9.492078501550314e-06, "loss": 0.3516, "step": 2710 }, { "epoch": 0.17, "grad_norm": 4.264358389602798, "learning_rate": 9.491631148450914e-06, "loss": 0.345, "step": 2711 }, { "epoch": 0.17, "grad_norm": 5.857814904464563, "learning_rate": 9.491183608985757e-06, "loss": 0.3792, "step": 2712 }, { "epoch": 0.17, "grad_norm": 2.6986824116877774, "learning_rate": 9.490735883173417e-06, "loss": 0.3626, "step": 2713 }, { "epoch": 0.17, "grad_norm": 0.7352081879720826, "learning_rate": 9.490287971032468e-06, "loss": 0.475, "step": 2714 }, { "epoch": 0.17, "grad_norm": 3.1577051020112794, "learning_rate": 9.489839872581497e-06, "loss": 0.3453, "step": 2715 }, { "epoch": 0.17, "grad_norm": 3.2301244478097497, "learning_rate": 9.489391587839091e-06, "loss": 0.3643, "step": 2716 }, { "epoch": 0.17, "grad_norm": 5.677422097230705, "learning_rate": 9.488943116823857e-06, "loss": 0.3687, "step": 2717 }, { "epoch": 0.17, "grad_norm": 5.126018716941779, "learning_rate": 9.488494459554401e-06, "loss": 0.3197, "step": 2718 }, { "epoch": 0.17, "grad_norm": 4.7465849147696, "learning_rate": 9.488045616049335e-06, "loss": 0.3424, "step": 2719 }, { "epoch": 0.17, "grad_norm": 5.6874308654596755, "learning_rate": 9.487596586327286e-06, "loss": 0.3431, "step": 2720 }, { "epoch": 0.17, "grad_norm": 3.2281008504858373, "learning_rate": 9.487147370406882e-06, "loss": 0.3361, "step": 2721 }, { "epoch": 0.17, "grad_norm": 0.6901945952652017, "learning_rate": 9.486697968306764e-06, "loss": 0.4635, "step": 2722 }, { "epoch": 0.17, "grad_norm": 2.953759134002954, "learning_rate": 9.48624838004558e-06, "loss": 0.3449, "step": 2723 }, { "epoch": 0.17, "grad_norm": 3.082152399780204, "learning_rate": 9.48579860564198e-06, "loss": 0.3783, "step": 2724 }, { "epoch": 0.17, "grad_norm": 3.703853281066354, "learning_rate": 9.48534864511463e-06, "loss": 0.3493, "step": 2725 }, { "epoch": 0.17, "grad_norm": 2.8544233592956116, "learning_rate": 9.484898498482195e-06, "loss": 0.3318, "step": 2726 }, { "epoch": 0.17, "grad_norm": 3.9885102183229755, "learning_rate": 9.484448165763358e-06, "loss": 0.3502, "step": 2727 }, { "epoch": 0.17, "grad_norm": 2.9964802042385448, "learning_rate": 9.483997646976802e-06, "loss": 0.342, "step": 2728 }, { "epoch": 0.17, "grad_norm": 2.2126700481589343, "learning_rate": 9.483546942141216e-06, "loss": 0.3218, "step": 2729 }, { "epoch": 0.17, "grad_norm": 3.412135230470621, "learning_rate": 9.483096051275305e-06, "loss": 0.3721, "step": 2730 }, { "epoch": 0.17, "grad_norm": 2.8492547414066736, "learning_rate": 9.482644974397776e-06, "loss": 0.339, "step": 2731 }, { "epoch": 0.17, "grad_norm": 3.569498592151541, "learning_rate": 9.482193711527345e-06, "loss": 0.3215, "step": 2732 }, { "epoch": 0.17, "grad_norm": 3.6170889960445285, "learning_rate": 9.481742262682738e-06, "loss": 0.3352, "step": 2733 }, { "epoch": 0.17, "grad_norm": 3.202934192988548, "learning_rate": 9.48129062788268e-06, "loss": 0.3705, "step": 2734 }, { "epoch": 0.17, "grad_norm": 2.605696492639625, "learning_rate": 9.480838807145916e-06, "loss": 0.3302, "step": 2735 }, { "epoch": 0.17, "grad_norm": 6.185066278772632, "learning_rate": 9.480386800491192e-06, "loss": 0.3633, "step": 2736 }, { "epoch": 0.17, "grad_norm": 4.228770585131004, "learning_rate": 9.47993460793726e-06, "loss": 0.3279, "step": 2737 }, { "epoch": 0.17, "grad_norm": 11.401999316003607, "learning_rate": 9.479482229502886e-06, "loss": 0.3476, "step": 2738 }, { "epoch": 0.17, "grad_norm": 2.4559655797869313, "learning_rate": 9.479029665206837e-06, "loss": 0.3466, "step": 2739 }, { "epoch": 0.17, "grad_norm": 2.3456006468757646, "learning_rate": 9.47857691506789e-06, "loss": 0.3439, "step": 2740 }, { "epoch": 0.17, "grad_norm": 2.4428161643087463, "learning_rate": 9.478123979104831e-06, "loss": 0.3432, "step": 2741 }, { "epoch": 0.17, "grad_norm": 2.9358941894391037, "learning_rate": 9.477670857336458e-06, "loss": 0.3302, "step": 2742 }, { "epoch": 0.17, "grad_norm": 0.6408629330984094, "learning_rate": 9.477217549781565e-06, "loss": 0.512, "step": 2743 }, { "epoch": 0.17, "grad_norm": 4.42865217078952, "learning_rate": 9.476764056458964e-06, "loss": 0.3511, "step": 2744 }, { "epoch": 0.17, "grad_norm": 2.6952496177437375, "learning_rate": 9.476310377387473e-06, "loss": 0.3379, "step": 2745 }, { "epoch": 0.17, "grad_norm": 2.2930089427411766, "learning_rate": 9.47585651258591e-06, "loss": 0.3298, "step": 2746 }, { "epoch": 0.17, "grad_norm": 14.833766862752183, "learning_rate": 9.475402462073111e-06, "loss": 0.3575, "step": 2747 }, { "epoch": 0.17, "grad_norm": 5.956849764735481, "learning_rate": 9.474948225867916e-06, "loss": 0.3262, "step": 2748 }, { "epoch": 0.17, "grad_norm": 2.730707992760973, "learning_rate": 9.47449380398917e-06, "loss": 0.3422, "step": 2749 }, { "epoch": 0.17, "grad_norm": 3.2083692841088185, "learning_rate": 9.474039196455729e-06, "loss": 0.3314, "step": 2750 }, { "epoch": 0.17, "grad_norm": 0.6240525352921156, "learning_rate": 9.473584403286454e-06, "loss": 0.4988, "step": 2751 }, { "epoch": 0.17, "grad_norm": 3.187405850798595, "learning_rate": 9.473129424500218e-06, "loss": 0.3522, "step": 2752 }, { "epoch": 0.17, "grad_norm": 2.7106370564114695, "learning_rate": 9.472674260115895e-06, "loss": 0.3372, "step": 2753 }, { "epoch": 0.17, "grad_norm": 4.443339382493905, "learning_rate": 9.472218910152374e-06, "loss": 0.3364, "step": 2754 }, { "epoch": 0.17, "grad_norm": 3.4018084146984333, "learning_rate": 9.471763374628546e-06, "loss": 0.3498, "step": 2755 }, { "epoch": 0.17, "grad_norm": 1.7353046132565118, "learning_rate": 9.471307653563313e-06, "loss": 0.3442, "step": 2756 }, { "epoch": 0.17, "grad_norm": 4.083106695960372, "learning_rate": 9.470851746975582e-06, "loss": 0.341, "step": 2757 }, { "epoch": 0.17, "grad_norm": 5.237154015534022, "learning_rate": 9.470395654884273e-06, "loss": 0.3456, "step": 2758 }, { "epoch": 0.17, "grad_norm": 3.690322370540809, "learning_rate": 9.469939377308308e-06, "loss": 0.3349, "step": 2759 }, { "epoch": 0.17, "grad_norm": 3.4584919329566106, "learning_rate": 9.469482914266618e-06, "loss": 0.3625, "step": 2760 }, { "epoch": 0.17, "grad_norm": 2.093674191456435, "learning_rate": 9.469026265778142e-06, "loss": 0.3294, "step": 2761 }, { "epoch": 0.17, "grad_norm": 2.7750403189206274, "learning_rate": 9.468569431861831e-06, "loss": 0.3288, "step": 2762 }, { "epoch": 0.17, "grad_norm": 3.7542207612319314, "learning_rate": 9.468112412536636e-06, "loss": 0.3534, "step": 2763 }, { "epoch": 0.17, "grad_norm": 3.9043950895979895, "learning_rate": 9.46765520782152e-06, "loss": 0.3499, "step": 2764 }, { "epoch": 0.17, "grad_norm": 2.7052852220722436, "learning_rate": 9.467197817735455e-06, "loss": 0.3219, "step": 2765 }, { "epoch": 0.17, "grad_norm": 2.767424423941762, "learning_rate": 9.466740242297418e-06, "loss": 0.3533, "step": 2766 }, { "epoch": 0.17, "grad_norm": 3.7762048598563815, "learning_rate": 9.466282481526393e-06, "loss": 0.3347, "step": 2767 }, { "epoch": 0.17, "grad_norm": 2.2167111051395056, "learning_rate": 9.465824535441377e-06, "loss": 0.3383, "step": 2768 }, { "epoch": 0.17, "grad_norm": 4.126082207266447, "learning_rate": 9.465366404061366e-06, "loss": 0.3593, "step": 2769 }, { "epoch": 0.17, "grad_norm": 7.933674012815756, "learning_rate": 9.464908087405374e-06, "loss": 0.3248, "step": 2770 }, { "epoch": 0.17, "grad_norm": 3.3307450476425386, "learning_rate": 9.464449585492415e-06, "loss": 0.3469, "step": 2771 }, { "epoch": 0.17, "grad_norm": 2.5991292608208285, "learning_rate": 9.463990898341511e-06, "loss": 0.3588, "step": 2772 }, { "epoch": 0.17, "grad_norm": 2.0305170395619663, "learning_rate": 9.463532025971696e-06, "loss": 0.3189, "step": 2773 }, { "epoch": 0.17, "grad_norm": 2.243510270280099, "learning_rate": 9.46307296840201e-06, "loss": 0.319, "step": 2774 }, { "epoch": 0.17, "grad_norm": 3.0991391084486075, "learning_rate": 9.4626137256515e-06, "loss": 0.3376, "step": 2775 }, { "epoch": 0.17, "grad_norm": 6.942842649520549, "learning_rate": 9.46215429773922e-06, "loss": 0.3388, "step": 2776 }, { "epoch": 0.17, "grad_norm": 4.977117474931626, "learning_rate": 9.46169468468423e-06, "loss": 0.3439, "step": 2777 }, { "epoch": 0.17, "grad_norm": 2.224291263022979, "learning_rate": 9.461234886505604e-06, "loss": 0.3355, "step": 2778 }, { "epoch": 0.17, "grad_norm": 3.0031455465503547, "learning_rate": 9.46077490322242e-06, "loss": 0.3531, "step": 2779 }, { "epoch": 0.17, "grad_norm": 3.874284615010175, "learning_rate": 9.46031473485376e-06, "loss": 0.3409, "step": 2780 }, { "epoch": 0.17, "grad_norm": 2.3548662817068347, "learning_rate": 9.45985438141872e-06, "loss": 0.3325, "step": 2781 }, { "epoch": 0.17, "grad_norm": 4.455369618580227, "learning_rate": 9.4593938429364e-06, "loss": 0.3349, "step": 2782 }, { "epoch": 0.18, "grad_norm": 2.6616707836133435, "learning_rate": 9.45893311942591e-06, "loss": 0.3372, "step": 2783 }, { "epoch": 0.18, "grad_norm": 3.265891540437684, "learning_rate": 9.458472210906363e-06, "loss": 0.3339, "step": 2784 }, { "epoch": 0.18, "grad_norm": 2.51312814291431, "learning_rate": 9.458011117396886e-06, "loss": 0.3346, "step": 2785 }, { "epoch": 0.18, "grad_norm": 2.6527253674339057, "learning_rate": 9.45754983891661e-06, "loss": 0.332, "step": 2786 }, { "epoch": 0.18, "grad_norm": 4.38085799438359, "learning_rate": 9.457088375484671e-06, "loss": 0.3386, "step": 2787 }, { "epoch": 0.18, "grad_norm": 3.1564390974614422, "learning_rate": 9.45662672712022e-06, "loss": 0.3395, "step": 2788 }, { "epoch": 0.18, "grad_norm": 6.613545889420841, "learning_rate": 9.456164893842411e-06, "loss": 0.3485, "step": 2789 }, { "epoch": 0.18, "grad_norm": 3.3711434342955076, "learning_rate": 9.455702875670405e-06, "loss": 0.3349, "step": 2790 }, { "epoch": 0.18, "grad_norm": 3.663328043729727, "learning_rate": 9.455240672623373e-06, "loss": 0.3498, "step": 2791 }, { "epoch": 0.18, "grad_norm": 4.981069481642985, "learning_rate": 9.454778284720492e-06, "loss": 0.3459, "step": 2792 }, { "epoch": 0.18, "grad_norm": 2.414719483762963, "learning_rate": 9.454315711980947e-06, "loss": 0.342, "step": 2793 }, { "epoch": 0.18, "grad_norm": 2.8841543131580942, "learning_rate": 9.453852954423931e-06, "loss": 0.3418, "step": 2794 }, { "epoch": 0.18, "grad_norm": 2.0040390069744674, "learning_rate": 9.453390012068645e-06, "loss": 0.3222, "step": 2795 }, { "epoch": 0.18, "grad_norm": 3.13952994394859, "learning_rate": 9.452926884934299e-06, "loss": 0.3434, "step": 2796 }, { "epoch": 0.18, "grad_norm": 3.353655141473368, "learning_rate": 9.452463573040105e-06, "loss": 0.3338, "step": 2797 }, { "epoch": 0.18, "grad_norm": 1.9606795539717619, "learning_rate": 9.45200007640529e-06, "loss": 0.3514, "step": 2798 }, { "epoch": 0.18, "grad_norm": 2.886733353083376, "learning_rate": 9.451536395049083e-06, "loss": 0.3471, "step": 2799 }, { "epoch": 0.18, "grad_norm": 2.4776331690200872, "learning_rate": 9.451072528990726e-06, "loss": 0.3449, "step": 2800 }, { "epoch": 0.18, "grad_norm": 4.722283122940009, "learning_rate": 9.450608478249463e-06, "loss": 0.3412, "step": 2801 }, { "epoch": 0.18, "grad_norm": 2.666023026044664, "learning_rate": 9.45014424284455e-06, "loss": 0.3582, "step": 2802 }, { "epoch": 0.18, "grad_norm": 3.2828030174161635, "learning_rate": 9.449679822795246e-06, "loss": 0.3409, "step": 2803 }, { "epoch": 0.18, "grad_norm": 2.1537365589222173, "learning_rate": 9.449215218120823e-06, "loss": 0.3452, "step": 2804 }, { "epoch": 0.18, "grad_norm": 2.648680999644945, "learning_rate": 9.448750428840558e-06, "loss": 0.3366, "step": 2805 }, { "epoch": 0.18, "grad_norm": 2.2211160690738936, "learning_rate": 9.448285454973739e-06, "loss": 0.3302, "step": 2806 }, { "epoch": 0.18, "grad_norm": 3.5726546881622903, "learning_rate": 9.447820296539651e-06, "loss": 0.3401, "step": 2807 }, { "epoch": 0.18, "grad_norm": 3.4645098836059254, "learning_rate": 9.447354953557601e-06, "loss": 0.3373, "step": 2808 }, { "epoch": 0.18, "grad_norm": 3.460336111103413, "learning_rate": 9.446889426046894e-06, "loss": 0.3601, "step": 2809 }, { "epoch": 0.18, "grad_norm": 3.4755813323162292, "learning_rate": 9.446423714026846e-06, "loss": 0.3774, "step": 2810 }, { "epoch": 0.18, "grad_norm": 3.121433864366657, "learning_rate": 9.44595781751678e-06, "loss": 0.3458, "step": 2811 }, { "epoch": 0.18, "grad_norm": 3.359935062868799, "learning_rate": 9.445491736536027e-06, "loss": 0.3517, "step": 2812 }, { "epoch": 0.18, "grad_norm": 2.8225412692039424, "learning_rate": 9.445025471103928e-06, "loss": 0.3444, "step": 2813 }, { "epoch": 0.18, "grad_norm": 3.3172362395571358, "learning_rate": 9.444559021239826e-06, "loss": 0.3294, "step": 2814 }, { "epoch": 0.18, "grad_norm": 3.2337656213902073, "learning_rate": 9.444092386963075e-06, "loss": 0.3488, "step": 2815 }, { "epoch": 0.18, "grad_norm": 3.6685777243671267, "learning_rate": 9.443625568293038e-06, "loss": 0.3562, "step": 2816 }, { "epoch": 0.18, "grad_norm": 2.0430253762704766, "learning_rate": 9.443158565249082e-06, "loss": 0.3217, "step": 2817 }, { "epoch": 0.18, "grad_norm": 7.4861147230698055, "learning_rate": 9.442691377850585e-06, "loss": 0.3563, "step": 2818 }, { "epoch": 0.18, "grad_norm": 3.2111236880069094, "learning_rate": 9.442224006116935e-06, "loss": 0.3338, "step": 2819 }, { "epoch": 0.18, "grad_norm": 2.163038200171697, "learning_rate": 9.441756450067519e-06, "loss": 0.3481, "step": 2820 }, { "epoch": 0.18, "grad_norm": 2.91268043030134, "learning_rate": 9.441288709721737e-06, "loss": 0.3443, "step": 2821 }, { "epoch": 0.18, "grad_norm": 6.726556643663211, "learning_rate": 9.440820785098998e-06, "loss": 0.3482, "step": 2822 }, { "epoch": 0.18, "grad_norm": 2.2242300931295373, "learning_rate": 9.440352676218718e-06, "loss": 0.3519, "step": 2823 }, { "epoch": 0.18, "grad_norm": 6.962656578743003, "learning_rate": 9.439884383100319e-06, "loss": 0.3454, "step": 2824 }, { "epoch": 0.18, "grad_norm": 4.4737964293811485, "learning_rate": 9.43941590576323e-06, "loss": 0.3574, "step": 2825 }, { "epoch": 0.18, "grad_norm": 3.1128971723349954, "learning_rate": 9.438947244226889e-06, "loss": 0.3269, "step": 2826 }, { "epoch": 0.18, "grad_norm": 6.592958046916075, "learning_rate": 9.438478398510744e-06, "loss": 0.3734, "step": 2827 }, { "epoch": 0.18, "grad_norm": 2.5672723601376353, "learning_rate": 9.438009368634244e-06, "loss": 0.3451, "step": 2828 }, { "epoch": 0.18, "grad_norm": 4.442651213825842, "learning_rate": 9.437540154616856e-06, "loss": 0.3383, "step": 2829 }, { "epoch": 0.18, "grad_norm": 2.6106401136534685, "learning_rate": 9.437070756478043e-06, "loss": 0.3195, "step": 2830 }, { "epoch": 0.18, "grad_norm": 7.27165960013486, "learning_rate": 9.436601174237283e-06, "loss": 0.3486, "step": 2831 }, { "epoch": 0.18, "grad_norm": 3.241876553490455, "learning_rate": 9.43613140791406e-06, "loss": 0.3774, "step": 2832 }, { "epoch": 0.18, "grad_norm": 3.192553518065512, "learning_rate": 9.435661457527867e-06, "loss": 0.3465, "step": 2833 }, { "epoch": 0.18, "grad_norm": 4.755475426359888, "learning_rate": 9.435191323098201e-06, "loss": 0.3265, "step": 2834 }, { "epoch": 0.18, "grad_norm": 2.520657051379427, "learning_rate": 9.434721004644567e-06, "loss": 0.3434, "step": 2835 }, { "epoch": 0.18, "grad_norm": 6.363693194886535, "learning_rate": 9.434250502186483e-06, "loss": 0.3301, "step": 2836 }, { "epoch": 0.18, "grad_norm": 15.654449150747247, "learning_rate": 9.43377981574347e-06, "loss": 0.3455, "step": 2837 }, { "epoch": 0.18, "grad_norm": 3.0885392601630985, "learning_rate": 9.433308945335058e-06, "loss": 0.3423, "step": 2838 }, { "epoch": 0.18, "grad_norm": 5.274065645708655, "learning_rate": 9.432837890980781e-06, "loss": 0.3465, "step": 2839 }, { "epoch": 0.18, "grad_norm": 4.544574334105781, "learning_rate": 9.432366652700189e-06, "loss": 0.3329, "step": 2840 }, { "epoch": 0.18, "grad_norm": 2.8224492961804457, "learning_rate": 9.43189523051283e-06, "loss": 0.3466, "step": 2841 }, { "epoch": 0.18, "grad_norm": 5.262078457758155, "learning_rate": 9.431423624438264e-06, "loss": 0.3265, "step": 2842 }, { "epoch": 0.18, "grad_norm": 5.0143890497233015, "learning_rate": 9.430951834496064e-06, "loss": 0.3302, "step": 2843 }, { "epoch": 0.18, "grad_norm": 7.49727120577982, "learning_rate": 9.430479860705802e-06, "loss": 0.3324, "step": 2844 }, { "epoch": 0.18, "grad_norm": 3.1745162216049105, "learning_rate": 9.43000770308706e-06, "loss": 0.3195, "step": 2845 }, { "epoch": 0.18, "grad_norm": 7.5148866182805305, "learning_rate": 9.42953536165943e-06, "loss": 0.3487, "step": 2846 }, { "epoch": 0.18, "grad_norm": 3.0211553893290013, "learning_rate": 9.429062836442512e-06, "loss": 0.3315, "step": 2847 }, { "epoch": 0.18, "grad_norm": 2.42692257157526, "learning_rate": 9.428590127455908e-06, "loss": 0.3252, "step": 2848 }, { "epoch": 0.18, "grad_norm": 3.06852227211664, "learning_rate": 9.428117234719234e-06, "loss": 0.3433, "step": 2849 }, { "epoch": 0.18, "grad_norm": 2.9072314545965625, "learning_rate": 9.427644158252109e-06, "loss": 0.3688, "step": 2850 }, { "epoch": 0.18, "grad_norm": 2.852432088608627, "learning_rate": 9.427170898074166e-06, "loss": 0.3392, "step": 2851 }, { "epoch": 0.18, "grad_norm": 2.7173015240382314, "learning_rate": 9.426697454205039e-06, "loss": 0.3377, "step": 2852 }, { "epoch": 0.18, "grad_norm": 31.45710822411603, "learning_rate": 9.42622382666437e-06, "loss": 0.3548, "step": 2853 }, { "epoch": 0.18, "grad_norm": 3.633232496009611, "learning_rate": 9.425750015471813e-06, "loss": 0.3437, "step": 2854 }, { "epoch": 0.18, "grad_norm": 13.542315376695413, "learning_rate": 9.425276020647026e-06, "loss": 0.3293, "step": 2855 }, { "epoch": 0.18, "grad_norm": 3.719098152199475, "learning_rate": 9.42480184220968e-06, "loss": 0.3383, "step": 2856 }, { "epoch": 0.18, "grad_norm": 4.007903046452314, "learning_rate": 9.424327480179443e-06, "loss": 0.3383, "step": 2857 }, { "epoch": 0.18, "grad_norm": 0.6998075779836991, "learning_rate": 9.423852934576003e-06, "loss": 0.5152, "step": 2858 }, { "epoch": 0.18, "grad_norm": 5.315523560050483, "learning_rate": 9.423378205419044e-06, "loss": 0.3383, "step": 2859 }, { "epoch": 0.18, "grad_norm": 5.80912279664292, "learning_rate": 9.42290329272827e-06, "loss": 0.351, "step": 2860 }, { "epoch": 0.18, "grad_norm": 3.657376800209197, "learning_rate": 9.422428196523382e-06, "loss": 0.3621, "step": 2861 }, { "epoch": 0.18, "grad_norm": 3.3861887157105075, "learning_rate": 9.42195291682409e-06, "loss": 0.3439, "step": 2862 }, { "epoch": 0.18, "grad_norm": 5.104540718554345, "learning_rate": 9.421477453650118e-06, "loss": 0.3406, "step": 2863 }, { "epoch": 0.18, "grad_norm": 2.9449944011930014, "learning_rate": 9.421001807021196e-06, "loss": 0.3325, "step": 2864 }, { "epoch": 0.18, "grad_norm": 3.203217123864811, "learning_rate": 9.420525976957053e-06, "loss": 0.3271, "step": 2865 }, { "epoch": 0.18, "grad_norm": 0.5999976239510811, "learning_rate": 9.420049963477437e-06, "loss": 0.4892, "step": 2866 }, { "epoch": 0.18, "grad_norm": 6.444723589831954, "learning_rate": 9.419573766602097e-06, "loss": 0.3451, "step": 2867 }, { "epoch": 0.18, "grad_norm": 12.796242662466096, "learning_rate": 9.41909738635079e-06, "loss": 0.3507, "step": 2868 }, { "epoch": 0.18, "grad_norm": 2.75746100509902, "learning_rate": 9.418620822743284e-06, "loss": 0.3328, "step": 2869 }, { "epoch": 0.18, "grad_norm": 5.923284656401906, "learning_rate": 9.418144075799353e-06, "loss": 0.345, "step": 2870 }, { "epoch": 0.18, "grad_norm": 5.192724605693848, "learning_rate": 9.417667145538778e-06, "loss": 0.3493, "step": 2871 }, { "epoch": 0.18, "grad_norm": 15.947398314030883, "learning_rate": 9.417190031981343e-06, "loss": 0.3264, "step": 2872 }, { "epoch": 0.18, "grad_norm": 4.4545985955900775, "learning_rate": 9.41671273514685e-06, "loss": 0.3547, "step": 2873 }, { "epoch": 0.18, "grad_norm": 8.281984482714142, "learning_rate": 9.416235255055099e-06, "loss": 0.3592, "step": 2874 }, { "epoch": 0.18, "grad_norm": 6.176301761464211, "learning_rate": 9.415757591725905e-06, "loss": 0.366, "step": 2875 }, { "epoch": 0.18, "grad_norm": 8.554313991084236, "learning_rate": 9.415279745179084e-06, "loss": 0.3332, "step": 2876 }, { "epoch": 0.18, "grad_norm": 4.742543879468014, "learning_rate": 9.414801715434464e-06, "loss": 0.3368, "step": 2877 }, { "epoch": 0.18, "grad_norm": 5.879691455524878, "learning_rate": 9.41432350251188e-06, "loss": 0.3397, "step": 2878 }, { "epoch": 0.18, "grad_norm": 4.792333491669887, "learning_rate": 9.413845106431171e-06, "loss": 0.3477, "step": 2879 }, { "epoch": 0.18, "grad_norm": 2.6793008193667434, "learning_rate": 9.41336652721219e-06, "loss": 0.3394, "step": 2880 }, { "epoch": 0.18, "grad_norm": 7.5621857549984695, "learning_rate": 9.412887764874794e-06, "loss": 0.3129, "step": 2881 }, { "epoch": 0.18, "grad_norm": 2.3834765786758307, "learning_rate": 9.412408819438847e-06, "loss": 0.3301, "step": 2882 }, { "epoch": 0.18, "grad_norm": 4.512591564980338, "learning_rate": 9.41192969092422e-06, "loss": 0.3561, "step": 2883 }, { "epoch": 0.18, "grad_norm": 3.4747121695984333, "learning_rate": 9.411450379350791e-06, "loss": 0.3456, "step": 2884 }, { "epoch": 0.18, "grad_norm": 2.5570404248386294, "learning_rate": 9.410970884738453e-06, "loss": 0.3432, "step": 2885 }, { "epoch": 0.18, "grad_norm": 4.039301780760711, "learning_rate": 9.410491207107097e-06, "loss": 0.3325, "step": 2886 }, { "epoch": 0.18, "grad_norm": 3.0699921469434797, "learning_rate": 9.410011346476628e-06, "loss": 0.3136, "step": 2887 }, { "epoch": 0.18, "grad_norm": 3.257155959011966, "learning_rate": 9.409531302866954e-06, "loss": 0.3161, "step": 2888 }, { "epoch": 0.18, "grad_norm": 2.4186503244613218, "learning_rate": 9.409051076297994e-06, "loss": 0.3277, "step": 2889 }, { "epoch": 0.18, "grad_norm": 6.690839806498641, "learning_rate": 9.408570666789674e-06, "loss": 0.3273, "step": 2890 }, { "epoch": 0.18, "grad_norm": 2.5094390086689167, "learning_rate": 9.408090074361927e-06, "loss": 0.3208, "step": 2891 }, { "epoch": 0.18, "grad_norm": 26.004381051412743, "learning_rate": 9.407609299034693e-06, "loss": 0.3309, "step": 2892 }, { "epoch": 0.18, "grad_norm": 3.498975777808764, "learning_rate": 9.40712834082792e-06, "loss": 0.3423, "step": 2893 }, { "epoch": 0.18, "grad_norm": 20.55535006750138, "learning_rate": 9.406647199761564e-06, "loss": 0.3042, "step": 2894 }, { "epoch": 0.18, "grad_norm": 6.251041822218146, "learning_rate": 9.406165875855589e-06, "loss": 0.3514, "step": 2895 }, { "epoch": 0.18, "grad_norm": 0.6971310108682597, "learning_rate": 9.405684369129965e-06, "loss": 0.5189, "step": 2896 }, { "epoch": 0.18, "grad_norm": 5.029326151582228, "learning_rate": 9.40520267960467e-06, "loss": 0.3469, "step": 2897 }, { "epoch": 0.18, "grad_norm": 3.0755822619381115, "learning_rate": 9.404720807299694e-06, "loss": 0.3337, "step": 2898 }, { "epoch": 0.18, "grad_norm": 4.829548529739491, "learning_rate": 9.404238752235028e-06, "loss": 0.3392, "step": 2899 }, { "epoch": 0.18, "grad_norm": 6.697211143258505, "learning_rate": 9.40375651443067e-06, "loss": 0.3278, "step": 2900 }, { "epoch": 0.18, "grad_norm": 2.828282563810434, "learning_rate": 9.403274093906635e-06, "loss": 0.3295, "step": 2901 }, { "epoch": 0.18, "grad_norm": 3.471500784682395, "learning_rate": 9.402791490682938e-06, "loss": 0.3103, "step": 2902 }, { "epoch": 0.18, "grad_norm": 2.8394804349192406, "learning_rate": 9.4023087047796e-06, "loss": 0.3445, "step": 2903 }, { "epoch": 0.18, "grad_norm": 3.1678344497176716, "learning_rate": 9.401825736216654e-06, "loss": 0.3342, "step": 2904 }, { "epoch": 0.18, "grad_norm": 4.012326991399161, "learning_rate": 9.401342585014141e-06, "loss": 0.319, "step": 2905 }, { "epoch": 0.18, "grad_norm": 3.0374766040889085, "learning_rate": 9.400859251192104e-06, "loss": 0.3354, "step": 2906 }, { "epoch": 0.18, "grad_norm": 4.121481576580878, "learning_rate": 9.400375734770603e-06, "loss": 0.3411, "step": 2907 }, { "epoch": 0.18, "grad_norm": 4.287520775902825, "learning_rate": 9.399892035769696e-06, "loss": 0.3343, "step": 2908 }, { "epoch": 0.18, "grad_norm": 6.992660820053152, "learning_rate": 9.399408154209452e-06, "loss": 0.3436, "step": 2909 }, { "epoch": 0.18, "grad_norm": 4.734115944816152, "learning_rate": 9.39892409010995e-06, "loss": 0.3756, "step": 2910 }, { "epoch": 0.18, "grad_norm": 7.892929914048489, "learning_rate": 9.398439843491273e-06, "loss": 0.3462, "step": 2911 }, { "epoch": 0.18, "grad_norm": 8.99508133042363, "learning_rate": 9.397955414373518e-06, "loss": 0.331, "step": 2912 }, { "epoch": 0.18, "grad_norm": 8.587144228356296, "learning_rate": 9.397470802776777e-06, "loss": 0.3445, "step": 2913 }, { "epoch": 0.18, "grad_norm": 5.323607839701405, "learning_rate": 9.396986008721165e-06, "loss": 0.3257, "step": 2914 }, { "epoch": 0.18, "grad_norm": 16.71452420951083, "learning_rate": 9.396501032226793e-06, "loss": 0.3573, "step": 2915 }, { "epoch": 0.18, "grad_norm": 3.412411037982219, "learning_rate": 9.396015873313781e-06, "loss": 0.361, "step": 2916 }, { "epoch": 0.18, "grad_norm": 3.392461801012857, "learning_rate": 9.395530532002265e-06, "loss": 0.3419, "step": 2917 }, { "epoch": 0.18, "grad_norm": 2.1243772104218155, "learning_rate": 9.39504500831238e-06, "loss": 0.333, "step": 2918 }, { "epoch": 0.18, "grad_norm": 4.900944207336713, "learning_rate": 9.39455930226427e-06, "loss": 0.3153, "step": 2919 }, { "epoch": 0.18, "grad_norm": 2.4305900779326794, "learning_rate": 9.394073413878089e-06, "loss": 0.3236, "step": 2920 }, { "epoch": 0.18, "grad_norm": 6.136392600562325, "learning_rate": 9.393587343173998e-06, "loss": 0.3565, "step": 2921 }, { "epoch": 0.18, "grad_norm": 2.7350195142850207, "learning_rate": 9.393101090172164e-06, "loss": 0.327, "step": 2922 }, { "epoch": 0.18, "grad_norm": 4.63063528563335, "learning_rate": 9.392614654892761e-06, "loss": 0.3185, "step": 2923 }, { "epoch": 0.18, "grad_norm": 6.539628583735849, "learning_rate": 9.392128037355977e-06, "loss": 0.317, "step": 2924 }, { "epoch": 0.18, "grad_norm": 4.74064779015083, "learning_rate": 9.391641237581998e-06, "loss": 0.3446, "step": 2925 }, { "epoch": 0.18, "grad_norm": 3.6063991643633355, "learning_rate": 9.391154255591025e-06, "loss": 0.3148, "step": 2926 }, { "epoch": 0.18, "grad_norm": 6.9856885299036335, "learning_rate": 9.39066709140326e-06, "loss": 0.3384, "step": 2927 }, { "epoch": 0.18, "grad_norm": 1.995242282639124, "learning_rate": 9.39017974503892e-06, "loss": 0.3352, "step": 2928 }, { "epoch": 0.18, "grad_norm": 3.4789716544967804, "learning_rate": 9.389692216518224e-06, "loss": 0.3461, "step": 2929 }, { "epoch": 0.18, "grad_norm": 3.6322536561450733, "learning_rate": 9.3892045058614e-06, "loss": 0.3515, "step": 2930 }, { "epoch": 0.18, "grad_norm": 3.069745363414444, "learning_rate": 9.388716613088686e-06, "loss": 0.3188, "step": 2931 }, { "epoch": 0.18, "grad_norm": 1.8862009541003968, "learning_rate": 9.388228538220326e-06, "loss": 0.3243, "step": 2932 }, { "epoch": 0.18, "grad_norm": 2.801954251400872, "learning_rate": 9.387740281276568e-06, "loss": 0.3479, "step": 2933 }, { "epoch": 0.18, "grad_norm": 2.189852651221901, "learning_rate": 9.387251842277672e-06, "loss": 0.3114, "step": 2934 }, { "epoch": 0.18, "grad_norm": 16.32992466654144, "learning_rate": 9.386763221243905e-06, "loss": 0.346, "step": 2935 }, { "epoch": 0.18, "grad_norm": 3.4940048590995523, "learning_rate": 9.38627441819554e-06, "loss": 0.3467, "step": 2936 }, { "epoch": 0.18, "grad_norm": 0.9183728794530075, "learning_rate": 9.385785433152857e-06, "loss": 0.5135, "step": 2937 }, { "epoch": 0.18, "grad_norm": 2.947386359204182, "learning_rate": 9.385296266136148e-06, "loss": 0.3439, "step": 2938 }, { "epoch": 0.18, "grad_norm": 3.008728640681746, "learning_rate": 9.384806917165707e-06, "loss": 0.3548, "step": 2939 }, { "epoch": 0.18, "grad_norm": 2.5869169313035396, "learning_rate": 9.384317386261841e-06, "loss": 0.3093, "step": 2940 }, { "epoch": 0.18, "grad_norm": 2.7542506859998492, "learning_rate": 9.383827673444856e-06, "loss": 0.3358, "step": 2941 }, { "epoch": 0.19, "grad_norm": 1.8796394186241903, "learning_rate": 9.383337778735076e-06, "loss": 0.3122, "step": 2942 }, { "epoch": 0.19, "grad_norm": 2.122534408531552, "learning_rate": 9.382847702152827e-06, "loss": 0.3351, "step": 2943 }, { "epoch": 0.19, "grad_norm": 2.567870346776088, "learning_rate": 9.382357443718439e-06, "loss": 0.3158, "step": 2944 }, { "epoch": 0.19, "grad_norm": 7.072062774682705, "learning_rate": 9.381867003452258e-06, "loss": 0.3413, "step": 2945 }, { "epoch": 0.19, "grad_norm": 6.79819236834924, "learning_rate": 9.381376381374634e-06, "loss": 0.3347, "step": 2946 }, { "epoch": 0.19, "grad_norm": 2.8536223395008538, "learning_rate": 9.380885577505919e-06, "loss": 0.3458, "step": 2947 }, { "epoch": 0.19, "grad_norm": 3.572644934308234, "learning_rate": 9.380394591866482e-06, "loss": 0.3623, "step": 2948 }, { "epoch": 0.19, "grad_norm": 3.4068019338376945, "learning_rate": 9.379903424476692e-06, "loss": 0.3634, "step": 2949 }, { "epoch": 0.19, "grad_norm": 3.3813495653493635, "learning_rate": 9.379412075356928e-06, "loss": 0.3271, "step": 2950 }, { "epoch": 0.19, "grad_norm": 5.677791592204469, "learning_rate": 9.37892054452758e-06, "loss": 0.3262, "step": 2951 }, { "epoch": 0.19, "grad_norm": 3.9352185064751115, "learning_rate": 9.37842883200904e-06, "loss": 0.3411, "step": 2952 }, { "epoch": 0.19, "grad_norm": 2.0548663831312037, "learning_rate": 9.377936937821712e-06, "loss": 0.336, "step": 2953 }, { "epoch": 0.19, "grad_norm": 2.5100520512670617, "learning_rate": 9.377444861986004e-06, "loss": 0.3397, "step": 2954 }, { "epoch": 0.19, "grad_norm": 4.136633990219711, "learning_rate": 9.376952604522333e-06, "loss": 0.3238, "step": 2955 }, { "epoch": 0.19, "grad_norm": 18.523231953088995, "learning_rate": 9.376460165451125e-06, "loss": 0.3339, "step": 2956 }, { "epoch": 0.19, "grad_norm": 2.7411311506306433, "learning_rate": 9.37596754479281e-06, "loss": 0.3264, "step": 2957 }, { "epoch": 0.19, "grad_norm": 2.8916559043431933, "learning_rate": 9.37547474256783e-06, "loss": 0.3646, "step": 2958 }, { "epoch": 0.19, "grad_norm": 7.2563683169537105, "learning_rate": 9.37498175879663e-06, "loss": 0.3352, "step": 2959 }, { "epoch": 0.19, "grad_norm": 2.762369507271014, "learning_rate": 9.374488593499666e-06, "loss": 0.3786, "step": 2960 }, { "epoch": 0.19, "grad_norm": 12.92035920480478, "learning_rate": 9.3739952466974e-06, "loss": 0.3291, "step": 2961 }, { "epoch": 0.19, "grad_norm": 2.137139730565293, "learning_rate": 9.373501718410303e-06, "loss": 0.3303, "step": 2962 }, { "epoch": 0.19, "grad_norm": 3.367799803671223, "learning_rate": 9.373008008658852e-06, "loss": 0.3405, "step": 2963 }, { "epoch": 0.19, "grad_norm": 5.0506847971138855, "learning_rate": 9.372514117463531e-06, "loss": 0.3706, "step": 2964 }, { "epoch": 0.19, "grad_norm": 3.00407829290628, "learning_rate": 9.372020044844832e-06, "loss": 0.3437, "step": 2965 }, { "epoch": 0.19, "grad_norm": 2.7504908868335254, "learning_rate": 9.371525790823256e-06, "loss": 0.3101, "step": 2966 }, { "epoch": 0.19, "grad_norm": 4.506023327800912, "learning_rate": 9.371031355419311e-06, "loss": 0.3482, "step": 2967 }, { "epoch": 0.19, "grad_norm": 4.564355648983167, "learning_rate": 9.370536738653513e-06, "loss": 0.3386, "step": 2968 }, { "epoch": 0.19, "grad_norm": 38.763433171693926, "learning_rate": 9.37004194054638e-06, "loss": 0.3493, "step": 2969 }, { "epoch": 0.19, "grad_norm": 2.712320349004185, "learning_rate": 9.369546961118446e-06, "loss": 0.3124, "step": 2970 }, { "epoch": 0.19, "grad_norm": 3.9352975918037343, "learning_rate": 9.369051800390245e-06, "loss": 0.345, "step": 2971 }, { "epoch": 0.19, "grad_norm": 3.836155204661295, "learning_rate": 9.368556458382327e-06, "loss": 0.3134, "step": 2972 }, { "epoch": 0.19, "grad_norm": 5.151021719818088, "learning_rate": 9.368060935115243e-06, "loss": 0.3484, "step": 2973 }, { "epoch": 0.19, "grad_norm": 2.6961994500940625, "learning_rate": 9.36756523060955e-06, "loss": 0.3389, "step": 2974 }, { "epoch": 0.19, "grad_norm": 3.963762113080279, "learning_rate": 9.367069344885818e-06, "loss": 0.3335, "step": 2975 }, { "epoch": 0.19, "grad_norm": 6.356519212038357, "learning_rate": 9.366573277964623e-06, "loss": 0.3423, "step": 2976 }, { "epoch": 0.19, "grad_norm": 4.219726325252524, "learning_rate": 9.366077029866546e-06, "loss": 0.3258, "step": 2977 }, { "epoch": 0.19, "grad_norm": 3.647013140102176, "learning_rate": 9.365580600612178e-06, "loss": 0.3241, "step": 2978 }, { "epoch": 0.19, "grad_norm": 2.7452563532522922, "learning_rate": 9.365083990222118e-06, "loss": 0.3339, "step": 2979 }, { "epoch": 0.19, "grad_norm": 3.204300651698719, "learning_rate": 9.36458719871697e-06, "loss": 0.3233, "step": 2980 }, { "epoch": 0.19, "grad_norm": 4.127717263896908, "learning_rate": 9.364090226117346e-06, "loss": 0.3514, "step": 2981 }, { "epoch": 0.19, "grad_norm": 5.179443951254958, "learning_rate": 9.363593072443865e-06, "loss": 0.353, "step": 2982 }, { "epoch": 0.19, "grad_norm": 4.417908955575161, "learning_rate": 9.36309573771716e-06, "loss": 0.3516, "step": 2983 }, { "epoch": 0.19, "grad_norm": 0.9608663473809073, "learning_rate": 9.362598221957862e-06, "loss": 0.49, "step": 2984 }, { "epoch": 0.19, "grad_norm": 5.707532724763154, "learning_rate": 9.362100525186616e-06, "loss": 0.3401, "step": 2985 }, { "epoch": 0.19, "grad_norm": 4.223660909851831, "learning_rate": 9.361602647424069e-06, "loss": 0.3273, "step": 2986 }, { "epoch": 0.19, "grad_norm": 3.732123712149332, "learning_rate": 9.361104588690882e-06, "loss": 0.3412, "step": 2987 }, { "epoch": 0.19, "grad_norm": 3.3554936274931797, "learning_rate": 9.36060634900772e-06, "loss": 0.3259, "step": 2988 }, { "epoch": 0.19, "grad_norm": 6.429438026735335, "learning_rate": 9.360107928395254e-06, "loss": 0.3459, "step": 2989 }, { "epoch": 0.19, "grad_norm": 2.4984931577952993, "learning_rate": 9.359609326874169e-06, "loss": 0.3276, "step": 2990 }, { "epoch": 0.19, "grad_norm": 5.173541636929663, "learning_rate": 9.359110544465146e-06, "loss": 0.3234, "step": 2991 }, { "epoch": 0.19, "grad_norm": 2.5013455005168916, "learning_rate": 9.358611581188884e-06, "loss": 0.3576, "step": 2992 }, { "epoch": 0.19, "grad_norm": 4.090817040137613, "learning_rate": 9.358112437066088e-06, "loss": 0.3734, "step": 2993 }, { "epoch": 0.19, "grad_norm": 2.7682740799018313, "learning_rate": 9.357613112117464e-06, "loss": 0.3278, "step": 2994 }, { "epoch": 0.19, "grad_norm": 0.9360354284112907, "learning_rate": 9.357113606363732e-06, "loss": 0.487, "step": 2995 }, { "epoch": 0.19, "grad_norm": 2.3217461974291056, "learning_rate": 9.356613919825619e-06, "loss": 0.3453, "step": 2996 }, { "epoch": 0.19, "grad_norm": 1.8950696661048922, "learning_rate": 9.356114052523854e-06, "loss": 0.3449, "step": 2997 }, { "epoch": 0.19, "grad_norm": 2.0136953082508855, "learning_rate": 9.355614004479182e-06, "loss": 0.3282, "step": 2998 }, { "epoch": 0.19, "grad_norm": 5.2662429306810665, "learning_rate": 9.355113775712348e-06, "loss": 0.3294, "step": 2999 }, { "epoch": 0.19, "grad_norm": 3.899287811153719, "learning_rate": 9.354613366244108e-06, "loss": 0.3294, "step": 3000 }, { "epoch": 0.19, "grad_norm": 2.660311033876325, "learning_rate": 9.354112776095224e-06, "loss": 0.3208, "step": 3001 }, { "epoch": 0.19, "grad_norm": 6.027784651214224, "learning_rate": 9.353612005286467e-06, "loss": 0.332, "step": 3002 }, { "epoch": 0.19, "grad_norm": 7.437953303219593, "learning_rate": 9.353111053838616e-06, "loss": 0.3424, "step": 3003 }, { "epoch": 0.19, "grad_norm": 4.37422599106699, "learning_rate": 9.352609921772453e-06, "loss": 0.3381, "step": 3004 }, { "epoch": 0.19, "grad_norm": 4.086888289084342, "learning_rate": 9.352108609108775e-06, "loss": 0.3681, "step": 3005 }, { "epoch": 0.19, "grad_norm": 2.817461296972661, "learning_rate": 9.351607115868379e-06, "loss": 0.3162, "step": 3006 }, { "epoch": 0.19, "grad_norm": 2.6842867164610493, "learning_rate": 9.351105442072077e-06, "loss": 0.3308, "step": 3007 }, { "epoch": 0.19, "grad_norm": 7.074557487684647, "learning_rate": 9.350603587740681e-06, "loss": 0.3518, "step": 3008 }, { "epoch": 0.19, "grad_norm": 9.494142419307336, "learning_rate": 9.350101552895015e-06, "loss": 0.336, "step": 3009 }, { "epoch": 0.19, "grad_norm": 3.646759380098731, "learning_rate": 9.349599337555908e-06, "loss": 0.3509, "step": 3010 }, { "epoch": 0.19, "grad_norm": 4.209083844362035, "learning_rate": 9.3490969417442e-06, "loss": 0.3559, "step": 3011 }, { "epoch": 0.19, "grad_norm": 0.8798719763971381, "learning_rate": 9.348594365480731e-06, "loss": 0.5078, "step": 3012 }, { "epoch": 0.19, "grad_norm": 2.850350505727774, "learning_rate": 9.348091608786362e-06, "loss": 0.3135, "step": 3013 }, { "epoch": 0.19, "grad_norm": 2.3426099933361617, "learning_rate": 9.347588671681949e-06, "loss": 0.3241, "step": 3014 }, { "epoch": 0.19, "grad_norm": 4.129191082772908, "learning_rate": 9.347085554188358e-06, "loss": 0.3431, "step": 3015 }, { "epoch": 0.19, "grad_norm": 3.3763864590358255, "learning_rate": 9.346582256326466e-06, "loss": 0.3512, "step": 3016 }, { "epoch": 0.19, "grad_norm": 5.5564846425568994, "learning_rate": 9.346078778117157e-06, "loss": 0.3552, "step": 3017 }, { "epoch": 0.19, "grad_norm": 2.6375082356956074, "learning_rate": 9.34557511958132e-06, "loss": 0.3562, "step": 3018 }, { "epoch": 0.19, "grad_norm": 6.675383635989742, "learning_rate": 9.345071280739853e-06, "loss": 0.3266, "step": 3019 }, { "epoch": 0.19, "grad_norm": 3.8312994945653984, "learning_rate": 9.34456726161366e-06, "loss": 0.3541, "step": 3020 }, { "epoch": 0.19, "grad_norm": 2.981447370141572, "learning_rate": 9.344063062223653e-06, "loss": 0.3316, "step": 3021 }, { "epoch": 0.19, "grad_norm": 3.3336506962385037, "learning_rate": 9.343558682590757e-06, "loss": 0.3407, "step": 3022 }, { "epoch": 0.19, "grad_norm": 3.3114342319151735, "learning_rate": 9.343054122735894e-06, "loss": 0.3312, "step": 3023 }, { "epoch": 0.19, "grad_norm": 3.9229177179769605, "learning_rate": 9.342549382680002e-06, "loss": 0.3311, "step": 3024 }, { "epoch": 0.19, "grad_norm": 5.629641199016785, "learning_rate": 9.342044462444023e-06, "loss": 0.3182, "step": 3025 }, { "epoch": 0.19, "grad_norm": 4.004324983753988, "learning_rate": 9.341539362048906e-06, "loss": 0.3424, "step": 3026 }, { "epoch": 0.19, "grad_norm": 4.57432765267654, "learning_rate": 9.34103408151561e-06, "loss": 0.3384, "step": 3027 }, { "epoch": 0.19, "grad_norm": 3.6515450597736896, "learning_rate": 9.340528620865099e-06, "loss": 0.3425, "step": 3028 }, { "epoch": 0.19, "grad_norm": 7.024417049623289, "learning_rate": 9.340022980118346e-06, "loss": 0.3551, "step": 3029 }, { "epoch": 0.19, "grad_norm": 2.823128998136369, "learning_rate": 9.33951715929633e-06, "loss": 0.307, "step": 3030 }, { "epoch": 0.19, "grad_norm": 2.302761290109694, "learning_rate": 9.339011158420042e-06, "loss": 0.321, "step": 3031 }, { "epoch": 0.19, "grad_norm": 2.7861581439453387, "learning_rate": 9.338504977510471e-06, "loss": 0.3333, "step": 3032 }, { "epoch": 0.19, "grad_norm": 6.255436023938514, "learning_rate": 9.337998616588624e-06, "loss": 0.3197, "step": 3033 }, { "epoch": 0.19, "grad_norm": 3.7986968576143894, "learning_rate": 9.337492075675509e-06, "loss": 0.354, "step": 3034 }, { "epoch": 0.19, "grad_norm": 3.427175316812047, "learning_rate": 9.336985354792143e-06, "loss": 0.3422, "step": 3035 }, { "epoch": 0.19, "grad_norm": 6.262349587614101, "learning_rate": 9.336478453959552e-06, "loss": 0.3455, "step": 3036 }, { "epoch": 0.19, "grad_norm": 5.892854706867287, "learning_rate": 9.335971373198768e-06, "loss": 0.345, "step": 3037 }, { "epoch": 0.19, "grad_norm": 7.66963503363804, "learning_rate": 9.33546411253083e-06, "loss": 0.333, "step": 3038 }, { "epoch": 0.19, "grad_norm": 2.724888961257912, "learning_rate": 9.334956671976784e-06, "loss": 0.3304, "step": 3039 }, { "epoch": 0.19, "grad_norm": 3.5438924357371304, "learning_rate": 9.334449051557687e-06, "loss": 0.3718, "step": 3040 }, { "epoch": 0.19, "grad_norm": 5.697884305935806, "learning_rate": 9.3339412512946e-06, "loss": 0.3305, "step": 3041 }, { "epoch": 0.19, "grad_norm": 9.46906054185119, "learning_rate": 9.333433271208592e-06, "loss": 0.3239, "step": 3042 }, { "epoch": 0.19, "grad_norm": 3.9475025359327924, "learning_rate": 9.332925111320741e-06, "loss": 0.3392, "step": 3043 }, { "epoch": 0.19, "grad_norm": 3.746327794218007, "learning_rate": 9.33241677165213e-06, "loss": 0.3452, "step": 3044 }, { "epoch": 0.19, "grad_norm": 3.7648031217775895, "learning_rate": 9.331908252223853e-06, "loss": 0.3245, "step": 3045 }, { "epoch": 0.19, "grad_norm": 3.079432984670926, "learning_rate": 9.331399553057008e-06, "loss": 0.3239, "step": 3046 }, { "epoch": 0.19, "grad_norm": 4.08175775219486, "learning_rate": 9.330890674172703e-06, "loss": 0.3445, "step": 3047 }, { "epoch": 0.19, "grad_norm": 0.8930370994396265, "learning_rate": 9.33038161559205e-06, "loss": 0.5207, "step": 3048 }, { "epoch": 0.19, "grad_norm": 4.035597248186252, "learning_rate": 9.329872377336173e-06, "loss": 0.3343, "step": 3049 }, { "epoch": 0.19, "grad_norm": 7.783789110657642, "learning_rate": 9.3293629594262e-06, "loss": 0.3643, "step": 3050 }, { "epoch": 0.19, "grad_norm": 4.452415922961428, "learning_rate": 9.328853361883268e-06, "loss": 0.3556, "step": 3051 }, { "epoch": 0.19, "grad_norm": 3.4411137902450526, "learning_rate": 9.32834358472852e-06, "loss": 0.3363, "step": 3052 }, { "epoch": 0.19, "grad_norm": 4.742630188402976, "learning_rate": 9.327833627983111e-06, "loss": 0.3372, "step": 3053 }, { "epoch": 0.19, "grad_norm": 3.1818245706896975, "learning_rate": 9.327323491668197e-06, "loss": 0.3173, "step": 3054 }, { "epoch": 0.19, "grad_norm": 4.266553037326733, "learning_rate": 9.326813175804943e-06, "loss": 0.3431, "step": 3055 }, { "epoch": 0.19, "grad_norm": 3.3925988723286973, "learning_rate": 9.326302680414527e-06, "loss": 0.3535, "step": 3056 }, { "epoch": 0.19, "grad_norm": 2.8704980127405775, "learning_rate": 9.325792005518128e-06, "loss": 0.3413, "step": 3057 }, { "epoch": 0.19, "grad_norm": 3.718238107609266, "learning_rate": 9.325281151136936e-06, "loss": 0.3143, "step": 3058 }, { "epoch": 0.19, "grad_norm": 3.0086727732829504, "learning_rate": 9.324770117292146e-06, "loss": 0.3355, "step": 3059 }, { "epoch": 0.19, "grad_norm": 3.2553592507183766, "learning_rate": 9.324258904004961e-06, "loss": 0.3232, "step": 3060 }, { "epoch": 0.19, "grad_norm": 3.2727486642764254, "learning_rate": 9.323747511296594e-06, "loss": 0.3667, "step": 3061 }, { "epoch": 0.19, "grad_norm": 3.191618268891413, "learning_rate": 9.323235939188265e-06, "loss": 0.3226, "step": 3062 }, { "epoch": 0.19, "grad_norm": 2.8422933989115813, "learning_rate": 9.322724187701195e-06, "loss": 0.3238, "step": 3063 }, { "epoch": 0.19, "grad_norm": 2.052104265411854, "learning_rate": 9.322212256856622e-06, "loss": 0.3272, "step": 3064 }, { "epoch": 0.19, "grad_norm": 1.0366610133851966, "learning_rate": 9.321700146675785e-06, "loss": 0.5136, "step": 3065 }, { "epoch": 0.19, "grad_norm": 23.899692312448913, "learning_rate": 9.321187857179932e-06, "loss": 0.3569, "step": 3066 }, { "epoch": 0.19, "grad_norm": 4.692567049351729, "learning_rate": 9.320675388390321e-06, "loss": 0.3685, "step": 3067 }, { "epoch": 0.19, "grad_norm": 4.750752151923033, "learning_rate": 9.320162740328211e-06, "loss": 0.3474, "step": 3068 }, { "epoch": 0.19, "grad_norm": 3.9846018748256262, "learning_rate": 9.319649913014878e-06, "loss": 0.3264, "step": 3069 }, { "epoch": 0.19, "grad_norm": 4.545901753362208, "learning_rate": 9.319136906471598e-06, "loss": 0.3371, "step": 3070 }, { "epoch": 0.19, "grad_norm": 2.407767762102129, "learning_rate": 9.318623720719654e-06, "loss": 0.3256, "step": 3071 }, { "epoch": 0.19, "grad_norm": 2.6079027392657275, "learning_rate": 9.318110355780342e-06, "loss": 0.3315, "step": 3072 }, { "epoch": 0.19, "grad_norm": 4.657985241783449, "learning_rate": 9.317596811674962e-06, "loss": 0.3201, "step": 3073 }, { "epoch": 0.19, "grad_norm": 4.4748325443645935, "learning_rate": 9.317083088424822e-06, "loss": 0.3421, "step": 3074 }, { "epoch": 0.19, "grad_norm": 2.736417483683428, "learning_rate": 9.316569186051234e-06, "loss": 0.316, "step": 3075 }, { "epoch": 0.19, "grad_norm": 4.767032320678666, "learning_rate": 9.316055104575525e-06, "loss": 0.353, "step": 3076 }, { "epoch": 0.19, "grad_norm": 16.42877438132356, "learning_rate": 9.315540844019025e-06, "loss": 0.3299, "step": 3077 }, { "epoch": 0.19, "grad_norm": 3.1262551084630785, "learning_rate": 9.315026404403068e-06, "loss": 0.3619, "step": 3078 }, { "epoch": 0.19, "grad_norm": 2.8639683136116036, "learning_rate": 9.314511785749001e-06, "loss": 0.3223, "step": 3079 }, { "epoch": 0.19, "grad_norm": 3.035077716440803, "learning_rate": 9.313996988078178e-06, "loss": 0.3354, "step": 3080 }, { "epoch": 0.19, "grad_norm": 4.395584925167177, "learning_rate": 9.313482011411957e-06, "loss": 0.332, "step": 3081 }, { "epoch": 0.19, "grad_norm": 2.851017873781992, "learning_rate": 9.312966855771704e-06, "loss": 0.3354, "step": 3082 }, { "epoch": 0.19, "grad_norm": 4.727301729924524, "learning_rate": 9.312451521178798e-06, "loss": 0.3383, "step": 3083 }, { "epoch": 0.19, "grad_norm": 2.5206288984760876, "learning_rate": 9.311936007654618e-06, "loss": 0.3465, "step": 3084 }, { "epoch": 0.19, "grad_norm": 2.5301215803483834, "learning_rate": 9.311420315220553e-06, "loss": 0.3325, "step": 3085 }, { "epoch": 0.19, "grad_norm": 2.107286661096052, "learning_rate": 9.310904443898e-06, "loss": 0.3394, "step": 3086 }, { "epoch": 0.19, "grad_norm": 2.430047693361223, "learning_rate": 9.310388393708368e-06, "loss": 0.3134, "step": 3087 }, { "epoch": 0.19, "grad_norm": 3.0517604275704704, "learning_rate": 9.309872164673063e-06, "loss": 0.3392, "step": 3088 }, { "epoch": 0.19, "grad_norm": 0.8432433407508314, "learning_rate": 9.30935575681351e-06, "loss": 0.4864, "step": 3089 }, { "epoch": 0.19, "grad_norm": 3.094975924775508, "learning_rate": 9.30883917015113e-06, "loss": 0.3317, "step": 3090 }, { "epoch": 0.19, "grad_norm": 2.5260535063997995, "learning_rate": 9.308322404707359e-06, "loss": 0.3371, "step": 3091 }, { "epoch": 0.19, "grad_norm": 3.746233518227412, "learning_rate": 9.307805460503639e-06, "loss": 0.343, "step": 3092 }, { "epoch": 0.19, "grad_norm": 3.940903599970333, "learning_rate": 9.30728833756142e-06, "loss": 0.3205, "step": 3093 }, { "epoch": 0.19, "grad_norm": 6.4481602653343, "learning_rate": 9.306771035902155e-06, "loss": 0.315, "step": 3094 }, { "epoch": 0.19, "grad_norm": 0.6580255121552712, "learning_rate": 9.306253555547313e-06, "loss": 0.4848, "step": 3095 }, { "epoch": 0.19, "grad_norm": 2.9432766208942893, "learning_rate": 9.30573589651836e-06, "loss": 0.3398, "step": 3096 }, { "epoch": 0.19, "grad_norm": 2.5327925577779693, "learning_rate": 9.305218058836778e-06, "loss": 0.341, "step": 3097 }, { "epoch": 0.19, "grad_norm": 0.6333683879285672, "learning_rate": 9.304700042524052e-06, "loss": 0.4821, "step": 3098 }, { "epoch": 0.19, "grad_norm": 0.6309725759518167, "learning_rate": 9.304181847601675e-06, "loss": 0.5041, "step": 3099 }, { "epoch": 0.19, "grad_norm": 4.026104691477374, "learning_rate": 9.303663474091146e-06, "loss": 0.3304, "step": 3100 }, { "epoch": 0.2, "grad_norm": 2.704467245912126, "learning_rate": 9.303144922013979e-06, "loss": 0.3496, "step": 3101 }, { "epoch": 0.2, "grad_norm": 2.9587455227385404, "learning_rate": 9.302626191391684e-06, "loss": 0.3582, "step": 3102 }, { "epoch": 0.2, "grad_norm": 2.1080186806649497, "learning_rate": 9.302107282245785e-06, "loss": 0.3252, "step": 3103 }, { "epoch": 0.2, "grad_norm": 3.911883259041764, "learning_rate": 9.301588194597815e-06, "loss": 0.3305, "step": 3104 }, { "epoch": 0.2, "grad_norm": 2.475237511109905, "learning_rate": 9.30106892846931e-06, "loss": 0.3291, "step": 3105 }, { "epoch": 0.2, "grad_norm": 6.438939667144132, "learning_rate": 9.300549483881816e-06, "loss": 0.3584, "step": 3106 }, { "epoch": 0.2, "grad_norm": 5.738246580062574, "learning_rate": 9.300029860856886e-06, "loss": 0.3348, "step": 3107 }, { "epoch": 0.2, "grad_norm": 43.85318841066162, "learning_rate": 9.299510059416077e-06, "loss": 0.3466, "step": 3108 }, { "epoch": 0.2, "grad_norm": 4.173025583594784, "learning_rate": 9.298990079580959e-06, "loss": 0.3364, "step": 3109 }, { "epoch": 0.2, "grad_norm": 8.399942685315112, "learning_rate": 9.298469921373108e-06, "loss": 0.3367, "step": 3110 }, { "epoch": 0.2, "grad_norm": 4.2835717012918515, "learning_rate": 9.297949584814105e-06, "loss": 0.3471, "step": 3111 }, { "epoch": 0.2, "grad_norm": 2.326600887265767, "learning_rate": 9.297429069925539e-06, "loss": 0.3504, "step": 3112 }, { "epoch": 0.2, "grad_norm": 5.999845651609561, "learning_rate": 9.296908376729009e-06, "loss": 0.352, "step": 3113 }, { "epoch": 0.2, "grad_norm": 3.3929357335956967, "learning_rate": 9.296387505246116e-06, "loss": 0.3213, "step": 3114 }, { "epoch": 0.2, "grad_norm": 3.3982066984923813, "learning_rate": 9.295866455498477e-06, "loss": 0.3705, "step": 3115 }, { "epoch": 0.2, "grad_norm": 5.6702218223034135, "learning_rate": 9.295345227507707e-06, "loss": 0.3333, "step": 3116 }, { "epoch": 0.2, "grad_norm": 5.803485576760056, "learning_rate": 9.294823821295433e-06, "loss": 0.3195, "step": 3117 }, { "epoch": 0.2, "grad_norm": 1.9781165440263238, "learning_rate": 9.294302236883293e-06, "loss": 0.3333, "step": 3118 }, { "epoch": 0.2, "grad_norm": 0.9132682386226283, "learning_rate": 9.293780474292923e-06, "loss": 0.5514, "step": 3119 }, { "epoch": 0.2, "grad_norm": 2.9893112971431743, "learning_rate": 9.293258533545975e-06, "loss": 0.3381, "step": 3120 }, { "epoch": 0.2, "grad_norm": 2.947785338266833, "learning_rate": 9.292736414664105e-06, "loss": 0.3055, "step": 3121 }, { "epoch": 0.2, "grad_norm": 2.423557293362017, "learning_rate": 9.292214117668979e-06, "loss": 0.3434, "step": 3122 }, { "epoch": 0.2, "grad_norm": 3.0290706973690344, "learning_rate": 9.291691642582262e-06, "loss": 0.3491, "step": 3123 }, { "epoch": 0.2, "grad_norm": 22.13754401247412, "learning_rate": 9.291168989425636e-06, "loss": 0.3557, "step": 3124 }, { "epoch": 0.2, "grad_norm": 2.111758513326185, "learning_rate": 9.290646158220789e-06, "loss": 0.3426, "step": 3125 }, { "epoch": 0.2, "grad_norm": 3.1929741948548553, "learning_rate": 9.29012314898941e-06, "loss": 0.3313, "step": 3126 }, { "epoch": 0.2, "grad_norm": 4.386553889091693, "learning_rate": 9.289599961753201e-06, "loss": 0.33, "step": 3127 }, { "epoch": 0.2, "grad_norm": 5.792576308994678, "learning_rate": 9.289076596533873e-06, "loss": 0.3516, "step": 3128 }, { "epoch": 0.2, "grad_norm": 2.7360230492857958, "learning_rate": 9.288553053353136e-06, "loss": 0.3513, "step": 3129 }, { "epoch": 0.2, "grad_norm": 4.696616546068349, "learning_rate": 9.288029332232718e-06, "loss": 0.3338, "step": 3130 }, { "epoch": 0.2, "grad_norm": 2.458912832502881, "learning_rate": 9.287505433194344e-06, "loss": 0.3319, "step": 3131 }, { "epoch": 0.2, "grad_norm": 3.5319191980373486, "learning_rate": 9.286981356259756e-06, "loss": 0.3549, "step": 3132 }, { "epoch": 0.2, "grad_norm": 2.5404540188621425, "learning_rate": 9.286457101450695e-06, "loss": 0.333, "step": 3133 }, { "epoch": 0.2, "grad_norm": 0.8531424140218881, "learning_rate": 9.285932668788917e-06, "loss": 0.507, "step": 3134 }, { "epoch": 0.2, "grad_norm": 2.499563968202511, "learning_rate": 9.28540805829618e-06, "loss": 0.3394, "step": 3135 }, { "epoch": 0.2, "grad_norm": 3.601357275161658, "learning_rate": 9.284883269994249e-06, "loss": 0.3343, "step": 3136 }, { "epoch": 0.2, "grad_norm": 3.4033310464057345, "learning_rate": 9.284358303904902e-06, "loss": 0.3346, "step": 3137 }, { "epoch": 0.2, "grad_norm": 3.5510444402774155, "learning_rate": 9.28383316004992e-06, "loss": 0.3308, "step": 3138 }, { "epoch": 0.2, "grad_norm": 0.6581716057280045, "learning_rate": 9.28330783845109e-06, "loss": 0.5153, "step": 3139 }, { "epoch": 0.2, "grad_norm": 5.766367521467626, "learning_rate": 9.28278233913021e-06, "loss": 0.3639, "step": 3140 }, { "epoch": 0.2, "grad_norm": 2.648176503325645, "learning_rate": 9.282256662109082e-06, "loss": 0.3282, "step": 3141 }, { "epoch": 0.2, "grad_norm": 3.252889421194652, "learning_rate": 9.281730807409522e-06, "loss": 0.345, "step": 3142 }, { "epoch": 0.2, "grad_norm": 2.567440367216447, "learning_rate": 9.281204775053342e-06, "loss": 0.3296, "step": 3143 }, { "epoch": 0.2, "grad_norm": 3.164396048956933, "learning_rate": 9.280678565062375e-06, "loss": 0.3431, "step": 3144 }, { "epoch": 0.2, "grad_norm": 1.8443810127063882, "learning_rate": 9.280152177458449e-06, "loss": 0.3378, "step": 3145 }, { "epoch": 0.2, "grad_norm": 3.347160350923574, "learning_rate": 9.279625612263407e-06, "loss": 0.3405, "step": 3146 }, { "epoch": 0.2, "grad_norm": 4.193585439101035, "learning_rate": 9.279098869499097e-06, "loss": 0.3184, "step": 3147 }, { "epoch": 0.2, "grad_norm": 3.1915934188963817, "learning_rate": 9.278571949187376e-06, "loss": 0.3387, "step": 3148 }, { "epoch": 0.2, "grad_norm": 1.981672700854984, "learning_rate": 9.278044851350103e-06, "loss": 0.3314, "step": 3149 }, { "epoch": 0.2, "grad_norm": 0.8244792300051936, "learning_rate": 9.277517576009152e-06, "loss": 0.5116, "step": 3150 }, { "epoch": 0.2, "grad_norm": 3.0340519588206303, "learning_rate": 9.276990123186397e-06, "loss": 0.3521, "step": 3151 }, { "epoch": 0.2, "grad_norm": 0.6617316786693868, "learning_rate": 9.276462492903726e-06, "loss": 0.4951, "step": 3152 }, { "epoch": 0.2, "grad_norm": 2.3169855087654603, "learning_rate": 9.275934685183032e-06, "loss": 0.3192, "step": 3153 }, { "epoch": 0.2, "grad_norm": 3.0251337233818814, "learning_rate": 9.275406700046211e-06, "loss": 0.3288, "step": 3154 }, { "epoch": 0.2, "grad_norm": 3.1191920647426374, "learning_rate": 9.274878537515173e-06, "loss": 0.358, "step": 3155 }, { "epoch": 0.2, "grad_norm": 3.0121696385723533, "learning_rate": 9.274350197611832e-06, "loss": 0.3382, "step": 3156 }, { "epoch": 0.2, "grad_norm": 2.0404721997473247, "learning_rate": 9.273821680358107e-06, "loss": 0.3168, "step": 3157 }, { "epoch": 0.2, "grad_norm": 3.273886983149378, "learning_rate": 9.27329298577593e-06, "loss": 0.3313, "step": 3158 }, { "epoch": 0.2, "grad_norm": 3.057467819196444, "learning_rate": 9.272764113887237e-06, "loss": 0.3255, "step": 3159 }, { "epoch": 0.2, "grad_norm": 2.2025889870920823, "learning_rate": 9.272235064713974e-06, "loss": 0.3302, "step": 3160 }, { "epoch": 0.2, "grad_norm": 9.935982182351442, "learning_rate": 9.271705838278086e-06, "loss": 0.3437, "step": 3161 }, { "epoch": 0.2, "grad_norm": 1.7691037434104837, "learning_rate": 9.27117643460154e-06, "loss": 0.3425, "step": 3162 }, { "epoch": 0.2, "grad_norm": 2.309836296686852, "learning_rate": 9.270646853706293e-06, "loss": 0.3575, "step": 3163 }, { "epoch": 0.2, "grad_norm": 2.9099254762908546, "learning_rate": 9.270117095614324e-06, "loss": 0.3441, "step": 3164 }, { "epoch": 0.2, "grad_norm": 3.3344908505501163, "learning_rate": 9.269587160347612e-06, "loss": 0.3233, "step": 3165 }, { "epoch": 0.2, "grad_norm": 1.9357850985698077, "learning_rate": 9.269057047928144e-06, "loss": 0.3472, "step": 3166 }, { "epoch": 0.2, "grad_norm": 3.8801315563559973, "learning_rate": 9.268526758377919e-06, "loss": 0.3638, "step": 3167 }, { "epoch": 0.2, "grad_norm": 2.7230291838491256, "learning_rate": 9.267996291718936e-06, "loss": 0.3573, "step": 3168 }, { "epoch": 0.2, "grad_norm": 9.07079599260868, "learning_rate": 9.267465647973206e-06, "loss": 0.3545, "step": 3169 }, { "epoch": 0.2, "grad_norm": 2.1648255080740424, "learning_rate": 9.266934827162746e-06, "loss": 0.34, "step": 3170 }, { "epoch": 0.2, "grad_norm": 2.1379391469547526, "learning_rate": 9.26640382930958e-06, "loss": 0.3466, "step": 3171 }, { "epoch": 0.2, "grad_norm": 1.8543325603940297, "learning_rate": 9.265872654435743e-06, "loss": 0.3387, "step": 3172 }, { "epoch": 0.2, "grad_norm": 1.8881740805926435, "learning_rate": 9.26534130256327e-06, "loss": 0.3239, "step": 3173 }, { "epoch": 0.2, "grad_norm": 3.49069838949244, "learning_rate": 9.264809773714214e-06, "loss": 0.3218, "step": 3174 }, { "epoch": 0.2, "grad_norm": 2.6883041794621128, "learning_rate": 9.264278067910625e-06, "loss": 0.3517, "step": 3175 }, { "epoch": 0.2, "grad_norm": 4.842922373876776, "learning_rate": 9.263746185174562e-06, "loss": 0.3417, "step": 3176 }, { "epoch": 0.2, "grad_norm": 2.839745658010722, "learning_rate": 9.263214125528097e-06, "loss": 0.3349, "step": 3177 }, { "epoch": 0.2, "grad_norm": 3.713975034051698, "learning_rate": 9.262681888993306e-06, "loss": 0.3444, "step": 3178 }, { "epoch": 0.2, "grad_norm": 2.4326683224951275, "learning_rate": 9.262149475592272e-06, "loss": 0.3554, "step": 3179 }, { "epoch": 0.2, "grad_norm": 3.1903551994351953, "learning_rate": 9.261616885347087e-06, "loss": 0.3523, "step": 3180 }, { "epoch": 0.2, "grad_norm": 11.988480030316767, "learning_rate": 9.261084118279846e-06, "loss": 0.3268, "step": 3181 }, { "epoch": 0.2, "grad_norm": 3.1729441910723652, "learning_rate": 9.26055117441266e-06, "loss": 0.3306, "step": 3182 }, { "epoch": 0.2, "grad_norm": 1.8335747055425395, "learning_rate": 9.260018053767634e-06, "loss": 0.3294, "step": 3183 }, { "epoch": 0.2, "grad_norm": 2.483853626654878, "learning_rate": 9.259484756366894e-06, "loss": 0.3334, "step": 3184 }, { "epoch": 0.2, "grad_norm": 1.8398749503857479, "learning_rate": 9.258951282232567e-06, "loss": 0.3409, "step": 3185 }, { "epoch": 0.2, "grad_norm": 3.3869859868712218, "learning_rate": 9.258417631386784e-06, "loss": 0.3438, "step": 3186 }, { "epoch": 0.2, "grad_norm": 5.067702151113323, "learning_rate": 9.257883803851692e-06, "loss": 0.3668, "step": 3187 }, { "epoch": 0.2, "grad_norm": 3.1551763649208757, "learning_rate": 9.257349799649437e-06, "loss": 0.3519, "step": 3188 }, { "epoch": 0.2, "grad_norm": 2.8092883233789556, "learning_rate": 9.256815618802178e-06, "loss": 0.3489, "step": 3189 }, { "epoch": 0.2, "grad_norm": 2.8746975925359735, "learning_rate": 9.256281261332076e-06, "loss": 0.3201, "step": 3190 }, { "epoch": 0.2, "grad_norm": 3.940141382151419, "learning_rate": 9.255746727261305e-06, "loss": 0.3537, "step": 3191 }, { "epoch": 0.2, "grad_norm": 3.6974583371840613, "learning_rate": 9.255212016612044e-06, "loss": 0.3213, "step": 3192 }, { "epoch": 0.2, "grad_norm": 8.844741808366749, "learning_rate": 9.254677129406477e-06, "loss": 0.3473, "step": 3193 }, { "epoch": 0.2, "grad_norm": 3.2017702725927175, "learning_rate": 9.254142065666802e-06, "loss": 0.3399, "step": 3194 }, { "epoch": 0.2, "grad_norm": 2.8349854964968766, "learning_rate": 9.253606825415213e-06, "loss": 0.3329, "step": 3195 }, { "epoch": 0.2, "grad_norm": 4.461347680543548, "learning_rate": 9.253071408673924e-06, "loss": 0.3349, "step": 3196 }, { "epoch": 0.2, "grad_norm": 2.728796034177238, "learning_rate": 9.252535815465146e-06, "loss": 0.3277, "step": 3197 }, { "epoch": 0.2, "grad_norm": 2.1476735913302596, "learning_rate": 9.252000045811105e-06, "loss": 0.3488, "step": 3198 }, { "epoch": 0.2, "grad_norm": 2.572846878344725, "learning_rate": 9.25146409973403e-06, "loss": 0.3329, "step": 3199 }, { "epoch": 0.2, "grad_norm": 47.92774792019559, "learning_rate": 9.250927977256155e-06, "loss": 0.3573, "step": 3200 }, { "epoch": 0.2, "grad_norm": 6.197579392795628, "learning_rate": 9.25039167839973e-06, "loss": 0.3226, "step": 3201 }, { "epoch": 0.2, "grad_norm": 2.9942998432191135, "learning_rate": 9.249855203187007e-06, "loss": 0.3523, "step": 3202 }, { "epoch": 0.2, "grad_norm": 5.009663714699753, "learning_rate": 9.249318551640238e-06, "loss": 0.3222, "step": 3203 }, { "epoch": 0.2, "grad_norm": 5.592879992005864, "learning_rate": 9.2487817237817e-06, "loss": 0.3509, "step": 3204 }, { "epoch": 0.2, "grad_norm": 2.7727777628097505, "learning_rate": 9.248244719633659e-06, "loss": 0.3264, "step": 3205 }, { "epoch": 0.2, "grad_norm": 6.90503630217772, "learning_rate": 9.247707539218398e-06, "loss": 0.3486, "step": 3206 }, { "epoch": 0.2, "grad_norm": 6.495555896714801, "learning_rate": 9.247170182558207e-06, "loss": 0.3121, "step": 3207 }, { "epoch": 0.2, "grad_norm": 3.186933314109556, "learning_rate": 9.246632649675382e-06, "loss": 0.3618, "step": 3208 }, { "epoch": 0.2, "grad_norm": 3.0725383590343203, "learning_rate": 9.246094940592224e-06, "loss": 0.2995, "step": 3209 }, { "epoch": 0.2, "grad_norm": 2.7157495868341974, "learning_rate": 9.245557055331046e-06, "loss": 0.346, "step": 3210 }, { "epoch": 0.2, "grad_norm": 2.1595383518209115, "learning_rate": 9.245018993914166e-06, "loss": 0.32, "step": 3211 }, { "epoch": 0.2, "grad_norm": 14.779679331049103, "learning_rate": 9.244480756363904e-06, "loss": 0.3803, "step": 3212 }, { "epoch": 0.2, "grad_norm": 6.372475177234385, "learning_rate": 9.243942342702601e-06, "loss": 0.3582, "step": 3213 }, { "epoch": 0.2, "grad_norm": 2.2102588382653683, "learning_rate": 9.24340375295259e-06, "loss": 0.3208, "step": 3214 }, { "epoch": 0.2, "grad_norm": 3.1949738710810096, "learning_rate": 9.24286498713622e-06, "loss": 0.3499, "step": 3215 }, { "epoch": 0.2, "grad_norm": 4.516471386721431, "learning_rate": 9.242326045275846e-06, "loss": 0.3384, "step": 3216 }, { "epoch": 0.2, "grad_norm": 3.246970065640399, "learning_rate": 9.24178692739383e-06, "loss": 0.32, "step": 3217 }, { "epoch": 0.2, "grad_norm": 2.8097854465160834, "learning_rate": 9.241247633512539e-06, "loss": 0.3363, "step": 3218 }, { "epoch": 0.2, "grad_norm": 3.9042000808275117, "learning_rate": 9.240708163654351e-06, "loss": 0.3487, "step": 3219 }, { "epoch": 0.2, "grad_norm": 2.6299503296182896, "learning_rate": 9.240168517841648e-06, "loss": 0.3395, "step": 3220 }, { "epoch": 0.2, "grad_norm": 2.5934262865465665, "learning_rate": 9.239628696096823e-06, "loss": 0.3383, "step": 3221 }, { "epoch": 0.2, "grad_norm": 2.4141524686623534, "learning_rate": 9.239088698442272e-06, "loss": 0.3247, "step": 3222 }, { "epoch": 0.2, "grad_norm": 2.7875869828206374, "learning_rate": 9.238548524900401e-06, "loss": 0.3478, "step": 3223 }, { "epoch": 0.2, "grad_norm": 5.2898484715310765, "learning_rate": 9.238008175493625e-06, "loss": 0.3232, "step": 3224 }, { "epoch": 0.2, "grad_norm": 4.4207614453299655, "learning_rate": 9.23746765024436e-06, "loss": 0.3275, "step": 3225 }, { "epoch": 0.2, "grad_norm": 4.932672407280655, "learning_rate": 9.236926949175037e-06, "loss": 0.3167, "step": 3226 }, { "epoch": 0.2, "grad_norm": 2.8429470125541183, "learning_rate": 9.23638607230809e-06, "loss": 0.3219, "step": 3227 }, { "epoch": 0.2, "grad_norm": 4.862949427711048, "learning_rate": 9.235845019665959e-06, "loss": 0.3273, "step": 3228 }, { "epoch": 0.2, "grad_norm": 3.320392874054271, "learning_rate": 9.235303791271094e-06, "loss": 0.3384, "step": 3229 }, { "epoch": 0.2, "grad_norm": 7.5602851974905425, "learning_rate": 9.234762387145952e-06, "loss": 0.3367, "step": 3230 }, { "epoch": 0.2, "grad_norm": 3.0776500161744, "learning_rate": 9.234220807312998e-06, "loss": 0.3207, "step": 3231 }, { "epoch": 0.2, "grad_norm": 2.8843792434976, "learning_rate": 9.233679051794701e-06, "loss": 0.36, "step": 3232 }, { "epoch": 0.2, "grad_norm": 2.4680316916760128, "learning_rate": 9.23313712061354e-06, "loss": 0.3544, "step": 3233 }, { "epoch": 0.2, "grad_norm": 2.0833265338170346, "learning_rate": 9.232595013792004e-06, "loss": 0.3251, "step": 3234 }, { "epoch": 0.2, "grad_norm": 2.291851370050809, "learning_rate": 9.232052731352578e-06, "loss": 0.3223, "step": 3235 }, { "epoch": 0.2, "grad_norm": 2.4115139121529108, "learning_rate": 9.23151027331777e-06, "loss": 0.3164, "step": 3236 }, { "epoch": 0.2, "grad_norm": 2.2953978123927525, "learning_rate": 9.230967639710085e-06, "loss": 0.3322, "step": 3237 }, { "epoch": 0.2, "grad_norm": 2.742944667903736, "learning_rate": 9.230424830552035e-06, "loss": 0.3339, "step": 3238 }, { "epoch": 0.2, "grad_norm": 8.666546241009362, "learning_rate": 9.229881845866148e-06, "loss": 0.3319, "step": 3239 }, { "epoch": 0.2, "grad_norm": 5.7040286816864745, "learning_rate": 9.229338685674948e-06, "loss": 0.341, "step": 3240 }, { "epoch": 0.2, "grad_norm": 4.363329064621435, "learning_rate": 9.228795350000977e-06, "loss": 0.3384, "step": 3241 }, { "epoch": 0.2, "grad_norm": 2.861605510467439, "learning_rate": 9.228251838866772e-06, "loss": 0.3223, "step": 3242 }, { "epoch": 0.2, "grad_norm": 4.940652153111325, "learning_rate": 9.227708152294889e-06, "loss": 0.3407, "step": 3243 }, { "epoch": 0.2, "grad_norm": 5.22711923987941, "learning_rate": 9.227164290307887e-06, "loss": 0.3381, "step": 3244 }, { "epoch": 0.2, "grad_norm": 3.673798403637111, "learning_rate": 9.22662025292833e-06, "loss": 0.3444, "step": 3245 }, { "epoch": 0.2, "grad_norm": 13.555285733453422, "learning_rate": 9.226076040178788e-06, "loss": 0.3213, "step": 3246 }, { "epoch": 0.2, "grad_norm": 4.820065874417691, "learning_rate": 9.225531652081848e-06, "loss": 0.3215, "step": 3247 }, { "epoch": 0.2, "grad_norm": 6.12254801547705, "learning_rate": 9.224987088660094e-06, "loss": 0.3258, "step": 3248 }, { "epoch": 0.2, "grad_norm": 3.7767350258221444, "learning_rate": 9.22444234993612e-06, "loss": 0.353, "step": 3249 }, { "epoch": 0.2, "grad_norm": 5.323233219968319, "learning_rate": 9.223897435932532e-06, "loss": 0.3495, "step": 3250 }, { "epoch": 0.2, "grad_norm": 2.0002888207855123, "learning_rate": 9.223352346671935e-06, "loss": 0.3204, "step": 3251 }, { "epoch": 0.2, "grad_norm": 2.4286053134398724, "learning_rate": 9.222807082176948e-06, "loss": 0.3241, "step": 3252 }, { "epoch": 0.2, "grad_norm": 2.0042722335846075, "learning_rate": 9.222261642470194e-06, "loss": 0.3569, "step": 3253 }, { "epoch": 0.2, "grad_norm": 2.923639477056454, "learning_rate": 9.221716027574306e-06, "loss": 0.3627, "step": 3254 }, { "epoch": 0.2, "grad_norm": 3.0732575508603124, "learning_rate": 9.22117023751192e-06, "loss": 0.3112, "step": 3255 }, { "epoch": 0.2, "grad_norm": 2.941078040252632, "learning_rate": 9.220624272305683e-06, "loss": 0.3574, "step": 3256 }, { "epoch": 0.2, "grad_norm": 3.6451251005631082, "learning_rate": 9.22007813197825e-06, "loss": 0.3392, "step": 3257 }, { "epoch": 0.2, "grad_norm": 14.28083926606095, "learning_rate": 9.219531816552279e-06, "loss": 0.3649, "step": 3258 }, { "epoch": 0.2, "grad_norm": 7.9430400688118254, "learning_rate": 9.218985326050439e-06, "loss": 0.3463, "step": 3259 }, { "epoch": 0.21, "grad_norm": 26.166018466888865, "learning_rate": 9.218438660495401e-06, "loss": 0.3411, "step": 3260 }, { "epoch": 0.21, "grad_norm": 2.6872472537936742, "learning_rate": 9.217891819909854e-06, "loss": 0.3653, "step": 3261 }, { "epoch": 0.21, "grad_norm": 4.436469040818058, "learning_rate": 9.217344804316482e-06, "loss": 0.3364, "step": 3262 }, { "epoch": 0.21, "grad_norm": 5.1852007080247144, "learning_rate": 9.216797613737983e-06, "loss": 0.3385, "step": 3263 }, { "epoch": 0.21, "grad_norm": 3.3268107476088984, "learning_rate": 9.21625024819706e-06, "loss": 0.3381, "step": 3264 }, { "epoch": 0.21, "grad_norm": 9.110258063796547, "learning_rate": 9.215702707716427e-06, "loss": 0.3365, "step": 3265 }, { "epoch": 0.21, "grad_norm": 2.7578030950227537, "learning_rate": 9.2151549923188e-06, "loss": 0.3405, "step": 3266 }, { "epoch": 0.21, "grad_norm": 3.871857191905456, "learning_rate": 9.214607102026905e-06, "loss": 0.3576, "step": 3267 }, { "epoch": 0.21, "grad_norm": 2.68667289240282, "learning_rate": 9.214059036863477e-06, "loss": 0.3373, "step": 3268 }, { "epoch": 0.21, "grad_norm": 2.376630280488469, "learning_rate": 9.213510796851253e-06, "loss": 0.3487, "step": 3269 }, { "epoch": 0.21, "grad_norm": 2.6182677959069554, "learning_rate": 9.212962382012981e-06, "loss": 0.3455, "step": 3270 }, { "epoch": 0.21, "grad_norm": 6.791047510562209, "learning_rate": 9.21241379237142e-06, "loss": 0.3583, "step": 3271 }, { "epoch": 0.21, "grad_norm": 8.172415132107194, "learning_rate": 9.211865027949328e-06, "loss": 0.3175, "step": 3272 }, { "epoch": 0.21, "grad_norm": 2.5181523258848157, "learning_rate": 9.211316088769473e-06, "loss": 0.3381, "step": 3273 }, { "epoch": 0.21, "grad_norm": 4.622769907928998, "learning_rate": 9.210766974854634e-06, "loss": 0.3399, "step": 3274 }, { "epoch": 0.21, "grad_norm": 3.3280229125343714, "learning_rate": 9.210217686227593e-06, "loss": 0.3402, "step": 3275 }, { "epoch": 0.21, "grad_norm": 1.9442166229888342, "learning_rate": 9.209668222911143e-06, "loss": 0.341, "step": 3276 }, { "epoch": 0.21, "grad_norm": 3.0759456934942504, "learning_rate": 9.209118584928082e-06, "loss": 0.3301, "step": 3277 }, { "epoch": 0.21, "grad_norm": 3.1493635590756597, "learning_rate": 9.208568772301213e-06, "loss": 0.3369, "step": 3278 }, { "epoch": 0.21, "grad_norm": 2.8006361457377222, "learning_rate": 9.208018785053353e-06, "loss": 0.3372, "step": 3279 }, { "epoch": 0.21, "grad_norm": 3.5474933450527315, "learning_rate": 9.207468623207317e-06, "loss": 0.3464, "step": 3280 }, { "epoch": 0.21, "grad_norm": 2.7352724708460774, "learning_rate": 9.206918286785936e-06, "loss": 0.3218, "step": 3281 }, { "epoch": 0.21, "grad_norm": 2.254842152358999, "learning_rate": 9.206367775812042e-06, "loss": 0.3443, "step": 3282 }, { "epoch": 0.21, "grad_norm": 2.4955303744267425, "learning_rate": 9.205817090308478e-06, "loss": 0.3364, "step": 3283 }, { "epoch": 0.21, "grad_norm": 2.0817601413132634, "learning_rate": 9.205266230298092e-06, "loss": 0.3414, "step": 3284 }, { "epoch": 0.21, "grad_norm": 3.109554213401208, "learning_rate": 9.204715195803742e-06, "loss": 0.3199, "step": 3285 }, { "epoch": 0.21, "grad_norm": 2.138672698305992, "learning_rate": 9.20416398684829e-06, "loss": 0.3211, "step": 3286 }, { "epoch": 0.21, "grad_norm": 2.1371287351578165, "learning_rate": 9.203612603454605e-06, "loss": 0.3382, "step": 3287 }, { "epoch": 0.21, "grad_norm": 3.2386771843454425, "learning_rate": 9.203061045645567e-06, "loss": 0.3231, "step": 3288 }, { "epoch": 0.21, "grad_norm": 5.523871963082305, "learning_rate": 9.202509313444061e-06, "loss": 0.3278, "step": 3289 }, { "epoch": 0.21, "grad_norm": 2.824293203598094, "learning_rate": 9.20195740687298e-06, "loss": 0.3449, "step": 3290 }, { "epoch": 0.21, "grad_norm": 8.593839876042038, "learning_rate": 9.201405325955222e-06, "loss": 0.3364, "step": 3291 }, { "epoch": 0.21, "grad_norm": 2.112760574710892, "learning_rate": 9.200853070713695e-06, "loss": 0.3203, "step": 3292 }, { "epoch": 0.21, "grad_norm": 2.540605316940034, "learning_rate": 9.20030064117131e-06, "loss": 0.3193, "step": 3293 }, { "epoch": 0.21, "grad_norm": 2.32568423100732, "learning_rate": 9.199748037350996e-06, "loss": 0.3432, "step": 3294 }, { "epoch": 0.21, "grad_norm": 4.773215132307938, "learning_rate": 9.199195259275673e-06, "loss": 0.3374, "step": 3295 }, { "epoch": 0.21, "grad_norm": 6.438571219906153, "learning_rate": 9.19864230696828e-06, "loss": 0.3266, "step": 3296 }, { "epoch": 0.21, "grad_norm": 13.87962527297526, "learning_rate": 9.198089180451761e-06, "loss": 0.3335, "step": 3297 }, { "epoch": 0.21, "grad_norm": 7.452510535287162, "learning_rate": 9.197535879749065e-06, "loss": 0.3382, "step": 3298 }, { "epoch": 0.21, "grad_norm": 2.7645794132909876, "learning_rate": 9.196982404883147e-06, "loss": 0.3291, "step": 3299 }, { "epoch": 0.21, "grad_norm": 3.1315230713321487, "learning_rate": 9.196428755876978e-06, "loss": 0.3582, "step": 3300 }, { "epoch": 0.21, "grad_norm": 2.4407918219457003, "learning_rate": 9.195874932753525e-06, "loss": 0.3521, "step": 3301 }, { "epoch": 0.21, "grad_norm": 6.401129747125721, "learning_rate": 9.195320935535769e-06, "loss": 0.3602, "step": 3302 }, { "epoch": 0.21, "grad_norm": 2.532072527604524, "learning_rate": 9.194766764246694e-06, "loss": 0.3534, "step": 3303 }, { "epoch": 0.21, "grad_norm": 5.412806962110028, "learning_rate": 9.194212418909296e-06, "loss": 0.3307, "step": 3304 }, { "epoch": 0.21, "grad_norm": 2.5199638819278407, "learning_rate": 9.193657899546575e-06, "loss": 0.3348, "step": 3305 }, { "epoch": 0.21, "grad_norm": 2.5887591176661275, "learning_rate": 9.19310320618154e-06, "loss": 0.3424, "step": 3306 }, { "epoch": 0.21, "grad_norm": 12.593860703339445, "learning_rate": 9.192548338837204e-06, "loss": 0.3307, "step": 3307 }, { "epoch": 0.21, "grad_norm": 1.7497839312493255, "learning_rate": 9.19199329753659e-06, "loss": 0.3141, "step": 3308 }, { "epoch": 0.21, "grad_norm": 1.4448666963415697, "learning_rate": 9.191438082302731e-06, "loss": 0.3187, "step": 3309 }, { "epoch": 0.21, "grad_norm": 2.2215136403363744, "learning_rate": 9.190882693158658e-06, "loss": 0.3298, "step": 3310 }, { "epoch": 0.21, "grad_norm": 2.554905590613441, "learning_rate": 9.19032713012742e-06, "loss": 0.3416, "step": 3311 }, { "epoch": 0.21, "grad_norm": 1.8985134435188125, "learning_rate": 9.189771393232065e-06, "loss": 0.3219, "step": 3312 }, { "epoch": 0.21, "grad_norm": 2.8571913602771155, "learning_rate": 9.189215482495655e-06, "loss": 0.3188, "step": 3313 }, { "epoch": 0.21, "grad_norm": 2.0283925225733554, "learning_rate": 9.188659397941252e-06, "loss": 0.3381, "step": 3314 }, { "epoch": 0.21, "grad_norm": 3.5622267881856042, "learning_rate": 9.188103139591934e-06, "loss": 0.3366, "step": 3315 }, { "epoch": 0.21, "grad_norm": 1.6204691471689936, "learning_rate": 9.187546707470773e-06, "loss": 0.3257, "step": 3316 }, { "epoch": 0.21, "grad_norm": 2.479359532002313, "learning_rate": 9.186990101600865e-06, "loss": 0.3587, "step": 3317 }, { "epoch": 0.21, "grad_norm": 2.5156339811433504, "learning_rate": 9.186433322005298e-06, "loss": 0.3252, "step": 3318 }, { "epoch": 0.21, "grad_norm": 2.0677644046278445, "learning_rate": 9.185876368707178e-06, "loss": 0.3484, "step": 3319 }, { "epoch": 0.21, "grad_norm": 1.8363778250072302, "learning_rate": 9.185319241729614e-06, "loss": 0.3283, "step": 3320 }, { "epoch": 0.21, "grad_norm": 1.5325217068777581, "learning_rate": 9.184761941095716e-06, "loss": 0.3328, "step": 3321 }, { "epoch": 0.21, "grad_norm": 3.1078622392122868, "learning_rate": 9.184204466828618e-06, "loss": 0.3491, "step": 3322 }, { "epoch": 0.21, "grad_norm": 3.7331955765680322, "learning_rate": 9.18364681895144e-06, "loss": 0.3464, "step": 3323 }, { "epoch": 0.21, "grad_norm": 2.1285879808933634, "learning_rate": 9.183088997487326e-06, "loss": 0.3405, "step": 3324 }, { "epoch": 0.21, "grad_norm": 2.188071343440662, "learning_rate": 9.18253100245942e-06, "loss": 0.3391, "step": 3325 }, { "epoch": 0.21, "grad_norm": 2.56313134255, "learning_rate": 9.181972833890875e-06, "loss": 0.3324, "step": 3326 }, { "epoch": 0.21, "grad_norm": 2.0520794722060756, "learning_rate": 9.181414491804846e-06, "loss": 0.3396, "step": 3327 }, { "epoch": 0.21, "grad_norm": 3.4612046989301044, "learning_rate": 9.180855976224505e-06, "loss": 0.3285, "step": 3328 }, { "epoch": 0.21, "grad_norm": 2.2254198126880214, "learning_rate": 9.180297287173022e-06, "loss": 0.3317, "step": 3329 }, { "epoch": 0.21, "grad_norm": 5.358969469056055, "learning_rate": 9.179738424673582e-06, "loss": 0.3415, "step": 3330 }, { "epoch": 0.21, "grad_norm": 2.3580050157462966, "learning_rate": 9.179179388749369e-06, "loss": 0.3421, "step": 3331 }, { "epoch": 0.21, "grad_norm": 6.7782296233398425, "learning_rate": 9.17862017942358e-06, "loss": 0.3424, "step": 3332 }, { "epoch": 0.21, "grad_norm": 3.562563165541226, "learning_rate": 9.178060796719417e-06, "loss": 0.3355, "step": 3333 }, { "epoch": 0.21, "grad_norm": 2.1177647447941483, "learning_rate": 9.177501240660091e-06, "loss": 0.3427, "step": 3334 }, { "epoch": 0.21, "grad_norm": 2.871763419278553, "learning_rate": 9.176941511268818e-06, "loss": 0.3622, "step": 3335 }, { "epoch": 0.21, "grad_norm": 2.036188243176669, "learning_rate": 9.176381608568824e-06, "loss": 0.3325, "step": 3336 }, { "epoch": 0.21, "grad_norm": 3.487483995921712, "learning_rate": 9.175821532583338e-06, "loss": 0.3647, "step": 3337 }, { "epoch": 0.21, "grad_norm": 1.7326747344651108, "learning_rate": 9.1752612833356e-06, "loss": 0.3313, "step": 3338 }, { "epoch": 0.21, "grad_norm": 3.40782656536387, "learning_rate": 9.174700860848855e-06, "loss": 0.3335, "step": 3339 }, { "epoch": 0.21, "grad_norm": 2.9838042980440616, "learning_rate": 9.174140265146356e-06, "loss": 0.3451, "step": 3340 }, { "epoch": 0.21, "grad_norm": 2.7016867447401482, "learning_rate": 9.173579496251363e-06, "loss": 0.3469, "step": 3341 }, { "epoch": 0.21, "grad_norm": 2.090011723773526, "learning_rate": 9.173018554187145e-06, "loss": 0.3482, "step": 3342 }, { "epoch": 0.21, "grad_norm": 1.8980960308910082, "learning_rate": 9.172457438976974e-06, "loss": 0.3282, "step": 3343 }, { "epoch": 0.21, "grad_norm": 2.4699340183754797, "learning_rate": 9.171896150644132e-06, "loss": 0.3615, "step": 3344 }, { "epoch": 0.21, "grad_norm": 8.797645045459058, "learning_rate": 9.17133468921191e-06, "loss": 0.3462, "step": 3345 }, { "epoch": 0.21, "grad_norm": 1.4314848202489756, "learning_rate": 9.170773054703603e-06, "loss": 0.3203, "step": 3346 }, { "epoch": 0.21, "grad_norm": 3.872175357632315, "learning_rate": 9.170211247142514e-06, "loss": 0.3468, "step": 3347 }, { "epoch": 0.21, "grad_norm": 3.2198033083294213, "learning_rate": 9.169649266551951e-06, "loss": 0.3393, "step": 3348 }, { "epoch": 0.21, "grad_norm": 2.512652006046951, "learning_rate": 9.169087112955234e-06, "loss": 0.3391, "step": 3349 }, { "epoch": 0.21, "grad_norm": 2.6463453697273636, "learning_rate": 9.168524786375689e-06, "loss": 0.3312, "step": 3350 }, { "epoch": 0.21, "grad_norm": 3.908131989900112, "learning_rate": 9.167962286836647e-06, "loss": 0.3357, "step": 3351 }, { "epoch": 0.21, "grad_norm": 1.8293700890744862, "learning_rate": 9.167399614361445e-06, "loss": 0.3225, "step": 3352 }, { "epoch": 0.21, "grad_norm": 2.0266428828740053, "learning_rate": 9.166836768973434e-06, "loss": 0.3274, "step": 3353 }, { "epoch": 0.21, "grad_norm": 1.943899273866206, "learning_rate": 9.16627375069596e-06, "loss": 0.3369, "step": 3354 }, { "epoch": 0.21, "grad_norm": 3.884931678526371, "learning_rate": 9.16571055955239e-06, "loss": 0.3465, "step": 3355 }, { "epoch": 0.21, "grad_norm": 5.400698594126364, "learning_rate": 9.165147195566089e-06, "loss": 0.3294, "step": 3356 }, { "epoch": 0.21, "grad_norm": 2.1125453260908458, "learning_rate": 9.164583658760432e-06, "loss": 0.3434, "step": 3357 }, { "epoch": 0.21, "grad_norm": 3.468269633182379, "learning_rate": 9.164019949158804e-06, "loss": 0.3235, "step": 3358 }, { "epoch": 0.21, "grad_norm": 3.0730537988756113, "learning_rate": 9.163456066784591e-06, "loss": 0.3275, "step": 3359 }, { "epoch": 0.21, "grad_norm": 18.284549859365693, "learning_rate": 9.162892011661192e-06, "loss": 0.3543, "step": 3360 }, { "epoch": 0.21, "grad_norm": 2.5003264599554074, "learning_rate": 9.162327783812008e-06, "loss": 0.389, "step": 3361 }, { "epoch": 0.21, "grad_norm": 1.9787320970232836, "learning_rate": 9.161763383260452e-06, "loss": 0.3177, "step": 3362 }, { "epoch": 0.21, "grad_norm": 1.1673964662883662, "learning_rate": 9.16119881002994e-06, "loss": 0.5297, "step": 3363 }, { "epoch": 0.21, "grad_norm": 2.815700730125225, "learning_rate": 9.160634064143899e-06, "loss": 0.3383, "step": 3364 }, { "epoch": 0.21, "grad_norm": 1.7103598931713948, "learning_rate": 9.16006914562576e-06, "loss": 0.3523, "step": 3365 }, { "epoch": 0.21, "grad_norm": 2.024844366786342, "learning_rate": 9.159504054498964e-06, "loss": 0.37, "step": 3366 }, { "epoch": 0.21, "grad_norm": 2.676963080534276, "learning_rate": 9.158938790786955e-06, "loss": 0.3721, "step": 3367 }, { "epoch": 0.21, "grad_norm": 3.2917037062206203, "learning_rate": 9.15837335451319e-06, "loss": 0.3256, "step": 3368 }, { "epoch": 0.21, "grad_norm": 2.657558902634187, "learning_rate": 9.157807745701128e-06, "loss": 0.3164, "step": 3369 }, { "epoch": 0.21, "grad_norm": 2.6724387004788803, "learning_rate": 9.157241964374237e-06, "loss": 0.3387, "step": 3370 }, { "epoch": 0.21, "grad_norm": 4.503820732107395, "learning_rate": 9.156676010555993e-06, "loss": 0.3328, "step": 3371 }, { "epoch": 0.21, "grad_norm": 3.3457756669202174, "learning_rate": 9.15610988426988e-06, "loss": 0.3407, "step": 3372 }, { "epoch": 0.21, "grad_norm": 2.548715836200099, "learning_rate": 9.155543585539384e-06, "loss": 0.3289, "step": 3373 }, { "epoch": 0.21, "grad_norm": 3.0279051529499923, "learning_rate": 9.154977114388003e-06, "loss": 0.328, "step": 3374 }, { "epoch": 0.21, "grad_norm": 3.4596567994902307, "learning_rate": 9.154410470839243e-06, "loss": 0.3438, "step": 3375 }, { "epoch": 0.21, "grad_norm": 2.859275170217395, "learning_rate": 9.153843654916611e-06, "loss": 0.3806, "step": 3376 }, { "epoch": 0.21, "grad_norm": 4.131478239727446, "learning_rate": 9.15327666664363e-06, "loss": 0.3191, "step": 3377 }, { "epoch": 0.21, "grad_norm": 2.7706683433185906, "learning_rate": 9.152709506043823e-06, "loss": 0.3402, "step": 3378 }, { "epoch": 0.21, "grad_norm": 3.8437933644698665, "learning_rate": 9.152142173140722e-06, "loss": 0.3376, "step": 3379 }, { "epoch": 0.21, "grad_norm": 5.7778771841816985, "learning_rate": 9.151574667957868e-06, "loss": 0.3266, "step": 3380 }, { "epoch": 0.21, "grad_norm": 2.5977096609640595, "learning_rate": 9.151006990518806e-06, "loss": 0.3291, "step": 3381 }, { "epoch": 0.21, "grad_norm": 2.266092286796118, "learning_rate": 9.150439140847091e-06, "loss": 0.3347, "step": 3382 }, { "epoch": 0.21, "grad_norm": 2.818289659645148, "learning_rate": 9.149871118966285e-06, "loss": 0.3379, "step": 3383 }, { "epoch": 0.21, "grad_norm": 2.215748932020619, "learning_rate": 9.149302924899954e-06, "loss": 0.3164, "step": 3384 }, { "epoch": 0.21, "grad_norm": 5.570108372727355, "learning_rate": 9.148734558671675e-06, "loss": 0.3327, "step": 3385 }, { "epoch": 0.21, "grad_norm": 5.71923744775369, "learning_rate": 9.14816602030503e-06, "loss": 0.3755, "step": 3386 }, { "epoch": 0.21, "grad_norm": 2.4901020425627403, "learning_rate": 9.14759730982361e-06, "loss": 0.3154, "step": 3387 }, { "epoch": 0.21, "grad_norm": 2.661112022598833, "learning_rate": 9.14702842725101e-06, "loss": 0.3379, "step": 3388 }, { "epoch": 0.21, "grad_norm": 3.9211450406357264, "learning_rate": 9.146459372610834e-06, "loss": 0.3458, "step": 3389 }, { "epoch": 0.21, "grad_norm": 4.7073832687348816, "learning_rate": 9.145890145926695e-06, "loss": 0.3221, "step": 3390 }, { "epoch": 0.21, "grad_norm": 2.9851121593898338, "learning_rate": 9.145320747222208e-06, "loss": 0.3396, "step": 3391 }, { "epoch": 0.21, "grad_norm": 3.8435272556335773, "learning_rate": 9.144751176521002e-06, "loss": 0.3525, "step": 3392 }, { "epoch": 0.21, "grad_norm": 2.7941329282709573, "learning_rate": 9.144181433846707e-06, "loss": 0.3337, "step": 3393 }, { "epoch": 0.21, "grad_norm": 8.659776119343045, "learning_rate": 9.143611519222964e-06, "loss": 0.3274, "step": 3394 }, { "epoch": 0.21, "grad_norm": 7.398221496478616, "learning_rate": 9.143041432673419e-06, "loss": 0.336, "step": 3395 }, { "epoch": 0.21, "grad_norm": 5.213436868912288, "learning_rate": 9.142471174221726e-06, "loss": 0.3309, "step": 3396 }, { "epoch": 0.21, "grad_norm": 0.9721671511681025, "learning_rate": 9.141900743891546e-06, "loss": 0.5126, "step": 3397 }, { "epoch": 0.21, "grad_norm": 3.8562501935600255, "learning_rate": 9.141330141706546e-06, "loss": 0.3318, "step": 3398 }, { "epoch": 0.21, "grad_norm": 2.0973830541994762, "learning_rate": 9.140759367690404e-06, "loss": 0.3279, "step": 3399 }, { "epoch": 0.21, "grad_norm": 0.7071700560578132, "learning_rate": 9.1401884218668e-06, "loss": 0.5134, "step": 3400 }, { "epoch": 0.21, "grad_norm": 6.110391149165387, "learning_rate": 9.139617304259427e-06, "loss": 0.3299, "step": 3401 }, { "epoch": 0.21, "grad_norm": 5.5283307902572165, "learning_rate": 9.139046014891977e-06, "loss": 0.3416, "step": 3402 }, { "epoch": 0.21, "grad_norm": 4.180471716055632, "learning_rate": 9.138474553788157e-06, "loss": 0.3394, "step": 3403 }, { "epoch": 0.21, "grad_norm": 0.6996761485645479, "learning_rate": 9.137902920971678e-06, "loss": 0.5086, "step": 3404 }, { "epoch": 0.21, "grad_norm": 3.551640849621617, "learning_rate": 9.137331116466256e-06, "loss": 0.3267, "step": 3405 }, { "epoch": 0.21, "grad_norm": 3.8730634452546617, "learning_rate": 9.136759140295615e-06, "loss": 0.3462, "step": 3406 }, { "epoch": 0.21, "grad_norm": 3.467497394945697, "learning_rate": 9.136186992483492e-06, "loss": 0.3318, "step": 3407 }, { "epoch": 0.21, "grad_norm": 2.8178472384582345, "learning_rate": 9.135614673053624e-06, "loss": 0.3283, "step": 3408 }, { "epoch": 0.21, "grad_norm": 3.2951332013735337, "learning_rate": 9.135042182029757e-06, "loss": 0.3313, "step": 3409 }, { "epoch": 0.21, "grad_norm": 4.26418890154553, "learning_rate": 9.134469519435646e-06, "loss": 0.3418, "step": 3410 }, { "epoch": 0.21, "grad_norm": 4.818220638789112, "learning_rate": 9.13389668529505e-06, "loss": 0.3337, "step": 3411 }, { "epoch": 0.21, "grad_norm": 13.86822808388928, "learning_rate": 9.133323679631738e-06, "loss": 0.3174, "step": 3412 }, { "epoch": 0.21, "grad_norm": 3.109364057932612, "learning_rate": 9.132750502469485e-06, "loss": 0.3165, "step": 3413 }, { "epoch": 0.21, "grad_norm": 3.672558504126033, "learning_rate": 9.132177153832074e-06, "loss": 0.311, "step": 3414 }, { "epoch": 0.21, "grad_norm": 2.832940662614624, "learning_rate": 9.131603633743292e-06, "loss": 0.3322, "step": 3415 }, { "epoch": 0.21, "grad_norm": 2.204792192716843, "learning_rate": 9.131029942226937e-06, "loss": 0.3317, "step": 3416 }, { "epoch": 0.21, "grad_norm": 2.832418154868519, "learning_rate": 9.130456079306814e-06, "loss": 0.3822, "step": 3417 }, { "epoch": 0.21, "grad_norm": 2.5062226053206063, "learning_rate": 9.12988204500673e-06, "loss": 0.3308, "step": 3418 }, { "epoch": 0.22, "grad_norm": 3.1590126938251, "learning_rate": 9.129307839350504e-06, "loss": 0.3274, "step": 3419 }, { "epoch": 0.22, "grad_norm": 3.2142266209046824, "learning_rate": 9.128733462361963e-06, "loss": 0.3387, "step": 3420 }, { "epoch": 0.22, "grad_norm": 1.6472285423146584, "learning_rate": 9.128158914064934e-06, "loss": 0.317, "step": 3421 }, { "epoch": 0.22, "grad_norm": 3.5113775531399516, "learning_rate": 9.127584194483262e-06, "loss": 0.3489, "step": 3422 }, { "epoch": 0.22, "grad_norm": 4.943572123418341, "learning_rate": 9.12700930364079e-06, "loss": 0.319, "step": 3423 }, { "epoch": 0.22, "grad_norm": 2.8812612659283072, "learning_rate": 9.12643424156137e-06, "loss": 0.3208, "step": 3424 }, { "epoch": 0.22, "grad_norm": 2.9013054774185956, "learning_rate": 9.125859008268867e-06, "loss": 0.3423, "step": 3425 }, { "epoch": 0.22, "grad_norm": 2.544791406628796, "learning_rate": 9.125283603787142e-06, "loss": 0.3342, "step": 3426 }, { "epoch": 0.22, "grad_norm": 2.394809981961453, "learning_rate": 9.124708028140075e-06, "loss": 0.3356, "step": 3427 }, { "epoch": 0.22, "grad_norm": 3.0129142018826496, "learning_rate": 9.124132281351545e-06, "loss": 0.3326, "step": 3428 }, { "epoch": 0.22, "grad_norm": 4.7907079098252705, "learning_rate": 9.123556363445442e-06, "loss": 0.3419, "step": 3429 }, { "epoch": 0.22, "grad_norm": 2.423517417702465, "learning_rate": 9.12298027444566e-06, "loss": 0.3238, "step": 3430 }, { "epoch": 0.22, "grad_norm": 2.2696674535860373, "learning_rate": 9.122404014376104e-06, "loss": 0.341, "step": 3431 }, { "epoch": 0.22, "grad_norm": 1.8037064937903338, "learning_rate": 9.121827583260686e-06, "loss": 0.3079, "step": 3432 }, { "epoch": 0.22, "grad_norm": 2.7462740294945083, "learning_rate": 9.121250981123315e-06, "loss": 0.3332, "step": 3433 }, { "epoch": 0.22, "grad_norm": 4.9452868801654155, "learning_rate": 9.120674207987923e-06, "loss": 0.3197, "step": 3434 }, { "epoch": 0.22, "grad_norm": 2.483170471661308, "learning_rate": 9.12009726387844e-06, "loss": 0.3365, "step": 3435 }, { "epoch": 0.22, "grad_norm": 3.7605779973586477, "learning_rate": 9.119520148818804e-06, "loss": 0.311, "step": 3436 }, { "epoch": 0.22, "grad_norm": 4.403593592969136, "learning_rate": 9.118942862832958e-06, "loss": 0.3307, "step": 3437 }, { "epoch": 0.22, "grad_norm": 2.670852267455348, "learning_rate": 9.118365405944856e-06, "loss": 0.3207, "step": 3438 }, { "epoch": 0.22, "grad_norm": 9.939563130698033, "learning_rate": 9.11778777817846e-06, "loss": 0.3319, "step": 3439 }, { "epoch": 0.22, "grad_norm": 3.3762713484256435, "learning_rate": 9.117209979557734e-06, "loss": 0.3312, "step": 3440 }, { "epoch": 0.22, "grad_norm": 2.9976177286620933, "learning_rate": 9.116632010106654e-06, "loss": 0.3313, "step": 3441 }, { "epoch": 0.22, "grad_norm": 2.2074607433805937, "learning_rate": 9.116053869849198e-06, "loss": 0.3562, "step": 3442 }, { "epoch": 0.22, "grad_norm": 3.0660503344787537, "learning_rate": 9.115475558809358e-06, "loss": 0.3273, "step": 3443 }, { "epoch": 0.22, "grad_norm": 10.01375776371647, "learning_rate": 9.114897077011128e-06, "loss": 0.3725, "step": 3444 }, { "epoch": 0.22, "grad_norm": 2.6250311791907452, "learning_rate": 9.114318424478506e-06, "loss": 0.3419, "step": 3445 }, { "epoch": 0.22, "grad_norm": 3.186364353903956, "learning_rate": 9.113739601235508e-06, "loss": 0.3446, "step": 3446 }, { "epoch": 0.22, "grad_norm": 3.3887747665661734, "learning_rate": 9.113160607306143e-06, "loss": 0.3601, "step": 3447 }, { "epoch": 0.22, "grad_norm": 2.2462392757443417, "learning_rate": 9.112581442714443e-06, "loss": 0.3427, "step": 3448 }, { "epoch": 0.22, "grad_norm": 2.2132050586897387, "learning_rate": 9.112002107484433e-06, "loss": 0.2929, "step": 3449 }, { "epoch": 0.22, "grad_norm": 5.778137670186096, "learning_rate": 9.111422601640151e-06, "loss": 0.3448, "step": 3450 }, { "epoch": 0.22, "grad_norm": 0.9218415945345637, "learning_rate": 9.110842925205642e-06, "loss": 0.5113, "step": 3451 }, { "epoch": 0.22, "grad_norm": 3.3952053390492414, "learning_rate": 9.11026307820496e-06, "loss": 0.3367, "step": 3452 }, { "epoch": 0.22, "grad_norm": 8.006458909704897, "learning_rate": 9.10968306066216e-06, "loss": 0.3262, "step": 3453 }, { "epoch": 0.22, "grad_norm": 2.5686362626453807, "learning_rate": 9.109102872601312e-06, "loss": 0.3307, "step": 3454 }, { "epoch": 0.22, "grad_norm": 2.2922075069105805, "learning_rate": 9.108522514046487e-06, "loss": 0.3395, "step": 3455 }, { "epoch": 0.22, "grad_norm": 4.548510923432122, "learning_rate": 9.107941985021765e-06, "loss": 0.3599, "step": 3456 }, { "epoch": 0.22, "grad_norm": 3.0702866979160897, "learning_rate": 9.107361285551236e-06, "loss": 0.3282, "step": 3457 }, { "epoch": 0.22, "grad_norm": 3.821491197807495, "learning_rate": 9.106780415658989e-06, "loss": 0.3466, "step": 3458 }, { "epoch": 0.22, "grad_norm": 2.2321418725937776, "learning_rate": 9.106199375369127e-06, "loss": 0.3603, "step": 3459 }, { "epoch": 0.22, "grad_norm": 0.6049074226834605, "learning_rate": 9.10561816470576e-06, "loss": 0.4935, "step": 3460 }, { "epoch": 0.22, "grad_norm": 5.659252169270551, "learning_rate": 9.105036783693006e-06, "loss": 0.3379, "step": 3461 }, { "epoch": 0.22, "grad_norm": 3.256271392693812, "learning_rate": 9.104455232354982e-06, "loss": 0.3505, "step": 3462 }, { "epoch": 0.22, "grad_norm": 3.5900481103157382, "learning_rate": 9.10387351071582e-06, "loss": 0.334, "step": 3463 }, { "epoch": 0.22, "grad_norm": 2.7640226305940168, "learning_rate": 9.103291618799657e-06, "loss": 0.3398, "step": 3464 }, { "epoch": 0.22, "grad_norm": 7.739483356634692, "learning_rate": 9.102709556630639e-06, "loss": 0.3349, "step": 3465 }, { "epoch": 0.22, "grad_norm": 2.902883040974735, "learning_rate": 9.10212732423291e-06, "loss": 0.342, "step": 3466 }, { "epoch": 0.22, "grad_norm": 5.323800145239479, "learning_rate": 9.101544921630634e-06, "loss": 0.3215, "step": 3467 }, { "epoch": 0.22, "grad_norm": 16.354977132274946, "learning_rate": 9.100962348847974e-06, "loss": 0.3277, "step": 3468 }, { "epoch": 0.22, "grad_norm": 15.436653894832212, "learning_rate": 9.100379605909102e-06, "loss": 0.3378, "step": 3469 }, { "epoch": 0.22, "grad_norm": 2.5648961244173885, "learning_rate": 9.099796692838198e-06, "loss": 0.3047, "step": 3470 }, { "epoch": 0.22, "grad_norm": 102.54707431872112, "learning_rate": 9.099213609659448e-06, "loss": 0.3161, "step": 3471 }, { "epoch": 0.22, "grad_norm": 10.952786479893375, "learning_rate": 9.09863035639704e-06, "loss": 0.3645, "step": 3472 }, { "epoch": 0.22, "grad_norm": 6.18717403910133, "learning_rate": 9.098046933075182e-06, "loss": 0.3426, "step": 3473 }, { "epoch": 0.22, "grad_norm": 3.055273260646196, "learning_rate": 9.097463339718077e-06, "loss": 0.3182, "step": 3474 }, { "epoch": 0.22, "grad_norm": 3.137222206697158, "learning_rate": 9.09687957634994e-06, "loss": 0.3281, "step": 3475 }, { "epoch": 0.22, "grad_norm": 2.610557892693063, "learning_rate": 9.096295642994993e-06, "loss": 0.3319, "step": 3476 }, { "epoch": 0.22, "grad_norm": 7.246057627288268, "learning_rate": 9.095711539677464e-06, "loss": 0.3434, "step": 3477 }, { "epoch": 0.22, "grad_norm": 2.653807832279, "learning_rate": 9.095127266421589e-06, "loss": 0.3567, "step": 3478 }, { "epoch": 0.22, "grad_norm": 2.5642670646223147, "learning_rate": 9.094542823251609e-06, "loss": 0.3241, "step": 3479 }, { "epoch": 0.22, "grad_norm": 2.681253858790611, "learning_rate": 9.093958210191773e-06, "loss": 0.3339, "step": 3480 }, { "epoch": 0.22, "grad_norm": 10.795870686069218, "learning_rate": 9.09337342726634e-06, "loss": 0.3071, "step": 3481 }, { "epoch": 0.22, "grad_norm": 0.649140128270586, "learning_rate": 9.092788474499574e-06, "loss": 0.5054, "step": 3482 }, { "epoch": 0.22, "grad_norm": 5.564914198833503, "learning_rate": 9.092203351915744e-06, "loss": 0.3149, "step": 3483 }, { "epoch": 0.22, "grad_norm": 4.054874636347387, "learning_rate": 9.091618059539129e-06, "loss": 0.3367, "step": 3484 }, { "epoch": 0.22, "grad_norm": 4.561306503066033, "learning_rate": 9.091032597394012e-06, "loss": 0.333, "step": 3485 }, { "epoch": 0.22, "grad_norm": 4.352116762755251, "learning_rate": 9.090446965504687e-06, "loss": 0.3292, "step": 3486 }, { "epoch": 0.22, "grad_norm": 3.6444201086478323, "learning_rate": 9.089861163895453e-06, "loss": 0.3343, "step": 3487 }, { "epoch": 0.22, "grad_norm": 11.831919652606544, "learning_rate": 9.089275192590613e-06, "loss": 0.3517, "step": 3488 }, { "epoch": 0.22, "grad_norm": 3.5929553608429483, "learning_rate": 9.088689051614483e-06, "loss": 0.3397, "step": 3489 }, { "epoch": 0.22, "grad_norm": 3.6375140527785934, "learning_rate": 9.08810274099138e-06, "loss": 0.3196, "step": 3490 }, { "epoch": 0.22, "grad_norm": 2.36569335735749, "learning_rate": 9.087516260745635e-06, "loss": 0.3104, "step": 3491 }, { "epoch": 0.22, "grad_norm": 3.002990115600427, "learning_rate": 9.08692961090158e-06, "loss": 0.3313, "step": 3492 }, { "epoch": 0.22, "grad_norm": 3.842323738404551, "learning_rate": 9.086342791483555e-06, "loss": 0.3316, "step": 3493 }, { "epoch": 0.22, "grad_norm": 3.5964259177477587, "learning_rate": 9.08575580251591e-06, "loss": 0.3253, "step": 3494 }, { "epoch": 0.22, "grad_norm": 3.552320116874668, "learning_rate": 9.085168644022999e-06, "loss": 0.326, "step": 3495 }, { "epoch": 0.22, "grad_norm": 2.7945823372035328, "learning_rate": 9.084581316029186e-06, "loss": 0.3226, "step": 3496 }, { "epoch": 0.22, "grad_norm": 8.597765238121262, "learning_rate": 9.08399381855884e-06, "loss": 0.3434, "step": 3497 }, { "epoch": 0.22, "grad_norm": 3.3106496510742263, "learning_rate": 9.083406151636334e-06, "loss": 0.3027, "step": 3498 }, { "epoch": 0.22, "grad_norm": 2.31908087309126, "learning_rate": 9.082818315286054e-06, "loss": 0.34, "step": 3499 }, { "epoch": 0.22, "grad_norm": 3.601157452757643, "learning_rate": 9.082230309532393e-06, "loss": 0.3334, "step": 3500 }, { "epoch": 0.22, "grad_norm": 4.223736593445799, "learning_rate": 9.081642134399744e-06, "loss": 0.338, "step": 3501 }, { "epoch": 0.22, "grad_norm": 6.344115079313927, "learning_rate": 9.081053789912513e-06, "loss": 0.3377, "step": 3502 }, { "epoch": 0.22, "grad_norm": 4.746027179157278, "learning_rate": 9.080465276095112e-06, "loss": 0.3448, "step": 3503 }, { "epoch": 0.22, "grad_norm": 4.61044875039412, "learning_rate": 9.079876592971957e-06, "loss": 0.357, "step": 3504 }, { "epoch": 0.22, "grad_norm": 2.74052503939075, "learning_rate": 9.079287740567478e-06, "loss": 0.3373, "step": 3505 }, { "epoch": 0.22, "grad_norm": 3.1027324826146008, "learning_rate": 9.078698718906103e-06, "loss": 0.3181, "step": 3506 }, { "epoch": 0.22, "grad_norm": 0.6315058772643531, "learning_rate": 9.078109528012274e-06, "loss": 0.5071, "step": 3507 }, { "epoch": 0.22, "grad_norm": 3.2257699242738753, "learning_rate": 9.077520167910438e-06, "loss": 0.3336, "step": 3508 }, { "epoch": 0.22, "grad_norm": 2.594941072026912, "learning_rate": 9.076930638625047e-06, "loss": 0.3089, "step": 3509 }, { "epoch": 0.22, "grad_norm": 5.551073750864412, "learning_rate": 9.076340940180563e-06, "loss": 0.3648, "step": 3510 }, { "epoch": 0.22, "grad_norm": 9.045605593319252, "learning_rate": 9.075751072601453e-06, "loss": 0.3124, "step": 3511 }, { "epoch": 0.22, "grad_norm": 5.974039112999473, "learning_rate": 9.07516103591219e-06, "loss": 0.3474, "step": 3512 }, { "epoch": 0.22, "grad_norm": 3.7088514065386926, "learning_rate": 9.07457083013726e-06, "loss": 0.3248, "step": 3513 }, { "epoch": 0.22, "grad_norm": 4.516938561201477, "learning_rate": 9.073980455301148e-06, "loss": 0.3181, "step": 3514 }, { "epoch": 0.22, "grad_norm": 5.129209581638368, "learning_rate": 9.07338991142835e-06, "loss": 0.3183, "step": 3515 }, { "epoch": 0.22, "grad_norm": 2.8965342970940835, "learning_rate": 9.072799198543369e-06, "loss": 0.3116, "step": 3516 }, { "epoch": 0.22, "grad_norm": 9.422203672342523, "learning_rate": 9.072208316670716e-06, "loss": 0.3219, "step": 3517 }, { "epoch": 0.22, "grad_norm": 3.006018969271595, "learning_rate": 9.071617265834907e-06, "loss": 0.3378, "step": 3518 }, { "epoch": 0.22, "grad_norm": 3.4732222221762674, "learning_rate": 9.071026046060465e-06, "loss": 0.3313, "step": 3519 }, { "epoch": 0.22, "grad_norm": 3.5821227700492675, "learning_rate": 9.070434657371923e-06, "loss": 0.3191, "step": 3520 }, { "epoch": 0.22, "grad_norm": 3.1388317281603095, "learning_rate": 9.069843099793815e-06, "loss": 0.3446, "step": 3521 }, { "epoch": 0.22, "grad_norm": 3.4189128178981054, "learning_rate": 9.069251373350689e-06, "loss": 0.3223, "step": 3522 }, { "epoch": 0.22, "grad_norm": 5.516764703725452, "learning_rate": 9.068659478067096e-06, "loss": 0.3122, "step": 3523 }, { "epoch": 0.22, "grad_norm": 5.839666827460726, "learning_rate": 9.068067413967594e-06, "loss": 0.3333, "step": 3524 }, { "epoch": 0.22, "grad_norm": 5.475461679296307, "learning_rate": 9.067475181076751e-06, "loss": 0.3266, "step": 3525 }, { "epoch": 0.22, "grad_norm": 6.280856693851178, "learning_rate": 9.066882779419135e-06, "loss": 0.3116, "step": 3526 }, { "epoch": 0.22, "grad_norm": 4.340628060280135, "learning_rate": 9.066290209019331e-06, "loss": 0.3141, "step": 3527 }, { "epoch": 0.22, "grad_norm": 14.84375309229679, "learning_rate": 9.065697469901923e-06, "loss": 0.3334, "step": 3528 }, { "epoch": 0.22, "grad_norm": 3.915250938557479, "learning_rate": 9.065104562091506e-06, "loss": 0.3349, "step": 3529 }, { "epoch": 0.22, "grad_norm": 3.2244600292329144, "learning_rate": 9.064511485612679e-06, "loss": 0.3215, "step": 3530 }, { "epoch": 0.22, "grad_norm": 4.027082296129365, "learning_rate": 9.063918240490052e-06, "loss": 0.321, "step": 3531 }, { "epoch": 0.22, "grad_norm": 2.5432314854030387, "learning_rate": 9.063324826748239e-06, "loss": 0.3452, "step": 3532 }, { "epoch": 0.22, "grad_norm": 7.511747029055402, "learning_rate": 9.062731244411862e-06, "loss": 0.3438, "step": 3533 }, { "epoch": 0.22, "grad_norm": 8.897502735314697, "learning_rate": 9.062137493505548e-06, "loss": 0.3331, "step": 3534 }, { "epoch": 0.22, "grad_norm": 2.7969701159775067, "learning_rate": 9.061543574053936e-06, "loss": 0.3351, "step": 3535 }, { "epoch": 0.22, "grad_norm": 3.9887297670828126, "learning_rate": 9.060949486081665e-06, "loss": 0.3247, "step": 3536 }, { "epoch": 0.22, "grad_norm": 3.526286054079545, "learning_rate": 9.060355229613389e-06, "loss": 0.3153, "step": 3537 }, { "epoch": 0.22, "grad_norm": 3.94897289070156, "learning_rate": 9.059760804673761e-06, "loss": 0.355, "step": 3538 }, { "epoch": 0.22, "grad_norm": 12.96042756630533, "learning_rate": 9.059166211287447e-06, "loss": 0.328, "step": 3539 }, { "epoch": 0.22, "grad_norm": 2.2121761861262934, "learning_rate": 9.058571449479117e-06, "loss": 0.3284, "step": 3540 }, { "epoch": 0.22, "grad_norm": 8.092016249111747, "learning_rate": 9.057976519273448e-06, "loss": 0.3637, "step": 3541 }, { "epoch": 0.22, "grad_norm": 3.3464536840370003, "learning_rate": 9.057381420695126e-06, "loss": 0.3295, "step": 3542 }, { "epoch": 0.22, "grad_norm": 2.5024058788918464, "learning_rate": 9.056786153768841e-06, "loss": 0.3358, "step": 3543 }, { "epoch": 0.22, "grad_norm": 4.243955832177578, "learning_rate": 9.056190718519295e-06, "loss": 0.3442, "step": 3544 }, { "epoch": 0.22, "grad_norm": 3.5049918892965763, "learning_rate": 9.05559511497119e-06, "loss": 0.3402, "step": 3545 }, { "epoch": 0.22, "grad_norm": 3.46510198512261, "learning_rate": 9.05499934314924e-06, "loss": 0.3071, "step": 3546 }, { "epoch": 0.22, "grad_norm": 13.90580788485331, "learning_rate": 9.054403403078164e-06, "loss": 0.3401, "step": 3547 }, { "epoch": 0.22, "grad_norm": 10.320993086896735, "learning_rate": 9.053807294782692e-06, "loss": 0.3142, "step": 3548 }, { "epoch": 0.22, "grad_norm": 2.7345491157967383, "learning_rate": 9.053211018287553e-06, "loss": 0.3236, "step": 3549 }, { "epoch": 0.22, "grad_norm": 3.879192789824122, "learning_rate": 9.05261457361749e-06, "loss": 0.3221, "step": 3550 }, { "epoch": 0.22, "grad_norm": 2.852462718096176, "learning_rate": 9.05201796079725e-06, "loss": 0.338, "step": 3551 }, { "epoch": 0.22, "grad_norm": 7.012162404812537, "learning_rate": 9.051421179851588e-06, "loss": 0.3126, "step": 3552 }, { "epoch": 0.22, "grad_norm": 3.3622073026504316, "learning_rate": 9.050824230805266e-06, "loss": 0.3185, "step": 3553 }, { "epoch": 0.22, "grad_norm": 4.122895476222348, "learning_rate": 9.05022711368305e-06, "loss": 0.3386, "step": 3554 }, { "epoch": 0.22, "grad_norm": 4.988696057976078, "learning_rate": 9.049629828509719e-06, "loss": 0.3559, "step": 3555 }, { "epoch": 0.22, "grad_norm": 2.4607113623648877, "learning_rate": 9.04903237531005e-06, "loss": 0.3178, "step": 3556 }, { "epoch": 0.22, "grad_norm": 10.988982176083395, "learning_rate": 9.04843475410884e-06, "loss": 0.3264, "step": 3557 }, { "epoch": 0.22, "grad_norm": 4.371250697174492, "learning_rate": 9.047836964930877e-06, "loss": 0.3374, "step": 3558 }, { "epoch": 0.22, "grad_norm": 3.8809763407763955, "learning_rate": 9.047239007800972e-06, "loss": 0.3463, "step": 3559 }, { "epoch": 0.22, "grad_norm": 6.393894186510673, "learning_rate": 9.04664088274393e-06, "loss": 0.3185, "step": 3560 }, { "epoch": 0.22, "grad_norm": 2.970437004367653, "learning_rate": 9.04604258978457e-06, "loss": 0.3444, "step": 3561 }, { "epoch": 0.22, "grad_norm": 4.156822946079777, "learning_rate": 9.045444128947719e-06, "loss": 0.3195, "step": 3562 }, { "epoch": 0.22, "grad_norm": 5.363840755745807, "learning_rate": 9.0448455002582e-06, "loss": 0.3487, "step": 3563 }, { "epoch": 0.22, "grad_norm": 3.9172663491165216, "learning_rate": 9.044246703740863e-06, "loss": 0.3386, "step": 3564 }, { "epoch": 0.22, "grad_norm": 7.385849456951373, "learning_rate": 9.043647739420543e-06, "loss": 0.3249, "step": 3565 }, { "epoch": 0.22, "grad_norm": 5.663101178950992, "learning_rate": 9.043048607322097e-06, "loss": 0.3222, "step": 3566 }, { "epoch": 0.22, "grad_norm": 6.854934296067239, "learning_rate": 9.042449307470384e-06, "loss": 0.3484, "step": 3567 }, { "epoch": 0.22, "grad_norm": 4.6198768133298485, "learning_rate": 9.041849839890267e-06, "loss": 0.34, "step": 3568 }, { "epoch": 0.22, "grad_norm": 4.411133402728843, "learning_rate": 9.041250204606623e-06, "loss": 0.3426, "step": 3569 }, { "epoch": 0.22, "grad_norm": 8.885057626932522, "learning_rate": 9.040650401644329e-06, "loss": 0.3252, "step": 3570 }, { "epoch": 0.22, "grad_norm": 4.09092358706227, "learning_rate": 9.040050431028273e-06, "loss": 0.3179, "step": 3571 }, { "epoch": 0.22, "grad_norm": 5.794735501343648, "learning_rate": 9.039450292783349e-06, "loss": 0.3531, "step": 3572 }, { "epoch": 0.22, "grad_norm": 7.384471046150093, "learning_rate": 9.038849986934457e-06, "loss": 0.3264, "step": 3573 }, { "epoch": 0.22, "grad_norm": 2.685652594496571, "learning_rate": 9.038249513506506e-06, "loss": 0.3161, "step": 3574 }, { "epoch": 0.22, "grad_norm": 2.6532738353491947, "learning_rate": 9.03764887252441e-06, "loss": 0.3322, "step": 3575 }, { "epoch": 0.22, "grad_norm": 6.452544866358713, "learning_rate": 9.037048064013088e-06, "loss": 0.3356, "step": 3576 }, { "epoch": 0.22, "grad_norm": 24.05446308649078, "learning_rate": 9.036447087997473e-06, "loss": 0.3459, "step": 3577 }, { "epoch": 0.23, "grad_norm": 16.84714724386355, "learning_rate": 9.0358459445025e-06, "loss": 0.3222, "step": 3578 }, { "epoch": 0.23, "grad_norm": 7.087490678264344, "learning_rate": 9.035244633553109e-06, "loss": 0.3316, "step": 3579 }, { "epoch": 0.23, "grad_norm": 3.6993876837380792, "learning_rate": 9.034643155174251e-06, "loss": 0.3244, "step": 3580 }, { "epoch": 0.23, "grad_norm": 3.8286101056562893, "learning_rate": 9.034041509390884e-06, "loss": 0.3337, "step": 3581 }, { "epoch": 0.23, "grad_norm": 7.402972408534468, "learning_rate": 9.033439696227966e-06, "loss": 0.3333, "step": 3582 }, { "epoch": 0.23, "grad_norm": 5.200789197971586, "learning_rate": 9.032837715710472e-06, "loss": 0.3221, "step": 3583 }, { "epoch": 0.23, "grad_norm": 3.571183544346358, "learning_rate": 9.032235567863379e-06, "loss": 0.328, "step": 3584 }, { "epoch": 0.23, "grad_norm": 0.6601985659672188, "learning_rate": 9.031633252711669e-06, "loss": 0.5005, "step": 3585 }, { "epoch": 0.23, "grad_norm": 3.1690557004051634, "learning_rate": 9.031030770280335e-06, "loss": 0.3284, "step": 3586 }, { "epoch": 0.23, "grad_norm": 10.490221960830418, "learning_rate": 9.030428120594375e-06, "loss": 0.3033, "step": 3587 }, { "epoch": 0.23, "grad_norm": 4.145002496929343, "learning_rate": 9.029825303678794e-06, "loss": 0.325, "step": 3588 }, { "epoch": 0.23, "grad_norm": 3.538829016865508, "learning_rate": 9.029222319558602e-06, "loss": 0.3326, "step": 3589 }, { "epoch": 0.23, "grad_norm": 0.6067011415421141, "learning_rate": 9.028619168258818e-06, "loss": 0.5025, "step": 3590 }, { "epoch": 0.23, "grad_norm": 4.701706051279161, "learning_rate": 9.028015849804473e-06, "loss": 0.334, "step": 3591 }, { "epoch": 0.23, "grad_norm": 4.2478422724832585, "learning_rate": 9.027412364220592e-06, "loss": 0.3281, "step": 3592 }, { "epoch": 0.23, "grad_norm": 8.079626237912594, "learning_rate": 9.02680871153222e-06, "loss": 0.3066, "step": 3593 }, { "epoch": 0.23, "grad_norm": 0.6004598064573669, "learning_rate": 9.026204891764402e-06, "loss": 0.5165, "step": 3594 }, { "epoch": 0.23, "grad_norm": 5.2952808171753825, "learning_rate": 9.025600904942192e-06, "loss": 0.3151, "step": 3595 }, { "epoch": 0.23, "grad_norm": 3.4729453103217804, "learning_rate": 9.024996751090652e-06, "loss": 0.2955, "step": 3596 }, { "epoch": 0.23, "grad_norm": 4.162442025468631, "learning_rate": 9.024392430234843e-06, "loss": 0.3127, "step": 3597 }, { "epoch": 0.23, "grad_norm": 5.857648722786493, "learning_rate": 9.023787942399847e-06, "loss": 0.3066, "step": 3598 }, { "epoch": 0.23, "grad_norm": 3.573597314277269, "learning_rate": 9.023183287610742e-06, "loss": 0.3216, "step": 3599 }, { "epoch": 0.23, "grad_norm": 2.7757878378595167, "learning_rate": 9.022578465892616e-06, "loss": 0.3224, "step": 3600 }, { "epoch": 0.23, "grad_norm": 8.496563184280213, "learning_rate": 9.021973477270564e-06, "loss": 0.3266, "step": 3601 }, { "epoch": 0.23, "grad_norm": 3.343982204643135, "learning_rate": 9.02136832176969e-06, "loss": 0.3165, "step": 3602 }, { "epoch": 0.23, "grad_norm": 2.9868406128698726, "learning_rate": 9.0207629994151e-06, "loss": 0.3195, "step": 3603 }, { "epoch": 0.23, "grad_norm": 3.8329817214190003, "learning_rate": 9.020157510231913e-06, "loss": 0.3146, "step": 3604 }, { "epoch": 0.23, "grad_norm": 4.514187088615563, "learning_rate": 9.019551854245252e-06, "loss": 0.3298, "step": 3605 }, { "epoch": 0.23, "grad_norm": 8.818872750892389, "learning_rate": 9.018946031480242e-06, "loss": 0.3017, "step": 3606 }, { "epoch": 0.23, "grad_norm": 8.50794779055897, "learning_rate": 9.018340041962023e-06, "loss": 0.3221, "step": 3607 }, { "epoch": 0.23, "grad_norm": 0.6856148215037243, "learning_rate": 9.017733885715738e-06, "loss": 0.5393, "step": 3608 }, { "epoch": 0.23, "grad_norm": 3.187059545194669, "learning_rate": 9.01712756276654e-06, "loss": 0.3584, "step": 3609 }, { "epoch": 0.23, "grad_norm": 2.237515072571624, "learning_rate": 9.01652107313958e-06, "loss": 0.3429, "step": 3610 }, { "epoch": 0.23, "grad_norm": 7.8302884012032035, "learning_rate": 9.01591441686003e-06, "loss": 0.3252, "step": 3611 }, { "epoch": 0.23, "grad_norm": 3.8054598641119983, "learning_rate": 9.015307593953058e-06, "loss": 0.3379, "step": 3612 }, { "epoch": 0.23, "grad_norm": 2.8162869027273643, "learning_rate": 9.014700604443841e-06, "loss": 0.3201, "step": 3613 }, { "epoch": 0.23, "grad_norm": 3.565828862736785, "learning_rate": 9.014093448357565e-06, "loss": 0.3054, "step": 3614 }, { "epoch": 0.23, "grad_norm": 4.235441939588098, "learning_rate": 9.013486125719421e-06, "loss": 0.3294, "step": 3615 }, { "epoch": 0.23, "grad_norm": 4.50756348779826, "learning_rate": 9.012878636554612e-06, "loss": 0.3289, "step": 3616 }, { "epoch": 0.23, "grad_norm": 4.150400408841957, "learning_rate": 9.012270980888339e-06, "loss": 0.324, "step": 3617 }, { "epoch": 0.23, "grad_norm": 5.8499623137831405, "learning_rate": 9.011663158745815e-06, "loss": 0.3419, "step": 3618 }, { "epoch": 0.23, "grad_norm": 7.099733398191777, "learning_rate": 9.011055170152262e-06, "loss": 0.318, "step": 3619 }, { "epoch": 0.23, "grad_norm": 3.0350593646917723, "learning_rate": 9.010447015132906e-06, "loss": 0.3322, "step": 3620 }, { "epoch": 0.23, "grad_norm": 6.38263128775083, "learning_rate": 9.009838693712977e-06, "loss": 0.3161, "step": 3621 }, { "epoch": 0.23, "grad_norm": 4.93563306779475, "learning_rate": 9.009230205917723e-06, "loss": 0.347, "step": 3622 }, { "epoch": 0.23, "grad_norm": 2.199331672094639, "learning_rate": 9.008621551772384e-06, "loss": 0.2994, "step": 3623 }, { "epoch": 0.23, "grad_norm": 4.090170592346973, "learning_rate": 9.008012731302218e-06, "loss": 0.3171, "step": 3624 }, { "epoch": 0.23, "grad_norm": 0.6784359594335939, "learning_rate": 9.007403744532483e-06, "loss": 0.507, "step": 3625 }, { "epoch": 0.23, "grad_norm": 14.347774684910878, "learning_rate": 9.00679459148845e-06, "loss": 0.3225, "step": 3626 }, { "epoch": 0.23, "grad_norm": 5.125561745303137, "learning_rate": 9.006185272195392e-06, "loss": 0.3219, "step": 3627 }, { "epoch": 0.23, "grad_norm": 0.6468054495308965, "learning_rate": 9.005575786678591e-06, "loss": 0.5253, "step": 3628 }, { "epoch": 0.23, "grad_norm": 12.800250621248189, "learning_rate": 9.004966134963336e-06, "loss": 0.3316, "step": 3629 }, { "epoch": 0.23, "grad_norm": 4.103353805935026, "learning_rate": 9.004356317074923e-06, "loss": 0.3657, "step": 3630 }, { "epoch": 0.23, "grad_norm": 4.928793129887084, "learning_rate": 9.003746333038654e-06, "loss": 0.3305, "step": 3631 }, { "epoch": 0.23, "grad_norm": 3.8442002566843962, "learning_rate": 9.003136182879836e-06, "loss": 0.3017, "step": 3632 }, { "epoch": 0.23, "grad_norm": 5.138988018592437, "learning_rate": 9.00252586662379e-06, "loss": 0.3098, "step": 3633 }, { "epoch": 0.23, "grad_norm": 3.17121237081852, "learning_rate": 9.001915384295836e-06, "loss": 0.3109, "step": 3634 }, { "epoch": 0.23, "grad_norm": 6.181123988311583, "learning_rate": 9.001304735921304e-06, "loss": 0.3384, "step": 3635 }, { "epoch": 0.23, "grad_norm": 3.976924379649157, "learning_rate": 9.000693921525532e-06, "loss": 0.3378, "step": 3636 }, { "epoch": 0.23, "grad_norm": 23.55450110106337, "learning_rate": 9.000082941133864e-06, "loss": 0.3183, "step": 3637 }, { "epoch": 0.23, "grad_norm": 5.263468258046588, "learning_rate": 8.999471794771648e-06, "loss": 0.3129, "step": 3638 }, { "epoch": 0.23, "grad_norm": 3.2530543059965544, "learning_rate": 8.998860482464243e-06, "loss": 0.3411, "step": 3639 }, { "epoch": 0.23, "grad_norm": 3.108798511236559, "learning_rate": 8.998249004237015e-06, "loss": 0.3337, "step": 3640 }, { "epoch": 0.23, "grad_norm": 4.657061460978529, "learning_rate": 8.997637360115334e-06, "loss": 0.3208, "step": 3641 }, { "epoch": 0.23, "grad_norm": 4.294467745471402, "learning_rate": 8.99702555012458e-06, "loss": 0.3128, "step": 3642 }, { "epoch": 0.23, "grad_norm": 12.162573474260642, "learning_rate": 8.996413574290132e-06, "loss": 0.3148, "step": 3643 }, { "epoch": 0.23, "grad_norm": 3.229211969470677, "learning_rate": 8.99580143263739e-06, "loss": 0.3322, "step": 3644 }, { "epoch": 0.23, "grad_norm": 3.3248378481328436, "learning_rate": 8.995189125191747e-06, "loss": 0.3311, "step": 3645 }, { "epoch": 0.23, "grad_norm": 5.46492374513933, "learning_rate": 8.994576651978613e-06, "loss": 0.3179, "step": 3646 }, { "epoch": 0.23, "grad_norm": 3.584422035067047, "learning_rate": 8.993964013023398e-06, "loss": 0.3129, "step": 3647 }, { "epoch": 0.23, "grad_norm": 0.8053558546913938, "learning_rate": 8.993351208351521e-06, "loss": 0.4946, "step": 3648 }, { "epoch": 0.23, "grad_norm": 2.749542234067599, "learning_rate": 8.99273823798841e-06, "loss": 0.3234, "step": 3649 }, { "epoch": 0.23, "grad_norm": 4.943498660557402, "learning_rate": 8.992125101959499e-06, "loss": 0.3296, "step": 3650 }, { "epoch": 0.23, "grad_norm": 5.275118711234532, "learning_rate": 8.991511800290224e-06, "loss": 0.351, "step": 3651 }, { "epoch": 0.23, "grad_norm": 3.236552152549496, "learning_rate": 8.990898333006038e-06, "loss": 0.3288, "step": 3652 }, { "epoch": 0.23, "grad_norm": 2.566987854465521, "learning_rate": 8.990284700132388e-06, "loss": 0.3435, "step": 3653 }, { "epoch": 0.23, "grad_norm": 4.753969343465564, "learning_rate": 8.98967090169474e-06, "loss": 0.3323, "step": 3654 }, { "epoch": 0.23, "grad_norm": 4.09232575087877, "learning_rate": 8.98905693771856e-06, "loss": 0.3177, "step": 3655 }, { "epoch": 0.23, "grad_norm": 13.73294460088611, "learning_rate": 8.988442808229321e-06, "loss": 0.3312, "step": 3656 }, { "epoch": 0.23, "grad_norm": 3.124225989766791, "learning_rate": 8.987828513252508e-06, "loss": 0.3328, "step": 3657 }, { "epoch": 0.23, "grad_norm": 2.6993878960059834, "learning_rate": 8.987214052813605e-06, "loss": 0.321, "step": 3658 }, { "epoch": 0.23, "grad_norm": 3.571417924358675, "learning_rate": 8.98659942693811e-06, "loss": 0.3178, "step": 3659 }, { "epoch": 0.23, "grad_norm": 4.062422683062402, "learning_rate": 8.985984635651523e-06, "loss": 0.3215, "step": 3660 }, { "epoch": 0.23, "grad_norm": 8.53089332379965, "learning_rate": 8.985369678979351e-06, "loss": 0.3241, "step": 3661 }, { "epoch": 0.23, "grad_norm": 2.9476753349579776, "learning_rate": 8.984754556947116e-06, "loss": 0.3145, "step": 3662 }, { "epoch": 0.23, "grad_norm": 3.5988612497606813, "learning_rate": 8.984139269580337e-06, "loss": 0.3628, "step": 3663 }, { "epoch": 0.23, "grad_norm": 2.463845682359028, "learning_rate": 8.98352381690454e-06, "loss": 0.328, "step": 3664 }, { "epoch": 0.23, "grad_norm": 1.9701219402967818, "learning_rate": 8.982908198945266e-06, "loss": 0.3132, "step": 3665 }, { "epoch": 0.23, "grad_norm": 6.189989455839819, "learning_rate": 8.982292415728057e-06, "loss": 0.3458, "step": 3666 }, { "epoch": 0.23, "grad_norm": 2.1810603676710274, "learning_rate": 8.981676467278461e-06, "loss": 0.309, "step": 3667 }, { "epoch": 0.23, "grad_norm": 4.188915244416057, "learning_rate": 8.981060353622037e-06, "loss": 0.3099, "step": 3668 }, { "epoch": 0.23, "grad_norm": 0.8310137010128612, "learning_rate": 8.980444074784347e-06, "loss": 0.5004, "step": 3669 }, { "epoch": 0.23, "grad_norm": 2.259419586692642, "learning_rate": 8.979827630790962e-06, "loss": 0.3066, "step": 3670 }, { "epoch": 0.23, "grad_norm": 0.6841209023908836, "learning_rate": 8.979211021667462e-06, "loss": 0.4819, "step": 3671 }, { "epoch": 0.23, "grad_norm": 2.6469296979890933, "learning_rate": 8.978594247439428e-06, "loss": 0.3148, "step": 3672 }, { "epoch": 0.23, "grad_norm": 94.27647612541318, "learning_rate": 8.977977308132451e-06, "loss": 0.3161, "step": 3673 }, { "epoch": 0.23, "grad_norm": 3.418441586277652, "learning_rate": 8.97736020377213e-06, "loss": 0.3237, "step": 3674 }, { "epoch": 0.23, "grad_norm": 2.2618596391224317, "learning_rate": 8.976742934384069e-06, "loss": 0.3235, "step": 3675 }, { "epoch": 0.23, "grad_norm": 9.700137943136939, "learning_rate": 8.97612549999388e-06, "loss": 0.328, "step": 3676 }, { "epoch": 0.23, "grad_norm": 12.443272284273608, "learning_rate": 8.975507900627183e-06, "loss": 0.3398, "step": 3677 }, { "epoch": 0.23, "grad_norm": 3.320011875026783, "learning_rate": 8.9748901363096e-06, "loss": 0.3062, "step": 3678 }, { "epoch": 0.23, "grad_norm": 3.802621666852393, "learning_rate": 8.974272207066767e-06, "loss": 0.3297, "step": 3679 }, { "epoch": 0.23, "grad_norm": 6.487813321902356, "learning_rate": 8.973654112924321e-06, "loss": 0.3389, "step": 3680 }, { "epoch": 0.23, "grad_norm": 3.811909074531975, "learning_rate": 8.973035853907906e-06, "loss": 0.3192, "step": 3681 }, { "epoch": 0.23, "grad_norm": 2.903283211290379, "learning_rate": 8.972417430043178e-06, "loss": 0.3363, "step": 3682 }, { "epoch": 0.23, "grad_norm": 15.701398877082495, "learning_rate": 8.971798841355794e-06, "loss": 0.3427, "step": 3683 }, { "epoch": 0.23, "grad_norm": 2.833770578560621, "learning_rate": 8.971180087871423e-06, "loss": 0.3268, "step": 3684 }, { "epoch": 0.23, "grad_norm": 4.305994630544555, "learning_rate": 8.970561169615734e-06, "loss": 0.3375, "step": 3685 }, { "epoch": 0.23, "grad_norm": 26.902374538022954, "learning_rate": 8.969942086614413e-06, "loss": 0.3362, "step": 3686 }, { "epoch": 0.23, "grad_norm": 5.1060702742579815, "learning_rate": 8.969322838893141e-06, "loss": 0.3261, "step": 3687 }, { "epoch": 0.23, "grad_norm": 3.6775137361151065, "learning_rate": 8.968703426477614e-06, "loss": 0.3114, "step": 3688 }, { "epoch": 0.23, "grad_norm": 2.3155985014505402, "learning_rate": 8.968083849393535e-06, "loss": 0.3194, "step": 3689 }, { "epoch": 0.23, "grad_norm": 1.1287358994957526, "learning_rate": 8.967464107666605e-06, "loss": 0.5333, "step": 3690 }, { "epoch": 0.23, "grad_norm": 3.3079687843565235, "learning_rate": 8.966844201322546e-06, "loss": 0.3094, "step": 3691 }, { "epoch": 0.23, "grad_norm": 2.262970277809321, "learning_rate": 8.966224130387073e-06, "loss": 0.3257, "step": 3692 }, { "epoch": 0.23, "grad_norm": 2.5092325938675017, "learning_rate": 8.965603894885917e-06, "loss": 0.3101, "step": 3693 }, { "epoch": 0.23, "grad_norm": 2.107537115365177, "learning_rate": 8.96498349484481e-06, "loss": 0.3248, "step": 3694 }, { "epoch": 0.23, "grad_norm": 2.978604594760016, "learning_rate": 8.964362930289497e-06, "loss": 0.3308, "step": 3695 }, { "epoch": 0.23, "grad_norm": 2.581691378008034, "learning_rate": 8.963742201245725e-06, "loss": 0.3307, "step": 3696 }, { "epoch": 0.23, "grad_norm": 6.270998623950874, "learning_rate": 8.963121307739246e-06, "loss": 0.3145, "step": 3697 }, { "epoch": 0.23, "grad_norm": 3.328146910350842, "learning_rate": 8.962500249795826e-06, "loss": 0.3495, "step": 3698 }, { "epoch": 0.23, "grad_norm": 2.4659047660069, "learning_rate": 8.961879027441234e-06, "loss": 0.3082, "step": 3699 }, { "epoch": 0.23, "grad_norm": 1.6379598399266184, "learning_rate": 8.961257640701243e-06, "loss": 0.3328, "step": 3700 }, { "epoch": 0.23, "grad_norm": 4.803712893457548, "learning_rate": 8.960636089601636e-06, "loss": 0.309, "step": 3701 }, { "epoch": 0.23, "grad_norm": 1.910081057150349, "learning_rate": 8.960014374168203e-06, "loss": 0.3428, "step": 3702 }, { "epoch": 0.23, "grad_norm": 4.2453245711636765, "learning_rate": 8.959392494426739e-06, "loss": 0.324, "step": 3703 }, { "epoch": 0.23, "grad_norm": 2.6834998604875886, "learning_rate": 8.958770450403049e-06, "loss": 0.3078, "step": 3704 }, { "epoch": 0.23, "grad_norm": 3.0581316057051184, "learning_rate": 8.958148242122941e-06, "loss": 0.3152, "step": 3705 }, { "epoch": 0.23, "grad_norm": 3.5118843044790875, "learning_rate": 8.957525869612234e-06, "loss": 0.3574, "step": 3706 }, { "epoch": 0.23, "grad_norm": 3.8920246205566102, "learning_rate": 8.956903332896747e-06, "loss": 0.3389, "step": 3707 }, { "epoch": 0.23, "grad_norm": 2.730116226629794, "learning_rate": 8.956280632002312e-06, "loss": 0.312, "step": 3708 }, { "epoch": 0.23, "grad_norm": 4.230495785228844, "learning_rate": 8.955657766954768e-06, "loss": 0.3301, "step": 3709 }, { "epoch": 0.23, "grad_norm": 4.865760992518893, "learning_rate": 8.955034737779955e-06, "loss": 0.3301, "step": 3710 }, { "epoch": 0.23, "grad_norm": 5.083156251365915, "learning_rate": 8.95441154450373e-06, "loss": 0.3169, "step": 3711 }, { "epoch": 0.23, "grad_norm": 2.8244629017007736, "learning_rate": 8.953788187151941e-06, "loss": 0.3368, "step": 3712 }, { "epoch": 0.23, "grad_norm": 2.459738031389702, "learning_rate": 8.953164665750462e-06, "loss": 0.3271, "step": 3713 }, { "epoch": 0.23, "grad_norm": 1.7308846485377833, "learning_rate": 8.952540980325158e-06, "loss": 0.327, "step": 3714 }, { "epoch": 0.23, "grad_norm": 2.181800187862495, "learning_rate": 8.951917130901906e-06, "loss": 0.3441, "step": 3715 }, { "epoch": 0.23, "grad_norm": 3.1652979984756446, "learning_rate": 8.951293117506595e-06, "loss": 0.3309, "step": 3716 }, { "epoch": 0.23, "grad_norm": 5.781485351811903, "learning_rate": 8.950668940165112e-06, "loss": 0.3367, "step": 3717 }, { "epoch": 0.23, "grad_norm": 6.629968117730134, "learning_rate": 8.95004459890336e-06, "loss": 0.3476, "step": 3718 }, { "epoch": 0.23, "grad_norm": 3.1850867382302868, "learning_rate": 8.94942009374724e-06, "loss": 0.3261, "step": 3719 }, { "epoch": 0.23, "grad_norm": 2.3544394777063706, "learning_rate": 8.948795424722667e-06, "loss": 0.3375, "step": 3720 }, { "epoch": 0.23, "grad_norm": 6.028530381368752, "learning_rate": 8.948170591855556e-06, "loss": 0.3539, "step": 3721 }, { "epoch": 0.23, "grad_norm": 3.4090725307856715, "learning_rate": 8.947545595171836e-06, "loss": 0.3172, "step": 3722 }, { "epoch": 0.23, "grad_norm": 2.185929210270183, "learning_rate": 8.946920434697438e-06, "loss": 0.3359, "step": 3723 }, { "epoch": 0.23, "grad_norm": 2.6740142148931216, "learning_rate": 8.946295110458298e-06, "loss": 0.3222, "step": 3724 }, { "epoch": 0.23, "grad_norm": 1.9662346786765055, "learning_rate": 8.945669622480366e-06, "loss": 0.3276, "step": 3725 }, { "epoch": 0.23, "grad_norm": 3.0089739848021453, "learning_rate": 8.945043970789595e-06, "loss": 0.3392, "step": 3726 }, { "epoch": 0.23, "grad_norm": 2.270920344430561, "learning_rate": 8.94441815541194e-06, "loss": 0.3474, "step": 3727 }, { "epoch": 0.23, "grad_norm": 2.2934456477702465, "learning_rate": 8.94379217637337e-06, "loss": 0.3351, "step": 3728 }, { "epoch": 0.23, "grad_norm": 12.182996384810266, "learning_rate": 8.943166033699858e-06, "loss": 0.3303, "step": 3729 }, { "epoch": 0.23, "grad_norm": 2.6661865474886857, "learning_rate": 8.942539727417383e-06, "loss": 0.3328, "step": 3730 }, { "epoch": 0.23, "grad_norm": 0.8438046535209291, "learning_rate": 8.941913257551933e-06, "loss": 0.5067, "step": 3731 }, { "epoch": 0.23, "grad_norm": 2.816218547431086, "learning_rate": 8.941286624129499e-06, "loss": 0.3015, "step": 3732 }, { "epoch": 0.23, "grad_norm": 2.781419759224102, "learning_rate": 8.940659827176083e-06, "loss": 0.3221, "step": 3733 }, { "epoch": 0.23, "grad_norm": 5.139232330368408, "learning_rate": 8.940032866717691e-06, "loss": 0.3431, "step": 3734 }, { "epoch": 0.23, "grad_norm": 4.040457050157037, "learning_rate": 8.939405742780339e-06, "loss": 0.3329, "step": 3735 }, { "epoch": 0.23, "grad_norm": 4.425416944842065, "learning_rate": 8.938778455390044e-06, "loss": 0.3168, "step": 3736 }, { "epoch": 0.24, "grad_norm": 2.55777911347417, "learning_rate": 8.938151004572836e-06, "loss": 0.3279, "step": 3737 }, { "epoch": 0.24, "grad_norm": 3.3790259567541567, "learning_rate": 8.937523390354746e-06, "loss": 0.3341, "step": 3738 }, { "epoch": 0.24, "grad_norm": 2.4216656743961242, "learning_rate": 8.936895612761817e-06, "loss": 0.3277, "step": 3739 }, { "epoch": 0.24, "grad_norm": 3.976394989991298, "learning_rate": 8.936267671820097e-06, "loss": 0.3328, "step": 3740 }, { "epoch": 0.24, "grad_norm": 1.830408235172937, "learning_rate": 8.935639567555639e-06, "loss": 0.3122, "step": 3741 }, { "epoch": 0.24, "grad_norm": 2.3137290923268266, "learning_rate": 8.935011299994506e-06, "loss": 0.3483, "step": 3742 }, { "epoch": 0.24, "grad_norm": 0.6892055719287272, "learning_rate": 8.934382869162763e-06, "loss": 0.4885, "step": 3743 }, { "epoch": 0.24, "grad_norm": 3.3636777971339535, "learning_rate": 8.933754275086487e-06, "loss": 0.3242, "step": 3744 }, { "epoch": 0.24, "grad_norm": 2.5710501860164143, "learning_rate": 8.93312551779176e-06, "loss": 0.3491, "step": 3745 }, { "epoch": 0.24, "grad_norm": 2.2249085380746747, "learning_rate": 8.93249659730467e-06, "loss": 0.3356, "step": 3746 }, { "epoch": 0.24, "grad_norm": 2.5807819832528485, "learning_rate": 8.931867513651311e-06, "loss": 0.3285, "step": 3747 }, { "epoch": 0.24, "grad_norm": 2.8940150187139984, "learning_rate": 8.931238266857783e-06, "loss": 0.3306, "step": 3748 }, { "epoch": 0.24, "grad_norm": 19.766635051508676, "learning_rate": 8.930608856950198e-06, "loss": 0.313, "step": 3749 }, { "epoch": 0.24, "grad_norm": 4.104603320745064, "learning_rate": 8.92997928395467e-06, "loss": 0.3398, "step": 3750 }, { "epoch": 0.24, "grad_norm": 4.647511585459074, "learning_rate": 8.929349547897321e-06, "loss": 0.3167, "step": 3751 }, { "epoch": 0.24, "grad_norm": 3.7658197810386245, "learning_rate": 8.92871964880428e-06, "loss": 0.3431, "step": 3752 }, { "epoch": 0.24, "grad_norm": 2.5170280748470266, "learning_rate": 8.928089586701682e-06, "loss": 0.3142, "step": 3753 }, { "epoch": 0.24, "grad_norm": 3.8799127059400216, "learning_rate": 8.927459361615672e-06, "loss": 0.3149, "step": 3754 }, { "epoch": 0.24, "grad_norm": 2.1001750807614474, "learning_rate": 8.926828973572396e-06, "loss": 0.3292, "step": 3755 }, { "epoch": 0.24, "grad_norm": 5.834904750639392, "learning_rate": 8.92619842259801e-06, "loss": 0.3167, "step": 3756 }, { "epoch": 0.24, "grad_norm": 3.6708523043172825, "learning_rate": 8.925567708718679e-06, "loss": 0.3283, "step": 3757 }, { "epoch": 0.24, "grad_norm": 4.647499695013446, "learning_rate": 8.92493683196057e-06, "loss": 0.3223, "step": 3758 }, { "epoch": 0.24, "grad_norm": 3.0279041702713605, "learning_rate": 8.924305792349861e-06, "loss": 0.333, "step": 3759 }, { "epoch": 0.24, "grad_norm": 4.7332960540866695, "learning_rate": 8.923674589912735e-06, "loss": 0.3194, "step": 3760 }, { "epoch": 0.24, "grad_norm": 3.432950887421614, "learning_rate": 8.92304322467538e-06, "loss": 0.341, "step": 3761 }, { "epoch": 0.24, "grad_norm": 4.082062228111898, "learning_rate": 8.922411696663992e-06, "loss": 0.3478, "step": 3762 }, { "epoch": 0.24, "grad_norm": 2.9000167492809568, "learning_rate": 8.921780005904779e-06, "loss": 0.3429, "step": 3763 }, { "epoch": 0.24, "grad_norm": 3.2337721272758104, "learning_rate": 8.921148152423946e-06, "loss": 0.3257, "step": 3764 }, { "epoch": 0.24, "grad_norm": 2.471092327159333, "learning_rate": 8.920516136247712e-06, "loss": 0.3332, "step": 3765 }, { "epoch": 0.24, "grad_norm": 2.743527301229908, "learning_rate": 8.9198839574023e-06, "loss": 0.3247, "step": 3766 }, { "epoch": 0.24, "grad_norm": 1.899637854207825, "learning_rate": 8.91925161591394e-06, "loss": 0.3148, "step": 3767 }, { "epoch": 0.24, "grad_norm": 0.6936980063567434, "learning_rate": 8.918619111808869e-06, "loss": 0.5216, "step": 3768 }, { "epoch": 0.24, "grad_norm": 4.055760857112053, "learning_rate": 8.917986445113329e-06, "loss": 0.317, "step": 3769 }, { "epoch": 0.24, "grad_norm": 2.2410123659660615, "learning_rate": 8.917353615853575e-06, "loss": 0.3434, "step": 3770 }, { "epoch": 0.24, "grad_norm": 2.233610095788776, "learning_rate": 8.91672062405586e-06, "loss": 0.3511, "step": 3771 }, { "epoch": 0.24, "grad_norm": 3.05950930626397, "learning_rate": 8.916087469746448e-06, "loss": 0.3216, "step": 3772 }, { "epoch": 0.24, "grad_norm": 3.964420344525788, "learning_rate": 8.915454152951613e-06, "loss": 0.3484, "step": 3773 }, { "epoch": 0.24, "grad_norm": 2.502269758118046, "learning_rate": 8.91482067369763e-06, "loss": 0.3218, "step": 3774 }, { "epoch": 0.24, "grad_norm": 4.546544722679174, "learning_rate": 8.914187032010786e-06, "loss": 0.3439, "step": 3775 }, { "epoch": 0.24, "grad_norm": 2.9001461345661297, "learning_rate": 8.913553227917366e-06, "loss": 0.3146, "step": 3776 }, { "epoch": 0.24, "grad_norm": 2.895905726844477, "learning_rate": 8.912919261443674e-06, "loss": 0.341, "step": 3777 }, { "epoch": 0.24, "grad_norm": 3.256103512767372, "learning_rate": 8.912285132616012e-06, "loss": 0.3249, "step": 3778 }, { "epoch": 0.24, "grad_norm": 1.7257512595344169, "learning_rate": 8.911650841460688e-06, "loss": 0.3137, "step": 3779 }, { "epoch": 0.24, "grad_norm": 3.555245107250307, "learning_rate": 8.911016388004026e-06, "loss": 0.3293, "step": 3780 }, { "epoch": 0.24, "grad_norm": 5.247794459675173, "learning_rate": 8.910381772272345e-06, "loss": 0.3238, "step": 3781 }, { "epoch": 0.24, "grad_norm": 5.205686012722772, "learning_rate": 8.90974699429198e-06, "loss": 0.3413, "step": 3782 }, { "epoch": 0.24, "grad_norm": 4.756368157426908, "learning_rate": 8.909112054089266e-06, "loss": 0.3684, "step": 3783 }, { "epoch": 0.24, "grad_norm": 1.8866906639618424, "learning_rate": 8.90847695169055e-06, "loss": 0.3259, "step": 3784 }, { "epoch": 0.24, "grad_norm": 4.379658212804618, "learning_rate": 8.907841687122185e-06, "loss": 0.3373, "step": 3785 }, { "epoch": 0.24, "grad_norm": 3.1938073812990284, "learning_rate": 8.907206260410527e-06, "loss": 0.3446, "step": 3786 }, { "epoch": 0.24, "grad_norm": 3.0231368009515407, "learning_rate": 8.906570671581937e-06, "loss": 0.3385, "step": 3787 }, { "epoch": 0.24, "grad_norm": 3.854565265644447, "learning_rate": 8.905934920662796e-06, "loss": 0.3425, "step": 3788 }, { "epoch": 0.24, "grad_norm": 2.6267245134019084, "learning_rate": 8.905299007679475e-06, "loss": 0.3313, "step": 3789 }, { "epoch": 0.24, "grad_norm": 3.18832562818506, "learning_rate": 8.904662932658363e-06, "loss": 0.3223, "step": 3790 }, { "epoch": 0.24, "grad_norm": 3.2337071872214906, "learning_rate": 8.90402669562585e-06, "loss": 0.3307, "step": 3791 }, { "epoch": 0.24, "grad_norm": 3.152753565770728, "learning_rate": 8.903390296608334e-06, "loss": 0.3188, "step": 3792 }, { "epoch": 0.24, "grad_norm": 2.4122914444398, "learning_rate": 8.902753735632221e-06, "loss": 0.3407, "step": 3793 }, { "epoch": 0.24, "grad_norm": 8.65155176540431, "learning_rate": 8.902117012723926e-06, "loss": 0.3396, "step": 3794 }, { "epoch": 0.24, "grad_norm": 2.2503548434494247, "learning_rate": 8.901480127909862e-06, "loss": 0.3302, "step": 3795 }, { "epoch": 0.24, "grad_norm": 7.316943240218811, "learning_rate": 8.900843081216462e-06, "loss": 0.3291, "step": 3796 }, { "epoch": 0.24, "grad_norm": 2.2810976801956016, "learning_rate": 8.90020587267015e-06, "loss": 0.3135, "step": 3797 }, { "epoch": 0.24, "grad_norm": 2.7027236895256603, "learning_rate": 8.89956850229737e-06, "loss": 0.318, "step": 3798 }, { "epoch": 0.24, "grad_norm": 1.9369146122029532, "learning_rate": 8.898930970124567e-06, "loss": 0.3121, "step": 3799 }, { "epoch": 0.24, "grad_norm": 7.284425396522207, "learning_rate": 8.898293276178191e-06, "loss": 0.3415, "step": 3800 }, { "epoch": 0.24, "grad_norm": 7.813046511380066, "learning_rate": 8.897655420484705e-06, "loss": 0.3289, "step": 3801 }, { "epoch": 0.24, "grad_norm": 2.7255512268285473, "learning_rate": 8.897017403070572e-06, "loss": 0.3326, "step": 3802 }, { "epoch": 0.24, "grad_norm": 2.884150913605871, "learning_rate": 8.896379223962265e-06, "loss": 0.3316, "step": 3803 }, { "epoch": 0.24, "grad_norm": 2.4795682828721763, "learning_rate": 8.895740883186263e-06, "loss": 0.3576, "step": 3804 }, { "epoch": 0.24, "grad_norm": 3.6271732721104906, "learning_rate": 8.895102380769051e-06, "loss": 0.3586, "step": 3805 }, { "epoch": 0.24, "grad_norm": 4.08513026610504, "learning_rate": 8.894463716737125e-06, "loss": 0.337, "step": 3806 }, { "epoch": 0.24, "grad_norm": 2.4815764338416195, "learning_rate": 8.89382489111698e-06, "loss": 0.3277, "step": 3807 }, { "epoch": 0.24, "grad_norm": 2.491311962319985, "learning_rate": 8.893185903935125e-06, "loss": 0.3257, "step": 3808 }, { "epoch": 0.24, "grad_norm": 2.3811724499469147, "learning_rate": 8.892546755218071e-06, "loss": 0.3281, "step": 3809 }, { "epoch": 0.24, "grad_norm": 6.7815819883152955, "learning_rate": 8.89190744499234e-06, "loss": 0.3304, "step": 3810 }, { "epoch": 0.24, "grad_norm": 3.149020440309964, "learning_rate": 8.891267973284457e-06, "loss": 0.3349, "step": 3811 }, { "epoch": 0.24, "grad_norm": 1.9390393546693092, "learning_rate": 8.890628340120951e-06, "loss": 0.3374, "step": 3812 }, { "epoch": 0.24, "grad_norm": 1.97863421238819, "learning_rate": 8.889988545528366e-06, "loss": 0.3251, "step": 3813 }, { "epoch": 0.24, "grad_norm": 4.164845331444898, "learning_rate": 8.889348589533247e-06, "loss": 0.3195, "step": 3814 }, { "epoch": 0.24, "grad_norm": 2.3626702090731966, "learning_rate": 8.888708472162147e-06, "loss": 0.3169, "step": 3815 }, { "epoch": 0.24, "grad_norm": 1.7538277936932547, "learning_rate": 8.888068193441625e-06, "loss": 0.3222, "step": 3816 }, { "epoch": 0.24, "grad_norm": 1.8797303039884392, "learning_rate": 8.887427753398249e-06, "loss": 0.3289, "step": 3817 }, { "epoch": 0.24, "grad_norm": 1.6560216591031136, "learning_rate": 8.886787152058586e-06, "loss": 0.3268, "step": 3818 }, { "epoch": 0.24, "grad_norm": 1.9276721444233782, "learning_rate": 8.886146389449226e-06, "loss": 0.3232, "step": 3819 }, { "epoch": 0.24, "grad_norm": 4.730994382511737, "learning_rate": 8.885505465596747e-06, "loss": 0.3362, "step": 3820 }, { "epoch": 0.24, "grad_norm": 2.2764880557056943, "learning_rate": 8.884864380527744e-06, "loss": 0.3371, "step": 3821 }, { "epoch": 0.24, "grad_norm": 2.450776452083643, "learning_rate": 8.884223134268818e-06, "loss": 0.3482, "step": 3822 }, { "epoch": 0.24, "grad_norm": 2.20623220073423, "learning_rate": 8.883581726846577e-06, "loss": 0.3249, "step": 3823 }, { "epoch": 0.24, "grad_norm": 2.187844803081438, "learning_rate": 8.882940158287632e-06, "loss": 0.3042, "step": 3824 }, { "epoch": 0.24, "grad_norm": 2.7681831053019352, "learning_rate": 8.882298428618601e-06, "loss": 0.3337, "step": 3825 }, { "epoch": 0.24, "grad_norm": 2.651769413743395, "learning_rate": 8.881656537866114e-06, "loss": 0.33, "step": 3826 }, { "epoch": 0.24, "grad_norm": 1.684097477318379, "learning_rate": 8.8810144860568e-06, "loss": 0.3258, "step": 3827 }, { "epoch": 0.24, "grad_norm": 2.8215029317151425, "learning_rate": 8.880372273217307e-06, "loss": 0.3073, "step": 3828 }, { "epoch": 0.24, "grad_norm": 2.582169091594926, "learning_rate": 8.879729899374269e-06, "loss": 0.3372, "step": 3829 }, { "epoch": 0.24, "grad_norm": 2.6347330332007806, "learning_rate": 8.879087364554351e-06, "loss": 0.3361, "step": 3830 }, { "epoch": 0.24, "grad_norm": 2.2103303600187734, "learning_rate": 8.878444668784208e-06, "loss": 0.3396, "step": 3831 }, { "epoch": 0.24, "grad_norm": 1.8936190746169632, "learning_rate": 8.877801812090505e-06, "loss": 0.3371, "step": 3832 }, { "epoch": 0.24, "grad_norm": 6.024179134237282, "learning_rate": 8.877158794499919e-06, "loss": 0.3271, "step": 3833 }, { "epoch": 0.24, "grad_norm": 2.577105719299173, "learning_rate": 8.876515616039126e-06, "loss": 0.3583, "step": 3834 }, { "epoch": 0.24, "grad_norm": 1.7980166390455867, "learning_rate": 8.875872276734816e-06, "loss": 0.3328, "step": 3835 }, { "epoch": 0.24, "grad_norm": 1.718324015467852, "learning_rate": 8.87522877661368e-06, "loss": 0.3248, "step": 3836 }, { "epoch": 0.24, "grad_norm": 3.6030182399426876, "learning_rate": 8.874585115702419e-06, "loss": 0.3337, "step": 3837 }, { "epoch": 0.24, "grad_norm": 3.13675666787467, "learning_rate": 8.873941294027742e-06, "loss": 0.3441, "step": 3838 }, { "epoch": 0.24, "grad_norm": 3.92834574895222, "learning_rate": 8.873297311616356e-06, "loss": 0.3284, "step": 3839 }, { "epoch": 0.24, "grad_norm": 3.09250773906718, "learning_rate": 8.872653168494988e-06, "loss": 0.3543, "step": 3840 }, { "epoch": 0.24, "grad_norm": 3.0568890325978706, "learning_rate": 8.872008864690358e-06, "loss": 0.3658, "step": 3841 }, { "epoch": 0.24, "grad_norm": 11.629916235669576, "learning_rate": 8.871364400229206e-06, "loss": 0.3304, "step": 3842 }, { "epoch": 0.24, "grad_norm": 1.4120061975792972, "learning_rate": 8.870719775138266e-06, "loss": 0.3273, "step": 3843 }, { "epoch": 0.24, "grad_norm": 2.4307263050769583, "learning_rate": 8.870074989444289e-06, "loss": 0.3571, "step": 3844 }, { "epoch": 0.24, "grad_norm": 1.631956928757783, "learning_rate": 8.869430043174027e-06, "loss": 0.3187, "step": 3845 }, { "epoch": 0.24, "grad_norm": 1.9659698865753197, "learning_rate": 8.868784936354239e-06, "loss": 0.3204, "step": 3846 }, { "epoch": 0.24, "grad_norm": 2.3322765690960945, "learning_rate": 8.868139669011693e-06, "loss": 0.3383, "step": 3847 }, { "epoch": 0.24, "grad_norm": 2.497011087121863, "learning_rate": 8.86749424117316e-06, "loss": 0.3261, "step": 3848 }, { "epoch": 0.24, "grad_norm": 2.2073216602945696, "learning_rate": 8.866848652865422e-06, "loss": 0.3473, "step": 3849 }, { "epoch": 0.24, "grad_norm": 2.733412826783298, "learning_rate": 8.866202904115265e-06, "loss": 0.3377, "step": 3850 }, { "epoch": 0.24, "grad_norm": 4.996459300023647, "learning_rate": 8.865556994949484e-06, "loss": 0.344, "step": 3851 }, { "epoch": 0.24, "grad_norm": 0.7861627929439978, "learning_rate": 8.864910925394875e-06, "loss": 0.5338, "step": 3852 }, { "epoch": 0.24, "grad_norm": 1.8191845219998062, "learning_rate": 8.864264695478249e-06, "loss": 0.3341, "step": 3853 }, { "epoch": 0.24, "grad_norm": 2.6348075695485664, "learning_rate": 8.863618305226415e-06, "loss": 0.3345, "step": 3854 }, { "epoch": 0.24, "grad_norm": 2.3203016489538917, "learning_rate": 8.862971754666197e-06, "loss": 0.3778, "step": 3855 }, { "epoch": 0.24, "grad_norm": 2.023482839826505, "learning_rate": 8.862325043824417e-06, "loss": 0.3353, "step": 3856 }, { "epoch": 0.24, "grad_norm": 2.0528876842471475, "learning_rate": 8.861678172727912e-06, "loss": 0.3191, "step": 3857 }, { "epoch": 0.24, "grad_norm": 2.890524971239111, "learning_rate": 8.861031141403521e-06, "loss": 0.3361, "step": 3858 }, { "epoch": 0.24, "grad_norm": 3.9241296618676054, "learning_rate": 8.86038394987809e-06, "loss": 0.3414, "step": 3859 }, { "epoch": 0.24, "grad_norm": 2.095547267068001, "learning_rate": 8.859736598178472e-06, "loss": 0.3347, "step": 3860 }, { "epoch": 0.24, "grad_norm": 3.4156849155693427, "learning_rate": 8.859089086331525e-06, "loss": 0.3499, "step": 3861 }, { "epoch": 0.24, "grad_norm": 2.41831697144385, "learning_rate": 8.85844141436412e-06, "loss": 0.3265, "step": 3862 }, { "epoch": 0.24, "grad_norm": 3.005906095641753, "learning_rate": 8.857793582303126e-06, "loss": 0.3405, "step": 3863 }, { "epoch": 0.24, "grad_norm": 1.968545434221177, "learning_rate": 8.857145590175424e-06, "loss": 0.3602, "step": 3864 }, { "epoch": 0.24, "grad_norm": 0.7047470613912522, "learning_rate": 8.8564974380079e-06, "loss": 0.4768, "step": 3865 }, { "epoch": 0.24, "grad_norm": 1.9544057181880778, "learning_rate": 8.855849125827449e-06, "loss": 0.331, "step": 3866 }, { "epoch": 0.24, "grad_norm": 26.940024139412586, "learning_rate": 8.855200653660968e-06, "loss": 0.3507, "step": 3867 }, { "epoch": 0.24, "grad_norm": 5.0242492038546995, "learning_rate": 8.854552021535364e-06, "loss": 0.349, "step": 3868 }, { "epoch": 0.24, "grad_norm": 2.741353559421358, "learning_rate": 8.853903229477551e-06, "loss": 0.3585, "step": 3869 }, { "epoch": 0.24, "grad_norm": 0.6038561947185022, "learning_rate": 8.853254277514448e-06, "loss": 0.5322, "step": 3870 }, { "epoch": 0.24, "grad_norm": 2.1505945163444187, "learning_rate": 8.852605165672978e-06, "loss": 0.325, "step": 3871 }, { "epoch": 0.24, "grad_norm": 12.921557449567707, "learning_rate": 8.85195589398008e-06, "loss": 0.3531, "step": 3872 }, { "epoch": 0.24, "grad_norm": 1.8701218681116438, "learning_rate": 8.851306462462689e-06, "loss": 0.3349, "step": 3873 }, { "epoch": 0.24, "grad_norm": 5.742681666858432, "learning_rate": 8.850656871147751e-06, "loss": 0.3391, "step": 3874 }, { "epoch": 0.24, "grad_norm": 2.2987565301394373, "learning_rate": 8.850007120062222e-06, "loss": 0.328, "step": 3875 }, { "epoch": 0.24, "grad_norm": 1.915574876506993, "learning_rate": 8.849357209233058e-06, "loss": 0.3304, "step": 3876 }, { "epoch": 0.24, "grad_norm": 2.592669304463846, "learning_rate": 8.848707138687227e-06, "loss": 0.3223, "step": 3877 }, { "epoch": 0.24, "grad_norm": 7.59090898074836, "learning_rate": 8.848056908451698e-06, "loss": 0.3329, "step": 3878 }, { "epoch": 0.24, "grad_norm": 1.986947695461738, "learning_rate": 8.847406518553456e-06, "loss": 0.3285, "step": 3879 }, { "epoch": 0.24, "grad_norm": 2.0903116730798996, "learning_rate": 8.846755969019483e-06, "loss": 0.3604, "step": 3880 }, { "epoch": 0.24, "grad_norm": 2.156612702728645, "learning_rate": 8.846105259876771e-06, "loss": 0.3292, "step": 3881 }, { "epoch": 0.24, "grad_norm": 3.6536210372053626, "learning_rate": 8.845454391152322e-06, "loss": 0.3352, "step": 3882 }, { "epoch": 0.24, "grad_norm": 4.264987913288185, "learning_rate": 8.84480336287314e-06, "loss": 0.3223, "step": 3883 }, { "epoch": 0.24, "grad_norm": 4.925664752201105, "learning_rate": 8.844152175066236e-06, "loss": 0.3229, "step": 3884 }, { "epoch": 0.24, "grad_norm": 3.2169062660615215, "learning_rate": 8.84350082775863e-06, "loss": 0.3523, "step": 3885 }, { "epoch": 0.24, "grad_norm": 1.766125838423153, "learning_rate": 8.84284932097735e-06, "loss": 0.3239, "step": 3886 }, { "epoch": 0.24, "grad_norm": 1.894999079590576, "learning_rate": 8.842197654749424e-06, "loss": 0.3214, "step": 3887 }, { "epoch": 0.24, "grad_norm": 2.061528927370333, "learning_rate": 8.841545829101895e-06, "loss": 0.3276, "step": 3888 }, { "epoch": 0.24, "grad_norm": 3.705359907076887, "learning_rate": 8.840893844061804e-06, "loss": 0.3557, "step": 3889 }, { "epoch": 0.24, "grad_norm": 2.401023642036005, "learning_rate": 8.840241699656207e-06, "loss": 0.3414, "step": 3890 }, { "epoch": 0.24, "grad_norm": 3.5443030138097336, "learning_rate": 8.839589395912159e-06, "loss": 0.3158, "step": 3891 }, { "epoch": 0.24, "grad_norm": 3.336497941852103, "learning_rate": 8.838936932856727e-06, "loss": 0.3442, "step": 3892 }, { "epoch": 0.24, "grad_norm": 24.628950073411175, "learning_rate": 8.838284310516985e-06, "loss": 0.3462, "step": 3893 }, { "epoch": 0.24, "grad_norm": 1.9694358642533767, "learning_rate": 8.837631528920008e-06, "loss": 0.3383, "step": 3894 }, { "epoch": 0.24, "grad_norm": 1.9458972961783314, "learning_rate": 8.836978588092883e-06, "loss": 0.3158, "step": 3895 }, { "epoch": 0.25, "grad_norm": 1.8284781128041392, "learning_rate": 8.8363254880627e-06, "loss": 0.315, "step": 3896 }, { "epoch": 0.25, "grad_norm": 2.378205330626439, "learning_rate": 8.83567222885656e-06, "loss": 0.3201, "step": 3897 }, { "epoch": 0.25, "grad_norm": 5.530432467524255, "learning_rate": 8.835018810501565e-06, "loss": 0.3473, "step": 3898 }, { "epoch": 0.25, "grad_norm": 2.0424361820949244, "learning_rate": 8.834365233024829e-06, "loss": 0.3436, "step": 3899 }, { "epoch": 0.25, "grad_norm": 2.1196827075666516, "learning_rate": 8.833711496453468e-06, "loss": 0.3393, "step": 3900 }, { "epoch": 0.25, "grad_norm": 4.952618656269322, "learning_rate": 8.833057600814607e-06, "loss": 0.3624, "step": 3901 }, { "epoch": 0.25, "grad_norm": 1.440008519259911, "learning_rate": 8.832403546135379e-06, "loss": 0.3372, "step": 3902 }, { "epoch": 0.25, "grad_norm": 13.41174185418652, "learning_rate": 8.831749332442921e-06, "loss": 0.3237, "step": 3903 }, { "epoch": 0.25, "grad_norm": 3.2010395801925373, "learning_rate": 8.831094959764377e-06, "loss": 0.3481, "step": 3904 }, { "epoch": 0.25, "grad_norm": 7.583312141968117, "learning_rate": 8.830440428126898e-06, "loss": 0.3238, "step": 3905 }, { "epoch": 0.25, "grad_norm": 0.6722910462704026, "learning_rate": 8.829785737557642e-06, "loss": 0.4975, "step": 3906 }, { "epoch": 0.25, "grad_norm": 1.8359111684459553, "learning_rate": 8.829130888083774e-06, "loss": 0.3305, "step": 3907 }, { "epoch": 0.25, "grad_norm": 2.0463744849386925, "learning_rate": 8.828475879732463e-06, "loss": 0.3269, "step": 3908 }, { "epoch": 0.25, "grad_norm": 1.855492911832764, "learning_rate": 8.827820712530888e-06, "loss": 0.3249, "step": 3909 }, { "epoch": 0.25, "grad_norm": 1.4209041408111671, "learning_rate": 8.827165386506233e-06, "loss": 0.3198, "step": 3910 }, { "epoch": 0.25, "grad_norm": 3.925380030191963, "learning_rate": 8.826509901685689e-06, "loss": 0.3197, "step": 3911 }, { "epoch": 0.25, "grad_norm": 2.2511658113980753, "learning_rate": 8.82585425809645e-06, "loss": 0.3525, "step": 3912 }, { "epoch": 0.25, "grad_norm": 2.460079825444177, "learning_rate": 8.825198455765724e-06, "loss": 0.3389, "step": 3913 }, { "epoch": 0.25, "grad_norm": 2.9491557044021177, "learning_rate": 8.824542494720721e-06, "loss": 0.3192, "step": 3914 }, { "epoch": 0.25, "grad_norm": 3.9619002074501823, "learning_rate": 8.823886374988655e-06, "loss": 0.349, "step": 3915 }, { "epoch": 0.25, "grad_norm": 1.5782371474866366, "learning_rate": 8.823230096596751e-06, "loss": 0.3208, "step": 3916 }, { "epoch": 0.25, "grad_norm": 1.7775910976015772, "learning_rate": 8.82257365957224e-06, "loss": 0.341, "step": 3917 }, { "epoch": 0.25, "grad_norm": 2.419382613452219, "learning_rate": 8.821917063942359e-06, "loss": 0.3168, "step": 3918 }, { "epoch": 0.25, "grad_norm": 2.2680541421652127, "learning_rate": 8.82126030973435e-06, "loss": 0.3577, "step": 3919 }, { "epoch": 0.25, "grad_norm": 1.3986296824244648, "learning_rate": 8.820603396975463e-06, "loss": 0.3231, "step": 3920 }, { "epoch": 0.25, "grad_norm": 1.792165517815148, "learning_rate": 8.819946325692955e-06, "loss": 0.3388, "step": 3921 }, { "epoch": 0.25, "grad_norm": 1.9036276897324675, "learning_rate": 8.81928909591409e-06, "loss": 0.3421, "step": 3922 }, { "epoch": 0.25, "grad_norm": 2.5755263412413703, "learning_rate": 8.818631707666136e-06, "loss": 0.3314, "step": 3923 }, { "epoch": 0.25, "grad_norm": 2.905918617029896, "learning_rate": 8.817974160976368e-06, "loss": 0.3293, "step": 3924 }, { "epoch": 0.25, "grad_norm": 2.074687240108824, "learning_rate": 8.817316455872073e-06, "loss": 0.3397, "step": 3925 }, { "epoch": 0.25, "grad_norm": 1.6556156451209951, "learning_rate": 8.816658592380538e-06, "loss": 0.3581, "step": 3926 }, { "epoch": 0.25, "grad_norm": 2.5225123949991586, "learning_rate": 8.816000570529057e-06, "loss": 0.3395, "step": 3927 }, { "epoch": 0.25, "grad_norm": 1.8796581244586301, "learning_rate": 8.815342390344937e-06, "loss": 0.3456, "step": 3928 }, { "epoch": 0.25, "grad_norm": 3.6006706301784175, "learning_rate": 8.814684051855482e-06, "loss": 0.339, "step": 3929 }, { "epoch": 0.25, "grad_norm": 2.756539450428205, "learning_rate": 8.814025555088011e-06, "loss": 0.3544, "step": 3930 }, { "epoch": 0.25, "grad_norm": 2.0715619432674304, "learning_rate": 8.813366900069844e-06, "loss": 0.3456, "step": 3931 }, { "epoch": 0.25, "grad_norm": 2.210278623420081, "learning_rate": 8.812708086828312e-06, "loss": 0.3466, "step": 3932 }, { "epoch": 0.25, "grad_norm": 3.6949532304163624, "learning_rate": 8.81204911539075e-06, "loss": 0.337, "step": 3933 }, { "epoch": 0.25, "grad_norm": 3.4190388446630333, "learning_rate": 8.8113899857845e-06, "loss": 0.3304, "step": 3934 }, { "epoch": 0.25, "grad_norm": 2.012773969913957, "learning_rate": 8.810730698036906e-06, "loss": 0.3242, "step": 3935 }, { "epoch": 0.25, "grad_norm": 3.0562977370739435, "learning_rate": 8.81007125217533e-06, "loss": 0.3797, "step": 3936 }, { "epoch": 0.25, "grad_norm": 4.130834954945286, "learning_rate": 8.809411648227129e-06, "loss": 0.3616, "step": 3937 }, { "epoch": 0.25, "grad_norm": 2.227304738078117, "learning_rate": 8.808751886219673e-06, "loss": 0.349, "step": 3938 }, { "epoch": 0.25, "grad_norm": 2.147724538836497, "learning_rate": 8.808091966180337e-06, "loss": 0.3456, "step": 3939 }, { "epoch": 0.25, "grad_norm": 1.554994194092664, "learning_rate": 8.8074318881365e-06, "loss": 0.3311, "step": 3940 }, { "epoch": 0.25, "grad_norm": 1.822323489634922, "learning_rate": 8.80677165211555e-06, "loss": 0.3178, "step": 3941 }, { "epoch": 0.25, "grad_norm": 3.60007168743747, "learning_rate": 8.806111258144885e-06, "loss": 0.3416, "step": 3942 }, { "epoch": 0.25, "grad_norm": 3.2015769905986544, "learning_rate": 8.805450706251903e-06, "loss": 0.3459, "step": 3943 }, { "epoch": 0.25, "grad_norm": 2.2836085210108785, "learning_rate": 8.804789996464012e-06, "loss": 0.3469, "step": 3944 }, { "epoch": 0.25, "grad_norm": 4.745469579523758, "learning_rate": 8.804129128808626e-06, "loss": 0.3458, "step": 3945 }, { "epoch": 0.25, "grad_norm": 1.5482381798668035, "learning_rate": 8.803468103313165e-06, "loss": 0.3205, "step": 3946 }, { "epoch": 0.25, "grad_norm": 1.9689575422993166, "learning_rate": 8.802806920005055e-06, "loss": 0.3585, "step": 3947 }, { "epoch": 0.25, "grad_norm": 2.139807856050084, "learning_rate": 8.802145578911733e-06, "loss": 0.3171, "step": 3948 }, { "epoch": 0.25, "grad_norm": 1.5883947555878855, "learning_rate": 8.801484080060638e-06, "loss": 0.3435, "step": 3949 }, { "epoch": 0.25, "grad_norm": 2.9604793929364766, "learning_rate": 8.800822423479217e-06, "loss": 0.3577, "step": 3950 }, { "epoch": 0.25, "grad_norm": 3.927768363210243, "learning_rate": 8.800160609194922e-06, "loss": 0.3428, "step": 3951 }, { "epoch": 0.25, "grad_norm": 1.8353010270690588, "learning_rate": 8.799498637235213e-06, "loss": 0.3635, "step": 3952 }, { "epoch": 0.25, "grad_norm": 2.047035962116984, "learning_rate": 8.798836507627556e-06, "loss": 0.3295, "step": 3953 }, { "epoch": 0.25, "grad_norm": 2.977250737729621, "learning_rate": 8.798174220399427e-06, "loss": 0.361, "step": 3954 }, { "epoch": 0.25, "grad_norm": 1.8191323300886468, "learning_rate": 8.7975117755783e-06, "loss": 0.322, "step": 3955 }, { "epoch": 0.25, "grad_norm": 2.2833073536674253, "learning_rate": 8.796849173191669e-06, "loss": 0.3257, "step": 3956 }, { "epoch": 0.25, "grad_norm": 2.653580360973125, "learning_rate": 8.796186413267018e-06, "loss": 0.3586, "step": 3957 }, { "epoch": 0.25, "grad_norm": 1.5959191441071698, "learning_rate": 8.79552349583185e-06, "loss": 0.3352, "step": 3958 }, { "epoch": 0.25, "grad_norm": 2.68755513942267, "learning_rate": 8.794860420913672e-06, "loss": 0.3353, "step": 3959 }, { "epoch": 0.25, "grad_norm": 2.290039827976815, "learning_rate": 8.794197188539994e-06, "loss": 0.3505, "step": 3960 }, { "epoch": 0.25, "grad_norm": 2.6982893771975944, "learning_rate": 8.793533798738335e-06, "loss": 0.3631, "step": 3961 }, { "epoch": 0.25, "grad_norm": 2.368724845872284, "learning_rate": 8.792870251536223e-06, "loss": 0.3364, "step": 3962 }, { "epoch": 0.25, "grad_norm": 2.3437852662567056, "learning_rate": 8.792206546961187e-06, "loss": 0.3339, "step": 3963 }, { "epoch": 0.25, "grad_norm": 2.379021111034225, "learning_rate": 8.791542685040764e-06, "loss": 0.331, "step": 3964 }, { "epoch": 0.25, "grad_norm": 3.468420002441763, "learning_rate": 8.7908786658025e-06, "loss": 0.3697, "step": 3965 }, { "epoch": 0.25, "grad_norm": 4.40442348480613, "learning_rate": 8.790214489273948e-06, "loss": 0.3239, "step": 3966 }, { "epoch": 0.25, "grad_norm": 2.443752448014104, "learning_rate": 8.789550155482665e-06, "loss": 0.336, "step": 3967 }, { "epoch": 0.25, "grad_norm": 5.423170706382684, "learning_rate": 8.788885664456212e-06, "loss": 0.3522, "step": 3968 }, { "epoch": 0.25, "grad_norm": 2.0159703741815638, "learning_rate": 8.788221016222167e-06, "loss": 0.327, "step": 3969 }, { "epoch": 0.25, "grad_norm": 1.413832410196219, "learning_rate": 8.787556210808101e-06, "loss": 0.3159, "step": 3970 }, { "epoch": 0.25, "grad_norm": 2.229140339814073, "learning_rate": 8.786891248241602e-06, "loss": 0.3182, "step": 3971 }, { "epoch": 0.25, "grad_norm": 1.8559889221850456, "learning_rate": 8.786226128550257e-06, "loss": 0.3232, "step": 3972 }, { "epoch": 0.25, "grad_norm": 1.7735259003246646, "learning_rate": 8.785560851761666e-06, "loss": 0.3398, "step": 3973 }, { "epoch": 0.25, "grad_norm": 3.1257374329945917, "learning_rate": 8.784895417903431e-06, "loss": 0.318, "step": 3974 }, { "epoch": 0.25, "grad_norm": 3.20304958642847, "learning_rate": 8.784229827003163e-06, "loss": 0.3598, "step": 3975 }, { "epoch": 0.25, "grad_norm": 2.8481307769447963, "learning_rate": 8.783564079088478e-06, "loss": 0.3366, "step": 3976 }, { "epoch": 0.25, "grad_norm": 2.4349804126816066, "learning_rate": 8.782898174186998e-06, "loss": 0.3247, "step": 3977 }, { "epoch": 0.25, "grad_norm": 1.6125703437228829, "learning_rate": 8.782232112326353e-06, "loss": 0.3405, "step": 3978 }, { "epoch": 0.25, "grad_norm": 2.7563070234189557, "learning_rate": 8.78156589353418e-06, "loss": 0.3517, "step": 3979 }, { "epoch": 0.25, "grad_norm": 3.158906279170565, "learning_rate": 8.780899517838122e-06, "loss": 0.3371, "step": 3980 }, { "epoch": 0.25, "grad_norm": 3.023815966204676, "learning_rate": 8.78023298526583e-06, "loss": 0.3413, "step": 3981 }, { "epoch": 0.25, "grad_norm": 1.7695617599975126, "learning_rate": 8.779566295844953e-06, "loss": 0.3358, "step": 3982 }, { "epoch": 0.25, "grad_norm": 2.8670330030267603, "learning_rate": 8.778899449603158e-06, "loss": 0.345, "step": 3983 }, { "epoch": 0.25, "grad_norm": 2.7119260387214954, "learning_rate": 8.778232446568114e-06, "loss": 0.3423, "step": 3984 }, { "epoch": 0.25, "grad_norm": 1.6557380312202785, "learning_rate": 8.777565286767493e-06, "loss": 0.3149, "step": 3985 }, { "epoch": 0.25, "grad_norm": 2.031829735179775, "learning_rate": 8.776897970228979e-06, "loss": 0.3229, "step": 3986 }, { "epoch": 0.25, "grad_norm": 1.6405112659555778, "learning_rate": 8.77623049698026e-06, "loss": 0.3264, "step": 3987 }, { "epoch": 0.25, "grad_norm": 5.650907873991567, "learning_rate": 8.775562867049033e-06, "loss": 0.3352, "step": 3988 }, { "epoch": 0.25, "grad_norm": 3.377611140084689, "learning_rate": 8.774895080462993e-06, "loss": 0.3464, "step": 3989 }, { "epoch": 0.25, "grad_norm": 2.0190881235297744, "learning_rate": 8.774227137249851e-06, "loss": 0.3336, "step": 3990 }, { "epoch": 0.25, "grad_norm": 1.8384397193429323, "learning_rate": 8.773559037437323e-06, "loss": 0.3244, "step": 3991 }, { "epoch": 0.25, "grad_norm": 3.938548809288756, "learning_rate": 8.772890781053127e-06, "loss": 0.3227, "step": 3992 }, { "epoch": 0.25, "grad_norm": 1.931703895266037, "learning_rate": 8.772222368124992e-06, "loss": 0.3223, "step": 3993 }, { "epoch": 0.25, "grad_norm": 1.6433144475496635, "learning_rate": 8.77155379868065e-06, "loss": 0.3265, "step": 3994 }, { "epoch": 0.25, "grad_norm": 2.0306364090676436, "learning_rate": 8.770885072747843e-06, "loss": 0.3393, "step": 3995 }, { "epoch": 0.25, "grad_norm": 0.7653982096852402, "learning_rate": 8.770216190354316e-06, "loss": 0.515, "step": 3996 }, { "epoch": 0.25, "grad_norm": 1.755430314122129, "learning_rate": 8.769547151527821e-06, "loss": 0.3149, "step": 3997 }, { "epoch": 0.25, "grad_norm": 2.1910043438408513, "learning_rate": 8.76887795629612e-06, "loss": 0.3399, "step": 3998 }, { "epoch": 0.25, "grad_norm": 2.1466922075478108, "learning_rate": 8.768208604686979e-06, "loss": 0.3409, "step": 3999 }, { "epoch": 0.25, "grad_norm": 2.321892658536272, "learning_rate": 8.767539096728171e-06, "loss": 0.3238, "step": 4000 }, { "epoch": 0.25, "grad_norm": 3.00755075469908, "learning_rate": 8.766869432447473e-06, "loss": 0.3231, "step": 4001 }, { "epoch": 0.25, "grad_norm": 1.4972781413131402, "learning_rate": 8.76619961187267e-06, "loss": 0.3367, "step": 4002 }, { "epoch": 0.25, "grad_norm": 5.700095470221592, "learning_rate": 8.765529635031556e-06, "loss": 0.3504, "step": 4003 }, { "epoch": 0.25, "grad_norm": 2.124431337435667, "learning_rate": 8.76485950195193e-06, "loss": 0.321, "step": 4004 }, { "epoch": 0.25, "grad_norm": 4.177660901477263, "learning_rate": 8.764189212661597e-06, "loss": 0.3545, "step": 4005 }, { "epoch": 0.25, "grad_norm": 1.9543874288575571, "learning_rate": 8.763518767188367e-06, "loss": 0.3304, "step": 4006 }, { "epoch": 0.25, "grad_norm": 2.040240741145905, "learning_rate": 8.762848165560057e-06, "loss": 0.3569, "step": 4007 }, { "epoch": 0.25, "grad_norm": 3.1462098143685076, "learning_rate": 8.762177407804495e-06, "loss": 0.3365, "step": 4008 }, { "epoch": 0.25, "grad_norm": 1.3557725363827984, "learning_rate": 8.761506493949508e-06, "loss": 0.3272, "step": 4009 }, { "epoch": 0.25, "grad_norm": 2.049154870315858, "learning_rate": 8.760835424022938e-06, "loss": 0.3453, "step": 4010 }, { "epoch": 0.25, "grad_norm": 1.909236488879323, "learning_rate": 8.760164198052625e-06, "loss": 0.3437, "step": 4011 }, { "epoch": 0.25, "grad_norm": 1.986681260659995, "learning_rate": 8.75949281606642e-06, "loss": 0.3473, "step": 4012 }, { "epoch": 0.25, "grad_norm": 2.4724205262807475, "learning_rate": 8.75882127809218e-06, "loss": 0.3649, "step": 4013 }, { "epoch": 0.25, "grad_norm": 2.3375703560766286, "learning_rate": 8.758149584157772e-06, "loss": 0.3439, "step": 4014 }, { "epoch": 0.25, "grad_norm": 0.7335786317428861, "learning_rate": 8.757477734291059e-06, "loss": 0.5261, "step": 4015 }, { "epoch": 0.25, "grad_norm": 1.69490251316695, "learning_rate": 8.756805728519922e-06, "loss": 0.3438, "step": 4016 }, { "epoch": 0.25, "grad_norm": 3.163874147734496, "learning_rate": 8.756133566872242e-06, "loss": 0.3544, "step": 4017 }, { "epoch": 0.25, "grad_norm": 1.7208233723521968, "learning_rate": 8.75546124937591e-06, "loss": 0.3189, "step": 4018 }, { "epoch": 0.25, "grad_norm": 2.8147174898790515, "learning_rate": 8.754788776058818e-06, "loss": 0.3648, "step": 4019 }, { "epoch": 0.25, "grad_norm": 1.965910224778552, "learning_rate": 8.754116146948873e-06, "loss": 0.31, "step": 4020 }, { "epoch": 0.25, "grad_norm": 3.515679447901728, "learning_rate": 8.753443362073978e-06, "loss": 0.3281, "step": 4021 }, { "epoch": 0.25, "grad_norm": 3.3864591791292553, "learning_rate": 8.752770421462053e-06, "loss": 0.3678, "step": 4022 }, { "epoch": 0.25, "grad_norm": 2.511879566043592, "learning_rate": 8.752097325141017e-06, "loss": 0.3656, "step": 4023 }, { "epoch": 0.25, "grad_norm": 2.1900113369588854, "learning_rate": 8.751424073138801e-06, "loss": 0.3317, "step": 4024 }, { "epoch": 0.25, "grad_norm": 2.9244560198108775, "learning_rate": 8.750750665483333e-06, "loss": 0.3381, "step": 4025 }, { "epoch": 0.25, "grad_norm": 1.6220897907902792, "learning_rate": 8.750077102202562e-06, "loss": 0.3326, "step": 4026 }, { "epoch": 0.25, "grad_norm": 3.205144572196654, "learning_rate": 8.749403383324428e-06, "loss": 0.3267, "step": 4027 }, { "epoch": 0.25, "grad_norm": 4.67607270989675, "learning_rate": 8.748729508876889e-06, "loss": 0.3331, "step": 4028 }, { "epoch": 0.25, "grad_norm": 0.6842191927654142, "learning_rate": 8.748055478887905e-06, "loss": 0.5013, "step": 4029 }, { "epoch": 0.25, "grad_norm": 1.4214965308143919, "learning_rate": 8.74738129338544e-06, "loss": 0.347, "step": 4030 }, { "epoch": 0.25, "grad_norm": 1.8626450857396741, "learning_rate": 8.74670695239747e-06, "loss": 0.3354, "step": 4031 }, { "epoch": 0.25, "grad_norm": 2.7129575257256846, "learning_rate": 8.746032455951972e-06, "loss": 0.3427, "step": 4032 }, { "epoch": 0.25, "grad_norm": 1.894176120050417, "learning_rate": 8.745357804076937e-06, "loss": 0.3323, "step": 4033 }, { "epoch": 0.25, "grad_norm": 1.4032652273676638, "learning_rate": 8.744682996800351e-06, "loss": 0.3469, "step": 4034 }, { "epoch": 0.25, "grad_norm": 1.8921606911624178, "learning_rate": 8.744008034150217e-06, "loss": 0.3188, "step": 4035 }, { "epoch": 0.25, "grad_norm": 1.4596220130329085, "learning_rate": 8.743332916154541e-06, "loss": 0.3198, "step": 4036 }, { "epoch": 0.25, "grad_norm": 2.6352768292601403, "learning_rate": 8.742657642841333e-06, "loss": 0.3366, "step": 4037 }, { "epoch": 0.25, "grad_norm": 1.3039448632668107, "learning_rate": 8.741982214238611e-06, "loss": 0.3437, "step": 4038 }, { "epoch": 0.25, "grad_norm": 2.0927802688428327, "learning_rate": 8.7413066303744e-06, "loss": 0.3442, "step": 4039 }, { "epoch": 0.25, "grad_norm": 1.535874207421677, "learning_rate": 8.740630891276733e-06, "loss": 0.3189, "step": 4040 }, { "epoch": 0.25, "grad_norm": 2.388702468093148, "learning_rate": 8.739954996973646e-06, "loss": 0.3353, "step": 4041 }, { "epoch": 0.25, "grad_norm": 1.6144431932867787, "learning_rate": 8.739278947493182e-06, "loss": 0.3352, "step": 4042 }, { "epoch": 0.25, "grad_norm": 6.568850958839546, "learning_rate": 8.738602742863394e-06, "loss": 0.3593, "step": 4043 }, { "epoch": 0.25, "grad_norm": 1.5634723197923737, "learning_rate": 8.737926383112336e-06, "loss": 0.3541, "step": 4044 }, { "epoch": 0.25, "grad_norm": 1.9645900527362579, "learning_rate": 8.737249868268076e-06, "loss": 0.3212, "step": 4045 }, { "epoch": 0.25, "grad_norm": 1.8098296651161077, "learning_rate": 8.736573198358677e-06, "loss": 0.3475, "step": 4046 }, { "epoch": 0.25, "grad_norm": 6.7698301785311354, "learning_rate": 8.735896373412223e-06, "loss": 0.333, "step": 4047 }, { "epoch": 0.25, "grad_norm": 2.849821522211834, "learning_rate": 8.735219393456791e-06, "loss": 0.3535, "step": 4048 }, { "epoch": 0.25, "grad_norm": 3.854147709811618, "learning_rate": 8.73454225852047e-06, "loss": 0.3459, "step": 4049 }, { "epoch": 0.25, "grad_norm": 2.4135048040720535, "learning_rate": 8.73386496863136e-06, "loss": 0.3489, "step": 4050 }, { "epoch": 0.25, "grad_norm": 2.2122085838660452, "learning_rate": 8.73318752381756e-06, "loss": 0.3629, "step": 4051 }, { "epoch": 0.25, "grad_norm": 1.5692931435847661, "learning_rate": 8.732509924107178e-06, "loss": 0.3483, "step": 4052 }, { "epoch": 0.25, "grad_norm": 4.278172906750455, "learning_rate": 8.731832169528331e-06, "loss": 0.3405, "step": 4053 }, { "epoch": 0.25, "grad_norm": 1.6080014153388025, "learning_rate": 8.731154260109137e-06, "loss": 0.3407, "step": 4054 }, { "epoch": 0.26, "grad_norm": 2.348106601953248, "learning_rate": 8.730476195877726e-06, "loss": 0.3567, "step": 4055 }, { "epoch": 0.26, "grad_norm": 3.0510569170967754, "learning_rate": 8.729797976862231e-06, "loss": 0.3328, "step": 4056 }, { "epoch": 0.26, "grad_norm": 1.5894392806231072, "learning_rate": 8.729119603090794e-06, "loss": 0.3264, "step": 4057 }, { "epoch": 0.26, "grad_norm": 2.073645775213012, "learning_rate": 8.728441074591561e-06, "loss": 0.3173, "step": 4058 }, { "epoch": 0.26, "grad_norm": 1.54799909527397, "learning_rate": 8.727762391392685e-06, "loss": 0.3499, "step": 4059 }, { "epoch": 0.26, "grad_norm": 1.4898635989281481, "learning_rate": 8.727083553522326e-06, "loss": 0.3271, "step": 4060 }, { "epoch": 0.26, "grad_norm": 1.6195957426803977, "learning_rate": 8.726404561008652e-06, "loss": 0.34, "step": 4061 }, { "epoch": 0.26, "grad_norm": 2.4569605547413564, "learning_rate": 8.725725413879833e-06, "loss": 0.3474, "step": 4062 }, { "epoch": 0.26, "grad_norm": 1.63672464437792, "learning_rate": 8.725046112164048e-06, "loss": 0.3409, "step": 4063 }, { "epoch": 0.26, "grad_norm": 2.07783384634387, "learning_rate": 8.724366655889484e-06, "loss": 0.339, "step": 4064 }, { "epoch": 0.26, "grad_norm": 2.5052048789526022, "learning_rate": 8.723687045084332e-06, "loss": 0.3447, "step": 4065 }, { "epoch": 0.26, "grad_norm": 2.0756908514939045, "learning_rate": 8.72300727977679e-06, "loss": 0.3434, "step": 4066 }, { "epoch": 0.26, "grad_norm": 1.5933030953941294, "learning_rate": 8.722327359995064e-06, "loss": 0.3338, "step": 4067 }, { "epoch": 0.26, "grad_norm": 2.953815711101553, "learning_rate": 8.721647285767364e-06, "loss": 0.3369, "step": 4068 }, { "epoch": 0.26, "grad_norm": 2.0628212915236666, "learning_rate": 8.720967057121908e-06, "loss": 0.3524, "step": 4069 }, { "epoch": 0.26, "grad_norm": 3.1094032642905916, "learning_rate": 8.720286674086919e-06, "loss": 0.3364, "step": 4070 }, { "epoch": 0.26, "grad_norm": 16.29791883779948, "learning_rate": 8.719606136690628e-06, "loss": 0.335, "step": 4071 }, { "epoch": 0.26, "grad_norm": 1.9666908533427165, "learning_rate": 8.718925444961273e-06, "loss": 0.3611, "step": 4072 }, { "epoch": 0.26, "grad_norm": 3.5340981209677524, "learning_rate": 8.718244598927095e-06, "loss": 0.361, "step": 4073 }, { "epoch": 0.26, "grad_norm": 1.5327835452053369, "learning_rate": 8.717563598616342e-06, "loss": 0.3152, "step": 4074 }, { "epoch": 0.26, "grad_norm": 1.8868073105187637, "learning_rate": 8.716882444057275e-06, "loss": 0.356, "step": 4075 }, { "epoch": 0.26, "grad_norm": 3.0230935669880217, "learning_rate": 8.716201135278152e-06, "loss": 0.3268, "step": 4076 }, { "epoch": 0.26, "grad_norm": 2.0560875190386207, "learning_rate": 8.715519672307245e-06, "loss": 0.3375, "step": 4077 }, { "epoch": 0.26, "grad_norm": 2.0900475278362407, "learning_rate": 8.714838055172825e-06, "loss": 0.3439, "step": 4078 }, { "epoch": 0.26, "grad_norm": 2.0781631564196195, "learning_rate": 8.714156283903177e-06, "loss": 0.3391, "step": 4079 }, { "epoch": 0.26, "grad_norm": 2.270042035954629, "learning_rate": 8.713474358526588e-06, "loss": 0.332, "step": 4080 }, { "epoch": 0.26, "grad_norm": 1.7474073994956407, "learning_rate": 8.712792279071351e-06, "loss": 0.3173, "step": 4081 }, { "epoch": 0.26, "grad_norm": 1.6378876500682436, "learning_rate": 8.712110045565768e-06, "loss": 0.3154, "step": 4082 }, { "epoch": 0.26, "grad_norm": 2.4541175416986114, "learning_rate": 8.711427658038146e-06, "loss": 0.3295, "step": 4083 }, { "epoch": 0.26, "grad_norm": 1.7196100230078464, "learning_rate": 8.710745116516798e-06, "loss": 0.351, "step": 4084 }, { "epoch": 0.26, "grad_norm": 2.3541906105401913, "learning_rate": 8.710062421030043e-06, "loss": 0.3602, "step": 4085 }, { "epoch": 0.26, "grad_norm": 2.1103349288170503, "learning_rate": 8.709379571606211e-06, "loss": 0.3392, "step": 4086 }, { "epoch": 0.26, "grad_norm": 2.676024096888773, "learning_rate": 8.708696568273631e-06, "loss": 0.3126, "step": 4087 }, { "epoch": 0.26, "grad_norm": 6.769694033779992, "learning_rate": 8.708013411060642e-06, "loss": 0.3628, "step": 4088 }, { "epoch": 0.26, "grad_norm": 5.932547398377358, "learning_rate": 8.70733009999559e-06, "loss": 0.3451, "step": 4089 }, { "epoch": 0.26, "grad_norm": 4.156018069717261, "learning_rate": 8.706646635106828e-06, "loss": 0.3339, "step": 4090 }, { "epoch": 0.26, "grad_norm": 1.480207759245538, "learning_rate": 8.705963016422713e-06, "loss": 0.3145, "step": 4091 }, { "epoch": 0.26, "grad_norm": 1.9827084793169916, "learning_rate": 8.705279243971612e-06, "loss": 0.3582, "step": 4092 }, { "epoch": 0.26, "grad_norm": 1.8554133354964357, "learning_rate": 8.704595317781892e-06, "loss": 0.3173, "step": 4093 }, { "epoch": 0.26, "grad_norm": 1.399401452220164, "learning_rate": 8.703911237881932e-06, "loss": 0.3295, "step": 4094 }, { "epoch": 0.26, "grad_norm": 1.775914267033628, "learning_rate": 8.703227004300117e-06, "loss": 0.3348, "step": 4095 }, { "epoch": 0.26, "grad_norm": 1.7040139261096963, "learning_rate": 8.702542617064835e-06, "loss": 0.3288, "step": 4096 }, { "epoch": 0.26, "grad_norm": 2.0923667917542046, "learning_rate": 8.701858076204484e-06, "loss": 0.3464, "step": 4097 }, { "epoch": 0.26, "grad_norm": 1.7426022535152126, "learning_rate": 8.701173381747465e-06, "loss": 0.3281, "step": 4098 }, { "epoch": 0.26, "grad_norm": 1.7515229375498405, "learning_rate": 8.700488533722189e-06, "loss": 0.3328, "step": 4099 }, { "epoch": 0.26, "grad_norm": 3.783669089310867, "learning_rate": 8.699803532157071e-06, "loss": 0.3502, "step": 4100 }, { "epoch": 0.26, "grad_norm": 2.3042027517051666, "learning_rate": 8.699118377080534e-06, "loss": 0.3361, "step": 4101 }, { "epoch": 0.26, "grad_norm": 2.1278033478288436, "learning_rate": 8.698433068521005e-06, "loss": 0.3432, "step": 4102 }, { "epoch": 0.26, "grad_norm": 1.7118264209442484, "learning_rate": 8.697747606506917e-06, "loss": 0.355, "step": 4103 }, { "epoch": 0.26, "grad_norm": 1.8498136601044612, "learning_rate": 8.697061991066712e-06, "loss": 0.3486, "step": 4104 }, { "epoch": 0.26, "grad_norm": 6.732509917874962, "learning_rate": 8.696376222228841e-06, "loss": 0.3322, "step": 4105 }, { "epoch": 0.26, "grad_norm": 2.3786558315606947, "learning_rate": 8.695690300021755e-06, "loss": 0.3463, "step": 4106 }, { "epoch": 0.26, "grad_norm": 1.6395221922436516, "learning_rate": 8.695004224473912e-06, "loss": 0.3354, "step": 4107 }, { "epoch": 0.26, "grad_norm": 1.2973049883443966, "learning_rate": 8.69431799561378e-06, "loss": 0.3417, "step": 4108 }, { "epoch": 0.26, "grad_norm": 1.8760467049793943, "learning_rate": 8.693631613469837e-06, "loss": 0.325, "step": 4109 }, { "epoch": 0.26, "grad_norm": 2.086697428287031, "learning_rate": 8.692945078070555e-06, "loss": 0.3413, "step": 4110 }, { "epoch": 0.26, "grad_norm": 2.7362402259724723, "learning_rate": 8.692258389444422e-06, "loss": 0.3261, "step": 4111 }, { "epoch": 0.26, "grad_norm": 1.7640477171220377, "learning_rate": 8.69157154761993e-06, "loss": 0.3454, "step": 4112 }, { "epoch": 0.26, "grad_norm": 2.701332166606028, "learning_rate": 8.690884552625579e-06, "loss": 0.3194, "step": 4113 }, { "epoch": 0.26, "grad_norm": 2.1595312033463734, "learning_rate": 8.690197404489871e-06, "loss": 0.3383, "step": 4114 }, { "epoch": 0.26, "grad_norm": 7.520544858801723, "learning_rate": 8.689510103241318e-06, "loss": 0.3359, "step": 4115 }, { "epoch": 0.26, "grad_norm": 1.5493797661696136, "learning_rate": 8.68882264890844e-06, "loss": 0.3332, "step": 4116 }, { "epoch": 0.26, "grad_norm": 3.3290404208472024, "learning_rate": 8.688135041519756e-06, "loss": 0.358, "step": 4117 }, { "epoch": 0.26, "grad_norm": 2.1260846658752963, "learning_rate": 8.687447281103799e-06, "loss": 0.3466, "step": 4118 }, { "epoch": 0.26, "grad_norm": 2.3654767099177816, "learning_rate": 8.686759367689102e-06, "loss": 0.3299, "step": 4119 }, { "epoch": 0.26, "grad_norm": 1.6931618242551096, "learning_rate": 8.686071301304212e-06, "loss": 0.3394, "step": 4120 }, { "epoch": 0.26, "grad_norm": 0.6070622475886372, "learning_rate": 8.685383081977678e-06, "loss": 0.5224, "step": 4121 }, { "epoch": 0.26, "grad_norm": 1.9085627802921634, "learning_rate": 8.684694709738053e-06, "loss": 0.3409, "step": 4122 }, { "epoch": 0.26, "grad_norm": 1.823691904252609, "learning_rate": 8.6840061846139e-06, "loss": 0.331, "step": 4123 }, { "epoch": 0.26, "grad_norm": 1.55624305014329, "learning_rate": 8.683317506633787e-06, "loss": 0.3309, "step": 4124 }, { "epoch": 0.26, "grad_norm": 2.531029991410397, "learning_rate": 8.682628675826288e-06, "loss": 0.3762, "step": 4125 }, { "epoch": 0.26, "grad_norm": 1.6466895504634198, "learning_rate": 8.681939692219984e-06, "loss": 0.314, "step": 4126 }, { "epoch": 0.26, "grad_norm": 4.205852777188346, "learning_rate": 8.681250555843462e-06, "loss": 0.3475, "step": 4127 }, { "epoch": 0.26, "grad_norm": 5.253520849435823, "learning_rate": 8.680561266725316e-06, "loss": 0.3345, "step": 4128 }, { "epoch": 0.26, "grad_norm": 2.7651739332864027, "learning_rate": 8.679871824894146e-06, "loss": 0.3313, "step": 4129 }, { "epoch": 0.26, "grad_norm": 3.2964360891628264, "learning_rate": 8.679182230378558e-06, "loss": 0.3585, "step": 4130 }, { "epoch": 0.26, "grad_norm": 0.6070789575432157, "learning_rate": 8.678492483207163e-06, "loss": 0.4925, "step": 4131 }, { "epoch": 0.26, "grad_norm": 3.0468637162024867, "learning_rate": 8.677802583408583e-06, "loss": 0.366, "step": 4132 }, { "epoch": 0.26, "grad_norm": 3.2241862210350836, "learning_rate": 8.67711253101144e-06, "loss": 0.3831, "step": 4133 }, { "epoch": 0.26, "grad_norm": 3.990513516603685, "learning_rate": 8.676422326044366e-06, "loss": 0.353, "step": 4134 }, { "epoch": 0.26, "grad_norm": 2.21598960991638, "learning_rate": 8.675731968536004e-06, "loss": 0.3336, "step": 4135 }, { "epoch": 0.26, "grad_norm": 1.687426845091582, "learning_rate": 8.67504145851499e-06, "loss": 0.3341, "step": 4136 }, { "epoch": 0.26, "grad_norm": 2.5886330691574577, "learning_rate": 8.674350796009979e-06, "loss": 0.3206, "step": 4137 }, { "epoch": 0.26, "grad_norm": 2.1784018677232813, "learning_rate": 8.673659981049627e-06, "loss": 0.341, "step": 4138 }, { "epoch": 0.26, "grad_norm": 1.8005510641457785, "learning_rate": 8.672969013662597e-06, "loss": 0.3098, "step": 4139 }, { "epoch": 0.26, "grad_norm": 1.9983621755265903, "learning_rate": 8.67227789387756e-06, "loss": 0.3355, "step": 4140 }, { "epoch": 0.26, "grad_norm": 0.6094853938636869, "learning_rate": 8.671586621723186e-06, "loss": 0.4963, "step": 4141 }, { "epoch": 0.26, "grad_norm": 1.8654529172301353, "learning_rate": 8.670895197228166e-06, "loss": 0.3246, "step": 4142 }, { "epoch": 0.26, "grad_norm": 3.5184671360323554, "learning_rate": 8.670203620421183e-06, "loss": 0.3675, "step": 4143 }, { "epoch": 0.26, "grad_norm": 1.8400839084450171, "learning_rate": 8.669511891330935e-06, "loss": 0.3382, "step": 4144 }, { "epoch": 0.26, "grad_norm": 2.6928610919616545, "learning_rate": 8.668820009986116e-06, "loss": 0.3207, "step": 4145 }, { "epoch": 0.26, "grad_norm": 1.825620037479355, "learning_rate": 8.668127976415442e-06, "loss": 0.3346, "step": 4146 }, { "epoch": 0.26, "grad_norm": 1.4993675507647217, "learning_rate": 8.667435790647623e-06, "loss": 0.3233, "step": 4147 }, { "epoch": 0.26, "grad_norm": 2.683817712185597, "learning_rate": 8.666743452711377e-06, "loss": 0.3343, "step": 4148 }, { "epoch": 0.26, "grad_norm": 2.2707252503978688, "learning_rate": 8.666050962635433e-06, "loss": 0.3503, "step": 4149 }, { "epoch": 0.26, "grad_norm": 2.765477733405839, "learning_rate": 8.665358320448522e-06, "loss": 0.3297, "step": 4150 }, { "epoch": 0.26, "grad_norm": 3.764676704257325, "learning_rate": 8.664665526179385e-06, "loss": 0.321, "step": 4151 }, { "epoch": 0.26, "grad_norm": 1.4444340401680011, "learning_rate": 8.663972579856767e-06, "loss": 0.3356, "step": 4152 }, { "epoch": 0.26, "grad_norm": 2.2214300610166555, "learning_rate": 8.663279481509418e-06, "loss": 0.3376, "step": 4153 }, { "epoch": 0.26, "grad_norm": 2.5631225824079764, "learning_rate": 8.662586231166095e-06, "loss": 0.3251, "step": 4154 }, { "epoch": 0.26, "grad_norm": 1.829923085490244, "learning_rate": 8.661892828855565e-06, "loss": 0.3238, "step": 4155 }, { "epoch": 0.26, "grad_norm": 2.807054781236766, "learning_rate": 8.661199274606597e-06, "loss": 0.32, "step": 4156 }, { "epoch": 0.26, "grad_norm": 2.2096343244508474, "learning_rate": 8.66050556844797e-06, "loss": 0.3481, "step": 4157 }, { "epoch": 0.26, "grad_norm": 1.4568628902542375, "learning_rate": 8.659811710408464e-06, "loss": 0.3326, "step": 4158 }, { "epoch": 0.26, "grad_norm": 2.070347369135558, "learning_rate": 8.659117700516869e-06, "loss": 0.3417, "step": 4159 }, { "epoch": 0.26, "grad_norm": 1.7880604823176935, "learning_rate": 8.65842353880198e-06, "loss": 0.3596, "step": 4160 }, { "epoch": 0.26, "grad_norm": 1.8212989764591963, "learning_rate": 8.657729225292601e-06, "loss": 0.3374, "step": 4161 }, { "epoch": 0.26, "grad_norm": 2.3296034565064767, "learning_rate": 8.657034760017542e-06, "loss": 0.3242, "step": 4162 }, { "epoch": 0.26, "grad_norm": 2.671318243491196, "learning_rate": 8.656340143005613e-06, "loss": 0.343, "step": 4163 }, { "epoch": 0.26, "grad_norm": 1.5460127523977807, "learning_rate": 8.655645374285637e-06, "loss": 0.3277, "step": 4164 }, { "epoch": 0.26, "grad_norm": 2.5970223843268796, "learning_rate": 8.654950453886443e-06, "loss": 0.3515, "step": 4165 }, { "epoch": 0.26, "grad_norm": 1.4468730029500596, "learning_rate": 8.65425538183686e-06, "loss": 0.3288, "step": 4166 }, { "epoch": 0.26, "grad_norm": 2.317300326240315, "learning_rate": 8.653560158165732e-06, "loss": 0.3548, "step": 4167 }, { "epoch": 0.26, "grad_norm": 1.5643651382379886, "learning_rate": 8.652864782901904e-06, "loss": 0.3272, "step": 4168 }, { "epoch": 0.26, "grad_norm": 2.4015717564371584, "learning_rate": 8.652169256074228e-06, "loss": 0.3353, "step": 4169 }, { "epoch": 0.26, "grad_norm": 2.0544115789224655, "learning_rate": 8.651473577711562e-06, "loss": 0.3255, "step": 4170 }, { "epoch": 0.26, "grad_norm": 1.794751494229252, "learning_rate": 8.650777747842772e-06, "loss": 0.3206, "step": 4171 }, { "epoch": 0.26, "grad_norm": 2.483213570874632, "learning_rate": 8.65008176649673e-06, "loss": 0.3298, "step": 4172 }, { "epoch": 0.26, "grad_norm": 2.244015847694425, "learning_rate": 8.64938563370231e-06, "loss": 0.3627, "step": 4173 }, { "epoch": 0.26, "grad_norm": 1.5315204706072374, "learning_rate": 8.648689349488398e-06, "loss": 0.3274, "step": 4174 }, { "epoch": 0.26, "grad_norm": 2.0745904701481184, "learning_rate": 8.647992913883885e-06, "loss": 0.3253, "step": 4175 }, { "epoch": 0.26, "grad_norm": 1.9800660707088678, "learning_rate": 8.647296326917667e-06, "loss": 0.3457, "step": 4176 }, { "epoch": 0.26, "grad_norm": 2.9515521489351753, "learning_rate": 8.646599588618645e-06, "loss": 0.3346, "step": 4177 }, { "epoch": 0.26, "grad_norm": 1.9156005196757473, "learning_rate": 8.645902699015729e-06, "loss": 0.3266, "step": 4178 }, { "epoch": 0.26, "grad_norm": 1.4545223038385888, "learning_rate": 8.645205658137834e-06, "loss": 0.3307, "step": 4179 }, { "epoch": 0.26, "grad_norm": 2.0370436764504043, "learning_rate": 8.644508466013884e-06, "loss": 0.3282, "step": 4180 }, { "epoch": 0.26, "grad_norm": 3.0859785489733227, "learning_rate": 8.6438111226728e-06, "loss": 0.3272, "step": 4181 }, { "epoch": 0.26, "grad_norm": 2.0441706463861964, "learning_rate": 8.643113628143524e-06, "loss": 0.3334, "step": 4182 }, { "epoch": 0.26, "grad_norm": 0.6773270507292503, "learning_rate": 8.64241598245499e-06, "loss": 0.4833, "step": 4183 }, { "epoch": 0.26, "grad_norm": 2.2394590502165745, "learning_rate": 8.64171818563615e-06, "loss": 0.3534, "step": 4184 }, { "epoch": 0.26, "grad_norm": 2.302595319260231, "learning_rate": 8.641020237715953e-06, "loss": 0.3266, "step": 4185 }, { "epoch": 0.26, "grad_norm": 1.8034574015766647, "learning_rate": 8.640322138723357e-06, "loss": 0.3289, "step": 4186 }, { "epoch": 0.26, "grad_norm": 2.699338701431057, "learning_rate": 8.639623888687335e-06, "loss": 0.3412, "step": 4187 }, { "epoch": 0.26, "grad_norm": 3.8050538947985593, "learning_rate": 8.638925487636847e-06, "loss": 0.3431, "step": 4188 }, { "epoch": 0.26, "grad_norm": 2.361753746662144, "learning_rate": 8.638226935600881e-06, "loss": 0.3219, "step": 4189 }, { "epoch": 0.26, "grad_norm": 0.6258431357704101, "learning_rate": 8.637528232608415e-06, "loss": 0.5307, "step": 4190 }, { "epoch": 0.26, "grad_norm": 1.9084641772079904, "learning_rate": 8.636829378688443e-06, "loss": 0.3455, "step": 4191 }, { "epoch": 0.26, "grad_norm": 2.213081098267743, "learning_rate": 8.63613037386996e-06, "loss": 0.3346, "step": 4192 }, { "epoch": 0.26, "grad_norm": 1.9435176600534825, "learning_rate": 8.635431218181969e-06, "loss": 0.3315, "step": 4193 }, { "epoch": 0.26, "grad_norm": 2.360284991032729, "learning_rate": 8.63473191165348e-06, "loss": 0.3636, "step": 4194 }, { "epoch": 0.26, "grad_norm": 1.9172780629234418, "learning_rate": 8.634032454313507e-06, "loss": 0.3324, "step": 4195 }, { "epoch": 0.26, "grad_norm": 1.4100594044399137, "learning_rate": 8.633332846191074e-06, "loss": 0.3392, "step": 4196 }, { "epoch": 0.26, "grad_norm": 3.5594277567110817, "learning_rate": 8.632633087315207e-06, "loss": 0.3202, "step": 4197 }, { "epoch": 0.26, "grad_norm": 2.2788181000457977, "learning_rate": 8.631933177714942e-06, "loss": 0.3098, "step": 4198 }, { "epoch": 0.26, "grad_norm": 3.1170957465696976, "learning_rate": 8.631233117419317e-06, "loss": 0.3259, "step": 4199 }, { "epoch": 0.26, "grad_norm": 3.3692229325064487, "learning_rate": 8.630532906457381e-06, "loss": 0.3363, "step": 4200 }, { "epoch": 0.26, "grad_norm": 1.6538040463328463, "learning_rate": 8.629832544858186e-06, "loss": 0.3235, "step": 4201 }, { "epoch": 0.26, "grad_norm": 2.110178494042075, "learning_rate": 8.62913203265079e-06, "loss": 0.338, "step": 4202 }, { "epoch": 0.26, "grad_norm": 2.633671884984901, "learning_rate": 8.628431369864262e-06, "loss": 0.3267, "step": 4203 }, { "epoch": 0.26, "grad_norm": 1.4854503434641324, "learning_rate": 8.62773055652767e-06, "loss": 0.3124, "step": 4204 }, { "epoch": 0.26, "grad_norm": 1.7229283162260696, "learning_rate": 8.627029592670093e-06, "loss": 0.325, "step": 4205 }, { "epoch": 0.26, "grad_norm": 1.5823103223056096, "learning_rate": 8.626328478320617e-06, "loss": 0.3279, "step": 4206 }, { "epoch": 0.26, "grad_norm": 2.4211654887395215, "learning_rate": 8.625627213508332e-06, "loss": 0.333, "step": 4207 }, { "epoch": 0.26, "grad_norm": 2.332135911111867, "learning_rate": 8.624925798262335e-06, "loss": 0.371, "step": 4208 }, { "epoch": 0.26, "grad_norm": 1.8388218575572008, "learning_rate": 8.624224232611726e-06, "loss": 0.332, "step": 4209 }, { "epoch": 0.26, "grad_norm": 1.8924836693249167, "learning_rate": 8.623522516585618e-06, "loss": 0.324, "step": 4210 }, { "epoch": 0.26, "grad_norm": 2.458712333451677, "learning_rate": 8.622820650213122e-06, "loss": 0.3298, "step": 4211 }, { "epoch": 0.26, "grad_norm": 1.7460136933060542, "learning_rate": 8.622118633523364e-06, "loss": 0.3249, "step": 4212 }, { "epoch": 0.26, "grad_norm": 1.5571048992183727, "learning_rate": 8.621416466545472e-06, "loss": 0.3261, "step": 4213 }, { "epoch": 0.27, "grad_norm": 2.219814818474756, "learning_rate": 8.620714149308575e-06, "loss": 0.3262, "step": 4214 }, { "epoch": 0.27, "grad_norm": 2.5310169645285376, "learning_rate": 8.62001168184182e-06, "loss": 0.3346, "step": 4215 }, { "epoch": 0.27, "grad_norm": 3.0374915820909085, "learning_rate": 8.619309064174349e-06, "loss": 0.3604, "step": 4216 }, { "epoch": 0.27, "grad_norm": 3.4260085703202003, "learning_rate": 8.618606296335314e-06, "loss": 0.3424, "step": 4217 }, { "epoch": 0.27, "grad_norm": 2.2833963435369493, "learning_rate": 8.61790337835388e-06, "loss": 0.3269, "step": 4218 }, { "epoch": 0.27, "grad_norm": 1.4732389178520906, "learning_rate": 8.617200310259206e-06, "loss": 0.3445, "step": 4219 }, { "epoch": 0.27, "grad_norm": 1.580656104579461, "learning_rate": 8.616497092080469e-06, "loss": 0.3287, "step": 4220 }, { "epoch": 0.27, "grad_norm": 1.5432302970182048, "learning_rate": 8.615793723846842e-06, "loss": 0.3295, "step": 4221 }, { "epoch": 0.27, "grad_norm": 2.9865712153793176, "learning_rate": 8.615090205587513e-06, "loss": 0.353, "step": 4222 }, { "epoch": 0.27, "grad_norm": 0.682272812913298, "learning_rate": 8.614386537331667e-06, "loss": 0.4891, "step": 4223 }, { "epoch": 0.27, "grad_norm": 2.4364191794279937, "learning_rate": 8.613682719108507e-06, "loss": 0.3552, "step": 4224 }, { "epoch": 0.27, "grad_norm": 1.985457930299239, "learning_rate": 8.612978750947232e-06, "loss": 0.3408, "step": 4225 }, { "epoch": 0.27, "grad_norm": 1.666392022632377, "learning_rate": 8.61227463287705e-06, "loss": 0.3176, "step": 4226 }, { "epoch": 0.27, "grad_norm": 2.0680902210910026, "learning_rate": 8.611570364927178e-06, "loss": 0.31, "step": 4227 }, { "epoch": 0.27, "grad_norm": 2.3050551553530574, "learning_rate": 8.610865947126836e-06, "loss": 0.3189, "step": 4228 }, { "epoch": 0.27, "grad_norm": 1.90337570792768, "learning_rate": 8.610161379505253e-06, "loss": 0.3434, "step": 4229 }, { "epoch": 0.27, "grad_norm": 1.771438139668405, "learning_rate": 8.60945666209166e-06, "loss": 0.34, "step": 4230 }, { "epoch": 0.27, "grad_norm": 1.3298283604768903, "learning_rate": 8.6087517949153e-06, "loss": 0.3202, "step": 4231 }, { "epoch": 0.27, "grad_norm": 2.916247704002772, "learning_rate": 8.608046778005419e-06, "loss": 0.3261, "step": 4232 }, { "epoch": 0.27, "grad_norm": 2.030769209932224, "learning_rate": 8.607341611391268e-06, "loss": 0.3622, "step": 4233 }, { "epoch": 0.27, "grad_norm": 2.1697410164179995, "learning_rate": 8.606636295102106e-06, "loss": 0.3252, "step": 4234 }, { "epoch": 0.27, "grad_norm": 5.605232603316513, "learning_rate": 8.6059308291672e-06, "loss": 0.3363, "step": 4235 }, { "epoch": 0.27, "grad_norm": 1.6585366486403808, "learning_rate": 8.605225213615818e-06, "loss": 0.3221, "step": 4236 }, { "epoch": 0.27, "grad_norm": 2.2632132517410937, "learning_rate": 8.604519448477236e-06, "loss": 0.3261, "step": 4237 }, { "epoch": 0.27, "grad_norm": 1.7575344780553146, "learning_rate": 8.603813533780742e-06, "loss": 0.3425, "step": 4238 }, { "epoch": 0.27, "grad_norm": 1.8349045762388945, "learning_rate": 8.603107469555623e-06, "loss": 0.3225, "step": 4239 }, { "epoch": 0.27, "grad_norm": 2.4451097932181565, "learning_rate": 8.602401255831176e-06, "loss": 0.3448, "step": 4240 }, { "epoch": 0.27, "grad_norm": 2.2508351057350167, "learning_rate": 8.601694892636701e-06, "loss": 0.3168, "step": 4241 }, { "epoch": 0.27, "grad_norm": 1.6972953706707048, "learning_rate": 8.600988380001508e-06, "loss": 0.3391, "step": 4242 }, { "epoch": 0.27, "grad_norm": 4.1794147541088265, "learning_rate": 8.60028171795491e-06, "loss": 0.3366, "step": 4243 }, { "epoch": 0.27, "grad_norm": 1.5023694633565843, "learning_rate": 8.599574906526231e-06, "loss": 0.3464, "step": 4244 }, { "epoch": 0.27, "grad_norm": 1.6034641172287127, "learning_rate": 8.598867945744794e-06, "loss": 0.3569, "step": 4245 }, { "epoch": 0.27, "grad_norm": 1.6657183866249199, "learning_rate": 8.598160835639935e-06, "loss": 0.3222, "step": 4246 }, { "epoch": 0.27, "grad_norm": 1.5283025124717415, "learning_rate": 8.59745357624099e-06, "loss": 0.3339, "step": 4247 }, { "epoch": 0.27, "grad_norm": 1.2966176158873703, "learning_rate": 8.596746167577308e-06, "loss": 0.3289, "step": 4248 }, { "epoch": 0.27, "grad_norm": 2.5372664240922145, "learning_rate": 8.596038609678236e-06, "loss": 0.3139, "step": 4249 }, { "epoch": 0.27, "grad_norm": 2.3840010217867644, "learning_rate": 8.595330902573137e-06, "loss": 0.305, "step": 4250 }, { "epoch": 0.27, "grad_norm": 1.6570800328309143, "learning_rate": 8.594623046291374e-06, "loss": 0.3161, "step": 4251 }, { "epoch": 0.27, "grad_norm": 2.392855303032146, "learning_rate": 8.593915040862315e-06, "loss": 0.3046, "step": 4252 }, { "epoch": 0.27, "grad_norm": 4.553084091586555, "learning_rate": 8.593206886315338e-06, "loss": 0.3393, "step": 4253 }, { "epoch": 0.27, "grad_norm": 1.7687996769929684, "learning_rate": 8.592498582679823e-06, "loss": 0.347, "step": 4254 }, { "epoch": 0.27, "grad_norm": 1.7340705972518944, "learning_rate": 8.591790129985162e-06, "loss": 0.3371, "step": 4255 }, { "epoch": 0.27, "grad_norm": 1.3751645187296844, "learning_rate": 8.59108152826075e-06, "loss": 0.3306, "step": 4256 }, { "epoch": 0.27, "grad_norm": 2.558167259182883, "learning_rate": 8.590372777535984e-06, "loss": 0.3582, "step": 4257 }, { "epoch": 0.27, "grad_norm": 1.9325602192645501, "learning_rate": 8.589663877840278e-06, "loss": 0.3659, "step": 4258 }, { "epoch": 0.27, "grad_norm": 0.7099919053117987, "learning_rate": 8.588954829203039e-06, "loss": 0.4824, "step": 4259 }, { "epoch": 0.27, "grad_norm": 1.894137370774947, "learning_rate": 8.58824563165369e-06, "loss": 0.3661, "step": 4260 }, { "epoch": 0.27, "grad_norm": 1.7094474668222037, "learning_rate": 8.587536285221656e-06, "loss": 0.3231, "step": 4261 }, { "epoch": 0.27, "grad_norm": 1.6262659477981198, "learning_rate": 8.58682678993637e-06, "loss": 0.3357, "step": 4262 }, { "epoch": 0.27, "grad_norm": 2.1584387794679487, "learning_rate": 8.58611714582727e-06, "loss": 0.3448, "step": 4263 }, { "epoch": 0.27, "grad_norm": 6.5630222924968535, "learning_rate": 8.585407352923799e-06, "loss": 0.3483, "step": 4264 }, { "epoch": 0.27, "grad_norm": 1.9131127117402358, "learning_rate": 8.584697411255409e-06, "loss": 0.3447, "step": 4265 }, { "epoch": 0.27, "grad_norm": 7.8341610832471895, "learning_rate": 8.583987320851556e-06, "loss": 0.3329, "step": 4266 }, { "epoch": 0.27, "grad_norm": 3.3496653343170317, "learning_rate": 8.583277081741703e-06, "loss": 0.3336, "step": 4267 }, { "epoch": 0.27, "grad_norm": 1.607165837648559, "learning_rate": 8.58256669395532e-06, "loss": 0.3509, "step": 4268 }, { "epoch": 0.27, "grad_norm": 1.9243460639183332, "learning_rate": 8.581856157521882e-06, "loss": 0.3428, "step": 4269 }, { "epoch": 0.27, "grad_norm": 1.80555944204013, "learning_rate": 8.58114547247087e-06, "loss": 0.3495, "step": 4270 }, { "epoch": 0.27, "grad_norm": 1.2423985483323439, "learning_rate": 8.580434638831769e-06, "loss": 0.3277, "step": 4271 }, { "epoch": 0.27, "grad_norm": 1.5004579684570956, "learning_rate": 8.579723656634077e-06, "loss": 0.3033, "step": 4272 }, { "epoch": 0.27, "grad_norm": 1.5256845265497303, "learning_rate": 8.579012525907292e-06, "loss": 0.3342, "step": 4273 }, { "epoch": 0.27, "grad_norm": 1.6350629540576957, "learning_rate": 8.57830124668092e-06, "loss": 0.3616, "step": 4274 }, { "epoch": 0.27, "grad_norm": 2.07337339387284, "learning_rate": 8.577589818984473e-06, "loss": 0.3241, "step": 4275 }, { "epoch": 0.27, "grad_norm": 1.4436272941552504, "learning_rate": 8.576878242847472e-06, "loss": 0.327, "step": 4276 }, { "epoch": 0.27, "grad_norm": 1.5680174375865998, "learning_rate": 8.576166518299438e-06, "loss": 0.3427, "step": 4277 }, { "epoch": 0.27, "grad_norm": 1.9773442544469557, "learning_rate": 8.575454645369904e-06, "loss": 0.3532, "step": 4278 }, { "epoch": 0.27, "grad_norm": 1.9291989336041662, "learning_rate": 8.574742624088403e-06, "loss": 0.3336, "step": 4279 }, { "epoch": 0.27, "grad_norm": 2.1602198116166584, "learning_rate": 8.574030454484486e-06, "loss": 0.3252, "step": 4280 }, { "epoch": 0.27, "grad_norm": 1.8307352894774007, "learning_rate": 8.573318136587694e-06, "loss": 0.3255, "step": 4281 }, { "epoch": 0.27, "grad_norm": 1.9561684405910387, "learning_rate": 8.572605670427584e-06, "loss": 0.3391, "step": 4282 }, { "epoch": 0.27, "grad_norm": 1.6164287395728145, "learning_rate": 8.571893056033722e-06, "loss": 0.3404, "step": 4283 }, { "epoch": 0.27, "grad_norm": 1.8667852039769386, "learning_rate": 8.571180293435673e-06, "loss": 0.3273, "step": 4284 }, { "epoch": 0.27, "grad_norm": 1.3175344965314466, "learning_rate": 8.570467382663009e-06, "loss": 0.3239, "step": 4285 }, { "epoch": 0.27, "grad_norm": 7.789391909186617, "learning_rate": 8.569754323745311e-06, "loss": 0.3325, "step": 4286 }, { "epoch": 0.27, "grad_norm": 1.674364036570318, "learning_rate": 8.569041116712168e-06, "loss": 0.3414, "step": 4287 }, { "epoch": 0.27, "grad_norm": 1.8334698952624648, "learning_rate": 8.568327761593169e-06, "loss": 0.3276, "step": 4288 }, { "epoch": 0.27, "grad_norm": 1.4038681936410702, "learning_rate": 8.567614258417911e-06, "loss": 0.3397, "step": 4289 }, { "epoch": 0.27, "grad_norm": 1.2040393515936567, "learning_rate": 8.566900607216001e-06, "loss": 0.3368, "step": 4290 }, { "epoch": 0.27, "grad_norm": 1.4904611649416002, "learning_rate": 8.566186808017052e-06, "loss": 0.33, "step": 4291 }, { "epoch": 0.27, "grad_norm": 2.1832334506308713, "learning_rate": 8.565472860850675e-06, "loss": 0.3358, "step": 4292 }, { "epoch": 0.27, "grad_norm": 1.5032988899297923, "learning_rate": 8.564758765746499e-06, "loss": 0.3702, "step": 4293 }, { "epoch": 0.27, "grad_norm": 3.8313876251831345, "learning_rate": 8.564044522734147e-06, "loss": 0.3223, "step": 4294 }, { "epoch": 0.27, "grad_norm": 1.8462600345655764, "learning_rate": 8.563330131843259e-06, "loss": 0.3358, "step": 4295 }, { "epoch": 0.27, "grad_norm": 3.173645721342238, "learning_rate": 8.562615593103474e-06, "loss": 0.3745, "step": 4296 }, { "epoch": 0.27, "grad_norm": 1.6391342795380124, "learning_rate": 8.56190090654444e-06, "loss": 0.3325, "step": 4297 }, { "epoch": 0.27, "grad_norm": 1.4289099612583376, "learning_rate": 8.561186072195812e-06, "loss": 0.3388, "step": 4298 }, { "epoch": 0.27, "grad_norm": 1.2782260343317648, "learning_rate": 8.560471090087247e-06, "loss": 0.3629, "step": 4299 }, { "epoch": 0.27, "grad_norm": 1.204730944478129, "learning_rate": 8.559755960248414e-06, "loss": 0.3572, "step": 4300 }, { "epoch": 0.27, "grad_norm": 1.948083818207941, "learning_rate": 8.559040682708982e-06, "loss": 0.3281, "step": 4301 }, { "epoch": 0.27, "grad_norm": 2.274866949173534, "learning_rate": 8.558325257498632e-06, "loss": 0.3469, "step": 4302 }, { "epoch": 0.27, "grad_norm": 1.2081337728099282, "learning_rate": 8.557609684647045e-06, "loss": 0.3526, "step": 4303 }, { "epoch": 0.27, "grad_norm": 1.9668900755299428, "learning_rate": 8.556893964183913e-06, "loss": 0.3348, "step": 4304 }, { "epoch": 0.27, "grad_norm": 1.424996006234006, "learning_rate": 8.556178096138933e-06, "loss": 0.3227, "step": 4305 }, { "epoch": 0.27, "grad_norm": 1.4852727013421994, "learning_rate": 8.555462080541809e-06, "loss": 0.3303, "step": 4306 }, { "epoch": 0.27, "grad_norm": 1.5745261843264677, "learning_rate": 8.554745917422247e-06, "loss": 0.3625, "step": 4307 }, { "epoch": 0.27, "grad_norm": 1.7543657370706096, "learning_rate": 8.554029606809962e-06, "loss": 0.3403, "step": 4308 }, { "epoch": 0.27, "grad_norm": 1.6253439350883727, "learning_rate": 8.553313148734676e-06, "loss": 0.3302, "step": 4309 }, { "epoch": 0.27, "grad_norm": 1.8840392671545938, "learning_rate": 8.552596543226118e-06, "loss": 0.3388, "step": 4310 }, { "epoch": 0.27, "grad_norm": 3.295478319147859, "learning_rate": 8.551879790314018e-06, "loss": 0.3543, "step": 4311 }, { "epoch": 0.27, "grad_norm": 1.2614115819624998, "learning_rate": 8.551162890028118e-06, "loss": 0.3293, "step": 4312 }, { "epoch": 0.27, "grad_norm": 9.88470133706015, "learning_rate": 8.55044584239816e-06, "loss": 0.3529, "step": 4313 }, { "epoch": 0.27, "grad_norm": 1.4376100337365636, "learning_rate": 8.5497286474539e-06, "loss": 0.3379, "step": 4314 }, { "epoch": 0.27, "grad_norm": 2.1295369507884203, "learning_rate": 8.54901130522509e-06, "loss": 0.354, "step": 4315 }, { "epoch": 0.27, "grad_norm": 1.5413742819786713, "learning_rate": 8.5482938157415e-06, "loss": 0.3353, "step": 4316 }, { "epoch": 0.27, "grad_norm": 2.1450275082854895, "learning_rate": 8.547576179032896e-06, "loss": 0.3509, "step": 4317 }, { "epoch": 0.27, "grad_norm": 4.768370691279072, "learning_rate": 8.546858395129055e-06, "loss": 0.3514, "step": 4318 }, { "epoch": 0.27, "grad_norm": 1.2362246613404333, "learning_rate": 8.54614046405976e-06, "loss": 0.3244, "step": 4319 }, { "epoch": 0.27, "grad_norm": 1.1640030660592486, "learning_rate": 8.545422385854798e-06, "loss": 0.3587, "step": 4320 }, { "epoch": 0.27, "grad_norm": 1.5859332241452266, "learning_rate": 8.544704160543964e-06, "loss": 0.3406, "step": 4321 }, { "epoch": 0.27, "grad_norm": 2.7424619456056605, "learning_rate": 8.543985788157058e-06, "loss": 0.3434, "step": 4322 }, { "epoch": 0.27, "grad_norm": 2.3040625766961984, "learning_rate": 8.543267268723887e-06, "loss": 0.3342, "step": 4323 }, { "epoch": 0.27, "grad_norm": 1.7915454490679616, "learning_rate": 8.542548602274262e-06, "loss": 0.3421, "step": 4324 }, { "epoch": 0.27, "grad_norm": 2.5245026450484587, "learning_rate": 8.541829788838007e-06, "loss": 0.3603, "step": 4325 }, { "epoch": 0.27, "grad_norm": 1.8410130517323098, "learning_rate": 8.54111082844494e-06, "loss": 0.3339, "step": 4326 }, { "epoch": 0.27, "grad_norm": 1.4665300091391573, "learning_rate": 8.540391721124897e-06, "loss": 0.3287, "step": 4327 }, { "epoch": 0.27, "grad_norm": 1.9614137067521844, "learning_rate": 8.53967246690771e-06, "loss": 0.3522, "step": 4328 }, { "epoch": 0.27, "grad_norm": 1.169946975148502, "learning_rate": 8.538953065823229e-06, "loss": 0.3423, "step": 4329 }, { "epoch": 0.27, "grad_norm": 1.4254970564272986, "learning_rate": 8.538233517901298e-06, "loss": 0.335, "step": 4330 }, { "epoch": 0.27, "grad_norm": 1.8760149928771839, "learning_rate": 8.537513823171773e-06, "loss": 0.3336, "step": 4331 }, { "epoch": 0.27, "grad_norm": 1.527033180686907, "learning_rate": 8.536793981664515e-06, "loss": 0.3316, "step": 4332 }, { "epoch": 0.27, "grad_norm": 1.9056575167826721, "learning_rate": 8.536073993409394e-06, "loss": 0.3512, "step": 4333 }, { "epoch": 0.27, "grad_norm": 1.7523770080774033, "learning_rate": 8.535353858436284e-06, "loss": 0.3435, "step": 4334 }, { "epoch": 0.27, "grad_norm": 1.4986535924131184, "learning_rate": 8.534633576775059e-06, "loss": 0.326, "step": 4335 }, { "epoch": 0.27, "grad_norm": 2.1885721986240636, "learning_rate": 8.533913148455613e-06, "loss": 0.3488, "step": 4336 }, { "epoch": 0.27, "grad_norm": 1.4865190194148143, "learning_rate": 8.533192573507831e-06, "loss": 0.3345, "step": 4337 }, { "epoch": 0.27, "grad_norm": 1.7807621071293849, "learning_rate": 8.532471851961612e-06, "loss": 0.3287, "step": 4338 }, { "epoch": 0.27, "grad_norm": 1.7371039780349717, "learning_rate": 8.531750983846864e-06, "loss": 0.347, "step": 4339 }, { "epoch": 0.27, "grad_norm": 0.6273966515263565, "learning_rate": 8.531029969193494e-06, "loss": 0.5233, "step": 4340 }, { "epoch": 0.27, "grad_norm": 1.9254934974362559, "learning_rate": 8.53030880803142e-06, "loss": 0.3349, "step": 4341 }, { "epoch": 0.27, "grad_norm": 1.726887305529337, "learning_rate": 8.529587500390561e-06, "loss": 0.358, "step": 4342 }, { "epoch": 0.27, "grad_norm": 1.7184910973631984, "learning_rate": 8.528866046300848e-06, "loss": 0.3321, "step": 4343 }, { "epoch": 0.27, "grad_norm": 1.7275129845163628, "learning_rate": 8.528144445792215e-06, "loss": 0.3351, "step": 4344 }, { "epoch": 0.27, "grad_norm": 1.5879895951632277, "learning_rate": 8.527422698894602e-06, "loss": 0.3278, "step": 4345 }, { "epoch": 0.27, "grad_norm": 3.0130951180578815, "learning_rate": 8.526700805637955e-06, "loss": 0.327, "step": 4346 }, { "epoch": 0.27, "grad_norm": 2.2594256716898915, "learning_rate": 8.52597876605223e-06, "loss": 0.3464, "step": 4347 }, { "epoch": 0.27, "grad_norm": 1.8173665610318812, "learning_rate": 8.52525658016738e-06, "loss": 0.3337, "step": 4348 }, { "epoch": 0.27, "grad_norm": 1.1933980716705372, "learning_rate": 8.524534248013374e-06, "loss": 0.3506, "step": 4349 }, { "epoch": 0.27, "grad_norm": 1.4328666112013297, "learning_rate": 8.523811769620183e-06, "loss": 0.3288, "step": 4350 }, { "epoch": 0.27, "grad_norm": 1.3862717693388513, "learning_rate": 8.523089145017781e-06, "loss": 0.3382, "step": 4351 }, { "epoch": 0.27, "grad_norm": 2.963060191769809, "learning_rate": 8.522366374236155e-06, "loss": 0.3407, "step": 4352 }, { "epoch": 0.27, "grad_norm": 1.375648340144036, "learning_rate": 8.52164345730529e-06, "loss": 0.3235, "step": 4353 }, { "epoch": 0.27, "grad_norm": 1.759064297095795, "learning_rate": 8.520920394255183e-06, "loss": 0.3283, "step": 4354 }, { "epoch": 0.27, "grad_norm": 2.989751854975891, "learning_rate": 8.520197185115836e-06, "loss": 0.3171, "step": 4355 }, { "epoch": 0.27, "grad_norm": 1.5708895020208051, "learning_rate": 8.519473829917254e-06, "loss": 0.3305, "step": 4356 }, { "epoch": 0.27, "grad_norm": 1.4383389833553923, "learning_rate": 8.518750328689452e-06, "loss": 0.3189, "step": 4357 }, { "epoch": 0.27, "grad_norm": 1.702322153472481, "learning_rate": 8.518026681462448e-06, "loss": 0.3633, "step": 4358 }, { "epoch": 0.27, "grad_norm": 1.1957765031845529, "learning_rate": 8.517302888266268e-06, "loss": 0.3427, "step": 4359 }, { "epoch": 0.27, "grad_norm": 1.7601932345222768, "learning_rate": 8.516578949130945e-06, "loss": 0.3347, "step": 4360 }, { "epoch": 0.27, "grad_norm": 3.01378365039657, "learning_rate": 8.515854864086514e-06, "loss": 0.346, "step": 4361 }, { "epoch": 0.27, "grad_norm": 2.3238848278812276, "learning_rate": 8.515130633163023e-06, "loss": 0.3405, "step": 4362 }, { "epoch": 0.27, "grad_norm": 1.4612797367053143, "learning_rate": 8.514406256390514e-06, "loss": 0.365, "step": 4363 }, { "epoch": 0.27, "grad_norm": 1.3676256160402094, "learning_rate": 8.513681733799051e-06, "loss": 0.3427, "step": 4364 }, { "epoch": 0.27, "grad_norm": 1.7017726229022991, "learning_rate": 8.51295706541869e-06, "loss": 0.3312, "step": 4365 }, { "epoch": 0.27, "grad_norm": 1.8504650548271053, "learning_rate": 8.5122322512795e-06, "loss": 0.3181, "step": 4366 }, { "epoch": 0.27, "grad_norm": 1.6175004487616216, "learning_rate": 8.511507291411559e-06, "loss": 0.3372, "step": 4367 }, { "epoch": 0.27, "grad_norm": 1.6717481403154533, "learning_rate": 8.51078218584494e-06, "loss": 0.3453, "step": 4368 }, { "epoch": 0.27, "grad_norm": 1.6367362300887454, "learning_rate": 8.510056934609731e-06, "loss": 0.3267, "step": 4369 }, { "epoch": 0.27, "grad_norm": 1.6889658034505874, "learning_rate": 8.509331537736027e-06, "loss": 0.3549, "step": 4370 }, { "epoch": 0.27, "grad_norm": 1.563582268903867, "learning_rate": 8.508605995253925e-06, "loss": 0.3461, "step": 4371 }, { "epoch": 0.27, "grad_norm": 1.6291706586706887, "learning_rate": 8.507880307193528e-06, "loss": 0.3227, "step": 4372 }, { "epoch": 0.28, "grad_norm": 5.3164096190998, "learning_rate": 8.507154473584947e-06, "loss": 0.3685, "step": 4373 }, { "epoch": 0.28, "grad_norm": 1.4672278234014753, "learning_rate": 8.506428494458295e-06, "loss": 0.3194, "step": 4374 }, { "epoch": 0.28, "grad_norm": 1.4037526339260669, "learning_rate": 8.505702369843698e-06, "loss": 0.3379, "step": 4375 }, { "epoch": 0.28, "grad_norm": 2.6108254330532286, "learning_rate": 8.504976099771282e-06, "loss": 0.3407, "step": 4376 }, { "epoch": 0.28, "grad_norm": 3.0522815552892464, "learning_rate": 8.504249684271184e-06, "loss": 0.3404, "step": 4377 }, { "epoch": 0.28, "grad_norm": 1.5535046442865097, "learning_rate": 8.503523123373542e-06, "loss": 0.3355, "step": 4378 }, { "epoch": 0.28, "grad_norm": 1.3438725864520216, "learning_rate": 8.502796417108502e-06, "loss": 0.329, "step": 4379 }, { "epoch": 0.28, "grad_norm": 2.2135373884053635, "learning_rate": 8.502069565506217e-06, "loss": 0.3317, "step": 4380 }, { "epoch": 0.28, "grad_norm": 1.7996234124505242, "learning_rate": 8.501342568596845e-06, "loss": 0.3336, "step": 4381 }, { "epoch": 0.28, "grad_norm": 2.8120541851369305, "learning_rate": 8.500615426410552e-06, "loss": 0.379, "step": 4382 }, { "epoch": 0.28, "grad_norm": 1.9061961758448398, "learning_rate": 8.499888138977505e-06, "loss": 0.3278, "step": 4383 }, { "epoch": 0.28, "grad_norm": 1.535295646925185, "learning_rate": 8.499160706327885e-06, "loss": 0.3321, "step": 4384 }, { "epoch": 0.28, "grad_norm": 1.6314075826180614, "learning_rate": 8.498433128491871e-06, "loss": 0.3036, "step": 4385 }, { "epoch": 0.28, "grad_norm": 1.3899585689416163, "learning_rate": 8.497705405499653e-06, "loss": 0.3316, "step": 4386 }, { "epoch": 0.28, "grad_norm": 1.8082256460455335, "learning_rate": 8.496977537381423e-06, "loss": 0.3213, "step": 4387 }, { "epoch": 0.28, "grad_norm": 1.6111633913066594, "learning_rate": 8.496249524167385e-06, "loss": 0.3156, "step": 4388 }, { "epoch": 0.28, "grad_norm": 2.178875953257289, "learning_rate": 8.495521365887746e-06, "loss": 0.3249, "step": 4389 }, { "epoch": 0.28, "grad_norm": 1.5600057353396926, "learning_rate": 8.494793062572715e-06, "loss": 0.3251, "step": 4390 }, { "epoch": 0.28, "grad_norm": 1.9363086132080793, "learning_rate": 8.494064614252514e-06, "loss": 0.3483, "step": 4391 }, { "epoch": 0.28, "grad_norm": 2.3219061346519227, "learning_rate": 8.493336020957363e-06, "loss": 0.3385, "step": 4392 }, { "epoch": 0.28, "grad_norm": 2.0627756771022723, "learning_rate": 8.492607282717498e-06, "loss": 0.3138, "step": 4393 }, { "epoch": 0.28, "grad_norm": 1.969644771274343, "learning_rate": 8.491878399563152e-06, "loss": 0.3509, "step": 4394 }, { "epoch": 0.28, "grad_norm": 1.7271320488528492, "learning_rate": 8.49114937152457e-06, "loss": 0.3435, "step": 4395 }, { "epoch": 0.28, "grad_norm": 2.002131941452337, "learning_rate": 8.490420198631998e-06, "loss": 0.3385, "step": 4396 }, { "epoch": 0.28, "grad_norm": 1.077766615899552, "learning_rate": 8.489690880915695e-06, "loss": 0.3174, "step": 4397 }, { "epoch": 0.28, "grad_norm": 1.4175821939526254, "learning_rate": 8.488961418405917e-06, "loss": 0.3143, "step": 4398 }, { "epoch": 0.28, "grad_norm": 1.373658890823445, "learning_rate": 8.488231811132932e-06, "loss": 0.357, "step": 4399 }, { "epoch": 0.28, "grad_norm": 2.7707450262173765, "learning_rate": 8.487502059127015e-06, "loss": 0.3439, "step": 4400 }, { "epoch": 0.28, "grad_norm": 1.3140756892535392, "learning_rate": 8.486772162418442e-06, "loss": 0.3364, "step": 4401 }, { "epoch": 0.28, "grad_norm": 2.2274575926710707, "learning_rate": 8.4860421210375e-06, "loss": 0.3485, "step": 4402 }, { "epoch": 0.28, "grad_norm": 1.1946148108466015, "learning_rate": 8.485311935014478e-06, "loss": 0.3448, "step": 4403 }, { "epoch": 0.28, "grad_norm": 1.2386776861371314, "learning_rate": 8.484581604379673e-06, "loss": 0.3226, "step": 4404 }, { "epoch": 0.28, "grad_norm": 2.1940307130584027, "learning_rate": 8.483851129163388e-06, "loss": 0.3419, "step": 4405 }, { "epoch": 0.28, "grad_norm": 1.4218911382822264, "learning_rate": 8.48312050939593e-06, "loss": 0.3458, "step": 4406 }, { "epoch": 0.28, "grad_norm": 1.4026977993571956, "learning_rate": 8.482389745107618e-06, "loss": 0.3425, "step": 4407 }, { "epoch": 0.28, "grad_norm": 2.7591858574730552, "learning_rate": 8.481658836328767e-06, "loss": 0.333, "step": 4408 }, { "epoch": 0.28, "grad_norm": 1.3768237191404413, "learning_rate": 8.48092778308971e-06, "loss": 0.3452, "step": 4409 }, { "epoch": 0.28, "grad_norm": 1.1607715160729817, "learning_rate": 8.480196585420775e-06, "loss": 0.3401, "step": 4410 }, { "epoch": 0.28, "grad_norm": 1.7690587304651568, "learning_rate": 8.479465243352303e-06, "loss": 0.3353, "step": 4411 }, { "epoch": 0.28, "grad_norm": 1.166355849091709, "learning_rate": 8.478733756914636e-06, "loss": 0.3216, "step": 4412 }, { "epoch": 0.28, "grad_norm": 1.2161506734271486, "learning_rate": 8.478002126138127e-06, "loss": 0.3247, "step": 4413 }, { "epoch": 0.28, "grad_norm": 1.3162179304770918, "learning_rate": 8.477270351053133e-06, "loss": 0.3306, "step": 4414 }, { "epoch": 0.28, "grad_norm": 1.4929854712675306, "learning_rate": 8.476538431690016e-06, "loss": 0.3497, "step": 4415 }, { "epoch": 0.28, "grad_norm": 19.624411338106835, "learning_rate": 8.475806368079144e-06, "loss": 0.3609, "step": 4416 }, { "epoch": 0.28, "grad_norm": 2.0073075734938985, "learning_rate": 8.475074160250892e-06, "loss": 0.3313, "step": 4417 }, { "epoch": 0.28, "grad_norm": 2.807099711074169, "learning_rate": 8.47434180823564e-06, "loss": 0.3398, "step": 4418 }, { "epoch": 0.28, "grad_norm": 2.677998276150886, "learning_rate": 8.473609312063778e-06, "loss": 0.3462, "step": 4419 }, { "epoch": 0.28, "grad_norm": 0.6659747390409879, "learning_rate": 8.472876671765692e-06, "loss": 0.489, "step": 4420 }, { "epoch": 0.28, "grad_norm": 2.037584135778137, "learning_rate": 8.472143887371786e-06, "loss": 0.3379, "step": 4421 }, { "epoch": 0.28, "grad_norm": 1.3806910811268274, "learning_rate": 8.471410958912463e-06, "loss": 0.3233, "step": 4422 }, { "epoch": 0.28, "grad_norm": 2.0217306989118162, "learning_rate": 8.470677886418135e-06, "loss": 0.3474, "step": 4423 }, { "epoch": 0.28, "grad_norm": 1.211865032210044, "learning_rate": 8.469944669919214e-06, "loss": 0.3365, "step": 4424 }, { "epoch": 0.28, "grad_norm": 2.0957457196553895, "learning_rate": 8.469211309446127e-06, "loss": 0.3423, "step": 4425 }, { "epoch": 0.28, "grad_norm": 1.9589516548469754, "learning_rate": 8.4684778050293e-06, "loss": 0.3364, "step": 4426 }, { "epoch": 0.28, "grad_norm": 2.0751073588242526, "learning_rate": 8.467744156699168e-06, "loss": 0.3377, "step": 4427 }, { "epoch": 0.28, "grad_norm": 2.164582665735496, "learning_rate": 8.467010364486173e-06, "loss": 0.3701, "step": 4428 }, { "epoch": 0.28, "grad_norm": 11.773615112194289, "learning_rate": 8.466276428420759e-06, "loss": 0.3499, "step": 4429 }, { "epoch": 0.28, "grad_norm": 1.7825839864747932, "learning_rate": 8.465542348533379e-06, "loss": 0.3574, "step": 4430 }, { "epoch": 0.28, "grad_norm": 3.2084379741261175, "learning_rate": 8.46480812485449e-06, "loss": 0.35, "step": 4431 }, { "epoch": 0.28, "grad_norm": 1.4643439280425021, "learning_rate": 8.464073757414561e-06, "loss": 0.3522, "step": 4432 }, { "epoch": 0.28, "grad_norm": 1.6935562458211901, "learning_rate": 8.463339246244058e-06, "loss": 0.3346, "step": 4433 }, { "epoch": 0.28, "grad_norm": 1.6989172639998948, "learning_rate": 8.462604591373457e-06, "loss": 0.3388, "step": 4434 }, { "epoch": 0.28, "grad_norm": 1.6921131120928505, "learning_rate": 8.46186979283324e-06, "loss": 0.3521, "step": 4435 }, { "epoch": 0.28, "grad_norm": 2.552014840242945, "learning_rate": 8.461134850653899e-06, "loss": 0.3172, "step": 4436 }, { "epoch": 0.28, "grad_norm": 1.5128879381537257, "learning_rate": 8.460399764865923e-06, "loss": 0.353, "step": 4437 }, { "epoch": 0.28, "grad_norm": 2.2140103876396258, "learning_rate": 8.459664535499816e-06, "loss": 0.3549, "step": 4438 }, { "epoch": 0.28, "grad_norm": 2.9802234604045736, "learning_rate": 8.458929162586083e-06, "loss": 0.3498, "step": 4439 }, { "epoch": 0.28, "grad_norm": 5.301930046058223, "learning_rate": 8.458193646155234e-06, "loss": 0.329, "step": 4440 }, { "epoch": 0.28, "grad_norm": 2.6136415581442636, "learning_rate": 8.457457986237788e-06, "loss": 0.3502, "step": 4441 }, { "epoch": 0.28, "grad_norm": 2.7302452323933113, "learning_rate": 8.45672218286427e-06, "loss": 0.3241, "step": 4442 }, { "epoch": 0.28, "grad_norm": 1.1475670132876745, "learning_rate": 8.455986236065208e-06, "loss": 0.3252, "step": 4443 }, { "epoch": 0.28, "grad_norm": 1.6465254320379765, "learning_rate": 8.455250145871139e-06, "loss": 0.3287, "step": 4444 }, { "epoch": 0.28, "grad_norm": 2.2861135255516976, "learning_rate": 8.454513912312605e-06, "loss": 0.3169, "step": 4445 }, { "epoch": 0.28, "grad_norm": 0.697120502963351, "learning_rate": 8.45377753542015e-06, "loss": 0.5319, "step": 4446 }, { "epoch": 0.28, "grad_norm": 1.733200793446351, "learning_rate": 8.453041015224332e-06, "loss": 0.3337, "step": 4447 }, { "epoch": 0.28, "grad_norm": 2.060444362366062, "learning_rate": 8.452304351755712e-06, "loss": 0.3254, "step": 4448 }, { "epoch": 0.28, "grad_norm": 2.8920764037027373, "learning_rate": 8.451567545044848e-06, "loss": 0.3392, "step": 4449 }, { "epoch": 0.28, "grad_norm": 1.7831363366263777, "learning_rate": 8.450830595122317e-06, "loss": 0.3381, "step": 4450 }, { "epoch": 0.28, "grad_norm": 1.7005389347084314, "learning_rate": 8.450093502018698e-06, "loss": 0.3388, "step": 4451 }, { "epoch": 0.28, "grad_norm": 1.8978674480258144, "learning_rate": 8.449356265764568e-06, "loss": 0.3497, "step": 4452 }, { "epoch": 0.28, "grad_norm": 0.6128982561238903, "learning_rate": 8.448618886390523e-06, "loss": 0.4854, "step": 4453 }, { "epoch": 0.28, "grad_norm": 2.225443077703998, "learning_rate": 8.447881363927152e-06, "loss": 0.3379, "step": 4454 }, { "epoch": 0.28, "grad_norm": 2.170752349368643, "learning_rate": 8.44714369840506e-06, "loss": 0.3485, "step": 4455 }, { "epoch": 0.28, "grad_norm": 2.2547032767916018, "learning_rate": 8.446405889854853e-06, "loss": 0.3316, "step": 4456 }, { "epoch": 0.28, "grad_norm": 1.396362027733872, "learning_rate": 8.445667938307145e-06, "loss": 0.355, "step": 4457 }, { "epoch": 0.28, "grad_norm": 1.902622725375012, "learning_rate": 8.444929843792554e-06, "loss": 0.3245, "step": 4458 }, { "epoch": 0.28, "grad_norm": 2.0892517371336243, "learning_rate": 8.444191606341705e-06, "loss": 0.3295, "step": 4459 }, { "epoch": 0.28, "grad_norm": 1.5573086843510329, "learning_rate": 8.443453225985227e-06, "loss": 0.3189, "step": 4460 }, { "epoch": 0.28, "grad_norm": 2.786192565503805, "learning_rate": 8.442714702753761e-06, "loss": 0.3289, "step": 4461 }, { "epoch": 0.28, "grad_norm": 2.9314382992937276, "learning_rate": 8.441976036677945e-06, "loss": 0.3311, "step": 4462 }, { "epoch": 0.28, "grad_norm": 2.4805191730640384, "learning_rate": 8.44123722778843e-06, "loss": 0.3388, "step": 4463 }, { "epoch": 0.28, "grad_norm": 1.4171308586400473, "learning_rate": 8.440498276115872e-06, "loss": 0.3672, "step": 4464 }, { "epoch": 0.28, "grad_norm": 2.083731638138706, "learning_rate": 8.439759181690927e-06, "loss": 0.3624, "step": 4465 }, { "epoch": 0.28, "grad_norm": 1.6896070284168334, "learning_rate": 8.439019944544265e-06, "loss": 0.338, "step": 4466 }, { "epoch": 0.28, "grad_norm": 1.5516054145198155, "learning_rate": 8.438280564706556e-06, "loss": 0.3368, "step": 4467 }, { "epoch": 0.28, "grad_norm": 1.3380291500414585, "learning_rate": 8.43754104220848e-06, "loss": 0.3423, "step": 4468 }, { "epoch": 0.28, "grad_norm": 1.5818858495087924, "learning_rate": 8.436801377080721e-06, "loss": 0.3434, "step": 4469 }, { "epoch": 0.28, "grad_norm": 2.4457063362733797, "learning_rate": 8.436061569353969e-06, "loss": 0.3159, "step": 4470 }, { "epoch": 0.28, "grad_norm": 2.4635786151827817, "learning_rate": 8.435321619058918e-06, "loss": 0.3433, "step": 4471 }, { "epoch": 0.28, "grad_norm": 1.6632148533614326, "learning_rate": 8.434581526226271e-06, "loss": 0.3433, "step": 4472 }, { "epoch": 0.28, "grad_norm": 1.4451986013557694, "learning_rate": 8.433841290886736e-06, "loss": 0.3227, "step": 4473 }, { "epoch": 0.28, "grad_norm": 3.4790957748402054, "learning_rate": 8.433100913071028e-06, "loss": 0.3395, "step": 4474 }, { "epoch": 0.28, "grad_norm": 3.3330617974772014, "learning_rate": 8.432360392809864e-06, "loss": 0.3293, "step": 4475 }, { "epoch": 0.28, "grad_norm": 1.413761591379426, "learning_rate": 8.43161973013397e-06, "loss": 0.3207, "step": 4476 }, { "epoch": 0.28, "grad_norm": 1.3253103720450454, "learning_rate": 8.43087892507408e-06, "loss": 0.3223, "step": 4477 }, { "epoch": 0.28, "grad_norm": 2.140728020517338, "learning_rate": 8.430137977660929e-06, "loss": 0.3436, "step": 4478 }, { "epoch": 0.28, "grad_norm": 1.7925152385170382, "learning_rate": 8.42939688792526e-06, "loss": 0.3324, "step": 4479 }, { "epoch": 0.28, "grad_norm": 1.8727619116462806, "learning_rate": 8.428655655897824e-06, "loss": 0.3391, "step": 4480 }, { "epoch": 0.28, "grad_norm": 1.9297848501336063, "learning_rate": 8.427914281609374e-06, "loss": 0.3289, "step": 4481 }, { "epoch": 0.28, "grad_norm": 2.4249350042801914, "learning_rate": 8.427172765090671e-06, "loss": 0.3563, "step": 4482 }, { "epoch": 0.28, "grad_norm": 1.4973898360340145, "learning_rate": 8.426431106372484e-06, "loss": 0.3346, "step": 4483 }, { "epoch": 0.28, "grad_norm": 1.7919917592138357, "learning_rate": 8.425689305485583e-06, "loss": 0.3486, "step": 4484 }, { "epoch": 0.28, "grad_norm": 2.976785259701706, "learning_rate": 8.42494736246075e-06, "loss": 0.3702, "step": 4485 }, { "epoch": 0.28, "grad_norm": 1.6446864876745837, "learning_rate": 8.424205277328767e-06, "loss": 0.3395, "step": 4486 }, { "epoch": 0.28, "grad_norm": 1.5223096088696004, "learning_rate": 8.423463050120424e-06, "loss": 0.338, "step": 4487 }, { "epoch": 0.28, "grad_norm": 0.6865376594309599, "learning_rate": 8.42272068086652e-06, "loss": 0.4876, "step": 4488 }, { "epoch": 0.28, "grad_norm": 1.3421517139266632, "learning_rate": 8.421978169597853e-06, "loss": 0.3248, "step": 4489 }, { "epoch": 0.28, "grad_norm": 1.213398892885125, "learning_rate": 8.421235516345236e-06, "loss": 0.3262, "step": 4490 }, { "epoch": 0.28, "grad_norm": 2.924517519833465, "learning_rate": 8.420492721139479e-06, "loss": 0.3204, "step": 4491 }, { "epoch": 0.28, "grad_norm": 1.6575116442764894, "learning_rate": 8.419749784011405e-06, "loss": 0.3299, "step": 4492 }, { "epoch": 0.28, "grad_norm": 1.7710530905246775, "learning_rate": 8.419006704991837e-06, "loss": 0.3431, "step": 4493 }, { "epoch": 0.28, "grad_norm": 2.0894328764514416, "learning_rate": 8.418263484111607e-06, "loss": 0.325, "step": 4494 }, { "epoch": 0.28, "grad_norm": 1.4823357952923764, "learning_rate": 8.417520121401555e-06, "loss": 0.3408, "step": 4495 }, { "epoch": 0.28, "grad_norm": 0.6770118194333938, "learning_rate": 8.416776616892523e-06, "loss": 0.529, "step": 4496 }, { "epoch": 0.28, "grad_norm": 9.061292293556901, "learning_rate": 8.41603297061536e-06, "loss": 0.3535, "step": 4497 }, { "epoch": 0.28, "grad_norm": 1.3675176565008405, "learning_rate": 8.41528918260092e-06, "loss": 0.3406, "step": 4498 }, { "epoch": 0.28, "grad_norm": 1.8532972918923218, "learning_rate": 8.414545252880069e-06, "loss": 0.3355, "step": 4499 }, { "epoch": 0.28, "grad_norm": 1.5945330744361292, "learning_rate": 8.413801181483668e-06, "loss": 0.3247, "step": 4500 }, { "epoch": 0.28, "grad_norm": 1.7164676116388484, "learning_rate": 8.413056968442593e-06, "loss": 0.3302, "step": 4501 }, { "epoch": 0.28, "grad_norm": 0.5914714367032117, "learning_rate": 8.412312613787723e-06, "loss": 0.5142, "step": 4502 }, { "epoch": 0.28, "grad_norm": 1.4003420472063788, "learning_rate": 8.41156811754994e-06, "loss": 0.3173, "step": 4503 }, { "epoch": 0.28, "grad_norm": 1.867307296426739, "learning_rate": 8.410823479760138e-06, "loss": 0.3393, "step": 4504 }, { "epoch": 0.28, "grad_norm": 2.0443264810913835, "learning_rate": 8.41007870044921e-06, "loss": 0.3329, "step": 4505 }, { "epoch": 0.28, "grad_norm": 1.9652394315038801, "learning_rate": 8.40933377964806e-06, "loss": 0.3539, "step": 4506 }, { "epoch": 0.28, "grad_norm": 1.792371914748029, "learning_rate": 8.408588717387595e-06, "loss": 0.3289, "step": 4507 }, { "epoch": 0.28, "grad_norm": 1.5403269130661204, "learning_rate": 8.407843513698732e-06, "loss": 0.3216, "step": 4508 }, { "epoch": 0.28, "grad_norm": 1.8494122686288157, "learning_rate": 8.407098168612388e-06, "loss": 0.3396, "step": 4509 }, { "epoch": 0.28, "grad_norm": 2.0371687097009716, "learning_rate": 8.406352682159488e-06, "loss": 0.3519, "step": 4510 }, { "epoch": 0.28, "grad_norm": 1.395283305635252, "learning_rate": 8.405607054370967e-06, "loss": 0.3355, "step": 4511 }, { "epoch": 0.28, "grad_norm": 1.936582424310372, "learning_rate": 8.404861285277759e-06, "loss": 0.3329, "step": 4512 }, { "epoch": 0.28, "grad_norm": 2.1771448147845263, "learning_rate": 8.404115374910808e-06, "loss": 0.3288, "step": 4513 }, { "epoch": 0.28, "grad_norm": 2.045448180983775, "learning_rate": 8.403369323301065e-06, "loss": 0.3564, "step": 4514 }, { "epoch": 0.28, "grad_norm": 1.7095027800576776, "learning_rate": 8.402623130479482e-06, "loss": 0.3402, "step": 4515 }, { "epoch": 0.28, "grad_norm": 1.582270828514155, "learning_rate": 8.401876796477023e-06, "loss": 0.348, "step": 4516 }, { "epoch": 0.28, "grad_norm": 2.1942599791844697, "learning_rate": 8.401130321324653e-06, "loss": 0.3301, "step": 4517 }, { "epoch": 0.28, "grad_norm": 1.6912184946799662, "learning_rate": 8.400383705053344e-06, "loss": 0.3518, "step": 4518 }, { "epoch": 0.28, "grad_norm": 3.1691049921985823, "learning_rate": 8.399636947694077e-06, "loss": 0.3298, "step": 4519 }, { "epoch": 0.28, "grad_norm": 2.4106364768480293, "learning_rate": 8.398890049277834e-06, "loss": 0.35, "step": 4520 }, { "epoch": 0.28, "grad_norm": 2.3155101896571524, "learning_rate": 8.398143009835604e-06, "loss": 0.3349, "step": 4521 }, { "epoch": 0.28, "grad_norm": 2.6146989377182868, "learning_rate": 8.397395829398387e-06, "loss": 0.3234, "step": 4522 }, { "epoch": 0.28, "grad_norm": 3.8812126832136733, "learning_rate": 8.396648507997181e-06, "loss": 0.3219, "step": 4523 }, { "epoch": 0.28, "grad_norm": 3.6515107372210474, "learning_rate": 8.395901045662995e-06, "loss": 0.334, "step": 4524 }, { "epoch": 0.28, "grad_norm": 2.862319841649697, "learning_rate": 8.395153442426844e-06, "loss": 0.3393, "step": 4525 }, { "epoch": 0.28, "grad_norm": 2.00465234726739, "learning_rate": 8.394405698319748e-06, "loss": 0.3186, "step": 4526 }, { "epoch": 0.28, "grad_norm": 3.657619312354189, "learning_rate": 8.393657813372728e-06, "loss": 0.336, "step": 4527 }, { "epoch": 0.28, "grad_norm": 0.7031161656919498, "learning_rate": 8.392909787616817e-06, "loss": 0.5135, "step": 4528 }, { "epoch": 0.28, "grad_norm": 1.9786773654926382, "learning_rate": 8.392161621083054e-06, "loss": 0.3445, "step": 4529 }, { "epoch": 0.28, "grad_norm": 1.810524317888919, "learning_rate": 8.39141331380248e-06, "loss": 0.3348, "step": 4530 }, { "epoch": 0.28, "grad_norm": 2.2980793145349176, "learning_rate": 8.390664865806142e-06, "loss": 0.3429, "step": 4531 }, { "epoch": 0.29, "grad_norm": 2.032304141132441, "learning_rate": 8.389916277125099e-06, "loss": 0.3271, "step": 4532 }, { "epoch": 0.29, "grad_norm": 1.605666291927062, "learning_rate": 8.389167547790407e-06, "loss": 0.3182, "step": 4533 }, { "epoch": 0.29, "grad_norm": 1.7435515785285134, "learning_rate": 8.388418677833132e-06, "loss": 0.3269, "step": 4534 }, { "epoch": 0.29, "grad_norm": 1.3544372758044134, "learning_rate": 8.387669667284351e-06, "loss": 0.3291, "step": 4535 }, { "epoch": 0.29, "grad_norm": 2.319882159195161, "learning_rate": 8.386920516175135e-06, "loss": 0.32, "step": 4536 }, { "epoch": 0.29, "grad_norm": 2.8330191414718793, "learning_rate": 8.386171224536573e-06, "loss": 0.3397, "step": 4537 }, { "epoch": 0.29, "grad_norm": 1.6620619363299594, "learning_rate": 8.385421792399753e-06, "loss": 0.3347, "step": 4538 }, { "epoch": 0.29, "grad_norm": 2.28094788853945, "learning_rate": 8.384672219795766e-06, "loss": 0.3278, "step": 4539 }, { "epoch": 0.29, "grad_norm": 2.745254652276401, "learning_rate": 8.38392250675572e-06, "loss": 0.3362, "step": 4540 }, { "epoch": 0.29, "grad_norm": 1.7561920695282103, "learning_rate": 8.383172653310718e-06, "loss": 0.3304, "step": 4541 }, { "epoch": 0.29, "grad_norm": 1.8161784410716515, "learning_rate": 8.382422659491873e-06, "loss": 0.3289, "step": 4542 }, { "epoch": 0.29, "grad_norm": 1.7828874551969827, "learning_rate": 8.381672525330305e-06, "loss": 0.3089, "step": 4543 }, { "epoch": 0.29, "grad_norm": 1.8769878242003888, "learning_rate": 8.380922250857137e-06, "loss": 0.3293, "step": 4544 }, { "epoch": 0.29, "grad_norm": 1.4945991479535552, "learning_rate": 8.380171836103499e-06, "loss": 0.3183, "step": 4545 }, { "epoch": 0.29, "grad_norm": 4.5309737210634635, "learning_rate": 8.379421281100529e-06, "loss": 0.3357, "step": 4546 }, { "epoch": 0.29, "grad_norm": 2.9862667447555324, "learning_rate": 8.378670585879365e-06, "loss": 0.314, "step": 4547 }, { "epoch": 0.29, "grad_norm": 2.1606467474212208, "learning_rate": 8.37791975047116e-06, "loss": 0.3195, "step": 4548 }, { "epoch": 0.29, "grad_norm": 2.4590132781243565, "learning_rate": 8.377168774907063e-06, "loss": 0.3186, "step": 4549 }, { "epoch": 0.29, "grad_norm": 2.1345334959308437, "learning_rate": 8.376417659218234e-06, "loss": 0.3297, "step": 4550 }, { "epoch": 0.29, "grad_norm": 2.2319609286801514, "learning_rate": 8.375666403435842e-06, "loss": 0.3329, "step": 4551 }, { "epoch": 0.29, "grad_norm": 2.181362994384634, "learning_rate": 8.374915007591053e-06, "loss": 0.3396, "step": 4552 }, { "epoch": 0.29, "grad_norm": 1.959476969109861, "learning_rate": 8.374163471715046e-06, "loss": 0.3473, "step": 4553 }, { "epoch": 0.29, "grad_norm": 1.787575127099224, "learning_rate": 8.373411795839003e-06, "loss": 0.3293, "step": 4554 }, { "epoch": 0.29, "grad_norm": 2.6641799563675863, "learning_rate": 8.372659979994116e-06, "loss": 0.3301, "step": 4555 }, { "epoch": 0.29, "grad_norm": 4.621177104530586, "learning_rate": 8.371908024211572e-06, "loss": 0.3157, "step": 4556 }, { "epoch": 0.29, "grad_norm": 2.2845599080276258, "learning_rate": 8.371155928522578e-06, "loss": 0.3256, "step": 4557 }, { "epoch": 0.29, "grad_norm": 1.9115502875416628, "learning_rate": 8.370403692958334e-06, "loss": 0.3152, "step": 4558 }, { "epoch": 0.29, "grad_norm": 2.9359034905984105, "learning_rate": 8.369651317550055e-06, "loss": 0.326, "step": 4559 }, { "epoch": 0.29, "grad_norm": 1.47395122757619, "learning_rate": 8.368898802328957e-06, "loss": 0.3353, "step": 4560 }, { "epoch": 0.29, "grad_norm": 3.112484592043131, "learning_rate": 8.368146147326266e-06, "loss": 0.3227, "step": 4561 }, { "epoch": 0.29, "grad_norm": 1.837742348531059, "learning_rate": 8.367393352573207e-06, "loss": 0.3228, "step": 4562 }, { "epoch": 0.29, "grad_norm": 1.8198363011071297, "learning_rate": 8.366640418101018e-06, "loss": 0.3044, "step": 4563 }, { "epoch": 0.29, "grad_norm": 1.4792177581157653, "learning_rate": 8.365887343940937e-06, "loss": 0.3083, "step": 4564 }, { "epoch": 0.29, "grad_norm": 2.0749427789794592, "learning_rate": 8.365134130124214e-06, "loss": 0.3141, "step": 4565 }, { "epoch": 0.29, "grad_norm": 1.4680932736556904, "learning_rate": 8.364380776682095e-06, "loss": 0.3257, "step": 4566 }, { "epoch": 0.29, "grad_norm": 1.5283672458531308, "learning_rate": 8.363627283645843e-06, "loss": 0.3326, "step": 4567 }, { "epoch": 0.29, "grad_norm": 1.393840568899809, "learning_rate": 8.362873651046722e-06, "loss": 0.3201, "step": 4568 }, { "epoch": 0.29, "grad_norm": 2.5101834231715774, "learning_rate": 8.362119878916e-06, "loss": 0.3444, "step": 4569 }, { "epoch": 0.29, "grad_norm": 1.8470073253909227, "learning_rate": 8.361365967284951e-06, "loss": 0.326, "step": 4570 }, { "epoch": 0.29, "grad_norm": 2.531618393204744, "learning_rate": 8.360611916184859e-06, "loss": 0.3273, "step": 4571 }, { "epoch": 0.29, "grad_norm": 1.8945311703301655, "learning_rate": 8.359857725647009e-06, "loss": 0.3266, "step": 4572 }, { "epoch": 0.29, "grad_norm": 2.212260774091573, "learning_rate": 8.359103395702692e-06, "loss": 0.3195, "step": 4573 }, { "epoch": 0.29, "grad_norm": 3.352783258966287, "learning_rate": 8.358348926383211e-06, "loss": 0.3288, "step": 4574 }, { "epoch": 0.29, "grad_norm": 3.5333192301589995, "learning_rate": 8.357594317719868e-06, "loss": 0.322, "step": 4575 }, { "epoch": 0.29, "grad_norm": 1.4610462430917213, "learning_rate": 8.356839569743974e-06, "loss": 0.3299, "step": 4576 }, { "epoch": 0.29, "grad_norm": 2.887917428489183, "learning_rate": 8.35608468248684e-06, "loss": 0.3276, "step": 4577 }, { "epoch": 0.29, "grad_norm": 2.56190176490763, "learning_rate": 8.355329655979794e-06, "loss": 0.3346, "step": 4578 }, { "epoch": 0.29, "grad_norm": 2.1840627550859883, "learning_rate": 8.35457449025416e-06, "loss": 0.3176, "step": 4579 }, { "epoch": 0.29, "grad_norm": 1.8388061367703301, "learning_rate": 8.353819185341275e-06, "loss": 0.3181, "step": 4580 }, { "epoch": 0.29, "grad_norm": 1.8328163528404189, "learning_rate": 8.353063741272471e-06, "loss": 0.3113, "step": 4581 }, { "epoch": 0.29, "grad_norm": 1.4526576376010834, "learning_rate": 8.352308158079099e-06, "loss": 0.3248, "step": 4582 }, { "epoch": 0.29, "grad_norm": 2.7219056871357408, "learning_rate": 8.351552435792507e-06, "loss": 0.316, "step": 4583 }, { "epoch": 0.29, "grad_norm": 0.6905315900768709, "learning_rate": 8.35079657444405e-06, "loss": 0.5059, "step": 4584 }, { "epoch": 0.29, "grad_norm": 0.641832623872991, "learning_rate": 8.350040574065093e-06, "loss": 0.4916, "step": 4585 }, { "epoch": 0.29, "grad_norm": 1.6668764451013662, "learning_rate": 8.349284434687001e-06, "loss": 0.3373, "step": 4586 }, { "epoch": 0.29, "grad_norm": 0.5941483496192381, "learning_rate": 8.348528156341148e-06, "loss": 0.4646, "step": 4587 }, { "epoch": 0.29, "grad_norm": 2.479937255342335, "learning_rate": 8.347771739058917e-06, "loss": 0.3096, "step": 4588 }, { "epoch": 0.29, "grad_norm": 2.488931151716721, "learning_rate": 8.34701518287169e-06, "loss": 0.3407, "step": 4589 }, { "epoch": 0.29, "grad_norm": 1.957149558452516, "learning_rate": 8.346258487810855e-06, "loss": 0.3442, "step": 4590 }, { "epoch": 0.29, "grad_norm": 2.175477160163789, "learning_rate": 8.345501653907813e-06, "loss": 0.3293, "step": 4591 }, { "epoch": 0.29, "grad_norm": 0.7340860703640896, "learning_rate": 8.344744681193966e-06, "loss": 0.5371, "step": 4592 }, { "epoch": 0.29, "grad_norm": 1.4367219093086712, "learning_rate": 8.343987569700723e-06, "loss": 0.3157, "step": 4593 }, { "epoch": 0.29, "grad_norm": 4.348886619276724, "learning_rate": 8.343230319459493e-06, "loss": 0.3172, "step": 4594 }, { "epoch": 0.29, "grad_norm": 1.85393782325142, "learning_rate": 8.3424729305017e-06, "loss": 0.3538, "step": 4595 }, { "epoch": 0.29, "grad_norm": 1.5583309786518522, "learning_rate": 8.341715402858768e-06, "loss": 0.3281, "step": 4596 }, { "epoch": 0.29, "grad_norm": 1.494209692042095, "learning_rate": 8.340957736562127e-06, "loss": 0.3219, "step": 4597 }, { "epoch": 0.29, "grad_norm": 2.722372494117495, "learning_rate": 8.340199931643218e-06, "loss": 0.3217, "step": 4598 }, { "epoch": 0.29, "grad_norm": 1.2868989733023135, "learning_rate": 8.339441988133478e-06, "loss": 0.3336, "step": 4599 }, { "epoch": 0.29, "grad_norm": 2.7866223372150567, "learning_rate": 8.338683906064361e-06, "loss": 0.318, "step": 4600 }, { "epoch": 0.29, "grad_norm": 2.2134149476295644, "learning_rate": 8.337925685467318e-06, "loss": 0.3155, "step": 4601 }, { "epoch": 0.29, "grad_norm": 1.5640175712540683, "learning_rate": 8.337167326373809e-06, "loss": 0.3295, "step": 4602 }, { "epoch": 0.29, "grad_norm": 1.7998947475940565, "learning_rate": 8.3364088288153e-06, "loss": 0.3357, "step": 4603 }, { "epoch": 0.29, "grad_norm": 1.6051205017039099, "learning_rate": 8.335650192823263e-06, "loss": 0.3163, "step": 4604 }, { "epoch": 0.29, "grad_norm": 2.161571352742066, "learning_rate": 8.334891418429174e-06, "loss": 0.3105, "step": 4605 }, { "epoch": 0.29, "grad_norm": 5.988979064688914, "learning_rate": 8.334132505664519e-06, "loss": 0.3223, "step": 4606 }, { "epoch": 0.29, "grad_norm": 3.736417459213877, "learning_rate": 8.333373454560782e-06, "loss": 0.3307, "step": 4607 }, { "epoch": 0.29, "grad_norm": 1.6014179630032326, "learning_rate": 8.33261426514946e-06, "loss": 0.3223, "step": 4608 }, { "epoch": 0.29, "grad_norm": 0.657381201666818, "learning_rate": 8.331854937462052e-06, "loss": 0.5172, "step": 4609 }, { "epoch": 0.29, "grad_norm": 1.3674659918524494, "learning_rate": 8.331095471530066e-06, "loss": 0.3325, "step": 4610 }, { "epoch": 0.29, "grad_norm": 3.3468979278569857, "learning_rate": 8.33033586738501e-06, "loss": 0.3156, "step": 4611 }, { "epoch": 0.29, "grad_norm": 1.7585464608941155, "learning_rate": 8.329576125058406e-06, "loss": 0.3245, "step": 4612 }, { "epoch": 0.29, "grad_norm": 13.77020996096185, "learning_rate": 8.328816244581774e-06, "loss": 0.3179, "step": 4613 }, { "epoch": 0.29, "grad_norm": 2.35598120381295, "learning_rate": 8.328056225986642e-06, "loss": 0.3384, "step": 4614 }, { "epoch": 0.29, "grad_norm": 1.6658406559141374, "learning_rate": 8.327296069304547e-06, "loss": 0.3289, "step": 4615 }, { "epoch": 0.29, "grad_norm": 1.467570226639747, "learning_rate": 8.326535774567029e-06, "loss": 0.3234, "step": 4616 }, { "epoch": 0.29, "grad_norm": 1.6365120975622698, "learning_rate": 8.325775341805634e-06, "loss": 0.3279, "step": 4617 }, { "epoch": 0.29, "grad_norm": 1.7478469575446611, "learning_rate": 8.325014771051909e-06, "loss": 0.3288, "step": 4618 }, { "epoch": 0.29, "grad_norm": 1.997977303998103, "learning_rate": 8.324254062337419e-06, "loss": 0.3227, "step": 4619 }, { "epoch": 0.29, "grad_norm": 1.9471147861575473, "learning_rate": 8.323493215693721e-06, "loss": 0.3211, "step": 4620 }, { "epoch": 0.29, "grad_norm": 1.7943934292722723, "learning_rate": 8.322732231152386e-06, "loss": 0.3436, "step": 4621 }, { "epoch": 0.29, "grad_norm": 3.04573953156783, "learning_rate": 8.32197110874499e-06, "loss": 0.3142, "step": 4622 }, { "epoch": 0.29, "grad_norm": 1.8827940454540946, "learning_rate": 8.321209848503112e-06, "loss": 0.3174, "step": 4623 }, { "epoch": 0.29, "grad_norm": 1.3611606771710143, "learning_rate": 8.320448450458339e-06, "loss": 0.3154, "step": 4624 }, { "epoch": 0.29, "grad_norm": 1.3159068448606388, "learning_rate": 8.319686914642261e-06, "loss": 0.3214, "step": 4625 }, { "epoch": 0.29, "grad_norm": 0.6221485082863762, "learning_rate": 8.318925241086477e-06, "loss": 0.5184, "step": 4626 }, { "epoch": 0.29, "grad_norm": 2.030321585008596, "learning_rate": 8.31816342982259e-06, "loss": 0.3225, "step": 4627 }, { "epoch": 0.29, "grad_norm": 1.3398635464810131, "learning_rate": 8.317401480882207e-06, "loss": 0.3279, "step": 4628 }, { "epoch": 0.29, "grad_norm": 1.528329638237705, "learning_rate": 8.316639394296947e-06, "loss": 0.3319, "step": 4629 }, { "epoch": 0.29, "grad_norm": 2.3018221375463925, "learning_rate": 8.315877170098425e-06, "loss": 0.3286, "step": 4630 }, { "epoch": 0.29, "grad_norm": 3.8875170625119404, "learning_rate": 8.315114808318269e-06, "loss": 0.3098, "step": 4631 }, { "epoch": 0.29, "grad_norm": 2.28899150095528, "learning_rate": 8.314352308988114e-06, "loss": 0.3377, "step": 4632 }, { "epoch": 0.29, "grad_norm": 1.8717690001076601, "learning_rate": 8.313589672139593e-06, "loss": 0.3251, "step": 4633 }, { "epoch": 0.29, "grad_norm": 0.5992361475426525, "learning_rate": 8.312826897804352e-06, "loss": 0.495, "step": 4634 }, { "epoch": 0.29, "grad_norm": 1.815845661611426, "learning_rate": 8.312063986014039e-06, "loss": 0.3403, "step": 4635 }, { "epoch": 0.29, "grad_norm": 1.9168548744208262, "learning_rate": 8.311300936800306e-06, "loss": 0.3238, "step": 4636 }, { "epoch": 0.29, "grad_norm": 2.235787112400163, "learning_rate": 8.310537750194819e-06, "loss": 0.3287, "step": 4637 }, { "epoch": 0.29, "grad_norm": 2.6813044200360205, "learning_rate": 8.309774426229237e-06, "loss": 0.3224, "step": 4638 }, { "epoch": 0.29, "grad_norm": 3.4011164727267102, "learning_rate": 8.309010964935236e-06, "loss": 0.3136, "step": 4639 }, { "epoch": 0.29, "grad_norm": 1.6158207209231936, "learning_rate": 8.308247366344494e-06, "loss": 0.3341, "step": 4640 }, { "epoch": 0.29, "grad_norm": 1.733443758580431, "learning_rate": 8.307483630488693e-06, "loss": 0.3332, "step": 4641 }, { "epoch": 0.29, "grad_norm": 9.319928529755119, "learning_rate": 8.30671975739952e-06, "loss": 0.3152, "step": 4642 }, { "epoch": 0.29, "grad_norm": 3.8969761869791246, "learning_rate": 8.305955747108672e-06, "loss": 0.329, "step": 4643 }, { "epoch": 0.29, "grad_norm": 1.934293984723378, "learning_rate": 8.305191599647846e-06, "loss": 0.3244, "step": 4644 }, { "epoch": 0.29, "grad_norm": 2.1444513735312634, "learning_rate": 8.304427315048752e-06, "loss": 0.328, "step": 4645 }, { "epoch": 0.29, "grad_norm": 2.0981589553165523, "learning_rate": 8.303662893343099e-06, "loss": 0.3279, "step": 4646 }, { "epoch": 0.29, "grad_norm": 1.4599554538301995, "learning_rate": 8.302898334562604e-06, "loss": 0.3039, "step": 4647 }, { "epoch": 0.29, "grad_norm": 2.818042291414395, "learning_rate": 8.30213363873899e-06, "loss": 0.3419, "step": 4648 }, { "epoch": 0.29, "grad_norm": 1.6640616622599838, "learning_rate": 8.301368805903988e-06, "loss": 0.322, "step": 4649 }, { "epoch": 0.29, "grad_norm": 1.3387610727095836, "learning_rate": 8.300603836089329e-06, "loss": 0.3032, "step": 4650 }, { "epoch": 0.29, "grad_norm": 2.0141298870195135, "learning_rate": 8.299838729326754e-06, "loss": 0.3531, "step": 4651 }, { "epoch": 0.29, "grad_norm": 1.9261902144520084, "learning_rate": 8.29907348564801e-06, "loss": 0.3197, "step": 4652 }, { "epoch": 0.29, "grad_norm": 2.8634593622131974, "learning_rate": 8.298308105084845e-06, "loss": 0.3114, "step": 4653 }, { "epoch": 0.29, "grad_norm": 2.5803961812105665, "learning_rate": 8.29754258766902e-06, "loss": 0.3529, "step": 4654 }, { "epoch": 0.29, "grad_norm": 2.7374020598179145, "learning_rate": 8.296776933432296e-06, "loss": 0.348, "step": 4655 }, { "epoch": 0.29, "grad_norm": 2.7131753006160757, "learning_rate": 8.296011142406442e-06, "loss": 0.331, "step": 4656 }, { "epoch": 0.29, "grad_norm": 1.3262733550127155, "learning_rate": 8.29524521462323e-06, "loss": 0.3108, "step": 4657 }, { "epoch": 0.29, "grad_norm": 2.006111614964186, "learning_rate": 8.294479150114443e-06, "loss": 0.346, "step": 4658 }, { "epoch": 0.29, "grad_norm": 2.5931057728483875, "learning_rate": 8.293712948911862e-06, "loss": 0.3248, "step": 4659 }, { "epoch": 0.29, "grad_norm": 1.5811684522172256, "learning_rate": 8.292946611047282e-06, "loss": 0.3283, "step": 4660 }, { "epoch": 0.29, "grad_norm": 2.5822400594471997, "learning_rate": 8.292180136552498e-06, "loss": 0.3131, "step": 4661 }, { "epoch": 0.29, "grad_norm": 3.3242294608492324, "learning_rate": 8.29141352545931e-06, "loss": 0.3421, "step": 4662 }, { "epoch": 0.29, "grad_norm": 2.2683721125412544, "learning_rate": 8.290646777799531e-06, "loss": 0.3164, "step": 4663 }, { "epoch": 0.29, "grad_norm": 1.7722620972330796, "learning_rate": 8.289879893604973e-06, "loss": 0.3224, "step": 4664 }, { "epoch": 0.29, "grad_norm": 3.155502333050992, "learning_rate": 8.289112872907454e-06, "loss": 0.3285, "step": 4665 }, { "epoch": 0.29, "grad_norm": 2.9188493048321282, "learning_rate": 8.2883457157388e-06, "loss": 0.3478, "step": 4666 }, { "epoch": 0.29, "grad_norm": 2.03795725118682, "learning_rate": 8.28757842213084e-06, "loss": 0.339, "step": 4667 }, { "epoch": 0.29, "grad_norm": 3.084669198820233, "learning_rate": 8.286810992115413e-06, "loss": 0.3402, "step": 4668 }, { "epoch": 0.29, "grad_norm": 1.9010436454227169, "learning_rate": 8.28604342572436e-06, "loss": 0.2943, "step": 4669 }, { "epoch": 0.29, "grad_norm": 1.8805846955981191, "learning_rate": 8.28527572298953e-06, "loss": 0.3494, "step": 4670 }, { "epoch": 0.29, "grad_norm": 1.5105951866836607, "learning_rate": 8.284507883942771e-06, "loss": 0.3298, "step": 4671 }, { "epoch": 0.29, "grad_norm": 1.8701312657424063, "learning_rate": 8.283739908615949e-06, "loss": 0.3401, "step": 4672 }, { "epoch": 0.29, "grad_norm": 1.9356588574831817, "learning_rate": 8.282971797040925e-06, "loss": 0.326, "step": 4673 }, { "epoch": 0.29, "grad_norm": 1.5187844902542402, "learning_rate": 8.28220354924957e-06, "loss": 0.3107, "step": 4674 }, { "epoch": 0.29, "grad_norm": 2.1035198049693054, "learning_rate": 8.28143516527376e-06, "loss": 0.3319, "step": 4675 }, { "epoch": 0.29, "grad_norm": 22.636282393178313, "learning_rate": 8.280666645145377e-06, "loss": 0.3254, "step": 4676 }, { "epoch": 0.29, "grad_norm": 9.341425307095218, "learning_rate": 8.279897988896306e-06, "loss": 0.3503, "step": 4677 }, { "epoch": 0.29, "grad_norm": 3.2542353033770426, "learning_rate": 8.279129196558443e-06, "loss": 0.3463, "step": 4678 }, { "epoch": 0.29, "grad_norm": 2.656661930080295, "learning_rate": 8.278360268163686e-06, "loss": 0.3345, "step": 4679 }, { "epoch": 0.29, "grad_norm": 3.2763264469002285, "learning_rate": 8.277591203743937e-06, "loss": 0.3364, "step": 4680 }, { "epoch": 0.29, "grad_norm": 1.5888613647386105, "learning_rate": 8.276822003331108e-06, "loss": 0.3112, "step": 4681 }, { "epoch": 0.29, "grad_norm": 2.301783583669015, "learning_rate": 8.276052666957113e-06, "loss": 0.3297, "step": 4682 }, { "epoch": 0.29, "grad_norm": 1.6590518295557342, "learning_rate": 8.275283194653876e-06, "loss": 0.3207, "step": 4683 }, { "epoch": 0.29, "grad_norm": 3.573784037298755, "learning_rate": 8.27451358645332e-06, "loss": 0.3242, "step": 4684 }, { "epoch": 0.29, "grad_norm": 0.6889412945575218, "learning_rate": 8.27374384238738e-06, "loss": 0.5102, "step": 4685 }, { "epoch": 0.29, "grad_norm": 2.0184875092933416, "learning_rate": 8.272973962487991e-06, "loss": 0.3179, "step": 4686 }, { "epoch": 0.29, "grad_norm": 2.7308184830125093, "learning_rate": 8.272203946787101e-06, "loss": 0.3371, "step": 4687 }, { "epoch": 0.29, "grad_norm": 9.621088868982925, "learning_rate": 8.271433795316655e-06, "loss": 0.319, "step": 4688 }, { "epoch": 0.29, "grad_norm": 2.0747084076055846, "learning_rate": 8.270663508108611e-06, "loss": 0.3242, "step": 4689 }, { "epoch": 0.29, "grad_norm": 0.6603367027911864, "learning_rate": 8.26989308519493e-06, "loss": 0.5008, "step": 4690 }, { "epoch": 0.3, "grad_norm": 2.85065133230469, "learning_rate": 8.269122526607577e-06, "loss": 0.3447, "step": 4691 }, { "epoch": 0.3, "grad_norm": 1.8972568965185854, "learning_rate": 8.268351832378521e-06, "loss": 0.3276, "step": 4692 }, { "epoch": 0.3, "grad_norm": 3.379889960167494, "learning_rate": 8.267581002539745e-06, "loss": 0.3097, "step": 4693 }, { "epoch": 0.3, "grad_norm": 5.813434996376796, "learning_rate": 8.266810037123229e-06, "loss": 0.331, "step": 4694 }, { "epoch": 0.3, "grad_norm": 5.022182380703507, "learning_rate": 8.26603893616096e-06, "loss": 0.3046, "step": 4695 }, { "epoch": 0.3, "grad_norm": 3.313427973205733, "learning_rate": 8.265267699684937e-06, "loss": 0.3517, "step": 4696 }, { "epoch": 0.3, "grad_norm": 2.2638290308411952, "learning_rate": 8.264496327727157e-06, "loss": 0.3138, "step": 4697 }, { "epoch": 0.3, "grad_norm": 2.1232531724630594, "learning_rate": 8.263724820319624e-06, "loss": 0.3038, "step": 4698 }, { "epoch": 0.3, "grad_norm": 1.553147675273588, "learning_rate": 8.262953177494353e-06, "loss": 0.314, "step": 4699 }, { "epoch": 0.3, "grad_norm": 3.351131033381673, "learning_rate": 8.262181399283359e-06, "loss": 0.3085, "step": 4700 }, { "epoch": 0.3, "grad_norm": 1.609495727538814, "learning_rate": 8.261409485718663e-06, "loss": 0.3145, "step": 4701 }, { "epoch": 0.3, "grad_norm": 1.9284030865348525, "learning_rate": 8.260637436832295e-06, "loss": 0.3384, "step": 4702 }, { "epoch": 0.3, "grad_norm": 1.7319450065480584, "learning_rate": 8.259865252656289e-06, "loss": 0.319, "step": 4703 }, { "epoch": 0.3, "grad_norm": 0.6200426032426879, "learning_rate": 8.259092933222683e-06, "loss": 0.4603, "step": 4704 }, { "epoch": 0.3, "grad_norm": 9.693101313184172, "learning_rate": 8.258320478563524e-06, "loss": 0.3158, "step": 4705 }, { "epoch": 0.3, "grad_norm": 2.889716943238856, "learning_rate": 8.25754788871086e-06, "loss": 0.3037, "step": 4706 }, { "epoch": 0.3, "grad_norm": 1.4358539033489492, "learning_rate": 8.256775163696746e-06, "loss": 0.3106, "step": 4707 }, { "epoch": 0.3, "grad_norm": 9.238543110556, "learning_rate": 8.256002303553248e-06, "loss": 0.337, "step": 4708 }, { "epoch": 0.3, "grad_norm": 1.5476751159653763, "learning_rate": 8.25522930831243e-06, "loss": 0.3175, "step": 4709 }, { "epoch": 0.3, "grad_norm": 2.3005169638419916, "learning_rate": 8.254456178006368e-06, "loss": 0.337, "step": 4710 }, { "epoch": 0.3, "grad_norm": 0.599553246533363, "learning_rate": 8.253682912667136e-06, "loss": 0.4831, "step": 4711 }, { "epoch": 0.3, "grad_norm": 0.5740293443919963, "learning_rate": 8.252909512326821e-06, "loss": 0.4913, "step": 4712 }, { "epoch": 0.3, "grad_norm": 2.546963579802148, "learning_rate": 8.252135977017513e-06, "loss": 0.3189, "step": 4713 }, { "epoch": 0.3, "grad_norm": 4.429125795642956, "learning_rate": 8.251362306771306e-06, "loss": 0.3568, "step": 4714 }, { "epoch": 0.3, "grad_norm": 5.189305238838425, "learning_rate": 8.250588501620305e-06, "loss": 0.3221, "step": 4715 }, { "epoch": 0.3, "grad_norm": 3.5843130933913105, "learning_rate": 8.249814561596612e-06, "loss": 0.3119, "step": 4716 }, { "epoch": 0.3, "grad_norm": 3.071138163688954, "learning_rate": 8.24904048673234e-06, "loss": 0.3593, "step": 4717 }, { "epoch": 0.3, "grad_norm": 2.5238599435112223, "learning_rate": 8.248266277059607e-06, "loss": 0.3147, "step": 4718 }, { "epoch": 0.3, "grad_norm": 3.1740048627002473, "learning_rate": 8.247491932610536e-06, "loss": 0.3211, "step": 4719 }, { "epoch": 0.3, "grad_norm": 4.140818851619252, "learning_rate": 8.246717453417257e-06, "loss": 0.3097, "step": 4720 }, { "epoch": 0.3, "grad_norm": 1.4118365828492045, "learning_rate": 8.245942839511905e-06, "loss": 0.3084, "step": 4721 }, { "epoch": 0.3, "grad_norm": 1.8780104654279364, "learning_rate": 8.245168090926618e-06, "loss": 0.3362, "step": 4722 }, { "epoch": 0.3, "grad_norm": 3.509275582536811, "learning_rate": 8.244393207693544e-06, "loss": 0.3217, "step": 4723 }, { "epoch": 0.3, "grad_norm": 3.307156329842178, "learning_rate": 8.243618189844831e-06, "loss": 0.3173, "step": 4724 }, { "epoch": 0.3, "grad_norm": 1.6175065624426581, "learning_rate": 8.24284303741264e-06, "loss": 0.3281, "step": 4725 }, { "epoch": 0.3, "grad_norm": 2.091261383505988, "learning_rate": 8.242067750429131e-06, "loss": 0.3256, "step": 4726 }, { "epoch": 0.3, "grad_norm": 2.6042560866381548, "learning_rate": 8.241292328926473e-06, "loss": 0.3124, "step": 4727 }, { "epoch": 0.3, "grad_norm": 1.4559835196693374, "learning_rate": 8.240516772936837e-06, "loss": 0.3455, "step": 4728 }, { "epoch": 0.3, "grad_norm": 1.4597397892968809, "learning_rate": 8.239741082492405e-06, "loss": 0.329, "step": 4729 }, { "epoch": 0.3, "grad_norm": 0.714973562946189, "learning_rate": 8.238965257625363e-06, "loss": 0.5296, "step": 4730 }, { "epoch": 0.3, "grad_norm": 2.8917801488245396, "learning_rate": 8.238189298367898e-06, "loss": 0.3334, "step": 4731 }, { "epoch": 0.3, "grad_norm": 2.188231882240646, "learning_rate": 8.237413204752208e-06, "loss": 0.3409, "step": 4732 }, { "epoch": 0.3, "grad_norm": 2.7461597182270046, "learning_rate": 8.236636976810492e-06, "loss": 0.3443, "step": 4733 }, { "epoch": 0.3, "grad_norm": 3.224238394081794, "learning_rate": 8.235860614574962e-06, "loss": 0.3288, "step": 4734 }, { "epoch": 0.3, "grad_norm": 1.68912360675444, "learning_rate": 8.235084118077826e-06, "loss": 0.2999, "step": 4735 }, { "epoch": 0.3, "grad_norm": 3.6644920691114202, "learning_rate": 8.234307487351303e-06, "loss": 0.3414, "step": 4736 }, { "epoch": 0.3, "grad_norm": 1.9254952544282533, "learning_rate": 8.233530722427618e-06, "loss": 0.3194, "step": 4737 }, { "epoch": 0.3, "grad_norm": 1.7836344216234346, "learning_rate": 8.232753823339001e-06, "loss": 0.3346, "step": 4738 }, { "epoch": 0.3, "grad_norm": 1.9185557546294354, "learning_rate": 8.231976790117684e-06, "loss": 0.336, "step": 4739 }, { "epoch": 0.3, "grad_norm": 1.7885959894956402, "learning_rate": 8.231199622795912e-06, "loss": 0.3007, "step": 4740 }, { "epoch": 0.3, "grad_norm": 1.401987209489957, "learning_rate": 8.230422321405925e-06, "loss": 0.3291, "step": 4741 }, { "epoch": 0.3, "grad_norm": 2.816715072691335, "learning_rate": 8.22964488597998e-06, "loss": 0.3445, "step": 4742 }, { "epoch": 0.3, "grad_norm": 1.8542396643594432, "learning_rate": 8.228867316550332e-06, "loss": 0.3322, "step": 4743 }, { "epoch": 0.3, "grad_norm": 1.5081081724294076, "learning_rate": 8.228089613149244e-06, "loss": 0.3352, "step": 4744 }, { "epoch": 0.3, "grad_norm": 1.6356742044834944, "learning_rate": 8.227311775808984e-06, "loss": 0.3042, "step": 4745 }, { "epoch": 0.3, "grad_norm": 2.520840142695837, "learning_rate": 8.226533804561828e-06, "loss": 0.3259, "step": 4746 }, { "epoch": 0.3, "grad_norm": 1.768885534148715, "learning_rate": 8.22575569944005e-06, "loss": 0.3233, "step": 4747 }, { "epoch": 0.3, "grad_norm": 2.3583230985499926, "learning_rate": 8.22497746047594e-06, "loss": 0.3386, "step": 4748 }, { "epoch": 0.3, "grad_norm": 1.5577941188261932, "learning_rate": 8.224199087701787e-06, "loss": 0.3178, "step": 4749 }, { "epoch": 0.3, "grad_norm": 1.7525710060671866, "learning_rate": 8.223420581149887e-06, "loss": 0.3219, "step": 4750 }, { "epoch": 0.3, "grad_norm": 13.948686355608059, "learning_rate": 8.222641940852543e-06, "loss": 0.3244, "step": 4751 }, { "epoch": 0.3, "grad_norm": 1.5969601204197872, "learning_rate": 8.221863166842057e-06, "loss": 0.3273, "step": 4752 }, { "epoch": 0.3, "grad_norm": 1.6072918833551268, "learning_rate": 8.22108425915075e-06, "loss": 0.32, "step": 4753 }, { "epoch": 0.3, "grad_norm": 1.7374397474057224, "learning_rate": 8.220305217810933e-06, "loss": 0.3058, "step": 4754 }, { "epoch": 0.3, "grad_norm": 1.5285658154607755, "learning_rate": 8.219526042854933e-06, "loss": 0.314, "step": 4755 }, { "epoch": 0.3, "grad_norm": 3.6978959663770667, "learning_rate": 8.21874673431508e-06, "loss": 0.3296, "step": 4756 }, { "epoch": 0.3, "grad_norm": 6.685246099049766, "learning_rate": 8.217967292223707e-06, "loss": 0.3229, "step": 4757 }, { "epoch": 0.3, "grad_norm": 4.516889116784165, "learning_rate": 8.217187716613156e-06, "loss": 0.3214, "step": 4758 }, { "epoch": 0.3, "grad_norm": 3.5307710719855767, "learning_rate": 8.21640800751577e-06, "loss": 0.3346, "step": 4759 }, { "epoch": 0.3, "grad_norm": 2.339994045329839, "learning_rate": 8.215628164963906e-06, "loss": 0.3115, "step": 4760 }, { "epoch": 0.3, "grad_norm": 1.643101351187922, "learning_rate": 8.214848188989916e-06, "loss": 0.3225, "step": 4761 }, { "epoch": 0.3, "grad_norm": 0.7033291289125447, "learning_rate": 8.214068079626166e-06, "loss": 0.491, "step": 4762 }, { "epoch": 0.3, "grad_norm": 3.1133177261523097, "learning_rate": 8.213287836905021e-06, "loss": 0.3404, "step": 4763 }, { "epoch": 0.3, "grad_norm": 2.45630998250199, "learning_rate": 8.212507460858857e-06, "loss": 0.3314, "step": 4764 }, { "epoch": 0.3, "grad_norm": 2.4375229160064453, "learning_rate": 8.21172695152005e-06, "loss": 0.3505, "step": 4765 }, { "epoch": 0.3, "grad_norm": 2.0368390595621215, "learning_rate": 8.21094630892099e-06, "loss": 0.3092, "step": 4766 }, { "epoch": 0.3, "grad_norm": 1.8435608026026553, "learning_rate": 8.210165533094064e-06, "loss": 0.3264, "step": 4767 }, { "epoch": 0.3, "grad_norm": 17.623385415676598, "learning_rate": 8.209384624071667e-06, "loss": 0.3433, "step": 4768 }, { "epoch": 0.3, "grad_norm": 2.2817442670181367, "learning_rate": 8.208603581886203e-06, "loss": 0.3367, "step": 4769 }, { "epoch": 0.3, "grad_norm": 3.6621890877153036, "learning_rate": 8.207822406570077e-06, "loss": 0.3169, "step": 4770 }, { "epoch": 0.3, "grad_norm": 3.3938859499120446, "learning_rate": 8.207041098155701e-06, "loss": 0.3193, "step": 4771 }, { "epoch": 0.3, "grad_norm": 1.8851538537508348, "learning_rate": 8.206259656675493e-06, "loss": 0.3333, "step": 4772 }, { "epoch": 0.3, "grad_norm": 3.531857536210539, "learning_rate": 8.205478082161877e-06, "loss": 0.3247, "step": 4773 }, { "epoch": 0.3, "grad_norm": 2.44634320497755, "learning_rate": 8.204696374647282e-06, "loss": 0.3224, "step": 4774 }, { "epoch": 0.3, "grad_norm": 4.280888599599943, "learning_rate": 8.203914534164143e-06, "loss": 0.3238, "step": 4775 }, { "epoch": 0.3, "grad_norm": 1.6805330915645307, "learning_rate": 8.203132560744898e-06, "loss": 0.3106, "step": 4776 }, { "epoch": 0.3, "grad_norm": 1.865548206504745, "learning_rate": 8.202350454421995e-06, "loss": 0.3333, "step": 4777 }, { "epoch": 0.3, "grad_norm": 2.318842918556991, "learning_rate": 8.201568215227882e-06, "loss": 0.3232, "step": 4778 }, { "epoch": 0.3, "grad_norm": 5.388261525992749, "learning_rate": 8.200785843195019e-06, "loss": 0.3273, "step": 4779 }, { "epoch": 0.3, "grad_norm": 2.3297509743118145, "learning_rate": 8.200003338355863e-06, "loss": 0.3306, "step": 4780 }, { "epoch": 0.3, "grad_norm": 2.393080028369493, "learning_rate": 8.199220700742889e-06, "loss": 0.3222, "step": 4781 }, { "epoch": 0.3, "grad_norm": 2.6313111032338217, "learning_rate": 8.198437930388562e-06, "loss": 0.3494, "step": 4782 }, { "epoch": 0.3, "grad_norm": 1.4352270482371632, "learning_rate": 8.197655027325366e-06, "loss": 0.2982, "step": 4783 }, { "epoch": 0.3, "grad_norm": 2.206024029455773, "learning_rate": 8.196871991585784e-06, "loss": 0.3443, "step": 4784 }, { "epoch": 0.3, "grad_norm": 1.653110086631432, "learning_rate": 8.196088823202302e-06, "loss": 0.3143, "step": 4785 }, { "epoch": 0.3, "grad_norm": 3.2011751161782405, "learning_rate": 8.19530552220742e-06, "loss": 0.3529, "step": 4786 }, { "epoch": 0.3, "grad_norm": 17.924828819173896, "learning_rate": 8.194522088633637e-06, "loss": 0.3324, "step": 4787 }, { "epoch": 0.3, "grad_norm": 2.1398642044622593, "learning_rate": 8.193738522513458e-06, "loss": 0.3341, "step": 4788 }, { "epoch": 0.3, "grad_norm": 2.7157670836447547, "learning_rate": 8.192954823879392e-06, "loss": 0.3054, "step": 4789 }, { "epoch": 0.3, "grad_norm": 1.8737679459565855, "learning_rate": 8.192170992763962e-06, "loss": 0.2988, "step": 4790 }, { "epoch": 0.3, "grad_norm": 1.999504728006175, "learning_rate": 8.191387029199687e-06, "loss": 0.3295, "step": 4791 }, { "epoch": 0.3, "grad_norm": 0.7098577395923357, "learning_rate": 8.190602933219096e-06, "loss": 0.5288, "step": 4792 }, { "epoch": 0.3, "grad_norm": 2.072568427445285, "learning_rate": 8.189818704854721e-06, "loss": 0.3392, "step": 4793 }, { "epoch": 0.3, "grad_norm": 1.9416515731766864, "learning_rate": 8.189034344139102e-06, "loss": 0.3369, "step": 4794 }, { "epoch": 0.3, "grad_norm": 2.5445706462693876, "learning_rate": 8.188249851104785e-06, "loss": 0.3238, "step": 4795 }, { "epoch": 0.3, "grad_norm": 1.6009115325262264, "learning_rate": 8.18746522578432e-06, "loss": 0.3071, "step": 4796 }, { "epoch": 0.3, "grad_norm": 1.8634447593031986, "learning_rate": 8.186680468210258e-06, "loss": 0.3042, "step": 4797 }, { "epoch": 0.3, "grad_norm": 2.7643107661326325, "learning_rate": 8.185895578415164e-06, "loss": 0.3352, "step": 4798 }, { "epoch": 0.3, "grad_norm": 1.7129930040941912, "learning_rate": 8.185110556431606e-06, "loss": 0.3261, "step": 4799 }, { "epoch": 0.3, "grad_norm": 1.8987423140500093, "learning_rate": 8.184325402292151e-06, "loss": 0.3098, "step": 4800 }, { "epoch": 0.3, "grad_norm": 3.5328024607127184, "learning_rate": 8.18354011602938e-06, "loss": 0.3157, "step": 4801 }, { "epoch": 0.3, "grad_norm": 4.333752570392361, "learning_rate": 8.182754697675875e-06, "loss": 0.3052, "step": 4802 }, { "epoch": 0.3, "grad_norm": 2.3306054321306156, "learning_rate": 8.181969147264224e-06, "loss": 0.3358, "step": 4803 }, { "epoch": 0.3, "grad_norm": 2.1146384907583307, "learning_rate": 8.181183464827022e-06, "loss": 0.306, "step": 4804 }, { "epoch": 0.3, "grad_norm": 2.0758202525059914, "learning_rate": 8.180397650396868e-06, "loss": 0.3044, "step": 4805 }, { "epoch": 0.3, "grad_norm": 1.6975350539339977, "learning_rate": 8.179611704006364e-06, "loss": 0.3127, "step": 4806 }, { "epoch": 0.3, "grad_norm": 2.4776622404553157, "learning_rate": 8.178825625688126e-06, "loss": 0.3298, "step": 4807 }, { "epoch": 0.3, "grad_norm": 1.8269808749535046, "learning_rate": 8.178039415474765e-06, "loss": 0.3403, "step": 4808 }, { "epoch": 0.3, "grad_norm": 2.145335388209343, "learning_rate": 8.177253073398903e-06, "loss": 0.3016, "step": 4809 }, { "epoch": 0.3, "grad_norm": 2.8458166161902985, "learning_rate": 8.17646659949317e-06, "loss": 0.3338, "step": 4810 }, { "epoch": 0.3, "grad_norm": 3.144466171744352, "learning_rate": 8.175679993790194e-06, "loss": 0.3207, "step": 4811 }, { "epoch": 0.3, "grad_norm": 1.6336326139927528, "learning_rate": 8.174893256322613e-06, "loss": 0.3513, "step": 4812 }, { "epoch": 0.3, "grad_norm": 2.51376091258254, "learning_rate": 8.174106387123073e-06, "loss": 0.3496, "step": 4813 }, { "epoch": 0.3, "grad_norm": 1.7210806637465388, "learning_rate": 8.173319386224222e-06, "loss": 0.3484, "step": 4814 }, { "epoch": 0.3, "grad_norm": 4.65241854735433, "learning_rate": 8.172532253658711e-06, "loss": 0.3292, "step": 4815 }, { "epoch": 0.3, "grad_norm": 1.9905391000063817, "learning_rate": 8.171744989459205e-06, "loss": 0.3308, "step": 4816 }, { "epoch": 0.3, "grad_norm": 1.7518605995680492, "learning_rate": 8.170957593658365e-06, "loss": 0.3068, "step": 4817 }, { "epoch": 0.3, "grad_norm": 3.311984135292733, "learning_rate": 8.170170066288861e-06, "loss": 0.3356, "step": 4818 }, { "epoch": 0.3, "grad_norm": 2.122085113950779, "learning_rate": 8.169382407383368e-06, "loss": 0.3172, "step": 4819 }, { "epoch": 0.3, "grad_norm": 3.995065735215276, "learning_rate": 8.168594616974572e-06, "loss": 0.319, "step": 4820 }, { "epoch": 0.3, "grad_norm": 3.2246262626361033, "learning_rate": 8.167806695095157e-06, "loss": 0.2997, "step": 4821 }, { "epoch": 0.3, "grad_norm": 1.7612564023005002, "learning_rate": 8.167018641777817e-06, "loss": 0.3288, "step": 4822 }, { "epoch": 0.3, "grad_norm": 1.8208704590702947, "learning_rate": 8.166230457055247e-06, "loss": 0.3219, "step": 4823 }, { "epoch": 0.3, "grad_norm": 1.8920000445494063, "learning_rate": 8.16544214096015e-06, "loss": 0.3308, "step": 4824 }, { "epoch": 0.3, "grad_norm": 1.997619539738235, "learning_rate": 8.164653693525239e-06, "loss": 0.3073, "step": 4825 }, { "epoch": 0.3, "grad_norm": 1.3368032693286307, "learning_rate": 8.163865114783223e-06, "loss": 0.3083, "step": 4826 }, { "epoch": 0.3, "grad_norm": 1.614647051404713, "learning_rate": 8.163076404766823e-06, "loss": 0.3022, "step": 4827 }, { "epoch": 0.3, "grad_norm": 2.7694390217696063, "learning_rate": 8.162287563508767e-06, "loss": 0.3218, "step": 4828 }, { "epoch": 0.3, "grad_norm": 1.3419304443359232, "learning_rate": 8.161498591041783e-06, "loss": 0.3246, "step": 4829 }, { "epoch": 0.3, "grad_norm": 2.935574256862518, "learning_rate": 8.160709487398605e-06, "loss": 0.3258, "step": 4830 }, { "epoch": 0.3, "grad_norm": 2.4843662681417458, "learning_rate": 8.159920252611979e-06, "loss": 0.3159, "step": 4831 }, { "epoch": 0.3, "grad_norm": 1.47709063905116, "learning_rate": 8.159130886714647e-06, "loss": 0.3029, "step": 4832 }, { "epoch": 0.3, "grad_norm": 5.059276053569302, "learning_rate": 8.158341389739364e-06, "loss": 0.3372, "step": 4833 }, { "epoch": 0.3, "grad_norm": 0.7453200918215377, "learning_rate": 8.157551761718886e-06, "loss": 0.4911, "step": 4834 }, { "epoch": 0.3, "grad_norm": 5.658487968733307, "learning_rate": 8.156762002685978e-06, "loss": 0.316, "step": 4835 }, { "epoch": 0.3, "grad_norm": 4.804281861595954, "learning_rate": 8.155972112673406e-06, "loss": 0.3176, "step": 4836 }, { "epoch": 0.3, "grad_norm": 2.20719583393348, "learning_rate": 8.155182091713946e-06, "loss": 0.3131, "step": 4837 }, { "epoch": 0.3, "grad_norm": 1.3365859821620707, "learning_rate": 8.154391939840379e-06, "loss": 0.3064, "step": 4838 }, { "epoch": 0.3, "grad_norm": 1.7261160213559534, "learning_rate": 8.153601657085486e-06, "loss": 0.3408, "step": 4839 }, { "epoch": 0.3, "grad_norm": 1.563921809444925, "learning_rate": 8.15281124348206e-06, "loss": 0.3439, "step": 4840 }, { "epoch": 0.3, "grad_norm": 1.779037035691217, "learning_rate": 8.152020699062892e-06, "loss": 0.3169, "step": 4841 }, { "epoch": 0.3, "grad_norm": 1.6113109661634013, "learning_rate": 8.15123002386079e-06, "loss": 0.3282, "step": 4842 }, { "epoch": 0.3, "grad_norm": 1.6786433123007816, "learning_rate": 8.150439217908557e-06, "loss": 0.2942, "step": 4843 }, { "epoch": 0.3, "grad_norm": 1.3948528641322693, "learning_rate": 8.149648281239005e-06, "loss": 0.3266, "step": 4844 }, { "epoch": 0.3, "grad_norm": 2.406817990679698, "learning_rate": 8.148857213884948e-06, "loss": 0.3394, "step": 4845 }, { "epoch": 0.3, "grad_norm": 5.435589378225438, "learning_rate": 8.148066015879214e-06, "loss": 0.338, "step": 4846 }, { "epoch": 0.3, "grad_norm": 73.95252745390995, "learning_rate": 8.147274687254632e-06, "loss": 0.3143, "step": 4847 }, { "epoch": 0.3, "grad_norm": 1.933090324683608, "learning_rate": 8.146483228044032e-06, "loss": 0.3226, "step": 4848 }, { "epoch": 0.3, "grad_norm": 3.083182320648684, "learning_rate": 8.14569163828025e-06, "loss": 0.3297, "step": 4849 }, { "epoch": 0.31, "grad_norm": 3.81068733173486, "learning_rate": 8.144899917996139e-06, "loss": 0.3232, "step": 4850 }, { "epoch": 0.31, "grad_norm": 2.5115646565506933, "learning_rate": 8.144108067224544e-06, "loss": 0.3594, "step": 4851 }, { "epoch": 0.31, "grad_norm": 2.3704651812605, "learning_rate": 8.14331608599832e-06, "loss": 0.3486, "step": 4852 }, { "epoch": 0.31, "grad_norm": 4.750737390975413, "learning_rate": 8.142523974350327e-06, "loss": 0.3344, "step": 4853 }, { "epoch": 0.31, "grad_norm": 2.641300177158323, "learning_rate": 8.141731732313434e-06, "loss": 0.321, "step": 4854 }, { "epoch": 0.31, "grad_norm": 1.3377095274666442, "learning_rate": 8.140939359920512e-06, "loss": 0.3266, "step": 4855 }, { "epoch": 0.31, "grad_norm": 2.2454839829427358, "learning_rate": 8.140146857204433e-06, "loss": 0.345, "step": 4856 }, { "epoch": 0.31, "grad_norm": 1.6052923028643362, "learning_rate": 8.139354224198087e-06, "loss": 0.3172, "step": 4857 }, { "epoch": 0.31, "grad_norm": 3.2004381869875216, "learning_rate": 8.13856146093436e-06, "loss": 0.3211, "step": 4858 }, { "epoch": 0.31, "grad_norm": 1.8407810825110196, "learning_rate": 8.137768567446141e-06, "loss": 0.3172, "step": 4859 }, { "epoch": 0.31, "grad_norm": 1.8974771862862094, "learning_rate": 8.136975543766328e-06, "loss": 0.3347, "step": 4860 }, { "epoch": 0.31, "grad_norm": 1.834938564558332, "learning_rate": 8.136182389927832e-06, "loss": 0.3347, "step": 4861 }, { "epoch": 0.31, "grad_norm": 2.9249037333511576, "learning_rate": 8.135389105963556e-06, "loss": 0.3306, "step": 4862 }, { "epoch": 0.31, "grad_norm": 1.869441650437946, "learning_rate": 8.134595691906416e-06, "loss": 0.3262, "step": 4863 }, { "epoch": 0.31, "grad_norm": 2.4694686351112236, "learning_rate": 8.133802147789337e-06, "loss": 0.3013, "step": 4864 }, { "epoch": 0.31, "grad_norm": 2.6259746653299456, "learning_rate": 8.133008473645237e-06, "loss": 0.3365, "step": 4865 }, { "epoch": 0.31, "grad_norm": 1.679726853444609, "learning_rate": 8.132214669507052e-06, "loss": 0.3034, "step": 4866 }, { "epoch": 0.31, "grad_norm": 2.8052295131154517, "learning_rate": 8.131420735407716e-06, "loss": 0.3467, "step": 4867 }, { "epoch": 0.31, "grad_norm": 1.5368211521749648, "learning_rate": 8.130626671380172e-06, "loss": 0.3316, "step": 4868 }, { "epoch": 0.31, "grad_norm": 3.1732339031379313, "learning_rate": 8.129832477457367e-06, "loss": 0.3287, "step": 4869 }, { "epoch": 0.31, "grad_norm": 1.860292595498335, "learning_rate": 8.129038153672255e-06, "loss": 0.3396, "step": 4870 }, { "epoch": 0.31, "grad_norm": 5.155789115355333, "learning_rate": 8.128243700057788e-06, "loss": 0.3324, "step": 4871 }, { "epoch": 0.31, "grad_norm": 1.6100435575597394, "learning_rate": 8.127449116646937e-06, "loss": 0.311, "step": 4872 }, { "epoch": 0.31, "grad_norm": 1.7138489497901312, "learning_rate": 8.126654403472668e-06, "loss": 0.3104, "step": 4873 }, { "epoch": 0.31, "grad_norm": 1.7018536243420075, "learning_rate": 8.125859560567952e-06, "loss": 0.3157, "step": 4874 }, { "epoch": 0.31, "grad_norm": 1.75272407707653, "learning_rate": 8.12506458796577e-06, "loss": 0.3296, "step": 4875 }, { "epoch": 0.31, "grad_norm": 2.877593274176602, "learning_rate": 8.124269485699108e-06, "loss": 0.3159, "step": 4876 }, { "epoch": 0.31, "grad_norm": 2.099054102035657, "learning_rate": 8.123474253800958e-06, "loss": 0.305, "step": 4877 }, { "epoch": 0.31, "grad_norm": 1.6060449639620602, "learning_rate": 8.12267889230431e-06, "loss": 0.3247, "step": 4878 }, { "epoch": 0.31, "grad_norm": 2.09337815632032, "learning_rate": 8.12188340124217e-06, "loss": 0.3221, "step": 4879 }, { "epoch": 0.31, "grad_norm": 2.4440770016397826, "learning_rate": 8.121087780647543e-06, "loss": 0.3189, "step": 4880 }, { "epoch": 0.31, "grad_norm": 11.290636335528498, "learning_rate": 8.120292030553441e-06, "loss": 0.3163, "step": 4881 }, { "epoch": 0.31, "grad_norm": 2.955720730453499, "learning_rate": 8.119496150992879e-06, "loss": 0.3247, "step": 4882 }, { "epoch": 0.31, "grad_norm": 2.585402146371944, "learning_rate": 8.118700141998879e-06, "loss": 0.328, "step": 4883 }, { "epoch": 0.31, "grad_norm": 1.8193494686149803, "learning_rate": 8.117904003604472e-06, "loss": 0.3118, "step": 4884 }, { "epoch": 0.31, "grad_norm": 3.5046889722588577, "learning_rate": 8.117107735842693e-06, "loss": 0.3476, "step": 4885 }, { "epoch": 0.31, "grad_norm": 2.0278179379997585, "learning_rate": 8.116311338746574e-06, "loss": 0.3097, "step": 4886 }, { "epoch": 0.31, "grad_norm": 2.3797793022230813, "learning_rate": 8.115514812349163e-06, "loss": 0.3331, "step": 4887 }, { "epoch": 0.31, "grad_norm": 2.9909270540906427, "learning_rate": 8.11471815668351e-06, "loss": 0.3227, "step": 4888 }, { "epoch": 0.31, "grad_norm": 2.5316997453624013, "learning_rate": 8.113921371782667e-06, "loss": 0.3279, "step": 4889 }, { "epoch": 0.31, "grad_norm": 2.2260613296304994, "learning_rate": 8.113124457679695e-06, "loss": 0.2957, "step": 4890 }, { "epoch": 0.31, "grad_norm": 3.1211395293278983, "learning_rate": 8.112327414407662e-06, "loss": 0.3133, "step": 4891 }, { "epoch": 0.31, "grad_norm": 1.7679163209400715, "learning_rate": 8.111530241999634e-06, "loss": 0.3431, "step": 4892 }, { "epoch": 0.31, "grad_norm": 3.741665979087409, "learning_rate": 8.11073294048869e-06, "loss": 0.3054, "step": 4893 }, { "epoch": 0.31, "grad_norm": 1.2713386087154355, "learning_rate": 8.109935509907911e-06, "loss": 0.3154, "step": 4894 }, { "epoch": 0.31, "grad_norm": 1.5499479964956717, "learning_rate": 8.109137950290385e-06, "loss": 0.2943, "step": 4895 }, { "epoch": 0.31, "grad_norm": 1.6241753185389316, "learning_rate": 8.108340261669204e-06, "loss": 0.3314, "step": 4896 }, { "epoch": 0.31, "grad_norm": 2.065475597908749, "learning_rate": 8.107542444077461e-06, "loss": 0.3514, "step": 4897 }, { "epoch": 0.31, "grad_norm": 3.3892492590291545, "learning_rate": 8.106744497548265e-06, "loss": 0.3229, "step": 4898 }, { "epoch": 0.31, "grad_norm": 1.7329539118650756, "learning_rate": 8.105946422114721e-06, "loss": 0.2981, "step": 4899 }, { "epoch": 0.31, "grad_norm": 1.7567142614783802, "learning_rate": 8.105148217809941e-06, "loss": 0.3045, "step": 4900 }, { "epoch": 0.31, "grad_norm": 1.6199260900646555, "learning_rate": 8.104349884667048e-06, "loss": 0.3203, "step": 4901 }, { "epoch": 0.31, "grad_norm": 2.185619067488261, "learning_rate": 8.103551422719164e-06, "loss": 0.3026, "step": 4902 }, { "epoch": 0.31, "grad_norm": 2.0196664867388665, "learning_rate": 8.10275283199942e-06, "loss": 0.3277, "step": 4903 }, { "epoch": 0.31, "grad_norm": 1.4596505677728213, "learning_rate": 8.10195411254095e-06, "loss": 0.3148, "step": 4904 }, { "epoch": 0.31, "grad_norm": 2.0219263492276407, "learning_rate": 8.101155264376892e-06, "loss": 0.3206, "step": 4905 }, { "epoch": 0.31, "grad_norm": 2.787627867209791, "learning_rate": 8.100356287540397e-06, "loss": 0.3254, "step": 4906 }, { "epoch": 0.31, "grad_norm": 2.410642521194272, "learning_rate": 8.09955718206461e-06, "loss": 0.3093, "step": 4907 }, { "epoch": 0.31, "grad_norm": 2.0444840723299285, "learning_rate": 8.098757947982692e-06, "loss": 0.321, "step": 4908 }, { "epoch": 0.31, "grad_norm": 2.4640748718609533, "learning_rate": 8.097958585327801e-06, "loss": 0.3154, "step": 4909 }, { "epoch": 0.31, "grad_norm": 3.42997998563266, "learning_rate": 8.097159094133106e-06, "loss": 0.2988, "step": 4910 }, { "epoch": 0.31, "grad_norm": 0.8019002532735567, "learning_rate": 8.09635947443178e-06, "loss": 0.5156, "step": 4911 }, { "epoch": 0.31, "grad_norm": 2.2354178381952123, "learning_rate": 8.095559726256998e-06, "loss": 0.3281, "step": 4912 }, { "epoch": 0.31, "grad_norm": 4.041393490542275, "learning_rate": 8.094759849641946e-06, "loss": 0.3238, "step": 4913 }, { "epoch": 0.31, "grad_norm": 1.7435732778585893, "learning_rate": 8.093959844619812e-06, "loss": 0.3223, "step": 4914 }, { "epoch": 0.31, "grad_norm": 2.9563665748240093, "learning_rate": 8.093159711223788e-06, "loss": 0.3196, "step": 4915 }, { "epoch": 0.31, "grad_norm": 16.896494403766674, "learning_rate": 8.09235944948707e-06, "loss": 0.3184, "step": 4916 }, { "epoch": 0.31, "grad_norm": 1.5403427987104072, "learning_rate": 8.09155905944287e-06, "loss": 0.3232, "step": 4917 }, { "epoch": 0.31, "grad_norm": 2.278523899803821, "learning_rate": 8.090758541124394e-06, "loss": 0.313, "step": 4918 }, { "epoch": 0.31, "grad_norm": 2.0731620283807266, "learning_rate": 8.089957894564851e-06, "loss": 0.3244, "step": 4919 }, { "epoch": 0.31, "grad_norm": 3.9387298702383564, "learning_rate": 8.08915711979747e-06, "loss": 0.3258, "step": 4920 }, { "epoch": 0.31, "grad_norm": 2.0861885799415876, "learning_rate": 8.088356216855474e-06, "loss": 0.3176, "step": 4921 }, { "epoch": 0.31, "grad_norm": 3.3201997461856845, "learning_rate": 8.087555185772093e-06, "loss": 0.3651, "step": 4922 }, { "epoch": 0.31, "grad_norm": 2.122575679762488, "learning_rate": 8.08675402658056e-06, "loss": 0.3112, "step": 4923 }, { "epoch": 0.31, "grad_norm": 1.7670045849754261, "learning_rate": 8.085952739314123e-06, "loss": 0.3215, "step": 4924 }, { "epoch": 0.31, "grad_norm": 1.9158837604562065, "learning_rate": 8.085151324006024e-06, "loss": 0.3022, "step": 4925 }, { "epoch": 0.31, "grad_norm": 9.144228081612862, "learning_rate": 8.084349780689517e-06, "loss": 0.3358, "step": 4926 }, { "epoch": 0.31, "grad_norm": 2.215449578261158, "learning_rate": 8.08354810939786e-06, "loss": 0.3011, "step": 4927 }, { "epoch": 0.31, "grad_norm": 1.8327874943896953, "learning_rate": 8.082746310164316e-06, "loss": 0.3094, "step": 4928 }, { "epoch": 0.31, "grad_norm": 2.907844742173548, "learning_rate": 8.08194438302215e-06, "loss": 0.3482, "step": 4929 }, { "epoch": 0.31, "grad_norm": 3.2764122764388346, "learning_rate": 8.081142328004638e-06, "loss": 0.3163, "step": 4930 }, { "epoch": 0.31, "grad_norm": 1.6559404359038419, "learning_rate": 8.080340145145058e-06, "loss": 0.3305, "step": 4931 }, { "epoch": 0.31, "grad_norm": 3.8500527172802412, "learning_rate": 8.079537834476692e-06, "loss": 0.3137, "step": 4932 }, { "epoch": 0.31, "grad_norm": 2.6798642355980546, "learning_rate": 8.078735396032835e-06, "loss": 0.3312, "step": 4933 }, { "epoch": 0.31, "grad_norm": 1.9532205992175309, "learning_rate": 8.077932829846774e-06, "loss": 0.308, "step": 4934 }, { "epoch": 0.31, "grad_norm": 2.587313719105938, "learning_rate": 8.077130135951817e-06, "loss": 0.3021, "step": 4935 }, { "epoch": 0.31, "grad_norm": 0.7471957552749761, "learning_rate": 8.076327314381262e-06, "loss": 0.5175, "step": 4936 }, { "epoch": 0.31, "grad_norm": 2.477394257055449, "learning_rate": 8.075524365168426e-06, "loss": 0.3153, "step": 4937 }, { "epoch": 0.31, "grad_norm": 1.6740954872672904, "learning_rate": 8.074721288346617e-06, "loss": 0.3046, "step": 4938 }, { "epoch": 0.31, "grad_norm": 3.0022588763356945, "learning_rate": 8.073918083949164e-06, "loss": 0.3143, "step": 4939 }, { "epoch": 0.31, "grad_norm": 2.107769896618522, "learning_rate": 8.073114752009388e-06, "loss": 0.3132, "step": 4940 }, { "epoch": 0.31, "grad_norm": 9.767220171014733, "learning_rate": 8.072311292560624e-06, "loss": 0.308, "step": 4941 }, { "epoch": 0.31, "grad_norm": 6.565922059987354, "learning_rate": 8.071507705636204e-06, "loss": 0.3194, "step": 4942 }, { "epoch": 0.31, "grad_norm": 2.574200775674833, "learning_rate": 8.070703991269477e-06, "loss": 0.308, "step": 4943 }, { "epoch": 0.31, "grad_norm": 1.9724278178689933, "learning_rate": 8.069900149493786e-06, "loss": 0.3212, "step": 4944 }, { "epoch": 0.31, "grad_norm": 1.8255427530008028, "learning_rate": 8.069096180342486e-06, "loss": 0.3095, "step": 4945 }, { "epoch": 0.31, "grad_norm": 1.60634504768918, "learning_rate": 8.068292083848932e-06, "loss": 0.3256, "step": 4946 }, { "epoch": 0.31, "grad_norm": 4.373028519392421, "learning_rate": 8.067487860046492e-06, "loss": 0.3094, "step": 4947 }, { "epoch": 0.31, "grad_norm": 1.664247309678174, "learning_rate": 8.06668350896853e-06, "loss": 0.3132, "step": 4948 }, { "epoch": 0.31, "grad_norm": 1.753065592450937, "learning_rate": 8.065879030648423e-06, "loss": 0.3261, "step": 4949 }, { "epoch": 0.31, "grad_norm": 2.2169032541546447, "learning_rate": 8.06507442511955e-06, "loss": 0.318, "step": 4950 }, { "epoch": 0.31, "grad_norm": 4.545690984191229, "learning_rate": 8.064269692415296e-06, "loss": 0.3255, "step": 4951 }, { "epoch": 0.31, "grad_norm": 5.970656893769879, "learning_rate": 8.063464832569048e-06, "loss": 0.3155, "step": 4952 }, { "epoch": 0.31, "grad_norm": 2.2575909231858047, "learning_rate": 8.062659845614202e-06, "loss": 0.3308, "step": 4953 }, { "epoch": 0.31, "grad_norm": 1.801862396995543, "learning_rate": 8.06185473158416e-06, "loss": 0.33, "step": 4954 }, { "epoch": 0.31, "grad_norm": 2.160705406548701, "learning_rate": 8.061049490512326e-06, "loss": 0.3401, "step": 4955 }, { "epoch": 0.31, "grad_norm": 2.0949235628577947, "learning_rate": 8.060244122432116e-06, "loss": 0.3147, "step": 4956 }, { "epoch": 0.31, "grad_norm": 2.941835161034645, "learning_rate": 8.059438627376936e-06, "loss": 0.3074, "step": 4957 }, { "epoch": 0.31, "grad_norm": 5.744498126916404, "learning_rate": 8.058633005380216e-06, "loss": 0.3168, "step": 4958 }, { "epoch": 0.31, "grad_norm": 0.7371791076787418, "learning_rate": 8.05782725647538e-06, "loss": 0.4851, "step": 4959 }, { "epoch": 0.31, "grad_norm": 2.648512559719333, "learning_rate": 8.057021380695858e-06, "loss": 0.3249, "step": 4960 }, { "epoch": 0.31, "grad_norm": 1.8496177680693084, "learning_rate": 8.05621537807509e-06, "loss": 0.3165, "step": 4961 }, { "epoch": 0.31, "grad_norm": 5.874420013697325, "learning_rate": 8.055409248646517e-06, "loss": 0.308, "step": 4962 }, { "epoch": 0.31, "grad_norm": 1.9362228664429233, "learning_rate": 8.05460299244359e-06, "loss": 0.3011, "step": 4963 }, { "epoch": 0.31, "grad_norm": 1.8530717830919292, "learning_rate": 8.053796609499755e-06, "loss": 0.3365, "step": 4964 }, { "epoch": 0.31, "grad_norm": 2.188405320612547, "learning_rate": 8.052990099848478e-06, "loss": 0.3054, "step": 4965 }, { "epoch": 0.31, "grad_norm": 2.9289555558700955, "learning_rate": 8.052183463523218e-06, "loss": 0.3065, "step": 4966 }, { "epoch": 0.31, "grad_norm": 5.985572830928523, "learning_rate": 8.051376700557445e-06, "loss": 0.3128, "step": 4967 }, { "epoch": 0.31, "grad_norm": 1.9974222898370826, "learning_rate": 8.050569810984632e-06, "loss": 0.3191, "step": 4968 }, { "epoch": 0.31, "grad_norm": 2.7153716759739575, "learning_rate": 8.049762794838258e-06, "loss": 0.3156, "step": 4969 }, { "epoch": 0.31, "grad_norm": 1.9904526917215217, "learning_rate": 8.04895565215181e-06, "loss": 0.3171, "step": 4970 }, { "epoch": 0.31, "grad_norm": 2.6899332359955075, "learning_rate": 8.048148382958777e-06, "loss": 0.3442, "step": 4971 }, { "epoch": 0.31, "grad_norm": 3.683926435589481, "learning_rate": 8.047340987292653e-06, "loss": 0.3386, "step": 4972 }, { "epoch": 0.31, "grad_norm": 1.9911786050649964, "learning_rate": 8.046533465186939e-06, "loss": 0.3041, "step": 4973 }, { "epoch": 0.31, "grad_norm": 2.073778338160897, "learning_rate": 8.04572581667514e-06, "loss": 0.312, "step": 4974 }, { "epoch": 0.31, "grad_norm": 1.57490476913578, "learning_rate": 8.044918041790768e-06, "loss": 0.3215, "step": 4975 }, { "epoch": 0.31, "grad_norm": 1.919997197183369, "learning_rate": 8.044110140567337e-06, "loss": 0.3131, "step": 4976 }, { "epoch": 0.31, "grad_norm": 0.6494139966985812, "learning_rate": 8.04330211303837e-06, "loss": 0.4924, "step": 4977 }, { "epoch": 0.31, "grad_norm": 0.7070700658497352, "learning_rate": 8.042493959237391e-06, "loss": 0.5016, "step": 4978 }, { "epoch": 0.31, "grad_norm": 2.0483476573986326, "learning_rate": 8.041685679197936e-06, "loss": 0.3127, "step": 4979 }, { "epoch": 0.31, "grad_norm": 2.9444596376193637, "learning_rate": 8.040877272953538e-06, "loss": 0.3319, "step": 4980 }, { "epoch": 0.31, "grad_norm": 3.477915831462931, "learning_rate": 8.040068740537741e-06, "loss": 0.3317, "step": 4981 }, { "epoch": 0.31, "grad_norm": 2.230217332345066, "learning_rate": 8.039260081984092e-06, "loss": 0.3055, "step": 4982 }, { "epoch": 0.31, "grad_norm": 1.7654025561999291, "learning_rate": 8.038451297326146e-06, "loss": 0.3122, "step": 4983 }, { "epoch": 0.31, "grad_norm": 1.9280784523657415, "learning_rate": 8.037642386597456e-06, "loss": 0.308, "step": 4984 }, { "epoch": 0.31, "grad_norm": 2.166152756269824, "learning_rate": 8.03683334983159e-06, "loss": 0.333, "step": 4985 }, { "epoch": 0.31, "grad_norm": 5.922670774962704, "learning_rate": 8.036024187062113e-06, "loss": 0.3172, "step": 4986 }, { "epoch": 0.31, "grad_norm": 3.9193641648410344, "learning_rate": 8.0352148983226e-06, "loss": 0.3169, "step": 4987 }, { "epoch": 0.31, "grad_norm": 6.648087946869346, "learning_rate": 8.03440548364663e-06, "loss": 0.3145, "step": 4988 }, { "epoch": 0.31, "grad_norm": 1.9904013501905227, "learning_rate": 8.033595943067786e-06, "loss": 0.3083, "step": 4989 }, { "epoch": 0.31, "grad_norm": 6.056318935392141, "learning_rate": 8.032786276619658e-06, "loss": 0.349, "step": 4990 }, { "epoch": 0.31, "grad_norm": 2.7745935112318016, "learning_rate": 8.031976484335841e-06, "loss": 0.3216, "step": 4991 }, { "epoch": 0.31, "grad_norm": 2.2124179343633745, "learning_rate": 8.031166566249935e-06, "loss": 0.3111, "step": 4992 }, { "epoch": 0.31, "grad_norm": 3.971833357269287, "learning_rate": 8.030356522395544e-06, "loss": 0.3174, "step": 4993 }, { "epoch": 0.31, "grad_norm": 4.053995436223221, "learning_rate": 8.029546352806277e-06, "loss": 0.3173, "step": 4994 }, { "epoch": 0.31, "grad_norm": 12.351064506842642, "learning_rate": 8.028736057515753e-06, "loss": 0.3318, "step": 4995 }, { "epoch": 0.31, "grad_norm": 2.1149409158563297, "learning_rate": 8.02792563655759e-06, "loss": 0.3247, "step": 4996 }, { "epoch": 0.31, "grad_norm": 1.9858625097368985, "learning_rate": 8.027115089965413e-06, "loss": 0.3302, "step": 4997 }, { "epoch": 0.31, "grad_norm": 4.136126573258584, "learning_rate": 8.026304417772854e-06, "loss": 0.326, "step": 4998 }, { "epoch": 0.31, "grad_norm": 2.0309427426539384, "learning_rate": 8.025493620013549e-06, "loss": 0.3437, "step": 4999 }, { "epoch": 0.31, "grad_norm": 3.14193524045625, "learning_rate": 8.02468269672114e-06, "loss": 0.3162, "step": 5000 }, { "epoch": 0.31, "grad_norm": 2.995483997129608, "learning_rate": 8.023871647929276e-06, "loss": 0.3087, "step": 5001 }, { "epoch": 0.31, "grad_norm": 2.3277628667057098, "learning_rate": 8.023060473671605e-06, "loss": 0.3102, "step": 5002 }, { "epoch": 0.31, "grad_norm": 1.9788008089983757, "learning_rate": 8.022249173981784e-06, "loss": 0.3104, "step": 5003 }, { "epoch": 0.31, "grad_norm": 0.6996974523522912, "learning_rate": 8.021437748893478e-06, "loss": 0.4972, "step": 5004 }, { "epoch": 0.31, "grad_norm": 3.2174180211368424, "learning_rate": 8.020626198440352e-06, "loss": 0.3132, "step": 5005 }, { "epoch": 0.31, "grad_norm": 1.640476656829514, "learning_rate": 8.01981452265608e-06, "loss": 0.3143, "step": 5006 }, { "epoch": 0.31, "grad_norm": 4.334402462138009, "learning_rate": 8.01900272157434e-06, "loss": 0.3069, "step": 5007 }, { "epoch": 0.31, "grad_norm": 11.900448913831037, "learning_rate": 8.018190795228815e-06, "loss": 0.3441, "step": 5008 }, { "epoch": 0.32, "grad_norm": 4.063654509610975, "learning_rate": 8.017378743653193e-06, "loss": 0.3195, "step": 5009 }, { "epoch": 0.32, "grad_norm": 3.3909167925522223, "learning_rate": 8.016566566881166e-06, "loss": 0.3042, "step": 5010 }, { "epoch": 0.32, "grad_norm": 5.532290570296978, "learning_rate": 8.015754264946435e-06, "loss": 0.3315, "step": 5011 }, { "epoch": 0.32, "grad_norm": 2.7373760127048294, "learning_rate": 8.014941837882703e-06, "loss": 0.3432, "step": 5012 }, { "epoch": 0.32, "grad_norm": 2.0618836638509697, "learning_rate": 8.01412928572368e-06, "loss": 0.3232, "step": 5013 }, { "epoch": 0.32, "grad_norm": 1.876704991000476, "learning_rate": 8.01331660850308e-06, "loss": 0.3114, "step": 5014 }, { "epoch": 0.32, "grad_norm": 5.145002344501987, "learning_rate": 8.01250380625462e-06, "loss": 0.302, "step": 5015 }, { "epoch": 0.32, "grad_norm": 4.40383478343123, "learning_rate": 8.011690879012027e-06, "loss": 0.3279, "step": 5016 }, { "epoch": 0.32, "grad_norm": 2.8666662275168235, "learning_rate": 8.010877826809028e-06, "loss": 0.3141, "step": 5017 }, { "epoch": 0.32, "grad_norm": 5.595550835367412, "learning_rate": 8.01006464967936e-06, "loss": 0.3118, "step": 5018 }, { "epoch": 0.32, "grad_norm": 2.383487226000707, "learning_rate": 8.009251347656766e-06, "loss": 0.3132, "step": 5019 }, { "epoch": 0.32, "grad_norm": 4.675162959329228, "learning_rate": 8.008437920774987e-06, "loss": 0.3088, "step": 5020 }, { "epoch": 0.32, "grad_norm": 1.6265349900072492, "learning_rate": 8.007624369067776e-06, "loss": 0.2884, "step": 5021 }, { "epoch": 0.32, "grad_norm": 7.6831646227136945, "learning_rate": 8.006810692568886e-06, "loss": 0.3141, "step": 5022 }, { "epoch": 0.32, "grad_norm": 3.556303551718319, "learning_rate": 8.005996891312082e-06, "loss": 0.3039, "step": 5023 }, { "epoch": 0.32, "grad_norm": 2.3412784436245517, "learning_rate": 8.005182965331127e-06, "loss": 0.3417, "step": 5024 }, { "epoch": 0.32, "grad_norm": 4.284655036838846, "learning_rate": 8.004368914659792e-06, "loss": 0.3232, "step": 5025 }, { "epoch": 0.32, "grad_norm": 13.063304621855258, "learning_rate": 8.003554739331856e-06, "loss": 0.3083, "step": 5026 }, { "epoch": 0.32, "grad_norm": 3.4726216877214298, "learning_rate": 8.002740439381097e-06, "loss": 0.3258, "step": 5027 }, { "epoch": 0.32, "grad_norm": 2.2303048752751287, "learning_rate": 8.001926014841306e-06, "loss": 0.3162, "step": 5028 }, { "epoch": 0.32, "grad_norm": 3.301325872784528, "learning_rate": 8.001111465746269e-06, "loss": 0.3141, "step": 5029 }, { "epoch": 0.32, "grad_norm": 1.9678152985026538, "learning_rate": 8.00029679212979e-06, "loss": 0.3597, "step": 5030 }, { "epoch": 0.32, "grad_norm": 3.746665528896893, "learning_rate": 7.999481994025666e-06, "loss": 0.3292, "step": 5031 }, { "epoch": 0.32, "grad_norm": 2.8384568544072035, "learning_rate": 7.998667071467709e-06, "loss": 0.3117, "step": 5032 }, { "epoch": 0.32, "grad_norm": 3.151200798535191, "learning_rate": 7.997852024489727e-06, "loss": 0.3197, "step": 5033 }, { "epoch": 0.32, "grad_norm": 1.4186715153542795, "learning_rate": 7.99703685312554e-06, "loss": 0.2909, "step": 5034 }, { "epoch": 0.32, "grad_norm": 2.76518638238281, "learning_rate": 7.99622155740897e-06, "loss": 0.3145, "step": 5035 }, { "epoch": 0.32, "grad_norm": 2.2394895644670703, "learning_rate": 7.995406137373848e-06, "loss": 0.3109, "step": 5036 }, { "epoch": 0.32, "grad_norm": 2.5883403986052236, "learning_rate": 7.994590593054001e-06, "loss": 0.3132, "step": 5037 }, { "epoch": 0.32, "grad_norm": 3.0702547423913122, "learning_rate": 7.993774924483275e-06, "loss": 0.3181, "step": 5038 }, { "epoch": 0.32, "grad_norm": 2.0816594562824275, "learning_rate": 7.992959131695508e-06, "loss": 0.3235, "step": 5039 }, { "epoch": 0.32, "grad_norm": 3.63469659261388, "learning_rate": 7.99214321472455e-06, "loss": 0.3176, "step": 5040 }, { "epoch": 0.32, "grad_norm": 2.0387904745798937, "learning_rate": 7.991327173604257e-06, "loss": 0.3086, "step": 5041 }, { "epoch": 0.32, "grad_norm": 6.001197198470353, "learning_rate": 7.990511008368485e-06, "loss": 0.314, "step": 5042 }, { "epoch": 0.32, "grad_norm": 3.091889925430689, "learning_rate": 7.989694719051101e-06, "loss": 0.3303, "step": 5043 }, { "epoch": 0.32, "grad_norm": 16.46003381239394, "learning_rate": 7.988878305685972e-06, "loss": 0.2925, "step": 5044 }, { "epoch": 0.32, "grad_norm": 3.684788091771633, "learning_rate": 7.988061768306973e-06, "loss": 0.3333, "step": 5045 }, { "epoch": 0.32, "grad_norm": 7.578808138336421, "learning_rate": 7.987245106947984e-06, "loss": 0.3042, "step": 5046 }, { "epoch": 0.32, "grad_norm": 2.158788842711406, "learning_rate": 7.98642832164289e-06, "loss": 0.3235, "step": 5047 }, { "epoch": 0.32, "grad_norm": 11.113235636521349, "learning_rate": 7.98561141242558e-06, "loss": 0.3354, "step": 5048 }, { "epoch": 0.32, "grad_norm": 2.889934836684406, "learning_rate": 7.98479437932995e-06, "loss": 0.3025, "step": 5049 }, { "epoch": 0.32, "grad_norm": 2.9492874215275315, "learning_rate": 7.983977222389898e-06, "loss": 0.3066, "step": 5050 }, { "epoch": 0.32, "grad_norm": 2.1226520046678004, "learning_rate": 7.983159941639334e-06, "loss": 0.3231, "step": 5051 }, { "epoch": 0.32, "grad_norm": 2.3378657451530405, "learning_rate": 7.982342537112162e-06, "loss": 0.3139, "step": 5052 }, { "epoch": 0.32, "grad_norm": 2.6246577298481006, "learning_rate": 7.981525008842302e-06, "loss": 0.3338, "step": 5053 }, { "epoch": 0.32, "grad_norm": 2.021490134537214, "learning_rate": 7.980707356863673e-06, "loss": 0.2936, "step": 5054 }, { "epoch": 0.32, "grad_norm": 1.7435643705464416, "learning_rate": 7.979889581210203e-06, "loss": 0.3131, "step": 5055 }, { "epoch": 0.32, "grad_norm": 2.5048846923382837, "learning_rate": 7.979071681915821e-06, "loss": 0.3009, "step": 5056 }, { "epoch": 0.32, "grad_norm": 2.3088242899896563, "learning_rate": 7.978253659014463e-06, "loss": 0.3153, "step": 5057 }, { "epoch": 0.32, "grad_norm": 4.168719405448897, "learning_rate": 7.977435512540072e-06, "loss": 0.3066, "step": 5058 }, { "epoch": 0.32, "grad_norm": 1.7285131836876428, "learning_rate": 7.976617242526592e-06, "loss": 0.31, "step": 5059 }, { "epoch": 0.32, "grad_norm": 0.7048803919940461, "learning_rate": 7.975798849007974e-06, "loss": 0.5259, "step": 5060 }, { "epoch": 0.32, "grad_norm": 1.7197787822123591, "learning_rate": 7.974980332018178e-06, "loss": 0.3088, "step": 5061 }, { "epoch": 0.32, "grad_norm": 2.799530498120901, "learning_rate": 7.974161691591165e-06, "loss": 0.3328, "step": 5062 }, { "epoch": 0.32, "grad_norm": 2.5861386311184247, "learning_rate": 7.973342927760898e-06, "loss": 0.3019, "step": 5063 }, { "epoch": 0.32, "grad_norm": 2.2818011562125737, "learning_rate": 7.972524040561353e-06, "loss": 0.3123, "step": 5064 }, { "epoch": 0.32, "grad_norm": 2.008332525866915, "learning_rate": 7.971705030026507e-06, "loss": 0.3118, "step": 5065 }, { "epoch": 0.32, "grad_norm": 2.256715118907958, "learning_rate": 7.97088589619034e-06, "loss": 0.2927, "step": 5066 }, { "epoch": 0.32, "grad_norm": 6.674991938528282, "learning_rate": 7.970066639086839e-06, "loss": 0.301, "step": 5067 }, { "epoch": 0.32, "grad_norm": 2.1804556456397246, "learning_rate": 7.969247258749999e-06, "loss": 0.302, "step": 5068 }, { "epoch": 0.32, "grad_norm": 3.9860280400051207, "learning_rate": 7.968427755213814e-06, "loss": 0.3068, "step": 5069 }, { "epoch": 0.32, "grad_norm": 3.831230541476977, "learning_rate": 7.967608128512293e-06, "loss": 0.3122, "step": 5070 }, { "epoch": 0.32, "grad_norm": 2.5164274142461585, "learning_rate": 7.966788378679435e-06, "loss": 0.3592, "step": 5071 }, { "epoch": 0.32, "grad_norm": 1.837298725090378, "learning_rate": 7.965968505749262e-06, "loss": 0.291, "step": 5072 }, { "epoch": 0.32, "grad_norm": 1.5549773023611877, "learning_rate": 7.965148509755785e-06, "loss": 0.2925, "step": 5073 }, { "epoch": 0.32, "grad_norm": 3.2986968139774238, "learning_rate": 7.96432839073303e-06, "loss": 0.322, "step": 5074 }, { "epoch": 0.32, "grad_norm": 2.409482418167394, "learning_rate": 7.963508148715024e-06, "loss": 0.3006, "step": 5075 }, { "epoch": 0.32, "grad_norm": 1.6148311753650015, "learning_rate": 7.962687783735803e-06, "loss": 0.2995, "step": 5076 }, { "epoch": 0.32, "grad_norm": 2.440228470832698, "learning_rate": 7.961867295829403e-06, "loss": 0.31, "step": 5077 }, { "epoch": 0.32, "grad_norm": 6.806841277108355, "learning_rate": 7.961046685029868e-06, "loss": 0.3409, "step": 5078 }, { "epoch": 0.32, "grad_norm": 1.4445096722768909, "learning_rate": 7.960225951371245e-06, "loss": 0.3029, "step": 5079 }, { "epoch": 0.32, "grad_norm": 1.4989431264792445, "learning_rate": 7.959405094887591e-06, "loss": 0.2938, "step": 5080 }, { "epoch": 0.32, "grad_norm": 0.6791499788006095, "learning_rate": 7.958584115612963e-06, "loss": 0.5283, "step": 5081 }, { "epoch": 0.32, "grad_norm": 2.0749415291052133, "learning_rate": 7.957763013581425e-06, "loss": 0.3239, "step": 5082 }, { "epoch": 0.32, "grad_norm": 2.0466423740691315, "learning_rate": 7.956941788827046e-06, "loss": 0.3171, "step": 5083 }, { "epoch": 0.32, "grad_norm": 18.875177393278847, "learning_rate": 7.9561204413839e-06, "loss": 0.3002, "step": 5084 }, { "epoch": 0.32, "grad_norm": 1.7414609372383736, "learning_rate": 7.955298971286066e-06, "loss": 0.3201, "step": 5085 }, { "epoch": 0.32, "grad_norm": 1.8438288218830583, "learning_rate": 7.954477378567629e-06, "loss": 0.3316, "step": 5086 }, { "epoch": 0.32, "grad_norm": 3.979266579438885, "learning_rate": 7.953655663262676e-06, "loss": 0.3338, "step": 5087 }, { "epoch": 0.32, "grad_norm": 4.596344116499169, "learning_rate": 7.952833825405306e-06, "loss": 0.3172, "step": 5088 }, { "epoch": 0.32, "grad_norm": 2.374949279742272, "learning_rate": 7.952011865029614e-06, "loss": 0.3357, "step": 5089 }, { "epoch": 0.32, "grad_norm": 1.76099965096983, "learning_rate": 7.951189782169706e-06, "loss": 0.311, "step": 5090 }, { "epoch": 0.32, "grad_norm": 7.791065430818122, "learning_rate": 7.950367576859694e-06, "loss": 0.3222, "step": 5091 }, { "epoch": 0.32, "grad_norm": 2.064459872491958, "learning_rate": 7.94954524913369e-06, "loss": 0.3111, "step": 5092 }, { "epoch": 0.32, "grad_norm": 0.6358258563186658, "learning_rate": 7.948722799025814e-06, "loss": 0.5034, "step": 5093 }, { "epoch": 0.32, "grad_norm": 2.0212872046446795, "learning_rate": 7.94790022657019e-06, "loss": 0.3342, "step": 5094 }, { "epoch": 0.32, "grad_norm": 1.9751574512457168, "learning_rate": 7.947077531800953e-06, "loss": 0.3167, "step": 5095 }, { "epoch": 0.32, "grad_norm": 1.3474367355703467, "learning_rate": 7.946254714752233e-06, "loss": 0.3204, "step": 5096 }, { "epoch": 0.32, "grad_norm": 4.2114763334420475, "learning_rate": 7.945431775458172e-06, "loss": 0.3321, "step": 5097 }, { "epoch": 0.32, "grad_norm": 1.5620017746977082, "learning_rate": 7.944608713952913e-06, "loss": 0.3223, "step": 5098 }, { "epoch": 0.32, "grad_norm": 0.5993428243811117, "learning_rate": 7.94378553027061e-06, "loss": 0.4901, "step": 5099 }, { "epoch": 0.32, "grad_norm": 1.7364528052657522, "learning_rate": 7.942962224445416e-06, "loss": 0.3038, "step": 5100 }, { "epoch": 0.32, "grad_norm": 3.5768580631474247, "learning_rate": 7.942138796511493e-06, "loss": 0.3406, "step": 5101 }, { "epoch": 0.32, "grad_norm": 1.7684532732243523, "learning_rate": 7.941315246503006e-06, "loss": 0.281, "step": 5102 }, { "epoch": 0.32, "grad_norm": 1.8710782619801423, "learning_rate": 7.940491574454123e-06, "loss": 0.3424, "step": 5103 }, { "epoch": 0.32, "grad_norm": 1.3112896392186466, "learning_rate": 7.939667780399023e-06, "loss": 0.309, "step": 5104 }, { "epoch": 0.32, "grad_norm": 1.7026879528276058, "learning_rate": 7.938843864371886e-06, "loss": 0.3511, "step": 5105 }, { "epoch": 0.32, "grad_norm": 1.9839296373220543, "learning_rate": 7.938019826406895e-06, "loss": 0.308, "step": 5106 }, { "epoch": 0.32, "grad_norm": 1.2861315904284585, "learning_rate": 7.937195666538245e-06, "loss": 0.2957, "step": 5107 }, { "epoch": 0.32, "grad_norm": 1.9380431598967383, "learning_rate": 7.936371384800127e-06, "loss": 0.3362, "step": 5108 }, { "epoch": 0.32, "grad_norm": 2.770307461234666, "learning_rate": 7.935546981226747e-06, "loss": 0.3473, "step": 5109 }, { "epoch": 0.32, "grad_norm": 0.6061355860759877, "learning_rate": 7.934722455852308e-06, "loss": 0.4995, "step": 5110 }, { "epoch": 0.32, "grad_norm": 1.6253222845390156, "learning_rate": 7.933897808711022e-06, "loss": 0.2948, "step": 5111 }, { "epoch": 0.32, "grad_norm": 13.468429389333703, "learning_rate": 7.933073039837104e-06, "loss": 0.3258, "step": 5112 }, { "epoch": 0.32, "grad_norm": 2.1328610215687336, "learning_rate": 7.932248149264778e-06, "loss": 0.3124, "step": 5113 }, { "epoch": 0.32, "grad_norm": 3.1885687747250673, "learning_rate": 7.931423137028265e-06, "loss": 0.3138, "step": 5114 }, { "epoch": 0.32, "grad_norm": 1.6322587392956598, "learning_rate": 7.9305980031618e-06, "loss": 0.3143, "step": 5115 }, { "epoch": 0.32, "grad_norm": 2.691617172288375, "learning_rate": 7.929772747699621e-06, "loss": 0.3284, "step": 5116 }, { "epoch": 0.32, "grad_norm": 1.4915008100558418, "learning_rate": 7.928947370675963e-06, "loss": 0.3271, "step": 5117 }, { "epoch": 0.32, "grad_norm": 1.874462099288567, "learning_rate": 7.928121872125079e-06, "loss": 0.328, "step": 5118 }, { "epoch": 0.32, "grad_norm": 1.8486747584394894, "learning_rate": 7.927296252081218e-06, "loss": 0.3323, "step": 5119 }, { "epoch": 0.32, "grad_norm": 1.6815775240784545, "learning_rate": 7.926470510578634e-06, "loss": 0.3315, "step": 5120 }, { "epoch": 0.32, "grad_norm": 3.587468779319142, "learning_rate": 7.925644647651591e-06, "loss": 0.2997, "step": 5121 }, { "epoch": 0.32, "grad_norm": 3.234666949519142, "learning_rate": 7.924818663334356e-06, "loss": 0.3351, "step": 5122 }, { "epoch": 0.32, "grad_norm": 1.6422725811821304, "learning_rate": 7.9239925576612e-06, "loss": 0.3039, "step": 5123 }, { "epoch": 0.32, "grad_norm": 2.7219740756730872, "learning_rate": 7.923166330666397e-06, "loss": 0.3198, "step": 5124 }, { "epoch": 0.32, "grad_norm": 1.5385086878779366, "learning_rate": 7.922339982384232e-06, "loss": 0.3387, "step": 5125 }, { "epoch": 0.32, "grad_norm": 1.2084383566082457, "learning_rate": 7.92151351284899e-06, "loss": 0.3314, "step": 5126 }, { "epoch": 0.32, "grad_norm": 1.4743707311509031, "learning_rate": 7.920686922094964e-06, "loss": 0.3374, "step": 5127 }, { "epoch": 0.32, "grad_norm": 2.915323215254238, "learning_rate": 7.91986021015645e-06, "loss": 0.3095, "step": 5128 }, { "epoch": 0.32, "grad_norm": 3.2837458177313508, "learning_rate": 7.919033377067748e-06, "loss": 0.3379, "step": 5129 }, { "epoch": 0.32, "grad_norm": 1.2373511139087423, "learning_rate": 7.918206422863169e-06, "loss": 0.3304, "step": 5130 }, { "epoch": 0.32, "grad_norm": 1.5184108331320194, "learning_rate": 7.91737934757702e-06, "loss": 0.3183, "step": 5131 }, { "epoch": 0.32, "grad_norm": 2.169044407235978, "learning_rate": 7.91655215124362e-06, "loss": 0.3342, "step": 5132 }, { "epoch": 0.32, "grad_norm": 1.5965243231458044, "learning_rate": 7.915724833897291e-06, "loss": 0.318, "step": 5133 }, { "epoch": 0.32, "grad_norm": 1.310497701992734, "learning_rate": 7.914897395572362e-06, "loss": 0.3191, "step": 5134 }, { "epoch": 0.32, "grad_norm": 1.5435031686691405, "learning_rate": 7.91406983630316e-06, "loss": 0.3314, "step": 5135 }, { "epoch": 0.32, "grad_norm": 3.4206004188264725, "learning_rate": 7.913242156124026e-06, "loss": 0.3411, "step": 5136 }, { "epoch": 0.32, "grad_norm": 2.0573240464480085, "learning_rate": 7.9124143550693e-06, "loss": 0.3216, "step": 5137 }, { "epoch": 0.32, "grad_norm": 1.2794630248249228, "learning_rate": 7.911586433173328e-06, "loss": 0.305, "step": 5138 }, { "epoch": 0.32, "grad_norm": 3.3966530422442425, "learning_rate": 7.910758390470465e-06, "loss": 0.3013, "step": 5139 }, { "epoch": 0.32, "grad_norm": 2.358532216350067, "learning_rate": 7.909930226995066e-06, "loss": 0.3184, "step": 5140 }, { "epoch": 0.32, "grad_norm": 2.2842048344007564, "learning_rate": 7.909101942781494e-06, "loss": 0.3089, "step": 5141 }, { "epoch": 0.32, "grad_norm": 1.3209506365425492, "learning_rate": 7.908273537864114e-06, "loss": 0.3221, "step": 5142 }, { "epoch": 0.32, "grad_norm": 5.289693722947336, "learning_rate": 7.9074450122773e-06, "loss": 0.314, "step": 5143 }, { "epoch": 0.32, "grad_norm": 1.7378343070889373, "learning_rate": 7.906616366055427e-06, "loss": 0.3512, "step": 5144 }, { "epoch": 0.32, "grad_norm": 2.4920375500741807, "learning_rate": 7.905787599232878e-06, "loss": 0.3159, "step": 5145 }, { "epoch": 0.32, "grad_norm": 1.78577601822971, "learning_rate": 7.904958711844042e-06, "loss": 0.2949, "step": 5146 }, { "epoch": 0.32, "grad_norm": 2.6478832983625598, "learning_rate": 7.90412970392331e-06, "loss": 0.3033, "step": 5147 }, { "epoch": 0.32, "grad_norm": 1.7544514373362592, "learning_rate": 7.903300575505077e-06, "loss": 0.357, "step": 5148 }, { "epoch": 0.32, "grad_norm": 1.6290573379952231, "learning_rate": 7.902471326623746e-06, "loss": 0.3407, "step": 5149 }, { "epoch": 0.32, "grad_norm": 2.0217958670928295, "learning_rate": 7.901641957313724e-06, "loss": 0.3255, "step": 5150 }, { "epoch": 0.32, "grad_norm": 1.3195554411977217, "learning_rate": 7.900812467609423e-06, "loss": 0.3208, "step": 5151 }, { "epoch": 0.32, "grad_norm": 1.635799297520632, "learning_rate": 7.899982857545263e-06, "loss": 0.3306, "step": 5152 }, { "epoch": 0.32, "grad_norm": 1.8028536636867845, "learning_rate": 7.899153127155661e-06, "loss": 0.3093, "step": 5153 }, { "epoch": 0.32, "grad_norm": 1.760933628232171, "learning_rate": 7.898323276475045e-06, "loss": 0.3177, "step": 5154 }, { "epoch": 0.32, "grad_norm": 1.3091170778311194, "learning_rate": 7.897493305537851e-06, "loss": 0.3024, "step": 5155 }, { "epoch": 0.32, "grad_norm": 1.4170006007915341, "learning_rate": 7.896663214378512e-06, "loss": 0.3335, "step": 5156 }, { "epoch": 0.32, "grad_norm": 1.5269484964884819, "learning_rate": 7.895833003031472e-06, "loss": 0.3463, "step": 5157 }, { "epoch": 0.32, "grad_norm": 1.5275599386026182, "learning_rate": 7.895002671531175e-06, "loss": 0.3159, "step": 5158 }, { "epoch": 0.32, "grad_norm": 1.3944402239154978, "learning_rate": 7.894172219912077e-06, "loss": 0.3123, "step": 5159 }, { "epoch": 0.32, "grad_norm": 1.3081950411158063, "learning_rate": 7.893341648208632e-06, "loss": 0.3119, "step": 5160 }, { "epoch": 0.32, "grad_norm": 3.1249826879700624, "learning_rate": 7.892510956455305e-06, "loss": 0.3248, "step": 5161 }, { "epoch": 0.32, "grad_norm": 2.247131020693949, "learning_rate": 7.891680144686558e-06, "loss": 0.3134, "step": 5162 }, { "epoch": 0.32, "grad_norm": 0.6270633376295832, "learning_rate": 7.890849212936866e-06, "loss": 0.4912, "step": 5163 }, { "epoch": 0.32, "grad_norm": 1.3888517215084442, "learning_rate": 7.890018161240707e-06, "loss": 0.3212, "step": 5164 }, { "epoch": 0.32, "grad_norm": 1.4951206362262988, "learning_rate": 7.889186989632559e-06, "loss": 0.32, "step": 5165 }, { "epoch": 0.32, "grad_norm": 3.4390563164043506, "learning_rate": 7.888355698146913e-06, "loss": 0.3042, "step": 5166 }, { "epoch": 0.32, "grad_norm": 2.4237042466124175, "learning_rate": 7.887524286818257e-06, "loss": 0.3224, "step": 5167 }, { "epoch": 0.33, "grad_norm": 2.0496892155503756, "learning_rate": 7.886692755681091e-06, "loss": 0.323, "step": 5168 }, { "epoch": 0.33, "grad_norm": 2.3230823370618103, "learning_rate": 7.885861104769915e-06, "loss": 0.326, "step": 5169 }, { "epoch": 0.33, "grad_norm": 2.3049544437398652, "learning_rate": 7.885029334119237e-06, "loss": 0.3466, "step": 5170 }, { "epoch": 0.33, "grad_norm": 2.1769160680954185, "learning_rate": 7.884197443763566e-06, "loss": 0.3108, "step": 5171 }, { "epoch": 0.33, "grad_norm": 2.48672535677508, "learning_rate": 7.88336543373742e-06, "loss": 0.3526, "step": 5172 }, { "epoch": 0.33, "grad_norm": 2.1648446508690258, "learning_rate": 7.88253330407532e-06, "loss": 0.3276, "step": 5173 }, { "epoch": 0.33, "grad_norm": 1.340396240680619, "learning_rate": 7.881701054811796e-06, "loss": 0.3056, "step": 5174 }, { "epoch": 0.33, "grad_norm": 1.728428883608017, "learning_rate": 7.880868685981376e-06, "loss": 0.3191, "step": 5175 }, { "epoch": 0.33, "grad_norm": 1.4732255951886062, "learning_rate": 7.880036197618596e-06, "loss": 0.3213, "step": 5176 }, { "epoch": 0.33, "grad_norm": 1.623338077004673, "learning_rate": 7.879203589757998e-06, "loss": 0.3124, "step": 5177 }, { "epoch": 0.33, "grad_norm": 2.1853307873795726, "learning_rate": 7.878370862434131e-06, "loss": 0.3251, "step": 5178 }, { "epoch": 0.33, "grad_norm": 2.9052155557771617, "learning_rate": 7.877538015681542e-06, "loss": 0.3197, "step": 5179 }, { "epoch": 0.33, "grad_norm": 1.389894072532732, "learning_rate": 7.876705049534792e-06, "loss": 0.317, "step": 5180 }, { "epoch": 0.33, "grad_norm": 1.5412997149227012, "learning_rate": 7.875871964028439e-06, "loss": 0.3191, "step": 5181 }, { "epoch": 0.33, "grad_norm": 2.485679210130768, "learning_rate": 7.87503875919705e-06, "loss": 0.3386, "step": 5182 }, { "epoch": 0.33, "grad_norm": 1.6576984661492888, "learning_rate": 7.874205435075196e-06, "loss": 0.3207, "step": 5183 }, { "epoch": 0.33, "grad_norm": 2.8776673260890777, "learning_rate": 7.873371991697454e-06, "loss": 0.3164, "step": 5184 }, { "epoch": 0.33, "grad_norm": 1.8228351970614167, "learning_rate": 7.872538429098404e-06, "loss": 0.3204, "step": 5185 }, { "epoch": 0.33, "grad_norm": 2.01354442212947, "learning_rate": 7.871704747312631e-06, "loss": 0.3079, "step": 5186 }, { "epoch": 0.33, "grad_norm": 2.6381023841770297, "learning_rate": 7.87087094637473e-06, "loss": 0.3119, "step": 5187 }, { "epoch": 0.33, "grad_norm": 2.230613493458902, "learning_rate": 7.870037026319293e-06, "loss": 0.3333, "step": 5188 }, { "epoch": 0.33, "grad_norm": 1.271359396118646, "learning_rate": 7.869202987180921e-06, "loss": 0.3418, "step": 5189 }, { "epoch": 0.33, "grad_norm": 1.813108543229716, "learning_rate": 7.868368828994222e-06, "loss": 0.3196, "step": 5190 }, { "epoch": 0.33, "grad_norm": 4.8761358812895885, "learning_rate": 7.867534551793805e-06, "loss": 0.3143, "step": 5191 }, { "epoch": 0.33, "grad_norm": 1.808748874398439, "learning_rate": 7.866700155614286e-06, "loss": 0.3301, "step": 5192 }, { "epoch": 0.33, "grad_norm": 1.6232777439998736, "learning_rate": 7.865865640490287e-06, "loss": 0.3238, "step": 5193 }, { "epoch": 0.33, "grad_norm": 3.1706212707037715, "learning_rate": 7.865031006456432e-06, "loss": 0.3351, "step": 5194 }, { "epoch": 0.33, "grad_norm": 53.29319560873858, "learning_rate": 7.86419625354735e-06, "loss": 0.3162, "step": 5195 }, { "epoch": 0.33, "grad_norm": 1.7152615452368334, "learning_rate": 7.863361381797678e-06, "loss": 0.3194, "step": 5196 }, { "epoch": 0.33, "grad_norm": 1.723064779092635, "learning_rate": 7.862526391242058e-06, "loss": 0.3283, "step": 5197 }, { "epoch": 0.33, "grad_norm": 1.5969489563372483, "learning_rate": 7.861691281915133e-06, "loss": 0.3139, "step": 5198 }, { "epoch": 0.33, "grad_norm": 1.4729317597380558, "learning_rate": 7.860856053851554e-06, "loss": 0.3064, "step": 5199 }, { "epoch": 0.33, "grad_norm": 1.7380473371823648, "learning_rate": 7.860020707085976e-06, "loss": 0.3284, "step": 5200 }, { "epoch": 0.33, "grad_norm": 32.738389296582035, "learning_rate": 7.859185241653058e-06, "loss": 0.3313, "step": 5201 }, { "epoch": 0.33, "grad_norm": 1.8081041996036473, "learning_rate": 7.858349657587465e-06, "loss": 0.312, "step": 5202 }, { "epoch": 0.33, "grad_norm": 1.7493535284366597, "learning_rate": 7.85751395492387e-06, "loss": 0.3087, "step": 5203 }, { "epoch": 0.33, "grad_norm": 1.7270254734427284, "learning_rate": 7.856678133696945e-06, "loss": 0.3228, "step": 5204 }, { "epoch": 0.33, "grad_norm": 3.577619806778296, "learning_rate": 7.85584219394137e-06, "loss": 0.3132, "step": 5205 }, { "epoch": 0.33, "grad_norm": 1.7439505639616169, "learning_rate": 7.855006135691829e-06, "loss": 0.3037, "step": 5206 }, { "epoch": 0.33, "grad_norm": 1.7274519998258857, "learning_rate": 7.854169958983014e-06, "loss": 0.326, "step": 5207 }, { "epoch": 0.33, "grad_norm": 2.939987603120739, "learning_rate": 7.853333663849616e-06, "loss": 0.3271, "step": 5208 }, { "epoch": 0.33, "grad_norm": 2.389205281536424, "learning_rate": 7.852497250326337e-06, "loss": 0.3267, "step": 5209 }, { "epoch": 0.33, "grad_norm": 1.7981692340837032, "learning_rate": 7.85166071844788e-06, "loss": 0.3054, "step": 5210 }, { "epoch": 0.33, "grad_norm": 2.775791113021543, "learning_rate": 7.850824068248955e-06, "loss": 0.3166, "step": 5211 }, { "epoch": 0.33, "grad_norm": 4.622538023807633, "learning_rate": 7.849987299764276e-06, "loss": 0.3206, "step": 5212 }, { "epoch": 0.33, "grad_norm": 2.576171702100024, "learning_rate": 7.849150413028562e-06, "loss": 0.3217, "step": 5213 }, { "epoch": 0.33, "grad_norm": 3.2095791500536244, "learning_rate": 7.848313408076535e-06, "loss": 0.3331, "step": 5214 }, { "epoch": 0.33, "grad_norm": 2.4865642336849416, "learning_rate": 7.847476284942927e-06, "loss": 0.3322, "step": 5215 }, { "epoch": 0.33, "grad_norm": 1.634545356371704, "learning_rate": 7.846639043662472e-06, "loss": 0.3243, "step": 5216 }, { "epoch": 0.33, "grad_norm": 1.2618720493918716, "learning_rate": 7.845801684269905e-06, "loss": 0.3018, "step": 5217 }, { "epoch": 0.33, "grad_norm": 6.498254707575457, "learning_rate": 7.84496420679997e-06, "loss": 0.343, "step": 5218 }, { "epoch": 0.33, "grad_norm": 2.0083962241665834, "learning_rate": 7.84412661128742e-06, "loss": 0.3385, "step": 5219 }, { "epoch": 0.33, "grad_norm": 3.171225818129115, "learning_rate": 7.843288897767003e-06, "loss": 0.3316, "step": 5220 }, { "epoch": 0.33, "grad_norm": 1.813160418605828, "learning_rate": 7.842451066273478e-06, "loss": 0.3266, "step": 5221 }, { "epoch": 0.33, "grad_norm": 1.4333856933473443, "learning_rate": 7.841613116841612e-06, "loss": 0.3052, "step": 5222 }, { "epoch": 0.33, "grad_norm": 2.5155183948816697, "learning_rate": 7.84077504950617e-06, "loss": 0.3201, "step": 5223 }, { "epoch": 0.33, "grad_norm": 1.9748036782521914, "learning_rate": 7.839936864301924e-06, "loss": 0.3265, "step": 5224 }, { "epoch": 0.33, "grad_norm": 1.9203394613494438, "learning_rate": 7.839098561263655e-06, "loss": 0.3036, "step": 5225 }, { "epoch": 0.33, "grad_norm": 1.8496166679425865, "learning_rate": 7.838260140426145e-06, "loss": 0.325, "step": 5226 }, { "epoch": 0.33, "grad_norm": 2.574634293112357, "learning_rate": 7.837421601824178e-06, "loss": 0.3214, "step": 5227 }, { "epoch": 0.33, "grad_norm": 2.281911113871297, "learning_rate": 7.83658294549255e-06, "loss": 0.3317, "step": 5228 }, { "epoch": 0.33, "grad_norm": 1.792772421014619, "learning_rate": 7.835744171466056e-06, "loss": 0.3063, "step": 5229 }, { "epoch": 0.33, "grad_norm": 1.6417754999989007, "learning_rate": 7.834905279779501e-06, "loss": 0.3132, "step": 5230 }, { "epoch": 0.33, "grad_norm": 5.9212116571629325, "learning_rate": 7.83406627046769e-06, "loss": 0.3224, "step": 5231 }, { "epoch": 0.33, "grad_norm": 0.6016643983710616, "learning_rate": 7.833227143565436e-06, "loss": 0.5195, "step": 5232 }, { "epoch": 0.33, "grad_norm": 1.9190803509802752, "learning_rate": 7.832387899107555e-06, "loss": 0.3076, "step": 5233 }, { "epoch": 0.33, "grad_norm": 2.6528497273908247, "learning_rate": 7.831548537128868e-06, "loss": 0.327, "step": 5234 }, { "epoch": 0.33, "grad_norm": 1.203993813077144, "learning_rate": 7.830709057664205e-06, "loss": 0.3281, "step": 5235 }, { "epoch": 0.33, "grad_norm": 1.885793144761983, "learning_rate": 7.829869460748394e-06, "loss": 0.3215, "step": 5236 }, { "epoch": 0.33, "grad_norm": 1.5274124736436414, "learning_rate": 7.829029746416272e-06, "loss": 0.3208, "step": 5237 }, { "epoch": 0.33, "grad_norm": 0.570056472955061, "learning_rate": 7.828189914702681e-06, "loss": 0.4846, "step": 5238 }, { "epoch": 0.33, "grad_norm": 2.3544901211877676, "learning_rate": 7.82734996564247e-06, "loss": 0.3348, "step": 5239 }, { "epoch": 0.33, "grad_norm": 2.4647389721696236, "learning_rate": 7.826509899270484e-06, "loss": 0.3369, "step": 5240 }, { "epoch": 0.33, "grad_norm": 1.73010096681408, "learning_rate": 7.825669715621581e-06, "loss": 0.3229, "step": 5241 }, { "epoch": 0.33, "grad_norm": 1.9056460904486618, "learning_rate": 7.824829414730625e-06, "loss": 0.3263, "step": 5242 }, { "epoch": 0.33, "grad_norm": 1.3501727826015766, "learning_rate": 7.82398899663248e-06, "loss": 0.3186, "step": 5243 }, { "epoch": 0.33, "grad_norm": 3.88366306271441, "learning_rate": 7.823148461362013e-06, "loss": 0.3225, "step": 5244 }, { "epoch": 0.33, "grad_norm": 1.6255590264741315, "learning_rate": 7.822307808954102e-06, "loss": 0.3296, "step": 5245 }, { "epoch": 0.33, "grad_norm": 2.0484755965364156, "learning_rate": 7.821467039443628e-06, "loss": 0.3136, "step": 5246 }, { "epoch": 0.33, "grad_norm": 1.480892802698961, "learning_rate": 7.820626152865476e-06, "loss": 0.3027, "step": 5247 }, { "epoch": 0.33, "grad_norm": 2.1877077552477995, "learning_rate": 7.819785149254534e-06, "loss": 0.3071, "step": 5248 }, { "epoch": 0.33, "grad_norm": 3.65753819552042, "learning_rate": 7.818944028645696e-06, "loss": 0.3295, "step": 5249 }, { "epoch": 0.33, "grad_norm": 1.7443418408508877, "learning_rate": 7.818102791073866e-06, "loss": 0.3104, "step": 5250 }, { "epoch": 0.33, "grad_norm": 2.3554998717394255, "learning_rate": 7.817261436573944e-06, "loss": 0.3153, "step": 5251 }, { "epoch": 0.33, "grad_norm": 0.6195504060972558, "learning_rate": 7.816419965180842e-06, "loss": 0.4923, "step": 5252 }, { "epoch": 0.33, "grad_norm": 1.6823505897589057, "learning_rate": 7.815578376929472e-06, "loss": 0.3216, "step": 5253 }, { "epoch": 0.33, "grad_norm": 1.8260645558341941, "learning_rate": 7.814736671854755e-06, "loss": 0.3281, "step": 5254 }, { "epoch": 0.33, "grad_norm": 2.262671085875684, "learning_rate": 7.813894849991613e-06, "loss": 0.3362, "step": 5255 }, { "epoch": 0.33, "grad_norm": 2.189242209242595, "learning_rate": 7.813052911374976e-06, "loss": 0.3016, "step": 5256 }, { "epoch": 0.33, "grad_norm": 2.031791925024394, "learning_rate": 7.812210856039777e-06, "loss": 0.3191, "step": 5257 }, { "epoch": 0.33, "grad_norm": 2.8736459076904737, "learning_rate": 7.811368684020955e-06, "loss": 0.3345, "step": 5258 }, { "epoch": 0.33, "grad_norm": 2.377015528104474, "learning_rate": 7.810526395353453e-06, "loss": 0.3238, "step": 5259 }, { "epoch": 0.33, "grad_norm": 6.798601382094193, "learning_rate": 7.809683990072217e-06, "loss": 0.2939, "step": 5260 }, { "epoch": 0.33, "grad_norm": 2.363927610327008, "learning_rate": 7.808841468212204e-06, "loss": 0.3274, "step": 5261 }, { "epoch": 0.33, "grad_norm": 2.6227323400475644, "learning_rate": 7.807998829808368e-06, "loss": 0.3111, "step": 5262 }, { "epoch": 0.33, "grad_norm": 1.5637446224382276, "learning_rate": 7.807156074895673e-06, "loss": 0.328, "step": 5263 }, { "epoch": 0.33, "grad_norm": 1.846290688888176, "learning_rate": 7.806313203509087e-06, "loss": 0.3353, "step": 5264 }, { "epoch": 0.33, "grad_norm": 2.078383612207726, "learning_rate": 7.805470215683582e-06, "loss": 0.3266, "step": 5265 }, { "epoch": 0.33, "grad_norm": 1.979762433478405, "learning_rate": 7.804627111454132e-06, "loss": 0.3076, "step": 5266 }, { "epoch": 0.33, "grad_norm": 2.660159241950354, "learning_rate": 7.803783890855725e-06, "loss": 0.3431, "step": 5267 }, { "epoch": 0.33, "grad_norm": 1.6204503421514473, "learning_rate": 7.802940553923344e-06, "loss": 0.2973, "step": 5268 }, { "epoch": 0.33, "grad_norm": 1.6614170714011247, "learning_rate": 7.80209710069198e-06, "loss": 0.3181, "step": 5269 }, { "epoch": 0.33, "grad_norm": 2.763459680820304, "learning_rate": 7.801253531196629e-06, "loss": 0.3141, "step": 5270 }, { "epoch": 0.33, "grad_norm": 1.4819628461307792, "learning_rate": 7.800409845472294e-06, "loss": 0.3238, "step": 5271 }, { "epoch": 0.33, "grad_norm": 1.747752388557921, "learning_rate": 7.799566043553983e-06, "loss": 0.3047, "step": 5272 }, { "epoch": 0.33, "grad_norm": 2.1211767852187933, "learning_rate": 7.798722125476702e-06, "loss": 0.3111, "step": 5273 }, { "epoch": 0.33, "grad_norm": 1.5505624779238907, "learning_rate": 7.797878091275468e-06, "loss": 0.3108, "step": 5274 }, { "epoch": 0.33, "grad_norm": 1.5776271307834984, "learning_rate": 7.797033940985303e-06, "loss": 0.3152, "step": 5275 }, { "epoch": 0.33, "grad_norm": 1.7226888476530333, "learning_rate": 7.796189674641232e-06, "loss": 0.3272, "step": 5276 }, { "epoch": 0.33, "grad_norm": 1.308974819580043, "learning_rate": 7.795345292278284e-06, "loss": 0.299, "step": 5277 }, { "epoch": 0.33, "grad_norm": 2.3214760572985496, "learning_rate": 7.794500793931496e-06, "loss": 0.3226, "step": 5278 }, { "epoch": 0.33, "grad_norm": 7.811168133254081, "learning_rate": 7.793656179635907e-06, "loss": 0.3115, "step": 5279 }, { "epoch": 0.33, "grad_norm": 1.4014193914832929, "learning_rate": 7.79281144942656e-06, "loss": 0.3258, "step": 5280 }, { "epoch": 0.33, "grad_norm": 1.3525619311131196, "learning_rate": 7.791966603338505e-06, "loss": 0.2961, "step": 5281 }, { "epoch": 0.33, "grad_norm": 1.386191233696376, "learning_rate": 7.791121641406798e-06, "loss": 0.3316, "step": 5282 }, { "epoch": 0.33, "grad_norm": 1.7465533301761018, "learning_rate": 7.790276563666496e-06, "loss": 0.3113, "step": 5283 }, { "epoch": 0.33, "grad_norm": 1.848497159369416, "learning_rate": 7.789431370152663e-06, "loss": 0.3033, "step": 5284 }, { "epoch": 0.33, "grad_norm": 2.1902861996520175, "learning_rate": 7.788586060900366e-06, "loss": 0.3257, "step": 5285 }, { "epoch": 0.33, "grad_norm": 1.6632439520845537, "learning_rate": 7.787740635944682e-06, "loss": 0.3112, "step": 5286 }, { "epoch": 0.33, "grad_norm": 2.109090444914576, "learning_rate": 7.78689509532069e-06, "loss": 0.2934, "step": 5287 }, { "epoch": 0.33, "grad_norm": 1.5185527042333267, "learning_rate": 7.786049439063466e-06, "loss": 0.2903, "step": 5288 }, { "epoch": 0.33, "grad_norm": 1.3430976374311951, "learning_rate": 7.785203667208104e-06, "loss": 0.3141, "step": 5289 }, { "epoch": 0.33, "grad_norm": 1.615437410967679, "learning_rate": 7.784357779789695e-06, "loss": 0.3289, "step": 5290 }, { "epoch": 0.33, "grad_norm": 1.73339053657673, "learning_rate": 7.783511776843336e-06, "loss": 0.3611, "step": 5291 }, { "epoch": 0.33, "grad_norm": 2.078945198035065, "learning_rate": 7.78266565840413e-06, "loss": 0.325, "step": 5292 }, { "epoch": 0.33, "grad_norm": 1.8732262119614633, "learning_rate": 7.781819424507183e-06, "loss": 0.3265, "step": 5293 }, { "epoch": 0.33, "grad_norm": 1.7033490086311325, "learning_rate": 7.780973075187606e-06, "loss": 0.3422, "step": 5294 }, { "epoch": 0.33, "grad_norm": 1.8274348365490694, "learning_rate": 7.780126610480516e-06, "loss": 0.3221, "step": 5295 }, { "epoch": 0.33, "grad_norm": 1.921197079915513, "learning_rate": 7.779280030421036e-06, "loss": 0.3124, "step": 5296 }, { "epoch": 0.33, "grad_norm": 1.3620246030895387, "learning_rate": 7.77843333504429e-06, "loss": 0.3154, "step": 5297 }, { "epoch": 0.33, "grad_norm": 6.135086271585444, "learning_rate": 7.777586524385412e-06, "loss": 0.324, "step": 5298 }, { "epoch": 0.33, "grad_norm": 3.0187237282935357, "learning_rate": 7.776739598479534e-06, "loss": 0.3084, "step": 5299 }, { "epoch": 0.33, "grad_norm": 4.132250643424089, "learning_rate": 7.775892557361799e-06, "loss": 0.3285, "step": 5300 }, { "epoch": 0.33, "grad_norm": 1.2241173953747528, "learning_rate": 7.77504540106735e-06, "loss": 0.329, "step": 5301 }, { "epoch": 0.33, "grad_norm": 1.4780742936978613, "learning_rate": 7.774198129631339e-06, "loss": 0.32, "step": 5302 }, { "epoch": 0.33, "grad_norm": 1.6916449616547573, "learning_rate": 7.77335074308892e-06, "loss": 0.3068, "step": 5303 }, { "epoch": 0.33, "grad_norm": 3.1138490450859275, "learning_rate": 7.772503241475253e-06, "loss": 0.3271, "step": 5304 }, { "epoch": 0.33, "grad_norm": 2.5593199988123474, "learning_rate": 7.771655624825501e-06, "loss": 0.3124, "step": 5305 }, { "epoch": 0.33, "grad_norm": 2.1304137145738844, "learning_rate": 7.770807893174836e-06, "loss": 0.3112, "step": 5306 }, { "epoch": 0.33, "grad_norm": 1.545415392976424, "learning_rate": 7.769960046558427e-06, "loss": 0.3149, "step": 5307 }, { "epoch": 0.33, "grad_norm": 1.6742429253967805, "learning_rate": 7.769112085011458e-06, "loss": 0.3154, "step": 5308 }, { "epoch": 0.33, "grad_norm": 2.8038429385142214, "learning_rate": 7.76826400856911e-06, "loss": 0.3309, "step": 5309 }, { "epoch": 0.33, "grad_norm": 27.067237088615375, "learning_rate": 7.76741581726657e-06, "loss": 0.3173, "step": 5310 }, { "epoch": 0.33, "grad_norm": 1.9502668050879342, "learning_rate": 7.766567511139034e-06, "loss": 0.305, "step": 5311 }, { "epoch": 0.33, "grad_norm": 1.4164528935586955, "learning_rate": 7.765719090221697e-06, "loss": 0.3214, "step": 5312 }, { "epoch": 0.33, "grad_norm": 1.6561867214571613, "learning_rate": 7.764870554549762e-06, "loss": 0.3193, "step": 5313 }, { "epoch": 0.33, "grad_norm": 2.10729431809092, "learning_rate": 7.764021904158436e-06, "loss": 0.3334, "step": 5314 }, { "epoch": 0.33, "grad_norm": 1.4436624384800332, "learning_rate": 7.763173139082934e-06, "loss": 0.3215, "step": 5315 }, { "epoch": 0.33, "grad_norm": 1.4107756570196157, "learning_rate": 7.762324259358469e-06, "loss": 0.3056, "step": 5316 }, { "epoch": 0.33, "grad_norm": 2.486026851265413, "learning_rate": 7.761475265020265e-06, "loss": 0.3283, "step": 5317 }, { "epoch": 0.33, "grad_norm": 1.8007758072365392, "learning_rate": 7.760626156103545e-06, "loss": 0.3022, "step": 5318 }, { "epoch": 0.33, "grad_norm": 1.3263991040429182, "learning_rate": 7.759776932643546e-06, "loss": 0.3285, "step": 5319 }, { "epoch": 0.33, "grad_norm": 1.808546364182805, "learning_rate": 7.758927594675498e-06, "loss": 0.305, "step": 5320 }, { "epoch": 0.33, "grad_norm": 1.7624843777258097, "learning_rate": 7.758078142234644e-06, "loss": 0.3198, "step": 5321 }, { "epoch": 0.33, "grad_norm": 1.3564778264776514, "learning_rate": 7.757228575356227e-06, "loss": 0.3052, "step": 5322 }, { "epoch": 0.33, "grad_norm": 1.4638086532198022, "learning_rate": 7.756378894075503e-06, "loss": 0.3362, "step": 5323 }, { "epoch": 0.33, "grad_norm": 2.2779272416213177, "learning_rate": 7.75552909842772e-06, "loss": 0.2917, "step": 5324 }, { "epoch": 0.33, "grad_norm": 1.3558217933412742, "learning_rate": 7.754679188448142e-06, "loss": 0.3017, "step": 5325 }, { "epoch": 0.33, "grad_norm": 2.537657372578936, "learning_rate": 7.75382916417203e-06, "loss": 0.326, "step": 5326 }, { "epoch": 0.34, "grad_norm": 1.2663583805097514, "learning_rate": 7.752979025634657e-06, "loss": 0.3155, "step": 5327 }, { "epoch": 0.34, "grad_norm": 2.2802139646917126, "learning_rate": 7.752128772871292e-06, "loss": 0.3323, "step": 5328 }, { "epoch": 0.34, "grad_norm": 3.355152512161311, "learning_rate": 7.751278405917216e-06, "loss": 0.3211, "step": 5329 }, { "epoch": 0.34, "grad_norm": 1.245421577558753, "learning_rate": 7.750427924807712e-06, "loss": 0.3127, "step": 5330 }, { "epoch": 0.34, "grad_norm": 1.4510547679293457, "learning_rate": 7.74957732957807e-06, "loss": 0.3106, "step": 5331 }, { "epoch": 0.34, "grad_norm": 2.0675998325860987, "learning_rate": 7.748726620263581e-06, "loss": 0.3312, "step": 5332 }, { "epoch": 0.34, "grad_norm": 1.368644564235117, "learning_rate": 7.74787579689954e-06, "loss": 0.3126, "step": 5333 }, { "epoch": 0.34, "grad_norm": 2.8347779240013087, "learning_rate": 7.747024859521253e-06, "loss": 0.3666, "step": 5334 }, { "epoch": 0.34, "grad_norm": 1.782028464072049, "learning_rate": 7.746173808164023e-06, "loss": 0.3212, "step": 5335 }, { "epoch": 0.34, "grad_norm": 1.6486331249127584, "learning_rate": 7.745322642863167e-06, "loss": 0.3083, "step": 5336 }, { "epoch": 0.34, "grad_norm": 1.7510337222465016, "learning_rate": 7.744471363653996e-06, "loss": 0.3236, "step": 5337 }, { "epoch": 0.34, "grad_norm": 1.808508462901859, "learning_rate": 7.743619970571834e-06, "loss": 0.3005, "step": 5338 }, { "epoch": 0.34, "grad_norm": 1.957146390700833, "learning_rate": 7.742768463652007e-06, "loss": 0.3306, "step": 5339 }, { "epoch": 0.34, "grad_norm": 1.8095262232863547, "learning_rate": 7.741916842929843e-06, "loss": 0.3229, "step": 5340 }, { "epoch": 0.34, "grad_norm": 1.8734776294503417, "learning_rate": 7.741065108440682e-06, "loss": 0.3192, "step": 5341 }, { "epoch": 0.34, "grad_norm": 1.9952200392603823, "learning_rate": 7.74021326021986e-06, "loss": 0.3217, "step": 5342 }, { "epoch": 0.34, "grad_norm": 1.8703840335891198, "learning_rate": 7.739361298302721e-06, "loss": 0.3231, "step": 5343 }, { "epoch": 0.34, "grad_norm": 1.960761681104746, "learning_rate": 7.738509222724617e-06, "loss": 0.3142, "step": 5344 }, { "epoch": 0.34, "grad_norm": 1.620839815612159, "learning_rate": 7.7376570335209e-06, "loss": 0.3136, "step": 5345 }, { "epoch": 0.34, "grad_norm": 1.8388352318752739, "learning_rate": 7.736804730726932e-06, "loss": 0.3101, "step": 5346 }, { "epoch": 0.34, "grad_norm": 1.5383323686602937, "learning_rate": 7.735952314378075e-06, "loss": 0.3097, "step": 5347 }, { "epoch": 0.34, "grad_norm": 7.649249721364612, "learning_rate": 7.735099784509694e-06, "loss": 0.329, "step": 5348 }, { "epoch": 0.34, "grad_norm": 2.7810964237810616, "learning_rate": 7.734247141157168e-06, "loss": 0.3559, "step": 5349 }, { "epoch": 0.34, "grad_norm": 1.3182104693968337, "learning_rate": 7.733394384355869e-06, "loss": 0.3044, "step": 5350 }, { "epoch": 0.34, "grad_norm": 1.5859054146228193, "learning_rate": 7.732541514141182e-06, "loss": 0.297, "step": 5351 }, { "epoch": 0.34, "grad_norm": 3.34680833618101, "learning_rate": 7.731688530548495e-06, "loss": 0.2968, "step": 5352 }, { "epoch": 0.34, "grad_norm": 1.4483123754731415, "learning_rate": 7.7308354336132e-06, "loss": 0.3031, "step": 5353 }, { "epoch": 0.34, "grad_norm": 2.0288664027543004, "learning_rate": 7.72998222337069e-06, "loss": 0.3147, "step": 5354 }, { "epoch": 0.34, "grad_norm": 1.8317005133311832, "learning_rate": 7.72912889985637e-06, "loss": 0.306, "step": 5355 }, { "epoch": 0.34, "grad_norm": 1.4893061078897964, "learning_rate": 7.728275463105645e-06, "loss": 0.304, "step": 5356 }, { "epoch": 0.34, "grad_norm": 2.4565648018620156, "learning_rate": 7.727421913153925e-06, "loss": 0.3262, "step": 5357 }, { "epoch": 0.34, "grad_norm": 2.115236869711557, "learning_rate": 7.726568250036625e-06, "loss": 0.3133, "step": 5358 }, { "epoch": 0.34, "grad_norm": 1.6771999997855551, "learning_rate": 7.725714473789166e-06, "loss": 0.336, "step": 5359 }, { "epoch": 0.34, "grad_norm": 1.5117515210362769, "learning_rate": 7.724860584446969e-06, "loss": 0.2993, "step": 5360 }, { "epoch": 0.34, "grad_norm": 1.5205162452653131, "learning_rate": 7.724006582045468e-06, "loss": 0.3185, "step": 5361 }, { "epoch": 0.34, "grad_norm": 1.3642739193931954, "learning_rate": 7.723152466620098e-06, "loss": 0.3335, "step": 5362 }, { "epoch": 0.34, "grad_norm": 1.9031473059944004, "learning_rate": 7.722298238206294e-06, "loss": 0.3343, "step": 5363 }, { "epoch": 0.34, "grad_norm": 1.611505317540388, "learning_rate": 7.721443896839499e-06, "loss": 0.321, "step": 5364 }, { "epoch": 0.34, "grad_norm": 3.1382766240312923, "learning_rate": 7.720589442555164e-06, "loss": 0.3062, "step": 5365 }, { "epoch": 0.34, "grad_norm": 1.6297875963547095, "learning_rate": 7.719734875388742e-06, "loss": 0.3134, "step": 5366 }, { "epoch": 0.34, "grad_norm": 1.3373581391367027, "learning_rate": 7.718880195375686e-06, "loss": 0.3046, "step": 5367 }, { "epoch": 0.34, "grad_norm": 2.488188711680331, "learning_rate": 7.718025402551464e-06, "loss": 0.3134, "step": 5368 }, { "epoch": 0.34, "grad_norm": 9.349829263295176, "learning_rate": 7.71717049695154e-06, "loss": 0.3255, "step": 5369 }, { "epoch": 0.34, "grad_norm": 1.4576402250034397, "learning_rate": 7.716315478611386e-06, "loss": 0.3396, "step": 5370 }, { "epoch": 0.34, "grad_norm": 1.3413342808191373, "learning_rate": 7.715460347566476e-06, "loss": 0.3198, "step": 5371 }, { "epoch": 0.34, "grad_norm": 2.1504449548549363, "learning_rate": 7.714605103852297e-06, "loss": 0.335, "step": 5372 }, { "epoch": 0.34, "grad_norm": 1.3610529802936822, "learning_rate": 7.713749747504327e-06, "loss": 0.3352, "step": 5373 }, { "epoch": 0.34, "grad_norm": 1.7288968360147436, "learning_rate": 7.712894278558063e-06, "loss": 0.3354, "step": 5374 }, { "epoch": 0.34, "grad_norm": 1.5781156822755555, "learning_rate": 7.712038697048993e-06, "loss": 0.332, "step": 5375 }, { "epoch": 0.34, "grad_norm": 2.7727885194311908, "learning_rate": 7.711183003012624e-06, "loss": 0.3212, "step": 5376 }, { "epoch": 0.34, "grad_norm": 1.8413907164823824, "learning_rate": 7.710327196484453e-06, "loss": 0.3069, "step": 5377 }, { "epoch": 0.34, "grad_norm": 1.6852659524346563, "learning_rate": 7.709471277499995e-06, "loss": 0.317, "step": 5378 }, { "epoch": 0.34, "grad_norm": 10.089573112753548, "learning_rate": 7.708615246094759e-06, "loss": 0.2965, "step": 5379 }, { "epoch": 0.34, "grad_norm": 2.573012052285078, "learning_rate": 7.707759102304267e-06, "loss": 0.328, "step": 5380 }, { "epoch": 0.34, "grad_norm": 2.1998696507379023, "learning_rate": 7.706902846164042e-06, "loss": 0.312, "step": 5381 }, { "epoch": 0.34, "grad_norm": 2.3072517004354953, "learning_rate": 7.706046477709607e-06, "loss": 0.3219, "step": 5382 }, { "epoch": 0.34, "grad_norm": 1.829271017570389, "learning_rate": 7.705189996976497e-06, "loss": 0.3117, "step": 5383 }, { "epoch": 0.34, "grad_norm": 1.7827362078826985, "learning_rate": 7.704333404000252e-06, "loss": 0.3062, "step": 5384 }, { "epoch": 0.34, "grad_norm": 2.907364631886873, "learning_rate": 7.703476698816408e-06, "loss": 0.3182, "step": 5385 }, { "epoch": 0.34, "grad_norm": 1.68795382812034, "learning_rate": 7.702619881460515e-06, "loss": 0.3042, "step": 5386 }, { "epoch": 0.34, "grad_norm": 0.6616793374426807, "learning_rate": 7.701762951968121e-06, "loss": 0.4993, "step": 5387 }, { "epoch": 0.34, "grad_norm": 4.252617426188242, "learning_rate": 7.700905910374786e-06, "loss": 0.3106, "step": 5388 }, { "epoch": 0.34, "grad_norm": 3.460615221403972, "learning_rate": 7.700048756716066e-06, "loss": 0.3081, "step": 5389 }, { "epoch": 0.34, "grad_norm": 1.79653436274685, "learning_rate": 7.699191491027527e-06, "loss": 0.3311, "step": 5390 }, { "epoch": 0.34, "grad_norm": 1.7241779184814705, "learning_rate": 7.69833411334474e-06, "loss": 0.301, "step": 5391 }, { "epoch": 0.34, "grad_norm": 1.4955979687830763, "learning_rate": 7.697476623703278e-06, "loss": 0.3124, "step": 5392 }, { "epoch": 0.34, "grad_norm": 1.6196195767461121, "learning_rate": 7.696619022138718e-06, "loss": 0.3263, "step": 5393 }, { "epoch": 0.34, "grad_norm": 2.594175641596578, "learning_rate": 7.695761308686645e-06, "loss": 0.318, "step": 5394 }, { "epoch": 0.34, "grad_norm": 2.267398055036615, "learning_rate": 7.694903483382648e-06, "loss": 0.3158, "step": 5395 }, { "epoch": 0.34, "grad_norm": 2.85803065073527, "learning_rate": 7.694045546262318e-06, "loss": 0.3264, "step": 5396 }, { "epoch": 0.34, "grad_norm": 2.8948503634483607, "learning_rate": 7.693187497361254e-06, "loss": 0.3358, "step": 5397 }, { "epoch": 0.34, "grad_norm": 2.251156620044382, "learning_rate": 7.692329336715056e-06, "loss": 0.3063, "step": 5398 }, { "epoch": 0.34, "grad_norm": 1.7417624228202535, "learning_rate": 7.691471064359333e-06, "loss": 0.3052, "step": 5399 }, { "epoch": 0.34, "grad_norm": 1.1760982421251651, "learning_rate": 7.690612680329694e-06, "loss": 0.3022, "step": 5400 }, { "epoch": 0.34, "grad_norm": 1.883751960610199, "learning_rate": 7.689754184661757e-06, "loss": 0.3308, "step": 5401 }, { "epoch": 0.34, "grad_norm": 2.4214720740442326, "learning_rate": 7.68889557739114e-06, "loss": 0.3177, "step": 5402 }, { "epoch": 0.34, "grad_norm": 2.165026328108929, "learning_rate": 7.68803685855347e-06, "loss": 0.3196, "step": 5403 }, { "epoch": 0.34, "grad_norm": 1.4245136398666445, "learning_rate": 7.687178028184376e-06, "loss": 0.3203, "step": 5404 }, { "epoch": 0.34, "grad_norm": 1.8519914167079905, "learning_rate": 7.686319086319494e-06, "loss": 0.3122, "step": 5405 }, { "epoch": 0.34, "grad_norm": 9.359881063028313, "learning_rate": 7.68546003299446e-06, "loss": 0.32, "step": 5406 }, { "epoch": 0.34, "grad_norm": 2.026191006986111, "learning_rate": 7.68460086824492e-06, "loss": 0.3046, "step": 5407 }, { "epoch": 0.34, "grad_norm": 3.10177207340718, "learning_rate": 7.683741592106521e-06, "loss": 0.3249, "step": 5408 }, { "epoch": 0.34, "grad_norm": 4.159585352314094, "learning_rate": 7.682882204614918e-06, "loss": 0.3246, "step": 5409 }, { "epoch": 0.34, "grad_norm": 3.4964786417229705, "learning_rate": 7.682022705805765e-06, "loss": 0.3102, "step": 5410 }, { "epoch": 0.34, "grad_norm": 6.013546214111861, "learning_rate": 7.681163095714727e-06, "loss": 0.309, "step": 5411 }, { "epoch": 0.34, "grad_norm": 1.3057323594457988, "learning_rate": 7.68030337437747e-06, "loss": 0.3052, "step": 5412 }, { "epoch": 0.34, "grad_norm": 1.7469025350698477, "learning_rate": 7.679443541829665e-06, "loss": 0.3161, "step": 5413 }, { "epoch": 0.34, "grad_norm": 2.5623748611092583, "learning_rate": 7.67858359810699e-06, "loss": 0.3116, "step": 5414 }, { "epoch": 0.34, "grad_norm": 4.571531505164328, "learning_rate": 7.677723543245122e-06, "loss": 0.3091, "step": 5415 }, { "epoch": 0.34, "grad_norm": 2.9415793968538617, "learning_rate": 7.676863377279747e-06, "loss": 0.316, "step": 5416 }, { "epoch": 0.34, "grad_norm": 1.6608659713305773, "learning_rate": 7.676003100246558e-06, "loss": 0.3273, "step": 5417 }, { "epoch": 0.34, "grad_norm": 2.873022642099225, "learning_rate": 7.675142712181247e-06, "loss": 0.315, "step": 5418 }, { "epoch": 0.34, "grad_norm": 1.545331336651048, "learning_rate": 7.674282213119511e-06, "loss": 0.327, "step": 5419 }, { "epoch": 0.34, "grad_norm": 1.8345110872299504, "learning_rate": 7.673421603097058e-06, "loss": 0.3159, "step": 5420 }, { "epoch": 0.34, "grad_norm": 1.6698593550026084, "learning_rate": 7.672560882149594e-06, "loss": 0.3215, "step": 5421 }, { "epoch": 0.34, "grad_norm": 1.270607186862521, "learning_rate": 7.671700050312832e-06, "loss": 0.2988, "step": 5422 }, { "epoch": 0.34, "grad_norm": 2.852817023142275, "learning_rate": 7.670839107622488e-06, "loss": 0.309, "step": 5423 }, { "epoch": 0.34, "grad_norm": 2.437957536653853, "learning_rate": 7.669978054114286e-06, "loss": 0.3238, "step": 5424 }, { "epoch": 0.34, "grad_norm": 1.6842640113997158, "learning_rate": 7.669116889823955e-06, "loss": 0.2997, "step": 5425 }, { "epoch": 0.34, "grad_norm": 4.206636580338994, "learning_rate": 7.668255614787219e-06, "loss": 0.3039, "step": 5426 }, { "epoch": 0.34, "grad_norm": 1.9246387885589153, "learning_rate": 7.667394229039822e-06, "loss": 0.3201, "step": 5427 }, { "epoch": 0.34, "grad_norm": 1.708216086376897, "learning_rate": 7.666532732617498e-06, "loss": 0.3288, "step": 5428 }, { "epoch": 0.34, "grad_norm": 6.0998039251897485, "learning_rate": 7.665671125555996e-06, "loss": 0.3177, "step": 5429 }, { "epoch": 0.34, "grad_norm": 3.8879890695701977, "learning_rate": 7.664809407891063e-06, "loss": 0.3019, "step": 5430 }, { "epoch": 0.34, "grad_norm": 2.5072264638777915, "learning_rate": 7.663947579658456e-06, "loss": 0.3139, "step": 5431 }, { "epoch": 0.34, "grad_norm": 2.8296488345571333, "learning_rate": 7.66308564089393e-06, "loss": 0.3299, "step": 5432 }, { "epoch": 0.34, "grad_norm": 3.0746004340729414, "learning_rate": 7.662223591633254e-06, "loss": 0.3015, "step": 5433 }, { "epoch": 0.34, "grad_norm": 1.5580206769405773, "learning_rate": 7.66136143191219e-06, "loss": 0.308, "step": 5434 }, { "epoch": 0.34, "grad_norm": 1.4659077118525965, "learning_rate": 7.660499161766514e-06, "loss": 0.3092, "step": 5435 }, { "epoch": 0.34, "grad_norm": 1.6012483303825125, "learning_rate": 7.659636781232001e-06, "loss": 0.3118, "step": 5436 }, { "epoch": 0.34, "grad_norm": 1.9973094192329948, "learning_rate": 7.658774290344435e-06, "loss": 0.316, "step": 5437 }, { "epoch": 0.34, "grad_norm": 1.555775572637292, "learning_rate": 7.6579116891396e-06, "loss": 0.3129, "step": 5438 }, { "epoch": 0.34, "grad_norm": 1.538207398097279, "learning_rate": 7.65704897765329e-06, "loss": 0.3021, "step": 5439 }, { "epoch": 0.34, "grad_norm": 0.673180852549415, "learning_rate": 7.656186155921296e-06, "loss": 0.5251, "step": 5440 }, { "epoch": 0.34, "grad_norm": 1.902886639712222, "learning_rate": 7.655323223979424e-06, "loss": 0.3129, "step": 5441 }, { "epoch": 0.34, "grad_norm": 3.4532476838787907, "learning_rate": 7.654460181863473e-06, "loss": 0.2983, "step": 5442 }, { "epoch": 0.34, "grad_norm": 2.1444827784271028, "learning_rate": 7.653597029609254e-06, "loss": 0.3406, "step": 5443 }, { "epoch": 0.34, "grad_norm": 2.5583675835258517, "learning_rate": 7.65273376725258e-06, "loss": 0.3173, "step": 5444 }, { "epoch": 0.34, "grad_norm": 1.3527596301442983, "learning_rate": 7.651870394829272e-06, "loss": 0.3132, "step": 5445 }, { "epoch": 0.34, "grad_norm": 2.2149606730870763, "learning_rate": 7.651006912375149e-06, "loss": 0.3034, "step": 5446 }, { "epoch": 0.34, "grad_norm": 1.5382464713059398, "learning_rate": 7.650143319926044e-06, "loss": 0.3045, "step": 5447 }, { "epoch": 0.34, "grad_norm": 1.665601896616514, "learning_rate": 7.649279617517782e-06, "loss": 0.3081, "step": 5448 }, { "epoch": 0.34, "grad_norm": 1.5132442279557612, "learning_rate": 7.648415805186205e-06, "loss": 0.3163, "step": 5449 }, { "epoch": 0.34, "grad_norm": 2.677185939394891, "learning_rate": 7.64755188296715e-06, "loss": 0.3287, "step": 5450 }, { "epoch": 0.34, "grad_norm": 3.492380227783077, "learning_rate": 7.646687850896468e-06, "loss": 0.3185, "step": 5451 }, { "epoch": 0.34, "grad_norm": 1.8895882032413054, "learning_rate": 7.645823709010003e-06, "loss": 0.3086, "step": 5452 }, { "epoch": 0.34, "grad_norm": 1.4730331216348589, "learning_rate": 7.644959457343615e-06, "loss": 0.3091, "step": 5453 }, { "epoch": 0.34, "grad_norm": 1.6305290041659515, "learning_rate": 7.644095095933159e-06, "loss": 0.3231, "step": 5454 }, { "epoch": 0.34, "grad_norm": 1.9441251975660379, "learning_rate": 7.643230624814504e-06, "loss": 0.2992, "step": 5455 }, { "epoch": 0.34, "grad_norm": 1.8346439660799356, "learning_rate": 7.642366044023513e-06, "loss": 0.3424, "step": 5456 }, { "epoch": 0.34, "grad_norm": 2.5807228655220897, "learning_rate": 7.641501353596063e-06, "loss": 0.3012, "step": 5457 }, { "epoch": 0.34, "grad_norm": 0.646570700792608, "learning_rate": 7.640636553568028e-06, "loss": 0.4817, "step": 5458 }, { "epoch": 0.34, "grad_norm": 1.4828639952335128, "learning_rate": 7.639771643975294e-06, "loss": 0.2872, "step": 5459 }, { "epoch": 0.34, "grad_norm": 1.608629996755941, "learning_rate": 7.638906624853744e-06, "loss": 0.3362, "step": 5460 }, { "epoch": 0.34, "grad_norm": 1.3528149154625222, "learning_rate": 7.638041496239273e-06, "loss": 0.3083, "step": 5461 }, { "epoch": 0.34, "grad_norm": 1.3575733824670244, "learning_rate": 7.637176258167773e-06, "loss": 0.3014, "step": 5462 }, { "epoch": 0.34, "grad_norm": 1.8014531298328489, "learning_rate": 7.636310910675146e-06, "loss": 0.3332, "step": 5463 }, { "epoch": 0.34, "grad_norm": 6.9861518497379675, "learning_rate": 7.635445453797296e-06, "loss": 0.3078, "step": 5464 }, { "epoch": 0.34, "grad_norm": 1.6201889026965566, "learning_rate": 7.634579887570135e-06, "loss": 0.3286, "step": 5465 }, { "epoch": 0.34, "grad_norm": 1.8175134249410336, "learning_rate": 7.633714212029575e-06, "loss": 0.308, "step": 5466 }, { "epoch": 0.34, "grad_norm": 1.5878357263015503, "learning_rate": 7.632848427211533e-06, "loss": 0.3027, "step": 5467 }, { "epoch": 0.34, "grad_norm": 2.030875584750693, "learning_rate": 7.631982533151934e-06, "loss": 0.3293, "step": 5468 }, { "epoch": 0.34, "grad_norm": 1.93191629823843, "learning_rate": 7.631116529886704e-06, "loss": 0.3043, "step": 5469 }, { "epoch": 0.34, "grad_norm": 2.165738276276089, "learning_rate": 7.630250417451777e-06, "loss": 0.3533, "step": 5470 }, { "epoch": 0.34, "grad_norm": 2.0068217250358273, "learning_rate": 7.6293841958830874e-06, "loss": 0.3192, "step": 5471 }, { "epoch": 0.34, "grad_norm": 55.71888785457274, "learning_rate": 7.6285178652165785e-06, "loss": 0.2905, "step": 5472 }, { "epoch": 0.34, "grad_norm": 1.2722623000258324, "learning_rate": 7.627651425488193e-06, "loss": 0.3256, "step": 5473 }, { "epoch": 0.34, "grad_norm": 3.124205261884713, "learning_rate": 7.626784876733884e-06, "loss": 0.307, "step": 5474 }, { "epoch": 0.34, "grad_norm": 1.2024345777044216, "learning_rate": 7.625918218989603e-06, "loss": 0.3101, "step": 5475 }, { "epoch": 0.34, "grad_norm": 1.5904398430538602, "learning_rate": 7.625051452291312e-06, "loss": 0.2959, "step": 5476 }, { "epoch": 0.34, "grad_norm": 1.9110644815770113, "learning_rate": 7.624184576674973e-06, "loss": 0.304, "step": 5477 }, { "epoch": 0.34, "grad_norm": 2.974369373027941, "learning_rate": 7.6233175921765545e-06, "loss": 0.3172, "step": 5478 }, { "epoch": 0.34, "grad_norm": 1.5451315928813225, "learning_rate": 7.622450498832031e-06, "loss": 0.3024, "step": 5479 }, { "epoch": 0.34, "grad_norm": 2.039741423737045, "learning_rate": 7.621583296677377e-06, "loss": 0.3058, "step": 5480 }, { "epoch": 0.34, "grad_norm": 3.4226758284193703, "learning_rate": 7.620715985748578e-06, "loss": 0.3276, "step": 5481 }, { "epoch": 0.34, "grad_norm": 2.6398475391365492, "learning_rate": 7.619848566081615e-06, "loss": 0.3309, "step": 5482 }, { "epoch": 0.34, "grad_norm": 2.706618561917322, "learning_rate": 7.6189810377124826e-06, "loss": 0.3284, "step": 5483 }, { "epoch": 0.34, "grad_norm": 2.5754415149307848, "learning_rate": 7.618113400677176e-06, "loss": 0.3229, "step": 5484 }, { "epoch": 0.34, "grad_norm": 2.194172050701368, "learning_rate": 7.617245655011694e-06, "loss": 0.3234, "step": 5485 }, { "epoch": 0.35, "grad_norm": 2.050477147220965, "learning_rate": 7.6163778007520395e-06, "loss": 0.3344, "step": 5486 }, { "epoch": 0.35, "grad_norm": 0.6299172795638901, "learning_rate": 7.615509837934224e-06, "loss": 0.5109, "step": 5487 }, { "epoch": 0.35, "grad_norm": 2.237847129493219, "learning_rate": 7.61464176659426e-06, "loss": 0.318, "step": 5488 }, { "epoch": 0.35, "grad_norm": 14.729576606015852, "learning_rate": 7.6137735867681635e-06, "loss": 0.3284, "step": 5489 }, { "epoch": 0.35, "grad_norm": 1.9396008089136665, "learning_rate": 7.612905298491958e-06, "loss": 0.308, "step": 5490 }, { "epoch": 0.35, "grad_norm": 2.0854074033250587, "learning_rate": 7.6120369018016735e-06, "loss": 0.3206, "step": 5491 }, { "epoch": 0.35, "grad_norm": 1.697844250934911, "learning_rate": 7.611168396733336e-06, "loss": 0.3042, "step": 5492 }, { "epoch": 0.35, "grad_norm": 2.2836817591059266, "learning_rate": 7.610299783322984e-06, "loss": 0.3369, "step": 5493 }, { "epoch": 0.35, "grad_norm": 1.9248168667966188, "learning_rate": 7.609431061606658e-06, "loss": 0.3085, "step": 5494 }, { "epoch": 0.35, "grad_norm": 1.5950846935985723, "learning_rate": 7.6085622316204036e-06, "loss": 0.3099, "step": 5495 }, { "epoch": 0.35, "grad_norm": 1.7311641935530067, "learning_rate": 7.607693293400266e-06, "loss": 0.3181, "step": 5496 }, { "epoch": 0.35, "grad_norm": 2.14941056256365, "learning_rate": 7.606824246982305e-06, "loss": 0.3021, "step": 5497 }, { "epoch": 0.35, "grad_norm": 1.4293199970961823, "learning_rate": 7.6059550924025726e-06, "loss": 0.3105, "step": 5498 }, { "epoch": 0.35, "grad_norm": 2.303225757039219, "learning_rate": 7.605085829697139e-06, "loss": 0.2994, "step": 5499 }, { "epoch": 0.35, "grad_norm": 1.6735844612911548, "learning_rate": 7.604216458902063e-06, "loss": 0.3008, "step": 5500 }, { "epoch": 0.35, "grad_norm": 2.401747720583864, "learning_rate": 7.6033469800534235e-06, "loss": 0.3203, "step": 5501 }, { "epoch": 0.35, "grad_norm": 2.0383037151347123, "learning_rate": 7.602477393187291e-06, "loss": 0.3148, "step": 5502 }, { "epoch": 0.35, "grad_norm": 1.922985791964369, "learning_rate": 7.601607698339752e-06, "loss": 0.3433, "step": 5503 }, { "epoch": 0.35, "grad_norm": 3.9108365082688668, "learning_rate": 7.600737895546886e-06, "loss": 0.3222, "step": 5504 }, { "epoch": 0.35, "grad_norm": 3.1092126191259695, "learning_rate": 7.599867984844786e-06, "loss": 0.3178, "step": 5505 }, { "epoch": 0.35, "grad_norm": 1.5546278203591173, "learning_rate": 7.598997966269545e-06, "loss": 0.316, "step": 5506 }, { "epoch": 0.35, "grad_norm": 1.8597113984338176, "learning_rate": 7.5981278398572634e-06, "loss": 0.3008, "step": 5507 }, { "epoch": 0.35, "grad_norm": 1.3142683648737583, "learning_rate": 7.597257605644042e-06, "loss": 0.3095, "step": 5508 }, { "epoch": 0.35, "grad_norm": 1.85963895739769, "learning_rate": 7.59638726366599e-06, "loss": 0.3422, "step": 5509 }, { "epoch": 0.35, "grad_norm": 0.724941260177425, "learning_rate": 7.595516813959218e-06, "loss": 0.4937, "step": 5510 }, { "epoch": 0.35, "grad_norm": 1.7242903690271727, "learning_rate": 7.594646256559843e-06, "loss": 0.3016, "step": 5511 }, { "epoch": 0.35, "grad_norm": 3.0813544568681692, "learning_rate": 7.593775591503986e-06, "loss": 0.3284, "step": 5512 }, { "epoch": 0.35, "grad_norm": 1.8868391929230388, "learning_rate": 7.592904818827774e-06, "loss": 0.3198, "step": 5513 }, { "epoch": 0.35, "grad_norm": 2.37117999579112, "learning_rate": 7.592033938567335e-06, "loss": 0.3097, "step": 5514 }, { "epoch": 0.35, "grad_norm": 1.5172549935797623, "learning_rate": 7.591162950758803e-06, "loss": 0.304, "step": 5515 }, { "epoch": 0.35, "grad_norm": 4.1622814448229715, "learning_rate": 7.590291855438319e-06, "loss": 0.3181, "step": 5516 }, { "epoch": 0.35, "grad_norm": 1.976591741928131, "learning_rate": 7.589420652642025e-06, "loss": 0.2925, "step": 5517 }, { "epoch": 0.35, "grad_norm": 1.8019794737536576, "learning_rate": 7.588549342406068e-06, "loss": 0.3089, "step": 5518 }, { "epoch": 0.35, "grad_norm": 1.9974672229337505, "learning_rate": 7.587677924766601e-06, "loss": 0.313, "step": 5519 }, { "epoch": 0.35, "grad_norm": 1.6339012014344183, "learning_rate": 7.586806399759781e-06, "loss": 0.3142, "step": 5520 }, { "epoch": 0.35, "grad_norm": 3.2350736606527395, "learning_rate": 7.58593476742177e-06, "loss": 0.3176, "step": 5521 }, { "epoch": 0.35, "grad_norm": 5.936643097261422, "learning_rate": 7.58506302778873e-06, "loss": 0.3138, "step": 5522 }, { "epoch": 0.35, "grad_norm": 1.3658792722133504, "learning_rate": 7.584191180896835e-06, "loss": 0.3223, "step": 5523 }, { "epoch": 0.35, "grad_norm": 1.8727397687703295, "learning_rate": 7.583319226782258e-06, "loss": 0.3138, "step": 5524 }, { "epoch": 0.35, "grad_norm": 1.7414055833013713, "learning_rate": 7.582447165481177e-06, "loss": 0.2938, "step": 5525 }, { "epoch": 0.35, "grad_norm": 2.6510634094253187, "learning_rate": 7.581574997029777e-06, "loss": 0.3133, "step": 5526 }, { "epoch": 0.35, "grad_norm": 1.4435014538369866, "learning_rate": 7.580702721464244e-06, "loss": 0.321, "step": 5527 }, { "epoch": 0.35, "grad_norm": 2.090746458260443, "learning_rate": 7.57983033882077e-06, "loss": 0.3228, "step": 5528 }, { "epoch": 0.35, "grad_norm": 1.309977355524843, "learning_rate": 7.578957849135555e-06, "loss": 0.3054, "step": 5529 }, { "epoch": 0.35, "grad_norm": 0.6793297113210444, "learning_rate": 7.578085252444798e-06, "loss": 0.5371, "step": 5530 }, { "epoch": 0.35, "grad_norm": 2.109829442603735, "learning_rate": 7.577212548784705e-06, "loss": 0.3139, "step": 5531 }, { "epoch": 0.35, "grad_norm": 4.731572911362363, "learning_rate": 7.576339738191485e-06, "loss": 0.3142, "step": 5532 }, { "epoch": 0.35, "grad_norm": 1.9886255444932028, "learning_rate": 7.5754668207013535e-06, "loss": 0.3152, "step": 5533 }, { "epoch": 0.35, "grad_norm": 3.0076796278792806, "learning_rate": 7.574593796350527e-06, "loss": 0.2962, "step": 5534 }, { "epoch": 0.35, "grad_norm": 1.511482315572893, "learning_rate": 7.573720665175234e-06, "loss": 0.3052, "step": 5535 }, { "epoch": 0.35, "grad_norm": 2.816292089019836, "learning_rate": 7.572847427211698e-06, "loss": 0.335, "step": 5536 }, { "epoch": 0.35, "grad_norm": 1.9239117791983056, "learning_rate": 7.5719740824961516e-06, "loss": 0.3081, "step": 5537 }, { "epoch": 0.35, "grad_norm": 1.679967083566104, "learning_rate": 7.5711006310648316e-06, "loss": 0.3028, "step": 5538 }, { "epoch": 0.35, "grad_norm": 2.576149825720993, "learning_rate": 7.570227072953982e-06, "loss": 0.306, "step": 5539 }, { "epoch": 0.35, "grad_norm": 1.592116405588844, "learning_rate": 7.569353408199844e-06, "loss": 0.3108, "step": 5540 }, { "epoch": 0.35, "grad_norm": 1.5437412534820862, "learning_rate": 7.568479636838672e-06, "loss": 0.2998, "step": 5541 }, { "epoch": 0.35, "grad_norm": 1.7713488575365337, "learning_rate": 7.567605758906714e-06, "loss": 0.3167, "step": 5542 }, { "epoch": 0.35, "grad_norm": 3.2016171579570147, "learning_rate": 7.5667317744402345e-06, "loss": 0.3175, "step": 5543 }, { "epoch": 0.35, "grad_norm": 2.0686350989662605, "learning_rate": 7.565857683475495e-06, "loss": 0.3285, "step": 5544 }, { "epoch": 0.35, "grad_norm": 1.7541404676501888, "learning_rate": 7.564983486048763e-06, "loss": 0.3258, "step": 5545 }, { "epoch": 0.35, "grad_norm": 2.637809120824183, "learning_rate": 7.564109182196309e-06, "loss": 0.304, "step": 5546 }, { "epoch": 0.35, "grad_norm": 2.065564160982038, "learning_rate": 7.563234771954411e-06, "loss": 0.2936, "step": 5547 }, { "epoch": 0.35, "grad_norm": 5.124177000960691, "learning_rate": 7.562360255359349e-06, "loss": 0.3253, "step": 5548 }, { "epoch": 0.35, "grad_norm": 1.836370184152582, "learning_rate": 7.5614856324474095e-06, "loss": 0.3187, "step": 5549 }, { "epoch": 0.35, "grad_norm": 3.2304473261043967, "learning_rate": 7.560610903254881e-06, "loss": 0.3165, "step": 5550 }, { "epoch": 0.35, "grad_norm": 2.211099580150378, "learning_rate": 7.55973606781806e-06, "loss": 0.2984, "step": 5551 }, { "epoch": 0.35, "grad_norm": 10.106253574896783, "learning_rate": 7.558861126173241e-06, "loss": 0.3222, "step": 5552 }, { "epoch": 0.35, "grad_norm": 4.43029548593907, "learning_rate": 7.557986078356727e-06, "loss": 0.3309, "step": 5553 }, { "epoch": 0.35, "grad_norm": 2.4655876812264395, "learning_rate": 7.557110924404829e-06, "loss": 0.3112, "step": 5554 }, { "epoch": 0.35, "grad_norm": 2.5698564984498824, "learning_rate": 7.5562356643538586e-06, "loss": 0.3449, "step": 5555 }, { "epoch": 0.35, "grad_norm": 1.8906463936232092, "learning_rate": 7.555360298240128e-06, "loss": 0.3077, "step": 5556 }, { "epoch": 0.35, "grad_norm": 1.5490763519009287, "learning_rate": 7.554484826099959e-06, "loss": 0.3296, "step": 5557 }, { "epoch": 0.35, "grad_norm": 0.6689562673367422, "learning_rate": 7.55360924796968e-06, "loss": 0.5084, "step": 5558 }, { "epoch": 0.35, "grad_norm": 1.4674652561581336, "learning_rate": 7.552733563885616e-06, "loss": 0.3063, "step": 5559 }, { "epoch": 0.35, "grad_norm": 2.1978635073512747, "learning_rate": 7.551857773884101e-06, "loss": 0.3116, "step": 5560 }, { "epoch": 0.35, "grad_norm": 1.4731960314675723, "learning_rate": 7.550981878001476e-06, "loss": 0.3118, "step": 5561 }, { "epoch": 0.35, "grad_norm": 2.4934639668928678, "learning_rate": 7.550105876274082e-06, "loss": 0.299, "step": 5562 }, { "epoch": 0.35, "grad_norm": 2.4704119077333533, "learning_rate": 7.549229768738266e-06, "loss": 0.3229, "step": 5563 }, { "epoch": 0.35, "grad_norm": 1.678290582969975, "learning_rate": 7.548353555430378e-06, "loss": 0.3035, "step": 5564 }, { "epoch": 0.35, "grad_norm": 2.050749009636137, "learning_rate": 7.547477236386774e-06, "loss": 0.3078, "step": 5565 }, { "epoch": 0.35, "grad_norm": 2.146993060143408, "learning_rate": 7.546600811643816e-06, "loss": 0.3065, "step": 5566 }, { "epoch": 0.35, "grad_norm": 0.5933902958493147, "learning_rate": 7.545724281237867e-06, "loss": 0.5215, "step": 5567 }, { "epoch": 0.35, "grad_norm": 6.452429749055142, "learning_rate": 7.5448476452052955e-06, "loss": 0.3335, "step": 5568 }, { "epoch": 0.35, "grad_norm": 2.1746540608976783, "learning_rate": 7.543970903582473e-06, "loss": 0.3232, "step": 5569 }, { "epoch": 0.35, "grad_norm": 2.5632182117070235, "learning_rate": 7.543094056405782e-06, "loss": 0.3209, "step": 5570 }, { "epoch": 0.35, "grad_norm": 1.9638717159951005, "learning_rate": 7.542217103711601e-06, "loss": 0.3121, "step": 5571 }, { "epoch": 0.35, "grad_norm": 2.144578477203159, "learning_rate": 7.5413400455363154e-06, "loss": 0.3242, "step": 5572 }, { "epoch": 0.35, "grad_norm": 3.298585420131417, "learning_rate": 7.54046288191632e-06, "loss": 0.3155, "step": 5573 }, { "epoch": 0.35, "grad_norm": 1.6620275982754027, "learning_rate": 7.5395856128880045e-06, "loss": 0.3326, "step": 5574 }, { "epoch": 0.35, "grad_norm": 4.614520449225384, "learning_rate": 7.5387082384877715e-06, "loss": 0.3193, "step": 5575 }, { "epoch": 0.35, "grad_norm": 0.6048965548854468, "learning_rate": 7.537830758752025e-06, "loss": 0.5238, "step": 5576 }, { "epoch": 0.35, "grad_norm": 3.22001852217234, "learning_rate": 7.536953173717174e-06, "loss": 0.3176, "step": 5577 }, { "epoch": 0.35, "grad_norm": 1.3787820806039615, "learning_rate": 7.5360754834196284e-06, "loss": 0.301, "step": 5578 }, { "epoch": 0.35, "grad_norm": 2.234464955645964, "learning_rate": 7.535197687895805e-06, "loss": 0.3451, "step": 5579 }, { "epoch": 0.35, "grad_norm": 1.5321716541892436, "learning_rate": 7.53431978718213e-06, "loss": 0.3164, "step": 5580 }, { "epoch": 0.35, "grad_norm": 2.646921846462847, "learning_rate": 7.533441781315023e-06, "loss": 0.3161, "step": 5581 }, { "epoch": 0.35, "grad_norm": 1.8979577692014298, "learning_rate": 7.5325636703309165e-06, "loss": 0.3135, "step": 5582 }, { "epoch": 0.35, "grad_norm": 2.1137255471128475, "learning_rate": 7.531685454266246e-06, "loss": 0.3035, "step": 5583 }, { "epoch": 0.35, "grad_norm": 1.5792122713511922, "learning_rate": 7.5308071331574485e-06, "loss": 0.2999, "step": 5584 }, { "epoch": 0.35, "grad_norm": 3.5997296727761863, "learning_rate": 7.529928707040969e-06, "loss": 0.3255, "step": 5585 }, { "epoch": 0.35, "grad_norm": 1.950600940282619, "learning_rate": 7.529050175953253e-06, "loss": 0.3175, "step": 5586 }, { "epoch": 0.35, "grad_norm": 1.6416121319325168, "learning_rate": 7.528171539930753e-06, "loss": 0.3453, "step": 5587 }, { "epoch": 0.35, "grad_norm": 0.6169929566759427, "learning_rate": 7.527292799009924e-06, "loss": 0.5057, "step": 5588 }, { "epoch": 0.35, "grad_norm": 3.5772616025881065, "learning_rate": 7.526413953227231e-06, "loss": 0.3108, "step": 5589 }, { "epoch": 0.35, "grad_norm": 1.3937024867886079, "learning_rate": 7.5255350026191336e-06, "loss": 0.3059, "step": 5590 }, { "epoch": 0.35, "grad_norm": 2.7036393897636453, "learning_rate": 7.524655947222104e-06, "loss": 0.2912, "step": 5591 }, { "epoch": 0.35, "grad_norm": 5.303718523195396, "learning_rate": 7.523776787072613e-06, "loss": 0.3108, "step": 5592 }, { "epoch": 0.35, "grad_norm": 1.4651604061668437, "learning_rate": 7.5228975222071425e-06, "loss": 0.2991, "step": 5593 }, { "epoch": 0.35, "grad_norm": 1.744966095349305, "learning_rate": 7.522018152662171e-06, "loss": 0.3381, "step": 5594 }, { "epoch": 0.35, "grad_norm": 1.9847177841081725, "learning_rate": 7.521138678474187e-06, "loss": 0.3055, "step": 5595 }, { "epoch": 0.35, "grad_norm": 2.1188786865678044, "learning_rate": 7.520259099679681e-06, "loss": 0.3004, "step": 5596 }, { "epoch": 0.35, "grad_norm": 1.8088717725026118, "learning_rate": 7.5193794163151516e-06, "loss": 0.3149, "step": 5597 }, { "epoch": 0.35, "grad_norm": 2.0462386040246234, "learning_rate": 7.518499628417091e-06, "loss": 0.2974, "step": 5598 }, { "epoch": 0.35, "grad_norm": 1.5040063755350557, "learning_rate": 7.5176197360220096e-06, "loss": 0.3011, "step": 5599 }, { "epoch": 0.35, "grad_norm": 3.3183981886848737, "learning_rate": 7.516739739166412e-06, "loss": 0.3122, "step": 5600 }, { "epoch": 0.35, "grad_norm": 1.9674896556573818, "learning_rate": 7.515859637886814e-06, "loss": 0.3014, "step": 5601 }, { "epoch": 0.35, "grad_norm": 1.7152203946403568, "learning_rate": 7.514979432219731e-06, "loss": 0.3145, "step": 5602 }, { "epoch": 0.35, "grad_norm": 3.403720004356182, "learning_rate": 7.514099122201683e-06, "loss": 0.3127, "step": 5603 }, { "epoch": 0.35, "grad_norm": 1.7211492577570522, "learning_rate": 7.513218707869199e-06, "loss": 0.3112, "step": 5604 }, { "epoch": 0.35, "grad_norm": 0.6104140208393277, "learning_rate": 7.512338189258805e-06, "loss": 0.4887, "step": 5605 }, { "epoch": 0.35, "grad_norm": 1.5343344536735042, "learning_rate": 7.511457566407038e-06, "loss": 0.3113, "step": 5606 }, { "epoch": 0.35, "grad_norm": 1.8576184789734569, "learning_rate": 7.510576839350435e-06, "loss": 0.3208, "step": 5607 }, { "epoch": 0.35, "grad_norm": 2.7988619285075296, "learning_rate": 7.50969600812554e-06, "loss": 0.312, "step": 5608 }, { "epoch": 0.35, "grad_norm": 2.1524713125801425, "learning_rate": 7.5088150727689006e-06, "loss": 0.2999, "step": 5609 }, { "epoch": 0.35, "grad_norm": 1.8278447817509023, "learning_rate": 7.5079340333170654e-06, "loss": 0.3255, "step": 5610 }, { "epoch": 0.35, "grad_norm": 1.4991055701569922, "learning_rate": 7.507052889806595e-06, "loss": 0.3143, "step": 5611 }, { "epoch": 0.35, "grad_norm": 1.6456788524314352, "learning_rate": 7.506171642274046e-06, "loss": 0.3322, "step": 5612 }, { "epoch": 0.35, "grad_norm": 1.9083932346937371, "learning_rate": 7.505290290755985e-06, "loss": 0.2952, "step": 5613 }, { "epoch": 0.35, "grad_norm": 1.6959065724865343, "learning_rate": 7.5044088352889785e-06, "loss": 0.3051, "step": 5614 }, { "epoch": 0.35, "grad_norm": 1.5812591366602096, "learning_rate": 7.503527275909603e-06, "loss": 0.3112, "step": 5615 }, { "epoch": 0.35, "grad_norm": 1.4145626855077336, "learning_rate": 7.502645612654432e-06, "loss": 0.2984, "step": 5616 }, { "epoch": 0.35, "grad_norm": 1.5959014876694964, "learning_rate": 7.5017638455600505e-06, "loss": 0.3104, "step": 5617 }, { "epoch": 0.35, "grad_norm": 1.9324527414534696, "learning_rate": 7.5008819746630435e-06, "loss": 0.2996, "step": 5618 }, { "epoch": 0.35, "grad_norm": 1.672622208377859, "learning_rate": 7.500000000000001e-06, "loss": 0.2966, "step": 5619 }, { "epoch": 0.35, "grad_norm": 1.5514313095791672, "learning_rate": 7.499117921607518e-06, "loss": 0.3099, "step": 5620 }, { "epoch": 0.35, "grad_norm": 1.8434493314240692, "learning_rate": 7.498235739522193e-06, "loss": 0.3271, "step": 5621 }, { "epoch": 0.35, "grad_norm": 1.674892198816176, "learning_rate": 7.497353453780631e-06, "loss": 0.2908, "step": 5622 }, { "epoch": 0.35, "grad_norm": 3.470732541528881, "learning_rate": 7.496471064419437e-06, "loss": 0.3165, "step": 5623 }, { "epoch": 0.35, "grad_norm": 1.469813071897201, "learning_rate": 7.495588571475225e-06, "loss": 0.3097, "step": 5624 }, { "epoch": 0.35, "grad_norm": 2.559372444624539, "learning_rate": 7.494705974984611e-06, "loss": 0.3126, "step": 5625 }, { "epoch": 0.35, "grad_norm": 1.4119806716099852, "learning_rate": 7.493823274984215e-06, "loss": 0.3063, "step": 5626 }, { "epoch": 0.35, "grad_norm": 6.228598150709018, "learning_rate": 7.492940471510662e-06, "loss": 0.2916, "step": 5627 }, { "epoch": 0.35, "grad_norm": 1.987139959495272, "learning_rate": 7.492057564600579e-06, "loss": 0.3042, "step": 5628 }, { "epoch": 0.35, "grad_norm": 1.9055052025674284, "learning_rate": 7.491174554290602e-06, "loss": 0.3166, "step": 5629 }, { "epoch": 0.35, "grad_norm": 1.3521912386910904, "learning_rate": 7.490291440617369e-06, "loss": 0.3072, "step": 5630 }, { "epoch": 0.35, "grad_norm": 1.9339713899719875, "learning_rate": 7.489408223617521e-06, "loss": 0.3212, "step": 5631 }, { "epoch": 0.35, "grad_norm": 1.2991312285805725, "learning_rate": 7.488524903327703e-06, "loss": 0.3222, "step": 5632 }, { "epoch": 0.35, "grad_norm": 1.5746372787736225, "learning_rate": 7.487641479784566e-06, "loss": 0.3023, "step": 5633 }, { "epoch": 0.35, "grad_norm": 2.046394299527198, "learning_rate": 7.486757953024767e-06, "loss": 0.3154, "step": 5634 }, { "epoch": 0.35, "grad_norm": 2.5720029238898356, "learning_rate": 7.485874323084963e-06, "loss": 0.3192, "step": 5635 }, { "epoch": 0.35, "grad_norm": 2.1660435220907193, "learning_rate": 7.484990590001818e-06, "loss": 0.3327, "step": 5636 }, { "epoch": 0.35, "grad_norm": 1.6374788207877375, "learning_rate": 7.484106753811999e-06, "loss": 0.3035, "step": 5637 }, { "epoch": 0.35, "grad_norm": 2.6121513654484185, "learning_rate": 7.4832228145521805e-06, "loss": 0.3076, "step": 5638 }, { "epoch": 0.35, "grad_norm": 1.380066003424248, "learning_rate": 7.4823387722590345e-06, "loss": 0.3108, "step": 5639 }, { "epoch": 0.35, "grad_norm": 4.200349463868485, "learning_rate": 7.481454626969244e-06, "loss": 0.3084, "step": 5640 }, { "epoch": 0.35, "grad_norm": 13.807919801223866, "learning_rate": 7.480570378719494e-06, "loss": 0.3304, "step": 5641 }, { "epoch": 0.35, "grad_norm": 1.7027573751488092, "learning_rate": 7.4796860275464734e-06, "loss": 0.3271, "step": 5642 }, { "epoch": 0.35, "grad_norm": 1.9935426564123864, "learning_rate": 7.4788015734868745e-06, "loss": 0.3291, "step": 5643 }, { "epoch": 0.35, "grad_norm": 3.1901492399519804, "learning_rate": 7.477917016577396e-06, "loss": 0.3183, "step": 5644 }, { "epoch": 0.36, "grad_norm": 1.5598227356088434, "learning_rate": 7.477032356854739e-06, "loss": 0.3284, "step": 5645 }, { "epoch": 0.36, "grad_norm": 1.5607753249772587, "learning_rate": 7.47614759435561e-06, "loss": 0.3168, "step": 5646 }, { "epoch": 0.36, "grad_norm": 3.2946975217902708, "learning_rate": 7.475262729116718e-06, "loss": 0.3079, "step": 5647 }, { "epoch": 0.36, "grad_norm": 2.1161331653257256, "learning_rate": 7.47437776117478e-06, "loss": 0.3147, "step": 5648 }, { "epoch": 0.36, "grad_norm": 1.567136292200706, "learning_rate": 7.473492690566513e-06, "loss": 0.3097, "step": 5649 }, { "epoch": 0.36, "grad_norm": 1.6040788197915585, "learning_rate": 7.472607517328641e-06, "loss": 0.3063, "step": 5650 }, { "epoch": 0.36, "grad_norm": 7.251851878767218, "learning_rate": 7.4717222414978915e-06, "loss": 0.319, "step": 5651 }, { "epoch": 0.36, "grad_norm": 2.1348626804101776, "learning_rate": 7.470836863110996e-06, "loss": 0.3091, "step": 5652 }, { "epoch": 0.36, "grad_norm": 2.685702944543895, "learning_rate": 7.46995138220469e-06, "loss": 0.3122, "step": 5653 }, { "epoch": 0.36, "grad_norm": 1.3611225750638158, "learning_rate": 7.469065798815715e-06, "loss": 0.3128, "step": 5654 }, { "epoch": 0.36, "grad_norm": 58.515560938211244, "learning_rate": 7.468180112980813e-06, "loss": 0.2983, "step": 5655 }, { "epoch": 0.36, "grad_norm": 1.780049182137719, "learning_rate": 7.467294324736735e-06, "loss": 0.3055, "step": 5656 }, { "epoch": 0.36, "grad_norm": 1.674422368242577, "learning_rate": 7.466408434120231e-06, "loss": 0.33, "step": 5657 }, { "epoch": 0.36, "grad_norm": 3.670010019365439, "learning_rate": 7.4655224411680635e-06, "loss": 0.3054, "step": 5658 }, { "epoch": 0.36, "grad_norm": 1.3692648213304666, "learning_rate": 7.464636345916989e-06, "loss": 0.3146, "step": 5659 }, { "epoch": 0.36, "grad_norm": 1.428873933394962, "learning_rate": 7.463750148403776e-06, "loss": 0.3024, "step": 5660 }, { "epoch": 0.36, "grad_norm": 4.018528477444935, "learning_rate": 7.462863848665191e-06, "loss": 0.3379, "step": 5661 }, { "epoch": 0.36, "grad_norm": 1.79207294349736, "learning_rate": 7.4619774467380135e-06, "loss": 0.3152, "step": 5662 }, { "epoch": 0.36, "grad_norm": 3.8969671134218307, "learning_rate": 7.461090942659016e-06, "loss": 0.305, "step": 5663 }, { "epoch": 0.36, "grad_norm": 2.1867364046379683, "learning_rate": 7.4602043364649845e-06, "loss": 0.3102, "step": 5664 }, { "epoch": 0.36, "grad_norm": 1.9064886275259811, "learning_rate": 7.459317628192706e-06, "loss": 0.3092, "step": 5665 }, { "epoch": 0.36, "grad_norm": 3.51862220711062, "learning_rate": 7.458430817878971e-06, "loss": 0.3202, "step": 5666 }, { "epoch": 0.36, "grad_norm": 1.3127277383490619, "learning_rate": 7.457543905560574e-06, "loss": 0.3002, "step": 5667 }, { "epoch": 0.36, "grad_norm": 1.4085002021476818, "learning_rate": 7.4566568912743156e-06, "loss": 0.2915, "step": 5668 }, { "epoch": 0.36, "grad_norm": 2.169750998987912, "learning_rate": 7.455769775056998e-06, "loss": 0.3137, "step": 5669 }, { "epoch": 0.36, "grad_norm": 1.4067814726613208, "learning_rate": 7.454882556945433e-06, "loss": 0.2997, "step": 5670 }, { "epoch": 0.36, "grad_norm": 1.7267074013942263, "learning_rate": 7.453995236976428e-06, "loss": 0.3116, "step": 5671 }, { "epoch": 0.36, "grad_norm": 1.585783007967785, "learning_rate": 7.453107815186803e-06, "loss": 0.3141, "step": 5672 }, { "epoch": 0.36, "grad_norm": 1.4987161190589928, "learning_rate": 7.452220291613377e-06, "loss": 0.307, "step": 5673 }, { "epoch": 0.36, "grad_norm": 1.2510466920102021, "learning_rate": 7.451332666292977e-06, "loss": 0.3022, "step": 5674 }, { "epoch": 0.36, "grad_norm": 2.0184887848923205, "learning_rate": 7.450444939262429e-06, "loss": 0.3041, "step": 5675 }, { "epoch": 0.36, "grad_norm": 1.6660699120502802, "learning_rate": 7.4495571105585685e-06, "loss": 0.3086, "step": 5676 }, { "epoch": 0.36, "grad_norm": 2.8567088182384004, "learning_rate": 7.448669180218232e-06, "loss": 0.3147, "step": 5677 }, { "epoch": 0.36, "grad_norm": 2.0114870151083903, "learning_rate": 7.447781148278264e-06, "loss": 0.306, "step": 5678 }, { "epoch": 0.36, "grad_norm": 3.152110807995157, "learning_rate": 7.446893014775506e-06, "loss": 0.3235, "step": 5679 }, { "epoch": 0.36, "grad_norm": 1.3991897600041783, "learning_rate": 7.446004779746811e-06, "loss": 0.3234, "step": 5680 }, { "epoch": 0.36, "grad_norm": 1.6360703079205299, "learning_rate": 7.445116443229033e-06, "loss": 0.3147, "step": 5681 }, { "epoch": 0.36, "grad_norm": 0.6753473981753595, "learning_rate": 7.4442280052590325e-06, "loss": 0.4836, "step": 5682 }, { "epoch": 0.36, "grad_norm": 1.3987344153282701, "learning_rate": 7.443339465873668e-06, "loss": 0.3206, "step": 5683 }, { "epoch": 0.36, "grad_norm": 2.1578346647493634, "learning_rate": 7.442450825109811e-06, "loss": 0.3032, "step": 5684 }, { "epoch": 0.36, "grad_norm": 2.3961765871672758, "learning_rate": 7.4415620830043315e-06, "loss": 0.3422, "step": 5685 }, { "epoch": 0.36, "grad_norm": 3.3999347252216263, "learning_rate": 7.440673239594104e-06, "loss": 0.338, "step": 5686 }, { "epoch": 0.36, "grad_norm": 3.458042249081322, "learning_rate": 7.439784294916006e-06, "loss": 0.3071, "step": 5687 }, { "epoch": 0.36, "grad_norm": 1.442999583186028, "learning_rate": 7.438895249006928e-06, "loss": 0.3237, "step": 5688 }, { "epoch": 0.36, "grad_norm": 2.2220124555028837, "learning_rate": 7.438006101903752e-06, "loss": 0.3352, "step": 5689 }, { "epoch": 0.36, "grad_norm": 1.5793718312701053, "learning_rate": 7.4371168536433736e-06, "loss": 0.3303, "step": 5690 }, { "epoch": 0.36, "grad_norm": 1.402348641953203, "learning_rate": 7.436227504262686e-06, "loss": 0.3095, "step": 5691 }, { "epoch": 0.36, "grad_norm": 2.4817249184171337, "learning_rate": 7.435338053798594e-06, "loss": 0.3038, "step": 5692 }, { "epoch": 0.36, "grad_norm": 1.3749261366728374, "learning_rate": 7.434448502288e-06, "loss": 0.3013, "step": 5693 }, { "epoch": 0.36, "grad_norm": 1.353155418341018, "learning_rate": 7.433558849767814e-06, "loss": 0.3008, "step": 5694 }, { "epoch": 0.36, "grad_norm": 1.7818034013591229, "learning_rate": 7.4326690962749475e-06, "loss": 0.3158, "step": 5695 }, { "epoch": 0.36, "grad_norm": 1.7948200058142285, "learning_rate": 7.431779241846321e-06, "loss": 0.3059, "step": 5696 }, { "epoch": 0.36, "grad_norm": 2.411385096486676, "learning_rate": 7.430889286518853e-06, "loss": 0.35, "step": 5697 }, { "epoch": 0.36, "grad_norm": 1.6324917356146929, "learning_rate": 7.429999230329472e-06, "loss": 0.3016, "step": 5698 }, { "epoch": 0.36, "grad_norm": 1.281234103921979, "learning_rate": 7.429109073315105e-06, "loss": 0.3344, "step": 5699 }, { "epoch": 0.36, "grad_norm": 1.4392219907285286, "learning_rate": 7.42821881551269e-06, "loss": 0.298, "step": 5700 }, { "epoch": 0.36, "grad_norm": 3.384668267321661, "learning_rate": 7.427328456959162e-06, "loss": 0.3195, "step": 5701 }, { "epoch": 0.36, "grad_norm": 1.1959807559594506, "learning_rate": 7.4264379976914654e-06, "loss": 0.3035, "step": 5702 }, { "epoch": 0.36, "grad_norm": 2.690632201664417, "learning_rate": 7.425547437746546e-06, "loss": 0.3098, "step": 5703 }, { "epoch": 0.36, "grad_norm": 2.6763078633531703, "learning_rate": 7.424656777161357e-06, "loss": 0.3167, "step": 5704 }, { "epoch": 0.36, "grad_norm": 2.3469438471630495, "learning_rate": 7.4237660159728496e-06, "loss": 0.3122, "step": 5705 }, { "epoch": 0.36, "grad_norm": 1.6327036984516017, "learning_rate": 7.422875154217986e-06, "loss": 0.3062, "step": 5706 }, { "epoch": 0.36, "grad_norm": 1.3215204754316474, "learning_rate": 7.421984191933728e-06, "loss": 0.3152, "step": 5707 }, { "epoch": 0.36, "grad_norm": 0.6332467955778492, "learning_rate": 7.421093129157044e-06, "loss": 0.5212, "step": 5708 }, { "epoch": 0.36, "grad_norm": 1.6653563696691982, "learning_rate": 7.4202019659249066e-06, "loss": 0.3062, "step": 5709 }, { "epoch": 0.36, "grad_norm": 1.4541565492272985, "learning_rate": 7.419310702274289e-06, "loss": 0.3029, "step": 5710 }, { "epoch": 0.36, "grad_norm": 1.8504941946587374, "learning_rate": 7.418419338242176e-06, "loss": 0.2965, "step": 5711 }, { "epoch": 0.36, "grad_norm": 1.7463594995990253, "learning_rate": 7.417527873865548e-06, "loss": 0.3294, "step": 5712 }, { "epoch": 0.36, "grad_norm": 1.557788398969939, "learning_rate": 7.416636309181393e-06, "loss": 0.3411, "step": 5713 }, { "epoch": 0.36, "grad_norm": 0.6168204406296904, "learning_rate": 7.415744644226706e-06, "loss": 0.5094, "step": 5714 }, { "epoch": 0.36, "grad_norm": 0.6182301560704233, "learning_rate": 7.414852879038483e-06, "loss": 0.4927, "step": 5715 }, { "epoch": 0.36, "grad_norm": 1.4139939106748585, "learning_rate": 7.413961013653725e-06, "loss": 0.3065, "step": 5716 }, { "epoch": 0.36, "grad_norm": 1.8082851084173472, "learning_rate": 7.4130690481094356e-06, "loss": 0.3026, "step": 5717 }, { "epoch": 0.36, "grad_norm": 1.6075580421704037, "learning_rate": 7.412176982442629e-06, "loss": 0.3098, "step": 5718 }, { "epoch": 0.36, "grad_norm": 1.6559692571334896, "learning_rate": 7.41128481669031e-06, "loss": 0.3189, "step": 5719 }, { "epoch": 0.36, "grad_norm": 1.604115556433833, "learning_rate": 7.4103925508895046e-06, "loss": 0.2997, "step": 5720 }, { "epoch": 0.36, "grad_norm": 2.2416715029221943, "learning_rate": 7.40950018507723e-06, "loss": 0.3014, "step": 5721 }, { "epoch": 0.36, "grad_norm": 1.3092676267296375, "learning_rate": 7.408607719290512e-06, "loss": 0.3169, "step": 5722 }, { "epoch": 0.36, "grad_norm": 1.535729355769897, "learning_rate": 7.407715153566383e-06, "loss": 0.3111, "step": 5723 }, { "epoch": 0.36, "grad_norm": 1.9132874418537968, "learning_rate": 7.4068224879418734e-06, "loss": 0.3238, "step": 5724 }, { "epoch": 0.36, "grad_norm": 1.296794143617355, "learning_rate": 7.405929722454026e-06, "loss": 0.2883, "step": 5725 }, { "epoch": 0.36, "grad_norm": 0.6492150870470486, "learning_rate": 7.40503685713988e-06, "loss": 0.4716, "step": 5726 }, { "epoch": 0.36, "grad_norm": 2.3639429959010156, "learning_rate": 7.404143892036484e-06, "loss": 0.3275, "step": 5727 }, { "epoch": 0.36, "grad_norm": 4.2386137948775255, "learning_rate": 7.403250827180887e-06, "loss": 0.3349, "step": 5728 }, { "epoch": 0.36, "grad_norm": 1.2563994628426658, "learning_rate": 7.402357662610144e-06, "loss": 0.3175, "step": 5729 }, { "epoch": 0.36, "grad_norm": 1.7851120512806526, "learning_rate": 7.4014643983613155e-06, "loss": 0.3157, "step": 5730 }, { "epoch": 0.36, "grad_norm": 1.7893059924225179, "learning_rate": 7.4005710344714624e-06, "loss": 0.3052, "step": 5731 }, { "epoch": 0.36, "grad_norm": 1.4542712392411767, "learning_rate": 7.399677570977653e-06, "loss": 0.3406, "step": 5732 }, { "epoch": 0.36, "grad_norm": 1.9076157660029271, "learning_rate": 7.39878400791696e-06, "loss": 0.3087, "step": 5733 }, { "epoch": 0.36, "grad_norm": 1.3486840979132322, "learning_rate": 7.397890345326458e-06, "loss": 0.3052, "step": 5734 }, { "epoch": 0.36, "grad_norm": 1.6639528761915583, "learning_rate": 7.396996583243227e-06, "loss": 0.3196, "step": 5735 }, { "epoch": 0.36, "grad_norm": 1.3565853131676544, "learning_rate": 7.396102721704348e-06, "loss": 0.3005, "step": 5736 }, { "epoch": 0.36, "grad_norm": 1.2639933629837592, "learning_rate": 7.395208760746912e-06, "loss": 0.3085, "step": 5737 }, { "epoch": 0.36, "grad_norm": 1.657557845945615, "learning_rate": 7.394314700408012e-06, "loss": 0.3254, "step": 5738 }, { "epoch": 0.36, "grad_norm": 1.6632622764957616, "learning_rate": 7.39342054072474e-06, "loss": 0.3337, "step": 5739 }, { "epoch": 0.36, "grad_norm": 1.487677377720476, "learning_rate": 7.3925262817341996e-06, "loss": 0.2991, "step": 5740 }, { "epoch": 0.36, "grad_norm": 1.701000162151653, "learning_rate": 7.3916319234734935e-06, "loss": 0.3186, "step": 5741 }, { "epoch": 0.36, "grad_norm": 2.6058788602510203, "learning_rate": 7.390737465979732e-06, "loss": 0.287, "step": 5742 }, { "epoch": 0.36, "grad_norm": 1.710631298606624, "learning_rate": 7.389842909290025e-06, "loss": 0.3284, "step": 5743 }, { "epoch": 0.36, "grad_norm": 1.7821217348464067, "learning_rate": 7.388948253441492e-06, "loss": 0.3274, "step": 5744 }, { "epoch": 0.36, "grad_norm": 1.5799089572036988, "learning_rate": 7.388053498471253e-06, "loss": 0.3133, "step": 5745 }, { "epoch": 0.36, "grad_norm": 0.6500341324063666, "learning_rate": 7.387158644416432e-06, "loss": 0.4969, "step": 5746 }, { "epoch": 0.36, "grad_norm": 1.2714644253683998, "learning_rate": 7.386263691314157e-06, "loss": 0.3199, "step": 5747 }, { "epoch": 0.36, "grad_norm": 1.9249387926907506, "learning_rate": 7.385368639201567e-06, "loss": 0.3239, "step": 5748 }, { "epoch": 0.36, "grad_norm": 2.0455498736956876, "learning_rate": 7.384473488115792e-06, "loss": 0.3217, "step": 5749 }, { "epoch": 0.36, "grad_norm": 1.6326238233539525, "learning_rate": 7.383578238093979e-06, "loss": 0.3071, "step": 5750 }, { "epoch": 0.36, "grad_norm": 2.0793840173506237, "learning_rate": 7.38268288917327e-06, "loss": 0.3343, "step": 5751 }, { "epoch": 0.36, "grad_norm": 1.3578410524260112, "learning_rate": 7.381787441390815e-06, "loss": 0.311, "step": 5752 }, { "epoch": 0.36, "grad_norm": 2.504546431557493, "learning_rate": 7.380891894783769e-06, "loss": 0.3332, "step": 5753 }, { "epoch": 0.36, "grad_norm": 2.478990359257859, "learning_rate": 7.3799962493892895e-06, "loss": 0.3078, "step": 5754 }, { "epoch": 0.36, "grad_norm": 1.5863958876970328, "learning_rate": 7.379100505244538e-06, "loss": 0.3193, "step": 5755 }, { "epoch": 0.36, "grad_norm": 1.7685270421751085, "learning_rate": 7.378204662386683e-06, "loss": 0.2973, "step": 5756 }, { "epoch": 0.36, "grad_norm": 1.5372639700328596, "learning_rate": 7.37730872085289e-06, "loss": 0.3192, "step": 5757 }, { "epoch": 0.36, "grad_norm": 1.6127723564068863, "learning_rate": 7.376412680680336e-06, "loss": 0.3131, "step": 5758 }, { "epoch": 0.36, "grad_norm": 3.4735496079413415, "learning_rate": 7.375516541906199e-06, "loss": 0.2981, "step": 5759 }, { "epoch": 0.36, "grad_norm": 1.877987572363265, "learning_rate": 7.3746203045676625e-06, "loss": 0.3228, "step": 5760 }, { "epoch": 0.36, "grad_norm": 1.6400665714471598, "learning_rate": 7.37372396870191e-06, "loss": 0.3026, "step": 5761 }, { "epoch": 0.36, "grad_norm": 1.5285887762229906, "learning_rate": 7.372827534346134e-06, "loss": 0.322, "step": 5762 }, { "epoch": 0.36, "grad_norm": 1.5689585017596384, "learning_rate": 7.371931001537529e-06, "loss": 0.3102, "step": 5763 }, { "epoch": 0.36, "grad_norm": 2.342024338730037, "learning_rate": 7.371034370313296e-06, "loss": 0.3575, "step": 5764 }, { "epoch": 0.36, "grad_norm": 2.468124222734872, "learning_rate": 7.370137640710632e-06, "loss": 0.3145, "step": 5765 }, { "epoch": 0.36, "grad_norm": 2.320101177618037, "learning_rate": 7.36924081276675e-06, "loss": 0.3196, "step": 5766 }, { "epoch": 0.36, "grad_norm": 1.4339543507967045, "learning_rate": 7.368343886518857e-06, "loss": 0.2987, "step": 5767 }, { "epoch": 0.36, "grad_norm": 3.8696148075773147, "learning_rate": 7.3674468620041716e-06, "loss": 0.313, "step": 5768 }, { "epoch": 0.36, "grad_norm": 2.5474706499442066, "learning_rate": 7.366549739259908e-06, "loss": 0.3221, "step": 5769 }, { "epoch": 0.36, "grad_norm": 1.2964753833558096, "learning_rate": 7.365652518323294e-06, "loss": 0.3021, "step": 5770 }, { "epoch": 0.36, "grad_norm": 1.4044127931489798, "learning_rate": 7.364755199231555e-06, "loss": 0.3229, "step": 5771 }, { "epoch": 0.36, "grad_norm": 1.7601533043645516, "learning_rate": 7.363857782021922e-06, "loss": 0.3362, "step": 5772 }, { "epoch": 0.36, "grad_norm": 0.6087853838590078, "learning_rate": 7.362960266731632e-06, "loss": 0.4977, "step": 5773 }, { "epoch": 0.36, "grad_norm": 2.3781158828767315, "learning_rate": 7.362062653397923e-06, "loss": 0.32, "step": 5774 }, { "epoch": 0.36, "grad_norm": 8.658366266497918, "learning_rate": 7.361164942058038e-06, "loss": 0.3192, "step": 5775 }, { "epoch": 0.36, "grad_norm": 1.8875124548609907, "learning_rate": 7.360267132749227e-06, "loss": 0.3087, "step": 5776 }, { "epoch": 0.36, "grad_norm": 2.2727850664024363, "learning_rate": 7.359369225508738e-06, "loss": 0.3185, "step": 5777 }, { "epoch": 0.36, "grad_norm": 2.450642338466432, "learning_rate": 7.358471220373831e-06, "loss": 0.3204, "step": 5778 }, { "epoch": 0.36, "grad_norm": 3.6278554362256004, "learning_rate": 7.357573117381764e-06, "loss": 0.3008, "step": 5779 }, { "epoch": 0.36, "grad_norm": 1.9078310390802198, "learning_rate": 7.3566749165698e-06, "loss": 0.3023, "step": 5780 }, { "epoch": 0.36, "grad_norm": 1.4518003132283595, "learning_rate": 7.355776617975209e-06, "loss": 0.3324, "step": 5781 }, { "epoch": 0.36, "grad_norm": 1.9831437966496162, "learning_rate": 7.354878221635262e-06, "loss": 0.3067, "step": 5782 }, { "epoch": 0.36, "grad_norm": 2.9558412798166955, "learning_rate": 7.353979727587234e-06, "loss": 0.284, "step": 5783 }, { "epoch": 0.36, "grad_norm": 2.75995278297336, "learning_rate": 7.353081135868405e-06, "loss": 0.3316, "step": 5784 }, { "epoch": 0.36, "grad_norm": 2.4438940009092813, "learning_rate": 7.3521824465160605e-06, "loss": 0.3071, "step": 5785 }, { "epoch": 0.36, "grad_norm": 1.4964626549739581, "learning_rate": 7.3512836595674896e-06, "loss": 0.3031, "step": 5786 }, { "epoch": 0.36, "grad_norm": 4.195188164153965, "learning_rate": 7.350384775059983e-06, "loss": 0.3179, "step": 5787 }, { "epoch": 0.36, "grad_norm": 1.4793362289035, "learning_rate": 7.349485793030837e-06, "loss": 0.3084, "step": 5788 }, { "epoch": 0.36, "grad_norm": 1.7286359867807102, "learning_rate": 7.3485867135173514e-06, "loss": 0.3196, "step": 5789 }, { "epoch": 0.36, "grad_norm": 1.5745322911322044, "learning_rate": 7.347687536556833e-06, "loss": 0.3144, "step": 5790 }, { "epoch": 0.36, "grad_norm": 2.4459368475648984, "learning_rate": 7.346788262186588e-06, "loss": 0.3082, "step": 5791 }, { "epoch": 0.36, "grad_norm": 1.5842852315498037, "learning_rate": 7.34588889044393e-06, "loss": 0.3118, "step": 5792 }, { "epoch": 0.36, "grad_norm": 0.6431567181335914, "learning_rate": 7.344989421366175e-06, "loss": 0.5115, "step": 5793 }, { "epoch": 0.36, "grad_norm": 1.7974130873253658, "learning_rate": 7.3440898549906435e-06, "loss": 0.308, "step": 5794 }, { "epoch": 0.36, "grad_norm": 4.149415807747741, "learning_rate": 7.34319019135466e-06, "loss": 0.3145, "step": 5795 }, { "epoch": 0.36, "grad_norm": 2.0781212669866096, "learning_rate": 7.342290430495554e-06, "loss": 0.3185, "step": 5796 }, { "epoch": 0.36, "grad_norm": 1.6616416544173185, "learning_rate": 7.341390572450659e-06, "loss": 0.3071, "step": 5797 }, { "epoch": 0.36, "grad_norm": 5.433830528273421, "learning_rate": 7.340490617257309e-06, "loss": 0.3169, "step": 5798 }, { "epoch": 0.36, "grad_norm": 2.9030889016357984, "learning_rate": 7.339590564952845e-06, "loss": 0.3178, "step": 5799 }, { "epoch": 0.36, "grad_norm": 3.646943345805651, "learning_rate": 7.338690415574614e-06, "loss": 0.3067, "step": 5800 }, { "epoch": 0.36, "grad_norm": 1.6316014584446898, "learning_rate": 7.337790169159964e-06, "loss": 0.3009, "step": 5801 }, { "epoch": 0.36, "grad_norm": 3.144307940388868, "learning_rate": 7.3368898257462486e-06, "loss": 0.3031, "step": 5802 }, { "epoch": 0.36, "grad_norm": 2.4620217959917476, "learning_rate": 7.3359893853708205e-06, "loss": 0.3002, "step": 5803 }, { "epoch": 0.37, "grad_norm": 1.9917308700716931, "learning_rate": 7.335088848071046e-06, "loss": 0.3085, "step": 5804 }, { "epoch": 0.37, "grad_norm": 1.515370739560277, "learning_rate": 7.334188213884287e-06, "loss": 0.3089, "step": 5805 }, { "epoch": 0.37, "grad_norm": 1.8956378841874428, "learning_rate": 7.333287482847913e-06, "loss": 0.2921, "step": 5806 }, { "epoch": 0.37, "grad_norm": 2.0689430657937513, "learning_rate": 7.332386654999296e-06, "loss": 0.3124, "step": 5807 }, { "epoch": 0.37, "grad_norm": 2.1196299524360107, "learning_rate": 7.3314857303758155e-06, "loss": 0.3207, "step": 5808 }, { "epoch": 0.37, "grad_norm": 1.8664441193295482, "learning_rate": 7.330584709014849e-06, "loss": 0.3125, "step": 5809 }, { "epoch": 0.37, "grad_norm": 1.5672815518063077, "learning_rate": 7.329683590953785e-06, "loss": 0.3214, "step": 5810 }, { "epoch": 0.37, "grad_norm": 1.9459149264097486, "learning_rate": 7.328782376230011e-06, "loss": 0.3032, "step": 5811 }, { "epoch": 0.37, "grad_norm": 2.8956083387799723, "learning_rate": 7.32788106488092e-06, "loss": 0.3189, "step": 5812 }, { "epoch": 0.37, "grad_norm": 2.724382629263787, "learning_rate": 7.326979656943907e-06, "loss": 0.301, "step": 5813 }, { "epoch": 0.37, "grad_norm": 2.040882141972441, "learning_rate": 7.326078152456375e-06, "loss": 0.3041, "step": 5814 }, { "epoch": 0.37, "grad_norm": 1.9496157807918788, "learning_rate": 7.325176551455729e-06, "loss": 0.334, "step": 5815 }, { "epoch": 0.37, "grad_norm": 4.224184541491048, "learning_rate": 7.324274853979381e-06, "loss": 0.3067, "step": 5816 }, { "epoch": 0.37, "grad_norm": 5.00410639397539, "learning_rate": 7.323373060064738e-06, "loss": 0.2864, "step": 5817 }, { "epoch": 0.37, "grad_norm": 1.3888165581844634, "learning_rate": 7.322471169749219e-06, "loss": 0.3062, "step": 5818 }, { "epoch": 0.37, "grad_norm": 1.829558357402964, "learning_rate": 7.321569183070247e-06, "loss": 0.2967, "step": 5819 }, { "epoch": 0.37, "grad_norm": 2.2188317970095683, "learning_rate": 7.320667100065248e-06, "loss": 0.309, "step": 5820 }, { "epoch": 0.37, "grad_norm": 1.7218714069460432, "learning_rate": 7.319764920771646e-06, "loss": 0.3075, "step": 5821 }, { "epoch": 0.37, "grad_norm": 2.48354112910123, "learning_rate": 7.31886264522688e-06, "loss": 0.289, "step": 5822 }, { "epoch": 0.37, "grad_norm": 0.687544450349418, "learning_rate": 7.3179602734683815e-06, "loss": 0.5144, "step": 5823 }, { "epoch": 0.37, "grad_norm": 1.6247972565050295, "learning_rate": 7.317057805533596e-06, "loss": 0.305, "step": 5824 }, { "epoch": 0.37, "grad_norm": 2.133622568335391, "learning_rate": 7.316155241459966e-06, "loss": 0.3133, "step": 5825 }, { "epoch": 0.37, "grad_norm": 2.28950572443818, "learning_rate": 7.315252581284942e-06, "loss": 0.3122, "step": 5826 }, { "epoch": 0.37, "grad_norm": 1.472810479886116, "learning_rate": 7.314349825045975e-06, "loss": 0.3088, "step": 5827 }, { "epoch": 0.37, "grad_norm": 0.6783569563645876, "learning_rate": 7.313446972780522e-06, "loss": 0.5448, "step": 5828 }, { "epoch": 0.37, "grad_norm": 4.821842629114185, "learning_rate": 7.312544024526045e-06, "loss": 0.3261, "step": 5829 }, { "epoch": 0.37, "grad_norm": 3.204742629300778, "learning_rate": 7.311640980320012e-06, "loss": 0.3178, "step": 5830 }, { "epoch": 0.37, "grad_norm": 2.2984830118945174, "learning_rate": 7.310737840199886e-06, "loss": 0.312, "step": 5831 }, { "epoch": 0.37, "grad_norm": 1.2660732407209636, "learning_rate": 7.3098346042031435e-06, "loss": 0.3022, "step": 5832 }, { "epoch": 0.37, "grad_norm": 2.0585727921304535, "learning_rate": 7.308931272367259e-06, "loss": 0.3282, "step": 5833 }, { "epoch": 0.37, "grad_norm": 1.814486531397894, "learning_rate": 7.308027844729717e-06, "loss": 0.3001, "step": 5834 }, { "epoch": 0.37, "grad_norm": 2.139194598575215, "learning_rate": 7.3071243213279994e-06, "loss": 0.2996, "step": 5835 }, { "epoch": 0.37, "grad_norm": 1.854070756875088, "learning_rate": 7.306220702199596e-06, "loss": 0.3038, "step": 5836 }, { "epoch": 0.37, "grad_norm": 2.2854665347947902, "learning_rate": 7.305316987381998e-06, "loss": 0.3136, "step": 5837 }, { "epoch": 0.37, "grad_norm": 1.451365980106976, "learning_rate": 7.304413176912706e-06, "loss": 0.2991, "step": 5838 }, { "epoch": 0.37, "grad_norm": 1.5527165313661646, "learning_rate": 7.303509270829217e-06, "loss": 0.2992, "step": 5839 }, { "epoch": 0.37, "grad_norm": 2.184094753030994, "learning_rate": 7.302605269169036e-06, "loss": 0.3067, "step": 5840 }, { "epoch": 0.37, "grad_norm": 5.009560153174841, "learning_rate": 7.301701171969673e-06, "loss": 0.3107, "step": 5841 }, { "epoch": 0.37, "grad_norm": 1.8787207694283758, "learning_rate": 7.300796979268641e-06, "loss": 0.3032, "step": 5842 }, { "epoch": 0.37, "grad_norm": 1.867082892121984, "learning_rate": 7.299892691103455e-06, "loss": 0.3259, "step": 5843 }, { "epoch": 0.37, "grad_norm": 1.9656140868400405, "learning_rate": 7.298988307511637e-06, "loss": 0.3353, "step": 5844 }, { "epoch": 0.37, "grad_norm": 2.3475489149999587, "learning_rate": 7.298083828530708e-06, "loss": 0.3119, "step": 5845 }, { "epoch": 0.37, "grad_norm": 2.625330661974828, "learning_rate": 7.297179254198202e-06, "loss": 0.3035, "step": 5846 }, { "epoch": 0.37, "grad_norm": 1.5264940611890767, "learning_rate": 7.296274584551647e-06, "loss": 0.3146, "step": 5847 }, { "epoch": 0.37, "grad_norm": 2.45465879872521, "learning_rate": 7.295369819628582e-06, "loss": 0.317, "step": 5848 }, { "epoch": 0.37, "grad_norm": 1.891544266720139, "learning_rate": 7.294464959466545e-06, "loss": 0.289, "step": 5849 }, { "epoch": 0.37, "grad_norm": 3.0262458195071824, "learning_rate": 7.2935600041030815e-06, "loss": 0.3021, "step": 5850 }, { "epoch": 0.37, "grad_norm": 1.5013778140899698, "learning_rate": 7.292654953575739e-06, "loss": 0.2926, "step": 5851 }, { "epoch": 0.37, "grad_norm": 2.0427202461352336, "learning_rate": 7.291749807922072e-06, "loss": 0.308, "step": 5852 }, { "epoch": 0.37, "grad_norm": 1.7416220802159856, "learning_rate": 7.290844567179635e-06, "loss": 0.2939, "step": 5853 }, { "epoch": 0.37, "grad_norm": 1.8268191816210502, "learning_rate": 7.289939231385987e-06, "loss": 0.3055, "step": 5854 }, { "epoch": 0.37, "grad_norm": 2.5882337922854566, "learning_rate": 7.289033800578692e-06, "loss": 0.3254, "step": 5855 }, { "epoch": 0.37, "grad_norm": 1.622749871598468, "learning_rate": 7.2881282747953195e-06, "loss": 0.3143, "step": 5856 }, { "epoch": 0.37, "grad_norm": 2.408186670504864, "learning_rate": 7.287222654073442e-06, "loss": 0.2995, "step": 5857 }, { "epoch": 0.37, "grad_norm": 1.733951039006919, "learning_rate": 7.286316938450633e-06, "loss": 0.3032, "step": 5858 }, { "epoch": 0.37, "grad_norm": 1.96746684889688, "learning_rate": 7.285411127964472e-06, "loss": 0.2968, "step": 5859 }, { "epoch": 0.37, "grad_norm": 1.7309144654104038, "learning_rate": 7.284505222652546e-06, "loss": 0.2935, "step": 5860 }, { "epoch": 0.37, "grad_norm": 1.3396226499138895, "learning_rate": 7.28359922255244e-06, "loss": 0.2992, "step": 5861 }, { "epoch": 0.37, "grad_norm": 1.3488703823264145, "learning_rate": 7.2826931277017455e-06, "loss": 0.3092, "step": 5862 }, { "epoch": 0.37, "grad_norm": 1.7107288739922248, "learning_rate": 7.281786938138058e-06, "loss": 0.2875, "step": 5863 }, { "epoch": 0.37, "grad_norm": 1.714898213298103, "learning_rate": 7.28088065389898e-06, "loss": 0.311, "step": 5864 }, { "epoch": 0.37, "grad_norm": 4.156603177681332, "learning_rate": 7.279974275022111e-06, "loss": 0.3169, "step": 5865 }, { "epoch": 0.37, "grad_norm": 2.0778148806959416, "learning_rate": 7.279067801545059e-06, "loss": 0.292, "step": 5866 }, { "epoch": 0.37, "grad_norm": 2.1673113087277693, "learning_rate": 7.278161233505435e-06, "loss": 0.2949, "step": 5867 }, { "epoch": 0.37, "grad_norm": 1.5382648247895252, "learning_rate": 7.277254570940857e-06, "loss": 0.3342, "step": 5868 }, { "epoch": 0.37, "grad_norm": 1.723274882034758, "learning_rate": 7.276347813888942e-06, "loss": 0.3053, "step": 5869 }, { "epoch": 0.37, "grad_norm": 1.4908694377216396, "learning_rate": 7.275440962387311e-06, "loss": 0.294, "step": 5870 }, { "epoch": 0.37, "grad_norm": 2.122961585526543, "learning_rate": 7.274534016473595e-06, "loss": 0.2908, "step": 5871 }, { "epoch": 0.37, "grad_norm": 2.5943014249013667, "learning_rate": 7.273626976185422e-06, "loss": 0.3053, "step": 5872 }, { "epoch": 0.37, "grad_norm": 1.8921446662808121, "learning_rate": 7.272719841560426e-06, "loss": 0.3093, "step": 5873 }, { "epoch": 0.37, "grad_norm": 2.442207526439503, "learning_rate": 7.271812612636249e-06, "loss": 0.3018, "step": 5874 }, { "epoch": 0.37, "grad_norm": 3.687574785421414, "learning_rate": 7.270905289450529e-06, "loss": 0.3031, "step": 5875 }, { "epoch": 0.37, "grad_norm": 1.3049954404147388, "learning_rate": 7.269997872040918e-06, "loss": 0.2979, "step": 5876 }, { "epoch": 0.37, "grad_norm": 1.494379088008921, "learning_rate": 7.26909036044506e-06, "loss": 0.3144, "step": 5877 }, { "epoch": 0.37, "grad_norm": 1.4272037481827549, "learning_rate": 7.268182754700616e-06, "loss": 0.2785, "step": 5878 }, { "epoch": 0.37, "grad_norm": 1.5092362448586174, "learning_rate": 7.267275054845238e-06, "loss": 0.3021, "step": 5879 }, { "epoch": 0.37, "grad_norm": 2.204995359379711, "learning_rate": 7.266367260916594e-06, "loss": 0.3205, "step": 5880 }, { "epoch": 0.37, "grad_norm": 1.67255212483285, "learning_rate": 7.265459372952343e-06, "loss": 0.3152, "step": 5881 }, { "epoch": 0.37, "grad_norm": 1.5525485891146222, "learning_rate": 7.264551390990161e-06, "loss": 0.2957, "step": 5882 }, { "epoch": 0.37, "grad_norm": 1.7766000510995803, "learning_rate": 7.2636433150677185e-06, "loss": 0.3206, "step": 5883 }, { "epoch": 0.37, "grad_norm": 2.3475064172629985, "learning_rate": 7.262735145222696e-06, "loss": 0.316, "step": 5884 }, { "epoch": 0.37, "grad_norm": 2.233693133714042, "learning_rate": 7.261826881492771e-06, "loss": 0.3069, "step": 5885 }, { "epoch": 0.37, "grad_norm": 3.422949658024172, "learning_rate": 7.260918523915632e-06, "loss": 0.3156, "step": 5886 }, { "epoch": 0.37, "grad_norm": 1.485762226091767, "learning_rate": 7.260010072528968e-06, "loss": 0.2902, "step": 5887 }, { "epoch": 0.37, "grad_norm": 7.882394089851754, "learning_rate": 7.259101527370471e-06, "loss": 0.3042, "step": 5888 }, { "epoch": 0.37, "grad_norm": 2.5545202236983964, "learning_rate": 7.25819288847784e-06, "loss": 0.3175, "step": 5889 }, { "epoch": 0.37, "grad_norm": 2.1366015587414267, "learning_rate": 7.257284155888775e-06, "loss": 0.328, "step": 5890 }, { "epoch": 0.37, "grad_norm": 2.1565696032774317, "learning_rate": 7.25637532964098e-06, "loss": 0.3377, "step": 5891 }, { "epoch": 0.37, "grad_norm": 3.005323489987182, "learning_rate": 7.255466409772165e-06, "loss": 0.3136, "step": 5892 }, { "epoch": 0.37, "grad_norm": 5.50084749243097, "learning_rate": 7.254557396320043e-06, "loss": 0.3229, "step": 5893 }, { "epoch": 0.37, "grad_norm": 2.0641706062751233, "learning_rate": 7.253648289322331e-06, "loss": 0.3279, "step": 5894 }, { "epoch": 0.37, "grad_norm": 1.578493390770236, "learning_rate": 7.252739088816747e-06, "loss": 0.2983, "step": 5895 }, { "epoch": 0.37, "grad_norm": 2.0699252430865474, "learning_rate": 7.251829794841017e-06, "loss": 0.2977, "step": 5896 }, { "epoch": 0.37, "grad_norm": 1.4448905027082404, "learning_rate": 7.25092040743287e-06, "loss": 0.3252, "step": 5897 }, { "epoch": 0.37, "grad_norm": 1.6582162666588753, "learning_rate": 7.250010926630038e-06, "loss": 0.3047, "step": 5898 }, { "epoch": 0.37, "grad_norm": 3.029785768214059, "learning_rate": 7.2491013524702545e-06, "loss": 0.3072, "step": 5899 }, { "epoch": 0.37, "grad_norm": 1.9210438445093962, "learning_rate": 7.248191684991262e-06, "loss": 0.3059, "step": 5900 }, { "epoch": 0.37, "grad_norm": 1.5119819916754642, "learning_rate": 7.247281924230802e-06, "loss": 0.3119, "step": 5901 }, { "epoch": 0.37, "grad_norm": 3.348791307836979, "learning_rate": 7.246372070226625e-06, "loss": 0.3045, "step": 5902 }, { "epoch": 0.37, "grad_norm": 3.462685294050548, "learning_rate": 7.245462123016478e-06, "loss": 0.3037, "step": 5903 }, { "epoch": 0.37, "grad_norm": 5.657400086378503, "learning_rate": 7.244552082638122e-06, "loss": 0.3045, "step": 5904 }, { "epoch": 0.37, "grad_norm": 2.3256327447015273, "learning_rate": 7.243641949129312e-06, "loss": 0.3082, "step": 5905 }, { "epoch": 0.37, "grad_norm": 0.6509889104987481, "learning_rate": 7.242731722527814e-06, "loss": 0.4944, "step": 5906 }, { "epoch": 0.37, "grad_norm": 1.5261226749791494, "learning_rate": 7.24182140287139e-06, "loss": 0.3184, "step": 5907 }, { "epoch": 0.37, "grad_norm": 2.2445305914520484, "learning_rate": 7.2409109901978185e-06, "loss": 0.2872, "step": 5908 }, { "epoch": 0.37, "grad_norm": 1.7015555458004097, "learning_rate": 7.240000484544866e-06, "loss": 0.3241, "step": 5909 }, { "epoch": 0.37, "grad_norm": 2.3164805378662296, "learning_rate": 7.239089885950317e-06, "loss": 0.3069, "step": 5910 }, { "epoch": 0.37, "grad_norm": 2.7882030170435197, "learning_rate": 7.238179194451949e-06, "loss": 0.3103, "step": 5911 }, { "epoch": 0.37, "grad_norm": 3.2328517241237735, "learning_rate": 7.237268410087553e-06, "loss": 0.3195, "step": 5912 }, { "epoch": 0.37, "grad_norm": 0.5985679519054314, "learning_rate": 7.236357532894916e-06, "loss": 0.5348, "step": 5913 }, { "epoch": 0.37, "grad_norm": 2.7307987927378585, "learning_rate": 7.235446562911834e-06, "loss": 0.3005, "step": 5914 }, { "epoch": 0.37, "grad_norm": 1.8246946174040426, "learning_rate": 7.234535500176101e-06, "loss": 0.3063, "step": 5915 }, { "epoch": 0.37, "grad_norm": 2.6524543979794832, "learning_rate": 7.233624344725524e-06, "loss": 0.3209, "step": 5916 }, { "epoch": 0.37, "grad_norm": 1.3908195391034117, "learning_rate": 7.232713096597903e-06, "loss": 0.2883, "step": 5917 }, { "epoch": 0.37, "grad_norm": 2.2579837209121814, "learning_rate": 7.231801755831052e-06, "loss": 0.3053, "step": 5918 }, { "epoch": 0.37, "grad_norm": 1.698560050406029, "learning_rate": 7.230890322462781e-06, "loss": 0.2831, "step": 5919 }, { "epoch": 0.37, "grad_norm": 3.1326315798504343, "learning_rate": 7.2299787965309075e-06, "loss": 0.3413, "step": 5920 }, { "epoch": 0.37, "grad_norm": 1.6627139754391318, "learning_rate": 7.229067178073253e-06, "loss": 0.2878, "step": 5921 }, { "epoch": 0.37, "grad_norm": 1.7135080849189828, "learning_rate": 7.228155467127642e-06, "loss": 0.3277, "step": 5922 }, { "epoch": 0.37, "grad_norm": 1.844747474669683, "learning_rate": 7.227243663731904e-06, "loss": 0.3259, "step": 5923 }, { "epoch": 0.37, "grad_norm": 2.8893996693073825, "learning_rate": 7.226331767923871e-06, "loss": 0.3204, "step": 5924 }, { "epoch": 0.37, "grad_norm": 2.9262178045103964, "learning_rate": 7.225419779741376e-06, "loss": 0.3036, "step": 5925 }, { "epoch": 0.37, "grad_norm": 2.0445039946056185, "learning_rate": 7.224507699222263e-06, "loss": 0.3105, "step": 5926 }, { "epoch": 0.37, "grad_norm": 1.6887161344967128, "learning_rate": 7.223595526404374e-06, "loss": 0.3332, "step": 5927 }, { "epoch": 0.37, "grad_norm": 2.096963681172321, "learning_rate": 7.2226832613255584e-06, "loss": 0.3196, "step": 5928 }, { "epoch": 0.37, "grad_norm": 2.771536720348542, "learning_rate": 7.221770904023664e-06, "loss": 0.3128, "step": 5929 }, { "epoch": 0.37, "grad_norm": 3.399124860506791, "learning_rate": 7.2208584545365505e-06, "loss": 0.3207, "step": 5930 }, { "epoch": 0.37, "grad_norm": 1.545844740184038, "learning_rate": 7.219945912902073e-06, "loss": 0.2978, "step": 5931 }, { "epoch": 0.37, "grad_norm": 3.4043635595207227, "learning_rate": 7.2190332791580995e-06, "loss": 0.325, "step": 5932 }, { "epoch": 0.37, "grad_norm": 2.540370324210669, "learning_rate": 7.218120553342492e-06, "loss": 0.3149, "step": 5933 }, { "epoch": 0.37, "grad_norm": 3.080938864836028, "learning_rate": 7.217207735493122e-06, "loss": 0.2886, "step": 5934 }, { "epoch": 0.37, "grad_norm": 2.3301992556399873, "learning_rate": 7.216294825647866e-06, "loss": 0.3128, "step": 5935 }, { "epoch": 0.37, "grad_norm": 1.4832123564266622, "learning_rate": 7.215381823844601e-06, "loss": 0.2998, "step": 5936 }, { "epoch": 0.37, "grad_norm": 1.960706285117124, "learning_rate": 7.214468730121209e-06, "loss": 0.301, "step": 5937 }, { "epoch": 0.37, "grad_norm": 2.255026371704293, "learning_rate": 7.213555544515577e-06, "loss": 0.3036, "step": 5938 }, { "epoch": 0.37, "grad_norm": 2.4455784418932116, "learning_rate": 7.212642267065593e-06, "loss": 0.3153, "step": 5939 }, { "epoch": 0.37, "grad_norm": 0.6126928210379949, "learning_rate": 7.211728897809151e-06, "loss": 0.4656, "step": 5940 }, { "epoch": 0.37, "grad_norm": 1.9074410032665503, "learning_rate": 7.210815436784148e-06, "loss": 0.2978, "step": 5941 }, { "epoch": 0.37, "grad_norm": 2.1222972203089223, "learning_rate": 7.209901884028487e-06, "loss": 0.2926, "step": 5942 }, { "epoch": 0.37, "grad_norm": 2.0324798593508495, "learning_rate": 7.20898823958007e-06, "loss": 0.2851, "step": 5943 }, { "epoch": 0.37, "grad_norm": 1.8127984813845401, "learning_rate": 7.208074503476808e-06, "loss": 0.3103, "step": 5944 }, { "epoch": 0.37, "grad_norm": 2.6075085901736177, "learning_rate": 7.207160675756614e-06, "loss": 0.2974, "step": 5945 }, { "epoch": 0.37, "grad_norm": 2.2821636272598704, "learning_rate": 7.206246756457402e-06, "loss": 0.3086, "step": 5946 }, { "epoch": 0.37, "grad_norm": 2.726983264962258, "learning_rate": 7.205332745617095e-06, "loss": 0.3128, "step": 5947 }, { "epoch": 0.37, "grad_norm": 1.9714385902606477, "learning_rate": 7.204418643273613e-06, "loss": 0.3057, "step": 5948 }, { "epoch": 0.37, "grad_norm": 2.710025784138471, "learning_rate": 7.2035044494648865e-06, "loss": 0.3012, "step": 5949 }, { "epoch": 0.37, "grad_norm": 1.5061638534445656, "learning_rate": 7.202590164228849e-06, "loss": 0.3223, "step": 5950 }, { "epoch": 0.37, "grad_norm": 1.99989521694326, "learning_rate": 7.20167578760343e-06, "loss": 0.3166, "step": 5951 }, { "epoch": 0.37, "grad_norm": 11.156413550352374, "learning_rate": 7.200761319626574e-06, "loss": 0.3094, "step": 5952 }, { "epoch": 0.37, "grad_norm": 2.4174844838680327, "learning_rate": 7.199846760336221e-06, "loss": 0.2987, "step": 5953 }, { "epoch": 0.37, "grad_norm": 3.6831474095576104, "learning_rate": 7.198932109770319e-06, "loss": 0.2978, "step": 5954 }, { "epoch": 0.37, "grad_norm": 2.494215493871055, "learning_rate": 7.198017367966817e-06, "loss": 0.3126, "step": 5955 }, { "epoch": 0.37, "grad_norm": 4.3713637925407065, "learning_rate": 7.197102534963671e-06, "loss": 0.3164, "step": 5956 }, { "epoch": 0.37, "grad_norm": 2.1745249570043237, "learning_rate": 7.19618761079884e-06, "loss": 0.3085, "step": 5957 }, { "epoch": 0.37, "grad_norm": 0.6721736852571504, "learning_rate": 7.195272595510282e-06, "loss": 0.5019, "step": 5958 }, { "epoch": 0.37, "grad_norm": 3.759645629800595, "learning_rate": 7.194357489135966e-06, "loss": 0.3258, "step": 5959 }, { "epoch": 0.37, "grad_norm": 2.1010763393118905, "learning_rate": 7.193442291713858e-06, "loss": 0.3065, "step": 5960 }, { "epoch": 0.37, "grad_norm": 2.523171481090262, "learning_rate": 7.192527003281935e-06, "loss": 0.3185, "step": 5961 }, { "epoch": 0.37, "grad_norm": 1.8455391431495372, "learning_rate": 7.191611623878173e-06, "loss": 0.2899, "step": 5962 }, { "epoch": 0.38, "grad_norm": 2.526983148103827, "learning_rate": 7.1906961535405505e-06, "loss": 0.3059, "step": 5963 }, { "epoch": 0.38, "grad_norm": 4.225402680218895, "learning_rate": 7.189780592307054e-06, "loss": 0.3093, "step": 5964 }, { "epoch": 0.38, "grad_norm": 1.5167138065359553, "learning_rate": 7.188864940215671e-06, "loss": 0.2865, "step": 5965 }, { "epoch": 0.38, "grad_norm": 3.356918859670861, "learning_rate": 7.187949197304395e-06, "loss": 0.3218, "step": 5966 }, { "epoch": 0.38, "grad_norm": 16.89189404408381, "learning_rate": 7.187033363611219e-06, "loss": 0.318, "step": 5967 }, { "epoch": 0.38, "grad_norm": 2.11700703467767, "learning_rate": 7.186117439174145e-06, "loss": 0.3133, "step": 5968 }, { "epoch": 0.38, "grad_norm": 11.005995948321859, "learning_rate": 7.185201424031174e-06, "loss": 0.3221, "step": 5969 }, { "epoch": 0.38, "grad_norm": 1.470314521155802, "learning_rate": 7.184285318220316e-06, "loss": 0.2872, "step": 5970 }, { "epoch": 0.38, "grad_norm": 2.751373484900817, "learning_rate": 7.18336912177958e-06, "loss": 0.3091, "step": 5971 }, { "epoch": 0.38, "grad_norm": 2.0781543745306235, "learning_rate": 7.182452834746982e-06, "loss": 0.3037, "step": 5972 }, { "epoch": 0.38, "grad_norm": 1.7377097311702716, "learning_rate": 7.181536457160538e-06, "loss": 0.2901, "step": 5973 }, { "epoch": 0.38, "grad_norm": 1.736701016666469, "learning_rate": 7.180619989058273e-06, "loss": 0.3156, "step": 5974 }, { "epoch": 0.38, "grad_norm": 1.3994772603359749, "learning_rate": 7.17970343047821e-06, "loss": 0.2894, "step": 5975 }, { "epoch": 0.38, "grad_norm": 1.8318057658099611, "learning_rate": 7.178786781458381e-06, "loss": 0.3041, "step": 5976 }, { "epoch": 0.38, "grad_norm": 3.227806643026452, "learning_rate": 7.177870042036819e-06, "loss": 0.2914, "step": 5977 }, { "epoch": 0.38, "grad_norm": 8.697526425726963, "learning_rate": 7.176953212251559e-06, "loss": 0.3074, "step": 5978 }, { "epoch": 0.38, "grad_norm": 2.038781239013182, "learning_rate": 7.176036292140644e-06, "loss": 0.3121, "step": 5979 }, { "epoch": 0.38, "grad_norm": 2.1796377758250554, "learning_rate": 7.175119281742119e-06, "loss": 0.3029, "step": 5980 }, { "epoch": 0.38, "grad_norm": 5.080976565060239, "learning_rate": 7.174202181094031e-06, "loss": 0.3286, "step": 5981 }, { "epoch": 0.38, "grad_norm": 16.309657195172605, "learning_rate": 7.173284990234433e-06, "loss": 0.2914, "step": 5982 }, { "epoch": 0.38, "grad_norm": 0.6046106125210732, "learning_rate": 7.17236770920138e-06, "loss": 0.4895, "step": 5983 }, { "epoch": 0.38, "grad_norm": 3.87831532572056, "learning_rate": 7.1714503380329326e-06, "loss": 0.3074, "step": 5984 }, { "epoch": 0.38, "grad_norm": 1.794917558533384, "learning_rate": 7.170532876767153e-06, "loss": 0.3168, "step": 5985 }, { "epoch": 0.38, "grad_norm": 11.461320850332969, "learning_rate": 7.16961532544211e-06, "loss": 0.3167, "step": 5986 }, { "epoch": 0.38, "grad_norm": 1.3543175351221206, "learning_rate": 7.168697684095873e-06, "loss": 0.3126, "step": 5987 }, { "epoch": 0.38, "grad_norm": 2.4638785596026485, "learning_rate": 7.1677799527665186e-06, "loss": 0.3172, "step": 5988 }, { "epoch": 0.38, "grad_norm": 2.10295359690129, "learning_rate": 7.166862131492122e-06, "loss": 0.3179, "step": 5989 }, { "epoch": 0.38, "grad_norm": 2.904067089162369, "learning_rate": 7.165944220310766e-06, "loss": 0.2811, "step": 5990 }, { "epoch": 0.38, "grad_norm": 2.2090260665197143, "learning_rate": 7.165026219260538e-06, "loss": 0.3006, "step": 5991 }, { "epoch": 0.38, "grad_norm": 0.5909880695299142, "learning_rate": 7.164108128379528e-06, "loss": 0.5293, "step": 5992 }, { "epoch": 0.38, "grad_norm": 1.5514547504538685, "learning_rate": 7.163189947705826e-06, "loss": 0.3085, "step": 5993 }, { "epoch": 0.38, "grad_norm": 3.5225524480726405, "learning_rate": 7.162271677277532e-06, "loss": 0.2955, "step": 5994 }, { "epoch": 0.38, "grad_norm": 2.2985759849943497, "learning_rate": 7.161353317132744e-06, "loss": 0.3193, "step": 5995 }, { "epoch": 0.38, "grad_norm": 1.9162294349107698, "learning_rate": 7.160434867309569e-06, "loss": 0.3037, "step": 5996 }, { "epoch": 0.38, "grad_norm": 1.4891738981892237, "learning_rate": 7.159516327846114e-06, "loss": 0.3105, "step": 5997 }, { "epoch": 0.38, "grad_norm": 2.0724693220140935, "learning_rate": 7.1585976987804895e-06, "loss": 0.3097, "step": 5998 }, { "epoch": 0.38, "grad_norm": 2.259566667810883, "learning_rate": 7.157678980150814e-06, "loss": 0.3068, "step": 5999 }, { "epoch": 0.38, "grad_norm": 0.6444077881578327, "learning_rate": 7.156760171995204e-06, "loss": 0.5023, "step": 6000 }, { "epoch": 0.38, "grad_norm": 1.7271831934927013, "learning_rate": 7.155841274351784e-06, "loss": 0.3108, "step": 6001 }, { "epoch": 0.38, "grad_norm": 4.6636631154283315, "learning_rate": 7.154922287258681e-06, "loss": 0.2844, "step": 6002 }, { "epoch": 0.38, "grad_norm": 3.8343864224279804, "learning_rate": 7.1540032107540245e-06, "loss": 0.3197, "step": 6003 }, { "epoch": 0.38, "grad_norm": 1.848427973087907, "learning_rate": 7.1530840448759484e-06, "loss": 0.3049, "step": 6004 }, { "epoch": 0.38, "grad_norm": 1.8418480083532276, "learning_rate": 7.152164789662592e-06, "loss": 0.3111, "step": 6005 }, { "epoch": 0.38, "grad_norm": 1.7151396141888278, "learning_rate": 7.151245445152096e-06, "loss": 0.2947, "step": 6006 }, { "epoch": 0.38, "grad_norm": 0.6177711619734679, "learning_rate": 7.1503260113826035e-06, "loss": 0.484, "step": 6007 }, { "epoch": 0.38, "grad_norm": 4.707515386143622, "learning_rate": 7.1494064883922655e-06, "loss": 0.2996, "step": 6008 }, { "epoch": 0.38, "grad_norm": 2.4864084445145718, "learning_rate": 7.148486876219235e-06, "loss": 0.2861, "step": 6009 }, { "epoch": 0.38, "grad_norm": 1.7944702242957387, "learning_rate": 7.14756717490167e-06, "loss": 0.3191, "step": 6010 }, { "epoch": 0.38, "grad_norm": 1.9664301090511955, "learning_rate": 7.146647384477725e-06, "loss": 0.3012, "step": 6011 }, { "epoch": 0.38, "grad_norm": 3.66737973962642, "learning_rate": 7.145727504985569e-06, "loss": 0.2933, "step": 6012 }, { "epoch": 0.38, "grad_norm": 3.647342014756224, "learning_rate": 7.144807536463368e-06, "loss": 0.2938, "step": 6013 }, { "epoch": 0.38, "grad_norm": 2.518720756555653, "learning_rate": 7.143887478949292e-06, "loss": 0.3153, "step": 6014 }, { "epoch": 0.38, "grad_norm": 1.7140269567116875, "learning_rate": 7.142967332481516e-06, "loss": 0.3074, "step": 6015 }, { "epoch": 0.38, "grad_norm": 1.9129138897222344, "learning_rate": 7.142047097098219e-06, "loss": 0.3017, "step": 6016 }, { "epoch": 0.38, "grad_norm": 1.826972517723574, "learning_rate": 7.1411267728375845e-06, "loss": 0.3117, "step": 6017 }, { "epoch": 0.38, "grad_norm": 7.980931301259122, "learning_rate": 7.140206359737797e-06, "loss": 0.2969, "step": 6018 }, { "epoch": 0.38, "grad_norm": 2.2998712903592073, "learning_rate": 7.139285857837046e-06, "loss": 0.3169, "step": 6019 }, { "epoch": 0.38, "grad_norm": 1.756500055755841, "learning_rate": 7.138365267173524e-06, "loss": 0.31, "step": 6020 }, { "epoch": 0.38, "grad_norm": 2.2435415782378745, "learning_rate": 7.13744458778543e-06, "loss": 0.2979, "step": 6021 }, { "epoch": 0.38, "grad_norm": 2.228875636301696, "learning_rate": 7.136523819710963e-06, "loss": 0.3237, "step": 6022 }, { "epoch": 0.38, "grad_norm": 1.9203626706075438, "learning_rate": 7.135602962988327e-06, "loss": 0.3089, "step": 6023 }, { "epoch": 0.38, "grad_norm": 3.2807773580502912, "learning_rate": 7.134682017655732e-06, "loss": 0.2801, "step": 6024 }, { "epoch": 0.38, "grad_norm": 1.883383447407521, "learning_rate": 7.1337609837513875e-06, "loss": 0.3036, "step": 6025 }, { "epoch": 0.38, "grad_norm": 1.9989008456799338, "learning_rate": 7.132839861313511e-06, "loss": 0.2943, "step": 6026 }, { "epoch": 0.38, "grad_norm": 2.7139691608818124, "learning_rate": 7.13191865038032e-06, "loss": 0.3214, "step": 6027 }, { "epoch": 0.38, "grad_norm": 4.205294008880047, "learning_rate": 7.130997350990037e-06, "loss": 0.3241, "step": 6028 }, { "epoch": 0.38, "grad_norm": 1.8248584968970416, "learning_rate": 7.130075963180889e-06, "loss": 0.3089, "step": 6029 }, { "epoch": 0.38, "grad_norm": 1.7235284044945178, "learning_rate": 7.129154486991105e-06, "loss": 0.2949, "step": 6030 }, { "epoch": 0.38, "grad_norm": 1.491676776637185, "learning_rate": 7.128232922458922e-06, "loss": 0.2964, "step": 6031 }, { "epoch": 0.38, "grad_norm": 2.141666058168301, "learning_rate": 7.127311269622573e-06, "loss": 0.2879, "step": 6032 }, { "epoch": 0.38, "grad_norm": 2.352111095265622, "learning_rate": 7.126389528520301e-06, "loss": 0.3072, "step": 6033 }, { "epoch": 0.38, "grad_norm": 1.7030797976818755, "learning_rate": 7.125467699190351e-06, "loss": 0.3125, "step": 6034 }, { "epoch": 0.38, "grad_norm": 1.6068189899876875, "learning_rate": 7.1245457816709705e-06, "loss": 0.2837, "step": 6035 }, { "epoch": 0.38, "grad_norm": 0.6235180671792958, "learning_rate": 7.123623776000412e-06, "loss": 0.4872, "step": 6036 }, { "epoch": 0.38, "grad_norm": 2.5349126519788254, "learning_rate": 7.1227016822169315e-06, "loss": 0.3251, "step": 6037 }, { "epoch": 0.38, "grad_norm": 1.72208190723723, "learning_rate": 7.121779500358788e-06, "loss": 0.2987, "step": 6038 }, { "epoch": 0.38, "grad_norm": 2.946054516048329, "learning_rate": 7.120857230464244e-06, "loss": 0.3218, "step": 6039 }, { "epoch": 0.38, "grad_norm": 1.9674156888459624, "learning_rate": 7.119934872571566e-06, "loss": 0.2882, "step": 6040 }, { "epoch": 0.38, "grad_norm": 2.4279407274139766, "learning_rate": 7.119012426719024e-06, "loss": 0.3161, "step": 6041 }, { "epoch": 0.38, "grad_norm": 2.351650021155563, "learning_rate": 7.118089892944894e-06, "loss": 0.3239, "step": 6042 }, { "epoch": 0.38, "grad_norm": 3.0681820473331536, "learning_rate": 7.117167271287453e-06, "loss": 0.2923, "step": 6043 }, { "epoch": 0.38, "grad_norm": 3.118400686194755, "learning_rate": 7.116244561784979e-06, "loss": 0.3165, "step": 6044 }, { "epoch": 0.38, "grad_norm": 2.6274338441549956, "learning_rate": 7.11532176447576e-06, "loss": 0.3317, "step": 6045 }, { "epoch": 0.38, "grad_norm": 1.8604762163536679, "learning_rate": 7.114398879398084e-06, "loss": 0.296, "step": 6046 }, { "epoch": 0.38, "grad_norm": 2.4985839502161586, "learning_rate": 7.113475906590243e-06, "loss": 0.3202, "step": 6047 }, { "epoch": 0.38, "grad_norm": 2.256378121651349, "learning_rate": 7.112552846090533e-06, "loss": 0.3179, "step": 6048 }, { "epoch": 0.38, "grad_norm": 4.719310780296274, "learning_rate": 7.111629697937253e-06, "loss": 0.3108, "step": 6049 }, { "epoch": 0.38, "grad_norm": 2.0849549900381477, "learning_rate": 7.110706462168706e-06, "loss": 0.3093, "step": 6050 }, { "epoch": 0.38, "grad_norm": 2.0654583039100354, "learning_rate": 7.109783138823199e-06, "loss": 0.3036, "step": 6051 }, { "epoch": 0.38, "grad_norm": 3.3209794508592054, "learning_rate": 7.108859727939042e-06, "loss": 0.3176, "step": 6052 }, { "epoch": 0.38, "grad_norm": 2.340267151334955, "learning_rate": 7.107936229554549e-06, "loss": 0.297, "step": 6053 }, { "epoch": 0.38, "grad_norm": 1.9390822601459914, "learning_rate": 7.107012643708039e-06, "loss": 0.2924, "step": 6054 }, { "epoch": 0.38, "grad_norm": 4.77185725744881, "learning_rate": 7.10608897043783e-06, "loss": 0.2911, "step": 6055 }, { "epoch": 0.38, "grad_norm": 1.7767442966723028, "learning_rate": 7.10516520978225e-06, "loss": 0.3039, "step": 6056 }, { "epoch": 0.38, "grad_norm": 6.794659391202409, "learning_rate": 7.104241361779627e-06, "loss": 0.2888, "step": 6057 }, { "epoch": 0.38, "grad_norm": 2.4730653608893003, "learning_rate": 7.10331742646829e-06, "loss": 0.3009, "step": 6058 }, { "epoch": 0.38, "grad_norm": 3.28545990046778, "learning_rate": 7.102393403886578e-06, "loss": 0.3212, "step": 6059 }, { "epoch": 0.38, "grad_norm": 2.170668734452948, "learning_rate": 7.101469294072829e-06, "loss": 0.3019, "step": 6060 }, { "epoch": 0.38, "grad_norm": 2.303197692180006, "learning_rate": 7.100545097065389e-06, "loss": 0.2969, "step": 6061 }, { "epoch": 0.38, "grad_norm": 4.766199437062035, "learning_rate": 7.099620812902599e-06, "loss": 0.2956, "step": 6062 }, { "epoch": 0.38, "grad_norm": 2.234164688795876, "learning_rate": 7.098696441622814e-06, "loss": 0.2914, "step": 6063 }, { "epoch": 0.38, "grad_norm": 2.962299086533189, "learning_rate": 7.097771983264384e-06, "loss": 0.3295, "step": 6064 }, { "epoch": 0.38, "grad_norm": 1.501685559116226, "learning_rate": 7.096847437865671e-06, "loss": 0.2999, "step": 6065 }, { "epoch": 0.38, "grad_norm": 2.0932316992331916, "learning_rate": 7.095922805465031e-06, "loss": 0.3018, "step": 6066 }, { "epoch": 0.38, "grad_norm": 4.123320078935363, "learning_rate": 7.0949980861008315e-06, "loss": 0.2807, "step": 6067 }, { "epoch": 0.38, "grad_norm": 3.0151895169381566, "learning_rate": 7.0940732798114395e-06, "loss": 0.3129, "step": 6068 }, { "epoch": 0.38, "grad_norm": 1.5562491480090626, "learning_rate": 7.0931483866352305e-06, "loss": 0.3018, "step": 6069 }, { "epoch": 0.38, "grad_norm": 2.2349881269372105, "learning_rate": 7.092223406610574e-06, "loss": 0.3057, "step": 6070 }, { "epoch": 0.38, "grad_norm": 2.8705404189940347, "learning_rate": 7.091298339775854e-06, "loss": 0.3209, "step": 6071 }, { "epoch": 0.38, "grad_norm": 1.9862349323270718, "learning_rate": 7.0903731861694505e-06, "loss": 0.304, "step": 6072 }, { "epoch": 0.38, "grad_norm": 3.685636385013489, "learning_rate": 7.089447945829752e-06, "loss": 0.2902, "step": 6073 }, { "epoch": 0.38, "grad_norm": 2.142857306447189, "learning_rate": 7.088522618795145e-06, "loss": 0.2931, "step": 6074 }, { "epoch": 0.38, "grad_norm": 1.3857036789661261, "learning_rate": 7.087597205104026e-06, "loss": 0.3033, "step": 6075 }, { "epoch": 0.38, "grad_norm": 4.160089716025319, "learning_rate": 7.08667170479479e-06, "loss": 0.3069, "step": 6076 }, { "epoch": 0.38, "grad_norm": 2.1547547172595234, "learning_rate": 7.085746117905841e-06, "loss": 0.2862, "step": 6077 }, { "epoch": 0.38, "grad_norm": 1.707743010491154, "learning_rate": 7.084820444475579e-06, "loss": 0.3067, "step": 6078 }, { "epoch": 0.38, "grad_norm": 2.786143828091818, "learning_rate": 7.083894684542413e-06, "loss": 0.3, "step": 6079 }, { "epoch": 0.38, "grad_norm": 4.907577231597122, "learning_rate": 7.082968838144756e-06, "loss": 0.3341, "step": 6080 }, { "epoch": 0.38, "grad_norm": 2.2263893472766974, "learning_rate": 7.082042905321022e-06, "loss": 0.3041, "step": 6081 }, { "epoch": 0.38, "grad_norm": 3.33554888014138, "learning_rate": 7.081116886109629e-06, "loss": 0.3103, "step": 6082 }, { "epoch": 0.38, "grad_norm": 1.8114787057696915, "learning_rate": 7.080190780549002e-06, "loss": 0.3229, "step": 6083 }, { "epoch": 0.38, "grad_norm": 1.9649870410684962, "learning_rate": 7.079264588677564e-06, "loss": 0.3382, "step": 6084 }, { "epoch": 0.38, "grad_norm": 1.799504074440034, "learning_rate": 7.078338310533744e-06, "loss": 0.3073, "step": 6085 }, { "epoch": 0.38, "grad_norm": 2.6390799041427644, "learning_rate": 7.077411946155975e-06, "loss": 0.3015, "step": 6086 }, { "epoch": 0.38, "grad_norm": 3.308849381739574, "learning_rate": 7.076485495582696e-06, "loss": 0.2826, "step": 6087 }, { "epoch": 0.38, "grad_norm": 1.7807209331700014, "learning_rate": 7.0755589588523464e-06, "loss": 0.3127, "step": 6088 }, { "epoch": 0.38, "grad_norm": 2.062526210664507, "learning_rate": 7.074632336003368e-06, "loss": 0.3222, "step": 6089 }, { "epoch": 0.38, "grad_norm": 1.8211249363935218, "learning_rate": 7.0737056270742085e-06, "loss": 0.2989, "step": 6090 }, { "epoch": 0.38, "grad_norm": 3.8539341258880238, "learning_rate": 7.072778832103321e-06, "loss": 0.3089, "step": 6091 }, { "epoch": 0.38, "grad_norm": 1.903834652848273, "learning_rate": 7.071851951129156e-06, "loss": 0.3173, "step": 6092 }, { "epoch": 0.38, "grad_norm": 2.2189344812108156, "learning_rate": 7.070924984190175e-06, "loss": 0.2947, "step": 6093 }, { "epoch": 0.38, "grad_norm": 0.6056192596870574, "learning_rate": 7.069997931324837e-06, "loss": 0.5081, "step": 6094 }, { "epoch": 0.38, "grad_norm": 2.701728321342455, "learning_rate": 7.069070792571608e-06, "loss": 0.3021, "step": 6095 }, { "epoch": 0.38, "grad_norm": 2.3949983094981673, "learning_rate": 7.068143567968958e-06, "loss": 0.3015, "step": 6096 }, { "epoch": 0.38, "grad_norm": 2.2379630527545697, "learning_rate": 7.067216257555357e-06, "loss": 0.3115, "step": 6097 }, { "epoch": 0.38, "grad_norm": 0.5775514592063432, "learning_rate": 7.0662888613692815e-06, "loss": 0.4892, "step": 6098 }, { "epoch": 0.38, "grad_norm": 3.39311315109939, "learning_rate": 7.065361379449213e-06, "loss": 0.3138, "step": 6099 }, { "epoch": 0.38, "grad_norm": 3.2324433257890504, "learning_rate": 7.06443381183363e-06, "loss": 0.2861, "step": 6100 }, { "epoch": 0.38, "grad_norm": 1.8595551351518653, "learning_rate": 7.063506158561022e-06, "loss": 0.2958, "step": 6101 }, { "epoch": 0.38, "grad_norm": 2.989934325758585, "learning_rate": 7.062578419669877e-06, "loss": 0.2935, "step": 6102 }, { "epoch": 0.38, "grad_norm": 1.4691404106150727, "learning_rate": 7.061650595198692e-06, "loss": 0.3079, "step": 6103 }, { "epoch": 0.38, "grad_norm": 1.7616358926720772, "learning_rate": 7.060722685185961e-06, "loss": 0.2921, "step": 6104 }, { "epoch": 0.38, "grad_norm": 4.8235942004169186, "learning_rate": 7.0597946896701854e-06, "loss": 0.2923, "step": 6105 }, { "epoch": 0.38, "grad_norm": 1.6410184610210388, "learning_rate": 7.05886660868987e-06, "loss": 0.2969, "step": 6106 }, { "epoch": 0.38, "grad_norm": 1.6815154076557426, "learning_rate": 7.057938442283523e-06, "loss": 0.2984, "step": 6107 }, { "epoch": 0.38, "grad_norm": 1.720821468920267, "learning_rate": 7.057010190489651e-06, "loss": 0.289, "step": 6108 }, { "epoch": 0.38, "grad_norm": 1.6209093699482704, "learning_rate": 7.056081853346776e-06, "loss": 0.2944, "step": 6109 }, { "epoch": 0.38, "grad_norm": 0.6758360820245468, "learning_rate": 7.055153430893412e-06, "loss": 0.4849, "step": 6110 }, { "epoch": 0.38, "grad_norm": 2.8280460061261037, "learning_rate": 7.054224923168083e-06, "loss": 0.3002, "step": 6111 }, { "epoch": 0.38, "grad_norm": 1.9719266559597206, "learning_rate": 7.053296330209309e-06, "loss": 0.3061, "step": 6112 }, { "epoch": 0.38, "grad_norm": 3.6649698942428275, "learning_rate": 7.052367652055628e-06, "loss": 0.3133, "step": 6113 }, { "epoch": 0.38, "grad_norm": 5.329141100603012, "learning_rate": 7.051438888745566e-06, "loss": 0.322, "step": 6114 }, { "epoch": 0.38, "grad_norm": 1.6134970792257957, "learning_rate": 7.050510040317661e-06, "loss": 0.3054, "step": 6115 }, { "epoch": 0.38, "grad_norm": 5.600146444124667, "learning_rate": 7.0495811068104505e-06, "loss": 0.3033, "step": 6116 }, { "epoch": 0.38, "grad_norm": 4.484537435259375, "learning_rate": 7.048652088262481e-06, "loss": 0.3137, "step": 6117 }, { "epoch": 0.38, "grad_norm": 2.037218912035509, "learning_rate": 7.047722984712298e-06, "loss": 0.3306, "step": 6118 }, { "epoch": 0.38, "grad_norm": 2.391179915946461, "learning_rate": 7.0467937961984505e-06, "loss": 0.2994, "step": 6119 }, { "epoch": 0.38, "grad_norm": 2.731809271949276, "learning_rate": 7.045864522759492e-06, "loss": 0.3172, "step": 6120 }, { "epoch": 0.38, "grad_norm": 1.8965117256482238, "learning_rate": 7.044935164433982e-06, "loss": 0.2978, "step": 6121 }, { "epoch": 0.39, "grad_norm": 2.130368883395997, "learning_rate": 7.0440057212604784e-06, "loss": 0.3091, "step": 6122 }, { "epoch": 0.39, "grad_norm": 1.6828950671125487, "learning_rate": 7.043076193277548e-06, "loss": 0.2963, "step": 6123 }, { "epoch": 0.39, "grad_norm": 2.812860545122394, "learning_rate": 7.042146580523757e-06, "loss": 0.2947, "step": 6124 }, { "epoch": 0.39, "grad_norm": 3.2461511729264543, "learning_rate": 7.0412168830376785e-06, "loss": 0.2904, "step": 6125 }, { "epoch": 0.39, "grad_norm": 5.780148370156523, "learning_rate": 7.040287100857885e-06, "loss": 0.3078, "step": 6126 }, { "epoch": 0.39, "grad_norm": 2.443994902112538, "learning_rate": 7.039357234022954e-06, "loss": 0.2968, "step": 6127 }, { "epoch": 0.39, "grad_norm": 1.738888871812275, "learning_rate": 7.03842728257147e-06, "loss": 0.2973, "step": 6128 }, { "epoch": 0.39, "grad_norm": 1.7900939277729429, "learning_rate": 7.037497246542018e-06, "loss": 0.3074, "step": 6129 }, { "epoch": 0.39, "grad_norm": 4.45456775526933, "learning_rate": 7.036567125973187e-06, "loss": 0.2964, "step": 6130 }, { "epoch": 0.39, "grad_norm": 2.7554449753870167, "learning_rate": 7.035636920903568e-06, "loss": 0.304, "step": 6131 }, { "epoch": 0.39, "grad_norm": 1.5027604018520393, "learning_rate": 7.034706631371756e-06, "loss": 0.3056, "step": 6132 }, { "epoch": 0.39, "grad_norm": 3.6697743466379795, "learning_rate": 7.033776257416354e-06, "loss": 0.3144, "step": 6133 }, { "epoch": 0.39, "grad_norm": 2.565734714733023, "learning_rate": 7.03284579907596e-06, "loss": 0.2951, "step": 6134 }, { "epoch": 0.39, "grad_norm": 2.1532350004877565, "learning_rate": 7.031915256389186e-06, "loss": 0.3155, "step": 6135 }, { "epoch": 0.39, "grad_norm": 2.8870110224742196, "learning_rate": 7.030984629394637e-06, "loss": 0.3057, "step": 6136 }, { "epoch": 0.39, "grad_norm": 2.501300650823364, "learning_rate": 7.03005391813093e-06, "loss": 0.3081, "step": 6137 }, { "epoch": 0.39, "grad_norm": 3.4477855092138006, "learning_rate": 7.029123122636678e-06, "loss": 0.3133, "step": 6138 }, { "epoch": 0.39, "grad_norm": 1.3984298105624784, "learning_rate": 7.028192242950506e-06, "loss": 0.3016, "step": 6139 }, { "epoch": 0.39, "grad_norm": 5.2803001292669896, "learning_rate": 7.027261279111033e-06, "loss": 0.3061, "step": 6140 }, { "epoch": 0.39, "grad_norm": 42.62180019802184, "learning_rate": 7.02633023115689e-06, "loss": 0.3123, "step": 6141 }, { "epoch": 0.39, "grad_norm": 1.8786333964794473, "learning_rate": 7.025399099126705e-06, "loss": 0.2947, "step": 6142 }, { "epoch": 0.39, "grad_norm": 0.6413535434928973, "learning_rate": 7.024467883059116e-06, "loss": 0.5041, "step": 6143 }, { "epoch": 0.39, "grad_norm": 1.591790878851951, "learning_rate": 7.023536582992758e-06, "loss": 0.2987, "step": 6144 }, { "epoch": 0.39, "grad_norm": 2.416299671075495, "learning_rate": 7.022605198966274e-06, "loss": 0.2956, "step": 6145 }, { "epoch": 0.39, "grad_norm": 2.183675660620761, "learning_rate": 7.021673731018305e-06, "loss": 0.2978, "step": 6146 }, { "epoch": 0.39, "grad_norm": 2.2303023644855005, "learning_rate": 7.0207421791875045e-06, "loss": 0.3042, "step": 6147 }, { "epoch": 0.39, "grad_norm": 2.1889082267322033, "learning_rate": 7.01981054351252e-06, "loss": 0.2879, "step": 6148 }, { "epoch": 0.39, "grad_norm": 5.339385146404882, "learning_rate": 7.0188788240320095e-06, "loss": 0.3128, "step": 6149 }, { "epoch": 0.39, "grad_norm": 1.9486207992351334, "learning_rate": 7.017947020784629e-06, "loss": 0.307, "step": 6150 }, { "epoch": 0.39, "grad_norm": 7.696794029026286, "learning_rate": 7.017015133809044e-06, "loss": 0.2968, "step": 6151 }, { "epoch": 0.39, "grad_norm": 2.0364165147095457, "learning_rate": 7.016083163143918e-06, "loss": 0.2984, "step": 6152 }, { "epoch": 0.39, "grad_norm": 1.7201399926681409, "learning_rate": 7.015151108827921e-06, "loss": 0.2891, "step": 6153 }, { "epoch": 0.39, "grad_norm": 2.2876844560550476, "learning_rate": 7.014218970899724e-06, "loss": 0.3287, "step": 6154 }, { "epoch": 0.39, "grad_norm": 1.5222707925705408, "learning_rate": 7.013286749398008e-06, "loss": 0.3477, "step": 6155 }, { "epoch": 0.39, "grad_norm": 3.754050839924, "learning_rate": 7.0123544443614445e-06, "loss": 0.2759, "step": 6156 }, { "epoch": 0.39, "grad_norm": 2.2849969363113685, "learning_rate": 7.011422055828721e-06, "loss": 0.3232, "step": 6157 }, { "epoch": 0.39, "grad_norm": 1.9947993971964004, "learning_rate": 7.010489583838525e-06, "loss": 0.3348, "step": 6158 }, { "epoch": 0.39, "grad_norm": 2.0603287475019276, "learning_rate": 7.009557028429547e-06, "loss": 0.2989, "step": 6159 }, { "epoch": 0.39, "grad_norm": 1.601370430986458, "learning_rate": 7.008624389640476e-06, "loss": 0.3173, "step": 6160 }, { "epoch": 0.39, "grad_norm": 3.2302915464410913, "learning_rate": 7.0076916675100115e-06, "loss": 0.3141, "step": 6161 }, { "epoch": 0.39, "grad_norm": 3.503792068663737, "learning_rate": 7.0067588620768535e-06, "loss": 0.3128, "step": 6162 }, { "epoch": 0.39, "grad_norm": 1.704506418702534, "learning_rate": 7.005825973379707e-06, "loss": 0.2945, "step": 6163 }, { "epoch": 0.39, "grad_norm": 2.332277329306159, "learning_rate": 7.004893001457277e-06, "loss": 0.3014, "step": 6164 }, { "epoch": 0.39, "grad_norm": 1.6437128335933913, "learning_rate": 7.003959946348277e-06, "loss": 0.3038, "step": 6165 }, { "epoch": 0.39, "grad_norm": 2.875263815796361, "learning_rate": 7.003026808091417e-06, "loss": 0.308, "step": 6166 }, { "epoch": 0.39, "grad_norm": 1.3694084605402639, "learning_rate": 7.002093586725419e-06, "loss": 0.3067, "step": 6167 }, { "epoch": 0.39, "grad_norm": 1.8694930266490688, "learning_rate": 7.001160282289e-06, "loss": 0.305, "step": 6168 }, { "epoch": 0.39, "grad_norm": 1.7609396688890446, "learning_rate": 7.000226894820888e-06, "loss": 0.2942, "step": 6169 }, { "epoch": 0.39, "grad_norm": 2.0930306386116806, "learning_rate": 6.99929342435981e-06, "loss": 0.323, "step": 6170 }, { "epoch": 0.39, "grad_norm": 2.8113095138643645, "learning_rate": 6.998359870944495e-06, "loss": 0.2783, "step": 6171 }, { "epoch": 0.39, "grad_norm": 2.371474334736473, "learning_rate": 6.99742623461368e-06, "loss": 0.3029, "step": 6172 }, { "epoch": 0.39, "grad_norm": 1.530787928579816, "learning_rate": 6.996492515406104e-06, "loss": 0.3019, "step": 6173 }, { "epoch": 0.39, "grad_norm": 2.1724544346209704, "learning_rate": 6.995558713360505e-06, "loss": 0.3042, "step": 6174 }, { "epoch": 0.39, "grad_norm": 2.301487508566316, "learning_rate": 6.994624828515632e-06, "loss": 0.2969, "step": 6175 }, { "epoch": 0.39, "grad_norm": 1.9765221901474412, "learning_rate": 6.993690860910232e-06, "loss": 0.3137, "step": 6176 }, { "epoch": 0.39, "grad_norm": 2.297429021572898, "learning_rate": 6.992756810583057e-06, "loss": 0.3203, "step": 6177 }, { "epoch": 0.39, "grad_norm": 1.4405690815449252, "learning_rate": 6.991822677572862e-06, "loss": 0.2862, "step": 6178 }, { "epoch": 0.39, "grad_norm": 1.9009653441134091, "learning_rate": 6.9908884619184054e-06, "loss": 0.2977, "step": 6179 }, { "epoch": 0.39, "grad_norm": 4.049451385108946, "learning_rate": 6.98995416365845e-06, "loss": 0.302, "step": 6180 }, { "epoch": 0.39, "grad_norm": 2.8342916199501595, "learning_rate": 6.989019782831764e-06, "loss": 0.3101, "step": 6181 }, { "epoch": 0.39, "grad_norm": 3.0547641381279673, "learning_rate": 6.988085319477114e-06, "loss": 0.3038, "step": 6182 }, { "epoch": 0.39, "grad_norm": 2.3825054175343645, "learning_rate": 6.987150773633271e-06, "loss": 0.3111, "step": 6183 }, { "epoch": 0.39, "grad_norm": 1.8135322516377184, "learning_rate": 6.9862161453390145e-06, "loss": 0.2849, "step": 6184 }, { "epoch": 0.39, "grad_norm": 1.4229262153991158, "learning_rate": 6.9852814346331225e-06, "loss": 0.3119, "step": 6185 }, { "epoch": 0.39, "grad_norm": 4.675703339366878, "learning_rate": 6.984346641554376e-06, "loss": 0.2941, "step": 6186 }, { "epoch": 0.39, "grad_norm": 2.055667374737458, "learning_rate": 6.983411766141563e-06, "loss": 0.3274, "step": 6187 }, { "epoch": 0.39, "grad_norm": 2.6502831156998723, "learning_rate": 6.9824768084334736e-06, "loss": 0.2932, "step": 6188 }, { "epoch": 0.39, "grad_norm": 1.7299002277457307, "learning_rate": 6.9815417684689e-06, "loss": 0.3117, "step": 6189 }, { "epoch": 0.39, "grad_norm": 1.6634212748601143, "learning_rate": 6.980606646286637e-06, "loss": 0.3262, "step": 6190 }, { "epoch": 0.39, "grad_norm": 2.774921914155989, "learning_rate": 6.97967144192549e-06, "loss": 0.3047, "step": 6191 }, { "epoch": 0.39, "grad_norm": 2.1742148632581366, "learning_rate": 6.978736155424255e-06, "loss": 0.2885, "step": 6192 }, { "epoch": 0.39, "grad_norm": 2.024499807094335, "learning_rate": 6.977800786821744e-06, "loss": 0.2962, "step": 6193 }, { "epoch": 0.39, "grad_norm": 2.601096611957067, "learning_rate": 6.976865336156765e-06, "loss": 0.3191, "step": 6194 }, { "epoch": 0.39, "grad_norm": 1.990699948167919, "learning_rate": 6.975929803468133e-06, "loss": 0.3044, "step": 6195 }, { "epoch": 0.39, "grad_norm": 1.4728847852061975, "learning_rate": 6.974994188794662e-06, "loss": 0.2947, "step": 6196 }, { "epoch": 0.39, "grad_norm": 1.4848283441348078, "learning_rate": 6.974058492175176e-06, "loss": 0.2987, "step": 6197 }, { "epoch": 0.39, "grad_norm": 1.5987230287646015, "learning_rate": 6.973122713648495e-06, "loss": 0.2955, "step": 6198 }, { "epoch": 0.39, "grad_norm": 3.8083417251027383, "learning_rate": 6.97218685325345e-06, "loss": 0.293, "step": 6199 }, { "epoch": 0.39, "grad_norm": 1.970791728847927, "learning_rate": 6.9712509110288686e-06, "loss": 0.2979, "step": 6200 }, { "epoch": 0.39, "grad_norm": 2.2241409000408114, "learning_rate": 6.970314887013585e-06, "loss": 0.3084, "step": 6201 }, { "epoch": 0.39, "grad_norm": 1.776637136199845, "learning_rate": 6.969378781246436e-06, "loss": 0.3005, "step": 6202 }, { "epoch": 0.39, "grad_norm": 2.3347987153060936, "learning_rate": 6.968442593766266e-06, "loss": 0.319, "step": 6203 }, { "epoch": 0.39, "grad_norm": 2.2445610065185355, "learning_rate": 6.967506324611915e-06, "loss": 0.314, "step": 6204 }, { "epoch": 0.39, "grad_norm": 1.6001916367689244, "learning_rate": 6.9665699738222316e-06, "loss": 0.2999, "step": 6205 }, { "epoch": 0.39, "grad_norm": 4.2452831174311605, "learning_rate": 6.965633541436066e-06, "loss": 0.3092, "step": 6206 }, { "epoch": 0.39, "grad_norm": 2.9719513079696007, "learning_rate": 6.964697027492277e-06, "loss": 0.3064, "step": 6207 }, { "epoch": 0.39, "grad_norm": 3.3324282619334165, "learning_rate": 6.963760432029716e-06, "loss": 0.3328, "step": 6208 }, { "epoch": 0.39, "grad_norm": 3.3561641089060426, "learning_rate": 6.9628237550872465e-06, "loss": 0.3092, "step": 6209 }, { "epoch": 0.39, "grad_norm": 2.5942604909686575, "learning_rate": 6.961886996703733e-06, "loss": 0.3166, "step": 6210 }, { "epoch": 0.39, "grad_norm": 152.38072679946387, "learning_rate": 6.960950156918045e-06, "loss": 0.2966, "step": 6211 }, { "epoch": 0.39, "grad_norm": 1.6104351817313225, "learning_rate": 6.960013235769051e-06, "loss": 0.287, "step": 6212 }, { "epoch": 0.39, "grad_norm": 1.9535967986263134, "learning_rate": 6.959076233295625e-06, "loss": 0.3308, "step": 6213 }, { "epoch": 0.39, "grad_norm": 27.233761279908748, "learning_rate": 6.958139149536648e-06, "loss": 0.3323, "step": 6214 }, { "epoch": 0.39, "grad_norm": 2.0368906440057692, "learning_rate": 6.957201984531e-06, "loss": 0.3064, "step": 6215 }, { "epoch": 0.39, "grad_norm": 0.6859032984801705, "learning_rate": 6.956264738317564e-06, "loss": 0.5166, "step": 6216 }, { "epoch": 0.39, "grad_norm": 7.352088624571709, "learning_rate": 6.9553274109352305e-06, "loss": 0.3143, "step": 6217 }, { "epoch": 0.39, "grad_norm": 3.8277867727762485, "learning_rate": 6.954390002422889e-06, "loss": 0.2988, "step": 6218 }, { "epoch": 0.39, "grad_norm": 7.602359366787512, "learning_rate": 6.953452512819435e-06, "loss": 0.3167, "step": 6219 }, { "epoch": 0.39, "grad_norm": 1.5034782055592268, "learning_rate": 6.952514942163766e-06, "loss": 0.2939, "step": 6220 }, { "epoch": 0.39, "grad_norm": 1.8499237300295603, "learning_rate": 6.951577290494784e-06, "loss": 0.3092, "step": 6221 }, { "epoch": 0.39, "grad_norm": 2.5586430421078536, "learning_rate": 6.950639557851395e-06, "loss": 0.3071, "step": 6222 }, { "epoch": 0.39, "grad_norm": 31.27574456336476, "learning_rate": 6.949701744272506e-06, "loss": 0.3065, "step": 6223 }, { "epoch": 0.39, "grad_norm": 2.0730910107267553, "learning_rate": 6.9487638497970266e-06, "loss": 0.3084, "step": 6224 }, { "epoch": 0.39, "grad_norm": 2.949287808216297, "learning_rate": 6.947825874463876e-06, "loss": 0.3057, "step": 6225 }, { "epoch": 0.39, "grad_norm": 1.9460132840550055, "learning_rate": 6.946887818311969e-06, "loss": 0.292, "step": 6226 }, { "epoch": 0.39, "grad_norm": 1.5116987370461523, "learning_rate": 6.945949681380229e-06, "loss": 0.3014, "step": 6227 }, { "epoch": 0.39, "grad_norm": 1.727836623745217, "learning_rate": 6.9450114637075785e-06, "loss": 0.3037, "step": 6228 }, { "epoch": 0.39, "grad_norm": 2.5963086534531157, "learning_rate": 6.944073165332949e-06, "loss": 0.303, "step": 6229 }, { "epoch": 0.39, "grad_norm": 2.5931571148043875, "learning_rate": 6.943134786295272e-06, "loss": 0.3196, "step": 6230 }, { "epoch": 0.39, "grad_norm": 1.749088870863373, "learning_rate": 6.942196326633479e-06, "loss": 0.298, "step": 6231 }, { "epoch": 0.39, "grad_norm": 2.0658775691579407, "learning_rate": 6.941257786386511e-06, "loss": 0.3008, "step": 6232 }, { "epoch": 0.39, "grad_norm": 1.504533425052485, "learning_rate": 6.940319165593312e-06, "loss": 0.2871, "step": 6233 }, { "epoch": 0.39, "grad_norm": 1.921881588484143, "learning_rate": 6.939380464292822e-06, "loss": 0.3153, "step": 6234 }, { "epoch": 0.39, "grad_norm": 1.8339638466264867, "learning_rate": 6.938441682523992e-06, "loss": 0.3271, "step": 6235 }, { "epoch": 0.39, "grad_norm": 3.858992607009354, "learning_rate": 6.9375028203257745e-06, "loss": 0.3066, "step": 6236 }, { "epoch": 0.39, "grad_norm": 2.5885599558565247, "learning_rate": 6.936563877737124e-06, "loss": 0.3032, "step": 6237 }, { "epoch": 0.39, "grad_norm": 1.4347952306799847, "learning_rate": 6.935624854796996e-06, "loss": 0.2824, "step": 6238 }, { "epoch": 0.39, "grad_norm": 2.0816583249155474, "learning_rate": 6.934685751544356e-06, "loss": 0.2944, "step": 6239 }, { "epoch": 0.39, "grad_norm": 1.806656978109954, "learning_rate": 6.933746568018168e-06, "loss": 0.2914, "step": 6240 }, { "epoch": 0.39, "grad_norm": 2.12612682284159, "learning_rate": 6.932807304257401e-06, "loss": 0.2904, "step": 6241 }, { "epoch": 0.39, "grad_norm": 2.2979071889701084, "learning_rate": 6.931867960301024e-06, "loss": 0.3017, "step": 6242 }, { "epoch": 0.39, "grad_norm": 2.028160544617963, "learning_rate": 6.930928536188015e-06, "loss": 0.3065, "step": 6243 }, { "epoch": 0.39, "grad_norm": 0.7094229070482596, "learning_rate": 6.929989031957352e-06, "loss": 0.4919, "step": 6244 }, { "epoch": 0.39, "grad_norm": 2.496110455846103, "learning_rate": 6.929049447648015e-06, "loss": 0.3175, "step": 6245 }, { "epoch": 0.39, "grad_norm": 2.453174483302288, "learning_rate": 6.928109783298989e-06, "loss": 0.3055, "step": 6246 }, { "epoch": 0.39, "grad_norm": 4.99618496581895, "learning_rate": 6.927170038949267e-06, "loss": 0.3059, "step": 6247 }, { "epoch": 0.39, "grad_norm": 3.6402182504686644, "learning_rate": 6.926230214637833e-06, "loss": 0.3034, "step": 6248 }, { "epoch": 0.39, "grad_norm": 1.5233747917849692, "learning_rate": 6.925290310403689e-06, "loss": 0.3063, "step": 6249 }, { "epoch": 0.39, "grad_norm": 1.6409800461397064, "learning_rate": 6.9243503262858285e-06, "loss": 0.3051, "step": 6250 }, { "epoch": 0.39, "grad_norm": 1.977322549412839, "learning_rate": 6.9234102623232555e-06, "loss": 0.2981, "step": 6251 }, { "epoch": 0.39, "grad_norm": 0.656496815216476, "learning_rate": 6.922470118554975e-06, "loss": 0.5009, "step": 6252 }, { "epoch": 0.39, "grad_norm": 1.431708885245252, "learning_rate": 6.921529895019995e-06, "loss": 0.2936, "step": 6253 }, { "epoch": 0.39, "grad_norm": 2.556348654702341, "learning_rate": 6.920589591757324e-06, "loss": 0.3245, "step": 6254 }, { "epoch": 0.39, "grad_norm": 4.667316568236721, "learning_rate": 6.919649208805982e-06, "loss": 0.3048, "step": 6255 }, { "epoch": 0.39, "grad_norm": 7.935196027183069, "learning_rate": 6.9187087462049825e-06, "loss": 0.2968, "step": 6256 }, { "epoch": 0.39, "grad_norm": 5.454611931272937, "learning_rate": 6.917768203993351e-06, "loss": 0.3067, "step": 6257 }, { "epoch": 0.39, "grad_norm": 3.5713638831818426, "learning_rate": 6.91682758221011e-06, "loss": 0.3024, "step": 6258 }, { "epoch": 0.39, "grad_norm": 1.5883838395154235, "learning_rate": 6.915886880894288e-06, "loss": 0.3081, "step": 6259 }, { "epoch": 0.39, "grad_norm": 4.262431695447782, "learning_rate": 6.914946100084916e-06, "loss": 0.3142, "step": 6260 }, { "epoch": 0.39, "grad_norm": 1.8432977159515525, "learning_rate": 6.914005239821029e-06, "loss": 0.295, "step": 6261 }, { "epoch": 0.39, "grad_norm": 1.4358082624150301, "learning_rate": 6.913064300141664e-06, "loss": 0.3194, "step": 6262 }, { "epoch": 0.39, "grad_norm": 1.5763974867524773, "learning_rate": 6.912123281085865e-06, "loss": 0.2939, "step": 6263 }, { "epoch": 0.39, "grad_norm": 1.9178292226801221, "learning_rate": 6.911182182692674e-06, "loss": 0.3076, "step": 6264 }, { "epoch": 0.39, "grad_norm": 2.1300446005851024, "learning_rate": 6.910241005001139e-06, "loss": 0.3028, "step": 6265 }, { "epoch": 0.39, "grad_norm": 2.342598594435115, "learning_rate": 6.9092997480503125e-06, "loss": 0.2987, "step": 6266 }, { "epoch": 0.39, "grad_norm": 2.8683884499375756, "learning_rate": 6.908358411879249e-06, "loss": 0.3218, "step": 6267 }, { "epoch": 0.39, "grad_norm": 1.8350418262789883, "learning_rate": 6.907416996527003e-06, "loss": 0.2927, "step": 6268 }, { "epoch": 0.39, "grad_norm": 1.4211472264303675, "learning_rate": 6.906475502032639e-06, "loss": 0.2996, "step": 6269 }, { "epoch": 0.39, "grad_norm": 1.7646794548493567, "learning_rate": 6.90553392843522e-06, "loss": 0.2933, "step": 6270 }, { "epoch": 0.39, "grad_norm": 3.2783824298141386, "learning_rate": 6.904592275773816e-06, "loss": 0.3227, "step": 6271 }, { "epoch": 0.39, "grad_norm": 1.576439189656656, "learning_rate": 6.9036505440874915e-06, "loss": 0.3012, "step": 6272 }, { "epoch": 0.39, "grad_norm": 2.6992834889994706, "learning_rate": 6.902708733415328e-06, "loss": 0.3077, "step": 6273 }, { "epoch": 0.39, "grad_norm": 2.317652696958572, "learning_rate": 6.901766843796398e-06, "loss": 0.3153, "step": 6274 }, { "epoch": 0.39, "grad_norm": 2.432923743401709, "learning_rate": 6.900824875269785e-06, "loss": 0.3336, "step": 6275 }, { "epoch": 0.39, "grad_norm": 1.4839330873472332, "learning_rate": 6.8998828278745686e-06, "loss": 0.3213, "step": 6276 }, { "epoch": 0.39, "grad_norm": 1.5807956176225497, "learning_rate": 6.898940701649842e-06, "loss": 0.2924, "step": 6277 }, { "epoch": 0.39, "grad_norm": 3.3568996076431077, "learning_rate": 6.8979984966346914e-06, "loss": 0.3095, "step": 6278 }, { "epoch": 0.39, "grad_norm": 1.43490768163794, "learning_rate": 6.897056212868214e-06, "loss": 0.3039, "step": 6279 }, { "epoch": 0.39, "grad_norm": 1.9956618792777565, "learning_rate": 6.8961138503895005e-06, "loss": 0.3061, "step": 6280 }, { "epoch": 0.4, "grad_norm": 1.9596725519419582, "learning_rate": 6.89517140923766e-06, "loss": 0.3204, "step": 6281 }, { "epoch": 0.4, "grad_norm": 1.7387812431055278, "learning_rate": 6.89422888945179e-06, "loss": 0.2911, "step": 6282 }, { "epoch": 0.4, "grad_norm": 2.4441244823517567, "learning_rate": 6.893286291071e-06, "loss": 0.3176, "step": 6283 }, { "epoch": 0.4, "grad_norm": 1.0913105776521217, "learning_rate": 6.892343614134395e-06, "loss": 0.2994, "step": 6284 }, { "epoch": 0.4, "grad_norm": 1.550371241432521, "learning_rate": 6.891400858681097e-06, "loss": 0.3072, "step": 6285 }, { "epoch": 0.4, "grad_norm": 1.4329921081678139, "learning_rate": 6.890458024750214e-06, "loss": 0.3028, "step": 6286 }, { "epoch": 0.4, "grad_norm": 1.6129288678403952, "learning_rate": 6.889515112380871e-06, "loss": 0.3189, "step": 6287 }, { "epoch": 0.4, "grad_norm": 1.6080109905841038, "learning_rate": 6.88857212161219e-06, "loss": 0.3228, "step": 6288 }, { "epoch": 0.4, "grad_norm": 1.3992892619014397, "learning_rate": 6.887629052483299e-06, "loss": 0.2798, "step": 6289 }, { "epoch": 0.4, "grad_norm": 4.384952561702454, "learning_rate": 6.886685905033324e-06, "loss": 0.3005, "step": 6290 }, { "epoch": 0.4, "grad_norm": 1.456135948922505, "learning_rate": 6.885742679301399e-06, "loss": 0.3127, "step": 6291 }, { "epoch": 0.4, "grad_norm": 1.424520455355706, "learning_rate": 6.884799375326662e-06, "loss": 0.3069, "step": 6292 }, { "epoch": 0.4, "grad_norm": 1.9529667834212303, "learning_rate": 6.883855993148252e-06, "loss": 0.3136, "step": 6293 }, { "epoch": 0.4, "grad_norm": 1.515609439982961, "learning_rate": 6.882912532805308e-06, "loss": 0.3064, "step": 6294 }, { "epoch": 0.4, "grad_norm": 1.9749881320005336, "learning_rate": 6.8819689943369805e-06, "loss": 0.3119, "step": 6295 }, { "epoch": 0.4, "grad_norm": 1.952786377641116, "learning_rate": 6.881025377782415e-06, "loss": 0.318, "step": 6296 }, { "epoch": 0.4, "grad_norm": 1.8379560886548978, "learning_rate": 6.880081683180768e-06, "loss": 0.313, "step": 6297 }, { "epoch": 0.4, "grad_norm": 1.1068557721472867, "learning_rate": 6.879137910571191e-06, "loss": 0.2853, "step": 6298 }, { "epoch": 0.4, "grad_norm": 1.9633950582282589, "learning_rate": 6.878194059992846e-06, "loss": 0.2957, "step": 6299 }, { "epoch": 0.4, "grad_norm": 1.4827048174594124, "learning_rate": 6.8772501314848915e-06, "loss": 0.3068, "step": 6300 }, { "epoch": 0.4, "grad_norm": 1.4277887708746932, "learning_rate": 6.876306125086496e-06, "loss": 0.2938, "step": 6301 }, { "epoch": 0.4, "grad_norm": 1.445439710481941, "learning_rate": 6.8753620408368235e-06, "loss": 0.2964, "step": 6302 }, { "epoch": 0.4, "grad_norm": 2.037116657515197, "learning_rate": 6.8744178787750526e-06, "loss": 0.3028, "step": 6303 }, { "epoch": 0.4, "grad_norm": 2.177952274362125, "learning_rate": 6.873473638940354e-06, "loss": 0.2987, "step": 6304 }, { "epoch": 0.4, "grad_norm": 1.9791992705023835, "learning_rate": 6.872529321371906e-06, "loss": 0.3175, "step": 6305 }, { "epoch": 0.4, "grad_norm": 1.6868036208965926, "learning_rate": 6.87158492610889e-06, "loss": 0.3118, "step": 6306 }, { "epoch": 0.4, "grad_norm": 3.198675008942469, "learning_rate": 6.870640453190491e-06, "loss": 0.3157, "step": 6307 }, { "epoch": 0.4, "grad_norm": 2.433621451491889, "learning_rate": 6.869695902655898e-06, "loss": 0.304, "step": 6308 }, { "epoch": 0.4, "grad_norm": 1.9460073702454201, "learning_rate": 6.868751274544301e-06, "loss": 0.294, "step": 6309 }, { "epoch": 0.4, "grad_norm": 1.9670261253683712, "learning_rate": 6.867806568894893e-06, "loss": 0.311, "step": 6310 }, { "epoch": 0.4, "grad_norm": 0.6591038286039345, "learning_rate": 6.866861785746873e-06, "loss": 0.5118, "step": 6311 }, { "epoch": 0.4, "grad_norm": 1.4779175952239088, "learning_rate": 6.865916925139442e-06, "loss": 0.3067, "step": 6312 }, { "epoch": 0.4, "grad_norm": 2.511401538514108, "learning_rate": 6.864971987111804e-06, "loss": 0.3111, "step": 6313 }, { "epoch": 0.4, "grad_norm": 1.692742950586186, "learning_rate": 6.864026971703166e-06, "loss": 0.3071, "step": 6314 }, { "epoch": 0.4, "grad_norm": 5.481765192599177, "learning_rate": 6.863081878952738e-06, "loss": 0.3295, "step": 6315 }, { "epoch": 0.4, "grad_norm": 1.8603319326306396, "learning_rate": 6.8621367088997325e-06, "loss": 0.3238, "step": 6316 }, { "epoch": 0.4, "grad_norm": 1.715309388077017, "learning_rate": 6.8611914615833676e-06, "loss": 0.2939, "step": 6317 }, { "epoch": 0.4, "grad_norm": 2.9872358204036504, "learning_rate": 6.860246137042863e-06, "loss": 0.2956, "step": 6318 }, { "epoch": 0.4, "grad_norm": 1.8065665490318836, "learning_rate": 6.859300735317444e-06, "loss": 0.3002, "step": 6319 }, { "epoch": 0.4, "grad_norm": 1.342606612220326, "learning_rate": 6.858355256446333e-06, "loss": 0.3155, "step": 6320 }, { "epoch": 0.4, "grad_norm": 7.590118021556689, "learning_rate": 6.857409700468762e-06, "loss": 0.2907, "step": 6321 }, { "epoch": 0.4, "grad_norm": 1.921239443935161, "learning_rate": 6.856464067423963e-06, "loss": 0.3159, "step": 6322 }, { "epoch": 0.4, "grad_norm": 2.574425643570183, "learning_rate": 6.855518357351174e-06, "loss": 0.3008, "step": 6323 }, { "epoch": 0.4, "grad_norm": 1.5398285287025202, "learning_rate": 6.854572570289632e-06, "loss": 0.3065, "step": 6324 }, { "epoch": 0.4, "grad_norm": 2.8041753943042287, "learning_rate": 6.853626706278579e-06, "loss": 0.3078, "step": 6325 }, { "epoch": 0.4, "grad_norm": 17.057675770228045, "learning_rate": 6.852680765357262e-06, "loss": 0.3064, "step": 6326 }, { "epoch": 0.4, "grad_norm": 1.4892062230634326, "learning_rate": 6.85173474756493e-06, "loss": 0.31, "step": 6327 }, { "epoch": 0.4, "grad_norm": 2.0511590689152306, "learning_rate": 6.850788652940832e-06, "loss": 0.3012, "step": 6328 }, { "epoch": 0.4, "grad_norm": 2.0263552318565505, "learning_rate": 6.849842481524228e-06, "loss": 0.3301, "step": 6329 }, { "epoch": 0.4, "grad_norm": 1.8728865182573091, "learning_rate": 6.8488962333543715e-06, "loss": 0.3122, "step": 6330 }, { "epoch": 0.4, "grad_norm": 3.517592855766456, "learning_rate": 6.847949908470529e-06, "loss": 0.3074, "step": 6331 }, { "epoch": 0.4, "grad_norm": 4.178893106391159, "learning_rate": 6.84700350691196e-06, "loss": 0.307, "step": 6332 }, { "epoch": 0.4, "grad_norm": 3.6021767766807424, "learning_rate": 6.846057028717937e-06, "loss": 0.3197, "step": 6333 }, { "epoch": 0.4, "grad_norm": 4.501002131768941, "learning_rate": 6.845110473927727e-06, "loss": 0.3286, "step": 6334 }, { "epoch": 0.4, "grad_norm": 2.2132623633872157, "learning_rate": 6.844163842580608e-06, "loss": 0.3226, "step": 6335 }, { "epoch": 0.4, "grad_norm": 1.5350315206393872, "learning_rate": 6.8432171347158535e-06, "loss": 0.3115, "step": 6336 }, { "epoch": 0.4, "grad_norm": 1.1798307847448, "learning_rate": 6.842270350372749e-06, "loss": 0.2947, "step": 6337 }, { "epoch": 0.4, "grad_norm": 2.6360090486603727, "learning_rate": 6.8413234895905726e-06, "loss": 0.2967, "step": 6338 }, { "epoch": 0.4, "grad_norm": 2.9848580915620224, "learning_rate": 6.840376552408614e-06, "loss": 0.3115, "step": 6339 }, { "epoch": 0.4, "grad_norm": 1.3405026564912617, "learning_rate": 6.839429538866164e-06, "loss": 0.2822, "step": 6340 }, { "epoch": 0.4, "grad_norm": 1.5362126550497548, "learning_rate": 6.838482449002517e-06, "loss": 0.286, "step": 6341 }, { "epoch": 0.4, "grad_norm": 1.3069866190457384, "learning_rate": 6.837535282856966e-06, "loss": 0.2885, "step": 6342 }, { "epoch": 0.4, "grad_norm": 1.4765326992977221, "learning_rate": 6.836588040468812e-06, "loss": 0.3005, "step": 6343 }, { "epoch": 0.4, "grad_norm": 9.565457286700779, "learning_rate": 6.835640721877359e-06, "loss": 0.2854, "step": 6344 }, { "epoch": 0.4, "grad_norm": 2.559378590319128, "learning_rate": 6.834693327121913e-06, "loss": 0.3041, "step": 6345 }, { "epoch": 0.4, "grad_norm": 2.0677104007652565, "learning_rate": 6.83374585624178e-06, "loss": 0.3031, "step": 6346 }, { "epoch": 0.4, "grad_norm": 2.1149379020446104, "learning_rate": 6.832798309276275e-06, "loss": 0.3142, "step": 6347 }, { "epoch": 0.4, "grad_norm": 3.225979158875827, "learning_rate": 6.831850686264712e-06, "loss": 0.2883, "step": 6348 }, { "epoch": 0.4, "grad_norm": 1.4380784270508573, "learning_rate": 6.830902987246413e-06, "loss": 0.3088, "step": 6349 }, { "epoch": 0.4, "grad_norm": 2.1902049420419636, "learning_rate": 6.8299552122606934e-06, "loss": 0.3209, "step": 6350 }, { "epoch": 0.4, "grad_norm": 5.4983139239455925, "learning_rate": 6.829007361346885e-06, "loss": 0.3236, "step": 6351 }, { "epoch": 0.4, "grad_norm": 4.38038349341543, "learning_rate": 6.828059434544309e-06, "loss": 0.3019, "step": 6352 }, { "epoch": 0.4, "grad_norm": 1.5095839511716451, "learning_rate": 6.827111431892303e-06, "loss": 0.2996, "step": 6353 }, { "epoch": 0.4, "grad_norm": 1.3002850599763964, "learning_rate": 6.826163353430197e-06, "loss": 0.2835, "step": 6354 }, { "epoch": 0.4, "grad_norm": 1.782981876014829, "learning_rate": 6.82521519919733e-06, "loss": 0.3044, "step": 6355 }, { "epoch": 0.4, "grad_norm": 1.4652631665027267, "learning_rate": 6.8242669692330424e-06, "loss": 0.3209, "step": 6356 }, { "epoch": 0.4, "grad_norm": 2.2667979646321514, "learning_rate": 6.823318663576679e-06, "loss": 0.3175, "step": 6357 }, { "epoch": 0.4, "grad_norm": 1.3577067350887904, "learning_rate": 6.822370282267585e-06, "loss": 0.2862, "step": 6358 }, { "epoch": 0.4, "grad_norm": 1.5255784700649988, "learning_rate": 6.82142182534511e-06, "loss": 0.2999, "step": 6359 }, { "epoch": 0.4, "grad_norm": 2.7601001630013533, "learning_rate": 6.8204732928486096e-06, "loss": 0.2916, "step": 6360 }, { "epoch": 0.4, "grad_norm": 1.6091656405623176, "learning_rate": 6.819524684817439e-06, "loss": 0.312, "step": 6361 }, { "epoch": 0.4, "grad_norm": 1.7177599903981735, "learning_rate": 6.8185760012909566e-06, "loss": 0.2972, "step": 6362 }, { "epoch": 0.4, "grad_norm": 1.9267907314481778, "learning_rate": 6.817627242308525e-06, "loss": 0.3185, "step": 6363 }, { "epoch": 0.4, "grad_norm": 1.8570406736156893, "learning_rate": 6.816678407909511e-06, "loss": 0.316, "step": 6364 }, { "epoch": 0.4, "grad_norm": 2.0796908376535903, "learning_rate": 6.815729498133286e-06, "loss": 0.302, "step": 6365 }, { "epoch": 0.4, "grad_norm": 1.4157855196807505, "learning_rate": 6.814780513019214e-06, "loss": 0.3037, "step": 6366 }, { "epoch": 0.4, "grad_norm": 1.6743395572870021, "learning_rate": 6.813831452606678e-06, "loss": 0.2874, "step": 6367 }, { "epoch": 0.4, "grad_norm": 5.692394679105457, "learning_rate": 6.8128823169350535e-06, "loss": 0.3051, "step": 6368 }, { "epoch": 0.4, "grad_norm": 2.763263681427266, "learning_rate": 6.811933106043721e-06, "loss": 0.2882, "step": 6369 }, { "epoch": 0.4, "grad_norm": 2.9985345857072736, "learning_rate": 6.8109838199720655e-06, "loss": 0.3012, "step": 6370 }, { "epoch": 0.4, "grad_norm": 1.2706514808811527, "learning_rate": 6.8100344587594754e-06, "loss": 0.307, "step": 6371 }, { "epoch": 0.4, "grad_norm": 1.3051438527903696, "learning_rate": 6.809085022445341e-06, "loss": 0.3037, "step": 6372 }, { "epoch": 0.4, "grad_norm": 5.297934797149001, "learning_rate": 6.808135511069054e-06, "loss": 0.3042, "step": 6373 }, { "epoch": 0.4, "grad_norm": 5.325539927654011, "learning_rate": 6.807185924670013e-06, "loss": 0.2952, "step": 6374 }, { "epoch": 0.4, "grad_norm": 1.4080357034994866, "learning_rate": 6.80623626328762e-06, "loss": 0.2911, "step": 6375 }, { "epoch": 0.4, "grad_norm": 1.9720138672113132, "learning_rate": 6.805286526961274e-06, "loss": 0.3278, "step": 6376 }, { "epoch": 0.4, "grad_norm": 1.8250325174334556, "learning_rate": 6.804336715730385e-06, "loss": 0.3113, "step": 6377 }, { "epoch": 0.4, "grad_norm": 2.6273334819042335, "learning_rate": 6.803386829634361e-06, "loss": 0.3016, "step": 6378 }, { "epoch": 0.4, "grad_norm": 1.9125793157768267, "learning_rate": 6.8024368687126145e-06, "loss": 0.3044, "step": 6379 }, { "epoch": 0.4, "grad_norm": 2.037940233798932, "learning_rate": 6.801486833004559e-06, "loss": 0.3044, "step": 6380 }, { "epoch": 0.4, "grad_norm": 3.7520585156058943, "learning_rate": 6.8005367225496155e-06, "loss": 0.2997, "step": 6381 }, { "epoch": 0.4, "grad_norm": 2.6297250564109693, "learning_rate": 6.799586537387206e-06, "loss": 0.3251, "step": 6382 }, { "epoch": 0.4, "grad_norm": 1.6972051848994267, "learning_rate": 6.7986362775567545e-06, "loss": 0.3171, "step": 6383 }, { "epoch": 0.4, "grad_norm": 0.5832104903379132, "learning_rate": 6.79768594309769e-06, "loss": 0.4883, "step": 6384 }, { "epoch": 0.4, "grad_norm": 1.5790471983614522, "learning_rate": 6.796735534049441e-06, "loss": 0.3225, "step": 6385 }, { "epoch": 0.4, "grad_norm": 5.330783457874948, "learning_rate": 6.795785050451443e-06, "loss": 0.3118, "step": 6386 }, { "epoch": 0.4, "grad_norm": 5.835327429306243, "learning_rate": 6.7948344923431355e-06, "loss": 0.3374, "step": 6387 }, { "epoch": 0.4, "grad_norm": 1.617774730322946, "learning_rate": 6.793883859763955e-06, "loss": 0.3133, "step": 6388 }, { "epoch": 0.4, "grad_norm": 1.7923144480516422, "learning_rate": 6.792933152753348e-06, "loss": 0.3036, "step": 6389 }, { "epoch": 0.4, "grad_norm": 2.0125838015849853, "learning_rate": 6.791982371350761e-06, "loss": 0.2968, "step": 6390 }, { "epoch": 0.4, "grad_norm": 2.5522094024516724, "learning_rate": 6.791031515595641e-06, "loss": 0.3029, "step": 6391 }, { "epoch": 0.4, "grad_norm": 2.270122226573881, "learning_rate": 6.790080585527442e-06, "loss": 0.3262, "step": 6392 }, { "epoch": 0.4, "grad_norm": 1.4299924927621477, "learning_rate": 6.789129581185621e-06, "loss": 0.2933, "step": 6393 }, { "epoch": 0.4, "grad_norm": 1.428656152912421, "learning_rate": 6.788178502609635e-06, "loss": 0.2936, "step": 6394 }, { "epoch": 0.4, "grad_norm": 1.5712739983277206, "learning_rate": 6.787227349838946e-06, "loss": 0.3108, "step": 6395 }, { "epoch": 0.4, "grad_norm": 1.7508718567894106, "learning_rate": 6.786276122913021e-06, "loss": 0.3109, "step": 6396 }, { "epoch": 0.4, "grad_norm": 1.6948998904039196, "learning_rate": 6.785324821871326e-06, "loss": 0.2911, "step": 6397 }, { "epoch": 0.4, "grad_norm": 1.8042237657566123, "learning_rate": 6.784373446753334e-06, "loss": 0.3153, "step": 6398 }, { "epoch": 0.4, "grad_norm": 2.203912760157126, "learning_rate": 6.783421997598518e-06, "loss": 0.3216, "step": 6399 }, { "epoch": 0.4, "grad_norm": 2.128446226595074, "learning_rate": 6.782470474446357e-06, "loss": 0.2887, "step": 6400 }, { "epoch": 0.4, "grad_norm": 2.2689239331071493, "learning_rate": 6.781518877336328e-06, "loss": 0.3201, "step": 6401 }, { "epoch": 0.4, "grad_norm": 1.9469366874019725, "learning_rate": 6.7805672063079166e-06, "loss": 0.2989, "step": 6402 }, { "epoch": 0.4, "grad_norm": 1.8764163133376295, "learning_rate": 6.77961546140061e-06, "loss": 0.3321, "step": 6403 }, { "epoch": 0.4, "grad_norm": 2.9326197487964842, "learning_rate": 6.778663642653897e-06, "loss": 0.2928, "step": 6404 }, { "epoch": 0.4, "grad_norm": 2.683167891228867, "learning_rate": 6.77771175010727e-06, "loss": 0.2963, "step": 6405 }, { "epoch": 0.4, "grad_norm": 4.184407438462736, "learning_rate": 6.776759783800224e-06, "loss": 0.3145, "step": 6406 }, { "epoch": 0.4, "grad_norm": 2.2446580362794384, "learning_rate": 6.775807743772258e-06, "loss": 0.3081, "step": 6407 }, { "epoch": 0.4, "grad_norm": 1.926659571692815, "learning_rate": 6.7748556300628764e-06, "loss": 0.3009, "step": 6408 }, { "epoch": 0.4, "grad_norm": 1.6655192578806834, "learning_rate": 6.773903442711582e-06, "loss": 0.3182, "step": 6409 }, { "epoch": 0.4, "grad_norm": 1.3488201062460332, "learning_rate": 6.772951181757883e-06, "loss": 0.3062, "step": 6410 }, { "epoch": 0.4, "grad_norm": 3.2345830430784606, "learning_rate": 6.77199884724129e-06, "loss": 0.302, "step": 6411 }, { "epoch": 0.4, "grad_norm": 2.440144329245238, "learning_rate": 6.7710464392013165e-06, "loss": 0.3013, "step": 6412 }, { "epoch": 0.4, "grad_norm": 1.4499405252676987, "learning_rate": 6.770093957677483e-06, "loss": 0.2919, "step": 6413 }, { "epoch": 0.4, "grad_norm": 1.4967558730981563, "learning_rate": 6.769141402709305e-06, "loss": 0.2891, "step": 6414 }, { "epoch": 0.4, "grad_norm": 1.5492769404609563, "learning_rate": 6.7681887743363085e-06, "loss": 0.3092, "step": 6415 }, { "epoch": 0.4, "grad_norm": 2.085038937768041, "learning_rate": 6.767236072598018e-06, "loss": 0.3176, "step": 6416 }, { "epoch": 0.4, "grad_norm": 4.315609889684815, "learning_rate": 6.766283297533965e-06, "loss": 0.2823, "step": 6417 }, { "epoch": 0.4, "grad_norm": 2.0800677507978316, "learning_rate": 6.765330449183682e-06, "loss": 0.3076, "step": 6418 }, { "epoch": 0.4, "grad_norm": 1.8613492189867349, "learning_rate": 6.764377527586701e-06, "loss": 0.3399, "step": 6419 }, { "epoch": 0.4, "grad_norm": 9.716488026652963, "learning_rate": 6.763424532782562e-06, "loss": 0.3122, "step": 6420 }, { "epoch": 0.4, "grad_norm": 1.537163245755776, "learning_rate": 6.762471464810808e-06, "loss": 0.2919, "step": 6421 }, { "epoch": 0.4, "grad_norm": 3.444778544636523, "learning_rate": 6.761518323710983e-06, "loss": 0.289, "step": 6422 }, { "epoch": 0.4, "grad_norm": 1.4808424561732993, "learning_rate": 6.760565109522634e-06, "loss": 0.3142, "step": 6423 }, { "epoch": 0.4, "grad_norm": 1.5107185897637505, "learning_rate": 6.75961182228531e-06, "loss": 0.2986, "step": 6424 }, { "epoch": 0.4, "grad_norm": 1.6168280353607318, "learning_rate": 6.758658462038568e-06, "loss": 0.2971, "step": 6425 }, { "epoch": 0.4, "grad_norm": 2.0215373404663097, "learning_rate": 6.757705028821961e-06, "loss": 0.3061, "step": 6426 }, { "epoch": 0.4, "grad_norm": 2.0190968985247033, "learning_rate": 6.756751522675051e-06, "loss": 0.3095, "step": 6427 }, { "epoch": 0.4, "grad_norm": 1.5502530079717356, "learning_rate": 6.755797943637401e-06, "loss": 0.2924, "step": 6428 }, { "epoch": 0.4, "grad_norm": 1.7631284176320388, "learning_rate": 6.754844291748575e-06, "loss": 0.2864, "step": 6429 }, { "epoch": 0.4, "grad_norm": 3.1932181990125468, "learning_rate": 6.753890567048141e-06, "loss": 0.2986, "step": 6430 }, { "epoch": 0.4, "grad_norm": 3.0542045306519885, "learning_rate": 6.752936769575673e-06, "loss": 0.3014, "step": 6431 }, { "epoch": 0.4, "grad_norm": 1.8273598593855904, "learning_rate": 6.751982899370746e-06, "loss": 0.2978, "step": 6432 }, { "epoch": 0.4, "grad_norm": 6.733642977777779, "learning_rate": 6.751028956472935e-06, "loss": 0.314, "step": 6433 }, { "epoch": 0.4, "grad_norm": 2.397989501728536, "learning_rate": 6.7500749409218235e-06, "loss": 0.3007, "step": 6434 }, { "epoch": 0.4, "grad_norm": 3.7413082411923697, "learning_rate": 6.749120852756994e-06, "loss": 0.2918, "step": 6435 }, { "epoch": 0.4, "grad_norm": 1.7194939410932075, "learning_rate": 6.748166692018033e-06, "loss": 0.2819, "step": 6436 }, { "epoch": 0.4, "grad_norm": 1.772032781328613, "learning_rate": 6.7472124587445306e-06, "loss": 0.3071, "step": 6437 }, { "epoch": 0.4, "grad_norm": 1.731602382722578, "learning_rate": 6.746258152976082e-06, "loss": 0.2945, "step": 6438 }, { "epoch": 0.4, "grad_norm": 2.0333223022746494, "learning_rate": 6.745303774752279e-06, "loss": 0.3064, "step": 6439 }, { "epoch": 0.41, "grad_norm": 1.76952663531311, "learning_rate": 6.744349324112722e-06, "loss": 0.3132, "step": 6440 }, { "epoch": 0.41, "grad_norm": 3.6825494996694594, "learning_rate": 6.743394801097014e-06, "loss": 0.3132, "step": 6441 }, { "epoch": 0.41, "grad_norm": 2.51728955276763, "learning_rate": 6.7424402057447606e-06, "loss": 0.2873, "step": 6442 }, { "epoch": 0.41, "grad_norm": 1.7091262720438556, "learning_rate": 6.741485538095566e-06, "loss": 0.3023, "step": 6443 }, { "epoch": 0.41, "grad_norm": 3.8394371015675834, "learning_rate": 6.7405307981890436e-06, "loss": 0.3047, "step": 6444 }, { "epoch": 0.41, "grad_norm": 2.0800861005997704, "learning_rate": 6.739575986064807e-06, "loss": 0.3005, "step": 6445 }, { "epoch": 0.41, "grad_norm": 2.598482420410477, "learning_rate": 6.738621101762472e-06, "loss": 0.2811, "step": 6446 }, { "epoch": 0.41, "grad_norm": 1.7975966513433286, "learning_rate": 6.737666145321662e-06, "loss": 0.2843, "step": 6447 }, { "epoch": 0.41, "grad_norm": 2.9658994276713817, "learning_rate": 6.7367111167819955e-06, "loss": 0.2969, "step": 6448 }, { "epoch": 0.41, "grad_norm": 1.4202457333872267, "learning_rate": 6.735756016183099e-06, "loss": 0.2972, "step": 6449 }, { "epoch": 0.41, "grad_norm": 1.4356628148175767, "learning_rate": 6.734800843564604e-06, "loss": 0.2804, "step": 6450 }, { "epoch": 0.41, "grad_norm": 4.434828003590157, "learning_rate": 6.73384559896614e-06, "loss": 0.3023, "step": 6451 }, { "epoch": 0.41, "grad_norm": 1.9753955136574015, "learning_rate": 6.732890282427342e-06, "loss": 0.2929, "step": 6452 }, { "epoch": 0.41, "grad_norm": 1.7334199063473066, "learning_rate": 6.731934893987849e-06, "loss": 0.2801, "step": 6453 }, { "epoch": 0.41, "grad_norm": 3.458364112155257, "learning_rate": 6.7309794336873e-06, "loss": 0.2966, "step": 6454 }, { "epoch": 0.41, "grad_norm": 2.3031052151800613, "learning_rate": 6.730023901565341e-06, "loss": 0.304, "step": 6455 }, { "epoch": 0.41, "grad_norm": 1.6402178108076182, "learning_rate": 6.729068297661618e-06, "loss": 0.3019, "step": 6456 }, { "epoch": 0.41, "grad_norm": 1.6320293107294643, "learning_rate": 6.728112622015779e-06, "loss": 0.2945, "step": 6457 }, { "epoch": 0.41, "grad_norm": 2.5309669224890703, "learning_rate": 6.727156874667478e-06, "loss": 0.3075, "step": 6458 }, { "epoch": 0.41, "grad_norm": 3.432981301693717, "learning_rate": 6.726201055656369e-06, "loss": 0.2818, "step": 6459 }, { "epoch": 0.41, "grad_norm": 0.6973018453399739, "learning_rate": 6.725245165022114e-06, "loss": 0.4881, "step": 6460 }, { "epoch": 0.41, "grad_norm": 3.446417171142823, "learning_rate": 6.724289202804373e-06, "loss": 0.3192, "step": 6461 }, { "epoch": 0.41, "grad_norm": 1.3604008127576046, "learning_rate": 6.723333169042808e-06, "loss": 0.3047, "step": 6462 }, { "epoch": 0.41, "grad_norm": 1.807463817597169, "learning_rate": 6.722377063777091e-06, "loss": 0.3137, "step": 6463 }, { "epoch": 0.41, "grad_norm": 1.8294947866474718, "learning_rate": 6.72142088704689e-06, "loss": 0.3068, "step": 6464 }, { "epoch": 0.41, "grad_norm": 1.3060169917182747, "learning_rate": 6.720464638891878e-06, "loss": 0.2916, "step": 6465 }, { "epoch": 0.41, "grad_norm": 2.008175265254663, "learning_rate": 6.719508319351733e-06, "loss": 0.3022, "step": 6466 }, { "epoch": 0.41, "grad_norm": 2.124354686119315, "learning_rate": 6.718551928466133e-06, "loss": 0.3146, "step": 6467 }, { "epoch": 0.41, "grad_norm": 1.6295233948044467, "learning_rate": 6.717595466274762e-06, "loss": 0.2968, "step": 6468 }, { "epoch": 0.41, "grad_norm": 1.4702849046667679, "learning_rate": 6.716638932817303e-06, "loss": 0.2925, "step": 6469 }, { "epoch": 0.41, "grad_norm": 1.6444903445591657, "learning_rate": 6.715682328133447e-06, "loss": 0.2863, "step": 6470 }, { "epoch": 0.41, "grad_norm": 1.8408699849276253, "learning_rate": 6.714725652262882e-06, "loss": 0.2907, "step": 6471 }, { "epoch": 0.41, "grad_norm": 1.6301294973434417, "learning_rate": 6.713768905245306e-06, "loss": 0.2855, "step": 6472 }, { "epoch": 0.41, "grad_norm": 1.939913293202927, "learning_rate": 6.712812087120413e-06, "loss": 0.3038, "step": 6473 }, { "epoch": 0.41, "grad_norm": 2.3121552370294105, "learning_rate": 6.711855197927904e-06, "loss": 0.2905, "step": 6474 }, { "epoch": 0.41, "grad_norm": 1.5014649916385596, "learning_rate": 6.710898237707482e-06, "loss": 0.2995, "step": 6475 }, { "epoch": 0.41, "grad_norm": 1.4460069249305667, "learning_rate": 6.7099412064988555e-06, "loss": 0.3094, "step": 6476 }, { "epoch": 0.41, "grad_norm": 1.9799930823188674, "learning_rate": 6.708984104341728e-06, "loss": 0.3129, "step": 6477 }, { "epoch": 0.41, "grad_norm": 1.6505256145852305, "learning_rate": 6.708026931275817e-06, "loss": 0.3025, "step": 6478 }, { "epoch": 0.41, "grad_norm": 3.0605434419766584, "learning_rate": 6.707069687340834e-06, "loss": 0.3033, "step": 6479 }, { "epoch": 0.41, "grad_norm": 2.0207596390285953, "learning_rate": 6.706112372576499e-06, "loss": 0.2911, "step": 6480 }, { "epoch": 0.41, "grad_norm": 4.144713949978419, "learning_rate": 6.705154987022528e-06, "loss": 0.3052, "step": 6481 }, { "epoch": 0.41, "grad_norm": 1.6216085321664697, "learning_rate": 6.70419753071865e-06, "loss": 0.3001, "step": 6482 }, { "epoch": 0.41, "grad_norm": 1.7893907759835832, "learning_rate": 6.703240003704588e-06, "loss": 0.3005, "step": 6483 }, { "epoch": 0.41, "grad_norm": 2.026551985538254, "learning_rate": 6.702282406020076e-06, "loss": 0.3295, "step": 6484 }, { "epoch": 0.41, "grad_norm": 4.656580054248685, "learning_rate": 6.70132473770484e-06, "loss": 0.3237, "step": 6485 }, { "epoch": 0.41, "grad_norm": 2.3342468805975165, "learning_rate": 6.700366998798621e-06, "loss": 0.2965, "step": 6486 }, { "epoch": 0.41, "grad_norm": 2.3114027899184495, "learning_rate": 6.699409189341153e-06, "loss": 0.2865, "step": 6487 }, { "epoch": 0.41, "grad_norm": 1.660419345588762, "learning_rate": 6.69845130937218e-06, "loss": 0.2929, "step": 6488 }, { "epoch": 0.41, "grad_norm": 1.3563661499613435, "learning_rate": 6.697493358931446e-06, "loss": 0.3178, "step": 6489 }, { "epoch": 0.41, "grad_norm": 1.5526025441779743, "learning_rate": 6.696535338058699e-06, "loss": 0.2829, "step": 6490 }, { "epoch": 0.41, "grad_norm": 1.6259505361326623, "learning_rate": 6.695577246793684e-06, "loss": 0.2969, "step": 6491 }, { "epoch": 0.41, "grad_norm": 0.6633457265011925, "learning_rate": 6.694619085176159e-06, "loss": 0.5003, "step": 6492 }, { "epoch": 0.41, "grad_norm": 2.104362074544626, "learning_rate": 6.693660853245878e-06, "loss": 0.2913, "step": 6493 }, { "epoch": 0.41, "grad_norm": 3.0840101305702516, "learning_rate": 6.6927025510426015e-06, "loss": 0.2877, "step": 6494 }, { "epoch": 0.41, "grad_norm": 2.5817337343184006, "learning_rate": 6.691744178606087e-06, "loss": 0.3022, "step": 6495 }, { "epoch": 0.41, "grad_norm": 1.5247565946653607, "learning_rate": 6.690785735976103e-06, "loss": 0.2988, "step": 6496 }, { "epoch": 0.41, "grad_norm": 1.5498495605021445, "learning_rate": 6.6898272231924155e-06, "loss": 0.3035, "step": 6497 }, { "epoch": 0.41, "grad_norm": 2.986600088902626, "learning_rate": 6.688868640294796e-06, "loss": 0.2984, "step": 6498 }, { "epoch": 0.41, "grad_norm": 3.0870828973579, "learning_rate": 6.687909987323016e-06, "loss": 0.3022, "step": 6499 }, { "epoch": 0.41, "grad_norm": 1.8103354335933153, "learning_rate": 6.686951264316852e-06, "loss": 0.3161, "step": 6500 }, { "epoch": 0.41, "grad_norm": 1.5621955735177566, "learning_rate": 6.6859924713160825e-06, "loss": 0.2957, "step": 6501 }, { "epoch": 0.41, "grad_norm": 2.0167450198214447, "learning_rate": 6.685033608360494e-06, "loss": 0.306, "step": 6502 }, { "epoch": 0.41, "grad_norm": 2.373936432373496, "learning_rate": 6.684074675489864e-06, "loss": 0.3407, "step": 6503 }, { "epoch": 0.41, "grad_norm": 1.8784137649234551, "learning_rate": 6.683115672743989e-06, "loss": 0.2972, "step": 6504 }, { "epoch": 0.41, "grad_norm": 2.643761595008346, "learning_rate": 6.682156600162653e-06, "loss": 0.3029, "step": 6505 }, { "epoch": 0.41, "grad_norm": 1.7342194894575267, "learning_rate": 6.681197457785652e-06, "loss": 0.2768, "step": 6506 }, { "epoch": 0.41, "grad_norm": 2.846046235908457, "learning_rate": 6.680238245652782e-06, "loss": 0.2983, "step": 6507 }, { "epoch": 0.41, "grad_norm": 2.248225598354093, "learning_rate": 6.679278963803843e-06, "loss": 0.2868, "step": 6508 }, { "epoch": 0.41, "grad_norm": 0.6656411086377948, "learning_rate": 6.678319612278636e-06, "loss": 0.4823, "step": 6509 }, { "epoch": 0.41, "grad_norm": 2.085584357878793, "learning_rate": 6.677360191116971e-06, "loss": 0.2954, "step": 6510 }, { "epoch": 0.41, "grad_norm": 2.875879156190651, "learning_rate": 6.676400700358647e-06, "loss": 0.3085, "step": 6511 }, { "epoch": 0.41, "grad_norm": 3.1069616976492127, "learning_rate": 6.6754411400434835e-06, "loss": 0.2905, "step": 6512 }, { "epoch": 0.41, "grad_norm": 1.6032407445338612, "learning_rate": 6.674481510211292e-06, "loss": 0.2784, "step": 6513 }, { "epoch": 0.41, "grad_norm": 3.0661652484314277, "learning_rate": 6.673521810901886e-06, "loss": 0.3457, "step": 6514 }, { "epoch": 0.41, "grad_norm": 3.013500104714371, "learning_rate": 6.672562042155089e-06, "loss": 0.2974, "step": 6515 }, { "epoch": 0.41, "grad_norm": 4.027355388246828, "learning_rate": 6.671602204010722e-06, "loss": 0.28, "step": 6516 }, { "epoch": 0.41, "grad_norm": 2.221326735148984, "learning_rate": 6.67064229650861e-06, "loss": 0.3294, "step": 6517 }, { "epoch": 0.41, "grad_norm": 2.5814221531969004, "learning_rate": 6.669682319688582e-06, "loss": 0.2906, "step": 6518 }, { "epoch": 0.41, "grad_norm": 26.916348187608442, "learning_rate": 6.6687222735904675e-06, "loss": 0.3017, "step": 6519 }, { "epoch": 0.41, "grad_norm": 2.0790376071172463, "learning_rate": 6.667762158254104e-06, "loss": 0.2981, "step": 6520 }, { "epoch": 0.41, "grad_norm": 1.9109341389632886, "learning_rate": 6.6668019737193255e-06, "loss": 0.2845, "step": 6521 }, { "epoch": 0.41, "grad_norm": 6.032732941934832, "learning_rate": 6.665841720025972e-06, "loss": 0.2976, "step": 6522 }, { "epoch": 0.41, "grad_norm": 1.5256747676019773, "learning_rate": 6.664881397213887e-06, "loss": 0.3113, "step": 6523 }, { "epoch": 0.41, "grad_norm": 2.0470466405784453, "learning_rate": 6.663921005322917e-06, "loss": 0.3078, "step": 6524 }, { "epoch": 0.41, "grad_norm": 1.4551442549662454, "learning_rate": 6.662960544392907e-06, "loss": 0.3108, "step": 6525 }, { "epoch": 0.41, "grad_norm": 2.492917944019753, "learning_rate": 6.662000014463711e-06, "loss": 0.3122, "step": 6526 }, { "epoch": 0.41, "grad_norm": 1.261631175411658, "learning_rate": 6.661039415575183e-06, "loss": 0.2973, "step": 6527 }, { "epoch": 0.41, "grad_norm": 1.866713432003546, "learning_rate": 6.660078747767178e-06, "loss": 0.309, "step": 6528 }, { "epoch": 0.41, "grad_norm": 1.8077549956648529, "learning_rate": 6.659118011079558e-06, "loss": 0.3034, "step": 6529 }, { "epoch": 0.41, "grad_norm": 2.8211784951489682, "learning_rate": 6.658157205552185e-06, "loss": 0.2911, "step": 6530 }, { "epoch": 0.41, "grad_norm": 3.25057674642045, "learning_rate": 6.6571963312249236e-06, "loss": 0.3295, "step": 6531 }, { "epoch": 0.41, "grad_norm": 4.04367286593556, "learning_rate": 6.656235388137644e-06, "loss": 0.322, "step": 6532 }, { "epoch": 0.41, "grad_norm": 1.2583043092157906, "learning_rate": 6.655274376330214e-06, "loss": 0.2728, "step": 6533 }, { "epoch": 0.41, "grad_norm": 2.162039432449415, "learning_rate": 6.654313295842513e-06, "loss": 0.3119, "step": 6534 }, { "epoch": 0.41, "grad_norm": 2.1090665833326745, "learning_rate": 6.653352146714413e-06, "loss": 0.3204, "step": 6535 }, { "epoch": 0.41, "grad_norm": 1.2228168014288647, "learning_rate": 6.652390928985797e-06, "loss": 0.2973, "step": 6536 }, { "epoch": 0.41, "grad_norm": 2.771665543505001, "learning_rate": 6.651429642696545e-06, "loss": 0.2982, "step": 6537 }, { "epoch": 0.41, "grad_norm": 2.1660760953101317, "learning_rate": 6.6504682878865444e-06, "loss": 0.2934, "step": 6538 }, { "epoch": 0.41, "grad_norm": 7.0469822850552575, "learning_rate": 6.649506864595683e-06, "loss": 0.2928, "step": 6539 }, { "epoch": 0.41, "grad_norm": 2.6919902299540963, "learning_rate": 6.648545372863853e-06, "loss": 0.3067, "step": 6540 }, { "epoch": 0.41, "grad_norm": 2.131151185345146, "learning_rate": 6.647583812730945e-06, "loss": 0.3182, "step": 6541 }, { "epoch": 0.41, "grad_norm": 3.5973916018871677, "learning_rate": 6.646622184236861e-06, "loss": 0.3322, "step": 6542 }, { "epoch": 0.41, "grad_norm": 2.8035752923220416, "learning_rate": 6.6456604874214955e-06, "loss": 0.3168, "step": 6543 }, { "epoch": 0.41, "grad_norm": 2.097977463715777, "learning_rate": 6.644698722324755e-06, "loss": 0.2925, "step": 6544 }, { "epoch": 0.41, "grad_norm": 5.890742047703468, "learning_rate": 6.643736888986541e-06, "loss": 0.2945, "step": 6545 }, { "epoch": 0.41, "grad_norm": 2.8278897102287393, "learning_rate": 6.642774987446768e-06, "loss": 0.3043, "step": 6546 }, { "epoch": 0.41, "grad_norm": 1.8485565712722765, "learning_rate": 6.641813017745339e-06, "loss": 0.293, "step": 6547 }, { "epoch": 0.41, "grad_norm": 2.766376519928722, "learning_rate": 6.640850979922173e-06, "loss": 0.2878, "step": 6548 }, { "epoch": 0.41, "grad_norm": 2.8180337634210066, "learning_rate": 6.639888874017185e-06, "loss": 0.3108, "step": 6549 }, { "epoch": 0.41, "grad_norm": 1.8359271171744111, "learning_rate": 6.638926700070296e-06, "loss": 0.3123, "step": 6550 }, { "epoch": 0.41, "grad_norm": 3.1252229166077155, "learning_rate": 6.637964458121427e-06, "loss": 0.2957, "step": 6551 }, { "epoch": 0.41, "grad_norm": 2.7650068696064065, "learning_rate": 6.637002148210502e-06, "loss": 0.2968, "step": 6552 }, { "epoch": 0.41, "grad_norm": 1.545614146749676, "learning_rate": 6.63603977037745e-06, "loss": 0.2915, "step": 6553 }, { "epoch": 0.41, "grad_norm": 2.0902969731352163, "learning_rate": 6.635077324662203e-06, "loss": 0.281, "step": 6554 }, { "epoch": 0.41, "grad_norm": 1.898235766333002, "learning_rate": 6.6341148111046935e-06, "loss": 0.2912, "step": 6555 }, { "epoch": 0.41, "grad_norm": 4.0526073815626145, "learning_rate": 6.6331522297448584e-06, "loss": 0.3282, "step": 6556 }, { "epoch": 0.41, "grad_norm": 2.580336371799474, "learning_rate": 6.632189580622636e-06, "loss": 0.32, "step": 6557 }, { "epoch": 0.41, "grad_norm": 2.3217855246364456, "learning_rate": 6.631226863777968e-06, "loss": 0.3033, "step": 6558 }, { "epoch": 0.41, "grad_norm": 3.3520595471580505, "learning_rate": 6.6302640792508e-06, "loss": 0.3188, "step": 6559 }, { "epoch": 0.41, "grad_norm": 2.4813151010945194, "learning_rate": 6.629301227081082e-06, "loss": 0.2948, "step": 6560 }, { "epoch": 0.41, "grad_norm": 2.940935134432427, "learning_rate": 6.6283383073087595e-06, "loss": 0.2935, "step": 6561 }, { "epoch": 0.41, "grad_norm": 1.5880193534118219, "learning_rate": 6.62737531997379e-06, "loss": 0.2863, "step": 6562 }, { "epoch": 0.41, "grad_norm": 1.841185018704397, "learning_rate": 6.626412265116127e-06, "loss": 0.2871, "step": 6563 }, { "epoch": 0.41, "grad_norm": 4.447388579476549, "learning_rate": 6.625449142775731e-06, "loss": 0.2895, "step": 6564 }, { "epoch": 0.41, "grad_norm": 3.6735093007773516, "learning_rate": 6.624485952992563e-06, "loss": 0.3117, "step": 6565 }, { "epoch": 0.41, "grad_norm": 4.537175043517064, "learning_rate": 6.623522695806588e-06, "loss": 0.3177, "step": 6566 }, { "epoch": 0.41, "grad_norm": 1.6200215922676457, "learning_rate": 6.62255937125777e-06, "loss": 0.3127, "step": 6567 }, { "epoch": 0.41, "grad_norm": 10.564596987242483, "learning_rate": 6.621595979386084e-06, "loss": 0.295, "step": 6568 }, { "epoch": 0.41, "grad_norm": 8.986314193108306, "learning_rate": 6.6206325202315e-06, "loss": 0.3048, "step": 6569 }, { "epoch": 0.41, "grad_norm": 0.622307023271134, "learning_rate": 6.6196689938339946e-06, "loss": 0.5279, "step": 6570 }, { "epoch": 0.41, "grad_norm": 1.342715204296712, "learning_rate": 6.618705400233544e-06, "loss": 0.2875, "step": 6571 }, { "epoch": 0.41, "grad_norm": 2.057952809297888, "learning_rate": 6.617741739470134e-06, "loss": 0.3058, "step": 6572 }, { "epoch": 0.41, "grad_norm": 5.340990691708877, "learning_rate": 6.616778011583744e-06, "loss": 0.315, "step": 6573 }, { "epoch": 0.41, "grad_norm": 2.1280708954523515, "learning_rate": 6.6158142166143625e-06, "loss": 0.3019, "step": 6574 }, { "epoch": 0.41, "grad_norm": 2.9229894990239464, "learning_rate": 6.614850354601978e-06, "loss": 0.2781, "step": 6575 }, { "epoch": 0.41, "grad_norm": 1.5369966196890847, "learning_rate": 6.613886425586586e-06, "loss": 0.288, "step": 6576 }, { "epoch": 0.41, "grad_norm": 1.641657513587774, "learning_rate": 6.61292242960818e-06, "loss": 0.2982, "step": 6577 }, { "epoch": 0.41, "grad_norm": 1.7241730992220534, "learning_rate": 6.611958366706757e-06, "loss": 0.3175, "step": 6578 }, { "epoch": 0.41, "grad_norm": 1.8285734215480198, "learning_rate": 6.610994236922317e-06, "loss": 0.296, "step": 6579 }, { "epoch": 0.41, "grad_norm": 3.3209263340897497, "learning_rate": 6.610030040294866e-06, "loss": 0.3147, "step": 6580 }, { "epoch": 0.41, "grad_norm": 2.068601570984096, "learning_rate": 6.6090657768644085e-06, "loss": 0.2886, "step": 6581 }, { "epoch": 0.41, "grad_norm": 2.3594994164369227, "learning_rate": 6.608101446670953e-06, "loss": 0.312, "step": 6582 }, { "epoch": 0.41, "grad_norm": 1.6020994895289615, "learning_rate": 6.607137049754513e-06, "loss": 0.293, "step": 6583 }, { "epoch": 0.41, "grad_norm": 1.850261649027184, "learning_rate": 6.6061725861551026e-06, "loss": 0.2782, "step": 6584 }, { "epoch": 0.41, "grad_norm": 1.5573929837511624, "learning_rate": 6.605208055912737e-06, "loss": 0.3199, "step": 6585 }, { "epoch": 0.41, "grad_norm": 1.7566236394019696, "learning_rate": 6.604243459067441e-06, "loss": 0.2998, "step": 6586 }, { "epoch": 0.41, "grad_norm": 1.8706880225366673, "learning_rate": 6.6032787956592316e-06, "loss": 0.2885, "step": 6587 }, { "epoch": 0.41, "grad_norm": 1.8610202506770097, "learning_rate": 6.602314065728139e-06, "loss": 0.3132, "step": 6588 }, { "epoch": 0.41, "grad_norm": 1.7072450253287421, "learning_rate": 6.601349269314188e-06, "loss": 0.2845, "step": 6589 }, { "epoch": 0.41, "grad_norm": 1.6233152007016336, "learning_rate": 6.600384406457414e-06, "loss": 0.2859, "step": 6590 }, { "epoch": 0.41, "grad_norm": 1.702646182713347, "learning_rate": 6.599419477197846e-06, "loss": 0.3026, "step": 6591 }, { "epoch": 0.41, "grad_norm": 1.8484181919293035, "learning_rate": 6.598454481575525e-06, "loss": 0.3033, "step": 6592 }, { "epoch": 0.41, "grad_norm": 1.4374414106581042, "learning_rate": 6.597489419630486e-06, "loss": 0.3008, "step": 6593 }, { "epoch": 0.41, "grad_norm": 2.7344994247032326, "learning_rate": 6.5965242914027764e-06, "loss": 0.3069, "step": 6594 }, { "epoch": 0.41, "grad_norm": 2.349672451682584, "learning_rate": 6.595559096932436e-06, "loss": 0.3086, "step": 6595 }, { "epoch": 0.41, "grad_norm": 2.0954466858856042, "learning_rate": 6.594593836259516e-06, "loss": 0.2854, "step": 6596 }, { "epoch": 0.41, "grad_norm": 2.4831756558014324, "learning_rate": 6.5936285094240635e-06, "loss": 0.3074, "step": 6597 }, { "epoch": 0.41, "grad_norm": 2.5382543642269466, "learning_rate": 6.592663116466136e-06, "loss": 0.2879, "step": 6598 }, { "epoch": 0.42, "grad_norm": 2.0291490477998524, "learning_rate": 6.591697657425785e-06, "loss": 0.3152, "step": 6599 }, { "epoch": 0.42, "grad_norm": 6.254477060601201, "learning_rate": 6.590732132343072e-06, "loss": 0.2717, "step": 6600 }, { "epoch": 0.42, "grad_norm": 1.8654577208286884, "learning_rate": 6.589766541258056e-06, "loss": 0.3088, "step": 6601 }, { "epoch": 0.42, "grad_norm": 1.7547277008358781, "learning_rate": 6.588800884210804e-06, "loss": 0.2805, "step": 6602 }, { "epoch": 0.42, "grad_norm": 2.2004128043591042, "learning_rate": 6.587835161241381e-06, "loss": 0.3029, "step": 6603 }, { "epoch": 0.42, "grad_norm": 16.76608811992652, "learning_rate": 6.586869372389857e-06, "loss": 0.2944, "step": 6604 }, { "epoch": 0.42, "grad_norm": 1.8336780119686658, "learning_rate": 6.585903517696304e-06, "loss": 0.3146, "step": 6605 }, { "epoch": 0.42, "grad_norm": 1.3608484381333867, "learning_rate": 6.584937597200797e-06, "loss": 0.2952, "step": 6606 }, { "epoch": 0.42, "grad_norm": 1.7509240104914126, "learning_rate": 6.5839716109434136e-06, "loss": 0.3201, "step": 6607 }, { "epoch": 0.42, "grad_norm": 1.9604685934523443, "learning_rate": 6.583005558964235e-06, "loss": 0.2982, "step": 6608 }, { "epoch": 0.42, "grad_norm": 1.3077049783515002, "learning_rate": 6.582039441303344e-06, "loss": 0.2804, "step": 6609 }, { "epoch": 0.42, "grad_norm": 1.6072040591235521, "learning_rate": 6.581073258000827e-06, "loss": 0.3106, "step": 6610 }, { "epoch": 0.42, "grad_norm": 2.1569202949236703, "learning_rate": 6.580107009096771e-06, "loss": 0.2857, "step": 6611 }, { "epoch": 0.42, "grad_norm": 1.7634977147567996, "learning_rate": 6.57914069463127e-06, "loss": 0.3002, "step": 6612 }, { "epoch": 0.42, "grad_norm": 1.9597734943808822, "learning_rate": 6.578174314644416e-06, "loss": 0.3044, "step": 6613 }, { "epoch": 0.42, "grad_norm": 2.544044361929901, "learning_rate": 6.5772078691763065e-06, "loss": 0.2972, "step": 6614 }, { "epoch": 0.42, "grad_norm": 2.3856537063237093, "learning_rate": 6.57624135826704e-06, "loss": 0.29, "step": 6615 }, { "epoch": 0.42, "grad_norm": 1.7474925133654111, "learning_rate": 6.575274781956722e-06, "loss": 0.2909, "step": 6616 }, { "epoch": 0.42, "grad_norm": 1.7235652080990869, "learning_rate": 6.574308140285454e-06, "loss": 0.2926, "step": 6617 }, { "epoch": 0.42, "grad_norm": 2.6267696018741113, "learning_rate": 6.573341433293345e-06, "loss": 0.3091, "step": 6618 }, { "epoch": 0.42, "grad_norm": 2.279834635014297, "learning_rate": 6.572374661020505e-06, "loss": 0.3002, "step": 6619 }, { "epoch": 0.42, "grad_norm": 1.7558010712909244, "learning_rate": 6.571407823507049e-06, "loss": 0.2795, "step": 6620 }, { "epoch": 0.42, "grad_norm": 1.6294631332275649, "learning_rate": 6.5704409207930905e-06, "loss": 0.2777, "step": 6621 }, { "epoch": 0.42, "grad_norm": 2.4934477254417544, "learning_rate": 6.569473952918749e-06, "loss": 0.3045, "step": 6622 }, { "epoch": 0.42, "grad_norm": 2.202872314621412, "learning_rate": 6.5685069199241435e-06, "loss": 0.3073, "step": 6623 }, { "epoch": 0.42, "grad_norm": 2.6364301520786926, "learning_rate": 6.567539821849403e-06, "loss": 0.302, "step": 6624 }, { "epoch": 0.42, "grad_norm": 1.4578145608951738, "learning_rate": 6.566572658734649e-06, "loss": 0.2833, "step": 6625 }, { "epoch": 0.42, "grad_norm": 2.5653620306932607, "learning_rate": 6.565605430620014e-06, "loss": 0.3066, "step": 6626 }, { "epoch": 0.42, "grad_norm": 1.7116355398366587, "learning_rate": 6.564638137545627e-06, "loss": 0.3007, "step": 6627 }, { "epoch": 0.42, "grad_norm": 1.3005990082073562, "learning_rate": 6.563670779551627e-06, "loss": 0.2968, "step": 6628 }, { "epoch": 0.42, "grad_norm": 2.512059306827473, "learning_rate": 6.562703356678147e-06, "loss": 0.2852, "step": 6629 }, { "epoch": 0.42, "grad_norm": 0.8029879121206913, "learning_rate": 6.56173586896533e-06, "loss": 0.5057, "step": 6630 }, { "epoch": 0.42, "grad_norm": 2.51892830915327, "learning_rate": 6.560768316453317e-06, "loss": 0.3069, "step": 6631 }, { "epoch": 0.42, "grad_norm": 3.4714159838244183, "learning_rate": 6.5598006991822565e-06, "loss": 0.3107, "step": 6632 }, { "epoch": 0.42, "grad_norm": 3.17130969302371, "learning_rate": 6.558833017192292e-06, "loss": 0.3247, "step": 6633 }, { "epoch": 0.42, "grad_norm": 2.0429954347034123, "learning_rate": 6.557865270523578e-06, "loss": 0.2966, "step": 6634 }, { "epoch": 0.42, "grad_norm": 2.9615465707278945, "learning_rate": 6.556897459216266e-06, "loss": 0.2923, "step": 6635 }, { "epoch": 0.42, "grad_norm": 2.0565315431508737, "learning_rate": 6.555929583310515e-06, "loss": 0.2982, "step": 6636 }, { "epoch": 0.42, "grad_norm": 4.637913546854928, "learning_rate": 6.55496164284648e-06, "loss": 0.3019, "step": 6637 }, { "epoch": 0.42, "grad_norm": 3.1177319613133587, "learning_rate": 6.553993637864325e-06, "loss": 0.2935, "step": 6638 }, { "epoch": 0.42, "grad_norm": 1.5943474259786545, "learning_rate": 6.5530255684042145e-06, "loss": 0.2905, "step": 6639 }, { "epoch": 0.42, "grad_norm": 1.5647836227289802, "learning_rate": 6.552057434506314e-06, "loss": 0.2964, "step": 6640 }, { "epoch": 0.42, "grad_norm": 3.300130802169015, "learning_rate": 6.551089236210793e-06, "loss": 0.305, "step": 6641 }, { "epoch": 0.42, "grad_norm": 3.525592303022024, "learning_rate": 6.550120973557825e-06, "loss": 0.3034, "step": 6642 }, { "epoch": 0.42, "grad_norm": 1.5047122486921707, "learning_rate": 6.549152646587585e-06, "loss": 0.2963, "step": 6643 }, { "epoch": 0.42, "grad_norm": 2.5618472647096326, "learning_rate": 6.548184255340251e-06, "loss": 0.3237, "step": 6644 }, { "epoch": 0.42, "grad_norm": 1.5165527372393564, "learning_rate": 6.5472157998560005e-06, "loss": 0.3144, "step": 6645 }, { "epoch": 0.42, "grad_norm": 1.8126356620836233, "learning_rate": 6.5462472801750195e-06, "loss": 0.2912, "step": 6646 }, { "epoch": 0.42, "grad_norm": 2.288393285526579, "learning_rate": 6.545278696337492e-06, "loss": 0.2845, "step": 6647 }, { "epoch": 0.42, "grad_norm": 4.232613800542668, "learning_rate": 6.544310048383608e-06, "loss": 0.3276, "step": 6648 }, { "epoch": 0.42, "grad_norm": 1.9751667329786748, "learning_rate": 6.543341336353555e-06, "loss": 0.31, "step": 6649 }, { "epoch": 0.42, "grad_norm": 1.6580555248410977, "learning_rate": 6.54237256028753e-06, "loss": 0.2896, "step": 6650 }, { "epoch": 0.42, "grad_norm": 2.140158115245598, "learning_rate": 6.541403720225726e-06, "loss": 0.2798, "step": 6651 }, { "epoch": 0.42, "grad_norm": 3.087865297320279, "learning_rate": 6.5404348162083466e-06, "loss": 0.3037, "step": 6652 }, { "epoch": 0.42, "grad_norm": 2.5568908169910936, "learning_rate": 6.539465848275588e-06, "loss": 0.2964, "step": 6653 }, { "epoch": 0.42, "grad_norm": 0.7141778926779454, "learning_rate": 6.538496816467658e-06, "loss": 0.501, "step": 6654 }, { "epoch": 0.42, "grad_norm": 3.0369483023198427, "learning_rate": 6.537527720824763e-06, "loss": 0.3049, "step": 6655 }, { "epoch": 0.42, "grad_norm": 1.7767126454898903, "learning_rate": 6.536558561387112e-06, "loss": 0.3242, "step": 6656 }, { "epoch": 0.42, "grad_norm": 2.387913680174168, "learning_rate": 6.535589338194916e-06, "loss": 0.2958, "step": 6657 }, { "epoch": 0.42, "grad_norm": 1.7439653011474678, "learning_rate": 6.534620051288392e-06, "loss": 0.2962, "step": 6658 }, { "epoch": 0.42, "grad_norm": 4.1073700321500155, "learning_rate": 6.533650700707755e-06, "loss": 0.2867, "step": 6659 }, { "epoch": 0.42, "grad_norm": 4.72888325269297, "learning_rate": 6.532681286493227e-06, "loss": 0.3015, "step": 6660 }, { "epoch": 0.42, "grad_norm": 2.754324138536733, "learning_rate": 6.5317118086850296e-06, "loss": 0.3389, "step": 6661 }, { "epoch": 0.42, "grad_norm": 1.5798089948703729, "learning_rate": 6.53074226732339e-06, "loss": 0.2954, "step": 6662 }, { "epoch": 0.42, "grad_norm": 7.357269872813877, "learning_rate": 6.5297726624485335e-06, "loss": 0.2935, "step": 6663 }, { "epoch": 0.42, "grad_norm": 2.4157827770860054, "learning_rate": 6.5288029941006924e-06, "loss": 0.2849, "step": 6664 }, { "epoch": 0.42, "grad_norm": 2.483880541105889, "learning_rate": 6.527833262320099e-06, "loss": 0.3013, "step": 6665 }, { "epoch": 0.42, "grad_norm": 1.8280625928331715, "learning_rate": 6.526863467146991e-06, "loss": 0.311, "step": 6666 }, { "epoch": 0.42, "grad_norm": 16.137689554284446, "learning_rate": 6.525893608621604e-06, "loss": 0.3009, "step": 6667 }, { "epoch": 0.42, "grad_norm": 1.4052662791986477, "learning_rate": 6.524923686784184e-06, "loss": 0.3071, "step": 6668 }, { "epoch": 0.42, "grad_norm": 1.9190988175049173, "learning_rate": 6.523953701674969e-06, "loss": 0.3225, "step": 6669 }, { "epoch": 0.42, "grad_norm": 3.4271075268165996, "learning_rate": 6.5229836533342095e-06, "loss": 0.3028, "step": 6670 }, { "epoch": 0.42, "grad_norm": 1.9156249575803732, "learning_rate": 6.5220135418021515e-06, "loss": 0.3241, "step": 6671 }, { "epoch": 0.42, "grad_norm": 3.1031247527236507, "learning_rate": 6.521043367119049e-06, "loss": 0.3017, "step": 6672 }, { "epoch": 0.42, "grad_norm": 1.4463256394660986, "learning_rate": 6.520073129325156e-06, "loss": 0.2923, "step": 6673 }, { "epoch": 0.42, "grad_norm": 2.0932528750837225, "learning_rate": 6.51910282846073e-06, "loss": 0.2976, "step": 6674 }, { "epoch": 0.42, "grad_norm": 3.325389754089071, "learning_rate": 6.518132464566026e-06, "loss": 0.2991, "step": 6675 }, { "epoch": 0.42, "grad_norm": 1.4153017818677673, "learning_rate": 6.517162037681313e-06, "loss": 0.2982, "step": 6676 }, { "epoch": 0.42, "grad_norm": 1.4919394863188316, "learning_rate": 6.5161915478468495e-06, "loss": 0.2834, "step": 6677 }, { "epoch": 0.42, "grad_norm": 1.7836137050152663, "learning_rate": 6.5152209951029085e-06, "loss": 0.2939, "step": 6678 }, { "epoch": 0.42, "grad_norm": 3.0392726353543766, "learning_rate": 6.514250379489754e-06, "loss": 0.317, "step": 6679 }, { "epoch": 0.42, "grad_norm": 8.336572065414183, "learning_rate": 6.513279701047663e-06, "loss": 0.2825, "step": 6680 }, { "epoch": 0.42, "grad_norm": 1.5357636682880287, "learning_rate": 6.51230895981691e-06, "loss": 0.3017, "step": 6681 }, { "epoch": 0.42, "grad_norm": 3.056914598784362, "learning_rate": 6.511338155837772e-06, "loss": 0.3285, "step": 6682 }, { "epoch": 0.42, "grad_norm": 1.9321872914816856, "learning_rate": 6.510367289150528e-06, "loss": 0.2908, "step": 6683 }, { "epoch": 0.42, "grad_norm": 1.9659965305423102, "learning_rate": 6.509396359795465e-06, "loss": 0.3044, "step": 6684 }, { "epoch": 0.42, "grad_norm": 1.701003316316974, "learning_rate": 6.508425367812864e-06, "loss": 0.3084, "step": 6685 }, { "epoch": 0.42, "grad_norm": 1.3292562347262862, "learning_rate": 6.507454313243016e-06, "loss": 0.2932, "step": 6686 }, { "epoch": 0.42, "grad_norm": 1.8085632941667336, "learning_rate": 6.5064831961262095e-06, "loss": 0.2924, "step": 6687 }, { "epoch": 0.42, "grad_norm": 5.099391928724341, "learning_rate": 6.505512016502742e-06, "loss": 0.293, "step": 6688 }, { "epoch": 0.42, "grad_norm": 2.8982831618294798, "learning_rate": 6.504540774412905e-06, "loss": 0.3195, "step": 6689 }, { "epoch": 0.42, "grad_norm": 1.8581115161281003, "learning_rate": 6.503569469896999e-06, "loss": 0.2985, "step": 6690 }, { "epoch": 0.42, "grad_norm": 2.0070324488975997, "learning_rate": 6.502598102995326e-06, "loss": 0.3109, "step": 6691 }, { "epoch": 0.42, "grad_norm": 2.5611289647158277, "learning_rate": 6.5016266737481895e-06, "loss": 0.3013, "step": 6692 }, { "epoch": 0.42, "grad_norm": 2.1415445046374235, "learning_rate": 6.500655182195893e-06, "loss": 0.2832, "step": 6693 }, { "epoch": 0.42, "grad_norm": 1.6257866022093999, "learning_rate": 6.4996836283787475e-06, "loss": 0.2883, "step": 6694 }, { "epoch": 0.42, "grad_norm": 2.4598524460289037, "learning_rate": 6.498712012337065e-06, "loss": 0.2936, "step": 6695 }, { "epoch": 0.42, "grad_norm": 1.7563371977421394, "learning_rate": 6.497740334111161e-06, "loss": 0.3083, "step": 6696 }, { "epoch": 0.42, "grad_norm": 3.528684558497098, "learning_rate": 6.496768593741347e-06, "loss": 0.3136, "step": 6697 }, { "epoch": 0.42, "grad_norm": 2.0870146974393315, "learning_rate": 6.495796791267948e-06, "loss": 0.3032, "step": 6698 }, { "epoch": 0.42, "grad_norm": 2.1498320571843594, "learning_rate": 6.4948249267312826e-06, "loss": 0.3065, "step": 6699 }, { "epoch": 0.42, "grad_norm": 3.7312396090469386, "learning_rate": 6.493853000171677e-06, "loss": 0.3167, "step": 6700 }, { "epoch": 0.42, "grad_norm": 10.953251234131047, "learning_rate": 6.492881011629455e-06, "loss": 0.2992, "step": 6701 }, { "epoch": 0.42, "grad_norm": 2.4939549599591375, "learning_rate": 6.49190896114495e-06, "loss": 0.31, "step": 6702 }, { "epoch": 0.42, "grad_norm": 2.727483209643662, "learning_rate": 6.4909368487584916e-06, "loss": 0.3, "step": 6703 }, { "epoch": 0.42, "grad_norm": 4.580324751770327, "learning_rate": 6.489964674510415e-06, "loss": 0.3069, "step": 6704 }, { "epoch": 0.42, "grad_norm": 1.682575206260421, "learning_rate": 6.488992438441055e-06, "loss": 0.2941, "step": 6705 }, { "epoch": 0.42, "grad_norm": 1.6084755212680026, "learning_rate": 6.488020140590758e-06, "loss": 0.333, "step": 6706 }, { "epoch": 0.42, "grad_norm": 1.414128282407443, "learning_rate": 6.48704778099986e-06, "loss": 0.3224, "step": 6707 }, { "epoch": 0.42, "grad_norm": 2.3166081613666436, "learning_rate": 6.486075359708709e-06, "loss": 0.319, "step": 6708 }, { "epoch": 0.42, "grad_norm": 1.6840612137367519, "learning_rate": 6.485102876757652e-06, "loss": 0.3168, "step": 6709 }, { "epoch": 0.42, "grad_norm": 1.9088392499215119, "learning_rate": 6.48413033218704e-06, "loss": 0.2967, "step": 6710 }, { "epoch": 0.42, "grad_norm": 1.4688670212016326, "learning_rate": 6.483157726037222e-06, "loss": 0.2856, "step": 6711 }, { "epoch": 0.42, "grad_norm": 1.407240901376062, "learning_rate": 6.482185058348556e-06, "loss": 0.3038, "step": 6712 }, { "epoch": 0.42, "grad_norm": 1.7259059175476872, "learning_rate": 6.4812123291614005e-06, "loss": 0.2901, "step": 6713 }, { "epoch": 0.42, "grad_norm": 2.816169264353281, "learning_rate": 6.480239538516114e-06, "loss": 0.2976, "step": 6714 }, { "epoch": 0.42, "grad_norm": 1.8517276184680693, "learning_rate": 6.47926668645306e-06, "loss": 0.2997, "step": 6715 }, { "epoch": 0.42, "grad_norm": 2.155593367474794, "learning_rate": 6.478293773012603e-06, "loss": 0.305, "step": 6716 }, { "epoch": 0.42, "grad_norm": 1.7211834035333056, "learning_rate": 6.477320798235112e-06, "loss": 0.2883, "step": 6717 }, { "epoch": 0.42, "grad_norm": 2.5938584569789214, "learning_rate": 6.476347762160957e-06, "loss": 0.3108, "step": 6718 }, { "epoch": 0.42, "grad_norm": 2.8052742745153534, "learning_rate": 6.475374664830512e-06, "loss": 0.2925, "step": 6719 }, { "epoch": 0.42, "grad_norm": 3.2814627037738258, "learning_rate": 6.47440150628415e-06, "loss": 0.297, "step": 6720 }, { "epoch": 0.42, "grad_norm": 2.5331285415764486, "learning_rate": 6.473428286562251e-06, "loss": 0.2936, "step": 6721 }, { "epoch": 0.42, "grad_norm": 1.916705578993107, "learning_rate": 6.472455005705197e-06, "loss": 0.2896, "step": 6722 }, { "epoch": 0.42, "grad_norm": 15.634617436509288, "learning_rate": 6.471481663753367e-06, "loss": 0.288, "step": 6723 }, { "epoch": 0.42, "grad_norm": 3.1405479001930976, "learning_rate": 6.47050826074715e-06, "loss": 0.3104, "step": 6724 }, { "epoch": 0.42, "grad_norm": 1.829690751521165, "learning_rate": 6.469534796726934e-06, "loss": 0.2899, "step": 6725 }, { "epoch": 0.42, "grad_norm": 4.493429523614867, "learning_rate": 6.4685612717331096e-06, "loss": 0.2882, "step": 6726 }, { "epoch": 0.42, "grad_norm": 1.476047469509268, "learning_rate": 6.467587685806067e-06, "loss": 0.2847, "step": 6727 }, { "epoch": 0.42, "grad_norm": 2.330666995233044, "learning_rate": 6.466614038986208e-06, "loss": 0.2914, "step": 6728 }, { "epoch": 0.42, "grad_norm": 3.179439580181806, "learning_rate": 6.465640331313925e-06, "loss": 0.3025, "step": 6729 }, { "epoch": 0.42, "grad_norm": 1.7500517706162204, "learning_rate": 6.464666562829624e-06, "loss": 0.2953, "step": 6730 }, { "epoch": 0.42, "grad_norm": 7.3990291736926075, "learning_rate": 6.4636927335737025e-06, "loss": 0.2996, "step": 6731 }, { "epoch": 0.42, "grad_norm": 2.036953456502084, "learning_rate": 6.462718843586572e-06, "loss": 0.3033, "step": 6732 }, { "epoch": 0.42, "grad_norm": 0.6268336755904357, "learning_rate": 6.461744892908637e-06, "loss": 0.4836, "step": 6733 }, { "epoch": 0.42, "grad_norm": 1.7624383530294316, "learning_rate": 6.460770881580311e-06, "loss": 0.2899, "step": 6734 }, { "epoch": 0.42, "grad_norm": 2.6027756428985964, "learning_rate": 6.4597968096420045e-06, "loss": 0.2991, "step": 6735 }, { "epoch": 0.42, "grad_norm": 2.1245425071428894, "learning_rate": 6.4588226771341386e-06, "loss": 0.3101, "step": 6736 }, { "epoch": 0.42, "grad_norm": 1.5774516435729113, "learning_rate": 6.457848484097128e-06, "loss": 0.2938, "step": 6737 }, { "epoch": 0.42, "grad_norm": 2.307916791625838, "learning_rate": 6.456874230571393e-06, "loss": 0.3357, "step": 6738 }, { "epoch": 0.42, "grad_norm": 1.5779917268889785, "learning_rate": 6.455899916597359e-06, "loss": 0.2867, "step": 6739 }, { "epoch": 0.42, "grad_norm": 1.688366142488379, "learning_rate": 6.4549255422154525e-06, "loss": 0.2887, "step": 6740 }, { "epoch": 0.42, "grad_norm": 1.9617952338691722, "learning_rate": 6.4539511074660995e-06, "loss": 0.3096, "step": 6741 }, { "epoch": 0.42, "grad_norm": 3.260581858861846, "learning_rate": 6.452976612389733e-06, "loss": 0.3243, "step": 6742 }, { "epoch": 0.42, "grad_norm": 2.004827513471801, "learning_rate": 6.452002057026786e-06, "loss": 0.2999, "step": 6743 }, { "epoch": 0.42, "grad_norm": 1.6405049139436518, "learning_rate": 6.451027441417696e-06, "loss": 0.2742, "step": 6744 }, { "epoch": 0.42, "grad_norm": 2.6131278877084996, "learning_rate": 6.4500527656028985e-06, "loss": 0.2981, "step": 6745 }, { "epoch": 0.42, "grad_norm": 1.9106438014988696, "learning_rate": 6.449078029622837e-06, "loss": 0.2896, "step": 6746 }, { "epoch": 0.42, "grad_norm": 1.8401447823120518, "learning_rate": 6.448103233517954e-06, "loss": 0.3143, "step": 6747 }, { "epoch": 0.42, "grad_norm": 1.879439296180402, "learning_rate": 6.447128377328695e-06, "loss": 0.2922, "step": 6748 }, { "epoch": 0.42, "grad_norm": 2.279784983777409, "learning_rate": 6.4461534610955104e-06, "loss": 0.2867, "step": 6749 }, { "epoch": 0.42, "grad_norm": 2.746662266839023, "learning_rate": 6.445178484858849e-06, "loss": 0.2913, "step": 6750 }, { "epoch": 0.42, "grad_norm": 2.671399715811076, "learning_rate": 6.444203448659165e-06, "loss": 0.3099, "step": 6751 }, { "epoch": 0.42, "grad_norm": 2.0312387518092354, "learning_rate": 6.443228352536918e-06, "loss": 0.2873, "step": 6752 }, { "epoch": 0.42, "grad_norm": 1.9592013263028545, "learning_rate": 6.4422531965325594e-06, "loss": 0.2914, "step": 6753 }, { "epoch": 0.42, "grad_norm": 2.6378974765143877, "learning_rate": 6.441277980686556e-06, "loss": 0.3196, "step": 6754 }, { "epoch": 0.42, "grad_norm": 19.90402098774003, "learning_rate": 6.4403027050393675e-06, "loss": 0.3007, "step": 6755 }, { "epoch": 0.42, "grad_norm": 2.739745636095288, "learning_rate": 6.439327369631464e-06, "loss": 0.2911, "step": 6756 }, { "epoch": 0.42, "grad_norm": 1.7499854498849223, "learning_rate": 6.438351974503309e-06, "loss": 0.3105, "step": 6757 }, { "epoch": 0.43, "grad_norm": 2.36591451892812, "learning_rate": 6.437376519695376e-06, "loss": 0.3302, "step": 6758 }, { "epoch": 0.43, "grad_norm": 2.7624095080741506, "learning_rate": 6.436401005248139e-06, "loss": 0.2895, "step": 6759 }, { "epoch": 0.43, "grad_norm": 3.714746600818347, "learning_rate": 6.435425431202074e-06, "loss": 0.2996, "step": 6760 }, { "epoch": 0.43, "grad_norm": 2.379943254084911, "learning_rate": 6.434449797597657e-06, "loss": 0.2836, "step": 6761 }, { "epoch": 0.43, "grad_norm": 0.6241433111758001, "learning_rate": 6.433474104475369e-06, "loss": 0.4965, "step": 6762 }, { "epoch": 0.43, "grad_norm": 1.5604552983852458, "learning_rate": 6.432498351875696e-06, "loss": 0.2715, "step": 6763 }, { "epoch": 0.43, "grad_norm": 4.704246886300958, "learning_rate": 6.43152253983912e-06, "loss": 0.2903, "step": 6764 }, { "epoch": 0.43, "grad_norm": 1.7847322408734418, "learning_rate": 6.430546668406133e-06, "loss": 0.2985, "step": 6765 }, { "epoch": 0.43, "grad_norm": 3.539045795134168, "learning_rate": 6.429570737617223e-06, "loss": 0.2816, "step": 6766 }, { "epoch": 0.43, "grad_norm": 2.8569052860052397, "learning_rate": 6.428594747512884e-06, "loss": 0.2994, "step": 6767 }, { "epoch": 0.43, "grad_norm": 1.9623127045136122, "learning_rate": 6.427618698133612e-06, "loss": 0.2873, "step": 6768 }, { "epoch": 0.43, "grad_norm": 3.505181360047184, "learning_rate": 6.4266425895199036e-06, "loss": 0.2754, "step": 6769 }, { "epoch": 0.43, "grad_norm": 8.008662784013174, "learning_rate": 6.425666421712263e-06, "loss": 0.3048, "step": 6770 }, { "epoch": 0.43, "grad_norm": 1.7449333988077806, "learning_rate": 6.424690194751186e-06, "loss": 0.3042, "step": 6771 }, { "epoch": 0.43, "grad_norm": 5.11664783106136, "learning_rate": 6.423713908677185e-06, "loss": 0.309, "step": 6772 }, { "epoch": 0.43, "grad_norm": 2.3902479893888047, "learning_rate": 6.422737563530763e-06, "loss": 0.3006, "step": 6773 }, { "epoch": 0.43, "grad_norm": 1.6467051612233414, "learning_rate": 6.4217611593524355e-06, "loss": 0.2875, "step": 6774 }, { "epoch": 0.43, "grad_norm": 2.8336430361345313, "learning_rate": 6.420784696182709e-06, "loss": 0.3061, "step": 6775 }, { "epoch": 0.43, "grad_norm": 3.7093595735481975, "learning_rate": 6.419808174062103e-06, "loss": 0.2974, "step": 6776 }, { "epoch": 0.43, "grad_norm": 2.0017537665334055, "learning_rate": 6.418831593031134e-06, "loss": 0.3129, "step": 6777 }, { "epoch": 0.43, "grad_norm": 3.0562628003301255, "learning_rate": 6.417854953130323e-06, "loss": 0.2976, "step": 6778 }, { "epoch": 0.43, "grad_norm": 8.636640367023912, "learning_rate": 6.416878254400191e-06, "loss": 0.2958, "step": 6779 }, { "epoch": 0.43, "grad_norm": 1.6510810730970376, "learning_rate": 6.4159014968812634e-06, "loss": 0.2977, "step": 6780 }, { "epoch": 0.43, "grad_norm": 1.8031167414175482, "learning_rate": 6.4149246806140675e-06, "loss": 0.3017, "step": 6781 }, { "epoch": 0.43, "grad_norm": 1.480695841073619, "learning_rate": 6.413947805639136e-06, "loss": 0.2872, "step": 6782 }, { "epoch": 0.43, "grad_norm": 2.5016659168010187, "learning_rate": 6.412970871996995e-06, "loss": 0.2858, "step": 6783 }, { "epoch": 0.43, "grad_norm": 1.9446304107640044, "learning_rate": 6.411993879728184e-06, "loss": 0.2937, "step": 6784 }, { "epoch": 0.43, "grad_norm": 2.314690855240622, "learning_rate": 6.411016828873239e-06, "loss": 0.2796, "step": 6785 }, { "epoch": 0.43, "grad_norm": 0.5776542150995976, "learning_rate": 6.4100397194727005e-06, "loss": 0.4739, "step": 6786 }, { "epoch": 0.43, "grad_norm": 10.40466950989219, "learning_rate": 6.409062551567109e-06, "loss": 0.3025, "step": 6787 }, { "epoch": 0.43, "grad_norm": 3.6651469506478778, "learning_rate": 6.4080853251970086e-06, "loss": 0.2947, "step": 6788 }, { "epoch": 0.43, "grad_norm": 2.1833000065185026, "learning_rate": 6.4071080404029475e-06, "loss": 0.2975, "step": 6789 }, { "epoch": 0.43, "grad_norm": 3.8783876707876814, "learning_rate": 6.4061306972254745e-06, "loss": 0.3256, "step": 6790 }, { "epoch": 0.43, "grad_norm": 1.6929911454243527, "learning_rate": 6.405153295705142e-06, "loss": 0.3157, "step": 6791 }, { "epoch": 0.43, "grad_norm": 2.0992830830375038, "learning_rate": 6.404175835882503e-06, "loss": 0.3031, "step": 6792 }, { "epoch": 0.43, "grad_norm": 1.6012424203390276, "learning_rate": 6.403198317798113e-06, "loss": 0.2832, "step": 6793 }, { "epoch": 0.43, "grad_norm": 2.1412361975886807, "learning_rate": 6.402220741492533e-06, "loss": 0.3084, "step": 6794 }, { "epoch": 0.43, "grad_norm": 1.8586675520090896, "learning_rate": 6.401243107006325e-06, "loss": 0.2929, "step": 6795 }, { "epoch": 0.43, "grad_norm": 1.5001244380557788, "learning_rate": 6.4002654143800515e-06, "loss": 0.2797, "step": 6796 }, { "epoch": 0.43, "grad_norm": 3.8021882489270977, "learning_rate": 6.399287663654279e-06, "loss": 0.3144, "step": 6797 }, { "epoch": 0.43, "grad_norm": 1.561296588065728, "learning_rate": 6.398309854869574e-06, "loss": 0.2905, "step": 6798 }, { "epoch": 0.43, "grad_norm": 2.6822116196263686, "learning_rate": 6.397331988066512e-06, "loss": 0.3022, "step": 6799 }, { "epoch": 0.43, "grad_norm": 2.59133152600341, "learning_rate": 6.396354063285662e-06, "loss": 0.321, "step": 6800 }, { "epoch": 0.43, "grad_norm": 3.7238356491332714, "learning_rate": 6.395376080567602e-06, "loss": 0.2834, "step": 6801 }, { "epoch": 0.43, "grad_norm": 2.8237679990774085, "learning_rate": 6.394398039952911e-06, "loss": 0.2856, "step": 6802 }, { "epoch": 0.43, "grad_norm": 3.0978506014726537, "learning_rate": 6.3934199414821674e-06, "loss": 0.2922, "step": 6803 }, { "epoch": 0.43, "grad_norm": 2.6291710724231745, "learning_rate": 6.392441785195956e-06, "loss": 0.2884, "step": 6804 }, { "epoch": 0.43, "grad_norm": 2.6154631905494288, "learning_rate": 6.391463571134862e-06, "loss": 0.3206, "step": 6805 }, { "epoch": 0.43, "grad_norm": 2.8472115736003967, "learning_rate": 6.390485299339473e-06, "loss": 0.3124, "step": 6806 }, { "epoch": 0.43, "grad_norm": 2.049227778719141, "learning_rate": 6.389506969850378e-06, "loss": 0.3071, "step": 6807 }, { "epoch": 0.43, "grad_norm": 6.852184541794767, "learning_rate": 6.3885285827081725e-06, "loss": 0.2909, "step": 6808 }, { "epoch": 0.43, "grad_norm": 4.210604253205481, "learning_rate": 6.3875501379534486e-06, "loss": 0.2947, "step": 6809 }, { "epoch": 0.43, "grad_norm": 4.195088755565158, "learning_rate": 6.386571635626804e-06, "loss": 0.3426, "step": 6810 }, { "epoch": 0.43, "grad_norm": 2.254386928581936, "learning_rate": 6.3855930757688415e-06, "loss": 0.2929, "step": 6811 }, { "epoch": 0.43, "grad_norm": 8.427413066891106, "learning_rate": 6.38461445842016e-06, "loss": 0.3237, "step": 6812 }, { "epoch": 0.43, "grad_norm": 1.479301294544089, "learning_rate": 6.383635783621365e-06, "loss": 0.3242, "step": 6813 }, { "epoch": 0.43, "grad_norm": 4.4322326984415525, "learning_rate": 6.382657051413063e-06, "loss": 0.3136, "step": 6814 }, { "epoch": 0.43, "grad_norm": 2.673907078170811, "learning_rate": 6.3816782618358666e-06, "loss": 0.2763, "step": 6815 }, { "epoch": 0.43, "grad_norm": 1.5224796805783334, "learning_rate": 6.380699414930385e-06, "loss": 0.2943, "step": 6816 }, { "epoch": 0.43, "grad_norm": 1.9402496872929773, "learning_rate": 6.37972051073723e-06, "loss": 0.2899, "step": 6817 }, { "epoch": 0.43, "grad_norm": 1.8635711574000435, "learning_rate": 6.378741549297021e-06, "loss": 0.2881, "step": 6818 }, { "epoch": 0.43, "grad_norm": 1.8063934042696637, "learning_rate": 6.377762530650375e-06, "loss": 0.3006, "step": 6819 }, { "epoch": 0.43, "grad_norm": 3.094089157008058, "learning_rate": 6.376783454837916e-06, "loss": 0.3159, "step": 6820 }, { "epoch": 0.43, "grad_norm": 1.7428949729006693, "learning_rate": 6.375804321900267e-06, "loss": 0.2843, "step": 6821 }, { "epoch": 0.43, "grad_norm": 1.7019580540891321, "learning_rate": 6.3748251318780514e-06, "loss": 0.2909, "step": 6822 }, { "epoch": 0.43, "grad_norm": 1.6504712380753666, "learning_rate": 6.3738458848119e-06, "loss": 0.2845, "step": 6823 }, { "epoch": 0.43, "grad_norm": 1.885256138973961, "learning_rate": 6.372866580742442e-06, "loss": 0.2956, "step": 6824 }, { "epoch": 0.43, "grad_norm": 2.5142776791261525, "learning_rate": 6.371887219710312e-06, "loss": 0.2929, "step": 6825 }, { "epoch": 0.43, "grad_norm": 1.5168769328775624, "learning_rate": 6.370907801756143e-06, "loss": 0.2959, "step": 6826 }, { "epoch": 0.43, "grad_norm": 1.6316484169195389, "learning_rate": 6.369928326920575e-06, "loss": 0.2942, "step": 6827 }, { "epoch": 0.43, "grad_norm": 1.365074744515247, "learning_rate": 6.368948795244247e-06, "loss": 0.2868, "step": 6828 }, { "epoch": 0.43, "grad_norm": 3.8919705280839447, "learning_rate": 6.367969206767803e-06, "loss": 0.3115, "step": 6829 }, { "epoch": 0.43, "grad_norm": 1.8577146692649886, "learning_rate": 6.366989561531887e-06, "loss": 0.28, "step": 6830 }, { "epoch": 0.43, "grad_norm": 1.27404396869779, "learning_rate": 6.3660098595771445e-06, "loss": 0.3005, "step": 6831 }, { "epoch": 0.43, "grad_norm": 1.7231158169295044, "learning_rate": 6.365030100944227e-06, "loss": 0.2921, "step": 6832 }, { "epoch": 0.43, "grad_norm": 2.8452785989556784, "learning_rate": 6.364050285673788e-06, "loss": 0.3021, "step": 6833 }, { "epoch": 0.43, "grad_norm": 2.034796840341878, "learning_rate": 6.363070413806478e-06, "loss": 0.3077, "step": 6834 }, { "epoch": 0.43, "grad_norm": 2.0652641545771186, "learning_rate": 6.362090485382956e-06, "loss": 0.29, "step": 6835 }, { "epoch": 0.43, "grad_norm": 1.953695334345325, "learning_rate": 6.361110500443879e-06, "loss": 0.2861, "step": 6836 }, { "epoch": 0.43, "grad_norm": 4.583631036849745, "learning_rate": 6.360130459029912e-06, "loss": 0.3059, "step": 6837 }, { "epoch": 0.43, "grad_norm": 1.4901758759164343, "learning_rate": 6.3591503611817155e-06, "loss": 0.3115, "step": 6838 }, { "epoch": 0.43, "grad_norm": 2.829612896761817, "learning_rate": 6.358170206939955e-06, "loss": 0.3155, "step": 6839 }, { "epoch": 0.43, "grad_norm": 1.8677532522239744, "learning_rate": 6.357189996345302e-06, "loss": 0.2933, "step": 6840 }, { "epoch": 0.43, "grad_norm": 4.431188282024258, "learning_rate": 6.356209729438425e-06, "loss": 0.3105, "step": 6841 }, { "epoch": 0.43, "grad_norm": 3.2546802077530046, "learning_rate": 6.3552294062599975e-06, "loss": 0.3002, "step": 6842 }, { "epoch": 0.43, "grad_norm": 2.2200722143856417, "learning_rate": 6.354249026850694e-06, "loss": 0.3055, "step": 6843 }, { "epoch": 0.43, "grad_norm": 2.5011573402567864, "learning_rate": 6.3532685912511934e-06, "loss": 0.2855, "step": 6844 }, { "epoch": 0.43, "grad_norm": 2.2676207427517316, "learning_rate": 6.352288099502175e-06, "loss": 0.299, "step": 6845 }, { "epoch": 0.43, "grad_norm": 2.963298980139748, "learning_rate": 6.351307551644322e-06, "loss": 0.3154, "step": 6846 }, { "epoch": 0.43, "grad_norm": 2.4566492371647595, "learning_rate": 6.350326947718319e-06, "loss": 0.3119, "step": 6847 }, { "epoch": 0.43, "grad_norm": 1.859494361893289, "learning_rate": 6.3493462877648515e-06, "loss": 0.3009, "step": 6848 }, { "epoch": 0.43, "grad_norm": 8.36861430609139, "learning_rate": 6.348365571824611e-06, "loss": 0.2866, "step": 6849 }, { "epoch": 0.43, "grad_norm": 1.8902335631050846, "learning_rate": 6.3473847999382855e-06, "loss": 0.3038, "step": 6850 }, { "epoch": 0.43, "grad_norm": 2.148905293926362, "learning_rate": 6.346403972146574e-06, "loss": 0.3024, "step": 6851 }, { "epoch": 0.43, "grad_norm": 0.6120394598504009, "learning_rate": 6.345423088490169e-06, "loss": 0.5123, "step": 6852 }, { "epoch": 0.43, "grad_norm": 2.936424933189742, "learning_rate": 6.344442149009771e-06, "loss": 0.2978, "step": 6853 }, { "epoch": 0.43, "grad_norm": 1.4006229347917698, "learning_rate": 6.343461153746079e-06, "loss": 0.2899, "step": 6854 }, { "epoch": 0.43, "grad_norm": 3.043274006634977, "learning_rate": 6.3424801027397984e-06, "loss": 0.3094, "step": 6855 }, { "epoch": 0.43, "grad_norm": 1.3576158863718646, "learning_rate": 6.3414989960316345e-06, "loss": 0.2981, "step": 6856 }, { "epoch": 0.43, "grad_norm": 7.41163007617088, "learning_rate": 6.340517833662293e-06, "loss": 0.2937, "step": 6857 }, { "epoch": 0.43, "grad_norm": 1.530216208151628, "learning_rate": 6.339536615672486e-06, "loss": 0.2888, "step": 6858 }, { "epoch": 0.43, "grad_norm": 2.315749199744174, "learning_rate": 6.338555342102927e-06, "loss": 0.2903, "step": 6859 }, { "epoch": 0.43, "grad_norm": 3.2305350401404063, "learning_rate": 6.337574012994327e-06, "loss": 0.3106, "step": 6860 }, { "epoch": 0.43, "grad_norm": 2.6948606536470603, "learning_rate": 6.336592628387407e-06, "loss": 0.3062, "step": 6861 }, { "epoch": 0.43, "grad_norm": 2.6648834760085593, "learning_rate": 6.335611188322883e-06, "loss": 0.3025, "step": 6862 }, { "epoch": 0.43, "grad_norm": 2.526308257063972, "learning_rate": 6.334629692841481e-06, "loss": 0.2867, "step": 6863 }, { "epoch": 0.43, "grad_norm": 2.4595612981056614, "learning_rate": 6.333648141983921e-06, "loss": 0.3038, "step": 6864 }, { "epoch": 0.43, "grad_norm": 2.71043145840467, "learning_rate": 6.33266653579093e-06, "loss": 0.2847, "step": 6865 }, { "epoch": 0.43, "grad_norm": 1.5690542517115196, "learning_rate": 6.3316848743032385e-06, "loss": 0.3124, "step": 6866 }, { "epoch": 0.43, "grad_norm": 2.705659968142546, "learning_rate": 6.3307031575615775e-06, "loss": 0.292, "step": 6867 }, { "epoch": 0.43, "grad_norm": 1.960419911846101, "learning_rate": 6.329721385606676e-06, "loss": 0.2902, "step": 6868 }, { "epoch": 0.43, "grad_norm": 1.4939959391311788, "learning_rate": 6.328739558479275e-06, "loss": 0.3122, "step": 6869 }, { "epoch": 0.43, "grad_norm": 1.9057049387463765, "learning_rate": 6.3277576762201074e-06, "loss": 0.332, "step": 6870 }, { "epoch": 0.43, "grad_norm": 2.010189182878927, "learning_rate": 6.326775738869917e-06, "loss": 0.2978, "step": 6871 }, { "epoch": 0.43, "grad_norm": 3.6184330075214888, "learning_rate": 6.325793746469443e-06, "loss": 0.3035, "step": 6872 }, { "epoch": 0.43, "grad_norm": 1.5469203734541837, "learning_rate": 6.324811699059433e-06, "loss": 0.2852, "step": 6873 }, { "epoch": 0.43, "grad_norm": 1.6770697859282753, "learning_rate": 6.323829596680633e-06, "loss": 0.2777, "step": 6874 }, { "epoch": 0.43, "grad_norm": 1.5636152530996292, "learning_rate": 6.322847439373792e-06, "loss": 0.2983, "step": 6875 }, { "epoch": 0.43, "grad_norm": 2.6705672976398467, "learning_rate": 6.321865227179658e-06, "loss": 0.2815, "step": 6876 }, { "epoch": 0.43, "grad_norm": 1.5980499810557056, "learning_rate": 6.3208829601389896e-06, "loss": 0.2957, "step": 6877 }, { "epoch": 0.43, "grad_norm": 2.736204496034844, "learning_rate": 6.319900638292541e-06, "loss": 0.2895, "step": 6878 }, { "epoch": 0.43, "grad_norm": 1.7046260857418942, "learning_rate": 6.318918261681072e-06, "loss": 0.2889, "step": 6879 }, { "epoch": 0.43, "grad_norm": 1.848262816668194, "learning_rate": 6.3179358303453386e-06, "loss": 0.2919, "step": 6880 }, { "epoch": 0.43, "grad_norm": 28.675346970086203, "learning_rate": 6.3169533443261085e-06, "loss": 0.3188, "step": 6881 }, { "epoch": 0.43, "grad_norm": 1.5106934754378571, "learning_rate": 6.315970803664145e-06, "loss": 0.2992, "step": 6882 }, { "epoch": 0.43, "grad_norm": 1.4680564361738777, "learning_rate": 6.314988208400215e-06, "loss": 0.2846, "step": 6883 }, { "epoch": 0.43, "grad_norm": 3.740275830738221, "learning_rate": 6.314005558575089e-06, "loss": 0.3061, "step": 6884 }, { "epoch": 0.43, "grad_norm": 2.59199399670399, "learning_rate": 6.313022854229539e-06, "loss": 0.2955, "step": 6885 }, { "epoch": 0.43, "grad_norm": 0.6104358404222554, "learning_rate": 6.312040095404337e-06, "loss": 0.4985, "step": 6886 }, { "epoch": 0.43, "grad_norm": 2.0916533426754236, "learning_rate": 6.311057282140261e-06, "loss": 0.2856, "step": 6887 }, { "epoch": 0.43, "grad_norm": 1.7000841777319762, "learning_rate": 6.310074414478091e-06, "loss": 0.3012, "step": 6888 }, { "epoch": 0.43, "grad_norm": 1.6799428434908734, "learning_rate": 6.309091492458608e-06, "loss": 0.2927, "step": 6889 }, { "epoch": 0.43, "grad_norm": 5.172910443727807, "learning_rate": 6.308108516122591e-06, "loss": 0.3175, "step": 6890 }, { "epoch": 0.43, "grad_norm": 2.277465820846939, "learning_rate": 6.307125485510829e-06, "loss": 0.2921, "step": 6891 }, { "epoch": 0.43, "grad_norm": 2.569782192125306, "learning_rate": 6.306142400664108e-06, "loss": 0.2996, "step": 6892 }, { "epoch": 0.43, "grad_norm": 2.142878579832889, "learning_rate": 6.30515926162322e-06, "loss": 0.2881, "step": 6893 }, { "epoch": 0.43, "grad_norm": 1.5730768397650798, "learning_rate": 6.304176068428957e-06, "loss": 0.2816, "step": 6894 }, { "epoch": 0.43, "grad_norm": 2.7981238689247325, "learning_rate": 6.30319282112211e-06, "loss": 0.2968, "step": 6895 }, { "epoch": 0.43, "grad_norm": 1.5272437486330543, "learning_rate": 6.30220951974348e-06, "loss": 0.2975, "step": 6896 }, { "epoch": 0.43, "grad_norm": 3.5952759735211863, "learning_rate": 6.3012261643338635e-06, "loss": 0.2968, "step": 6897 }, { "epoch": 0.43, "grad_norm": 1.7209101307714316, "learning_rate": 6.3002427549340615e-06, "loss": 0.285, "step": 6898 }, { "epoch": 0.43, "grad_norm": 1.5355351233576975, "learning_rate": 6.299259291584879e-06, "loss": 0.3108, "step": 6899 }, { "epoch": 0.43, "grad_norm": 2.328437310208531, "learning_rate": 6.298275774327121e-06, "loss": 0.3472, "step": 6900 }, { "epoch": 0.43, "grad_norm": 2.6083503246623474, "learning_rate": 6.297292203201595e-06, "loss": 0.2988, "step": 6901 }, { "epoch": 0.43, "grad_norm": 2.9392954330231182, "learning_rate": 6.296308578249109e-06, "loss": 0.3219, "step": 6902 }, { "epoch": 0.43, "grad_norm": 0.639787763238278, "learning_rate": 6.29532489951048e-06, "loss": 0.5236, "step": 6903 }, { "epoch": 0.43, "grad_norm": 5.932409822949063, "learning_rate": 6.29434116702652e-06, "loss": 0.3096, "step": 6904 }, { "epoch": 0.43, "grad_norm": 4.038587484729636, "learning_rate": 6.293357380838046e-06, "loss": 0.3124, "step": 6905 }, { "epoch": 0.43, "grad_norm": 0.6437779728204444, "learning_rate": 6.292373540985875e-06, "loss": 0.544, "step": 6906 }, { "epoch": 0.43, "grad_norm": 1.4183458562866678, "learning_rate": 6.291389647510833e-06, "loss": 0.2946, "step": 6907 }, { "epoch": 0.43, "grad_norm": 3.5418600018117234, "learning_rate": 6.2904057004537385e-06, "loss": 0.3101, "step": 6908 }, { "epoch": 0.43, "grad_norm": 1.7109504249161431, "learning_rate": 6.2894216998554215e-06, "loss": 0.2858, "step": 6909 }, { "epoch": 0.43, "grad_norm": 2.8419552482693984, "learning_rate": 6.288437645756706e-06, "loss": 0.2864, "step": 6910 }, { "epoch": 0.43, "grad_norm": 1.85911169554198, "learning_rate": 6.287453538198426e-06, "loss": 0.311, "step": 6911 }, { "epoch": 0.43, "grad_norm": 2.5054704968118577, "learning_rate": 6.28646937722141e-06, "loss": 0.2945, "step": 6912 }, { "epoch": 0.43, "grad_norm": 3.157171929352471, "learning_rate": 6.285485162866496e-06, "loss": 0.2975, "step": 6913 }, { "epoch": 0.43, "grad_norm": 3.093106304030806, "learning_rate": 6.284500895174518e-06, "loss": 0.2969, "step": 6914 }, { "epoch": 0.43, "grad_norm": 2.951128097908572, "learning_rate": 6.283516574186318e-06, "loss": 0.3002, "step": 6915 }, { "epoch": 0.43, "grad_norm": 2.7873416091672234, "learning_rate": 6.282532199942735e-06, "loss": 0.308, "step": 6916 }, { "epoch": 0.44, "grad_norm": 3.5250801150628495, "learning_rate": 6.281547772484612e-06, "loss": 0.3029, "step": 6917 }, { "epoch": 0.44, "grad_norm": 2.3239861226448197, "learning_rate": 6.280563291852796e-06, "loss": 0.2981, "step": 6918 }, { "epoch": 0.44, "grad_norm": 1.896650528267158, "learning_rate": 6.279578758088135e-06, "loss": 0.2865, "step": 6919 }, { "epoch": 0.44, "grad_norm": 6.046720401907298, "learning_rate": 6.278594171231478e-06, "loss": 0.2971, "step": 6920 }, { "epoch": 0.44, "grad_norm": 4.107336850805136, "learning_rate": 6.277609531323678e-06, "loss": 0.3215, "step": 6921 }, { "epoch": 0.44, "grad_norm": 2.557701835265859, "learning_rate": 6.27662483840559e-06, "loss": 0.3184, "step": 6922 }, { "epoch": 0.44, "grad_norm": 1.8742361321090764, "learning_rate": 6.27564009251807e-06, "loss": 0.2952, "step": 6923 }, { "epoch": 0.44, "grad_norm": 2.2743838864681423, "learning_rate": 6.274655293701974e-06, "loss": 0.285, "step": 6924 }, { "epoch": 0.44, "grad_norm": 1.9320725595818318, "learning_rate": 6.273670441998169e-06, "loss": 0.2882, "step": 6925 }, { "epoch": 0.44, "grad_norm": 1.645404943097612, "learning_rate": 6.272685537447513e-06, "loss": 0.2664, "step": 6926 }, { "epoch": 0.44, "grad_norm": 2.2758878763486856, "learning_rate": 6.271700580090876e-06, "loss": 0.311, "step": 6927 }, { "epoch": 0.44, "grad_norm": 1.9482575278035108, "learning_rate": 6.270715569969119e-06, "loss": 0.3052, "step": 6928 }, { "epoch": 0.44, "grad_norm": 2.112387212977741, "learning_rate": 6.26973050712312e-06, "loss": 0.3033, "step": 6929 }, { "epoch": 0.44, "grad_norm": 2.7241744926684435, "learning_rate": 6.2687453915937445e-06, "loss": 0.3023, "step": 6930 }, { "epoch": 0.44, "grad_norm": 2.0555602708856138, "learning_rate": 6.267760223421871e-06, "loss": 0.2969, "step": 6931 }, { "epoch": 0.44, "grad_norm": 0.6248297912514691, "learning_rate": 6.266775002648373e-06, "loss": 0.5389, "step": 6932 }, { "epoch": 0.44, "grad_norm": 2.037528832629987, "learning_rate": 6.265789729314131e-06, "loss": 0.2989, "step": 6933 }, { "epoch": 0.44, "grad_norm": 3.6910461415496076, "learning_rate": 6.264804403460023e-06, "loss": 0.3171, "step": 6934 }, { "epoch": 0.44, "grad_norm": 3.1443824498795303, "learning_rate": 6.263819025126936e-06, "loss": 0.2882, "step": 6935 }, { "epoch": 0.44, "grad_norm": 2.0070005688932357, "learning_rate": 6.2628335943557505e-06, "loss": 0.2792, "step": 6936 }, { "epoch": 0.44, "grad_norm": 0.6577178825907559, "learning_rate": 6.261848111187358e-06, "loss": 0.5032, "step": 6937 }, { "epoch": 0.44, "grad_norm": 2.2880460372140314, "learning_rate": 6.260862575662645e-06, "loss": 0.311, "step": 6938 }, { "epoch": 0.44, "grad_norm": 3.2988816930377083, "learning_rate": 6.259876987822506e-06, "loss": 0.2874, "step": 6939 }, { "epoch": 0.44, "grad_norm": 6.303903898880498, "learning_rate": 6.258891347707829e-06, "loss": 0.2868, "step": 6940 }, { "epoch": 0.44, "grad_norm": 1.4554178697573767, "learning_rate": 6.257905655359519e-06, "loss": 0.2776, "step": 6941 }, { "epoch": 0.44, "grad_norm": 1.3989185717964019, "learning_rate": 6.256919910818466e-06, "loss": 0.3024, "step": 6942 }, { "epoch": 0.44, "grad_norm": 2.4713631313250355, "learning_rate": 6.255934114125574e-06, "loss": 0.2867, "step": 6943 }, { "epoch": 0.44, "grad_norm": 14.477810837095802, "learning_rate": 6.254948265321744e-06, "loss": 0.2911, "step": 6944 }, { "epoch": 0.44, "grad_norm": 1.9950190859446826, "learning_rate": 6.253962364447883e-06, "loss": 0.2826, "step": 6945 }, { "epoch": 0.44, "grad_norm": 3.5048232846671787, "learning_rate": 6.252976411544896e-06, "loss": 0.2893, "step": 6946 }, { "epoch": 0.44, "grad_norm": 2.81103356279454, "learning_rate": 6.251990406653691e-06, "loss": 0.3192, "step": 6947 }, { "epoch": 0.44, "grad_norm": 4.050149692739653, "learning_rate": 6.25100434981518e-06, "loss": 0.3046, "step": 6948 }, { "epoch": 0.44, "grad_norm": 2.911724694471854, "learning_rate": 6.250018241070278e-06, "loss": 0.2898, "step": 6949 }, { "epoch": 0.44, "grad_norm": 1.8625527564946367, "learning_rate": 6.249032080459898e-06, "loss": 0.296, "step": 6950 }, { "epoch": 0.44, "grad_norm": 12.71810994009719, "learning_rate": 6.248045868024958e-06, "loss": 0.2963, "step": 6951 }, { "epoch": 0.44, "grad_norm": 1.6184633602532053, "learning_rate": 6.247059603806379e-06, "loss": 0.287, "step": 6952 }, { "epoch": 0.44, "grad_norm": 2.176729576462319, "learning_rate": 6.246073287845083e-06, "loss": 0.2843, "step": 6953 }, { "epoch": 0.44, "grad_norm": 3.2516712664615466, "learning_rate": 6.24508692018199e-06, "loss": 0.2998, "step": 6954 }, { "epoch": 0.44, "grad_norm": 3.0138997086144292, "learning_rate": 6.2441005008580314e-06, "loss": 0.3118, "step": 6955 }, { "epoch": 0.44, "grad_norm": 1.3472168870737558, "learning_rate": 6.243114029914133e-06, "loss": 0.284, "step": 6956 }, { "epoch": 0.44, "grad_norm": 4.171887894936276, "learning_rate": 6.242127507391226e-06, "loss": 0.2945, "step": 6957 }, { "epoch": 0.44, "grad_norm": 1.359229503674128, "learning_rate": 6.241140933330241e-06, "loss": 0.2897, "step": 6958 }, { "epoch": 0.44, "grad_norm": 1.8023195721854788, "learning_rate": 6.240154307772115e-06, "loss": 0.2934, "step": 6959 }, { "epoch": 0.44, "grad_norm": 1.7584205824226065, "learning_rate": 6.2391676307577845e-06, "loss": 0.2928, "step": 6960 }, { "epoch": 0.44, "grad_norm": 2.3600575699249644, "learning_rate": 6.238180902328188e-06, "loss": 0.2819, "step": 6961 }, { "epoch": 0.44, "grad_norm": 1.928848767948197, "learning_rate": 6.237194122524264e-06, "loss": 0.2941, "step": 6962 }, { "epoch": 0.44, "grad_norm": 2.866835490502667, "learning_rate": 6.236207291386962e-06, "loss": 0.3024, "step": 6963 }, { "epoch": 0.44, "grad_norm": 2.4273734466129238, "learning_rate": 6.235220408957221e-06, "loss": 0.3057, "step": 6964 }, { "epoch": 0.44, "grad_norm": 2.862038667793041, "learning_rate": 6.234233475275994e-06, "loss": 0.2852, "step": 6965 }, { "epoch": 0.44, "grad_norm": 2.8983752565715197, "learning_rate": 6.233246490384224e-06, "loss": 0.2887, "step": 6966 }, { "epoch": 0.44, "grad_norm": 1.7021755181041338, "learning_rate": 6.23225945432287e-06, "loss": 0.2842, "step": 6967 }, { "epoch": 0.44, "grad_norm": 2.287035243422365, "learning_rate": 6.231272367132881e-06, "loss": 0.3123, "step": 6968 }, { "epoch": 0.44, "grad_norm": 2.945247621903697, "learning_rate": 6.230285228855215e-06, "loss": 0.2919, "step": 6969 }, { "epoch": 0.44, "grad_norm": 2.020562013384556, "learning_rate": 6.229298039530829e-06, "loss": 0.2861, "step": 6970 }, { "epoch": 0.44, "grad_norm": 2.0372056156931793, "learning_rate": 6.228310799200685e-06, "loss": 0.2948, "step": 6971 }, { "epoch": 0.44, "grad_norm": 1.912464584260742, "learning_rate": 6.227323507905743e-06, "loss": 0.2911, "step": 6972 }, { "epoch": 0.44, "grad_norm": 2.374210068437158, "learning_rate": 6.226336165686969e-06, "loss": 0.2929, "step": 6973 }, { "epoch": 0.44, "grad_norm": 4.135244812766038, "learning_rate": 6.225348772585329e-06, "loss": 0.3076, "step": 6974 }, { "epoch": 0.44, "grad_norm": 1.6406959607573017, "learning_rate": 6.224361328641794e-06, "loss": 0.2881, "step": 6975 }, { "epoch": 0.44, "grad_norm": 2.1404839011362387, "learning_rate": 6.2233738338973304e-06, "loss": 0.2953, "step": 6976 }, { "epoch": 0.44, "grad_norm": 1.649553312960678, "learning_rate": 6.222386288392914e-06, "loss": 0.307, "step": 6977 }, { "epoch": 0.44, "grad_norm": 2.513140057397445, "learning_rate": 6.2213986921695194e-06, "loss": 0.3009, "step": 6978 }, { "epoch": 0.44, "grad_norm": 2.273120614566552, "learning_rate": 6.220411045268124e-06, "loss": 0.2908, "step": 6979 }, { "epoch": 0.44, "grad_norm": 2.083398440398737, "learning_rate": 6.219423347729707e-06, "loss": 0.2883, "step": 6980 }, { "epoch": 0.44, "grad_norm": 2.8022818181447064, "learning_rate": 6.218435599595249e-06, "loss": 0.3016, "step": 6981 }, { "epoch": 0.44, "grad_norm": 2.715627016313357, "learning_rate": 6.217447800905733e-06, "loss": 0.2806, "step": 6982 }, { "epoch": 0.44, "grad_norm": 1.9147989051437198, "learning_rate": 6.216459951702146e-06, "loss": 0.2956, "step": 6983 }, { "epoch": 0.44, "grad_norm": 2.57260093166668, "learning_rate": 6.215472052025474e-06, "loss": 0.2859, "step": 6984 }, { "epoch": 0.44, "grad_norm": 2.6807125697290495, "learning_rate": 6.214484101916709e-06, "loss": 0.2853, "step": 6985 }, { "epoch": 0.44, "grad_norm": 1.712395827031748, "learning_rate": 6.21349610141684e-06, "loss": 0.3045, "step": 6986 }, { "epoch": 0.44, "grad_norm": 58.18177499566314, "learning_rate": 6.2125080505668645e-06, "loss": 0.3021, "step": 6987 }, { "epoch": 0.44, "grad_norm": 3.5855958975202866, "learning_rate": 6.2115199494077735e-06, "loss": 0.2961, "step": 6988 }, { "epoch": 0.44, "grad_norm": 2.754485949036334, "learning_rate": 6.210531797980571e-06, "loss": 0.3101, "step": 6989 }, { "epoch": 0.44, "grad_norm": 1.8998780353349038, "learning_rate": 6.2095435963262514e-06, "loss": 0.3012, "step": 6990 }, { "epoch": 0.44, "grad_norm": 2.6426158708593297, "learning_rate": 6.2085553444858225e-06, "loss": 0.2826, "step": 6991 }, { "epoch": 0.44, "grad_norm": 4.342512165812579, "learning_rate": 6.207567042500282e-06, "loss": 0.2853, "step": 6992 }, { "epoch": 0.44, "grad_norm": 2.042777282932608, "learning_rate": 6.206578690410643e-06, "loss": 0.277, "step": 6993 }, { "epoch": 0.44, "grad_norm": 1.6647666484805896, "learning_rate": 6.20559028825791e-06, "loss": 0.2834, "step": 6994 }, { "epoch": 0.44, "grad_norm": 1.5568308132031212, "learning_rate": 6.204601836083094e-06, "loss": 0.2961, "step": 6995 }, { "epoch": 0.44, "grad_norm": 3.0035566784926324, "learning_rate": 6.203613333927209e-06, "loss": 0.2787, "step": 6996 }, { "epoch": 0.44, "grad_norm": 1.3794714496850886, "learning_rate": 6.202624781831269e-06, "loss": 0.2754, "step": 6997 }, { "epoch": 0.44, "grad_norm": 2.683218158441574, "learning_rate": 6.20163617983629e-06, "loss": 0.303, "step": 6998 }, { "epoch": 0.44, "grad_norm": 2.316001094462051, "learning_rate": 6.200647527983292e-06, "loss": 0.2835, "step": 6999 }, { "epoch": 0.44, "grad_norm": 2.7957678371881527, "learning_rate": 6.199658826313295e-06, "loss": 0.3096, "step": 7000 }, { "epoch": 0.44, "grad_norm": 2.139733121461754, "learning_rate": 6.198670074867324e-06, "loss": 0.3103, "step": 7001 }, { "epoch": 0.44, "grad_norm": 1.4758325644328265, "learning_rate": 6.197681273686401e-06, "loss": 0.2833, "step": 7002 }, { "epoch": 0.44, "grad_norm": 5.786111703777581, "learning_rate": 6.196692422811554e-06, "loss": 0.3245, "step": 7003 }, { "epoch": 0.44, "grad_norm": 3.4195571136952108, "learning_rate": 6.195703522283813e-06, "loss": 0.304, "step": 7004 }, { "epoch": 0.44, "grad_norm": 2.368769143360143, "learning_rate": 6.194714572144212e-06, "loss": 0.3047, "step": 7005 }, { "epoch": 0.44, "grad_norm": 2.4142774733542174, "learning_rate": 6.193725572433779e-06, "loss": 0.3021, "step": 7006 }, { "epoch": 0.44, "grad_norm": 2.3621891793787175, "learning_rate": 6.192736523193551e-06, "loss": 0.3102, "step": 7007 }, { "epoch": 0.44, "grad_norm": 2.34851259692707, "learning_rate": 6.191747424464567e-06, "loss": 0.306, "step": 7008 }, { "epoch": 0.44, "grad_norm": 2.2927614161375742, "learning_rate": 6.1907582762878675e-06, "loss": 0.3111, "step": 7009 }, { "epoch": 0.44, "grad_norm": 2.2750354088498064, "learning_rate": 6.189769078704489e-06, "loss": 0.3084, "step": 7010 }, { "epoch": 0.44, "grad_norm": 10.013965366878603, "learning_rate": 6.188779831755479e-06, "loss": 0.3017, "step": 7011 }, { "epoch": 0.44, "grad_norm": 2.013557669244629, "learning_rate": 6.1877905354818825e-06, "loss": 0.3053, "step": 7012 }, { "epoch": 0.44, "grad_norm": 2.3582681305222066, "learning_rate": 6.186801189924748e-06, "loss": 0.2781, "step": 7013 }, { "epoch": 0.44, "grad_norm": 3.750226125509539, "learning_rate": 6.185811795125122e-06, "loss": 0.309, "step": 7014 }, { "epoch": 0.44, "grad_norm": 1.786131507164553, "learning_rate": 6.18482235112406e-06, "loss": 0.2924, "step": 7015 }, { "epoch": 0.44, "grad_norm": 1.8849568031186652, "learning_rate": 6.183832857962614e-06, "loss": 0.2992, "step": 7016 }, { "epoch": 0.44, "grad_norm": 2.856593453702831, "learning_rate": 6.18284331568184e-06, "loss": 0.3275, "step": 7017 }, { "epoch": 0.44, "grad_norm": 2.2274692600465977, "learning_rate": 6.181853724322795e-06, "loss": 0.2941, "step": 7018 }, { "epoch": 0.44, "grad_norm": 2.120222048101711, "learning_rate": 6.180864083926541e-06, "loss": 0.281, "step": 7019 }, { "epoch": 0.44, "grad_norm": 1.9364150542022918, "learning_rate": 6.179874394534138e-06, "loss": 0.3085, "step": 7020 }, { "epoch": 0.44, "grad_norm": 7.128182292913789, "learning_rate": 6.178884656186651e-06, "loss": 0.2761, "step": 7021 }, { "epoch": 0.44, "grad_norm": 2.6459217754761926, "learning_rate": 6.177894868925144e-06, "loss": 0.2888, "step": 7022 }, { "epoch": 0.44, "grad_norm": 5.379192067164824, "learning_rate": 6.17690503279069e-06, "loss": 0.306, "step": 7023 }, { "epoch": 0.44, "grad_norm": 2.3443047043338927, "learning_rate": 6.175915147824353e-06, "loss": 0.2965, "step": 7024 }, { "epoch": 0.44, "grad_norm": 2.9122964560256457, "learning_rate": 6.174925214067209e-06, "loss": 0.3075, "step": 7025 }, { "epoch": 0.44, "grad_norm": 1.7078695101143218, "learning_rate": 6.17393523156033e-06, "loss": 0.2822, "step": 7026 }, { "epoch": 0.44, "grad_norm": 3.4347276812654037, "learning_rate": 6.172945200344794e-06, "loss": 0.2816, "step": 7027 }, { "epoch": 0.44, "grad_norm": 2.052861955587367, "learning_rate": 6.171955120461679e-06, "loss": 0.2809, "step": 7028 }, { "epoch": 0.44, "grad_norm": 3.6833519806050177, "learning_rate": 6.170964991952063e-06, "loss": 0.3065, "step": 7029 }, { "epoch": 0.44, "grad_norm": 2.4351329321545907, "learning_rate": 6.169974814857029e-06, "loss": 0.2893, "step": 7030 }, { "epoch": 0.44, "grad_norm": 0.6541129622906818, "learning_rate": 6.168984589217665e-06, "loss": 0.4677, "step": 7031 }, { "epoch": 0.44, "grad_norm": 1.7955176642312072, "learning_rate": 6.167994315075051e-06, "loss": 0.3035, "step": 7032 }, { "epoch": 0.44, "grad_norm": 2.068203637383184, "learning_rate": 6.16700399247028e-06, "loss": 0.3021, "step": 7033 }, { "epoch": 0.44, "grad_norm": 1.5691572599151804, "learning_rate": 6.166013621444439e-06, "loss": 0.293, "step": 7034 }, { "epoch": 0.44, "grad_norm": 2.7879507597793993, "learning_rate": 6.165023202038623e-06, "loss": 0.2894, "step": 7035 }, { "epoch": 0.44, "grad_norm": 2.1276244932199386, "learning_rate": 6.164032734293923e-06, "loss": 0.2785, "step": 7036 }, { "epoch": 0.44, "grad_norm": 4.676875938315613, "learning_rate": 6.163042218251441e-06, "loss": 0.313, "step": 7037 }, { "epoch": 0.44, "grad_norm": 1.8636923280359605, "learning_rate": 6.162051653952268e-06, "loss": 0.2848, "step": 7038 }, { "epoch": 0.44, "grad_norm": 1.8956087264028727, "learning_rate": 6.16106104143751e-06, "loss": 0.3253, "step": 7039 }, { "epoch": 0.44, "grad_norm": 6.0370163004082045, "learning_rate": 6.160070380748266e-06, "loss": 0.3339, "step": 7040 }, { "epoch": 0.44, "grad_norm": 3.3509695033589586, "learning_rate": 6.159079671925643e-06, "loss": 0.2997, "step": 7041 }, { "epoch": 0.44, "grad_norm": 4.296569875515288, "learning_rate": 6.158088915010744e-06, "loss": 0.2879, "step": 7042 }, { "epoch": 0.44, "grad_norm": 0.6044115897038609, "learning_rate": 6.15709811004468e-06, "loss": 0.4974, "step": 7043 }, { "epoch": 0.44, "grad_norm": 1.8354352805137628, "learning_rate": 6.156107257068558e-06, "loss": 0.2906, "step": 7044 }, { "epoch": 0.44, "grad_norm": 3.2308941184507693, "learning_rate": 6.155116356123495e-06, "loss": 0.2823, "step": 7045 }, { "epoch": 0.44, "grad_norm": 1.911403847403431, "learning_rate": 6.154125407250602e-06, "loss": 0.29, "step": 7046 }, { "epoch": 0.44, "grad_norm": 2.47474944203617, "learning_rate": 6.153134410490995e-06, "loss": 0.2883, "step": 7047 }, { "epoch": 0.44, "grad_norm": 1.3271239921105271, "learning_rate": 6.152143365885794e-06, "loss": 0.2917, "step": 7048 }, { "epoch": 0.44, "grad_norm": 1.9133580788293076, "learning_rate": 6.151152273476118e-06, "loss": 0.2858, "step": 7049 }, { "epoch": 0.44, "grad_norm": 19.410731303828012, "learning_rate": 6.150161133303088e-06, "loss": 0.3041, "step": 7050 }, { "epoch": 0.44, "grad_norm": 3.48746589224747, "learning_rate": 6.149169945407832e-06, "loss": 0.2927, "step": 7051 }, { "epoch": 0.44, "grad_norm": 2.2059173948238775, "learning_rate": 6.1481787098314725e-06, "loss": 0.3055, "step": 7052 }, { "epoch": 0.44, "grad_norm": 2.0858129348336796, "learning_rate": 6.14718742661514e-06, "loss": 0.2933, "step": 7053 }, { "epoch": 0.44, "grad_norm": 2.4960797857699237, "learning_rate": 6.146196095799963e-06, "loss": 0.2749, "step": 7054 }, { "epoch": 0.44, "grad_norm": 4.904959603471647, "learning_rate": 6.145204717427073e-06, "loss": 0.291, "step": 7055 }, { "epoch": 0.44, "grad_norm": 2.2726555804621618, "learning_rate": 6.144213291537606e-06, "loss": 0.2991, "step": 7056 }, { "epoch": 0.44, "grad_norm": 1.5150798268392562, "learning_rate": 6.143221818172699e-06, "loss": 0.2961, "step": 7057 }, { "epoch": 0.44, "grad_norm": 5.619053693609467, "learning_rate": 6.142230297373486e-06, "loss": 0.299, "step": 7058 }, { "epoch": 0.44, "grad_norm": 3.5399819273112656, "learning_rate": 6.141238729181109e-06, "loss": 0.2867, "step": 7059 }, { "epoch": 0.44, "grad_norm": 2.4249270772240146, "learning_rate": 6.1402471136367115e-06, "loss": 0.2939, "step": 7060 }, { "epoch": 0.44, "grad_norm": 2.078587070086647, "learning_rate": 6.139255450781436e-06, "loss": 0.2876, "step": 7061 }, { "epoch": 0.44, "grad_norm": 1.5371946901276299, "learning_rate": 6.138263740656427e-06, "loss": 0.2646, "step": 7062 }, { "epoch": 0.44, "grad_norm": 2.154862380993804, "learning_rate": 6.137271983302834e-06, "loss": 0.312, "step": 7063 }, { "epoch": 0.44, "grad_norm": 1.2056076224871501, "learning_rate": 6.136280178761806e-06, "loss": 0.2954, "step": 7064 }, { "epoch": 0.44, "grad_norm": 3.7749927945554256, "learning_rate": 6.135288327074497e-06, "loss": 0.3172, "step": 7065 }, { "epoch": 0.44, "grad_norm": 1.8188161739814512, "learning_rate": 6.134296428282056e-06, "loss": 0.3134, "step": 7066 }, { "epoch": 0.44, "grad_norm": 3.233217469619812, "learning_rate": 6.1333044824256435e-06, "loss": 0.2885, "step": 7067 }, { "epoch": 0.44, "grad_norm": 1.975546975137299, "learning_rate": 6.132312489546414e-06, "loss": 0.2984, "step": 7068 }, { "epoch": 0.44, "grad_norm": 3.8913759033808324, "learning_rate": 6.131320449685529e-06, "loss": 0.2788, "step": 7069 }, { "epoch": 0.44, "grad_norm": 4.477800206370894, "learning_rate": 6.130328362884148e-06, "loss": 0.2956, "step": 7070 }, { "epoch": 0.44, "grad_norm": 0.6984344336266934, "learning_rate": 6.129336229183437e-06, "loss": 0.518, "step": 7071 }, { "epoch": 0.44, "grad_norm": 3.8583193101830235, "learning_rate": 6.128344048624558e-06, "loss": 0.3035, "step": 7072 }, { "epoch": 0.44, "grad_norm": 2.2840903028232784, "learning_rate": 6.1273518212486825e-06, "loss": 0.2788, "step": 7073 }, { "epoch": 0.44, "grad_norm": 4.548828807577619, "learning_rate": 6.126359547096975e-06, "loss": 0.2797, "step": 7074 }, { "epoch": 0.44, "grad_norm": 5.519022640767992, "learning_rate": 6.125367226210612e-06, "loss": 0.299, "step": 7075 }, { "epoch": 0.45, "grad_norm": 1.4147106372018525, "learning_rate": 6.124374858630762e-06, "loss": 0.2934, "step": 7076 }, { "epoch": 0.45, "grad_norm": 1.6580370895773484, "learning_rate": 6.123382444398603e-06, "loss": 0.2755, "step": 7077 }, { "epoch": 0.45, "grad_norm": 1.3587927852990556, "learning_rate": 6.12238998355531e-06, "loss": 0.2892, "step": 7078 }, { "epoch": 0.45, "grad_norm": 3.578125520614798, "learning_rate": 6.121397476142064e-06, "loss": 0.2821, "step": 7079 }, { "epoch": 0.45, "grad_norm": 2.038925348742282, "learning_rate": 6.1204049222000435e-06, "loss": 0.2789, "step": 7080 }, { "epoch": 0.45, "grad_norm": 3.3745016399428254, "learning_rate": 6.1194123217704336e-06, "loss": 0.2845, "step": 7081 }, { "epoch": 0.45, "grad_norm": 1.7100527980503495, "learning_rate": 6.118419674894418e-06, "loss": 0.2932, "step": 7082 }, { "epoch": 0.45, "grad_norm": 2.379610724171847, "learning_rate": 6.1174269816131845e-06, "loss": 0.3309, "step": 7083 }, { "epoch": 0.45, "grad_norm": 1.2657517778028917, "learning_rate": 6.11643424196792e-06, "loss": 0.295, "step": 7084 }, { "epoch": 0.45, "grad_norm": 5.733993237194969, "learning_rate": 6.115441455999816e-06, "loss": 0.2892, "step": 7085 }, { "epoch": 0.45, "grad_norm": 5.657008334448093, "learning_rate": 6.114448623750065e-06, "loss": 0.279, "step": 7086 }, { "epoch": 0.45, "grad_norm": 2.3213655311496555, "learning_rate": 6.113455745259861e-06, "loss": 0.3175, "step": 7087 }, { "epoch": 0.45, "grad_norm": 2.5280282867737696, "learning_rate": 6.112462820570399e-06, "loss": 0.3049, "step": 7088 }, { "epoch": 0.45, "grad_norm": 2.972235837101983, "learning_rate": 6.11146984972288e-06, "loss": 0.3146, "step": 7089 }, { "epoch": 0.45, "grad_norm": 0.5848359505232334, "learning_rate": 6.110476832758503e-06, "loss": 0.4673, "step": 7090 }, { "epoch": 0.45, "grad_norm": 2.6857764963936, "learning_rate": 6.10948376971847e-06, "loss": 0.2994, "step": 7091 }, { "epoch": 0.45, "grad_norm": 3.8886980918386227, "learning_rate": 6.108490660643982e-06, "loss": 0.2876, "step": 7092 }, { "epoch": 0.45, "grad_norm": 4.040459490022842, "learning_rate": 6.107497505576251e-06, "loss": 0.2765, "step": 7093 }, { "epoch": 0.45, "grad_norm": 6.509208148055233, "learning_rate": 6.106504304556479e-06, "loss": 0.2864, "step": 7094 }, { "epoch": 0.45, "grad_norm": 1.6197656977857953, "learning_rate": 6.10551105762588e-06, "loss": 0.3046, "step": 7095 }, { "epoch": 0.45, "grad_norm": 1.9027807338901899, "learning_rate": 6.104517764825662e-06, "loss": 0.2777, "step": 7096 }, { "epoch": 0.45, "grad_norm": 1.766224200303339, "learning_rate": 6.103524426197041e-06, "loss": 0.2775, "step": 7097 }, { "epoch": 0.45, "grad_norm": 2.965032226299235, "learning_rate": 6.10253104178123e-06, "loss": 0.2851, "step": 7098 }, { "epoch": 0.45, "grad_norm": 1.9785809208109495, "learning_rate": 6.1015376116194506e-06, "loss": 0.3002, "step": 7099 }, { "epoch": 0.45, "grad_norm": 1.3618764576495743, "learning_rate": 6.100544135752916e-06, "loss": 0.2931, "step": 7100 }, { "epoch": 0.45, "grad_norm": 2.8428649776226034, "learning_rate": 6.0995506142228525e-06, "loss": 0.2921, "step": 7101 }, { "epoch": 0.45, "grad_norm": 3.0358845575392865, "learning_rate": 6.09855704707048e-06, "loss": 0.2974, "step": 7102 }, { "epoch": 0.45, "grad_norm": 6.832942096883757, "learning_rate": 6.097563434337026e-06, "loss": 0.2927, "step": 7103 }, { "epoch": 0.45, "grad_norm": 2.005828899782704, "learning_rate": 6.096569776063712e-06, "loss": 0.2955, "step": 7104 }, { "epoch": 0.45, "grad_norm": 2.1884957411659056, "learning_rate": 6.095576072291774e-06, "loss": 0.3011, "step": 7105 }, { "epoch": 0.45, "grad_norm": 1.7264328630247696, "learning_rate": 6.094582323062437e-06, "loss": 0.2942, "step": 7106 }, { "epoch": 0.45, "grad_norm": 6.5422946025736, "learning_rate": 6.093588528416936e-06, "loss": 0.2919, "step": 7107 }, { "epoch": 0.45, "grad_norm": 4.149446507980943, "learning_rate": 6.092594688396504e-06, "loss": 0.3032, "step": 7108 }, { "epoch": 0.45, "grad_norm": 1.714015273782947, "learning_rate": 6.091600803042378e-06, "loss": 0.2971, "step": 7109 }, { "epoch": 0.45, "grad_norm": 1.6346469554531453, "learning_rate": 6.090606872395796e-06, "loss": 0.2971, "step": 7110 }, { "epoch": 0.45, "grad_norm": 2.6839766515658936, "learning_rate": 6.089612896497996e-06, "loss": 0.311, "step": 7111 }, { "epoch": 0.45, "grad_norm": 2.251165125405875, "learning_rate": 6.088618875390223e-06, "loss": 0.3107, "step": 7112 }, { "epoch": 0.45, "grad_norm": 2.228872572673163, "learning_rate": 6.087624809113721e-06, "loss": 0.2842, "step": 7113 }, { "epoch": 0.45, "grad_norm": 2.5442661757751215, "learning_rate": 6.086630697709731e-06, "loss": 0.2978, "step": 7114 }, { "epoch": 0.45, "grad_norm": 2.2380844371099577, "learning_rate": 6.085636541219504e-06, "loss": 0.2898, "step": 7115 }, { "epoch": 0.45, "grad_norm": 2.403116211780939, "learning_rate": 6.084642339684289e-06, "loss": 0.3015, "step": 7116 }, { "epoch": 0.45, "grad_norm": 1.8748136352845692, "learning_rate": 6.083648093145337e-06, "loss": 0.3157, "step": 7117 }, { "epoch": 0.45, "grad_norm": 4.264386049161767, "learning_rate": 6.0826538016439e-06, "loss": 0.3019, "step": 7118 }, { "epoch": 0.45, "grad_norm": 1.424593499904079, "learning_rate": 6.081659465221234e-06, "loss": 0.2966, "step": 7119 }, { "epoch": 0.45, "grad_norm": 3.2664871301678624, "learning_rate": 6.080665083918595e-06, "loss": 0.2923, "step": 7120 }, { "epoch": 0.45, "grad_norm": 0.6016217598693909, "learning_rate": 6.079670657777244e-06, "loss": 0.5003, "step": 7121 }, { "epoch": 0.45, "grad_norm": 1.599686531993162, "learning_rate": 6.078676186838438e-06, "loss": 0.2986, "step": 7122 }, { "epoch": 0.45, "grad_norm": 2.0419308709735158, "learning_rate": 6.077681671143443e-06, "loss": 0.3194, "step": 7123 }, { "epoch": 0.45, "grad_norm": 5.3146080493616905, "learning_rate": 6.076687110733519e-06, "loss": 0.2942, "step": 7124 }, { "epoch": 0.45, "grad_norm": 1.3781187233316055, "learning_rate": 6.075692505649937e-06, "loss": 0.2602, "step": 7125 }, { "epoch": 0.45, "grad_norm": 2.3047037457388875, "learning_rate": 6.074697855933959e-06, "loss": 0.2953, "step": 7126 }, { "epoch": 0.45, "grad_norm": 1.7933948377616522, "learning_rate": 6.07370316162686e-06, "loss": 0.2808, "step": 7127 }, { "epoch": 0.45, "grad_norm": 1.8672118045593933, "learning_rate": 6.0727084227699095e-06, "loss": 0.2843, "step": 7128 }, { "epoch": 0.45, "grad_norm": 1.9693002900450496, "learning_rate": 6.071713639404382e-06, "loss": 0.2911, "step": 7129 }, { "epoch": 0.45, "grad_norm": 2.8233998369605118, "learning_rate": 6.07071881157155e-06, "loss": 0.2838, "step": 7130 }, { "epoch": 0.45, "grad_norm": 2.053406986259955, "learning_rate": 6.069723939312695e-06, "loss": 0.3018, "step": 7131 }, { "epoch": 0.45, "grad_norm": 1.7077429588707012, "learning_rate": 6.068729022669092e-06, "loss": 0.2779, "step": 7132 }, { "epoch": 0.45, "grad_norm": 1.6434443393824358, "learning_rate": 6.067734061682024e-06, "loss": 0.2968, "step": 7133 }, { "epoch": 0.45, "grad_norm": 2.0316381344492025, "learning_rate": 6.066739056392774e-06, "loss": 0.2954, "step": 7134 }, { "epoch": 0.45, "grad_norm": 1.5362265329115818, "learning_rate": 6.065744006842626e-06, "loss": 0.2978, "step": 7135 }, { "epoch": 0.45, "grad_norm": 1.415171399932464, "learning_rate": 6.064748913072864e-06, "loss": 0.2853, "step": 7136 }, { "epoch": 0.45, "grad_norm": 1.6488797212567636, "learning_rate": 6.063753775124781e-06, "loss": 0.291, "step": 7137 }, { "epoch": 0.45, "grad_norm": 2.6571025766959937, "learning_rate": 6.062758593039663e-06, "loss": 0.2965, "step": 7138 }, { "epoch": 0.45, "grad_norm": 2.4006475600524397, "learning_rate": 6.061763366858804e-06, "loss": 0.2868, "step": 7139 }, { "epoch": 0.45, "grad_norm": 1.4657827633756655, "learning_rate": 6.060768096623496e-06, "loss": 0.3023, "step": 7140 }, { "epoch": 0.45, "grad_norm": 1.710688928026125, "learning_rate": 6.059772782375036e-06, "loss": 0.2826, "step": 7141 }, { "epoch": 0.45, "grad_norm": 3.5995991529604687, "learning_rate": 6.0587774241547205e-06, "loss": 0.3118, "step": 7142 }, { "epoch": 0.45, "grad_norm": 3.041110035251263, "learning_rate": 6.057782022003851e-06, "loss": 0.2813, "step": 7143 }, { "epoch": 0.45, "grad_norm": 2.0509229914424894, "learning_rate": 6.056786575963725e-06, "loss": 0.2976, "step": 7144 }, { "epoch": 0.45, "grad_norm": 2.1445565628745213, "learning_rate": 6.0557910860756466e-06, "loss": 0.2924, "step": 7145 }, { "epoch": 0.45, "grad_norm": 2.20498732398269, "learning_rate": 6.054795552380921e-06, "loss": 0.2961, "step": 7146 }, { "epoch": 0.45, "grad_norm": 3.8094318107666756, "learning_rate": 6.053799974920856e-06, "loss": 0.2857, "step": 7147 }, { "epoch": 0.45, "grad_norm": 1.4924062943715422, "learning_rate": 6.052804353736757e-06, "loss": 0.2792, "step": 7148 }, { "epoch": 0.45, "grad_norm": 12.540322723520255, "learning_rate": 6.051808688869934e-06, "loss": 0.3007, "step": 7149 }, { "epoch": 0.45, "grad_norm": 2.777149268579538, "learning_rate": 6.050812980361701e-06, "loss": 0.3074, "step": 7150 }, { "epoch": 0.45, "grad_norm": 1.3407254052984399, "learning_rate": 6.049817228253373e-06, "loss": 0.277, "step": 7151 }, { "epoch": 0.45, "grad_norm": 5.7574284251759975, "learning_rate": 6.048821432586261e-06, "loss": 0.3029, "step": 7152 }, { "epoch": 0.45, "grad_norm": 1.4172601326843792, "learning_rate": 6.047825593401686e-06, "loss": 0.2824, "step": 7153 }, { "epoch": 0.45, "grad_norm": 1.7614750685381748, "learning_rate": 6.046829710740966e-06, "loss": 0.302, "step": 7154 }, { "epoch": 0.45, "grad_norm": 1.925876730667548, "learning_rate": 6.045833784645422e-06, "loss": 0.304, "step": 7155 }, { "epoch": 0.45, "grad_norm": 1.8993360446893568, "learning_rate": 6.044837815156377e-06, "loss": 0.2811, "step": 7156 }, { "epoch": 0.45, "grad_norm": 2.5832044248453947, "learning_rate": 6.043841802315153e-06, "loss": 0.3002, "step": 7157 }, { "epoch": 0.45, "grad_norm": 5.597899906254344, "learning_rate": 6.042845746163081e-06, "loss": 0.3098, "step": 7158 }, { "epoch": 0.45, "grad_norm": 2.2392190828824825, "learning_rate": 6.041849646741485e-06, "loss": 0.3133, "step": 7159 }, { "epoch": 0.45, "grad_norm": 2.049087140610685, "learning_rate": 6.040853504091698e-06, "loss": 0.2963, "step": 7160 }, { "epoch": 0.45, "grad_norm": 1.7356790660117334, "learning_rate": 6.03985731825505e-06, "loss": 0.2759, "step": 7161 }, { "epoch": 0.45, "grad_norm": 3.5465860338521598, "learning_rate": 6.038861089272875e-06, "loss": 0.2939, "step": 7162 }, { "epoch": 0.45, "grad_norm": 1.560534782086251, "learning_rate": 6.037864817186507e-06, "loss": 0.2859, "step": 7163 }, { "epoch": 0.45, "grad_norm": 4.841044845563695, "learning_rate": 6.036868502037286e-06, "loss": 0.2788, "step": 7164 }, { "epoch": 0.45, "grad_norm": 3.0138936133192953, "learning_rate": 6.035872143866549e-06, "loss": 0.296, "step": 7165 }, { "epoch": 0.45, "grad_norm": 1.4743239005657713, "learning_rate": 6.034875742715636e-06, "loss": 0.296, "step": 7166 }, { "epoch": 0.45, "grad_norm": 1.88237633275076, "learning_rate": 6.03387929862589e-06, "loss": 0.3117, "step": 7167 }, { "epoch": 0.45, "grad_norm": 4.762908556869072, "learning_rate": 6.032882811638656e-06, "loss": 0.3031, "step": 7168 }, { "epoch": 0.45, "grad_norm": 1.416071537999233, "learning_rate": 6.03188628179528e-06, "loss": 0.2716, "step": 7169 }, { "epoch": 0.45, "grad_norm": 2.3807843710024055, "learning_rate": 6.030889709137109e-06, "loss": 0.2826, "step": 7170 }, { "epoch": 0.45, "grad_norm": 2.425970117898785, "learning_rate": 6.029893093705492e-06, "loss": 0.2963, "step": 7171 }, { "epoch": 0.45, "grad_norm": 4.606691631289159, "learning_rate": 6.0288964355417825e-06, "loss": 0.317, "step": 7172 }, { "epoch": 0.45, "grad_norm": 1.7422822195154766, "learning_rate": 6.027899734687332e-06, "loss": 0.2906, "step": 7173 }, { "epoch": 0.45, "grad_norm": 1.6710875179664204, "learning_rate": 6.026902991183496e-06, "loss": 0.3053, "step": 7174 }, { "epoch": 0.45, "grad_norm": 2.757845693594874, "learning_rate": 6.02590620507163e-06, "loss": 0.3094, "step": 7175 }, { "epoch": 0.45, "grad_norm": 4.299915257306338, "learning_rate": 6.024909376393093e-06, "loss": 0.2785, "step": 7176 }, { "epoch": 0.45, "grad_norm": 3.3634373999122538, "learning_rate": 6.0239125051892475e-06, "loss": 0.2949, "step": 7177 }, { "epoch": 0.45, "grad_norm": 2.150206470673079, "learning_rate": 6.022915591501453e-06, "loss": 0.2912, "step": 7178 }, { "epoch": 0.45, "grad_norm": 2.8093004842615352, "learning_rate": 6.021918635371072e-06, "loss": 0.2975, "step": 7179 }, { "epoch": 0.45, "grad_norm": 2.29026863303202, "learning_rate": 6.020921636839473e-06, "loss": 0.2986, "step": 7180 }, { "epoch": 0.45, "grad_norm": 1.6955111512391063, "learning_rate": 6.019924595948022e-06, "loss": 0.289, "step": 7181 }, { "epoch": 0.45, "grad_norm": 1.773420507313563, "learning_rate": 6.018927512738088e-06, "loss": 0.2971, "step": 7182 }, { "epoch": 0.45, "grad_norm": 1.4514447964474035, "learning_rate": 6.017930387251041e-06, "loss": 0.2911, "step": 7183 }, { "epoch": 0.45, "grad_norm": 1.4629645084668805, "learning_rate": 6.016933219528255e-06, "loss": 0.3011, "step": 7184 }, { "epoch": 0.45, "grad_norm": 2.3663085418713634, "learning_rate": 6.015936009611103e-06, "loss": 0.2928, "step": 7185 }, { "epoch": 0.45, "grad_norm": 2.1752263871073754, "learning_rate": 6.0149387575409615e-06, "loss": 0.3098, "step": 7186 }, { "epoch": 0.45, "grad_norm": 1.6604531010115278, "learning_rate": 6.0139414633592075e-06, "loss": 0.286, "step": 7187 }, { "epoch": 0.45, "grad_norm": 1.3047216009249556, "learning_rate": 6.012944127107222e-06, "loss": 0.2945, "step": 7188 }, { "epoch": 0.45, "grad_norm": 1.6102446940460937, "learning_rate": 6.0119467488263846e-06, "loss": 0.3039, "step": 7189 }, { "epoch": 0.45, "grad_norm": 1.6873860243434702, "learning_rate": 6.010949328558081e-06, "loss": 0.2813, "step": 7190 }, { "epoch": 0.45, "grad_norm": 3.65250739275473, "learning_rate": 6.009951866343693e-06, "loss": 0.2974, "step": 7191 }, { "epoch": 0.45, "grad_norm": 5.136638892242125, "learning_rate": 6.008954362224608e-06, "loss": 0.3219, "step": 7192 }, { "epoch": 0.45, "grad_norm": 2.76969378823849, "learning_rate": 6.007956816242214e-06, "loss": 0.3051, "step": 7193 }, { "epoch": 0.45, "grad_norm": 2.0477884946942178, "learning_rate": 6.006959228437903e-06, "loss": 0.279, "step": 7194 }, { "epoch": 0.45, "grad_norm": 2.955981559417683, "learning_rate": 6.005961598853064e-06, "loss": 0.289, "step": 7195 }, { "epoch": 0.45, "grad_norm": 1.8725487337804585, "learning_rate": 6.004963927529092e-06, "loss": 0.2863, "step": 7196 }, { "epoch": 0.45, "grad_norm": 2.5115666860324817, "learning_rate": 6.00396621450738e-06, "loss": 0.2904, "step": 7197 }, { "epoch": 0.45, "grad_norm": 1.806800120613164, "learning_rate": 6.002968459829328e-06, "loss": 0.3106, "step": 7198 }, { "epoch": 0.45, "grad_norm": 4.963758732342158, "learning_rate": 6.001970663536333e-06, "loss": 0.2889, "step": 7199 }, { "epoch": 0.45, "grad_norm": 194.9734329297264, "learning_rate": 6.000972825669795e-06, "loss": 0.3226, "step": 7200 }, { "epoch": 0.45, "grad_norm": 1.5076472658835243, "learning_rate": 5.999974946271116e-06, "loss": 0.2848, "step": 7201 }, { "epoch": 0.45, "grad_norm": 1.935928815908976, "learning_rate": 5.9989770253817015e-06, "loss": 0.2931, "step": 7202 }, { "epoch": 0.45, "grad_norm": 4.462734348506588, "learning_rate": 5.9979790630429556e-06, "loss": 0.2916, "step": 7203 }, { "epoch": 0.45, "grad_norm": 2.087409253017042, "learning_rate": 5.9969810592962866e-06, "loss": 0.3014, "step": 7204 }, { "epoch": 0.45, "grad_norm": 2.3780574766695843, "learning_rate": 5.995983014183101e-06, "loss": 0.3041, "step": 7205 }, { "epoch": 0.45, "grad_norm": 5.939480393315704, "learning_rate": 5.994984927744812e-06, "loss": 0.2984, "step": 7206 }, { "epoch": 0.45, "grad_norm": 1.6145280272522786, "learning_rate": 5.993986800022831e-06, "loss": 0.2807, "step": 7207 }, { "epoch": 0.45, "grad_norm": 3.8932343800479923, "learning_rate": 5.9929886310585715e-06, "loss": 0.2725, "step": 7208 }, { "epoch": 0.45, "grad_norm": 1.6408730271203036, "learning_rate": 5.9919904208934495e-06, "loss": 0.2866, "step": 7209 }, { "epoch": 0.45, "grad_norm": 2.057829410640411, "learning_rate": 5.990992169568884e-06, "loss": 0.3055, "step": 7210 }, { "epoch": 0.45, "grad_norm": 2.0850932435888914, "learning_rate": 5.989993877126294e-06, "loss": 0.2901, "step": 7211 }, { "epoch": 0.45, "grad_norm": 1.826839017585909, "learning_rate": 5.988995543607099e-06, "loss": 0.2915, "step": 7212 }, { "epoch": 0.45, "grad_norm": 2.125635241380182, "learning_rate": 5.9879971690527205e-06, "loss": 0.3033, "step": 7213 }, { "epoch": 0.45, "grad_norm": 1.8525267201779592, "learning_rate": 5.986998753504586e-06, "loss": 0.2862, "step": 7214 }, { "epoch": 0.45, "grad_norm": 4.721278197258601, "learning_rate": 5.98600029700412e-06, "loss": 0.3181, "step": 7215 }, { "epoch": 0.45, "grad_norm": 2.8176466356005263, "learning_rate": 5.985001799592751e-06, "loss": 0.2982, "step": 7216 }, { "epoch": 0.45, "grad_norm": 1.6227108712118903, "learning_rate": 5.984003261311907e-06, "loss": 0.2962, "step": 7217 }, { "epoch": 0.45, "grad_norm": 2.6973316147915964, "learning_rate": 5.983004682203019e-06, "loss": 0.2946, "step": 7218 }, { "epoch": 0.45, "grad_norm": 1.8593563400881514, "learning_rate": 5.982006062307523e-06, "loss": 0.2914, "step": 7219 }, { "epoch": 0.45, "grad_norm": 1.6367525378794925, "learning_rate": 5.9810074016668505e-06, "loss": 0.3104, "step": 7220 }, { "epoch": 0.45, "grad_norm": 2.2345639766843854, "learning_rate": 5.980008700322437e-06, "loss": 0.2808, "step": 7221 }, { "epoch": 0.45, "grad_norm": 1.1959260099266358, "learning_rate": 5.979009958315724e-06, "loss": 0.2763, "step": 7222 }, { "epoch": 0.45, "grad_norm": 2.869002819082661, "learning_rate": 5.978011175688146e-06, "loss": 0.3071, "step": 7223 }, { "epoch": 0.45, "grad_norm": 5.07459957347487, "learning_rate": 5.977012352481151e-06, "loss": 0.2802, "step": 7224 }, { "epoch": 0.45, "grad_norm": 2.8806389119383606, "learning_rate": 5.976013488736176e-06, "loss": 0.3045, "step": 7225 }, { "epoch": 0.45, "grad_norm": 1.9032865003983352, "learning_rate": 5.975014584494666e-06, "loss": 0.2754, "step": 7226 }, { "epoch": 0.45, "grad_norm": 1.6699520016809288, "learning_rate": 5.974015639798071e-06, "loss": 0.3022, "step": 7227 }, { "epoch": 0.45, "grad_norm": 1.76294421370008, "learning_rate": 5.973016654687838e-06, "loss": 0.3242, "step": 7228 }, { "epoch": 0.45, "grad_norm": 3.2832268842075085, "learning_rate": 5.972017629205414e-06, "loss": 0.2946, "step": 7229 }, { "epoch": 0.45, "grad_norm": 2.221390776982483, "learning_rate": 5.971018563392252e-06, "loss": 0.2823, "step": 7230 }, { "epoch": 0.45, "grad_norm": 6.806604932737318, "learning_rate": 5.970019457289807e-06, "loss": 0.2818, "step": 7231 }, { "epoch": 0.45, "grad_norm": 3.002898366321213, "learning_rate": 5.969020310939531e-06, "loss": 0.2873, "step": 7232 }, { "epoch": 0.45, "grad_norm": 1.7389739878325832, "learning_rate": 5.96802112438288e-06, "loss": 0.2862, "step": 7233 }, { "epoch": 0.45, "grad_norm": 1.7332056198816865, "learning_rate": 5.967021897661313e-06, "loss": 0.3007, "step": 7234 }, { "epoch": 0.46, "grad_norm": 2.8028012010013845, "learning_rate": 5.96602263081629e-06, "loss": 0.3179, "step": 7235 }, { "epoch": 0.46, "grad_norm": 3.0725242141711324, "learning_rate": 5.965023323889274e-06, "loss": 0.3128, "step": 7236 }, { "epoch": 0.46, "grad_norm": 1.6802336999132215, "learning_rate": 5.964023976921726e-06, "loss": 0.2844, "step": 7237 }, { "epoch": 0.46, "grad_norm": 2.1904381635886274, "learning_rate": 5.963024589955109e-06, "loss": 0.3195, "step": 7238 }, { "epoch": 0.46, "grad_norm": 3.3641856927142584, "learning_rate": 5.962025163030892e-06, "loss": 0.2782, "step": 7239 }, { "epoch": 0.46, "grad_norm": 1.806119631520209, "learning_rate": 5.961025696190542e-06, "loss": 0.293, "step": 7240 }, { "epoch": 0.46, "grad_norm": 7.6191839085668445, "learning_rate": 5.96002618947553e-06, "loss": 0.2976, "step": 7241 }, { "epoch": 0.46, "grad_norm": 5.4019028021268545, "learning_rate": 5.959026642927326e-06, "loss": 0.2995, "step": 7242 }, { "epoch": 0.46, "grad_norm": 6.688824631444094, "learning_rate": 5.958027056587402e-06, "loss": 0.2929, "step": 7243 }, { "epoch": 0.46, "grad_norm": 3.6488963726063965, "learning_rate": 5.957027430497234e-06, "loss": 0.303, "step": 7244 }, { "epoch": 0.46, "grad_norm": 1.7708326852048644, "learning_rate": 5.956027764698299e-06, "loss": 0.2841, "step": 7245 }, { "epoch": 0.46, "grad_norm": 1.5246818748926652, "learning_rate": 5.955028059232074e-06, "loss": 0.2819, "step": 7246 }, { "epoch": 0.46, "grad_norm": 4.3463251772205735, "learning_rate": 5.9540283141400375e-06, "loss": 0.312, "step": 7247 }, { "epoch": 0.46, "grad_norm": 4.583674965530944, "learning_rate": 5.953028529463671e-06, "loss": 0.2778, "step": 7248 }, { "epoch": 0.46, "grad_norm": 3.2052136113771246, "learning_rate": 5.952028705244461e-06, "loss": 0.2943, "step": 7249 }, { "epoch": 0.46, "grad_norm": 1.6632695069557133, "learning_rate": 5.951028841523887e-06, "loss": 0.2919, "step": 7250 }, { "epoch": 0.46, "grad_norm": 1.9484240863350015, "learning_rate": 5.9500289383434375e-06, "loss": 0.2834, "step": 7251 }, { "epoch": 0.46, "grad_norm": 3.637149342076611, "learning_rate": 5.949028995744599e-06, "loss": 0.2985, "step": 7252 }, { "epoch": 0.46, "grad_norm": 5.3914995576076095, "learning_rate": 5.9480290137688626e-06, "loss": 0.287, "step": 7253 }, { "epoch": 0.46, "grad_norm": 1.931397503932855, "learning_rate": 5.94702899245772e-06, "loss": 0.3006, "step": 7254 }, { "epoch": 0.46, "grad_norm": 1.8063217585051436, "learning_rate": 5.94602893185266e-06, "loss": 0.2959, "step": 7255 }, { "epoch": 0.46, "grad_norm": 4.289267573423351, "learning_rate": 5.94502883199518e-06, "loss": 0.2917, "step": 7256 }, { "epoch": 0.46, "grad_norm": 1.8022680327182692, "learning_rate": 5.944028692926776e-06, "loss": 0.3005, "step": 7257 }, { "epoch": 0.46, "grad_norm": 29.957254472344218, "learning_rate": 5.943028514688944e-06, "loss": 0.3, "step": 7258 }, { "epoch": 0.46, "grad_norm": 2.832096501221565, "learning_rate": 5.942028297323183e-06, "loss": 0.2954, "step": 7259 }, { "epoch": 0.46, "grad_norm": 1.930825432132346, "learning_rate": 5.941028040870997e-06, "loss": 0.297, "step": 7260 }, { "epoch": 0.46, "grad_norm": 3.0185155568750943, "learning_rate": 5.940027745373884e-06, "loss": 0.3066, "step": 7261 }, { "epoch": 0.46, "grad_norm": 3.288879995901175, "learning_rate": 5.9390274108733515e-06, "loss": 0.2902, "step": 7262 }, { "epoch": 0.46, "grad_norm": 2.686703416547137, "learning_rate": 5.938027037410903e-06, "loss": 0.2868, "step": 7263 }, { "epoch": 0.46, "grad_norm": 2.1451843873584933, "learning_rate": 5.937026625028047e-06, "loss": 0.2829, "step": 7264 }, { "epoch": 0.46, "grad_norm": 2.1755340959470746, "learning_rate": 5.936026173766292e-06, "loss": 0.318, "step": 7265 }, { "epoch": 0.46, "grad_norm": 55.750355936623194, "learning_rate": 5.9350256836671494e-06, "loss": 0.29, "step": 7266 }, { "epoch": 0.46, "grad_norm": 1.765183249115334, "learning_rate": 5.934025154772128e-06, "loss": 0.2789, "step": 7267 }, { "epoch": 0.46, "grad_norm": 2.9681713972256576, "learning_rate": 5.933024587122745e-06, "loss": 0.3115, "step": 7268 }, { "epoch": 0.46, "grad_norm": 2.1816787712382673, "learning_rate": 5.932023980760515e-06, "loss": 0.292, "step": 7269 }, { "epoch": 0.46, "grad_norm": 1.6475029963947472, "learning_rate": 5.931023335726957e-06, "loss": 0.2901, "step": 7270 }, { "epoch": 0.46, "grad_norm": 0.6838641190179063, "learning_rate": 5.930022652063583e-06, "loss": 0.5215, "step": 7271 }, { "epoch": 0.46, "grad_norm": 2.5108281733733957, "learning_rate": 5.929021929811919e-06, "loss": 0.2849, "step": 7272 }, { "epoch": 0.46, "grad_norm": 1.712095615158612, "learning_rate": 5.928021169013485e-06, "loss": 0.2846, "step": 7273 }, { "epoch": 0.46, "grad_norm": 14.674838096644239, "learning_rate": 5.927020369709805e-06, "loss": 0.2866, "step": 7274 }, { "epoch": 0.46, "grad_norm": 5.024535918173516, "learning_rate": 5.926019531942401e-06, "loss": 0.2937, "step": 7275 }, { "epoch": 0.46, "grad_norm": 2.4882969756059152, "learning_rate": 5.925018655752804e-06, "loss": 0.3078, "step": 7276 }, { "epoch": 0.46, "grad_norm": 1.561645532711883, "learning_rate": 5.924017741182539e-06, "loss": 0.3147, "step": 7277 }, { "epoch": 0.46, "grad_norm": 3.6572663485292467, "learning_rate": 5.923016788273138e-06, "loss": 0.2933, "step": 7278 }, { "epoch": 0.46, "grad_norm": 2.308643302273684, "learning_rate": 5.922015797066129e-06, "loss": 0.2912, "step": 7279 }, { "epoch": 0.46, "grad_norm": 4.030645572286068, "learning_rate": 5.921014767603048e-06, "loss": 0.3169, "step": 7280 }, { "epoch": 0.46, "grad_norm": 1.39587674260693, "learning_rate": 5.920013699925429e-06, "loss": 0.295, "step": 7281 }, { "epoch": 0.46, "grad_norm": 112.0111833183104, "learning_rate": 5.919012594074807e-06, "loss": 0.2982, "step": 7282 }, { "epoch": 0.46, "grad_norm": 2.6589209320407328, "learning_rate": 5.9180114500927185e-06, "loss": 0.2857, "step": 7283 }, { "epoch": 0.46, "grad_norm": 2.2727747773501745, "learning_rate": 5.917010268020707e-06, "loss": 0.2886, "step": 7284 }, { "epoch": 0.46, "grad_norm": 1.849798585055369, "learning_rate": 5.91600904790031e-06, "loss": 0.2879, "step": 7285 }, { "epoch": 0.46, "grad_norm": 2.9573511836423125, "learning_rate": 5.915007789773069e-06, "loss": 0.3066, "step": 7286 }, { "epoch": 0.46, "grad_norm": 1.8373904254532667, "learning_rate": 5.914006493680531e-06, "loss": 0.2846, "step": 7287 }, { "epoch": 0.46, "grad_norm": 1.30294072982048, "learning_rate": 5.9130051596642404e-06, "loss": 0.2938, "step": 7288 }, { "epoch": 0.46, "grad_norm": 3.657875267381038, "learning_rate": 5.912003787765743e-06, "loss": 0.2852, "step": 7289 }, { "epoch": 0.46, "grad_norm": 3.433293948338587, "learning_rate": 5.911002378026588e-06, "loss": 0.2856, "step": 7290 }, { "epoch": 0.46, "grad_norm": 3.630769748538676, "learning_rate": 5.910000930488326e-06, "loss": 0.2885, "step": 7291 }, { "epoch": 0.46, "grad_norm": 1.8859035035346345, "learning_rate": 5.908999445192513e-06, "loss": 0.2901, "step": 7292 }, { "epoch": 0.46, "grad_norm": 2.250733228068876, "learning_rate": 5.907997922180695e-06, "loss": 0.2854, "step": 7293 }, { "epoch": 0.46, "grad_norm": 2.0362654451623787, "learning_rate": 5.90699636149443e-06, "loss": 0.3085, "step": 7294 }, { "epoch": 0.46, "grad_norm": 2.9214597344762963, "learning_rate": 5.905994763175275e-06, "loss": 0.2974, "step": 7295 }, { "epoch": 0.46, "grad_norm": 1.5592251145918676, "learning_rate": 5.9049931272647905e-06, "loss": 0.2989, "step": 7296 }, { "epoch": 0.46, "grad_norm": 2.145934769889822, "learning_rate": 5.903991453804532e-06, "loss": 0.304, "step": 7297 }, { "epoch": 0.46, "grad_norm": 0.7282478379612267, "learning_rate": 5.9029897428360625e-06, "loss": 0.4977, "step": 7298 }, { "epoch": 0.46, "grad_norm": 3.846397462762834, "learning_rate": 5.9019879944009454e-06, "loss": 0.2994, "step": 7299 }, { "epoch": 0.46, "grad_norm": 2.584589394237719, "learning_rate": 5.900986208540744e-06, "loss": 0.3091, "step": 7300 }, { "epoch": 0.46, "grad_norm": 1.9359017420102511, "learning_rate": 5.899984385297023e-06, "loss": 0.2829, "step": 7301 }, { "epoch": 0.46, "grad_norm": 3.3744193182835036, "learning_rate": 5.898982524711353e-06, "loss": 0.2976, "step": 7302 }, { "epoch": 0.46, "grad_norm": 1.8582866800424578, "learning_rate": 5.897980626825301e-06, "loss": 0.2817, "step": 7303 }, { "epoch": 0.46, "grad_norm": 1.2194878079083267, "learning_rate": 5.896978691680438e-06, "loss": 0.2885, "step": 7304 }, { "epoch": 0.46, "grad_norm": 2.6776804598703174, "learning_rate": 5.895976719318335e-06, "loss": 0.2838, "step": 7305 }, { "epoch": 0.46, "grad_norm": 0.5756283658154336, "learning_rate": 5.894974709780568e-06, "loss": 0.4982, "step": 7306 }, { "epoch": 0.46, "grad_norm": 2.1569390316349253, "learning_rate": 5.893972663108709e-06, "loss": 0.286, "step": 7307 }, { "epoch": 0.46, "grad_norm": 1.669584606805467, "learning_rate": 5.892970579344337e-06, "loss": 0.2945, "step": 7308 }, { "epoch": 0.46, "grad_norm": 1.7325601149234242, "learning_rate": 5.891968458529028e-06, "loss": 0.3276, "step": 7309 }, { "epoch": 0.46, "grad_norm": 1.3263148341652418, "learning_rate": 5.890966300704366e-06, "loss": 0.293, "step": 7310 }, { "epoch": 0.46, "grad_norm": 2.6667154132344058, "learning_rate": 5.8899641059119284e-06, "loss": 0.2831, "step": 7311 }, { "epoch": 0.46, "grad_norm": 1.8787594973105606, "learning_rate": 5.8889618741933e-06, "loss": 0.3063, "step": 7312 }, { "epoch": 0.46, "grad_norm": 2.9704987007244803, "learning_rate": 5.887959605590062e-06, "loss": 0.274, "step": 7313 }, { "epoch": 0.46, "grad_norm": 2.0981803359192868, "learning_rate": 5.886957300143806e-06, "loss": 0.2906, "step": 7314 }, { "epoch": 0.46, "grad_norm": 6.1888955643199, "learning_rate": 5.885954957896115e-06, "loss": 0.2862, "step": 7315 }, { "epoch": 0.46, "grad_norm": 2.018121329207774, "learning_rate": 5.884952578888578e-06, "loss": 0.2933, "step": 7316 }, { "epoch": 0.46, "grad_norm": 1.7170643080728223, "learning_rate": 5.883950163162788e-06, "loss": 0.3004, "step": 7317 }, { "epoch": 0.46, "grad_norm": 18.44992481760076, "learning_rate": 5.882947710760336e-06, "loss": 0.3015, "step": 7318 }, { "epoch": 0.46, "grad_norm": 1.7227972049219455, "learning_rate": 5.881945221722815e-06, "loss": 0.2998, "step": 7319 }, { "epoch": 0.46, "grad_norm": 1.4908067710610826, "learning_rate": 5.880942696091818e-06, "loss": 0.2789, "step": 7320 }, { "epoch": 0.46, "grad_norm": 1.8873695098000953, "learning_rate": 5.879940133908946e-06, "loss": 0.3, "step": 7321 }, { "epoch": 0.46, "grad_norm": 2.2268818679339066, "learning_rate": 5.878937535215795e-06, "loss": 0.3084, "step": 7322 }, { "epoch": 0.46, "grad_norm": 2.069477399588808, "learning_rate": 5.877934900053963e-06, "loss": 0.2885, "step": 7323 }, { "epoch": 0.46, "grad_norm": 3.766883386039501, "learning_rate": 5.876932228465054e-06, "loss": 0.2943, "step": 7324 }, { "epoch": 0.46, "grad_norm": 1.5011298653926668, "learning_rate": 5.875929520490669e-06, "loss": 0.2968, "step": 7325 }, { "epoch": 0.46, "grad_norm": 1.8427409423855354, "learning_rate": 5.874926776172413e-06, "loss": 0.2957, "step": 7326 }, { "epoch": 0.46, "grad_norm": 1.78595019446095, "learning_rate": 5.873923995551888e-06, "loss": 0.2875, "step": 7327 }, { "epoch": 0.46, "grad_norm": 1.9306178224589718, "learning_rate": 5.8729211786707075e-06, "loss": 0.3156, "step": 7328 }, { "epoch": 0.46, "grad_norm": 1.985264088394038, "learning_rate": 5.871918325570475e-06, "loss": 0.3139, "step": 7329 }, { "epoch": 0.46, "grad_norm": 2.2983958776720996, "learning_rate": 5.8709154362928045e-06, "loss": 0.2781, "step": 7330 }, { "epoch": 0.46, "grad_norm": 3.5459953800347166, "learning_rate": 5.869912510879303e-06, "loss": 0.3131, "step": 7331 }, { "epoch": 0.46, "grad_norm": 12.871828278846024, "learning_rate": 5.868909549371588e-06, "loss": 0.298, "step": 7332 }, { "epoch": 0.46, "grad_norm": 2.6170950853305737, "learning_rate": 5.867906551811271e-06, "loss": 0.2908, "step": 7333 }, { "epoch": 0.46, "grad_norm": 6.107788983459267, "learning_rate": 5.866903518239973e-06, "loss": 0.284, "step": 7334 }, { "epoch": 0.46, "grad_norm": 4.4097658497869885, "learning_rate": 5.865900448699304e-06, "loss": 0.299, "step": 7335 }, { "epoch": 0.46, "grad_norm": 1.8048049716625916, "learning_rate": 5.864897343230889e-06, "loss": 0.2967, "step": 7336 }, { "epoch": 0.46, "grad_norm": 1.9191555674538232, "learning_rate": 5.8638942018763466e-06, "loss": 0.2897, "step": 7337 }, { "epoch": 0.46, "grad_norm": 1.9404756393540705, "learning_rate": 5.862891024677299e-06, "loss": 0.2794, "step": 7338 }, { "epoch": 0.46, "grad_norm": 2.765853922751242, "learning_rate": 5.861887811675372e-06, "loss": 0.3067, "step": 7339 }, { "epoch": 0.46, "grad_norm": 2.167100232172597, "learning_rate": 5.860884562912188e-06, "loss": 0.3097, "step": 7340 }, { "epoch": 0.46, "grad_norm": 1.9071164601550794, "learning_rate": 5.859881278429374e-06, "loss": 0.2868, "step": 7341 }, { "epoch": 0.46, "grad_norm": 1.6268019484936718, "learning_rate": 5.858877958268558e-06, "loss": 0.2861, "step": 7342 }, { "epoch": 0.46, "grad_norm": 2.7383118914966724, "learning_rate": 5.857874602471371e-06, "loss": 0.2839, "step": 7343 }, { "epoch": 0.46, "grad_norm": 2.3895124538247345, "learning_rate": 5.856871211079444e-06, "loss": 0.2999, "step": 7344 }, { "epoch": 0.46, "grad_norm": 2.9410957812527907, "learning_rate": 5.855867784134406e-06, "loss": 0.3021, "step": 7345 }, { "epoch": 0.46, "grad_norm": 1.4242961577547777, "learning_rate": 5.854864321677894e-06, "loss": 0.2935, "step": 7346 }, { "epoch": 0.46, "grad_norm": 0.6225016487997318, "learning_rate": 5.853860823751543e-06, "loss": 0.4978, "step": 7347 }, { "epoch": 0.46, "grad_norm": 1.6650591672455808, "learning_rate": 5.852857290396992e-06, "loss": 0.2858, "step": 7348 }, { "epoch": 0.46, "grad_norm": 2.6397770623242103, "learning_rate": 5.8518537216558745e-06, "loss": 0.2893, "step": 7349 }, { "epoch": 0.46, "grad_norm": 1.9201914622817458, "learning_rate": 5.850850117569834e-06, "loss": 0.2778, "step": 7350 }, { "epoch": 0.46, "grad_norm": 2.6469677312924365, "learning_rate": 5.84984647818051e-06, "loss": 0.3017, "step": 7351 }, { "epoch": 0.46, "grad_norm": 3.3510291544324198, "learning_rate": 5.848842803529547e-06, "loss": 0.2821, "step": 7352 }, { "epoch": 0.46, "grad_norm": 2.9665345362927438, "learning_rate": 5.847839093658587e-06, "loss": 0.2919, "step": 7353 }, { "epoch": 0.46, "grad_norm": 1.954562981846482, "learning_rate": 5.84683534860928e-06, "loss": 0.2845, "step": 7354 }, { "epoch": 0.46, "grad_norm": 1.8956564872328672, "learning_rate": 5.8458315684232685e-06, "loss": 0.2717, "step": 7355 }, { "epoch": 0.46, "grad_norm": 1.248810262332383, "learning_rate": 5.844827753142203e-06, "loss": 0.2868, "step": 7356 }, { "epoch": 0.46, "grad_norm": 3.113520115498332, "learning_rate": 5.843823902807733e-06, "loss": 0.2999, "step": 7357 }, { "epoch": 0.46, "grad_norm": 1.6703145513935096, "learning_rate": 5.84282001746151e-06, "loss": 0.297, "step": 7358 }, { "epoch": 0.46, "grad_norm": 1.8700202881329098, "learning_rate": 5.841816097145189e-06, "loss": 0.3041, "step": 7359 }, { "epoch": 0.46, "grad_norm": 2.0327005899982455, "learning_rate": 5.840812141900423e-06, "loss": 0.3033, "step": 7360 }, { "epoch": 0.46, "grad_norm": 1.9215461324762908, "learning_rate": 5.839808151768865e-06, "loss": 0.2884, "step": 7361 }, { "epoch": 0.46, "grad_norm": 1.4390662266099223, "learning_rate": 5.838804126792178e-06, "loss": 0.2844, "step": 7362 }, { "epoch": 0.46, "grad_norm": 4.799526167333, "learning_rate": 5.837800067012016e-06, "loss": 0.2845, "step": 7363 }, { "epoch": 0.46, "grad_norm": 2.447246239952149, "learning_rate": 5.836795972470041e-06, "loss": 0.3081, "step": 7364 }, { "epoch": 0.46, "grad_norm": 4.7774009755870805, "learning_rate": 5.835791843207916e-06, "loss": 0.3062, "step": 7365 }, { "epoch": 0.46, "grad_norm": 1.6124144871826007, "learning_rate": 5.8347876792673044e-06, "loss": 0.2904, "step": 7366 }, { "epoch": 0.46, "grad_norm": 4.649169209059497, "learning_rate": 5.833783480689868e-06, "loss": 0.3021, "step": 7367 }, { "epoch": 0.46, "grad_norm": 1.4987322099032652, "learning_rate": 5.832779247517273e-06, "loss": 0.2963, "step": 7368 }, { "epoch": 0.46, "grad_norm": 3.440929233885759, "learning_rate": 5.831774979791188e-06, "loss": 0.3098, "step": 7369 }, { "epoch": 0.46, "grad_norm": 4.051398084478484, "learning_rate": 5.830770677553282e-06, "loss": 0.2834, "step": 7370 }, { "epoch": 0.46, "grad_norm": 3.708495911664979, "learning_rate": 5.829766340845225e-06, "loss": 0.2849, "step": 7371 }, { "epoch": 0.46, "grad_norm": 4.400937708458666, "learning_rate": 5.828761969708689e-06, "loss": 0.3028, "step": 7372 }, { "epoch": 0.46, "grad_norm": 3.098191143314644, "learning_rate": 5.827757564185347e-06, "loss": 0.3081, "step": 7373 }, { "epoch": 0.46, "grad_norm": 1.5746115675118102, "learning_rate": 5.826753124316873e-06, "loss": 0.2972, "step": 7374 }, { "epoch": 0.46, "grad_norm": 3.5442040364507443, "learning_rate": 5.8257486501449435e-06, "loss": 0.2807, "step": 7375 }, { "epoch": 0.46, "grad_norm": 1.991918654063862, "learning_rate": 5.824744141711235e-06, "loss": 0.315, "step": 7376 }, { "epoch": 0.46, "grad_norm": 1.6218451131737361, "learning_rate": 5.823739599057427e-06, "loss": 0.2823, "step": 7377 }, { "epoch": 0.46, "grad_norm": 6.2641020700786445, "learning_rate": 5.822735022225202e-06, "loss": 0.3095, "step": 7378 }, { "epoch": 0.46, "grad_norm": 2.2021640666825215, "learning_rate": 5.821730411256238e-06, "loss": 0.2959, "step": 7379 }, { "epoch": 0.46, "grad_norm": 2.7549027559276418, "learning_rate": 5.82072576619222e-06, "loss": 0.3049, "step": 7380 }, { "epoch": 0.46, "grad_norm": 2.3850711295652784, "learning_rate": 5.819721087074832e-06, "loss": 0.3245, "step": 7381 }, { "epoch": 0.46, "grad_norm": 3.6691598483890555, "learning_rate": 5.818716373945762e-06, "loss": 0.3157, "step": 7382 }, { "epoch": 0.46, "grad_norm": 4.657245966556539, "learning_rate": 5.817711626846691e-06, "loss": 0.3141, "step": 7383 }, { "epoch": 0.46, "grad_norm": 1.68096296240861, "learning_rate": 5.816706845819316e-06, "loss": 0.2855, "step": 7384 }, { "epoch": 0.46, "grad_norm": 1.8480071058746694, "learning_rate": 5.815702030905322e-06, "loss": 0.2913, "step": 7385 }, { "epoch": 0.46, "grad_norm": 2.3705701435846134, "learning_rate": 5.8146971821464034e-06, "loss": 0.2957, "step": 7386 }, { "epoch": 0.46, "grad_norm": 3.711514997249587, "learning_rate": 5.8136922995842495e-06, "loss": 0.2887, "step": 7387 }, { "epoch": 0.46, "grad_norm": 1.5839163728817727, "learning_rate": 5.812687383260558e-06, "loss": 0.2914, "step": 7388 }, { "epoch": 0.46, "grad_norm": 1.7639738555541817, "learning_rate": 5.811682433217023e-06, "loss": 0.3118, "step": 7389 }, { "epoch": 0.46, "grad_norm": 2.8906218401729564, "learning_rate": 5.810677449495343e-06, "loss": 0.3015, "step": 7390 }, { "epoch": 0.46, "grad_norm": 2.83842369457081, "learning_rate": 5.809672432137215e-06, "loss": 0.2835, "step": 7391 }, { "epoch": 0.46, "grad_norm": 1.47335619090344, "learning_rate": 5.80866738118434e-06, "loss": 0.2795, "step": 7392 }, { "epoch": 0.46, "grad_norm": 3.863035727645143, "learning_rate": 5.807662296678418e-06, "loss": 0.3085, "step": 7393 }, { "epoch": 0.47, "grad_norm": 1.350936176999657, "learning_rate": 5.806657178661153e-06, "loss": 0.2814, "step": 7394 }, { "epoch": 0.47, "grad_norm": 2.674197133155234, "learning_rate": 5.805652027174249e-06, "loss": 0.3206, "step": 7395 }, { "epoch": 0.47, "grad_norm": 1.9593121527421304, "learning_rate": 5.804646842259413e-06, "loss": 0.3084, "step": 7396 }, { "epoch": 0.47, "grad_norm": 3.0677949674119387, "learning_rate": 5.803641623958348e-06, "loss": 0.2954, "step": 7397 }, { "epoch": 0.47, "grad_norm": 2.52424879535754, "learning_rate": 5.802636372312766e-06, "loss": 0.2749, "step": 7398 }, { "epoch": 0.47, "grad_norm": 2.1236913832671624, "learning_rate": 5.801631087364374e-06, "loss": 0.3015, "step": 7399 }, { "epoch": 0.47, "grad_norm": 1.5553322808566252, "learning_rate": 5.8006257691548865e-06, "loss": 0.2987, "step": 7400 }, { "epoch": 0.47, "grad_norm": 0.5785389856886474, "learning_rate": 5.799620417726012e-06, "loss": 0.463, "step": 7401 }, { "epoch": 0.47, "grad_norm": 1.4850186335183841, "learning_rate": 5.798615033119466e-06, "loss": 0.2934, "step": 7402 }, { "epoch": 0.47, "grad_norm": 25.044942457349375, "learning_rate": 5.797609615376964e-06, "loss": 0.2855, "step": 7403 }, { "epoch": 0.47, "grad_norm": 3.0788518723451155, "learning_rate": 5.796604164540224e-06, "loss": 0.2762, "step": 7404 }, { "epoch": 0.47, "grad_norm": 1.9639242140463364, "learning_rate": 5.795598680650959e-06, "loss": 0.2846, "step": 7405 }, { "epoch": 0.47, "grad_norm": 4.202881432684013, "learning_rate": 5.794593163750894e-06, "loss": 0.2703, "step": 7406 }, { "epoch": 0.47, "grad_norm": 7.61043453216115, "learning_rate": 5.793587613881747e-06, "loss": 0.3026, "step": 7407 }, { "epoch": 0.47, "grad_norm": 2.821645440211319, "learning_rate": 5.792582031085241e-06, "loss": 0.2941, "step": 7408 }, { "epoch": 0.47, "grad_norm": 3.1025830781435055, "learning_rate": 5.791576415403097e-06, "loss": 0.3294, "step": 7409 }, { "epoch": 0.47, "grad_norm": 2.43317684151675, "learning_rate": 5.790570766877043e-06, "loss": 0.296, "step": 7410 }, { "epoch": 0.47, "grad_norm": 2.0351764553562695, "learning_rate": 5.789565085548803e-06, "loss": 0.2845, "step": 7411 }, { "epoch": 0.47, "grad_norm": 1.6329261443402958, "learning_rate": 5.788559371460107e-06, "loss": 0.2876, "step": 7412 }, { "epoch": 0.47, "grad_norm": 2.1815197784932674, "learning_rate": 5.787553624652678e-06, "loss": 0.2648, "step": 7413 }, { "epoch": 0.47, "grad_norm": 1.672178902243905, "learning_rate": 5.786547845168253e-06, "loss": 0.2965, "step": 7414 }, { "epoch": 0.47, "grad_norm": 2.781489413288844, "learning_rate": 5.78554203304856e-06, "loss": 0.2973, "step": 7415 }, { "epoch": 0.47, "grad_norm": 1.906575158794279, "learning_rate": 5.784536188335334e-06, "loss": 0.2977, "step": 7416 }, { "epoch": 0.47, "grad_norm": 27.952144720770548, "learning_rate": 5.783530311070306e-06, "loss": 0.3185, "step": 7417 }, { "epoch": 0.47, "grad_norm": 3.3791989634924233, "learning_rate": 5.782524401295216e-06, "loss": 0.2817, "step": 7418 }, { "epoch": 0.47, "grad_norm": 1.6515979396728617, "learning_rate": 5.781518459051797e-06, "loss": 0.2805, "step": 7419 }, { "epoch": 0.47, "grad_norm": 1.8358959595852473, "learning_rate": 5.78051248438179e-06, "loss": 0.2967, "step": 7420 }, { "epoch": 0.47, "grad_norm": 2.4987211405224334, "learning_rate": 5.779506477326933e-06, "loss": 0.3037, "step": 7421 }, { "epoch": 0.47, "grad_norm": 1.9785200233494902, "learning_rate": 5.778500437928969e-06, "loss": 0.3012, "step": 7422 }, { "epoch": 0.47, "grad_norm": 2.1999170666941543, "learning_rate": 5.777494366229637e-06, "loss": 0.3118, "step": 7423 }, { "epoch": 0.47, "grad_norm": 2.253242248500906, "learning_rate": 5.7764882622706834e-06, "loss": 0.296, "step": 7424 }, { "epoch": 0.47, "grad_norm": 2.1496977476439847, "learning_rate": 5.7754821260938536e-06, "loss": 0.2802, "step": 7425 }, { "epoch": 0.47, "grad_norm": 2.083243302988889, "learning_rate": 5.774475957740892e-06, "loss": 0.293, "step": 7426 }, { "epoch": 0.47, "grad_norm": 2.395467071501667, "learning_rate": 5.7734697572535485e-06, "loss": 0.2744, "step": 7427 }, { "epoch": 0.47, "grad_norm": 2.17167924037797, "learning_rate": 5.7724635246735695e-06, "loss": 0.2966, "step": 7428 }, { "epoch": 0.47, "grad_norm": 1.6577735965038014, "learning_rate": 5.771457260042707e-06, "loss": 0.2826, "step": 7429 }, { "epoch": 0.47, "grad_norm": 2.0480434049578315, "learning_rate": 5.770450963402714e-06, "loss": 0.2939, "step": 7430 }, { "epoch": 0.47, "grad_norm": 1.797695230901797, "learning_rate": 5.76944463479534e-06, "loss": 0.2842, "step": 7431 }, { "epoch": 0.47, "grad_norm": 4.2435853244238055, "learning_rate": 5.7684382742623425e-06, "loss": 0.3024, "step": 7432 }, { "epoch": 0.47, "grad_norm": 1.480875688589397, "learning_rate": 5.767431881845475e-06, "loss": 0.2863, "step": 7433 }, { "epoch": 0.47, "grad_norm": 2.460237068746604, "learning_rate": 5.766425457586497e-06, "loss": 0.2841, "step": 7434 }, { "epoch": 0.47, "grad_norm": 5.2983250323644855, "learning_rate": 5.765419001527165e-06, "loss": 0.3179, "step": 7435 }, { "epoch": 0.47, "grad_norm": 1.2555286612370506, "learning_rate": 5.7644125137092395e-06, "loss": 0.2768, "step": 7436 }, { "epoch": 0.47, "grad_norm": 1.7879728690741918, "learning_rate": 5.76340599417448e-06, "loss": 0.3091, "step": 7437 }, { "epoch": 0.47, "grad_norm": 1.794846458366639, "learning_rate": 5.762399442964652e-06, "loss": 0.299, "step": 7438 }, { "epoch": 0.47, "grad_norm": 4.466717185262108, "learning_rate": 5.761392860121514e-06, "loss": 0.3052, "step": 7439 }, { "epoch": 0.47, "grad_norm": 13.508990408891954, "learning_rate": 5.760386245686836e-06, "loss": 0.3105, "step": 7440 }, { "epoch": 0.47, "grad_norm": 2.176518988435943, "learning_rate": 5.759379599702381e-06, "loss": 0.2905, "step": 7441 }, { "epoch": 0.47, "grad_norm": 5.571342908592038, "learning_rate": 5.758372922209918e-06, "loss": 0.2919, "step": 7442 }, { "epoch": 0.47, "grad_norm": 4.12035874006493, "learning_rate": 5.757366213251215e-06, "loss": 0.2881, "step": 7443 }, { "epoch": 0.47, "grad_norm": 2.872116166511122, "learning_rate": 5.756359472868044e-06, "loss": 0.3009, "step": 7444 }, { "epoch": 0.47, "grad_norm": 2.7007860292818515, "learning_rate": 5.755352701102174e-06, "loss": 0.2931, "step": 7445 }, { "epoch": 0.47, "grad_norm": 1.6324341594720326, "learning_rate": 5.754345897995378e-06, "loss": 0.2915, "step": 7446 }, { "epoch": 0.47, "grad_norm": 0.6107260486933718, "learning_rate": 5.753339063589431e-06, "loss": 0.4902, "step": 7447 }, { "epoch": 0.47, "grad_norm": 3.7491841544424926, "learning_rate": 5.752332197926109e-06, "loss": 0.2907, "step": 7448 }, { "epoch": 0.47, "grad_norm": 1.8626797410832066, "learning_rate": 5.751325301047188e-06, "loss": 0.3146, "step": 7449 }, { "epoch": 0.47, "grad_norm": 2.509480948330613, "learning_rate": 5.7503183729944454e-06, "loss": 0.2746, "step": 7450 }, { "epoch": 0.47, "grad_norm": 2.049359590867218, "learning_rate": 5.749311413809661e-06, "loss": 0.2858, "step": 7451 }, { "epoch": 0.47, "grad_norm": 2.04012364496551, "learning_rate": 5.748304423534615e-06, "loss": 0.2673, "step": 7452 }, { "epoch": 0.47, "grad_norm": 5.413054241860235, "learning_rate": 5.74729740221109e-06, "loss": 0.296, "step": 7453 }, { "epoch": 0.47, "grad_norm": 1.3595975797556992, "learning_rate": 5.746290349880867e-06, "loss": 0.2916, "step": 7454 }, { "epoch": 0.47, "grad_norm": 2.6144008908891876, "learning_rate": 5.745283266585732e-06, "loss": 0.2895, "step": 7455 }, { "epoch": 0.47, "grad_norm": 3.030825353977351, "learning_rate": 5.744276152367472e-06, "loss": 0.2749, "step": 7456 }, { "epoch": 0.47, "grad_norm": 2.3108544995994253, "learning_rate": 5.7432690072678696e-06, "loss": 0.2952, "step": 7457 }, { "epoch": 0.47, "grad_norm": 3.0277926170135334, "learning_rate": 5.7422618313287184e-06, "loss": 0.3021, "step": 7458 }, { "epoch": 0.47, "grad_norm": 2.9805852476289116, "learning_rate": 5.741254624591804e-06, "loss": 0.2806, "step": 7459 }, { "epoch": 0.47, "grad_norm": 2.9802090010372773, "learning_rate": 5.7402473870989205e-06, "loss": 0.3016, "step": 7460 }, { "epoch": 0.47, "grad_norm": 2.6529213476321996, "learning_rate": 5.7392401188918555e-06, "loss": 0.3039, "step": 7461 }, { "epoch": 0.47, "grad_norm": 1.7974486995779502, "learning_rate": 5.738232820012407e-06, "loss": 0.3013, "step": 7462 }, { "epoch": 0.47, "grad_norm": 11.920592923579262, "learning_rate": 5.737225490502366e-06, "loss": 0.3029, "step": 7463 }, { "epoch": 0.47, "grad_norm": 1.8315720248690885, "learning_rate": 5.736218130403532e-06, "loss": 0.298, "step": 7464 }, { "epoch": 0.47, "grad_norm": 2.032439390079224, "learning_rate": 5.735210739757697e-06, "loss": 0.2819, "step": 7465 }, { "epoch": 0.47, "grad_norm": 1.4071823116303686, "learning_rate": 5.734203318606666e-06, "loss": 0.2905, "step": 7466 }, { "epoch": 0.47, "grad_norm": 5.486510636408435, "learning_rate": 5.7331958669922335e-06, "loss": 0.3097, "step": 7467 }, { "epoch": 0.47, "grad_norm": 5.174635017024112, "learning_rate": 5.732188384956203e-06, "loss": 0.2864, "step": 7468 }, { "epoch": 0.47, "grad_norm": 2.0793500591352876, "learning_rate": 5.731180872540374e-06, "loss": 0.2764, "step": 7469 }, { "epoch": 0.47, "grad_norm": 3.267484630972985, "learning_rate": 5.730173329786554e-06, "loss": 0.2977, "step": 7470 }, { "epoch": 0.47, "grad_norm": 2.2034813592776934, "learning_rate": 5.729165756736544e-06, "loss": 0.3035, "step": 7471 }, { "epoch": 0.47, "grad_norm": 1.4909879137858109, "learning_rate": 5.728158153432153e-06, "loss": 0.2937, "step": 7472 }, { "epoch": 0.47, "grad_norm": 1.9755301438766115, "learning_rate": 5.727150519915186e-06, "loss": 0.2899, "step": 7473 }, { "epoch": 0.47, "grad_norm": 2.884894116487073, "learning_rate": 5.726142856227453e-06, "loss": 0.2662, "step": 7474 }, { "epoch": 0.47, "grad_norm": 2.4461228179609695, "learning_rate": 5.725135162410762e-06, "loss": 0.2871, "step": 7475 }, { "epoch": 0.47, "grad_norm": 12.404019744402751, "learning_rate": 5.724127438506925e-06, "loss": 0.2991, "step": 7476 }, { "epoch": 0.47, "grad_norm": 2.1658715308810628, "learning_rate": 5.723119684557755e-06, "loss": 0.2907, "step": 7477 }, { "epoch": 0.47, "grad_norm": 1.5756943612655252, "learning_rate": 5.722111900605066e-06, "loss": 0.2918, "step": 7478 }, { "epoch": 0.47, "grad_norm": 3.070764156037807, "learning_rate": 5.721104086690671e-06, "loss": 0.2856, "step": 7479 }, { "epoch": 0.47, "grad_norm": 2.3053552661157948, "learning_rate": 5.720096242856385e-06, "loss": 0.2934, "step": 7480 }, { "epoch": 0.47, "grad_norm": 2.739945420154742, "learning_rate": 5.719088369144028e-06, "loss": 0.2909, "step": 7481 }, { "epoch": 0.47, "grad_norm": 2.5469861022930633, "learning_rate": 5.718080465595419e-06, "loss": 0.2824, "step": 7482 }, { "epoch": 0.47, "grad_norm": 4.480193219958061, "learning_rate": 5.717072532252373e-06, "loss": 0.2886, "step": 7483 }, { "epoch": 0.47, "grad_norm": 3.8616266062916327, "learning_rate": 5.716064569156716e-06, "loss": 0.2935, "step": 7484 }, { "epoch": 0.47, "grad_norm": 2.953623144323701, "learning_rate": 5.715056576350267e-06, "loss": 0.2921, "step": 7485 }, { "epoch": 0.47, "grad_norm": 3.594419332724903, "learning_rate": 5.714048553874852e-06, "loss": 0.2933, "step": 7486 }, { "epoch": 0.47, "grad_norm": 3.2998054298609465, "learning_rate": 5.713040501772292e-06, "loss": 0.3098, "step": 7487 }, { "epoch": 0.47, "grad_norm": 5.61312532244727, "learning_rate": 5.712032420084418e-06, "loss": 0.3033, "step": 7488 }, { "epoch": 0.47, "grad_norm": 5.946469730706228, "learning_rate": 5.7110243088530525e-06, "loss": 0.3049, "step": 7489 }, { "epoch": 0.47, "grad_norm": 1.989729102428159, "learning_rate": 5.710016168120026e-06, "loss": 0.2912, "step": 7490 }, { "epoch": 0.47, "grad_norm": 2.582846243896762, "learning_rate": 5.7090079979271665e-06, "loss": 0.291, "step": 7491 }, { "epoch": 0.47, "grad_norm": 55.800817128984455, "learning_rate": 5.707999798316307e-06, "loss": 0.284, "step": 7492 }, { "epoch": 0.47, "grad_norm": 5.815348963611848, "learning_rate": 5.706991569329277e-06, "loss": 0.2922, "step": 7493 }, { "epoch": 0.47, "grad_norm": 3.4766139781374577, "learning_rate": 5.705983311007913e-06, "loss": 0.2965, "step": 7494 }, { "epoch": 0.47, "grad_norm": 2.246352461340009, "learning_rate": 5.704975023394045e-06, "loss": 0.2933, "step": 7495 }, { "epoch": 0.47, "grad_norm": 4.385660925283995, "learning_rate": 5.703966706529513e-06, "loss": 0.2883, "step": 7496 }, { "epoch": 0.47, "grad_norm": 2.116793870423358, "learning_rate": 5.702958360456151e-06, "loss": 0.3052, "step": 7497 }, { "epoch": 0.47, "grad_norm": 3.2875175653246824, "learning_rate": 5.7019499852158e-06, "loss": 0.2993, "step": 7498 }, { "epoch": 0.47, "grad_norm": 2.3467825549517634, "learning_rate": 5.700941580850294e-06, "loss": 0.2891, "step": 7499 }, { "epoch": 0.47, "grad_norm": 2.0220070722338837, "learning_rate": 5.6999331474014795e-06, "loss": 0.2853, "step": 7500 }, { "epoch": 0.47, "grad_norm": 1.889623982996533, "learning_rate": 5.698924684911195e-06, "loss": 0.2908, "step": 7501 }, { "epoch": 0.47, "grad_norm": 7.640199975628539, "learning_rate": 5.697916193421283e-06, "loss": 0.2958, "step": 7502 }, { "epoch": 0.47, "grad_norm": 2.216291310154457, "learning_rate": 5.6969076729735885e-06, "loss": 0.2744, "step": 7503 }, { "epoch": 0.47, "grad_norm": 1.9070709643064623, "learning_rate": 5.695899123609957e-06, "loss": 0.2845, "step": 7504 }, { "epoch": 0.47, "grad_norm": 2.766277469410023, "learning_rate": 5.694890545372235e-06, "loss": 0.3101, "step": 7505 }, { "epoch": 0.47, "grad_norm": 0.6244973568012134, "learning_rate": 5.693881938302271e-06, "loss": 0.4962, "step": 7506 }, { "epoch": 0.47, "grad_norm": 2.4504037855239074, "learning_rate": 5.69287330244191e-06, "loss": 0.2893, "step": 7507 }, { "epoch": 0.47, "grad_norm": 2.361867015535825, "learning_rate": 5.691864637833009e-06, "loss": 0.3067, "step": 7508 }, { "epoch": 0.47, "grad_norm": 2.129035141662319, "learning_rate": 5.690855944517413e-06, "loss": 0.3131, "step": 7509 }, { "epoch": 0.47, "grad_norm": 2.733406459487125, "learning_rate": 5.689847222536976e-06, "loss": 0.3003, "step": 7510 }, { "epoch": 0.47, "grad_norm": 2.0861672973751695, "learning_rate": 5.688838471933553e-06, "loss": 0.291, "step": 7511 }, { "epoch": 0.47, "grad_norm": 1.5806383363046554, "learning_rate": 5.687829692749e-06, "loss": 0.3019, "step": 7512 }, { "epoch": 0.47, "grad_norm": 3.7720927581792263, "learning_rate": 5.686820885025169e-06, "loss": 0.2696, "step": 7513 }, { "epoch": 0.47, "grad_norm": 1.4731309256315446, "learning_rate": 5.68581204880392e-06, "loss": 0.2839, "step": 7514 }, { "epoch": 0.47, "grad_norm": 1.3839875728516073, "learning_rate": 5.684803184127112e-06, "loss": 0.2824, "step": 7515 }, { "epoch": 0.47, "grad_norm": 4.729233174779593, "learning_rate": 5.683794291036604e-06, "loss": 0.2875, "step": 7516 }, { "epoch": 0.47, "grad_norm": 3.4638741674560807, "learning_rate": 5.682785369574254e-06, "loss": 0.2753, "step": 7517 }, { "epoch": 0.47, "grad_norm": 7.180851821766722, "learning_rate": 5.681776419781928e-06, "loss": 0.2909, "step": 7518 }, { "epoch": 0.47, "grad_norm": 2.1562037127667515, "learning_rate": 5.680767441701487e-06, "loss": 0.296, "step": 7519 }, { "epoch": 0.47, "grad_norm": 2.203281795230169, "learning_rate": 5.679758435374797e-06, "loss": 0.2793, "step": 7520 }, { "epoch": 0.47, "grad_norm": 4.809742360489713, "learning_rate": 5.678749400843719e-06, "loss": 0.3229, "step": 7521 }, { "epoch": 0.47, "grad_norm": 3.5053153872792064, "learning_rate": 5.677740338150126e-06, "loss": 0.3021, "step": 7522 }, { "epoch": 0.47, "grad_norm": 0.5869112896278917, "learning_rate": 5.6767312473358805e-06, "loss": 0.4689, "step": 7523 }, { "epoch": 0.47, "grad_norm": 1.8921322492543409, "learning_rate": 5.6757221284428554e-06, "loss": 0.2785, "step": 7524 }, { "epoch": 0.47, "grad_norm": 3.511930460075979, "learning_rate": 5.674712981512915e-06, "loss": 0.3081, "step": 7525 }, { "epoch": 0.47, "grad_norm": 9.569747007160839, "learning_rate": 5.673703806587939e-06, "loss": 0.2707, "step": 7526 }, { "epoch": 0.47, "grad_norm": 1.4256604965425432, "learning_rate": 5.672694603709794e-06, "loss": 0.2847, "step": 7527 }, { "epoch": 0.47, "grad_norm": 2.0290628140660143, "learning_rate": 5.671685372920355e-06, "loss": 0.2814, "step": 7528 }, { "epoch": 0.47, "grad_norm": 1.446966074724786, "learning_rate": 5.670676114261495e-06, "loss": 0.2957, "step": 7529 }, { "epoch": 0.47, "grad_norm": 1.6950616372965643, "learning_rate": 5.669666827775095e-06, "loss": 0.2872, "step": 7530 }, { "epoch": 0.47, "grad_norm": 2.926014532229951, "learning_rate": 5.668657513503027e-06, "loss": 0.3254, "step": 7531 }, { "epoch": 0.47, "grad_norm": 0.600040825964026, "learning_rate": 5.667648171487171e-06, "loss": 0.5185, "step": 7532 }, { "epoch": 0.47, "grad_norm": 0.6331571157681681, "learning_rate": 5.666638801769406e-06, "loss": 0.489, "step": 7533 }, { "epoch": 0.47, "grad_norm": 2.9454720619669392, "learning_rate": 5.665629404391615e-06, "loss": 0.2997, "step": 7534 }, { "epoch": 0.47, "grad_norm": 3.650075794061309, "learning_rate": 5.664619979395676e-06, "loss": 0.3163, "step": 7535 }, { "epoch": 0.47, "grad_norm": 2.1008226354398274, "learning_rate": 5.663610526823474e-06, "loss": 0.2842, "step": 7536 }, { "epoch": 0.47, "grad_norm": 2.0083496967367487, "learning_rate": 5.662601046716893e-06, "loss": 0.3236, "step": 7537 }, { "epoch": 0.47, "grad_norm": 1.540398965207893, "learning_rate": 5.661591539117818e-06, "loss": 0.2709, "step": 7538 }, { "epoch": 0.47, "grad_norm": 2.263167581705944, "learning_rate": 5.660582004068134e-06, "loss": 0.285, "step": 7539 }, { "epoch": 0.47, "grad_norm": 2.470621667087632, "learning_rate": 5.6595724416097285e-06, "loss": 0.2997, "step": 7540 }, { "epoch": 0.47, "grad_norm": 2.2905877109285977, "learning_rate": 5.658562851784493e-06, "loss": 0.3073, "step": 7541 }, { "epoch": 0.47, "grad_norm": 2.614383038345281, "learning_rate": 5.657553234634314e-06, "loss": 0.2739, "step": 7542 }, { "epoch": 0.47, "grad_norm": 1.544974988609688, "learning_rate": 5.656543590201084e-06, "loss": 0.2823, "step": 7543 }, { "epoch": 0.47, "grad_norm": 1.6832459754722957, "learning_rate": 5.655533918526693e-06, "loss": 0.292, "step": 7544 }, { "epoch": 0.47, "grad_norm": 2.17433286086938, "learning_rate": 5.654524219653035e-06, "loss": 0.3132, "step": 7545 }, { "epoch": 0.47, "grad_norm": 1.8187781261421623, "learning_rate": 5.653514493622007e-06, "loss": 0.2918, "step": 7546 }, { "epoch": 0.47, "grad_norm": 6.373290493754437, "learning_rate": 5.652504740475499e-06, "loss": 0.2919, "step": 7547 }, { "epoch": 0.47, "grad_norm": 3.2056108905279466, "learning_rate": 5.651494960255412e-06, "loss": 0.3059, "step": 7548 }, { "epoch": 0.47, "grad_norm": 1.6706705669316926, "learning_rate": 5.650485153003642e-06, "loss": 0.2879, "step": 7549 }, { "epoch": 0.47, "grad_norm": 2.029525412874832, "learning_rate": 5.649475318762088e-06, "loss": 0.2935, "step": 7550 }, { "epoch": 0.47, "grad_norm": 2.2076103327721412, "learning_rate": 5.648465457572648e-06, "loss": 0.3061, "step": 7551 }, { "epoch": 0.47, "grad_norm": 2.077242752112191, "learning_rate": 5.647455569477225e-06, "loss": 0.2989, "step": 7552 }, { "epoch": 0.48, "grad_norm": 1.812558801833626, "learning_rate": 5.646445654517721e-06, "loss": 0.3027, "step": 7553 }, { "epoch": 0.48, "grad_norm": 1.2839181061111227, "learning_rate": 5.645435712736038e-06, "loss": 0.2699, "step": 7554 }, { "epoch": 0.48, "grad_norm": 2.2677247153083626, "learning_rate": 5.6444257441740804e-06, "loss": 0.2771, "step": 7555 }, { "epoch": 0.48, "grad_norm": 2.4156651211418265, "learning_rate": 5.643415748873755e-06, "loss": 0.284, "step": 7556 }, { "epoch": 0.48, "grad_norm": 3.546944978902009, "learning_rate": 5.642405726876967e-06, "loss": 0.2665, "step": 7557 }, { "epoch": 0.48, "grad_norm": 2.834067952436137, "learning_rate": 5.641395678225624e-06, "loss": 0.2789, "step": 7558 }, { "epoch": 0.48, "grad_norm": 1.7530318367081685, "learning_rate": 5.640385602961634e-06, "loss": 0.2797, "step": 7559 }, { "epoch": 0.48, "grad_norm": 2.3080553025611903, "learning_rate": 5.6393755011269115e-06, "loss": 0.2973, "step": 7560 }, { "epoch": 0.48, "grad_norm": 1.8055236618951296, "learning_rate": 5.63836537276336e-06, "loss": 0.2944, "step": 7561 }, { "epoch": 0.48, "grad_norm": 2.7602991910046204, "learning_rate": 5.6373552179128975e-06, "loss": 0.2921, "step": 7562 }, { "epoch": 0.48, "grad_norm": 1.8516542757364982, "learning_rate": 5.636345036617433e-06, "loss": 0.2922, "step": 7563 }, { "epoch": 0.48, "grad_norm": 2.8027538131341654, "learning_rate": 5.635334828918886e-06, "loss": 0.3032, "step": 7564 }, { "epoch": 0.48, "grad_norm": 2.391280447097233, "learning_rate": 5.6343245948591655e-06, "loss": 0.3, "step": 7565 }, { "epoch": 0.48, "grad_norm": 1.7098293431121996, "learning_rate": 5.633314334480191e-06, "loss": 0.2855, "step": 7566 }, { "epoch": 0.48, "grad_norm": 1.8748959742737594, "learning_rate": 5.632304047823881e-06, "loss": 0.2792, "step": 7567 }, { "epoch": 0.48, "grad_norm": 2.337783856025002, "learning_rate": 5.631293734932153e-06, "loss": 0.3168, "step": 7568 }, { "epoch": 0.48, "grad_norm": 2.1377957947763133, "learning_rate": 5.630283395846926e-06, "loss": 0.2996, "step": 7569 }, { "epoch": 0.48, "grad_norm": 2.395129393465031, "learning_rate": 5.6292730306101215e-06, "loss": 0.2916, "step": 7570 }, { "epoch": 0.48, "grad_norm": 2.5285132336855396, "learning_rate": 5.6282626392636615e-06, "loss": 0.2921, "step": 7571 }, { "epoch": 0.48, "grad_norm": 59.335283073129325, "learning_rate": 5.6272522218494695e-06, "loss": 0.2852, "step": 7572 }, { "epoch": 0.48, "grad_norm": 2.699520023762649, "learning_rate": 5.626241778409467e-06, "loss": 0.2873, "step": 7573 }, { "epoch": 0.48, "grad_norm": 2.392732779304917, "learning_rate": 5.625231308985582e-06, "loss": 0.294, "step": 7574 }, { "epoch": 0.48, "grad_norm": 3.6432966342313273, "learning_rate": 5.624220813619739e-06, "loss": 0.3014, "step": 7575 }, { "epoch": 0.48, "grad_norm": 4.380568531075516, "learning_rate": 5.623210292353865e-06, "loss": 0.3115, "step": 7576 }, { "epoch": 0.48, "grad_norm": 1.951581765742306, "learning_rate": 5.62219974522989e-06, "loss": 0.2775, "step": 7577 }, { "epoch": 0.48, "grad_norm": 2.2032960059647033, "learning_rate": 5.621189172289742e-06, "loss": 0.3008, "step": 7578 }, { "epoch": 0.48, "grad_norm": 2.386585173453586, "learning_rate": 5.620178573575352e-06, "loss": 0.31, "step": 7579 }, { "epoch": 0.48, "grad_norm": 3.233562515286271, "learning_rate": 5.6191679491286525e-06, "loss": 0.2862, "step": 7580 }, { "epoch": 0.48, "grad_norm": 1.9552337487300702, "learning_rate": 5.618157298991574e-06, "loss": 0.2896, "step": 7581 }, { "epoch": 0.48, "grad_norm": 1.7236303988944257, "learning_rate": 5.61714662320605e-06, "loss": 0.2865, "step": 7582 }, { "epoch": 0.48, "grad_norm": 6.463293909716265, "learning_rate": 5.616135921814018e-06, "loss": 0.2837, "step": 7583 }, { "epoch": 0.48, "grad_norm": 2.9170548306979804, "learning_rate": 5.615125194857411e-06, "loss": 0.2936, "step": 7584 }, { "epoch": 0.48, "grad_norm": 0.6618161709336832, "learning_rate": 5.614114442378169e-06, "loss": 0.5309, "step": 7585 }, { "epoch": 0.48, "grad_norm": 4.161281708041211, "learning_rate": 5.613103664418227e-06, "loss": 0.2977, "step": 7586 }, { "epoch": 0.48, "grad_norm": 1.8520552299589774, "learning_rate": 5.6120928610195235e-06, "loss": 0.2714, "step": 7587 }, { "epoch": 0.48, "grad_norm": 3.3206685316379283, "learning_rate": 5.611082032224001e-06, "loss": 0.293, "step": 7588 }, { "epoch": 0.48, "grad_norm": 1.8522249948067022, "learning_rate": 5.610071178073601e-06, "loss": 0.2773, "step": 7589 }, { "epoch": 0.48, "grad_norm": 1.6560173966684166, "learning_rate": 5.609060298610263e-06, "loss": 0.2934, "step": 7590 }, { "epoch": 0.48, "grad_norm": 2.457576734413159, "learning_rate": 5.6080493938759314e-06, "loss": 0.3222, "step": 7591 }, { "epoch": 0.48, "grad_norm": 1.7121792610360973, "learning_rate": 5.607038463912551e-06, "loss": 0.292, "step": 7592 }, { "epoch": 0.48, "grad_norm": 1.7733976375907252, "learning_rate": 5.606027508762067e-06, "loss": 0.3028, "step": 7593 }, { "epoch": 0.48, "grad_norm": 3.6817503215393352, "learning_rate": 5.605016528466424e-06, "loss": 0.2795, "step": 7594 }, { "epoch": 0.48, "grad_norm": 2.016365833923353, "learning_rate": 5.604005523067569e-06, "loss": 0.2804, "step": 7595 }, { "epoch": 0.48, "grad_norm": 3.2220054674335756, "learning_rate": 5.602994492607454e-06, "loss": 0.2928, "step": 7596 }, { "epoch": 0.48, "grad_norm": 1.2291516796281352, "learning_rate": 5.601983437128027e-06, "loss": 0.2753, "step": 7597 }, { "epoch": 0.48, "grad_norm": 1.7494213461557602, "learning_rate": 5.600972356671238e-06, "loss": 0.2812, "step": 7598 }, { "epoch": 0.48, "grad_norm": 2.3269791290799406, "learning_rate": 5.599961251279037e-06, "loss": 0.2952, "step": 7599 }, { "epoch": 0.48, "grad_norm": 1.2304916030670954, "learning_rate": 5.5989501209933795e-06, "loss": 0.2765, "step": 7600 }, { "epoch": 0.48, "grad_norm": 1.459507001809702, "learning_rate": 5.5979389658562165e-06, "loss": 0.2799, "step": 7601 }, { "epoch": 0.48, "grad_norm": 3.4381696557232515, "learning_rate": 5.596927785909505e-06, "loss": 0.2888, "step": 7602 }, { "epoch": 0.48, "grad_norm": 1.7657934821560524, "learning_rate": 5.595916581195198e-06, "loss": 0.2875, "step": 7603 }, { "epoch": 0.48, "grad_norm": 1.6843243807157469, "learning_rate": 5.594905351755254e-06, "loss": 0.2724, "step": 7604 }, { "epoch": 0.48, "grad_norm": 1.9240401785154129, "learning_rate": 5.59389409763163e-06, "loss": 0.2747, "step": 7605 }, { "epoch": 0.48, "grad_norm": 1.8506574965474858, "learning_rate": 5.592882818866286e-06, "loss": 0.2846, "step": 7606 }, { "epoch": 0.48, "grad_norm": 2.044141352460662, "learning_rate": 5.591871515501181e-06, "loss": 0.2951, "step": 7607 }, { "epoch": 0.48, "grad_norm": 3.4142261519026587, "learning_rate": 5.590860187578274e-06, "loss": 0.2961, "step": 7608 }, { "epoch": 0.48, "grad_norm": 1.8414898976085707, "learning_rate": 5.589848835139529e-06, "loss": 0.2907, "step": 7609 }, { "epoch": 0.48, "grad_norm": 3.8158250187790252, "learning_rate": 5.588837458226908e-06, "loss": 0.2802, "step": 7610 }, { "epoch": 0.48, "grad_norm": 1.1173824203542102, "learning_rate": 5.587826056882376e-06, "loss": 0.2924, "step": 7611 }, { "epoch": 0.48, "grad_norm": 1.8089366617244502, "learning_rate": 5.586814631147895e-06, "loss": 0.2819, "step": 7612 }, { "epoch": 0.48, "grad_norm": 1.6528462451797643, "learning_rate": 5.5858031810654345e-06, "loss": 0.2996, "step": 7613 }, { "epoch": 0.48, "grad_norm": 1.735345799156136, "learning_rate": 5.5847917066769585e-06, "loss": 0.2984, "step": 7614 }, { "epoch": 0.48, "grad_norm": 1.8993270059871377, "learning_rate": 5.583780208024436e-06, "loss": 0.2917, "step": 7615 }, { "epoch": 0.48, "grad_norm": 1.3079231039346049, "learning_rate": 5.582768685149837e-06, "loss": 0.2794, "step": 7616 }, { "epoch": 0.48, "grad_norm": 2.6259454534568314, "learning_rate": 5.581757138095128e-06, "loss": 0.329, "step": 7617 }, { "epoch": 0.48, "grad_norm": 1.8301455628536605, "learning_rate": 5.5807455669022825e-06, "loss": 0.2785, "step": 7618 }, { "epoch": 0.48, "grad_norm": 1.789044571160198, "learning_rate": 5.579733971613274e-06, "loss": 0.3055, "step": 7619 }, { "epoch": 0.48, "grad_norm": 5.880487546744792, "learning_rate": 5.578722352270071e-06, "loss": 0.2963, "step": 7620 }, { "epoch": 0.48, "grad_norm": 1.533953734467629, "learning_rate": 5.57771070891465e-06, "loss": 0.2783, "step": 7621 }, { "epoch": 0.48, "grad_norm": 1.443776325070946, "learning_rate": 5.576699041588986e-06, "loss": 0.293, "step": 7622 }, { "epoch": 0.48, "grad_norm": 1.8706229546458053, "learning_rate": 5.575687350335055e-06, "loss": 0.2737, "step": 7623 }, { "epoch": 0.48, "grad_norm": 2.16178471422946, "learning_rate": 5.574675635194833e-06, "loss": 0.3048, "step": 7624 }, { "epoch": 0.48, "grad_norm": 0.6636299220898205, "learning_rate": 5.573663896210297e-06, "loss": 0.5273, "step": 7625 }, { "epoch": 0.48, "grad_norm": 2.486550452320844, "learning_rate": 5.5726521334234275e-06, "loss": 0.3054, "step": 7626 }, { "epoch": 0.48, "grad_norm": 5.261091326497108, "learning_rate": 5.5716403468762045e-06, "loss": 0.2917, "step": 7627 }, { "epoch": 0.48, "grad_norm": 2.1698329739370568, "learning_rate": 5.570628536610608e-06, "loss": 0.2911, "step": 7628 }, { "epoch": 0.48, "grad_norm": 3.6663475686674074, "learning_rate": 5.56961670266862e-06, "loss": 0.2871, "step": 7629 }, { "epoch": 0.48, "grad_norm": 2.4871906189983948, "learning_rate": 5.5686048450922224e-06, "loss": 0.2918, "step": 7630 }, { "epoch": 0.48, "grad_norm": 1.6193746845400931, "learning_rate": 5.567592963923401e-06, "loss": 0.2884, "step": 7631 }, { "epoch": 0.48, "grad_norm": 2.180697079712196, "learning_rate": 5.566581059204139e-06, "loss": 0.3088, "step": 7632 }, { "epoch": 0.48, "grad_norm": 1.8345635015984232, "learning_rate": 5.5655691309764225e-06, "loss": 0.2951, "step": 7633 }, { "epoch": 0.48, "grad_norm": 1.6623792226280405, "learning_rate": 5.564557179282237e-06, "loss": 0.2883, "step": 7634 }, { "epoch": 0.48, "grad_norm": 3.1105137740109474, "learning_rate": 5.563545204163574e-06, "loss": 0.3031, "step": 7635 }, { "epoch": 0.48, "grad_norm": 2.000333524241662, "learning_rate": 5.562533205662417e-06, "loss": 0.284, "step": 7636 }, { "epoch": 0.48, "grad_norm": 2.499717981356932, "learning_rate": 5.561521183820759e-06, "loss": 0.2777, "step": 7637 }, { "epoch": 0.48, "grad_norm": 1.8619320855670536, "learning_rate": 5.5605091386805896e-06, "loss": 0.2966, "step": 7638 }, { "epoch": 0.48, "grad_norm": 3.0606967986000067, "learning_rate": 5.5594970702839005e-06, "loss": 0.2829, "step": 7639 }, { "epoch": 0.48, "grad_norm": 1.561519112722402, "learning_rate": 5.558484978672684e-06, "loss": 0.3106, "step": 7640 }, { "epoch": 0.48, "grad_norm": 1.9140444866929631, "learning_rate": 5.557472863888935e-06, "loss": 0.2895, "step": 7641 }, { "epoch": 0.48, "grad_norm": 2.599444497655152, "learning_rate": 5.556460725974645e-06, "loss": 0.292, "step": 7642 }, { "epoch": 0.48, "grad_norm": 2.613480966642762, "learning_rate": 5.555448564971812e-06, "loss": 0.3196, "step": 7643 }, { "epoch": 0.48, "grad_norm": 3.5954900095024396, "learning_rate": 5.554436380922431e-06, "loss": 0.2959, "step": 7644 }, { "epoch": 0.48, "grad_norm": 2.1933243969816356, "learning_rate": 5.553424173868501e-06, "loss": 0.2781, "step": 7645 }, { "epoch": 0.48, "grad_norm": 3.497756212293826, "learning_rate": 5.552411943852017e-06, "loss": 0.2954, "step": 7646 }, { "epoch": 0.48, "grad_norm": 11.920059588064577, "learning_rate": 5.551399690914981e-06, "loss": 0.3116, "step": 7647 }, { "epoch": 0.48, "grad_norm": 2.044223232505679, "learning_rate": 5.550387415099393e-06, "loss": 0.2804, "step": 7648 }, { "epoch": 0.48, "grad_norm": 2.742875490724477, "learning_rate": 5.549375116447254e-06, "loss": 0.3274, "step": 7649 }, { "epoch": 0.48, "grad_norm": 1.6956356747388157, "learning_rate": 5.548362795000565e-06, "loss": 0.2997, "step": 7650 }, { "epoch": 0.48, "grad_norm": 1.303764207487624, "learning_rate": 5.54735045080133e-06, "loss": 0.2811, "step": 7651 }, { "epoch": 0.48, "grad_norm": 3.4929475893767643, "learning_rate": 5.546338083891551e-06, "loss": 0.2798, "step": 7652 }, { "epoch": 0.48, "grad_norm": 1.4465382185303735, "learning_rate": 5.545325694313237e-06, "loss": 0.3021, "step": 7653 }, { "epoch": 0.48, "grad_norm": 1.9279209253137302, "learning_rate": 5.544313282108389e-06, "loss": 0.2811, "step": 7654 }, { "epoch": 0.48, "grad_norm": 1.7286870769978662, "learning_rate": 5.543300847319017e-06, "loss": 0.2946, "step": 7655 }, { "epoch": 0.48, "grad_norm": 0.6097230773486723, "learning_rate": 5.542288389987128e-06, "loss": 0.4863, "step": 7656 }, { "epoch": 0.48, "grad_norm": 3.278823836160525, "learning_rate": 5.541275910154731e-06, "loss": 0.293, "step": 7657 }, { "epoch": 0.48, "grad_norm": 1.8592303195067155, "learning_rate": 5.540263407863834e-06, "loss": 0.2974, "step": 7658 }, { "epoch": 0.48, "grad_norm": 1.7508571877058219, "learning_rate": 5.53925088315645e-06, "loss": 0.2959, "step": 7659 }, { "epoch": 0.48, "grad_norm": 2.384552100276538, "learning_rate": 5.538238336074587e-06, "loss": 0.2918, "step": 7660 }, { "epoch": 0.48, "grad_norm": 2.0424155998200018, "learning_rate": 5.537225766660261e-06, "loss": 0.2951, "step": 7661 }, { "epoch": 0.48, "grad_norm": 3.887461789212646, "learning_rate": 5.536213174955484e-06, "loss": 0.2815, "step": 7662 }, { "epoch": 0.48, "grad_norm": 4.936148551169702, "learning_rate": 5.535200561002268e-06, "loss": 0.2975, "step": 7663 }, { "epoch": 0.48, "grad_norm": 1.8999436939087193, "learning_rate": 5.534187924842631e-06, "loss": 0.2789, "step": 7664 }, { "epoch": 0.48, "grad_norm": 2.618104287259037, "learning_rate": 5.53317526651859e-06, "loss": 0.2986, "step": 7665 }, { "epoch": 0.48, "grad_norm": 4.0458312152352685, "learning_rate": 5.532162586072158e-06, "loss": 0.323, "step": 7666 }, { "epoch": 0.48, "grad_norm": 2.0072672077035305, "learning_rate": 5.5311498835453555e-06, "loss": 0.2891, "step": 7667 }, { "epoch": 0.48, "grad_norm": 2.0729173082436962, "learning_rate": 5.530137158980201e-06, "loss": 0.2811, "step": 7668 }, { "epoch": 0.48, "grad_norm": 2.6521791172526483, "learning_rate": 5.529124412418715e-06, "loss": 0.3063, "step": 7669 }, { "epoch": 0.48, "grad_norm": 1.6462108771835293, "learning_rate": 5.5281116439029156e-06, "loss": 0.2912, "step": 7670 }, { "epoch": 0.48, "grad_norm": 3.316829178533151, "learning_rate": 5.527098853474828e-06, "loss": 0.3091, "step": 7671 }, { "epoch": 0.48, "grad_norm": 1.5556510477501275, "learning_rate": 5.526086041176472e-06, "loss": 0.2787, "step": 7672 }, { "epoch": 0.48, "grad_norm": 1.6613192769447411, "learning_rate": 5.525073207049871e-06, "loss": 0.2797, "step": 7673 }, { "epoch": 0.48, "grad_norm": 0.6538928837380741, "learning_rate": 5.524060351137049e-06, "loss": 0.5012, "step": 7674 }, { "epoch": 0.48, "grad_norm": 2.299107483738187, "learning_rate": 5.523047473480036e-06, "loss": 0.2783, "step": 7675 }, { "epoch": 0.48, "grad_norm": 1.3144949143742457, "learning_rate": 5.522034574120851e-06, "loss": 0.2781, "step": 7676 }, { "epoch": 0.48, "grad_norm": 1.5728670283895205, "learning_rate": 5.521021653101525e-06, "loss": 0.2924, "step": 7677 }, { "epoch": 0.48, "grad_norm": 1.291260118859905, "learning_rate": 5.520008710464085e-06, "loss": 0.2724, "step": 7678 }, { "epoch": 0.48, "grad_norm": 1.790638424702302, "learning_rate": 5.518995746250561e-06, "loss": 0.2688, "step": 7679 }, { "epoch": 0.48, "grad_norm": 2.192349802244243, "learning_rate": 5.517982760502981e-06, "loss": 0.3109, "step": 7680 }, { "epoch": 0.48, "grad_norm": 1.6548324309082583, "learning_rate": 5.516969753263376e-06, "loss": 0.2932, "step": 7681 }, { "epoch": 0.48, "grad_norm": 2.1810413925839023, "learning_rate": 5.515956724573778e-06, "loss": 0.3011, "step": 7682 }, { "epoch": 0.48, "grad_norm": 2.95441875468757, "learning_rate": 5.51494367447622e-06, "loss": 0.2775, "step": 7683 }, { "epoch": 0.48, "grad_norm": 2.9777505854560355, "learning_rate": 5.513930603012733e-06, "loss": 0.2919, "step": 7684 }, { "epoch": 0.48, "grad_norm": 3.322860532123437, "learning_rate": 5.512917510225353e-06, "loss": 0.2773, "step": 7685 }, { "epoch": 0.48, "grad_norm": 3.8650869664910847, "learning_rate": 5.511904396156113e-06, "loss": 0.2916, "step": 7686 }, { "epoch": 0.48, "grad_norm": 6.095873520856129, "learning_rate": 5.510891260847053e-06, "loss": 0.2906, "step": 7687 }, { "epoch": 0.48, "grad_norm": 2.1493326173476626, "learning_rate": 5.509878104340205e-06, "loss": 0.2932, "step": 7688 }, { "epoch": 0.48, "grad_norm": 4.960223046675856, "learning_rate": 5.508864926677609e-06, "loss": 0.3028, "step": 7689 }, { "epoch": 0.48, "grad_norm": 1.3690556805584755, "learning_rate": 5.507851727901304e-06, "loss": 0.2919, "step": 7690 }, { "epoch": 0.48, "grad_norm": 1.6671958339056985, "learning_rate": 5.506838508053329e-06, "loss": 0.2885, "step": 7691 }, { "epoch": 0.48, "grad_norm": 1.9193056688864807, "learning_rate": 5.505825267175723e-06, "loss": 0.2721, "step": 7692 }, { "epoch": 0.48, "grad_norm": 2.4266198792138582, "learning_rate": 5.50481200531053e-06, "loss": 0.2947, "step": 7693 }, { "epoch": 0.48, "grad_norm": 2.7212335690916, "learning_rate": 5.503798722499789e-06, "loss": 0.2802, "step": 7694 }, { "epoch": 0.48, "grad_norm": 2.9068507374041386, "learning_rate": 5.502785418785545e-06, "loss": 0.2757, "step": 7695 }, { "epoch": 0.48, "grad_norm": 2.520798957009045, "learning_rate": 5.501772094209838e-06, "loss": 0.303, "step": 7696 }, { "epoch": 0.48, "grad_norm": 2.073194997785977, "learning_rate": 5.500758748814719e-06, "loss": 0.2753, "step": 7697 }, { "epoch": 0.48, "grad_norm": 1.5058041779306213, "learning_rate": 5.4997453826422286e-06, "loss": 0.2885, "step": 7698 }, { "epoch": 0.48, "grad_norm": 2.8423842920647373, "learning_rate": 5.498731995734416e-06, "loss": 0.2876, "step": 7699 }, { "epoch": 0.48, "grad_norm": 1.992622462410933, "learning_rate": 5.497718588133325e-06, "loss": 0.2942, "step": 7700 }, { "epoch": 0.48, "grad_norm": 4.8564163935973115, "learning_rate": 5.4967051598810074e-06, "loss": 0.3164, "step": 7701 }, { "epoch": 0.48, "grad_norm": 7.905517536132355, "learning_rate": 5.4956917110195096e-06, "loss": 0.2868, "step": 7702 }, { "epoch": 0.48, "grad_norm": 1.7386476152167354, "learning_rate": 5.494678241590883e-06, "loss": 0.3107, "step": 7703 }, { "epoch": 0.48, "grad_norm": 1.7342282734045082, "learning_rate": 5.493664751637176e-06, "loss": 0.3173, "step": 7704 }, { "epoch": 0.48, "grad_norm": 3.0065624921530523, "learning_rate": 5.492651241200443e-06, "loss": 0.3109, "step": 7705 }, { "epoch": 0.48, "grad_norm": 2.7653686719302852, "learning_rate": 5.491637710322735e-06, "loss": 0.3246, "step": 7706 }, { "epoch": 0.48, "grad_norm": 5.287865288384318, "learning_rate": 5.490624159046104e-06, "loss": 0.2763, "step": 7707 }, { "epoch": 0.48, "grad_norm": 1.5601821995705238, "learning_rate": 5.4896105874126045e-06, "loss": 0.2883, "step": 7708 }, { "epoch": 0.48, "grad_norm": 1.5696044566668663, "learning_rate": 5.488596995464295e-06, "loss": 0.281, "step": 7709 }, { "epoch": 0.48, "grad_norm": 5.08586468425905, "learning_rate": 5.4875833832432265e-06, "loss": 0.3066, "step": 7710 }, { "epoch": 0.48, "grad_norm": 2.815212508735295, "learning_rate": 5.486569750791457e-06, "loss": 0.2832, "step": 7711 }, { "epoch": 0.49, "grad_norm": 15.05406033481365, "learning_rate": 5.485556098151045e-06, "loss": 0.2888, "step": 7712 }, { "epoch": 0.49, "grad_norm": 2.7899025505151624, "learning_rate": 5.484542425364049e-06, "loss": 0.2945, "step": 7713 }, { "epoch": 0.49, "grad_norm": 3.4809174048149893, "learning_rate": 5.483528732472525e-06, "loss": 0.2767, "step": 7714 }, { "epoch": 0.49, "grad_norm": 0.6367459434759566, "learning_rate": 5.482515019518536e-06, "loss": 0.5068, "step": 7715 }, { "epoch": 0.49, "grad_norm": 0.6513782255389672, "learning_rate": 5.481501286544142e-06, "loss": 0.4778, "step": 7716 }, { "epoch": 0.49, "grad_norm": 2.362815848020362, "learning_rate": 5.480487533591405e-06, "loss": 0.319, "step": 7717 }, { "epoch": 0.49, "grad_norm": 2.171551057908031, "learning_rate": 5.479473760702386e-06, "loss": 0.2669, "step": 7718 }, { "epoch": 0.49, "grad_norm": 3.258315300024818, "learning_rate": 5.478459967919149e-06, "loss": 0.3362, "step": 7719 }, { "epoch": 0.49, "grad_norm": 1.6593417003266453, "learning_rate": 5.477446155283758e-06, "loss": 0.303, "step": 7720 }, { "epoch": 0.49, "grad_norm": 3.5581072388702726, "learning_rate": 5.476432322838279e-06, "loss": 0.3513, "step": 7721 }, { "epoch": 0.49, "grad_norm": 4.589422087143522, "learning_rate": 5.4754184706247745e-06, "loss": 0.2976, "step": 7722 }, { "epoch": 0.49, "grad_norm": 2.0525801997020117, "learning_rate": 5.474404598685315e-06, "loss": 0.2846, "step": 7723 }, { "epoch": 0.49, "grad_norm": 11.565055781579796, "learning_rate": 5.473390707061965e-06, "loss": 0.2857, "step": 7724 }, { "epoch": 0.49, "grad_norm": 1.6844960046250694, "learning_rate": 5.4723767957967955e-06, "loss": 0.2706, "step": 7725 }, { "epoch": 0.49, "grad_norm": 2.364943055973655, "learning_rate": 5.4713628649318716e-06, "loss": 0.3081, "step": 7726 }, { "epoch": 0.49, "grad_norm": 1.6268223581545005, "learning_rate": 5.470348914509267e-06, "loss": 0.2675, "step": 7727 }, { "epoch": 0.49, "grad_norm": 1.5371517334755775, "learning_rate": 5.46933494457105e-06, "loss": 0.2983, "step": 7728 }, { "epoch": 0.49, "grad_norm": 3.4963837345356237, "learning_rate": 5.468320955159293e-06, "loss": 0.281, "step": 7729 }, { "epoch": 0.49, "grad_norm": 1.9170074890030921, "learning_rate": 5.467306946316066e-06, "loss": 0.2804, "step": 7730 }, { "epoch": 0.49, "grad_norm": 5.316003764567229, "learning_rate": 5.466292918083444e-06, "loss": 0.2852, "step": 7731 }, { "epoch": 0.49, "grad_norm": 5.30807583938851, "learning_rate": 5.4652788705035024e-06, "loss": 0.308, "step": 7732 }, { "epoch": 0.49, "grad_norm": 1.7788902829699256, "learning_rate": 5.464264803618312e-06, "loss": 0.304, "step": 7733 }, { "epoch": 0.49, "grad_norm": 2.0855516531344285, "learning_rate": 5.463250717469951e-06, "loss": 0.3121, "step": 7734 }, { "epoch": 0.49, "grad_norm": 2.734483862790188, "learning_rate": 5.462236612100496e-06, "loss": 0.2936, "step": 7735 }, { "epoch": 0.49, "grad_norm": 2.071891275691287, "learning_rate": 5.461222487552022e-06, "loss": 0.3151, "step": 7736 }, { "epoch": 0.49, "grad_norm": 2.735982423941206, "learning_rate": 5.460208343866607e-06, "loss": 0.2907, "step": 7737 }, { "epoch": 0.49, "grad_norm": 2.4393485166060214, "learning_rate": 5.4591941810863314e-06, "loss": 0.2968, "step": 7738 }, { "epoch": 0.49, "grad_norm": 0.8097763811921834, "learning_rate": 5.458179999253274e-06, "loss": 0.4989, "step": 7739 }, { "epoch": 0.49, "grad_norm": 2.011088761778192, "learning_rate": 5.457165798409514e-06, "loss": 0.2902, "step": 7740 }, { "epoch": 0.49, "grad_norm": 2.871560217624289, "learning_rate": 5.456151578597133e-06, "loss": 0.3, "step": 7741 }, { "epoch": 0.49, "grad_norm": 1.724933861169226, "learning_rate": 5.455137339858212e-06, "loss": 0.3002, "step": 7742 }, { "epoch": 0.49, "grad_norm": 2.233794883202092, "learning_rate": 5.454123082234837e-06, "loss": 0.2932, "step": 7743 }, { "epoch": 0.49, "grad_norm": 1.911994075382329, "learning_rate": 5.4531088057690864e-06, "loss": 0.2949, "step": 7744 }, { "epoch": 0.49, "grad_norm": 2.8799183901741925, "learning_rate": 5.4520945105030466e-06, "loss": 0.2649, "step": 7745 }, { "epoch": 0.49, "grad_norm": 2.617560859902063, "learning_rate": 5.451080196478803e-06, "loss": 0.2842, "step": 7746 }, { "epoch": 0.49, "grad_norm": 3.1354696114542797, "learning_rate": 5.450065863738442e-06, "loss": 0.283, "step": 7747 }, { "epoch": 0.49, "grad_norm": 3.2134631563214096, "learning_rate": 5.449051512324046e-06, "loss": 0.3138, "step": 7748 }, { "epoch": 0.49, "grad_norm": 0.6089507927171013, "learning_rate": 5.448037142277708e-06, "loss": 0.49, "step": 7749 }, { "epoch": 0.49, "grad_norm": 2.031014695423584, "learning_rate": 5.447022753641511e-06, "loss": 0.2648, "step": 7750 }, { "epoch": 0.49, "grad_norm": 15.389528252136984, "learning_rate": 5.446008346457549e-06, "loss": 0.2884, "step": 7751 }, { "epoch": 0.49, "grad_norm": 2.8959402309699893, "learning_rate": 5.444993920767905e-06, "loss": 0.2959, "step": 7752 }, { "epoch": 0.49, "grad_norm": 1.9432775176127008, "learning_rate": 5.443979476614674e-06, "loss": 0.2958, "step": 7753 }, { "epoch": 0.49, "grad_norm": 1.6567397525583398, "learning_rate": 5.442965014039947e-06, "loss": 0.2832, "step": 7754 }, { "epoch": 0.49, "grad_norm": 1.751743086406248, "learning_rate": 5.441950533085814e-06, "loss": 0.2994, "step": 7755 }, { "epoch": 0.49, "grad_norm": 2.3200325140976727, "learning_rate": 5.440936033794368e-06, "loss": 0.2856, "step": 7756 }, { "epoch": 0.49, "grad_norm": 3.5140588085294735, "learning_rate": 5.439921516207704e-06, "loss": 0.2817, "step": 7757 }, { "epoch": 0.49, "grad_norm": 1.233163376378304, "learning_rate": 5.438906980367914e-06, "loss": 0.2879, "step": 7758 }, { "epoch": 0.49, "grad_norm": 2.199435167670893, "learning_rate": 5.437892426317095e-06, "loss": 0.2917, "step": 7759 }, { "epoch": 0.49, "grad_norm": 3.0216974147693554, "learning_rate": 5.436877854097338e-06, "loss": 0.2944, "step": 7760 }, { "epoch": 0.49, "grad_norm": 1.7542178720395472, "learning_rate": 5.435863263750747e-06, "loss": 0.2779, "step": 7761 }, { "epoch": 0.49, "grad_norm": 3.0811060036110973, "learning_rate": 5.434848655319414e-06, "loss": 0.2844, "step": 7762 }, { "epoch": 0.49, "grad_norm": 2.383498166954667, "learning_rate": 5.433834028845436e-06, "loss": 0.2921, "step": 7763 }, { "epoch": 0.49, "grad_norm": 2.3910709502821, "learning_rate": 5.432819384370914e-06, "loss": 0.2864, "step": 7764 }, { "epoch": 0.49, "grad_norm": 3.151322561434658, "learning_rate": 5.431804721937949e-06, "loss": 0.2692, "step": 7765 }, { "epoch": 0.49, "grad_norm": 9.33066431819288, "learning_rate": 5.4307900415886374e-06, "loss": 0.2765, "step": 7766 }, { "epoch": 0.49, "grad_norm": 2.1432211433391815, "learning_rate": 5.429775343365082e-06, "loss": 0.3102, "step": 7767 }, { "epoch": 0.49, "grad_norm": 1.9742557435916317, "learning_rate": 5.428760627309384e-06, "loss": 0.2803, "step": 7768 }, { "epoch": 0.49, "grad_norm": 1.2674436633968107, "learning_rate": 5.427745893463647e-06, "loss": 0.2865, "step": 7769 }, { "epoch": 0.49, "grad_norm": 0.6088395490169526, "learning_rate": 5.426731141869973e-06, "loss": 0.4981, "step": 7770 }, { "epoch": 0.49, "grad_norm": 4.399780861060085, "learning_rate": 5.425716372570466e-06, "loss": 0.295, "step": 7771 }, { "epoch": 0.49, "grad_norm": 2.097867324104314, "learning_rate": 5.4247015856072295e-06, "loss": 0.2791, "step": 7772 }, { "epoch": 0.49, "grad_norm": 1.747160477436948, "learning_rate": 5.4236867810223715e-06, "loss": 0.2953, "step": 7773 }, { "epoch": 0.49, "grad_norm": 2.007433151237102, "learning_rate": 5.4226719588579935e-06, "loss": 0.3012, "step": 7774 }, { "epoch": 0.49, "grad_norm": 2.0237241169132663, "learning_rate": 5.421657119156208e-06, "loss": 0.285, "step": 7775 }, { "epoch": 0.49, "grad_norm": 2.1575521191540536, "learning_rate": 5.420642261959118e-06, "loss": 0.2872, "step": 7776 }, { "epoch": 0.49, "grad_norm": 3.5160786933243062, "learning_rate": 5.419627387308836e-06, "loss": 0.2766, "step": 7777 }, { "epoch": 0.49, "grad_norm": 1.6131710196763749, "learning_rate": 5.418612495247465e-06, "loss": 0.2878, "step": 7778 }, { "epoch": 0.49, "grad_norm": 3.3376476265177493, "learning_rate": 5.4175975858171204e-06, "loss": 0.2933, "step": 7779 }, { "epoch": 0.49, "grad_norm": 1.4680007350926543, "learning_rate": 5.416582659059909e-06, "loss": 0.2956, "step": 7780 }, { "epoch": 0.49, "grad_norm": 1.6099706267873062, "learning_rate": 5.4155677150179446e-06, "loss": 0.2824, "step": 7781 }, { "epoch": 0.49, "grad_norm": 2.052321759581494, "learning_rate": 5.414552753733334e-06, "loss": 0.2837, "step": 7782 }, { "epoch": 0.49, "grad_norm": 0.6167219981940524, "learning_rate": 5.413537775248198e-06, "loss": 0.4771, "step": 7783 }, { "epoch": 0.49, "grad_norm": 15.557764588811983, "learning_rate": 5.412522779604642e-06, "loss": 0.2922, "step": 7784 }, { "epoch": 0.49, "grad_norm": 6.8335568018077275, "learning_rate": 5.411507766844784e-06, "loss": 0.3058, "step": 7785 }, { "epoch": 0.49, "grad_norm": 5.289772327945619, "learning_rate": 5.410492737010737e-06, "loss": 0.2712, "step": 7786 }, { "epoch": 0.49, "grad_norm": 1.7982921294117131, "learning_rate": 5.40947769014462e-06, "loss": 0.2914, "step": 7787 }, { "epoch": 0.49, "grad_norm": 2.441009830512988, "learning_rate": 5.408462626288544e-06, "loss": 0.2992, "step": 7788 }, { "epoch": 0.49, "grad_norm": 2.525357207988294, "learning_rate": 5.4074475454846275e-06, "loss": 0.3066, "step": 7789 }, { "epoch": 0.49, "grad_norm": 18.45928730511721, "learning_rate": 5.4064324477749895e-06, "loss": 0.291, "step": 7790 }, { "epoch": 0.49, "grad_norm": 3.0957578722529746, "learning_rate": 5.405417333201749e-06, "loss": 0.2946, "step": 7791 }, { "epoch": 0.49, "grad_norm": 2.1496594428860574, "learning_rate": 5.404402201807022e-06, "loss": 0.2933, "step": 7792 }, { "epoch": 0.49, "grad_norm": 4.366107406800677, "learning_rate": 5.403387053632928e-06, "loss": 0.2781, "step": 7793 }, { "epoch": 0.49, "grad_norm": 2.017692727243486, "learning_rate": 5.4023718887215906e-06, "loss": 0.2955, "step": 7794 }, { "epoch": 0.49, "grad_norm": 1.5367372740132124, "learning_rate": 5.401356707115128e-06, "loss": 0.2757, "step": 7795 }, { "epoch": 0.49, "grad_norm": 1.4266617915544213, "learning_rate": 5.400341508855663e-06, "loss": 0.2735, "step": 7796 }, { "epoch": 0.49, "grad_norm": 3.657927667472044, "learning_rate": 5.3993262939853175e-06, "loss": 0.2901, "step": 7797 }, { "epoch": 0.49, "grad_norm": 2.038135513034856, "learning_rate": 5.3983110625462144e-06, "loss": 0.3043, "step": 7798 }, { "epoch": 0.49, "grad_norm": 2.4296953850693885, "learning_rate": 5.397295814580479e-06, "loss": 0.3101, "step": 7799 }, { "epoch": 0.49, "grad_norm": 2.029014493023995, "learning_rate": 5.396280550130234e-06, "loss": 0.2989, "step": 7800 }, { "epoch": 0.49, "grad_norm": 1.6424893600592279, "learning_rate": 5.395265269237604e-06, "loss": 0.2816, "step": 7801 }, { "epoch": 0.49, "grad_norm": 2.602559189504729, "learning_rate": 5.394249971944717e-06, "loss": 0.3001, "step": 7802 }, { "epoch": 0.49, "grad_norm": 2.380024316459885, "learning_rate": 5.393234658293699e-06, "loss": 0.2773, "step": 7803 }, { "epoch": 0.49, "grad_norm": 3.1857487631942005, "learning_rate": 5.392219328326674e-06, "loss": 0.317, "step": 7804 }, { "epoch": 0.49, "grad_norm": 5.544890840486099, "learning_rate": 5.391203982085775e-06, "loss": 0.2928, "step": 7805 }, { "epoch": 0.49, "grad_norm": 0.6648914466207075, "learning_rate": 5.390188619613127e-06, "loss": 0.4924, "step": 7806 }, { "epoch": 0.49, "grad_norm": 1.8318319675761625, "learning_rate": 5.389173240950861e-06, "loss": 0.2878, "step": 7807 }, { "epoch": 0.49, "grad_norm": 2.2661003354214695, "learning_rate": 5.388157846141105e-06, "loss": 0.3093, "step": 7808 }, { "epoch": 0.49, "grad_norm": 3.3939666974106566, "learning_rate": 5.3871424352259904e-06, "loss": 0.2809, "step": 7809 }, { "epoch": 0.49, "grad_norm": 4.132518406526393, "learning_rate": 5.386127008247649e-06, "loss": 0.3026, "step": 7810 }, { "epoch": 0.49, "grad_norm": 1.422275574293052, "learning_rate": 5.385111565248212e-06, "loss": 0.2769, "step": 7811 }, { "epoch": 0.49, "grad_norm": 2.377147047080068, "learning_rate": 5.384096106269811e-06, "loss": 0.2952, "step": 7812 }, { "epoch": 0.49, "grad_norm": 1.5309259557800277, "learning_rate": 5.383080631354582e-06, "loss": 0.2883, "step": 7813 }, { "epoch": 0.49, "grad_norm": 3.260658727800435, "learning_rate": 5.3820651405446564e-06, "loss": 0.2987, "step": 7814 }, { "epoch": 0.49, "grad_norm": 2.189576227553726, "learning_rate": 5.38104963388217e-06, "loss": 0.3113, "step": 7815 }, { "epoch": 0.49, "grad_norm": 6.321911564146936, "learning_rate": 5.380034111409257e-06, "loss": 0.2805, "step": 7816 }, { "epoch": 0.49, "grad_norm": 4.417295041113762, "learning_rate": 5.379018573168056e-06, "loss": 0.3089, "step": 7817 }, { "epoch": 0.49, "grad_norm": 4.347253533304037, "learning_rate": 5.378003019200699e-06, "loss": 0.2785, "step": 7818 }, { "epoch": 0.49, "grad_norm": 2.114979822151295, "learning_rate": 5.376987449549325e-06, "loss": 0.3094, "step": 7819 }, { "epoch": 0.49, "grad_norm": 1.763573269754107, "learning_rate": 5.375971864256071e-06, "loss": 0.2907, "step": 7820 }, { "epoch": 0.49, "grad_norm": 3.257328131567649, "learning_rate": 5.3749562633630795e-06, "loss": 0.2863, "step": 7821 }, { "epoch": 0.49, "grad_norm": 2.837231319199659, "learning_rate": 5.373940646912485e-06, "loss": 0.2845, "step": 7822 }, { "epoch": 0.49, "grad_norm": 2.0511031390317953, "learning_rate": 5.372925014946428e-06, "loss": 0.2895, "step": 7823 }, { "epoch": 0.49, "grad_norm": 1.6441202753609079, "learning_rate": 5.371909367507051e-06, "loss": 0.2964, "step": 7824 }, { "epoch": 0.49, "grad_norm": 4.657212904994267, "learning_rate": 5.370893704636495e-06, "loss": 0.3129, "step": 7825 }, { "epoch": 0.49, "grad_norm": 3.289250358737912, "learning_rate": 5.369878026376899e-06, "loss": 0.2884, "step": 7826 }, { "epoch": 0.49, "grad_norm": 4.438647161835852, "learning_rate": 5.368862332770406e-06, "loss": 0.2834, "step": 7827 }, { "epoch": 0.49, "grad_norm": 2.208852229409675, "learning_rate": 5.36784662385916e-06, "loss": 0.2716, "step": 7828 }, { "epoch": 0.49, "grad_norm": 1.9287582100892882, "learning_rate": 5.366830899685306e-06, "loss": 0.277, "step": 7829 }, { "epoch": 0.49, "grad_norm": 2.903851899455363, "learning_rate": 5.365815160290983e-06, "loss": 0.2959, "step": 7830 }, { "epoch": 0.49, "grad_norm": 2.1247015783700065, "learning_rate": 5.364799405718342e-06, "loss": 0.3171, "step": 7831 }, { "epoch": 0.49, "grad_norm": 1.6676871413798984, "learning_rate": 5.3637836360095255e-06, "loss": 0.2803, "step": 7832 }, { "epoch": 0.49, "grad_norm": 1.7662499131575617, "learning_rate": 5.3627678512066795e-06, "loss": 0.2876, "step": 7833 }, { "epoch": 0.49, "grad_norm": 0.6006633156553667, "learning_rate": 5.36175205135195e-06, "loss": 0.4691, "step": 7834 }, { "epoch": 0.49, "grad_norm": 3.4657365438642187, "learning_rate": 5.360736236487486e-06, "loss": 0.2788, "step": 7835 }, { "epoch": 0.49, "grad_norm": 4.961876106210694, "learning_rate": 5.359720406655435e-06, "loss": 0.2735, "step": 7836 }, { "epoch": 0.49, "grad_norm": 2.2152937648310767, "learning_rate": 5.358704561897946e-06, "loss": 0.2892, "step": 7837 }, { "epoch": 0.49, "grad_norm": 2.053247394787333, "learning_rate": 5.357688702257165e-06, "loss": 0.2795, "step": 7838 }, { "epoch": 0.49, "grad_norm": 26.465164130014912, "learning_rate": 5.3566728277752474e-06, "loss": 0.2893, "step": 7839 }, { "epoch": 0.49, "grad_norm": 1.5914615193943744, "learning_rate": 5.355656938494339e-06, "loss": 0.2733, "step": 7840 }, { "epoch": 0.49, "grad_norm": 6.148678242380159, "learning_rate": 5.354641034456595e-06, "loss": 0.3147, "step": 7841 }, { "epoch": 0.49, "grad_norm": 1.5725284290527721, "learning_rate": 5.353625115704161e-06, "loss": 0.2893, "step": 7842 }, { "epoch": 0.49, "grad_norm": 1.870482497199345, "learning_rate": 5.352609182279195e-06, "loss": 0.297, "step": 7843 }, { "epoch": 0.49, "grad_norm": 1.8492267958161406, "learning_rate": 5.351593234223847e-06, "loss": 0.2903, "step": 7844 }, { "epoch": 0.49, "grad_norm": 1.461672599556936, "learning_rate": 5.35057727158027e-06, "loss": 0.2575, "step": 7845 }, { "epoch": 0.49, "grad_norm": 1.2814202977521445, "learning_rate": 5.349561294390622e-06, "loss": 0.2828, "step": 7846 }, { "epoch": 0.49, "grad_norm": 1.742395186833296, "learning_rate": 5.348545302697054e-06, "loss": 0.3104, "step": 7847 }, { "epoch": 0.49, "grad_norm": 1.5456809314140165, "learning_rate": 5.347529296541721e-06, "loss": 0.29, "step": 7848 }, { "epoch": 0.49, "grad_norm": 26.66109870909653, "learning_rate": 5.346513275966782e-06, "loss": 0.278, "step": 7849 }, { "epoch": 0.49, "grad_norm": 3.1719069188333964, "learning_rate": 5.34549724101439e-06, "loss": 0.2842, "step": 7850 }, { "epoch": 0.49, "grad_norm": 1.2886971048340783, "learning_rate": 5.344481191726706e-06, "loss": 0.2795, "step": 7851 }, { "epoch": 0.49, "grad_norm": 2.3802020504268517, "learning_rate": 5.343465128145884e-06, "loss": 0.2882, "step": 7852 }, { "epoch": 0.49, "grad_norm": 1.4542347798396273, "learning_rate": 5.342449050314084e-06, "loss": 0.2714, "step": 7853 }, { "epoch": 0.49, "grad_norm": 2.7576394213758326, "learning_rate": 5.3414329582734635e-06, "loss": 0.3048, "step": 7854 }, { "epoch": 0.49, "grad_norm": 1.9112309111662238, "learning_rate": 5.340416852066185e-06, "loss": 0.2774, "step": 7855 }, { "epoch": 0.49, "grad_norm": 5.112117802862802, "learning_rate": 5.339400731734404e-06, "loss": 0.281, "step": 7856 }, { "epoch": 0.49, "grad_norm": 1.6640068522887566, "learning_rate": 5.338384597320287e-06, "loss": 0.281, "step": 7857 }, { "epoch": 0.49, "grad_norm": 1.9509680169432204, "learning_rate": 5.33736844886599e-06, "loss": 0.2896, "step": 7858 }, { "epoch": 0.49, "grad_norm": 1.3020465188496206, "learning_rate": 5.336352286413678e-06, "loss": 0.2813, "step": 7859 }, { "epoch": 0.49, "grad_norm": 3.7139299680622218, "learning_rate": 5.335336110005511e-06, "loss": 0.2889, "step": 7860 }, { "epoch": 0.49, "grad_norm": 1.7794580784858343, "learning_rate": 5.3343199196836545e-06, "loss": 0.2877, "step": 7861 }, { "epoch": 0.49, "grad_norm": 1.419535508943437, "learning_rate": 5.33330371549027e-06, "loss": 0.2707, "step": 7862 }, { "epoch": 0.49, "grad_norm": 2.62458308913267, "learning_rate": 5.332287497467523e-06, "loss": 0.3135, "step": 7863 }, { "epoch": 0.49, "grad_norm": 2.1344687018315707, "learning_rate": 5.331271265657576e-06, "loss": 0.3049, "step": 7864 }, { "epoch": 0.49, "grad_norm": 1.423922270242874, "learning_rate": 5.330255020102598e-06, "loss": 0.2822, "step": 7865 }, { "epoch": 0.49, "grad_norm": 2.138042924156716, "learning_rate": 5.329238760844751e-06, "loss": 0.3126, "step": 7866 }, { "epoch": 0.49, "grad_norm": 3.442546742227438, "learning_rate": 5.328222487926204e-06, "loss": 0.3147, "step": 7867 }, { "epoch": 0.49, "grad_norm": 11.370689501851116, "learning_rate": 5.327206201389121e-06, "loss": 0.2864, "step": 7868 }, { "epoch": 0.49, "grad_norm": 6.397388091452828, "learning_rate": 5.326189901275673e-06, "loss": 0.2871, "step": 7869 }, { "epoch": 0.49, "grad_norm": 2.2757360750300504, "learning_rate": 5.325173587628028e-06, "loss": 0.2865, "step": 7870 }, { "epoch": 0.5, "grad_norm": 1.8233082627858472, "learning_rate": 5.324157260488351e-06, "loss": 0.3177, "step": 7871 }, { "epoch": 0.5, "grad_norm": 2.0797174636973628, "learning_rate": 5.3231409198988136e-06, "loss": 0.2977, "step": 7872 }, { "epoch": 0.5, "grad_norm": 1.4364150530437172, "learning_rate": 5.322124565901587e-06, "loss": 0.2831, "step": 7873 }, { "epoch": 0.5, "grad_norm": 1.5394900767597413, "learning_rate": 5.321108198538839e-06, "loss": 0.3183, "step": 7874 }, { "epoch": 0.5, "grad_norm": 2.1928337204093706, "learning_rate": 5.3200918178527415e-06, "loss": 0.2794, "step": 7875 }, { "epoch": 0.5, "grad_norm": 9.42904582272032, "learning_rate": 5.319075423885466e-06, "loss": 0.2946, "step": 7876 }, { "epoch": 0.5, "grad_norm": 1.3753258728152593, "learning_rate": 5.318059016679184e-06, "loss": 0.2875, "step": 7877 }, { "epoch": 0.5, "grad_norm": 1.5241915561094757, "learning_rate": 5.31704259627607e-06, "loss": 0.2903, "step": 7878 }, { "epoch": 0.5, "grad_norm": 2.3199319689602134, "learning_rate": 5.316026162718294e-06, "loss": 0.3044, "step": 7879 }, { "epoch": 0.5, "grad_norm": 1.9476814347952272, "learning_rate": 5.315009716048031e-06, "loss": 0.3045, "step": 7880 }, { "epoch": 0.5, "grad_norm": 1.7439968772929455, "learning_rate": 5.313993256307457e-06, "loss": 0.2993, "step": 7881 }, { "epoch": 0.5, "grad_norm": 1.9012851842038336, "learning_rate": 5.312976783538743e-06, "loss": 0.2832, "step": 7882 }, { "epoch": 0.5, "grad_norm": 1.7364297307752168, "learning_rate": 5.3119602977840675e-06, "loss": 0.274, "step": 7883 }, { "epoch": 0.5, "grad_norm": 2.1144750913095374, "learning_rate": 5.310943799085605e-06, "loss": 0.285, "step": 7884 }, { "epoch": 0.5, "grad_norm": 1.7648450352078844, "learning_rate": 5.309927287485533e-06, "loss": 0.2915, "step": 7885 }, { "epoch": 0.5, "grad_norm": 1.7324473464984051, "learning_rate": 5.308910763026025e-06, "loss": 0.277, "step": 7886 }, { "epoch": 0.5, "grad_norm": 3.7847826428085893, "learning_rate": 5.3078942257492635e-06, "loss": 0.29, "step": 7887 }, { "epoch": 0.5, "grad_norm": 2.407069782983817, "learning_rate": 5.306877675697422e-06, "loss": 0.299, "step": 7888 }, { "epoch": 0.5, "grad_norm": 2.0505374188782954, "learning_rate": 5.305861112912682e-06, "loss": 0.3191, "step": 7889 }, { "epoch": 0.5, "grad_norm": 2.4128359405053703, "learning_rate": 5.3048445374372195e-06, "loss": 0.2896, "step": 7890 }, { "epoch": 0.5, "grad_norm": 4.525059847611393, "learning_rate": 5.303827949313216e-06, "loss": 0.2844, "step": 7891 }, { "epoch": 0.5, "grad_norm": 2.0767857423777576, "learning_rate": 5.302811348582851e-06, "loss": 0.2865, "step": 7892 }, { "epoch": 0.5, "grad_norm": 1.8358489094171377, "learning_rate": 5.301794735288307e-06, "loss": 0.3444, "step": 7893 }, { "epoch": 0.5, "grad_norm": 1.868254505609128, "learning_rate": 5.300778109471761e-06, "loss": 0.2877, "step": 7894 }, { "epoch": 0.5, "grad_norm": 0.5981025833410399, "learning_rate": 5.2997614711753995e-06, "loss": 0.4733, "step": 7895 }, { "epoch": 0.5, "grad_norm": 1.6909587767237433, "learning_rate": 5.298744820441401e-06, "loss": 0.2701, "step": 7896 }, { "epoch": 0.5, "grad_norm": 5.89792792416731, "learning_rate": 5.297728157311949e-06, "loss": 0.2771, "step": 7897 }, { "epoch": 0.5, "grad_norm": 1.3995807621527219, "learning_rate": 5.296711481829227e-06, "loss": 0.2811, "step": 7898 }, { "epoch": 0.5, "grad_norm": 3.174499932223166, "learning_rate": 5.295694794035419e-06, "loss": 0.2876, "step": 7899 }, { "epoch": 0.5, "grad_norm": 3.0959527284495594, "learning_rate": 5.2946780939727084e-06, "loss": 0.2969, "step": 7900 }, { "epoch": 0.5, "grad_norm": 5.323792377120759, "learning_rate": 5.29366138168328e-06, "loss": 0.3005, "step": 7901 }, { "epoch": 0.5, "grad_norm": 1.8694081748452838, "learning_rate": 5.292644657209319e-06, "loss": 0.2853, "step": 7902 }, { "epoch": 0.5, "grad_norm": 2.0469038900242316, "learning_rate": 5.291627920593014e-06, "loss": 0.2886, "step": 7903 }, { "epoch": 0.5, "grad_norm": 0.5926515851845977, "learning_rate": 5.290611171876545e-06, "loss": 0.4816, "step": 7904 }, { "epoch": 0.5, "grad_norm": 1.5227893147930167, "learning_rate": 5.289594411102103e-06, "loss": 0.2952, "step": 7905 }, { "epoch": 0.5, "grad_norm": 2.4201675984421223, "learning_rate": 5.288577638311876e-06, "loss": 0.2912, "step": 7906 }, { "epoch": 0.5, "grad_norm": 1.7328693828558057, "learning_rate": 5.287560853548051e-06, "loss": 0.2797, "step": 7907 }, { "epoch": 0.5, "grad_norm": 0.5812456041002827, "learning_rate": 5.286544056852814e-06, "loss": 0.489, "step": 7908 }, { "epoch": 0.5, "grad_norm": 2.4004950807853747, "learning_rate": 5.285527248268354e-06, "loss": 0.2968, "step": 7909 }, { "epoch": 0.5, "grad_norm": 2.3279271592857764, "learning_rate": 5.2845104278368616e-06, "loss": 0.2952, "step": 7910 }, { "epoch": 0.5, "grad_norm": 2.069382801279133, "learning_rate": 5.283493595600529e-06, "loss": 0.2866, "step": 7911 }, { "epoch": 0.5, "grad_norm": 1.8005879057685519, "learning_rate": 5.28247675160154e-06, "loss": 0.2826, "step": 7912 }, { "epoch": 0.5, "grad_norm": 1.8240246921078522, "learning_rate": 5.281459895882091e-06, "loss": 0.2896, "step": 7913 }, { "epoch": 0.5, "grad_norm": 0.5857586805162879, "learning_rate": 5.28044302848437e-06, "loss": 0.4779, "step": 7914 }, { "epoch": 0.5, "grad_norm": 1.883077599717525, "learning_rate": 5.279426149450571e-06, "loss": 0.2655, "step": 7915 }, { "epoch": 0.5, "grad_norm": 3.706124916748602, "learning_rate": 5.278409258822883e-06, "loss": 0.2961, "step": 7916 }, { "epoch": 0.5, "grad_norm": 1.3641752380519125, "learning_rate": 5.277392356643501e-06, "loss": 0.2874, "step": 7917 }, { "epoch": 0.5, "grad_norm": 2.7134409761385343, "learning_rate": 5.276375442954618e-06, "loss": 0.2846, "step": 7918 }, { "epoch": 0.5, "grad_norm": 1.7934162210103406, "learning_rate": 5.275358517798428e-06, "loss": 0.304, "step": 7919 }, { "epoch": 0.5, "grad_norm": 1.784469004248253, "learning_rate": 5.27434158121712e-06, "loss": 0.2854, "step": 7920 }, { "epoch": 0.5, "grad_norm": 3.640741575724123, "learning_rate": 5.273324633252897e-06, "loss": 0.2837, "step": 7921 }, { "epoch": 0.5, "grad_norm": 1.6976472680018913, "learning_rate": 5.272307673947947e-06, "loss": 0.2747, "step": 7922 }, { "epoch": 0.5, "grad_norm": 1.7838076336567272, "learning_rate": 5.271290703344469e-06, "loss": 0.2779, "step": 7923 }, { "epoch": 0.5, "grad_norm": 1.283616882704457, "learning_rate": 5.270273721484657e-06, "loss": 0.2924, "step": 7924 }, { "epoch": 0.5, "grad_norm": 1.6596932629621144, "learning_rate": 5.269256728410709e-06, "loss": 0.2836, "step": 7925 }, { "epoch": 0.5, "grad_norm": 1.3390694006704675, "learning_rate": 5.268239724164819e-06, "loss": 0.2877, "step": 7926 }, { "epoch": 0.5, "grad_norm": 2.4626165226004626, "learning_rate": 5.267222708789189e-06, "loss": 0.2914, "step": 7927 }, { "epoch": 0.5, "grad_norm": 2.5017678493186915, "learning_rate": 5.266205682326013e-06, "loss": 0.3004, "step": 7928 }, { "epoch": 0.5, "grad_norm": 1.4501573454757062, "learning_rate": 5.265188644817492e-06, "loss": 0.2717, "step": 7929 }, { "epoch": 0.5, "grad_norm": 2.9411583587568177, "learning_rate": 5.264171596305821e-06, "loss": 0.3036, "step": 7930 }, { "epoch": 0.5, "grad_norm": 1.5920735045894463, "learning_rate": 5.263154536833202e-06, "loss": 0.3052, "step": 7931 }, { "epoch": 0.5, "grad_norm": 1.8451966896150458, "learning_rate": 5.262137466441834e-06, "loss": 0.3133, "step": 7932 }, { "epoch": 0.5, "grad_norm": 2.4059217498548913, "learning_rate": 5.261120385173917e-06, "loss": 0.2871, "step": 7933 }, { "epoch": 0.5, "grad_norm": 2.705879722655072, "learning_rate": 5.260103293071651e-06, "loss": 0.2989, "step": 7934 }, { "epoch": 0.5, "grad_norm": 2.1456088508382196, "learning_rate": 5.259086190177237e-06, "loss": 0.3024, "step": 7935 }, { "epoch": 0.5, "grad_norm": 2.79279948609646, "learning_rate": 5.258069076532877e-06, "loss": 0.2995, "step": 7936 }, { "epoch": 0.5, "grad_norm": 2.050047551143473, "learning_rate": 5.257051952180774e-06, "loss": 0.3132, "step": 7937 }, { "epoch": 0.5, "grad_norm": 5.05594381130633, "learning_rate": 5.256034817163127e-06, "loss": 0.3071, "step": 7938 }, { "epoch": 0.5, "grad_norm": 2.0684131487006554, "learning_rate": 5.255017671522142e-06, "loss": 0.2834, "step": 7939 }, { "epoch": 0.5, "grad_norm": 2.241061238094606, "learning_rate": 5.254000515300019e-06, "loss": 0.286, "step": 7940 }, { "epoch": 0.5, "grad_norm": 2.7180458792869078, "learning_rate": 5.252983348538967e-06, "loss": 0.2931, "step": 7941 }, { "epoch": 0.5, "grad_norm": 3.548697770337576, "learning_rate": 5.2519661712811845e-06, "loss": 0.2959, "step": 7942 }, { "epoch": 0.5, "grad_norm": 2.3352332012162123, "learning_rate": 5.250948983568876e-06, "loss": 0.3082, "step": 7943 }, { "epoch": 0.5, "grad_norm": 1.7361041982240581, "learning_rate": 5.249931785444251e-06, "loss": 0.293, "step": 7944 }, { "epoch": 0.5, "grad_norm": 1.7055614049768366, "learning_rate": 5.248914576949512e-06, "loss": 0.2985, "step": 7945 }, { "epoch": 0.5, "grad_norm": 1.4417005900917659, "learning_rate": 5.2478973581268645e-06, "loss": 0.29, "step": 7946 }, { "epoch": 0.5, "grad_norm": 1.9337301910545015, "learning_rate": 5.246880129018515e-06, "loss": 0.2833, "step": 7947 }, { "epoch": 0.5, "grad_norm": 2.5940934482588602, "learning_rate": 5.24586288966667e-06, "loss": 0.2961, "step": 7948 }, { "epoch": 0.5, "grad_norm": 1.8112097807465304, "learning_rate": 5.24484564011354e-06, "loss": 0.3054, "step": 7949 }, { "epoch": 0.5, "grad_norm": 3.205560532580205, "learning_rate": 5.2438283804013265e-06, "loss": 0.2837, "step": 7950 }, { "epoch": 0.5, "grad_norm": 4.236503306564229, "learning_rate": 5.242811110572243e-06, "loss": 0.2845, "step": 7951 }, { "epoch": 0.5, "grad_norm": 3.203666350475942, "learning_rate": 5.241793830668492e-06, "loss": 0.2985, "step": 7952 }, { "epoch": 0.5, "grad_norm": 2.0611771440486573, "learning_rate": 5.240776540732288e-06, "loss": 0.2676, "step": 7953 }, { "epoch": 0.5, "grad_norm": 1.653082607696945, "learning_rate": 5.239759240805835e-06, "loss": 0.2984, "step": 7954 }, { "epoch": 0.5, "grad_norm": 4.7539020266152905, "learning_rate": 5.238741930931348e-06, "loss": 0.2772, "step": 7955 }, { "epoch": 0.5, "grad_norm": 2.283071163067402, "learning_rate": 5.237724611151034e-06, "loss": 0.2782, "step": 7956 }, { "epoch": 0.5, "grad_norm": 0.6218217562479014, "learning_rate": 5.2367072815071015e-06, "loss": 0.5029, "step": 7957 }, { "epoch": 0.5, "grad_norm": 6.276229364350079, "learning_rate": 5.235689942041765e-06, "loss": 0.3007, "step": 7958 }, { "epoch": 0.5, "grad_norm": 1.387998062722005, "learning_rate": 5.234672592797236e-06, "loss": 0.2815, "step": 7959 }, { "epoch": 0.5, "grad_norm": 1.6021124702158773, "learning_rate": 5.233655233815721e-06, "loss": 0.2809, "step": 7960 }, { "epoch": 0.5, "grad_norm": 5.611070341895981, "learning_rate": 5.232637865139436e-06, "loss": 0.2809, "step": 7961 }, { "epoch": 0.5, "grad_norm": 1.496121167524374, "learning_rate": 5.231620486810594e-06, "loss": 0.3149, "step": 7962 }, { "epoch": 0.5, "grad_norm": 1.3004514797850226, "learning_rate": 5.230603098871406e-06, "loss": 0.2733, "step": 7963 }, { "epoch": 0.5, "grad_norm": 3.787446885363309, "learning_rate": 5.229585701364086e-06, "loss": 0.2961, "step": 7964 }, { "epoch": 0.5, "grad_norm": 1.5105309186295437, "learning_rate": 5.228568294330847e-06, "loss": 0.2878, "step": 7965 }, { "epoch": 0.5, "grad_norm": 1.9272804431534816, "learning_rate": 5.227550877813903e-06, "loss": 0.299, "step": 7966 }, { "epoch": 0.5, "grad_norm": 2.332172028792701, "learning_rate": 5.226533451855471e-06, "loss": 0.29, "step": 7967 }, { "epoch": 0.5, "grad_norm": 1.8385924181373738, "learning_rate": 5.225516016497761e-06, "loss": 0.2868, "step": 7968 }, { "epoch": 0.5, "grad_norm": 3.238441197023188, "learning_rate": 5.224498571782992e-06, "loss": 0.2826, "step": 7969 }, { "epoch": 0.5, "grad_norm": 2.7610732894069905, "learning_rate": 5.223481117753379e-06, "loss": 0.2874, "step": 7970 }, { "epoch": 0.5, "grad_norm": 0.6148411709942464, "learning_rate": 5.222463654451138e-06, "loss": 0.5345, "step": 7971 }, { "epoch": 0.5, "grad_norm": 1.850150281025383, "learning_rate": 5.221446181918484e-06, "loss": 0.3108, "step": 7972 }, { "epoch": 0.5, "grad_norm": 1.8929554689762693, "learning_rate": 5.220428700197635e-06, "loss": 0.2887, "step": 7973 }, { "epoch": 0.5, "grad_norm": 2.2302322929908756, "learning_rate": 5.219411209330807e-06, "loss": 0.3011, "step": 7974 }, { "epoch": 0.5, "grad_norm": 2.6597011141365123, "learning_rate": 5.218393709360219e-06, "loss": 0.2634, "step": 7975 }, { "epoch": 0.5, "grad_norm": 1.8544295318833768, "learning_rate": 5.217376200328087e-06, "loss": 0.2901, "step": 7976 }, { "epoch": 0.5, "grad_norm": 2.6148198615159677, "learning_rate": 5.216358682276631e-06, "loss": 0.2785, "step": 7977 }, { "epoch": 0.5, "grad_norm": 1.8760216823100602, "learning_rate": 5.215341155248069e-06, "loss": 0.2778, "step": 7978 }, { "epoch": 0.5, "grad_norm": 2.19102729214334, "learning_rate": 5.214323619284619e-06, "loss": 0.2741, "step": 7979 }, { "epoch": 0.5, "grad_norm": 1.5999705552751469, "learning_rate": 5.213306074428503e-06, "loss": 0.2764, "step": 7980 }, { "epoch": 0.5, "grad_norm": 1.5029067945288253, "learning_rate": 5.212288520721939e-06, "loss": 0.2786, "step": 7981 }, { "epoch": 0.5, "grad_norm": 4.002508771973122, "learning_rate": 5.2112709582071464e-06, "loss": 0.2876, "step": 7982 }, { "epoch": 0.5, "grad_norm": 20.874911072768043, "learning_rate": 5.210253386926346e-06, "loss": 0.2807, "step": 7983 }, { "epoch": 0.5, "grad_norm": 2.293807677620967, "learning_rate": 5.20923580692176e-06, "loss": 0.2857, "step": 7984 }, { "epoch": 0.5, "grad_norm": 1.896818899562079, "learning_rate": 5.208218218235609e-06, "loss": 0.2899, "step": 7985 }, { "epoch": 0.5, "grad_norm": 3.450485937256754, "learning_rate": 5.207200620910114e-06, "loss": 0.2894, "step": 7986 }, { "epoch": 0.5, "grad_norm": 2.0716587200662073, "learning_rate": 5.206183014987497e-06, "loss": 0.2711, "step": 7987 }, { "epoch": 0.5, "grad_norm": 1.8071708646158895, "learning_rate": 5.205165400509982e-06, "loss": 0.2742, "step": 7988 }, { "epoch": 0.5, "grad_norm": 1.2390266213904055, "learning_rate": 5.2041477775197875e-06, "loss": 0.2896, "step": 7989 }, { "epoch": 0.5, "grad_norm": 2.839432018744269, "learning_rate": 5.20313014605914e-06, "loss": 0.2774, "step": 7990 }, { "epoch": 0.5, "grad_norm": 2.098592260273801, "learning_rate": 5.202112506170263e-06, "loss": 0.2836, "step": 7991 }, { "epoch": 0.5, "grad_norm": 3.037684427485203, "learning_rate": 5.201094857895377e-06, "loss": 0.271, "step": 7992 }, { "epoch": 0.5, "grad_norm": 2.0779139106441136, "learning_rate": 5.200077201276711e-06, "loss": 0.2819, "step": 7993 }, { "epoch": 0.5, "grad_norm": 3.0179452472155095, "learning_rate": 5.1990595363564845e-06, "loss": 0.3158, "step": 7994 }, { "epoch": 0.5, "grad_norm": 2.2662962933022985, "learning_rate": 5.198041863176925e-06, "loss": 0.293, "step": 7995 }, { "epoch": 0.5, "grad_norm": 1.694266813304795, "learning_rate": 5.197024181780256e-06, "loss": 0.2718, "step": 7996 }, { "epoch": 0.5, "grad_norm": 1.5220341333856093, "learning_rate": 5.196006492208705e-06, "loss": 0.2784, "step": 7997 }, { "epoch": 0.5, "grad_norm": 2.6307678542495885, "learning_rate": 5.194988794504495e-06, "loss": 0.3027, "step": 7998 }, { "epoch": 0.5, "grad_norm": 1.6771723970344932, "learning_rate": 5.193971088709855e-06, "loss": 0.2854, "step": 7999 }, { "epoch": 0.5, "grad_norm": 1.6996155697591504, "learning_rate": 5.192953374867009e-06, "loss": 0.2816, "step": 8000 }, { "epoch": 0.5, "grad_norm": 2.342947985914777, "learning_rate": 5.1919356530181865e-06, "loss": 0.277, "step": 8001 }, { "epoch": 0.5, "grad_norm": 3.1335009395378743, "learning_rate": 5.190917923205611e-06, "loss": 0.2752, "step": 8002 }, { "epoch": 0.5, "grad_norm": 1.8525786953817451, "learning_rate": 5.189900185471511e-06, "loss": 0.2832, "step": 8003 }, { "epoch": 0.5, "grad_norm": 4.18225132389599, "learning_rate": 5.188882439858117e-06, "loss": 0.2876, "step": 8004 }, { "epoch": 0.5, "grad_norm": 2.368988241224818, "learning_rate": 5.187864686407656e-06, "loss": 0.2769, "step": 8005 }, { "epoch": 0.5, "grad_norm": 2.2003221974676173, "learning_rate": 5.186846925162353e-06, "loss": 0.2889, "step": 8006 }, { "epoch": 0.5, "grad_norm": 1.4983689874048514, "learning_rate": 5.1858291561644394e-06, "loss": 0.2831, "step": 8007 }, { "epoch": 0.5, "grad_norm": 1.8058272796197763, "learning_rate": 5.184811379456145e-06, "loss": 0.2903, "step": 8008 }, { "epoch": 0.5, "grad_norm": 1.7720071199535712, "learning_rate": 5.183793595079697e-06, "loss": 0.2825, "step": 8009 }, { "epoch": 0.5, "grad_norm": 1.181826200879759, "learning_rate": 5.1827758030773275e-06, "loss": 0.2599, "step": 8010 }, { "epoch": 0.5, "grad_norm": 3.567614006042909, "learning_rate": 5.181758003491265e-06, "loss": 0.2941, "step": 8011 }, { "epoch": 0.5, "grad_norm": 2.1988998909507154, "learning_rate": 5.1807401963637404e-06, "loss": 0.29, "step": 8012 }, { "epoch": 0.5, "grad_norm": 2.083542254299491, "learning_rate": 5.179722381736983e-06, "loss": 0.2762, "step": 8013 }, { "epoch": 0.5, "grad_norm": 1.2464794092514846, "learning_rate": 5.178704559653227e-06, "loss": 0.271, "step": 8014 }, { "epoch": 0.5, "grad_norm": 0.59916161382316, "learning_rate": 5.1776867301547e-06, "loss": 0.5044, "step": 8015 }, { "epoch": 0.5, "grad_norm": 2.2064660580278344, "learning_rate": 5.176668893283634e-06, "loss": 0.2926, "step": 8016 }, { "epoch": 0.5, "grad_norm": 1.6907536057633934, "learning_rate": 5.175651049082262e-06, "loss": 0.2787, "step": 8017 }, { "epoch": 0.5, "grad_norm": 2.519586990692234, "learning_rate": 5.174633197592818e-06, "loss": 0.283, "step": 8018 }, { "epoch": 0.5, "grad_norm": 2.131038688080356, "learning_rate": 5.1736153388575305e-06, "loss": 0.2877, "step": 8019 }, { "epoch": 0.5, "grad_norm": 1.3906065783995365, "learning_rate": 5.172597472918635e-06, "loss": 0.2841, "step": 8020 }, { "epoch": 0.5, "grad_norm": 1.3181186580179853, "learning_rate": 5.1715795998183625e-06, "loss": 0.2885, "step": 8021 }, { "epoch": 0.5, "grad_norm": 2.0147444254565037, "learning_rate": 5.1705617195989495e-06, "loss": 0.2849, "step": 8022 }, { "epoch": 0.5, "grad_norm": 3.284732829682858, "learning_rate": 5.169543832302627e-06, "loss": 0.2959, "step": 8023 }, { "epoch": 0.5, "grad_norm": 2.7715806483230008, "learning_rate": 5.168525937971629e-06, "loss": 0.2925, "step": 8024 }, { "epoch": 0.5, "grad_norm": 1.5507911583763665, "learning_rate": 5.167508036648191e-06, "loss": 0.3008, "step": 8025 }, { "epoch": 0.5, "grad_norm": 1.8122030484431046, "learning_rate": 5.166490128374548e-06, "loss": 0.2793, "step": 8026 }, { "epoch": 0.5, "grad_norm": 3.2404819603162123, "learning_rate": 5.165472213192934e-06, "loss": 0.3042, "step": 8027 }, { "epoch": 0.5, "grad_norm": 1.9126359663228494, "learning_rate": 5.164454291145582e-06, "loss": 0.2706, "step": 8028 }, { "epoch": 0.5, "grad_norm": 1.6862084248984586, "learning_rate": 5.163436362274731e-06, "loss": 0.2823, "step": 8029 }, { "epoch": 0.5, "grad_norm": 1.8249442111586103, "learning_rate": 5.162418426622615e-06, "loss": 0.2798, "step": 8030 }, { "epoch": 0.51, "grad_norm": 1.659652593548811, "learning_rate": 5.1614004842314694e-06, "loss": 0.2699, "step": 8031 }, { "epoch": 0.51, "grad_norm": 1.9987159271790569, "learning_rate": 5.160382535143531e-06, "loss": 0.2833, "step": 8032 }, { "epoch": 0.51, "grad_norm": 1.9910294536543716, "learning_rate": 5.159364579401036e-06, "loss": 0.2881, "step": 8033 }, { "epoch": 0.51, "grad_norm": 10.151206820163265, "learning_rate": 5.158346617046221e-06, "loss": 0.2704, "step": 8034 }, { "epoch": 0.51, "grad_norm": 1.932043974434462, "learning_rate": 5.157328648121325e-06, "loss": 0.2865, "step": 8035 }, { "epoch": 0.51, "grad_norm": 3.8742209859969425, "learning_rate": 5.156310672668584e-06, "loss": 0.2841, "step": 8036 }, { "epoch": 0.51, "grad_norm": 1.6486940944055828, "learning_rate": 5.155292690730235e-06, "loss": 0.2925, "step": 8037 }, { "epoch": 0.51, "grad_norm": 1.6656818332786414, "learning_rate": 5.154274702348517e-06, "loss": 0.2841, "step": 8038 }, { "epoch": 0.51, "grad_norm": 1.736002438024291, "learning_rate": 5.153256707565666e-06, "loss": 0.2774, "step": 8039 }, { "epoch": 0.51, "grad_norm": 1.7506932901812695, "learning_rate": 5.152238706423925e-06, "loss": 0.2733, "step": 8040 }, { "epoch": 0.51, "grad_norm": 1.6446735246618587, "learning_rate": 5.151220698965526e-06, "loss": 0.2824, "step": 8041 }, { "epoch": 0.51, "grad_norm": 3.3495018044799867, "learning_rate": 5.1502026852327136e-06, "loss": 0.293, "step": 8042 }, { "epoch": 0.51, "grad_norm": 1.9900894311739992, "learning_rate": 5.149184665267725e-06, "loss": 0.2782, "step": 8043 }, { "epoch": 0.51, "grad_norm": 2.4473681503604716, "learning_rate": 5.148166639112799e-06, "loss": 0.2822, "step": 8044 }, { "epoch": 0.51, "grad_norm": 2.715485009361348, "learning_rate": 5.1471486068101774e-06, "loss": 0.3001, "step": 8045 }, { "epoch": 0.51, "grad_norm": 1.3454430352257016, "learning_rate": 5.146130568402097e-06, "loss": 0.2769, "step": 8046 }, { "epoch": 0.51, "grad_norm": 2.3760925397182366, "learning_rate": 5.1451125239308e-06, "loss": 0.2995, "step": 8047 }, { "epoch": 0.51, "grad_norm": 1.4208275815335933, "learning_rate": 5.144094473438528e-06, "loss": 0.2735, "step": 8048 }, { "epoch": 0.51, "grad_norm": 1.4885921654964185, "learning_rate": 5.14307641696752e-06, "loss": 0.2719, "step": 8049 }, { "epoch": 0.51, "grad_norm": 1.845783800197305, "learning_rate": 5.142058354560016e-06, "loss": 0.2892, "step": 8050 }, { "epoch": 0.51, "grad_norm": 1.552123748741543, "learning_rate": 5.141040286258259e-06, "loss": 0.2753, "step": 8051 }, { "epoch": 0.51, "grad_norm": 2.1764173994410405, "learning_rate": 5.140022212104492e-06, "loss": 0.3153, "step": 8052 }, { "epoch": 0.51, "grad_norm": 1.3296462212815934, "learning_rate": 5.139004132140953e-06, "loss": 0.2999, "step": 8053 }, { "epoch": 0.51, "grad_norm": 1.8875624634374253, "learning_rate": 5.137986046409884e-06, "loss": 0.2735, "step": 8054 }, { "epoch": 0.51, "grad_norm": 1.5061306577739693, "learning_rate": 5.136967954953531e-06, "loss": 0.309, "step": 8055 }, { "epoch": 0.51, "grad_norm": 1.5540241913902926, "learning_rate": 5.135949857814134e-06, "loss": 0.2859, "step": 8056 }, { "epoch": 0.51, "grad_norm": 2.2405392597573455, "learning_rate": 5.134931755033936e-06, "loss": 0.2963, "step": 8057 }, { "epoch": 0.51, "grad_norm": 4.134265261415188, "learning_rate": 5.13391364665518e-06, "loss": 0.3058, "step": 8058 }, { "epoch": 0.51, "grad_norm": 1.6670677602740802, "learning_rate": 5.132895532720108e-06, "loss": 0.2937, "step": 8059 }, { "epoch": 0.51, "grad_norm": 1.2613904123723603, "learning_rate": 5.131877413270965e-06, "loss": 0.2802, "step": 8060 }, { "epoch": 0.51, "grad_norm": 1.5338154753482607, "learning_rate": 5.130859288349993e-06, "loss": 0.2936, "step": 8061 }, { "epoch": 0.51, "grad_norm": 1.9411781150630898, "learning_rate": 5.129841157999438e-06, "loss": 0.2738, "step": 8062 }, { "epoch": 0.51, "grad_norm": 2.2707669150827714, "learning_rate": 5.128823022261542e-06, "loss": 0.2865, "step": 8063 }, { "epoch": 0.51, "grad_norm": 11.183724499773126, "learning_rate": 5.127804881178551e-06, "loss": 0.3193, "step": 8064 }, { "epoch": 0.51, "grad_norm": 1.8548627104452602, "learning_rate": 5.126786734792706e-06, "loss": 0.2981, "step": 8065 }, { "epoch": 0.51, "grad_norm": 2.4332607932074577, "learning_rate": 5.1257685831462565e-06, "loss": 0.3022, "step": 8066 }, { "epoch": 0.51, "grad_norm": 2.5992390097914027, "learning_rate": 5.124750426281444e-06, "loss": 0.3095, "step": 8067 }, { "epoch": 0.51, "grad_norm": 12.319122411374217, "learning_rate": 5.123732264240517e-06, "loss": 0.3037, "step": 8068 }, { "epoch": 0.51, "grad_norm": 2.5635183548913294, "learning_rate": 5.122714097065714e-06, "loss": 0.2961, "step": 8069 }, { "epoch": 0.51, "grad_norm": 2.354103677632639, "learning_rate": 5.1216959247992896e-06, "loss": 0.3098, "step": 8070 }, { "epoch": 0.51, "grad_norm": 0.6294266845144957, "learning_rate": 5.120677747483482e-06, "loss": 0.4819, "step": 8071 }, { "epoch": 0.51, "grad_norm": 1.5833706118593205, "learning_rate": 5.119659565160542e-06, "loss": 0.3056, "step": 8072 }, { "epoch": 0.51, "grad_norm": 3.2022867462856706, "learning_rate": 5.1186413778727125e-06, "loss": 0.2961, "step": 8073 }, { "epoch": 0.51, "grad_norm": 1.2339430255592672, "learning_rate": 5.117623185662245e-06, "loss": 0.2904, "step": 8074 }, { "epoch": 0.51, "grad_norm": 2.4727351428623927, "learning_rate": 5.11660498857138e-06, "loss": 0.2811, "step": 8075 }, { "epoch": 0.51, "grad_norm": 1.4549063477747954, "learning_rate": 5.115586786642367e-06, "loss": 0.3009, "step": 8076 }, { "epoch": 0.51, "grad_norm": 2.154997404078864, "learning_rate": 5.1145685799174525e-06, "loss": 0.2995, "step": 8077 }, { "epoch": 0.51, "grad_norm": 3.298505816209827, "learning_rate": 5.113550368438885e-06, "loss": 0.2827, "step": 8078 }, { "epoch": 0.51, "grad_norm": 1.4602174794329403, "learning_rate": 5.112532152248911e-06, "loss": 0.2928, "step": 8079 }, { "epoch": 0.51, "grad_norm": 1.9825143170171933, "learning_rate": 5.111513931389777e-06, "loss": 0.299, "step": 8080 }, { "epoch": 0.51, "grad_norm": 2.405679316456453, "learning_rate": 5.110495705903734e-06, "loss": 0.3207, "step": 8081 }, { "epoch": 0.51, "grad_norm": 3.0238107680183663, "learning_rate": 5.109477475833027e-06, "loss": 0.2906, "step": 8082 }, { "epoch": 0.51, "grad_norm": 1.418260652052426, "learning_rate": 5.108459241219905e-06, "loss": 0.2823, "step": 8083 }, { "epoch": 0.51, "grad_norm": 0.5686712902216726, "learning_rate": 5.107441002106616e-06, "loss": 0.5054, "step": 8084 }, { "epoch": 0.51, "grad_norm": 3.1012274202739682, "learning_rate": 5.106422758535408e-06, "loss": 0.2993, "step": 8085 }, { "epoch": 0.51, "grad_norm": 1.3269839688973062, "learning_rate": 5.105404510548534e-06, "loss": 0.2973, "step": 8086 }, { "epoch": 0.51, "grad_norm": 2.3121186570343597, "learning_rate": 5.1043862581882375e-06, "loss": 0.2844, "step": 8087 }, { "epoch": 0.51, "grad_norm": 1.5471815690013278, "learning_rate": 5.103368001496769e-06, "loss": 0.2817, "step": 8088 }, { "epoch": 0.51, "grad_norm": 2.5163684845845093, "learning_rate": 5.102349740516379e-06, "loss": 0.2978, "step": 8089 }, { "epoch": 0.51, "grad_norm": 1.8501363901100054, "learning_rate": 5.101331475289318e-06, "loss": 0.2905, "step": 8090 }, { "epoch": 0.51, "grad_norm": 1.9429442626330335, "learning_rate": 5.100313205857832e-06, "loss": 0.3, "step": 8091 }, { "epoch": 0.51, "grad_norm": 2.242598751930439, "learning_rate": 5.099294932264174e-06, "loss": 0.2949, "step": 8092 }, { "epoch": 0.51, "grad_norm": 8.958946009346706, "learning_rate": 5.098276654550593e-06, "loss": 0.2762, "step": 8093 }, { "epoch": 0.51, "grad_norm": 2.0006594531298725, "learning_rate": 5.097258372759339e-06, "loss": 0.2834, "step": 8094 }, { "epoch": 0.51, "grad_norm": 1.4309018910323494, "learning_rate": 5.096240086932661e-06, "loss": 0.2853, "step": 8095 }, { "epoch": 0.51, "grad_norm": 1.5859847155762092, "learning_rate": 5.095221797112814e-06, "loss": 0.2821, "step": 8096 }, { "epoch": 0.51, "grad_norm": 1.3161250674204685, "learning_rate": 5.094203503342042e-06, "loss": 0.2821, "step": 8097 }, { "epoch": 0.51, "grad_norm": 0.6229600351794886, "learning_rate": 5.093185205662602e-06, "loss": 0.5152, "step": 8098 }, { "epoch": 0.51, "grad_norm": 2.575849273510752, "learning_rate": 5.09216690411674e-06, "loss": 0.2875, "step": 8099 }, { "epoch": 0.51, "grad_norm": 1.3971618086227922, "learning_rate": 5.091148598746711e-06, "loss": 0.2902, "step": 8100 }, { "epoch": 0.51, "grad_norm": 3.0548110743047925, "learning_rate": 5.090130289594764e-06, "loss": 0.2877, "step": 8101 }, { "epoch": 0.51, "grad_norm": 2.410056373113431, "learning_rate": 5.089111976703151e-06, "loss": 0.2888, "step": 8102 }, { "epoch": 0.51, "grad_norm": 1.2334959627674285, "learning_rate": 5.088093660114125e-06, "loss": 0.2724, "step": 8103 }, { "epoch": 0.51, "grad_norm": 2.1024339018243983, "learning_rate": 5.087075339869937e-06, "loss": 0.2774, "step": 8104 }, { "epoch": 0.51, "grad_norm": 1.7350354360047069, "learning_rate": 5.086057016012836e-06, "loss": 0.2965, "step": 8105 }, { "epoch": 0.51, "grad_norm": 11.461945190557998, "learning_rate": 5.085038688585079e-06, "loss": 0.2894, "step": 8106 }, { "epoch": 0.51, "grad_norm": 2.1600860846477663, "learning_rate": 5.0840203576289135e-06, "loss": 0.2837, "step": 8107 }, { "epoch": 0.51, "grad_norm": 1.1997379931564873, "learning_rate": 5.083002023186596e-06, "loss": 0.2927, "step": 8108 }, { "epoch": 0.51, "grad_norm": 1.3115319555158724, "learning_rate": 5.081983685300377e-06, "loss": 0.2974, "step": 8109 }, { "epoch": 0.51, "grad_norm": 3.0692406318852896, "learning_rate": 5.080965344012509e-06, "loss": 0.2764, "step": 8110 }, { "epoch": 0.51, "grad_norm": 2.957828967322293, "learning_rate": 5.079946999365244e-06, "loss": 0.2705, "step": 8111 }, { "epoch": 0.51, "grad_norm": 2.403710494563031, "learning_rate": 5.078928651400838e-06, "loss": 0.2998, "step": 8112 }, { "epoch": 0.51, "grad_norm": 2.3844751932063026, "learning_rate": 5.07791030016154e-06, "loss": 0.2831, "step": 8113 }, { "epoch": 0.51, "grad_norm": 1.8789455986311219, "learning_rate": 5.076891945689606e-06, "loss": 0.2876, "step": 8114 }, { "epoch": 0.51, "grad_norm": 2.2053585238370856, "learning_rate": 5.075873588027288e-06, "loss": 0.3017, "step": 8115 }, { "epoch": 0.51, "grad_norm": 1.981578176439994, "learning_rate": 5.074855227216842e-06, "loss": 0.2856, "step": 8116 }, { "epoch": 0.51, "grad_norm": 1.2393565715017612, "learning_rate": 5.073836863300517e-06, "loss": 0.2858, "step": 8117 }, { "epoch": 0.51, "grad_norm": 2.124483252278634, "learning_rate": 5.072818496320572e-06, "loss": 0.3109, "step": 8118 }, { "epoch": 0.51, "grad_norm": 2.7475757024501903, "learning_rate": 5.071800126319256e-06, "loss": 0.3005, "step": 8119 }, { "epoch": 0.51, "grad_norm": 1.9141583768087556, "learning_rate": 5.070781753338828e-06, "loss": 0.2769, "step": 8120 }, { "epoch": 0.51, "grad_norm": 1.451323318961555, "learning_rate": 5.069763377421536e-06, "loss": 0.3067, "step": 8121 }, { "epoch": 0.51, "grad_norm": 1.5071921951160125, "learning_rate": 5.06874499860964e-06, "loss": 0.3003, "step": 8122 }, { "epoch": 0.51, "grad_norm": 3.0897711700626984, "learning_rate": 5.067726616945391e-06, "loss": 0.2949, "step": 8123 }, { "epoch": 0.51, "grad_norm": 2.4642319003419675, "learning_rate": 5.066708232471045e-06, "loss": 0.2986, "step": 8124 }, { "epoch": 0.51, "grad_norm": 2.7422941442622264, "learning_rate": 5.065689845228854e-06, "loss": 0.2899, "step": 8125 }, { "epoch": 0.51, "grad_norm": 1.8900938353400445, "learning_rate": 5.064671455261078e-06, "loss": 0.3086, "step": 8126 }, { "epoch": 0.51, "grad_norm": 2.1285408519171547, "learning_rate": 5.063653062609966e-06, "loss": 0.298, "step": 8127 }, { "epoch": 0.51, "grad_norm": 1.8505723361191253, "learning_rate": 5.062634667317776e-06, "loss": 0.3228, "step": 8128 }, { "epoch": 0.51, "grad_norm": 10.772859833035717, "learning_rate": 5.061616269426761e-06, "loss": 0.2894, "step": 8129 }, { "epoch": 0.51, "grad_norm": 1.9855041274032852, "learning_rate": 5.06059786897918e-06, "loss": 0.2919, "step": 8130 }, { "epoch": 0.51, "grad_norm": 1.6736157068345445, "learning_rate": 5.059579466017285e-06, "loss": 0.2881, "step": 8131 }, { "epoch": 0.51, "grad_norm": 1.8385217449012525, "learning_rate": 5.058561060583331e-06, "loss": 0.2595, "step": 8132 }, { "epoch": 0.51, "grad_norm": 1.528387329673985, "learning_rate": 5.057542652719574e-06, "loss": 0.2834, "step": 8133 }, { "epoch": 0.51, "grad_norm": 1.7429707142645707, "learning_rate": 5.056524242468274e-06, "loss": 0.2893, "step": 8134 }, { "epoch": 0.51, "grad_norm": 2.7041324964610656, "learning_rate": 5.055505829871678e-06, "loss": 0.2857, "step": 8135 }, { "epoch": 0.51, "grad_norm": 1.4335164106800034, "learning_rate": 5.054487414972049e-06, "loss": 0.2865, "step": 8136 }, { "epoch": 0.51, "grad_norm": 1.9924978004561862, "learning_rate": 5.053468997811641e-06, "loss": 0.2778, "step": 8137 }, { "epoch": 0.51, "grad_norm": 40.865144961517984, "learning_rate": 5.052450578432708e-06, "loss": 0.2702, "step": 8138 }, { "epoch": 0.51, "grad_norm": 1.3016727652926572, "learning_rate": 5.051432156877508e-06, "loss": 0.2788, "step": 8139 }, { "epoch": 0.51, "grad_norm": 1.9413116312589866, "learning_rate": 5.050413733188296e-06, "loss": 0.298, "step": 8140 }, { "epoch": 0.51, "grad_norm": 1.7847582140692475, "learning_rate": 5.049395307407329e-06, "loss": 0.2858, "step": 8141 }, { "epoch": 0.51, "grad_norm": 1.529547832735798, "learning_rate": 5.048376879576864e-06, "loss": 0.2785, "step": 8142 }, { "epoch": 0.51, "grad_norm": 2.6373970021235005, "learning_rate": 5.047358449739154e-06, "loss": 0.3086, "step": 8143 }, { "epoch": 0.51, "grad_norm": 2.66063556871953, "learning_rate": 5.046340017936459e-06, "loss": 0.2812, "step": 8144 }, { "epoch": 0.51, "grad_norm": 2.173410028038673, "learning_rate": 5.045321584211035e-06, "loss": 0.2834, "step": 8145 }, { "epoch": 0.51, "grad_norm": 8.189909801659308, "learning_rate": 5.044303148605137e-06, "loss": 0.3044, "step": 8146 }, { "epoch": 0.51, "grad_norm": 2.449596123719175, "learning_rate": 5.043284711161022e-06, "loss": 0.3042, "step": 8147 }, { "epoch": 0.51, "grad_norm": 2.1180777194657785, "learning_rate": 5.042266271920949e-06, "loss": 0.2785, "step": 8148 }, { "epoch": 0.51, "grad_norm": 2.8784394804409024, "learning_rate": 5.041247830927173e-06, "loss": 0.3005, "step": 8149 }, { "epoch": 0.51, "grad_norm": 1.6236132036555047, "learning_rate": 5.040229388221952e-06, "loss": 0.2856, "step": 8150 }, { "epoch": 0.51, "grad_norm": 2.4877568330618107, "learning_rate": 5.03921094384754e-06, "loss": 0.2725, "step": 8151 }, { "epoch": 0.51, "grad_norm": 6.779632116067725, "learning_rate": 5.038192497846198e-06, "loss": 0.2945, "step": 8152 }, { "epoch": 0.51, "grad_norm": 1.8263603784506386, "learning_rate": 5.037174050260181e-06, "loss": 0.2847, "step": 8153 }, { "epoch": 0.51, "grad_norm": 2.1698830201554764, "learning_rate": 5.036155601131747e-06, "loss": 0.2729, "step": 8154 }, { "epoch": 0.51, "grad_norm": 3.4927890770584638, "learning_rate": 5.035137150503151e-06, "loss": 0.3065, "step": 8155 }, { "epoch": 0.51, "grad_norm": 1.5545318227740046, "learning_rate": 5.034118698416654e-06, "loss": 0.2747, "step": 8156 }, { "epoch": 0.51, "grad_norm": 1.4572955541828734, "learning_rate": 5.033100244914512e-06, "loss": 0.2763, "step": 8157 }, { "epoch": 0.51, "grad_norm": 5.746315447390973, "learning_rate": 5.03208179003898e-06, "loss": 0.3129, "step": 8158 }, { "epoch": 0.51, "grad_norm": 1.4664765942120763, "learning_rate": 5.03106333383232e-06, "loss": 0.271, "step": 8159 }, { "epoch": 0.51, "grad_norm": 14.394632767386923, "learning_rate": 5.030044876336786e-06, "loss": 0.3045, "step": 8160 }, { "epoch": 0.51, "grad_norm": 1.9123375926286352, "learning_rate": 5.029026417594637e-06, "loss": 0.3159, "step": 8161 }, { "epoch": 0.51, "grad_norm": 3.5269617215825892, "learning_rate": 5.02800795764813e-06, "loss": 0.2955, "step": 8162 }, { "epoch": 0.51, "grad_norm": 16.546546453695523, "learning_rate": 5.026989496539523e-06, "loss": 0.3047, "step": 8163 }, { "epoch": 0.51, "grad_norm": 1.9162045761280904, "learning_rate": 5.025971034311075e-06, "loss": 0.2804, "step": 8164 }, { "epoch": 0.51, "grad_norm": 2.3073722605086497, "learning_rate": 5.024952571005041e-06, "loss": 0.3066, "step": 8165 }, { "epoch": 0.51, "grad_norm": 1.9830524460973746, "learning_rate": 5.02393410666368e-06, "loss": 0.2835, "step": 8166 }, { "epoch": 0.51, "grad_norm": 2.961502319580719, "learning_rate": 5.022915641329252e-06, "loss": 0.2923, "step": 8167 }, { "epoch": 0.51, "grad_norm": 2.189041131058403, "learning_rate": 5.021897175044014e-06, "loss": 0.2874, "step": 8168 }, { "epoch": 0.51, "grad_norm": 1.6629475421529214, "learning_rate": 5.020878707850222e-06, "loss": 0.2778, "step": 8169 }, { "epoch": 0.51, "grad_norm": 1.6436906042157808, "learning_rate": 5.0198602397901355e-06, "loss": 0.2887, "step": 8170 }, { "epoch": 0.51, "grad_norm": 2.706239287634321, "learning_rate": 5.018841770906011e-06, "loss": 0.2839, "step": 8171 }, { "epoch": 0.51, "grad_norm": 3.700147076277438, "learning_rate": 5.017823301240111e-06, "loss": 0.306, "step": 8172 }, { "epoch": 0.51, "grad_norm": 2.644524884182939, "learning_rate": 5.016804830834687e-06, "loss": 0.2715, "step": 8173 }, { "epoch": 0.51, "grad_norm": 1.574413984967847, "learning_rate": 5.015786359732003e-06, "loss": 0.2824, "step": 8174 }, { "epoch": 0.51, "grad_norm": 2.6463092278210696, "learning_rate": 5.014767887974316e-06, "loss": 0.2892, "step": 8175 }, { "epoch": 0.51, "grad_norm": 2.8302860169498425, "learning_rate": 5.013749415603881e-06, "loss": 0.2733, "step": 8176 }, { "epoch": 0.51, "grad_norm": 1.6922566209092376, "learning_rate": 5.0127309426629575e-06, "loss": 0.2665, "step": 8177 }, { "epoch": 0.51, "grad_norm": 2.767475220858458, "learning_rate": 5.011712469193808e-06, "loss": 0.285, "step": 8178 }, { "epoch": 0.51, "grad_norm": 2.821881597810828, "learning_rate": 5.010693995238684e-06, "loss": 0.2857, "step": 8179 }, { "epoch": 0.51, "grad_norm": 1.4949420014722998, "learning_rate": 5.009675520839851e-06, "loss": 0.2976, "step": 8180 }, { "epoch": 0.51, "grad_norm": 1.9029231329131013, "learning_rate": 5.008657046039559e-06, "loss": 0.27, "step": 8181 }, { "epoch": 0.51, "grad_norm": 1.8667607852679868, "learning_rate": 5.007638570880073e-06, "loss": 0.2902, "step": 8182 }, { "epoch": 0.51, "grad_norm": 0.572467918809941, "learning_rate": 5.0066200954036495e-06, "loss": 0.4681, "step": 8183 }, { "epoch": 0.51, "grad_norm": 3.2587774934395273, "learning_rate": 5.005601619652546e-06, "loss": 0.303, "step": 8184 }, { "epoch": 0.51, "grad_norm": 1.9537089598222606, "learning_rate": 5.00458314366902e-06, "loss": 0.2837, "step": 8185 }, { "epoch": 0.51, "grad_norm": 1.4045046250409439, "learning_rate": 5.003564667495334e-06, "loss": 0.2768, "step": 8186 }, { "epoch": 0.51, "grad_norm": 1.526752482442207, "learning_rate": 5.002546191173742e-06, "loss": 0.2804, "step": 8187 }, { "epoch": 0.51, "grad_norm": 1.8022644753475676, "learning_rate": 5.0015277147465035e-06, "loss": 0.2986, "step": 8188 }, { "epoch": 0.51, "grad_norm": 1.6767843667420423, "learning_rate": 5.000509238255877e-06, "loss": 0.2828, "step": 8189 }, { "epoch": 0.52, "grad_norm": 2.3425617857572867, "learning_rate": 4.999490761744123e-06, "loss": 0.2862, "step": 8190 }, { "epoch": 0.52, "grad_norm": 5.9245674360968374, "learning_rate": 4.9984722852534964e-06, "loss": 0.2871, "step": 8191 }, { "epoch": 0.52, "grad_norm": 2.766118190616532, "learning_rate": 4.99745380882626e-06, "loss": 0.2969, "step": 8192 }, { "epoch": 0.52, "grad_norm": 1.9747232663183154, "learning_rate": 4.996435332504668e-06, "loss": 0.2745, "step": 8193 }, { "epoch": 0.52, "grad_norm": 2.8748079083011437, "learning_rate": 4.995416856330981e-06, "loss": 0.2835, "step": 8194 }, { "epoch": 0.52, "grad_norm": 2.8798777101177233, "learning_rate": 4.994398380347456e-06, "loss": 0.2848, "step": 8195 }, { "epoch": 0.52, "grad_norm": 2.639808692388689, "learning_rate": 4.993379904596353e-06, "loss": 0.2872, "step": 8196 }, { "epoch": 0.52, "grad_norm": 2.1435094676835655, "learning_rate": 4.992361429119927e-06, "loss": 0.2858, "step": 8197 }, { "epoch": 0.52, "grad_norm": 1.9851074493971386, "learning_rate": 4.991342953960442e-06, "loss": 0.2826, "step": 8198 }, { "epoch": 0.52, "grad_norm": 1.499481015971225, "learning_rate": 4.990324479160151e-06, "loss": 0.3049, "step": 8199 }, { "epoch": 0.52, "grad_norm": 1.4346083663205933, "learning_rate": 4.989306004761317e-06, "loss": 0.2792, "step": 8200 }, { "epoch": 0.52, "grad_norm": 5.699142284733824, "learning_rate": 4.988287530806194e-06, "loss": 0.3179, "step": 8201 }, { "epoch": 0.52, "grad_norm": 1.651240072834334, "learning_rate": 4.987269057337043e-06, "loss": 0.2788, "step": 8202 }, { "epoch": 0.52, "grad_norm": 1.6379106598045439, "learning_rate": 4.98625058439612e-06, "loss": 0.2961, "step": 8203 }, { "epoch": 0.52, "grad_norm": 2.0033425123299136, "learning_rate": 4.985232112025688e-06, "loss": 0.3004, "step": 8204 }, { "epoch": 0.52, "grad_norm": 1.4606393446522026, "learning_rate": 4.984213640267996e-06, "loss": 0.2692, "step": 8205 }, { "epoch": 0.52, "grad_norm": 2.1594238111119237, "learning_rate": 4.983195169165313e-06, "loss": 0.3018, "step": 8206 }, { "epoch": 0.52, "grad_norm": 4.073785619809118, "learning_rate": 4.9821766987598905e-06, "loss": 0.2803, "step": 8207 }, { "epoch": 0.52, "grad_norm": 0.6358225243717622, "learning_rate": 4.98115822909399e-06, "loss": 0.487, "step": 8208 }, { "epoch": 0.52, "grad_norm": 2.245613572813672, "learning_rate": 4.980139760209867e-06, "loss": 0.2846, "step": 8209 }, { "epoch": 0.52, "grad_norm": 3.402414324187724, "learning_rate": 4.979121292149781e-06, "loss": 0.2835, "step": 8210 }, { "epoch": 0.52, "grad_norm": 1.7324346028894333, "learning_rate": 4.978102824955988e-06, "loss": 0.2897, "step": 8211 }, { "epoch": 0.52, "grad_norm": 1.938265565068531, "learning_rate": 4.977084358670749e-06, "loss": 0.3019, "step": 8212 }, { "epoch": 0.52, "grad_norm": 2.07939685069403, "learning_rate": 4.97606589333632e-06, "loss": 0.2738, "step": 8213 }, { "epoch": 0.52, "grad_norm": 2.361960190281248, "learning_rate": 4.975047428994961e-06, "loss": 0.2816, "step": 8214 }, { "epoch": 0.52, "grad_norm": 2.18567816403474, "learning_rate": 4.9740289656889276e-06, "loss": 0.2914, "step": 8215 }, { "epoch": 0.52, "grad_norm": 25.224564304578116, "learning_rate": 4.9730105034604795e-06, "loss": 0.3092, "step": 8216 }, { "epoch": 0.52, "grad_norm": 2.58551899278767, "learning_rate": 4.971992042351872e-06, "loss": 0.2799, "step": 8217 }, { "epoch": 0.52, "grad_norm": 2.323951453494458, "learning_rate": 4.970973582405366e-06, "loss": 0.3097, "step": 8218 }, { "epoch": 0.52, "grad_norm": 2.253398014201736, "learning_rate": 4.969955123663216e-06, "loss": 0.2934, "step": 8219 }, { "epoch": 0.52, "grad_norm": 1.9966836673695352, "learning_rate": 4.968936666167681e-06, "loss": 0.2935, "step": 8220 }, { "epoch": 0.52, "grad_norm": 1.897449863210299, "learning_rate": 4.96791820996102e-06, "loss": 0.2923, "step": 8221 }, { "epoch": 0.52, "grad_norm": 1.8632552958564557, "learning_rate": 4.96689975508549e-06, "loss": 0.2731, "step": 8222 }, { "epoch": 0.52, "grad_norm": 1.7206196721758091, "learning_rate": 4.9658813015833465e-06, "loss": 0.2853, "step": 8223 }, { "epoch": 0.52, "grad_norm": 2.3362008394752314, "learning_rate": 4.964862849496851e-06, "loss": 0.3139, "step": 8224 }, { "epoch": 0.52, "grad_norm": 1.8345989807494922, "learning_rate": 4.9638443988682555e-06, "loss": 0.3108, "step": 8225 }, { "epoch": 0.52, "grad_norm": 1.7005985005836821, "learning_rate": 4.962825949739822e-06, "loss": 0.2738, "step": 8226 }, { "epoch": 0.52, "grad_norm": 6.040241574651639, "learning_rate": 4.961807502153802e-06, "loss": 0.2914, "step": 8227 }, { "epoch": 0.52, "grad_norm": 3.763435577672746, "learning_rate": 4.960789056152461e-06, "loss": 0.2603, "step": 8228 }, { "epoch": 0.52, "grad_norm": 4.959686260414063, "learning_rate": 4.9597706117780495e-06, "loss": 0.3001, "step": 8229 }, { "epoch": 0.52, "grad_norm": 2.130706889609274, "learning_rate": 4.958752169072828e-06, "loss": 0.2816, "step": 8230 }, { "epoch": 0.52, "grad_norm": 2.4536967288211615, "learning_rate": 4.957733728079051e-06, "loss": 0.2915, "step": 8231 }, { "epoch": 0.52, "grad_norm": 2.5668313154641167, "learning_rate": 4.956715288838979e-06, "loss": 0.3039, "step": 8232 }, { "epoch": 0.52, "grad_norm": 4.61844755821173, "learning_rate": 4.955696851394865e-06, "loss": 0.2838, "step": 8233 }, { "epoch": 0.52, "grad_norm": 2.768573779849712, "learning_rate": 4.954678415788968e-06, "loss": 0.2896, "step": 8234 }, { "epoch": 0.52, "grad_norm": 0.6050610772215912, "learning_rate": 4.9536599820635414e-06, "loss": 0.4678, "step": 8235 }, { "epoch": 0.52, "grad_norm": 2.3711887432190775, "learning_rate": 4.952641550260846e-06, "loss": 0.3181, "step": 8236 }, { "epoch": 0.52, "grad_norm": 2.2925269815694542, "learning_rate": 4.951623120423138e-06, "loss": 0.2657, "step": 8237 }, { "epoch": 0.52, "grad_norm": 1.6303030514441423, "learning_rate": 4.9506046925926725e-06, "loss": 0.2648, "step": 8238 }, { "epoch": 0.52, "grad_norm": 2.3464748567621223, "learning_rate": 4.949586266811705e-06, "loss": 0.2856, "step": 8239 }, { "epoch": 0.52, "grad_norm": 1.6626060977836936, "learning_rate": 4.948567843122494e-06, "loss": 0.277, "step": 8240 }, { "epoch": 0.52, "grad_norm": 2.3347243208653583, "learning_rate": 4.947549421567294e-06, "loss": 0.2907, "step": 8241 }, { "epoch": 0.52, "grad_norm": 1.8140112395828367, "learning_rate": 4.94653100218836e-06, "loss": 0.3006, "step": 8242 }, { "epoch": 0.52, "grad_norm": 0.6213584045470085, "learning_rate": 4.945512585027951e-06, "loss": 0.4728, "step": 8243 }, { "epoch": 0.52, "grad_norm": 1.5861131829846289, "learning_rate": 4.944494170128323e-06, "loss": 0.2835, "step": 8244 }, { "epoch": 0.52, "grad_norm": 1.6417152034342402, "learning_rate": 4.9434757575317286e-06, "loss": 0.289, "step": 8245 }, { "epoch": 0.52, "grad_norm": 1.6850436567976557, "learning_rate": 4.9424573472804264e-06, "loss": 0.2732, "step": 8246 }, { "epoch": 0.52, "grad_norm": 13.001742424332056, "learning_rate": 4.9414389394166705e-06, "loss": 0.2648, "step": 8247 }, { "epoch": 0.52, "grad_norm": 1.693485053493527, "learning_rate": 4.940420533982718e-06, "loss": 0.2872, "step": 8248 }, { "epoch": 0.52, "grad_norm": 2.6446310687576586, "learning_rate": 4.9394021310208225e-06, "loss": 0.3017, "step": 8249 }, { "epoch": 0.52, "grad_norm": 1.886786817674654, "learning_rate": 4.938383730573239e-06, "loss": 0.2839, "step": 8250 }, { "epoch": 0.52, "grad_norm": 1.9207533099181677, "learning_rate": 4.937365332682225e-06, "loss": 0.2939, "step": 8251 }, { "epoch": 0.52, "grad_norm": 2.0088184978962906, "learning_rate": 4.936346937390036e-06, "loss": 0.3113, "step": 8252 }, { "epoch": 0.52, "grad_norm": 2.0325608995373363, "learning_rate": 4.935328544738925e-06, "loss": 0.2965, "step": 8253 }, { "epoch": 0.52, "grad_norm": 0.5485844470082665, "learning_rate": 4.934310154771147e-06, "loss": 0.482, "step": 8254 }, { "epoch": 0.52, "grad_norm": 3.1495215469116555, "learning_rate": 4.933291767528957e-06, "loss": 0.2817, "step": 8255 }, { "epoch": 0.52, "grad_norm": 1.5681454838604005, "learning_rate": 4.9322733830546115e-06, "loss": 0.2775, "step": 8256 }, { "epoch": 0.52, "grad_norm": 1.6840039722539915, "learning_rate": 4.931255001390361e-06, "loss": 0.2898, "step": 8257 }, { "epoch": 0.52, "grad_norm": 0.5878128307571517, "learning_rate": 4.930236622578464e-06, "loss": 0.5024, "step": 8258 }, { "epoch": 0.52, "grad_norm": 2.5355625212747435, "learning_rate": 4.929218246661174e-06, "loss": 0.3032, "step": 8259 }, { "epoch": 0.52, "grad_norm": 1.6850041002117744, "learning_rate": 4.928199873680745e-06, "loss": 0.2735, "step": 8260 }, { "epoch": 0.52, "grad_norm": 1.4972453730764776, "learning_rate": 4.927181503679429e-06, "loss": 0.2757, "step": 8261 }, { "epoch": 0.52, "grad_norm": 1.5194446073500507, "learning_rate": 4.9261631366994845e-06, "loss": 0.2926, "step": 8262 }, { "epoch": 0.52, "grad_norm": 1.8590668669251484, "learning_rate": 4.925144772783161e-06, "loss": 0.2843, "step": 8263 }, { "epoch": 0.52, "grad_norm": 1.651182949328135, "learning_rate": 4.924126411972714e-06, "loss": 0.2989, "step": 8264 }, { "epoch": 0.52, "grad_norm": 1.482248813457576, "learning_rate": 4.923108054310395e-06, "loss": 0.268, "step": 8265 }, { "epoch": 0.52, "grad_norm": 1.3343332975702737, "learning_rate": 4.922089699838462e-06, "loss": 0.2852, "step": 8266 }, { "epoch": 0.52, "grad_norm": 2.48254594985263, "learning_rate": 4.921071348599164e-06, "loss": 0.2729, "step": 8267 }, { "epoch": 0.52, "grad_norm": 2.603544230043783, "learning_rate": 4.9200530006347575e-06, "loss": 0.2889, "step": 8268 }, { "epoch": 0.52, "grad_norm": 2.537772216536732, "learning_rate": 4.919034655987493e-06, "loss": 0.306, "step": 8269 }, { "epoch": 0.52, "grad_norm": 7.232419358392029, "learning_rate": 4.918016314699625e-06, "loss": 0.2935, "step": 8270 }, { "epoch": 0.52, "grad_norm": 2.022585956446659, "learning_rate": 4.916997976813406e-06, "loss": 0.2878, "step": 8271 }, { "epoch": 0.52, "grad_norm": 5.722205604554446, "learning_rate": 4.9159796423710865e-06, "loss": 0.2765, "step": 8272 }, { "epoch": 0.52, "grad_norm": 2.387381502409368, "learning_rate": 4.914961311414922e-06, "loss": 0.2707, "step": 8273 }, { "epoch": 0.52, "grad_norm": 1.4932612601342625, "learning_rate": 4.913942983987165e-06, "loss": 0.2898, "step": 8274 }, { "epoch": 0.52, "grad_norm": 1.4789325587072022, "learning_rate": 4.912924660130065e-06, "loss": 0.2836, "step": 8275 }, { "epoch": 0.52, "grad_norm": 11.592684636643195, "learning_rate": 4.911906339885877e-06, "loss": 0.3092, "step": 8276 }, { "epoch": 0.52, "grad_norm": 1.7816285396426617, "learning_rate": 4.91088802329685e-06, "loss": 0.2907, "step": 8277 }, { "epoch": 0.52, "grad_norm": 2.490854989742586, "learning_rate": 4.909869710405238e-06, "loss": 0.2886, "step": 8278 }, { "epoch": 0.52, "grad_norm": 2.5912026344454357, "learning_rate": 4.90885140125329e-06, "loss": 0.2676, "step": 8279 }, { "epoch": 0.52, "grad_norm": 2.210525238436266, "learning_rate": 4.907833095883261e-06, "loss": 0.3003, "step": 8280 }, { "epoch": 0.52, "grad_norm": 2.1620126673627773, "learning_rate": 4.9068147943374e-06, "loss": 0.278, "step": 8281 }, { "epoch": 0.52, "grad_norm": 2.1962018503609535, "learning_rate": 4.905796496657959e-06, "loss": 0.3072, "step": 8282 }, { "epoch": 0.52, "grad_norm": 1.5142125020454695, "learning_rate": 4.904778202887189e-06, "loss": 0.3122, "step": 8283 }, { "epoch": 0.52, "grad_norm": 1.3758775597468256, "learning_rate": 4.90375991306734e-06, "loss": 0.2883, "step": 8284 }, { "epoch": 0.52, "grad_norm": 2.068861653821894, "learning_rate": 4.902741627240663e-06, "loss": 0.2706, "step": 8285 }, { "epoch": 0.52, "grad_norm": 1.5981640795469874, "learning_rate": 4.90172334544941e-06, "loss": 0.2863, "step": 8286 }, { "epoch": 0.52, "grad_norm": 2.46047673929236, "learning_rate": 4.900705067735827e-06, "loss": 0.2893, "step": 8287 }, { "epoch": 0.52, "grad_norm": 2.188271235936475, "learning_rate": 4.899686794142169e-06, "loss": 0.2984, "step": 8288 }, { "epoch": 0.52, "grad_norm": 1.5869922792937985, "learning_rate": 4.8986685247106835e-06, "loss": 0.2747, "step": 8289 }, { "epoch": 0.52, "grad_norm": 5.901737525071232, "learning_rate": 4.897650259483623e-06, "loss": 0.284, "step": 8290 }, { "epoch": 0.52, "grad_norm": 1.7613463201423094, "learning_rate": 4.8966319985032325e-06, "loss": 0.2656, "step": 8291 }, { "epoch": 0.52, "grad_norm": 1.7949183307816083, "learning_rate": 4.895613741811766e-06, "loss": 0.2663, "step": 8292 }, { "epoch": 0.52, "grad_norm": 1.8372796177519661, "learning_rate": 4.894595489451468e-06, "loss": 0.2813, "step": 8293 }, { "epoch": 0.52, "grad_norm": 2.9594151641872095, "learning_rate": 4.893577241464592e-06, "loss": 0.2773, "step": 8294 }, { "epoch": 0.52, "grad_norm": 1.5707005784419934, "learning_rate": 4.892558997893386e-06, "loss": 0.2879, "step": 8295 }, { "epoch": 0.52, "grad_norm": 2.994405121263485, "learning_rate": 4.8915407587800965e-06, "loss": 0.3043, "step": 8296 }, { "epoch": 0.52, "grad_norm": 1.889226416275823, "learning_rate": 4.890522524166974e-06, "loss": 0.2962, "step": 8297 }, { "epoch": 0.52, "grad_norm": 1.236552999968069, "learning_rate": 4.889504294096268e-06, "loss": 0.2787, "step": 8298 }, { "epoch": 0.52, "grad_norm": 1.773305178868573, "learning_rate": 4.8884860686102234e-06, "loss": 0.2971, "step": 8299 }, { "epoch": 0.52, "grad_norm": 2.168430941044765, "learning_rate": 4.887467847751091e-06, "loss": 0.2717, "step": 8300 }, { "epoch": 0.52, "grad_norm": 2.2006097595084317, "learning_rate": 4.8864496315611175e-06, "loss": 0.2749, "step": 8301 }, { "epoch": 0.52, "grad_norm": 1.9690783700100913, "learning_rate": 4.8854314200825475e-06, "loss": 0.2873, "step": 8302 }, { "epoch": 0.52, "grad_norm": 3.004249797363292, "learning_rate": 4.884413213357635e-06, "loss": 0.2803, "step": 8303 }, { "epoch": 0.52, "grad_norm": 1.8958393299053462, "learning_rate": 4.883395011428622e-06, "loss": 0.2962, "step": 8304 }, { "epoch": 0.52, "grad_norm": 1.800553602780511, "learning_rate": 4.882376814337757e-06, "loss": 0.2843, "step": 8305 }, { "epoch": 0.52, "grad_norm": 1.4315211587650654, "learning_rate": 4.881358622127288e-06, "loss": 0.2698, "step": 8306 }, { "epoch": 0.52, "grad_norm": 1.3705326963569648, "learning_rate": 4.880340434839459e-06, "loss": 0.2917, "step": 8307 }, { "epoch": 0.52, "grad_norm": 2.2516337919657996, "learning_rate": 4.8793222525165205e-06, "loss": 0.302, "step": 8308 }, { "epoch": 0.52, "grad_norm": 1.4027960878908383, "learning_rate": 4.878304075200712e-06, "loss": 0.2815, "step": 8309 }, { "epoch": 0.52, "grad_norm": 3.1470103947421344, "learning_rate": 4.8772859029342864e-06, "loss": 0.2772, "step": 8310 }, { "epoch": 0.52, "grad_norm": 1.6994330713125576, "learning_rate": 4.876267735759486e-06, "loss": 0.279, "step": 8311 }, { "epoch": 0.52, "grad_norm": 2.2505510396157145, "learning_rate": 4.875249573718557e-06, "loss": 0.2926, "step": 8312 }, { "epoch": 0.52, "grad_norm": 1.5588934345512109, "learning_rate": 4.874231416853744e-06, "loss": 0.3097, "step": 8313 }, { "epoch": 0.52, "grad_norm": 1.8267615338004741, "learning_rate": 4.8732132652072956e-06, "loss": 0.2931, "step": 8314 }, { "epoch": 0.52, "grad_norm": 4.78098828533289, "learning_rate": 4.872195118821452e-06, "loss": 0.2902, "step": 8315 }, { "epoch": 0.52, "grad_norm": 2.375384396299737, "learning_rate": 4.871176977738461e-06, "loss": 0.3106, "step": 8316 }, { "epoch": 0.52, "grad_norm": 1.7625365282938825, "learning_rate": 4.870158842000563e-06, "loss": 0.2898, "step": 8317 }, { "epoch": 0.52, "grad_norm": 2.473743047081472, "learning_rate": 4.869140711650008e-06, "loss": 0.2962, "step": 8318 }, { "epoch": 0.52, "grad_norm": 4.630733487511131, "learning_rate": 4.868122586729036e-06, "loss": 0.2954, "step": 8319 }, { "epoch": 0.52, "grad_norm": 2.0925891721373913, "learning_rate": 4.867104467279894e-06, "loss": 0.29, "step": 8320 }, { "epoch": 0.52, "grad_norm": 2.0939923448910562, "learning_rate": 4.866086353344822e-06, "loss": 0.2712, "step": 8321 }, { "epoch": 0.52, "grad_norm": 6.41877420552967, "learning_rate": 4.865068244966066e-06, "loss": 0.2929, "step": 8322 }, { "epoch": 0.52, "grad_norm": 2.270981754482077, "learning_rate": 4.864050142185868e-06, "loss": 0.296, "step": 8323 }, { "epoch": 0.52, "grad_norm": 1.9316351673686134, "learning_rate": 4.86303204504647e-06, "loss": 0.2827, "step": 8324 }, { "epoch": 0.52, "grad_norm": 1.5418993099495082, "learning_rate": 4.862013953590117e-06, "loss": 0.2727, "step": 8325 }, { "epoch": 0.52, "grad_norm": 2.5715061253466334, "learning_rate": 4.860995867859049e-06, "loss": 0.2824, "step": 8326 }, { "epoch": 0.52, "grad_norm": 1.6413300874583487, "learning_rate": 4.85997778789551e-06, "loss": 0.3233, "step": 8327 }, { "epoch": 0.52, "grad_norm": 1.7151059470009289, "learning_rate": 4.858959713741742e-06, "loss": 0.2717, "step": 8328 }, { "epoch": 0.52, "grad_norm": 2.5936853821012376, "learning_rate": 4.857941645439985e-06, "loss": 0.2895, "step": 8329 }, { "epoch": 0.52, "grad_norm": 3.4025304259215643, "learning_rate": 4.856923583032483e-06, "loss": 0.2677, "step": 8330 }, { "epoch": 0.52, "grad_norm": 1.5654484206878563, "learning_rate": 4.855905526561474e-06, "loss": 0.2879, "step": 8331 }, { "epoch": 0.52, "grad_norm": 2.695336616262636, "learning_rate": 4.8548874760692e-06, "loss": 0.3043, "step": 8332 }, { "epoch": 0.52, "grad_norm": 1.7832227001250294, "learning_rate": 4.8538694315979045e-06, "loss": 0.2927, "step": 8333 }, { "epoch": 0.52, "grad_norm": 2.1140181232744344, "learning_rate": 4.852851393189825e-06, "loss": 0.2861, "step": 8334 }, { "epoch": 0.52, "grad_norm": 1.4286677077307632, "learning_rate": 4.8518333608872015e-06, "loss": 0.2594, "step": 8335 }, { "epoch": 0.52, "grad_norm": 1.9602561494411748, "learning_rate": 4.850815334732277e-06, "loss": 0.2874, "step": 8336 }, { "epoch": 0.52, "grad_norm": 1.7235780246174375, "learning_rate": 4.849797314767288e-06, "loss": 0.2986, "step": 8337 }, { "epoch": 0.52, "grad_norm": 3.9333024538245205, "learning_rate": 4.848779301034476e-06, "loss": 0.2795, "step": 8338 }, { "epoch": 0.52, "grad_norm": 19.95784289340205, "learning_rate": 4.847761293576078e-06, "loss": 0.2863, "step": 8339 }, { "epoch": 0.52, "grad_norm": 2.346495865872955, "learning_rate": 4.846743292434334e-06, "loss": 0.2605, "step": 8340 }, { "epoch": 0.52, "grad_norm": 1.8440387401706564, "learning_rate": 4.845725297651485e-06, "loss": 0.2689, "step": 8341 }, { "epoch": 0.52, "grad_norm": 1.971613200240072, "learning_rate": 4.844707309269767e-06, "loss": 0.2854, "step": 8342 }, { "epoch": 0.52, "grad_norm": 2.0397420833369697, "learning_rate": 4.8436893273314176e-06, "loss": 0.2885, "step": 8343 }, { "epoch": 0.52, "grad_norm": 2.888534178756261, "learning_rate": 4.842671351878677e-06, "loss": 0.2862, "step": 8344 }, { "epoch": 0.52, "grad_norm": 1.8523021751627184, "learning_rate": 4.84165338295378e-06, "loss": 0.289, "step": 8345 }, { "epoch": 0.52, "grad_norm": 6.75321363063784, "learning_rate": 4.840635420598967e-06, "loss": 0.2845, "step": 8346 }, { "epoch": 0.52, "grad_norm": 1.9647520906752194, "learning_rate": 4.839617464856471e-06, "loss": 0.2811, "step": 8347 }, { "epoch": 0.52, "grad_norm": 1.413440971219273, "learning_rate": 4.838599515768532e-06, "loss": 0.2945, "step": 8348 }, { "epoch": 0.53, "grad_norm": 3.0070939153612004, "learning_rate": 4.837581573377387e-06, "loss": 0.2803, "step": 8349 }, { "epoch": 0.53, "grad_norm": 2.0024772625960554, "learning_rate": 4.836563637725271e-06, "loss": 0.2965, "step": 8350 }, { "epoch": 0.53, "grad_norm": 1.783291008563796, "learning_rate": 4.835545708854419e-06, "loss": 0.2852, "step": 8351 }, { "epoch": 0.53, "grad_norm": 2.226463281537102, "learning_rate": 4.834527786807069e-06, "loss": 0.2825, "step": 8352 }, { "epoch": 0.53, "grad_norm": 1.3102782421763481, "learning_rate": 4.8335098716254545e-06, "loss": 0.2644, "step": 8353 }, { "epoch": 0.53, "grad_norm": 2.7464204611587495, "learning_rate": 4.832491963351809e-06, "loss": 0.2997, "step": 8354 }, { "epoch": 0.53, "grad_norm": 4.88987909463794, "learning_rate": 4.831474062028372e-06, "loss": 0.2811, "step": 8355 }, { "epoch": 0.53, "grad_norm": 3.314611497551799, "learning_rate": 4.830456167697375e-06, "loss": 0.2821, "step": 8356 }, { "epoch": 0.53, "grad_norm": 1.34470913155694, "learning_rate": 4.829438280401052e-06, "loss": 0.3104, "step": 8357 }, { "epoch": 0.53, "grad_norm": 2.271400913559649, "learning_rate": 4.828420400181639e-06, "loss": 0.2796, "step": 8358 }, { "epoch": 0.53, "grad_norm": 2.0068432578888458, "learning_rate": 4.827402527081368e-06, "loss": 0.2887, "step": 8359 }, { "epoch": 0.53, "grad_norm": 2.600485994703669, "learning_rate": 4.826384661142472e-06, "loss": 0.2832, "step": 8360 }, { "epoch": 0.53, "grad_norm": 2.3312780804242537, "learning_rate": 4.825366802407184e-06, "loss": 0.2862, "step": 8361 }, { "epoch": 0.53, "grad_norm": 7.674285070392237, "learning_rate": 4.824348950917739e-06, "loss": 0.279, "step": 8362 }, { "epoch": 0.53, "grad_norm": 3.066978445862203, "learning_rate": 4.823331106716368e-06, "loss": 0.2793, "step": 8363 }, { "epoch": 0.53, "grad_norm": 1.894374156222564, "learning_rate": 4.822313269845303e-06, "loss": 0.3039, "step": 8364 }, { "epoch": 0.53, "grad_norm": 1.510245593542162, "learning_rate": 4.821295440346775e-06, "loss": 0.2798, "step": 8365 }, { "epoch": 0.53, "grad_norm": 3.0874445027030997, "learning_rate": 4.820277618263018e-06, "loss": 0.2804, "step": 8366 }, { "epoch": 0.53, "grad_norm": 1.8498454825062158, "learning_rate": 4.819259803636261e-06, "loss": 0.2761, "step": 8367 }, { "epoch": 0.53, "grad_norm": 1.6144161860062747, "learning_rate": 4.8182419965087375e-06, "loss": 0.295, "step": 8368 }, { "epoch": 0.53, "grad_norm": 3.6204533056451464, "learning_rate": 4.817224196922673e-06, "loss": 0.2924, "step": 8369 }, { "epoch": 0.53, "grad_norm": 2.140072947789576, "learning_rate": 4.8162064049203035e-06, "loss": 0.2773, "step": 8370 }, { "epoch": 0.53, "grad_norm": 2.3893163555588957, "learning_rate": 4.815188620543857e-06, "loss": 0.2981, "step": 8371 }, { "epoch": 0.53, "grad_norm": 0.5803892602864232, "learning_rate": 4.814170843835561e-06, "loss": 0.5214, "step": 8372 }, { "epoch": 0.53, "grad_norm": 1.850281135022143, "learning_rate": 4.813153074837648e-06, "loss": 0.2747, "step": 8373 }, { "epoch": 0.53, "grad_norm": 2.7567157375102114, "learning_rate": 4.812135313592348e-06, "loss": 0.28, "step": 8374 }, { "epoch": 0.53, "grad_norm": 1.8665012917513455, "learning_rate": 4.8111175601418845e-06, "loss": 0.2767, "step": 8375 }, { "epoch": 0.53, "grad_norm": 1.6674636307333104, "learning_rate": 4.810099814528489e-06, "loss": 0.2818, "step": 8376 }, { "epoch": 0.53, "grad_norm": 14.574159705529876, "learning_rate": 4.809082076794391e-06, "loss": 0.2801, "step": 8377 }, { "epoch": 0.53, "grad_norm": 0.6994142917636991, "learning_rate": 4.808064346981815e-06, "loss": 0.5043, "step": 8378 }, { "epoch": 0.53, "grad_norm": 2.043551212387841, "learning_rate": 4.8070466251329926e-06, "loss": 0.2809, "step": 8379 }, { "epoch": 0.53, "grad_norm": 2.3474862659250526, "learning_rate": 4.806028911290147e-06, "loss": 0.3091, "step": 8380 }, { "epoch": 0.53, "grad_norm": 1.5123096367847348, "learning_rate": 4.805011205495505e-06, "loss": 0.2909, "step": 8381 }, { "epoch": 0.53, "grad_norm": 2.5323443497815594, "learning_rate": 4.803993507791298e-06, "loss": 0.29, "step": 8382 }, { "epoch": 0.53, "grad_norm": 2.514766547547126, "learning_rate": 4.8029758182197455e-06, "loss": 0.2773, "step": 8383 }, { "epoch": 0.53, "grad_norm": 2.778383922553858, "learning_rate": 4.801958136823076e-06, "loss": 0.2793, "step": 8384 }, { "epoch": 0.53, "grad_norm": 0.5753223131355168, "learning_rate": 4.800940463643517e-06, "loss": 0.4815, "step": 8385 }, { "epoch": 0.53, "grad_norm": 1.5322787209475415, "learning_rate": 4.799922798723291e-06, "loss": 0.2826, "step": 8386 }, { "epoch": 0.53, "grad_norm": 1.590813516641714, "learning_rate": 4.798905142104624e-06, "loss": 0.2833, "step": 8387 }, { "epoch": 0.53, "grad_norm": 3.406993004851999, "learning_rate": 4.79788749382974e-06, "loss": 0.2867, "step": 8388 }, { "epoch": 0.53, "grad_norm": 2.7434548670177867, "learning_rate": 4.796869853940862e-06, "loss": 0.2695, "step": 8389 }, { "epoch": 0.53, "grad_norm": 1.8224793567413622, "learning_rate": 4.795852222480215e-06, "loss": 0.2792, "step": 8390 }, { "epoch": 0.53, "grad_norm": 1.9544250845793802, "learning_rate": 4.7948345994900205e-06, "loss": 0.2935, "step": 8391 }, { "epoch": 0.53, "grad_norm": 3.295874486943766, "learning_rate": 4.793816985012503e-06, "loss": 0.2921, "step": 8392 }, { "epoch": 0.53, "grad_norm": 4.128313276949677, "learning_rate": 4.792799379089888e-06, "loss": 0.302, "step": 8393 }, { "epoch": 0.53, "grad_norm": 3.058216889694372, "learning_rate": 4.791781781764392e-06, "loss": 0.2642, "step": 8394 }, { "epoch": 0.53, "grad_norm": 2.1364291315479154, "learning_rate": 4.79076419307824e-06, "loss": 0.283, "step": 8395 }, { "epoch": 0.53, "grad_norm": 1.8379552323039976, "learning_rate": 4.789746613073655e-06, "loss": 0.2699, "step": 8396 }, { "epoch": 0.53, "grad_norm": 2.510025828512651, "learning_rate": 4.788729041792855e-06, "loss": 0.2553, "step": 8397 }, { "epoch": 0.53, "grad_norm": 2.339536887154089, "learning_rate": 4.787711479278063e-06, "loss": 0.2747, "step": 8398 }, { "epoch": 0.53, "grad_norm": 2.253674938618319, "learning_rate": 4.7866939255714975e-06, "loss": 0.2681, "step": 8399 }, { "epoch": 0.53, "grad_norm": 2.5930586442139374, "learning_rate": 4.78567638071538e-06, "loss": 0.2904, "step": 8400 }, { "epoch": 0.53, "grad_norm": 3.60352921632056, "learning_rate": 4.784658844751933e-06, "loss": 0.2972, "step": 8401 }, { "epoch": 0.53, "grad_norm": 4.056599153816232, "learning_rate": 4.7836413177233705e-06, "loss": 0.3112, "step": 8402 }, { "epoch": 0.53, "grad_norm": 1.321717028384552, "learning_rate": 4.782623799671914e-06, "loss": 0.2693, "step": 8403 }, { "epoch": 0.53, "grad_norm": 1.7516074821540624, "learning_rate": 4.781606290639783e-06, "loss": 0.2831, "step": 8404 }, { "epoch": 0.53, "grad_norm": 1.5426926052473047, "learning_rate": 4.780588790669195e-06, "loss": 0.2751, "step": 8405 }, { "epoch": 0.53, "grad_norm": 1.4934233565380441, "learning_rate": 4.779571299802366e-06, "loss": 0.2705, "step": 8406 }, { "epoch": 0.53, "grad_norm": 1.5869935741782923, "learning_rate": 4.778553818081517e-06, "loss": 0.2717, "step": 8407 }, { "epoch": 0.53, "grad_norm": 2.6472550756050794, "learning_rate": 4.777536345548863e-06, "loss": 0.2861, "step": 8408 }, { "epoch": 0.53, "grad_norm": 2.0903636854619405, "learning_rate": 4.7765188822466226e-06, "loss": 0.2822, "step": 8409 }, { "epoch": 0.53, "grad_norm": 1.5875475778358343, "learning_rate": 4.775501428217009e-06, "loss": 0.3064, "step": 8410 }, { "epoch": 0.53, "grad_norm": 0.6261372605388646, "learning_rate": 4.77448398350224e-06, "loss": 0.5134, "step": 8411 }, { "epoch": 0.53, "grad_norm": 1.6717839644134513, "learning_rate": 4.773466548144532e-06, "loss": 0.2782, "step": 8412 }, { "epoch": 0.53, "grad_norm": 0.5573152097917939, "learning_rate": 4.7724491221860986e-06, "loss": 0.5053, "step": 8413 }, { "epoch": 0.53, "grad_norm": 7.065201075386083, "learning_rate": 4.771431705669154e-06, "loss": 0.3012, "step": 8414 }, { "epoch": 0.53, "grad_norm": 2.008813283520479, "learning_rate": 4.770414298635916e-06, "loss": 0.2862, "step": 8415 }, { "epoch": 0.53, "grad_norm": 17.200652662346773, "learning_rate": 4.769396901128595e-06, "loss": 0.2675, "step": 8416 }, { "epoch": 0.53, "grad_norm": 6.614085080775041, "learning_rate": 4.768379513189408e-06, "loss": 0.2782, "step": 8417 }, { "epoch": 0.53, "grad_norm": 1.5337764137942766, "learning_rate": 4.7673621348605655e-06, "loss": 0.2881, "step": 8418 }, { "epoch": 0.53, "grad_norm": 1.9528811152936985, "learning_rate": 4.76634476618428e-06, "loss": 0.2912, "step": 8419 }, { "epoch": 0.53, "grad_norm": 1.8610145314625597, "learning_rate": 4.765327407202768e-06, "loss": 0.2822, "step": 8420 }, { "epoch": 0.53, "grad_norm": 1.7819661999567937, "learning_rate": 4.7643100579582355e-06, "loss": 0.2901, "step": 8421 }, { "epoch": 0.53, "grad_norm": 1.4974709682042993, "learning_rate": 4.7632927184928985e-06, "loss": 0.2886, "step": 8422 }, { "epoch": 0.53, "grad_norm": 2.346654853336016, "learning_rate": 4.762275388848968e-06, "loss": 0.2819, "step": 8423 }, { "epoch": 0.53, "grad_norm": 2.5313475112359463, "learning_rate": 4.761258069068654e-06, "loss": 0.3097, "step": 8424 }, { "epoch": 0.53, "grad_norm": 0.5794129127802287, "learning_rate": 4.760240759194166e-06, "loss": 0.4798, "step": 8425 }, { "epoch": 0.53, "grad_norm": 2.2478992436172196, "learning_rate": 4.759223459267715e-06, "loss": 0.296, "step": 8426 }, { "epoch": 0.53, "grad_norm": 2.3140798998622945, "learning_rate": 4.758206169331509e-06, "loss": 0.2999, "step": 8427 }, { "epoch": 0.53, "grad_norm": 10.209286981225102, "learning_rate": 4.757188889427761e-06, "loss": 0.3071, "step": 8428 }, { "epoch": 0.53, "grad_norm": 4.151286698593463, "learning_rate": 4.756171619598674e-06, "loss": 0.3238, "step": 8429 }, { "epoch": 0.53, "grad_norm": 2.89099243932232, "learning_rate": 4.7551543598864614e-06, "loss": 0.3135, "step": 8430 }, { "epoch": 0.53, "grad_norm": 1.817848693334988, "learning_rate": 4.75413711033333e-06, "loss": 0.2839, "step": 8431 }, { "epoch": 0.53, "grad_norm": 2.5286534124216655, "learning_rate": 4.753119870981486e-06, "loss": 0.2844, "step": 8432 }, { "epoch": 0.53, "grad_norm": 2.4574432505536095, "learning_rate": 4.752102641873136e-06, "loss": 0.2969, "step": 8433 }, { "epoch": 0.53, "grad_norm": 1.6359299468810733, "learning_rate": 4.75108542305049e-06, "loss": 0.2988, "step": 8434 }, { "epoch": 0.53, "grad_norm": 2.6438591581902067, "learning_rate": 4.75006821455575e-06, "loss": 0.3049, "step": 8435 }, { "epoch": 0.53, "grad_norm": 2.105956843821535, "learning_rate": 4.749051016431123e-06, "loss": 0.2815, "step": 8436 }, { "epoch": 0.53, "grad_norm": 7.813638413684079, "learning_rate": 4.748033828718818e-06, "loss": 0.2864, "step": 8437 }, { "epoch": 0.53, "grad_norm": 2.9256826268681375, "learning_rate": 4.747016651461035e-06, "loss": 0.2766, "step": 8438 }, { "epoch": 0.53, "grad_norm": 1.9729885408359846, "learning_rate": 4.7459994846999815e-06, "loss": 0.2787, "step": 8439 }, { "epoch": 0.53, "grad_norm": 1.5754963023637483, "learning_rate": 4.744982328477859e-06, "loss": 0.2613, "step": 8440 }, { "epoch": 0.53, "grad_norm": 1.5413783623211113, "learning_rate": 4.7439651828368736e-06, "loss": 0.2698, "step": 8441 }, { "epoch": 0.53, "grad_norm": 1.826139696164723, "learning_rate": 4.7429480478192285e-06, "loss": 0.2829, "step": 8442 }, { "epoch": 0.53, "grad_norm": 2.77022466616211, "learning_rate": 4.741930923467123e-06, "loss": 0.2852, "step": 8443 }, { "epoch": 0.53, "grad_norm": 1.6323025777956142, "learning_rate": 4.740913809822763e-06, "loss": 0.2783, "step": 8444 }, { "epoch": 0.53, "grad_norm": 1.6126840257945725, "learning_rate": 4.7398967069283505e-06, "loss": 0.2754, "step": 8445 }, { "epoch": 0.53, "grad_norm": 1.675572458730645, "learning_rate": 4.7388796148260846e-06, "loss": 0.3032, "step": 8446 }, { "epoch": 0.53, "grad_norm": 1.918201435085034, "learning_rate": 4.737862533558168e-06, "loss": 0.2745, "step": 8447 }, { "epoch": 0.53, "grad_norm": 1.7615596289358781, "learning_rate": 4.7368454631668e-06, "loss": 0.2914, "step": 8448 }, { "epoch": 0.53, "grad_norm": 1.924231687650136, "learning_rate": 4.73582840369418e-06, "loss": 0.2871, "step": 8449 }, { "epoch": 0.53, "grad_norm": 3.0118428570949543, "learning_rate": 4.734811355182511e-06, "loss": 0.2607, "step": 8450 }, { "epoch": 0.53, "grad_norm": 1.965448863629376, "learning_rate": 4.7337943176739874e-06, "loss": 0.2744, "step": 8451 }, { "epoch": 0.53, "grad_norm": 1.291794914480757, "learning_rate": 4.732777291210812e-06, "loss": 0.2702, "step": 8452 }, { "epoch": 0.53, "grad_norm": 1.8498930986453157, "learning_rate": 4.7317602758351814e-06, "loss": 0.2725, "step": 8453 }, { "epoch": 0.53, "grad_norm": 1.5519068068456685, "learning_rate": 4.730743271589293e-06, "loss": 0.2793, "step": 8454 }, { "epoch": 0.53, "grad_norm": 1.7848523060138315, "learning_rate": 4.7297262785153455e-06, "loss": 0.3003, "step": 8455 }, { "epoch": 0.53, "grad_norm": 1.7848219075019645, "learning_rate": 4.728709296655533e-06, "loss": 0.3335, "step": 8456 }, { "epoch": 0.53, "grad_norm": 1.9582090084489585, "learning_rate": 4.727692326052054e-06, "loss": 0.281, "step": 8457 }, { "epoch": 0.53, "grad_norm": 2.322363457982876, "learning_rate": 4.726675366747104e-06, "loss": 0.2695, "step": 8458 }, { "epoch": 0.53, "grad_norm": 6.359884895054364, "learning_rate": 4.72565841878288e-06, "loss": 0.2899, "step": 8459 }, { "epoch": 0.53, "grad_norm": 1.60113332580965, "learning_rate": 4.724641482201574e-06, "loss": 0.2953, "step": 8460 }, { "epoch": 0.53, "grad_norm": 1.6231979197784872, "learning_rate": 4.723624557045383e-06, "loss": 0.268, "step": 8461 }, { "epoch": 0.53, "grad_norm": 2.517094361127118, "learning_rate": 4.7226076433564994e-06, "loss": 0.287, "step": 8462 }, { "epoch": 0.53, "grad_norm": 1.7999813408429162, "learning_rate": 4.721590741177119e-06, "loss": 0.2705, "step": 8463 }, { "epoch": 0.53, "grad_norm": 0.5857471206262909, "learning_rate": 4.720573850549431e-06, "loss": 0.4761, "step": 8464 }, { "epoch": 0.53, "grad_norm": 1.8948979812232025, "learning_rate": 4.719556971515631e-06, "loss": 0.2996, "step": 8465 }, { "epoch": 0.53, "grad_norm": 2.329984744611168, "learning_rate": 4.718540104117909e-06, "loss": 0.2931, "step": 8466 }, { "epoch": 0.53, "grad_norm": 1.8623617760742492, "learning_rate": 4.717523248398461e-06, "loss": 0.2758, "step": 8467 }, { "epoch": 0.53, "grad_norm": 1.5228386275642694, "learning_rate": 4.716506404399473e-06, "loss": 0.279, "step": 8468 }, { "epoch": 0.53, "grad_norm": 2.033646923485184, "learning_rate": 4.715489572163139e-06, "loss": 0.27, "step": 8469 }, { "epoch": 0.53, "grad_norm": 4.077267248613091, "learning_rate": 4.714472751731647e-06, "loss": 0.3069, "step": 8470 }, { "epoch": 0.53, "grad_norm": 2.5367434960848136, "learning_rate": 4.713455943147188e-06, "loss": 0.266, "step": 8471 }, { "epoch": 0.53, "grad_norm": 2.2880178154407433, "learning_rate": 4.712439146451952e-06, "loss": 0.2747, "step": 8472 }, { "epoch": 0.53, "grad_norm": 1.8439083862790828, "learning_rate": 4.711422361688124e-06, "loss": 0.2847, "step": 8473 }, { "epoch": 0.53, "grad_norm": 1.403644545133131, "learning_rate": 4.710405588897896e-06, "loss": 0.2765, "step": 8474 }, { "epoch": 0.53, "grad_norm": 1.467542722163241, "learning_rate": 4.709388828123457e-06, "loss": 0.2785, "step": 8475 }, { "epoch": 0.53, "grad_norm": 1.4478503120744513, "learning_rate": 4.708372079406989e-06, "loss": 0.28, "step": 8476 }, { "epoch": 0.53, "grad_norm": 5.252758370568109, "learning_rate": 4.707355342790682e-06, "loss": 0.2976, "step": 8477 }, { "epoch": 0.53, "grad_norm": 3.5136777425886425, "learning_rate": 4.706338618316721e-06, "loss": 0.292, "step": 8478 }, { "epoch": 0.53, "grad_norm": 2.106395076529786, "learning_rate": 4.705321906027293e-06, "loss": 0.2699, "step": 8479 }, { "epoch": 0.53, "grad_norm": 1.79012790847718, "learning_rate": 4.7043052059645835e-06, "loss": 0.2797, "step": 8480 }, { "epoch": 0.53, "grad_norm": 1.7093815457865003, "learning_rate": 4.703288518170774e-06, "loss": 0.2861, "step": 8481 }, { "epoch": 0.53, "grad_norm": 1.464103243951695, "learning_rate": 4.702271842688052e-06, "loss": 0.3002, "step": 8482 }, { "epoch": 0.53, "grad_norm": 1.665304705321683, "learning_rate": 4.701255179558601e-06, "loss": 0.2728, "step": 8483 }, { "epoch": 0.53, "grad_norm": 1.6848328109589452, "learning_rate": 4.700238528824602e-06, "loss": 0.273, "step": 8484 }, { "epoch": 0.53, "grad_norm": 1.8086438140132717, "learning_rate": 4.69922189052824e-06, "loss": 0.2679, "step": 8485 }, { "epoch": 0.53, "grad_norm": 1.892734079629832, "learning_rate": 4.698205264711695e-06, "loss": 0.3, "step": 8486 }, { "epoch": 0.53, "grad_norm": 1.5813826797534165, "learning_rate": 4.6971886514171495e-06, "loss": 0.2855, "step": 8487 }, { "epoch": 0.53, "grad_norm": 2.783067685410438, "learning_rate": 4.696172050686784e-06, "loss": 0.2845, "step": 8488 }, { "epoch": 0.53, "grad_norm": 1.1740685978314063, "learning_rate": 4.695155462562781e-06, "loss": 0.2731, "step": 8489 }, { "epoch": 0.53, "grad_norm": 1.9210350324320693, "learning_rate": 4.69413888708732e-06, "loss": 0.291, "step": 8490 }, { "epoch": 0.53, "grad_norm": 2.6339544719116774, "learning_rate": 4.693122324302579e-06, "loss": 0.2918, "step": 8491 }, { "epoch": 0.53, "grad_norm": 2.660077704029485, "learning_rate": 4.692105774250738e-06, "loss": 0.2829, "step": 8492 }, { "epoch": 0.53, "grad_norm": 1.51817554738402, "learning_rate": 4.6910892369739754e-06, "loss": 0.2641, "step": 8493 }, { "epoch": 0.53, "grad_norm": 2.570369080557774, "learning_rate": 4.690072712514469e-06, "loss": 0.2775, "step": 8494 }, { "epoch": 0.53, "grad_norm": 2.190856266048138, "learning_rate": 4.689056200914396e-06, "loss": 0.2969, "step": 8495 }, { "epoch": 0.53, "grad_norm": 3.5318523971378313, "learning_rate": 4.6880397022159325e-06, "loss": 0.2933, "step": 8496 }, { "epoch": 0.53, "grad_norm": 1.398498445276512, "learning_rate": 4.687023216461257e-06, "loss": 0.2805, "step": 8497 }, { "epoch": 0.53, "grad_norm": 1.4772995611057593, "learning_rate": 4.686006743692544e-06, "loss": 0.2819, "step": 8498 }, { "epoch": 0.53, "grad_norm": 1.7652774759334076, "learning_rate": 4.68499028395197e-06, "loss": 0.2809, "step": 8499 }, { "epoch": 0.53, "grad_norm": 1.805621268942833, "learning_rate": 4.683973837281708e-06, "loss": 0.2728, "step": 8500 }, { "epoch": 0.53, "grad_norm": 2.9497376117748084, "learning_rate": 4.682957403723933e-06, "loss": 0.2872, "step": 8501 }, { "epoch": 0.53, "grad_norm": 2.7000536329925047, "learning_rate": 4.681940983320818e-06, "loss": 0.2906, "step": 8502 }, { "epoch": 0.53, "grad_norm": 4.1535056506735675, "learning_rate": 4.680924576114535e-06, "loss": 0.282, "step": 8503 }, { "epoch": 0.53, "grad_norm": 3.450309175314244, "learning_rate": 4.679908182147259e-06, "loss": 0.2887, "step": 8504 }, { "epoch": 0.53, "grad_norm": 1.8818322464896178, "learning_rate": 4.6788918014611624e-06, "loss": 0.2849, "step": 8505 }, { "epoch": 0.53, "grad_norm": 2.470227527852286, "learning_rate": 4.677875434098414e-06, "loss": 0.318, "step": 8506 }, { "epoch": 0.53, "grad_norm": 3.937588863534305, "learning_rate": 4.676859080101188e-06, "loss": 0.2813, "step": 8507 }, { "epoch": 0.54, "grad_norm": 1.3713852251171992, "learning_rate": 4.675842739511651e-06, "loss": 0.2837, "step": 8508 }, { "epoch": 0.54, "grad_norm": 1.4273832632733572, "learning_rate": 4.674826412371975e-06, "loss": 0.2943, "step": 8509 }, { "epoch": 0.54, "grad_norm": 2.864045484347699, "learning_rate": 4.673810098724329e-06, "loss": 0.2909, "step": 8510 }, { "epoch": 0.54, "grad_norm": 3.155734353607268, "learning_rate": 4.6727937986108794e-06, "loss": 0.2982, "step": 8511 }, { "epoch": 0.54, "grad_norm": 1.6988717509513624, "learning_rate": 4.671777512073797e-06, "loss": 0.2829, "step": 8512 }, { "epoch": 0.54, "grad_norm": 1.9494067114925544, "learning_rate": 4.67076123915525e-06, "loss": 0.2889, "step": 8513 }, { "epoch": 0.54, "grad_norm": 1.6166359579235476, "learning_rate": 4.669744979897404e-06, "loss": 0.2766, "step": 8514 }, { "epoch": 0.54, "grad_norm": 1.32629855727391, "learning_rate": 4.668728734342425e-06, "loss": 0.2683, "step": 8515 }, { "epoch": 0.54, "grad_norm": 1.9156123254111672, "learning_rate": 4.667712502532479e-06, "loss": 0.2899, "step": 8516 }, { "epoch": 0.54, "grad_norm": 3.110190444078173, "learning_rate": 4.666696284509731e-06, "loss": 0.279, "step": 8517 }, { "epoch": 0.54, "grad_norm": 1.9956441228668982, "learning_rate": 4.6656800803163454e-06, "loss": 0.2739, "step": 8518 }, { "epoch": 0.54, "grad_norm": 2.25414985881702, "learning_rate": 4.664663889994489e-06, "loss": 0.2828, "step": 8519 }, { "epoch": 0.54, "grad_norm": 1.939016030513565, "learning_rate": 4.6636477135863224e-06, "loss": 0.2696, "step": 8520 }, { "epoch": 0.54, "grad_norm": 1.9488688939407426, "learning_rate": 4.662631551134011e-06, "loss": 0.303, "step": 8521 }, { "epoch": 0.54, "grad_norm": 5.78436442800251, "learning_rate": 4.6616154026797144e-06, "loss": 0.3002, "step": 8522 }, { "epoch": 0.54, "grad_norm": 5.224794063484492, "learning_rate": 4.660599268265597e-06, "loss": 0.278, "step": 8523 }, { "epoch": 0.54, "grad_norm": 4.548502201923166, "learning_rate": 4.659583147933817e-06, "loss": 0.2826, "step": 8524 }, { "epoch": 0.54, "grad_norm": 41.696948042233466, "learning_rate": 4.6585670417265365e-06, "loss": 0.2909, "step": 8525 }, { "epoch": 0.54, "grad_norm": 2.2166487367959773, "learning_rate": 4.657550949685917e-06, "loss": 0.2808, "step": 8526 }, { "epoch": 0.54, "grad_norm": 1.3152271681091539, "learning_rate": 4.656534871854118e-06, "loss": 0.2922, "step": 8527 }, { "epoch": 0.54, "grad_norm": 13.357387346133596, "learning_rate": 4.655518808273296e-06, "loss": 0.2826, "step": 8528 }, { "epoch": 0.54, "grad_norm": 1.937390647985258, "learning_rate": 4.654502758985611e-06, "loss": 0.2791, "step": 8529 }, { "epoch": 0.54, "grad_norm": 5.403687022326223, "learning_rate": 4.65348672403322e-06, "loss": 0.3071, "step": 8530 }, { "epoch": 0.54, "grad_norm": 1.448216136206693, "learning_rate": 4.652470703458281e-06, "loss": 0.2832, "step": 8531 }, { "epoch": 0.54, "grad_norm": 2.3510141476195585, "learning_rate": 4.651454697302949e-06, "loss": 0.2922, "step": 8532 }, { "epoch": 0.54, "grad_norm": 1.2209394625057297, "learning_rate": 4.650438705609379e-06, "loss": 0.2792, "step": 8533 }, { "epoch": 0.54, "grad_norm": 0.6012992133253265, "learning_rate": 4.6494227284197296e-06, "loss": 0.4771, "step": 8534 }, { "epoch": 0.54, "grad_norm": 1.5696914695097803, "learning_rate": 4.648406765776154e-06, "loss": 0.2923, "step": 8535 }, { "epoch": 0.54, "grad_norm": 1.9863923207164877, "learning_rate": 4.647390817720806e-06, "loss": 0.2801, "step": 8536 }, { "epoch": 0.54, "grad_norm": 2.9554050504720806, "learning_rate": 4.6463748842958416e-06, "loss": 0.2992, "step": 8537 }, { "epoch": 0.54, "grad_norm": 2.619655729795758, "learning_rate": 4.645358965543409e-06, "loss": 0.2754, "step": 8538 }, { "epoch": 0.54, "grad_norm": 1.2776661114982213, "learning_rate": 4.644343061505663e-06, "loss": 0.2914, "step": 8539 }, { "epoch": 0.54, "grad_norm": 3.154509107004463, "learning_rate": 4.643327172224753e-06, "loss": 0.271, "step": 8540 }, { "epoch": 0.54, "grad_norm": 1.8772663486449142, "learning_rate": 4.6423112977428346e-06, "loss": 0.3015, "step": 8541 }, { "epoch": 0.54, "grad_norm": 1.500139646785539, "learning_rate": 4.641295438102056e-06, "loss": 0.2648, "step": 8542 }, { "epoch": 0.54, "grad_norm": 2.1114096276150285, "learning_rate": 4.6402795933445665e-06, "loss": 0.2748, "step": 8543 }, { "epoch": 0.54, "grad_norm": 1.1839224960513643, "learning_rate": 4.639263763512516e-06, "loss": 0.2672, "step": 8544 }, { "epoch": 0.54, "grad_norm": 2.8680031295894155, "learning_rate": 4.638247948648052e-06, "loss": 0.2775, "step": 8545 }, { "epoch": 0.54, "grad_norm": 9.729256112303656, "learning_rate": 4.637232148793323e-06, "loss": 0.2943, "step": 8546 }, { "epoch": 0.54, "grad_norm": 1.6163734714470552, "learning_rate": 4.636216363990478e-06, "loss": 0.2851, "step": 8547 }, { "epoch": 0.54, "grad_norm": 3.069947633542292, "learning_rate": 4.635200594281658e-06, "loss": 0.2773, "step": 8548 }, { "epoch": 0.54, "grad_norm": 1.2162562662701955, "learning_rate": 4.634184839709018e-06, "loss": 0.277, "step": 8549 }, { "epoch": 0.54, "grad_norm": 2.268725482968977, "learning_rate": 4.6331691003146966e-06, "loss": 0.2577, "step": 8550 }, { "epoch": 0.54, "grad_norm": 2.2527727220316223, "learning_rate": 4.632153376140841e-06, "loss": 0.269, "step": 8551 }, { "epoch": 0.54, "grad_norm": 4.20018551401685, "learning_rate": 4.631137667229596e-06, "loss": 0.3038, "step": 8552 }, { "epoch": 0.54, "grad_norm": 1.7370076469343152, "learning_rate": 4.630121973623104e-06, "loss": 0.3108, "step": 8553 }, { "epoch": 0.54, "grad_norm": 1.4379113774010683, "learning_rate": 4.629106295363508e-06, "loss": 0.2833, "step": 8554 }, { "epoch": 0.54, "grad_norm": 1.6625491943937982, "learning_rate": 4.628090632492949e-06, "loss": 0.28, "step": 8555 }, { "epoch": 0.54, "grad_norm": 2.0031481828937756, "learning_rate": 4.627074985053572e-06, "loss": 0.2916, "step": 8556 }, { "epoch": 0.54, "grad_norm": 2.3079447897823093, "learning_rate": 4.626059353087516e-06, "loss": 0.2753, "step": 8557 }, { "epoch": 0.54, "grad_norm": 1.3593400223302623, "learning_rate": 4.625043736636922e-06, "loss": 0.2905, "step": 8558 }, { "epoch": 0.54, "grad_norm": 1.9397299322249433, "learning_rate": 4.62402813574393e-06, "loss": 0.2906, "step": 8559 }, { "epoch": 0.54, "grad_norm": 3.4514322496734615, "learning_rate": 4.623012550450677e-06, "loss": 0.278, "step": 8560 }, { "epoch": 0.54, "grad_norm": 1.9483003155099303, "learning_rate": 4.621996980799305e-06, "loss": 0.2826, "step": 8561 }, { "epoch": 0.54, "grad_norm": 1.9815518749981667, "learning_rate": 4.620981426831948e-06, "loss": 0.287, "step": 8562 }, { "epoch": 0.54, "grad_norm": 2.2575559472488433, "learning_rate": 4.6199658885907425e-06, "loss": 0.293, "step": 8563 }, { "epoch": 0.54, "grad_norm": 4.102273372198595, "learning_rate": 4.61895036611783e-06, "loss": 0.2909, "step": 8564 }, { "epoch": 0.54, "grad_norm": 2.1884664624238077, "learning_rate": 4.617934859455344e-06, "loss": 0.2628, "step": 8565 }, { "epoch": 0.54, "grad_norm": 1.9237841388999217, "learning_rate": 4.6169193686454185e-06, "loss": 0.2933, "step": 8566 }, { "epoch": 0.54, "grad_norm": 1.712097895034254, "learning_rate": 4.61590389373019e-06, "loss": 0.2899, "step": 8567 }, { "epoch": 0.54, "grad_norm": 1.9398340986304743, "learning_rate": 4.61488843475179e-06, "loss": 0.2946, "step": 8568 }, { "epoch": 0.54, "grad_norm": 1.362201687229322, "learning_rate": 4.613872991752354e-06, "loss": 0.2809, "step": 8569 }, { "epoch": 0.54, "grad_norm": 1.9868417860586314, "learning_rate": 4.6128575647740095e-06, "loss": 0.2718, "step": 8570 }, { "epoch": 0.54, "grad_norm": 2.8410643381187213, "learning_rate": 4.611842153858896e-06, "loss": 0.2704, "step": 8571 }, { "epoch": 0.54, "grad_norm": 2.913388566325249, "learning_rate": 4.61082675904914e-06, "loss": 0.283, "step": 8572 }, { "epoch": 0.54, "grad_norm": 4.162397071141212, "learning_rate": 4.609811380386874e-06, "loss": 0.2895, "step": 8573 }, { "epoch": 0.54, "grad_norm": 1.719229953542822, "learning_rate": 4.608796017914226e-06, "loss": 0.2624, "step": 8574 }, { "epoch": 0.54, "grad_norm": 4.228916571775909, "learning_rate": 4.607780671673327e-06, "loss": 0.2829, "step": 8575 }, { "epoch": 0.54, "grad_norm": 1.7571849859213218, "learning_rate": 4.6067653417063035e-06, "loss": 0.3053, "step": 8576 }, { "epoch": 0.54, "grad_norm": 4.437977724296435, "learning_rate": 4.6057500280552854e-06, "loss": 0.2789, "step": 8577 }, { "epoch": 0.54, "grad_norm": 3.9557853163063963, "learning_rate": 4.604734730762396e-06, "loss": 0.2942, "step": 8578 }, { "epoch": 0.54, "grad_norm": 4.121689168314926, "learning_rate": 4.6037194498697675e-06, "loss": 0.2935, "step": 8579 }, { "epoch": 0.54, "grad_norm": 2.756789307169671, "learning_rate": 4.602704185419523e-06, "loss": 0.2775, "step": 8580 }, { "epoch": 0.54, "grad_norm": 4.070867974225797, "learning_rate": 4.601688937453787e-06, "loss": 0.2829, "step": 8581 }, { "epoch": 0.54, "grad_norm": 0.5464122002278814, "learning_rate": 4.600673706014684e-06, "loss": 0.4769, "step": 8582 }, { "epoch": 0.54, "grad_norm": 1.4424936918505127, "learning_rate": 4.59965849114434e-06, "loss": 0.3037, "step": 8583 }, { "epoch": 0.54, "grad_norm": 1.458360579085679, "learning_rate": 4.598643292884874e-06, "loss": 0.2863, "step": 8584 }, { "epoch": 0.54, "grad_norm": 1.6193533008497647, "learning_rate": 4.59762811127841e-06, "loss": 0.2804, "step": 8585 }, { "epoch": 0.54, "grad_norm": 1.4594848985223432, "learning_rate": 4.596612946367071e-06, "loss": 0.2713, "step": 8586 }, { "epoch": 0.54, "grad_norm": 1.6832068829104752, "learning_rate": 4.59559779819298e-06, "loss": 0.2624, "step": 8587 }, { "epoch": 0.54, "grad_norm": 1.8235682189158133, "learning_rate": 4.594582666798253e-06, "loss": 0.2949, "step": 8588 }, { "epoch": 0.54, "grad_norm": 3.7972655430515903, "learning_rate": 4.593567552225011e-06, "loss": 0.2872, "step": 8589 }, { "epoch": 0.54, "grad_norm": 3.1422914693986663, "learning_rate": 4.592552454515373e-06, "loss": 0.2717, "step": 8590 }, { "epoch": 0.54, "grad_norm": 2.070577279731365, "learning_rate": 4.5915373737114585e-06, "loss": 0.2867, "step": 8591 }, { "epoch": 0.54, "grad_norm": 2.1193049675765288, "learning_rate": 4.590522309855383e-06, "loss": 0.2759, "step": 8592 }, { "epoch": 0.54, "grad_norm": 2.2639417689430745, "learning_rate": 4.589507262989263e-06, "loss": 0.2853, "step": 8593 }, { "epoch": 0.54, "grad_norm": 1.916129817849262, "learning_rate": 4.5884922331552165e-06, "loss": 0.28, "step": 8594 }, { "epoch": 0.54, "grad_norm": 1.9396402468979705, "learning_rate": 4.587477220395359e-06, "loss": 0.2951, "step": 8595 }, { "epoch": 0.54, "grad_norm": 1.5468369879825565, "learning_rate": 4.586462224751805e-06, "loss": 0.2859, "step": 8596 }, { "epoch": 0.54, "grad_norm": 3.114272870505431, "learning_rate": 4.585447246266667e-06, "loss": 0.2912, "step": 8597 }, { "epoch": 0.54, "grad_norm": 2.880106452110881, "learning_rate": 4.584432284982058e-06, "loss": 0.2962, "step": 8598 }, { "epoch": 0.54, "grad_norm": 1.9331625466461235, "learning_rate": 4.583417340940094e-06, "loss": 0.2841, "step": 8599 }, { "epoch": 0.54, "grad_norm": 2.71786524563575, "learning_rate": 4.58240241418288e-06, "loss": 0.2853, "step": 8600 }, { "epoch": 0.54, "grad_norm": 2.1148639066727943, "learning_rate": 4.581387504752536e-06, "loss": 0.2777, "step": 8601 }, { "epoch": 0.54, "grad_norm": 2.0081180780315173, "learning_rate": 4.580372612691166e-06, "loss": 0.2582, "step": 8602 }, { "epoch": 0.54, "grad_norm": 6.70801635144531, "learning_rate": 4.579357738040882e-06, "loss": 0.2769, "step": 8603 }, { "epoch": 0.54, "grad_norm": 1.9732370667667531, "learning_rate": 4.5783428808437934e-06, "loss": 0.2713, "step": 8604 }, { "epoch": 0.54, "grad_norm": 3.900251136523736, "learning_rate": 4.577328041142008e-06, "loss": 0.2843, "step": 8605 }, { "epoch": 0.54, "grad_norm": 2.325128911545758, "learning_rate": 4.576313218977632e-06, "loss": 0.2766, "step": 8606 }, { "epoch": 0.54, "grad_norm": 4.347966589831407, "learning_rate": 4.575298414392771e-06, "loss": 0.2701, "step": 8607 }, { "epoch": 0.54, "grad_norm": 3.127965411152177, "learning_rate": 4.574283627429535e-06, "loss": 0.2671, "step": 8608 }, { "epoch": 0.54, "grad_norm": 1.9182318503140041, "learning_rate": 4.573268858130028e-06, "loss": 0.2719, "step": 8609 }, { "epoch": 0.54, "grad_norm": 2.347908355016946, "learning_rate": 4.5722541065363535e-06, "loss": 0.2667, "step": 8610 }, { "epoch": 0.54, "grad_norm": 1.8857029220855384, "learning_rate": 4.571239372690617e-06, "loss": 0.2738, "step": 8611 }, { "epoch": 0.54, "grad_norm": 1.7631583278078047, "learning_rate": 4.570224656634919e-06, "loss": 0.2803, "step": 8612 }, { "epoch": 0.54, "grad_norm": 1.9975087766146427, "learning_rate": 4.569209958411365e-06, "loss": 0.277, "step": 8613 }, { "epoch": 0.54, "grad_norm": 0.6197380296160668, "learning_rate": 4.568195278062053e-06, "loss": 0.4757, "step": 8614 }, { "epoch": 0.54, "grad_norm": 2.8655645199727724, "learning_rate": 4.567180615629086e-06, "loss": 0.3062, "step": 8615 }, { "epoch": 0.54, "grad_norm": 4.266578233175302, "learning_rate": 4.566165971154564e-06, "loss": 0.2906, "step": 8616 }, { "epoch": 0.54, "grad_norm": 1.562224386288989, "learning_rate": 4.565151344680588e-06, "loss": 0.2779, "step": 8617 }, { "epoch": 0.54, "grad_norm": 1.4590352152499, "learning_rate": 4.564136736249254e-06, "loss": 0.2596, "step": 8618 }, { "epoch": 0.54, "grad_norm": 1.6523264225530647, "learning_rate": 4.5631221459026624e-06, "loss": 0.2881, "step": 8619 }, { "epoch": 0.54, "grad_norm": 1.93516897016039, "learning_rate": 4.562107573682908e-06, "loss": 0.314, "step": 8620 }, { "epoch": 0.54, "grad_norm": 3.2328858142846673, "learning_rate": 4.561093019632089e-06, "loss": 0.2709, "step": 8621 }, { "epoch": 0.54, "grad_norm": 0.551435138906967, "learning_rate": 4.560078483792297e-06, "loss": 0.4583, "step": 8622 }, { "epoch": 0.54, "grad_norm": 2.183199839452602, "learning_rate": 4.5590639662056325e-06, "loss": 0.2828, "step": 8623 }, { "epoch": 0.54, "grad_norm": 1.9929203831223479, "learning_rate": 4.5580494669141865e-06, "loss": 0.2738, "step": 8624 }, { "epoch": 0.54, "grad_norm": 2.0960387672284257, "learning_rate": 4.557034985960054e-06, "loss": 0.3031, "step": 8625 }, { "epoch": 0.54, "grad_norm": 2.5282488495606374, "learning_rate": 4.556020523385326e-06, "loss": 0.2897, "step": 8626 }, { "epoch": 0.54, "grad_norm": 1.578240727402863, "learning_rate": 4.5550060792320965e-06, "loss": 0.2672, "step": 8627 }, { "epoch": 0.54, "grad_norm": 1.5666210547579098, "learning_rate": 4.5539916535424535e-06, "loss": 0.2606, "step": 8628 }, { "epoch": 0.54, "grad_norm": 3.456100286133945, "learning_rate": 4.552977246358491e-06, "loss": 0.2853, "step": 8629 }, { "epoch": 0.54, "grad_norm": 0.6108797312051854, "learning_rate": 4.551962857722293e-06, "loss": 0.4985, "step": 8630 }, { "epoch": 0.54, "grad_norm": 2.6290358899769215, "learning_rate": 4.5509484876759545e-06, "loss": 0.2777, "step": 8631 }, { "epoch": 0.54, "grad_norm": 1.7857611995949587, "learning_rate": 4.54993413626156e-06, "loss": 0.2962, "step": 8632 }, { "epoch": 0.54, "grad_norm": 1.5804072172844503, "learning_rate": 4.548919803521199e-06, "loss": 0.2651, "step": 8633 }, { "epoch": 0.54, "grad_norm": 1.814613053214455, "learning_rate": 4.547905489496955e-06, "loss": 0.2728, "step": 8634 }, { "epoch": 0.54, "grad_norm": 2.3240438288757073, "learning_rate": 4.546891194230917e-06, "loss": 0.265, "step": 8635 }, { "epoch": 0.54, "grad_norm": 2.843873010889801, "learning_rate": 4.545876917765166e-06, "loss": 0.2854, "step": 8636 }, { "epoch": 0.54, "grad_norm": 2.5127610501912625, "learning_rate": 4.544862660141788e-06, "loss": 0.3024, "step": 8637 }, { "epoch": 0.54, "grad_norm": 1.8378733164015084, "learning_rate": 4.543848421402868e-06, "loss": 0.2815, "step": 8638 }, { "epoch": 0.54, "grad_norm": 2.3257112494158623, "learning_rate": 4.542834201590488e-06, "loss": 0.3038, "step": 8639 }, { "epoch": 0.54, "grad_norm": 2.0907842299810633, "learning_rate": 4.541820000746727e-06, "loss": 0.2982, "step": 8640 }, { "epoch": 0.54, "grad_norm": 3.189431225467172, "learning_rate": 4.54080581891367e-06, "loss": 0.2771, "step": 8641 }, { "epoch": 0.54, "grad_norm": 2.114439143240404, "learning_rate": 4.5397916561333945e-06, "loss": 0.2768, "step": 8642 }, { "epoch": 0.54, "grad_norm": 2.3184204342694437, "learning_rate": 4.538777512447981e-06, "loss": 0.2953, "step": 8643 }, { "epoch": 0.54, "grad_norm": 2.8734650242845112, "learning_rate": 4.537763387899506e-06, "loss": 0.2931, "step": 8644 }, { "epoch": 0.54, "grad_norm": 1.7485818078402564, "learning_rate": 4.5367492825300495e-06, "loss": 0.2725, "step": 8645 }, { "epoch": 0.54, "grad_norm": 2.3865100544345346, "learning_rate": 4.535735196381688e-06, "loss": 0.2777, "step": 8646 }, { "epoch": 0.54, "grad_norm": 2.404794384400355, "learning_rate": 4.5347211294965e-06, "loss": 0.2811, "step": 8647 }, { "epoch": 0.54, "grad_norm": 1.6905924900884721, "learning_rate": 4.533707081916557e-06, "loss": 0.3042, "step": 8648 }, { "epoch": 0.54, "grad_norm": 2.273842475985826, "learning_rate": 4.532693053683937e-06, "loss": 0.2846, "step": 8649 }, { "epoch": 0.54, "grad_norm": 3.868565034561557, "learning_rate": 4.53167904484071e-06, "loss": 0.2742, "step": 8650 }, { "epoch": 0.54, "grad_norm": 2.248031544343279, "learning_rate": 4.530665055428953e-06, "loss": 0.2874, "step": 8651 }, { "epoch": 0.54, "grad_norm": 1.511332066634212, "learning_rate": 4.529651085490735e-06, "loss": 0.2816, "step": 8652 }, { "epoch": 0.54, "grad_norm": 2.139553668217882, "learning_rate": 4.528637135068128e-06, "loss": 0.2804, "step": 8653 }, { "epoch": 0.54, "grad_norm": 1.8447021782049204, "learning_rate": 4.527623204203205e-06, "loss": 0.2768, "step": 8654 }, { "epoch": 0.54, "grad_norm": 5.548620663837192, "learning_rate": 4.526609292938036e-06, "loss": 0.32, "step": 8655 }, { "epoch": 0.54, "grad_norm": 1.9007032332417215, "learning_rate": 4.525595401314686e-06, "loss": 0.2812, "step": 8656 }, { "epoch": 0.54, "grad_norm": 1.7562137069187282, "learning_rate": 4.524581529375227e-06, "loss": 0.2723, "step": 8657 }, { "epoch": 0.54, "grad_norm": 5.464324448956869, "learning_rate": 4.523567677161724e-06, "loss": 0.2802, "step": 8658 }, { "epoch": 0.54, "grad_norm": 1.9539615556465253, "learning_rate": 4.522553844716245e-06, "loss": 0.2672, "step": 8659 }, { "epoch": 0.54, "grad_norm": 1.6783096749471333, "learning_rate": 4.521540032080852e-06, "loss": 0.2769, "step": 8660 }, { "epoch": 0.54, "grad_norm": 5.525224386916247, "learning_rate": 4.520526239297615e-06, "loss": 0.2747, "step": 8661 }, { "epoch": 0.54, "grad_norm": 2.4201583037172347, "learning_rate": 4.519512466408596e-06, "loss": 0.2856, "step": 8662 }, { "epoch": 0.54, "grad_norm": 2.2608409999456804, "learning_rate": 4.5184987134558595e-06, "loss": 0.2971, "step": 8663 }, { "epoch": 0.54, "grad_norm": 1.8139226028308588, "learning_rate": 4.517484980481465e-06, "loss": 0.2911, "step": 8664 }, { "epoch": 0.54, "grad_norm": 1.8104233666573968, "learning_rate": 4.516471267527477e-06, "loss": 0.2688, "step": 8665 }, { "epoch": 0.54, "grad_norm": 2.159298667844816, "learning_rate": 4.515457574635954e-06, "loss": 0.2754, "step": 8666 }, { "epoch": 0.55, "grad_norm": 2.8085537988998825, "learning_rate": 4.514443901848955e-06, "loss": 0.2794, "step": 8667 }, { "epoch": 0.55, "grad_norm": 1.823481615533778, "learning_rate": 4.513430249208543e-06, "loss": 0.2566, "step": 8668 }, { "epoch": 0.55, "grad_norm": 2.183569794940634, "learning_rate": 4.512416616756775e-06, "loss": 0.2781, "step": 8669 }, { "epoch": 0.55, "grad_norm": 1.4572901475172415, "learning_rate": 4.511403004535707e-06, "loss": 0.2845, "step": 8670 }, { "epoch": 0.55, "grad_norm": 2.1257860807033913, "learning_rate": 4.510389412587396e-06, "loss": 0.2743, "step": 8671 }, { "epoch": 0.55, "grad_norm": 2.586562221477153, "learning_rate": 4.509375840953898e-06, "loss": 0.2644, "step": 8672 }, { "epoch": 0.55, "grad_norm": 1.8402364882793383, "learning_rate": 4.508362289677269e-06, "loss": 0.283, "step": 8673 }, { "epoch": 0.55, "grad_norm": 4.456371909412483, "learning_rate": 4.507348758799559e-06, "loss": 0.2768, "step": 8674 }, { "epoch": 0.55, "grad_norm": 1.8813355872225273, "learning_rate": 4.506335248362824e-06, "loss": 0.2696, "step": 8675 }, { "epoch": 0.55, "grad_norm": 3.8061554021897575, "learning_rate": 4.505321758409119e-06, "loss": 0.2823, "step": 8676 }, { "epoch": 0.55, "grad_norm": 1.3721196467245311, "learning_rate": 4.504308288980492e-06, "loss": 0.2671, "step": 8677 }, { "epoch": 0.55, "grad_norm": 2.2092868208521197, "learning_rate": 4.503294840118994e-06, "loss": 0.2712, "step": 8678 }, { "epoch": 0.55, "grad_norm": 1.8938703630067009, "learning_rate": 4.502281411866677e-06, "loss": 0.2878, "step": 8679 }, { "epoch": 0.55, "grad_norm": 3.299576991819874, "learning_rate": 4.501268004265586e-06, "loss": 0.2706, "step": 8680 }, { "epoch": 0.55, "grad_norm": 4.936327453854064, "learning_rate": 4.500254617357773e-06, "loss": 0.2829, "step": 8681 }, { "epoch": 0.55, "grad_norm": 2.0996755835709644, "learning_rate": 4.4992412511852816e-06, "loss": 0.3129, "step": 8682 }, { "epoch": 0.55, "grad_norm": 4.186719977759636, "learning_rate": 4.498227905790162e-06, "loss": 0.2874, "step": 8683 }, { "epoch": 0.55, "grad_norm": 2.1423388689125566, "learning_rate": 4.497214581214456e-06, "loss": 0.2692, "step": 8684 }, { "epoch": 0.55, "grad_norm": 1.7107853309303507, "learning_rate": 4.4962012775002125e-06, "loss": 0.2653, "step": 8685 }, { "epoch": 0.55, "grad_norm": 1.6882321677791206, "learning_rate": 4.495187994689471e-06, "loss": 0.2743, "step": 8686 }, { "epoch": 0.55, "grad_norm": 4.5269752474096725, "learning_rate": 4.494174732824279e-06, "loss": 0.2619, "step": 8687 }, { "epoch": 0.55, "grad_norm": 2.587270876376558, "learning_rate": 4.493161491946673e-06, "loss": 0.2748, "step": 8688 }, { "epoch": 0.55, "grad_norm": 1.4339995786329136, "learning_rate": 4.492148272098696e-06, "loss": 0.2663, "step": 8689 }, { "epoch": 0.55, "grad_norm": 3.0538978301953326, "learning_rate": 4.4911350733223915e-06, "loss": 0.2745, "step": 8690 }, { "epoch": 0.55, "grad_norm": 3.0367939560783195, "learning_rate": 4.490121895659796e-06, "loss": 0.2801, "step": 8691 }, { "epoch": 0.55, "grad_norm": 2.317636489632323, "learning_rate": 4.489108739152948e-06, "loss": 0.2782, "step": 8692 }, { "epoch": 0.55, "grad_norm": 5.0480680365327215, "learning_rate": 4.488095603843888e-06, "loss": 0.2802, "step": 8693 }, { "epoch": 0.55, "grad_norm": 1.730880327492681, "learning_rate": 4.487082489774649e-06, "loss": 0.3069, "step": 8694 }, { "epoch": 0.55, "grad_norm": 20.811687343272325, "learning_rate": 4.486069396987269e-06, "loss": 0.2839, "step": 8695 }, { "epoch": 0.55, "grad_norm": 1.6116323535684252, "learning_rate": 4.485056325523783e-06, "loss": 0.2679, "step": 8696 }, { "epoch": 0.55, "grad_norm": 2.489179353127172, "learning_rate": 4.484043275426223e-06, "loss": 0.286, "step": 8697 }, { "epoch": 0.55, "grad_norm": 1.955007266093121, "learning_rate": 4.483030246736625e-06, "loss": 0.2889, "step": 8698 }, { "epoch": 0.55, "grad_norm": 3.05642462748354, "learning_rate": 4.48201723949702e-06, "loss": 0.2564, "step": 8699 }, { "epoch": 0.55, "grad_norm": 2.624203864039519, "learning_rate": 4.4810042537494395e-06, "loss": 0.2807, "step": 8700 }, { "epoch": 0.55, "grad_norm": 2.792966610432871, "learning_rate": 4.479991289535916e-06, "loss": 0.2711, "step": 8701 }, { "epoch": 0.55, "grad_norm": 2.1525156680549467, "learning_rate": 4.4789783468984765e-06, "loss": 0.2988, "step": 8702 }, { "epoch": 0.55, "grad_norm": 19.965924416228063, "learning_rate": 4.477965425879152e-06, "loss": 0.2902, "step": 8703 }, { "epoch": 0.55, "grad_norm": 4.090974749486585, "learning_rate": 4.476952526519966e-06, "loss": 0.263, "step": 8704 }, { "epoch": 0.55, "grad_norm": 1.9306939845629139, "learning_rate": 4.47593964886295e-06, "loss": 0.2849, "step": 8705 }, { "epoch": 0.55, "grad_norm": 10.230335887825449, "learning_rate": 4.47492679295013e-06, "loss": 0.2798, "step": 8706 }, { "epoch": 0.55, "grad_norm": 1.8523561843967347, "learning_rate": 4.47391395882353e-06, "loss": 0.2971, "step": 8707 }, { "epoch": 0.55, "grad_norm": 2.903418956748041, "learning_rate": 4.472901146525174e-06, "loss": 0.2805, "step": 8708 }, { "epoch": 0.55, "grad_norm": 2.2876029265816054, "learning_rate": 4.471888356097086e-06, "loss": 0.2949, "step": 8709 }, { "epoch": 0.55, "grad_norm": 1.753283396738876, "learning_rate": 4.470875587581287e-06, "loss": 0.2638, "step": 8710 }, { "epoch": 0.55, "grad_norm": 6.397146123534283, "learning_rate": 4.4698628410198015e-06, "loss": 0.2911, "step": 8711 }, { "epoch": 0.55, "grad_norm": 1.6834451016165375, "learning_rate": 4.468850116454645e-06, "loss": 0.2852, "step": 8712 }, { "epoch": 0.55, "grad_norm": 1.5940769876826701, "learning_rate": 4.467837413927843e-06, "loss": 0.2776, "step": 8713 }, { "epoch": 0.55, "grad_norm": 2.068817197256334, "learning_rate": 4.466824733481411e-06, "loss": 0.284, "step": 8714 }, { "epoch": 0.55, "grad_norm": 2.733355617713016, "learning_rate": 4.46581207515737e-06, "loss": 0.2746, "step": 8715 }, { "epoch": 0.55, "grad_norm": 2.4566253155911313, "learning_rate": 4.464799438997733e-06, "loss": 0.3043, "step": 8716 }, { "epoch": 0.55, "grad_norm": 0.6146027774224536, "learning_rate": 4.463786825044519e-06, "loss": 0.5033, "step": 8717 }, { "epoch": 0.55, "grad_norm": 1.60328592098261, "learning_rate": 4.462774233339741e-06, "loss": 0.2617, "step": 8718 }, { "epoch": 0.55, "grad_norm": 1.744840156791795, "learning_rate": 4.461761663925413e-06, "loss": 0.2869, "step": 8719 }, { "epoch": 0.55, "grad_norm": 2.196057371140877, "learning_rate": 4.460749116843553e-06, "loss": 0.2602, "step": 8720 }, { "epoch": 0.55, "grad_norm": 3.9060712691560484, "learning_rate": 4.459736592136167e-06, "loss": 0.2886, "step": 8721 }, { "epoch": 0.55, "grad_norm": 1.8881644284223496, "learning_rate": 4.45872408984527e-06, "loss": 0.275, "step": 8722 }, { "epoch": 0.55, "grad_norm": 1.7432649745383522, "learning_rate": 4.457711610012873e-06, "loss": 0.2992, "step": 8723 }, { "epoch": 0.55, "grad_norm": 2.916697148134185, "learning_rate": 4.456699152680984e-06, "loss": 0.2965, "step": 8724 }, { "epoch": 0.55, "grad_norm": 0.5853925670611175, "learning_rate": 4.455686717891613e-06, "loss": 0.4762, "step": 8725 }, { "epoch": 0.55, "grad_norm": 1.7172402368895858, "learning_rate": 4.454674305686766e-06, "loss": 0.2704, "step": 8726 }, { "epoch": 0.55, "grad_norm": 1.559030240890836, "learning_rate": 4.45366191610845e-06, "loss": 0.2662, "step": 8727 }, { "epoch": 0.55, "grad_norm": 2.1223966740884235, "learning_rate": 4.452649549198672e-06, "loss": 0.2764, "step": 8728 }, { "epoch": 0.55, "grad_norm": 3.6184665383907286, "learning_rate": 4.451637204999436e-06, "loss": 0.277, "step": 8729 }, { "epoch": 0.55, "grad_norm": 2.869302526381741, "learning_rate": 4.450624883552747e-06, "loss": 0.3031, "step": 8730 }, { "epoch": 0.55, "grad_norm": 7.422092786800748, "learning_rate": 4.449612584900608e-06, "loss": 0.2922, "step": 8731 }, { "epoch": 0.55, "grad_norm": 1.5763176131167882, "learning_rate": 4.4486003090850195e-06, "loss": 0.2778, "step": 8732 }, { "epoch": 0.55, "grad_norm": 1.9535099900192123, "learning_rate": 4.447588056147985e-06, "loss": 0.2831, "step": 8733 }, { "epoch": 0.55, "grad_norm": 2.1141862189935123, "learning_rate": 4.4465758261315e-06, "loss": 0.2727, "step": 8734 }, { "epoch": 0.55, "grad_norm": 2.1301601391388307, "learning_rate": 4.445563619077569e-06, "loss": 0.2897, "step": 8735 }, { "epoch": 0.55, "grad_norm": 6.302629049170874, "learning_rate": 4.444551435028189e-06, "loss": 0.2682, "step": 8736 }, { "epoch": 0.55, "grad_norm": 2.269731032538597, "learning_rate": 4.443539274025356e-06, "loss": 0.2707, "step": 8737 }, { "epoch": 0.55, "grad_norm": 2.3597455198382655, "learning_rate": 4.442527136111067e-06, "loss": 0.2982, "step": 8738 }, { "epoch": 0.55, "grad_norm": 2.2546564141912344, "learning_rate": 4.441515021327317e-06, "loss": 0.2788, "step": 8739 }, { "epoch": 0.55, "grad_norm": 2.8816917240269984, "learning_rate": 4.4405029297161e-06, "loss": 0.297, "step": 8740 }, { "epoch": 0.55, "grad_norm": 4.307853514275158, "learning_rate": 4.439490861319413e-06, "loss": 0.2669, "step": 8741 }, { "epoch": 0.55, "grad_norm": 1.4396761296655378, "learning_rate": 4.4384788161792425e-06, "loss": 0.2729, "step": 8742 }, { "epoch": 0.55, "grad_norm": 3.150460184328523, "learning_rate": 4.437466794337584e-06, "loss": 0.2988, "step": 8743 }, { "epoch": 0.55, "grad_norm": 1.8233458776763678, "learning_rate": 4.4364547958364275e-06, "loss": 0.2809, "step": 8744 }, { "epoch": 0.55, "grad_norm": 3.339442174771447, "learning_rate": 4.4354428207177645e-06, "loss": 0.3095, "step": 8745 }, { "epoch": 0.55, "grad_norm": 3.346649406151598, "learning_rate": 4.434430869023579e-06, "loss": 0.2827, "step": 8746 }, { "epoch": 0.55, "grad_norm": 2.5648129658430627, "learning_rate": 4.433418940795863e-06, "loss": 0.2657, "step": 8747 }, { "epoch": 0.55, "grad_norm": 2.066333816262808, "learning_rate": 4.432407036076601e-06, "loss": 0.2871, "step": 8748 }, { "epoch": 0.55, "grad_norm": 3.1392619710472855, "learning_rate": 4.4313951549077775e-06, "loss": 0.2633, "step": 8749 }, { "epoch": 0.55, "grad_norm": 5.482440548432782, "learning_rate": 4.430383297331381e-06, "loss": 0.2965, "step": 8750 }, { "epoch": 0.55, "grad_norm": 2.566228056856757, "learning_rate": 4.429371463389393e-06, "loss": 0.2913, "step": 8751 }, { "epoch": 0.55, "grad_norm": 5.598308007124853, "learning_rate": 4.428359653123796e-06, "loss": 0.2866, "step": 8752 }, { "epoch": 0.55, "grad_norm": 2.1237908125520835, "learning_rate": 4.427347866576573e-06, "loss": 0.2841, "step": 8753 }, { "epoch": 0.55, "grad_norm": 1.917361363393779, "learning_rate": 4.4263361037897046e-06, "loss": 0.2552, "step": 8754 }, { "epoch": 0.55, "grad_norm": 3.9872444843215287, "learning_rate": 4.4253243648051705e-06, "loss": 0.2882, "step": 8755 }, { "epoch": 0.55, "grad_norm": 2.092550239934306, "learning_rate": 4.424312649664948e-06, "loss": 0.2674, "step": 8756 }, { "epoch": 0.55, "grad_norm": 1.619481056358797, "learning_rate": 4.423300958411014e-06, "loss": 0.2811, "step": 8757 }, { "epoch": 0.55, "grad_norm": 2.3155184754507414, "learning_rate": 4.422289291085351e-06, "loss": 0.2899, "step": 8758 }, { "epoch": 0.55, "grad_norm": 0.6327497334036932, "learning_rate": 4.42127764772993e-06, "loss": 0.4672, "step": 8759 }, { "epoch": 0.55, "grad_norm": 3.089554601225153, "learning_rate": 4.420266028386728e-06, "loss": 0.2847, "step": 8760 }, { "epoch": 0.55, "grad_norm": 2.346104769242135, "learning_rate": 4.419254433097719e-06, "loss": 0.2635, "step": 8761 }, { "epoch": 0.55, "grad_norm": 1.890118108497835, "learning_rate": 4.418242861904873e-06, "loss": 0.2842, "step": 8762 }, { "epoch": 0.55, "grad_norm": 4.0512350898967515, "learning_rate": 4.417231314850166e-06, "loss": 0.279, "step": 8763 }, { "epoch": 0.55, "grad_norm": 2.2090497839069054, "learning_rate": 4.416219791975565e-06, "loss": 0.2671, "step": 8764 }, { "epoch": 0.55, "grad_norm": 3.9640017675332144, "learning_rate": 4.415208293323042e-06, "loss": 0.3007, "step": 8765 }, { "epoch": 0.55, "grad_norm": 2.5219195963378986, "learning_rate": 4.414196818934567e-06, "loss": 0.2573, "step": 8766 }, { "epoch": 0.55, "grad_norm": 1.841370004026359, "learning_rate": 4.413185368852106e-06, "loss": 0.2699, "step": 8767 }, { "epoch": 0.55, "grad_norm": 2.8130023330384395, "learning_rate": 4.412173943117626e-06, "loss": 0.2773, "step": 8768 }, { "epoch": 0.55, "grad_norm": 2.7112533878540717, "learning_rate": 4.4111625417730935e-06, "loss": 0.2785, "step": 8769 }, { "epoch": 0.55, "grad_norm": 3.498945737985453, "learning_rate": 4.410151164860472e-06, "loss": 0.2918, "step": 8770 }, { "epoch": 0.55, "grad_norm": 12.3917418633594, "learning_rate": 4.409139812421726e-06, "loss": 0.2571, "step": 8771 }, { "epoch": 0.55, "grad_norm": 3.9056012879771713, "learning_rate": 4.408128484498821e-06, "loss": 0.3047, "step": 8772 }, { "epoch": 0.55, "grad_norm": 3.110824954788999, "learning_rate": 4.407117181133715e-06, "loss": 0.2934, "step": 8773 }, { "epoch": 0.55, "grad_norm": 4.58552081856733, "learning_rate": 4.406105902368371e-06, "loss": 0.2676, "step": 8774 }, { "epoch": 0.55, "grad_norm": 1.6280054243402897, "learning_rate": 4.405094648244747e-06, "loss": 0.2647, "step": 8775 }, { "epoch": 0.55, "grad_norm": 1.9021394426162868, "learning_rate": 4.404083418804803e-06, "loss": 0.2641, "step": 8776 }, { "epoch": 0.55, "grad_norm": 2.3452257948415998, "learning_rate": 4.403072214090498e-06, "loss": 0.2935, "step": 8777 }, { "epoch": 0.55, "grad_norm": 4.024300497890209, "learning_rate": 4.402061034143785e-06, "loss": 0.2634, "step": 8778 }, { "epoch": 0.55, "grad_norm": 3.0979430973519455, "learning_rate": 4.401049879006621e-06, "loss": 0.2896, "step": 8779 }, { "epoch": 0.55, "grad_norm": 3.0786174457969815, "learning_rate": 4.400038748720964e-06, "loss": 0.2682, "step": 8780 }, { "epoch": 0.55, "grad_norm": 5.778758056238694, "learning_rate": 4.399027643328764e-06, "loss": 0.2667, "step": 8781 }, { "epoch": 0.55, "grad_norm": 7.407984165891581, "learning_rate": 4.3980165628719745e-06, "loss": 0.3092, "step": 8782 }, { "epoch": 0.55, "grad_norm": 1.9337897479889632, "learning_rate": 4.397005507392547e-06, "loss": 0.2704, "step": 8783 }, { "epoch": 0.55, "grad_norm": 2.0966101736019462, "learning_rate": 4.3959944769324314e-06, "loss": 0.2716, "step": 8784 }, { "epoch": 0.55, "grad_norm": 1.5932500690387283, "learning_rate": 4.394983471533579e-06, "loss": 0.2712, "step": 8785 }, { "epoch": 0.55, "grad_norm": 1.7693085950654157, "learning_rate": 4.393972491237935e-06, "loss": 0.2749, "step": 8786 }, { "epoch": 0.55, "grad_norm": 2.6338923319208116, "learning_rate": 4.39296153608745e-06, "loss": 0.2894, "step": 8787 }, { "epoch": 0.55, "grad_norm": 3.283903438910121, "learning_rate": 4.39195060612407e-06, "loss": 0.2868, "step": 8788 }, { "epoch": 0.55, "grad_norm": 2.030213942018656, "learning_rate": 4.390939701389738e-06, "loss": 0.2699, "step": 8789 }, { "epoch": 0.55, "grad_norm": 2.5366393441986386, "learning_rate": 4.3899288219264e-06, "loss": 0.2844, "step": 8790 }, { "epoch": 0.55, "grad_norm": 3.4356533143359775, "learning_rate": 4.388917967776e-06, "loss": 0.2797, "step": 8791 }, { "epoch": 0.55, "grad_norm": 2.782377717338023, "learning_rate": 4.387907138980477e-06, "loss": 0.287, "step": 8792 }, { "epoch": 0.55, "grad_norm": 2.123170864027156, "learning_rate": 4.386896335581776e-06, "loss": 0.2809, "step": 8793 }, { "epoch": 0.55, "grad_norm": 2.6215816364133904, "learning_rate": 4.385885557621833e-06, "loss": 0.2708, "step": 8794 }, { "epoch": 0.55, "grad_norm": 3.036613786061605, "learning_rate": 4.38487480514259e-06, "loss": 0.2908, "step": 8795 }, { "epoch": 0.55, "grad_norm": 1.3803151073275663, "learning_rate": 4.383864078185984e-06, "loss": 0.2701, "step": 8796 }, { "epoch": 0.55, "grad_norm": 2.813758549658407, "learning_rate": 4.382853376793951e-06, "loss": 0.2751, "step": 8797 }, { "epoch": 0.55, "grad_norm": 2.309428422198222, "learning_rate": 4.381842701008428e-06, "loss": 0.2689, "step": 8798 }, { "epoch": 0.55, "grad_norm": 2.4856878017942488, "learning_rate": 4.38083205087135e-06, "loss": 0.2734, "step": 8799 }, { "epoch": 0.55, "grad_norm": 2.587352160696565, "learning_rate": 4.379821426424649e-06, "loss": 0.2836, "step": 8800 }, { "epoch": 0.55, "grad_norm": 1.6788180503058727, "learning_rate": 4.378810827710258e-06, "loss": 0.2842, "step": 8801 }, { "epoch": 0.55, "grad_norm": 1.7877170572081649, "learning_rate": 4.3778002547701115e-06, "loss": 0.2987, "step": 8802 }, { "epoch": 0.55, "grad_norm": 1.8127992907502553, "learning_rate": 4.376789707646136e-06, "loss": 0.2597, "step": 8803 }, { "epoch": 0.55, "grad_norm": 1.6606962822774591, "learning_rate": 4.375779186380264e-06, "loss": 0.2674, "step": 8804 }, { "epoch": 0.55, "grad_norm": 1.6529512964717805, "learning_rate": 4.37476869101442e-06, "loss": 0.2755, "step": 8805 }, { "epoch": 0.55, "grad_norm": 2.1804116028790723, "learning_rate": 4.373758221590534e-06, "loss": 0.2697, "step": 8806 }, { "epoch": 0.55, "grad_norm": 0.659638487284497, "learning_rate": 4.372747778150534e-06, "loss": 0.4995, "step": 8807 }, { "epoch": 0.55, "grad_norm": 2.5290014777933707, "learning_rate": 4.37173736073634e-06, "loss": 0.303, "step": 8808 }, { "epoch": 0.55, "grad_norm": 7.305633194643116, "learning_rate": 4.3707269693898785e-06, "loss": 0.2826, "step": 8809 }, { "epoch": 0.55, "grad_norm": 2.0251981831796755, "learning_rate": 4.3697166041530745e-06, "loss": 0.3124, "step": 8810 }, { "epoch": 0.55, "grad_norm": 5.292296163971953, "learning_rate": 4.368706265067848e-06, "loss": 0.296, "step": 8811 }, { "epoch": 0.55, "grad_norm": 2.16034555572653, "learning_rate": 4.3676959521761204e-06, "loss": 0.3045, "step": 8812 }, { "epoch": 0.55, "grad_norm": 3.2779825250555747, "learning_rate": 4.36668566551981e-06, "loss": 0.2639, "step": 8813 }, { "epoch": 0.55, "grad_norm": 3.272932253024524, "learning_rate": 4.365675405140835e-06, "loss": 0.2909, "step": 8814 }, { "epoch": 0.55, "grad_norm": 2.0181629381687625, "learning_rate": 4.364665171081117e-06, "loss": 0.319, "step": 8815 }, { "epoch": 0.55, "grad_norm": 1.6001049146500168, "learning_rate": 4.363654963382566e-06, "loss": 0.2653, "step": 8816 }, { "epoch": 0.55, "grad_norm": 4.862646015552912, "learning_rate": 4.362644782087103e-06, "loss": 0.2967, "step": 8817 }, { "epoch": 0.55, "grad_norm": 1.7613298739018228, "learning_rate": 4.361634627236641e-06, "loss": 0.2885, "step": 8818 }, { "epoch": 0.55, "grad_norm": 3.3554023763187777, "learning_rate": 4.36062449887309e-06, "loss": 0.2969, "step": 8819 }, { "epoch": 0.55, "grad_norm": 2.3492156090735654, "learning_rate": 4.3596143970383665e-06, "loss": 0.2937, "step": 8820 }, { "epoch": 0.55, "grad_norm": 2.0430566626109665, "learning_rate": 4.358604321774378e-06, "loss": 0.2853, "step": 8821 }, { "epoch": 0.55, "grad_norm": 4.331127963768257, "learning_rate": 4.357594273123034e-06, "loss": 0.2727, "step": 8822 }, { "epoch": 0.55, "grad_norm": 2.7288435665347155, "learning_rate": 4.356584251126247e-06, "loss": 0.2853, "step": 8823 }, { "epoch": 0.55, "grad_norm": 3.4801098710765923, "learning_rate": 4.35557425582592e-06, "loss": 0.2593, "step": 8824 }, { "epoch": 0.55, "grad_norm": 1.534546151544299, "learning_rate": 4.354564287263963e-06, "loss": 0.289, "step": 8825 }, { "epoch": 0.56, "grad_norm": 2.500854742667218, "learning_rate": 4.353554345482281e-06, "loss": 0.3003, "step": 8826 }, { "epoch": 0.56, "grad_norm": 2.0811475203848486, "learning_rate": 4.352544430522776e-06, "loss": 0.2637, "step": 8827 }, { "epoch": 0.56, "grad_norm": 1.736678915037675, "learning_rate": 4.3515345424273534e-06, "loss": 0.2727, "step": 8828 }, { "epoch": 0.56, "grad_norm": 2.779011418387543, "learning_rate": 4.350524681237914e-06, "loss": 0.2915, "step": 8829 }, { "epoch": 0.56, "grad_norm": 2.3772966415982157, "learning_rate": 4.349514846996359e-06, "loss": 0.3121, "step": 8830 }, { "epoch": 0.56, "grad_norm": 2.9069514685270286, "learning_rate": 4.348505039744588e-06, "loss": 0.264, "step": 8831 }, { "epoch": 0.56, "grad_norm": 1.8079596990685507, "learning_rate": 4.347495259524502e-06, "loss": 0.2663, "step": 8832 }, { "epoch": 0.56, "grad_norm": 3.0552600442669373, "learning_rate": 4.346485506377995e-06, "loss": 0.29, "step": 8833 }, { "epoch": 0.56, "grad_norm": 4.443785518207296, "learning_rate": 4.345475780346966e-06, "loss": 0.2826, "step": 8834 }, { "epoch": 0.56, "grad_norm": 1.4957452462973535, "learning_rate": 4.344466081473308e-06, "loss": 0.2745, "step": 8835 }, { "epoch": 0.56, "grad_norm": 2.79804967424767, "learning_rate": 4.343456409798919e-06, "loss": 0.2953, "step": 8836 }, { "epoch": 0.56, "grad_norm": 2.847971924603, "learning_rate": 4.342446765365688e-06, "loss": 0.2739, "step": 8837 }, { "epoch": 0.56, "grad_norm": 2.6603864587335897, "learning_rate": 4.341437148215509e-06, "loss": 0.2813, "step": 8838 }, { "epoch": 0.56, "grad_norm": 1.9345727328991653, "learning_rate": 4.3404275583902715e-06, "loss": 0.2838, "step": 8839 }, { "epoch": 0.56, "grad_norm": 1.8273485211933544, "learning_rate": 4.339417995931868e-06, "loss": 0.2805, "step": 8840 }, { "epoch": 0.56, "grad_norm": 1.9221728253766586, "learning_rate": 4.3384084608821836e-06, "loss": 0.2709, "step": 8841 }, { "epoch": 0.56, "grad_norm": 2.3001131084931226, "learning_rate": 4.337398953283108e-06, "loss": 0.2908, "step": 8842 }, { "epoch": 0.56, "grad_norm": 5.327751492623303, "learning_rate": 4.3363894731765275e-06, "loss": 0.2832, "step": 8843 }, { "epoch": 0.56, "grad_norm": 2.286516408866927, "learning_rate": 4.335380020604325e-06, "loss": 0.2774, "step": 8844 }, { "epoch": 0.56, "grad_norm": 3.4334218433812853, "learning_rate": 4.334370595608388e-06, "loss": 0.2909, "step": 8845 }, { "epoch": 0.56, "grad_norm": 2.558465768562449, "learning_rate": 4.333361198230594e-06, "loss": 0.2876, "step": 8846 }, { "epoch": 0.56, "grad_norm": 3.5918837871139515, "learning_rate": 4.33235182851283e-06, "loss": 0.2767, "step": 8847 }, { "epoch": 0.56, "grad_norm": 1.7756730051623848, "learning_rate": 4.331342486496974e-06, "loss": 0.2787, "step": 8848 }, { "epoch": 0.56, "grad_norm": 4.72648946975082, "learning_rate": 4.330333172224906e-06, "loss": 0.2697, "step": 8849 }, { "epoch": 0.56, "grad_norm": 1.9080727141380536, "learning_rate": 4.3293238857385055e-06, "loss": 0.2665, "step": 8850 }, { "epoch": 0.56, "grad_norm": 1.5495870138713592, "learning_rate": 4.328314627079647e-06, "loss": 0.2699, "step": 8851 }, { "epoch": 0.56, "grad_norm": 2.228494027164655, "learning_rate": 4.327305396290208e-06, "loss": 0.2666, "step": 8852 }, { "epoch": 0.56, "grad_norm": 4.40738067116132, "learning_rate": 4.326296193412061e-06, "loss": 0.2687, "step": 8853 }, { "epoch": 0.56, "grad_norm": 3.4099924326658404, "learning_rate": 4.325287018487085e-06, "loss": 0.2666, "step": 8854 }, { "epoch": 0.56, "grad_norm": 2.716089079601977, "learning_rate": 4.324277871557146e-06, "loss": 0.2682, "step": 8855 }, { "epoch": 0.56, "grad_norm": 2.406387412827769, "learning_rate": 4.323268752664121e-06, "loss": 0.2712, "step": 8856 }, { "epoch": 0.56, "grad_norm": 2.047449486050684, "learning_rate": 4.322259661849876e-06, "loss": 0.263, "step": 8857 }, { "epoch": 0.56, "grad_norm": 8.474439262709888, "learning_rate": 4.321250599156282e-06, "loss": 0.2917, "step": 8858 }, { "epoch": 0.56, "grad_norm": 2.1979414166924056, "learning_rate": 4.320241564625206e-06, "loss": 0.2574, "step": 8859 }, { "epoch": 0.56, "grad_norm": 1.9528934305107515, "learning_rate": 4.319232558298514e-06, "loss": 0.2685, "step": 8860 }, { "epoch": 0.56, "grad_norm": 4.164371315133676, "learning_rate": 4.318223580218072e-06, "loss": 0.298, "step": 8861 }, { "epoch": 0.56, "grad_norm": 2.1918782637115277, "learning_rate": 4.3172146304257466e-06, "loss": 0.2712, "step": 8862 }, { "epoch": 0.56, "grad_norm": 5.402434207344038, "learning_rate": 4.316205708963398e-06, "loss": 0.2665, "step": 8863 }, { "epoch": 0.56, "grad_norm": 1.443865349213514, "learning_rate": 4.315196815872889e-06, "loss": 0.2689, "step": 8864 }, { "epoch": 0.56, "grad_norm": 2.0935529911608937, "learning_rate": 4.314187951196081e-06, "loss": 0.2977, "step": 8865 }, { "epoch": 0.56, "grad_norm": 2.4900474685702436, "learning_rate": 4.313179114974832e-06, "loss": 0.2671, "step": 8866 }, { "epoch": 0.56, "grad_norm": 1.5291984547588007, "learning_rate": 4.312170307251003e-06, "loss": 0.2703, "step": 8867 }, { "epoch": 0.56, "grad_norm": 2.4134436391843623, "learning_rate": 4.311161528066447e-06, "loss": 0.2716, "step": 8868 }, { "epoch": 0.56, "grad_norm": 2.422658943864349, "learning_rate": 4.310152777463025e-06, "loss": 0.2814, "step": 8869 }, { "epoch": 0.56, "grad_norm": 1.5609013701217902, "learning_rate": 4.309144055482589e-06, "loss": 0.273, "step": 8870 }, { "epoch": 0.56, "grad_norm": 3.8882123327606752, "learning_rate": 4.308135362166993e-06, "loss": 0.2802, "step": 8871 }, { "epoch": 0.56, "grad_norm": 1.5452605078974324, "learning_rate": 4.307126697558091e-06, "loss": 0.2589, "step": 8872 }, { "epoch": 0.56, "grad_norm": 2.0462801740054424, "learning_rate": 4.306118061697732e-06, "loss": 0.2643, "step": 8873 }, { "epoch": 0.56, "grad_norm": 1.617369301677123, "learning_rate": 4.305109454627766e-06, "loss": 0.2598, "step": 8874 }, { "epoch": 0.56, "grad_norm": 3.2837312884593506, "learning_rate": 4.304100876390045e-06, "loss": 0.2671, "step": 8875 }, { "epoch": 0.56, "grad_norm": 2.102618332814836, "learning_rate": 4.303092327026412e-06, "loss": 0.2781, "step": 8876 }, { "epoch": 0.56, "grad_norm": 5.642870964127251, "learning_rate": 4.302083806578718e-06, "loss": 0.2976, "step": 8877 }, { "epoch": 0.56, "grad_norm": 3.2466194788050067, "learning_rate": 4.301075315088808e-06, "loss": 0.2836, "step": 8878 }, { "epoch": 0.56, "grad_norm": 1.6094421454054106, "learning_rate": 4.300066852598522e-06, "loss": 0.2735, "step": 8879 }, { "epoch": 0.56, "grad_norm": 2.7641210259148083, "learning_rate": 4.299058419149707e-06, "loss": 0.304, "step": 8880 }, { "epoch": 0.56, "grad_norm": 2.497019961234568, "learning_rate": 4.298050014784203e-06, "loss": 0.2897, "step": 8881 }, { "epoch": 0.56, "grad_norm": 2.2130726264740694, "learning_rate": 4.29704163954385e-06, "loss": 0.2581, "step": 8882 }, { "epoch": 0.56, "grad_norm": 1.7827414494481095, "learning_rate": 4.296033293470487e-06, "loss": 0.2575, "step": 8883 }, { "epoch": 0.56, "grad_norm": 1.5952871810474307, "learning_rate": 4.295024976605955e-06, "loss": 0.2678, "step": 8884 }, { "epoch": 0.56, "grad_norm": 5.048186536648519, "learning_rate": 4.2940166889920885e-06, "loss": 0.305, "step": 8885 }, { "epoch": 0.56, "grad_norm": 2.468594996183448, "learning_rate": 4.293008430670724e-06, "loss": 0.2753, "step": 8886 }, { "epoch": 0.56, "grad_norm": 2.04924145427867, "learning_rate": 4.2920002016836944e-06, "loss": 0.2693, "step": 8887 }, { "epoch": 0.56, "grad_norm": 1.8709365030415537, "learning_rate": 4.290992002072836e-06, "loss": 0.2706, "step": 8888 }, { "epoch": 0.56, "grad_norm": 1.874629366645945, "learning_rate": 4.2899838318799765e-06, "loss": 0.2766, "step": 8889 }, { "epoch": 0.56, "grad_norm": 1.879841030053625, "learning_rate": 4.288975691146949e-06, "loss": 0.271, "step": 8890 }, { "epoch": 0.56, "grad_norm": 2.1328135006619013, "learning_rate": 4.287967579915583e-06, "loss": 0.2801, "step": 8891 }, { "epoch": 0.56, "grad_norm": 1.8558413617193286, "learning_rate": 4.286959498227708e-06, "loss": 0.2657, "step": 8892 }, { "epoch": 0.56, "grad_norm": 3.7925751896906728, "learning_rate": 4.285951446125149e-06, "loss": 0.26, "step": 8893 }, { "epoch": 0.56, "grad_norm": 2.128898122164901, "learning_rate": 4.284943423649734e-06, "loss": 0.3001, "step": 8894 }, { "epoch": 0.56, "grad_norm": 2.1364360496462225, "learning_rate": 4.283935430843285e-06, "loss": 0.2593, "step": 8895 }, { "epoch": 0.56, "grad_norm": 1.7862988839976337, "learning_rate": 4.2829274677476284e-06, "loss": 0.261, "step": 8896 }, { "epoch": 0.56, "grad_norm": 2.728330977776241, "learning_rate": 4.281919534404584e-06, "loss": 0.2665, "step": 8897 }, { "epoch": 0.56, "grad_norm": 1.9728779970840908, "learning_rate": 4.280911630855972e-06, "loss": 0.2592, "step": 8898 }, { "epoch": 0.56, "grad_norm": 2.7561037680395684, "learning_rate": 4.279903757143615e-06, "loss": 0.2888, "step": 8899 }, { "epoch": 0.56, "grad_norm": 1.9675289154909317, "learning_rate": 4.278895913309331e-06, "loss": 0.2886, "step": 8900 }, { "epoch": 0.56, "grad_norm": 2.267841875167877, "learning_rate": 4.277888099394935e-06, "loss": 0.2786, "step": 8901 }, { "epoch": 0.56, "grad_norm": 5.322292873054597, "learning_rate": 4.276880315442246e-06, "loss": 0.2762, "step": 8902 }, { "epoch": 0.56, "grad_norm": 2.0458923668148548, "learning_rate": 4.275872561493076e-06, "loss": 0.2754, "step": 8903 }, { "epoch": 0.56, "grad_norm": 3.526948251766585, "learning_rate": 4.274864837589241e-06, "loss": 0.2759, "step": 8904 }, { "epoch": 0.56, "grad_norm": 3.3858840883045325, "learning_rate": 4.27385714377255e-06, "loss": 0.2814, "step": 8905 }, { "epoch": 0.56, "grad_norm": 7.315365768078168, "learning_rate": 4.272849480084815e-06, "loss": 0.2815, "step": 8906 }, { "epoch": 0.56, "grad_norm": 4.38894179386223, "learning_rate": 4.271841846567848e-06, "loss": 0.2845, "step": 8907 }, { "epoch": 0.56, "grad_norm": 2.2669536190395756, "learning_rate": 4.270834243263457e-06, "loss": 0.2651, "step": 8908 }, { "epoch": 0.56, "grad_norm": 6.1972565635394075, "learning_rate": 4.269826670213448e-06, "loss": 0.2761, "step": 8909 }, { "epoch": 0.56, "grad_norm": 1.9157004083104388, "learning_rate": 4.268819127459628e-06, "loss": 0.2791, "step": 8910 }, { "epoch": 0.56, "grad_norm": 2.1022674094391616, "learning_rate": 4.2678116150437996e-06, "loss": 0.2913, "step": 8911 }, { "epoch": 0.56, "grad_norm": 1.9327625254882868, "learning_rate": 4.266804133007768e-06, "loss": 0.2667, "step": 8912 }, { "epoch": 0.56, "grad_norm": 1.9456884074858392, "learning_rate": 4.265796681393334e-06, "loss": 0.2734, "step": 8913 }, { "epoch": 0.56, "grad_norm": 4.111866457250431, "learning_rate": 4.264789260242302e-06, "loss": 0.2823, "step": 8914 }, { "epoch": 0.56, "grad_norm": 1.7224210336776686, "learning_rate": 4.2637818695964695e-06, "loss": 0.2656, "step": 8915 }, { "epoch": 0.56, "grad_norm": 2.3786303189828897, "learning_rate": 4.262774509497635e-06, "loss": 0.2583, "step": 8916 }, { "epoch": 0.56, "grad_norm": 0.6003560851926935, "learning_rate": 4.261767179987595e-06, "loss": 0.4711, "step": 8917 }, { "epoch": 0.56, "grad_norm": 2.0008897705973374, "learning_rate": 4.260759881108146e-06, "loss": 0.2676, "step": 8918 }, { "epoch": 0.56, "grad_norm": 1.5084236603405603, "learning_rate": 4.259752612901082e-06, "loss": 0.2782, "step": 8919 }, { "epoch": 0.56, "grad_norm": 2.2484175022206485, "learning_rate": 4.258745375408197e-06, "loss": 0.2773, "step": 8920 }, { "epoch": 0.56, "grad_norm": 1.9786812173023716, "learning_rate": 4.257738168671282e-06, "loss": 0.2699, "step": 8921 }, { "epoch": 0.56, "grad_norm": 0.5888677555861132, "learning_rate": 4.256730992732131e-06, "loss": 0.4822, "step": 8922 }, { "epoch": 0.56, "grad_norm": 2.9832092959576713, "learning_rate": 4.25572384763253e-06, "loss": 0.267, "step": 8923 }, { "epoch": 0.56, "grad_norm": 1.6721588709757083, "learning_rate": 4.25471673341427e-06, "loss": 0.277, "step": 8924 }, { "epoch": 0.56, "grad_norm": 2.28239510560659, "learning_rate": 4.253709650119136e-06, "loss": 0.286, "step": 8925 }, { "epoch": 0.56, "grad_norm": 2.500595624838231, "learning_rate": 4.252702597788914e-06, "loss": 0.2811, "step": 8926 }, { "epoch": 0.56, "grad_norm": 3.236622128288961, "learning_rate": 4.251695576465387e-06, "loss": 0.2617, "step": 8927 }, { "epoch": 0.56, "grad_norm": 2.2044057139659348, "learning_rate": 4.25068858619034e-06, "loss": 0.2667, "step": 8928 }, { "epoch": 0.56, "grad_norm": 10.69057199713099, "learning_rate": 4.249681627005555e-06, "loss": 0.2886, "step": 8929 }, { "epoch": 0.56, "grad_norm": 2.8128634196629156, "learning_rate": 4.248674698952813e-06, "loss": 0.2816, "step": 8930 }, { "epoch": 0.56, "grad_norm": 4.9092676289630655, "learning_rate": 4.247667802073892e-06, "loss": 0.271, "step": 8931 }, { "epoch": 0.56, "grad_norm": 1.7101813743100585, "learning_rate": 4.2466609364105705e-06, "loss": 0.2688, "step": 8932 }, { "epoch": 0.56, "grad_norm": 2.0294173890705043, "learning_rate": 4.2456541020046235e-06, "loss": 0.2712, "step": 8933 }, { "epoch": 0.56, "grad_norm": 2.3269548514176295, "learning_rate": 4.24464729889783e-06, "loss": 0.2714, "step": 8934 }, { "epoch": 0.56, "grad_norm": 3.065650619612148, "learning_rate": 4.243640527131956e-06, "loss": 0.2695, "step": 8935 }, { "epoch": 0.56, "grad_norm": 2.756013136161544, "learning_rate": 4.242633786748786e-06, "loss": 0.2737, "step": 8936 }, { "epoch": 0.56, "grad_norm": 1.9746356836729337, "learning_rate": 4.241627077790083e-06, "loss": 0.2925, "step": 8937 }, { "epoch": 0.56, "grad_norm": 2.188654632913122, "learning_rate": 4.24062040029762e-06, "loss": 0.288, "step": 8938 }, { "epoch": 0.56, "grad_norm": 2.3826716843561906, "learning_rate": 4.239613754313166e-06, "loss": 0.2909, "step": 8939 }, { "epoch": 0.56, "grad_norm": 2.8464110193219216, "learning_rate": 4.238607139878488e-06, "loss": 0.2727, "step": 8940 }, { "epoch": 0.56, "grad_norm": 1.8897168708999552, "learning_rate": 4.237600557035351e-06, "loss": 0.2829, "step": 8941 }, { "epoch": 0.56, "grad_norm": 5.811244011148771, "learning_rate": 4.236594005825522e-06, "loss": 0.2942, "step": 8942 }, { "epoch": 0.56, "grad_norm": 2.3708395973271745, "learning_rate": 4.235587486290761e-06, "loss": 0.2816, "step": 8943 }, { "epoch": 0.56, "grad_norm": 1.848707589575137, "learning_rate": 4.234580998472836e-06, "loss": 0.2735, "step": 8944 }, { "epoch": 0.56, "grad_norm": 2.0818197647066383, "learning_rate": 4.2335745424135035e-06, "loss": 0.2895, "step": 8945 }, { "epoch": 0.56, "grad_norm": 1.8105190386405015, "learning_rate": 4.232568118154526e-06, "loss": 0.2802, "step": 8946 }, { "epoch": 0.56, "grad_norm": 2.6744721580363158, "learning_rate": 4.231561725737659e-06, "loss": 0.303, "step": 8947 }, { "epoch": 0.56, "grad_norm": 2.7062637016033686, "learning_rate": 4.230555365204662e-06, "loss": 0.2674, "step": 8948 }, { "epoch": 0.56, "grad_norm": 2.014579609195482, "learning_rate": 4.229549036597289e-06, "loss": 0.2693, "step": 8949 }, { "epoch": 0.56, "grad_norm": 1.8115931725341627, "learning_rate": 4.228542739957293e-06, "loss": 0.2751, "step": 8950 }, { "epoch": 0.56, "grad_norm": 3.1619926271356884, "learning_rate": 4.227536475326431e-06, "loss": 0.3196, "step": 8951 }, { "epoch": 0.56, "grad_norm": 4.148882830322034, "learning_rate": 4.226530242746454e-06, "loss": 0.2791, "step": 8952 }, { "epoch": 0.56, "grad_norm": 2.0251760402930987, "learning_rate": 4.2255240422591084e-06, "loss": 0.2858, "step": 8953 }, { "epoch": 0.56, "grad_norm": 2.6610577453796638, "learning_rate": 4.224517873906149e-06, "loss": 0.2654, "step": 8954 }, { "epoch": 0.56, "grad_norm": 2.2941427565915737, "learning_rate": 4.223511737729317e-06, "loss": 0.2887, "step": 8955 }, { "epoch": 0.56, "grad_norm": 2.6778714793567895, "learning_rate": 4.222505633770365e-06, "loss": 0.3064, "step": 8956 }, { "epoch": 0.56, "grad_norm": 2.4397769365284927, "learning_rate": 4.221499562071034e-06, "loss": 0.2769, "step": 8957 }, { "epoch": 0.56, "grad_norm": 2.5081654100224524, "learning_rate": 4.220493522673067e-06, "loss": 0.2824, "step": 8958 }, { "epoch": 0.56, "grad_norm": 1.6314409939815784, "learning_rate": 4.219487515618211e-06, "loss": 0.28, "step": 8959 }, { "epoch": 0.56, "grad_norm": 5.368558570660478, "learning_rate": 4.218481540948204e-06, "loss": 0.2683, "step": 8960 }, { "epoch": 0.56, "grad_norm": 2.9065182706097263, "learning_rate": 4.217475598704785e-06, "loss": 0.2981, "step": 8961 }, { "epoch": 0.56, "grad_norm": 2.1749750023851813, "learning_rate": 4.216469688929695e-06, "loss": 0.2622, "step": 8962 }, { "epoch": 0.56, "grad_norm": 2.770772405111811, "learning_rate": 4.215463811664668e-06, "loss": 0.3086, "step": 8963 }, { "epoch": 0.56, "grad_norm": 2.412456722464413, "learning_rate": 4.214457966951442e-06, "loss": 0.2791, "step": 8964 }, { "epoch": 0.56, "grad_norm": 1.9763561165106647, "learning_rate": 4.213452154831747e-06, "loss": 0.256, "step": 8965 }, { "epoch": 0.56, "grad_norm": 2.39744250713294, "learning_rate": 4.212446375347322e-06, "loss": 0.2825, "step": 8966 }, { "epoch": 0.56, "grad_norm": 2.4996357824472555, "learning_rate": 4.211440628539896e-06, "loss": 0.2927, "step": 8967 }, { "epoch": 0.56, "grad_norm": 3.857622541435515, "learning_rate": 4.210434914451199e-06, "loss": 0.2789, "step": 8968 }, { "epoch": 0.56, "grad_norm": 2.0531193896832303, "learning_rate": 4.209429233122958e-06, "loss": 0.2887, "step": 8969 }, { "epoch": 0.56, "grad_norm": 2.7214215778903053, "learning_rate": 4.208423584596904e-06, "loss": 0.2895, "step": 8970 }, { "epoch": 0.56, "grad_norm": 3.1945404384958525, "learning_rate": 4.2074179689147605e-06, "loss": 0.2825, "step": 8971 }, { "epoch": 0.56, "grad_norm": 2.990575182429611, "learning_rate": 4.206412386118255e-06, "loss": 0.2554, "step": 8972 }, { "epoch": 0.56, "grad_norm": 3.0597585020530214, "learning_rate": 4.205406836249106e-06, "loss": 0.2827, "step": 8973 }, { "epoch": 0.56, "grad_norm": 2.3306231040741285, "learning_rate": 4.20440131934904e-06, "loss": 0.2652, "step": 8974 }, { "epoch": 0.56, "grad_norm": 2.1147726140867236, "learning_rate": 4.203395835459778e-06, "loss": 0.2653, "step": 8975 }, { "epoch": 0.56, "grad_norm": 1.7720739081897614, "learning_rate": 4.202390384623037e-06, "loss": 0.2642, "step": 8976 }, { "epoch": 0.56, "grad_norm": 3.163967357881253, "learning_rate": 4.201384966880535e-06, "loss": 0.2689, "step": 8977 }, { "epoch": 0.56, "grad_norm": 2.0608159511849995, "learning_rate": 4.200379582273991e-06, "loss": 0.2697, "step": 8978 }, { "epoch": 0.56, "grad_norm": 1.8158680199937984, "learning_rate": 4.199374230845116e-06, "loss": 0.2654, "step": 8979 }, { "epoch": 0.56, "grad_norm": 1.9732864716133736, "learning_rate": 4.198368912635626e-06, "loss": 0.2733, "step": 8980 }, { "epoch": 0.56, "grad_norm": 4.561783066553702, "learning_rate": 4.197363627687234e-06, "loss": 0.2807, "step": 8981 }, { "epoch": 0.56, "grad_norm": 8.314744656264878, "learning_rate": 4.196358376041653e-06, "loss": 0.273, "step": 8982 }, { "epoch": 0.56, "grad_norm": 4.240795509974399, "learning_rate": 4.195353157740589e-06, "loss": 0.2938, "step": 8983 }, { "epoch": 0.56, "grad_norm": 2.195059031410783, "learning_rate": 4.194347972825752e-06, "loss": 0.3006, "step": 8984 }, { "epoch": 0.57, "grad_norm": 2.17132524491701, "learning_rate": 4.193342821338848e-06, "loss": 0.2773, "step": 8985 }, { "epoch": 0.57, "grad_norm": 6.7391202719302985, "learning_rate": 4.192337703321584e-06, "loss": 0.2733, "step": 8986 }, { "epoch": 0.57, "grad_norm": 2.0077722985303383, "learning_rate": 4.191332618815663e-06, "loss": 0.2546, "step": 8987 }, { "epoch": 0.57, "grad_norm": 2.1119538811705834, "learning_rate": 4.190327567862786e-06, "loss": 0.2733, "step": 8988 }, { "epoch": 0.57, "grad_norm": 2.54282550770554, "learning_rate": 4.189322550504658e-06, "loss": 0.2759, "step": 8989 }, { "epoch": 0.57, "grad_norm": 2.1324939689176095, "learning_rate": 4.1883175667829775e-06, "loss": 0.2736, "step": 8990 }, { "epoch": 0.57, "grad_norm": 1.9892797161565217, "learning_rate": 4.1873126167394425e-06, "loss": 0.2995, "step": 8991 }, { "epoch": 0.57, "grad_norm": 2.969412900225151, "learning_rate": 4.186307700415752e-06, "loss": 0.2571, "step": 8992 }, { "epoch": 0.57, "grad_norm": 1.8208932198518593, "learning_rate": 4.185302817853599e-06, "loss": 0.2683, "step": 8993 }, { "epoch": 0.57, "grad_norm": 11.344225976821004, "learning_rate": 4.18429796909468e-06, "loss": 0.2904, "step": 8994 }, { "epoch": 0.57, "grad_norm": 3.1287523203770173, "learning_rate": 4.183293154180684e-06, "loss": 0.2871, "step": 8995 }, { "epoch": 0.57, "grad_norm": 2.5447129794893724, "learning_rate": 4.1822883731533085e-06, "loss": 0.2702, "step": 8996 }, { "epoch": 0.57, "grad_norm": 2.6363423048591406, "learning_rate": 4.18128362605424e-06, "loss": 0.2783, "step": 8997 }, { "epoch": 0.57, "grad_norm": 3.5618377162236095, "learning_rate": 4.180278912925169e-06, "loss": 0.29, "step": 8998 }, { "epoch": 0.57, "grad_norm": 1.874233588117033, "learning_rate": 4.1792742338077806e-06, "loss": 0.2697, "step": 8999 }, { "epoch": 0.57, "grad_norm": 2.1873105727580326, "learning_rate": 4.178269588743764e-06, "loss": 0.2781, "step": 9000 }, { "epoch": 0.57, "grad_norm": 1.5649838974752368, "learning_rate": 4.1772649777748e-06, "loss": 0.2714, "step": 9001 }, { "epoch": 0.57, "grad_norm": 3.177294078163326, "learning_rate": 4.1762604009425745e-06, "loss": 0.258, "step": 9002 }, { "epoch": 0.57, "grad_norm": 2.1328497363526147, "learning_rate": 4.175255858288765e-06, "loss": 0.2743, "step": 9003 }, { "epoch": 0.57, "grad_norm": 2.170079281837565, "learning_rate": 4.174251349855058e-06, "loss": 0.2749, "step": 9004 }, { "epoch": 0.57, "grad_norm": 1.715033219634509, "learning_rate": 4.173246875683128e-06, "loss": 0.2712, "step": 9005 }, { "epoch": 0.57, "grad_norm": 3.0388177094530606, "learning_rate": 4.1722424358146555e-06, "loss": 0.2861, "step": 9006 }, { "epoch": 0.57, "grad_norm": 3.4158034859742052, "learning_rate": 4.1712380302913125e-06, "loss": 0.2651, "step": 9007 }, { "epoch": 0.57, "grad_norm": 5.455353466151032, "learning_rate": 4.170233659154777e-06, "loss": 0.2708, "step": 9008 }, { "epoch": 0.57, "grad_norm": 2.0803466282254988, "learning_rate": 4.169229322446719e-06, "loss": 0.2818, "step": 9009 }, { "epoch": 0.57, "grad_norm": 7.300872631067211, "learning_rate": 4.168225020208813e-06, "loss": 0.2863, "step": 9010 }, { "epoch": 0.57, "grad_norm": 1.9927010428964396, "learning_rate": 4.167220752482728e-06, "loss": 0.2686, "step": 9011 }, { "epoch": 0.57, "grad_norm": 1.8583405483567006, "learning_rate": 4.166216519310134e-06, "loss": 0.2753, "step": 9012 }, { "epoch": 0.57, "grad_norm": 2.1441962938758565, "learning_rate": 4.165212320732696e-06, "loss": 0.2592, "step": 9013 }, { "epoch": 0.57, "grad_norm": 2.7073128293033326, "learning_rate": 4.1642081567920845e-06, "loss": 0.2812, "step": 9014 }, { "epoch": 0.57, "grad_norm": 0.6457581470100427, "learning_rate": 4.163204027529959e-06, "loss": 0.4653, "step": 9015 }, { "epoch": 0.57, "grad_norm": 2.6065210477113765, "learning_rate": 4.162199932987986e-06, "loss": 0.2681, "step": 9016 }, { "epoch": 0.57, "grad_norm": 2.4496132064690843, "learning_rate": 4.161195873207824e-06, "loss": 0.2724, "step": 9017 }, { "epoch": 0.57, "grad_norm": 1.954544716150455, "learning_rate": 4.1601918482311355e-06, "loss": 0.2716, "step": 9018 }, { "epoch": 0.57, "grad_norm": 1.8725483274683463, "learning_rate": 4.159187858099579e-06, "loss": 0.276, "step": 9019 }, { "epoch": 0.57, "grad_norm": 1.2842182803767546, "learning_rate": 4.158183902854813e-06, "loss": 0.2446, "step": 9020 }, { "epoch": 0.57, "grad_norm": 1.8445995790033862, "learning_rate": 4.15717998253849e-06, "loss": 0.2841, "step": 9021 }, { "epoch": 0.57, "grad_norm": 1.8235877124634015, "learning_rate": 4.156176097192269e-06, "loss": 0.2764, "step": 9022 }, { "epoch": 0.57, "grad_norm": 2.319956144085122, "learning_rate": 4.1551722468577995e-06, "loss": 0.2723, "step": 9023 }, { "epoch": 0.57, "grad_norm": 1.9463835209956808, "learning_rate": 4.154168431576734e-06, "loss": 0.2898, "step": 9024 }, { "epoch": 0.57, "grad_norm": 2.209764728812881, "learning_rate": 4.153164651390721e-06, "loss": 0.2845, "step": 9025 }, { "epoch": 0.57, "grad_norm": 5.926766489150122, "learning_rate": 4.152160906341413e-06, "loss": 0.2861, "step": 9026 }, { "epoch": 0.57, "grad_norm": 2.843391699472927, "learning_rate": 4.151157196470454e-06, "loss": 0.2769, "step": 9027 }, { "epoch": 0.57, "grad_norm": 1.6687440898243957, "learning_rate": 4.150153521819491e-06, "loss": 0.2732, "step": 9028 }, { "epoch": 0.57, "grad_norm": 2.4487680916028647, "learning_rate": 4.149149882430168e-06, "loss": 0.3057, "step": 9029 }, { "epoch": 0.57, "grad_norm": 1.5382904606927403, "learning_rate": 4.148146278344128e-06, "loss": 0.2713, "step": 9030 }, { "epoch": 0.57, "grad_norm": 1.5606882320486197, "learning_rate": 4.147142709603011e-06, "loss": 0.27, "step": 9031 }, { "epoch": 0.57, "grad_norm": 3.189897193920214, "learning_rate": 4.1461391762484574e-06, "loss": 0.2991, "step": 9032 }, { "epoch": 0.57, "grad_norm": 2.827595602428548, "learning_rate": 4.145135678322106e-06, "loss": 0.2787, "step": 9033 }, { "epoch": 0.57, "grad_norm": 4.112619212290249, "learning_rate": 4.144132215865595e-06, "loss": 0.2929, "step": 9034 }, { "epoch": 0.57, "grad_norm": 6.160940061450189, "learning_rate": 4.143128788920558e-06, "loss": 0.2894, "step": 9035 }, { "epoch": 0.57, "grad_norm": 4.106577037100918, "learning_rate": 4.14212539752863e-06, "loss": 0.2768, "step": 9036 }, { "epoch": 0.57, "grad_norm": 1.7121411615911037, "learning_rate": 4.141122041731443e-06, "loss": 0.3053, "step": 9037 }, { "epoch": 0.57, "grad_norm": 6.595180974080103, "learning_rate": 4.140118721570628e-06, "loss": 0.2968, "step": 9038 }, { "epoch": 0.57, "grad_norm": 2.7088594743653682, "learning_rate": 4.139115437087814e-06, "loss": 0.2978, "step": 9039 }, { "epoch": 0.57, "grad_norm": 2.6442278626820217, "learning_rate": 4.138112188324629e-06, "loss": 0.2948, "step": 9040 }, { "epoch": 0.57, "grad_norm": 2.3567313026743775, "learning_rate": 4.1371089753227e-06, "loss": 0.2952, "step": 9041 }, { "epoch": 0.57, "grad_norm": 1.5945756403391027, "learning_rate": 4.136105798123654e-06, "loss": 0.2747, "step": 9042 }, { "epoch": 0.57, "grad_norm": 2.150254403840772, "learning_rate": 4.135102656769112e-06, "loss": 0.2576, "step": 9043 }, { "epoch": 0.57, "grad_norm": 2.429389980405473, "learning_rate": 4.134099551300698e-06, "loss": 0.2616, "step": 9044 }, { "epoch": 0.57, "grad_norm": 2.0269162189147427, "learning_rate": 4.1330964817600305e-06, "loss": 0.285, "step": 9045 }, { "epoch": 0.57, "grad_norm": 5.05733529819904, "learning_rate": 4.1320934481887305e-06, "loss": 0.3027, "step": 9046 }, { "epoch": 0.57, "grad_norm": 2.051632157570671, "learning_rate": 4.131090450628413e-06, "loss": 0.3013, "step": 9047 }, { "epoch": 0.57, "grad_norm": 2.121106128712773, "learning_rate": 4.1300874891206974e-06, "loss": 0.2817, "step": 9048 }, { "epoch": 0.57, "grad_norm": 4.394807891657507, "learning_rate": 4.129084563707197e-06, "loss": 0.293, "step": 9049 }, { "epoch": 0.57, "grad_norm": 4.530018270277474, "learning_rate": 4.128081674429526e-06, "loss": 0.2618, "step": 9050 }, { "epoch": 0.57, "grad_norm": 2.044056314133268, "learning_rate": 4.127078821329294e-06, "loss": 0.2705, "step": 9051 }, { "epoch": 0.57, "grad_norm": 6.244917727649454, "learning_rate": 4.126076004448113e-06, "loss": 0.2619, "step": 9052 }, { "epoch": 0.57, "grad_norm": 5.312164056316612, "learning_rate": 4.125073223827591e-06, "loss": 0.2734, "step": 9053 }, { "epoch": 0.57, "grad_norm": 3.7450869136011407, "learning_rate": 4.124070479509334e-06, "loss": 0.2841, "step": 9054 }, { "epoch": 0.57, "grad_norm": 1.820645042683543, "learning_rate": 4.1230677715349475e-06, "loss": 0.2722, "step": 9055 }, { "epoch": 0.57, "grad_norm": 2.8573115699327634, "learning_rate": 4.122065099946038e-06, "loss": 0.2865, "step": 9056 }, { "epoch": 0.57, "grad_norm": 3.7987228881782893, "learning_rate": 4.121062464784206e-06, "loss": 0.2854, "step": 9057 }, { "epoch": 0.57, "grad_norm": 2.2720463921823826, "learning_rate": 4.120059866091056e-06, "loss": 0.2716, "step": 9058 }, { "epoch": 0.57, "grad_norm": 2.864948724792566, "learning_rate": 4.119057303908183e-06, "loss": 0.2764, "step": 9059 }, { "epoch": 0.57, "grad_norm": 2.9189110251778523, "learning_rate": 4.118054778277189e-06, "loss": 0.2754, "step": 9060 }, { "epoch": 0.57, "grad_norm": 4.1701046556276715, "learning_rate": 4.117052289239667e-06, "loss": 0.2539, "step": 9061 }, { "epoch": 0.57, "grad_norm": 2.3569895924012774, "learning_rate": 4.1160498368372124e-06, "loss": 0.279, "step": 9062 }, { "epoch": 0.57, "grad_norm": 2.6604809832646694, "learning_rate": 4.1150474211114225e-06, "loss": 0.2853, "step": 9063 }, { "epoch": 0.57, "grad_norm": 3.0861832642577056, "learning_rate": 4.1140450421038865e-06, "loss": 0.2757, "step": 9064 }, { "epoch": 0.57, "grad_norm": 13.451331767900175, "learning_rate": 4.113042699856195e-06, "loss": 0.2689, "step": 9065 }, { "epoch": 0.57, "grad_norm": 2.133384723763996, "learning_rate": 4.112040394409939e-06, "loss": 0.2833, "step": 9066 }, { "epoch": 0.57, "grad_norm": 2.403540064432925, "learning_rate": 4.1110381258067026e-06, "loss": 0.2886, "step": 9067 }, { "epoch": 0.57, "grad_norm": 2.025493214522345, "learning_rate": 4.110035894088074e-06, "loss": 0.2818, "step": 9068 }, { "epoch": 0.57, "grad_norm": 2.3290467463766698, "learning_rate": 4.109033699295637e-06, "loss": 0.2886, "step": 9069 }, { "epoch": 0.57, "grad_norm": 4.0095072490111425, "learning_rate": 4.108031541470972e-06, "loss": 0.2735, "step": 9070 }, { "epoch": 0.57, "grad_norm": 2.362202204453788, "learning_rate": 4.107029420655664e-06, "loss": 0.2803, "step": 9071 }, { "epoch": 0.57, "grad_norm": 2.0550589270341977, "learning_rate": 4.106027336891293e-06, "loss": 0.2646, "step": 9072 }, { "epoch": 0.57, "grad_norm": 3.8612014518138986, "learning_rate": 4.105025290219435e-06, "loss": 0.2814, "step": 9073 }, { "epoch": 0.57, "grad_norm": 2.388373069164007, "learning_rate": 4.104023280681667e-06, "loss": 0.2834, "step": 9074 }, { "epoch": 0.57, "grad_norm": 8.504939264725376, "learning_rate": 4.1030213083195645e-06, "loss": 0.3156, "step": 9075 }, { "epoch": 0.57, "grad_norm": 2.2076874841158785, "learning_rate": 4.102019373174702e-06, "loss": 0.2874, "step": 9076 }, { "epoch": 0.57, "grad_norm": 1.9629721043413078, "learning_rate": 4.101017475288648e-06, "loss": 0.2761, "step": 9077 }, { "epoch": 0.57, "grad_norm": 3.2157956782011876, "learning_rate": 4.100015614702977e-06, "loss": 0.2773, "step": 9078 }, { "epoch": 0.57, "grad_norm": 3.229573127715457, "learning_rate": 4.099013791459258e-06, "loss": 0.2923, "step": 9079 }, { "epoch": 0.57, "grad_norm": 9.148341882612355, "learning_rate": 4.098012005599056e-06, "loss": 0.2728, "step": 9080 }, { "epoch": 0.57, "grad_norm": 3.6428228018447255, "learning_rate": 4.097010257163938e-06, "loss": 0.2649, "step": 9081 }, { "epoch": 0.57, "grad_norm": 2.395846389958749, "learning_rate": 4.09600854619547e-06, "loss": 0.2696, "step": 9082 }, { "epoch": 0.57, "grad_norm": 4.852686782804362, "learning_rate": 4.095006872735211e-06, "loss": 0.2809, "step": 9083 }, { "epoch": 0.57, "grad_norm": 13.909755305140324, "learning_rate": 4.094005236824726e-06, "loss": 0.2701, "step": 9084 }, { "epoch": 0.57, "grad_norm": 3.5149630657118824, "learning_rate": 4.093003638505571e-06, "loss": 0.2874, "step": 9085 }, { "epoch": 0.57, "grad_norm": 1.9605236579980447, "learning_rate": 4.092002077819307e-06, "loss": 0.2682, "step": 9086 }, { "epoch": 0.57, "grad_norm": 2.133333854294839, "learning_rate": 4.09100055480749e-06, "loss": 0.2695, "step": 9087 }, { "epoch": 0.57, "grad_norm": 2.173200404535774, "learning_rate": 4.0899990695116745e-06, "loss": 0.2763, "step": 9088 }, { "epoch": 0.57, "grad_norm": 0.6469218776810701, "learning_rate": 4.088997621973413e-06, "loss": 0.5043, "step": 9089 }, { "epoch": 0.57, "grad_norm": 1.6861730144194709, "learning_rate": 4.0879962122342596e-06, "loss": 0.2667, "step": 9090 }, { "epoch": 0.57, "grad_norm": 3.750872360073634, "learning_rate": 4.086994840335763e-06, "loss": 0.2781, "step": 9091 }, { "epoch": 0.57, "grad_norm": 0.5900024528158422, "learning_rate": 4.08599350631947e-06, "loss": 0.4712, "step": 9092 }, { "epoch": 0.57, "grad_norm": 1.9922181100792156, "learning_rate": 4.084992210226932e-06, "loss": 0.2779, "step": 9093 }, { "epoch": 0.57, "grad_norm": 3.8149982471392105, "learning_rate": 4.083990952099692e-06, "loss": 0.2883, "step": 9094 }, { "epoch": 0.57, "grad_norm": 3.60490040605363, "learning_rate": 4.0829897319792944e-06, "loss": 0.2722, "step": 9095 }, { "epoch": 0.57, "grad_norm": 8.097376451613655, "learning_rate": 4.081988549907282e-06, "loss": 0.2839, "step": 9096 }, { "epoch": 0.57, "grad_norm": 2.5063950607141985, "learning_rate": 4.080987405925195e-06, "loss": 0.2883, "step": 9097 }, { "epoch": 0.57, "grad_norm": 2.3265190953944352, "learning_rate": 4.079986300074573e-06, "loss": 0.2712, "step": 9098 }, { "epoch": 0.57, "grad_norm": 3.057986856956302, "learning_rate": 4.078985232396953e-06, "loss": 0.2662, "step": 9099 }, { "epoch": 0.57, "grad_norm": 2.2865199414983657, "learning_rate": 4.0779842029338714e-06, "loss": 0.3091, "step": 9100 }, { "epoch": 0.57, "grad_norm": 3.199288830026781, "learning_rate": 4.076983211726863e-06, "loss": 0.2693, "step": 9101 }, { "epoch": 0.57, "grad_norm": 3.625226987848647, "learning_rate": 4.075982258817462e-06, "loss": 0.2732, "step": 9102 }, { "epoch": 0.57, "grad_norm": 2.3269084514484146, "learning_rate": 4.074981344247197e-06, "loss": 0.2831, "step": 9103 }, { "epoch": 0.57, "grad_norm": 2.2655176838129396, "learning_rate": 4.0739804680576e-06, "loss": 0.2731, "step": 9104 }, { "epoch": 0.57, "grad_norm": 2.8790560167473216, "learning_rate": 4.0729796302901975e-06, "loss": 0.2747, "step": 9105 }, { "epoch": 0.57, "grad_norm": 1.7285640356748337, "learning_rate": 4.071978830986518e-06, "loss": 0.2942, "step": 9106 }, { "epoch": 0.57, "grad_norm": 2.2212964584503663, "learning_rate": 4.070978070188083e-06, "loss": 0.3006, "step": 9107 }, { "epoch": 0.57, "grad_norm": 2.8213856857007094, "learning_rate": 4.069977347936418e-06, "loss": 0.285, "step": 9108 }, { "epoch": 0.57, "grad_norm": 2.794564427043924, "learning_rate": 4.068976664273046e-06, "loss": 0.309, "step": 9109 }, { "epoch": 0.57, "grad_norm": 0.6854725343272413, "learning_rate": 4.067976019239486e-06, "loss": 0.4747, "step": 9110 }, { "epoch": 0.57, "grad_norm": 2.2039729196478537, "learning_rate": 4.0669754128772554e-06, "loss": 0.2753, "step": 9111 }, { "epoch": 0.57, "grad_norm": 4.479916467364978, "learning_rate": 4.065974845227874e-06, "loss": 0.2869, "step": 9112 }, { "epoch": 0.57, "grad_norm": 1.750741494660911, "learning_rate": 4.064974316332854e-06, "loss": 0.2864, "step": 9113 }, { "epoch": 0.57, "grad_norm": 1.8052909941077098, "learning_rate": 4.063973826233708e-06, "loss": 0.2689, "step": 9114 }, { "epoch": 0.57, "grad_norm": 1.745078312464633, "learning_rate": 4.062973374971954e-06, "loss": 0.2915, "step": 9115 }, { "epoch": 0.57, "grad_norm": 3.8686242934885526, "learning_rate": 4.061972962589098e-06, "loss": 0.2714, "step": 9116 }, { "epoch": 0.57, "grad_norm": 2.269804397596038, "learning_rate": 4.06097258912665e-06, "loss": 0.281, "step": 9117 }, { "epoch": 0.57, "grad_norm": 2.988428007549461, "learning_rate": 4.059972254626118e-06, "loss": 0.2704, "step": 9118 }, { "epoch": 0.57, "grad_norm": 2.45296716345786, "learning_rate": 4.058971959129006e-06, "loss": 0.2776, "step": 9119 }, { "epoch": 0.57, "grad_norm": 2.297886432980261, "learning_rate": 4.057971702676819e-06, "loss": 0.2805, "step": 9120 }, { "epoch": 0.57, "grad_norm": 3.579428080254274, "learning_rate": 4.0569714853110585e-06, "loss": 0.275, "step": 9121 }, { "epoch": 0.57, "grad_norm": 19.50183963770546, "learning_rate": 4.055971307073225e-06, "loss": 0.2918, "step": 9122 }, { "epoch": 0.57, "grad_norm": 2.102321279641808, "learning_rate": 4.054971168004822e-06, "loss": 0.2697, "step": 9123 }, { "epoch": 0.57, "grad_norm": 2.8339897951859103, "learning_rate": 4.0539710681473415e-06, "loss": 0.2822, "step": 9124 }, { "epoch": 0.57, "grad_norm": 6.964936061859733, "learning_rate": 4.052971007542283e-06, "loss": 0.2614, "step": 9125 }, { "epoch": 0.57, "grad_norm": 1.9912289138458814, "learning_rate": 4.051970986231139e-06, "loss": 0.2736, "step": 9126 }, { "epoch": 0.57, "grad_norm": 1.8981902565682565, "learning_rate": 4.0509710042554026e-06, "loss": 0.285, "step": 9127 }, { "epoch": 0.57, "grad_norm": 0.654667301972239, "learning_rate": 4.049971061656566e-06, "loss": 0.4594, "step": 9128 }, { "epoch": 0.57, "grad_norm": 3.034644345762902, "learning_rate": 4.048971158476114e-06, "loss": 0.2729, "step": 9129 }, { "epoch": 0.57, "grad_norm": 3.933527205490952, "learning_rate": 4.04797129475554e-06, "loss": 0.2709, "step": 9130 }, { "epoch": 0.57, "grad_norm": 2.2155708788882147, "learning_rate": 4.04697147053633e-06, "loss": 0.2634, "step": 9131 }, { "epoch": 0.57, "grad_norm": 2.733420587729311, "learning_rate": 4.045971685859964e-06, "loss": 0.2842, "step": 9132 }, { "epoch": 0.57, "grad_norm": 2.216164533715891, "learning_rate": 4.044971940767927e-06, "loss": 0.2582, "step": 9133 }, { "epoch": 0.57, "grad_norm": 11.635077684144465, "learning_rate": 4.043972235301703e-06, "loss": 0.2646, "step": 9134 }, { "epoch": 0.57, "grad_norm": 2.5178197638888022, "learning_rate": 4.042972569502767e-06, "loss": 0.2963, "step": 9135 }, { "epoch": 0.57, "grad_norm": 1.9129191601519608, "learning_rate": 4.0419729434126e-06, "loss": 0.2809, "step": 9136 }, { "epoch": 0.57, "grad_norm": 4.08700108767703, "learning_rate": 4.040973357072676e-06, "loss": 0.286, "step": 9137 }, { "epoch": 0.57, "grad_norm": 3.037377534653133, "learning_rate": 4.039973810524471e-06, "loss": 0.2813, "step": 9138 }, { "epoch": 0.57, "grad_norm": 1.9294021769241192, "learning_rate": 4.038974303809459e-06, "loss": 0.2758, "step": 9139 }, { "epoch": 0.57, "grad_norm": 38.69368353543663, "learning_rate": 4.03797483696911e-06, "loss": 0.2897, "step": 9140 }, { "epoch": 0.57, "grad_norm": 4.955541783425685, "learning_rate": 4.036975410044892e-06, "loss": 0.2692, "step": 9141 }, { "epoch": 0.57, "grad_norm": 2.7956800707173444, "learning_rate": 4.035976023078278e-06, "loss": 0.2683, "step": 9142 }, { "epoch": 0.57, "grad_norm": 2.4126732889509968, "learning_rate": 4.0349766761107275e-06, "loss": 0.2791, "step": 9143 }, { "epoch": 0.58, "grad_norm": 1.8318435302789828, "learning_rate": 4.033977369183709e-06, "loss": 0.2669, "step": 9144 }, { "epoch": 0.58, "grad_norm": 2.4843744083775534, "learning_rate": 4.032978102338688e-06, "loss": 0.2585, "step": 9145 }, { "epoch": 0.58, "grad_norm": 10.755507177658057, "learning_rate": 4.031978875617122e-06, "loss": 0.2679, "step": 9146 }, { "epoch": 0.58, "grad_norm": 2.047013529731842, "learning_rate": 4.030979689060471e-06, "loss": 0.2694, "step": 9147 }, { "epoch": 0.58, "grad_norm": 6.280461486830635, "learning_rate": 4.029980542710196e-06, "loss": 0.2728, "step": 9148 }, { "epoch": 0.58, "grad_norm": 3.7130861819098677, "learning_rate": 4.028981436607749e-06, "loss": 0.2845, "step": 9149 }, { "epoch": 0.58, "grad_norm": 2.5905654057925287, "learning_rate": 4.0279823707945885e-06, "loss": 0.259, "step": 9150 }, { "epoch": 0.58, "grad_norm": 4.786063709105976, "learning_rate": 4.0269833453121644e-06, "loss": 0.2669, "step": 9151 }, { "epoch": 0.58, "grad_norm": 2.093826153277464, "learning_rate": 4.025984360201929e-06, "loss": 0.2681, "step": 9152 }, { "epoch": 0.58, "grad_norm": 3.129957657546236, "learning_rate": 4.0249854155053345e-06, "loss": 0.2589, "step": 9153 }, { "epoch": 0.58, "grad_norm": 1.694219205540417, "learning_rate": 4.023986511263827e-06, "loss": 0.2757, "step": 9154 }, { "epoch": 0.58, "grad_norm": 5.1687010584432, "learning_rate": 4.022987647518851e-06, "loss": 0.2802, "step": 9155 }, { "epoch": 0.58, "grad_norm": 2.6205551709536556, "learning_rate": 4.0219888243118546e-06, "loss": 0.282, "step": 9156 }, { "epoch": 0.58, "grad_norm": 4.042505089343204, "learning_rate": 4.020990041684278e-06, "loss": 0.2599, "step": 9157 }, { "epoch": 0.58, "grad_norm": 3.9101776779715314, "learning_rate": 4.019991299677565e-06, "loss": 0.2828, "step": 9158 }, { "epoch": 0.58, "grad_norm": 2.17251905671189, "learning_rate": 4.018992598333151e-06, "loss": 0.2864, "step": 9159 }, { "epoch": 0.58, "grad_norm": 1.9085476506756567, "learning_rate": 4.017993937692478e-06, "loss": 0.2713, "step": 9160 }, { "epoch": 0.58, "grad_norm": 3.4533500041412646, "learning_rate": 4.0169953177969814e-06, "loss": 0.28, "step": 9161 }, { "epoch": 0.58, "grad_norm": 2.684952679191809, "learning_rate": 4.015996738688094e-06, "loss": 0.2866, "step": 9162 }, { "epoch": 0.58, "grad_norm": 1.9267543954730049, "learning_rate": 4.01499820040725e-06, "loss": 0.2744, "step": 9163 }, { "epoch": 0.58, "grad_norm": 2.418685357774127, "learning_rate": 4.013999702995881e-06, "loss": 0.2793, "step": 9164 }, { "epoch": 0.58, "grad_norm": 3.914806246349283, "learning_rate": 4.013001246495415e-06, "loss": 0.2716, "step": 9165 }, { "epoch": 0.58, "grad_norm": 2.0052424753735694, "learning_rate": 4.012002830947281e-06, "loss": 0.2658, "step": 9166 }, { "epoch": 0.58, "grad_norm": 2.88961432657455, "learning_rate": 4.011004456392903e-06, "loss": 0.2607, "step": 9167 }, { "epoch": 0.58, "grad_norm": 2.295553631017359, "learning_rate": 4.010006122873707e-06, "loss": 0.2821, "step": 9168 }, { "epoch": 0.58, "grad_norm": 4.235881630326167, "learning_rate": 4.009007830431118e-06, "loss": 0.2823, "step": 9169 }, { "epoch": 0.58, "grad_norm": 0.672443743563525, "learning_rate": 4.008009579106551e-06, "loss": 0.487, "step": 9170 }, { "epoch": 0.58, "grad_norm": 5.006177691577419, "learning_rate": 4.007011368941429e-06, "loss": 0.27, "step": 9171 }, { "epoch": 0.58, "grad_norm": 9.546157449135116, "learning_rate": 4.0060131999771715e-06, "loss": 0.3116, "step": 9172 }, { "epoch": 0.58, "grad_norm": 2.827833423456838, "learning_rate": 4.00501507225519e-06, "loss": 0.2665, "step": 9173 }, { "epoch": 0.58, "grad_norm": 3.168560296788205, "learning_rate": 4.0040169858169e-06, "loss": 0.2701, "step": 9174 }, { "epoch": 0.58, "grad_norm": 4.7020082264023735, "learning_rate": 4.003018940703716e-06, "loss": 0.2705, "step": 9175 }, { "epoch": 0.58, "grad_norm": 5.167975200895409, "learning_rate": 4.002020936957045e-06, "loss": 0.2627, "step": 9176 }, { "epoch": 0.58, "grad_norm": 2.5360734183114646, "learning_rate": 4.0010229746183e-06, "loss": 0.2587, "step": 9177 }, { "epoch": 0.58, "grad_norm": 3.23888902376058, "learning_rate": 4.0000250537288845e-06, "loss": 0.2696, "step": 9178 }, { "epoch": 0.58, "grad_norm": 4.088208684446045, "learning_rate": 3.999027174330206e-06, "loss": 0.2776, "step": 9179 }, { "epoch": 0.58, "grad_norm": 1.856449926681893, "learning_rate": 3.9980293364636694e-06, "loss": 0.2619, "step": 9180 }, { "epoch": 0.58, "grad_norm": 5.48506973500211, "learning_rate": 3.9970315401706726e-06, "loss": 0.2817, "step": 9181 }, { "epoch": 0.58, "grad_norm": 2.0070379975397, "learning_rate": 3.99603378549262e-06, "loss": 0.2701, "step": 9182 }, { "epoch": 0.58, "grad_norm": 5.975709514802917, "learning_rate": 3.99503607247091e-06, "loss": 0.3074, "step": 9183 }, { "epoch": 0.58, "grad_norm": 2.600647208977965, "learning_rate": 3.994038401146937e-06, "loss": 0.2801, "step": 9184 }, { "epoch": 0.58, "grad_norm": 2.6339462822329724, "learning_rate": 3.993040771562098e-06, "loss": 0.2858, "step": 9185 }, { "epoch": 0.58, "grad_norm": 3.7211227449044615, "learning_rate": 3.9920431837577876e-06, "loss": 0.2728, "step": 9186 }, { "epoch": 0.58, "grad_norm": 2.1177502246771835, "learning_rate": 3.991045637775393e-06, "loss": 0.2637, "step": 9187 }, { "epoch": 0.58, "grad_norm": 0.6273683184959713, "learning_rate": 3.99004813365631e-06, "loss": 0.5095, "step": 9188 }, { "epoch": 0.58, "grad_norm": 4.10203118302687, "learning_rate": 3.98905067144192e-06, "loss": 0.2816, "step": 9189 }, { "epoch": 0.58, "grad_norm": 8.98073069918242, "learning_rate": 3.988053251173615e-06, "loss": 0.267, "step": 9190 }, { "epoch": 0.58, "grad_norm": 2.354093564094669, "learning_rate": 3.987055872892779e-06, "loss": 0.2778, "step": 9191 }, { "epoch": 0.58, "grad_norm": 2.5464327724788087, "learning_rate": 3.986058536640793e-06, "loss": 0.256, "step": 9192 }, { "epoch": 0.58, "grad_norm": 2.2718249165470352, "learning_rate": 3.985061242459039e-06, "loss": 0.284, "step": 9193 }, { "epoch": 0.58, "grad_norm": 5.600607028959871, "learning_rate": 3.984063990388899e-06, "loss": 0.2738, "step": 9194 }, { "epoch": 0.58, "grad_norm": 2.969615068838298, "learning_rate": 3.983066780471747e-06, "loss": 0.2857, "step": 9195 }, { "epoch": 0.58, "grad_norm": 2.4830170588299847, "learning_rate": 3.982069612748959e-06, "loss": 0.2752, "step": 9196 }, { "epoch": 0.58, "grad_norm": 2.40807304756567, "learning_rate": 3.981072487261913e-06, "loss": 0.2702, "step": 9197 }, { "epoch": 0.58, "grad_norm": 5.8419269627637975, "learning_rate": 3.9800754040519785e-06, "loss": 0.2587, "step": 9198 }, { "epoch": 0.58, "grad_norm": 3.357213022948169, "learning_rate": 3.979078363160528e-06, "loss": 0.2775, "step": 9199 }, { "epoch": 0.58, "grad_norm": 3.630534764748263, "learning_rate": 3.9780813646289286e-06, "loss": 0.2736, "step": 9200 }, { "epoch": 0.58, "grad_norm": 2.526560502368286, "learning_rate": 3.977084408498549e-06, "loss": 0.271, "step": 9201 }, { "epoch": 0.58, "grad_norm": 0.5819987109373376, "learning_rate": 3.976087494810754e-06, "loss": 0.4673, "step": 9202 }, { "epoch": 0.58, "grad_norm": 4.79083350753169, "learning_rate": 3.975090623606907e-06, "loss": 0.2933, "step": 9203 }, { "epoch": 0.58, "grad_norm": 2.6885841543703464, "learning_rate": 3.97409379492837e-06, "loss": 0.2806, "step": 9204 }, { "epoch": 0.58, "grad_norm": 7.983305670568188, "learning_rate": 3.973097008816505e-06, "loss": 0.2739, "step": 9205 }, { "epoch": 0.58, "grad_norm": 0.6457106968133617, "learning_rate": 3.972100265312669e-06, "loss": 0.5008, "step": 9206 }, { "epoch": 0.58, "grad_norm": 2.7651344050525344, "learning_rate": 3.971103564458219e-06, "loss": 0.2857, "step": 9207 }, { "epoch": 0.58, "grad_norm": 3.8530805703622715, "learning_rate": 3.970106906294509e-06, "loss": 0.2693, "step": 9208 }, { "epoch": 0.58, "grad_norm": 4.628739316517356, "learning_rate": 3.9691102908628925e-06, "loss": 0.2895, "step": 9209 }, { "epoch": 0.58, "grad_norm": 2.30876149085146, "learning_rate": 3.968113718204722e-06, "loss": 0.2686, "step": 9210 }, { "epoch": 0.58, "grad_norm": 3.652158089528851, "learning_rate": 3.967117188361345e-06, "loss": 0.2991, "step": 9211 }, { "epoch": 0.58, "grad_norm": 14.236986397916008, "learning_rate": 3.96612070137411e-06, "loss": 0.2507, "step": 9212 }, { "epoch": 0.58, "grad_norm": 3.993317824882817, "learning_rate": 3.965124257284366e-06, "loss": 0.2902, "step": 9213 }, { "epoch": 0.58, "grad_norm": 2.9551932726352708, "learning_rate": 3.964127856133453e-06, "loss": 0.2962, "step": 9214 }, { "epoch": 0.58, "grad_norm": 2.4245063545873173, "learning_rate": 3.963131497962715e-06, "loss": 0.2711, "step": 9215 }, { "epoch": 0.58, "grad_norm": 4.503509197929178, "learning_rate": 3.9621351828134935e-06, "loss": 0.2891, "step": 9216 }, { "epoch": 0.58, "grad_norm": 3.3175082169082923, "learning_rate": 3.961138910727126e-06, "loss": 0.2628, "step": 9217 }, { "epoch": 0.58, "grad_norm": 4.206590509360579, "learning_rate": 3.960142681744952e-06, "loss": 0.3006, "step": 9218 }, { "epoch": 0.58, "grad_norm": 1.976329787919778, "learning_rate": 3.959146495908303e-06, "loss": 0.2751, "step": 9219 }, { "epoch": 0.58, "grad_norm": 3.659175816800913, "learning_rate": 3.958150353258515e-06, "loss": 0.2952, "step": 9220 }, { "epoch": 0.58, "grad_norm": 1.8089002270216088, "learning_rate": 3.957154253836921e-06, "loss": 0.2695, "step": 9221 }, { "epoch": 0.58, "grad_norm": 2.261169418230604, "learning_rate": 3.9561581976848475e-06, "loss": 0.2722, "step": 9222 }, { "epoch": 0.58, "grad_norm": 2.8436599195476457, "learning_rate": 3.955162184843625e-06, "loss": 0.2913, "step": 9223 }, { "epoch": 0.58, "grad_norm": 1.7849197509321244, "learning_rate": 3.95416621535458e-06, "loss": 0.2746, "step": 9224 }, { "epoch": 0.58, "grad_norm": 4.4996914745383165, "learning_rate": 3.953170289259036e-06, "loss": 0.275, "step": 9225 }, { "epoch": 0.58, "grad_norm": 2.2381976452551235, "learning_rate": 3.952174406598314e-06, "loss": 0.2761, "step": 9226 }, { "epoch": 0.58, "grad_norm": 4.158901164094756, "learning_rate": 3.951178567413739e-06, "loss": 0.2662, "step": 9227 }, { "epoch": 0.58, "grad_norm": 3.369904440501148, "learning_rate": 3.950182771746629e-06, "loss": 0.2514, "step": 9228 }, { "epoch": 0.58, "grad_norm": 3.123030064883653, "learning_rate": 3.9491870196383e-06, "loss": 0.2937, "step": 9229 }, { "epoch": 0.58, "grad_norm": 2.6122154605217522, "learning_rate": 3.948191311130067e-06, "loss": 0.2537, "step": 9230 }, { "epoch": 0.58, "grad_norm": 3.5841996804823477, "learning_rate": 3.947195646263246e-06, "loss": 0.2647, "step": 9231 }, { "epoch": 0.58, "grad_norm": 4.3264531672810795, "learning_rate": 3.946200025079147e-06, "loss": 0.2782, "step": 9232 }, { "epoch": 0.58, "grad_norm": 2.549684787107598, "learning_rate": 3.945204447619081e-06, "loss": 0.2824, "step": 9233 }, { "epoch": 0.58, "grad_norm": 4.459502477500608, "learning_rate": 3.944208913924354e-06, "loss": 0.2699, "step": 9234 }, { "epoch": 0.58, "grad_norm": 0.6854850466208346, "learning_rate": 3.943213424036277e-06, "loss": 0.4983, "step": 9235 }, { "epoch": 0.58, "grad_norm": 3.7813734130762415, "learning_rate": 3.942217977996151e-06, "loss": 0.2781, "step": 9236 }, { "epoch": 0.58, "grad_norm": 21.838722174618145, "learning_rate": 3.94122257584528e-06, "loss": 0.2824, "step": 9237 }, { "epoch": 0.58, "grad_norm": 1.6116462264193507, "learning_rate": 3.940227217624965e-06, "loss": 0.2923, "step": 9238 }, { "epoch": 0.58, "grad_norm": 1.8344541824735696, "learning_rate": 3.939231903376505e-06, "loss": 0.2806, "step": 9239 }, { "epoch": 0.58, "grad_norm": 3.9031682065374627, "learning_rate": 3.938236633141199e-06, "loss": 0.265, "step": 9240 }, { "epoch": 0.58, "grad_norm": 3.1751783097786794, "learning_rate": 3.937241406960338e-06, "loss": 0.2911, "step": 9241 }, { "epoch": 0.58, "grad_norm": 10.212821549681234, "learning_rate": 3.93624622487522e-06, "loss": 0.2713, "step": 9242 }, { "epoch": 0.58, "grad_norm": 4.7039019833786515, "learning_rate": 3.935251086927137e-06, "loss": 0.2872, "step": 9243 }, { "epoch": 0.58, "grad_norm": 4.078187430760622, "learning_rate": 3.934255993157375e-06, "loss": 0.2821, "step": 9244 }, { "epoch": 0.58, "grad_norm": 3.336290319319591, "learning_rate": 3.933260943607228e-06, "loss": 0.2729, "step": 9245 }, { "epoch": 0.58, "grad_norm": 2.4912092543409914, "learning_rate": 3.932265938317977e-06, "loss": 0.2828, "step": 9246 }, { "epoch": 0.58, "grad_norm": 0.6318907624468713, "learning_rate": 3.931270977330909e-06, "loss": 0.4828, "step": 9247 }, { "epoch": 0.58, "grad_norm": 1.7379207239142243, "learning_rate": 3.930276060687307e-06, "loss": 0.2641, "step": 9248 }, { "epoch": 0.58, "grad_norm": 1.6218894435067417, "learning_rate": 3.92928118842845e-06, "loss": 0.2833, "step": 9249 }, { "epoch": 0.58, "grad_norm": 0.601040136110174, "learning_rate": 3.928286360595619e-06, "loss": 0.5165, "step": 9250 }, { "epoch": 0.58, "grad_norm": 2.1536563858684312, "learning_rate": 3.927291577230091e-06, "loss": 0.2702, "step": 9251 }, { "epoch": 0.58, "grad_norm": 2.8247434711763884, "learning_rate": 3.926296838373141e-06, "loss": 0.2659, "step": 9252 }, { "epoch": 0.58, "grad_norm": 2.1103878092146062, "learning_rate": 3.925302144066042e-06, "loss": 0.2779, "step": 9253 }, { "epoch": 0.58, "grad_norm": 8.466824691038825, "learning_rate": 3.924307494350066e-06, "loss": 0.2753, "step": 9254 }, { "epoch": 0.58, "grad_norm": 1.8396376075892753, "learning_rate": 3.9233128892664815e-06, "loss": 0.2634, "step": 9255 }, { "epoch": 0.58, "grad_norm": 5.258611616343802, "learning_rate": 3.922318328856557e-06, "loss": 0.2826, "step": 9256 }, { "epoch": 0.58, "grad_norm": 1.947238415015567, "learning_rate": 3.921323813161562e-06, "loss": 0.2762, "step": 9257 }, { "epoch": 0.58, "grad_norm": 3.0129693418965022, "learning_rate": 3.9203293422227564e-06, "loss": 0.2792, "step": 9258 }, { "epoch": 0.58, "grad_norm": 1.8433568163588185, "learning_rate": 3.919334916081406e-06, "loss": 0.2579, "step": 9259 }, { "epoch": 0.58, "grad_norm": 2.605504807618717, "learning_rate": 3.918340534778767e-06, "loss": 0.3022, "step": 9260 }, { "epoch": 0.58, "grad_norm": 1.9318041140657272, "learning_rate": 3.917346198356103e-06, "loss": 0.2713, "step": 9261 }, { "epoch": 0.58, "grad_norm": 3.643841981750636, "learning_rate": 3.916351906854665e-06, "loss": 0.2656, "step": 9262 }, { "epoch": 0.58, "grad_norm": 1.9256949886880956, "learning_rate": 3.915357660315712e-06, "loss": 0.2691, "step": 9263 }, { "epoch": 0.58, "grad_norm": 1.9563109630509625, "learning_rate": 3.914363458780496e-06, "loss": 0.2651, "step": 9264 }, { "epoch": 0.58, "grad_norm": 2.4508005363252714, "learning_rate": 3.913369302290271e-06, "loss": 0.2775, "step": 9265 }, { "epoch": 0.58, "grad_norm": 3.5971326424201586, "learning_rate": 3.912375190886281e-06, "loss": 0.2648, "step": 9266 }, { "epoch": 0.58, "grad_norm": 1.675208419701591, "learning_rate": 3.911381124609778e-06, "loss": 0.2537, "step": 9267 }, { "epoch": 0.58, "grad_norm": 3.2601122342457307, "learning_rate": 3.9103871035020044e-06, "loss": 0.29, "step": 9268 }, { "epoch": 0.58, "grad_norm": 2.2851296786488087, "learning_rate": 3.909393127604206e-06, "loss": 0.2794, "step": 9269 }, { "epoch": 0.58, "grad_norm": 2.941750832709662, "learning_rate": 3.908399196957625e-06, "loss": 0.2805, "step": 9270 }, { "epoch": 0.58, "grad_norm": 2.407309898368578, "learning_rate": 3.907405311603497e-06, "loss": 0.2876, "step": 9271 }, { "epoch": 0.58, "grad_norm": 2.078145251398732, "learning_rate": 3.906411471583065e-06, "loss": 0.2695, "step": 9272 }, { "epoch": 0.58, "grad_norm": 2.8576362429051776, "learning_rate": 3.905417676937564e-06, "loss": 0.2726, "step": 9273 }, { "epoch": 0.58, "grad_norm": 3.2375411146914583, "learning_rate": 3.9044239277082275e-06, "loss": 0.2649, "step": 9274 }, { "epoch": 0.58, "grad_norm": 1.5901883542414676, "learning_rate": 3.903430223936289e-06, "loss": 0.2771, "step": 9275 }, { "epoch": 0.58, "grad_norm": 2.386079394630121, "learning_rate": 3.9024365656629774e-06, "loss": 0.2847, "step": 9276 }, { "epoch": 0.58, "grad_norm": 2.1223109386297265, "learning_rate": 3.901442952929522e-06, "loss": 0.2957, "step": 9277 }, { "epoch": 0.58, "grad_norm": 1.6301339216888115, "learning_rate": 3.900449385777148e-06, "loss": 0.2757, "step": 9278 }, { "epoch": 0.58, "grad_norm": 1.331999589760647, "learning_rate": 3.899455864247085e-06, "loss": 0.2602, "step": 9279 }, { "epoch": 0.58, "grad_norm": 2.0459651310011617, "learning_rate": 3.898462388380551e-06, "loss": 0.2835, "step": 9280 }, { "epoch": 0.58, "grad_norm": 1.6533536651957998, "learning_rate": 3.897468958218771e-06, "loss": 0.2727, "step": 9281 }, { "epoch": 0.58, "grad_norm": 2.462847176750664, "learning_rate": 3.896475573802961e-06, "loss": 0.2798, "step": 9282 }, { "epoch": 0.58, "grad_norm": 2.5149493904701377, "learning_rate": 3.895482235174341e-06, "loss": 0.2849, "step": 9283 }, { "epoch": 0.58, "grad_norm": 6.296923975090486, "learning_rate": 3.894488942374123e-06, "loss": 0.257, "step": 9284 }, { "epoch": 0.58, "grad_norm": 2.256151768772217, "learning_rate": 3.893495695443522e-06, "loss": 0.2639, "step": 9285 }, { "epoch": 0.58, "grad_norm": 7.313014087149545, "learning_rate": 3.89250249442375e-06, "loss": 0.278, "step": 9286 }, { "epoch": 0.58, "grad_norm": 1.540986611801791, "learning_rate": 3.891509339356018e-06, "loss": 0.2769, "step": 9287 }, { "epoch": 0.58, "grad_norm": 1.7000881897142213, "learning_rate": 3.890516230281532e-06, "loss": 0.2718, "step": 9288 }, { "epoch": 0.58, "grad_norm": 1.8600218578769911, "learning_rate": 3.889523167241499e-06, "loss": 0.2876, "step": 9289 }, { "epoch": 0.58, "grad_norm": 1.6955977080804243, "learning_rate": 3.888530150277121e-06, "loss": 0.2753, "step": 9290 }, { "epoch": 0.58, "grad_norm": 1.8290332204823654, "learning_rate": 3.887537179429603e-06, "loss": 0.2724, "step": 9291 }, { "epoch": 0.58, "grad_norm": 2.2165372570674795, "learning_rate": 3.886544254740141e-06, "loss": 0.2516, "step": 9292 }, { "epoch": 0.58, "grad_norm": 1.6392012056535392, "learning_rate": 3.885551376249936e-06, "loss": 0.2578, "step": 9293 }, { "epoch": 0.58, "grad_norm": 4.667023314668314, "learning_rate": 3.884558544000184e-06, "loss": 0.2903, "step": 9294 }, { "epoch": 0.58, "grad_norm": 7.238751676907104, "learning_rate": 3.883565758032081e-06, "loss": 0.2625, "step": 9295 }, { "epoch": 0.58, "grad_norm": 2.480637919262386, "learning_rate": 3.882573018386816e-06, "loss": 0.2831, "step": 9296 }, { "epoch": 0.58, "grad_norm": 4.551646106426363, "learning_rate": 3.8815803251055826e-06, "loss": 0.274, "step": 9297 }, { "epoch": 0.58, "grad_norm": 1.5964922109046245, "learning_rate": 3.880587678229567e-06, "loss": 0.2902, "step": 9298 }, { "epoch": 0.58, "grad_norm": 2.608368448008926, "learning_rate": 3.879595077799958e-06, "loss": 0.2754, "step": 9299 }, { "epoch": 0.58, "grad_norm": 1.9146461555348528, "learning_rate": 3.878602523857938e-06, "loss": 0.27, "step": 9300 }, { "epoch": 0.58, "grad_norm": 1.861027720405857, "learning_rate": 3.8776100164446905e-06, "loss": 0.2795, "step": 9301 }, { "epoch": 0.58, "grad_norm": 2.5705217704095804, "learning_rate": 3.876617555601398e-06, "loss": 0.2834, "step": 9302 }, { "epoch": 0.59, "grad_norm": 1.8548233970260224, "learning_rate": 3.875625141369239e-06, "loss": 0.2783, "step": 9303 }, { "epoch": 0.59, "grad_norm": 3.3720798077685035, "learning_rate": 3.874632773789389e-06, "loss": 0.2749, "step": 9304 }, { "epoch": 0.59, "grad_norm": 2.3129696441353427, "learning_rate": 3.8736404529030255e-06, "loss": 0.2564, "step": 9305 }, { "epoch": 0.59, "grad_norm": 2.6769039219375834, "learning_rate": 3.87264817875132e-06, "loss": 0.2874, "step": 9306 }, { "epoch": 0.59, "grad_norm": 0.600291436389585, "learning_rate": 3.871655951375443e-06, "loss": 0.5002, "step": 9307 }, { "epoch": 0.59, "grad_norm": 3.9061199034288974, "learning_rate": 3.870663770816563e-06, "loss": 0.2943, "step": 9308 }, { "epoch": 0.59, "grad_norm": 2.855016061376826, "learning_rate": 3.869671637115853e-06, "loss": 0.2887, "step": 9309 }, { "epoch": 0.59, "grad_norm": 1.925621228706962, "learning_rate": 3.868679550314472e-06, "loss": 0.2769, "step": 9310 }, { "epoch": 0.59, "grad_norm": 3.4470475541665326, "learning_rate": 3.867687510453587e-06, "loss": 0.2895, "step": 9311 }, { "epoch": 0.59, "grad_norm": 2.4803229483520712, "learning_rate": 3.866695517574358e-06, "loss": 0.2755, "step": 9312 }, { "epoch": 0.59, "grad_norm": 2.0185927478834387, "learning_rate": 3.865703571717946e-06, "loss": 0.3111, "step": 9313 }, { "epoch": 0.59, "grad_norm": 1.425685119607202, "learning_rate": 3.864711672925506e-06, "loss": 0.2812, "step": 9314 }, { "epoch": 0.59, "grad_norm": 1.6278036288086817, "learning_rate": 3.863719821238196e-06, "loss": 0.2859, "step": 9315 }, { "epoch": 0.59, "grad_norm": 6.178543980447864, "learning_rate": 3.862728016697167e-06, "loss": 0.2752, "step": 9316 }, { "epoch": 0.59, "grad_norm": 2.0612224909353554, "learning_rate": 3.8617362593435745e-06, "loss": 0.2922, "step": 9317 }, { "epoch": 0.59, "grad_norm": 1.5189909760652602, "learning_rate": 3.860744549218566e-06, "loss": 0.2694, "step": 9318 }, { "epoch": 0.59, "grad_norm": 4.038894841179102, "learning_rate": 3.85975288636329e-06, "loss": 0.2928, "step": 9319 }, { "epoch": 0.59, "grad_norm": 2.6361349039783613, "learning_rate": 3.858761270818892e-06, "loss": 0.2979, "step": 9320 }, { "epoch": 0.59, "grad_norm": 2.7192020723611874, "learning_rate": 3.857769702626516e-06, "loss": 0.3063, "step": 9321 }, { "epoch": 0.59, "grad_norm": 4.736338572412326, "learning_rate": 3.8567781818273034e-06, "loss": 0.285, "step": 9322 }, { "epoch": 0.59, "grad_norm": 1.5403393409699893, "learning_rate": 3.855786708462394e-06, "loss": 0.2597, "step": 9323 }, { "epoch": 0.59, "grad_norm": 5.53307827225536, "learning_rate": 3.854795282572926e-06, "loss": 0.2653, "step": 9324 }, { "epoch": 0.59, "grad_norm": 1.7944944551201483, "learning_rate": 3.853803904200039e-06, "loss": 0.253, "step": 9325 }, { "epoch": 0.59, "grad_norm": 1.586030407716853, "learning_rate": 3.852812573384861e-06, "loss": 0.2652, "step": 9326 }, { "epoch": 0.59, "grad_norm": 2.6646869134092412, "learning_rate": 3.851821290168528e-06, "loss": 0.2666, "step": 9327 }, { "epoch": 0.59, "grad_norm": 2.4260893578703877, "learning_rate": 3.85083005459217e-06, "loss": 0.2783, "step": 9328 }, { "epoch": 0.59, "grad_norm": 3.8494398011020112, "learning_rate": 3.8498388666969134e-06, "loss": 0.2729, "step": 9329 }, { "epoch": 0.59, "grad_norm": 2.1302486781491954, "learning_rate": 3.848847726523885e-06, "loss": 0.2606, "step": 9330 }, { "epoch": 0.59, "grad_norm": 3.996773571331155, "learning_rate": 3.847856634114207e-06, "loss": 0.2671, "step": 9331 }, { "epoch": 0.59, "grad_norm": 1.847318593175028, "learning_rate": 3.846865589509006e-06, "loss": 0.2897, "step": 9332 }, { "epoch": 0.59, "grad_norm": 2.1254200614702987, "learning_rate": 3.8458745927494e-06, "loss": 0.275, "step": 9333 }, { "epoch": 0.59, "grad_norm": 4.608875024556574, "learning_rate": 3.844883643876507e-06, "loss": 0.2774, "step": 9334 }, { "epoch": 0.59, "grad_norm": 1.9053440485058792, "learning_rate": 3.843892742931443e-06, "loss": 0.2788, "step": 9335 }, { "epoch": 0.59, "grad_norm": 3.9981418936468147, "learning_rate": 3.842901889955322e-06, "loss": 0.2995, "step": 9336 }, { "epoch": 0.59, "grad_norm": 2.1693773742322167, "learning_rate": 3.841911084989259e-06, "loss": 0.2745, "step": 9337 }, { "epoch": 0.59, "grad_norm": 2.9074893159153605, "learning_rate": 3.840920328074358e-06, "loss": 0.2681, "step": 9338 }, { "epoch": 0.59, "grad_norm": 3.941526961208954, "learning_rate": 3.839929619251734e-06, "loss": 0.2647, "step": 9339 }, { "epoch": 0.59, "grad_norm": 1.8735502187969488, "learning_rate": 3.838938958562491e-06, "loss": 0.2893, "step": 9340 }, { "epoch": 0.59, "grad_norm": 1.9307053582966782, "learning_rate": 3.837948346047733e-06, "loss": 0.2689, "step": 9341 }, { "epoch": 0.59, "grad_norm": 1.8708470553193515, "learning_rate": 3.836957781748562e-06, "loss": 0.2788, "step": 9342 }, { "epoch": 0.59, "grad_norm": 3.1371577226420304, "learning_rate": 3.835967265706078e-06, "loss": 0.2703, "step": 9343 }, { "epoch": 0.59, "grad_norm": 1.7927673673217885, "learning_rate": 3.834976797961379e-06, "loss": 0.2856, "step": 9344 }, { "epoch": 0.59, "grad_norm": 1.538581634819629, "learning_rate": 3.833986378555562e-06, "loss": 0.2729, "step": 9345 }, { "epoch": 0.59, "grad_norm": 5.25168465298363, "learning_rate": 3.832996007529721e-06, "loss": 0.2933, "step": 9346 }, { "epoch": 0.59, "grad_norm": 3.675468828378678, "learning_rate": 3.83200568492495e-06, "loss": 0.2854, "step": 9347 }, { "epoch": 0.59, "grad_norm": 3.805766266755283, "learning_rate": 3.8310154107823375e-06, "loss": 0.2628, "step": 9348 }, { "epoch": 0.59, "grad_norm": 2.571832877111927, "learning_rate": 3.8300251851429715e-06, "loss": 0.2714, "step": 9349 }, { "epoch": 0.59, "grad_norm": 2.098782017288707, "learning_rate": 3.829035008047939e-06, "loss": 0.2877, "step": 9350 }, { "epoch": 0.59, "grad_norm": 2.2974646345997423, "learning_rate": 3.8280448795383245e-06, "loss": 0.267, "step": 9351 }, { "epoch": 0.59, "grad_norm": 4.85193585986451, "learning_rate": 3.827054799655207e-06, "loss": 0.2624, "step": 9352 }, { "epoch": 0.59, "grad_norm": 1.7888315975033644, "learning_rate": 3.82606476843967e-06, "loss": 0.2521, "step": 9353 }, { "epoch": 0.59, "grad_norm": 1.4060659652934158, "learning_rate": 3.825074785932792e-06, "loss": 0.2576, "step": 9354 }, { "epoch": 0.59, "grad_norm": 15.16639271451097, "learning_rate": 3.8240848521756484e-06, "loss": 0.2634, "step": 9355 }, { "epoch": 0.59, "grad_norm": 2.4092035208211344, "learning_rate": 3.823094967209312e-06, "loss": 0.2721, "step": 9356 }, { "epoch": 0.59, "grad_norm": 2.46751038974512, "learning_rate": 3.822105131074857e-06, "loss": 0.2893, "step": 9357 }, { "epoch": 0.59, "grad_norm": 2.020572096130606, "learning_rate": 3.8211153438133515e-06, "loss": 0.2713, "step": 9358 }, { "epoch": 0.59, "grad_norm": 1.5594628901051883, "learning_rate": 3.820125605465864e-06, "loss": 0.2877, "step": 9359 }, { "epoch": 0.59, "grad_norm": 1.5046444553647635, "learning_rate": 3.81913591607346e-06, "loss": 0.2523, "step": 9360 }, { "epoch": 0.59, "grad_norm": 1.8256666327324984, "learning_rate": 3.8181462756772056e-06, "loss": 0.2721, "step": 9361 }, { "epoch": 0.59, "grad_norm": 8.853344318626228, "learning_rate": 3.817156684318161e-06, "loss": 0.2699, "step": 9362 }, { "epoch": 0.59, "grad_norm": 1.6700246115584205, "learning_rate": 3.816167142037388e-06, "loss": 0.2871, "step": 9363 }, { "epoch": 0.59, "grad_norm": 1.9158309584807425, "learning_rate": 3.815177648875941e-06, "loss": 0.2523, "step": 9364 }, { "epoch": 0.59, "grad_norm": 12.271281314895718, "learning_rate": 3.81418820487488e-06, "loss": 0.2864, "step": 9365 }, { "epoch": 0.59, "grad_norm": 0.6268130753348529, "learning_rate": 3.813198810075255e-06, "loss": 0.4973, "step": 9366 }, { "epoch": 0.59, "grad_norm": 2.335047972564448, "learning_rate": 3.8122094645181196e-06, "loss": 0.2704, "step": 9367 }, { "epoch": 0.59, "grad_norm": 9.566688826713257, "learning_rate": 3.811220168244521e-06, "loss": 0.2794, "step": 9368 }, { "epoch": 0.59, "grad_norm": 3.3316070491854632, "learning_rate": 3.8102309212955122e-06, "loss": 0.271, "step": 9369 }, { "epoch": 0.59, "grad_norm": 4.087907916226869, "learning_rate": 3.809241723712135e-06, "loss": 0.2845, "step": 9370 }, { "epoch": 0.59, "grad_norm": 2.2158212205162338, "learning_rate": 3.8082525755354346e-06, "loss": 0.2862, "step": 9371 }, { "epoch": 0.59, "grad_norm": 2.623133242692415, "learning_rate": 3.80726347680645e-06, "loss": 0.2795, "step": 9372 }, { "epoch": 0.59, "grad_norm": 2.954505107845916, "learning_rate": 3.8062744275662237e-06, "loss": 0.2834, "step": 9373 }, { "epoch": 0.59, "grad_norm": 2.1406731505733547, "learning_rate": 3.8052854278557904e-06, "loss": 0.2801, "step": 9374 }, { "epoch": 0.59, "grad_norm": 3.781870275502023, "learning_rate": 3.8042964777161862e-06, "loss": 0.279, "step": 9375 }, { "epoch": 0.59, "grad_norm": 3.0254021780680995, "learning_rate": 3.8033075771884457e-06, "loss": 0.2877, "step": 9376 }, { "epoch": 0.59, "grad_norm": 2.2269111364961764, "learning_rate": 3.8023187263136008e-06, "loss": 0.2835, "step": 9377 }, { "epoch": 0.59, "grad_norm": 0.5921458140843855, "learning_rate": 3.8013299251326775e-06, "loss": 0.4999, "step": 9378 }, { "epoch": 0.59, "grad_norm": 3.241316463523188, "learning_rate": 3.800341173686706e-06, "loss": 0.2681, "step": 9379 }, { "epoch": 0.59, "grad_norm": 2.1789631352634182, "learning_rate": 3.79935247201671e-06, "loss": 0.2764, "step": 9380 }, { "epoch": 0.59, "grad_norm": 1.931271919058685, "learning_rate": 3.7983638201637124e-06, "loss": 0.2557, "step": 9381 }, { "epoch": 0.59, "grad_norm": 7.297751134830079, "learning_rate": 3.7973752181687336e-06, "loss": 0.2736, "step": 9382 }, { "epoch": 0.59, "grad_norm": 1.4650614200000651, "learning_rate": 3.7963866660727918e-06, "loss": 0.2677, "step": 9383 }, { "epoch": 0.59, "grad_norm": 1.5135719897702828, "learning_rate": 3.7953981639169063e-06, "loss": 0.2601, "step": 9384 }, { "epoch": 0.59, "grad_norm": 2.4514162525073404, "learning_rate": 3.7944097117420913e-06, "loss": 0.2797, "step": 9385 }, { "epoch": 0.59, "grad_norm": 4.396225022896346, "learning_rate": 3.793421309589358e-06, "loss": 0.2727, "step": 9386 }, { "epoch": 0.59, "grad_norm": 1.50773517879765, "learning_rate": 3.792432957499719e-06, "loss": 0.2707, "step": 9387 }, { "epoch": 0.59, "grad_norm": 1.8732056382137225, "learning_rate": 3.7914446555141805e-06, "loss": 0.2551, "step": 9388 }, { "epoch": 0.59, "grad_norm": 0.6133073243689083, "learning_rate": 3.7904564036737502e-06, "loss": 0.5252, "step": 9389 }, { "epoch": 0.59, "grad_norm": 1.7486217173572725, "learning_rate": 3.7894682020194296e-06, "loss": 0.2639, "step": 9390 }, { "epoch": 0.59, "grad_norm": 2.292660267779196, "learning_rate": 3.788480050592226e-06, "loss": 0.2578, "step": 9391 }, { "epoch": 0.59, "grad_norm": 1.8630531458317638, "learning_rate": 3.7874919494331363e-06, "loss": 0.268, "step": 9392 }, { "epoch": 0.59, "grad_norm": 2.87811553970909, "learning_rate": 3.78650389858316e-06, "loss": 0.2967, "step": 9393 }, { "epoch": 0.59, "grad_norm": 1.5650994855817189, "learning_rate": 3.7855158980832924e-06, "loss": 0.2624, "step": 9394 }, { "epoch": 0.59, "grad_norm": 1.4322746388884324, "learning_rate": 3.7845279479745277e-06, "loss": 0.2668, "step": 9395 }, { "epoch": 0.59, "grad_norm": 2.359742918628945, "learning_rate": 3.7835400482978565e-06, "loss": 0.2761, "step": 9396 }, { "epoch": 0.59, "grad_norm": 1.5529438969965241, "learning_rate": 3.78255219909427e-06, "loss": 0.2994, "step": 9397 }, { "epoch": 0.59, "grad_norm": 5.203903238239566, "learning_rate": 3.781564400404752e-06, "loss": 0.2731, "step": 9398 }, { "epoch": 0.59, "grad_norm": 3.66217503373457, "learning_rate": 3.780576652270295e-06, "loss": 0.2969, "step": 9399 }, { "epoch": 0.59, "grad_norm": 2.2899445082648175, "learning_rate": 3.7795889547318764e-06, "loss": 0.2672, "step": 9400 }, { "epoch": 0.59, "grad_norm": 21.069135078016973, "learning_rate": 3.778601307830482e-06, "loss": 0.2834, "step": 9401 }, { "epoch": 0.59, "grad_norm": 1.6697055773900062, "learning_rate": 3.777613711607087e-06, "loss": 0.2886, "step": 9402 }, { "epoch": 0.59, "grad_norm": 1.8849297878403781, "learning_rate": 3.776626166102672e-06, "loss": 0.2937, "step": 9403 }, { "epoch": 0.59, "grad_norm": 4.233006061379948, "learning_rate": 3.7756386713582086e-06, "loss": 0.2789, "step": 9404 }, { "epoch": 0.59, "grad_norm": 1.848638240209906, "learning_rate": 3.7746512274146707e-06, "loss": 0.2647, "step": 9405 }, { "epoch": 0.59, "grad_norm": 1.6240572167892289, "learning_rate": 3.773663834313031e-06, "loss": 0.2654, "step": 9406 }, { "epoch": 0.59, "grad_norm": 1.6588593622644934, "learning_rate": 3.7726764920942584e-06, "loss": 0.2684, "step": 9407 }, { "epoch": 0.59, "grad_norm": 2.4118707001780626, "learning_rate": 3.7716892007993166e-06, "loss": 0.2892, "step": 9408 }, { "epoch": 0.59, "grad_norm": 2.497149809190493, "learning_rate": 3.7707019604691725e-06, "loss": 0.254, "step": 9409 }, { "epoch": 0.59, "grad_norm": 8.524310881558819, "learning_rate": 3.7697147711447867e-06, "loss": 0.2731, "step": 9410 }, { "epoch": 0.59, "grad_norm": 1.8771781720957033, "learning_rate": 3.7687276328671215e-06, "loss": 0.2852, "step": 9411 }, { "epoch": 0.59, "grad_norm": 6.266244121038863, "learning_rate": 3.7677405456771325e-06, "loss": 0.2612, "step": 9412 }, { "epoch": 0.59, "grad_norm": 3.647449996299619, "learning_rate": 3.766753509615776e-06, "loss": 0.2778, "step": 9413 }, { "epoch": 0.59, "grad_norm": 2.6143241562048347, "learning_rate": 3.7657665247240083e-06, "loss": 0.2749, "step": 9414 }, { "epoch": 0.59, "grad_norm": 6.357830174106946, "learning_rate": 3.76477959104278e-06, "loss": 0.2874, "step": 9415 }, { "epoch": 0.59, "grad_norm": 2.6185836244562273, "learning_rate": 3.7637927086130398e-06, "loss": 0.3081, "step": 9416 }, { "epoch": 0.59, "grad_norm": 2.9563248475437076, "learning_rate": 3.762805877475737e-06, "loss": 0.2863, "step": 9417 }, { "epoch": 0.59, "grad_norm": 1.730649850736178, "learning_rate": 3.761819097671815e-06, "loss": 0.2695, "step": 9418 }, { "epoch": 0.59, "grad_norm": 2.1894437178596085, "learning_rate": 3.760832369242219e-06, "loss": 0.2765, "step": 9419 }, { "epoch": 0.59, "grad_norm": 2.5623841114549686, "learning_rate": 3.7598456922278855e-06, "loss": 0.2895, "step": 9420 }, { "epoch": 0.59, "grad_norm": 1.4984246170610744, "learning_rate": 3.75885906666976e-06, "loss": 0.2524, "step": 9421 }, { "epoch": 0.59, "grad_norm": 1.983242143886795, "learning_rate": 3.757872492608775e-06, "loss": 0.2677, "step": 9422 }, { "epoch": 0.59, "grad_norm": 1.8807899664735235, "learning_rate": 3.7568859700858685e-06, "loss": 0.292, "step": 9423 }, { "epoch": 0.59, "grad_norm": 1.7306002817121755, "learning_rate": 3.7558994991419694e-06, "loss": 0.2591, "step": 9424 }, { "epoch": 0.59, "grad_norm": 2.767129311082147, "learning_rate": 3.754913079818012e-06, "loss": 0.3087, "step": 9425 }, { "epoch": 0.59, "grad_norm": 3.1372657968123443, "learning_rate": 3.75392671215492e-06, "loss": 0.2889, "step": 9426 }, { "epoch": 0.59, "grad_norm": 1.8084331556717348, "learning_rate": 3.7529403961936217e-06, "loss": 0.2693, "step": 9427 }, { "epoch": 0.59, "grad_norm": 2.3941833346339667, "learning_rate": 3.751954131975042e-06, "loss": 0.2737, "step": 9428 }, { "epoch": 0.59, "grad_norm": 1.6733878253655239, "learning_rate": 3.7509679195401033e-06, "loss": 0.2736, "step": 9429 }, { "epoch": 0.59, "grad_norm": 3.475572726616073, "learning_rate": 3.749981758929724e-06, "loss": 0.2622, "step": 9430 }, { "epoch": 0.59, "grad_norm": 0.5976562643264937, "learning_rate": 3.7489956501848214e-06, "loss": 0.4603, "step": 9431 }, { "epoch": 0.59, "grad_norm": 2.140521257382837, "learning_rate": 3.748009593346311e-06, "loss": 0.2665, "step": 9432 }, { "epoch": 0.59, "grad_norm": 2.2425698579874407, "learning_rate": 3.7470235884551077e-06, "loss": 0.2951, "step": 9433 }, { "epoch": 0.59, "grad_norm": 2.6330897447653205, "learning_rate": 3.74603763555212e-06, "loss": 0.2741, "step": 9434 }, { "epoch": 0.59, "grad_norm": 1.7797622857168383, "learning_rate": 3.7450517346782565e-06, "loss": 0.2763, "step": 9435 }, { "epoch": 0.59, "grad_norm": 2.9628850449714346, "learning_rate": 3.7440658858744274e-06, "loss": 0.2518, "step": 9436 }, { "epoch": 0.59, "grad_norm": 5.0060470636898975, "learning_rate": 3.7430800891815356e-06, "loss": 0.2906, "step": 9437 }, { "epoch": 0.59, "grad_norm": 1.486718453980096, "learning_rate": 3.7420943446404835e-06, "loss": 0.2733, "step": 9438 }, { "epoch": 0.59, "grad_norm": 3.0856812294426126, "learning_rate": 3.7411086522921714e-06, "loss": 0.2784, "step": 9439 }, { "epoch": 0.59, "grad_norm": 1.869901731710971, "learning_rate": 3.7401230121774966e-06, "loss": 0.2764, "step": 9440 }, { "epoch": 0.59, "grad_norm": 4.112262487038523, "learning_rate": 3.739137424337357e-06, "loss": 0.2543, "step": 9441 }, { "epoch": 0.59, "grad_norm": 8.635283562506698, "learning_rate": 3.7381518888126434e-06, "loss": 0.2939, "step": 9442 }, { "epoch": 0.59, "grad_norm": 10.366542969840427, "learning_rate": 3.73716640564425e-06, "loss": 0.3017, "step": 9443 }, { "epoch": 0.59, "grad_norm": 1.4567349436988268, "learning_rate": 3.736180974873065e-06, "loss": 0.2892, "step": 9444 }, { "epoch": 0.59, "grad_norm": 7.916334200846063, "learning_rate": 3.735195596539978e-06, "loss": 0.2974, "step": 9445 }, { "epoch": 0.59, "grad_norm": 1.306518722634645, "learning_rate": 3.734210270685871e-06, "loss": 0.2655, "step": 9446 }, { "epoch": 0.59, "grad_norm": 2.316169047949291, "learning_rate": 3.733224997351629e-06, "loss": 0.2803, "step": 9447 }, { "epoch": 0.59, "grad_norm": 4.0936376291264605, "learning_rate": 3.7322397765781304e-06, "loss": 0.2902, "step": 9448 }, { "epoch": 0.59, "grad_norm": 5.853870498108596, "learning_rate": 3.731254608406257e-06, "loss": 0.2482, "step": 9449 }, { "epoch": 0.59, "grad_norm": 5.200638253883915, "learning_rate": 3.730269492876881e-06, "loss": 0.2712, "step": 9450 }, { "epoch": 0.59, "grad_norm": 1.4939312279906427, "learning_rate": 3.7292844300308808e-06, "loss": 0.2799, "step": 9451 }, { "epoch": 0.59, "grad_norm": 2.0311724594938396, "learning_rate": 3.728299419909126e-06, "loss": 0.281, "step": 9452 }, { "epoch": 0.59, "grad_norm": 2.0709075397930206, "learning_rate": 3.7273144625524883e-06, "loss": 0.2861, "step": 9453 }, { "epoch": 0.59, "grad_norm": 1.3837120508819816, "learning_rate": 3.7263295580018326e-06, "loss": 0.2674, "step": 9454 }, { "epoch": 0.59, "grad_norm": 2.591818620406059, "learning_rate": 3.7253447062980276e-06, "loss": 0.2651, "step": 9455 }, { "epoch": 0.59, "grad_norm": 2.938159348620676, "learning_rate": 3.724359907481933e-06, "loss": 0.2679, "step": 9456 }, { "epoch": 0.59, "grad_norm": 4.725483164376001, "learning_rate": 3.7233751615944115e-06, "loss": 0.2777, "step": 9457 }, { "epoch": 0.59, "grad_norm": 3.838041008716286, "learning_rate": 3.7223904686763222e-06, "loss": 0.2736, "step": 9458 }, { "epoch": 0.59, "grad_norm": 1.664735300992799, "learning_rate": 3.7214058287685228e-06, "loss": 0.2697, "step": 9459 }, { "epoch": 0.59, "grad_norm": 6.254194483518903, "learning_rate": 3.7204212419118656e-06, "loss": 0.2801, "step": 9460 }, { "epoch": 0.59, "grad_norm": 1.681588347728504, "learning_rate": 3.719436708147206e-06, "loss": 0.2703, "step": 9461 }, { "epoch": 0.6, "grad_norm": 1.4553635322149021, "learning_rate": 3.71845222751539e-06, "loss": 0.2541, "step": 9462 }, { "epoch": 0.6, "grad_norm": 1.7166927046328586, "learning_rate": 3.7174678000572684e-06, "loss": 0.2831, "step": 9463 }, { "epoch": 0.6, "grad_norm": 3.008685742866287, "learning_rate": 3.7164834258136847e-06, "loss": 0.2794, "step": 9464 }, { "epoch": 0.6, "grad_norm": 2.4774342221177545, "learning_rate": 3.7154991048254823e-06, "loss": 0.2678, "step": 9465 }, { "epoch": 0.6, "grad_norm": 1.443197475780415, "learning_rate": 3.7145148371335048e-06, "loss": 0.2886, "step": 9466 }, { "epoch": 0.6, "grad_norm": 2.467522895981137, "learning_rate": 3.713530622778591e-06, "loss": 0.2793, "step": 9467 }, { "epoch": 0.6, "grad_norm": 1.8569011603306602, "learning_rate": 3.712546461801576e-06, "loss": 0.301, "step": 9468 }, { "epoch": 0.6, "grad_norm": 2.245168060416518, "learning_rate": 3.7115623542432956e-06, "loss": 0.2734, "step": 9469 }, { "epoch": 0.6, "grad_norm": 4.025178619022494, "learning_rate": 3.7105783001445806e-06, "loss": 0.2636, "step": 9470 }, { "epoch": 0.6, "grad_norm": 1.2278134931293527, "learning_rate": 3.7095942995462636e-06, "loss": 0.2647, "step": 9471 }, { "epoch": 0.6, "grad_norm": 2.1190219662361325, "learning_rate": 3.7086103524891685e-06, "loss": 0.2782, "step": 9472 }, { "epoch": 0.6, "grad_norm": 2.2340393525549143, "learning_rate": 3.7076264590141254e-06, "loss": 0.2719, "step": 9473 }, { "epoch": 0.6, "grad_norm": 1.9674372597394898, "learning_rate": 3.706642619161955e-06, "loss": 0.2818, "step": 9474 }, { "epoch": 0.6, "grad_norm": 2.1223506864169996, "learning_rate": 3.7056588329734824e-06, "loss": 0.285, "step": 9475 }, { "epoch": 0.6, "grad_norm": 2.1125003844713035, "learning_rate": 3.704675100489521e-06, "loss": 0.272, "step": 9476 }, { "epoch": 0.6, "grad_norm": 4.984208235662652, "learning_rate": 3.7036914217508925e-06, "loss": 0.2669, "step": 9477 }, { "epoch": 0.6, "grad_norm": 2.0854133214183492, "learning_rate": 3.7027077967984082e-06, "loss": 0.293, "step": 9478 }, { "epoch": 0.6, "grad_norm": 1.723829858868617, "learning_rate": 3.7017242256728823e-06, "loss": 0.2886, "step": 9479 }, { "epoch": 0.6, "grad_norm": 3.370612351249497, "learning_rate": 3.7007407084151227e-06, "loss": 0.2492, "step": 9480 }, { "epoch": 0.6, "grad_norm": 1.740248805408432, "learning_rate": 3.6997572450659393e-06, "loss": 0.2811, "step": 9481 }, { "epoch": 0.6, "grad_norm": 0.5811140238654917, "learning_rate": 3.6987738356661377e-06, "loss": 0.4914, "step": 9482 }, { "epoch": 0.6, "grad_norm": 5.374049072434476, "learning_rate": 3.6977904802565224e-06, "loss": 0.2781, "step": 9483 }, { "epoch": 0.6, "grad_norm": 1.8660932853686598, "learning_rate": 3.6968071788778915e-06, "loss": 0.2557, "step": 9484 }, { "epoch": 0.6, "grad_norm": 1.9796606495350515, "learning_rate": 3.6958239315710467e-06, "loss": 0.2699, "step": 9485 }, { "epoch": 0.6, "grad_norm": 4.372045014189948, "learning_rate": 3.6948407383767814e-06, "loss": 0.2952, "step": 9486 }, { "epoch": 0.6, "grad_norm": 2.1563314601829284, "learning_rate": 3.693857599335892e-06, "loss": 0.2675, "step": 9487 }, { "epoch": 0.6, "grad_norm": 2.778049127970359, "learning_rate": 3.6928745144891733e-06, "loss": 0.2735, "step": 9488 }, { "epoch": 0.6, "grad_norm": 2.650237607707643, "learning_rate": 3.6918914838774112e-06, "loss": 0.2921, "step": 9489 }, { "epoch": 0.6, "grad_norm": 3.5079599585202224, "learning_rate": 3.6909085075413944e-06, "loss": 0.2912, "step": 9490 }, { "epoch": 0.6, "grad_norm": 2.219577893499021, "learning_rate": 3.6899255855219103e-06, "loss": 0.2749, "step": 9491 }, { "epoch": 0.6, "grad_norm": 2.6408530900837763, "learning_rate": 3.688942717859739e-06, "loss": 0.2782, "step": 9492 }, { "epoch": 0.6, "grad_norm": 1.8833484663984135, "learning_rate": 3.687959904595665e-06, "loss": 0.2658, "step": 9493 }, { "epoch": 0.6, "grad_norm": 1.4750824970868708, "learning_rate": 3.686977145770464e-06, "loss": 0.2938, "step": 9494 }, { "epoch": 0.6, "grad_norm": 1.5744418776824995, "learning_rate": 3.6859944414249115e-06, "loss": 0.2761, "step": 9495 }, { "epoch": 0.6, "grad_norm": 3.3620626512794933, "learning_rate": 3.685011791599786e-06, "loss": 0.2546, "step": 9496 }, { "epoch": 0.6, "grad_norm": 2.194208138257682, "learning_rate": 3.6840291963358564e-06, "loss": 0.2784, "step": 9497 }, { "epoch": 0.6, "grad_norm": 3.2014091159418414, "learning_rate": 3.683046655673892e-06, "loss": 0.2624, "step": 9498 }, { "epoch": 0.6, "grad_norm": 2.0876981272408086, "learning_rate": 3.682064169654663e-06, "loss": 0.2714, "step": 9499 }, { "epoch": 0.6, "grad_norm": 1.9262082312810136, "learning_rate": 3.681081738318931e-06, "loss": 0.2787, "step": 9500 }, { "epoch": 0.6, "grad_norm": 1.8365381110416983, "learning_rate": 3.680099361707461e-06, "loss": 0.2538, "step": 9501 }, { "epoch": 0.6, "grad_norm": 1.6147746977786728, "learning_rate": 3.6791170398610117e-06, "loss": 0.2692, "step": 9502 }, { "epoch": 0.6, "grad_norm": 1.5471520912199355, "learning_rate": 3.6781347728203433e-06, "loss": 0.3003, "step": 9503 }, { "epoch": 0.6, "grad_norm": 1.770211887379422, "learning_rate": 3.6771525606262106e-06, "loss": 0.2785, "step": 9504 }, { "epoch": 0.6, "grad_norm": 3.170504245820591, "learning_rate": 3.676170403319369e-06, "loss": 0.2679, "step": 9505 }, { "epoch": 0.6, "grad_norm": 1.6214248961645321, "learning_rate": 3.6751883009405677e-06, "loss": 0.2667, "step": 9506 }, { "epoch": 0.6, "grad_norm": 1.5515567298608008, "learning_rate": 3.674206253530558e-06, "loss": 0.2733, "step": 9507 }, { "epoch": 0.6, "grad_norm": 3.007672022318736, "learning_rate": 3.673224261130085e-06, "loss": 0.259, "step": 9508 }, { "epoch": 0.6, "grad_norm": 2.1697933187202643, "learning_rate": 3.6722423237798934e-06, "loss": 0.2869, "step": 9509 }, { "epoch": 0.6, "grad_norm": 4.79572793943694, "learning_rate": 3.671260441520727e-06, "loss": 0.2651, "step": 9510 }, { "epoch": 0.6, "grad_norm": 1.8426392257354633, "learning_rate": 3.6702786143933244e-06, "loss": 0.2844, "step": 9511 }, { "epoch": 0.6, "grad_norm": 1.6975805241094903, "learning_rate": 3.6692968424384246e-06, "loss": 0.2553, "step": 9512 }, { "epoch": 0.6, "grad_norm": 2.6287716843869617, "learning_rate": 3.668315125696763e-06, "loss": 0.2726, "step": 9513 }, { "epoch": 0.6, "grad_norm": 2.325283612108341, "learning_rate": 3.667333464209071e-06, "loss": 0.2788, "step": 9514 }, { "epoch": 0.6, "grad_norm": 4.702951496301354, "learning_rate": 3.6663518580160816e-06, "loss": 0.2747, "step": 9515 }, { "epoch": 0.6, "grad_norm": 2.004179882663363, "learning_rate": 3.665370307158521e-06, "loss": 0.2756, "step": 9516 }, { "epoch": 0.6, "grad_norm": 1.5485402154075512, "learning_rate": 3.6643888116771166e-06, "loss": 0.2737, "step": 9517 }, { "epoch": 0.6, "grad_norm": 1.4038471225670404, "learning_rate": 3.663407371612595e-06, "loss": 0.2727, "step": 9518 }, { "epoch": 0.6, "grad_norm": 1.7067413961004168, "learning_rate": 3.6624259870056745e-06, "loss": 0.2655, "step": 9519 }, { "epoch": 0.6, "grad_norm": 5.277148410551273, "learning_rate": 3.661444657897075e-06, "loss": 0.3208, "step": 9520 }, { "epoch": 0.6, "grad_norm": 1.7869677855628887, "learning_rate": 3.6604633843275155e-06, "loss": 0.2756, "step": 9521 }, { "epoch": 0.6, "grad_norm": 2.4524726043992415, "learning_rate": 3.6594821663377084e-06, "loss": 0.2738, "step": 9522 }, { "epoch": 0.6, "grad_norm": 4.060214208521723, "learning_rate": 3.6585010039683684e-06, "loss": 0.2724, "step": 9523 }, { "epoch": 0.6, "grad_norm": 1.8525163279935382, "learning_rate": 3.6575198972602024e-06, "loss": 0.2647, "step": 9524 }, { "epoch": 0.6, "grad_norm": 2.025691398342604, "learning_rate": 3.6565388462539216e-06, "loss": 0.2776, "step": 9525 }, { "epoch": 0.6, "grad_norm": 1.6410247161687948, "learning_rate": 3.655557850990231e-06, "loss": 0.2577, "step": 9526 }, { "epoch": 0.6, "grad_norm": 3.0813297162239737, "learning_rate": 3.6545769115098324e-06, "loss": 0.2805, "step": 9527 }, { "epoch": 0.6, "grad_norm": 4.382361030620752, "learning_rate": 3.6535960278534273e-06, "loss": 0.2588, "step": 9528 }, { "epoch": 0.6, "grad_norm": 1.6712409287031182, "learning_rate": 3.6526152000617153e-06, "loss": 0.2617, "step": 9529 }, { "epoch": 0.6, "grad_norm": 0.6444792416426703, "learning_rate": 3.651634428175391e-06, "loss": 0.5259, "step": 9530 }, { "epoch": 0.6, "grad_norm": 4.142673954328926, "learning_rate": 3.6506537122351506e-06, "loss": 0.2795, "step": 9531 }, { "epoch": 0.6, "grad_norm": 1.4073711576279704, "learning_rate": 3.6496730522816826e-06, "loss": 0.2696, "step": 9532 }, { "epoch": 0.6, "grad_norm": 1.810236169890428, "learning_rate": 3.6486924483556785e-06, "loss": 0.288, "step": 9533 }, { "epoch": 0.6, "grad_norm": 2.2463740861290313, "learning_rate": 3.6477119004978256e-06, "loss": 0.2631, "step": 9534 }, { "epoch": 0.6, "grad_norm": 0.6096661233310655, "learning_rate": 3.646731408748808e-06, "loss": 0.485, "step": 9535 }, { "epoch": 0.6, "grad_norm": 1.5564148241602025, "learning_rate": 3.6457509731493067e-06, "loss": 0.2662, "step": 9536 }, { "epoch": 0.6, "grad_norm": 2.2205695796794913, "learning_rate": 3.6447705937400046e-06, "loss": 0.2825, "step": 9537 }, { "epoch": 0.6, "grad_norm": 2.3763773775770396, "learning_rate": 3.6437902705615767e-06, "loss": 0.272, "step": 9538 }, { "epoch": 0.6, "grad_norm": 1.6252637185109093, "learning_rate": 3.642810003654699e-06, "loss": 0.285, "step": 9539 }, { "epoch": 0.6, "grad_norm": 4.600352836256971, "learning_rate": 3.6418297930600453e-06, "loss": 0.2809, "step": 9540 }, { "epoch": 0.6, "grad_norm": 1.8982121176160733, "learning_rate": 3.6408496388182857e-06, "loss": 0.2713, "step": 9541 }, { "epoch": 0.6, "grad_norm": 1.8034384724322994, "learning_rate": 3.639869540970089e-06, "loss": 0.261, "step": 9542 }, { "epoch": 0.6, "grad_norm": 2.097463555484217, "learning_rate": 3.6388894995561217e-06, "loss": 0.2792, "step": 9543 }, { "epoch": 0.6, "grad_norm": 1.704476350937524, "learning_rate": 3.637909514617046e-06, "loss": 0.2811, "step": 9544 }, { "epoch": 0.6, "grad_norm": 2.3409169881743384, "learning_rate": 3.6369295861935244e-06, "loss": 0.2808, "step": 9545 }, { "epoch": 0.6, "grad_norm": 3.630134912623273, "learning_rate": 3.6359497143262147e-06, "loss": 0.2964, "step": 9546 }, { "epoch": 0.6, "grad_norm": 2.659050686141471, "learning_rate": 3.6349698990557726e-06, "loss": 0.2805, "step": 9547 }, { "epoch": 0.6, "grad_norm": 3.6016395129545904, "learning_rate": 3.6339901404228563e-06, "loss": 0.2605, "step": 9548 }, { "epoch": 0.6, "grad_norm": 17.524636413876273, "learning_rate": 3.6330104384681146e-06, "loss": 0.2828, "step": 9549 }, { "epoch": 0.6, "grad_norm": 2.1901263874629353, "learning_rate": 3.6320307932321975e-06, "loss": 0.2774, "step": 9550 }, { "epoch": 0.6, "grad_norm": 2.596512830833323, "learning_rate": 3.631051204755754e-06, "loss": 0.2891, "step": 9551 }, { "epoch": 0.6, "grad_norm": 1.7342481675109205, "learning_rate": 3.630071673079426e-06, "loss": 0.278, "step": 9552 }, { "epoch": 0.6, "grad_norm": 2.0497940911905683, "learning_rate": 3.629092198243859e-06, "loss": 0.277, "step": 9553 }, { "epoch": 0.6, "grad_norm": 1.957816654030364, "learning_rate": 3.6281127802896897e-06, "loss": 0.2831, "step": 9554 }, { "epoch": 0.6, "grad_norm": 6.179435272257459, "learning_rate": 3.6271334192575588e-06, "loss": 0.2623, "step": 9555 }, { "epoch": 0.6, "grad_norm": 1.9349456326869183, "learning_rate": 3.626154115188102e-06, "loss": 0.272, "step": 9556 }, { "epoch": 0.6, "grad_norm": 1.9523069537094317, "learning_rate": 3.6251748681219494e-06, "loss": 0.2677, "step": 9557 }, { "epoch": 0.6, "grad_norm": 2.4384347855510473, "learning_rate": 3.6241956780997345e-06, "loss": 0.2821, "step": 9558 }, { "epoch": 0.6, "grad_norm": 1.8262044086859577, "learning_rate": 3.623216545162085e-06, "loss": 0.2821, "step": 9559 }, { "epoch": 0.6, "grad_norm": 1.9774129701399263, "learning_rate": 3.6222374693496252e-06, "loss": 0.2746, "step": 9560 }, { "epoch": 0.6, "grad_norm": 3.2819152854959217, "learning_rate": 3.621258450702982e-06, "loss": 0.3035, "step": 9561 }, { "epoch": 0.6, "grad_norm": 1.9882917236724367, "learning_rate": 3.620279489262772e-06, "loss": 0.2748, "step": 9562 }, { "epoch": 0.6, "grad_norm": 2.247525828893059, "learning_rate": 3.6193005850696173e-06, "loss": 0.2816, "step": 9563 }, { "epoch": 0.6, "grad_norm": 1.8202638992073676, "learning_rate": 3.6183217381641355e-06, "loss": 0.2862, "step": 9564 }, { "epoch": 0.6, "grad_norm": 7.940772928729769, "learning_rate": 3.617342948586937e-06, "loss": 0.2683, "step": 9565 }, { "epoch": 0.6, "grad_norm": 1.7387636580466237, "learning_rate": 3.616364216378636e-06, "loss": 0.2791, "step": 9566 }, { "epoch": 0.6, "grad_norm": 2.1988999742472717, "learning_rate": 3.615385541579842e-06, "loss": 0.2757, "step": 9567 }, { "epoch": 0.6, "grad_norm": 3.2291481414072614, "learning_rate": 3.614406924231161e-06, "loss": 0.2854, "step": 9568 }, { "epoch": 0.6, "grad_norm": 1.9529787782012953, "learning_rate": 3.613428364373196e-06, "loss": 0.2823, "step": 9569 }, { "epoch": 0.6, "grad_norm": 1.7818943939925946, "learning_rate": 3.612449862046553e-06, "loss": 0.2595, "step": 9570 }, { "epoch": 0.6, "grad_norm": 3.4601553066560093, "learning_rate": 3.611471417291829e-06, "loss": 0.2745, "step": 9571 }, { "epoch": 0.6, "grad_norm": 1.6440577796244955, "learning_rate": 3.610493030149623e-06, "loss": 0.2721, "step": 9572 }, { "epoch": 0.6, "grad_norm": 2.678286651123387, "learning_rate": 3.6095147006605293e-06, "loss": 0.2681, "step": 9573 }, { "epoch": 0.6, "grad_norm": 1.7834784881495682, "learning_rate": 3.6085364288651394e-06, "loss": 0.2722, "step": 9574 }, { "epoch": 0.6, "grad_norm": 2.814325177259081, "learning_rate": 3.6075582148040454e-06, "loss": 0.2871, "step": 9575 }, { "epoch": 0.6, "grad_norm": 0.6202310688168078, "learning_rate": 3.606580058517834e-06, "loss": 0.5071, "step": 9576 }, { "epoch": 0.6, "grad_norm": 1.7258981426967315, "learning_rate": 3.6056019600470893e-06, "loss": 0.2909, "step": 9577 }, { "epoch": 0.6, "grad_norm": 1.7696629825994823, "learning_rate": 3.6046239194323983e-06, "loss": 0.264, "step": 9578 }, { "epoch": 0.6, "grad_norm": 1.8065173962302052, "learning_rate": 3.6036459367143388e-06, "loss": 0.2726, "step": 9579 }, { "epoch": 0.6, "grad_norm": 1.2862529438157462, "learning_rate": 3.602668011933489e-06, "loss": 0.276, "step": 9580 }, { "epoch": 0.6, "grad_norm": 2.418036462727418, "learning_rate": 3.6016901451304265e-06, "loss": 0.3053, "step": 9581 }, { "epoch": 0.6, "grad_norm": 2.4270508975282663, "learning_rate": 3.6007123363457232e-06, "loss": 0.2808, "step": 9582 }, { "epoch": 0.6, "grad_norm": 7.536831210253516, "learning_rate": 3.5997345856199506e-06, "loss": 0.2839, "step": 9583 }, { "epoch": 0.6, "grad_norm": 2.9428651752459847, "learning_rate": 3.5987568929936756e-06, "loss": 0.2851, "step": 9584 }, { "epoch": 0.6, "grad_norm": 1.5331140393216478, "learning_rate": 3.5977792585074668e-06, "loss": 0.2546, "step": 9585 }, { "epoch": 0.6, "grad_norm": 3.3586872468600992, "learning_rate": 3.596801682201888e-06, "loss": 0.284, "step": 9586 }, { "epoch": 0.6, "grad_norm": 2.404316042555868, "learning_rate": 3.5958241641174995e-06, "loss": 0.2785, "step": 9587 }, { "epoch": 0.6, "grad_norm": 2.1202416960554027, "learning_rate": 3.5948467042948597e-06, "loss": 0.2809, "step": 9588 }, { "epoch": 0.6, "grad_norm": 2.6431241123399385, "learning_rate": 3.5938693027745276e-06, "loss": 0.2767, "step": 9589 }, { "epoch": 0.6, "grad_norm": 1.3141477584046433, "learning_rate": 3.5928919595970546e-06, "loss": 0.2599, "step": 9590 }, { "epoch": 0.6, "grad_norm": 1.770921993973036, "learning_rate": 3.5919146748029923e-06, "loss": 0.2676, "step": 9591 }, { "epoch": 0.6, "grad_norm": 1.6714197358579923, "learning_rate": 3.590937448432893e-06, "loss": 0.2825, "step": 9592 }, { "epoch": 0.6, "grad_norm": 1.5364436102142878, "learning_rate": 3.589960280527301e-06, "loss": 0.278, "step": 9593 }, { "epoch": 0.6, "grad_norm": 3.229902802414021, "learning_rate": 3.5889831711267618e-06, "loss": 0.2728, "step": 9594 }, { "epoch": 0.6, "grad_norm": 1.7823816207742216, "learning_rate": 3.5880061202718175e-06, "loss": 0.2687, "step": 9595 }, { "epoch": 0.6, "grad_norm": 2.947545949653769, "learning_rate": 3.587029128003006e-06, "loss": 0.2854, "step": 9596 }, { "epoch": 0.6, "grad_norm": 1.9485674708226834, "learning_rate": 3.5860521943608672e-06, "loss": 0.2777, "step": 9597 }, { "epoch": 0.6, "grad_norm": 1.4068436711392398, "learning_rate": 3.5850753193859333e-06, "loss": 0.2802, "step": 9598 }, { "epoch": 0.6, "grad_norm": 2.1895938060414317, "learning_rate": 3.584098503118737e-06, "loss": 0.2553, "step": 9599 }, { "epoch": 0.6, "grad_norm": 2.440780159511413, "learning_rate": 3.5831217455998103e-06, "loss": 0.2854, "step": 9600 }, { "epoch": 0.6, "grad_norm": 2.7029271831529336, "learning_rate": 3.5821450468696772e-06, "loss": 0.2564, "step": 9601 }, { "epoch": 0.6, "grad_norm": 6.233549206608332, "learning_rate": 3.581168406968867e-06, "loss": 0.2702, "step": 9602 }, { "epoch": 0.6, "grad_norm": 2.5303859237555177, "learning_rate": 3.5801918259378975e-06, "loss": 0.2618, "step": 9603 }, { "epoch": 0.6, "grad_norm": 2.476267417080039, "learning_rate": 3.5792153038172916e-06, "loss": 0.2846, "step": 9604 }, { "epoch": 0.6, "grad_norm": 1.717802952989295, "learning_rate": 3.5782388406475675e-06, "loss": 0.2742, "step": 9605 }, { "epoch": 0.6, "grad_norm": 1.9311225811939217, "learning_rate": 3.577262436469237e-06, "loss": 0.2551, "step": 9606 }, { "epoch": 0.6, "grad_norm": 1.5687961319052377, "learning_rate": 3.5762860913228157e-06, "loss": 0.2778, "step": 9607 }, { "epoch": 0.6, "grad_norm": 1.662154331815429, "learning_rate": 3.575309805248815e-06, "loss": 0.2594, "step": 9608 }, { "epoch": 0.6, "grad_norm": 2.770985071755837, "learning_rate": 3.57433357828774e-06, "loss": 0.276, "step": 9609 }, { "epoch": 0.6, "grad_norm": 1.9560992930775103, "learning_rate": 3.5733574104800973e-06, "loss": 0.2923, "step": 9610 }, { "epoch": 0.6, "grad_norm": 2.889595933118841, "learning_rate": 3.5723813018663895e-06, "loss": 0.2866, "step": 9611 }, { "epoch": 0.6, "grad_norm": 1.8200372092854165, "learning_rate": 3.5714052524871166e-06, "loss": 0.2617, "step": 9612 }, { "epoch": 0.6, "grad_norm": 1.854558588932161, "learning_rate": 3.5704292623827786e-06, "loss": 0.2647, "step": 9613 }, { "epoch": 0.6, "grad_norm": 8.259319595869737, "learning_rate": 3.569453331593867e-06, "loss": 0.294, "step": 9614 }, { "epoch": 0.6, "grad_norm": 2.326140397985674, "learning_rate": 3.5684774601608797e-06, "loss": 0.2748, "step": 9615 }, { "epoch": 0.6, "grad_norm": 1.686550438990365, "learning_rate": 3.5675016481243065e-06, "loss": 0.273, "step": 9616 }, { "epoch": 0.6, "grad_norm": 1.8149565447399794, "learning_rate": 3.566525895524632e-06, "loss": 0.2666, "step": 9617 }, { "epoch": 0.6, "grad_norm": 1.734911669419096, "learning_rate": 3.5655502024023447e-06, "loss": 0.2823, "step": 9618 }, { "epoch": 0.6, "grad_norm": 6.464908315509748, "learning_rate": 3.564574568797928e-06, "loss": 0.267, "step": 9619 }, { "epoch": 0.6, "grad_norm": 2.6175812496529116, "learning_rate": 3.5635989947518625e-06, "loss": 0.2653, "step": 9620 }, { "epoch": 0.61, "grad_norm": 2.6728828239123588, "learning_rate": 3.562623480304623e-06, "loss": 0.2904, "step": 9621 }, { "epoch": 0.61, "grad_norm": 4.739819967402756, "learning_rate": 3.5616480254966925e-06, "loss": 0.2685, "step": 9622 }, { "epoch": 0.61, "grad_norm": 3.673540114471451, "learning_rate": 3.5606726303685378e-06, "loss": 0.2773, "step": 9623 }, { "epoch": 0.61, "grad_norm": 1.8132328684780588, "learning_rate": 3.5596972949606333e-06, "loss": 0.2837, "step": 9624 }, { "epoch": 0.61, "grad_norm": 1.9380952332327748, "learning_rate": 3.5587220193134457e-06, "loss": 0.2864, "step": 9625 }, { "epoch": 0.61, "grad_norm": 2.7651821095245928, "learning_rate": 3.5577468034674414e-06, "loss": 0.2706, "step": 9626 }, { "epoch": 0.61, "grad_norm": 2.231627056763443, "learning_rate": 3.5567716474630853e-06, "loss": 0.2685, "step": 9627 }, { "epoch": 0.61, "grad_norm": 1.6114014750004175, "learning_rate": 3.5557965513408353e-06, "loss": 0.2595, "step": 9628 }, { "epoch": 0.61, "grad_norm": 2.01174626091983, "learning_rate": 3.554821515141151e-06, "loss": 0.2746, "step": 9629 }, { "epoch": 0.61, "grad_norm": 2.084811627206056, "learning_rate": 3.553846538904491e-06, "loss": 0.2602, "step": 9630 }, { "epoch": 0.61, "grad_norm": 2.415620312522918, "learning_rate": 3.5528716226713055e-06, "loss": 0.2632, "step": 9631 }, { "epoch": 0.61, "grad_norm": 2.1303684915519185, "learning_rate": 3.551896766482048e-06, "loss": 0.2802, "step": 9632 }, { "epoch": 0.61, "grad_norm": 1.593530392158075, "learning_rate": 3.5509219703771647e-06, "loss": 0.2547, "step": 9633 }, { "epoch": 0.61, "grad_norm": 3.328479410567316, "learning_rate": 3.5499472343971027e-06, "loss": 0.2509, "step": 9634 }, { "epoch": 0.61, "grad_norm": 13.704078756234964, "learning_rate": 3.5489725585823064e-06, "loss": 0.2883, "step": 9635 }, { "epoch": 0.61, "grad_norm": 2.634038761388328, "learning_rate": 3.5479979429732134e-06, "loss": 0.2686, "step": 9636 }, { "epoch": 0.61, "grad_norm": 3.1474182986777586, "learning_rate": 3.5470233876102672e-06, "loss": 0.2815, "step": 9637 }, { "epoch": 0.61, "grad_norm": 1.6120178257820572, "learning_rate": 3.5460488925339013e-06, "loss": 0.2823, "step": 9638 }, { "epoch": 0.61, "grad_norm": 2.6475636322091867, "learning_rate": 3.545074457784549e-06, "loss": 0.2595, "step": 9639 }, { "epoch": 0.61, "grad_norm": 1.5034596261966902, "learning_rate": 3.5441000834026427e-06, "loss": 0.2702, "step": 9640 }, { "epoch": 0.61, "grad_norm": 1.7355213612859415, "learning_rate": 3.543125769428608e-06, "loss": 0.2643, "step": 9641 }, { "epoch": 0.61, "grad_norm": 1.7340420068099016, "learning_rate": 3.542151515902874e-06, "loss": 0.2674, "step": 9642 }, { "epoch": 0.61, "grad_norm": 1.8024617141733321, "learning_rate": 3.5411773228658635e-06, "loss": 0.2716, "step": 9643 }, { "epoch": 0.61, "grad_norm": 2.428585854706941, "learning_rate": 3.5402031903579946e-06, "loss": 0.2707, "step": 9644 }, { "epoch": 0.61, "grad_norm": 3.9927899800573674, "learning_rate": 3.5392291184196903e-06, "loss": 0.2814, "step": 9645 }, { "epoch": 0.61, "grad_norm": 1.8924978471352056, "learning_rate": 3.5382551070913647e-06, "loss": 0.2616, "step": 9646 }, { "epoch": 0.61, "grad_norm": 1.9397480744608648, "learning_rate": 3.5372811564134303e-06, "loss": 0.2586, "step": 9647 }, { "epoch": 0.61, "grad_norm": 2.2187808600142427, "learning_rate": 3.5363072664263e-06, "loss": 0.2868, "step": 9648 }, { "epoch": 0.61, "grad_norm": 5.965577050260435, "learning_rate": 3.5353334371703797e-06, "loss": 0.2712, "step": 9649 }, { "epoch": 0.61, "grad_norm": 2.48875613701296, "learning_rate": 3.5343596686860757e-06, "loss": 0.274, "step": 9650 }, { "epoch": 0.61, "grad_norm": 4.033883317329177, "learning_rate": 3.533385961013793e-06, "loss": 0.2754, "step": 9651 }, { "epoch": 0.61, "grad_norm": 2.7437274630426374, "learning_rate": 3.5324123141939327e-06, "loss": 0.2636, "step": 9652 }, { "epoch": 0.61, "grad_norm": 2.274467485172925, "learning_rate": 3.5314387282668917e-06, "loss": 0.2636, "step": 9653 }, { "epoch": 0.61, "grad_norm": 2.2679854576901377, "learning_rate": 3.530465203273067e-06, "loss": 0.2601, "step": 9654 }, { "epoch": 0.61, "grad_norm": 1.7316515382461843, "learning_rate": 3.5294917392528504e-06, "loss": 0.2671, "step": 9655 }, { "epoch": 0.61, "grad_norm": 1.906097991358715, "learning_rate": 3.5285183362466346e-06, "loss": 0.2932, "step": 9656 }, { "epoch": 0.61, "grad_norm": 2.4206860044962317, "learning_rate": 3.527544994294806e-06, "loss": 0.2917, "step": 9657 }, { "epoch": 0.61, "grad_norm": 1.8644289231604958, "learning_rate": 3.5265717134377496e-06, "loss": 0.3008, "step": 9658 }, { "epoch": 0.61, "grad_norm": 2.173226564855597, "learning_rate": 3.5255984937158505e-06, "loss": 0.2708, "step": 9659 }, { "epoch": 0.61, "grad_norm": 2.0491998347676033, "learning_rate": 3.52462533516949e-06, "loss": 0.2832, "step": 9660 }, { "epoch": 0.61, "grad_norm": 3.1631596140764504, "learning_rate": 3.523652237839044e-06, "loss": 0.2863, "step": 9661 }, { "epoch": 0.61, "grad_norm": 4.586519950366805, "learning_rate": 3.52267920176489e-06, "loss": 0.2811, "step": 9662 }, { "epoch": 0.61, "grad_norm": 2.7611328724501245, "learning_rate": 3.5217062269873986e-06, "loss": 0.2673, "step": 9663 }, { "epoch": 0.61, "grad_norm": 2.0337344588209123, "learning_rate": 3.520733313546942e-06, "loss": 0.2796, "step": 9664 }, { "epoch": 0.61, "grad_norm": 2.3318295373856515, "learning_rate": 3.519760461483888e-06, "loss": 0.2879, "step": 9665 }, { "epoch": 0.61, "grad_norm": 2.1105205949953616, "learning_rate": 3.5187876708386004e-06, "loss": 0.2802, "step": 9666 }, { "epoch": 0.61, "grad_norm": 1.4433968414646754, "learning_rate": 3.517814941651444e-06, "loss": 0.2717, "step": 9667 }, { "epoch": 0.61, "grad_norm": 1.3791071439894624, "learning_rate": 3.5168422739627794e-06, "loss": 0.2735, "step": 9668 }, { "epoch": 0.61, "grad_norm": 2.4088311824071043, "learning_rate": 3.515869667812962e-06, "loss": 0.2816, "step": 9669 }, { "epoch": 0.61, "grad_norm": 2.6487354639853633, "learning_rate": 3.5148971232423495e-06, "loss": 0.2695, "step": 9670 }, { "epoch": 0.61, "grad_norm": 13.471460897138366, "learning_rate": 3.513924640291292e-06, "loss": 0.2724, "step": 9671 }, { "epoch": 0.61, "grad_norm": 1.8466820640785455, "learning_rate": 3.5129522190001407e-06, "loss": 0.2717, "step": 9672 }, { "epoch": 0.61, "grad_norm": 1.8338084218164097, "learning_rate": 3.5119798594092426e-06, "loss": 0.2607, "step": 9673 }, { "epoch": 0.61, "grad_norm": 3.141658710673713, "learning_rate": 3.5110075615589445e-06, "loss": 0.2931, "step": 9674 }, { "epoch": 0.61, "grad_norm": 5.422081082535986, "learning_rate": 3.5100353254895867e-06, "loss": 0.2635, "step": 9675 }, { "epoch": 0.61, "grad_norm": 1.8453361788018223, "learning_rate": 3.509063151241511e-06, "loss": 0.2673, "step": 9676 }, { "epoch": 0.61, "grad_norm": 3.836042848355021, "learning_rate": 3.508091038855052e-06, "loss": 0.2799, "step": 9677 }, { "epoch": 0.61, "grad_norm": 1.6202018431544734, "learning_rate": 3.507118988370547e-06, "loss": 0.2726, "step": 9678 }, { "epoch": 0.61, "grad_norm": 0.5788904459655787, "learning_rate": 3.5061469998283255e-06, "loss": 0.4571, "step": 9679 }, { "epoch": 0.61, "grad_norm": 1.5411428286339852, "learning_rate": 3.5051750732687183e-06, "loss": 0.2745, "step": 9680 }, { "epoch": 0.61, "grad_norm": 1.6994969809747569, "learning_rate": 3.504203208732052e-06, "loss": 0.2718, "step": 9681 }, { "epoch": 0.61, "grad_norm": 3.6459733915683215, "learning_rate": 3.5032314062586525e-06, "loss": 0.2885, "step": 9682 }, { "epoch": 0.61, "grad_norm": 1.7305200495893007, "learning_rate": 3.5022596658888404e-06, "loss": 0.2721, "step": 9683 }, { "epoch": 0.61, "grad_norm": 2.9519146830725447, "learning_rate": 3.501287987662936e-06, "loss": 0.2606, "step": 9684 }, { "epoch": 0.61, "grad_norm": 3.86565487359643, "learning_rate": 3.500316371621253e-06, "loss": 0.2788, "step": 9685 }, { "epoch": 0.61, "grad_norm": 0.6391418581472857, "learning_rate": 3.4993448178041095e-06, "loss": 0.4783, "step": 9686 }, { "epoch": 0.61, "grad_norm": 2.99616721580779, "learning_rate": 3.498373326251814e-06, "loss": 0.2875, "step": 9687 }, { "epoch": 0.61, "grad_norm": 2.8719292714633617, "learning_rate": 3.4974018970046745e-06, "loss": 0.283, "step": 9688 }, { "epoch": 0.61, "grad_norm": 1.9315024307023858, "learning_rate": 3.496430530103001e-06, "loss": 0.274, "step": 9689 }, { "epoch": 0.61, "grad_norm": 1.725138300741341, "learning_rate": 3.4954592255870964e-06, "loss": 0.2675, "step": 9690 }, { "epoch": 0.61, "grad_norm": 1.989168976277709, "learning_rate": 3.4944879834972595e-06, "loss": 0.2755, "step": 9691 }, { "epoch": 0.61, "grad_norm": 0.5705142104128255, "learning_rate": 3.493516803873791e-06, "loss": 0.4812, "step": 9692 }, { "epoch": 0.61, "grad_norm": 2.558959175112957, "learning_rate": 3.492545686756986e-06, "loss": 0.2681, "step": 9693 }, { "epoch": 0.61, "grad_norm": 1.7260435908193597, "learning_rate": 3.4915746321871384e-06, "loss": 0.2738, "step": 9694 }, { "epoch": 0.61, "grad_norm": 2.9122895850254102, "learning_rate": 3.490603640204538e-06, "loss": 0.2647, "step": 9695 }, { "epoch": 0.61, "grad_norm": 1.7505762332473782, "learning_rate": 3.4896327108494723e-06, "loss": 0.2547, "step": 9696 }, { "epoch": 0.61, "grad_norm": 3.4773745456442398, "learning_rate": 3.488661844162229e-06, "loss": 0.2824, "step": 9697 }, { "epoch": 0.61, "grad_norm": 1.4595874996958724, "learning_rate": 3.487691040183091e-06, "loss": 0.2674, "step": 9698 }, { "epoch": 0.61, "grad_norm": 3.7705851454020305, "learning_rate": 3.4867202989523375e-06, "loss": 0.2934, "step": 9699 }, { "epoch": 0.61, "grad_norm": 3.9124997415924883, "learning_rate": 3.4857496205102475e-06, "loss": 0.281, "step": 9700 }, { "epoch": 0.61, "grad_norm": 5.965828772178735, "learning_rate": 3.484779004897094e-06, "loss": 0.2715, "step": 9701 }, { "epoch": 0.61, "grad_norm": 2.1927954447603812, "learning_rate": 3.4838084521531513e-06, "loss": 0.268, "step": 9702 }, { "epoch": 0.61, "grad_norm": 13.349569070821062, "learning_rate": 3.4828379623186883e-06, "loss": 0.2792, "step": 9703 }, { "epoch": 0.61, "grad_norm": 2.1068968748909445, "learning_rate": 3.481867535433974e-06, "loss": 0.2952, "step": 9704 }, { "epoch": 0.61, "grad_norm": 2.076498174164943, "learning_rate": 3.4808971715392724e-06, "loss": 0.2959, "step": 9705 }, { "epoch": 0.61, "grad_norm": 2.3310854290572403, "learning_rate": 3.4799268706748457e-06, "loss": 0.2836, "step": 9706 }, { "epoch": 0.61, "grad_norm": 4.888579570534031, "learning_rate": 3.4789566328809523e-06, "loss": 0.263, "step": 9707 }, { "epoch": 0.61, "grad_norm": 2.048864209009855, "learning_rate": 3.4779864581978506e-06, "loss": 0.2666, "step": 9708 }, { "epoch": 0.61, "grad_norm": 1.963110769760852, "learning_rate": 3.477016346665793e-06, "loss": 0.2819, "step": 9709 }, { "epoch": 0.61, "grad_norm": 3.2173594046331218, "learning_rate": 3.4760462983250327e-06, "loss": 0.2682, "step": 9710 }, { "epoch": 0.61, "grad_norm": 1.6566223404374978, "learning_rate": 3.475076313215817e-06, "loss": 0.2577, "step": 9711 }, { "epoch": 0.61, "grad_norm": 2.672514898123288, "learning_rate": 3.4741063913783958e-06, "loss": 0.2884, "step": 9712 }, { "epoch": 0.61, "grad_norm": 1.78725750672085, "learning_rate": 3.47313653285301e-06, "loss": 0.2536, "step": 9713 }, { "epoch": 0.61, "grad_norm": 2.9106984866986485, "learning_rate": 3.4721667376799022e-06, "loss": 0.2626, "step": 9714 }, { "epoch": 0.61, "grad_norm": 1.8114622629790127, "learning_rate": 3.4711970058993092e-06, "loss": 0.2593, "step": 9715 }, { "epoch": 0.61, "grad_norm": 2.947573439174941, "learning_rate": 3.4702273375514695e-06, "loss": 0.3067, "step": 9716 }, { "epoch": 0.61, "grad_norm": 3.0173391992265914, "learning_rate": 3.469257732676613e-06, "loss": 0.2655, "step": 9717 }, { "epoch": 0.61, "grad_norm": 2.6964881703085593, "learning_rate": 3.468288191314971e-06, "loss": 0.2756, "step": 9718 }, { "epoch": 0.61, "grad_norm": 6.87439735988396, "learning_rate": 3.4673187135067737e-06, "loss": 0.2887, "step": 9719 }, { "epoch": 0.61, "grad_norm": 3.6457496674905365, "learning_rate": 3.4663492992922467e-06, "loss": 0.2737, "step": 9720 }, { "epoch": 0.61, "grad_norm": 5.40900693369108, "learning_rate": 3.4653799487116097e-06, "loss": 0.3005, "step": 9721 }, { "epoch": 0.61, "grad_norm": 1.9989324760989478, "learning_rate": 3.464410661805086e-06, "loss": 0.2814, "step": 9722 }, { "epoch": 0.61, "grad_norm": 2.24778928134871, "learning_rate": 3.46344143861289e-06, "loss": 0.2707, "step": 9723 }, { "epoch": 0.61, "grad_norm": 2.873259969543035, "learning_rate": 3.4624722791752395e-06, "loss": 0.2812, "step": 9724 }, { "epoch": 0.61, "grad_norm": 3.9071851397361037, "learning_rate": 3.4615031835323437e-06, "loss": 0.2688, "step": 9725 }, { "epoch": 0.61, "grad_norm": 4.747612671781315, "learning_rate": 3.4605341517244123e-06, "loss": 0.2671, "step": 9726 }, { "epoch": 0.61, "grad_norm": 4.549920079770489, "learning_rate": 3.4595651837916542e-06, "loss": 0.2821, "step": 9727 }, { "epoch": 0.61, "grad_norm": 2.629429335854162, "learning_rate": 3.4585962797742745e-06, "loss": 0.2571, "step": 9728 }, { "epoch": 0.61, "grad_norm": 3.3913865108676804, "learning_rate": 3.457627439712472e-06, "loss": 0.2669, "step": 9729 }, { "epoch": 0.61, "grad_norm": 2.973275614830469, "learning_rate": 3.456658663646447e-06, "loss": 0.2744, "step": 9730 }, { "epoch": 0.61, "grad_norm": 2.1536431811532277, "learning_rate": 3.4556899516163942e-06, "loss": 0.2558, "step": 9731 }, { "epoch": 0.61, "grad_norm": 2.173750887599849, "learning_rate": 3.4547213036625107e-06, "loss": 0.2679, "step": 9732 }, { "epoch": 0.61, "grad_norm": 3.444293839107353, "learning_rate": 3.4537527198249804e-06, "loss": 0.2827, "step": 9733 }, { "epoch": 0.61, "grad_norm": 3.1970971103190347, "learning_rate": 3.452784200144e-06, "loss": 0.2825, "step": 9734 }, { "epoch": 0.61, "grad_norm": 2.0197608298174905, "learning_rate": 3.45181574465975e-06, "loss": 0.2738, "step": 9735 }, { "epoch": 0.61, "grad_norm": 7.586690916293952, "learning_rate": 3.4508473534124155e-06, "loss": 0.2553, "step": 9736 }, { "epoch": 0.61, "grad_norm": 2.221204728485687, "learning_rate": 3.4498790264421755e-06, "loss": 0.2999, "step": 9737 }, { "epoch": 0.61, "grad_norm": 15.562557774905292, "learning_rate": 3.4489107637892084e-06, "loss": 0.272, "step": 9738 }, { "epoch": 0.61, "grad_norm": 8.060300648749852, "learning_rate": 3.4479425654936883e-06, "loss": 0.2861, "step": 9739 }, { "epoch": 0.61, "grad_norm": 3.095010236330334, "learning_rate": 3.4469744315957863e-06, "loss": 0.2723, "step": 9740 }, { "epoch": 0.61, "grad_norm": 2.1483076401567973, "learning_rate": 3.4460063621356753e-06, "loss": 0.2575, "step": 9741 }, { "epoch": 0.61, "grad_norm": 1.8839660452427271, "learning_rate": 3.445038357153521e-06, "loss": 0.2767, "step": 9742 }, { "epoch": 0.61, "grad_norm": 1.5896504501694708, "learning_rate": 3.4440704166894865e-06, "loss": 0.2677, "step": 9743 }, { "epoch": 0.61, "grad_norm": 1.948613565615765, "learning_rate": 3.443102540783735e-06, "loss": 0.2889, "step": 9744 }, { "epoch": 0.61, "grad_norm": 1.870423470052233, "learning_rate": 3.4421347294764236e-06, "loss": 0.2557, "step": 9745 }, { "epoch": 0.61, "grad_norm": 8.399463870416707, "learning_rate": 3.44116698280771e-06, "loss": 0.2863, "step": 9746 }, { "epoch": 0.61, "grad_norm": 3.383572031556528, "learning_rate": 3.440199300817746e-06, "loss": 0.2531, "step": 9747 }, { "epoch": 0.61, "grad_norm": 2.3395363548219366, "learning_rate": 3.4392316835466834e-06, "loss": 0.2646, "step": 9748 }, { "epoch": 0.61, "grad_norm": 1.4901782868002784, "learning_rate": 3.4382641310346705e-06, "loss": 0.2628, "step": 9749 }, { "epoch": 0.61, "grad_norm": 0.6094170931302001, "learning_rate": 3.437296643321854e-06, "loss": 0.4974, "step": 9750 }, { "epoch": 0.61, "grad_norm": 1.5547683876758882, "learning_rate": 3.4363292204483745e-06, "loss": 0.2522, "step": 9751 }, { "epoch": 0.61, "grad_norm": 13.152238272698963, "learning_rate": 3.435361862454374e-06, "loss": 0.2768, "step": 9752 }, { "epoch": 0.61, "grad_norm": 3.340303677330552, "learning_rate": 3.4343945693799885e-06, "loss": 0.313, "step": 9753 }, { "epoch": 0.61, "grad_norm": 3.167579734046241, "learning_rate": 3.4334273412653534e-06, "loss": 0.2723, "step": 9754 }, { "epoch": 0.61, "grad_norm": 1.9885965037663498, "learning_rate": 3.4324601781505973e-06, "loss": 0.2588, "step": 9755 }, { "epoch": 0.61, "grad_norm": 1.8703308316835587, "learning_rate": 3.431493080075856e-06, "loss": 0.2655, "step": 9756 }, { "epoch": 0.61, "grad_norm": 2.507613676412212, "learning_rate": 3.4305260470812522e-06, "loss": 0.2658, "step": 9757 }, { "epoch": 0.61, "grad_norm": 2.202107476049587, "learning_rate": 3.429559079206911e-06, "loss": 0.2546, "step": 9758 }, { "epoch": 0.61, "grad_norm": 2.5256971890429445, "learning_rate": 3.428592176492952e-06, "loss": 0.2929, "step": 9759 }, { "epoch": 0.61, "grad_norm": 0.6158143000365329, "learning_rate": 3.427625338979496e-06, "loss": 0.4779, "step": 9760 }, { "epoch": 0.61, "grad_norm": 1.7972835015970774, "learning_rate": 3.4266585667066567e-06, "loss": 0.2903, "step": 9761 }, { "epoch": 0.61, "grad_norm": 3.0395101630351227, "learning_rate": 3.4256918597145484e-06, "loss": 0.2757, "step": 9762 }, { "epoch": 0.61, "grad_norm": 2.310466766664242, "learning_rate": 3.424725218043279e-06, "loss": 0.274, "step": 9763 }, { "epoch": 0.61, "grad_norm": 1.3011208309822484, "learning_rate": 3.4237586417329604e-06, "loss": 0.2813, "step": 9764 }, { "epoch": 0.61, "grad_norm": 2.5851888721427545, "learning_rate": 3.4227921308236943e-06, "loss": 0.2658, "step": 9765 }, { "epoch": 0.61, "grad_norm": 2.4594726188202216, "learning_rate": 3.4218256853555864e-06, "loss": 0.2666, "step": 9766 }, { "epoch": 0.61, "grad_norm": 2.282973661879234, "learning_rate": 3.4208593053687323e-06, "loss": 0.263, "step": 9767 }, { "epoch": 0.61, "grad_norm": 2.229247173957106, "learning_rate": 3.419892990903231e-06, "loss": 0.2613, "step": 9768 }, { "epoch": 0.61, "grad_norm": 1.8265981069370543, "learning_rate": 3.4189267419991756e-06, "loss": 0.2859, "step": 9769 }, { "epoch": 0.61, "grad_norm": 1.6153322422561114, "learning_rate": 3.417960558696657e-06, "loss": 0.2768, "step": 9770 }, { "epoch": 0.61, "grad_norm": 2.0136087141017507, "learning_rate": 3.4169944410357657e-06, "loss": 0.285, "step": 9771 }, { "epoch": 0.61, "grad_norm": 2.0237742417858455, "learning_rate": 3.4160283890565877e-06, "loss": 0.2685, "step": 9772 }, { "epoch": 0.61, "grad_norm": 1.7441269897520957, "learning_rate": 3.4150624027992043e-06, "loss": 0.2813, "step": 9773 }, { "epoch": 0.61, "grad_norm": 0.6276477488264848, "learning_rate": 3.4140964823036984e-06, "loss": 0.5328, "step": 9774 }, { "epoch": 0.61, "grad_norm": 2.180546889951866, "learning_rate": 3.413130627610145e-06, "loss": 0.2735, "step": 9775 }, { "epoch": 0.61, "grad_norm": 2.752192174421867, "learning_rate": 3.4121648387586216e-06, "loss": 0.2816, "step": 9776 }, { "epoch": 0.61, "grad_norm": 2.0246969590769095, "learning_rate": 3.411199115789198e-06, "loss": 0.2931, "step": 9777 }, { "epoch": 0.61, "grad_norm": 1.73087687941042, "learning_rate": 3.4102334587419437e-06, "loss": 0.2611, "step": 9778 }, { "epoch": 0.61, "grad_norm": 1.5906763656306007, "learning_rate": 3.4092678676569292e-06, "loss": 0.2669, "step": 9779 }, { "epoch": 0.62, "grad_norm": 2.366577653114579, "learning_rate": 3.408302342574216e-06, "loss": 0.2923, "step": 9780 }, { "epoch": 0.62, "grad_norm": 4.000430447468467, "learning_rate": 3.407336883533866e-06, "loss": 0.282, "step": 9781 }, { "epoch": 0.62, "grad_norm": 2.4360295371800174, "learning_rate": 3.406371490575938e-06, "loss": 0.2631, "step": 9782 }, { "epoch": 0.62, "grad_norm": 0.5623069510926189, "learning_rate": 3.405406163740487e-06, "loss": 0.4546, "step": 9783 }, { "epoch": 0.62, "grad_norm": 1.6064021606481163, "learning_rate": 3.4044409030675663e-06, "loss": 0.2704, "step": 9784 }, { "epoch": 0.62, "grad_norm": 2.2011488028748056, "learning_rate": 3.403475708597225e-06, "loss": 0.2574, "step": 9785 }, { "epoch": 0.62, "grad_norm": 3.1070165081930075, "learning_rate": 3.402510580369514e-06, "loss": 0.2781, "step": 9786 }, { "epoch": 0.62, "grad_norm": 2.156579431733777, "learning_rate": 3.4015455184244763e-06, "loss": 0.2719, "step": 9787 }, { "epoch": 0.62, "grad_norm": 4.668771498571321, "learning_rate": 3.400580522802155e-06, "loss": 0.2711, "step": 9788 }, { "epoch": 0.62, "grad_norm": 1.9583571969718583, "learning_rate": 3.399615593542588e-06, "loss": 0.2656, "step": 9789 }, { "epoch": 0.62, "grad_norm": 14.246863501708958, "learning_rate": 3.398650730685813e-06, "loss": 0.2775, "step": 9790 }, { "epoch": 0.62, "grad_norm": 1.3448309348479337, "learning_rate": 3.3976859342718625e-06, "loss": 0.2746, "step": 9791 }, { "epoch": 0.62, "grad_norm": 1.9250642633814026, "learning_rate": 3.3967212043407705e-06, "loss": 0.2709, "step": 9792 }, { "epoch": 0.62, "grad_norm": 3.1228204801377535, "learning_rate": 3.39575654093256e-06, "loss": 0.2697, "step": 9793 }, { "epoch": 0.62, "grad_norm": 5.445133103556168, "learning_rate": 3.3947919440872627e-06, "loss": 0.289, "step": 9794 }, { "epoch": 0.62, "grad_norm": 1.874824749896127, "learning_rate": 3.3938274138448982e-06, "loss": 0.2524, "step": 9795 }, { "epoch": 0.62, "grad_norm": 2.5459995597759115, "learning_rate": 3.392862950245488e-06, "loss": 0.2621, "step": 9796 }, { "epoch": 0.62, "grad_norm": 2.6170387526796945, "learning_rate": 3.3918985533290484e-06, "loss": 0.2795, "step": 9797 }, { "epoch": 0.62, "grad_norm": 3.712002999552004, "learning_rate": 3.390934223135594e-06, "loss": 0.278, "step": 9798 }, { "epoch": 0.62, "grad_norm": 2.586512750861614, "learning_rate": 3.389969959705136e-06, "loss": 0.2934, "step": 9799 }, { "epoch": 0.62, "grad_norm": 2.2440423120988036, "learning_rate": 3.3890057630776834e-06, "loss": 0.2631, "step": 9800 }, { "epoch": 0.62, "grad_norm": 2.127058916272636, "learning_rate": 3.3880416332932442e-06, "loss": 0.2785, "step": 9801 }, { "epoch": 0.62, "grad_norm": 2.004798853258625, "learning_rate": 3.3870775703918212e-06, "loss": 0.2769, "step": 9802 }, { "epoch": 0.62, "grad_norm": 3.5113578287649134, "learning_rate": 3.3861135744134143e-06, "loss": 0.2554, "step": 9803 }, { "epoch": 0.62, "grad_norm": 1.3826156861765981, "learning_rate": 3.3851496453980225e-06, "loss": 0.2639, "step": 9804 }, { "epoch": 0.62, "grad_norm": 6.885710051223053, "learning_rate": 3.3841857833856396e-06, "loss": 0.2786, "step": 9805 }, { "epoch": 0.62, "grad_norm": 3.0955458839006647, "learning_rate": 3.383221988416259e-06, "loss": 0.2697, "step": 9806 }, { "epoch": 0.62, "grad_norm": 3.7450305018461862, "learning_rate": 3.382258260529869e-06, "loss": 0.2817, "step": 9807 }, { "epoch": 0.62, "grad_norm": 3.645270739428072, "learning_rate": 3.381294599766456e-06, "loss": 0.2641, "step": 9808 }, { "epoch": 0.62, "grad_norm": 1.6665386253635939, "learning_rate": 3.3803310061660067e-06, "loss": 0.2734, "step": 9809 }, { "epoch": 0.62, "grad_norm": 2.422786529716522, "learning_rate": 3.3793674797685016e-06, "loss": 0.2616, "step": 9810 }, { "epoch": 0.62, "grad_norm": 2.273669920485398, "learning_rate": 3.3784040206139175e-06, "loss": 0.2683, "step": 9811 }, { "epoch": 0.62, "grad_norm": 2.0069537499235546, "learning_rate": 3.3774406287422313e-06, "loss": 0.2736, "step": 9812 }, { "epoch": 0.62, "grad_norm": 3.5082467590142463, "learning_rate": 3.376477304193415e-06, "loss": 0.2646, "step": 9813 }, { "epoch": 0.62, "grad_norm": 4.558642561368121, "learning_rate": 3.37551404700744e-06, "loss": 0.2674, "step": 9814 }, { "epoch": 0.62, "grad_norm": 5.327870894682449, "learning_rate": 3.374550857224269e-06, "loss": 0.265, "step": 9815 }, { "epoch": 0.62, "grad_norm": 1.69322909734235, "learning_rate": 3.3735877348838738e-06, "loss": 0.2586, "step": 9816 }, { "epoch": 0.62, "grad_norm": 1.9039186857196588, "learning_rate": 3.372624680026211e-06, "loss": 0.2621, "step": 9817 }, { "epoch": 0.62, "grad_norm": 1.720366894920928, "learning_rate": 3.3716616926912414e-06, "loss": 0.2874, "step": 9818 }, { "epoch": 0.62, "grad_norm": 1.9363342186241619, "learning_rate": 3.37069877291892e-06, "loss": 0.2884, "step": 9819 }, { "epoch": 0.62, "grad_norm": 1.9685218744187376, "learning_rate": 3.369735920749201e-06, "loss": 0.2739, "step": 9820 }, { "epoch": 0.62, "grad_norm": 2.8789735450524714, "learning_rate": 3.3687731362220334e-06, "loss": 0.2632, "step": 9821 }, { "epoch": 0.62, "grad_norm": 1.96679302862232, "learning_rate": 3.3678104193773654e-06, "loss": 0.2771, "step": 9822 }, { "epoch": 0.62, "grad_norm": 1.6051793016702407, "learning_rate": 3.366847770255143e-06, "loss": 0.268, "step": 9823 }, { "epoch": 0.62, "grad_norm": 2.357721907351094, "learning_rate": 3.3658851888953078e-06, "loss": 0.2678, "step": 9824 }, { "epoch": 0.62, "grad_norm": 1.5222149364058855, "learning_rate": 3.364922675337798e-06, "loss": 0.2628, "step": 9825 }, { "epoch": 0.62, "grad_norm": 10.32973395785985, "learning_rate": 3.3639602296225514e-06, "loss": 0.2764, "step": 9826 }, { "epoch": 0.62, "grad_norm": 2.6085685513684953, "learning_rate": 3.3629978517895e-06, "loss": 0.2667, "step": 9827 }, { "epoch": 0.62, "grad_norm": 1.9163977237106014, "learning_rate": 3.3620355418785767e-06, "loss": 0.2632, "step": 9828 }, { "epoch": 0.62, "grad_norm": 1.5173020236245705, "learning_rate": 3.361073299929707e-06, "loss": 0.2714, "step": 9829 }, { "epoch": 0.62, "grad_norm": 1.3939246286157458, "learning_rate": 3.3601111259828155e-06, "loss": 0.2841, "step": 9830 }, { "epoch": 0.62, "grad_norm": 0.6786020369777017, "learning_rate": 3.3591490200778276e-06, "loss": 0.4974, "step": 9831 }, { "epoch": 0.62, "grad_norm": 3.85122913637047, "learning_rate": 3.358186982254662e-06, "loss": 0.27, "step": 9832 }, { "epoch": 0.62, "grad_norm": 5.2280408651590315, "learning_rate": 3.357225012553234e-06, "loss": 0.2727, "step": 9833 }, { "epoch": 0.62, "grad_norm": 2.391669393244888, "learning_rate": 3.356263111013459e-06, "loss": 0.2653, "step": 9834 }, { "epoch": 0.62, "grad_norm": 2.045810554509706, "learning_rate": 3.3553012776752468e-06, "loss": 0.2593, "step": 9835 }, { "epoch": 0.62, "grad_norm": 2.2256173942111057, "learning_rate": 3.354339512578506e-06, "loss": 0.2686, "step": 9836 }, { "epoch": 0.62, "grad_norm": 1.6763608588122259, "learning_rate": 3.3533778157631403e-06, "loss": 0.28, "step": 9837 }, { "epoch": 0.62, "grad_norm": 2.97164642342639, "learning_rate": 3.3524161872690548e-06, "loss": 0.2568, "step": 9838 }, { "epoch": 0.62, "grad_norm": 6.3938486688541465, "learning_rate": 3.351454627136148e-06, "loss": 0.2994, "step": 9839 }, { "epoch": 0.62, "grad_norm": 2.146615352814978, "learning_rate": 3.350493135404318e-06, "loss": 0.2748, "step": 9840 }, { "epoch": 0.62, "grad_norm": 2.1725227018700743, "learning_rate": 3.3495317121134564e-06, "loss": 0.2771, "step": 9841 }, { "epoch": 0.62, "grad_norm": 2.92116124176794, "learning_rate": 3.3485703573034567e-06, "loss": 0.2912, "step": 9842 }, { "epoch": 0.62, "grad_norm": 4.178429621380662, "learning_rate": 3.347609071014205e-06, "loss": 0.2731, "step": 9843 }, { "epoch": 0.62, "grad_norm": 1.61580037997173, "learning_rate": 3.3466478532855896e-06, "loss": 0.2633, "step": 9844 }, { "epoch": 0.62, "grad_norm": 2.3181438531650134, "learning_rate": 3.3456867041574893e-06, "loss": 0.2793, "step": 9845 }, { "epoch": 0.62, "grad_norm": 1.6170399567764744, "learning_rate": 3.344725623669786e-06, "loss": 0.2638, "step": 9846 }, { "epoch": 0.62, "grad_norm": 1.7100224589147104, "learning_rate": 3.343764611862358e-06, "loss": 0.281, "step": 9847 }, { "epoch": 0.62, "grad_norm": 2.716361876494082, "learning_rate": 3.342803668775078e-06, "loss": 0.2834, "step": 9848 }, { "epoch": 0.62, "grad_norm": 3.649937776220787, "learning_rate": 3.341842794447817e-06, "loss": 0.2747, "step": 9849 }, { "epoch": 0.62, "grad_norm": 3.6239701754106592, "learning_rate": 3.340881988920445e-06, "loss": 0.2655, "step": 9850 }, { "epoch": 0.62, "grad_norm": 1.6416178897046088, "learning_rate": 3.339921252232824e-06, "loss": 0.2581, "step": 9851 }, { "epoch": 0.62, "grad_norm": 3.0418941412002503, "learning_rate": 3.3389605844248187e-06, "loss": 0.2735, "step": 9852 }, { "epoch": 0.62, "grad_norm": 3.894837418888004, "learning_rate": 3.33799998553629e-06, "loss": 0.296, "step": 9853 }, { "epoch": 0.62, "grad_norm": 4.081975530778152, "learning_rate": 3.337039455607094e-06, "loss": 0.2669, "step": 9854 }, { "epoch": 0.62, "grad_norm": 2.358622137777047, "learning_rate": 3.336078994677085e-06, "loss": 0.2616, "step": 9855 }, { "epoch": 0.62, "grad_norm": 2.0191263319974504, "learning_rate": 3.3351186027861147e-06, "loss": 0.2834, "step": 9856 }, { "epoch": 0.62, "grad_norm": 4.739503391061953, "learning_rate": 3.334158279974029e-06, "loss": 0.2865, "step": 9857 }, { "epoch": 0.62, "grad_norm": 18.83563435733747, "learning_rate": 3.3331980262806774e-06, "loss": 0.268, "step": 9858 }, { "epoch": 0.62, "grad_norm": 3.471785915960166, "learning_rate": 3.3322378417458985e-06, "loss": 0.2512, "step": 9859 }, { "epoch": 0.62, "grad_norm": 2.12319852461157, "learning_rate": 3.3312777264095325e-06, "loss": 0.2789, "step": 9860 }, { "epoch": 0.62, "grad_norm": 0.603471809159579, "learning_rate": 3.330317680311419e-06, "loss": 0.4489, "step": 9861 }, { "epoch": 0.62, "grad_norm": 0.621121359786358, "learning_rate": 3.3293577034913916e-06, "loss": 0.5016, "step": 9862 }, { "epoch": 0.62, "grad_norm": 1.979352789753031, "learning_rate": 3.3283977959892798e-06, "loss": 0.2651, "step": 9863 }, { "epoch": 0.62, "grad_norm": 1.6987291839572347, "learning_rate": 3.3274379578449133e-06, "loss": 0.2607, "step": 9864 }, { "epoch": 0.62, "grad_norm": 1.9402783902988476, "learning_rate": 3.326478189098116e-06, "loss": 0.2928, "step": 9865 }, { "epoch": 0.62, "grad_norm": 1.6762661068517062, "learning_rate": 3.3255184897887116e-06, "loss": 0.266, "step": 9866 }, { "epoch": 0.62, "grad_norm": 2.020978401809176, "learning_rate": 3.324558859956517e-06, "loss": 0.2607, "step": 9867 }, { "epoch": 0.62, "grad_norm": 1.4115015915816638, "learning_rate": 3.323599299641353e-06, "loss": 0.2877, "step": 9868 }, { "epoch": 0.62, "grad_norm": 4.478799202552657, "learning_rate": 3.3226398088830316e-06, "loss": 0.268, "step": 9869 }, { "epoch": 0.62, "grad_norm": 5.805467424554952, "learning_rate": 3.321680387721364e-06, "loss": 0.2557, "step": 9870 }, { "epoch": 0.62, "grad_norm": 2.4345762879435267, "learning_rate": 3.3207210361961583e-06, "loss": 0.2633, "step": 9871 }, { "epoch": 0.62, "grad_norm": 1.8915575667164246, "learning_rate": 3.3197617543472203e-06, "loss": 0.2677, "step": 9872 }, { "epoch": 0.62, "grad_norm": 3.935309816684142, "learning_rate": 3.3188025422143497e-06, "loss": 0.2802, "step": 9873 }, { "epoch": 0.62, "grad_norm": 4.896504435668146, "learning_rate": 3.3178433998373495e-06, "loss": 0.2717, "step": 9874 }, { "epoch": 0.62, "grad_norm": 1.5406812601018596, "learning_rate": 3.3168843272560127e-06, "loss": 0.257, "step": 9875 }, { "epoch": 0.62, "grad_norm": 2.32919562705976, "learning_rate": 3.315925324510135e-06, "loss": 0.2696, "step": 9876 }, { "epoch": 0.62, "grad_norm": 2.8104804398343184, "learning_rate": 3.3149663916395075e-06, "loss": 0.2703, "step": 9877 }, { "epoch": 0.62, "grad_norm": 2.282822067080632, "learning_rate": 3.314007528683918e-06, "loss": 0.2709, "step": 9878 }, { "epoch": 0.62, "grad_norm": 1.894601814571518, "learning_rate": 3.3130487356831496e-06, "loss": 0.2748, "step": 9879 }, { "epoch": 0.62, "grad_norm": 3.1047171441470285, "learning_rate": 3.312090012676987e-06, "loss": 0.2716, "step": 9880 }, { "epoch": 0.62, "grad_norm": 4.249803689698617, "learning_rate": 3.3111313597052065e-06, "loss": 0.2742, "step": 9881 }, { "epoch": 0.62, "grad_norm": 1.659785378661244, "learning_rate": 3.3101727768075854e-06, "loss": 0.2567, "step": 9882 }, { "epoch": 0.62, "grad_norm": 2.3689414848001045, "learning_rate": 3.3092142640238983e-06, "loss": 0.2664, "step": 9883 }, { "epoch": 0.62, "grad_norm": 1.8799493568687033, "learning_rate": 3.3082558213939142e-06, "loss": 0.2563, "step": 9884 }, { "epoch": 0.62, "grad_norm": 2.0347806542432405, "learning_rate": 3.3072974489574006e-06, "loss": 0.2704, "step": 9885 }, { "epoch": 0.62, "grad_norm": 2.54993934788399, "learning_rate": 3.3063391467541227e-06, "loss": 0.2857, "step": 9886 }, { "epoch": 0.62, "grad_norm": 4.424712384660554, "learning_rate": 3.3053809148238426e-06, "loss": 0.2804, "step": 9887 }, { "epoch": 0.62, "grad_norm": 2.3711495443193664, "learning_rate": 3.3044227532063177e-06, "loss": 0.2611, "step": 9888 }, { "epoch": 0.62, "grad_norm": 7.119978494852936, "learning_rate": 3.3034646619413046e-06, "loss": 0.269, "step": 9889 }, { "epoch": 0.62, "grad_norm": 2.85465927054288, "learning_rate": 3.3025066410685546e-06, "loss": 0.2636, "step": 9890 }, { "epoch": 0.62, "grad_norm": 2.2333958252491697, "learning_rate": 3.3015486906278203e-06, "loss": 0.2562, "step": 9891 }, { "epoch": 0.62, "grad_norm": 2.332761445734929, "learning_rate": 3.300590810658848e-06, "loss": 0.2783, "step": 9892 }, { "epoch": 0.62, "grad_norm": 1.5840899591393893, "learning_rate": 3.299633001201381e-06, "loss": 0.2738, "step": 9893 }, { "epoch": 0.62, "grad_norm": 6.311613716909245, "learning_rate": 3.298675262295162e-06, "loss": 0.2682, "step": 9894 }, { "epoch": 0.62, "grad_norm": 2.4500980765916665, "learning_rate": 3.2977175939799265e-06, "loss": 0.2909, "step": 9895 }, { "epoch": 0.62, "grad_norm": 1.5356380795765818, "learning_rate": 3.2967599962954134e-06, "loss": 0.2674, "step": 9896 }, { "epoch": 0.62, "grad_norm": 2.255907166982026, "learning_rate": 3.2958024692813515e-06, "loss": 0.2817, "step": 9897 }, { "epoch": 0.62, "grad_norm": 2.8803975011197585, "learning_rate": 3.2948450129774726e-06, "loss": 0.2743, "step": 9898 }, { "epoch": 0.62, "grad_norm": 2.180125193183308, "learning_rate": 3.2938876274235034e-06, "loss": 0.2553, "step": 9899 }, { "epoch": 0.62, "grad_norm": 2.068154661473635, "learning_rate": 3.292930312659167e-06, "loss": 0.2903, "step": 9900 }, { "epoch": 0.62, "grad_norm": 2.4971141147919904, "learning_rate": 3.291973068724184e-06, "loss": 0.2838, "step": 9901 }, { "epoch": 0.62, "grad_norm": 2.245842240400228, "learning_rate": 3.2910158956582726e-06, "loss": 0.2833, "step": 9902 }, { "epoch": 0.62, "grad_norm": 1.7865857003738088, "learning_rate": 3.290058793501147e-06, "loss": 0.2667, "step": 9903 }, { "epoch": 0.62, "grad_norm": 0.6155417737874286, "learning_rate": 3.289101762292517e-06, "loss": 0.4693, "step": 9904 }, { "epoch": 0.62, "grad_norm": 2.5305638923653797, "learning_rate": 3.288144802072097e-06, "loss": 0.2593, "step": 9905 }, { "epoch": 0.62, "grad_norm": 2.105242003805574, "learning_rate": 3.287187912879588e-06, "loss": 0.252, "step": 9906 }, { "epoch": 0.62, "grad_norm": 2.4182474802379645, "learning_rate": 3.286231094754695e-06, "loss": 0.2677, "step": 9907 }, { "epoch": 0.62, "grad_norm": 2.8304821396172453, "learning_rate": 3.2852743477371185e-06, "loss": 0.2703, "step": 9908 }, { "epoch": 0.62, "grad_norm": 2.877794321716428, "learning_rate": 3.284317671866555e-06, "loss": 0.2678, "step": 9909 }, { "epoch": 0.62, "grad_norm": 5.412188533589843, "learning_rate": 3.2833610671826988e-06, "loss": 0.2593, "step": 9910 }, { "epoch": 0.62, "grad_norm": 2.953949652599423, "learning_rate": 3.28240453372524e-06, "loss": 0.2764, "step": 9911 }, { "epoch": 0.62, "grad_norm": 7.846073759409722, "learning_rate": 3.281448071533867e-06, "loss": 0.2697, "step": 9912 }, { "epoch": 0.62, "grad_norm": 1.9134779397970272, "learning_rate": 3.2804916806482683e-06, "loss": 0.2639, "step": 9913 }, { "epoch": 0.62, "grad_norm": 4.747388588514957, "learning_rate": 3.279535361108123e-06, "loss": 0.2822, "step": 9914 }, { "epoch": 0.62, "grad_norm": 3.358002438438269, "learning_rate": 3.278579112953111e-06, "loss": 0.2751, "step": 9915 }, { "epoch": 0.62, "grad_norm": 2.095930422185279, "learning_rate": 3.2776229362229106e-06, "loss": 0.2527, "step": 9916 }, { "epoch": 0.62, "grad_norm": 2.3417385104267594, "learning_rate": 3.2766668309571926e-06, "loss": 0.265, "step": 9917 }, { "epoch": 0.62, "grad_norm": 2.0250674225646965, "learning_rate": 3.2757107971956302e-06, "loss": 0.2903, "step": 9918 }, { "epoch": 0.62, "grad_norm": 0.6154484535404571, "learning_rate": 3.2747548349778867e-06, "loss": 0.5037, "step": 9919 }, { "epoch": 0.62, "grad_norm": 1.5693137529839287, "learning_rate": 3.2737989443436314e-06, "loss": 0.2629, "step": 9920 }, { "epoch": 0.62, "grad_norm": 3.7442982164101064, "learning_rate": 3.2728431253325243e-06, "loss": 0.2834, "step": 9921 }, { "epoch": 0.62, "grad_norm": 2.7455489369445782, "learning_rate": 3.271887377984223e-06, "loss": 0.2588, "step": 9922 }, { "epoch": 0.62, "grad_norm": 1.5076871106230614, "learning_rate": 3.270931702338384e-06, "loss": 0.2717, "step": 9923 }, { "epoch": 0.62, "grad_norm": 2.382204334404688, "learning_rate": 3.269976098434661e-06, "loss": 0.2707, "step": 9924 }, { "epoch": 0.62, "grad_norm": 2.0311926029643566, "learning_rate": 3.2690205663127007e-06, "loss": 0.2585, "step": 9925 }, { "epoch": 0.62, "grad_norm": 1.8506969859923204, "learning_rate": 3.268065106012153e-06, "loss": 0.2643, "step": 9926 }, { "epoch": 0.62, "grad_norm": 1.80756019697676, "learning_rate": 3.2671097175726587e-06, "loss": 0.2786, "step": 9927 }, { "epoch": 0.62, "grad_norm": 1.8991757724178666, "learning_rate": 3.266154401033861e-06, "loss": 0.2704, "step": 9928 }, { "epoch": 0.62, "grad_norm": 3.9288666279158564, "learning_rate": 3.265199156435398e-06, "loss": 0.2869, "step": 9929 }, { "epoch": 0.62, "grad_norm": 32.79802203209757, "learning_rate": 3.2642439838169023e-06, "loss": 0.2579, "step": 9930 }, { "epoch": 0.62, "grad_norm": 6.356750385280537, "learning_rate": 3.2632888832180066e-06, "loss": 0.2794, "step": 9931 }, { "epoch": 0.62, "grad_norm": 2.4159543439827265, "learning_rate": 3.2623338546783407e-06, "loss": 0.2729, "step": 9932 }, { "epoch": 0.62, "grad_norm": 2.611233782833403, "learning_rate": 3.261378898237529e-06, "loss": 0.2532, "step": 9933 }, { "epoch": 0.62, "grad_norm": 2.83625966464755, "learning_rate": 3.2604240139351937e-06, "loss": 0.2778, "step": 9934 }, { "epoch": 0.62, "grad_norm": 2.0249454942762197, "learning_rate": 3.2594692018109577e-06, "loss": 0.2567, "step": 9935 }, { "epoch": 0.62, "grad_norm": 4.095720306438147, "learning_rate": 3.258514461904435e-06, "loss": 0.2827, "step": 9936 }, { "epoch": 0.62, "grad_norm": 1.8356246857067402, "learning_rate": 3.257559794255241e-06, "loss": 0.2582, "step": 9937 }, { "epoch": 0.62, "grad_norm": 15.879396767792844, "learning_rate": 3.2566051989029866e-06, "loss": 0.2895, "step": 9938 }, { "epoch": 0.63, "grad_norm": 3.4436324697446716, "learning_rate": 3.2556506758872784e-06, "loss": 0.2581, "step": 9939 }, { "epoch": 0.63, "grad_norm": 6.428921470844976, "learning_rate": 3.2546962252477234e-06, "loss": 0.2653, "step": 9940 }, { "epoch": 0.63, "grad_norm": 3.023948149755126, "learning_rate": 3.2537418470239208e-06, "loss": 0.2679, "step": 9941 }, { "epoch": 0.63, "grad_norm": 3.2480300983769723, "learning_rate": 3.2527875412554694e-06, "loss": 0.2454, "step": 9942 }, { "epoch": 0.63, "grad_norm": 2.1378039251153815, "learning_rate": 3.2518333079819684e-06, "loss": 0.2695, "step": 9943 }, { "epoch": 0.63, "grad_norm": 1.3900987952765609, "learning_rate": 3.2508791472430073e-06, "loss": 0.2566, "step": 9944 }, { "epoch": 0.63, "grad_norm": 3.7525763418817006, "learning_rate": 3.2499250590781773e-06, "loss": 0.2575, "step": 9945 }, { "epoch": 0.63, "grad_norm": 2.6967761651448225, "learning_rate": 3.248971043527066e-06, "loss": 0.2747, "step": 9946 }, { "epoch": 0.63, "grad_norm": 2.40864027668934, "learning_rate": 3.248017100629256e-06, "loss": 0.2552, "step": 9947 }, { "epoch": 0.63, "grad_norm": 7.492652892048151, "learning_rate": 3.247063230424329e-06, "loss": 0.2662, "step": 9948 }, { "epoch": 0.63, "grad_norm": 38.11552204506903, "learning_rate": 3.2461094329518593e-06, "loss": 0.2578, "step": 9949 }, { "epoch": 0.63, "grad_norm": 1.9645221821642918, "learning_rate": 3.245155708251426e-06, "loss": 0.2744, "step": 9950 }, { "epoch": 0.63, "grad_norm": 2.1398337374814704, "learning_rate": 3.244202056362601e-06, "loss": 0.2595, "step": 9951 }, { "epoch": 0.63, "grad_norm": 2.3540382346625854, "learning_rate": 3.2432484773249495e-06, "loss": 0.272, "step": 9952 }, { "epoch": 0.63, "grad_norm": 2.011525360598053, "learning_rate": 3.2422949711780395e-06, "loss": 0.26, "step": 9953 }, { "epoch": 0.63, "grad_norm": 3.7813564347347532, "learning_rate": 3.2413415379614345e-06, "loss": 0.2591, "step": 9954 }, { "epoch": 0.63, "grad_norm": 7.539751554887144, "learning_rate": 3.2403881777146905e-06, "loss": 0.3004, "step": 9955 }, { "epoch": 0.63, "grad_norm": 5.311087492806082, "learning_rate": 3.2394348904773687e-06, "loss": 0.2835, "step": 9956 }, { "epoch": 0.63, "grad_norm": 2.4307233930329737, "learning_rate": 3.2384816762890182e-06, "loss": 0.2661, "step": 9957 }, { "epoch": 0.63, "grad_norm": 5.040809683042068, "learning_rate": 3.2375285351891918e-06, "loss": 0.2787, "step": 9958 }, { "epoch": 0.63, "grad_norm": 4.0616870068222015, "learning_rate": 3.2365754672174386e-06, "loss": 0.2668, "step": 9959 }, { "epoch": 0.63, "grad_norm": 7.763535396368723, "learning_rate": 3.235622472413301e-06, "loss": 0.2627, "step": 9960 }, { "epoch": 0.63, "grad_norm": 1.6023289537315193, "learning_rate": 3.234669550816321e-06, "loss": 0.2675, "step": 9961 }, { "epoch": 0.63, "grad_norm": 13.374333265303514, "learning_rate": 3.2337167024660366e-06, "loss": 0.2709, "step": 9962 }, { "epoch": 0.63, "grad_norm": 1.3761195103993602, "learning_rate": 3.2327639274019835e-06, "loss": 0.254, "step": 9963 }, { "epoch": 0.63, "grad_norm": 2.732504353626915, "learning_rate": 3.2318112256636923e-06, "loss": 0.2654, "step": 9964 }, { "epoch": 0.63, "grad_norm": 1.6397055592623355, "learning_rate": 3.230858597290697e-06, "loss": 0.2584, "step": 9965 }, { "epoch": 0.63, "grad_norm": 2.623168773235153, "learning_rate": 3.229906042322519e-06, "loss": 0.269, "step": 9966 }, { "epoch": 0.63, "grad_norm": 6.852660925017179, "learning_rate": 3.2289535607986843e-06, "loss": 0.2669, "step": 9967 }, { "epoch": 0.63, "grad_norm": 2.3472769934172164, "learning_rate": 3.2280011527587118e-06, "loss": 0.2578, "step": 9968 }, { "epoch": 0.63, "grad_norm": 2.6888069980344453, "learning_rate": 3.227048818242119e-06, "loss": 0.2539, "step": 9969 }, { "epoch": 0.63, "grad_norm": 1.9437871595464753, "learning_rate": 3.2260965572884202e-06, "loss": 0.2621, "step": 9970 }, { "epoch": 0.63, "grad_norm": 4.803572970593614, "learning_rate": 3.2251443699371252e-06, "loss": 0.2575, "step": 9971 }, { "epoch": 0.63, "grad_norm": 2.299282413598395, "learning_rate": 3.2241922562277416e-06, "loss": 0.3004, "step": 9972 }, { "epoch": 0.63, "grad_norm": 1.9689419717334427, "learning_rate": 3.223240216199778e-06, "loss": 0.2797, "step": 9973 }, { "epoch": 0.63, "grad_norm": 3.7677124015119414, "learning_rate": 3.222288249892732e-06, "loss": 0.2732, "step": 9974 }, { "epoch": 0.63, "grad_norm": 5.337022225180417, "learning_rate": 3.221336357346105e-06, "loss": 0.2664, "step": 9975 }, { "epoch": 0.63, "grad_norm": 2.3705375609607087, "learning_rate": 3.220384538599392e-06, "loss": 0.2635, "step": 9976 }, { "epoch": 0.63, "grad_norm": 7.426477112735168, "learning_rate": 3.2194327936920842e-06, "loss": 0.2733, "step": 9977 }, { "epoch": 0.63, "grad_norm": 2.2674385705041455, "learning_rate": 3.2184811226636746e-06, "loss": 0.2802, "step": 9978 }, { "epoch": 0.63, "grad_norm": 1.6950749532968423, "learning_rate": 3.217529525553645e-06, "loss": 0.2738, "step": 9979 }, { "epoch": 0.63, "grad_norm": 2.2637541576120266, "learning_rate": 3.2165780024014825e-06, "loss": 0.269, "step": 9980 }, { "epoch": 0.63, "grad_norm": 2.958006121460725, "learning_rate": 3.215626553246667e-06, "loss": 0.2787, "step": 9981 }, { "epoch": 0.63, "grad_norm": 2.8480662328736113, "learning_rate": 3.2146751781286743e-06, "loss": 0.2801, "step": 9982 }, { "epoch": 0.63, "grad_norm": 2.2434885960103785, "learning_rate": 3.213723877086979e-06, "loss": 0.2725, "step": 9983 }, { "epoch": 0.63, "grad_norm": 1.5809920577684438, "learning_rate": 3.2127726501610558e-06, "loss": 0.2435, "step": 9984 }, { "epoch": 0.63, "grad_norm": 2.75565235219549, "learning_rate": 3.2118214973903673e-06, "loss": 0.2713, "step": 9985 }, { "epoch": 0.63, "grad_norm": 3.3000391861556224, "learning_rate": 3.2108704188143803e-06, "loss": 0.2737, "step": 9986 }, { "epoch": 0.63, "grad_norm": 1.742276002723779, "learning_rate": 3.209919414472559e-06, "loss": 0.2541, "step": 9987 }, { "epoch": 0.63, "grad_norm": 6.87638450087813, "learning_rate": 3.20896848440436e-06, "loss": 0.2627, "step": 9988 }, { "epoch": 0.63, "grad_norm": 24.26284996646524, "learning_rate": 3.2080176286492415e-06, "loss": 0.2722, "step": 9989 }, { "epoch": 0.63, "grad_norm": 1.5086493109460943, "learning_rate": 3.2070668472466525e-06, "loss": 0.2743, "step": 9990 }, { "epoch": 0.63, "grad_norm": 2.9097411949715766, "learning_rate": 3.2061161402360454e-06, "loss": 0.2787, "step": 9991 }, { "epoch": 0.63, "grad_norm": 1.9428254453100957, "learning_rate": 3.2051655076568666e-06, "loss": 0.2745, "step": 9992 }, { "epoch": 0.63, "grad_norm": 2.1766278325768242, "learning_rate": 3.204214949548558e-06, "loss": 0.2748, "step": 9993 }, { "epoch": 0.63, "grad_norm": 1.5148475629807059, "learning_rate": 3.2032644659505595e-06, "loss": 0.2597, "step": 9994 }, { "epoch": 0.63, "grad_norm": 1.8351187003270963, "learning_rate": 3.2023140569023124e-06, "loss": 0.2713, "step": 9995 }, { "epoch": 0.63, "grad_norm": 2.3119515675998468, "learning_rate": 3.2013637224432463e-06, "loss": 0.2553, "step": 9996 }, { "epoch": 0.63, "grad_norm": 4.5777577907954905, "learning_rate": 3.200413462612796e-06, "loss": 0.2628, "step": 9997 }, { "epoch": 0.63, "grad_norm": 3.4677048711679106, "learning_rate": 3.1994632774503853e-06, "loss": 0.2832, "step": 9998 }, { "epoch": 0.63, "grad_norm": 2.5972451498659193, "learning_rate": 3.198513166995442e-06, "loss": 0.2736, "step": 9999 }, { "epoch": 0.63, "grad_norm": 1.927327376868954, "learning_rate": 3.197563131287389e-06, "loss": 0.2639, "step": 10000 }, { "epoch": 0.63, "grad_norm": 2.18965047883231, "learning_rate": 3.19661317036564e-06, "loss": 0.266, "step": 10001 }, { "epoch": 0.63, "grad_norm": 2.341591965281301, "learning_rate": 3.195663284269615e-06, "loss": 0.2678, "step": 10002 }, { "epoch": 0.63, "grad_norm": 2.1173636223746146, "learning_rate": 3.1947134730387265e-06, "loss": 0.2764, "step": 10003 }, { "epoch": 0.63, "grad_norm": 1.9686828247975843, "learning_rate": 3.1937637367123814e-06, "loss": 0.2741, "step": 10004 }, { "epoch": 0.63, "grad_norm": 4.627160398881354, "learning_rate": 3.192814075329988e-06, "loss": 0.2683, "step": 10005 }, { "epoch": 0.63, "grad_norm": 2.0770602123616824, "learning_rate": 3.191864488930948e-06, "loss": 0.2644, "step": 10006 }, { "epoch": 0.63, "grad_norm": 3.2461568601882127, "learning_rate": 3.190914977554661e-06, "loss": 0.2699, "step": 10007 }, { "epoch": 0.63, "grad_norm": 2.6242753466890067, "learning_rate": 3.1899655412405266e-06, "loss": 0.2572, "step": 10008 }, { "epoch": 0.63, "grad_norm": 2.364714501622136, "learning_rate": 3.1890161800279353e-06, "loss": 0.2779, "step": 10009 }, { "epoch": 0.63, "grad_norm": 1.3796187874313297, "learning_rate": 3.188066893956279e-06, "loss": 0.2623, "step": 10010 }, { "epoch": 0.63, "grad_norm": 1.772680539942646, "learning_rate": 3.1871176830649473e-06, "loss": 0.2629, "step": 10011 }, { "epoch": 0.63, "grad_norm": 1.728648012424328, "learning_rate": 3.1861685473933223e-06, "loss": 0.263, "step": 10012 }, { "epoch": 0.63, "grad_norm": 2.5028407688194734, "learning_rate": 3.1852194869807873e-06, "loss": 0.2677, "step": 10013 }, { "epoch": 0.63, "grad_norm": 1.4799106491738157, "learning_rate": 3.1842705018667173e-06, "loss": 0.2721, "step": 10014 }, { "epoch": 0.63, "grad_norm": 1.7620365634749426, "learning_rate": 3.18332159209049e-06, "loss": 0.273, "step": 10015 }, { "epoch": 0.63, "grad_norm": 1.433121238508442, "learning_rate": 3.1823727576914753e-06, "loss": 0.2537, "step": 10016 }, { "epoch": 0.63, "grad_norm": 1.7569294426756594, "learning_rate": 3.181423998709045e-06, "loss": 0.2812, "step": 10017 }, { "epoch": 0.63, "grad_norm": 7.7976168662829215, "learning_rate": 3.180475315182563e-06, "loss": 0.2777, "step": 10018 }, { "epoch": 0.63, "grad_norm": 2.626302240605091, "learning_rate": 3.1795267071513925e-06, "loss": 0.2528, "step": 10019 }, { "epoch": 0.63, "grad_norm": 2.014450872227873, "learning_rate": 3.178578174654891e-06, "loss": 0.2717, "step": 10020 }, { "epoch": 0.63, "grad_norm": 1.9183255383889546, "learning_rate": 3.1776297177324167e-06, "loss": 0.2754, "step": 10021 }, { "epoch": 0.63, "grad_norm": 1.8944432411742664, "learning_rate": 3.1766813364233234e-06, "loss": 0.2729, "step": 10022 }, { "epoch": 0.63, "grad_norm": 2.5160483787146775, "learning_rate": 3.175733030766959e-06, "loss": 0.2736, "step": 10023 }, { "epoch": 0.63, "grad_norm": 1.8786096699155248, "learning_rate": 3.1747848008026704e-06, "loss": 0.2707, "step": 10024 }, { "epoch": 0.63, "grad_norm": 2.078054043201778, "learning_rate": 3.1738366465698043e-06, "loss": 0.2744, "step": 10025 }, { "epoch": 0.63, "grad_norm": 4.514279063968887, "learning_rate": 3.1728885681076983e-06, "loss": 0.2889, "step": 10026 }, { "epoch": 0.63, "grad_norm": 3.123097377846155, "learning_rate": 3.1719405654556924e-06, "loss": 0.2667, "step": 10027 }, { "epoch": 0.63, "grad_norm": 1.8728657758142397, "learning_rate": 3.1709926386531174e-06, "loss": 0.2802, "step": 10028 }, { "epoch": 0.63, "grad_norm": 2.9623826940788787, "learning_rate": 3.170044787739307e-06, "loss": 0.2829, "step": 10029 }, { "epoch": 0.63, "grad_norm": 1.9078598998142404, "learning_rate": 3.1690970127535904e-06, "loss": 0.2637, "step": 10030 }, { "epoch": 0.63, "grad_norm": 1.6845025177566337, "learning_rate": 3.168149313735288e-06, "loss": 0.2784, "step": 10031 }, { "epoch": 0.63, "grad_norm": 3.983978912469738, "learning_rate": 3.1672016907237256e-06, "loss": 0.2751, "step": 10032 }, { "epoch": 0.63, "grad_norm": 2.2511549794876786, "learning_rate": 3.166254143758222e-06, "loss": 0.2652, "step": 10033 }, { "epoch": 0.63, "grad_norm": 2.1075240554115413, "learning_rate": 3.165306672878089e-06, "loss": 0.2564, "step": 10034 }, { "epoch": 0.63, "grad_norm": 1.7632322212063483, "learning_rate": 3.1643592781226428e-06, "loss": 0.2728, "step": 10035 }, { "epoch": 0.63, "grad_norm": 7.46186032204869, "learning_rate": 3.163411959531189e-06, "loss": 0.2616, "step": 10036 }, { "epoch": 0.63, "grad_norm": 7.881101303832084, "learning_rate": 3.1624647171430355e-06, "loss": 0.2651, "step": 10037 }, { "epoch": 0.63, "grad_norm": 3.2289831230123385, "learning_rate": 3.1615175509974855e-06, "loss": 0.2933, "step": 10038 }, { "epoch": 0.63, "grad_norm": 1.8019027339394116, "learning_rate": 3.160570461133836e-06, "loss": 0.29, "step": 10039 }, { "epoch": 0.63, "grad_norm": 5.696416257606188, "learning_rate": 3.1596234475913855e-06, "loss": 0.2814, "step": 10040 }, { "epoch": 0.63, "grad_norm": 0.6196570926025736, "learning_rate": 3.1586765104094295e-06, "loss": 0.4681, "step": 10041 }, { "epoch": 0.63, "grad_norm": 1.579448029063411, "learning_rate": 3.1577296496272535e-06, "loss": 0.2805, "step": 10042 }, { "epoch": 0.63, "grad_norm": 1.3746432894832377, "learning_rate": 3.156782865284148e-06, "loss": 0.2717, "step": 10043 }, { "epoch": 0.63, "grad_norm": 1.8715667524654522, "learning_rate": 3.155836157419394e-06, "loss": 0.2784, "step": 10044 }, { "epoch": 0.63, "grad_norm": 3.518868977270992, "learning_rate": 3.1548895260722743e-06, "loss": 0.2655, "step": 10045 }, { "epoch": 0.63, "grad_norm": 2.2942224365251014, "learning_rate": 3.1539429712820634e-06, "loss": 0.2673, "step": 10046 }, { "epoch": 0.63, "grad_norm": 1.6836469079689118, "learning_rate": 3.15299649308804e-06, "loss": 0.2619, "step": 10047 }, { "epoch": 0.63, "grad_norm": 2.3316655067254635, "learning_rate": 3.152050091529472e-06, "loss": 0.2682, "step": 10048 }, { "epoch": 0.63, "grad_norm": 6.145213553660683, "learning_rate": 3.151103766645629e-06, "loss": 0.2715, "step": 10049 }, { "epoch": 0.63, "grad_norm": 2.251853760793825, "learning_rate": 3.150157518475774e-06, "loss": 0.2514, "step": 10050 }, { "epoch": 0.63, "grad_norm": 2.5113708883773502, "learning_rate": 3.149211347059169e-06, "loss": 0.282, "step": 10051 }, { "epoch": 0.63, "grad_norm": 1.7868447197110866, "learning_rate": 3.1482652524350727e-06, "loss": 0.2689, "step": 10052 }, { "epoch": 0.63, "grad_norm": 4.510469220806779, "learning_rate": 3.1473192346427396e-06, "loss": 0.2811, "step": 10053 }, { "epoch": 0.63, "grad_norm": 2.4004931673113967, "learning_rate": 3.146373293721422e-06, "loss": 0.2644, "step": 10054 }, { "epoch": 0.63, "grad_norm": 1.8329117459328055, "learning_rate": 3.14542742971037e-06, "loss": 0.2899, "step": 10055 }, { "epoch": 0.63, "grad_norm": 2.7556121061371597, "learning_rate": 3.1444816426488275e-06, "loss": 0.2678, "step": 10056 }, { "epoch": 0.63, "grad_norm": 1.8984575322681536, "learning_rate": 3.143535932576038e-06, "loss": 0.2619, "step": 10057 }, { "epoch": 0.63, "grad_norm": 2.4185456732644264, "learning_rate": 3.1425902995312394e-06, "loss": 0.2663, "step": 10058 }, { "epoch": 0.63, "grad_norm": 2.4057435765175152, "learning_rate": 3.141644743553668e-06, "loss": 0.2918, "step": 10059 }, { "epoch": 0.63, "grad_norm": 1.9093835713711051, "learning_rate": 3.1406992646825583e-06, "loss": 0.2776, "step": 10060 }, { "epoch": 0.63, "grad_norm": 1.8243505802516449, "learning_rate": 3.1397538629571368e-06, "loss": 0.2678, "step": 10061 }, { "epoch": 0.63, "grad_norm": 2.95825607455039, "learning_rate": 3.1388085384166324e-06, "loss": 0.2818, "step": 10062 }, { "epoch": 0.63, "grad_norm": 3.369785162060367, "learning_rate": 3.1378632911002688e-06, "loss": 0.2591, "step": 10063 }, { "epoch": 0.63, "grad_norm": 1.8408484348366791, "learning_rate": 3.136918121047264e-06, "loss": 0.2596, "step": 10064 }, { "epoch": 0.63, "grad_norm": 3.530616936246068, "learning_rate": 3.135973028296836e-06, "loss": 0.2759, "step": 10065 }, { "epoch": 0.63, "grad_norm": 1.8773389145388941, "learning_rate": 3.135028012888197e-06, "loss": 0.2538, "step": 10066 }, { "epoch": 0.63, "grad_norm": 3.29984652337607, "learning_rate": 3.134083074860559e-06, "loss": 0.2716, "step": 10067 }, { "epoch": 0.63, "grad_norm": 3.1437430964536395, "learning_rate": 3.1331382142531265e-06, "loss": 0.2825, "step": 10068 }, { "epoch": 0.63, "grad_norm": 2.2273024299155457, "learning_rate": 3.132193431105108e-06, "loss": 0.274, "step": 10069 }, { "epoch": 0.63, "grad_norm": 1.4882246604550748, "learning_rate": 3.1312487254557006e-06, "loss": 0.2665, "step": 10070 }, { "epoch": 0.63, "grad_norm": 1.3844836479646911, "learning_rate": 3.1303040973441036e-06, "loss": 0.2646, "step": 10071 }, { "epoch": 0.63, "grad_norm": 1.9536467788912197, "learning_rate": 3.1293595468095094e-06, "loss": 0.2821, "step": 10072 }, { "epoch": 0.63, "grad_norm": 1.6201119669717978, "learning_rate": 3.1284150738911125e-06, "loss": 0.2499, "step": 10073 }, { "epoch": 0.63, "grad_norm": 2.5418702408945038, "learning_rate": 3.127470678628096e-06, "loss": 0.2761, "step": 10074 }, { "epoch": 0.63, "grad_norm": 3.3283382132612167, "learning_rate": 3.1265263610596475e-06, "loss": 0.2641, "step": 10075 }, { "epoch": 0.63, "grad_norm": 1.4433540304588321, "learning_rate": 3.125582121224947e-06, "loss": 0.263, "step": 10076 }, { "epoch": 0.63, "grad_norm": 20.278285307033244, "learning_rate": 3.124637959163176e-06, "loss": 0.2679, "step": 10077 }, { "epoch": 0.63, "grad_norm": 4.388136569112997, "learning_rate": 3.123693874913506e-06, "loss": 0.2773, "step": 10078 }, { "epoch": 0.63, "grad_norm": 1.8994297306023664, "learning_rate": 3.12274986851511e-06, "loss": 0.2762, "step": 10079 }, { "epoch": 0.63, "grad_norm": 2.3895787241115194, "learning_rate": 3.1218059400071564e-06, "loss": 0.2609, "step": 10080 }, { "epoch": 0.63, "grad_norm": 2.6774317358193516, "learning_rate": 3.1208620894288105e-06, "loss": 0.2852, "step": 10081 }, { "epoch": 0.63, "grad_norm": 1.7341905061504503, "learning_rate": 3.1199183168192337e-06, "loss": 0.2881, "step": 10082 }, { "epoch": 0.63, "grad_norm": 2.3286729637431853, "learning_rate": 3.1189746222175843e-06, "loss": 0.2579, "step": 10083 }, { "epoch": 0.63, "grad_norm": 2.110021476189369, "learning_rate": 3.11803100566302e-06, "loss": 0.2665, "step": 10084 }, { "epoch": 0.63, "grad_norm": 2.706249935834069, "learning_rate": 3.117087467194693e-06, "loss": 0.2639, "step": 10085 }, { "epoch": 0.63, "grad_norm": 1.9493757887438823, "learning_rate": 3.11614400685175e-06, "loss": 0.2732, "step": 10086 }, { "epoch": 0.63, "grad_norm": 2.2076273682628904, "learning_rate": 3.1152006246733395e-06, "loss": 0.2708, "step": 10087 }, { "epoch": 0.63, "grad_norm": 2.313402505219228, "learning_rate": 3.114257320698602e-06, "loss": 0.2534, "step": 10088 }, { "epoch": 0.63, "grad_norm": 3.113284925556673, "learning_rate": 3.1133140949666785e-06, "loss": 0.2744, "step": 10089 }, { "epoch": 0.63, "grad_norm": 2.1397030668441617, "learning_rate": 3.1123709475167043e-06, "loss": 0.2732, "step": 10090 }, { "epoch": 0.63, "grad_norm": 1.6414653821745988, "learning_rate": 3.11142787838781e-06, "loss": 0.264, "step": 10091 }, { "epoch": 0.63, "grad_norm": 4.693477360207317, "learning_rate": 3.110484887619129e-06, "loss": 0.2702, "step": 10092 }, { "epoch": 0.63, "grad_norm": 1.6470543237118083, "learning_rate": 3.109541975249787e-06, "loss": 0.2557, "step": 10093 }, { "epoch": 0.63, "grad_norm": 2.5648910258663804, "learning_rate": 3.1085991413189053e-06, "loss": 0.2962, "step": 10094 }, { "epoch": 0.63, "grad_norm": 2.0426294366035807, "learning_rate": 3.1076563858656062e-06, "loss": 0.2506, "step": 10095 }, { "epoch": 0.63, "grad_norm": 2.4564743363958335, "learning_rate": 3.1067137089290033e-06, "loss": 0.2732, "step": 10096 }, { "epoch": 0.63, "grad_norm": 2.19124921830546, "learning_rate": 3.105771110548212e-06, "loss": 0.2663, "step": 10097 }, { "epoch": 0.64, "grad_norm": 13.372263704027683, "learning_rate": 3.10482859076234e-06, "loss": 0.278, "step": 10098 }, { "epoch": 0.64, "grad_norm": 3.0814643219929176, "learning_rate": 3.1038861496104987e-06, "loss": 0.2662, "step": 10099 }, { "epoch": 0.64, "grad_norm": 2.246160079019944, "learning_rate": 3.102943787131788e-06, "loss": 0.2625, "step": 10100 }, { "epoch": 0.64, "grad_norm": 3.450483413831347, "learning_rate": 3.1020015033653094e-06, "loss": 0.2438, "step": 10101 }, { "epoch": 0.64, "grad_norm": 3.261575768600086, "learning_rate": 3.10105929835016e-06, "loss": 0.2801, "step": 10102 }, { "epoch": 0.64, "grad_norm": 1.8687858229308059, "learning_rate": 3.100117172125433e-06, "loss": 0.2806, "step": 10103 }, { "epoch": 0.64, "grad_norm": 1.880372771729046, "learning_rate": 3.099175124730218e-06, "loss": 0.2568, "step": 10104 }, { "epoch": 0.64, "grad_norm": 18.921810094277788, "learning_rate": 3.0982331562036037e-06, "loss": 0.3059, "step": 10105 }, { "epoch": 0.64, "grad_norm": 17.71574008247158, "learning_rate": 3.097291266584673e-06, "loss": 0.2753, "step": 10106 }, { "epoch": 0.64, "grad_norm": 2.5085525891596148, "learning_rate": 3.0963494559125084e-06, "loss": 0.2687, "step": 10107 }, { "epoch": 0.64, "grad_norm": 3.174098534749825, "learning_rate": 3.095407724226186e-06, "loss": 0.2516, "step": 10108 }, { "epoch": 0.64, "grad_norm": 1.9535102854308344, "learning_rate": 3.0944660715647813e-06, "loss": 0.2641, "step": 10109 }, { "epoch": 0.64, "grad_norm": 4.172982818492106, "learning_rate": 3.093524497967362e-06, "loss": 0.3047, "step": 10110 }, { "epoch": 0.64, "grad_norm": 2.7053760550966017, "learning_rate": 3.0925830034729986e-06, "loss": 0.2637, "step": 10111 }, { "epoch": 0.64, "grad_norm": 2.1150917499177964, "learning_rate": 3.091641588120754e-06, "loss": 0.2561, "step": 10112 }, { "epoch": 0.64, "grad_norm": 2.8556553949819037, "learning_rate": 3.0907002519496875e-06, "loss": 0.2812, "step": 10113 }, { "epoch": 0.64, "grad_norm": 2.596939683625185, "learning_rate": 3.089758994998861e-06, "loss": 0.2707, "step": 10114 }, { "epoch": 0.64, "grad_norm": 2.900356263255975, "learning_rate": 3.088817817307327e-06, "loss": 0.2668, "step": 10115 }, { "epoch": 0.64, "grad_norm": 1.7163415576738235, "learning_rate": 3.0878767189141355e-06, "loss": 0.2665, "step": 10116 }, { "epoch": 0.64, "grad_norm": 2.344543836072783, "learning_rate": 3.0869356998583366e-06, "loss": 0.2718, "step": 10117 }, { "epoch": 0.64, "grad_norm": 2.340726681295385, "learning_rate": 3.0859947601789724e-06, "loss": 0.2615, "step": 10118 }, { "epoch": 0.64, "grad_norm": 2.515503426889501, "learning_rate": 3.0850538999150866e-06, "loss": 0.2706, "step": 10119 }, { "epoch": 0.64, "grad_norm": 1.876976324052386, "learning_rate": 3.084113119105714e-06, "loss": 0.2899, "step": 10120 }, { "epoch": 0.64, "grad_norm": 2.083568949066206, "learning_rate": 3.08317241778989e-06, "loss": 0.2715, "step": 10121 }, { "epoch": 0.64, "grad_norm": 2.544694145277682, "learning_rate": 3.0822317960066493e-06, "loss": 0.2864, "step": 10122 }, { "epoch": 0.64, "grad_norm": 1.934966786984859, "learning_rate": 3.081291253795018e-06, "loss": 0.2742, "step": 10123 }, { "epoch": 0.64, "grad_norm": 2.1233943000294593, "learning_rate": 3.0803507911940193e-06, "loss": 0.3055, "step": 10124 }, { "epoch": 0.64, "grad_norm": 1.6936264444753946, "learning_rate": 3.0794104082426772e-06, "loss": 0.2738, "step": 10125 }, { "epoch": 0.64, "grad_norm": 3.3895582488985383, "learning_rate": 3.078470104980008e-06, "loss": 0.2519, "step": 10126 }, { "epoch": 0.64, "grad_norm": 1.712330595434032, "learning_rate": 3.0775298814450273e-06, "loss": 0.2664, "step": 10127 }, { "epoch": 0.64, "grad_norm": 7.127102241181531, "learning_rate": 3.076589737676744e-06, "loss": 0.2912, "step": 10128 }, { "epoch": 0.64, "grad_norm": 2.7741587075314817, "learning_rate": 3.0756496737141715e-06, "loss": 0.2667, "step": 10129 }, { "epoch": 0.64, "grad_norm": 4.889646570103967, "learning_rate": 3.074709689596312e-06, "loss": 0.2549, "step": 10130 }, { "epoch": 0.64, "grad_norm": 1.6729520432348077, "learning_rate": 3.0737697853621672e-06, "loss": 0.2533, "step": 10131 }, { "epoch": 0.64, "grad_norm": 1.5657536258905018, "learning_rate": 3.072829961050735e-06, "loss": 0.292, "step": 10132 }, { "epoch": 0.64, "grad_norm": 2.539708974291937, "learning_rate": 3.071890216701012e-06, "loss": 0.263, "step": 10133 }, { "epoch": 0.64, "grad_norm": 1.88300859835801, "learning_rate": 3.0709505523519868e-06, "loss": 0.287, "step": 10134 }, { "epoch": 0.64, "grad_norm": 2.5642082145514746, "learning_rate": 3.07001096804265e-06, "loss": 0.2758, "step": 10135 }, { "epoch": 0.64, "grad_norm": 2.100345082396697, "learning_rate": 3.0690714638119843e-06, "loss": 0.2756, "step": 10136 }, { "epoch": 0.64, "grad_norm": 3.8571735285393314, "learning_rate": 3.068132039698977e-06, "loss": 0.2894, "step": 10137 }, { "epoch": 0.64, "grad_norm": 1.8375961100095908, "learning_rate": 3.0671926957426e-06, "loss": 0.2916, "step": 10138 }, { "epoch": 0.64, "grad_norm": 2.91512447998109, "learning_rate": 3.066253431981834e-06, "loss": 0.2778, "step": 10139 }, { "epoch": 0.64, "grad_norm": 1.965821722495347, "learning_rate": 3.0653142484556454e-06, "loss": 0.2604, "step": 10140 }, { "epoch": 0.64, "grad_norm": 2.48749806441734, "learning_rate": 3.0643751452030065e-06, "loss": 0.2872, "step": 10141 }, { "epoch": 0.64, "grad_norm": 1.7852139975220183, "learning_rate": 3.0634361222628794e-06, "loss": 0.2716, "step": 10142 }, { "epoch": 0.64, "grad_norm": 16.413355486247813, "learning_rate": 3.0624971796742263e-06, "loss": 0.2601, "step": 10143 }, { "epoch": 0.64, "grad_norm": 1.7839414408876089, "learning_rate": 3.0615583174760083e-06, "loss": 0.2815, "step": 10144 }, { "epoch": 0.64, "grad_norm": 1.9117422090997844, "learning_rate": 3.0606195357071795e-06, "loss": 0.2891, "step": 10145 }, { "epoch": 0.64, "grad_norm": 2.6819593658141683, "learning_rate": 3.0596808344066896e-06, "loss": 0.265, "step": 10146 }, { "epoch": 0.64, "grad_norm": 3.459310506845838, "learning_rate": 3.0587422136134896e-06, "loss": 0.269, "step": 10147 }, { "epoch": 0.64, "grad_norm": 1.7168692223380229, "learning_rate": 3.0578036733665224e-06, "loss": 0.2603, "step": 10148 }, { "epoch": 0.64, "grad_norm": 29.877550580355468, "learning_rate": 3.0568652137047312e-06, "loss": 0.2778, "step": 10149 }, { "epoch": 0.64, "grad_norm": 9.178501763688367, "learning_rate": 3.0559268346670507e-06, "loss": 0.2973, "step": 10150 }, { "epoch": 0.64, "grad_norm": 1.7895490436953738, "learning_rate": 3.0549885362924215e-06, "loss": 0.2616, "step": 10151 }, { "epoch": 0.64, "grad_norm": 5.7808985512461115, "learning_rate": 3.0540503186197724e-06, "loss": 0.2733, "step": 10152 }, { "epoch": 0.64, "grad_norm": 2.800684782977907, "learning_rate": 3.0531121816880327e-06, "loss": 0.3003, "step": 10153 }, { "epoch": 0.64, "grad_norm": 0.6093911880438868, "learning_rate": 3.052174125536126e-06, "loss": 0.492, "step": 10154 }, { "epoch": 0.64, "grad_norm": 5.932611203987603, "learning_rate": 3.0512361502029747e-06, "loss": 0.2607, "step": 10155 }, { "epoch": 0.64, "grad_norm": 2.5259995957077686, "learning_rate": 3.0502982557274962e-06, "loss": 0.2785, "step": 10156 }, { "epoch": 0.64, "grad_norm": 5.030242197573419, "learning_rate": 3.049360442148608e-06, "loss": 0.2988, "step": 10157 }, { "epoch": 0.64, "grad_norm": 0.6263975983204364, "learning_rate": 3.0484227095052154e-06, "loss": 0.5001, "step": 10158 }, { "epoch": 0.64, "grad_norm": 1.7789997055425444, "learning_rate": 3.047485057836235e-06, "loss": 0.2448, "step": 10159 }, { "epoch": 0.64, "grad_norm": 2.440988523108026, "learning_rate": 3.046547487180566e-06, "loss": 0.2703, "step": 10160 }, { "epoch": 0.64, "grad_norm": 2.2293867136504932, "learning_rate": 3.0456099975771126e-06, "loss": 0.2588, "step": 10161 }, { "epoch": 0.64, "grad_norm": 2.1273934230670917, "learning_rate": 3.044672589064771e-06, "loss": 0.2485, "step": 10162 }, { "epoch": 0.64, "grad_norm": 4.213251687312522, "learning_rate": 3.043735261682438e-06, "loss": 0.2592, "step": 10163 }, { "epoch": 0.64, "grad_norm": 10.799080709975172, "learning_rate": 3.0427980154690017e-06, "loss": 0.277, "step": 10164 }, { "epoch": 0.64, "grad_norm": 1.932450694159075, "learning_rate": 3.041860850463352e-06, "loss": 0.2712, "step": 10165 }, { "epoch": 0.64, "grad_norm": 3.63425435114625, "learning_rate": 3.0409237667043744e-06, "loss": 0.2686, "step": 10166 }, { "epoch": 0.64, "grad_norm": 2.872411582669073, "learning_rate": 3.039986764230951e-06, "loss": 0.3041, "step": 10167 }, { "epoch": 0.64, "grad_norm": 2.0505403463626175, "learning_rate": 3.0390498430819565e-06, "loss": 0.2716, "step": 10168 }, { "epoch": 0.64, "grad_norm": 4.092470412115965, "learning_rate": 3.038113003296268e-06, "loss": 0.2618, "step": 10169 }, { "epoch": 0.64, "grad_norm": 2.8945188839773484, "learning_rate": 3.037176244912755e-06, "loss": 0.2612, "step": 10170 }, { "epoch": 0.64, "grad_norm": 4.428301612150675, "learning_rate": 3.036239567970287e-06, "loss": 0.2788, "step": 10171 }, { "epoch": 0.64, "grad_norm": 5.3656500157664695, "learning_rate": 3.035302972507726e-06, "loss": 0.2892, "step": 10172 }, { "epoch": 0.64, "grad_norm": 3.3679817876270866, "learning_rate": 3.034366458563933e-06, "loss": 0.2709, "step": 10173 }, { "epoch": 0.64, "grad_norm": 4.598483411415566, "learning_rate": 3.0334300261777693e-06, "loss": 0.2705, "step": 10174 }, { "epoch": 0.64, "grad_norm": 3.075141900099125, "learning_rate": 3.032493675388087e-06, "loss": 0.287, "step": 10175 }, { "epoch": 0.64, "grad_norm": 3.685290551967383, "learning_rate": 3.031557406233736e-06, "loss": 0.286, "step": 10176 }, { "epoch": 0.64, "grad_norm": 2.6572913861913072, "learning_rate": 3.0306212187535653e-06, "loss": 0.2689, "step": 10177 }, { "epoch": 0.64, "grad_norm": 5.208896982634066, "learning_rate": 3.029685112986417e-06, "loss": 0.2475, "step": 10178 }, { "epoch": 0.64, "grad_norm": 2.6373364895627884, "learning_rate": 3.028749088971135e-06, "loss": 0.2988, "step": 10179 }, { "epoch": 0.64, "grad_norm": 5.907750624306396, "learning_rate": 3.027813146746551e-06, "loss": 0.2695, "step": 10180 }, { "epoch": 0.64, "grad_norm": 1.7924498823603048, "learning_rate": 3.026877286351505e-06, "loss": 0.2633, "step": 10181 }, { "epoch": 0.64, "grad_norm": 4.356377903405441, "learning_rate": 3.0259415078248246e-06, "loss": 0.2504, "step": 10182 }, { "epoch": 0.64, "grad_norm": 3.2792728631645054, "learning_rate": 3.025005811205339e-06, "loss": 0.2803, "step": 10183 }, { "epoch": 0.64, "grad_norm": 5.673231353724311, "learning_rate": 3.024070196531869e-06, "loss": 0.2573, "step": 10184 }, { "epoch": 0.64, "grad_norm": 1.8496966766462282, "learning_rate": 3.023134663843236e-06, "loss": 0.2724, "step": 10185 }, { "epoch": 0.64, "grad_norm": 2.0429540934790427, "learning_rate": 3.0221992131782573e-06, "loss": 0.292, "step": 10186 }, { "epoch": 0.64, "grad_norm": 1.5778665551464879, "learning_rate": 3.021263844575747e-06, "loss": 0.2662, "step": 10187 }, { "epoch": 0.64, "grad_norm": 1.54086520150844, "learning_rate": 3.020328558074511e-06, "loss": 0.2547, "step": 10188 }, { "epoch": 0.64, "grad_norm": 1.527184549260713, "learning_rate": 3.0193933537133624e-06, "loss": 0.2532, "step": 10189 }, { "epoch": 0.64, "grad_norm": 4.666722457301461, "learning_rate": 3.0184582315311013e-06, "loss": 0.2761, "step": 10190 }, { "epoch": 0.64, "grad_norm": 1.7200665926247563, "learning_rate": 3.017523191566528e-06, "loss": 0.2668, "step": 10191 }, { "epoch": 0.64, "grad_norm": 1.727789154045117, "learning_rate": 3.0165882338584383e-06, "loss": 0.2527, "step": 10192 }, { "epoch": 0.64, "grad_norm": 3.2410172379505586, "learning_rate": 3.0156533584456268e-06, "loss": 0.2678, "step": 10193 }, { "epoch": 0.64, "grad_norm": 2.33067398381951, "learning_rate": 3.0147185653668805e-06, "loss": 0.2725, "step": 10194 }, { "epoch": 0.64, "grad_norm": 2.12044012495356, "learning_rate": 3.0137838546609867e-06, "loss": 0.2834, "step": 10195 }, { "epoch": 0.64, "grad_norm": 9.15527127240461, "learning_rate": 3.0128492263667287e-06, "loss": 0.2804, "step": 10196 }, { "epoch": 0.64, "grad_norm": 2.012823032618822, "learning_rate": 3.011914680522888e-06, "loss": 0.2732, "step": 10197 }, { "epoch": 0.64, "grad_norm": 23.83203618924269, "learning_rate": 3.0109802171682366e-06, "loss": 0.2702, "step": 10198 }, { "epoch": 0.64, "grad_norm": 2.096726270747896, "learning_rate": 3.0100458363415506e-06, "loss": 0.2744, "step": 10199 }, { "epoch": 0.64, "grad_norm": 2.472775888169146, "learning_rate": 3.0091115380815962e-06, "loss": 0.277, "step": 10200 }, { "epoch": 0.64, "grad_norm": 2.624312152490665, "learning_rate": 3.0081773224271417e-06, "loss": 0.2604, "step": 10201 }, { "epoch": 0.64, "grad_norm": 3.142779529305478, "learning_rate": 3.007243189416946e-06, "loss": 0.2689, "step": 10202 }, { "epoch": 0.64, "grad_norm": 3.450999900769122, "learning_rate": 3.006309139089769e-06, "loss": 0.2885, "step": 10203 }, { "epoch": 0.64, "grad_norm": 2.0392459597264607, "learning_rate": 3.005375171484369e-06, "loss": 0.2577, "step": 10204 }, { "epoch": 0.64, "grad_norm": 1.6237970373006214, "learning_rate": 3.0044412866394964e-06, "loss": 0.2672, "step": 10205 }, { "epoch": 0.64, "grad_norm": 2.0768314942856674, "learning_rate": 3.003507484593898e-06, "loss": 0.2729, "step": 10206 }, { "epoch": 0.64, "grad_norm": 4.674237992765155, "learning_rate": 3.002573765386322e-06, "loss": 0.268, "step": 10207 }, { "epoch": 0.64, "grad_norm": 3.139310557894765, "learning_rate": 3.0016401290555065e-06, "loss": 0.261, "step": 10208 }, { "epoch": 0.64, "grad_norm": 2.8527313217119192, "learning_rate": 3.000706575640193e-06, "loss": 0.2751, "step": 10209 }, { "epoch": 0.64, "grad_norm": 2.017620907037851, "learning_rate": 2.9997731051791113e-06, "loss": 0.2442, "step": 10210 }, { "epoch": 0.64, "grad_norm": 2.694812998113918, "learning_rate": 2.9988397177109994e-06, "loss": 0.2822, "step": 10211 }, { "epoch": 0.64, "grad_norm": 2.196133921080475, "learning_rate": 2.997906413274582e-06, "loss": 0.2685, "step": 10212 }, { "epoch": 0.64, "grad_norm": 2.6917330466268106, "learning_rate": 2.9969731919085844e-06, "loss": 0.2771, "step": 10213 }, { "epoch": 0.64, "grad_norm": 1.8836634430406083, "learning_rate": 2.9960400536517246e-06, "loss": 0.2705, "step": 10214 }, { "epoch": 0.64, "grad_norm": 5.13448276858894, "learning_rate": 2.9951069985427246e-06, "loss": 0.2855, "step": 10215 }, { "epoch": 0.64, "grad_norm": 2.815158030942339, "learning_rate": 2.9941740266202946e-06, "loss": 0.2834, "step": 10216 }, { "epoch": 0.64, "grad_norm": 0.6404612673691276, "learning_rate": 2.9932411379231486e-06, "loss": 0.4989, "step": 10217 }, { "epoch": 0.64, "grad_norm": 1.9222654985393097, "learning_rate": 2.9923083324899894e-06, "loss": 0.2677, "step": 10218 }, { "epoch": 0.64, "grad_norm": 3.2963999108599005, "learning_rate": 2.9913756103595258e-06, "loss": 0.2842, "step": 10219 }, { "epoch": 0.64, "grad_norm": 2.0794310772310274, "learning_rate": 2.9904429715704554e-06, "loss": 0.2727, "step": 10220 }, { "epoch": 0.64, "grad_norm": 2.543411504214379, "learning_rate": 2.989510416161476e-06, "loss": 0.28, "step": 10221 }, { "epoch": 0.64, "grad_norm": 1.8187753782238385, "learning_rate": 2.988577944171279e-06, "loss": 0.2506, "step": 10222 }, { "epoch": 0.64, "grad_norm": 4.950717599692712, "learning_rate": 2.9876455556385576e-06, "loss": 0.2727, "step": 10223 }, { "epoch": 0.64, "grad_norm": 2.298750541110782, "learning_rate": 2.9867132506019958e-06, "loss": 0.274, "step": 10224 }, { "epoch": 0.64, "grad_norm": 2.0069221080400816, "learning_rate": 2.9857810291002755e-06, "loss": 0.2578, "step": 10225 }, { "epoch": 0.64, "grad_norm": 2.8974810685327945, "learning_rate": 2.984848891172079e-06, "loss": 0.2718, "step": 10226 }, { "epoch": 0.64, "grad_norm": 2.0549144684644705, "learning_rate": 2.9839168368560827e-06, "loss": 0.2783, "step": 10227 }, { "epoch": 0.64, "grad_norm": 4.234453526518632, "learning_rate": 2.982984866190957e-06, "loss": 0.2693, "step": 10228 }, { "epoch": 0.64, "grad_norm": 1.7125253929904005, "learning_rate": 2.9820529792153717e-06, "loss": 0.278, "step": 10229 }, { "epoch": 0.64, "grad_norm": 1.8623729075608615, "learning_rate": 2.9811211759679926e-06, "loss": 0.2622, "step": 10230 }, { "epoch": 0.64, "grad_norm": 2.154812623973168, "learning_rate": 2.9801894564874824e-06, "loss": 0.2763, "step": 10231 }, { "epoch": 0.64, "grad_norm": 1.864391409126263, "learning_rate": 2.9792578208124976e-06, "loss": 0.2888, "step": 10232 }, { "epoch": 0.64, "grad_norm": 1.5593740056610697, "learning_rate": 2.9783262689816963e-06, "loss": 0.2681, "step": 10233 }, { "epoch": 0.64, "grad_norm": 2.492521212032518, "learning_rate": 2.977394801033728e-06, "loss": 0.2676, "step": 10234 }, { "epoch": 0.64, "grad_norm": 0.6088182633682745, "learning_rate": 2.976463417007244e-06, "loss": 0.4982, "step": 10235 }, { "epoch": 0.64, "grad_norm": 3.6838069910757487, "learning_rate": 2.975532116940885e-06, "loss": 0.2583, "step": 10236 }, { "epoch": 0.64, "grad_norm": 4.151719183837695, "learning_rate": 2.974600900873296e-06, "loss": 0.2623, "step": 10237 }, { "epoch": 0.64, "grad_norm": 2.7806131137092898, "learning_rate": 2.9736697688431116e-06, "loss": 0.2849, "step": 10238 }, { "epoch": 0.64, "grad_norm": 2.0241893268833118, "learning_rate": 2.972738720888969e-06, "loss": 0.2668, "step": 10239 }, { "epoch": 0.64, "grad_norm": 3.57971094866745, "learning_rate": 2.971807757049496e-06, "loss": 0.2606, "step": 10240 }, { "epoch": 0.64, "grad_norm": 4.060102433166916, "learning_rate": 2.970876877363322e-06, "loss": 0.2874, "step": 10241 }, { "epoch": 0.64, "grad_norm": 2.50574208213328, "learning_rate": 2.9699460818690714e-06, "loss": 0.2685, "step": 10242 }, { "epoch": 0.64, "grad_norm": 16.948747642390405, "learning_rate": 2.9690153706053638e-06, "loss": 0.2691, "step": 10243 }, { "epoch": 0.64, "grad_norm": 1.7094953346386037, "learning_rate": 2.968084743610815e-06, "loss": 0.274, "step": 10244 }, { "epoch": 0.64, "grad_norm": 3.663651213945327, "learning_rate": 2.9671542009240406e-06, "loss": 0.2665, "step": 10245 }, { "epoch": 0.64, "grad_norm": 4.41579868964434, "learning_rate": 2.966223742583648e-06, "loss": 0.282, "step": 10246 }, { "epoch": 0.64, "grad_norm": 1.6469141833717196, "learning_rate": 2.965293368628244e-06, "loss": 0.2562, "step": 10247 }, { "epoch": 0.64, "grad_norm": 1.5138190901598279, "learning_rate": 2.964363079096434e-06, "loss": 0.2613, "step": 10248 }, { "epoch": 0.64, "grad_norm": 2.275875266250925, "learning_rate": 2.963432874026815e-06, "loss": 0.2759, "step": 10249 }, { "epoch": 0.64, "grad_norm": 2.69531648687304, "learning_rate": 2.962502753457982e-06, "loss": 0.2606, "step": 10250 }, { "epoch": 0.64, "grad_norm": 1.7325910610110424, "learning_rate": 2.9615727174285307e-06, "loss": 0.3142, "step": 10251 }, { "epoch": 0.64, "grad_norm": 0.6427437877053559, "learning_rate": 2.960642765977047e-06, "loss": 0.4688, "step": 10252 }, { "epoch": 0.64, "grad_norm": 3.863669200416984, "learning_rate": 2.9597128991421187e-06, "loss": 0.263, "step": 10253 }, { "epoch": 0.64, "grad_norm": 5.272733400023962, "learning_rate": 2.9587831169623244e-06, "loss": 0.2607, "step": 10254 }, { "epoch": 0.64, "grad_norm": 2.0399852713986415, "learning_rate": 2.957853419476243e-06, "loss": 0.2753, "step": 10255 }, { "epoch": 0.64, "grad_norm": 1.9569226797701358, "learning_rate": 2.956923806722453e-06, "loss": 0.2727, "step": 10256 }, { "epoch": 0.65, "grad_norm": 1.8961836722298089, "learning_rate": 2.9559942787395224e-06, "loss": 0.2579, "step": 10257 }, { "epoch": 0.65, "grad_norm": 7.6855425757466405, "learning_rate": 2.9550648355660195e-06, "loss": 0.2668, "step": 10258 }, { "epoch": 0.65, "grad_norm": 1.574673243926472, "learning_rate": 2.9541354772405096e-06, "loss": 0.2774, "step": 10259 }, { "epoch": 0.65, "grad_norm": 2.6174823727962924, "learning_rate": 2.953206203801552e-06, "loss": 0.2526, "step": 10260 }, { "epoch": 0.65, "grad_norm": 1.7780847535332385, "learning_rate": 2.952277015287705e-06, "loss": 0.2639, "step": 10261 }, { "epoch": 0.65, "grad_norm": 1.7532879482171553, "learning_rate": 2.95134791173752e-06, "loss": 0.2646, "step": 10262 }, { "epoch": 0.65, "grad_norm": 1.7286493146123572, "learning_rate": 2.9504188931895507e-06, "loss": 0.2757, "step": 10263 }, { "epoch": 0.65, "grad_norm": 2.061230434689712, "learning_rate": 2.9494899596823405e-06, "loss": 0.2696, "step": 10264 }, { "epoch": 0.65, "grad_norm": 3.080154476416327, "learning_rate": 2.948561111254436e-06, "loss": 0.2778, "step": 10265 }, { "epoch": 0.65, "grad_norm": 1.5940276876912574, "learning_rate": 2.9476323479443736e-06, "loss": 0.272, "step": 10266 }, { "epoch": 0.65, "grad_norm": 4.435622371099773, "learning_rate": 2.9467036697906914e-06, "loss": 0.2613, "step": 10267 }, { "epoch": 0.65, "grad_norm": 1.9486384831096513, "learning_rate": 2.9457750768319202e-06, "loss": 0.2561, "step": 10268 }, { "epoch": 0.65, "grad_norm": 2.032735319772164, "learning_rate": 2.9448465691065906e-06, "loss": 0.2667, "step": 10269 }, { "epoch": 0.65, "grad_norm": 4.890455038224558, "learning_rate": 2.9439181466532253e-06, "loss": 0.279, "step": 10270 }, { "epoch": 0.65, "grad_norm": 1.8318093530757913, "learning_rate": 2.9429898095103494e-06, "loss": 0.2814, "step": 10271 }, { "epoch": 0.65, "grad_norm": 1.9060370669230478, "learning_rate": 2.942061557716479e-06, "loss": 0.257, "step": 10272 }, { "epoch": 0.65, "grad_norm": 1.847142304524054, "learning_rate": 2.9411333913101316e-06, "loss": 0.2783, "step": 10273 }, { "epoch": 0.65, "grad_norm": 2.158199419371395, "learning_rate": 2.940205310329816e-06, "loss": 0.2849, "step": 10274 }, { "epoch": 0.65, "grad_norm": 1.6207273913013245, "learning_rate": 2.9392773148140406e-06, "loss": 0.2559, "step": 10275 }, { "epoch": 0.65, "grad_norm": 1.6615952738715276, "learning_rate": 2.9383494048013096e-06, "loss": 0.2675, "step": 10276 }, { "epoch": 0.65, "grad_norm": 2.3387500118835, "learning_rate": 2.937421580330123e-06, "loss": 0.2515, "step": 10277 }, { "epoch": 0.65, "grad_norm": 3.1542056823758857, "learning_rate": 2.9364938414389797e-06, "loss": 0.2688, "step": 10278 }, { "epoch": 0.65, "grad_norm": 2.2811552349845754, "learning_rate": 2.9355661881663717e-06, "loss": 0.251, "step": 10279 }, { "epoch": 0.65, "grad_norm": 2.309169932842242, "learning_rate": 2.9346386205507893e-06, "loss": 0.268, "step": 10280 }, { "epoch": 0.65, "grad_norm": 2.630879173555384, "learning_rate": 2.9337111386307197e-06, "loss": 0.2721, "step": 10281 }, { "epoch": 0.65, "grad_norm": 1.522778125874977, "learning_rate": 2.9327837424446442e-06, "loss": 0.2688, "step": 10282 }, { "epoch": 0.65, "grad_norm": 1.7498924542565566, "learning_rate": 2.9318564320310444e-06, "loss": 0.2521, "step": 10283 }, { "epoch": 0.65, "grad_norm": 1.3253732961167728, "learning_rate": 2.9309292074283936e-06, "loss": 0.2515, "step": 10284 }, { "epoch": 0.65, "grad_norm": 1.9211988222354697, "learning_rate": 2.930002068675164e-06, "loss": 0.2653, "step": 10285 }, { "epoch": 0.65, "grad_norm": 2.0755383269524335, "learning_rate": 2.9290750158098268e-06, "loss": 0.2695, "step": 10286 }, { "epoch": 0.65, "grad_norm": 2.0960936078231605, "learning_rate": 2.9281480488708445e-06, "loss": 0.2752, "step": 10287 }, { "epoch": 0.65, "grad_norm": 2.2600792081025394, "learning_rate": 2.9272211678966804e-06, "loss": 0.2849, "step": 10288 }, { "epoch": 0.65, "grad_norm": 1.3490394661035454, "learning_rate": 2.9262943729257924e-06, "loss": 0.2601, "step": 10289 }, { "epoch": 0.65, "grad_norm": 1.773377022832766, "learning_rate": 2.9253676639966335e-06, "loss": 0.2684, "step": 10290 }, { "epoch": 0.65, "grad_norm": 0.578923030565268, "learning_rate": 2.924441041147656e-06, "loss": 0.4941, "step": 10291 }, { "epoch": 0.65, "grad_norm": 2.8919456581718603, "learning_rate": 2.923514504417304e-06, "loss": 0.2621, "step": 10292 }, { "epoch": 0.65, "grad_norm": 17.785325821684957, "learning_rate": 2.9225880538440242e-06, "loss": 0.282, "step": 10293 }, { "epoch": 0.65, "grad_norm": 1.8583411826395766, "learning_rate": 2.921661689466257e-06, "loss": 0.2702, "step": 10294 }, { "epoch": 0.65, "grad_norm": 3.3448475890108256, "learning_rate": 2.9207354113224384e-06, "loss": 0.2938, "step": 10295 }, { "epoch": 0.65, "grad_norm": 1.936621432202642, "learning_rate": 2.919809219451e-06, "loss": 0.2579, "step": 10296 }, { "epoch": 0.65, "grad_norm": 3.199092871572625, "learning_rate": 2.918883113890371e-06, "loss": 0.2857, "step": 10297 }, { "epoch": 0.65, "grad_norm": 2.7950546614614975, "learning_rate": 2.9179570946789798e-06, "loss": 0.2776, "step": 10298 }, { "epoch": 0.65, "grad_norm": 1.6884177362490773, "learning_rate": 2.9170311618552467e-06, "loss": 0.2708, "step": 10299 }, { "epoch": 0.65, "grad_norm": 4.239822309094006, "learning_rate": 2.916105315457588e-06, "loss": 0.2713, "step": 10300 }, { "epoch": 0.65, "grad_norm": 2.8058828654033277, "learning_rate": 2.9151795555244245e-06, "loss": 0.2629, "step": 10301 }, { "epoch": 0.65, "grad_norm": 1.50441884306904, "learning_rate": 2.9142538820941613e-06, "loss": 0.2506, "step": 10302 }, { "epoch": 0.65, "grad_norm": 1.8329288039954224, "learning_rate": 2.913328295205211e-06, "loss": 0.2677, "step": 10303 }, { "epoch": 0.65, "grad_norm": 1.591947523918817, "learning_rate": 2.9124027948959767e-06, "loss": 0.2601, "step": 10304 }, { "epoch": 0.65, "grad_norm": 2.9945975029575576, "learning_rate": 2.9114773812048558e-06, "loss": 0.2811, "step": 10305 }, { "epoch": 0.65, "grad_norm": 2.414079663222745, "learning_rate": 2.9105520541702503e-06, "loss": 0.2735, "step": 10306 }, { "epoch": 0.65, "grad_norm": 2.046007457779282, "learning_rate": 2.9096268138305495e-06, "loss": 0.2627, "step": 10307 }, { "epoch": 0.65, "grad_norm": 1.7599522828206562, "learning_rate": 2.908701660224147e-06, "loss": 0.2625, "step": 10308 }, { "epoch": 0.65, "grad_norm": 2.567266171204114, "learning_rate": 2.9077765933894277e-06, "loss": 0.2823, "step": 10309 }, { "epoch": 0.65, "grad_norm": 1.4579231919226723, "learning_rate": 2.906851613364771e-06, "loss": 0.2655, "step": 10310 }, { "epoch": 0.65, "grad_norm": 2.240191366744254, "learning_rate": 2.905926720188561e-06, "loss": 0.2702, "step": 10311 }, { "epoch": 0.65, "grad_norm": 1.9275614105475722, "learning_rate": 2.905001913899171e-06, "loss": 0.2667, "step": 10312 }, { "epoch": 0.65, "grad_norm": 2.144183315313289, "learning_rate": 2.9040771945349707e-06, "loss": 0.2659, "step": 10313 }, { "epoch": 0.65, "grad_norm": 2.1892419343701666, "learning_rate": 2.90315256213433e-06, "loss": 0.2584, "step": 10314 }, { "epoch": 0.65, "grad_norm": 1.6502800388151524, "learning_rate": 2.9022280167356167e-06, "loss": 0.2601, "step": 10315 }, { "epoch": 0.65, "grad_norm": 1.7334182686629287, "learning_rate": 2.901303558377188e-06, "loss": 0.2769, "step": 10316 }, { "epoch": 0.65, "grad_norm": 2.2887259233707633, "learning_rate": 2.9003791870974005e-06, "loss": 0.2708, "step": 10317 }, { "epoch": 0.65, "grad_norm": 1.9577276044161858, "learning_rate": 2.8994549029346132e-06, "loss": 0.2741, "step": 10318 }, { "epoch": 0.65, "grad_norm": 2.1935604083815123, "learning_rate": 2.8985307059271718e-06, "loss": 0.2652, "step": 10319 }, { "epoch": 0.65, "grad_norm": 1.4808680686436608, "learning_rate": 2.897606596113424e-06, "loss": 0.2761, "step": 10320 }, { "epoch": 0.65, "grad_norm": 1.8842912010224175, "learning_rate": 2.8966825735317113e-06, "loss": 0.255, "step": 10321 }, { "epoch": 0.65, "grad_norm": 1.4303413346360598, "learning_rate": 2.895758638220374e-06, "loss": 0.2603, "step": 10322 }, { "epoch": 0.65, "grad_norm": 1.8270048984190406, "learning_rate": 2.894834790217751e-06, "loss": 0.2594, "step": 10323 }, { "epoch": 0.65, "grad_norm": 1.8392163756216793, "learning_rate": 2.8939110295621707e-06, "loss": 0.2733, "step": 10324 }, { "epoch": 0.65, "grad_norm": 3.4869095340523093, "learning_rate": 2.892987356291962e-06, "loss": 0.2696, "step": 10325 }, { "epoch": 0.65, "grad_norm": 2.135158149137114, "learning_rate": 2.892063770445451e-06, "loss": 0.2605, "step": 10326 }, { "epoch": 0.65, "grad_norm": 1.7634824739944561, "learning_rate": 2.8911402720609594e-06, "loss": 0.282, "step": 10327 }, { "epoch": 0.65, "grad_norm": 1.7813903204132482, "learning_rate": 2.8902168611768032e-06, "loss": 0.2767, "step": 10328 }, { "epoch": 0.65, "grad_norm": 2.219030505984542, "learning_rate": 2.889293537831295e-06, "loss": 0.2818, "step": 10329 }, { "epoch": 0.65, "grad_norm": 3.3709590337236626, "learning_rate": 2.8883703020627467e-06, "loss": 0.256, "step": 10330 }, { "epoch": 0.65, "grad_norm": 2.326527557524211, "learning_rate": 2.8874471539094672e-06, "loss": 0.2878, "step": 10331 }, { "epoch": 0.65, "grad_norm": 2.1707468394485088, "learning_rate": 2.8865240934097584e-06, "loss": 0.3013, "step": 10332 }, { "epoch": 0.65, "grad_norm": 3.6931270236050118, "learning_rate": 2.885601120601916e-06, "loss": 0.2657, "step": 10333 }, { "epoch": 0.65, "grad_norm": 2.010592130910668, "learning_rate": 2.884678235524241e-06, "loss": 0.2706, "step": 10334 }, { "epoch": 0.65, "grad_norm": 2.6936869219379527, "learning_rate": 2.8837554382150233e-06, "loss": 0.2831, "step": 10335 }, { "epoch": 0.65, "grad_norm": 1.5181472268339018, "learning_rate": 2.882832728712551e-06, "loss": 0.2701, "step": 10336 }, { "epoch": 0.65, "grad_norm": 2.255743161270764, "learning_rate": 2.8819101070551073e-06, "loss": 0.2889, "step": 10337 }, { "epoch": 0.65, "grad_norm": 1.8339844827187541, "learning_rate": 2.8809875732809762e-06, "loss": 0.2728, "step": 10338 }, { "epoch": 0.65, "grad_norm": 6.079500585688239, "learning_rate": 2.8800651274284356e-06, "loss": 0.2915, "step": 10339 }, { "epoch": 0.65, "grad_norm": 1.8361709025280701, "learning_rate": 2.8791427695357586e-06, "loss": 0.2862, "step": 10340 }, { "epoch": 0.65, "grad_norm": 1.6004733164058902, "learning_rate": 2.8782204996412134e-06, "loss": 0.2578, "step": 10341 }, { "epoch": 0.65, "grad_norm": 1.7316254866285132, "learning_rate": 2.8772983177830706e-06, "loss": 0.2716, "step": 10342 }, { "epoch": 0.65, "grad_norm": 2.9777529165571632, "learning_rate": 2.8763762239995903e-06, "loss": 0.2781, "step": 10343 }, { "epoch": 0.65, "grad_norm": 1.7553244499987246, "learning_rate": 2.8754542183290304e-06, "loss": 0.2734, "step": 10344 }, { "epoch": 0.65, "grad_norm": 1.9787242758944577, "learning_rate": 2.874532300809651e-06, "loss": 0.271, "step": 10345 }, { "epoch": 0.65, "grad_norm": 1.7816249472016847, "learning_rate": 2.8736104714796996e-06, "loss": 0.258, "step": 10346 }, { "epoch": 0.65, "grad_norm": 2.844539267323949, "learning_rate": 2.8726887303774286e-06, "loss": 0.2856, "step": 10347 }, { "epoch": 0.65, "grad_norm": 3.110903491931399, "learning_rate": 2.8717670775410805e-06, "loss": 0.2573, "step": 10348 }, { "epoch": 0.65, "grad_norm": 3.1961716277508603, "learning_rate": 2.8708455130088946e-06, "loss": 0.2925, "step": 10349 }, { "epoch": 0.65, "grad_norm": 2.840889344725491, "learning_rate": 2.8699240368191124e-06, "loss": 0.2822, "step": 10350 }, { "epoch": 0.65, "grad_norm": 2.1900770769306392, "learning_rate": 2.8690026490099655e-06, "loss": 0.281, "step": 10351 }, { "epoch": 0.65, "grad_norm": 2.032055040907349, "learning_rate": 2.8680813496196814e-06, "loss": 0.2678, "step": 10352 }, { "epoch": 0.65, "grad_norm": 1.83485329391566, "learning_rate": 2.8671601386864913e-06, "loss": 0.2698, "step": 10353 }, { "epoch": 0.65, "grad_norm": 2.4372661675444647, "learning_rate": 2.8662390162486125e-06, "loss": 0.2813, "step": 10354 }, { "epoch": 0.65, "grad_norm": 2.8711937602281714, "learning_rate": 2.865317982344269e-06, "loss": 0.274, "step": 10355 }, { "epoch": 0.65, "grad_norm": 5.647553746292898, "learning_rate": 2.864397037011675e-06, "loss": 0.2646, "step": 10356 }, { "epoch": 0.65, "grad_norm": 2.0758115605281473, "learning_rate": 2.8634761802890387e-06, "loss": 0.2634, "step": 10357 }, { "epoch": 0.65, "grad_norm": 1.8131575878304973, "learning_rate": 2.862555412214572e-06, "loss": 0.2681, "step": 10358 }, { "epoch": 0.65, "grad_norm": 1.9124368285371574, "learning_rate": 2.8616347328264764e-06, "loss": 0.2602, "step": 10359 }, { "epoch": 0.65, "grad_norm": 0.6047603012030696, "learning_rate": 2.860714142162956e-06, "loss": 0.4803, "step": 10360 }, { "epoch": 0.65, "grad_norm": 1.6202814522088258, "learning_rate": 2.859793640262205e-06, "loss": 0.2636, "step": 10361 }, { "epoch": 0.65, "grad_norm": 2.4631381294144927, "learning_rate": 2.8588732271624163e-06, "loss": 0.2689, "step": 10362 }, { "epoch": 0.65, "grad_norm": 1.6366857709413358, "learning_rate": 2.857952902901782e-06, "loss": 0.2693, "step": 10363 }, { "epoch": 0.65, "grad_norm": 3.974490977238434, "learning_rate": 2.857032667518486e-06, "loss": 0.2676, "step": 10364 }, { "epoch": 0.65, "grad_norm": 2.598651836509557, "learning_rate": 2.856112521050709e-06, "loss": 0.2498, "step": 10365 }, { "epoch": 0.65, "grad_norm": 2.3267187774940883, "learning_rate": 2.8551924635366344e-06, "loss": 0.2692, "step": 10366 }, { "epoch": 0.65, "grad_norm": 1.980678960588676, "learning_rate": 2.854272495014431e-06, "loss": 0.2727, "step": 10367 }, { "epoch": 0.65, "grad_norm": 5.860173799441823, "learning_rate": 2.8533526155222757e-06, "loss": 0.2635, "step": 10368 }, { "epoch": 0.65, "grad_norm": 3.0060463510588296, "learning_rate": 2.8524328250983337e-06, "loss": 0.2659, "step": 10369 }, { "epoch": 0.65, "grad_norm": 2.350605986020169, "learning_rate": 2.8515131237807653e-06, "loss": 0.2752, "step": 10370 }, { "epoch": 0.65, "grad_norm": 2.3201057957361955, "learning_rate": 2.8505935116077353e-06, "loss": 0.2608, "step": 10371 }, { "epoch": 0.65, "grad_norm": 1.5239889432924736, "learning_rate": 2.8496739886173994e-06, "loss": 0.2675, "step": 10372 }, { "epoch": 0.65, "grad_norm": 0.6003174799229049, "learning_rate": 2.848754554847907e-06, "loss": 0.4657, "step": 10373 }, { "epoch": 0.65, "grad_norm": 8.931957096457086, "learning_rate": 2.8478352103374085e-06, "loss": 0.2639, "step": 10374 }, { "epoch": 0.65, "grad_norm": 2.554794346000949, "learning_rate": 2.846915955124052e-06, "loss": 0.2705, "step": 10375 }, { "epoch": 0.65, "grad_norm": 7.1017749279224756, "learning_rate": 2.8459967892459767e-06, "loss": 0.278, "step": 10376 }, { "epoch": 0.65, "grad_norm": 2.2290570477432845, "learning_rate": 2.845077712741321e-06, "loss": 0.2522, "step": 10377 }, { "epoch": 0.65, "grad_norm": 1.7791977780449229, "learning_rate": 2.844158725648216e-06, "loss": 0.2546, "step": 10378 }, { "epoch": 0.65, "grad_norm": 3.3351526868247428, "learning_rate": 2.843239828004797e-06, "loss": 0.2738, "step": 10379 }, { "epoch": 0.65, "grad_norm": 2.3032516202419586, "learning_rate": 2.8423210198491886e-06, "loss": 0.2727, "step": 10380 }, { "epoch": 0.65, "grad_norm": 1.9390686255543936, "learning_rate": 2.8414023012195113e-06, "loss": 0.2549, "step": 10381 }, { "epoch": 0.65, "grad_norm": 3.0219803962271286, "learning_rate": 2.8404836721538866e-06, "loss": 0.2501, "step": 10382 }, { "epoch": 0.65, "grad_norm": 1.8175216309949775, "learning_rate": 2.8395651326904323e-06, "loss": 0.2698, "step": 10383 }, { "epoch": 0.65, "grad_norm": 1.7904621967344234, "learning_rate": 2.8386466828672575e-06, "loss": 0.2797, "step": 10384 }, { "epoch": 0.65, "grad_norm": 9.842134789346314, "learning_rate": 2.8377283227224717e-06, "loss": 0.2784, "step": 10385 }, { "epoch": 0.65, "grad_norm": 6.694564305938711, "learning_rate": 2.8368100522941755e-06, "loss": 0.2632, "step": 10386 }, { "epoch": 0.65, "grad_norm": 1.7988356164491621, "learning_rate": 2.8358918716204746e-06, "loss": 0.2983, "step": 10387 }, { "epoch": 0.65, "grad_norm": 1.9138366241854534, "learning_rate": 2.8349737807394646e-06, "loss": 0.2747, "step": 10388 }, { "epoch": 0.65, "grad_norm": 1.9613620859420138, "learning_rate": 2.8340557796892353e-06, "loss": 0.2824, "step": 10389 }, { "epoch": 0.65, "grad_norm": 2.8392574585700325, "learning_rate": 2.833137868507879e-06, "loss": 0.264, "step": 10390 }, { "epoch": 0.65, "grad_norm": 1.4377198742833721, "learning_rate": 2.832220047233483e-06, "loss": 0.2531, "step": 10391 }, { "epoch": 0.65, "grad_norm": 2.3127795153622928, "learning_rate": 2.831302315904128e-06, "loss": 0.2677, "step": 10392 }, { "epoch": 0.65, "grad_norm": 3.99991517431462, "learning_rate": 2.83038467455789e-06, "loss": 0.2699, "step": 10393 }, { "epoch": 0.65, "grad_norm": 2.820705929289855, "learning_rate": 2.8294671232328473e-06, "loss": 0.2634, "step": 10394 }, { "epoch": 0.65, "grad_norm": 1.518450643734968, "learning_rate": 2.8285496619670695e-06, "loss": 0.2773, "step": 10395 }, { "epoch": 0.65, "grad_norm": 3.074314846461634, "learning_rate": 2.827632290798621e-06, "loss": 0.2569, "step": 10396 }, { "epoch": 0.65, "grad_norm": 1.5583800976647444, "learning_rate": 2.826715009765569e-06, "loss": 0.2846, "step": 10397 }, { "epoch": 0.65, "grad_norm": 2.108231671123516, "learning_rate": 2.825797818905969e-06, "loss": 0.2596, "step": 10398 }, { "epoch": 0.65, "grad_norm": 3.240758915529665, "learning_rate": 2.8248807182578817e-06, "loss": 0.2717, "step": 10399 }, { "epoch": 0.65, "grad_norm": 1.8199852189859267, "learning_rate": 2.8239637078593574e-06, "loss": 0.2542, "step": 10400 }, { "epoch": 0.65, "grad_norm": 0.6174296789920071, "learning_rate": 2.823046787748441e-06, "loss": 0.4429, "step": 10401 }, { "epoch": 0.65, "grad_norm": 2.456548481553083, "learning_rate": 2.8221299579631834e-06, "loss": 0.2578, "step": 10402 }, { "epoch": 0.65, "grad_norm": 2.3983486438540016, "learning_rate": 2.821213218541621e-06, "loss": 0.2788, "step": 10403 }, { "epoch": 0.65, "grad_norm": 1.8787150019399343, "learning_rate": 2.8202965695217906e-06, "loss": 0.2491, "step": 10404 }, { "epoch": 0.65, "grad_norm": 1.8893076754105873, "learning_rate": 2.8193800109417293e-06, "loss": 0.2709, "step": 10405 }, { "epoch": 0.65, "grad_norm": 2.1820426995449576, "learning_rate": 2.818463542839462e-06, "loss": 0.274, "step": 10406 }, { "epoch": 0.65, "grad_norm": 1.5720702494820933, "learning_rate": 2.8175471652530193e-06, "loss": 0.2558, "step": 10407 }, { "epoch": 0.65, "grad_norm": 1.9245355640229331, "learning_rate": 2.816630878220421e-06, "loss": 0.2593, "step": 10408 }, { "epoch": 0.65, "grad_norm": 5.193600663839504, "learning_rate": 2.8157146817796843e-06, "loss": 0.2668, "step": 10409 }, { "epoch": 0.65, "grad_norm": 1.6029318065473126, "learning_rate": 2.8147985759688267e-06, "loss": 0.2584, "step": 10410 }, { "epoch": 0.65, "grad_norm": 1.7862824366550654, "learning_rate": 2.8138825608258556e-06, "loss": 0.2584, "step": 10411 }, { "epoch": 0.65, "grad_norm": 8.073517752624644, "learning_rate": 2.812966636388782e-06, "loss": 0.2871, "step": 10412 }, { "epoch": 0.65, "grad_norm": 1.7103306706033008, "learning_rate": 2.8120508026956074e-06, "loss": 0.2525, "step": 10413 }, { "epoch": 0.65, "grad_norm": 2.1626339198226368, "learning_rate": 2.811135059784329e-06, "loss": 0.273, "step": 10414 }, { "epoch": 0.65, "grad_norm": 4.238689838195442, "learning_rate": 2.8102194076929475e-06, "loss": 0.2761, "step": 10415 }, { "epoch": 0.66, "grad_norm": 2.3273465781146676, "learning_rate": 2.809303846459452e-06, "loss": 0.2752, "step": 10416 }, { "epoch": 0.66, "grad_norm": 1.5742835740189338, "learning_rate": 2.808388376121829e-06, "loss": 0.2618, "step": 10417 }, { "epoch": 0.66, "grad_norm": 6.530856597631842, "learning_rate": 2.8074729967180664e-06, "loss": 0.2651, "step": 10418 }, { "epoch": 0.66, "grad_norm": 4.828599931339612, "learning_rate": 2.8065577082861416e-06, "loss": 0.2969, "step": 10419 }, { "epoch": 0.66, "grad_norm": 1.966342558520523, "learning_rate": 2.805642510864036e-06, "loss": 0.2579, "step": 10420 }, { "epoch": 0.66, "grad_norm": 4.479634855617904, "learning_rate": 2.80472740448972e-06, "loss": 0.2649, "step": 10421 }, { "epoch": 0.66, "grad_norm": 4.368448757489006, "learning_rate": 2.8038123892011615e-06, "loss": 0.2586, "step": 10422 }, { "epoch": 0.66, "grad_norm": 2.217167700332758, "learning_rate": 2.8028974650363296e-06, "loss": 0.2658, "step": 10423 }, { "epoch": 0.66, "grad_norm": 1.7521901095895094, "learning_rate": 2.8019826320331843e-06, "loss": 0.2817, "step": 10424 }, { "epoch": 0.66, "grad_norm": 1.2625006956315874, "learning_rate": 2.8010678902296822e-06, "loss": 0.2641, "step": 10425 }, { "epoch": 0.66, "grad_norm": 17.121993836236154, "learning_rate": 2.800153239663779e-06, "loss": 0.2673, "step": 10426 }, { "epoch": 0.66, "grad_norm": 2.14308314556635, "learning_rate": 2.7992386803734267e-06, "loss": 0.2674, "step": 10427 }, { "epoch": 0.66, "grad_norm": 4.809321431847288, "learning_rate": 2.7983242123965708e-06, "loss": 0.2715, "step": 10428 }, { "epoch": 0.66, "grad_norm": 3.809139352899553, "learning_rate": 2.7974098357711544e-06, "loss": 0.2771, "step": 10429 }, { "epoch": 0.66, "grad_norm": 1.5875698629766402, "learning_rate": 2.796495550535113e-06, "loss": 0.2859, "step": 10430 }, { "epoch": 0.66, "grad_norm": 2.5018999272604305, "learning_rate": 2.795581356726388e-06, "loss": 0.2542, "step": 10431 }, { "epoch": 0.66, "grad_norm": 7.149900104850301, "learning_rate": 2.7946672543829077e-06, "loss": 0.2958, "step": 10432 }, { "epoch": 0.66, "grad_norm": 2.2645565055309302, "learning_rate": 2.7937532435425985e-06, "loss": 0.2633, "step": 10433 }, { "epoch": 0.66, "grad_norm": 2.1242294696964263, "learning_rate": 2.792839324243386e-06, "loss": 0.2756, "step": 10434 }, { "epoch": 0.66, "grad_norm": 2.6295723493836345, "learning_rate": 2.791925496523191e-06, "loss": 0.2699, "step": 10435 }, { "epoch": 0.66, "grad_norm": 2.182514401554479, "learning_rate": 2.7910117604199305e-06, "loss": 0.2775, "step": 10436 }, { "epoch": 0.66, "grad_norm": 3.013516551641431, "learning_rate": 2.7900981159715157e-06, "loss": 0.25, "step": 10437 }, { "epoch": 0.66, "grad_norm": 2.1994852426873885, "learning_rate": 2.7891845632158527e-06, "loss": 0.2791, "step": 10438 }, { "epoch": 0.66, "grad_norm": 2.345011476749204, "learning_rate": 2.788271102190851e-06, "loss": 0.262, "step": 10439 }, { "epoch": 0.66, "grad_norm": 2.1211271212366243, "learning_rate": 2.7873577329344105e-06, "loss": 0.2467, "step": 10440 }, { "epoch": 0.66, "grad_norm": 1.490889428901454, "learning_rate": 2.786444455484425e-06, "loss": 0.2579, "step": 10441 }, { "epoch": 0.66, "grad_norm": 3.436044397374049, "learning_rate": 2.785531269878791e-06, "loss": 0.271, "step": 10442 }, { "epoch": 0.66, "grad_norm": 4.416322267236971, "learning_rate": 2.784618176155399e-06, "loss": 0.2669, "step": 10443 }, { "epoch": 0.66, "grad_norm": 2.2415655374759598, "learning_rate": 2.783705174352135e-06, "loss": 0.2725, "step": 10444 }, { "epoch": 0.66, "grad_norm": 2.3137567313680365, "learning_rate": 2.78279226450688e-06, "loss": 0.2933, "step": 10445 }, { "epoch": 0.66, "grad_norm": 2.787664114700681, "learning_rate": 2.7818794466575095e-06, "loss": 0.2632, "step": 10446 }, { "epoch": 0.66, "grad_norm": 2.5499048197975385, "learning_rate": 2.7809667208419034e-06, "loss": 0.2829, "step": 10447 }, { "epoch": 0.66, "grad_norm": 4.293831891826393, "learning_rate": 2.7800540870979287e-06, "loss": 0.2886, "step": 10448 }, { "epoch": 0.66, "grad_norm": 1.2140332473576787, "learning_rate": 2.7791415454634507e-06, "loss": 0.2527, "step": 10449 }, { "epoch": 0.66, "grad_norm": 1.3097334048601796, "learning_rate": 2.778229095976336e-06, "loss": 0.2625, "step": 10450 }, { "epoch": 0.66, "grad_norm": 3.0738810231232616, "learning_rate": 2.7773167386744432e-06, "loss": 0.2872, "step": 10451 }, { "epoch": 0.66, "grad_norm": 2.570713387112575, "learning_rate": 2.7764044735956275e-06, "loss": 0.265, "step": 10452 }, { "epoch": 0.66, "grad_norm": 1.7532889903374367, "learning_rate": 2.775492300777739e-06, "loss": 0.2567, "step": 10453 }, { "epoch": 0.66, "grad_norm": 2.0799293435877386, "learning_rate": 2.774580220258625e-06, "loss": 0.2694, "step": 10454 }, { "epoch": 0.66, "grad_norm": 2.0581896280321694, "learning_rate": 2.773668232076132e-06, "loss": 0.2703, "step": 10455 }, { "epoch": 0.66, "grad_norm": 1.9368825258630105, "learning_rate": 2.7727563362680965e-06, "loss": 0.2723, "step": 10456 }, { "epoch": 0.66, "grad_norm": 2.7194825863986827, "learning_rate": 2.771844532872359e-06, "loss": 0.2633, "step": 10457 }, { "epoch": 0.66, "grad_norm": 1.6965752359933366, "learning_rate": 2.770932821926747e-06, "loss": 0.2712, "step": 10458 }, { "epoch": 0.66, "grad_norm": 2.9523782752312764, "learning_rate": 2.7700212034690933e-06, "loss": 0.3004, "step": 10459 }, { "epoch": 0.66, "grad_norm": 0.6194571393296509, "learning_rate": 2.769109677537222e-06, "loss": 0.475, "step": 10460 }, { "epoch": 0.66, "grad_norm": 7.743386645198735, "learning_rate": 2.7681982441689513e-06, "loss": 0.2706, "step": 10461 }, { "epoch": 0.66, "grad_norm": 2.4932330766273485, "learning_rate": 2.7672869034020978e-06, "loss": 0.2814, "step": 10462 }, { "epoch": 0.66, "grad_norm": 4.197278076937047, "learning_rate": 2.766375655274479e-06, "loss": 0.2517, "step": 10463 }, { "epoch": 0.66, "grad_norm": 1.9752372692344045, "learning_rate": 2.765464499823899e-06, "loss": 0.2739, "step": 10464 }, { "epoch": 0.66, "grad_norm": 1.5880826352631157, "learning_rate": 2.7645534370881682e-06, "loss": 0.2636, "step": 10465 }, { "epoch": 0.66, "grad_norm": 2.308944477965517, "learning_rate": 2.7636424671050843e-06, "loss": 0.2867, "step": 10466 }, { "epoch": 0.66, "grad_norm": 2.810625758286272, "learning_rate": 2.762731589912448e-06, "loss": 0.2854, "step": 10467 }, { "epoch": 0.66, "grad_norm": 2.024849569191932, "learning_rate": 2.7618208055480523e-06, "loss": 0.2541, "step": 10468 }, { "epoch": 0.66, "grad_norm": 1.2611604161150793, "learning_rate": 2.7609101140496863e-06, "loss": 0.5138, "step": 10469 }, { "epoch": 0.66, "grad_norm": 2.8194719512935786, "learning_rate": 2.7599995154551352e-06, "loss": 0.2866, "step": 10470 }, { "epoch": 0.66, "grad_norm": 2.858857505565335, "learning_rate": 2.7590890098021828e-06, "loss": 0.2742, "step": 10471 }, { "epoch": 0.66, "grad_norm": 2.101557608251054, "learning_rate": 2.75817859712861e-06, "loss": 0.2592, "step": 10472 }, { "epoch": 0.66, "grad_norm": 2.183406893174561, "learning_rate": 2.757268277472188e-06, "loss": 0.2591, "step": 10473 }, { "epoch": 0.66, "grad_norm": 2.952509958724208, "learning_rate": 2.7563580508706877e-06, "loss": 0.2596, "step": 10474 }, { "epoch": 0.66, "grad_norm": 2.351708371100462, "learning_rate": 2.755447917361879e-06, "loss": 0.2709, "step": 10475 }, { "epoch": 0.66, "grad_norm": 4.05542319249982, "learning_rate": 2.754537876983523e-06, "loss": 0.2849, "step": 10476 }, { "epoch": 0.66, "grad_norm": 3.067770469205034, "learning_rate": 2.753627929773377e-06, "loss": 0.2716, "step": 10477 }, { "epoch": 0.66, "grad_norm": 1.784719241808555, "learning_rate": 2.7527180757691973e-06, "loss": 0.2702, "step": 10478 }, { "epoch": 0.66, "grad_norm": 2.4259988908413455, "learning_rate": 2.7518083150087395e-06, "loss": 0.2831, "step": 10479 }, { "epoch": 0.66, "grad_norm": 1.6593522983902174, "learning_rate": 2.750898647529747e-06, "loss": 0.2487, "step": 10480 }, { "epoch": 0.66, "grad_norm": 1.7200176513562162, "learning_rate": 2.7499890733699645e-06, "loss": 0.2582, "step": 10481 }, { "epoch": 0.66, "grad_norm": 4.286478809297931, "learning_rate": 2.74907959256713e-06, "loss": 0.258, "step": 10482 }, { "epoch": 0.66, "grad_norm": 1.8739935265522478, "learning_rate": 2.748170205158984e-06, "loss": 0.2632, "step": 10483 }, { "epoch": 0.66, "grad_norm": 2.6781717455937804, "learning_rate": 2.747260911183255e-06, "loss": 0.2756, "step": 10484 }, { "epoch": 0.66, "grad_norm": 1.75517122915535, "learning_rate": 2.7463517106776704e-06, "loss": 0.256, "step": 10485 }, { "epoch": 0.66, "grad_norm": 2.2610652072859203, "learning_rate": 2.7454426036799566e-06, "loss": 0.2699, "step": 10486 }, { "epoch": 0.66, "grad_norm": 2.1226346728576484, "learning_rate": 2.7445335902278347e-06, "loss": 0.2606, "step": 10487 }, { "epoch": 0.66, "grad_norm": 3.2975074133000453, "learning_rate": 2.7436246703590206e-06, "loss": 0.2949, "step": 10488 }, { "epoch": 0.66, "grad_norm": 35.896181409660656, "learning_rate": 2.742715844111228e-06, "loss": 0.2694, "step": 10489 }, { "epoch": 0.66, "grad_norm": 2.3142204998937435, "learning_rate": 2.7418071115221613e-06, "loss": 0.2709, "step": 10490 }, { "epoch": 0.66, "grad_norm": 1.391682267713271, "learning_rate": 2.740898472629531e-06, "loss": 0.2739, "step": 10491 }, { "epoch": 0.66, "grad_norm": 1.6642260926130596, "learning_rate": 2.7399899274710346e-06, "loss": 0.2461, "step": 10492 }, { "epoch": 0.66, "grad_norm": 2.789876276801303, "learning_rate": 2.7390814760843695e-06, "loss": 0.2763, "step": 10493 }, { "epoch": 0.66, "grad_norm": 1.5921129456610974, "learning_rate": 2.738173118507229e-06, "loss": 0.2837, "step": 10494 }, { "epoch": 0.66, "grad_norm": 2.2845068057407825, "learning_rate": 2.7372648547773063e-06, "loss": 0.263, "step": 10495 }, { "epoch": 0.66, "grad_norm": 3.989157271413783, "learning_rate": 2.736356684932283e-06, "loss": 0.2817, "step": 10496 }, { "epoch": 0.66, "grad_norm": 4.220640561908065, "learning_rate": 2.7354486090098414e-06, "loss": 0.254, "step": 10497 }, { "epoch": 0.66, "grad_norm": 1.5036794369636048, "learning_rate": 2.734540627047658e-06, "loss": 0.2701, "step": 10498 }, { "epoch": 0.66, "grad_norm": 2.7119482713201197, "learning_rate": 2.7336327390834093e-06, "loss": 0.2751, "step": 10499 }, { "epoch": 0.66, "grad_norm": 2.766943979785282, "learning_rate": 2.7327249451547642e-06, "loss": 0.2656, "step": 10500 }, { "epoch": 0.66, "grad_norm": 2.3265290673315775, "learning_rate": 2.7318172452993864e-06, "loss": 0.2688, "step": 10501 }, { "epoch": 0.66, "grad_norm": 3.4995671438417246, "learning_rate": 2.7309096395549395e-06, "loss": 0.2785, "step": 10502 }, { "epoch": 0.66, "grad_norm": 1.7718723163471015, "learning_rate": 2.730002127959084e-06, "loss": 0.2642, "step": 10503 }, { "epoch": 0.66, "grad_norm": 1.7948091013136696, "learning_rate": 2.729094710549472e-06, "loss": 0.2661, "step": 10504 }, { "epoch": 0.66, "grad_norm": 2.3992510558784863, "learning_rate": 2.728187387363754e-06, "loss": 0.266, "step": 10505 }, { "epoch": 0.66, "grad_norm": 2.4472813449806115, "learning_rate": 2.727280158439575e-06, "loss": 0.2615, "step": 10506 }, { "epoch": 0.66, "grad_norm": 3.843309802113135, "learning_rate": 2.726373023814581e-06, "loss": 0.2763, "step": 10507 }, { "epoch": 0.66, "grad_norm": 1.76893973000872, "learning_rate": 2.7254659835264064e-06, "loss": 0.2833, "step": 10508 }, { "epoch": 0.66, "grad_norm": 2.059903131021249, "learning_rate": 2.7245590376126895e-06, "loss": 0.2599, "step": 10509 }, { "epoch": 0.66, "grad_norm": 2.791626822790956, "learning_rate": 2.7236521861110586e-06, "loss": 0.2861, "step": 10510 }, { "epoch": 0.66, "grad_norm": 5.1410902440432285, "learning_rate": 2.722745429059144e-06, "loss": 0.2682, "step": 10511 }, { "epoch": 0.66, "grad_norm": 1.5038795765002517, "learning_rate": 2.721838766494566e-06, "loss": 0.2631, "step": 10512 }, { "epoch": 0.66, "grad_norm": 2.661065644468989, "learning_rate": 2.720932198454944e-06, "loss": 0.2746, "step": 10513 }, { "epoch": 0.66, "grad_norm": 7.049903290024286, "learning_rate": 2.720025724977892e-06, "loss": 0.2755, "step": 10514 }, { "epoch": 0.66, "grad_norm": 2.268990141101887, "learning_rate": 2.719119346101023e-06, "loss": 0.2765, "step": 10515 }, { "epoch": 0.66, "grad_norm": 2.02605309083175, "learning_rate": 2.7182130618619423e-06, "loss": 0.2684, "step": 10516 }, { "epoch": 0.66, "grad_norm": 1.6260057395368743, "learning_rate": 2.7173068722982566e-06, "loss": 0.2691, "step": 10517 }, { "epoch": 0.66, "grad_norm": 2.3232572660315918, "learning_rate": 2.716400777447561e-06, "loss": 0.274, "step": 10518 }, { "epoch": 0.66, "grad_norm": 5.1385260684203, "learning_rate": 2.7154947773474556e-06, "loss": 0.2659, "step": 10519 }, { "epoch": 0.66, "grad_norm": 1.6609671404572608, "learning_rate": 2.7145888720355297e-06, "loss": 0.2648, "step": 10520 }, { "epoch": 0.66, "grad_norm": 2.8010479623478184, "learning_rate": 2.71368306154937e-06, "loss": 0.2567, "step": 10521 }, { "epoch": 0.66, "grad_norm": 1.8557498651632571, "learning_rate": 2.7127773459265604e-06, "loss": 0.2804, "step": 10522 }, { "epoch": 0.66, "grad_norm": 1.6119371441864176, "learning_rate": 2.71187172520468e-06, "loss": 0.2511, "step": 10523 }, { "epoch": 0.66, "grad_norm": 3.1180144127404255, "learning_rate": 2.710966199421309e-06, "loss": 0.2618, "step": 10524 }, { "epoch": 0.66, "grad_norm": 2.1346526102771137, "learning_rate": 2.7100607686140155e-06, "loss": 0.2676, "step": 10525 }, { "epoch": 0.66, "grad_norm": 3.39223189442905, "learning_rate": 2.709155432820366e-06, "loss": 0.2747, "step": 10526 }, { "epoch": 0.66, "grad_norm": 0.5559264437866219, "learning_rate": 2.7082501920779293e-06, "loss": 0.4723, "step": 10527 }, { "epoch": 0.66, "grad_norm": 1.5950884134719407, "learning_rate": 2.707345046424262e-06, "loss": 0.2625, "step": 10528 }, { "epoch": 0.66, "grad_norm": 2.627215776788749, "learning_rate": 2.706439995896921e-06, "loss": 0.2647, "step": 10529 }, { "epoch": 0.66, "grad_norm": 27.149907227318852, "learning_rate": 2.705535040533457e-06, "loss": 0.275, "step": 10530 }, { "epoch": 0.66, "grad_norm": 2.0678446646933226, "learning_rate": 2.7046301803714194e-06, "loss": 0.2686, "step": 10531 }, { "epoch": 0.66, "grad_norm": 3.4813309748440227, "learning_rate": 2.703725415448354e-06, "loss": 0.2779, "step": 10532 }, { "epoch": 0.66, "grad_norm": 2.2494720936144463, "learning_rate": 2.7028207458017996e-06, "loss": 0.2647, "step": 10533 }, { "epoch": 0.66, "grad_norm": 3.0571871680881295, "learning_rate": 2.701916171469292e-06, "loss": 0.2637, "step": 10534 }, { "epoch": 0.66, "grad_norm": 2.8114706590401224, "learning_rate": 2.7010116924883654e-06, "loss": 0.2637, "step": 10535 }, { "epoch": 0.66, "grad_norm": 2.136596359366606, "learning_rate": 2.7001073088965467e-06, "loss": 0.2782, "step": 10536 }, { "epoch": 0.66, "grad_norm": 1.7784525920721663, "learning_rate": 2.699203020731362e-06, "loss": 0.2686, "step": 10537 }, { "epoch": 0.66, "grad_norm": 1.7372657869244117, "learning_rate": 2.6982988280303255e-06, "loss": 0.2945, "step": 10538 }, { "epoch": 0.66, "grad_norm": 2.6775509141565044, "learning_rate": 2.6973947308309647e-06, "loss": 0.2817, "step": 10539 }, { "epoch": 0.66, "grad_norm": 3.6063032286178625, "learning_rate": 2.6964907291707844e-06, "loss": 0.2749, "step": 10540 }, { "epoch": 0.66, "grad_norm": 3.037502628470929, "learning_rate": 2.6955868230872963e-06, "loss": 0.2749, "step": 10541 }, { "epoch": 0.66, "grad_norm": 2.3071206627776366, "learning_rate": 2.6946830126180016e-06, "loss": 0.2795, "step": 10542 }, { "epoch": 0.66, "grad_norm": 3.5233088589992856, "learning_rate": 2.6937792978004056e-06, "loss": 0.2677, "step": 10543 }, { "epoch": 0.66, "grad_norm": 2.1354782356786717, "learning_rate": 2.6928756786720026e-06, "loss": 0.2873, "step": 10544 }, { "epoch": 0.66, "grad_norm": 1.4270493297206324, "learning_rate": 2.691972155270286e-06, "loss": 0.2619, "step": 10545 }, { "epoch": 0.66, "grad_norm": 2.020419575907655, "learning_rate": 2.69106872763274e-06, "loss": 0.2749, "step": 10546 }, { "epoch": 0.66, "grad_norm": 1.7673720866108846, "learning_rate": 2.6901653957968577e-06, "loss": 0.2802, "step": 10547 }, { "epoch": 0.66, "grad_norm": 2.578574726522088, "learning_rate": 2.6892621598001157e-06, "loss": 0.2695, "step": 10548 }, { "epoch": 0.66, "grad_norm": 1.9129988722106914, "learning_rate": 2.6883590196799913e-06, "loss": 0.2724, "step": 10549 }, { "epoch": 0.66, "grad_norm": 2.9851458303620775, "learning_rate": 2.687455975473955e-06, "loss": 0.275, "step": 10550 }, { "epoch": 0.66, "grad_norm": 3.2185346204376484, "learning_rate": 2.6865530272194796e-06, "loss": 0.2657, "step": 10551 }, { "epoch": 0.66, "grad_norm": 2.017259300764955, "learning_rate": 2.6856501749540287e-06, "loss": 0.2625, "step": 10552 }, { "epoch": 0.66, "grad_norm": 1.8912869264547008, "learning_rate": 2.6847474187150603e-06, "loss": 0.256, "step": 10553 }, { "epoch": 0.66, "grad_norm": 1.946934685722033, "learning_rate": 2.683844758540034e-06, "loss": 0.2483, "step": 10554 }, { "epoch": 0.66, "grad_norm": 2.2920732445097354, "learning_rate": 2.682942194466405e-06, "loss": 0.2595, "step": 10555 }, { "epoch": 0.66, "grad_norm": 0.5986369436181105, "learning_rate": 2.682039726531619e-06, "loss": 0.482, "step": 10556 }, { "epoch": 0.66, "grad_norm": 4.0339234045457175, "learning_rate": 2.6811373547731224e-06, "loss": 0.2664, "step": 10557 }, { "epoch": 0.66, "grad_norm": 1.6464528118677992, "learning_rate": 2.680235079228354e-06, "loss": 0.2931, "step": 10558 }, { "epoch": 0.66, "grad_norm": 2.344006149002412, "learning_rate": 2.6793328999347546e-06, "loss": 0.2614, "step": 10559 }, { "epoch": 0.66, "grad_norm": 0.5891767476746761, "learning_rate": 2.6784308169297525e-06, "loss": 0.4672, "step": 10560 }, { "epoch": 0.66, "grad_norm": 1.2521914885194227, "learning_rate": 2.677528830250782e-06, "loss": 0.2643, "step": 10561 }, { "epoch": 0.66, "grad_norm": 0.5716718943515395, "learning_rate": 2.6766269399352628e-06, "loss": 0.4468, "step": 10562 }, { "epoch": 0.66, "grad_norm": 1.8879084440995968, "learning_rate": 2.6757251460206215e-06, "loss": 0.2757, "step": 10563 }, { "epoch": 0.66, "grad_norm": 1.6270430444033495, "learning_rate": 2.6748234485442713e-06, "loss": 0.2806, "step": 10564 }, { "epoch": 0.66, "grad_norm": 2.943469820556087, "learning_rate": 2.6739218475436267e-06, "loss": 0.2607, "step": 10565 }, { "epoch": 0.66, "grad_norm": 1.8286178782546847, "learning_rate": 2.6730203430560946e-06, "loss": 0.2743, "step": 10566 }, { "epoch": 0.66, "grad_norm": 1.9813317388532967, "learning_rate": 2.6721189351190835e-06, "loss": 0.2587, "step": 10567 }, { "epoch": 0.66, "grad_norm": 1.6332584518626676, "learning_rate": 2.6712176237699907e-06, "loss": 0.2704, "step": 10568 }, { "epoch": 0.66, "grad_norm": 2.8949438163141363, "learning_rate": 2.6703164090462164e-06, "loss": 0.2613, "step": 10569 }, { "epoch": 0.66, "grad_norm": 1.4791171219959713, "learning_rate": 2.66941529098515e-06, "loss": 0.2728, "step": 10570 }, { "epoch": 0.66, "grad_norm": 2.0272497452550495, "learning_rate": 2.668514269624186e-06, "loss": 0.253, "step": 10571 }, { "epoch": 0.66, "grad_norm": 2.5323856618951797, "learning_rate": 2.6676133450007053e-06, "loss": 0.267, "step": 10572 }, { "epoch": 0.66, "grad_norm": 2.572834718520556, "learning_rate": 2.66671251715209e-06, "loss": 0.2722, "step": 10573 }, { "epoch": 0.66, "grad_norm": 1.872051124777162, "learning_rate": 2.6658117861157146e-06, "loss": 0.2553, "step": 10574 }, { "epoch": 0.67, "grad_norm": 2.555297755025289, "learning_rate": 2.6649111519289537e-06, "loss": 0.281, "step": 10575 }, { "epoch": 0.67, "grad_norm": 1.6974026491636993, "learning_rate": 2.66401061462918e-06, "loss": 0.2612, "step": 10576 }, { "epoch": 0.67, "grad_norm": 2.7852817265602523, "learning_rate": 2.663110174253754e-06, "loss": 0.295, "step": 10577 }, { "epoch": 0.67, "grad_norm": 1.4251423398342096, "learning_rate": 2.6622098308400364e-06, "loss": 0.286, "step": 10578 }, { "epoch": 0.67, "grad_norm": 1.9080344692995987, "learning_rate": 2.6613095844253866e-06, "loss": 0.2485, "step": 10579 }, { "epoch": 0.67, "grad_norm": 1.5804396059148693, "learning_rate": 2.6604094350471564e-06, "loss": 0.2516, "step": 10580 }, { "epoch": 0.67, "grad_norm": 2.6728663237700423, "learning_rate": 2.6595093827426942e-06, "loss": 0.2954, "step": 10581 }, { "epoch": 0.67, "grad_norm": 2.7142380925622875, "learning_rate": 2.6586094275493435e-06, "loss": 0.2792, "step": 10582 }, { "epoch": 0.67, "grad_norm": 1.6355585114588722, "learning_rate": 2.6577095695044452e-06, "loss": 0.254, "step": 10583 }, { "epoch": 0.67, "grad_norm": 2.617590949599532, "learning_rate": 2.65680980864534e-06, "loss": 0.2618, "step": 10584 }, { "epoch": 0.67, "grad_norm": 4.231354730375861, "learning_rate": 2.655910145009358e-06, "loss": 0.2841, "step": 10585 }, { "epoch": 0.67, "grad_norm": 3.0582960429211345, "learning_rate": 2.6550105786338255e-06, "loss": 0.2628, "step": 10586 }, { "epoch": 0.67, "grad_norm": 2.722263420641252, "learning_rate": 2.6541111095560713e-06, "loss": 0.2611, "step": 10587 }, { "epoch": 0.67, "grad_norm": 1.7153358268931276, "learning_rate": 2.6532117378134138e-06, "loss": 0.2621, "step": 10588 }, { "epoch": 0.67, "grad_norm": 1.917108995509193, "learning_rate": 2.6523124634431698e-06, "loss": 0.2591, "step": 10589 }, { "epoch": 0.67, "grad_norm": 1.4969706127629552, "learning_rate": 2.6514132864826477e-06, "loss": 0.2656, "step": 10590 }, { "epoch": 0.67, "grad_norm": 4.9993401405330005, "learning_rate": 2.6505142069691636e-06, "loss": 0.2675, "step": 10591 }, { "epoch": 0.67, "grad_norm": 2.058974857078737, "learning_rate": 2.6496152249400187e-06, "loss": 0.2601, "step": 10592 }, { "epoch": 0.67, "grad_norm": 1.701421722075308, "learning_rate": 2.648716340432512e-06, "loss": 0.2703, "step": 10593 }, { "epoch": 0.67, "grad_norm": 1.6556566591839421, "learning_rate": 2.647817553483939e-06, "loss": 0.2735, "step": 10594 }, { "epoch": 0.67, "grad_norm": 1.7339461124670978, "learning_rate": 2.646918864131596e-06, "loss": 0.261, "step": 10595 }, { "epoch": 0.67, "grad_norm": 2.31932599983908, "learning_rate": 2.6460202724127693e-06, "loss": 0.2598, "step": 10596 }, { "epoch": 0.67, "grad_norm": 1.7623526986928681, "learning_rate": 2.645121778364742e-06, "loss": 0.2889, "step": 10597 }, { "epoch": 0.67, "grad_norm": 2.1739857003218686, "learning_rate": 2.644223382024791e-06, "loss": 0.2743, "step": 10598 }, { "epoch": 0.67, "grad_norm": 2.187989754663518, "learning_rate": 2.6433250834301998e-06, "loss": 0.2596, "step": 10599 }, { "epoch": 0.67, "grad_norm": 3.4144606729819666, "learning_rate": 2.6424268826182377e-06, "loss": 0.2605, "step": 10600 }, { "epoch": 0.67, "grad_norm": 2.590019006914779, "learning_rate": 2.6415287796261707e-06, "loss": 0.2806, "step": 10601 }, { "epoch": 0.67, "grad_norm": 2.5424578813493213, "learning_rate": 2.640630774491262e-06, "loss": 0.2729, "step": 10602 }, { "epoch": 0.67, "grad_norm": 2.7529690132675753, "learning_rate": 2.639732867250776e-06, "loss": 0.2659, "step": 10603 }, { "epoch": 0.67, "grad_norm": 1.8640262209601803, "learning_rate": 2.6388350579419646e-06, "loss": 0.2669, "step": 10604 }, { "epoch": 0.67, "grad_norm": 3.213846652562735, "learning_rate": 2.637937346602079e-06, "loss": 0.2823, "step": 10605 }, { "epoch": 0.67, "grad_norm": 3.6387929469524227, "learning_rate": 2.6370397332683684e-06, "loss": 0.2788, "step": 10606 }, { "epoch": 0.67, "grad_norm": 1.876297500016865, "learning_rate": 2.636142217978078e-06, "loss": 0.2602, "step": 10607 }, { "epoch": 0.67, "grad_norm": 3.678408696886539, "learning_rate": 2.6352448007684466e-06, "loss": 0.2888, "step": 10608 }, { "epoch": 0.67, "grad_norm": 1.9014697504973876, "learning_rate": 2.634347481676708e-06, "loss": 0.2664, "step": 10609 }, { "epoch": 0.67, "grad_norm": 2.0913355328851155, "learning_rate": 2.6334502607400923e-06, "loss": 0.2727, "step": 10610 }, { "epoch": 0.67, "grad_norm": 4.120909900148851, "learning_rate": 2.6325531379958314e-06, "loss": 0.2548, "step": 10611 }, { "epoch": 0.67, "grad_norm": 1.507661159249386, "learning_rate": 2.631656113481145e-06, "loss": 0.2621, "step": 10612 }, { "epoch": 0.67, "grad_norm": 2.049524855023579, "learning_rate": 2.6307591872332514e-06, "loss": 0.2565, "step": 10613 }, { "epoch": 0.67, "grad_norm": 1.9332949503970627, "learning_rate": 2.6298623592893676e-06, "loss": 0.2923, "step": 10614 }, { "epoch": 0.67, "grad_norm": 1.6203202242253394, "learning_rate": 2.628965629686706e-06, "loss": 0.2765, "step": 10615 }, { "epoch": 0.67, "grad_norm": 1.835225537225523, "learning_rate": 2.628068998462472e-06, "loss": 0.2629, "step": 10616 }, { "epoch": 0.67, "grad_norm": 2.322081098431167, "learning_rate": 2.627172465653868e-06, "loss": 0.2647, "step": 10617 }, { "epoch": 0.67, "grad_norm": 2.072891592974846, "learning_rate": 2.6262760312980914e-06, "loss": 0.2528, "step": 10618 }, { "epoch": 0.67, "grad_norm": 3.4635736856140795, "learning_rate": 2.62537969543234e-06, "loss": 0.2692, "step": 10619 }, { "epoch": 0.67, "grad_norm": 1.6192277971923672, "learning_rate": 2.6244834580938016e-06, "loss": 0.2586, "step": 10620 }, { "epoch": 0.67, "grad_norm": 6.180052056443318, "learning_rate": 2.623587319319665e-06, "loss": 0.2631, "step": 10621 }, { "epoch": 0.67, "grad_norm": 2.1416244976145387, "learning_rate": 2.6226912791471103e-06, "loss": 0.2495, "step": 10622 }, { "epoch": 0.67, "grad_norm": 1.4886024493190455, "learning_rate": 2.6217953376133187e-06, "loss": 0.2619, "step": 10623 }, { "epoch": 0.67, "grad_norm": 2.510835148698963, "learning_rate": 2.6208994947554626e-06, "loss": 0.2615, "step": 10624 }, { "epoch": 0.67, "grad_norm": 2.400618012820244, "learning_rate": 2.620003750610712e-06, "loss": 0.2911, "step": 10625 }, { "epoch": 0.67, "grad_norm": 2.696640786017668, "learning_rate": 2.6191081052162315e-06, "loss": 0.2587, "step": 10626 }, { "epoch": 0.67, "grad_norm": 1.2871950304100317, "learning_rate": 2.6182125586091867e-06, "loss": 0.3058, "step": 10627 }, { "epoch": 0.67, "grad_norm": 1.6835677362491857, "learning_rate": 2.6173171108267316e-06, "loss": 0.2666, "step": 10628 }, { "epoch": 0.67, "grad_norm": 10.260840460070348, "learning_rate": 2.6164217619060234e-06, "loss": 0.2759, "step": 10629 }, { "epoch": 0.67, "grad_norm": 1.3818412361050458, "learning_rate": 2.615526511884208e-06, "loss": 0.2622, "step": 10630 }, { "epoch": 0.67, "grad_norm": 1.523975425219532, "learning_rate": 2.6146313607984355e-06, "loss": 0.2578, "step": 10631 }, { "epoch": 0.67, "grad_norm": 1.8168268645072094, "learning_rate": 2.6137363086858435e-06, "loss": 0.2821, "step": 10632 }, { "epoch": 0.67, "grad_norm": 1.7028858622010878, "learning_rate": 2.612841355583571e-06, "loss": 0.2742, "step": 10633 }, { "epoch": 0.67, "grad_norm": 2.2438395892141583, "learning_rate": 2.6119465015287493e-06, "loss": 0.2598, "step": 10634 }, { "epoch": 0.67, "grad_norm": 1.5397188637521444, "learning_rate": 2.6110517465585085e-06, "loss": 0.2648, "step": 10635 }, { "epoch": 0.67, "grad_norm": 1.8103106211290358, "learning_rate": 2.610157090709976e-06, "loss": 0.2634, "step": 10636 }, { "epoch": 0.67, "grad_norm": 2.546450720595291, "learning_rate": 2.6092625340202703e-06, "loss": 0.2728, "step": 10637 }, { "epoch": 0.67, "grad_norm": 2.8677470316918625, "learning_rate": 2.6083680765265073e-06, "loss": 0.266, "step": 10638 }, { "epoch": 0.67, "grad_norm": 1.6099796704289258, "learning_rate": 2.607473718265802e-06, "loss": 0.2597, "step": 10639 }, { "epoch": 0.67, "grad_norm": 1.8982228396210488, "learning_rate": 2.6065794592752623e-06, "loss": 0.2763, "step": 10640 }, { "epoch": 0.67, "grad_norm": 1.7486909185828226, "learning_rate": 2.6056852995919918e-06, "loss": 0.268, "step": 10641 }, { "epoch": 0.67, "grad_norm": 2.8147658964351727, "learning_rate": 2.604791239253089e-06, "loss": 0.2676, "step": 10642 }, { "epoch": 0.67, "grad_norm": 16.94077740106074, "learning_rate": 2.6038972782956516e-06, "loss": 0.2558, "step": 10643 }, { "epoch": 0.67, "grad_norm": 2.0246544360765015, "learning_rate": 2.6030034167567752e-06, "loss": 0.2569, "step": 10644 }, { "epoch": 0.67, "grad_norm": 1.750501290100431, "learning_rate": 2.6021096546735438e-06, "loss": 0.2758, "step": 10645 }, { "epoch": 0.67, "grad_norm": 1.559194212820961, "learning_rate": 2.60121599208304e-06, "loss": 0.2756, "step": 10646 }, { "epoch": 0.67, "grad_norm": 1.4951093445217885, "learning_rate": 2.600322429022347e-06, "loss": 0.2699, "step": 10647 }, { "epoch": 0.67, "grad_norm": 2.6597456654934737, "learning_rate": 2.5994289655285396e-06, "loss": 0.2889, "step": 10648 }, { "epoch": 0.67, "grad_norm": 1.5161201312991648, "learning_rate": 2.5985356016386883e-06, "loss": 0.2552, "step": 10649 }, { "epoch": 0.67, "grad_norm": 2.0867449553152335, "learning_rate": 2.597642337389858e-06, "loss": 0.2597, "step": 10650 }, { "epoch": 0.67, "grad_norm": 2.2875975532603436, "learning_rate": 2.596749172819114e-06, "loss": 0.2604, "step": 10651 }, { "epoch": 0.67, "grad_norm": 1.5626438460521015, "learning_rate": 2.595856107963518e-06, "loss": 0.2703, "step": 10652 }, { "epoch": 0.67, "grad_norm": 3.6844249062005776, "learning_rate": 2.5949631428601218e-06, "loss": 0.2707, "step": 10653 }, { "epoch": 0.67, "grad_norm": 2.084937417918076, "learning_rate": 2.594070277545975e-06, "loss": 0.2796, "step": 10654 }, { "epoch": 0.67, "grad_norm": 1.587092909145393, "learning_rate": 2.593177512058127e-06, "loss": 0.2475, "step": 10655 }, { "epoch": 0.67, "grad_norm": 0.6498549567788552, "learning_rate": 2.5922848464336203e-06, "loss": 0.4876, "step": 10656 }, { "epoch": 0.67, "grad_norm": 3.9631345189313008, "learning_rate": 2.5913922807094894e-06, "loss": 0.2555, "step": 10657 }, { "epoch": 0.67, "grad_norm": 3.348123254239563, "learning_rate": 2.590499814922772e-06, "loss": 0.2619, "step": 10658 }, { "epoch": 0.67, "grad_norm": 1.464586238642134, "learning_rate": 2.5896074491104963e-06, "loss": 0.2535, "step": 10659 }, { "epoch": 0.67, "grad_norm": 1.929509701228109, "learning_rate": 2.58871518330969e-06, "loss": 0.2537, "step": 10660 }, { "epoch": 0.67, "grad_norm": 0.5794823207730463, "learning_rate": 2.5878230175573743e-06, "loss": 0.49, "step": 10661 }, { "epoch": 0.67, "grad_norm": 2.523906905390259, "learning_rate": 2.586930951890564e-06, "loss": 0.2549, "step": 10662 }, { "epoch": 0.67, "grad_norm": 2.0591089722327425, "learning_rate": 2.5860389863462765e-06, "loss": 0.2622, "step": 10663 }, { "epoch": 0.67, "grad_norm": 4.3123396773365945, "learning_rate": 2.5851471209615186e-06, "loss": 0.2466, "step": 10664 }, { "epoch": 0.67, "grad_norm": 1.9443522141802896, "learning_rate": 2.5842553557732953e-06, "loss": 0.2563, "step": 10665 }, { "epoch": 0.67, "grad_norm": 1.588902182894885, "learning_rate": 2.5833636908186064e-06, "loss": 0.269, "step": 10666 }, { "epoch": 0.67, "grad_norm": 5.672968990815488, "learning_rate": 2.582472126134454e-06, "loss": 0.2626, "step": 10667 }, { "epoch": 0.67, "grad_norm": 2.8489354443635064, "learning_rate": 2.581580661757826e-06, "loss": 0.2627, "step": 10668 }, { "epoch": 0.67, "grad_norm": 2.4598953094511153, "learning_rate": 2.5806892977257126e-06, "loss": 0.2863, "step": 10669 }, { "epoch": 0.67, "grad_norm": 1.6698336809258087, "learning_rate": 2.579798034075095e-06, "loss": 0.2629, "step": 10670 }, { "epoch": 0.67, "grad_norm": 2.629764415185163, "learning_rate": 2.5789068708429576e-06, "loss": 0.2706, "step": 10671 }, { "epoch": 0.67, "grad_norm": 2.0127877473725233, "learning_rate": 2.578015808066273e-06, "loss": 0.2604, "step": 10672 }, { "epoch": 0.67, "grad_norm": 2.507531804997822, "learning_rate": 2.5771248457820165e-06, "loss": 0.2522, "step": 10673 }, { "epoch": 0.67, "grad_norm": 1.7626399370202608, "learning_rate": 2.5762339840271513e-06, "loss": 0.255, "step": 10674 }, { "epoch": 0.67, "grad_norm": 1.5260960041805964, "learning_rate": 2.575343222838645e-06, "loss": 0.2523, "step": 10675 }, { "epoch": 0.67, "grad_norm": 1.6774592924681375, "learning_rate": 2.574452562253455e-06, "loss": 0.2879, "step": 10676 }, { "epoch": 0.67, "grad_norm": 2.077359334144614, "learning_rate": 2.5735620023085367e-06, "loss": 0.2625, "step": 10677 }, { "epoch": 0.67, "grad_norm": 2.2258704980151838, "learning_rate": 2.572671543040839e-06, "loss": 0.2721, "step": 10678 }, { "epoch": 0.67, "grad_norm": 4.357607266475697, "learning_rate": 2.571781184487312e-06, "loss": 0.2816, "step": 10679 }, { "epoch": 0.67, "grad_norm": 1.4169846712490002, "learning_rate": 2.570890926684895e-06, "loss": 0.2798, "step": 10680 }, { "epoch": 0.67, "grad_norm": 2.784672021914526, "learning_rate": 2.57000076967053e-06, "loss": 0.2625, "step": 10681 }, { "epoch": 0.67, "grad_norm": 2.240635996638068, "learning_rate": 2.569110713481147e-06, "loss": 0.2633, "step": 10682 }, { "epoch": 0.67, "grad_norm": 1.615192306625452, "learning_rate": 2.56822075815368e-06, "loss": 0.2609, "step": 10683 }, { "epoch": 0.67, "grad_norm": 1.6451409067071499, "learning_rate": 2.567330903725054e-06, "loss": 0.276, "step": 10684 }, { "epoch": 0.67, "grad_norm": 1.3289556990694675, "learning_rate": 2.566441150232189e-06, "loss": 0.276, "step": 10685 }, { "epoch": 0.67, "grad_norm": 1.7213448987786697, "learning_rate": 2.5655514977120013e-06, "loss": 0.2786, "step": 10686 }, { "epoch": 0.67, "grad_norm": 2.4652025240297863, "learning_rate": 2.5646619462014062e-06, "loss": 0.2752, "step": 10687 }, { "epoch": 0.67, "grad_norm": 2.09302677785505, "learning_rate": 2.5637724957373144e-06, "loss": 0.2726, "step": 10688 }, { "epoch": 0.67, "grad_norm": 2.7267881270737377, "learning_rate": 2.5628831463566285e-06, "loss": 0.2611, "step": 10689 }, { "epoch": 0.67, "grad_norm": 7.744178529246718, "learning_rate": 2.561993898096249e-06, "loss": 0.2617, "step": 10690 }, { "epoch": 0.67, "grad_norm": 1.8202378836545405, "learning_rate": 2.5611047509930737e-06, "loss": 0.259, "step": 10691 }, { "epoch": 0.67, "grad_norm": 1.862782784274091, "learning_rate": 2.560215705083995e-06, "loss": 0.2537, "step": 10692 }, { "epoch": 0.67, "grad_norm": 2.8719111448227204, "learning_rate": 2.5593267604058998e-06, "loss": 0.2604, "step": 10693 }, { "epoch": 0.67, "grad_norm": 4.139683591121575, "learning_rate": 2.558437916995671e-06, "loss": 0.2696, "step": 10694 }, { "epoch": 0.67, "grad_norm": 3.59879788854556, "learning_rate": 2.5575491748901892e-06, "loss": 0.2744, "step": 10695 }, { "epoch": 0.67, "grad_norm": 1.9827703484918457, "learning_rate": 2.5566605341263322e-06, "loss": 0.2725, "step": 10696 }, { "epoch": 0.67, "grad_norm": 1.7400888471913067, "learning_rate": 2.55577199474097e-06, "loss": 0.2694, "step": 10697 }, { "epoch": 0.67, "grad_norm": 0.6260521221584945, "learning_rate": 2.5548835567709672e-06, "loss": 0.5226, "step": 10698 }, { "epoch": 0.67, "grad_norm": 1.9467036804749311, "learning_rate": 2.5539952202531905e-06, "loss": 0.2886, "step": 10699 }, { "epoch": 0.67, "grad_norm": 1.751481328040724, "learning_rate": 2.5531069852244968e-06, "loss": 0.2718, "step": 10700 }, { "epoch": 0.67, "grad_norm": 0.5923047119665359, "learning_rate": 2.5522188517217404e-06, "loss": 0.4505, "step": 10701 }, { "epoch": 0.67, "grad_norm": 1.921487192719859, "learning_rate": 2.551330819781769e-06, "loss": 0.261, "step": 10702 }, { "epoch": 0.67, "grad_norm": 0.5990451619774673, "learning_rate": 2.5504428894414323e-06, "loss": 0.4841, "step": 10703 }, { "epoch": 0.67, "grad_norm": 2.3902303604684936, "learning_rate": 2.5495550607375723e-06, "loss": 0.2637, "step": 10704 }, { "epoch": 0.67, "grad_norm": 1.8548515651860293, "learning_rate": 2.5486673337070254e-06, "loss": 0.2663, "step": 10705 }, { "epoch": 0.67, "grad_norm": 5.238012333062435, "learning_rate": 2.5477797083866227e-06, "loss": 0.2698, "step": 10706 }, { "epoch": 0.67, "grad_norm": 1.7506722369020096, "learning_rate": 2.5468921848131984e-06, "loss": 0.2593, "step": 10707 }, { "epoch": 0.67, "grad_norm": 1.8529942145505167, "learning_rate": 2.546004763023574e-06, "loss": 0.2758, "step": 10708 }, { "epoch": 0.67, "grad_norm": 2.5379960918388584, "learning_rate": 2.5451174430545704e-06, "loss": 0.2713, "step": 10709 }, { "epoch": 0.67, "grad_norm": 1.5288099794809744, "learning_rate": 2.5442302249430027e-06, "loss": 0.2624, "step": 10710 }, { "epoch": 0.67, "grad_norm": 7.197753413877993, "learning_rate": 2.5433431087256853e-06, "loss": 0.2876, "step": 10711 }, { "epoch": 0.67, "grad_norm": 1.7506673837411795, "learning_rate": 2.542456094439427e-06, "loss": 0.247, "step": 10712 }, { "epoch": 0.67, "grad_norm": 1.4603022809404356, "learning_rate": 2.5415691821210314e-06, "loss": 0.2709, "step": 10713 }, { "epoch": 0.67, "grad_norm": 1.8888431331421702, "learning_rate": 2.540682371807295e-06, "loss": 0.2782, "step": 10714 }, { "epoch": 0.67, "grad_norm": 2.0378610460366953, "learning_rate": 2.5397956635350164e-06, "loss": 0.257, "step": 10715 }, { "epoch": 0.67, "grad_norm": 2.4467611374502765, "learning_rate": 2.5389090573409863e-06, "loss": 0.2741, "step": 10716 }, { "epoch": 0.67, "grad_norm": 1.68515593439781, "learning_rate": 2.5380225532619886e-06, "loss": 0.2544, "step": 10717 }, { "epoch": 0.67, "grad_norm": 2.8409282143245345, "learning_rate": 2.5371361513348103e-06, "loss": 0.2771, "step": 10718 }, { "epoch": 0.67, "grad_norm": 2.6940132298961257, "learning_rate": 2.5362498515962253e-06, "loss": 0.2586, "step": 10719 }, { "epoch": 0.67, "grad_norm": 2.183619629229947, "learning_rate": 2.5353636540830117e-06, "loss": 0.2792, "step": 10720 }, { "epoch": 0.67, "grad_norm": 1.510278437598951, "learning_rate": 2.534477558831938e-06, "loss": 0.259, "step": 10721 }, { "epoch": 0.67, "grad_norm": 1.6693127198703324, "learning_rate": 2.533591565879768e-06, "loss": 0.265, "step": 10722 }, { "epoch": 0.67, "grad_norm": 1.8263344850511583, "learning_rate": 2.5327056752632674e-06, "loss": 0.2586, "step": 10723 }, { "epoch": 0.67, "grad_norm": 2.214349382760798, "learning_rate": 2.5318198870191877e-06, "loss": 0.2747, "step": 10724 }, { "epoch": 0.67, "grad_norm": 1.6365207180987742, "learning_rate": 2.530934201184287e-06, "loss": 0.2497, "step": 10725 }, { "epoch": 0.67, "grad_norm": 2.2915169082627025, "learning_rate": 2.5300486177953123e-06, "loss": 0.2671, "step": 10726 }, { "epoch": 0.67, "grad_norm": 3.352748126876919, "learning_rate": 2.5291631368890047e-06, "loss": 0.2549, "step": 10727 }, { "epoch": 0.67, "grad_norm": 2.764105496513473, "learning_rate": 2.52827775850211e-06, "loss": 0.2692, "step": 10728 }, { "epoch": 0.67, "grad_norm": 1.7484368178982617, "learning_rate": 2.5273924826713615e-06, "loss": 0.2528, "step": 10729 }, { "epoch": 0.67, "grad_norm": 1.6640790963285514, "learning_rate": 2.526507309433488e-06, "loss": 0.2734, "step": 10730 }, { "epoch": 0.67, "grad_norm": 4.623934747375392, "learning_rate": 2.5256222388252223e-06, "loss": 0.2813, "step": 10731 }, { "epoch": 0.67, "grad_norm": 1.731381905726239, "learning_rate": 2.524737270883283e-06, "loss": 0.2752, "step": 10732 }, { "epoch": 0.67, "grad_norm": 3.9969171361101767, "learning_rate": 2.5238524056443923e-06, "loss": 0.2743, "step": 10733 }, { "epoch": 0.68, "grad_norm": 0.6007300200548918, "learning_rate": 2.522967643145263e-06, "loss": 0.493, "step": 10734 }, { "epoch": 0.68, "grad_norm": 3.0248368372994143, "learning_rate": 2.5220829834226052e-06, "loss": 0.2668, "step": 10735 }, { "epoch": 0.68, "grad_norm": 2.9164470473306445, "learning_rate": 2.521198426513127e-06, "loss": 0.2797, "step": 10736 }, { "epoch": 0.68, "grad_norm": 3.4311060110477762, "learning_rate": 2.520313972453529e-06, "loss": 0.2667, "step": 10737 }, { "epoch": 0.68, "grad_norm": 5.356554789475532, "learning_rate": 2.519429621280507e-06, "loss": 0.2399, "step": 10738 }, { "epoch": 0.68, "grad_norm": 2.921065855938856, "learning_rate": 2.518545373030755e-06, "loss": 0.2678, "step": 10739 }, { "epoch": 0.68, "grad_norm": 2.897986968370867, "learning_rate": 2.5176612277409663e-06, "loss": 0.2874, "step": 10740 }, { "epoch": 0.68, "grad_norm": 1.5972060760578377, "learning_rate": 2.516777185447822e-06, "loss": 0.2643, "step": 10741 }, { "epoch": 0.68, "grad_norm": 1.8534673337783578, "learning_rate": 2.5158932461880025e-06, "loss": 0.2643, "step": 10742 }, { "epoch": 0.68, "grad_norm": 1.9849075715174838, "learning_rate": 2.5150094099981824e-06, "loss": 0.25, "step": 10743 }, { "epoch": 0.68, "grad_norm": 1.9097060867123994, "learning_rate": 2.5141256769150384e-06, "loss": 0.2774, "step": 10744 }, { "epoch": 0.68, "grad_norm": 1.6541412249210272, "learning_rate": 2.513242046975235e-06, "loss": 0.2625, "step": 10745 }, { "epoch": 0.68, "grad_norm": 1.9101433073194343, "learning_rate": 2.5123585202154345e-06, "loss": 0.2517, "step": 10746 }, { "epoch": 0.68, "grad_norm": 4.61749849088729, "learning_rate": 2.511475096672298e-06, "loss": 0.2729, "step": 10747 }, { "epoch": 0.68, "grad_norm": 1.8666139577702474, "learning_rate": 2.5105917763824807e-06, "loss": 0.2888, "step": 10748 }, { "epoch": 0.68, "grad_norm": 3.5604030068521526, "learning_rate": 2.5097085593826324e-06, "loss": 0.2559, "step": 10749 }, { "epoch": 0.68, "grad_norm": 1.6913263652043975, "learning_rate": 2.5088254457093976e-06, "loss": 0.2734, "step": 10750 }, { "epoch": 0.68, "grad_norm": 1.887452588989263, "learning_rate": 2.507942435399422e-06, "loss": 0.2699, "step": 10751 }, { "epoch": 0.68, "grad_norm": 1.9885788573119678, "learning_rate": 2.5070595284893405e-06, "loss": 0.2784, "step": 10752 }, { "epoch": 0.68, "grad_norm": 2.321763075651108, "learning_rate": 2.5061767250157883e-06, "loss": 0.2862, "step": 10753 }, { "epoch": 0.68, "grad_norm": 2.4527588060279713, "learning_rate": 2.50529402501539e-06, "loss": 0.2875, "step": 10754 }, { "epoch": 0.68, "grad_norm": 2.9871044033792176, "learning_rate": 2.5044114285247747e-06, "loss": 0.2728, "step": 10755 }, { "epoch": 0.68, "grad_norm": 4.427113811441325, "learning_rate": 2.5035289355805634e-06, "loss": 0.27, "step": 10756 }, { "epoch": 0.68, "grad_norm": 2.5494336135191533, "learning_rate": 2.502646546219371e-06, "loss": 0.26, "step": 10757 }, { "epoch": 0.68, "grad_norm": 1.7655865115779152, "learning_rate": 2.501764260477807e-06, "loss": 0.2568, "step": 10758 }, { "epoch": 0.68, "grad_norm": 2.0906854762511573, "learning_rate": 2.5008820783924838e-06, "loss": 0.2565, "step": 10759 }, { "epoch": 0.68, "grad_norm": 2.466375728740451, "learning_rate": 2.5000000000000015e-06, "loss": 0.2751, "step": 10760 }, { "epoch": 0.68, "grad_norm": 1.912316393244981, "learning_rate": 2.4991180253369594e-06, "loss": 0.2599, "step": 10761 }, { "epoch": 0.68, "grad_norm": 4.0501135923341485, "learning_rate": 2.4982361544399507e-06, "loss": 0.2634, "step": 10762 }, { "epoch": 0.68, "grad_norm": 2.086258207425374, "learning_rate": 2.497354387345568e-06, "loss": 0.2649, "step": 10763 }, { "epoch": 0.68, "grad_norm": 1.8297431359179088, "learning_rate": 2.496472724090399e-06, "loss": 0.2745, "step": 10764 }, { "epoch": 0.68, "grad_norm": 1.8082842533895456, "learning_rate": 2.495591164711023e-06, "loss": 0.2486, "step": 10765 }, { "epoch": 0.68, "grad_norm": 2.9832680845163195, "learning_rate": 2.4947097092440158e-06, "loss": 0.2703, "step": 10766 }, { "epoch": 0.68, "grad_norm": 1.5536316284314864, "learning_rate": 2.4938283577259555e-06, "loss": 0.2626, "step": 10767 }, { "epoch": 0.68, "grad_norm": 1.7983514948701425, "learning_rate": 2.492947110193407e-06, "loss": 0.2522, "step": 10768 }, { "epoch": 0.68, "grad_norm": 1.964056558789052, "learning_rate": 2.492065966682935e-06, "loss": 0.2577, "step": 10769 }, { "epoch": 0.68, "grad_norm": 0.6199639346245456, "learning_rate": 2.4911849272311015e-06, "loss": 0.4848, "step": 10770 }, { "epoch": 0.68, "grad_norm": 2.102287851861108, "learning_rate": 2.49030399187446e-06, "loss": 0.2702, "step": 10771 }, { "epoch": 0.68, "grad_norm": 1.610894320541659, "learning_rate": 2.4894231606495663e-06, "loss": 0.2717, "step": 10772 }, { "epoch": 0.68, "grad_norm": 2.1510178553218715, "learning_rate": 2.488542433592964e-06, "loss": 0.2612, "step": 10773 }, { "epoch": 0.68, "grad_norm": 2.5313128153843305, "learning_rate": 2.487661810741196e-06, "loss": 0.275, "step": 10774 }, { "epoch": 0.68, "grad_norm": 1.9076731489355447, "learning_rate": 2.486781292130803e-06, "loss": 0.2698, "step": 10775 }, { "epoch": 0.68, "grad_norm": 2.0592369070006264, "learning_rate": 2.4859008777983183e-06, "loss": 0.2503, "step": 10776 }, { "epoch": 0.68, "grad_norm": 1.5654897475717329, "learning_rate": 2.4850205677802703e-06, "loss": 0.2477, "step": 10777 }, { "epoch": 0.68, "grad_norm": 2.043550501093226, "learning_rate": 2.484140362113187e-06, "loss": 0.2689, "step": 10778 }, { "epoch": 0.68, "grad_norm": 2.541976162692309, "learning_rate": 2.4832602608335874e-06, "loss": 0.2726, "step": 10779 }, { "epoch": 0.68, "grad_norm": 3.6565962060903403, "learning_rate": 2.482380263977992e-06, "loss": 0.2785, "step": 10780 }, { "epoch": 0.68, "grad_norm": 2.551375349168563, "learning_rate": 2.481500371582911e-06, "loss": 0.2544, "step": 10781 }, { "epoch": 0.68, "grad_norm": 2.989536025504863, "learning_rate": 2.4806205836848505e-06, "loss": 0.2705, "step": 10782 }, { "epoch": 0.68, "grad_norm": 3.8500592155614894, "learning_rate": 2.4797409003203198e-06, "loss": 0.2571, "step": 10783 }, { "epoch": 0.68, "grad_norm": 2.7112224868963137, "learning_rate": 2.4788613215258134e-06, "loss": 0.2846, "step": 10784 }, { "epoch": 0.68, "grad_norm": 1.6933863777923017, "learning_rate": 2.4779818473378303e-06, "loss": 0.266, "step": 10785 }, { "epoch": 0.68, "grad_norm": 5.859174589384238, "learning_rate": 2.4771024777928605e-06, "loss": 0.2869, "step": 10786 }, { "epoch": 0.68, "grad_norm": 1.708089033096962, "learning_rate": 2.4762232129273876e-06, "loss": 0.2466, "step": 10787 }, { "epoch": 0.68, "grad_norm": 2.518585611091397, "learning_rate": 2.475344052777898e-06, "loss": 0.2499, "step": 10788 }, { "epoch": 0.68, "grad_norm": 0.6049145839246542, "learning_rate": 2.474464997380869e-06, "loss": 0.4662, "step": 10789 }, { "epoch": 0.68, "grad_norm": 2.2675838406970885, "learning_rate": 2.4735860467727705e-06, "loss": 0.267, "step": 10790 }, { "epoch": 0.68, "grad_norm": 4.825313867014563, "learning_rate": 2.4727072009900764e-06, "loss": 0.2668, "step": 10791 }, { "epoch": 0.68, "grad_norm": 3.6622286202561916, "learning_rate": 2.4718284600692476e-06, "loss": 0.2778, "step": 10792 }, { "epoch": 0.68, "grad_norm": 1.9039837610390817, "learning_rate": 2.4709498240467484e-06, "loss": 0.2611, "step": 10793 }, { "epoch": 0.68, "grad_norm": 6.9378279982357665, "learning_rate": 2.470071292959033e-06, "loss": 0.2796, "step": 10794 }, { "epoch": 0.68, "grad_norm": 1.913454823643306, "learning_rate": 2.469192866842552e-06, "loss": 0.2654, "step": 10795 }, { "epoch": 0.68, "grad_norm": 1.4094865781866206, "learning_rate": 2.4683145457337553e-06, "loss": 0.2606, "step": 10796 }, { "epoch": 0.68, "grad_norm": 4.386602186222407, "learning_rate": 2.467436329669085e-06, "loss": 0.2776, "step": 10797 }, { "epoch": 0.68, "grad_norm": 2.55838764175605, "learning_rate": 2.4665582186849783e-06, "loss": 0.2859, "step": 10798 }, { "epoch": 0.68, "grad_norm": 2.9091561101391172, "learning_rate": 2.465680212817871e-06, "loss": 0.2752, "step": 10799 }, { "epoch": 0.68, "grad_norm": 2.9170688023309563, "learning_rate": 2.464802312104195e-06, "loss": 0.2616, "step": 10800 }, { "epoch": 0.68, "grad_norm": 1.857867731191885, "learning_rate": 2.4639245165803732e-06, "loss": 0.2678, "step": 10801 }, { "epoch": 0.68, "grad_norm": 1.8543701442588305, "learning_rate": 2.4630468262828283e-06, "loss": 0.2689, "step": 10802 }, { "epoch": 0.68, "grad_norm": 0.586511193132362, "learning_rate": 2.4621692412479753e-06, "loss": 0.455, "step": 10803 }, { "epoch": 0.68, "grad_norm": 1.9110652553644543, "learning_rate": 2.4612917615122293e-06, "loss": 0.2768, "step": 10804 }, { "epoch": 0.68, "grad_norm": 6.711422682568751, "learning_rate": 2.460414387111998e-06, "loss": 0.2612, "step": 10805 }, { "epoch": 0.68, "grad_norm": 3.575483264792797, "learning_rate": 2.4595371180836823e-06, "loss": 0.272, "step": 10806 }, { "epoch": 0.68, "grad_norm": 0.5967939552732896, "learning_rate": 2.458659954463684e-06, "loss": 0.4742, "step": 10807 }, { "epoch": 0.68, "grad_norm": 1.608502248002645, "learning_rate": 2.4577828962884005e-06, "loss": 0.2655, "step": 10808 }, { "epoch": 0.68, "grad_norm": 4.121806103608374, "learning_rate": 2.45690594359422e-06, "loss": 0.2745, "step": 10809 }, { "epoch": 0.68, "grad_norm": 1.782328293865478, "learning_rate": 2.4560290964175288e-06, "loss": 0.2619, "step": 10810 }, { "epoch": 0.68, "grad_norm": 2.3729383561164705, "learning_rate": 2.4551523547947066e-06, "loss": 0.2568, "step": 10811 }, { "epoch": 0.68, "grad_norm": 1.8005132301573186, "learning_rate": 2.454275718762136e-06, "loss": 0.268, "step": 10812 }, { "epoch": 0.68, "grad_norm": 2.2151613944442685, "learning_rate": 2.4533991883561868e-06, "loss": 0.2606, "step": 10813 }, { "epoch": 0.68, "grad_norm": 2.80747473819906, "learning_rate": 2.452522763613227e-06, "loss": 0.2608, "step": 10814 }, { "epoch": 0.68, "grad_norm": 1.666388877098076, "learning_rate": 2.4516464445696223e-06, "loss": 0.2583, "step": 10815 }, { "epoch": 0.68, "grad_norm": 2.739491756727603, "learning_rate": 2.4507702312617353e-06, "loss": 0.2749, "step": 10816 }, { "epoch": 0.68, "grad_norm": 8.52263683453807, "learning_rate": 2.4498941237259195e-06, "loss": 0.2863, "step": 10817 }, { "epoch": 0.68, "grad_norm": 2.2901257801936823, "learning_rate": 2.449018121998526e-06, "loss": 0.2567, "step": 10818 }, { "epoch": 0.68, "grad_norm": 2.4581399080578046, "learning_rate": 2.4481422261158995e-06, "loss": 0.269, "step": 10819 }, { "epoch": 0.68, "grad_norm": 5.349425912986184, "learning_rate": 2.4472664361143868e-06, "loss": 0.2516, "step": 10820 }, { "epoch": 0.68, "grad_norm": 1.2633289576747733, "learning_rate": 2.4463907520303216e-06, "loss": 0.2705, "step": 10821 }, { "epoch": 0.68, "grad_norm": 2.1854781840997743, "learning_rate": 2.4455151739000415e-06, "loss": 0.2529, "step": 10822 }, { "epoch": 0.68, "grad_norm": 1.9687167937030654, "learning_rate": 2.4446397017598726e-06, "loss": 0.2765, "step": 10823 }, { "epoch": 0.68, "grad_norm": 1.8201049864582572, "learning_rate": 2.443764335646143e-06, "loss": 0.2695, "step": 10824 }, { "epoch": 0.68, "grad_norm": 2.0189675172167174, "learning_rate": 2.4428890755951717e-06, "loss": 0.2641, "step": 10825 }, { "epoch": 0.68, "grad_norm": 2.149857973922792, "learning_rate": 2.4420139216432746e-06, "loss": 0.2684, "step": 10826 }, { "epoch": 0.68, "grad_norm": 5.691615332807052, "learning_rate": 2.441138873826761e-06, "loss": 0.286, "step": 10827 }, { "epoch": 0.68, "grad_norm": 1.4865271893224243, "learning_rate": 2.4402639321819433e-06, "loss": 0.2649, "step": 10828 }, { "epoch": 0.68, "grad_norm": 1.659360497673168, "learning_rate": 2.4393890967451194e-06, "loss": 0.2525, "step": 10829 }, { "epoch": 0.68, "grad_norm": 3.738767329789025, "learning_rate": 2.4385143675525917e-06, "loss": 0.2873, "step": 10830 }, { "epoch": 0.68, "grad_norm": 1.3378421747300495, "learning_rate": 2.4376397446406508e-06, "loss": 0.2767, "step": 10831 }, { "epoch": 0.68, "grad_norm": 3.4810079656240105, "learning_rate": 2.43676522804559e-06, "loss": 0.2536, "step": 10832 }, { "epoch": 0.68, "grad_norm": 1.7610103965553026, "learning_rate": 2.435890817803693e-06, "loss": 0.2519, "step": 10833 }, { "epoch": 0.68, "grad_norm": 1.9028691179983896, "learning_rate": 2.4350165139512384e-06, "loss": 0.2553, "step": 10834 }, { "epoch": 0.68, "grad_norm": 1.611323828889131, "learning_rate": 2.4341423165245064e-06, "loss": 0.2689, "step": 10835 }, { "epoch": 0.68, "grad_norm": 3.1389346807689904, "learning_rate": 2.433268225559765e-06, "loss": 0.2633, "step": 10836 }, { "epoch": 0.68, "grad_norm": 0.6035650523608391, "learning_rate": 2.4323942410932862e-06, "loss": 0.488, "step": 10837 }, { "epoch": 0.68, "grad_norm": 1.7200242146295561, "learning_rate": 2.4315203631613303e-06, "loss": 0.2671, "step": 10838 }, { "epoch": 0.68, "grad_norm": 3.601566766359058, "learning_rate": 2.4306465918001554e-06, "loss": 0.2707, "step": 10839 }, { "epoch": 0.68, "grad_norm": 1.9848085958619472, "learning_rate": 2.429772927046019e-06, "loss": 0.2733, "step": 10840 }, { "epoch": 0.68, "grad_norm": 24.69827138673236, "learning_rate": 2.4288993689351692e-06, "loss": 0.2514, "step": 10841 }, { "epoch": 0.68, "grad_norm": 2.054443038449273, "learning_rate": 2.428025917503849e-06, "loss": 0.2792, "step": 10842 }, { "epoch": 0.68, "grad_norm": 2.841957651692076, "learning_rate": 2.427152572788304e-06, "loss": 0.2766, "step": 10843 }, { "epoch": 0.68, "grad_norm": 1.8254332768160941, "learning_rate": 2.4262793348247663e-06, "loss": 0.2604, "step": 10844 }, { "epoch": 0.68, "grad_norm": 1.8950096525597646, "learning_rate": 2.425406203649473e-06, "loss": 0.25, "step": 10845 }, { "epoch": 0.68, "grad_norm": 1.593341431234992, "learning_rate": 2.4245331792986486e-06, "loss": 0.2486, "step": 10846 }, { "epoch": 0.68, "grad_norm": 1.5497930112050453, "learning_rate": 2.4236602618085157e-06, "loss": 0.26, "step": 10847 }, { "epoch": 0.68, "grad_norm": 1.6949615459239775, "learning_rate": 2.4227874512152965e-06, "loss": 0.2577, "step": 10848 }, { "epoch": 0.68, "grad_norm": 3.7377251655209793, "learning_rate": 2.421914747555204e-06, "loss": 0.2725, "step": 10849 }, { "epoch": 0.68, "grad_norm": 2.3957716170763947, "learning_rate": 2.4210421508644454e-06, "loss": 0.2528, "step": 10850 }, { "epoch": 0.68, "grad_norm": 1.5906823880435084, "learning_rate": 2.4201696611792285e-06, "loss": 0.25, "step": 10851 }, { "epoch": 0.68, "grad_norm": 1.3668835120315774, "learning_rate": 2.419297278535757e-06, "loss": 0.2646, "step": 10852 }, { "epoch": 0.68, "grad_norm": 5.0248823520360695, "learning_rate": 2.418425002970225e-06, "loss": 0.2751, "step": 10853 }, { "epoch": 0.68, "grad_norm": 1.9113828394692174, "learning_rate": 2.417552834518825e-06, "loss": 0.2627, "step": 10854 }, { "epoch": 0.68, "grad_norm": 2.6343467579385385, "learning_rate": 2.4166807732177435e-06, "loss": 0.27, "step": 10855 }, { "epoch": 0.68, "grad_norm": 3.444913547484067, "learning_rate": 2.4158088191031666e-06, "loss": 0.2646, "step": 10856 }, { "epoch": 0.68, "grad_norm": 2.23435673794038, "learning_rate": 2.414936972211272e-06, "loss": 0.2681, "step": 10857 }, { "epoch": 0.68, "grad_norm": 8.170829744020603, "learning_rate": 2.4140652325782317e-06, "loss": 0.2614, "step": 10858 }, { "epoch": 0.68, "grad_norm": 2.3178830668271937, "learning_rate": 2.4131936002402182e-06, "loss": 0.2608, "step": 10859 }, { "epoch": 0.68, "grad_norm": 3.276373835655226, "learning_rate": 2.4123220752333994e-06, "loss": 0.2635, "step": 10860 }, { "epoch": 0.68, "grad_norm": 3.4365126148774165, "learning_rate": 2.4114506575939333e-06, "loss": 0.2683, "step": 10861 }, { "epoch": 0.68, "grad_norm": 2.012939648087876, "learning_rate": 2.4105793473579773e-06, "loss": 0.2638, "step": 10862 }, { "epoch": 0.68, "grad_norm": 3.619364225017148, "learning_rate": 2.4097081445616814e-06, "loss": 0.2718, "step": 10863 }, { "epoch": 0.68, "grad_norm": 4.465368191838411, "learning_rate": 2.408837049241198e-06, "loss": 0.2668, "step": 10864 }, { "epoch": 0.68, "grad_norm": 2.533203366468756, "learning_rate": 2.407966061432668e-06, "loss": 0.289, "step": 10865 }, { "epoch": 0.68, "grad_norm": 1.4687487469774676, "learning_rate": 2.407095181172227e-06, "loss": 0.2565, "step": 10866 }, { "epoch": 0.68, "grad_norm": 3.344562654570713, "learning_rate": 2.4062244084960136e-06, "loss": 0.2687, "step": 10867 }, { "epoch": 0.68, "grad_norm": 12.930673067198498, "learning_rate": 2.4053537434401576e-06, "loss": 0.2762, "step": 10868 }, { "epoch": 0.68, "grad_norm": 1.551485306675878, "learning_rate": 2.404483186040784e-06, "loss": 0.2492, "step": 10869 }, { "epoch": 0.68, "grad_norm": 2.0665610186258525, "learning_rate": 2.403612736334013e-06, "loss": 0.2472, "step": 10870 }, { "epoch": 0.68, "grad_norm": 2.064619466844578, "learning_rate": 2.402742394355959e-06, "loss": 0.2585, "step": 10871 }, { "epoch": 0.68, "grad_norm": 3.7820775829933004, "learning_rate": 2.4018721601427387e-06, "loss": 0.2896, "step": 10872 }, { "epoch": 0.68, "grad_norm": 1.8421601210465457, "learning_rate": 2.4010020337304575e-06, "loss": 0.2704, "step": 10873 }, { "epoch": 0.68, "grad_norm": 4.444179906428219, "learning_rate": 2.4001320151552153e-06, "loss": 0.2871, "step": 10874 }, { "epoch": 0.68, "grad_norm": 2.0461616569532914, "learning_rate": 2.3992621044531146e-06, "loss": 0.2598, "step": 10875 }, { "epoch": 0.68, "grad_norm": 0.6281500200261377, "learning_rate": 2.39839230166025e-06, "loss": 0.4726, "step": 10876 }, { "epoch": 0.68, "grad_norm": 2.289931686135603, "learning_rate": 2.3975226068127107e-06, "loss": 0.2634, "step": 10877 }, { "epoch": 0.68, "grad_norm": 2.166019674554041, "learning_rate": 2.39665301994658e-06, "loss": 0.2687, "step": 10878 }, { "epoch": 0.68, "grad_norm": 2.9610345374597213, "learning_rate": 2.395783541097938e-06, "loss": 0.2669, "step": 10879 }, { "epoch": 0.68, "grad_norm": 2.7195799517151467, "learning_rate": 2.3949141703028643e-06, "loss": 0.2563, "step": 10880 }, { "epoch": 0.68, "grad_norm": 2.4205161530393116, "learning_rate": 2.394044907597427e-06, "loss": 0.272, "step": 10881 }, { "epoch": 0.68, "grad_norm": 0.6188588342391146, "learning_rate": 2.393175753017697e-06, "loss": 0.4873, "step": 10882 }, { "epoch": 0.68, "grad_norm": 1.7269059264314834, "learning_rate": 2.3923067065997336e-06, "loss": 0.2583, "step": 10883 }, { "epoch": 0.68, "grad_norm": 2.129786075912959, "learning_rate": 2.391437768379598e-06, "loss": 0.2668, "step": 10884 }, { "epoch": 0.68, "grad_norm": 1.879601366199349, "learning_rate": 2.3905689383933433e-06, "loss": 0.2583, "step": 10885 }, { "epoch": 0.68, "grad_norm": 2.90273285050398, "learning_rate": 2.3897002166770183e-06, "loss": 0.2636, "step": 10886 }, { "epoch": 0.68, "grad_norm": 2.7457911686373264, "learning_rate": 2.3888316032666653e-06, "loss": 0.264, "step": 10887 }, { "epoch": 0.68, "grad_norm": 3.245829896736995, "learning_rate": 2.3879630981983277e-06, "loss": 0.2754, "step": 10888 }, { "epoch": 0.68, "grad_norm": 3.3258908249547043, "learning_rate": 2.387094701508042e-06, "loss": 0.2521, "step": 10889 }, { "epoch": 0.68, "grad_norm": 1.8457514254007812, "learning_rate": 2.3862264132318385e-06, "loss": 0.2533, "step": 10890 }, { "epoch": 0.68, "grad_norm": 1.3115242101589804, "learning_rate": 2.385358233405741e-06, "loss": 0.2682, "step": 10891 }, { "epoch": 0.68, "grad_norm": 2.3842661648783077, "learning_rate": 2.3844901620657775e-06, "loss": 0.2691, "step": 10892 }, { "epoch": 0.69, "grad_norm": 2.3848533479403673, "learning_rate": 2.3836221992479626e-06, "loss": 0.2574, "step": 10893 }, { "epoch": 0.69, "grad_norm": 1.646166234318717, "learning_rate": 2.3827543449883094e-06, "loss": 0.2657, "step": 10894 }, { "epoch": 0.69, "grad_norm": 2.3394498095130616, "learning_rate": 2.3818865993228256e-06, "loss": 0.2471, "step": 10895 }, { "epoch": 0.69, "grad_norm": 2.3183147321099677, "learning_rate": 2.3810189622875174e-06, "loss": 0.2886, "step": 10896 }, { "epoch": 0.69, "grad_norm": 1.4799604803379114, "learning_rate": 2.3801514339183857e-06, "loss": 0.2763, "step": 10897 }, { "epoch": 0.69, "grad_norm": 1.559558408132068, "learning_rate": 2.3792840142514244e-06, "loss": 0.2563, "step": 10898 }, { "epoch": 0.69, "grad_norm": 1.9476723320016283, "learning_rate": 2.3784167033226222e-06, "loss": 0.259, "step": 10899 }, { "epoch": 0.69, "grad_norm": 3.2389654261430936, "learning_rate": 2.37754950116797e-06, "loss": 0.262, "step": 10900 }, { "epoch": 0.69, "grad_norm": 2.656437300482076, "learning_rate": 2.3766824078234468e-06, "loss": 0.2593, "step": 10901 }, { "epoch": 0.69, "grad_norm": 2.7259779210563946, "learning_rate": 2.3758154233250298e-06, "loss": 0.2618, "step": 10902 }, { "epoch": 0.69, "grad_norm": 10.099447602178023, "learning_rate": 2.3749485477086883e-06, "loss": 0.2494, "step": 10903 }, { "epoch": 0.69, "grad_norm": 1.8399474411979575, "learning_rate": 2.374081781010398e-06, "loss": 0.2732, "step": 10904 }, { "epoch": 0.69, "grad_norm": 5.3599037997487375, "learning_rate": 2.373215123266118e-06, "loss": 0.2799, "step": 10905 }, { "epoch": 0.69, "grad_norm": 2.9549760089744432, "learning_rate": 2.372348574511809e-06, "loss": 0.2547, "step": 10906 }, { "epoch": 0.69, "grad_norm": 1.8140650942537564, "learning_rate": 2.371482134783423e-06, "loss": 0.2832, "step": 10907 }, { "epoch": 0.69, "grad_norm": 2.3040300334623325, "learning_rate": 2.370615804116914e-06, "loss": 0.2495, "step": 10908 }, { "epoch": 0.69, "grad_norm": 2.3787249135748456, "learning_rate": 2.369749582548225e-06, "loss": 0.2658, "step": 10909 }, { "epoch": 0.69, "grad_norm": 3.7002413232831315, "learning_rate": 2.3688834701132964e-06, "loss": 0.2777, "step": 10910 }, { "epoch": 0.69, "grad_norm": 1.6570233627211397, "learning_rate": 2.3680174668480656e-06, "loss": 0.2641, "step": 10911 }, { "epoch": 0.69, "grad_norm": 2.9881638843813065, "learning_rate": 2.367151572788467e-06, "loss": 0.2632, "step": 10912 }, { "epoch": 0.69, "grad_norm": 1.8864654449008889, "learning_rate": 2.3662857879704265e-06, "loss": 0.2591, "step": 10913 }, { "epoch": 0.69, "grad_norm": 2.382164067425556, "learning_rate": 2.3654201124298658e-06, "loss": 0.252, "step": 10914 }, { "epoch": 0.69, "grad_norm": 0.6536734335078284, "learning_rate": 2.364554546202703e-06, "loss": 0.4739, "step": 10915 }, { "epoch": 0.69, "grad_norm": 0.6142496272903245, "learning_rate": 2.363689089324855e-06, "loss": 0.4544, "step": 10916 }, { "epoch": 0.69, "grad_norm": 1.5179567144839843, "learning_rate": 2.362823741832229e-06, "loss": 0.274, "step": 10917 }, { "epoch": 0.69, "grad_norm": 3.103286936456892, "learning_rate": 2.3619585037607283e-06, "loss": 0.2823, "step": 10918 }, { "epoch": 0.69, "grad_norm": 3.6817210396512827, "learning_rate": 2.3610933751462554e-06, "loss": 0.2719, "step": 10919 }, { "epoch": 0.69, "grad_norm": 3.667014841922441, "learning_rate": 2.360228356024707e-06, "loss": 0.2576, "step": 10920 }, { "epoch": 0.69, "grad_norm": 2.08097641517747, "learning_rate": 2.3593634464319732e-06, "loss": 0.2874, "step": 10921 }, { "epoch": 0.69, "grad_norm": 1.3777251939030866, "learning_rate": 2.35849864640394e-06, "loss": 0.2536, "step": 10922 }, { "epoch": 0.69, "grad_norm": 2.0453044671274974, "learning_rate": 2.357633955976488e-06, "loss": 0.2549, "step": 10923 }, { "epoch": 0.69, "grad_norm": 1.7899662206002802, "learning_rate": 2.3567693751854985e-06, "loss": 0.2622, "step": 10924 }, { "epoch": 0.69, "grad_norm": 1.7915825281831113, "learning_rate": 2.3559049040668425e-06, "loss": 0.2678, "step": 10925 }, { "epoch": 0.69, "grad_norm": 1.9438606617371923, "learning_rate": 2.355040542656387e-06, "loss": 0.2499, "step": 10926 }, { "epoch": 0.69, "grad_norm": 4.531104619929896, "learning_rate": 2.354176290989996e-06, "loss": 0.2725, "step": 10927 }, { "epoch": 0.69, "grad_norm": 2.2581310911745267, "learning_rate": 2.353312149103533e-06, "loss": 0.2563, "step": 10928 }, { "epoch": 0.69, "grad_norm": 2.91521569417984, "learning_rate": 2.3524481170328506e-06, "loss": 0.2713, "step": 10929 }, { "epoch": 0.69, "grad_norm": 2.413535705432135, "learning_rate": 2.3515841948137976e-06, "loss": 0.272, "step": 10930 }, { "epoch": 0.69, "grad_norm": 1.8006589061091516, "learning_rate": 2.350720382482219e-06, "loss": 0.2605, "step": 10931 }, { "epoch": 0.69, "grad_norm": 1.5129055315177493, "learning_rate": 2.349856680073959e-06, "loss": 0.2665, "step": 10932 }, { "epoch": 0.69, "grad_norm": 0.6849296253493475, "learning_rate": 2.34899308762485e-06, "loss": 0.5027, "step": 10933 }, { "epoch": 0.69, "grad_norm": 5.583730405393349, "learning_rate": 2.34812960517073e-06, "loss": 0.2675, "step": 10934 }, { "epoch": 0.69, "grad_norm": 1.4774859424883449, "learning_rate": 2.347266232747419e-06, "loss": 0.2637, "step": 10935 }, { "epoch": 0.69, "grad_norm": 2.05368579524338, "learning_rate": 2.346402970390748e-06, "loss": 0.2647, "step": 10936 }, { "epoch": 0.69, "grad_norm": 2.0026683642084535, "learning_rate": 2.345539818136529e-06, "loss": 0.2465, "step": 10937 }, { "epoch": 0.69, "grad_norm": 2.2468664922163395, "learning_rate": 2.344676776020579e-06, "loss": 0.25, "step": 10938 }, { "epoch": 0.69, "grad_norm": 2.3456737338948885, "learning_rate": 2.343813844078704e-06, "loss": 0.2531, "step": 10939 }, { "epoch": 0.69, "grad_norm": 2.2275206115756694, "learning_rate": 2.3429510223467116e-06, "loss": 0.262, "step": 10940 }, { "epoch": 0.69, "grad_norm": 1.8450804172787154, "learning_rate": 2.3420883108603997e-06, "loss": 0.2921, "step": 10941 }, { "epoch": 0.69, "grad_norm": 7.667519709873651, "learning_rate": 2.3412257096555663e-06, "loss": 0.2575, "step": 10942 }, { "epoch": 0.69, "grad_norm": 2.0751679166352885, "learning_rate": 2.3403632187679987e-06, "loss": 0.2628, "step": 10943 }, { "epoch": 0.69, "grad_norm": 3.46009451010248, "learning_rate": 2.3395008382334873e-06, "loss": 0.2603, "step": 10944 }, { "epoch": 0.69, "grad_norm": 2.63868910424025, "learning_rate": 2.3386385680878116e-06, "loss": 0.2651, "step": 10945 }, { "epoch": 0.69, "grad_norm": 3.0428530019975164, "learning_rate": 2.3377764083667494e-06, "loss": 0.2817, "step": 10946 }, { "epoch": 0.69, "grad_norm": 8.260584311841463, "learning_rate": 2.33691435910607e-06, "loss": 0.2916, "step": 10947 }, { "epoch": 0.69, "grad_norm": 1.435284089693954, "learning_rate": 2.336052420341544e-06, "loss": 0.2585, "step": 10948 }, { "epoch": 0.69, "grad_norm": 4.1945625299153075, "learning_rate": 2.335190592108937e-06, "loss": 0.2737, "step": 10949 }, { "epoch": 0.69, "grad_norm": 1.6983782016735072, "learning_rate": 2.3343288744440055e-06, "loss": 0.2687, "step": 10950 }, { "epoch": 0.69, "grad_norm": 1.9708341111944603, "learning_rate": 2.333467267382502e-06, "loss": 0.2581, "step": 10951 }, { "epoch": 0.69, "grad_norm": 6.119651187058837, "learning_rate": 2.3326057709601795e-06, "loss": 0.2765, "step": 10952 }, { "epoch": 0.69, "grad_norm": 2.8961854399545466, "learning_rate": 2.331744385212782e-06, "loss": 0.2429, "step": 10953 }, { "epoch": 0.69, "grad_norm": 2.2148688811033104, "learning_rate": 2.330883110176049e-06, "loss": 0.268, "step": 10954 }, { "epoch": 0.69, "grad_norm": 2.9929853204084975, "learning_rate": 2.3300219458857144e-06, "loss": 0.2792, "step": 10955 }, { "epoch": 0.69, "grad_norm": 2.621514330675251, "learning_rate": 2.3291608923775117e-06, "loss": 0.2754, "step": 10956 }, { "epoch": 0.69, "grad_norm": 6.704714025645811, "learning_rate": 2.3282999496871694e-06, "loss": 0.2779, "step": 10957 }, { "epoch": 0.69, "grad_norm": 1.5268878406254864, "learning_rate": 2.327439117850408e-06, "loss": 0.2488, "step": 10958 }, { "epoch": 0.69, "grad_norm": 1.6423044441394652, "learning_rate": 2.3265783969029425e-06, "loss": 0.2549, "step": 10959 }, { "epoch": 0.69, "grad_norm": 1.4383915248263024, "learning_rate": 2.32571778688049e-06, "loss": 0.243, "step": 10960 }, { "epoch": 0.69, "grad_norm": 1.7243666016070005, "learning_rate": 2.3248572878187566e-06, "loss": 0.2502, "step": 10961 }, { "epoch": 0.69, "grad_norm": 4.910469847432548, "learning_rate": 2.323996899753445e-06, "loss": 0.2767, "step": 10962 }, { "epoch": 0.69, "grad_norm": 2.2472062476080032, "learning_rate": 2.3231366227202523e-06, "loss": 0.2494, "step": 10963 }, { "epoch": 0.69, "grad_norm": 2.5338759061150538, "learning_rate": 2.3222764567548795e-06, "loss": 0.2517, "step": 10964 }, { "epoch": 0.69, "grad_norm": 2.02135852969192, "learning_rate": 2.3214164018930124e-06, "loss": 0.2633, "step": 10965 }, { "epoch": 0.69, "grad_norm": 2.0287612662433467, "learning_rate": 2.3205564581703367e-06, "loss": 0.2604, "step": 10966 }, { "epoch": 0.69, "grad_norm": 1.7936296361297288, "learning_rate": 2.3196966256225306e-06, "loss": 0.2523, "step": 10967 }, { "epoch": 0.69, "grad_norm": 3.7106428506218263, "learning_rate": 2.318836904285274e-06, "loss": 0.3034, "step": 10968 }, { "epoch": 0.69, "grad_norm": 2.1569842069850975, "learning_rate": 2.317977294194237e-06, "loss": 0.2735, "step": 10969 }, { "epoch": 0.69, "grad_norm": 1.4835992143932462, "learning_rate": 2.3171177953850837e-06, "loss": 0.2524, "step": 10970 }, { "epoch": 0.69, "grad_norm": 2.3607794822239407, "learning_rate": 2.3162584078934785e-06, "loss": 0.2833, "step": 10971 }, { "epoch": 0.69, "grad_norm": 1.6030701736115138, "learning_rate": 2.315399131755081e-06, "loss": 0.2819, "step": 10972 }, { "epoch": 0.69, "grad_norm": 2.975997946367097, "learning_rate": 2.314539967005541e-06, "loss": 0.2594, "step": 10973 }, { "epoch": 0.69, "grad_norm": 2.213239373020768, "learning_rate": 2.313680913680509e-06, "loss": 0.2581, "step": 10974 }, { "epoch": 0.69, "grad_norm": 2.0669890418402885, "learning_rate": 2.312821971815625e-06, "loss": 0.2738, "step": 10975 }, { "epoch": 0.69, "grad_norm": 10.436783865483608, "learning_rate": 2.3119631414465317e-06, "loss": 0.2694, "step": 10976 }, { "epoch": 0.69, "grad_norm": 10.107960424948507, "learning_rate": 2.3111044226088625e-06, "loss": 0.2808, "step": 10977 }, { "epoch": 0.69, "grad_norm": 1.5052658011789797, "learning_rate": 2.3102458153382446e-06, "loss": 0.2551, "step": 10978 }, { "epoch": 0.69, "grad_norm": 0.5972256617506898, "learning_rate": 2.309387319670306e-06, "loss": 0.4581, "step": 10979 }, { "epoch": 0.69, "grad_norm": 2.3041711980146915, "learning_rate": 2.3085289356406682e-06, "loss": 0.2531, "step": 10980 }, { "epoch": 0.69, "grad_norm": 2.740272779062172, "learning_rate": 2.3076706632849448e-06, "loss": 0.2681, "step": 10981 }, { "epoch": 0.69, "grad_norm": 5.0874210368604125, "learning_rate": 2.3068125026387483e-06, "loss": 0.2864, "step": 10982 }, { "epoch": 0.69, "grad_norm": 2.2536115526514644, "learning_rate": 2.3059544537376834e-06, "loss": 0.2549, "step": 10983 }, { "epoch": 0.69, "grad_norm": 3.1361431338645454, "learning_rate": 2.3050965166173543e-06, "loss": 0.2565, "step": 10984 }, { "epoch": 0.69, "grad_norm": 2.2496520356866707, "learning_rate": 2.3042386913133558e-06, "loss": 0.2632, "step": 10985 }, { "epoch": 0.69, "grad_norm": 1.9677520799895307, "learning_rate": 2.303380977861284e-06, "loss": 0.26, "step": 10986 }, { "epoch": 0.69, "grad_norm": 1.5742895822778318, "learning_rate": 2.3025233762967238e-06, "loss": 0.2553, "step": 10987 }, { "epoch": 0.69, "grad_norm": 1.8123616300708694, "learning_rate": 2.3016658866552618e-06, "loss": 0.2454, "step": 10988 }, { "epoch": 0.69, "grad_norm": 1.888777549190069, "learning_rate": 2.3008085089724743e-06, "loss": 0.251, "step": 10989 }, { "epoch": 0.69, "grad_norm": 1.8074781999311407, "learning_rate": 2.299951243283936e-06, "loss": 0.2707, "step": 10990 }, { "epoch": 0.69, "grad_norm": 2.4374729526319707, "learning_rate": 2.299094089625215e-06, "loss": 0.2717, "step": 10991 }, { "epoch": 0.69, "grad_norm": 2.2798070048658396, "learning_rate": 2.2982370480318794e-06, "loss": 0.2607, "step": 10992 }, { "epoch": 0.69, "grad_norm": 4.9999077928147075, "learning_rate": 2.297380118539486e-06, "loss": 0.2686, "step": 10993 }, { "epoch": 0.69, "grad_norm": 1.64007053011502, "learning_rate": 2.2965233011835936e-06, "loss": 0.2602, "step": 10994 }, { "epoch": 0.69, "grad_norm": 2.910541496731146, "learning_rate": 2.295666595999749e-06, "loss": 0.2669, "step": 10995 }, { "epoch": 0.69, "grad_norm": 1.788300761654256, "learning_rate": 2.294810003023503e-06, "loss": 0.275, "step": 10996 }, { "epoch": 0.69, "grad_norm": 1.442347356213636, "learning_rate": 2.293953522290395e-06, "loss": 0.261, "step": 10997 }, { "epoch": 0.69, "grad_norm": 1.8529495014929396, "learning_rate": 2.2930971538359612e-06, "loss": 0.2622, "step": 10998 }, { "epoch": 0.69, "grad_norm": 1.1671811736080213, "learning_rate": 2.292240897695734e-06, "loss": 0.264, "step": 10999 }, { "epoch": 0.69, "grad_norm": 1.9863585472536043, "learning_rate": 2.2913847539052405e-06, "loss": 0.2704, "step": 11000 }, { "epoch": 0.69, "grad_norm": 1.4190139119943348, "learning_rate": 2.2905287225000062e-06, "loss": 0.2787, "step": 11001 }, { "epoch": 0.69, "grad_norm": 2.967742712789526, "learning_rate": 2.2896728035155487e-06, "loss": 0.2645, "step": 11002 }, { "epoch": 0.69, "grad_norm": 1.5972908958120042, "learning_rate": 2.288816996987378e-06, "loss": 0.2608, "step": 11003 }, { "epoch": 0.69, "grad_norm": 1.698590185358748, "learning_rate": 2.287961302951008e-06, "loss": 0.2595, "step": 11004 }, { "epoch": 0.69, "grad_norm": 2.511746720975442, "learning_rate": 2.2871057214419403e-06, "loss": 0.2618, "step": 11005 }, { "epoch": 0.69, "grad_norm": 3.0104081680942985, "learning_rate": 2.2862502524956748e-06, "loss": 0.2788, "step": 11006 }, { "epoch": 0.69, "grad_norm": 4.437342448768188, "learning_rate": 2.285394896147705e-06, "loss": 0.2695, "step": 11007 }, { "epoch": 0.69, "grad_norm": 6.567028789019159, "learning_rate": 2.284539652433522e-06, "loss": 0.2776, "step": 11008 }, { "epoch": 0.69, "grad_norm": 2.3600236114539146, "learning_rate": 2.283684521388615e-06, "loss": 0.2813, "step": 11009 }, { "epoch": 0.69, "grad_norm": 1.4460102376280342, "learning_rate": 2.282829503048461e-06, "loss": 0.2683, "step": 11010 }, { "epoch": 0.69, "grad_norm": 2.765288002276459, "learning_rate": 2.2819745974485357e-06, "loss": 0.2613, "step": 11011 }, { "epoch": 0.69, "grad_norm": 3.0838044190318, "learning_rate": 2.281119804624314e-06, "loss": 0.2598, "step": 11012 }, { "epoch": 0.69, "grad_norm": 2.331842545145046, "learning_rate": 2.2802651246112606e-06, "loss": 0.2604, "step": 11013 }, { "epoch": 0.69, "grad_norm": 2.082322749369407, "learning_rate": 2.279410557444838e-06, "loss": 0.2504, "step": 11014 }, { "epoch": 0.69, "grad_norm": 3.6204593116768944, "learning_rate": 2.278556103160502e-06, "loss": 0.2498, "step": 11015 }, { "epoch": 0.69, "grad_norm": 1.3905597247715427, "learning_rate": 2.2777017617937073e-06, "loss": 0.2799, "step": 11016 }, { "epoch": 0.69, "grad_norm": 2.472427955573566, "learning_rate": 2.2768475333799035e-06, "loss": 0.247, "step": 11017 }, { "epoch": 0.69, "grad_norm": 1.6162241556398924, "learning_rate": 2.2759934179545325e-06, "loss": 0.2591, "step": 11018 }, { "epoch": 0.69, "grad_norm": 1.9127677604285067, "learning_rate": 2.275139415553031e-06, "loss": 0.2582, "step": 11019 }, { "epoch": 0.69, "grad_norm": 1.7527933414260848, "learning_rate": 2.2742855262108364e-06, "loss": 0.2616, "step": 11020 }, { "epoch": 0.69, "grad_norm": 3.7751755045145465, "learning_rate": 2.273431749963377e-06, "loss": 0.2736, "step": 11021 }, { "epoch": 0.69, "grad_norm": 1.5290407432488182, "learning_rate": 2.272578086846078e-06, "loss": 0.2625, "step": 11022 }, { "epoch": 0.69, "grad_norm": 2.5421788814637516, "learning_rate": 2.271724536894354e-06, "loss": 0.2705, "step": 11023 }, { "epoch": 0.69, "grad_norm": 2.8246065823332165, "learning_rate": 2.270871100143629e-06, "loss": 0.2605, "step": 11024 }, { "epoch": 0.69, "grad_norm": 4.860304452842204, "learning_rate": 2.27001777662931e-06, "loss": 0.2649, "step": 11025 }, { "epoch": 0.69, "grad_norm": 2.445004033177439, "learning_rate": 2.2691645663868016e-06, "loss": 0.2386, "step": 11026 }, { "epoch": 0.69, "grad_norm": 2.2881442401364325, "learning_rate": 2.2683114694515045e-06, "loss": 0.2618, "step": 11027 }, { "epoch": 0.69, "grad_norm": 2.64561475847575, "learning_rate": 2.2674584858588185e-06, "loss": 0.2607, "step": 11028 }, { "epoch": 0.69, "grad_norm": 1.3218454980779675, "learning_rate": 2.266605615644133e-06, "loss": 0.2588, "step": 11029 }, { "epoch": 0.69, "grad_norm": 1.9211058016416054, "learning_rate": 2.265752858842834e-06, "loss": 0.2549, "step": 11030 }, { "epoch": 0.69, "grad_norm": 1.5657039121450247, "learning_rate": 2.2649002154903056e-06, "loss": 0.2798, "step": 11031 }, { "epoch": 0.69, "grad_norm": 2.7234951650213524, "learning_rate": 2.2640476856219267e-06, "loss": 0.2708, "step": 11032 }, { "epoch": 0.69, "grad_norm": 1.779077471648995, "learning_rate": 2.263195269273069e-06, "loss": 0.2506, "step": 11033 }, { "epoch": 0.69, "grad_norm": 4.195402374947855, "learning_rate": 2.2623429664791015e-06, "loss": 0.2627, "step": 11034 }, { "epoch": 0.69, "grad_norm": 2.4381753877699035, "learning_rate": 2.261490777275384e-06, "loss": 0.2743, "step": 11035 }, { "epoch": 0.69, "grad_norm": 1.5370562146420013, "learning_rate": 2.260638701697281e-06, "loss": 0.2655, "step": 11036 }, { "epoch": 0.69, "grad_norm": 4.194549114731265, "learning_rate": 2.259786739780143e-06, "loss": 0.2479, "step": 11037 }, { "epoch": 0.69, "grad_norm": 3.5973098267399335, "learning_rate": 2.2589348915593197e-06, "loss": 0.2622, "step": 11038 }, { "epoch": 0.69, "grad_norm": 1.3700132494673471, "learning_rate": 2.2580831570701557e-06, "loss": 0.2622, "step": 11039 }, { "epoch": 0.69, "grad_norm": 2.890398019948207, "learning_rate": 2.2572315363479935e-06, "loss": 0.2653, "step": 11040 }, { "epoch": 0.69, "grad_norm": 1.7266076924332996, "learning_rate": 2.2563800294281667e-06, "loss": 0.2753, "step": 11041 }, { "epoch": 0.69, "grad_norm": 1.7583690345518133, "learning_rate": 2.2555286363460053e-06, "loss": 0.2725, "step": 11042 }, { "epoch": 0.69, "grad_norm": 1.7996428411083296, "learning_rate": 2.254677357136834e-06, "loss": 0.2598, "step": 11043 }, { "epoch": 0.69, "grad_norm": 4.019957369916979, "learning_rate": 2.2538261918359776e-06, "loss": 0.2731, "step": 11044 }, { "epoch": 0.69, "grad_norm": 1.4689716646829647, "learning_rate": 2.252975140478748e-06, "loss": 0.2626, "step": 11045 }, { "epoch": 0.69, "grad_norm": 1.377654027126183, "learning_rate": 2.2521242031004613e-06, "loss": 0.2428, "step": 11046 }, { "epoch": 0.69, "grad_norm": 2.0002552348555516, "learning_rate": 2.2512733797364202e-06, "loss": 0.2699, "step": 11047 }, { "epoch": 0.69, "grad_norm": 1.6584052503371383, "learning_rate": 2.25042267042193e-06, "loss": 0.2887, "step": 11048 }, { "epoch": 0.69, "grad_norm": 1.39934382994924, "learning_rate": 2.249572075192288e-06, "loss": 0.2663, "step": 11049 }, { "epoch": 0.69, "grad_norm": 0.5849785628171148, "learning_rate": 2.2487215940827856e-06, "loss": 0.4702, "step": 11050 }, { "epoch": 0.69, "grad_norm": 1.7096365971610517, "learning_rate": 2.247871227128709e-06, "loss": 0.2526, "step": 11051 }, { "epoch": 0.7, "grad_norm": 1.5156501599784997, "learning_rate": 2.247020974365344e-06, "loss": 0.2687, "step": 11052 }, { "epoch": 0.7, "grad_norm": 2.368127450149746, "learning_rate": 2.24617083582797e-06, "loss": 0.2711, "step": 11053 }, { "epoch": 0.7, "grad_norm": 1.861099343005788, "learning_rate": 2.2453208115518595e-06, "loss": 0.2611, "step": 11054 }, { "epoch": 0.7, "grad_norm": 2.69917155539132, "learning_rate": 2.2444709015722792e-06, "loss": 0.2694, "step": 11055 }, { "epoch": 0.7, "grad_norm": 1.808405165629587, "learning_rate": 2.2436211059244977e-06, "loss": 0.255, "step": 11056 }, { "epoch": 0.7, "grad_norm": 1.9282026490070858, "learning_rate": 2.2427714246437726e-06, "loss": 0.2549, "step": 11057 }, { "epoch": 0.7, "grad_norm": 2.073363292368241, "learning_rate": 2.241921857765358e-06, "loss": 0.2526, "step": 11058 }, { "epoch": 0.7, "grad_norm": 2.1626567118325903, "learning_rate": 2.241072405324503e-06, "loss": 0.2602, "step": 11059 }, { "epoch": 0.7, "grad_norm": 2.5080723008362154, "learning_rate": 2.240223067356454e-06, "loss": 0.269, "step": 11060 }, { "epoch": 0.7, "grad_norm": 1.632891593904277, "learning_rate": 2.239373843896454e-06, "loss": 0.2847, "step": 11061 }, { "epoch": 0.7, "grad_norm": 2.089561431675379, "learning_rate": 2.2385247349797367e-06, "loss": 0.2714, "step": 11062 }, { "epoch": 0.7, "grad_norm": 1.538838110790415, "learning_rate": 2.237675740641531e-06, "loss": 0.2515, "step": 11063 }, { "epoch": 0.7, "grad_norm": 1.681589298357147, "learning_rate": 2.2368268609170673e-06, "loss": 0.2563, "step": 11064 }, { "epoch": 0.7, "grad_norm": 1.4931947400211565, "learning_rate": 2.235978095841565e-06, "loss": 0.2504, "step": 11065 }, { "epoch": 0.7, "grad_norm": 3.0144539321232036, "learning_rate": 2.2351294454502408e-06, "loss": 0.2758, "step": 11066 }, { "epoch": 0.7, "grad_norm": 2.223518759245911, "learning_rate": 2.234280909778305e-06, "loss": 0.2767, "step": 11067 }, { "epoch": 0.7, "grad_norm": 1.3950419129427172, "learning_rate": 2.2334324888609673e-06, "loss": 0.2549, "step": 11068 }, { "epoch": 0.7, "grad_norm": 5.276218192016725, "learning_rate": 2.2325841827334306e-06, "loss": 0.2688, "step": 11069 }, { "epoch": 0.7, "grad_norm": 7.855309539941102, "learning_rate": 2.2317359914308924e-06, "loss": 0.279, "step": 11070 }, { "epoch": 0.7, "grad_norm": 3.116590015760823, "learning_rate": 2.2308879149885426e-06, "loss": 0.2692, "step": 11071 }, { "epoch": 0.7, "grad_norm": 1.923673125791136, "learning_rate": 2.2300399534415733e-06, "loss": 0.2587, "step": 11072 }, { "epoch": 0.7, "grad_norm": 8.903104121230704, "learning_rate": 2.229192106825167e-06, "loss": 0.2644, "step": 11073 }, { "epoch": 0.7, "grad_norm": 1.8260474646408806, "learning_rate": 2.2283443751745015e-06, "loss": 0.2615, "step": 11074 }, { "epoch": 0.7, "grad_norm": 1.9848969494847946, "learning_rate": 2.2274967585247485e-06, "loss": 0.2506, "step": 11075 }, { "epoch": 0.7, "grad_norm": 1.6350166096238177, "learning_rate": 2.2266492569110804e-06, "loss": 0.254, "step": 11076 }, { "epoch": 0.7, "grad_norm": 2.573425443333891, "learning_rate": 2.225801870368662e-06, "loss": 0.2526, "step": 11077 }, { "epoch": 0.7, "grad_norm": 1.4488845512042205, "learning_rate": 2.2249545989326516e-06, "loss": 0.2705, "step": 11078 }, { "epoch": 0.7, "grad_norm": 2.009221734149319, "learning_rate": 2.224107442638202e-06, "loss": 0.2724, "step": 11079 }, { "epoch": 0.7, "grad_norm": 2.10424937516931, "learning_rate": 2.223260401520467e-06, "loss": 0.2525, "step": 11080 }, { "epoch": 0.7, "grad_norm": 1.6278741200920794, "learning_rate": 2.2224134756145897e-06, "loss": 0.2737, "step": 11081 }, { "epoch": 0.7, "grad_norm": 2.0663359360044953, "learning_rate": 2.221566664955709e-06, "loss": 0.2653, "step": 11082 }, { "epoch": 0.7, "grad_norm": 3.455547465712494, "learning_rate": 2.2207199695789653e-06, "loss": 0.2879, "step": 11083 }, { "epoch": 0.7, "grad_norm": 1.793989986136283, "learning_rate": 2.219873389519484e-06, "loss": 0.2757, "step": 11084 }, { "epoch": 0.7, "grad_norm": 1.8067461841811359, "learning_rate": 2.2190269248123958e-06, "loss": 0.2644, "step": 11085 }, { "epoch": 0.7, "grad_norm": 2.9519397446729565, "learning_rate": 2.21818057549282e-06, "loss": 0.286, "step": 11086 }, { "epoch": 0.7, "grad_norm": 1.6867546979026484, "learning_rate": 2.2173343415958715e-06, "loss": 0.2628, "step": 11087 }, { "epoch": 0.7, "grad_norm": 4.901729711202853, "learning_rate": 2.2164882231566655e-06, "loss": 0.259, "step": 11088 }, { "epoch": 0.7, "grad_norm": 1.6494661718749033, "learning_rate": 2.2156422202103067e-06, "loss": 0.2563, "step": 11089 }, { "epoch": 0.7, "grad_norm": 2.048678844580366, "learning_rate": 2.2147963327918964e-06, "loss": 0.2569, "step": 11090 }, { "epoch": 0.7, "grad_norm": 1.4641382326084147, "learning_rate": 2.2139505609365352e-06, "loss": 0.2874, "step": 11091 }, { "epoch": 0.7, "grad_norm": 2.515578197114846, "learning_rate": 2.213104904679312e-06, "loss": 0.2614, "step": 11092 }, { "epoch": 0.7, "grad_norm": 1.5400364181574417, "learning_rate": 2.212259364055318e-06, "loss": 0.2852, "step": 11093 }, { "epoch": 0.7, "grad_norm": 2.0606239270475943, "learning_rate": 2.2114139390996348e-06, "loss": 0.2686, "step": 11094 }, { "epoch": 0.7, "grad_norm": 1.3630679878668983, "learning_rate": 2.2105686298473384e-06, "loss": 0.2529, "step": 11095 }, { "epoch": 0.7, "grad_norm": 2.2767080461181015, "learning_rate": 2.2097234363335058e-06, "loss": 0.256, "step": 11096 }, { "epoch": 0.7, "grad_norm": 1.5891959395349309, "learning_rate": 2.2088783585932024e-06, "loss": 0.2578, "step": 11097 }, { "epoch": 0.7, "grad_norm": 2.074386376433146, "learning_rate": 2.2080333966614953e-06, "loss": 0.2602, "step": 11098 }, { "epoch": 0.7, "grad_norm": 1.8349753863808815, "learning_rate": 2.2071885505734413e-06, "loss": 0.2693, "step": 11099 }, { "epoch": 0.7, "grad_norm": 1.8212975785039227, "learning_rate": 2.2063438203640934e-06, "loss": 0.2584, "step": 11100 }, { "epoch": 0.7, "grad_norm": 4.166344451571758, "learning_rate": 2.2054992060685044e-06, "loss": 0.2836, "step": 11101 }, { "epoch": 0.7, "grad_norm": 2.9066922810415483, "learning_rate": 2.204654707721717e-06, "loss": 0.2511, "step": 11102 }, { "epoch": 0.7, "grad_norm": 2.638776594110408, "learning_rate": 2.2038103253587685e-06, "loss": 0.2519, "step": 11103 }, { "epoch": 0.7, "grad_norm": 2.9514373854405695, "learning_rate": 2.2029660590146983e-06, "loss": 0.2416, "step": 11104 }, { "epoch": 0.7, "grad_norm": 1.6263658028104124, "learning_rate": 2.202121908724533e-06, "loss": 0.2704, "step": 11105 }, { "epoch": 0.7, "grad_norm": 2.3227303615510393, "learning_rate": 2.2012778745233003e-06, "loss": 0.2711, "step": 11106 }, { "epoch": 0.7, "grad_norm": 2.7892407403209454, "learning_rate": 2.200433956446019e-06, "loss": 0.2743, "step": 11107 }, { "epoch": 0.7, "grad_norm": 1.6171984573005869, "learning_rate": 2.199590154527706e-06, "loss": 0.2783, "step": 11108 }, { "epoch": 0.7, "grad_norm": 1.6675790926600702, "learning_rate": 2.198746468803372e-06, "loss": 0.2618, "step": 11109 }, { "epoch": 0.7, "grad_norm": 2.107905547457524, "learning_rate": 2.1979028993080233e-06, "loss": 0.265, "step": 11110 }, { "epoch": 0.7, "grad_norm": 1.9079694562089151, "learning_rate": 2.1970594460766575e-06, "loss": 0.2471, "step": 11111 }, { "epoch": 0.7, "grad_norm": 1.3998216764906461, "learning_rate": 2.196216109144275e-06, "loss": 0.2611, "step": 11112 }, { "epoch": 0.7, "grad_norm": 4.071212472082706, "learning_rate": 2.195372888545867e-06, "loss": 0.2594, "step": 11113 }, { "epoch": 0.7, "grad_norm": 5.185912615809283, "learning_rate": 2.1945297843164197e-06, "loss": 0.2772, "step": 11114 }, { "epoch": 0.7, "grad_norm": 2.572312217957228, "learning_rate": 2.193686796490913e-06, "loss": 0.2617, "step": 11115 }, { "epoch": 0.7, "grad_norm": 1.7676879298770152, "learning_rate": 2.1928439251043278e-06, "loss": 0.2563, "step": 11116 }, { "epoch": 0.7, "grad_norm": 2.734980177494873, "learning_rate": 2.1920011701916337e-06, "loss": 0.2686, "step": 11117 }, { "epoch": 0.7, "grad_norm": 4.030139485054264, "learning_rate": 2.1911585317877986e-06, "loss": 0.2384, "step": 11118 }, { "epoch": 0.7, "grad_norm": 2.232825620521009, "learning_rate": 2.1903160099277835e-06, "loss": 0.2612, "step": 11119 }, { "epoch": 0.7, "grad_norm": 1.6215912547677702, "learning_rate": 2.1894736046465482e-06, "loss": 0.2608, "step": 11120 }, { "epoch": 0.7, "grad_norm": 1.8951279261507745, "learning_rate": 2.1886313159790463e-06, "loss": 0.2862, "step": 11121 }, { "epoch": 0.7, "grad_norm": 2.1648138596848687, "learning_rate": 2.187789143960225e-06, "loss": 0.2668, "step": 11122 }, { "epoch": 0.7, "grad_norm": 3.05366253846743, "learning_rate": 2.186947088625025e-06, "loss": 0.2593, "step": 11123 }, { "epoch": 0.7, "grad_norm": 0.621587619191963, "learning_rate": 2.1861051500083886e-06, "loss": 0.4774, "step": 11124 }, { "epoch": 0.7, "grad_norm": 7.862334799297239, "learning_rate": 2.185263328145248e-06, "loss": 0.267, "step": 11125 }, { "epoch": 0.7, "grad_norm": 0.6372825072994112, "learning_rate": 2.184421623070531e-06, "loss": 0.4961, "step": 11126 }, { "epoch": 0.7, "grad_norm": 5.111011552477857, "learning_rate": 2.1835800348191604e-06, "loss": 0.2583, "step": 11127 }, { "epoch": 0.7, "grad_norm": 0.5497573113746697, "learning_rate": 2.182738563426057e-06, "loss": 0.4468, "step": 11128 }, { "epoch": 0.7, "grad_norm": 1.9332307363591024, "learning_rate": 2.181897208926136e-06, "loss": 0.2603, "step": 11129 }, { "epoch": 0.7, "grad_norm": 1.3140671813940632, "learning_rate": 2.1810559713543052e-06, "loss": 0.2479, "step": 11130 }, { "epoch": 0.7, "grad_norm": 1.5273597655369189, "learning_rate": 2.1802148507454675e-06, "loss": 0.2669, "step": 11131 }, { "epoch": 0.7, "grad_norm": 1.6823541823304715, "learning_rate": 2.1793738471345253e-06, "loss": 0.2507, "step": 11132 }, { "epoch": 0.7, "grad_norm": 1.5571655208137851, "learning_rate": 2.1785329605563733e-06, "loss": 0.2642, "step": 11133 }, { "epoch": 0.7, "grad_norm": 3.2515328013997116, "learning_rate": 2.1776921910458982e-06, "loss": 0.2714, "step": 11134 }, { "epoch": 0.7, "grad_norm": 2.2716633686626753, "learning_rate": 2.1768515386379886e-06, "loss": 0.2675, "step": 11135 }, { "epoch": 0.7, "grad_norm": 2.7139707947096543, "learning_rate": 2.1760110033675215e-06, "loss": 0.2651, "step": 11136 }, { "epoch": 0.7, "grad_norm": 3.818952731486872, "learning_rate": 2.175170585269375e-06, "loss": 0.2651, "step": 11137 }, { "epoch": 0.7, "grad_norm": 2.7930437580722884, "learning_rate": 2.174330284378419e-06, "loss": 0.2592, "step": 11138 }, { "epoch": 0.7, "grad_norm": 1.488512840589353, "learning_rate": 2.1734901007295166e-06, "loss": 0.263, "step": 11139 }, { "epoch": 0.7, "grad_norm": 3.3384207012645537, "learning_rate": 2.172650034357532e-06, "loss": 0.3014, "step": 11140 }, { "epoch": 0.7, "grad_norm": 1.7797887466403626, "learning_rate": 2.1718100852973193e-06, "loss": 0.2636, "step": 11141 }, { "epoch": 0.7, "grad_norm": 2.2366899928859194, "learning_rate": 2.1709702535837286e-06, "loss": 0.263, "step": 11142 }, { "epoch": 0.7, "grad_norm": 1.8869791297124512, "learning_rate": 2.1701305392516083e-06, "loss": 0.2718, "step": 11143 }, { "epoch": 0.7, "grad_norm": 1.627419730919339, "learning_rate": 2.169290942335796e-06, "loss": 0.2685, "step": 11144 }, { "epoch": 0.7, "grad_norm": 3.9933536815401744, "learning_rate": 2.1684514628711327e-06, "loss": 0.2591, "step": 11145 }, { "epoch": 0.7, "grad_norm": 0.585672142831425, "learning_rate": 2.167612100892448e-06, "loss": 0.4837, "step": 11146 }, { "epoch": 0.7, "grad_norm": 1.9656785182238308, "learning_rate": 2.166772856434566e-06, "loss": 0.2734, "step": 11147 }, { "epoch": 0.7, "grad_norm": 1.689638391273336, "learning_rate": 2.1659337295323117e-06, "loss": 0.2667, "step": 11148 }, { "epoch": 0.7, "grad_norm": 1.6796494381982516, "learning_rate": 2.1650947202204998e-06, "loss": 0.2477, "step": 11149 }, { "epoch": 0.7, "grad_norm": 1.486513243309083, "learning_rate": 2.164255828533945e-06, "loss": 0.2616, "step": 11150 }, { "epoch": 0.7, "grad_norm": 3.1551226352039694, "learning_rate": 2.1634170545074525e-06, "loss": 0.2679, "step": 11151 }, { "epoch": 0.7, "grad_norm": 2.074976292114402, "learning_rate": 2.162578398175823e-06, "loss": 0.2644, "step": 11152 }, { "epoch": 0.7, "grad_norm": 2.784326610502005, "learning_rate": 2.1617398595738575e-06, "loss": 0.2662, "step": 11153 }, { "epoch": 0.7, "grad_norm": 2.0804451457232425, "learning_rate": 2.1609014387363463e-06, "loss": 0.253, "step": 11154 }, { "epoch": 0.7, "grad_norm": 1.8393783950324805, "learning_rate": 2.1600631356980755e-06, "loss": 0.2741, "step": 11155 }, { "epoch": 0.7, "grad_norm": 3.8965425607078283, "learning_rate": 2.1592249504938317e-06, "loss": 0.2648, "step": 11156 }, { "epoch": 0.7, "grad_norm": 3.13709796683566, "learning_rate": 2.1583868831583883e-06, "loss": 0.3057, "step": 11157 }, { "epoch": 0.7, "grad_norm": 1.521775100929573, "learning_rate": 2.1575489337265226e-06, "loss": 0.2547, "step": 11158 }, { "epoch": 0.7, "grad_norm": 2.2896605781529495, "learning_rate": 2.156711102233e-06, "loss": 0.2598, "step": 11159 }, { "epoch": 0.7, "grad_norm": 3.0154422851143377, "learning_rate": 2.155873388712582e-06, "loss": 0.2795, "step": 11160 }, { "epoch": 0.7, "grad_norm": 2.2864212875100796, "learning_rate": 2.155035793200031e-06, "loss": 0.2646, "step": 11161 }, { "epoch": 0.7, "grad_norm": 1.6538919218949895, "learning_rate": 2.1541983157300983e-06, "loss": 0.2664, "step": 11162 }, { "epoch": 0.7, "grad_norm": 4.097220743349051, "learning_rate": 2.15336095633753e-06, "loss": 0.2788, "step": 11163 }, { "epoch": 0.7, "grad_norm": 1.620666279732925, "learning_rate": 2.152523715057072e-06, "loss": 0.2747, "step": 11164 }, { "epoch": 0.7, "grad_norm": 3.1825001412007596, "learning_rate": 2.151686591923465e-06, "loss": 0.2903, "step": 11165 }, { "epoch": 0.7, "grad_norm": 2.634309417557884, "learning_rate": 2.1508495869714395e-06, "loss": 0.2575, "step": 11166 }, { "epoch": 0.7, "grad_norm": 1.7076290668926548, "learning_rate": 2.1500127002357264e-06, "loss": 0.2708, "step": 11167 }, { "epoch": 0.7, "grad_norm": 1.545814349863931, "learning_rate": 2.1491759317510456e-06, "loss": 0.2598, "step": 11168 }, { "epoch": 0.7, "grad_norm": 1.8192730287147911, "learning_rate": 2.148339281552122e-06, "loss": 0.2671, "step": 11169 }, { "epoch": 0.7, "grad_norm": 0.5983696864805788, "learning_rate": 2.147502749673666e-06, "loss": 0.4713, "step": 11170 }, { "epoch": 0.7, "grad_norm": 2.4567805993697562, "learning_rate": 2.146666336150386e-06, "loss": 0.2584, "step": 11171 }, { "epoch": 0.7, "grad_norm": 2.7143646039687903, "learning_rate": 2.1458300410169875e-06, "loss": 0.2721, "step": 11172 }, { "epoch": 0.7, "grad_norm": 1.5423343491809198, "learning_rate": 2.1449938643081717e-06, "loss": 0.27, "step": 11173 }, { "epoch": 0.7, "grad_norm": 5.500103176962945, "learning_rate": 2.1441578060586322e-06, "loss": 0.2648, "step": 11174 }, { "epoch": 0.7, "grad_norm": 1.8847348174787146, "learning_rate": 2.1433218663030576e-06, "loss": 0.2666, "step": 11175 }, { "epoch": 0.7, "grad_norm": 1.5373001390888033, "learning_rate": 2.142486045076131e-06, "loss": 0.2703, "step": 11176 }, { "epoch": 0.7, "grad_norm": 1.4707709473272812, "learning_rate": 2.141650342412536e-06, "loss": 0.2726, "step": 11177 }, { "epoch": 0.7, "grad_norm": 1.477765128720051, "learning_rate": 2.1408147583469446e-06, "loss": 0.2521, "step": 11178 }, { "epoch": 0.7, "grad_norm": 2.38179731331009, "learning_rate": 2.139979292914026e-06, "loss": 0.2706, "step": 11179 }, { "epoch": 0.7, "grad_norm": 2.201913602413681, "learning_rate": 2.1391439461484463e-06, "loss": 0.2605, "step": 11180 }, { "epoch": 0.7, "grad_norm": 13.54492519426392, "learning_rate": 2.1383087180848685e-06, "loss": 0.2594, "step": 11181 }, { "epoch": 0.7, "grad_norm": 1.6837664152401797, "learning_rate": 2.137473608757944e-06, "loss": 0.2779, "step": 11182 }, { "epoch": 0.7, "grad_norm": 5.5342625439670545, "learning_rate": 2.1366386182023237e-06, "loss": 0.2605, "step": 11183 }, { "epoch": 0.7, "grad_norm": 2.238471559858704, "learning_rate": 2.1358037464526516e-06, "loss": 0.2699, "step": 11184 }, { "epoch": 0.7, "grad_norm": 4.21358807675687, "learning_rate": 2.1349689935435715e-06, "loss": 0.2727, "step": 11185 }, { "epoch": 0.7, "grad_norm": 3.2865253666288954, "learning_rate": 2.1341343595097163e-06, "loss": 0.3045, "step": 11186 }, { "epoch": 0.7, "grad_norm": 2.5527068162802222, "learning_rate": 2.133299844385715e-06, "loss": 0.2666, "step": 11187 }, { "epoch": 0.7, "grad_norm": 2.3260381470819143, "learning_rate": 2.1324654482061953e-06, "loss": 0.2555, "step": 11188 }, { "epoch": 0.7, "grad_norm": 0.6032118521629376, "learning_rate": 2.131631171005779e-06, "loss": 0.4699, "step": 11189 }, { "epoch": 0.7, "grad_norm": 2.7199801215884585, "learning_rate": 2.13079701281908e-06, "loss": 0.2771, "step": 11190 }, { "epoch": 0.7, "grad_norm": 1.7997946886436644, "learning_rate": 2.129962973680708e-06, "loss": 0.2753, "step": 11191 }, { "epoch": 0.7, "grad_norm": 3.3796796507655373, "learning_rate": 2.1291290536252716e-06, "loss": 0.2567, "step": 11192 }, { "epoch": 0.7, "grad_norm": 4.750675715309363, "learning_rate": 2.12829525268737e-06, "loss": 0.2799, "step": 11193 }, { "epoch": 0.7, "grad_norm": 2.40052063887953, "learning_rate": 2.1274615709015972e-06, "loss": 0.2622, "step": 11194 }, { "epoch": 0.7, "grad_norm": 3.4484046836847706, "learning_rate": 2.126628008302548e-06, "loss": 0.2883, "step": 11195 }, { "epoch": 0.7, "grad_norm": 2.1333827805240704, "learning_rate": 2.1257945649248048e-06, "loss": 0.2729, "step": 11196 }, { "epoch": 0.7, "grad_norm": 1.5699763365801473, "learning_rate": 2.1249612408029516e-06, "loss": 0.246, "step": 11197 }, { "epoch": 0.7, "grad_norm": 2.9683026273894892, "learning_rate": 2.124128035971563e-06, "loss": 0.2814, "step": 11198 }, { "epoch": 0.7, "grad_norm": 1.8845964088070704, "learning_rate": 2.123294950465209e-06, "loss": 0.268, "step": 11199 }, { "epoch": 0.7, "grad_norm": 1.4046556680794728, "learning_rate": 2.122461984318459e-06, "loss": 0.2496, "step": 11200 }, { "epoch": 0.7, "grad_norm": 2.362065342879254, "learning_rate": 2.121629137565872e-06, "loss": 0.2524, "step": 11201 }, { "epoch": 0.7, "grad_norm": 1.8259556858118575, "learning_rate": 2.1207964102420032e-06, "loss": 0.2635, "step": 11202 }, { "epoch": 0.7, "grad_norm": 2.946762179583013, "learning_rate": 2.119963802381407e-06, "loss": 0.2573, "step": 11203 }, { "epoch": 0.7, "grad_norm": 1.479033609249847, "learning_rate": 2.119131314018626e-06, "loss": 0.2742, "step": 11204 }, { "epoch": 0.7, "grad_norm": 0.5973115487074803, "learning_rate": 2.1182989451882056e-06, "loss": 0.4903, "step": 11205 }, { "epoch": 0.7, "grad_norm": 1.5642157796981417, "learning_rate": 2.117466695924681e-06, "loss": 0.2651, "step": 11206 }, { "epoch": 0.7, "grad_norm": 1.257603880333518, "learning_rate": 2.116634566262581e-06, "loss": 0.242, "step": 11207 }, { "epoch": 0.7, "grad_norm": 2.3102517941324026, "learning_rate": 2.115802556236436e-06, "loss": 0.2617, "step": 11208 }, { "epoch": 0.7, "grad_norm": 2.6392256961189067, "learning_rate": 2.1149706658807646e-06, "loss": 0.2691, "step": 11209 }, { "epoch": 0.7, "grad_norm": 1.976684480344322, "learning_rate": 2.1141388952300855e-06, "loss": 0.2957, "step": 11210 }, { "epoch": 0.71, "grad_norm": 2.8422656535467308, "learning_rate": 2.1133072443189106e-06, "loss": 0.259, "step": 11211 }, { "epoch": 0.71, "grad_norm": 1.367002481481189, "learning_rate": 2.112475713181743e-06, "loss": 0.2685, "step": 11212 }, { "epoch": 0.71, "grad_norm": 2.5502750509184016, "learning_rate": 2.1116443018530887e-06, "loss": 0.2928, "step": 11213 }, { "epoch": 0.71, "grad_norm": 2.695723953025711, "learning_rate": 2.110813010367443e-06, "loss": 0.2697, "step": 11214 }, { "epoch": 0.71, "grad_norm": 2.120399936015536, "learning_rate": 2.109981838759295e-06, "loss": 0.2632, "step": 11215 }, { "epoch": 0.71, "grad_norm": 0.6510816232567276, "learning_rate": 2.109150787063134e-06, "loss": 0.4772, "step": 11216 }, { "epoch": 0.71, "grad_norm": 1.6061274158898775, "learning_rate": 2.108319855313443e-06, "loss": 0.2485, "step": 11217 }, { "epoch": 0.71, "grad_norm": 2.1842445092804588, "learning_rate": 2.1074890435446976e-06, "loss": 0.2538, "step": 11218 }, { "epoch": 0.71, "grad_norm": 1.5670237583121522, "learning_rate": 2.1066583517913696e-06, "loss": 0.2578, "step": 11219 }, { "epoch": 0.71, "grad_norm": 2.618919175699726, "learning_rate": 2.105827780087924e-06, "loss": 0.2719, "step": 11220 }, { "epoch": 0.71, "grad_norm": 8.536684002560847, "learning_rate": 2.104997328468826e-06, "loss": 0.2759, "step": 11221 }, { "epoch": 0.71, "grad_norm": 0.5864349389049004, "learning_rate": 2.104166996968531e-06, "loss": 0.4442, "step": 11222 }, { "epoch": 0.71, "grad_norm": 25.57374727550384, "learning_rate": 2.10333678562149e-06, "loss": 0.2453, "step": 11223 }, { "epoch": 0.71, "grad_norm": 2.1942965238801406, "learning_rate": 2.1025066944621498e-06, "loss": 0.2526, "step": 11224 }, { "epoch": 0.71, "grad_norm": 0.5604876554963547, "learning_rate": 2.101676723524955e-06, "loss": 0.4653, "step": 11225 }, { "epoch": 0.71, "grad_norm": 1.538286455308112, "learning_rate": 2.1008468728443416e-06, "loss": 0.2652, "step": 11226 }, { "epoch": 0.71, "grad_norm": 4.305232031863097, "learning_rate": 2.1000171424547406e-06, "loss": 0.2627, "step": 11227 }, { "epoch": 0.71, "grad_norm": 1.5268653904148923, "learning_rate": 2.0991875323905773e-06, "loss": 0.2597, "step": 11228 }, { "epoch": 0.71, "grad_norm": 2.4579551686758565, "learning_rate": 2.0983580426862777e-06, "loss": 0.2527, "step": 11229 }, { "epoch": 0.71, "grad_norm": 1.6436004300885614, "learning_rate": 2.0975286733762575e-06, "loss": 0.2676, "step": 11230 }, { "epoch": 0.71, "grad_norm": 2.0157942786307443, "learning_rate": 2.096699424494925e-06, "loss": 0.2687, "step": 11231 }, { "epoch": 0.71, "grad_norm": 2.089680836571682, "learning_rate": 2.0958702960766907e-06, "loss": 0.275, "step": 11232 }, { "epoch": 0.71, "grad_norm": 2.4347347039415697, "learning_rate": 2.0950412881559584e-06, "loss": 0.2839, "step": 11233 }, { "epoch": 0.71, "grad_norm": 1.8412385516769503, "learning_rate": 2.094212400767122e-06, "loss": 0.2751, "step": 11234 }, { "epoch": 0.71, "grad_norm": 2.072658839230748, "learning_rate": 2.093383633944575e-06, "loss": 0.2532, "step": 11235 }, { "epoch": 0.71, "grad_norm": 2.642079530520852, "learning_rate": 2.092554987722702e-06, "loss": 0.2385, "step": 11236 }, { "epoch": 0.71, "grad_norm": 1.8786982703045854, "learning_rate": 2.091726462135888e-06, "loss": 0.278, "step": 11237 }, { "epoch": 0.71, "grad_norm": 1.8000350325657724, "learning_rate": 2.0908980572185093e-06, "loss": 0.2574, "step": 11238 }, { "epoch": 0.71, "grad_norm": 2.059791199192832, "learning_rate": 2.0900697730049353e-06, "loss": 0.2777, "step": 11239 }, { "epoch": 0.71, "grad_norm": 2.4467078106599707, "learning_rate": 2.089241609529535e-06, "loss": 0.2698, "step": 11240 }, { "epoch": 0.71, "grad_norm": 2.5454674748671775, "learning_rate": 2.0884135668266725e-06, "loss": 0.2583, "step": 11241 }, { "epoch": 0.71, "grad_norm": 1.5129349252160442, "learning_rate": 2.0875856449307026e-06, "loss": 0.2698, "step": 11242 }, { "epoch": 0.71, "grad_norm": 2.078105517035438, "learning_rate": 2.0867578438759766e-06, "loss": 0.2515, "step": 11243 }, { "epoch": 0.71, "grad_norm": 1.8883553602994059, "learning_rate": 2.085930163696841e-06, "loss": 0.2811, "step": 11244 }, { "epoch": 0.71, "grad_norm": 1.6460025422701143, "learning_rate": 2.0851026044276405e-06, "loss": 0.2616, "step": 11245 }, { "epoch": 0.71, "grad_norm": 1.6729825269022631, "learning_rate": 2.0842751661027087e-06, "loss": 0.2549, "step": 11246 }, { "epoch": 0.71, "grad_norm": 2.219001016374269, "learning_rate": 2.0834478487563815e-06, "loss": 0.2702, "step": 11247 }, { "epoch": 0.71, "grad_norm": 1.6866832792573418, "learning_rate": 2.082620652422981e-06, "loss": 0.2761, "step": 11248 }, { "epoch": 0.71, "grad_norm": 2.055320779195588, "learning_rate": 2.081793577136833e-06, "loss": 0.2475, "step": 11249 }, { "epoch": 0.71, "grad_norm": 1.9123174739920292, "learning_rate": 2.0809666229322533e-06, "loss": 0.2486, "step": 11250 }, { "epoch": 0.71, "grad_norm": 1.997780556288267, "learning_rate": 2.0801397898435534e-06, "loss": 0.2686, "step": 11251 }, { "epoch": 0.71, "grad_norm": 2.1026974105828473, "learning_rate": 2.0793130779050374e-06, "loss": 0.2579, "step": 11252 }, { "epoch": 0.71, "grad_norm": 1.8078411799573328, "learning_rate": 2.078486487151012e-06, "loss": 0.26, "step": 11253 }, { "epoch": 0.71, "grad_norm": 2.882044324375287, "learning_rate": 2.077660017615769e-06, "loss": 0.2412, "step": 11254 }, { "epoch": 0.71, "grad_norm": 2.2613193331472154, "learning_rate": 2.076833669333605e-06, "loss": 0.2832, "step": 11255 }, { "epoch": 0.71, "grad_norm": 4.408733614715769, "learning_rate": 2.0760074423388015e-06, "loss": 0.2847, "step": 11256 }, { "epoch": 0.71, "grad_norm": 2.70585575241767, "learning_rate": 2.075181336665645e-06, "loss": 0.2561, "step": 11257 }, { "epoch": 0.71, "grad_norm": 2.8369894987381366, "learning_rate": 2.0743553523484107e-06, "loss": 0.2864, "step": 11258 }, { "epoch": 0.71, "grad_norm": 1.9047531515855274, "learning_rate": 2.0735294894213687e-06, "loss": 0.2486, "step": 11259 }, { "epoch": 0.71, "grad_norm": 1.8312429988424974, "learning_rate": 2.072703747918784e-06, "loss": 0.277, "step": 11260 }, { "epoch": 0.71, "grad_norm": 1.84859456575309, "learning_rate": 2.0718781278749206e-06, "loss": 0.2704, "step": 11261 }, { "epoch": 0.71, "grad_norm": 1.4552664135300197, "learning_rate": 2.0710526293240367e-06, "loss": 0.2571, "step": 11262 }, { "epoch": 0.71, "grad_norm": 6.2689442857165725, "learning_rate": 2.0702272523003815e-06, "loss": 0.2815, "step": 11263 }, { "epoch": 0.71, "grad_norm": 3.4942252443609294, "learning_rate": 2.0694019968381993e-06, "loss": 0.2756, "step": 11264 }, { "epoch": 0.71, "grad_norm": 2.261708605292174, "learning_rate": 2.068576862971736e-06, "loss": 0.2721, "step": 11265 }, { "epoch": 0.71, "grad_norm": 3.1416618633960653, "learning_rate": 2.0677518507352246e-06, "loss": 0.2548, "step": 11266 }, { "epoch": 0.71, "grad_norm": 3.741672270038633, "learning_rate": 2.066926960162896e-06, "loss": 0.248, "step": 11267 }, { "epoch": 0.71, "grad_norm": 3.3673046110835774, "learning_rate": 2.066102191288979e-06, "loss": 0.2728, "step": 11268 }, { "epoch": 0.71, "grad_norm": 1.8066562817407386, "learning_rate": 2.065277544147692e-06, "loss": 0.2591, "step": 11269 }, { "epoch": 0.71, "grad_norm": 1.6442037120335296, "learning_rate": 2.0644530187732538e-06, "loss": 0.2759, "step": 11270 }, { "epoch": 0.71, "grad_norm": 2.7962337544630333, "learning_rate": 2.0636286151998737e-06, "loss": 0.2592, "step": 11271 }, { "epoch": 0.71, "grad_norm": 3.7439235779197984, "learning_rate": 2.0628043334617565e-06, "loss": 0.2651, "step": 11272 }, { "epoch": 0.71, "grad_norm": 1.802187952148651, "learning_rate": 2.061980173593106e-06, "loss": 0.2611, "step": 11273 }, { "epoch": 0.71, "grad_norm": 2.046964530052051, "learning_rate": 2.061156135628117e-06, "loss": 0.2458, "step": 11274 }, { "epoch": 0.71, "grad_norm": 1.7023506125150913, "learning_rate": 2.060332219600978e-06, "loss": 0.2495, "step": 11275 }, { "epoch": 0.71, "grad_norm": 1.4510190663030633, "learning_rate": 2.059508425545876e-06, "loss": 0.2583, "step": 11276 }, { "epoch": 0.71, "grad_norm": 0.5984112980284597, "learning_rate": 2.058684753496995e-06, "loss": 0.4544, "step": 11277 }, { "epoch": 0.71, "grad_norm": 1.718038916639918, "learning_rate": 2.0578612034885083e-06, "loss": 0.2435, "step": 11278 }, { "epoch": 0.71, "grad_norm": 2.1972264636950234, "learning_rate": 2.057037775554585e-06, "loss": 0.2464, "step": 11279 }, { "epoch": 0.71, "grad_norm": 1.3567895580061946, "learning_rate": 2.056214469729391e-06, "loss": 0.2606, "step": 11280 }, { "epoch": 0.71, "grad_norm": 1.6567632067021063, "learning_rate": 2.055391286047088e-06, "loss": 0.2594, "step": 11281 }, { "epoch": 0.71, "grad_norm": 2.132715962089489, "learning_rate": 2.0545682245418314e-06, "loss": 0.2676, "step": 11282 }, { "epoch": 0.71, "grad_norm": 1.8983599977686207, "learning_rate": 2.053745285247769e-06, "loss": 0.2495, "step": 11283 }, { "epoch": 0.71, "grad_norm": 2.2749586217527535, "learning_rate": 2.0529224681990477e-06, "loss": 0.2441, "step": 11284 }, { "epoch": 0.71, "grad_norm": 2.194913080985443, "learning_rate": 2.0520997734298096e-06, "loss": 0.2555, "step": 11285 }, { "epoch": 0.71, "grad_norm": 1.6124897014250361, "learning_rate": 2.0512772009741883e-06, "loss": 0.2616, "step": 11286 }, { "epoch": 0.71, "grad_norm": 3.539949427933494, "learning_rate": 2.050454750866313e-06, "loss": 0.2718, "step": 11287 }, { "epoch": 0.71, "grad_norm": 2.7385771706836386, "learning_rate": 2.0496324231403074e-06, "loss": 0.2653, "step": 11288 }, { "epoch": 0.71, "grad_norm": 1.6378637675240364, "learning_rate": 2.048810217830295e-06, "loss": 0.2604, "step": 11289 }, { "epoch": 0.71, "grad_norm": 2.0440401002533757, "learning_rate": 2.0479881349703885e-06, "loss": 0.2511, "step": 11290 }, { "epoch": 0.71, "grad_norm": 2.6730871924817556, "learning_rate": 2.047166174594696e-06, "loss": 0.2533, "step": 11291 }, { "epoch": 0.71, "grad_norm": 1.8096736846885213, "learning_rate": 2.0463443367373232e-06, "loss": 0.2552, "step": 11292 }, { "epoch": 0.71, "grad_norm": 1.8010067054137464, "learning_rate": 2.045522621432372e-06, "loss": 0.2632, "step": 11293 }, { "epoch": 0.71, "grad_norm": 4.317427290002973, "learning_rate": 2.0447010287139357e-06, "loss": 0.2924, "step": 11294 }, { "epoch": 0.71, "grad_norm": 1.7938139433949192, "learning_rate": 2.0438795586161027e-06, "loss": 0.2562, "step": 11295 }, { "epoch": 0.71, "grad_norm": 1.696871152362575, "learning_rate": 2.0430582111729553e-06, "loss": 0.2522, "step": 11296 }, { "epoch": 0.71, "grad_norm": 3.303049992480189, "learning_rate": 2.0422369864185764e-06, "loss": 0.2691, "step": 11297 }, { "epoch": 0.71, "grad_norm": 1.7809763358651913, "learning_rate": 2.0414158843870374e-06, "loss": 0.2572, "step": 11298 }, { "epoch": 0.71, "grad_norm": 1.5836344539530396, "learning_rate": 2.0405949051124098e-06, "loss": 0.2917, "step": 11299 }, { "epoch": 0.71, "grad_norm": 6.499452163795848, "learning_rate": 2.039774048628754e-06, "loss": 0.3023, "step": 11300 }, { "epoch": 0.71, "grad_norm": 2.742859695149067, "learning_rate": 2.0389533149701337e-06, "loss": 0.2683, "step": 11301 }, { "epoch": 0.71, "grad_norm": 1.7488158903285846, "learning_rate": 2.038132704170599e-06, "loss": 0.2764, "step": 11302 }, { "epoch": 0.71, "grad_norm": 1.8710981474400807, "learning_rate": 2.037312216264199e-06, "loss": 0.2616, "step": 11303 }, { "epoch": 0.71, "grad_norm": 1.7558263431775918, "learning_rate": 2.0364918512849763e-06, "loss": 0.2754, "step": 11304 }, { "epoch": 0.71, "grad_norm": 1.3723448692660163, "learning_rate": 2.035671609266972e-06, "loss": 0.2581, "step": 11305 }, { "epoch": 0.71, "grad_norm": 2.655202570536015, "learning_rate": 2.0348514902442163e-06, "loss": 0.2574, "step": 11306 }, { "epoch": 0.71, "grad_norm": 1.4593716364778655, "learning_rate": 2.0340314942507404e-06, "loss": 0.2409, "step": 11307 }, { "epoch": 0.71, "grad_norm": 1.4099536551064702, "learning_rate": 2.0332116213205643e-06, "loss": 0.2351, "step": 11308 }, { "epoch": 0.71, "grad_norm": 1.631861540894847, "learning_rate": 2.0323918714877094e-06, "loss": 0.2514, "step": 11309 }, { "epoch": 0.71, "grad_norm": 1.8491413432317427, "learning_rate": 2.031572244786187e-06, "loss": 0.2756, "step": 11310 }, { "epoch": 0.71, "grad_norm": 3.418752229745969, "learning_rate": 2.0307527412500044e-06, "loss": 0.2906, "step": 11311 }, { "epoch": 0.71, "grad_norm": 3.559290461037333, "learning_rate": 2.0299333609131635e-06, "loss": 0.2817, "step": 11312 }, { "epoch": 0.71, "grad_norm": 4.14333564023226, "learning_rate": 2.0291141038096616e-06, "loss": 0.2633, "step": 11313 }, { "epoch": 0.71, "grad_norm": 1.988662410341561, "learning_rate": 2.028294969973495e-06, "loss": 0.2566, "step": 11314 }, { "epoch": 0.71, "grad_norm": 2.276707696834116, "learning_rate": 2.0274759594386485e-06, "loss": 0.2485, "step": 11315 }, { "epoch": 0.71, "grad_norm": 0.6027524435209456, "learning_rate": 2.0266570722391023e-06, "loss": 0.4605, "step": 11316 }, { "epoch": 0.71, "grad_norm": 1.5611061983899408, "learning_rate": 2.025838308408837e-06, "loss": 0.2797, "step": 11317 }, { "epoch": 0.71, "grad_norm": 1.7898125265811011, "learning_rate": 2.0250196679818234e-06, "loss": 0.2612, "step": 11318 }, { "epoch": 0.71, "grad_norm": 1.8054608946122124, "learning_rate": 2.024201150992028e-06, "loss": 0.274, "step": 11319 }, { "epoch": 0.71, "grad_norm": 1.8249182693775046, "learning_rate": 2.02338275747341e-06, "loss": 0.2681, "step": 11320 }, { "epoch": 0.71, "grad_norm": 4.2086545351873355, "learning_rate": 2.022564487459929e-06, "loss": 0.2592, "step": 11321 }, { "epoch": 0.71, "grad_norm": 2.263025405654816, "learning_rate": 2.0217463409855377e-06, "loss": 0.2577, "step": 11322 }, { "epoch": 0.71, "grad_norm": 1.6013874667329715, "learning_rate": 2.0209283180841803e-06, "loss": 0.2683, "step": 11323 }, { "epoch": 0.71, "grad_norm": 2.716110993432087, "learning_rate": 2.0201104187897975e-06, "loss": 0.2551, "step": 11324 }, { "epoch": 0.71, "grad_norm": 2.1477287728078567, "learning_rate": 2.0192926431363274e-06, "loss": 0.2529, "step": 11325 }, { "epoch": 0.71, "grad_norm": 2.315932900324255, "learning_rate": 2.0184749911576996e-06, "loss": 0.2609, "step": 11326 }, { "epoch": 0.71, "grad_norm": 1.5035421602010106, "learning_rate": 2.017657462887841e-06, "loss": 0.2578, "step": 11327 }, { "epoch": 0.71, "grad_norm": 5.905785262196667, "learning_rate": 2.016840058360667e-06, "loss": 0.2752, "step": 11328 }, { "epoch": 0.71, "grad_norm": 2.764265828420965, "learning_rate": 2.016022777610102e-06, "loss": 0.2609, "step": 11329 }, { "epoch": 0.71, "grad_norm": 2.0082251367652777, "learning_rate": 2.0152056206700516e-06, "loss": 0.2576, "step": 11330 }, { "epoch": 0.71, "grad_norm": 1.9534691166959397, "learning_rate": 2.0143885875744217e-06, "loss": 0.2507, "step": 11331 }, { "epoch": 0.71, "grad_norm": 1.9746059108719314, "learning_rate": 2.013571678357111e-06, "loss": 0.2694, "step": 11332 }, { "epoch": 0.71, "grad_norm": 1.6165579992429837, "learning_rate": 2.0127548930520173e-06, "loss": 0.2777, "step": 11333 }, { "epoch": 0.71, "grad_norm": 7.443260674923377, "learning_rate": 2.0119382316930293e-06, "loss": 0.265, "step": 11334 }, { "epoch": 0.71, "grad_norm": 1.9760462315592937, "learning_rate": 2.011121694314031e-06, "loss": 0.2701, "step": 11335 }, { "epoch": 0.71, "grad_norm": 4.320175705161023, "learning_rate": 2.0103052809488994e-06, "loss": 0.2614, "step": 11336 }, { "epoch": 0.71, "grad_norm": 2.020051859425412, "learning_rate": 2.009488991631515e-06, "loss": 0.2622, "step": 11337 }, { "epoch": 0.71, "grad_norm": 2.721175284752356, "learning_rate": 2.0086728263957446e-06, "loss": 0.2686, "step": 11338 }, { "epoch": 0.71, "grad_norm": 1.6424330441567097, "learning_rate": 2.007856785275451e-06, "loss": 0.2632, "step": 11339 }, { "epoch": 0.71, "grad_norm": 2.101973955010961, "learning_rate": 2.0070408683044927e-06, "loss": 0.2554, "step": 11340 }, { "epoch": 0.71, "grad_norm": 1.7957450911433073, "learning_rate": 2.0062250755167273e-06, "loss": 0.2608, "step": 11341 }, { "epoch": 0.71, "grad_norm": 1.434905798012273, "learning_rate": 2.005409406946e-06, "loss": 0.2681, "step": 11342 }, { "epoch": 0.71, "grad_norm": 4.055028020870884, "learning_rate": 2.0045938626261545e-06, "loss": 0.2695, "step": 11343 }, { "epoch": 0.71, "grad_norm": 1.4087870325594685, "learning_rate": 2.0037784425910297e-06, "loss": 0.2586, "step": 11344 }, { "epoch": 0.71, "grad_norm": 2.3772419911175398, "learning_rate": 2.0029631468744608e-06, "loss": 0.2676, "step": 11345 }, { "epoch": 0.71, "grad_norm": 1.628980658008442, "learning_rate": 2.0021479755102746e-06, "loss": 0.2662, "step": 11346 }, { "epoch": 0.71, "grad_norm": 2.73750429787218, "learning_rate": 2.0013329285322935e-06, "loss": 0.2606, "step": 11347 }, { "epoch": 0.71, "grad_norm": 2.216705420367553, "learning_rate": 2.000518005974334e-06, "loss": 0.2732, "step": 11348 }, { "epoch": 0.71, "grad_norm": 1.5501045833967493, "learning_rate": 1.999703207870211e-06, "loss": 0.2518, "step": 11349 }, { "epoch": 0.71, "grad_norm": 2.345120807856687, "learning_rate": 1.998888534253732e-06, "loss": 0.2445, "step": 11350 }, { "epoch": 0.71, "grad_norm": 0.5801532472428638, "learning_rate": 1.9980739851586962e-06, "loss": 0.467, "step": 11351 }, { "epoch": 0.71, "grad_norm": 1.4128805356799254, "learning_rate": 1.9972595606189026e-06, "loss": 0.2514, "step": 11352 }, { "epoch": 0.71, "grad_norm": 1.8872851415395644, "learning_rate": 1.996445260668145e-06, "loss": 0.2667, "step": 11353 }, { "epoch": 0.71, "grad_norm": 1.51318997529435, "learning_rate": 1.995631085340209e-06, "loss": 0.2538, "step": 11354 }, { "epoch": 0.71, "grad_norm": 1.9785116508051475, "learning_rate": 1.994817034668875e-06, "loss": 0.2547, "step": 11355 }, { "epoch": 0.71, "grad_norm": 2.1575406168232623, "learning_rate": 1.9940031086879184e-06, "loss": 0.2498, "step": 11356 }, { "epoch": 0.71, "grad_norm": 2.4897315642909783, "learning_rate": 1.9931893074311145e-06, "loss": 0.272, "step": 11357 }, { "epoch": 0.71, "grad_norm": 1.7826147316133816, "learning_rate": 1.9923756309322246e-06, "loss": 0.2541, "step": 11358 }, { "epoch": 0.71, "grad_norm": 2.3455543589097223, "learning_rate": 1.9915620792250133e-06, "loss": 0.2675, "step": 11359 }, { "epoch": 0.71, "grad_norm": 0.5774023758944333, "learning_rate": 1.9907486523432336e-06, "loss": 0.4495, "step": 11360 }, { "epoch": 0.71, "grad_norm": 1.6034605147044834, "learning_rate": 1.989935350320639e-06, "loss": 0.265, "step": 11361 }, { "epoch": 0.71, "grad_norm": 2.1300232647843877, "learning_rate": 1.9891221731909733e-06, "loss": 0.268, "step": 11362 }, { "epoch": 0.71, "grad_norm": 1.669427878905994, "learning_rate": 1.988309120987977e-06, "loss": 0.2664, "step": 11363 }, { "epoch": 0.71, "grad_norm": 1.704242222687707, "learning_rate": 1.987496193745382e-06, "loss": 0.2603, "step": 11364 }, { "epoch": 0.71, "grad_norm": 1.579530652329033, "learning_rate": 1.986683391496923e-06, "loss": 0.2534, "step": 11365 }, { "epoch": 0.71, "grad_norm": 1.7661306057677901, "learning_rate": 1.9858707142763205e-06, "loss": 0.2562, "step": 11366 }, { "epoch": 0.71, "grad_norm": 1.7420720500346207, "learning_rate": 1.985058162117297e-06, "loss": 0.2624, "step": 11367 }, { "epoch": 0.71, "grad_norm": 0.5833631407263254, "learning_rate": 1.9842457350535642e-06, "loss": 0.5086, "step": 11368 }, { "epoch": 0.71, "grad_norm": 0.5620309514403976, "learning_rate": 1.9834334331188345e-06, "loss": 0.4662, "step": 11369 }, { "epoch": 0.72, "grad_norm": 1.294759900570698, "learning_rate": 1.9826212563468095e-06, "loss": 0.2521, "step": 11370 }, { "epoch": 0.72, "grad_norm": 2.5657930904808723, "learning_rate": 1.981809204771188e-06, "loss": 0.2622, "step": 11371 }, { "epoch": 0.72, "grad_norm": 3.3102104453249055, "learning_rate": 1.9809972784256614e-06, "loss": 0.2722, "step": 11372 }, { "epoch": 0.72, "grad_norm": 2.113928762991423, "learning_rate": 1.9801854773439206e-06, "loss": 0.2465, "step": 11373 }, { "epoch": 0.72, "grad_norm": 2.9706746452481925, "learning_rate": 1.9793738015596497e-06, "loss": 0.2603, "step": 11374 }, { "epoch": 0.72, "grad_norm": 2.2277375849547645, "learning_rate": 1.978562251106525e-06, "loss": 0.2622, "step": 11375 }, { "epoch": 0.72, "grad_norm": 2.907641102352029, "learning_rate": 1.977750826018217e-06, "loss": 0.2648, "step": 11376 }, { "epoch": 0.72, "grad_norm": 3.592571779688514, "learning_rate": 1.9769395263283973e-06, "loss": 0.2776, "step": 11377 }, { "epoch": 0.72, "grad_norm": 2.2381136243548942, "learning_rate": 1.9761283520707268e-06, "loss": 0.2761, "step": 11378 }, { "epoch": 0.72, "grad_norm": 1.9940143447905285, "learning_rate": 1.9753173032788616e-06, "loss": 0.2401, "step": 11379 }, { "epoch": 0.72, "grad_norm": 2.5298983352048547, "learning_rate": 1.9745063799864506e-06, "loss": 0.2899, "step": 11380 }, { "epoch": 0.72, "grad_norm": 1.8051648505776428, "learning_rate": 1.973695582227147e-06, "loss": 0.27, "step": 11381 }, { "epoch": 0.72, "grad_norm": 1.4991138970649578, "learning_rate": 1.972884910034589e-06, "loss": 0.2431, "step": 11382 }, { "epoch": 0.72, "grad_norm": 2.0111220715082245, "learning_rate": 1.972074363442413e-06, "loss": 0.2606, "step": 11383 }, { "epoch": 0.72, "grad_norm": 2.8018546620806766, "learning_rate": 1.971263942484248e-06, "loss": 0.2494, "step": 11384 }, { "epoch": 0.72, "grad_norm": 4.002055503375998, "learning_rate": 1.9704536471937238e-06, "loss": 0.2641, "step": 11385 }, { "epoch": 0.72, "grad_norm": 1.7357738843295063, "learning_rate": 1.969643477604458e-06, "loss": 0.2574, "step": 11386 }, { "epoch": 0.72, "grad_norm": 2.028767298644523, "learning_rate": 1.9688334337500677e-06, "loss": 0.2617, "step": 11387 }, { "epoch": 0.72, "grad_norm": 1.581514765675958, "learning_rate": 1.968023515664158e-06, "loss": 0.2517, "step": 11388 }, { "epoch": 0.72, "grad_norm": 1.9354875005489085, "learning_rate": 1.967213723380342e-06, "loss": 0.2719, "step": 11389 }, { "epoch": 0.72, "grad_norm": 2.143422083357361, "learning_rate": 1.966404056932215e-06, "loss": 0.2546, "step": 11390 }, { "epoch": 0.72, "grad_norm": 1.7764018934828976, "learning_rate": 1.9655945163533723e-06, "loss": 0.2585, "step": 11391 }, { "epoch": 0.72, "grad_norm": 1.497693339571945, "learning_rate": 1.964785101677401e-06, "loss": 0.2645, "step": 11392 }, { "epoch": 0.72, "grad_norm": 2.2809630448123803, "learning_rate": 1.9639758129378888e-06, "loss": 0.2706, "step": 11393 }, { "epoch": 0.72, "grad_norm": 3.075029666733048, "learning_rate": 1.9631666501684126e-06, "loss": 0.253, "step": 11394 }, { "epoch": 0.72, "grad_norm": 1.643925236239441, "learning_rate": 1.9623576134025447e-06, "loss": 0.2568, "step": 11395 }, { "epoch": 0.72, "grad_norm": 1.7403959416229888, "learning_rate": 1.9615487026738546e-06, "loss": 0.2488, "step": 11396 }, { "epoch": 0.72, "grad_norm": 1.897176321303575, "learning_rate": 1.9607399180159076e-06, "loss": 0.2625, "step": 11397 }, { "epoch": 0.72, "grad_norm": 1.4038872054017089, "learning_rate": 1.9599312594622594e-06, "loss": 0.2656, "step": 11398 }, { "epoch": 0.72, "grad_norm": 2.4292549886742427, "learning_rate": 1.9591227270464635e-06, "loss": 0.2451, "step": 11399 }, { "epoch": 0.72, "grad_norm": 1.6982447805255887, "learning_rate": 1.958314320802065e-06, "loss": 0.2666, "step": 11400 }, { "epoch": 0.72, "grad_norm": 1.4206654848390734, "learning_rate": 1.9575060407626094e-06, "loss": 0.2529, "step": 11401 }, { "epoch": 0.72, "grad_norm": 1.900697119871712, "learning_rate": 1.9566978869616327e-06, "loss": 0.2656, "step": 11402 }, { "epoch": 0.72, "grad_norm": 1.6884534322469396, "learning_rate": 1.9558898594326635e-06, "loss": 0.2742, "step": 11403 }, { "epoch": 0.72, "grad_norm": 2.341516010359561, "learning_rate": 1.9550819582092317e-06, "loss": 0.2589, "step": 11404 }, { "epoch": 0.72, "grad_norm": 1.6031833805313624, "learning_rate": 1.9542741833248597e-06, "loss": 0.2622, "step": 11405 }, { "epoch": 0.72, "grad_norm": 2.1997605954118207, "learning_rate": 1.953466534813062e-06, "loss": 0.2777, "step": 11406 }, { "epoch": 0.72, "grad_norm": 2.261326186835715, "learning_rate": 1.952659012707348e-06, "loss": 0.2638, "step": 11407 }, { "epoch": 0.72, "grad_norm": 5.663279408984441, "learning_rate": 1.9518516170412234e-06, "loss": 0.2684, "step": 11408 }, { "epoch": 0.72, "grad_norm": 1.5113837783844868, "learning_rate": 1.9510443478481905e-06, "loss": 0.2465, "step": 11409 }, { "epoch": 0.72, "grad_norm": 1.8365462034653697, "learning_rate": 1.950237205161741e-06, "loss": 0.2643, "step": 11410 }, { "epoch": 0.72, "grad_norm": 1.4929882173889322, "learning_rate": 1.9494301890153695e-06, "loss": 0.2603, "step": 11411 }, { "epoch": 0.72, "grad_norm": 1.408961938331628, "learning_rate": 1.948623299442556e-06, "loss": 0.2482, "step": 11412 }, { "epoch": 0.72, "grad_norm": 1.737767357578204, "learning_rate": 1.947816536476783e-06, "loss": 0.2648, "step": 11413 }, { "epoch": 0.72, "grad_norm": 2.524209273130116, "learning_rate": 1.9470099001515237e-06, "loss": 0.2671, "step": 11414 }, { "epoch": 0.72, "grad_norm": 2.4741269951647045, "learning_rate": 1.9462033905002457e-06, "loss": 0.2698, "step": 11415 }, { "epoch": 0.72, "grad_norm": 2.2131247086558754, "learning_rate": 1.945397007556412e-06, "loss": 0.2547, "step": 11416 }, { "epoch": 0.72, "grad_norm": 2.4341785772433284, "learning_rate": 1.9445907513534834e-06, "loss": 0.2648, "step": 11417 }, { "epoch": 0.72, "grad_norm": 3.5722984405073626, "learning_rate": 1.94378462192491e-06, "loss": 0.2631, "step": 11418 }, { "epoch": 0.72, "grad_norm": 1.5805557957652592, "learning_rate": 1.942978619304143e-06, "loss": 0.2633, "step": 11419 }, { "epoch": 0.72, "grad_norm": 1.472081491767904, "learning_rate": 1.9421727435246214e-06, "loss": 0.2489, "step": 11420 }, { "epoch": 0.72, "grad_norm": 2.2142139857719663, "learning_rate": 1.941366994619785e-06, "loss": 0.2494, "step": 11421 }, { "epoch": 0.72, "grad_norm": 1.7860826111303763, "learning_rate": 1.9405613726230653e-06, "loss": 0.2556, "step": 11422 }, { "epoch": 0.72, "grad_norm": 2.8046385687067628, "learning_rate": 1.939755877567888e-06, "loss": 0.2595, "step": 11423 }, { "epoch": 0.72, "grad_norm": 2.6670271164555346, "learning_rate": 1.9389505094876736e-06, "loss": 0.2674, "step": 11424 }, { "epoch": 0.72, "grad_norm": 1.8890571202441133, "learning_rate": 1.938145268415839e-06, "loss": 0.2969, "step": 11425 }, { "epoch": 0.72, "grad_norm": 4.9879525381207985, "learning_rate": 1.9373401543857983e-06, "loss": 0.2796, "step": 11426 }, { "epoch": 0.72, "grad_norm": 1.3161419496663187, "learning_rate": 1.936535167430954e-06, "loss": 0.2518, "step": 11427 }, { "epoch": 0.72, "grad_norm": 4.232052173403798, "learning_rate": 1.935730307584705e-06, "loss": 0.2599, "step": 11428 }, { "epoch": 0.72, "grad_norm": 1.9742934527172964, "learning_rate": 1.9349255748804506e-06, "loss": 0.2528, "step": 11429 }, { "epoch": 0.72, "grad_norm": 1.8988202940201568, "learning_rate": 1.934120969351578e-06, "loss": 0.2792, "step": 11430 }, { "epoch": 0.72, "grad_norm": 2.403931595456558, "learning_rate": 1.9333164910314715e-06, "loss": 0.2518, "step": 11431 }, { "epoch": 0.72, "grad_norm": 5.326714842309047, "learning_rate": 1.9325121399535103e-06, "loss": 0.2718, "step": 11432 }, { "epoch": 0.72, "grad_norm": 1.867530048596062, "learning_rate": 1.9317079161510675e-06, "loss": 0.2716, "step": 11433 }, { "epoch": 0.72, "grad_norm": 1.516557314740614, "learning_rate": 1.9309038196575157e-06, "loss": 0.2595, "step": 11434 }, { "epoch": 0.72, "grad_norm": 1.586599339571851, "learning_rate": 1.9300998505062157e-06, "loss": 0.2398, "step": 11435 }, { "epoch": 0.72, "grad_norm": 3.2522506558220243, "learning_rate": 1.9292960087305234e-06, "loss": 0.2481, "step": 11436 }, { "epoch": 0.72, "grad_norm": 2.0020347706698143, "learning_rate": 1.9284922943637965e-06, "loss": 0.2502, "step": 11437 }, { "epoch": 0.72, "grad_norm": 1.79563012579662, "learning_rate": 1.9276887074393793e-06, "loss": 0.2682, "step": 11438 }, { "epoch": 0.72, "grad_norm": 1.8305073671706702, "learning_rate": 1.926885247990615e-06, "loss": 0.2958, "step": 11439 }, { "epoch": 0.72, "grad_norm": 2.72451949456854, "learning_rate": 1.926081916050838e-06, "loss": 0.2643, "step": 11440 }, { "epoch": 0.72, "grad_norm": 0.6114740381338866, "learning_rate": 1.9252787116533826e-06, "loss": 0.459, "step": 11441 }, { "epoch": 0.72, "grad_norm": 2.5079612345430435, "learning_rate": 1.924475634831576e-06, "loss": 0.2539, "step": 11442 }, { "epoch": 0.72, "grad_norm": 2.127769946153632, "learning_rate": 1.9236726856187387e-06, "loss": 0.279, "step": 11443 }, { "epoch": 0.72, "grad_norm": 1.8740271212326134, "learning_rate": 1.922869864048184e-06, "loss": 0.2692, "step": 11444 }, { "epoch": 0.72, "grad_norm": 7.823193367665255, "learning_rate": 1.9220671701532256e-06, "loss": 0.2842, "step": 11445 }, { "epoch": 0.72, "grad_norm": 2.3000630886978786, "learning_rate": 1.921264603967168e-06, "loss": 0.2528, "step": 11446 }, { "epoch": 0.72, "grad_norm": 2.1792497744263115, "learning_rate": 1.9204621655233096e-06, "loss": 0.2544, "step": 11447 }, { "epoch": 0.72, "grad_norm": 3.960125826508275, "learning_rate": 1.9196598548549445e-06, "loss": 0.2498, "step": 11448 }, { "epoch": 0.72, "grad_norm": 2.0879985838242017, "learning_rate": 1.9188576719953635e-06, "loss": 0.274, "step": 11449 }, { "epoch": 0.72, "grad_norm": 1.5514771715147577, "learning_rate": 1.918055616977852e-06, "loss": 0.2523, "step": 11450 }, { "epoch": 0.72, "grad_norm": 1.915211518804343, "learning_rate": 1.917253689835687e-06, "loss": 0.2548, "step": 11451 }, { "epoch": 0.72, "grad_norm": 0.6524053314383431, "learning_rate": 1.91645189060214e-06, "loss": 0.482, "step": 11452 }, { "epoch": 0.72, "grad_norm": 4.990368602133595, "learning_rate": 1.915650219310483e-06, "loss": 0.2472, "step": 11453 }, { "epoch": 0.72, "grad_norm": 1.7150313533698642, "learning_rate": 1.914848675993977e-06, "loss": 0.2724, "step": 11454 }, { "epoch": 0.72, "grad_norm": 2.56708186980046, "learning_rate": 1.914047260685878e-06, "loss": 0.2543, "step": 11455 }, { "epoch": 0.72, "grad_norm": 2.1096326092215487, "learning_rate": 1.91324597341944e-06, "loss": 0.2535, "step": 11456 }, { "epoch": 0.72, "grad_norm": 2.1502170936499385, "learning_rate": 1.9124448142279084e-06, "loss": 0.2619, "step": 11457 }, { "epoch": 0.72, "grad_norm": 2.4409823053965565, "learning_rate": 1.9116437831445273e-06, "loss": 0.2714, "step": 11458 }, { "epoch": 0.72, "grad_norm": 3.231188912890816, "learning_rate": 1.9108428802025308e-06, "loss": 0.2617, "step": 11459 }, { "epoch": 0.72, "grad_norm": 3.5516612379231187, "learning_rate": 1.910042105435149e-06, "loss": 0.2598, "step": 11460 }, { "epoch": 0.72, "grad_norm": 2.5415978599533364, "learning_rate": 1.90924145887561e-06, "loss": 0.2527, "step": 11461 }, { "epoch": 0.72, "grad_norm": 2.4079412109689216, "learning_rate": 1.90844094055713e-06, "loss": 0.2778, "step": 11462 }, { "epoch": 0.72, "grad_norm": 1.5578665959114903, "learning_rate": 1.9076405505129298e-06, "loss": 0.248, "step": 11463 }, { "epoch": 0.72, "grad_norm": 2.7101699307622105, "learning_rate": 1.9068402887762133e-06, "loss": 0.269, "step": 11464 }, { "epoch": 0.72, "grad_norm": 1.8690068427042108, "learning_rate": 1.9060401553801887e-06, "loss": 0.267, "step": 11465 }, { "epoch": 0.72, "grad_norm": 2.802343913344231, "learning_rate": 1.9052401503580541e-06, "loss": 0.2572, "step": 11466 }, { "epoch": 0.72, "grad_norm": 1.9048858238915385, "learning_rate": 1.904440273743003e-06, "loss": 0.2667, "step": 11467 }, { "epoch": 0.72, "grad_norm": 1.5851466547212005, "learning_rate": 1.903640525568221e-06, "loss": 0.27, "step": 11468 }, { "epoch": 0.72, "grad_norm": 0.6106298306366249, "learning_rate": 1.902840905866895e-06, "loss": 0.4944, "step": 11469 }, { "epoch": 0.72, "grad_norm": 1.7960634647164426, "learning_rate": 1.9020414146721995e-06, "loss": 0.2557, "step": 11470 }, { "epoch": 0.72, "grad_norm": 4.069294754634697, "learning_rate": 1.9012420520173097e-06, "loss": 0.265, "step": 11471 }, { "epoch": 0.72, "grad_norm": 2.1230797406007387, "learning_rate": 1.9004428179353895e-06, "loss": 0.249, "step": 11472 }, { "epoch": 0.72, "grad_norm": 1.977106912460412, "learning_rate": 1.8996437124596045e-06, "loss": 0.2778, "step": 11473 }, { "epoch": 0.72, "grad_norm": 2.148882914640681, "learning_rate": 1.8988447356231082e-06, "loss": 0.2437, "step": 11474 }, { "epoch": 0.72, "grad_norm": 1.612463440365609, "learning_rate": 1.8980458874590525e-06, "loss": 0.2563, "step": 11475 }, { "epoch": 0.72, "grad_norm": 1.251921214254672, "learning_rate": 1.8972471680005805e-06, "loss": 0.2567, "step": 11476 }, { "epoch": 0.72, "grad_norm": 1.6270869313081857, "learning_rate": 1.8964485772808345e-06, "loss": 0.2501, "step": 11477 }, { "epoch": 0.72, "grad_norm": 2.031496906945291, "learning_rate": 1.8956501153329516e-06, "loss": 0.268, "step": 11478 }, { "epoch": 0.72, "grad_norm": 1.8361410734328967, "learning_rate": 1.8948517821900592e-06, "loss": 0.2653, "step": 11479 }, { "epoch": 0.72, "grad_norm": 2.1441496679139687, "learning_rate": 1.8940535778852804e-06, "loss": 0.2717, "step": 11480 }, { "epoch": 0.72, "grad_norm": 2.7183299102711644, "learning_rate": 1.8932555024517363e-06, "loss": 0.2746, "step": 11481 }, { "epoch": 0.72, "grad_norm": 2.1089751911758734, "learning_rate": 1.8924575559225406e-06, "loss": 0.2465, "step": 11482 }, { "epoch": 0.72, "grad_norm": 1.8839783331538997, "learning_rate": 1.8916597383308e-06, "loss": 0.2596, "step": 11483 }, { "epoch": 0.72, "grad_norm": 3.550563791260173, "learning_rate": 1.8908620497096159e-06, "loss": 0.2631, "step": 11484 }, { "epoch": 0.72, "grad_norm": 4.710879720987616, "learning_rate": 1.8900644900920884e-06, "loss": 0.2597, "step": 11485 }, { "epoch": 0.72, "grad_norm": 32.768201741543656, "learning_rate": 1.8892670595113105e-06, "loss": 0.2624, "step": 11486 }, { "epoch": 0.72, "grad_norm": 2.045250859829823, "learning_rate": 1.8884697580003674e-06, "loss": 0.2452, "step": 11487 }, { "epoch": 0.72, "grad_norm": 2.4852251987712686, "learning_rate": 1.8876725855923394e-06, "loss": 0.2683, "step": 11488 }, { "epoch": 0.72, "grad_norm": 1.6527115258742013, "learning_rate": 1.8868755423203056e-06, "loss": 0.2781, "step": 11489 }, { "epoch": 0.72, "grad_norm": 2.399537261154635, "learning_rate": 1.886078628217335e-06, "loss": 0.2605, "step": 11490 }, { "epoch": 0.72, "grad_norm": 1.9713359301015374, "learning_rate": 1.8852818433164927e-06, "loss": 0.2516, "step": 11491 }, { "epoch": 0.72, "grad_norm": 4.296147865855016, "learning_rate": 1.8844851876508375e-06, "loss": 0.2785, "step": 11492 }, { "epoch": 0.72, "grad_norm": 2.3306562118900755, "learning_rate": 1.883688661253426e-06, "loss": 0.2675, "step": 11493 }, { "epoch": 0.72, "grad_norm": 2.3282691457943727, "learning_rate": 1.882892264157309e-06, "loss": 0.2817, "step": 11494 }, { "epoch": 0.72, "grad_norm": 2.276776008424065, "learning_rate": 1.8820959963955282e-06, "loss": 0.2816, "step": 11495 }, { "epoch": 0.72, "grad_norm": 4.638741689277499, "learning_rate": 1.8812998580011204e-06, "loss": 0.268, "step": 11496 }, { "epoch": 0.72, "grad_norm": 2.652807009176715, "learning_rate": 1.880503849007123e-06, "loss": 0.27, "step": 11497 }, { "epoch": 0.72, "grad_norm": 0.67812107606071, "learning_rate": 1.8797079694465619e-06, "loss": 0.4994, "step": 11498 }, { "epoch": 0.72, "grad_norm": 0.6366241977833366, "learning_rate": 1.8789122193524594e-06, "loss": 0.4871, "step": 11499 }, { "epoch": 0.72, "grad_norm": 1.891088314246333, "learning_rate": 1.878116598757831e-06, "loss": 0.2449, "step": 11500 }, { "epoch": 0.72, "grad_norm": 2.89586591882636, "learning_rate": 1.8773211076956898e-06, "loss": 0.2506, "step": 11501 }, { "epoch": 0.72, "grad_norm": 2.8680385957877492, "learning_rate": 1.8765257461990444e-06, "loss": 0.2644, "step": 11502 }, { "epoch": 0.72, "grad_norm": 1.5303857899235052, "learning_rate": 1.875730514300893e-06, "loss": 0.2511, "step": 11503 }, { "epoch": 0.72, "grad_norm": 2.1661314046330924, "learning_rate": 1.8749354120342307e-06, "loss": 0.2789, "step": 11504 }, { "epoch": 0.72, "grad_norm": 5.91931618367787, "learning_rate": 1.8741404394320506e-06, "loss": 0.2493, "step": 11505 }, { "epoch": 0.72, "grad_norm": 2.0348641881384446, "learning_rate": 1.8733455965273355e-06, "loss": 0.2845, "step": 11506 }, { "epoch": 0.72, "grad_norm": 2.931735348327818, "learning_rate": 1.8725508833530638e-06, "loss": 0.2584, "step": 11507 }, { "epoch": 0.72, "grad_norm": 2.981218916565921, "learning_rate": 1.8717562999422128e-06, "loss": 0.2699, "step": 11508 }, { "epoch": 0.72, "grad_norm": 3.1187270794333974, "learning_rate": 1.8709618463277474e-06, "loss": 0.2518, "step": 11509 }, { "epoch": 0.72, "grad_norm": 2.451284277050738, "learning_rate": 1.8701675225426341e-06, "loss": 0.2623, "step": 11510 }, { "epoch": 0.72, "grad_norm": 5.391133879892232, "learning_rate": 1.8693733286198296e-06, "loss": 0.2484, "step": 11511 }, { "epoch": 0.72, "grad_norm": 3.650244144192052, "learning_rate": 1.8685792645922852e-06, "loss": 0.2504, "step": 11512 }, { "epoch": 0.72, "grad_norm": 1.5001549512291834, "learning_rate": 1.86778533049295e-06, "loss": 0.2578, "step": 11513 }, { "epoch": 0.72, "grad_norm": 2.153463694552769, "learning_rate": 1.8669915263547656e-06, "loss": 0.258, "step": 11514 }, { "epoch": 0.72, "grad_norm": 6.506997653386509, "learning_rate": 1.8661978522106654e-06, "loss": 0.2477, "step": 11515 }, { "epoch": 0.72, "grad_norm": 4.008297387897438, "learning_rate": 1.8654043080935846e-06, "loss": 0.2602, "step": 11516 }, { "epoch": 0.72, "grad_norm": 1.7498243961326454, "learning_rate": 1.864610894036445e-06, "loss": 0.2524, "step": 11517 }, { "epoch": 0.72, "grad_norm": 1.510139821959095, "learning_rate": 1.86381761007217e-06, "loss": 0.2503, "step": 11518 }, { "epoch": 0.72, "grad_norm": 2.8953997341756814, "learning_rate": 1.863024456233673e-06, "loss": 0.2512, "step": 11519 }, { "epoch": 0.72, "grad_norm": 2.9859824432416895, "learning_rate": 1.8622314325538615e-06, "loss": 0.2664, "step": 11520 }, { "epoch": 0.72, "grad_norm": 1.771046837551444, "learning_rate": 1.8614385390656432e-06, "loss": 0.2611, "step": 11521 }, { "epoch": 0.72, "grad_norm": 2.571088068936452, "learning_rate": 1.8606457758019125e-06, "loss": 0.2409, "step": 11522 }, { "epoch": 0.72, "grad_norm": 1.928979470443616, "learning_rate": 1.8598531427955662e-06, "loss": 0.2667, "step": 11523 }, { "epoch": 0.72, "grad_norm": 1.703417933287143, "learning_rate": 1.859060640079491e-06, "loss": 0.2641, "step": 11524 }, { "epoch": 0.72, "grad_norm": 4.716672621669686, "learning_rate": 1.8582682676865666e-06, "loss": 0.2802, "step": 11525 }, { "epoch": 0.72, "grad_norm": 2.834495224753895, "learning_rate": 1.8574760256496742e-06, "loss": 0.2664, "step": 11526 }, { "epoch": 0.72, "grad_norm": 3.3960608525254616, "learning_rate": 1.8566839140016829e-06, "loss": 0.2581, "step": 11527 }, { "epoch": 0.72, "grad_norm": 2.3241034315555993, "learning_rate": 1.8558919327754577e-06, "loss": 0.2741, "step": 11528 }, { "epoch": 0.73, "grad_norm": 2.684780560119694, "learning_rate": 1.8551000820038628e-06, "loss": 0.2645, "step": 11529 }, { "epoch": 0.73, "grad_norm": 1.8758240644753343, "learning_rate": 1.8543083617197494e-06, "loss": 0.2545, "step": 11530 }, { "epoch": 0.73, "grad_norm": 2.4815599160007857, "learning_rate": 1.853516771955971e-06, "loss": 0.2745, "step": 11531 }, { "epoch": 0.73, "grad_norm": 1.8268556016603312, "learning_rate": 1.852725312745371e-06, "loss": 0.2628, "step": 11532 }, { "epoch": 0.73, "grad_norm": 1.8432119921001295, "learning_rate": 1.8519339841207857e-06, "loss": 0.2567, "step": 11533 }, { "epoch": 0.73, "grad_norm": 3.9344243019241505, "learning_rate": 1.8511427861150526e-06, "loss": 0.2548, "step": 11534 }, { "epoch": 0.73, "grad_norm": 1.6120661362937332, "learning_rate": 1.8503517187609993e-06, "loss": 0.2566, "step": 11535 }, { "epoch": 0.73, "grad_norm": 1.8039736735689818, "learning_rate": 1.8495607820914451e-06, "loss": 0.2505, "step": 11536 }, { "epoch": 0.73, "grad_norm": 2.5540975630774976, "learning_rate": 1.8487699761392098e-06, "loss": 0.2689, "step": 11537 }, { "epoch": 0.73, "grad_norm": 1.7908464601448393, "learning_rate": 1.8479793009371073e-06, "loss": 0.2442, "step": 11538 }, { "epoch": 0.73, "grad_norm": 1.2954945231916777, "learning_rate": 1.8471887565179426e-06, "loss": 0.2534, "step": 11539 }, { "epoch": 0.73, "grad_norm": 1.3186157692833462, "learning_rate": 1.8463983429145143e-06, "loss": 0.2682, "step": 11540 }, { "epoch": 0.73, "grad_norm": 1.8801713595759486, "learning_rate": 1.8456080601596216e-06, "loss": 0.264, "step": 11541 }, { "epoch": 0.73, "grad_norm": 3.462008547163594, "learning_rate": 1.844817908286054e-06, "loss": 0.2373, "step": 11542 }, { "epoch": 0.73, "grad_norm": 2.6097105168691064, "learning_rate": 1.8440278873265948e-06, "loss": 0.2622, "step": 11543 }, { "epoch": 0.73, "grad_norm": 1.9709023890444757, "learning_rate": 1.843237997314023e-06, "loss": 0.2596, "step": 11544 }, { "epoch": 0.73, "grad_norm": 1.8745638645026685, "learning_rate": 1.8424482382811137e-06, "loss": 0.2769, "step": 11545 }, { "epoch": 0.73, "grad_norm": 5.586185787760726, "learning_rate": 1.841658610260637e-06, "loss": 0.2613, "step": 11546 }, { "epoch": 0.73, "grad_norm": 1.6403014749982072, "learning_rate": 1.8408691132853551e-06, "loss": 0.2676, "step": 11547 }, { "epoch": 0.73, "grad_norm": 2.2678189150366417, "learning_rate": 1.8400797473880223e-06, "loss": 0.3071, "step": 11548 }, { "epoch": 0.73, "grad_norm": 1.6357951822984167, "learning_rate": 1.8392905126013955e-06, "loss": 0.2721, "step": 11549 }, { "epoch": 0.73, "grad_norm": 2.567934800131614, "learning_rate": 1.8385014089582192e-06, "loss": 0.2688, "step": 11550 }, { "epoch": 0.73, "grad_norm": 1.3716559900717848, "learning_rate": 1.8377124364912353e-06, "loss": 0.253, "step": 11551 }, { "epoch": 0.73, "grad_norm": 1.8073247367751648, "learning_rate": 1.8369235952331777e-06, "loss": 0.2723, "step": 11552 }, { "epoch": 0.73, "grad_norm": 4.31472427613521, "learning_rate": 1.8361348852167777e-06, "loss": 0.2721, "step": 11553 }, { "epoch": 0.73, "grad_norm": 1.8746213927342585, "learning_rate": 1.8353463064747629e-06, "loss": 0.2794, "step": 11554 }, { "epoch": 0.73, "grad_norm": 1.976646350966091, "learning_rate": 1.834557859039851e-06, "loss": 0.2614, "step": 11555 }, { "epoch": 0.73, "grad_norm": 1.8511431337909376, "learning_rate": 1.8337695429447545e-06, "loss": 0.2523, "step": 11556 }, { "epoch": 0.73, "grad_norm": 2.471850900623397, "learning_rate": 1.832981358222185e-06, "loss": 0.2729, "step": 11557 }, { "epoch": 0.73, "grad_norm": 1.5496506970491823, "learning_rate": 1.832193304904844e-06, "loss": 0.2541, "step": 11558 }, { "epoch": 0.73, "grad_norm": 1.615265765841692, "learning_rate": 1.831405383025428e-06, "loss": 0.2706, "step": 11559 }, { "epoch": 0.73, "grad_norm": 1.5479936667300176, "learning_rate": 1.8306175926166325e-06, "loss": 0.2746, "step": 11560 }, { "epoch": 0.73, "grad_norm": 1.380276231801311, "learning_rate": 1.8298299337111408e-06, "loss": 0.2718, "step": 11561 }, { "epoch": 0.73, "grad_norm": 1.4341596745639706, "learning_rate": 1.8290424063416373e-06, "loss": 0.2644, "step": 11562 }, { "epoch": 0.73, "grad_norm": 3.034541876921524, "learning_rate": 1.828255010540797e-06, "loss": 0.297, "step": 11563 }, { "epoch": 0.73, "grad_norm": 1.7744960371721548, "learning_rate": 1.8274677463412882e-06, "loss": 0.254, "step": 11564 }, { "epoch": 0.73, "grad_norm": 1.5001021581761966, "learning_rate": 1.8266806137757798e-06, "loss": 0.2655, "step": 11565 }, { "epoch": 0.73, "grad_norm": 2.615301559198779, "learning_rate": 1.8258936128769284e-06, "loss": 0.2687, "step": 11566 }, { "epoch": 0.73, "grad_norm": 6.013126014624267, "learning_rate": 1.8251067436773872e-06, "loss": 0.2796, "step": 11567 }, { "epoch": 0.73, "grad_norm": 1.8850472742960813, "learning_rate": 1.8243200062098088e-06, "loss": 0.2565, "step": 11568 }, { "epoch": 0.73, "grad_norm": 1.5647787071432304, "learning_rate": 1.8235334005068317e-06, "loss": 0.2748, "step": 11569 }, { "epoch": 0.73, "grad_norm": 2.069001867201661, "learning_rate": 1.8227469266010977e-06, "loss": 0.2538, "step": 11570 }, { "epoch": 0.73, "grad_norm": 4.263115722281822, "learning_rate": 1.8219605845252376e-06, "loss": 0.2987, "step": 11571 }, { "epoch": 0.73, "grad_norm": 1.9129042617801826, "learning_rate": 1.8211743743118754e-06, "loss": 0.284, "step": 11572 }, { "epoch": 0.73, "grad_norm": 2.319808480813705, "learning_rate": 1.8203882959936363e-06, "loss": 0.2652, "step": 11573 }, { "epoch": 0.73, "grad_norm": 2.900489197518082, "learning_rate": 1.8196023496031335e-06, "loss": 0.2683, "step": 11574 }, { "epoch": 0.73, "grad_norm": 1.4806769121490957, "learning_rate": 1.8188165351729792e-06, "loss": 0.2531, "step": 11575 }, { "epoch": 0.73, "grad_norm": 3.3055423915071636, "learning_rate": 1.8180308527357776e-06, "loss": 0.2787, "step": 11576 }, { "epoch": 0.73, "grad_norm": 4.6946897793832365, "learning_rate": 1.817245302324126e-06, "loss": 0.2731, "step": 11577 }, { "epoch": 0.73, "grad_norm": 2.6195659234728494, "learning_rate": 1.8164598839706216e-06, "loss": 0.2789, "step": 11578 }, { "epoch": 0.73, "grad_norm": 1.6389624269780703, "learning_rate": 1.815674597707851e-06, "loss": 0.2615, "step": 11579 }, { "epoch": 0.73, "grad_norm": 3.228590629549094, "learning_rate": 1.814889443568396e-06, "loss": 0.2412, "step": 11580 }, { "epoch": 0.73, "grad_norm": 1.825630571773635, "learning_rate": 1.8141044215848368e-06, "loss": 0.2548, "step": 11581 }, { "epoch": 0.73, "grad_norm": 14.064168411558224, "learning_rate": 1.8133195317897423e-06, "loss": 0.2622, "step": 11582 }, { "epoch": 0.73, "grad_norm": 1.5773045508505517, "learning_rate": 1.8125347742156823e-06, "loss": 0.259, "step": 11583 }, { "epoch": 0.73, "grad_norm": 1.5084784166258076, "learning_rate": 1.8117501488952166e-06, "loss": 0.2521, "step": 11584 }, { "epoch": 0.73, "grad_norm": 2.833971890557965, "learning_rate": 1.810965655860898e-06, "loss": 0.2571, "step": 11585 }, { "epoch": 0.73, "grad_norm": 2.91254258856792, "learning_rate": 1.8101812951452808e-06, "loss": 0.2701, "step": 11586 }, { "epoch": 0.73, "grad_norm": 0.6042623625113431, "learning_rate": 1.809397066780907e-06, "loss": 0.456, "step": 11587 }, { "epoch": 0.73, "grad_norm": 1.617086890004137, "learning_rate": 1.8086129708003142e-06, "loss": 0.2647, "step": 11588 }, { "epoch": 0.73, "grad_norm": 2.5307919202942437, "learning_rate": 1.8078290072360382e-06, "loss": 0.2663, "step": 11589 }, { "epoch": 0.73, "grad_norm": 2.5452028492532603, "learning_rate": 1.8070451761206082e-06, "loss": 0.2675, "step": 11590 }, { "epoch": 0.73, "grad_norm": 1.4609771617984315, "learning_rate": 1.806261477486545e-06, "loss": 0.263, "step": 11591 }, { "epoch": 0.73, "grad_norm": 1.8342722329839665, "learning_rate": 1.8054779113663662e-06, "loss": 0.258, "step": 11592 }, { "epoch": 0.73, "grad_norm": 1.735699545196827, "learning_rate": 1.8046944777925806e-06, "loss": 0.2414, "step": 11593 }, { "epoch": 0.73, "grad_norm": 2.20141948652523, "learning_rate": 1.803911176797699e-06, "loss": 0.2677, "step": 11594 }, { "epoch": 0.73, "grad_norm": 2.323350389575852, "learning_rate": 1.8031280084142193e-06, "loss": 0.2714, "step": 11595 }, { "epoch": 0.73, "grad_norm": 1.919456952414611, "learning_rate": 1.802344972674635e-06, "loss": 0.2565, "step": 11596 }, { "epoch": 0.73, "grad_norm": 2.4279110676808817, "learning_rate": 1.8015620696114377e-06, "loss": 0.2703, "step": 11597 }, { "epoch": 0.73, "grad_norm": 2.823465183847571, "learning_rate": 1.8007792992571128e-06, "loss": 0.2616, "step": 11598 }, { "epoch": 0.73, "grad_norm": 1.46668559256316, "learning_rate": 1.7999966616441372e-06, "loss": 0.2607, "step": 11599 }, { "epoch": 0.73, "grad_norm": 4.103914243666078, "learning_rate": 1.7992141568049837e-06, "loss": 0.288, "step": 11600 }, { "epoch": 0.73, "grad_norm": 1.7392868339710725, "learning_rate": 1.798431784772119e-06, "loss": 0.2563, "step": 11601 }, { "epoch": 0.73, "grad_norm": 1.827683844614338, "learning_rate": 1.7976495455780074e-06, "loss": 0.2777, "step": 11602 }, { "epoch": 0.73, "grad_norm": 1.4846781854381141, "learning_rate": 1.796867439255104e-06, "loss": 0.2756, "step": 11603 }, { "epoch": 0.73, "grad_norm": 3.290617499398284, "learning_rate": 1.7960854658358584e-06, "loss": 0.2957, "step": 11604 }, { "epoch": 0.73, "grad_norm": 2.596333979997199, "learning_rate": 1.7953036253527178e-06, "loss": 0.2638, "step": 11605 }, { "epoch": 0.73, "grad_norm": 2.0672337342872313, "learning_rate": 1.7945219178381236e-06, "loss": 0.2683, "step": 11606 }, { "epoch": 0.73, "grad_norm": 3.5226318927342812, "learning_rate": 1.7937403433245087e-06, "loss": 0.28, "step": 11607 }, { "epoch": 0.73, "grad_norm": 1.2869851636087017, "learning_rate": 1.7929589018443016e-06, "loss": 0.2498, "step": 11608 }, { "epoch": 0.73, "grad_norm": 2.5392580115799617, "learning_rate": 1.7921775934299246e-06, "loss": 0.2635, "step": 11609 }, { "epoch": 0.73, "grad_norm": 8.542464530669765, "learning_rate": 1.791396418113799e-06, "loss": 0.2789, "step": 11610 }, { "epoch": 0.73, "grad_norm": 2.4273074585088805, "learning_rate": 1.7906153759283346e-06, "loss": 0.2502, "step": 11611 }, { "epoch": 0.73, "grad_norm": 1.5442717287376975, "learning_rate": 1.789834466905937e-06, "loss": 0.2487, "step": 11612 }, { "epoch": 0.73, "grad_norm": 1.4770303384864432, "learning_rate": 1.7890536910790095e-06, "loss": 0.2492, "step": 11613 }, { "epoch": 0.73, "grad_norm": 1.4491829471908595, "learning_rate": 1.7882730484799494e-06, "loss": 0.2712, "step": 11614 }, { "epoch": 0.73, "grad_norm": 1.5239929949679778, "learning_rate": 1.787492539141145e-06, "loss": 0.27, "step": 11615 }, { "epoch": 0.73, "grad_norm": 3.1348628007020314, "learning_rate": 1.7867121630949814e-06, "loss": 0.2663, "step": 11616 }, { "epoch": 0.73, "grad_norm": 1.5677734227878761, "learning_rate": 1.7859319203738357e-06, "loss": 0.2536, "step": 11617 }, { "epoch": 0.73, "grad_norm": 8.219686016898887, "learning_rate": 1.785151811010085e-06, "loss": 0.2414, "step": 11618 }, { "epoch": 0.73, "grad_norm": 2.4438572905554685, "learning_rate": 1.7843718350360944e-06, "loss": 0.2588, "step": 11619 }, { "epoch": 0.73, "grad_norm": 1.814113199465102, "learning_rate": 1.7835919924842298e-06, "loss": 0.26, "step": 11620 }, { "epoch": 0.73, "grad_norm": 2.7805856542310003, "learning_rate": 1.7828122833868445e-06, "loss": 0.2621, "step": 11621 }, { "epoch": 0.73, "grad_norm": 2.1671388259741913, "learning_rate": 1.7820327077762938e-06, "loss": 0.2487, "step": 11622 }, { "epoch": 0.73, "grad_norm": 2.289129757929059, "learning_rate": 1.7812532656849218e-06, "loss": 0.2776, "step": 11623 }, { "epoch": 0.73, "grad_norm": 1.8509117063410268, "learning_rate": 1.7804739571450675e-06, "loss": 0.2709, "step": 11624 }, { "epoch": 0.73, "grad_norm": 2.2528244140635003, "learning_rate": 1.7796947821890681e-06, "loss": 0.268, "step": 11625 }, { "epoch": 0.73, "grad_norm": 3.195180930040564, "learning_rate": 1.7789157408492513e-06, "loss": 0.2635, "step": 11626 }, { "epoch": 0.73, "grad_norm": 2.200946659581297, "learning_rate": 1.778136833157943e-06, "loss": 0.2733, "step": 11627 }, { "epoch": 0.73, "grad_norm": 1.9669832001505725, "learning_rate": 1.7773580591474599e-06, "loss": 0.2679, "step": 11628 }, { "epoch": 0.73, "grad_norm": 2.146853320790772, "learning_rate": 1.7765794188501133e-06, "loss": 0.2667, "step": 11629 }, { "epoch": 0.73, "grad_norm": 4.671603421163086, "learning_rate": 1.7758009122982144e-06, "loss": 0.2478, "step": 11630 }, { "epoch": 0.73, "grad_norm": 1.4877291221639142, "learning_rate": 1.7750225395240623e-06, "loss": 0.2583, "step": 11631 }, { "epoch": 0.73, "grad_norm": 1.791835426391931, "learning_rate": 1.7742443005599507e-06, "loss": 0.2683, "step": 11632 }, { "epoch": 0.73, "grad_norm": 2.398489748969295, "learning_rate": 1.7734661954381754e-06, "loss": 0.2643, "step": 11633 }, { "epoch": 0.73, "grad_norm": 1.9239909326068136, "learning_rate": 1.7726882241910166e-06, "loss": 0.2454, "step": 11634 }, { "epoch": 0.73, "grad_norm": 1.893966136002067, "learning_rate": 1.771910386850757e-06, "loss": 0.2524, "step": 11635 }, { "epoch": 0.73, "grad_norm": 2.3547275542984183, "learning_rate": 1.7711326834496694e-06, "loss": 0.2678, "step": 11636 }, { "epoch": 0.73, "grad_norm": 2.4015095807488276, "learning_rate": 1.7703551140200203e-06, "loss": 0.2713, "step": 11637 }, { "epoch": 0.73, "grad_norm": 5.010584242813043, "learning_rate": 1.769577678594076e-06, "loss": 0.2388, "step": 11638 }, { "epoch": 0.73, "grad_norm": 2.702886554338077, "learning_rate": 1.7688003772040912e-06, "loss": 0.2969, "step": 11639 }, { "epoch": 0.73, "grad_norm": 2.2686726796212513, "learning_rate": 1.7680232098823164e-06, "loss": 0.2788, "step": 11640 }, { "epoch": 0.73, "grad_norm": 1.6408821662912878, "learning_rate": 1.7672461766609993e-06, "loss": 0.2493, "step": 11641 }, { "epoch": 0.73, "grad_norm": 5.642616086181689, "learning_rate": 1.7664692775723825e-06, "loss": 0.2551, "step": 11642 }, { "epoch": 0.73, "grad_norm": 1.6519694481770981, "learning_rate": 1.7656925126486979e-06, "loss": 0.2634, "step": 11643 }, { "epoch": 0.73, "grad_norm": 1.416757240748026, "learning_rate": 1.7649158819221761e-06, "loss": 0.2641, "step": 11644 }, { "epoch": 0.73, "grad_norm": 2.305871361033636, "learning_rate": 1.7641393854250395e-06, "loss": 0.2666, "step": 11645 }, { "epoch": 0.73, "grad_norm": 1.4664810327906352, "learning_rate": 1.7633630231895083e-06, "loss": 0.2602, "step": 11646 }, { "epoch": 0.73, "grad_norm": 1.7818493241273152, "learning_rate": 1.7625867952477943e-06, "loss": 0.2669, "step": 11647 }, { "epoch": 0.73, "grad_norm": 1.4525700460055968, "learning_rate": 1.7618107016321023e-06, "loss": 0.2501, "step": 11648 }, { "epoch": 0.73, "grad_norm": 1.7809141559111528, "learning_rate": 1.7610347423746365e-06, "loss": 0.2684, "step": 11649 }, { "epoch": 0.73, "grad_norm": 1.7893666663528591, "learning_rate": 1.760258917507594e-06, "loss": 0.25, "step": 11650 }, { "epoch": 0.73, "grad_norm": 1.5303411523326833, "learning_rate": 1.7594832270631633e-06, "loss": 0.261, "step": 11651 }, { "epoch": 0.73, "grad_norm": 0.6455878897895337, "learning_rate": 1.7587076710735296e-06, "loss": 0.4513, "step": 11652 }, { "epoch": 0.73, "grad_norm": 2.8996348615728103, "learning_rate": 1.75793224957087e-06, "loss": 0.2832, "step": 11653 }, { "epoch": 0.73, "grad_norm": 2.2014973717587885, "learning_rate": 1.7571569625873613e-06, "loss": 0.2597, "step": 11654 }, { "epoch": 0.73, "grad_norm": 2.001370566681674, "learning_rate": 1.7563818101551704e-06, "loss": 0.2902, "step": 11655 }, { "epoch": 0.73, "grad_norm": 2.6365757940514447, "learning_rate": 1.7556067923064578e-06, "loss": 0.2573, "step": 11656 }, { "epoch": 0.73, "grad_norm": 1.9545635215836095, "learning_rate": 1.7548319090733822e-06, "loss": 0.2548, "step": 11657 }, { "epoch": 0.73, "grad_norm": 1.3930601162195142, "learning_rate": 1.7540571604880963e-06, "loss": 0.2507, "step": 11658 }, { "epoch": 0.73, "grad_norm": 2.077357826721203, "learning_rate": 1.753282546582744e-06, "loss": 0.2753, "step": 11659 }, { "epoch": 0.73, "grad_norm": 1.5105846893542216, "learning_rate": 1.7525080673894663e-06, "loss": 0.262, "step": 11660 }, { "epoch": 0.73, "grad_norm": 2.0441988034041034, "learning_rate": 1.7517337229403946e-06, "loss": 0.2709, "step": 11661 }, { "epoch": 0.73, "grad_norm": 1.696674881499434, "learning_rate": 1.7509595132676627e-06, "loss": 0.2694, "step": 11662 }, { "epoch": 0.73, "grad_norm": 2.05382432379998, "learning_rate": 1.750185438403391e-06, "loss": 0.2597, "step": 11663 }, { "epoch": 0.73, "grad_norm": 1.4667357356081046, "learning_rate": 1.7494114983796966e-06, "loss": 0.2646, "step": 11664 }, { "epoch": 0.73, "grad_norm": 2.0920409162784868, "learning_rate": 1.7486376932286925e-06, "loss": 0.2512, "step": 11665 }, { "epoch": 0.73, "grad_norm": 2.345179870515097, "learning_rate": 1.7478640229824872e-06, "loss": 0.2571, "step": 11666 }, { "epoch": 0.73, "grad_norm": 2.7376213684105677, "learning_rate": 1.7470904876731804e-06, "loss": 0.2741, "step": 11667 }, { "epoch": 0.73, "grad_norm": 2.771613632062838, "learning_rate": 1.7463170873328667e-06, "loss": 0.2697, "step": 11668 }, { "epoch": 0.73, "grad_norm": 1.55801121445325, "learning_rate": 1.745543821993635e-06, "loss": 0.2608, "step": 11669 }, { "epoch": 0.73, "grad_norm": 0.5823996231364373, "learning_rate": 1.7447706916875717e-06, "loss": 0.4698, "step": 11670 }, { "epoch": 0.73, "grad_norm": 3.647196429200539, "learning_rate": 1.7439976964467532e-06, "loss": 0.2504, "step": 11671 }, { "epoch": 0.73, "grad_norm": 1.6926303658328, "learning_rate": 1.743224836303255e-06, "loss": 0.2425, "step": 11672 }, { "epoch": 0.73, "grad_norm": 1.6919745608462098, "learning_rate": 1.7424521112891418e-06, "loss": 0.2689, "step": 11673 }, { "epoch": 0.73, "grad_norm": 1.9286600778162801, "learning_rate": 1.741679521436478e-06, "loss": 0.277, "step": 11674 }, { "epoch": 0.73, "grad_norm": 34.17848592255194, "learning_rate": 1.740907066777318e-06, "loss": 0.2728, "step": 11675 }, { "epoch": 0.73, "grad_norm": 1.7389085754189018, "learning_rate": 1.7401347473437124e-06, "loss": 0.2624, "step": 11676 }, { "epoch": 0.73, "grad_norm": 1.5115580443115493, "learning_rate": 1.7393625631677052e-06, "loss": 0.2638, "step": 11677 }, { "epoch": 0.73, "grad_norm": 2.299529952387337, "learning_rate": 1.7385905142813387e-06, "loss": 0.2683, "step": 11678 }, { "epoch": 0.73, "grad_norm": 4.678281920994348, "learning_rate": 1.7378186007166426e-06, "loss": 0.2594, "step": 11679 }, { "epoch": 0.73, "grad_norm": 2.6147927500913823, "learning_rate": 1.7370468225056487e-06, "loss": 0.2489, "step": 11680 }, { "epoch": 0.73, "grad_norm": 3.3803832366180733, "learning_rate": 1.7362751796803757e-06, "loss": 0.2699, "step": 11681 }, { "epoch": 0.73, "grad_norm": 2.2956701821478753, "learning_rate": 1.7355036722728447e-06, "loss": 0.2695, "step": 11682 }, { "epoch": 0.73, "grad_norm": 2.0852043888052307, "learning_rate": 1.7347323003150647e-06, "loss": 0.2632, "step": 11683 }, { "epoch": 0.73, "grad_norm": 0.5722202040312103, "learning_rate": 1.7339610638390413e-06, "loss": 0.4754, "step": 11684 }, { "epoch": 0.73, "grad_norm": 1.4999099087433647, "learning_rate": 1.733189962876773e-06, "loss": 0.2473, "step": 11685 }, { "epoch": 0.73, "grad_norm": 1.3405561995654947, "learning_rate": 1.7324189974602552e-06, "loss": 0.2453, "step": 11686 }, { "epoch": 0.73, "grad_norm": 1.7920033792237762, "learning_rate": 1.731648167621479e-06, "loss": 0.2598, "step": 11687 }, { "epoch": 0.74, "grad_norm": 1.9209915036232665, "learning_rate": 1.7308774733924254e-06, "loss": 0.2486, "step": 11688 }, { "epoch": 0.74, "grad_norm": 1.2248039585299622, "learning_rate": 1.7301069148050708e-06, "loss": 0.2375, "step": 11689 }, { "epoch": 0.74, "grad_norm": 2.2562677500023627, "learning_rate": 1.7293364918913896e-06, "loss": 0.2654, "step": 11690 }, { "epoch": 0.74, "grad_norm": 2.5772938891061674, "learning_rate": 1.7285662046833469e-06, "loss": 0.2592, "step": 11691 }, { "epoch": 0.74, "grad_norm": 1.263217946340115, "learning_rate": 1.727796053212903e-06, "loss": 0.257, "step": 11692 }, { "epoch": 0.74, "grad_norm": 1.4642998078322564, "learning_rate": 1.7270260375120114e-06, "loss": 0.2497, "step": 11693 }, { "epoch": 0.74, "grad_norm": 1.922779491169293, "learning_rate": 1.7262561576126225e-06, "loss": 0.255, "step": 11694 }, { "epoch": 0.74, "grad_norm": 1.7178176465998325, "learning_rate": 1.7254864135466825e-06, "loss": 0.2653, "step": 11695 }, { "epoch": 0.74, "grad_norm": 1.791889022442444, "learning_rate": 1.7247168053461272e-06, "loss": 0.269, "step": 11696 }, { "epoch": 0.74, "grad_norm": 2.0789070804220042, "learning_rate": 1.7239473330428874e-06, "loss": 0.2611, "step": 11697 }, { "epoch": 0.74, "grad_norm": 2.733873805643619, "learning_rate": 1.7231779966688938e-06, "loss": 0.2638, "step": 11698 }, { "epoch": 0.74, "grad_norm": 2.8731252908626708, "learning_rate": 1.7224087962560654e-06, "loss": 0.2865, "step": 11699 }, { "epoch": 0.74, "grad_norm": 0.5683024424539835, "learning_rate": 1.7216397318363175e-06, "loss": 0.4845, "step": 11700 }, { "epoch": 0.74, "grad_norm": 2.3266079086383, "learning_rate": 1.7208708034415566e-06, "loss": 0.2603, "step": 11701 }, { "epoch": 0.74, "grad_norm": 1.4772091036325459, "learning_rate": 1.7201020111036942e-06, "loss": 0.249, "step": 11702 }, { "epoch": 0.74, "grad_norm": 2.3318413488655816, "learning_rate": 1.7193333548546248e-06, "loss": 0.2519, "step": 11703 }, { "epoch": 0.74, "grad_norm": 5.879203382531289, "learning_rate": 1.7185648347262419e-06, "loss": 0.2798, "step": 11704 }, { "epoch": 0.74, "grad_norm": 4.337589940419514, "learning_rate": 1.7177964507504309e-06, "loss": 0.2612, "step": 11705 }, { "epoch": 0.74, "grad_norm": 2.8062267155195184, "learning_rate": 1.7170282029590762e-06, "loss": 0.2572, "step": 11706 }, { "epoch": 0.74, "grad_norm": 4.588670748118247, "learning_rate": 1.7162600913840527e-06, "loss": 0.2658, "step": 11707 }, { "epoch": 0.74, "grad_norm": 3.0250954480159216, "learning_rate": 1.715492116057229e-06, "loss": 0.27, "step": 11708 }, { "epoch": 0.74, "grad_norm": 3.308641864443634, "learning_rate": 1.7147242770104716e-06, "loss": 0.2613, "step": 11709 }, { "epoch": 0.74, "grad_norm": 2.746783481461446, "learning_rate": 1.7139565742756409e-06, "loss": 0.2492, "step": 11710 }, { "epoch": 0.74, "grad_norm": 3.0127040872933692, "learning_rate": 1.713189007884588e-06, "loss": 0.2541, "step": 11711 }, { "epoch": 0.74, "grad_norm": 1.461726681942587, "learning_rate": 1.7124215778691616e-06, "loss": 0.25, "step": 11712 }, { "epoch": 0.74, "grad_norm": 1.7282673392088093, "learning_rate": 1.7116542842612017e-06, "loss": 0.2601, "step": 11713 }, { "epoch": 0.74, "grad_norm": 2.184984498330675, "learning_rate": 1.710887127092548e-06, "loss": 0.2656, "step": 11714 }, { "epoch": 0.74, "grad_norm": 2.3939637027447924, "learning_rate": 1.7101201063950295e-06, "loss": 0.2477, "step": 11715 }, { "epoch": 0.74, "grad_norm": 2.0316670554870813, "learning_rate": 1.7093532222004694e-06, "loss": 0.2789, "step": 11716 }, { "epoch": 0.74, "grad_norm": 2.2368565527433213, "learning_rate": 1.7085864745406894e-06, "loss": 0.2769, "step": 11717 }, { "epoch": 0.74, "grad_norm": 1.6876175509679776, "learning_rate": 1.707819863447504e-06, "loss": 0.2523, "step": 11718 }, { "epoch": 0.74, "grad_norm": 0.5491355963980498, "learning_rate": 1.7070533889527202e-06, "loss": 0.4875, "step": 11719 }, { "epoch": 0.74, "grad_norm": 2.0802363436224196, "learning_rate": 1.7062870510881401e-06, "loss": 0.277, "step": 11720 }, { "epoch": 0.74, "grad_norm": 2.3939600406632313, "learning_rate": 1.7055208498855591e-06, "loss": 0.2505, "step": 11721 }, { "epoch": 0.74, "grad_norm": 1.6561300208490082, "learning_rate": 1.7047547853767714e-06, "loss": 0.2656, "step": 11722 }, { "epoch": 0.74, "grad_norm": 3.085061399844055, "learning_rate": 1.7039888575935588e-06, "loss": 0.2596, "step": 11723 }, { "epoch": 0.74, "grad_norm": 2.35310945864418, "learning_rate": 1.7032230665677046e-06, "loss": 0.2597, "step": 11724 }, { "epoch": 0.74, "grad_norm": 1.4761506138469251, "learning_rate": 1.7024574123309795e-06, "loss": 0.2599, "step": 11725 }, { "epoch": 0.74, "grad_norm": 1.4213739859126764, "learning_rate": 1.7016918949151551e-06, "loss": 0.2459, "step": 11726 }, { "epoch": 0.74, "grad_norm": 1.5246435825088704, "learning_rate": 1.7009265143519927e-06, "loss": 0.2595, "step": 11727 }, { "epoch": 0.74, "grad_norm": 2.3481570591323324, "learning_rate": 1.7001612706732484e-06, "loss": 0.2469, "step": 11728 }, { "epoch": 0.74, "grad_norm": 2.235773637995172, "learning_rate": 1.6993961639106726e-06, "loss": 0.2686, "step": 11729 }, { "epoch": 0.74, "grad_norm": 5.271644764279985, "learning_rate": 1.6986311940960148e-06, "loss": 0.2753, "step": 11730 }, { "epoch": 0.74, "grad_norm": 1.937126375319672, "learning_rate": 1.6978663612610102e-06, "loss": 0.2731, "step": 11731 }, { "epoch": 0.74, "grad_norm": 1.8566076457703664, "learning_rate": 1.6971016654373979e-06, "loss": 0.25, "step": 11732 }, { "epoch": 0.74, "grad_norm": 2.036749952673217, "learning_rate": 1.6963371066569017e-06, "loss": 0.2727, "step": 11733 }, { "epoch": 0.74, "grad_norm": 2.7444668121962237, "learning_rate": 1.6955726849512495e-06, "loss": 0.2608, "step": 11734 }, { "epoch": 0.74, "grad_norm": 2.2119610463202544, "learning_rate": 1.6948084003521553e-06, "loss": 0.2685, "step": 11735 }, { "epoch": 0.74, "grad_norm": 1.5387795986646753, "learning_rate": 1.6940442528913309e-06, "loss": 0.2516, "step": 11736 }, { "epoch": 0.74, "grad_norm": 2.508764890414543, "learning_rate": 1.6932802426004812e-06, "loss": 0.2688, "step": 11737 }, { "epoch": 0.74, "grad_norm": 2.67967061402469, "learning_rate": 1.692516369511308e-06, "loss": 0.2596, "step": 11738 }, { "epoch": 0.74, "grad_norm": 0.6086632782889763, "learning_rate": 1.6917526336555068e-06, "loss": 0.5037, "step": 11739 }, { "epoch": 0.74, "grad_norm": 7.296951764295234, "learning_rate": 1.6909890350647645e-06, "loss": 0.2721, "step": 11740 }, { "epoch": 0.74, "grad_norm": 4.745862100151195, "learning_rate": 1.6902255737707634e-06, "loss": 0.2608, "step": 11741 }, { "epoch": 0.74, "grad_norm": 1.730642746716277, "learning_rate": 1.6894622498051838e-06, "loss": 0.2485, "step": 11742 }, { "epoch": 0.74, "grad_norm": 3.094308521086035, "learning_rate": 1.6886990631996951e-06, "loss": 0.2685, "step": 11743 }, { "epoch": 0.74, "grad_norm": 4.691004634565247, "learning_rate": 1.6879360139859646e-06, "loss": 0.2427, "step": 11744 }, { "epoch": 0.74, "grad_norm": 0.60926938700073, "learning_rate": 1.6871731021956495e-06, "loss": 0.4766, "step": 11745 }, { "epoch": 0.74, "grad_norm": 1.9327936579866638, "learning_rate": 1.6864103278604071e-06, "loss": 0.2583, "step": 11746 }, { "epoch": 0.74, "grad_norm": 1.493622110827338, "learning_rate": 1.685647691011887e-06, "loss": 0.2675, "step": 11747 }, { "epoch": 0.74, "grad_norm": 2.4140127398872653, "learning_rate": 1.6848851916817316e-06, "loss": 0.2686, "step": 11748 }, { "epoch": 0.74, "grad_norm": 3.9979295985962144, "learning_rate": 1.6841228299015755e-06, "loss": 0.2646, "step": 11749 }, { "epoch": 0.74, "grad_norm": 2.257469315564016, "learning_rate": 1.683360605703055e-06, "loss": 0.2735, "step": 11750 }, { "epoch": 0.74, "grad_norm": 3.7783167662283246, "learning_rate": 1.6825985191177941e-06, "loss": 0.2637, "step": 11751 }, { "epoch": 0.74, "grad_norm": 4.997435546723059, "learning_rate": 1.681836570177413e-06, "loss": 0.2706, "step": 11752 }, { "epoch": 0.74, "grad_norm": 0.597448862879755, "learning_rate": 1.681074758913523e-06, "loss": 0.5039, "step": 11753 }, { "epoch": 0.74, "grad_norm": 1.9406822339699066, "learning_rate": 1.6803130853577392e-06, "loss": 0.2589, "step": 11754 }, { "epoch": 0.74, "grad_norm": 3.901023273563546, "learning_rate": 1.679551549541662e-06, "loss": 0.2591, "step": 11755 }, { "epoch": 0.74, "grad_norm": 0.578241439993665, "learning_rate": 1.678790151496889e-06, "loss": 0.473, "step": 11756 }, { "epoch": 0.74, "grad_norm": 13.754896853850894, "learning_rate": 1.6780288912550102e-06, "loss": 0.2598, "step": 11757 }, { "epoch": 0.74, "grad_norm": 3.9530054450688783, "learning_rate": 1.6772677688476146e-06, "loss": 0.2555, "step": 11758 }, { "epoch": 0.74, "grad_norm": 1.4590854111521858, "learning_rate": 1.6765067843062816e-06, "loss": 0.2727, "step": 11759 }, { "epoch": 0.74, "grad_norm": 1.8566288786439034, "learning_rate": 1.6757459376625852e-06, "loss": 0.2562, "step": 11760 }, { "epoch": 0.74, "grad_norm": 3.599038445668937, "learning_rate": 1.6749852289480906e-06, "loss": 0.2493, "step": 11761 }, { "epoch": 0.74, "grad_norm": 2.1675105011173987, "learning_rate": 1.6742246581943683e-06, "loss": 0.2554, "step": 11762 }, { "epoch": 0.74, "grad_norm": 1.8702437625465167, "learning_rate": 1.673464225432972e-06, "loss": 0.2645, "step": 11763 }, { "epoch": 0.74, "grad_norm": 5.628692368302644, "learning_rate": 1.672703930695454e-06, "loss": 0.2567, "step": 11764 }, { "epoch": 0.74, "grad_norm": 0.5839239474989595, "learning_rate": 1.6719437740133577e-06, "loss": 0.4346, "step": 11765 }, { "epoch": 0.74, "grad_norm": 2.07993418322798, "learning_rate": 1.6711837554182276e-06, "loss": 0.2704, "step": 11766 }, { "epoch": 0.74, "grad_norm": 2.081586985271671, "learning_rate": 1.6704238749415958e-06, "loss": 0.2715, "step": 11767 }, { "epoch": 0.74, "grad_norm": 2.54605044868619, "learning_rate": 1.6696641326149897e-06, "loss": 0.2447, "step": 11768 }, { "epoch": 0.74, "grad_norm": 7.49403293486929, "learning_rate": 1.6689045284699347e-06, "loss": 0.2713, "step": 11769 }, { "epoch": 0.74, "grad_norm": 1.7950145411044751, "learning_rate": 1.6681450625379487e-06, "loss": 0.2581, "step": 11770 }, { "epoch": 0.74, "grad_norm": 2.933289996831663, "learning_rate": 1.6673857348505422e-06, "loss": 0.2577, "step": 11771 }, { "epoch": 0.74, "grad_norm": 2.0326330429046733, "learning_rate": 1.6666265454392206e-06, "loss": 0.2669, "step": 11772 }, { "epoch": 0.74, "grad_norm": 1.9024021251154875, "learning_rate": 1.665867494335483e-06, "loss": 0.2706, "step": 11773 }, { "epoch": 0.74, "grad_norm": 3.053164362489364, "learning_rate": 1.6651085815708268e-06, "loss": 0.2647, "step": 11774 }, { "epoch": 0.74, "grad_norm": 4.514542001307621, "learning_rate": 1.664349807176739e-06, "loss": 0.2515, "step": 11775 }, { "epoch": 0.74, "grad_norm": 3.7235526814462925, "learning_rate": 1.6635911711847007e-06, "loss": 0.255, "step": 11776 }, { "epoch": 0.74, "grad_norm": 2.166238222918449, "learning_rate": 1.6628326736261914e-06, "loss": 0.2648, "step": 11777 }, { "epoch": 0.74, "grad_norm": 1.6559977904009444, "learning_rate": 1.6620743145326829e-06, "loss": 0.2807, "step": 11778 }, { "epoch": 0.74, "grad_norm": 2.0426253329229977, "learning_rate": 1.6613160939356403e-06, "loss": 0.255, "step": 11779 }, { "epoch": 0.74, "grad_norm": 2.0517655887107757, "learning_rate": 1.6605580118665233e-06, "loss": 0.2503, "step": 11780 }, { "epoch": 0.74, "grad_norm": 2.1754399538743616, "learning_rate": 1.6598000683567838e-06, "loss": 0.2556, "step": 11781 }, { "epoch": 0.74, "grad_norm": 3.901653787557663, "learning_rate": 1.659042263437874e-06, "loss": 0.2483, "step": 11782 }, { "epoch": 0.74, "grad_norm": 7.345049395981367, "learning_rate": 1.6582845971412332e-06, "loss": 0.2777, "step": 11783 }, { "epoch": 0.74, "grad_norm": 2.06937098899403, "learning_rate": 1.657527069498302e-06, "loss": 0.2586, "step": 11784 }, { "epoch": 0.74, "grad_norm": 1.5680557772512367, "learning_rate": 1.6567696805405075e-06, "loss": 0.2572, "step": 11785 }, { "epoch": 0.74, "grad_norm": 1.3874081282609536, "learning_rate": 1.656012430299279e-06, "loss": 0.2771, "step": 11786 }, { "epoch": 0.74, "grad_norm": 0.5989031663760926, "learning_rate": 1.655255318806035e-06, "loss": 0.4963, "step": 11787 }, { "epoch": 0.74, "grad_norm": 4.529049548014022, "learning_rate": 1.6544983460921882e-06, "loss": 0.2653, "step": 11788 }, { "epoch": 0.74, "grad_norm": 1.802033404813695, "learning_rate": 1.6537415121891454e-06, "loss": 0.2643, "step": 11789 }, { "epoch": 0.74, "grad_norm": 1.6588807491390738, "learning_rate": 1.6529848171283113e-06, "loss": 0.2607, "step": 11790 }, { "epoch": 0.74, "grad_norm": 1.8993431460926697, "learning_rate": 1.652228260941084e-06, "loss": 0.249, "step": 11791 }, { "epoch": 0.74, "grad_norm": 14.431290294460897, "learning_rate": 1.6514718436588517e-06, "loss": 0.2689, "step": 11792 }, { "epoch": 0.74, "grad_norm": 3.036317463653162, "learning_rate": 1.6507155653129992e-06, "loss": 0.2694, "step": 11793 }, { "epoch": 0.74, "grad_norm": 1.8506399393182689, "learning_rate": 1.6499594259349083e-06, "loss": 0.2653, "step": 11794 }, { "epoch": 0.74, "grad_norm": 1.4949799470416039, "learning_rate": 1.6492034255559514e-06, "loss": 0.2744, "step": 11795 }, { "epoch": 0.74, "grad_norm": 5.313066645298156, "learning_rate": 1.6484475642074955e-06, "loss": 0.264, "step": 11796 }, { "epoch": 0.74, "grad_norm": 2.4319119523253767, "learning_rate": 1.6476918419209021e-06, "loss": 0.2578, "step": 11797 }, { "epoch": 0.74, "grad_norm": 7.21520285802145, "learning_rate": 1.6469362587275283e-06, "loss": 0.2688, "step": 11798 }, { "epoch": 0.74, "grad_norm": 1.9102041030243637, "learning_rate": 1.6461808146587266e-06, "loss": 0.2512, "step": 11799 }, { "epoch": 0.74, "grad_norm": 2.13520231046298, "learning_rate": 1.6454255097458399e-06, "loss": 0.2572, "step": 11800 }, { "epoch": 0.74, "grad_norm": 0.6482321469633137, "learning_rate": 1.6446703440202054e-06, "loss": 0.4889, "step": 11801 }, { "epoch": 0.74, "grad_norm": 2.4670376716034883, "learning_rate": 1.6439153175131601e-06, "loss": 0.2662, "step": 11802 }, { "epoch": 0.74, "grad_norm": 2.500349371351017, "learning_rate": 1.6431604302560289e-06, "loss": 0.2621, "step": 11803 }, { "epoch": 0.74, "grad_norm": 2.568830629198864, "learning_rate": 1.6424056822801343e-06, "loss": 0.27, "step": 11804 }, { "epoch": 0.74, "grad_norm": 1.689698718055636, "learning_rate": 1.6416510736167894e-06, "loss": 0.2718, "step": 11805 }, { "epoch": 0.74, "grad_norm": 4.305639380884586, "learning_rate": 1.6408966042973073e-06, "loss": 0.2599, "step": 11806 }, { "epoch": 0.74, "grad_norm": 2.9843904255661617, "learning_rate": 1.6401422743529927e-06, "loss": 0.2596, "step": 11807 }, { "epoch": 0.74, "grad_norm": 1.5982928678592767, "learning_rate": 1.6393880838151432e-06, "loss": 0.2589, "step": 11808 }, { "epoch": 0.74, "grad_norm": 1.4961203988435854, "learning_rate": 1.6386340327150496e-06, "loss": 0.2616, "step": 11809 }, { "epoch": 0.74, "grad_norm": 2.2307769427273816, "learning_rate": 1.6378801210840018e-06, "loss": 0.2721, "step": 11810 }, { "epoch": 0.74, "grad_norm": 1.9924991045431384, "learning_rate": 1.6371263489532795e-06, "loss": 0.2602, "step": 11811 }, { "epoch": 0.74, "grad_norm": 4.355843185123422, "learning_rate": 1.6363727163541587e-06, "loss": 0.2713, "step": 11812 }, { "epoch": 0.74, "grad_norm": 1.6402361533211867, "learning_rate": 1.6356192233179063e-06, "loss": 0.2499, "step": 11813 }, { "epoch": 0.74, "grad_norm": 2.775833491056217, "learning_rate": 1.634865869875788e-06, "loss": 0.2841, "step": 11814 }, { "epoch": 0.74, "grad_norm": 2.4126877447225055, "learning_rate": 1.6341126560590637e-06, "loss": 0.2692, "step": 11815 }, { "epoch": 0.74, "grad_norm": 1.721410768568857, "learning_rate": 1.633359581898984e-06, "loss": 0.2507, "step": 11816 }, { "epoch": 0.74, "grad_norm": 0.588412549744532, "learning_rate": 1.6326066474267931e-06, "loss": 0.4395, "step": 11817 }, { "epoch": 0.74, "grad_norm": 2.3592591381615065, "learning_rate": 1.6318538526737354e-06, "loss": 0.2556, "step": 11818 }, { "epoch": 0.74, "grad_norm": 2.297946060006865, "learning_rate": 1.631101197671044e-06, "loss": 0.2519, "step": 11819 }, { "epoch": 0.74, "grad_norm": 1.6660246422459932, "learning_rate": 1.6303486824499459e-06, "loss": 0.2548, "step": 11820 }, { "epoch": 0.74, "grad_norm": 1.7470054571205338, "learning_rate": 1.6295963070416658e-06, "loss": 0.2657, "step": 11821 }, { "epoch": 0.74, "grad_norm": 0.6014955834137027, "learning_rate": 1.628844071477424e-06, "loss": 0.4609, "step": 11822 }, { "epoch": 0.74, "grad_norm": 2.769016713587465, "learning_rate": 1.628091975788429e-06, "loss": 0.2657, "step": 11823 }, { "epoch": 0.74, "grad_norm": 1.8453180889738028, "learning_rate": 1.627340020005887e-06, "loss": 0.291, "step": 11824 }, { "epoch": 0.74, "grad_norm": 1.4959257455667023, "learning_rate": 1.6265882041609964e-06, "loss": 0.2688, "step": 11825 }, { "epoch": 0.74, "grad_norm": 1.7455441204424988, "learning_rate": 1.6258365282849547e-06, "loss": 0.2446, "step": 11826 }, { "epoch": 0.74, "grad_norm": 2.5213502263259033, "learning_rate": 1.6250849924089485e-06, "loss": 0.2907, "step": 11827 }, { "epoch": 0.74, "grad_norm": 1.9003721308085535, "learning_rate": 1.6243335965641593e-06, "loss": 0.2483, "step": 11828 }, { "epoch": 0.74, "grad_norm": 1.6456645060294284, "learning_rate": 1.6235823407817647e-06, "loss": 0.246, "step": 11829 }, { "epoch": 0.74, "grad_norm": 2.3052556292761452, "learning_rate": 1.622831225092938e-06, "loss": 0.2571, "step": 11830 }, { "epoch": 0.74, "grad_norm": 1.7321240346034084, "learning_rate": 1.6220802495288418e-06, "loss": 0.2507, "step": 11831 }, { "epoch": 0.74, "grad_norm": 2.1755243859919093, "learning_rate": 1.6213294141206366e-06, "loss": 0.2687, "step": 11832 }, { "epoch": 0.74, "grad_norm": 4.039563658824441, "learning_rate": 1.6205787188994726e-06, "loss": 0.2647, "step": 11833 }, { "epoch": 0.74, "grad_norm": 5.4653985842077635, "learning_rate": 1.6198281638965023e-06, "loss": 0.2693, "step": 11834 }, { "epoch": 0.74, "grad_norm": 2.6229795598659216, "learning_rate": 1.619077749142864e-06, "loss": 0.2536, "step": 11835 }, { "epoch": 0.74, "grad_norm": 14.362800910237805, "learning_rate": 1.618327474669696e-06, "loss": 0.2458, "step": 11836 }, { "epoch": 0.74, "grad_norm": 2.165323961044263, "learning_rate": 1.6175773405081263e-06, "loss": 0.2558, "step": 11837 }, { "epoch": 0.74, "grad_norm": 3.0797768971008512, "learning_rate": 1.6168273466892826e-06, "loss": 0.2724, "step": 11838 }, { "epoch": 0.74, "grad_norm": 1.51796264846945, "learning_rate": 1.6160774932442808e-06, "loss": 0.2655, "step": 11839 }, { "epoch": 0.74, "grad_norm": 2.81021565265868, "learning_rate": 1.6153277802042349e-06, "loss": 0.2753, "step": 11840 }, { "epoch": 0.74, "grad_norm": 5.32776950753152, "learning_rate": 1.6145782076002493e-06, "loss": 0.2506, "step": 11841 }, { "epoch": 0.74, "grad_norm": 3.8041643534287357, "learning_rate": 1.6138287754634285e-06, "loss": 0.2615, "step": 11842 }, { "epoch": 0.74, "grad_norm": 1.9553419239251053, "learning_rate": 1.6130794838248653e-06, "loss": 0.2461, "step": 11843 }, { "epoch": 0.74, "grad_norm": 1.8993415032975691, "learning_rate": 1.6123303327156515e-06, "loss": 0.241, "step": 11844 }, { "epoch": 0.74, "grad_norm": 1.476463975922597, "learning_rate": 1.6115813221668676e-06, "loss": 0.252, "step": 11845 }, { "epoch": 0.74, "grad_norm": 1.791918695056272, "learning_rate": 1.6108324522095948e-06, "loss": 0.269, "step": 11846 }, { "epoch": 0.75, "grad_norm": 9.34204523919045, "learning_rate": 1.6100837228749034e-06, "loss": 0.2535, "step": 11847 }, { "epoch": 0.75, "grad_norm": 1.9681715046276151, "learning_rate": 1.6093351341938595e-06, "loss": 0.2697, "step": 11848 }, { "epoch": 0.75, "grad_norm": 2.92936775419886, "learning_rate": 1.6085866861975219e-06, "loss": 0.2582, "step": 11849 }, { "epoch": 0.75, "grad_norm": 2.7529375084635115, "learning_rate": 1.6078383789169466e-06, "loss": 0.2626, "step": 11850 }, { "epoch": 0.75, "grad_norm": 3.7072302835746838, "learning_rate": 1.6070902123831832e-06, "loss": 0.2662, "step": 11851 }, { "epoch": 0.75, "grad_norm": 1.57082915701216, "learning_rate": 1.6063421866272738e-06, "loss": 0.2618, "step": 11852 }, { "epoch": 0.75, "grad_norm": 1.8261968009035916, "learning_rate": 1.6055943016802533e-06, "loss": 0.2644, "step": 11853 }, { "epoch": 0.75, "grad_norm": 2.4569429665757117, "learning_rate": 1.604846557573156e-06, "loss": 0.2488, "step": 11854 }, { "epoch": 0.75, "grad_norm": 1.946417091387337, "learning_rate": 1.6040989543370057e-06, "loss": 0.2365, "step": 11855 }, { "epoch": 0.75, "grad_norm": 2.243356464107158, "learning_rate": 1.603351492002821e-06, "loss": 0.249, "step": 11856 }, { "epoch": 0.75, "grad_norm": 2.093287370996489, "learning_rate": 1.602604170601615e-06, "loss": 0.2695, "step": 11857 }, { "epoch": 0.75, "grad_norm": 1.5898220441347646, "learning_rate": 1.601856990164396e-06, "loss": 0.2597, "step": 11858 }, { "epoch": 0.75, "grad_norm": 2.709914051757555, "learning_rate": 1.6011099507221677e-06, "loss": 0.2553, "step": 11859 }, { "epoch": 0.75, "grad_norm": 2.9656589445698858, "learning_rate": 1.600363052305925e-06, "loss": 0.2483, "step": 11860 }, { "epoch": 0.75, "grad_norm": 2.1406403928513473, "learning_rate": 1.599616294946656e-06, "loss": 0.2502, "step": 11861 }, { "epoch": 0.75, "grad_norm": 1.5859280108116744, "learning_rate": 1.5988696786753489e-06, "loss": 0.2568, "step": 11862 }, { "epoch": 0.75, "grad_norm": 2.451305252896568, "learning_rate": 1.5981232035229789e-06, "loss": 0.2579, "step": 11863 }, { "epoch": 0.75, "grad_norm": 1.8968391425248419, "learning_rate": 1.59737686952052e-06, "loss": 0.2645, "step": 11864 }, { "epoch": 0.75, "grad_norm": 1.7087956113154734, "learning_rate": 1.596630676698937e-06, "loss": 0.2586, "step": 11865 }, { "epoch": 0.75, "grad_norm": 2.6320847648591372, "learning_rate": 1.5958846250891924e-06, "loss": 0.2636, "step": 11866 }, { "epoch": 0.75, "grad_norm": 2.3935022032319897, "learning_rate": 1.5951387147222426e-06, "loss": 0.2676, "step": 11867 }, { "epoch": 0.75, "grad_norm": 1.972545154743655, "learning_rate": 1.5943929456290353e-06, "loss": 0.2584, "step": 11868 }, { "epoch": 0.75, "grad_norm": 1.3620123371947725, "learning_rate": 1.5936473178405115e-06, "loss": 0.2434, "step": 11869 }, { "epoch": 0.75, "grad_norm": 1.7552358800792407, "learning_rate": 1.5929018313876133e-06, "loss": 0.2584, "step": 11870 }, { "epoch": 0.75, "grad_norm": 1.484971098012521, "learning_rate": 1.5921564863012696e-06, "loss": 0.2621, "step": 11871 }, { "epoch": 0.75, "grad_norm": 1.3294578172874192, "learning_rate": 1.5914112826124046e-06, "loss": 0.2528, "step": 11872 }, { "epoch": 0.75, "grad_norm": 1.7504983993422945, "learning_rate": 1.5906662203519413e-06, "loss": 0.2433, "step": 11873 }, { "epoch": 0.75, "grad_norm": 2.3484787834414336, "learning_rate": 1.5899212995507907e-06, "loss": 0.272, "step": 11874 }, { "epoch": 0.75, "grad_norm": 3.7115203724763024, "learning_rate": 1.5891765202398636e-06, "loss": 0.2535, "step": 11875 }, { "epoch": 0.75, "grad_norm": 2.4853380230261126, "learning_rate": 1.5884318824500616e-06, "loss": 0.2553, "step": 11876 }, { "epoch": 0.75, "grad_norm": 1.5846041938247355, "learning_rate": 1.5876873862122781e-06, "loss": 0.2719, "step": 11877 }, { "epoch": 0.75, "grad_norm": 1.4196277911506971, "learning_rate": 1.586943031557408e-06, "loss": 0.2557, "step": 11878 }, { "epoch": 0.75, "grad_norm": 1.9084763581467403, "learning_rate": 1.586198818516334e-06, "loss": 0.2528, "step": 11879 }, { "epoch": 0.75, "grad_norm": 1.710566703156637, "learning_rate": 1.5854547471199328e-06, "loss": 0.2591, "step": 11880 }, { "epoch": 0.75, "grad_norm": 3.1133888050406626, "learning_rate": 1.5847108173990804e-06, "loss": 0.2901, "step": 11881 }, { "epoch": 0.75, "grad_norm": 3.8562106526254234, "learning_rate": 1.583967029384641e-06, "loss": 0.2629, "step": 11882 }, { "epoch": 0.75, "grad_norm": 0.6122073234864452, "learning_rate": 1.5832233831074783e-06, "loss": 0.4742, "step": 11883 }, { "epoch": 0.75, "grad_norm": 0.6017867089945995, "learning_rate": 1.582479878598447e-06, "loss": 0.4793, "step": 11884 }, { "epoch": 0.75, "grad_norm": 1.6468932831399952, "learning_rate": 1.581736515888394e-06, "loss": 0.2553, "step": 11885 }, { "epoch": 0.75, "grad_norm": 2.9967885114687465, "learning_rate": 1.5809932950081657e-06, "loss": 0.2653, "step": 11886 }, { "epoch": 0.75, "grad_norm": 2.266315630618689, "learning_rate": 1.5802502159885968e-06, "loss": 0.2681, "step": 11887 }, { "epoch": 0.75, "grad_norm": 4.986349448661057, "learning_rate": 1.5795072788605226e-06, "loss": 0.2631, "step": 11888 }, { "epoch": 0.75, "grad_norm": 3.7236149444049063, "learning_rate": 1.5787644836547667e-06, "loss": 0.2822, "step": 11889 }, { "epoch": 0.75, "grad_norm": 2.077695773204747, "learning_rate": 1.5780218304021477e-06, "loss": 0.263, "step": 11890 }, { "epoch": 0.75, "grad_norm": 2.5311655300179265, "learning_rate": 1.5772793191334828e-06, "loss": 0.255, "step": 11891 }, { "epoch": 0.75, "grad_norm": 2.4024394966952487, "learning_rate": 1.576536949879578e-06, "loss": 0.2611, "step": 11892 }, { "epoch": 0.75, "grad_norm": 1.950961361803954, "learning_rate": 1.5757947226712344e-06, "loss": 0.2727, "step": 11893 }, { "epoch": 0.75, "grad_norm": 18.33122662785955, "learning_rate": 1.5750526375392517e-06, "loss": 0.281, "step": 11894 }, { "epoch": 0.75, "grad_norm": 1.494641768361291, "learning_rate": 1.5743106945144166e-06, "loss": 0.2587, "step": 11895 }, { "epoch": 0.75, "grad_norm": 2.407395698512057, "learning_rate": 1.5735688936275174e-06, "loss": 0.2778, "step": 11896 }, { "epoch": 0.75, "grad_norm": 1.5668321288737272, "learning_rate": 1.5728272349093287e-06, "loss": 0.2511, "step": 11897 }, { "epoch": 0.75, "grad_norm": 1.810810671072288, "learning_rate": 1.5720857183906275e-06, "loss": 0.2566, "step": 11898 }, { "epoch": 0.75, "grad_norm": 2.810282556503871, "learning_rate": 1.571344344102178e-06, "loss": 0.2534, "step": 11899 }, { "epoch": 0.75, "grad_norm": 2.1878011512487316, "learning_rate": 1.5706031120747417e-06, "loss": 0.2653, "step": 11900 }, { "epoch": 0.75, "grad_norm": 4.0712484006038085, "learning_rate": 1.569862022339072e-06, "loss": 0.2483, "step": 11901 }, { "epoch": 0.75, "grad_norm": 1.6527144597464218, "learning_rate": 1.56912107492592e-06, "loss": 0.273, "step": 11902 }, { "epoch": 0.75, "grad_norm": 19.838911551780072, "learning_rate": 1.56838026986603e-06, "loss": 0.2706, "step": 11903 }, { "epoch": 0.75, "grad_norm": 1.522449482689555, "learning_rate": 1.567639607190138e-06, "loss": 0.238, "step": 11904 }, { "epoch": 0.75, "grad_norm": 3.014349755394319, "learning_rate": 1.566899086928973e-06, "loss": 0.247, "step": 11905 }, { "epoch": 0.75, "grad_norm": 1.2942124436971643, "learning_rate": 1.5661587091132646e-06, "loss": 0.2423, "step": 11906 }, { "epoch": 0.75, "grad_norm": 1.6652853533826246, "learning_rate": 1.565418473773731e-06, "loss": 0.2477, "step": 11907 }, { "epoch": 0.75, "grad_norm": 2.688960854504309, "learning_rate": 1.564678380941085e-06, "loss": 0.2634, "step": 11908 }, { "epoch": 0.75, "grad_norm": 1.8093549149604553, "learning_rate": 1.5639384306460331e-06, "loss": 0.254, "step": 11909 }, { "epoch": 0.75, "grad_norm": 1.751705599615996, "learning_rate": 1.5631986229192791e-06, "loss": 0.253, "step": 11910 }, { "epoch": 0.75, "grad_norm": 2.4181956495410795, "learning_rate": 1.5624589577915205e-06, "loss": 0.2694, "step": 11911 }, { "epoch": 0.75, "grad_norm": 2.585333299902547, "learning_rate": 1.5617194352934451e-06, "loss": 0.2748, "step": 11912 }, { "epoch": 0.75, "grad_norm": 1.3754365359386904, "learning_rate": 1.5609800554557358e-06, "loss": 0.2587, "step": 11913 }, { "epoch": 0.75, "grad_norm": 2.7493901033412564, "learning_rate": 1.5602408183090744e-06, "loss": 0.2659, "step": 11914 }, { "epoch": 0.75, "grad_norm": 1.479838596165756, "learning_rate": 1.559501723884131e-06, "loss": 0.2382, "step": 11915 }, { "epoch": 0.75, "grad_norm": 2.45159273101338, "learning_rate": 1.5587627722115717e-06, "loss": 0.244, "step": 11916 }, { "epoch": 0.75, "grad_norm": 1.7023586831820152, "learning_rate": 1.5580239633220563e-06, "loss": 0.2616, "step": 11917 }, { "epoch": 0.75, "grad_norm": 1.8896552588110056, "learning_rate": 1.55728529724624e-06, "loss": 0.2639, "step": 11918 }, { "epoch": 0.75, "grad_norm": 1.9517282312415587, "learning_rate": 1.5565467740147732e-06, "loss": 0.2377, "step": 11919 }, { "epoch": 0.75, "grad_norm": 5.248304202353867, "learning_rate": 1.555808393658297e-06, "loss": 0.2753, "step": 11920 }, { "epoch": 0.75, "grad_norm": 1.7899906901154115, "learning_rate": 1.5550701562074465e-06, "loss": 0.2597, "step": 11921 }, { "epoch": 0.75, "grad_norm": 1.769585045134498, "learning_rate": 1.5543320616928558e-06, "loss": 0.2622, "step": 11922 }, { "epoch": 0.75, "grad_norm": 2.4658665214876905, "learning_rate": 1.5535941101451478e-06, "loss": 0.2566, "step": 11923 }, { "epoch": 0.75, "grad_norm": 2.0309553236705917, "learning_rate": 1.5528563015949421e-06, "loss": 0.2774, "step": 11924 }, { "epoch": 0.75, "grad_norm": 1.5888897895927225, "learning_rate": 1.5521186360728497e-06, "loss": 0.262, "step": 11925 }, { "epoch": 0.75, "grad_norm": 2.3547565026352504, "learning_rate": 1.5513811136094786e-06, "loss": 0.2545, "step": 11926 }, { "epoch": 0.75, "grad_norm": 4.266784297481978, "learning_rate": 1.5506437342354324e-06, "loss": 0.2558, "step": 11927 }, { "epoch": 0.75, "grad_norm": 2.7020897530161574, "learning_rate": 1.549906497981305e-06, "loss": 0.2874, "step": 11928 }, { "epoch": 0.75, "grad_norm": 10.027743530206067, "learning_rate": 1.549169404877683e-06, "loss": 0.2871, "step": 11929 }, { "epoch": 0.75, "grad_norm": 1.3002851894086886, "learning_rate": 1.5484324549551532e-06, "loss": 0.2446, "step": 11930 }, { "epoch": 0.75, "grad_norm": 1.940854903658385, "learning_rate": 1.5476956482442918e-06, "loss": 0.2544, "step": 11931 }, { "epoch": 0.75, "grad_norm": 1.6021044647928193, "learning_rate": 1.5469589847756677e-06, "loss": 0.2539, "step": 11932 }, { "epoch": 0.75, "grad_norm": 1.2905678702315992, "learning_rate": 1.5462224645798507e-06, "loss": 0.2675, "step": 11933 }, { "epoch": 0.75, "grad_norm": 2.2728942236191934, "learning_rate": 1.5454860876873968e-06, "loss": 0.2612, "step": 11934 }, { "epoch": 0.75, "grad_norm": 1.296256772914394, "learning_rate": 1.5447498541288625e-06, "loss": 0.246, "step": 11935 }, { "epoch": 0.75, "grad_norm": 2.1979331063387524, "learning_rate": 1.544013763934794e-06, "loss": 0.2654, "step": 11936 }, { "epoch": 0.75, "grad_norm": 2.21921664885315, "learning_rate": 1.543277817135731e-06, "loss": 0.2635, "step": 11937 }, { "epoch": 0.75, "grad_norm": 5.553450963655312, "learning_rate": 1.5425420137622133e-06, "loss": 0.2692, "step": 11938 }, { "epoch": 0.75, "grad_norm": 2.189599600989339, "learning_rate": 1.5418063538447687e-06, "loss": 0.248, "step": 11939 }, { "epoch": 0.75, "grad_norm": 2.383964627773125, "learning_rate": 1.5410708374139189e-06, "loss": 0.2725, "step": 11940 }, { "epoch": 0.75, "grad_norm": 1.4591173037880572, "learning_rate": 1.5403354645001857e-06, "loss": 0.2633, "step": 11941 }, { "epoch": 0.75, "grad_norm": 1.7464732086978818, "learning_rate": 1.5396002351340772e-06, "loss": 0.2695, "step": 11942 }, { "epoch": 0.75, "grad_norm": 2.1924013514986886, "learning_rate": 1.5388651493461026e-06, "loss": 0.2667, "step": 11943 }, { "epoch": 0.75, "grad_norm": 1.8687920252622625, "learning_rate": 1.5381302071667614e-06, "loss": 0.2557, "step": 11944 }, { "epoch": 0.75, "grad_norm": 2.254162357203387, "learning_rate": 1.5373954086265447e-06, "loss": 0.2754, "step": 11945 }, { "epoch": 0.75, "grad_norm": 2.1932413521027496, "learning_rate": 1.5366607537559448e-06, "loss": 0.2463, "step": 11946 }, { "epoch": 0.75, "grad_norm": 4.168190153008269, "learning_rate": 1.5359262425854398e-06, "loss": 0.2597, "step": 11947 }, { "epoch": 0.75, "grad_norm": 10.760150342410197, "learning_rate": 1.5351918751455097e-06, "loss": 0.2689, "step": 11948 }, { "epoch": 0.75, "grad_norm": 3.5582380661426294, "learning_rate": 1.5344576514666231e-06, "loss": 0.2531, "step": 11949 }, { "epoch": 0.75, "grad_norm": 2.2917949160450024, "learning_rate": 1.533723571579242e-06, "loss": 0.2466, "step": 11950 }, { "epoch": 0.75, "grad_norm": 3.0579349956275412, "learning_rate": 1.532989635513829e-06, "loss": 0.2569, "step": 11951 }, { "epoch": 0.75, "grad_norm": 3.1387940862761026, "learning_rate": 1.5322558433008332e-06, "loss": 0.2787, "step": 11952 }, { "epoch": 0.75, "grad_norm": 2.0218715524721236, "learning_rate": 1.531522194970701e-06, "loss": 0.2627, "step": 11953 }, { "epoch": 0.75, "grad_norm": 1.740251383808904, "learning_rate": 1.5307886905538732e-06, "loss": 0.2422, "step": 11954 }, { "epoch": 0.75, "grad_norm": 0.594114810697703, "learning_rate": 1.530055330080787e-06, "loss": 0.4549, "step": 11955 }, { "epoch": 0.75, "grad_norm": 2.8247060917469438, "learning_rate": 1.5293221135818676e-06, "loss": 0.2722, "step": 11956 }, { "epoch": 0.75, "grad_norm": 1.896839741503334, "learning_rate": 1.5285890410875388e-06, "loss": 0.2472, "step": 11957 }, { "epoch": 0.75, "grad_norm": 1.6456790709071374, "learning_rate": 1.527856112628215e-06, "loss": 0.2497, "step": 11958 }, { "epoch": 0.75, "grad_norm": 2.44152185672861, "learning_rate": 1.5271233282343095e-06, "loss": 0.2591, "step": 11959 }, { "epoch": 0.75, "grad_norm": 2.0892971632742032, "learning_rate": 1.5263906879362256e-06, "loss": 0.2578, "step": 11960 }, { "epoch": 0.75, "grad_norm": 4.194558456264757, "learning_rate": 1.525658191764361e-06, "loss": 0.2721, "step": 11961 }, { "epoch": 0.75, "grad_norm": 1.8897807708386882, "learning_rate": 1.5249258397491086e-06, "loss": 0.2713, "step": 11962 }, { "epoch": 0.75, "grad_norm": 2.0664841821110844, "learning_rate": 1.5241936319208572e-06, "loss": 0.2652, "step": 11963 }, { "epoch": 0.75, "grad_norm": 1.6448856221330757, "learning_rate": 1.5234615683099857e-06, "loss": 0.2601, "step": 11964 }, { "epoch": 0.75, "grad_norm": 1.678796882725787, "learning_rate": 1.5227296489468685e-06, "loss": 0.2756, "step": 11965 }, { "epoch": 0.75, "grad_norm": 1.664808175604224, "learning_rate": 1.5219978738618734e-06, "loss": 0.2668, "step": 11966 }, { "epoch": 0.75, "grad_norm": 4.100819538826289, "learning_rate": 1.5212662430853659e-06, "loss": 0.2499, "step": 11967 }, { "epoch": 0.75, "grad_norm": 5.376328737191698, "learning_rate": 1.5205347566477002e-06, "loss": 0.2637, "step": 11968 }, { "epoch": 0.75, "grad_norm": 0.6641584539080455, "learning_rate": 1.5198034145792267e-06, "loss": 0.4767, "step": 11969 }, { "epoch": 0.75, "grad_norm": 1.7424025291555487, "learning_rate": 1.5190722169102906e-06, "loss": 0.2749, "step": 11970 }, { "epoch": 0.75, "grad_norm": 1.6825061395834442, "learning_rate": 1.518341163671233e-06, "loss": 0.2453, "step": 11971 }, { "epoch": 0.75, "grad_norm": 1.900240666346348, "learning_rate": 1.5176102548923844e-06, "loss": 0.2641, "step": 11972 }, { "epoch": 0.75, "grad_norm": 1.948203795265517, "learning_rate": 1.5168794906040719e-06, "loss": 0.2842, "step": 11973 }, { "epoch": 0.75, "grad_norm": 1.9603099547812366, "learning_rate": 1.516148870836614e-06, "loss": 0.2641, "step": 11974 }, { "epoch": 0.75, "grad_norm": 2.1220814185419328, "learning_rate": 1.5154183956203295e-06, "loss": 0.2746, "step": 11975 }, { "epoch": 0.75, "grad_norm": 1.771850191668032, "learning_rate": 1.514688064985525e-06, "loss": 0.2763, "step": 11976 }, { "epoch": 0.75, "grad_norm": 4.574735579899599, "learning_rate": 1.513957878962502e-06, "loss": 0.249, "step": 11977 }, { "epoch": 0.75, "grad_norm": 2.5782235861767826, "learning_rate": 1.5132278375815578e-06, "loss": 0.2788, "step": 11978 }, { "epoch": 0.75, "grad_norm": 1.5446073858393137, "learning_rate": 1.512497940872986e-06, "loss": 0.2519, "step": 11979 }, { "epoch": 0.75, "grad_norm": 2.8054213731930635, "learning_rate": 1.5117681888670694e-06, "loss": 0.2471, "step": 11980 }, { "epoch": 0.75, "grad_norm": 1.4743759970123511, "learning_rate": 1.5110385815940842e-06, "loss": 0.2548, "step": 11981 }, { "epoch": 0.75, "grad_norm": 1.678778934250006, "learning_rate": 1.5103091190843072e-06, "loss": 0.2675, "step": 11982 }, { "epoch": 0.75, "grad_norm": 2.3988365123464415, "learning_rate": 1.509579801368003e-06, "loss": 0.2404, "step": 11983 }, { "epoch": 0.75, "grad_norm": 3.505055998955773, "learning_rate": 1.5088506284754312e-06, "loss": 0.2514, "step": 11984 }, { "epoch": 0.75, "grad_norm": 5.609320783732682, "learning_rate": 1.5081216004368492e-06, "loss": 0.2704, "step": 11985 }, { "epoch": 0.75, "grad_norm": 1.7426797782797734, "learning_rate": 1.5073927172825032e-06, "loss": 0.2583, "step": 11986 }, { "epoch": 0.75, "grad_norm": 2.4989185183015086, "learning_rate": 1.506663979042638e-06, "loss": 0.2446, "step": 11987 }, { "epoch": 0.75, "grad_norm": 3.5553763225014214, "learning_rate": 1.505935385747489e-06, "loss": 0.2661, "step": 11988 }, { "epoch": 0.75, "grad_norm": 1.9648034406003163, "learning_rate": 1.505206937427286e-06, "loss": 0.2724, "step": 11989 }, { "epoch": 0.75, "grad_norm": 1.6727122932410934, "learning_rate": 1.5044786341122557e-06, "loss": 0.2687, "step": 11990 }, { "epoch": 0.75, "grad_norm": 2.0541108125247995, "learning_rate": 1.503750475832616e-06, "loss": 0.2917, "step": 11991 }, { "epoch": 0.75, "grad_norm": 1.864789485460652, "learning_rate": 1.5030224626185774e-06, "loss": 0.2374, "step": 11992 }, { "epoch": 0.75, "grad_norm": 3.556706876194909, "learning_rate": 1.5022945945003499e-06, "loss": 0.2578, "step": 11993 }, { "epoch": 0.75, "grad_norm": 8.16915453716689, "learning_rate": 1.50156687150813e-06, "loss": 0.2727, "step": 11994 }, { "epoch": 0.75, "grad_norm": 1.9587952436999463, "learning_rate": 1.5008392936721166e-06, "loss": 0.2529, "step": 11995 }, { "epoch": 0.75, "grad_norm": 1.7195241674442108, "learning_rate": 1.5001118610224963e-06, "loss": 0.2389, "step": 11996 }, { "epoch": 0.75, "grad_norm": 3.7931812262722704, "learning_rate": 1.4993845735894496e-06, "loss": 0.2544, "step": 11997 }, { "epoch": 0.75, "grad_norm": 2.2405025422970355, "learning_rate": 1.4986574314031566e-06, "loss": 0.2645, "step": 11998 }, { "epoch": 0.75, "grad_norm": 2.135625269821901, "learning_rate": 1.497930434493784e-06, "loss": 0.2536, "step": 11999 }, { "epoch": 0.75, "grad_norm": 4.046505744040854, "learning_rate": 1.4972035828914993e-06, "loss": 0.2862, "step": 12000 }, { "epoch": 0.75, "grad_norm": 3.482562277878135, "learning_rate": 1.4964768766264604e-06, "loss": 0.2631, "step": 12001 }, { "epoch": 0.75, "grad_norm": 2.534614146089112, "learning_rate": 1.4957503157288166e-06, "loss": 0.2569, "step": 12002 }, { "epoch": 0.75, "grad_norm": 1.8568030182913553, "learning_rate": 1.4950239002287181e-06, "loss": 0.2724, "step": 12003 }, { "epoch": 0.75, "grad_norm": 3.555922049773706, "learning_rate": 1.494297630156304e-06, "loss": 0.2596, "step": 12004 }, { "epoch": 0.75, "grad_norm": 2.725616201416361, "learning_rate": 1.493571505541706e-06, "loss": 0.2715, "step": 12005 }, { "epoch": 0.76, "grad_norm": 2.7002847467613185, "learning_rate": 1.492845526415056e-06, "loss": 0.2787, "step": 12006 }, { "epoch": 0.76, "grad_norm": 10.678468794217633, "learning_rate": 1.492119692806473e-06, "loss": 0.2497, "step": 12007 }, { "epoch": 0.76, "grad_norm": 1.8387834655868671, "learning_rate": 1.4913940047460762e-06, "loss": 0.2665, "step": 12008 }, { "epoch": 0.76, "grad_norm": 1.716906784132446, "learning_rate": 1.490668462263974e-06, "loss": 0.2576, "step": 12009 }, { "epoch": 0.76, "grad_norm": 5.013935443864974, "learning_rate": 1.4899430653902691e-06, "loss": 0.2543, "step": 12010 }, { "epoch": 0.76, "grad_norm": 3.024479615880311, "learning_rate": 1.4892178141550628e-06, "loss": 0.2492, "step": 12011 }, { "epoch": 0.76, "grad_norm": 3.08530901693931, "learning_rate": 1.4884927085884448e-06, "loss": 0.265, "step": 12012 }, { "epoch": 0.76, "grad_norm": 2.111331303976927, "learning_rate": 1.4877677487205005e-06, "loss": 0.2605, "step": 12013 }, { "epoch": 0.76, "grad_norm": 1.5905043143237334, "learning_rate": 1.4870429345813103e-06, "loss": 0.2578, "step": 12014 }, { "epoch": 0.76, "grad_norm": 0.6271409889503476, "learning_rate": 1.4863182662009501e-06, "loss": 0.4487, "step": 12015 }, { "epoch": 0.76, "grad_norm": 1.5999760615757777, "learning_rate": 1.4855937436094858e-06, "loss": 0.2424, "step": 12016 }, { "epoch": 0.76, "grad_norm": 7.238525805627581, "learning_rate": 1.4848693668369795e-06, "loss": 0.2494, "step": 12017 }, { "epoch": 0.76, "grad_norm": 2.4332015326435306, "learning_rate": 1.4841451359134857e-06, "loss": 0.2514, "step": 12018 }, { "epoch": 0.76, "grad_norm": 1.7978355334402403, "learning_rate": 1.483421050869056e-06, "loss": 0.257, "step": 12019 }, { "epoch": 0.76, "grad_norm": 2.8016530563684148, "learning_rate": 1.482697111733733e-06, "loss": 0.2761, "step": 12020 }, { "epoch": 0.76, "grad_norm": 2.0149743446607356, "learning_rate": 1.4819733185375535e-06, "loss": 0.2535, "step": 12021 }, { "epoch": 0.76, "grad_norm": 1.885860580385384, "learning_rate": 1.481249671310549e-06, "loss": 0.2835, "step": 12022 }, { "epoch": 0.76, "grad_norm": 1.4484556718172874, "learning_rate": 1.4805261700827472e-06, "loss": 0.2524, "step": 12023 }, { "epoch": 0.76, "grad_norm": 1.5016746816025455, "learning_rate": 1.4798028148841665e-06, "loss": 0.2574, "step": 12024 }, { "epoch": 0.76, "grad_norm": 2.199610801733965, "learning_rate": 1.4790796057448187e-06, "loss": 0.2754, "step": 12025 }, { "epoch": 0.76, "grad_norm": 2.1407007334441586, "learning_rate": 1.478356542694711e-06, "loss": 0.2673, "step": 12026 }, { "epoch": 0.76, "grad_norm": 2.7195045978267984, "learning_rate": 1.4776336257638468e-06, "loss": 0.26, "step": 12027 }, { "epoch": 0.76, "grad_norm": 1.8504353959269848, "learning_rate": 1.4769108549822198e-06, "loss": 0.2772, "step": 12028 }, { "epoch": 0.76, "grad_norm": 2.6572654675745015, "learning_rate": 1.476188230379818e-06, "loss": 0.2517, "step": 12029 }, { "epoch": 0.76, "grad_norm": 5.972166544547642, "learning_rate": 1.475465751986625e-06, "loss": 0.2586, "step": 12030 }, { "epoch": 0.76, "grad_norm": 1.5642743584507386, "learning_rate": 1.4747434198326206e-06, "loss": 0.252, "step": 12031 }, { "epoch": 0.76, "grad_norm": 3.171954189885917, "learning_rate": 1.474021233947772e-06, "loss": 0.2424, "step": 12032 }, { "epoch": 0.76, "grad_norm": 3.604920851024164, "learning_rate": 1.4732991943620462e-06, "loss": 0.2734, "step": 12033 }, { "epoch": 0.76, "grad_norm": 2.744629193997247, "learning_rate": 1.4725773011053995e-06, "loss": 0.2484, "step": 12034 }, { "epoch": 0.76, "grad_norm": 1.3283157992310137, "learning_rate": 1.4718555542077873e-06, "loss": 0.279, "step": 12035 }, { "epoch": 0.76, "grad_norm": 1.6727049308694935, "learning_rate": 1.471133953699153e-06, "loss": 0.2636, "step": 12036 }, { "epoch": 0.76, "grad_norm": 1.5159553532366852, "learning_rate": 1.4704124996094405e-06, "loss": 0.2327, "step": 12037 }, { "epoch": 0.76, "grad_norm": 0.6535073658917429, "learning_rate": 1.4696911919685813e-06, "loss": 0.465, "step": 12038 }, { "epoch": 0.76, "grad_norm": 4.195212118883088, "learning_rate": 1.4689700308065064e-06, "loss": 0.2502, "step": 12039 }, { "epoch": 0.76, "grad_norm": 4.714089820462613, "learning_rate": 1.4682490161531371e-06, "loss": 0.2472, "step": 12040 }, { "epoch": 0.76, "grad_norm": 2.225937081733685, "learning_rate": 1.467528148038389e-06, "loss": 0.2574, "step": 12041 }, { "epoch": 0.76, "grad_norm": 3.3797596864482804, "learning_rate": 1.4668074264921712e-06, "loss": 0.2474, "step": 12042 }, { "epoch": 0.76, "grad_norm": 3.1504471511916403, "learning_rate": 1.4660868515443899e-06, "loss": 0.2754, "step": 12043 }, { "epoch": 0.76, "grad_norm": 1.9441453582088524, "learning_rate": 1.4653664232249409e-06, "loss": 0.2717, "step": 12044 }, { "epoch": 0.76, "grad_norm": 1.650606618098762, "learning_rate": 1.4646461415637188e-06, "loss": 0.2508, "step": 12045 }, { "epoch": 0.76, "grad_norm": 1.818647293254394, "learning_rate": 1.4639260065906058e-06, "loss": 0.2645, "step": 12046 }, { "epoch": 0.76, "grad_norm": 2.7636787860325356, "learning_rate": 1.4632060183354857e-06, "loss": 0.2701, "step": 12047 }, { "epoch": 0.76, "grad_norm": 1.5771368792371883, "learning_rate": 1.4624861768282295e-06, "loss": 0.2503, "step": 12048 }, { "epoch": 0.76, "grad_norm": 1.9217027774237119, "learning_rate": 1.4617664820987055e-06, "loss": 0.272, "step": 12049 }, { "epoch": 0.76, "grad_norm": 2.1391593192395453, "learning_rate": 1.461046934176773e-06, "loss": 0.2544, "step": 12050 }, { "epoch": 0.76, "grad_norm": 1.8396394512248049, "learning_rate": 1.460327533092289e-06, "loss": 0.2571, "step": 12051 }, { "epoch": 0.76, "grad_norm": 2.4925167561067028, "learning_rate": 1.4596082788751048e-06, "loss": 0.2481, "step": 12052 }, { "epoch": 0.76, "grad_norm": 3.106952728761021, "learning_rate": 1.4588891715550612e-06, "loss": 0.2565, "step": 12053 }, { "epoch": 0.76, "grad_norm": 2.475499050212135, "learning_rate": 1.458170211161994e-06, "loss": 0.2689, "step": 12054 }, { "epoch": 0.76, "grad_norm": 2.554015332841206, "learning_rate": 1.457451397725737e-06, "loss": 0.2559, "step": 12055 }, { "epoch": 0.76, "grad_norm": 2.2286089315703848, "learning_rate": 1.4567327312761143e-06, "loss": 0.2537, "step": 12056 }, { "epoch": 0.76, "grad_norm": 2.140349767536782, "learning_rate": 1.4560142118429427e-06, "loss": 0.2764, "step": 12057 }, { "epoch": 0.76, "grad_norm": 1.9077386950447266, "learning_rate": 1.4552958394560374e-06, "loss": 0.2356, "step": 12058 }, { "epoch": 0.76, "grad_norm": 2.411234553571343, "learning_rate": 1.4545776141452023e-06, "loss": 0.2842, "step": 12059 }, { "epoch": 0.76, "grad_norm": 2.1401287558122895, "learning_rate": 1.4538595359402413e-06, "loss": 0.2646, "step": 12060 }, { "epoch": 0.76, "grad_norm": 2.198233857607244, "learning_rate": 1.4531416048709469e-06, "loss": 0.2531, "step": 12061 }, { "epoch": 0.76, "grad_norm": 1.2902543845869912, "learning_rate": 1.4524238209671048e-06, "loss": 0.2575, "step": 12062 }, { "epoch": 0.76, "grad_norm": 0.6376816292524472, "learning_rate": 1.4517061842585017e-06, "loss": 0.482, "step": 12063 }, { "epoch": 0.76, "grad_norm": 2.2892471408943904, "learning_rate": 1.450988694774912e-06, "loss": 0.2568, "step": 12064 }, { "epoch": 0.76, "grad_norm": 2.6052649285732987, "learning_rate": 1.4502713525461026e-06, "loss": 0.2605, "step": 12065 }, { "epoch": 0.76, "grad_norm": 5.933754656876215, "learning_rate": 1.44955415760184e-06, "loss": 0.2531, "step": 12066 }, { "epoch": 0.76, "grad_norm": 1.5154627525102058, "learning_rate": 1.448837109971884e-06, "loss": 0.2529, "step": 12067 }, { "epoch": 0.76, "grad_norm": 1.2899602501426988, "learning_rate": 1.448120209685983e-06, "loss": 0.2479, "step": 12068 }, { "epoch": 0.76, "grad_norm": 14.110370146331876, "learning_rate": 1.4474034567738838e-06, "loss": 0.2645, "step": 12069 }, { "epoch": 0.76, "grad_norm": 1.6665665184508645, "learning_rate": 1.4466868512653237e-06, "loss": 0.2937, "step": 12070 }, { "epoch": 0.76, "grad_norm": 2.774512981607251, "learning_rate": 1.4459703931900393e-06, "loss": 0.2538, "step": 12071 }, { "epoch": 0.76, "grad_norm": 3.3921342498113027, "learning_rate": 1.4452540825777555e-06, "loss": 0.2513, "step": 12072 }, { "epoch": 0.76, "grad_norm": 1.697737022695345, "learning_rate": 1.4445379194581926e-06, "loss": 0.2451, "step": 12073 }, { "epoch": 0.76, "grad_norm": 2.600425139139422, "learning_rate": 1.4438219038610662e-06, "loss": 0.2547, "step": 12074 }, { "epoch": 0.76, "grad_norm": 1.585123484955184, "learning_rate": 1.4431060358160876e-06, "loss": 0.2588, "step": 12075 }, { "epoch": 0.76, "grad_norm": 2.4003655430597806, "learning_rate": 1.4423903153529568e-06, "loss": 0.2881, "step": 12076 }, { "epoch": 0.76, "grad_norm": 1.3755810571745266, "learning_rate": 1.441674742501371e-06, "loss": 0.2571, "step": 12077 }, { "epoch": 0.76, "grad_norm": 2.3908024382707085, "learning_rate": 1.440959317291019e-06, "loss": 0.2651, "step": 12078 }, { "epoch": 0.76, "grad_norm": 2.4494320693999985, "learning_rate": 1.4402440397515882e-06, "loss": 0.2506, "step": 12079 }, { "epoch": 0.76, "grad_norm": 1.9483648144851375, "learning_rate": 1.4395289099127545e-06, "loss": 0.2616, "step": 12080 }, { "epoch": 0.76, "grad_norm": 2.104228117768375, "learning_rate": 1.4388139278041895e-06, "loss": 0.2622, "step": 12081 }, { "epoch": 0.76, "grad_norm": 3.2158394746627743, "learning_rate": 1.4380990934555593e-06, "loss": 0.2713, "step": 12082 }, { "epoch": 0.76, "grad_norm": 2.209765771543538, "learning_rate": 1.4373844068965265e-06, "loss": 0.2498, "step": 12083 }, { "epoch": 0.76, "grad_norm": 2.2188412155029993, "learning_rate": 1.4366698681567425e-06, "loss": 0.2683, "step": 12084 }, { "epoch": 0.76, "grad_norm": 3.2706651879775706, "learning_rate": 1.4359554772658551e-06, "loss": 0.2465, "step": 12085 }, { "epoch": 0.76, "grad_norm": 1.3405066611028733, "learning_rate": 1.4352412342535033e-06, "loss": 0.2461, "step": 12086 }, { "epoch": 0.76, "grad_norm": 2.7765449780426272, "learning_rate": 1.434527139149326e-06, "loss": 0.2667, "step": 12087 }, { "epoch": 0.76, "grad_norm": 2.955786494324836, "learning_rate": 1.4338131919829513e-06, "loss": 0.2804, "step": 12088 }, { "epoch": 0.76, "grad_norm": 2.6086075855513613, "learning_rate": 1.4330993927839993e-06, "loss": 0.2731, "step": 12089 }, { "epoch": 0.76, "grad_norm": 2.2540326874061916, "learning_rate": 1.4323857415820892e-06, "loss": 0.261, "step": 12090 }, { "epoch": 0.76, "grad_norm": 2.211682084578564, "learning_rate": 1.4316722384068332e-06, "loss": 0.2623, "step": 12091 }, { "epoch": 0.76, "grad_norm": 1.7568744545590538, "learning_rate": 1.430958883287834e-06, "loss": 0.2656, "step": 12092 }, { "epoch": 0.76, "grad_norm": 1.8693378545721617, "learning_rate": 1.43024567625469e-06, "loss": 0.2701, "step": 12093 }, { "epoch": 0.76, "grad_norm": 1.9474574766802821, "learning_rate": 1.4295326173369921e-06, "loss": 0.2653, "step": 12094 }, { "epoch": 0.76, "grad_norm": 3.2457017927086524, "learning_rate": 1.4288197065643288e-06, "loss": 0.2696, "step": 12095 }, { "epoch": 0.76, "grad_norm": 0.5463452648486415, "learning_rate": 1.428106943966278e-06, "loss": 0.4737, "step": 12096 }, { "epoch": 0.76, "grad_norm": 1.7727890902270838, "learning_rate": 1.4273943295724163e-06, "loss": 0.2806, "step": 12097 }, { "epoch": 0.76, "grad_norm": 2.3453058333942383, "learning_rate": 1.4266818634123075e-06, "loss": 0.2552, "step": 12098 }, { "epoch": 0.76, "grad_norm": 2.686475443139553, "learning_rate": 1.4259695455155165e-06, "loss": 0.2444, "step": 12099 }, { "epoch": 0.76, "grad_norm": 5.097105243851308, "learning_rate": 1.4252573759115974e-06, "loss": 0.2453, "step": 12100 }, { "epoch": 0.76, "grad_norm": 3.442296196789242, "learning_rate": 1.4245453546300996e-06, "loss": 0.2646, "step": 12101 }, { "epoch": 0.76, "grad_norm": 1.737063829014486, "learning_rate": 1.4238334817005638e-06, "loss": 0.2488, "step": 12102 }, { "epoch": 0.76, "grad_norm": 1.618681622374901, "learning_rate": 1.4231217571525308e-06, "loss": 0.2622, "step": 12103 }, { "epoch": 0.76, "grad_norm": 1.5502227468010201, "learning_rate": 1.4224101810155273e-06, "loss": 0.2547, "step": 12104 }, { "epoch": 0.76, "grad_norm": 3.0551417447986835, "learning_rate": 1.4216987533190819e-06, "loss": 0.2794, "step": 12105 }, { "epoch": 0.76, "grad_norm": 2.6893047780363037, "learning_rate": 1.4209874740927087e-06, "loss": 0.2388, "step": 12106 }, { "epoch": 0.76, "grad_norm": 5.8125832767489305, "learning_rate": 1.4202763433659244e-06, "loss": 0.2748, "step": 12107 }, { "epoch": 0.76, "grad_norm": 3.8689003970630766, "learning_rate": 1.419565361168233e-06, "loss": 0.2619, "step": 12108 }, { "epoch": 0.76, "grad_norm": 6.820352185620478, "learning_rate": 1.4188545275291343e-06, "loss": 0.2524, "step": 12109 }, { "epoch": 0.76, "grad_norm": 4.904757860928734, "learning_rate": 1.4181438424781203e-06, "loss": 0.2708, "step": 12110 }, { "epoch": 0.76, "grad_norm": 2.219140879375936, "learning_rate": 1.4174333060446804e-06, "loss": 0.2796, "step": 12111 }, { "epoch": 0.76, "grad_norm": 21.511571905586756, "learning_rate": 1.4167229182582974e-06, "loss": 0.2649, "step": 12112 }, { "epoch": 0.76, "grad_norm": 0.5516098362282018, "learning_rate": 1.4160126791484453e-06, "loss": 0.4471, "step": 12113 }, { "epoch": 0.76, "grad_norm": 1.6622975944093763, "learning_rate": 1.4153025887445914e-06, "loss": 0.2423, "step": 12114 }, { "epoch": 0.76, "grad_norm": 3.50915692108032, "learning_rate": 1.4145926470762023e-06, "loss": 0.2729, "step": 12115 }, { "epoch": 0.76, "grad_norm": 2.390954474437639, "learning_rate": 1.4138828541727322e-06, "loss": 0.2811, "step": 12116 }, { "epoch": 0.76, "grad_norm": 1.882459526932715, "learning_rate": 1.413173210063632e-06, "loss": 0.2581, "step": 12117 }, { "epoch": 0.76, "grad_norm": 10.78501533400747, "learning_rate": 1.4124637147783431e-06, "loss": 0.2893, "step": 12118 }, { "epoch": 0.76, "grad_norm": 1.6551772270319498, "learning_rate": 1.4117543683463103e-06, "loss": 0.2491, "step": 12119 }, { "epoch": 0.76, "grad_norm": 2.4012756677228824, "learning_rate": 1.4110451707969624e-06, "loss": 0.2481, "step": 12120 }, { "epoch": 0.76, "grad_norm": 2.0419703125098643, "learning_rate": 1.4103361221597244e-06, "loss": 0.2601, "step": 12121 }, { "epoch": 0.76, "grad_norm": 2.0914647375675712, "learning_rate": 1.4096272224640155e-06, "loss": 0.2561, "step": 12122 }, { "epoch": 0.76, "grad_norm": 2.2375686612246555, "learning_rate": 1.4089184717392524e-06, "loss": 0.2406, "step": 12123 }, { "epoch": 0.76, "grad_norm": 1.9218048095779081, "learning_rate": 1.4082098700148395e-06, "loss": 0.2569, "step": 12124 }, { "epoch": 0.76, "grad_norm": 2.7098076528240505, "learning_rate": 1.407501417320179e-06, "loss": 0.2744, "step": 12125 }, { "epoch": 0.76, "grad_norm": 2.9171778187326898, "learning_rate": 1.406793113684663e-06, "loss": 0.2599, "step": 12126 }, { "epoch": 0.76, "grad_norm": 5.74314241740096, "learning_rate": 1.4060849591376858e-06, "loss": 0.2549, "step": 12127 }, { "epoch": 0.76, "grad_norm": 2.414918815760029, "learning_rate": 1.405376953708627e-06, "loss": 0.2585, "step": 12128 }, { "epoch": 0.76, "grad_norm": 1.799812980432534, "learning_rate": 1.4046690974268634e-06, "loss": 0.2424, "step": 12129 }, { "epoch": 0.76, "grad_norm": 1.629016152305734, "learning_rate": 1.4039613903217636e-06, "loss": 0.2597, "step": 12130 }, { "epoch": 0.76, "grad_norm": 6.118408806726114, "learning_rate": 1.403253832422694e-06, "loss": 0.2555, "step": 12131 }, { "epoch": 0.76, "grad_norm": 5.47675570741331, "learning_rate": 1.402546423759012e-06, "loss": 0.2481, "step": 12132 }, { "epoch": 0.76, "grad_norm": 2.5083457030073633, "learning_rate": 1.401839164360067e-06, "loss": 0.2457, "step": 12133 }, { "epoch": 0.76, "grad_norm": 2.1849445597091224, "learning_rate": 1.401132054255206e-06, "loss": 0.2577, "step": 12134 }, { "epoch": 0.76, "grad_norm": 2.4755413355219726, "learning_rate": 1.4004250934737701e-06, "loss": 0.26, "step": 12135 }, { "epoch": 0.76, "grad_norm": 4.156483687758241, "learning_rate": 1.39971828204509e-06, "loss": 0.2516, "step": 12136 }, { "epoch": 0.76, "grad_norm": 1.6256868513634082, "learning_rate": 1.3990116199984938e-06, "loss": 0.2482, "step": 12137 }, { "epoch": 0.76, "grad_norm": 1.5934940225949321, "learning_rate": 1.3983051073632996e-06, "loss": 0.2511, "step": 12138 }, { "epoch": 0.76, "grad_norm": 5.487029862492722, "learning_rate": 1.397598744168826e-06, "loss": 0.2706, "step": 12139 }, { "epoch": 0.76, "grad_norm": 1.862388876202757, "learning_rate": 1.3968925304443786e-06, "loss": 0.2494, "step": 12140 }, { "epoch": 0.76, "grad_norm": 1.6227209940447236, "learning_rate": 1.3961864662192586e-06, "loss": 0.2535, "step": 12141 }, { "epoch": 0.76, "grad_norm": 1.7130382213619153, "learning_rate": 1.3954805515227632e-06, "loss": 0.2531, "step": 12142 }, { "epoch": 0.76, "grad_norm": 1.8278220899417228, "learning_rate": 1.3947747863841836e-06, "loss": 0.2528, "step": 12143 }, { "epoch": 0.76, "grad_norm": 1.251253788843478, "learning_rate": 1.3940691708328018e-06, "loss": 0.2574, "step": 12144 }, { "epoch": 0.76, "grad_norm": 3.746734438790287, "learning_rate": 1.3933637048978949e-06, "loss": 0.2588, "step": 12145 }, { "epoch": 0.76, "grad_norm": 1.9579740225844458, "learning_rate": 1.3926583886087326e-06, "loss": 0.2477, "step": 12146 }, { "epoch": 0.76, "grad_norm": 3.138227823451893, "learning_rate": 1.3919532219945819e-06, "loss": 0.2992, "step": 12147 }, { "epoch": 0.76, "grad_norm": 2.7109840867803783, "learning_rate": 1.3912482050846999e-06, "loss": 0.2637, "step": 12148 }, { "epoch": 0.76, "grad_norm": 2.216299194263243, "learning_rate": 1.390543337908341e-06, "loss": 0.2548, "step": 12149 }, { "epoch": 0.76, "grad_norm": 1.7964161006614783, "learning_rate": 1.3898386204947485e-06, "loss": 0.2665, "step": 12150 }, { "epoch": 0.76, "grad_norm": 1.9030536138531213, "learning_rate": 1.3891340528731656e-06, "loss": 0.2543, "step": 12151 }, { "epoch": 0.76, "grad_norm": 3.386004139164735, "learning_rate": 1.3884296350728245e-06, "loss": 0.2494, "step": 12152 }, { "epoch": 0.76, "grad_norm": 2.3586158587829322, "learning_rate": 1.3877253671229528e-06, "loss": 0.256, "step": 12153 }, { "epoch": 0.76, "grad_norm": 0.5827677368568623, "learning_rate": 1.38702124905277e-06, "loss": 0.4691, "step": 12154 }, { "epoch": 0.76, "grad_norm": 1.9524956657439987, "learning_rate": 1.386317280891495e-06, "loss": 0.2708, "step": 12155 }, { "epoch": 0.76, "grad_norm": 1.794127078629825, "learning_rate": 1.3856134626683325e-06, "loss": 0.2545, "step": 12156 }, { "epoch": 0.76, "grad_norm": 2.9505565417195476, "learning_rate": 1.384909794412489e-06, "loss": 0.2647, "step": 12157 }, { "epoch": 0.76, "grad_norm": 2.3767627075695774, "learning_rate": 1.3842062761531577e-06, "loss": 0.2529, "step": 12158 }, { "epoch": 0.76, "grad_norm": 2.275406547439901, "learning_rate": 1.3835029079195322e-06, "loss": 0.2542, "step": 12159 }, { "epoch": 0.76, "grad_norm": 2.1390473320247203, "learning_rate": 1.3827996897407942e-06, "loss": 0.2385, "step": 12160 }, { "epoch": 0.76, "grad_norm": 4.502283166060426, "learning_rate": 1.3820966216461224e-06, "loss": 0.25, "step": 12161 }, { "epoch": 0.76, "grad_norm": 1.8176713406240028, "learning_rate": 1.3813937036646858e-06, "loss": 0.2561, "step": 12162 }, { "epoch": 0.76, "grad_norm": 2.0244744902021257, "learning_rate": 1.3806909358256522e-06, "loss": 0.2474, "step": 12163 }, { "epoch": 0.76, "grad_norm": 3.7662667869668818, "learning_rate": 1.3799883181581818e-06, "loss": 0.2703, "step": 12164 }, { "epoch": 0.77, "grad_norm": 6.787122337335751, "learning_rate": 1.3792858506914258e-06, "loss": 0.2507, "step": 12165 }, { "epoch": 0.77, "grad_norm": 2.411214800296559, "learning_rate": 1.37858353345453e-06, "loss": 0.2721, "step": 12166 }, { "epoch": 0.77, "grad_norm": 2.065825198070482, "learning_rate": 1.3778813664766366e-06, "loss": 0.2403, "step": 12167 }, { "epoch": 0.77, "grad_norm": 0.5864386815362713, "learning_rate": 1.377179349786879e-06, "loss": 0.4672, "step": 12168 }, { "epoch": 0.77, "grad_norm": 1.9330363884101904, "learning_rate": 1.376477483414385e-06, "loss": 0.2511, "step": 12169 }, { "epoch": 0.77, "grad_norm": 2.3594106557510997, "learning_rate": 1.3757757673882753e-06, "loss": 0.2415, "step": 12170 }, { "epoch": 0.77, "grad_norm": 2.854869228502983, "learning_rate": 1.375074201737666e-06, "loss": 0.2635, "step": 12171 }, { "epoch": 0.77, "grad_norm": 3.5622857479687187, "learning_rate": 1.3743727864916684e-06, "loss": 0.2404, "step": 12172 }, { "epoch": 0.77, "grad_norm": 1.9452416758872502, "learning_rate": 1.3736715216793833e-06, "loss": 0.2608, "step": 12173 }, { "epoch": 0.77, "grad_norm": 1.7397170903180896, "learning_rate": 1.3729704073299066e-06, "loss": 0.2683, "step": 12174 }, { "epoch": 0.77, "grad_norm": 3.022305899674779, "learning_rate": 1.3722694434723316e-06, "loss": 0.2855, "step": 12175 }, { "epoch": 0.77, "grad_norm": 1.9460262754557436, "learning_rate": 1.3715686301357407e-06, "loss": 0.2422, "step": 12176 }, { "epoch": 0.77, "grad_norm": 2.843969987891146, "learning_rate": 1.3708679673492127e-06, "loss": 0.2627, "step": 12177 }, { "epoch": 0.77, "grad_norm": 3.0233249351876323, "learning_rate": 1.3701674551418154e-06, "loss": 0.2664, "step": 12178 }, { "epoch": 0.77, "grad_norm": 1.773220153771355, "learning_rate": 1.3694670935426202e-06, "loss": 0.2419, "step": 12179 }, { "epoch": 0.77, "grad_norm": 1.8184339760275756, "learning_rate": 1.3687668825806843e-06, "loss": 0.241, "step": 12180 }, { "epoch": 0.77, "grad_norm": 2.2042115949749004, "learning_rate": 1.3680668222850607e-06, "loss": 0.2504, "step": 12181 }, { "epoch": 0.77, "grad_norm": 2.52442515121413, "learning_rate": 1.3673669126847938e-06, "loss": 0.2895, "step": 12182 }, { "epoch": 0.77, "grad_norm": 3.2367914963257904, "learning_rate": 1.3666671538089272e-06, "loss": 0.2564, "step": 12183 }, { "epoch": 0.77, "grad_norm": 0.6387767623544527, "learning_rate": 1.365967545686494e-06, "loss": 0.4548, "step": 12184 }, { "epoch": 0.77, "grad_norm": 1.5423614370986392, "learning_rate": 1.3652680883465224e-06, "loss": 0.2467, "step": 12185 }, { "epoch": 0.77, "grad_norm": 2.296864002626352, "learning_rate": 1.364568781818031e-06, "loss": 0.2464, "step": 12186 }, { "epoch": 0.77, "grad_norm": 2.202580176727354, "learning_rate": 1.3638696261300404e-06, "loss": 0.2654, "step": 12187 }, { "epoch": 0.77, "grad_norm": 2.1249299824419166, "learning_rate": 1.3631706213115582e-06, "loss": 0.2424, "step": 12188 }, { "epoch": 0.77, "grad_norm": 2.350336052881772, "learning_rate": 1.3624717673915861e-06, "loss": 0.2368, "step": 12189 }, { "epoch": 0.77, "grad_norm": 3.4282564488224514, "learning_rate": 1.36177306439912e-06, "loss": 0.2518, "step": 12190 }, { "epoch": 0.77, "grad_norm": 2.3358017943237215, "learning_rate": 1.3610745123631536e-06, "loss": 0.2449, "step": 12191 }, { "epoch": 0.77, "grad_norm": 9.099503338369686, "learning_rate": 1.360376111312669e-06, "loss": 0.2403, "step": 12192 }, { "epoch": 0.77, "grad_norm": 3.835473065478738, "learning_rate": 1.359677861276642e-06, "loss": 0.2733, "step": 12193 }, { "epoch": 0.77, "grad_norm": 0.5521528226802335, "learning_rate": 1.3589797622840473e-06, "loss": 0.4471, "step": 12194 }, { "epoch": 0.77, "grad_norm": 1.8520327327932877, "learning_rate": 1.3582818143638505e-06, "loss": 0.2594, "step": 12195 }, { "epoch": 0.77, "grad_norm": 1.6606673204036535, "learning_rate": 1.35758401754501e-06, "loss": 0.2568, "step": 12196 }, { "epoch": 0.77, "grad_norm": 1.789776641886652, "learning_rate": 1.3568863718564779e-06, "loss": 0.26, "step": 12197 }, { "epoch": 0.77, "grad_norm": 3.3393705210298186, "learning_rate": 1.3561888773271997e-06, "loss": 0.2544, "step": 12198 }, { "epoch": 0.77, "grad_norm": 2.2725920425656425, "learning_rate": 1.355491533986119e-06, "loss": 0.2607, "step": 12199 }, { "epoch": 0.77, "grad_norm": 1.4027854019975927, "learning_rate": 1.354794341862166e-06, "loss": 0.2545, "step": 12200 }, { "epoch": 0.77, "grad_norm": 2.6602622518563193, "learning_rate": 1.3540973009842722e-06, "loss": 0.2488, "step": 12201 }, { "epoch": 0.77, "grad_norm": 4.4201286451726185, "learning_rate": 1.3534004113813553e-06, "loss": 0.2449, "step": 12202 }, { "epoch": 0.77, "grad_norm": 1.4548751633048314, "learning_rate": 1.3527036730823344e-06, "loss": 0.2549, "step": 12203 }, { "epoch": 0.77, "grad_norm": 10.878016111572727, "learning_rate": 1.3520070861161162e-06, "loss": 0.2718, "step": 12204 }, { "epoch": 0.77, "grad_norm": 4.554610789370885, "learning_rate": 1.3513106505116036e-06, "loss": 0.2627, "step": 12205 }, { "epoch": 0.77, "grad_norm": 1.8104370302578667, "learning_rate": 1.350614366297691e-06, "loss": 0.2473, "step": 12206 }, { "epoch": 0.77, "grad_norm": 2.2328859510418466, "learning_rate": 1.3499182335032728e-06, "loss": 0.2394, "step": 12207 }, { "epoch": 0.77, "grad_norm": 3.4119699133315216, "learning_rate": 1.3492222521572286e-06, "loss": 0.2797, "step": 12208 }, { "epoch": 0.77, "grad_norm": 1.9954328470563016, "learning_rate": 1.3485264222884387e-06, "loss": 0.2661, "step": 12209 }, { "epoch": 0.77, "grad_norm": 2.8542275818763985, "learning_rate": 1.3478307439257721e-06, "loss": 0.2546, "step": 12210 }, { "epoch": 0.77, "grad_norm": 2.0227989591725586, "learning_rate": 1.347135217098096e-06, "loss": 0.2864, "step": 12211 }, { "epoch": 0.77, "grad_norm": 1.7970596585819967, "learning_rate": 1.3464398418342683e-06, "loss": 0.2425, "step": 12212 }, { "epoch": 0.77, "grad_norm": 2.4076250465106765, "learning_rate": 1.3457446181631412e-06, "loss": 0.255, "step": 12213 }, { "epoch": 0.77, "grad_norm": 3.0487167145072678, "learning_rate": 1.3450495461135587e-06, "loss": 0.2798, "step": 12214 }, { "epoch": 0.77, "grad_norm": 1.9664566338913183, "learning_rate": 1.3443546257143624e-06, "loss": 0.2515, "step": 12215 }, { "epoch": 0.77, "grad_norm": 1.9933489856074946, "learning_rate": 1.3436598569943877e-06, "loss": 0.272, "step": 12216 }, { "epoch": 0.77, "grad_norm": 6.363127307583459, "learning_rate": 1.34296523998246e-06, "loss": 0.2543, "step": 12217 }, { "epoch": 0.77, "grad_norm": 1.7804329629619244, "learning_rate": 1.3422707747073982e-06, "loss": 0.26, "step": 12218 }, { "epoch": 0.77, "grad_norm": 18.699902909772035, "learning_rate": 1.3415764611980203e-06, "loss": 0.2512, "step": 12219 }, { "epoch": 0.77, "grad_norm": 2.5363615952587026, "learning_rate": 1.3408822994831334e-06, "loss": 0.2604, "step": 12220 }, { "epoch": 0.77, "grad_norm": 1.743496592571209, "learning_rate": 1.340188289591539e-06, "loss": 0.2669, "step": 12221 }, { "epoch": 0.77, "grad_norm": 2.5999052530662206, "learning_rate": 1.339494431552032e-06, "loss": 0.269, "step": 12222 }, { "epoch": 0.77, "grad_norm": 2.2818387282556643, "learning_rate": 1.3388007253934027e-06, "loss": 0.2514, "step": 12223 }, { "epoch": 0.77, "grad_norm": 3.7594046784324404, "learning_rate": 1.3381071711444354e-06, "loss": 0.2566, "step": 12224 }, { "epoch": 0.77, "grad_norm": 4.996730874594099, "learning_rate": 1.3374137688339061e-06, "loss": 0.2631, "step": 12225 }, { "epoch": 0.77, "grad_norm": 1.6209726289735795, "learning_rate": 1.3367205184905834e-06, "loss": 0.238, "step": 12226 }, { "epoch": 0.77, "grad_norm": 3.4512645326127482, "learning_rate": 1.3360274201432345e-06, "loss": 0.2676, "step": 12227 }, { "epoch": 0.77, "grad_norm": 4.004577670521368, "learning_rate": 1.3353344738206158e-06, "loss": 0.2577, "step": 12228 }, { "epoch": 0.77, "grad_norm": 1.9752401514118758, "learning_rate": 1.3346416795514794e-06, "loss": 0.2645, "step": 12229 }, { "epoch": 0.77, "grad_norm": 3.8238158477170305, "learning_rate": 1.3339490373645686e-06, "loss": 0.2532, "step": 12230 }, { "epoch": 0.77, "grad_norm": 0.5715346091088411, "learning_rate": 1.3332565472886238e-06, "loss": 0.4747, "step": 12231 }, { "epoch": 0.77, "grad_norm": 3.0124216246458646, "learning_rate": 1.3325642093523789e-06, "loss": 0.247, "step": 12232 }, { "epoch": 0.77, "grad_norm": 2.4418951142527257, "learning_rate": 1.3318720235845594e-06, "loss": 0.253, "step": 12233 }, { "epoch": 0.77, "grad_norm": 3.4855711853991473, "learning_rate": 1.3311799900138834e-06, "loss": 0.2633, "step": 12234 }, { "epoch": 0.77, "grad_norm": 4.7269823728543825, "learning_rate": 1.3304881086690674e-06, "loss": 0.262, "step": 12235 }, { "epoch": 0.77, "grad_norm": 2.111751311674048, "learning_rate": 1.3297963795788177e-06, "loss": 0.2404, "step": 12236 }, { "epoch": 0.77, "grad_norm": 1.866616775899338, "learning_rate": 1.3291048027718357e-06, "loss": 0.2553, "step": 12237 }, { "epoch": 0.77, "grad_norm": 3.0361375820927377, "learning_rate": 1.3284133782768139e-06, "loss": 0.2482, "step": 12238 }, { "epoch": 0.77, "grad_norm": 1.8277084526024296, "learning_rate": 1.3277221061224422e-06, "loss": 0.2697, "step": 12239 }, { "epoch": 0.77, "grad_norm": 1.760103770365269, "learning_rate": 1.3270309863374047e-06, "loss": 0.277, "step": 12240 }, { "epoch": 0.77, "grad_norm": 3.4116259303005694, "learning_rate": 1.3263400189503756e-06, "loss": 0.2468, "step": 12241 }, { "epoch": 0.77, "grad_norm": 1.8392725729239117, "learning_rate": 1.3256492039900226e-06, "loss": 0.241, "step": 12242 }, { "epoch": 0.77, "grad_norm": 11.103783520812081, "learning_rate": 1.324958541485012e-06, "loss": 0.259, "step": 12243 }, { "epoch": 0.77, "grad_norm": 0.6020325443230078, "learning_rate": 1.3242680314639995e-06, "loss": 0.486, "step": 12244 }, { "epoch": 0.77, "grad_norm": 2.3984756000123766, "learning_rate": 1.3235776739556333e-06, "loss": 0.247, "step": 12245 }, { "epoch": 0.77, "grad_norm": 2.2871965411899904, "learning_rate": 1.3228874689885606e-06, "loss": 0.2402, "step": 12246 }, { "epoch": 0.77, "grad_norm": 4.720329837754432, "learning_rate": 1.3221974165914176e-06, "loss": 0.2578, "step": 12247 }, { "epoch": 0.77, "grad_norm": 2.7224414669061514, "learning_rate": 1.3215075167928372e-06, "loss": 0.2637, "step": 12248 }, { "epoch": 0.77, "grad_norm": 1.820060765595058, "learning_rate": 1.3208177696214436e-06, "loss": 0.2714, "step": 12249 }, { "epoch": 0.77, "grad_norm": 1.5356019136963701, "learning_rate": 1.3201281751058542e-06, "loss": 0.2641, "step": 12250 }, { "epoch": 0.77, "grad_norm": 1.9641856076478836, "learning_rate": 1.319438733274685e-06, "loss": 0.2455, "step": 12251 }, { "epoch": 0.77, "grad_norm": 2.2092214727781436, "learning_rate": 1.3187494441565397e-06, "loss": 0.2526, "step": 12252 }, { "epoch": 0.77, "grad_norm": 8.519156337722903, "learning_rate": 1.318060307780017e-06, "loss": 0.275, "step": 12253 }, { "epoch": 0.77, "grad_norm": 2.9369238548468752, "learning_rate": 1.3173713241737123e-06, "loss": 0.2549, "step": 12254 }, { "epoch": 0.77, "grad_norm": 1.776159112521937, "learning_rate": 1.3166824933662136e-06, "loss": 0.2591, "step": 12255 }, { "epoch": 0.77, "grad_norm": 1.7435280964426205, "learning_rate": 1.3159938153861012e-06, "loss": 0.2569, "step": 12256 }, { "epoch": 0.77, "grad_norm": 2.043634005003201, "learning_rate": 1.3153052902619484e-06, "loss": 0.266, "step": 12257 }, { "epoch": 0.77, "grad_norm": 2.939685880575297, "learning_rate": 1.3146169180223228e-06, "loss": 0.257, "step": 12258 }, { "epoch": 0.77, "grad_norm": 2.0139655057295855, "learning_rate": 1.3139286986957884e-06, "loss": 0.2564, "step": 12259 }, { "epoch": 0.77, "grad_norm": 1.6765394533407212, "learning_rate": 1.3132406323108981e-06, "loss": 0.2587, "step": 12260 }, { "epoch": 0.77, "grad_norm": 0.5876611684976215, "learning_rate": 1.3125527188962034e-06, "loss": 0.4666, "step": 12261 }, { "epoch": 0.77, "grad_norm": 3.0890686446704105, "learning_rate": 1.311864958480245e-06, "loss": 0.2331, "step": 12262 }, { "epoch": 0.77, "grad_norm": 1.677254633816732, "learning_rate": 1.311177351091562e-06, "loss": 0.2422, "step": 12263 }, { "epoch": 0.77, "grad_norm": 2.256836932168832, "learning_rate": 1.3104898967586827e-06, "loss": 0.2553, "step": 12264 }, { "epoch": 0.77, "grad_norm": 1.70247783766268, "learning_rate": 1.3098025955101306e-06, "loss": 0.2522, "step": 12265 }, { "epoch": 0.77, "grad_norm": 2.1079012304757425, "learning_rate": 1.3091154473744221e-06, "loss": 0.2481, "step": 12266 }, { "epoch": 0.77, "grad_norm": 0.603033998295756, "learning_rate": 1.308428452380071e-06, "loss": 0.4413, "step": 12267 }, { "epoch": 0.77, "grad_norm": 3.8033667127370485, "learning_rate": 1.3077416105555784e-06, "loss": 0.2582, "step": 12268 }, { "epoch": 0.77, "grad_norm": 5.134625154010529, "learning_rate": 1.3070549219294465e-06, "loss": 0.25, "step": 12269 }, { "epoch": 0.77, "grad_norm": 3.9638394449868155, "learning_rate": 1.306368386530164e-06, "loss": 0.2741, "step": 12270 }, { "epoch": 0.77, "grad_norm": 2.6311224043929804, "learning_rate": 1.3056820043862189e-06, "loss": 0.2553, "step": 12271 }, { "epoch": 0.77, "grad_norm": 3.800949375937788, "learning_rate": 1.3049957755260894e-06, "loss": 0.2474, "step": 12272 }, { "epoch": 0.77, "grad_norm": 2.7937288608334936, "learning_rate": 1.3043096999782478e-06, "loss": 0.2493, "step": 12273 }, { "epoch": 0.77, "grad_norm": 1.947387471232567, "learning_rate": 1.3036237777711602e-06, "loss": 0.2383, "step": 12274 }, { "epoch": 0.77, "grad_norm": 1.8362005445986116, "learning_rate": 1.3029380089332872e-06, "loss": 0.2557, "step": 12275 }, { "epoch": 0.77, "grad_norm": 2.2221529467055463, "learning_rate": 1.3022523934930848e-06, "loss": 0.2772, "step": 12276 }, { "epoch": 0.77, "grad_norm": 2.750850222222142, "learning_rate": 1.301566931478998e-06, "loss": 0.2386, "step": 12277 }, { "epoch": 0.77, "grad_norm": 2.210715233519939, "learning_rate": 1.3008816229194671e-06, "loss": 0.278, "step": 12278 }, { "epoch": 0.77, "grad_norm": 2.228814960391766, "learning_rate": 1.3001964678429296e-06, "loss": 0.2579, "step": 12279 }, { "epoch": 0.77, "grad_norm": 2.7137110584683724, "learning_rate": 1.2995114662778125e-06, "loss": 0.2445, "step": 12280 }, { "epoch": 0.77, "grad_norm": 1.6586462192318272, "learning_rate": 1.2988266182525371e-06, "loss": 0.25, "step": 12281 }, { "epoch": 0.77, "grad_norm": 1.5299095831505582, "learning_rate": 1.298141923795518e-06, "loss": 0.2341, "step": 12282 }, { "epoch": 0.77, "grad_norm": 1.6383193506290523, "learning_rate": 1.2974573829351655e-06, "loss": 0.2546, "step": 12283 }, { "epoch": 0.77, "grad_norm": 3.1167957018806236, "learning_rate": 1.2967729956998843e-06, "loss": 0.2629, "step": 12284 }, { "epoch": 0.77, "grad_norm": 3.167159529146566, "learning_rate": 1.296088762118069e-06, "loss": 0.2414, "step": 12285 }, { "epoch": 0.77, "grad_norm": 1.7300635099688708, "learning_rate": 1.2954046822181088e-06, "loss": 0.2661, "step": 12286 }, { "epoch": 0.77, "grad_norm": 1.9307077429699162, "learning_rate": 1.2947207560283891e-06, "loss": 0.2538, "step": 12287 }, { "epoch": 0.77, "grad_norm": 2.343178622654274, "learning_rate": 1.2940369835772875e-06, "loss": 0.2707, "step": 12288 }, { "epoch": 0.77, "grad_norm": 3.000069593865243, "learning_rate": 1.2933533648931734e-06, "loss": 0.2533, "step": 12289 }, { "epoch": 0.77, "grad_norm": 1.9158547480528179, "learning_rate": 1.2926699000044107e-06, "loss": 0.2614, "step": 12290 }, { "epoch": 0.77, "grad_norm": 2.064007732610284, "learning_rate": 1.2919865889393584e-06, "loss": 0.266, "step": 12291 }, { "epoch": 0.77, "grad_norm": 1.5719989869828415, "learning_rate": 1.2913034317263706e-06, "loss": 0.2428, "step": 12292 }, { "epoch": 0.77, "grad_norm": 0.5747876282402975, "learning_rate": 1.2906204283937907e-06, "loss": 0.4773, "step": 12293 }, { "epoch": 0.77, "grad_norm": 6.215520242756292, "learning_rate": 1.2899375789699564e-06, "loss": 0.2596, "step": 12294 }, { "epoch": 0.77, "grad_norm": 1.8201404682568871, "learning_rate": 1.289254883483203e-06, "loss": 0.2761, "step": 12295 }, { "epoch": 0.77, "grad_norm": 1.6073309541184808, "learning_rate": 1.2885723419618556e-06, "loss": 0.2527, "step": 12296 }, { "epoch": 0.77, "grad_norm": 2.1361266884470442, "learning_rate": 1.2878899544342327e-06, "loss": 0.2513, "step": 12297 }, { "epoch": 0.77, "grad_norm": 1.8028098344253858, "learning_rate": 1.2872077209286505e-06, "loss": 0.2404, "step": 12298 }, { "epoch": 0.77, "grad_norm": 9.785329958891657, "learning_rate": 1.2865256414734133e-06, "loss": 0.2312, "step": 12299 }, { "epoch": 0.77, "grad_norm": 6.398529969598262, "learning_rate": 1.285843716096824e-06, "loss": 0.2621, "step": 12300 }, { "epoch": 0.77, "grad_norm": 2.7311487904016185, "learning_rate": 1.2851619448271762e-06, "loss": 0.2635, "step": 12301 }, { "epoch": 0.77, "grad_norm": 3.3297448433351207, "learning_rate": 1.2844803276927565e-06, "loss": 0.2564, "step": 12302 }, { "epoch": 0.77, "grad_norm": 2.5766723875318966, "learning_rate": 1.283798864721849e-06, "loss": 0.2797, "step": 12303 }, { "epoch": 0.77, "grad_norm": 1.9294270427827505, "learning_rate": 1.2831175559427266e-06, "loss": 0.2548, "step": 12304 }, { "epoch": 0.77, "grad_norm": 1.5845101405312683, "learning_rate": 1.2824364013836583e-06, "loss": 0.2635, "step": 12305 }, { "epoch": 0.77, "grad_norm": 3.954355978769204, "learning_rate": 1.2817554010729071e-06, "loss": 0.2688, "step": 12306 }, { "epoch": 0.77, "grad_norm": 1.4917984473922121, "learning_rate": 1.2810745550387283e-06, "loss": 0.264, "step": 12307 }, { "epoch": 0.77, "grad_norm": 2.4676256062330664, "learning_rate": 1.2803938633093726e-06, "loss": 0.2606, "step": 12308 }, { "epoch": 0.77, "grad_norm": 2.3007111312262816, "learning_rate": 1.2797133259130822e-06, "loss": 0.2567, "step": 12309 }, { "epoch": 0.77, "grad_norm": 1.7365384012426694, "learning_rate": 1.2790329428780928e-06, "loss": 0.2528, "step": 12310 }, { "epoch": 0.77, "grad_norm": 2.859494170895407, "learning_rate": 1.2783527142326375e-06, "loss": 0.262, "step": 12311 }, { "epoch": 0.77, "grad_norm": 2.078863950762435, "learning_rate": 1.277672640004936e-06, "loss": 0.2674, "step": 12312 }, { "epoch": 0.77, "grad_norm": 1.7138783608599124, "learning_rate": 1.2769927202232108e-06, "loss": 0.2524, "step": 12313 }, { "epoch": 0.77, "grad_norm": 1.8801935359782573, "learning_rate": 1.2763129549156699e-06, "loss": 0.2678, "step": 12314 }, { "epoch": 0.77, "grad_norm": 1.53751842020365, "learning_rate": 1.2756333441105168e-06, "loss": 0.2458, "step": 12315 }, { "epoch": 0.77, "grad_norm": 4.082930999696426, "learning_rate": 1.2749538878359535e-06, "loss": 0.2641, "step": 12316 }, { "epoch": 0.77, "grad_norm": 1.973673385915936, "learning_rate": 1.2742745861201694e-06, "loss": 0.2432, "step": 12317 }, { "epoch": 0.77, "grad_norm": 2.012117341277662, "learning_rate": 1.273595438991349e-06, "loss": 0.2942, "step": 12318 }, { "epoch": 0.77, "grad_norm": 1.8174064867377344, "learning_rate": 1.2729164464776743e-06, "loss": 0.2483, "step": 12319 }, { "epoch": 0.77, "grad_norm": 1.6989239722003788, "learning_rate": 1.2722376086073147e-06, "loss": 0.281, "step": 12320 }, { "epoch": 0.77, "grad_norm": 1.6236687822976983, "learning_rate": 1.27155892540844e-06, "loss": 0.259, "step": 12321 }, { "epoch": 0.77, "grad_norm": 1.7435312816188404, "learning_rate": 1.270880396909207e-06, "loss": 0.2619, "step": 12322 }, { "epoch": 0.77, "grad_norm": 1.9706493887070782, "learning_rate": 1.2702020231377688e-06, "loss": 0.2336, "step": 12323 }, { "epoch": 0.78, "grad_norm": 1.4683597384257465, "learning_rate": 1.2695238041222752e-06, "loss": 0.2698, "step": 12324 }, { "epoch": 0.78, "grad_norm": 1.937780816532654, "learning_rate": 1.2688457398908648e-06, "loss": 0.2518, "step": 12325 }, { "epoch": 0.78, "grad_norm": 2.1077805243028296, "learning_rate": 1.2681678304716711e-06, "loss": 0.2603, "step": 12326 }, { "epoch": 0.78, "grad_norm": 0.5625326232647218, "learning_rate": 1.267490075892822e-06, "loss": 0.4493, "step": 12327 }, { "epoch": 0.78, "grad_norm": 2.3462421567618548, "learning_rate": 1.2668124761824408e-06, "loss": 0.2449, "step": 12328 }, { "epoch": 0.78, "grad_norm": 5.261398561192962, "learning_rate": 1.266135031368641e-06, "loss": 0.2699, "step": 12329 }, { "epoch": 0.78, "grad_norm": 2.170633701689669, "learning_rate": 1.265457741479531e-06, "loss": 0.2718, "step": 12330 }, { "epoch": 0.78, "grad_norm": 2.8752811960831397, "learning_rate": 1.2647806065432106e-06, "loss": 0.2518, "step": 12331 }, { "epoch": 0.78, "grad_norm": 5.166894900555235, "learning_rate": 1.2641036265877794e-06, "loss": 0.2643, "step": 12332 }, { "epoch": 0.78, "grad_norm": 2.247543311408234, "learning_rate": 1.2634268016413242e-06, "loss": 0.2607, "step": 12333 }, { "epoch": 0.78, "grad_norm": 1.9844969605356477, "learning_rate": 1.2627501317319263e-06, "loss": 0.2614, "step": 12334 }, { "epoch": 0.78, "grad_norm": 3.2404791224035177, "learning_rate": 1.2620736168876636e-06, "loss": 0.2507, "step": 12335 }, { "epoch": 0.78, "grad_norm": 1.941104051602157, "learning_rate": 1.2613972571366074e-06, "loss": 0.2586, "step": 12336 }, { "epoch": 0.78, "grad_norm": 2.378846833590021, "learning_rate": 1.2607210525068192e-06, "loss": 0.2526, "step": 12337 }, { "epoch": 0.78, "grad_norm": 1.9412812644753084, "learning_rate": 1.260045003026355e-06, "loss": 0.2595, "step": 12338 }, { "epoch": 0.78, "grad_norm": 2.836719651860733, "learning_rate": 1.2593691087232684e-06, "loss": 0.2706, "step": 12339 }, { "epoch": 0.78, "grad_norm": 2.541883754239091, "learning_rate": 1.258693369625601e-06, "loss": 0.2536, "step": 12340 }, { "epoch": 0.78, "grad_norm": 1.9858546006136708, "learning_rate": 1.2580177857613912e-06, "loss": 0.2424, "step": 12341 }, { "epoch": 0.78, "grad_norm": 8.818343775609208, "learning_rate": 1.2573423571586686e-06, "loss": 0.2537, "step": 12342 }, { "epoch": 0.78, "grad_norm": 1.8340129739737918, "learning_rate": 1.2566670838454593e-06, "loss": 0.2549, "step": 12343 }, { "epoch": 0.78, "grad_norm": 0.5880251809977763, "learning_rate": 1.2559919658497827e-06, "loss": 0.4537, "step": 12344 }, { "epoch": 0.78, "grad_norm": 2.0419660037749505, "learning_rate": 1.2553170031996497e-06, "loss": 0.2734, "step": 12345 }, { "epoch": 0.78, "grad_norm": 2.1628444714305917, "learning_rate": 1.2546421959230642e-06, "loss": 0.2404, "step": 12346 }, { "epoch": 0.78, "grad_norm": 5.122535391561523, "learning_rate": 1.253967544048028e-06, "loss": 0.2551, "step": 12347 }, { "epoch": 0.78, "grad_norm": 4.0228905299660935, "learning_rate": 1.2532930476025322e-06, "loss": 0.2608, "step": 12348 }, { "epoch": 0.78, "grad_norm": 3.1976743069358076, "learning_rate": 1.2526187066145628e-06, "loss": 0.2584, "step": 12349 }, { "epoch": 0.78, "grad_norm": 3.140037319583642, "learning_rate": 1.2519445211120979e-06, "loss": 0.2546, "step": 12350 }, { "epoch": 0.78, "grad_norm": 1.8626642140723708, "learning_rate": 1.2512704911231116e-06, "loss": 0.2392, "step": 12351 }, { "epoch": 0.78, "grad_norm": 6.315816455665586, "learning_rate": 1.2505966166755729e-06, "loss": 0.2531, "step": 12352 }, { "epoch": 0.78, "grad_norm": 3.7528098609029947, "learning_rate": 1.2499228977974403e-06, "loss": 0.2682, "step": 12353 }, { "epoch": 0.78, "grad_norm": 1.9548536727600647, "learning_rate": 1.2492493345166662e-06, "loss": 0.2473, "step": 12354 }, { "epoch": 0.78, "grad_norm": 2.5180813166607314, "learning_rate": 1.2485759268612008e-06, "loss": 0.2503, "step": 12355 }, { "epoch": 0.78, "grad_norm": 2.4406103045390592, "learning_rate": 1.2479026748589834e-06, "loss": 0.25, "step": 12356 }, { "epoch": 0.78, "grad_norm": 4.0824957613167765, "learning_rate": 1.2472295785379468e-06, "loss": 0.2502, "step": 12357 }, { "epoch": 0.78, "grad_norm": 1.5232142161234588, "learning_rate": 1.2465566379260224e-06, "loss": 0.2542, "step": 12358 }, { "epoch": 0.78, "grad_norm": 1.9319094125273095, "learning_rate": 1.245883853051128e-06, "loss": 0.2372, "step": 12359 }, { "epoch": 0.78, "grad_norm": 1.8080425389440837, "learning_rate": 1.2452112239411823e-06, "loss": 0.2629, "step": 12360 }, { "epoch": 0.78, "grad_norm": 12.304213693016038, "learning_rate": 1.2445387506240925e-06, "loss": 0.2732, "step": 12361 }, { "epoch": 0.78, "grad_norm": 0.6106698475194454, "learning_rate": 1.243866433127759e-06, "loss": 0.4669, "step": 12362 }, { "epoch": 0.78, "grad_norm": 1.8446023964467877, "learning_rate": 1.24319427148008e-06, "loss": 0.2544, "step": 12363 }, { "epoch": 0.78, "grad_norm": 3.3624951310690876, "learning_rate": 1.242522265708942e-06, "loss": 0.2857, "step": 12364 }, { "epoch": 0.78, "grad_norm": 5.60000752409501, "learning_rate": 1.2418504158422306e-06, "loss": 0.2747, "step": 12365 }, { "epoch": 0.78, "grad_norm": 1.652813906098466, "learning_rate": 1.2411787219078208e-06, "loss": 0.2423, "step": 12366 }, { "epoch": 0.78, "grad_norm": 3.5945675052839787, "learning_rate": 1.2405071839335808e-06, "loss": 0.262, "step": 12367 }, { "epoch": 0.78, "grad_norm": 2.565938368415108, "learning_rate": 1.2398358019473766e-06, "loss": 0.2546, "step": 12368 }, { "epoch": 0.78, "grad_norm": 1.755968406847944, "learning_rate": 1.2391645759770642e-06, "loss": 0.2445, "step": 12369 }, { "epoch": 0.78, "grad_norm": 178.19725017159877, "learning_rate": 1.238493506050492e-06, "loss": 0.2667, "step": 12370 }, { "epoch": 0.78, "grad_norm": 1.518734467800095, "learning_rate": 1.2378225921955067e-06, "loss": 0.2489, "step": 12371 }, { "epoch": 0.78, "grad_norm": 3.685700762759292, "learning_rate": 1.237151834439943e-06, "loss": 0.2652, "step": 12372 }, { "epoch": 0.78, "grad_norm": 1.6633170610302996, "learning_rate": 1.2364812328116348e-06, "loss": 0.2425, "step": 12373 }, { "epoch": 0.78, "grad_norm": 1.9514821613397824, "learning_rate": 1.235810787338405e-06, "loss": 0.2413, "step": 12374 }, { "epoch": 0.78, "grad_norm": 4.374962238376874, "learning_rate": 1.2351404980480702e-06, "loss": 0.2653, "step": 12375 }, { "epoch": 0.78, "grad_norm": 0.5858438771644625, "learning_rate": 1.2344703649684442e-06, "loss": 0.4414, "step": 12376 }, { "epoch": 0.78, "grad_norm": 2.555637482244403, "learning_rate": 1.233800388127332e-06, "loss": 0.2658, "step": 12377 }, { "epoch": 0.78, "grad_norm": 1.9046070394648476, "learning_rate": 1.233130567552529e-06, "loss": 0.2615, "step": 12378 }, { "epoch": 0.78, "grad_norm": 2.2725242366830396, "learning_rate": 1.2324609032718298e-06, "loss": 0.2602, "step": 12379 }, { "epoch": 0.78, "grad_norm": 1.9590298443285727, "learning_rate": 1.2317913953130213e-06, "loss": 0.272, "step": 12380 }, { "epoch": 0.78, "grad_norm": 1.9062195970508713, "learning_rate": 1.2311220437038801e-06, "loss": 0.2589, "step": 12381 }, { "epoch": 0.78, "grad_norm": 1.7911703758876762, "learning_rate": 1.23045284847218e-06, "loss": 0.2562, "step": 12382 }, { "epoch": 0.78, "grad_norm": 2.5286150250647546, "learning_rate": 1.2297838096456855e-06, "loss": 0.2563, "step": 12383 }, { "epoch": 0.78, "grad_norm": 1.4972484247860378, "learning_rate": 1.2291149272521586e-06, "loss": 0.2602, "step": 12384 }, { "epoch": 0.78, "grad_norm": 2.0914695433560055, "learning_rate": 1.2284462013193515e-06, "loss": 0.2404, "step": 12385 }, { "epoch": 0.78, "grad_norm": 1.8861475027800798, "learning_rate": 1.227777631875009e-06, "loss": 0.2666, "step": 12386 }, { "epoch": 0.78, "grad_norm": 2.5058317852938243, "learning_rate": 1.2271092189468726e-06, "loss": 0.2568, "step": 12387 }, { "epoch": 0.78, "grad_norm": 2.8664571856773016, "learning_rate": 1.2264409625626778e-06, "loss": 0.245, "step": 12388 }, { "epoch": 0.78, "grad_norm": 2.626670678213925, "learning_rate": 1.2257728627501497e-06, "loss": 0.2518, "step": 12389 }, { "epoch": 0.78, "grad_norm": 1.7524761963452473, "learning_rate": 1.2251049195370097e-06, "loss": 0.2582, "step": 12390 }, { "epoch": 0.78, "grad_norm": 1.7858765545749833, "learning_rate": 1.2244371329509697e-06, "loss": 0.2594, "step": 12391 }, { "epoch": 0.78, "grad_norm": 1.6927565945282785, "learning_rate": 1.223769503019741e-06, "loss": 0.2558, "step": 12392 }, { "epoch": 0.78, "grad_norm": 4.001672851441979, "learning_rate": 1.2231020297710228e-06, "loss": 0.2422, "step": 12393 }, { "epoch": 0.78, "grad_norm": 3.661360873683687, "learning_rate": 1.2224347132325082e-06, "loss": 0.2527, "step": 12394 }, { "epoch": 0.78, "grad_norm": 2.009360135903299, "learning_rate": 1.2217675534318873e-06, "loss": 0.243, "step": 12395 }, { "epoch": 0.78, "grad_norm": 2.2687522923484273, "learning_rate": 1.2211005503968431e-06, "loss": 0.2624, "step": 12396 }, { "epoch": 0.78, "grad_norm": 2.4004277469686612, "learning_rate": 1.220433704155049e-06, "loss": 0.2477, "step": 12397 }, { "epoch": 0.78, "grad_norm": 2.0374917725442665, "learning_rate": 1.219767014734174e-06, "loss": 0.2686, "step": 12398 }, { "epoch": 0.78, "grad_norm": 2.6373522856638223, "learning_rate": 1.2191004821618785e-06, "loss": 0.2533, "step": 12399 }, { "epoch": 0.78, "grad_norm": 2.590581844065551, "learning_rate": 1.2184341064658206e-06, "loss": 0.2631, "step": 12400 }, { "epoch": 0.78, "grad_norm": 2.877908650692595, "learning_rate": 1.2177678876736488e-06, "loss": 0.254, "step": 12401 }, { "epoch": 0.78, "grad_norm": 7.574956335709526, "learning_rate": 1.2171018258130042e-06, "loss": 0.2649, "step": 12402 }, { "epoch": 0.78, "grad_norm": 4.123557344062702, "learning_rate": 1.2164359209115235e-06, "loss": 0.244, "step": 12403 }, { "epoch": 0.78, "grad_norm": 1.2841673830322133, "learning_rate": 1.2157701729968384e-06, "loss": 0.2629, "step": 12404 }, { "epoch": 0.78, "grad_norm": 1.658671307641624, "learning_rate": 1.2151045820965708e-06, "loss": 0.2447, "step": 12405 }, { "epoch": 0.78, "grad_norm": 3.439038942895115, "learning_rate": 1.2144391482383361e-06, "loss": 0.2476, "step": 12406 }, { "epoch": 0.78, "grad_norm": 4.109420604086303, "learning_rate": 1.2137738714497437e-06, "loss": 0.2548, "step": 12407 }, { "epoch": 0.78, "grad_norm": 1.7114240748355343, "learning_rate": 1.2131087517584006e-06, "loss": 0.2651, "step": 12408 }, { "epoch": 0.78, "grad_norm": 1.9308033531199176, "learning_rate": 1.2124437891918995e-06, "loss": 0.2484, "step": 12409 }, { "epoch": 0.78, "grad_norm": 1.7548605238780082, "learning_rate": 1.2117789837778349e-06, "loss": 0.2356, "step": 12410 }, { "epoch": 0.78, "grad_norm": 2.974220225322346, "learning_rate": 1.2111143355437877e-06, "loss": 0.234, "step": 12411 }, { "epoch": 0.78, "grad_norm": 1.752797791187074, "learning_rate": 1.2104498445173373e-06, "loss": 0.2339, "step": 12412 }, { "epoch": 0.78, "grad_norm": 0.6263168882674981, "learning_rate": 1.2097855107260542e-06, "loss": 0.4699, "step": 12413 }, { "epoch": 0.78, "grad_norm": 2.156443661231618, "learning_rate": 1.209121334197501e-06, "loss": 0.247, "step": 12414 }, { "epoch": 0.78, "grad_norm": 2.4142469559438777, "learning_rate": 1.2084573149592382e-06, "loss": 0.2503, "step": 12415 }, { "epoch": 0.78, "grad_norm": 1.647771114225083, "learning_rate": 1.2077934530388163e-06, "loss": 0.2396, "step": 12416 }, { "epoch": 0.78, "grad_norm": 6.529543479823659, "learning_rate": 1.2071297484637785e-06, "loss": 0.2468, "step": 12417 }, { "epoch": 0.78, "grad_norm": 1.6622057967595985, "learning_rate": 1.2064662012616651e-06, "loss": 0.2584, "step": 12418 }, { "epoch": 0.78, "grad_norm": 2.868284889800844, "learning_rate": 1.2058028114600061e-06, "loss": 0.2506, "step": 12419 }, { "epoch": 0.78, "grad_norm": 2.3575051245893266, "learning_rate": 1.205139579086329e-06, "loss": 0.2731, "step": 12420 }, { "epoch": 0.78, "grad_norm": 1.8052124583456362, "learning_rate": 1.204476504168151e-06, "loss": 0.244, "step": 12421 }, { "epoch": 0.78, "grad_norm": 1.7006314857029459, "learning_rate": 1.203813586732983e-06, "loss": 0.2552, "step": 12422 }, { "epoch": 0.78, "grad_norm": 2.815932145949014, "learning_rate": 1.2031508268083342e-06, "loss": 0.2782, "step": 12423 }, { "epoch": 0.78, "grad_norm": 3.5145252146341597, "learning_rate": 1.2024882244216996e-06, "loss": 0.2438, "step": 12424 }, { "epoch": 0.78, "grad_norm": 2.940945168592251, "learning_rate": 1.201825779600575e-06, "loss": 0.2598, "step": 12425 }, { "epoch": 0.78, "grad_norm": 1.6301880384540195, "learning_rate": 1.2011634923724452e-06, "loss": 0.2485, "step": 12426 }, { "epoch": 0.78, "grad_norm": 1.843311714911491, "learning_rate": 1.2005013627647889e-06, "loss": 0.2532, "step": 12427 }, { "epoch": 0.78, "grad_norm": 1.7614695281699693, "learning_rate": 1.1998393908050803e-06, "loss": 0.2537, "step": 12428 }, { "epoch": 0.78, "grad_norm": 2.1405152867619286, "learning_rate": 1.1991775765207854e-06, "loss": 0.2679, "step": 12429 }, { "epoch": 0.78, "grad_norm": 0.5735871136892312, "learning_rate": 1.1985159199393626e-06, "loss": 0.4516, "step": 12430 }, { "epoch": 0.78, "grad_norm": 2.7317408857345176, "learning_rate": 1.1978544210882675e-06, "loss": 0.2497, "step": 12431 }, { "epoch": 0.78, "grad_norm": 2.075251173038843, "learning_rate": 1.197193079994945e-06, "loss": 0.267, "step": 12432 }, { "epoch": 0.78, "grad_norm": 1.828177396460023, "learning_rate": 1.1965318966868372e-06, "loss": 0.2445, "step": 12433 }, { "epoch": 0.78, "grad_norm": 1.91106666692495, "learning_rate": 1.1958708711913769e-06, "loss": 0.2403, "step": 12434 }, { "epoch": 0.78, "grad_norm": 1.7164893516252615, "learning_rate": 1.1952100035359893e-06, "loss": 0.261, "step": 12435 }, { "epoch": 0.78, "grad_norm": 1.9739400176144315, "learning_rate": 1.1945492937480984e-06, "loss": 0.2515, "step": 12436 }, { "epoch": 0.78, "grad_norm": 1.6913900386011396, "learning_rate": 1.1938887418551164e-06, "loss": 0.254, "step": 12437 }, { "epoch": 0.78, "grad_norm": 1.442235477648228, "learning_rate": 1.1932283478844497e-06, "loss": 0.2649, "step": 12438 }, { "epoch": 0.78, "grad_norm": 1.726568134779693, "learning_rate": 1.1925681118635008e-06, "loss": 0.2729, "step": 12439 }, { "epoch": 0.78, "grad_norm": 0.6070856432939178, "learning_rate": 1.1919080338196642e-06, "loss": 0.4923, "step": 12440 }, { "epoch": 0.78, "grad_norm": 3.9357918761025683, "learning_rate": 1.1912481137803277e-06, "loss": 0.2582, "step": 12441 }, { "epoch": 0.78, "grad_norm": 4.239781688494264, "learning_rate": 1.1905883517728723e-06, "loss": 0.2741, "step": 12442 }, { "epoch": 0.78, "grad_norm": 12.548832100515837, "learning_rate": 1.1899287478246707e-06, "loss": 0.2451, "step": 12443 }, { "epoch": 0.78, "grad_norm": 1.9318255389905616, "learning_rate": 1.1892693019630946e-06, "loss": 0.2755, "step": 12444 }, { "epoch": 0.78, "grad_norm": 2.309551825458991, "learning_rate": 1.1886100142155032e-06, "loss": 0.2632, "step": 12445 }, { "epoch": 0.78, "grad_norm": 2.5611418476221632, "learning_rate": 1.1879508846092513e-06, "loss": 0.2738, "step": 12446 }, { "epoch": 0.78, "grad_norm": 3.5195417147947374, "learning_rate": 1.1872919131716875e-06, "loss": 0.2497, "step": 12447 }, { "epoch": 0.78, "grad_norm": 1.8920029144962827, "learning_rate": 1.1866330999301562e-06, "loss": 0.2658, "step": 12448 }, { "epoch": 0.78, "grad_norm": 3.4493648452887045, "learning_rate": 1.1859744449119908e-06, "loss": 0.2571, "step": 12449 }, { "epoch": 0.78, "grad_norm": 1.5764234114088442, "learning_rate": 1.1853159481445203e-06, "loss": 0.2499, "step": 12450 }, { "epoch": 0.78, "grad_norm": 1.9210018274176985, "learning_rate": 1.184657609655065e-06, "loss": 0.2657, "step": 12451 }, { "epoch": 0.78, "grad_norm": 2.6734108429947123, "learning_rate": 1.183999429470944e-06, "loss": 0.2585, "step": 12452 }, { "epoch": 0.78, "grad_norm": 2.85496893988578, "learning_rate": 1.1833414076194643e-06, "loss": 0.2543, "step": 12453 }, { "epoch": 0.78, "grad_norm": 1.874318287772399, "learning_rate": 1.1826835441279277e-06, "loss": 0.2703, "step": 12454 }, { "epoch": 0.78, "grad_norm": 2.020251792916572, "learning_rate": 1.182025839023631e-06, "loss": 0.2456, "step": 12455 }, { "epoch": 0.78, "grad_norm": 3.054236271080544, "learning_rate": 1.1813682923338654e-06, "loss": 0.2484, "step": 12456 }, { "epoch": 0.78, "grad_norm": 2.5462757259698257, "learning_rate": 1.1807109040859115e-06, "loss": 0.256, "step": 12457 }, { "epoch": 0.78, "grad_norm": 2.7216708233070572, "learning_rate": 1.1800536743070467e-06, "loss": 0.2622, "step": 12458 }, { "epoch": 0.78, "grad_norm": 3.7179250263832087, "learning_rate": 1.1793966030245379e-06, "loss": 0.2536, "step": 12459 }, { "epoch": 0.78, "grad_norm": 0.6603924928332737, "learning_rate": 1.1787396902656518e-06, "loss": 0.4963, "step": 12460 }, { "epoch": 0.78, "grad_norm": 2.5371285628577036, "learning_rate": 1.1780829360576418e-06, "loss": 0.2527, "step": 12461 }, { "epoch": 0.78, "grad_norm": 2.1436323176442866, "learning_rate": 1.1774263404277607e-06, "loss": 0.2524, "step": 12462 }, { "epoch": 0.78, "grad_norm": 2.620969723792433, "learning_rate": 1.1767699034032492e-06, "loss": 0.2527, "step": 12463 }, { "epoch": 0.78, "grad_norm": 4.06020484986667, "learning_rate": 1.1761136250113465e-06, "loss": 0.2586, "step": 12464 }, { "epoch": 0.78, "grad_norm": 10.324309916458656, "learning_rate": 1.1754575052792815e-06, "loss": 0.2812, "step": 12465 }, { "epoch": 0.78, "grad_norm": 2.1097163696447896, "learning_rate": 1.1748015442342781e-06, "loss": 0.273, "step": 12466 }, { "epoch": 0.78, "grad_norm": 0.5635493235170816, "learning_rate": 1.1741457419035507e-06, "loss": 0.4458, "step": 12467 }, { "epoch": 0.78, "grad_norm": 9.33909884006406, "learning_rate": 1.1734900983143137e-06, "loss": 0.2516, "step": 12468 }, { "epoch": 0.78, "grad_norm": 2.627532320798, "learning_rate": 1.1728346134937684e-06, "loss": 0.2681, "step": 12469 }, { "epoch": 0.78, "grad_norm": 2.4333070540487776, "learning_rate": 1.1721792874691134e-06, "loss": 0.2567, "step": 12470 }, { "epoch": 0.78, "grad_norm": 1.9691859001338756, "learning_rate": 1.1715241202675376e-06, "loss": 0.269, "step": 12471 }, { "epoch": 0.78, "grad_norm": 3.214259415331407, "learning_rate": 1.170869111916228e-06, "loss": 0.247, "step": 12472 }, { "epoch": 0.78, "grad_norm": 2.141149431017653, "learning_rate": 1.1702142624423597e-06, "loss": 0.2471, "step": 12473 }, { "epoch": 0.78, "grad_norm": 1.4431629562547268, "learning_rate": 1.169559571873104e-06, "loss": 0.2388, "step": 12474 }, { "epoch": 0.78, "grad_norm": 1.9320162319467629, "learning_rate": 1.1689050402356245e-06, "loss": 0.2676, "step": 12475 }, { "epoch": 0.78, "grad_norm": 1.9550619027480893, "learning_rate": 1.1682506675570794e-06, "loss": 0.2668, "step": 12476 }, { "epoch": 0.78, "grad_norm": 3.423839090169735, "learning_rate": 1.1675964538646212e-06, "loss": 0.2612, "step": 12477 }, { "epoch": 0.78, "grad_norm": 2.703081067910517, "learning_rate": 1.1669423991853934e-06, "loss": 0.2656, "step": 12478 }, { "epoch": 0.78, "grad_norm": 2.809074591000523, "learning_rate": 1.1662885035465326e-06, "loss": 0.2561, "step": 12479 }, { "epoch": 0.78, "grad_norm": 6.289841843814653, "learning_rate": 1.1656347669751721e-06, "loss": 0.2365, "step": 12480 }, { "epoch": 0.78, "grad_norm": 1.7508959446791594, "learning_rate": 1.1649811894984365e-06, "loss": 0.2742, "step": 12481 }, { "epoch": 0.78, "grad_norm": 2.768419559671135, "learning_rate": 1.1643277711434426e-06, "loss": 0.2486, "step": 12482 }, { "epoch": 0.79, "grad_norm": 3.1725298500932095, "learning_rate": 1.1636745119373006e-06, "loss": 0.2634, "step": 12483 }, { "epoch": 0.79, "grad_norm": 2.0440031235292655, "learning_rate": 1.1630214119071176e-06, "loss": 0.2552, "step": 12484 }, { "epoch": 0.79, "grad_norm": 2.165666677740684, "learning_rate": 1.1623684710799931e-06, "loss": 0.2787, "step": 12485 }, { "epoch": 0.79, "grad_norm": 2.634598689959782, "learning_rate": 1.1617156894830168e-06, "loss": 0.2526, "step": 12486 }, { "epoch": 0.79, "grad_norm": 2.0799976191475413, "learning_rate": 1.1610630671432726e-06, "loss": 0.2522, "step": 12487 }, { "epoch": 0.79, "grad_norm": 6.526831303344769, "learning_rate": 1.160410604087842e-06, "loss": 0.275, "step": 12488 }, { "epoch": 0.79, "grad_norm": 1.7336969226906678, "learning_rate": 1.1597583003437957e-06, "loss": 0.2441, "step": 12489 }, { "epoch": 0.79, "grad_norm": 1.3347651756990528, "learning_rate": 1.1591061559381983e-06, "loss": 0.2344, "step": 12490 }, { "epoch": 0.79, "grad_norm": 1.5842669488335628, "learning_rate": 1.1584541708981056e-06, "loss": 0.2454, "step": 12491 }, { "epoch": 0.79, "grad_norm": 2.0914782853210023, "learning_rate": 1.157802345250576e-06, "loss": 0.241, "step": 12492 }, { "epoch": 0.79, "grad_norm": 0.6067020423351305, "learning_rate": 1.1571506790226512e-06, "loss": 0.5115, "step": 12493 }, { "epoch": 0.79, "grad_norm": 4.577052080039558, "learning_rate": 1.1564991722413703e-06, "loss": 0.2581, "step": 12494 }, { "epoch": 0.79, "grad_norm": 2.141169498888766, "learning_rate": 1.1558478249337645e-06, "loss": 0.2558, "step": 12495 }, { "epoch": 0.79, "grad_norm": 1.8072534450279496, "learning_rate": 1.155196637126862e-06, "loss": 0.2558, "step": 12496 }, { "epoch": 0.79, "grad_norm": 2.4883261753788135, "learning_rate": 1.1545456088476798e-06, "loss": 0.2548, "step": 12497 }, { "epoch": 0.79, "grad_norm": 2.1176064660172833, "learning_rate": 1.1538947401232292e-06, "loss": 0.2452, "step": 12498 }, { "epoch": 0.79, "grad_norm": 2.44912751435392, "learning_rate": 1.1532440309805172e-06, "loss": 0.2758, "step": 12499 }, { "epoch": 0.79, "grad_norm": 2.273582491757086, "learning_rate": 1.1525934814465445e-06, "loss": 0.2393, "step": 12500 }, { "epoch": 0.79, "grad_norm": 1.9041350921919935, "learning_rate": 1.151943091548302e-06, "loss": 0.2604, "step": 12501 }, { "epoch": 0.79, "grad_norm": 2.287163244391985, "learning_rate": 1.1512928613127755e-06, "loss": 0.2495, "step": 12502 }, { "epoch": 0.79, "grad_norm": 3.4595723358799626, "learning_rate": 1.1506427907669433e-06, "loss": 0.256, "step": 12503 }, { "epoch": 0.79, "grad_norm": 4.49651984953011, "learning_rate": 1.1499928799377797e-06, "loss": 0.2595, "step": 12504 }, { "epoch": 0.79, "grad_norm": 2.125624539039244, "learning_rate": 1.1493431288522506e-06, "loss": 0.2559, "step": 12505 }, { "epoch": 0.79, "grad_norm": 1.8762300636033735, "learning_rate": 1.1486935375373127e-06, "loss": 0.2578, "step": 12506 }, { "epoch": 0.79, "grad_norm": 1.5016800503813323, "learning_rate": 1.1480441060199205e-06, "loss": 0.2544, "step": 12507 }, { "epoch": 0.79, "grad_norm": 6.838339903371633, "learning_rate": 1.147394834327022e-06, "loss": 0.2664, "step": 12508 }, { "epoch": 0.79, "grad_norm": 3.881716108332491, "learning_rate": 1.1467457224855545e-06, "loss": 0.2537, "step": 12509 }, { "epoch": 0.79, "grad_norm": 1.5807947566489926, "learning_rate": 1.1460967705224513e-06, "loss": 0.2605, "step": 12510 }, { "epoch": 0.79, "grad_norm": 4.26284734727908, "learning_rate": 1.1454479784646366e-06, "loss": 0.2579, "step": 12511 }, { "epoch": 0.79, "grad_norm": 5.8058990245876085, "learning_rate": 1.1447993463390338e-06, "loss": 0.2557, "step": 12512 }, { "epoch": 0.79, "grad_norm": 2.6603955876008762, "learning_rate": 1.1441508741725532e-06, "loss": 0.2607, "step": 12513 }, { "epoch": 0.79, "grad_norm": 1.8662390847806956, "learning_rate": 1.1435025619921003e-06, "loss": 0.27, "step": 12514 }, { "epoch": 0.79, "grad_norm": 1.8993136860139344, "learning_rate": 1.1428544098245764e-06, "loss": 0.2605, "step": 12515 }, { "epoch": 0.79, "grad_norm": 1.6618781794788233, "learning_rate": 1.1422064176968751e-06, "loss": 0.2393, "step": 12516 }, { "epoch": 0.79, "grad_norm": 1.6535574441274803, "learning_rate": 1.1415585856358818e-06, "loss": 0.2459, "step": 12517 }, { "epoch": 0.79, "grad_norm": 2.1107706388054064, "learning_rate": 1.140910913668476e-06, "loss": 0.254, "step": 12518 }, { "epoch": 0.79, "grad_norm": 2.0057155683324996, "learning_rate": 1.1402634018215297e-06, "loss": 0.2567, "step": 12519 }, { "epoch": 0.79, "grad_norm": 1.8805554651096124, "learning_rate": 1.1396160501219122e-06, "loss": 0.256, "step": 12520 }, { "epoch": 0.79, "grad_norm": 1.5254401353977964, "learning_rate": 1.1389688585964797e-06, "loss": 0.2589, "step": 12521 }, { "epoch": 0.79, "grad_norm": 2.7727760794380036, "learning_rate": 1.1383218272720886e-06, "loss": 0.2486, "step": 12522 }, { "epoch": 0.79, "grad_norm": 4.226380985944135, "learning_rate": 1.1376749561755829e-06, "loss": 0.251, "step": 12523 }, { "epoch": 0.79, "grad_norm": 1.9691395237304765, "learning_rate": 1.137028245333805e-06, "loss": 0.2513, "step": 12524 }, { "epoch": 0.79, "grad_norm": 3.7968276456284897, "learning_rate": 1.1363816947735866e-06, "loss": 0.2797, "step": 12525 }, { "epoch": 0.79, "grad_norm": 1.7319762883552918, "learning_rate": 1.135735304521754e-06, "loss": 0.2488, "step": 12526 }, { "epoch": 0.79, "grad_norm": 2.3259678274218527, "learning_rate": 1.1350890746051257e-06, "loss": 0.2511, "step": 12527 }, { "epoch": 0.79, "grad_norm": 1.774371210727824, "learning_rate": 1.1344430050505174e-06, "loss": 0.2683, "step": 12528 }, { "epoch": 0.79, "grad_norm": 3.4778816333882983, "learning_rate": 1.1337970958847354e-06, "loss": 0.2398, "step": 12529 }, { "epoch": 0.79, "grad_norm": 2.35855918601479, "learning_rate": 1.1331513471345796e-06, "loss": 0.2563, "step": 12530 }, { "epoch": 0.79, "grad_norm": 2.002575286983129, "learning_rate": 1.1325057588268406e-06, "loss": 0.2638, "step": 12531 }, { "epoch": 0.79, "grad_norm": 1.794344514719857, "learning_rate": 1.1318603309883092e-06, "loss": 0.2573, "step": 12532 }, { "epoch": 0.79, "grad_norm": 3.0473068150855536, "learning_rate": 1.131215063645763e-06, "loss": 0.2479, "step": 12533 }, { "epoch": 0.79, "grad_norm": 2.1101361089676454, "learning_rate": 1.1305699568259754e-06, "loss": 0.2535, "step": 12534 }, { "epoch": 0.79, "grad_norm": 2.6986855224429824, "learning_rate": 1.129925010555712e-06, "loss": 0.2493, "step": 12535 }, { "epoch": 0.79, "grad_norm": 2.41802514819793, "learning_rate": 1.129280224861734e-06, "loss": 0.2528, "step": 12536 }, { "epoch": 0.79, "grad_norm": 2.1709270847637714, "learning_rate": 1.128635599770796e-06, "loss": 0.2532, "step": 12537 }, { "epoch": 0.79, "grad_norm": 1.6679563241448176, "learning_rate": 1.1279911353096428e-06, "loss": 0.2451, "step": 12538 }, { "epoch": 0.79, "grad_norm": 1.737360594545524, "learning_rate": 1.127346831505014e-06, "loss": 0.2513, "step": 12539 }, { "epoch": 0.79, "grad_norm": 2.782237902939371, "learning_rate": 1.126702688383645e-06, "loss": 0.2483, "step": 12540 }, { "epoch": 0.79, "grad_norm": 2.1581764216894004, "learning_rate": 1.1260587059722612e-06, "loss": 0.2604, "step": 12541 }, { "epoch": 0.79, "grad_norm": 1.9506341887736045, "learning_rate": 1.1254148842975826e-06, "loss": 0.2657, "step": 12542 }, { "epoch": 0.79, "grad_norm": 2.031156869127606, "learning_rate": 1.1247712233863196e-06, "loss": 0.243, "step": 12543 }, { "epoch": 0.79, "grad_norm": 1.8372615209296899, "learning_rate": 1.1241277232651849e-06, "loss": 0.2634, "step": 12544 }, { "epoch": 0.79, "grad_norm": 4.997029139268055, "learning_rate": 1.123484383960875e-06, "loss": 0.2414, "step": 12545 }, { "epoch": 0.79, "grad_norm": 3.031356522291658, "learning_rate": 1.1228412055000831e-06, "loss": 0.2791, "step": 12546 }, { "epoch": 0.79, "grad_norm": 2.188305805998532, "learning_rate": 1.1221981879094952e-06, "loss": 0.2515, "step": 12547 }, { "epoch": 0.79, "grad_norm": 5.713710686893574, "learning_rate": 1.121555331215794e-06, "loss": 0.2685, "step": 12548 }, { "epoch": 0.79, "grad_norm": 0.6204778214657698, "learning_rate": 1.12091263544565e-06, "loss": 0.4421, "step": 12549 }, { "epoch": 0.79, "grad_norm": 1.6060995304773358, "learning_rate": 1.1202701006257317e-06, "loss": 0.248, "step": 12550 }, { "epoch": 0.79, "grad_norm": 4.884199085345835, "learning_rate": 1.119627726782695e-06, "loss": 0.2599, "step": 12551 }, { "epoch": 0.79, "grad_norm": 2.031984087381864, "learning_rate": 1.1189855139431988e-06, "loss": 0.2416, "step": 12552 }, { "epoch": 0.79, "grad_norm": 3.219641225185397, "learning_rate": 1.1183434621338874e-06, "loss": 0.267, "step": 12553 }, { "epoch": 0.79, "grad_norm": 2.19017248474657, "learning_rate": 1.1177015713814005e-06, "loss": 0.2626, "step": 12554 }, { "epoch": 0.79, "grad_norm": 2.9210577488538396, "learning_rate": 1.1170598417123695e-06, "loss": 0.2715, "step": 12555 }, { "epoch": 0.79, "grad_norm": 3.2658083660306367, "learning_rate": 1.116418273153424e-06, "loss": 0.239, "step": 12556 }, { "epoch": 0.79, "grad_norm": 1.335857299370904, "learning_rate": 1.1157768657311824e-06, "loss": 0.2561, "step": 12557 }, { "epoch": 0.79, "grad_norm": 1.8964327199140771, "learning_rate": 1.1151356194722563e-06, "loss": 0.2807, "step": 12558 }, { "epoch": 0.79, "grad_norm": 2.270531759566121, "learning_rate": 1.114494534403253e-06, "loss": 0.2648, "step": 12559 }, { "epoch": 0.79, "grad_norm": 1.6292387873161762, "learning_rate": 1.1138536105507752e-06, "loss": 0.2628, "step": 12560 }, { "epoch": 0.79, "grad_norm": 2.33056391057903, "learning_rate": 1.1132128479414133e-06, "loss": 0.2614, "step": 12561 }, { "epoch": 0.79, "grad_norm": 4.3726849490375415, "learning_rate": 1.1125722466017547e-06, "loss": 0.2387, "step": 12562 }, { "epoch": 0.79, "grad_norm": 5.069500013054947, "learning_rate": 1.1119318065583763e-06, "loss": 0.256, "step": 12563 }, { "epoch": 0.79, "grad_norm": 2.369525702333648, "learning_rate": 1.111291527837855e-06, "loss": 0.2503, "step": 12564 }, { "epoch": 0.79, "grad_norm": 1.6172731811989232, "learning_rate": 1.110651410466755e-06, "loss": 0.2294, "step": 12565 }, { "epoch": 0.79, "grad_norm": 1.9636983040958742, "learning_rate": 1.1100114544716351e-06, "loss": 0.2402, "step": 12566 }, { "epoch": 0.79, "grad_norm": 2.0941481280855103, "learning_rate": 1.1093716598790494e-06, "loss": 0.2619, "step": 12567 }, { "epoch": 0.79, "grad_norm": 2.8640338042927724, "learning_rate": 1.1087320267155448e-06, "loss": 0.2475, "step": 12568 }, { "epoch": 0.79, "grad_norm": 1.731232259530004, "learning_rate": 1.108092555007661e-06, "loss": 0.2579, "step": 12569 }, { "epoch": 0.79, "grad_norm": 2.035099166033582, "learning_rate": 1.1074532447819291e-06, "loss": 0.2555, "step": 12570 }, { "epoch": 0.79, "grad_norm": 2.88827441752297, "learning_rate": 1.1068140960648753e-06, "loss": 0.2681, "step": 12571 }, { "epoch": 0.79, "grad_norm": 2.257072743708595, "learning_rate": 1.1061751088830208e-06, "loss": 0.2593, "step": 12572 }, { "epoch": 0.79, "grad_norm": 1.754039672030549, "learning_rate": 1.1055362832628757e-06, "loss": 0.2535, "step": 12573 }, { "epoch": 0.79, "grad_norm": 1.4491884704594167, "learning_rate": 1.1048976192309496e-06, "loss": 0.2379, "step": 12574 }, { "epoch": 0.79, "grad_norm": 2.9714295966204913, "learning_rate": 1.1042591168137379e-06, "loss": 0.2552, "step": 12575 }, { "epoch": 0.79, "grad_norm": 4.122563903119853, "learning_rate": 1.103620776037736e-06, "loss": 0.2591, "step": 12576 }, { "epoch": 0.79, "grad_norm": 1.7153754520571782, "learning_rate": 1.1029825969294294e-06, "loss": 0.2519, "step": 12577 }, { "epoch": 0.79, "grad_norm": 2.915400174354112, "learning_rate": 1.102344579515297e-06, "loss": 0.2725, "step": 12578 }, { "epoch": 0.79, "grad_norm": 1.9739363400590335, "learning_rate": 1.1017067238218093e-06, "loss": 0.2466, "step": 12579 }, { "epoch": 0.79, "grad_norm": 2.3522173238319866, "learning_rate": 1.1010690298754352e-06, "loss": 0.2969, "step": 12580 }, { "epoch": 0.79, "grad_norm": 1.8681438226119749, "learning_rate": 1.1004314977026304e-06, "loss": 0.256, "step": 12581 }, { "epoch": 0.79, "grad_norm": 2.1370796301878596, "learning_rate": 1.0997941273298512e-06, "loss": 0.2456, "step": 12582 }, { "epoch": 0.79, "grad_norm": 1.5404171831350435, "learning_rate": 1.09915691878354e-06, "loss": 0.2436, "step": 12583 }, { "epoch": 0.79, "grad_norm": 2.030863537508889, "learning_rate": 1.0985198720901375e-06, "loss": 0.2489, "step": 12584 }, { "epoch": 0.79, "grad_norm": 2.387414855463944, "learning_rate": 1.097882987276076e-06, "loss": 0.2555, "step": 12585 }, { "epoch": 0.79, "grad_norm": 1.8833909598958247, "learning_rate": 1.09724626436778e-06, "loss": 0.2542, "step": 12586 }, { "epoch": 0.79, "grad_norm": 4.31885258932891, "learning_rate": 1.0966097033916674e-06, "loss": 0.2561, "step": 12587 }, { "epoch": 0.79, "grad_norm": 0.6444723961562331, "learning_rate": 1.095973304374151e-06, "loss": 0.456, "step": 12588 }, { "epoch": 0.79, "grad_norm": 2.0304281248864497, "learning_rate": 1.095337067341638e-06, "loss": 0.2607, "step": 12589 }, { "epoch": 0.79, "grad_norm": 2.4028968508352064, "learning_rate": 1.094700992320526e-06, "loss": 0.2495, "step": 12590 }, { "epoch": 0.79, "grad_norm": 0.6004150796113565, "learning_rate": 1.0940650793372048e-06, "loss": 0.4702, "step": 12591 }, { "epoch": 0.79, "grad_norm": 1.704736975204606, "learning_rate": 1.0934293284180625e-06, "loss": 0.2513, "step": 12592 }, { "epoch": 0.79, "grad_norm": 2.7708386570462173, "learning_rate": 1.092793739589476e-06, "loss": 0.2751, "step": 12593 }, { "epoch": 0.79, "grad_norm": 1.935491697614958, "learning_rate": 1.0921583128778174e-06, "loss": 0.256, "step": 12594 }, { "epoch": 0.79, "grad_norm": 2.163138681186147, "learning_rate": 1.0915230483094502e-06, "loss": 0.245, "step": 12595 }, { "epoch": 0.79, "grad_norm": 2.0448580586558363, "learning_rate": 1.090887945910734e-06, "loss": 0.2569, "step": 12596 }, { "epoch": 0.79, "grad_norm": 1.8075072237919652, "learning_rate": 1.0902530057080218e-06, "loss": 0.2554, "step": 12597 }, { "epoch": 0.79, "grad_norm": 1.3769456760994443, "learning_rate": 1.0896182277276568e-06, "loss": 0.2421, "step": 12598 }, { "epoch": 0.79, "grad_norm": 8.635367036395204, "learning_rate": 1.0889836119959757e-06, "loss": 0.2763, "step": 12599 }, { "epoch": 0.79, "grad_norm": 2.465053926785206, "learning_rate": 1.0883491585393125e-06, "loss": 0.2391, "step": 12600 }, { "epoch": 0.79, "grad_norm": 1.4718448703491538, "learning_rate": 1.0877148673839905e-06, "loss": 0.2541, "step": 12601 }, { "epoch": 0.79, "grad_norm": 2.782588444553286, "learning_rate": 1.0870807385563282e-06, "loss": 0.2515, "step": 12602 }, { "epoch": 0.79, "grad_norm": 2.755272247053251, "learning_rate": 1.0864467720826343e-06, "loss": 0.2696, "step": 12603 }, { "epoch": 0.79, "grad_norm": 0.60891908528279, "learning_rate": 1.0858129679892148e-06, "loss": 0.4695, "step": 12604 }, { "epoch": 0.79, "grad_norm": 5.766288661741712, "learning_rate": 1.0851793263023696e-06, "loss": 0.2431, "step": 12605 }, { "epoch": 0.79, "grad_norm": 8.057605425557707, "learning_rate": 1.0845458470483877e-06, "loss": 0.268, "step": 12606 }, { "epoch": 0.79, "grad_norm": 1.766783735190372, "learning_rate": 1.083912530253552e-06, "loss": 0.2559, "step": 12607 }, { "epoch": 0.79, "grad_norm": 1.691451251144815, "learning_rate": 1.0832793759441418e-06, "loss": 0.2425, "step": 12608 }, { "epoch": 0.79, "grad_norm": 5.444297540711089, "learning_rate": 1.082646384146428e-06, "loss": 0.2727, "step": 12609 }, { "epoch": 0.79, "grad_norm": 1.9744189851229572, "learning_rate": 1.0820135548866718e-06, "loss": 0.2696, "step": 12610 }, { "epoch": 0.79, "grad_norm": 1.9574124572550855, "learning_rate": 1.0813808881911326e-06, "loss": 0.2555, "step": 12611 }, { "epoch": 0.79, "grad_norm": 1.865698936407434, "learning_rate": 1.0807483840860616e-06, "loss": 0.2522, "step": 12612 }, { "epoch": 0.79, "grad_norm": 5.47363020299997, "learning_rate": 1.080116042597702e-06, "loss": 0.2427, "step": 12613 }, { "epoch": 0.79, "grad_norm": 1.5894927783966755, "learning_rate": 1.0794838637522898e-06, "loss": 0.2488, "step": 12614 }, { "epoch": 0.79, "grad_norm": 2.6714157842975013, "learning_rate": 1.0788518475760545e-06, "loss": 0.2562, "step": 12615 }, { "epoch": 0.79, "grad_norm": 2.059916192120151, "learning_rate": 1.0782199940952226e-06, "loss": 0.2521, "step": 12616 }, { "epoch": 0.79, "grad_norm": 2.582203056058463, "learning_rate": 1.0775883033360085e-06, "loss": 0.2539, "step": 12617 }, { "epoch": 0.79, "grad_norm": 2.281674729655202, "learning_rate": 1.0769567753246214e-06, "loss": 0.2754, "step": 12618 }, { "epoch": 0.79, "grad_norm": 5.247061740126943, "learning_rate": 1.076325410087266e-06, "loss": 0.245, "step": 12619 }, { "epoch": 0.79, "grad_norm": 2.107378748262202, "learning_rate": 1.0756942076501397e-06, "loss": 0.2466, "step": 12620 }, { "epoch": 0.79, "grad_norm": 2.070681722068169, "learning_rate": 1.0750631680394314e-06, "loss": 0.2543, "step": 12621 }, { "epoch": 0.79, "grad_norm": 2.894514451643714, "learning_rate": 1.0744322912813231e-06, "loss": 0.2681, "step": 12622 }, { "epoch": 0.79, "grad_norm": 7.6935646765427474, "learning_rate": 1.0738015774019911e-06, "loss": 0.2756, "step": 12623 }, { "epoch": 0.79, "grad_norm": 1.4813043818811578, "learning_rate": 1.0731710264276062e-06, "loss": 0.2463, "step": 12624 }, { "epoch": 0.79, "grad_norm": 2.0690950187614274, "learning_rate": 1.072540638384329e-06, "loss": 0.2559, "step": 12625 }, { "epoch": 0.79, "grad_norm": 2.1003674811340676, "learning_rate": 1.0719104132983176e-06, "loss": 0.273, "step": 12626 }, { "epoch": 0.79, "grad_norm": 1.999234616830355, "learning_rate": 1.0712803511957199e-06, "loss": 0.2639, "step": 12627 }, { "epoch": 0.79, "grad_norm": 2.0810651801883537, "learning_rate": 1.0706504521026788e-06, "loss": 0.2828, "step": 12628 }, { "epoch": 0.79, "grad_norm": 2.252220017339744, "learning_rate": 1.0700207160453308e-06, "loss": 0.2584, "step": 12629 }, { "epoch": 0.79, "grad_norm": 2.5991030471073953, "learning_rate": 1.0693911430498032e-06, "loss": 0.243, "step": 12630 }, { "epoch": 0.79, "grad_norm": 5.270072849526293, "learning_rate": 1.0687617331422173e-06, "loss": 0.2957, "step": 12631 }, { "epoch": 0.79, "grad_norm": 2.0615147930890165, "learning_rate": 1.0681324863486907e-06, "loss": 0.2353, "step": 12632 }, { "epoch": 0.79, "grad_norm": 1.942022182797392, "learning_rate": 1.0675034026953302e-06, "loss": 0.2611, "step": 12633 }, { "epoch": 0.79, "grad_norm": 1.7247293771471623, "learning_rate": 1.06687448220824e-06, "loss": 0.2438, "step": 12634 }, { "epoch": 0.79, "grad_norm": 27.532396668252304, "learning_rate": 1.0662457249135116e-06, "loss": 0.2613, "step": 12635 }, { "epoch": 0.79, "grad_norm": 1.9627645819310615, "learning_rate": 1.0656171308372371e-06, "loss": 0.2532, "step": 12636 }, { "epoch": 0.79, "grad_norm": 2.860449922467492, "learning_rate": 1.0649887000054954e-06, "loss": 0.2438, "step": 12637 }, { "epoch": 0.79, "grad_norm": 4.428910228790399, "learning_rate": 1.0643604324443623e-06, "loss": 0.2636, "step": 12638 }, { "epoch": 0.79, "grad_norm": 1.5791223517173654, "learning_rate": 1.0637323281799045e-06, "loss": 0.2629, "step": 12639 }, { "epoch": 0.79, "grad_norm": 2.5656433746105467, "learning_rate": 1.063104387238183e-06, "loss": 0.2398, "step": 12640 }, { "epoch": 0.79, "grad_norm": 4.084940454890411, "learning_rate": 1.0624766096452555e-06, "loss": 0.243, "step": 12641 }, { "epoch": 0.8, "grad_norm": 1.509478982840512, "learning_rate": 1.0618489954271667e-06, "loss": 0.244, "step": 12642 }, { "epoch": 0.8, "grad_norm": 2.604940557742458, "learning_rate": 1.0612215446099566e-06, "loss": 0.2649, "step": 12643 }, { "epoch": 0.8, "grad_norm": 3.152044277074773, "learning_rate": 1.0605942572196626e-06, "loss": 0.2692, "step": 12644 }, { "epoch": 0.8, "grad_norm": 3.2724483867519862, "learning_rate": 1.0599671332823096e-06, "loss": 0.2498, "step": 12645 }, { "epoch": 0.8, "grad_norm": 1.3261794034612644, "learning_rate": 1.0593401728239183e-06, "loss": 0.2654, "step": 12646 }, { "epoch": 0.8, "grad_norm": 1.697217798764649, "learning_rate": 1.0587133758705015e-06, "loss": 0.257, "step": 12647 }, { "epoch": 0.8, "grad_norm": 0.556527071138105, "learning_rate": 1.0580867424480674e-06, "loss": 0.4191, "step": 12648 }, { "epoch": 0.8, "grad_norm": 1.7960712602951898, "learning_rate": 1.0574602725826171e-06, "loss": 0.2576, "step": 12649 }, { "epoch": 0.8, "grad_norm": 1.3832907326687363, "learning_rate": 1.0568339663001431e-06, "loss": 0.2487, "step": 12650 }, { "epoch": 0.8, "grad_norm": 1.4440824776907264, "learning_rate": 1.0562078236266304e-06, "loss": 0.2546, "step": 12651 }, { "epoch": 0.8, "grad_norm": 2.7054232843073094, "learning_rate": 1.0555818445880612e-06, "loss": 0.2566, "step": 12652 }, { "epoch": 0.8, "grad_norm": 2.1606381253869293, "learning_rate": 1.0549560292104071e-06, "loss": 0.2563, "step": 12653 }, { "epoch": 0.8, "grad_norm": 1.4857746976529542, "learning_rate": 1.054330377519635e-06, "loss": 0.2505, "step": 12654 }, { "epoch": 0.8, "grad_norm": 1.5759929981336405, "learning_rate": 1.0537048895417024e-06, "loss": 0.2412, "step": 12655 }, { "epoch": 0.8, "grad_norm": 1.4120537014752426, "learning_rate": 1.0530795653025634e-06, "loss": 0.2384, "step": 12656 }, { "epoch": 0.8, "grad_norm": 2.0359148928023334, "learning_rate": 1.052454404828165e-06, "loss": 0.267, "step": 12657 }, { "epoch": 0.8, "grad_norm": 1.715593916937375, "learning_rate": 1.0518294081444447e-06, "loss": 0.2517, "step": 12658 }, { "epoch": 0.8, "grad_norm": 2.7637411676576837, "learning_rate": 1.0512045752773336e-06, "loss": 0.2485, "step": 12659 }, { "epoch": 0.8, "grad_norm": 1.9585179326587279, "learning_rate": 1.0505799062527605e-06, "loss": 0.258, "step": 12660 }, { "epoch": 0.8, "grad_norm": 1.7517473444612268, "learning_rate": 1.0499554010966418e-06, "loss": 0.2686, "step": 12661 }, { "epoch": 0.8, "grad_norm": 2.1278934042656314, "learning_rate": 1.0493310598348894e-06, "loss": 0.2586, "step": 12662 }, { "epoch": 0.8, "grad_norm": 2.4294070183837997, "learning_rate": 1.048706882493407e-06, "loss": 0.2745, "step": 12663 }, { "epoch": 0.8, "grad_norm": 1.563678841463182, "learning_rate": 1.0480828690980949e-06, "loss": 0.2446, "step": 12664 }, { "epoch": 0.8, "grad_norm": 5.356532435630444, "learning_rate": 1.0474590196748447e-06, "loss": 0.2536, "step": 12665 }, { "epoch": 0.8, "grad_norm": 2.3055337489752814, "learning_rate": 1.0468353342495407e-06, "loss": 0.2638, "step": 12666 }, { "epoch": 0.8, "grad_norm": 2.271229517955468, "learning_rate": 1.0462118128480586e-06, "loss": 0.2704, "step": 12667 }, { "epoch": 0.8, "grad_norm": 4.103510597477528, "learning_rate": 1.0455884554962725e-06, "loss": 0.2555, "step": 12668 }, { "epoch": 0.8, "grad_norm": 1.7146463056873202, "learning_rate": 1.0449652622200458e-06, "loss": 0.2393, "step": 12669 }, { "epoch": 0.8, "grad_norm": 2.4774842234541814, "learning_rate": 1.044342233045233e-06, "loss": 0.2438, "step": 12670 }, { "epoch": 0.8, "grad_norm": 3.0275772087169353, "learning_rate": 1.0437193679976887e-06, "loss": 0.2474, "step": 12671 }, { "epoch": 0.8, "grad_norm": 2.1172927523323017, "learning_rate": 1.0430966671032538e-06, "loss": 0.2519, "step": 12672 }, { "epoch": 0.8, "grad_norm": 2.3934315297416164, "learning_rate": 1.0424741303877678e-06, "loss": 0.2507, "step": 12673 }, { "epoch": 0.8, "grad_norm": 1.9443693351825124, "learning_rate": 1.0418517578770594e-06, "loss": 0.2457, "step": 12674 }, { "epoch": 0.8, "grad_norm": 1.69091874738381, "learning_rate": 1.041229549596951e-06, "loss": 0.2632, "step": 12675 }, { "epoch": 0.8, "grad_norm": 2.526762667542659, "learning_rate": 1.0406075055732612e-06, "loss": 0.239, "step": 12676 }, { "epoch": 0.8, "grad_norm": 1.4981714523450758, "learning_rate": 1.0399856258317987e-06, "loss": 0.2469, "step": 12677 }, { "epoch": 0.8, "grad_norm": 1.5519275324277573, "learning_rate": 1.039363910398365e-06, "loss": 0.2467, "step": 12678 }, { "epoch": 0.8, "grad_norm": 1.6470867041201902, "learning_rate": 1.0387423592987584e-06, "loss": 0.2581, "step": 12679 }, { "epoch": 0.8, "grad_norm": 1.9412357340760982, "learning_rate": 1.0381209725587666e-06, "loss": 0.2582, "step": 12680 }, { "epoch": 0.8, "grad_norm": 1.693824949661888, "learning_rate": 1.0374997502041739e-06, "loss": 0.2697, "step": 12681 }, { "epoch": 0.8, "grad_norm": 3.7987068313073133, "learning_rate": 1.0368786922607548e-06, "loss": 0.2595, "step": 12682 }, { "epoch": 0.8, "grad_norm": 2.366354239649986, "learning_rate": 1.0362577987542766e-06, "loss": 0.2667, "step": 12683 }, { "epoch": 0.8, "grad_norm": 2.726829433353076, "learning_rate": 1.035637069710504e-06, "loss": 0.2383, "step": 12684 }, { "epoch": 0.8, "grad_norm": 1.799693095943145, "learning_rate": 1.0350165051551897e-06, "loss": 0.2378, "step": 12685 }, { "epoch": 0.8, "grad_norm": 2.207478286308404, "learning_rate": 1.0343961051140845e-06, "loss": 0.2402, "step": 12686 }, { "epoch": 0.8, "grad_norm": 2.626782023532939, "learning_rate": 1.0337758696129285e-06, "loss": 0.2538, "step": 12687 }, { "epoch": 0.8, "grad_norm": 2.924991570707471, "learning_rate": 1.0331557986774553e-06, "loss": 0.2448, "step": 12688 }, { "epoch": 0.8, "grad_norm": 2.2163584607766262, "learning_rate": 1.0325358923333956e-06, "loss": 0.2855, "step": 12689 }, { "epoch": 0.8, "grad_norm": 2.505883828485406, "learning_rate": 1.031916150606468e-06, "loss": 0.2641, "step": 12690 }, { "epoch": 0.8, "grad_norm": 1.9471321499543168, "learning_rate": 1.0312965735223868e-06, "loss": 0.2357, "step": 12691 }, { "epoch": 0.8, "grad_norm": 2.48507471862247, "learning_rate": 1.0306771611068595e-06, "loss": 0.2602, "step": 12692 }, { "epoch": 0.8, "grad_norm": 0.6326021287696546, "learning_rate": 1.030057913385588e-06, "loss": 0.4777, "step": 12693 }, { "epoch": 0.8, "grad_norm": 4.45425023918084, "learning_rate": 1.0294388303842661e-06, "loss": 0.2637, "step": 12694 }, { "epoch": 0.8, "grad_norm": 2.2861385267994963, "learning_rate": 1.0288199121285775e-06, "loss": 0.2343, "step": 12695 }, { "epoch": 0.8, "grad_norm": 1.7641721723017243, "learning_rate": 1.0282011586442064e-06, "loss": 0.2537, "step": 12696 }, { "epoch": 0.8, "grad_norm": 3.974476382647005, "learning_rate": 1.0275825699568231e-06, "loss": 0.2493, "step": 12697 }, { "epoch": 0.8, "grad_norm": 2.123960814989126, "learning_rate": 1.0269641460920954e-06, "loss": 0.2348, "step": 12698 }, { "epoch": 0.8, "grad_norm": 2.3583238287554678, "learning_rate": 1.0263458870756808e-06, "loss": 0.2453, "step": 12699 }, { "epoch": 0.8, "grad_norm": 2.018289054983028, "learning_rate": 1.0257277929332332e-06, "loss": 0.2637, "step": 12700 }, { "epoch": 0.8, "grad_norm": 2.665058189261105, "learning_rate": 1.0251098636904e-06, "loss": 0.2504, "step": 12701 }, { "epoch": 0.8, "grad_norm": 2.91998136874884, "learning_rate": 1.0244920993728185e-06, "loss": 0.273, "step": 12702 }, { "epoch": 0.8, "grad_norm": 2.3023157744331306, "learning_rate": 1.0238745000061201e-06, "loss": 0.2412, "step": 12703 }, { "epoch": 0.8, "grad_norm": 3.7980366962356116, "learning_rate": 1.0232570656159325e-06, "loss": 0.2548, "step": 12704 }, { "epoch": 0.8, "grad_norm": 1.4788772562573218, "learning_rate": 1.0226397962278723e-06, "loss": 0.26, "step": 12705 }, { "epoch": 0.8, "grad_norm": 1.5763378591316777, "learning_rate": 1.0220226918675519e-06, "loss": 0.2523, "step": 12706 }, { "epoch": 0.8, "grad_norm": 2.201895235173311, "learning_rate": 1.0214057525605742e-06, "loss": 0.2688, "step": 12707 }, { "epoch": 0.8, "grad_norm": 1.531095714465481, "learning_rate": 1.0207889783325386e-06, "loss": 0.2441, "step": 12708 }, { "epoch": 0.8, "grad_norm": 2.7185099593682156, "learning_rate": 1.0201723692090376e-06, "loss": 0.2475, "step": 12709 }, { "epoch": 0.8, "grad_norm": 5.803324053474017, "learning_rate": 1.019555925215654e-06, "loss": 0.2596, "step": 12710 }, { "epoch": 0.8, "grad_norm": 1.6728505484323652, "learning_rate": 1.0189396463779632e-06, "loss": 0.2465, "step": 12711 }, { "epoch": 0.8, "grad_norm": 0.611571019272076, "learning_rate": 1.0183235327215396e-06, "loss": 0.4697, "step": 12712 }, { "epoch": 0.8, "grad_norm": 1.5883228438460357, "learning_rate": 1.0177075842719448e-06, "loss": 0.2656, "step": 12713 }, { "epoch": 0.8, "grad_norm": 0.6129906049332171, "learning_rate": 1.0170918010547355e-06, "loss": 0.4433, "step": 12714 }, { "epoch": 0.8, "grad_norm": 2.389832644539706, "learning_rate": 1.0164761830954606e-06, "loss": 0.2487, "step": 12715 }, { "epoch": 0.8, "grad_norm": 2.426246001497093, "learning_rate": 1.0158607304196643e-06, "loss": 0.2529, "step": 12716 }, { "epoch": 0.8, "grad_norm": 2.101607057639297, "learning_rate": 1.0152454430528847e-06, "loss": 0.2349, "step": 12717 }, { "epoch": 0.8, "grad_norm": 1.9409808877926633, "learning_rate": 1.0146303210206487e-06, "loss": 0.2657, "step": 12718 }, { "epoch": 0.8, "grad_norm": 2.479657028448129, "learning_rate": 1.0140153643484784e-06, "loss": 0.2712, "step": 12719 }, { "epoch": 0.8, "grad_norm": 1.708947078782897, "learning_rate": 1.0134005730618922e-06, "loss": 0.2384, "step": 12720 }, { "epoch": 0.8, "grad_norm": 2.027054384171033, "learning_rate": 1.012785947186397e-06, "loss": 0.2579, "step": 12721 }, { "epoch": 0.8, "grad_norm": 1.8035305780545818, "learning_rate": 1.0121714867474936e-06, "loss": 0.2394, "step": 12722 }, { "epoch": 0.8, "grad_norm": 2.134175423304704, "learning_rate": 1.0115571917706795e-06, "loss": 0.2453, "step": 12723 }, { "epoch": 0.8, "grad_norm": 3.70589306166117, "learning_rate": 1.010943062281441e-06, "loss": 0.2683, "step": 12724 }, { "epoch": 0.8, "grad_norm": 0.6795361124172253, "learning_rate": 1.0103290983052611e-06, "loss": 0.4873, "step": 12725 }, { "epoch": 0.8, "grad_norm": 3.470026123728847, "learning_rate": 1.0097152998676135e-06, "loss": 0.2792, "step": 12726 }, { "epoch": 0.8, "grad_norm": 3.051263251291704, "learning_rate": 1.009101666993964e-06, "loss": 0.2619, "step": 12727 }, { "epoch": 0.8, "grad_norm": 3.9010949523591494, "learning_rate": 1.0084881997097768e-06, "loss": 0.247, "step": 12728 }, { "epoch": 0.8, "grad_norm": 2.496350425292508, "learning_rate": 1.0078748980405033e-06, "loss": 0.261, "step": 12729 }, { "epoch": 0.8, "grad_norm": 1.5563006687611018, "learning_rate": 1.0072617620115904e-06, "loss": 0.2591, "step": 12730 }, { "epoch": 0.8, "grad_norm": 1.3271151319139887, "learning_rate": 1.0066487916484803e-06, "loss": 0.2479, "step": 12731 }, { "epoch": 0.8, "grad_norm": 2.9768100611518893, "learning_rate": 1.0060359869766028e-06, "loss": 0.2716, "step": 12732 }, { "epoch": 0.8, "grad_norm": 2.7156932569004555, "learning_rate": 1.0054233480213881e-06, "loss": 0.2751, "step": 12733 }, { "epoch": 0.8, "grad_norm": 1.698599394553698, "learning_rate": 1.0048108748082536e-06, "loss": 0.2638, "step": 12734 }, { "epoch": 0.8, "grad_norm": 2.1124446176601013, "learning_rate": 1.0041985673626115e-06, "loss": 0.2565, "step": 12735 }, { "epoch": 0.8, "grad_norm": 2.215011893965315, "learning_rate": 1.0035864257098687e-06, "loss": 0.2579, "step": 12736 }, { "epoch": 0.8, "grad_norm": 3.0804420682947686, "learning_rate": 1.0029744498754225e-06, "loss": 0.2562, "step": 12737 }, { "epoch": 0.8, "grad_norm": 1.6035858514247574, "learning_rate": 1.002362639884667e-06, "loss": 0.2662, "step": 12738 }, { "epoch": 0.8, "grad_norm": 2.602340476526753, "learning_rate": 1.0017509957629868e-06, "loss": 0.2575, "step": 12739 }, { "epoch": 0.8, "grad_norm": 3.558152836341865, "learning_rate": 1.0011395175357574e-06, "loss": 0.2493, "step": 12740 }, { "epoch": 0.8, "grad_norm": 6.4520598093403265, "learning_rate": 1.0005282052283539e-06, "loss": 0.2486, "step": 12741 }, { "epoch": 0.8, "grad_norm": 1.725741189843362, "learning_rate": 9.999170588661388e-07, "loss": 0.2716, "step": 12742 }, { "epoch": 0.8, "grad_norm": 4.360730072590659, "learning_rate": 9.99306078474469e-07, "loss": 0.2407, "step": 12743 }, { "epoch": 0.8, "grad_norm": 1.755519875333957, "learning_rate": 9.986952640786972e-07, "loss": 0.2392, "step": 12744 }, { "epoch": 0.8, "grad_norm": 4.155347866804942, "learning_rate": 9.980846157041645e-07, "loss": 0.2585, "step": 12745 }, { "epoch": 0.8, "grad_norm": 1.6009202595564043, "learning_rate": 9.974741333762106e-07, "loss": 0.2654, "step": 12746 }, { "epoch": 0.8, "grad_norm": 2.64625703476425, "learning_rate": 9.968638171201644e-07, "loss": 0.2722, "step": 12747 }, { "epoch": 0.8, "grad_norm": 3.4761751049718286, "learning_rate": 9.96253666961347e-07, "loss": 0.2639, "step": 12748 }, { "epoch": 0.8, "grad_norm": 2.7387890736189626, "learning_rate": 9.956436829250782e-07, "loss": 0.2654, "step": 12749 }, { "epoch": 0.8, "grad_norm": 1.40965657231041, "learning_rate": 9.950338650366659e-07, "loss": 0.2449, "step": 12750 }, { "epoch": 0.8, "grad_norm": 1.7228430507127512, "learning_rate": 9.944242133214098e-07, "loss": 0.2585, "step": 12751 }, { "epoch": 0.8, "grad_norm": 2.0072685071796164, "learning_rate": 9.938147278046083e-07, "loss": 0.2517, "step": 12752 }, { "epoch": 0.8, "grad_norm": 3.765746681085658, "learning_rate": 9.932054085115512e-07, "loss": 0.2647, "step": 12753 }, { "epoch": 0.8, "grad_norm": 3.536878930923567, "learning_rate": 9.925962554675185e-07, "loss": 0.2676, "step": 12754 }, { "epoch": 0.8, "grad_norm": 1.6169529515121037, "learning_rate": 9.919872686977849e-07, "loss": 0.2656, "step": 12755 }, { "epoch": 0.8, "grad_norm": 1.7243509227023388, "learning_rate": 9.913784482276167e-07, "loss": 0.2635, "step": 12756 }, { "epoch": 0.8, "grad_norm": 1.3668179612717561, "learning_rate": 9.90769794082279e-07, "loss": 0.2522, "step": 12757 }, { "epoch": 0.8, "grad_norm": 1.9917937310395633, "learning_rate": 9.901613062870236e-07, "loss": 0.2597, "step": 12758 }, { "epoch": 0.8, "grad_norm": 1.6579782239575436, "learning_rate": 9.89552984867096e-07, "loss": 0.2537, "step": 12759 }, { "epoch": 0.8, "grad_norm": 2.3337970745103087, "learning_rate": 9.889448298477388e-07, "loss": 0.2597, "step": 12760 }, { "epoch": 0.8, "grad_norm": 0.6246909970505483, "learning_rate": 9.88336841254186e-07, "loss": 0.488, "step": 12761 }, { "epoch": 0.8, "grad_norm": 3.5945754365392033, "learning_rate": 9.87729019111664e-07, "loss": 0.2525, "step": 12762 }, { "epoch": 0.8, "grad_norm": 26.287682843088376, "learning_rate": 9.871213634453908e-07, "loss": 0.2531, "step": 12763 }, { "epoch": 0.8, "grad_norm": 1.913740426617599, "learning_rate": 9.865138742805792e-07, "loss": 0.2536, "step": 12764 }, { "epoch": 0.8, "grad_norm": 2.300508795335946, "learning_rate": 9.859065516424365e-07, "loss": 0.2608, "step": 12765 }, { "epoch": 0.8, "grad_norm": 1.8217823616327986, "learning_rate": 9.852993955561607e-07, "loss": 0.272, "step": 12766 }, { "epoch": 0.8, "grad_norm": 2.057594120481516, "learning_rate": 9.846924060469433e-07, "loss": 0.2533, "step": 12767 }, { "epoch": 0.8, "grad_norm": 13.860080822210733, "learning_rate": 9.840855831399694e-07, "loss": 0.2516, "step": 12768 }, { "epoch": 0.8, "grad_norm": 1.7091044263044803, "learning_rate": 9.834789268604188e-07, "loss": 0.2561, "step": 12769 }, { "epoch": 0.8, "grad_norm": 4.678264760509789, "learning_rate": 9.828724372334624e-07, "loss": 0.2418, "step": 12770 }, { "epoch": 0.8, "grad_norm": 2.0376825477549967, "learning_rate": 9.822661142842621e-07, "loss": 0.2461, "step": 12771 }, { "epoch": 0.8, "grad_norm": 2.6357489493609827, "learning_rate": 9.816599580379783e-07, "loss": 0.2875, "step": 12772 }, { "epoch": 0.8, "grad_norm": 1.4977430951989361, "learning_rate": 9.8105396851976e-07, "loss": 0.2596, "step": 12773 }, { "epoch": 0.8, "grad_norm": 4.921853057194947, "learning_rate": 9.8044814575475e-07, "loss": 0.2389, "step": 12774 }, { "epoch": 0.8, "grad_norm": 1.7556084874755709, "learning_rate": 9.798424897680876e-07, "loss": 0.2353, "step": 12775 }, { "epoch": 0.8, "grad_norm": 2.7325476307067564, "learning_rate": 9.79237000584899e-07, "loss": 0.2584, "step": 12776 }, { "epoch": 0.8, "grad_norm": 1.9852158607627954, "learning_rate": 9.786316782303108e-07, "loss": 0.2637, "step": 12777 }, { "epoch": 0.8, "grad_norm": 5.209972925321309, "learning_rate": 9.78026522729436e-07, "loss": 0.2469, "step": 12778 }, { "epoch": 0.8, "grad_norm": 1.8084338101734356, "learning_rate": 9.774215341073844e-07, "loss": 0.2574, "step": 12779 }, { "epoch": 0.8, "grad_norm": 1.7451656808315519, "learning_rate": 9.76816712389259e-07, "loss": 0.2579, "step": 12780 }, { "epoch": 0.8, "grad_norm": 1.6661527500741184, "learning_rate": 9.762120576001543e-07, "loss": 0.2327, "step": 12781 }, { "epoch": 0.8, "grad_norm": 1.656309263103782, "learning_rate": 9.756075697651573e-07, "loss": 0.2496, "step": 12782 }, { "epoch": 0.8, "grad_norm": 1.8303826124579643, "learning_rate": 9.750032489093514e-07, "loss": 0.2729, "step": 12783 }, { "epoch": 0.8, "grad_norm": 1.530801960260885, "learning_rate": 9.743990950578087e-07, "loss": 0.2436, "step": 12784 }, { "epoch": 0.8, "grad_norm": 1.7175450632254168, "learning_rate": 9.73795108235599e-07, "loss": 0.2557, "step": 12785 }, { "epoch": 0.8, "grad_norm": 1.8929034367106248, "learning_rate": 9.731912884677814e-07, "loss": 0.2563, "step": 12786 }, { "epoch": 0.8, "grad_norm": 0.5587418258075241, "learning_rate": 9.72587635779409e-07, "loss": 0.4351, "step": 12787 }, { "epoch": 0.8, "grad_norm": 3.842594908282135, "learning_rate": 9.719841501955296e-07, "loss": 0.2666, "step": 12788 }, { "epoch": 0.8, "grad_norm": 3.128309565322895, "learning_rate": 9.713808317411815e-07, "loss": 0.2747, "step": 12789 }, { "epoch": 0.8, "grad_norm": 1.7326631363657472, "learning_rate": 9.707776804414e-07, "loss": 0.2498, "step": 12790 }, { "epoch": 0.8, "grad_norm": 3.05237976920696, "learning_rate": 9.701746963212084e-07, "loss": 0.2568, "step": 12791 }, { "epoch": 0.8, "grad_norm": 5.393489461025843, "learning_rate": 9.695718794056258e-07, "loss": 0.2575, "step": 12792 }, { "epoch": 0.8, "grad_norm": 4.3888296321583455, "learning_rate": 9.689692297196657e-07, "loss": 0.2639, "step": 12793 }, { "epoch": 0.8, "grad_norm": 1.5725615619980446, "learning_rate": 9.683667472883324e-07, "loss": 0.2482, "step": 12794 }, { "epoch": 0.8, "grad_norm": 2.647815195484356, "learning_rate": 9.677644321366226e-07, "loss": 0.2362, "step": 12795 }, { "epoch": 0.8, "grad_norm": 5.174720726874843, "learning_rate": 9.671622842895294e-07, "loss": 0.2453, "step": 12796 }, { "epoch": 0.8, "grad_norm": 1.4792700718431802, "learning_rate": 9.66560303772035e-07, "loss": 0.2445, "step": 12797 }, { "epoch": 0.8, "grad_norm": 1.5752754867579595, "learning_rate": 9.65958490609119e-07, "loss": 0.2413, "step": 12798 }, { "epoch": 0.8, "grad_norm": 6.7160303960197085, "learning_rate": 9.653568448257504e-07, "loss": 0.2587, "step": 12799 }, { "epoch": 0.8, "grad_norm": 2.0086538354064927, "learning_rate": 9.647553664468918e-07, "loss": 0.2394, "step": 12800 }, { "epoch": 0.81, "grad_norm": 1.9664307217398085, "learning_rate": 9.641540554975015e-07, "loss": 0.2393, "step": 12801 }, { "epoch": 0.81, "grad_norm": 1.615912294402693, "learning_rate": 9.635529120025282e-07, "loss": 0.2488, "step": 12802 }, { "epoch": 0.81, "grad_norm": 3.917978411956469, "learning_rate": 9.629519359869121e-07, "loss": 0.2617, "step": 12803 }, { "epoch": 0.81, "grad_norm": 3.1349444477823143, "learning_rate": 9.623511274755914e-07, "loss": 0.2725, "step": 12804 }, { "epoch": 0.81, "grad_norm": 2.2862184235505376, "learning_rate": 9.617504864934956e-07, "loss": 0.2523, "step": 12805 }, { "epoch": 0.81, "grad_norm": 1.6156777265235944, "learning_rate": 9.611500130655443e-07, "loss": 0.2493, "step": 12806 }, { "epoch": 0.81, "grad_norm": 2.8710067540421056, "learning_rate": 9.60549707216653e-07, "loss": 0.2344, "step": 12807 }, { "epoch": 0.81, "grad_norm": 2.0947206449001587, "learning_rate": 9.599495689717276e-07, "loss": 0.248, "step": 12808 }, { "epoch": 0.81, "grad_norm": 2.667282840868198, "learning_rate": 9.593495983556723e-07, "loss": 0.2886, "step": 12809 }, { "epoch": 0.81, "grad_norm": 1.5302462702039108, "learning_rate": 9.58749795393379e-07, "loss": 0.2506, "step": 12810 }, { "epoch": 0.81, "grad_norm": 2.076053237357009, "learning_rate": 9.581501601097332e-07, "loss": 0.2556, "step": 12811 }, { "epoch": 0.81, "grad_norm": 2.006810597267588, "learning_rate": 9.57550692529617e-07, "loss": 0.2599, "step": 12812 }, { "epoch": 0.81, "grad_norm": 2.172004393315114, "learning_rate": 9.569513926779033e-07, "loss": 0.2536, "step": 12813 }, { "epoch": 0.81, "grad_norm": 12.770538705483302, "learning_rate": 9.563522605794579e-07, "loss": 0.2527, "step": 12814 }, { "epoch": 0.81, "grad_norm": 11.051092054586464, "learning_rate": 9.557532962591398e-07, "loss": 0.2669, "step": 12815 }, { "epoch": 0.81, "grad_norm": 2.1166252046240914, "learning_rate": 9.551544997417995e-07, "loss": 0.2507, "step": 12816 }, { "epoch": 0.81, "grad_norm": 3.1193775209546764, "learning_rate": 9.545558710522844e-07, "loss": 0.2492, "step": 12817 }, { "epoch": 0.81, "grad_norm": 1.575697964565946, "learning_rate": 9.53957410215432e-07, "loss": 0.2512, "step": 12818 }, { "epoch": 0.81, "grad_norm": 1.6749766983147105, "learning_rate": 9.533591172560714e-07, "loss": 0.2675, "step": 12819 }, { "epoch": 0.81, "grad_norm": 3.2145755436199703, "learning_rate": 9.527609921990294e-07, "loss": 0.2808, "step": 12820 }, { "epoch": 0.81, "grad_norm": 2.2916494501173883, "learning_rate": 9.521630350691235e-07, "loss": 0.2596, "step": 12821 }, { "epoch": 0.81, "grad_norm": 0.6229630033286424, "learning_rate": 9.515652458911629e-07, "loss": 0.4291, "step": 12822 }, { "epoch": 0.81, "grad_norm": 2.3131772122071124, "learning_rate": 9.509676246899513e-07, "loss": 0.2415, "step": 12823 }, { "epoch": 0.81, "grad_norm": 1.8486509994036227, "learning_rate": 9.503701714902836e-07, "loss": 0.2713, "step": 12824 }, { "epoch": 0.81, "grad_norm": 1.7387775178104907, "learning_rate": 9.497728863169514e-07, "loss": 0.2508, "step": 12825 }, { "epoch": 0.81, "grad_norm": 1.8702734825778233, "learning_rate": 9.491757691947367e-07, "loss": 0.2491, "step": 12826 }, { "epoch": 0.81, "grad_norm": 1.7961841237914369, "learning_rate": 9.485788201484125e-07, "loss": 0.2639, "step": 12827 }, { "epoch": 0.81, "grad_norm": 0.576440342154987, "learning_rate": 9.479820392027494e-07, "loss": 0.4522, "step": 12828 }, { "epoch": 0.81, "grad_norm": 2.179496433329002, "learning_rate": 9.473854263825105e-07, "loss": 0.2623, "step": 12829 }, { "epoch": 0.81, "grad_norm": 1.3879735818632433, "learning_rate": 9.467889817124481e-07, "loss": 0.2404, "step": 12830 }, { "epoch": 0.81, "grad_norm": 3.22086572123504, "learning_rate": 9.461927052173097e-07, "loss": 0.2447, "step": 12831 }, { "epoch": 0.81, "grad_norm": 2.394871369895212, "learning_rate": 9.455965969218356e-07, "loss": 0.2446, "step": 12832 }, { "epoch": 0.81, "grad_norm": 4.12420794196283, "learning_rate": 9.450006568507614e-07, "loss": 0.255, "step": 12833 }, { "epoch": 0.81, "grad_norm": 3.0344644144945154, "learning_rate": 9.444048850288112e-07, "loss": 0.2411, "step": 12834 }, { "epoch": 0.81, "grad_norm": 3.399867326488132, "learning_rate": 9.43809281480707e-07, "loss": 0.2623, "step": 12835 }, { "epoch": 0.81, "grad_norm": 2.8563141513055275, "learning_rate": 9.432138462311591e-07, "loss": 0.273, "step": 12836 }, { "epoch": 0.81, "grad_norm": 2.431892728930356, "learning_rate": 9.426185793048754e-07, "loss": 0.2535, "step": 12837 }, { "epoch": 0.81, "grad_norm": 2.930204766985941, "learning_rate": 9.420234807265538e-07, "loss": 0.2507, "step": 12838 }, { "epoch": 0.81, "grad_norm": 2.1713764203900605, "learning_rate": 9.414285505208859e-07, "loss": 0.2457, "step": 12839 }, { "epoch": 0.81, "grad_norm": 1.9472027670717913, "learning_rate": 9.408337887125546e-07, "loss": 0.2546, "step": 12840 }, { "epoch": 0.81, "grad_norm": 1.644738162379368, "learning_rate": 9.402391953262396e-07, "loss": 0.2392, "step": 12841 }, { "epoch": 0.81, "grad_norm": 1.6318367312835584, "learning_rate": 9.396447703866124e-07, "loss": 0.2492, "step": 12842 }, { "epoch": 0.81, "grad_norm": 1.7337807755938024, "learning_rate": 9.390505139183359e-07, "loss": 0.2597, "step": 12843 }, { "epoch": 0.81, "grad_norm": 5.795926326609472, "learning_rate": 9.384564259460655e-07, "loss": 0.2571, "step": 12844 }, { "epoch": 0.81, "grad_norm": 2.3201580163658386, "learning_rate": 9.378625064944529e-07, "loss": 0.2558, "step": 12845 }, { "epoch": 0.81, "grad_norm": 5.997465048882375, "learning_rate": 9.372687555881405e-07, "loss": 0.2662, "step": 12846 }, { "epoch": 0.81, "grad_norm": 3.0019004629313737, "learning_rate": 9.366751732517632e-07, "loss": 0.2517, "step": 12847 }, { "epoch": 0.81, "grad_norm": 4.361205174224972, "learning_rate": 9.360817595099491e-07, "loss": 0.2498, "step": 12848 }, { "epoch": 0.81, "grad_norm": 1.6482585744755953, "learning_rate": 9.354885143873216e-07, "loss": 0.2615, "step": 12849 }, { "epoch": 0.81, "grad_norm": 1.5524535099587276, "learning_rate": 9.348954379084957e-07, "loss": 0.2543, "step": 12850 }, { "epoch": 0.81, "grad_norm": 3.1563532493672573, "learning_rate": 9.343025300980791e-07, "loss": 0.2533, "step": 12851 }, { "epoch": 0.81, "grad_norm": 0.5901668742846251, "learning_rate": 9.337097909806703e-07, "loss": 0.4666, "step": 12852 }, { "epoch": 0.81, "grad_norm": 2.0350044462128816, "learning_rate": 9.331172205808659e-07, "loss": 0.2605, "step": 12853 }, { "epoch": 0.81, "grad_norm": 1.8035053011714821, "learning_rate": 9.325248189232521e-07, "loss": 0.2636, "step": 12854 }, { "epoch": 0.81, "grad_norm": 1.3109480430631197, "learning_rate": 9.319325860324069e-07, "loss": 0.2425, "step": 12855 }, { "epoch": 0.81, "grad_norm": 1.790154375725121, "learning_rate": 9.313405219329041e-07, "loss": 0.2624, "step": 12856 }, { "epoch": 0.81, "grad_norm": 1.9340313613188451, "learning_rate": 9.307486266493109e-07, "loss": 0.2546, "step": 12857 }, { "epoch": 0.81, "grad_norm": 3.552539089425548, "learning_rate": 9.301569002061855e-07, "loss": 0.2468, "step": 12858 }, { "epoch": 0.81, "grad_norm": 2.235652719347328, "learning_rate": 9.295653426280793e-07, "loss": 0.245, "step": 12859 }, { "epoch": 0.81, "grad_norm": 1.5643757271303158, "learning_rate": 9.289739539395354e-07, "loss": 0.2407, "step": 12860 }, { "epoch": 0.81, "grad_norm": 5.573721095433635, "learning_rate": 9.283827341650942e-07, "loss": 0.2391, "step": 12861 }, { "epoch": 0.81, "grad_norm": 1.8385291457680781, "learning_rate": 9.277916833292855e-07, "loss": 0.2621, "step": 12862 }, { "epoch": 0.81, "grad_norm": 1.895979292505263, "learning_rate": 9.272008014566314e-07, "loss": 0.2624, "step": 12863 }, { "epoch": 0.81, "grad_norm": 2.2921231198056478, "learning_rate": 9.266100885716506e-07, "loss": 0.2464, "step": 12864 }, { "epoch": 0.81, "grad_norm": 1.461850035186532, "learning_rate": 9.260195446988535e-07, "loss": 0.2441, "step": 12865 }, { "epoch": 0.81, "grad_norm": 1.6173435668297256, "learning_rate": 9.254291698627416e-07, "loss": 0.249, "step": 12866 }, { "epoch": 0.81, "grad_norm": 2.168350408546128, "learning_rate": 9.248389640878109e-07, "loss": 0.2275, "step": 12867 }, { "epoch": 0.81, "grad_norm": 2.2547508996488923, "learning_rate": 9.242489273985483e-07, "loss": 0.2603, "step": 12868 }, { "epoch": 0.81, "grad_norm": 1.5311450365000812, "learning_rate": 9.236590598194384e-07, "loss": 0.2443, "step": 12869 }, { "epoch": 0.81, "grad_norm": 1.687600126500834, "learning_rate": 9.230693613749547e-07, "loss": 0.2647, "step": 12870 }, { "epoch": 0.81, "grad_norm": 2.3168067255507325, "learning_rate": 9.224798320895634e-07, "loss": 0.24, "step": 12871 }, { "epoch": 0.81, "grad_norm": 2.99266560903549, "learning_rate": 9.218904719877258e-07, "loss": 0.2475, "step": 12872 }, { "epoch": 0.81, "grad_norm": 1.5829464029758733, "learning_rate": 9.213012810938976e-07, "loss": 0.2483, "step": 12873 }, { "epoch": 0.81, "grad_norm": 1.9153073565322267, "learning_rate": 9.207122594325241e-07, "loss": 0.2744, "step": 12874 }, { "epoch": 0.81, "grad_norm": 1.7244034798619865, "learning_rate": 9.201234070280446e-07, "loss": 0.2507, "step": 12875 }, { "epoch": 0.81, "grad_norm": 2.0440624380199686, "learning_rate": 9.195347239048902e-07, "loss": 0.2725, "step": 12876 }, { "epoch": 0.81, "grad_norm": 0.5612430259333051, "learning_rate": 9.189462100874891e-07, "loss": 0.4663, "step": 12877 }, { "epoch": 0.81, "grad_norm": 41.09160100313767, "learning_rate": 9.183578656002584e-07, "loss": 0.2568, "step": 12878 }, { "epoch": 0.81, "grad_norm": 2.1080926671363964, "learning_rate": 9.177696904676086e-07, "loss": 0.279, "step": 12879 }, { "epoch": 0.81, "grad_norm": 1.5491718013748426, "learning_rate": 9.171816847139447e-07, "loss": 0.2621, "step": 12880 }, { "epoch": 0.81, "grad_norm": 1.5374963144072058, "learning_rate": 9.165938483636666e-07, "loss": 0.242, "step": 12881 }, { "epoch": 0.81, "grad_norm": 1.8972797521043407, "learning_rate": 9.160061814411625e-07, "loss": 0.276, "step": 12882 }, { "epoch": 0.81, "grad_norm": 3.3082568587378285, "learning_rate": 9.154186839708157e-07, "loss": 0.2648, "step": 12883 }, { "epoch": 0.81, "grad_norm": 1.782943228581823, "learning_rate": 9.148313559770011e-07, "loss": 0.261, "step": 12884 }, { "epoch": 0.81, "grad_norm": 0.6283344357888913, "learning_rate": 9.142441974840915e-07, "loss": 0.4548, "step": 12885 }, { "epoch": 0.81, "grad_norm": 0.5597703822869617, "learning_rate": 9.136572085164458e-07, "loss": 0.4444, "step": 12886 }, { "epoch": 0.81, "grad_norm": 2.304495916707265, "learning_rate": 9.130703890984222e-07, "loss": 0.2518, "step": 12887 }, { "epoch": 0.81, "grad_norm": 2.488735308124743, "learning_rate": 9.124837392543656e-07, "loss": 0.2576, "step": 12888 }, { "epoch": 0.81, "grad_norm": 1.6975954867493765, "learning_rate": 9.118972590086206e-07, "loss": 0.2461, "step": 12889 }, { "epoch": 0.81, "grad_norm": 1.4664348549370394, "learning_rate": 9.113109483855193e-07, "loss": 0.2495, "step": 12890 }, { "epoch": 0.81, "grad_norm": 2.4709030237841163, "learning_rate": 9.107248074093894e-07, "loss": 0.2503, "step": 12891 }, { "epoch": 0.81, "grad_norm": 3.001523179076291, "learning_rate": 9.101388361045488e-07, "loss": 0.2319, "step": 12892 }, { "epoch": 0.81, "grad_norm": 3.130189121210011, "learning_rate": 9.095530344953141e-07, "loss": 0.2468, "step": 12893 }, { "epoch": 0.81, "grad_norm": 1.3774295368187583, "learning_rate": 9.08967402605988e-07, "loss": 0.2412, "step": 12894 }, { "epoch": 0.81, "grad_norm": 2.3103110629274615, "learning_rate": 9.083819404608724e-07, "loss": 0.2583, "step": 12895 }, { "epoch": 0.81, "grad_norm": 2.244186611790629, "learning_rate": 9.07796648084256e-07, "loss": 0.2593, "step": 12896 }, { "epoch": 0.81, "grad_norm": 0.6191162625720816, "learning_rate": 9.072115255004266e-07, "loss": 0.4698, "step": 12897 }, { "epoch": 0.81, "grad_norm": 1.615867861107461, "learning_rate": 9.066265727336604e-07, "loss": 0.2664, "step": 12898 }, { "epoch": 0.81, "grad_norm": 1.8105279253481417, "learning_rate": 9.060417898082285e-07, "loss": 0.2538, "step": 12899 }, { "epoch": 0.81, "grad_norm": 3.3438830026991466, "learning_rate": 9.054571767483932e-07, "loss": 0.26, "step": 12900 }, { "epoch": 0.81, "grad_norm": 2.057403257654064, "learning_rate": 9.048727335784124e-07, "loss": 0.2596, "step": 12901 }, { "epoch": 0.81, "grad_norm": 2.3587940448726448, "learning_rate": 9.042884603225372e-07, "loss": 0.2396, "step": 12902 }, { "epoch": 0.81, "grad_norm": 1.7891339199649128, "learning_rate": 9.03704357005008e-07, "loss": 0.2504, "step": 12903 }, { "epoch": 0.81, "grad_norm": 1.539726229067604, "learning_rate": 9.031204236500601e-07, "loss": 0.2624, "step": 12904 }, { "epoch": 0.81, "grad_norm": 1.7644042066418655, "learning_rate": 9.025366602819235e-07, "loss": 0.2628, "step": 12905 }, { "epoch": 0.81, "grad_norm": 1.4714732485959336, "learning_rate": 9.019530669248195e-07, "loss": 0.2528, "step": 12906 }, { "epoch": 0.81, "grad_norm": 1.682278135449895, "learning_rate": 9.013696436029607e-07, "loss": 0.2503, "step": 12907 }, { "epoch": 0.81, "grad_norm": 1.6022755323694893, "learning_rate": 9.007863903405551e-07, "loss": 0.2609, "step": 12908 }, { "epoch": 0.81, "grad_norm": 1.7965323724809963, "learning_rate": 9.002033071618027e-07, "loss": 0.267, "step": 12909 }, { "epoch": 0.81, "grad_norm": 1.890080830376393, "learning_rate": 8.996203940908982e-07, "loss": 0.2665, "step": 12910 }, { "epoch": 0.81, "grad_norm": 2.6391509388317074, "learning_rate": 8.990376511520272e-07, "loss": 0.2571, "step": 12911 }, { "epoch": 0.81, "grad_norm": 2.438269915323138, "learning_rate": 8.984550783693663e-07, "loss": 0.2434, "step": 12912 }, { "epoch": 0.81, "grad_norm": 2.484500330904699, "learning_rate": 8.978726757670908e-07, "loss": 0.2498, "step": 12913 }, { "epoch": 0.81, "grad_norm": 0.6036576597089971, "learning_rate": 8.972904433693646e-07, "loss": 0.4462, "step": 12914 }, { "epoch": 0.81, "grad_norm": 1.653183563269335, "learning_rate": 8.967083812003446e-07, "loss": 0.2505, "step": 12915 }, { "epoch": 0.81, "grad_norm": 1.416421650311631, "learning_rate": 8.961264892841798e-07, "loss": 0.2437, "step": 12916 }, { "epoch": 0.81, "grad_norm": 1.8761232962721022, "learning_rate": 8.955447676450191e-07, "loss": 0.2705, "step": 12917 }, { "epoch": 0.81, "grad_norm": 3.757939120259039, "learning_rate": 8.94963216306996e-07, "loss": 0.258, "step": 12918 }, { "epoch": 0.81, "grad_norm": 1.4166521640306378, "learning_rate": 8.943818352942401e-07, "loss": 0.2511, "step": 12919 }, { "epoch": 0.81, "grad_norm": 2.5323323393674855, "learning_rate": 8.938006246308734e-07, "loss": 0.2661, "step": 12920 }, { "epoch": 0.81, "grad_norm": 3.422876001062616, "learning_rate": 8.932195843410135e-07, "loss": 0.2556, "step": 12921 }, { "epoch": 0.81, "grad_norm": 1.6779079539753463, "learning_rate": 8.926387144487675e-07, "loss": 0.2487, "step": 12922 }, { "epoch": 0.81, "grad_norm": 3.8720471401606082, "learning_rate": 8.920580149782354e-07, "loss": 0.2805, "step": 12923 }, { "epoch": 0.81, "grad_norm": 1.9861607351680166, "learning_rate": 8.914774859535131e-07, "loss": 0.269, "step": 12924 }, { "epoch": 0.81, "grad_norm": 2.306090823850083, "learning_rate": 8.908971273986883e-07, "loss": 0.2541, "step": 12925 }, { "epoch": 0.81, "grad_norm": 1.7058781362913975, "learning_rate": 8.903169393378403e-07, "loss": 0.249, "step": 12926 }, { "epoch": 0.81, "grad_norm": 2.717577300853418, "learning_rate": 8.897369217950424e-07, "loss": 0.2565, "step": 12927 }, { "epoch": 0.81, "grad_norm": 1.6589760285420476, "learning_rate": 8.891570747943584e-07, "loss": 0.2591, "step": 12928 }, { "epoch": 0.81, "grad_norm": 2.305319716338477, "learning_rate": 8.885773983598512e-07, "loss": 0.283, "step": 12929 }, { "epoch": 0.81, "grad_norm": 15.79531730809761, "learning_rate": 8.879978925155697e-07, "loss": 0.2434, "step": 12930 }, { "epoch": 0.81, "grad_norm": 2.0361184049130663, "learning_rate": 8.874185572855581e-07, "loss": 0.2496, "step": 12931 }, { "epoch": 0.81, "grad_norm": 2.442694204864611, "learning_rate": 8.868393926938557e-07, "loss": 0.2603, "step": 12932 }, { "epoch": 0.81, "grad_norm": 2.8625723943847077, "learning_rate": 8.862603987644942e-07, "loss": 0.2457, "step": 12933 }, { "epoch": 0.81, "grad_norm": 1.4946677941746433, "learning_rate": 8.856815755214953e-07, "loss": 0.2475, "step": 12934 }, { "epoch": 0.81, "grad_norm": 1.2963165714071727, "learning_rate": 8.851029229888752e-07, "loss": 0.2418, "step": 12935 }, { "epoch": 0.81, "grad_norm": 0.583915943443398, "learning_rate": 8.845244411906429e-07, "loss": 0.4955, "step": 12936 }, { "epoch": 0.81, "grad_norm": 2.3722575964806563, "learning_rate": 8.839461301508028e-07, "loss": 0.2765, "step": 12937 }, { "epoch": 0.81, "grad_norm": 2.2057989099379087, "learning_rate": 8.833679898933472e-07, "loss": 0.2733, "step": 12938 }, { "epoch": 0.81, "grad_norm": 4.690125514832156, "learning_rate": 8.827900204422674e-07, "loss": 0.2689, "step": 12939 }, { "epoch": 0.81, "grad_norm": 1.8825434705305653, "learning_rate": 8.822122218215406e-07, "loss": 0.2667, "step": 12940 }, { "epoch": 0.81, "grad_norm": 1.6403611439779844, "learning_rate": 8.816345940551446e-07, "loss": 0.2476, "step": 12941 }, { "epoch": 0.81, "grad_norm": 1.505775895939719, "learning_rate": 8.81057137167044e-07, "loss": 0.2499, "step": 12942 }, { "epoch": 0.81, "grad_norm": 1.2747582625413525, "learning_rate": 8.80479851181199e-07, "loss": 0.2676, "step": 12943 }, { "epoch": 0.81, "grad_norm": 2.8306581193431004, "learning_rate": 8.799027361215607e-07, "loss": 0.2467, "step": 12944 }, { "epoch": 0.81, "grad_norm": 1.9694901453239277, "learning_rate": 8.793257920120774e-07, "loss": 0.272, "step": 12945 }, { "epoch": 0.81, "grad_norm": 1.8756997540171922, "learning_rate": 8.78749018876685e-07, "loss": 0.2542, "step": 12946 }, { "epoch": 0.81, "grad_norm": 2.621652512605524, "learning_rate": 8.781724167393168e-07, "loss": 0.2575, "step": 12947 }, { "epoch": 0.81, "grad_norm": 2.6949599893835643, "learning_rate": 8.775959856238953e-07, "loss": 0.2553, "step": 12948 }, { "epoch": 0.81, "grad_norm": 3.237138056523293, "learning_rate": 8.770197255543395e-07, "loss": 0.2743, "step": 12949 }, { "epoch": 0.81, "grad_norm": 2.0481638052120377, "learning_rate": 8.764436365545592e-07, "loss": 0.2534, "step": 12950 }, { "epoch": 0.81, "grad_norm": 2.0585499310221427, "learning_rate": 8.75867718648456e-07, "loss": 0.2583, "step": 12951 }, { "epoch": 0.81, "grad_norm": 4.588495449316286, "learning_rate": 8.752919718599256e-07, "loss": 0.2605, "step": 12952 }, { "epoch": 0.81, "grad_norm": 1.775181838047722, "learning_rate": 8.747163962128574e-07, "loss": 0.2455, "step": 12953 }, { "epoch": 0.81, "grad_norm": 2.716596625811185, "learning_rate": 8.741409917311344e-07, "loss": 0.2475, "step": 12954 }, { "epoch": 0.81, "grad_norm": 1.9314409012285993, "learning_rate": 8.735657584386297e-07, "loss": 0.2515, "step": 12955 }, { "epoch": 0.81, "grad_norm": 5.929806241612521, "learning_rate": 8.729906963592105e-07, "loss": 0.2601, "step": 12956 }, { "epoch": 0.81, "grad_norm": 1.971862189547834, "learning_rate": 8.724158055167386e-07, "loss": 0.2582, "step": 12957 }, { "epoch": 0.81, "grad_norm": 1.9891388691416703, "learning_rate": 8.718410859350663e-07, "loss": 0.2402, "step": 12958 }, { "epoch": 0.81, "grad_norm": 4.022258753997461, "learning_rate": 8.712665376380403e-07, "loss": 0.2615, "step": 12959 }, { "epoch": 0.82, "grad_norm": 1.5026717591833973, "learning_rate": 8.706921606494973e-07, "loss": 0.2603, "step": 12960 }, { "epoch": 0.82, "grad_norm": 12.121274332580468, "learning_rate": 8.701179549932709e-07, "loss": 0.2524, "step": 12961 }, { "epoch": 0.82, "grad_norm": 3.721103342967426, "learning_rate": 8.695439206931877e-07, "loss": 0.26, "step": 12962 }, { "epoch": 0.82, "grad_norm": 3.4580206751263725, "learning_rate": 8.689700577730637e-07, "loss": 0.2503, "step": 12963 }, { "epoch": 0.82, "grad_norm": 1.6261407089771402, "learning_rate": 8.68396366256708e-07, "loss": 0.2521, "step": 12964 }, { "epoch": 0.82, "grad_norm": 4.315283498841684, "learning_rate": 8.678228461679272e-07, "loss": 0.287, "step": 12965 }, { "epoch": 0.82, "grad_norm": 1.870100406785817, "learning_rate": 8.672494975305157e-07, "loss": 0.2541, "step": 12966 }, { "epoch": 0.82, "grad_norm": 1.8365201530208666, "learning_rate": 8.666763203682637e-07, "loss": 0.2448, "step": 12967 }, { "epoch": 0.82, "grad_norm": 3.331207549065846, "learning_rate": 8.661033147049496e-07, "loss": 0.2659, "step": 12968 }, { "epoch": 0.82, "grad_norm": 0.6157615990454055, "learning_rate": 8.655304805643549e-07, "loss": 0.4566, "step": 12969 }, { "epoch": 0.82, "grad_norm": 1.8008006782529928, "learning_rate": 8.649578179702434e-07, "loss": 0.2544, "step": 12970 }, { "epoch": 0.82, "grad_norm": 1.532257319195622, "learning_rate": 8.64385326946377e-07, "loss": 0.2559, "step": 12971 }, { "epoch": 0.82, "grad_norm": 3.867931380705508, "learning_rate": 8.638130075165079e-07, "loss": 0.273, "step": 12972 }, { "epoch": 0.82, "grad_norm": 2.1692684286071247, "learning_rate": 8.632408597043851e-07, "loss": 0.2718, "step": 12973 }, { "epoch": 0.82, "grad_norm": 4.268772053840724, "learning_rate": 8.626688835337471e-07, "loss": 0.2619, "step": 12974 }, { "epoch": 0.82, "grad_norm": 2.6911422353850587, "learning_rate": 8.620970790283251e-07, "loss": 0.2482, "step": 12975 }, { "epoch": 0.82, "grad_norm": 2.855496193606545, "learning_rate": 8.615254462118427e-07, "loss": 0.2643, "step": 12976 }, { "epoch": 0.82, "grad_norm": 17.318438943732563, "learning_rate": 8.609539851080234e-07, "loss": 0.2607, "step": 12977 }, { "epoch": 0.82, "grad_norm": 2.7953197647700785, "learning_rate": 8.603826957405742e-07, "loss": 0.2741, "step": 12978 }, { "epoch": 0.82, "grad_norm": 3.143269425400992, "learning_rate": 8.598115781332006e-07, "loss": 0.2431, "step": 12979 }, { "epoch": 0.82, "grad_norm": 3.362342912598155, "learning_rate": 8.592406323095964e-07, "loss": 0.2635, "step": 12980 }, { "epoch": 0.82, "grad_norm": 0.5253350567382885, "learning_rate": 8.58669858293455e-07, "loss": 0.422, "step": 12981 }, { "epoch": 0.82, "grad_norm": 2.248639521149241, "learning_rate": 8.580992561084567e-07, "loss": 0.2567, "step": 12982 }, { "epoch": 0.82, "grad_norm": 1.737356727789742, "learning_rate": 8.575288257782755e-07, "loss": 0.2581, "step": 12983 }, { "epoch": 0.82, "grad_norm": 1.8994868493645913, "learning_rate": 8.569585673265818e-07, "loss": 0.2729, "step": 12984 }, { "epoch": 0.82, "grad_norm": 1.7322243587602308, "learning_rate": 8.563884807770373e-07, "loss": 0.2507, "step": 12985 }, { "epoch": 0.82, "grad_norm": 5.5259055541114055, "learning_rate": 8.558185661532942e-07, "loss": 0.267, "step": 12986 }, { "epoch": 0.82, "grad_norm": 1.718643461005986, "learning_rate": 8.552488234789996e-07, "loss": 0.2719, "step": 12987 }, { "epoch": 0.82, "grad_norm": 5.129722357225122, "learning_rate": 8.546792527777925e-07, "loss": 0.278, "step": 12988 }, { "epoch": 0.82, "grad_norm": 1.4463680420732525, "learning_rate": 8.541098540733067e-07, "loss": 0.2546, "step": 12989 }, { "epoch": 0.82, "grad_norm": 2.257847127807278, "learning_rate": 8.535406273891678e-07, "loss": 0.2536, "step": 12990 }, { "epoch": 0.82, "grad_norm": 1.9405059302079728, "learning_rate": 8.529715727489912e-07, "loss": 0.2497, "step": 12991 }, { "epoch": 0.82, "grad_norm": 2.4784141342344626, "learning_rate": 8.524026901763905e-07, "loss": 0.2645, "step": 12992 }, { "epoch": 0.82, "grad_norm": 3.9415160324746514, "learning_rate": 8.518339796949704e-07, "loss": 0.2873, "step": 12993 }, { "epoch": 0.82, "grad_norm": 1.8877086490397215, "learning_rate": 8.512654413283261e-07, "loss": 0.2462, "step": 12994 }, { "epoch": 0.82, "grad_norm": 4.172067567216997, "learning_rate": 8.506970751000477e-07, "loss": 0.2547, "step": 12995 }, { "epoch": 0.82, "grad_norm": 9.178190644279585, "learning_rate": 8.501288810337166e-07, "loss": 0.2644, "step": 12996 }, { "epoch": 0.82, "grad_norm": 1.999811991749933, "learning_rate": 8.495608591529103e-07, "loss": 0.2413, "step": 12997 }, { "epoch": 0.82, "grad_norm": 1.7508726125273202, "learning_rate": 8.489930094811949e-07, "loss": 0.256, "step": 12998 }, { "epoch": 0.82, "grad_norm": 3.424967339738821, "learning_rate": 8.484253320421337e-07, "loss": 0.2648, "step": 12999 }, { "epoch": 0.82, "grad_norm": 1.644868330238792, "learning_rate": 8.478578268592779e-07, "loss": 0.2572, "step": 13000 }, { "epoch": 0.82, "grad_norm": 2.041706935467664, "learning_rate": 8.472904939561776e-07, "loss": 0.2583, "step": 13001 }, { "epoch": 0.82, "grad_norm": 2.989928503212847, "learning_rate": 8.467233333563707e-07, "loss": 0.2508, "step": 13002 }, { "epoch": 0.82, "grad_norm": 1.300913008024875, "learning_rate": 8.461563450833898e-07, "loss": 0.2443, "step": 13003 }, { "epoch": 0.82, "grad_norm": 3.046459539029601, "learning_rate": 8.455895291607585e-07, "loss": 0.2885, "step": 13004 }, { "epoch": 0.82, "grad_norm": 3.6342500234576756, "learning_rate": 8.450228856119974e-07, "loss": 0.2371, "step": 13005 }, { "epoch": 0.82, "grad_norm": 2.8703494527860083, "learning_rate": 8.444564144606177e-07, "loss": 0.2566, "step": 13006 }, { "epoch": 0.82, "grad_norm": 1.3922039419797838, "learning_rate": 8.438901157301221e-07, "loss": 0.2384, "step": 13007 }, { "epoch": 0.82, "grad_norm": 6.7703336374296015, "learning_rate": 8.433239894440071e-07, "loss": 0.2472, "step": 13008 }, { "epoch": 0.82, "grad_norm": 1.9150429948459586, "learning_rate": 8.427580356257636e-07, "loss": 0.2544, "step": 13009 }, { "epoch": 0.82, "grad_norm": 0.5940597519353604, "learning_rate": 8.421922542988736e-07, "loss": 0.4712, "step": 13010 }, { "epoch": 0.82, "grad_norm": 1.8368424429118755, "learning_rate": 8.416266454868122e-07, "loss": 0.2502, "step": 13011 }, { "epoch": 0.82, "grad_norm": 1.8532035972685212, "learning_rate": 8.410612092130455e-07, "loss": 0.2496, "step": 13012 }, { "epoch": 0.82, "grad_norm": 2.388364709508336, "learning_rate": 8.40495945501037e-07, "loss": 0.2618, "step": 13013 }, { "epoch": 0.82, "grad_norm": 4.132774989207916, "learning_rate": 8.399308543742408e-07, "loss": 0.2689, "step": 13014 }, { "epoch": 0.82, "grad_norm": 1.895250884264923, "learning_rate": 8.393659358561024e-07, "loss": 0.2568, "step": 13015 }, { "epoch": 0.82, "grad_norm": 2.0614659051887556, "learning_rate": 8.388011899700605e-07, "loss": 0.2532, "step": 13016 }, { "epoch": 0.82, "grad_norm": 1.7982387345417716, "learning_rate": 8.382366167395495e-07, "loss": 0.2294, "step": 13017 }, { "epoch": 0.82, "grad_norm": 1.8196576703216867, "learning_rate": 8.37672216187993e-07, "loss": 0.2392, "step": 13018 }, { "epoch": 0.82, "grad_norm": 1.9383974662274779, "learning_rate": 8.3710798833881e-07, "loss": 0.2627, "step": 13019 }, { "epoch": 0.82, "grad_norm": 1.959002470919046, "learning_rate": 8.36543933215409e-07, "loss": 0.26, "step": 13020 }, { "epoch": 0.82, "grad_norm": 2.624990470072323, "learning_rate": 8.359800508411958e-07, "loss": 0.2714, "step": 13021 }, { "epoch": 0.82, "grad_norm": 2.8743607113147993, "learning_rate": 8.354163412395671e-07, "loss": 0.2504, "step": 13022 }, { "epoch": 0.82, "grad_norm": 0.5898234152130922, "learning_rate": 8.348528044339122e-07, "loss": 0.4621, "step": 13023 }, { "epoch": 0.82, "grad_norm": 2.3573439550623307, "learning_rate": 8.34289440447611e-07, "loss": 0.2858, "step": 13024 }, { "epoch": 0.82, "grad_norm": 2.501002413577344, "learning_rate": 8.33726249304041e-07, "loss": 0.2591, "step": 13025 }, { "epoch": 0.82, "grad_norm": 1.8653577441948457, "learning_rate": 8.331632310265691e-07, "loss": 0.2568, "step": 13026 }, { "epoch": 0.82, "grad_norm": 14.172463980478453, "learning_rate": 8.326003856385561e-07, "loss": 0.2819, "step": 13027 }, { "epoch": 0.82, "grad_norm": 3.0252902553124086, "learning_rate": 8.320377131633545e-07, "loss": 0.2463, "step": 13028 }, { "epoch": 0.82, "grad_norm": 2.009323889116786, "learning_rate": 8.314752136243104e-07, "loss": 0.252, "step": 13029 }, { "epoch": 0.82, "grad_norm": 0.5814904848572607, "learning_rate": 8.309128870447658e-07, "loss": 0.4545, "step": 13030 }, { "epoch": 0.82, "grad_norm": 1.6858426417017702, "learning_rate": 8.303507334480499e-07, "loss": 0.2535, "step": 13031 }, { "epoch": 0.82, "grad_norm": 1.342394518394939, "learning_rate": 8.297887528574877e-07, "loss": 0.2461, "step": 13032 }, { "epoch": 0.82, "grad_norm": 2.292131029782006, "learning_rate": 8.292269452963981e-07, "loss": 0.2667, "step": 13033 }, { "epoch": 0.82, "grad_norm": 1.4481962584993637, "learning_rate": 8.286653107880904e-07, "loss": 0.2496, "step": 13034 }, { "epoch": 0.82, "grad_norm": 1.5152240136766413, "learning_rate": 8.281038493558674e-07, "loss": 0.2593, "step": 13035 }, { "epoch": 0.82, "grad_norm": 2.622225821286546, "learning_rate": 8.275425610230269e-07, "loss": 0.2631, "step": 13036 }, { "epoch": 0.82, "grad_norm": 1.6396270837493994, "learning_rate": 8.269814458128556e-07, "loss": 0.2505, "step": 13037 }, { "epoch": 0.82, "grad_norm": 1.4101603639984648, "learning_rate": 8.264205037486367e-07, "loss": 0.2561, "step": 13038 }, { "epoch": 0.82, "grad_norm": 2.516095248356477, "learning_rate": 8.258597348536452e-07, "loss": 0.2655, "step": 13039 }, { "epoch": 0.82, "grad_norm": 2.599263168900209, "learning_rate": 8.252991391511455e-07, "loss": 0.2586, "step": 13040 }, { "epoch": 0.82, "grad_norm": 1.9634882322785763, "learning_rate": 8.247387166644011e-07, "loss": 0.2556, "step": 13041 }, { "epoch": 0.82, "grad_norm": 8.347372733686193, "learning_rate": 8.24178467416663e-07, "loss": 0.2784, "step": 13042 }, { "epoch": 0.82, "grad_norm": 3.183040854119145, "learning_rate": 8.236183914311769e-07, "loss": 0.2704, "step": 13043 }, { "epoch": 0.82, "grad_norm": 5.055340739105419, "learning_rate": 8.230584887311826e-07, "loss": 0.2661, "step": 13044 }, { "epoch": 0.82, "grad_norm": 1.8134922061435854, "learning_rate": 8.224987593399098e-07, "loss": 0.2686, "step": 13045 }, { "epoch": 0.82, "grad_norm": 1.811205474303526, "learning_rate": 8.219392032805846e-07, "loss": 0.2595, "step": 13046 }, { "epoch": 0.82, "grad_norm": 4.203829807712511, "learning_rate": 8.213798205764228e-07, "loss": 0.2692, "step": 13047 }, { "epoch": 0.82, "grad_norm": 2.807074794904907, "learning_rate": 8.208206112506329e-07, "loss": 0.2538, "step": 13048 }, { "epoch": 0.82, "grad_norm": 2.5836032842498233, "learning_rate": 8.202615753264204e-07, "loss": 0.3067, "step": 13049 }, { "epoch": 0.82, "grad_norm": 2.563150443366848, "learning_rate": 8.19702712826978e-07, "loss": 0.2932, "step": 13050 }, { "epoch": 0.82, "grad_norm": 4.188312959201578, "learning_rate": 8.191440237754961e-07, "loss": 0.2533, "step": 13051 }, { "epoch": 0.82, "grad_norm": 1.3865614737569423, "learning_rate": 8.185855081951538e-07, "loss": 0.2511, "step": 13052 }, { "epoch": 0.82, "grad_norm": 1.8887032677106688, "learning_rate": 8.180271661091266e-07, "loss": 0.252, "step": 13053 }, { "epoch": 0.82, "grad_norm": 3.8215593645014145, "learning_rate": 8.174689975405809e-07, "loss": 0.241, "step": 13054 }, { "epoch": 0.82, "grad_norm": 3.4799180184625094, "learning_rate": 8.169110025126747e-07, "loss": 0.2423, "step": 13055 }, { "epoch": 0.82, "grad_norm": 2.0505103696824696, "learning_rate": 8.163531810485603e-07, "loss": 0.2702, "step": 13056 }, { "epoch": 0.82, "grad_norm": 0.633765709039043, "learning_rate": 8.157955331713846e-07, "loss": 0.4643, "step": 13057 }, { "epoch": 0.82, "grad_norm": 1.6442694501102397, "learning_rate": 8.15238058904283e-07, "loss": 0.2624, "step": 13058 }, { "epoch": 0.82, "grad_norm": 2.4342128978916855, "learning_rate": 8.146807582703886e-07, "loss": 0.2529, "step": 13059 }, { "epoch": 0.82, "grad_norm": 1.573319810450124, "learning_rate": 8.141236312928219e-07, "loss": 0.2547, "step": 13060 }, { "epoch": 0.82, "grad_norm": 1.7221318023916532, "learning_rate": 8.135666779947027e-07, "loss": 0.2413, "step": 13061 }, { "epoch": 0.82, "grad_norm": 2.087675320801948, "learning_rate": 8.13009898399137e-07, "loss": 0.2439, "step": 13062 }, { "epoch": 0.82, "grad_norm": 2.1914829503325612, "learning_rate": 8.124532925292283e-07, "loss": 0.2403, "step": 13063 }, { "epoch": 0.82, "grad_norm": 2.3682928785855832, "learning_rate": 8.118968604080684e-07, "loss": 0.261, "step": 13064 }, { "epoch": 0.82, "grad_norm": 2.031624591214938, "learning_rate": 8.113406020587472e-07, "loss": 0.2685, "step": 13065 }, { "epoch": 0.82, "grad_norm": 2.358590074475766, "learning_rate": 8.107845175043455e-07, "loss": 0.272, "step": 13066 }, { "epoch": 0.82, "grad_norm": 2.4451503825110303, "learning_rate": 8.102286067679354e-07, "loss": 0.2553, "step": 13067 }, { "epoch": 0.82, "grad_norm": 2.508759541027896, "learning_rate": 8.096728698725803e-07, "loss": 0.2408, "step": 13068 }, { "epoch": 0.82, "grad_norm": 2.233493219583581, "learning_rate": 8.091173068413427e-07, "loss": 0.2453, "step": 13069 }, { "epoch": 0.82, "grad_norm": 1.7521175580643524, "learning_rate": 8.085619176972715e-07, "loss": 0.2529, "step": 13070 }, { "epoch": 0.82, "grad_norm": 3.9641078041129796, "learning_rate": 8.080067024634109e-07, "loss": 0.2446, "step": 13071 }, { "epoch": 0.82, "grad_norm": 0.6226189982102704, "learning_rate": 8.07451661162797e-07, "loss": 0.469, "step": 13072 }, { "epoch": 0.82, "grad_norm": 1.6846918995664295, "learning_rate": 8.068967938184608e-07, "loss": 0.2421, "step": 13073 }, { "epoch": 0.82, "grad_norm": 2.890165279115881, "learning_rate": 8.063421004534256e-07, "loss": 0.2569, "step": 13074 }, { "epoch": 0.82, "grad_norm": 1.6052195605657091, "learning_rate": 8.057875810907051e-07, "loss": 0.2359, "step": 13075 }, { "epoch": 0.82, "grad_norm": 1.615479030906575, "learning_rate": 8.052332357533066e-07, "loss": 0.2479, "step": 13076 }, { "epoch": 0.82, "grad_norm": 1.929071088557361, "learning_rate": 8.046790644642327e-07, "loss": 0.249, "step": 13077 }, { "epoch": 0.82, "grad_norm": 2.3559093829198097, "learning_rate": 8.041250672464768e-07, "loss": 0.2535, "step": 13078 }, { "epoch": 0.82, "grad_norm": 2.4031842626611275, "learning_rate": 8.035712441230237e-07, "loss": 0.2788, "step": 13079 }, { "epoch": 0.82, "grad_norm": 1.2772086345128029, "learning_rate": 8.030175951168528e-07, "loss": 0.2559, "step": 13080 }, { "epoch": 0.82, "grad_norm": 0.6154076525591489, "learning_rate": 8.024641202509365e-07, "loss": 0.4888, "step": 13081 }, { "epoch": 0.82, "grad_norm": 1.9684861077151803, "learning_rate": 8.019108195482406e-07, "loss": 0.2489, "step": 13082 }, { "epoch": 0.82, "grad_norm": 2.1455532813566687, "learning_rate": 8.013576930317218e-07, "loss": 0.2679, "step": 13083 }, { "epoch": 0.82, "grad_norm": 1.9666094734323385, "learning_rate": 8.008047407243285e-07, "loss": 0.2671, "step": 13084 }, { "epoch": 0.82, "grad_norm": 4.4877253216716495, "learning_rate": 8.002519626490063e-07, "loss": 0.2542, "step": 13085 }, { "epoch": 0.82, "grad_norm": 2.1984824133815075, "learning_rate": 7.996993588286894e-07, "loss": 0.251, "step": 13086 }, { "epoch": 0.82, "grad_norm": 1.715523572971109, "learning_rate": 7.991469292863058e-07, "loss": 0.2515, "step": 13087 }, { "epoch": 0.82, "grad_norm": 2.76257404545504, "learning_rate": 7.985946740447792e-07, "loss": 0.2559, "step": 13088 }, { "epoch": 0.82, "grad_norm": 2.7938664839444995, "learning_rate": 7.980425931270203e-07, "loss": 0.2837, "step": 13089 }, { "epoch": 0.82, "grad_norm": 1.3795535571582775, "learning_rate": 7.974906865559396e-07, "loss": 0.2578, "step": 13090 }, { "epoch": 0.82, "grad_norm": 0.5783046593675997, "learning_rate": 7.969389543544343e-07, "loss": 0.4643, "step": 13091 }, { "epoch": 0.82, "grad_norm": 2.0700533399711567, "learning_rate": 7.963873965453961e-07, "loss": 0.259, "step": 13092 }, { "epoch": 0.82, "grad_norm": 1.932886429506239, "learning_rate": 7.958360131517123e-07, "loss": 0.2496, "step": 13093 }, { "epoch": 0.82, "grad_norm": 1.9467870566710295, "learning_rate": 7.952848041962601e-07, "loss": 0.2563, "step": 13094 }, { "epoch": 0.82, "grad_norm": 1.5904858129912447, "learning_rate": 7.947337697019087e-07, "loss": 0.2476, "step": 13095 }, { "epoch": 0.82, "grad_norm": 1.7050276569152154, "learning_rate": 7.941829096915237e-07, "loss": 0.2524, "step": 13096 }, { "epoch": 0.82, "grad_norm": 2.9404042332245237, "learning_rate": 7.936322241879585e-07, "loss": 0.2556, "step": 13097 }, { "epoch": 0.82, "grad_norm": 1.4866373699453335, "learning_rate": 7.930817132140656e-07, "loss": 0.2457, "step": 13098 }, { "epoch": 0.82, "grad_norm": 1.7794633701255003, "learning_rate": 7.92531376792684e-07, "loss": 0.2714, "step": 13099 }, { "epoch": 0.82, "grad_norm": 3.492381490675387, "learning_rate": 7.919812149466483e-07, "loss": 0.2514, "step": 13100 }, { "epoch": 0.82, "grad_norm": 2.3275529548383482, "learning_rate": 7.914312276987873e-07, "loss": 0.2514, "step": 13101 }, { "epoch": 0.82, "grad_norm": 4.607591987618546, "learning_rate": 7.908814150719185e-07, "loss": 0.2561, "step": 13102 }, { "epoch": 0.82, "grad_norm": 2.048912563356653, "learning_rate": 7.903317770888574e-07, "loss": 0.2599, "step": 13103 }, { "epoch": 0.82, "grad_norm": 4.764661385919167, "learning_rate": 7.897823137724081e-07, "loss": 0.2659, "step": 13104 }, { "epoch": 0.82, "grad_norm": 4.161276809576672, "learning_rate": 7.892330251453672e-07, "loss": 0.2393, "step": 13105 }, { "epoch": 0.82, "grad_norm": 0.6182133375116967, "learning_rate": 7.886839112305288e-07, "loss": 0.4546, "step": 13106 }, { "epoch": 0.82, "grad_norm": 1.9272856699175505, "learning_rate": 7.881349720506754e-07, "loss": 0.2671, "step": 13107 }, { "epoch": 0.82, "grad_norm": 5.285158660255171, "learning_rate": 7.875862076285812e-07, "loss": 0.287, "step": 13108 }, { "epoch": 0.82, "grad_norm": 1.6366069699449706, "learning_rate": 7.870376179870187e-07, "loss": 0.238, "step": 13109 }, { "epoch": 0.82, "grad_norm": 23.590969575871426, "learning_rate": 7.864892031487476e-07, "loss": 0.2653, "step": 13110 }, { "epoch": 0.82, "grad_norm": 3.009571809747192, "learning_rate": 7.859409631365245e-07, "loss": 0.2476, "step": 13111 }, { "epoch": 0.82, "grad_norm": 13.366996876037401, "learning_rate": 7.853928979730962e-07, "loss": 0.2549, "step": 13112 }, { "epoch": 0.82, "grad_norm": 3.0365944811854693, "learning_rate": 7.848450076812008e-07, "loss": 0.2719, "step": 13113 }, { "epoch": 0.82, "grad_norm": 2.413728089958753, "learning_rate": 7.842972922835745e-07, "loss": 0.2596, "step": 13114 }, { "epoch": 0.82, "grad_norm": 1.4686232985740932, "learning_rate": 7.837497518029419e-07, "loss": 0.2396, "step": 13115 }, { "epoch": 0.82, "grad_norm": 1.712527937302349, "learning_rate": 7.832023862620192e-07, "loss": 0.2501, "step": 13116 }, { "epoch": 0.82, "grad_norm": 1.7211641572292184, "learning_rate": 7.826551956835193e-07, "loss": 0.2667, "step": 13117 }, { "epoch": 0.82, "grad_norm": 1.8893581009564415, "learning_rate": 7.821081800901475e-07, "loss": 0.2509, "step": 13118 }, { "epoch": 0.83, "grad_norm": 1.6012426350371194, "learning_rate": 7.815613395045996e-07, "loss": 0.2522, "step": 13119 }, { "epoch": 0.83, "grad_norm": 3.0291694474029827, "learning_rate": 7.810146739495638e-07, "loss": 0.2445, "step": 13120 }, { "epoch": 0.83, "grad_norm": 1.7227236330250217, "learning_rate": 7.804681834477223e-07, "loss": 0.2576, "step": 13121 }, { "epoch": 0.83, "grad_norm": 0.5701982959971131, "learning_rate": 7.799218680217513e-07, "loss": 0.4314, "step": 13122 }, { "epoch": 0.83, "grad_norm": 2.602939963796262, "learning_rate": 7.793757276943181e-07, "loss": 0.2579, "step": 13123 }, { "epoch": 0.83, "grad_norm": 1.744144377902834, "learning_rate": 7.788297624880814e-07, "loss": 0.2362, "step": 13124 }, { "epoch": 0.83, "grad_norm": 2.3946643028272416, "learning_rate": 7.782839724256952e-07, "loss": 0.2598, "step": 13125 }, { "epoch": 0.83, "grad_norm": 3.4460012789736063, "learning_rate": 7.777383575298069e-07, "loss": 0.2554, "step": 13126 }, { "epoch": 0.83, "grad_norm": 2.449953303242268, "learning_rate": 7.771929178230542e-07, "loss": 0.2568, "step": 13127 }, { "epoch": 0.83, "grad_norm": 2.4017882233989867, "learning_rate": 7.766476533280659e-07, "loss": 0.273, "step": 13128 }, { "epoch": 0.83, "grad_norm": 2.3111915189622882, "learning_rate": 7.761025640674696e-07, "loss": 0.2588, "step": 13129 }, { "epoch": 0.83, "grad_norm": 5.916331245414365, "learning_rate": 7.755576500638806e-07, "loss": 0.273, "step": 13130 }, { "epoch": 0.83, "grad_norm": 4.460799444511176, "learning_rate": 7.75012911339908e-07, "loss": 0.2489, "step": 13131 }, { "epoch": 0.83, "grad_norm": 2.0456496495781993, "learning_rate": 7.74468347918153e-07, "loss": 0.2764, "step": 13132 }, { "epoch": 0.83, "grad_norm": 1.6935012493677806, "learning_rate": 7.739239598212111e-07, "loss": 0.2417, "step": 13133 }, { "epoch": 0.83, "grad_norm": 3.1028167821480643, "learning_rate": 7.733797470716725e-07, "loss": 0.2647, "step": 13134 }, { "epoch": 0.83, "grad_norm": 1.4120097639929552, "learning_rate": 7.728357096921152e-07, "loss": 0.2461, "step": 13135 }, { "epoch": 0.83, "grad_norm": 1.8307900217480282, "learning_rate": 7.722918477051112e-07, "loss": 0.2509, "step": 13136 }, { "epoch": 0.83, "grad_norm": 14.174653470847996, "learning_rate": 7.717481611332289e-07, "loss": 0.2601, "step": 13137 }, { "epoch": 0.83, "grad_norm": 2.6613766491342608, "learning_rate": 7.712046499990255e-07, "loss": 0.2677, "step": 13138 }, { "epoch": 0.83, "grad_norm": 5.22315979333206, "learning_rate": 7.706613143250524e-07, "loss": 0.2511, "step": 13139 }, { "epoch": 0.83, "grad_norm": 1.6642103270753872, "learning_rate": 7.701181541338526e-07, "loss": 0.259, "step": 13140 }, { "epoch": 0.83, "grad_norm": 7.5889709885115195, "learning_rate": 7.695751694479636e-07, "loss": 0.2491, "step": 13141 }, { "epoch": 0.83, "grad_norm": 2.247138147154396, "learning_rate": 7.690323602899163e-07, "loss": 0.2498, "step": 13142 }, { "epoch": 0.83, "grad_norm": 2.5433199558546358, "learning_rate": 7.684897266822311e-07, "loss": 0.2634, "step": 13143 }, { "epoch": 0.83, "grad_norm": 1.9310444110559235, "learning_rate": 7.67947268647422e-07, "loss": 0.2526, "step": 13144 }, { "epoch": 0.83, "grad_norm": 2.2473337510819267, "learning_rate": 7.67404986207999e-07, "loss": 0.2774, "step": 13145 }, { "epoch": 0.83, "grad_norm": 1.7584321078236875, "learning_rate": 7.668628793864607e-07, "loss": 0.2491, "step": 13146 }, { "epoch": 0.83, "grad_norm": 2.3861916248282093, "learning_rate": 7.663209482052997e-07, "loss": 0.2442, "step": 13147 }, { "epoch": 0.83, "grad_norm": 2.506798592293803, "learning_rate": 7.657791926870034e-07, "loss": 0.2767, "step": 13148 }, { "epoch": 0.83, "grad_norm": 1.35585626017092, "learning_rate": 7.652376128540478e-07, "loss": 0.2345, "step": 13149 }, { "epoch": 0.83, "grad_norm": 1.4655352478852601, "learning_rate": 7.646962087289073e-07, "loss": 0.2452, "step": 13150 }, { "epoch": 0.83, "grad_norm": 0.5919262894747134, "learning_rate": 7.641549803340431e-07, "loss": 0.5069, "step": 13151 }, { "epoch": 0.83, "grad_norm": 1.4528374372651491, "learning_rate": 7.636139276919119e-07, "loss": 0.2492, "step": 13152 }, { "epoch": 0.83, "grad_norm": 3.2757614043041645, "learning_rate": 7.630730508249639e-07, "loss": 0.2659, "step": 13153 }, { "epoch": 0.83, "grad_norm": 1.702400540208499, "learning_rate": 7.625323497556414e-07, "loss": 0.2468, "step": 13154 }, { "epoch": 0.83, "grad_norm": 2.078352471027836, "learning_rate": 7.619918245063768e-07, "loss": 0.2396, "step": 13155 }, { "epoch": 0.83, "grad_norm": 2.5552855032131907, "learning_rate": 7.614514750996005e-07, "loss": 0.2879, "step": 13156 }, { "epoch": 0.83, "grad_norm": 1.6837207980809006, "learning_rate": 7.609113015577291e-07, "loss": 0.2469, "step": 13157 }, { "epoch": 0.83, "grad_norm": 2.518276060545688, "learning_rate": 7.60371303903179e-07, "loss": 0.2697, "step": 13158 }, { "epoch": 0.83, "grad_norm": 6.810011764493329, "learning_rate": 7.598314821583542e-07, "loss": 0.26, "step": 13159 }, { "epoch": 0.83, "grad_norm": 3.5868077906950226, "learning_rate": 7.592918363456509e-07, "loss": 0.2648, "step": 13160 }, { "epoch": 0.83, "grad_norm": 11.504520961715231, "learning_rate": 7.58752366487463e-07, "loss": 0.2546, "step": 13161 }, { "epoch": 0.83, "grad_norm": 2.1614706095236045, "learning_rate": 7.58213072606171e-07, "loss": 0.2611, "step": 13162 }, { "epoch": 0.83, "grad_norm": 2.6102353747156255, "learning_rate": 7.576739547241546e-07, "loss": 0.2494, "step": 13163 }, { "epoch": 0.83, "grad_norm": 2.548698086483542, "learning_rate": 7.571350128637811e-07, "loss": 0.2656, "step": 13164 }, { "epoch": 0.83, "grad_norm": 1.954585837576112, "learning_rate": 7.565962470474109e-07, "loss": 0.2697, "step": 13165 }, { "epoch": 0.83, "grad_norm": 1.8921314004904175, "learning_rate": 7.560576572974004e-07, "loss": 0.2626, "step": 13166 }, { "epoch": 0.83, "grad_norm": 1.425673724714399, "learning_rate": 7.555192436360958e-07, "loss": 0.2373, "step": 13167 }, { "epoch": 0.83, "grad_norm": 2.1670183969758345, "learning_rate": 7.549810060858359e-07, "loss": 0.2578, "step": 13168 }, { "epoch": 0.83, "grad_norm": 1.4958604366379131, "learning_rate": 7.544429446689544e-07, "loss": 0.2669, "step": 13169 }, { "epoch": 0.83, "grad_norm": 2.2188583707750205, "learning_rate": 7.539050594077768e-07, "loss": 0.2569, "step": 13170 }, { "epoch": 0.83, "grad_norm": 2.5344431709143387, "learning_rate": 7.533673503246203e-07, "loss": 0.2821, "step": 13171 }, { "epoch": 0.83, "grad_norm": 2.3546647669187095, "learning_rate": 7.528298174417953e-07, "loss": 0.2604, "step": 13172 }, { "epoch": 0.83, "grad_norm": 1.2383655837220653, "learning_rate": 7.522924607816034e-07, "loss": 0.2359, "step": 13173 }, { "epoch": 0.83, "grad_norm": 3.26240150075405, "learning_rate": 7.517552803663441e-07, "loss": 0.2444, "step": 13174 }, { "epoch": 0.83, "grad_norm": 1.8469803590004534, "learning_rate": 7.512182762183035e-07, "loss": 0.2438, "step": 13175 }, { "epoch": 0.83, "grad_norm": 2.8829531334783365, "learning_rate": 7.506814483597619e-07, "loss": 0.2579, "step": 13176 }, { "epoch": 0.83, "grad_norm": 1.7580614740710847, "learning_rate": 7.501447968129949e-07, "loss": 0.2526, "step": 13177 }, { "epoch": 0.83, "grad_norm": 2.3951182972511615, "learning_rate": 7.496083216002703e-07, "loss": 0.2467, "step": 13178 }, { "epoch": 0.83, "grad_norm": 1.5016522555526541, "learning_rate": 7.490720227438453e-07, "loss": 0.2339, "step": 13179 }, { "epoch": 0.83, "grad_norm": 7.336968063304557, "learning_rate": 7.485359002659731e-07, "loss": 0.2534, "step": 13180 }, { "epoch": 0.83, "grad_norm": 1.7470124960417608, "learning_rate": 7.479999541888966e-07, "loss": 0.2551, "step": 13181 }, { "epoch": 0.83, "grad_norm": 7.100544671494195, "learning_rate": 7.474641845348557e-07, "loss": 0.2671, "step": 13182 }, { "epoch": 0.83, "grad_norm": 2.7244243275115387, "learning_rate": 7.469285913260787e-07, "loss": 0.2454, "step": 13183 }, { "epoch": 0.83, "grad_norm": 2.3512624149281267, "learning_rate": 7.46393174584788e-07, "loss": 0.2696, "step": 13184 }, { "epoch": 0.83, "grad_norm": 2.814366230180223, "learning_rate": 7.458579343331996e-07, "loss": 0.261, "step": 13185 }, { "epoch": 0.83, "grad_norm": 1.7616757227815985, "learning_rate": 7.453228705935228e-07, "loss": 0.2757, "step": 13186 }, { "epoch": 0.83, "grad_norm": 2.1225086198030043, "learning_rate": 7.447879833879574e-07, "loss": 0.2778, "step": 13187 }, { "epoch": 0.83, "grad_norm": 1.559266608611239, "learning_rate": 7.442532727386964e-07, "loss": 0.2539, "step": 13188 }, { "epoch": 0.83, "grad_norm": 2.19378700486509, "learning_rate": 7.437187386679252e-07, "loss": 0.2413, "step": 13189 }, { "epoch": 0.83, "grad_norm": 3.0478588030052065, "learning_rate": 7.431843811978246e-07, "loss": 0.253, "step": 13190 }, { "epoch": 0.83, "grad_norm": 2.267341315514969, "learning_rate": 7.426502003505653e-07, "loss": 0.2662, "step": 13191 }, { "epoch": 0.83, "grad_norm": 1.3898595182236768, "learning_rate": 7.421161961483098e-07, "loss": 0.2508, "step": 13192 }, { "epoch": 0.83, "grad_norm": 1.727880332474302, "learning_rate": 7.415823686132162e-07, "loss": 0.2531, "step": 13193 }, { "epoch": 0.83, "grad_norm": 0.6029757269574015, "learning_rate": 7.410487177674347e-07, "loss": 0.4658, "step": 13194 }, { "epoch": 0.83, "grad_norm": 1.4805363682229404, "learning_rate": 7.40515243633107e-07, "loss": 0.2562, "step": 13195 }, { "epoch": 0.83, "grad_norm": 1.652345291347373, "learning_rate": 7.399819462323677e-07, "loss": 0.2593, "step": 13196 }, { "epoch": 0.83, "grad_norm": 2.792579656590822, "learning_rate": 7.394488255873422e-07, "loss": 0.2517, "step": 13197 }, { "epoch": 0.83, "grad_norm": 1.647559192520862, "learning_rate": 7.389158817201541e-07, "loss": 0.2488, "step": 13198 }, { "epoch": 0.83, "grad_norm": 2.3104969639439124, "learning_rate": 7.383831146529136e-07, "loss": 0.241, "step": 13199 }, { "epoch": 0.83, "grad_norm": 1.7519835733076743, "learning_rate": 7.378505244077283e-07, "loss": 0.261, "step": 13200 }, { "epoch": 0.83, "grad_norm": 1.9419281616050887, "learning_rate": 7.373181110066941e-07, "loss": 0.2629, "step": 13201 }, { "epoch": 0.83, "grad_norm": 2.056614347365575, "learning_rate": 7.367858744719036e-07, "loss": 0.2662, "step": 13202 }, { "epoch": 0.83, "grad_norm": 4.19721152563661, "learning_rate": 7.3625381482544e-07, "loss": 0.263, "step": 13203 }, { "epoch": 0.83, "grad_norm": 1.3233526406143272, "learning_rate": 7.357219320893789e-07, "loss": 0.2509, "step": 13204 }, { "epoch": 0.83, "grad_norm": 1.899407420166879, "learning_rate": 7.351902262857874e-07, "loss": 0.265, "step": 13205 }, { "epoch": 0.83, "grad_norm": 2.4408411128453946, "learning_rate": 7.346586974367298e-07, "loss": 0.2658, "step": 13206 }, { "epoch": 0.83, "grad_norm": 1.8495275953807058, "learning_rate": 7.341273455642577e-07, "loss": 0.2699, "step": 13207 }, { "epoch": 0.83, "grad_norm": 1.9123545368181403, "learning_rate": 7.33596170690421e-07, "loss": 0.2567, "step": 13208 }, { "epoch": 0.83, "grad_norm": 2.2028353397593383, "learning_rate": 7.330651728372551e-07, "loss": 0.264, "step": 13209 }, { "epoch": 0.83, "grad_norm": 1.7664593579300796, "learning_rate": 7.325343520267957e-07, "loss": 0.2734, "step": 13210 }, { "epoch": 0.83, "grad_norm": 2.070524073509291, "learning_rate": 7.32003708281066e-07, "loss": 0.2561, "step": 13211 }, { "epoch": 0.83, "grad_norm": 4.088478926229157, "learning_rate": 7.314732416220821e-07, "loss": 0.2613, "step": 13212 }, { "epoch": 0.83, "grad_norm": 1.8787847196137988, "learning_rate": 7.309429520718558e-07, "loss": 0.2474, "step": 13213 }, { "epoch": 0.83, "grad_norm": 2.271709088883474, "learning_rate": 7.304128396523885e-07, "loss": 0.2565, "step": 13214 }, { "epoch": 0.83, "grad_norm": 1.2318000708085362, "learning_rate": 7.29882904385677e-07, "loss": 0.281, "step": 13215 }, { "epoch": 0.83, "grad_norm": 2.775665621712501, "learning_rate": 7.293531462937087e-07, "loss": 0.2556, "step": 13216 }, { "epoch": 0.83, "grad_norm": 1.339480552195945, "learning_rate": 7.288235653984621e-07, "loss": 0.2503, "step": 13217 }, { "epoch": 0.83, "grad_norm": 1.643151832930828, "learning_rate": 7.282941617219142e-07, "loss": 0.2349, "step": 13218 }, { "epoch": 0.83, "grad_norm": 1.345094487632265, "learning_rate": 7.277649352860283e-07, "loss": 0.2469, "step": 13219 }, { "epoch": 0.83, "grad_norm": 2.1513817184507706, "learning_rate": 7.272358861127626e-07, "loss": 0.2731, "step": 13220 }, { "epoch": 0.83, "grad_norm": 1.6633674018639215, "learning_rate": 7.267070142240706e-07, "loss": 0.2499, "step": 13221 }, { "epoch": 0.83, "grad_norm": 2.9642687538246135, "learning_rate": 7.261783196418937e-07, "loss": 0.2829, "step": 13222 }, { "epoch": 0.83, "grad_norm": 1.9624608906456469, "learning_rate": 7.256498023881703e-07, "loss": 0.2467, "step": 13223 }, { "epoch": 0.83, "grad_norm": 1.484682183252951, "learning_rate": 7.251214624848285e-07, "loss": 0.2586, "step": 13224 }, { "epoch": 0.83, "grad_norm": 1.9693426805083505, "learning_rate": 7.245932999537897e-07, "loss": 0.2455, "step": 13225 }, { "epoch": 0.83, "grad_norm": 2.792972748711006, "learning_rate": 7.240653148169696e-07, "loss": 0.2732, "step": 13226 }, { "epoch": 0.83, "grad_norm": 2.354445537378943, "learning_rate": 7.235375070962747e-07, "loss": 0.2671, "step": 13227 }, { "epoch": 0.83, "grad_norm": 9.647517482193596, "learning_rate": 7.230098768136035e-07, "loss": 0.2676, "step": 13228 }, { "epoch": 0.83, "grad_norm": 2.466791352935791, "learning_rate": 7.22482423990849e-07, "loss": 0.2462, "step": 13229 }, { "epoch": 0.83, "grad_norm": 3.4521212964525203, "learning_rate": 7.219551486498976e-07, "loss": 0.2588, "step": 13230 }, { "epoch": 0.83, "grad_norm": 1.8064537812046055, "learning_rate": 7.214280508126259e-07, "loss": 0.2543, "step": 13231 }, { "epoch": 0.83, "grad_norm": 5.374520532959189, "learning_rate": 7.20901130500904e-07, "loss": 0.2537, "step": 13232 }, { "epoch": 0.83, "grad_norm": 3.0395352338624595, "learning_rate": 7.203743877365938e-07, "loss": 0.2387, "step": 13233 }, { "epoch": 0.83, "grad_norm": 2.892245557213895, "learning_rate": 7.198478225415522e-07, "loss": 0.2541, "step": 13234 }, { "epoch": 0.83, "grad_norm": 3.7168205865884385, "learning_rate": 7.193214349376276e-07, "loss": 0.2518, "step": 13235 }, { "epoch": 0.83, "grad_norm": 1.6872993447475668, "learning_rate": 7.187952249466585e-07, "loss": 0.261, "step": 13236 }, { "epoch": 0.83, "grad_norm": 2.214984353252086, "learning_rate": 7.182691925904795e-07, "loss": 0.2493, "step": 13237 }, { "epoch": 0.83, "grad_norm": 1.7230628565198083, "learning_rate": 7.177433378909183e-07, "loss": 0.2305, "step": 13238 }, { "epoch": 0.83, "grad_norm": 2.2796634006638157, "learning_rate": 7.172176608697923e-07, "loss": 0.2675, "step": 13239 }, { "epoch": 0.83, "grad_norm": 3.098432726002284, "learning_rate": 7.166921615489125e-07, "loss": 0.2395, "step": 13240 }, { "epoch": 0.83, "grad_norm": 1.9166388897722202, "learning_rate": 7.16166839950082e-07, "loss": 0.248, "step": 13241 }, { "epoch": 0.83, "grad_norm": 2.1520564944718763, "learning_rate": 7.156416960950991e-07, "loss": 0.2768, "step": 13242 }, { "epoch": 0.83, "grad_norm": 1.8871636579136695, "learning_rate": 7.15116730005752e-07, "loss": 0.2446, "step": 13243 }, { "epoch": 0.83, "grad_norm": 2.098539015704749, "learning_rate": 7.145919417038216e-07, "loss": 0.2442, "step": 13244 }, { "epoch": 0.83, "grad_norm": 1.5020532192567475, "learning_rate": 7.140673312110835e-07, "loss": 0.2506, "step": 13245 }, { "epoch": 0.83, "grad_norm": 1.376122086608251, "learning_rate": 7.135428985493053e-07, "loss": 0.2538, "step": 13246 }, { "epoch": 0.83, "grad_norm": 1.8465195022234036, "learning_rate": 7.130186437402464e-07, "loss": 0.2473, "step": 13247 }, { "epoch": 0.83, "grad_norm": 1.438491086285166, "learning_rate": 7.124945668056577e-07, "loss": 0.2552, "step": 13248 }, { "epoch": 0.83, "grad_norm": 1.513353316282746, "learning_rate": 7.119706677672844e-07, "loss": 0.2412, "step": 13249 }, { "epoch": 0.83, "grad_norm": 17.7935000376769, "learning_rate": 7.114469466468654e-07, "loss": 0.2554, "step": 13250 }, { "epoch": 0.83, "grad_norm": 0.6543348206489981, "learning_rate": 7.109234034661288e-07, "loss": 0.474, "step": 13251 }, { "epoch": 0.83, "grad_norm": 1.700080862271829, "learning_rate": 7.104000382467996e-07, "loss": 0.2575, "step": 13252 }, { "epoch": 0.83, "grad_norm": 2.3586833265131317, "learning_rate": 7.098768510105908e-07, "loss": 0.253, "step": 13253 }, { "epoch": 0.83, "grad_norm": 1.6050639256172698, "learning_rate": 7.093538417792128e-07, "loss": 0.2639, "step": 13254 }, { "epoch": 0.83, "grad_norm": 2.064275598744791, "learning_rate": 7.088310105743645e-07, "loss": 0.2479, "step": 13255 }, { "epoch": 0.83, "grad_norm": 0.6008283946217221, "learning_rate": 7.083083574177402e-07, "loss": 0.4699, "step": 13256 }, { "epoch": 0.83, "grad_norm": 4.864594186193576, "learning_rate": 7.077858823310235e-07, "loss": 0.2373, "step": 13257 }, { "epoch": 0.83, "grad_norm": 1.4337804645080292, "learning_rate": 7.072635853358956e-07, "loss": 0.227, "step": 13258 }, { "epoch": 0.83, "grad_norm": 2.7580940825618323, "learning_rate": 7.067414664540251e-07, "loss": 0.252, "step": 13259 }, { "epoch": 0.83, "grad_norm": 1.630406533128059, "learning_rate": 7.062195257070781e-07, "loss": 0.245, "step": 13260 }, { "epoch": 0.83, "grad_norm": 1.5015015359620418, "learning_rate": 7.056977631167083e-07, "loss": 0.2495, "step": 13261 }, { "epoch": 0.83, "grad_norm": 1.3854198484063311, "learning_rate": 7.051761787045674e-07, "loss": 0.2632, "step": 13262 }, { "epoch": 0.83, "grad_norm": 1.8801030117422544, "learning_rate": 7.046547724922953e-07, "loss": 0.2759, "step": 13263 }, { "epoch": 0.83, "grad_norm": 1.8163154524820007, "learning_rate": 7.041335445015258e-07, "loss": 0.2415, "step": 13264 }, { "epoch": 0.83, "grad_norm": 2.217397588848267, "learning_rate": 7.036124947538847e-07, "loss": 0.2631, "step": 13265 }, { "epoch": 0.83, "grad_norm": 1.6033136688166005, "learning_rate": 7.030916232709922e-07, "loss": 0.2685, "step": 13266 }, { "epoch": 0.83, "grad_norm": 0.6105248645404295, "learning_rate": 7.025709300744621e-07, "loss": 0.488, "step": 13267 }, { "epoch": 0.83, "grad_norm": 3.035529685884794, "learning_rate": 7.020504151858965e-07, "loss": 0.2497, "step": 13268 }, { "epoch": 0.83, "grad_norm": 1.5992868409870489, "learning_rate": 7.015300786268925e-07, "loss": 0.2444, "step": 13269 }, { "epoch": 0.83, "grad_norm": 4.169102854967177, "learning_rate": 7.010099204190418e-07, "loss": 0.2435, "step": 13270 }, { "epoch": 0.83, "grad_norm": 2.972304788053702, "learning_rate": 7.00489940583925e-07, "loss": 0.2737, "step": 13271 }, { "epoch": 0.83, "grad_norm": 0.5843583323031228, "learning_rate": 6.999701391431174e-07, "loss": 0.4534, "step": 13272 }, { "epoch": 0.83, "grad_norm": 2.4708934859929377, "learning_rate": 6.994505161181859e-07, "loss": 0.2625, "step": 13273 }, { "epoch": 0.83, "grad_norm": 2.672414264833315, "learning_rate": 6.989310715306902e-07, "loss": 0.2716, "step": 13274 }, { "epoch": 0.83, "grad_norm": 1.8924993545687268, "learning_rate": 6.984118054021854e-07, "loss": 0.2454, "step": 13275 }, { "epoch": 0.83, "grad_norm": 2.939114888197796, "learning_rate": 6.978927177542156e-07, "loss": 0.2576, "step": 13276 }, { "epoch": 0.83, "grad_norm": 3.333441468598719, "learning_rate": 6.973738086083171e-07, "loss": 0.2646, "step": 13277 }, { "epoch": 0.84, "grad_norm": 2.4711775409493217, "learning_rate": 6.968550779860222e-07, "loss": 0.2591, "step": 13278 }, { "epoch": 0.84, "grad_norm": 1.5540539054006046, "learning_rate": 6.963365259088539e-07, "loss": 0.2614, "step": 13279 }, { "epoch": 0.84, "grad_norm": 2.0696361553775606, "learning_rate": 6.958181523983276e-07, "loss": 0.23, "step": 13280 }, { "epoch": 0.84, "grad_norm": 1.8755504916308086, "learning_rate": 6.952999574759483e-07, "loss": 0.2683, "step": 13281 }, { "epoch": 0.84, "grad_norm": 3.721954281596013, "learning_rate": 6.947819411632223e-07, "loss": 0.2467, "step": 13282 }, { "epoch": 0.84, "grad_norm": 6.834870733049077, "learning_rate": 6.942641034816405e-07, "loss": 0.2611, "step": 13283 }, { "epoch": 0.84, "grad_norm": 2.995436936193978, "learning_rate": 6.937464444526887e-07, "loss": 0.2544, "step": 13284 }, { "epoch": 0.84, "grad_norm": 1.8579390339059376, "learning_rate": 6.932289640978446e-07, "loss": 0.2307, "step": 13285 }, { "epoch": 0.84, "grad_norm": 2.5764521190366008, "learning_rate": 6.927116624385821e-07, "loss": 0.2841, "step": 13286 }, { "epoch": 0.84, "grad_norm": 2.2031118651098205, "learning_rate": 6.92194539496363e-07, "loss": 0.2812, "step": 13287 }, { "epoch": 0.84, "grad_norm": 1.8542181699164357, "learning_rate": 6.916775952926425e-07, "loss": 0.2473, "step": 13288 }, { "epoch": 0.84, "grad_norm": 2.453543301183245, "learning_rate": 6.911608298488715e-07, "loss": 0.2425, "step": 13289 }, { "epoch": 0.84, "grad_norm": 2.2483628857390707, "learning_rate": 6.906442431864919e-07, "loss": 0.2574, "step": 13290 }, { "epoch": 0.84, "grad_norm": 2.43022301675465, "learning_rate": 6.90127835326937e-07, "loss": 0.2297, "step": 13291 }, { "epoch": 0.84, "grad_norm": 2.1328799968395646, "learning_rate": 6.896116062916335e-07, "loss": 0.2452, "step": 13292 }, { "epoch": 0.84, "grad_norm": 2.2169989321064327, "learning_rate": 6.89095556101999e-07, "loss": 0.2653, "step": 13293 }, { "epoch": 0.84, "grad_norm": 1.4787048257489264, "learning_rate": 6.885796847794485e-07, "loss": 0.2352, "step": 13294 }, { "epoch": 0.84, "grad_norm": 1.5187295195723474, "learning_rate": 6.880639923453846e-07, "loss": 0.2444, "step": 13295 }, { "epoch": 0.84, "grad_norm": 2.0944255461533845, "learning_rate": 6.875484788212039e-07, "loss": 0.2459, "step": 13296 }, { "epoch": 0.84, "grad_norm": 1.7562433286035743, "learning_rate": 6.870331442282957e-07, "loss": 0.2441, "step": 13297 }, { "epoch": 0.84, "grad_norm": 2.1571936644591587, "learning_rate": 6.865179885880446e-07, "loss": 0.2726, "step": 13298 }, { "epoch": 0.84, "grad_norm": 1.8982898554061316, "learning_rate": 6.860030119218241e-07, "loss": 0.2473, "step": 13299 }, { "epoch": 0.84, "grad_norm": 2.7195865458687813, "learning_rate": 6.854882142510011e-07, "loss": 0.2874, "step": 13300 }, { "epoch": 0.84, "grad_norm": 2.073445083949944, "learning_rate": 6.849735955969339e-07, "loss": 0.2666, "step": 13301 }, { "epoch": 0.84, "grad_norm": 3.4489188422992814, "learning_rate": 6.844591559809777e-07, "loss": 0.2475, "step": 13302 }, { "epoch": 0.84, "grad_norm": 2.3863288274383887, "learning_rate": 6.83944895424477e-07, "loss": 0.2506, "step": 13303 }, { "epoch": 0.84, "grad_norm": 1.4542813608180722, "learning_rate": 6.834308139487672e-07, "loss": 0.2434, "step": 13304 }, { "epoch": 0.84, "grad_norm": 3.339508487499196, "learning_rate": 6.829169115751799e-07, "loss": 0.2617, "step": 13305 }, { "epoch": 0.84, "grad_norm": 12.19686953944788, "learning_rate": 6.824031883250393e-07, "loss": 0.2518, "step": 13306 }, { "epoch": 0.84, "grad_norm": 5.828381161059583, "learning_rate": 6.818896442196593e-07, "loss": 0.2425, "step": 13307 }, { "epoch": 0.84, "grad_norm": 2.455156198998655, "learning_rate": 6.81376279280348e-07, "loss": 0.2705, "step": 13308 }, { "epoch": 0.84, "grad_norm": 1.6372669096726111, "learning_rate": 6.808630935284039e-07, "loss": 0.2509, "step": 13309 }, { "epoch": 0.84, "grad_norm": 2.09744505580353, "learning_rate": 6.803500869851232e-07, "loss": 0.2365, "step": 13310 }, { "epoch": 0.84, "grad_norm": 1.894054835084033, "learning_rate": 6.798372596717889e-07, "loss": 0.2611, "step": 13311 }, { "epoch": 0.84, "grad_norm": 0.6162670830008417, "learning_rate": 6.793246116096813e-07, "loss": 0.4855, "step": 13312 }, { "epoch": 0.84, "grad_norm": 2.4248765821721117, "learning_rate": 6.788121428200689e-07, "loss": 0.2712, "step": 13313 }, { "epoch": 0.84, "grad_norm": 2.9485942370631357, "learning_rate": 6.782998533242169e-07, "loss": 0.2617, "step": 13314 }, { "epoch": 0.84, "grad_norm": 2.025037599325337, "learning_rate": 6.777877431433799e-07, "loss": 0.2631, "step": 13315 }, { "epoch": 0.84, "grad_norm": 3.608005797982861, "learning_rate": 6.772758122988072e-07, "loss": 0.2475, "step": 13316 }, { "epoch": 0.84, "grad_norm": 5.677616574605857, "learning_rate": 6.767640608117376e-07, "loss": 0.2854, "step": 13317 }, { "epoch": 0.84, "grad_norm": 2.4455361472405373, "learning_rate": 6.762524887034072e-07, "loss": 0.2669, "step": 13318 }, { "epoch": 0.84, "grad_norm": 3.889383126616042, "learning_rate": 6.757410959950395e-07, "loss": 0.2522, "step": 13319 }, { "epoch": 0.84, "grad_norm": 4.433859600496642, "learning_rate": 6.752298827078563e-07, "loss": 0.2429, "step": 13320 }, { "epoch": 0.84, "grad_norm": 6.962131400877815, "learning_rate": 6.747188488630651e-07, "loss": 0.2784, "step": 13321 }, { "epoch": 0.84, "grad_norm": 8.928368177247108, "learning_rate": 6.74207994481873e-07, "loss": 0.2576, "step": 13322 }, { "epoch": 0.84, "grad_norm": 4.2226999805099, "learning_rate": 6.736973195854741e-07, "loss": 0.2595, "step": 13323 }, { "epoch": 0.84, "grad_norm": 3.0145970102896236, "learning_rate": 6.731868241950585e-07, "loss": 0.2345, "step": 13324 }, { "epoch": 0.84, "grad_norm": 1.79198770341662, "learning_rate": 6.726765083318054e-07, "loss": 0.2609, "step": 13325 }, { "epoch": 0.84, "grad_norm": 1.8091921624436693, "learning_rate": 6.721663720168897e-07, "loss": 0.25, "step": 13326 }, { "epoch": 0.84, "grad_norm": 1.5577873315549133, "learning_rate": 6.716564152714799e-07, "loss": 0.2506, "step": 13327 }, { "epoch": 0.84, "grad_norm": 1.750512885142994, "learning_rate": 6.711466381167336e-07, "loss": 0.2323, "step": 13328 }, { "epoch": 0.84, "grad_norm": 2.3412387824989707, "learning_rate": 6.706370405738011e-07, "loss": 0.2749, "step": 13329 }, { "epoch": 0.84, "grad_norm": 1.4847828746570872, "learning_rate": 6.701276226638287e-07, "loss": 0.2463, "step": 13330 }, { "epoch": 0.84, "grad_norm": 2.790574376609553, "learning_rate": 6.696183844079513e-07, "loss": 0.2551, "step": 13331 }, { "epoch": 0.84, "grad_norm": 2.0287865241314864, "learning_rate": 6.691093258272995e-07, "loss": 0.2726, "step": 13332 }, { "epoch": 0.84, "grad_norm": 1.8746573160134075, "learning_rate": 6.686004469429919e-07, "loss": 0.252, "step": 13333 }, { "epoch": 0.84, "grad_norm": 3.972510693307056, "learning_rate": 6.680917477761472e-07, "loss": 0.2859, "step": 13334 }, { "epoch": 0.84, "grad_norm": 1.8567151185780817, "learning_rate": 6.675832283478701e-07, "loss": 0.2478, "step": 13335 }, { "epoch": 0.84, "grad_norm": 2.6380011904104417, "learning_rate": 6.670748886792605e-07, "loss": 0.247, "step": 13336 }, { "epoch": 0.84, "grad_norm": 2.3114982050939092, "learning_rate": 6.665667287914085e-07, "loss": 0.2753, "step": 13337 }, { "epoch": 0.84, "grad_norm": 4.475359041361067, "learning_rate": 6.660587487054016e-07, "loss": 0.2607, "step": 13338 }, { "epoch": 0.84, "grad_norm": 0.6237312823262653, "learning_rate": 6.655509484423145e-07, "loss": 0.4768, "step": 13339 }, { "epoch": 0.84, "grad_norm": 1.687907174511587, "learning_rate": 6.650433280232177e-07, "loss": 0.2569, "step": 13340 }, { "epoch": 0.84, "grad_norm": 1.5854181964847236, "learning_rate": 6.64535887469171e-07, "loss": 0.252, "step": 13341 }, { "epoch": 0.84, "grad_norm": 2.270724207695215, "learning_rate": 6.640286268012325e-07, "loss": 0.2527, "step": 13342 }, { "epoch": 0.84, "grad_norm": 1.6781150362675301, "learning_rate": 6.635215460404482e-07, "loss": 0.2488, "step": 13343 }, { "epoch": 0.84, "grad_norm": 2.107836793691485, "learning_rate": 6.630146452078579e-07, "loss": 0.2637, "step": 13344 }, { "epoch": 0.84, "grad_norm": 2.5307032776601286, "learning_rate": 6.625079243244914e-07, "loss": 0.2651, "step": 13345 }, { "epoch": 0.84, "grad_norm": 4.159166297456615, "learning_rate": 6.62001383411377e-07, "loss": 0.2504, "step": 13346 }, { "epoch": 0.84, "grad_norm": 1.4547549816024652, "learning_rate": 6.6149502248953e-07, "loss": 0.2554, "step": 13347 }, { "epoch": 0.84, "grad_norm": 1.8706399694321145, "learning_rate": 6.609888415799598e-07, "loss": 0.25, "step": 13348 }, { "epoch": 0.84, "grad_norm": 0.6056446752643904, "learning_rate": 6.604828407036695e-07, "loss": 0.4846, "step": 13349 }, { "epoch": 0.84, "grad_norm": 4.340569330601539, "learning_rate": 6.599770198816547e-07, "loss": 0.2835, "step": 13350 }, { "epoch": 0.84, "grad_norm": 6.345268295357784, "learning_rate": 6.594713791349022e-07, "loss": 0.2702, "step": 13351 }, { "epoch": 0.84, "grad_norm": 3.0795552831337734, "learning_rate": 6.589659184843922e-07, "loss": 0.2562, "step": 13352 }, { "epoch": 0.84, "grad_norm": 1.5328508534892715, "learning_rate": 6.584606379510955e-07, "loss": 0.2458, "step": 13353 }, { "epoch": 0.84, "grad_norm": 1.522539358597986, "learning_rate": 6.579555375559793e-07, "loss": 0.2622, "step": 13354 }, { "epoch": 0.84, "grad_norm": 1.9604813076177836, "learning_rate": 6.574506173200008e-07, "loss": 0.2598, "step": 13355 }, { "epoch": 0.84, "grad_norm": 1.385096274681475, "learning_rate": 6.569458772641074e-07, "loss": 0.2707, "step": 13356 }, { "epoch": 0.84, "grad_norm": 3.670842291996775, "learning_rate": 6.564413174092443e-07, "loss": 0.279, "step": 13357 }, { "epoch": 0.84, "grad_norm": 1.42196166335391, "learning_rate": 6.55936937776347e-07, "loss": 0.2447, "step": 13358 }, { "epoch": 0.84, "grad_norm": 1.81297513134383, "learning_rate": 6.554327383863418e-07, "loss": 0.2646, "step": 13359 }, { "epoch": 0.84, "grad_norm": 1.9493446450335274, "learning_rate": 6.549287192601494e-07, "loss": 0.2555, "step": 13360 }, { "epoch": 0.84, "grad_norm": 1.8715200415549311, "learning_rate": 6.54424880418681e-07, "loss": 0.238, "step": 13361 }, { "epoch": 0.84, "grad_norm": 2.266689418164463, "learning_rate": 6.539212218828439e-07, "loss": 0.2708, "step": 13362 }, { "epoch": 0.84, "grad_norm": 2.5689967724985774, "learning_rate": 6.53417743673534e-07, "loss": 0.2632, "step": 13363 }, { "epoch": 0.84, "grad_norm": 2.131417965387195, "learning_rate": 6.529144458116432e-07, "loss": 0.2539, "step": 13364 }, { "epoch": 0.84, "grad_norm": 7.492466989934758, "learning_rate": 6.524113283180522e-07, "loss": 0.2598, "step": 13365 }, { "epoch": 0.84, "grad_norm": 2.6808072474302356, "learning_rate": 6.519083912136381e-07, "loss": 0.2658, "step": 13366 }, { "epoch": 0.84, "grad_norm": 1.5444482608135894, "learning_rate": 6.514056345192682e-07, "loss": 0.2466, "step": 13367 }, { "epoch": 0.84, "grad_norm": 2.100620416313012, "learning_rate": 6.50903058255803e-07, "loss": 0.2548, "step": 13368 }, { "epoch": 0.84, "grad_norm": 1.6240214069296524, "learning_rate": 6.504006624440934e-07, "loss": 0.2517, "step": 13369 }, { "epoch": 0.84, "grad_norm": 1.831067554539817, "learning_rate": 6.498984471049869e-07, "loss": 0.2366, "step": 13370 }, { "epoch": 0.84, "grad_norm": 2.425528581014166, "learning_rate": 6.493964122593193e-07, "loss": 0.276, "step": 13371 }, { "epoch": 0.84, "grad_norm": 1.7891392092614682, "learning_rate": 6.488945579279237e-07, "loss": 0.2584, "step": 13372 }, { "epoch": 0.84, "grad_norm": 3.0941617305695686, "learning_rate": 6.483928841316201e-07, "loss": 0.2413, "step": 13373 }, { "epoch": 0.84, "grad_norm": 1.8179463764117592, "learning_rate": 6.47891390891226e-07, "loss": 0.2757, "step": 13374 }, { "epoch": 0.84, "grad_norm": 1.5556348084502554, "learning_rate": 6.473900782275482e-07, "loss": 0.2557, "step": 13375 }, { "epoch": 0.84, "grad_norm": 2.2595594744221827, "learning_rate": 6.468889461613875e-07, "loss": 0.2805, "step": 13376 }, { "epoch": 0.84, "grad_norm": 1.4269481979911813, "learning_rate": 6.463879947135349e-07, "loss": 0.2396, "step": 13377 }, { "epoch": 0.84, "grad_norm": 3.5734877924953476, "learning_rate": 6.458872239047776e-07, "loss": 0.2814, "step": 13378 }, { "epoch": 0.84, "grad_norm": 6.752253659863759, "learning_rate": 6.453866337558939e-07, "loss": 0.2464, "step": 13379 }, { "epoch": 0.84, "grad_norm": 3.141801705001186, "learning_rate": 6.448862242876536e-07, "loss": 0.2844, "step": 13380 }, { "epoch": 0.84, "grad_norm": 2.0042754335064585, "learning_rate": 6.443859955208187e-07, "loss": 0.2642, "step": 13381 }, { "epoch": 0.84, "grad_norm": 2.36639946167784, "learning_rate": 6.438859474761461e-07, "loss": 0.261, "step": 13382 }, { "epoch": 0.84, "grad_norm": 1.7727691878110658, "learning_rate": 6.43386080174383e-07, "loss": 0.2633, "step": 13383 }, { "epoch": 0.84, "grad_norm": 2.5624324691703557, "learning_rate": 6.428863936362694e-07, "loss": 0.2822, "step": 13384 }, { "epoch": 0.84, "grad_norm": 2.512964644654413, "learning_rate": 6.423868878825373e-07, "loss": 0.2765, "step": 13385 }, { "epoch": 0.84, "grad_norm": 2.037510962546739, "learning_rate": 6.418875629339133e-07, "loss": 0.2756, "step": 13386 }, { "epoch": 0.84, "grad_norm": 2.0939592328857977, "learning_rate": 6.413884188111163e-07, "loss": 0.2613, "step": 13387 }, { "epoch": 0.84, "grad_norm": 3.3063107985055353, "learning_rate": 6.408894555348555e-07, "loss": 0.2566, "step": 13388 }, { "epoch": 0.84, "grad_norm": 2.2158554344475223, "learning_rate": 6.40390673125833e-07, "loss": 0.2719, "step": 13389 }, { "epoch": 0.84, "grad_norm": 1.8427488080983283, "learning_rate": 6.398920716047458e-07, "loss": 0.2431, "step": 13390 }, { "epoch": 0.84, "grad_norm": 1.3407186830334374, "learning_rate": 6.39393650992281e-07, "loss": 0.2597, "step": 13391 }, { "epoch": 0.84, "grad_norm": 3.373527650414928, "learning_rate": 6.388954113091195e-07, "loss": 0.2386, "step": 13392 }, { "epoch": 0.84, "grad_norm": 1.4077006452566116, "learning_rate": 6.383973525759318e-07, "loss": 0.2544, "step": 13393 }, { "epoch": 0.84, "grad_norm": 4.117246292700643, "learning_rate": 6.378994748133855e-07, "loss": 0.2587, "step": 13394 }, { "epoch": 0.84, "grad_norm": 2.6371280794912155, "learning_rate": 6.374017780421387e-07, "loss": 0.2621, "step": 13395 }, { "epoch": 0.84, "grad_norm": 2.445796724466462, "learning_rate": 6.369042622828408e-07, "loss": 0.2597, "step": 13396 }, { "epoch": 0.84, "grad_norm": 2.233675456331351, "learning_rate": 6.364069275561341e-07, "loss": 0.2545, "step": 13397 }, { "epoch": 0.84, "grad_norm": 8.886902055526752, "learning_rate": 6.359097738826559e-07, "loss": 0.2439, "step": 13398 }, { "epoch": 0.84, "grad_norm": 2.4013864313638993, "learning_rate": 6.354128012830319e-07, "loss": 0.2656, "step": 13399 }, { "epoch": 0.84, "grad_norm": 1.2310331489065025, "learning_rate": 6.349160097778839e-07, "loss": 0.2474, "step": 13400 }, { "epoch": 0.84, "grad_norm": 4.737654862163965, "learning_rate": 6.344193993878223e-07, "loss": 0.247, "step": 13401 }, { "epoch": 0.84, "grad_norm": 0.5846140988984052, "learning_rate": 6.339229701334543e-07, "loss": 0.4467, "step": 13402 }, { "epoch": 0.84, "grad_norm": 1.4099723567048166, "learning_rate": 6.334267220353779e-07, "loss": 0.2469, "step": 13403 }, { "epoch": 0.84, "grad_norm": 1.6596960463805375, "learning_rate": 6.329306551141834e-07, "loss": 0.2722, "step": 13404 }, { "epoch": 0.84, "grad_norm": 4.121620019677974, "learning_rate": 6.324347693904515e-07, "loss": 0.2378, "step": 13405 }, { "epoch": 0.84, "grad_norm": 1.9842353800341574, "learning_rate": 6.319390648847596e-07, "loss": 0.2748, "step": 13406 }, { "epoch": 0.84, "grad_norm": 3.082599947725112, "learning_rate": 6.314435416176745e-07, "loss": 0.2491, "step": 13407 }, { "epoch": 0.84, "grad_norm": 1.825142349968624, "learning_rate": 6.30948199609755e-07, "loss": 0.2587, "step": 13408 }, { "epoch": 0.84, "grad_norm": 1.5584881233554497, "learning_rate": 6.304530388815555e-07, "loss": 0.2518, "step": 13409 }, { "epoch": 0.84, "grad_norm": 1.3550523837795236, "learning_rate": 6.299580594536214e-07, "loss": 0.2402, "step": 13410 }, { "epoch": 0.84, "grad_norm": 13.775748833236635, "learning_rate": 6.294632613464891e-07, "loss": 0.2489, "step": 13411 }, { "epoch": 0.84, "grad_norm": 1.4795996961937647, "learning_rate": 6.289686445806897e-07, "loss": 0.2497, "step": 13412 }, { "epoch": 0.84, "grad_norm": 1.737286113311703, "learning_rate": 6.284742091767437e-07, "loss": 0.2534, "step": 13413 }, { "epoch": 0.84, "grad_norm": 2.0572373881665493, "learning_rate": 6.279799551551685e-07, "loss": 0.258, "step": 13414 }, { "epoch": 0.84, "grad_norm": 2.7420740158125616, "learning_rate": 6.274858825364693e-07, "loss": 0.2472, "step": 13415 }, { "epoch": 0.84, "grad_norm": 2.535841739020183, "learning_rate": 6.269919913411487e-07, "loss": 0.2548, "step": 13416 }, { "epoch": 0.84, "grad_norm": 2.37352567868235, "learning_rate": 6.264982815896964e-07, "loss": 0.2687, "step": 13417 }, { "epoch": 0.84, "grad_norm": 2.0267893266036907, "learning_rate": 6.260047533025998e-07, "loss": 0.2646, "step": 13418 }, { "epoch": 0.84, "grad_norm": 4.328546667924095, "learning_rate": 6.255114065003353e-07, "loss": 0.2637, "step": 13419 }, { "epoch": 0.84, "grad_norm": 2.644197023306434, "learning_rate": 6.250182412033723e-07, "loss": 0.2681, "step": 13420 }, { "epoch": 0.84, "grad_norm": 4.605194812598993, "learning_rate": 6.245252574321719e-07, "loss": 0.2687, "step": 13421 }, { "epoch": 0.84, "grad_norm": 2.342267842668547, "learning_rate": 6.24032455207192e-07, "loss": 0.2654, "step": 13422 }, { "epoch": 0.84, "grad_norm": 1.5008454522030399, "learning_rate": 6.235398345488769e-07, "loss": 0.2371, "step": 13423 }, { "epoch": 0.84, "grad_norm": 2.216123174695491, "learning_rate": 6.230473954776683e-07, "loss": 0.262, "step": 13424 }, { "epoch": 0.84, "grad_norm": 3.1194285837111244, "learning_rate": 6.225551380139966e-07, "loss": 0.241, "step": 13425 }, { "epoch": 0.84, "grad_norm": 1.9680174260662269, "learning_rate": 6.220630621782886e-07, "loss": 0.2508, "step": 13426 }, { "epoch": 0.84, "grad_norm": 2.765124437463708, "learning_rate": 6.215711679909603e-07, "loss": 0.2565, "step": 13427 }, { "epoch": 0.84, "grad_norm": 2.0869895718564586, "learning_rate": 6.210794554724209e-07, "loss": 0.2593, "step": 13428 }, { "epoch": 0.84, "grad_norm": 2.613307153027159, "learning_rate": 6.205879246430718e-07, "loss": 0.2364, "step": 13429 }, { "epoch": 0.84, "grad_norm": 1.7907274267962165, "learning_rate": 6.200965755233085e-07, "loss": 0.2368, "step": 13430 }, { "epoch": 0.84, "grad_norm": 2.0093124444776462, "learning_rate": 6.19605408133519e-07, "loss": 0.2706, "step": 13431 }, { "epoch": 0.84, "grad_norm": 2.8182818504541047, "learning_rate": 6.191144224940815e-07, "loss": 0.2419, "step": 13432 }, { "epoch": 0.84, "grad_norm": 2.4251340973267377, "learning_rate": 6.186236186253669e-07, "loss": 0.2584, "step": 13433 }, { "epoch": 0.84, "grad_norm": 1.4680766014110684, "learning_rate": 6.181329965477417e-07, "loss": 0.2584, "step": 13434 }, { "epoch": 0.84, "grad_norm": 16.852248030799743, "learning_rate": 6.176425562815613e-07, "loss": 0.273, "step": 13435 }, { "epoch": 0.84, "grad_norm": 2.2213768022193507, "learning_rate": 6.171522978471755e-07, "loss": 0.257, "step": 13436 }, { "epoch": 0.85, "grad_norm": 1.7927775350305657, "learning_rate": 6.166622212649248e-07, "loss": 0.2699, "step": 13437 }, { "epoch": 0.85, "grad_norm": 1.7780035068961886, "learning_rate": 6.161723265551439e-07, "loss": 0.2439, "step": 13438 }, { "epoch": 0.85, "grad_norm": 1.3467121746151032, "learning_rate": 6.156826137381605e-07, "loss": 0.2427, "step": 13439 }, { "epoch": 0.85, "grad_norm": 1.8487489334041787, "learning_rate": 6.151930828342933e-07, "loss": 0.2558, "step": 13440 }, { "epoch": 0.85, "grad_norm": 2.1103522994585706, "learning_rate": 6.147037338638523e-07, "loss": 0.2488, "step": 13441 }, { "epoch": 0.85, "grad_norm": 1.5836701096911896, "learning_rate": 6.142145668471434e-07, "loss": 0.248, "step": 13442 }, { "epoch": 0.85, "grad_norm": 7.656884641141751, "learning_rate": 6.137255818044618e-07, "loss": 0.2581, "step": 13443 }, { "epoch": 0.85, "grad_norm": 1.5778648933304922, "learning_rate": 6.132367787560972e-07, "loss": 0.2572, "step": 13444 }, { "epoch": 0.85, "grad_norm": 2.228521511300527, "learning_rate": 6.127481577223293e-07, "loss": 0.2729, "step": 13445 }, { "epoch": 0.85, "grad_norm": 1.9416217940728462, "learning_rate": 6.12259718723433e-07, "loss": 0.2716, "step": 13446 }, { "epoch": 0.85, "grad_norm": 21.189065981942754, "learning_rate": 6.117714617796755e-07, "loss": 0.2664, "step": 13447 }, { "epoch": 0.85, "grad_norm": 1.5858810560868786, "learning_rate": 6.11283386911315e-07, "loss": 0.2734, "step": 13448 }, { "epoch": 0.85, "grad_norm": 1.8034508169949752, "learning_rate": 6.107954941386002e-07, "loss": 0.2564, "step": 13449 }, { "epoch": 0.85, "grad_norm": 2.4459616840143066, "learning_rate": 6.103077834817778e-07, "loss": 0.2415, "step": 13450 }, { "epoch": 0.85, "grad_norm": 1.5788569586631858, "learning_rate": 6.098202549610821e-07, "loss": 0.2687, "step": 13451 }, { "epoch": 0.85, "grad_norm": 1.6741883371995105, "learning_rate": 6.09332908596742e-07, "loss": 0.2449, "step": 13452 }, { "epoch": 0.85, "grad_norm": 3.280979706528602, "learning_rate": 6.088457444089774e-07, "loss": 0.2589, "step": 13453 }, { "epoch": 0.85, "grad_norm": 2.0334725165814875, "learning_rate": 6.083587624180021e-07, "loss": 0.2751, "step": 13454 }, { "epoch": 0.85, "grad_norm": 2.0825827042994396, "learning_rate": 6.078719626440238e-07, "loss": 0.265, "step": 13455 }, { "epoch": 0.85, "grad_norm": 2.2214091746545925, "learning_rate": 6.073853451072387e-07, "loss": 0.253, "step": 13456 }, { "epoch": 0.85, "grad_norm": 5.350869610333454, "learning_rate": 6.068989098278366e-07, "loss": 0.2475, "step": 13457 }, { "epoch": 0.85, "grad_norm": 6.5054463094168895, "learning_rate": 6.064126568260026e-07, "loss": 0.2722, "step": 13458 }, { "epoch": 0.85, "grad_norm": 4.375200479008572, "learning_rate": 6.059265861219122e-07, "loss": 0.2624, "step": 13459 }, { "epoch": 0.85, "grad_norm": 1.491099614455404, "learning_rate": 6.054406977357308e-07, "loss": 0.2601, "step": 13460 }, { "epoch": 0.85, "grad_norm": 2.080453525909033, "learning_rate": 6.049549916876213e-07, "loss": 0.2557, "step": 13461 }, { "epoch": 0.85, "grad_norm": 1.7852621489247482, "learning_rate": 6.044694679977353e-07, "loss": 0.2426, "step": 13462 }, { "epoch": 0.85, "grad_norm": 3.50377367865537, "learning_rate": 6.03984126686219e-07, "loss": 0.2959, "step": 13463 }, { "epoch": 0.85, "grad_norm": 2.8221389980922518, "learning_rate": 6.034989677732095e-07, "loss": 0.2645, "step": 13464 }, { "epoch": 0.85, "grad_norm": 1.782062025100652, "learning_rate": 6.030139912788363e-07, "loss": 0.2419, "step": 13465 }, { "epoch": 0.85, "grad_norm": 1.7677736292569717, "learning_rate": 6.025291972232233e-07, "loss": 0.2638, "step": 13466 }, { "epoch": 0.85, "grad_norm": 2.0981586975400246, "learning_rate": 6.020445856264845e-07, "loss": 0.2484, "step": 13467 }, { "epoch": 0.85, "grad_norm": 5.568847040983956, "learning_rate": 6.015601565087265e-07, "loss": 0.2353, "step": 13468 }, { "epoch": 0.85, "grad_norm": 1.535192413258961, "learning_rate": 6.010759098900514e-07, "loss": 0.2656, "step": 13469 }, { "epoch": 0.85, "grad_norm": 1.9884449494052558, "learning_rate": 6.005918457905491e-07, "loss": 0.2595, "step": 13470 }, { "epoch": 0.85, "grad_norm": 1.727137316725863, "learning_rate": 6.001079642303059e-07, "loss": 0.2539, "step": 13471 }, { "epoch": 0.85, "grad_norm": 1.5961615519099264, "learning_rate": 5.996242652293987e-07, "loss": 0.2397, "step": 13472 }, { "epoch": 0.85, "grad_norm": 4.032893371999254, "learning_rate": 5.99140748807896e-07, "loss": 0.2435, "step": 13473 }, { "epoch": 0.85, "grad_norm": 1.7176294075536171, "learning_rate": 5.98657414985861e-07, "loss": 0.2473, "step": 13474 }, { "epoch": 0.85, "grad_norm": 1.4133225690296671, "learning_rate": 5.981742637833465e-07, "loss": 0.2331, "step": 13475 }, { "epoch": 0.85, "grad_norm": 3.8912674668183675, "learning_rate": 5.976912952204017e-07, "loss": 0.2321, "step": 13476 }, { "epoch": 0.85, "grad_norm": 1.7216631369538566, "learning_rate": 5.972085093170637e-07, "loss": 0.2466, "step": 13477 }, { "epoch": 0.85, "grad_norm": 4.1920283523365836, "learning_rate": 5.967259060933644e-07, "loss": 0.2559, "step": 13478 }, { "epoch": 0.85, "grad_norm": 0.6549034677481446, "learning_rate": 5.962434855693295e-07, "loss": 0.4658, "step": 13479 }, { "epoch": 0.85, "grad_norm": 1.6649424133775486, "learning_rate": 5.957612477649743e-07, "loss": 0.2529, "step": 13480 }, { "epoch": 0.85, "grad_norm": 3.3857659518307384, "learning_rate": 5.952791927003066e-07, "loss": 0.2601, "step": 13481 }, { "epoch": 0.85, "grad_norm": 1.6784070898508838, "learning_rate": 5.9479732039533e-07, "loss": 0.2544, "step": 13482 }, { "epoch": 0.85, "grad_norm": 5.187974414621806, "learning_rate": 5.94315630870036e-07, "loss": 0.2726, "step": 13483 }, { "epoch": 0.85, "grad_norm": 1.5241617959174945, "learning_rate": 5.938341241444123e-07, "loss": 0.2262, "step": 13484 }, { "epoch": 0.85, "grad_norm": 3.8372876965840628, "learning_rate": 5.93352800238437e-07, "loss": 0.2463, "step": 13485 }, { "epoch": 0.85, "grad_norm": 1.8398225505536796, "learning_rate": 5.928716591720813e-07, "loss": 0.2664, "step": 13486 }, { "epoch": 0.85, "grad_norm": 1.4749406718398648, "learning_rate": 5.92390700965309e-07, "loss": 0.2532, "step": 13487 }, { "epoch": 0.85, "grad_norm": 2.5737951844579836, "learning_rate": 5.91909925638075e-07, "loss": 0.2412, "step": 13488 }, { "epoch": 0.85, "grad_norm": 2.508596175108206, "learning_rate": 5.914293332103266e-07, "loss": 0.2619, "step": 13489 }, { "epoch": 0.85, "grad_norm": 3.189303442915712, "learning_rate": 5.90948923702006e-07, "loss": 0.2576, "step": 13490 }, { "epoch": 0.85, "grad_norm": 1.539680287597827, "learning_rate": 5.90468697133047e-07, "loss": 0.2418, "step": 13491 }, { "epoch": 0.85, "grad_norm": 1.6590691911248443, "learning_rate": 5.89988653523374e-07, "loss": 0.2508, "step": 13492 }, { "epoch": 0.85, "grad_norm": 1.7274892066635184, "learning_rate": 5.895087928929033e-07, "loss": 0.2595, "step": 13493 }, { "epoch": 0.85, "grad_norm": 1.7782888705814661, "learning_rate": 5.890291152615479e-07, "loss": 0.2515, "step": 13494 }, { "epoch": 0.85, "grad_norm": 2.8518785472865065, "learning_rate": 5.885496206492097e-07, "loss": 0.2582, "step": 13495 }, { "epoch": 0.85, "grad_norm": 1.550453497656156, "learning_rate": 5.880703090757833e-07, "loss": 0.2589, "step": 13496 }, { "epoch": 0.85, "grad_norm": 2.3147100621447727, "learning_rate": 5.875911805611551e-07, "loss": 0.2595, "step": 13497 }, { "epoch": 0.85, "grad_norm": 2.3720167509519614, "learning_rate": 5.871122351252056e-07, "loss": 0.2464, "step": 13498 }, { "epoch": 0.85, "grad_norm": 1.546748493072562, "learning_rate": 5.866334727878093e-07, "loss": 0.2449, "step": 13499 }, { "epoch": 0.85, "grad_norm": 1.3805248661240959, "learning_rate": 5.861548935688288e-07, "loss": 0.2417, "step": 13500 }, { "epoch": 0.85, "grad_norm": 5.565451613317308, "learning_rate": 5.856764974881213e-07, "loss": 0.261, "step": 13501 }, { "epoch": 0.85, "grad_norm": 4.48745688257381, "learning_rate": 5.851982845655368e-07, "loss": 0.2569, "step": 13502 }, { "epoch": 0.85, "grad_norm": 1.2245977594989972, "learning_rate": 5.847202548209174e-07, "loss": 0.2456, "step": 13503 }, { "epoch": 0.85, "grad_norm": 1.8972157072057165, "learning_rate": 5.842424082740972e-07, "loss": 0.2516, "step": 13504 }, { "epoch": 0.85, "grad_norm": 3.8840926956516535, "learning_rate": 5.837647449449019e-07, "loss": 0.241, "step": 13505 }, { "epoch": 0.85, "grad_norm": 1.6658739249500758, "learning_rate": 5.832872648531512e-07, "loss": 0.2783, "step": 13506 }, { "epoch": 0.85, "grad_norm": 2.0168126895491363, "learning_rate": 5.828099680186577e-07, "loss": 0.2419, "step": 13507 }, { "epoch": 0.85, "grad_norm": 1.9599077828272837, "learning_rate": 5.823328544612245e-07, "loss": 0.2429, "step": 13508 }, { "epoch": 0.85, "grad_norm": 1.7095248415156747, "learning_rate": 5.818559242006472e-07, "loss": 0.2533, "step": 13509 }, { "epoch": 0.85, "grad_norm": 1.7421267957726232, "learning_rate": 5.813791772567157e-07, "loss": 0.2464, "step": 13510 }, { "epoch": 0.85, "grad_norm": 1.800904870901792, "learning_rate": 5.809026136492107e-07, "loss": 0.2547, "step": 13511 }, { "epoch": 0.85, "grad_norm": 3.881181483130517, "learning_rate": 5.804262333979044e-07, "loss": 0.2457, "step": 13512 }, { "epoch": 0.85, "grad_norm": 4.552193320850763, "learning_rate": 5.799500365225647e-07, "loss": 0.2456, "step": 13513 }, { "epoch": 0.85, "grad_norm": 1.77824722130231, "learning_rate": 5.794740230429475e-07, "loss": 0.257, "step": 13514 }, { "epoch": 0.85, "grad_norm": 2.251834361956646, "learning_rate": 5.789981929788063e-07, "loss": 0.2541, "step": 13515 }, { "epoch": 0.85, "grad_norm": 1.7435382526825194, "learning_rate": 5.785225463498828e-07, "loss": 0.2603, "step": 13516 }, { "epoch": 0.85, "grad_norm": 1.6986746749677004, "learning_rate": 5.780470831759111e-07, "loss": 0.2572, "step": 13517 }, { "epoch": 0.85, "grad_norm": 4.969212550955034, "learning_rate": 5.775718034766209e-07, "loss": 0.2665, "step": 13518 }, { "epoch": 0.85, "grad_norm": 1.6106812182464696, "learning_rate": 5.77096707271732e-07, "loss": 0.267, "step": 13519 }, { "epoch": 0.85, "grad_norm": 2.7315584862627755, "learning_rate": 5.766217945809554e-07, "loss": 0.2478, "step": 13520 }, { "epoch": 0.85, "grad_norm": 2.6835443075396217, "learning_rate": 5.761470654239987e-07, "loss": 0.2552, "step": 13521 }, { "epoch": 0.85, "grad_norm": 1.980618816403487, "learning_rate": 5.756725198205565e-07, "loss": 0.2636, "step": 13522 }, { "epoch": 0.85, "grad_norm": 3.3422479895103936, "learning_rate": 5.751981577903216e-07, "loss": 0.2599, "step": 13523 }, { "epoch": 0.85, "grad_norm": 1.5699725414163723, "learning_rate": 5.747239793529735e-07, "loss": 0.2315, "step": 13524 }, { "epoch": 0.85, "grad_norm": 1.614291219139042, "learning_rate": 5.742499845281874e-07, "loss": 0.2545, "step": 13525 }, { "epoch": 0.85, "grad_norm": 2.364417162361661, "learning_rate": 5.737761733356312e-07, "loss": 0.2654, "step": 13526 }, { "epoch": 0.85, "grad_norm": 1.5999843211238662, "learning_rate": 5.733025457949625e-07, "loss": 0.231, "step": 13527 }, { "epoch": 0.85, "grad_norm": 2.106616621026424, "learning_rate": 5.728291019258347e-07, "loss": 0.263, "step": 13528 }, { "epoch": 0.85, "grad_norm": 1.9067278643917807, "learning_rate": 5.723558417478914e-07, "loss": 0.2597, "step": 13529 }, { "epoch": 0.85, "grad_norm": 1.8953330005585292, "learning_rate": 5.718827652807673e-07, "loss": 0.256, "step": 13530 }, { "epoch": 0.85, "grad_norm": 2.104440011041646, "learning_rate": 5.714098725440936e-07, "loss": 0.2536, "step": 13531 }, { "epoch": 0.85, "grad_norm": 2.749262134426467, "learning_rate": 5.709371635574907e-07, "loss": 0.2563, "step": 13532 }, { "epoch": 0.85, "grad_norm": 0.5855358582502723, "learning_rate": 5.704646383405698e-07, "loss": 0.4516, "step": 13533 }, { "epoch": 0.85, "grad_norm": 1.4983159479219632, "learning_rate": 5.699922969129406e-07, "loss": 0.2454, "step": 13534 }, { "epoch": 0.85, "grad_norm": 3.589545383496322, "learning_rate": 5.695201392941985e-07, "loss": 0.2669, "step": 13535 }, { "epoch": 0.85, "grad_norm": 1.378932170923166, "learning_rate": 5.690481655039359e-07, "loss": 0.2542, "step": 13536 }, { "epoch": 0.85, "grad_norm": 2.2701012800953952, "learning_rate": 5.685763755617357e-07, "loss": 0.2683, "step": 13537 }, { "epoch": 0.85, "grad_norm": 2.00697994797012, "learning_rate": 5.68104769487171e-07, "loss": 0.2499, "step": 13538 }, { "epoch": 0.85, "grad_norm": 2.9762545860281695, "learning_rate": 5.676333472998131e-07, "loss": 0.2611, "step": 13539 }, { "epoch": 0.85, "grad_norm": 3.0532568180829864, "learning_rate": 5.671621090192203e-07, "loss": 0.2431, "step": 13540 }, { "epoch": 0.85, "grad_norm": 5.670252802575042, "learning_rate": 5.66691054664944e-07, "loss": 0.2341, "step": 13541 }, { "epoch": 0.85, "grad_norm": 2.7785526737823396, "learning_rate": 5.662201842565301e-07, "loss": 0.2701, "step": 13542 }, { "epoch": 0.85, "grad_norm": 2.335948586720108, "learning_rate": 5.657494978135169e-07, "loss": 0.2658, "step": 13543 }, { "epoch": 0.85, "grad_norm": 1.6821751756813734, "learning_rate": 5.652789953554338e-07, "loss": 0.2459, "step": 13544 }, { "epoch": 0.85, "grad_norm": 2.7165543974639266, "learning_rate": 5.648086769018019e-07, "loss": 0.2508, "step": 13545 }, { "epoch": 0.85, "grad_norm": 1.3152126272537419, "learning_rate": 5.643385424721342e-07, "loss": 0.243, "step": 13546 }, { "epoch": 0.85, "grad_norm": 2.6376253550899063, "learning_rate": 5.63868592085941e-07, "loss": 0.2722, "step": 13547 }, { "epoch": 0.85, "grad_norm": 2.2887753226860474, "learning_rate": 5.633988257627187e-07, "loss": 0.2433, "step": 13548 }, { "epoch": 0.85, "grad_norm": 0.577455232390544, "learning_rate": 5.629292435219586e-07, "loss": 0.4639, "step": 13549 }, { "epoch": 0.85, "grad_norm": 1.8079642268829752, "learning_rate": 5.624598453831453e-07, "loss": 0.2437, "step": 13550 }, { "epoch": 0.85, "grad_norm": 1.853634505212085, "learning_rate": 5.619906313657558e-07, "loss": 0.2513, "step": 13551 }, { "epoch": 0.85, "grad_norm": 2.0104315005559203, "learning_rate": 5.615216014892577e-07, "loss": 0.2434, "step": 13552 }, { "epoch": 0.85, "grad_norm": 1.7080766982438698, "learning_rate": 5.610527557731126e-07, "loss": 0.2691, "step": 13553 }, { "epoch": 0.85, "grad_norm": 1.636742689216997, "learning_rate": 5.605840942367713e-07, "loss": 0.2523, "step": 13554 }, { "epoch": 0.85, "grad_norm": 2.7398274197926993, "learning_rate": 5.60115616899683e-07, "loss": 0.2946, "step": 13555 }, { "epoch": 0.85, "grad_norm": 1.470485277610429, "learning_rate": 5.596473237812833e-07, "loss": 0.2481, "step": 13556 }, { "epoch": 0.85, "grad_norm": 2.2016074473377323, "learning_rate": 5.591792149010022e-07, "loss": 0.2522, "step": 13557 }, { "epoch": 0.85, "grad_norm": 2.0406811642020126, "learning_rate": 5.587112902782638e-07, "loss": 0.2464, "step": 13558 }, { "epoch": 0.85, "grad_norm": 1.7116847885826194, "learning_rate": 5.582435499324829e-07, "loss": 0.2581, "step": 13559 }, { "epoch": 0.85, "grad_norm": 2.439569701263958, "learning_rate": 5.577759938830668e-07, "loss": 0.277, "step": 13560 }, { "epoch": 0.85, "grad_norm": 2.3058979220050366, "learning_rate": 5.57308622149415e-07, "loss": 0.2702, "step": 13561 }, { "epoch": 0.85, "grad_norm": 1.5615495482226585, "learning_rate": 5.568414347509188e-07, "loss": 0.2566, "step": 13562 }, { "epoch": 0.85, "grad_norm": 2.491320604001727, "learning_rate": 5.563744317069642e-07, "loss": 0.2455, "step": 13563 }, { "epoch": 0.85, "grad_norm": 1.9199517882555668, "learning_rate": 5.559076130369273e-07, "loss": 0.2255, "step": 13564 }, { "epoch": 0.85, "grad_norm": 2.3006377515034426, "learning_rate": 5.554409787601755e-07, "loss": 0.242, "step": 13565 }, { "epoch": 0.85, "grad_norm": 3.6010076661097696, "learning_rate": 5.549745288960729e-07, "loss": 0.2485, "step": 13566 }, { "epoch": 0.85, "grad_norm": 2.113651352580854, "learning_rate": 5.545082634639726e-07, "loss": 0.2385, "step": 13567 }, { "epoch": 0.85, "grad_norm": 1.8076755945116019, "learning_rate": 5.54042182483221e-07, "loss": 0.2542, "step": 13568 }, { "epoch": 0.85, "grad_norm": 1.5008753258119847, "learning_rate": 5.535762859731547e-07, "loss": 0.2428, "step": 13569 }, { "epoch": 0.85, "grad_norm": 2.2858870591617326, "learning_rate": 5.531105739531073e-07, "loss": 0.2642, "step": 13570 }, { "epoch": 0.85, "grad_norm": 2.312244975118548, "learning_rate": 5.52645046442401e-07, "loss": 0.2473, "step": 13571 }, { "epoch": 0.85, "grad_norm": 2.3237576285969577, "learning_rate": 5.521797034603499e-07, "loss": 0.2434, "step": 13572 }, { "epoch": 0.85, "grad_norm": 1.5538232390013953, "learning_rate": 5.517145450262639e-07, "loss": 0.2557, "step": 13573 }, { "epoch": 0.85, "grad_norm": 2.0129384304103244, "learning_rate": 5.512495711594418e-07, "loss": 0.2396, "step": 13574 }, { "epoch": 0.85, "grad_norm": 1.6514149107581721, "learning_rate": 5.507847818791778e-07, "loss": 0.2658, "step": 13575 }, { "epoch": 0.85, "grad_norm": 1.6918203015153468, "learning_rate": 5.503201772047556e-07, "loss": 0.2524, "step": 13576 }, { "epoch": 0.85, "grad_norm": 0.6636673205522431, "learning_rate": 5.498557571554519e-07, "loss": 0.4798, "step": 13577 }, { "epoch": 0.85, "grad_norm": 2.565588593550987, "learning_rate": 5.493915217505386e-07, "loss": 0.2524, "step": 13578 }, { "epoch": 0.85, "grad_norm": 2.7679157801907177, "learning_rate": 5.489274710092746e-07, "loss": 0.2763, "step": 13579 }, { "epoch": 0.85, "grad_norm": 1.4383566699951482, "learning_rate": 5.484636049509173e-07, "loss": 0.2352, "step": 13580 }, { "epoch": 0.85, "grad_norm": 1.800519061403932, "learning_rate": 5.47999923594712e-07, "loss": 0.2356, "step": 13581 }, { "epoch": 0.85, "grad_norm": 0.5724452368813069, "learning_rate": 5.475364269598959e-07, "loss": 0.4391, "step": 13582 }, { "epoch": 0.85, "grad_norm": 2.7872398232081483, "learning_rate": 5.470731150657033e-07, "loss": 0.2431, "step": 13583 }, { "epoch": 0.85, "grad_norm": 1.934105082982598, "learning_rate": 5.466099879313563e-07, "loss": 0.2714, "step": 13584 }, { "epoch": 0.85, "grad_norm": 3.8558269694509377, "learning_rate": 5.461470455760698e-07, "loss": 0.2742, "step": 13585 }, { "epoch": 0.85, "grad_norm": 1.7107592585793334, "learning_rate": 5.45684288019055e-07, "loss": 0.2502, "step": 13586 }, { "epoch": 0.85, "grad_norm": 1.9950131298505234, "learning_rate": 5.452217152795092e-07, "loss": 0.2433, "step": 13587 }, { "epoch": 0.85, "grad_norm": 2.3131958912287827, "learning_rate": 5.447593273766283e-07, "loss": 0.2603, "step": 13588 }, { "epoch": 0.85, "grad_norm": 4.12074925694779, "learning_rate": 5.442971243295964e-07, "loss": 0.248, "step": 13589 }, { "epoch": 0.85, "grad_norm": 1.9615825929841024, "learning_rate": 5.438351061575897e-07, "loss": 0.2313, "step": 13590 }, { "epoch": 0.85, "grad_norm": 2.175260952878419, "learning_rate": 5.433732728797808e-07, "loss": 0.2481, "step": 13591 }, { "epoch": 0.85, "grad_norm": 2.0754082213129466, "learning_rate": 5.429116245153304e-07, "loss": 0.2584, "step": 13592 }, { "epoch": 0.85, "grad_norm": 1.691298911844114, "learning_rate": 5.424501610833921e-07, "loss": 0.2483, "step": 13593 }, { "epoch": 0.85, "grad_norm": 1.6745785689146537, "learning_rate": 5.41988882603115e-07, "loss": 0.2606, "step": 13594 }, { "epoch": 0.85, "grad_norm": 2.6332867781682845, "learning_rate": 5.415277890936377e-07, "loss": 0.2592, "step": 13595 }, { "epoch": 0.86, "grad_norm": 1.6742408535272966, "learning_rate": 5.410668805740921e-07, "loss": 0.2464, "step": 13596 }, { "epoch": 0.86, "grad_norm": 5.014704593624091, "learning_rate": 5.406061570636012e-07, "loss": 0.3037, "step": 13597 }, { "epoch": 0.86, "grad_norm": 2.3033209178746947, "learning_rate": 5.401456185812809e-07, "loss": 0.2595, "step": 13598 }, { "epoch": 0.86, "grad_norm": 3.3024031502079834, "learning_rate": 5.396852651462409e-07, "loss": 0.2795, "step": 13599 }, { "epoch": 0.86, "grad_norm": 0.6235627715437748, "learning_rate": 5.392250967775825e-07, "loss": 0.4798, "step": 13600 }, { "epoch": 0.86, "grad_norm": 1.7957041900238733, "learning_rate": 5.387651134943966e-07, "loss": 0.2478, "step": 13601 }, { "epoch": 0.86, "grad_norm": 3.086923984648585, "learning_rate": 5.383053153157703e-07, "loss": 0.2586, "step": 13602 }, { "epoch": 0.86, "grad_norm": 3.866657186222648, "learning_rate": 5.378457022607819e-07, "loss": 0.2716, "step": 13603 }, { "epoch": 0.86, "grad_norm": 1.7137875550053123, "learning_rate": 5.373862743485014e-07, "loss": 0.2462, "step": 13604 }, { "epoch": 0.86, "grad_norm": 1.3864030986668119, "learning_rate": 5.369270315979908e-07, "loss": 0.2356, "step": 13605 }, { "epoch": 0.86, "grad_norm": 2.4348947069003946, "learning_rate": 5.364679740283041e-07, "loss": 0.2462, "step": 13606 }, { "epoch": 0.86, "grad_norm": 1.5469037905580496, "learning_rate": 5.360091016584901e-07, "loss": 0.2552, "step": 13607 }, { "epoch": 0.86, "grad_norm": 1.607533664012968, "learning_rate": 5.355504145075874e-07, "loss": 0.2375, "step": 13608 }, { "epoch": 0.86, "grad_norm": 1.5724723128126032, "learning_rate": 5.350919125946269e-07, "loss": 0.2497, "step": 13609 }, { "epoch": 0.86, "grad_norm": 2.0558780389893303, "learning_rate": 5.346335959386333e-07, "loss": 0.2488, "step": 13610 }, { "epoch": 0.86, "grad_norm": 1.9389550348105287, "learning_rate": 5.341754645586244e-07, "loss": 0.2505, "step": 13611 }, { "epoch": 0.86, "grad_norm": 4.871688509492094, "learning_rate": 5.337175184736077e-07, "loss": 0.2274, "step": 13612 }, { "epoch": 0.86, "grad_norm": 3.4524242667794094, "learning_rate": 5.332597577025845e-07, "loss": 0.2704, "step": 13613 }, { "epoch": 0.86, "grad_norm": 1.7026679359135917, "learning_rate": 5.328021822645463e-07, "loss": 0.262, "step": 13614 }, { "epoch": 0.86, "grad_norm": 1.9682632115171763, "learning_rate": 5.323447921784813e-07, "loss": 0.2597, "step": 13615 }, { "epoch": 0.86, "grad_norm": 2.8148672515326587, "learning_rate": 5.318875874633661e-07, "loss": 0.2491, "step": 13616 }, { "epoch": 0.86, "grad_norm": 1.560120130082495, "learning_rate": 5.314305681381704e-07, "loss": 0.2768, "step": 13617 }, { "epoch": 0.86, "grad_norm": 12.424553683391421, "learning_rate": 5.309737342218574e-07, "loss": 0.2412, "step": 13618 }, { "epoch": 0.86, "grad_norm": 1.572884717040157, "learning_rate": 5.30517085733383e-07, "loss": 0.2529, "step": 13619 }, { "epoch": 0.86, "grad_norm": 5.6180434518983775, "learning_rate": 5.300606226916938e-07, "loss": 0.2694, "step": 13620 }, { "epoch": 0.86, "grad_norm": 2.0940017996641846, "learning_rate": 5.296043451157285e-07, "loss": 0.2543, "step": 13621 }, { "epoch": 0.86, "grad_norm": 3.3697832821973646, "learning_rate": 5.29148253024418e-07, "loss": 0.2648, "step": 13622 }, { "epoch": 0.86, "grad_norm": 1.9218459094353313, "learning_rate": 5.286923464366889e-07, "loss": 0.2452, "step": 13623 }, { "epoch": 0.86, "grad_norm": 2.248390007409386, "learning_rate": 5.282366253714555e-07, "loss": 0.2567, "step": 13624 }, { "epoch": 0.86, "grad_norm": 1.1861708252662804, "learning_rate": 5.277810898476283e-07, "loss": 0.2561, "step": 13625 }, { "epoch": 0.86, "grad_norm": 2.343048640813765, "learning_rate": 5.273257398841059e-07, "loss": 0.2496, "step": 13626 }, { "epoch": 0.86, "grad_norm": 4.602411140422076, "learning_rate": 5.268705754997838e-07, "loss": 0.2558, "step": 13627 }, { "epoch": 0.86, "grad_norm": 1.6285792398465866, "learning_rate": 5.26415596713547e-07, "loss": 0.2552, "step": 13628 }, { "epoch": 0.86, "grad_norm": 1.755575220971988, "learning_rate": 5.259608035442726e-07, "loss": 0.2617, "step": 13629 }, { "epoch": 0.86, "grad_norm": 2.0699700530282445, "learning_rate": 5.255061960108309e-07, "loss": 0.2441, "step": 13630 }, { "epoch": 0.86, "grad_norm": 2.030379181313497, "learning_rate": 5.250517741320854e-07, "loss": 0.2528, "step": 13631 }, { "epoch": 0.86, "grad_norm": 2.351254439335697, "learning_rate": 5.245975379268892e-07, "loss": 0.247, "step": 13632 }, { "epoch": 0.86, "grad_norm": 1.2964371653384898, "learning_rate": 5.241434874140916e-07, "loss": 0.2484, "step": 13633 }, { "epoch": 0.86, "grad_norm": 2.1064082182356647, "learning_rate": 5.236896226125293e-07, "loss": 0.2584, "step": 13634 }, { "epoch": 0.86, "grad_norm": 2.374465623231935, "learning_rate": 5.232359435410366e-07, "loss": 0.2646, "step": 13635 }, { "epoch": 0.86, "grad_norm": 1.9347914803804405, "learning_rate": 5.227824502184364e-07, "loss": 0.2734, "step": 13636 }, { "epoch": 0.86, "grad_norm": 3.2171314538639177, "learning_rate": 5.223291426635447e-07, "loss": 0.2626, "step": 13637 }, { "epoch": 0.86, "grad_norm": 2.2351019414444853, "learning_rate": 5.21876020895169e-07, "loss": 0.2675, "step": 13638 }, { "epoch": 0.86, "grad_norm": 2.2136681821585618, "learning_rate": 5.214230849321111e-07, "loss": 0.2506, "step": 13639 }, { "epoch": 0.86, "grad_norm": 2.83766489256189, "learning_rate": 5.209703347931655e-07, "loss": 0.2735, "step": 13640 }, { "epoch": 0.86, "grad_norm": 3.174020625700449, "learning_rate": 5.205177704971159e-07, "loss": 0.2671, "step": 13641 }, { "epoch": 0.86, "grad_norm": 2.430670413441794, "learning_rate": 5.200653920627402e-07, "loss": 0.2969, "step": 13642 }, { "epoch": 0.86, "grad_norm": 1.3900125754278547, "learning_rate": 5.196131995088089e-07, "loss": 0.2399, "step": 13643 }, { "epoch": 0.86, "grad_norm": 2.2168712897411376, "learning_rate": 5.191611928540846e-07, "loss": 0.2571, "step": 13644 }, { "epoch": 0.86, "grad_norm": 2.418268250393664, "learning_rate": 5.187093721173198e-07, "loss": 0.2576, "step": 13645 }, { "epoch": 0.86, "grad_norm": 3.0594401712808112, "learning_rate": 5.182577373172642e-07, "loss": 0.2432, "step": 13646 }, { "epoch": 0.86, "grad_norm": 3.3299983009554044, "learning_rate": 5.178062884726547e-07, "loss": 0.2425, "step": 13647 }, { "epoch": 0.86, "grad_norm": 1.743647726617732, "learning_rate": 5.173550256022241e-07, "loss": 0.2393, "step": 13648 }, { "epoch": 0.86, "grad_norm": 2.8736867425503996, "learning_rate": 5.169039487246958e-07, "loss": 0.2661, "step": 13649 }, { "epoch": 0.86, "grad_norm": 1.7689666442057652, "learning_rate": 5.164530578587845e-07, "loss": 0.2541, "step": 13650 }, { "epoch": 0.86, "grad_norm": 26.14275002768576, "learning_rate": 5.160023530232005e-07, "loss": 0.2637, "step": 13651 }, { "epoch": 0.86, "grad_norm": 1.4701743228404707, "learning_rate": 5.155518342366434e-07, "loss": 0.2543, "step": 13652 }, { "epoch": 0.86, "grad_norm": 3.1312873255106126, "learning_rate": 5.151015015178046e-07, "loss": 0.2821, "step": 13653 }, { "epoch": 0.86, "grad_norm": 12.76115379036279, "learning_rate": 5.14651354885371e-07, "loss": 0.233, "step": 13654 }, { "epoch": 0.86, "grad_norm": 1.5255376443447688, "learning_rate": 5.142013943580205e-07, "loss": 0.2645, "step": 13655 }, { "epoch": 0.86, "grad_norm": 1.810116332603644, "learning_rate": 5.137516199544218e-07, "loss": 0.2718, "step": 13656 }, { "epoch": 0.86, "grad_norm": 1.7718179422281233, "learning_rate": 5.133020316932369e-07, "loss": 0.2302, "step": 13657 }, { "epoch": 0.86, "grad_norm": 2.1572912137807267, "learning_rate": 5.128526295931191e-07, "loss": 0.2445, "step": 13658 }, { "epoch": 0.86, "grad_norm": 4.02531717500136, "learning_rate": 5.124034136727163e-07, "loss": 0.2533, "step": 13659 }, { "epoch": 0.86, "grad_norm": 2.6125455807078777, "learning_rate": 5.119543839506668e-07, "loss": 0.2394, "step": 13660 }, { "epoch": 0.86, "grad_norm": 2.12656228148672, "learning_rate": 5.115055404456009e-07, "loss": 0.2765, "step": 13661 }, { "epoch": 0.86, "grad_norm": 1.8159771758577405, "learning_rate": 5.110568831761426e-07, "loss": 0.24, "step": 13662 }, { "epoch": 0.86, "grad_norm": 5.327296188057833, "learning_rate": 5.106084121609084e-07, "loss": 0.2397, "step": 13663 }, { "epoch": 0.86, "grad_norm": 5.4702360420956335, "learning_rate": 5.101601274185053e-07, "loss": 0.2505, "step": 13664 }, { "epoch": 0.86, "grad_norm": 2.1902812194953794, "learning_rate": 5.097120289675333e-07, "loss": 0.2713, "step": 13665 }, { "epoch": 0.86, "grad_norm": 1.485408023468269, "learning_rate": 5.092641168265839e-07, "loss": 0.2488, "step": 13666 }, { "epoch": 0.86, "grad_norm": 2.7370814254015277, "learning_rate": 5.088163910142436e-07, "loss": 0.26, "step": 13667 }, { "epoch": 0.86, "grad_norm": 1.8861457859821287, "learning_rate": 5.083688515490881e-07, "loss": 0.2478, "step": 13668 }, { "epoch": 0.86, "grad_norm": 1.9405465602943042, "learning_rate": 5.079214984496866e-07, "loss": 0.2441, "step": 13669 }, { "epoch": 0.86, "grad_norm": 0.5700041359223784, "learning_rate": 5.074743317346009e-07, "loss": 0.4724, "step": 13670 }, { "epoch": 0.86, "grad_norm": 1.9281739077401292, "learning_rate": 5.070273514223856e-07, "loss": 0.2599, "step": 13671 }, { "epoch": 0.86, "grad_norm": 6.494583527548455, "learning_rate": 5.065805575315863e-07, "loss": 0.2602, "step": 13672 }, { "epoch": 0.86, "grad_norm": 1.7839197396281787, "learning_rate": 5.061339500807405e-07, "loss": 0.2468, "step": 13673 }, { "epoch": 0.86, "grad_norm": 1.8851465665212364, "learning_rate": 5.056875290883778e-07, "loss": 0.2432, "step": 13674 }, { "epoch": 0.86, "grad_norm": 1.9874666537147156, "learning_rate": 5.05241294573024e-07, "loss": 0.2552, "step": 13675 }, { "epoch": 0.86, "grad_norm": 2.640552388955175, "learning_rate": 5.047952465531913e-07, "loss": 0.2825, "step": 13676 }, { "epoch": 0.86, "grad_norm": 1.8929427231370501, "learning_rate": 5.04349385047389e-07, "loss": 0.2486, "step": 13677 }, { "epoch": 0.86, "grad_norm": 4.17727300472136, "learning_rate": 5.039037100741151e-07, "loss": 0.2703, "step": 13678 }, { "epoch": 0.86, "grad_norm": 2.327589387383158, "learning_rate": 5.03458221651863e-07, "loss": 0.2643, "step": 13679 }, { "epoch": 0.86, "grad_norm": 4.17373665414888, "learning_rate": 5.030129197991162e-07, "loss": 0.2398, "step": 13680 }, { "epoch": 0.86, "grad_norm": 2.434944858566004, "learning_rate": 5.025678045343507e-07, "loss": 0.248, "step": 13681 }, { "epoch": 0.86, "grad_norm": 1.5754890071326177, "learning_rate": 5.021228758760344e-07, "loss": 0.2445, "step": 13682 }, { "epoch": 0.86, "grad_norm": 1.7873413417206307, "learning_rate": 5.016781338426302e-07, "loss": 0.256, "step": 13683 }, { "epoch": 0.86, "grad_norm": 2.3658169989413795, "learning_rate": 5.01233578452589e-07, "loss": 0.2623, "step": 13684 }, { "epoch": 0.86, "grad_norm": 2.8189939340242294, "learning_rate": 5.007892097243588e-07, "loss": 0.256, "step": 13685 }, { "epoch": 0.86, "grad_norm": 2.9382916850983416, "learning_rate": 5.003450276763744e-07, "loss": 0.2364, "step": 13686 }, { "epoch": 0.86, "grad_norm": 1.8440640191006197, "learning_rate": 4.999010323270681e-07, "loss": 0.2578, "step": 13687 }, { "epoch": 0.86, "grad_norm": 0.6054567038581964, "learning_rate": 4.994572236948608e-07, "loss": 0.4699, "step": 13688 }, { "epoch": 0.86, "grad_norm": 2.129324880100568, "learning_rate": 4.990136017981678e-07, "loss": 0.253, "step": 13689 }, { "epoch": 0.86, "grad_norm": 1.9361134202184722, "learning_rate": 4.985701666553938e-07, "loss": 0.2667, "step": 13690 }, { "epoch": 0.86, "grad_norm": 1.8162227472397376, "learning_rate": 4.981269182849391e-07, "loss": 0.2513, "step": 13691 }, { "epoch": 0.86, "grad_norm": 1.6966984178058855, "learning_rate": 4.976838567051956e-07, "loss": 0.2838, "step": 13692 }, { "epoch": 0.86, "grad_norm": 1.6477148137784117, "learning_rate": 4.972409819345458e-07, "loss": 0.2677, "step": 13693 }, { "epoch": 0.86, "grad_norm": 1.4327947666479184, "learning_rate": 4.967982939913646e-07, "loss": 0.2505, "step": 13694 }, { "epoch": 0.86, "grad_norm": 2.744999869600108, "learning_rate": 4.963557928940215e-07, "loss": 0.2514, "step": 13695 }, { "epoch": 0.86, "grad_norm": 1.366089183714872, "learning_rate": 4.959134786608766e-07, "loss": 0.2442, "step": 13696 }, { "epoch": 0.86, "grad_norm": 1.657426892235389, "learning_rate": 4.95471351310281e-07, "loss": 0.2579, "step": 13697 }, { "epoch": 0.86, "grad_norm": 2.26204346002552, "learning_rate": 4.950294108605791e-07, "loss": 0.2478, "step": 13698 }, { "epoch": 0.86, "grad_norm": 1.7537968602009564, "learning_rate": 4.94587657330109e-07, "loss": 0.2577, "step": 13699 }, { "epoch": 0.86, "grad_norm": 1.5656007558597471, "learning_rate": 4.941460907372004e-07, "loss": 0.2605, "step": 13700 }, { "epoch": 0.86, "grad_norm": 1.9827331043292564, "learning_rate": 4.937047111001736e-07, "loss": 0.248, "step": 13701 }, { "epoch": 0.86, "grad_norm": 1.591557031894639, "learning_rate": 4.932635184373413e-07, "loss": 0.2339, "step": 13702 }, { "epoch": 0.86, "grad_norm": 1.7381995198494065, "learning_rate": 4.928225127670123e-07, "loss": 0.2604, "step": 13703 }, { "epoch": 0.86, "grad_norm": 2.173427432588498, "learning_rate": 4.923816941074822e-07, "loss": 0.2622, "step": 13704 }, { "epoch": 0.86, "grad_norm": 2.787580573787161, "learning_rate": 4.919410624770421e-07, "loss": 0.265, "step": 13705 }, { "epoch": 0.86, "grad_norm": 1.5860671434993519, "learning_rate": 4.915006178939724e-07, "loss": 0.2413, "step": 13706 }, { "epoch": 0.86, "grad_norm": 1.9961316065912431, "learning_rate": 4.910603603765523e-07, "loss": 0.2378, "step": 13707 }, { "epoch": 0.86, "grad_norm": 5.846316850977396, "learning_rate": 4.906202899430463e-07, "loss": 0.2671, "step": 13708 }, { "epoch": 0.86, "grad_norm": 2.5538424167891645, "learning_rate": 4.901804066117144e-07, "loss": 0.2652, "step": 13709 }, { "epoch": 0.86, "grad_norm": 2.961948405895061, "learning_rate": 4.897407104008067e-07, "loss": 0.2477, "step": 13710 }, { "epoch": 0.86, "grad_norm": 1.6083660588937367, "learning_rate": 4.893012013285692e-07, "loss": 0.2593, "step": 13711 }, { "epoch": 0.86, "grad_norm": 2.1479071891206654, "learning_rate": 4.88861879413236e-07, "loss": 0.2587, "step": 13712 }, { "epoch": 0.86, "grad_norm": 1.5699512695917646, "learning_rate": 4.884227446730372e-07, "loss": 0.2552, "step": 13713 }, { "epoch": 0.86, "grad_norm": 1.493464694099462, "learning_rate": 4.879837971261897e-07, "loss": 0.2419, "step": 13714 }, { "epoch": 0.86, "grad_norm": 3.30024647646415, "learning_rate": 4.875450367909101e-07, "loss": 0.245, "step": 13715 }, { "epoch": 0.86, "grad_norm": 1.6888980037888235, "learning_rate": 4.871064636854029e-07, "loss": 0.255, "step": 13716 }, { "epoch": 0.86, "grad_norm": 2.7814713071352997, "learning_rate": 4.866680778278637e-07, "loss": 0.2575, "step": 13717 }, { "epoch": 0.86, "grad_norm": 1.6932877666833304, "learning_rate": 4.862298792364817e-07, "loss": 0.2323, "step": 13718 }, { "epoch": 0.86, "grad_norm": 1.8334929928287969, "learning_rate": 4.857918679294405e-07, "loss": 0.2474, "step": 13719 }, { "epoch": 0.86, "grad_norm": 2.446489705834415, "learning_rate": 4.853540439249127e-07, "loss": 0.2401, "step": 13720 }, { "epoch": 0.86, "grad_norm": 1.7818780712405435, "learning_rate": 4.849164072410639e-07, "loss": 0.2392, "step": 13721 }, { "epoch": 0.86, "grad_norm": 2.837794344465931, "learning_rate": 4.844789578960524e-07, "loss": 0.248, "step": 13722 }, { "epoch": 0.86, "grad_norm": 2.40695280700196, "learning_rate": 4.840416959080307e-07, "loss": 0.2362, "step": 13723 }, { "epoch": 0.86, "grad_norm": 1.8239567390662956, "learning_rate": 4.836046212951406e-07, "loss": 0.2471, "step": 13724 }, { "epoch": 0.86, "grad_norm": 5.673927413885572, "learning_rate": 4.831677340755164e-07, "loss": 0.255, "step": 13725 }, { "epoch": 0.86, "grad_norm": 6.480471311827824, "learning_rate": 4.827310342672847e-07, "loss": 0.2653, "step": 13726 }, { "epoch": 0.86, "grad_norm": 1.8703191755513764, "learning_rate": 4.822945218885672e-07, "loss": 0.2748, "step": 13727 }, { "epoch": 0.86, "grad_norm": 16.017521689606188, "learning_rate": 4.818581969574743e-07, "loss": 0.2774, "step": 13728 }, { "epoch": 0.86, "grad_norm": 3.560156684088177, "learning_rate": 4.814220594921087e-07, "loss": 0.2772, "step": 13729 }, { "epoch": 0.86, "grad_norm": 3.2924727580181514, "learning_rate": 4.809861095105678e-07, "loss": 0.2722, "step": 13730 }, { "epoch": 0.86, "grad_norm": 7.2608226204571915, "learning_rate": 4.805503470309408e-07, "loss": 0.2402, "step": 13731 }, { "epoch": 0.86, "grad_norm": 4.364101019053066, "learning_rate": 4.801147720713079e-07, "loss": 0.2558, "step": 13732 }, { "epoch": 0.86, "grad_norm": 2.025503460901223, "learning_rate": 4.796793846497411e-07, "loss": 0.2509, "step": 13733 }, { "epoch": 0.86, "grad_norm": 1.9664366367185016, "learning_rate": 4.792441847843043e-07, "loss": 0.2557, "step": 13734 }, { "epoch": 0.86, "grad_norm": 2.5237687523827645, "learning_rate": 4.788091724930571e-07, "loss": 0.2648, "step": 13735 }, { "epoch": 0.86, "grad_norm": 1.3612160673713032, "learning_rate": 4.783743477940473e-07, "loss": 0.2591, "step": 13736 }, { "epoch": 0.86, "grad_norm": 1.576773378582313, "learning_rate": 4.779397107053174e-07, "loss": 0.2589, "step": 13737 }, { "epoch": 0.86, "grad_norm": 3.8327080019559063, "learning_rate": 4.775052612449005e-07, "loss": 0.2464, "step": 13738 }, { "epoch": 0.86, "grad_norm": 0.5667142764524086, "learning_rate": 4.770709994308242e-07, "loss": 0.4484, "step": 13739 }, { "epoch": 0.86, "grad_norm": 6.785820436627895, "learning_rate": 4.766369252811054e-07, "loss": 0.2348, "step": 13740 }, { "epoch": 0.86, "grad_norm": 1.524841412966559, "learning_rate": 4.762030388137551e-07, "loss": 0.2459, "step": 13741 }, { "epoch": 0.86, "grad_norm": 4.5118561493442915, "learning_rate": 4.75769340046775e-07, "loss": 0.2508, "step": 13742 }, { "epoch": 0.86, "grad_norm": 3.615071272129694, "learning_rate": 4.7533582899816133e-07, "loss": 0.2495, "step": 13743 }, { "epoch": 0.86, "grad_norm": 2.296283413077024, "learning_rate": 4.7490250568590137e-07, "loss": 0.2477, "step": 13744 }, { "epoch": 0.86, "grad_norm": 2.759273953850784, "learning_rate": 4.744693701279735e-07, "loss": 0.2501, "step": 13745 }, { "epoch": 0.86, "grad_norm": 1.827686209491277, "learning_rate": 4.7403642234234935e-07, "loss": 0.2392, "step": 13746 }, { "epoch": 0.86, "grad_norm": 1.9542298104846654, "learning_rate": 4.736036623469936e-07, "loss": 0.2857, "step": 13747 }, { "epoch": 0.86, "grad_norm": 1.6202546621098888, "learning_rate": 4.731710901598624e-07, "loss": 0.2453, "step": 13748 }, { "epoch": 0.86, "grad_norm": 0.6092639131500509, "learning_rate": 4.7273870579890247e-07, "loss": 0.455, "step": 13749 }, { "epoch": 0.86, "grad_norm": 1.5931816960150724, "learning_rate": 4.7230650928205447e-07, "loss": 0.2485, "step": 13750 }, { "epoch": 0.86, "grad_norm": 3.50300836641801, "learning_rate": 4.7187450062725126e-07, "loss": 0.2433, "step": 13751 }, { "epoch": 0.86, "grad_norm": 1.9782678999273278, "learning_rate": 4.7144267985241856e-07, "loss": 0.2375, "step": 13752 }, { "epoch": 0.86, "grad_norm": 3.880827791846489, "learning_rate": 4.7101104697547307e-07, "loss": 0.2495, "step": 13753 }, { "epoch": 0.86, "grad_norm": 1.4834969288427793, "learning_rate": 4.7057960201432275e-07, "loss": 0.2439, "step": 13754 }, { "epoch": 0.87, "grad_norm": 2.6071836496123604, "learning_rate": 4.7014834498687045e-07, "loss": 0.26, "step": 13755 }, { "epoch": 0.87, "grad_norm": 2.6627098367754756, "learning_rate": 4.697172759110097e-07, "loss": 0.2676, "step": 13756 }, { "epoch": 0.87, "grad_norm": 2.042789929514817, "learning_rate": 4.692863948046256e-07, "loss": 0.2488, "step": 13757 }, { "epoch": 0.87, "grad_norm": 1.7472584622802503, "learning_rate": 4.6885570168559493e-07, "loss": 0.2474, "step": 13758 }, { "epoch": 0.87, "grad_norm": 1.9202966305175444, "learning_rate": 4.6842519657179066e-07, "loss": 0.2454, "step": 13759 }, { "epoch": 0.87, "grad_norm": 1.9996697367178797, "learning_rate": 4.67994879481074e-07, "loss": 0.249, "step": 13760 }, { "epoch": 0.87, "grad_norm": 1.6712291343920274, "learning_rate": 4.6756475043130024e-07, "loss": 0.2482, "step": 13761 }, { "epoch": 0.87, "grad_norm": 1.5700233439317361, "learning_rate": 4.671348094403139e-07, "loss": 0.2602, "step": 13762 }, { "epoch": 0.87, "grad_norm": 6.035639140420502, "learning_rate": 4.667050565259568e-07, "loss": 0.2517, "step": 13763 }, { "epoch": 0.87, "grad_norm": 2.480546374106348, "learning_rate": 4.662754917060591e-07, "loss": 0.2502, "step": 13764 }, { "epoch": 0.87, "grad_norm": 14.413137043776564, "learning_rate": 4.658461149984439e-07, "loss": 0.259, "step": 13765 }, { "epoch": 0.87, "grad_norm": 1.9089607350914712, "learning_rate": 4.654169264209246e-07, "loss": 0.2489, "step": 13766 }, { "epoch": 0.87, "grad_norm": 2.7866982543067795, "learning_rate": 4.649879259913137e-07, "loss": 0.25, "step": 13767 }, { "epoch": 0.87, "grad_norm": 1.6881030706778166, "learning_rate": 4.645591137274091e-07, "loss": 0.2475, "step": 13768 }, { "epoch": 0.87, "grad_norm": 1.746952614699426, "learning_rate": 4.6413048964700224e-07, "loss": 0.2604, "step": 13769 }, { "epoch": 0.87, "grad_norm": 4.169900530410927, "learning_rate": 4.637020537678771e-07, "loss": 0.2567, "step": 13770 }, { "epoch": 0.87, "grad_norm": 3.6558378005064505, "learning_rate": 4.6327380610781235e-07, "loss": 0.2552, "step": 13771 }, { "epoch": 0.87, "grad_norm": 1.41977707890803, "learning_rate": 4.628457466845754e-07, "loss": 0.269, "step": 13772 }, { "epoch": 0.87, "grad_norm": 1.9287978507545889, "learning_rate": 4.6241787551592645e-07, "loss": 0.2408, "step": 13773 }, { "epoch": 0.87, "grad_norm": 2.0971421383884636, "learning_rate": 4.619901926196191e-07, "loss": 0.2337, "step": 13774 }, { "epoch": 0.87, "grad_norm": 1.8631539706243492, "learning_rate": 4.615626980134003e-07, "loss": 0.231, "step": 13775 }, { "epoch": 0.87, "grad_norm": 7.092877746318571, "learning_rate": 4.611353917150063e-07, "loss": 0.2634, "step": 13776 }, { "epoch": 0.87, "grad_norm": 6.893296375540724, "learning_rate": 4.6070827374216697e-07, "loss": 0.2471, "step": 13777 }, { "epoch": 0.87, "grad_norm": 2.5562991316454764, "learning_rate": 4.602813441126025e-07, "loss": 0.2428, "step": 13778 }, { "epoch": 0.87, "grad_norm": 2.4261447464071324, "learning_rate": 4.5985460284403037e-07, "loss": 0.2529, "step": 13779 }, { "epoch": 0.87, "grad_norm": 1.9234299626647677, "learning_rate": 4.5942804995415423e-07, "loss": 0.2584, "step": 13780 }, { "epoch": 0.87, "grad_norm": 2.9145088635375673, "learning_rate": 4.590016854606727e-07, "loss": 0.2472, "step": 13781 }, { "epoch": 0.87, "grad_norm": 2.63369464339832, "learning_rate": 4.585755093812766e-07, "loss": 0.2552, "step": 13782 }, { "epoch": 0.87, "grad_norm": 4.292750135238071, "learning_rate": 4.5814952173365067e-07, "loss": 0.2442, "step": 13783 }, { "epoch": 0.87, "grad_norm": 2.4874279774455776, "learning_rate": 4.57723722535468e-07, "loss": 0.2508, "step": 13784 }, { "epoch": 0.87, "grad_norm": 2.206289831535517, "learning_rate": 4.5729811180439567e-07, "loss": 0.2893, "step": 13785 }, { "epoch": 0.87, "grad_norm": 1.633603228069572, "learning_rate": 4.568726895580933e-07, "loss": 0.2497, "step": 13786 }, { "epoch": 0.87, "grad_norm": 1.9702627800297179, "learning_rate": 4.5644745581421293e-07, "loss": 0.2472, "step": 13787 }, { "epoch": 0.87, "grad_norm": 1.9207013771987873, "learning_rate": 4.560224105903971e-07, "loss": 0.2396, "step": 13788 }, { "epoch": 0.87, "grad_norm": 2.1453534898516478, "learning_rate": 4.5559755390428284e-07, "loss": 0.2474, "step": 13789 }, { "epoch": 0.87, "grad_norm": 1.5964527090143048, "learning_rate": 4.5517288577349773e-07, "loss": 0.2507, "step": 13790 }, { "epoch": 0.87, "grad_norm": 4.745744419491947, "learning_rate": 4.5474840621566264e-07, "loss": 0.2604, "step": 13791 }, { "epoch": 0.87, "grad_norm": 5.319181385274683, "learning_rate": 4.543241152483896e-07, "loss": 0.2559, "step": 13792 }, { "epoch": 0.87, "grad_norm": 1.4470094108943803, "learning_rate": 4.539000128892829e-07, "loss": 0.2411, "step": 13793 }, { "epoch": 0.87, "grad_norm": 1.677805217558302, "learning_rate": 4.534760991559384e-07, "loss": 0.2533, "step": 13794 }, { "epoch": 0.87, "grad_norm": 2.9837572300801964, "learning_rate": 4.5305237406594705e-07, "loss": 0.2482, "step": 13795 }, { "epoch": 0.87, "grad_norm": 2.477426482139802, "learning_rate": 4.526288376368887e-07, "loss": 0.2587, "step": 13796 }, { "epoch": 0.87, "grad_norm": 1.5367189378808117, "learning_rate": 4.5220548988633707e-07, "loss": 0.2347, "step": 13797 }, { "epoch": 0.87, "grad_norm": 1.7406631956229388, "learning_rate": 4.5178233083185694e-07, "loss": 0.2459, "step": 13798 }, { "epoch": 0.87, "grad_norm": 2.0021300722332454, "learning_rate": 4.5135936049100715e-07, "loss": 0.2563, "step": 13799 }, { "epoch": 0.87, "grad_norm": 2.8486010054680313, "learning_rate": 4.509365788813369e-07, "loss": 0.2592, "step": 13800 }, { "epoch": 0.87, "grad_norm": 1.6365894065618019, "learning_rate": 4.5051398602038775e-07, "loss": 0.2453, "step": 13801 }, { "epoch": 0.87, "grad_norm": 1.6871080344047993, "learning_rate": 4.5009158192569345e-07, "loss": 0.2438, "step": 13802 }, { "epoch": 0.87, "grad_norm": 1.6626935079153413, "learning_rate": 4.496693666147811e-07, "loss": 0.2542, "step": 13803 }, { "epoch": 0.87, "grad_norm": 1.5095499566522133, "learning_rate": 4.4924734010517e-07, "loss": 0.2382, "step": 13804 }, { "epoch": 0.87, "grad_norm": 2.3281388263377885, "learning_rate": 4.488255024143695e-07, "loss": 0.234, "step": 13805 }, { "epoch": 0.87, "grad_norm": 2.0891323273649456, "learning_rate": 4.484038535598817e-07, "loss": 0.2299, "step": 13806 }, { "epoch": 0.87, "grad_norm": 2.7026057419641507, "learning_rate": 4.479823935592037e-07, "loss": 0.2737, "step": 13807 }, { "epoch": 0.87, "grad_norm": 2.185720517961521, "learning_rate": 4.4756112242982153e-07, "loss": 0.2582, "step": 13808 }, { "epoch": 0.87, "grad_norm": 1.930030917707115, "learning_rate": 4.471400401892145e-07, "loss": 0.2391, "step": 13809 }, { "epoch": 0.87, "grad_norm": 46.063136179670856, "learning_rate": 4.4671914685485317e-07, "loss": 0.2607, "step": 13810 }, { "epoch": 0.87, "grad_norm": 2.6535333465397057, "learning_rate": 4.4629844244420126e-07, "loss": 0.2566, "step": 13811 }, { "epoch": 0.87, "grad_norm": 1.7253326486467209, "learning_rate": 4.458779269747171e-07, "loss": 0.2543, "step": 13812 }, { "epoch": 0.87, "grad_norm": 2.7635589860368603, "learning_rate": 4.4545760046384614e-07, "loss": 0.2454, "step": 13813 }, { "epoch": 0.87, "grad_norm": 0.5949430510246059, "learning_rate": 4.4503746292902825e-07, "loss": 0.5078, "step": 13814 }, { "epoch": 0.87, "grad_norm": 1.6246812372877706, "learning_rate": 4.4461751438769794e-07, "loss": 0.2505, "step": 13815 }, { "epoch": 0.87, "grad_norm": 1.6625477921143004, "learning_rate": 4.441977548572779e-07, "loss": 0.2535, "step": 13816 }, { "epoch": 0.87, "grad_norm": 2.0977268379278056, "learning_rate": 4.4377818435518474e-07, "loss": 0.2446, "step": 13817 }, { "epoch": 0.87, "grad_norm": 2.5827314666332564, "learning_rate": 4.433588028988267e-07, "loss": 0.2744, "step": 13818 }, { "epoch": 0.87, "grad_norm": 2.8016090532650186, "learning_rate": 4.4293961050560607e-07, "loss": 0.2382, "step": 13819 }, { "epoch": 0.87, "grad_norm": 5.132460261241813, "learning_rate": 4.4252060719291556e-07, "loss": 0.2425, "step": 13820 }, { "epoch": 0.87, "grad_norm": 2.5789131835596515, "learning_rate": 4.4210179297814016e-07, "loss": 0.2624, "step": 13821 }, { "epoch": 0.87, "grad_norm": 2.2596280978187036, "learning_rate": 4.4168316787865594e-07, "loss": 0.2693, "step": 13822 }, { "epoch": 0.87, "grad_norm": 2.649634793274662, "learning_rate": 4.412647319118346e-07, "loss": 0.2507, "step": 13823 }, { "epoch": 0.87, "grad_norm": 4.694741837403251, "learning_rate": 4.4084648509503724e-07, "loss": 0.2551, "step": 13824 }, { "epoch": 0.87, "grad_norm": 1.948856281137073, "learning_rate": 4.404284274456161e-07, "loss": 0.2437, "step": 13825 }, { "epoch": 0.87, "grad_norm": 2.0648872061383656, "learning_rate": 4.4001055898091894e-07, "loss": 0.2397, "step": 13826 }, { "epoch": 0.87, "grad_norm": 4.172524952943479, "learning_rate": 4.395928797182819e-07, "loss": 0.2837, "step": 13827 }, { "epoch": 0.87, "grad_norm": 1.4186274080291301, "learning_rate": 4.391753896750378e-07, "loss": 0.2512, "step": 13828 }, { "epoch": 0.87, "grad_norm": 1.491560574259554, "learning_rate": 4.3875808886850777e-07, "loss": 0.226, "step": 13829 }, { "epoch": 0.87, "grad_norm": 1.410133617998824, "learning_rate": 4.383409773160052e-07, "loss": 0.2469, "step": 13830 }, { "epoch": 0.87, "grad_norm": 1.6049657604459995, "learning_rate": 4.3792405503483903e-07, "loss": 0.2737, "step": 13831 }, { "epoch": 0.87, "grad_norm": 2.0627550030741637, "learning_rate": 4.375073220423065e-07, "loss": 0.2654, "step": 13832 }, { "epoch": 0.87, "grad_norm": 1.2355520813069347, "learning_rate": 4.370907783556988e-07, "loss": 0.2291, "step": 13833 }, { "epoch": 0.87, "grad_norm": 2.11886330585489, "learning_rate": 4.3667442399229985e-07, "loss": 0.242, "step": 13834 }, { "epoch": 0.87, "grad_norm": 1.6402330554041666, "learning_rate": 4.362582589693837e-07, "loss": 0.2468, "step": 13835 }, { "epoch": 0.87, "grad_norm": 2.214726740768506, "learning_rate": 4.358422833042192e-07, "loss": 0.2672, "step": 13836 }, { "epoch": 0.87, "grad_norm": 1.6252222120775173, "learning_rate": 4.354264970140654e-07, "loss": 0.259, "step": 13837 }, { "epoch": 0.87, "grad_norm": 1.9010466084452147, "learning_rate": 4.3501090011617286e-07, "loss": 0.2659, "step": 13838 }, { "epoch": 0.87, "grad_norm": 1.9746578949649074, "learning_rate": 4.3459549262778736e-07, "loss": 0.2697, "step": 13839 }, { "epoch": 0.87, "grad_norm": 1.6810623032065262, "learning_rate": 4.3418027456614277e-07, "loss": 0.2365, "step": 13840 }, { "epoch": 0.87, "grad_norm": 1.972024926647831, "learning_rate": 4.337652459484698e-07, "loss": 0.2651, "step": 13841 }, { "epoch": 0.87, "grad_norm": 3.0992942064946996, "learning_rate": 4.333504067919858e-07, "loss": 0.2449, "step": 13842 }, { "epoch": 0.87, "grad_norm": 2.4261767185343013, "learning_rate": 4.329357571139059e-07, "loss": 0.266, "step": 13843 }, { "epoch": 0.87, "grad_norm": 1.6206963025094863, "learning_rate": 4.3252129693143353e-07, "loss": 0.2383, "step": 13844 }, { "epoch": 0.87, "grad_norm": 1.9267780552117144, "learning_rate": 4.321070262617655e-07, "loss": 0.2376, "step": 13845 }, { "epoch": 0.87, "grad_norm": 2.13778457696387, "learning_rate": 4.3169294512208917e-07, "loss": 0.2397, "step": 13846 }, { "epoch": 0.87, "grad_norm": 1.8575974450737254, "learning_rate": 4.3127905352958796e-07, "loss": 0.2455, "step": 13847 }, { "epoch": 0.87, "grad_norm": 2.2793197489275094, "learning_rate": 4.308653515014327e-07, "loss": 0.2686, "step": 13848 }, { "epoch": 0.87, "grad_norm": 2.2701318796487207, "learning_rate": 4.304518390547907e-07, "loss": 0.2414, "step": 13849 }, { "epoch": 0.87, "grad_norm": 2.41990295399939, "learning_rate": 4.300385162068177e-07, "loss": 0.2486, "step": 13850 }, { "epoch": 0.87, "grad_norm": 2.0833381921166843, "learning_rate": 4.296253829746644e-07, "loss": 0.2442, "step": 13851 }, { "epoch": 0.87, "grad_norm": 1.911491157143949, "learning_rate": 4.292124393754715e-07, "loss": 0.2502, "step": 13852 }, { "epoch": 0.87, "grad_norm": 1.8997245751700704, "learning_rate": 4.287996854263737e-07, "loss": 0.245, "step": 13853 }, { "epoch": 0.87, "grad_norm": 12.068283235664751, "learning_rate": 4.283871211444951e-07, "loss": 0.2574, "step": 13854 }, { "epoch": 0.87, "grad_norm": 1.7681632505693208, "learning_rate": 4.2797474654695527e-07, "loss": 0.2595, "step": 13855 }, { "epoch": 0.87, "grad_norm": 2.108444660808345, "learning_rate": 4.2756256165086443e-07, "loss": 0.2533, "step": 13856 }, { "epoch": 0.87, "grad_norm": 4.354750050889478, "learning_rate": 4.27150566473325e-07, "loss": 0.2423, "step": 13857 }, { "epoch": 0.87, "grad_norm": 2.4171742333104715, "learning_rate": 4.267387610314294e-07, "loss": 0.2518, "step": 13858 }, { "epoch": 0.87, "grad_norm": 1.537594496454954, "learning_rate": 4.2632714534226685e-07, "loss": 0.2562, "step": 13859 }, { "epoch": 0.87, "grad_norm": 1.4358005144862205, "learning_rate": 4.259157194229152e-07, "loss": 0.2552, "step": 13860 }, { "epoch": 0.87, "grad_norm": 2.960417309318742, "learning_rate": 4.2550448329044426e-07, "loss": 0.2631, "step": 13861 }, { "epoch": 0.87, "grad_norm": 2.0160656549023637, "learning_rate": 4.2509343696191695e-07, "loss": 0.2341, "step": 13862 }, { "epoch": 0.87, "grad_norm": 2.1833622724811916, "learning_rate": 4.246825804543886e-07, "loss": 0.2584, "step": 13863 }, { "epoch": 0.87, "grad_norm": 4.0390217373261255, "learning_rate": 4.242719137849077e-07, "loss": 0.2477, "step": 13864 }, { "epoch": 0.87, "grad_norm": 2.1054123638811757, "learning_rate": 4.238614369705124e-07, "loss": 0.2627, "step": 13865 }, { "epoch": 0.87, "grad_norm": 1.906825018692233, "learning_rate": 4.2345115002823345e-07, "loss": 0.2481, "step": 13866 }, { "epoch": 0.87, "grad_norm": 2.700215689055635, "learning_rate": 4.2304105297509614e-07, "loss": 0.2509, "step": 13867 }, { "epoch": 0.87, "grad_norm": 0.6226385486277936, "learning_rate": 4.2263114582811515e-07, "loss": 0.4667, "step": 13868 }, { "epoch": 0.87, "grad_norm": 3.782780375876242, "learning_rate": 4.2222142860429805e-07, "loss": 0.2603, "step": 13869 }, { "epoch": 0.87, "grad_norm": 2.428520394096701, "learning_rate": 4.2181190132064396e-07, "loss": 0.2606, "step": 13870 }, { "epoch": 0.87, "grad_norm": 2.8714541166991, "learning_rate": 4.2140256399414603e-07, "loss": 0.25, "step": 13871 }, { "epoch": 0.87, "grad_norm": 1.749954869223884, "learning_rate": 4.2099341664178894e-07, "loss": 0.2418, "step": 13872 }, { "epoch": 0.87, "grad_norm": 1.5808420850754918, "learning_rate": 4.205844592805486e-07, "loss": 0.2579, "step": 13873 }, { "epoch": 0.87, "grad_norm": 1.6077489134122638, "learning_rate": 4.2017569192739195e-07, "loss": 0.2445, "step": 13874 }, { "epoch": 0.87, "grad_norm": 2.2296919846073795, "learning_rate": 4.1976711459928096e-07, "loss": 0.2458, "step": 13875 }, { "epoch": 0.87, "grad_norm": 2.3033601982153282, "learning_rate": 4.193587273131683e-07, "loss": 0.2425, "step": 13876 }, { "epoch": 0.87, "grad_norm": 1.5669571185389406, "learning_rate": 4.189505300859981e-07, "loss": 0.2415, "step": 13877 }, { "epoch": 0.87, "grad_norm": 1.371627591062167, "learning_rate": 4.185425229347062e-07, "loss": 0.2718, "step": 13878 }, { "epoch": 0.87, "grad_norm": 1.5515246372638904, "learning_rate": 4.1813470587622305e-07, "loss": 0.2468, "step": 13879 }, { "epoch": 0.87, "grad_norm": 1.5876116038820525, "learning_rate": 4.1772707892746954e-07, "loss": 0.2466, "step": 13880 }, { "epoch": 0.87, "grad_norm": 1.609968141072775, "learning_rate": 4.173196421053588e-07, "loss": 0.2574, "step": 13881 }, { "epoch": 0.87, "grad_norm": 1.7243799581860655, "learning_rate": 4.1691239542679507e-07, "loss": 0.2411, "step": 13882 }, { "epoch": 0.87, "grad_norm": 2.670650289852566, "learning_rate": 4.165053389086776e-07, "loss": 0.2344, "step": 13883 }, { "epoch": 0.87, "grad_norm": 2.3328112575453748, "learning_rate": 4.160984725678946e-07, "loss": 0.2386, "step": 13884 }, { "epoch": 0.87, "grad_norm": 1.812917492437089, "learning_rate": 4.1569179642132694e-07, "loss": 0.2498, "step": 13885 }, { "epoch": 0.87, "grad_norm": 3.894752647902837, "learning_rate": 4.152853104858506e-07, "loss": 0.2851, "step": 13886 }, { "epoch": 0.87, "grad_norm": 1.7591454201584609, "learning_rate": 4.148790147783288e-07, "loss": 0.2484, "step": 13887 }, { "epoch": 0.87, "grad_norm": 2.6124279603834912, "learning_rate": 4.1447290931562136e-07, "loss": 0.2714, "step": 13888 }, { "epoch": 0.87, "grad_norm": 0.6797118342691032, "learning_rate": 4.1406699411457804e-07, "loss": 0.4849, "step": 13889 }, { "epoch": 0.87, "grad_norm": 1.7244922133291847, "learning_rate": 4.1366126919203996e-07, "loss": 0.2346, "step": 13890 }, { "epoch": 0.87, "grad_norm": 2.31265028502726, "learning_rate": 4.1325573456484304e-07, "loss": 0.2473, "step": 13891 }, { "epoch": 0.87, "grad_norm": 11.915228006423384, "learning_rate": 4.1285039024981265e-07, "loss": 0.2521, "step": 13892 }, { "epoch": 0.87, "grad_norm": 1.970610547757536, "learning_rate": 4.12445236263766e-07, "loss": 0.2559, "step": 13893 }, { "epoch": 0.87, "grad_norm": 2.088654997788465, "learning_rate": 4.120402726235156e-07, "loss": 0.2501, "step": 13894 }, { "epoch": 0.87, "grad_norm": 2.5273454258558474, "learning_rate": 4.116354993458632e-07, "loss": 0.247, "step": 13895 }, { "epoch": 0.87, "grad_norm": 1.7837724964174653, "learning_rate": 4.1123091644760413e-07, "loss": 0.2514, "step": 13896 }, { "epoch": 0.87, "grad_norm": 7.43589432026048, "learning_rate": 4.108265239455256e-07, "loss": 0.2583, "step": 13897 }, { "epoch": 0.87, "grad_norm": 1.7085514184862256, "learning_rate": 4.104223218564046e-07, "loss": 0.2497, "step": 13898 }, { "epoch": 0.87, "grad_norm": 2.785846022395972, "learning_rate": 4.10018310197014e-07, "loss": 0.2431, "step": 13899 }, { "epoch": 0.87, "grad_norm": 2.3647852333575528, "learning_rate": 4.096144889841158e-07, "loss": 0.2541, "step": 13900 }, { "epoch": 0.87, "grad_norm": 5.52349487523597, "learning_rate": 4.092108582344673e-07, "loss": 0.2454, "step": 13901 }, { "epoch": 0.87, "grad_norm": 2.5695289241864465, "learning_rate": 4.088074179648138e-07, "loss": 0.2677, "step": 13902 }, { "epoch": 0.87, "grad_norm": 1.3692451951898694, "learning_rate": 4.084041681918949e-07, "loss": 0.2382, "step": 13903 }, { "epoch": 0.87, "grad_norm": 3.023437322970993, "learning_rate": 4.0800110893244314e-07, "loss": 0.2872, "step": 13904 }, { "epoch": 0.87, "grad_norm": 1.9305002568167204, "learning_rate": 4.0759824020318197e-07, "loss": 0.2657, "step": 13905 }, { "epoch": 0.87, "grad_norm": 2.9176846662156515, "learning_rate": 4.0719556202082567e-07, "loss": 0.277, "step": 13906 }, { "epoch": 0.87, "grad_norm": 44.749710952022866, "learning_rate": 4.067930744020837e-07, "loss": 0.2478, "step": 13907 }, { "epoch": 0.87, "grad_norm": 0.5910365872232302, "learning_rate": 4.0639077736365606e-07, "loss": 0.448, "step": 13908 }, { "epoch": 0.87, "grad_norm": 1.9684827752178375, "learning_rate": 4.059886709222344e-07, "loss": 0.2292, "step": 13909 }, { "epoch": 0.87, "grad_norm": 1.9454508858254342, "learning_rate": 4.055867550945025e-07, "loss": 0.2514, "step": 13910 }, { "epoch": 0.87, "grad_norm": 2.549682611788474, "learning_rate": 4.05185029897136e-07, "loss": 0.2571, "step": 13911 }, { "epoch": 0.87, "grad_norm": 1.9142156366389025, "learning_rate": 4.0478349534680496e-07, "loss": 0.2688, "step": 13912 }, { "epoch": 0.87, "grad_norm": 1.6895084210184605, "learning_rate": 4.043821514601681e-07, "loss": 0.231, "step": 13913 }, { "epoch": 0.88, "grad_norm": 1.7273889713895971, "learning_rate": 4.039809982538784e-07, "loss": 0.2617, "step": 13914 }, { "epoch": 0.88, "grad_norm": 1.6850846852629544, "learning_rate": 4.0358003574458017e-07, "loss": 0.2704, "step": 13915 }, { "epoch": 0.88, "grad_norm": 3.836940898758, "learning_rate": 4.031792639489113e-07, "loss": 0.2617, "step": 13916 }, { "epoch": 0.88, "grad_norm": 1.7979533425324887, "learning_rate": 4.027786828834995e-07, "loss": 0.2469, "step": 13917 }, { "epoch": 0.88, "grad_norm": 3.8854678790418142, "learning_rate": 4.023782925649661e-07, "loss": 0.2533, "step": 13918 }, { "epoch": 0.88, "grad_norm": 7.0842772759433394, "learning_rate": 4.0197809300992206e-07, "loss": 0.2403, "step": 13919 }, { "epoch": 0.88, "grad_norm": 2.74645143191841, "learning_rate": 4.0157808423497537e-07, "loss": 0.242, "step": 13920 }, { "epoch": 0.88, "grad_norm": 1.9685761744317802, "learning_rate": 4.011782662567215e-07, "loss": 0.2506, "step": 13921 }, { "epoch": 0.88, "grad_norm": 4.701193911893507, "learning_rate": 4.007786390917495e-07, "loss": 0.2859, "step": 13922 }, { "epoch": 0.88, "grad_norm": 2.0616095332696727, "learning_rate": 4.0037920275664e-07, "loss": 0.2662, "step": 13923 }, { "epoch": 0.88, "grad_norm": 21.16385087060391, "learning_rate": 3.999799572679686e-07, "loss": 0.2591, "step": 13924 }, { "epoch": 0.88, "grad_norm": 1.4668387854806826, "learning_rate": 3.9958090264229923e-07, "loss": 0.262, "step": 13925 }, { "epoch": 0.88, "grad_norm": 2.007863733481297, "learning_rate": 3.991820388961887e-07, "loss": 0.2543, "step": 13926 }, { "epoch": 0.88, "grad_norm": 5.130776382220719, "learning_rate": 3.987833660461882e-07, "loss": 0.2571, "step": 13927 }, { "epoch": 0.88, "grad_norm": 2.5468586335375507, "learning_rate": 3.9838488410883837e-07, "loss": 0.248, "step": 13928 }, { "epoch": 0.88, "grad_norm": 1.4178728606148994, "learning_rate": 3.979865931006732e-07, "loss": 0.2422, "step": 13929 }, { "epoch": 0.88, "grad_norm": 1.9995809761593755, "learning_rate": 3.975884930382173e-07, "loss": 0.2523, "step": 13930 }, { "epoch": 0.88, "grad_norm": 7.640397388681757, "learning_rate": 3.971905839379903e-07, "loss": 0.2557, "step": 13931 }, { "epoch": 0.88, "grad_norm": 0.5824689224284922, "learning_rate": 3.9679286581650155e-07, "loss": 0.4839, "step": 13932 }, { "epoch": 0.88, "grad_norm": 3.1923699861803274, "learning_rate": 3.963953386902536e-07, "loss": 0.2433, "step": 13933 }, { "epoch": 0.88, "grad_norm": 1.6409875478485179, "learning_rate": 3.9599800257573873e-07, "loss": 0.2524, "step": 13934 }, { "epoch": 0.88, "grad_norm": 2.5029582875661958, "learning_rate": 3.9560085748944545e-07, "loss": 0.2452, "step": 13935 }, { "epoch": 0.88, "grad_norm": 1.4388595656727534, "learning_rate": 3.9520390344785053e-07, "loss": 0.2566, "step": 13936 }, { "epoch": 0.88, "grad_norm": 1.8138724186168833, "learning_rate": 3.948071404674242e-07, "loss": 0.2362, "step": 13937 }, { "epoch": 0.88, "grad_norm": 4.1889370186957295, "learning_rate": 3.944105685646299e-07, "loss": 0.2666, "step": 13938 }, { "epoch": 0.88, "grad_norm": 1.7682123636206566, "learning_rate": 3.940141877559212e-07, "loss": 0.2374, "step": 13939 }, { "epoch": 0.88, "grad_norm": 3.6072873298661396, "learning_rate": 3.9361799805774535e-07, "loss": 0.2722, "step": 13940 }, { "epoch": 0.88, "grad_norm": 1.566699737378959, "learning_rate": 3.9322199948654104e-07, "loss": 0.2591, "step": 13941 }, { "epoch": 0.88, "grad_norm": 1.9252607057894588, "learning_rate": 3.928261920587373e-07, "loss": 0.2651, "step": 13942 }, { "epoch": 0.88, "grad_norm": 2.3426039067240128, "learning_rate": 3.9243057579075925e-07, "loss": 0.2664, "step": 13943 }, { "epoch": 0.88, "grad_norm": 3.7662689443890587, "learning_rate": 3.9203515069902055e-07, "loss": 0.2601, "step": 13944 }, { "epoch": 0.88, "grad_norm": 4.124532112595285, "learning_rate": 3.9163991679992684e-07, "loss": 0.2439, "step": 13945 }, { "epoch": 0.88, "grad_norm": 1.6089708090188581, "learning_rate": 3.9124487410987955e-07, "loss": 0.2543, "step": 13946 }, { "epoch": 0.88, "grad_norm": 5.599972590825963, "learning_rate": 3.9085002264526775e-07, "loss": 0.2804, "step": 13947 }, { "epoch": 0.88, "grad_norm": 1.4550784722978682, "learning_rate": 3.904553624224761e-07, "loss": 0.2425, "step": 13948 }, { "epoch": 0.88, "grad_norm": 3.7191182616936205, "learning_rate": 3.900608934578787e-07, "loss": 0.2789, "step": 13949 }, { "epoch": 0.88, "grad_norm": 3.2779646523062587, "learning_rate": 3.896666157678425e-07, "loss": 0.2508, "step": 13950 }, { "epoch": 0.88, "grad_norm": 2.2814600805195453, "learning_rate": 3.8927252936872774e-07, "loss": 0.2458, "step": 13951 }, { "epoch": 0.88, "grad_norm": 2.8321334127552613, "learning_rate": 3.8887863427688463e-07, "loss": 0.2674, "step": 13952 }, { "epoch": 0.88, "grad_norm": 2.8102398638850765, "learning_rate": 3.884849305086585e-07, "loss": 0.2571, "step": 13953 }, { "epoch": 0.88, "grad_norm": 3.041175786355012, "learning_rate": 3.88091418080383e-07, "loss": 0.2591, "step": 13954 }, { "epoch": 0.88, "grad_norm": 2.72017722148624, "learning_rate": 3.8769809700838546e-07, "loss": 0.2519, "step": 13955 }, { "epoch": 0.88, "grad_norm": 3.3002952112703197, "learning_rate": 3.8730496730898735e-07, "loss": 0.2631, "step": 13956 }, { "epoch": 0.88, "grad_norm": 1.9066585536648852, "learning_rate": 3.86912028998499e-07, "loss": 0.2697, "step": 13957 }, { "epoch": 0.88, "grad_norm": 1.880663232172344, "learning_rate": 3.865192820932234e-07, "loss": 0.2556, "step": 13958 }, { "epoch": 0.88, "grad_norm": 3.5605055531928658, "learning_rate": 3.861267266094587e-07, "loss": 0.2604, "step": 13959 }, { "epoch": 0.88, "grad_norm": 2.4718300748092226, "learning_rate": 3.857343625634896e-07, "loss": 0.2539, "step": 13960 }, { "epoch": 0.88, "grad_norm": 2.3417541065616234, "learning_rate": 3.8534218997159923e-07, "loss": 0.2506, "step": 13961 }, { "epoch": 0.88, "grad_norm": 7.137743805169988, "learning_rate": 3.8495020885005784e-07, "loss": 0.2604, "step": 13962 }, { "epoch": 0.88, "grad_norm": 1.5818669127051017, "learning_rate": 3.8455841921512803e-07, "loss": 0.261, "step": 13963 }, { "epoch": 0.88, "grad_norm": 1.9518111291864557, "learning_rate": 3.8416682108306904e-07, "loss": 0.2433, "step": 13964 }, { "epoch": 0.88, "grad_norm": 2.0509379586656284, "learning_rate": 3.837754144701267e-07, "loss": 0.2475, "step": 13965 }, { "epoch": 0.88, "grad_norm": 1.8171415430236428, "learning_rate": 3.8338419939254135e-07, "loss": 0.2511, "step": 13966 }, { "epoch": 0.88, "grad_norm": 1.6052968104197836, "learning_rate": 3.8299317586654503e-07, "loss": 0.2385, "step": 13967 }, { "epoch": 0.88, "grad_norm": 1.8037691832569482, "learning_rate": 3.826023439083637e-07, "loss": 0.2419, "step": 13968 }, { "epoch": 0.88, "grad_norm": 1.9415721273173152, "learning_rate": 3.822117035342127e-07, "loss": 0.2567, "step": 13969 }, { "epoch": 0.88, "grad_norm": 2.835155764947781, "learning_rate": 3.8182125476030007e-07, "loss": 0.2549, "step": 13970 }, { "epoch": 0.88, "grad_norm": 5.448693023832294, "learning_rate": 3.8143099760282574e-07, "loss": 0.2523, "step": 13971 }, { "epoch": 0.88, "grad_norm": 2.7763422546295775, "learning_rate": 3.8104093207798343e-07, "loss": 0.2626, "step": 13972 }, { "epoch": 0.88, "grad_norm": 1.806042382047809, "learning_rate": 3.8065105820195736e-07, "loss": 0.2539, "step": 13973 }, { "epoch": 0.88, "grad_norm": 1.449936481099475, "learning_rate": 3.802613759909224e-07, "loss": 0.2609, "step": 13974 }, { "epoch": 0.88, "grad_norm": 1.4197453166483975, "learning_rate": 3.798718854610489e-07, "loss": 0.2458, "step": 13975 }, { "epoch": 0.88, "grad_norm": 2.3431706323038983, "learning_rate": 3.794825866284979e-07, "loss": 0.2467, "step": 13976 }, { "epoch": 0.88, "grad_norm": 1.621411461075429, "learning_rate": 3.7909347950942145e-07, "loss": 0.2604, "step": 13977 }, { "epoch": 0.88, "grad_norm": 1.9454255595883063, "learning_rate": 3.787045641199638e-07, "loss": 0.2576, "step": 13978 }, { "epoch": 0.88, "grad_norm": 3.528482977675164, "learning_rate": 3.783158404762616e-07, "loss": 0.2829, "step": 13979 }, { "epoch": 0.88, "grad_norm": 2.0598896829424405, "learning_rate": 3.7792730859444515e-07, "loss": 0.244, "step": 13980 }, { "epoch": 0.88, "grad_norm": 2.5935719863410593, "learning_rate": 3.775389684906344e-07, "loss": 0.2599, "step": 13981 }, { "epoch": 0.88, "grad_norm": 3.389542266877843, "learning_rate": 3.771508201809415e-07, "loss": 0.2557, "step": 13982 }, { "epoch": 0.88, "grad_norm": 1.6116713768741266, "learning_rate": 3.7676286368147184e-07, "loss": 0.2321, "step": 13983 }, { "epoch": 0.88, "grad_norm": 2.6815315489178015, "learning_rate": 3.763750990083237e-07, "loss": 0.2549, "step": 13984 }, { "epoch": 0.88, "grad_norm": 2.3457852196671523, "learning_rate": 3.7598752617758527e-07, "loss": 0.2725, "step": 13985 }, { "epoch": 0.88, "grad_norm": 2.2386337262106424, "learning_rate": 3.756001452053376e-07, "loss": 0.2878, "step": 13986 }, { "epoch": 0.88, "grad_norm": 1.9042725851865951, "learning_rate": 3.7521295610765287e-07, "loss": 0.246, "step": 13987 }, { "epoch": 0.88, "grad_norm": 2.2466391317913295, "learning_rate": 3.748259589005976e-07, "loss": 0.265, "step": 13988 }, { "epoch": 0.88, "grad_norm": 1.748354744579556, "learning_rate": 3.744391536002279e-07, "loss": 0.2495, "step": 13989 }, { "epoch": 0.88, "grad_norm": 2.063570841130431, "learning_rate": 3.7405254022259475e-07, "loss": 0.248, "step": 13990 }, { "epoch": 0.88, "grad_norm": 1.4251580723790165, "learning_rate": 3.73666118783737e-07, "loss": 0.2545, "step": 13991 }, { "epoch": 0.88, "grad_norm": 1.8153410444853335, "learning_rate": 3.7327988929969083e-07, "loss": 0.2422, "step": 13992 }, { "epoch": 0.88, "grad_norm": 2.5122610022769543, "learning_rate": 3.728938517864794e-07, "loss": 0.2398, "step": 13993 }, { "epoch": 0.88, "grad_norm": 4.528630755370866, "learning_rate": 3.725080062601211e-07, "loss": 0.2474, "step": 13994 }, { "epoch": 0.88, "grad_norm": 2.520792065023425, "learning_rate": 3.721223527366241e-07, "loss": 0.2541, "step": 13995 }, { "epoch": 0.88, "grad_norm": 2.040619967891763, "learning_rate": 3.7173689123199133e-07, "loss": 0.2395, "step": 13996 }, { "epoch": 0.88, "grad_norm": 2.4496151423932235, "learning_rate": 3.7135162176221483e-07, "loss": 0.2449, "step": 13997 }, { "epoch": 0.88, "grad_norm": 2.6070179068284256, "learning_rate": 3.7096654434328197e-07, "loss": 0.2529, "step": 13998 }, { "epoch": 0.88, "grad_norm": 2.5364656885845123, "learning_rate": 3.7058165899116816e-07, "loss": 0.2596, "step": 13999 }, { "epoch": 0.88, "grad_norm": 1.9744628668453355, "learning_rate": 3.7019696572184515e-07, "loss": 0.2331, "step": 14000 }, { "epoch": 0.88, "grad_norm": 2.073741734694081, "learning_rate": 3.698124645512735e-07, "loss": 0.2638, "step": 14001 }, { "epoch": 0.88, "grad_norm": 1.6839561431804393, "learning_rate": 3.69428155495406e-07, "loss": 0.2586, "step": 14002 }, { "epoch": 0.88, "grad_norm": 2.968946990777144, "learning_rate": 3.690440385701899e-07, "loss": 0.2401, "step": 14003 }, { "epoch": 0.88, "grad_norm": 1.4699091569746803, "learning_rate": 3.686601137915613e-07, "loss": 0.2474, "step": 14004 }, { "epoch": 0.88, "grad_norm": 3.703743291144797, "learning_rate": 3.6827638117545193e-07, "loss": 0.2546, "step": 14005 }, { "epoch": 0.88, "grad_norm": 1.3149306029380574, "learning_rate": 3.6789284073778187e-07, "loss": 0.24, "step": 14006 }, { "epoch": 0.88, "grad_norm": 1.82716596559018, "learning_rate": 3.67509492494465e-07, "loss": 0.253, "step": 14007 }, { "epoch": 0.88, "grad_norm": 2.057829474780221, "learning_rate": 3.6712633646140805e-07, "loss": 0.251, "step": 14008 }, { "epoch": 0.88, "grad_norm": 3.9292991903661747, "learning_rate": 3.667433726545083e-07, "loss": 0.2547, "step": 14009 }, { "epoch": 0.88, "grad_norm": 1.4976106902865847, "learning_rate": 3.6636060108965533e-07, "loss": 0.2447, "step": 14010 }, { "epoch": 0.88, "grad_norm": 2.3557776302475415, "learning_rate": 3.6597802178273136e-07, "loss": 0.2606, "step": 14011 }, { "epoch": 0.88, "grad_norm": 1.4506621013509584, "learning_rate": 3.655956347496098e-07, "loss": 0.2412, "step": 14012 }, { "epoch": 0.88, "grad_norm": 1.949391643633007, "learning_rate": 3.6521344000615746e-07, "loss": 0.2685, "step": 14013 }, { "epoch": 0.88, "grad_norm": 1.8876606327230736, "learning_rate": 3.6483143756823215e-07, "loss": 0.2476, "step": 14014 }, { "epoch": 0.88, "grad_norm": 2.563638569520903, "learning_rate": 3.644496274516829e-07, "loss": 0.2509, "step": 14015 }, { "epoch": 0.88, "grad_norm": 5.964949734790135, "learning_rate": 3.6406800967235255e-07, "loss": 0.2532, "step": 14016 }, { "epoch": 0.88, "grad_norm": 1.4108558848561747, "learning_rate": 3.636865842460752e-07, "loss": 0.2566, "step": 14017 }, { "epoch": 0.88, "grad_norm": 13.420700808848416, "learning_rate": 3.633053511886753e-07, "loss": 0.2523, "step": 14018 }, { "epoch": 0.88, "grad_norm": 1.425152784819349, "learning_rate": 3.6292431051597244e-07, "loss": 0.2413, "step": 14019 }, { "epoch": 0.88, "grad_norm": 2.4925792531002458, "learning_rate": 3.625434622437768e-07, "loss": 0.2517, "step": 14020 }, { "epoch": 0.88, "grad_norm": 1.7139325459117283, "learning_rate": 3.6216280638789013e-07, "loss": 0.2476, "step": 14021 }, { "epoch": 0.88, "grad_norm": 1.5914139311807765, "learning_rate": 3.6178234296410595e-07, "loss": 0.267, "step": 14022 }, { "epoch": 0.88, "grad_norm": 2.605044548152336, "learning_rate": 3.614020719882105e-07, "loss": 0.2739, "step": 14023 }, { "epoch": 0.88, "grad_norm": 0.5833532287491568, "learning_rate": 3.610219934759829e-07, "loss": 0.4876, "step": 14024 }, { "epoch": 0.88, "grad_norm": 1.5081303045273848, "learning_rate": 3.606421074431926e-07, "loss": 0.2441, "step": 14025 }, { "epoch": 0.88, "grad_norm": 2.640048597494044, "learning_rate": 3.6026241390560045e-07, "loss": 0.263, "step": 14026 }, { "epoch": 0.88, "grad_norm": 3.6926999195341494, "learning_rate": 3.5988291287896216e-07, "loss": 0.2586, "step": 14027 }, { "epoch": 0.88, "grad_norm": 2.026754301064587, "learning_rate": 3.5950360437902454e-07, "loss": 0.2507, "step": 14028 }, { "epoch": 0.88, "grad_norm": 2.4190361609887163, "learning_rate": 3.591244884215245e-07, "loss": 0.2418, "step": 14029 }, { "epoch": 0.88, "grad_norm": 3.8392270202784946, "learning_rate": 3.587455650221927e-07, "loss": 0.2717, "step": 14030 }, { "epoch": 0.88, "grad_norm": 1.9051917167279349, "learning_rate": 3.5836683419675056e-07, "loss": 0.2445, "step": 14031 }, { "epoch": 0.88, "grad_norm": 1.905415648496474, "learning_rate": 3.5798829596091325e-07, "loss": 0.265, "step": 14032 }, { "epoch": 0.88, "grad_norm": 2.982409193120526, "learning_rate": 3.57609950330387e-07, "loss": 0.2652, "step": 14033 }, { "epoch": 0.88, "grad_norm": 1.6893216064066778, "learning_rate": 3.572317973208689e-07, "loss": 0.2448, "step": 14034 }, { "epoch": 0.88, "grad_norm": 1.6312148138750535, "learning_rate": 3.568538369480501e-07, "loss": 0.2357, "step": 14035 }, { "epoch": 0.88, "grad_norm": 7.7249586999456525, "learning_rate": 3.564760692276137e-07, "loss": 0.252, "step": 14036 }, { "epoch": 0.88, "grad_norm": 3.150511611161764, "learning_rate": 3.560984941752327e-07, "loss": 0.2446, "step": 14037 }, { "epoch": 0.88, "grad_norm": 4.2946408833896355, "learning_rate": 3.5572111180657396e-07, "loss": 0.2455, "step": 14038 }, { "epoch": 0.88, "grad_norm": 3.7397380751360467, "learning_rate": 3.553439221372945e-07, "loss": 0.2622, "step": 14039 }, { "epoch": 0.88, "grad_norm": 2.069791112226758, "learning_rate": 3.549669251830462e-07, "loss": 0.2431, "step": 14040 }, { "epoch": 0.88, "grad_norm": 2.3324155647873717, "learning_rate": 3.5459012095947097e-07, "loss": 0.2493, "step": 14041 }, { "epoch": 0.88, "grad_norm": 1.3983670371869428, "learning_rate": 3.542135094822019e-07, "loss": 0.2521, "step": 14042 }, { "epoch": 0.88, "grad_norm": 1.900478955509874, "learning_rate": 3.538370907668659e-07, "loss": 0.2554, "step": 14043 }, { "epoch": 0.88, "grad_norm": 1.3513096505059168, "learning_rate": 3.5346086482908273e-07, "loss": 0.2529, "step": 14044 }, { "epoch": 0.88, "grad_norm": 6.418736693991127, "learning_rate": 3.5308483168446094e-07, "loss": 0.2541, "step": 14045 }, { "epoch": 0.88, "grad_norm": 1.6215455821904665, "learning_rate": 3.527089913486037e-07, "loss": 0.2604, "step": 14046 }, { "epoch": 0.88, "grad_norm": 2.033883167194892, "learning_rate": 3.5233334383710406e-07, "loss": 0.2517, "step": 14047 }, { "epoch": 0.88, "grad_norm": 1.6772044399704664, "learning_rate": 3.5195788916555005e-07, "loss": 0.2459, "step": 14048 }, { "epoch": 0.88, "grad_norm": 3.2660530849734006, "learning_rate": 3.515826273495182e-07, "loss": 0.2619, "step": 14049 }, { "epoch": 0.88, "grad_norm": 5.069942588734494, "learning_rate": 3.5120755840458043e-07, "loss": 0.271, "step": 14050 }, { "epoch": 0.88, "grad_norm": 4.523335034358723, "learning_rate": 3.5083268234629763e-07, "loss": 0.2791, "step": 14051 }, { "epoch": 0.88, "grad_norm": 1.7345082687962807, "learning_rate": 3.5045799919022515e-07, "loss": 0.2497, "step": 14052 }, { "epoch": 0.88, "grad_norm": 2.2259196000159602, "learning_rate": 3.500835089519089e-07, "loss": 0.249, "step": 14053 }, { "epoch": 0.88, "grad_norm": 3.157667260164587, "learning_rate": 3.4970921164688755e-07, "loss": 0.269, "step": 14054 }, { "epoch": 0.88, "grad_norm": 1.957867738843563, "learning_rate": 3.4933510729068976e-07, "loss": 0.2485, "step": 14055 }, { "epoch": 0.88, "grad_norm": 2.1714570268075017, "learning_rate": 3.4896119589883925e-07, "loss": 0.287, "step": 14056 }, { "epoch": 0.88, "grad_norm": 1.9125151174917374, "learning_rate": 3.4858747748684916e-07, "loss": 0.2367, "step": 14057 }, { "epoch": 0.88, "grad_norm": 1.7969448507382682, "learning_rate": 3.4821395207022767e-07, "loss": 0.2535, "step": 14058 }, { "epoch": 0.88, "grad_norm": 2.0609274176620787, "learning_rate": 3.4784061966447124e-07, "loss": 0.2704, "step": 14059 }, { "epoch": 0.88, "grad_norm": 1.9087048246241596, "learning_rate": 3.4746748028507084e-07, "loss": 0.245, "step": 14060 }, { "epoch": 0.88, "grad_norm": 1.714360568446815, "learning_rate": 3.4709453394750847e-07, "loss": 0.2378, "step": 14061 }, { "epoch": 0.88, "grad_norm": 2.5383303949841536, "learning_rate": 3.46721780667259e-07, "loss": 0.2581, "step": 14062 }, { "epoch": 0.88, "grad_norm": 1.6783334254549231, "learning_rate": 3.463492204597868e-07, "loss": 0.2299, "step": 14063 }, { "epoch": 0.88, "grad_norm": 5.326612215247788, "learning_rate": 3.459768533405511e-07, "loss": 0.2436, "step": 14064 }, { "epoch": 0.88, "grad_norm": 1.981796445493719, "learning_rate": 3.456046793250034e-07, "loss": 0.2461, "step": 14065 }, { "epoch": 0.88, "grad_norm": 3.5639625510084794, "learning_rate": 3.4523269842858477e-07, "loss": 0.2501, "step": 14066 }, { "epoch": 0.88, "grad_norm": 2.1504799983494314, "learning_rate": 3.448609106667283e-07, "loss": 0.2593, "step": 14067 }, { "epoch": 0.88, "grad_norm": 2.439883229790237, "learning_rate": 3.444893160548618e-07, "loss": 0.2311, "step": 14068 }, { "epoch": 0.88, "grad_norm": 1.7139580722442127, "learning_rate": 3.4411791460840285e-07, "loss": 0.2502, "step": 14069 }, { "epoch": 0.88, "grad_norm": 2.773717588972613, "learning_rate": 3.4374670634276073e-07, "loss": 0.2537, "step": 14070 }, { "epoch": 0.88, "grad_norm": 1.7377046459612524, "learning_rate": 3.4337569127333767e-07, "loss": 0.2453, "step": 14071 }, { "epoch": 0.88, "grad_norm": 2.3101557933414867, "learning_rate": 3.4300486941552913e-07, "loss": 0.2499, "step": 14072 }, { "epoch": 0.89, "grad_norm": 1.5894446259716395, "learning_rate": 3.426342407847205e-07, "loss": 0.2514, "step": 14073 }, { "epoch": 0.89, "grad_norm": 1.3154537038250578, "learning_rate": 3.422638053962896e-07, "loss": 0.2577, "step": 14074 }, { "epoch": 0.89, "grad_norm": 2.073380209818981, "learning_rate": 3.418935632656062e-07, "loss": 0.2504, "step": 14075 }, { "epoch": 0.89, "grad_norm": 1.567365734048435, "learning_rate": 3.4152351440803254e-07, "loss": 0.2563, "step": 14076 }, { "epoch": 0.89, "grad_norm": 1.903767169799387, "learning_rate": 3.41153658838923e-07, "loss": 0.252, "step": 14077 }, { "epoch": 0.89, "grad_norm": 1.5611119479604987, "learning_rate": 3.407839965736237e-07, "loss": 0.2565, "step": 14078 }, { "epoch": 0.89, "grad_norm": 2.1750747738002634, "learning_rate": 3.4041452762747007e-07, "loss": 0.2578, "step": 14079 }, { "epoch": 0.89, "grad_norm": 1.7385006166764045, "learning_rate": 3.4004525201579543e-07, "loss": 0.2648, "step": 14080 }, { "epoch": 0.89, "grad_norm": 1.6157085561575384, "learning_rate": 3.3967616975392026e-07, "loss": 0.2698, "step": 14081 }, { "epoch": 0.89, "grad_norm": 0.6148333518349004, "learning_rate": 3.3930728085715903e-07, "loss": 0.4513, "step": 14082 }, { "epoch": 0.89, "grad_norm": 4.023096653748799, "learning_rate": 3.389385853408156e-07, "loss": 0.2526, "step": 14083 }, { "epoch": 0.89, "grad_norm": 1.7164957939293168, "learning_rate": 3.385700832201905e-07, "loss": 0.2379, "step": 14084 }, { "epoch": 0.89, "grad_norm": 1.4329485094320742, "learning_rate": 3.38201774510572e-07, "loss": 0.2549, "step": 14085 }, { "epoch": 0.89, "grad_norm": 1.9916654475637625, "learning_rate": 3.378336592272419e-07, "loss": 0.2469, "step": 14086 }, { "epoch": 0.89, "grad_norm": 1.6054290856093585, "learning_rate": 3.374657373854734e-07, "loss": 0.2778, "step": 14087 }, { "epoch": 0.89, "grad_norm": 1.7978478650048377, "learning_rate": 3.3709800900053437e-07, "loss": 0.2581, "step": 14088 }, { "epoch": 0.89, "grad_norm": 1.5952629654325567, "learning_rate": 3.367304740876809e-07, "loss": 0.2488, "step": 14089 }, { "epoch": 0.89, "grad_norm": 2.246410194685025, "learning_rate": 3.3636313266216304e-07, "loss": 0.2753, "step": 14090 }, { "epoch": 0.89, "grad_norm": 1.7010692136279622, "learning_rate": 3.359959847392219e-07, "loss": 0.2496, "step": 14091 }, { "epoch": 0.89, "grad_norm": 1.4884789518705417, "learning_rate": 3.35629030334092e-07, "loss": 0.267, "step": 14092 }, { "epoch": 0.89, "grad_norm": 1.9281994986521709, "learning_rate": 3.352622694619989e-07, "loss": 0.267, "step": 14093 }, { "epoch": 0.89, "grad_norm": 3.592981451803473, "learning_rate": 3.3489570213815826e-07, "loss": 0.2518, "step": 14094 }, { "epoch": 0.89, "grad_norm": 2.579066271172909, "learning_rate": 3.3452932837778174e-07, "loss": 0.2388, "step": 14095 }, { "epoch": 0.89, "grad_norm": 1.780184275461993, "learning_rate": 3.3416314819607056e-07, "loss": 0.2543, "step": 14096 }, { "epoch": 0.89, "grad_norm": 2.710008204718657, "learning_rate": 3.3379716160821805e-07, "loss": 0.2681, "step": 14097 }, { "epoch": 0.89, "grad_norm": 1.8055841737255092, "learning_rate": 3.334313686294094e-07, "loss": 0.267, "step": 14098 }, { "epoch": 0.89, "grad_norm": 0.550078482054344, "learning_rate": 3.330657692748213e-07, "loss": 0.4701, "step": 14099 }, { "epoch": 0.89, "grad_norm": 1.820093325681551, "learning_rate": 3.327003635596243e-07, "loss": 0.2367, "step": 14100 }, { "epoch": 0.89, "grad_norm": 1.451465411921623, "learning_rate": 3.3233515149897865e-07, "loss": 0.2731, "step": 14101 }, { "epoch": 0.89, "grad_norm": 2.1110474763313096, "learning_rate": 3.319701331080394e-07, "loss": 0.2602, "step": 14102 }, { "epoch": 0.89, "grad_norm": 2.5133777270564037, "learning_rate": 3.316053084019494e-07, "loss": 0.2365, "step": 14103 }, { "epoch": 0.89, "grad_norm": 2.214333969472905, "learning_rate": 3.312406773958482e-07, "loss": 0.237, "step": 14104 }, { "epoch": 0.89, "grad_norm": 2.24721399983154, "learning_rate": 3.3087624010486377e-07, "loss": 0.2385, "step": 14105 }, { "epoch": 0.89, "grad_norm": 2.214415640869386, "learning_rate": 3.305119965441178e-07, "loss": 0.2538, "step": 14106 }, { "epoch": 0.89, "grad_norm": 1.7871951376817026, "learning_rate": 3.3014794672872165e-07, "loss": 0.2494, "step": 14107 }, { "epoch": 0.89, "grad_norm": 1.7727323497798866, "learning_rate": 3.2978409067378315e-07, "loss": 0.2539, "step": 14108 }, { "epoch": 0.89, "grad_norm": 3.1630773104422993, "learning_rate": 3.294204283943969e-07, "loss": 0.2707, "step": 14109 }, { "epoch": 0.89, "grad_norm": 1.4628341334900339, "learning_rate": 3.2905695990565365e-07, "loss": 0.2406, "step": 14110 }, { "epoch": 0.89, "grad_norm": 4.44670128812514, "learning_rate": 3.28693685222633e-07, "loss": 0.2572, "step": 14111 }, { "epoch": 0.89, "grad_norm": 1.3998173995318388, "learning_rate": 3.2833060436040955e-07, "loss": 0.2351, "step": 14112 }, { "epoch": 0.89, "grad_norm": 2.009462336526005, "learning_rate": 3.2796771733404673e-07, "loss": 0.273, "step": 14113 }, { "epoch": 0.89, "grad_norm": 2.518736069927823, "learning_rate": 3.2760502415860206e-07, "loss": 0.2618, "step": 14114 }, { "epoch": 0.89, "grad_norm": 2.2074491028281913, "learning_rate": 3.2724252484912343e-07, "loss": 0.2364, "step": 14115 }, { "epoch": 0.89, "grad_norm": 3.4589832200098267, "learning_rate": 3.268802194206516e-07, "loss": 0.2741, "step": 14116 }, { "epoch": 0.89, "grad_norm": 1.6906481773450206, "learning_rate": 3.2651810788822125e-07, "loss": 0.2707, "step": 14117 }, { "epoch": 0.89, "grad_norm": 5.642285376411281, "learning_rate": 3.261561902668553e-07, "loss": 0.2485, "step": 14118 }, { "epoch": 0.89, "grad_norm": 4.427392316989657, "learning_rate": 3.257944665715701e-07, "loss": 0.2606, "step": 14119 }, { "epoch": 0.89, "grad_norm": 2.9736124977420078, "learning_rate": 3.254329368173753e-07, "loss": 0.2429, "step": 14120 }, { "epoch": 0.89, "grad_norm": 2.099895252274889, "learning_rate": 3.2507160101927113e-07, "loss": 0.2521, "step": 14121 }, { "epoch": 0.89, "grad_norm": 2.658880185049216, "learning_rate": 3.247104591922495e-07, "loss": 0.2555, "step": 14122 }, { "epoch": 0.89, "grad_norm": 18.07870804946324, "learning_rate": 3.243495113512951e-07, "loss": 0.2532, "step": 14123 }, { "epoch": 0.89, "grad_norm": 2.3801652884066087, "learning_rate": 3.239887575113837e-07, "loss": 0.2649, "step": 14124 }, { "epoch": 0.89, "grad_norm": 1.5853525265651602, "learning_rate": 3.236281976874855e-07, "loss": 0.2461, "step": 14125 }, { "epoch": 0.89, "grad_norm": 1.4957718282050858, "learning_rate": 3.2326783189455925e-07, "loss": 0.2521, "step": 14126 }, { "epoch": 0.89, "grad_norm": 1.9464437640196859, "learning_rate": 3.229076601475567e-07, "loss": 0.241, "step": 14127 }, { "epoch": 0.89, "grad_norm": 1.9489841920359445, "learning_rate": 3.225476824614238e-07, "loss": 0.2653, "step": 14128 }, { "epoch": 0.89, "grad_norm": 1.3831255692450444, "learning_rate": 3.221878988510957e-07, "loss": 0.2459, "step": 14129 }, { "epoch": 0.89, "grad_norm": 2.194250706861133, "learning_rate": 3.2182830933149997e-07, "loss": 0.2692, "step": 14130 }, { "epoch": 0.89, "grad_norm": 1.6009420196293114, "learning_rate": 3.2146891391755586e-07, "loss": 0.2399, "step": 14131 }, { "epoch": 0.89, "grad_norm": 2.0690534754572023, "learning_rate": 3.21109712624178e-07, "loss": 0.2596, "step": 14132 }, { "epoch": 0.89, "grad_norm": 1.7260369369536, "learning_rate": 3.207507054662684e-07, "loss": 0.2451, "step": 14133 }, { "epoch": 0.89, "grad_norm": 3.6769983034758233, "learning_rate": 3.2039189245872404e-07, "loss": 0.2526, "step": 14134 }, { "epoch": 0.89, "grad_norm": 1.7042673355599904, "learning_rate": 3.2003327361643077e-07, "loss": 0.241, "step": 14135 }, { "epoch": 0.89, "grad_norm": 2.756754665829947, "learning_rate": 3.196748489542706e-07, "loss": 0.2406, "step": 14136 }, { "epoch": 0.89, "grad_norm": 3.3749427363919344, "learning_rate": 3.193166184871138e-07, "loss": 0.2585, "step": 14137 }, { "epoch": 0.89, "grad_norm": 2.0212558009921273, "learning_rate": 3.189585822298241e-07, "loss": 0.2545, "step": 14138 }, { "epoch": 0.89, "grad_norm": 2.5790218799240625, "learning_rate": 3.186007401972563e-07, "loss": 0.2491, "step": 14139 }, { "epoch": 0.89, "grad_norm": 4.00351015696923, "learning_rate": 3.182430924042601e-07, "loss": 0.2524, "step": 14140 }, { "epoch": 0.89, "grad_norm": 1.7627894836248061, "learning_rate": 3.178856388656737e-07, "loss": 0.2461, "step": 14141 }, { "epoch": 0.89, "grad_norm": 2.0022439223692308, "learning_rate": 3.175283795963291e-07, "loss": 0.2889, "step": 14142 }, { "epoch": 0.89, "grad_norm": 1.8741973059886514, "learning_rate": 3.171713146110478e-07, "loss": 0.2544, "step": 14143 }, { "epoch": 0.89, "grad_norm": 2.751331496634442, "learning_rate": 3.168144439246468e-07, "loss": 0.2607, "step": 14144 }, { "epoch": 0.89, "grad_norm": 0.5805750487119463, "learning_rate": 3.1645776755193314e-07, "loss": 0.4583, "step": 14145 }, { "epoch": 0.89, "grad_norm": 1.5341586703803891, "learning_rate": 3.16101285507705e-07, "loss": 0.2458, "step": 14146 }, { "epoch": 0.89, "grad_norm": 2.264454355592782, "learning_rate": 3.1574499780675395e-07, "loss": 0.2426, "step": 14147 }, { "epoch": 0.89, "grad_norm": 2.5053034981439968, "learning_rate": 3.1538890446386363e-07, "loss": 0.2665, "step": 14148 }, { "epoch": 0.89, "grad_norm": 1.6998832531170613, "learning_rate": 3.1503300549380833e-07, "loss": 0.2437, "step": 14149 }, { "epoch": 0.89, "grad_norm": 1.5599972917303324, "learning_rate": 3.1467730091135574e-07, "loss": 0.2438, "step": 14150 }, { "epoch": 0.89, "grad_norm": 1.7217109211200416, "learning_rate": 3.143217907312629e-07, "loss": 0.2439, "step": 14151 }, { "epoch": 0.89, "grad_norm": 2.288253222037634, "learning_rate": 3.1396647496828245e-07, "loss": 0.2578, "step": 14152 }, { "epoch": 0.89, "grad_norm": 2.1278280755127885, "learning_rate": 3.1361135363715544e-07, "loss": 0.2558, "step": 14153 }, { "epoch": 0.89, "grad_norm": 5.190756776753083, "learning_rate": 3.132564267526178e-07, "loss": 0.2413, "step": 14154 }, { "epoch": 0.89, "grad_norm": 1.800167930523466, "learning_rate": 3.1290169432939556e-07, "loss": 0.2439, "step": 14155 }, { "epoch": 0.89, "grad_norm": 1.8003421648981754, "learning_rate": 3.1254715638220745e-07, "loss": 0.2555, "step": 14156 }, { "epoch": 0.89, "grad_norm": 1.6455429528094807, "learning_rate": 3.12192812925764e-07, "loss": 0.2428, "step": 14157 }, { "epoch": 0.89, "grad_norm": 1.964151007411487, "learning_rate": 3.118386639747667e-07, "loss": 0.2515, "step": 14158 }, { "epoch": 0.89, "grad_norm": 1.7635693224675286, "learning_rate": 3.1148470954391e-07, "loss": 0.2377, "step": 14159 }, { "epoch": 0.89, "grad_norm": 4.644987155396305, "learning_rate": 3.1113094964788095e-07, "loss": 0.2636, "step": 14160 }, { "epoch": 0.89, "grad_norm": 1.8162035050659775, "learning_rate": 3.107773843013567e-07, "loss": 0.2484, "step": 14161 }, { "epoch": 0.89, "grad_norm": 3.0751770294342355, "learning_rate": 3.104240135190084e-07, "loss": 0.2448, "step": 14162 }, { "epoch": 0.89, "grad_norm": 7.366092576526622, "learning_rate": 3.1007083731549705e-07, "loss": 0.2435, "step": 14163 }, { "epoch": 0.89, "grad_norm": 3.1585048475823325, "learning_rate": 3.0971785570547696e-07, "loss": 0.2738, "step": 14164 }, { "epoch": 0.89, "grad_norm": 5.872299106794302, "learning_rate": 3.093650687035943e-07, "loss": 0.2769, "step": 14165 }, { "epoch": 0.89, "grad_norm": 3.6620275634652115, "learning_rate": 3.090124763244867e-07, "loss": 0.2637, "step": 14166 }, { "epoch": 0.89, "grad_norm": 1.8573763061893935, "learning_rate": 3.086600785827826e-07, "loss": 0.2516, "step": 14167 }, { "epoch": 0.89, "grad_norm": 3.389102685457744, "learning_rate": 3.0830787549310405e-07, "loss": 0.2562, "step": 14168 }, { "epoch": 0.89, "grad_norm": 2.277507817959017, "learning_rate": 3.079558670700666e-07, "loss": 0.2492, "step": 14169 }, { "epoch": 0.89, "grad_norm": 3.8974370453711717, "learning_rate": 3.076040533282737e-07, "loss": 0.2626, "step": 14170 }, { "epoch": 0.89, "grad_norm": 1.743171319336527, "learning_rate": 3.07252434282323e-07, "loss": 0.2562, "step": 14171 }, { "epoch": 0.89, "grad_norm": 2.3050480895414043, "learning_rate": 3.069010099468045e-07, "loss": 0.2441, "step": 14172 }, { "epoch": 0.89, "grad_norm": 1.970789597954902, "learning_rate": 3.0654978033629934e-07, "loss": 0.2646, "step": 14173 }, { "epoch": 0.89, "grad_norm": 2.5200774121937837, "learning_rate": 3.0619874546537973e-07, "loss": 0.2297, "step": 14174 }, { "epoch": 0.89, "grad_norm": 2.0452896948858283, "learning_rate": 3.058479053486113e-07, "loss": 0.249, "step": 14175 }, { "epoch": 0.89, "grad_norm": 3.379949916591384, "learning_rate": 3.0549726000055067e-07, "loss": 0.257, "step": 14176 }, { "epoch": 0.89, "grad_norm": 5.079848781613622, "learning_rate": 3.051468094357474e-07, "loss": 0.2511, "step": 14177 }, { "epoch": 0.89, "grad_norm": 2.3859222886999807, "learning_rate": 3.047965536687425e-07, "loss": 0.2462, "step": 14178 }, { "epoch": 0.89, "grad_norm": 1.4474674964475036, "learning_rate": 3.0444649271406723e-07, "loss": 0.2438, "step": 14179 }, { "epoch": 0.89, "grad_norm": 1.4229944900090208, "learning_rate": 3.040966265862483e-07, "loss": 0.2629, "step": 14180 }, { "epoch": 0.89, "grad_norm": 1.9965843948030662, "learning_rate": 3.037469552998007e-07, "loss": 0.2595, "step": 14181 }, { "epoch": 0.89, "grad_norm": 2.320974293952236, "learning_rate": 3.03397478869234e-07, "loss": 0.2556, "step": 14182 }, { "epoch": 0.89, "grad_norm": 8.860521330464051, "learning_rate": 3.0304819730904656e-07, "loss": 0.2445, "step": 14183 }, { "epoch": 0.89, "grad_norm": 3.01671078890323, "learning_rate": 3.0269911063373293e-07, "loss": 0.248, "step": 14184 }, { "epoch": 0.89, "grad_norm": 2.3216123341239854, "learning_rate": 3.023502188577765e-07, "loss": 0.2576, "step": 14185 }, { "epoch": 0.89, "grad_norm": 1.7143623675867794, "learning_rate": 3.02001521995654e-07, "loss": 0.2564, "step": 14186 }, { "epoch": 0.89, "grad_norm": 1.9182187327746425, "learning_rate": 3.016530200618323e-07, "loss": 0.236, "step": 14187 }, { "epoch": 0.89, "grad_norm": 2.512797203465373, "learning_rate": 3.013047130707725e-07, "loss": 0.248, "step": 14188 }, { "epoch": 0.89, "grad_norm": 1.5181350357604373, "learning_rate": 3.009566010369258e-07, "loss": 0.2428, "step": 14189 }, { "epoch": 0.89, "grad_norm": 1.5479538287869512, "learning_rate": 3.006086839747363e-07, "loss": 0.2248, "step": 14190 }, { "epoch": 0.89, "grad_norm": 2.25517044002195, "learning_rate": 3.002609618986385e-07, "loss": 0.244, "step": 14191 }, { "epoch": 0.89, "grad_norm": 1.7239218655688637, "learning_rate": 2.9991343482306144e-07, "loss": 0.2516, "step": 14192 }, { "epoch": 0.89, "grad_norm": 5.15709818814808, "learning_rate": 2.995661027624253e-07, "loss": 0.2628, "step": 14193 }, { "epoch": 0.89, "grad_norm": 7.970519353518592, "learning_rate": 2.9921896573114005e-07, "loss": 0.2681, "step": 14194 }, { "epoch": 0.89, "grad_norm": 2.254104932453268, "learning_rate": 2.988720237436088e-07, "loss": 0.2515, "step": 14195 }, { "epoch": 0.89, "grad_norm": 1.833982346041316, "learning_rate": 2.985252768142283e-07, "loss": 0.2325, "step": 14196 }, { "epoch": 0.89, "grad_norm": 3.8311408798011928, "learning_rate": 2.981787249573842e-07, "loss": 0.2432, "step": 14197 }, { "epoch": 0.89, "grad_norm": 2.7006812690340607, "learning_rate": 2.978323681874562e-07, "loss": 0.2515, "step": 14198 }, { "epoch": 0.89, "grad_norm": 2.802188316984068, "learning_rate": 2.97486206518815e-07, "loss": 0.2527, "step": 14199 }, { "epoch": 0.89, "grad_norm": 2.2877420965198394, "learning_rate": 2.9714023996582407e-07, "loss": 0.2373, "step": 14200 }, { "epoch": 0.89, "grad_norm": 1.7574268777869833, "learning_rate": 2.967944685428381e-07, "loss": 0.2576, "step": 14201 }, { "epoch": 0.89, "grad_norm": 2.4443036412371706, "learning_rate": 2.964488922642034e-07, "loss": 0.2607, "step": 14202 }, { "epoch": 0.89, "grad_norm": 2.4874626471306946, "learning_rate": 2.961035111442584e-07, "loss": 0.2362, "step": 14203 }, { "epoch": 0.89, "grad_norm": 2.2762819698663903, "learning_rate": 2.9575832519733396e-07, "loss": 0.2684, "step": 14204 }, { "epoch": 0.89, "grad_norm": 1.5384853321987708, "learning_rate": 2.9541333443775244e-07, "loss": 0.2393, "step": 14205 }, { "epoch": 0.89, "grad_norm": 1.8635726006801168, "learning_rate": 2.950685388798269e-07, "loss": 0.2597, "step": 14206 }, { "epoch": 0.89, "grad_norm": 1.778220642726601, "learning_rate": 2.9472393853786473e-07, "loss": 0.2505, "step": 14207 }, { "epoch": 0.89, "grad_norm": 1.4520739525243302, "learning_rate": 2.9437953342616453e-07, "loss": 0.2337, "step": 14208 }, { "epoch": 0.89, "grad_norm": 2.024836258829966, "learning_rate": 2.94035323559016e-07, "loss": 0.2725, "step": 14209 }, { "epoch": 0.89, "grad_norm": 2.1470186297578016, "learning_rate": 2.936913089507004e-07, "loss": 0.232, "step": 14210 }, { "epoch": 0.89, "grad_norm": 6.491248426266962, "learning_rate": 2.9334748961549084e-07, "loss": 0.2528, "step": 14211 }, { "epoch": 0.89, "grad_norm": 2.1762916699011883, "learning_rate": 2.9300386556765483e-07, "loss": 0.2593, "step": 14212 }, { "epoch": 0.89, "grad_norm": 1.62343959964132, "learning_rate": 2.926604368214486e-07, "loss": 0.26, "step": 14213 }, { "epoch": 0.89, "grad_norm": 2.668567414873467, "learning_rate": 2.92317203391122e-07, "loss": 0.2283, "step": 14214 }, { "epoch": 0.89, "grad_norm": 2.203581337492975, "learning_rate": 2.919741652909164e-07, "loss": 0.2465, "step": 14215 }, { "epoch": 0.89, "grad_norm": 3.96762141276433, "learning_rate": 2.916313225350653e-07, "loss": 0.2685, "step": 14216 }, { "epoch": 0.89, "grad_norm": 1.8209855745035428, "learning_rate": 2.912886751377941e-07, "loss": 0.2372, "step": 14217 }, { "epoch": 0.89, "grad_norm": 2.9995706117839998, "learning_rate": 2.909462231133192e-07, "loss": 0.2564, "step": 14218 }, { "epoch": 0.89, "grad_norm": 1.9604757938209916, "learning_rate": 2.9060396647584867e-07, "loss": 0.2531, "step": 14219 }, { "epoch": 0.89, "grad_norm": 0.60968839104951, "learning_rate": 2.9026190523958553e-07, "loss": 0.4309, "step": 14220 }, { "epoch": 0.89, "grad_norm": 2.906944155396832, "learning_rate": 2.899200394187202e-07, "loss": 0.2336, "step": 14221 }, { "epoch": 0.89, "grad_norm": 1.6227806585445994, "learning_rate": 2.895783690274395e-07, "loss": 0.2838, "step": 14222 }, { "epoch": 0.89, "grad_norm": 2.1101751156505704, "learning_rate": 2.892368940799184e-07, "loss": 0.2715, "step": 14223 }, { "epoch": 0.89, "grad_norm": 1.9735421721284456, "learning_rate": 2.88895614590326e-07, "loss": 0.2304, "step": 14224 }, { "epoch": 0.89, "grad_norm": 2.744707844567755, "learning_rate": 2.885545305728227e-07, "loss": 0.2486, "step": 14225 }, { "epoch": 0.89, "grad_norm": 1.8419252848727798, "learning_rate": 2.8821364204156045e-07, "loss": 0.2437, "step": 14226 }, { "epoch": 0.89, "grad_norm": 1.4988629913186162, "learning_rate": 2.878729490106824e-07, "loss": 0.2621, "step": 14227 }, { "epoch": 0.89, "grad_norm": 2.1972532747467786, "learning_rate": 2.875324514943256e-07, "loss": 0.2395, "step": 14228 }, { "epoch": 0.89, "grad_norm": 2.3631920326134166, "learning_rate": 2.871921495066182e-07, "loss": 0.2524, "step": 14229 }, { "epoch": 0.89, "grad_norm": 2.707015049201672, "learning_rate": 2.8685204306168004e-07, "loss": 0.2359, "step": 14230 }, { "epoch": 0.89, "grad_norm": 2.2513505161088827, "learning_rate": 2.865121321736203e-07, "loss": 0.2479, "step": 14231 }, { "epoch": 0.9, "grad_norm": 2.2149533956543466, "learning_rate": 2.861724168565461e-07, "loss": 0.2502, "step": 14232 }, { "epoch": 0.9, "grad_norm": 1.4707674493259502, "learning_rate": 2.8583289712455e-07, "loss": 0.2347, "step": 14233 }, { "epoch": 0.9, "grad_norm": 1.7805922254398394, "learning_rate": 2.8549357299172077e-07, "loss": 0.2522, "step": 14234 }, { "epoch": 0.9, "grad_norm": 4.128419079823596, "learning_rate": 2.851544444721366e-07, "loss": 0.2654, "step": 14235 }, { "epoch": 0.9, "grad_norm": 0.5927763243012352, "learning_rate": 2.8481551157986896e-07, "loss": 0.4553, "step": 14236 }, { "epoch": 0.9, "grad_norm": 1.457753305233146, "learning_rate": 2.8447677432898115e-07, "loss": 0.2534, "step": 14237 }, { "epoch": 0.9, "grad_norm": 1.431673570825116, "learning_rate": 2.8413823273352793e-07, "loss": 0.2714, "step": 14238 }, { "epoch": 0.9, "grad_norm": 1.510482169073478, "learning_rate": 2.8379988680755533e-07, "loss": 0.2539, "step": 14239 }, { "epoch": 0.9, "grad_norm": 3.0339140084886718, "learning_rate": 2.8346173656510266e-07, "loss": 0.2578, "step": 14240 }, { "epoch": 0.9, "grad_norm": 2.0177448908096074, "learning_rate": 2.8312378202020043e-07, "loss": 0.2609, "step": 14241 }, { "epoch": 0.9, "grad_norm": 1.7390656271983145, "learning_rate": 2.827860231868701e-07, "loss": 0.246, "step": 14242 }, { "epoch": 0.9, "grad_norm": 1.8694647885348494, "learning_rate": 2.824484600791261e-07, "loss": 0.2546, "step": 14243 }, { "epoch": 0.9, "grad_norm": 3.422264378514909, "learning_rate": 2.821110927109744e-07, "loss": 0.2517, "step": 14244 }, { "epoch": 0.9, "grad_norm": 2.0384834894355954, "learning_rate": 2.817739210964143e-07, "loss": 0.2479, "step": 14245 }, { "epoch": 0.9, "grad_norm": 2.198299849321342, "learning_rate": 2.814369452494348e-07, "loss": 0.2527, "step": 14246 }, { "epoch": 0.9, "grad_norm": 1.563116367906744, "learning_rate": 2.811001651840167e-07, "loss": 0.2415, "step": 14247 }, { "epoch": 0.9, "grad_norm": 3.3240036326220106, "learning_rate": 2.807635809141357e-07, "loss": 0.2539, "step": 14248 }, { "epoch": 0.9, "grad_norm": 1.7415728194434241, "learning_rate": 2.804271924537555e-07, "loss": 0.2681, "step": 14249 }, { "epoch": 0.9, "grad_norm": 1.9932981358469144, "learning_rate": 2.800909998168333e-07, "loss": 0.2923, "step": 14250 }, { "epoch": 0.9, "grad_norm": 2.173290918256281, "learning_rate": 2.797550030173196e-07, "loss": 0.2571, "step": 14251 }, { "epoch": 0.9, "grad_norm": 1.6016106518789888, "learning_rate": 2.7941920206915443e-07, "loss": 0.2537, "step": 14252 }, { "epoch": 0.9, "grad_norm": 1.9258956452904854, "learning_rate": 2.790835969862721e-07, "loss": 0.2591, "step": 14253 }, { "epoch": 0.9, "grad_norm": 1.5544338341820572, "learning_rate": 2.7874818778259647e-07, "loss": 0.2523, "step": 14254 }, { "epoch": 0.9, "grad_norm": 1.7179957423032626, "learning_rate": 2.7841297447204374e-07, "loss": 0.2558, "step": 14255 }, { "epoch": 0.9, "grad_norm": 0.6069787880469615, "learning_rate": 2.780779570685238e-07, "loss": 0.4768, "step": 14256 }, { "epoch": 0.9, "grad_norm": 1.9796644903922684, "learning_rate": 2.7774313558593667e-07, "loss": 0.2461, "step": 14257 }, { "epoch": 0.9, "grad_norm": 4.394635213376667, "learning_rate": 2.774085100381735e-07, "loss": 0.2583, "step": 14258 }, { "epoch": 0.9, "grad_norm": 2.704137124756935, "learning_rate": 2.770740804391209e-07, "loss": 0.2651, "step": 14259 }, { "epoch": 0.9, "grad_norm": 1.5746760808446791, "learning_rate": 2.767398468026522e-07, "loss": 0.2547, "step": 14260 }, { "epoch": 0.9, "grad_norm": 1.4546499789228757, "learning_rate": 2.764058091426375e-07, "loss": 0.2529, "step": 14261 }, { "epoch": 0.9, "grad_norm": 1.9139363708796495, "learning_rate": 2.760719674729362e-07, "loss": 0.2607, "step": 14262 }, { "epoch": 0.9, "grad_norm": 1.6077906748820867, "learning_rate": 2.757383218073983e-07, "loss": 0.2686, "step": 14263 }, { "epoch": 0.9, "grad_norm": 1.965502298767634, "learning_rate": 2.7540487215986956e-07, "loss": 0.2644, "step": 14264 }, { "epoch": 0.9, "grad_norm": 6.499242183724396, "learning_rate": 2.750716185441843e-07, "loss": 0.2774, "step": 14265 }, { "epoch": 0.9, "grad_norm": 1.7116661990277742, "learning_rate": 2.747385609741704e-07, "loss": 0.2505, "step": 14266 }, { "epoch": 0.9, "grad_norm": 1.5395731190456927, "learning_rate": 2.7440569946364624e-07, "loss": 0.2774, "step": 14267 }, { "epoch": 0.9, "grad_norm": 1.7632531684727295, "learning_rate": 2.7407303402642305e-07, "loss": 0.2357, "step": 14268 }, { "epoch": 0.9, "grad_norm": 4.288278037230518, "learning_rate": 2.737405646763042e-07, "loss": 0.2523, "step": 14269 }, { "epoch": 0.9, "grad_norm": 3.7551981683033855, "learning_rate": 2.7340829142708413e-07, "loss": 0.2626, "step": 14270 }, { "epoch": 0.9, "grad_norm": 4.598934700154759, "learning_rate": 2.730762142925492e-07, "loss": 0.2698, "step": 14271 }, { "epoch": 0.9, "grad_norm": 1.3412242311082365, "learning_rate": 2.727443332864782e-07, "loss": 0.2633, "step": 14272 }, { "epoch": 0.9, "grad_norm": 5.464517531893419, "learning_rate": 2.724126484226408e-07, "loss": 0.2382, "step": 14273 }, { "epoch": 0.9, "grad_norm": 1.4759007137241236, "learning_rate": 2.720811597148004e-07, "loss": 0.2479, "step": 14274 }, { "epoch": 0.9, "grad_norm": 25.146667317474584, "learning_rate": 2.7174986717670995e-07, "loss": 0.2456, "step": 14275 }, { "epoch": 0.9, "grad_norm": 1.3813183177187598, "learning_rate": 2.7141877082211664e-07, "loss": 0.2673, "step": 14276 }, { "epoch": 0.9, "grad_norm": 1.2713790345902896, "learning_rate": 2.710878706647574e-07, "loss": 0.2469, "step": 14277 }, { "epoch": 0.9, "grad_norm": 1.790607553768402, "learning_rate": 2.707571667183617e-07, "loss": 0.2391, "step": 14278 }, { "epoch": 0.9, "grad_norm": 0.5958337388918039, "learning_rate": 2.704266589966503e-07, "loss": 0.4604, "step": 14279 }, { "epoch": 0.9, "grad_norm": 2.13640474544427, "learning_rate": 2.700963475133378e-07, "loss": 0.2527, "step": 14280 }, { "epoch": 0.9, "grad_norm": 13.868114083564988, "learning_rate": 2.6976623228212984e-07, "loss": 0.2594, "step": 14281 }, { "epoch": 0.9, "grad_norm": 4.599182555203552, "learning_rate": 2.694363133167227e-07, "loss": 0.2476, "step": 14282 }, { "epoch": 0.9, "grad_norm": 1.4228390381837008, "learning_rate": 2.6910659063080436e-07, "loss": 0.2458, "step": 14283 }, { "epoch": 0.9, "grad_norm": 2.0723920804520524, "learning_rate": 2.687770642380577e-07, "loss": 0.2479, "step": 14284 }, { "epoch": 0.9, "grad_norm": 1.8751990552545472, "learning_rate": 2.684477341521541e-07, "loss": 0.2351, "step": 14285 }, { "epoch": 0.9, "grad_norm": 6.010661882498264, "learning_rate": 2.681186003867581e-07, "loss": 0.2362, "step": 14286 }, { "epoch": 0.9, "grad_norm": 3.0588857871857464, "learning_rate": 2.6778966295552546e-07, "loss": 0.2571, "step": 14287 }, { "epoch": 0.9, "grad_norm": 1.7254992247541738, "learning_rate": 2.674609218721053e-07, "loss": 0.2582, "step": 14288 }, { "epoch": 0.9, "grad_norm": 2.4153781255581883, "learning_rate": 2.6713237715013773e-07, "loss": 0.2272, "step": 14289 }, { "epoch": 0.9, "grad_norm": 2.188195995898477, "learning_rate": 2.668040288032547e-07, "loss": 0.2548, "step": 14290 }, { "epoch": 0.9, "grad_norm": 1.9173221933713778, "learning_rate": 2.664758768450787e-07, "loss": 0.2433, "step": 14291 }, { "epoch": 0.9, "grad_norm": 1.454731247284654, "learning_rate": 2.6614792128922704e-07, "loss": 0.2399, "step": 14292 }, { "epoch": 0.9, "grad_norm": 1.8180687209741582, "learning_rate": 2.6582016214930617e-07, "loss": 0.2396, "step": 14293 }, { "epoch": 0.9, "grad_norm": 4.232075975795993, "learning_rate": 2.6549259943891567e-07, "loss": 0.264, "step": 14294 }, { "epoch": 0.9, "grad_norm": 1.4984812271406946, "learning_rate": 2.6516523317164647e-07, "loss": 0.2487, "step": 14295 }, { "epoch": 0.9, "grad_norm": 1.888018384220596, "learning_rate": 2.648380633610814e-07, "loss": 0.2498, "step": 14296 }, { "epoch": 0.9, "grad_norm": 3.2812566108764694, "learning_rate": 2.645110900207959e-07, "loss": 0.2475, "step": 14297 }, { "epoch": 0.9, "grad_norm": 2.02868864682591, "learning_rate": 2.641843131643573e-07, "loss": 0.2654, "step": 14298 }, { "epoch": 0.9, "grad_norm": 6.377008929284228, "learning_rate": 2.638577328053221e-07, "loss": 0.2389, "step": 14299 }, { "epoch": 0.9, "grad_norm": 1.6874707178334174, "learning_rate": 2.6353134895724273e-07, "loss": 0.2376, "step": 14300 }, { "epoch": 0.9, "grad_norm": 0.5864689451606748, "learning_rate": 2.632051616336606e-07, "loss": 0.4357, "step": 14301 }, { "epoch": 0.9, "grad_norm": 1.7089429809272876, "learning_rate": 2.628791708481099e-07, "loss": 0.2554, "step": 14302 }, { "epoch": 0.9, "grad_norm": 1.9622773307672783, "learning_rate": 2.625533766141153e-07, "loss": 0.2371, "step": 14303 }, { "epoch": 0.9, "grad_norm": 0.5524344654306752, "learning_rate": 2.622277789451966e-07, "loss": 0.4532, "step": 14304 }, { "epoch": 0.9, "grad_norm": 10.304183526786705, "learning_rate": 2.6190237785486237e-07, "loss": 0.2519, "step": 14305 }, { "epoch": 0.9, "grad_norm": 2.68409037718728, "learning_rate": 2.615771733566147e-07, "loss": 0.2468, "step": 14306 }, { "epoch": 0.9, "grad_norm": 2.051235304864181, "learning_rate": 2.6125216546394605e-07, "loss": 0.2468, "step": 14307 }, { "epoch": 0.9, "grad_norm": 1.5563011857453501, "learning_rate": 2.609273541903423e-07, "loss": 0.2477, "step": 14308 }, { "epoch": 0.9, "grad_norm": 2.5047866240943875, "learning_rate": 2.606027395492805e-07, "loss": 0.2472, "step": 14309 }, { "epoch": 0.9, "grad_norm": 1.3888584474943344, "learning_rate": 2.6027832155422816e-07, "loss": 0.2334, "step": 14310 }, { "epoch": 0.9, "grad_norm": 19.57147551750055, "learning_rate": 2.599541002186479e-07, "loss": 0.2357, "step": 14311 }, { "epoch": 0.9, "grad_norm": 1.9039100663689588, "learning_rate": 2.5963007555599053e-07, "loss": 0.2385, "step": 14312 }, { "epoch": 0.9, "grad_norm": 3.9219853419890818, "learning_rate": 2.5930624757970205e-07, "loss": 0.2683, "step": 14313 }, { "epoch": 0.9, "grad_norm": 0.6234271727171482, "learning_rate": 2.5898261630321717e-07, "loss": 0.4833, "step": 14314 }, { "epoch": 0.9, "grad_norm": 1.272651886494399, "learning_rate": 2.5865918173996476e-07, "loss": 0.2535, "step": 14315 }, { "epoch": 0.9, "grad_norm": 2.1642482305498887, "learning_rate": 2.5833594390336447e-07, "loss": 0.2562, "step": 14316 }, { "epoch": 0.9, "grad_norm": 2.4491858440788827, "learning_rate": 2.580129028068273e-07, "loss": 0.2455, "step": 14317 }, { "epoch": 0.9, "grad_norm": 2.0427806683094007, "learning_rate": 2.576900584637582e-07, "loss": 0.2562, "step": 14318 }, { "epoch": 0.9, "grad_norm": 1.8198197369485298, "learning_rate": 2.5736741088755237e-07, "loss": 0.2501, "step": 14319 }, { "epoch": 0.9, "grad_norm": 1.7798821511568765, "learning_rate": 2.570449600915953e-07, "loss": 0.2625, "step": 14320 }, { "epoch": 0.9, "grad_norm": 1.810146008158133, "learning_rate": 2.5672270608926855e-07, "loss": 0.2701, "step": 14321 }, { "epoch": 0.9, "grad_norm": 3.6614336260490514, "learning_rate": 2.5640064889394133e-07, "loss": 0.2521, "step": 14322 }, { "epoch": 0.9, "grad_norm": 2.0944552441167548, "learning_rate": 2.560787885189758e-07, "loss": 0.2378, "step": 14323 }, { "epoch": 0.9, "grad_norm": 1.766888339721811, "learning_rate": 2.5575712497772855e-07, "loss": 0.238, "step": 14324 }, { "epoch": 0.9, "grad_norm": 1.3644201234443156, "learning_rate": 2.554356582835443e-07, "loss": 0.2379, "step": 14325 }, { "epoch": 0.9, "grad_norm": 1.6866521254325533, "learning_rate": 2.55114388449762e-07, "loss": 0.2405, "step": 14326 }, { "epoch": 0.9, "grad_norm": 1.5681223064819383, "learning_rate": 2.5479331548971197e-07, "loss": 0.2653, "step": 14327 }, { "epoch": 0.9, "grad_norm": 2.453224986863318, "learning_rate": 2.544724394167153e-07, "loss": 0.2489, "step": 14328 }, { "epoch": 0.9, "grad_norm": 3.2150220301972587, "learning_rate": 2.5415176024408685e-07, "loss": 0.2578, "step": 14329 }, { "epoch": 0.9, "grad_norm": 4.820358215394252, "learning_rate": 2.5383127798513094e-07, "loss": 0.2462, "step": 14330 }, { "epoch": 0.9, "grad_norm": 2.4161023122403655, "learning_rate": 2.535109926531448e-07, "loss": 0.2375, "step": 14331 }, { "epoch": 0.9, "grad_norm": 1.6473279939021952, "learning_rate": 2.531909042614189e-07, "loss": 0.2449, "step": 14332 }, { "epoch": 0.9, "grad_norm": 2.0198870517579244, "learning_rate": 2.528710128232337e-07, "loss": 0.2579, "step": 14333 }, { "epoch": 0.9, "grad_norm": 4.056495398028475, "learning_rate": 2.525513183518624e-07, "loss": 0.2614, "step": 14334 }, { "epoch": 0.9, "grad_norm": 3.9809781313122357, "learning_rate": 2.5223182086056897e-07, "loss": 0.2776, "step": 14335 }, { "epoch": 0.9, "grad_norm": 2.0434178773472356, "learning_rate": 2.5191252036260995e-07, "loss": 0.2606, "step": 14336 }, { "epoch": 0.9, "grad_norm": 24.372143294614947, "learning_rate": 2.5159341687123416e-07, "loss": 0.2412, "step": 14337 }, { "epoch": 0.9, "grad_norm": 2.2169340053483104, "learning_rate": 2.512745103996822e-07, "loss": 0.2494, "step": 14338 }, { "epoch": 0.9, "grad_norm": 3.239186597790997, "learning_rate": 2.5095580096118454e-07, "loss": 0.2634, "step": 14339 }, { "epoch": 0.9, "grad_norm": 1.440272853098814, "learning_rate": 2.506372885689662e-07, "loss": 0.2594, "step": 14340 }, { "epoch": 0.9, "grad_norm": 2.489070722551848, "learning_rate": 2.503189732362432e-07, "loss": 0.2652, "step": 14341 }, { "epoch": 0.9, "grad_norm": 1.5590398551133493, "learning_rate": 2.5000085497622227e-07, "loss": 0.255, "step": 14342 }, { "epoch": 0.9, "grad_norm": 2.299914823370509, "learning_rate": 2.496829338021028e-07, "loss": 0.2448, "step": 14343 }, { "epoch": 0.9, "grad_norm": 6.067053193087143, "learning_rate": 2.4936520972707487e-07, "loss": 0.2686, "step": 14344 }, { "epoch": 0.9, "grad_norm": 1.384261052662673, "learning_rate": 2.49047682764324e-07, "loss": 0.2452, "step": 14345 }, { "epoch": 0.9, "grad_norm": 1.5748765177490964, "learning_rate": 2.4873035292702243e-07, "loss": 0.2394, "step": 14346 }, { "epoch": 0.9, "grad_norm": 2.994306853704753, "learning_rate": 2.484132202283379e-07, "loss": 0.235, "step": 14347 }, { "epoch": 0.9, "grad_norm": 4.551821758825019, "learning_rate": 2.4809628468142834e-07, "loss": 0.2566, "step": 14348 }, { "epoch": 0.9, "grad_norm": 3.728590827808792, "learning_rate": 2.477795462994448e-07, "loss": 0.2514, "step": 14349 }, { "epoch": 0.9, "grad_norm": 1.7651862151945834, "learning_rate": 2.47463005095529e-07, "loss": 0.2599, "step": 14350 }, { "epoch": 0.9, "grad_norm": 1.5845281034009686, "learning_rate": 2.4714666108281436e-07, "loss": 0.2386, "step": 14351 }, { "epoch": 0.9, "grad_norm": 2.3639805165174725, "learning_rate": 2.468305142744259e-07, "loss": 0.2458, "step": 14352 }, { "epoch": 0.9, "grad_norm": 2.3358090310402124, "learning_rate": 2.465145646834832e-07, "loss": 0.2839, "step": 14353 }, { "epoch": 0.9, "grad_norm": 1.8326084014107749, "learning_rate": 2.4619881232309405e-07, "loss": 0.2583, "step": 14354 }, { "epoch": 0.9, "grad_norm": 3.716078901090518, "learning_rate": 2.458832572063591e-07, "loss": 0.2569, "step": 14355 }, { "epoch": 0.9, "grad_norm": 1.665638152776214, "learning_rate": 2.4556789934637226e-07, "loss": 0.2643, "step": 14356 }, { "epoch": 0.9, "grad_norm": 2.3691234592486214, "learning_rate": 2.452527387562187e-07, "loss": 0.264, "step": 14357 }, { "epoch": 0.9, "grad_norm": 1.6303826087677693, "learning_rate": 2.44937775448974e-07, "loss": 0.248, "step": 14358 }, { "epoch": 0.9, "grad_norm": 2.0313384949244813, "learning_rate": 2.446230094377067e-07, "loss": 0.2543, "step": 14359 }, { "epoch": 0.9, "grad_norm": 3.520181498866394, "learning_rate": 2.4430844073547786e-07, "loss": 0.2543, "step": 14360 }, { "epoch": 0.9, "grad_norm": 1.48213903885956, "learning_rate": 2.439940693553389e-07, "loss": 0.2608, "step": 14361 }, { "epoch": 0.9, "grad_norm": 1.6595244579834318, "learning_rate": 2.436798953103331e-07, "loss": 0.2354, "step": 14362 }, { "epoch": 0.9, "grad_norm": 2.821776732101652, "learning_rate": 2.4336591861349734e-07, "loss": 0.2395, "step": 14363 }, { "epoch": 0.9, "grad_norm": 2.6243041882966267, "learning_rate": 2.430521392778573e-07, "loss": 0.2387, "step": 14364 }, { "epoch": 0.9, "grad_norm": 2.224976937315625, "learning_rate": 2.4273855731643427e-07, "loss": 0.265, "step": 14365 }, { "epoch": 0.9, "grad_norm": 4.428468718138498, "learning_rate": 2.4242517274223776e-07, "loss": 0.2701, "step": 14366 }, { "epoch": 0.9, "grad_norm": 3.651647854495142, "learning_rate": 2.421119855682713e-07, "loss": 0.2388, "step": 14367 }, { "epoch": 0.9, "grad_norm": 2.1431795516245846, "learning_rate": 2.417989958075295e-07, "loss": 0.2473, "step": 14368 }, { "epoch": 0.9, "grad_norm": 1.736890072614024, "learning_rate": 2.414862034729998e-07, "loss": 0.2267, "step": 14369 }, { "epoch": 0.9, "grad_norm": 2.6582103219946127, "learning_rate": 2.4117360857765836e-07, "loss": 0.2711, "step": 14370 }, { "epoch": 0.9, "grad_norm": 1.9213955854946294, "learning_rate": 2.408612111344771e-07, "loss": 0.2475, "step": 14371 }, { "epoch": 0.9, "grad_norm": 1.453657492157642, "learning_rate": 2.4054901115641684e-07, "loss": 0.2423, "step": 14372 }, { "epoch": 0.9, "grad_norm": 2.2477633989523333, "learning_rate": 2.402370086564326e-07, "loss": 0.2734, "step": 14373 }, { "epoch": 0.9, "grad_norm": 3.1075849917982463, "learning_rate": 2.399252036474686e-07, "loss": 0.2374, "step": 14374 }, { "epoch": 0.9, "grad_norm": 0.5765891430791905, "learning_rate": 2.396135961424628e-07, "loss": 0.4522, "step": 14375 }, { "epoch": 0.9, "grad_norm": 2.3969949856910713, "learning_rate": 2.393021861543449e-07, "loss": 0.2587, "step": 14376 }, { "epoch": 0.9, "grad_norm": 2.2012000712684663, "learning_rate": 2.3899097369603385e-07, "loss": 0.2437, "step": 14377 }, { "epoch": 0.9, "grad_norm": 1.3542205146342408, "learning_rate": 2.38679958780445e-07, "loss": 0.2485, "step": 14378 }, { "epoch": 0.9, "grad_norm": 3.7431888350531586, "learning_rate": 2.3836914142048194e-07, "loss": 0.2611, "step": 14379 }, { "epoch": 0.9, "grad_norm": 1.878420594068702, "learning_rate": 2.3805852162903987e-07, "loss": 0.2539, "step": 14380 }, { "epoch": 0.9, "grad_norm": 2.0501687454298296, "learning_rate": 2.3774809941900844e-07, "loss": 0.2462, "step": 14381 }, { "epoch": 0.9, "grad_norm": 1.6814288297169528, "learning_rate": 2.3743787480326742e-07, "loss": 0.2493, "step": 14382 }, { "epoch": 0.9, "grad_norm": 1.4408187485143142, "learning_rate": 2.3712784779468756e-07, "loss": 0.2543, "step": 14383 }, { "epoch": 0.9, "grad_norm": 2.189193046287574, "learning_rate": 2.3681801840613362e-07, "loss": 0.2395, "step": 14384 }, { "epoch": 0.9, "grad_norm": 2.229798587352424, "learning_rate": 2.3650838665045972e-07, "loss": 0.2501, "step": 14385 }, { "epoch": 0.9, "grad_norm": 0.5913219911759955, "learning_rate": 2.361989525405145e-07, "loss": 0.5088, "step": 14386 }, { "epoch": 0.9, "grad_norm": 1.6949106012743453, "learning_rate": 2.3588971608913604e-07, "loss": 0.2478, "step": 14387 }, { "epoch": 0.9, "grad_norm": 1.6218761311965364, "learning_rate": 2.3558067730915513e-07, "loss": 0.265, "step": 14388 }, { "epoch": 0.9, "grad_norm": 0.5670724941813509, "learning_rate": 2.3527183621339434e-07, "loss": 0.4422, "step": 14389 }, { "epoch": 0.9, "grad_norm": 1.608523501003697, "learning_rate": 2.3496319281466895e-07, "loss": 0.242, "step": 14390 }, { "epoch": 0.91, "grad_norm": 1.733549344985571, "learning_rate": 2.346547471257832e-07, "loss": 0.2607, "step": 14391 }, { "epoch": 0.91, "grad_norm": 4.095912393550003, "learning_rate": 2.3434649915953623e-07, "loss": 0.2513, "step": 14392 }, { "epoch": 0.91, "grad_norm": 3.9514309466854387, "learning_rate": 2.3403844892871896e-07, "loss": 0.2403, "step": 14393 }, { "epoch": 0.91, "grad_norm": 2.280382700689735, "learning_rate": 2.337305964461112e-07, "loss": 0.2472, "step": 14394 }, { "epoch": 0.91, "grad_norm": 7.861369097382789, "learning_rate": 2.334229417244871e-07, "loss": 0.2915, "step": 14395 }, { "epoch": 0.91, "grad_norm": 1.7780582043153892, "learning_rate": 2.3311548477661038e-07, "loss": 0.2412, "step": 14396 }, { "epoch": 0.91, "grad_norm": 1.3023809363689742, "learning_rate": 2.3280822561524031e-07, "loss": 0.2448, "step": 14397 }, { "epoch": 0.91, "grad_norm": 1.3497213326356283, "learning_rate": 2.325011642531244e-07, "loss": 0.2253, "step": 14398 }, { "epoch": 0.91, "grad_norm": 2.0955837113376727, "learning_rate": 2.3219430070300254e-07, "loss": 0.2589, "step": 14399 }, { "epoch": 0.91, "grad_norm": 2.2183426439622114, "learning_rate": 2.3188763497760725e-07, "loss": 0.2486, "step": 14400 }, { "epoch": 0.91, "grad_norm": 8.473385434235075, "learning_rate": 2.3158116708966448e-07, "loss": 0.2436, "step": 14401 }, { "epoch": 0.91, "grad_norm": 2.461460164221812, "learning_rate": 2.31274897051888e-07, "loss": 0.2448, "step": 14402 }, { "epoch": 0.91, "grad_norm": 5.590503190490564, "learning_rate": 2.3096882487698703e-07, "loss": 0.2574, "step": 14403 }, { "epoch": 0.91, "grad_norm": 1.8828166767004388, "learning_rate": 2.306629505776592e-07, "loss": 0.2563, "step": 14404 }, { "epoch": 0.91, "grad_norm": 1.951661123735288, "learning_rate": 2.3035727416659769e-07, "loss": 0.2545, "step": 14405 }, { "epoch": 0.91, "grad_norm": 1.580776276640741, "learning_rate": 2.300517956564846e-07, "loss": 0.2396, "step": 14406 }, { "epoch": 0.91, "grad_norm": 0.6372326130218704, "learning_rate": 2.2974651505999425e-07, "loss": 0.4515, "step": 14407 }, { "epoch": 0.91, "grad_norm": 1.6914910927303681, "learning_rate": 2.2944143238979366e-07, "loss": 0.2527, "step": 14408 }, { "epoch": 0.91, "grad_norm": 1.5940336644161897, "learning_rate": 2.291365476585422e-07, "loss": 0.2505, "step": 14409 }, { "epoch": 0.91, "grad_norm": 2.0332868183136243, "learning_rate": 2.2883186087888977e-07, "loss": 0.2595, "step": 14410 }, { "epoch": 0.91, "grad_norm": 2.3147909450333746, "learning_rate": 2.2852737206347786e-07, "loss": 0.2482, "step": 14411 }, { "epoch": 0.91, "grad_norm": 2.6875666958481723, "learning_rate": 2.2822308122493976e-07, "loss": 0.2426, "step": 14412 }, { "epoch": 0.91, "grad_norm": 2.4211114418589434, "learning_rate": 2.2791898837590197e-07, "loss": 0.2424, "step": 14413 }, { "epoch": 0.91, "grad_norm": 2.3004597385252072, "learning_rate": 2.2761509352898114e-07, "loss": 0.2405, "step": 14414 }, { "epoch": 0.91, "grad_norm": 2.79546572072472, "learning_rate": 2.2731139669678714e-07, "loss": 0.2537, "step": 14415 }, { "epoch": 0.91, "grad_norm": 1.8991778276486342, "learning_rate": 2.2700789789192047e-07, "loss": 0.2639, "step": 14416 }, { "epoch": 0.91, "grad_norm": 2.1232139901719056, "learning_rate": 2.267045971269738e-07, "loss": 0.2575, "step": 14417 }, { "epoch": 0.91, "grad_norm": 6.485955926120402, "learning_rate": 2.2640149441453208e-07, "loss": 0.2552, "step": 14418 }, { "epoch": 0.91, "grad_norm": 1.736427901752236, "learning_rate": 2.2609858976717136e-07, "loss": 0.2514, "step": 14419 }, { "epoch": 0.91, "grad_norm": 3.1880579531991087, "learning_rate": 2.2579588319745883e-07, "loss": 0.232, "step": 14420 }, { "epoch": 0.91, "grad_norm": 1.815118180142573, "learning_rate": 2.2549337471795553e-07, "loss": 0.2424, "step": 14421 }, { "epoch": 0.91, "grad_norm": 2.3752905844804904, "learning_rate": 2.2519106434121252e-07, "loss": 0.2527, "step": 14422 }, { "epoch": 0.91, "grad_norm": 7.792289956779686, "learning_rate": 2.2488895207977312e-07, "loss": 0.2513, "step": 14423 }, { "epoch": 0.91, "grad_norm": 2.562866945134141, "learning_rate": 2.245870379461723e-07, "loss": 0.2743, "step": 14424 }, { "epoch": 0.91, "grad_norm": 1.6285545836109259, "learning_rate": 2.242853219529384e-07, "loss": 0.2343, "step": 14425 }, { "epoch": 0.91, "grad_norm": 6.25526385611361, "learning_rate": 2.2398380411258858e-07, "loss": 0.247, "step": 14426 }, { "epoch": 0.91, "grad_norm": 1.3273318668415506, "learning_rate": 2.2368248443763451e-07, "loss": 0.2348, "step": 14427 }, { "epoch": 0.91, "grad_norm": 2.2994044788467543, "learning_rate": 2.2338136294057677e-07, "loss": 0.2448, "step": 14428 }, { "epoch": 0.91, "grad_norm": 3.657924935810576, "learning_rate": 2.2308043963391034e-07, "loss": 0.2749, "step": 14429 }, { "epoch": 0.91, "grad_norm": 1.9342738710559804, "learning_rate": 2.2277971453012193e-07, "loss": 0.2678, "step": 14430 }, { "epoch": 0.91, "grad_norm": 2.58915421386696, "learning_rate": 2.2247918764168874e-07, "loss": 0.2363, "step": 14431 }, { "epoch": 0.91, "grad_norm": 1.3492375979172468, "learning_rate": 2.2217885898107915e-07, "loss": 0.2566, "step": 14432 }, { "epoch": 0.91, "grad_norm": 2.0956570689213967, "learning_rate": 2.2187872856075544e-07, "loss": 0.2487, "step": 14433 }, { "epoch": 0.91, "grad_norm": 2.3599289344086705, "learning_rate": 2.2157879639317038e-07, "loss": 0.2549, "step": 14434 }, { "epoch": 0.91, "grad_norm": 3.4080477810531007, "learning_rate": 2.2127906249076903e-07, "loss": 0.2407, "step": 14435 }, { "epoch": 0.91, "grad_norm": 7.772172873548713, "learning_rate": 2.209795268659859e-07, "loss": 0.2739, "step": 14436 }, { "epoch": 0.91, "grad_norm": 1.7411833702255235, "learning_rate": 2.2068018953125103e-07, "loss": 0.2594, "step": 14437 }, { "epoch": 0.91, "grad_norm": 3.220970600829254, "learning_rate": 2.203810504989845e-07, "loss": 0.2555, "step": 14438 }, { "epoch": 0.91, "grad_norm": 0.5872756190965056, "learning_rate": 2.2008210978159806e-07, "loss": 0.465, "step": 14439 }, { "epoch": 0.91, "grad_norm": 1.851639766650723, "learning_rate": 2.1978336739149454e-07, "loss": 0.2621, "step": 14440 }, { "epoch": 0.91, "grad_norm": 1.6739715102465174, "learning_rate": 2.1948482334106957e-07, "loss": 0.2453, "step": 14441 }, { "epoch": 0.91, "grad_norm": 2.050088892825613, "learning_rate": 2.1918647764271105e-07, "loss": 0.2378, "step": 14442 }, { "epoch": 0.91, "grad_norm": 1.8791823839520525, "learning_rate": 2.1888833030879685e-07, "loss": 0.2463, "step": 14443 }, { "epoch": 0.91, "grad_norm": 2.755452865345263, "learning_rate": 2.1859038135169764e-07, "loss": 0.2743, "step": 14444 }, { "epoch": 0.91, "grad_norm": 2.249296166630754, "learning_rate": 2.1829263078377683e-07, "loss": 0.259, "step": 14445 }, { "epoch": 0.91, "grad_norm": 1.645143216423926, "learning_rate": 2.179950786173879e-07, "loss": 0.2462, "step": 14446 }, { "epoch": 0.91, "grad_norm": 1.779318669753915, "learning_rate": 2.1769772486487705e-07, "loss": 0.2586, "step": 14447 }, { "epoch": 0.91, "grad_norm": 4.837384937670963, "learning_rate": 2.174005695385817e-07, "loss": 0.2718, "step": 14448 }, { "epoch": 0.91, "grad_norm": 3.052738596929044, "learning_rate": 2.171036126508319e-07, "loss": 0.2602, "step": 14449 }, { "epoch": 0.91, "grad_norm": 2.7928776179684216, "learning_rate": 2.1680685421394842e-07, "loss": 0.2711, "step": 14450 }, { "epoch": 0.91, "grad_norm": 2.4483639431258877, "learning_rate": 2.1651029424024417e-07, "loss": 0.2412, "step": 14451 }, { "epoch": 0.91, "grad_norm": 7.2787994474514255, "learning_rate": 2.1621393274202429e-07, "loss": 0.259, "step": 14452 }, { "epoch": 0.91, "grad_norm": 2.6261215902194026, "learning_rate": 2.1591776973158564e-07, "loss": 0.2349, "step": 14453 }, { "epoch": 0.91, "grad_norm": 1.6680651222154, "learning_rate": 2.1562180522121613e-07, "loss": 0.2498, "step": 14454 }, { "epoch": 0.91, "grad_norm": 1.8700943683892384, "learning_rate": 2.153260392231965e-07, "loss": 0.2511, "step": 14455 }, { "epoch": 0.91, "grad_norm": 1.567964666506948, "learning_rate": 2.1503047174979695e-07, "loss": 0.2715, "step": 14456 }, { "epoch": 0.91, "grad_norm": 4.614727062017292, "learning_rate": 2.147351028132827e-07, "loss": 0.2762, "step": 14457 }, { "epoch": 0.91, "grad_norm": 2.8235129338382885, "learning_rate": 2.144399324259089e-07, "loss": 0.2731, "step": 14458 }, { "epoch": 0.91, "grad_norm": 2.5803840532779754, "learning_rate": 2.1414496059992183e-07, "loss": 0.2477, "step": 14459 }, { "epoch": 0.91, "grad_norm": 2.0769844778878053, "learning_rate": 2.138501873475607e-07, "loss": 0.2418, "step": 14460 }, { "epoch": 0.91, "grad_norm": 2.7474217544896757, "learning_rate": 2.135556126810573e-07, "loss": 0.241, "step": 14461 }, { "epoch": 0.91, "grad_norm": 2.3863284081175946, "learning_rate": 2.13261236612633e-07, "loss": 0.2686, "step": 14462 }, { "epoch": 0.91, "grad_norm": 0.6041412725538277, "learning_rate": 2.1296705915450244e-07, "loss": 0.488, "step": 14463 }, { "epoch": 0.91, "grad_norm": 1.34452761724846, "learning_rate": 2.1267308031887036e-07, "loss": 0.237, "step": 14464 }, { "epoch": 0.91, "grad_norm": 3.4947442283899726, "learning_rate": 2.123793001179364e-07, "loss": 0.2694, "step": 14465 }, { "epoch": 0.91, "grad_norm": 3.26729705768746, "learning_rate": 2.1208571856388915e-07, "loss": 0.2596, "step": 14466 }, { "epoch": 0.91, "grad_norm": 1.775837042839868, "learning_rate": 2.117923356689089e-07, "loss": 0.2589, "step": 14467 }, { "epoch": 0.91, "grad_norm": 2.5906264427923897, "learning_rate": 2.114991514451692e-07, "loss": 0.2517, "step": 14468 }, { "epoch": 0.91, "grad_norm": 0.6102118575831724, "learning_rate": 2.112061659048359e-07, "loss": 0.4594, "step": 14469 }, { "epoch": 0.91, "grad_norm": 3.4509361422530795, "learning_rate": 2.109133790600648e-07, "loss": 0.2598, "step": 14470 }, { "epoch": 0.91, "grad_norm": 4.615792194612049, "learning_rate": 2.1062079092300402e-07, "loss": 0.2468, "step": 14471 }, { "epoch": 0.91, "grad_norm": 2.0041096957855564, "learning_rate": 2.103284015057927e-07, "loss": 0.2534, "step": 14472 }, { "epoch": 0.91, "grad_norm": 2.2664870866984965, "learning_rate": 2.1003621082056392e-07, "loss": 0.2418, "step": 14473 }, { "epoch": 0.91, "grad_norm": 2.3768270188270497, "learning_rate": 2.097442188794402e-07, "loss": 0.2837, "step": 14474 }, { "epoch": 0.91, "grad_norm": 1.427340659654771, "learning_rate": 2.0945242569453795e-07, "loss": 0.253, "step": 14475 }, { "epoch": 0.91, "grad_norm": 2.5161216053208117, "learning_rate": 2.091608312779625e-07, "loss": 0.2401, "step": 14476 }, { "epoch": 0.91, "grad_norm": 2.675503374474496, "learning_rate": 2.088694356418147e-07, "loss": 0.2688, "step": 14477 }, { "epoch": 0.91, "grad_norm": 1.8923580657315366, "learning_rate": 2.0857823879818384e-07, "loss": 0.2454, "step": 14478 }, { "epoch": 0.91, "grad_norm": 2.3759402065898474, "learning_rate": 2.0828724075915298e-07, "loss": 0.2556, "step": 14479 }, { "epoch": 0.91, "grad_norm": 2.476388684288924, "learning_rate": 2.079964415367941e-07, "loss": 0.2338, "step": 14480 }, { "epoch": 0.91, "grad_norm": 2.4283349735457755, "learning_rate": 2.0770584114317483e-07, "loss": 0.2636, "step": 14481 }, { "epoch": 0.91, "grad_norm": 1.6486641676674842, "learning_rate": 2.074154395903527e-07, "loss": 0.2402, "step": 14482 }, { "epoch": 0.91, "grad_norm": 1.6213172513410736, "learning_rate": 2.0712523689037645e-07, "loss": 0.2556, "step": 14483 }, { "epoch": 0.91, "grad_norm": 1.8537371799773197, "learning_rate": 2.0683523305528696e-07, "loss": 0.24, "step": 14484 }, { "epoch": 0.91, "grad_norm": 2.7785473319886127, "learning_rate": 2.0654542809711798e-07, "loss": 0.2585, "step": 14485 }, { "epoch": 0.91, "grad_norm": 1.7651719366311103, "learning_rate": 2.0625582202789317e-07, "loss": 0.2319, "step": 14486 }, { "epoch": 0.91, "grad_norm": 2.1273793923211395, "learning_rate": 2.0596641485962854e-07, "loss": 0.2457, "step": 14487 }, { "epoch": 0.91, "grad_norm": 4.21819875878106, "learning_rate": 2.056772066043322e-07, "loss": 0.2698, "step": 14488 }, { "epoch": 0.91, "grad_norm": 1.5709435104374834, "learning_rate": 2.053881972740046e-07, "loss": 0.2511, "step": 14489 }, { "epoch": 0.91, "grad_norm": 1.8761287437363046, "learning_rate": 2.0509938688063723e-07, "loss": 0.2411, "step": 14490 }, { "epoch": 0.91, "grad_norm": 1.9211021468018759, "learning_rate": 2.0481077543621275e-07, "loss": 0.2368, "step": 14491 }, { "epoch": 0.91, "grad_norm": 1.5115448951643704, "learning_rate": 2.04522362952706e-07, "loss": 0.2417, "step": 14492 }, { "epoch": 0.91, "grad_norm": 5.088136531250573, "learning_rate": 2.0423414944208464e-07, "loss": 0.2441, "step": 14493 }, { "epoch": 0.91, "grad_norm": 2.087766755890298, "learning_rate": 2.0394613491630688e-07, "loss": 0.2645, "step": 14494 }, { "epoch": 0.91, "grad_norm": 2.107246310436187, "learning_rate": 2.036583193873226e-07, "loss": 0.2588, "step": 14495 }, { "epoch": 0.91, "grad_norm": 2.204240909998728, "learning_rate": 2.0337070286707283e-07, "loss": 0.2476, "step": 14496 }, { "epoch": 0.91, "grad_norm": 4.413546846926048, "learning_rate": 2.0308328536749355e-07, "loss": 0.2509, "step": 14497 }, { "epoch": 0.91, "grad_norm": 1.7561510964926772, "learning_rate": 2.0279606690050856e-07, "loss": 0.2469, "step": 14498 }, { "epoch": 0.91, "grad_norm": 8.338753213289381, "learning_rate": 2.0250904747803614e-07, "loss": 0.2612, "step": 14499 }, { "epoch": 0.91, "grad_norm": 2.0824586160643634, "learning_rate": 2.0222222711198392e-07, "loss": 0.2393, "step": 14500 }, { "epoch": 0.91, "grad_norm": 2.007315300107275, "learning_rate": 2.0193560581425408e-07, "loss": 0.2318, "step": 14501 }, { "epoch": 0.91, "grad_norm": 1.9865525920497915, "learning_rate": 2.0164918359673769e-07, "loss": 0.2509, "step": 14502 }, { "epoch": 0.91, "grad_norm": 1.7197148031101857, "learning_rate": 2.013629604713202e-07, "loss": 0.2382, "step": 14503 }, { "epoch": 0.91, "grad_norm": 1.8947838501290832, "learning_rate": 2.0107693644987492e-07, "loss": 0.254, "step": 14504 }, { "epoch": 0.91, "grad_norm": 4.902731433394685, "learning_rate": 2.007911115442729e-07, "loss": 0.2588, "step": 14505 }, { "epoch": 0.91, "grad_norm": 2.289651256926037, "learning_rate": 2.005054857663724e-07, "loss": 0.2502, "step": 14506 }, { "epoch": 0.91, "grad_norm": 2.186328093635684, "learning_rate": 2.0022005912802345e-07, "loss": 0.2609, "step": 14507 }, { "epoch": 0.91, "grad_norm": 5.237839020173491, "learning_rate": 1.999348316410693e-07, "loss": 0.2598, "step": 14508 }, { "epoch": 0.91, "grad_norm": 1.3844167421565585, "learning_rate": 1.9964980331734552e-07, "loss": 0.2406, "step": 14509 }, { "epoch": 0.91, "grad_norm": 0.5850318063128662, "learning_rate": 1.9936497416867816e-07, "loss": 0.4568, "step": 14510 }, { "epoch": 0.91, "grad_norm": 1.7980759564399063, "learning_rate": 1.9908034420688394e-07, "loss": 0.266, "step": 14511 }, { "epoch": 0.91, "grad_norm": 1.731553582372513, "learning_rate": 1.9879591344377335e-07, "loss": 0.2584, "step": 14512 }, { "epoch": 0.91, "grad_norm": 1.4881645457346415, "learning_rate": 1.9851168189114923e-07, "loss": 0.2767, "step": 14513 }, { "epoch": 0.91, "grad_norm": 1.590704362330037, "learning_rate": 1.9822764956080375e-07, "loss": 0.2573, "step": 14514 }, { "epoch": 0.91, "grad_norm": 1.706228859345715, "learning_rate": 1.9794381646452198e-07, "loss": 0.2427, "step": 14515 }, { "epoch": 0.91, "grad_norm": 1.93471096274332, "learning_rate": 1.9766018261408005e-07, "loss": 0.2557, "step": 14516 }, { "epoch": 0.91, "grad_norm": 4.9366506521476845, "learning_rate": 1.973767480212474e-07, "loss": 0.255, "step": 14517 }, { "epoch": 0.91, "grad_norm": 2.2290112195682714, "learning_rate": 1.9709351269778408e-07, "loss": 0.272, "step": 14518 }, { "epoch": 0.91, "grad_norm": 3.3340555024224874, "learning_rate": 1.9681047665544127e-07, "loss": 0.2726, "step": 14519 }, { "epoch": 0.91, "grad_norm": 1.747504705651108, "learning_rate": 1.9652763990596345e-07, "loss": 0.2516, "step": 14520 }, { "epoch": 0.91, "grad_norm": 2.562454675871726, "learning_rate": 1.9624500246108625e-07, "loss": 0.2443, "step": 14521 }, { "epoch": 0.91, "grad_norm": 2.5122784108971654, "learning_rate": 1.9596256433253635e-07, "loss": 0.2681, "step": 14522 }, { "epoch": 0.91, "grad_norm": 2.092076801555144, "learning_rate": 1.956803255320322e-07, "loss": 0.2794, "step": 14523 }, { "epoch": 0.91, "grad_norm": 1.704990015234847, "learning_rate": 1.953982860712844e-07, "loss": 0.2503, "step": 14524 }, { "epoch": 0.91, "grad_norm": 0.6040679585377352, "learning_rate": 1.9511644596199643e-07, "loss": 0.4704, "step": 14525 }, { "epoch": 0.91, "grad_norm": 3.2386558262934875, "learning_rate": 1.9483480521586107e-07, "loss": 0.2623, "step": 14526 }, { "epoch": 0.91, "grad_norm": 0.6204712726676649, "learning_rate": 1.945533638445646e-07, "loss": 0.4777, "step": 14527 }, { "epoch": 0.91, "grad_norm": 2.6546399832572103, "learning_rate": 1.9427212185978428e-07, "loss": 0.2554, "step": 14528 }, { "epoch": 0.91, "grad_norm": 3.868847229193744, "learning_rate": 1.9399107927319028e-07, "loss": 0.2563, "step": 14529 }, { "epoch": 0.91, "grad_norm": 2.100870480623982, "learning_rate": 1.9371023609644268e-07, "loss": 0.2759, "step": 14530 }, { "epoch": 0.91, "grad_norm": 1.7400093479151766, "learning_rate": 1.9342959234119385e-07, "loss": 0.2337, "step": 14531 }, { "epoch": 0.91, "grad_norm": 1.7265859632734524, "learning_rate": 1.931491480190889e-07, "loss": 0.2514, "step": 14532 }, { "epoch": 0.91, "grad_norm": 2.2983224300409826, "learning_rate": 1.9286890314176353e-07, "loss": 0.2502, "step": 14533 }, { "epoch": 0.91, "grad_norm": 2.2969022201778935, "learning_rate": 1.9258885772084567e-07, "loss": 0.2416, "step": 14534 }, { "epoch": 0.91, "grad_norm": 7.508959980294738, "learning_rate": 1.9230901176795548e-07, "loss": 0.2665, "step": 14535 }, { "epoch": 0.91, "grad_norm": 3.582752042354064, "learning_rate": 1.9202936529470363e-07, "loss": 0.2625, "step": 14536 }, { "epoch": 0.91, "grad_norm": 2.081445959892226, "learning_rate": 1.9174991831269362e-07, "loss": 0.2679, "step": 14537 }, { "epoch": 0.91, "grad_norm": 1.287830484110232, "learning_rate": 1.9147067083351954e-07, "loss": 0.256, "step": 14538 }, { "epoch": 0.91, "grad_norm": 1.5175800942879811, "learning_rate": 1.911916228687688e-07, "loss": 0.2376, "step": 14539 }, { "epoch": 0.91, "grad_norm": 1.4282350306356104, "learning_rate": 1.909127744300182e-07, "loss": 0.2453, "step": 14540 }, { "epoch": 0.91, "grad_norm": 2.5090376650146013, "learning_rate": 1.9063412552883852e-07, "loss": 0.2629, "step": 14541 }, { "epoch": 0.91, "grad_norm": 3.2789070229309223, "learning_rate": 1.9035567617679163e-07, "loss": 0.2532, "step": 14542 }, { "epoch": 0.91, "grad_norm": 2.200914609783236, "learning_rate": 1.9007742638543104e-07, "loss": 0.2634, "step": 14543 }, { "epoch": 0.91, "grad_norm": 1.4406926563074134, "learning_rate": 1.8979937616630084e-07, "loss": 0.2368, "step": 14544 }, { "epoch": 0.91, "grad_norm": 1.9354443070109852, "learning_rate": 1.8952152553093906e-07, "loss": 0.24, "step": 14545 }, { "epoch": 0.91, "grad_norm": 2.382994471291222, "learning_rate": 1.892438744908731e-07, "loss": 0.2618, "step": 14546 }, { "epoch": 0.91, "grad_norm": 2.4377555631615566, "learning_rate": 1.889664230576238e-07, "loss": 0.251, "step": 14547 }, { "epoch": 0.91, "grad_norm": 2.6563752045766305, "learning_rate": 1.8868917124270248e-07, "loss": 0.2372, "step": 14548 }, { "epoch": 0.91, "grad_norm": 1.7992839791383575, "learning_rate": 1.8841211905761326e-07, "loss": 0.2531, "step": 14549 }, { "epoch": 0.92, "grad_norm": 1.48632246323107, "learning_rate": 1.881352665138525e-07, "loss": 0.2559, "step": 14550 }, { "epoch": 0.92, "grad_norm": 2.2703408625191157, "learning_rate": 1.8785861362290603e-07, "loss": 0.2454, "step": 14551 }, { "epoch": 0.92, "grad_norm": 2.1449732033066944, "learning_rate": 1.8758216039625243e-07, "loss": 0.263, "step": 14552 }, { "epoch": 0.92, "grad_norm": 2.95268284637273, "learning_rate": 1.873059068453631e-07, "loss": 0.2396, "step": 14553 }, { "epoch": 0.92, "grad_norm": 2.5756193425105325, "learning_rate": 1.870298529817005e-07, "loss": 0.2581, "step": 14554 }, { "epoch": 0.92, "grad_norm": 4.413862692424448, "learning_rate": 1.8675399881671774e-07, "loss": 0.2625, "step": 14555 }, { "epoch": 0.92, "grad_norm": 1.978311636808432, "learning_rate": 1.8647834436186009e-07, "loss": 0.2501, "step": 14556 }, { "epoch": 0.92, "grad_norm": 3.0528327277025133, "learning_rate": 1.862028896285667e-07, "loss": 0.2568, "step": 14557 }, { "epoch": 0.92, "grad_norm": 1.7813793578847024, "learning_rate": 1.859276346282657e-07, "loss": 0.2491, "step": 14558 }, { "epoch": 0.92, "grad_norm": 1.5221083090039145, "learning_rate": 1.856525793723779e-07, "loss": 0.2474, "step": 14559 }, { "epoch": 0.92, "grad_norm": 2.390504163933587, "learning_rate": 1.853777238723148e-07, "loss": 0.2787, "step": 14560 }, { "epoch": 0.92, "grad_norm": 5.200196410811054, "learning_rate": 1.8510306813948276e-07, "loss": 0.241, "step": 14561 }, { "epoch": 0.92, "grad_norm": 2.498783783971337, "learning_rate": 1.848286121852766e-07, "loss": 0.2649, "step": 14562 }, { "epoch": 0.92, "grad_norm": 1.679129399596893, "learning_rate": 1.8455435602108328e-07, "loss": 0.2649, "step": 14563 }, { "epoch": 0.92, "grad_norm": 1.9345832226516606, "learning_rate": 1.8428029965828265e-07, "loss": 0.2578, "step": 14564 }, { "epoch": 0.92, "grad_norm": 5.894186801501865, "learning_rate": 1.8400644310824722e-07, "loss": 0.2427, "step": 14565 }, { "epoch": 0.92, "grad_norm": 3.265496929716585, "learning_rate": 1.8373278638233848e-07, "loss": 0.2575, "step": 14566 }, { "epoch": 0.92, "grad_norm": 5.193911921550233, "learning_rate": 1.834593294919107e-07, "loss": 0.2396, "step": 14567 }, { "epoch": 0.92, "grad_norm": 5.398539465303125, "learning_rate": 1.8318607244831033e-07, "loss": 0.2497, "step": 14568 }, { "epoch": 0.92, "grad_norm": 4.081467693048924, "learning_rate": 1.8291301526287609e-07, "loss": 0.2617, "step": 14569 }, { "epoch": 0.92, "grad_norm": 1.7714467938424623, "learning_rate": 1.8264015794693668e-07, "loss": 0.269, "step": 14570 }, { "epoch": 0.92, "grad_norm": 1.8395494084039425, "learning_rate": 1.8236750051181306e-07, "loss": 0.249, "step": 14571 }, { "epoch": 0.92, "grad_norm": 6.3142334681720875, "learning_rate": 1.8209504296881896e-07, "loss": 0.2662, "step": 14572 }, { "epoch": 0.92, "grad_norm": 2.3435171338601832, "learning_rate": 1.818227853292598e-07, "loss": 0.2555, "step": 14573 }, { "epoch": 0.92, "grad_norm": 3.7046417545149053, "learning_rate": 1.8155072760443149e-07, "loss": 0.2416, "step": 14574 }, { "epoch": 0.92, "grad_norm": 1.7680273985641055, "learning_rate": 1.812788698056217e-07, "loss": 0.2514, "step": 14575 }, { "epoch": 0.92, "grad_norm": 2.417788014275917, "learning_rate": 1.810072119441103e-07, "loss": 0.2207, "step": 14576 }, { "epoch": 0.92, "grad_norm": 2.4467649856462055, "learning_rate": 1.8073575403116938e-07, "loss": 0.261, "step": 14577 }, { "epoch": 0.92, "grad_norm": 1.5405873106651053, "learning_rate": 1.804644960780616e-07, "loss": 0.2691, "step": 14578 }, { "epoch": 0.92, "grad_norm": 2.2291964329191467, "learning_rate": 1.8019343809604295e-07, "loss": 0.2568, "step": 14579 }, { "epoch": 0.92, "grad_norm": 2.00943408163364, "learning_rate": 1.7992258009635942e-07, "loss": 0.2352, "step": 14580 }, { "epoch": 0.92, "grad_norm": 2.2836406128994153, "learning_rate": 1.7965192209024928e-07, "loss": 0.2456, "step": 14581 }, { "epoch": 0.92, "grad_norm": 1.832438125777982, "learning_rate": 1.7938146408894296e-07, "loss": 0.2412, "step": 14582 }, { "epoch": 0.92, "grad_norm": 1.929832837409836, "learning_rate": 1.7911120610366262e-07, "loss": 0.2451, "step": 14583 }, { "epoch": 0.92, "grad_norm": 1.6513242660518177, "learning_rate": 1.7884114814561983e-07, "loss": 0.2472, "step": 14584 }, { "epoch": 0.92, "grad_norm": 3.1905309233386983, "learning_rate": 1.7857129022602226e-07, "loss": 0.2466, "step": 14585 }, { "epoch": 0.92, "grad_norm": 1.4813312295210759, "learning_rate": 1.783016323560649e-07, "loss": 0.2347, "step": 14586 }, { "epoch": 0.92, "grad_norm": 2.563174579858422, "learning_rate": 1.7803217454693766e-07, "loss": 0.2482, "step": 14587 }, { "epoch": 0.92, "grad_norm": 1.9113023448252877, "learning_rate": 1.7776291680981995e-07, "loss": 0.2412, "step": 14588 }, { "epoch": 0.92, "grad_norm": 2.2786178310528573, "learning_rate": 1.7749385915588446e-07, "loss": 0.2415, "step": 14589 }, { "epoch": 0.92, "grad_norm": 29.05300778970683, "learning_rate": 1.7722500159629452e-07, "loss": 0.2653, "step": 14590 }, { "epoch": 0.92, "grad_norm": 4.290002453105786, "learning_rate": 1.769563441422051e-07, "loss": 0.2697, "step": 14591 }, { "epoch": 0.92, "grad_norm": 2.092070753675626, "learning_rate": 1.7668788680476334e-07, "loss": 0.2562, "step": 14592 }, { "epoch": 0.92, "grad_norm": 2.3838436164833396, "learning_rate": 1.7641962959510872e-07, "loss": 0.252, "step": 14593 }, { "epoch": 0.92, "grad_norm": 2.3092892132355134, "learning_rate": 1.7615157252437121e-07, "loss": 0.2522, "step": 14594 }, { "epoch": 0.92, "grad_norm": 1.8603278509510393, "learning_rate": 1.7588371560367357e-07, "loss": 0.253, "step": 14595 }, { "epoch": 0.92, "grad_norm": 1.3223387568769163, "learning_rate": 1.7561605884412858e-07, "loss": 0.2407, "step": 14596 }, { "epoch": 0.92, "grad_norm": 1.5872890972249871, "learning_rate": 1.7534860225684292e-07, "loss": 0.2713, "step": 14597 }, { "epoch": 0.92, "grad_norm": 4.67526615629178, "learning_rate": 1.7508134585291325e-07, "loss": 0.2395, "step": 14598 }, { "epoch": 0.92, "grad_norm": 1.986689727135611, "learning_rate": 1.748142896434285e-07, "loss": 0.2468, "step": 14599 }, { "epoch": 0.92, "grad_norm": 2.0775548200314744, "learning_rate": 1.7454743363946868e-07, "loss": 0.249, "step": 14600 }, { "epoch": 0.92, "grad_norm": 2.308339774678808, "learning_rate": 1.7428077785210663e-07, "loss": 0.2538, "step": 14601 }, { "epoch": 0.92, "grad_norm": 1.9148666821890743, "learning_rate": 1.7401432229240733e-07, "loss": 0.252, "step": 14602 }, { "epoch": 0.92, "grad_norm": 2.2472774814567003, "learning_rate": 1.7374806697142588e-07, "loss": 0.2507, "step": 14603 }, { "epoch": 0.92, "grad_norm": 2.1864405904788144, "learning_rate": 1.7348201190020897e-07, "loss": 0.2447, "step": 14604 }, { "epoch": 0.92, "grad_norm": 2.1737520105448525, "learning_rate": 1.7321615708979666e-07, "loss": 0.2357, "step": 14605 }, { "epoch": 0.92, "grad_norm": 1.8597862814083623, "learning_rate": 1.729505025512196e-07, "loss": 0.2456, "step": 14606 }, { "epoch": 0.92, "grad_norm": 1.7164992487378634, "learning_rate": 1.726850482954995e-07, "loss": 0.2436, "step": 14607 }, { "epoch": 0.92, "grad_norm": 6.306614413990062, "learning_rate": 1.7241979433365086e-07, "loss": 0.2708, "step": 14608 }, { "epoch": 0.92, "grad_norm": 1.5303694058213413, "learning_rate": 1.721547406766799e-07, "loss": 0.2535, "step": 14609 }, { "epoch": 0.92, "grad_norm": 1.5516930701767238, "learning_rate": 1.7188988733558397e-07, "loss": 0.2565, "step": 14610 }, { "epoch": 0.92, "grad_norm": 2.152221530150172, "learning_rate": 1.7162523432135258e-07, "loss": 0.2453, "step": 14611 }, { "epoch": 0.92, "grad_norm": 1.9981888361709716, "learning_rate": 1.7136078164496585e-07, "loss": 0.2567, "step": 14612 }, { "epoch": 0.92, "grad_norm": 3.4227979480329496, "learning_rate": 1.7109652931739717e-07, "loss": 0.2439, "step": 14613 }, { "epoch": 0.92, "grad_norm": 2.0908760732808362, "learning_rate": 1.7083247734961117e-07, "loss": 0.2493, "step": 14614 }, { "epoch": 0.92, "grad_norm": 1.599814462824902, "learning_rate": 1.705686257525624e-07, "loss": 0.2726, "step": 14615 }, { "epoch": 0.92, "grad_norm": 1.6584689323598625, "learning_rate": 1.703049745371993e-07, "loss": 0.2503, "step": 14616 }, { "epoch": 0.92, "grad_norm": 4.167687084073127, "learning_rate": 1.7004152371446146e-07, "loss": 0.2372, "step": 14617 }, { "epoch": 0.92, "grad_norm": 1.3676841305083616, "learning_rate": 1.6977827329528017e-07, "loss": 0.2458, "step": 14618 }, { "epoch": 0.92, "grad_norm": 3.2506013133346543, "learning_rate": 1.6951522329057778e-07, "loss": 0.2397, "step": 14619 }, { "epoch": 0.92, "grad_norm": 2.640926651918078, "learning_rate": 1.6925237371126835e-07, "loss": 0.2512, "step": 14620 }, { "epoch": 0.92, "grad_norm": 2.0368443257180364, "learning_rate": 1.689897245682587e-07, "loss": 0.2287, "step": 14621 }, { "epoch": 0.92, "grad_norm": 3.0133313326001674, "learning_rate": 1.687272758724462e-07, "loss": 0.2415, "step": 14622 }, { "epoch": 0.92, "grad_norm": 2.4267649921275667, "learning_rate": 1.6846502763471939e-07, "loss": 0.2583, "step": 14623 }, { "epoch": 0.92, "grad_norm": 1.3878008863385654, "learning_rate": 1.682029798659618e-07, "loss": 0.268, "step": 14624 }, { "epoch": 0.92, "grad_norm": 1.4831645260225281, "learning_rate": 1.679411325770436e-07, "loss": 0.2292, "step": 14625 }, { "epoch": 0.92, "grad_norm": 4.735729228860837, "learning_rate": 1.6767948577883109e-07, "loss": 0.2554, "step": 14626 }, { "epoch": 0.92, "grad_norm": 2.0309999751510386, "learning_rate": 1.6741803948218005e-07, "loss": 0.2595, "step": 14627 }, { "epoch": 0.92, "grad_norm": 3.356528461492822, "learning_rate": 1.6715679369793737e-07, "loss": 0.2595, "step": 14628 }, { "epoch": 0.92, "grad_norm": 1.275932866756124, "learning_rate": 1.6689574843694433e-07, "loss": 0.261, "step": 14629 }, { "epoch": 0.92, "grad_norm": 2.3970316663035125, "learning_rate": 1.6663490371003122e-07, "loss": 0.2604, "step": 14630 }, { "epoch": 0.92, "grad_norm": 3.076284490200964, "learning_rate": 1.663742595280199e-07, "loss": 0.2542, "step": 14631 }, { "epoch": 0.92, "grad_norm": 4.965953426197452, "learning_rate": 1.661138159017267e-07, "loss": 0.2699, "step": 14632 }, { "epoch": 0.92, "grad_norm": 2.1067068405456113, "learning_rate": 1.6585357284195748e-07, "loss": 0.2421, "step": 14633 }, { "epoch": 0.92, "grad_norm": 1.3118731101592522, "learning_rate": 1.6559353035950964e-07, "loss": 0.2449, "step": 14634 }, { "epoch": 0.92, "grad_norm": 2.0456770023365625, "learning_rate": 1.653336884651735e-07, "loss": 0.2421, "step": 14635 }, { "epoch": 0.92, "grad_norm": 2.2109111172764475, "learning_rate": 1.650740471697293e-07, "loss": 0.2572, "step": 14636 }, { "epoch": 0.92, "grad_norm": 2.8522415018301728, "learning_rate": 1.648146064839512e-07, "loss": 0.2899, "step": 14637 }, { "epoch": 0.92, "grad_norm": 2.0267974383100484, "learning_rate": 1.6455536641860282e-07, "loss": 0.2638, "step": 14638 }, { "epoch": 0.92, "grad_norm": 2.106440965143541, "learning_rate": 1.6429632698444165e-07, "loss": 0.2435, "step": 14639 }, { "epoch": 0.92, "grad_norm": 2.92420689986729, "learning_rate": 1.6403748819221464e-07, "loss": 0.2491, "step": 14640 }, { "epoch": 0.92, "grad_norm": 1.6832401826812362, "learning_rate": 1.6377885005266214e-07, "loss": 0.246, "step": 14641 }, { "epoch": 0.92, "grad_norm": 1.5134706548275112, "learning_rate": 1.6352041257651496e-07, "loss": 0.2451, "step": 14642 }, { "epoch": 0.92, "grad_norm": 1.9732603558655655, "learning_rate": 1.6326217577449677e-07, "loss": 0.2621, "step": 14643 }, { "epoch": 0.92, "grad_norm": 1.5648125019183925, "learning_rate": 1.6300413965732176e-07, "loss": 0.2573, "step": 14644 }, { "epoch": 0.92, "grad_norm": 2.2224410022222307, "learning_rate": 1.6274630423569582e-07, "loss": 0.2426, "step": 14645 }, { "epoch": 0.92, "grad_norm": 3.098652634617864, "learning_rate": 1.6248866952031816e-07, "loss": 0.2636, "step": 14646 }, { "epoch": 0.92, "grad_norm": 1.9518050289061362, "learning_rate": 1.622312355218786e-07, "loss": 0.2593, "step": 14647 }, { "epoch": 0.92, "grad_norm": 2.2932777745542174, "learning_rate": 1.6197400225105686e-07, "loss": 0.246, "step": 14648 }, { "epoch": 0.92, "grad_norm": 1.716278503850628, "learning_rate": 1.617169697185278e-07, "loss": 0.2448, "step": 14649 }, { "epoch": 0.92, "grad_norm": 3.036732537531056, "learning_rate": 1.6146013793495564e-07, "loss": 0.2515, "step": 14650 }, { "epoch": 0.92, "grad_norm": 2.430620711198442, "learning_rate": 1.612035069109963e-07, "loss": 0.2367, "step": 14651 }, { "epoch": 0.92, "grad_norm": 6.377390063577401, "learning_rate": 1.6094707665729736e-07, "loss": 0.2644, "step": 14652 }, { "epoch": 0.92, "grad_norm": 2.6673442596094272, "learning_rate": 1.6069084718449978e-07, "loss": 0.2721, "step": 14653 }, { "epoch": 0.92, "grad_norm": 1.7329225185422226, "learning_rate": 1.6043481850323505e-07, "loss": 0.2418, "step": 14654 }, { "epoch": 0.92, "grad_norm": 2.859340110203497, "learning_rate": 1.6017899062412578e-07, "loss": 0.2399, "step": 14655 }, { "epoch": 0.92, "grad_norm": 2.983389765371011, "learning_rate": 1.5992336355778572e-07, "loss": 0.2482, "step": 14656 }, { "epoch": 0.92, "grad_norm": 2.257335331812706, "learning_rate": 1.59667937314823e-07, "loss": 0.2784, "step": 14657 }, { "epoch": 0.92, "grad_norm": 1.519793464270759, "learning_rate": 1.5941271190583473e-07, "loss": 0.2484, "step": 14658 }, { "epoch": 0.92, "grad_norm": 1.7246468564172275, "learning_rate": 1.5915768734141135e-07, "loss": 0.2494, "step": 14659 }, { "epoch": 0.92, "grad_norm": 6.079357549788692, "learning_rate": 1.5890286363213326e-07, "loss": 0.2515, "step": 14660 }, { "epoch": 0.92, "grad_norm": 1.3967370792960778, "learning_rate": 1.5864824078857365e-07, "loss": 0.2315, "step": 14661 }, { "epoch": 0.92, "grad_norm": 1.742727083738558, "learning_rate": 1.5839381882129856e-07, "loss": 0.2474, "step": 14662 }, { "epoch": 0.92, "grad_norm": 1.7703861623718138, "learning_rate": 1.5813959774086396e-07, "loss": 0.2495, "step": 14663 }, { "epoch": 0.92, "grad_norm": 2.21980916951687, "learning_rate": 1.5788557755781642e-07, "loss": 0.2424, "step": 14664 }, { "epoch": 0.92, "grad_norm": 6.670680642865481, "learning_rate": 1.576317582826975e-07, "loss": 0.244, "step": 14665 }, { "epoch": 0.92, "grad_norm": 2.7967100125911086, "learning_rate": 1.5737813992603822e-07, "loss": 0.2396, "step": 14666 }, { "epoch": 0.92, "grad_norm": 1.4662996903675816, "learning_rate": 1.571247224983613e-07, "loss": 0.2282, "step": 14667 }, { "epoch": 0.92, "grad_norm": 2.131216940287285, "learning_rate": 1.5687150601018053e-07, "loss": 0.246, "step": 14668 }, { "epoch": 0.92, "grad_norm": 2.264780672903606, "learning_rate": 1.5661849047200361e-07, "loss": 0.2483, "step": 14669 }, { "epoch": 0.92, "grad_norm": 1.9630664181022202, "learning_rate": 1.5636567589432939e-07, "loss": 0.2606, "step": 14670 }, { "epoch": 0.92, "grad_norm": 1.7084230757094758, "learning_rate": 1.5611306228764612e-07, "loss": 0.277, "step": 14671 }, { "epoch": 0.92, "grad_norm": 1.6489836653289587, "learning_rate": 1.5586064966243487e-07, "loss": 0.2559, "step": 14672 }, { "epoch": 0.92, "grad_norm": 1.3012312620610775, "learning_rate": 1.5560843802917001e-07, "loss": 0.2304, "step": 14673 }, { "epoch": 0.92, "grad_norm": 1.5971566825111916, "learning_rate": 1.5535642739831602e-07, "loss": 0.2489, "step": 14674 }, { "epoch": 0.92, "grad_norm": 2.060596037769323, "learning_rate": 1.5510461778032836e-07, "loss": 0.2475, "step": 14675 }, { "epoch": 0.92, "grad_norm": 3.3823634922069425, "learning_rate": 1.5485300918565593e-07, "loss": 0.2478, "step": 14676 }, { "epoch": 0.92, "grad_norm": 2.1116629273618397, "learning_rate": 1.5460160162473757e-07, "loss": 0.2436, "step": 14677 }, { "epoch": 0.92, "grad_norm": 4.303742306352026, "learning_rate": 1.5435039510800555e-07, "loss": 0.2688, "step": 14678 }, { "epoch": 0.92, "grad_norm": 0.5724092937533349, "learning_rate": 1.5409938964588254e-07, "loss": 0.4582, "step": 14679 }, { "epoch": 0.92, "grad_norm": 1.3640143428741367, "learning_rate": 1.538485852487831e-07, "loss": 0.252, "step": 14680 }, { "epoch": 0.92, "grad_norm": 7.24412176119006, "learning_rate": 1.5359798192711383e-07, "loss": 0.2668, "step": 14681 }, { "epoch": 0.92, "grad_norm": 1.9384342811547017, "learning_rate": 1.5334757969127202e-07, "loss": 0.2374, "step": 14682 }, { "epoch": 0.92, "grad_norm": 1.9555963481625724, "learning_rate": 1.5309737855164763e-07, "loss": 0.2467, "step": 14683 }, { "epoch": 0.92, "grad_norm": 2.1158433271097215, "learning_rate": 1.5284737851862296e-07, "loss": 0.2479, "step": 14684 }, { "epoch": 0.92, "grad_norm": 2.0871617596354586, "learning_rate": 1.525975796025686e-07, "loss": 0.2537, "step": 14685 }, { "epoch": 0.92, "grad_norm": 1.6673814261868143, "learning_rate": 1.523479818138518e-07, "loss": 0.2444, "step": 14686 }, { "epoch": 0.92, "grad_norm": 1.9984525737896035, "learning_rate": 1.520985851628276e-07, "loss": 0.2594, "step": 14687 }, { "epoch": 0.92, "grad_norm": 5.968073224426047, "learning_rate": 1.518493896598433e-07, "loss": 0.2605, "step": 14688 }, { "epoch": 0.92, "grad_norm": 3.527886828538164, "learning_rate": 1.5160039531523952e-07, "loss": 0.2522, "step": 14689 }, { "epoch": 0.92, "grad_norm": 8.890379303935964, "learning_rate": 1.5135160213934685e-07, "loss": 0.2545, "step": 14690 }, { "epoch": 0.92, "grad_norm": 1.7934664789798513, "learning_rate": 1.5110301014248874e-07, "loss": 0.2629, "step": 14691 }, { "epoch": 0.92, "grad_norm": 2.08821812985751, "learning_rate": 1.508546193349797e-07, "loss": 0.2369, "step": 14692 }, { "epoch": 0.92, "grad_norm": 2.761791256746903, "learning_rate": 1.5060642972712425e-07, "loss": 0.2438, "step": 14693 }, { "epoch": 0.92, "grad_norm": 1.9194521860460347, "learning_rate": 1.503584413292225e-07, "loss": 0.2624, "step": 14694 }, { "epoch": 0.92, "grad_norm": 3.392746401289107, "learning_rate": 1.501106541515629e-07, "loss": 0.2517, "step": 14695 }, { "epoch": 0.92, "grad_norm": 1.9097213975952536, "learning_rate": 1.4986306820442609e-07, "loss": 0.2392, "step": 14696 }, { "epoch": 0.92, "grad_norm": 4.627763148595492, "learning_rate": 1.4961568349808607e-07, "loss": 0.2517, "step": 14697 }, { "epoch": 0.92, "grad_norm": 2.1211150282529068, "learning_rate": 1.493685000428058e-07, "loss": 0.258, "step": 14698 }, { "epoch": 0.92, "grad_norm": 1.3680378321681013, "learning_rate": 1.491215178488431e-07, "loss": 0.2396, "step": 14699 }, { "epoch": 0.92, "grad_norm": 3.6550561107235926, "learning_rate": 1.4887473692644428e-07, "loss": 0.2441, "step": 14700 }, { "epoch": 0.92, "grad_norm": 3.03823116777284, "learning_rate": 1.4862815728584888e-07, "loss": 0.254, "step": 14701 }, { "epoch": 0.92, "grad_norm": 10.08310411298709, "learning_rate": 1.4838177893728878e-07, "loss": 0.2368, "step": 14702 }, { "epoch": 0.92, "grad_norm": 3.7705972847750595, "learning_rate": 1.481356018909863e-07, "loss": 0.2491, "step": 14703 }, { "epoch": 0.92, "grad_norm": 5.236174680489363, "learning_rate": 1.4788962615715498e-07, "loss": 0.2484, "step": 14704 }, { "epoch": 0.92, "grad_norm": 2.7189632644735378, "learning_rate": 1.4764385174600104e-07, "loss": 0.2314, "step": 14705 }, { "epoch": 0.92, "grad_norm": 2.197762770919978, "learning_rate": 1.4739827866772304e-07, "loss": 0.2461, "step": 14706 }, { "epoch": 0.92, "grad_norm": 2.039026192120943, "learning_rate": 1.4715290693251006e-07, "loss": 0.2515, "step": 14707 }, { "epoch": 0.92, "grad_norm": 1.676748054690642, "learning_rate": 1.4690773655054225e-07, "loss": 0.2501, "step": 14708 }, { "epoch": 0.93, "grad_norm": 2.409012596567277, "learning_rate": 1.4666276753199204e-07, "loss": 0.2634, "step": 14709 }, { "epoch": 0.93, "grad_norm": 1.662885215623147, "learning_rate": 1.4641799988702466e-07, "loss": 0.2401, "step": 14710 }, { "epoch": 0.93, "grad_norm": 2.6202459054772875, "learning_rate": 1.4617343362579528e-07, "loss": 0.2667, "step": 14711 }, { "epoch": 0.93, "grad_norm": 1.967525754697255, "learning_rate": 1.4592906875845137e-07, "loss": 0.2438, "step": 14712 }, { "epoch": 0.93, "grad_norm": 1.8626807965066814, "learning_rate": 1.4568490529513203e-07, "loss": 0.2446, "step": 14713 }, { "epoch": 0.93, "grad_norm": 2.0173005214412774, "learning_rate": 1.454409432459686e-07, "loss": 0.2634, "step": 14714 }, { "epoch": 0.93, "grad_norm": 1.8575748264284897, "learning_rate": 1.4519718262108296e-07, "loss": 0.2685, "step": 14715 }, { "epoch": 0.93, "grad_norm": 3.4461683819404567, "learning_rate": 1.4495362343058872e-07, "loss": 0.2551, "step": 14716 }, { "epoch": 0.93, "grad_norm": 1.1198459156949172, "learning_rate": 1.4471026568459335e-07, "loss": 0.2503, "step": 14717 }, { "epoch": 0.93, "grad_norm": 2.660210048012057, "learning_rate": 1.4446710939319263e-07, "loss": 0.2433, "step": 14718 }, { "epoch": 0.93, "grad_norm": 2.711256064625923, "learning_rate": 1.4422415456647577e-07, "loss": 0.2506, "step": 14719 }, { "epoch": 0.93, "grad_norm": 3.548776361514621, "learning_rate": 1.4398140121452297e-07, "loss": 0.2609, "step": 14720 }, { "epoch": 0.93, "grad_norm": 2.3296713450433755, "learning_rate": 1.4373884934740733e-07, "loss": 0.2477, "step": 14721 }, { "epoch": 0.93, "grad_norm": 1.6352457523620478, "learning_rate": 1.43496498975193e-07, "loss": 0.2445, "step": 14722 }, { "epoch": 0.93, "grad_norm": 1.5220597702437866, "learning_rate": 1.432543501079353e-07, "loss": 0.2491, "step": 14723 }, { "epoch": 0.93, "grad_norm": 2.929715711269501, "learning_rate": 1.4301240275568006e-07, "loss": 0.2342, "step": 14724 }, { "epoch": 0.93, "grad_norm": 3.6164918458979822, "learning_rate": 1.4277065692846815e-07, "loss": 0.2332, "step": 14725 }, { "epoch": 0.93, "grad_norm": 1.6748028845409968, "learning_rate": 1.425291126363293e-07, "loss": 0.2513, "step": 14726 }, { "epoch": 0.93, "grad_norm": 2.2800775503316286, "learning_rate": 1.422877698892844e-07, "loss": 0.2614, "step": 14727 }, { "epoch": 0.93, "grad_norm": 1.630118151963712, "learning_rate": 1.420466286973493e-07, "loss": 0.228, "step": 14728 }, { "epoch": 0.93, "grad_norm": 1.7986034412082135, "learning_rate": 1.4180568907052717e-07, "loss": 0.2692, "step": 14729 }, { "epoch": 0.93, "grad_norm": 6.117392210899359, "learning_rate": 1.4156495101881662e-07, "loss": 0.2381, "step": 14730 }, { "epoch": 0.93, "grad_norm": 12.937335080838224, "learning_rate": 1.4132441455220637e-07, "loss": 0.2561, "step": 14731 }, { "epoch": 0.93, "grad_norm": 2.191753514025765, "learning_rate": 1.4108407968067506e-07, "loss": 0.2523, "step": 14732 }, { "epoch": 0.93, "grad_norm": 1.810721605975489, "learning_rate": 1.4084394641419641e-07, "loss": 0.2494, "step": 14733 }, { "epoch": 0.93, "grad_norm": 1.8148423028706926, "learning_rate": 1.4060401476273356e-07, "loss": 0.2674, "step": 14734 }, { "epoch": 0.93, "grad_norm": 7.572950294863925, "learning_rate": 1.403642847362402e-07, "loss": 0.2552, "step": 14735 }, { "epoch": 0.93, "grad_norm": 1.585125551063138, "learning_rate": 1.401247563446656e-07, "loss": 0.2495, "step": 14736 }, { "epoch": 0.93, "grad_norm": 3.390141227624101, "learning_rate": 1.3988542959794627e-07, "loss": 0.2582, "step": 14737 }, { "epoch": 0.93, "grad_norm": 2.6615242836036774, "learning_rate": 1.3964630450601314e-07, "loss": 0.2558, "step": 14738 }, { "epoch": 0.93, "grad_norm": 1.8693471718371781, "learning_rate": 1.3940738107878826e-07, "loss": 0.253, "step": 14739 }, { "epoch": 0.93, "grad_norm": 3.1627254114858387, "learning_rate": 1.3916865932618373e-07, "loss": 0.2313, "step": 14740 }, { "epoch": 0.93, "grad_norm": 2.4294468952066604, "learning_rate": 1.389301392581055e-07, "loss": 0.243, "step": 14741 }, { "epoch": 0.93, "grad_norm": 2.5798209152898646, "learning_rate": 1.386918208844501e-07, "loss": 0.2698, "step": 14742 }, { "epoch": 0.93, "grad_norm": 2.345631241114109, "learning_rate": 1.3845370421510573e-07, "loss": 0.2437, "step": 14743 }, { "epoch": 0.93, "grad_norm": 2.3598763642601126, "learning_rate": 1.3821578925995282e-07, "loss": 0.2586, "step": 14744 }, { "epoch": 0.93, "grad_norm": 1.4424637281151522, "learning_rate": 1.3797807602886128e-07, "loss": 0.2349, "step": 14745 }, { "epoch": 0.93, "grad_norm": 5.6177531521316855, "learning_rate": 1.3774056453169647e-07, "loss": 0.249, "step": 14746 }, { "epoch": 0.93, "grad_norm": 1.7221479334460554, "learning_rate": 1.3750325477831116e-07, "loss": 0.2579, "step": 14747 }, { "epoch": 0.93, "grad_norm": 2.1935476166031673, "learning_rate": 1.3726614677855243e-07, "loss": 0.2647, "step": 14748 }, { "epoch": 0.93, "grad_norm": 2.301822660137684, "learning_rate": 1.3702924054225908e-07, "loss": 0.2598, "step": 14749 }, { "epoch": 0.93, "grad_norm": 1.8087374879222473, "learning_rate": 1.367925360792599e-07, "loss": 0.2465, "step": 14750 }, { "epoch": 0.93, "grad_norm": 6.626035391338491, "learning_rate": 1.3655603339937652e-07, "loss": 0.2419, "step": 14751 }, { "epoch": 0.93, "grad_norm": 2.5735250489342802, "learning_rate": 1.3631973251242158e-07, "loss": 0.2521, "step": 14752 }, { "epoch": 0.93, "grad_norm": 2.5395385324028603, "learning_rate": 1.3608363342819952e-07, "loss": 0.2329, "step": 14753 }, { "epoch": 0.93, "grad_norm": 1.5198408950056004, "learning_rate": 1.3584773615650748e-07, "loss": 0.256, "step": 14754 }, { "epoch": 0.93, "grad_norm": 3.9608517463716955, "learning_rate": 1.3561204070713264e-07, "loss": 0.2576, "step": 14755 }, { "epoch": 0.93, "grad_norm": 2.0182165284492783, "learning_rate": 1.3537654708985326e-07, "loss": 0.2557, "step": 14756 }, { "epoch": 0.93, "grad_norm": 1.6962218711990547, "learning_rate": 1.351412553144421e-07, "loss": 0.2469, "step": 14757 }, { "epoch": 0.93, "grad_norm": 1.8662322458064182, "learning_rate": 1.3490616539066138e-07, "loss": 0.2358, "step": 14758 }, { "epoch": 0.93, "grad_norm": 2.1189782158984234, "learning_rate": 1.3467127732826545e-07, "loss": 0.2495, "step": 14759 }, { "epoch": 0.93, "grad_norm": 1.8511935868137062, "learning_rate": 1.3443659113699993e-07, "loss": 0.2464, "step": 14760 }, { "epoch": 0.93, "grad_norm": 4.088782987727659, "learning_rate": 1.3420210682660194e-07, "loss": 0.2713, "step": 14761 }, { "epoch": 0.93, "grad_norm": 4.7116420973206825, "learning_rate": 1.3396782440680157e-07, "loss": 0.2399, "step": 14762 }, { "epoch": 0.93, "grad_norm": 2.3910080932062456, "learning_rate": 1.337337438873193e-07, "loss": 0.2592, "step": 14763 }, { "epoch": 0.93, "grad_norm": 1.8280881242317046, "learning_rate": 1.3349986527786684e-07, "loss": 0.2558, "step": 14764 }, { "epoch": 0.93, "grad_norm": 4.283105581231077, "learning_rate": 1.332661885881492e-07, "loss": 0.2803, "step": 14765 }, { "epoch": 0.93, "grad_norm": 2.2888267676262974, "learning_rate": 1.3303271382786199e-07, "loss": 0.2386, "step": 14766 }, { "epoch": 0.93, "grad_norm": 1.3866444593266483, "learning_rate": 1.3279944100669183e-07, "loss": 0.2534, "step": 14767 }, { "epoch": 0.93, "grad_norm": 2.0197531724971145, "learning_rate": 1.3256637013431827e-07, "loss": 0.2561, "step": 14768 }, { "epoch": 0.93, "grad_norm": 3.3216086750641067, "learning_rate": 1.3233350122041133e-07, "loss": 0.2549, "step": 14769 }, { "epoch": 0.93, "grad_norm": 2.8146968874953524, "learning_rate": 1.3210083427463326e-07, "loss": 0.2606, "step": 14770 }, { "epoch": 0.93, "grad_norm": 1.782544295627537, "learning_rate": 1.318683693066386e-07, "loss": 0.2327, "step": 14771 }, { "epoch": 0.93, "grad_norm": 2.2115745025861973, "learning_rate": 1.3163610632607128e-07, "loss": 0.2511, "step": 14772 }, { "epoch": 0.93, "grad_norm": 4.529932750224458, "learning_rate": 1.3140404534256912e-07, "loss": 0.2469, "step": 14773 }, { "epoch": 0.93, "grad_norm": 5.592638550689956, "learning_rate": 1.3117218636576113e-07, "loss": 0.2387, "step": 14774 }, { "epoch": 0.93, "grad_norm": 2.4553856056557426, "learning_rate": 1.3094052940526735e-07, "loss": 0.2684, "step": 14775 }, { "epoch": 0.93, "grad_norm": 1.79622933342868, "learning_rate": 1.3070907447069903e-07, "loss": 0.2478, "step": 14776 }, { "epoch": 0.93, "grad_norm": 2.23924406557907, "learning_rate": 1.3047782157166013e-07, "loss": 0.274, "step": 14777 }, { "epoch": 0.93, "grad_norm": 4.051012920838647, "learning_rate": 1.3024677071774573e-07, "loss": 0.2337, "step": 14778 }, { "epoch": 0.93, "grad_norm": 2.4814995187614595, "learning_rate": 1.3001592191854261e-07, "loss": 0.2735, "step": 14779 }, { "epoch": 0.93, "grad_norm": 1.8391784390693475, "learning_rate": 1.2978527518362815e-07, "loss": 0.2427, "step": 14780 }, { "epoch": 0.93, "grad_norm": 0.613378542900168, "learning_rate": 1.2955483052257355e-07, "loss": 0.4245, "step": 14781 }, { "epoch": 0.93, "grad_norm": 2.30056942275997, "learning_rate": 1.2932458794494007e-07, "loss": 0.2374, "step": 14782 }, { "epoch": 0.93, "grad_norm": 1.7433319544034398, "learning_rate": 1.2909454746028062e-07, "loss": 0.2659, "step": 14783 }, { "epoch": 0.93, "grad_norm": 3.3767453602211006, "learning_rate": 1.288647090781403e-07, "loss": 0.2436, "step": 14784 }, { "epoch": 0.93, "grad_norm": 1.9230309022735703, "learning_rate": 1.2863507280805488e-07, "loss": 0.255, "step": 14785 }, { "epoch": 0.93, "grad_norm": 1.2803826526183115, "learning_rate": 1.2840563865955335e-07, "loss": 0.2485, "step": 14786 }, { "epoch": 0.93, "grad_norm": 2.3656651932396824, "learning_rate": 1.2817640664215425e-07, "loss": 0.2539, "step": 14787 }, { "epoch": 0.93, "grad_norm": 2.059815424855098, "learning_rate": 1.2794737676536993e-07, "loss": 0.2443, "step": 14788 }, { "epoch": 0.93, "grad_norm": 2.9648153798521695, "learning_rate": 1.277185490387023e-07, "loss": 0.2516, "step": 14789 }, { "epoch": 0.93, "grad_norm": 2.101183941336599, "learning_rate": 1.2748992347164647e-07, "loss": 0.2503, "step": 14790 }, { "epoch": 0.93, "grad_norm": 1.9660790907228658, "learning_rate": 1.272615000736882e-07, "loss": 0.2535, "step": 14791 }, { "epoch": 0.93, "grad_norm": 1.3704224188798575, "learning_rate": 1.270332788543055e-07, "loss": 0.2598, "step": 14792 }, { "epoch": 0.93, "grad_norm": 1.4233464489865375, "learning_rate": 1.2680525982296688e-07, "loss": 0.2513, "step": 14793 }, { "epoch": 0.93, "grad_norm": 1.5876366069095593, "learning_rate": 1.265774429891342e-07, "loss": 0.2374, "step": 14794 }, { "epoch": 0.93, "grad_norm": 3.1348939968086453, "learning_rate": 1.2634982836225884e-07, "loss": 0.244, "step": 14795 }, { "epoch": 0.93, "grad_norm": 2.1557460702912743, "learning_rate": 1.2612241595178654e-07, "loss": 0.2486, "step": 14796 }, { "epoch": 0.93, "grad_norm": 2.403929852882607, "learning_rate": 1.2589520576715142e-07, "loss": 0.2632, "step": 14797 }, { "epoch": 0.93, "grad_norm": 3.6775150488385586, "learning_rate": 1.256681978177826e-07, "loss": 0.2869, "step": 14798 }, { "epoch": 0.93, "grad_norm": 2.891385801479705, "learning_rate": 1.2544139211309757e-07, "loss": 0.2361, "step": 14799 }, { "epoch": 0.93, "grad_norm": 2.3701499166962425, "learning_rate": 1.2521478866250714e-07, "loss": 0.2386, "step": 14800 }, { "epoch": 0.93, "grad_norm": 1.7063263012705459, "learning_rate": 1.2498838747541374e-07, "loss": 0.2405, "step": 14801 }, { "epoch": 0.93, "grad_norm": 1.5580683549274503, "learning_rate": 1.2476218856121103e-07, "loss": 0.238, "step": 14802 }, { "epoch": 0.93, "grad_norm": 2.144137443763496, "learning_rate": 1.245361919292848e-07, "loss": 0.2485, "step": 14803 }, { "epoch": 0.93, "grad_norm": 5.354295341876992, "learning_rate": 1.2431039758901198e-07, "loss": 0.239, "step": 14804 }, { "epoch": 0.93, "grad_norm": 1.679861511521898, "learning_rate": 1.2408480554976067e-07, "loss": 0.2415, "step": 14805 }, { "epoch": 0.93, "grad_norm": 2.945784493613841, "learning_rate": 1.2385941582089168e-07, "loss": 0.2479, "step": 14806 }, { "epoch": 0.93, "grad_norm": 4.563666531930614, "learning_rate": 1.2363422841175644e-07, "loss": 0.2722, "step": 14807 }, { "epoch": 0.93, "grad_norm": 1.9128993895990256, "learning_rate": 1.2340924333169802e-07, "loss": 0.244, "step": 14808 }, { "epoch": 0.93, "grad_norm": 2.144993451579595, "learning_rate": 1.2318446059005173e-07, "loss": 0.2555, "step": 14809 }, { "epoch": 0.93, "grad_norm": 2.279521249213373, "learning_rate": 1.2295988019614514e-07, "loss": 0.2424, "step": 14810 }, { "epoch": 0.93, "grad_norm": 2.2654319812179002, "learning_rate": 1.227355021592952e-07, "loss": 0.242, "step": 14811 }, { "epoch": 0.93, "grad_norm": 5.042725802533616, "learning_rate": 1.225113264888128e-07, "loss": 0.2491, "step": 14812 }, { "epoch": 0.93, "grad_norm": 1.8868224572855978, "learning_rate": 1.222873531939983e-07, "loss": 0.2765, "step": 14813 }, { "epoch": 0.93, "grad_norm": 3.576975161629902, "learning_rate": 1.2206358228414593e-07, "loss": 0.2748, "step": 14814 }, { "epoch": 0.93, "grad_norm": 2.581179031064587, "learning_rate": 1.218400137685394e-07, "loss": 0.2444, "step": 14815 }, { "epoch": 0.93, "grad_norm": 2.887526890406968, "learning_rate": 1.2161664765645454e-07, "loss": 0.2552, "step": 14816 }, { "epoch": 0.93, "grad_norm": 1.500305220643014, "learning_rate": 1.2139348395716066e-07, "loss": 0.2606, "step": 14817 }, { "epoch": 0.93, "grad_norm": 1.79615867270646, "learning_rate": 1.2117052267991648e-07, "loss": 0.2489, "step": 14818 }, { "epoch": 0.93, "grad_norm": 0.6042147400663841, "learning_rate": 1.2094776383397344e-07, "loss": 0.4384, "step": 14819 }, { "epoch": 0.93, "grad_norm": 1.5319324540550467, "learning_rate": 1.2072520742857363e-07, "loss": 0.2571, "step": 14820 }, { "epoch": 0.93, "grad_norm": 1.9823282127894013, "learning_rate": 1.2050285347295077e-07, "loss": 0.2385, "step": 14821 }, { "epoch": 0.93, "grad_norm": 2.0395177496762456, "learning_rate": 1.2028070197633246e-07, "loss": 0.2582, "step": 14822 }, { "epoch": 0.93, "grad_norm": 2.045153436724005, "learning_rate": 1.2005875294793522e-07, "loss": 0.2829, "step": 14823 }, { "epoch": 0.93, "grad_norm": 2.798874262521781, "learning_rate": 1.1983700639696727e-07, "loss": 0.243, "step": 14824 }, { "epoch": 0.93, "grad_norm": 1.5626622951190008, "learning_rate": 1.1961546233263011e-07, "loss": 0.2233, "step": 14825 }, { "epoch": 0.93, "grad_norm": 6.677377284479592, "learning_rate": 1.193941207641164e-07, "loss": 0.2554, "step": 14826 }, { "epoch": 0.93, "grad_norm": 1.9631409156833521, "learning_rate": 1.1917298170060987e-07, "loss": 0.273, "step": 14827 }, { "epoch": 0.93, "grad_norm": 21.48583609118683, "learning_rate": 1.1895204515128544e-07, "loss": 0.2649, "step": 14828 }, { "epoch": 0.93, "grad_norm": 3.277225363412385, "learning_rate": 1.1873131112530967e-07, "loss": 0.2658, "step": 14829 }, { "epoch": 0.93, "grad_norm": 2.1121952784711446, "learning_rate": 1.1851077963184243e-07, "loss": 0.2496, "step": 14830 }, { "epoch": 0.93, "grad_norm": 1.5936972522637975, "learning_rate": 1.1829045068003364e-07, "loss": 0.2751, "step": 14831 }, { "epoch": 0.93, "grad_norm": 2.806239788123379, "learning_rate": 1.1807032427902488e-07, "loss": 0.2448, "step": 14832 }, { "epoch": 0.93, "grad_norm": 1.668576839612423, "learning_rate": 1.1785040043794882e-07, "loss": 0.2567, "step": 14833 }, { "epoch": 0.93, "grad_norm": 1.5403410715242014, "learning_rate": 1.1763067916593263e-07, "loss": 0.2494, "step": 14834 }, { "epoch": 0.93, "grad_norm": 2.1104326000099825, "learning_rate": 1.1741116047209067e-07, "loss": 0.2458, "step": 14835 }, { "epoch": 0.93, "grad_norm": 2.390632507976463, "learning_rate": 1.1719184436553288e-07, "loss": 0.2611, "step": 14836 }, { "epoch": 0.93, "grad_norm": 2.223425231824431, "learning_rate": 1.1697273085535755e-07, "loss": 0.2453, "step": 14837 }, { "epoch": 0.93, "grad_norm": 1.835627269380618, "learning_rate": 1.1675381995065738e-07, "loss": 0.2377, "step": 14838 }, { "epoch": 0.93, "grad_norm": 5.252125941938038, "learning_rate": 1.1653511166051457e-07, "loss": 0.2505, "step": 14839 }, { "epoch": 0.93, "grad_norm": 1.7293188119700607, "learning_rate": 1.1631660599400407e-07, "loss": 0.2301, "step": 14840 }, { "epoch": 0.93, "grad_norm": 1.947457346600411, "learning_rate": 1.1609830296019142e-07, "loss": 0.2451, "step": 14841 }, { "epoch": 0.93, "grad_norm": 4.380286439795994, "learning_rate": 1.1588020256813604e-07, "loss": 0.2607, "step": 14842 }, { "epoch": 0.93, "grad_norm": 1.6688985718457132, "learning_rate": 1.1566230482688567e-07, "loss": 0.2476, "step": 14843 }, { "epoch": 0.93, "grad_norm": 1.491500712169069, "learning_rate": 1.1544460974548199e-07, "loss": 0.2514, "step": 14844 }, { "epoch": 0.93, "grad_norm": 1.8003044903232, "learning_rate": 1.1522711733295722e-07, "loss": 0.252, "step": 14845 }, { "epoch": 0.93, "grad_norm": 1.9114935174019365, "learning_rate": 1.1500982759833579e-07, "loss": 0.2585, "step": 14846 }, { "epoch": 0.93, "grad_norm": 2.4443451893432773, "learning_rate": 1.1479274055063327e-07, "loss": 0.2583, "step": 14847 }, { "epoch": 0.93, "grad_norm": 1.937288000014147, "learning_rate": 1.1457585619885692e-07, "loss": 0.2452, "step": 14848 }, { "epoch": 0.93, "grad_norm": 1.854551755026898, "learning_rate": 1.1435917455200562e-07, "loss": 0.2517, "step": 14849 }, { "epoch": 0.93, "grad_norm": 1.9920062840874568, "learning_rate": 1.1414269561907054e-07, "loss": 0.2611, "step": 14850 }, { "epoch": 0.93, "grad_norm": 3.664366102243156, "learning_rate": 1.1392641940903337e-07, "loss": 0.2729, "step": 14851 }, { "epoch": 0.93, "grad_norm": 2.3581522728478457, "learning_rate": 1.1371034593086749e-07, "loss": 0.2433, "step": 14852 }, { "epoch": 0.93, "grad_norm": 2.339598302772925, "learning_rate": 1.1349447519353796e-07, "loss": 0.2434, "step": 14853 }, { "epoch": 0.93, "grad_norm": 2.3366893826110973, "learning_rate": 1.1327880720600205e-07, "loss": 0.2384, "step": 14854 }, { "epoch": 0.93, "grad_norm": 0.6284451781576239, "learning_rate": 1.1306334197720925e-07, "loss": 0.4828, "step": 14855 }, { "epoch": 0.93, "grad_norm": 1.561425902150267, "learning_rate": 1.12848079516098e-07, "loss": 0.2665, "step": 14856 }, { "epoch": 0.93, "grad_norm": 4.023058133647233, "learning_rate": 1.1263301983160002e-07, "loss": 0.2497, "step": 14857 }, { "epoch": 0.93, "grad_norm": 1.916481387111643, "learning_rate": 1.124181629326393e-07, "loss": 0.2697, "step": 14858 }, { "epoch": 0.93, "grad_norm": 2.4385159119139947, "learning_rate": 1.1220350882813091e-07, "loss": 0.2673, "step": 14859 }, { "epoch": 0.93, "grad_norm": 8.651392137228159, "learning_rate": 1.1198905752697997e-07, "loss": 0.2486, "step": 14860 }, { "epoch": 0.93, "grad_norm": 2.9391530254969616, "learning_rate": 1.117748090380849e-07, "loss": 0.2568, "step": 14861 }, { "epoch": 0.93, "grad_norm": 1.8450702886107342, "learning_rate": 1.1156076337033583e-07, "loss": 0.2385, "step": 14862 }, { "epoch": 0.93, "grad_norm": 1.6987128208037827, "learning_rate": 1.1134692053261343e-07, "loss": 0.2416, "step": 14863 }, { "epoch": 0.93, "grad_norm": 1.674250944224833, "learning_rate": 1.1113328053379113e-07, "loss": 0.2634, "step": 14864 }, { "epoch": 0.93, "grad_norm": 2.0697943603454942, "learning_rate": 1.1091984338273188e-07, "loss": 0.2233, "step": 14865 }, { "epoch": 0.93, "grad_norm": 1.8118060020421574, "learning_rate": 1.1070660908829245e-07, "loss": 0.2265, "step": 14866 }, { "epoch": 0.93, "grad_norm": 1.651358541009252, "learning_rate": 1.1049357765932078e-07, "loss": 0.2519, "step": 14867 }, { "epoch": 0.94, "grad_norm": 1.8171109253631073, "learning_rate": 1.1028074910465481e-07, "loss": 0.2572, "step": 14868 }, { "epoch": 0.94, "grad_norm": 3.3117779403954093, "learning_rate": 1.1006812343312467e-07, "loss": 0.2469, "step": 14869 }, { "epoch": 0.94, "grad_norm": 0.5892931590116841, "learning_rate": 1.09855700653555e-07, "loss": 0.4931, "step": 14870 }, { "epoch": 0.94, "grad_norm": 2.5588480046248643, "learning_rate": 1.0964348077475817e-07, "loss": 0.2355, "step": 14871 }, { "epoch": 0.94, "grad_norm": 2.4196301254817536, "learning_rate": 1.0943146380553938e-07, "loss": 0.257, "step": 14872 }, { "epoch": 0.94, "grad_norm": 2.6697389354697143, "learning_rate": 1.092196497546949e-07, "loss": 0.2584, "step": 14873 }, { "epoch": 0.94, "grad_norm": 1.2976118335943434, "learning_rate": 1.0900803863101494e-07, "loss": 0.2447, "step": 14874 }, { "epoch": 0.94, "grad_norm": 0.5761961831633281, "learning_rate": 1.0879663044327915e-07, "loss": 0.4653, "step": 14875 }, { "epoch": 0.94, "grad_norm": 1.8734145186457203, "learning_rate": 1.0858542520025828e-07, "loss": 0.2423, "step": 14876 }, { "epoch": 0.94, "grad_norm": 2.153025758468859, "learning_rate": 1.0837442291071587e-07, "loss": 0.2702, "step": 14877 }, { "epoch": 0.94, "grad_norm": 2.889608873689613, "learning_rate": 1.0816362358340826e-07, "loss": 0.2807, "step": 14878 }, { "epoch": 0.94, "grad_norm": 1.5377182230887265, "learning_rate": 1.0795302722708012e-07, "loss": 0.2508, "step": 14879 }, { "epoch": 0.94, "grad_norm": 2.1302354393816434, "learning_rate": 1.0774263385047057e-07, "loss": 0.2591, "step": 14880 }, { "epoch": 0.94, "grad_norm": 2.1321161995180526, "learning_rate": 1.0753244346230818e-07, "loss": 0.2572, "step": 14881 }, { "epoch": 0.94, "grad_norm": 2.977443119313658, "learning_rate": 1.0732245607131542e-07, "loss": 0.264, "step": 14882 }, { "epoch": 0.94, "grad_norm": 2.755625879920134, "learning_rate": 1.071126716862042e-07, "loss": 0.2435, "step": 14883 }, { "epoch": 0.94, "grad_norm": 2.2010017414711407, "learning_rate": 1.0690309031567814e-07, "loss": 0.2478, "step": 14884 }, { "epoch": 0.94, "grad_norm": 2.1106095110096064, "learning_rate": 1.066937119684347e-07, "loss": 0.2456, "step": 14885 }, { "epoch": 0.94, "grad_norm": 2.1943699278626636, "learning_rate": 1.0648453665316028e-07, "loss": 0.2483, "step": 14886 }, { "epoch": 0.94, "grad_norm": 1.5251210715085795, "learning_rate": 1.0627556437853459e-07, "loss": 0.2629, "step": 14887 }, { "epoch": 0.94, "grad_norm": 1.7813309320362822, "learning_rate": 1.0606679515322849e-07, "loss": 0.2611, "step": 14888 }, { "epoch": 0.94, "grad_norm": 1.591465764685345, "learning_rate": 1.0585822898590281e-07, "loss": 0.2479, "step": 14889 }, { "epoch": 0.94, "grad_norm": 1.6430687983850112, "learning_rate": 1.056498658852123e-07, "loss": 0.2616, "step": 14890 }, { "epoch": 0.94, "grad_norm": 1.6878018452247103, "learning_rate": 1.0544170585980229e-07, "loss": 0.2474, "step": 14891 }, { "epoch": 0.94, "grad_norm": 2.3871371505976424, "learning_rate": 1.0523374891830972e-07, "loss": 0.237, "step": 14892 }, { "epoch": 0.94, "grad_norm": 5.598496951092441, "learning_rate": 1.0502599506936273e-07, "loss": 0.2536, "step": 14893 }, { "epoch": 0.94, "grad_norm": 1.4978143617102027, "learning_rate": 1.0481844432158162e-07, "loss": 0.2616, "step": 14894 }, { "epoch": 0.94, "grad_norm": 1.9994766603064726, "learning_rate": 1.0461109668357894e-07, "loss": 0.2497, "step": 14895 }, { "epoch": 0.94, "grad_norm": 1.6608440324327385, "learning_rate": 1.0440395216395616e-07, "loss": 0.2401, "step": 14896 }, { "epoch": 0.94, "grad_norm": 3.151947723333777, "learning_rate": 1.0419701077130917e-07, "loss": 0.2757, "step": 14897 }, { "epoch": 0.94, "grad_norm": 2.112650879500353, "learning_rate": 1.0399027251422444e-07, "loss": 0.2534, "step": 14898 }, { "epoch": 0.94, "grad_norm": 2.759699635665989, "learning_rate": 1.0378373740127901e-07, "loss": 0.2596, "step": 14899 }, { "epoch": 0.94, "grad_norm": 2.6249261058165243, "learning_rate": 1.0357740544104323e-07, "loss": 0.2532, "step": 14900 }, { "epoch": 0.94, "grad_norm": 2.1306714110980276, "learning_rate": 1.0337127664207803e-07, "loss": 0.2454, "step": 14901 }, { "epoch": 0.94, "grad_norm": 2.653563634502309, "learning_rate": 1.0316535101293601e-07, "loss": 0.2685, "step": 14902 }, { "epoch": 0.94, "grad_norm": 2.025075574024474, "learning_rate": 1.0295962856216146e-07, "loss": 0.2491, "step": 14903 }, { "epoch": 0.94, "grad_norm": 1.9884337748471876, "learning_rate": 1.0275410929828977e-07, "loss": 0.2661, "step": 14904 }, { "epoch": 0.94, "grad_norm": 2.6602176710280725, "learning_rate": 1.0254879322984856e-07, "loss": 0.2496, "step": 14905 }, { "epoch": 0.94, "grad_norm": 1.579321150659445, "learning_rate": 1.0234368036535658e-07, "loss": 0.2698, "step": 14906 }, { "epoch": 0.94, "grad_norm": 3.0472771201422084, "learning_rate": 1.0213877071332478e-07, "loss": 0.242, "step": 14907 }, { "epoch": 0.94, "grad_norm": 2.2621855761825884, "learning_rate": 1.0193406428225528e-07, "loss": 0.266, "step": 14908 }, { "epoch": 0.94, "grad_norm": 1.860810566412694, "learning_rate": 1.017295610806407e-07, "loss": 0.2344, "step": 14909 }, { "epoch": 0.94, "grad_norm": 4.795315979060604, "learning_rate": 1.0152526111696759e-07, "loss": 0.2565, "step": 14910 }, { "epoch": 0.94, "grad_norm": 2.634323388444125, "learning_rate": 1.0132116439971196e-07, "loss": 0.2555, "step": 14911 }, { "epoch": 0.94, "grad_norm": 0.5809638260400033, "learning_rate": 1.0111727093734203e-07, "loss": 0.4658, "step": 14912 }, { "epoch": 0.94, "grad_norm": 2.379199203821618, "learning_rate": 1.009135807383177e-07, "loss": 0.2499, "step": 14913 }, { "epoch": 0.94, "grad_norm": 2.57060843423349, "learning_rate": 1.0071009381109053e-07, "loss": 0.2687, "step": 14914 }, { "epoch": 0.94, "grad_norm": 1.6136274412195344, "learning_rate": 1.0050681016410435e-07, "loss": 0.2417, "step": 14915 }, { "epoch": 0.94, "grad_norm": 2.465798957401808, "learning_rate": 1.0030372980579294e-07, "loss": 0.2354, "step": 14916 }, { "epoch": 0.94, "grad_norm": 2.478186727032362, "learning_rate": 1.0010085274458236e-07, "loss": 0.2527, "step": 14917 }, { "epoch": 0.94, "grad_norm": 3.3420232502846567, "learning_rate": 9.989817898889087e-08, "loss": 0.2502, "step": 14918 }, { "epoch": 0.94, "grad_norm": 0.5862422368537016, "learning_rate": 9.969570854712785e-08, "loss": 0.463, "step": 14919 }, { "epoch": 0.94, "grad_norm": 4.186868427191014, "learning_rate": 9.949344142769323e-08, "loss": 0.2473, "step": 14920 }, { "epoch": 0.94, "grad_norm": 3.4484885809275756, "learning_rate": 9.929137763897923e-08, "loss": 0.2739, "step": 14921 }, { "epoch": 0.94, "grad_norm": 2.5240728567295854, "learning_rate": 9.908951718937187e-08, "loss": 0.2563, "step": 14922 }, { "epoch": 0.94, "grad_norm": 3.4450657920114627, "learning_rate": 9.888786008724504e-08, "loss": 0.2589, "step": 14923 }, { "epoch": 0.94, "grad_norm": 6.12220374274891, "learning_rate": 9.868640634096649e-08, "loss": 0.2486, "step": 14924 }, { "epoch": 0.94, "grad_norm": 3.9133550174936365, "learning_rate": 9.848515595889452e-08, "loss": 0.2531, "step": 14925 }, { "epoch": 0.94, "grad_norm": 1.6291496748320533, "learning_rate": 9.828410894937912e-08, "loss": 0.2408, "step": 14926 }, { "epoch": 0.94, "grad_norm": 2.610575991622655, "learning_rate": 9.808326532076307e-08, "loss": 0.2346, "step": 14927 }, { "epoch": 0.94, "grad_norm": 3.1243219912170472, "learning_rate": 9.788262508137913e-08, "loss": 0.2532, "step": 14928 }, { "epoch": 0.94, "grad_norm": 2.0825214835054133, "learning_rate": 9.768218823955122e-08, "loss": 0.2421, "step": 14929 }, { "epoch": 0.94, "grad_norm": 2.534624003973788, "learning_rate": 9.748195480359768e-08, "loss": 0.2403, "step": 14930 }, { "epoch": 0.94, "grad_norm": 3.8788918273845305, "learning_rate": 9.728192478182574e-08, "loss": 0.2333, "step": 14931 }, { "epoch": 0.94, "grad_norm": 2.2294369483115686, "learning_rate": 9.708209818253489e-08, "loss": 0.2545, "step": 14932 }, { "epoch": 0.94, "grad_norm": 1.6569128406506433, "learning_rate": 9.68824750140157e-08, "loss": 0.2584, "step": 14933 }, { "epoch": 0.94, "grad_norm": 5.83855571000696, "learning_rate": 9.668305528455212e-08, "loss": 0.2572, "step": 14934 }, { "epoch": 0.94, "grad_norm": 1.9125795231859963, "learning_rate": 9.648383900241808e-08, "loss": 0.2447, "step": 14935 }, { "epoch": 0.94, "grad_norm": 2.365058972026654, "learning_rate": 9.628482617587864e-08, "loss": 0.2636, "step": 14936 }, { "epoch": 0.94, "grad_norm": 21.5959645402046, "learning_rate": 9.608601681319163e-08, "loss": 0.2661, "step": 14937 }, { "epoch": 0.94, "grad_norm": 2.2741830157771217, "learning_rate": 9.588741092260656e-08, "loss": 0.2293, "step": 14938 }, { "epoch": 0.94, "grad_norm": 1.5611052605208338, "learning_rate": 9.568900851236351e-08, "loss": 0.2521, "step": 14939 }, { "epoch": 0.94, "grad_norm": 1.9518424864515562, "learning_rate": 9.549080959069423e-08, "loss": 0.2522, "step": 14940 }, { "epoch": 0.94, "grad_norm": 2.5103558130100954, "learning_rate": 9.52928141658227e-08, "loss": 0.2702, "step": 14941 }, { "epoch": 0.94, "grad_norm": 3.3833230777335555, "learning_rate": 9.509502224596401e-08, "loss": 0.2482, "step": 14942 }, { "epoch": 0.94, "grad_norm": 3.6141775631828845, "learning_rate": 9.489743383932548e-08, "loss": 0.2636, "step": 14943 }, { "epoch": 0.94, "grad_norm": 3.225754419827806, "learning_rate": 9.470004895410445e-08, "loss": 0.2495, "step": 14944 }, { "epoch": 0.94, "grad_norm": 2.493882899914474, "learning_rate": 9.450286759849048e-08, "loss": 0.2477, "step": 14945 }, { "epoch": 0.94, "grad_norm": 3.010916841228309, "learning_rate": 9.430588978066647e-08, "loss": 0.2546, "step": 14946 }, { "epoch": 0.94, "grad_norm": 1.8776254788072027, "learning_rate": 9.410911550880474e-08, "loss": 0.2467, "step": 14947 }, { "epoch": 0.94, "grad_norm": 2.6419471230118083, "learning_rate": 9.391254479106993e-08, "loss": 0.2477, "step": 14948 }, { "epoch": 0.94, "grad_norm": 2.398983459069012, "learning_rate": 9.371617763561658e-08, "loss": 0.2386, "step": 14949 }, { "epoch": 0.94, "grad_norm": 1.8036445531365404, "learning_rate": 9.352001405059486e-08, "loss": 0.2445, "step": 14950 }, { "epoch": 0.94, "grad_norm": 2.566218181636836, "learning_rate": 9.332405404414158e-08, "loss": 0.2581, "step": 14951 }, { "epoch": 0.94, "grad_norm": 1.9469322935504314, "learning_rate": 9.312829762438914e-08, "loss": 0.2408, "step": 14952 }, { "epoch": 0.94, "grad_norm": 3.2739847687461685, "learning_rate": 9.29327447994588e-08, "loss": 0.2665, "step": 14953 }, { "epoch": 0.94, "grad_norm": 1.6358838442443655, "learning_rate": 9.27373955774652e-08, "loss": 0.2404, "step": 14954 }, { "epoch": 0.94, "grad_norm": 2.082394203426563, "learning_rate": 9.254224996651351e-08, "loss": 0.2456, "step": 14955 }, { "epoch": 0.94, "grad_norm": 1.9432220621634133, "learning_rate": 9.23473079747006e-08, "loss": 0.2391, "step": 14956 }, { "epoch": 0.94, "grad_norm": 1.6062936362069737, "learning_rate": 9.215256961011443e-08, "loss": 0.2485, "step": 14957 }, { "epoch": 0.94, "grad_norm": 1.917760650477404, "learning_rate": 9.195803488083521e-08, "loss": 0.248, "step": 14958 }, { "epoch": 0.94, "grad_norm": 1.693826435999172, "learning_rate": 9.176370379493482e-08, "loss": 0.2592, "step": 14959 }, { "epoch": 0.94, "grad_norm": 2.468578559414902, "learning_rate": 9.15695763604768e-08, "loss": 0.2753, "step": 14960 }, { "epoch": 0.94, "grad_norm": 1.576035149210496, "learning_rate": 9.137565258551529e-08, "loss": 0.247, "step": 14961 }, { "epoch": 0.94, "grad_norm": 2.4686507036846277, "learning_rate": 9.11819324780966e-08, "loss": 0.2434, "step": 14962 }, { "epoch": 0.94, "grad_norm": 6.0674562135055385, "learning_rate": 9.098841604625874e-08, "loss": 0.2457, "step": 14963 }, { "epoch": 0.94, "grad_norm": 2.1382600137892642, "learning_rate": 9.079510329803087e-08, "loss": 0.2476, "step": 14964 }, { "epoch": 0.94, "grad_norm": 1.5833971663826814, "learning_rate": 9.060199424143378e-08, "loss": 0.27, "step": 14965 }, { "epoch": 0.94, "grad_norm": 1.9141649054444287, "learning_rate": 9.040908888447941e-08, "loss": 0.2503, "step": 14966 }, { "epoch": 0.94, "grad_norm": 4.8754388846088315, "learning_rate": 9.021638723517357e-08, "loss": 0.2495, "step": 14967 }, { "epoch": 0.94, "grad_norm": 1.5964389953001117, "learning_rate": 9.002388930150984e-08, "loss": 0.2387, "step": 14968 }, { "epoch": 0.94, "grad_norm": 1.7661147240402528, "learning_rate": 8.983159509147577e-08, "loss": 0.2673, "step": 14969 }, { "epoch": 0.94, "grad_norm": 1.7161758825257591, "learning_rate": 8.963950461305104e-08, "loss": 0.2499, "step": 14970 }, { "epoch": 0.94, "grad_norm": 3.12529174164548, "learning_rate": 8.944761787420486e-08, "loss": 0.253, "step": 14971 }, { "epoch": 0.94, "grad_norm": 1.4962954036350211, "learning_rate": 8.925593488289918e-08, "loss": 0.2443, "step": 14972 }, { "epoch": 0.94, "grad_norm": 2.9436273201767413, "learning_rate": 8.906445564708655e-08, "loss": 0.2393, "step": 14973 }, { "epoch": 0.94, "grad_norm": 1.7900452883307112, "learning_rate": 8.88731801747128e-08, "loss": 0.2557, "step": 14974 }, { "epoch": 0.94, "grad_norm": 1.5962253752787288, "learning_rate": 8.868210847371384e-08, "loss": 0.2517, "step": 14975 }, { "epoch": 0.94, "grad_norm": 1.5808888653862876, "learning_rate": 8.84912405520183e-08, "loss": 0.2523, "step": 14976 }, { "epoch": 0.94, "grad_norm": 3.7288831022952715, "learning_rate": 8.83005764175443e-08, "loss": 0.2666, "step": 14977 }, { "epoch": 0.94, "grad_norm": 2.3162567983125717, "learning_rate": 8.811011607820386e-08, "loss": 0.247, "step": 14978 }, { "epoch": 0.94, "grad_norm": 2.445641448881334, "learning_rate": 8.791985954189952e-08, "loss": 0.2445, "step": 14979 }, { "epoch": 0.94, "grad_norm": 3.1815497088289924, "learning_rate": 8.772980681652444e-08, "loss": 0.2432, "step": 14980 }, { "epoch": 0.94, "grad_norm": 8.421640283671424, "learning_rate": 8.753995790996505e-08, "loss": 0.2469, "step": 14981 }, { "epoch": 0.94, "grad_norm": 2.9510725059112817, "learning_rate": 8.735031283009842e-08, "loss": 0.2497, "step": 14982 }, { "epoch": 0.94, "grad_norm": 2.912487167504048, "learning_rate": 8.716087158479324e-08, "loss": 0.264, "step": 14983 }, { "epoch": 0.94, "grad_norm": 10.834676258213733, "learning_rate": 8.697163418190935e-08, "loss": 0.2354, "step": 14984 }, { "epoch": 0.94, "grad_norm": 1.7326168534625863, "learning_rate": 8.678260062929877e-08, "loss": 0.243, "step": 14985 }, { "epoch": 0.94, "grad_norm": 2.2672894190913695, "learning_rate": 8.659377093480581e-08, "loss": 0.2508, "step": 14986 }, { "epoch": 0.94, "grad_norm": 5.359989194874794, "learning_rate": 8.640514510626363e-08, "loss": 0.2445, "step": 14987 }, { "epoch": 0.94, "grad_norm": 1.5369905580351089, "learning_rate": 8.621672315149987e-08, "loss": 0.2529, "step": 14988 }, { "epoch": 0.94, "grad_norm": 2.060738960519203, "learning_rate": 8.602850507833161e-08, "loss": 0.2556, "step": 14989 }, { "epoch": 0.94, "grad_norm": 2.2529995206791567, "learning_rate": 8.584049089456981e-08, "loss": 0.2445, "step": 14990 }, { "epoch": 0.94, "grad_norm": 1.6858711108570388, "learning_rate": 8.56526806080138e-08, "loss": 0.2659, "step": 14991 }, { "epoch": 0.94, "grad_norm": 2.2813124187359204, "learning_rate": 8.546507422645734e-08, "loss": 0.265, "step": 14992 }, { "epoch": 0.94, "grad_norm": 1.848471024723462, "learning_rate": 8.52776717576842e-08, "loss": 0.264, "step": 14993 }, { "epoch": 0.94, "grad_norm": 3.4011796314883185, "learning_rate": 8.509047320946984e-08, "loss": 0.25, "step": 14994 }, { "epoch": 0.94, "grad_norm": 2.0682365920212433, "learning_rate": 8.490347858958192e-08, "loss": 0.265, "step": 14995 }, { "epoch": 0.94, "grad_norm": 1.7556126978556454, "learning_rate": 8.471668790577814e-08, "loss": 0.23, "step": 14996 }, { "epoch": 0.94, "grad_norm": 2.0838923037676325, "learning_rate": 8.45301011658095e-08, "loss": 0.2506, "step": 14997 }, { "epoch": 0.94, "grad_norm": 2.085841390214276, "learning_rate": 8.434371837741817e-08, "loss": 0.2425, "step": 14998 }, { "epoch": 0.94, "grad_norm": 1.8883906705242588, "learning_rate": 8.415753954833738e-08, "loss": 0.2346, "step": 14999 }, { "epoch": 0.94, "grad_norm": 4.727605274229623, "learning_rate": 8.397156468629209e-08, "loss": 0.2544, "step": 15000 }, { "epoch": 0.94, "grad_norm": 1.83609080121886, "learning_rate": 8.378579379899776e-08, "loss": 0.2489, "step": 15001 }, { "epoch": 0.94, "grad_norm": 3.3191386930002, "learning_rate": 8.360022689416324e-08, "loss": 0.2397, "step": 15002 }, { "epoch": 0.94, "grad_norm": 3.977769675272214, "learning_rate": 8.34148639794874e-08, "loss": 0.273, "step": 15003 }, { "epoch": 0.94, "grad_norm": 2.456322072507468, "learning_rate": 8.322970506266237e-08, "loss": 0.2705, "step": 15004 }, { "epoch": 0.94, "grad_norm": 1.5705162597152489, "learning_rate": 8.304475015136925e-08, "loss": 0.2507, "step": 15005 }, { "epoch": 0.94, "grad_norm": 1.7703958453252453, "learning_rate": 8.285999925328359e-08, "loss": 0.2599, "step": 15006 }, { "epoch": 0.94, "grad_norm": 1.6048938773161612, "learning_rate": 8.267545237607034e-08, "loss": 0.2806, "step": 15007 }, { "epoch": 0.94, "grad_norm": 1.9367012150118517, "learning_rate": 8.249110952738726e-08, "loss": 0.2694, "step": 15008 }, { "epoch": 0.94, "grad_norm": 1.542691414723443, "learning_rate": 8.230697071488159e-08, "loss": 0.2444, "step": 15009 }, { "epoch": 0.94, "grad_norm": 1.8070296743870038, "learning_rate": 8.212303594619497e-08, "loss": 0.2357, "step": 15010 }, { "epoch": 0.94, "grad_norm": 2.6064423374607273, "learning_rate": 8.193930522895799e-08, "loss": 0.2422, "step": 15011 }, { "epoch": 0.94, "grad_norm": 2.226075116398095, "learning_rate": 8.175577857079565e-08, "loss": 0.2658, "step": 15012 }, { "epoch": 0.94, "grad_norm": 2.4496437824003148, "learning_rate": 8.157245597932129e-08, "loss": 0.2333, "step": 15013 }, { "epoch": 0.94, "grad_norm": 1.6669729270492224, "learning_rate": 8.138933746214218e-08, "loss": 0.2417, "step": 15014 }, { "epoch": 0.94, "grad_norm": 2.7373856422281864, "learning_rate": 8.120642302685556e-08, "loss": 0.2746, "step": 15015 }, { "epoch": 0.94, "grad_norm": 2.5336644560674157, "learning_rate": 8.102371268105147e-08, "loss": 0.2556, "step": 15016 }, { "epoch": 0.94, "grad_norm": 2.0907280259399346, "learning_rate": 8.084120643231052e-08, "loss": 0.2781, "step": 15017 }, { "epoch": 0.94, "grad_norm": 2.130195687230351, "learning_rate": 8.0658904288205e-08, "loss": 0.2447, "step": 15018 }, { "epoch": 0.94, "grad_norm": 1.9566567457339217, "learning_rate": 8.047680625629994e-08, "loss": 0.2598, "step": 15019 }, { "epoch": 0.94, "grad_norm": 2.0714324441971175, "learning_rate": 8.029491234414987e-08, "loss": 0.245, "step": 15020 }, { "epoch": 0.94, "grad_norm": 3.337980440495657, "learning_rate": 8.01132225593021e-08, "loss": 0.2596, "step": 15021 }, { "epoch": 0.94, "grad_norm": 1.88405739578724, "learning_rate": 7.993173690929556e-08, "loss": 0.2569, "step": 15022 }, { "epoch": 0.94, "grad_norm": 10.187463976959984, "learning_rate": 7.975045540166038e-08, "loss": 0.2596, "step": 15023 }, { "epoch": 0.94, "grad_norm": 1.6669701272689876, "learning_rate": 7.956937804391829e-08, "loss": 0.2441, "step": 15024 }, { "epoch": 0.94, "grad_norm": 4.487160049490356, "learning_rate": 7.938850484358219e-08, "loss": 0.2497, "step": 15025 }, { "epoch": 0.94, "grad_norm": 4.416276728830592, "learning_rate": 7.920783580815661e-08, "loss": 0.2573, "step": 15026 }, { "epoch": 0.95, "grad_norm": 1.4075389471818622, "learning_rate": 7.90273709451389e-08, "loss": 0.2511, "step": 15027 }, { "epoch": 0.95, "grad_norm": 1.9727910733199612, "learning_rate": 7.884711026201586e-08, "loss": 0.2477, "step": 15028 }, { "epoch": 0.95, "grad_norm": 0.5693306121869144, "learning_rate": 7.866705376626704e-08, "loss": 0.4655, "step": 15029 }, { "epoch": 0.95, "grad_norm": 2.0371422961176253, "learning_rate": 7.848720146536426e-08, "loss": 0.2606, "step": 15030 }, { "epoch": 0.95, "grad_norm": 11.406852070741884, "learning_rate": 7.830755336676821e-08, "loss": 0.2479, "step": 15031 }, { "epoch": 0.95, "grad_norm": 1.5372067319191896, "learning_rate": 7.812810947793403e-08, "loss": 0.2406, "step": 15032 }, { "epoch": 0.95, "grad_norm": 2.855173786389777, "learning_rate": 7.79488698063069e-08, "loss": 0.2345, "step": 15033 }, { "epoch": 0.95, "grad_norm": 1.7803893474498058, "learning_rate": 7.776983435932306e-08, "loss": 0.2698, "step": 15034 }, { "epoch": 0.95, "grad_norm": 3.087583867651988, "learning_rate": 7.759100314441215e-08, "loss": 0.2364, "step": 15035 }, { "epoch": 0.95, "grad_norm": 2.5924903559618606, "learning_rate": 7.741237616899377e-08, "loss": 0.2318, "step": 15036 }, { "epoch": 0.95, "grad_norm": 14.902985871795474, "learning_rate": 7.723395344047924e-08, "loss": 0.2538, "step": 15037 }, { "epoch": 0.95, "grad_norm": 3.4096437595077678, "learning_rate": 7.705573496627205e-08, "loss": 0.2553, "step": 15038 }, { "epoch": 0.95, "grad_norm": 1.9456729915431172, "learning_rate": 7.687772075376632e-08, "loss": 0.2485, "step": 15039 }, { "epoch": 0.95, "grad_norm": 2.4682587504945, "learning_rate": 7.669991081034889e-08, "loss": 0.256, "step": 15040 }, { "epoch": 0.95, "grad_norm": 1.8428692360405576, "learning_rate": 7.65223051433961e-08, "loss": 0.2554, "step": 15041 }, { "epoch": 0.95, "grad_norm": 3.502670488676359, "learning_rate": 7.634490376027814e-08, "loss": 0.2487, "step": 15042 }, { "epoch": 0.95, "grad_norm": 1.9273363329139828, "learning_rate": 7.616770666835582e-08, "loss": 0.2646, "step": 15043 }, { "epoch": 0.95, "grad_norm": 1.7949329714879687, "learning_rate": 7.599071387498102e-08, "loss": 0.2502, "step": 15044 }, { "epoch": 0.95, "grad_norm": 1.5112986784603015, "learning_rate": 7.581392538749732e-08, "loss": 0.2472, "step": 15045 }, { "epoch": 0.95, "grad_norm": 3.149502030469735, "learning_rate": 7.563734121324052e-08, "loss": 0.2416, "step": 15046 }, { "epoch": 0.95, "grad_norm": 2.9709106388728097, "learning_rate": 7.546096135953696e-08, "loss": 0.2388, "step": 15047 }, { "epoch": 0.95, "grad_norm": 1.7272547408062873, "learning_rate": 7.528478583370469e-08, "loss": 0.2577, "step": 15048 }, { "epoch": 0.95, "grad_norm": 2.3685895048468675, "learning_rate": 7.510881464305452e-08, "loss": 0.2716, "step": 15049 }, { "epoch": 0.95, "grad_norm": 1.436988767256087, "learning_rate": 7.493304779488675e-08, "loss": 0.2371, "step": 15050 }, { "epoch": 0.95, "grad_norm": 1.9474019351298308, "learning_rate": 7.475748529649496e-08, "loss": 0.2501, "step": 15051 }, { "epoch": 0.95, "grad_norm": 2.414570049926212, "learning_rate": 7.458212715516389e-08, "loss": 0.2675, "step": 15052 }, { "epoch": 0.95, "grad_norm": 1.5213522155912707, "learning_rate": 7.440697337816771e-08, "loss": 0.2414, "step": 15053 }, { "epoch": 0.95, "grad_norm": 3.6456077863037066, "learning_rate": 7.423202397277618e-08, "loss": 0.2575, "step": 15054 }, { "epoch": 0.95, "grad_norm": 1.9318247935661166, "learning_rate": 7.405727894624626e-08, "loss": 0.2592, "step": 15055 }, { "epoch": 0.95, "grad_norm": 2.316707003497598, "learning_rate": 7.388273830582937e-08, "loss": 0.2544, "step": 15056 }, { "epoch": 0.95, "grad_norm": 1.678109446238628, "learning_rate": 7.370840205876806e-08, "loss": 0.2528, "step": 15057 }, { "epoch": 0.95, "grad_norm": 3.0675570350701333, "learning_rate": 7.353427021229486e-08, "loss": 0.2776, "step": 15058 }, { "epoch": 0.95, "grad_norm": 2.1464382725465234, "learning_rate": 7.336034277363513e-08, "loss": 0.2456, "step": 15059 }, { "epoch": 0.95, "grad_norm": 3.433912470881727, "learning_rate": 7.318661975000585e-08, "loss": 0.2528, "step": 15060 }, { "epoch": 0.95, "grad_norm": 1.9775996362674466, "learning_rate": 7.301310114861404e-08, "loss": 0.2734, "step": 15061 }, { "epoch": 0.95, "grad_norm": 2.356976812157187, "learning_rate": 7.283978697666005e-08, "loss": 0.2637, "step": 15062 }, { "epoch": 0.95, "grad_norm": 1.4432642208060746, "learning_rate": 7.266667724133536e-08, "loss": 0.239, "step": 15063 }, { "epoch": 0.95, "grad_norm": 1.875363497132506, "learning_rate": 7.249377194982199e-08, "loss": 0.2493, "step": 15064 }, { "epoch": 0.95, "grad_norm": 2.750832913721003, "learning_rate": 7.23210711092942e-08, "loss": 0.2551, "step": 15065 }, { "epoch": 0.95, "grad_norm": 3.1965919730419743, "learning_rate": 7.214857472691794e-08, "loss": 0.2591, "step": 15066 }, { "epoch": 0.95, "grad_norm": 3.4181288946124706, "learning_rate": 7.19762828098497e-08, "loss": 0.2371, "step": 15067 }, { "epoch": 0.95, "grad_norm": 1.9364263772404247, "learning_rate": 7.18041953652393e-08, "loss": 0.2429, "step": 15068 }, { "epoch": 0.95, "grad_norm": 7.27039854486909, "learning_rate": 7.16323124002255e-08, "loss": 0.2847, "step": 15069 }, { "epoch": 0.95, "grad_norm": 2.423011681705568, "learning_rate": 7.146063392194147e-08, "loss": 0.2654, "step": 15070 }, { "epoch": 0.95, "grad_norm": 1.917807672109599, "learning_rate": 7.128915993750929e-08, "loss": 0.2587, "step": 15071 }, { "epoch": 0.95, "grad_norm": 1.7531678404609035, "learning_rate": 7.111789045404493e-08, "loss": 0.2276, "step": 15072 }, { "epoch": 0.95, "grad_norm": 1.6049922607264762, "learning_rate": 7.094682547865328e-08, "loss": 0.2468, "step": 15073 }, { "epoch": 0.95, "grad_norm": 2.1576768292165927, "learning_rate": 7.07759650184331e-08, "loss": 0.2592, "step": 15074 }, { "epoch": 0.95, "grad_norm": 11.952215411700562, "learning_rate": 7.06053090804737e-08, "loss": 0.251, "step": 15075 }, { "epoch": 0.95, "grad_norm": 2.157583088277711, "learning_rate": 7.043485767185554e-08, "loss": 0.2449, "step": 15076 }, { "epoch": 0.95, "grad_norm": 0.5970967109403809, "learning_rate": 7.026461079965075e-08, "loss": 0.4728, "step": 15077 }, { "epoch": 0.95, "grad_norm": 1.564200936615172, "learning_rate": 7.009456847092311e-08, "loss": 0.2579, "step": 15078 }, { "epoch": 0.95, "grad_norm": 1.3959489720807112, "learning_rate": 6.99247306927292e-08, "loss": 0.2551, "step": 15079 }, { "epoch": 0.95, "grad_norm": 1.7185456841383286, "learning_rate": 6.975509747211451e-08, "loss": 0.2431, "step": 15080 }, { "epoch": 0.95, "grad_norm": 2.557992125173991, "learning_rate": 6.958566881611783e-08, "loss": 0.2555, "step": 15081 }, { "epoch": 0.95, "grad_norm": 2.473151134879203, "learning_rate": 6.941644473176967e-08, "loss": 0.2649, "step": 15082 }, { "epoch": 0.95, "grad_norm": 1.8596363940244411, "learning_rate": 6.924742522609052e-08, "loss": 0.2565, "step": 15083 }, { "epoch": 0.95, "grad_norm": 3.3633102982141923, "learning_rate": 6.90786103060942e-08, "loss": 0.2463, "step": 15084 }, { "epoch": 0.95, "grad_norm": 2.0047519931535174, "learning_rate": 6.890999997878345e-08, "loss": 0.2507, "step": 15085 }, { "epoch": 0.95, "grad_norm": 1.9008710161498206, "learning_rate": 6.8741594251156e-08, "loss": 0.2304, "step": 15086 }, { "epoch": 0.95, "grad_norm": 2.4535875985794044, "learning_rate": 6.857339313019906e-08, "loss": 0.2404, "step": 15087 }, { "epoch": 0.95, "grad_norm": 2.2091228671246594, "learning_rate": 6.840539662289148e-08, "loss": 0.2338, "step": 15088 }, { "epoch": 0.95, "grad_norm": 1.5789767588666381, "learning_rate": 6.823760473620267e-08, "loss": 0.2537, "step": 15089 }, { "epoch": 0.95, "grad_norm": 1.5449648442695354, "learning_rate": 6.807001747709596e-08, "loss": 0.2328, "step": 15090 }, { "epoch": 0.95, "grad_norm": 1.4428670856828945, "learning_rate": 6.790263485252413e-08, "loss": 0.2286, "step": 15091 }, { "epoch": 0.95, "grad_norm": 1.9390355575658578, "learning_rate": 6.773545686943272e-08, "loss": 0.2468, "step": 15092 }, { "epoch": 0.95, "grad_norm": 3.6580568443351344, "learning_rate": 6.756848353475732e-08, "loss": 0.2672, "step": 15093 }, { "epoch": 0.95, "grad_norm": 1.6013723138859637, "learning_rate": 6.740171485542624e-08, "loss": 0.2356, "step": 15094 }, { "epoch": 0.95, "grad_norm": 1.8487835934831445, "learning_rate": 6.723515083836008e-08, "loss": 0.2636, "step": 15095 }, { "epoch": 0.95, "grad_norm": 3.0637371279345085, "learning_rate": 6.706879149046886e-08, "loss": 0.2614, "step": 15096 }, { "epoch": 0.95, "grad_norm": 3.01843306386544, "learning_rate": 6.690263681865539e-08, "loss": 0.2572, "step": 15097 }, { "epoch": 0.95, "grad_norm": 2.2889947065763074, "learning_rate": 6.67366868298136e-08, "loss": 0.2775, "step": 15098 }, { "epoch": 0.95, "grad_norm": 1.7265614047933293, "learning_rate": 6.657094153082911e-08, "loss": 0.2431, "step": 15099 }, { "epoch": 0.95, "grad_norm": 1.9326881289441615, "learning_rate": 6.640540092857916e-08, "loss": 0.2275, "step": 15100 }, { "epoch": 0.95, "grad_norm": 1.4671067198548577, "learning_rate": 6.624006502993219e-08, "loss": 0.2304, "step": 15101 }, { "epoch": 0.95, "grad_norm": 5.3049156344853685, "learning_rate": 6.607493384174768e-08, "loss": 0.2589, "step": 15102 }, { "epoch": 0.95, "grad_norm": 1.7234941969486317, "learning_rate": 6.59100073708785e-08, "loss": 0.2327, "step": 15103 }, { "epoch": 0.95, "grad_norm": 2.9464370795901975, "learning_rate": 6.574528562416693e-08, "loss": 0.2623, "step": 15104 }, { "epoch": 0.95, "grad_norm": 2.2161176030459635, "learning_rate": 6.558076860844752e-08, "loss": 0.2487, "step": 15105 }, { "epoch": 0.95, "grad_norm": 1.4946278049032118, "learning_rate": 6.54164563305465e-08, "loss": 0.2314, "step": 15106 }, { "epoch": 0.95, "grad_norm": 2.9899022284806125, "learning_rate": 6.525234879728171e-08, "loss": 0.2554, "step": 15107 }, { "epoch": 0.95, "grad_norm": 2.34642547748058, "learning_rate": 6.508844601546216e-08, "loss": 0.2299, "step": 15108 }, { "epoch": 0.95, "grad_norm": 1.7554524323469507, "learning_rate": 6.492474799188797e-08, "loss": 0.2528, "step": 15109 }, { "epoch": 0.95, "grad_norm": 1.8134892649807535, "learning_rate": 6.476125473335149e-08, "loss": 0.2243, "step": 15110 }, { "epoch": 0.95, "grad_norm": 1.700990344186364, "learning_rate": 6.459796624663728e-08, "loss": 0.2436, "step": 15111 }, { "epoch": 0.95, "grad_norm": 2.1122002978515972, "learning_rate": 6.443488253851937e-08, "loss": 0.2323, "step": 15112 }, { "epoch": 0.95, "grad_norm": 1.8460731893107822, "learning_rate": 6.427200361576458e-08, "loss": 0.2474, "step": 15113 }, { "epoch": 0.95, "grad_norm": 1.770180126530419, "learning_rate": 6.410932948513138e-08, "loss": 0.2674, "step": 15114 }, { "epoch": 0.95, "grad_norm": 1.761875834139763, "learning_rate": 6.394686015336882e-08, "loss": 0.2701, "step": 15115 }, { "epoch": 0.95, "grad_norm": 3.8154205062658497, "learning_rate": 6.378459562721928e-08, "loss": 0.2609, "step": 15116 }, { "epoch": 0.95, "grad_norm": 2.0372390233772855, "learning_rate": 6.362253591341405e-08, "loss": 0.2741, "step": 15117 }, { "epoch": 0.95, "grad_norm": 6.583921479661432, "learning_rate": 6.346068101867775e-08, "loss": 0.2517, "step": 15118 }, { "epoch": 0.95, "grad_norm": 1.5572213060348525, "learning_rate": 6.329903094972612e-08, "loss": 0.2502, "step": 15119 }, { "epoch": 0.95, "grad_norm": 1.6531779228784358, "learning_rate": 6.313758571326656e-08, "loss": 0.2426, "step": 15120 }, { "epoch": 0.95, "grad_norm": 1.9409091805365677, "learning_rate": 6.29763453159965e-08, "loss": 0.2331, "step": 15121 }, { "epoch": 0.95, "grad_norm": 3.0001766903916027, "learning_rate": 6.281530976460781e-08, "loss": 0.2695, "step": 15122 }, { "epoch": 0.95, "grad_norm": 3.1533895809246753, "learning_rate": 6.265447906578126e-08, "loss": 0.282, "step": 15123 }, { "epoch": 0.95, "grad_norm": 1.6730403870285562, "learning_rate": 6.24938532261904e-08, "loss": 0.2532, "step": 15124 }, { "epoch": 0.95, "grad_norm": 1.7155203670651034, "learning_rate": 6.233343225249933e-08, "loss": 0.2575, "step": 15125 }, { "epoch": 0.95, "grad_norm": 4.999608808053576, "learning_rate": 6.217321615136385e-08, "loss": 0.2409, "step": 15126 }, { "epoch": 0.95, "grad_norm": 0.5850726638459498, "learning_rate": 6.201320492943252e-08, "loss": 0.4774, "step": 15127 }, { "epoch": 0.95, "grad_norm": 0.6008261510209832, "learning_rate": 6.18533985933445e-08, "loss": 0.465, "step": 15128 }, { "epoch": 0.95, "grad_norm": 1.5886253994562793, "learning_rate": 6.169379714973e-08, "loss": 0.2581, "step": 15129 }, { "epoch": 0.95, "grad_norm": 1.8513448530907524, "learning_rate": 6.153440060521099e-08, "loss": 0.2394, "step": 15130 }, { "epoch": 0.95, "grad_norm": 1.9469107701208122, "learning_rate": 6.137520896640159e-08, "loss": 0.2506, "step": 15131 }, { "epoch": 0.95, "grad_norm": 1.7405142829403057, "learning_rate": 6.121622223990708e-08, "loss": 0.2295, "step": 15132 }, { "epoch": 0.95, "grad_norm": 2.4737681393194144, "learning_rate": 6.105744043232331e-08, "loss": 0.2386, "step": 15133 }, { "epoch": 0.95, "grad_norm": 1.862263669110029, "learning_rate": 6.089886355023889e-08, "loss": 0.2727, "step": 15134 }, { "epoch": 0.95, "grad_norm": 1.8948451670601327, "learning_rate": 6.074049160023355e-08, "loss": 0.2458, "step": 15135 }, { "epoch": 0.95, "grad_norm": 1.8907923683215584, "learning_rate": 6.05823245888787e-08, "loss": 0.2381, "step": 15136 }, { "epoch": 0.95, "grad_norm": 1.933079028711817, "learning_rate": 6.042436252273631e-08, "loss": 0.2426, "step": 15137 }, { "epoch": 0.95, "grad_norm": 0.6434428434070673, "learning_rate": 6.026660540836005e-08, "loss": 0.4853, "step": 15138 }, { "epoch": 0.95, "grad_norm": 0.6234215158027452, "learning_rate": 6.010905325229744e-08, "loss": 0.4676, "step": 15139 }, { "epoch": 0.95, "grad_norm": 2.0697498060359614, "learning_rate": 5.99517060610838e-08, "loss": 0.2556, "step": 15140 }, { "epoch": 0.95, "grad_norm": 2.6493974884538214, "learning_rate": 5.979456384124893e-08, "loss": 0.269, "step": 15141 }, { "epoch": 0.95, "grad_norm": 1.8341882391238067, "learning_rate": 5.963762659931205e-08, "loss": 0.2673, "step": 15142 }, { "epoch": 0.95, "grad_norm": 2.287705961590037, "learning_rate": 5.948089434178517e-08, "loss": 0.249, "step": 15143 }, { "epoch": 0.95, "grad_norm": 1.897937430213338, "learning_rate": 5.932436707517142e-08, "loss": 0.2577, "step": 15144 }, { "epoch": 0.95, "grad_norm": 1.8812675656392879, "learning_rate": 5.91680448059645e-08, "loss": 0.247, "step": 15145 }, { "epoch": 0.95, "grad_norm": 2.3695730808633724, "learning_rate": 5.901192754065199e-08, "loss": 0.2478, "step": 15146 }, { "epoch": 0.95, "grad_norm": 2.3134578969140613, "learning_rate": 5.8856015285710946e-08, "loss": 0.2592, "step": 15147 }, { "epoch": 0.95, "grad_norm": 2.519528924904941, "learning_rate": 5.870030804761007e-08, "loss": 0.2606, "step": 15148 }, { "epoch": 0.95, "grad_norm": 1.8495245079254352, "learning_rate": 5.8544805832810304e-08, "loss": 0.2696, "step": 15149 }, { "epoch": 0.95, "grad_norm": 1.6019247537774153, "learning_rate": 5.838950864776316e-08, "loss": 0.27, "step": 15150 }, { "epoch": 0.95, "grad_norm": 2.1327114632354314, "learning_rate": 5.8234416498912925e-08, "loss": 0.2542, "step": 15151 }, { "epoch": 0.95, "grad_norm": 2.571365519697078, "learning_rate": 5.8079529392694453e-08, "loss": 0.2636, "step": 15152 }, { "epoch": 0.95, "grad_norm": 3.16703759712683, "learning_rate": 5.792484733553372e-08, "loss": 0.2447, "step": 15153 }, { "epoch": 0.95, "grad_norm": 2.010961220939667, "learning_rate": 5.7770370333849466e-08, "loss": 0.2569, "step": 15154 }, { "epoch": 0.95, "grad_norm": 1.640572411925826, "learning_rate": 5.7616098394051026e-08, "loss": 0.262, "step": 15155 }, { "epoch": 0.95, "grad_norm": 1.5472826722278206, "learning_rate": 5.746203152253937e-08, "loss": 0.2489, "step": 15156 }, { "epoch": 0.95, "grad_norm": 3.4749328699426574, "learning_rate": 5.730816972570663e-08, "loss": 0.249, "step": 15157 }, { "epoch": 0.95, "grad_norm": 2.423856974609935, "learning_rate": 5.715451300993713e-08, "loss": 0.2591, "step": 15158 }, { "epoch": 0.95, "grad_norm": 2.009584840737862, "learning_rate": 5.700106138160688e-08, "loss": 0.24, "step": 15159 }, { "epoch": 0.95, "grad_norm": 1.3581814776620722, "learning_rate": 5.684781484708135e-08, "loss": 0.2549, "step": 15160 }, { "epoch": 0.95, "grad_norm": 1.9425870284310038, "learning_rate": 5.6694773412721e-08, "loss": 0.2465, "step": 15161 }, { "epoch": 0.95, "grad_norm": 1.2810184054673877, "learning_rate": 5.65419370848741e-08, "loss": 0.2414, "step": 15162 }, { "epoch": 0.95, "grad_norm": 2.703815186506678, "learning_rate": 5.638930586988334e-08, "loss": 0.2448, "step": 15163 }, { "epoch": 0.95, "grad_norm": 0.6213095471075559, "learning_rate": 5.623687977408088e-08, "loss": 0.4262, "step": 15164 }, { "epoch": 0.95, "grad_norm": 1.682341354058051, "learning_rate": 5.6084658803791656e-08, "loss": 0.2496, "step": 15165 }, { "epoch": 0.95, "grad_norm": 3.215689411723757, "learning_rate": 5.5932642965331184e-08, "loss": 0.2569, "step": 15166 }, { "epoch": 0.95, "grad_norm": 0.6587726584601461, "learning_rate": 5.578083226500663e-08, "loss": 0.4948, "step": 15167 }, { "epoch": 0.95, "grad_norm": 11.168351878895283, "learning_rate": 5.562922670911741e-08, "loss": 0.2371, "step": 15168 }, { "epoch": 0.95, "grad_norm": 1.8955329916455563, "learning_rate": 5.547782630395404e-08, "loss": 0.2467, "step": 15169 }, { "epoch": 0.95, "grad_norm": 2.588851101880383, "learning_rate": 5.532663105579761e-08, "loss": 0.2449, "step": 15170 }, { "epoch": 0.95, "grad_norm": 1.6489272704683102, "learning_rate": 5.517564097092254e-08, "loss": 0.2478, "step": 15171 }, { "epoch": 0.95, "grad_norm": 16.90788333060237, "learning_rate": 5.5024856055593266e-08, "loss": 0.2429, "step": 15172 }, { "epoch": 0.95, "grad_norm": 2.28757202297265, "learning_rate": 5.487427631606534e-08, "loss": 0.2411, "step": 15173 }, { "epoch": 0.95, "grad_norm": 1.8575776825344417, "learning_rate": 5.472390175858766e-08, "loss": 0.2436, "step": 15174 }, { "epoch": 0.95, "grad_norm": 2.2881137453485976, "learning_rate": 5.457373238939856e-08, "loss": 0.2565, "step": 15175 }, { "epoch": 0.95, "grad_norm": 1.8198144124812337, "learning_rate": 5.4423768214730274e-08, "loss": 0.2694, "step": 15176 }, { "epoch": 0.95, "grad_norm": 2.8031232580146708, "learning_rate": 5.427400924080395e-08, "loss": 0.267, "step": 15177 }, { "epoch": 0.95, "grad_norm": 2.6278084081181734, "learning_rate": 5.4124455473832935e-08, "loss": 0.2529, "step": 15178 }, { "epoch": 0.95, "grad_norm": 2.021537135167698, "learning_rate": 5.397510692002339e-08, "loss": 0.2541, "step": 15179 }, { "epoch": 0.95, "grad_norm": 3.402138276385479, "learning_rate": 5.3825963585572015e-08, "loss": 0.2371, "step": 15180 }, { "epoch": 0.95, "grad_norm": 1.8101820977372205, "learning_rate": 5.367702547666664e-08, "loss": 0.2647, "step": 15181 }, { "epoch": 0.95, "grad_norm": 1.8894330996454136, "learning_rate": 5.3528292599486776e-08, "loss": 0.2579, "step": 15182 }, { "epoch": 0.95, "grad_norm": 1.8951110145995576, "learning_rate": 5.337976496020469e-08, "loss": 0.2417, "step": 15183 }, { "epoch": 0.95, "grad_norm": 1.9426591456784061, "learning_rate": 5.323144256498214e-08, "loss": 0.2446, "step": 15184 }, { "epoch": 0.95, "grad_norm": 1.4369025705594134, "learning_rate": 5.308332541997363e-08, "loss": 0.2347, "step": 15185 }, { "epoch": 0.96, "grad_norm": 1.8318883764582616, "learning_rate": 5.293541353132425e-08, "loss": 0.2488, "step": 15186 }, { "epoch": 0.96, "grad_norm": 1.6685713685598524, "learning_rate": 5.2787706905171874e-08, "loss": 0.2597, "step": 15187 }, { "epoch": 0.96, "grad_norm": 1.5352402352444785, "learning_rate": 5.264020554764438e-08, "loss": 0.2425, "step": 15188 }, { "epoch": 0.96, "grad_norm": 1.7706277661782548, "learning_rate": 5.249290946486241e-08, "loss": 0.2435, "step": 15189 }, { "epoch": 0.96, "grad_norm": 4.736725762980753, "learning_rate": 5.23458186629372e-08, "loss": 0.2516, "step": 15190 }, { "epoch": 0.96, "grad_norm": 3.764797371212197, "learning_rate": 5.2198933147972194e-08, "loss": 0.2485, "step": 15191 }, { "epoch": 0.96, "grad_norm": 2.0022105372767873, "learning_rate": 5.205225292606198e-08, "loss": 0.2567, "step": 15192 }, { "epoch": 0.96, "grad_norm": 2.594500950542812, "learning_rate": 5.1905778003292216e-08, "loss": 0.2422, "step": 15193 }, { "epoch": 0.96, "grad_norm": 1.9204515577690455, "learning_rate": 5.175950838574029e-08, "loss": 0.2449, "step": 15194 }, { "epoch": 0.96, "grad_norm": 1.5331104549298378, "learning_rate": 5.1613444079475774e-08, "loss": 0.2333, "step": 15195 }, { "epoch": 0.96, "grad_norm": 2.47501339219459, "learning_rate": 5.1467585090558825e-08, "loss": 0.2581, "step": 15196 }, { "epoch": 0.96, "grad_norm": 2.684539632185198, "learning_rate": 5.132193142504072e-08, "loss": 0.247, "step": 15197 }, { "epoch": 0.96, "grad_norm": 2.027822068369087, "learning_rate": 5.1176483088966054e-08, "loss": 0.265, "step": 15198 }, { "epoch": 0.96, "grad_norm": 2.6415789247623427, "learning_rate": 5.103124008836891e-08, "loss": 0.2477, "step": 15199 }, { "epoch": 0.96, "grad_norm": 8.164109607072536, "learning_rate": 5.088620242927666e-08, "loss": 0.2474, "step": 15200 }, { "epoch": 0.96, "grad_norm": 2.494603751571222, "learning_rate": 5.0741370117705634e-08, "loss": 0.2491, "step": 15201 }, { "epoch": 0.96, "grad_norm": 2.0621501866695215, "learning_rate": 5.0596743159666565e-08, "loss": 0.2587, "step": 15202 }, { "epoch": 0.96, "grad_norm": 1.7005264148884391, "learning_rate": 5.0452321561159646e-08, "loss": 0.2458, "step": 15203 }, { "epoch": 0.96, "grad_norm": 2.2018407435742287, "learning_rate": 5.030810532817732e-08, "loss": 0.256, "step": 15204 }, { "epoch": 0.96, "grad_norm": 2.7323948238513522, "learning_rate": 5.016409446670312e-08, "loss": 0.2473, "step": 15205 }, { "epoch": 0.96, "grad_norm": 3.363654318878327, "learning_rate": 5.002028898271227e-08, "loss": 0.2645, "step": 15206 }, { "epoch": 0.96, "grad_norm": 2.236699145240007, "learning_rate": 4.987668888217223e-08, "loss": 0.2473, "step": 15207 }, { "epoch": 0.96, "grad_norm": 2.194310880329028, "learning_rate": 4.9733294171041e-08, "loss": 0.2478, "step": 15208 }, { "epoch": 0.96, "grad_norm": 3.8973970223237857, "learning_rate": 4.959010485526772e-08, "loss": 0.2526, "step": 15209 }, { "epoch": 0.96, "grad_norm": 0.6451938731953772, "learning_rate": 4.9447120940793735e-08, "loss": 0.466, "step": 15210 }, { "epoch": 0.96, "grad_norm": 3.048792251373977, "learning_rate": 4.9304342433552086e-08, "loss": 0.2539, "step": 15211 }, { "epoch": 0.96, "grad_norm": 5.019522967877576, "learning_rate": 4.9161769339466926e-08, "loss": 0.236, "step": 15212 }, { "epoch": 0.96, "grad_norm": 1.906157398199811, "learning_rate": 4.9019401664453516e-08, "loss": 0.2635, "step": 15213 }, { "epoch": 0.96, "grad_norm": 2.303859144721179, "learning_rate": 4.887723941441824e-08, "loss": 0.2594, "step": 15214 }, { "epoch": 0.96, "grad_norm": 2.3740232477896925, "learning_rate": 4.8735282595261393e-08, "loss": 0.2337, "step": 15215 }, { "epoch": 0.96, "grad_norm": 4.811416062202169, "learning_rate": 4.8593531212872136e-08, "loss": 0.268, "step": 15216 }, { "epoch": 0.96, "grad_norm": 2.247098452250411, "learning_rate": 4.845198527313188e-08, "loss": 0.2526, "step": 15217 }, { "epoch": 0.96, "grad_norm": 1.3133988129658465, "learning_rate": 4.831064478191316e-08, "loss": 0.2411, "step": 15218 }, { "epoch": 0.96, "grad_norm": 3.008405293056618, "learning_rate": 4.8169509745081275e-08, "loss": 0.2535, "step": 15219 }, { "epoch": 0.96, "grad_norm": 3.710309663320064, "learning_rate": 4.80285801684921e-08, "loss": 0.2553, "step": 15220 }, { "epoch": 0.96, "grad_norm": 1.9966221546370129, "learning_rate": 4.7887856057993175e-08, "loss": 0.2604, "step": 15221 }, { "epoch": 0.96, "grad_norm": 2.1092487647744567, "learning_rate": 4.774733741942206e-08, "loss": 0.2524, "step": 15222 }, { "epoch": 0.96, "grad_norm": 0.5856665390191502, "learning_rate": 4.7607024258610744e-08, "loss": 0.4571, "step": 15223 }, { "epoch": 0.96, "grad_norm": 1.4573738058872443, "learning_rate": 4.746691658138014e-08, "loss": 0.2579, "step": 15224 }, { "epoch": 0.96, "grad_norm": 3.677805411856043, "learning_rate": 4.732701439354448e-08, "loss": 0.2383, "step": 15225 }, { "epoch": 0.96, "grad_norm": 3.5062308680197356, "learning_rate": 4.7187317700906896e-08, "loss": 0.257, "step": 15226 }, { "epoch": 0.96, "grad_norm": 3.9946536866791273, "learning_rate": 4.704782650926554e-08, "loss": 0.249, "step": 15227 }, { "epoch": 0.96, "grad_norm": 1.8148324274248189, "learning_rate": 4.6908540824406876e-08, "loss": 0.2462, "step": 15228 }, { "epoch": 0.96, "grad_norm": 2.0039248350527212, "learning_rate": 4.676946065211074e-08, "loss": 0.236, "step": 15229 }, { "epoch": 0.96, "grad_norm": 4.078730454449582, "learning_rate": 4.6630585998147515e-08, "loss": 0.2339, "step": 15230 }, { "epoch": 0.96, "grad_norm": 4.510118986408492, "learning_rate": 4.649191686827925e-08, "loss": 0.2611, "step": 15231 }, { "epoch": 0.96, "grad_norm": 2.525831984473719, "learning_rate": 4.635345326826024e-08, "loss": 0.2518, "step": 15232 }, { "epoch": 0.96, "grad_norm": 7.9945673532516714, "learning_rate": 4.621519520383477e-08, "loss": 0.2547, "step": 15233 }, { "epoch": 0.96, "grad_norm": 2.5247985117279472, "learning_rate": 4.607714268073937e-08, "loss": 0.2339, "step": 15234 }, { "epoch": 0.96, "grad_norm": 2.0937951549587672, "learning_rate": 4.5939295704703344e-08, "loss": 0.2394, "step": 15235 }, { "epoch": 0.96, "grad_norm": 2.5020797471347267, "learning_rate": 4.58016542814449e-08, "loss": 0.2435, "step": 15236 }, { "epoch": 0.96, "grad_norm": 1.6930752115180783, "learning_rate": 4.5664218416675565e-08, "loss": 0.2256, "step": 15237 }, { "epoch": 0.96, "grad_norm": 1.582958312268396, "learning_rate": 4.5526988116097457e-08, "loss": 0.2464, "step": 15238 }, { "epoch": 0.96, "grad_norm": 3.2690202315888017, "learning_rate": 4.5389963385405467e-08, "loss": 0.2457, "step": 15239 }, { "epoch": 0.96, "grad_norm": 2.265770290941597, "learning_rate": 4.525314423028393e-08, "loss": 0.2421, "step": 15240 }, { "epoch": 0.96, "grad_norm": 1.7212025690661907, "learning_rate": 4.5116530656409415e-08, "loss": 0.2394, "step": 15241 }, { "epoch": 0.96, "grad_norm": 1.6488513883421396, "learning_rate": 4.498012266945129e-08, "loss": 0.2444, "step": 15242 }, { "epoch": 0.96, "grad_norm": 2.300497615099552, "learning_rate": 4.4843920275068896e-08, "loss": 0.2392, "step": 15243 }, { "epoch": 0.96, "grad_norm": 1.5748133550149743, "learning_rate": 4.470792347891384e-08, "loss": 0.2573, "step": 15244 }, { "epoch": 0.96, "grad_norm": 1.558385985174884, "learning_rate": 4.457213228662882e-08, "loss": 0.2549, "step": 15245 }, { "epoch": 0.96, "grad_norm": 4.0750824986953065, "learning_rate": 4.443654670384767e-08, "loss": 0.2583, "step": 15246 }, { "epoch": 0.96, "grad_norm": 1.5004155359426379, "learning_rate": 4.430116673619589e-08, "loss": 0.2519, "step": 15247 }, { "epoch": 0.96, "grad_norm": 2.177245468921564, "learning_rate": 4.4165992389291754e-08, "loss": 0.2343, "step": 15248 }, { "epoch": 0.96, "grad_norm": 2.55994081459802, "learning_rate": 4.4031023668742454e-08, "loss": 0.2513, "step": 15249 }, { "epoch": 0.96, "grad_norm": 2.236676502308055, "learning_rate": 4.3896260580149067e-08, "loss": 0.2675, "step": 15250 }, { "epoch": 0.96, "grad_norm": 1.4437197521405114, "learning_rate": 4.3761703129102664e-08, "loss": 0.2501, "step": 15251 }, { "epoch": 0.96, "grad_norm": 4.823777221525274, "learning_rate": 4.362735132118656e-08, "loss": 0.2368, "step": 15252 }, { "epoch": 0.96, "grad_norm": 1.5703363684189728, "learning_rate": 4.349320516197575e-08, "loss": 0.2443, "step": 15253 }, { "epoch": 0.96, "grad_norm": 2.0679800829988095, "learning_rate": 4.335926465703466e-08, "loss": 0.2592, "step": 15254 }, { "epoch": 0.96, "grad_norm": 2.356343132527862, "learning_rate": 4.3225529811922186e-08, "loss": 0.2567, "step": 15255 }, { "epoch": 0.96, "grad_norm": 1.9619603710102285, "learning_rate": 4.309200063218666e-08, "loss": 0.2355, "step": 15256 }, { "epoch": 0.96, "grad_norm": 1.7831380507861792, "learning_rate": 4.29586771233681e-08, "loss": 0.2513, "step": 15257 }, { "epoch": 0.96, "grad_norm": 1.860489360122903, "learning_rate": 4.28255592909993e-08, "loss": 0.248, "step": 15258 }, { "epoch": 0.96, "grad_norm": 2.005892768479395, "learning_rate": 4.2692647140602507e-08, "loss": 0.2482, "step": 15259 }, { "epoch": 0.96, "grad_norm": 2.023296402407743, "learning_rate": 4.255994067769331e-08, "loss": 0.2537, "step": 15260 }, { "epoch": 0.96, "grad_norm": 2.220633330102731, "learning_rate": 4.2427439907777866e-08, "loss": 0.2471, "step": 15261 }, { "epoch": 0.96, "grad_norm": 3.869521845471784, "learning_rate": 4.229514483635288e-08, "loss": 0.2561, "step": 15262 }, { "epoch": 0.96, "grad_norm": 1.8901609326295237, "learning_rate": 4.216305546890842e-08, "loss": 0.2535, "step": 15263 }, { "epoch": 0.96, "grad_norm": 2.390020115903151, "learning_rate": 4.2031171810925083e-08, "loss": 0.2356, "step": 15264 }, { "epoch": 0.96, "grad_norm": 0.5936812413296529, "learning_rate": 4.189949386787462e-08, "loss": 0.4859, "step": 15265 }, { "epoch": 0.96, "grad_norm": 3.337940223191999, "learning_rate": 4.176802164522042e-08, "loss": 0.2596, "step": 15266 }, { "epoch": 0.96, "grad_norm": 2.4417081749550698, "learning_rate": 4.163675514841814e-08, "loss": 0.2438, "step": 15267 }, { "epoch": 0.96, "grad_norm": 2.660431688649492, "learning_rate": 4.1505694382913965e-08, "loss": 0.2727, "step": 15268 }, { "epoch": 0.96, "grad_norm": 14.53405796262389, "learning_rate": 4.1374839354146325e-08, "loss": 0.2781, "step": 15269 }, { "epoch": 0.96, "grad_norm": 2.377763272319495, "learning_rate": 4.12441900675431e-08, "loss": 0.2716, "step": 15270 }, { "epoch": 0.96, "grad_norm": 2.9765658395738166, "learning_rate": 4.111374652852662e-08, "loss": 0.2513, "step": 15271 }, { "epoch": 0.96, "grad_norm": 1.5273943077283598, "learning_rate": 4.098350874250867e-08, "loss": 0.2389, "step": 15272 }, { "epoch": 0.96, "grad_norm": 4.407040531772963, "learning_rate": 4.085347671489382e-08, "loss": 0.2365, "step": 15273 }, { "epoch": 0.96, "grad_norm": 2.3047366264035998, "learning_rate": 4.072365045107551e-08, "loss": 0.2713, "step": 15274 }, { "epoch": 0.96, "grad_norm": 1.8629879975745975, "learning_rate": 4.059402995644224e-08, "loss": 0.2543, "step": 15275 }, { "epoch": 0.96, "grad_norm": 2.072581575390451, "learning_rate": 4.046461523637191e-08, "loss": 0.254, "step": 15276 }, { "epoch": 0.96, "grad_norm": 2.3297485353315848, "learning_rate": 4.033540629623356e-08, "loss": 0.2598, "step": 15277 }, { "epoch": 0.96, "grad_norm": 2.317567473137255, "learning_rate": 4.020640314138846e-08, "loss": 0.242, "step": 15278 }, { "epoch": 0.96, "grad_norm": 2.165336164376101, "learning_rate": 4.0077605777189e-08, "loss": 0.2333, "step": 15279 }, { "epoch": 0.96, "grad_norm": 3.028975643073122, "learning_rate": 3.9949014208979784e-08, "loss": 0.2373, "step": 15280 }, { "epoch": 0.96, "grad_norm": 7.164789527296874, "learning_rate": 3.9820628442096e-08, "loss": 0.2557, "step": 15281 }, { "epoch": 0.96, "grad_norm": 2.1806254702067998, "learning_rate": 3.969244848186449e-08, "loss": 0.267, "step": 15282 }, { "epoch": 0.96, "grad_norm": 3.0176171263476332, "learning_rate": 3.9564474333603776e-08, "loss": 0.2469, "step": 15283 }, { "epoch": 0.96, "grad_norm": 1.6824030486712855, "learning_rate": 3.943670600262406e-08, "loss": 0.2523, "step": 15284 }, { "epoch": 0.96, "grad_norm": 1.7226523653560994, "learning_rate": 3.9309143494226097e-08, "loss": 0.2443, "step": 15285 }, { "epoch": 0.96, "grad_norm": 1.582261031254253, "learning_rate": 3.918178681370288e-08, "loss": 0.2482, "step": 15286 }, { "epoch": 0.96, "grad_norm": 1.6870161425798145, "learning_rate": 3.905463596633852e-08, "loss": 0.2538, "step": 15287 }, { "epoch": 0.96, "grad_norm": 2.18981167878124, "learning_rate": 3.8927690957409893e-08, "loss": 0.2541, "step": 15288 }, { "epoch": 0.96, "grad_norm": 1.774093789964343, "learning_rate": 3.880095179218224e-08, "loss": 0.2418, "step": 15289 }, { "epoch": 0.96, "grad_norm": 1.8224984339852053, "learning_rate": 3.86744184759158e-08, "loss": 0.2511, "step": 15290 }, { "epoch": 0.96, "grad_norm": 1.8674508338644273, "learning_rate": 3.8548091013859704e-08, "loss": 0.2294, "step": 15291 }, { "epoch": 0.96, "grad_norm": 1.49393034334191, "learning_rate": 3.842196941125587e-08, "loss": 0.2419, "step": 15292 }, { "epoch": 0.96, "grad_norm": 8.091091727312069, "learning_rate": 3.8296053673337884e-08, "loss": 0.2439, "step": 15293 }, { "epoch": 0.96, "grad_norm": 9.896800212058695, "learning_rate": 3.8170343805328245e-08, "loss": 0.2341, "step": 15294 }, { "epoch": 0.96, "grad_norm": 7.168275542320641, "learning_rate": 3.8044839812445554e-08, "loss": 0.2693, "step": 15295 }, { "epoch": 0.96, "grad_norm": 1.7257924184085172, "learning_rate": 3.791954169989509e-08, "loss": 0.2476, "step": 15296 }, { "epoch": 0.96, "grad_norm": 4.648661235010644, "learning_rate": 3.7794449472877135e-08, "loss": 0.2383, "step": 15297 }, { "epoch": 0.96, "grad_norm": 1.5738737607136402, "learning_rate": 3.7669563136580875e-08, "loss": 0.2597, "step": 15298 }, { "epoch": 0.96, "grad_norm": 1.802143903144524, "learning_rate": 3.754488269618883e-08, "loss": 0.2622, "step": 15299 }, { "epoch": 0.96, "grad_norm": 2.3231835161972927, "learning_rate": 3.742040815687353e-08, "loss": 0.2636, "step": 15300 }, { "epoch": 0.96, "grad_norm": 2.4970093285623225, "learning_rate": 3.729613952380029e-08, "loss": 0.2484, "step": 15301 }, { "epoch": 0.96, "grad_norm": 3.997487054648361, "learning_rate": 3.7172076802124426e-08, "loss": 0.2698, "step": 15302 }, { "epoch": 0.96, "grad_norm": 3.5618913184894834, "learning_rate": 3.704821999699515e-08, "loss": 0.2391, "step": 15303 }, { "epoch": 0.96, "grad_norm": 1.5674667541409975, "learning_rate": 3.692456911354947e-08, "loss": 0.246, "step": 15304 }, { "epoch": 0.96, "grad_norm": 1.9085429118159454, "learning_rate": 3.6801124156919385e-08, "loss": 0.2421, "step": 15305 }, { "epoch": 0.96, "grad_norm": 1.8696135249059707, "learning_rate": 3.667788513222581e-08, "loss": 0.2791, "step": 15306 }, { "epoch": 0.96, "grad_norm": 1.8700458518702057, "learning_rate": 3.655485204458353e-08, "loss": 0.2482, "step": 15307 }, { "epoch": 0.96, "grad_norm": 1.9142051065697887, "learning_rate": 3.6432024899095694e-08, "loss": 0.2413, "step": 15308 }, { "epoch": 0.96, "grad_norm": 2.4659636876350683, "learning_rate": 3.630940370085934e-08, "loss": 0.2555, "step": 15309 }, { "epoch": 0.96, "grad_norm": 1.777845902709994, "learning_rate": 3.61869884549626e-08, "loss": 0.2485, "step": 15310 }, { "epoch": 0.96, "grad_norm": 1.8942817230681803, "learning_rate": 3.606477916648477e-08, "loss": 0.2484, "step": 15311 }, { "epoch": 0.96, "grad_norm": 1.51286077522307, "learning_rate": 3.594277584049566e-08, "loss": 0.2529, "step": 15312 }, { "epoch": 0.96, "grad_norm": 2.048395491240227, "learning_rate": 3.5820978482058454e-08, "loss": 0.2435, "step": 15313 }, { "epoch": 0.96, "grad_norm": 1.345472689640056, "learning_rate": 3.569938709622578e-08, "loss": 0.2331, "step": 15314 }, { "epoch": 0.96, "grad_norm": 1.6320413597453367, "learning_rate": 3.557800168804359e-08, "loss": 0.2619, "step": 15315 }, { "epoch": 0.96, "grad_norm": 1.702694496363717, "learning_rate": 3.545682226254732e-08, "loss": 0.2455, "step": 15316 }, { "epoch": 0.96, "grad_norm": 1.936633360459089, "learning_rate": 3.5335848824765706e-08, "loss": 0.235, "step": 15317 }, { "epoch": 0.96, "grad_norm": 1.488881124586551, "learning_rate": 3.521508137971807e-08, "loss": 0.25, "step": 15318 }, { "epoch": 0.96, "grad_norm": 1.250987039169261, "learning_rate": 3.5094519932415417e-08, "loss": 0.2662, "step": 15319 }, { "epoch": 0.96, "grad_norm": 2.0940118980209528, "learning_rate": 3.4974164487859285e-08, "loss": 0.2497, "step": 15320 }, { "epoch": 0.96, "grad_norm": 2.0866958738940644, "learning_rate": 3.485401505104458e-08, "loss": 0.276, "step": 15321 }, { "epoch": 0.96, "grad_norm": 7.139820748030344, "learning_rate": 3.473407162695508e-08, "loss": 0.2411, "step": 15322 }, { "epoch": 0.96, "grad_norm": 2.1695480514946923, "learning_rate": 3.4614334220569036e-08, "loss": 0.2628, "step": 15323 }, { "epoch": 0.96, "grad_norm": 1.8046521207051538, "learning_rate": 3.449480283685302e-08, "loss": 0.2346, "step": 15324 }, { "epoch": 0.96, "grad_norm": 1.5955712789364611, "learning_rate": 3.4375477480768084e-08, "loss": 0.2492, "step": 15325 }, { "epoch": 0.96, "grad_norm": 1.2857057519472268, "learning_rate": 3.4256358157264135e-08, "loss": 0.2481, "step": 15326 }, { "epoch": 0.96, "grad_norm": 2.2482746178734794, "learning_rate": 3.41374448712839e-08, "loss": 0.2562, "step": 15327 }, { "epoch": 0.96, "grad_norm": 3.036424485527265, "learning_rate": 3.4018737627761754e-08, "loss": 0.2627, "step": 15328 }, { "epoch": 0.96, "grad_norm": 2.8016908421713853, "learning_rate": 3.390023643162266e-08, "loss": 0.2429, "step": 15329 }, { "epoch": 0.96, "grad_norm": 1.7336543605057326, "learning_rate": 3.3781941287783224e-08, "loss": 0.2595, "step": 15330 }, { "epoch": 0.96, "grad_norm": 2.2087324847340573, "learning_rate": 3.3663852201152314e-08, "loss": 0.2451, "step": 15331 }, { "epoch": 0.96, "grad_norm": 2.0713500706815813, "learning_rate": 3.354596917662989e-08, "loss": 0.2574, "step": 15332 }, { "epoch": 0.96, "grad_norm": 2.0011230273975733, "learning_rate": 3.342829221910593e-08, "loss": 0.2434, "step": 15333 }, { "epoch": 0.96, "grad_norm": 2.4172602084071526, "learning_rate": 3.331082133346375e-08, "loss": 0.2604, "step": 15334 }, { "epoch": 0.96, "grad_norm": 1.5952833301376854, "learning_rate": 3.3193556524577784e-08, "loss": 0.2522, "step": 15335 }, { "epoch": 0.96, "grad_norm": 2.3358224769108644, "learning_rate": 3.3076497797313034e-08, "loss": 0.2514, "step": 15336 }, { "epoch": 0.96, "grad_norm": 1.7516779276850174, "learning_rate": 3.295964515652672e-08, "loss": 0.2582, "step": 15337 }, { "epoch": 0.96, "grad_norm": 1.5643769409130932, "learning_rate": 3.284299860706719e-08, "loss": 0.2499, "step": 15338 }, { "epoch": 0.96, "grad_norm": 3.434310465857878, "learning_rate": 3.27265581537739e-08, "loss": 0.2326, "step": 15339 }, { "epoch": 0.96, "grad_norm": 2.992712023337565, "learning_rate": 3.2610323801479104e-08, "loss": 0.2592, "step": 15340 }, { "epoch": 0.96, "grad_norm": 2.135565201470293, "learning_rate": 3.249429555500505e-08, "loss": 0.2453, "step": 15341 }, { "epoch": 0.96, "grad_norm": 1.8042016918879589, "learning_rate": 3.2378473419165665e-08, "loss": 0.2573, "step": 15342 }, { "epoch": 0.96, "grad_norm": 2.7511426969549913, "learning_rate": 3.226285739876711e-08, "loss": 0.2443, "step": 15343 }, { "epoch": 0.96, "grad_norm": 1.8925565070592203, "learning_rate": 3.21474474986061e-08, "loss": 0.2407, "step": 15344 }, { "epoch": 0.97, "grad_norm": 1.81522924280968, "learning_rate": 3.203224372347158e-08, "loss": 0.2434, "step": 15345 }, { "epoch": 0.97, "grad_norm": 2.4283249443979504, "learning_rate": 3.191724607814306e-08, "loss": 0.2528, "step": 15346 }, { "epoch": 0.97, "grad_norm": 2.6450169322172354, "learning_rate": 3.1802454567392837e-08, "loss": 0.2834, "step": 15347 }, { "epoch": 0.97, "grad_norm": 4.363494925275413, "learning_rate": 3.1687869195983215e-08, "loss": 0.2407, "step": 15348 }, { "epoch": 0.97, "grad_norm": 1.6525994502119377, "learning_rate": 3.157348996866816e-08, "loss": 0.2457, "step": 15349 }, { "epoch": 0.97, "grad_norm": 2.843293513615338, "learning_rate": 3.145931689019388e-08, "loss": 0.2526, "step": 15350 }, { "epoch": 0.97, "grad_norm": 1.4776435375146455, "learning_rate": 3.134534996529826e-08, "loss": 0.2527, "step": 15351 }, { "epoch": 0.97, "grad_norm": 2.082787644659762, "learning_rate": 3.123158919870917e-08, "loss": 0.2484, "step": 15352 }, { "epoch": 0.97, "grad_norm": 2.3803864607492664, "learning_rate": 3.111803459514673e-08, "loss": 0.2596, "step": 15353 }, { "epoch": 0.97, "grad_norm": 3.4063893010098014, "learning_rate": 3.1004686159322726e-08, "loss": 0.237, "step": 15354 }, { "epoch": 0.97, "grad_norm": 1.6274120290660266, "learning_rate": 3.089154389594062e-08, "loss": 0.2482, "step": 15355 }, { "epoch": 0.97, "grad_norm": 1.7085256371492532, "learning_rate": 3.0778607809694436e-08, "loss": 0.2512, "step": 15356 }, { "epoch": 0.97, "grad_norm": 1.8195484725010698, "learning_rate": 3.066587790526987e-08, "loss": 0.241, "step": 15357 }, { "epoch": 0.97, "grad_norm": 3.7289982967678537, "learning_rate": 3.055335418734429e-08, "loss": 0.2586, "step": 15358 }, { "epoch": 0.97, "grad_norm": 2.061965627953282, "learning_rate": 3.0441036660587284e-08, "loss": 0.2397, "step": 15359 }, { "epoch": 0.97, "grad_norm": 2.354734299441215, "learning_rate": 3.032892532965848e-08, "loss": 0.2589, "step": 15360 }, { "epoch": 0.97, "grad_norm": 3.004312778668439, "learning_rate": 3.021702019920969e-08, "loss": 0.2737, "step": 15361 }, { "epoch": 0.97, "grad_norm": 3.0144702909820316, "learning_rate": 3.010532127388388e-08, "loss": 0.2508, "step": 15362 }, { "epoch": 0.97, "grad_norm": 2.4394008718255376, "learning_rate": 2.999382855831623e-08, "loss": 0.2509, "step": 15363 }, { "epoch": 0.97, "grad_norm": 1.8584716934521521, "learning_rate": 2.9882542057131926e-08, "loss": 0.2515, "step": 15364 }, { "epoch": 0.97, "grad_norm": 3.0465589848756323, "learning_rate": 2.9771461774949494e-08, "loss": 0.2526, "step": 15365 }, { "epoch": 0.97, "grad_norm": 1.3813740686620206, "learning_rate": 2.9660587716376366e-08, "loss": 0.252, "step": 15366 }, { "epoch": 0.97, "grad_norm": 2.745844013136599, "learning_rate": 2.954991988601441e-08, "loss": 0.2466, "step": 15367 }, { "epoch": 0.97, "grad_norm": 2.638236706039188, "learning_rate": 2.9439458288454404e-08, "loss": 0.2594, "step": 15368 }, { "epoch": 0.97, "grad_norm": 25.038448563217997, "learning_rate": 2.9329202928280452e-08, "loss": 0.2502, "step": 15369 }, { "epoch": 0.97, "grad_norm": 1.5932460614647606, "learning_rate": 2.9219153810066124e-08, "loss": 0.2472, "step": 15370 }, { "epoch": 0.97, "grad_norm": 2.268113488709931, "learning_rate": 2.9109310938378875e-08, "loss": 0.2559, "step": 15371 }, { "epoch": 0.97, "grad_norm": 1.813926907059919, "learning_rate": 2.899967431777506e-08, "loss": 0.2567, "step": 15372 }, { "epoch": 0.97, "grad_norm": 1.452551507658095, "learning_rate": 2.889024395280493e-08, "loss": 0.2387, "step": 15373 }, { "epoch": 0.97, "grad_norm": 1.3181105248950122, "learning_rate": 2.8781019848007628e-08, "loss": 0.2358, "step": 15374 }, { "epoch": 0.97, "grad_norm": 0.6590910298493007, "learning_rate": 2.8672002007915646e-08, "loss": 0.4294, "step": 15375 }, { "epoch": 0.97, "grad_norm": 5.022896727496877, "learning_rate": 2.856319043705258e-08, "loss": 0.2814, "step": 15376 }, { "epoch": 0.97, "grad_norm": 3.3888693843039, "learning_rate": 2.8454585139933154e-08, "loss": 0.2563, "step": 15377 }, { "epoch": 0.97, "grad_norm": 1.4575689950990918, "learning_rate": 2.8346186121063214e-08, "loss": 0.2508, "step": 15378 }, { "epoch": 0.97, "grad_norm": 2.189791923584707, "learning_rate": 2.8237993384940822e-08, "loss": 0.2488, "step": 15379 }, { "epoch": 0.97, "grad_norm": 1.899432846434351, "learning_rate": 2.8130006936055167e-08, "loss": 0.2619, "step": 15380 }, { "epoch": 0.97, "grad_norm": 1.512986310092415, "learning_rate": 2.8022226778885997e-08, "loss": 0.2454, "step": 15381 }, { "epoch": 0.97, "grad_norm": 3.269107239754572, "learning_rate": 2.7914652917906405e-08, "loss": 0.2504, "step": 15382 }, { "epoch": 0.97, "grad_norm": 1.9599260607219626, "learning_rate": 2.7807285357578374e-08, "loss": 0.2604, "step": 15383 }, { "epoch": 0.97, "grad_norm": 1.6436723989716344, "learning_rate": 2.7700124102358896e-08, "loss": 0.2492, "step": 15384 }, { "epoch": 0.97, "grad_norm": 1.8879250478746306, "learning_rate": 2.759316915669219e-08, "loss": 0.2588, "step": 15385 }, { "epoch": 0.97, "grad_norm": 4.293526808767585, "learning_rate": 2.7486420525017487e-08, "loss": 0.2644, "step": 15386 }, { "epoch": 0.97, "grad_norm": 1.6808194686052513, "learning_rate": 2.737987821176291e-08, "loss": 0.2605, "step": 15387 }, { "epoch": 0.97, "grad_norm": 3.4008204885221236, "learning_rate": 2.7273542221349925e-08, "loss": 0.2593, "step": 15388 }, { "epoch": 0.97, "grad_norm": 1.9107654947024302, "learning_rate": 2.7167412558189997e-08, "loss": 0.2473, "step": 15389 }, { "epoch": 0.97, "grad_norm": 1.9418409289716292, "learning_rate": 2.7061489226686832e-08, "loss": 0.2492, "step": 15390 }, { "epoch": 0.97, "grad_norm": 9.665281881741754, "learning_rate": 2.6955772231235243e-08, "loss": 0.2605, "step": 15391 }, { "epoch": 0.97, "grad_norm": 1.6116081555458277, "learning_rate": 2.685026157622228e-08, "loss": 0.231, "step": 15392 }, { "epoch": 0.97, "grad_norm": 4.538967965499641, "learning_rate": 2.6744957266024996e-08, "loss": 0.2465, "step": 15393 }, { "epoch": 0.97, "grad_norm": 3.183306124935992, "learning_rate": 2.663985930501267e-08, "loss": 0.2425, "step": 15394 }, { "epoch": 0.97, "grad_norm": 2.2725031333267514, "learning_rate": 2.653496769754682e-08, "loss": 0.2721, "step": 15395 }, { "epoch": 0.97, "grad_norm": 3.0530465258193353, "learning_rate": 2.6430282447978404e-08, "loss": 0.2602, "step": 15396 }, { "epoch": 0.97, "grad_norm": 0.6272586979980177, "learning_rate": 2.6325803560652286e-08, "loss": 0.4753, "step": 15397 }, { "epoch": 0.97, "grad_norm": 1.4854977102106606, "learning_rate": 2.6221531039902214e-08, "loss": 0.2478, "step": 15398 }, { "epoch": 0.97, "grad_norm": 1.603740926459719, "learning_rate": 2.611746489005529e-08, "loss": 0.2354, "step": 15399 }, { "epoch": 0.97, "grad_norm": 2.184837014693623, "learning_rate": 2.601360511542972e-08, "loss": 0.2535, "step": 15400 }, { "epoch": 0.97, "grad_norm": 1.5317966287391347, "learning_rate": 2.5909951720334837e-08, "loss": 0.2505, "step": 15401 }, { "epoch": 0.97, "grad_norm": 2.6660374121280324, "learning_rate": 2.580650470906998e-08, "loss": 0.2465, "step": 15402 }, { "epoch": 0.97, "grad_norm": 1.8738114424906243, "learning_rate": 2.570326408592949e-08, "loss": 0.2437, "step": 15403 }, { "epoch": 0.97, "grad_norm": 2.308996330877488, "learning_rate": 2.56002298551955e-08, "loss": 0.2437, "step": 15404 }, { "epoch": 0.97, "grad_norm": 1.550698509333058, "learning_rate": 2.549740202114348e-08, "loss": 0.2572, "step": 15405 }, { "epoch": 0.97, "grad_norm": 2.6730352932360026, "learning_rate": 2.5394780588039458e-08, "loss": 0.2435, "step": 15406 }, { "epoch": 0.97, "grad_norm": 2.4473176371846725, "learning_rate": 2.5292365560142252e-08, "loss": 0.2696, "step": 15407 }, { "epoch": 0.97, "grad_norm": 2.624639056178406, "learning_rate": 2.5190156941700684e-08, "loss": 0.2465, "step": 15408 }, { "epoch": 0.97, "grad_norm": 1.3593923238769758, "learning_rate": 2.508815473695636e-08, "loss": 0.25, "step": 15409 }, { "epoch": 0.97, "grad_norm": 3.7386045395773486, "learning_rate": 2.4986358950140343e-08, "loss": 0.2516, "step": 15410 }, { "epoch": 0.97, "grad_norm": 1.6639875402424946, "learning_rate": 2.488476958547703e-08, "loss": 0.2334, "step": 15411 }, { "epoch": 0.97, "grad_norm": 2.0738530249733325, "learning_rate": 2.478338664718194e-08, "loss": 0.2721, "step": 15412 }, { "epoch": 0.97, "grad_norm": 5.110522461368836, "learning_rate": 2.4682210139460593e-08, "loss": 0.2526, "step": 15413 }, { "epoch": 0.97, "grad_norm": 2.3880459034330768, "learning_rate": 2.4581240066511304e-08, "loss": 0.2446, "step": 15414 }, { "epoch": 0.97, "grad_norm": 2.31725079189721, "learning_rate": 2.4480476432524048e-08, "loss": 0.2545, "step": 15415 }, { "epoch": 0.97, "grad_norm": 1.5664437641217384, "learning_rate": 2.4379919241679373e-08, "loss": 0.2427, "step": 15416 }, { "epoch": 0.97, "grad_norm": 2.2831622809714314, "learning_rate": 2.4279568498149497e-08, "loss": 0.24, "step": 15417 }, { "epoch": 0.97, "grad_norm": 2.498901627489701, "learning_rate": 2.4179424206098314e-08, "loss": 0.2312, "step": 15418 }, { "epoch": 0.97, "grad_norm": 1.7619439479261354, "learning_rate": 2.4079486369680826e-08, "loss": 0.2488, "step": 15419 }, { "epoch": 0.97, "grad_norm": 1.457464055928428, "learning_rate": 2.3979754993043724e-08, "loss": 0.2428, "step": 15420 }, { "epoch": 0.97, "grad_norm": 1.5065920712687704, "learning_rate": 2.3880230080324806e-08, "loss": 0.2288, "step": 15421 }, { "epoch": 0.97, "grad_norm": 2.4139226223849604, "learning_rate": 2.3780911635653547e-08, "loss": 0.2548, "step": 15422 }, { "epoch": 0.97, "grad_norm": 2.3718508502763997, "learning_rate": 2.3681799663151096e-08, "loss": 0.237, "step": 15423 }, { "epoch": 0.97, "grad_norm": 1.7419795838586325, "learning_rate": 2.358289416693027e-08, "loss": 0.2354, "step": 15424 }, { "epoch": 0.97, "grad_norm": 3.1453250036548543, "learning_rate": 2.3484195151093902e-08, "loss": 0.2387, "step": 15425 }, { "epoch": 0.97, "grad_norm": 0.6081404995631711, "learning_rate": 2.338570261973705e-08, "loss": 0.4764, "step": 15426 }, { "epoch": 0.97, "grad_norm": 44.83175261527692, "learning_rate": 2.328741657694755e-08, "loss": 0.2558, "step": 15427 }, { "epoch": 0.97, "grad_norm": 2.0605645560487424, "learning_rate": 2.3189337026802705e-08, "loss": 0.2572, "step": 15428 }, { "epoch": 0.97, "grad_norm": 1.4817074203972622, "learning_rate": 2.309146397337203e-08, "loss": 0.2215, "step": 15429 }, { "epoch": 0.97, "grad_norm": 1.8491849328290098, "learning_rate": 2.2993797420716168e-08, "loss": 0.2571, "step": 15430 }, { "epoch": 0.97, "grad_norm": 1.38646516516461, "learning_rate": 2.2896337372887988e-08, "loss": 0.2439, "step": 15431 }, { "epoch": 0.97, "grad_norm": 2.3661684537440792, "learning_rate": 2.279908383393148e-08, "loss": 0.2512, "step": 15432 }, { "epoch": 0.97, "grad_norm": 2.6656297796028037, "learning_rate": 2.270203680788119e-08, "loss": 0.2546, "step": 15433 }, { "epoch": 0.97, "grad_norm": 2.933725777283793, "learning_rate": 2.26051962987639e-08, "loss": 0.2518, "step": 15434 }, { "epoch": 0.97, "grad_norm": 6.072465880182918, "learning_rate": 2.2508562310598057e-08, "loss": 0.2501, "step": 15435 }, { "epoch": 0.97, "grad_norm": 2.1916570925011354, "learning_rate": 2.2412134847392687e-08, "loss": 0.2322, "step": 15436 }, { "epoch": 0.97, "grad_norm": 1.8154144355138124, "learning_rate": 2.2315913913149578e-08, "loss": 0.2652, "step": 15437 }, { "epoch": 0.97, "grad_norm": 3.5564233721356238, "learning_rate": 2.221989951185999e-08, "loss": 0.2607, "step": 15438 }, { "epoch": 0.97, "grad_norm": 1.6670904655104286, "learning_rate": 2.2124091647509062e-08, "loss": 0.2332, "step": 15439 }, { "epoch": 0.97, "grad_norm": 1.606807819661613, "learning_rate": 2.202849032407084e-08, "loss": 0.236, "step": 15440 }, { "epoch": 0.97, "grad_norm": 1.765907588009598, "learning_rate": 2.193309554551215e-08, "loss": 0.2437, "step": 15441 }, { "epoch": 0.97, "grad_norm": 3.44992618005198, "learning_rate": 2.1837907315791495e-08, "loss": 0.2725, "step": 15442 }, { "epoch": 0.97, "grad_norm": 3.149696000434322, "learning_rate": 2.174292563885849e-08, "loss": 0.2739, "step": 15443 }, { "epoch": 0.97, "grad_norm": 2.8933918783675505, "learning_rate": 2.1648150518653875e-08, "loss": 0.2779, "step": 15444 }, { "epoch": 0.97, "grad_norm": 2.5328495552060035, "learning_rate": 2.1553581959110058e-08, "loss": 0.2617, "step": 15445 }, { "epoch": 0.97, "grad_norm": 4.205367458624773, "learning_rate": 2.145921996415057e-08, "loss": 0.2679, "step": 15446 }, { "epoch": 0.97, "grad_norm": 2.5079999903802914, "learning_rate": 2.1365064537691162e-08, "loss": 0.2599, "step": 15447 }, { "epoch": 0.97, "grad_norm": 3.5755684981923537, "learning_rate": 2.1271115683638154e-08, "loss": 0.2687, "step": 15448 }, { "epoch": 0.97, "grad_norm": 7.141764026252246, "learning_rate": 2.11773734058901e-08, "loss": 0.2464, "step": 15449 }, { "epoch": 0.97, "grad_norm": 4.341899232552611, "learning_rate": 2.1083837708335554e-08, "loss": 0.2536, "step": 15450 }, { "epoch": 0.97, "grad_norm": 2.1419034502819514, "learning_rate": 2.0990508594856407e-08, "loss": 0.2622, "step": 15451 }, { "epoch": 0.97, "grad_norm": 1.9159169909098033, "learning_rate": 2.089738606932512e-08, "loss": 0.2618, "step": 15452 }, { "epoch": 0.97, "grad_norm": 2.9294412910823078, "learning_rate": 2.0804470135604714e-08, "loss": 0.2548, "step": 15453 }, { "epoch": 0.97, "grad_norm": 1.9508149838605389, "learning_rate": 2.0711760797550996e-08, "loss": 0.2355, "step": 15454 }, { "epoch": 0.97, "grad_norm": 2.5438058980307416, "learning_rate": 2.0619258059010883e-08, "loss": 0.2515, "step": 15455 }, { "epoch": 0.97, "grad_norm": 0.6200725801548135, "learning_rate": 2.0526961923821864e-08, "loss": 0.4887, "step": 15456 }, { "epoch": 0.97, "grad_norm": 1.3413472880789359, "learning_rate": 2.0434872395813655e-08, "loss": 0.2345, "step": 15457 }, { "epoch": 0.97, "grad_norm": 3.2186162078452494, "learning_rate": 2.034298947880764e-08, "loss": 0.2521, "step": 15458 }, { "epoch": 0.97, "grad_norm": 2.486717289994227, "learning_rate": 2.0251313176615218e-08, "loss": 0.2518, "step": 15459 }, { "epoch": 0.97, "grad_norm": 1.9950808963470017, "learning_rate": 2.015984349304112e-08, "loss": 0.2483, "step": 15460 }, { "epoch": 0.97, "grad_norm": 3.413461480828005, "learning_rate": 2.0068580431880647e-08, "loss": 0.2463, "step": 15461 }, { "epoch": 0.97, "grad_norm": 2.2821608242697047, "learning_rate": 1.9977523996919658e-08, "loss": 0.2445, "step": 15462 }, { "epoch": 0.97, "grad_norm": 1.851568948622649, "learning_rate": 1.9886674191937348e-08, "loss": 0.2472, "step": 15463 }, { "epoch": 0.97, "grad_norm": 3.0410138494571632, "learning_rate": 1.9796031020702376e-08, "loss": 0.2572, "step": 15464 }, { "epoch": 0.97, "grad_norm": 1.6359560002023097, "learning_rate": 1.970559448697562e-08, "loss": 0.2428, "step": 15465 }, { "epoch": 0.97, "grad_norm": 1.4668214706477742, "learning_rate": 1.9615364594509633e-08, "loss": 0.2549, "step": 15466 }, { "epoch": 0.97, "grad_norm": 0.6060142031172588, "learning_rate": 1.9525341347048643e-08, "loss": 0.4653, "step": 15467 }, { "epoch": 0.97, "grad_norm": 1.766276291352814, "learning_rate": 1.943552474832744e-08, "loss": 0.257, "step": 15468 }, { "epoch": 0.97, "grad_norm": 1.4656088738430961, "learning_rate": 1.934591480207304e-08, "loss": 0.2327, "step": 15469 }, { "epoch": 0.97, "grad_norm": 1.7355086601017375, "learning_rate": 1.925651151200303e-08, "loss": 0.2464, "step": 15470 }, { "epoch": 0.97, "grad_norm": 1.817621729816515, "learning_rate": 1.9167314881827214e-08, "loss": 0.2401, "step": 15471 }, { "epoch": 0.97, "grad_norm": 3.0191311831408743, "learning_rate": 1.9078324915246527e-08, "loss": 0.2651, "step": 15472 }, { "epoch": 0.97, "grad_norm": 2.793188698041432, "learning_rate": 1.898954161595301e-08, "loss": 0.2711, "step": 15473 }, { "epoch": 0.97, "grad_norm": 2.369588591877338, "learning_rate": 1.8900964987630388e-08, "loss": 0.2505, "step": 15474 }, { "epoch": 0.97, "grad_norm": 2.923550585192585, "learning_rate": 1.8812595033954607e-08, "loss": 0.2578, "step": 15475 }, { "epoch": 0.97, "grad_norm": 2.6308461250264963, "learning_rate": 1.8724431758592177e-08, "loss": 0.2697, "step": 15476 }, { "epoch": 0.97, "grad_norm": 2.9505678082335662, "learning_rate": 1.8636475165200176e-08, "loss": 0.2364, "step": 15477 }, { "epoch": 0.97, "grad_norm": 2.0326938089135087, "learning_rate": 1.8548725257429014e-08, "loss": 0.2496, "step": 15478 }, { "epoch": 0.97, "grad_norm": 2.625344297906875, "learning_rate": 1.8461182038919666e-08, "loss": 0.2496, "step": 15479 }, { "epoch": 0.97, "grad_norm": 2.096852475475712, "learning_rate": 1.8373845513303124e-08, "loss": 0.2666, "step": 15480 }, { "epoch": 0.97, "grad_norm": 1.6312141642097142, "learning_rate": 1.8286715684204815e-08, "loss": 0.2385, "step": 15481 }, { "epoch": 0.97, "grad_norm": 2.692134022410049, "learning_rate": 1.819979255523907e-08, "loss": 0.2397, "step": 15482 }, { "epoch": 0.97, "grad_norm": 11.99791696158461, "learning_rate": 1.8113076130012453e-08, "loss": 0.2468, "step": 15483 }, { "epoch": 0.97, "grad_norm": 2.399654529619578, "learning_rate": 1.802656641212375e-08, "loss": 0.2539, "step": 15484 }, { "epoch": 0.97, "grad_norm": 2.4177192326430204, "learning_rate": 1.7940263405161195e-08, "loss": 0.2839, "step": 15485 }, { "epoch": 0.97, "grad_norm": 1.2935373024116066, "learning_rate": 1.785416711270638e-08, "loss": 0.2504, "step": 15486 }, { "epoch": 0.97, "grad_norm": 1.9160151498153903, "learning_rate": 1.7768277538331435e-08, "loss": 0.2612, "step": 15487 }, { "epoch": 0.97, "grad_norm": 3.9762390016418894, "learning_rate": 1.7682594685600184e-08, "loss": 0.2539, "step": 15488 }, { "epoch": 0.97, "grad_norm": 1.7756054644360466, "learning_rate": 1.759711855806756e-08, "loss": 0.2508, "step": 15489 }, { "epoch": 0.97, "grad_norm": 0.6438743333723528, "learning_rate": 1.751184915928017e-08, "loss": 0.4626, "step": 15490 }, { "epoch": 0.97, "grad_norm": 2.0046281064284197, "learning_rate": 1.742678649277574e-08, "loss": 0.2559, "step": 15491 }, { "epoch": 0.97, "grad_norm": 1.5446583872634272, "learning_rate": 1.7341930562084775e-08, "loss": 0.2403, "step": 15492 }, { "epoch": 0.97, "grad_norm": 1.942016087724381, "learning_rate": 1.7257281370726688e-08, "loss": 0.2788, "step": 15493 }, { "epoch": 0.97, "grad_norm": 2.612550085794354, "learning_rate": 1.7172838922214773e-08, "loss": 0.2519, "step": 15494 }, { "epoch": 0.97, "grad_norm": 1.6222136585881686, "learning_rate": 1.7088603220051792e-08, "loss": 0.2407, "step": 15495 }, { "epoch": 0.97, "grad_norm": 1.5933577644267796, "learning_rate": 1.7004574267733832e-08, "loss": 0.248, "step": 15496 }, { "epoch": 0.97, "grad_norm": 3.5081318471711267, "learning_rate": 1.6920752068746438e-08, "loss": 0.2567, "step": 15497 }, { "epoch": 0.97, "grad_norm": 2.3722598268780937, "learning_rate": 1.6837136626568496e-08, "loss": 0.2556, "step": 15498 }, { "epoch": 0.97, "grad_norm": 2.1696781911728378, "learning_rate": 1.6753727944668342e-08, "loss": 0.2415, "step": 15499 }, { "epoch": 0.97, "grad_norm": 2.2465984209971315, "learning_rate": 1.667052602650765e-08, "loss": 0.2682, "step": 15500 }, { "epoch": 0.97, "grad_norm": 3.073112480747819, "learning_rate": 1.6587530875538106e-08, "loss": 0.2451, "step": 15501 }, { "epoch": 0.97, "grad_norm": 2.0039550855240713, "learning_rate": 1.6504742495203064e-08, "loss": 0.2682, "step": 15502 }, { "epoch": 0.97, "grad_norm": 3.716537895168068, "learning_rate": 1.642216088893811e-08, "loss": 0.2615, "step": 15503 }, { "epoch": 0.98, "grad_norm": 1.6264490824986113, "learning_rate": 1.6339786060169393e-08, "loss": 0.2498, "step": 15504 }, { "epoch": 0.98, "grad_norm": 1.9944649389374496, "learning_rate": 1.625761801231529e-08, "loss": 0.2611, "step": 15505 }, { "epoch": 0.98, "grad_norm": 2.9685160961845805, "learning_rate": 1.617565674878474e-08, "loss": 0.2435, "step": 15506 }, { "epoch": 0.98, "grad_norm": 1.9792377528694678, "learning_rate": 1.6093902272978357e-08, "loss": 0.2532, "step": 15507 }, { "epoch": 0.98, "grad_norm": 1.5625692901850257, "learning_rate": 1.6012354588288425e-08, "loss": 0.2504, "step": 15508 }, { "epoch": 0.98, "grad_norm": 2.872568214140006, "learning_rate": 1.5931013698098353e-08, "loss": 0.2818, "step": 15509 }, { "epoch": 0.98, "grad_norm": 6.658827039770151, "learning_rate": 1.5849879605783214e-08, "loss": 0.2456, "step": 15510 }, { "epoch": 0.98, "grad_norm": 1.7288425240765597, "learning_rate": 1.5768952314709763e-08, "loss": 0.2483, "step": 15511 }, { "epoch": 0.98, "grad_norm": 1.8807100627466924, "learning_rate": 1.5688231828234757e-08, "loss": 0.248, "step": 15512 }, { "epoch": 0.98, "grad_norm": 1.8400376668637661, "learning_rate": 1.560771814970885e-08, "loss": 0.2626, "step": 15513 }, { "epoch": 0.98, "grad_norm": 1.9019812541798318, "learning_rate": 1.5527411282471594e-08, "loss": 0.2478, "step": 15514 }, { "epoch": 0.98, "grad_norm": 2.0171208771970015, "learning_rate": 1.5447311229855876e-08, "loss": 0.2468, "step": 15515 }, { "epoch": 0.98, "grad_norm": 1.9905667456664624, "learning_rate": 1.5367417995184597e-08, "loss": 0.2474, "step": 15516 }, { "epoch": 0.98, "grad_norm": 2.473877934475984, "learning_rate": 1.5287731581772326e-08, "loss": 0.2532, "step": 15517 }, { "epoch": 0.98, "grad_norm": 1.4390865518188125, "learning_rate": 1.5208251992926415e-08, "loss": 0.2654, "step": 15518 }, { "epoch": 0.98, "grad_norm": 2.4060498089965208, "learning_rate": 1.512897923194423e-08, "loss": 0.2412, "step": 15519 }, { "epoch": 0.98, "grad_norm": 1.5454154221178362, "learning_rate": 1.504991330211425e-08, "loss": 0.2609, "step": 15520 }, { "epoch": 0.98, "grad_norm": 3.583097433815207, "learning_rate": 1.4971054206718294e-08, "loss": 0.2747, "step": 15521 }, { "epoch": 0.98, "grad_norm": 2.73913176597066, "learning_rate": 1.489240194902708e-08, "loss": 0.23, "step": 15522 }, { "epoch": 0.98, "grad_norm": 2.3986844251397104, "learning_rate": 1.4813956532305218e-08, "loss": 0.2508, "step": 15523 }, { "epoch": 0.98, "grad_norm": 1.873586140642064, "learning_rate": 1.4735717959806773e-08, "loss": 0.2375, "step": 15524 }, { "epoch": 0.98, "grad_norm": 2.141798204858941, "learning_rate": 1.4657686234778035e-08, "loss": 0.2457, "step": 15525 }, { "epoch": 0.98, "grad_norm": 3.5805240595153993, "learning_rate": 1.4579861360457525e-08, "loss": 0.2656, "step": 15526 }, { "epoch": 0.98, "grad_norm": 1.7488438587677326, "learning_rate": 1.4502243340072663e-08, "loss": 0.2449, "step": 15527 }, { "epoch": 0.98, "grad_norm": 2.712316081614816, "learning_rate": 1.4424832176845871e-08, "loss": 0.2637, "step": 15528 }, { "epoch": 0.98, "grad_norm": 1.9890059149896477, "learning_rate": 1.4347627873987912e-08, "loss": 0.2353, "step": 15529 }, { "epoch": 0.98, "grad_norm": 3.5888310231399467, "learning_rate": 1.4270630434701782e-08, "loss": 0.2559, "step": 15530 }, { "epoch": 0.98, "grad_norm": 1.5157996681402393, "learning_rate": 1.4193839862183812e-08, "loss": 0.2622, "step": 15531 }, { "epoch": 0.98, "grad_norm": 2.2218685234480606, "learning_rate": 1.4117256159618676e-08, "loss": 0.2466, "step": 15532 }, { "epoch": 0.98, "grad_norm": 1.744680873333697, "learning_rate": 1.4040879330184387e-08, "loss": 0.2638, "step": 15533 }, { "epoch": 0.98, "grad_norm": 2.730767422762499, "learning_rate": 1.3964709377050079e-08, "loss": 0.2698, "step": 15534 }, { "epoch": 0.98, "grad_norm": 4.182953404008009, "learning_rate": 1.3888746303376554e-08, "loss": 0.242, "step": 15535 }, { "epoch": 0.98, "grad_norm": 2.8149028322694094, "learning_rate": 1.3812990112315184e-08, "loss": 0.2658, "step": 15536 }, { "epoch": 0.98, "grad_norm": 2.093215393823958, "learning_rate": 1.3737440807009006e-08, "loss": 0.2417, "step": 15537 }, { "epoch": 0.98, "grad_norm": 5.060166318007312, "learning_rate": 1.3662098390593292e-08, "loss": 0.2604, "step": 15538 }, { "epoch": 0.98, "grad_norm": 2.620758001831998, "learning_rate": 1.3586962866193875e-08, "loss": 0.244, "step": 15539 }, { "epoch": 0.98, "grad_norm": 2.927748783609269, "learning_rate": 1.3512034236927706e-08, "loss": 0.2574, "step": 15540 }, { "epoch": 0.98, "grad_norm": 2.2031237302777953, "learning_rate": 1.3437312505905075e-08, "loss": 0.2363, "step": 15541 }, { "epoch": 0.98, "grad_norm": 2.546532599190121, "learning_rate": 1.3362797676224614e-08, "loss": 0.239, "step": 15542 }, { "epoch": 0.98, "grad_norm": 0.5388820068781249, "learning_rate": 1.3288489750979406e-08, "loss": 0.4509, "step": 15543 }, { "epoch": 0.98, "grad_norm": 1.9298914157208156, "learning_rate": 1.3214388733252536e-08, "loss": 0.2424, "step": 15544 }, { "epoch": 0.98, "grad_norm": 1.7125518414190737, "learning_rate": 1.3140494626117661e-08, "loss": 0.2542, "step": 15545 }, { "epoch": 0.98, "grad_norm": 2.8646247302854344, "learning_rate": 1.3066807432641216e-08, "loss": 0.254, "step": 15546 }, { "epoch": 0.98, "grad_norm": 1.445369105515142, "learning_rate": 1.2993327155880753e-08, "loss": 0.2486, "step": 15547 }, { "epoch": 0.98, "grad_norm": 2.2019131649084476, "learning_rate": 1.2920053798885501e-08, "loss": 0.2489, "step": 15548 }, { "epoch": 0.98, "grad_norm": 2.1827807345721975, "learning_rate": 1.2846987364695252e-08, "loss": 0.2577, "step": 15549 }, { "epoch": 0.98, "grad_norm": 2.213672803846767, "learning_rate": 1.2774127856341467e-08, "loss": 0.27, "step": 15550 }, { "epoch": 0.98, "grad_norm": 1.7643336230298274, "learning_rate": 1.2701475276847286e-08, "loss": 0.2403, "step": 15551 }, { "epoch": 0.98, "grad_norm": 2.206378727249492, "learning_rate": 1.2629029629227518e-08, "loss": 0.2517, "step": 15552 }, { "epoch": 0.98, "grad_norm": 1.9358869169210136, "learning_rate": 1.2556790916488093e-08, "loss": 0.2446, "step": 15553 }, { "epoch": 0.98, "grad_norm": 1.8228967276809194, "learning_rate": 1.2484759141625502e-08, "loss": 0.2459, "step": 15554 }, { "epoch": 0.98, "grad_norm": 2.0295069711263496, "learning_rate": 1.2412934307629575e-08, "loss": 0.2421, "step": 15555 }, { "epoch": 0.98, "grad_norm": 2.7658988365955928, "learning_rate": 1.2341316417479598e-08, "loss": 0.2474, "step": 15556 }, { "epoch": 0.98, "grad_norm": 3.233087249645574, "learning_rate": 1.2269905474147636e-08, "loss": 0.2555, "step": 15557 }, { "epoch": 0.98, "grad_norm": 2.5075996025079546, "learning_rate": 1.2198701480596875e-08, "loss": 0.2483, "step": 15558 }, { "epoch": 0.98, "grad_norm": 1.6876271772076141, "learning_rate": 1.2127704439781062e-08, "loss": 0.2457, "step": 15559 }, { "epoch": 0.98, "grad_norm": 1.793953200667405, "learning_rate": 1.2056914354646176e-08, "loss": 0.2621, "step": 15560 }, { "epoch": 0.98, "grad_norm": 2.0639247294016263, "learning_rate": 1.1986331228129311e-08, "loss": 0.2438, "step": 15561 }, { "epoch": 0.98, "grad_norm": 0.5793979500542205, "learning_rate": 1.1915955063159235e-08, "loss": 0.4536, "step": 15562 }, { "epoch": 0.98, "grad_norm": 3.7061081259812236, "learning_rate": 1.1845785862656389e-08, "loss": 0.2573, "step": 15563 }, { "epoch": 0.98, "grad_norm": 2.576161420494555, "learning_rate": 1.177582362953178e-08, "loss": 0.2634, "step": 15564 }, { "epoch": 0.98, "grad_norm": 2.9030760447820265, "learning_rate": 1.1706068366688083e-08, "loss": 0.252, "step": 15565 }, { "epoch": 0.98, "grad_norm": 3.0726290133180094, "learning_rate": 1.1636520077020207e-08, "loss": 0.2577, "step": 15566 }, { "epoch": 0.98, "grad_norm": 1.5106248481346085, "learning_rate": 1.1567178763413068e-08, "loss": 0.2475, "step": 15567 }, { "epoch": 0.98, "grad_norm": 1.9558351080447594, "learning_rate": 1.149804442874436e-08, "loss": 0.2494, "step": 15568 }, { "epoch": 0.98, "grad_norm": 2.334714522306353, "learning_rate": 1.1429117075882345e-08, "loss": 0.2684, "step": 15569 }, { "epoch": 0.98, "grad_norm": 2.570248109556965, "learning_rate": 1.1360396707686961e-08, "loss": 0.2425, "step": 15570 }, { "epoch": 0.98, "grad_norm": 1.8808737821337238, "learning_rate": 1.1291883327009257e-08, "loss": 0.2564, "step": 15571 }, { "epoch": 0.98, "grad_norm": 2.4292775300354967, "learning_rate": 1.1223576936692515e-08, "loss": 0.2506, "step": 15572 }, { "epoch": 0.98, "grad_norm": 4.519205841181352, "learning_rate": 1.115547753957058e-08, "loss": 0.2727, "step": 15573 }, { "epoch": 0.98, "grad_norm": 1.46784944440388, "learning_rate": 1.1087585138469525e-08, "loss": 0.2453, "step": 15574 }, { "epoch": 0.98, "grad_norm": 0.6034317401022334, "learning_rate": 1.101989973620543e-08, "loss": 0.4681, "step": 15575 }, { "epoch": 0.98, "grad_norm": 2.070420111968326, "learning_rate": 1.095242133558716e-08, "loss": 0.2451, "step": 15576 }, { "epoch": 0.98, "grad_norm": 0.5708556517898078, "learning_rate": 1.0885149939414141e-08, "loss": 0.4573, "step": 15577 }, { "epoch": 0.98, "grad_norm": 2.197566537185328, "learning_rate": 1.0818085550478585e-08, "loss": 0.2465, "step": 15578 }, { "epoch": 0.98, "grad_norm": 1.6830089399389538, "learning_rate": 1.0751228171561601e-08, "loss": 0.2567, "step": 15579 }, { "epoch": 0.98, "grad_norm": 7.2262230473066715, "learning_rate": 1.0684577805438744e-08, "loss": 0.2353, "step": 15580 }, { "epoch": 0.98, "grad_norm": 3.2733926244862537, "learning_rate": 1.0618134454874473e-08, "loss": 0.2528, "step": 15581 }, { "epoch": 0.98, "grad_norm": 3.0396419553233973, "learning_rate": 1.0551898122626025e-08, "loss": 0.2399, "step": 15582 }, { "epoch": 0.98, "grad_norm": 4.229861437779296, "learning_rate": 1.0485868811441757e-08, "loss": 0.2535, "step": 15583 }, { "epoch": 0.98, "grad_norm": 1.7751439642267388, "learning_rate": 1.0420046524061145e-08, "loss": 0.2878, "step": 15584 }, { "epoch": 0.98, "grad_norm": 1.6262032897060499, "learning_rate": 1.0354431263214782e-08, "loss": 0.2643, "step": 15585 }, { "epoch": 0.98, "grad_norm": 2.040780135198672, "learning_rate": 1.0289023031626044e-08, "loss": 0.2429, "step": 15586 }, { "epoch": 0.98, "grad_norm": 1.965122746676793, "learning_rate": 1.0223821832008873e-08, "loss": 0.267, "step": 15587 }, { "epoch": 0.98, "grad_norm": 1.4362883851264039, "learning_rate": 1.0158827667067772e-08, "loss": 0.2502, "step": 15588 }, { "epoch": 0.98, "grad_norm": 3.511057216270994, "learning_rate": 1.0094040539499473e-08, "loss": 0.2568, "step": 15589 }, { "epoch": 0.98, "grad_norm": 2.246104436570195, "learning_rate": 1.0029460451992933e-08, "loss": 0.2413, "step": 15590 }, { "epoch": 0.98, "grad_norm": 1.6716867312882933, "learning_rate": 9.965087407227125e-09, "loss": 0.2654, "step": 15591 }, { "epoch": 0.98, "grad_norm": 1.7472619600651458, "learning_rate": 9.900921407873243e-09, "loss": 0.2385, "step": 15592 }, { "epoch": 0.98, "grad_norm": 1.725106848403077, "learning_rate": 9.836962456593602e-09, "loss": 0.2644, "step": 15593 }, { "epoch": 0.98, "grad_norm": 1.6500384360411164, "learning_rate": 9.77321055604219e-09, "loss": 0.2547, "step": 15594 }, { "epoch": 0.98, "grad_norm": 1.632168563033859, "learning_rate": 9.709665708863558e-09, "loss": 0.2495, "step": 15595 }, { "epoch": 0.98, "grad_norm": 6.006364411613694, "learning_rate": 9.646327917694486e-09, "loss": 0.2405, "step": 15596 }, { "epoch": 0.98, "grad_norm": 1.9932137846393272, "learning_rate": 9.58319718516343e-09, "loss": 0.2555, "step": 15597 }, { "epoch": 0.98, "grad_norm": 1.9316758936017333, "learning_rate": 9.5202735138894e-09, "loss": 0.2499, "step": 15598 }, { "epoch": 0.98, "grad_norm": 0.5850876663634432, "learning_rate": 9.45755690648309e-09, "loss": 0.4632, "step": 15599 }, { "epoch": 0.98, "grad_norm": 1.855408833305095, "learning_rate": 9.395047365547416e-09, "loss": 0.2405, "step": 15600 }, { "epoch": 0.98, "grad_norm": 2.236241917557413, "learning_rate": 9.332744893675306e-09, "loss": 0.2761, "step": 15601 }, { "epoch": 0.98, "grad_norm": 1.6952567859126504, "learning_rate": 9.27064949345191e-09, "loss": 0.25, "step": 15602 }, { "epoch": 0.98, "grad_norm": 1.9630476757870707, "learning_rate": 9.208761167453507e-09, "loss": 0.2625, "step": 15603 }, { "epoch": 0.98, "grad_norm": 1.962904644754545, "learning_rate": 9.147079918249146e-09, "loss": 0.2604, "step": 15604 }, { "epoch": 0.98, "grad_norm": 2.1188646288857007, "learning_rate": 9.085605748396231e-09, "loss": 0.242, "step": 15605 }, { "epoch": 0.98, "grad_norm": 1.4531888245836753, "learning_rate": 9.024338660447162e-09, "loss": 0.2361, "step": 15606 }, { "epoch": 0.98, "grad_norm": 2.7107240539871507, "learning_rate": 8.963278656942687e-09, "loss": 0.2487, "step": 15607 }, { "epoch": 0.98, "grad_norm": 17.318751386047623, "learning_rate": 8.902425740416886e-09, "loss": 0.2462, "step": 15608 }, { "epoch": 0.98, "grad_norm": 1.5774320024795008, "learning_rate": 8.841779913394966e-09, "loss": 0.2671, "step": 15609 }, { "epoch": 0.98, "grad_norm": 1.6641220630708113, "learning_rate": 8.781341178393244e-09, "loss": 0.2442, "step": 15610 }, { "epoch": 0.98, "grad_norm": 1.9925184886437746, "learning_rate": 8.72110953791805e-09, "loss": 0.2538, "step": 15611 }, { "epoch": 0.98, "grad_norm": 2.4335687958758183, "learning_rate": 8.66108499447016e-09, "loss": 0.2492, "step": 15612 }, { "epoch": 0.98, "grad_norm": 1.5373189443861768, "learning_rate": 8.601267550539805e-09, "loss": 0.2437, "step": 15613 }, { "epoch": 0.98, "grad_norm": 0.5720932307561466, "learning_rate": 8.541657208607778e-09, "loss": 0.4494, "step": 15614 }, { "epoch": 0.98, "grad_norm": 1.7043081794435382, "learning_rate": 8.482253971148768e-09, "loss": 0.266, "step": 15615 }, { "epoch": 0.98, "grad_norm": 0.6149659013118141, "learning_rate": 8.423057840626914e-09, "loss": 0.4963, "step": 15616 }, { "epoch": 0.98, "grad_norm": 2.613573159584638, "learning_rate": 8.364068819498029e-09, "loss": 0.2672, "step": 15617 }, { "epoch": 0.98, "grad_norm": 2.908704026341994, "learning_rate": 8.305286910210709e-09, "loss": 0.2619, "step": 15618 }, { "epoch": 0.98, "grad_norm": 1.9634236224880925, "learning_rate": 8.246712115203004e-09, "loss": 0.2433, "step": 15619 }, { "epoch": 0.98, "grad_norm": 1.8058417900140946, "learning_rate": 8.188344436905193e-09, "loss": 0.2485, "step": 15620 }, { "epoch": 0.98, "grad_norm": 2.0499520966327007, "learning_rate": 8.130183877739784e-09, "loss": 0.2506, "step": 15621 }, { "epoch": 0.98, "grad_norm": 1.464304071789921, "learning_rate": 8.072230440119843e-09, "loss": 0.2453, "step": 15622 }, { "epoch": 0.98, "grad_norm": 1.9687961653155635, "learning_rate": 8.014484126449008e-09, "loss": 0.2397, "step": 15623 }, { "epoch": 0.98, "grad_norm": 3.0285807789622403, "learning_rate": 7.9569449391248e-09, "loss": 0.273, "step": 15624 }, { "epoch": 0.98, "grad_norm": 9.591317288318226, "learning_rate": 7.899612880533092e-09, "loss": 0.2368, "step": 15625 }, { "epoch": 0.98, "grad_norm": 3.6196789453325597, "learning_rate": 7.842487953054202e-09, "loss": 0.2361, "step": 15626 }, { "epoch": 0.98, "grad_norm": 2.2419885920789207, "learning_rate": 7.785570159056788e-09, "loss": 0.2387, "step": 15627 }, { "epoch": 0.98, "grad_norm": 1.905001739989543, "learning_rate": 7.728859500903408e-09, "loss": 0.2557, "step": 15628 }, { "epoch": 0.98, "grad_norm": 1.7865140658066843, "learning_rate": 7.672355980947177e-09, "loss": 0.2647, "step": 15629 }, { "epoch": 0.98, "grad_norm": 2.1961707316110086, "learning_rate": 7.616059601532333e-09, "loss": 0.2467, "step": 15630 }, { "epoch": 0.98, "grad_norm": 2.7961826128246297, "learning_rate": 7.55997036499423e-09, "loss": 0.2543, "step": 15631 }, { "epoch": 0.98, "grad_norm": 1.9538604690082362, "learning_rate": 7.504088273661003e-09, "loss": 0.2887, "step": 15632 }, { "epoch": 0.98, "grad_norm": 3.2481173055607213, "learning_rate": 7.448413329850801e-09, "loss": 0.2598, "step": 15633 }, { "epoch": 0.98, "grad_norm": 1.9807230966892162, "learning_rate": 7.392945535873441e-09, "loss": 0.2485, "step": 15634 }, { "epoch": 0.98, "grad_norm": 1.754303864248791, "learning_rate": 7.337684894030417e-09, "loss": 0.2374, "step": 15635 }, { "epoch": 0.98, "grad_norm": 7.07331941428063, "learning_rate": 7.282631406615448e-09, "loss": 0.2354, "step": 15636 }, { "epoch": 0.98, "grad_norm": 1.7190981757398527, "learning_rate": 7.2277850759117085e-09, "loss": 0.2255, "step": 15637 }, { "epoch": 0.98, "grad_norm": 2.5759893447618776, "learning_rate": 7.173145904195156e-09, "loss": 0.2581, "step": 15638 }, { "epoch": 0.98, "grad_norm": 1.8790706297505653, "learning_rate": 7.11871389373342e-09, "loss": 0.2496, "step": 15639 }, { "epoch": 0.98, "grad_norm": 9.577593385186562, "learning_rate": 7.064489046784695e-09, "loss": 0.2642, "step": 15640 }, { "epoch": 0.98, "grad_norm": 2.6178861605317176, "learning_rate": 7.010471365598292e-09, "loss": 0.2423, "step": 15641 }, { "epoch": 0.98, "grad_norm": 1.979658809139444, "learning_rate": 6.956660852416308e-09, "loss": 0.2475, "step": 15642 }, { "epoch": 0.98, "grad_norm": 2.265061644184496, "learning_rate": 6.903057509470845e-09, "loss": 0.2513, "step": 15643 }, { "epoch": 0.98, "grad_norm": 4.183084640261362, "learning_rate": 6.8496613389867905e-09, "loss": 0.2348, "step": 15644 }, { "epoch": 0.98, "grad_norm": 3.0015993779853, "learning_rate": 6.796472343178484e-09, "loss": 0.2701, "step": 15645 }, { "epoch": 0.98, "grad_norm": 9.768687284571175, "learning_rate": 6.743490524254159e-09, "loss": 0.2748, "step": 15646 }, { "epoch": 0.98, "grad_norm": 2.518585898499266, "learning_rate": 6.690715884410947e-09, "loss": 0.2454, "step": 15647 }, { "epoch": 0.98, "grad_norm": 32.006304581087356, "learning_rate": 6.638148425838764e-09, "loss": 0.2526, "step": 15648 }, { "epoch": 0.98, "grad_norm": 1.733578765111345, "learning_rate": 6.585788150719752e-09, "loss": 0.2453, "step": 15649 }, { "epoch": 0.98, "grad_norm": 4.082609978964282, "learning_rate": 6.533635061225507e-09, "loss": 0.2773, "step": 15650 }, { "epoch": 0.98, "grad_norm": 3.1356784380508858, "learning_rate": 6.481689159519855e-09, "loss": 0.2572, "step": 15651 }, { "epoch": 0.98, "grad_norm": 5.234922177587976, "learning_rate": 6.429950447758293e-09, "loss": 0.2598, "step": 15652 }, { "epoch": 0.98, "grad_norm": 6.653254671064186, "learning_rate": 6.378418928087993e-09, "loss": 0.2567, "step": 15653 }, { "epoch": 0.98, "grad_norm": 1.8209219972317714, "learning_rate": 6.327094602646688e-09, "loss": 0.2629, "step": 15654 }, { "epoch": 0.98, "grad_norm": 2.331626509852034, "learning_rate": 6.275977473563788e-09, "loss": 0.2584, "step": 15655 }, { "epoch": 0.98, "grad_norm": 1.9876333163916338, "learning_rate": 6.2250675429609274e-09, "loss": 0.2544, "step": 15656 }, { "epoch": 0.98, "grad_norm": 2.5051829218987804, "learning_rate": 6.174364812949752e-09, "loss": 0.2595, "step": 15657 }, { "epoch": 0.98, "grad_norm": 3.0610047525807573, "learning_rate": 6.123869285634132e-09, "loss": 0.2503, "step": 15658 }, { "epoch": 0.98, "grad_norm": 1.385699698392544, "learning_rate": 6.073580963109061e-09, "loss": 0.2527, "step": 15659 }, { "epoch": 0.98, "grad_norm": 2.234972326993217, "learning_rate": 6.023499847461201e-09, "loss": 0.2408, "step": 15660 }, { "epoch": 0.98, "grad_norm": 1.8396623098913352, "learning_rate": 5.973625940769445e-09, "loss": 0.2506, "step": 15661 }, { "epoch": 0.98, "grad_norm": 2.006516227653823, "learning_rate": 5.9239592451015845e-09, "loss": 0.2293, "step": 15662 }, { "epoch": 0.99, "grad_norm": 4.629313662584817, "learning_rate": 5.874499762519303e-09, "loss": 0.2426, "step": 15663 }, { "epoch": 0.99, "grad_norm": 1.5684966662259532, "learning_rate": 5.825247495074849e-09, "loss": 0.2422, "step": 15664 }, { "epoch": 0.99, "grad_norm": 1.6761734588470107, "learning_rate": 5.776202444811585e-09, "loss": 0.2435, "step": 15665 }, { "epoch": 0.99, "grad_norm": 5.046085493563638, "learning_rate": 5.727364613763997e-09, "loss": 0.2548, "step": 15666 }, { "epoch": 0.99, "grad_norm": 2.0059501740205063, "learning_rate": 5.678734003958797e-09, "loss": 0.2685, "step": 15667 }, { "epoch": 0.99, "grad_norm": 12.98830537254205, "learning_rate": 5.63031061741437e-09, "loss": 0.2554, "step": 15668 }, { "epoch": 0.99, "grad_norm": 2.0175163559699087, "learning_rate": 5.582094456139109e-09, "loss": 0.2514, "step": 15669 }, { "epoch": 0.99, "grad_norm": 3.5522079285470918, "learning_rate": 5.534085522133637e-09, "loss": 0.2492, "step": 15670 }, { "epoch": 0.99, "grad_norm": 6.112999635984669, "learning_rate": 5.486283817390248e-09, "loss": 0.2524, "step": 15671 }, { "epoch": 0.99, "grad_norm": 1.520334575559302, "learning_rate": 5.438689343892356e-09, "loss": 0.2453, "step": 15672 }, { "epoch": 0.99, "grad_norm": 2.1018062447739836, "learning_rate": 5.391302103615048e-09, "loss": 0.2641, "step": 15673 }, { "epoch": 0.99, "grad_norm": 1.4175783931073094, "learning_rate": 5.344122098523419e-09, "loss": 0.2365, "step": 15674 }, { "epoch": 0.99, "grad_norm": 2.016727443018837, "learning_rate": 5.2971493305753465e-09, "loss": 0.2684, "step": 15675 }, { "epoch": 0.99, "grad_norm": 3.2345848687961936, "learning_rate": 5.2503838017203824e-09, "loss": 0.258, "step": 15676 }, { "epoch": 0.99, "grad_norm": 2.011374306576539, "learning_rate": 5.2038255138991965e-09, "loss": 0.2425, "step": 15677 }, { "epoch": 0.99, "grad_norm": 14.356641889740356, "learning_rate": 5.157474469042467e-09, "loss": 0.231, "step": 15678 }, { "epoch": 0.99, "grad_norm": 2.0070010729827286, "learning_rate": 5.111330669074211e-09, "loss": 0.2491, "step": 15679 }, { "epoch": 0.99, "grad_norm": 1.776880313302346, "learning_rate": 5.065394115909006e-09, "loss": 0.2489, "step": 15680 }, { "epoch": 0.99, "grad_norm": 2.8691306274525914, "learning_rate": 5.019664811451997e-09, "loss": 0.264, "step": 15681 }, { "epoch": 0.99, "grad_norm": 2.12316657401478, "learning_rate": 4.974142757601663e-09, "loss": 0.2579, "step": 15682 }, { "epoch": 0.99, "grad_norm": 0.6520729617270807, "learning_rate": 4.92882795624594e-09, "loss": 0.4264, "step": 15683 }, { "epoch": 0.99, "grad_norm": 1.5793191284545007, "learning_rate": 4.8837204092655465e-09, "loss": 0.2545, "step": 15684 }, { "epoch": 0.99, "grad_norm": 4.038925760909837, "learning_rate": 4.838820118532317e-09, "loss": 0.2648, "step": 15685 }, { "epoch": 0.99, "grad_norm": 2.8814905684080334, "learning_rate": 4.7941270859086505e-09, "loss": 0.2638, "step": 15686 }, { "epoch": 0.99, "grad_norm": 1.7749038360763734, "learning_rate": 4.749641313248621e-09, "loss": 0.2488, "step": 15687 }, { "epoch": 0.99, "grad_norm": 6.117625713689163, "learning_rate": 4.705362802399082e-09, "loss": 0.273, "step": 15688 }, { "epoch": 0.99, "grad_norm": 2.2739046925445483, "learning_rate": 4.661291555196346e-09, "loss": 0.2455, "step": 15689 }, { "epoch": 0.99, "grad_norm": 4.335449567235336, "learning_rate": 4.617427573469502e-09, "loss": 0.2577, "step": 15690 }, { "epoch": 0.99, "grad_norm": 1.9443615278019875, "learning_rate": 4.573770859038207e-09, "loss": 0.236, "step": 15691 }, { "epoch": 0.99, "grad_norm": 2.722616558266238, "learning_rate": 4.530321413714345e-09, "loss": 0.2628, "step": 15692 }, { "epoch": 0.99, "grad_norm": 2.163307092856542, "learning_rate": 4.487079239300363e-09, "loss": 0.2504, "step": 15693 }, { "epoch": 0.99, "grad_norm": 1.5143142547497772, "learning_rate": 4.444044337590381e-09, "loss": 0.2548, "step": 15694 }, { "epoch": 0.99, "grad_norm": 3.357737687257555, "learning_rate": 4.401216710370193e-09, "loss": 0.2414, "step": 15695 }, { "epoch": 0.99, "grad_norm": 3.296475992183728, "learning_rate": 4.358596359416712e-09, "loss": 0.2516, "step": 15696 }, { "epoch": 0.99, "grad_norm": 2.3018184705700144, "learning_rate": 4.316183286497966e-09, "loss": 0.2452, "step": 15697 }, { "epoch": 0.99, "grad_norm": 2.0956838160957654, "learning_rate": 4.273977493374215e-09, "loss": 0.2639, "step": 15698 }, { "epoch": 0.99, "grad_norm": 1.8657023384057065, "learning_rate": 4.231978981796836e-09, "loss": 0.2547, "step": 15699 }, { "epoch": 0.99, "grad_norm": 2.5622571930646347, "learning_rate": 4.190187753507769e-09, "loss": 0.2471, "step": 15700 }, { "epoch": 0.99, "grad_norm": 1.8443428570988152, "learning_rate": 4.148603810241736e-09, "loss": 0.2454, "step": 15701 }, { "epoch": 0.99, "grad_norm": 2.6598271444935904, "learning_rate": 4.107227153724025e-09, "loss": 0.2499, "step": 15702 }, { "epoch": 0.99, "grad_norm": 1.7876087746085303, "learning_rate": 4.066057785670486e-09, "loss": 0.2591, "step": 15703 }, { "epoch": 0.99, "grad_norm": 2.151683819391787, "learning_rate": 4.0250957077903055e-09, "loss": 0.239, "step": 15704 }, { "epoch": 0.99, "grad_norm": 2.6222148335428344, "learning_rate": 3.984340921782681e-09, "loss": 0.2608, "step": 15705 }, { "epoch": 0.99, "grad_norm": 2.1699803301021197, "learning_rate": 3.9437934293384824e-09, "loss": 0.2648, "step": 15706 }, { "epoch": 0.99, "grad_norm": 2.633422960341346, "learning_rate": 3.903453232140808e-09, "loss": 0.2743, "step": 15707 }, { "epoch": 0.99, "grad_norm": 3.548380016945056, "learning_rate": 3.863320331862763e-09, "loss": 0.2577, "step": 15708 }, { "epoch": 0.99, "grad_norm": 1.6850794141643164, "learning_rate": 3.823394730169683e-09, "loss": 0.2489, "step": 15709 }, { "epoch": 0.99, "grad_norm": 1.8572656559590008, "learning_rate": 3.78367642871802e-09, "loss": 0.2517, "step": 15710 }, { "epoch": 0.99, "grad_norm": 2.5703480181271305, "learning_rate": 3.744165429155344e-09, "loss": 0.2683, "step": 15711 }, { "epoch": 0.99, "grad_norm": 2.6371975870181545, "learning_rate": 3.7048617331225667e-09, "loss": 0.2547, "step": 15712 }, { "epoch": 0.99, "grad_norm": 1.740872662529989, "learning_rate": 3.6657653422489392e-09, "loss": 0.2464, "step": 15713 }, { "epoch": 0.99, "grad_norm": 1.9950757202341172, "learning_rate": 3.626876258157608e-09, "loss": 0.2741, "step": 15714 }, { "epoch": 0.99, "grad_norm": 3.7426453294714634, "learning_rate": 3.5881944824611716e-09, "loss": 0.2569, "step": 15715 }, { "epoch": 0.99, "grad_norm": 2.5666520416987435, "learning_rate": 3.5497200167655676e-09, "loss": 0.2771, "step": 15716 }, { "epoch": 0.99, "grad_norm": 3.1091677599967005, "learning_rate": 3.511452862666742e-09, "loss": 0.2572, "step": 15717 }, { "epoch": 0.99, "grad_norm": 2.7261772790606957, "learning_rate": 3.473393021752314e-09, "loss": 0.2442, "step": 15718 }, { "epoch": 0.99, "grad_norm": 3.517246950564713, "learning_rate": 3.4355404956021297e-09, "loss": 0.226, "step": 15719 }, { "epoch": 0.99, "grad_norm": 2.6825080849977034, "learning_rate": 3.3978952857854907e-09, "loss": 0.2485, "step": 15720 }, { "epoch": 0.99, "grad_norm": 2.79111575766408, "learning_rate": 3.3604573938655903e-09, "loss": 0.2495, "step": 15721 }, { "epoch": 0.99, "grad_norm": 2.6244642837968324, "learning_rate": 3.323226821395631e-09, "loss": 0.2741, "step": 15722 }, { "epoch": 0.99, "grad_norm": 1.8370531258603937, "learning_rate": 3.2862035699199323e-09, "loss": 0.2484, "step": 15723 }, { "epoch": 0.99, "grad_norm": 1.320558568930975, "learning_rate": 3.2493876409744886e-09, "loss": 0.2359, "step": 15724 }, { "epoch": 0.99, "grad_norm": 3.3111944569878298, "learning_rate": 3.212779036087521e-09, "loss": 0.258, "step": 15725 }, { "epoch": 0.99, "grad_norm": 2.748964897584076, "learning_rate": 3.17637775677726e-09, "loss": 0.2621, "step": 15726 }, { "epoch": 0.99, "grad_norm": 1.8969812154128607, "learning_rate": 3.1401838045547197e-09, "loss": 0.2515, "step": 15727 }, { "epoch": 0.99, "grad_norm": 2.7619772801912825, "learning_rate": 3.1041971809209204e-09, "loss": 0.2615, "step": 15728 }, { "epoch": 0.99, "grad_norm": 2.075026575288025, "learning_rate": 3.068417887370223e-09, "loss": 0.2475, "step": 15729 }, { "epoch": 0.99, "grad_norm": 1.6039535760746912, "learning_rate": 3.0328459253858854e-09, "loss": 0.2509, "step": 15730 }, { "epoch": 0.99, "grad_norm": 1.519921143423708, "learning_rate": 2.9974812964445042e-09, "loss": 0.2557, "step": 15731 }, { "epoch": 0.99, "grad_norm": 2.1964090774125284, "learning_rate": 2.9623240020132395e-09, "loss": 0.2567, "step": 15732 }, { "epoch": 0.99, "grad_norm": 3.817719585498989, "learning_rate": 2.9273740435514787e-09, "loss": 0.2522, "step": 15733 }, { "epoch": 0.99, "grad_norm": 1.5741963012685687, "learning_rate": 2.892631422508063e-09, "loss": 0.2473, "step": 15734 }, { "epoch": 0.99, "grad_norm": 3.812412797870793, "learning_rate": 2.8580961403251726e-09, "loss": 0.2561, "step": 15735 }, { "epoch": 0.99, "grad_norm": 1.9472104741231293, "learning_rate": 2.8237681984361054e-09, "loss": 0.2661, "step": 15736 }, { "epoch": 0.99, "grad_norm": 9.120596604382238, "learning_rate": 2.789647598264722e-09, "loss": 0.2563, "step": 15737 }, { "epoch": 0.99, "grad_norm": 2.0668650554990875, "learning_rate": 2.755734341227112e-09, "loss": 0.2436, "step": 15738 }, { "epoch": 0.99, "grad_norm": 1.5249412288243576, "learning_rate": 2.722028428730483e-09, "loss": 0.2396, "step": 15739 }, { "epoch": 0.99, "grad_norm": 2.724048197980926, "learning_rate": 2.6885298621726062e-09, "loss": 0.2658, "step": 15740 }, { "epoch": 0.99, "grad_norm": 2.8083641336883494, "learning_rate": 2.6552386429434806e-09, "loss": 0.2402, "step": 15741 }, { "epoch": 0.99, "grad_norm": 2.129485889147207, "learning_rate": 2.6221547724253337e-09, "loss": 0.2671, "step": 15742 }, { "epoch": 0.99, "grad_norm": 1.6534557948417292, "learning_rate": 2.5892782519904015e-09, "loss": 0.2695, "step": 15743 }, { "epoch": 0.99, "grad_norm": 8.642079432730648, "learning_rate": 2.5566090830025924e-09, "loss": 0.2415, "step": 15744 }, { "epoch": 0.99, "grad_norm": 1.7533545002534725, "learning_rate": 2.5241472668174894e-09, "loss": 0.2553, "step": 15745 }, { "epoch": 0.99, "grad_norm": 1.678989742258969, "learning_rate": 2.4918928047817924e-09, "loss": 0.2423, "step": 15746 }, { "epoch": 0.99, "grad_norm": 4.397889746513507, "learning_rate": 2.459845698234431e-09, "loss": 0.2508, "step": 15747 }, { "epoch": 0.99, "grad_norm": 2.2909800370640574, "learning_rate": 2.4280059485043418e-09, "loss": 0.2471, "step": 15748 }, { "epoch": 0.99, "grad_norm": 2.0282399774213684, "learning_rate": 2.396373556913245e-09, "loss": 0.2549, "step": 15749 }, { "epoch": 0.99, "grad_norm": 2.6786566203014583, "learning_rate": 2.364948524773425e-09, "loss": 0.258, "step": 15750 }, { "epoch": 0.99, "grad_norm": 2.319857187054752, "learning_rate": 2.333730853388283e-09, "loss": 0.2609, "step": 15751 }, { "epoch": 0.99, "grad_norm": 1.984921089124113, "learning_rate": 2.3027205440540047e-09, "loss": 0.2581, "step": 15752 }, { "epoch": 0.99, "grad_norm": 1.7630771807481402, "learning_rate": 2.271917598056228e-09, "loss": 0.2774, "step": 15753 }, { "epoch": 0.99, "grad_norm": 6.303271519897911, "learning_rate": 2.24132201667393e-09, "loss": 0.2545, "step": 15754 }, { "epoch": 0.99, "grad_norm": 3.4398670295687306, "learning_rate": 2.210933801176096e-09, "loss": 0.2438, "step": 15755 }, { "epoch": 0.99, "grad_norm": 2.558926362763449, "learning_rate": 2.1807529528239392e-09, "loss": 0.2329, "step": 15756 }, { "epoch": 0.99, "grad_norm": 1.820919925346816, "learning_rate": 2.1507794728692354e-09, "loss": 0.2495, "step": 15757 }, { "epoch": 0.99, "grad_norm": 2.6281178551012414, "learning_rate": 2.121013362555435e-09, "loss": 0.2455, "step": 15758 }, { "epoch": 0.99, "grad_norm": 8.12444545364323, "learning_rate": 2.0914546231187717e-09, "loss": 0.284, "step": 15759 }, { "epoch": 0.99, "grad_norm": 3.310457589552341, "learning_rate": 2.0621032557843755e-09, "loss": 0.2672, "step": 15760 }, { "epoch": 0.99, "grad_norm": 2.2508547205728386, "learning_rate": 2.0329592617712724e-09, "loss": 0.2461, "step": 15761 }, { "epoch": 0.99, "grad_norm": 2.8216047821695156, "learning_rate": 2.0040226422873844e-09, "loss": 0.2507, "step": 15762 }, { "epoch": 0.99, "grad_norm": 8.340007702846668, "learning_rate": 1.975293398534528e-09, "loss": 0.262, "step": 15763 }, { "epoch": 0.99, "grad_norm": 2.0180235361840233, "learning_rate": 1.9467715317039725e-09, "loss": 0.2574, "step": 15764 }, { "epoch": 0.99, "grad_norm": 3.4677583375723975, "learning_rate": 1.918457042979216e-09, "loss": 0.2313, "step": 15765 }, { "epoch": 0.99, "grad_norm": 1.9576456015077983, "learning_rate": 1.8903499335359842e-09, "loss": 0.2334, "step": 15766 }, { "epoch": 0.99, "grad_norm": 2.617471321302201, "learning_rate": 1.8624502045389015e-09, "loss": 0.2472, "step": 15767 }, { "epoch": 0.99, "grad_norm": 2.239156455603878, "learning_rate": 1.8347578571470404e-09, "loss": 0.2591, "step": 15768 }, { "epoch": 0.99, "grad_norm": 3.2377567823075415, "learning_rate": 1.8072728925089266e-09, "loss": 0.2524, "step": 15769 }, { "epoch": 0.99, "grad_norm": 4.370827325043801, "learning_rate": 1.7799953117642045e-09, "loss": 0.2456, "step": 15770 }, { "epoch": 0.99, "grad_norm": 3.1988984807116836, "learning_rate": 1.752925116045856e-09, "loss": 0.269, "step": 15771 }, { "epoch": 0.99, "grad_norm": 1.7036850099691692, "learning_rate": 1.7260623064763172e-09, "loss": 0.2628, "step": 15772 }, { "epoch": 0.99, "grad_norm": 2.498788690928352, "learning_rate": 1.6994068841702515e-09, "loss": 0.2555, "step": 15773 }, { "epoch": 0.99, "grad_norm": 0.6238314332509672, "learning_rate": 1.6729588502339966e-09, "loss": 0.4649, "step": 15774 }, { "epoch": 0.99, "grad_norm": 1.5360329641170327, "learning_rate": 1.6467182057650077e-09, "loss": 0.2423, "step": 15775 }, { "epoch": 0.99, "grad_norm": 1.8678257484980374, "learning_rate": 1.6206849518513036e-09, "loss": 0.265, "step": 15776 }, { "epoch": 0.99, "grad_norm": 1.4887046116583083, "learning_rate": 1.5948590895736859e-09, "loss": 0.2588, "step": 15777 }, { "epoch": 0.99, "grad_norm": 1.4301680936463346, "learning_rate": 1.5692406200035204e-09, "loss": 0.2352, "step": 15778 }, { "epoch": 0.99, "grad_norm": 1.3356872740475663, "learning_rate": 1.5438295442032902e-09, "loss": 0.2508, "step": 15779 }, { "epoch": 0.99, "grad_norm": 3.7309428404312084, "learning_rate": 1.5186258632282625e-09, "loss": 0.2546, "step": 15780 }, { "epoch": 0.99, "grad_norm": 1.6027635420829873, "learning_rate": 1.493629578123712e-09, "loss": 0.2466, "step": 15781 }, { "epoch": 0.99, "grad_norm": 0.5931391141911617, "learning_rate": 1.4688406899271424e-09, "loss": 0.4778, "step": 15782 }, { "epoch": 0.99, "grad_norm": 6.626739295954805, "learning_rate": 1.44425919966662e-09, "loss": 0.263, "step": 15783 }, { "epoch": 0.99, "grad_norm": 2.968552764813573, "learning_rate": 1.4198851083618847e-09, "loss": 0.2673, "step": 15784 }, { "epoch": 0.99, "grad_norm": 1.866082105062887, "learning_rate": 1.395718417024905e-09, "loss": 0.2523, "step": 15785 }, { "epoch": 0.99, "grad_norm": 2.0104393193516383, "learning_rate": 1.3717591266576569e-09, "loss": 0.2483, "step": 15786 }, { "epoch": 0.99, "grad_norm": 2.2977593434636496, "learning_rate": 1.3480072382549003e-09, "loss": 0.2622, "step": 15787 }, { "epoch": 0.99, "grad_norm": 1.8680346634061324, "learning_rate": 1.3244627528019582e-09, "loss": 0.242, "step": 15788 }, { "epoch": 0.99, "grad_norm": 2.6706379357274512, "learning_rate": 1.3011256712758268e-09, "loss": 0.245, "step": 15789 }, { "epoch": 0.99, "grad_norm": 1.806097211194816, "learning_rate": 1.2779959946446209e-09, "loss": 0.2431, "step": 15790 }, { "epoch": 0.99, "grad_norm": 1.9816721896288432, "learning_rate": 1.2550737238681277e-09, "loss": 0.2592, "step": 15791 }, { "epoch": 0.99, "grad_norm": 2.773295092238538, "learning_rate": 1.2323588598972536e-09, "loss": 0.2515, "step": 15792 }, { "epoch": 0.99, "grad_norm": 1.7284142664191817, "learning_rate": 1.209851403675133e-09, "loss": 0.2438, "step": 15793 }, { "epoch": 0.99, "grad_norm": 2.970835538706795, "learning_rate": 1.1875513561343531e-09, "loss": 0.2575, "step": 15794 }, { "epoch": 0.99, "grad_norm": 3.4136915313654406, "learning_rate": 1.1654587182013955e-09, "loss": 0.2522, "step": 15795 }, { "epoch": 0.99, "grad_norm": 2.2831527404125618, "learning_rate": 1.1435734907921936e-09, "loss": 0.2637, "step": 15796 }, { "epoch": 0.99, "grad_norm": 2.4595129764799224, "learning_rate": 1.1218956748154652e-09, "loss": 0.2572, "step": 15797 }, { "epoch": 0.99, "grad_norm": 1.535759149745823, "learning_rate": 1.100425271170491e-09, "loss": 0.2367, "step": 15798 }, { "epoch": 0.99, "grad_norm": 2.6849277462826926, "learning_rate": 1.0791622807471147e-09, "loss": 0.25, "step": 15799 }, { "epoch": 0.99, "grad_norm": 2.577346126166828, "learning_rate": 1.0581067044290738e-09, "loss": 0.2434, "step": 15800 }, { "epoch": 0.99, "grad_norm": 3.5184578024039044, "learning_rate": 1.0372585430890037e-09, "loss": 0.2529, "step": 15801 }, { "epoch": 0.99, "grad_norm": 1.7656763929677726, "learning_rate": 1.016617797592323e-09, "loss": 0.2501, "step": 15802 }, { "epoch": 0.99, "grad_norm": 2.271362466702832, "learning_rate": 9.96184468795014e-10, "loss": 0.2572, "step": 15803 }, { "epoch": 0.99, "grad_norm": 1.4264577474246123, "learning_rate": 9.75958557545842e-10, "loss": 0.2431, "step": 15804 }, { "epoch": 0.99, "grad_norm": 1.6250150979802267, "learning_rate": 9.559400646830253e-10, "loss": 0.2377, "step": 15805 }, { "epoch": 0.99, "grad_norm": 2.1494566019549723, "learning_rate": 9.36128991037566e-10, "loss": 0.2628, "step": 15806 }, { "epoch": 0.99, "grad_norm": 1.9165050424740038, "learning_rate": 9.165253374315841e-10, "loss": 0.2624, "step": 15807 }, { "epoch": 0.99, "grad_norm": 1.6692706515997264, "learning_rate": 8.971291046783182e-10, "loss": 0.2418, "step": 15808 }, { "epoch": 0.99, "grad_norm": 2.037366880622655, "learning_rate": 8.779402935826797e-10, "loss": 0.2546, "step": 15809 }, { "epoch": 0.99, "grad_norm": 2.026183330748864, "learning_rate": 8.589589049406988e-10, "loss": 0.2605, "step": 15810 }, { "epoch": 0.99, "grad_norm": 0.6477617273822848, "learning_rate": 8.401849395395234e-10, "loss": 0.4714, "step": 15811 }, { "epoch": 0.99, "grad_norm": 1.7289607764256854, "learning_rate": 8.216183981590853e-10, "loss": 0.2438, "step": 15812 }, { "epoch": 0.99, "grad_norm": 2.7450392671904176, "learning_rate": 8.032592815693241e-10, "loss": 0.2455, "step": 15813 }, { "epoch": 0.99, "grad_norm": 5.457123771551411, "learning_rate": 7.851075905312977e-10, "loss": 0.2626, "step": 15814 }, { "epoch": 0.99, "grad_norm": 2.861847714252405, "learning_rate": 7.671633257994026e-10, "loss": 0.2697, "step": 15815 }, { "epoch": 0.99, "grad_norm": 2.0476718483286223, "learning_rate": 7.494264881174884e-10, "loss": 0.2674, "step": 15816 }, { "epoch": 0.99, "grad_norm": 2.2708692136065403, "learning_rate": 7.318970782210777e-10, "loss": 0.2535, "step": 15817 }, { "epoch": 0.99, "grad_norm": 2.011896763030752, "learning_rate": 7.145750968384768e-10, "loss": 0.2311, "step": 15818 }, { "epoch": 0.99, "grad_norm": 1.89508554758061, "learning_rate": 6.97460544687445e-10, "loss": 0.264, "step": 15819 }, { "epoch": 0.99, "grad_norm": 1.808696185442965, "learning_rate": 6.805534224785248e-10, "loss": 0.2623, "step": 15820 }, { "epoch": 0.99, "grad_norm": 2.8490841953622192, "learning_rate": 6.638537309133775e-10, "loss": 0.2665, "step": 15821 }, { "epoch": 1.0, "grad_norm": 2.448466173634456, "learning_rate": 6.473614706847819e-10, "loss": 0.2433, "step": 15822 }, { "epoch": 1.0, "grad_norm": 2.379150334893618, "learning_rate": 6.310766424771908e-10, "loss": 0.2646, "step": 15823 }, { "epoch": 1.0, "grad_norm": 1.778485870475655, "learning_rate": 6.149992469661747e-10, "loss": 0.29, "step": 15824 }, { "epoch": 1.0, "grad_norm": 3.8603618420930625, "learning_rate": 5.991292848184227e-10, "loss": 0.2474, "step": 15825 }, { "epoch": 1.0, "grad_norm": 3.0839198732403688, "learning_rate": 5.83466756692852e-10, "loss": 0.2466, "step": 15826 }, { "epoch": 1.0, "grad_norm": 2.881392580062999, "learning_rate": 5.680116632389432e-10, "loss": 0.2509, "step": 15827 }, { "epoch": 1.0, "grad_norm": 2.4326814213899266, "learning_rate": 5.527640050984051e-10, "loss": 0.2532, "step": 15828 }, { "epoch": 1.0, "grad_norm": 2.111285348025681, "learning_rate": 5.377237829040649e-10, "loss": 0.2332, "step": 15829 }, { "epoch": 1.0, "grad_norm": 6.345627418125003, "learning_rate": 5.228909972793129e-10, "loss": 0.259, "step": 15830 }, { "epoch": 1.0, "grad_norm": 4.885811337884381, "learning_rate": 5.082656488397675e-10, "loss": 0.2576, "step": 15831 }, { "epoch": 1.0, "grad_norm": 4.720055567255641, "learning_rate": 4.938477381921658e-10, "loss": 0.2491, "step": 15832 }, { "epoch": 1.0, "grad_norm": 2.8205032582896923, "learning_rate": 4.796372659354731e-10, "loss": 0.2564, "step": 15833 }, { "epoch": 1.0, "grad_norm": 1.7918494828314753, "learning_rate": 4.656342326586627e-10, "loss": 0.2618, "step": 15834 }, { "epoch": 1.0, "grad_norm": 2.2529947044315977, "learning_rate": 4.5183863894293633e-10, "loss": 0.2503, "step": 15835 }, { "epoch": 1.0, "grad_norm": 2.8860136646471233, "learning_rate": 4.382504853600589e-10, "loss": 0.2625, "step": 15836 }, { "epoch": 1.0, "grad_norm": 2.084821422668374, "learning_rate": 4.2486977247513384e-10, "loss": 0.2601, "step": 15837 }, { "epoch": 1.0, "grad_norm": 1.4990367180345032, "learning_rate": 4.116965008421625e-10, "loss": 0.2525, "step": 15838 }, { "epoch": 1.0, "grad_norm": 11.549193529741228, "learning_rate": 3.987306710079297e-10, "loss": 0.2696, "step": 15839 }, { "epoch": 1.0, "grad_norm": 0.6142007502113539, "learning_rate": 3.859722835114488e-10, "loss": 0.475, "step": 15840 }, { "epoch": 1.0, "grad_norm": 2.1968732666911017, "learning_rate": 3.734213388806307e-10, "loss": 0.2521, "step": 15841 }, { "epoch": 1.0, "grad_norm": 3.3236889276011943, "learning_rate": 3.6107783763728033e-10, "loss": 0.2618, "step": 15842 }, { "epoch": 1.0, "grad_norm": 1.7506306941277392, "learning_rate": 3.4894178029265537e-10, "loss": 0.2204, "step": 15843 }, { "epoch": 1.0, "grad_norm": 1.660360868628995, "learning_rate": 3.3701316735135213e-10, "loss": 0.2597, "step": 15844 }, { "epoch": 1.0, "grad_norm": 2.311502716852735, "learning_rate": 3.2529199930741996e-10, "loss": 0.2475, "step": 15845 }, { "epoch": 1.0, "grad_norm": 1.693419771168762, "learning_rate": 3.1377827664769156e-10, "loss": 0.2484, "step": 15846 }, { "epoch": 1.0, "grad_norm": 0.6339411603170874, "learning_rate": 3.024719998495629e-10, "loss": 0.4808, "step": 15847 }, { "epoch": 1.0, "grad_norm": 4.071155479781498, "learning_rate": 2.9137316938265826e-10, "loss": 0.2594, "step": 15848 }, { "epoch": 1.0, "grad_norm": 1.9132552133377398, "learning_rate": 2.804817857071651e-10, "loss": 0.2572, "step": 15849 }, { "epoch": 1.0, "grad_norm": 3.448989462257928, "learning_rate": 2.697978492749442e-10, "loss": 0.2482, "step": 15850 }, { "epoch": 1.0, "grad_norm": 1.550668758577631, "learning_rate": 2.593213605289746e-10, "loss": 0.2423, "step": 15851 }, { "epoch": 1.0, "grad_norm": 1.6655926186417245, "learning_rate": 2.4905231990446367e-10, "loss": 0.246, "step": 15852 }, { "epoch": 1.0, "grad_norm": 2.202039656965659, "learning_rate": 2.3899072782773703e-10, "loss": 0.2553, "step": 15853 }, { "epoch": 1.0, "grad_norm": 1.9603793889571737, "learning_rate": 2.2913658471512834e-10, "loss": 0.2544, "step": 15854 }, { "epoch": 1.0, "grad_norm": 2.060598463139521, "learning_rate": 2.1948989097686503e-10, "loss": 0.2634, "step": 15855 }, { "epoch": 1.0, "grad_norm": 2.0960565068295582, "learning_rate": 2.1005064701262733e-10, "loss": 0.2478, "step": 15856 }, { "epoch": 1.0, "grad_norm": 18.82967641871937, "learning_rate": 2.008188532132138e-10, "loss": 0.2424, "step": 15857 }, { "epoch": 1.0, "grad_norm": 1.9338115148910051, "learning_rate": 1.9179450996331673e-10, "loss": 0.2551, "step": 15858 }, { "epoch": 1.0, "grad_norm": 4.993639365186874, "learning_rate": 1.82977617635971e-10, "loss": 0.2549, "step": 15859 }, { "epoch": 1.0, "grad_norm": 1.7870016346903912, "learning_rate": 1.7436817659810534e-10, "loss": 0.26, "step": 15860 }, { "epoch": 1.0, "grad_norm": 1.873555944531384, "learning_rate": 1.6596618720610135e-10, "loss": 0.2539, "step": 15861 }, { "epoch": 1.0, "grad_norm": 1.8403489382086395, "learning_rate": 1.5777164980856908e-10, "loss": 0.2622, "step": 15862 }, { "epoch": 1.0, "grad_norm": 1.8220043644736423, "learning_rate": 1.4978456474634694e-10, "loss": 0.2485, "step": 15863 }, { "epoch": 1.0, "grad_norm": 3.5134512089914693, "learning_rate": 1.4200493234972633e-10, "loss": 0.2714, "step": 15864 }, { "epoch": 1.0, "grad_norm": 1.8601289860607761, "learning_rate": 1.3443275294233727e-10, "loss": 0.2703, "step": 15865 }, { "epoch": 1.0, "grad_norm": 3.55878366768738, "learning_rate": 1.2706802683837282e-10, "loss": 0.2404, "step": 15866 }, { "epoch": 1.0, "grad_norm": 2.2219665945766827, "learning_rate": 1.1991075434258924e-10, "loss": 0.255, "step": 15867 }, { "epoch": 1.0, "grad_norm": 0.5993892094123481, "learning_rate": 1.1296093575308142e-10, "loss": 0.4483, "step": 15868 }, { "epoch": 1.0, "grad_norm": 1.4817994356802835, "learning_rate": 1.062185713573971e-10, "loss": 0.2407, "step": 15869 }, { "epoch": 1.0, "grad_norm": 1.5873082189193888, "learning_rate": 9.96836614353125e-11, "loss": 0.2353, "step": 15870 }, { "epoch": 1.0, "grad_norm": 1.6658670506839874, "learning_rate": 9.335620625827712e-11, "loss": 0.2402, "step": 15871 }, { "epoch": 1.0, "grad_norm": 1.5357579254195182, "learning_rate": 8.723620608885875e-11, "loss": 0.2492, "step": 15872 }, { "epoch": 1.0, "grad_norm": 1.9062473174883259, "learning_rate": 8.13236611807433e-11, "loss": 0.2477, "step": 15873 }, { "epoch": 1.0, "grad_norm": 29.69040100379666, "learning_rate": 7.56185717792901e-11, "loss": 0.2662, "step": 15874 }, { "epoch": 1.0, "grad_norm": 4.6464851171485275, "learning_rate": 7.012093812153175e-11, "loss": 0.2362, "step": 15875 }, { "epoch": 1.0, "grad_norm": 2.762516786693178, "learning_rate": 6.483076043506398e-11, "loss": 0.2303, "step": 15876 }, { "epoch": 1.0, "grad_norm": 4.826107944026365, "learning_rate": 5.974803893971093e-11, "loss": 0.2523, "step": 15877 }, { "epoch": 1.0, "grad_norm": 5.22165720210061, "learning_rate": 5.4872773846414984e-11, "loss": 0.2604, "step": 15878 }, { "epoch": 1.0, "grad_norm": 13.016177664600205, "learning_rate": 5.0204965357236735e-11, "loss": 0.2421, "step": 15879 }, { "epoch": 1.0, "grad_norm": 2.134437829279411, "learning_rate": 4.5744613665910096e-11, "loss": 0.2467, "step": 15880 }, { "epoch": 1.0, "grad_norm": 3.2137617734187565, "learning_rate": 4.149171895784232e-11, "loss": 0.2435, "step": 15881 }, { "epoch": 1.0, "grad_norm": 2.1415668129522456, "learning_rate": 3.744628140900375e-11, "loss": 0.2699, "step": 15882 }, { "epoch": 1.0, "grad_norm": 2.4250313758395325, "learning_rate": 3.360830118759317e-11, "loss": 0.2462, "step": 15883 }, { "epoch": 1.0, "grad_norm": 2.481129533467969, "learning_rate": 2.997777845237249e-11, "loss": 0.2671, "step": 15884 }, { "epoch": 1.0, "grad_norm": 1.5735979939100921, "learning_rate": 2.655471335433202e-11, "loss": 0.2434, "step": 15885 }, { "epoch": 1.0, "grad_norm": 2.087847386570869, "learning_rate": 2.3339106036135428e-11, "loss": 0.2548, "step": 15886 }, { "epoch": 1.0, "grad_norm": 1.5564851815198115, "learning_rate": 2.0330956629899256e-11, "loss": 0.239, "step": 15887 }, { "epoch": 1.0, "grad_norm": 3.614140017072922, "learning_rate": 1.7530265261633817e-11, "loss": 0.2525, "step": 15888 }, { "epoch": 1.0, "grad_norm": 1.4842722954885819, "learning_rate": 1.493703204624719e-11, "loss": 0.2512, "step": 15889 }, { "epoch": 1.0, "grad_norm": 1.3875573976995257, "learning_rate": 1.2551257092541236e-11, "loss": 0.2483, "step": 15890 }, { "epoch": 1.0, "grad_norm": 3.1632674817088717, "learning_rate": 1.0372940499325801e-11, "loss": 0.2434, "step": 15891 }, { "epoch": 1.0, "grad_norm": 2.6108113224693525, "learning_rate": 8.402082356528951e-12, "loss": 0.2375, "step": 15892 }, { "epoch": 1.0, "grad_norm": 3.0574607295809564, "learning_rate": 6.638682745752078e-12, "loss": 0.2501, "step": 15893 }, { "epoch": 1.0, "grad_norm": 1.9223745668222756, "learning_rate": 5.082741740825015e-12, "loss": 0.2481, "step": 15894 }, { "epoch": 1.0, "grad_norm": 2.911128079235186, "learning_rate": 3.734259406140694e-12, "loss": 0.2495, "step": 15895 }, { "epoch": 1.0, "grad_norm": 0.6302976214860023, "learning_rate": 2.5932357972102695e-12, "loss": 0.4538, "step": 15896 }, { "epoch": 1.0, "grad_norm": 1.5654230457121145, "learning_rate": 1.6596709617733298e-12, "loss": 0.2426, "step": 15897 }, { "epoch": 1.0, "grad_norm": 2.76785231886163, "learning_rate": 9.33564938687681e-13, "loss": 0.2583, "step": 15898 }, { "epoch": 1.0, "grad_norm": 1.8857348369708409, "learning_rate": 4.149177579293451e-13, "loss": 0.2469, "step": 15899 }, { "epoch": 1.0, "grad_norm": 1.9988417197412622, "learning_rate": 1.037294405925593e-13, "loss": 0.2535, "step": 15900 }, { "epoch": 1.0, "grad_norm": 7.689559692396786, "learning_rate": 0.0, "loss": 0.2487, "step": 15901 }, { "epoch": 1.0, "step": 15901, "total_flos": 1.4822327104372736e+16, "train_loss": 0.06416682437559874, "train_runtime": 143299.6981, "train_samples_per_second": 28.408, "train_steps_per_second": 0.111 } ], "logging_steps": 1.0, "max_steps": 15901, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 3000, "total_flos": 1.4822327104372736e+16, "train_batch_size": 1, "trial_name": null, "trial_params": null }