{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9998001465591899, "eval_steps": 500, "global_step": 938, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4.705882352941177e-06, "loss": 2.3894, "step": 2 }, { "epoch": 0.0, "learning_rate": 9.411764705882354e-06, "loss": 2.4461, "step": 4 }, { "epoch": 0.01, "learning_rate": 1.411764705882353e-05, "loss": 2.5984, "step": 6 }, { "epoch": 0.01, "learning_rate": 1.8823529411764708e-05, "loss": 2.7012, "step": 8 }, { "epoch": 0.01, "learning_rate": 2.3529411764705884e-05, "loss": 2.5558, "step": 10 }, { "epoch": 0.01, "learning_rate": 2.823529411764706e-05, "loss": 2.7513, "step": 12 }, { "epoch": 0.01, "learning_rate": 3.294117647058824e-05, "loss": 2.7527, "step": 14 }, { "epoch": 0.02, "learning_rate": 3.7647058823529415e-05, "loss": 2.6276, "step": 16 }, { "epoch": 0.02, "learning_rate": 4.235294117647059e-05, "loss": 2.6711, "step": 18 }, { "epoch": 0.02, "learning_rate": 4.705882352941177e-05, "loss": 2.6975, "step": 20 }, { "epoch": 0.02, "learning_rate": 5.176470588235295e-05, "loss": 2.5489, "step": 22 }, { "epoch": 0.03, "learning_rate": 5.647058823529412e-05, "loss": 2.5387, "step": 24 }, { "epoch": 0.03, "learning_rate": 6.11764705882353e-05, "loss": 2.5694, "step": 26 }, { "epoch": 0.03, "learning_rate": 6.588235294117648e-05, "loss": 2.6244, "step": 28 }, { "epoch": 0.03, "learning_rate": 7.058823529411765e-05, "loss": 2.5018, "step": 30 }, { "epoch": 0.03, "learning_rate": 7.529411764705883e-05, "loss": 2.4123, "step": 32 }, { "epoch": 0.04, "learning_rate": 8e-05, "loss": 2.3234, "step": 34 }, { "epoch": 0.04, "learning_rate": 8.470588235294118e-05, "loss": 2.0958, "step": 36 }, { "epoch": 0.04, "learning_rate": 8.941176470588236e-05, "loss": 2.2023, "step": 38 }, { "epoch": 0.04, "learning_rate": 9.411764705882353e-05, "loss": 2.1864, "step": 40 }, { "epoch": 0.04, "learning_rate": 9.882352941176471e-05, "loss": 2.0768, "step": 42 }, { "epoch": 0.05, "learning_rate": 0.0001035294117647059, "loss": 2.189, "step": 44 }, { "epoch": 0.05, "learning_rate": 0.00010823529411764706, "loss": 1.9094, "step": 46 }, { "epoch": 0.05, "learning_rate": 0.00011294117647058824, "loss": 2.0136, "step": 48 }, { "epoch": 0.05, "learning_rate": 0.00011764705882352942, "loss": 1.7677, "step": 50 }, { "epoch": 0.06, "learning_rate": 0.0001223529411764706, "loss": 2.3789, "step": 52 }, { "epoch": 0.06, "learning_rate": 0.00012705882352941175, "loss": 2.2419, "step": 54 }, { "epoch": 0.06, "learning_rate": 0.00013176470588235296, "loss": 2.2654, "step": 56 }, { "epoch": 0.06, "learning_rate": 0.00013647058823529413, "loss": 2.3797, "step": 58 }, { "epoch": 0.06, "learning_rate": 0.0001411764705882353, "loss": 2.319, "step": 60 }, { "epoch": 0.07, "learning_rate": 0.00014588235294117646, "loss": 2.3527, "step": 62 }, { "epoch": 0.07, "learning_rate": 0.00015058823529411766, "loss": 2.129, "step": 64 }, { "epoch": 0.07, "learning_rate": 0.00015529411764705884, "loss": 2.2285, "step": 66 }, { "epoch": 0.07, "learning_rate": 0.00016, "loss": 2.2231, "step": 68 }, { "epoch": 0.07, "learning_rate": 0.0001647058823529412, "loss": 2.0318, "step": 70 }, { "epoch": 0.08, "learning_rate": 0.00016941176470588237, "loss": 2.2135, "step": 72 }, { "epoch": 0.08, "learning_rate": 0.00017411764705882354, "loss": 2.093, "step": 74 }, { "epoch": 0.08, "learning_rate": 0.00017882352941176472, "loss": 2.0507, "step": 76 }, { "epoch": 0.08, "learning_rate": 0.0001835294117647059, "loss": 2.115, "step": 78 }, { "epoch": 0.09, "learning_rate": 0.00018823529411764707, "loss": 2.1991, "step": 80 }, { "epoch": 0.09, "learning_rate": 0.00019294117647058825, "loss": 2.1561, "step": 82 }, { "epoch": 0.09, "learning_rate": 0.00019764705882352942, "loss": 2.1816, "step": 84 }, { "epoch": 0.09, "learning_rate": 0.00019999993373829138, "loss": 1.9079, "step": 86 }, { "epoch": 0.09, "learning_rate": 0.00019999940364514925, "loss": 2.1371, "step": 88 }, { "epoch": 0.1, "learning_rate": 0.00019999834346167496, "loss": 2.0254, "step": 90 }, { "epoch": 0.1, "learning_rate": 0.00019999675319348848, "loss": 1.9081, "step": 92 }, { "epoch": 0.1, "learning_rate": 0.0001999946328490197, "loss": 1.9681, "step": 94 }, { "epoch": 0.1, "learning_rate": 0.0001999919824395085, "loss": 2.1084, "step": 96 }, { "epoch": 0.1, "learning_rate": 0.00019998880197900446, "loss": 1.8421, "step": 98 }, { "epoch": 0.11, "learning_rate": 0.00019998509148436697, "loss": 2.0253, "step": 100 }, { "epoch": 0.11, "learning_rate": 0.0001999808509752652, "loss": 2.2719, "step": 102 }, { "epoch": 0.11, "learning_rate": 0.00019997608047417776, "loss": 2.3961, "step": 104 }, { "epoch": 0.11, "learning_rate": 0.0001999707800063928, "loss": 2.2332, "step": 106 }, { "epoch": 0.12, "learning_rate": 0.00019996494960000774, "loss": 2.3148, "step": 108 }, { "epoch": 0.12, "learning_rate": 0.00019995858928592916, "loss": 2.2659, "step": 110 }, { "epoch": 0.12, "learning_rate": 0.00019995169909787271, "loss": 2.3069, "step": 112 }, { "epoch": 0.12, "learning_rate": 0.0001999442790723628, "loss": 2.1792, "step": 114 }, { "epoch": 0.12, "learning_rate": 0.0001999363292487325, "loss": 2.2628, "step": 116 }, { "epoch": 0.13, "learning_rate": 0.00019992784966912333, "loss": 2.3282, "step": 118 }, { "epoch": 0.13, "learning_rate": 0.00019991884037848497, "loss": 2.286, "step": 120 }, { "epoch": 0.13, "learning_rate": 0.00019990930142457515, "loss": 2.2392, "step": 122 }, { "epoch": 0.13, "learning_rate": 0.00019989923285795914, "loss": 2.1468, "step": 124 }, { "epoch": 0.13, "learning_rate": 0.0001998886347320098, "loss": 2.2453, "step": 126 }, { "epoch": 0.14, "learning_rate": 0.00019987750710290713, "loss": 2.1149, "step": 128 }, { "epoch": 0.14, "learning_rate": 0.00019986585002963793, "loss": 2.1051, "step": 130 }, { "epoch": 0.14, "learning_rate": 0.00019985366357399564, "loss": 2.0007, "step": 132 }, { "epoch": 0.14, "learning_rate": 0.00019984094780057978, "loss": 1.963, "step": 134 }, { "epoch": 0.14, "learning_rate": 0.00019982770277679596, "loss": 1.9235, "step": 136 }, { "epoch": 0.15, "learning_rate": 0.00019981392857285505, "loss": 1.861, "step": 138 }, { "epoch": 0.15, "learning_rate": 0.0001997996252617733, "loss": 2.1247, "step": 140 }, { "epoch": 0.15, "learning_rate": 0.00019978479291937165, "loss": 1.7524, "step": 142 }, { "epoch": 0.15, "learning_rate": 0.0001997694316242753, "loss": 1.9346, "step": 144 }, { "epoch": 0.16, "learning_rate": 0.00019975354145791355, "loss": 1.6186, "step": 146 }, { "epoch": 0.16, "learning_rate": 0.00019973712250451908, "loss": 1.9757, "step": 148 }, { "epoch": 0.16, "learning_rate": 0.00019972017485112774, "loss": 1.9019, "step": 150 }, { "epoch": 0.16, "learning_rate": 0.00019970269858757787, "loss": 2.2553, "step": 152 }, { "epoch": 0.16, "learning_rate": 0.00019968469380651015, "loss": 2.2539, "step": 154 }, { "epoch": 0.17, "learning_rate": 0.00019966616060336655, "loss": 2.173, "step": 156 }, { "epoch": 0.17, "learning_rate": 0.00019964709907639057, "loss": 2.1496, "step": 158 }, { "epoch": 0.17, "learning_rate": 0.000199627509326626, "loss": 2.2138, "step": 160 }, { "epoch": 0.17, "learning_rate": 0.00019960739145791684, "loss": 2.2994, "step": 162 }, { "epoch": 0.17, "learning_rate": 0.00019958674557690666, "loss": 2.2498, "step": 164 }, { "epoch": 0.18, "learning_rate": 0.00019956557179303788, "loss": 2.2241, "step": 166 }, { "epoch": 0.18, "learning_rate": 0.00019954387021855138, "loss": 2.257, "step": 168 }, { "epoch": 0.18, "learning_rate": 0.00019952164096848578, "loss": 2.1482, "step": 170 }, { "epoch": 0.18, "learning_rate": 0.00019949888416067688, "loss": 2.1853, "step": 172 }, { "epoch": 0.19, "learning_rate": 0.00019947559991575706, "loss": 2.1136, "step": 174 }, { "epoch": 0.19, "learning_rate": 0.00019945178835715458, "loss": 2.1496, "step": 176 }, { "epoch": 0.19, "learning_rate": 0.00019942744961109297, "loss": 2.0761, "step": 178 }, { "epoch": 0.19, "learning_rate": 0.0001994025838065903, "loss": 2.0665, "step": 180 }, { "epoch": 0.19, "learning_rate": 0.00019937719107545864, "loss": 2.1202, "step": 182 }, { "epoch": 0.2, "learning_rate": 0.00019935127155230314, "loss": 1.9078, "step": 184 }, { "epoch": 0.2, "learning_rate": 0.0001993248253745216, "loss": 1.9247, "step": 186 }, { "epoch": 0.2, "learning_rate": 0.0001992978526823034, "loss": 1.8159, "step": 188 }, { "epoch": 0.2, "learning_rate": 0.00019927035361862904, "loss": 1.7471, "step": 190 }, { "epoch": 0.2, "learning_rate": 0.0001992423283292693, "loss": 1.8876, "step": 192 }, { "epoch": 0.21, "learning_rate": 0.00019921377696278437, "loss": 1.771, "step": 194 }, { "epoch": 0.21, "learning_rate": 0.00019918469967052327, "loss": 1.7633, "step": 196 }, { "epoch": 0.21, "learning_rate": 0.00019915509660662275, "loss": 1.7469, "step": 198 }, { "epoch": 0.21, "learning_rate": 0.00019912496792800677, "loss": 1.9769, "step": 200 }, { "epoch": 0.22, "learning_rate": 0.00019909431379438544, "loss": 2.2334, "step": 202 }, { "epoch": 0.22, "learning_rate": 0.0001990631343682544, "loss": 2.226, "step": 204 }, { "epoch": 0.22, "learning_rate": 0.00019903142981489373, "loss": 2.129, "step": 206 }, { "epoch": 0.22, "learning_rate": 0.0001989992003023672, "loss": 2.2408, "step": 208 }, { "epoch": 0.22, "learning_rate": 0.00019896644600152135, "loss": 2.142, "step": 210 }, { "epoch": 0.23, "learning_rate": 0.0001989331670859846, "loss": 2.3385, "step": 212 }, { "epoch": 0.23, "learning_rate": 0.00019889936373216634, "loss": 2.2516, "step": 214 }, { "epoch": 0.23, "learning_rate": 0.0001988650361192559, "loss": 2.2209, "step": 216 }, { "epoch": 0.23, "learning_rate": 0.00019883018442922178, "loss": 2.1603, "step": 218 }, { "epoch": 0.23, "learning_rate": 0.0001987948088468105, "loss": 2.2952, "step": 220 }, { "epoch": 0.24, "learning_rate": 0.00019875890955954573, "loss": 2.2628, "step": 222 }, { "epoch": 0.24, "learning_rate": 0.00019872248675772722, "loss": 2.2564, "step": 224 }, { "epoch": 0.24, "learning_rate": 0.0001986855406344299, "loss": 2.0969, "step": 226 }, { "epoch": 0.24, "learning_rate": 0.00019864807138550273, "loss": 2.1252, "step": 228 }, { "epoch": 0.25, "learning_rate": 0.00019861007920956786, "loss": 1.9448, "step": 230 }, { "epoch": 0.25, "learning_rate": 0.0001985715643080192, "loss": 1.9488, "step": 232 }, { "epoch": 0.25, "learning_rate": 0.00019853252688502187, "loss": 1.9668, "step": 234 }, { "epoch": 0.25, "learning_rate": 0.00019849296714751063, "loss": 1.8091, "step": 236 }, { "epoch": 0.25, "learning_rate": 0.0001984528853051891, "loss": 1.9742, "step": 238 }, { "epoch": 0.26, "learning_rate": 0.00019841228157052853, "loss": 1.6913, "step": 240 }, { "epoch": 0.26, "learning_rate": 0.00019837115615876664, "loss": 1.8882, "step": 242 }, { "epoch": 0.26, "learning_rate": 0.00019832950928790657, "loss": 1.5621, "step": 244 }, { "epoch": 0.26, "learning_rate": 0.0001982873411787157, "loss": 1.6418, "step": 246 }, { "epoch": 0.26, "learning_rate": 0.0001982446520547244, "loss": 1.837, "step": 248 }, { "epoch": 0.27, "learning_rate": 0.00019820144214222497, "loss": 1.8345, "step": 250 }, { "epoch": 0.27, "learning_rate": 0.00019815771167027034, "loss": 2.2542, "step": 252 }, { "epoch": 0.27, "learning_rate": 0.00019811346087067287, "loss": 2.3098, "step": 254 }, { "epoch": 0.27, "learning_rate": 0.00019806868997800317, "loss": 2.1615, "step": 256 }, { "epoch": 0.27, "learning_rate": 0.0001980233992295889, "loss": 2.314, "step": 258 }, { "epoch": 0.28, "learning_rate": 0.00019797758886551324, "loss": 2.2309, "step": 260 }, { "epoch": 0.28, "learning_rate": 0.0001979312591286141, "loss": 2.1631, "step": 262 }, { "epoch": 0.28, "learning_rate": 0.00019788441026448225, "loss": 2.0791, "step": 264 }, { "epoch": 0.28, "learning_rate": 0.0001978370425214606, "loss": 2.2653, "step": 266 }, { "epoch": 0.29, "learning_rate": 0.0001977891561506424, "loss": 2.2547, "step": 268 }, { "epoch": 0.29, "learning_rate": 0.00019774075140587024, "loss": 2.2326, "step": 270 }, { "epoch": 0.29, "learning_rate": 0.00019769182854373444, "loss": 2.0206, "step": 272 }, { "epoch": 0.29, "learning_rate": 0.000197642387823572, "loss": 2.0665, "step": 274 }, { "epoch": 0.29, "learning_rate": 0.00019759242950746487, "loss": 2.1532, "step": 276 }, { "epoch": 0.3, "learning_rate": 0.0001975419538602389, "loss": 2.0988, "step": 278 }, { "epoch": 0.3, "learning_rate": 0.0001974909611494622, "loss": 1.937, "step": 280 }, { "epoch": 0.3, "learning_rate": 0.0001974394516454438, "loss": 1.9614, "step": 282 }, { "epoch": 0.3, "learning_rate": 0.00019738742562123225, "loss": 1.8518, "step": 284 }, { "epoch": 0.3, "learning_rate": 0.00019733488335261408, "loss": 1.7994, "step": 286 }, { "epoch": 0.31, "learning_rate": 0.00019728182511811245, "loss": 1.9275, "step": 288 }, { "epoch": 0.31, "learning_rate": 0.00019722825119898566, "loss": 1.9991, "step": 290 }, { "epoch": 0.31, "learning_rate": 0.0001971741618792255, "loss": 1.6737, "step": 292 }, { "epoch": 0.31, "learning_rate": 0.00019711955744555594, "loss": 1.4669, "step": 294 }, { "epoch": 0.32, "learning_rate": 0.0001970644381874316, "loss": 1.5002, "step": 296 }, { "epoch": 0.32, "learning_rate": 0.00019700880439703602, "loss": 1.9151, "step": 298 }, { "epoch": 0.32, "learning_rate": 0.00019695265636928032, "loss": 1.7493, "step": 300 }, { "epoch": 0.32, "learning_rate": 0.00019689599440180153, "loss": 2.2535, "step": 302 }, { "epoch": 0.32, "learning_rate": 0.00019683881879496107, "loss": 2.2597, "step": 304 }, { "epoch": 0.33, "learning_rate": 0.00019678112985184308, "loss": 2.3117, "step": 306 }, { "epoch": 0.33, "learning_rate": 0.00019672292787825292, "loss": 2.1624, "step": 308 }, { "epoch": 0.33, "learning_rate": 0.00019666421318271547, "loss": 2.1857, "step": 310 }, { "epoch": 0.33, "learning_rate": 0.0001966049860764735, "loss": 2.2101, "step": 312 }, { "epoch": 0.33, "learning_rate": 0.00019654524687348607, "loss": 2.1749, "step": 314 }, { "epoch": 0.34, "learning_rate": 0.00019648499589042676, "loss": 2.1557, "step": 316 }, { "epoch": 0.34, "learning_rate": 0.00019642423344668218, "loss": 2.2259, "step": 318 }, { "epoch": 0.34, "learning_rate": 0.00019636295986435003, "loss": 2.1723, "step": 320 }, { "epoch": 0.34, "learning_rate": 0.00019630117546823759, "loss": 2.294, "step": 322 }, { "epoch": 0.35, "learning_rate": 0.00019623888058585993, "loss": 2.2241, "step": 324 }, { "epoch": 0.35, "learning_rate": 0.00019617607554743818, "loss": 2.1496, "step": 326 }, { "epoch": 0.35, "learning_rate": 0.00019611276068589776, "loss": 2.1116, "step": 328 }, { "epoch": 0.35, "learning_rate": 0.00019604893633686662, "loss": 2.1723, "step": 330 }, { "epoch": 0.35, "learning_rate": 0.0001959846028386735, "loss": 2.0301, "step": 332 }, { "epoch": 0.36, "learning_rate": 0.00019591976053234608, "loss": 1.8651, "step": 334 }, { "epoch": 0.36, "learning_rate": 0.0001958544097616092, "loss": 1.8286, "step": 336 }, { "epoch": 0.36, "learning_rate": 0.00019578855087288302, "loss": 1.9081, "step": 338 }, { "epoch": 0.36, "learning_rate": 0.0001957221842152813, "loss": 1.8546, "step": 340 }, { "epoch": 0.36, "learning_rate": 0.0001956553101406093, "loss": 1.7914, "step": 342 }, { "epoch": 0.37, "learning_rate": 0.00019558792900336216, "loss": 1.7997, "step": 344 }, { "epoch": 0.37, "learning_rate": 0.00019552004116072294, "loss": 1.3596, "step": 346 }, { "epoch": 0.37, "learning_rate": 0.0001954516469725606, "loss": 1.6549, "step": 348 }, { "epoch": 0.37, "learning_rate": 0.00019538274680142834, "loss": 1.6592, "step": 350 }, { "epoch": 0.38, "learning_rate": 0.00019531334101256147, "loss": 2.2759, "step": 352 }, { "epoch": 0.38, "learning_rate": 0.00019524342997387557, "loss": 2.1805, "step": 354 }, { "epoch": 0.38, "learning_rate": 0.0001951730140559645, "loss": 2.2585, "step": 356 }, { "epoch": 0.38, "learning_rate": 0.00019510209363209847, "loss": 2.2573, "step": 358 }, { "epoch": 0.38, "learning_rate": 0.00019503066907822198, "loss": 2.3154, "step": 360 }, { "epoch": 0.39, "learning_rate": 0.000194958740772952, "loss": 2.2306, "step": 362 }, { "epoch": 0.39, "learning_rate": 0.00019488630909757579, "loss": 2.3178, "step": 364 }, { "epoch": 0.39, "learning_rate": 0.00019481337443604893, "loss": 2.2691, "step": 366 }, { "epoch": 0.39, "learning_rate": 0.0001947399371749933, "loss": 2.153, "step": 368 }, { "epoch": 0.39, "learning_rate": 0.00019466599770369509, "loss": 2.0471, "step": 370 }, { "epoch": 0.4, "learning_rate": 0.00019459155641410257, "loss": 2.3142, "step": 372 }, { "epoch": 0.4, "learning_rate": 0.00019451661370082426, "loss": 2.1241, "step": 374 }, { "epoch": 0.4, "learning_rate": 0.0001944411699611265, "loss": 2.1079, "step": 376 }, { "epoch": 0.4, "learning_rate": 0.0001943652255949317, "loss": 2.063, "step": 378 }, { "epoch": 0.41, "learning_rate": 0.00019428878100481606, "loss": 2.0062, "step": 380 }, { "epoch": 0.41, "learning_rate": 0.00019421183659600725, "loss": 1.9, "step": 382 }, { "epoch": 0.41, "learning_rate": 0.00019413439277638265, "loss": 1.9723, "step": 384 }, { "epoch": 0.41, "learning_rate": 0.00019405644995646696, "loss": 1.969, "step": 386 }, { "epoch": 0.41, "learning_rate": 0.00019397800854942986, "loss": 2.0202, "step": 388 }, { "epoch": 0.42, "learning_rate": 0.00019389906897108428, "loss": 1.9028, "step": 390 }, { "epoch": 0.42, "learning_rate": 0.0001938196316398837, "loss": 1.8172, "step": 392 }, { "epoch": 0.42, "learning_rate": 0.00019373969697692028, "loss": 1.7243, "step": 394 }, { "epoch": 0.42, "learning_rate": 0.00019365926540592247, "loss": 1.4621, "step": 396 }, { "epoch": 0.42, "learning_rate": 0.0001935783373532528, "loss": 1.7987, "step": 398 }, { "epoch": 0.43, "learning_rate": 0.00019349691324790555, "loss": 1.7935, "step": 400 }, { "epoch": 0.43, "learning_rate": 0.0001934149935215047, "loss": 2.2858, "step": 402 }, { "epoch": 0.43, "learning_rate": 0.00019333257860830135, "loss": 2.3798, "step": 404 }, { "epoch": 0.43, "learning_rate": 0.00019324966894517155, "loss": 2.1534, "step": 406 }, { "epoch": 0.43, "learning_rate": 0.00019316626497161408, "loss": 2.2648, "step": 408 }, { "epoch": 0.44, "learning_rate": 0.00019308236712974795, "loss": 2.2946, "step": 410 }, { "epoch": 0.44, "learning_rate": 0.0001929979758643102, "loss": 2.319, "step": 412 }, { "epoch": 0.44, "learning_rate": 0.00019291309162265338, "loss": 2.2271, "step": 414 }, { "epoch": 0.44, "learning_rate": 0.0001928277148547434, "loss": 2.0746, "step": 416 }, { "epoch": 0.45, "learning_rate": 0.00019274184601315687, "loss": 2.1231, "step": 418 }, { "epoch": 0.45, "learning_rate": 0.000192655485553079, "loss": 2.3176, "step": 420 }, { "epoch": 0.45, "learning_rate": 0.00019256863393230094, "loss": 2.1657, "step": 422 }, { "epoch": 0.45, "learning_rate": 0.00019248129161121748, "loss": 2.2696, "step": 424 }, { "epoch": 0.45, "learning_rate": 0.0001923934590528246, "loss": 2.2132, "step": 426 }, { "epoch": 0.46, "learning_rate": 0.00019230513672271698, "loss": 2.104, "step": 428 }, { "epoch": 0.46, "learning_rate": 0.0001922163250890855, "loss": 1.9319, "step": 430 }, { "epoch": 0.46, "learning_rate": 0.0001921270246227149, "loss": 1.9152, "step": 432 }, { "epoch": 0.46, "learning_rate": 0.00019203723579698108, "loss": 1.8615, "step": 434 }, { "epoch": 0.46, "learning_rate": 0.00019194695908784882, "loss": 2.0695, "step": 436 }, { "epoch": 0.47, "learning_rate": 0.0001918561949738691, "loss": 1.8626, "step": 438 }, { "epoch": 0.47, "learning_rate": 0.0001917649439361765, "loss": 1.7538, "step": 440 }, { "epoch": 0.47, "learning_rate": 0.00019167320645848695, "loss": 2.0082, "step": 442 }, { "epoch": 0.47, "learning_rate": 0.00019158098302709476, "loss": 1.6011, "step": 444 }, { "epoch": 0.48, "learning_rate": 0.00019148827413087034, "loss": 1.5286, "step": 446 }, { "epoch": 0.48, "learning_rate": 0.00019139508026125754, "loss": 1.8795, "step": 448 }, { "epoch": 0.48, "learning_rate": 0.000191301401912271, "loss": 1.7509, "step": 450 }, { "epoch": 0.48, "learning_rate": 0.00019120723958049353, "loss": 2.3155, "step": 452 }, { "epoch": 0.48, "learning_rate": 0.0001911125937650736, "loss": 2.2125, "step": 454 }, { "epoch": 0.49, "learning_rate": 0.00019101746496772242, "loss": 2.2262, "step": 456 }, { "epoch": 0.49, "learning_rate": 0.0001909218536927116, "loss": 2.3185, "step": 458 }, { "epoch": 0.49, "learning_rate": 0.0001908257604468703, "loss": 2.2496, "step": 460 }, { "epoch": 0.49, "learning_rate": 0.00019072918573958254, "loss": 2.3569, "step": 462 }, { "epoch": 0.49, "learning_rate": 0.0001906321300827846, "loss": 2.1697, "step": 464 }, { "epoch": 0.5, "learning_rate": 0.00019053459399096215, "loss": 2.197, "step": 466 }, { "epoch": 0.5, "learning_rate": 0.00019043657798114766, "loss": 1.9929, "step": 468 }, { "epoch": 0.5, "learning_rate": 0.00019033808257291768, "loss": 2.1488, "step": 470 }, { "epoch": 0.5, "learning_rate": 0.0001902391082883899, "loss": 2.2224, "step": 472 }, { "epoch": 0.51, "learning_rate": 0.00019013965565222062, "loss": 2.0495, "step": 474 }, { "epoch": 0.51, "learning_rate": 0.00019003972519160178, "loss": 2.0964, "step": 476 }, { "epoch": 0.51, "learning_rate": 0.0001899393174362582, "loss": 1.8708, "step": 478 }, { "epoch": 0.51, "learning_rate": 0.00018983843291844492, "loss": 1.9741, "step": 480 }, { "epoch": 0.51, "learning_rate": 0.00018973707217294415, "loss": 1.9908, "step": 482 }, { "epoch": 0.52, "learning_rate": 0.00018963523573706264, "loss": 1.7973, "step": 484 }, { "epoch": 0.52, "learning_rate": 0.0001895329241506287, "loss": 1.9293, "step": 486 }, { "epoch": 0.52, "learning_rate": 0.00018943013795598944, "loss": 1.94, "step": 488 }, { "epoch": 0.52, "learning_rate": 0.00018932687769800767, "loss": 1.9435, "step": 490 }, { "epoch": 0.52, "learning_rate": 0.00018922314392405944, "loss": 1.7907, "step": 492 }, { "epoch": 0.53, "learning_rate": 0.00018911893718403063, "loss": 1.6344, "step": 494 }, { "epoch": 0.53, "learning_rate": 0.00018901425803031447, "loss": 1.3563, "step": 496 }, { "epoch": 0.53, "learning_rate": 0.00018890910701780826, "loss": 1.7, "step": 498 }, { "epoch": 0.53, "learning_rate": 0.00018880348470391077, "loss": 1.7682, "step": 500 }, { "epoch": 0.54, "learning_rate": 0.00018869739164851889, "loss": 2.2041, "step": 502 }, { "epoch": 0.54, "learning_rate": 0.00018859082841402513, "loss": 2.237, "step": 504 }, { "epoch": 0.54, "learning_rate": 0.0001884837955653142, "loss": 2.1694, "step": 506 }, { "epoch": 0.54, "learning_rate": 0.00018837629366976025, "loss": 2.2373, "step": 508 }, { "epoch": 0.54, "learning_rate": 0.0001882683232972239, "loss": 2.2511, "step": 510 }, { "epoch": 0.55, "learning_rate": 0.000188159885020049, "loss": 2.2474, "step": 512 }, { "epoch": 0.55, "learning_rate": 0.00018805097941305984, "loss": 2.1938, "step": 514 }, { "epoch": 0.55, "learning_rate": 0.00018794160705355796, "loss": 2.0874, "step": 516 }, { "epoch": 0.55, "learning_rate": 0.00018783176852131908, "loss": 2.2795, "step": 518 }, { "epoch": 0.55, "learning_rate": 0.00018772146439859015, "loss": 2.179, "step": 520 }, { "epoch": 0.56, "learning_rate": 0.00018761069527008613, "loss": 2.0959, "step": 522 }, { "epoch": 0.56, "learning_rate": 0.00018749946172298698, "loss": 2.3345, "step": 524 }, { "epoch": 0.56, "learning_rate": 0.00018738776434693447, "loss": 2.0522, "step": 526 }, { "epoch": 0.56, "learning_rate": 0.00018727560373402917, "loss": 2.1055, "step": 528 }, { "epoch": 0.56, "learning_rate": 0.00018716298047882714, "loss": 2.0651, "step": 530 }, { "epoch": 0.57, "learning_rate": 0.00018704989517833695, "loss": 2.0961, "step": 532 }, { "epoch": 0.57, "learning_rate": 0.0001869363484320164, "loss": 1.9879, "step": 534 }, { "epoch": 0.57, "learning_rate": 0.00018682234084176945, "loss": 2.0118, "step": 536 }, { "epoch": 0.57, "learning_rate": 0.0001867078730119429, "loss": 1.9059, "step": 538 }, { "epoch": 0.58, "learning_rate": 0.00018659294554932324, "loss": 1.8227, "step": 540 }, { "epoch": 0.58, "learning_rate": 0.00018647755906313348, "loss": 1.8794, "step": 542 }, { "epoch": 0.58, "learning_rate": 0.0001863617141650299, "loss": 1.6243, "step": 544 }, { "epoch": 0.58, "learning_rate": 0.00018624541146909873, "loss": 1.6429, "step": 546 }, { "epoch": 0.58, "learning_rate": 0.00018612865159185304, "loss": 2.03, "step": 548 }, { "epoch": 0.59, "learning_rate": 0.0001860114351522293, "loss": 1.7897, "step": 550 }, { "epoch": 0.59, "learning_rate": 0.00018589376277158425, "loss": 2.2226, "step": 552 }, { "epoch": 0.59, "learning_rate": 0.00018577563507369153, "loss": 2.2998, "step": 554 }, { "epoch": 0.59, "learning_rate": 0.00018565705268473837, "loss": 2.1385, "step": 556 }, { "epoch": 0.59, "learning_rate": 0.0001855380162333223, "loss": 2.1793, "step": 558 }, { "epoch": 0.6, "learning_rate": 0.0001854185263504478, "loss": 2.2456, "step": 560 }, { "epoch": 0.6, "learning_rate": 0.00018529858366952298, "loss": 2.162, "step": 562 }, { "epoch": 0.6, "learning_rate": 0.00018517818882635617, "loss": 2.1046, "step": 564 }, { "epoch": 0.6, "learning_rate": 0.0001850573424591526, "loss": 2.1761, "step": 566 }, { "epoch": 0.61, "learning_rate": 0.00018493604520851097, "loss": 2.2593, "step": 568 }, { "epoch": 0.61, "learning_rate": 0.00018481429771742018, "loss": 2.2067, "step": 570 }, { "epoch": 0.61, "learning_rate": 0.00018469210063125572, "loss": 2.1257, "step": 572 }, { "epoch": 0.61, "learning_rate": 0.00018456945459777643, "loss": 2.2823, "step": 574 }, { "epoch": 0.61, "learning_rate": 0.0001844463602671209, "loss": 2.2942, "step": 576 }, { "epoch": 0.62, "learning_rate": 0.0001843228182918042, "loss": 2.1126, "step": 578 }, { "epoch": 0.62, "learning_rate": 0.0001841988293267143, "loss": 2.2014, "step": 580 }, { "epoch": 0.62, "learning_rate": 0.00018407439402910858, "loss": 2.0244, "step": 582 }, { "epoch": 0.62, "learning_rate": 0.00018394951305861055, "loss": 1.9842, "step": 584 }, { "epoch": 0.62, "learning_rate": 0.00018382418707720604, "loss": 1.8637, "step": 586 }, { "epoch": 0.63, "learning_rate": 0.00018369841674923998, "loss": 1.8229, "step": 588 }, { "epoch": 0.63, "learning_rate": 0.00018357220274141262, "loss": 1.8023, "step": 590 }, { "epoch": 0.63, "learning_rate": 0.00018344554572277628, "loss": 1.6493, "step": 592 }, { "epoch": 0.63, "learning_rate": 0.00018331844636473152, "loss": 1.4272, "step": 594 }, { "epoch": 0.64, "learning_rate": 0.00018319090534102381, "loss": 1.5226, "step": 596 }, { "epoch": 0.64, "learning_rate": 0.0001830629233277398, "loss": 1.5868, "step": 598 }, { "epoch": 0.64, "learning_rate": 0.00018293450100330375, "loss": 1.6859, "step": 600 }, { "epoch": 0.64, "learning_rate": 0.00018280563904847415, "loss": 2.2781, "step": 602 }, { "epoch": 0.64, "learning_rate": 0.0001826763381463398, "loss": 2.2743, "step": 604 }, { "epoch": 0.65, "learning_rate": 0.0001825465989823164, "loss": 2.2974, "step": 606 }, { "epoch": 0.65, "learning_rate": 0.00018241642224414272, "loss": 2.089, "step": 608 }, { "epoch": 0.65, "learning_rate": 0.00018228580862187727, "loss": 2.3559, "step": 610 }, { "epoch": 0.65, "learning_rate": 0.00018215475880789433, "loss": 2.2152, "step": 612 }, { "epoch": 0.65, "learning_rate": 0.00018202327349688043, "loss": 2.1726, "step": 614 }, { "epoch": 0.66, "learning_rate": 0.00018189135338583066, "loss": 2.2242, "step": 616 }, { "epoch": 0.66, "learning_rate": 0.00018175899917404492, "loss": 2.2506, "step": 618 }, { "epoch": 0.66, "learning_rate": 0.00018162621156312433, "loss": 2.1324, "step": 620 }, { "epoch": 0.66, "learning_rate": 0.00018149299125696735, "loss": 2.006, "step": 622 }, { "epoch": 0.67, "learning_rate": 0.00018135933896176612, "loss": 2.2178, "step": 624 }, { "epoch": 0.67, "learning_rate": 0.00018122525538600282, "loss": 2.0817, "step": 626 }, { "epoch": 0.67, "learning_rate": 0.00018109074124044572, "loss": 2.0006, "step": 628 }, { "epoch": 0.67, "learning_rate": 0.00018095579723814557, "loss": 1.9424, "step": 630 }, { "epoch": 0.67, "learning_rate": 0.00018082042409443174, "loss": 1.9661, "step": 632 }, { "epoch": 0.68, "learning_rate": 0.00018068462252690843, "loss": 1.8678, "step": 634 }, { "epoch": 0.68, "learning_rate": 0.00018054839325545096, "loss": 1.8343, "step": 636 }, { "epoch": 0.68, "learning_rate": 0.0001804117370022018, "loss": 1.8003, "step": 638 }, { "epoch": 0.68, "learning_rate": 0.0001802746544915669, "loss": 1.7928, "step": 640 }, { "epoch": 0.68, "learning_rate": 0.00018013714645021166, "loss": 1.8922, "step": 642 }, { "epoch": 0.69, "learning_rate": 0.00017999921360705733, "loss": 1.6961, "step": 644 }, { "epoch": 0.69, "learning_rate": 0.0001798608566932769, "loss": 1.3043, "step": 646 }, { "epoch": 0.69, "learning_rate": 0.00017972207644229138, "loss": 1.8295, "step": 648 }, { "epoch": 0.69, "learning_rate": 0.0001795828735897658, "loss": 1.7105, "step": 650 }, { "epoch": 0.69, "learning_rate": 0.00017944324887360553, "loss": 2.1881, "step": 652 }, { "epoch": 0.7, "learning_rate": 0.000179303203033952, "loss": 2.2434, "step": 654 }, { "epoch": 0.7, "learning_rate": 0.0001791627368131792, "loss": 2.3566, "step": 656 }, { "epoch": 0.7, "learning_rate": 0.00017902185095588927, "loss": 2.3222, "step": 658 }, { "epoch": 0.7, "learning_rate": 0.00017888054620890915, "loss": 2.181, "step": 660 }, { "epoch": 0.71, "learning_rate": 0.00017873882332128597, "loss": 2.3261, "step": 662 }, { "epoch": 0.71, "learning_rate": 0.00017859668304428365, "loss": 2.2798, "step": 664 }, { "epoch": 0.71, "learning_rate": 0.00017845412613137844, "loss": 2.0487, "step": 666 }, { "epoch": 0.71, "learning_rate": 0.00017831115333825535, "loss": 2.1863, "step": 668 }, { "epoch": 0.71, "learning_rate": 0.00017816776542280377, "loss": 2.1308, "step": 670 }, { "epoch": 0.72, "learning_rate": 0.0001780239631451138, "loss": 2.3122, "step": 672 }, { "epoch": 0.72, "learning_rate": 0.0001778797472674719, "loss": 2.2757, "step": 674 }, { "epoch": 0.72, "learning_rate": 0.00017773511855435708, "loss": 2.0241, "step": 676 }, { "epoch": 0.72, "learning_rate": 0.00017759007777243672, "loss": 1.9896, "step": 678 }, { "epoch": 0.72, "learning_rate": 0.00017744462569056256, "loss": 1.9607, "step": 680 }, { "epoch": 0.73, "learning_rate": 0.00017729876307976663, "loss": 1.9798, "step": 682 }, { "epoch": 0.73, "learning_rate": 0.00017715249071325717, "loss": 1.9075, "step": 684 }, { "epoch": 0.73, "learning_rate": 0.00017700580936641443, "loss": 2.0141, "step": 686 }, { "epoch": 0.73, "learning_rate": 0.00017685871981678672, "loss": 1.9238, "step": 688 }, { "epoch": 0.74, "learning_rate": 0.00017671122284408614, "loss": 1.9244, "step": 690 }, { "epoch": 0.74, "learning_rate": 0.00017656331923018457, "loss": 1.6621, "step": 692 }, { "epoch": 0.74, "learning_rate": 0.00017641500975910945, "loss": 1.7402, "step": 694 }, { "epoch": 0.74, "learning_rate": 0.0001762662952170396, "loss": 1.3913, "step": 696 }, { "epoch": 0.74, "learning_rate": 0.0001761171763923012, "loss": 1.5825, "step": 698 }, { "epoch": 0.75, "learning_rate": 0.0001759676540753634, "loss": 1.6809, "step": 700 }, { "epoch": 0.75, "learning_rate": 0.00017581772905883423, "loss": 2.3459, "step": 702 }, { "epoch": 0.75, "learning_rate": 0.00017566740213745648, "loss": 2.1963, "step": 704 }, { "epoch": 0.75, "learning_rate": 0.00017551667410810337, "loss": 2.1334, "step": 706 }, { "epoch": 0.75, "learning_rate": 0.00017536554576977442, "loss": 2.3778, "step": 708 }, { "epoch": 0.76, "learning_rate": 0.00017521401792359108, "loss": 2.183, "step": 710 }, { "epoch": 0.76, "learning_rate": 0.0001750620913727926, "loss": 2.1674, "step": 712 }, { "epoch": 0.76, "learning_rate": 0.00017490976692273176, "loss": 2.3534, "step": 714 }, { "epoch": 0.76, "learning_rate": 0.00017475704538087055, "loss": 2.1677, "step": 716 }, { "epoch": 0.77, "learning_rate": 0.00017460392755677592, "loss": 2.1642, "step": 718 }, { "epoch": 0.77, "learning_rate": 0.0001744504142621155, "loss": 2.1983, "step": 720 }, { "epoch": 0.77, "learning_rate": 0.0001742965063106533, "loss": 2.1661, "step": 722 }, { "epoch": 0.77, "learning_rate": 0.0001741422045182453, "loss": 2.2135, "step": 724 }, { "epoch": 0.77, "learning_rate": 0.00017398750970283532, "loss": 2.1288, "step": 726 }, { "epoch": 0.78, "learning_rate": 0.00017383242268445047, "loss": 1.9906, "step": 728 }, { "epoch": 0.78, "learning_rate": 0.00017367694428519696, "loss": 1.9031, "step": 730 }, { "epoch": 0.78, "learning_rate": 0.00017352107532925569, "loss": 1.831, "step": 732 }, { "epoch": 0.78, "learning_rate": 0.00017336481664287777, "loss": 1.9116, "step": 734 }, { "epoch": 0.78, "learning_rate": 0.00017320816905438044, "loss": 1.8241, "step": 736 }, { "epoch": 0.79, "learning_rate": 0.0001730511333941423, "loss": 1.8745, "step": 738 }, { "epoch": 0.79, "learning_rate": 0.00017289371049459922, "loss": 1.8306, "step": 740 }, { "epoch": 0.79, "learning_rate": 0.00017273590119023968, "loss": 1.6032, "step": 742 }, { "epoch": 0.79, "learning_rate": 0.00017257770631760058, "loss": 1.3571, "step": 744 }, { "epoch": 0.8, "learning_rate": 0.00017241912671526265, "loss": 1.6282, "step": 746 }, { "epoch": 0.8, "learning_rate": 0.00017226016322384604, "loss": 1.5942, "step": 748 }, { "epoch": 0.8, "learning_rate": 0.00017210081668600586, "loss": 1.6988, "step": 750 }, { "epoch": 0.8, "learning_rate": 0.00017194108794642775, "loss": 2.334, "step": 752 }, { "epoch": 0.8, "learning_rate": 0.00017178097785182337, "loss": 2.2731, "step": 754 }, { "epoch": 0.81, "learning_rate": 0.0001716204872509259, "loss": 2.1642, "step": 756 }, { "epoch": 0.81, "learning_rate": 0.00017145961699448559, "loss": 2.4107, "step": 758 }, { "epoch": 0.81, "learning_rate": 0.00017129836793526517, "loss": 2.2767, "step": 760 }, { "epoch": 0.81, "learning_rate": 0.00017113674092803543, "loss": 2.3137, "step": 762 }, { "epoch": 0.81, "learning_rate": 0.00017097473682957067, "loss": 2.3095, "step": 764 }, { "epoch": 0.82, "learning_rate": 0.00017081235649864395, "loss": 2.1327, "step": 766 }, { "epoch": 0.82, "learning_rate": 0.00017064960079602297, "loss": 2.2666, "step": 768 }, { "epoch": 0.82, "learning_rate": 0.00017048647058446505, "loss": 2.2871, "step": 770 }, { "epoch": 0.82, "learning_rate": 0.00017032296672871283, "loss": 2.2055, "step": 772 }, { "epoch": 0.82, "learning_rate": 0.00017015909009548966, "loss": 2.1066, "step": 774 }, { "epoch": 0.83, "learning_rate": 0.00016999484155349483, "loss": 2.1887, "step": 776 }, { "epoch": 0.83, "learning_rate": 0.00016983022197339923, "loss": 2.1014, "step": 778 }, { "epoch": 0.83, "learning_rate": 0.00016966523222784058, "loss": 2.1138, "step": 780 }, { "epoch": 0.83, "learning_rate": 0.00016949987319141868, "loss": 1.8663, "step": 782 }, { "epoch": 0.84, "learning_rate": 0.0001693341457406911, "loss": 1.7421, "step": 784 }, { "epoch": 0.84, "learning_rate": 0.00016916805075416823, "loss": 1.8293, "step": 786 }, { "epoch": 0.84, "learning_rate": 0.0001690015891123088, "loss": 1.8993, "step": 788 }, { "epoch": 0.84, "learning_rate": 0.00016883476169751518, "loss": 1.8486, "step": 790 }, { "epoch": 0.84, "learning_rate": 0.0001686675693941286, "loss": 1.7033, "step": 792 }, { "epoch": 0.85, "learning_rate": 0.00016850001308842458, "loss": 1.5906, "step": 794 }, { "epoch": 0.85, "learning_rate": 0.00016833209366860826, "loss": 1.4477, "step": 796 }, { "epoch": 0.85, "learning_rate": 0.00016816381202480946, "loss": 1.8339, "step": 798 }, { "epoch": 0.85, "learning_rate": 0.00016799516904907828, "loss": 1.6691, "step": 800 }, { "epoch": 0.85, "learning_rate": 0.00016782616563538016, "loss": 2.4258, "step": 802 }, { "epoch": 0.86, "learning_rate": 0.0001676568026795912, "loss": 2.3445, "step": 804 }, { "epoch": 0.86, "learning_rate": 0.00016748708107949337, "loss": 2.137, "step": 806 }, { "epoch": 0.86, "learning_rate": 0.00016731700173476988, "loss": 2.2656, "step": 808 }, { "epoch": 0.86, "learning_rate": 0.00016714656554700022, "loss": 2.2945, "step": 810 }, { "epoch": 0.87, "learning_rate": 0.0001669757734196556, "loss": 2.0641, "step": 812 }, { "epoch": 0.87, "learning_rate": 0.0001668046262580939, "loss": 2.0698, "step": 814 }, { "epoch": 0.87, "learning_rate": 0.00016663312496955517, "loss": 2.1787, "step": 816 }, { "epoch": 0.87, "learning_rate": 0.00016646127046315653, "loss": 2.0295, "step": 818 }, { "epoch": 0.87, "learning_rate": 0.0001662890636498875, "loss": 2.2736, "step": 820 }, { "epoch": 0.88, "learning_rate": 0.00016611650544260526, "loss": 2.1205, "step": 822 }, { "epoch": 0.88, "learning_rate": 0.00016594359675602962, "loss": 2.1249, "step": 824 }, { "epoch": 0.88, "learning_rate": 0.00016577033850673824, "loss": 1.9355, "step": 826 }, { "epoch": 0.88, "learning_rate": 0.00016559673161316188, "loss": 1.8423, "step": 828 }, { "epoch": 0.88, "learning_rate": 0.00016542277699557934, "loss": 2.0979, "step": 830 }, { "epoch": 0.89, "learning_rate": 0.00016524847557611278, "loss": 1.7747, "step": 832 }, { "epoch": 0.89, "learning_rate": 0.00016507382827872264, "loss": 1.7137, "step": 834 }, { "epoch": 0.89, "learning_rate": 0.0001648988360292029, "loss": 1.9926, "step": 836 }, { "epoch": 0.89, "learning_rate": 0.0001647234997551761, "loss": 2.033, "step": 838 }, { "epoch": 0.9, "learning_rate": 0.00016454782038608835, "loss": 1.7298, "step": 840 }, { "epoch": 0.9, "learning_rate": 0.00016437179885320466, "loss": 1.8326, "step": 842 }, { "epoch": 0.9, "learning_rate": 0.00016419543608960367, "loss": 1.5744, "step": 844 }, { "epoch": 0.9, "learning_rate": 0.00016401873303017287, "loss": 1.471, "step": 846 }, { "epoch": 0.9, "learning_rate": 0.00016384169061160376, "loss": 1.7169, "step": 848 }, { "epoch": 0.91, "learning_rate": 0.00016366430977238667, "loss": 1.5636, "step": 850 }, { "epoch": 0.91, "learning_rate": 0.00016348659145280585, "loss": 2.3071, "step": 852 }, { "epoch": 0.91, "learning_rate": 0.00016330853659493456, "loss": 2.1993, "step": 854 }, { "epoch": 0.91, "learning_rate": 0.00016313014614263003, "loss": 2.1583, "step": 856 }, { "epoch": 0.91, "learning_rate": 0.0001629514210415284, "loss": 2.1996, "step": 858 }, { "epoch": 0.92, "learning_rate": 0.00016277236223903986, "loss": 2.309, "step": 860 }, { "epoch": 0.92, "learning_rate": 0.00016259297068434343, "loss": 2.2155, "step": 862 }, { "epoch": 0.92, "learning_rate": 0.0001624132473283821, "loss": 2.2474, "step": 864 }, { "epoch": 0.92, "learning_rate": 0.00016223319312385766, "loss": 2.2034, "step": 866 }, { "epoch": 0.93, "learning_rate": 0.00016205280902522576, "loss": 2.2016, "step": 868 }, { "epoch": 0.93, "learning_rate": 0.00016187209598869074, "loss": 2.133, "step": 870 }, { "epoch": 0.93, "learning_rate": 0.00016169105497220064, "loss": 2.1746, "step": 872 }, { "epoch": 0.93, "learning_rate": 0.00016150968693544215, "loss": 2.0806, "step": 874 }, { "epoch": 0.93, "learning_rate": 0.00016132799283983542, "loss": 2.2318, "step": 876 }, { "epoch": 0.94, "learning_rate": 0.000161145973648529, "loss": 2.0729, "step": 878 }, { "epoch": 0.94, "learning_rate": 0.0001609636303263948, "loss": 1.8199, "step": 880 }, { "epoch": 0.94, "learning_rate": 0.00016078096384002292, "loss": 2.0973, "step": 882 }, { "epoch": 0.94, "learning_rate": 0.00016059797515771652, "loss": 1.7302, "step": 884 }, { "epoch": 0.94, "learning_rate": 0.00016041466524948663, "loss": 1.9608, "step": 886 }, { "epoch": 0.95, "learning_rate": 0.00016023103508704725, "loss": 1.8483, "step": 888 }, { "epoch": 0.95, "learning_rate": 0.00016004708564380985, "loss": 1.9501, "step": 890 }, { "epoch": 0.95, "learning_rate": 0.0001598628178948785, "loss": 1.6526, "step": 892 }, { "epoch": 0.95, "learning_rate": 0.0001596782328170445, "loss": 1.4811, "step": 894 }, { "epoch": 0.96, "learning_rate": 0.00015949333138878138, "loss": 1.4048, "step": 896 }, { "epoch": 0.96, "learning_rate": 0.00015930811459023957, "loss": 1.5489, "step": 898 }, { "epoch": 0.96, "learning_rate": 0.00015912258340324126, "loss": 1.7197, "step": 900 }, { "epoch": 0.96, "learning_rate": 0.00015893673881127524, "loss": 2.1217, "step": 902 }, { "epoch": 0.96, "learning_rate": 0.00015875058179949151, "loss": 2.2086, "step": 904 }, { "epoch": 0.97, "learning_rate": 0.00015856411335469638, "loss": 2.094, "step": 906 }, { "epoch": 0.97, "learning_rate": 0.00015837733446534688, "loss": 2.3415, "step": 908 }, { "epoch": 0.97, "learning_rate": 0.00015819024612154575, "loss": 2.2378, "step": 910 }, { "epoch": 0.97, "learning_rate": 0.00015800284931503618, "loss": 2.1351, "step": 912 }, { "epoch": 0.97, "learning_rate": 0.0001578151450391964, "loss": 2.1795, "step": 914 }, { "epoch": 0.98, "learning_rate": 0.00015762713428903454, "loss": 2.0282, "step": 916 }, { "epoch": 0.98, "learning_rate": 0.00015743881806118342, "loss": 2.2858, "step": 918 }, { "epoch": 0.98, "learning_rate": 0.00015725019735389503, "loss": 2.0929, "step": 920 }, { "epoch": 0.98, "learning_rate": 0.00015706127316703557, "loss": 1.9913, "step": 922 }, { "epoch": 0.98, "learning_rate": 0.0001568720465020798, "loss": 2.0128, "step": 924 }, { "epoch": 0.99, "learning_rate": 0.00015668251836210595, "loss": 1.8792, "step": 926 }, { "epoch": 0.99, "learning_rate": 0.0001564926897517904, "loss": 1.6978, "step": 928 }, { "epoch": 0.99, "learning_rate": 0.0001563025616774022, "loss": 2.0335, "step": 930 }, { "epoch": 0.99, "learning_rate": 0.0001561121351467979, "loss": 1.845, "step": 932 }, { "epoch": 1.0, "learning_rate": 0.00015592141116941628, "loss": 1.3927, "step": 934 }, { "epoch": 1.0, "learning_rate": 0.00015573039075627256, "loss": 1.6165, "step": 936 }, { "epoch": 1.0, "learning_rate": 0.00015553907491995365, "loss": 1.6737, "step": 938 } ], "logging_steps": 2, "max_steps": 2814, "num_train_epochs": 3, "save_steps": 500, "total_flos": 1.51669593729024e+16, "trial_name": null, "trial_params": null }