{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.983219390926041, "eval_steps": 500, "global_step": 300, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "learning_rate": 4.4444444444444447e-05, "loss": 2.7675, "step": 2 }, { "epoch": 0.04, "learning_rate": 8.888888888888889e-05, "loss": 2.6931, "step": 4 }, { "epoch": 0.06, "learning_rate": 0.00013333333333333334, "loss": 2.6733, "step": 6 }, { "epoch": 0.08, "learning_rate": 0.00017777777777777779, "loss": 2.5515, "step": 8 }, { "epoch": 0.1, "learning_rate": 0.00019999417253661235, "loss": 2.4826, "step": 10 }, { "epoch": 0.12, "learning_rate": 0.00019994755690455152, "loss": 2.5387, "step": 12 }, { "epoch": 0.14, "learning_rate": 0.0001998543473718677, "loss": 2.4596, "step": 14 }, { "epoch": 0.16, "learning_rate": 0.00019971458739130598, "loss": 2.4936, "step": 16 }, { "epoch": 0.18, "learning_rate": 0.0001995283421166614, "loss": 2.1189, "step": 18 }, { "epoch": 0.2, "learning_rate": 0.00019929569837240564, "loss": 1.9708, "step": 20 }, { "epoch": 0.22, "learning_rate": 0.00019901676461321068, "loss": 2.0963, "step": 22 }, { "epoch": 0.24, "learning_rate": 0.00019869167087338907, "loss": 2.0887, "step": 24 }, { "epoch": 0.26, "learning_rate": 0.00019832056870627417, "loss": 2.218, "step": 26 }, { "epoch": 0.28, "learning_rate": 0.00019790363111356837, "loss": 2.358, "step": 28 }, { "epoch": 0.3, "learning_rate": 0.00019744105246469263, "loss": 2.334, "step": 30 }, { "epoch": 0.32, "learning_rate": 0.00019693304840617457, "loss": 2.1776, "step": 32 }, { "epoch": 0.34, "learning_rate": 0.00019637985576111778, "loss": 2.3149, "step": 34 }, { "epoch": 0.36, "learning_rate": 0.00019578173241879872, "loss": 2.1172, "step": 36 }, { "epoch": 0.38, "learning_rate": 0.00019513895721444286, "loss": 2.183, "step": 38 }, { "epoch": 0.4, "learning_rate": 0.00019445182979923654, "loss": 2.2688, "step": 40 }, { "epoch": 0.42, "learning_rate": 0.00019372067050063438, "loss": 2.2487, "step": 42 }, { "epoch": 0.44, "learning_rate": 0.00019294582017302797, "loss": 2.1218, "step": 44 }, { "epoch": 0.46, "learning_rate": 0.0001921276400388451, "loss": 2.1302, "step": 46 }, { "epoch": 0.48, "learning_rate": 0.00019126651152015403, "loss": 2.1458, "step": 48 }, { "epoch": 0.5, "learning_rate": 0.00019036283606085053, "loss": 2.1425, "step": 50 }, { "epoch": 0.52, "learning_rate": 0.00018941703493951164, "loss": 2.2523, "step": 52 }, { "epoch": 0.54, "learning_rate": 0.00018842954907300236, "loss": 2.1411, "step": 54 }, { "epoch": 0.56, "learning_rate": 0.0001874008388109276, "loss": 2.2491, "step": 56 }, { "epoch": 0.58, "learning_rate": 0.00018633138372102468, "loss": 2.1121, "step": 58 }, { "epoch": 0.6, "learning_rate": 0.00018522168236559695, "loss": 2.2793, "step": 60 }, { "epoch": 0.62, "learning_rate": 0.00018407225206909208, "loss": 2.1577, "step": 62 }, { "epoch": 0.64, "learning_rate": 0.00018288362867693414, "loss": 2.1961, "step": 64 }, { "epoch": 0.66, "learning_rate": 0.0001816563663057211, "loss": 2.1825, "step": 66 }, { "epoch": 0.68, "learning_rate": 0.000180391037084905, "loss": 2.0917, "step": 68 }, { "epoch": 0.7, "learning_rate": 0.00017908823089007457, "loss": 2.0896, "step": 70 }, { "epoch": 0.72, "learning_rate": 0.00017774855506796496, "loss": 2.0877, "step": 72 }, { "epoch": 0.74, "learning_rate": 0.0001763726341533227, "loss": 2.1369, "step": 74 }, { "epoch": 0.76, "learning_rate": 0.0001749611095777581, "loss": 2.0917, "step": 76 }, { "epoch": 0.78, "learning_rate": 0.00017351463937072004, "loss": 2.3862, "step": 78 }, { "epoch": 0.8, "learning_rate": 0.000172033897852734, "loss": 2.1755, "step": 80 }, { "epoch": 0.82, "learning_rate": 0.0001705195753210446, "loss": 2.1165, "step": 82 }, { "epoch": 0.84, "learning_rate": 0.00016897237772781044, "loss": 2.1959, "step": 84 }, { "epoch": 0.86, "learning_rate": 0.00016739302635100108, "loss": 2.1884, "step": 86 }, { "epoch": 0.88, "learning_rate": 0.00016578225745814907, "loss": 2.2531, "step": 88 }, { "epoch": 0.89, "learning_rate": 0.000164140821963114, "loss": 2.2549, "step": 90 }, { "epoch": 0.91, "learning_rate": 0.00016246948507601914, "loss": 2.0919, "step": 92 }, { "epoch": 0.93, "learning_rate": 0.0001607690259465229, "loss": 2.0073, "step": 94 }, { "epoch": 0.95, "learning_rate": 0.00015904023730059228, "loss": 2.1638, "step": 96 }, { "epoch": 0.97, "learning_rate": 0.000157283925070947, "loss": 2.0956, "step": 98 }, { "epoch": 0.99, "learning_rate": 0.000155500908021347, "loss": 1.9043, "step": 100 }, { "epoch": 1.01, "learning_rate": 0.0001536920173648984, "loss": 2.0836, "step": 102 }, { "epoch": 1.03, "learning_rate": 0.0001518580963765555, "loss": 2.1632, "step": 104 }, { "epoch": 1.05, "learning_rate": 0.00015000000000000001, "loss": 2.0727, "step": 106 }, { "epoch": 1.07, "learning_rate": 0.00014811859444908052, "loss": 2.2505, "step": 108 }, { "epoch": 1.09, "learning_rate": 0.0001462147568039977, "loss": 2.1901, "step": 110 }, { "epoch": 1.11, "learning_rate": 0.00014428937460242417, "loss": 2.1774, "step": 112 }, { "epoch": 1.13, "learning_rate": 0.00014234334542574906, "loss": 2.1062, "step": 114 }, { "epoch": 1.15, "learning_rate": 0.00014037757648064018, "loss": 2.0187, "step": 116 }, { "epoch": 1.17, "learning_rate": 0.00013839298417611963, "loss": 2.0678, "step": 118 }, { "epoch": 1.19, "learning_rate": 0.00013639049369634876, "loss": 2.0362, "step": 120 }, { "epoch": 1.21, "learning_rate": 0.00013437103856932264, "loss": 2.0308, "step": 122 }, { "epoch": 1.23, "learning_rate": 0.00013233556023167485, "loss": 1.9568, "step": 124 }, { "epoch": 1.25, "learning_rate": 0.00013028500758979506, "loss": 2.1364, "step": 126 }, { "epoch": 1.27, "learning_rate": 0.00012822033657746478, "loss": 2.2107, "step": 128 }, { "epoch": 1.29, "learning_rate": 0.00012614250971021657, "loss": 2.0798, "step": 130 }, { "epoch": 1.31, "learning_rate": 0.00012405249563662537, "loss": 2.2118, "step": 132 }, { "epoch": 1.33, "learning_rate": 0.00012195126868674051, "loss": 2.1874, "step": 134 }, { "epoch": 1.35, "learning_rate": 0.000119839808417869, "loss": 2.1919, "step": 136 }, { "epoch": 1.37, "learning_rate": 0.0001177190991579223, "loss": 2.0894, "step": 138 }, { "epoch": 1.39, "learning_rate": 0.00011559012954653865, "loss": 2.1773, "step": 140 }, { "epoch": 1.41, "learning_rate": 0.00011345389207419588, "loss": 2.2115, "step": 142 }, { "epoch": 1.43, "learning_rate": 0.00011131138261952845, "loss": 1.9207, "step": 144 }, { "epoch": 1.45, "learning_rate": 0.0001091635999850655, "loss": 1.9258, "step": 146 }, { "epoch": 1.47, "learning_rate": 0.00010701154543160541, "loss": 1.9662, "step": 148 }, { "epoch": 1.49, "learning_rate": 0.00010485622221144484, "loss": 2.0598, "step": 150 }, { "epoch": 1.51, "learning_rate": 0.00010269863510067872, "loss": 1.933, "step": 152 }, { "epoch": 1.53, "learning_rate": 0.00010053978993079045, "loss": 2.2902, "step": 154 }, { "epoch": 1.55, "learning_rate": 9.838069311974986e-05, "loss": 2.1651, "step": 156 }, { "epoch": 1.57, "learning_rate": 9.622235120283769e-05, "loss": 2.0562, "step": 158 }, { "epoch": 1.59, "learning_rate": 9.406577036341548e-05, "loss": 2.1876, "step": 160 }, { "epoch": 1.61, "learning_rate": 9.19119559638596e-05, "loss": 2.1455, "step": 162 }, { "epoch": 1.63, "learning_rate": 8.976191207687775e-05, "loss": 2.0586, "step": 164 }, { "epoch": 1.65, "learning_rate": 8.7616641017427e-05, "loss": 2.0516, "step": 166 }, { "epoch": 1.67, "learning_rate": 8.5477142875451e-05, "loss": 2.1687, "step": 168 }, { "epoch": 1.69, "learning_rate": 8.334441504965455e-05, "loss": 1.8243, "step": 170 }, { "epoch": 1.71, "learning_rate": 8.1219451782533e-05, "loss": 2.011, "step": 172 }, { "epoch": 1.73, "learning_rate": 7.91032436968725e-05, "loss": 2.0671, "step": 174 }, { "epoch": 1.75, "learning_rate": 7.699677733393826e-05, "loss": 2.1099, "step": 176 }, { "epoch": 1.77, "learning_rate": 7.490103469356513e-05, "loss": 2.2192, "step": 178 }, { "epoch": 1.79, "learning_rate": 7.281699277636572e-05, "loss": 2.1303, "step": 180 }, { "epoch": 1.81, "learning_rate": 7.07456231282686e-05, "loss": 2.2478, "step": 182 }, { "epoch": 1.83, "learning_rate": 6.868789138759976e-05, "loss": 2.2372, "step": 184 }, { "epoch": 1.85, "learning_rate": 6.664475683491796e-05, "loss": 2.0905, "step": 186 }, { "epoch": 1.87, "learning_rate": 6.461717194581393e-05, "loss": 2.1761, "step": 188 }, { "epoch": 1.89, "learning_rate": 6.260608194688206e-05, "loss": 2.2156, "step": 190 }, { "epoch": 1.91, "learning_rate": 6.061242437507131e-05, "loss": 2.0831, "step": 192 }, { "epoch": 1.93, "learning_rate": 5.863712864062089e-05, "loss": 1.9666, "step": 194 }, { "epoch": 1.95, "learning_rate": 5.668111559378471e-05, "loss": 2.0193, "step": 196 }, { "epoch": 1.97, "learning_rate": 5.474529709554612e-05, "loss": 2.0009, "step": 198 }, { "epoch": 1.99, "learning_rate": 5.283057559252341e-05, "loss": 1.8429, "step": 200 }, { "epoch": 2.01, "learning_rate": 5.0937843696263966e-05, "loss": 1.825, "step": 202 }, { "epoch": 2.03, "learning_rate": 4.9067983767123736e-05, "loss": 2.1982, "step": 204 }, { "epoch": 2.05, "learning_rate": 4.722186750292511e-05, "loss": 2.2024, "step": 206 }, { "epoch": 2.07, "learning_rate": 4.540035553258619e-05, "loss": 2.0208, "step": 208 }, { "epoch": 2.09, "learning_rate": 4.360429701490934e-05, "loss": 2.0707, "step": 210 }, { "epoch": 2.11, "learning_rate": 4.183452924271776e-05, "loss": 2.0485, "step": 212 }, { "epoch": 2.13, "learning_rate": 4.009187725252309e-05, "loss": 2.1584, "step": 214 }, { "epoch": 2.15, "learning_rate": 3.8377153439907266e-05, "loss": 2.1151, "step": 216 }, { "epoch": 2.17, "learning_rate": 3.669115718079702e-05, "loss": 1.9831, "step": 218 }, { "epoch": 2.19, "learning_rate": 3.503467445880789e-05, "loss": 1.9593, "step": 220 }, { "epoch": 2.21, "learning_rate": 3.340847749883191e-05, "loss": 1.9555, "step": 222 }, { "epoch": 2.23, "learning_rate": 3.1813324407038825e-05, "loss": 1.9759, "step": 224 }, { "epoch": 2.25, "learning_rate": 3.0249958817459722e-05, "loss": 1.9367, "step": 226 }, { "epoch": 2.27, "learning_rate": 2.8719109545317103e-05, "loss": 2.0455, "step": 228 }, { "epoch": 2.29, "learning_rate": 2.722149024726307e-05, "loss": 2.0912, "step": 230 }, { "epoch": 2.31, "learning_rate": 2.5757799088684654e-05, "loss": 2.0974, "step": 232 }, { "epoch": 2.33, "learning_rate": 2.432871841823047e-05, "loss": 2.1669, "step": 234 }, { "epoch": 2.35, "learning_rate": 2.2934914449711087e-05, "loss": 2.0861, "step": 236 }, { "epoch": 2.37, "learning_rate": 2.157703695152109e-05, "loss": 2.2297, "step": 238 }, { "epoch": 2.39, "learning_rate": 2.025571894372794e-05, "loss": 2.078, "step": 240 }, { "epoch": 2.41, "learning_rate": 1.897157640296825e-05, "loss": 2.0759, "step": 242 }, { "epoch": 2.43, "learning_rate": 1.772520797528988e-05, "loss": 1.957, "step": 244 }, { "epoch": 2.45, "learning_rate": 1.65171946970729e-05, "loss": 2.0201, "step": 246 }, { "epoch": 2.47, "learning_rate": 1.534809972415998e-05, "loss": 1.972, "step": 248 }, { "epoch": 2.49, "learning_rate": 1.4218468069322578e-05, "loss": 1.9812, "step": 250 }, { "epoch": 2.51, "learning_rate": 1.3128826348184887e-05, "loss": 1.7967, "step": 252 }, { "epoch": 2.53, "learning_rate": 1.2079682533724379e-05, "loss": 2.1202, "step": 254 }, { "epoch": 2.55, "learning_rate": 1.1071525719463095e-05, "loss": 2.2584, "step": 256 }, { "epoch": 2.57, "learning_rate": 1.010482589146048e-05, "loss": 2.1405, "step": 258 }, { "epoch": 2.59, "learning_rate": 9.180033709213454e-06, "loss": 2.1317, "step": 260 }, { "epoch": 2.61, "learning_rate": 8.297580295566575e-06, "loss": 2.0388, "step": 262 }, { "epoch": 2.63, "learning_rate": 7.457877035729588e-06, "loss": 2.0719, "step": 264 }, { "epoch": 2.65, "learning_rate": 6.661315385496425e-06, "loss": 2.1417, "step": 266 }, { "epoch": 2.67, "learning_rate": 5.908266688755049e-06, "loss": 2.0137, "step": 268 }, { "epoch": 2.68, "learning_rate": 5.199082004372957e-06, "loss": 2.0492, "step": 270 }, { "epoch": 2.7, "learning_rate": 4.534091942539475e-06, "loss": 1.9081, "step": 272 }, { "epoch": 2.72, "learning_rate": 3.913606510640644e-06, "loss": 2.0606, "step": 274 }, { "epoch": 2.74, "learning_rate": 3.3379149687388867e-06, "loss": 1.8957, "step": 276 }, { "epoch": 2.76, "learning_rate": 2.8072856947248037e-06, "loss": 2.1141, "step": 278 }, { "epoch": 2.78, "learning_rate": 2.3219660592038285e-06, "loss": 2.002, "step": 280 }, { "epoch": 2.8, "learning_rate": 1.882182310176095e-06, "loss": 2.0662, "step": 282 }, { "epoch": 2.82, "learning_rate": 1.488139467563354e-06, "loss": 1.9553, "step": 284 }, { "epoch": 2.84, "learning_rate": 1.1400212276321376e-06, "loss": 2.2417, "step": 286 }, { "epoch": 2.86, "learning_rate": 8.379898773574924e-07, "loss": 2.2588, "step": 288 }, { "epoch": 2.88, "learning_rate": 5.821862187675775e-07, "loss": 2.0636, "step": 290 }, { "epoch": 2.9, "learning_rate": 3.727295033040035e-07, "loss": 2.1176, "step": 292 }, { "epoch": 2.92, "learning_rate": 2.0971737622883515e-07, "loss": 1.9134, "step": 294 }, { "epoch": 2.94, "learning_rate": 9.32258311039269e-08, "loss": 1.8922, "step": 296 }, { "epoch": 2.96, "learning_rate": 2.3309174364027907e-08, "loss": 1.9239, "step": 298 }, { "epoch": 2.98, "learning_rate": 0.0, "loss": 1.9386, "step": 300 } ], "logging_steps": 2, "max_steps": 300, "num_train_epochs": 3, "save_steps": 500, "total_flos": 2.587049079238656e+16, "trial_name": null, "trial_params": null }