{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.983219390926041, "eval_steps": 500, "global_step": 300, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "learning_rate": 4.4444444444444447e-05, "loss": 2.5697, "step": 2 }, { "epoch": 0.04, "learning_rate": 8.888888888888889e-05, "loss": 2.5895, "step": 4 }, { "epoch": 0.06, "learning_rate": 0.00013333333333333334, "loss": 2.5404, "step": 6 }, { "epoch": 0.08, "learning_rate": 0.00017777777777777779, "loss": 2.5212, "step": 8 }, { "epoch": 0.1, "learning_rate": 0.00019999417253661235, "loss": 2.5517, "step": 10 }, { "epoch": 0.12, "learning_rate": 0.00019994755690455152, "loss": 2.3682, "step": 12 }, { "epoch": 0.14, "learning_rate": 0.0001998543473718677, "loss": 2.5259, "step": 14 }, { "epoch": 0.16, "learning_rate": 0.00019971458739130598, "loss": 2.3868, "step": 16 }, { "epoch": 0.18, "learning_rate": 0.0001995283421166614, "loss": 2.0537, "step": 18 }, { "epoch": 0.2, "learning_rate": 0.00019929569837240564, "loss": 2.1381, "step": 20 }, { "epoch": 0.22, "learning_rate": 0.00019901676461321068, "loss": 2.0976, "step": 22 }, { "epoch": 0.24, "learning_rate": 0.00019869167087338907, "loss": 2.2247, "step": 24 }, { "epoch": 0.26, "learning_rate": 0.00019832056870627417, "loss": 2.0686, "step": 26 }, { "epoch": 0.28, "learning_rate": 0.00019790363111356837, "loss": 2.2601, "step": 28 }, { "epoch": 0.3, "learning_rate": 0.00019744105246469263, "loss": 2.273, "step": 30 }, { "epoch": 0.32, "learning_rate": 0.00019693304840617457, "loss": 2.1859, "step": 32 }, { "epoch": 0.34, "learning_rate": 0.00019637985576111778, "loss": 2.2706, "step": 34 }, { "epoch": 0.36, "learning_rate": 0.00019578173241879872, "loss": 2.3597, "step": 36 }, { "epoch": 0.38, "learning_rate": 0.00019513895721444286, "loss": 2.21, "step": 38 }, { "epoch": 0.4, "learning_rate": 0.00019445182979923654, "loss": 2.2261, "step": 40 }, { "epoch": 0.42, "learning_rate": 0.00019372067050063438, "loss": 2.1489, "step": 42 }, { "epoch": 0.44, "learning_rate": 0.00019294582017302797, "loss": 1.9741, "step": 44 }, { "epoch": 0.46, "learning_rate": 0.0001921276400388451, "loss": 2.0867, "step": 46 }, { "epoch": 0.48, "learning_rate": 0.00019126651152015403, "loss": 2.0778, "step": 48 }, { "epoch": 0.5, "learning_rate": 0.00019036283606085053, "loss": 2.2826, "step": 50 }, { "epoch": 0.52, "learning_rate": 0.00018941703493951164, "loss": 2.1793, "step": 52 }, { "epoch": 0.54, "learning_rate": 0.00018842954907300236, "loss": 2.1489, "step": 54 }, { "epoch": 0.56, "learning_rate": 0.0001874008388109276, "loss": 2.21, "step": 56 }, { "epoch": 0.58, "learning_rate": 0.00018633138372102468, "loss": 2.2462, "step": 58 }, { "epoch": 0.6, "learning_rate": 0.00018522168236559695, "loss": 2.2693, "step": 60 }, { "epoch": 0.62, "learning_rate": 0.00018407225206909208, "loss": 2.0625, "step": 62 }, { "epoch": 0.64, "learning_rate": 0.00018288362867693414, "loss": 2.1222, "step": 64 }, { "epoch": 0.66, "learning_rate": 0.0001816563663057211, "loss": 2.184, "step": 66 }, { "epoch": 0.68, "learning_rate": 0.000180391037084905, "loss": 2.1686, "step": 68 }, { "epoch": 0.7, "learning_rate": 0.00017908823089007457, "loss": 1.968, "step": 70 }, { "epoch": 0.72, "learning_rate": 0.00017774855506796496, "loss": 2.0057, "step": 72 }, { "epoch": 0.74, "learning_rate": 0.0001763726341533227, "loss": 2.1126, "step": 74 }, { "epoch": 0.76, "learning_rate": 0.0001749611095777581, "loss": 2.0198, "step": 76 }, { "epoch": 0.78, "learning_rate": 0.00017351463937072004, "loss": 2.2914, "step": 78 }, { "epoch": 0.8, "learning_rate": 0.000172033897852734, "loss": 2.2551, "step": 80 }, { "epoch": 0.82, "learning_rate": 0.0001705195753210446, "loss": 2.1974, "step": 82 }, { "epoch": 0.84, "learning_rate": 0.00016897237772781044, "loss": 2.1299, "step": 84 }, { "epoch": 0.86, "learning_rate": 0.00016739302635100108, "loss": 2.1766, "step": 86 }, { "epoch": 0.88, "learning_rate": 0.00016578225745814907, "loss": 2.217, "step": 88 }, { "epoch": 0.89, "learning_rate": 0.000164140821963114, "loss": 2.2156, "step": 90 }, { "epoch": 0.91, "learning_rate": 0.00016246948507601914, "loss": 2.0829, "step": 92 }, { "epoch": 0.93, "learning_rate": 0.0001607690259465229, "loss": 1.9199, "step": 94 }, { "epoch": 0.95, "learning_rate": 0.00015904023730059228, "loss": 2.0483, "step": 96 }, { "epoch": 0.97, "learning_rate": 0.000157283925070947, "loss": 1.99, "step": 98 }, { "epoch": 0.99, "learning_rate": 0.000155500908021347, "loss": 1.791, "step": 100 }, { "epoch": 1.01, "learning_rate": 0.0001536920173648984, "loss": 2.1892, "step": 102 }, { "epoch": 1.03, "learning_rate": 0.0001518580963765555, "loss": 2.2071, "step": 104 }, { "epoch": 1.05, "learning_rate": 0.00015000000000000001, "loss": 2.1052, "step": 106 }, { "epoch": 1.07, "learning_rate": 0.00014811859444908052, "loss": 2.1146, "step": 108 }, { "epoch": 1.09, "learning_rate": 0.0001462147568039977, "loss": 2.215, "step": 110 }, { "epoch": 1.11, "learning_rate": 0.00014428937460242417, "loss": 2.135, "step": 112 }, { "epoch": 1.13, "learning_rate": 0.00014234334542574906, "loss": 2.1541, "step": 114 }, { "epoch": 1.15, "learning_rate": 0.00014037757648064018, "loss": 2.1113, "step": 116 }, { "epoch": 1.17, "learning_rate": 0.00013839298417611963, "loss": 2.1941, "step": 118 }, { "epoch": 1.19, "learning_rate": 0.00013639049369634876, "loss": 1.9565, "step": 120 }, { "epoch": 1.21, "learning_rate": 0.00013437103856932264, "loss": 2.0152, "step": 122 }, { "epoch": 1.23, "learning_rate": 0.00013233556023167485, "loss": 2.0077, "step": 124 }, { "epoch": 1.25, "learning_rate": 0.00013028500758979506, "loss": 1.846, "step": 126 }, { "epoch": 1.27, "learning_rate": 0.00012822033657746478, "loss": 2.0307, "step": 128 }, { "epoch": 1.29, "learning_rate": 0.00012614250971021657, "loss": 2.0594, "step": 130 }, { "epoch": 1.31, "learning_rate": 0.00012405249563662537, "loss": 2.0874, "step": 132 }, { "epoch": 1.33, "learning_rate": 0.00012195126868674051, "loss": 2.1116, "step": 134 }, { "epoch": 1.35, "learning_rate": 0.000119839808417869, "loss": 2.2352, "step": 136 }, { "epoch": 1.37, "learning_rate": 0.0001177190991579223, "loss": 2.138, "step": 138 }, { "epoch": 1.39, "learning_rate": 0.00011559012954653865, "loss": 2.027, "step": 140 }, { "epoch": 1.41, "learning_rate": 0.00011345389207419588, "loss": 2.1868, "step": 142 }, { "epoch": 1.43, "learning_rate": 0.00011131138261952845, "loss": 1.8327, "step": 144 }, { "epoch": 1.45, "learning_rate": 0.0001091635999850655, "loss": 1.9577, "step": 146 }, { "epoch": 1.47, "learning_rate": 0.00010701154543160541, "loss": 1.9917, "step": 148 }, { "epoch": 1.49, "learning_rate": 0.00010485622221144484, "loss": 2.0174, "step": 150 }, { "epoch": 1.51, "learning_rate": 0.00010269863510067872, "loss": 1.8088, "step": 152 }, { "epoch": 1.53, "learning_rate": 0.00010053978993079045, "loss": 2.1077, "step": 154 }, { "epoch": 1.55, "learning_rate": 9.838069311974986e-05, "loss": 2.0825, "step": 156 }, { "epoch": 1.57, "learning_rate": 9.622235120283769e-05, "loss": 2.1353, "step": 158 }, { "epoch": 1.59, "learning_rate": 9.406577036341548e-05, "loss": 2.2037, "step": 160 }, { "epoch": 1.61, "learning_rate": 9.19119559638596e-05, "loss": 2.1083, "step": 162 }, { "epoch": 1.63, "learning_rate": 8.976191207687775e-05, "loss": 2.0183, "step": 164 }, { "epoch": 1.65, "learning_rate": 8.7616641017427e-05, "loss": 2.1374, "step": 166 }, { "epoch": 1.67, "learning_rate": 8.5477142875451e-05, "loss": 2.0109, "step": 168 }, { "epoch": 1.69, "learning_rate": 8.334441504965455e-05, "loss": 1.9927, "step": 170 }, { "epoch": 1.71, "learning_rate": 8.1219451782533e-05, "loss": 1.8696, "step": 172 }, { "epoch": 1.73, "learning_rate": 7.91032436968725e-05, "loss": 2.0078, "step": 174 }, { "epoch": 1.75, "learning_rate": 7.699677733393826e-05, "loss": 2.0233, "step": 176 }, { "epoch": 1.77, "learning_rate": 7.490103469356513e-05, "loss": 2.1897, "step": 178 }, { "epoch": 1.79, "learning_rate": 7.281699277636572e-05, "loss": 2.2561, "step": 180 }, { "epoch": 1.81, "learning_rate": 7.07456231282686e-05, "loss": 2.0742, "step": 182 }, { "epoch": 1.83, "learning_rate": 6.868789138759976e-05, "loss": 2.1923, "step": 184 }, { "epoch": 1.85, "learning_rate": 6.664475683491796e-05, "loss": 2.2466, "step": 186 }, { "epoch": 1.87, "learning_rate": 6.461717194581393e-05, "loss": 1.9814, "step": 188 }, { "epoch": 1.89, "learning_rate": 6.260608194688206e-05, "loss": 2.141, "step": 190 }, { "epoch": 1.91, "learning_rate": 6.061242437507131e-05, "loss": 2.1967, "step": 192 }, { "epoch": 1.93, "learning_rate": 5.863712864062089e-05, "loss": 1.906, "step": 194 }, { "epoch": 1.95, "learning_rate": 5.668111559378471e-05, "loss": 1.8858, "step": 196 }, { "epoch": 1.97, "learning_rate": 5.474529709554612e-05, "loss": 1.9545, "step": 198 }, { "epoch": 1.99, "learning_rate": 5.283057559252341e-05, "loss": 1.9466, "step": 200 }, { "epoch": 2.01, "learning_rate": 5.0937843696263966e-05, "loss": 1.9956, "step": 202 }, { "epoch": 2.03, "learning_rate": 4.9067983767123736e-05, "loss": 2.1361, "step": 204 }, { "epoch": 2.05, "learning_rate": 4.722186750292511e-05, "loss": 2.0693, "step": 206 }, { "epoch": 2.07, "learning_rate": 4.540035553258619e-05, "loss": 2.1495, "step": 208 }, { "epoch": 2.09, "learning_rate": 4.360429701490934e-05, "loss": 2.2293, "step": 210 }, { "epoch": 2.11, "learning_rate": 4.183452924271776e-05, "loss": 2.1223, "step": 212 }, { "epoch": 2.13, "learning_rate": 4.009187725252309e-05, "loss": 2.1263, "step": 214 }, { "epoch": 2.15, "learning_rate": 3.8377153439907266e-05, "loss": 2.1873, "step": 216 }, { "epoch": 2.17, "learning_rate": 3.669115718079702e-05, "loss": 2.0112, "step": 218 }, { "epoch": 2.19, "learning_rate": 3.503467445880789e-05, "loss": 1.9042, "step": 220 }, { "epoch": 2.21, "learning_rate": 3.340847749883191e-05, "loss": 1.9094, "step": 222 }, { "epoch": 2.23, "learning_rate": 3.1813324407038825e-05, "loss": 1.9675, "step": 224 }, { "epoch": 2.25, "learning_rate": 3.0249958817459722e-05, "loss": 1.8769, "step": 226 }, { "epoch": 2.27, "learning_rate": 2.8719109545317103e-05, "loss": 2.1195, "step": 228 }, { "epoch": 2.29, "learning_rate": 2.722149024726307e-05, "loss": 2.145, "step": 230 }, { "epoch": 2.31, "learning_rate": 2.5757799088684654e-05, "loss": 2.0707, "step": 232 }, { "epoch": 2.33, "learning_rate": 2.432871841823047e-05, "loss": 2.0882, "step": 234 }, { "epoch": 2.35, "learning_rate": 2.2934914449711087e-05, "loss": 2.1098, "step": 236 }, { "epoch": 2.37, "learning_rate": 2.157703695152109e-05, "loss": 1.9743, "step": 238 }, { "epoch": 2.39, "learning_rate": 2.025571894372794e-05, "loss": 2.0888, "step": 240 }, { "epoch": 2.41, "learning_rate": 1.897157640296825e-05, "loss": 2.1608, "step": 242 }, { "epoch": 2.43, "learning_rate": 1.772520797528988e-05, "loss": 2.0018, "step": 244 }, { "epoch": 2.45, "learning_rate": 1.65171946970729e-05, "loss": 1.8554, "step": 246 }, { "epoch": 2.47, "learning_rate": 1.534809972415998e-05, "loss": 1.9581, "step": 248 }, { "epoch": 2.49, "learning_rate": 1.4218468069322578e-05, "loss": 1.7347, "step": 250 }, { "epoch": 2.51, "learning_rate": 1.3128826348184887e-05, "loss": 1.9621, "step": 252 }, { "epoch": 2.53, "learning_rate": 1.2079682533724379e-05, "loss": 2.0392, "step": 254 }, { "epoch": 2.55, "learning_rate": 1.1071525719463095e-05, "loss": 2.0806, "step": 256 }, { "epoch": 2.57, "learning_rate": 1.010482589146048e-05, "loss": 1.961, "step": 258 }, { "epoch": 2.59, "learning_rate": 9.180033709213454e-06, "loss": 2.1009, "step": 260 }, { "epoch": 2.61, "learning_rate": 8.297580295566575e-06, "loss": 2.0751, "step": 262 }, { "epoch": 2.63, "learning_rate": 7.457877035729588e-06, "loss": 2.0697, "step": 264 }, { "epoch": 2.65, "learning_rate": 6.661315385496425e-06, "loss": 2.0509, "step": 266 }, { "epoch": 2.67, "learning_rate": 5.908266688755049e-06, "loss": 2.0297, "step": 268 }, { "epoch": 2.68, "learning_rate": 5.199082004372957e-06, "loss": 1.9283, "step": 270 }, { "epoch": 2.7, "learning_rate": 4.534091942539475e-06, "loss": 1.9338, "step": 272 }, { "epoch": 2.72, "learning_rate": 3.913606510640644e-06, "loss": 2.081, "step": 274 }, { "epoch": 2.74, "learning_rate": 3.3379149687388867e-06, "loss": 1.7918, "step": 276 }, { "epoch": 2.76, "learning_rate": 2.8072856947248037e-06, "loss": 2.1411, "step": 278 }, { "epoch": 2.78, "learning_rate": 2.3219660592038285e-06, "loss": 2.07, "step": 280 }, { "epoch": 2.8, "learning_rate": 1.882182310176095e-06, "loss": 2.0207, "step": 282 }, { "epoch": 2.82, "learning_rate": 1.488139467563354e-06, "loss": 2.1235, "step": 284 }, { "epoch": 2.84, "learning_rate": 1.1400212276321376e-06, "loss": 2.0617, "step": 286 }, { "epoch": 2.86, "learning_rate": 8.379898773574924e-07, "loss": 2.1492, "step": 288 }, { "epoch": 2.88, "learning_rate": 5.821862187675775e-07, "loss": 2.1032, "step": 290 }, { "epoch": 2.9, "learning_rate": 3.727295033040035e-07, "loss": 2.0329, "step": 292 }, { "epoch": 2.92, "learning_rate": 2.0971737622883515e-07, "loss": 1.8083, "step": 294 }, { "epoch": 2.94, "learning_rate": 9.32258311039269e-08, "loss": 1.8436, "step": 296 }, { "epoch": 2.96, "learning_rate": 2.3309174364027907e-08, "loss": 1.9567, "step": 298 }, { "epoch": 2.98, "learning_rate": 0.0, "loss": 1.9306, "step": 300 } ], "logging_steps": 2, "max_steps": 300, "num_train_epochs": 3, "save_steps": 500, "total_flos": 2.6018582893019136e+16, "trial_name": null, "trial_params": null }