{ "best_metric": null, "best_model_checkpoint": null, "epoch": 10.0, "global_step": 182810, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 4.98632459931076e-05, "loss": 2.0893, "step": 500 }, { "epoch": 0.05, "learning_rate": 4.97264919862152e-05, "loss": 2.0282, "step": 1000 }, { "epoch": 0.08, "learning_rate": 4.9589737979322795e-05, "loss": 2.0359, "step": 1500 }, { "epoch": 0.11, "learning_rate": 4.94529839724304e-05, "loss": 2.0573, "step": 2000 }, { "epoch": 0.14, "learning_rate": 4.931622996553799e-05, "loss": 2.0498, "step": 2500 }, { "epoch": 0.16, "learning_rate": 4.9179475958645595e-05, "loss": 1.9906, "step": 3000 }, { "epoch": 0.19, "learning_rate": 4.904272195175319e-05, "loss": 2.0297, "step": 3500 }, { "epoch": 0.22, "learning_rate": 4.8905967944860786e-05, "loss": 1.9994, "step": 4000 }, { "epoch": 0.25, "learning_rate": 4.876921393796838e-05, "loss": 2.0443, "step": 4500 }, { "epoch": 0.27, "learning_rate": 4.863245993107598e-05, "loss": 1.9891, "step": 5000 }, { "epoch": 0.3, "learning_rate": 4.849570592418358e-05, "loss": 1.9978, "step": 5500 }, { "epoch": 0.33, "learning_rate": 4.835895191729118e-05, "loss": 2.0001, "step": 6000 }, { "epoch": 0.36, "learning_rate": 4.8222197910398776e-05, "loss": 1.9579, "step": 6500 }, { "epoch": 0.38, "learning_rate": 4.808544390350638e-05, "loss": 1.9862, "step": 7000 }, { "epoch": 0.41, "learning_rate": 4.7948689896613974e-05, "loss": 1.959, "step": 7500 }, { "epoch": 0.44, "learning_rate": 4.781193588972157e-05, "loss": 1.9861, "step": 8000 }, { "epoch": 0.46, "learning_rate": 4.7675181882829165e-05, "loss": 1.9626, "step": 8500 }, { "epoch": 0.49, "learning_rate": 4.753842787593677e-05, "loss": 1.9609, "step": 9000 }, { "epoch": 0.52, "learning_rate": 4.740167386904437e-05, "loss": 1.9949, "step": 9500 }, { "epoch": 0.55, "learning_rate": 4.7264919862151964e-05, "loss": 1.9398, "step": 10000 }, { "epoch": 0.57, "learning_rate": 4.712816585525956e-05, "loss": 1.9657, "step": 10500 }, { "epoch": 0.6, "learning_rate": 4.699141184836716e-05, "loss": 1.9369, "step": 11000 }, { "epoch": 0.63, "learning_rate": 4.685465784147476e-05, "loss": 1.9916, "step": 11500 }, { "epoch": 0.66, "learning_rate": 4.671790383458235e-05, "loss": 1.9725, "step": 12000 }, { "epoch": 0.68, "learning_rate": 4.6581149827689955e-05, "loss": 1.9161, "step": 12500 }, { "epoch": 0.71, "learning_rate": 4.644439582079756e-05, "loss": 1.9733, "step": 13000 }, { "epoch": 0.74, "learning_rate": 4.630764181390515e-05, "loss": 1.952, "step": 13500 }, { "epoch": 0.77, "learning_rate": 4.617088780701275e-05, "loss": 1.9321, "step": 14000 }, { "epoch": 0.79, "learning_rate": 4.603413380012034e-05, "loss": 1.935, "step": 14500 }, { "epoch": 0.82, "learning_rate": 4.5897379793227945e-05, "loss": 1.9755, "step": 15000 }, { "epoch": 0.85, "learning_rate": 4.576062578633554e-05, "loss": 1.9573, "step": 15500 }, { "epoch": 0.88, "learning_rate": 4.5623871779443136e-05, "loss": 1.9531, "step": 16000 }, { "epoch": 0.9, "learning_rate": 4.548711777255074e-05, "loss": 1.9465, "step": 16500 }, { "epoch": 0.93, "learning_rate": 4.535036376565834e-05, "loss": 1.9641, "step": 17000 }, { "epoch": 0.96, "learning_rate": 4.5213609758765936e-05, "loss": 1.9157, "step": 17500 }, { "epoch": 0.98, "learning_rate": 4.507685575187353e-05, "loss": 1.9518, "step": 18000 }, { "epoch": 1.01, "learning_rate": 4.4940101744981127e-05, "loss": 1.7654, "step": 18500 }, { "epoch": 1.04, "learning_rate": 4.480334773808873e-05, "loss": 1.6299, "step": 19000 }, { "epoch": 1.07, "learning_rate": 4.4666593731196324e-05, "loss": 1.6508, "step": 19500 }, { "epoch": 1.09, "learning_rate": 4.4529839724303926e-05, "loss": 1.6008, "step": 20000 }, { "epoch": 1.12, "learning_rate": 4.439308571741152e-05, "loss": 1.6127, "step": 20500 }, { "epoch": 1.15, "learning_rate": 4.4256331710519124e-05, "loss": 1.5774, "step": 21000 }, { "epoch": 1.18, "learning_rate": 4.411957770362672e-05, "loss": 1.6349, "step": 21500 }, { "epoch": 1.2, "learning_rate": 4.3982823696734315e-05, "loss": 1.6484, "step": 22000 }, { "epoch": 1.23, "learning_rate": 4.384606968984191e-05, "loss": 1.6607, "step": 22500 }, { "epoch": 1.26, "learning_rate": 4.370931568294951e-05, "loss": 1.6338, "step": 23000 }, { "epoch": 1.29, "learning_rate": 4.3572561676057114e-05, "loss": 1.6342, "step": 23500 }, { "epoch": 1.31, "learning_rate": 4.343580766916471e-05, "loss": 1.6453, "step": 24000 }, { "epoch": 1.34, "learning_rate": 4.3299053662272305e-05, "loss": 1.6547, "step": 24500 }, { "epoch": 1.37, "learning_rate": 4.316229965537991e-05, "loss": 1.6587, "step": 25000 }, { "epoch": 1.39, "learning_rate": 4.30255456484875e-05, "loss": 1.6449, "step": 25500 }, { "epoch": 1.42, "learning_rate": 4.28887916415951e-05, "loss": 1.6473, "step": 26000 }, { "epoch": 1.45, "learning_rate": 4.275203763470269e-05, "loss": 1.6378, "step": 26500 }, { "epoch": 1.48, "learning_rate": 4.2615283627810296e-05, "loss": 1.6495, "step": 27000 }, { "epoch": 1.5, "learning_rate": 4.24785296209179e-05, "loss": 1.695, "step": 27500 }, { "epoch": 1.53, "learning_rate": 4.234177561402549e-05, "loss": 1.7073, "step": 28000 }, { "epoch": 1.56, "learning_rate": 4.220502160713309e-05, "loss": 1.7791, "step": 28500 }, { "epoch": 1.59, "learning_rate": 4.206826760024069e-05, "loss": 1.7924, "step": 29000 }, { "epoch": 1.61, "learning_rate": 4.1931513593348286e-05, "loss": 1.8115, "step": 29500 }, { "epoch": 1.64, "learning_rate": 4.179475958645588e-05, "loss": 1.7199, "step": 30000 }, { "epoch": 1.67, "learning_rate": 4.1658005579563484e-05, "loss": 1.7923, "step": 30500 }, { "epoch": 1.7, "learning_rate": 4.1521251572671086e-05, "loss": 1.6858, "step": 31000 }, { "epoch": 1.72, "learning_rate": 4.138449756577868e-05, "loss": 1.6759, "step": 31500 }, { "epoch": 1.75, "learning_rate": 4.1247743558886276e-05, "loss": 1.6905, "step": 32000 }, { "epoch": 1.78, "learning_rate": 4.111098955199388e-05, "loss": 1.6696, "step": 32500 }, { "epoch": 1.81, "learning_rate": 4.0974235545101474e-05, "loss": 1.7242, "step": 33000 }, { "epoch": 1.83, "learning_rate": 4.083748153820907e-05, "loss": 1.7341, "step": 33500 }, { "epoch": 1.86, "learning_rate": 4.0700727531316665e-05, "loss": 1.7026, "step": 34000 }, { "epoch": 1.89, "learning_rate": 4.056397352442427e-05, "loss": 1.7249, "step": 34500 }, { "epoch": 1.91, "learning_rate": 4.042721951753187e-05, "loss": 1.6447, "step": 35000 }, { "epoch": 1.94, "learning_rate": 4.0290465510639464e-05, "loss": 1.6748, "step": 35500 }, { "epoch": 1.97, "learning_rate": 4.015371150374706e-05, "loss": 1.6952, "step": 36000 }, { "epoch": 2.0, "learning_rate": 4.001695749685466e-05, "loss": 1.6569, "step": 36500 }, { "epoch": 2.02, "learning_rate": 3.988020348996226e-05, "loss": 1.4236, "step": 37000 }, { "epoch": 2.05, "learning_rate": 3.974344948306985e-05, "loss": 1.4146, "step": 37500 }, { "epoch": 2.08, "learning_rate": 3.9606695476177455e-05, "loss": 1.4398, "step": 38000 }, { "epoch": 2.11, "learning_rate": 3.946994146928506e-05, "loss": 1.433, "step": 38500 }, { "epoch": 2.13, "learning_rate": 3.933318746239265e-05, "loss": 1.4019, "step": 39000 }, { "epoch": 2.16, "learning_rate": 3.919643345550025e-05, "loss": 1.4358, "step": 39500 }, { "epoch": 2.19, "learning_rate": 3.905967944860784e-05, "loss": 1.4445, "step": 40000 }, { "epoch": 2.22, "learning_rate": 3.8922925441715445e-05, "loss": 1.3795, "step": 40500 }, { "epoch": 2.24, "learning_rate": 3.878617143482304e-05, "loss": 1.4617, "step": 41000 }, { "epoch": 2.27, "learning_rate": 3.864941742793064e-05, "loss": 1.399, "step": 41500 }, { "epoch": 2.3, "learning_rate": 3.851266342103824e-05, "loss": 1.4227, "step": 42000 }, { "epoch": 2.32, "learning_rate": 3.837590941414584e-05, "loss": 1.406, "step": 42500 }, { "epoch": 2.35, "learning_rate": 3.8239155407253436e-05, "loss": 1.4229, "step": 43000 }, { "epoch": 2.38, "learning_rate": 3.810240140036103e-05, "loss": 1.4309, "step": 43500 }, { "epoch": 2.41, "learning_rate": 3.796564739346863e-05, "loss": 1.4129, "step": 44000 }, { "epoch": 2.43, "learning_rate": 3.782889338657623e-05, "loss": 1.4409, "step": 44500 }, { "epoch": 2.46, "learning_rate": 3.7692139379683824e-05, "loss": 1.3927, "step": 45000 }, { "epoch": 2.49, "learning_rate": 3.7555385372791426e-05, "loss": 1.4553, "step": 45500 }, { "epoch": 2.52, "learning_rate": 3.741863136589902e-05, "loss": 1.4204, "step": 46000 }, { "epoch": 2.54, "learning_rate": 3.7281877359006624e-05, "loss": 1.4391, "step": 46500 }, { "epoch": 2.57, "learning_rate": 3.714512335211422e-05, "loss": 1.4732, "step": 47000 }, { "epoch": 2.6, "learning_rate": 3.7008369345221815e-05, "loss": 1.4435, "step": 47500 }, { "epoch": 2.63, "learning_rate": 3.687161533832941e-05, "loss": 1.4206, "step": 48000 }, { "epoch": 2.65, "learning_rate": 3.673486133143701e-05, "loss": 1.4552, "step": 48500 }, { "epoch": 2.68, "learning_rate": 3.6598107324544614e-05, "loss": 1.4611, "step": 49000 }, { "epoch": 2.71, "learning_rate": 3.646135331765221e-05, "loss": 1.4487, "step": 49500 }, { "epoch": 2.74, "learning_rate": 3.6324599310759805e-05, "loss": 1.4679, "step": 50000 }, { "epoch": 2.76, "learning_rate": 3.618784530386741e-05, "loss": 1.4245, "step": 50500 }, { "epoch": 2.79, "learning_rate": 3.6051091296975e-05, "loss": 1.4871, "step": 51000 }, { "epoch": 2.82, "learning_rate": 3.59143372900826e-05, "loss": 1.4553, "step": 51500 }, { "epoch": 2.84, "learning_rate": 3.57775832831902e-05, "loss": 1.4775, "step": 52000 }, { "epoch": 2.87, "learning_rate": 3.56408292762978e-05, "loss": 1.4399, "step": 52500 }, { "epoch": 2.9, "learning_rate": 3.55040752694054e-05, "loss": 1.4695, "step": 53000 }, { "epoch": 2.93, "learning_rate": 3.536732126251299e-05, "loss": 1.4722, "step": 53500 }, { "epoch": 2.95, "learning_rate": 3.523056725562059e-05, "loss": 1.4315, "step": 54000 }, { "epoch": 2.98, "learning_rate": 3.509381324872819e-05, "loss": 1.4428, "step": 54500 }, { "epoch": 3.01, "learning_rate": 3.4957059241835786e-05, "loss": 1.4064, "step": 55000 }, { "epoch": 3.04, "learning_rate": 3.482030523494338e-05, "loss": 1.216, "step": 55500 }, { "epoch": 3.06, "learning_rate": 3.4683551228050984e-05, "loss": 1.2302, "step": 56000 }, { "epoch": 3.09, "learning_rate": 3.4546797221158586e-05, "loss": 1.217, "step": 56500 }, { "epoch": 3.12, "learning_rate": 3.441004321426618e-05, "loss": 1.2273, "step": 57000 }, { "epoch": 3.15, "learning_rate": 3.427328920737378e-05, "loss": 1.2445, "step": 57500 }, { "epoch": 3.17, "learning_rate": 3.413653520048137e-05, "loss": 1.276, "step": 58000 }, { "epoch": 3.2, "learning_rate": 3.3999781193588974e-05, "loss": 1.2173, "step": 58500 }, { "epoch": 3.23, "learning_rate": 3.386302718669657e-05, "loss": 1.2329, "step": 59000 }, { "epoch": 3.25, "learning_rate": 3.372627317980417e-05, "loss": 1.2375, "step": 59500 }, { "epoch": 3.28, "learning_rate": 3.358951917291177e-05, "loss": 1.231, "step": 60000 }, { "epoch": 3.31, "learning_rate": 3.345276516601937e-05, "loss": 1.2456, "step": 60500 }, { "epoch": 3.34, "learning_rate": 3.3316011159126965e-05, "loss": 1.2182, "step": 61000 }, { "epoch": 3.36, "learning_rate": 3.317925715223456e-05, "loss": 1.2585, "step": 61500 }, { "epoch": 3.39, "learning_rate": 3.3042503145342155e-05, "loss": 1.2544, "step": 62000 }, { "epoch": 3.42, "learning_rate": 3.290574913844976e-05, "loss": 1.2676, "step": 62500 }, { "epoch": 3.45, "learning_rate": 3.276899513155735e-05, "loss": 1.2598, "step": 63000 }, { "epoch": 3.47, "learning_rate": 3.2632241124664955e-05, "loss": 1.235, "step": 63500 }, { "epoch": 3.5, "learning_rate": 3.249548711777256e-05, "loss": 1.2327, "step": 64000 }, { "epoch": 3.53, "learning_rate": 3.235873311088015e-05, "loss": 1.2675, "step": 64500 }, { "epoch": 3.56, "learning_rate": 3.222197910398775e-05, "loss": 1.2959, "step": 65000 }, { "epoch": 3.58, "learning_rate": 3.2085225097095343e-05, "loss": 1.259, "step": 65500 }, { "epoch": 3.61, "learning_rate": 3.1948471090202946e-05, "loss": 1.2785, "step": 66000 }, { "epoch": 3.64, "learning_rate": 3.181171708331054e-05, "loss": 1.2611, "step": 66500 }, { "epoch": 3.67, "learning_rate": 3.167496307641814e-05, "loss": 1.2528, "step": 67000 }, { "epoch": 3.69, "learning_rate": 3.153820906952574e-05, "loss": 1.2584, "step": 67500 }, { "epoch": 3.72, "learning_rate": 3.140145506263334e-05, "loss": 1.2675, "step": 68000 }, { "epoch": 3.75, "learning_rate": 3.1264701055740936e-05, "loss": 1.2754, "step": 68500 }, { "epoch": 3.77, "learning_rate": 3.112794704884853e-05, "loss": 1.2927, "step": 69000 }, { "epoch": 3.8, "learning_rate": 3.099119304195613e-05, "loss": 1.2775, "step": 69500 }, { "epoch": 3.83, "learning_rate": 3.085443903506373e-05, "loss": 1.2686, "step": 70000 }, { "epoch": 3.86, "learning_rate": 3.071768502817133e-05, "loss": 1.2631, "step": 70500 }, { "epoch": 3.88, "learning_rate": 3.0580931021278927e-05, "loss": 1.2756, "step": 71000 }, { "epoch": 3.91, "learning_rate": 3.0444177014386522e-05, "loss": 1.2529, "step": 71500 }, { "epoch": 3.94, "learning_rate": 3.0307423007494124e-05, "loss": 1.2656, "step": 72000 }, { "epoch": 3.97, "learning_rate": 3.017066900060172e-05, "loss": 1.2597, "step": 72500 }, { "epoch": 3.99, "learning_rate": 3.0033914993709318e-05, "loss": 1.2928, "step": 73000 }, { "epoch": 4.02, "learning_rate": 2.9897160986816914e-05, "loss": 1.1113, "step": 73500 }, { "epoch": 4.05, "learning_rate": 2.9760406979924516e-05, "loss": 1.0677, "step": 74000 }, { "epoch": 4.08, "learning_rate": 2.962365297303211e-05, "loss": 1.054, "step": 74500 }, { "epoch": 4.1, "learning_rate": 2.948689896613971e-05, "loss": 1.0708, "step": 75000 }, { "epoch": 4.13, "learning_rate": 2.9350144959247305e-05, "loss": 1.0741, "step": 75500 }, { "epoch": 4.16, "learning_rate": 2.9213390952354908e-05, "loss": 1.0736, "step": 76000 }, { "epoch": 4.18, "learning_rate": 2.9076636945462503e-05, "loss": 1.0618, "step": 76500 }, { "epoch": 4.21, "learning_rate": 2.8939882938570102e-05, "loss": 1.1291, "step": 77000 }, { "epoch": 4.24, "learning_rate": 2.8803128931677697e-05, "loss": 1.0901, "step": 77500 }, { "epoch": 4.27, "learning_rate": 2.86663749247853e-05, "loss": 1.0895, "step": 78000 }, { "epoch": 4.29, "learning_rate": 2.8529620917892898e-05, "loss": 1.0702, "step": 78500 }, { "epoch": 4.32, "learning_rate": 2.8392866911000493e-05, "loss": 1.0729, "step": 79000 }, { "epoch": 4.35, "learning_rate": 2.825611290410809e-05, "loss": 1.0829, "step": 79500 }, { "epoch": 4.38, "learning_rate": 2.811935889721569e-05, "loss": 1.105, "step": 80000 }, { "epoch": 4.4, "learning_rate": 2.798260489032329e-05, "loss": 1.1153, "step": 80500 }, { "epoch": 4.43, "learning_rate": 2.7845850883430885e-05, "loss": 1.0886, "step": 81000 }, { "epoch": 4.46, "learning_rate": 2.770909687653848e-05, "loss": 1.112, "step": 81500 }, { "epoch": 4.49, "learning_rate": 2.7572342869646083e-05, "loss": 1.1041, "step": 82000 }, { "epoch": 4.51, "learning_rate": 2.743558886275368e-05, "loss": 1.096, "step": 82500 }, { "epoch": 4.54, "learning_rate": 2.7298834855861277e-05, "loss": 1.1532, "step": 83000 }, { "epoch": 4.57, "learning_rate": 2.7162080848968872e-05, "loss": 1.127, "step": 83500 }, { "epoch": 4.59, "learning_rate": 2.7025326842076474e-05, "loss": 1.1254, "step": 84000 }, { "epoch": 4.62, "learning_rate": 2.6888572835184073e-05, "loss": 1.1162, "step": 84500 }, { "epoch": 4.65, "learning_rate": 2.675181882829167e-05, "loss": 1.0954, "step": 85000 }, { "epoch": 4.68, "learning_rate": 2.6615064821399267e-05, "loss": 1.1377, "step": 85500 }, { "epoch": 4.7, "learning_rate": 2.647831081450687e-05, "loss": 1.1012, "step": 86000 }, { "epoch": 4.73, "learning_rate": 2.6341556807614465e-05, "loss": 1.1177, "step": 86500 }, { "epoch": 4.76, "learning_rate": 2.620480280072206e-05, "loss": 1.1125, "step": 87000 }, { "epoch": 4.79, "learning_rate": 2.606804879382966e-05, "loss": 1.1555, "step": 87500 }, { "epoch": 4.81, "learning_rate": 2.593129478693726e-05, "loss": 1.1127, "step": 88000 }, { "epoch": 4.84, "learning_rate": 2.5794540780044857e-05, "loss": 1.1456, "step": 88500 }, { "epoch": 4.87, "learning_rate": 2.5657786773152452e-05, "loss": 1.1249, "step": 89000 }, { "epoch": 4.9, "learning_rate": 2.552103276626005e-05, "loss": 1.1422, "step": 89500 }, { "epoch": 4.92, "learning_rate": 2.5384278759367653e-05, "loss": 1.1422, "step": 90000 }, { "epoch": 4.95, "learning_rate": 2.5247524752475248e-05, "loss": 1.1592, "step": 90500 }, { "epoch": 4.98, "learning_rate": 2.5110770745582847e-05, "loss": 1.1493, "step": 91000 }, { "epoch": 5.01, "learning_rate": 2.4974016738690446e-05, "loss": 1.0908, "step": 91500 }, { "epoch": 5.03, "learning_rate": 2.483726273179804e-05, "loss": 0.9386, "step": 92000 }, { "epoch": 5.06, "learning_rate": 2.470050872490564e-05, "loss": 0.9452, "step": 92500 }, { "epoch": 5.09, "learning_rate": 2.456375471801324e-05, "loss": 0.98, "step": 93000 }, { "epoch": 5.11, "learning_rate": 2.4427000711120838e-05, "loss": 0.9672, "step": 93500 }, { "epoch": 5.14, "learning_rate": 2.4290246704228433e-05, "loss": 0.9567, "step": 94000 }, { "epoch": 5.17, "learning_rate": 2.415349269733603e-05, "loss": 0.9555, "step": 94500 }, { "epoch": 5.2, "learning_rate": 2.401673869044363e-05, "loss": 0.9843, "step": 95000 }, { "epoch": 5.22, "learning_rate": 2.387998468355123e-05, "loss": 0.977, "step": 95500 }, { "epoch": 5.25, "learning_rate": 2.3743230676658828e-05, "loss": 0.9672, "step": 96000 }, { "epoch": 5.28, "learning_rate": 2.3606476669766427e-05, "loss": 0.9683, "step": 96500 }, { "epoch": 5.31, "learning_rate": 2.3469722662874026e-05, "loss": 0.9976, "step": 97000 }, { "epoch": 5.33, "learning_rate": 2.333296865598162e-05, "loss": 0.9912, "step": 97500 }, { "epoch": 5.36, "learning_rate": 2.319621464908922e-05, "loss": 0.9843, "step": 98000 }, { "epoch": 5.39, "learning_rate": 2.305946064219682e-05, "loss": 0.9622, "step": 98500 }, { "epoch": 5.42, "learning_rate": 2.2922706635304417e-05, "loss": 0.9723, "step": 99000 }, { "epoch": 5.44, "learning_rate": 2.2785952628412013e-05, "loss": 0.9783, "step": 99500 }, { "epoch": 5.47, "learning_rate": 2.264919862151961e-05, "loss": 0.956, "step": 100000 }, { "epoch": 5.5, "learning_rate": 2.251244461462721e-05, "loss": 0.9982, "step": 100500 }, { "epoch": 5.52, "learning_rate": 2.237569060773481e-05, "loss": 0.9763, "step": 101000 }, { "epoch": 5.55, "learning_rate": 2.2238936600842404e-05, "loss": 0.9901, "step": 101500 }, { "epoch": 5.58, "learning_rate": 2.2102182593950007e-05, "loss": 0.966, "step": 102000 }, { "epoch": 5.61, "learning_rate": 2.1965428587057602e-05, "loss": 0.9847, "step": 102500 }, { "epoch": 5.63, "learning_rate": 2.18286745801652e-05, "loss": 0.9981, "step": 103000 }, { "epoch": 5.66, "learning_rate": 2.1691920573272796e-05, "loss": 0.9539, "step": 103500 }, { "epoch": 5.69, "learning_rate": 2.1555166566380398e-05, "loss": 0.9897, "step": 104000 }, { "epoch": 5.72, "learning_rate": 2.1418412559487994e-05, "loss": 1.0071, "step": 104500 }, { "epoch": 5.74, "learning_rate": 2.1281658552595592e-05, "loss": 0.9948, "step": 105000 }, { "epoch": 5.77, "learning_rate": 2.114490454570319e-05, "loss": 1.0028, "step": 105500 }, { "epoch": 5.8, "learning_rate": 2.100815053881079e-05, "loss": 1.0115, "step": 106000 }, { "epoch": 5.83, "learning_rate": 2.0871396531918385e-05, "loss": 0.978, "step": 106500 }, { "epoch": 5.85, "learning_rate": 2.0734642525025984e-05, "loss": 0.9922, "step": 107000 }, { "epoch": 5.88, "learning_rate": 2.0597888518133583e-05, "loss": 1.0203, "step": 107500 }, { "epoch": 5.91, "learning_rate": 2.046113451124118e-05, "loss": 1.0288, "step": 108000 }, { "epoch": 5.94, "learning_rate": 2.0324380504348777e-05, "loss": 0.9899, "step": 108500 }, { "epoch": 5.96, "learning_rate": 2.0187626497456376e-05, "loss": 0.9988, "step": 109000 }, { "epoch": 5.99, "learning_rate": 2.0050872490563975e-05, "loss": 1.0026, "step": 109500 }, { "epoch": 6.02, "learning_rate": 1.9914118483671573e-05, "loss": 0.9024, "step": 110000 }, { "epoch": 6.04, "learning_rate": 1.977736447677917e-05, "loss": 0.849, "step": 110500 }, { "epoch": 6.07, "learning_rate": 1.964061046988677e-05, "loss": 0.8861, "step": 111000 }, { "epoch": 6.1, "learning_rate": 1.9503856462994366e-05, "loss": 0.8508, "step": 111500 }, { "epoch": 6.13, "learning_rate": 1.9367102456101965e-05, "loss": 0.8596, "step": 112000 }, { "epoch": 6.15, "learning_rate": 1.923034844920956e-05, "loss": 0.8596, "step": 112500 }, { "epoch": 6.18, "learning_rate": 1.9093594442317163e-05, "loss": 0.8316, "step": 113000 }, { "epoch": 6.21, "learning_rate": 1.8956840435424758e-05, "loss": 0.8432, "step": 113500 }, { "epoch": 6.24, "learning_rate": 1.8820086428532357e-05, "loss": 0.8965, "step": 114000 }, { "epoch": 6.26, "learning_rate": 1.8683332421639956e-05, "loss": 0.8633, "step": 114500 }, { "epoch": 6.29, "learning_rate": 1.8546578414747554e-05, "loss": 0.8741, "step": 115000 }, { "epoch": 6.32, "learning_rate": 1.840982440785515e-05, "loss": 0.859, "step": 115500 }, { "epoch": 6.35, "learning_rate": 1.827307040096275e-05, "loss": 0.8788, "step": 116000 }, { "epoch": 6.37, "learning_rate": 1.8136316394070347e-05, "loss": 0.8782, "step": 116500 }, { "epoch": 6.4, "learning_rate": 1.7999562387177946e-05, "loss": 0.8872, "step": 117000 }, { "epoch": 6.43, "learning_rate": 1.786280838028554e-05, "loss": 0.9024, "step": 117500 }, { "epoch": 6.45, "learning_rate": 1.772605437339314e-05, "loss": 0.8986, "step": 118000 }, { "epoch": 6.48, "learning_rate": 1.758930036650074e-05, "loss": 0.8831, "step": 118500 }, { "epoch": 6.51, "learning_rate": 1.7452546359608338e-05, "loss": 0.8677, "step": 119000 }, { "epoch": 6.54, "learning_rate": 1.7315792352715933e-05, "loss": 0.8984, "step": 119500 }, { "epoch": 6.56, "learning_rate": 1.7179038345823535e-05, "loss": 0.8661, "step": 120000 }, { "epoch": 6.59, "learning_rate": 1.704228433893113e-05, "loss": 0.9107, "step": 120500 }, { "epoch": 6.62, "learning_rate": 1.690553033203873e-05, "loss": 0.9041, "step": 121000 }, { "epoch": 6.65, "learning_rate": 1.6768776325146325e-05, "loss": 0.8869, "step": 121500 }, { "epoch": 6.67, "learning_rate": 1.6632022318253927e-05, "loss": 0.9012, "step": 122000 }, { "epoch": 6.7, "learning_rate": 1.6495268311361522e-05, "loss": 0.8737, "step": 122500 }, { "epoch": 6.73, "learning_rate": 1.635851430446912e-05, "loss": 0.8927, "step": 123000 }, { "epoch": 6.76, "learning_rate": 1.622176029757672e-05, "loss": 0.8836, "step": 123500 }, { "epoch": 6.78, "learning_rate": 1.608500629068432e-05, "loss": 0.8753, "step": 124000 }, { "epoch": 6.81, "learning_rate": 1.5948252283791914e-05, "loss": 0.885, "step": 124500 }, { "epoch": 6.84, "learning_rate": 1.5811498276899513e-05, "loss": 0.8998, "step": 125000 }, { "epoch": 6.87, "learning_rate": 1.567474427000711e-05, "loss": 0.916, "step": 125500 }, { "epoch": 6.89, "learning_rate": 1.553799026311471e-05, "loss": 0.8755, "step": 126000 }, { "epoch": 6.92, "learning_rate": 1.5401236256222306e-05, "loss": 0.8842, "step": 126500 }, { "epoch": 6.95, "learning_rate": 1.5264482249329905e-05, "loss": 0.905, "step": 127000 }, { "epoch": 6.97, "learning_rate": 1.5127728242437505e-05, "loss": 0.9004, "step": 127500 }, { "epoch": 7.0, "learning_rate": 1.4990974235545102e-05, "loss": 0.8866, "step": 128000 }, { "epoch": 7.03, "learning_rate": 1.48542202286527e-05, "loss": 0.7771, "step": 128500 }, { "epoch": 7.06, "learning_rate": 1.4717466221760298e-05, "loss": 0.8103, "step": 129000 }, { "epoch": 7.08, "learning_rate": 1.4580712214867898e-05, "loss": 0.8078, "step": 129500 }, { "epoch": 7.11, "learning_rate": 1.4443958207975494e-05, "loss": 0.7961, "step": 130000 }, { "epoch": 7.14, "learning_rate": 1.4307204201083094e-05, "loss": 0.7902, "step": 130500 }, { "epoch": 7.17, "learning_rate": 1.417045019419069e-05, "loss": 0.8105, "step": 131000 }, { "epoch": 7.19, "learning_rate": 1.403369618729829e-05, "loss": 0.7907, "step": 131500 }, { "epoch": 7.22, "learning_rate": 1.3896942180405887e-05, "loss": 0.792, "step": 132000 }, { "epoch": 7.25, "learning_rate": 1.3760188173513486e-05, "loss": 0.7866, "step": 132500 }, { "epoch": 7.28, "learning_rate": 1.3623434166621083e-05, "loss": 0.8102, "step": 133000 }, { "epoch": 7.3, "learning_rate": 1.3486680159728682e-05, "loss": 0.7984, "step": 133500 }, { "epoch": 7.33, "learning_rate": 1.3349926152836279e-05, "loss": 0.8041, "step": 134000 }, { "epoch": 7.36, "learning_rate": 1.3213172145943878e-05, "loss": 0.8205, "step": 134500 }, { "epoch": 7.38, "learning_rate": 1.3076418139051475e-05, "loss": 0.7662, "step": 135000 }, { "epoch": 7.41, "learning_rate": 1.2939664132159074e-05, "loss": 0.7829, "step": 135500 }, { "epoch": 7.44, "learning_rate": 1.280291012526667e-05, "loss": 0.7997, "step": 136000 }, { "epoch": 7.47, "learning_rate": 1.266615611837427e-05, "loss": 0.8083, "step": 136500 }, { "epoch": 7.49, "learning_rate": 1.2529402111481866e-05, "loss": 0.8018, "step": 137000 }, { "epoch": 7.52, "learning_rate": 1.2392648104589465e-05, "loss": 0.789, "step": 137500 }, { "epoch": 7.55, "learning_rate": 1.2255894097697062e-05, "loss": 0.7978, "step": 138000 }, { "epoch": 7.58, "learning_rate": 1.2119140090804661e-05, "loss": 0.7952, "step": 138500 }, { "epoch": 7.6, "learning_rate": 1.1982386083912258e-05, "loss": 0.7908, "step": 139000 }, { "epoch": 7.63, "learning_rate": 1.1845632077019857e-05, "loss": 0.8163, "step": 139500 }, { "epoch": 7.66, "learning_rate": 1.1708878070127454e-05, "loss": 0.7917, "step": 140000 }, { "epoch": 7.69, "learning_rate": 1.1572124063235053e-05, "loss": 0.7945, "step": 140500 }, { "epoch": 7.71, "learning_rate": 1.1435370056342652e-05, "loss": 0.8289, "step": 141000 }, { "epoch": 7.74, "learning_rate": 1.1298616049450249e-05, "loss": 0.7807, "step": 141500 }, { "epoch": 7.77, "learning_rate": 1.1161862042557847e-05, "loss": 0.8143, "step": 142000 }, { "epoch": 7.79, "learning_rate": 1.1025108035665444e-05, "loss": 0.79, "step": 142500 }, { "epoch": 7.82, "learning_rate": 1.0888354028773043e-05, "loss": 0.7928, "step": 143000 }, { "epoch": 7.85, "learning_rate": 1.0751600021880642e-05, "loss": 0.8083, "step": 143500 }, { "epoch": 7.88, "learning_rate": 1.061484601498824e-05, "loss": 0.8019, "step": 144000 }, { "epoch": 7.9, "learning_rate": 1.0478092008095838e-05, "loss": 0.7891, "step": 144500 }, { "epoch": 7.93, "learning_rate": 1.0341338001203437e-05, "loss": 0.8143, "step": 145000 }, { "epoch": 7.96, "learning_rate": 1.0204583994311034e-05, "loss": 0.82, "step": 145500 }, { "epoch": 7.99, "learning_rate": 1.0067829987418633e-05, "loss": 0.8235, "step": 146000 }, { "epoch": 8.01, "learning_rate": 9.931075980526231e-06, "loss": 0.7471, "step": 146500 }, { "epoch": 8.04, "learning_rate": 9.794321973633828e-06, "loss": 0.7027, "step": 147000 }, { "epoch": 8.07, "learning_rate": 9.657567966741427e-06, "loss": 0.7308, "step": 147500 }, { "epoch": 8.1, "learning_rate": 9.520813959849024e-06, "loss": 0.726, "step": 148000 }, { "epoch": 8.12, "learning_rate": 9.384059952956623e-06, "loss": 0.7165, "step": 148500 }, { "epoch": 8.15, "learning_rate": 9.24730594606422e-06, "loss": 0.7214, "step": 149000 }, { "epoch": 8.18, "learning_rate": 9.110551939171819e-06, "loss": 0.7191, "step": 149500 }, { "epoch": 8.21, "learning_rate": 8.973797932279416e-06, "loss": 0.7499, "step": 150000 }, { "epoch": 8.23, "learning_rate": 8.837043925387015e-06, "loss": 0.7111, "step": 150500 }, { "epoch": 8.26, "learning_rate": 8.700289918494613e-06, "loss": 0.7301, "step": 151000 }, { "epoch": 8.29, "learning_rate": 8.56353591160221e-06, "loss": 0.7363, "step": 151500 }, { "epoch": 8.31, "learning_rate": 8.42678190470981e-06, "loss": 0.7207, "step": 152000 }, { "epoch": 8.34, "learning_rate": 8.290027897817406e-06, "loss": 0.7344, "step": 152500 }, { "epoch": 8.37, "learning_rate": 8.153273890925005e-06, "loss": 0.7097, "step": 153000 }, { "epoch": 8.4, "learning_rate": 8.016519884032602e-06, "loss": 0.7385, "step": 153500 }, { "epoch": 8.42, "learning_rate": 7.879765877140201e-06, "loss": 0.7314, "step": 154000 }, { "epoch": 8.45, "learning_rate": 7.743011870247798e-06, "loss": 0.7595, "step": 154500 }, { "epoch": 8.48, "learning_rate": 7.606257863355397e-06, "loss": 0.7482, "step": 155000 }, { "epoch": 8.51, "learning_rate": 7.469503856462995e-06, "loss": 0.7449, "step": 155500 }, { "epoch": 8.53, "learning_rate": 7.332749849570593e-06, "loss": 0.7366, "step": 156000 }, { "epoch": 8.56, "learning_rate": 7.195995842678191e-06, "loss": 0.7261, "step": 156500 }, { "epoch": 8.59, "learning_rate": 7.059241835785789e-06, "loss": 0.7209, "step": 157000 }, { "epoch": 8.62, "learning_rate": 6.922487828893387e-06, "loss": 0.7439, "step": 157500 }, { "epoch": 8.64, "learning_rate": 6.785733822000985e-06, "loss": 0.7301, "step": 158000 }, { "epoch": 8.67, "learning_rate": 6.648979815108583e-06, "loss": 0.7198, "step": 158500 }, { "epoch": 8.7, "learning_rate": 6.512225808216181e-06, "loss": 0.7509, "step": 159000 }, { "epoch": 8.72, "learning_rate": 6.375471801323779e-06, "loss": 0.749, "step": 159500 }, { "epoch": 8.75, "learning_rate": 6.238717794431377e-06, "loss": 0.7126, "step": 160000 }, { "epoch": 8.78, "learning_rate": 6.101963787538975e-06, "loss": 0.7409, "step": 160500 }, { "epoch": 8.81, "learning_rate": 5.965209780646573e-06, "loss": 0.7593, "step": 161000 }, { "epoch": 8.83, "learning_rate": 5.828455773754171e-06, "loss": 0.7404, "step": 161500 }, { "epoch": 8.86, "learning_rate": 5.6917017668617695e-06, "loss": 0.7502, "step": 162000 }, { "epoch": 8.89, "learning_rate": 5.5549477599693675e-06, "loss": 0.7489, "step": 162500 }, { "epoch": 8.92, "learning_rate": 5.418193753076965e-06, "loss": 0.7303, "step": 163000 }, { "epoch": 8.94, "learning_rate": 5.281439746184563e-06, "loss": 0.7215, "step": 163500 }, { "epoch": 8.97, "learning_rate": 5.144685739292161e-06, "loss": 0.7592, "step": 164000 }, { "epoch": 9.0, "learning_rate": 5.007931732399759e-06, "loss": 0.7516, "step": 164500 }, { "epoch": 9.03, "learning_rate": 4.871177725507357e-06, "loss": 0.6963, "step": 165000 }, { "epoch": 9.05, "learning_rate": 4.734423718614955e-06, "loss": 0.7041, "step": 165500 }, { "epoch": 9.08, "learning_rate": 4.597669711722554e-06, "loss": 0.6961, "step": 166000 }, { "epoch": 9.11, "learning_rate": 4.460915704830152e-06, "loss": 0.6921, "step": 166500 }, { "epoch": 9.14, "learning_rate": 4.32416169793775e-06, "loss": 0.6988, "step": 167000 }, { "epoch": 9.16, "learning_rate": 4.1874076910453484e-06, "loss": 0.676, "step": 167500 }, { "epoch": 9.19, "learning_rate": 4.050653684152946e-06, "loss": 0.6658, "step": 168000 }, { "epoch": 9.22, "learning_rate": 3.913899677260544e-06, "loss": 0.6926, "step": 168500 }, { "epoch": 9.24, "learning_rate": 3.777145670368142e-06, "loss": 0.68, "step": 169000 }, { "epoch": 9.27, "learning_rate": 3.64039166347574e-06, "loss": 0.6911, "step": 169500 }, { "epoch": 9.3, "learning_rate": 3.5036376565833385e-06, "loss": 0.7205, "step": 170000 }, { "epoch": 9.33, "learning_rate": 3.3668836496909364e-06, "loss": 0.711, "step": 170500 }, { "epoch": 9.35, "learning_rate": 3.2301296427985343e-06, "loss": 0.6982, "step": 171000 }, { "epoch": 9.38, "learning_rate": 3.0933756359061323e-06, "loss": 0.677, "step": 171500 }, { "epoch": 9.41, "learning_rate": 2.95662162901373e-06, "loss": 0.6964, "step": 172000 }, { "epoch": 9.44, "learning_rate": 2.8198676221213285e-06, "loss": 0.6738, "step": 172500 }, { "epoch": 9.46, "learning_rate": 2.6831136152289265e-06, "loss": 0.6774, "step": 173000 }, { "epoch": 9.49, "learning_rate": 2.5463596083365244e-06, "loss": 0.6757, "step": 173500 }, { "epoch": 9.52, "learning_rate": 2.4096056014441223e-06, "loss": 0.6856, "step": 174000 }, { "epoch": 9.55, "learning_rate": 2.2728515945517207e-06, "loss": 0.6794, "step": 174500 }, { "epoch": 9.57, "learning_rate": 2.1360975876593186e-06, "loss": 0.6813, "step": 175000 }, { "epoch": 9.6, "learning_rate": 1.9993435807669165e-06, "loss": 0.7041, "step": 175500 }, { "epoch": 9.63, "learning_rate": 1.8625895738745147e-06, "loss": 0.7096, "step": 176000 }, { "epoch": 9.65, "learning_rate": 1.7258355669821126e-06, "loss": 0.6898, "step": 176500 }, { "epoch": 9.68, "learning_rate": 1.5890815600897107e-06, "loss": 0.685, "step": 177000 }, { "epoch": 9.71, "learning_rate": 1.4523275531973089e-06, "loss": 0.7295, "step": 177500 }, { "epoch": 9.74, "learning_rate": 1.3155735463049068e-06, "loss": 0.6815, "step": 178000 }, { "epoch": 9.76, "learning_rate": 1.178819539412505e-06, "loss": 0.6813, "step": 178500 }, { "epoch": 9.79, "learning_rate": 1.0420655325201029e-06, "loss": 0.6895, "step": 179000 }, { "epoch": 9.82, "learning_rate": 9.053115256277009e-07, "loss": 0.6738, "step": 179500 }, { "epoch": 9.85, "learning_rate": 7.685575187352989e-07, "loss": 0.6672, "step": 180000 }, { "epoch": 9.87, "learning_rate": 6.318035118428971e-07, "loss": 0.7159, "step": 180500 }, { "epoch": 9.9, "learning_rate": 4.950495049504951e-07, "loss": 0.6866, "step": 181000 }, { "epoch": 9.93, "learning_rate": 3.582954980580931e-07, "loss": 0.6515, "step": 181500 }, { "epoch": 9.96, "learning_rate": 2.2154149116569118e-07, "loss": 0.6941, "step": 182000 }, { "epoch": 9.98, "learning_rate": 8.478748427328921e-08, "loss": 0.6977, "step": 182500 }, { "epoch": 10.0, "step": 182810, "total_flos": 1.6977577396666368e+17, "train_loss": 1.1540878025462047, "train_runtime": 36258.7506, "train_samples_per_second": 5.042, "train_steps_per_second": 5.042 } ], "max_steps": 182810, "num_train_epochs": 10, "total_flos": 1.6977577396666368e+17, "trial_name": null, "trial_params": null }