|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 5.0, |
|
"eval_steps": 500, |
|
"global_step": 57385, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.945543260433913e-06, |
|
"loss": 3.7697, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.891086520867823e-06, |
|
"loss": 2.9544, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 9.836629781301735e-06, |
|
"loss": 2.4309, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 9.782173041735646e-06, |
|
"loss": 2.1416, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 9.727716302169558e-06, |
|
"loss": 2.0346, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 9.673259562603468e-06, |
|
"loss": 1.9859, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 9.61880282303738e-06, |
|
"loss": 1.9512, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 9.56434608347129e-06, |
|
"loss": 1.9171, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 9.509889343905202e-06, |
|
"loss": 1.8989, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 9.455432604339114e-06, |
|
"loss": 1.868, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 9.400975864773026e-06, |
|
"loss": 1.8423, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 9.346519125206936e-06, |
|
"loss": 1.8311, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 9.292062385640848e-06, |
|
"loss": 1.8139, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 9.237605646074758e-06, |
|
"loss": 1.809, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 9.18314890650867e-06, |
|
"loss": 1.7932, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 9.128692166942582e-06, |
|
"loss": 1.7807, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.074235427376494e-06, |
|
"loss": 1.7729, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 9.019778687810404e-06, |
|
"loss": 1.7695, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 8.965321948244316e-06, |
|
"loss": 1.7464, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 8.910865208678226e-06, |
|
"loss": 1.7436, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 8.856408469112138e-06, |
|
"loss": 1.736, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 8.80195172954605e-06, |
|
"loss": 1.729, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 1.7001079320907593, |
|
"eval_runtime": 3.4215, |
|
"eval_samples_per_second": 39.748, |
|
"eval_steps_per_second": 4.969, |
|
"step": 11477 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 8.747494989979961e-06, |
|
"loss": 1.7264, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 8.693038250413871e-06, |
|
"loss": 1.708, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 8.638581510847783e-06, |
|
"loss": 1.6948, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 8.584124771281695e-06, |
|
"loss": 1.698, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 8.529668031715605e-06, |
|
"loss": 1.692, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 8.475211292149517e-06, |
|
"loss": 1.684, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 8.420754552583429e-06, |
|
"loss": 1.6879, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 8.36629781301734e-06, |
|
"loss": 1.6804, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 8.311841073451251e-06, |
|
"loss": 1.6713, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 8.257384333885163e-06, |
|
"loss": 1.6703, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 8.202927594319073e-06, |
|
"loss": 1.6646, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 8.148470854752985e-06, |
|
"loss": 1.651, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 8.094014115186897e-06, |
|
"loss": 1.6488, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 8.039557375620808e-06, |
|
"loss": 1.6452, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 7.985100636054719e-06, |
|
"loss": 1.6386, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 7.93064389648863e-06, |
|
"loss": 1.6349, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 7.87618715692254e-06, |
|
"loss": 1.6345, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 7.821730417356452e-06, |
|
"loss": 1.6294, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 7.767273677790364e-06, |
|
"loss": 1.631, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 7.712816938224276e-06, |
|
"loss": 1.6261, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 7.658360198658186e-06, |
|
"loss": 1.6281, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 7.603903459092098e-06, |
|
"loss": 1.611, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 7.549446719526009e-06, |
|
"loss": 1.6155, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 1.596663475036621, |
|
"eval_runtime": 3.4296, |
|
"eval_samples_per_second": 39.655, |
|
"eval_steps_per_second": 4.957, |
|
"step": 22954 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 7.49498997995992e-06, |
|
"loss": 1.6029, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 7.440533240393831e-06, |
|
"loss": 1.607, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 7.386076500827744e-06, |
|
"loss": 1.5977, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 7.331619761261655e-06, |
|
"loss": 1.5922, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 7.277163021695566e-06, |
|
"loss": 1.5956, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 7.222706282129477e-06, |
|
"loss": 1.5855, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 7.168249542563388e-06, |
|
"loss": 1.5826, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 7.1137928029972995e-06, |
|
"loss": 1.5846, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 7.059336063431211e-06, |
|
"loss": 1.5899, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 7.004879323865122e-06, |
|
"loss": 1.5828, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 6.950422584299033e-06, |
|
"loss": 1.5762, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 6.895965844732945e-06, |
|
"loss": 1.5739, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 6.841509105166856e-06, |
|
"loss": 1.574, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 6.787052365600767e-06, |
|
"loss": 1.5759, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 6.732595626034678e-06, |
|
"loss": 1.5737, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 6.67813888646859e-06, |
|
"loss": 1.5637, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 6.623682146902502e-06, |
|
"loss": 1.5635, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 6.569225407336413e-06, |
|
"loss": 1.5641, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 6.514768667770324e-06, |
|
"loss": 1.553, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 6.460311928204235e-06, |
|
"loss": 1.5699, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 6.405855188638146e-06, |
|
"loss": 1.5695, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 6.3513984490720584e-06, |
|
"loss": 1.5665, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 6.296941709505969e-06, |
|
"loss": 1.5527, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 1.5436657667160034, |
|
"eval_runtime": 3.2624, |
|
"eval_samples_per_second": 41.687, |
|
"eval_steps_per_second": 5.211, |
|
"step": 34431 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 6.24248496993988e-06, |
|
"loss": 1.5506, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 6.188028230373791e-06, |
|
"loss": 1.559, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 6.133571490807702e-06, |
|
"loss": 1.5388, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"learning_rate": 6.079114751241613e-06, |
|
"loss": 1.5467, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"learning_rate": 6.024658011675526e-06, |
|
"loss": 1.5391, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"learning_rate": 5.970201272109437e-06, |
|
"loss": 1.5364, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 3.27, |
|
"learning_rate": 5.915744532543348e-06, |
|
"loss": 1.5376, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 3.31, |
|
"learning_rate": 5.861287792977259e-06, |
|
"loss": 1.5397, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"learning_rate": 5.806831053411171e-06, |
|
"loss": 1.5336, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 5.752374313845082e-06, |
|
"loss": 1.5378, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"learning_rate": 5.697917574278993e-06, |
|
"loss": 1.5318, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 3.49, |
|
"learning_rate": 5.643460834712905e-06, |
|
"loss": 1.5252, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"learning_rate": 5.589004095146816e-06, |
|
"loss": 1.5333, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"learning_rate": 5.5345473555807275e-06, |
|
"loss": 1.5299, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"learning_rate": 5.4800906160146385e-06, |
|
"loss": 1.5215, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 3.66, |
|
"learning_rate": 5.4256338764485495e-06, |
|
"loss": 1.52, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"learning_rate": 5.3711771368824605e-06, |
|
"loss": 1.5258, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"learning_rate": 5.316720397316373e-06, |
|
"loss": 1.5256, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"learning_rate": 5.262263657750284e-06, |
|
"loss": 1.5205, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"learning_rate": 5.207806918184195e-06, |
|
"loss": 1.5236, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"learning_rate": 5.153350178618106e-06, |
|
"loss": 1.5281, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"learning_rate": 5.098893439052017e-06, |
|
"loss": 1.5175, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 3.96, |
|
"learning_rate": 5.044436699485928e-06, |
|
"loss": 1.5215, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 1.5107132196426392, |
|
"eval_runtime": 3.2672, |
|
"eval_samples_per_second": 41.625, |
|
"eval_steps_per_second": 5.203, |
|
"step": 45908 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 4.98997995991984e-06, |
|
"loss": 1.5202, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"learning_rate": 4.935523220353751e-06, |
|
"loss": 1.5136, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 4.881066480787663e-06, |
|
"loss": 1.5119, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 4.14, |
|
"learning_rate": 4.826609741221574e-06, |
|
"loss": 1.5052, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"learning_rate": 4.772153001655485e-06, |
|
"loss": 1.5088, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 4.23, |
|
"learning_rate": 4.717696262089397e-06, |
|
"loss": 1.5078, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 4.27, |
|
"learning_rate": 4.663239522523308e-06, |
|
"loss": 1.5099, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 4.31, |
|
"learning_rate": 4.608782782957219e-06, |
|
"loss": 1.5098, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 4.36, |
|
"learning_rate": 4.5543260433911305e-06, |
|
"loss": 1.5044, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"learning_rate": 4.4998693038250415e-06, |
|
"loss": 1.5049, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 4.44, |
|
"learning_rate": 4.445412564258953e-06, |
|
"loss": 1.4958, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 4.49, |
|
"learning_rate": 4.390955824692864e-06, |
|
"loss": 1.5073, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 4.53, |
|
"learning_rate": 4.336499085126776e-06, |
|
"loss": 1.5015, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"learning_rate": 4.282042345560687e-06, |
|
"loss": 1.5022, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 4.62, |
|
"learning_rate": 4.227585605994598e-06, |
|
"loss": 1.4973, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 4.66, |
|
"learning_rate": 4.17312886642851e-06, |
|
"loss": 1.4944, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 4.71, |
|
"learning_rate": 4.118672126862421e-06, |
|
"loss": 1.5024, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 4.75, |
|
"learning_rate": 4.064215387296332e-06, |
|
"loss": 1.4981, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 4.79, |
|
"learning_rate": 4.009758647730244e-06, |
|
"loss": 1.4942, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 4.84, |
|
"learning_rate": 3.955301908164155e-06, |
|
"loss": 1.4964, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 4.88, |
|
"learning_rate": 3.900845168598066e-06, |
|
"loss": 1.4995, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"learning_rate": 3.846388429031978e-06, |
|
"loss": 1.4959, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 4.97, |
|
"learning_rate": 3.7919316894658886e-06, |
|
"loss": 1.4922, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 1.4904537200927734, |
|
"eval_runtime": 3.2628, |
|
"eval_samples_per_second": 41.682, |
|
"eval_steps_per_second": 5.21, |
|
"step": 57385 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 91816, |
|
"num_train_epochs": 8, |
|
"save_steps": 500, |
|
"total_flos": 8.99595804672e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|