{
    "best_metric": null,
    "best_model_checkpoint": null,
    "epoch": 1.9257570970799296,
    "global_step": 8000,
    "is_hyper_param_search": false,
    "is_local_process_zero": true,
    "is_world_process_zero": true,
    "log_history": [
        {
            "epoch": 0.02,
            "learning_rate": 4.939817043813193e-05,
            "loss": 3.7959,
            "step": 100
        },
        {
            "epoch": 0.05,
            "learning_rate": 4.8796340876263846e-05,
            "loss": 2.4163,
            "step": 200
        },
        {
            "epoch": 0.07,
            "learning_rate": 4.8194511314395764e-05,
            "loss": 2.2765,
            "step": 300
        },
        {
            "epoch": 0.1,
            "learning_rate": 4.759268175252768e-05,
            "loss": 2.1919,
            "step": 400
        },
        {
            "epoch": 0.12,
            "learning_rate": 4.699085219065961e-05,
            "loss": 2.1446,
            "step": 500
        },
        {
            "epoch": 0.14,
            "learning_rate": 4.6389022628791526e-05,
            "loss": 2.099,
            "step": 600
        },
        {
            "epoch": 0.17,
            "learning_rate": 4.578719306692345e-05,
            "loss": 2.0678,
            "step": 700
        },
        {
            "epoch": 0.19,
            "learning_rate": 4.518536350505537e-05,
            "loss": 2.0363,
            "step": 800
        },
        {
            "epoch": 0.22,
            "learning_rate": 4.458353394318729e-05,
            "loss": 2.0215,
            "step": 900
        },
        {
            "epoch": 0.24,
            "learning_rate": 4.3981704381319214e-05,
            "loss": 1.9806,
            "step": 1000
        },
        {
            "epoch": 0.26,
            "learning_rate": 4.337987481945113e-05,
            "loss": 1.9762,
            "step": 1100
        },
        {
            "epoch": 0.29,
            "learning_rate": 4.277804525758306e-05,
            "loss": 1.9529,
            "step": 1200
        },
        {
            "epoch": 0.31,
            "learning_rate": 4.2176215695714976e-05,
            "loss": 1.9387,
            "step": 1300
        },
        {
            "epoch": 0.34,
            "learning_rate": 4.15743861338469e-05,
            "loss": 1.9218,
            "step": 1400
        },
        {
            "epoch": 0.36,
            "learning_rate": 4.097255657197881e-05,
            "loss": 1.9309,
            "step": 1500
        },
        {
            "epoch": 0.39,
            "learning_rate": 4.037072701011074e-05,
            "loss": 1.9178,
            "step": 1600
        },
        {
            "epoch": 0.41,
            "learning_rate": 3.9768897448242656e-05,
            "loss": 1.8922,
            "step": 1700
        },
        {
            "epoch": 0.43,
            "learning_rate": 3.916706788637458e-05,
            "loss": 1.8956,
            "step": 1800
        },
        {
            "epoch": 0.46,
            "learning_rate": 3.85652383245065e-05,
            "loss": 1.8586,
            "step": 1900
        },
        {
            "epoch": 0.48,
            "learning_rate": 3.7963408762638425e-05,
            "loss": 1.8704,
            "step": 2000
        },
        {
            "epoch": 0.51,
            "learning_rate": 3.736157920077034e-05,
            "loss": 1.8613,
            "step": 2100
        },
        {
            "epoch": 0.53,
            "learning_rate": 3.675974963890226e-05,
            "loss": 1.8462,
            "step": 2200
        },
        {
            "epoch": 0.55,
            "learning_rate": 3.615792007703419e-05,
            "loss": 1.8478,
            "step": 2300
        },
        {
            "epoch": 0.58,
            "learning_rate": 3.5556090515166105e-05,
            "loss": 1.8343,
            "step": 2400
        },
        {
            "epoch": 0.6,
            "learning_rate": 3.495426095329803e-05,
            "loss": 1.8253,
            "step": 2500
        },
        {
            "epoch": 0.63,
            "learning_rate": 3.435243139142995e-05,
            "loss": 1.8151,
            "step": 2600
        },
        {
            "epoch": 0.65,
            "learning_rate": 3.375060182956187e-05,
            "loss": 1.8214,
            "step": 2700
        },
        {
            "epoch": 0.67,
            "learning_rate": 3.314877226769379e-05,
            "loss": 1.8282,
            "step": 2800
        },
        {
            "epoch": 0.7,
            "learning_rate": 3.254694270582571e-05,
            "loss": 1.8126,
            "step": 2900
        },
        {
            "epoch": 0.72,
            "learning_rate": 3.1945113143957636e-05,
            "loss": 1.8166,
            "step": 3000
        },
        {
            "epoch": 0.75,
            "learning_rate": 3.1343283582089554e-05,
            "loss": 1.7904,
            "step": 3100
        },
        {
            "epoch": 0.77,
            "learning_rate": 3.074145402022148e-05,
            "loss": 1.8105,
            "step": 3200
        },
        {
            "epoch": 0.79,
            "learning_rate": 3.0139624458353395e-05,
            "loss": 1.783,
            "step": 3300
        },
        {
            "epoch": 0.82,
            "learning_rate": 2.9537794896485316e-05,
            "loss": 1.79,
            "step": 3400
        },
        {
            "epoch": 0.84,
            "learning_rate": 2.8935965334617238e-05,
            "loss": 1.7718,
            "step": 3500
        },
        {
            "epoch": 0.87,
            "learning_rate": 2.833413577274916e-05,
            "loss": 1.7597,
            "step": 3600
        },
        {
            "epoch": 0.89,
            "learning_rate": 2.7732306210881082e-05,
            "loss": 1.7698,
            "step": 3700
        },
        {
            "epoch": 0.91,
            "learning_rate": 2.7130476649013004e-05,
            "loss": 1.7491,
            "step": 3800
        },
        {
            "epoch": 0.94,
            "learning_rate": 2.652864708714492e-05,
            "loss": 1.7619,
            "step": 3900
        },
        {
            "epoch": 0.96,
            "learning_rate": 2.592681752527684e-05,
            "loss": 1.7624,
            "step": 4000
        },
        {
            "epoch": 0.99,
            "learning_rate": 2.5324987963408762e-05,
            "loss": 1.7416,
            "step": 4100
        },
        {
            "epoch": 1.0,
            "eval_loss": 1.5835336446762085,
            "eval_runtime": 4.4301,
            "eval_samples_per_second": 225.726,
            "eval_steps_per_second": 28.216,
            "step": 4154
        },
        {
            "epoch": 1.01,
            "learning_rate": 2.4723158401540687e-05,
            "loss": 1.7607,
            "step": 4200
        },
        {
            "epoch": 1.04,
            "learning_rate": 2.4121328839672606e-05,
            "loss": 1.7205,
            "step": 4300
        },
        {
            "epoch": 1.06,
            "learning_rate": 2.3519499277804528e-05,
            "loss": 1.7291,
            "step": 4400
        },
        {
            "epoch": 1.08,
            "learning_rate": 2.291766971593645e-05,
            "loss": 1.7262,
            "step": 4500
        },
        {
            "epoch": 1.11,
            "learning_rate": 2.2315840154068368e-05,
            "loss": 1.735,
            "step": 4600
        },
        {
            "epoch": 1.13,
            "learning_rate": 2.171401059220029e-05,
            "loss": 1.7306,
            "step": 4700
        },
        {
            "epoch": 1.16,
            "learning_rate": 2.111218103033221e-05,
            "loss": 1.7141,
            "step": 4800
        },
        {
            "epoch": 1.18,
            "learning_rate": 2.051035146846413e-05,
            "loss": 1.7185,
            "step": 4900
        },
        {
            "epoch": 1.2,
            "learning_rate": 1.990852190659605e-05,
            "loss": 1.7133,
            "step": 5000
        },
        {
            "epoch": 1.23,
            "learning_rate": 1.9306692344727973e-05,
            "loss": 1.7126,
            "step": 5100
        },
        {
            "epoch": 1.25,
            "learning_rate": 1.8704862782859895e-05,
            "loss": 1.71,
            "step": 5200
        },
        {
            "epoch": 1.28,
            "learning_rate": 1.8103033220991817e-05,
            "loss": 1.7059,
            "step": 5300
        },
        {
            "epoch": 1.3,
            "learning_rate": 1.750120365912374e-05,
            "loss": 1.7092,
            "step": 5400
        },
        {
            "epoch": 1.32,
            "learning_rate": 1.6899374097255657e-05,
            "loss": 1.7026,
            "step": 5500
        },
        {
            "epoch": 1.35,
            "learning_rate": 1.629754453538758e-05,
            "loss": 1.7148,
            "step": 5600
        },
        {
            "epoch": 1.37,
            "learning_rate": 1.56957149735195e-05,
            "loss": 1.7137,
            "step": 5700
        },
        {
            "epoch": 1.4,
            "learning_rate": 1.5093885411651421e-05,
            "loss": 1.7005,
            "step": 5800
        },
        {
            "epoch": 1.42,
            "learning_rate": 1.4492055849783343e-05,
            "loss": 1.7074,
            "step": 5900
        },
        {
            "epoch": 1.44,
            "learning_rate": 1.3890226287915261e-05,
            "loss": 1.6943,
            "step": 6000
        },
        {
            "epoch": 1.47,
            "learning_rate": 1.3288396726047183e-05,
            "loss": 1.6917,
            "step": 6100
        },
        {
            "epoch": 1.49,
            "learning_rate": 1.2686567164179105e-05,
            "loss": 1.6953,
            "step": 6200
        },
        {
            "epoch": 1.52,
            "learning_rate": 1.2084737602311027e-05,
            "loss": 1.6869,
            "step": 6300
        },
        {
            "epoch": 1.54,
            "learning_rate": 1.1482908040442947e-05,
            "loss": 1.6904,
            "step": 6400
        },
        {
            "epoch": 1.56,
            "learning_rate": 1.0881078478574869e-05,
            "loss": 1.6988,
            "step": 6500
        },
        {
            "epoch": 1.59,
            "learning_rate": 1.0279248916706789e-05,
            "loss": 1.6942,
            "step": 6600
        },
        {
            "epoch": 1.61,
            "learning_rate": 9.67741935483871e-06,
            "loss": 1.6887,
            "step": 6700
        },
        {
            "epoch": 1.64,
            "learning_rate": 9.075589792970632e-06,
            "loss": 1.6858,
            "step": 6800
        },
        {
            "epoch": 1.66,
            "learning_rate": 8.473760231102552e-06,
            "loss": 1.6848,
            "step": 6900
        },
        {
            "epoch": 1.69,
            "learning_rate": 7.871930669234472e-06,
            "loss": 1.6951,
            "step": 7000
        },
        {
            "epoch": 1.71,
            "learning_rate": 7.270101107366393e-06,
            "loss": 1.6921,
            "step": 7100
        },
        {
            "epoch": 1.73,
            "learning_rate": 6.668271545498315e-06,
            "loss": 1.6801,
            "step": 7200
        },
        {
            "epoch": 1.76,
            "learning_rate": 6.066441983630236e-06,
            "loss": 1.6934,
            "step": 7300
        },
        {
            "epoch": 1.78,
            "learning_rate": 5.464612421762157e-06,
            "loss": 1.6914,
            "step": 7400
        },
        {
            "epoch": 1.81,
            "learning_rate": 4.862782859894078e-06,
            "loss": 1.6872,
            "step": 7500
        },
        {
            "epoch": 1.83,
            "learning_rate": 4.260953298025999e-06,
            "loss": 1.6799,
            "step": 7600
        },
        {
            "epoch": 1.85,
            "learning_rate": 3.6591237361579204e-06,
            "loss": 1.6832,
            "step": 7700
        },
        {
            "epoch": 1.88,
            "learning_rate": 3.0572941742898413e-06,
            "loss": 1.6902,
            "step": 7800
        },
        {
            "epoch": 1.9,
            "learning_rate": 2.4554646124217623e-06,
            "loss": 1.6805,
            "step": 7900
        },
        {
            "epoch": 1.93,
            "learning_rate": 1.8536350505536832e-06,
            "loss": 1.676,
            "step": 8000
        }
    ],
    "max_steps": 8308,
    "num_train_epochs": 2,
    "total_flos": 5.801913298450115e+17,
    "trial_name": null,
    "trial_params": null
}