|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 9.95603827256271, |
|
"global_step": 38500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.9741401603310062e-05, |
|
"loss": 1.9914, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.948280320662012e-05, |
|
"loss": 1.606, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.922420480993018e-05, |
|
"loss": 1.4903, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 1.896560641324024e-05, |
|
"loss": 1.4065, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.87070080165503e-05, |
|
"loss": 1.3202, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.844840961986036e-05, |
|
"loss": 1.288, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.8189811223170417e-05, |
|
"loss": 1.2592, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 1.1307439804077148, |
|
"eval_runtime": 12.893, |
|
"eval_samples_per_second": 1193.128, |
|
"eval_steps_per_second": 149.151, |
|
"step": 3867 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 1.7931212826480477e-05, |
|
"loss": 1.2107, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 1.7672614429790537e-05, |
|
"loss": 1.1851, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 1.7414016033100598e-05, |
|
"loss": 1.1513, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 1.7155417636410658e-05, |
|
"loss": 1.1181, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 1.6896819239720715e-05, |
|
"loss": 1.0948, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 1.6638220843030775e-05, |
|
"loss": 1.0955, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 1.6379622446340835e-05, |
|
"loss": 1.0562, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.6121024049650892e-05, |
|
"loss": 1.0578, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 0.9538503885269165, |
|
"eval_runtime": 12.8064, |
|
"eval_samples_per_second": 1201.199, |
|
"eval_steps_per_second": 150.16, |
|
"step": 7734 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 1.5862425652960952e-05, |
|
"loss": 1.0484, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 1.5603827256271013e-05, |
|
"loss": 1.0226, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 1.5345228859581073e-05, |
|
"loss": 1.0189, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 1.5086630462891131e-05, |
|
"loss": 0.9812, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 1.482803206620119e-05, |
|
"loss": 0.9543, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 1.456943366951125e-05, |
|
"loss": 0.9917, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 1.431083527282131e-05, |
|
"loss": 0.9423, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 1.4052236876131369e-05, |
|
"loss": 0.9628, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 0.8621994256973267, |
|
"eval_runtime": 12.1534, |
|
"eval_samples_per_second": 1265.738, |
|
"eval_steps_per_second": 158.228, |
|
"step": 11601 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 1.379363847944143e-05, |
|
"loss": 0.9401, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"learning_rate": 1.3535040082751488e-05, |
|
"loss": 0.9208, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"learning_rate": 1.3276441686061548e-05, |
|
"loss": 0.9068, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 3.49, |
|
"learning_rate": 1.3017843289371609e-05, |
|
"loss": 0.9114, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"learning_rate": 1.2759244892681665e-05, |
|
"loss": 0.9146, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"learning_rate": 1.2500646495991726e-05, |
|
"loss": 0.8964, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"learning_rate": 1.2242048099301784e-05, |
|
"loss": 0.9084, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 0.8124715685844421, |
|
"eval_runtime": 12.1415, |
|
"eval_samples_per_second": 1266.974, |
|
"eval_steps_per_second": 158.382, |
|
"step": 15468 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 1.1983449702611844e-05, |
|
"loss": 0.8739, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 4.14, |
|
"learning_rate": 1.1724851305921905e-05, |
|
"loss": 0.8625, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 4.27, |
|
"learning_rate": 1.1466252909231963e-05, |
|
"loss": 0.8507, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"learning_rate": 1.1207654512542024e-05, |
|
"loss": 0.8759, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 4.53, |
|
"learning_rate": 1.0949056115852084e-05, |
|
"loss": 0.8383, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"learning_rate": 1.0690457719162142e-05, |
|
"loss": 0.8487, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 4.78, |
|
"learning_rate": 1.0431859322472203e-05, |
|
"loss": 0.8595, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 4.91, |
|
"learning_rate": 1.017326092578226e-05, |
|
"loss": 0.8374, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 0.7769160270690918, |
|
"eval_runtime": 12.8439, |
|
"eval_samples_per_second": 1197.691, |
|
"eval_steps_per_second": 149.721, |
|
"step": 19335 |
|
}, |
|
{ |
|
"epoch": 5.04, |
|
"learning_rate": 9.91466252909232e-06, |
|
"loss": 0.8352, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 5.17, |
|
"learning_rate": 9.65606413240238e-06, |
|
"loss": 0.8663, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 5.3, |
|
"learning_rate": 9.397465735712439e-06, |
|
"loss": 0.8169, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 5.43, |
|
"learning_rate": 9.138867339022499e-06, |
|
"loss": 0.8187, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 5.56, |
|
"learning_rate": 8.880268942332558e-06, |
|
"loss": 0.8153, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 5.69, |
|
"learning_rate": 8.621670545642618e-06, |
|
"loss": 0.8133, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 5.82, |
|
"learning_rate": 8.363072148952676e-06, |
|
"loss": 0.7901, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 5.95, |
|
"learning_rate": 8.104473752262737e-06, |
|
"loss": 0.8139, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 0.7311471104621887, |
|
"eval_runtime": 12.8348, |
|
"eval_samples_per_second": 1198.535, |
|
"eval_steps_per_second": 149.827, |
|
"step": 23202 |
|
}, |
|
{ |
|
"epoch": 6.08, |
|
"learning_rate": 7.845875355572797e-06, |
|
"loss": 0.7811, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 6.21, |
|
"learning_rate": 7.5872769588828555e-06, |
|
"loss": 0.8063, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 6.34, |
|
"learning_rate": 7.328678562192915e-06, |
|
"loss": 0.785, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 6.46, |
|
"learning_rate": 7.070080165502975e-06, |
|
"loss": 0.7909, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 6.59, |
|
"learning_rate": 6.811481768813034e-06, |
|
"loss": 0.7727, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 6.72, |
|
"learning_rate": 6.552883372123093e-06, |
|
"loss": 0.7779, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 6.85, |
|
"learning_rate": 6.294284975433153e-06, |
|
"loss": 0.7627, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 6.98, |
|
"learning_rate": 6.035686578743212e-06, |
|
"loss": 0.7577, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_loss": 0.6985681653022766, |
|
"eval_runtime": 12.1148, |
|
"eval_samples_per_second": 1269.772, |
|
"eval_steps_per_second": 158.732, |
|
"step": 27069 |
|
}, |
|
{ |
|
"epoch": 7.11, |
|
"learning_rate": 5.777088182053272e-06, |
|
"loss": 0.7761, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 7.24, |
|
"learning_rate": 5.518489785363331e-06, |
|
"loss": 0.7782, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 7.37, |
|
"learning_rate": 5.25989138867339e-06, |
|
"loss": 0.7335, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 7.5, |
|
"learning_rate": 5.00129299198345e-06, |
|
"loss": 0.7656, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 7.63, |
|
"learning_rate": 4.742694595293509e-06, |
|
"loss": 0.761, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 7.76, |
|
"learning_rate": 4.484096198603569e-06, |
|
"loss": 0.7517, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 7.89, |
|
"learning_rate": 4.225497801913628e-06, |
|
"loss": 0.75, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 0.6888388991355896, |
|
"eval_runtime": 12.1237, |
|
"eval_samples_per_second": 1268.839, |
|
"eval_steps_per_second": 158.615, |
|
"step": 30936 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"learning_rate": 3.966899405223688e-06, |
|
"loss": 0.7545, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 8.15, |
|
"learning_rate": 3.708301008533747e-06, |
|
"loss": 0.723, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 8.28, |
|
"learning_rate": 3.449702611843807e-06, |
|
"loss": 0.7352, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 8.4, |
|
"learning_rate": 3.191104215153866e-06, |
|
"loss": 0.7253, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 8.53, |
|
"learning_rate": 2.932505818463926e-06, |
|
"loss": 0.7248, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 8.66, |
|
"learning_rate": 2.6739074217739853e-06, |
|
"loss": 0.7371, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 8.79, |
|
"learning_rate": 2.4153090250840447e-06, |
|
"loss": 0.7284, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 8.92, |
|
"learning_rate": 2.156710628394104e-06, |
|
"loss": 0.7205, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_loss": 0.6802728772163391, |
|
"eval_runtime": 12.1115, |
|
"eval_samples_per_second": 1270.111, |
|
"eval_steps_per_second": 158.774, |
|
"step": 34803 |
|
}, |
|
{ |
|
"epoch": 9.05, |
|
"learning_rate": 1.8981122317041636e-06, |
|
"loss": 0.7059, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 9.18, |
|
"learning_rate": 1.6395138350142232e-06, |
|
"loss": 0.7321, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 9.31, |
|
"learning_rate": 1.3809154383242826e-06, |
|
"loss": 0.7392, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 9.44, |
|
"learning_rate": 1.122317041634342e-06, |
|
"loss": 0.7314, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 9.57, |
|
"learning_rate": 8.637186449444015e-07, |
|
"loss": 0.73, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 9.7, |
|
"learning_rate": 6.051202482544609e-07, |
|
"loss": 0.7199, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 9.83, |
|
"learning_rate": 3.465218515645203e-07, |
|
"loss": 0.7258, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 9.96, |
|
"learning_rate": 8.792345487457979e-08, |
|
"loss": 0.7562, |
|
"step": 38500 |
|
} |
|
], |
|
"max_steps": 38670, |
|
"num_train_epochs": 10, |
|
"total_flos": 1994099670728550.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|