|
{
  "best_metric": 70.80451562424408,
  "best_model_checkpoint": "/root/turkic_qa/en_kaz_models/en_kaz_xlm_roberta_base_squad_model/checkpoint-3260",
  "epoch": 10.0,
  "eval_steps": 500,
  "global_step": 6520,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "step": 652,
      "train_exact_match": 54.645354645354644,
      "train_f1": 71.43920046551005,
      "train_runtime": 13.6364,
      "train_samples_per_second": 89.98,
      "train_steps_per_second": 3.227
    },
    {
      "epoch": 1.0,
      "grad_norm": 42.09185028076172,
      "learning_rate": 5e-06,
      "loss": 1.6894,
      "step": 652
    },
    {
      "epoch": 1.0,
      "eval_exact_match": 51.0,
      "eval_f1": 66.75535596211667,
      "eval_runtime": 42.9083,
      "eval_samples_per_second": 89.913,
      "eval_steps_per_second": 3.216,
      "step": 652
    },
    {
      "epoch": 2.0,
      "step": 1304,
      "train_exact_match": 60.03996003996004,
      "train_f1": 74.97586392957278,
      "train_runtime": 13.9152,
      "train_samples_per_second": 89.183,
      "train_steps_per_second": 3.234
    },
    {
      "epoch": 2.0,
      "grad_norm": 25.847925186157227,
      "learning_rate": 1e-05,
      "loss": 1.3571,
      "step": 1304
    },
    {
      "epoch": 2.0,
      "eval_exact_match": 54.0625,
      "eval_f1": 69.25389311582437,
      "eval_runtime": 43.0882,
      "eval_samples_per_second": 89.537,
      "eval_steps_per_second": 3.203,
      "step": 1304
    },
    {
      "epoch": 3.0,
      "step": 1956,
      "train_exact_match": 68.23176823176823,
      "train_f1": 81.76253341059837,
      "train_runtime": 13.5561,
      "train_samples_per_second": 88.964,
      "train_steps_per_second": 3.246
    },
    {
      "epoch": 3.0,
      "grad_norm": 34.729644775390625,
      "learning_rate": 8.750000000000001e-06,
      "loss": 1.1643,
      "step": 1956
    },
    {
      "epoch": 3.0,
      "eval_exact_match": 54.9375,
      "eval_f1": 69.89098890413919,
      "eval_runtime": 43.0049,
      "eval_samples_per_second": 89.711,
      "eval_steps_per_second": 3.209,
      "step": 1956
    },
    {
      "epoch": 4.0,
      "step": 2608,
      "train_exact_match": 71.82817182817183,
      "train_f1": 85.225535613071,
      "train_runtime": 14.2911,
      "train_samples_per_second": 88.167,
      "train_steps_per_second": 3.149
    },
    {
      "epoch": 4.0,
      "grad_norm": 33.257789611816406,
      "learning_rate": 7.500000000000001e-06,
      "loss": 0.9703,
      "step": 2608
    },
    {
      "epoch": 4.0,
      "eval_exact_match": 56.1875,
      "eval_f1": 70.69884423902388,
      "eval_runtime": 43.7869,
      "eval_samples_per_second": 88.109,
      "eval_steps_per_second": 3.152,
      "step": 2608
    },
    {
      "epoch": 5.0,
      "step": 3260,
      "train_exact_match": 76.42357642357642,
      "train_f1": 88.7083439838053,
      "train_runtime": 13.5599,
      "train_samples_per_second": 89.234,
      "train_steps_per_second": 3.245
    },
    {
      "epoch": 5.0,
      "grad_norm": 29.723169326782227,
      "learning_rate": 6.25e-06,
      "loss": 0.8246,
      "step": 3260
    },
    {
      "epoch": 5.0,
      "eval_exact_match": 56.53125,
      "eval_f1": 70.80451562424408,
      "eval_runtime": 43.1752,
      "eval_samples_per_second": 89.357,
      "eval_steps_per_second": 3.196,
      "step": 3260
    },
    {
      "epoch": 6.0,
      "step": 3912,
      "train_exact_match": 77.32267732267732,
      "train_f1": 89.41850450829368,
      "train_runtime": 13.3805,
      "train_samples_per_second": 87.964,
      "train_steps_per_second": 3.214
    },
    {
      "epoch": 6.0,
      "grad_norm": 43.614688873291016,
      "learning_rate": 5e-06,
      "loss": 0.7106,
      "step": 3912
    },
    {
      "epoch": 6.0,
      "eval_exact_match": 56.28125,
      "eval_f1": 70.69128089372614,
      "eval_runtime": 43.3271,
      "eval_samples_per_second": 89.044,
      "eval_steps_per_second": 3.185,
      "step": 3912
    },
    {
      "epoch": 7.0,
      "step": 4564,
      "train_exact_match": 80.31968031968032,
      "train_f1": 90.9932970975269,
      "train_runtime": 13.7154,
      "train_samples_per_second": 88.222,
      "train_steps_per_second": 3.208
    },
    {
      "epoch": 7.0,
      "grad_norm": 16.68464469909668,
      "learning_rate": 3.7500000000000005e-06,
      "loss": 0.6206,
      "step": 4564
    },
    {
      "epoch": 7.0,
      "eval_exact_match": 56.28125,
      "eval_f1": 70.50206404433744,
      "eval_runtime": 43.2324,
      "eval_samples_per_second": 89.239,
      "eval_steps_per_second": 3.192,
      "step": 4564
    },
    {
      "epoch": 8.0,
      "step": 5216,
      "train_exact_match": 82.41758241758242,
      "train_f1": 92.16600808934162,
      "train_runtime": 13.6769,
      "train_samples_per_second": 88.105,
      "train_steps_per_second": 3.217
    },
    {
      "epoch": 8.0,
      "grad_norm": 30.79519271850586,
      "learning_rate": 2.5e-06,
      "loss": 0.5603,
      "step": 5216
    },
    {
      "epoch": 8.0,
      "eval_exact_match": 56.40625,
      "eval_f1": 70.75408379302462,
      "eval_runtime": 43.2887,
      "eval_samples_per_second": 89.123,
      "eval_steps_per_second": 3.188,
      "step": 5216
    },
    {
      "epoch": 9.0,
      "step": 5868,
      "train_exact_match": 83.61638361638362,
      "train_f1": 93.40487188804495,
      "train_runtime": 13.7502,
      "train_samples_per_second": 88.508,
      "train_steps_per_second": 3.2
    },
    {
      "epoch": 9.0,
      "grad_norm": 45.799346923828125,
      "learning_rate": 1.25e-06,
      "loss": 0.5138,
      "step": 5868
    },
    {
      "epoch": 9.0,
      "eval_exact_match": 56.46875,
      "eval_f1": 70.54295738591223,
      "eval_runtime": 43.2261,
      "eval_samples_per_second": 89.252,
      "eval_steps_per_second": 3.193,
      "step": 5868
    },
    {
      "epoch": 10.0,
      "step": 6520,
      "train_exact_match": 84.31568431568432,
      "train_f1": 93.73109223264476,
      "train_runtime": 13.3013,
      "train_samples_per_second": 88.563,
      "train_steps_per_second": 3.233
    },
    {
      "epoch": 10.0,
      "grad_norm": 22.400699615478516,
      "learning_rate": 0.0,
      "loss": 0.4785,
      "step": 6520
    },
    {
      "epoch": 10.0,
      "eval_exact_match": 56.34375,
      "eval_f1": 70.56177522205788,
      "eval_runtime": 43.1408,
      "eval_samples_per_second": 89.428,
      "eval_steps_per_second": 3.199,
      "step": 6520
    },
    {
      "epoch": 10.0,
      "step": 6520,
      "total_flos": 3.575911440121344e+16,
      "train_loss": 0.8889602286684001,
      "train_runtime": 4193.6079,
      "train_samples_per_second": 43.511,
      "train_steps_per_second": 1.555
    }
  ],
  "logging_steps": 500,
  "max_steps": 6520,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 10,
  "save_steps": 500,
  "total_flos": 3.575911440121344e+16,
  "train_batch_size": 28,
  "trial_name": null,
  "trial_params": null
}