res_nw_irq_1.5 / trainer_state.json
nlparabic's picture
End of training
12d7391 verified
{
"best_metric": 1.1622098684310913,
"best_model_checkpoint": "/home/iais_marenpielka/Bouthaina/res_nw_irq_1.5/checkpoint-5285",
"epoch": 10.0,
"eval_steps": 500,
"global_step": 10570,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 0.9448983073234558,
"learning_rate": 4.865067829457365e-05,
"loss": 2.1843,
"step": 1057
},
{
"epoch": 1.0,
"eval_bleu": 0.32059447298359617,
"eval_loss": 1.250288486480713,
"eval_rouge1": 0.4317992213146498,
"eval_rouge2": 0.17666011159565037,
"eval_rougeL": 0.42939906088943486,
"eval_runtime": 57.4393,
"eval_samples_per_second": 36.787,
"eval_steps_per_second": 4.614,
"step": 1057
},
{
"epoch": 2.0,
"grad_norm": 0.9019042253494263,
"learning_rate": 4.609011627906977e-05,
"loss": 0.6485,
"step": 2114
},
{
"epoch": 2.0,
"eval_bleu": 0.33426327412457646,
"eval_loss": 1.1708217859268188,
"eval_rouge1": 0.467300193070487,
"eval_rouge2": 0.20644532227230164,
"eval_rougeL": 0.4650745009842402,
"eval_runtime": 57.7779,
"eval_samples_per_second": 36.571,
"eval_steps_per_second": 4.587,
"step": 2114
},
{
"epoch": 3.0,
"grad_norm": 0.9029839038848877,
"learning_rate": 4.3529554263565894e-05,
"loss": 0.5688,
"step": 3171
},
{
"epoch": 3.0,
"eval_bleu": 0.34132680263228726,
"eval_loss": 1.1971228122711182,
"eval_rouge1": 0.48735678632841767,
"eval_rouge2": 0.2243137926840742,
"eval_rougeL": 0.4850815732515227,
"eval_runtime": 57.4731,
"eval_samples_per_second": 36.765,
"eval_steps_per_second": 4.611,
"step": 3171
},
{
"epoch": 4.0,
"grad_norm": 1.0017938613891602,
"learning_rate": 4.096899224806201e-05,
"loss": 0.5047,
"step": 4228
},
{
"epoch": 4.0,
"eval_bleu": 0.35003996586498176,
"eval_loss": 1.197810173034668,
"eval_rouge1": 0.5054128473867857,
"eval_rouge2": 0.2430653416823758,
"eval_rougeL": 0.5030122857034254,
"eval_runtime": 72.8161,
"eval_samples_per_second": 29.018,
"eval_steps_per_second": 3.639,
"step": 4228
},
{
"epoch": 5.0,
"grad_norm": 1.0304298400878906,
"learning_rate": 3.840843023255814e-05,
"loss": 0.4482,
"step": 5285
},
{
"epoch": 5.0,
"eval_bleu": 0.3502421699944588,
"eval_loss": 1.1622098684310913,
"eval_rouge1": 0.5096445443599783,
"eval_rouge2": 0.24692932072035692,
"eval_rougeL": 0.5075533735060334,
"eval_runtime": 61.0964,
"eval_samples_per_second": 34.585,
"eval_steps_per_second": 4.337,
"step": 5285
},
{
"epoch": 6.0,
"grad_norm": 1.1523360013961792,
"learning_rate": 3.5847868217054265e-05,
"loss": 0.3971,
"step": 6342
},
{
"epoch": 6.0,
"eval_bleu": 0.3536202464127507,
"eval_loss": 1.2058446407318115,
"eval_rouge1": 0.5150055128188905,
"eval_rouge2": 0.25352326254776286,
"eval_rougeL": 0.5127629511333613,
"eval_runtime": 57.6879,
"eval_samples_per_second": 36.628,
"eval_steps_per_second": 4.594,
"step": 6342
},
{
"epoch": 7.0,
"grad_norm": 0.8643929958343506,
"learning_rate": 3.328730620155039e-05,
"loss": 0.3513,
"step": 7399
},
{
"epoch": 7.0,
"eval_bleu": 0.3518598607895297,
"eval_loss": 1.2158491611480713,
"eval_rouge1": 0.5180154682612823,
"eval_rouge2": 0.25664229580394476,
"eval_rougeL": 0.5154649421568362,
"eval_runtime": 121.1214,
"eval_samples_per_second": 17.445,
"eval_steps_per_second": 2.188,
"step": 7399
},
{
"epoch": 8.0,
"grad_norm": 1.1156113147735596,
"learning_rate": 3.0726744186046517e-05,
"loss": 0.3105,
"step": 8456
},
{
"epoch": 8.0,
"eval_bleu": 0.35342870570348306,
"eval_loss": 1.2344255447387695,
"eval_rouge1": 0.5225097884447725,
"eval_rouge2": 0.2629872301742399,
"eval_rougeL": 0.5202162260924711,
"eval_runtime": 57.6564,
"eval_samples_per_second": 36.648,
"eval_steps_per_second": 4.596,
"step": 8456
},
{
"epoch": 9.0,
"grad_norm": 1.2186470031738281,
"learning_rate": 2.816618217054264e-05,
"loss": 0.2743,
"step": 9513
},
{
"epoch": 9.0,
"eval_bleu": 0.35543765162196594,
"eval_loss": 1.2988064289093018,
"eval_rouge1": 0.5277394207696515,
"eval_rouge2": 0.26929133120484805,
"eval_rougeL": 0.5249418411792439,
"eval_runtime": 57.8354,
"eval_samples_per_second": 36.535,
"eval_steps_per_second": 4.582,
"step": 9513
},
{
"epoch": 10.0,
"grad_norm": 1.028747797012329,
"learning_rate": 2.560562015503876e-05,
"loss": 0.2426,
"step": 10570
},
{
"epoch": 10.0,
"eval_bleu": 0.3550745821213054,
"eval_loss": 1.3222092390060425,
"eval_rouge1": 0.5235234737619403,
"eval_rouge2": 0.26498166415837476,
"eval_rougeL": 0.5206776898832342,
"eval_runtime": 57.289,
"eval_samples_per_second": 36.883,
"eval_steps_per_second": 4.626,
"step": 10570
},
{
"epoch": 10.0,
"step": 10570,
"total_flos": 9.5780710219776e+16,
"train_loss": 0.5930478791640181,
"train_runtime": 28339.5203,
"train_samples_per_second": 5.965,
"train_steps_per_second": 0.746
}
],
"logging_steps": 500,
"max_steps": 21140,
"num_input_tokens_seen": 0,
"num_train_epochs": 20,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 5,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 9.5780710219776e+16,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}