res_nw_dj_aragpt2-base / trainer_state.json
nlparabic's picture
End of training
e71621c verified
raw
history blame
7.11 kB
{
"best_metric": 0.07523266971111298,
"best_model_checkpoint": "/home/iais_marenpielka/Bouthaina/res_nw_dj_aragpt2-base/checkpoint-16074",
"epoch": 11.0,
"eval_steps": 500,
"global_step": 29469,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 0.1490328013896942,
"learning_rate": 4.7947437829691034e-05,
"loss": 0.435,
"step": 2679
},
{
"epoch": 1.0,
"eval_bleu": 0.022539363481404024,
"eval_loss": 0.08990845084190369,
"eval_rouge1": 0.26967504263175696,
"eval_rouge2": 0.06811111838722575,
"eval_rougeL": 0.2648760311441921,
"eval_runtime": 118.8639,
"eval_samples_per_second": 45.262,
"eval_steps_per_second": 5.662,
"step": 2679
},
{
"epoch": 2.0,
"grad_norm": 0.1779935508966446,
"learning_rate": 4.542388847023361e-05,
"loss": 0.0908,
"step": 5358
},
{
"epoch": 2.0,
"eval_bleu": 0.0494923940085718,
"eval_loss": 0.08216600120067596,
"eval_rouge1": 0.34383088385557475,
"eval_rouge2": 0.12325553058649232,
"eval_rougeL": 0.33940964680072816,
"eval_runtime": 181.022,
"eval_samples_per_second": 29.72,
"eval_steps_per_second": 3.718,
"step": 5358
},
{
"epoch": 3.0,
"grad_norm": 0.13422226905822754,
"learning_rate": 4.290033911077619e-05,
"loss": 0.0808,
"step": 8037
},
{
"epoch": 3.0,
"eval_bleu": 0.06700443598211842,
"eval_loss": 0.0785822868347168,
"eval_rouge1": 0.38347464472946946,
"eval_rouge2": 0.15815766995910013,
"eval_rougeL": 0.37897410206784377,
"eval_runtime": 118.7728,
"eval_samples_per_second": 45.297,
"eval_steps_per_second": 5.666,
"step": 8037
},
{
"epoch": 4.0,
"grad_norm": 0.17974382638931274,
"learning_rate": 4.0376789751318766e-05,
"loss": 0.0738,
"step": 10716
},
{
"epoch": 4.0,
"eval_bleu": 0.0782017656533744,
"eval_loss": 0.07652640342712402,
"eval_rouge1": 0.4066218577182069,
"eval_rouge2": 0.1797711825940213,
"eval_rougeL": 0.4024533186612807,
"eval_runtime": 302.5488,
"eval_samples_per_second": 17.782,
"eval_steps_per_second": 2.224,
"step": 10716
},
{
"epoch": 5.0,
"grad_norm": 0.1600717157125473,
"learning_rate": 3.785324039186134e-05,
"loss": 0.0681,
"step": 13395
},
{
"epoch": 5.0,
"eval_bleu": 0.088003678878398,
"eval_loss": 0.07556667178869247,
"eval_rouge1": 0.42424169752606233,
"eval_rouge2": 0.19635936454770994,
"eval_rougeL": 0.42040996405014475,
"eval_runtime": 119.8742,
"eval_samples_per_second": 44.88,
"eval_steps_per_second": 5.614,
"step": 13395
},
{
"epoch": 6.0,
"grad_norm": 0.17717573046684265,
"learning_rate": 3.532969103240392e-05,
"loss": 0.0632,
"step": 16074
},
{
"epoch": 6.0,
"eval_bleu": 0.09284298963391195,
"eval_loss": 0.07523266971111298,
"eval_rouge1": 0.434313103224844,
"eval_rouge2": 0.2043067195259623,
"eval_rougeL": 0.430449964990971,
"eval_runtime": 119.9224,
"eval_samples_per_second": 44.862,
"eval_steps_per_second": 5.612,
"step": 16074
},
{
"epoch": 7.0,
"grad_norm": 0.20939625799655914,
"learning_rate": 3.28061416729465e-05,
"loss": 0.059,
"step": 18753
},
{
"epoch": 7.0,
"eval_bleu": 0.09955364884197439,
"eval_loss": 0.07554977387189865,
"eval_rouge1": 0.4438832318000667,
"eval_rouge2": 0.21524219506914244,
"eval_rougeL": 0.44011453518152965,
"eval_runtime": 119.8903,
"eval_samples_per_second": 44.874,
"eval_steps_per_second": 5.613,
"step": 18753
},
{
"epoch": 8.0,
"grad_norm": 0.18116699159145355,
"learning_rate": 3.028259231348907e-05,
"loss": 0.0552,
"step": 21432
},
{
"epoch": 8.0,
"eval_bleu": 0.10150066662504155,
"eval_loss": 0.07605580985546112,
"eval_rouge1": 0.45000685735983936,
"eval_rouge2": 0.2217308384368585,
"eval_rougeL": 0.44629486754481273,
"eval_runtime": 180.8168,
"eval_samples_per_second": 29.754,
"eval_steps_per_second": 3.722,
"step": 21432
},
{
"epoch": 9.0,
"grad_norm": 0.20604003965854645,
"learning_rate": 2.775904295403165e-05,
"loss": 0.0517,
"step": 24111
},
{
"epoch": 9.0,
"eval_bleu": 0.10504101584036675,
"eval_loss": 0.07660207897424698,
"eval_rouge1": 0.4527224851378999,
"eval_rouge2": 0.2249827236829457,
"eval_rougeL": 0.44886876248725205,
"eval_runtime": 241.7568,
"eval_samples_per_second": 22.254,
"eval_steps_per_second": 2.784,
"step": 24111
},
{
"epoch": 10.0,
"grad_norm": 0.17469635605812073,
"learning_rate": 2.523549359457423e-05,
"loss": 0.0486,
"step": 26790
},
{
"epoch": 10.0,
"eval_bleu": 0.10926649986209491,
"eval_loss": 0.07839509844779968,
"eval_rouge1": 0.4612349211854463,
"eval_rouge2": 0.23378642120255436,
"eval_rougeL": 0.457790117200362,
"eval_runtime": 180.2876,
"eval_samples_per_second": 29.841,
"eval_steps_per_second": 3.733,
"step": 26790
},
{
"epoch": 11.0,
"grad_norm": 0.19318881630897522,
"learning_rate": 2.2711944235116806e-05,
"loss": 0.0458,
"step": 29469
},
{
"epoch": 11.0,
"eval_bleu": 0.11121120716108289,
"eval_loss": 0.07983218133449554,
"eval_rouge1": 0.4633689566450456,
"eval_rouge2": 0.2356348733383184,
"eval_rougeL": 0.4600017445894788,
"eval_runtime": 242.0184,
"eval_samples_per_second": 22.23,
"eval_steps_per_second": 2.781,
"step": 29469
},
{
"epoch": 11.0,
"step": 29469,
"total_flos": 1.23182992982016e+17,
"train_loss": 0.09744292338859069,
"train_runtime": 17256.5383,
"train_samples_per_second": 24.836,
"train_steps_per_second": 3.105
}
],
"logging_steps": 500,
"max_steps": 53580,
"num_input_tokens_seen": 0,
"num_train_epochs": 20,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 5,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.23182992982016e+17,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}