{ "best_metric": 0.07523266971111298, "best_model_checkpoint": "/home/iais_marenpielka/Bouthaina/res_nw_dj_aragpt2-base/checkpoint-16074", "epoch": 11.0, "eval_steps": 500, "global_step": 29469, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 0.1490328013896942, "learning_rate": 4.7947437829691034e-05, "loss": 0.435, "step": 2679 }, { "epoch": 1.0, "eval_bleu": 0.022539363481404024, "eval_loss": 0.08990845084190369, "eval_rouge1": 0.26967504263175696, "eval_rouge2": 0.06811111838722575, "eval_rougeL": 0.2648760311441921, "eval_runtime": 118.8639, "eval_samples_per_second": 45.262, "eval_steps_per_second": 5.662, "step": 2679 }, { "epoch": 2.0, "grad_norm": 0.1779935508966446, "learning_rate": 4.542388847023361e-05, "loss": 0.0908, "step": 5358 }, { "epoch": 2.0, "eval_bleu": 0.0494923940085718, "eval_loss": 0.08216600120067596, "eval_rouge1": 0.34383088385557475, "eval_rouge2": 0.12325553058649232, "eval_rougeL": 0.33940964680072816, "eval_runtime": 181.022, "eval_samples_per_second": 29.72, "eval_steps_per_second": 3.718, "step": 5358 }, { "epoch": 3.0, "grad_norm": 0.13422226905822754, "learning_rate": 4.290033911077619e-05, "loss": 0.0808, "step": 8037 }, { "epoch": 3.0, "eval_bleu": 0.06700443598211842, "eval_loss": 0.0785822868347168, "eval_rouge1": 0.38347464472946946, "eval_rouge2": 0.15815766995910013, "eval_rougeL": 0.37897410206784377, "eval_runtime": 118.7728, "eval_samples_per_second": 45.297, "eval_steps_per_second": 5.666, "step": 8037 }, { "epoch": 4.0, "grad_norm": 0.17974382638931274, "learning_rate": 4.0376789751318766e-05, "loss": 0.0738, "step": 10716 }, { "epoch": 4.0, "eval_bleu": 0.0782017656533744, "eval_loss": 0.07652640342712402, "eval_rouge1": 0.4066218577182069, "eval_rouge2": 0.1797711825940213, "eval_rougeL": 0.4024533186612807, "eval_runtime": 302.5488, "eval_samples_per_second": 17.782, "eval_steps_per_second": 2.224, "step": 10716 }, { "epoch": 5.0, "grad_norm": 0.1600717157125473, "learning_rate": 3.785324039186134e-05, "loss": 0.0681, "step": 13395 }, { "epoch": 5.0, "eval_bleu": 0.088003678878398, "eval_loss": 0.07556667178869247, "eval_rouge1": 0.42424169752606233, "eval_rouge2": 0.19635936454770994, "eval_rougeL": 0.42040996405014475, "eval_runtime": 119.8742, "eval_samples_per_second": 44.88, "eval_steps_per_second": 5.614, "step": 13395 }, { "epoch": 6.0, "grad_norm": 0.17717573046684265, "learning_rate": 3.532969103240392e-05, "loss": 0.0632, "step": 16074 }, { "epoch": 6.0, "eval_bleu": 0.09284298963391195, "eval_loss": 0.07523266971111298, "eval_rouge1": 0.434313103224844, "eval_rouge2": 0.2043067195259623, "eval_rougeL": 0.430449964990971, "eval_runtime": 119.9224, "eval_samples_per_second": 44.862, "eval_steps_per_second": 5.612, "step": 16074 }, { "epoch": 7.0, "grad_norm": 0.20939625799655914, "learning_rate": 3.28061416729465e-05, "loss": 0.059, "step": 18753 }, { "epoch": 7.0, "eval_bleu": 0.09955364884197439, "eval_loss": 0.07554977387189865, "eval_rouge1": 0.4438832318000667, "eval_rouge2": 0.21524219506914244, "eval_rougeL": 0.44011453518152965, "eval_runtime": 119.8903, "eval_samples_per_second": 44.874, "eval_steps_per_second": 5.613, "step": 18753 }, { "epoch": 8.0, "grad_norm": 0.18116699159145355, "learning_rate": 3.028259231348907e-05, "loss": 0.0552, "step": 21432 }, { "epoch": 8.0, "eval_bleu": 0.10150066662504155, "eval_loss": 0.07605580985546112, "eval_rouge1": 0.45000685735983936, "eval_rouge2": 0.2217308384368585, "eval_rougeL": 0.44629486754481273, "eval_runtime": 180.8168, "eval_samples_per_second": 29.754, "eval_steps_per_second": 3.722, "step": 21432 }, { "epoch": 9.0, "grad_norm": 0.20604003965854645, "learning_rate": 2.775904295403165e-05, "loss": 0.0517, "step": 24111 }, { "epoch": 9.0, "eval_bleu": 0.10504101584036675, "eval_loss": 0.07660207897424698, "eval_rouge1": 0.4527224851378999, "eval_rouge2": 0.2249827236829457, "eval_rougeL": 0.44886876248725205, "eval_runtime": 241.7568, "eval_samples_per_second": 22.254, "eval_steps_per_second": 2.784, "step": 24111 }, { "epoch": 10.0, "grad_norm": 0.17469635605812073, "learning_rate": 2.523549359457423e-05, "loss": 0.0486, "step": 26790 }, { "epoch": 10.0, "eval_bleu": 0.10926649986209491, "eval_loss": 0.07839509844779968, "eval_rouge1": 0.4612349211854463, "eval_rouge2": 0.23378642120255436, "eval_rougeL": 0.457790117200362, "eval_runtime": 180.2876, "eval_samples_per_second": 29.841, "eval_steps_per_second": 3.733, "step": 26790 }, { "epoch": 11.0, "grad_norm": 0.19318881630897522, "learning_rate": 2.2711944235116806e-05, "loss": 0.0458, "step": 29469 }, { "epoch": 11.0, "eval_bleu": 0.11121120716108289, "eval_loss": 0.07983218133449554, "eval_rouge1": 0.4633689566450456, "eval_rouge2": 0.2356348733383184, "eval_rougeL": 0.4600017445894788, "eval_runtime": 242.0184, "eval_samples_per_second": 22.23, "eval_steps_per_second": 2.781, "step": 29469 }, { "epoch": 11.0, "step": 29469, "total_flos": 1.23182992982016e+17, "train_loss": 0.09744292338859069, "train_runtime": 17256.5383, "train_samples_per_second": 24.836, "train_steps_per_second": 3.105 } ], "logging_steps": 500, "max_steps": 53580, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.23182992982016e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }