{ "best_metric": 0.4183219075202942, "best_model_checkpoint": "/home/iais_marenpielka/Bouthaina/res_nw_gulf/checkpoint-3352", "epoch": 9.0, "eval_steps": 500, "global_step": 7542, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 1.1270569562911987, "learning_rate": 4.8960639606396064e-05, "loss": 1.9575, "step": 838 }, { "epoch": 1.0, "eval_bleu": 0.17899151773967856, "eval_loss": 0.4805418848991394, "eval_rouge1": 0.5300700267573193, "eval_rouge2": 0.25846419107204655, "eval_rougeL": 0.5287371290578213, "eval_runtime": 26.4765, "eval_samples_per_second": 63.264, "eval_steps_per_second": 7.932, "step": 838 }, { "epoch": 2.0, "grad_norm": 0.8918192982673645, "learning_rate": 4.6383763837638376e-05, "loss": 0.422, "step": 1676 }, { "epoch": 2.0, "eval_bleu": 0.2111987576275603, "eval_loss": 0.4326799809932709, "eval_rouge1": 0.5789346222407353, "eval_rouge2": 0.3115433580134526, "eval_rougeL": 0.5777838651600141, "eval_runtime": 36.5489, "eval_samples_per_second": 45.829, "eval_steps_per_second": 5.746, "step": 1676 }, { "epoch": 3.0, "grad_norm": 1.1533405780792236, "learning_rate": 4.380688806888069e-05, "loss": 0.331, "step": 2514 }, { "epoch": 3.0, "eval_bleu": 0.22887638181669745, "eval_loss": 0.41845184564590454, "eval_rouge1": 0.6066983915980586, "eval_rouge2": 0.34626660980048474, "eval_rougeL": 0.6055721645591957, "eval_runtime": 25.3383, "eval_samples_per_second": 66.106, "eval_steps_per_second": 8.288, "step": 2514 }, { "epoch": 4.0, "grad_norm": 1.3615330457687378, "learning_rate": 4.123001230012301e-05, "loss": 0.2638, "step": 3352 }, { "epoch": 4.0, "eval_bleu": 0.23736106874390867, "eval_loss": 0.4183219075202942, "eval_rouge1": 0.6213217050496613, "eval_rouge2": 0.3629928034660268, "eval_rougeL": 0.6194945230812778, "eval_runtime": 16.1799, "eval_samples_per_second": 103.524, "eval_steps_per_second": 12.979, "step": 3352 }, { "epoch": 5.0, "grad_norm": 1.29606294631958, "learning_rate": 3.865313653136531e-05, "loss": 0.2131, "step": 4190 }, { "epoch": 5.0, "eval_bleu": 0.24225546513762886, "eval_loss": 0.42484816908836365, "eval_rouge1": 0.6280394265150125, "eval_rouge2": 0.3718667234148674, "eval_rougeL": 0.6264526155291095, "eval_runtime": 25.4283, "eval_samples_per_second": 65.872, "eval_steps_per_second": 8.259, "step": 4190 }, { "epoch": 6.0, "grad_norm": 1.9162726402282715, "learning_rate": 3.6076260762607624e-05, "loss": 0.1756, "step": 5028 }, { "epoch": 6.0, "eval_bleu": 0.25110200021461276, "eval_loss": 0.4322951138019562, "eval_rouge1": 0.6329099900871186, "eval_rouge2": 0.3818443278310246, "eval_rougeL": 0.6311516008372622, "eval_runtime": 13.3457, "eval_samples_per_second": 125.508, "eval_steps_per_second": 15.735, "step": 5028 }, { "epoch": 7.0, "grad_norm": 1.2213388681411743, "learning_rate": 3.349938499384994e-05, "loss": 0.148, "step": 5866 }, { "epoch": 7.0, "eval_bleu": 0.24584223218153298, "eval_loss": 0.4369480013847351, "eval_rouge1": 0.6346640667719812, "eval_rouge2": 0.3890963668887653, "eval_rougeL": 0.6326978702709232, "eval_runtime": 6.3313, "eval_samples_per_second": 264.559, "eval_steps_per_second": 33.169, "step": 5866 }, { "epoch": 8.0, "grad_norm": 1.774276852607727, "learning_rate": 3.092250922509225e-05, "loss": 0.1285, "step": 6704 }, { "epoch": 8.0, "eval_bleu": 0.2499016538097514, "eval_loss": 0.4447513520717621, "eval_rouge1": 0.6380175687280993, "eval_rouge2": 0.38845767776215345, "eval_rougeL": 0.6368182514454324, "eval_runtime": 5.4681, "eval_samples_per_second": 306.322, "eval_steps_per_second": 38.405, "step": 6704 }, { "epoch": 9.0, "grad_norm": 0.779574990272522, "learning_rate": 2.8345633456334564e-05, "loss": 0.1152, "step": 7542 }, { "epoch": 9.0, "eval_bleu": 0.2538776211394201, "eval_loss": 0.45251065492630005, "eval_rouge1": 0.6407467681145795, "eval_rouge2": 0.3976657405838769, "eval_rougeL": 0.6393461853572592, "eval_runtime": 6.3546, "eval_samples_per_second": 263.59, "eval_steps_per_second": 33.047, "step": 7542 }, { "epoch": 9.0, "step": 7542, "total_flos": 3938977382400000.0, "train_loss": 0.41718409883979146, "train_runtime": 2958.6328, "train_samples_per_second": 45.291, "train_steps_per_second": 5.665 } ], "logging_steps": 500, "max_steps": 16760, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3938977382400000.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }