{ "best_metric": 0.4183219075202942, "best_model_checkpoint": "/home/iais_marenpielka/Bouthaina/res_nw_gulf/checkpoint-3352", "epoch": 14.0, "eval_steps": 500, "global_step": 11732, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 1.1270569562911987, "learning_rate": 4.8960639606396064e-05, "loss": 1.9575, "step": 838 }, { "epoch": 1.0, "eval_bleu": 0.17899151773967856, "eval_loss": 0.4805418848991394, "eval_rouge1": 0.5300700267573193, "eval_rouge2": 0.25846419107204655, "eval_rougeL": 0.5287371290578213, "eval_runtime": 26.4765, "eval_samples_per_second": 63.264, "eval_steps_per_second": 7.932, "step": 838 }, { "epoch": 2.0, "grad_norm": 0.8918192982673645, "learning_rate": 4.6383763837638376e-05, "loss": 0.422, "step": 1676 }, { "epoch": 2.0, "eval_bleu": 0.2111987576275603, "eval_loss": 0.4326799809932709, "eval_rouge1": 0.5789346222407353, "eval_rouge2": 0.3115433580134526, "eval_rougeL": 0.5777838651600141, "eval_runtime": 36.5489, "eval_samples_per_second": 45.829, "eval_steps_per_second": 5.746, "step": 1676 }, { "epoch": 3.0, "grad_norm": 1.1533405780792236, "learning_rate": 4.380688806888069e-05, "loss": 0.331, "step": 2514 }, { "epoch": 3.0, "eval_bleu": 0.22887638181669745, "eval_loss": 0.41845184564590454, "eval_rouge1": 0.6066983915980586, "eval_rouge2": 0.34626660980048474, "eval_rougeL": 0.6055721645591957, "eval_runtime": 25.3383, "eval_samples_per_second": 66.106, "eval_steps_per_second": 8.288, "step": 2514 }, { "epoch": 4.0, "grad_norm": 1.3615330457687378, "learning_rate": 4.123001230012301e-05, "loss": 0.2638, "step": 3352 }, { "epoch": 4.0, "eval_bleu": 0.23736106874390867, "eval_loss": 0.4183219075202942, "eval_rouge1": 0.6213217050496613, "eval_rouge2": 0.3629928034660268, "eval_rougeL": 0.6194945230812778, "eval_runtime": 16.1799, "eval_samples_per_second": 103.524, "eval_steps_per_second": 12.979, "step": 3352 }, { "epoch": 5.0, "grad_norm": 1.29606294631958, "learning_rate": 3.865313653136531e-05, "loss": 0.2131, "step": 4190 }, { "epoch": 5.0, "eval_bleu": 0.24225546513762886, "eval_loss": 0.42484816908836365, "eval_rouge1": 0.6280394265150125, "eval_rouge2": 0.3718667234148674, "eval_rougeL": 0.6264526155291095, "eval_runtime": 25.4283, "eval_samples_per_second": 65.872, "eval_steps_per_second": 8.259, "step": 4190 }, { "epoch": 6.0, "grad_norm": 1.9162726402282715, "learning_rate": 3.6076260762607624e-05, "loss": 0.1756, "step": 5028 }, { "epoch": 6.0, "eval_bleu": 0.25110200021461276, "eval_loss": 0.4322951138019562, "eval_rouge1": 0.6329099900871186, "eval_rouge2": 0.3818443278310246, "eval_rougeL": 0.6311516008372622, "eval_runtime": 13.3457, "eval_samples_per_second": 125.508, "eval_steps_per_second": 15.735, "step": 5028 }, { "epoch": 7.0, "grad_norm": 1.2213388681411743, "learning_rate": 3.349938499384994e-05, "loss": 0.148, "step": 5866 }, { "epoch": 7.0, "eval_bleu": 0.24584223218153298, "eval_loss": 0.4369480013847351, "eval_rouge1": 0.6346640667719812, "eval_rouge2": 0.3890963668887653, "eval_rougeL": 0.6326978702709232, "eval_runtime": 6.3313, "eval_samples_per_second": 264.559, "eval_steps_per_second": 33.169, "step": 5866 }, { "epoch": 8.0, "grad_norm": 1.774276852607727, "learning_rate": 3.092250922509225e-05, "loss": 0.1285, "step": 6704 }, { "epoch": 8.0, "eval_bleu": 0.2499016538097514, "eval_loss": 0.4447513520717621, "eval_rouge1": 0.6380175687280993, "eval_rouge2": 0.38845767776215345, "eval_rougeL": 0.6368182514454324, "eval_runtime": 5.4681, "eval_samples_per_second": 306.322, "eval_steps_per_second": 38.405, "step": 6704 }, { "epoch": 9.0, "grad_norm": 0.779574990272522, "learning_rate": 2.8345633456334564e-05, "loss": 0.1152, "step": 7542 }, { "epoch": 9.0, "eval_bleu": 0.2538776211394201, "eval_loss": 0.45251065492630005, "eval_rouge1": 0.6407467681145795, "eval_rouge2": 0.3976657405838769, "eval_rougeL": 0.6393461853572592, "eval_runtime": 6.3546, "eval_samples_per_second": 263.59, "eval_steps_per_second": 33.047, "step": 7542 }, { "epoch": 10.0, "grad_norm": 1.159719705581665, "learning_rate": 2.5768757687576876e-05, "loss": 0.105, "step": 8380 }, { "epoch": 10.0, "eval_bleu": 0.2555335148067654, "eval_loss": 0.45896273851394653, "eval_rouge1": 0.6440840106866277, "eval_rouge2": 0.3998364161836553, "eval_rougeL": 0.6425824451349154, "eval_runtime": 5.4849, "eval_samples_per_second": 305.385, "eval_steps_per_second": 38.287, "step": 8380 }, { "epoch": 11.0, "grad_norm": 0.923600435256958, "learning_rate": 2.3191881918819188e-05, "loss": 0.0982, "step": 9218 }, { "epoch": 11.0, "eval_bleu": 0.25802091175426106, "eval_loss": 0.46457362174987793, "eval_rouge1": 0.6455262594531862, "eval_rouge2": 0.40187791063551404, "eval_rougeL": 0.6445205006389869, "eval_runtime": 6.4432, "eval_samples_per_second": 259.965, "eval_steps_per_second": 32.593, "step": 9218 }, { "epoch": 12.0, "grad_norm": 0.9120431542396545, "learning_rate": 2.0615006150061504e-05, "loss": 0.0936, "step": 10056 }, { "epoch": 12.0, "eval_bleu": 0.257202175451904, "eval_loss": 0.470233678817749, "eval_rouge1": 0.6456936789584837, "eval_rouge2": 0.4045772651416589, "eval_rougeL": 0.6445820832270409, "eval_runtime": 13.7399, "eval_samples_per_second": 121.907, "eval_steps_per_second": 15.284, "step": 10056 }, { "epoch": 13.0, "grad_norm": 0.6435267329216003, "learning_rate": 1.8038130381303812e-05, "loss": 0.0899, "step": 10894 }, { "epoch": 13.0, "eval_bleu": 0.25765634141938715, "eval_loss": 0.47367072105407715, "eval_rouge1": 0.6488279078084271, "eval_rouge2": 0.40534740561340493, "eval_rougeL": 0.6478436883847489, "eval_runtime": 17.2761, "eval_samples_per_second": 96.955, "eval_steps_per_second": 12.156, "step": 10894 }, { "epoch": 14.0, "grad_norm": 0.7371336221694946, "learning_rate": 1.5461254612546124e-05, "loss": 0.0871, "step": 11732 }, { "epoch": 14.0, "eval_bleu": 0.2606034683461693, "eval_loss": 0.4779162108898163, "eval_rouge1": 0.6491650390141908, "eval_rouge2": 0.40618585329089035, "eval_rougeL": 0.6482221321240169, "eval_runtime": 5.371, "eval_samples_per_second": 311.859, "eval_steps_per_second": 39.099, "step": 11732 }, { "epoch": 14.0, "step": 11732, "total_flos": 6127298150400000.0, "train_loss": 0.03383859399638808, "train_runtime": 1080.9024, "train_samples_per_second": 123.97, "train_steps_per_second": 15.506 } ], "logging_steps": 500, "max_steps": 16760, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6127298150400000.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }