{ "best_metric": 1.9084105491638184, "best_model_checkpoint": "/home/iais_marenpielka/Bouthaina/results/checkpoint-8500", "epoch": 20.0, "eval_steps": 500, "global_step": 9220, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.567398119122257, "grad_norm": 1.353641152381897, "learning_rate": 5e-05, "loss": 3.359, "step": 500 }, { "epoch": 1.567398119122257, "eval_bleu": 0.11424038411303619, "eval_loss": 3.128293514251709, "eval_rouge1": 0.3297614987151056, "eval_rouge2": 0.08429294540985294, "eval_rougeL": 0.2561476738686219, "eval_runtime": 26.8133, "eval_samples_per_second": 31.589, "eval_steps_per_second": 3.953, "step": 500 }, { "epoch": 3.134796238244514, "grad_norm": 1.156111717224121, "learning_rate": 2.71689497716895e-05, "loss": 2.9208, "step": 1000 }, { "epoch": 3.134796238244514, "eval_bleu": 0.1490666503828191, "eval_loss": 2.729825496673584, "eval_rouge1": 0.40409071928626966, "eval_rouge2": 0.14297878002377568, "eval_rougeL": 0.34083403761346187, "eval_runtime": 27.234, "eval_samples_per_second": 31.101, "eval_steps_per_second": 3.892, "step": 1000 }, { "epoch": 4.702194357366771, "grad_norm": 1.1165863275527954, "learning_rate": 4.337899543378996e-06, "loss": 2.619, "step": 1500 }, { "epoch": 4.702194357366771, "eval_bleu": 0.16068905926811505, "eval_loss": 2.6229476928710938, "eval_rouge1": 0.4264320027787866, "eval_rouge2": 0.1630682859845051, "eval_rougeL": 0.367472815476786, "eval_runtime": 27.3027, "eval_samples_per_second": 31.023, "eval_steps_per_second": 3.882, "step": 1500 }, { "epoch": 4.3383947939262475, "grad_norm": 1.10550856590271, "learning_rate": 4.139908256880734e-05, "loss": 2.4047, "step": 2000 }, { "epoch": 4.3383947939262475, "eval_bleu": 0.27212534220096674, "eval_loss": 2.200192451477051, "eval_rouge1": 0.49764917064550795, "eval_rouge2": 0.25417403674525624, "eval_rougeL": 0.4505978761161964, "eval_runtime": 29.8301, "eval_samples_per_second": 31.009, "eval_steps_per_second": 3.889, "step": 2000 }, { "epoch": 5.422993492407809, "grad_norm": 1.0486189126968384, "learning_rate": 3.8532110091743125e-05, "loss": 2.19, "step": 2500 }, { "epoch": 5.422993492407809, "eval_bleu": 0.2853635265097057, "eval_loss": 2.099168539047241, "eval_rouge1": 0.5205238075842558, "eval_rouge2": 0.27883621341002174, "eval_rougeL": 0.4772785679427928, "eval_runtime": 29.5017, "eval_samples_per_second": 31.354, "eval_steps_per_second": 3.932, "step": 2500 }, { "epoch": 6.507592190889371, "grad_norm": 1.0022239685058594, "learning_rate": 3.56651376146789e-05, "loss": 2.0473, "step": 3000 }, { "epoch": 6.507592190889371, "eval_bleu": 0.29294689624288234, "eval_loss": 2.0362119674682617, "eval_rouge1": 0.5380910185587349, "eval_rouge2": 0.29647105961235576, "eval_rougeL": 0.49649873151947865, "eval_runtime": 29.6658, "eval_samples_per_second": 31.181, "eval_steps_per_second": 3.91, "step": 3000 }, { "epoch": 7.592190889370933, "grad_norm": 1.1853405237197876, "learning_rate": 3.2798165137614676e-05, "loss": 1.9397, "step": 3500 }, { "epoch": 7.592190889370933, "eval_bleu": 0.2996126116957466, "eval_loss": 1.9933106899261475, "eval_rouge1": 0.5494053286639744, "eval_rouge2": 0.31025003697020603, "eval_rougeL": 0.5101736274334897, "eval_runtime": 29.6088, "eval_samples_per_second": 31.241, "eval_steps_per_second": 3.918, "step": 3500 }, { "epoch": 8.676789587852495, "grad_norm": 1.1255462169647217, "learning_rate": 2.9931192660550462e-05, "loss": 1.857, "step": 4000 }, { "epoch": 8.676789587852495, "eval_bleu": 0.30241485912380783, "eval_loss": 1.9647237062454224, "eval_rouge1": 0.5597611557009092, "eval_rouge2": 0.3191422306947157, "eval_rougeL": 0.5202653323875917, "eval_runtime": 29.9377, "eval_samples_per_second": 30.897, "eval_steps_per_second": 3.875, "step": 4000 }, { "epoch": 9.761388286334057, "grad_norm": 1.1697229146957397, "learning_rate": 2.7064220183486238e-05, "loss": 1.784, "step": 4500 }, { "epoch": 9.761388286334057, "eval_bleu": 0.3061719577143718, "eval_loss": 1.9443068504333496, "eval_rouge1": 0.567492271856554, "eval_rouge2": 0.3269182124324805, "eval_rougeL": 0.5278573882748132, "eval_runtime": 29.751, "eval_samples_per_second": 31.091, "eval_steps_per_second": 3.899, "step": 4500 }, { "epoch": 10.845986984815617, "grad_norm": 1.070591926574707, "learning_rate": 2.419724770642202e-05, "loss": 1.7239, "step": 5000 }, { "epoch": 10.845986984815617, "eval_bleu": 0.309858394526436, "eval_loss": 1.931990385055542, "eval_rouge1": 0.5723606535196859, "eval_rouge2": 0.3338521436125379, "eval_rougeL": 0.5341216118802655, "eval_runtime": 29.6886, "eval_samples_per_second": 31.157, "eval_steps_per_second": 3.907, "step": 5000 }, { "epoch": 11.93058568329718, "grad_norm": 1.0755261182785034, "learning_rate": 2.13302752293578e-05, "loss": 1.6713, "step": 5500 }, { "epoch": 11.93058568329718, "eval_bleu": 0.3115672562854492, "eval_loss": 1.920640230178833, "eval_rouge1": 0.5765467952167939, "eval_rouge2": 0.33826641143296676, "eval_rougeL": 0.5387314433190069, "eval_runtime": 29.7016, "eval_samples_per_second": 31.143, "eval_steps_per_second": 3.906, "step": 5500 }, { "epoch": 13.015184381778742, "grad_norm": 1.0826488733291626, "learning_rate": 1.8463302752293578e-05, "loss": 1.6263, "step": 6000 }, { "epoch": 13.015184381778742, "eval_bleu": 0.31268695772405475, "eval_loss": 1.916778564453125, "eval_rouge1": 0.5780842791223908, "eval_rouge2": 0.34164409810850394, "eval_rougeL": 0.5415509673961407, "eval_runtime": 29.789, "eval_samples_per_second": 31.052, "eval_steps_per_second": 3.894, "step": 6000 }, { "epoch": 14.099783080260304, "grad_norm": 1.0868735313415527, "learning_rate": 1.559633027522936e-05, "loss": 1.5869, "step": 6500 }, { "epoch": 14.099783080260304, "eval_bleu": 0.31365743559233084, "eval_loss": 1.9147837162017822, "eval_rouge1": 0.5829184758698387, "eval_rouge2": 0.3448101826360943, "eval_rougeL": 0.5450794961513086, "eval_runtime": 29.7645, "eval_samples_per_second": 31.077, "eval_steps_per_second": 3.897, "step": 6500 }, { "epoch": 15.184381778741866, "grad_norm": 1.0827687978744507, "learning_rate": 1.2729357798165138e-05, "loss": 1.5544, "step": 7000 }, { "epoch": 15.184381778741866, "eval_bleu": 0.315769500599606, "eval_loss": 1.9121257066726685, "eval_rouge1": 0.5844681250407762, "eval_rouge2": 0.34764910748110744, "eval_rougeL": 0.5476190296456669, "eval_runtime": 29.7415, "eval_samples_per_second": 31.101, "eval_steps_per_second": 3.9, "step": 7000 }, { "epoch": 16.268980477223426, "grad_norm": 1.1430450677871704, "learning_rate": 9.862385321100918e-06, "loss": 1.5307, "step": 7500 }, { "epoch": 16.268980477223426, "eval_bleu": 0.31648880861794926, "eval_loss": 1.9105726480484009, "eval_rouge1": 0.5852713451659596, "eval_rouge2": 0.34877835378762495, "eval_rougeL": 0.5486197186684263, "eval_runtime": 29.7345, "eval_samples_per_second": 31.109, "eval_steps_per_second": 3.901, "step": 7500 }, { "epoch": 17.35357917570499, "grad_norm": 1.0865087509155273, "learning_rate": 6.995412844036697e-06, "loss": 1.5087, "step": 8000 }, { "epoch": 17.35357917570499, "eval_bleu": 0.31692571547155524, "eval_loss": 1.9093118906021118, "eval_rouge1": 0.5860996975913157, "eval_rouge2": 0.3503907384934047, "eval_rougeL": 0.5500340150392318, "eval_runtime": 29.7497, "eval_samples_per_second": 31.093, "eval_steps_per_second": 3.899, "step": 8000 }, { "epoch": 18.43817787418655, "grad_norm": 1.1252211332321167, "learning_rate": 4.128440366972477e-06, "loss": 1.4937, "step": 8500 }, { "epoch": 18.43817787418655, "eval_bleu": 0.31723468269919336, "eval_loss": 1.9084105491638184, "eval_rouge1": 0.5868586694605076, "eval_rouge2": 0.350546625127078, "eval_rougeL": 0.5503666110741787, "eval_runtime": 29.7351, "eval_samples_per_second": 31.108, "eval_steps_per_second": 3.901, "step": 8500 }, { "epoch": 19.522776572668114, "grad_norm": 1.150936245918274, "learning_rate": 1.261467889908257e-06, "loss": 1.4824, "step": 9000 }, { "epoch": 19.522776572668114, "eval_bleu": 0.3177718226409019, "eval_loss": 1.9086270332336426, "eval_rouge1": 0.5875550437490973, "eval_rouge2": 0.3512666976647323, "eval_rougeL": 0.5509556223633276, "eval_runtime": 30.1604, "eval_samples_per_second": 30.669, "eval_steps_per_second": 3.846, "step": 9000 }, { "epoch": 20.0, "step": 9220, "total_flos": 2.8862709792768e+16, "train_loss": 1.4422371688478681, "train_runtime": 3284.8472, "train_samples_per_second": 22.412, "train_steps_per_second": 2.807 } ], "logging_steps": 500, "max_steps": 9220, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.8862709792768e+16, "train_batch_size": 8, "trial_name": null, "trial_params": null }