{ "best_metric": 0.05337703973054886, "best_model_checkpoint": "/home/iais_marenpielka/Bouthaina/res_nw_yem_aragpt2-base/checkpoint-1377", "epoch": 14.0, "eval_steps": 500, "global_step": 2142, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 0.3274374306201935, "learning_rate": 1.53e-05, "loss": 5.8224, "step": 153 }, { "epoch": 1.0, "eval_bleu": 0.00392224521343418, "eval_loss": 0.1128806546330452, "eval_rouge1": 0.07124894337258883, "eval_rouge2": 0.0028587092731829577, "eval_rougeL": 0.06948923152022689, "eval_runtime": 7.1373, "eval_samples_per_second": 42.593, "eval_steps_per_second": 5.324, "step": 153 }, { "epoch": 2.0, "grad_norm": 0.3334260880947113, "learning_rate": 3.06e-05, "loss": 0.1108, "step": 306 }, { "epoch": 2.0, "eval_bleu": 0.0, "eval_loss": 0.06912114471197128, "eval_rouge1": 0.09507863230508208, "eval_rouge2": 0.007788772376873063, "eval_rougeL": 0.09317127790126112, "eval_runtime": 6.9813, "eval_samples_per_second": 43.545, "eval_steps_per_second": 5.443, "step": 306 }, { "epoch": 3.0, "grad_norm": 0.2704195976257324, "learning_rate": 4.5900000000000004e-05, "loss": 0.0775, "step": 459 }, { "epoch": 3.0, "eval_bleu": 0.006737157818299724, "eval_loss": 0.06276746839284897, "eval_rouge1": 0.12907377035145118, "eval_rouge2": 0.015683124441348124, "eval_rougeL": 0.1286203798342947, "eval_runtime": 6.9851, "eval_samples_per_second": 43.521, "eval_steps_per_second": 5.44, "step": 459 }, { "epoch": 4.0, "grad_norm": 0.32800641655921936, "learning_rate": 4.7812500000000003e-05, "loss": 0.0678, "step": 612 }, { "epoch": 4.0, "eval_bleu": 0.008597461521382226, "eval_loss": 0.059174273163080215, "eval_rouge1": 0.1524066357986381, "eval_rouge2": 0.027263211934264564, "eval_rougeL": 0.14917670820737453, "eval_runtime": 6.9134, "eval_samples_per_second": 43.973, "eval_steps_per_second": 5.497, "step": 612 }, { "epoch": 5.0, "grad_norm": 0.18473878502845764, "learning_rate": 4.482421875e-05, "loss": 0.0603, "step": 765 }, { "epoch": 5.0, "eval_bleu": 0.016158903735540513, "eval_loss": 0.05656920000910759, "eval_rouge1": 0.19186830417118805, "eval_rouge2": 0.04134465720083692, "eval_rougeL": 0.18829939413002822, "eval_runtime": 6.9842, "eval_samples_per_second": 43.527, "eval_steps_per_second": 5.441, "step": 765 }, { "epoch": 6.0, "grad_norm": 0.2642894685268402, "learning_rate": 4.18359375e-05, "loss": 0.0547, "step": 918 }, { "epoch": 6.0, "eval_bleu": 0.018741315914778903, "eval_loss": 0.05464606359601021, "eval_rouge1": 0.2238907288379966, "eval_rouge2": 0.05989126658873175, "eval_rougeL": 0.22184683032173025, "eval_runtime": 7.0034, "eval_samples_per_second": 43.408, "eval_steps_per_second": 5.426, "step": 918 }, { "epoch": 7.0, "grad_norm": 0.24372832477092743, "learning_rate": 3.884765625e-05, "loss": 0.0498, "step": 1071 }, { "epoch": 7.0, "eval_bleu": 0.029468363597052377, "eval_loss": 0.054019927978515625, "eval_rouge1": 0.26839447635173574, "eval_rouge2": 0.07325536872796896, "eval_rougeL": 0.2638108570075386, "eval_runtime": 6.9888, "eval_samples_per_second": 43.498, "eval_steps_per_second": 5.437, "step": 1071 }, { "epoch": 8.0, "grad_norm": 0.2509661316871643, "learning_rate": 3.5859375e-05, "loss": 0.0456, "step": 1224 }, { "epoch": 8.0, "eval_bleu": 0.029195772701040013, "eval_loss": 0.053611643612384796, "eval_rouge1": 0.2883971192764231, "eval_rouge2": 0.08184845936724905, "eval_rougeL": 0.28406572737215363, "eval_runtime": 6.9871, "eval_samples_per_second": 43.509, "eval_steps_per_second": 5.439, "step": 1224 }, { "epoch": 9.0, "grad_norm": 0.23149649798870087, "learning_rate": 3.287109375e-05, "loss": 0.0419, "step": 1377 }, { "epoch": 9.0, "eval_bleu": 0.04280893852047394, "eval_loss": 0.05337703973054886, "eval_rouge1": 0.31388693258006284, "eval_rouge2": 0.11036995985896458, "eval_rougeL": 0.3096668301159336, "eval_runtime": 6.9883, "eval_samples_per_second": 43.501, "eval_steps_per_second": 5.438, "step": 1377 }, { "epoch": 10.0, "grad_norm": 0.2521679401397705, "learning_rate": 2.9882812500000002e-05, "loss": 0.0385, "step": 1530 }, { "epoch": 10.0, "eval_bleu": 0.046113511822263795, "eval_loss": 0.05344715714454651, "eval_rouge1": 0.32550355167793193, "eval_rouge2": 0.11175887195986847, "eval_rougeL": 0.3185482802099305, "eval_runtime": 6.9943, "eval_samples_per_second": 43.464, "eval_steps_per_second": 5.433, "step": 1530 }, { "epoch": 11.0, "grad_norm": 0.23900777101516724, "learning_rate": 2.689453125e-05, "loss": 0.0354, "step": 1683 }, { "epoch": 11.0, "eval_bleu": 0.047259792068284935, "eval_loss": 0.05403715744614601, "eval_rouge1": 0.3357586244098568, "eval_rouge2": 0.12191279657273882, "eval_rougeL": 0.3287623290777512, "eval_runtime": 7.0002, "eval_samples_per_second": 43.427, "eval_steps_per_second": 5.428, "step": 1683 }, { "epoch": 12.0, "grad_norm": 0.24781078100204468, "learning_rate": 2.3906250000000002e-05, "loss": 0.0331, "step": 1836 }, { "epoch": 12.0, "eval_bleu": 0.0475663160301358, "eval_loss": 0.05399588495492935, "eval_rouge1": 0.3483203778520053, "eval_rouge2": 0.13122980561384404, "eval_rougeL": 0.3442445629111672, "eval_runtime": 6.9958, "eval_samples_per_second": 43.455, "eval_steps_per_second": 5.432, "step": 1836 }, { "epoch": 13.0, "grad_norm": 0.3433144986629486, "learning_rate": 2.091796875e-05, "loss": 0.0308, "step": 1989 }, { "epoch": 13.0, "eval_bleu": 0.05895730441080768, "eval_loss": 0.05515788868069649, "eval_rouge1": 0.3599156447280303, "eval_rouge2": 0.14385576959590118, "eval_rougeL": 0.3539109033921022, "eval_runtime": 6.9936, "eval_samples_per_second": 43.469, "eval_steps_per_second": 5.434, "step": 1989 }, { "epoch": 14.0, "grad_norm": 0.3219660520553589, "learning_rate": 1.79296875e-05, "loss": 0.0291, "step": 2142 }, { "epoch": 14.0, "eval_bleu": 0.06246557033315383, "eval_loss": 0.05557234585285187, "eval_rouge1": 0.37373672906337496, "eval_rouge2": 0.14894175508943397, "eval_rougeL": 0.3669928154707829, "eval_runtime": 6.9886, "eval_samples_per_second": 43.499, "eval_steps_per_second": 5.437, "step": 2142 }, { "epoch": 14.0, "step": 2142, "total_flos": 8918419636224000.0, "train_loss": 0.46412634738241815, "train_runtime": 1276.9352, "train_samples_per_second": 19.093, "train_steps_per_second": 2.396 } ], "logging_steps": 500, "max_steps": 3060, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 8918419636224000.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }