|
{ |
|
"best_metric": 0.05337703973054886, |
|
"best_model_checkpoint": "/home/iais_marenpielka/Bouthaina/res_nw_yem_aragpt2-base/checkpoint-1377", |
|
"epoch": 14.0, |
|
"eval_steps": 500, |
|
"global_step": 2142, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.3274374306201935, |
|
"learning_rate": 1.53e-05, |
|
"loss": 5.8224, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_bleu": 0.00392224521343418, |
|
"eval_loss": 0.1128806546330452, |
|
"eval_rouge1": 0.07124894337258883, |
|
"eval_rouge2": 0.0028587092731829577, |
|
"eval_rougeL": 0.06948923152022689, |
|
"eval_runtime": 7.1373, |
|
"eval_samples_per_second": 42.593, |
|
"eval_steps_per_second": 5.324, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.3334260880947113, |
|
"learning_rate": 3.06e-05, |
|
"loss": 0.1108, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_bleu": 0.0, |
|
"eval_loss": 0.06912114471197128, |
|
"eval_rouge1": 0.09507863230508208, |
|
"eval_rouge2": 0.007788772376873063, |
|
"eval_rougeL": 0.09317127790126112, |
|
"eval_runtime": 6.9813, |
|
"eval_samples_per_second": 43.545, |
|
"eval_steps_per_second": 5.443, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 0.2704195976257324, |
|
"learning_rate": 4.5900000000000004e-05, |
|
"loss": 0.0775, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_bleu": 0.006737157818299724, |
|
"eval_loss": 0.06276746839284897, |
|
"eval_rouge1": 0.12907377035145118, |
|
"eval_rouge2": 0.015683124441348124, |
|
"eval_rougeL": 0.1286203798342947, |
|
"eval_runtime": 6.9851, |
|
"eval_samples_per_second": 43.521, |
|
"eval_steps_per_second": 5.44, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 0.32800641655921936, |
|
"learning_rate": 4.7812500000000003e-05, |
|
"loss": 0.0678, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_bleu": 0.008597461521382226, |
|
"eval_loss": 0.059174273163080215, |
|
"eval_rouge1": 0.1524066357986381, |
|
"eval_rouge2": 0.027263211934264564, |
|
"eval_rougeL": 0.14917670820737453, |
|
"eval_runtime": 6.9134, |
|
"eval_samples_per_second": 43.973, |
|
"eval_steps_per_second": 5.497, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 0.18473878502845764, |
|
"learning_rate": 4.482421875e-05, |
|
"loss": 0.0603, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_bleu": 0.016158903735540513, |
|
"eval_loss": 0.05656920000910759, |
|
"eval_rouge1": 0.19186830417118805, |
|
"eval_rouge2": 0.04134465720083692, |
|
"eval_rougeL": 0.18829939413002822, |
|
"eval_runtime": 6.9842, |
|
"eval_samples_per_second": 43.527, |
|
"eval_steps_per_second": 5.441, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 0.2642894685268402, |
|
"learning_rate": 4.18359375e-05, |
|
"loss": 0.0547, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_bleu": 0.018741315914778903, |
|
"eval_loss": 0.05464606359601021, |
|
"eval_rouge1": 0.2238907288379966, |
|
"eval_rouge2": 0.05989126658873175, |
|
"eval_rougeL": 0.22184683032173025, |
|
"eval_runtime": 7.0034, |
|
"eval_samples_per_second": 43.408, |
|
"eval_steps_per_second": 5.426, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 0.24372832477092743, |
|
"learning_rate": 3.884765625e-05, |
|
"loss": 0.0498, |
|
"step": 1071 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_bleu": 0.029468363597052377, |
|
"eval_loss": 0.054019927978515625, |
|
"eval_rouge1": 0.26839447635173574, |
|
"eval_rouge2": 0.07325536872796896, |
|
"eval_rougeL": 0.2638108570075386, |
|
"eval_runtime": 6.9888, |
|
"eval_samples_per_second": 43.498, |
|
"eval_steps_per_second": 5.437, |
|
"step": 1071 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 0.2509661316871643, |
|
"learning_rate": 3.5859375e-05, |
|
"loss": 0.0456, |
|
"step": 1224 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_bleu": 0.029195772701040013, |
|
"eval_loss": 0.053611643612384796, |
|
"eval_rouge1": 0.2883971192764231, |
|
"eval_rouge2": 0.08184845936724905, |
|
"eval_rougeL": 0.28406572737215363, |
|
"eval_runtime": 6.9871, |
|
"eval_samples_per_second": 43.509, |
|
"eval_steps_per_second": 5.439, |
|
"step": 1224 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 0.23149649798870087, |
|
"learning_rate": 3.287109375e-05, |
|
"loss": 0.0419, |
|
"step": 1377 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_bleu": 0.04280893852047394, |
|
"eval_loss": 0.05337703973054886, |
|
"eval_rouge1": 0.31388693258006284, |
|
"eval_rouge2": 0.11036995985896458, |
|
"eval_rougeL": 0.3096668301159336, |
|
"eval_runtime": 6.9883, |
|
"eval_samples_per_second": 43.501, |
|
"eval_steps_per_second": 5.438, |
|
"step": 1377 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 0.2521679401397705, |
|
"learning_rate": 2.9882812500000002e-05, |
|
"loss": 0.0385, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_bleu": 0.046113511822263795, |
|
"eval_loss": 0.05344715714454651, |
|
"eval_rouge1": 0.32550355167793193, |
|
"eval_rouge2": 0.11175887195986847, |
|
"eval_rougeL": 0.3185482802099305, |
|
"eval_runtime": 6.9943, |
|
"eval_samples_per_second": 43.464, |
|
"eval_steps_per_second": 5.433, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 0.23900777101516724, |
|
"learning_rate": 2.689453125e-05, |
|
"loss": 0.0354, |
|
"step": 1683 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_bleu": 0.047259792068284935, |
|
"eval_loss": 0.05403715744614601, |
|
"eval_rouge1": 0.3357586244098568, |
|
"eval_rouge2": 0.12191279657273882, |
|
"eval_rougeL": 0.3287623290777512, |
|
"eval_runtime": 7.0002, |
|
"eval_samples_per_second": 43.427, |
|
"eval_steps_per_second": 5.428, |
|
"step": 1683 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 0.24781078100204468, |
|
"learning_rate": 2.3906250000000002e-05, |
|
"loss": 0.0331, |
|
"step": 1836 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_bleu": 0.0475663160301358, |
|
"eval_loss": 0.05399588495492935, |
|
"eval_rouge1": 0.3483203778520053, |
|
"eval_rouge2": 0.13122980561384404, |
|
"eval_rougeL": 0.3442445629111672, |
|
"eval_runtime": 6.9958, |
|
"eval_samples_per_second": 43.455, |
|
"eval_steps_per_second": 5.432, |
|
"step": 1836 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 0.3433144986629486, |
|
"learning_rate": 2.091796875e-05, |
|
"loss": 0.0308, |
|
"step": 1989 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_bleu": 0.05895730441080768, |
|
"eval_loss": 0.05515788868069649, |
|
"eval_rouge1": 0.3599156447280303, |
|
"eval_rouge2": 0.14385576959590118, |
|
"eval_rougeL": 0.3539109033921022, |
|
"eval_runtime": 6.9936, |
|
"eval_samples_per_second": 43.469, |
|
"eval_steps_per_second": 5.434, |
|
"step": 1989 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 0.3219660520553589, |
|
"learning_rate": 1.79296875e-05, |
|
"loss": 0.0291, |
|
"step": 2142 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_bleu": 0.06246557033315383, |
|
"eval_loss": 0.05557234585285187, |
|
"eval_rouge1": 0.37373672906337496, |
|
"eval_rouge2": 0.14894175508943397, |
|
"eval_rougeL": 0.3669928154707829, |
|
"eval_runtime": 6.9886, |
|
"eval_samples_per_second": 43.499, |
|
"eval_steps_per_second": 5.437, |
|
"step": 2142 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"step": 2142, |
|
"total_flos": 8918419636224000.0, |
|
"train_loss": 0.46412634738241815, |
|
"train_runtime": 1276.9352, |
|
"train_samples_per_second": 19.093, |
|
"train_steps_per_second": 2.396 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 3060, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 8918419636224000.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|