|
{
  "best_metric": 0.0678805559873581,
  "best_model_checkpoint": "/home/iais_marenpielka/Bouthaina/res_nw_arabiangpt0.3/checkpoint-101256",
  "epoch": 8.0,
  "eval_steps": 500,
  "global_step": 270016,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "grad_norm": 0.5094387531280518,
      "learning_rate": 4.753520917958906e-05,
      "loss": 0.0845,
      "step": 33752
    },
    {
      "epoch": 1.0,
      "eval_bleu": 0.1360083183193832,
      "eval_loss": 0.0708920955657959,
      "eval_rouge1": 0.48412628006639347,
      "eval_rouge2": 0.2595444928790475,
      "eval_rougeL": 0.4811152550714598,
      "eval_runtime": 2076.8563,
      "eval_samples_per_second": 16.249,
      "eval_steps_per_second": 4.062,
      "step": 33752
    },
    {
      "epoch": 2.0,
      "grad_norm": 1.2100098133087158,
      "learning_rate": 4.503335606487384e-05,
      "loss": 0.0638,
      "step": 67504
    },
    {
      "epoch": 2.0,
      "eval_bleu": 0.15568253204889895,
      "eval_loss": 0.06831026077270508,
      "eval_rouge1": 0.516649446787459,
      "eval_rouge2": 0.2948018415714037,
      "eval_rougeL": 0.514024802556861,
      "eval_runtime": 2012.7332,
      "eval_samples_per_second": 16.766,
      "eval_steps_per_second": 4.192,
      "step": 67504
    },
    {
      "epoch": 3.0,
      "grad_norm": 0.4080255627632141,
      "learning_rate": 4.253150295015863e-05,
      "loss": 0.0546,
      "step": 101256
    },
    {
      "epoch": 3.0,
      "eval_bleu": 0.16813566870280167,
      "eval_loss": 0.0678805559873581,
      "eval_rouge1": 0.5342839149814531,
      "eval_rouge2": 0.3157655613133348,
      "eval_rougeL": 0.5319180103197795,
      "eval_runtime": 1952.412,
      "eval_samples_per_second": 17.284,
      "eval_steps_per_second": 4.321,
      "step": 101256
    },
    {
      "epoch": 4.0,
      "grad_norm": 0.40639498829841614,
      "learning_rate": 4.002964983544342e-05,
      "loss": 0.0468,
      "step": 135008
    },
    {
      "epoch": 4.0,
      "eval_bleu": 0.18301542857870237,
      "eval_loss": 0.06886506825685501,
      "eval_rouge1": 0.5476940640774866,
      "eval_rouge2": 0.33203989935219014,
      "eval_rougeL": 0.5452209555156204,
      "eval_runtime": 1954.1613,
      "eval_samples_per_second": 17.269,
      "eval_steps_per_second": 4.317,
      "step": 135008
    },
    {
      "epoch": 5.0,
      "grad_norm": 0.5491040945053101,
      "learning_rate": 3.7527796720728204e-05,
      "loss": 0.0401,
      "step": 168760
    },
    {
      "epoch": 5.0,
      "eval_bleu": 0.19405846737344298,
      "eval_loss": 0.0706464946269989,
      "eval_rouge1": 0.5548980705277954,
      "eval_rouge2": 0.34218497796474945,
      "eval_rougeL": 0.55265645352457,
      "eval_runtime": 2015.2556,
      "eval_samples_per_second": 16.745,
      "eval_steps_per_second": 4.187,
      "step": 168760
    },
    {
      "epoch": 6.0,
      "grad_norm": 0.2615252435207367,
      "learning_rate": 3.502594360601299e-05,
      "loss": 0.0345,
      "step": 202512
    },
    {
      "epoch": 6.0,
      "eval_bleu": 0.20164731422462528,
      "eval_loss": 0.07342757284641266,
      "eval_rouge1": 0.5579768475101489,
      "eval_rouge2": 0.34858517959726787,
      "eval_rougeL": 0.5558276150233377,
      "eval_runtime": 2028.3934,
      "eval_samples_per_second": 16.637,
      "eval_steps_per_second": 4.159,
      "step": 202512
    },
    {
      "epoch": 7.0,
      "grad_norm": 0.24876318871974945,
      "learning_rate": 3.252409049129777e-05,
      "loss": 0.0302,
      "step": 236264
    },
    {
      "epoch": 7.0,
      "eval_bleu": 0.2083227447079373,
      "eval_loss": 0.0768970176577568,
      "eval_rouge1": 0.5600411658774835,
      "eval_rouge2": 0.3535281728359022,
      "eval_rougeL": 0.5576670140997706,
      "eval_runtime": 2016.181,
      "eval_samples_per_second": 16.738,
      "eval_steps_per_second": 4.185,
      "step": 236264
    },
    {
      "epoch": 8.0,
      "grad_norm": 0.5295557379722595,
      "learning_rate": 3.0022237376582564e-05,
      "loss": 0.0269,
      "step": 270016
    },
    {
      "epoch": 8.0,
      "eval_bleu": 0.21123631944078344,
      "eval_loss": 0.07967726141214371,
      "eval_rouge1": 0.5591904067243532,
      "eval_rouge2": 0.3545914045960048,
      "eval_rougeL": 0.5569180306629817,
      "eval_runtime": 2017.1957,
      "eval_samples_per_second": 16.729,
      "eval_steps_per_second": 4.183,
      "step": 270016
    },
    {
      "epoch": 8.0,
      "step": 270016,
      "total_flos": 2.0060826554471547e+18,
      "train_loss": 0.04765514560038514,
      "train_runtime": 203650.1496,
      "train_samples_per_second": 13.259,
      "train_steps_per_second": 3.315
    }
  ],
  "logging_steps": 500,
  "max_steps": 675040,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 20,
  "save_steps": 500,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.0060826554471547e+18,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}