|
{ |
|
"best_metric": 0.3495321273803711, |
|
"best_model_checkpoint": "/home/iais_marenpielka/Bouthaina/res_nw_gulf_03/checkpoint-1672", |
|
"epoch": 7.0, |
|
"eval_steps": 500, |
|
"global_step": 5852, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 1.8581334352493286, |
|
"learning_rate": 4.896424167694204e-05, |
|
"loss": 1.1759, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_bleu": 0.41556477975575595, |
|
"eval_loss": 0.37735649943351746, |
|
"eval_rouge1": 0.6364308285085805, |
|
"eval_rouge2": 0.36296002645709646, |
|
"eval_rougeL": 0.6351682159497727, |
|
"eval_runtime": 16.4559, |
|
"eval_samples_per_second": 101.605, |
|
"eval_steps_per_second": 12.701, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 2.0768027305603027, |
|
"learning_rate": 4.638717632552405e-05, |
|
"loss": 0.2824, |
|
"step": 1672 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_bleu": 0.43550162080921356, |
|
"eval_loss": 0.3495321273803711, |
|
"eval_rouge1": 0.6807566585005222, |
|
"eval_rouge2": 0.42184236623206983, |
|
"eval_rougeL": 0.6798697145255593, |
|
"eval_runtime": 12.3396, |
|
"eval_samples_per_second": 135.499, |
|
"eval_steps_per_second": 16.937, |
|
"step": 1672 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 1.4890309572219849, |
|
"learning_rate": 4.3810110974106046e-05, |
|
"loss": 0.1846, |
|
"step": 2508 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_bleu": 0.44798984557044996, |
|
"eval_loss": 0.35791030526161194, |
|
"eval_rouge1": 0.6988772317351744, |
|
"eval_rouge2": 0.4541195944585126, |
|
"eval_rougeL": 0.6983599524493045, |
|
"eval_runtime": 13.092, |
|
"eval_samples_per_second": 127.712, |
|
"eval_steps_per_second": 15.964, |
|
"step": 2508 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 0.9949209690093994, |
|
"learning_rate": 4.1233045622688044e-05, |
|
"loss": 0.1351, |
|
"step": 3344 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_bleu": 0.4449484201171957, |
|
"eval_loss": 0.36742404103279114, |
|
"eval_rouge1": 0.6992663461784911, |
|
"eval_rouge2": 0.45557653256736347, |
|
"eval_rougeL": 0.6984837283971637, |
|
"eval_runtime": 12.3128, |
|
"eval_samples_per_second": 135.794, |
|
"eval_steps_per_second": 16.974, |
|
"step": 3344 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 1.245552897453308, |
|
"learning_rate": 3.8655980271270036e-05, |
|
"loss": 0.11, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_bleu": 0.4488174409372482, |
|
"eval_loss": 0.3791240453720093, |
|
"eval_rouge1": 0.7066508462378398, |
|
"eval_rouge2": 0.46813923665655116, |
|
"eval_rougeL": 0.7055578408590141, |
|
"eval_runtime": 18.2521, |
|
"eval_samples_per_second": 91.606, |
|
"eval_steps_per_second": 11.451, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 1.253645896911621, |
|
"learning_rate": 3.6078914919852034e-05, |
|
"loss": 0.0981, |
|
"step": 5016 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_bleu": 0.4582553582864403, |
|
"eval_loss": 0.39224255084991455, |
|
"eval_rouge1": 0.7135223253955782, |
|
"eval_rouge2": 0.4789126140106226, |
|
"eval_rougeL": 0.7129027733719826, |
|
"eval_runtime": 65.9112, |
|
"eval_samples_per_second": 25.367, |
|
"eval_steps_per_second": 3.171, |
|
"step": 5016 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 0.6491497755050659, |
|
"learning_rate": 3.350184956843403e-05, |
|
"loss": 0.0917, |
|
"step": 5852 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_bleu": 0.4567065309085701, |
|
"eval_loss": 0.3964388072490692, |
|
"eval_rouge1": 0.7126519996998933, |
|
"eval_rouge2": 0.47787123299802714, |
|
"eval_rougeL": 0.7120354739021442, |
|
"eval_runtime": 21.7898, |
|
"eval_samples_per_second": 76.733, |
|
"eval_steps_per_second": 9.592, |
|
"step": 5852 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"step": 5852, |
|
"total_flos": 1.086463725010944e+16, |
|
"train_loss": 0.29683067467794466, |
|
"train_runtime": 4067.7631, |
|
"train_samples_per_second": 32.868, |
|
"train_steps_per_second": 4.11 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 16720, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.086463725010944e+16, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|