|
{
  "best_metric": 0.04986047372221947,
  "best_model_checkpoint": "/home/iais_marenpielka/Bouthaina/res_nw_gulf_aragpt2-base/checkpoint-4180",
  "epoch": 10.0,
  "eval_steps": 500,
  "global_step": 8360,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "grad_norm": 0.2068207710981369,
      "learning_rate": 4.896424167694204e-05,
      "loss": 1.1245,
      "step": 836
    },
    {
      "epoch": 1.0,
      "eval_bleu": 0.00522127887673706,
      "eval_loss": 0.060549940913915634,
      "eval_rouge1": 0.16261381712821865,
      "eval_rouge2": 0.016191591492833397,
      "eval_rougeL": 0.16017425267387253,
      "eval_runtime": 159.9696,
      "eval_samples_per_second": 10.452,
      "eval_steps_per_second": 1.306,
      "step": 836
    },
    {
      "epoch": 2.0,
      "grad_norm": 0.19557742774486542,
      "learning_rate": 4.638717632552405e-05,
      "loss": 0.0632,
      "step": 1672
    },
    {
      "epoch": 2.0,
      "eval_bleu": 0.011943726030788412,
      "eval_loss": 0.05519399791955948,
      "eval_rouge1": 0.2541122949630313,
      "eval_rouge2": 0.051699799633347324,
      "eval_rougeL": 0.2510765914158727,
      "eval_runtime": 221.3727,
      "eval_samples_per_second": 7.553,
      "eval_steps_per_second": 0.944,
      "step": 1672
    },
    {
      "epoch": 3.0,
      "grad_norm": 0.14342406392097473,
      "learning_rate": 4.3810110974106046e-05,
      "loss": 0.055,
      "step": 2508
    },
    {
      "epoch": 3.0,
      "eval_bleu": 0.02252740132414782,
      "eval_loss": 0.05222811922430992,
      "eval_rouge1": 0.30471930149085674,
      "eval_rouge2": 0.08185853906387802,
      "eval_rougeL": 0.30154781860876523,
      "eval_runtime": 36.8724,
      "eval_samples_per_second": 45.346,
      "eval_steps_per_second": 5.668,
      "step": 2508
    },
    {
      "epoch": 4.0,
      "grad_norm": 0.16051243245601654,
      "learning_rate": 4.1233045622688044e-05,
      "loss": 0.0492,
      "step": 3344
    },
    {
      "epoch": 4.0,
      "eval_bleu": 0.032582447132208606,
      "eval_loss": 0.051017943769693375,
      "eval_rouge1": 0.3346924293421615,
      "eval_rouge2": 0.10192760972807041,
      "eval_rougeL": 0.33180504017979595,
      "eval_runtime": 159.805,
      "eval_samples_per_second": 10.463,
      "eval_steps_per_second": 1.308,
      "step": 3344
    },
    {
      "epoch": 5.0,
      "grad_norm": 0.15151090919971466,
      "learning_rate": 3.8655980271270036e-05,
      "loss": 0.0444,
      "step": 4180
    },
    {
      "epoch": 5.0,
      "eval_bleu": 0.03933306151343601,
      "eval_loss": 0.04986047372221947,
      "eval_rouge1": 0.35895878709405493,
      "eval_rouge2": 0.124247769713655,
      "eval_rougeL": 0.3572225001900925,
      "eval_runtime": 159.8295,
      "eval_samples_per_second": 10.461,
      "eval_steps_per_second": 1.308,
      "step": 4180
    },
    {
      "epoch": 6.0,
      "grad_norm": 0.11806467920541763,
      "learning_rate": 3.6078914919852034e-05,
      "loss": 0.0402,
      "step": 5016
    },
    {
      "epoch": 6.0,
      "eval_bleu": 0.04622708241683158,
      "eval_loss": 0.04995572566986084,
      "eval_rouge1": 0.3810393862934106,
      "eval_rouge2": 0.13792791296322293,
      "eval_rougeL": 0.3788340225664032,
      "eval_runtime": 159.7942,
      "eval_samples_per_second": 10.463,
      "eval_steps_per_second": 1.308,
      "step": 5016
    },
    {
      "epoch": 7.0,
      "grad_norm": 0.15260820090770721,
      "learning_rate": 3.350184956843403e-05,
      "loss": 0.0366,
      "step": 5852
    },
    {
      "epoch": 7.0,
      "eval_bleu": 0.04988840099834184,
      "eval_loss": 0.05027288198471069,
      "eval_rouge1": 0.3961215133733164,
      "eval_rouge2": 0.15246703105477005,
      "eval_rougeL": 0.3937993288436519,
      "eval_runtime": 36.8733,
      "eval_samples_per_second": 45.344,
      "eval_steps_per_second": 5.668,
      "step": 5852
    },
    {
      "epoch": 8.0,
      "grad_norm": 0.14294394850730896,
      "learning_rate": 3.092478421701603e-05,
      "loss": 0.0334,
      "step": 6688
    },
    {
      "epoch": 8.0,
      "eval_bleu": 0.05614309403616892,
      "eval_loss": 0.050896577537059784,
      "eval_rouge1": 0.407117249999222,
      "eval_rouge2": 0.15984144200856937,
      "eval_rougeL": 0.40517229812025873,
      "eval_runtime": 37.0371,
      "eval_samples_per_second": 45.144,
      "eval_steps_per_second": 5.643,
      "step": 6688
    },
    {
      "epoch": 9.0,
      "grad_norm": 0.20682789385318756,
      "learning_rate": 2.8347718865598028e-05,
      "loss": 0.0307,
      "step": 7524
    },
    {
      "epoch": 9.0,
      "eval_bleu": 0.0606895848585085,
      "eval_loss": 0.05167483910918236,
      "eval_rouge1": 0.41011122575047865,
      "eval_rouge2": 0.1731982844904956,
      "eval_rougeL": 0.40855415076231816,
      "eval_runtime": 159.7295,
      "eval_samples_per_second": 10.468,
      "eval_steps_per_second": 1.308,
      "step": 7524
    },
    {
      "epoch": 10.0,
      "grad_norm": 0.13990797102451324,
      "learning_rate": 2.5770653514180026e-05,
      "loss": 0.0283,
      "step": 8360
    },
    {
      "epoch": 10.0,
      "eval_bleu": 0.06531719425427113,
      "eval_loss": 0.05382031202316284,
      "eval_rouge1": 0.41672477670989894,
      "eval_rouge2": 0.17546430438593302,
      "eval_rougeL": 0.4150315774802549,
      "eval_runtime": 159.9272,
      "eval_samples_per_second": 10.455,
      "eval_steps_per_second": 1.307,
      "step": 8360
    },
    {
      "epoch": 10.0,
      "step": 8360,
      "total_flos": 3.49347446784e+16,
      "train_loss": 0.15054923290270938,
      "train_runtime": 5668.5001,
      "train_samples_per_second": 23.586,
      "train_steps_per_second": 2.95
    }
  ],
  "logging_steps": 500,
  "max_steps": 16720,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 20,
  "save_steps": 500,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 3.49347446784e+16,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}
|
|