|
{ |
|
"best_metric": 1.1622098684310913, |
|
"best_model_checkpoint": "/home/iais_marenpielka/Bouthaina/res_nw_irq_1.5/checkpoint-5285", |
|
"epoch": 10.0, |
|
"eval_steps": 500, |
|
"global_step": 10570, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.9448983073234558, |
|
"learning_rate": 4.865067829457365e-05, |
|
"loss": 2.1843, |
|
"step": 1057 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_bleu": 0.32059447298359617, |
|
"eval_loss": 1.250288486480713, |
|
"eval_rouge1": 0.4317992213146498, |
|
"eval_rouge2": 0.17666011159565037, |
|
"eval_rougeL": 0.42939906088943486, |
|
"eval_runtime": 57.4393, |
|
"eval_samples_per_second": 36.787, |
|
"eval_steps_per_second": 4.614, |
|
"step": 1057 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.9019042253494263, |
|
"learning_rate": 4.609011627906977e-05, |
|
"loss": 0.6485, |
|
"step": 2114 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_bleu": 0.33426327412457646, |
|
"eval_loss": 1.1708217859268188, |
|
"eval_rouge1": 0.467300193070487, |
|
"eval_rouge2": 0.20644532227230164, |
|
"eval_rougeL": 0.4650745009842402, |
|
"eval_runtime": 57.7779, |
|
"eval_samples_per_second": 36.571, |
|
"eval_steps_per_second": 4.587, |
|
"step": 2114 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 0.9029839038848877, |
|
"learning_rate": 4.3529554263565894e-05, |
|
"loss": 0.5688, |
|
"step": 3171 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_bleu": 0.34132680263228726, |
|
"eval_loss": 1.1971228122711182, |
|
"eval_rouge1": 0.48735678632841767, |
|
"eval_rouge2": 0.2243137926840742, |
|
"eval_rougeL": 0.4850815732515227, |
|
"eval_runtime": 57.4731, |
|
"eval_samples_per_second": 36.765, |
|
"eval_steps_per_second": 4.611, |
|
"step": 3171 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 1.0017938613891602, |
|
"learning_rate": 4.096899224806201e-05, |
|
"loss": 0.5047, |
|
"step": 4228 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_bleu": 0.35003996586498176, |
|
"eval_loss": 1.197810173034668, |
|
"eval_rouge1": 0.5054128473867857, |
|
"eval_rouge2": 0.2430653416823758, |
|
"eval_rougeL": 0.5030122857034254, |
|
"eval_runtime": 72.8161, |
|
"eval_samples_per_second": 29.018, |
|
"eval_steps_per_second": 3.639, |
|
"step": 4228 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 1.0304298400878906, |
|
"learning_rate": 3.840843023255814e-05, |
|
"loss": 0.4482, |
|
"step": 5285 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_bleu": 0.3502421699944588, |
|
"eval_loss": 1.1622098684310913, |
|
"eval_rouge1": 0.5096445443599783, |
|
"eval_rouge2": 0.24692932072035692, |
|
"eval_rougeL": 0.5075533735060334, |
|
"eval_runtime": 61.0964, |
|
"eval_samples_per_second": 34.585, |
|
"eval_steps_per_second": 4.337, |
|
"step": 5285 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 1.1523360013961792, |
|
"learning_rate": 3.5847868217054265e-05, |
|
"loss": 0.3971, |
|
"step": 6342 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_bleu": 0.3536202464127507, |
|
"eval_loss": 1.2058446407318115, |
|
"eval_rouge1": 0.5150055128188905, |
|
"eval_rouge2": 0.25352326254776286, |
|
"eval_rougeL": 0.5127629511333613, |
|
"eval_runtime": 57.6879, |
|
"eval_samples_per_second": 36.628, |
|
"eval_steps_per_second": 4.594, |
|
"step": 6342 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 0.8643929958343506, |
|
"learning_rate": 3.328730620155039e-05, |
|
"loss": 0.3513, |
|
"step": 7399 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_bleu": 0.3518598607895297, |
|
"eval_loss": 1.2158491611480713, |
|
"eval_rouge1": 0.5180154682612823, |
|
"eval_rouge2": 0.25664229580394476, |
|
"eval_rougeL": 0.5154649421568362, |
|
"eval_runtime": 121.1214, |
|
"eval_samples_per_second": 17.445, |
|
"eval_steps_per_second": 2.188, |
|
"step": 7399 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 1.1156113147735596, |
|
"learning_rate": 3.0726744186046517e-05, |
|
"loss": 0.3105, |
|
"step": 8456 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_bleu": 0.35342870570348306, |
|
"eval_loss": 1.2344255447387695, |
|
"eval_rouge1": 0.5225097884447725, |
|
"eval_rouge2": 0.2629872301742399, |
|
"eval_rougeL": 0.5202162260924711, |
|
"eval_runtime": 57.6564, |
|
"eval_samples_per_second": 36.648, |
|
"eval_steps_per_second": 4.596, |
|
"step": 8456 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 1.2186470031738281, |
|
"learning_rate": 2.816618217054264e-05, |
|
"loss": 0.2743, |
|
"step": 9513 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_bleu": 0.35543765162196594, |
|
"eval_loss": 1.2988064289093018, |
|
"eval_rouge1": 0.5277394207696515, |
|
"eval_rouge2": 0.26929133120484805, |
|
"eval_rougeL": 0.5249418411792439, |
|
"eval_runtime": 57.8354, |
|
"eval_samples_per_second": 36.535, |
|
"eval_steps_per_second": 4.582, |
|
"step": 9513 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 1.028747797012329, |
|
"learning_rate": 2.560562015503876e-05, |
|
"loss": 0.2426, |
|
"step": 10570 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_bleu": 0.3550745821213054, |
|
"eval_loss": 1.3222092390060425, |
|
"eval_rouge1": 0.5235234737619403, |
|
"eval_rouge2": 0.26498166415837476, |
|
"eval_rougeL": 0.5206776898832342, |
|
"eval_runtime": 57.289, |
|
"eval_samples_per_second": 36.883, |
|
"eval_steps_per_second": 4.626, |
|
"step": 10570 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 10570, |
|
"total_flos": 9.5780710219776e+16, |
|
"train_loss": 0.5930478791640181, |
|
"train_runtime": 28339.5203, |
|
"train_samples_per_second": 5.965, |
|
"train_steps_per_second": 0.746 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 21140, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 9.5780710219776e+16, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|