|
{ |
|
"best_metric": 0.20781023800373077, |
|
"best_model_checkpoint": "/home/iais_marenpielka/Bouthaina/res_nw_irq_aragpt2-base/checkpoint-12684", |
|
"epoch": 17.0, |
|
"eval_steps": 500, |
|
"global_step": 17969, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.21245746314525604, |
|
"learning_rate": 4.865067829457365e-05, |
|
"loss": 0.9334, |
|
"step": 1057 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_bleu": 0.003236761532472025, |
|
"eval_loss": 0.24612107872962952, |
|
"eval_rouge1": 0.1592859430436859, |
|
"eval_rouge2": 0.01850245530694966, |
|
"eval_rougeL": 0.15327183350590745, |
|
"eval_runtime": 47.2784, |
|
"eval_samples_per_second": 44.693, |
|
"eval_steps_per_second": 5.605, |
|
"step": 1057 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.1900666505098343, |
|
"learning_rate": 4.609011627906977e-05, |
|
"loss": 0.0868, |
|
"step": 2114 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_bleu": 0.01485892408066134, |
|
"eval_loss": 0.2332119345664978, |
|
"eval_rouge1": 0.24547054015158856, |
|
"eval_rouge2": 0.05101888171169662, |
|
"eval_rougeL": 0.23941488622001442, |
|
"eval_runtime": 106.4482, |
|
"eval_samples_per_second": 19.85, |
|
"eval_steps_per_second": 2.489, |
|
"step": 2114 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 0.17272518575191498, |
|
"learning_rate": 4.3529554263565894e-05, |
|
"loss": 0.0767, |
|
"step": 3171 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_bleu": 0.025242358861939478, |
|
"eval_loss": 0.23418210446834564, |
|
"eval_rouge1": 0.2960794609105263, |
|
"eval_rouge2": 0.078188861596172, |
|
"eval_rougeL": 0.2910032597311343, |
|
"eval_runtime": 46.6571, |
|
"eval_samples_per_second": 45.288, |
|
"eval_steps_per_second": 5.68, |
|
"step": 3171 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 0.26465269923210144, |
|
"learning_rate": 4.096899224806201e-05, |
|
"loss": 0.0696, |
|
"step": 4228 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_bleu": 0.040436621142289915, |
|
"eval_loss": 0.2277713567018509, |
|
"eval_rouge1": 0.3300020560602195, |
|
"eval_rouge2": 0.10496894572689558, |
|
"eval_rougeL": 0.32523455556499214, |
|
"eval_runtime": 46.6771, |
|
"eval_samples_per_second": 45.268, |
|
"eval_steps_per_second": 5.677, |
|
"step": 4228 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 0.19601380825042725, |
|
"learning_rate": 3.840843023255814e-05, |
|
"loss": 0.0636, |
|
"step": 5285 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_bleu": 0.05166825934118283, |
|
"eval_loss": 0.22189512848854065, |
|
"eval_rouge1": 0.3535589938019066, |
|
"eval_rouge2": 0.12152558383102884, |
|
"eval_rougeL": 0.3480324766784169, |
|
"eval_runtime": 76.0841, |
|
"eval_samples_per_second": 27.772, |
|
"eval_steps_per_second": 3.483, |
|
"step": 5285 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 0.22665895521640778, |
|
"learning_rate": 3.5847868217054265e-05, |
|
"loss": 0.0587, |
|
"step": 6342 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_bleu": 0.05899956949346335, |
|
"eval_loss": 0.22368572652339935, |
|
"eval_rouge1": 0.3654354164962346, |
|
"eval_rouge2": 0.13482081314433186, |
|
"eval_rougeL": 0.3611275649771175, |
|
"eval_runtime": 108.2192, |
|
"eval_samples_per_second": 19.525, |
|
"eval_steps_per_second": 2.449, |
|
"step": 6342 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 0.1510099470615387, |
|
"learning_rate": 3.328730620155039e-05, |
|
"loss": 0.0542, |
|
"step": 7399 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_bleu": 0.06674675327067288, |
|
"eval_loss": 0.21935084462165833, |
|
"eval_rouge1": 0.375454032579127, |
|
"eval_rouge2": 0.1440449212655856, |
|
"eval_rougeL": 0.37123789211821234, |
|
"eval_runtime": 169.6548, |
|
"eval_samples_per_second": 12.455, |
|
"eval_steps_per_second": 1.562, |
|
"step": 7399 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 0.17433832585811615, |
|
"learning_rate": 3.0726744186046517e-05, |
|
"loss": 0.0502, |
|
"step": 8456 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_bleu": 0.07150751079471653, |
|
"eval_loss": 0.20800183713436127, |
|
"eval_rouge1": 0.38017302561406896, |
|
"eval_rouge2": 0.1520586373563908, |
|
"eval_rougeL": 0.3761265584375465, |
|
"eval_runtime": 59.9519, |
|
"eval_samples_per_second": 35.245, |
|
"eval_steps_per_second": 4.42, |
|
"step": 8456 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 0.23176324367523193, |
|
"learning_rate": 2.816618217054264e-05, |
|
"loss": 0.0468, |
|
"step": 9513 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_bleu": 0.07701622828998697, |
|
"eval_loss": 0.21230556070804596, |
|
"eval_rouge1": 0.39305333263185294, |
|
"eval_rouge2": 0.16160655664305132, |
|
"eval_rougeL": 0.3889099045386816, |
|
"eval_runtime": 47.0282, |
|
"eval_samples_per_second": 44.93, |
|
"eval_steps_per_second": 5.635, |
|
"step": 9513 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 0.22061537206172943, |
|
"learning_rate": 2.560562015503876e-05, |
|
"loss": 0.0438, |
|
"step": 10570 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_bleu": 0.08116803822206035, |
|
"eval_loss": 0.21123112738132477, |
|
"eval_rouge1": 0.39211790695723037, |
|
"eval_rouge2": 0.16476729267221837, |
|
"eval_rougeL": 0.3884162333833129, |
|
"eval_runtime": 52.3744, |
|
"eval_samples_per_second": 40.344, |
|
"eval_steps_per_second": 5.06, |
|
"step": 10570 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 0.29454106092453003, |
|
"learning_rate": 2.3045058139534884e-05, |
|
"loss": 0.0408, |
|
"step": 11627 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_bleu": 0.08163331720700502, |
|
"eval_loss": 0.21018122136592865, |
|
"eval_rouge1": 0.39673991981574686, |
|
"eval_rouge2": 0.16533579969575996, |
|
"eval_rougeL": 0.3935895133910881, |
|
"eval_runtime": 169.7591, |
|
"eval_samples_per_second": 12.447, |
|
"eval_steps_per_second": 1.561, |
|
"step": 11627 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 0.20639048516750336, |
|
"learning_rate": 2.0484496124031007e-05, |
|
"loss": 0.0384, |
|
"step": 12684 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_bleu": 0.08445355735141684, |
|
"eval_loss": 0.20781023800373077, |
|
"eval_rouge1": 0.40183732359428986, |
|
"eval_rouge2": 0.17106600485502385, |
|
"eval_rougeL": 0.39778146195081354, |
|
"eval_runtime": 169.7164, |
|
"eval_samples_per_second": 12.45, |
|
"eval_steps_per_second": 1.561, |
|
"step": 12684 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 0.206428661942482, |
|
"learning_rate": 1.7923934108527132e-05, |
|
"loss": 0.0363, |
|
"step": 13741 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_bleu": 0.08696571570490616, |
|
"eval_loss": 0.2144923061132431, |
|
"eval_rouge1": 0.4023135478695373, |
|
"eval_rouge2": 0.1719689510122407, |
|
"eval_rougeL": 0.39862674092398764, |
|
"eval_runtime": 169.681, |
|
"eval_samples_per_second": 12.453, |
|
"eval_steps_per_second": 1.562, |
|
"step": 13741 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 0.1995713710784912, |
|
"learning_rate": 1.5363372093023258e-05, |
|
"loss": 0.0343, |
|
"step": 14798 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_bleu": 0.08783833270174173, |
|
"eval_loss": 0.21651192009449005, |
|
"eval_rouge1": 0.406251961398795, |
|
"eval_rouge2": 0.17568144009642983, |
|
"eval_rougeL": 0.4022547036607911, |
|
"eval_runtime": 169.5372, |
|
"eval_samples_per_second": 12.463, |
|
"eval_steps_per_second": 1.563, |
|
"step": 14798 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 0.22589825093746185, |
|
"learning_rate": 1.280281007751938e-05, |
|
"loss": 0.0327, |
|
"step": 15855 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_bleu": 0.09203652391095736, |
|
"eval_loss": 0.21689023077487946, |
|
"eval_rouge1": 0.4049011529575125, |
|
"eval_rouge2": 0.17920037507226205, |
|
"eval_rougeL": 0.4013754949817908, |
|
"eval_runtime": 169.7215, |
|
"eval_samples_per_second": 12.45, |
|
"eval_steps_per_second": 1.561, |
|
"step": 15855 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 0.2390994280576706, |
|
"learning_rate": 1.0242248062015503e-05, |
|
"loss": 0.0313, |
|
"step": 16912 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_bleu": 0.09196638031992929, |
|
"eval_loss": 0.21753403544425964, |
|
"eval_rouge1": 0.4078242153735353, |
|
"eval_rouge2": 0.18209517089320632, |
|
"eval_rougeL": 0.40483345816401317, |
|
"eval_runtime": 169.6048, |
|
"eval_samples_per_second": 12.458, |
|
"eval_steps_per_second": 1.562, |
|
"step": 16912 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"grad_norm": 0.31637752056121826, |
|
"learning_rate": 7.681686046511629e-06, |
|
"loss": 0.0301, |
|
"step": 17969 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_bleu": 0.09441216110381762, |
|
"eval_loss": 0.2191299945116043, |
|
"eval_rouge1": 0.41033944929551286, |
|
"eval_rouge2": 0.18390791273310297, |
|
"eval_rougeL": 0.40661041910814566, |
|
"eval_runtime": 169.743, |
|
"eval_samples_per_second": 12.448, |
|
"eval_steps_per_second": 1.561, |
|
"step": 17969 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"step": 17969, |
|
"total_flos": 7.5086968651776e+16, |
|
"train_loss": 0.10163650903314878, |
|
"train_runtime": 11373.3667, |
|
"train_samples_per_second": 14.863, |
|
"train_steps_per_second": 1.859 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 21140, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 7.5086968651776e+16, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|