|
{ |
|
"best_metric": 0.4183219075202942, |
|
"best_model_checkpoint": "/home/iais_marenpielka/Bouthaina/res_nw_gulf/checkpoint-3352", |
|
"epoch": 14.0, |
|
"eval_steps": 500, |
|
"global_step": 11732, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 1.1270569562911987, |
|
"learning_rate": 4.8960639606396064e-05, |
|
"loss": 1.9575, |
|
"step": 838 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_bleu": 0.17899151773967856, |
|
"eval_loss": 0.4805418848991394, |
|
"eval_rouge1": 0.5300700267573193, |
|
"eval_rouge2": 0.25846419107204655, |
|
"eval_rougeL": 0.5287371290578213, |
|
"eval_runtime": 26.4765, |
|
"eval_samples_per_second": 63.264, |
|
"eval_steps_per_second": 7.932, |
|
"step": 838 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.8918192982673645, |
|
"learning_rate": 4.6383763837638376e-05, |
|
"loss": 0.422, |
|
"step": 1676 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_bleu": 0.2111987576275603, |
|
"eval_loss": 0.4326799809932709, |
|
"eval_rouge1": 0.5789346222407353, |
|
"eval_rouge2": 0.3115433580134526, |
|
"eval_rougeL": 0.5777838651600141, |
|
"eval_runtime": 36.5489, |
|
"eval_samples_per_second": 45.829, |
|
"eval_steps_per_second": 5.746, |
|
"step": 1676 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 1.1533405780792236, |
|
"learning_rate": 4.380688806888069e-05, |
|
"loss": 0.331, |
|
"step": 2514 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_bleu": 0.22887638181669745, |
|
"eval_loss": 0.41845184564590454, |
|
"eval_rouge1": 0.6066983915980586, |
|
"eval_rouge2": 0.34626660980048474, |
|
"eval_rougeL": 0.6055721645591957, |
|
"eval_runtime": 25.3383, |
|
"eval_samples_per_second": 66.106, |
|
"eval_steps_per_second": 8.288, |
|
"step": 2514 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 1.3615330457687378, |
|
"learning_rate": 4.123001230012301e-05, |
|
"loss": 0.2638, |
|
"step": 3352 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_bleu": 0.23736106874390867, |
|
"eval_loss": 0.4183219075202942, |
|
"eval_rouge1": 0.6213217050496613, |
|
"eval_rouge2": 0.3629928034660268, |
|
"eval_rougeL": 0.6194945230812778, |
|
"eval_runtime": 16.1799, |
|
"eval_samples_per_second": 103.524, |
|
"eval_steps_per_second": 12.979, |
|
"step": 3352 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 1.29606294631958, |
|
"learning_rate": 3.865313653136531e-05, |
|
"loss": 0.2131, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_bleu": 0.24225546513762886, |
|
"eval_loss": 0.42484816908836365, |
|
"eval_rouge1": 0.6280394265150125, |
|
"eval_rouge2": 0.3718667234148674, |
|
"eval_rougeL": 0.6264526155291095, |
|
"eval_runtime": 25.4283, |
|
"eval_samples_per_second": 65.872, |
|
"eval_steps_per_second": 8.259, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 1.9162726402282715, |
|
"learning_rate": 3.6076260762607624e-05, |
|
"loss": 0.1756, |
|
"step": 5028 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_bleu": 0.25110200021461276, |
|
"eval_loss": 0.4322951138019562, |
|
"eval_rouge1": 0.6329099900871186, |
|
"eval_rouge2": 0.3818443278310246, |
|
"eval_rougeL": 0.6311516008372622, |
|
"eval_runtime": 13.3457, |
|
"eval_samples_per_second": 125.508, |
|
"eval_steps_per_second": 15.735, |
|
"step": 5028 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 1.2213388681411743, |
|
"learning_rate": 3.349938499384994e-05, |
|
"loss": 0.148, |
|
"step": 5866 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_bleu": 0.24584223218153298, |
|
"eval_loss": 0.4369480013847351, |
|
"eval_rouge1": 0.6346640667719812, |
|
"eval_rouge2": 0.3890963668887653, |
|
"eval_rougeL": 0.6326978702709232, |
|
"eval_runtime": 6.3313, |
|
"eval_samples_per_second": 264.559, |
|
"eval_steps_per_second": 33.169, |
|
"step": 5866 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 1.774276852607727, |
|
"learning_rate": 3.092250922509225e-05, |
|
"loss": 0.1285, |
|
"step": 6704 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_bleu": 0.2499016538097514, |
|
"eval_loss": 0.4447513520717621, |
|
"eval_rouge1": 0.6380175687280993, |
|
"eval_rouge2": 0.38845767776215345, |
|
"eval_rougeL": 0.6368182514454324, |
|
"eval_runtime": 5.4681, |
|
"eval_samples_per_second": 306.322, |
|
"eval_steps_per_second": 38.405, |
|
"step": 6704 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 0.779574990272522, |
|
"learning_rate": 2.8345633456334564e-05, |
|
"loss": 0.1152, |
|
"step": 7542 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_bleu": 0.2538776211394201, |
|
"eval_loss": 0.45251065492630005, |
|
"eval_rouge1": 0.6407467681145795, |
|
"eval_rouge2": 0.3976657405838769, |
|
"eval_rougeL": 0.6393461853572592, |
|
"eval_runtime": 6.3546, |
|
"eval_samples_per_second": 263.59, |
|
"eval_steps_per_second": 33.047, |
|
"step": 7542 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 1.159719705581665, |
|
"learning_rate": 2.5768757687576876e-05, |
|
"loss": 0.105, |
|
"step": 8380 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_bleu": 0.2555335148067654, |
|
"eval_loss": 0.45896273851394653, |
|
"eval_rouge1": 0.6440840106866277, |
|
"eval_rouge2": 0.3998364161836553, |
|
"eval_rougeL": 0.6425824451349154, |
|
"eval_runtime": 5.4849, |
|
"eval_samples_per_second": 305.385, |
|
"eval_steps_per_second": 38.287, |
|
"step": 8380 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 0.923600435256958, |
|
"learning_rate": 2.3191881918819188e-05, |
|
"loss": 0.0982, |
|
"step": 9218 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_bleu": 0.25802091175426106, |
|
"eval_loss": 0.46457362174987793, |
|
"eval_rouge1": 0.6455262594531862, |
|
"eval_rouge2": 0.40187791063551404, |
|
"eval_rougeL": 0.6445205006389869, |
|
"eval_runtime": 6.4432, |
|
"eval_samples_per_second": 259.965, |
|
"eval_steps_per_second": 32.593, |
|
"step": 9218 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 0.9120431542396545, |
|
"learning_rate": 2.0615006150061504e-05, |
|
"loss": 0.0936, |
|
"step": 10056 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_bleu": 0.257202175451904, |
|
"eval_loss": 0.470233678817749, |
|
"eval_rouge1": 0.6456936789584837, |
|
"eval_rouge2": 0.4045772651416589, |
|
"eval_rougeL": 0.6445820832270409, |
|
"eval_runtime": 13.7399, |
|
"eval_samples_per_second": 121.907, |
|
"eval_steps_per_second": 15.284, |
|
"step": 10056 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 0.6435267329216003, |
|
"learning_rate": 1.8038130381303812e-05, |
|
"loss": 0.0899, |
|
"step": 10894 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_bleu": 0.25765634141938715, |
|
"eval_loss": 0.47367072105407715, |
|
"eval_rouge1": 0.6488279078084271, |
|
"eval_rouge2": 0.40534740561340493, |
|
"eval_rougeL": 0.6478436883847489, |
|
"eval_runtime": 17.2761, |
|
"eval_samples_per_second": 96.955, |
|
"eval_steps_per_second": 12.156, |
|
"step": 10894 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 0.7371336221694946, |
|
"learning_rate": 1.5461254612546124e-05, |
|
"loss": 0.0871, |
|
"step": 11732 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_bleu": 0.2606034683461693, |
|
"eval_loss": 0.4779162108898163, |
|
"eval_rouge1": 0.6491650390141908, |
|
"eval_rouge2": 0.40618585329089035, |
|
"eval_rougeL": 0.6482221321240169, |
|
"eval_runtime": 5.371, |
|
"eval_samples_per_second": 311.859, |
|
"eval_steps_per_second": 39.099, |
|
"step": 11732 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"step": 11732, |
|
"total_flos": 6127298150400000.0, |
|
"train_loss": 0.03383859399638808, |
|
"train_runtime": 1080.9024, |
|
"train_samples_per_second": 123.97, |
|
"train_steps_per_second": 15.506 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 16760, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 6127298150400000.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|