|
{ |
|
"best_metric": 3.359348773956299, |
|
"best_model_checkpoint": "BASH-Coder-Flan-T5-base/checkpoint-5614", |
|
"epoch": 10.0, |
|
"eval_steps": 500, |
|
"global_step": 8020, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.556818181818182e-05, |
|
"loss": 4.3554, |
|
"step": 802 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_gen_len": 15.2363, |
|
"eval_loss": 3.59275484085083, |
|
"eval_rouge1": 22.7234, |
|
"eval_rouge2": 6.7951, |
|
"eval_rougeL": 22.0647, |
|
"eval_rougeLsum": 22.0744, |
|
"eval_runtime": 190.6891, |
|
"eval_samples_per_second": 9.675, |
|
"eval_steps_per_second": 0.608, |
|
"step": 802 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 4.0505050505050506e-05, |
|
"loss": 3.5335, |
|
"step": 1604 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_gen_len": 15.168, |
|
"eval_loss": 3.4654388427734375, |
|
"eval_rouge1": 25.7842, |
|
"eval_rouge2": 8.5847, |
|
"eval_rougeL": 24.8207, |
|
"eval_rougeLsum": 24.8808, |
|
"eval_runtime": 187.8163, |
|
"eval_samples_per_second": 9.823, |
|
"eval_steps_per_second": 0.618, |
|
"step": 1604 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 3.5441919191919196e-05, |
|
"loss": 3.3341, |
|
"step": 2406 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_gen_len": 15.6472, |
|
"eval_loss": 3.4077794551849365, |
|
"eval_rouge1": 25.5756, |
|
"eval_rouge2": 8.4456, |
|
"eval_rougeL": 24.706, |
|
"eval_rougeLsum": 24.7207, |
|
"eval_runtime": 187.2408, |
|
"eval_samples_per_second": 9.854, |
|
"eval_steps_per_second": 0.62, |
|
"step": 2406 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 3.037878787878788e-05, |
|
"loss": 3.2011, |
|
"step": 3208 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_gen_len": 16.2748, |
|
"eval_loss": 3.3789384365081787, |
|
"eval_rouge1": 26.0638, |
|
"eval_rouge2": 8.6853, |
|
"eval_rougeL": 25.0862, |
|
"eval_rougeLsum": 25.1223, |
|
"eval_runtime": 221.9787, |
|
"eval_samples_per_second": 8.312, |
|
"eval_steps_per_second": 0.523, |
|
"step": 3208 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 2.5315656565656566e-05, |
|
"loss": 3.1059, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_gen_len": 15.7366, |
|
"eval_loss": 3.362180709838867, |
|
"eval_rouge1": 26.7254, |
|
"eval_rouge2": 9.1138, |
|
"eval_rougeL": 25.7985, |
|
"eval_rougeLsum": 25.8521, |
|
"eval_runtime": 195.6438, |
|
"eval_samples_per_second": 9.43, |
|
"eval_steps_per_second": 0.593, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 2.0252525252525253e-05, |
|
"loss": 3.0336, |
|
"step": 4812 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_gen_len": 16.548, |
|
"eval_loss": 3.366245746612549, |
|
"eval_rouge1": 26.4655, |
|
"eval_rouge2": 9.1283, |
|
"eval_rougeL": 25.4587, |
|
"eval_rougeLsum": 25.5112, |
|
"eval_runtime": 221.12, |
|
"eval_samples_per_second": 8.344, |
|
"eval_steps_per_second": 0.525, |
|
"step": 4812 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 1.518939393939394e-05, |
|
"loss": 2.9727, |
|
"step": 5614 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_gen_len": 15.5431, |
|
"eval_loss": 3.359348773956299, |
|
"eval_rouge1": 26.8211, |
|
"eval_rouge2": 9.3045, |
|
"eval_rougeL": 25.8497, |
|
"eval_rougeLsum": 25.8772, |
|
"eval_runtime": 192.8568, |
|
"eval_samples_per_second": 9.567, |
|
"eval_steps_per_second": 0.601, |
|
"step": 5614 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 1.0126262626262626e-05, |
|
"loss": 2.9298, |
|
"step": 6416 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_gen_len": 15.916, |
|
"eval_loss": 3.364309549331665, |
|
"eval_rouge1": 26.8932, |
|
"eval_rouge2": 9.3537, |
|
"eval_rougeL": 25.9444, |
|
"eval_rougeLsum": 26.0088, |
|
"eval_runtime": 199.9625, |
|
"eval_samples_per_second": 9.227, |
|
"eval_steps_per_second": 0.58, |
|
"step": 6416 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 5.063131313131313e-06, |
|
"loss": 2.9005, |
|
"step": 7218 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_gen_len": 15.71, |
|
"eval_loss": 3.3606483936309814, |
|
"eval_rouge1": 27.1732, |
|
"eval_rouge2": 9.5661, |
|
"eval_rougeL": 26.1198, |
|
"eval_rougeLsum": 26.1515, |
|
"eval_runtime": 186.8674, |
|
"eval_samples_per_second": 9.873, |
|
"eval_steps_per_second": 0.621, |
|
"step": 7218 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 0.0, |
|
"loss": 2.8846, |
|
"step": 8020 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_gen_len": 15.5767, |
|
"eval_loss": 3.3608038425445557, |
|
"eval_rouge1": 27.0741, |
|
"eval_rouge2": 9.3824, |
|
"eval_rougeL": 26.133, |
|
"eval_rougeLsum": 26.1559, |
|
"eval_runtime": 189.991, |
|
"eval_samples_per_second": 9.711, |
|
"eval_steps_per_second": 0.611, |
|
"step": 8020 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 8020, |
|
"total_flos": 2.196016829300736e+16, |
|
"train_loss": 3.2251040794011065, |
|
"train_runtime": 10442.016, |
|
"train_samples_per_second": 6.142, |
|
"train_steps_per_second": 0.768 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 8020, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"total_flos": 2.196016829300736e+16, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|