|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 5.994241842610364, |
|
"eval_steps": 500, |
|
"global_step": 2340, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_f1": 0.9034, |
|
"eval_gen_len": 26.296727272727274, |
|
"eval_loss": 1.8257849216461182, |
|
"eval_precision": 0.9049, |
|
"eval_recall": 0.9023, |
|
"eval_rouge1": 0.4338, |
|
"eval_rouge2": 0.1906, |
|
"eval_rougeL": 0.3496, |
|
"eval_rougeLsum": 0.3498, |
|
"eval_runtime": 513.5292, |
|
"eval_samples_per_second": 5.355, |
|
"eval_steps_per_second": 0.335, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 1.3589743589743592e-05, |
|
"loss": 2.1621, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_f1": 0.9054, |
|
"eval_gen_len": 26.272727272727273, |
|
"eval_loss": 1.7537195682525635, |
|
"eval_precision": 0.9068, |
|
"eval_recall": 0.9044, |
|
"eval_rouge1": 0.4449, |
|
"eval_rouge2": 0.2005, |
|
"eval_rougeL": 0.3633, |
|
"eval_rougeLsum": 0.3633, |
|
"eval_runtime": 505.2109, |
|
"eval_samples_per_second": 5.443, |
|
"eval_steps_per_second": 0.34, |
|
"step": 781 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 7.17948717948718e-06, |
|
"loss": 1.8794, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_f1": 0.9066, |
|
"eval_gen_len": 26.434545454545454, |
|
"eval_loss": 1.726783275604248, |
|
"eval_precision": 0.9078, |
|
"eval_recall": 0.9058, |
|
"eval_rouge1": 0.4518, |
|
"eval_rouge2": 0.2061, |
|
"eval_rougeL": 0.3696, |
|
"eval_rougeLsum": 0.3695, |
|
"eval_runtime": 507.6007, |
|
"eval_samples_per_second": 5.418, |
|
"eval_steps_per_second": 0.339, |
|
"step": 1172 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"learning_rate": 7.692307692307694e-07, |
|
"loss": 1.8271, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_f1": 0.9069, |
|
"eval_gen_len": 26.39709090909091, |
|
"eval_loss": 1.7157036066055298, |
|
"eval_precision": 0.9082, |
|
"eval_recall": 0.906, |
|
"eval_rouge1": 0.4539, |
|
"eval_rouge2": 0.2075, |
|
"eval_rougeL": 0.3716, |
|
"eval_rougeLsum": 0.3714, |
|
"eval_runtime": 505.7397, |
|
"eval_samples_per_second": 5.438, |
|
"eval_steps_per_second": 0.34, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_f1": 0.9074, |
|
"eval_gen_len": 26.301454545454547, |
|
"eval_loss": 1.703265905380249, |
|
"eval_precision": 0.9087, |
|
"eval_recall": 0.9065, |
|
"eval_rouge1": 0.4561, |
|
"eval_rouge2": 0.2098, |
|
"eval_rougeL": 0.3735, |
|
"eval_rougeLsum": 0.3734, |
|
"eval_runtime": 505.7439, |
|
"eval_samples_per_second": 5.438, |
|
"eval_steps_per_second": 0.34, |
|
"step": 1951 |
|
}, |
|
{ |
|
"epoch": 5.12, |
|
"learning_rate": 2.9059829059829063e-06, |
|
"loss": 1.8067, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"eval_f1": 0.9076, |
|
"eval_gen_len": 26.337818181818182, |
|
"eval_loss": 1.6991122961044312, |
|
"eval_precision": 0.909, |
|
"eval_recall": 0.9067, |
|
"eval_rouge1": 0.4572, |
|
"eval_rouge2": 0.2103, |
|
"eval_rougeL": 0.3743, |
|
"eval_rougeLsum": 0.3742, |
|
"eval_runtime": 504.0406, |
|
"eval_samples_per_second": 5.456, |
|
"eval_steps_per_second": 0.341, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"step": 2340, |
|
"total_flos": 4.3261061209522176e+17, |
|
"train_loss": 0.646622069269164, |
|
"train_runtime": 5096.5205, |
|
"train_samples_per_second": 58.864, |
|
"train_steps_per_second": 0.459 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2340, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 6, |
|
"save_steps": 500, |
|
"total_flos": 4.3261061209522176e+17, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|