|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9773260359655981, |
|
"global_step": 30000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.8373729476153247e-05, |
|
"loss": 1.6126, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_exact_match": 68.15336463223787, |
|
"eval_f1": 73.98256617857639, |
|
"eval_loss": 1.189887523651123, |
|
"eval_runtime": 1542.7266, |
|
"eval_samples_per_second": 7.463, |
|
"eval_steps_per_second": 3.732, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 2.674550430023456e-05, |
|
"loss": 1.1965, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_exact_match": 75.13475917231786, |
|
"eval_f1": 79.95184555682803, |
|
"eval_loss": 0.9056070446968079, |
|
"eval_runtime": 1533.9546, |
|
"eval_samples_per_second": 7.505, |
|
"eval_steps_per_second": 3.753, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.511695334897055e-05, |
|
"loss": 1.0769, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_exact_match": 77.56042427403929, |
|
"eval_f1": 82.22503770504048, |
|
"eval_loss": 0.9495312571525574, |
|
"eval_runtime": 1533.818, |
|
"eval_samples_per_second": 7.506, |
|
"eval_steps_per_second": 3.753, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 2.3488728173051864e-05, |
|
"loss": 0.9992, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_exact_match": 81.09024517475221, |
|
"eval_f1": 85.49992934663945, |
|
"eval_loss": 0.8323877453804016, |
|
"eval_runtime": 1541.3611, |
|
"eval_samples_per_second": 7.469, |
|
"eval_steps_per_second": 3.735, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 2.1860502997133176e-05, |
|
"loss": 0.9329, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"eval_exact_match": 81.264127977743, |
|
"eval_f1": 85.43481659242194, |
|
"eval_loss": 0.7231199741363525, |
|
"eval_runtime": 1535.496, |
|
"eval_samples_per_second": 7.498, |
|
"eval_steps_per_second": 3.749, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 2.0232277821214492e-05, |
|
"loss": 0.8913, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_exact_match": 81.92488262910798, |
|
"eval_f1": 85.96065424027016, |
|
"eval_loss": 0.7569286227226257, |
|
"eval_runtime": 1529.0019, |
|
"eval_samples_per_second": 7.53, |
|
"eval_steps_per_second": 3.765, |
|
"step": 30000 |
|
} |
|
], |
|
"max_steps": 92088, |
|
"num_train_epochs": 3, |
|
"total_flos": 6.619410874368e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|