|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.0, |
|
"global_step": 14, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 7e-05, |
|
"loss": 2.6914, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_accuracy": 0.44772838299951145, |
|
"eval_loss": 2.689453125, |
|
"eval_runtime": 2.0744, |
|
"eval_samples_per_second": 5.785, |
|
"eval_steps_per_second": 0.964, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 7e-05, |
|
"loss": 2.6897, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_accuracy": 0.44772838299951145, |
|
"eval_loss": 2.689453125, |
|
"eval_runtime": 1.2329, |
|
"eval_samples_per_second": 9.733, |
|
"eval_steps_per_second": 1.622, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 6.5e-05, |
|
"loss": 2.668, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_accuracy": 0.44031916625956685, |
|
"eval_loss": 2.703125, |
|
"eval_runtime": 1.3532, |
|
"eval_samples_per_second": 8.868, |
|
"eval_steps_per_second": 1.478, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 5.9999999999999995e-05, |
|
"loss": 2.7434, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_accuracy": 0.45330565054551375, |
|
"eval_loss": 2.591796875, |
|
"eval_runtime": 1.1891, |
|
"eval_samples_per_second": 10.092, |
|
"eval_steps_per_second": 1.682, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 5.4999999999999995e-05, |
|
"loss": 2.6265, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_accuracy": 0.4617733268197362, |
|
"eval_loss": 2.541015625, |
|
"eval_runtime": 1.3466, |
|
"eval_samples_per_second": 8.912, |
|
"eval_steps_per_second": 1.485, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 4.9999999999999996e-05, |
|
"loss": 2.5259, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_accuracy": 0.46405308581664223, |
|
"eval_loss": 2.515625, |
|
"eval_runtime": 1.3043, |
|
"eval_samples_per_second": 9.2, |
|
"eval_steps_per_second": 1.533, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.4999999999999996e-05, |
|
"loss": 2.5566, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.46665852467024915, |
|
"eval_loss": 2.490234375, |
|
"eval_runtime": 1.3073, |
|
"eval_samples_per_second": 9.179, |
|
"eval_steps_per_second": 1.53, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 3.9999999999999996e-05, |
|
"loss": 2.2317, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"eval_accuracy": 0.4706888128969223, |
|
"eval_loss": 2.4765625, |
|
"eval_runtime": 2.2111, |
|
"eval_samples_per_second": 5.427, |
|
"eval_steps_per_second": 0.905, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 3.5e-05, |
|
"loss": 2.2397, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"eval_accuracy": 0.4705259729685719, |
|
"eval_loss": 2.47265625, |
|
"eval_runtime": 1.2075, |
|
"eval_samples_per_second": 9.938, |
|
"eval_steps_per_second": 1.656, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 2.9999999999999997e-05, |
|
"loss": 2.0162, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"eval_accuracy": 0.4689789936492428, |
|
"eval_loss": 2.4765625, |
|
"eval_runtime": 1.6528, |
|
"eval_samples_per_second": 7.26, |
|
"eval_steps_per_second": 1.21, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.4999999999999998e-05, |
|
"loss": 2.0537, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"eval_accuracy": 0.4706888128969223, |
|
"eval_loss": 2.48046875, |
|
"eval_runtime": 1.6588, |
|
"eval_samples_per_second": 7.234, |
|
"eval_steps_per_second": 1.206, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 1.9999999999999998e-05, |
|
"loss": 2.1432, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"eval_accuracy": 0.4714215925744993, |
|
"eval_loss": 2.470703125, |
|
"eval_runtime": 2.2121, |
|
"eval_samples_per_second": 5.425, |
|
"eval_steps_per_second": 0.904, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 1.4999999999999999e-05, |
|
"loss": 2.0822, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"eval_accuracy": 0.47235792216251427, |
|
"eval_loss": 2.45703125, |
|
"eval_runtime": 1.2064, |
|
"eval_samples_per_second": 9.947, |
|
"eval_steps_per_second": 1.658, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 9.999999999999999e-06, |
|
"loss": 1.9056, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.47410845139228136, |
|
"eval_loss": 2.451171875, |
|
"eval_runtime": 2.1018, |
|
"eval_samples_per_second": 5.709, |
|
"eval_steps_per_second": 0.952, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"step": 14, |
|
"total_flos": 821815148544.0, |
|
"train_loss": 2.3695591517857144, |
|
"train_runtime": 69.0598, |
|
"train_samples_per_second": 1.448, |
|
"train_steps_per_second": 0.203 |
|
} |
|
], |
|
"max_steps": 14, |
|
"num_train_epochs": 2, |
|
"total_flos": 821815148544.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|