|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 8.0, |
|
"eval_steps": 500, |
|
"global_step": 1136, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.995598591549296e-05, |
|
"loss": 2.3251, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.379401408450705e-05, |
|
"loss": 1.8881, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.5851831552023385, |
|
"eval_loss": 1.8030234575271606, |
|
"eval_runtime": 68.3081, |
|
"eval_samples_per_second": 1.801, |
|
"eval_steps_per_second": 0.234, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 3.7544014084507044e-05, |
|
"loss": 1.0326, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.5736850675785703, |
|
"eval_loss": 1.9792582988739014, |
|
"eval_runtime": 67.3896, |
|
"eval_samples_per_second": 1.825, |
|
"eval_steps_per_second": 0.237, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 3.129401408450705e-05, |
|
"loss": 0.2979, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.5582232177964184, |
|
"eval_loss": 2.4750924110412598, |
|
"eval_runtime": 67.4719, |
|
"eval_samples_per_second": 1.823, |
|
"eval_steps_per_second": 0.237, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 2.5044014084507045e-05, |
|
"loss": 0.0966, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.5602765895758616, |
|
"eval_loss": 2.7331745624542236, |
|
"eval_runtime": 67.3819, |
|
"eval_samples_per_second": 1.825, |
|
"eval_steps_per_second": 0.237, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 1.8794014084507046e-05, |
|
"loss": 0.0519, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.5593313236503151, |
|
"eval_loss": 2.9102349281311035, |
|
"eval_runtime": 67.3712, |
|
"eval_samples_per_second": 1.826, |
|
"eval_steps_per_second": 0.237, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 1.2544014084507044e-05, |
|
"loss": 0.0211, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.5565511297516492, |
|
"eval_loss": 3.0963499546051025, |
|
"eval_runtime": 67.3971, |
|
"eval_samples_per_second": 1.825, |
|
"eval_steps_per_second": 0.237, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 6.2940140845070425e-06, |
|
"loss": 0.0038, |
|
"step": 994 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.5585012371862849, |
|
"eval_loss": 3.196666955947876, |
|
"eval_runtime": 67.4214, |
|
"eval_samples_per_second": 1.824, |
|
"eval_steps_per_second": 0.237, |
|
"step": 994 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 4.4014084507042253e-08, |
|
"loss": 0.0028, |
|
"step": 1136 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.5584178313693249, |
|
"eval_loss": 3.250995397567749, |
|
"eval_runtime": 67.4231, |
|
"eval_samples_per_second": 1.824, |
|
"eval_steps_per_second": 0.237, |
|
"step": 1136 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"step": 1136, |
|
"total_flos": 2.845346179967877e+18, |
|
"train_loss": 0.424729940363429, |
|
"train_runtime": 26678.4113, |
|
"train_samples_per_second": 0.681, |
|
"train_steps_per_second": 0.043 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 1136, |
|
"num_train_epochs": 8, |
|
"save_steps": 500, |
|
"total_flos": 2.845346179967877e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|