|
{ |
|
"best_metric": 3.8871757984161377, |
|
"best_model_checkpoint": "./models/gpt_test9/checkpoint-10000", |
|
"epoch": 4.0, |
|
"global_step": 10592, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 2.830188679245283e-05, |
|
"loss": 3.311, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_loss": 4.033947467803955, |
|
"eval_runtime": 6.7632, |
|
"eval_samples_per_second": 63.432, |
|
"eval_steps_per_second": 3.992, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 2.7041544271926144e-05, |
|
"loss": 3.1133, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_loss": 3.9776785373687744, |
|
"eval_runtime": 6.7558, |
|
"eval_samples_per_second": 63.501, |
|
"eval_steps_per_second": 3.997, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 2.3894250944187998e-05, |
|
"loss": 2.9875, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"eval_loss": 3.9545652866363525, |
|
"eval_runtime": 6.7588, |
|
"eval_samples_per_second": 63.472, |
|
"eval_steps_per_second": 3.995, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 2.0746957616449853e-05, |
|
"loss": 2.8697, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"eval_loss": 3.9269347190856934, |
|
"eval_runtime": 6.7568, |
|
"eval_samples_per_second": 63.491, |
|
"eval_steps_per_second": 3.996, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 1.759966428871171e-05, |
|
"loss": 2.8669, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"eval_loss": 3.915926933288574, |
|
"eval_runtime": 6.7665, |
|
"eval_samples_per_second": 63.401, |
|
"eval_steps_per_second": 3.99, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 1.4452370960973563e-05, |
|
"loss": 2.7308, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"eval_loss": 3.9065561294555664, |
|
"eval_runtime": 6.7494, |
|
"eval_samples_per_second": 63.562, |
|
"eval_steps_per_second": 4.0, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 1.1305077633235418e-05, |
|
"loss": 2.709, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"eval_loss": 3.899543285369873, |
|
"eval_runtime": 6.761, |
|
"eval_samples_per_second": 63.452, |
|
"eval_steps_per_second": 3.993, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 8.157784305497272e-06, |
|
"loss": 2.6979, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"eval_loss": 3.8976171016693115, |
|
"eval_runtime": 6.7563, |
|
"eval_samples_per_second": 63.496, |
|
"eval_steps_per_second": 3.996, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 5.010490977759127e-06, |
|
"loss": 2.5878, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"eval_loss": 3.897768020629883, |
|
"eval_runtime": 6.7533, |
|
"eval_samples_per_second": 63.524, |
|
"eval_steps_per_second": 3.998, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"learning_rate": 1.863197650020982e-06, |
|
"loss": 2.5824, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"eval_loss": 3.8871757984161377, |
|
"eval_runtime": 6.7573, |
|
"eval_samples_per_second": 63.487, |
|
"eval_steps_per_second": 3.996, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"step": 10592, |
|
"total_flos": 4.4280638078976e+16, |
|
"train_loss": 2.831424217569864, |
|
"train_runtime": 8708.1966, |
|
"train_samples_per_second": 19.461, |
|
"train_steps_per_second": 1.216 |
|
} |
|
], |
|
"max_steps": 10592, |
|
"num_train_epochs": 4, |
|
"total_flos": 4.4280638078976e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|