|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 10.292993630573248, |
|
"eval_steps": 100, |
|
"global_step": 1616, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 2e-05, |
|
"loss": 1.1355, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_loss": 1.157366156578064, |
|
"eval_runtime": 30.4555, |
|
"eval_samples_per_second": 65.67, |
|
"eval_steps_per_second": 1.051, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 2e-05, |
|
"loss": 0.881, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"eval_loss": 1.3262789249420166, |
|
"eval_runtime": 29.1383, |
|
"eval_samples_per_second": 68.638, |
|
"eval_steps_per_second": 1.098, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 2e-05, |
|
"loss": 0.5496, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"eval_loss": 1.3176809549331665, |
|
"eval_runtime": 28.8909, |
|
"eval_samples_per_second": 69.226, |
|
"eval_steps_per_second": 1.108, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 2e-05, |
|
"loss": 0.2597, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"eval_loss": 1.5806535482406616, |
|
"eval_runtime": 29.1193, |
|
"eval_samples_per_second": 68.683, |
|
"eval_steps_per_second": 1.099, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"learning_rate": 2e-05, |
|
"loss": 0.1877, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"eval_loss": 1.7526657581329346, |
|
"eval_runtime": 29.1196, |
|
"eval_samples_per_second": 68.682, |
|
"eval_steps_per_second": 1.099, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 3.82, |
|
"learning_rate": 2e-05, |
|
"loss": 0.1158, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 3.82, |
|
"eval_loss": 1.7486767768859863, |
|
"eval_runtime": 29.2706, |
|
"eval_samples_per_second": 68.328, |
|
"eval_steps_per_second": 1.093, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 4.46, |
|
"learning_rate": 2e-05, |
|
"loss": 0.0855, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 4.46, |
|
"eval_loss": 1.873838186264038, |
|
"eval_runtime": 29.2292, |
|
"eval_samples_per_second": 68.425, |
|
"eval_steps_per_second": 1.095, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 5.1, |
|
"learning_rate": 2e-05, |
|
"loss": 0.0645, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 5.1, |
|
"eval_loss": 1.9275007247924805, |
|
"eval_runtime": 29.013, |
|
"eval_samples_per_second": 68.935, |
|
"eval_steps_per_second": 1.103, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 5.73, |
|
"learning_rate": 2e-05, |
|
"loss": 0.0518, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 5.73, |
|
"eval_loss": 1.9070993661880493, |
|
"eval_runtime": 30.5536, |
|
"eval_samples_per_second": 65.459, |
|
"eval_steps_per_second": 1.047, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 6.37, |
|
"learning_rate": 2e-05, |
|
"loss": 0.0464, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 6.37, |
|
"eval_loss": 1.9601927995681763, |
|
"eval_runtime": 28.9827, |
|
"eval_samples_per_second": 69.007, |
|
"eval_steps_per_second": 1.104, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 7.01, |
|
"learning_rate": 2e-05, |
|
"loss": 0.0367, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 7.01, |
|
"eval_loss": 1.973179817199707, |
|
"eval_runtime": 29.0672, |
|
"eval_samples_per_second": 68.806, |
|
"eval_steps_per_second": 1.101, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 7.64, |
|
"learning_rate": 2e-05, |
|
"loss": 0.0288, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 7.64, |
|
"eval_loss": 2.0399632453918457, |
|
"eval_runtime": 29.0049, |
|
"eval_samples_per_second": 68.954, |
|
"eval_steps_per_second": 1.103, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 8.28, |
|
"learning_rate": 2e-05, |
|
"loss": 0.0265, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 8.28, |
|
"eval_loss": 2.0276734828948975, |
|
"eval_runtime": 28.9115, |
|
"eval_samples_per_second": 69.177, |
|
"eval_steps_per_second": 1.107, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 8.92, |
|
"learning_rate": 2e-05, |
|
"loss": 0.0287, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 8.92, |
|
"eval_loss": 2.049071788787842, |
|
"eval_runtime": 29.0231, |
|
"eval_samples_per_second": 68.911, |
|
"eval_steps_per_second": 1.103, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 9.55, |
|
"learning_rate": 2e-05, |
|
"loss": 0.0195, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 9.55, |
|
"eval_loss": 2.043515205383301, |
|
"eval_runtime": 29.1891, |
|
"eval_samples_per_second": 68.519, |
|
"eval_steps_per_second": 1.096, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 10.19, |
|
"learning_rate": 2e-05, |
|
"loss": 0.0215, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 10.19, |
|
"eval_loss": 2.101804733276367, |
|
"eval_runtime": 28.9478, |
|
"eval_samples_per_second": 69.09, |
|
"eval_steps_per_second": 1.105, |
|
"step": 1600 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 1616, |
|
"num_train_epochs": 11, |
|
"save_steps": 808, |
|
"total_flos": 0.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|