|
{ |
|
"best_metric": 0.618320643901825, |
|
"best_model_checkpoint": "hBERTv1_data_aug_cola/checkpoint-1670", |
|
"epoch": 7.0, |
|
"global_step": 5845, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.9e-05, |
|
"loss": 0.6084, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 0.6251746416091919, |
|
"eval_matthews_correlation": 0.0, |
|
"eval_runtime": 1.3477, |
|
"eval_samples_per_second": 773.92, |
|
"eval_steps_per_second": 3.71, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 4.8e-05, |
|
"loss": 0.6066, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 0.618320643901825, |
|
"eval_matthews_correlation": 0.0, |
|
"eval_runtime": 1.336, |
|
"eval_samples_per_second": 780.681, |
|
"eval_steps_per_second": 3.742, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 4.7e-05, |
|
"loss": 0.6065, |
|
"step": 2505 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 0.6184751391410828, |
|
"eval_matthews_correlation": 0.0, |
|
"eval_runtime": 1.3377, |
|
"eval_samples_per_second": 779.697, |
|
"eval_steps_per_second": 3.738, |
|
"step": 2505 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 4.600000000000001e-05, |
|
"loss": 0.6062, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 0.6219027042388916, |
|
"eval_matthews_correlation": 0.0, |
|
"eval_runtime": 1.3355, |
|
"eval_samples_per_second": 780.969, |
|
"eval_steps_per_second": 3.744, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.6061, |
|
"step": 4175 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 0.6204879879951477, |
|
"eval_matthews_correlation": 0.0, |
|
"eval_runtime": 1.34, |
|
"eval_samples_per_second": 778.371, |
|
"eval_steps_per_second": 3.731, |
|
"step": 4175 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 4.400119760479042e-05, |
|
"loss": 0.6066, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 0.6183536648750305, |
|
"eval_matthews_correlation": 0.0, |
|
"eval_runtime": 1.3404, |
|
"eval_samples_per_second": 778.146, |
|
"eval_steps_per_second": 3.73, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 4.3001197604790424e-05, |
|
"loss": 0.6061, |
|
"step": 5845 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_loss": 0.6187195181846619, |
|
"eval_matthews_correlation": 0.0, |
|
"eval_runtime": 1.3402, |
|
"eval_samples_per_second": 778.271, |
|
"eval_steps_per_second": 3.731, |
|
"step": 5845 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"step": 5845, |
|
"total_flos": 1.8886821472423117e+17, |
|
"train_loss": 0.6066560117070546, |
|
"train_runtime": 5543.8638, |
|
"train_samples_per_second": 1926.535, |
|
"train_steps_per_second": 7.531 |
|
} |
|
], |
|
"max_steps": 41750, |
|
"num_train_epochs": 50, |
|
"total_flos": 1.8886821472423117e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|