{
  "best_metric": 1.0982866287231445,
  "best_model_checkpoint": "hBERTv2_mnli/checkpoint-15340",
  "epoch": 15.0,
  "global_step": 23010,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "learning_rate": 4.90013037809648e-05,
      "loss": 1.0992,
      "step": 1534
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.31818644931227713,
      "eval_loss": 1.0996192693710327,
      "eval_runtime": 11.4609,
      "eval_samples_per_second": 856.391,
      "eval_steps_per_second": 3.403,
      "step": 1534
    },
    {
      "epoch": 2.0,
      "learning_rate": 4.80013037809648e-05,
      "loss": 1.0988,
      "step": 3068
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.31818644931227713,
      "eval_loss": 1.098758578300476,
      "eval_runtime": 11.4241,
      "eval_samples_per_second": 859.146,
      "eval_steps_per_second": 3.414,
      "step": 3068
    },
    {
      "epoch": 3.0,
      "learning_rate": 4.70013037809648e-05,
      "loss": 1.0987,
      "step": 4602
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.3273560876209883,
      "eval_loss": 1.098650336265564,
      "eval_runtime": 11.3879,
      "eval_samples_per_second": 861.881,
      "eval_steps_per_second": 3.425,
      "step": 4602
    },
    {
      "epoch": 4.0,
      "learning_rate": 4.60013037809648e-05,
      "loss": 1.0986,
      "step": 6136
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.3273560876209883,
      "eval_loss": 1.0986592769622803,
      "eval_runtime": 11.4873,
      "eval_samples_per_second": 854.425,
      "eval_steps_per_second": 3.395,
      "step": 6136
    },
    {
      "epoch": 5.0,
      "learning_rate": 4.50013037809648e-05,
      "loss": 1.0987,
      "step": 7670
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.3544574630667346,
      "eval_loss": 1.0984472036361694,
      "eval_runtime": 11.4054,
      "eval_samples_per_second": 860.558,
      "eval_steps_per_second": 3.419,
      "step": 7670
    },
    {
      "epoch": 6.0,
      "learning_rate": 4.40013037809648e-05,
      "loss": 1.0987,
      "step": 9204
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.3273560876209883,
      "eval_loss": 1.0986328125,
      "eval_runtime": 11.4628,
      "eval_samples_per_second": 856.251,
      "eval_steps_per_second": 3.402,
      "step": 9204
    },
    {
      "epoch": 7.0,
      "learning_rate": 4.3001303780964804e-05,
      "loss": 1.0986,
      "step": 10738
    },
    {
      "epoch": 7.0,
      "eval_accuracy": 0.3544574630667346,
      "eval_loss": 1.0986063480377197,
      "eval_runtime": 11.4244,
      "eval_samples_per_second": 859.13,
      "eval_steps_per_second": 3.414,
      "step": 10738
    },
    {
      "epoch": 8.0,
      "learning_rate": 4.20039113428944e-05,
      "loss": 1.0987,
      "step": 12272
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.3544574630667346,
      "eval_loss": 1.0986063480377197,
      "eval_runtime": 11.4277,
      "eval_samples_per_second": 858.877,
      "eval_steps_per_second": 3.413,
      "step": 12272
    },
    {
      "epoch": 9.0,
      "learning_rate": 4.10045632333768e-05,
      "loss": 1.0986,
      "step": 13806
    },
    {
      "epoch": 9.0,
      "eval_accuracy": 0.3544574630667346,
      "eval_loss": 1.098358392715454,
      "eval_runtime": 11.4149,
      "eval_samples_per_second": 859.839,
      "eval_steps_per_second": 3.417,
      "step": 13806
    },
    {
      "epoch": 10.0,
      "learning_rate": 4.000456323337679e-05,
      "loss": 1.0986,
      "step": 15340
    },
    {
      "epoch": 10.0,
      "eval_accuracy": 0.3544574630667346,
      "eval_loss": 1.0982866287231445,
      "eval_runtime": 11.4072,
      "eval_samples_per_second": 860.424,
      "eval_steps_per_second": 3.419,
      "step": 15340
    },
    {
      "epoch": 11.0,
      "learning_rate": 3.9004563233376796e-05,
      "loss": 1.0987,
      "step": 16874
    },
    {
      "epoch": 11.0,
      "eval_accuracy": 0.31818644931227713,
      "eval_loss": 1.0986417531967163,
      "eval_runtime": 11.4708,
      "eval_samples_per_second": 855.65,
      "eval_steps_per_second": 3.4,
      "step": 16874
    },
    {
      "epoch": 12.0,
      "learning_rate": 3.800456323337679e-05,
      "loss": 1.0987,
      "step": 18408
    },
    {
      "epoch": 12.0,
      "eval_accuracy": 0.31818644931227713,
      "eval_loss": 1.0983929634094238,
      "eval_runtime": 11.4592,
      "eval_samples_per_second": 856.517,
      "eval_steps_per_second": 3.403,
      "step": 18408
    },
    {
      "epoch": 13.0,
      "learning_rate": 3.7004563233376795e-05,
      "loss": 1.0986,
      "step": 19942
    },
    {
      "epoch": 13.0,
      "eval_accuracy": 0.3544574630667346,
      "eval_loss": 1.0983316898345947,
      "eval_runtime": 11.4401,
      "eval_samples_per_second": 857.948,
      "eval_steps_per_second": 3.409,
      "step": 19942
    },
    {
      "epoch": 14.0,
      "learning_rate": 3.600456323337679e-05,
      "loss": 1.0986,
      "step": 21476
    },
    {
      "epoch": 14.0,
      "eval_accuracy": 0.31818644931227713,
      "eval_loss": 1.0983843803405762,
      "eval_runtime": 11.4436,
      "eval_samples_per_second": 857.683,
      "eval_steps_per_second": 3.408,
      "step": 21476
    },
    {
      "epoch": 15.0,
      "learning_rate": 3.50045632333768e-05,
      "loss": 1.0986,
      "step": 23010
    },
    {
      "epoch": 15.0,
      "eval_accuracy": 0.3544574630667346,
      "eval_loss": 1.0986328125,
      "eval_runtime": 11.4531,
      "eval_samples_per_second": 856.97,
      "eval_steps_per_second": 3.405,
      "step": 23010
    },
    {
      "epoch": 15.0,
      "step": 23010,
      "total_flos": 7.33692663127081e+17,
      "train_loss": 1.0986910359541369,
      "train_runtime": 20364.3039,
      "train_samples_per_second": 964.192,
      "train_steps_per_second": 3.766
    }
  ],
  "max_steps": 76700,
  "num_train_epochs": 50,
  "total_flos": 7.33692663127081e+17,
  "trial_name": null,
  "trial_params": null
}