|
{ |
|
"best_metric": 0.25090140104293823, |
|
"best_model_checkpoint": "/data/jcanete/all_results/mldoc/albeto_large/epochs_3_bs_16_lr_5e-6/checkpoint-1700", |
|
"epoch": 3.0, |
|
"global_step": 1776, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.17, |
|
"eval_accuracy": 0.28299999237060547, |
|
"eval_loss": 1.2837506532669067, |
|
"eval_runtime": 8.1555, |
|
"eval_samples_per_second": 122.617, |
|
"eval_steps_per_second": 7.725, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_accuracy": 0.531000018119812, |
|
"eval_loss": 1.1732338666915894, |
|
"eval_runtime": 8.1951, |
|
"eval_samples_per_second": 122.024, |
|
"eval_steps_per_second": 7.688, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_accuracy": 0.5540000200271606, |
|
"eval_loss": 0.9090207815170288, |
|
"eval_runtime": 8.1405, |
|
"eval_samples_per_second": 122.843, |
|
"eval_steps_per_second": 7.739, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_accuracy": 0.5619999766349792, |
|
"eval_loss": 0.8500489592552185, |
|
"eval_runtime": 8.4022, |
|
"eval_samples_per_second": 119.017, |
|
"eval_steps_per_second": 7.498, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.603603603603604e-06, |
|
"loss": 0.2667, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_accuracy": 0.6660000085830688, |
|
"eval_loss": 0.7123118042945862, |
|
"eval_runtime": 8.0822, |
|
"eval_samples_per_second": 123.728, |
|
"eval_steps_per_second": 7.795, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"eval_accuracy": 0.7820000052452087, |
|
"eval_loss": 0.6517438888549805, |
|
"eval_runtime": 8.9908, |
|
"eval_samples_per_second": 111.225, |
|
"eval_steps_per_second": 7.007, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"eval_accuracy": 0.8230000138282776, |
|
"eval_loss": 0.5667398571968079, |
|
"eval_runtime": 8.2149, |
|
"eval_samples_per_second": 121.73, |
|
"eval_steps_per_second": 7.669, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"eval_accuracy": 0.8650000095367432, |
|
"eval_loss": 0.47660863399505615, |
|
"eval_runtime": 8.0853, |
|
"eval_samples_per_second": 123.682, |
|
"eval_steps_per_second": 7.792, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"eval_accuracy": 0.8799999952316284, |
|
"eval_loss": 0.38954979181289673, |
|
"eval_runtime": 8.0289, |
|
"eval_samples_per_second": 124.55, |
|
"eval_steps_per_second": 7.847, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 2.195945945945946e-06, |
|
"loss": 0.1264, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"eval_accuracy": 0.8420000076293945, |
|
"eval_loss": 0.5382915139198303, |
|
"eval_runtime": 8.0917, |
|
"eval_samples_per_second": 123.584, |
|
"eval_steps_per_second": 7.786, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"eval_accuracy": 0.9100000262260437, |
|
"eval_loss": 0.36202001571655273, |
|
"eval_runtime": 7.986, |
|
"eval_samples_per_second": 125.219, |
|
"eval_steps_per_second": 7.889, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"eval_accuracy": 0.9240000247955322, |
|
"eval_loss": 0.3379409909248352, |
|
"eval_runtime": 8.6218, |
|
"eval_samples_per_second": 115.985, |
|
"eval_steps_per_second": 7.307, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"eval_accuracy": 0.9279999732971191, |
|
"eval_loss": 0.29248443245887756, |
|
"eval_runtime": 8.2615, |
|
"eval_samples_per_second": 121.044, |
|
"eval_steps_per_second": 7.626, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"eval_accuracy": 0.9240000247955322, |
|
"eval_loss": 0.28705379366874695, |
|
"eval_runtime": 8.1584, |
|
"eval_samples_per_second": 122.573, |
|
"eval_steps_per_second": 7.722, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 7.882882882882883e-07, |
|
"loss": 0.0676, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"eval_accuracy": 0.9279999732971191, |
|
"eval_loss": 0.2619408965110779, |
|
"eval_runtime": 7.9492, |
|
"eval_samples_per_second": 125.798, |
|
"eval_steps_per_second": 7.925, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"eval_accuracy": 0.9279999732971191, |
|
"eval_loss": 0.26721176505088806, |
|
"eval_runtime": 7.9627, |
|
"eval_samples_per_second": 125.585, |
|
"eval_steps_per_second": 7.912, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"eval_accuracy": 0.9319999814033508, |
|
"eval_loss": 0.25090140104293823, |
|
"eval_runtime": 8.0714, |
|
"eval_samples_per_second": 123.894, |
|
"eval_steps_per_second": 7.805, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 1776, |
|
"total_flos": 982359617226624.0, |
|
"train_loss": 0.13927180917413384, |
|
"train_runtime": 1029.288, |
|
"train_samples_per_second": 27.567, |
|
"train_steps_per_second": 1.725 |
|
} |
|
], |
|
"max_steps": 1776, |
|
"num_train_epochs": 3, |
|
"total_flos": 982359617226624.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|