|
{ |
|
"best_metric": 0.15601937472820282, |
|
"best_model_checkpoint": "/data/jcanete/all_results/mldoc/albeto_tiny/epochs_4_bs_16_lr_3e-5/checkpoint-2100", |
|
"epoch": 4.0, |
|
"global_step": 2368, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.17, |
|
"eval_accuracy": 0.29100000858306885, |
|
"eval_loss": 1.4215397834777832, |
|
"eval_runtime": 0.9993, |
|
"eval_samples_per_second": 1000.659, |
|
"eval_steps_per_second": 63.042, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_accuracy": 0.5360000133514404, |
|
"eval_loss": 1.0526843070983887, |
|
"eval_runtime": 0.9899, |
|
"eval_samples_per_second": 1010.212, |
|
"eval_steps_per_second": 63.643, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_accuracy": 0.7480000257492065, |
|
"eval_loss": 0.8007577657699585, |
|
"eval_runtime": 0.9905, |
|
"eval_samples_per_second": 1009.612, |
|
"eval_steps_per_second": 63.606, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_accuracy": 0.7699999809265137, |
|
"eval_loss": 0.7043776512145996, |
|
"eval_runtime": 1.0018, |
|
"eval_samples_per_second": 998.248, |
|
"eval_steps_per_second": 62.89, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 2.3703547297297295e-05, |
|
"loss": 0.2646, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_accuracy": 0.843999981880188, |
|
"eval_loss": 0.46549031138420105, |
|
"eval_runtime": 1.0012, |
|
"eval_samples_per_second": 998.759, |
|
"eval_steps_per_second": 62.922, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"eval_accuracy": 0.8529999852180481, |
|
"eval_loss": 0.46177130937576294, |
|
"eval_runtime": 0.9926, |
|
"eval_samples_per_second": 1007.431, |
|
"eval_steps_per_second": 63.468, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"eval_accuracy": 0.9089999794960022, |
|
"eval_loss": 0.3174803853034973, |
|
"eval_runtime": 0.9914, |
|
"eval_samples_per_second": 1008.709, |
|
"eval_steps_per_second": 63.549, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"eval_accuracy": 0.9269999861717224, |
|
"eval_loss": 0.2770524322986603, |
|
"eval_runtime": 0.9992, |
|
"eval_samples_per_second": 1000.797, |
|
"eval_steps_per_second": 63.05, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"eval_accuracy": 0.9449999928474426, |
|
"eval_loss": 0.2170553058385849, |
|
"eval_runtime": 0.9905, |
|
"eval_samples_per_second": 1009.635, |
|
"eval_steps_per_second": 63.607, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 1.736908783783784e-05, |
|
"loss": 0.0843, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"eval_accuracy": 0.9350000023841858, |
|
"eval_loss": 0.23940612375736237, |
|
"eval_runtime": 0.9908, |
|
"eval_samples_per_second": 1009.32, |
|
"eval_steps_per_second": 63.587, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"eval_accuracy": 0.9480000138282776, |
|
"eval_loss": 0.20756861567497253, |
|
"eval_runtime": 1.0016, |
|
"eval_samples_per_second": 998.361, |
|
"eval_steps_per_second": 62.897, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"eval_accuracy": 0.9419999718666077, |
|
"eval_loss": 0.21042801439762115, |
|
"eval_runtime": 0.9893, |
|
"eval_samples_per_second": 1010.862, |
|
"eval_steps_per_second": 63.684, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"eval_accuracy": 0.9509999752044678, |
|
"eval_loss": 0.18397970497608185, |
|
"eval_runtime": 0.9927, |
|
"eval_samples_per_second": 1007.38, |
|
"eval_steps_per_second": 63.465, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"eval_accuracy": 0.9549999833106995, |
|
"eval_loss": 0.16272501647472382, |
|
"eval_runtime": 0.9936, |
|
"eval_samples_per_second": 1006.426, |
|
"eval_steps_per_second": 63.405, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 1.1047297297297297e-05, |
|
"loss": 0.0457, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"eval_accuracy": 0.9380000233650208, |
|
"eval_loss": 0.21701455116271973, |
|
"eval_runtime": 0.992, |
|
"eval_samples_per_second": 1008.023, |
|
"eval_steps_per_second": 63.505, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"eval_accuracy": 0.9419999718666077, |
|
"eval_loss": 0.20601776242256165, |
|
"eval_runtime": 0.9926, |
|
"eval_samples_per_second": 1007.478, |
|
"eval_steps_per_second": 63.471, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"eval_accuracy": 0.9520000219345093, |
|
"eval_loss": 0.1742561310529709, |
|
"eval_runtime": 0.9948, |
|
"eval_samples_per_second": 1005.234, |
|
"eval_steps_per_second": 63.33, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"eval_accuracy": 0.9490000009536743, |
|
"eval_loss": 0.17574115097522736, |
|
"eval_runtime": 0.9934, |
|
"eval_samples_per_second": 1006.664, |
|
"eval_steps_per_second": 63.42, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"eval_accuracy": 0.9399999976158142, |
|
"eval_loss": 0.21880225837230682, |
|
"eval_runtime": 1.0033, |
|
"eval_samples_per_second": 996.71, |
|
"eval_steps_per_second": 62.793, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"learning_rate": 4.712837837837838e-06, |
|
"loss": 0.0388, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"eval_accuracy": 0.9559999704360962, |
|
"eval_loss": 0.16699624061584473, |
|
"eval_runtime": 0.9915, |
|
"eval_samples_per_second": 1008.575, |
|
"eval_steps_per_second": 63.54, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"eval_accuracy": 0.9589999914169312, |
|
"eval_loss": 0.15601937472820282, |
|
"eval_runtime": 0.9908, |
|
"eval_samples_per_second": 1009.251, |
|
"eval_steps_per_second": 63.583, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 3.72, |
|
"eval_accuracy": 0.9559999704360962, |
|
"eval_loss": 0.1640777885913849, |
|
"eval_runtime": 0.9918, |
|
"eval_samples_per_second": 1008.262, |
|
"eval_steps_per_second": 63.521, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 3.89, |
|
"eval_accuracy": 0.9549999833106995, |
|
"eval_loss": 0.16484832763671875, |
|
"eval_runtime": 1.0391, |
|
"eval_samples_per_second": 962.351, |
|
"eval_steps_per_second": 60.628, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"step": 2368, |
|
"total_flos": 124651187836032.0, |
|
"train_loss": 0.09686436967269794, |
|
"train_runtime": 383.5478, |
|
"train_samples_per_second": 98.637, |
|
"train_steps_per_second": 6.174 |
|
} |
|
], |
|
"max_steps": 2368, |
|
"num_train_epochs": 4, |
|
"total_flos": 124651187836032.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|