|
{ |
|
"best_metric": 0.5737453699111938, |
|
"best_model_checkpoint": "./vit-molecul/checkpoint-160", |
|
"epoch": 20.0, |
|
"global_step": 160, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 2.85e-06, |
|
"loss": 0.723, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.61, |
|
"eval_f1": 0.6096486838154338, |
|
"eval_loss": 0.6790218949317932, |
|
"eval_runtime": 19.8848, |
|
"eval_samples_per_second": 5.029, |
|
"eval_steps_per_second": 0.101, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 2.7e-06, |
|
"loss": 0.6915, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.62, |
|
"eval_f1": 0.5924495924495925, |
|
"eval_loss": 0.6660856604576111, |
|
"eval_runtime": 20.4688, |
|
"eval_samples_per_second": 4.885, |
|
"eval_steps_per_second": 0.098, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 2.55e-06, |
|
"loss": 0.6689, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.69, |
|
"eval_f1": 0.6892230576441103, |
|
"eval_loss": 0.647020697593689, |
|
"eval_runtime": 20.1108, |
|
"eval_samples_per_second": 4.972, |
|
"eval_steps_per_second": 0.099, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 2.4000000000000003e-06, |
|
"loss": 0.6517, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.64, |
|
"eval_f1": 0.6376811594202898, |
|
"eval_loss": 0.6355536580085754, |
|
"eval_runtime": 17.3784, |
|
"eval_samples_per_second": 5.754, |
|
"eval_steps_per_second": 0.115, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 2.25e-06, |
|
"loss": 0.6368, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.72, |
|
"eval_f1": 0.7198879551820727, |
|
"eval_loss": 0.6288898587226868, |
|
"eval_runtime": 19.3335, |
|
"eval_samples_per_second": 5.172, |
|
"eval_steps_per_second": 0.103, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 2.1e-06, |
|
"loss": 0.621, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.73, |
|
"eval_f1": 0.7293233082706766, |
|
"eval_loss": 0.621720552444458, |
|
"eval_runtime": 21.1008, |
|
"eval_samples_per_second": 4.739, |
|
"eval_steps_per_second": 0.095, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 1.95e-06, |
|
"loss": 0.6061, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.69, |
|
"eval_f1": 0.6862030569895738, |
|
"eval_loss": 0.6197251081466675, |
|
"eval_runtime": 20.06, |
|
"eval_samples_per_second": 4.985, |
|
"eval_steps_per_second": 0.1, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 1.8e-06, |
|
"loss": 0.5924, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.73, |
|
"eval_f1": 0.7293233082706766, |
|
"eval_loss": 0.6086930632591248, |
|
"eval_runtime": 18.4413, |
|
"eval_samples_per_second": 5.423, |
|
"eval_steps_per_second": 0.108, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 1.65e-06, |
|
"loss": 0.5767, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.72, |
|
"eval_f1": 0.7198879551820727, |
|
"eval_loss": 0.6002515554428101, |
|
"eval_runtime": 19.1541, |
|
"eval_samples_per_second": 5.221, |
|
"eval_steps_per_second": 0.104, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 1.5e-06, |
|
"loss": 0.5633, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.72, |
|
"eval_f1": 0.7195512820512822, |
|
"eval_loss": 0.5953283905982971, |
|
"eval_runtime": 18.6111, |
|
"eval_samples_per_second": 5.373, |
|
"eval_steps_per_second": 0.107, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"learning_rate": 1.35e-06, |
|
"loss": 0.5491, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.72, |
|
"eval_f1": 0.7198879551820727, |
|
"eval_loss": 0.5884998440742493, |
|
"eval_runtime": 19.1596, |
|
"eval_samples_per_second": 5.219, |
|
"eval_steps_per_second": 0.104, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 1.2000000000000002e-06, |
|
"loss": 0.5351, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.71, |
|
"eval_f1": 0.7099709970997101, |
|
"eval_loss": 0.5868653655052185, |
|
"eval_runtime": 19.9846, |
|
"eval_samples_per_second": 5.004, |
|
"eval_steps_per_second": 0.1, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"learning_rate": 1.05e-06, |
|
"loss": 0.5239, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.7, |
|
"eval_f1": 0.6995192307692307, |
|
"eval_loss": 0.5866873264312744, |
|
"eval_runtime": 18.0538, |
|
"eval_samples_per_second": 5.539, |
|
"eval_steps_per_second": 0.111, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"learning_rate": 9e-07, |
|
"loss": 0.5118, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.71, |
|
"eval_f1": 0.7099709970997101, |
|
"eval_loss": 0.5804287195205688, |
|
"eval_runtime": 13.1833, |
|
"eval_samples_per_second": 7.585, |
|
"eval_steps_per_second": 0.152, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"learning_rate": 7.5e-07, |
|
"loss": 0.502, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.71, |
|
"eval_f1": 0.7099709970997101, |
|
"eval_loss": 0.5751881003379822, |
|
"eval_runtime": 14.0771, |
|
"eval_samples_per_second": 7.104, |
|
"eval_steps_per_second": 0.142, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"learning_rate": 6.000000000000001e-07, |
|
"loss": 0.4942, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.72, |
|
"eval_f1": 0.7198879551820727, |
|
"eval_loss": 0.5737959742546082, |
|
"eval_runtime": 9.472, |
|
"eval_samples_per_second": 10.557, |
|
"eval_steps_per_second": 0.211, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"learning_rate": 4.5e-07, |
|
"loss": 0.4885, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.71, |
|
"eval_f1": 0.7085720028137876, |
|
"eval_loss": 0.5770530104637146, |
|
"eval_runtime": 12.9019, |
|
"eval_samples_per_second": 7.751, |
|
"eval_steps_per_second": 0.155, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"learning_rate": 3.0000000000000004e-07, |
|
"loss": 0.4831, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.71, |
|
"eval_f1": 0.7085720028137876, |
|
"eval_loss": 0.5750694870948792, |
|
"eval_runtime": 10.5808, |
|
"eval_samples_per_second": 9.451, |
|
"eval_steps_per_second": 0.189, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"learning_rate": 1.5000000000000002e-07, |
|
"loss": 0.4793, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.71, |
|
"eval_f1": 0.7085720028137876, |
|
"eval_loss": 0.5742676258087158, |
|
"eval_runtime": 12.6314, |
|
"eval_samples_per_second": 7.917, |
|
"eval_steps_per_second": 0.158, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 0.0, |
|
"loss": 0.4774, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.71, |
|
"eval_f1": 0.7085720028137876, |
|
"eval_loss": 0.5737453699111938, |
|
"eval_runtime": 16.6943, |
|
"eval_samples_per_second": 5.99, |
|
"eval_steps_per_second": 0.12, |
|
"step": 160 |
|
} |
|
], |
|
"max_steps": 160, |
|
"num_train_epochs": 20, |
|
"total_flos": 6.19935916916736e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|