|
{ |
|
"best_metric": 0.14660954475402832, |
|
"best_model_checkpoint": "./Beit-doc-classification/checkpoint-240", |
|
"epoch": 18.75, |
|
"eval_steps": 10, |
|
"global_step": 300, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.8906250000000006e-05, |
|
"loss": 2.174, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_accuracy": 0.6964285714285714, |
|
"eval_loss": 0.7704058885574341, |
|
"eval_runtime": 0.2746, |
|
"eval_samples_per_second": 203.898, |
|
"eval_steps_per_second": 25.487, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 4.734375e-05, |
|
"loss": 0.8459, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"eval_accuracy": 0.75, |
|
"eval_loss": 0.6054777503013611, |
|
"eval_runtime": 0.2697, |
|
"eval_samples_per_second": 207.624, |
|
"eval_steps_per_second": 25.953, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 4.5781250000000005e-05, |
|
"loss": 0.5794, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"eval_accuracy": 0.75, |
|
"eval_loss": 0.4210772216320038, |
|
"eval_runtime": 0.2721, |
|
"eval_samples_per_second": 205.818, |
|
"eval_steps_per_second": 25.727, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 4.421875e-05, |
|
"loss": 0.356, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"eval_accuracy": 0.9285714285714286, |
|
"eval_loss": 0.23619188368320465, |
|
"eval_runtime": 0.276, |
|
"eval_samples_per_second": 202.895, |
|
"eval_steps_per_second": 25.362, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 4.2656250000000003e-05, |
|
"loss": 0.2762, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"eval_accuracy": 0.9464285714285714, |
|
"eval_loss": 0.2510121166706085, |
|
"eval_runtime": 0.2788, |
|
"eval_samples_per_second": 200.857, |
|
"eval_steps_per_second": 25.107, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"learning_rate": 4.1093750000000006e-05, |
|
"loss": 0.2331, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"eval_accuracy": 0.9642857142857143, |
|
"eval_loss": 0.18232552707195282, |
|
"eval_runtime": 0.2722, |
|
"eval_samples_per_second": 205.715, |
|
"eval_steps_per_second": 25.714, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"learning_rate": 3.953125e-05, |
|
"loss": 0.183, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"eval_accuracy": 0.9642857142857143, |
|
"eval_loss": 0.1663789004087448, |
|
"eval_runtime": 0.2703, |
|
"eval_samples_per_second": 207.189, |
|
"eval_steps_per_second": 25.899, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 3.7968750000000005e-05, |
|
"loss": 0.1171, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.9642857142857143, |
|
"eval_loss": 0.182296484708786, |
|
"eval_runtime": 0.2718, |
|
"eval_samples_per_second": 206.04, |
|
"eval_steps_per_second": 25.755, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 5.62, |
|
"learning_rate": 3.640625e-05, |
|
"loss": 0.0875, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 5.62, |
|
"eval_accuracy": 0.9642857142857143, |
|
"eval_loss": 0.18081991374492645, |
|
"eval_runtime": 0.2717, |
|
"eval_samples_per_second": 206.097, |
|
"eval_steps_per_second": 25.762, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 6.25, |
|
"learning_rate": 3.484375e-05, |
|
"loss": 0.1186, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 6.25, |
|
"eval_accuracy": 0.9642857142857143, |
|
"eval_loss": 0.1665651649236679, |
|
"eval_runtime": 0.2734, |
|
"eval_samples_per_second": 204.829, |
|
"eval_steps_per_second": 25.604, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 6.88, |
|
"learning_rate": 3.3281250000000006e-05, |
|
"loss": 0.1174, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 6.88, |
|
"eval_accuracy": 0.9642857142857143, |
|
"eval_loss": 0.14654958248138428, |
|
"eval_runtime": 0.2716, |
|
"eval_samples_per_second": 206.219, |
|
"eval_steps_per_second": 25.777, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 7.5, |
|
"learning_rate": 3.171875e-05, |
|
"loss": 0.0519, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 7.5, |
|
"eval_accuracy": 0.9642857142857143, |
|
"eval_loss": 0.1532459259033203, |
|
"eval_runtime": 0.2731, |
|
"eval_samples_per_second": 205.051, |
|
"eval_steps_per_second": 25.631, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 8.12, |
|
"learning_rate": 3.015625e-05, |
|
"loss": 0.0338, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 8.12, |
|
"eval_accuracy": 0.9642857142857143, |
|
"eval_loss": 0.18999779224395752, |
|
"eval_runtime": 0.272, |
|
"eval_samples_per_second": 205.856, |
|
"eval_steps_per_second": 25.732, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 8.75, |
|
"learning_rate": 2.8593750000000004e-05, |
|
"loss": 0.0241, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 8.75, |
|
"eval_accuracy": 0.9642857142857143, |
|
"eval_loss": 0.1611316055059433, |
|
"eval_runtime": 0.2718, |
|
"eval_samples_per_second": 206.047, |
|
"eval_steps_per_second": 25.756, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 9.38, |
|
"learning_rate": 2.7031250000000003e-05, |
|
"loss": 0.0138, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 9.38, |
|
"eval_accuracy": 0.9642857142857143, |
|
"eval_loss": 0.17584912478923798, |
|
"eval_runtime": 0.2713, |
|
"eval_samples_per_second": 206.411, |
|
"eval_steps_per_second": 25.801, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 2.5468750000000002e-05, |
|
"loss": 0.0155, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.9642857142857143, |
|
"eval_loss": 0.14303383231163025, |
|
"eval_runtime": 0.2704, |
|
"eval_samples_per_second": 207.101, |
|
"eval_steps_per_second": 25.888, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 10.62, |
|
"learning_rate": 2.3906250000000002e-05, |
|
"loss": 0.0478, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 10.62, |
|
"eval_accuracy": 0.9642857142857143, |
|
"eval_loss": 0.15157583355903625, |
|
"eval_runtime": 0.2795, |
|
"eval_samples_per_second": 200.393, |
|
"eval_steps_per_second": 25.049, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 11.25, |
|
"learning_rate": 2.234375e-05, |
|
"loss": 0.0228, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 11.25, |
|
"eval_accuracy": 0.9642857142857143, |
|
"eval_loss": 0.17853426933288574, |
|
"eval_runtime": 0.2946, |
|
"eval_samples_per_second": 190.103, |
|
"eval_steps_per_second": 23.763, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 11.88, |
|
"learning_rate": 2.0781250000000004e-05, |
|
"loss": 0.0295, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 11.88, |
|
"eval_accuracy": 0.9642857142857143, |
|
"eval_loss": 0.16022925078868866, |
|
"eval_runtime": 0.2758, |
|
"eval_samples_per_second": 203.034, |
|
"eval_steps_per_second": 25.379, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 12.5, |
|
"learning_rate": 1.9218750000000003e-05, |
|
"loss": 0.0084, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 12.5, |
|
"eval_accuracy": 0.9642857142857143, |
|
"eval_loss": 0.19061991572380066, |
|
"eval_runtime": 0.281, |
|
"eval_samples_per_second": 199.27, |
|
"eval_steps_per_second": 24.909, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 13.12, |
|
"learning_rate": 1.7656250000000002e-05, |
|
"loss": 0.0064, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 13.12, |
|
"eval_accuracy": 0.9642857142857143, |
|
"eval_loss": 0.1927066147327423, |
|
"eval_runtime": 0.272, |
|
"eval_samples_per_second": 205.909, |
|
"eval_steps_per_second": 25.739, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 13.75, |
|
"learning_rate": 1.609375e-05, |
|
"loss": 0.0131, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 13.75, |
|
"eval_accuracy": 0.9642857142857143, |
|
"eval_loss": 0.17037805914878845, |
|
"eval_runtime": 0.2756, |
|
"eval_samples_per_second": 203.19, |
|
"eval_steps_per_second": 25.399, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 14.38, |
|
"learning_rate": 1.4531250000000003e-05, |
|
"loss": 0.0134, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 14.38, |
|
"eval_accuracy": 0.9642857142857143, |
|
"eval_loss": 0.14340999722480774, |
|
"eval_runtime": 0.2733, |
|
"eval_samples_per_second": 204.909, |
|
"eval_steps_per_second": 25.614, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"learning_rate": 1.2968750000000002e-05, |
|
"loss": 0.0082, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.9821428571428571, |
|
"eval_loss": 0.14660954475402832, |
|
"eval_runtime": 0.2748, |
|
"eval_samples_per_second": 203.761, |
|
"eval_steps_per_second": 25.47, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 15.62, |
|
"learning_rate": 1.140625e-05, |
|
"loss": 0.0082, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 15.62, |
|
"eval_accuracy": 0.9642857142857143, |
|
"eval_loss": 0.15767399966716766, |
|
"eval_runtime": 0.2783, |
|
"eval_samples_per_second": 201.226, |
|
"eval_steps_per_second": 25.153, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 16.25, |
|
"learning_rate": 9.84375e-06, |
|
"loss": 0.0104, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 16.25, |
|
"eval_accuracy": 0.9642857142857143, |
|
"eval_loss": 0.17043821513652802, |
|
"eval_runtime": 0.2775, |
|
"eval_samples_per_second": 201.786, |
|
"eval_steps_per_second": 25.223, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 16.88, |
|
"learning_rate": 8.28125e-06, |
|
"loss": 0.0099, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 16.88, |
|
"eval_accuracy": 0.9642857142857143, |
|
"eval_loss": 0.1689167469739914, |
|
"eval_runtime": 0.2808, |
|
"eval_samples_per_second": 199.425, |
|
"eval_steps_per_second": 24.928, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 17.5, |
|
"learning_rate": 6.71875e-06, |
|
"loss": 0.0033, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 17.5, |
|
"eval_accuracy": 0.9642857142857143, |
|
"eval_loss": 0.1613139808177948, |
|
"eval_runtime": 0.2796, |
|
"eval_samples_per_second": 200.293, |
|
"eval_steps_per_second": 25.037, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 18.12, |
|
"learning_rate": 5.15625e-06, |
|
"loss": 0.0046, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 18.12, |
|
"eval_accuracy": 0.9642857142857143, |
|
"eval_loss": 0.15769068896770477, |
|
"eval_runtime": 0.2801, |
|
"eval_samples_per_second": 199.909, |
|
"eval_steps_per_second": 24.989, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 18.75, |
|
"learning_rate": 3.5937499999999997e-06, |
|
"loss": 0.0017, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 18.75, |
|
"eval_accuracy": 0.9642857142857143, |
|
"eval_loss": 0.15903140604496002, |
|
"eval_runtime": 0.2817, |
|
"eval_samples_per_second": 198.793, |
|
"eval_steps_per_second": 24.849, |
|
"step": 300 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 320, |
|
"num_train_epochs": 20, |
|
"save_steps": 30, |
|
"total_flos": 7.227949247524454e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|