{ "best_metric": 0.14660954475402832, "best_model_checkpoint": "./Beit-doc-classification/checkpoint-240", "epoch": 18.75, "eval_steps": 10, "global_step": 300, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.62, "learning_rate": 4.8906250000000006e-05, "loss": 2.174, "step": 10 }, { "epoch": 0.62, "eval_accuracy": 0.6964285714285714, "eval_loss": 0.7704058885574341, "eval_runtime": 0.2746, "eval_samples_per_second": 203.898, "eval_steps_per_second": 25.487, "step": 10 }, { "epoch": 1.25, "learning_rate": 4.734375e-05, "loss": 0.8459, "step": 20 }, { "epoch": 1.25, "eval_accuracy": 0.75, "eval_loss": 0.6054777503013611, "eval_runtime": 0.2697, "eval_samples_per_second": 207.624, "eval_steps_per_second": 25.953, "step": 20 }, { "epoch": 1.88, "learning_rate": 4.5781250000000005e-05, "loss": 0.5794, "step": 30 }, { "epoch": 1.88, "eval_accuracy": 0.75, "eval_loss": 0.4210772216320038, "eval_runtime": 0.2721, "eval_samples_per_second": 205.818, "eval_steps_per_second": 25.727, "step": 30 }, { "epoch": 2.5, "learning_rate": 4.421875e-05, "loss": 0.356, "step": 40 }, { "epoch": 2.5, "eval_accuracy": 0.9285714285714286, "eval_loss": 0.23619188368320465, "eval_runtime": 0.276, "eval_samples_per_second": 202.895, "eval_steps_per_second": 25.362, "step": 40 }, { "epoch": 3.12, "learning_rate": 4.2656250000000003e-05, "loss": 0.2762, "step": 50 }, { "epoch": 3.12, "eval_accuracy": 0.9464285714285714, "eval_loss": 0.2510121166706085, "eval_runtime": 0.2788, "eval_samples_per_second": 200.857, "eval_steps_per_second": 25.107, "step": 50 }, { "epoch": 3.75, "learning_rate": 4.1093750000000006e-05, "loss": 0.2331, "step": 60 }, { "epoch": 3.75, "eval_accuracy": 0.9642857142857143, "eval_loss": 0.18232552707195282, "eval_runtime": 0.2722, "eval_samples_per_second": 205.715, "eval_steps_per_second": 25.714, "step": 60 }, { "epoch": 4.38, "learning_rate": 3.953125e-05, "loss": 0.183, "step": 70 }, { "epoch": 4.38, "eval_accuracy": 0.9642857142857143, "eval_loss": 0.1663789004087448, "eval_runtime": 0.2703, "eval_samples_per_second": 207.189, "eval_steps_per_second": 25.899, "step": 70 }, { "epoch": 5.0, "learning_rate": 3.7968750000000005e-05, "loss": 0.1171, "step": 80 }, { "epoch": 5.0, "eval_accuracy": 0.9642857142857143, "eval_loss": 0.182296484708786, "eval_runtime": 0.2718, "eval_samples_per_second": 206.04, "eval_steps_per_second": 25.755, "step": 80 }, { "epoch": 5.62, "learning_rate": 3.640625e-05, "loss": 0.0875, "step": 90 }, { "epoch": 5.62, "eval_accuracy": 0.9642857142857143, "eval_loss": 0.18081991374492645, "eval_runtime": 0.2717, "eval_samples_per_second": 206.097, "eval_steps_per_second": 25.762, "step": 90 }, { "epoch": 6.25, "learning_rate": 3.484375e-05, "loss": 0.1186, "step": 100 }, { "epoch": 6.25, "eval_accuracy": 0.9642857142857143, "eval_loss": 0.1665651649236679, "eval_runtime": 0.2734, "eval_samples_per_second": 204.829, "eval_steps_per_second": 25.604, "step": 100 }, { "epoch": 6.88, "learning_rate": 3.3281250000000006e-05, "loss": 0.1174, "step": 110 }, { "epoch": 6.88, "eval_accuracy": 0.9642857142857143, "eval_loss": 0.14654958248138428, "eval_runtime": 0.2716, "eval_samples_per_second": 206.219, "eval_steps_per_second": 25.777, "step": 110 }, { "epoch": 7.5, "learning_rate": 3.171875e-05, "loss": 0.0519, "step": 120 }, { "epoch": 7.5, "eval_accuracy": 0.9642857142857143, "eval_loss": 0.1532459259033203, "eval_runtime": 0.2731, "eval_samples_per_second": 205.051, "eval_steps_per_second": 25.631, "step": 120 }, { "epoch": 8.12, "learning_rate": 3.015625e-05, "loss": 0.0338, "step": 130 }, { "epoch": 8.12, "eval_accuracy": 0.9642857142857143, "eval_loss": 0.18999779224395752, "eval_runtime": 0.272, "eval_samples_per_second": 205.856, "eval_steps_per_second": 25.732, "step": 130 }, { "epoch": 8.75, "learning_rate": 2.8593750000000004e-05, "loss": 0.0241, "step": 140 }, { "epoch": 8.75, "eval_accuracy": 0.9642857142857143, "eval_loss": 0.1611316055059433, "eval_runtime": 0.2718, "eval_samples_per_second": 206.047, "eval_steps_per_second": 25.756, "step": 140 }, { "epoch": 9.38, "learning_rate": 2.7031250000000003e-05, "loss": 0.0138, "step": 150 }, { "epoch": 9.38, "eval_accuracy": 0.9642857142857143, "eval_loss": 0.17584912478923798, "eval_runtime": 0.2713, "eval_samples_per_second": 206.411, "eval_steps_per_second": 25.801, "step": 150 }, { "epoch": 10.0, "learning_rate": 2.5468750000000002e-05, "loss": 0.0155, "step": 160 }, { "epoch": 10.0, "eval_accuracy": 0.9642857142857143, "eval_loss": 0.14303383231163025, "eval_runtime": 0.2704, "eval_samples_per_second": 207.101, "eval_steps_per_second": 25.888, "step": 160 }, { "epoch": 10.62, "learning_rate": 2.3906250000000002e-05, "loss": 0.0478, "step": 170 }, { "epoch": 10.62, "eval_accuracy": 0.9642857142857143, "eval_loss": 0.15157583355903625, "eval_runtime": 0.2795, "eval_samples_per_second": 200.393, "eval_steps_per_second": 25.049, "step": 170 }, { "epoch": 11.25, "learning_rate": 2.234375e-05, "loss": 0.0228, "step": 180 }, { "epoch": 11.25, "eval_accuracy": 0.9642857142857143, "eval_loss": 0.17853426933288574, "eval_runtime": 0.2946, "eval_samples_per_second": 190.103, "eval_steps_per_second": 23.763, "step": 180 }, { "epoch": 11.88, "learning_rate": 2.0781250000000004e-05, "loss": 0.0295, "step": 190 }, { "epoch": 11.88, "eval_accuracy": 0.9642857142857143, "eval_loss": 0.16022925078868866, "eval_runtime": 0.2758, "eval_samples_per_second": 203.034, "eval_steps_per_second": 25.379, "step": 190 }, { "epoch": 12.5, "learning_rate": 1.9218750000000003e-05, "loss": 0.0084, "step": 200 }, { "epoch": 12.5, "eval_accuracy": 0.9642857142857143, "eval_loss": 0.19061991572380066, "eval_runtime": 0.281, "eval_samples_per_second": 199.27, "eval_steps_per_second": 24.909, "step": 200 }, { "epoch": 13.12, "learning_rate": 1.7656250000000002e-05, "loss": 0.0064, "step": 210 }, { "epoch": 13.12, "eval_accuracy": 0.9642857142857143, "eval_loss": 0.1927066147327423, "eval_runtime": 0.272, "eval_samples_per_second": 205.909, "eval_steps_per_second": 25.739, "step": 210 }, { "epoch": 13.75, "learning_rate": 1.609375e-05, "loss": 0.0131, "step": 220 }, { "epoch": 13.75, "eval_accuracy": 0.9642857142857143, "eval_loss": 0.17037805914878845, "eval_runtime": 0.2756, "eval_samples_per_second": 203.19, "eval_steps_per_second": 25.399, "step": 220 }, { "epoch": 14.38, "learning_rate": 1.4531250000000003e-05, "loss": 0.0134, "step": 230 }, { "epoch": 14.38, "eval_accuracy": 0.9642857142857143, "eval_loss": 0.14340999722480774, "eval_runtime": 0.2733, "eval_samples_per_second": 204.909, "eval_steps_per_second": 25.614, "step": 230 }, { "epoch": 15.0, "learning_rate": 1.2968750000000002e-05, "loss": 0.0082, "step": 240 }, { "epoch": 15.0, "eval_accuracy": 0.9821428571428571, "eval_loss": 0.14660954475402832, "eval_runtime": 0.2748, "eval_samples_per_second": 203.761, "eval_steps_per_second": 25.47, "step": 240 }, { "epoch": 15.62, "learning_rate": 1.140625e-05, "loss": 0.0082, "step": 250 }, { "epoch": 15.62, "eval_accuracy": 0.9642857142857143, "eval_loss": 0.15767399966716766, "eval_runtime": 0.2783, "eval_samples_per_second": 201.226, "eval_steps_per_second": 25.153, "step": 250 }, { "epoch": 16.25, "learning_rate": 9.84375e-06, "loss": 0.0104, "step": 260 }, { "epoch": 16.25, "eval_accuracy": 0.9642857142857143, "eval_loss": 0.17043821513652802, "eval_runtime": 0.2775, "eval_samples_per_second": 201.786, "eval_steps_per_second": 25.223, "step": 260 }, { "epoch": 16.88, "learning_rate": 8.28125e-06, "loss": 0.0099, "step": 270 }, { "epoch": 16.88, "eval_accuracy": 0.9642857142857143, "eval_loss": 0.1689167469739914, "eval_runtime": 0.2808, "eval_samples_per_second": 199.425, "eval_steps_per_second": 24.928, "step": 270 }, { "epoch": 17.5, "learning_rate": 6.71875e-06, "loss": 0.0033, "step": 280 }, { "epoch": 17.5, "eval_accuracy": 0.9642857142857143, "eval_loss": 0.1613139808177948, "eval_runtime": 0.2796, "eval_samples_per_second": 200.293, "eval_steps_per_second": 25.037, "step": 280 }, { "epoch": 18.12, "learning_rate": 5.15625e-06, "loss": 0.0046, "step": 290 }, { "epoch": 18.12, "eval_accuracy": 0.9642857142857143, "eval_loss": 0.15769068896770477, "eval_runtime": 0.2801, "eval_samples_per_second": 199.909, "eval_steps_per_second": 24.989, "step": 290 }, { "epoch": 18.75, "learning_rate": 3.5937499999999997e-06, "loss": 0.0017, "step": 300 }, { "epoch": 18.75, "eval_accuracy": 0.9642857142857143, "eval_loss": 0.15903140604496002, "eval_runtime": 0.2817, "eval_samples_per_second": 198.793, "eval_steps_per_second": 24.849, "step": 300 } ], "logging_steps": 10, "max_steps": 320, "num_train_epochs": 20, "save_steps": 30, "total_flos": 7.227949247524454e+17, "trial_name": null, "trial_params": null }