|
{ |
|
"best_metric": 0.5909090909090909, |
|
"best_model_checkpoint": "swin-tiny-patch4-window7-224-finetuned-isic217/checkpoint-563", |
|
"epoch": 48.97959183673469, |
|
"eval_steps": 500, |
|
"global_step": 1200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.40816326530612246, |
|
"grad_norm": 19.762880325317383, |
|
"learning_rate": 4.166666666666667e-06, |
|
"loss": 2.2252, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.8163265306122449, |
|
"grad_norm": 15.404119491577148, |
|
"learning_rate": 8.333333333333334e-06, |
|
"loss": 2.2679, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.9795918367346939, |
|
"eval_accuracy": 0.09090909090909091, |
|
"eval_loss": 2.1550188064575195, |
|
"eval_runtime": 6.6011, |
|
"eval_samples_per_second": 3.333, |
|
"eval_steps_per_second": 1.666, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 1.2244897959183674, |
|
"grad_norm": 21.01092529296875, |
|
"learning_rate": 1.25e-05, |
|
"loss": 2.0909, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 1.6326530612244898, |
|
"grad_norm": 19.893291473388672, |
|
"learning_rate": 1.6666666666666667e-05, |
|
"loss": 2.0504, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.2727272727272727, |
|
"eval_loss": 2.0559206008911133, |
|
"eval_runtime": 3.7884, |
|
"eval_samples_per_second": 5.807, |
|
"eval_steps_per_second": 2.904, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 2.0408163265306123, |
|
"grad_norm": 18.42198944091797, |
|
"learning_rate": 2.0833333333333336e-05, |
|
"loss": 1.987, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 2.4489795918367347, |
|
"grad_norm": 29.596494674682617, |
|
"learning_rate": 2.5e-05, |
|
"loss": 1.8355, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 2.857142857142857, |
|
"grad_norm": 23.39991569519043, |
|
"learning_rate": 2.916666666666667e-05, |
|
"loss": 1.8943, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 2.979591836734694, |
|
"eval_accuracy": 0.22727272727272727, |
|
"eval_loss": 2.0185506343841553, |
|
"eval_runtime": 3.7191, |
|
"eval_samples_per_second": 5.915, |
|
"eval_steps_per_second": 2.958, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 3.2653061224489797, |
|
"grad_norm": 34.72861099243164, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 1.6665, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 3.673469387755102, |
|
"grad_norm": 23.47280502319336, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 1.5671, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.22727272727272727, |
|
"eval_loss": 1.815375804901123, |
|
"eval_runtime": 3.7909, |
|
"eval_samples_per_second": 5.803, |
|
"eval_steps_per_second": 2.902, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 4.081632653061225, |
|
"grad_norm": 28.841514587402344, |
|
"learning_rate": 4.166666666666667e-05, |
|
"loss": 1.664, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 4.489795918367347, |
|
"grad_norm": 30.53756332397461, |
|
"learning_rate": 4.5833333333333334e-05, |
|
"loss": 1.553, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 4.8979591836734695, |
|
"grad_norm": 47.8935432434082, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3425, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 4.979591836734694, |
|
"eval_accuracy": 0.22727272727272727, |
|
"eval_loss": 2.047502279281616, |
|
"eval_runtime": 3.683, |
|
"eval_samples_per_second": 5.973, |
|
"eval_steps_per_second": 2.987, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 5.3061224489795915, |
|
"grad_norm": 21.777742385864258, |
|
"learning_rate": 4.9537037037037035e-05, |
|
"loss": 1.2543, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 5.714285714285714, |
|
"grad_norm": 32.264713287353516, |
|
"learning_rate": 4.9074074074074075e-05, |
|
"loss": 1.2758, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.22727272727272727, |
|
"eval_loss": 2.1913788318634033, |
|
"eval_runtime": 3.8174, |
|
"eval_samples_per_second": 5.763, |
|
"eval_steps_per_second": 2.882, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 6.122448979591836, |
|
"grad_norm": 30.771316528320312, |
|
"learning_rate": 4.8611111111111115e-05, |
|
"loss": 1.0049, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 6.530612244897959, |
|
"grad_norm": 37.03094482421875, |
|
"learning_rate": 4.814814814814815e-05, |
|
"loss": 0.9949, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 6.938775510204081, |
|
"grad_norm": 38.22206497192383, |
|
"learning_rate": 4.768518518518519e-05, |
|
"loss": 0.9808, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 6.979591836734694, |
|
"eval_accuracy": 0.36363636363636365, |
|
"eval_loss": 2.047807455062866, |
|
"eval_runtime": 3.6897, |
|
"eval_samples_per_second": 5.963, |
|
"eval_steps_per_second": 2.981, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 7.346938775510204, |
|
"grad_norm": 30.003612518310547, |
|
"learning_rate": 4.722222222222222e-05, |
|
"loss": 0.9797, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 7.755102040816326, |
|
"grad_norm": 48.88200378417969, |
|
"learning_rate": 4.675925925925926e-05, |
|
"loss": 0.7246, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.4090909090909091, |
|
"eval_loss": 1.884019374847412, |
|
"eval_runtime": 4.5579, |
|
"eval_samples_per_second": 4.827, |
|
"eval_steps_per_second": 2.413, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 8.16326530612245, |
|
"grad_norm": 25.771787643432617, |
|
"learning_rate": 4.62962962962963e-05, |
|
"loss": 0.8633, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 8.571428571428571, |
|
"grad_norm": 22.62033462524414, |
|
"learning_rate": 4.5833333333333334e-05, |
|
"loss": 0.5814, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 8.979591836734693, |
|
"grad_norm": 37.1585578918457, |
|
"learning_rate": 4.5370370370370374e-05, |
|
"loss": 0.7323, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 8.979591836734693, |
|
"eval_accuracy": 0.4090909090909091, |
|
"eval_loss": 2.1831090450286865, |
|
"eval_runtime": 3.7295, |
|
"eval_samples_per_second": 5.899, |
|
"eval_steps_per_second": 2.949, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 9.387755102040817, |
|
"grad_norm": 31.566368103027344, |
|
"learning_rate": 4.490740740740741e-05, |
|
"loss": 0.5923, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 9.795918367346939, |
|
"grad_norm": 61.39928436279297, |
|
"learning_rate": 4.4444444444444447e-05, |
|
"loss": 0.4881, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.36363636363636365, |
|
"eval_loss": 2.286790132522583, |
|
"eval_runtime": 3.7469, |
|
"eval_samples_per_second": 5.872, |
|
"eval_steps_per_second": 2.936, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 10.204081632653061, |
|
"grad_norm": 57.27079772949219, |
|
"learning_rate": 4.3981481481481486e-05, |
|
"loss": 0.4739, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 10.612244897959183, |
|
"grad_norm": 12.431879043579102, |
|
"learning_rate": 4.351851851851852e-05, |
|
"loss": 0.4346, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 10.979591836734693, |
|
"eval_accuracy": 0.45454545454545453, |
|
"eval_loss": 2.231236457824707, |
|
"eval_runtime": 3.6895, |
|
"eval_samples_per_second": 5.963, |
|
"eval_steps_per_second": 2.981, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 11.020408163265307, |
|
"grad_norm": 13.14640998840332, |
|
"learning_rate": 4.305555555555556e-05, |
|
"loss": 0.3967, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 11.428571428571429, |
|
"grad_norm": 37.04918670654297, |
|
"learning_rate": 4.259259259259259e-05, |
|
"loss": 0.4297, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 11.83673469387755, |
|
"grad_norm": 35.07551574707031, |
|
"learning_rate": 4.212962962962963e-05, |
|
"loss": 0.5647, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.4090909090909091, |
|
"eval_loss": 1.9897090196609497, |
|
"eval_runtime": 3.8455, |
|
"eval_samples_per_second": 5.721, |
|
"eval_steps_per_second": 2.86, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 12.244897959183673, |
|
"grad_norm": 5.31343936920166, |
|
"learning_rate": 4.166666666666667e-05, |
|
"loss": 0.376, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 12.653061224489797, |
|
"grad_norm": 2.7732479572296143, |
|
"learning_rate": 4.1203703703703705e-05, |
|
"loss": 0.1464, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 12.979591836734693, |
|
"eval_accuracy": 0.45454545454545453, |
|
"eval_loss": 2.057887315750122, |
|
"eval_runtime": 3.7271, |
|
"eval_samples_per_second": 5.903, |
|
"eval_steps_per_second": 2.951, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 13.061224489795919, |
|
"grad_norm": 27.16363525390625, |
|
"learning_rate": 4.074074074074074e-05, |
|
"loss": 0.4971, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 13.46938775510204, |
|
"grad_norm": 21.138622283935547, |
|
"learning_rate": 4.027777777777778e-05, |
|
"loss": 0.5071, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 13.877551020408163, |
|
"grad_norm": 79.29064178466797, |
|
"learning_rate": 3.981481481481482e-05, |
|
"loss": 0.5575, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.45454545454545453, |
|
"eval_loss": 2.1859312057495117, |
|
"eval_runtime": 3.7744, |
|
"eval_samples_per_second": 5.829, |
|
"eval_steps_per_second": 2.914, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 14.285714285714286, |
|
"grad_norm": 1.0631258487701416, |
|
"learning_rate": 3.935185185185186e-05, |
|
"loss": 0.213, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 14.693877551020408, |
|
"grad_norm": 59.045860290527344, |
|
"learning_rate": 3.888888888888889e-05, |
|
"loss": 0.3894, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 14.979591836734693, |
|
"eval_accuracy": 0.36363636363636365, |
|
"eval_loss": 2.7353034019470215, |
|
"eval_runtime": 4.1695, |
|
"eval_samples_per_second": 5.276, |
|
"eval_steps_per_second": 2.638, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 15.10204081632653, |
|
"grad_norm": 7.641679763793945, |
|
"learning_rate": 3.8425925925925924e-05, |
|
"loss": 0.2927, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 15.510204081632653, |
|
"grad_norm": 30.950389862060547, |
|
"learning_rate": 3.7962962962962964e-05, |
|
"loss": 0.4006, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 15.918367346938776, |
|
"grad_norm": 6.0692524909973145, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.4326, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.36363636363636365, |
|
"eval_loss": 2.4454774856567383, |
|
"eval_runtime": 3.8093, |
|
"eval_samples_per_second": 5.775, |
|
"eval_steps_per_second": 2.888, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 16.3265306122449, |
|
"grad_norm": 37.877662658691406, |
|
"learning_rate": 3.7037037037037037e-05, |
|
"loss": 0.381, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 16.73469387755102, |
|
"grad_norm": 6.306503772735596, |
|
"learning_rate": 3.6574074074074076e-05, |
|
"loss": 0.3715, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 16.979591836734695, |
|
"eval_accuracy": 0.5454545454545454, |
|
"eval_loss": 2.310415744781494, |
|
"eval_runtime": 3.7058, |
|
"eval_samples_per_second": 5.937, |
|
"eval_steps_per_second": 2.968, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 17.142857142857142, |
|
"grad_norm": 11.820294380187988, |
|
"learning_rate": 3.611111111111111e-05, |
|
"loss": 0.3212, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 17.551020408163264, |
|
"grad_norm": 49.839988708496094, |
|
"learning_rate": 3.564814814814815e-05, |
|
"loss": 0.2577, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 17.959183673469386, |
|
"grad_norm": 18.69984245300293, |
|
"learning_rate": 3.518518518518519e-05, |
|
"loss": 0.3966, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.45454545454545453, |
|
"eval_loss": 2.459726333618164, |
|
"eval_runtime": 3.7623, |
|
"eval_samples_per_second": 5.848, |
|
"eval_steps_per_second": 2.924, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 18.367346938775512, |
|
"grad_norm": 53.232845306396484, |
|
"learning_rate": 3.472222222222222e-05, |
|
"loss": 0.2414, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 18.775510204081634, |
|
"grad_norm": 17.193056106567383, |
|
"learning_rate": 3.425925925925926e-05, |
|
"loss": 0.1855, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 18.979591836734695, |
|
"eval_accuracy": 0.36363636363636365, |
|
"eval_loss": 2.3334853649139404, |
|
"eval_runtime": 3.7675, |
|
"eval_samples_per_second": 5.839, |
|
"eval_steps_per_second": 2.92, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 19.183673469387756, |
|
"grad_norm": 83.63492584228516, |
|
"learning_rate": 3.3796296296296295e-05, |
|
"loss": 0.147, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 19.591836734693878, |
|
"grad_norm": 18.476999282836914, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 0.1192, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 4.202638626098633, |
|
"learning_rate": 3.2870370370370375e-05, |
|
"loss": 0.1528, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.4090909090909091, |
|
"eval_loss": 2.3629674911499023, |
|
"eval_runtime": 3.7957, |
|
"eval_samples_per_second": 5.796, |
|
"eval_steps_per_second": 2.898, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 20.408163265306122, |
|
"grad_norm": 69.58875274658203, |
|
"learning_rate": 3.240740740740741e-05, |
|
"loss": 0.1692, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 20.816326530612244, |
|
"grad_norm": 27.37713050842285, |
|
"learning_rate": 3.194444444444444e-05, |
|
"loss": 0.2036, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 20.979591836734695, |
|
"eval_accuracy": 0.45454545454545453, |
|
"eval_loss": 2.3520021438598633, |
|
"eval_runtime": 3.6999, |
|
"eval_samples_per_second": 5.946, |
|
"eval_steps_per_second": 2.973, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 21.224489795918366, |
|
"grad_norm": 47.89410400390625, |
|
"learning_rate": 3.148148148148148e-05, |
|
"loss": 0.2093, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 21.632653061224488, |
|
"grad_norm": 42.28773880004883, |
|
"learning_rate": 3.101851851851852e-05, |
|
"loss": 0.2026, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.4090909090909091, |
|
"eval_loss": 2.7011702060699463, |
|
"eval_runtime": 4.0581, |
|
"eval_samples_per_second": 5.421, |
|
"eval_steps_per_second": 2.711, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 22.040816326530614, |
|
"grad_norm": 9.840641975402832, |
|
"learning_rate": 3.055555555555556e-05, |
|
"loss": 0.2354, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 22.448979591836736, |
|
"grad_norm": 292.9500732421875, |
|
"learning_rate": 3.0092592592592593e-05, |
|
"loss": 0.1092, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 22.857142857142858, |
|
"grad_norm": 3.9684982299804688, |
|
"learning_rate": 2.962962962962963e-05, |
|
"loss": 0.2127, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 22.979591836734695, |
|
"eval_accuracy": 0.5909090909090909, |
|
"eval_loss": 2.3724472522735596, |
|
"eval_runtime": 3.7338, |
|
"eval_samples_per_second": 5.892, |
|
"eval_steps_per_second": 2.946, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 23.26530612244898, |
|
"grad_norm": 33.97622299194336, |
|
"learning_rate": 2.916666666666667e-05, |
|
"loss": 0.287, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 23.6734693877551, |
|
"grad_norm": 64.21463012695312, |
|
"learning_rate": 2.8703703703703706e-05, |
|
"loss": 0.2719, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.3181818181818182, |
|
"eval_loss": 3.0376410484313965, |
|
"eval_runtime": 3.8252, |
|
"eval_samples_per_second": 5.751, |
|
"eval_steps_per_second": 2.876, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 24.081632653061224, |
|
"grad_norm": 18.13422203063965, |
|
"learning_rate": 2.824074074074074e-05, |
|
"loss": 0.1286, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 24.489795918367346, |
|
"grad_norm": 1.6358630657196045, |
|
"learning_rate": 2.777777777777778e-05, |
|
"loss": 0.0524, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 24.897959183673468, |
|
"grad_norm": 2.2687888145446777, |
|
"learning_rate": 2.7314814814814816e-05, |
|
"loss": 0.1292, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 24.979591836734695, |
|
"eval_accuracy": 0.5, |
|
"eval_loss": 2.568352699279785, |
|
"eval_runtime": 3.7655, |
|
"eval_samples_per_second": 5.842, |
|
"eval_steps_per_second": 2.921, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 25.306122448979593, |
|
"grad_norm": 0.1713099479675293, |
|
"learning_rate": 2.6851851851851855e-05, |
|
"loss": 0.2063, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 25.714285714285715, |
|
"grad_norm": 0.19898605346679688, |
|
"learning_rate": 2.6388888888888892e-05, |
|
"loss": 0.2533, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.4090909090909091, |
|
"eval_loss": 2.6974411010742188, |
|
"eval_runtime": 4.1166, |
|
"eval_samples_per_second": 5.344, |
|
"eval_steps_per_second": 2.672, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 26.122448979591837, |
|
"grad_norm": 10.351354598999023, |
|
"learning_rate": 2.5925925925925925e-05, |
|
"loss": 0.0784, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 26.53061224489796, |
|
"grad_norm": 53.10356903076172, |
|
"learning_rate": 2.5462962962962965e-05, |
|
"loss": 0.102, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 26.93877551020408, |
|
"grad_norm": 34.5041389465332, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.1947, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 26.979591836734695, |
|
"eval_accuracy": 0.4090909090909091, |
|
"eval_loss": 2.695702075958252, |
|
"eval_runtime": 3.6793, |
|
"eval_samples_per_second": 5.979, |
|
"eval_steps_per_second": 2.99, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 27.346938775510203, |
|
"grad_norm": 0.4286749064922333, |
|
"learning_rate": 2.4537037037037038e-05, |
|
"loss": 0.1586, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 27.755102040816325, |
|
"grad_norm": 1.2722495794296265, |
|
"learning_rate": 2.4074074074074074e-05, |
|
"loss": 0.1805, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.4090909090909091, |
|
"eval_loss": 2.8953325748443604, |
|
"eval_runtime": 3.8043, |
|
"eval_samples_per_second": 5.783, |
|
"eval_steps_per_second": 2.891, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 28.163265306122447, |
|
"grad_norm": 9.904804229736328, |
|
"learning_rate": 2.361111111111111e-05, |
|
"loss": 0.3122, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 28.571428571428573, |
|
"grad_norm": 34.21662521362305, |
|
"learning_rate": 2.314814814814815e-05, |
|
"loss": 0.0842, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 28.979591836734695, |
|
"grad_norm": 0.2031356394290924, |
|
"learning_rate": 2.2685185185185187e-05, |
|
"loss": 0.1123, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 28.979591836734695, |
|
"eval_accuracy": 0.4090909090909091, |
|
"eval_loss": 2.824019432067871, |
|
"eval_runtime": 4.2297, |
|
"eval_samples_per_second": 5.201, |
|
"eval_steps_per_second": 2.601, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 29.387755102040817, |
|
"grad_norm": 0.74458909034729, |
|
"learning_rate": 2.2222222222222223e-05, |
|
"loss": 0.1034, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 29.79591836734694, |
|
"grad_norm": 52.627017974853516, |
|
"learning_rate": 2.175925925925926e-05, |
|
"loss": 0.2143, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.45454545454545453, |
|
"eval_loss": 2.3879992961883545, |
|
"eval_runtime": 3.7999, |
|
"eval_samples_per_second": 5.79, |
|
"eval_steps_per_second": 2.895, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 30.20408163265306, |
|
"grad_norm": 18.753475189208984, |
|
"learning_rate": 2.1296296296296296e-05, |
|
"loss": 0.1084, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 30.612244897959183, |
|
"grad_norm": 36.20948028564453, |
|
"learning_rate": 2.0833333333333336e-05, |
|
"loss": 0.1845, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 30.979591836734695, |
|
"eval_accuracy": 0.36363636363636365, |
|
"eval_loss": 2.607203483581543, |
|
"eval_runtime": 3.7239, |
|
"eval_samples_per_second": 5.908, |
|
"eval_steps_per_second": 2.954, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 31.020408163265305, |
|
"grad_norm": 125.52183532714844, |
|
"learning_rate": 2.037037037037037e-05, |
|
"loss": 0.1467, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 31.428571428571427, |
|
"grad_norm": 0.5813603401184082, |
|
"learning_rate": 1.990740740740741e-05, |
|
"loss": 0.073, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 31.836734693877553, |
|
"grad_norm": 29.528520584106445, |
|
"learning_rate": 1.9444444444444445e-05, |
|
"loss": 0.0921, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.45454545454545453, |
|
"eval_loss": 2.7255632877349854, |
|
"eval_runtime": 3.7809, |
|
"eval_samples_per_second": 5.819, |
|
"eval_steps_per_second": 2.909, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 32.244897959183675, |
|
"grad_norm": 23.8216495513916, |
|
"learning_rate": 1.8981481481481482e-05, |
|
"loss": 0.0362, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 32.6530612244898, |
|
"grad_norm": 8.946208000183105, |
|
"learning_rate": 1.8518518518518518e-05, |
|
"loss": 0.0276, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 32.97959183673469, |
|
"eval_accuracy": 0.4090909090909091, |
|
"eval_loss": 2.4073522090911865, |
|
"eval_runtime": 3.7423, |
|
"eval_samples_per_second": 5.879, |
|
"eval_steps_per_second": 2.939, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 33.06122448979592, |
|
"grad_norm": 11.886552810668945, |
|
"learning_rate": 1.8055555555555555e-05, |
|
"loss": 0.0145, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 33.46938775510204, |
|
"grad_norm": 2.590672492980957, |
|
"learning_rate": 1.7592592592592595e-05, |
|
"loss": 0.0735, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 33.87755102040816, |
|
"grad_norm": 2.8415122032165527, |
|
"learning_rate": 1.712962962962963e-05, |
|
"loss": 0.0876, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.45454545454545453, |
|
"eval_loss": 2.6042609214782715, |
|
"eval_runtime": 3.8508, |
|
"eval_samples_per_second": 5.713, |
|
"eval_steps_per_second": 2.857, |
|
"step": 833 |
|
}, |
|
{ |
|
"epoch": 34.285714285714285, |
|
"grad_norm": 0.48890063166618347, |
|
"learning_rate": 1.6666666666666667e-05, |
|
"loss": 0.0343, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 34.69387755102041, |
|
"grad_norm": 5.784631252288818, |
|
"learning_rate": 1.6203703703703704e-05, |
|
"loss": 0.0253, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 34.97959183673469, |
|
"eval_accuracy": 0.45454545454545453, |
|
"eval_loss": 2.76198673248291, |
|
"eval_runtime": 3.6773, |
|
"eval_samples_per_second": 5.983, |
|
"eval_steps_per_second": 2.991, |
|
"step": 857 |
|
}, |
|
{ |
|
"epoch": 35.10204081632653, |
|
"grad_norm": 3.974219560623169, |
|
"learning_rate": 1.574074074074074e-05, |
|
"loss": 0.1737, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 35.51020408163265, |
|
"grad_norm": 0.8458910584449768, |
|
"learning_rate": 1.527777777777778e-05, |
|
"loss": 0.1227, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 35.91836734693877, |
|
"grad_norm": 49.0916748046875, |
|
"learning_rate": 1.4814814814814815e-05, |
|
"loss": 0.1904, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.4090909090909091, |
|
"eval_loss": 2.6911137104034424, |
|
"eval_runtime": 4.2847, |
|
"eval_samples_per_second": 5.135, |
|
"eval_steps_per_second": 2.567, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 36.326530612244895, |
|
"grad_norm": 33.6256103515625, |
|
"learning_rate": 1.4351851851851853e-05, |
|
"loss": 0.2075, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 36.734693877551024, |
|
"grad_norm": 0.20336776971817017, |
|
"learning_rate": 1.388888888888889e-05, |
|
"loss": 0.072, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 36.97959183673469, |
|
"eval_accuracy": 0.45454545454545453, |
|
"eval_loss": 2.6528260707855225, |
|
"eval_runtime": 3.7052, |
|
"eval_samples_per_second": 5.938, |
|
"eval_steps_per_second": 2.969, |
|
"step": 906 |
|
}, |
|
{ |
|
"epoch": 37.142857142857146, |
|
"grad_norm": 18.30384063720703, |
|
"learning_rate": 1.3425925925925928e-05, |
|
"loss": 0.1398, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 37.55102040816327, |
|
"grad_norm": 0.07602863758802414, |
|
"learning_rate": 1.2962962962962962e-05, |
|
"loss": 0.069, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 37.95918367346939, |
|
"grad_norm": 8.712791442871094, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.169, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.45454545454545453, |
|
"eval_loss": 2.645385265350342, |
|
"eval_runtime": 3.7757, |
|
"eval_samples_per_second": 5.827, |
|
"eval_steps_per_second": 2.913, |
|
"step": 931 |
|
}, |
|
{ |
|
"epoch": 38.36734693877551, |
|
"grad_norm": 0.9796821475028992, |
|
"learning_rate": 1.2037037037037037e-05, |
|
"loss": 0.0137, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 38.775510204081634, |
|
"grad_norm": 0.2808334529399872, |
|
"learning_rate": 1.1574074074074075e-05, |
|
"loss": 0.0978, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 38.97959183673469, |
|
"eval_accuracy": 0.5, |
|
"eval_loss": 2.626924514770508, |
|
"eval_runtime": 3.7403, |
|
"eval_samples_per_second": 5.882, |
|
"eval_steps_per_second": 2.941, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 39.183673469387756, |
|
"grad_norm": 0.008785932324826717, |
|
"learning_rate": 1.1111111111111112e-05, |
|
"loss": 0.0195, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 39.59183673469388, |
|
"grad_norm": 0.38346049189567566, |
|
"learning_rate": 1.0648148148148148e-05, |
|
"loss": 0.0283, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"grad_norm": 91.21365356445312, |
|
"learning_rate": 1.0185185185185185e-05, |
|
"loss": 0.069, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.45454545454545453, |
|
"eval_loss": 2.4153661727905273, |
|
"eval_runtime": 3.8277, |
|
"eval_samples_per_second": 5.748, |
|
"eval_steps_per_second": 2.874, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 40.40816326530612, |
|
"grad_norm": 50.51347351074219, |
|
"learning_rate": 9.722222222222223e-06, |
|
"loss": 0.1111, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 40.816326530612244, |
|
"grad_norm": 0.06127191707491875, |
|
"learning_rate": 9.259259259259259e-06, |
|
"loss": 0.0159, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 40.97959183673469, |
|
"eval_accuracy": 0.45454545454545453, |
|
"eval_loss": 2.7026443481445312, |
|
"eval_runtime": 3.6976, |
|
"eval_samples_per_second": 5.95, |
|
"eval_steps_per_second": 2.975, |
|
"step": 1004 |
|
}, |
|
{ |
|
"epoch": 41.224489795918366, |
|
"grad_norm": 0.4312690794467926, |
|
"learning_rate": 8.796296296296297e-06, |
|
"loss": 0.1659, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 41.63265306122449, |
|
"grad_norm": 0.1739516258239746, |
|
"learning_rate": 8.333333333333334e-06, |
|
"loss": 0.2046, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.45454545454545453, |
|
"eval_loss": 2.521285057067871, |
|
"eval_runtime": 4.0417, |
|
"eval_samples_per_second": 5.443, |
|
"eval_steps_per_second": 2.722, |
|
"step": 1029 |
|
}, |
|
{ |
|
"epoch": 42.04081632653061, |
|
"grad_norm": 0.023499183356761932, |
|
"learning_rate": 7.87037037037037e-06, |
|
"loss": 0.0502, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 42.44897959183673, |
|
"grad_norm": 8.13757038116455, |
|
"learning_rate": 7.4074074074074075e-06, |
|
"loss": 0.1637, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 42.857142857142854, |
|
"grad_norm": 0.8705015778541565, |
|
"learning_rate": 6.944444444444445e-06, |
|
"loss": 0.0329, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 42.97959183673469, |
|
"eval_accuracy": 0.5, |
|
"eval_loss": 2.639939069747925, |
|
"eval_runtime": 4.1783, |
|
"eval_samples_per_second": 5.265, |
|
"eval_steps_per_second": 2.633, |
|
"step": 1053 |
|
}, |
|
{ |
|
"epoch": 43.265306122448976, |
|
"grad_norm": 0.7091279029846191, |
|
"learning_rate": 6.481481481481481e-06, |
|
"loss": 0.008, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 43.673469387755105, |
|
"grad_norm": 0.27460241317749023, |
|
"learning_rate": 6.0185185185185185e-06, |
|
"loss": 0.0166, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.45454545454545453, |
|
"eval_loss": 2.778676986694336, |
|
"eval_runtime": 3.8444, |
|
"eval_samples_per_second": 5.723, |
|
"eval_steps_per_second": 2.861, |
|
"step": 1078 |
|
}, |
|
{ |
|
"epoch": 44.08163265306123, |
|
"grad_norm": 9.933192253112793, |
|
"learning_rate": 5.555555555555556e-06, |
|
"loss": 0.0275, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 44.48979591836735, |
|
"grad_norm": 0.6145612001419067, |
|
"learning_rate": 5.092592592592592e-06, |
|
"loss": 0.0086, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 44.89795918367347, |
|
"grad_norm": 0.030872033908963203, |
|
"learning_rate": 4.6296296296296296e-06, |
|
"loss": 0.0812, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 44.97959183673469, |
|
"eval_accuracy": 0.45454545454545453, |
|
"eval_loss": 2.8176286220550537, |
|
"eval_runtime": 3.7182, |
|
"eval_samples_per_second": 5.917, |
|
"eval_steps_per_second": 2.958, |
|
"step": 1102 |
|
}, |
|
{ |
|
"epoch": 45.30612244897959, |
|
"grad_norm": 0.21678821742534637, |
|
"learning_rate": 4.166666666666667e-06, |
|
"loss": 0.0005, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 45.714285714285715, |
|
"grad_norm": 1.7509175539016724, |
|
"learning_rate": 3.7037037037037037e-06, |
|
"loss": 0.0197, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.45454545454545453, |
|
"eval_loss": 2.804949998855591, |
|
"eval_runtime": 3.8321, |
|
"eval_samples_per_second": 5.741, |
|
"eval_steps_per_second": 2.87, |
|
"step": 1127 |
|
}, |
|
{ |
|
"epoch": 46.12244897959184, |
|
"grad_norm": 0.18609049916267395, |
|
"learning_rate": 3.2407407407407406e-06, |
|
"loss": 0.2891, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 46.53061224489796, |
|
"grad_norm": 0.7729312777519226, |
|
"learning_rate": 2.777777777777778e-06, |
|
"loss": 0.0178, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 46.93877551020408, |
|
"grad_norm": 1.1792634725570679, |
|
"learning_rate": 2.3148148148148148e-06, |
|
"loss": 0.0989, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 46.97959183673469, |
|
"eval_accuracy": 0.45454545454545453, |
|
"eval_loss": 2.7478928565979004, |
|
"eval_runtime": 3.7127, |
|
"eval_samples_per_second": 5.926, |
|
"eval_steps_per_second": 2.963, |
|
"step": 1151 |
|
}, |
|
{ |
|
"epoch": 47.3469387755102, |
|
"grad_norm": 0.029640546068549156, |
|
"learning_rate": 1.8518518518518519e-06, |
|
"loss": 0.0158, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 47.755102040816325, |
|
"grad_norm": 0.2686777412891388, |
|
"learning_rate": 1.388888888888889e-06, |
|
"loss": 0.054, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.45454545454545453, |
|
"eval_loss": 2.761411190032959, |
|
"eval_runtime": 3.8702, |
|
"eval_samples_per_second": 5.684, |
|
"eval_steps_per_second": 2.842, |
|
"step": 1176 |
|
}, |
|
{ |
|
"epoch": 48.16326530612245, |
|
"grad_norm": 0.17129023373126984, |
|
"learning_rate": 9.259259259259259e-07, |
|
"loss": 0.0709, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 48.57142857142857, |
|
"grad_norm": 0.13873820006847382, |
|
"learning_rate": 4.6296296296296297e-07, |
|
"loss": 0.0133, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 48.97959183673469, |
|
"grad_norm": 0.48775115609169006, |
|
"learning_rate": 0.0, |
|
"loss": 0.1095, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 48.97959183673469, |
|
"eval_accuracy": 0.5, |
|
"eval_loss": 2.7604033946990967, |
|
"eval_runtime": 3.7642, |
|
"eval_samples_per_second": 5.845, |
|
"eval_steps_per_second": 2.922, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 48.97959183673469, |
|
"step": 1200, |
|
"total_flos": 2.374708462608384e+17, |
|
"train_loss": 0.4113796023776134, |
|
"train_runtime": 1728.9004, |
|
"train_samples_per_second": 5.639, |
|
"train_steps_per_second": 0.694 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1200, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 50, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.374708462608384e+17, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|