|
{
  "best_metric": 0.41899441340782123,
  "best_model_checkpoint": "convnextv2-base-1k-224-for-pre_evaluation/checkpoint-128",
  "epoch": 30.0,
  "eval_steps": 500,
  "global_step": 480,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.62,
      "learning_rate": 1.0416666666666668e-05,
      "loss": 1.6,
      "step": 10
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.29608938547486036,
      "eval_loss": 1.5315604209899902,
      "eval_runtime": 6.8342,
      "eval_samples_per_second": 52.384,
      "eval_steps_per_second": 1.756,
      "step": 16
    },
    {
      "epoch": 1.25,
      "learning_rate": 2.0833333333333336e-05,
      "loss": 1.535,
      "step": 20
    },
    {
      "epoch": 1.88,
      "learning_rate": 3.125e-05,
      "loss": 1.5084,
      "step": 30
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.2849162011173184,
      "eval_loss": 1.5060781240463257,
      "eval_runtime": 7.4696,
      "eval_samples_per_second": 47.928,
      "eval_steps_per_second": 1.607,
      "step": 32
    },
    {
      "epoch": 2.5,
      "learning_rate": 4.166666666666667e-05,
      "loss": 1.5134,
      "step": 40
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.3240223463687151,
      "eval_loss": 1.4968407154083252,
      "eval_runtime": 7.1172,
      "eval_samples_per_second": 50.301,
      "eval_steps_per_second": 1.686,
      "step": 48
    },
    {
      "epoch": 3.12,
      "learning_rate": 4.976851851851852e-05,
      "loss": 1.4694,
      "step": 50
    },
    {
      "epoch": 3.75,
      "learning_rate": 4.8611111111111115e-05,
      "loss": 1.4663,
      "step": 60
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.33519553072625696,
      "eval_loss": 1.4607384204864502,
      "eval_runtime": 7.4013,
      "eval_samples_per_second": 48.37,
      "eval_steps_per_second": 1.621,
      "step": 64
    },
    {
      "epoch": 4.38,
      "learning_rate": 4.745370370370371e-05,
      "loss": 1.4375,
      "step": 70
    },
    {
      "epoch": 5.0,
      "learning_rate": 4.62962962962963e-05,
      "loss": 1.4046,
      "step": 80
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.3268156424581006,
      "eval_loss": 1.4509011507034302,
      "eval_runtime": 6.83,
      "eval_samples_per_second": 52.416,
      "eval_steps_per_second": 1.757,
      "step": 80
    },
    {
      "epoch": 5.62,
      "learning_rate": 4.5138888888888894e-05,
      "loss": 1.4085,
      "step": 90
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.388268156424581,
      "eval_loss": 1.4423185586929321,
      "eval_runtime": 7.3683,
      "eval_samples_per_second": 48.587,
      "eval_steps_per_second": 1.629,
      "step": 96
    },
    {
      "epoch": 6.25,
      "learning_rate": 4.3981481481481486e-05,
      "loss": 1.3765,
      "step": 100
    },
    {
      "epoch": 6.88,
      "learning_rate": 4.282407407407408e-05,
      "loss": 1.3443,
      "step": 110
    },
    {
      "epoch": 7.0,
      "eval_accuracy": 0.4022346368715084,
      "eval_loss": 1.400512456893921,
      "eval_runtime": 6.9156,
      "eval_samples_per_second": 51.767,
      "eval_steps_per_second": 1.735,
      "step": 112
    },
    {
      "epoch": 7.5,
      "learning_rate": 4.166666666666667e-05,
      "loss": 1.3025,
      "step": 120
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.41899441340782123,
      "eval_loss": 1.359870195388794,
      "eval_runtime": 7.041,
      "eval_samples_per_second": 50.845,
      "eval_steps_per_second": 1.704,
      "step": 128
    },
    {
      "epoch": 8.12,
      "learning_rate": 4.0509259259259265e-05,
      "loss": 1.2668,
      "step": 130
    },
    {
      "epoch": 8.75,
      "learning_rate": 3.935185185185186e-05,
      "loss": 1.2627,
      "step": 140
    },
    {
      "epoch": 9.0,
      "eval_accuracy": 0.39106145251396646,
      "eval_loss": 1.3638169765472412,
      "eval_runtime": 7.2532,
      "eval_samples_per_second": 49.357,
      "eval_steps_per_second": 1.654,
      "step": 144
    },
    {
      "epoch": 9.38,
      "learning_rate": 3.8194444444444444e-05,
      "loss": 1.2427,
      "step": 150
    },
    {
      "epoch": 10.0,
      "learning_rate": 3.7037037037037037e-05,
      "loss": 1.2099,
      "step": 160
    },
    {
      "epoch": 10.0,
      "eval_accuracy": 0.34916201117318435,
      "eval_loss": 1.4057648181915283,
      "eval_runtime": 7.508,
      "eval_samples_per_second": 47.682,
      "eval_steps_per_second": 1.598,
      "step": 160
    },
    {
      "epoch": 10.62,
      "learning_rate": 3.587962962962963e-05,
      "loss": 1.2086,
      "step": 170
    },
    {
      "epoch": 11.0,
      "eval_accuracy": 0.3407821229050279,
      "eval_loss": 1.443146824836731,
      "eval_runtime": 7.4283,
      "eval_samples_per_second": 48.194,
      "eval_steps_per_second": 1.615,
      "step": 176
    },
    {
      "epoch": 11.25,
      "learning_rate": 3.472222222222222e-05,
      "loss": 1.1684,
      "step": 180
    },
    {
      "epoch": 11.88,
      "learning_rate": 3.3564814814814815e-05,
      "loss": 1.1393,
      "step": 190
    },
    {
      "epoch": 12.0,
      "eval_accuracy": 0.34916201117318435,
      "eval_loss": 1.4143450260162354,
      "eval_runtime": 7.1396,
      "eval_samples_per_second": 50.143,
      "eval_steps_per_second": 1.681,
      "step": 192
    },
    {
      "epoch": 12.5,
      "learning_rate": 3.240740740740741e-05,
      "loss": 1.1039,
      "step": 200
    },
    {
      "epoch": 13.0,
      "eval_accuracy": 0.388268156424581,
      "eval_loss": 1.4305065870285034,
      "eval_runtime": 6.8442,
      "eval_samples_per_second": 52.307,
      "eval_steps_per_second": 1.753,
      "step": 208
    },
    {
      "epoch": 13.12,
      "learning_rate": 3.125e-05,
      "loss": 1.0641,
      "step": 210
    },
    {
      "epoch": 13.75,
      "learning_rate": 3.0092592592592593e-05,
      "loss": 1.0551,
      "step": 220
    },
    {
      "epoch": 14.0,
      "eval_accuracy": 0.35195530726256985,
      "eval_loss": 1.5202596187591553,
      "eval_runtime": 7.2925,
      "eval_samples_per_second": 49.091,
      "eval_steps_per_second": 1.646,
      "step": 224
    },
    {
      "epoch": 14.38,
      "learning_rate": 2.8935185185185186e-05,
      "loss": 1.0686,
      "step": 230
    },
    {
      "epoch": 15.0,
      "learning_rate": 2.777777777777778e-05,
      "loss": 1.0368,
      "step": 240
    },
    {
      "epoch": 15.0,
      "eval_accuracy": 0.3324022346368715,
      "eval_loss": 1.5117393732070923,
      "eval_runtime": 6.8377,
      "eval_samples_per_second": 52.356,
      "eval_steps_per_second": 1.755,
      "step": 240
    },
    {
      "epoch": 15.62,
      "learning_rate": 2.6620370370370372e-05,
      "loss": 0.9753,
      "step": 250
    },
    {
      "epoch": 16.0,
      "eval_accuracy": 0.3770949720670391,
      "eval_loss": 1.4545259475708008,
      "eval_runtime": 7.0796,
      "eval_samples_per_second": 50.568,
      "eval_steps_per_second": 1.695,
      "step": 256
    },
    {
      "epoch": 16.25,
      "learning_rate": 2.5462962962962965e-05,
      "loss": 0.9677,
      "step": 260
    },
    {
      "epoch": 16.88,
      "learning_rate": 2.4305555555555558e-05,
      "loss": 0.938,
      "step": 270
    },
    {
      "epoch": 17.0,
      "eval_accuracy": 0.33519553072625696,
      "eval_loss": 1.5396308898925781,
      "eval_runtime": 7.1168,
      "eval_samples_per_second": 50.304,
      "eval_steps_per_second": 1.686,
      "step": 272
    },
    {
      "epoch": 17.5,
      "learning_rate": 2.314814814814815e-05,
      "loss": 0.899,
      "step": 280
    },
    {
      "epoch": 18.0,
      "eval_accuracy": 0.3407821229050279,
      "eval_loss": 1.5770219564437866,
      "eval_runtime": 6.8822,
      "eval_samples_per_second": 52.018,
      "eval_steps_per_second": 1.744,
      "step": 288
    },
    {
      "epoch": 18.12,
      "learning_rate": 2.1990740740740743e-05,
      "loss": 0.9047,
      "step": 290
    },
    {
      "epoch": 18.75,
      "learning_rate": 2.0833333333333336e-05,
      "loss": 0.8629,
      "step": 300
    },
    {
      "epoch": 19.0,
      "eval_accuracy": 0.3128491620111732,
      "eval_loss": 1.7105906009674072,
      "eval_runtime": 7.3502,
      "eval_samples_per_second": 48.706,
      "eval_steps_per_second": 1.633,
      "step": 304
    },
    {
      "epoch": 19.38,
      "learning_rate": 1.967592592592593e-05,
      "loss": 0.8624,
      "step": 310
    },
    {
      "epoch": 20.0,
      "learning_rate": 1.8518518518518518e-05,
      "loss": 0.8674,
      "step": 320
    },
    {
      "epoch": 20.0,
      "eval_accuracy": 0.33519553072625696,
      "eval_loss": 1.5864217281341553,
      "eval_runtime": 7.1963,
      "eval_samples_per_second": 49.748,
      "eval_steps_per_second": 1.668,
      "step": 320
    },
    {
      "epoch": 20.62,
      "learning_rate": 1.736111111111111e-05,
      "loss": 0.7789,
      "step": 330
    },
    {
      "epoch": 21.0,
      "eval_accuracy": 0.3407821229050279,
      "eval_loss": 1.6129050254821777,
      "eval_runtime": 7.1496,
      "eval_samples_per_second": 50.073,
      "eval_steps_per_second": 1.678,
      "step": 336
    },
    {
      "epoch": 21.25,
      "learning_rate": 1.6203703703703704e-05,
      "loss": 0.8161,
      "step": 340
    },
    {
      "epoch": 21.88,
      "learning_rate": 1.5046296296296297e-05,
      "loss": 0.7426,
      "step": 350
    },
    {
      "epoch": 22.0,
      "eval_accuracy": 0.36033519553072624,
      "eval_loss": 1.6353477239608765,
      "eval_runtime": 7.4456,
      "eval_samples_per_second": 48.082,
      "eval_steps_per_second": 1.612,
      "step": 352
    },
    {
      "epoch": 22.5,
      "learning_rate": 1.388888888888889e-05,
      "loss": 0.7677,
      "step": 360
    },
    {
      "epoch": 23.0,
      "eval_accuracy": 0.3463687150837989,
      "eval_loss": 1.6793445348739624,
      "eval_runtime": 6.994,
      "eval_samples_per_second": 51.187,
      "eval_steps_per_second": 1.716,
      "step": 368
    },
    {
      "epoch": 23.12,
      "learning_rate": 1.2731481481481482e-05,
      "loss": 0.7327,
      "step": 370
    },
    {
      "epoch": 23.75,
      "learning_rate": 1.1574074074074075e-05,
      "loss": 0.7172,
      "step": 380
    },
    {
      "epoch": 24.0,
      "eval_accuracy": 0.3575418994413408,
      "eval_loss": 1.6759321689605713,
      "eval_runtime": 7.4394,
      "eval_samples_per_second": 48.122,
      "eval_steps_per_second": 1.613,
      "step": 384
    },
    {
      "epoch": 24.38,
      "learning_rate": 1.0416666666666668e-05,
      "loss": 0.6759,
      "step": 390
    },
    {
      "epoch": 25.0,
      "learning_rate": 9.259259259259259e-06,
      "loss": 0.6809,
      "step": 400
    },
    {
      "epoch": 25.0,
      "eval_accuracy": 0.3659217877094972,
      "eval_loss": 1.701292634010315,
      "eval_runtime": 7.4138,
      "eval_samples_per_second": 48.288,
      "eval_steps_per_second": 1.619,
      "step": 400
    },
    {
      "epoch": 25.62,
      "learning_rate": 8.101851851851852e-06,
      "loss": 0.6619,
      "step": 410
    },
    {
      "epoch": 26.0,
      "eval_accuracy": 0.36312849162011174,
      "eval_loss": 1.7108293771743774,
      "eval_runtime": 7.238,
      "eval_samples_per_second": 49.461,
      "eval_steps_per_second": 1.658,
      "step": 416
    },
    {
      "epoch": 26.25,
      "learning_rate": 6.944444444444445e-06,
      "loss": 0.6773,
      "step": 420
    },
    {
      "epoch": 26.88,
      "learning_rate": 5.787037037037038e-06,
      "loss": 0.6656,
      "step": 430
    },
    {
      "epoch": 27.0,
      "eval_accuracy": 0.3715083798882682,
      "eval_loss": 1.7327028512954712,
      "eval_runtime": 6.8416,
      "eval_samples_per_second": 52.327,
      "eval_steps_per_second": 1.754,
      "step": 432
    },
    {
      "epoch": 27.5,
      "learning_rate": 4.6296296296296296e-06,
      "loss": 0.6258,
      "step": 440
    },
    {
      "epoch": 28.0,
      "eval_accuracy": 0.3547486033519553,
      "eval_loss": 1.7377949953079224,
      "eval_runtime": 7.2785,
      "eval_samples_per_second": 49.186,
      "eval_steps_per_second": 1.649,
      "step": 448
    },
    {
      "epoch": 28.12,
      "learning_rate": 3.4722222222222224e-06,
      "loss": 0.6646,
      "step": 450
    },
    {
      "epoch": 28.75,
      "learning_rate": 2.3148148148148148e-06,
      "loss": 0.6173,
      "step": 460
    },
    {
      "epoch": 29.0,
      "eval_accuracy": 0.36033519553072624,
      "eval_loss": 1.7461235523223877,
      "eval_runtime": 6.8622,
      "eval_samples_per_second": 52.17,
      "eval_steps_per_second": 1.749,
      "step": 464
    },
    {
      "epoch": 29.38,
      "learning_rate": 1.1574074074074074e-06,
      "loss": 0.6482,
      "step": 470
    },
    {
      "epoch": 30.0,
      "learning_rate": 0.0,
      "loss": 0.6214,
      "step": 480
    },
    {
      "epoch": 30.0,
      "eval_accuracy": 0.35195530726256985,
      "eval_loss": 1.7475444078445435,
      "eval_runtime": 7.4355,
      "eval_samples_per_second": 48.148,
      "eval_steps_per_second": 1.614,
      "step": 480
    },
    {
      "epoch": 30.0,
      "step": 480,
      "total_flos": 4.840276186658304e+18,
      "train_loss": 1.0325976332028708,
      "train_runtime": 3493.542,
      "train_samples_per_second": 17.492,
      "train_steps_per_second": 0.137
    }
  ],
  "logging_steps": 10,
  "max_steps": 480,
  "num_train_epochs": 30,
  "save_steps": 500,
  "total_flos": 4.840276186658304e+18,
  "trial_name": null,
  "trial_params": null
}
|
|