|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 38.0, |
|
"global_step": 4028, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.3832186408159307e-05, |
|
"loss": 1.7921, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7072680181157848, |
|
"eval_loss": 1.4908801317214966, |
|
"eval_runtime": 35.2427, |
|
"eval_samples_per_second": 50.223, |
|
"eval_steps_per_second": 0.17, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.5888124272106204e-05, |
|
"loss": 1.4864, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7269994857792617, |
|
"eval_loss": 1.3473752737045288, |
|
"eval_runtime": 34.9927, |
|
"eval_samples_per_second": 50.582, |
|
"eval_steps_per_second": 0.171, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 1.7090770826327895e-05, |
|
"loss": 1.3756, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.7408165661368673, |
|
"eval_loss": 1.2464056015014648, |
|
"eval_runtime": 34.6698, |
|
"eval_samples_per_second": 51.053, |
|
"eval_steps_per_second": 0.173, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 1.7944062136053104e-05, |
|
"loss": 1.3032, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.7535117507143178, |
|
"eval_loss": 1.1721432209014893, |
|
"eval_runtime": 34.7274, |
|
"eval_samples_per_second": 50.968, |
|
"eval_steps_per_second": 0.173, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 1.860592629580032e-05, |
|
"loss": 1.2584, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.7601500829124033, |
|
"eval_loss": 1.1279706954956055, |
|
"eval_runtime": 34.8917, |
|
"eval_samples_per_second": 50.728, |
|
"eval_steps_per_second": 0.172, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 1.9146708690274792e-05, |
|
"loss": 1.2103, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.7582043276054062, |
|
"eval_loss": 1.1379011869430542, |
|
"eval_runtime": 34.6334, |
|
"eval_samples_per_second": 51.107, |
|
"eval_steps_per_second": 0.173, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 1.9603933689955228e-05, |
|
"loss": 1.183, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.7704412195286752, |
|
"eval_loss": 1.062961220741272, |
|
"eval_runtime": 34.3317, |
|
"eval_samples_per_second": 51.556, |
|
"eval_steps_per_second": 0.175, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.1546, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.7686654368803604, |
|
"eval_loss": 1.0676072835922241, |
|
"eval_runtime": 34.7431, |
|
"eval_samples_per_second": 50.945, |
|
"eval_steps_per_second": 0.173, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.1263, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.7708787569856306, |
|
"eval_loss": 1.0571365356445312, |
|
"eval_runtime": 34.8256, |
|
"eval_samples_per_second": 50.825, |
|
"eval_steps_per_second": 0.172, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.1191, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.781194386249465, |
|
"eval_loss": 0.9872472286224365, |
|
"eval_runtime": 34.7655, |
|
"eval_samples_per_second": 50.913, |
|
"eval_steps_per_second": 0.173, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.0948, |
|
"step": 1166 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.7812378898109896, |
|
"eval_loss": 0.9977697134017944, |
|
"eval_runtime": 34.7432, |
|
"eval_samples_per_second": 50.945, |
|
"eval_steps_per_second": 0.173, |
|
"step": 1166 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.0841, |
|
"step": 1272 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.7803615513259536, |
|
"eval_loss": 0.9978513717651367, |
|
"eval_runtime": 34.6545, |
|
"eval_samples_per_second": 51.076, |
|
"eval_steps_per_second": 0.173, |
|
"step": 1272 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.0688, |
|
"step": 1378 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.7858584354172097, |
|
"eval_loss": 0.9791596531867981, |
|
"eval_runtime": 34.9372, |
|
"eval_samples_per_second": 50.662, |
|
"eval_steps_per_second": 0.172, |
|
"step": 1378 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.0605, |
|
"step": 1484 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.7874798359386868, |
|
"eval_loss": 0.9556354284286499, |
|
"eval_runtime": 34.6859, |
|
"eval_samples_per_second": 51.029, |
|
"eval_steps_per_second": 0.173, |
|
"step": 1484 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.0499, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.7894171406706451, |
|
"eval_loss": 0.9544544816017151, |
|
"eval_runtime": 34.7413, |
|
"eval_samples_per_second": 50.948, |
|
"eval_steps_per_second": 0.173, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.0351, |
|
"step": 1696 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.7894932603326688, |
|
"eval_loss": 0.9460939168930054, |
|
"eval_runtime": 34.704, |
|
"eval_samples_per_second": 51.003, |
|
"eval_steps_per_second": 0.173, |
|
"step": 1696 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.0286, |
|
"step": 1802 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.7885454720602293, |
|
"eval_loss": 0.9521207809448242, |
|
"eval_runtime": 34.7606, |
|
"eval_samples_per_second": 50.92, |
|
"eval_steps_per_second": 0.173, |
|
"step": 1802 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.0173, |
|
"step": 1908 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.7890063270306843, |
|
"eval_loss": 0.9481790661811829, |
|
"eval_runtime": 34.6779, |
|
"eval_samples_per_second": 51.041, |
|
"eval_steps_per_second": 0.173, |
|
"step": 1908 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.0079, |
|
"step": 2014 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.7932532825907861, |
|
"eval_loss": 0.9254797101020813, |
|
"eval_runtime": 34.6588, |
|
"eval_samples_per_second": 51.069, |
|
"eval_steps_per_second": 0.173, |
|
"step": 2014 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.0, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.7949952307142005, |
|
"eval_loss": 0.9181823134422302, |
|
"eval_runtime": 34.812, |
|
"eval_samples_per_second": 50.845, |
|
"eval_steps_per_second": 0.172, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"learning_rate": 2e-05, |
|
"loss": 0.993, |
|
"step": 2226 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.7947981315379723, |
|
"eval_loss": 0.9146238565444946, |
|
"eval_runtime": 34.6312, |
|
"eval_samples_per_second": 51.11, |
|
"eval_steps_per_second": 0.173, |
|
"step": 2226 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"learning_rate": 2e-05, |
|
"loss": 0.9814, |
|
"step": 2332 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.7955541733309638, |
|
"eval_loss": 0.9044105410575867, |
|
"eval_runtime": 35.6117, |
|
"eval_samples_per_second": 49.703, |
|
"eval_steps_per_second": 0.168, |
|
"step": 2332 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"learning_rate": 2e-05, |
|
"loss": 0.9733, |
|
"step": 2438 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.7970804770891298, |
|
"eval_loss": 0.8979274034500122, |
|
"eval_runtime": 35.0401, |
|
"eval_samples_per_second": 50.514, |
|
"eval_steps_per_second": 0.171, |
|
"step": 2438 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"learning_rate": 2e-05, |
|
"loss": 0.9725, |
|
"step": 2544 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.8038572415242785, |
|
"eval_loss": 0.867423415184021, |
|
"eval_runtime": 34.3534, |
|
"eval_samples_per_second": 51.523, |
|
"eval_steps_per_second": 0.175, |
|
"step": 2544 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"learning_rate": 2e-05, |
|
"loss": 0.963, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.7997166910097103, |
|
"eval_loss": 0.8823758959770203, |
|
"eval_runtime": 34.7324, |
|
"eval_samples_per_second": 50.961, |
|
"eval_steps_per_second": 0.173, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"learning_rate": 2e-05, |
|
"loss": 0.9587, |
|
"step": 2756 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.8008366256100395, |
|
"eval_loss": 0.8787974119186401, |
|
"eval_runtime": 34.68, |
|
"eval_samples_per_second": 51.038, |
|
"eval_steps_per_second": 0.173, |
|
"step": 2756 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"learning_rate": 2e-05, |
|
"loss": 0.9523, |
|
"step": 2862 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.802576527423399, |
|
"eval_loss": 0.8776472806930542, |
|
"eval_runtime": 34.5847, |
|
"eval_samples_per_second": 51.179, |
|
"eval_steps_per_second": 0.173, |
|
"step": 2862 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"learning_rate": 2e-05, |
|
"loss": 0.9503, |
|
"step": 2968 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.8046389019572702, |
|
"eval_loss": 0.8658241629600525, |
|
"eval_runtime": 34.7046, |
|
"eval_samples_per_second": 51.002, |
|
"eval_steps_per_second": 0.173, |
|
"step": 2968 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"learning_rate": 2e-05, |
|
"loss": 0.9357, |
|
"step": 3074 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.8068694155258628, |
|
"eval_loss": 0.8559547066688538, |
|
"eval_runtime": 34.6364, |
|
"eval_samples_per_second": 51.102, |
|
"eval_steps_per_second": 0.173, |
|
"step": 3074 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"learning_rate": 2e-05, |
|
"loss": 0.9404, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.8065936664154513, |
|
"eval_loss": 0.8534895777702332, |
|
"eval_runtime": 34.816, |
|
"eval_samples_per_second": 50.839, |
|
"eval_steps_per_second": 0.172, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"learning_rate": 2e-05, |
|
"loss": 0.931, |
|
"step": 3286 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.8132200449776263, |
|
"eval_loss": 0.8150569796562195, |
|
"eval_runtime": 34.836, |
|
"eval_samples_per_second": 50.81, |
|
"eval_steps_per_second": 0.172, |
|
"step": 3286 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"learning_rate": 2e-05, |
|
"loss": 0.925, |
|
"step": 3392 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.8086711486831303, |
|
"eval_loss": 0.8455161452293396, |
|
"eval_runtime": 35.717, |
|
"eval_samples_per_second": 49.556, |
|
"eval_steps_per_second": 0.168, |
|
"step": 3392 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"learning_rate": 2e-05, |
|
"loss": 0.9159, |
|
"step": 3498 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.8050653384287288, |
|
"eval_loss": 0.8572449088096619, |
|
"eval_runtime": 34.373, |
|
"eval_samples_per_second": 51.494, |
|
"eval_steps_per_second": 0.175, |
|
"step": 3498 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"learning_rate": 2e-05, |
|
"loss": 0.9102, |
|
"step": 3604 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.8064940892606243, |
|
"eval_loss": 0.8592977523803711, |
|
"eval_runtime": 34.6394, |
|
"eval_samples_per_second": 51.098, |
|
"eval_steps_per_second": 0.173, |
|
"step": 3604 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"learning_rate": 2e-05, |
|
"loss": 0.9128, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.8136768783591909, |
|
"eval_loss": 0.813452959060669, |
|
"eval_runtime": 34.7174, |
|
"eval_samples_per_second": 50.983, |
|
"eval_steps_per_second": 0.173, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"learning_rate": 2e-05, |
|
"loss": 0.9067, |
|
"step": 3816 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.8131441385250527, |
|
"eval_loss": 0.821983277797699, |
|
"eval_runtime": 34.8233, |
|
"eval_samples_per_second": 50.828, |
|
"eval_steps_per_second": 0.172, |
|
"step": 3816 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"learning_rate": 2e-05, |
|
"loss": 0.8989, |
|
"step": 3922 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.8117314414207155, |
|
"eval_loss": 0.827382504940033, |
|
"eval_runtime": 34.7538, |
|
"eval_samples_per_second": 50.93, |
|
"eval_steps_per_second": 0.173, |
|
"step": 3922 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"learning_rate": 2e-05, |
|
"loss": 0.8928, |
|
"step": 4028 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.8077465845969074, |
|
"eval_loss": 0.8441253900527954, |
|
"eval_runtime": 34.7385, |
|
"eval_samples_per_second": 50.952, |
|
"eval_steps_per_second": 0.173, |
|
"step": 4028 |
|
} |
|
], |
|
"max_steps": 4240, |
|
"num_train_epochs": 40, |
|
"total_flos": 1274410698801152.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|