|
{ |
|
"best_metric": 0.8361669242658424, |
|
"best_model_checkpoint": "mit-b2-fv-finetuned-memes/checkpoint-180", |
|
"epoch": 19.987654320987655, |
|
"global_step": 400, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3e-05, |
|
"loss": 1.6187, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 6e-05, |
|
"loss": 1.3683, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_accuracy": 0.5703245749613601, |
|
"eval_f1": 0.4915430460832317, |
|
"eval_loss": 1.1798477172851562, |
|
"eval_precision": 0.4913898246935893, |
|
"eval_recall": 0.5703245749613601, |
|
"eval_runtime": 25.7297, |
|
"eval_samples_per_second": 50.292, |
|
"eval_steps_per_second": 0.816, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 9e-05, |
|
"loss": 1.1618, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 0.00012, |
|
"loss": 1.0113, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"eval_accuracy": 0.615919629057187, |
|
"eval_f1": 0.6273659431894075, |
|
"eval_loss": 1.0383963584899902, |
|
"eval_precision": 0.6812645697652936, |
|
"eval_recall": 0.615919629057187, |
|
"eval_runtime": 24.3501, |
|
"eval_samples_per_second": 53.142, |
|
"eval_steps_per_second": 0.862, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 0.00011666666666666667, |
|
"loss": 0.89, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 0.00011333333333333333, |
|
"loss": 0.7581, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"eval_accuracy": 0.6808346213292118, |
|
"eval_f1": 0.6839700359928248, |
|
"eval_loss": 0.834769070148468, |
|
"eval_precision": 0.7376556278142583, |
|
"eval_recall": 0.6808346213292118, |
|
"eval_runtime": 25.9343, |
|
"eval_samples_per_second": 49.895, |
|
"eval_steps_per_second": 0.81, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 3.49, |
|
"learning_rate": 0.00011, |
|
"loss": 0.706, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"learning_rate": 0.00010666666666666667, |
|
"loss": 0.6241, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"eval_accuracy": 0.7712519319938176, |
|
"eval_f1": 0.7734801228569064, |
|
"eval_loss": 0.6034244894981384, |
|
"eval_precision": 0.7864230813661904, |
|
"eval_recall": 0.7712519319938176, |
|
"eval_runtime": 24.0546, |
|
"eval_samples_per_second": 53.794, |
|
"eval_steps_per_second": 0.873, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 4.49, |
|
"learning_rate": 0.00010333333333333334, |
|
"loss": 0.574, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.4999, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"eval_accuracy": 0.794435857805255, |
|
"eval_f1": 0.7908928134527277, |
|
"eval_loss": 0.5480836033821106, |
|
"eval_precision": 0.7999661256925431, |
|
"eval_recall": 0.794435857805255, |
|
"eval_runtime": 24.7472, |
|
"eval_samples_per_second": 52.289, |
|
"eval_steps_per_second": 0.849, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 5.49, |
|
"learning_rate": 9.666666666666667e-05, |
|
"loss": 0.4429, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"learning_rate": 9.333333333333334e-05, |
|
"loss": 0.3981, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"eval_accuracy": 0.8021638330757341, |
|
"eval_f1": 0.8000125861001945, |
|
"eval_loss": 0.5253472328186035, |
|
"eval_precision": 0.8090800980369639, |
|
"eval_recall": 0.8021638330757341, |
|
"eval_runtime": 24.5494, |
|
"eval_samples_per_second": 52.71, |
|
"eval_steps_per_second": 0.855, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 6.49, |
|
"learning_rate": 9e-05, |
|
"loss": 0.3603, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 6.99, |
|
"learning_rate": 8.666666666666667e-05, |
|
"loss": 0.3484, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 6.99, |
|
"eval_accuracy": 0.8238021638330757, |
|
"eval_f1": 0.8146387182540739, |
|
"eval_loss": 0.46875712275505066, |
|
"eval_precision": 0.8147156146167328, |
|
"eval_recall": 0.8238021638330757, |
|
"eval_runtime": 25.2779, |
|
"eval_samples_per_second": 51.191, |
|
"eval_steps_per_second": 0.831, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 7.49, |
|
"learning_rate": 8.333333333333333e-05, |
|
"loss": 0.2789, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 7.99, |
|
"learning_rate": 7.999999999999999e-05, |
|
"loss": 0.3142, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 7.99, |
|
"eval_accuracy": 0.7867078825347759, |
|
"eval_f1": 0.7919733028920879, |
|
"eval_loss": 0.6245487928390503, |
|
"eval_precision": 0.820948058010093, |
|
"eval_recall": 0.7867078825347759, |
|
"eval_runtime": 24.2688, |
|
"eval_samples_per_second": 53.32, |
|
"eval_steps_per_second": 0.865, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 8.49, |
|
"learning_rate": 7.666666666666667e-05, |
|
"loss": 0.2513, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 8.99, |
|
"learning_rate": 7.333333333333334e-05, |
|
"loss": 0.2339, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 8.99, |
|
"eval_accuracy": 0.8361669242658424, |
|
"eval_f1": 0.8354791396567843, |
|
"eval_loss": 0.5053289532661438, |
|
"eval_precision": 0.8426050546923035, |
|
"eval_recall": 0.8361669242658424, |
|
"eval_runtime": 24.696, |
|
"eval_samples_per_second": 52.397, |
|
"eval_steps_per_second": 0.85, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 9.49, |
|
"learning_rate": 7.000000000000001e-05, |
|
"loss": 0.1999, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 9.99, |
|
"learning_rate": 6.666666666666667e-05, |
|
"loss": 0.2284, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 9.99, |
|
"eval_accuracy": 0.8230293663060279, |
|
"eval_f1": 0.8187153015149123, |
|
"eval_loss": 0.5069507360458374, |
|
"eval_precision": 0.822032270944375, |
|
"eval_recall": 0.8230293663060279, |
|
"eval_runtime": 23.6165, |
|
"eval_samples_per_second": 54.792, |
|
"eval_steps_per_second": 0.889, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 10.49, |
|
"learning_rate": 6.333333333333333e-05, |
|
"loss": 0.1861, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 10.99, |
|
"learning_rate": 6e-05, |
|
"loss": 0.1824, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 10.99, |
|
"eval_accuracy": 0.8006182380216383, |
|
"eval_f1": 0.8035059555919015, |
|
"eval_loss": 0.5779785513877869, |
|
"eval_precision": 0.8138172496848511, |
|
"eval_recall": 0.8006182380216383, |
|
"eval_runtime": 24.9222, |
|
"eval_samples_per_second": 51.922, |
|
"eval_steps_per_second": 0.843, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 11.49, |
|
"learning_rate": 5.6666666666666664e-05, |
|
"loss": 0.1647, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 11.99, |
|
"learning_rate": 5.333333333333333e-05, |
|
"loss": 0.1561, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 11.99, |
|
"eval_accuracy": 0.8253477588871716, |
|
"eval_f1": 0.8217716611197545, |
|
"eval_loss": 0.5429410338401794, |
|
"eval_precision": 0.8196794558105368, |
|
"eval_recall": 0.8253477588871716, |
|
"eval_runtime": 23.8337, |
|
"eval_samples_per_second": 54.293, |
|
"eval_steps_per_second": 0.881, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 12.49, |
|
"learning_rate": 5e-05, |
|
"loss": 0.1551, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 12.99, |
|
"learning_rate": 4.666666666666667e-05, |
|
"loss": 0.1229, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 12.99, |
|
"eval_accuracy": 0.8330757341576507, |
|
"eval_f1": 0.8303358084478046, |
|
"eval_loss": 0.5324836373329163, |
|
"eval_precision": 0.8296069273511578, |
|
"eval_recall": 0.8330757341576507, |
|
"eval_runtime": 25.348, |
|
"eval_samples_per_second": 51.049, |
|
"eval_steps_per_second": 0.828, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 13.49, |
|
"learning_rate": 4.3333333333333334e-05, |
|
"loss": 0.1208, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 13.99, |
|
"learning_rate": 3.9999999999999996e-05, |
|
"loss": 0.1232, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 13.99, |
|
"eval_accuracy": 0.8276661514683153, |
|
"eval_f1": 0.8273115902224707, |
|
"eval_loss": 0.5595067143440247, |
|
"eval_precision": 0.8290015047050906, |
|
"eval_recall": 0.8276661514683153, |
|
"eval_runtime": 23.6014, |
|
"eval_samples_per_second": 54.827, |
|
"eval_steps_per_second": 0.89, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 14.49, |
|
"learning_rate": 3.666666666666667e-05, |
|
"loss": 0.1204, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 14.99, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 0.118, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 14.99, |
|
"eval_accuracy": 0.8292117465224111, |
|
"eval_f1": 0.8298744039909668, |
|
"eval_loss": 0.5973792672157288, |
|
"eval_precision": 0.8344810455815268, |
|
"eval_recall": 0.8292117465224111, |
|
"eval_runtime": 24.4918, |
|
"eval_samples_per_second": 52.834, |
|
"eval_steps_per_second": 0.857, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 15.49, |
|
"learning_rate": 3e-05, |
|
"loss": 0.1015, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 15.99, |
|
"learning_rate": 2.6666666666666667e-05, |
|
"loss": 0.11, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 15.99, |
|
"eval_accuracy": 0.8253477588871716, |
|
"eval_f1": 0.8230916961516846, |
|
"eval_loss": 0.579598069190979, |
|
"eval_precision": 0.8228234989922505, |
|
"eval_recall": 0.8253477588871716, |
|
"eval_runtime": 23.8333, |
|
"eval_samples_per_second": 54.294, |
|
"eval_steps_per_second": 0.881, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 16.49, |
|
"learning_rate": 2.3333333333333336e-05, |
|
"loss": 0.1037, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 16.99, |
|
"learning_rate": 1.9999999999999998e-05, |
|
"loss": 0.0948, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 16.99, |
|
"eval_accuracy": 0.8346213292117465, |
|
"eval_f1": 0.8348916431445179, |
|
"eval_loss": 0.5581147074699402, |
|
"eval_precision": 0.8357545769977985, |
|
"eval_recall": 0.8346213292117465, |
|
"eval_runtime": 24.5732, |
|
"eval_samples_per_second": 52.659, |
|
"eval_steps_per_second": 0.855, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 17.49, |
|
"learning_rate": 1.6666666666666667e-05, |
|
"loss": 0.0933, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 17.99, |
|
"learning_rate": 1.3333333333333333e-05, |
|
"loss": 0.0985, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 17.99, |
|
"eval_accuracy": 0.8338485316846986, |
|
"eval_f1": 0.8318239397011512, |
|
"eval_loss": 0.569961428642273, |
|
"eval_precision": 0.830062297595451, |
|
"eval_recall": 0.8338485316846986, |
|
"eval_runtime": 24.4249, |
|
"eval_samples_per_second": 52.979, |
|
"eval_steps_per_second": 0.86, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 18.49, |
|
"learning_rate": 9.999999999999999e-06, |
|
"loss": 0.0877, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 18.99, |
|
"learning_rate": 6.666666666666667e-06, |
|
"loss": 0.0821, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 18.99, |
|
"eval_accuracy": 0.8330757341576507, |
|
"eval_f1": 0.833525849625881, |
|
"eval_loss": 0.5755681395530701, |
|
"eval_precision": 0.8342801097840022, |
|
"eval_recall": 0.8330757341576507, |
|
"eval_runtime": 23.5787, |
|
"eval_samples_per_second": 54.88, |
|
"eval_steps_per_second": 0.891, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 19.49, |
|
"learning_rate": 3.3333333333333333e-06, |
|
"loss": 0.0843, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 19.99, |
|
"learning_rate": 0.0, |
|
"loss": 0.0813, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 19.99, |
|
"eval_accuracy": 0.8323029366306027, |
|
"eval_f1": 0.831492653119617, |
|
"eval_loss": 0.5984169840812683, |
|
"eval_precision": 0.831217385971583, |
|
"eval_recall": 0.8323029366306027, |
|
"eval_runtime": 24.9692, |
|
"eval_samples_per_second": 51.824, |
|
"eval_steps_per_second": 0.841, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 19.99, |
|
"step": 400, |
|
"total_flos": 1.1809647563061068e+19, |
|
"train_loss": 0.36638923436403276, |
|
"train_runtime": 5624.3012, |
|
"train_samples_per_second": 18.399, |
|
"train_steps_per_second": 0.071 |
|
} |
|
], |
|
"max_steps": 400, |
|
"num_train_epochs": 20, |
|
"total_flos": 1.1809647563061068e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|