mit-b2-fv-finetuned-memes / trainer_state.json
paul
End of training
353a9d7
raw
history blame
12.6 kB
{
"best_metric": 0.8361669242658424,
"best_model_checkpoint": "mit-b2-fv-finetuned-memes/checkpoint-180",
"epoch": 19.987654320987655,
"global_step": 400,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.49,
"learning_rate": 3e-05,
"loss": 1.6187,
"step": 10
},
{
"epoch": 0.99,
"learning_rate": 6e-05,
"loss": 1.3683,
"step": 20
},
{
"epoch": 0.99,
"eval_accuracy": 0.5703245749613601,
"eval_f1": 0.4915430460832317,
"eval_loss": 1.1798477172851562,
"eval_precision": 0.4913898246935893,
"eval_recall": 0.5703245749613601,
"eval_runtime": 25.7297,
"eval_samples_per_second": 50.292,
"eval_steps_per_second": 0.816,
"step": 20
},
{
"epoch": 1.49,
"learning_rate": 9e-05,
"loss": 1.1618,
"step": 30
},
{
"epoch": 1.99,
"learning_rate": 0.00012,
"loss": 1.0113,
"step": 40
},
{
"epoch": 1.99,
"eval_accuracy": 0.615919629057187,
"eval_f1": 0.6273659431894075,
"eval_loss": 1.0383963584899902,
"eval_precision": 0.6812645697652936,
"eval_recall": 0.615919629057187,
"eval_runtime": 24.3501,
"eval_samples_per_second": 53.142,
"eval_steps_per_second": 0.862,
"step": 40
},
{
"epoch": 2.49,
"learning_rate": 0.00011666666666666667,
"loss": 0.89,
"step": 50
},
{
"epoch": 2.99,
"learning_rate": 0.00011333333333333333,
"loss": 0.7581,
"step": 60
},
{
"epoch": 2.99,
"eval_accuracy": 0.6808346213292118,
"eval_f1": 0.6839700359928248,
"eval_loss": 0.834769070148468,
"eval_precision": 0.7376556278142583,
"eval_recall": 0.6808346213292118,
"eval_runtime": 25.9343,
"eval_samples_per_second": 49.895,
"eval_steps_per_second": 0.81,
"step": 60
},
{
"epoch": 3.49,
"learning_rate": 0.00011,
"loss": 0.706,
"step": 70
},
{
"epoch": 3.99,
"learning_rate": 0.00010666666666666667,
"loss": 0.6241,
"step": 80
},
{
"epoch": 3.99,
"eval_accuracy": 0.7712519319938176,
"eval_f1": 0.7734801228569064,
"eval_loss": 0.6034244894981384,
"eval_precision": 0.7864230813661904,
"eval_recall": 0.7712519319938176,
"eval_runtime": 24.0546,
"eval_samples_per_second": 53.794,
"eval_steps_per_second": 0.873,
"step": 80
},
{
"epoch": 4.49,
"learning_rate": 0.00010333333333333334,
"loss": 0.574,
"step": 90
},
{
"epoch": 4.99,
"learning_rate": 0.0001,
"loss": 0.4999,
"step": 100
},
{
"epoch": 4.99,
"eval_accuracy": 0.794435857805255,
"eval_f1": 0.7908928134527277,
"eval_loss": 0.5480836033821106,
"eval_precision": 0.7999661256925431,
"eval_recall": 0.794435857805255,
"eval_runtime": 24.7472,
"eval_samples_per_second": 52.289,
"eval_steps_per_second": 0.849,
"step": 100
},
{
"epoch": 5.49,
"learning_rate": 9.666666666666667e-05,
"loss": 0.4429,
"step": 110
},
{
"epoch": 5.99,
"learning_rate": 9.333333333333334e-05,
"loss": 0.3981,
"step": 120
},
{
"epoch": 5.99,
"eval_accuracy": 0.8021638330757341,
"eval_f1": 0.8000125861001945,
"eval_loss": 0.5253472328186035,
"eval_precision": 0.8090800980369639,
"eval_recall": 0.8021638330757341,
"eval_runtime": 24.5494,
"eval_samples_per_second": 52.71,
"eval_steps_per_second": 0.855,
"step": 120
},
{
"epoch": 6.49,
"learning_rate": 9e-05,
"loss": 0.3603,
"step": 130
},
{
"epoch": 6.99,
"learning_rate": 8.666666666666667e-05,
"loss": 0.3484,
"step": 140
},
{
"epoch": 6.99,
"eval_accuracy": 0.8238021638330757,
"eval_f1": 0.8146387182540739,
"eval_loss": 0.46875712275505066,
"eval_precision": 0.8147156146167328,
"eval_recall": 0.8238021638330757,
"eval_runtime": 25.2779,
"eval_samples_per_second": 51.191,
"eval_steps_per_second": 0.831,
"step": 140
},
{
"epoch": 7.49,
"learning_rate": 8.333333333333333e-05,
"loss": 0.2789,
"step": 150
},
{
"epoch": 7.99,
"learning_rate": 7.999999999999999e-05,
"loss": 0.3142,
"step": 160
},
{
"epoch": 7.99,
"eval_accuracy": 0.7867078825347759,
"eval_f1": 0.7919733028920879,
"eval_loss": 0.6245487928390503,
"eval_precision": 0.820948058010093,
"eval_recall": 0.7867078825347759,
"eval_runtime": 24.2688,
"eval_samples_per_second": 53.32,
"eval_steps_per_second": 0.865,
"step": 160
},
{
"epoch": 8.49,
"learning_rate": 7.666666666666667e-05,
"loss": 0.2513,
"step": 170
},
{
"epoch": 8.99,
"learning_rate": 7.333333333333334e-05,
"loss": 0.2339,
"step": 180
},
{
"epoch": 8.99,
"eval_accuracy": 0.8361669242658424,
"eval_f1": 0.8354791396567843,
"eval_loss": 0.5053289532661438,
"eval_precision": 0.8426050546923035,
"eval_recall": 0.8361669242658424,
"eval_runtime": 24.696,
"eval_samples_per_second": 52.397,
"eval_steps_per_second": 0.85,
"step": 180
},
{
"epoch": 9.49,
"learning_rate": 7.000000000000001e-05,
"loss": 0.1999,
"step": 190
},
{
"epoch": 9.99,
"learning_rate": 6.666666666666667e-05,
"loss": 0.2284,
"step": 200
},
{
"epoch": 9.99,
"eval_accuracy": 0.8230293663060279,
"eval_f1": 0.8187153015149123,
"eval_loss": 0.5069507360458374,
"eval_precision": 0.822032270944375,
"eval_recall": 0.8230293663060279,
"eval_runtime": 23.6165,
"eval_samples_per_second": 54.792,
"eval_steps_per_second": 0.889,
"step": 200
},
{
"epoch": 10.49,
"learning_rate": 6.333333333333333e-05,
"loss": 0.1861,
"step": 210
},
{
"epoch": 10.99,
"learning_rate": 6e-05,
"loss": 0.1824,
"step": 220
},
{
"epoch": 10.99,
"eval_accuracy": 0.8006182380216383,
"eval_f1": 0.8035059555919015,
"eval_loss": 0.5779785513877869,
"eval_precision": 0.8138172496848511,
"eval_recall": 0.8006182380216383,
"eval_runtime": 24.9222,
"eval_samples_per_second": 51.922,
"eval_steps_per_second": 0.843,
"step": 220
},
{
"epoch": 11.49,
"learning_rate": 5.6666666666666664e-05,
"loss": 0.1647,
"step": 230
},
{
"epoch": 11.99,
"learning_rate": 5.333333333333333e-05,
"loss": 0.1561,
"step": 240
},
{
"epoch": 11.99,
"eval_accuracy": 0.8253477588871716,
"eval_f1": 0.8217716611197545,
"eval_loss": 0.5429410338401794,
"eval_precision": 0.8196794558105368,
"eval_recall": 0.8253477588871716,
"eval_runtime": 23.8337,
"eval_samples_per_second": 54.293,
"eval_steps_per_second": 0.881,
"step": 240
},
{
"epoch": 12.49,
"learning_rate": 5e-05,
"loss": 0.1551,
"step": 250
},
{
"epoch": 12.99,
"learning_rate": 4.666666666666667e-05,
"loss": 0.1229,
"step": 260
},
{
"epoch": 12.99,
"eval_accuracy": 0.8330757341576507,
"eval_f1": 0.8303358084478046,
"eval_loss": 0.5324836373329163,
"eval_precision": 0.8296069273511578,
"eval_recall": 0.8330757341576507,
"eval_runtime": 25.348,
"eval_samples_per_second": 51.049,
"eval_steps_per_second": 0.828,
"step": 260
},
{
"epoch": 13.49,
"learning_rate": 4.3333333333333334e-05,
"loss": 0.1208,
"step": 270
},
{
"epoch": 13.99,
"learning_rate": 3.9999999999999996e-05,
"loss": 0.1232,
"step": 280
},
{
"epoch": 13.99,
"eval_accuracy": 0.8276661514683153,
"eval_f1": 0.8273115902224707,
"eval_loss": 0.5595067143440247,
"eval_precision": 0.8290015047050906,
"eval_recall": 0.8276661514683153,
"eval_runtime": 23.6014,
"eval_samples_per_second": 54.827,
"eval_steps_per_second": 0.89,
"step": 280
},
{
"epoch": 14.49,
"learning_rate": 3.666666666666667e-05,
"loss": 0.1204,
"step": 290
},
{
"epoch": 14.99,
"learning_rate": 3.3333333333333335e-05,
"loss": 0.118,
"step": 300
},
{
"epoch": 14.99,
"eval_accuracy": 0.8292117465224111,
"eval_f1": 0.8298744039909668,
"eval_loss": 0.5973792672157288,
"eval_precision": 0.8344810455815268,
"eval_recall": 0.8292117465224111,
"eval_runtime": 24.4918,
"eval_samples_per_second": 52.834,
"eval_steps_per_second": 0.857,
"step": 300
},
{
"epoch": 15.49,
"learning_rate": 3e-05,
"loss": 0.1015,
"step": 310
},
{
"epoch": 15.99,
"learning_rate": 2.6666666666666667e-05,
"loss": 0.11,
"step": 320
},
{
"epoch": 15.99,
"eval_accuracy": 0.8253477588871716,
"eval_f1": 0.8230916961516846,
"eval_loss": 0.579598069190979,
"eval_precision": 0.8228234989922505,
"eval_recall": 0.8253477588871716,
"eval_runtime": 23.8333,
"eval_samples_per_second": 54.294,
"eval_steps_per_second": 0.881,
"step": 320
},
{
"epoch": 16.49,
"learning_rate": 2.3333333333333336e-05,
"loss": 0.1037,
"step": 330
},
{
"epoch": 16.99,
"learning_rate": 1.9999999999999998e-05,
"loss": 0.0948,
"step": 340
},
{
"epoch": 16.99,
"eval_accuracy": 0.8346213292117465,
"eval_f1": 0.8348916431445179,
"eval_loss": 0.5581147074699402,
"eval_precision": 0.8357545769977985,
"eval_recall": 0.8346213292117465,
"eval_runtime": 24.5732,
"eval_samples_per_second": 52.659,
"eval_steps_per_second": 0.855,
"step": 340
},
{
"epoch": 17.49,
"learning_rate": 1.6666666666666667e-05,
"loss": 0.0933,
"step": 350
},
{
"epoch": 17.99,
"learning_rate": 1.3333333333333333e-05,
"loss": 0.0985,
"step": 360
},
{
"epoch": 17.99,
"eval_accuracy": 0.8338485316846986,
"eval_f1": 0.8318239397011512,
"eval_loss": 0.569961428642273,
"eval_precision": 0.830062297595451,
"eval_recall": 0.8338485316846986,
"eval_runtime": 24.4249,
"eval_samples_per_second": 52.979,
"eval_steps_per_second": 0.86,
"step": 360
},
{
"epoch": 18.49,
"learning_rate": 9.999999999999999e-06,
"loss": 0.0877,
"step": 370
},
{
"epoch": 18.99,
"learning_rate": 6.666666666666667e-06,
"loss": 0.0821,
"step": 380
},
{
"epoch": 18.99,
"eval_accuracy": 0.8330757341576507,
"eval_f1": 0.833525849625881,
"eval_loss": 0.5755681395530701,
"eval_precision": 0.8342801097840022,
"eval_recall": 0.8330757341576507,
"eval_runtime": 23.5787,
"eval_samples_per_second": 54.88,
"eval_steps_per_second": 0.891,
"step": 380
},
{
"epoch": 19.49,
"learning_rate": 3.3333333333333333e-06,
"loss": 0.0843,
"step": 390
},
{
"epoch": 19.99,
"learning_rate": 0.0,
"loss": 0.0813,
"step": 400
},
{
"epoch": 19.99,
"eval_accuracy": 0.8323029366306027,
"eval_f1": 0.831492653119617,
"eval_loss": 0.5984169840812683,
"eval_precision": 0.831217385971583,
"eval_recall": 0.8323029366306027,
"eval_runtime": 24.9692,
"eval_samples_per_second": 51.824,
"eval_steps_per_second": 0.841,
"step": 400
},
{
"epoch": 19.99,
"step": 400,
"total_flos": 1.1809647563061068e+19,
"train_loss": 0.36638923436403276,
"train_runtime": 5624.3012,
"train_samples_per_second": 18.399,
"train_steps_per_second": 0.071
}
],
"max_steps": 400,
"num_train_epochs": 20,
"total_flos": 1.1809647563061068e+19,
"trial_name": null,
"trial_params": null
}