google-vit-large-finetuned / trainer_state.json
emre570's picture
Upload 8 files
e4d7864 verified
{
"best_metric": 0.9358974358974359,
"best_model_checkpoint": "output-models/checkpoint-470",
"epoch": 40.0,
"eval_steps": 500,
"global_step": 1880,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"step": 47,
"train_accuracy": 0.7072649572649573,
"train_loss": 0.6782774329185486,
"train_runtime": 13.5956,
"train_samples_per_second": 34.423,
"train_steps_per_second": 8.606
},
{
"epoch": 1.0,
"eval_accuracy": 0.7243589743589743,
"eval_loss": 0.6180987358093262,
"eval_runtime": 35.9375,
"eval_samples_per_second": 4.341,
"eval_steps_per_second": 1.085,
"step": 47
},
{
"epoch": 2.0,
"step": 94,
"train_accuracy": 0.75,
"train_loss": 0.5690865516662598,
"train_runtime": 13.7448,
"train_samples_per_second": 34.049,
"train_steps_per_second": 8.512
},
{
"epoch": 2.0,
"eval_accuracy": 0.8012820512820513,
"eval_loss": 0.5130882263183594,
"eval_runtime": 4.4157,
"eval_samples_per_second": 35.328,
"eval_steps_per_second": 8.832,
"step": 94
},
{
"epoch": 3.0,
"step": 141,
"train_accuracy": 0.7350427350427351,
"train_loss": 0.6460429430007935,
"train_runtime": 14.0717,
"train_samples_per_second": 33.258,
"train_steps_per_second": 8.315
},
{
"epoch": 3.0,
"eval_accuracy": 0.8205128205128205,
"eval_loss": 0.45179083943367004,
"eval_runtime": 4.8055,
"eval_samples_per_second": 32.463,
"eval_steps_per_second": 8.116,
"step": 141
},
{
"epoch": 4.0,
"step": 188,
"train_accuracy": 0.8098290598290598,
"train_loss": 0.39663246273994446,
"train_runtime": 13.7878,
"train_samples_per_second": 33.943,
"train_steps_per_second": 8.486
},
{
"epoch": 4.0,
"eval_accuracy": 0.8846153846153846,
"eval_loss": 0.2980358898639679,
"eval_runtime": 4.4674,
"eval_samples_per_second": 34.919,
"eval_steps_per_second": 8.73,
"step": 188
},
{
"epoch": 5.0,
"step": 235,
"train_accuracy": 0.8632478632478633,
"train_loss": 0.3636291027069092,
"train_runtime": 13.58,
"train_samples_per_second": 34.462,
"train_steps_per_second": 8.616
},
{
"epoch": 5.0,
"eval_accuracy": 0.8717948717948718,
"eval_loss": 0.2997760772705078,
"eval_runtime": 4.9986,
"eval_samples_per_second": 31.208,
"eval_steps_per_second": 7.802,
"step": 235
},
{
"epoch": 6.0,
"step": 282,
"train_accuracy": 0.8376068376068376,
"train_loss": 0.4208720922470093,
"train_runtime": 13.5735,
"train_samples_per_second": 34.479,
"train_steps_per_second": 8.62
},
{
"epoch": 6.0,
"eval_accuracy": 0.9102564102564102,
"eval_loss": 0.33183348178863525,
"eval_runtime": 4.4091,
"eval_samples_per_second": 35.381,
"eval_steps_per_second": 8.845,
"step": 282
},
{
"epoch": 7.0,
"step": 329,
"train_accuracy": 0.8568376068376068,
"train_loss": 0.32207924127578735,
"train_runtime": 13.5635,
"train_samples_per_second": 34.504,
"train_steps_per_second": 8.626
},
{
"epoch": 7.0,
"eval_accuracy": 0.9166666666666666,
"eval_loss": 0.24086996912956238,
"eval_runtime": 4.7402,
"eval_samples_per_second": 32.91,
"eval_steps_per_second": 8.228,
"step": 329
},
{
"epoch": 8.0,
"step": 376,
"train_accuracy": 0.8846153846153846,
"train_loss": 0.3257001042366028,
"train_runtime": 13.9348,
"train_samples_per_second": 33.585,
"train_steps_per_second": 8.396
},
{
"epoch": 8.0,
"eval_accuracy": 0.8717948717948718,
"eval_loss": 0.3424080014228821,
"eval_runtime": 4.9539,
"eval_samples_per_second": 31.49,
"eval_steps_per_second": 7.873,
"step": 376
},
{
"epoch": 9.0,
"step": 423,
"train_accuracy": 0.8717948717948718,
"train_loss": 0.2687961757183075,
"train_runtime": 13.6829,
"train_samples_per_second": 34.203,
"train_steps_per_second": 8.551
},
{
"epoch": 9.0,
"eval_accuracy": 0.9166666666666666,
"eval_loss": 0.25928938388824463,
"eval_runtime": 4.4508,
"eval_samples_per_second": 35.05,
"eval_steps_per_second": 8.762,
"step": 423
},
{
"epoch": 10.0,
"step": 470,
"train_accuracy": 0.9102564102564102,
"train_loss": 0.2113831341266632,
"train_runtime": 13.44,
"train_samples_per_second": 34.821,
"train_steps_per_second": 8.705
},
{
"epoch": 10.0,
"eval_accuracy": 0.9358974358974359,
"eval_loss": 0.25084006786346436,
"eval_runtime": 4.8092,
"eval_samples_per_second": 32.438,
"eval_steps_per_second": 8.109,
"step": 470
},
{
"epoch": 10.64,
"grad_norm": 7.874776840209961,
"learning_rate": 1.4680851063829789e-05,
"loss": 0.4595,
"step": 500
},
{
"epoch": 11.0,
"step": 517,
"train_accuracy": 0.8995726495726496,
"train_loss": 0.25938984751701355,
"train_runtime": 13.5841,
"train_samples_per_second": 34.452,
"train_steps_per_second": 8.613
},
{
"epoch": 11.0,
"eval_accuracy": 0.9294871794871795,
"eval_loss": 0.32409772276878357,
"eval_runtime": 4.4182,
"eval_samples_per_second": 35.308,
"eval_steps_per_second": 8.827,
"step": 517
},
{
"epoch": 12.0,
"step": 564,
"train_accuracy": 0.8952991452991453,
"train_loss": 0.26763853430747986,
"train_runtime": 13.8203,
"train_samples_per_second": 33.863,
"train_steps_per_second": 8.466
},
{
"epoch": 12.0,
"eval_accuracy": 0.8910256410256411,
"eval_loss": 0.3308241069316864,
"eval_runtime": 4.4447,
"eval_samples_per_second": 35.098,
"eval_steps_per_second": 8.774,
"step": 564
},
{
"epoch": 13.0,
"step": 611,
"train_accuracy": 0.9081196581196581,
"train_loss": 0.23129615187644958,
"train_runtime": 13.4973,
"train_samples_per_second": 34.674,
"train_steps_per_second": 8.668
},
{
"epoch": 13.0,
"eval_accuracy": 0.9294871794871795,
"eval_loss": 0.255931556224823,
"eval_runtime": 4.6436,
"eval_samples_per_second": 33.595,
"eval_steps_per_second": 8.399,
"step": 611
},
{
"epoch": 14.0,
"step": 658,
"train_accuracy": 0.8846153846153846,
"train_loss": 0.31185245513916016,
"train_runtime": 13.5189,
"train_samples_per_second": 34.618,
"train_steps_per_second": 8.655
},
{
"epoch": 14.0,
"eval_accuracy": 0.9166666666666666,
"eval_loss": 0.3017214834690094,
"eval_runtime": 4.6078,
"eval_samples_per_second": 33.856,
"eval_steps_per_second": 8.464,
"step": 658
},
{
"epoch": 15.0,
"step": 705,
"train_accuracy": 0.9166666666666666,
"train_loss": 0.3007480502128601,
"train_runtime": 13.4373,
"train_samples_per_second": 34.829,
"train_steps_per_second": 8.707
},
{
"epoch": 15.0,
"eval_accuracy": 0.9038461538461539,
"eval_loss": 0.4220944046974182,
"eval_runtime": 4.9304,
"eval_samples_per_second": 31.64,
"eval_steps_per_second": 7.91,
"step": 705
},
{
"epoch": 16.0,
"step": 752,
"train_accuracy": 0.9188034188034188,
"train_loss": 0.20939397811889648,
"train_runtime": 13.6683,
"train_samples_per_second": 34.24,
"train_steps_per_second": 8.56
},
{
"epoch": 16.0,
"eval_accuracy": 0.9166666666666666,
"eval_loss": 0.36174264550209045,
"eval_runtime": 4.4608,
"eval_samples_per_second": 34.971,
"eval_steps_per_second": 8.743,
"step": 752
},
{
"epoch": 17.0,
"step": 799,
"train_accuracy": 0.9209401709401709,
"train_loss": 0.18879051506519318,
"train_runtime": 13.7929,
"train_samples_per_second": 33.931,
"train_steps_per_second": 8.483
},
{
"epoch": 17.0,
"eval_accuracy": 0.9102564102564102,
"eval_loss": 0.35188791155815125,
"eval_runtime": 4.7732,
"eval_samples_per_second": 32.683,
"eval_steps_per_second": 8.171,
"step": 799
},
{
"epoch": 18.0,
"step": 846,
"train_accuracy": 0.8952991452991453,
"train_loss": 0.25016605854034424,
"train_runtime": 13.3521,
"train_samples_per_second": 35.051,
"train_steps_per_second": 8.763
},
{
"epoch": 18.0,
"eval_accuracy": 0.9102564102564102,
"eval_loss": 0.3965354859828949,
"eval_runtime": 4.3963,
"eval_samples_per_second": 35.485,
"eval_steps_per_second": 8.871,
"step": 846
},
{
"epoch": 19.0,
"step": 893,
"train_accuracy": 0.9209401709401709,
"train_loss": 0.1891285479068756,
"train_runtime": 13.4884,
"train_samples_per_second": 34.696,
"train_steps_per_second": 8.674
},
{
"epoch": 19.0,
"eval_accuracy": 0.9038461538461539,
"eval_loss": 0.31604066491127014,
"eval_runtime": 5.1415,
"eval_samples_per_second": 30.341,
"eval_steps_per_second": 7.585,
"step": 893
},
{
"epoch": 20.0,
"step": 940,
"train_accuracy": 0.9401709401709402,
"train_loss": 0.1873449832201004,
"train_runtime": 13.9057,
"train_samples_per_second": 33.655,
"train_steps_per_second": 8.414
},
{
"epoch": 20.0,
"eval_accuracy": 0.9294871794871795,
"eval_loss": 0.3332672119140625,
"eval_runtime": 4.9421,
"eval_samples_per_second": 31.565,
"eval_steps_per_second": 7.891,
"step": 940
},
{
"epoch": 21.0,
"step": 987,
"train_accuracy": 0.9230769230769231,
"train_loss": 0.18881197273731232,
"train_runtime": 13.5338,
"train_samples_per_second": 34.58,
"train_steps_per_second": 8.645
},
{
"epoch": 21.0,
"eval_accuracy": 0.8910256410256411,
"eval_loss": 0.3720751404762268,
"eval_runtime": 4.8223,
"eval_samples_per_second": 32.35,
"eval_steps_per_second": 8.088,
"step": 987
},
{
"epoch": 21.28,
"grad_norm": 5.682499408721924,
"learning_rate": 9.361702127659576e-06,
"loss": 0.2485,
"step": 1000
},
{
"epoch": 22.0,
"step": 1034,
"train_accuracy": 0.9444444444444444,
"train_loss": 0.1338244080543518,
"train_runtime": 13.6664,
"train_samples_per_second": 34.245,
"train_steps_per_second": 8.561
},
{
"epoch": 22.0,
"eval_accuracy": 0.9166666666666666,
"eval_loss": 0.3777410686016083,
"eval_runtime": 4.3337,
"eval_samples_per_second": 35.997,
"eval_steps_per_second": 8.999,
"step": 1034
},
{
"epoch": 23.0,
"step": 1081,
"train_accuracy": 0.9252136752136753,
"train_loss": 0.18711484968662262,
"train_runtime": 13.702,
"train_samples_per_second": 34.155,
"train_steps_per_second": 8.539
},
{
"epoch": 23.0,
"eval_accuracy": 0.9038461538461539,
"eval_loss": 0.3984796702861786,
"eval_runtime": 4.5267,
"eval_samples_per_second": 34.462,
"eval_steps_per_second": 8.616,
"step": 1081
},
{
"epoch": 24.0,
"step": 1128,
"train_accuracy": 0.9444444444444444,
"train_loss": 0.161672905087471,
"train_runtime": 13.9523,
"train_samples_per_second": 33.543,
"train_steps_per_second": 8.386
},
{
"epoch": 24.0,
"eval_accuracy": 0.9230769230769231,
"eval_loss": 0.38641923666000366,
"eval_runtime": 5.1128,
"eval_samples_per_second": 30.511,
"eval_steps_per_second": 7.628,
"step": 1128
},
{
"epoch": 25.0,
"step": 1175,
"train_accuracy": 0.9444444444444444,
"train_loss": 0.17891307175159454,
"train_runtime": 13.7483,
"train_samples_per_second": 34.041,
"train_steps_per_second": 8.51
},
{
"epoch": 25.0,
"eval_accuracy": 0.9230769230769231,
"eval_loss": 0.42098188400268555,
"eval_runtime": 5.0358,
"eval_samples_per_second": 30.978,
"eval_steps_per_second": 7.745,
"step": 1175
},
{
"epoch": 26.0,
"step": 1222,
"train_accuracy": 0.9572649572649573,
"train_loss": 0.10899731516838074,
"train_runtime": 14.0913,
"train_samples_per_second": 33.212,
"train_steps_per_second": 8.303
},
{
"epoch": 26.0,
"eval_accuracy": 0.9038461538461539,
"eval_loss": 0.4160342216491699,
"eval_runtime": 4.7918,
"eval_samples_per_second": 32.555,
"eval_steps_per_second": 8.139,
"step": 1222
},
{
"epoch": 27.0,
"step": 1269,
"train_accuracy": 0.938034188034188,
"train_loss": 0.16018715500831604,
"train_runtime": 13.888,
"train_samples_per_second": 33.698,
"train_steps_per_second": 8.425
},
{
"epoch": 27.0,
"eval_accuracy": 0.9102564102564102,
"eval_loss": 0.39854034781455994,
"eval_runtime": 4.8553,
"eval_samples_per_second": 32.13,
"eval_steps_per_second": 8.032,
"step": 1269
},
{
"epoch": 28.0,
"step": 1316,
"train_accuracy": 0.9444444444444444,
"train_loss": 0.14988763630390167,
"train_runtime": 13.7687,
"train_samples_per_second": 33.99,
"train_steps_per_second": 8.498
},
{
"epoch": 28.0,
"eval_accuracy": 0.9102564102564102,
"eval_loss": 0.40767335891723633,
"eval_runtime": 4.434,
"eval_samples_per_second": 35.182,
"eval_steps_per_second": 8.796,
"step": 1316
},
{
"epoch": 29.0,
"step": 1363,
"train_accuracy": 0.9316239316239316,
"train_loss": 0.17876969277858734,
"train_runtime": 13.3686,
"train_samples_per_second": 35.008,
"train_steps_per_second": 8.752
},
{
"epoch": 29.0,
"eval_accuracy": 0.8782051282051282,
"eval_loss": 0.6035234928131104,
"eval_runtime": 4.8916,
"eval_samples_per_second": 31.891,
"eval_steps_per_second": 7.973,
"step": 1363
},
{
"epoch": 30.0,
"step": 1410,
"train_accuracy": 0.9572649572649573,
"train_loss": 0.12170404940843582,
"train_runtime": 13.3084,
"train_samples_per_second": 35.166,
"train_steps_per_second": 8.791
},
{
"epoch": 30.0,
"eval_accuracy": 0.9230769230769231,
"eval_loss": 0.3604837954044342,
"eval_runtime": 4.5664,
"eval_samples_per_second": 34.163,
"eval_steps_per_second": 8.541,
"step": 1410
},
{
"epoch": 31.0,
"step": 1457,
"train_accuracy": 0.9444444444444444,
"train_loss": 0.17193935811519623,
"train_runtime": 13.3567,
"train_samples_per_second": 35.039,
"train_steps_per_second": 8.76
},
{
"epoch": 31.0,
"eval_accuracy": 0.9038461538461539,
"eval_loss": 0.42400404810905457,
"eval_runtime": 4.3974,
"eval_samples_per_second": 35.475,
"eval_steps_per_second": 8.869,
"step": 1457
},
{
"epoch": 31.91,
"grad_norm": 0.40821418166160583,
"learning_rate": 4.042553191489362e-06,
"loss": 0.1715,
"step": 1500
},
{
"epoch": 32.0,
"step": 1504,
"train_accuracy": 0.9551282051282052,
"train_loss": 0.15071353316307068,
"train_runtime": 13.338,
"train_samples_per_second": 35.088,
"train_steps_per_second": 8.772
},
{
"epoch": 32.0,
"eval_accuracy": 0.9166666666666666,
"eval_loss": 0.3800322711467743,
"eval_runtime": 4.4234,
"eval_samples_per_second": 35.267,
"eval_steps_per_second": 8.817,
"step": 1504
},
{
"epoch": 33.0,
"step": 1551,
"train_accuracy": 0.9423076923076923,
"train_loss": 0.15298214554786682,
"train_runtime": 13.6206,
"train_samples_per_second": 34.36,
"train_steps_per_second": 8.59
},
{
"epoch": 33.0,
"eval_accuracy": 0.9038461538461539,
"eval_loss": 0.42538413405418396,
"eval_runtime": 4.5076,
"eval_samples_per_second": 34.608,
"eval_steps_per_second": 8.652,
"step": 1551
},
{
"epoch": 34.0,
"step": 1598,
"train_accuracy": 0.9615384615384616,
"train_loss": 0.09072276204824448,
"train_runtime": 13.7151,
"train_samples_per_second": 34.123,
"train_steps_per_second": 8.531
},
{
"epoch": 34.0,
"eval_accuracy": 0.9230769230769231,
"eval_loss": 0.41397902369499207,
"eval_runtime": 4.448,
"eval_samples_per_second": 35.072,
"eval_steps_per_second": 8.768,
"step": 1598
},
{
"epoch": 35.0,
"step": 1645,
"train_accuracy": 0.9594017094017094,
"train_loss": 0.15199129283428192,
"train_runtime": 13.6214,
"train_samples_per_second": 34.358,
"train_steps_per_second": 8.589
},
{
"epoch": 35.0,
"eval_accuracy": 0.9230769230769231,
"eval_loss": 0.39104607701301575,
"eval_runtime": 4.4304,
"eval_samples_per_second": 35.211,
"eval_steps_per_second": 8.803,
"step": 1645
},
{
"epoch": 36.0,
"step": 1692,
"train_accuracy": 0.9594017094017094,
"train_loss": 0.134719118475914,
"train_runtime": 13.9053,
"train_samples_per_second": 33.656,
"train_steps_per_second": 8.414
},
{
"epoch": 36.0,
"eval_accuracy": 0.9102564102564102,
"eval_loss": 0.4161369204521179,
"eval_runtime": 4.6871,
"eval_samples_per_second": 33.283,
"eval_steps_per_second": 8.321,
"step": 1692
},
{
"epoch": 37.0,
"step": 1739,
"train_accuracy": 0.9529914529914529,
"train_loss": 0.16535791754722595,
"train_runtime": 14.0143,
"train_samples_per_second": 33.394,
"train_steps_per_second": 8.349
},
{
"epoch": 37.0,
"eval_accuracy": 0.9102564102564102,
"eval_loss": 0.43847039341926575,
"eval_runtime": 4.8404,
"eval_samples_per_second": 32.229,
"eval_steps_per_second": 8.057,
"step": 1739
},
{
"epoch": 38.0,
"step": 1786,
"train_accuracy": 0.9487179487179487,
"train_loss": 0.11996147781610489,
"train_runtime": 14.1086,
"train_samples_per_second": 33.171,
"train_steps_per_second": 8.293
},
{
"epoch": 38.0,
"eval_accuracy": 0.9166666666666666,
"eval_loss": 0.41302695870399475,
"eval_runtime": 4.7702,
"eval_samples_per_second": 32.703,
"eval_steps_per_second": 8.176,
"step": 1786
},
{
"epoch": 39.0,
"step": 1833,
"train_accuracy": 0.9529914529914529,
"train_loss": 0.17013560235500336,
"train_runtime": 13.8197,
"train_samples_per_second": 33.865,
"train_steps_per_second": 8.466
},
{
"epoch": 39.0,
"eval_accuracy": 0.9166666666666666,
"eval_loss": 0.4181523323059082,
"eval_runtime": 5.0402,
"eval_samples_per_second": 30.951,
"eval_steps_per_second": 7.738,
"step": 1833
},
{
"epoch": 40.0,
"step": 1880,
"train_accuracy": 0.9551282051282052,
"train_loss": 0.11466003954410553,
"train_runtime": 13.6881,
"train_samples_per_second": 34.19,
"train_steps_per_second": 8.548
},
{
"epoch": 40.0,
"eval_accuracy": 0.9166666666666666,
"eval_loss": 0.4133930802345276,
"eval_runtime": 4.7653,
"eval_samples_per_second": 32.736,
"eval_steps_per_second": 8.184,
"step": 1880
}
],
"logging_steps": 500,
"max_steps": 1880,
"num_input_tokens_seen": 0,
"num_train_epochs": 40,
"save_steps": 500,
"total_flos": 5.128065177052447e+18,
"train_batch_size": 10,
"trial_name": null,
"trial_params": null
}