resnet-50-finetuned-barkley / trainer_state.json
alyzbane's picture
End of training
dbed317 verified
raw
history blame
20.7 kB
{
"best_metric": 0.8743740573152337,
"best_model_checkpoint": "resnet-50-finetuned-barkley\\checkpoint-950",
"epoch": 27.0,
"eval_steps": 500,
"global_step": 1026,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"train_accuracy": 0.16611842105263158
},
{
"epoch": 1.0,
"grad_norm": 1.3611191511154175,
"learning_rate": 1.3823166234831842e-06,
"loss": 1.6171,
"step": 38
},
{
"epoch": 1.0,
"eval_accuracy": 0.17376068376068377,
"eval_error_rate": 0.8262393162393162,
"eval_f1": 0.06643250805361339,
"eval_loss": 1.619532585144043,
"eval_precision": 0.06626276231021362,
"eval_recall": 0.1513157894736842,
"eval_runtime": 26.7886,
"eval_samples_per_second": 5.674,
"eval_steps_per_second": 0.187,
"eval_top1_accuracy": 0.1513157894736842,
"step": 38
},
{
"epoch": 2.0,
"train_accuracy": 0.15862573099415206
},
{
"epoch": 2.0,
"grad_norm": 1.6634056568145752,
"learning_rate": 3.058622191852561e-06,
"loss": 1.6149,
"step": 76
},
{
"epoch": 2.0,
"eval_accuracy": 0.17853192559074912,
"eval_error_rate": 0.8214680744092508,
"eval_f1": 0.08020327273951958,
"eval_loss": 1.6160310506820679,
"eval_precision": 0.29525518341307816,
"eval_recall": 0.15789473684210525,
"eval_runtime": 26.6794,
"eval_samples_per_second": 5.697,
"eval_steps_per_second": 0.187,
"eval_top1_accuracy": 0.15789473684210525,
"step": 76
},
{
"epoch": 3.0,
"train_accuracy": 0.1783625730994152
},
{
"epoch": 3.0,
"grad_norm": 1.5223407745361328,
"learning_rate": 5.625554080420859e-06,
"loss": 1.6119,
"step": 114
},
{
"epoch": 3.0,
"eval_accuracy": 0.1771794871794872,
"eval_error_rate": 0.8228205128205128,
"eval_f1": 0.08341165413533834,
"eval_loss": 1.6111546754837036,
"eval_precision": 0.08044783010156971,
"eval_recall": 0.15789473684210525,
"eval_runtime": 26.2938,
"eval_samples_per_second": 5.781,
"eval_steps_per_second": 0.19,
"eval_top1_accuracy": 0.15789473684210525,
"step": 114
},
{
"epoch": 4.0,
"train_accuracy": 0.20833333333333334
},
{
"epoch": 4.0,
"grad_norm": 1.3961833715438843,
"learning_rate": 8.771702474591739e-06,
"loss": 1.6041,
"step": 152
},
{
"epoch": 4.0,
"eval_accuracy": 0.21552538964303675,
"eval_error_rate": 0.7844746103569633,
"eval_f1": 0.14610577502522265,
"eval_loss": 1.6015431880950928,
"eval_precision": 0.4161313363641264,
"eval_recall": 0.19736842105263158,
"eval_runtime": 28.8611,
"eval_samples_per_second": 5.267,
"eval_steps_per_second": 0.173,
"eval_top1_accuracy": 0.19736842105263158,
"step": 152
},
{
"epoch": 5.0,
"train_accuracy": 0.24926900584795322
},
{
"epoch": 5.0,
"grad_norm": 1.5497902631759644,
"learning_rate": 1.2115389351475484e-05,
"loss": 1.5945,
"step": 190
},
{
"epoch": 5.0,
"eval_accuracy": 0.30917043740573147,
"eval_error_rate": 0.6908295625942685,
"eval_f1": 0.24280704310495774,
"eval_loss": 1.5894904136657715,
"eval_precision": 0.40893317853457173,
"eval_recall": 0.2894736842105263,
"eval_runtime": 27.1975,
"eval_samples_per_second": 5.589,
"eval_steps_per_second": 0.184,
"eval_top1_accuracy": 0.2894736842105263,
"step": 190
},
{
"epoch": 6.0,
"train_accuracy": 0.33260233918128657
},
{
"epoch": 6.0,
"grad_norm": 1.6292859315872192,
"learning_rate": 1.5250972116877936e-05,
"loss": 1.5777,
"step": 228
},
{
"epoch": 6.0,
"eval_accuracy": 0.4663499245852186,
"eval_error_rate": 0.5336500754147814,
"eval_f1": 0.39436167836929775,
"eval_loss": 1.5709806680679321,
"eval_precision": 0.5764411027568922,
"eval_recall": 0.4407894736842105,
"eval_runtime": 26.1121,
"eval_samples_per_second": 5.821,
"eval_steps_per_second": 0.191,
"eval_top1_accuracy": 0.4407894736842105,
"step": 228
},
{
"epoch": 7.0,
"train_accuracy": 0.4093567251461988
},
{
"epoch": 7.0,
"grad_norm": 1.4761221408843994,
"learning_rate": 1.7798054527340503e-05,
"loss": 1.561,
"step": 266
},
{
"epoch": 7.0,
"eval_accuracy": 0.5173303167420814,
"eval_error_rate": 0.4826696832579186,
"eval_f1": 0.45158707610515547,
"eval_loss": 1.5490015745162964,
"eval_precision": 0.6013273211996122,
"eval_recall": 0.4934210526315789,
"eval_runtime": 27.8381,
"eval_samples_per_second": 5.46,
"eval_steps_per_second": 0.18,
"eval_top1_accuracy": 0.5,
"step": 266
},
{
"epoch": 8.0,
"train_accuracy": 0.47733918128654973
},
{
"epoch": 8.0,
"grad_norm": 1.691532850265503,
"learning_rate": 1.9447634826822778e-05,
"loss": 1.536,
"step": 304
},
{
"epoch": 8.0,
"eval_accuracy": 0.5450075414781297,
"eval_error_rate": 0.4549924585218703,
"eval_f1": 0.4711077818086626,
"eval_loss": 1.5221654176712036,
"eval_precision": 0.63772879167616,
"eval_recall": 0.5131578947368421,
"eval_runtime": 26.2831,
"eval_samples_per_second": 5.783,
"eval_steps_per_second": 0.19,
"eval_top1_accuracy": 0.5131578947368421,
"step": 304
},
{
"epoch": 9.0,
"train_accuracy": 0.5372807017543859
},
{
"epoch": 9.0,
"grad_norm": 1.69563889503479,
"learning_rate": 1.9999922507143676e-05,
"loss": 1.5081,
"step": 342
},
{
"epoch": 9.0,
"eval_accuracy": 0.625032679738562,
"eval_error_rate": 0.374967320261438,
"eval_f1": 0.5868892868172693,
"eval_loss": 1.4911595582962036,
"eval_precision": 0.7594769272604533,
"eval_recall": 0.5986842105263158,
"eval_runtime": 27.44,
"eval_samples_per_second": 5.539,
"eval_steps_per_second": 0.182,
"eval_top1_accuracy": 0.5986842105263158,
"step": 342
},
{
"epoch": 10.0,
"train_accuracy": 0.5957602339181286
},
{
"epoch": 10.0,
"grad_norm": 1.694950819015503,
"learning_rate": 1.9882364575351117e-05,
"loss": 1.4756,
"step": 380
},
{
"epoch": 10.0,
"eval_accuracy": 0.6683459024635495,
"eval_error_rate": 0.3316540975364505,
"eval_f1": 0.6293044227869377,
"eval_loss": 1.4565558433532715,
"eval_precision": 0.7578870575407971,
"eval_recall": 0.6447368421052632,
"eval_runtime": 25.8912,
"eval_samples_per_second": 5.871,
"eval_steps_per_second": 0.193,
"eval_top1_accuracy": 0.6447368421052632,
"step": 380
},
{
"epoch": 11.0,
"train_accuracy": 0.6111111111111112
},
{
"epoch": 11.0,
"grad_norm": 1.591986894607544,
"learning_rate": 1.9544051842595e-05,
"loss": 1.4387,
"step": 418
},
{
"epoch": 11.0,
"eval_accuracy": 0.698541980894922,
"eval_error_rate": 0.30145801910507797,
"eval_f1": 0.6692242337767512,
"eval_loss": 1.4155722856521606,
"eval_precision": 0.7913987173816895,
"eval_recall": 0.6776315789473685,
"eval_runtime": 26.7774,
"eval_samples_per_second": 5.676,
"eval_steps_per_second": 0.187,
"eval_top1_accuracy": 0.6776315789473685,
"step": 418
},
{
"epoch": 12.0,
"train_accuracy": 0.6469298245614035
},
{
"epoch": 12.0,
"grad_norm": 1.8263485431671143,
"learning_rate": 1.9009692640269474e-05,
"loss": 1.3993,
"step": 456
},
{
"epoch": 12.0,
"eval_accuracy": 0.7080442433383609,
"eval_error_rate": 0.2919557566616391,
"eval_f1": 0.6731791852575238,
"eval_loss": 1.3737214803695679,
"eval_precision": 0.7997336700204268,
"eval_recall": 0.6842105263157895,
"eval_runtime": 25.8778,
"eval_samples_per_second": 5.874,
"eval_steps_per_second": 0.193,
"eval_top1_accuracy": 0.6842105263157895,
"step": 456
},
{
"epoch": 13.0,
"train_accuracy": 0.6527777777777778
},
{
"epoch": 13.0,
"grad_norm": 1.8651676177978516,
"learning_rate": 1.826239469360898e-05,
"loss": 1.358,
"step": 494
},
{
"epoch": 13.0,
"eval_accuracy": 0.7232277526395173,
"eval_error_rate": 0.27677224736048267,
"eval_f1": 0.7048030719072474,
"eval_loss": 1.3288253545761108,
"eval_precision": 0.8290264820356354,
"eval_recall": 0.7039473684210527,
"eval_runtime": 26.9677,
"eval_samples_per_second": 5.636,
"eval_steps_per_second": 0.185,
"eval_top1_accuracy": 0.7039473684210527,
"step": 494
},
{
"epoch": 14.0,
"train_accuracy": 0.7017543859649122
},
{
"epoch": 14.0,
"grad_norm": 2.0590460300445557,
"learning_rate": 1.733052939622339e-05,
"loss": 1.3139,
"step": 532
},
{
"epoch": 14.0,
"eval_accuracy": 0.7592207139265963,
"eval_error_rate": 0.2407792860734037,
"eval_f1": 0.7373063276167947,
"eval_loss": 1.2805979251861572,
"eval_precision": 0.8276704808408175,
"eval_recall": 0.743421052631579,
"eval_runtime": 24.8464,
"eval_samples_per_second": 6.118,
"eval_steps_per_second": 0.201,
"eval_top1_accuracy": 0.75,
"step": 532
},
{
"epoch": 15.0,
"train_accuracy": 0.7185672514619883
},
{
"epoch": 15.0,
"grad_norm": 2.4823436737060547,
"learning_rate": 1.6234913078995263e-05,
"loss": 1.262,
"step": 570
},
{
"epoch": 15.0,
"eval_accuracy": 0.7828808446455506,
"eval_error_rate": 0.21711915535444937,
"eval_f1": 0.7664102924938837,
"eval_loss": 1.2344970703125,
"eval_precision": 0.8477553968860445,
"eval_recall": 0.7697368421052632,
"eval_runtime": 26.3117,
"eval_samples_per_second": 5.777,
"eval_steps_per_second": 0.19,
"eval_top1_accuracy": 0.7697368421052632,
"step": 570
},
{
"epoch": 16.0,
"train_accuracy": 0.7236842105263158
},
{
"epoch": 16.0,
"grad_norm": 2.0972795486450195,
"learning_rate": 1.5000020000000002e-05,
"loss": 1.2184,
"step": 608
},
{
"epoch": 16.0,
"eval_accuracy": 0.7817697335344395,
"eval_error_rate": 0.21823026646556054,
"eval_f1": 0.7654284912445182,
"eval_loss": 1.1887174844741821,
"eval_precision": 0.8322669418644651,
"eval_recall": 0.7697368421052632,
"eval_runtime": 25.3816,
"eval_samples_per_second": 5.989,
"eval_steps_per_second": 0.197,
"eval_top1_accuracy": 0.7697368421052632,
"step": 608
},
{
"epoch": 17.0,
"train_accuracy": 0.7448830409356725
},
{
"epoch": 17.0,
"grad_norm": 1.9592589139938354,
"learning_rate": 1.365343563002298e-05,
"loss": 1.1803,
"step": 646
},
{
"epoch": 17.0,
"eval_accuracy": 0.7930819507290096,
"eval_error_rate": 0.20691804927099045,
"eval_f1": 0.773531629357028,
"eval_loss": 1.1408498287200928,
"eval_precision": 0.8423109913821989,
"eval_recall": 0.7763157894736842,
"eval_runtime": 26.1892,
"eval_samples_per_second": 5.804,
"eval_steps_per_second": 0.191,
"eval_top1_accuracy": 0.7763157894736842,
"step": 646
},
{
"epoch": 18.0,
"train_accuracy": 0.7580409356725146
},
{
"epoch": 18.0,
"grad_norm": 2.348762035369873,
"learning_rate": 1.2225240438725788e-05,
"loss": 1.1422,
"step": 684
},
{
"epoch": 18.0,
"eval_accuracy": 0.83166918049271,
"eval_error_rate": 0.16833081950729,
"eval_f1": 0.810022076785411,
"eval_loss": 1.096580147743225,
"eval_precision": 0.8594414607948442,
"eval_recall": 0.8157894736842105,
"eval_runtime": 25.0285,
"eval_samples_per_second": 6.073,
"eval_steps_per_second": 0.2,
"eval_top1_accuracy": 0.8157894736842105,
"step": 684
},
{
"epoch": 19.0,
"train_accuracy": 0.7733918128654971
},
{
"epoch": 19.0,
"grad_norm": 2.2395124435424805,
"learning_rate": 1.0747337946660503e-05,
"loss": 1.1032,
"step": 722
},
{
"epoch": 19.0,
"eval_accuracy": 0.8144746103569632,
"eval_error_rate": 0.18552538964303678,
"eval_f1": 0.7968691491574786,
"eval_loss": 1.0586965084075928,
"eval_precision": 0.8430972766845342,
"eval_recall": 0.8026315789473685,
"eval_runtime": 25.4802,
"eval_samples_per_second": 5.965,
"eval_steps_per_second": 0.196,
"eval_top1_accuracy": 0.8026315789473685,
"step": 722
},
{
"epoch": 20.0,
"train_accuracy": 0.7850877192982456
},
{
"epoch": 20.0,
"grad_norm": 2.3096365928649902,
"learning_rate": 9.252742053339503e-06,
"loss": 1.058,
"step": 760
},
{
"epoch": 20.0,
"eval_accuracy": 0.8486626445449975,
"eval_error_rate": 0.1513373554550025,
"eval_f1": 0.8300500291649842,
"eval_loss": 1.0289386510849,
"eval_precision": 0.8609899749373433,
"eval_recall": 0.8355263157894737,
"eval_runtime": 27.6424,
"eval_samples_per_second": 5.499,
"eval_steps_per_second": 0.181,
"eval_top1_accuracy": 0.8355263157894737,
"step": 760
},
{
"epoch": 21.0,
"train_accuracy": 0.8084795321637427
},
{
"epoch": 21.0,
"grad_norm": 2.616567611694336,
"learning_rate": 7.774839561274216e-06,
"loss": 1.0252,
"step": 798
},
{
"epoch": 21.0,
"eval_accuracy": 0.8534338863750628,
"eval_error_rate": 0.1465661136249372,
"eval_f1": 0.8370304500033733,
"eval_loss": 0.9917795062065125,
"eval_precision": 0.8575724637681159,
"eval_recall": 0.8421052631578947,
"eval_runtime": 24.0656,
"eval_samples_per_second": 6.316,
"eval_steps_per_second": 0.208,
"eval_top1_accuracy": 0.8421052631578947,
"step": 798
},
{
"epoch": 22.0,
"train_accuracy": 0.814327485380117
},
{
"epoch": 22.0,
"grad_norm": 2.2312376499176025,
"learning_rate": 6.346644369977025e-06,
"loss": 1.002,
"step": 836
},
{
"epoch": 22.0,
"eval_accuracy": 0.8611261940673707,
"eval_error_rate": 0.13887380593262932,
"eval_f1": 0.8435398841932419,
"eval_loss": 0.9727317690849304,
"eval_precision": 0.8677232854864433,
"eval_recall": 0.8486842105263158,
"eval_runtime": 25.3651,
"eval_samples_per_second": 5.992,
"eval_steps_per_second": 0.197,
"eval_top1_accuracy": 0.8486842105263158,
"step": 836
},
{
"epoch": 23.0,
"train_accuracy": 0.8179824561403509
},
{
"epoch": 23.0,
"grad_norm": 2.2069149017333984,
"learning_rate": 5.000060000000003e-06,
"loss": 0.9812,
"step": 874
},
{
"epoch": 23.0,
"eval_accuracy": 0.8677928607340373,
"eval_error_rate": 0.13220713926596273,
"eval_f1": 0.8497099815147097,
"eval_loss": 0.9464592933654785,
"eval_precision": 0.8795061782362883,
"eval_recall": 0.8552631578947368,
"eval_runtime": 26.8567,
"eval_samples_per_second": 5.66,
"eval_steps_per_second": 0.186,
"eval_top1_accuracy": 0.8552631578947368,
"step": 874
},
{
"epoch": 24.0,
"train_accuracy": 0.8187134502923976
},
{
"epoch": 24.0,
"grad_norm": 2.32728910446167,
"learning_rate": 3.76516692100474e-06,
"loss": 0.9636,
"step": 912
},
{
"epoch": 24.0,
"eval_accuracy": 0.8699296128707893,
"eval_error_rate": 0.1300703871292107,
"eval_f1": 0.8485082765446309,
"eval_loss": 0.93310546875,
"eval_precision": 0.8820191260980734,
"eval_recall": 0.8552631578947368,
"eval_runtime": 27.6614,
"eval_samples_per_second": 5.495,
"eval_steps_per_second": 0.181,
"eval_top1_accuracy": 0.8552631578947368,
"step": 912
},
{
"epoch": 25.0,
"train_accuracy": 0.8150584795321637
},
{
"epoch": 25.0,
"grad_norm": 2.2066571712493896,
"learning_rate": 2.6963844978948743e-06,
"loss": 0.9591,
"step": 950
},
{
"epoch": 25.0,
"eval_accuracy": 0.8743740573152337,
"eval_error_rate": 0.12562594268476635,
"eval_f1": 0.8573701550510042,
"eval_loss": 0.92206209897995,
"eval_precision": 0.877970723615921,
"eval_recall": 0.8618421052631579,
"eval_runtime": 26.1743,
"eval_samples_per_second": 5.807,
"eval_steps_per_second": 0.191,
"eval_top1_accuracy": 0.8618421052631579,
"step": 950
},
{
"epoch": 26.0,
"train_accuracy": 0.8194444444444444
},
{
"epoch": 26.0,
"grad_norm": 2.345517873764038,
"learning_rate": 1.75992615737436e-06,
"loss": 0.948,
"step": 988
},
{
"epoch": 26.0,
"eval_accuracy": 0.8743740573152337,
"eval_error_rate": 0.12562594268476635,
"eval_f1": 0.8573701550510042,
"eval_loss": 0.9158027768135071,
"eval_precision": 0.877970723615921,
"eval_recall": 0.8618421052631579,
"eval_runtime": 26.0018,
"eval_samples_per_second": 5.846,
"eval_steps_per_second": 0.192,
"eval_top1_accuracy": 0.868421052631579,
"step": 988
},
{
"epoch": 27.0,
"train_accuracy": 0.8201754385964912
},
{
"epoch": 27.0,
"grad_norm": 2.246762990951538,
"learning_rate": 1.0075383437198693e-06,
"loss": 0.9384,
"step": 1026
},
{
"epoch": 27.0,
"eval_accuracy": 0.8601005530417295,
"eval_error_rate": 0.1398994469582705,
"eval_f1": 0.8431034082329946,
"eval_loss": 0.901735782623291,
"eval_precision": 0.8684637995623307,
"eval_recall": 0.8486842105263158,
"eval_runtime": 25.9225,
"eval_samples_per_second": 5.864,
"eval_steps_per_second": 0.193,
"eval_top1_accuracy": 0.8486842105263158,
"step": 1026
},
{
"epoch": 27.0,
"step": 1026,
"total_flos": 6.9738304117683e+17,
"train_loss": 1.2960130829095375,
"train_runtime": 5177.2299,
"train_samples_per_second": 7.046,
"train_steps_per_second": 0.22
}
],
"logging_steps": 500,
"max_steps": 1140,
"num_input_tokens_seen": 0,
"num_train_epochs": 30,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 2,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 2
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 6.9738304117683e+17,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}