|
{
|
|
"best_metric": 0.8743740573152337,
|
|
"best_model_checkpoint": "resnet-50-finetuned-barkley\\checkpoint-950",
|
|
"epoch": 27.0,
|
|
"eval_steps": 500,
|
|
"global_step": 1026,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 1.0,
|
|
"train_accuracy": 0.16611842105263158
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"grad_norm": 1.3611191511154175,
|
|
"learning_rate": 1.3823166234831842e-06,
|
|
"loss": 1.6171,
|
|
"step": 38
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"eval_accuracy": 0.17376068376068377,
|
|
"eval_error_rate": 0.8262393162393162,
|
|
"eval_f1": 0.06643250805361339,
|
|
"eval_loss": 1.619532585144043,
|
|
"eval_precision": 0.06626276231021362,
|
|
"eval_recall": 0.1513157894736842,
|
|
"eval_runtime": 26.7886,
|
|
"eval_samples_per_second": 5.674,
|
|
"eval_steps_per_second": 0.187,
|
|
"eval_top1_accuracy": 0.1513157894736842,
|
|
"step": 38
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"train_accuracy": 0.15862573099415206
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"grad_norm": 1.6634056568145752,
|
|
"learning_rate": 3.058622191852561e-06,
|
|
"loss": 1.6149,
|
|
"step": 76
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"eval_accuracy": 0.17853192559074912,
|
|
"eval_error_rate": 0.8214680744092508,
|
|
"eval_f1": 0.08020327273951958,
|
|
"eval_loss": 1.6160310506820679,
|
|
"eval_precision": 0.29525518341307816,
|
|
"eval_recall": 0.15789473684210525,
|
|
"eval_runtime": 26.6794,
|
|
"eval_samples_per_second": 5.697,
|
|
"eval_steps_per_second": 0.187,
|
|
"eval_top1_accuracy": 0.15789473684210525,
|
|
"step": 76
|
|
},
|
|
{
|
|
"epoch": 3.0,
|
|
"train_accuracy": 0.1783625730994152
|
|
},
|
|
{
|
|
"epoch": 3.0,
|
|
"grad_norm": 1.5223407745361328,
|
|
"learning_rate": 5.625554080420859e-06,
|
|
"loss": 1.6119,
|
|
"step": 114
|
|
},
|
|
{
|
|
"epoch": 3.0,
|
|
"eval_accuracy": 0.1771794871794872,
|
|
"eval_error_rate": 0.8228205128205128,
|
|
"eval_f1": 0.08341165413533834,
|
|
"eval_loss": 1.6111546754837036,
|
|
"eval_precision": 0.08044783010156971,
|
|
"eval_recall": 0.15789473684210525,
|
|
"eval_runtime": 26.2938,
|
|
"eval_samples_per_second": 5.781,
|
|
"eval_steps_per_second": 0.19,
|
|
"eval_top1_accuracy": 0.15789473684210525,
|
|
"step": 114
|
|
},
|
|
{
|
|
"epoch": 4.0,
|
|
"train_accuracy": 0.20833333333333334
|
|
},
|
|
{
|
|
"epoch": 4.0,
|
|
"grad_norm": 1.3961833715438843,
|
|
"learning_rate": 8.771702474591739e-06,
|
|
"loss": 1.6041,
|
|
"step": 152
|
|
},
|
|
{
|
|
"epoch": 4.0,
|
|
"eval_accuracy": 0.21552538964303675,
|
|
"eval_error_rate": 0.7844746103569633,
|
|
"eval_f1": 0.14610577502522265,
|
|
"eval_loss": 1.6015431880950928,
|
|
"eval_precision": 0.4161313363641264,
|
|
"eval_recall": 0.19736842105263158,
|
|
"eval_runtime": 28.8611,
|
|
"eval_samples_per_second": 5.267,
|
|
"eval_steps_per_second": 0.173,
|
|
"eval_top1_accuracy": 0.19736842105263158,
|
|
"step": 152
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"train_accuracy": 0.24926900584795322
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"grad_norm": 1.5497902631759644,
|
|
"learning_rate": 1.2115389351475484e-05,
|
|
"loss": 1.5945,
|
|
"step": 190
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"eval_accuracy": 0.30917043740573147,
|
|
"eval_error_rate": 0.6908295625942685,
|
|
"eval_f1": 0.24280704310495774,
|
|
"eval_loss": 1.5894904136657715,
|
|
"eval_precision": 0.40893317853457173,
|
|
"eval_recall": 0.2894736842105263,
|
|
"eval_runtime": 27.1975,
|
|
"eval_samples_per_second": 5.589,
|
|
"eval_steps_per_second": 0.184,
|
|
"eval_top1_accuracy": 0.2894736842105263,
|
|
"step": 190
|
|
},
|
|
{
|
|
"epoch": 6.0,
|
|
"train_accuracy": 0.33260233918128657
|
|
},
|
|
{
|
|
"epoch": 6.0,
|
|
"grad_norm": 1.6292859315872192,
|
|
"learning_rate": 1.5250972116877936e-05,
|
|
"loss": 1.5777,
|
|
"step": 228
|
|
},
|
|
{
|
|
"epoch": 6.0,
|
|
"eval_accuracy": 0.4663499245852186,
|
|
"eval_error_rate": 0.5336500754147814,
|
|
"eval_f1": 0.39436167836929775,
|
|
"eval_loss": 1.5709806680679321,
|
|
"eval_precision": 0.5764411027568922,
|
|
"eval_recall": 0.4407894736842105,
|
|
"eval_runtime": 26.1121,
|
|
"eval_samples_per_second": 5.821,
|
|
"eval_steps_per_second": 0.191,
|
|
"eval_top1_accuracy": 0.4407894736842105,
|
|
"step": 228
|
|
},
|
|
{
|
|
"epoch": 7.0,
|
|
"train_accuracy": 0.4093567251461988
|
|
},
|
|
{
|
|
"epoch": 7.0,
|
|
"grad_norm": 1.4761221408843994,
|
|
"learning_rate": 1.7798054527340503e-05,
|
|
"loss": 1.561,
|
|
"step": 266
|
|
},
|
|
{
|
|
"epoch": 7.0,
|
|
"eval_accuracy": 0.5173303167420814,
|
|
"eval_error_rate": 0.4826696832579186,
|
|
"eval_f1": 0.45158707610515547,
|
|
"eval_loss": 1.5490015745162964,
|
|
"eval_precision": 0.6013273211996122,
|
|
"eval_recall": 0.4934210526315789,
|
|
"eval_runtime": 27.8381,
|
|
"eval_samples_per_second": 5.46,
|
|
"eval_steps_per_second": 0.18,
|
|
"eval_top1_accuracy": 0.5,
|
|
"step": 266
|
|
},
|
|
{
|
|
"epoch": 8.0,
|
|
"train_accuracy": 0.47733918128654973
|
|
},
|
|
{
|
|
"epoch": 8.0,
|
|
"grad_norm": 1.691532850265503,
|
|
"learning_rate": 1.9447634826822778e-05,
|
|
"loss": 1.536,
|
|
"step": 304
|
|
},
|
|
{
|
|
"epoch": 8.0,
|
|
"eval_accuracy": 0.5450075414781297,
|
|
"eval_error_rate": 0.4549924585218703,
|
|
"eval_f1": 0.4711077818086626,
|
|
"eval_loss": 1.5221654176712036,
|
|
"eval_precision": 0.63772879167616,
|
|
"eval_recall": 0.5131578947368421,
|
|
"eval_runtime": 26.2831,
|
|
"eval_samples_per_second": 5.783,
|
|
"eval_steps_per_second": 0.19,
|
|
"eval_top1_accuracy": 0.5131578947368421,
|
|
"step": 304
|
|
},
|
|
{
|
|
"epoch": 9.0,
|
|
"train_accuracy": 0.5372807017543859
|
|
},
|
|
{
|
|
"epoch": 9.0,
|
|
"grad_norm": 1.69563889503479,
|
|
"learning_rate": 1.9999922507143676e-05,
|
|
"loss": 1.5081,
|
|
"step": 342
|
|
},
|
|
{
|
|
"epoch": 9.0,
|
|
"eval_accuracy": 0.625032679738562,
|
|
"eval_error_rate": 0.374967320261438,
|
|
"eval_f1": 0.5868892868172693,
|
|
"eval_loss": 1.4911595582962036,
|
|
"eval_precision": 0.7594769272604533,
|
|
"eval_recall": 0.5986842105263158,
|
|
"eval_runtime": 27.44,
|
|
"eval_samples_per_second": 5.539,
|
|
"eval_steps_per_second": 0.182,
|
|
"eval_top1_accuracy": 0.5986842105263158,
|
|
"step": 342
|
|
},
|
|
{
|
|
"epoch": 10.0,
|
|
"train_accuracy": 0.5957602339181286
|
|
},
|
|
{
|
|
"epoch": 10.0,
|
|
"grad_norm": 1.694950819015503,
|
|
"learning_rate": 1.9882364575351117e-05,
|
|
"loss": 1.4756,
|
|
"step": 380
|
|
},
|
|
{
|
|
"epoch": 10.0,
|
|
"eval_accuracy": 0.6683459024635495,
|
|
"eval_error_rate": 0.3316540975364505,
|
|
"eval_f1": 0.6293044227869377,
|
|
"eval_loss": 1.4565558433532715,
|
|
"eval_precision": 0.7578870575407971,
|
|
"eval_recall": 0.6447368421052632,
|
|
"eval_runtime": 25.8912,
|
|
"eval_samples_per_second": 5.871,
|
|
"eval_steps_per_second": 0.193,
|
|
"eval_top1_accuracy": 0.6447368421052632,
|
|
"step": 380
|
|
},
|
|
{
|
|
"epoch": 11.0,
|
|
"train_accuracy": 0.6111111111111112
|
|
},
|
|
{
|
|
"epoch": 11.0,
|
|
"grad_norm": 1.591986894607544,
|
|
"learning_rate": 1.9544051842595e-05,
|
|
"loss": 1.4387,
|
|
"step": 418
|
|
},
|
|
{
|
|
"epoch": 11.0,
|
|
"eval_accuracy": 0.698541980894922,
|
|
"eval_error_rate": 0.30145801910507797,
|
|
"eval_f1": 0.6692242337767512,
|
|
"eval_loss": 1.4155722856521606,
|
|
"eval_precision": 0.7913987173816895,
|
|
"eval_recall": 0.6776315789473685,
|
|
"eval_runtime": 26.7774,
|
|
"eval_samples_per_second": 5.676,
|
|
"eval_steps_per_second": 0.187,
|
|
"eval_top1_accuracy": 0.6776315789473685,
|
|
"step": 418
|
|
},
|
|
{
|
|
"epoch": 12.0,
|
|
"train_accuracy": 0.6469298245614035
|
|
},
|
|
{
|
|
"epoch": 12.0,
|
|
"grad_norm": 1.8263485431671143,
|
|
"learning_rate": 1.9009692640269474e-05,
|
|
"loss": 1.3993,
|
|
"step": 456
|
|
},
|
|
{
|
|
"epoch": 12.0,
|
|
"eval_accuracy": 0.7080442433383609,
|
|
"eval_error_rate": 0.2919557566616391,
|
|
"eval_f1": 0.6731791852575238,
|
|
"eval_loss": 1.3737214803695679,
|
|
"eval_precision": 0.7997336700204268,
|
|
"eval_recall": 0.6842105263157895,
|
|
"eval_runtime": 25.8778,
|
|
"eval_samples_per_second": 5.874,
|
|
"eval_steps_per_second": 0.193,
|
|
"eval_top1_accuracy": 0.6842105263157895,
|
|
"step": 456
|
|
},
|
|
{
|
|
"epoch": 13.0,
|
|
"train_accuracy": 0.6527777777777778
|
|
},
|
|
{
|
|
"epoch": 13.0,
|
|
"grad_norm": 1.8651676177978516,
|
|
"learning_rate": 1.826239469360898e-05,
|
|
"loss": 1.358,
|
|
"step": 494
|
|
},
|
|
{
|
|
"epoch": 13.0,
|
|
"eval_accuracy": 0.7232277526395173,
|
|
"eval_error_rate": 0.27677224736048267,
|
|
"eval_f1": 0.7048030719072474,
|
|
"eval_loss": 1.3288253545761108,
|
|
"eval_precision": 0.8290264820356354,
|
|
"eval_recall": 0.7039473684210527,
|
|
"eval_runtime": 26.9677,
|
|
"eval_samples_per_second": 5.636,
|
|
"eval_steps_per_second": 0.185,
|
|
"eval_top1_accuracy": 0.7039473684210527,
|
|
"step": 494
|
|
},
|
|
{
|
|
"epoch": 14.0,
|
|
"train_accuracy": 0.7017543859649122
|
|
},
|
|
{
|
|
"epoch": 14.0,
|
|
"grad_norm": 2.0590460300445557,
|
|
"learning_rate": 1.733052939622339e-05,
|
|
"loss": 1.3139,
|
|
"step": 532
|
|
},
|
|
{
|
|
"epoch": 14.0,
|
|
"eval_accuracy": 0.7592207139265963,
|
|
"eval_error_rate": 0.2407792860734037,
|
|
"eval_f1": 0.7373063276167947,
|
|
"eval_loss": 1.2805979251861572,
|
|
"eval_precision": 0.8276704808408175,
|
|
"eval_recall": 0.743421052631579,
|
|
"eval_runtime": 24.8464,
|
|
"eval_samples_per_second": 6.118,
|
|
"eval_steps_per_second": 0.201,
|
|
"eval_top1_accuracy": 0.75,
|
|
"step": 532
|
|
},
|
|
{
|
|
"epoch": 15.0,
|
|
"train_accuracy": 0.7185672514619883
|
|
},
|
|
{
|
|
"epoch": 15.0,
|
|
"grad_norm": 2.4823436737060547,
|
|
"learning_rate": 1.6234913078995263e-05,
|
|
"loss": 1.262,
|
|
"step": 570
|
|
},
|
|
{
|
|
"epoch": 15.0,
|
|
"eval_accuracy": 0.7828808446455506,
|
|
"eval_error_rate": 0.21711915535444937,
|
|
"eval_f1": 0.7664102924938837,
|
|
"eval_loss": 1.2344970703125,
|
|
"eval_precision": 0.8477553968860445,
|
|
"eval_recall": 0.7697368421052632,
|
|
"eval_runtime": 26.3117,
|
|
"eval_samples_per_second": 5.777,
|
|
"eval_steps_per_second": 0.19,
|
|
"eval_top1_accuracy": 0.7697368421052632,
|
|
"step": 570
|
|
},
|
|
{
|
|
"epoch": 16.0,
|
|
"train_accuracy": 0.7236842105263158
|
|
},
|
|
{
|
|
"epoch": 16.0,
|
|
"grad_norm": 2.0972795486450195,
|
|
"learning_rate": 1.5000020000000002e-05,
|
|
"loss": 1.2184,
|
|
"step": 608
|
|
},
|
|
{
|
|
"epoch": 16.0,
|
|
"eval_accuracy": 0.7817697335344395,
|
|
"eval_error_rate": 0.21823026646556054,
|
|
"eval_f1": 0.7654284912445182,
|
|
"eval_loss": 1.1887174844741821,
|
|
"eval_precision": 0.8322669418644651,
|
|
"eval_recall": 0.7697368421052632,
|
|
"eval_runtime": 25.3816,
|
|
"eval_samples_per_second": 5.989,
|
|
"eval_steps_per_second": 0.197,
|
|
"eval_top1_accuracy": 0.7697368421052632,
|
|
"step": 608
|
|
},
|
|
{
|
|
"epoch": 17.0,
|
|
"train_accuracy": 0.7448830409356725
|
|
},
|
|
{
|
|
"epoch": 17.0,
|
|
"grad_norm": 1.9592589139938354,
|
|
"learning_rate": 1.365343563002298e-05,
|
|
"loss": 1.1803,
|
|
"step": 646
|
|
},
|
|
{
|
|
"epoch": 17.0,
|
|
"eval_accuracy": 0.7930819507290096,
|
|
"eval_error_rate": 0.20691804927099045,
|
|
"eval_f1": 0.773531629357028,
|
|
"eval_loss": 1.1408498287200928,
|
|
"eval_precision": 0.8423109913821989,
|
|
"eval_recall": 0.7763157894736842,
|
|
"eval_runtime": 26.1892,
|
|
"eval_samples_per_second": 5.804,
|
|
"eval_steps_per_second": 0.191,
|
|
"eval_top1_accuracy": 0.7763157894736842,
|
|
"step": 646
|
|
},
|
|
{
|
|
"epoch": 18.0,
|
|
"train_accuracy": 0.7580409356725146
|
|
},
|
|
{
|
|
"epoch": 18.0,
|
|
"grad_norm": 2.348762035369873,
|
|
"learning_rate": 1.2225240438725788e-05,
|
|
"loss": 1.1422,
|
|
"step": 684
|
|
},
|
|
{
|
|
"epoch": 18.0,
|
|
"eval_accuracy": 0.83166918049271,
|
|
"eval_error_rate": 0.16833081950729,
|
|
"eval_f1": 0.810022076785411,
|
|
"eval_loss": 1.096580147743225,
|
|
"eval_precision": 0.8594414607948442,
|
|
"eval_recall": 0.8157894736842105,
|
|
"eval_runtime": 25.0285,
|
|
"eval_samples_per_second": 6.073,
|
|
"eval_steps_per_second": 0.2,
|
|
"eval_top1_accuracy": 0.8157894736842105,
|
|
"step": 684
|
|
},
|
|
{
|
|
"epoch": 19.0,
|
|
"train_accuracy": 0.7733918128654971
|
|
},
|
|
{
|
|
"epoch": 19.0,
|
|
"grad_norm": 2.2395124435424805,
|
|
"learning_rate": 1.0747337946660503e-05,
|
|
"loss": 1.1032,
|
|
"step": 722
|
|
},
|
|
{
|
|
"epoch": 19.0,
|
|
"eval_accuracy": 0.8144746103569632,
|
|
"eval_error_rate": 0.18552538964303678,
|
|
"eval_f1": 0.7968691491574786,
|
|
"eval_loss": 1.0586965084075928,
|
|
"eval_precision": 0.8430972766845342,
|
|
"eval_recall": 0.8026315789473685,
|
|
"eval_runtime": 25.4802,
|
|
"eval_samples_per_second": 5.965,
|
|
"eval_steps_per_second": 0.196,
|
|
"eval_top1_accuracy": 0.8026315789473685,
|
|
"step": 722
|
|
},
|
|
{
|
|
"epoch": 20.0,
|
|
"train_accuracy": 0.7850877192982456
|
|
},
|
|
{
|
|
"epoch": 20.0,
|
|
"grad_norm": 2.3096365928649902,
|
|
"learning_rate": 9.252742053339503e-06,
|
|
"loss": 1.058,
|
|
"step": 760
|
|
},
|
|
{
|
|
"epoch": 20.0,
|
|
"eval_accuracy": 0.8486626445449975,
|
|
"eval_error_rate": 0.1513373554550025,
|
|
"eval_f1": 0.8300500291649842,
|
|
"eval_loss": 1.0289386510849,
|
|
"eval_precision": 0.8609899749373433,
|
|
"eval_recall": 0.8355263157894737,
|
|
"eval_runtime": 27.6424,
|
|
"eval_samples_per_second": 5.499,
|
|
"eval_steps_per_second": 0.181,
|
|
"eval_top1_accuracy": 0.8355263157894737,
|
|
"step": 760
|
|
},
|
|
{
|
|
"epoch": 21.0,
|
|
"train_accuracy": 0.8084795321637427
|
|
},
|
|
{
|
|
"epoch": 21.0,
|
|
"grad_norm": 2.616567611694336,
|
|
"learning_rate": 7.774839561274216e-06,
|
|
"loss": 1.0252,
|
|
"step": 798
|
|
},
|
|
{
|
|
"epoch": 21.0,
|
|
"eval_accuracy": 0.8534338863750628,
|
|
"eval_error_rate": 0.1465661136249372,
|
|
"eval_f1": 0.8370304500033733,
|
|
"eval_loss": 0.9917795062065125,
|
|
"eval_precision": 0.8575724637681159,
|
|
"eval_recall": 0.8421052631578947,
|
|
"eval_runtime": 24.0656,
|
|
"eval_samples_per_second": 6.316,
|
|
"eval_steps_per_second": 0.208,
|
|
"eval_top1_accuracy": 0.8421052631578947,
|
|
"step": 798
|
|
},
|
|
{
|
|
"epoch": 22.0,
|
|
"train_accuracy": 0.814327485380117
|
|
},
|
|
{
|
|
"epoch": 22.0,
|
|
"grad_norm": 2.2312376499176025,
|
|
"learning_rate": 6.346644369977025e-06,
|
|
"loss": 1.002,
|
|
"step": 836
|
|
},
|
|
{
|
|
"epoch": 22.0,
|
|
"eval_accuracy": 0.8611261940673707,
|
|
"eval_error_rate": 0.13887380593262932,
|
|
"eval_f1": 0.8435398841932419,
|
|
"eval_loss": 0.9727317690849304,
|
|
"eval_precision": 0.8677232854864433,
|
|
"eval_recall": 0.8486842105263158,
|
|
"eval_runtime": 25.3651,
|
|
"eval_samples_per_second": 5.992,
|
|
"eval_steps_per_second": 0.197,
|
|
"eval_top1_accuracy": 0.8486842105263158,
|
|
"step": 836
|
|
},
|
|
{
|
|
"epoch": 23.0,
|
|
"train_accuracy": 0.8179824561403509
|
|
},
|
|
{
|
|
"epoch": 23.0,
|
|
"grad_norm": 2.2069149017333984,
|
|
"learning_rate": 5.000060000000003e-06,
|
|
"loss": 0.9812,
|
|
"step": 874
|
|
},
|
|
{
|
|
"epoch": 23.0,
|
|
"eval_accuracy": 0.8677928607340373,
|
|
"eval_error_rate": 0.13220713926596273,
|
|
"eval_f1": 0.8497099815147097,
|
|
"eval_loss": 0.9464592933654785,
|
|
"eval_precision": 0.8795061782362883,
|
|
"eval_recall": 0.8552631578947368,
|
|
"eval_runtime": 26.8567,
|
|
"eval_samples_per_second": 5.66,
|
|
"eval_steps_per_second": 0.186,
|
|
"eval_top1_accuracy": 0.8552631578947368,
|
|
"step": 874
|
|
},
|
|
{
|
|
"epoch": 24.0,
|
|
"train_accuracy": 0.8187134502923976
|
|
},
|
|
{
|
|
"epoch": 24.0,
|
|
"grad_norm": 2.32728910446167,
|
|
"learning_rate": 3.76516692100474e-06,
|
|
"loss": 0.9636,
|
|
"step": 912
|
|
},
|
|
{
|
|
"epoch": 24.0,
|
|
"eval_accuracy": 0.8699296128707893,
|
|
"eval_error_rate": 0.1300703871292107,
|
|
"eval_f1": 0.8485082765446309,
|
|
"eval_loss": 0.93310546875,
|
|
"eval_precision": 0.8820191260980734,
|
|
"eval_recall": 0.8552631578947368,
|
|
"eval_runtime": 27.6614,
|
|
"eval_samples_per_second": 5.495,
|
|
"eval_steps_per_second": 0.181,
|
|
"eval_top1_accuracy": 0.8552631578947368,
|
|
"step": 912
|
|
},
|
|
{
|
|
"epoch": 25.0,
|
|
"train_accuracy": 0.8150584795321637
|
|
},
|
|
{
|
|
"epoch": 25.0,
|
|
"grad_norm": 2.2066571712493896,
|
|
"learning_rate": 2.6963844978948743e-06,
|
|
"loss": 0.9591,
|
|
"step": 950
|
|
},
|
|
{
|
|
"epoch": 25.0,
|
|
"eval_accuracy": 0.8743740573152337,
|
|
"eval_error_rate": 0.12562594268476635,
|
|
"eval_f1": 0.8573701550510042,
|
|
"eval_loss": 0.92206209897995,
|
|
"eval_precision": 0.877970723615921,
|
|
"eval_recall": 0.8618421052631579,
|
|
"eval_runtime": 26.1743,
|
|
"eval_samples_per_second": 5.807,
|
|
"eval_steps_per_second": 0.191,
|
|
"eval_top1_accuracy": 0.8618421052631579,
|
|
"step": 950
|
|
},
|
|
{
|
|
"epoch": 26.0,
|
|
"train_accuracy": 0.8194444444444444
|
|
},
|
|
{
|
|
"epoch": 26.0,
|
|
"grad_norm": 2.345517873764038,
|
|
"learning_rate": 1.75992615737436e-06,
|
|
"loss": 0.948,
|
|
"step": 988
|
|
},
|
|
{
|
|
"epoch": 26.0,
|
|
"eval_accuracy": 0.8743740573152337,
|
|
"eval_error_rate": 0.12562594268476635,
|
|
"eval_f1": 0.8573701550510042,
|
|
"eval_loss": 0.9158027768135071,
|
|
"eval_precision": 0.877970723615921,
|
|
"eval_recall": 0.8618421052631579,
|
|
"eval_runtime": 26.0018,
|
|
"eval_samples_per_second": 5.846,
|
|
"eval_steps_per_second": 0.192,
|
|
"eval_top1_accuracy": 0.868421052631579,
|
|
"step": 988
|
|
},
|
|
{
|
|
"epoch": 27.0,
|
|
"train_accuracy": 0.8201754385964912
|
|
},
|
|
{
|
|
"epoch": 27.0,
|
|
"grad_norm": 2.246762990951538,
|
|
"learning_rate": 1.0075383437198693e-06,
|
|
"loss": 0.9384,
|
|
"step": 1026
|
|
},
|
|
{
|
|
"epoch": 27.0,
|
|
"eval_accuracy": 0.8601005530417295,
|
|
"eval_error_rate": 0.1398994469582705,
|
|
"eval_f1": 0.8431034082329946,
|
|
"eval_loss": 0.901735782623291,
|
|
"eval_precision": 0.8684637995623307,
|
|
"eval_recall": 0.8486842105263158,
|
|
"eval_runtime": 25.9225,
|
|
"eval_samples_per_second": 5.864,
|
|
"eval_steps_per_second": 0.193,
|
|
"eval_top1_accuracy": 0.8486842105263158,
|
|
"step": 1026
|
|
},
|
|
{
|
|
"epoch": 27.0,
|
|
"step": 1026,
|
|
"total_flos": 6.9738304117683e+17,
|
|
"train_loss": 1.2960130829095375,
|
|
"train_runtime": 5177.2299,
|
|
"train_samples_per_second": 7.046,
|
|
"train_steps_per_second": 0.22
|
|
}
|
|
],
|
|
"logging_steps": 500,
|
|
"max_steps": 1140,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 30,
|
|
"save_steps": 500,
|
|
"stateful_callbacks": {
|
|
"EarlyStoppingCallback": {
|
|
"args": {
|
|
"early_stopping_patience": 2,
|
|
"early_stopping_threshold": 0.0
|
|
},
|
|
"attributes": {
|
|
"early_stopping_patience_counter": 2
|
|
}
|
|
},
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 6.9738304117683e+17,
|
|
"train_batch_size": 32,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|
|
|