bert-large-uncased-scsmall / trainer_state.json
ZongqianLi's picture
Upload 130 files
1c5059c
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 36.0,
"global_step": 972,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"learning_rate": 1.2262943855309169e-05,
"loss": 2.057,
"step": 27
},
{
"epoch": 1.0,
"eval_accuracy": 0.6772454727151596,
"eval_loss": 1.7236038446426392,
"eval_runtime": 13.5733,
"eval_samples_per_second": 32.711,
"eval_steps_per_second": 0.147,
"step": 27
},
{
"epoch": 2.0,
"learning_rate": 1.4841962570206113e-05,
"loss": 1.7092,
"step": 54
},
{
"epoch": 2.0,
"eval_accuracy": 0.6982139066622192,
"eval_loss": 1.5525641441345215,
"eval_runtime": 12.8991,
"eval_samples_per_second": 34.421,
"eval_steps_per_second": 0.155,
"step": 54
},
{
"epoch": 3.0,
"learning_rate": 1.6350591807078892e-05,
"loss": 1.5646,
"step": 81
},
{
"epoch": 3.0,
"eval_accuracy": 0.7100154712905392,
"eval_loss": 1.452789068222046,
"eval_runtime": 12.9607,
"eval_samples_per_second": 34.258,
"eval_steps_per_second": 0.154,
"step": 81
},
{
"epoch": 4.0,
"learning_rate": 1.7420981285103056e-05,
"loss": 1.4688,
"step": 108
},
{
"epoch": 4.0,
"eval_accuracy": 0.7290463925156171,
"eval_loss": 1.3419641256332397,
"eval_runtime": 12.9711,
"eval_samples_per_second": 34.23,
"eval_steps_per_second": 0.154,
"step": 108
},
{
"epoch": 5.0,
"learning_rate": 1.825123986666868e-05,
"loss": 1.3785,
"step": 135
},
{
"epoch": 5.0,
"eval_accuracy": 0.74070063507858,
"eval_loss": 1.2742513418197632,
"eval_runtime": 12.944,
"eval_samples_per_second": 34.302,
"eval_steps_per_second": 0.155,
"step": 135
},
{
"epoch": 6.0,
"learning_rate": 1.892961052197583e-05,
"loss": 1.3459,
"step": 162
},
{
"epoch": 6.0,
"eval_accuracy": 0.739266862170088,
"eval_loss": 1.2691913843154907,
"eval_runtime": 12.9659,
"eval_samples_per_second": 34.244,
"eval_steps_per_second": 0.154,
"step": 162
},
{
"epoch": 7.0,
"learning_rate": 1.9503164738653782e-05,
"loss": 1.3059,
"step": 189
},
{
"epoch": 7.0,
"eval_accuracy": 0.7480331638828371,
"eval_loss": 1.2231497764587402,
"eval_runtime": 12.921,
"eval_samples_per_second": 34.363,
"eval_steps_per_second": 0.155,
"step": 189
},
{
"epoch": 8.0,
"learning_rate": 1.9999999999999998e-05,
"loss": 1.2666,
"step": 216
},
{
"epoch": 8.0,
"eval_accuracy": 0.7594158570229099,
"eval_loss": 1.151406168937683,
"eval_runtime": 12.9461,
"eval_samples_per_second": 34.296,
"eval_steps_per_second": 0.154,
"step": 216
},
{
"epoch": 9.0,
"learning_rate": 2e-05,
"loss": 1.2463,
"step": 243
},
{
"epoch": 9.0,
"eval_accuracy": 0.7459728430463017,
"eval_loss": 1.2034211158752441,
"eval_runtime": 13.0359,
"eval_samples_per_second": 34.06,
"eval_steps_per_second": 0.153,
"step": 243
},
{
"epoch": 10.0,
"learning_rate": 2e-05,
"loss": 1.2276,
"step": 270
},
{
"epoch": 10.0,
"eval_accuracy": 0.7586074755335456,
"eval_loss": 1.1566089391708374,
"eval_runtime": 12.9245,
"eval_samples_per_second": 34.353,
"eval_steps_per_second": 0.155,
"step": 270
},
{
"epoch": 11.0,
"learning_rate": 2e-05,
"loss": 1.189,
"step": 297
},
{
"epoch": 11.0,
"eval_accuracy": 0.7611567732115677,
"eval_loss": 1.1319142580032349,
"eval_runtime": 12.9698,
"eval_samples_per_second": 34.233,
"eval_steps_per_second": 0.154,
"step": 297
},
{
"epoch": 12.0,
"learning_rate": 2e-05,
"loss": 1.1568,
"step": 324
},
{
"epoch": 12.0,
"eval_accuracy": 0.7545374996471618,
"eval_loss": 1.1536731719970703,
"eval_runtime": 12.9366,
"eval_samples_per_second": 34.321,
"eval_steps_per_second": 0.155,
"step": 324
},
{
"epoch": 13.0,
"learning_rate": 2e-05,
"loss": 1.1447,
"step": 351
},
{
"epoch": 13.0,
"eval_accuracy": 0.7683153013910355,
"eval_loss": 1.0927276611328125,
"eval_runtime": 12.9899,
"eval_samples_per_second": 34.18,
"eval_steps_per_second": 0.154,
"step": 351
},
{
"epoch": 14.0,
"learning_rate": 2e-05,
"loss": 1.1262,
"step": 378
},
{
"epoch": 14.0,
"eval_accuracy": 0.7699539058709365,
"eval_loss": 1.0704097747802734,
"eval_runtime": 12.9791,
"eval_samples_per_second": 34.209,
"eval_steps_per_second": 0.154,
"step": 378
},
{
"epoch": 15.0,
"learning_rate": 2e-05,
"loss": 1.1173,
"step": 405
},
{
"epoch": 15.0,
"eval_accuracy": 0.774438983954053,
"eval_loss": 1.0296632051467896,
"eval_runtime": 12.9361,
"eval_samples_per_second": 34.323,
"eval_steps_per_second": 0.155,
"step": 405
},
{
"epoch": 16.0,
"learning_rate": 2e-05,
"loss": 1.0997,
"step": 432
},
{
"epoch": 16.0,
"eval_accuracy": 0.7737643712219984,
"eval_loss": 1.0550481081008911,
"eval_runtime": 12.9715,
"eval_samples_per_second": 34.229,
"eval_steps_per_second": 0.154,
"step": 432
},
{
"epoch": 17.0,
"learning_rate": 2e-05,
"loss": 1.0962,
"step": 459
},
{
"epoch": 17.0,
"eval_accuracy": 0.7746747253401614,
"eval_loss": 1.0655121803283691,
"eval_runtime": 12.9954,
"eval_samples_per_second": 34.166,
"eval_steps_per_second": 0.154,
"step": 459
},
{
"epoch": 18.0,
"learning_rate": 2e-05,
"loss": 1.0864,
"step": 486
},
{
"epoch": 18.0,
"eval_accuracy": 0.7735955893309492,
"eval_loss": 1.0611152648925781,
"eval_runtime": 12.9386,
"eval_samples_per_second": 34.316,
"eval_steps_per_second": 0.155,
"step": 486
},
{
"epoch": 19.0,
"learning_rate": 2e-05,
"loss": 1.0817,
"step": 513
},
{
"epoch": 19.0,
"eval_accuracy": 0.7697911607576493,
"eval_loss": 1.0739043951034546,
"eval_runtime": 13.0147,
"eval_samples_per_second": 34.115,
"eval_steps_per_second": 0.154,
"step": 513
},
{
"epoch": 20.0,
"learning_rate": 2e-05,
"loss": 1.0615,
"step": 540
},
{
"epoch": 20.0,
"eval_accuracy": 0.7779283925151024,
"eval_loss": 1.0259206295013428,
"eval_runtime": 12.248,
"eval_samples_per_second": 36.251,
"eval_steps_per_second": 0.163,
"step": 540
},
{
"epoch": 21.0,
"learning_rate": 2e-05,
"loss": 1.0337,
"step": 567
},
{
"epoch": 21.0,
"eval_accuracy": 0.7820505322259913,
"eval_loss": 1.0050867795944214,
"eval_runtime": 13.0151,
"eval_samples_per_second": 34.114,
"eval_steps_per_second": 0.154,
"step": 567
},
{
"epoch": 22.0,
"learning_rate": 2e-05,
"loss": 1.0248,
"step": 594
},
{
"epoch": 22.0,
"eval_accuracy": 0.7823669101512009,
"eval_loss": 0.9815566539764404,
"eval_runtime": 12.966,
"eval_samples_per_second": 34.243,
"eval_steps_per_second": 0.154,
"step": 594
},
{
"epoch": 23.0,
"learning_rate": 2e-05,
"loss": 1.0078,
"step": 621
},
{
"epoch": 23.0,
"eval_accuracy": 0.7890613318979696,
"eval_loss": 0.9701399207115173,
"eval_runtime": 12.9372,
"eval_samples_per_second": 34.32,
"eval_steps_per_second": 0.155,
"step": 621
},
{
"epoch": 24.0,
"learning_rate": 2e-05,
"loss": 1.0161,
"step": 648
},
{
"epoch": 24.0,
"eval_accuracy": 0.7868592237542407,
"eval_loss": 0.9783701300621033,
"eval_runtime": 12.9845,
"eval_samples_per_second": 34.195,
"eval_steps_per_second": 0.154,
"step": 648
},
{
"epoch": 25.0,
"learning_rate": 2e-05,
"loss": 1.0005,
"step": 675
},
{
"epoch": 25.0,
"eval_accuracy": 0.782170183167169,
"eval_loss": 0.9962915182113647,
"eval_runtime": 13.0152,
"eval_samples_per_second": 34.114,
"eval_steps_per_second": 0.154,
"step": 675
},
{
"epoch": 26.0,
"learning_rate": 2e-05,
"loss": 1.0008,
"step": 702
},
{
"epoch": 26.0,
"eval_accuracy": 0.790520909757887,
"eval_loss": 0.9529848694801331,
"eval_runtime": 13.0266,
"eval_samples_per_second": 34.084,
"eval_steps_per_second": 0.154,
"step": 702
},
{
"epoch": 27.0,
"learning_rate": 2e-05,
"loss": 0.9961,
"step": 729
},
{
"epoch": 27.0,
"eval_accuracy": 0.7787046824557895,
"eval_loss": 1.0195859670639038,
"eval_runtime": 12.9955,
"eval_samples_per_second": 34.166,
"eval_steps_per_second": 0.154,
"step": 729
},
{
"epoch": 28.0,
"learning_rate": 2e-05,
"loss": 0.9834,
"step": 756
},
{
"epoch": 28.0,
"eval_accuracy": 0.7917842772205873,
"eval_loss": 0.9555456638336182,
"eval_runtime": 12.2326,
"eval_samples_per_second": 36.296,
"eval_steps_per_second": 0.163,
"step": 756
},
{
"epoch": 29.0,
"learning_rate": 2e-05,
"loss": 0.9647,
"step": 783
},
{
"epoch": 29.0,
"eval_accuracy": 0.7914557776443338,
"eval_loss": 0.9375360608100891,
"eval_runtime": 13.0096,
"eval_samples_per_second": 34.129,
"eval_steps_per_second": 0.154,
"step": 783
},
{
"epoch": 30.0,
"learning_rate": 2e-05,
"loss": 0.967,
"step": 810
},
{
"epoch": 30.0,
"eval_accuracy": 0.793787977110495,
"eval_loss": 0.9494355320930481,
"eval_runtime": 13.0239,
"eval_samples_per_second": 34.091,
"eval_steps_per_second": 0.154,
"step": 810
},
{
"epoch": 31.0,
"learning_rate": 2e-05,
"loss": 0.9625,
"step": 837
},
{
"epoch": 31.0,
"eval_accuracy": 0.7860187306097597,
"eval_loss": 0.9812522530555725,
"eval_runtime": 12.989,
"eval_samples_per_second": 34.183,
"eval_steps_per_second": 0.154,
"step": 837
},
{
"epoch": 32.0,
"learning_rate": 2e-05,
"loss": 0.9578,
"step": 864
},
{
"epoch": 32.0,
"eval_accuracy": 0.7956582591346297,
"eval_loss": 0.9389752149581909,
"eval_runtime": 13.0492,
"eval_samples_per_second": 34.025,
"eval_steps_per_second": 0.153,
"step": 864
},
{
"epoch": 33.0,
"learning_rate": 2e-05,
"loss": 0.9462,
"step": 891
},
{
"epoch": 33.0,
"eval_accuracy": 0.79146801197472,
"eval_loss": 0.9519514441490173,
"eval_runtime": 12.965,
"eval_samples_per_second": 34.246,
"eval_steps_per_second": 0.154,
"step": 891
},
{
"epoch": 34.0,
"learning_rate": 2e-05,
"loss": 0.9468,
"step": 918
},
{
"epoch": 34.0,
"eval_accuracy": 0.7949606757937664,
"eval_loss": 0.922423243522644,
"eval_runtime": 12.9466,
"eval_samples_per_second": 34.295,
"eval_steps_per_second": 0.154,
"step": 918
},
{
"epoch": 35.0,
"learning_rate": 2e-05,
"loss": 0.9357,
"step": 945
},
{
"epoch": 35.0,
"eval_accuracy": 0.8009954921111946,
"eval_loss": 0.908001184463501,
"eval_runtime": 12.9778,
"eval_samples_per_second": 34.212,
"eval_steps_per_second": 0.154,
"step": 945
},
{
"epoch": 36.0,
"learning_rate": 2e-05,
"loss": 0.9328,
"step": 972
},
{
"epoch": 36.0,
"eval_accuracy": 0.7935578330893118,
"eval_loss": 0.9237804412841797,
"eval_runtime": 12.9467,
"eval_samples_per_second": 34.294,
"eval_steps_per_second": 0.154,
"step": 972
}
],
"max_steps": 1080,
"num_train_epochs": 40,
"total_flos": 302668861931520.0,
"trial_name": null,
"trial_params": null
}