bde-pos-bert-cased-base / trainer_state.json
Shu Huang
update
e2eab22
raw
history blame
9.05 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 15.0,
"global_step": 12240,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.61,
"learning_rate": 1.9183006535947716e-05,
"loss": 0.4329,
"step": 500
},
{
"epoch": 1.0,
"eval_accuracy": 0.9725273278019229,
"eval_f1": 0.9629807384654764,
"eval_loss": 0.10430943965911865,
"eval_precision": 0.9637516411028211,
"eval_recall": 0.9622110681287765,
"eval_runtime": 2.936,
"eval_samples_per_second": 494.214,
"eval_steps_per_second": 30.995,
"step": 816
},
{
"epoch": 1.23,
"learning_rate": 1.8366013071895427e-05,
"loss": 0.0811,
"step": 1000
},
{
"epoch": 1.84,
"learning_rate": 1.7549019607843138e-05,
"loss": 0.0607,
"step": 1500
},
{
"epoch": 2.0,
"eval_accuracy": 0.9751349927564862,
"eval_f1": 0.9635150914122107,
"eval_loss": 0.09400150179862976,
"eval_precision": 0.9641165172855314,
"eval_recall": 0.9629144154224879,
"eval_runtime": 2.7493,
"eval_samples_per_second": 527.777,
"eval_steps_per_second": 33.1,
"step": 1632
},
{
"epoch": 2.45,
"learning_rate": 1.6732026143790852e-05,
"loss": 0.0428,
"step": 2000
},
{
"epoch": 3.0,
"eval_accuracy": 0.9755037534571316,
"eval_f1": 0.9632957125043962,
"eval_loss": 0.09555820375680923,
"eval_precision": 0.9633573141486811,
"eval_recall": 0.9632341187378113,
"eval_runtime": 2.7415,
"eval_samples_per_second": 529.269,
"eval_steps_per_second": 33.193,
"step": 2448
},
{
"epoch": 3.06,
"learning_rate": 1.5915032679738563e-05,
"loss": 0.0363,
"step": 2500
},
{
"epoch": 3.68,
"learning_rate": 1.5098039215686276e-05,
"loss": 0.0249,
"step": 3000
},
{
"epoch": 4.0,
"eval_accuracy": 0.9765046753588832,
"eval_f1": 0.9649540551338394,
"eval_loss": 0.0989251509308815,
"eval_precision": 0.9646919734151329,
"eval_recall": 0.9652162792928163,
"eval_runtime": 2.7759,
"eval_samples_per_second": 522.708,
"eval_steps_per_second": 32.782,
"step": 3264
},
{
"epoch": 4.29,
"learning_rate": 1.4281045751633989e-05,
"loss": 0.0221,
"step": 3500
},
{
"epoch": 4.9,
"learning_rate": 1.3464052287581701e-05,
"loss": 0.0177,
"step": 4000
},
{
"epoch": 5.0,
"eval_accuracy": 0.976451995258791,
"eval_f1": 0.9646675296035285,
"eval_loss": 0.10913769155740738,
"eval_precision": 0.9643747204294204,
"eval_recall": 0.9649605166405576,
"eval_runtime": 3.7551,
"eval_samples_per_second": 386.403,
"eval_steps_per_second": 24.233,
"step": 4080
},
{
"epoch": 5.51,
"learning_rate": 1.2647058823529412e-05,
"loss": 0.0129,
"step": 4500
},
{
"epoch": 6.0,
"eval_accuracy": 0.9780060582115107,
"eval_f1": 0.9675017575254042,
"eval_loss": 0.11124212294816971,
"eval_precision": 0.967038231818327,
"eval_recall": 0.9679657278045973,
"eval_runtime": 3.8249,
"eval_samples_per_second": 379.357,
"eval_steps_per_second": 23.792,
"step": 4896
},
{
"epoch": 6.13,
"learning_rate": 1.1830065359477125e-05,
"loss": 0.0119,
"step": 5000
},
{
"epoch": 6.74,
"learning_rate": 1.1013071895424838e-05,
"loss": 0.0084,
"step": 5500
},
{
"epoch": 7.0,
"eval_accuracy": 0.9783221388120638,
"eval_f1": 0.9677996994212259,
"eval_loss": 0.12139276415109634,
"eval_precision": 0.967985415933732,
"eval_recall": 0.9676140541577416,
"eval_runtime": 3.747,
"eval_samples_per_second": 387.246,
"eval_steps_per_second": 24.286,
"step": 5712
},
{
"epoch": 7.35,
"learning_rate": 1.0196078431372549e-05,
"loss": 0.0076,
"step": 6000
},
{
"epoch": 7.97,
"learning_rate": 9.379084967320261e-06,
"loss": 0.0061,
"step": 6500
},
{
"epoch": 8.0,
"eval_accuracy": 0.9775319373106809,
"eval_f1": 0.9664444551020734,
"eval_loss": 0.13343702256679535,
"eval_precision": 0.9665217113257019,
"eval_recall": 0.9663672112279804,
"eval_runtime": 2.7649,
"eval_samples_per_second": 524.787,
"eval_steps_per_second": 32.912,
"step": 6528
},
{
"epoch": 8.58,
"learning_rate": 8.562091503267974e-06,
"loss": 0.0048,
"step": 7000
},
{
"epoch": 9.0,
"eval_accuracy": 0.9775055972606348,
"eval_f1": 0.9661577811525858,
"eval_loss": 0.13892702758312225,
"eval_precision": 0.9662041181736795,
"eval_recall": 0.9661114485757217,
"eval_runtime": 3.8371,
"eval_samples_per_second": 378.146,
"eval_steps_per_second": 23.716,
"step": 7344
},
{
"epoch": 9.19,
"learning_rate": 7.745098039215687e-06,
"loss": 0.0047,
"step": 7500
},
{
"epoch": 9.8,
"learning_rate": 6.928104575163399e-06,
"loss": 0.0044,
"step": 8000
},
{
"epoch": 10.0,
"eval_accuracy": 0.9781640985117872,
"eval_f1": 0.967271564473579,
"eval_loss": 0.14489226043224335,
"eval_precision": 0.9674726540011515,
"eval_recall": 0.9670705585216919,
"eval_runtime": 3.7406,
"eval_samples_per_second": 387.909,
"eval_steps_per_second": 24.328,
"step": 8160
},
{
"epoch": 10.42,
"learning_rate": 6.111111111111112e-06,
"loss": 0.0034,
"step": 8500
},
{
"epoch": 11.0,
"eval_accuracy": 0.9779797181614646,
"eval_f1": 0.9672149490880608,
"eval_loss": 0.14611582458019257,
"eval_precision": 0.9671994884910486,
"eval_recall": 0.9672304101793535,
"eval_runtime": 2.8969,
"eval_samples_per_second": 500.886,
"eval_steps_per_second": 31.413,
"step": 8976
},
{
"epoch": 11.03,
"learning_rate": 5.294117647058824e-06,
"loss": 0.0034,
"step": 9000
},
{
"epoch": 11.64,
"learning_rate": 4.477124183006537e-06,
"loss": 0.0025,
"step": 9500
},
{
"epoch": 12.0,
"eval_accuracy": 0.9784274990122481,
"eval_f1": 0.9676501159350764,
"eval_loss": 0.14966140687465668,
"eval_precision": 0.9680061428205784,
"eval_recall": 0.9672943508424182,
"eval_runtime": 2.7972,
"eval_samples_per_second": 518.731,
"eval_steps_per_second": 32.532,
"step": 9792
},
{
"epoch": 12.25,
"learning_rate": 3.6601307189542484e-06,
"loss": 0.0022,
"step": 10000
},
{
"epoch": 12.87,
"learning_rate": 2.843137254901961e-06,
"loss": 0.0024,
"step": 10500
},
{
"epoch": 13.0,
"eval_accuracy": 0.977848017911234,
"eval_f1": 0.9670199670679264,
"eval_loss": 0.15780866146087646,
"eval_precision": 0.9670972692971798,
"eval_recall": 0.9669426771955625,
"eval_runtime": 3.8111,
"eval_samples_per_second": 380.734,
"eval_steps_per_second": 23.878,
"step": 10608
},
{
"epoch": 13.48,
"learning_rate": 2.0261437908496734e-06,
"loss": 0.0014,
"step": 11000
},
{
"epoch": 14.0,
"eval_accuracy": 0.978111418411695,
"eval_f1": 0.9674521213671389,
"eval_loss": 0.15866732597351074,
"eval_precision": 0.9675139888089528,
"eval_recall": 0.9673902618370153,
"eval_runtime": 2.8382,
"eval_samples_per_second": 511.243,
"eval_steps_per_second": 32.063,
"step": 11424
},
{
"epoch": 14.09,
"learning_rate": 1.2091503267973858e-06,
"loss": 0.0019,
"step": 11500
},
{
"epoch": 14.71,
"learning_rate": 3.921568627450981e-07,
"loss": 0.0016,
"step": 12000
},
{
"epoch": 15.0,
"eval_accuracy": 0.978401158962202,
"eval_f1": 0.96785434549785,
"eval_loss": 0.15796583890914917,
"eval_precision": 0.9678388746803069,
"eval_recall": 0.9678698168100003,
"eval_runtime": 3.7576,
"eval_samples_per_second": 386.151,
"eval_steps_per_second": 24.218,
"step": 12240
},
{
"epoch": 15.0,
"step": 12240,
"total_flos": 5.119363918698906e+16,
"train_loss": 0.03262665262993644,
"train_runtime": 785.1159,
"train_samples_per_second": 249.403,
"train_steps_per_second": 15.59
}
],
"max_steps": 12240,
"num_train_epochs": 15,
"total_flos": 5.119363918698906e+16,
"trial_name": null,
"trial_params": null
}