tibetan-bert_tncc-document_tsheg / trainer_state.json
metaphors's picture
Upload 7 files
08f9258 verified
{
"best_metric": 0.6611545464384188,
"best_model_checkpoint": "../saved_model/tibetan-bert_tncc-document_v3/checkpoint-3234",
"epoch": 20.0,
"eval_steps": 500,
"global_step": 4620,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"eval_accuracy": 0.6271739130434782,
"eval_loss": 1.15811288356781,
"eval_macro-f1": 0.5321491435605941,
"eval_macro-precision": 0.5499509034708364,
"eval_macro-recall": 0.5513303846525741,
"eval_runtime": 7.2664,
"eval_samples_per_second": 126.61,
"eval_steps_per_second": 3.991,
"eval_weighted-f1": 0.6286606311348418,
"eval_weighted-precision": 0.6570526714130747,
"eval_weighted-recall": 0.6271739130434782,
"step": 231
},
{
"epoch": 2.0,
"eval_accuracy": 0.6467391304347826,
"eval_loss": 1.068244218826294,
"eval_macro-f1": 0.5647362494157037,
"eval_macro-precision": 0.6183747770237591,
"eval_macro-recall": 0.5557985823725112,
"eval_runtime": 7.286,
"eval_samples_per_second": 126.269,
"eval_steps_per_second": 3.98,
"eval_weighted-f1": 0.6340133102826743,
"eval_weighted-precision": 0.661646599688849,
"eval_weighted-recall": 0.6467391304347826,
"step": 462
},
{
"epoch": 2.16,
"learning_rate": 4.458874458874459e-05,
"loss": 1.1735,
"step": 500
},
{
"epoch": 3.0,
"eval_accuracy": 0.6760869565217391,
"eval_loss": 0.9934693574905396,
"eval_macro-f1": 0.5945571067918397,
"eval_macro-precision": 0.6219488737703451,
"eval_macro-recall": 0.5845017075090547,
"eval_runtime": 7.2737,
"eval_samples_per_second": 126.483,
"eval_steps_per_second": 3.987,
"eval_weighted-f1": 0.6693675078151828,
"eval_weighted-precision": 0.6716730940106624,
"eval_weighted-recall": 0.6760869565217391,
"step": 693
},
{
"epoch": 4.0,
"eval_accuracy": 0.6760869565217391,
"eval_loss": 1.0614756345748901,
"eval_macro-f1": 0.6069076519660962,
"eval_macro-precision": 0.6513367891333143,
"eval_macro-recall": 0.5927988194005301,
"eval_runtime": 7.33,
"eval_samples_per_second": 125.511,
"eval_steps_per_second": 3.956,
"eval_weighted-f1": 0.668192819938365,
"eval_weighted-precision": 0.6913129775105903,
"eval_weighted-recall": 0.6760869565217391,
"step": 924
},
{
"epoch": 4.33,
"learning_rate": 3.917748917748918e-05,
"loss": 0.6662,
"step": 1000
},
{
"epoch": 5.0,
"eval_accuracy": 0.6608695652173913,
"eval_loss": 1.1701490879058838,
"eval_macro-f1": 0.594957375773476,
"eval_macro-precision": 0.6170952632020997,
"eval_macro-recall": 0.6011833102671116,
"eval_runtime": 7.2756,
"eval_samples_per_second": 126.45,
"eval_steps_per_second": 3.986,
"eval_weighted-f1": 0.6654696189795196,
"eval_weighted-precision": 0.6851333084994409,
"eval_weighted-recall": 0.6608695652173913,
"step": 1155
},
{
"epoch": 6.0,
"eval_accuracy": 0.6510869565217391,
"eval_loss": 1.292517066001892,
"eval_macro-f1": 0.607937784808065,
"eval_macro-precision": 0.630719857369284,
"eval_macro-recall": 0.6225829586625629,
"eval_runtime": 7.2833,
"eval_samples_per_second": 126.317,
"eval_steps_per_second": 3.982,
"eval_weighted-f1": 0.6579657588719497,
"eval_weighted-precision": 0.68649865311892,
"eval_weighted-recall": 0.6510869565217391,
"step": 1386
},
{
"epoch": 6.49,
"learning_rate": 3.376623376623377e-05,
"loss": 0.3247,
"step": 1500
},
{
"epoch": 7.0,
"eval_accuracy": 0.6695652173913044,
"eval_loss": 1.3797581195831299,
"eval_macro-f1": 0.6129092280782845,
"eval_macro-precision": 0.6387021571282967,
"eval_macro-recall": 0.609094564757661,
"eval_runtime": 7.2749,
"eval_samples_per_second": 126.461,
"eval_steps_per_second": 3.986,
"eval_weighted-f1": 0.6640159054651581,
"eval_weighted-precision": 0.6700483998986603,
"eval_weighted-recall": 0.6695652173913044,
"step": 1617
},
{
"epoch": 8.0,
"eval_accuracy": 0.6706521739130434,
"eval_loss": 1.4838347434997559,
"eval_macro-f1": 0.6061542359667785,
"eval_macro-precision": 0.611272244576803,
"eval_macro-recall": 0.6146650230118652,
"eval_runtime": 7.2825,
"eval_samples_per_second": 126.33,
"eval_steps_per_second": 3.982,
"eval_weighted-f1": 0.6720993101087696,
"eval_weighted-precision": 0.6820628069048843,
"eval_weighted-recall": 0.6706521739130434,
"step": 1848
},
{
"epoch": 8.66,
"learning_rate": 2.8354978354978357e-05,
"loss": 0.1507,
"step": 2000
},
{
"epoch": 9.0,
"eval_accuracy": 0.6880434782608695,
"eval_loss": 1.5808299779891968,
"eval_macro-f1": 0.6506642163845188,
"eval_macro-precision": 0.6540683372674448,
"eval_macro-recall": 0.6603270904104771,
"eval_runtime": 7.2595,
"eval_samples_per_second": 126.731,
"eval_steps_per_second": 3.995,
"eval_weighted-f1": 0.6915267384661025,
"eval_weighted-precision": 0.7075992309158492,
"eval_weighted-recall": 0.6880434782608695,
"step": 2079
},
{
"epoch": 10.0,
"eval_accuracy": 0.6717391304347826,
"eval_loss": 1.650195837020874,
"eval_macro-f1": 0.6034609083187376,
"eval_macro-precision": 0.6321546665662738,
"eval_macro-recall": 0.5917026419660609,
"eval_runtime": 7.2865,
"eval_samples_per_second": 126.261,
"eval_steps_per_second": 3.98,
"eval_weighted-f1": 0.6684456866047149,
"eval_weighted-precision": 0.6745276629705688,
"eval_weighted-recall": 0.6717391304347826,
"step": 2310
},
{
"epoch": 10.82,
"learning_rate": 2.2943722943722946e-05,
"loss": 0.0896,
"step": 2500
},
{
"epoch": 11.0,
"eval_accuracy": 0.6804347826086956,
"eval_loss": 1.738294005393982,
"eval_macro-f1": 0.6302114432029545,
"eval_macro-precision": 0.64659326690522,
"eval_macro-recall": 0.6353590685660309,
"eval_runtime": 7.3402,
"eval_samples_per_second": 125.337,
"eval_steps_per_second": 3.951,
"eval_weighted-f1": 0.6820100289567567,
"eval_weighted-precision": 0.6975783236550734,
"eval_weighted-recall": 0.6804347826086956,
"step": 2541
},
{
"epoch": 12.0,
"eval_accuracy": 0.6989130434782609,
"eval_loss": 1.7147595882415771,
"eval_macro-f1": 0.6515414811628367,
"eval_macro-precision": 0.6658442974988088,
"eval_macro-recall": 0.6496260625897462,
"eval_runtime": 7.2787,
"eval_samples_per_second": 126.396,
"eval_steps_per_second": 3.984,
"eval_weighted-f1": 0.6976782715450106,
"eval_weighted-precision": 0.7017023034717548,
"eval_weighted-recall": 0.6989130434782609,
"step": 2772
},
{
"epoch": 12.99,
"learning_rate": 1.7532467532467535e-05,
"loss": 0.0646,
"step": 3000
},
{
"epoch": 13.0,
"eval_accuracy": 0.6891304347826087,
"eval_loss": 1.7946357727050781,
"eval_macro-f1": 0.648332711071471,
"eval_macro-precision": 0.6479765490771864,
"eval_macro-recall": 0.6515217848664077,
"eval_runtime": 7.3066,
"eval_samples_per_second": 125.913,
"eval_steps_per_second": 3.969,
"eval_weighted-f1": 0.6915983518325557,
"eval_weighted-precision": 0.6986485129748002,
"eval_weighted-recall": 0.6891304347826087,
"step": 3003
},
{
"epoch": 14.0,
"eval_accuracy": 0.7,
"eval_loss": 1.7724699974060059,
"eval_macro-f1": 0.6611545464384188,
"eval_macro-precision": 0.667409141168159,
"eval_macro-recall": 0.6627804433172214,
"eval_runtime": 7.2494,
"eval_samples_per_second": 126.906,
"eval_steps_per_second": 4.0,
"eval_weighted-f1": 0.7033455944346818,
"eval_weighted-precision": 0.7140252489602517,
"eval_weighted-recall": 0.7,
"step": 3234
},
{
"epoch": 15.0,
"eval_accuracy": 0.6923913043478261,
"eval_loss": 1.819846510887146,
"eval_macro-f1": 0.6556012492821643,
"eval_macro-precision": 0.6602686382879858,
"eval_macro-recall": 0.6668664107682606,
"eval_runtime": 7.2775,
"eval_samples_per_second": 126.418,
"eval_steps_per_second": 3.985,
"eval_weighted-f1": 0.6965952163097968,
"eval_weighted-precision": 0.7083445248462037,
"eval_weighted-recall": 0.6923913043478261,
"step": 3465
},
{
"epoch": 15.15,
"learning_rate": 1.2121212121212122e-05,
"loss": 0.042,
"step": 3500
},
{
"epoch": 16.0,
"eval_accuracy": 0.6945652173913044,
"eval_loss": 1.784122109413147,
"eval_macro-f1": 0.6563585998978742,
"eval_macro-precision": 0.6655291218706761,
"eval_macro-recall": 0.6534120619783158,
"eval_runtime": 7.297,
"eval_samples_per_second": 126.08,
"eval_steps_per_second": 3.974,
"eval_weighted-f1": 0.6949462526633576,
"eval_weighted-precision": 0.6993999521302994,
"eval_weighted-recall": 0.6945652173913044,
"step": 3696
},
{
"epoch": 17.0,
"eval_accuracy": 0.6945652173913044,
"eval_loss": 1.7921020984649658,
"eval_macro-f1": 0.654978142271046,
"eval_macro-precision": 0.6614421486999998,
"eval_macro-recall": 0.657140253465508,
"eval_runtime": 7.359,
"eval_samples_per_second": 125.018,
"eval_steps_per_second": 3.941,
"eval_weighted-f1": 0.6972072799287201,
"eval_weighted-precision": 0.705999712282921,
"eval_weighted-recall": 0.6945652173913044,
"step": 3927
},
{
"epoch": 17.32,
"learning_rate": 6.709956709956711e-06,
"loss": 0.0314,
"step": 4000
},
{
"epoch": 18.0,
"eval_accuracy": 0.6945652173913044,
"eval_loss": 1.824020266532898,
"eval_macro-f1": 0.6548614235086001,
"eval_macro-precision": 0.6544996322588115,
"eval_macro-recall": 0.660921203092836,
"eval_runtime": 7.3304,
"eval_samples_per_second": 125.504,
"eval_steps_per_second": 3.956,
"eval_weighted-f1": 0.6960752624759597,
"eval_weighted-precision": 0.7018632204313372,
"eval_weighted-recall": 0.6945652173913044,
"step": 4158
},
{
"epoch": 19.0,
"eval_accuracy": 0.6923913043478261,
"eval_loss": 1.8412573337554932,
"eval_macro-f1": 0.6506506908222951,
"eval_macro-precision": 0.6468541951851238,
"eval_macro-recall": 0.6600299174294355,
"eval_runtime": 7.2351,
"eval_samples_per_second": 127.157,
"eval_steps_per_second": 4.008,
"eval_weighted-f1": 0.69621624934211,
"eval_weighted-precision": 0.7054368084525598,
"eval_weighted-recall": 0.6923913043478261,
"step": 4389
},
{
"epoch": 19.48,
"learning_rate": 1.2987012987012988e-06,
"loss": 0.0233,
"step": 4500
},
{
"epoch": 20.0,
"eval_accuracy": 0.691304347826087,
"eval_loss": 1.8325966596603394,
"eval_macro-f1": 0.6463398836192601,
"eval_macro-precision": 0.6403230244612891,
"eval_macro-recall": 0.6567581821874694,
"eval_runtime": 7.2741,
"eval_samples_per_second": 126.476,
"eval_steps_per_second": 3.987,
"eval_weighted-f1": 0.6940975925997279,
"eval_weighted-precision": 0.7008420722361637,
"eval_weighted-recall": 0.691304347826087,
"step": 4620
}
],
"logging_steps": 500,
"max_steps": 4620,
"num_input_tokens_seen": 0,
"num_train_epochs": 20,
"save_steps": 500,
"total_flos": 3.87544755290112e+16,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}