{ "best_metric": 0.6611545464384188, "best_model_checkpoint": "../saved_model/tibetan-bert_tncc-document_v3/checkpoint-3234", "epoch": 20.0, "eval_steps": 500, "global_step": 4620, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.6271739130434782, "eval_loss": 1.15811288356781, "eval_macro-f1": 0.5321491435605941, "eval_macro-precision": 0.5499509034708364, "eval_macro-recall": 0.5513303846525741, "eval_runtime": 7.2664, "eval_samples_per_second": 126.61, "eval_steps_per_second": 3.991, "eval_weighted-f1": 0.6286606311348418, "eval_weighted-precision": 0.6570526714130747, "eval_weighted-recall": 0.6271739130434782, "step": 231 }, { "epoch": 2.0, "eval_accuracy": 0.6467391304347826, "eval_loss": 1.068244218826294, "eval_macro-f1": 0.5647362494157037, "eval_macro-precision": 0.6183747770237591, "eval_macro-recall": 0.5557985823725112, "eval_runtime": 7.286, "eval_samples_per_second": 126.269, "eval_steps_per_second": 3.98, "eval_weighted-f1": 0.6340133102826743, "eval_weighted-precision": 0.661646599688849, "eval_weighted-recall": 0.6467391304347826, "step": 462 }, { "epoch": 2.16, "learning_rate": 4.458874458874459e-05, "loss": 1.1735, "step": 500 }, { "epoch": 3.0, "eval_accuracy": 0.6760869565217391, "eval_loss": 0.9934693574905396, "eval_macro-f1": 0.5945571067918397, "eval_macro-precision": 0.6219488737703451, "eval_macro-recall": 0.5845017075090547, "eval_runtime": 7.2737, "eval_samples_per_second": 126.483, "eval_steps_per_second": 3.987, "eval_weighted-f1": 0.6693675078151828, "eval_weighted-precision": 0.6716730940106624, "eval_weighted-recall": 0.6760869565217391, "step": 693 }, { "epoch": 4.0, "eval_accuracy": 0.6760869565217391, "eval_loss": 1.0614756345748901, "eval_macro-f1": 0.6069076519660962, "eval_macro-precision": 0.6513367891333143, "eval_macro-recall": 0.5927988194005301, "eval_runtime": 7.33, "eval_samples_per_second": 125.511, "eval_steps_per_second": 3.956, "eval_weighted-f1": 0.668192819938365, "eval_weighted-precision": 0.6913129775105903, "eval_weighted-recall": 0.6760869565217391, "step": 924 }, { "epoch": 4.33, "learning_rate": 3.917748917748918e-05, "loss": 0.6662, "step": 1000 }, { "epoch": 5.0, "eval_accuracy": 0.6608695652173913, "eval_loss": 1.1701490879058838, "eval_macro-f1": 0.594957375773476, "eval_macro-precision": 0.6170952632020997, "eval_macro-recall": 0.6011833102671116, "eval_runtime": 7.2756, "eval_samples_per_second": 126.45, "eval_steps_per_second": 3.986, "eval_weighted-f1": 0.6654696189795196, "eval_weighted-precision": 0.6851333084994409, "eval_weighted-recall": 0.6608695652173913, "step": 1155 }, { "epoch": 6.0, "eval_accuracy": 0.6510869565217391, "eval_loss": 1.292517066001892, "eval_macro-f1": 0.607937784808065, "eval_macro-precision": 0.630719857369284, "eval_macro-recall": 0.6225829586625629, "eval_runtime": 7.2833, "eval_samples_per_second": 126.317, "eval_steps_per_second": 3.982, "eval_weighted-f1": 0.6579657588719497, "eval_weighted-precision": 0.68649865311892, "eval_weighted-recall": 0.6510869565217391, "step": 1386 }, { "epoch": 6.49, "learning_rate": 3.376623376623377e-05, "loss": 0.3247, "step": 1500 }, { "epoch": 7.0, "eval_accuracy": 0.6695652173913044, "eval_loss": 1.3797581195831299, "eval_macro-f1": 0.6129092280782845, "eval_macro-precision": 0.6387021571282967, "eval_macro-recall": 0.609094564757661, "eval_runtime": 7.2749, "eval_samples_per_second": 126.461, "eval_steps_per_second": 3.986, "eval_weighted-f1": 0.6640159054651581, "eval_weighted-precision": 0.6700483998986603, "eval_weighted-recall": 0.6695652173913044, "step": 1617 }, { "epoch": 8.0, "eval_accuracy": 0.6706521739130434, "eval_loss": 1.4838347434997559, "eval_macro-f1": 0.6061542359667785, "eval_macro-precision": 0.611272244576803, "eval_macro-recall": 0.6146650230118652, "eval_runtime": 7.2825, "eval_samples_per_second": 126.33, "eval_steps_per_second": 3.982, "eval_weighted-f1": 0.6720993101087696, "eval_weighted-precision": 0.6820628069048843, "eval_weighted-recall": 0.6706521739130434, "step": 1848 }, { "epoch": 8.66, "learning_rate": 2.8354978354978357e-05, "loss": 0.1507, "step": 2000 }, { "epoch": 9.0, "eval_accuracy": 0.6880434782608695, "eval_loss": 1.5808299779891968, "eval_macro-f1": 0.6506642163845188, "eval_macro-precision": 0.6540683372674448, "eval_macro-recall": 0.6603270904104771, "eval_runtime": 7.2595, "eval_samples_per_second": 126.731, "eval_steps_per_second": 3.995, "eval_weighted-f1": 0.6915267384661025, "eval_weighted-precision": 0.7075992309158492, "eval_weighted-recall": 0.6880434782608695, "step": 2079 }, { "epoch": 10.0, "eval_accuracy": 0.6717391304347826, "eval_loss": 1.650195837020874, "eval_macro-f1": 0.6034609083187376, "eval_macro-precision": 0.6321546665662738, "eval_macro-recall": 0.5917026419660609, "eval_runtime": 7.2865, "eval_samples_per_second": 126.261, "eval_steps_per_second": 3.98, "eval_weighted-f1": 0.6684456866047149, "eval_weighted-precision": 0.6745276629705688, "eval_weighted-recall": 0.6717391304347826, "step": 2310 }, { "epoch": 10.82, "learning_rate": 2.2943722943722946e-05, "loss": 0.0896, "step": 2500 }, { "epoch": 11.0, "eval_accuracy": 0.6804347826086956, "eval_loss": 1.738294005393982, "eval_macro-f1": 0.6302114432029545, "eval_macro-precision": 0.64659326690522, "eval_macro-recall": 0.6353590685660309, "eval_runtime": 7.3402, "eval_samples_per_second": 125.337, "eval_steps_per_second": 3.951, "eval_weighted-f1": 0.6820100289567567, "eval_weighted-precision": 0.6975783236550734, "eval_weighted-recall": 0.6804347826086956, "step": 2541 }, { "epoch": 12.0, "eval_accuracy": 0.6989130434782609, "eval_loss": 1.7147595882415771, "eval_macro-f1": 0.6515414811628367, "eval_macro-precision": 0.6658442974988088, "eval_macro-recall": 0.6496260625897462, "eval_runtime": 7.2787, "eval_samples_per_second": 126.396, "eval_steps_per_second": 3.984, "eval_weighted-f1": 0.6976782715450106, "eval_weighted-precision": 0.7017023034717548, "eval_weighted-recall": 0.6989130434782609, "step": 2772 }, { "epoch": 12.99, "learning_rate": 1.7532467532467535e-05, "loss": 0.0646, "step": 3000 }, { "epoch": 13.0, "eval_accuracy": 0.6891304347826087, "eval_loss": 1.7946357727050781, "eval_macro-f1": 0.648332711071471, "eval_macro-precision": 0.6479765490771864, "eval_macro-recall": 0.6515217848664077, "eval_runtime": 7.3066, "eval_samples_per_second": 125.913, "eval_steps_per_second": 3.969, "eval_weighted-f1": 0.6915983518325557, "eval_weighted-precision": 0.6986485129748002, "eval_weighted-recall": 0.6891304347826087, "step": 3003 }, { "epoch": 14.0, "eval_accuracy": 0.7, "eval_loss": 1.7724699974060059, "eval_macro-f1": 0.6611545464384188, "eval_macro-precision": 0.667409141168159, "eval_macro-recall": 0.6627804433172214, "eval_runtime": 7.2494, "eval_samples_per_second": 126.906, "eval_steps_per_second": 4.0, "eval_weighted-f1": 0.7033455944346818, "eval_weighted-precision": 0.7140252489602517, "eval_weighted-recall": 0.7, "step": 3234 }, { "epoch": 15.0, "eval_accuracy": 0.6923913043478261, "eval_loss": 1.819846510887146, "eval_macro-f1": 0.6556012492821643, "eval_macro-precision": 0.6602686382879858, "eval_macro-recall": 0.6668664107682606, "eval_runtime": 7.2775, "eval_samples_per_second": 126.418, "eval_steps_per_second": 3.985, "eval_weighted-f1": 0.6965952163097968, "eval_weighted-precision": 0.7083445248462037, "eval_weighted-recall": 0.6923913043478261, "step": 3465 }, { "epoch": 15.15, "learning_rate": 1.2121212121212122e-05, "loss": 0.042, "step": 3500 }, { "epoch": 16.0, "eval_accuracy": 0.6945652173913044, "eval_loss": 1.784122109413147, "eval_macro-f1": 0.6563585998978742, "eval_macro-precision": 0.6655291218706761, "eval_macro-recall": 0.6534120619783158, "eval_runtime": 7.297, "eval_samples_per_second": 126.08, "eval_steps_per_second": 3.974, "eval_weighted-f1": 0.6949462526633576, "eval_weighted-precision": 0.6993999521302994, "eval_weighted-recall": 0.6945652173913044, "step": 3696 }, { "epoch": 17.0, "eval_accuracy": 0.6945652173913044, "eval_loss": 1.7921020984649658, "eval_macro-f1": 0.654978142271046, "eval_macro-precision": 0.6614421486999998, "eval_macro-recall": 0.657140253465508, "eval_runtime": 7.359, "eval_samples_per_second": 125.018, "eval_steps_per_second": 3.941, "eval_weighted-f1": 0.6972072799287201, "eval_weighted-precision": 0.705999712282921, "eval_weighted-recall": 0.6945652173913044, "step": 3927 }, { "epoch": 17.32, "learning_rate": 6.709956709956711e-06, "loss": 0.0314, "step": 4000 }, { "epoch": 18.0, "eval_accuracy": 0.6945652173913044, "eval_loss": 1.824020266532898, "eval_macro-f1": 0.6548614235086001, "eval_macro-precision": 0.6544996322588115, "eval_macro-recall": 0.660921203092836, "eval_runtime": 7.3304, "eval_samples_per_second": 125.504, "eval_steps_per_second": 3.956, "eval_weighted-f1": 0.6960752624759597, "eval_weighted-precision": 0.7018632204313372, "eval_weighted-recall": 0.6945652173913044, "step": 4158 }, { "epoch": 19.0, "eval_accuracy": 0.6923913043478261, "eval_loss": 1.8412573337554932, "eval_macro-f1": 0.6506506908222951, "eval_macro-precision": 0.6468541951851238, "eval_macro-recall": 0.6600299174294355, "eval_runtime": 7.2351, "eval_samples_per_second": 127.157, "eval_steps_per_second": 4.008, "eval_weighted-f1": 0.69621624934211, "eval_weighted-precision": 0.7054368084525598, "eval_weighted-recall": 0.6923913043478261, "step": 4389 }, { "epoch": 19.48, "learning_rate": 1.2987012987012988e-06, "loss": 0.0233, "step": 4500 }, { "epoch": 20.0, "eval_accuracy": 0.691304347826087, "eval_loss": 1.8325966596603394, "eval_macro-f1": 0.6463398836192601, "eval_macro-precision": 0.6403230244612891, "eval_macro-recall": 0.6567581821874694, "eval_runtime": 7.2741, "eval_samples_per_second": 126.476, "eval_steps_per_second": 3.987, "eval_weighted-f1": 0.6940975925997279, "eval_weighted-precision": 0.7008420722361637, "eval_weighted-recall": 0.691304347826087, "step": 4620 } ], "logging_steps": 500, "max_steps": 4620, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "total_flos": 3.87544755290112e+16, "train_batch_size": 32, "trial_name": null, "trial_params": null }