{ "best_metric": 0.6057453163676313, "best_model_checkpoint": "../saved_model/tibetan-bert_tncc-title_v3/checkpoint-4640", "epoch": 20.0, "eval_steps": 500, "global_step": 4640, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.5717367853290184, "eval_loss": 1.3059929609298706, "eval_macro-f1": 0.4599932337899495, "eval_macro-precision": 0.5198165980322329, "eval_macro-recall": 0.47524513594607626, "eval_runtime": 7.3569, "eval_samples_per_second": 126.004, "eval_steps_per_second": 3.942, "eval_weighted-f1": 0.5542294574300444, "eval_weighted-precision": 0.591566389333376, "eval_weighted-recall": 0.5717367853290184, "step": 232 }, { "epoch": 2.0, "eval_accuracy": 0.6148867313915858, "eval_loss": 1.2135106325149536, "eval_macro-f1": 0.5584398853408242, "eval_macro-precision": 0.5749611619851209, "eval_macro-recall": 0.5695895916407867, "eval_runtime": 7.3464, "eval_samples_per_second": 126.185, "eval_steps_per_second": 3.948, "eval_weighted-f1": 0.608666635280638, "eval_weighted-precision": 0.6187269920143599, "eval_weighted-recall": 0.6148867313915858, "step": 464 }, { "epoch": 2.16, "learning_rate": 4.461206896551724e-05, "loss": 1.2716, "step": 500 }, { "epoch": 3.0, "eval_accuracy": 0.6170442286947141, "eval_loss": 1.2330070734024048, "eval_macro-f1": 0.5516603706806994, "eval_macro-precision": 0.575159910949547, "eval_macro-recall": 0.5503575961333385, "eval_runtime": 7.2744, "eval_samples_per_second": 127.433, "eval_steps_per_second": 3.987, "eval_weighted-f1": 0.6100508919269417, "eval_weighted-precision": 0.6247775875728347, "eval_weighted-recall": 0.6170442286947141, "step": 696 }, { "epoch": 4.0, "eval_accuracy": 0.6192017259978425, "eval_loss": 1.3646526336669922, "eval_macro-f1": 0.5686376693802021, "eval_macro-precision": 0.5941264267230656, "eval_macro-recall": 0.5623900177716177, "eval_runtime": 7.3362, "eval_samples_per_second": 126.359, "eval_steps_per_second": 3.953, "eval_weighted-f1": 0.6165925170578278, "eval_weighted-precision": 0.6295963103897588, "eval_weighted-recall": 0.6192017259978425, "step": 928 }, { "epoch": 4.31, "learning_rate": 3.922413793103448e-05, "loss": 0.5952, "step": 1000 }, { "epoch": 5.0, "eval_accuracy": 0.6256742179072277, "eval_loss": 1.435981035232544, "eval_macro-f1": 0.5864237155073179, "eval_macro-precision": 0.641734562145522, "eval_macro-recall": 0.5708896457267656, "eval_runtime": 7.3366, "eval_samples_per_second": 126.353, "eval_steps_per_second": 3.953, "eval_weighted-f1": 0.6206257136654274, "eval_weighted-precision": 0.6340957047257687, "eval_weighted-recall": 0.6256742179072277, "step": 1160 }, { "epoch": 6.0, "eval_accuracy": 0.6213592233009708, "eval_loss": 1.639809489250183, "eval_macro-f1": 0.5665517506857142, "eval_macro-precision": 0.579234133027197, "eval_macro-recall": 0.5645134686065383, "eval_runtime": 7.3778, "eval_samples_per_second": 125.647, "eval_steps_per_second": 3.931, "eval_weighted-f1": 0.61941625450029, "eval_weighted-precision": 0.6256700095561613, "eval_weighted-recall": 0.6213592233009708, "step": 1392 }, { "epoch": 6.47, "learning_rate": 3.383620689655172e-05, "loss": 0.259, "step": 1500 }, { "epoch": 7.0, "eval_accuracy": 0.6127292340884574, "eval_loss": 1.7420923709869385, "eval_macro-f1": 0.5600620804632168, "eval_macro-precision": 0.580446830474752, "eval_macro-recall": 0.5638504244142538, "eval_runtime": 7.3417, "eval_samples_per_second": 126.265, "eval_steps_per_second": 3.95, "eval_weighted-f1": 0.609536126843797, "eval_weighted-precision": 0.6248423019885316, "eval_weighted-recall": 0.6127292340884574, "step": 1624 }, { "epoch": 8.0, "eval_accuracy": 0.6138079827400216, "eval_loss": 1.8710674047470093, "eval_macro-f1": 0.5822983345441143, "eval_macro-precision": 0.5948063750427987, "eval_macro-recall": 0.5794954217168365, "eval_runtime": 7.3262, "eval_samples_per_second": 126.532, "eval_steps_per_second": 3.958, "eval_weighted-f1": 0.6125939186272255, "eval_weighted-precision": 0.6180044167574804, "eval_weighted-recall": 0.6138079827400216, "step": 1856 }, { "epoch": 8.62, "learning_rate": 2.844827586206897e-05, "loss": 0.1433, "step": 2000 }, { "epoch": 9.0, "eval_accuracy": 0.6084142394822006, "eval_loss": 1.9591827392578125, "eval_macro-f1": 0.5632816067487086, "eval_macro-precision": 0.5909968207291227, "eval_macro-recall": 0.5509900757560654, "eval_runtime": 7.3577, "eval_samples_per_second": 125.991, "eval_steps_per_second": 3.941, "eval_weighted-f1": 0.6058400058395833, "eval_weighted-precision": 0.615382962851156, "eval_weighted-recall": 0.6084142394822006, "step": 2088 }, { "epoch": 10.0, "eval_accuracy": 0.627831715210356, "eval_loss": 1.9844281673431396, "eval_macro-f1": 0.5720261323200381, "eval_macro-precision": 0.6115468809046185, "eval_macro-recall": 0.5521363552960614, "eval_runtime": 7.2941, "eval_samples_per_second": 127.089, "eval_steps_per_second": 3.976, "eval_weighted-f1": 0.6186046402157037, "eval_weighted-precision": 0.6241715352593074, "eval_weighted-recall": 0.627831715210356, "step": 2320 }, { "epoch": 10.78, "learning_rate": 2.306034482758621e-05, "loss": 0.0918, "step": 2500 }, { "epoch": 11.0, "eval_accuracy": 0.622437971952535, "eval_loss": 2.121650218963623, "eval_macro-f1": 0.575328889968513, "eval_macro-precision": 0.6046131450610978, "eval_macro-recall": 0.5644507417595815, "eval_runtime": 7.3602, "eval_samples_per_second": 125.948, "eval_steps_per_second": 3.94, "eval_weighted-f1": 0.6210201377968305, "eval_weighted-precision": 0.6318263677896466, "eval_weighted-recall": 0.622437971952535, "step": 2552 }, { "epoch": 12.0, "eval_accuracy": 0.6148867313915858, "eval_loss": 2.1600427627563477, "eval_macro-f1": 0.5634622559142987, "eval_macro-precision": 0.5755339049051247, "eval_macro-recall": 0.5624852754080202, "eval_runtime": 7.3312, "eval_samples_per_second": 126.446, "eval_steps_per_second": 3.956, "eval_weighted-f1": 0.611920153364688, "eval_weighted-precision": 0.6153916861013311, "eval_weighted-recall": 0.6148867313915858, "step": 2784 }, { "epoch": 12.93, "learning_rate": 1.767241379310345e-05, "loss": 0.0677, "step": 3000 }, { "epoch": 13.0, "eval_accuracy": 0.627831715210356, "eval_loss": 2.1390113830566406, "eval_macro-f1": 0.5765388673761228, "eval_macro-precision": 0.5891444176758638, "eval_macro-recall": 0.5793730614376024, "eval_runtime": 7.2808, "eval_samples_per_second": 127.321, "eval_steps_per_second": 3.983, "eval_weighted-f1": 0.6268062992828414, "eval_weighted-precision": 0.6385775273222236, "eval_weighted-recall": 0.627831715210356, "step": 3016 }, { "epoch": 14.0, "eval_accuracy": 0.6440129449838188, "eval_loss": 2.148944854736328, "eval_macro-f1": 0.6041323498671228, "eval_macro-precision": 0.6330168044904465, "eval_macro-recall": 0.5906969006063283, "eval_runtime": 7.3225, "eval_samples_per_second": 126.597, "eval_steps_per_second": 3.96, "eval_weighted-f1": 0.6385550648663262, "eval_weighted-precision": 0.645638414060538, "eval_weighted-recall": 0.6440129449838188, "step": 3248 }, { "epoch": 15.0, "eval_accuracy": 0.6353829557713053, "eval_loss": 2.176727056503296, "eval_macro-f1": 0.5855092053952029, "eval_macro-precision": 0.6205452521876899, "eval_macro-recall": 0.5749431187067001, "eval_runtime": 7.2634, "eval_samples_per_second": 127.627, "eval_steps_per_second": 3.993, "eval_weighted-f1": 0.6299837224200732, "eval_weighted-precision": 0.6440851231945072, "eval_weighted-recall": 0.6353829557713053, "step": 3480 }, { "epoch": 15.09, "learning_rate": 1.228448275862069e-05, "loss": 0.0481, "step": 3500 }, { "epoch": 16.0, "eval_accuracy": 0.6343042071197411, "eval_loss": 2.2005436420440674, "eval_macro-f1": 0.5953777118862926, "eval_macro-precision": 0.6279439331751326, "eval_macro-recall": 0.5827314498455615, "eval_runtime": 7.3585, "eval_samples_per_second": 125.977, "eval_steps_per_second": 3.941, "eval_weighted-f1": 0.6295400705432875, "eval_weighted-precision": 0.6407972206921422, "eval_weighted-recall": 0.6343042071197411, "step": 3712 }, { "epoch": 17.0, "eval_accuracy": 0.6353829557713053, "eval_loss": 2.2075998783111572, "eval_macro-f1": 0.5922748273767572, "eval_macro-precision": 0.6185327701706522, "eval_macro-recall": 0.5831902307851583, "eval_runtime": 7.315, "eval_samples_per_second": 126.726, "eval_steps_per_second": 3.964, "eval_weighted-f1": 0.6292221063855338, "eval_weighted-precision": 0.640744637974118, "eval_weighted-recall": 0.6353829557713053, "step": 3944 }, { "epoch": 17.24, "learning_rate": 6.896551724137932e-06, "loss": 0.0344, "step": 4000 }, { "epoch": 18.0, "eval_accuracy": 0.639697950377562, "eval_loss": 2.186396598815918, "eval_macro-f1": 0.5937344000879472, "eval_macro-precision": 0.6082172147504935, "eval_macro-recall": 0.586912298660763, "eval_runtime": 7.2683, "eval_samples_per_second": 127.54, "eval_steps_per_second": 3.99, "eval_weighted-f1": 0.6356937811215412, "eval_weighted-precision": 0.6386402558809051, "eval_weighted-recall": 0.639697950377562, "step": 4176 }, { "epoch": 19.0, "eval_accuracy": 0.6461704422869471, "eval_loss": 2.172349691390991, "eval_macro-f1": 0.6048666691904897, "eval_macro-precision": 0.6140548549534073, "eval_macro-recall": 0.6011637605520835, "eval_runtime": 7.3386, "eval_samples_per_second": 126.318, "eval_steps_per_second": 3.952, "eval_weighted-f1": 0.6432377710619339, "eval_weighted-precision": 0.6448782718814935, "eval_weighted-recall": 0.6461704422869471, "step": 4408 }, { "epoch": 19.4, "learning_rate": 1.5086206896551726e-06, "loss": 0.0272, "step": 4500 }, { "epoch": 20.0, "eval_accuracy": 0.6461704422869471, "eval_loss": 2.182253837585449, "eval_macro-f1": 0.6057453163676313, "eval_macro-precision": 0.6251264122566944, "eval_macro-recall": 0.5956135892129149, "eval_runtime": 7.2862, "eval_samples_per_second": 127.227, "eval_steps_per_second": 3.98, "eval_weighted-f1": 0.6423371190506697, "eval_weighted-precision": 0.645041986434426, "eval_weighted-recall": 0.6461704422869471, "step": 4640 } ], "logging_steps": 500, "max_steps": 4640, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "total_flos": 3.90597117566976e+16, "train_batch_size": 32, "trial_name": null, "trial_params": null }