|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 15.0, |
|
"global_step": 12240, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.9183006535947716e-05, |
|
"loss": 0.4329, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.9725273278019229, |
|
"eval_f1": 0.9629807384654764, |
|
"eval_loss": 0.10430943965911865, |
|
"eval_precision": 0.9637516411028211, |
|
"eval_recall": 0.9622110681287765, |
|
"eval_runtime": 2.936, |
|
"eval_samples_per_second": 494.214, |
|
"eval_steps_per_second": 30.995, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 1.8366013071895427e-05, |
|
"loss": 0.0811, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 1.7549019607843138e-05, |
|
"loss": 0.0607, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.9751349927564862, |
|
"eval_f1": 0.9635150914122107, |
|
"eval_loss": 0.09400150179862976, |
|
"eval_precision": 0.9641165172855314, |
|
"eval_recall": 0.9629144154224879, |
|
"eval_runtime": 2.7493, |
|
"eval_samples_per_second": 527.777, |
|
"eval_steps_per_second": 33.1, |
|
"step": 1632 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 1.6732026143790852e-05, |
|
"loss": 0.0428, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.9755037534571316, |
|
"eval_f1": 0.9632957125043962, |
|
"eval_loss": 0.09555820375680923, |
|
"eval_precision": 0.9633573141486811, |
|
"eval_recall": 0.9632341187378113, |
|
"eval_runtime": 2.7415, |
|
"eval_samples_per_second": 529.269, |
|
"eval_steps_per_second": 33.193, |
|
"step": 2448 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 1.5915032679738563e-05, |
|
"loss": 0.0363, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"learning_rate": 1.5098039215686276e-05, |
|
"loss": 0.0249, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.9765046753588832, |
|
"eval_f1": 0.9649540551338394, |
|
"eval_loss": 0.0989251509308815, |
|
"eval_precision": 0.9646919734151329, |
|
"eval_recall": 0.9652162792928163, |
|
"eval_runtime": 2.7759, |
|
"eval_samples_per_second": 522.708, |
|
"eval_steps_per_second": 32.782, |
|
"step": 3264 |
|
}, |
|
{ |
|
"epoch": 4.29, |
|
"learning_rate": 1.4281045751633989e-05, |
|
"loss": 0.0221, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"learning_rate": 1.3464052287581701e-05, |
|
"loss": 0.0177, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.976451995258791, |
|
"eval_f1": 0.9646675296035285, |
|
"eval_loss": 0.10913769155740738, |
|
"eval_precision": 0.9643747204294204, |
|
"eval_recall": 0.9649605166405576, |
|
"eval_runtime": 3.7551, |
|
"eval_samples_per_second": 386.403, |
|
"eval_steps_per_second": 24.233, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 5.51, |
|
"learning_rate": 1.2647058823529412e-05, |
|
"loss": 0.0129, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.9780060582115107, |
|
"eval_f1": 0.9675017575254042, |
|
"eval_loss": 0.11124212294816971, |
|
"eval_precision": 0.967038231818327, |
|
"eval_recall": 0.9679657278045973, |
|
"eval_runtime": 3.8249, |
|
"eval_samples_per_second": 379.357, |
|
"eval_steps_per_second": 23.792, |
|
"step": 4896 |
|
}, |
|
{ |
|
"epoch": 6.13, |
|
"learning_rate": 1.1830065359477125e-05, |
|
"loss": 0.0119, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 6.74, |
|
"learning_rate": 1.1013071895424838e-05, |
|
"loss": 0.0084, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.9783221388120638, |
|
"eval_f1": 0.9677996994212259, |
|
"eval_loss": 0.12139276415109634, |
|
"eval_precision": 0.967985415933732, |
|
"eval_recall": 0.9676140541577416, |
|
"eval_runtime": 3.747, |
|
"eval_samples_per_second": 387.246, |
|
"eval_steps_per_second": 24.286, |
|
"step": 5712 |
|
}, |
|
{ |
|
"epoch": 7.35, |
|
"learning_rate": 1.0196078431372549e-05, |
|
"loss": 0.0076, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 7.97, |
|
"learning_rate": 9.379084967320261e-06, |
|
"loss": 0.0061, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.9775319373106809, |
|
"eval_f1": 0.9664444551020734, |
|
"eval_loss": 0.13343702256679535, |
|
"eval_precision": 0.9665217113257019, |
|
"eval_recall": 0.9663672112279804, |
|
"eval_runtime": 2.7649, |
|
"eval_samples_per_second": 524.787, |
|
"eval_steps_per_second": 32.912, |
|
"step": 6528 |
|
}, |
|
{ |
|
"epoch": 8.58, |
|
"learning_rate": 8.562091503267974e-06, |
|
"loss": 0.0048, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.9775055972606348, |
|
"eval_f1": 0.9661577811525858, |
|
"eval_loss": 0.13892702758312225, |
|
"eval_precision": 0.9662041181736795, |
|
"eval_recall": 0.9661114485757217, |
|
"eval_runtime": 3.8371, |
|
"eval_samples_per_second": 378.146, |
|
"eval_steps_per_second": 23.716, |
|
"step": 7344 |
|
}, |
|
{ |
|
"epoch": 9.19, |
|
"learning_rate": 7.745098039215687e-06, |
|
"loss": 0.0047, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 9.8, |
|
"learning_rate": 6.928104575163399e-06, |
|
"loss": 0.0044, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.9781640985117872, |
|
"eval_f1": 0.967271564473579, |
|
"eval_loss": 0.14489226043224335, |
|
"eval_precision": 0.9674726540011515, |
|
"eval_recall": 0.9670705585216919, |
|
"eval_runtime": 3.7406, |
|
"eval_samples_per_second": 387.909, |
|
"eval_steps_per_second": 24.328, |
|
"step": 8160 |
|
}, |
|
{ |
|
"epoch": 10.42, |
|
"learning_rate": 6.111111111111112e-06, |
|
"loss": 0.0034, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.9779797181614646, |
|
"eval_f1": 0.9672149490880608, |
|
"eval_loss": 0.14611582458019257, |
|
"eval_precision": 0.9671994884910486, |
|
"eval_recall": 0.9672304101793535, |
|
"eval_runtime": 2.8969, |
|
"eval_samples_per_second": 500.886, |
|
"eval_steps_per_second": 31.413, |
|
"step": 8976 |
|
}, |
|
{ |
|
"epoch": 11.03, |
|
"learning_rate": 5.294117647058824e-06, |
|
"loss": 0.0034, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 11.64, |
|
"learning_rate": 4.477124183006537e-06, |
|
"loss": 0.0025, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.9784274990122481, |
|
"eval_f1": 0.9676501159350764, |
|
"eval_loss": 0.14966140687465668, |
|
"eval_precision": 0.9680061428205784, |
|
"eval_recall": 0.9672943508424182, |
|
"eval_runtime": 2.7972, |
|
"eval_samples_per_second": 518.731, |
|
"eval_steps_per_second": 32.532, |
|
"step": 9792 |
|
}, |
|
{ |
|
"epoch": 12.25, |
|
"learning_rate": 3.6601307189542484e-06, |
|
"loss": 0.0022, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 12.87, |
|
"learning_rate": 2.843137254901961e-06, |
|
"loss": 0.0024, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.977848017911234, |
|
"eval_f1": 0.9670199670679264, |
|
"eval_loss": 0.15780866146087646, |
|
"eval_precision": 0.9670972692971798, |
|
"eval_recall": 0.9669426771955625, |
|
"eval_runtime": 3.8111, |
|
"eval_samples_per_second": 380.734, |
|
"eval_steps_per_second": 23.878, |
|
"step": 10608 |
|
}, |
|
{ |
|
"epoch": 13.48, |
|
"learning_rate": 2.0261437908496734e-06, |
|
"loss": 0.0014, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.978111418411695, |
|
"eval_f1": 0.9674521213671389, |
|
"eval_loss": 0.15866732597351074, |
|
"eval_precision": 0.9675139888089528, |
|
"eval_recall": 0.9673902618370153, |
|
"eval_runtime": 2.8382, |
|
"eval_samples_per_second": 511.243, |
|
"eval_steps_per_second": 32.063, |
|
"step": 11424 |
|
}, |
|
{ |
|
"epoch": 14.09, |
|
"learning_rate": 1.2091503267973858e-06, |
|
"loss": 0.0019, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 14.71, |
|
"learning_rate": 3.921568627450981e-07, |
|
"loss": 0.0016, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.978401158962202, |
|
"eval_f1": 0.96785434549785, |
|
"eval_loss": 0.15796583890914917, |
|
"eval_precision": 0.9678388746803069, |
|
"eval_recall": 0.9678698168100003, |
|
"eval_runtime": 3.7576, |
|
"eval_samples_per_second": 386.151, |
|
"eval_steps_per_second": 24.218, |
|
"step": 12240 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"step": 12240, |
|
"total_flos": 5.119363918698906e+16, |
|
"train_loss": 0.03262665262993644, |
|
"train_runtime": 785.1159, |
|
"train_samples_per_second": 249.403, |
|
"train_steps_per_second": 15.59 |
|
} |
|
], |
|
"max_steps": 12240, |
|
"num_train_epochs": 15, |
|
"total_flos": 5.119363918698906e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|