{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 19.0,
  "global_step": 61693,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.15,
      "learning_rate": 2.9769017554665847e-05,
      "loss": 0.0666,
      "step": 500
    },
    {
      "epoch": 0.31,
      "learning_rate": 2.953803510933169e-05,
      "loss": 0.0447,
      "step": 1000
    },
    {
      "epoch": 0.46,
      "learning_rate": 2.9307052663997538e-05,
      "loss": 0.0411,
      "step": 1500
    },
    {
      "epoch": 0.62,
      "learning_rate": 2.9076070218663384e-05,
      "loss": 0.038,
      "step": 2000
    },
    {
      "epoch": 0.77,
      "learning_rate": 2.8845087773329226e-05,
      "loss": 0.0346,
      "step": 2500
    },
    {
      "epoch": 0.92,
      "learning_rate": 2.8614105327995072e-05,
      "loss": 0.0332,
      "step": 3000
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.9877246731440231,
      "eval_f1": 0.889670288672294,
      "eval_loss": 0.03370480611920357,
      "eval_precision": 0.8513566583363085,
      "eval_recall": 0.9315948823127258,
      "eval_runtime": 50.7022,
      "eval_samples_per_second": 438.443,
      "eval_steps_per_second": 13.708,
      "step": 3247
    },
    {
      "epoch": 1.08,
      "learning_rate": 2.8383122882660918e-05,
      "loss": 0.0291,
      "step": 3500
    },
    {
      "epoch": 1.23,
      "learning_rate": 2.8152140437326767e-05,
      "loss": 0.0226,
      "step": 4000
    },
    {
      "epoch": 1.39,
      "learning_rate": 2.792115799199261e-05,
      "loss": 0.023,
      "step": 4500
    },
    {
      "epoch": 1.54,
      "learning_rate": 2.7690175546658455e-05,
      "loss": 0.0235,
      "step": 5000
    },
    {
      "epoch": 1.69,
      "learning_rate": 2.74591931013243e-05,
      "loss": 0.023,
      "step": 5500
    },
    {
      "epoch": 1.85,
      "learning_rate": 2.7228210655990143e-05,
      "loss": 0.0218,
      "step": 6000
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.9908514749994501,
      "eval_f1": 0.9142371280113368,
      "eval_loss": 0.0262825395911932,
      "eval_precision": 0.8852986917939805,
      "eval_recall": 0.9451313604844223,
      "eval_runtime": 50.2363,
      "eval_samples_per_second": 442.508,
      "eval_steps_per_second": 13.835,
      "step": 6494
    },
    {
      "epoch": 2.0,
      "learning_rate": 2.6997228210655992e-05,
      "loss": 0.0228,
      "step": 6500
    },
    {
      "epoch": 2.16,
      "learning_rate": 2.6766245765321838e-05,
      "loss": 0.014,
      "step": 7000
    },
    {
      "epoch": 2.31,
      "learning_rate": 2.653526331998768e-05,
      "loss": 0.0143,
      "step": 7500
    },
    {
      "epoch": 2.46,
      "learning_rate": 2.6304280874653526e-05,
      "loss": 0.0158,
      "step": 8000
    },
    {
      "epoch": 2.62,
      "learning_rate": 2.6073298429319372e-05,
      "loss": 0.015,
      "step": 8500
    },
    {
      "epoch": 2.77,
      "learning_rate": 2.5842315983985218e-05,
      "loss": 0.0142,
      "step": 9000
    },
    {
      "epoch": 2.93,
      "learning_rate": 2.5611333538651064e-05,
      "loss": 0.0147,
      "step": 9500
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.9930254150662127,
      "eval_f1": 0.9280089446645269,
      "eval_loss": 0.024331996217370033,
      "eval_precision": 0.9160101990333752,
      "eval_recall": 0.9403262037308331,
      "eval_runtime": 50.2727,
      "eval_samples_per_second": 442.188,
      "eval_steps_per_second": 13.825,
      "step": 9741
    },
    {
      "epoch": 3.08,
      "learning_rate": 2.538035109331691e-05,
      "loss": 0.0122,
      "step": 10000
    },
    {
      "epoch": 3.23,
      "learning_rate": 2.5149368647982755e-05,
      "loss": 0.0103,
      "step": 10500
    },
    {
      "epoch": 3.39,
      "learning_rate": 2.4918386202648598e-05,
      "loss": 0.0095,
      "step": 11000
    },
    {
      "epoch": 3.54,
      "learning_rate": 2.4687403757314447e-05,
      "loss": 0.0111,
      "step": 11500
    },
    {
      "epoch": 3.7,
      "learning_rate": 2.4456421311980292e-05,
      "loss": 0.0099,
      "step": 12000
    },
    {
      "epoch": 3.85,
      "learning_rate": 2.4225438866646135e-05,
      "loss": 0.0097,
      "step": 12500
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.9936326074754469,
      "eval_f1": 0.9362323294406885,
      "eval_loss": 0.027174057438969612,
      "eval_precision": 0.920862222054295,
      "eval_recall": 0.9521242308819221,
      "eval_runtime": 50.3309,
      "eval_samples_per_second": 441.677,
      "eval_steps_per_second": 13.809,
      "step": 12988
    },
    {
      "epoch": 4.0,
      "learning_rate": 2.399445642131198e-05,
      "loss": 0.0102,
      "step": 13000
    },
    {
      "epoch": 4.16,
      "learning_rate": 2.3763473975977826e-05,
      "loss": 0.0076,
      "step": 13500
    },
    {
      "epoch": 4.31,
      "learning_rate": 2.3532491530643672e-05,
      "loss": 0.007,
      "step": 14000
    },
    {
      "epoch": 4.47,
      "learning_rate": 2.3301509085309518e-05,
      "loss": 0.007,
      "step": 14500
    },
    {
      "epoch": 4.62,
      "learning_rate": 2.3070526639975364e-05,
      "loss": 0.0082,
      "step": 15000
    },
    {
      "epoch": 4.77,
      "learning_rate": 2.2839544194641206e-05,
      "loss": 0.0079,
      "step": 15500
    },
    {
      "epoch": 4.93,
      "learning_rate": 2.2608561749307052e-05,
      "loss": 0.0077,
      "step": 16000
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.9943201214002335,
      "eval_f1": 0.9403489135049596,
      "eval_loss": 0.023451806977391243,
      "eval_precision": 0.929177551915486,
      "eval_recall": 0.9517921672038285,
      "eval_runtime": 50.2007,
      "eval_samples_per_second": 442.822,
      "eval_steps_per_second": 13.844,
      "step": 16235
    },
    {
      "epoch": 5.08,
      "learning_rate": 2.2377579303972898e-05,
      "loss": 0.0064,
      "step": 16500
    },
    {
      "epoch": 5.24,
      "learning_rate": 2.2146596858638743e-05,
      "loss": 0.0052,
      "step": 17000
    },
    {
      "epoch": 5.39,
      "learning_rate": 2.191561441330459e-05,
      "loss": 0.0055,
      "step": 17500
    },
    {
      "epoch": 5.54,
      "learning_rate": 2.1684631967970435e-05,
      "loss": 0.0061,
      "step": 18000
    },
    {
      "epoch": 5.7,
      "learning_rate": 2.145364952263628e-05,
      "loss": 0.0056,
      "step": 18500
    },
    {
      "epoch": 5.85,
      "learning_rate": 2.1222667077302123e-05,
      "loss": 0.0054,
      "step": 19000
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.994450643863006,
      "eval_f1": 0.9450084195333173,
      "eval_loss": 0.0263934638351202,
      "eval_precision": 0.9312535558505595,
      "eval_recall": 0.9591757007520265,
      "eval_runtime": 50.2902,
      "eval_samples_per_second": 442.035,
      "eval_steps_per_second": 13.82,
      "step": 19482
    },
    {
      "epoch": 6.01,
      "learning_rate": 2.0991684631967972e-05,
      "loss": 0.0062,
      "step": 19500
    },
    {
      "epoch": 6.16,
      "learning_rate": 2.0760702186633818e-05,
      "loss": 0.0033,
      "step": 20000
    },
    {
      "epoch": 6.31,
      "learning_rate": 2.052971974129966e-05,
      "loss": 0.0042,
      "step": 20500
    },
    {
      "epoch": 6.47,
      "learning_rate": 2.0298737295965506e-05,
      "loss": 0.0042,
      "step": 21000
    },
    {
      "epoch": 6.62,
      "learning_rate": 2.0067754850631352e-05,
      "loss": 0.0045,
      "step": 21500
    },
    {
      "epoch": 6.78,
      "learning_rate": 1.9836772405297198e-05,
      "loss": 0.0039,
      "step": 22000
    },
    {
      "epoch": 6.93,
      "learning_rate": 1.9605789959963044e-05,
      "loss": 0.0043,
      "step": 22500
    },
    {
      "epoch": 7.0,
      "eval_accuracy": 0.9948756785494699,
      "eval_f1": 0.9503865101154663,
      "eval_loss": 0.026261983439326286,
      "eval_precision": 0.9473980473223471,
      "eval_recall": 0.9533938861216916,
      "eval_runtime": 50.3703,
      "eval_samples_per_second": 441.332,
      "eval_steps_per_second": 13.798,
      "step": 22729
    },
    {
      "epoch": 7.08,
      "learning_rate": 1.937480751462889e-05,
      "loss": 0.004,
      "step": 23000
    },
    {
      "epoch": 7.24,
      "learning_rate": 1.914382506929473e-05,
      "loss": 0.0029,
      "step": 23500
    },
    {
      "epoch": 7.39,
      "learning_rate": 1.8912842623960577e-05,
      "loss": 0.0031,
      "step": 24000
    },
    {
      "epoch": 7.55,
      "learning_rate": 1.8681860178626427e-05,
      "loss": 0.0034,
      "step": 24500
    },
    {
      "epoch": 7.7,
      "learning_rate": 1.8450877733292272e-05,
      "loss": 0.0037,
      "step": 25000
    },
    {
      "epoch": 7.85,
      "learning_rate": 1.8219895287958115e-05,
      "loss": 0.0036,
      "step": 25500
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.994658141111516,
      "eval_f1": 0.9494068997650896,
      "eval_loss": 0.02981843240559101,
      "eval_precision": 0.943654361962835,
      "eval_recall": 0.9552300029299736,
      "eval_runtime": 50.1982,
      "eval_samples_per_second": 442.845,
      "eval_steps_per_second": 13.845,
      "step": 25976
    },
    {
      "epoch": 8.01,
      "learning_rate": 1.798891284262396e-05,
      "loss": 0.0036,
      "step": 26000
    },
    {
      "epoch": 8.16,
      "learning_rate": 1.7757930397289806e-05,
      "loss": 0.0027,
      "step": 26500
    },
    {
      "epoch": 8.32,
      "learning_rate": 1.7526947951955652e-05,
      "loss": 0.0026,
      "step": 27000
    },
    {
      "epoch": 8.47,
      "learning_rate": 1.7295965506621498e-05,
      "loss": 0.0029,
      "step": 27500
    },
    {
      "epoch": 8.62,
      "learning_rate": 1.7064983061287344e-05,
      "loss": 0.0028,
      "step": 28000
    },
    {
      "epoch": 8.78,
      "learning_rate": 1.6834000615953186e-05,
      "loss": 0.0029,
      "step": 28500
    },
    {
      "epoch": 8.93,
      "learning_rate": 1.6603018170619032e-05,
      "loss": 0.0027,
      "step": 29000
    },
    {
      "epoch": 9.0,
      "eval_accuracy": 0.9942837854765313,
      "eval_f1": 0.9483075157773954,
      "eval_loss": 0.03117133490741253,
      "eval_precision": 0.9288564203427929,
      "eval_recall": 0.9685906826838558,
      "eval_runtime": 50.2531,
      "eval_samples_per_second": 442.361,
      "eval_steps_per_second": 13.83,
      "step": 29223
    },
    {
      "epoch": 9.09,
      "learning_rate": 1.637203572528488e-05,
      "loss": 0.0028,
      "step": 29500
    },
    {
      "epoch": 9.24,
      "learning_rate": 1.6141053279950723e-05,
      "loss": 0.0019,
      "step": 30000
    },
    {
      "epoch": 9.39,
      "learning_rate": 1.591007083461657e-05,
      "loss": 0.0022,
      "step": 30500
    },
    {
      "epoch": 9.55,
      "learning_rate": 1.5679088389282415e-05,
      "loss": 0.0024,
      "step": 31000
    },
    {
      "epoch": 9.7,
      "learning_rate": 1.544810594394826e-05,
      "loss": 0.0024,
      "step": 31500
    },
    {
      "epoch": 9.86,
      "learning_rate": 1.5217123498614105e-05,
      "loss": 0.0023,
      "step": 32000
    },
    {
      "epoch": 10.0,
      "eval_accuracy": 0.9948642040472482,
      "eval_f1": 0.9512794425354655,
      "eval_loss": 0.036810312420129776,
      "eval_precision": 0.935158800551037,
      "eval_recall": 0.9679656216427386,
      "eval_runtime": 50.4106,
      "eval_samples_per_second": 440.978,
      "eval_steps_per_second": 13.787,
      "step": 32470
    },
    {
      "epoch": 10.01,
      "learning_rate": 1.4986141053279952e-05,
      "loss": 0.0024,
      "step": 32500
    },
    {
      "epoch": 10.16,
      "learning_rate": 1.4755158607945796e-05,
      "loss": 0.0018,
      "step": 33000
    },
    {
      "epoch": 10.32,
      "learning_rate": 1.4524176162611642e-05,
      "loss": 0.0018,
      "step": 33500
    },
    {
      "epoch": 10.47,
      "learning_rate": 1.4293193717277488e-05,
      "loss": 0.002,
      "step": 34000
    },
    {
      "epoch": 10.63,
      "learning_rate": 1.4062211271943332e-05,
      "loss": 0.0024,
      "step": 34500
    },
    {
      "epoch": 10.78,
      "learning_rate": 1.383122882660918e-05,
      "loss": 0.002,
      "step": 35000
    },
    {
      "epoch": 10.93,
      "learning_rate": 1.3600246381275023e-05,
      "loss": 0.0019,
      "step": 35500
    },
    {
      "epoch": 11.0,
      "eval_accuracy": 0.9952223041374186,
      "eval_f1": 0.9571868114802629,
      "eval_loss": 0.034278471022844315,
      "eval_precision": 0.9512868001697727,
      "eval_recall": 0.9631604648891493,
      "eval_runtime": 50.1571,
      "eval_samples_per_second": 443.208,
      "eval_steps_per_second": 13.856,
      "step": 35717
    },
    {
      "epoch": 11.09,
      "learning_rate": 1.3369263935940867e-05,
      "loss": 0.0018,
      "step": 36000
    },
    {
      "epoch": 11.24,
      "learning_rate": 1.3138281490606715e-05,
      "loss": 0.0014,
      "step": 36500
    },
    {
      "epoch": 11.4,
      "learning_rate": 1.2907299045272559e-05,
      "loss": 0.0016,
      "step": 37000
    },
    {
      "epoch": 11.55,
      "learning_rate": 1.2676316599938406e-05,
      "loss": 0.0016,
      "step": 37500
    },
    {
      "epoch": 11.7,
      "learning_rate": 1.244533415460425e-05,
      "loss": 0.0014,
      "step": 38000
    },
    {
      "epoch": 11.86,
      "learning_rate": 1.2214351709270095e-05,
      "loss": 0.0018,
      "step": 38500
    },
    {
      "epoch": 12.0,
      "eval_accuracy": 0.9950344091635375,
      "eval_f1": 0.9542591914950985,
      "eval_loss": 0.03573078662157059,
      "eval_precision": 0.941064747108317,
      "eval_recall": 0.9678288895399941,
      "eval_runtime": 50.2611,
      "eval_samples_per_second": 442.291,
      "eval_steps_per_second": 13.828,
      "step": 38964
    },
    {
      "epoch": 12.01,
      "learning_rate": 1.1983369263935942e-05,
      "loss": 0.0015,
      "step": 39000
    },
    {
      "epoch": 12.17,
      "learning_rate": 1.1752386818601786e-05,
      "loss": 0.0014,
      "step": 39500
    },
    {
      "epoch": 12.32,
      "learning_rate": 1.1521404373267632e-05,
      "loss": 0.0016,
      "step": 40000
    },
    {
      "epoch": 12.47,
      "learning_rate": 1.1290421927933478e-05,
      "loss": 0.0017,
      "step": 40500
    },
    {
      "epoch": 12.63,
      "learning_rate": 1.1059439482599322e-05,
      "loss": 0.0011,
      "step": 41000
    },
    {
      "epoch": 12.78,
      "learning_rate": 1.082845703726517e-05,
      "loss": 0.0014,
      "step": 41500
    },
    {
      "epoch": 12.94,
      "learning_rate": 1.0597474591931013e-05,
      "loss": 0.0014,
      "step": 42000
    },
    {
      "epoch": 13.0,
      "eval_accuracy": 0.9954953016694444,
      "eval_f1": 0.9582205552548141,
      "eval_loss": 0.034824222326278687,
      "eval_precision": 0.9485197022180545,
      "eval_recall": 0.9681218869030179,
      "eval_runtime": 50.5254,
      "eval_samples_per_second": 439.977,
      "eval_steps_per_second": 13.755,
      "step": 42211
    },
    {
      "epoch": 13.09,
      "learning_rate": 1.0366492146596857e-05,
      "loss": 0.0013,
      "step": 42500
    },
    {
      "epoch": 13.24,
      "learning_rate": 1.0135509701262705e-05,
      "loss": 0.001,
      "step": 43000
    },
    {
      "epoch": 13.4,
      "learning_rate": 9.904527255928549e-06,
      "loss": 0.0011,
      "step": 43500
    },
    {
      "epoch": 13.55,
      "learning_rate": 9.673544810594396e-06,
      "loss": 0.0012,
      "step": 44000
    },
    {
      "epoch": 13.7,
      "learning_rate": 9.44256236526024e-06,
      "loss": 0.0013,
      "step": 44500
    },
    {
      "epoch": 13.86,
      "learning_rate": 9.211579919926085e-06,
      "loss": 0.0012,
      "step": 45000
    },
    {
      "epoch": 14.0,
      "eval_accuracy": 0.99525290281001,
      "eval_f1": 0.9559630182389084,
      "eval_loss": 0.03898163139820099,
      "eval_precision": 0.9486449577811544,
      "eval_recall": 0.9633948627795683,
      "eval_runtime": 50.2226,
      "eval_samples_per_second": 442.629,
      "eval_steps_per_second": 13.838,
      "step": 45458
    },
    {
      "epoch": 14.01,
      "learning_rate": 8.980597474591932e-06,
      "loss": 0.0012,
      "step": 45500
    },
    {
      "epoch": 14.17,
      "learning_rate": 8.749615029257776e-06,
      "loss": 0.0007,
      "step": 46000
    },
    {
      "epoch": 14.32,
      "learning_rate": 8.518632583923622e-06,
      "loss": 0.0009,
      "step": 46500
    },
    {
      "epoch": 14.47,
      "learning_rate": 8.287650138589468e-06,
      "loss": 0.0013,
      "step": 47000
    },
    {
      "epoch": 14.63,
      "learning_rate": 8.056667693255312e-06,
      "loss": 0.0012,
      "step": 47500
    },
    {
      "epoch": 14.78,
      "learning_rate": 7.82568524792116e-06,
      "loss": 0.001,
      "step": 48000
    },
    {
      "epoch": 14.94,
      "learning_rate": 7.594702802587003e-06,
      "loss": 0.001,
      "step": 48500
    },
    {
      "epoch": 15.0,
      "eval_accuracy": 0.9952519466014915,
      "eval_f1": 0.9577630980820753,
      "eval_loss": 0.04225644841790199,
      "eval_precision": 0.9485612475573777,
      "eval_recall": 0.9671452290262721,
      "eval_runtime": 56.1286,
      "eval_samples_per_second": 396.055,
      "eval_steps_per_second": 12.382,
      "step": 48705
    },
    {
      "epoch": 15.09,
      "learning_rate": 7.363720357252849e-06,
      "loss": 0.0009,
      "step": 49000
    },
    {
      "epoch": 15.24,
      "learning_rate": 7.132737911918695e-06,
      "loss": 0.0008,
      "step": 49500
    },
    {
      "epoch": 15.4,
      "learning_rate": 6.90175546658454e-06,
      "loss": 0.0009,
      "step": 50000
    },
    {
      "epoch": 15.55,
      "learning_rate": 6.6707730212503855e-06,
      "loss": 0.0008,
      "step": 50500
    },
    {
      "epoch": 15.71,
      "learning_rate": 6.43979057591623e-06,
      "loss": 0.0007,
      "step": 51000
    },
    {
      "epoch": 15.86,
      "learning_rate": 6.208808130582076e-06,
      "loss": 0.0008,
      "step": 51500
    },
    {
      "epoch": 16.0,
      "eval_accuracy": 0.9950430150402038,
      "eval_f1": 0.9558787995940656,
      "eval_loss": 0.04262382909655571,
      "eval_precision": 0.9460493590969964,
      "eval_recall": 0.9659146401015725,
      "eval_runtime": 50.4693,
      "eval_samples_per_second": 440.466,
      "eval_steps_per_second": 13.771,
      "step": 51952
    },
    {
      "epoch": 16.01,
      "learning_rate": 5.977825685247921e-06,
      "loss": 0.0008,
      "step": 52000
    },
    {
      "epoch": 16.17,
      "learning_rate": 5.746843239913767e-06,
      "loss": 0.0008,
      "step": 52500
    },
    {
      "epoch": 16.32,
      "learning_rate": 5.515860794579612e-06,
      "loss": 0.0008,
      "step": 53000
    },
    {
      "epoch": 16.48,
      "learning_rate": 5.2848783492454576e-06,
      "loss": 0.0009,
      "step": 53500
    },
    {
      "epoch": 16.63,
      "learning_rate": 5.0538959039113025e-06,
      "loss": 0.0008,
      "step": 54000
    },
    {
      "epoch": 16.78,
      "learning_rate": 4.822913458577148e-06,
      "loss": 0.0006,
      "step": 54500
    },
    {
      "epoch": 16.94,
      "learning_rate": 4.591931013242994e-06,
      "loss": 0.001,
      "step": 55000
    },
    {
      "epoch": 17.0,
      "eval_accuracy": 0.9952113077394561,
      "eval_f1": 0.9582838092477171,
      "eval_loss": 0.04263555258512497,
      "eval_precision": 0.9482501434308663,
      "eval_recall": 0.9685320832112511,
      "eval_runtime": 50.1705,
      "eval_samples_per_second": 443.089,
      "eval_steps_per_second": 13.853,
      "step": 55199
    },
    {
      "epoch": 17.09,
      "learning_rate": 4.360948567908839e-06,
      "loss": 0.0008,
      "step": 55500
    },
    {
      "epoch": 17.25,
      "learning_rate": 4.129966122574684e-06,
      "loss": 0.0008,
      "step": 56000
    },
    {
      "epoch": 17.4,
      "learning_rate": 3.89898367724053e-06,
      "loss": 0.0008,
      "step": 56500
    },
    {
      "epoch": 17.55,
      "learning_rate": 3.668001231906375e-06,
      "loss": 0.0007,
      "step": 57000
    },
    {
      "epoch": 17.71,
      "learning_rate": 3.4370187865722208e-06,
      "loss": 0.0007,
      "step": 57500
    },
    {
      "epoch": 17.86,
      "learning_rate": 3.206036341238066e-06,
      "loss": 0.0006,
      "step": 58000
    },
    {
      "epoch": 18.0,
      "eval_accuracy": 0.9952199136161224,
      "eval_f1": 0.9574789135649616,
      "eval_loss": 0.045220062136650085,
      "eval_precision": 0.9483607656785913,
      "eval_recall": 0.9667740990331087,
      "eval_runtime": 50.3784,
      "eval_samples_per_second": 441.26,
      "eval_steps_per_second": 13.796,
      "step": 58446
    },
    {
      "epoch": 18.02,
      "learning_rate": 2.9750538959039115e-06,
      "loss": 0.0007,
      "step": 58500
    },
    {
      "epoch": 18.17,
      "learning_rate": 2.744071450569757e-06,
      "loss": 0.0007,
      "step": 59000
    },
    {
      "epoch": 18.32,
      "learning_rate": 2.513089005235602e-06,
      "loss": 0.0006,
      "step": 59500
    },
    {
      "epoch": 18.48,
      "learning_rate": 2.2821065599014475e-06,
      "loss": 0.0006,
      "step": 60000
    },
    {
      "epoch": 18.63,
      "learning_rate": 2.051124114567293e-06,
      "loss": 0.0007,
      "step": 60500
    },
    {
      "epoch": 18.79,
      "learning_rate": 1.8201416692331382e-06,
      "loss": 0.0007,
      "step": 61000
    },
    {
      "epoch": 18.94,
      "learning_rate": 1.5891592238989836e-06,
      "loss": 0.0006,
      "step": 61500
    },
    {
      "epoch": 19.0,
      "eval_accuracy": 0.9954895644183336,
      "eval_f1": 0.959768669851647,
      "eval_loss": 0.046882398426532745,
      "eval_precision": 0.9504664125500412,
      "eval_recall": 0.969254810040043,
      "eval_runtime": 50.5066,
      "eval_samples_per_second": 440.141,
      "eval_steps_per_second": 13.761,
      "step": 61693
    }
  ],
  "max_steps": 64940,
  "num_train_epochs": 20,
  "total_flos": 5.157312926507336e+17,
  "trial_name": null,
  "trial_params": null
}