{ "best_metric": 0.9738785407252287, "best_model_checkpoint": "drive/MyDrive/albertina/checkpoint-9000", "epoch": 0.8024966562639322, "global_step": 9000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 2.0000000000000002e-07, "loss": 0.7187, "step": 10 }, { "epoch": 0.0, "learning_rate": 4.0000000000000003e-07, "loss": 0.7199, "step": 20 }, { "epoch": 0.0, "learning_rate": 6.000000000000001e-07, "loss": 0.6146, "step": 30 }, { "epoch": 0.0, "learning_rate": 8.000000000000001e-07, "loss": 0.6195, "step": 40 }, { "epoch": 0.0, "learning_rate": 1.0000000000000002e-06, "loss": 0.6245, "step": 50 }, { "epoch": 0.01, "learning_rate": 1.2000000000000002e-06, "loss": 0.6241, "step": 60 }, { "epoch": 0.01, "learning_rate": 1.4000000000000001e-06, "loss": 0.5657, "step": 70 }, { "epoch": 0.01, "learning_rate": 1.6000000000000001e-06, "loss": 0.4948, "step": 80 }, { "epoch": 0.01, "learning_rate": 1.8000000000000001e-06, "loss": 0.5167, "step": 90 }, { "epoch": 0.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.4677, "step": 100 }, { "epoch": 0.01, "learning_rate": 2.2e-06, "loss": 0.4529, "step": 110 }, { "epoch": 0.01, "learning_rate": 2.4000000000000003e-06, "loss": 0.4148, "step": 120 }, { "epoch": 0.01, "learning_rate": 2.6e-06, "loss": 0.3799, "step": 130 }, { "epoch": 0.01, "learning_rate": 2.8000000000000003e-06, "loss": 0.3091, "step": 140 }, { "epoch": 0.01, "learning_rate": 3e-06, "loss": 0.3131, "step": 150 }, { "epoch": 0.01, "learning_rate": 3.2000000000000003e-06, "loss": 0.2855, "step": 160 }, { "epoch": 0.02, "learning_rate": 3.4000000000000005e-06, "loss": 0.2601, "step": 170 }, { "epoch": 0.02, "learning_rate": 3.6000000000000003e-06, "loss": 0.193, "step": 180 }, { "epoch": 0.02, "learning_rate": 3.8000000000000005e-06, "loss": 0.2041, "step": 190 }, { "epoch": 0.02, "learning_rate": 4.000000000000001e-06, "loss": 0.326, "step": 200 }, { "epoch": 0.02, "learning_rate": 4.2000000000000004e-06, "loss": 0.1665, "step": 210 }, { "epoch": 0.02, "learning_rate": 4.4e-06, "loss": 0.2885, "step": 220 }, { "epoch": 0.02, "learning_rate": 4.600000000000001e-06, "loss": 0.2399, "step": 230 }, { "epoch": 0.02, "learning_rate": 4.800000000000001e-06, "loss": 0.2109, "step": 240 }, { "epoch": 0.02, "learning_rate": 5e-06, "loss": 0.2259, "step": 250 }, { "epoch": 0.02, "learning_rate": 5.2e-06, "loss": 0.2323, "step": 260 }, { "epoch": 0.02, "learning_rate": 5.400000000000001e-06, "loss": 0.1955, "step": 270 }, { "epoch": 0.02, "learning_rate": 5.600000000000001e-06, "loss": 0.1765, "step": 280 }, { "epoch": 0.03, "learning_rate": 5.8e-06, "loss": 0.1722, "step": 290 }, { "epoch": 0.03, "learning_rate": 6e-06, "loss": 0.1648, "step": 300 }, { "epoch": 0.03, "learning_rate": 6.200000000000001e-06, "loss": 0.1617, "step": 310 }, { "epoch": 0.03, "learning_rate": 6.4000000000000006e-06, "loss": 0.1687, "step": 320 }, { "epoch": 0.03, "learning_rate": 6.600000000000001e-06, "loss": 0.1727, "step": 330 }, { "epoch": 0.03, "learning_rate": 6.800000000000001e-06, "loss": 0.2064, "step": 340 }, { "epoch": 0.03, "learning_rate": 7e-06, "loss": 0.1663, "step": 350 }, { "epoch": 0.03, "learning_rate": 7.2000000000000005e-06, "loss": 0.1032, "step": 360 }, { "epoch": 0.03, "learning_rate": 7.4e-06, "loss": 0.1592, "step": 370 }, { "epoch": 0.03, "learning_rate": 7.600000000000001e-06, "loss": 0.2606, "step": 380 }, { "epoch": 0.03, "learning_rate": 7.800000000000002e-06, "loss": 0.0709, "step": 390 }, { "epoch": 0.04, "learning_rate": 8.000000000000001e-06, "loss": 0.117, "step": 400 }, { "epoch": 0.04, "learning_rate": 8.2e-06, "loss": 0.1836, "step": 410 }, { "epoch": 0.04, "learning_rate": 8.400000000000001e-06, "loss": 0.0998, "step": 420 }, { "epoch": 0.04, "learning_rate": 8.6e-06, "loss": 0.1288, "step": 430 }, { "epoch": 0.04, "learning_rate": 8.8e-06, "loss": 0.1877, "step": 440 }, { "epoch": 0.04, "learning_rate": 9e-06, "loss": 0.1695, "step": 450 }, { "epoch": 0.04, "learning_rate": 9.200000000000002e-06, "loss": 0.1736, "step": 460 }, { "epoch": 0.04, "learning_rate": 9.4e-06, "loss": 0.195, "step": 470 }, { "epoch": 0.04, "learning_rate": 9.600000000000001e-06, "loss": 0.1599, "step": 480 }, { "epoch": 0.04, "learning_rate": 9.800000000000001e-06, "loss": 0.0596, "step": 490 }, { "epoch": 0.04, "learning_rate": 1e-05, "loss": 0.0599, "step": 500 }, { "epoch": 0.05, "learning_rate": 9.998200629779578e-06, "loss": 0.2157, "step": 510 }, { "epoch": 0.05, "learning_rate": 9.996401259559155e-06, "loss": 0.1006, "step": 520 }, { "epoch": 0.05, "learning_rate": 9.994601889338731e-06, "loss": 0.2082, "step": 530 }, { "epoch": 0.05, "learning_rate": 9.99280251911831e-06, "loss": 0.2305, "step": 540 }, { "epoch": 0.05, "learning_rate": 9.991003148897887e-06, "loss": 0.1995, "step": 550 }, { "epoch": 0.05, "learning_rate": 9.989203778677464e-06, "loss": 0.1551, "step": 560 }, { "epoch": 0.05, "learning_rate": 9.98740440845704e-06, "loss": 0.0971, "step": 570 }, { "epoch": 0.05, "learning_rate": 9.985605038236617e-06, "loss": 0.2007, "step": 580 }, { "epoch": 0.05, "learning_rate": 9.983805668016196e-06, "loss": 0.1312, "step": 590 }, { "epoch": 0.05, "learning_rate": 9.982006297795773e-06, "loss": 0.1931, "step": 600 }, { "epoch": 0.05, "learning_rate": 9.98020692757535e-06, "loss": 0.223, "step": 610 }, { "epoch": 0.06, "learning_rate": 9.978407557354927e-06, "loss": 0.3252, "step": 620 }, { "epoch": 0.06, "learning_rate": 9.976608187134503e-06, "loss": 0.0981, "step": 630 }, { "epoch": 0.06, "learning_rate": 9.97480881691408e-06, "loss": 0.1815, "step": 640 }, { "epoch": 0.06, "learning_rate": 9.973009446693657e-06, "loss": 0.1782, "step": 650 }, { "epoch": 0.06, "learning_rate": 9.971210076473236e-06, "loss": 0.1804, "step": 660 }, { "epoch": 0.06, "learning_rate": 9.969410706252813e-06, "loss": 0.1271, "step": 670 }, { "epoch": 0.06, "learning_rate": 9.96761133603239e-06, "loss": 0.1316, "step": 680 }, { "epoch": 0.06, "learning_rate": 9.965811965811966e-06, "loss": 0.2092, "step": 690 }, { "epoch": 0.06, "learning_rate": 9.964012595591543e-06, "loss": 0.0782, "step": 700 }, { "epoch": 0.06, "learning_rate": 9.962213225371122e-06, "loss": 0.2433, "step": 710 }, { "epoch": 0.06, "learning_rate": 9.960413855150699e-06, "loss": 0.1883, "step": 720 }, { "epoch": 0.07, "learning_rate": 9.958614484930275e-06, "loss": 0.032, "step": 730 }, { "epoch": 0.07, "learning_rate": 9.956815114709852e-06, "loss": 0.2181, "step": 740 }, { "epoch": 0.07, "learning_rate": 9.955015744489429e-06, "loss": 0.1828, "step": 750 }, { "epoch": 0.07, "learning_rate": 9.953216374269008e-06, "loss": 0.1206, "step": 760 }, { "epoch": 0.07, "learning_rate": 9.951417004048583e-06, "loss": 0.1253, "step": 770 }, { "epoch": 0.07, "learning_rate": 9.949617633828161e-06, "loss": 0.1047, "step": 780 }, { "epoch": 0.07, "learning_rate": 9.947818263607738e-06, "loss": 0.1174, "step": 790 }, { "epoch": 0.07, "learning_rate": 9.946018893387315e-06, "loss": 0.3025, "step": 800 }, { "epoch": 0.07, "learning_rate": 9.944219523166892e-06, "loss": 0.0581, "step": 810 }, { "epoch": 0.07, "learning_rate": 9.942420152946469e-06, "loss": 0.2664, "step": 820 }, { "epoch": 0.07, "learning_rate": 9.940620782726047e-06, "loss": 0.1218, "step": 830 }, { "epoch": 0.07, "learning_rate": 9.938821412505624e-06, "loss": 0.1708, "step": 840 }, { "epoch": 0.08, "learning_rate": 9.937022042285201e-06, "loss": 0.1626, "step": 850 }, { "epoch": 0.08, "learning_rate": 9.935222672064778e-06, "loss": 0.0553, "step": 860 }, { "epoch": 0.08, "learning_rate": 9.933423301844355e-06, "loss": 0.1209, "step": 870 }, { "epoch": 0.08, "learning_rate": 9.931623931623933e-06, "loss": 0.11, "step": 880 }, { "epoch": 0.08, "learning_rate": 9.929824561403509e-06, "loss": 0.0945, "step": 890 }, { "epoch": 0.08, "learning_rate": 9.928025191183087e-06, "loss": 0.2105, "step": 900 }, { "epoch": 0.08, "learning_rate": 9.926225820962664e-06, "loss": 0.1548, "step": 910 }, { "epoch": 0.08, "learning_rate": 9.92442645074224e-06, "loss": 0.1819, "step": 920 }, { "epoch": 0.08, "learning_rate": 9.922627080521818e-06, "loss": 0.1461, "step": 930 }, { "epoch": 0.08, "learning_rate": 9.920827710301395e-06, "loss": 0.1917, "step": 940 }, { "epoch": 0.08, "learning_rate": 9.919028340080973e-06, "loss": 0.0796, "step": 950 }, { "epoch": 0.09, "learning_rate": 9.91722896986055e-06, "loss": 0.1768, "step": 960 }, { "epoch": 0.09, "learning_rate": 9.915429599640127e-06, "loss": 0.1726, "step": 970 }, { "epoch": 0.09, "learning_rate": 9.913630229419704e-06, "loss": 0.1244, "step": 980 }, { "epoch": 0.09, "learning_rate": 9.91183085919928e-06, "loss": 0.0765, "step": 990 }, { "epoch": 0.09, "learning_rate": 9.910031488978859e-06, "loss": 0.1842, "step": 1000 }, { "epoch": 0.09, "learning_rate": 9.908232118758436e-06, "loss": 0.1192, "step": 1010 }, { "epoch": 0.09, "learning_rate": 9.906432748538013e-06, "loss": 0.132, "step": 1020 }, { "epoch": 0.09, "learning_rate": 9.90463337831759e-06, "loss": 0.1347, "step": 1030 }, { "epoch": 0.09, "learning_rate": 9.902834008097167e-06, "loss": 0.0591, "step": 1040 }, { "epoch": 0.09, "learning_rate": 9.901034637876743e-06, "loss": 0.1588, "step": 1050 }, { "epoch": 0.09, "learning_rate": 9.89923526765632e-06, "loss": 0.1273, "step": 1060 }, { "epoch": 0.1, "learning_rate": 9.897435897435899e-06, "loss": 0.0527, "step": 1070 }, { "epoch": 0.1, "learning_rate": 9.895636527215476e-06, "loss": 0.1438, "step": 1080 }, { "epoch": 0.1, "learning_rate": 9.893837156995053e-06, "loss": 0.1085, "step": 1090 }, { "epoch": 0.1, "learning_rate": 9.89203778677463e-06, "loss": 0.0793, "step": 1100 }, { "epoch": 0.1, "learning_rate": 9.890238416554206e-06, "loss": 0.1617, "step": 1110 }, { "epoch": 0.1, "learning_rate": 9.888439046333785e-06, "loss": 0.0164, "step": 1120 }, { "epoch": 0.1, "learning_rate": 9.886639676113362e-06, "loss": 0.2053, "step": 1130 }, { "epoch": 0.1, "learning_rate": 9.884840305892939e-06, "loss": 0.1774, "step": 1140 }, { "epoch": 0.1, "learning_rate": 9.883040935672515e-06, "loss": 0.1195, "step": 1150 }, { "epoch": 0.1, "learning_rate": 9.881241565452092e-06, "loss": 0.0487, "step": 1160 }, { "epoch": 0.1, "learning_rate": 9.879442195231669e-06, "loss": 0.1629, "step": 1170 }, { "epoch": 0.11, "learning_rate": 9.877642825011246e-06, "loss": 0.0964, "step": 1180 }, { "epoch": 0.11, "learning_rate": 9.875843454790825e-06, "loss": 0.1443, "step": 1190 }, { "epoch": 0.11, "learning_rate": 9.874044084570401e-06, "loss": 0.1089, "step": 1200 }, { "epoch": 0.11, "learning_rate": 9.872244714349978e-06, "loss": 0.0233, "step": 1210 }, { "epoch": 0.11, "learning_rate": 9.870445344129555e-06, "loss": 0.0712, "step": 1220 }, { "epoch": 0.11, "learning_rate": 9.868645973909132e-06, "loss": 0.2537, "step": 1230 }, { "epoch": 0.11, "learning_rate": 9.86684660368871e-06, "loss": 0.2621, "step": 1240 }, { "epoch": 0.11, "learning_rate": 9.865047233468287e-06, "loss": 0.1302, "step": 1250 }, { "epoch": 0.11, "learning_rate": 9.863247863247864e-06, "loss": 0.136, "step": 1260 }, { "epoch": 0.11, "learning_rate": 9.861448493027441e-06, "loss": 0.146, "step": 1270 }, { "epoch": 0.11, "learning_rate": 9.859649122807018e-06, "loss": 0.0938, "step": 1280 }, { "epoch": 0.12, "learning_rate": 9.857849752586597e-06, "loss": 0.0718, "step": 1290 }, { "epoch": 0.12, "learning_rate": 9.856050382366172e-06, "loss": 0.1728, "step": 1300 }, { "epoch": 0.12, "learning_rate": 9.85425101214575e-06, "loss": 0.2543, "step": 1310 }, { "epoch": 0.12, "learning_rate": 9.852451641925327e-06, "loss": 0.1327, "step": 1320 }, { "epoch": 0.12, "learning_rate": 9.850652271704904e-06, "loss": 0.1058, "step": 1330 }, { "epoch": 0.12, "learning_rate": 9.84885290148448e-06, "loss": 0.13, "step": 1340 }, { "epoch": 0.12, "learning_rate": 9.847053531264058e-06, "loss": 0.1322, "step": 1350 }, { "epoch": 0.12, "learning_rate": 9.845254161043636e-06, "loss": 0.1909, "step": 1360 }, { "epoch": 0.12, "learning_rate": 9.843454790823213e-06, "loss": 0.078, "step": 1370 }, { "epoch": 0.12, "learning_rate": 9.84165542060279e-06, "loss": 0.1836, "step": 1380 }, { "epoch": 0.12, "learning_rate": 9.839856050382367e-06, "loss": 0.1491, "step": 1390 }, { "epoch": 0.12, "learning_rate": 9.838056680161944e-06, "loss": 0.139, "step": 1400 }, { "epoch": 0.13, "learning_rate": 9.836257309941522e-06, "loss": 0.0737, "step": 1410 }, { "epoch": 0.13, "learning_rate": 9.834457939721097e-06, "loss": 0.1696, "step": 1420 }, { "epoch": 0.13, "learning_rate": 9.832658569500676e-06, "loss": 0.2328, "step": 1430 }, { "epoch": 0.13, "learning_rate": 9.830859199280253e-06, "loss": 0.1342, "step": 1440 }, { "epoch": 0.13, "learning_rate": 9.82905982905983e-06, "loss": 0.1243, "step": 1450 }, { "epoch": 0.13, "learning_rate": 9.827260458839407e-06, "loss": 0.1969, "step": 1460 }, { "epoch": 0.13, "learning_rate": 9.825461088618983e-06, "loss": 0.2298, "step": 1470 }, { "epoch": 0.13, "learning_rate": 9.823661718398562e-06, "loss": 0.1167, "step": 1480 }, { "epoch": 0.13, "learning_rate": 9.821862348178139e-06, "loss": 0.123, "step": 1490 }, { "epoch": 0.13, "learning_rate": 9.820062977957716e-06, "loss": 0.1128, "step": 1500 }, { "epoch": 0.13, "eval_accuracy": 0.9694931312174325, "eval_f1": 0.94606967396159, "eval_loss": 0.09655023366212845, "eval_precision": 0.9585926009729607, "eval_recall": 0.9338697233550094, "eval_runtime": 436.9466, "eval_samples_per_second": 72.469, "eval_steps_per_second": 4.531, "step": 1500 }, { "epoch": 0.13, "learning_rate": 9.818263607737293e-06, "loss": 0.0397, "step": 1510 }, { "epoch": 0.14, "learning_rate": 9.81646423751687e-06, "loss": 0.1572, "step": 1520 }, { "epoch": 0.14, "learning_rate": 9.814664867296448e-06, "loss": 0.1214, "step": 1530 }, { "epoch": 0.14, "learning_rate": 9.812865497076025e-06, "loss": 0.135, "step": 1540 }, { "epoch": 0.14, "learning_rate": 9.811066126855602e-06, "loss": 0.3403, "step": 1550 }, { "epoch": 0.14, "learning_rate": 9.809266756635179e-06, "loss": 0.0645, "step": 1560 }, { "epoch": 0.14, "learning_rate": 9.807467386414755e-06, "loss": 0.1162, "step": 1570 }, { "epoch": 0.14, "learning_rate": 9.805668016194332e-06, "loss": 0.065, "step": 1580 }, { "epoch": 0.14, "learning_rate": 9.803868645973909e-06, "loss": 0.0923, "step": 1590 }, { "epoch": 0.14, "learning_rate": 9.802069275753488e-06, "loss": 0.2101, "step": 1600 }, { "epoch": 0.14, "learning_rate": 9.800269905533065e-06, "loss": 0.1123, "step": 1610 }, { "epoch": 0.14, "learning_rate": 9.798470535312641e-06, "loss": 0.2323, "step": 1620 }, { "epoch": 0.15, "learning_rate": 9.796671165092218e-06, "loss": 0.0653, "step": 1630 }, { "epoch": 0.15, "learning_rate": 9.794871794871795e-06, "loss": 0.1639, "step": 1640 }, { "epoch": 0.15, "learning_rate": 9.793072424651374e-06, "loss": 0.0505, "step": 1650 }, { "epoch": 0.15, "learning_rate": 9.79127305443095e-06, "loss": 0.1409, "step": 1660 }, { "epoch": 0.15, "learning_rate": 9.789473684210527e-06, "loss": 0.1419, "step": 1670 }, { "epoch": 0.15, "learning_rate": 9.787674313990104e-06, "loss": 0.1625, "step": 1680 }, { "epoch": 0.15, "learning_rate": 9.785874943769681e-06, "loss": 0.0771, "step": 1690 }, { "epoch": 0.15, "learning_rate": 9.784075573549258e-06, "loss": 0.0929, "step": 1700 }, { "epoch": 0.15, "learning_rate": 9.782276203328835e-06, "loss": 0.095, "step": 1710 }, { "epoch": 0.15, "learning_rate": 9.780476833108413e-06, "loss": 0.1673, "step": 1720 }, { "epoch": 0.15, "learning_rate": 9.77867746288799e-06, "loss": 0.1376, "step": 1730 }, { "epoch": 0.16, "learning_rate": 9.776878092667567e-06, "loss": 0.0783, "step": 1740 }, { "epoch": 0.16, "learning_rate": 9.775078722447144e-06, "loss": 0.1428, "step": 1750 }, { "epoch": 0.16, "learning_rate": 9.77327935222672e-06, "loss": 0.0576, "step": 1760 }, { "epoch": 0.16, "learning_rate": 9.7714799820063e-06, "loss": 0.0739, "step": 1770 }, { "epoch": 0.16, "learning_rate": 9.769680611785876e-06, "loss": 0.09, "step": 1780 }, { "epoch": 0.16, "learning_rate": 9.767881241565453e-06, "loss": 0.0987, "step": 1790 }, { "epoch": 0.16, "learning_rate": 9.76608187134503e-06, "loss": 0.1613, "step": 1800 }, { "epoch": 0.16, "learning_rate": 9.764282501124607e-06, "loss": 0.058, "step": 1810 }, { "epoch": 0.16, "learning_rate": 9.762483130904185e-06, "loss": 0.218, "step": 1820 }, { "epoch": 0.16, "learning_rate": 9.76068376068376e-06, "loss": 0.1083, "step": 1830 }, { "epoch": 0.16, "learning_rate": 9.758884390463339e-06, "loss": 0.1274, "step": 1840 }, { "epoch": 0.16, "learning_rate": 9.757085020242916e-06, "loss": 0.1016, "step": 1850 }, { "epoch": 0.17, "learning_rate": 9.755285650022493e-06, "loss": 0.1442, "step": 1860 }, { "epoch": 0.17, "learning_rate": 9.75348627980207e-06, "loss": 0.0771, "step": 1870 }, { "epoch": 0.17, "learning_rate": 9.751686909581647e-06, "loss": 0.0832, "step": 1880 }, { "epoch": 0.17, "learning_rate": 9.749887539361225e-06, "loss": 0.1264, "step": 1890 }, { "epoch": 0.17, "learning_rate": 9.748088169140802e-06, "loss": 0.1182, "step": 1900 }, { "epoch": 0.17, "learning_rate": 9.746288798920379e-06, "loss": 0.1582, "step": 1910 }, { "epoch": 0.17, "learning_rate": 9.744489428699956e-06, "loss": 0.1256, "step": 1920 }, { "epoch": 0.17, "learning_rate": 9.742690058479533e-06, "loss": 0.1081, "step": 1930 }, { "epoch": 0.17, "learning_rate": 9.740890688259111e-06, "loss": 0.0922, "step": 1940 }, { "epoch": 0.17, "learning_rate": 9.739091318038686e-06, "loss": 0.1155, "step": 1950 }, { "epoch": 0.17, "learning_rate": 9.737291947818265e-06, "loss": 0.0559, "step": 1960 }, { "epoch": 0.18, "learning_rate": 9.735492577597842e-06, "loss": 0.1008, "step": 1970 }, { "epoch": 0.18, "learning_rate": 9.733693207377419e-06, "loss": 0.1123, "step": 1980 }, { "epoch": 0.18, "learning_rate": 9.731893837156995e-06, "loss": 0.1095, "step": 1990 }, { "epoch": 0.18, "learning_rate": 9.730094466936572e-06, "loss": 0.1269, "step": 2000 }, { "epoch": 0.18, "learning_rate": 9.72829509671615e-06, "loss": 0.1702, "step": 2010 }, { "epoch": 0.18, "learning_rate": 9.726495726495728e-06, "loss": 0.1762, "step": 2020 }, { "epoch": 0.18, "learning_rate": 9.724696356275305e-06, "loss": 0.0528, "step": 2030 }, { "epoch": 0.18, "learning_rate": 9.722896986054881e-06, "loss": 0.0912, "step": 2040 }, { "epoch": 0.18, "learning_rate": 9.721097615834458e-06, "loss": 0.0528, "step": 2050 }, { "epoch": 0.18, "learning_rate": 9.719298245614037e-06, "loss": 0.1277, "step": 2060 }, { "epoch": 0.18, "learning_rate": 9.717498875393614e-06, "loss": 0.1685, "step": 2070 }, { "epoch": 0.19, "learning_rate": 9.71569950517319e-06, "loss": 0.0825, "step": 2080 }, { "epoch": 0.19, "learning_rate": 9.713900134952767e-06, "loss": 0.1238, "step": 2090 }, { "epoch": 0.19, "learning_rate": 9.712100764732344e-06, "loss": 0.0815, "step": 2100 }, { "epoch": 0.19, "learning_rate": 9.710301394511921e-06, "loss": 0.0942, "step": 2110 }, { "epoch": 0.19, "learning_rate": 9.708502024291498e-06, "loss": 0.0789, "step": 2120 }, { "epoch": 0.19, "learning_rate": 9.706702654071076e-06, "loss": 0.1439, "step": 2130 }, { "epoch": 0.19, "learning_rate": 9.704903283850653e-06, "loss": 0.1, "step": 2140 }, { "epoch": 0.19, "learning_rate": 9.70310391363023e-06, "loss": 0.0548, "step": 2150 }, { "epoch": 0.19, "learning_rate": 9.701304543409807e-06, "loss": 0.1491, "step": 2160 }, { "epoch": 0.19, "learning_rate": 9.699505173189384e-06, "loss": 0.063, "step": 2170 }, { "epoch": 0.19, "learning_rate": 9.697705802968962e-06, "loss": 0.2628, "step": 2180 }, { "epoch": 0.2, "learning_rate": 9.69590643274854e-06, "loss": 0.2376, "step": 2190 }, { "epoch": 0.2, "learning_rate": 9.694107062528116e-06, "loss": 0.1094, "step": 2200 }, { "epoch": 0.2, "learning_rate": 9.692307692307693e-06, "loss": 0.143, "step": 2210 }, { "epoch": 0.2, "learning_rate": 9.69050832208727e-06, "loss": 0.1503, "step": 2220 }, { "epoch": 0.2, "learning_rate": 9.688708951866847e-06, "loss": 0.1998, "step": 2230 }, { "epoch": 0.2, "learning_rate": 9.686909581646424e-06, "loss": 0.0649, "step": 2240 }, { "epoch": 0.2, "learning_rate": 9.685110211426002e-06, "loss": 0.024, "step": 2250 }, { "epoch": 0.2, "learning_rate": 9.683310841205579e-06, "loss": 0.119, "step": 2260 }, { "epoch": 0.2, "learning_rate": 9.681511470985156e-06, "loss": 0.228, "step": 2270 }, { "epoch": 0.2, "learning_rate": 9.679712100764733e-06, "loss": 0.1202, "step": 2280 }, { "epoch": 0.2, "learning_rate": 9.67791273054431e-06, "loss": 0.053, "step": 2290 }, { "epoch": 0.21, "learning_rate": 9.676113360323888e-06, "loss": 0.1156, "step": 2300 }, { "epoch": 0.21, "learning_rate": 9.674313990103465e-06, "loss": 0.1197, "step": 2310 }, { "epoch": 0.21, "learning_rate": 9.672514619883042e-06, "loss": 0.0872, "step": 2320 }, { "epoch": 0.21, "learning_rate": 9.670715249662619e-06, "loss": 0.1937, "step": 2330 }, { "epoch": 0.21, "learning_rate": 9.668915879442196e-06, "loss": 0.172, "step": 2340 }, { "epoch": 0.21, "learning_rate": 9.667116509221774e-06, "loss": 0.1065, "step": 2350 }, { "epoch": 0.21, "learning_rate": 9.66531713900135e-06, "loss": 0.12, "step": 2360 }, { "epoch": 0.21, "learning_rate": 9.663517768780928e-06, "loss": 0.094, "step": 2370 }, { "epoch": 0.21, "learning_rate": 9.661718398560505e-06, "loss": 0.0623, "step": 2380 }, { "epoch": 0.21, "learning_rate": 9.659919028340082e-06, "loss": 0.1098, "step": 2390 }, { "epoch": 0.21, "learning_rate": 9.658119658119659e-06, "loss": 0.1679, "step": 2400 }, { "epoch": 0.21, "learning_rate": 9.656320287899235e-06, "loss": 0.1222, "step": 2410 }, { "epoch": 0.22, "learning_rate": 9.654520917678814e-06, "loss": 0.0483, "step": 2420 }, { "epoch": 0.22, "learning_rate": 9.65272154745839e-06, "loss": 0.1289, "step": 2430 }, { "epoch": 0.22, "learning_rate": 9.650922177237968e-06, "loss": 0.0802, "step": 2440 }, { "epoch": 0.22, "learning_rate": 9.649122807017545e-06, "loss": 0.0691, "step": 2450 }, { "epoch": 0.22, "learning_rate": 9.647323436797121e-06, "loss": 0.2148, "step": 2460 }, { "epoch": 0.22, "learning_rate": 9.6455240665767e-06, "loss": 0.1005, "step": 2470 }, { "epoch": 0.22, "learning_rate": 9.643724696356275e-06, "loss": 0.1019, "step": 2480 }, { "epoch": 0.22, "learning_rate": 9.641925326135854e-06, "loss": 0.1362, "step": 2490 }, { "epoch": 0.22, "learning_rate": 9.64012595591543e-06, "loss": 0.0445, "step": 2500 }, { "epoch": 0.22, "learning_rate": 9.638326585695007e-06, "loss": 0.1021, "step": 2510 }, { "epoch": 0.22, "learning_rate": 9.636527215474584e-06, "loss": 0.2398, "step": 2520 }, { "epoch": 0.23, "learning_rate": 9.634727845254161e-06, "loss": 0.1228, "step": 2530 }, { "epoch": 0.23, "learning_rate": 9.63292847503374e-06, "loss": 0.1529, "step": 2540 }, { "epoch": 0.23, "learning_rate": 9.631129104813316e-06, "loss": 0.0813, "step": 2550 }, { "epoch": 0.23, "learning_rate": 9.629329734592893e-06, "loss": 0.0542, "step": 2560 }, { "epoch": 0.23, "learning_rate": 9.62753036437247e-06, "loss": 0.1951, "step": 2570 }, { "epoch": 0.23, "learning_rate": 9.625730994152047e-06, "loss": 0.0304, "step": 2580 }, { "epoch": 0.23, "learning_rate": 9.623931623931626e-06, "loss": 0.1102, "step": 2590 }, { "epoch": 0.23, "learning_rate": 9.6221322537112e-06, "loss": 0.0727, "step": 2600 }, { "epoch": 0.23, "learning_rate": 9.62033288349078e-06, "loss": 0.136, "step": 2610 }, { "epoch": 0.23, "learning_rate": 9.618533513270356e-06, "loss": 0.1733, "step": 2620 }, { "epoch": 0.23, "learning_rate": 9.616734143049933e-06, "loss": 0.043, "step": 2630 }, { "epoch": 0.24, "learning_rate": 9.61493477282951e-06, "loss": 0.1242, "step": 2640 }, { "epoch": 0.24, "learning_rate": 9.613135402609087e-06, "loss": 0.0807, "step": 2650 }, { "epoch": 0.24, "learning_rate": 9.611336032388665e-06, "loss": 0.084, "step": 2660 }, { "epoch": 0.24, "learning_rate": 9.609536662168242e-06, "loss": 0.0175, "step": 2670 }, { "epoch": 0.24, "learning_rate": 9.607737291947819e-06, "loss": 0.0686, "step": 2680 }, { "epoch": 0.24, "learning_rate": 9.605937921727396e-06, "loss": 0.0984, "step": 2690 }, { "epoch": 0.24, "learning_rate": 9.604138551506973e-06, "loss": 0.0717, "step": 2700 }, { "epoch": 0.24, "learning_rate": 9.602339181286551e-06, "loss": 0.0323, "step": 2710 }, { "epoch": 0.24, "learning_rate": 9.600539811066128e-06, "loss": 0.1174, "step": 2720 }, { "epoch": 0.24, "learning_rate": 9.598740440845705e-06, "loss": 0.2252, "step": 2730 }, { "epoch": 0.24, "learning_rate": 9.596941070625282e-06, "loss": 0.1463, "step": 2740 }, { "epoch": 0.25, "learning_rate": 9.595141700404859e-06, "loss": 0.1296, "step": 2750 }, { "epoch": 0.25, "learning_rate": 9.593342330184436e-06, "loss": 0.0498, "step": 2760 }, { "epoch": 0.25, "learning_rate": 9.591542959964013e-06, "loss": 0.0488, "step": 2770 }, { "epoch": 0.25, "learning_rate": 9.589743589743591e-06, "loss": 0.1701, "step": 2780 }, { "epoch": 0.25, "learning_rate": 9.587944219523168e-06, "loss": 0.0986, "step": 2790 }, { "epoch": 0.25, "learning_rate": 9.586144849302745e-06, "loss": 0.0605, "step": 2800 }, { "epoch": 0.25, "learning_rate": 9.584345479082322e-06, "loss": 0.0779, "step": 2810 }, { "epoch": 0.25, "learning_rate": 9.582546108861898e-06, "loss": 0.1724, "step": 2820 }, { "epoch": 0.25, "learning_rate": 9.580746738641477e-06, "loss": 0.0229, "step": 2830 }, { "epoch": 0.25, "learning_rate": 9.578947368421054e-06, "loss": 0.1895, "step": 2840 }, { "epoch": 0.25, "learning_rate": 9.57714799820063e-06, "loss": 0.1627, "step": 2850 }, { "epoch": 0.26, "learning_rate": 9.575348627980208e-06, "loss": 0.061, "step": 2860 }, { "epoch": 0.26, "learning_rate": 9.573549257759784e-06, "loss": 0.0394, "step": 2870 }, { "epoch": 0.26, "learning_rate": 9.571749887539361e-06, "loss": 0.0101, "step": 2880 }, { "epoch": 0.26, "learning_rate": 9.569950517318938e-06, "loss": 0.1223, "step": 2890 }, { "epoch": 0.26, "learning_rate": 9.568151147098517e-06, "loss": 0.0859, "step": 2900 }, { "epoch": 0.26, "learning_rate": 9.566351776878094e-06, "loss": 0.0245, "step": 2910 }, { "epoch": 0.26, "learning_rate": 9.56455240665767e-06, "loss": 0.077, "step": 2920 }, { "epoch": 0.26, "learning_rate": 9.562753036437247e-06, "loss": 0.1988, "step": 2930 }, { "epoch": 0.26, "learning_rate": 9.560953666216824e-06, "loss": 0.1803, "step": 2940 }, { "epoch": 0.26, "learning_rate": 9.559154295996403e-06, "loss": 0.0734, "step": 2950 }, { "epoch": 0.26, "learning_rate": 9.55735492577598e-06, "loss": 0.0309, "step": 2960 }, { "epoch": 0.26, "learning_rate": 9.555555555555556e-06, "loss": 0.0958, "step": 2970 }, { "epoch": 0.27, "learning_rate": 9.553756185335133e-06, "loss": 0.1951, "step": 2980 }, { "epoch": 0.27, "learning_rate": 9.55195681511471e-06, "loss": 0.0235, "step": 2990 }, { "epoch": 0.27, "learning_rate": 9.550157444894289e-06, "loss": 0.0854, "step": 3000 }, { "epoch": 0.27, "eval_accuracy": 0.9742618032528028, "eval_f1": 0.9549798375959787, "eval_loss": 0.1135290339589119, "eval_precision": 0.9572535991140643, "eval_recall": 0.9527168521988317, "eval_runtime": 437.2717, "eval_samples_per_second": 72.415, "eval_steps_per_second": 4.528, "step": 3000 }, { "epoch": 0.27, "learning_rate": 9.548358074673864e-06, "loss": 0.0418, "step": 3010 }, { "epoch": 0.27, "learning_rate": 9.546558704453442e-06, "loss": 0.0694, "step": 3020 }, { "epoch": 0.27, "learning_rate": 9.54475933423302e-06, "loss": 0.048, "step": 3030 }, { "epoch": 0.27, "learning_rate": 9.542959964012596e-06, "loss": 0.1657, "step": 3040 }, { "epoch": 0.27, "learning_rate": 9.541160593792173e-06, "loss": 0.0778, "step": 3050 }, { "epoch": 0.27, "learning_rate": 9.53936122357175e-06, "loss": 0.1125, "step": 3060 }, { "epoch": 0.27, "learning_rate": 9.537561853351328e-06, "loss": 0.0662, "step": 3070 }, { "epoch": 0.27, "learning_rate": 9.535762483130905e-06, "loss": 0.1191, "step": 3080 }, { "epoch": 0.28, "learning_rate": 9.533963112910482e-06, "loss": 0.171, "step": 3090 }, { "epoch": 0.28, "learning_rate": 9.532163742690059e-06, "loss": 0.0703, "step": 3100 }, { "epoch": 0.28, "learning_rate": 9.530364372469636e-06, "loss": 0.138, "step": 3110 }, { "epoch": 0.28, "learning_rate": 9.528565002249214e-06, "loss": 0.1945, "step": 3120 }, { "epoch": 0.28, "learning_rate": 9.52676563202879e-06, "loss": 0.1241, "step": 3130 }, { "epoch": 0.28, "learning_rate": 9.524966261808368e-06, "loss": 0.1185, "step": 3140 }, { "epoch": 0.28, "learning_rate": 9.523166891587945e-06, "loss": 0.0739, "step": 3150 }, { "epoch": 0.28, "learning_rate": 9.521367521367522e-06, "loss": 0.0361, "step": 3160 }, { "epoch": 0.28, "learning_rate": 9.519568151147099e-06, "loss": 0.1103, "step": 3170 }, { "epoch": 0.28, "learning_rate": 9.517768780926676e-06, "loss": 0.1141, "step": 3180 }, { "epoch": 0.28, "learning_rate": 9.515969410706254e-06, "loss": 0.0741, "step": 3190 }, { "epoch": 0.29, "learning_rate": 9.514170040485831e-06, "loss": 0.0637, "step": 3200 }, { "epoch": 0.29, "learning_rate": 9.512370670265408e-06, "loss": 0.2084, "step": 3210 }, { "epoch": 0.29, "learning_rate": 9.510571300044985e-06, "loss": 0.1303, "step": 3220 }, { "epoch": 0.29, "learning_rate": 9.508771929824562e-06, "loss": 0.1449, "step": 3230 }, { "epoch": 0.29, "learning_rate": 9.50697255960414e-06, "loss": 0.0977, "step": 3240 }, { "epoch": 0.29, "learning_rate": 9.505173189383717e-06, "loss": 0.0754, "step": 3250 }, { "epoch": 0.29, "learning_rate": 9.503373819163294e-06, "loss": 0.0237, "step": 3260 }, { "epoch": 0.29, "learning_rate": 9.50157444894287e-06, "loss": 0.1629, "step": 3270 }, { "epoch": 0.29, "learning_rate": 9.499775078722448e-06, "loss": 0.1183, "step": 3280 }, { "epoch": 0.29, "learning_rate": 9.497975708502024e-06, "loss": 0.0365, "step": 3290 }, { "epoch": 0.29, "learning_rate": 9.496176338281601e-06, "loss": 0.0068, "step": 3300 }, { "epoch": 0.3, "learning_rate": 9.49437696806118e-06, "loss": 0.2428, "step": 3310 }, { "epoch": 0.3, "learning_rate": 9.492577597840757e-06, "loss": 0.0858, "step": 3320 }, { "epoch": 0.3, "learning_rate": 9.490778227620334e-06, "loss": 0.0579, "step": 3330 }, { "epoch": 0.3, "learning_rate": 9.48897885739991e-06, "loss": 0.0911, "step": 3340 }, { "epoch": 0.3, "learning_rate": 9.487179487179487e-06, "loss": 0.0851, "step": 3350 }, { "epoch": 0.3, "learning_rate": 9.485380116959066e-06, "loss": 0.1322, "step": 3360 }, { "epoch": 0.3, "learning_rate": 9.483580746738643e-06, "loss": 0.0657, "step": 3370 }, { "epoch": 0.3, "learning_rate": 9.48178137651822e-06, "loss": 0.0299, "step": 3380 }, { "epoch": 0.3, "learning_rate": 9.479982006297796e-06, "loss": 0.2368, "step": 3390 }, { "epoch": 0.3, "learning_rate": 9.478182636077373e-06, "loss": 0.064, "step": 3400 }, { "epoch": 0.3, "learning_rate": 9.47638326585695e-06, "loss": 0.1288, "step": 3410 }, { "epoch": 0.3, "learning_rate": 9.474583895636527e-06, "loss": 0.0765, "step": 3420 }, { "epoch": 0.31, "learning_rate": 9.472784525416106e-06, "loss": 0.1403, "step": 3430 }, { "epoch": 0.31, "learning_rate": 9.470985155195682e-06, "loss": 0.0662, "step": 3440 }, { "epoch": 0.31, "learning_rate": 9.46918578497526e-06, "loss": 0.1429, "step": 3450 }, { "epoch": 0.31, "learning_rate": 9.467386414754836e-06, "loss": 0.1111, "step": 3460 }, { "epoch": 0.31, "learning_rate": 9.465587044534413e-06, "loss": 0.1807, "step": 3470 }, { "epoch": 0.31, "learning_rate": 9.463787674313992e-06, "loss": 0.1044, "step": 3480 }, { "epoch": 0.31, "learning_rate": 9.461988304093568e-06, "loss": 0.0654, "step": 3490 }, { "epoch": 0.31, "learning_rate": 9.460188933873145e-06, "loss": 0.0255, "step": 3500 }, { "epoch": 0.31, "learning_rate": 9.458389563652722e-06, "loss": 0.1917, "step": 3510 }, { "epoch": 0.31, "learning_rate": 9.456590193432299e-06, "loss": 0.02, "step": 3520 }, { "epoch": 0.31, "learning_rate": 9.454790823211878e-06, "loss": 0.0694, "step": 3530 }, { "epoch": 0.32, "learning_rate": 9.452991452991453e-06, "loss": 0.1769, "step": 3540 }, { "epoch": 0.32, "learning_rate": 9.451192082771031e-06, "loss": 0.0723, "step": 3550 }, { "epoch": 0.32, "learning_rate": 9.449392712550608e-06, "loss": 0.1041, "step": 3560 }, { "epoch": 0.32, "learning_rate": 9.447593342330185e-06, "loss": 0.0711, "step": 3570 }, { "epoch": 0.32, "learning_rate": 9.445793972109762e-06, "loss": 0.0932, "step": 3580 }, { "epoch": 0.32, "learning_rate": 9.443994601889339e-06, "loss": 0.1224, "step": 3590 }, { "epoch": 0.32, "learning_rate": 9.442195231668917e-06, "loss": 0.0452, "step": 3600 }, { "epoch": 0.32, "learning_rate": 9.440395861448494e-06, "loss": 0.0763, "step": 3610 }, { "epoch": 0.32, "learning_rate": 9.438596491228071e-06, "loss": 0.0422, "step": 3620 }, { "epoch": 0.32, "learning_rate": 9.436797121007648e-06, "loss": 0.0184, "step": 3630 }, { "epoch": 0.32, "learning_rate": 9.434997750787225e-06, "loss": 0.0059, "step": 3640 }, { "epoch": 0.33, "learning_rate": 9.433198380566803e-06, "loss": 0.1277, "step": 3650 }, { "epoch": 0.33, "learning_rate": 9.431399010346378e-06, "loss": 0.0845, "step": 3660 }, { "epoch": 0.33, "learning_rate": 9.429599640125957e-06, "loss": 0.1707, "step": 3670 }, { "epoch": 0.33, "learning_rate": 9.427800269905534e-06, "loss": 0.1214, "step": 3680 }, { "epoch": 0.33, "learning_rate": 9.42600089968511e-06, "loss": 0.0493, "step": 3690 }, { "epoch": 0.33, "learning_rate": 9.424201529464688e-06, "loss": 0.1305, "step": 3700 }, { "epoch": 0.33, "learning_rate": 9.422402159244264e-06, "loss": 0.0809, "step": 3710 }, { "epoch": 0.33, "learning_rate": 9.420602789023843e-06, "loss": 0.0707, "step": 3720 }, { "epoch": 0.33, "learning_rate": 9.41880341880342e-06, "loss": 0.1068, "step": 3730 }, { "epoch": 0.33, "learning_rate": 9.417004048582997e-06, "loss": 0.0152, "step": 3740 }, { "epoch": 0.33, "learning_rate": 9.415204678362574e-06, "loss": 0.0773, "step": 3750 }, { "epoch": 0.34, "learning_rate": 9.41340530814215e-06, "loss": 0.072, "step": 3760 }, { "epoch": 0.34, "learning_rate": 9.411605937921729e-06, "loss": 0.1304, "step": 3770 }, { "epoch": 0.34, "learning_rate": 9.409806567701306e-06, "loss": 0.0825, "step": 3780 }, { "epoch": 0.34, "learning_rate": 9.408007197480883e-06, "loss": 0.1158, "step": 3790 }, { "epoch": 0.34, "learning_rate": 9.40620782726046e-06, "loss": 0.2465, "step": 3800 }, { "epoch": 0.34, "learning_rate": 9.404408457040036e-06, "loss": 0.2057, "step": 3810 }, { "epoch": 0.34, "learning_rate": 9.402609086819613e-06, "loss": 0.1457, "step": 3820 }, { "epoch": 0.34, "learning_rate": 9.40080971659919e-06, "loss": 0.0986, "step": 3830 }, { "epoch": 0.34, "learning_rate": 9.399010346378769e-06, "loss": 0.1613, "step": 3840 }, { "epoch": 0.34, "learning_rate": 9.397210976158346e-06, "loss": 0.0951, "step": 3850 }, { "epoch": 0.34, "learning_rate": 9.395411605937922e-06, "loss": 0.094, "step": 3860 }, { "epoch": 0.35, "learning_rate": 9.3936122357175e-06, "loss": 0.0467, "step": 3870 }, { "epoch": 0.35, "learning_rate": 9.391812865497076e-06, "loss": 0.1046, "step": 3880 }, { "epoch": 0.35, "learning_rate": 9.390013495276655e-06, "loss": 0.0671, "step": 3890 }, { "epoch": 0.35, "learning_rate": 9.388214125056232e-06, "loss": 0.0632, "step": 3900 }, { "epoch": 0.35, "learning_rate": 9.386414754835808e-06, "loss": 0.1289, "step": 3910 }, { "epoch": 0.35, "learning_rate": 9.384615384615385e-06, "loss": 0.0962, "step": 3920 }, { "epoch": 0.35, "learning_rate": 9.382816014394962e-06, "loss": 0.1108, "step": 3930 }, { "epoch": 0.35, "learning_rate": 9.381016644174539e-06, "loss": 0.1097, "step": 3940 }, { "epoch": 0.35, "learning_rate": 9.379217273954116e-06, "loss": 0.0337, "step": 3950 }, { "epoch": 0.35, "learning_rate": 9.377417903733694e-06, "loss": 0.0564, "step": 3960 }, { "epoch": 0.35, "learning_rate": 9.375618533513271e-06, "loss": 0.086, "step": 3970 }, { "epoch": 0.35, "learning_rate": 9.373819163292848e-06, "loss": 0.0716, "step": 3980 }, { "epoch": 0.36, "learning_rate": 9.372019793072425e-06, "loss": 0.0274, "step": 3990 }, { "epoch": 0.36, "learning_rate": 9.370220422852002e-06, "loss": 0.0945, "step": 4000 }, { "epoch": 0.36, "learning_rate": 9.36842105263158e-06, "loss": 0.2289, "step": 4010 }, { "epoch": 0.36, "learning_rate": 9.366621682411157e-06, "loss": 0.1001, "step": 4020 }, { "epoch": 0.36, "learning_rate": 9.364822312190734e-06, "loss": 0.127, "step": 4030 }, { "epoch": 0.36, "learning_rate": 9.363022941970311e-06, "loss": 0.0575, "step": 4040 }, { "epoch": 0.36, "learning_rate": 9.361223571749888e-06, "loss": 0.14, "step": 4050 }, { "epoch": 0.36, "learning_rate": 9.359424201529466e-06, "loss": 0.1394, "step": 4060 }, { "epoch": 0.36, "learning_rate": 9.357624831309042e-06, "loss": 0.0982, "step": 4070 }, { "epoch": 0.36, "learning_rate": 9.35582546108862e-06, "loss": 0.137, "step": 4080 }, { "epoch": 0.36, "learning_rate": 9.354026090868197e-06, "loss": 0.0678, "step": 4090 }, { "epoch": 0.37, "learning_rate": 9.352226720647774e-06, "loss": 0.1493, "step": 4100 }, { "epoch": 0.37, "learning_rate": 9.35042735042735e-06, "loss": 0.0452, "step": 4110 }, { "epoch": 0.37, "learning_rate": 9.348627980206928e-06, "loss": 0.0618, "step": 4120 }, { "epoch": 0.37, "learning_rate": 9.346828609986506e-06, "loss": 0.0688, "step": 4130 }, { "epoch": 0.37, "learning_rate": 9.345029239766083e-06, "loss": 0.0446, "step": 4140 }, { "epoch": 0.37, "learning_rate": 9.34322986954566e-06, "loss": 0.1038, "step": 4150 }, { "epoch": 0.37, "learning_rate": 9.341430499325237e-06, "loss": 0.0517, "step": 4160 }, { "epoch": 0.37, "learning_rate": 9.339631129104814e-06, "loss": 0.1752, "step": 4170 }, { "epoch": 0.37, "learning_rate": 9.337831758884392e-06, "loss": 0.0554, "step": 4180 }, { "epoch": 0.37, "learning_rate": 9.336032388663967e-06, "loss": 0.0795, "step": 4190 }, { "epoch": 0.37, "learning_rate": 9.334233018443546e-06, "loss": 0.0729, "step": 4200 }, { "epoch": 0.38, "learning_rate": 9.332433648223123e-06, "loss": 0.1994, "step": 4210 }, { "epoch": 0.38, "learning_rate": 9.3306342780027e-06, "loss": 0.1256, "step": 4220 }, { "epoch": 0.38, "learning_rate": 9.328834907782276e-06, "loss": 0.0416, "step": 4230 }, { "epoch": 0.38, "learning_rate": 9.327035537561853e-06, "loss": 0.0455, "step": 4240 }, { "epoch": 0.38, "learning_rate": 9.325236167341432e-06, "loss": 0.1276, "step": 4250 }, { "epoch": 0.38, "learning_rate": 9.323436797121009e-06, "loss": 0.1538, "step": 4260 }, { "epoch": 0.38, "learning_rate": 9.321637426900586e-06, "loss": 0.072, "step": 4270 }, { "epoch": 0.38, "learning_rate": 9.319838056680162e-06, "loss": 0.1352, "step": 4280 }, { "epoch": 0.38, "learning_rate": 9.31803868645974e-06, "loss": 0.0613, "step": 4290 }, { "epoch": 0.38, "learning_rate": 9.316239316239318e-06, "loss": 0.1611, "step": 4300 }, { "epoch": 0.38, "learning_rate": 9.314439946018893e-06, "loss": 0.0851, "step": 4310 }, { "epoch": 0.39, "learning_rate": 9.312640575798472e-06, "loss": 0.1095, "step": 4320 }, { "epoch": 0.39, "learning_rate": 9.310841205578048e-06, "loss": 0.1378, "step": 4330 }, { "epoch": 0.39, "learning_rate": 9.309041835357625e-06, "loss": 0.0981, "step": 4340 }, { "epoch": 0.39, "learning_rate": 9.307242465137202e-06, "loss": 0.0396, "step": 4350 }, { "epoch": 0.39, "learning_rate": 9.305443094916779e-06, "loss": 0.1012, "step": 4360 }, { "epoch": 0.39, "learning_rate": 9.303643724696358e-06, "loss": 0.1195, "step": 4370 }, { "epoch": 0.39, "learning_rate": 9.301844354475934e-06, "loss": 0.0449, "step": 4380 }, { "epoch": 0.39, "learning_rate": 9.300044984255511e-06, "loss": 0.0778, "step": 4390 }, { "epoch": 0.39, "learning_rate": 9.298245614035088e-06, "loss": 0.0422, "step": 4400 }, { "epoch": 0.39, "learning_rate": 9.296446243814665e-06, "loss": 0.1016, "step": 4410 }, { "epoch": 0.39, "learning_rate": 9.294646873594244e-06, "loss": 0.1021, "step": 4420 }, { "epoch": 0.4, "learning_rate": 9.29284750337382e-06, "loss": 0.1569, "step": 4430 }, { "epoch": 0.4, "learning_rate": 9.291048133153397e-06, "loss": 0.1313, "step": 4440 }, { "epoch": 0.4, "learning_rate": 9.289248762932974e-06, "loss": 0.0682, "step": 4450 }, { "epoch": 0.4, "learning_rate": 9.287449392712551e-06, "loss": 0.1345, "step": 4460 }, { "epoch": 0.4, "learning_rate": 9.285650022492128e-06, "loss": 0.0751, "step": 4470 }, { "epoch": 0.4, "learning_rate": 9.283850652271705e-06, "loss": 0.1235, "step": 4480 }, { "epoch": 0.4, "learning_rate": 9.282051282051283e-06, "loss": 0.1123, "step": 4490 }, { "epoch": 0.4, "learning_rate": 9.28025191183086e-06, "loss": 0.0496, "step": 4500 }, { "epoch": 0.4, "eval_accuracy": 0.9711984841307437, "eval_f1": 0.9508832399827659, "eval_loss": 0.08512861281633377, "eval_precision": 0.9297525013164823, "eval_recall": 0.9729968037032954, "eval_runtime": 436.7164, "eval_samples_per_second": 72.507, "eval_steps_per_second": 4.534, "step": 4500 }, { "epoch": 0.4, "learning_rate": 9.278452541610437e-06, "loss": 0.1278, "step": 4510 }, { "epoch": 0.4, "learning_rate": 9.276653171390014e-06, "loss": 0.0659, "step": 4520 }, { "epoch": 0.4, "learning_rate": 9.27485380116959e-06, "loss": 0.1262, "step": 4530 }, { "epoch": 0.4, "learning_rate": 9.27305443094917e-06, "loss": 0.0633, "step": 4540 }, { "epoch": 0.41, "learning_rate": 9.271255060728746e-06, "loss": 0.1094, "step": 4550 }, { "epoch": 0.41, "learning_rate": 9.269455690508323e-06, "loss": 0.1054, "step": 4560 }, { "epoch": 0.41, "learning_rate": 9.2676563202879e-06, "loss": 0.0375, "step": 4570 }, { "epoch": 0.41, "learning_rate": 9.265856950067477e-06, "loss": 0.0434, "step": 4580 }, { "epoch": 0.41, "learning_rate": 9.264057579847054e-06, "loss": 0.0875, "step": 4590 }, { "epoch": 0.41, "learning_rate": 9.26225820962663e-06, "loss": 0.0963, "step": 4600 }, { "epoch": 0.41, "learning_rate": 9.260458839406209e-06, "loss": 0.1243, "step": 4610 }, { "epoch": 0.41, "learning_rate": 9.258659469185786e-06, "loss": 0.1053, "step": 4620 }, { "epoch": 0.41, "learning_rate": 9.256860098965363e-06, "loss": 0.0566, "step": 4630 }, { "epoch": 0.41, "learning_rate": 9.25506072874494e-06, "loss": 0.0947, "step": 4640 }, { "epoch": 0.41, "learning_rate": 9.253261358524516e-06, "loss": 0.1326, "step": 4650 }, { "epoch": 0.42, "learning_rate": 9.251461988304095e-06, "loss": 0.1622, "step": 4660 }, { "epoch": 0.42, "learning_rate": 9.249662618083672e-06, "loss": 0.0438, "step": 4670 }, { "epoch": 0.42, "learning_rate": 9.247863247863249e-06, "loss": 0.0346, "step": 4680 }, { "epoch": 0.42, "learning_rate": 9.246063877642826e-06, "loss": 0.0647, "step": 4690 }, { "epoch": 0.42, "learning_rate": 9.244264507422402e-06, "loss": 0.2105, "step": 4700 }, { "epoch": 0.42, "learning_rate": 9.242465137201981e-06, "loss": 0.0517, "step": 4710 }, { "epoch": 0.42, "learning_rate": 9.240665766981556e-06, "loss": 0.0369, "step": 4720 }, { "epoch": 0.42, "learning_rate": 9.238866396761135e-06, "loss": 0.0792, "step": 4730 }, { "epoch": 0.42, "learning_rate": 9.237067026540712e-06, "loss": 0.1478, "step": 4740 }, { "epoch": 0.42, "learning_rate": 9.235267656320288e-06, "loss": 0.0625, "step": 4750 }, { "epoch": 0.42, "learning_rate": 9.233468286099865e-06, "loss": 0.1502, "step": 4760 }, { "epoch": 0.43, "learning_rate": 9.231668915879442e-06, "loss": 0.1159, "step": 4770 }, { "epoch": 0.43, "learning_rate": 9.22986954565902e-06, "loss": 0.0383, "step": 4780 }, { "epoch": 0.43, "learning_rate": 9.228070175438598e-06, "loss": 0.0419, "step": 4790 }, { "epoch": 0.43, "learning_rate": 9.226270805218174e-06, "loss": 0.0887, "step": 4800 }, { "epoch": 0.43, "learning_rate": 9.224471434997751e-06, "loss": 0.0085, "step": 4810 }, { "epoch": 0.43, "learning_rate": 9.222672064777328e-06, "loss": 0.1119, "step": 4820 }, { "epoch": 0.43, "learning_rate": 9.220872694556907e-06, "loss": 0.0901, "step": 4830 }, { "epoch": 0.43, "learning_rate": 9.219073324336482e-06, "loss": 0.1066, "step": 4840 }, { "epoch": 0.43, "learning_rate": 9.21727395411606e-06, "loss": 0.096, "step": 4850 }, { "epoch": 0.43, "learning_rate": 9.215474583895637e-06, "loss": 0.1194, "step": 4860 }, { "epoch": 0.43, "learning_rate": 9.213675213675214e-06, "loss": 0.0887, "step": 4870 }, { "epoch": 0.44, "learning_rate": 9.211875843454791e-06, "loss": 0.0658, "step": 4880 }, { "epoch": 0.44, "learning_rate": 9.210076473234368e-06, "loss": 0.1369, "step": 4890 }, { "epoch": 0.44, "learning_rate": 9.208277103013946e-06, "loss": 0.1153, "step": 4900 }, { "epoch": 0.44, "learning_rate": 9.206477732793523e-06, "loss": 0.0603, "step": 4910 }, { "epoch": 0.44, "learning_rate": 9.2046783625731e-06, "loss": 0.1061, "step": 4920 }, { "epoch": 0.44, "learning_rate": 9.202878992352677e-06, "loss": 0.0689, "step": 4930 }, { "epoch": 0.44, "learning_rate": 9.201079622132254e-06, "loss": 0.1351, "step": 4940 }, { "epoch": 0.44, "learning_rate": 9.199280251911832e-06, "loss": 0.1209, "step": 4950 }, { "epoch": 0.44, "learning_rate": 9.19748088169141e-06, "loss": 0.0371, "step": 4960 }, { "epoch": 0.44, "learning_rate": 9.195681511470986e-06, "loss": 0.0833, "step": 4970 }, { "epoch": 0.44, "learning_rate": 9.193882141250563e-06, "loss": 0.1354, "step": 4980 }, { "epoch": 0.44, "learning_rate": 9.19208277103014e-06, "loss": 0.2174, "step": 4990 }, { "epoch": 0.45, "learning_rate": 9.190283400809717e-06, "loss": 0.1419, "step": 5000 }, { "epoch": 0.45, "learning_rate": 9.188484030589294e-06, "loss": 0.1198, "step": 5010 }, { "epoch": 0.45, "learning_rate": 9.186684660368872e-06, "loss": 0.1137, "step": 5020 }, { "epoch": 0.45, "learning_rate": 9.184885290148449e-06, "loss": 0.094, "step": 5030 }, { "epoch": 0.45, "learning_rate": 9.183085919928026e-06, "loss": 0.0805, "step": 5040 }, { "epoch": 0.45, "learning_rate": 9.181286549707603e-06, "loss": 0.0516, "step": 5050 }, { "epoch": 0.45, "learning_rate": 9.17948717948718e-06, "loss": 0.0696, "step": 5060 }, { "epoch": 0.45, "learning_rate": 9.177687809266758e-06, "loss": 0.0769, "step": 5070 }, { "epoch": 0.45, "learning_rate": 9.175888439046335e-06, "loss": 0.0959, "step": 5080 }, { "epoch": 0.45, "learning_rate": 9.174089068825912e-06, "loss": 0.0542, "step": 5090 }, { "epoch": 0.45, "learning_rate": 9.172289698605489e-06, "loss": 0.0712, "step": 5100 }, { "epoch": 0.46, "learning_rate": 9.170490328385066e-06, "loss": 0.0537, "step": 5110 }, { "epoch": 0.46, "learning_rate": 9.168690958164642e-06, "loss": 0.1163, "step": 5120 }, { "epoch": 0.46, "learning_rate": 9.16689158794422e-06, "loss": 0.0765, "step": 5130 }, { "epoch": 0.46, "learning_rate": 9.165092217723798e-06, "loss": 0.2003, "step": 5140 }, { "epoch": 0.46, "learning_rate": 9.163292847503375e-06, "loss": 0.0108, "step": 5150 }, { "epoch": 0.46, "learning_rate": 9.161493477282952e-06, "loss": 0.1257, "step": 5160 }, { "epoch": 0.46, "learning_rate": 9.159694107062528e-06, "loss": 0.1449, "step": 5170 }, { "epoch": 0.46, "learning_rate": 9.157894736842105e-06, "loss": 0.053, "step": 5180 }, { "epoch": 0.46, "learning_rate": 9.156095366621684e-06, "loss": 0.0475, "step": 5190 }, { "epoch": 0.46, "learning_rate": 9.15429599640126e-06, "loss": 0.029, "step": 5200 }, { "epoch": 0.46, "learning_rate": 9.152496626180838e-06, "loss": 0.0036, "step": 5210 }, { "epoch": 0.47, "learning_rate": 9.150697255960414e-06, "loss": 0.0076, "step": 5220 }, { "epoch": 0.47, "learning_rate": 9.148897885739991e-06, "loss": 0.0655, "step": 5230 }, { "epoch": 0.47, "learning_rate": 9.14709851551957e-06, "loss": 0.0534, "step": 5240 }, { "epoch": 0.47, "learning_rate": 9.145299145299145e-06, "loss": 0.0378, "step": 5250 }, { "epoch": 0.47, "learning_rate": 9.143499775078724e-06, "loss": 0.1112, "step": 5260 }, { "epoch": 0.47, "learning_rate": 9.1417004048583e-06, "loss": 0.1258, "step": 5270 }, { "epoch": 0.47, "learning_rate": 9.139901034637877e-06, "loss": 0.1018, "step": 5280 }, { "epoch": 0.47, "learning_rate": 9.138101664417454e-06, "loss": 0.1015, "step": 5290 }, { "epoch": 0.47, "learning_rate": 9.136302294197031e-06, "loss": 0.2089, "step": 5300 }, { "epoch": 0.47, "learning_rate": 9.13450292397661e-06, "loss": 0.0352, "step": 5310 }, { "epoch": 0.47, "learning_rate": 9.132703553756186e-06, "loss": 0.1233, "step": 5320 }, { "epoch": 0.48, "learning_rate": 9.130904183535763e-06, "loss": 0.1092, "step": 5330 }, { "epoch": 0.48, "learning_rate": 9.12910481331534e-06, "loss": 0.0103, "step": 5340 }, { "epoch": 0.48, "learning_rate": 9.127305443094917e-06, "loss": 0.0304, "step": 5350 }, { "epoch": 0.48, "learning_rate": 9.125506072874496e-06, "loss": 0.1365, "step": 5360 }, { "epoch": 0.48, "learning_rate": 9.12370670265407e-06, "loss": 0.1332, "step": 5370 }, { "epoch": 0.48, "learning_rate": 9.12190733243365e-06, "loss": 0.0738, "step": 5380 }, { "epoch": 0.48, "learning_rate": 9.120107962213226e-06, "loss": 0.1559, "step": 5390 }, { "epoch": 0.48, "learning_rate": 9.118308591992803e-06, "loss": 0.139, "step": 5400 }, { "epoch": 0.48, "learning_rate": 9.11650922177238e-06, "loss": 0.1414, "step": 5410 }, { "epoch": 0.48, "learning_rate": 9.114709851551957e-06, "loss": 0.1316, "step": 5420 }, { "epoch": 0.48, "learning_rate": 9.112910481331535e-06, "loss": 0.1166, "step": 5430 }, { "epoch": 0.49, "learning_rate": 9.111111111111112e-06, "loss": 0.0909, "step": 5440 }, { "epoch": 0.49, "learning_rate": 9.109311740890689e-06, "loss": 0.1103, "step": 5450 }, { "epoch": 0.49, "learning_rate": 9.107512370670266e-06, "loss": 0.1618, "step": 5460 }, { "epoch": 0.49, "learning_rate": 9.105713000449843e-06, "loss": 0.0721, "step": 5470 }, { "epoch": 0.49, "learning_rate": 9.103913630229421e-06, "loss": 0.1584, "step": 5480 }, { "epoch": 0.49, "learning_rate": 9.102114260008998e-06, "loss": 0.1035, "step": 5490 }, { "epoch": 0.49, "learning_rate": 9.100314889788575e-06, "loss": 0.028, "step": 5500 }, { "epoch": 0.49, "learning_rate": 9.098515519568152e-06, "loss": 0.1161, "step": 5510 }, { "epoch": 0.49, "learning_rate": 9.096716149347729e-06, "loss": 0.1424, "step": 5520 }, { "epoch": 0.49, "learning_rate": 9.094916779127306e-06, "loss": 0.1636, "step": 5530 }, { "epoch": 0.49, "learning_rate": 9.093117408906882e-06, "loss": 0.1167, "step": 5540 }, { "epoch": 0.49, "learning_rate": 9.091318038686461e-06, "loss": 0.0831, "step": 5550 }, { "epoch": 0.5, "learning_rate": 9.089518668466038e-06, "loss": 0.1418, "step": 5560 }, { "epoch": 0.5, "learning_rate": 9.087719298245615e-06, "loss": 0.1449, "step": 5570 }, { "epoch": 0.5, "learning_rate": 9.085919928025192e-06, "loss": 0.1729, "step": 5580 }, { "epoch": 0.5, "learning_rate": 9.084120557804768e-06, "loss": 0.125, "step": 5590 }, { "epoch": 0.5, "learning_rate": 9.082321187584347e-06, "loss": 0.0741, "step": 5600 }, { "epoch": 0.5, "learning_rate": 9.080521817363924e-06, "loss": 0.0811, "step": 5610 }, { "epoch": 0.5, "learning_rate": 9.0787224471435e-06, "loss": 0.1593, "step": 5620 }, { "epoch": 0.5, "learning_rate": 9.076923076923078e-06, "loss": 0.1943, "step": 5630 }, { "epoch": 0.5, "learning_rate": 9.075123706702654e-06, "loss": 0.0983, "step": 5640 }, { "epoch": 0.5, "learning_rate": 9.073324336482231e-06, "loss": 0.1401, "step": 5650 }, { "epoch": 0.5, "learning_rate": 9.071524966261808e-06, "loss": 0.1739, "step": 5660 }, { "epoch": 0.51, "learning_rate": 9.069725596041387e-06, "loss": 0.1541, "step": 5670 }, { "epoch": 0.51, "learning_rate": 9.067926225820964e-06, "loss": 0.1699, "step": 5680 }, { "epoch": 0.51, "learning_rate": 9.06612685560054e-06, "loss": 0.0351, "step": 5690 }, { "epoch": 0.51, "learning_rate": 9.064327485380117e-06, "loss": 0.0941, "step": 5700 }, { "epoch": 0.51, "learning_rate": 9.062528115159694e-06, "loss": 0.1932, "step": 5710 }, { "epoch": 0.51, "learning_rate": 9.060728744939273e-06, "loss": 0.0819, "step": 5720 }, { "epoch": 0.51, "learning_rate": 9.05892937471885e-06, "loss": 0.2269, "step": 5730 }, { "epoch": 0.51, "learning_rate": 9.057130004498426e-06, "loss": 0.0967, "step": 5740 }, { "epoch": 0.51, "learning_rate": 9.055330634278003e-06, "loss": 0.0594, "step": 5750 }, { "epoch": 0.51, "learning_rate": 9.05353126405758e-06, "loss": 0.2919, "step": 5760 }, { "epoch": 0.51, "learning_rate": 9.051731893837159e-06, "loss": 0.0618, "step": 5770 }, { "epoch": 0.52, "learning_rate": 9.049932523616734e-06, "loss": 0.1166, "step": 5780 }, { "epoch": 0.52, "learning_rate": 9.048133153396312e-06, "loss": 0.124, "step": 5790 }, { "epoch": 0.52, "learning_rate": 9.04633378317589e-06, "loss": 0.1845, "step": 5800 }, { "epoch": 0.52, "learning_rate": 9.044534412955466e-06, "loss": 0.1357, "step": 5810 }, { "epoch": 0.52, "learning_rate": 9.042735042735043e-06, "loss": 0.1428, "step": 5820 }, { "epoch": 0.52, "learning_rate": 9.04093567251462e-06, "loss": 0.1932, "step": 5830 }, { "epoch": 0.52, "learning_rate": 9.039136302294198e-06, "loss": 0.0585, "step": 5840 }, { "epoch": 0.52, "learning_rate": 9.037336932073775e-06, "loss": 0.0983, "step": 5850 }, { "epoch": 0.52, "learning_rate": 9.035537561853352e-06, "loss": 0.0764, "step": 5860 }, { "epoch": 0.52, "learning_rate": 9.033738191632929e-06, "loss": 0.1484, "step": 5870 }, { "epoch": 0.52, "learning_rate": 9.031938821412506e-06, "loss": 0.1209, "step": 5880 }, { "epoch": 0.53, "learning_rate": 9.030139451192084e-06, "loss": 0.0421, "step": 5890 }, { "epoch": 0.53, "learning_rate": 9.02834008097166e-06, "loss": 0.0788, "step": 5900 }, { "epoch": 0.53, "learning_rate": 9.026540710751238e-06, "loss": 0.0637, "step": 5910 }, { "epoch": 0.53, "learning_rate": 9.024741340530815e-06, "loss": 0.0962, "step": 5920 }, { "epoch": 0.53, "learning_rate": 9.022941970310392e-06, "loss": 0.0681, "step": 5930 }, { "epoch": 0.53, "learning_rate": 9.021142600089969e-06, "loss": 0.1625, "step": 5940 }, { "epoch": 0.53, "learning_rate": 9.019343229869546e-06, "loss": 0.1218, "step": 5950 }, { "epoch": 0.53, "learning_rate": 9.017543859649124e-06, "loss": 0.0072, "step": 5960 }, { "epoch": 0.53, "learning_rate": 9.015744489428701e-06, "loss": 0.1611, "step": 5970 }, { "epoch": 0.53, "learning_rate": 9.013945119208278e-06, "loss": 0.1108, "step": 5980 }, { "epoch": 0.53, "learning_rate": 9.012145748987855e-06, "loss": 0.1706, "step": 5990 }, { "epoch": 0.53, "learning_rate": 9.010346378767432e-06, "loss": 0.0967, "step": 6000 }, { "epoch": 0.53, "eval_accuracy": 0.9735354492341702, "eval_f1": 0.954082191780822, "eval_loss": 0.09180905669927597, "eval_precision": 0.9486760379208892, "eval_recall": 0.9595503141188141, "eval_runtime": 436.4287, "eval_samples_per_second": 72.555, "eval_steps_per_second": 4.537, "step": 6000 }, { "epoch": 0.54, "learning_rate": 9.00854700854701e-06, "loss": 0.0714, "step": 6010 }, { "epoch": 0.54, "learning_rate": 9.006747638326587e-06, "loss": 0.1627, "step": 6020 }, { "epoch": 0.54, "learning_rate": 9.004948268106164e-06, "loss": 0.0399, "step": 6030 }, { "epoch": 0.54, "learning_rate": 9.00314889788574e-06, "loss": 0.0568, "step": 6040 }, { "epoch": 0.54, "learning_rate": 9.001349527665318e-06, "loss": 0.1471, "step": 6050 }, { "epoch": 0.54, "learning_rate": 8.999550157444894e-06, "loss": 0.0979, "step": 6060 }, { "epoch": 0.54, "learning_rate": 8.997750787224471e-06, "loss": 0.0177, "step": 6070 }, { "epoch": 0.54, "learning_rate": 8.99595141700405e-06, "loss": 0.1574, "step": 6080 }, { "epoch": 0.54, "learning_rate": 8.994152046783627e-06, "loss": 0.1683, "step": 6090 }, { "epoch": 0.54, "learning_rate": 8.992352676563204e-06, "loss": 0.1035, "step": 6100 }, { "epoch": 0.54, "learning_rate": 8.99055330634278e-06, "loss": 0.1381, "step": 6110 }, { "epoch": 0.55, "learning_rate": 8.988753936122357e-06, "loss": 0.1026, "step": 6120 }, { "epoch": 0.55, "learning_rate": 8.986954565901936e-06, "loss": 0.0454, "step": 6130 }, { "epoch": 0.55, "learning_rate": 8.985155195681513e-06, "loss": 0.0653, "step": 6140 }, { "epoch": 0.55, "learning_rate": 8.98335582546109e-06, "loss": 0.0681, "step": 6150 }, { "epoch": 0.55, "learning_rate": 8.981556455240666e-06, "loss": 0.1286, "step": 6160 }, { "epoch": 0.55, "learning_rate": 8.979757085020243e-06, "loss": 0.0823, "step": 6170 }, { "epoch": 0.55, "learning_rate": 8.97795771479982e-06, "loss": 0.0489, "step": 6180 }, { "epoch": 0.55, "learning_rate": 8.976158344579397e-06, "loss": 0.0605, "step": 6190 }, { "epoch": 0.55, "learning_rate": 8.974358974358976e-06, "loss": 0.054, "step": 6200 }, { "epoch": 0.55, "learning_rate": 8.972559604138552e-06, "loss": 0.1616, "step": 6210 }, { "epoch": 0.55, "learning_rate": 8.97076023391813e-06, "loss": 0.0963, "step": 6220 }, { "epoch": 0.56, "learning_rate": 8.968960863697706e-06, "loss": 0.1245, "step": 6230 }, { "epoch": 0.56, "learning_rate": 8.967161493477283e-06, "loss": 0.092, "step": 6240 }, { "epoch": 0.56, "learning_rate": 8.965362123256862e-06, "loss": 0.0728, "step": 6250 }, { "epoch": 0.56, "learning_rate": 8.963562753036438e-06, "loss": 0.1138, "step": 6260 }, { "epoch": 0.56, "learning_rate": 8.961763382816015e-06, "loss": 0.1143, "step": 6270 }, { "epoch": 0.56, "learning_rate": 8.959964012595592e-06, "loss": 0.0954, "step": 6280 }, { "epoch": 0.56, "learning_rate": 8.958164642375169e-06, "loss": 0.0606, "step": 6290 }, { "epoch": 0.56, "learning_rate": 8.956365272154746e-06, "loss": 0.0667, "step": 6300 }, { "epoch": 0.56, "learning_rate": 8.954565901934323e-06, "loss": 0.0505, "step": 6310 }, { "epoch": 0.56, "learning_rate": 8.952766531713901e-06, "loss": 0.1699, "step": 6320 }, { "epoch": 0.56, "learning_rate": 8.950967161493478e-06, "loss": 0.0568, "step": 6330 }, { "epoch": 0.57, "learning_rate": 8.949167791273055e-06, "loss": 0.1413, "step": 6340 }, { "epoch": 0.57, "learning_rate": 8.947368421052632e-06, "loss": 0.0903, "step": 6350 }, { "epoch": 0.57, "learning_rate": 8.945569050832209e-06, "loss": 0.1166, "step": 6360 }, { "epoch": 0.57, "learning_rate": 8.943769680611787e-06, "loss": 0.0781, "step": 6370 }, { "epoch": 0.57, "learning_rate": 8.941970310391364e-06, "loss": 0.0385, "step": 6380 }, { "epoch": 0.57, "learning_rate": 8.940170940170941e-06, "loss": 0.0508, "step": 6390 }, { "epoch": 0.57, "learning_rate": 8.938371569950518e-06, "loss": 0.0893, "step": 6400 }, { "epoch": 0.57, "learning_rate": 8.936572199730095e-06, "loss": 0.1399, "step": 6410 }, { "epoch": 0.57, "learning_rate": 8.934772829509673e-06, "loss": 0.0252, "step": 6420 }, { "epoch": 0.57, "learning_rate": 8.932973459289248e-06, "loss": 0.0843, "step": 6430 }, { "epoch": 0.57, "learning_rate": 8.931174089068827e-06, "loss": 0.0468, "step": 6440 }, { "epoch": 0.58, "learning_rate": 8.929374718848404e-06, "loss": 0.0843, "step": 6450 }, { "epoch": 0.58, "learning_rate": 8.92757534862798e-06, "loss": 0.0478, "step": 6460 }, { "epoch": 0.58, "learning_rate": 8.925775978407558e-06, "loss": 0.1855, "step": 6470 }, { "epoch": 0.58, "learning_rate": 8.923976608187134e-06, "loss": 0.033, "step": 6480 }, { "epoch": 0.58, "learning_rate": 8.922177237966713e-06, "loss": 0.1028, "step": 6490 }, { "epoch": 0.58, "learning_rate": 8.92037786774629e-06, "loss": 0.1134, "step": 6500 }, { "epoch": 0.58, "learning_rate": 8.918578497525867e-06, "loss": 0.0714, "step": 6510 }, { "epoch": 0.58, "learning_rate": 8.916779127305444e-06, "loss": 0.1058, "step": 6520 }, { "epoch": 0.58, "learning_rate": 8.91497975708502e-06, "loss": 0.0672, "step": 6530 }, { "epoch": 0.58, "learning_rate": 8.913180386864599e-06, "loss": 0.1081, "step": 6540 }, { "epoch": 0.58, "learning_rate": 8.911381016644174e-06, "loss": 0.0558, "step": 6550 }, { "epoch": 0.58, "learning_rate": 8.909581646423753e-06, "loss": 0.1227, "step": 6560 }, { "epoch": 0.59, "learning_rate": 8.90778227620333e-06, "loss": 0.1211, "step": 6570 }, { "epoch": 0.59, "learning_rate": 8.905982905982906e-06, "loss": 0.0534, "step": 6580 }, { "epoch": 0.59, "learning_rate": 8.904183535762483e-06, "loss": 0.1179, "step": 6590 }, { "epoch": 0.59, "learning_rate": 8.90238416554206e-06, "loss": 0.1224, "step": 6600 }, { "epoch": 0.59, "learning_rate": 8.900584795321639e-06, "loss": 0.0878, "step": 6610 }, { "epoch": 0.59, "learning_rate": 8.898785425101216e-06, "loss": 0.12, "step": 6620 }, { "epoch": 0.59, "learning_rate": 8.896986054880792e-06, "loss": 0.1773, "step": 6630 }, { "epoch": 0.59, "learning_rate": 8.89518668466037e-06, "loss": 0.0991, "step": 6640 }, { "epoch": 0.59, "learning_rate": 8.893387314439946e-06, "loss": 0.1262, "step": 6650 }, { "epoch": 0.59, "learning_rate": 8.891587944219525e-06, "loss": 0.0043, "step": 6660 }, { "epoch": 0.59, "learning_rate": 8.889788573999102e-06, "loss": 0.0821, "step": 6670 }, { "epoch": 0.6, "learning_rate": 8.887989203778678e-06, "loss": 0.212, "step": 6680 }, { "epoch": 0.6, "learning_rate": 8.886189833558255e-06, "loss": 0.0983, "step": 6690 }, { "epoch": 0.6, "learning_rate": 8.884390463337832e-06, "loss": 0.1402, "step": 6700 }, { "epoch": 0.6, "learning_rate": 8.882591093117409e-06, "loss": 0.159, "step": 6710 }, { "epoch": 0.6, "learning_rate": 8.880791722896986e-06, "loss": 0.1258, "step": 6720 }, { "epoch": 0.6, "learning_rate": 8.878992352676564e-06, "loss": 0.0954, "step": 6730 }, { "epoch": 0.6, "learning_rate": 8.877192982456141e-06, "loss": 0.0412, "step": 6740 }, { "epoch": 0.6, "learning_rate": 8.875393612235718e-06, "loss": 0.078, "step": 6750 }, { "epoch": 0.6, "learning_rate": 8.873594242015295e-06, "loss": 0.1009, "step": 6760 }, { "epoch": 0.6, "learning_rate": 8.871794871794872e-06, "loss": 0.099, "step": 6770 }, { "epoch": 0.6, "learning_rate": 8.86999550157445e-06, "loss": 0.0313, "step": 6780 }, { "epoch": 0.61, "learning_rate": 8.868196131354027e-06, "loss": 0.0375, "step": 6790 }, { "epoch": 0.61, "learning_rate": 8.866396761133604e-06, "loss": 0.0885, "step": 6800 }, { "epoch": 0.61, "learning_rate": 8.864597390913181e-06, "loss": 0.0761, "step": 6810 }, { "epoch": 0.61, "learning_rate": 8.862798020692758e-06, "loss": 0.0892, "step": 6820 }, { "epoch": 0.61, "learning_rate": 8.860998650472335e-06, "loss": 0.1141, "step": 6830 }, { "epoch": 0.61, "learning_rate": 8.859199280251912e-06, "loss": 0.0784, "step": 6840 }, { "epoch": 0.61, "learning_rate": 8.85739991003149e-06, "loss": 0.1206, "step": 6850 }, { "epoch": 0.61, "learning_rate": 8.855600539811067e-06, "loss": 0.054, "step": 6860 }, { "epoch": 0.61, "learning_rate": 8.853801169590644e-06, "loss": 0.2208, "step": 6870 }, { "epoch": 0.61, "learning_rate": 8.85200179937022e-06, "loss": 0.0689, "step": 6880 }, { "epoch": 0.61, "learning_rate": 8.850202429149798e-06, "loss": 0.0898, "step": 6890 }, { "epoch": 0.62, "learning_rate": 8.848403058929376e-06, "loss": 0.1601, "step": 6900 }, { "epoch": 0.62, "learning_rate": 8.846603688708953e-06, "loss": 0.0574, "step": 6910 }, { "epoch": 0.62, "learning_rate": 8.84480431848853e-06, "loss": 0.0564, "step": 6920 }, { "epoch": 0.62, "learning_rate": 8.843004948268107e-06, "loss": 0.0072, "step": 6930 }, { "epoch": 0.62, "learning_rate": 8.841205578047684e-06, "loss": 0.0368, "step": 6940 }, { "epoch": 0.62, "learning_rate": 8.839406207827262e-06, "loss": 0.1152, "step": 6950 }, { "epoch": 0.62, "learning_rate": 8.837606837606837e-06, "loss": 0.0871, "step": 6960 }, { "epoch": 0.62, "learning_rate": 8.835807467386416e-06, "loss": 0.0936, "step": 6970 }, { "epoch": 0.62, "learning_rate": 8.834008097165993e-06, "loss": 0.2966, "step": 6980 }, { "epoch": 0.62, "learning_rate": 8.83220872694557e-06, "loss": 0.1432, "step": 6990 }, { "epoch": 0.62, "learning_rate": 8.830409356725146e-06, "loss": 0.135, "step": 7000 }, { "epoch": 0.63, "learning_rate": 8.828609986504723e-06, "loss": 0.0894, "step": 7010 }, { "epoch": 0.63, "learning_rate": 8.826810616284302e-06, "loss": 0.1052, "step": 7020 }, { "epoch": 0.63, "learning_rate": 8.825011246063879e-06, "loss": 0.1165, "step": 7030 }, { "epoch": 0.63, "learning_rate": 8.823211875843456e-06, "loss": 0.0955, "step": 7040 }, { "epoch": 0.63, "learning_rate": 8.821412505623032e-06, "loss": 0.0356, "step": 7050 }, { "epoch": 0.63, "learning_rate": 8.81961313540261e-06, "loss": 0.143, "step": 7060 }, { "epoch": 0.63, "learning_rate": 8.817813765182188e-06, "loss": 0.0743, "step": 7070 }, { "epoch": 0.63, "learning_rate": 8.816014394961763e-06, "loss": 0.0865, "step": 7080 }, { "epoch": 0.63, "learning_rate": 8.814215024741342e-06, "loss": 0.1262, "step": 7090 }, { "epoch": 0.63, "learning_rate": 8.812415654520918e-06, "loss": 0.0891, "step": 7100 }, { "epoch": 0.63, "learning_rate": 8.810616284300495e-06, "loss": 0.1062, "step": 7110 }, { "epoch": 0.63, "learning_rate": 8.808816914080072e-06, "loss": 0.0651, "step": 7120 }, { "epoch": 0.64, "learning_rate": 8.807017543859649e-06, "loss": 0.1016, "step": 7130 }, { "epoch": 0.64, "learning_rate": 8.805218173639228e-06, "loss": 0.1007, "step": 7140 }, { "epoch": 0.64, "learning_rate": 8.803418803418804e-06, "loss": 0.0866, "step": 7150 }, { "epoch": 0.64, "learning_rate": 8.801619433198381e-06, "loss": 0.0866, "step": 7160 }, { "epoch": 0.64, "learning_rate": 8.799820062977958e-06, "loss": 0.0524, "step": 7170 }, { "epoch": 0.64, "learning_rate": 8.798020692757535e-06, "loss": 0.0092, "step": 7180 }, { "epoch": 0.64, "learning_rate": 8.796221322537114e-06, "loss": 0.0843, "step": 7190 }, { "epoch": 0.64, "learning_rate": 8.79442195231669e-06, "loss": 0.0962, "step": 7200 }, { "epoch": 0.64, "learning_rate": 8.792622582096267e-06, "loss": 0.0755, "step": 7210 }, { "epoch": 0.64, "learning_rate": 8.790823211875844e-06, "loss": 0.1832, "step": 7220 }, { "epoch": 0.64, "learning_rate": 8.789023841655421e-06, "loss": 0.1657, "step": 7230 }, { "epoch": 0.65, "learning_rate": 8.787224471434998e-06, "loss": 0.0969, "step": 7240 }, { "epoch": 0.65, "learning_rate": 8.785425101214575e-06, "loss": 0.1005, "step": 7250 }, { "epoch": 0.65, "learning_rate": 8.783625730994153e-06, "loss": 0.0896, "step": 7260 }, { "epoch": 0.65, "learning_rate": 8.78182636077373e-06, "loss": 0.0941, "step": 7270 }, { "epoch": 0.65, "learning_rate": 8.780026990553307e-06, "loss": 0.1214, "step": 7280 }, { "epoch": 0.65, "learning_rate": 8.778227620332884e-06, "loss": 0.1319, "step": 7290 }, { "epoch": 0.65, "learning_rate": 8.77642825011246e-06, "loss": 0.0806, "step": 7300 }, { "epoch": 0.65, "learning_rate": 8.77462887989204e-06, "loss": 0.1304, "step": 7310 }, { "epoch": 0.65, "learning_rate": 8.772829509671616e-06, "loss": 0.0723, "step": 7320 }, { "epoch": 0.65, "learning_rate": 8.771030139451193e-06, "loss": 0.176, "step": 7330 }, { "epoch": 0.65, "learning_rate": 8.76923076923077e-06, "loss": 0.0992, "step": 7340 }, { "epoch": 0.66, "learning_rate": 8.767431399010347e-06, "loss": 0.128, "step": 7350 }, { "epoch": 0.66, "learning_rate": 8.765632028789924e-06, "loss": 0.0585, "step": 7360 }, { "epoch": 0.66, "learning_rate": 8.7638326585695e-06, "loss": 0.2104, "step": 7370 }, { "epoch": 0.66, "learning_rate": 8.762033288349079e-06, "loss": 0.0491, "step": 7380 }, { "epoch": 0.66, "learning_rate": 8.760233918128656e-06, "loss": 0.1178, "step": 7390 }, { "epoch": 0.66, "learning_rate": 8.758434547908233e-06, "loss": 0.181, "step": 7400 }, { "epoch": 0.66, "learning_rate": 8.75663517768781e-06, "loss": 0.1329, "step": 7410 }, { "epoch": 0.66, "learning_rate": 8.754835807467386e-06, "loss": 0.1067, "step": 7420 }, { "epoch": 0.66, "learning_rate": 8.753036437246965e-06, "loss": 0.1344, "step": 7430 }, { "epoch": 0.66, "learning_rate": 8.751237067026542e-06, "loss": 0.0359, "step": 7440 }, { "epoch": 0.66, "learning_rate": 8.749437696806119e-06, "loss": 0.0587, "step": 7450 }, { "epoch": 0.67, "learning_rate": 8.747638326585696e-06, "loss": 0.0763, "step": 7460 }, { "epoch": 0.67, "learning_rate": 8.745838956365272e-06, "loss": 0.1267, "step": 7470 }, { "epoch": 0.67, "learning_rate": 8.744039586144851e-06, "loss": 0.0407, "step": 7480 }, { "epoch": 0.67, "learning_rate": 8.742240215924426e-06, "loss": 0.0741, "step": 7490 }, { "epoch": 0.67, "learning_rate": 8.740440845704005e-06, "loss": 0.0914, "step": 7500 }, { "epoch": 0.67, "eval_accuracy": 0.971293225959261, "eval_f1": 0.95092057664273, "eval_loss": 0.12410593777894974, "eval_precision": 0.9320491109229466, "eval_recall": 0.9705720268929792, "eval_runtime": 436.6013, "eval_samples_per_second": 72.526, "eval_steps_per_second": 4.535, "step": 7500 }, { "epoch": 0.67, "learning_rate": 8.738641475483582e-06, "loss": 0.1633, "step": 7510 }, { "epoch": 0.67, "learning_rate": 8.736842105263158e-06, "loss": 0.1843, "step": 7520 }, { "epoch": 0.67, "learning_rate": 8.735042735042735e-06, "loss": 0.1008, "step": 7530 }, { "epoch": 0.67, "learning_rate": 8.733243364822312e-06, "loss": 0.0962, "step": 7540 }, { "epoch": 0.67, "learning_rate": 8.73144399460189e-06, "loss": 0.0271, "step": 7550 }, { "epoch": 0.67, "learning_rate": 8.729644624381468e-06, "loss": 0.1435, "step": 7560 }, { "epoch": 0.67, "learning_rate": 8.727845254161044e-06, "loss": 0.0794, "step": 7570 }, { "epoch": 0.68, "learning_rate": 8.726045883940621e-06, "loss": 0.1544, "step": 7580 }, { "epoch": 0.68, "learning_rate": 8.724246513720198e-06, "loss": 0.0104, "step": 7590 }, { "epoch": 0.68, "learning_rate": 8.722447143499777e-06, "loss": 0.0687, "step": 7600 }, { "epoch": 0.68, "learning_rate": 8.720647773279352e-06, "loss": 0.1329, "step": 7610 }, { "epoch": 0.68, "learning_rate": 8.71884840305893e-06, "loss": 0.0721, "step": 7620 }, { "epoch": 0.68, "learning_rate": 8.717049032838507e-06, "loss": 0.1454, "step": 7630 }, { "epoch": 0.68, "learning_rate": 8.715249662618084e-06, "loss": 0.0498, "step": 7640 }, { "epoch": 0.68, "learning_rate": 8.713450292397661e-06, "loss": 0.1081, "step": 7650 }, { "epoch": 0.68, "learning_rate": 8.711650922177238e-06, "loss": 0.072, "step": 7660 }, { "epoch": 0.68, "learning_rate": 8.709851551956816e-06, "loss": 0.0765, "step": 7670 }, { "epoch": 0.68, "learning_rate": 8.708052181736393e-06, "loss": 0.1375, "step": 7680 }, { "epoch": 0.69, "learning_rate": 8.70625281151597e-06, "loss": 0.1537, "step": 7690 }, { "epoch": 0.69, "learning_rate": 8.704453441295547e-06, "loss": 0.097, "step": 7700 }, { "epoch": 0.69, "learning_rate": 8.702654071075124e-06, "loss": 0.1346, "step": 7710 }, { "epoch": 0.69, "learning_rate": 8.700854700854702e-06, "loss": 0.1619, "step": 7720 }, { "epoch": 0.69, "learning_rate": 8.69905533063428e-06, "loss": 0.1027, "step": 7730 }, { "epoch": 0.69, "learning_rate": 8.697255960413856e-06, "loss": 0.1377, "step": 7740 }, { "epoch": 0.69, "learning_rate": 8.695456590193433e-06, "loss": 0.0791, "step": 7750 }, { "epoch": 0.69, "learning_rate": 8.693657219973012e-06, "loss": 0.0814, "step": 7760 }, { "epoch": 0.69, "learning_rate": 8.691857849752587e-06, "loss": 0.078, "step": 7770 }, { "epoch": 0.69, "learning_rate": 8.690058479532164e-06, "loss": 0.0902, "step": 7780 }, { "epoch": 0.69, "learning_rate": 8.688259109311742e-06, "loss": 0.144, "step": 7790 }, { "epoch": 0.7, "learning_rate": 8.686459739091319e-06, "loss": 0.1267, "step": 7800 }, { "epoch": 0.7, "learning_rate": 8.684660368870896e-06, "loss": 0.0564, "step": 7810 }, { "epoch": 0.7, "learning_rate": 8.682860998650473e-06, "loss": 0.0782, "step": 7820 }, { "epoch": 0.7, "learning_rate": 8.68106162843005e-06, "loss": 0.1353, "step": 7830 }, { "epoch": 0.7, "learning_rate": 8.679262258209628e-06, "loss": 0.0428, "step": 7840 }, { "epoch": 0.7, "learning_rate": 8.677462887989205e-06, "loss": 0.0864, "step": 7850 }, { "epoch": 0.7, "learning_rate": 8.675663517768782e-06, "loss": 0.0717, "step": 7860 }, { "epoch": 0.7, "learning_rate": 8.673864147548359e-06, "loss": 0.1192, "step": 7870 }, { "epoch": 0.7, "learning_rate": 8.672064777327936e-06, "loss": 0.0728, "step": 7880 }, { "epoch": 0.7, "learning_rate": 8.670265407107512e-06, "loss": 0.0615, "step": 7890 }, { "epoch": 0.7, "learning_rate": 8.66846603688709e-06, "loss": 0.0884, "step": 7900 }, { "epoch": 0.71, "learning_rate": 8.666666666666668e-06, "loss": 0.1488, "step": 7910 }, { "epoch": 0.71, "learning_rate": 8.664867296446245e-06, "loss": 0.1203, "step": 7920 }, { "epoch": 0.71, "learning_rate": 8.663067926225822e-06, "loss": 0.0469, "step": 7930 }, { "epoch": 0.71, "learning_rate": 8.661268556005398e-06, "loss": 0.0475, "step": 7940 }, { "epoch": 0.71, "learning_rate": 8.659469185784975e-06, "loss": 0.1008, "step": 7950 }, { "epoch": 0.71, "learning_rate": 8.657669815564554e-06, "loss": 0.0544, "step": 7960 }, { "epoch": 0.71, "learning_rate": 8.65587044534413e-06, "loss": 0.1543, "step": 7970 }, { "epoch": 0.71, "learning_rate": 8.654071075123708e-06, "loss": 0.1327, "step": 7980 }, { "epoch": 0.71, "learning_rate": 8.652271704903284e-06, "loss": 0.3129, "step": 7990 }, { "epoch": 0.71, "learning_rate": 8.650472334682861e-06, "loss": 0.0302, "step": 8000 }, { "epoch": 0.71, "learning_rate": 8.64867296446244e-06, "loss": 0.1717, "step": 8010 }, { "epoch": 0.72, "learning_rate": 8.646873594242015e-06, "loss": 0.0878, "step": 8020 }, { "epoch": 0.72, "learning_rate": 8.645074224021594e-06, "loss": 0.0869, "step": 8030 }, { "epoch": 0.72, "learning_rate": 8.64327485380117e-06, "loss": 0.0919, "step": 8040 }, { "epoch": 0.72, "learning_rate": 8.641475483580747e-06, "loss": 0.058, "step": 8050 }, { "epoch": 0.72, "learning_rate": 8.639676113360324e-06, "loss": 0.1208, "step": 8060 }, { "epoch": 0.72, "learning_rate": 8.637876743139901e-06, "loss": 0.0707, "step": 8070 }, { "epoch": 0.72, "learning_rate": 8.63607737291948e-06, "loss": 0.0532, "step": 8080 }, { "epoch": 0.72, "learning_rate": 8.634278002699056e-06, "loss": 0.0889, "step": 8090 }, { "epoch": 0.72, "learning_rate": 8.632478632478633e-06, "loss": 0.0517, "step": 8100 }, { "epoch": 0.72, "learning_rate": 8.63067926225821e-06, "loss": 0.1343, "step": 8110 }, { "epoch": 0.72, "learning_rate": 8.628879892037787e-06, "loss": 0.0304, "step": 8120 }, { "epoch": 0.72, "learning_rate": 8.627080521817366e-06, "loss": 0.1531, "step": 8130 }, { "epoch": 0.73, "learning_rate": 8.62528115159694e-06, "loss": 0.1384, "step": 8140 }, { "epoch": 0.73, "learning_rate": 8.62348178137652e-06, "loss": 0.0572, "step": 8150 }, { "epoch": 0.73, "learning_rate": 8.621682411156096e-06, "loss": 0.043, "step": 8160 }, { "epoch": 0.73, "learning_rate": 8.619883040935673e-06, "loss": 0.0598, "step": 8170 }, { "epoch": 0.73, "learning_rate": 8.61808367071525e-06, "loss": 0.0271, "step": 8180 }, { "epoch": 0.73, "learning_rate": 8.616284300494827e-06, "loss": 0.0278, "step": 8190 }, { "epoch": 0.73, "learning_rate": 8.614484930274405e-06, "loss": 0.0878, "step": 8200 }, { "epoch": 0.73, "learning_rate": 8.612685560053982e-06, "loss": 0.1291, "step": 8210 }, { "epoch": 0.73, "learning_rate": 8.610886189833559e-06, "loss": 0.091, "step": 8220 }, { "epoch": 0.73, "learning_rate": 8.609086819613136e-06, "loss": 0.053, "step": 8230 }, { "epoch": 0.73, "learning_rate": 8.607287449392713e-06, "loss": 0.0576, "step": 8240 }, { "epoch": 0.74, "learning_rate": 8.605488079172291e-06, "loss": 0.1051, "step": 8250 }, { "epoch": 0.74, "learning_rate": 8.603688708951866e-06, "loss": 0.0692, "step": 8260 }, { "epoch": 0.74, "learning_rate": 8.601889338731445e-06, "loss": 0.1337, "step": 8270 }, { "epoch": 0.74, "learning_rate": 8.600089968511022e-06, "loss": 0.0394, "step": 8280 }, { "epoch": 0.74, "learning_rate": 8.598290598290599e-06, "loss": 0.066, "step": 8290 }, { "epoch": 0.74, "learning_rate": 8.596491228070176e-06, "loss": 0.0414, "step": 8300 }, { "epoch": 0.74, "learning_rate": 8.594691857849752e-06, "loss": 0.1076, "step": 8310 }, { "epoch": 0.74, "learning_rate": 8.592892487629331e-06, "loss": 0.1802, "step": 8320 }, { "epoch": 0.74, "learning_rate": 8.591093117408908e-06, "loss": 0.0185, "step": 8330 }, { "epoch": 0.74, "learning_rate": 8.589293747188485e-06, "loss": 0.0604, "step": 8340 }, { "epoch": 0.74, "learning_rate": 8.587494376968062e-06, "loss": 0.1542, "step": 8350 }, { "epoch": 0.75, "learning_rate": 8.585695006747638e-06, "loss": 0.1218, "step": 8360 }, { "epoch": 0.75, "learning_rate": 8.583895636527217e-06, "loss": 0.1441, "step": 8370 }, { "epoch": 0.75, "learning_rate": 8.582096266306794e-06, "loss": 0.0921, "step": 8380 }, { "epoch": 0.75, "learning_rate": 8.58029689608637e-06, "loss": 0.0408, "step": 8390 }, { "epoch": 0.75, "learning_rate": 8.578497525865948e-06, "loss": 0.0247, "step": 8400 }, { "epoch": 0.75, "learning_rate": 8.576698155645524e-06, "loss": 0.0663, "step": 8410 }, { "epoch": 0.75, "learning_rate": 8.574898785425101e-06, "loss": 0.148, "step": 8420 }, { "epoch": 0.75, "learning_rate": 8.573099415204678e-06, "loss": 0.0787, "step": 8430 }, { "epoch": 0.75, "learning_rate": 8.571300044984257e-06, "loss": 0.0468, "step": 8440 }, { "epoch": 0.75, "learning_rate": 8.569500674763834e-06, "loss": 0.1468, "step": 8450 }, { "epoch": 0.75, "learning_rate": 8.56770130454341e-06, "loss": 0.0841, "step": 8460 }, { "epoch": 0.76, "learning_rate": 8.565901934322987e-06, "loss": 0.0974, "step": 8470 }, { "epoch": 0.76, "learning_rate": 8.564102564102564e-06, "loss": 0.0644, "step": 8480 }, { "epoch": 0.76, "learning_rate": 8.562303193882143e-06, "loss": 0.055, "step": 8490 }, { "epoch": 0.76, "learning_rate": 8.56050382366172e-06, "loss": 0.0476, "step": 8500 }, { "epoch": 0.76, "learning_rate": 8.558704453441296e-06, "loss": 0.2432, "step": 8510 }, { "epoch": 0.76, "learning_rate": 8.556905083220873e-06, "loss": 0.1153, "step": 8520 }, { "epoch": 0.76, "learning_rate": 8.55510571300045e-06, "loss": 0.0691, "step": 8530 }, { "epoch": 0.76, "learning_rate": 8.553306342780027e-06, "loss": 0.0634, "step": 8540 }, { "epoch": 0.76, "learning_rate": 8.551506972559604e-06, "loss": 0.1217, "step": 8550 }, { "epoch": 0.76, "learning_rate": 8.549707602339182e-06, "loss": 0.15, "step": 8560 }, { "epoch": 0.76, "learning_rate": 8.54790823211876e-06, "loss": 0.1146, "step": 8570 }, { "epoch": 0.77, "learning_rate": 8.546108861898336e-06, "loss": 0.0232, "step": 8580 }, { "epoch": 0.77, "learning_rate": 8.544309491677913e-06, "loss": 0.1652, "step": 8590 }, { "epoch": 0.77, "learning_rate": 8.54251012145749e-06, "loss": 0.1147, "step": 8600 }, { "epoch": 0.77, "learning_rate": 8.540710751237068e-06, "loss": 0.1156, "step": 8610 }, { "epoch": 0.77, "learning_rate": 8.538911381016645e-06, "loss": 0.0725, "step": 8620 }, { "epoch": 0.77, "learning_rate": 8.537112010796222e-06, "loss": 0.0058, "step": 8630 }, { "epoch": 0.77, "learning_rate": 8.535312640575799e-06, "loss": 0.1656, "step": 8640 }, { "epoch": 0.77, "learning_rate": 8.533513270355376e-06, "loss": 0.0576, "step": 8650 }, { "epoch": 0.77, "learning_rate": 8.531713900134954e-06, "loss": 0.1474, "step": 8660 }, { "epoch": 0.77, "learning_rate": 8.52991452991453e-06, "loss": 0.1511, "step": 8670 }, { "epoch": 0.77, "learning_rate": 8.528115159694108e-06, "loss": 0.1008, "step": 8680 }, { "epoch": 0.77, "learning_rate": 8.526315789473685e-06, "loss": 0.0811, "step": 8690 }, { "epoch": 0.78, "learning_rate": 8.524516419253262e-06, "loss": 0.0772, "step": 8700 }, { "epoch": 0.78, "learning_rate": 8.522717049032839e-06, "loss": 0.0955, "step": 8710 }, { "epoch": 0.78, "learning_rate": 8.520917678812416e-06, "loss": 0.0719, "step": 8720 }, { "epoch": 0.78, "learning_rate": 8.519118308591994e-06, "loss": 0.1383, "step": 8730 }, { "epoch": 0.78, "learning_rate": 8.517318938371571e-06, "loss": 0.0035, "step": 8740 }, { "epoch": 0.78, "learning_rate": 8.515519568151148e-06, "loss": 0.1452, "step": 8750 }, { "epoch": 0.78, "learning_rate": 8.513720197930725e-06, "loss": 0.1435, "step": 8760 }, { "epoch": 0.78, "learning_rate": 8.511920827710302e-06, "loss": 0.1142, "step": 8770 }, { "epoch": 0.78, "learning_rate": 8.51012145748988e-06, "loss": 0.1441, "step": 8780 }, { "epoch": 0.78, "learning_rate": 8.508322087269455e-06, "loss": 0.179, "step": 8790 }, { "epoch": 0.78, "learning_rate": 8.506522717049034e-06, "loss": 0.0934, "step": 8800 }, { "epoch": 0.79, "learning_rate": 8.50472334682861e-06, "loss": 0.0647, "step": 8810 }, { "epoch": 0.79, "learning_rate": 8.502923976608188e-06, "loss": 0.0749, "step": 8820 }, { "epoch": 0.79, "learning_rate": 8.501124606387764e-06, "loss": 0.1038, "step": 8830 }, { "epoch": 0.79, "learning_rate": 8.499325236167341e-06, "loss": 0.0616, "step": 8840 }, { "epoch": 0.79, "learning_rate": 8.49752586594692e-06, "loss": 0.1128, "step": 8850 }, { "epoch": 0.79, "learning_rate": 8.495726495726497e-06, "loss": 0.1401, "step": 8860 }, { "epoch": 0.79, "learning_rate": 8.493927125506074e-06, "loss": 0.2143, "step": 8870 }, { "epoch": 0.79, "learning_rate": 8.49212775528565e-06, "loss": 0.0586, "step": 8880 }, { "epoch": 0.79, "learning_rate": 8.490328385065227e-06, "loss": 0.0917, "step": 8890 }, { "epoch": 0.79, "learning_rate": 8.488529014844806e-06, "loss": 0.1244, "step": 8900 }, { "epoch": 0.79, "learning_rate": 8.486729644624383e-06, "loss": 0.0877, "step": 8910 }, { "epoch": 0.8, "learning_rate": 8.48493027440396e-06, "loss": 0.0792, "step": 8920 }, { "epoch": 0.8, "learning_rate": 8.483130904183536e-06, "loss": 0.1058, "step": 8930 }, { "epoch": 0.8, "learning_rate": 8.481331533963115e-06, "loss": 0.1133, "step": 8940 }, { "epoch": 0.8, "learning_rate": 8.47953216374269e-06, "loss": 0.0392, "step": 8950 }, { "epoch": 0.8, "learning_rate": 8.477732793522267e-06, "loss": 0.1745, "step": 8960 }, { "epoch": 0.8, "learning_rate": 8.475933423301846e-06, "loss": 0.1019, "step": 8970 }, { "epoch": 0.8, "learning_rate": 8.474134053081422e-06, "loss": 0.166, "step": 8980 }, { "epoch": 0.8, "learning_rate": 8.472334682861e-06, "loss": 0.1927, "step": 8990 }, { "epoch": 0.8, "learning_rate": 8.470535312640576e-06, "loss": 0.1251, "step": 9000 }, { "epoch": 0.8, "eval_accuracy": 0.9701563240170535, "eval_f1": 0.9492399419885051, "eval_loss": 0.09671162813901901, "eval_precision": 0.9258172673931265, "eval_recall": 0.9738785407252287, "eval_runtime": 437.2054, "eval_samples_per_second": 72.426, "eval_steps_per_second": 4.529, "step": 9000 } ], "max_steps": 56075, "num_train_epochs": 5, "total_flos": 7.6243123003392e+16, "trial_name": null, "trial_params": null }