diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -1,8 +1,8 @@ { - "best_metric": 0.9738785407252287, - "best_model_checkpoint": "drive/MyDrive/albertina/checkpoint-9000", - "epoch": 0.8024966562639322, - "global_step": 9000, + "best_metric": 0.9616444395459054, + "best_model_checkpoint": "drive/MyDrive/albertina/checkpoint-3000", + "epoch": 0.26749888542131073, + "global_step": 3000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, @@ -10,5479 +10,1831 @@ { "epoch": 0.0, "learning_rate": 2.0000000000000002e-07, - "loss": 0.7187, + "loss": 0.1336, "step": 10 }, { "epoch": 0.0, "learning_rate": 4.0000000000000003e-07, - "loss": 0.7199, + "loss": 0.1124, "step": 20 }, { "epoch": 0.0, "learning_rate": 6.000000000000001e-07, - "loss": 0.6146, + "loss": 0.1444, "step": 30 }, { "epoch": 0.0, "learning_rate": 8.000000000000001e-07, - "loss": 0.6195, + "loss": 0.0761, "step": 40 }, { "epoch": 0.0, "learning_rate": 1.0000000000000002e-06, - "loss": 0.6245, + "loss": 0.1128, "step": 50 }, { "epoch": 0.01, "learning_rate": 1.2000000000000002e-06, - "loss": 0.6241, + "loss": 0.0611, "step": 60 }, { "epoch": 0.01, "learning_rate": 1.4000000000000001e-06, - "loss": 0.5657, + "loss": 0.1892, "step": 70 }, { "epoch": 0.01, "learning_rate": 1.6000000000000001e-06, - "loss": 0.4948, + "loss": 0.0343, "step": 80 }, { "epoch": 0.01, "learning_rate": 1.8000000000000001e-06, - "loss": 0.5167, + "loss": 0.0828, "step": 90 }, { "epoch": 0.01, "learning_rate": 2.0000000000000003e-06, - "loss": 0.4677, + "loss": 0.1669, "step": 100 }, { "epoch": 0.01, "learning_rate": 2.2e-06, - "loss": 0.4529, + "loss": 0.092, "step": 110 }, { "epoch": 0.01, "learning_rate": 2.4000000000000003e-06, - "loss": 0.4148, + "loss": 0.0867, "step": 120 }, { "epoch": 0.01, "learning_rate": 2.6e-06, - "loss": 0.3799, + "loss": 0.0552, "step": 130 }, { "epoch": 0.01, "learning_rate": 2.8000000000000003e-06, - "loss": 0.3091, + "loss": 0.0261, "step": 140 }, { "epoch": 0.01, "learning_rate": 3e-06, - "loss": 0.3131, + "loss": 0.0743, "step": 150 }, { "epoch": 0.01, "learning_rate": 3.2000000000000003e-06, - "loss": 0.2855, + "loss": 0.0904, "step": 160 }, { "epoch": 0.02, "learning_rate": 3.4000000000000005e-06, - "loss": 0.2601, + "loss": 0.0438, "step": 170 }, { "epoch": 0.02, "learning_rate": 3.6000000000000003e-06, - "loss": 0.193, + "loss": 0.0075, "step": 180 }, { "epoch": 0.02, "learning_rate": 3.8000000000000005e-06, - "loss": 0.2041, + "loss": 0.0208, "step": 190 }, { "epoch": 0.02, "learning_rate": 4.000000000000001e-06, - "loss": 0.326, + "loss": 0.0486, "step": 200 }, { "epoch": 0.02, "learning_rate": 4.2000000000000004e-06, - "loss": 0.1665, + "loss": 0.2147, "step": 210 }, { "epoch": 0.02, "learning_rate": 4.4e-06, - "loss": 0.2885, + "loss": 0.0606, "step": 220 }, { "epoch": 0.02, "learning_rate": 4.600000000000001e-06, - "loss": 0.2399, + "loss": 0.1771, "step": 230 }, { "epoch": 0.02, "learning_rate": 4.800000000000001e-06, - "loss": 0.2109, + "loss": 0.1196, "step": 240 }, { "epoch": 0.02, "learning_rate": 5e-06, - "loss": 0.2259, + "loss": 0.1299, "step": 250 }, { "epoch": 0.02, "learning_rate": 5.2e-06, - "loss": 0.2323, + "loss": 0.0107, "step": 260 }, { "epoch": 0.02, "learning_rate": 5.400000000000001e-06, - "loss": 0.1955, + "loss": 0.1268, "step": 270 }, { "epoch": 0.02, "learning_rate": 5.600000000000001e-06, - "loss": 0.1765, + "loss": 0.1448, "step": 280 }, { "epoch": 0.03, "learning_rate": 5.8e-06, - "loss": 0.1722, + "loss": 0.0086, "step": 290 }, { "epoch": 0.03, "learning_rate": 6e-06, - "loss": 0.1648, + "loss": 0.0789, "step": 300 }, { "epoch": 0.03, "learning_rate": 6.200000000000001e-06, - "loss": 0.1617, + "loss": 0.0931, "step": 310 }, { "epoch": 0.03, "learning_rate": 6.4000000000000006e-06, - "loss": 0.1687, + "loss": 0.0277, "step": 320 }, { "epoch": 0.03, "learning_rate": 6.600000000000001e-06, - "loss": 0.1727, + "loss": 0.0705, "step": 330 }, { "epoch": 0.03, "learning_rate": 6.800000000000001e-06, - "loss": 0.2064, + "loss": 0.0323, "step": 340 }, { "epoch": 0.03, "learning_rate": 7e-06, - "loss": 0.1663, + "loss": 0.1415, "step": 350 }, { "epoch": 0.03, "learning_rate": 7.2000000000000005e-06, - "loss": 0.1032, + "loss": 0.0234, "step": 360 }, { "epoch": 0.03, "learning_rate": 7.4e-06, - "loss": 0.1592, + "loss": 0.0493, "step": 370 }, { "epoch": 0.03, "learning_rate": 7.600000000000001e-06, - "loss": 0.2606, + "loss": 0.0803, "step": 380 }, { "epoch": 0.03, "learning_rate": 7.800000000000002e-06, - "loss": 0.0709, + "loss": 0.0166, "step": 390 }, { "epoch": 0.04, "learning_rate": 8.000000000000001e-06, - "loss": 0.117, + "loss": 0.0832, "step": 400 }, { "epoch": 0.04, "learning_rate": 8.2e-06, - "loss": 0.1836, + "loss": 0.0722, "step": 410 }, { "epoch": 0.04, "learning_rate": 8.400000000000001e-06, - "loss": 0.0998, + "loss": 0.0077, "step": 420 }, { "epoch": 0.04, "learning_rate": 8.6e-06, - "loss": 0.1288, + "loss": 0.082, "step": 430 }, { "epoch": 0.04, "learning_rate": 8.8e-06, - "loss": 0.1877, + "loss": 0.0458, "step": 440 }, { "epoch": 0.04, "learning_rate": 9e-06, - "loss": 0.1695, + "loss": 0.0319, "step": 450 }, { "epoch": 0.04, "learning_rate": 9.200000000000002e-06, - "loss": 0.1736, + "loss": 0.096, "step": 460 }, { "epoch": 0.04, "learning_rate": 9.4e-06, - "loss": 0.195, + "loss": 0.0713, "step": 470 }, { "epoch": 0.04, "learning_rate": 9.600000000000001e-06, - "loss": 0.1599, + "loss": 0.1074, "step": 480 }, { "epoch": 0.04, "learning_rate": 9.800000000000001e-06, - "loss": 0.0596, + "loss": 0.0695, "step": 490 }, { "epoch": 0.04, "learning_rate": 1e-05, - "loss": 0.0599, + "loss": 0.0408, "step": 500 }, { "epoch": 0.05, "learning_rate": 9.998200629779578e-06, - "loss": 0.2157, + "loss": 0.0412, "step": 510 }, { "epoch": 0.05, "learning_rate": 9.996401259559155e-06, - "loss": 0.1006, + "loss": 0.092, "step": 520 }, { "epoch": 0.05, "learning_rate": 9.994601889338731e-06, - "loss": 0.2082, + "loss": 0.0777, "step": 530 }, { "epoch": 0.05, "learning_rate": 9.99280251911831e-06, - "loss": 0.2305, + "loss": 0.0442, "step": 540 }, { "epoch": 0.05, "learning_rate": 9.991003148897887e-06, - "loss": 0.1995, + "loss": 0.1633, "step": 550 }, { "epoch": 0.05, "learning_rate": 9.989203778677464e-06, - "loss": 0.1551, + "loss": 0.0985, "step": 560 }, { "epoch": 0.05, "learning_rate": 9.98740440845704e-06, - "loss": 0.0971, + "loss": 0.0819, "step": 570 }, { "epoch": 0.05, "learning_rate": 9.985605038236617e-06, - "loss": 0.2007, + "loss": 0.1122, "step": 580 }, { "epoch": 0.05, "learning_rate": 9.983805668016196e-06, - "loss": 0.1312, + "loss": 0.0936, "step": 590 }, { "epoch": 0.05, "learning_rate": 9.982006297795773e-06, - "loss": 0.1931, + "loss": 0.0693, "step": 600 }, { "epoch": 0.05, "learning_rate": 9.98020692757535e-06, - "loss": 0.223, + "loss": 0.0854, "step": 610 }, { "epoch": 0.06, "learning_rate": 9.978407557354927e-06, - "loss": 0.3252, + "loss": 0.137, "step": 620 }, { "epoch": 0.06, "learning_rate": 9.976608187134503e-06, - "loss": 0.0981, + "loss": 0.0019, "step": 630 }, { "epoch": 0.06, "learning_rate": 9.97480881691408e-06, - "loss": 0.1815, + "loss": 0.1362, "step": 640 }, { "epoch": 0.06, "learning_rate": 9.973009446693657e-06, - "loss": 0.1782, + "loss": 0.0923, "step": 650 }, { "epoch": 0.06, "learning_rate": 9.971210076473236e-06, - "loss": 0.1804, + "loss": 0.0557, "step": 660 }, { "epoch": 0.06, "learning_rate": 9.969410706252813e-06, - "loss": 0.1271, + "loss": 0.0505, "step": 670 }, { "epoch": 0.06, "learning_rate": 9.96761133603239e-06, - "loss": 0.1316, + "loss": 0.0414, "step": 680 }, { "epoch": 0.06, "learning_rate": 9.965811965811966e-06, - "loss": 0.2092, + "loss": 0.074, "step": 690 }, { "epoch": 0.06, "learning_rate": 9.964012595591543e-06, - "loss": 0.0782, + "loss": 0.1012, "step": 700 }, { "epoch": 0.06, "learning_rate": 9.962213225371122e-06, - "loss": 0.2433, + "loss": 0.0792, "step": 710 }, { "epoch": 0.06, "learning_rate": 9.960413855150699e-06, - "loss": 0.1883, + "loss": 0.0928, "step": 720 }, { "epoch": 0.07, "learning_rate": 9.958614484930275e-06, - "loss": 0.032, + "loss": 0.0312, "step": 730 }, { "epoch": 0.07, "learning_rate": 9.956815114709852e-06, - "loss": 0.2181, + "loss": 0.0529, "step": 740 }, { "epoch": 0.07, "learning_rate": 9.955015744489429e-06, - "loss": 0.1828, + "loss": 0.0793, "step": 750 }, { "epoch": 0.07, "learning_rate": 9.953216374269008e-06, - "loss": 0.1206, + "loss": 0.0597, "step": 760 }, { "epoch": 0.07, "learning_rate": 9.951417004048583e-06, - "loss": 0.1253, + "loss": 0.1541, "step": 770 }, { "epoch": 0.07, "learning_rate": 9.949617633828161e-06, - "loss": 0.1047, + "loss": 0.087, "step": 780 }, { "epoch": 0.07, "learning_rate": 9.947818263607738e-06, - "loss": 0.1174, + "loss": 0.034, "step": 790 }, { "epoch": 0.07, "learning_rate": 9.946018893387315e-06, - "loss": 0.3025, + "loss": 0.0948, "step": 800 }, { "epoch": 0.07, "learning_rate": 9.944219523166892e-06, - "loss": 0.0581, + "loss": 0.0078, "step": 810 }, { "epoch": 0.07, "learning_rate": 9.942420152946469e-06, - "loss": 0.2664, + "loss": 0.0797, "step": 820 }, { "epoch": 0.07, "learning_rate": 9.940620782726047e-06, - "loss": 0.1218, + "loss": 0.0859, "step": 830 }, { "epoch": 0.07, "learning_rate": 9.938821412505624e-06, - "loss": 0.1708, + "loss": 0.1256, "step": 840 }, { "epoch": 0.08, "learning_rate": 9.937022042285201e-06, - "loss": 0.1626, + "loss": 0.0775, "step": 850 }, { "epoch": 0.08, "learning_rate": 9.935222672064778e-06, - "loss": 0.0553, + "loss": 0.0539, "step": 860 }, { "epoch": 0.08, "learning_rate": 9.933423301844355e-06, - "loss": 0.1209, + "loss": 0.0795, "step": 870 }, { "epoch": 0.08, "learning_rate": 9.931623931623933e-06, - "loss": 0.11, + "loss": 0.0099, "step": 880 }, { "epoch": 0.08, "learning_rate": 9.929824561403509e-06, - "loss": 0.0945, + "loss": 0.0253, "step": 890 }, { "epoch": 0.08, "learning_rate": 9.928025191183087e-06, - "loss": 0.2105, + "loss": 0.0442, "step": 900 }, { "epoch": 0.08, "learning_rate": 9.926225820962664e-06, - "loss": 0.1548, + "loss": 0.1785, "step": 910 }, { "epoch": 0.08, "learning_rate": 9.92442645074224e-06, - "loss": 0.1819, + "loss": 0.1241, "step": 920 }, { "epoch": 0.08, "learning_rate": 9.922627080521818e-06, - "loss": 0.1461, + "loss": 0.1332, "step": 930 }, { "epoch": 0.08, "learning_rate": 9.920827710301395e-06, - "loss": 0.1917, + "loss": 0.0407, "step": 940 }, { "epoch": 0.08, "learning_rate": 9.919028340080973e-06, - "loss": 0.0796, + "loss": 0.0546, "step": 950 }, { "epoch": 0.09, "learning_rate": 9.91722896986055e-06, - "loss": 0.1768, + "loss": 0.1157, "step": 960 }, { "epoch": 0.09, "learning_rate": 9.915429599640127e-06, - "loss": 0.1726, + "loss": 0.0697, "step": 970 }, { "epoch": 0.09, "learning_rate": 9.913630229419704e-06, - "loss": 0.1244, + "loss": 0.1253, "step": 980 }, { "epoch": 0.09, "learning_rate": 9.91183085919928e-06, - "loss": 0.0765, + "loss": 0.0205, "step": 990 }, { "epoch": 0.09, "learning_rate": 9.910031488978859e-06, - "loss": 0.1842, + "loss": 0.1289, "step": 1000 }, { "epoch": 0.09, "learning_rate": 9.908232118758436e-06, - "loss": 0.1192, + "loss": 0.0956, "step": 1010 }, { "epoch": 0.09, "learning_rate": 9.906432748538013e-06, - "loss": 0.132, + "loss": 0.0972, "step": 1020 }, { "epoch": 0.09, "learning_rate": 9.90463337831759e-06, - "loss": 0.1347, + "loss": 0.0199, "step": 1030 }, { "epoch": 0.09, "learning_rate": 9.902834008097167e-06, - "loss": 0.0591, + "loss": 0.0263, "step": 1040 }, { "epoch": 0.09, "learning_rate": 9.901034637876743e-06, - "loss": 0.1588, + "loss": 0.0388, "step": 1050 }, { "epoch": 0.09, "learning_rate": 9.89923526765632e-06, - "loss": 0.1273, + "loss": 0.119, "step": 1060 }, { "epoch": 0.1, "learning_rate": 9.897435897435899e-06, - "loss": 0.0527, + "loss": 0.032, "step": 1070 }, { "epoch": 0.1, "learning_rate": 9.895636527215476e-06, - "loss": 0.1438, + "loss": 0.0877, "step": 1080 }, { "epoch": 0.1, "learning_rate": 9.893837156995053e-06, - "loss": 0.1085, + "loss": 0.0679, "step": 1090 }, { "epoch": 0.1, "learning_rate": 9.89203778677463e-06, - "loss": 0.0793, + "loss": 0.0273, "step": 1100 }, { "epoch": 0.1, "learning_rate": 9.890238416554206e-06, - "loss": 0.1617, + "loss": 0.048, "step": 1110 }, { "epoch": 0.1, "learning_rate": 9.888439046333785e-06, - "loss": 0.0164, + "loss": 0.0054, "step": 1120 }, { "epoch": 0.1, "learning_rate": 9.886639676113362e-06, - "loss": 0.2053, + "loss": 0.0812, "step": 1130 }, { "epoch": 0.1, "learning_rate": 9.884840305892939e-06, - "loss": 0.1774, + "loss": 0.0903, "step": 1140 }, { "epoch": 0.1, "learning_rate": 9.883040935672515e-06, - "loss": 0.1195, + "loss": 0.0092, "step": 1150 }, { "epoch": 0.1, "learning_rate": 9.881241565452092e-06, - "loss": 0.0487, + "loss": 0.0013, "step": 1160 }, { "epoch": 0.1, "learning_rate": 9.879442195231669e-06, - "loss": 0.1629, + "loss": 0.165, "step": 1170 }, { "epoch": 0.11, "learning_rate": 9.877642825011246e-06, - "loss": 0.0964, + "loss": 0.0509, "step": 1180 }, { "epoch": 0.11, "learning_rate": 9.875843454790825e-06, - "loss": 0.1443, + "loss": 0.0653, "step": 1190 }, { "epoch": 0.11, "learning_rate": 9.874044084570401e-06, - "loss": 0.1089, + "loss": 0.0253, "step": 1200 }, { "epoch": 0.11, "learning_rate": 9.872244714349978e-06, - "loss": 0.0233, + "loss": 0.0375, "step": 1210 }, { "epoch": 0.11, "learning_rate": 9.870445344129555e-06, - "loss": 0.0712, + "loss": 0.0689, "step": 1220 }, { "epoch": 0.11, "learning_rate": 9.868645973909132e-06, - "loss": 0.2537, + "loss": 0.0674, "step": 1230 }, { "epoch": 0.11, "learning_rate": 9.86684660368871e-06, - "loss": 0.2621, + "loss": 0.1405, "step": 1240 }, { "epoch": 0.11, "learning_rate": 9.865047233468287e-06, - "loss": 0.1302, + "loss": 0.0604, "step": 1250 }, { "epoch": 0.11, "learning_rate": 9.863247863247864e-06, - "loss": 0.136, + "loss": 0.0388, "step": 1260 }, { "epoch": 0.11, "learning_rate": 9.861448493027441e-06, - "loss": 0.146, + "loss": 0.0627, "step": 1270 }, { "epoch": 0.11, "learning_rate": 9.859649122807018e-06, - "loss": 0.0938, + "loss": 0.0909, "step": 1280 }, { "epoch": 0.12, "learning_rate": 9.857849752586597e-06, - "loss": 0.0718, + "loss": 0.0419, "step": 1290 }, { "epoch": 0.12, "learning_rate": 9.856050382366172e-06, - "loss": 0.1728, + "loss": 0.0019, "step": 1300 }, { "epoch": 0.12, "learning_rate": 9.85425101214575e-06, - "loss": 0.2543, + "loss": 0.1776, "step": 1310 }, { "epoch": 0.12, "learning_rate": 9.852451641925327e-06, - "loss": 0.1327, + "loss": 0.003, "step": 1320 }, { "epoch": 0.12, "learning_rate": 9.850652271704904e-06, - "loss": 0.1058, + "loss": 0.0764, "step": 1330 }, { "epoch": 0.12, "learning_rate": 9.84885290148448e-06, - "loss": 0.13, + "loss": 0.0753, "step": 1340 }, { "epoch": 0.12, "learning_rate": 9.847053531264058e-06, - "loss": 0.1322, + "loss": 0.0831, "step": 1350 }, { "epoch": 0.12, "learning_rate": 9.845254161043636e-06, - "loss": 0.1909, + "loss": 0.1177, "step": 1360 }, { "epoch": 0.12, "learning_rate": 9.843454790823213e-06, - "loss": 0.078, + "loss": 0.1527, "step": 1370 }, { "epoch": 0.12, "learning_rate": 9.84165542060279e-06, - "loss": 0.1836, + "loss": 0.1304, "step": 1380 }, { "epoch": 0.12, "learning_rate": 9.839856050382367e-06, - "loss": 0.1491, + "loss": 0.0945, "step": 1390 }, { "epoch": 0.12, "learning_rate": 9.838056680161944e-06, - "loss": 0.139, + "loss": 0.0516, "step": 1400 }, { "epoch": 0.13, "learning_rate": 9.836257309941522e-06, - "loss": 0.0737, + "loss": 0.0216, "step": 1410 }, { "epoch": 0.13, "learning_rate": 9.834457939721097e-06, - "loss": 0.1696, + "loss": 0.1259, "step": 1420 }, { "epoch": 0.13, "learning_rate": 9.832658569500676e-06, - "loss": 0.2328, + "loss": 0.0558, "step": 1430 }, { "epoch": 0.13, "learning_rate": 9.830859199280253e-06, - "loss": 0.1342, + "loss": 0.0711, "step": 1440 }, { "epoch": 0.13, "learning_rate": 9.82905982905983e-06, - "loss": 0.1243, + "loss": 0.1041, "step": 1450 }, { "epoch": 0.13, "learning_rate": 9.827260458839407e-06, - "loss": 0.1969, + "loss": 0.1173, "step": 1460 }, { "epoch": 0.13, "learning_rate": 9.825461088618983e-06, - "loss": 0.2298, + "loss": 0.1194, "step": 1470 }, { "epoch": 0.13, "learning_rate": 9.823661718398562e-06, - "loss": 0.1167, + "loss": 0.1103, "step": 1480 }, { "epoch": 0.13, "learning_rate": 9.821862348178139e-06, - "loss": 0.123, + "loss": 0.0481, "step": 1490 }, { "epoch": 0.13, "learning_rate": 9.820062977957716e-06, - "loss": 0.1128, + "loss": 0.0413, "step": 1500 }, { "epoch": 0.13, - "eval_accuracy": 0.9694931312174325, - "eval_f1": 0.94606967396159, - "eval_loss": 0.09655023366212845, - "eval_precision": 0.9585926009729607, - "eval_recall": 0.9338697233550094, - "eval_runtime": 436.9466, - "eval_samples_per_second": 72.469, - "eval_steps_per_second": 4.531, + "eval_accuracy": 0.9769145744512869, + "eval_f1": 0.9595305320267951, + "eval_loss": 0.07930105179548264, + "eval_precision": 0.9639599555061179, + "eval_recall": 0.955141629009148, + "eval_runtime": 436.6474, + "eval_samples_per_second": 72.518, + "eval_steps_per_second": 4.535, "step": 1500 }, { "epoch": 0.13, "learning_rate": 9.818263607737293e-06, - "loss": 0.0397, + "loss": 0.0825, "step": 1510 }, { "epoch": 0.14, "learning_rate": 9.81646423751687e-06, - "loss": 0.1572, + "loss": 0.0769, "step": 1520 }, { "epoch": 0.14, "learning_rate": 9.814664867296448e-06, - "loss": 0.1214, + "loss": 0.0901, "step": 1530 }, { "epoch": 0.14, "learning_rate": 9.812865497076025e-06, - "loss": 0.135, + "loss": 0.0237, "step": 1540 }, { "epoch": 0.14, "learning_rate": 9.811066126855602e-06, - "loss": 0.3403, + "loss": 0.1067, "step": 1550 }, { "epoch": 0.14, "learning_rate": 9.809266756635179e-06, - "loss": 0.0645, + "loss": 0.0536, "step": 1560 }, { "epoch": 0.14, "learning_rate": 9.807467386414755e-06, - "loss": 0.1162, + "loss": 0.0372, "step": 1570 }, { "epoch": 0.14, "learning_rate": 9.805668016194332e-06, - "loss": 0.065, + "loss": 0.075, "step": 1580 }, { "epoch": 0.14, "learning_rate": 9.803868645973909e-06, - "loss": 0.0923, + "loss": 0.0022, "step": 1590 }, { "epoch": 0.14, "learning_rate": 9.802069275753488e-06, - "loss": 0.2101, + "loss": 0.1077, "step": 1600 }, { "epoch": 0.14, "learning_rate": 9.800269905533065e-06, - "loss": 0.1123, + "loss": 0.019, "step": 1610 }, { "epoch": 0.14, "learning_rate": 9.798470535312641e-06, - "loss": 0.2323, + "loss": 0.0456, "step": 1620 }, { "epoch": 0.15, "learning_rate": 9.796671165092218e-06, - "loss": 0.0653, + "loss": 0.0074, "step": 1630 }, { "epoch": 0.15, "learning_rate": 9.794871794871795e-06, - "loss": 0.1639, + "loss": 0.0944, "step": 1640 }, { "epoch": 0.15, "learning_rate": 9.793072424651374e-06, - "loss": 0.0505, + "loss": 0.0055, "step": 1650 }, { "epoch": 0.15, "learning_rate": 9.79127305443095e-06, - "loss": 0.1409, + "loss": 0.0979, "step": 1660 }, { "epoch": 0.15, "learning_rate": 9.789473684210527e-06, - "loss": 0.1419, + "loss": 0.2072, "step": 1670 }, { "epoch": 0.15, "learning_rate": 9.787674313990104e-06, - "loss": 0.1625, + "loss": 0.1238, "step": 1680 }, { "epoch": 0.15, "learning_rate": 9.785874943769681e-06, - "loss": 0.0771, + "loss": 0.0366, "step": 1690 }, { "epoch": 0.15, "learning_rate": 9.784075573549258e-06, - "loss": 0.0929, + "loss": 0.0754, "step": 1700 }, { "epoch": 0.15, "learning_rate": 9.782276203328835e-06, - "loss": 0.095, + "loss": 0.042, "step": 1710 }, { "epoch": 0.15, "learning_rate": 9.780476833108413e-06, - "loss": 0.1673, + "loss": 0.0614, "step": 1720 }, { "epoch": 0.15, "learning_rate": 9.77867746288799e-06, - "loss": 0.1376, + "loss": 0.0572, "step": 1730 }, { "epoch": 0.16, "learning_rate": 9.776878092667567e-06, - "loss": 0.0783, + "loss": 0.0323, "step": 1740 }, { "epoch": 0.16, "learning_rate": 9.775078722447144e-06, - "loss": 0.1428, + "loss": 0.058, "step": 1750 }, { "epoch": 0.16, "learning_rate": 9.77327935222672e-06, - "loss": 0.0576, + "loss": 0.0715, "step": 1760 }, { "epoch": 0.16, "learning_rate": 9.7714799820063e-06, - "loss": 0.0739, + "loss": 0.0783, "step": 1770 }, { "epoch": 0.16, "learning_rate": 9.769680611785876e-06, - "loss": 0.09, + "loss": 0.0982, "step": 1780 }, { "epoch": 0.16, "learning_rate": 9.767881241565453e-06, - "loss": 0.0987, + "loss": 0.0858, "step": 1790 }, { "epoch": 0.16, "learning_rate": 9.76608187134503e-06, - "loss": 0.1613, + "loss": 0.0535, "step": 1800 }, { "epoch": 0.16, "learning_rate": 9.764282501124607e-06, - "loss": 0.058, + "loss": 0.0447, "step": 1810 }, { "epoch": 0.16, "learning_rate": 9.762483130904185e-06, - "loss": 0.218, + "loss": 0.0819, "step": 1820 }, { "epoch": 0.16, "learning_rate": 9.76068376068376e-06, - "loss": 0.1083, + "loss": 0.0565, "step": 1830 }, { "epoch": 0.16, "learning_rate": 9.758884390463339e-06, - "loss": 0.1274, + "loss": 0.0829, "step": 1840 }, { "epoch": 0.16, "learning_rate": 9.757085020242916e-06, - "loss": 0.1016, + "loss": 0.1505, "step": 1850 }, { "epoch": 0.17, "learning_rate": 9.755285650022493e-06, - "loss": 0.1442, + "loss": 0.1586, "step": 1860 }, { "epoch": 0.17, "learning_rate": 9.75348627980207e-06, - "loss": 0.0771, + "loss": 0.0098, "step": 1870 }, { "epoch": 0.17, "learning_rate": 9.751686909581647e-06, - "loss": 0.0832, + "loss": 0.1513, "step": 1880 }, { "epoch": 0.17, "learning_rate": 9.749887539361225e-06, - "loss": 0.1264, + "loss": 0.136, "step": 1890 }, { "epoch": 0.17, "learning_rate": 9.748088169140802e-06, - "loss": 0.1182, + "loss": 0.1072, "step": 1900 }, { "epoch": 0.17, "learning_rate": 9.746288798920379e-06, - "loss": 0.1582, + "loss": 0.0829, "step": 1910 }, { "epoch": 0.17, "learning_rate": 9.744489428699956e-06, - "loss": 0.1256, + "loss": 0.0647, "step": 1920 }, { "epoch": 0.17, "learning_rate": 9.742690058479533e-06, - "loss": 0.1081, + "loss": 0.035, "step": 1930 }, { "epoch": 0.17, "learning_rate": 9.740890688259111e-06, - "loss": 0.0922, + "loss": 0.077, "step": 1940 }, { "epoch": 0.17, "learning_rate": 9.739091318038686e-06, - "loss": 0.1155, + "loss": 0.0325, "step": 1950 }, { "epoch": 0.17, "learning_rate": 9.737291947818265e-06, - "loss": 0.0559, + "loss": 0.0155, "step": 1960 }, { "epoch": 0.18, "learning_rate": 9.735492577597842e-06, - "loss": 0.1008, + "loss": 0.041, "step": 1970 }, { "epoch": 0.18, "learning_rate": 9.733693207377419e-06, - "loss": 0.1123, + "loss": 0.1311, "step": 1980 }, { "epoch": 0.18, "learning_rate": 9.731893837156995e-06, - "loss": 0.1095, + "loss": 0.1448, "step": 1990 }, { "epoch": 0.18, "learning_rate": 9.730094466936572e-06, - "loss": 0.1269, + "loss": 0.1902, "step": 2000 }, { "epoch": 0.18, "learning_rate": 9.72829509671615e-06, - "loss": 0.1702, + "loss": 0.1521, "step": 2010 }, { "epoch": 0.18, "learning_rate": 9.726495726495728e-06, - "loss": 0.1762, + "loss": 0.2189, "step": 2020 }, { "epoch": 0.18, "learning_rate": 9.724696356275305e-06, - "loss": 0.0528, + "loss": 0.0697, "step": 2030 }, { "epoch": 0.18, "learning_rate": 9.722896986054881e-06, - "loss": 0.0912, + "loss": 0.0175, "step": 2040 }, { "epoch": 0.18, "learning_rate": 9.721097615834458e-06, - "loss": 0.0528, + "loss": 0.0511, "step": 2050 }, { "epoch": 0.18, "learning_rate": 9.719298245614037e-06, - "loss": 0.1277, + "loss": 0.1222, "step": 2060 }, { "epoch": 0.18, "learning_rate": 9.717498875393614e-06, - "loss": 0.1685, + "loss": 0.0637, "step": 2070 }, { "epoch": 0.19, "learning_rate": 9.71569950517319e-06, - "loss": 0.0825, + "loss": 0.0311, "step": 2080 }, { "epoch": 0.19, "learning_rate": 9.713900134952767e-06, - "loss": 0.1238, + "loss": 0.0493, "step": 2090 }, { "epoch": 0.19, "learning_rate": 9.712100764732344e-06, - "loss": 0.0815, + "loss": 0.0671, "step": 2100 }, { "epoch": 0.19, "learning_rate": 9.710301394511921e-06, - "loss": 0.0942, + "loss": 0.0948, "step": 2110 }, { "epoch": 0.19, "learning_rate": 9.708502024291498e-06, - "loss": 0.0789, + "loss": 0.0475, "step": 2120 }, { "epoch": 0.19, "learning_rate": 9.706702654071076e-06, - "loss": 0.1439, + "loss": 0.042, "step": 2130 }, { "epoch": 0.19, "learning_rate": 9.704903283850653e-06, - "loss": 0.1, + "loss": 0.0755, "step": 2140 }, { "epoch": 0.19, "learning_rate": 9.70310391363023e-06, - "loss": 0.0548, + "loss": 0.0164, "step": 2150 }, { "epoch": 0.19, "learning_rate": 9.701304543409807e-06, - "loss": 0.1491, + "loss": 0.0568, "step": 2160 }, { "epoch": 0.19, "learning_rate": 9.699505173189384e-06, - "loss": 0.063, + "loss": 0.0074, "step": 2170 }, { "epoch": 0.19, "learning_rate": 9.697705802968962e-06, - "loss": 0.2628, + "loss": 0.2288, "step": 2180 }, { "epoch": 0.2, "learning_rate": 9.69590643274854e-06, - "loss": 0.2376, + "loss": 0.1608, "step": 2190 }, { "epoch": 0.2, "learning_rate": 9.694107062528116e-06, - "loss": 0.1094, + "loss": 0.0774, "step": 2200 }, { "epoch": 0.2, "learning_rate": 9.692307692307693e-06, - "loss": 0.143, + "loss": 0.1041, "step": 2210 }, { "epoch": 0.2, "learning_rate": 9.69050832208727e-06, - "loss": 0.1503, + "loss": 0.0561, "step": 2220 }, { "epoch": 0.2, "learning_rate": 9.688708951866847e-06, - "loss": 0.1998, + "loss": 0.1579, "step": 2230 }, { "epoch": 0.2, "learning_rate": 9.686909581646424e-06, - "loss": 0.0649, + "loss": 0.0396, "step": 2240 }, { "epoch": 0.2, "learning_rate": 9.685110211426002e-06, - "loss": 0.024, + "loss": 0.0969, "step": 2250 }, { "epoch": 0.2, "learning_rate": 9.683310841205579e-06, - "loss": 0.119, + "loss": 0.0862, "step": 2260 }, { "epoch": 0.2, "learning_rate": 9.681511470985156e-06, - "loss": 0.228, + "loss": 0.1806, "step": 2270 }, { "epoch": 0.2, "learning_rate": 9.679712100764733e-06, - "loss": 0.1202, + "loss": 0.0316, "step": 2280 }, { "epoch": 0.2, "learning_rate": 9.67791273054431e-06, - "loss": 0.053, + "loss": 0.059, "step": 2290 }, { "epoch": 0.21, "learning_rate": 9.676113360323888e-06, - "loss": 0.1156, + "loss": 0.0042, "step": 2300 }, { "epoch": 0.21, "learning_rate": 9.674313990103465e-06, - "loss": 0.1197, + "loss": 0.0331, "step": 2310 }, { "epoch": 0.21, "learning_rate": 9.672514619883042e-06, - "loss": 0.0872, + "loss": 0.0941, "step": 2320 }, { "epoch": 0.21, "learning_rate": 9.670715249662619e-06, - "loss": 0.1937, + "loss": 0.0765, "step": 2330 }, { "epoch": 0.21, "learning_rate": 9.668915879442196e-06, - "loss": 0.172, + "loss": 0.1153, "step": 2340 }, { "epoch": 0.21, "learning_rate": 9.667116509221774e-06, - "loss": 0.1065, + "loss": 0.208, "step": 2350 }, { "epoch": 0.21, "learning_rate": 9.66531713900135e-06, - "loss": 0.12, + "loss": 0.1071, "step": 2360 }, { "epoch": 0.21, "learning_rate": 9.663517768780928e-06, - "loss": 0.094, + "loss": 0.0316, "step": 2370 }, { "epoch": 0.21, "learning_rate": 9.661718398560505e-06, - "loss": 0.0623, + "loss": 0.0204, "step": 2380 }, { "epoch": 0.21, "learning_rate": 9.659919028340082e-06, - "loss": 0.1098, + "loss": 0.0503, "step": 2390 }, { "epoch": 0.21, "learning_rate": 9.658119658119659e-06, - "loss": 0.1679, + "loss": 0.1018, "step": 2400 }, { "epoch": 0.21, "learning_rate": 9.656320287899235e-06, - "loss": 0.1222, + "loss": 0.0993, "step": 2410 }, { "epoch": 0.22, "learning_rate": 9.654520917678814e-06, - "loss": 0.0483, + "loss": 0.0304, "step": 2420 }, { "epoch": 0.22, "learning_rate": 9.65272154745839e-06, - "loss": 0.1289, + "loss": 0.0567, "step": 2430 }, { "epoch": 0.22, "learning_rate": 9.650922177237968e-06, - "loss": 0.0802, + "loss": 0.0308, "step": 2440 }, { "epoch": 0.22, "learning_rate": 9.649122807017545e-06, - "loss": 0.0691, + "loss": 0.0647, "step": 2450 }, { "epoch": 0.22, "learning_rate": 9.647323436797121e-06, - "loss": 0.2148, + "loss": 0.1115, "step": 2460 }, { "epoch": 0.22, "learning_rate": 9.6455240665767e-06, - "loss": 0.1005, + "loss": 0.0392, "step": 2470 }, { "epoch": 0.22, "learning_rate": 9.643724696356275e-06, - "loss": 0.1019, + "loss": 0.1018, "step": 2480 }, { "epoch": 0.22, "learning_rate": 9.641925326135854e-06, - "loss": 0.1362, + "loss": 0.0271, "step": 2490 }, { "epoch": 0.22, "learning_rate": 9.64012595591543e-06, - "loss": 0.0445, + "loss": 0.1548, "step": 2500 }, { "epoch": 0.22, "learning_rate": 9.638326585695007e-06, - "loss": 0.1021, + "loss": 0.0135, "step": 2510 }, { "epoch": 0.22, "learning_rate": 9.636527215474584e-06, - "loss": 0.2398, + "loss": 0.0794, "step": 2520 }, { "epoch": 0.23, "learning_rate": 9.634727845254161e-06, - "loss": 0.1228, + "loss": 0.0612, "step": 2530 }, { "epoch": 0.23, "learning_rate": 9.63292847503374e-06, - "loss": 0.1529, + "loss": 0.0375, "step": 2540 }, { "epoch": 0.23, "learning_rate": 9.631129104813316e-06, - "loss": 0.0813, + "loss": 0.0702, "step": 2550 }, { "epoch": 0.23, "learning_rate": 9.629329734592893e-06, - "loss": 0.0542, + "loss": 0.0353, "step": 2560 }, { "epoch": 0.23, "learning_rate": 9.62753036437247e-06, - "loss": 0.1951, + "loss": 0.0771, "step": 2570 }, { "epoch": 0.23, "learning_rate": 9.625730994152047e-06, - "loss": 0.0304, + "loss": 0.0078, "step": 2580 }, { "epoch": 0.23, "learning_rate": 9.623931623931626e-06, - "loss": 0.1102, + "loss": 0.0934, "step": 2590 }, { "epoch": 0.23, "learning_rate": 9.6221322537112e-06, - "loss": 0.0727, + "loss": 0.0922, "step": 2600 }, { "epoch": 0.23, "learning_rate": 9.62033288349078e-06, - "loss": 0.136, + "loss": 0.0676, "step": 2610 }, { "epoch": 0.23, "learning_rate": 9.618533513270356e-06, - "loss": 0.1733, + "loss": 0.1065, "step": 2620 }, { "epoch": 0.23, "learning_rate": 9.616734143049933e-06, - "loss": 0.043, + "loss": 0.0321, "step": 2630 }, { "epoch": 0.24, "learning_rate": 9.61493477282951e-06, - "loss": 0.1242, + "loss": 0.0563, "step": 2640 }, { "epoch": 0.24, "learning_rate": 9.613135402609087e-06, - "loss": 0.0807, + "loss": 0.1313, "step": 2650 }, { "epoch": 0.24, "learning_rate": 9.611336032388665e-06, - "loss": 0.084, + "loss": 0.0676, "step": 2660 }, { "epoch": 0.24, "learning_rate": 9.609536662168242e-06, - "loss": 0.0175, + "loss": 0.0306, "step": 2670 }, { "epoch": 0.24, "learning_rate": 9.607737291947819e-06, - "loss": 0.0686, + "loss": 0.0298, "step": 2680 }, { "epoch": 0.24, "learning_rate": 9.605937921727396e-06, - "loss": 0.0984, + "loss": 0.0494, "step": 2690 }, { "epoch": 0.24, "learning_rate": 9.604138551506973e-06, - "loss": 0.0717, + "loss": 0.024, "step": 2700 }, { "epoch": 0.24, "learning_rate": 9.602339181286551e-06, - "loss": 0.0323, + "loss": 0.0462, "step": 2710 }, { "epoch": 0.24, "learning_rate": 9.600539811066128e-06, - "loss": 0.1174, + "loss": 0.0931, "step": 2720 }, { "epoch": 0.24, "learning_rate": 9.598740440845705e-06, - "loss": 0.2252, + "loss": 0.1424, "step": 2730 }, { "epoch": 0.24, "learning_rate": 9.596941070625282e-06, - "loss": 0.1463, + "loss": 0.0485, "step": 2740 }, { "epoch": 0.25, "learning_rate": 9.595141700404859e-06, - "loss": 0.1296, + "loss": 0.1279, "step": 2750 }, { "epoch": 0.25, "learning_rate": 9.593342330184436e-06, - "loss": 0.0498, + "loss": 0.0226, "step": 2760 }, { "epoch": 0.25, "learning_rate": 9.591542959964013e-06, - "loss": 0.0488, + "loss": 0.0528, "step": 2770 }, { "epoch": 0.25, "learning_rate": 9.589743589743591e-06, - "loss": 0.1701, + "loss": 0.0527, "step": 2780 }, { "epoch": 0.25, "learning_rate": 9.587944219523168e-06, - "loss": 0.0986, + "loss": 0.0817, "step": 2790 }, { "epoch": 0.25, "learning_rate": 9.586144849302745e-06, - "loss": 0.0605, + "loss": 0.0079, "step": 2800 }, { "epoch": 0.25, "learning_rate": 9.584345479082322e-06, - "loss": 0.0779, + "loss": 0.1174, "step": 2810 }, { "epoch": 0.25, "learning_rate": 9.582546108861898e-06, - "loss": 0.1724, + "loss": 0.0392, "step": 2820 }, { "epoch": 0.25, "learning_rate": 9.580746738641477e-06, - "loss": 0.0229, + "loss": 0.0268, "step": 2830 }, { "epoch": 0.25, "learning_rate": 9.578947368421054e-06, - "loss": 0.1895, + "loss": 0.0946, "step": 2840 }, { "epoch": 0.25, "learning_rate": 9.57714799820063e-06, - "loss": 0.1627, + "loss": 0.0524, "step": 2850 }, { "epoch": 0.26, "learning_rate": 9.575348627980208e-06, - "loss": 0.061, + "loss": 0.0382, "step": 2860 }, { "epoch": 0.26, "learning_rate": 9.573549257759784e-06, - "loss": 0.0394, + "loss": 0.0014, "step": 2870 }, { "epoch": 0.26, "learning_rate": 9.571749887539361e-06, - "loss": 0.0101, + "loss": 0.0695, "step": 2880 }, { "epoch": 0.26, "learning_rate": 9.569950517318938e-06, - "loss": 0.1223, + "loss": 0.0376, "step": 2890 }, { "epoch": 0.26, "learning_rate": 9.568151147098517e-06, - "loss": 0.0859, + "loss": 0.04, "step": 2900 }, { "epoch": 0.26, "learning_rate": 9.566351776878094e-06, - "loss": 0.0245, + "loss": 0.0388, "step": 2910 }, { "epoch": 0.26, "learning_rate": 9.56455240665767e-06, - "loss": 0.077, + "loss": 0.0649, "step": 2920 }, { "epoch": 0.26, "learning_rate": 9.562753036437247e-06, - "loss": 0.1988, + "loss": 0.1104, "step": 2930 }, { "epoch": 0.26, "learning_rate": 9.560953666216824e-06, - "loss": 0.1803, + "loss": 0.1071, "step": 2940 }, { "epoch": 0.26, "learning_rate": 9.559154295996403e-06, - "loss": 0.0734, + "loss": 0.1165, "step": 2950 }, { "epoch": 0.26, "learning_rate": 9.55735492577598e-06, - "loss": 0.0309, + "loss": 0.0218, "step": 2960 }, { "epoch": 0.26, "learning_rate": 9.555555555555556e-06, - "loss": 0.0958, + "loss": 0.0353, "step": 2970 }, { "epoch": 0.27, "learning_rate": 9.553756185335133e-06, - "loss": 0.1951, + "loss": 0.0902, "step": 2980 }, { "epoch": 0.27, "learning_rate": 9.55195681511471e-06, - "loss": 0.0235, + "loss": 0.0277, "step": 2990 }, { "epoch": 0.27, "learning_rate": 9.550157444894289e-06, - "loss": 0.0854, + "loss": 0.0377, "step": 3000 }, { "epoch": 0.27, - "eval_accuracy": 0.9742618032528028, - "eval_f1": 0.9549798375959787, - "eval_loss": 0.1135290339589119, - "eval_precision": 0.9572535991140643, - "eval_recall": 0.9527168521988317, - "eval_runtime": 437.2717, - "eval_samples_per_second": 72.415, - "eval_steps_per_second": 4.528, + "eval_accuracy": 0.9766303489657351, + "eval_f1": 0.9593183067619572, + "eval_loss": 0.09248499572277069, + "eval_precision": 0.9570034002413075, + "eval_recall": 0.9616444395459054, + "eval_runtime": 436.5505, + "eval_samples_per_second": 72.535, + "eval_steps_per_second": 4.536, "step": 3000 - }, - { - "epoch": 0.27, - "learning_rate": 9.548358074673864e-06, - "loss": 0.0418, - "step": 3010 - }, - { - "epoch": 0.27, - "learning_rate": 9.546558704453442e-06, - "loss": 0.0694, - "step": 3020 - }, - { - "epoch": 0.27, - "learning_rate": 9.54475933423302e-06, - "loss": 0.048, - "step": 3030 - }, - { - "epoch": 0.27, - "learning_rate": 9.542959964012596e-06, - "loss": 0.1657, - "step": 3040 - }, - { - "epoch": 0.27, - "learning_rate": 9.541160593792173e-06, - "loss": 0.0778, - "step": 3050 - }, - { - "epoch": 0.27, - "learning_rate": 9.53936122357175e-06, - "loss": 0.1125, - "step": 3060 - }, - { - "epoch": 0.27, - "learning_rate": 9.537561853351328e-06, - "loss": 0.0662, - "step": 3070 - }, - { - "epoch": 0.27, - "learning_rate": 9.535762483130905e-06, - "loss": 0.1191, - "step": 3080 - }, - { - "epoch": 0.28, - "learning_rate": 9.533963112910482e-06, - "loss": 0.171, - "step": 3090 - }, - { - "epoch": 0.28, - "learning_rate": 9.532163742690059e-06, - "loss": 0.0703, - "step": 3100 - }, - { - "epoch": 0.28, - "learning_rate": 9.530364372469636e-06, - "loss": 0.138, - "step": 3110 - }, - { - "epoch": 0.28, - "learning_rate": 9.528565002249214e-06, - "loss": 0.1945, - "step": 3120 - }, - { - "epoch": 0.28, - "learning_rate": 9.52676563202879e-06, - "loss": 0.1241, - "step": 3130 - }, - { - "epoch": 0.28, - "learning_rate": 9.524966261808368e-06, - "loss": 0.1185, - "step": 3140 - }, - { - "epoch": 0.28, - "learning_rate": 9.523166891587945e-06, - "loss": 0.0739, - "step": 3150 - }, - { - "epoch": 0.28, - "learning_rate": 9.521367521367522e-06, - "loss": 0.0361, - "step": 3160 - }, - { - "epoch": 0.28, - "learning_rate": 9.519568151147099e-06, - "loss": 0.1103, - "step": 3170 - }, - { - "epoch": 0.28, - "learning_rate": 9.517768780926676e-06, - "loss": 0.1141, - "step": 3180 - }, - { - "epoch": 0.28, - "learning_rate": 9.515969410706254e-06, - "loss": 0.0741, - "step": 3190 - }, - { - "epoch": 0.29, - "learning_rate": 9.514170040485831e-06, - "loss": 0.0637, - "step": 3200 - }, - { - "epoch": 0.29, - "learning_rate": 9.512370670265408e-06, - "loss": 0.2084, - "step": 3210 - }, - { - "epoch": 0.29, - "learning_rate": 9.510571300044985e-06, - "loss": 0.1303, - "step": 3220 - }, - { - "epoch": 0.29, - "learning_rate": 9.508771929824562e-06, - "loss": 0.1449, - "step": 3230 - }, - { - "epoch": 0.29, - "learning_rate": 9.50697255960414e-06, - "loss": 0.0977, - "step": 3240 - }, - { - "epoch": 0.29, - "learning_rate": 9.505173189383717e-06, - "loss": 0.0754, - "step": 3250 - }, - { - "epoch": 0.29, - "learning_rate": 9.503373819163294e-06, - "loss": 0.0237, - "step": 3260 - }, - { - "epoch": 0.29, - "learning_rate": 9.50157444894287e-06, - "loss": 0.1629, - "step": 3270 - }, - { - "epoch": 0.29, - "learning_rate": 9.499775078722448e-06, - "loss": 0.1183, - "step": 3280 - }, - { - "epoch": 0.29, - "learning_rate": 9.497975708502024e-06, - "loss": 0.0365, - "step": 3290 - }, - { - "epoch": 0.29, - "learning_rate": 9.496176338281601e-06, - "loss": 0.0068, - "step": 3300 - }, - { - "epoch": 0.3, - "learning_rate": 9.49437696806118e-06, - "loss": 0.2428, - "step": 3310 - }, - { - "epoch": 0.3, - "learning_rate": 9.492577597840757e-06, - "loss": 0.0858, - "step": 3320 - }, - { - "epoch": 0.3, - "learning_rate": 9.490778227620334e-06, - "loss": 0.0579, - "step": 3330 - }, - { - "epoch": 0.3, - "learning_rate": 9.48897885739991e-06, - "loss": 0.0911, - "step": 3340 - }, - { - "epoch": 0.3, - "learning_rate": 9.487179487179487e-06, - "loss": 0.0851, - "step": 3350 - }, - { - "epoch": 0.3, - "learning_rate": 9.485380116959066e-06, - "loss": 0.1322, - "step": 3360 - }, - { - "epoch": 0.3, - "learning_rate": 9.483580746738643e-06, - "loss": 0.0657, - "step": 3370 - }, - { - "epoch": 0.3, - "learning_rate": 9.48178137651822e-06, - "loss": 0.0299, - "step": 3380 - }, - { - "epoch": 0.3, - "learning_rate": 9.479982006297796e-06, - "loss": 0.2368, - "step": 3390 - }, - { - "epoch": 0.3, - "learning_rate": 9.478182636077373e-06, - "loss": 0.064, - "step": 3400 - }, - { - "epoch": 0.3, - "learning_rate": 9.47638326585695e-06, - "loss": 0.1288, - "step": 3410 - }, - { - "epoch": 0.3, - "learning_rate": 9.474583895636527e-06, - "loss": 0.0765, - "step": 3420 - }, - { - "epoch": 0.31, - "learning_rate": 9.472784525416106e-06, - "loss": 0.1403, - "step": 3430 - }, - { - "epoch": 0.31, - "learning_rate": 9.470985155195682e-06, - "loss": 0.0662, - "step": 3440 - }, - { - "epoch": 0.31, - "learning_rate": 9.46918578497526e-06, - "loss": 0.1429, - "step": 3450 - }, - { - "epoch": 0.31, - "learning_rate": 9.467386414754836e-06, - "loss": 0.1111, - "step": 3460 - }, - { - "epoch": 0.31, - "learning_rate": 9.465587044534413e-06, - "loss": 0.1807, - "step": 3470 - }, - { - "epoch": 0.31, - "learning_rate": 9.463787674313992e-06, - "loss": 0.1044, - "step": 3480 - }, - { - "epoch": 0.31, - "learning_rate": 9.461988304093568e-06, - "loss": 0.0654, - "step": 3490 - }, - { - "epoch": 0.31, - "learning_rate": 9.460188933873145e-06, - "loss": 0.0255, - "step": 3500 - }, - { - "epoch": 0.31, - "learning_rate": 9.458389563652722e-06, - "loss": 0.1917, - "step": 3510 - }, - { - "epoch": 0.31, - "learning_rate": 9.456590193432299e-06, - "loss": 0.02, - "step": 3520 - }, - { - "epoch": 0.31, - "learning_rate": 9.454790823211878e-06, - "loss": 0.0694, - "step": 3530 - }, - { - "epoch": 0.32, - "learning_rate": 9.452991452991453e-06, - "loss": 0.1769, - "step": 3540 - }, - { - "epoch": 0.32, - "learning_rate": 9.451192082771031e-06, - "loss": 0.0723, - "step": 3550 - }, - { - "epoch": 0.32, - "learning_rate": 9.449392712550608e-06, - "loss": 0.1041, - "step": 3560 - }, - { - "epoch": 0.32, - "learning_rate": 9.447593342330185e-06, - "loss": 0.0711, - "step": 3570 - }, - { - "epoch": 0.32, - "learning_rate": 9.445793972109762e-06, - "loss": 0.0932, - "step": 3580 - }, - { - "epoch": 0.32, - "learning_rate": 9.443994601889339e-06, - "loss": 0.1224, - "step": 3590 - }, - { - "epoch": 0.32, - "learning_rate": 9.442195231668917e-06, - "loss": 0.0452, - "step": 3600 - }, - { - "epoch": 0.32, - "learning_rate": 9.440395861448494e-06, - "loss": 0.0763, - "step": 3610 - }, - { - "epoch": 0.32, - "learning_rate": 9.438596491228071e-06, - "loss": 0.0422, - "step": 3620 - }, - { - "epoch": 0.32, - "learning_rate": 9.436797121007648e-06, - "loss": 0.0184, - "step": 3630 - }, - { - "epoch": 0.32, - "learning_rate": 9.434997750787225e-06, - "loss": 0.0059, - "step": 3640 - }, - { - "epoch": 0.33, - "learning_rate": 9.433198380566803e-06, - "loss": 0.1277, - "step": 3650 - }, - { - "epoch": 0.33, - "learning_rate": 9.431399010346378e-06, - "loss": 0.0845, - "step": 3660 - }, - { - "epoch": 0.33, - "learning_rate": 9.429599640125957e-06, - "loss": 0.1707, - "step": 3670 - }, - { - "epoch": 0.33, - "learning_rate": 9.427800269905534e-06, - "loss": 0.1214, - "step": 3680 - }, - { - "epoch": 0.33, - "learning_rate": 9.42600089968511e-06, - "loss": 0.0493, - "step": 3690 - }, - { - "epoch": 0.33, - "learning_rate": 9.424201529464688e-06, - "loss": 0.1305, - "step": 3700 - }, - { - "epoch": 0.33, - "learning_rate": 9.422402159244264e-06, - "loss": 0.0809, - "step": 3710 - }, - { - "epoch": 0.33, - "learning_rate": 9.420602789023843e-06, - "loss": 0.0707, - "step": 3720 - }, - { - "epoch": 0.33, - "learning_rate": 9.41880341880342e-06, - "loss": 0.1068, - "step": 3730 - }, - { - "epoch": 0.33, - "learning_rate": 9.417004048582997e-06, - "loss": 0.0152, - "step": 3740 - }, - { - "epoch": 0.33, - "learning_rate": 9.415204678362574e-06, - "loss": 0.0773, - "step": 3750 - }, - { - "epoch": 0.34, - "learning_rate": 9.41340530814215e-06, - "loss": 0.072, - "step": 3760 - }, - { - "epoch": 0.34, - "learning_rate": 9.411605937921729e-06, - "loss": 0.1304, - "step": 3770 - }, - { - "epoch": 0.34, - "learning_rate": 9.409806567701306e-06, - "loss": 0.0825, - "step": 3780 - }, - { - "epoch": 0.34, - "learning_rate": 9.408007197480883e-06, - "loss": 0.1158, - "step": 3790 - }, - { - "epoch": 0.34, - "learning_rate": 9.40620782726046e-06, - "loss": 0.2465, - "step": 3800 - }, - { - "epoch": 0.34, - "learning_rate": 9.404408457040036e-06, - "loss": 0.2057, - "step": 3810 - }, - { - "epoch": 0.34, - "learning_rate": 9.402609086819613e-06, - "loss": 0.1457, - "step": 3820 - }, - { - "epoch": 0.34, - "learning_rate": 9.40080971659919e-06, - "loss": 0.0986, - "step": 3830 - }, - { - "epoch": 0.34, - "learning_rate": 9.399010346378769e-06, - "loss": 0.1613, - "step": 3840 - }, - { - "epoch": 0.34, - "learning_rate": 9.397210976158346e-06, - "loss": 0.0951, - "step": 3850 - }, - { - "epoch": 0.34, - "learning_rate": 9.395411605937922e-06, - "loss": 0.094, - "step": 3860 - }, - { - "epoch": 0.35, - "learning_rate": 9.3936122357175e-06, - "loss": 0.0467, - "step": 3870 - }, - { - "epoch": 0.35, - "learning_rate": 9.391812865497076e-06, - "loss": 0.1046, - "step": 3880 - }, - { - "epoch": 0.35, - "learning_rate": 9.390013495276655e-06, - "loss": 0.0671, - "step": 3890 - }, - { - "epoch": 0.35, - "learning_rate": 9.388214125056232e-06, - "loss": 0.0632, - "step": 3900 - }, - { - "epoch": 0.35, - "learning_rate": 9.386414754835808e-06, - "loss": 0.1289, - "step": 3910 - }, - { - "epoch": 0.35, - "learning_rate": 9.384615384615385e-06, - "loss": 0.0962, - "step": 3920 - }, - { - "epoch": 0.35, - "learning_rate": 9.382816014394962e-06, - "loss": 0.1108, - "step": 3930 - }, - { - "epoch": 0.35, - "learning_rate": 9.381016644174539e-06, - "loss": 0.1097, - "step": 3940 - }, - { - "epoch": 0.35, - "learning_rate": 9.379217273954116e-06, - "loss": 0.0337, - "step": 3950 - }, - { - "epoch": 0.35, - "learning_rate": 9.377417903733694e-06, - "loss": 0.0564, - "step": 3960 - }, - { - "epoch": 0.35, - "learning_rate": 9.375618533513271e-06, - "loss": 0.086, - "step": 3970 - }, - { - "epoch": 0.35, - "learning_rate": 9.373819163292848e-06, - "loss": 0.0716, - "step": 3980 - }, - { - "epoch": 0.36, - "learning_rate": 9.372019793072425e-06, - "loss": 0.0274, - "step": 3990 - }, - { - "epoch": 0.36, - "learning_rate": 9.370220422852002e-06, - "loss": 0.0945, - "step": 4000 - }, - { - "epoch": 0.36, - "learning_rate": 9.36842105263158e-06, - "loss": 0.2289, - "step": 4010 - }, - { - "epoch": 0.36, - "learning_rate": 9.366621682411157e-06, - "loss": 0.1001, - "step": 4020 - }, - { - "epoch": 0.36, - "learning_rate": 9.364822312190734e-06, - "loss": 0.127, - "step": 4030 - }, - { - "epoch": 0.36, - "learning_rate": 9.363022941970311e-06, - "loss": 0.0575, - "step": 4040 - }, - { - "epoch": 0.36, - "learning_rate": 9.361223571749888e-06, - "loss": 0.14, - "step": 4050 - }, - { - "epoch": 0.36, - "learning_rate": 9.359424201529466e-06, - "loss": 0.1394, - "step": 4060 - }, - { - "epoch": 0.36, - "learning_rate": 9.357624831309042e-06, - "loss": 0.0982, - "step": 4070 - }, - { - "epoch": 0.36, - "learning_rate": 9.35582546108862e-06, - "loss": 0.137, - "step": 4080 - }, - { - "epoch": 0.36, - "learning_rate": 9.354026090868197e-06, - "loss": 0.0678, - "step": 4090 - }, - { - "epoch": 0.37, - "learning_rate": 9.352226720647774e-06, - "loss": 0.1493, - "step": 4100 - }, - { - "epoch": 0.37, - "learning_rate": 9.35042735042735e-06, - "loss": 0.0452, - "step": 4110 - }, - { - "epoch": 0.37, - "learning_rate": 9.348627980206928e-06, - "loss": 0.0618, - "step": 4120 - }, - { - "epoch": 0.37, - "learning_rate": 9.346828609986506e-06, - "loss": 0.0688, - "step": 4130 - }, - { - "epoch": 0.37, - "learning_rate": 9.345029239766083e-06, - "loss": 0.0446, - "step": 4140 - }, - { - "epoch": 0.37, - "learning_rate": 9.34322986954566e-06, - "loss": 0.1038, - "step": 4150 - }, - { - "epoch": 0.37, - "learning_rate": 9.341430499325237e-06, - "loss": 0.0517, - "step": 4160 - }, - { - "epoch": 0.37, - "learning_rate": 9.339631129104814e-06, - "loss": 0.1752, - "step": 4170 - }, - { - "epoch": 0.37, - "learning_rate": 9.337831758884392e-06, - "loss": 0.0554, - "step": 4180 - }, - { - "epoch": 0.37, - "learning_rate": 9.336032388663967e-06, - "loss": 0.0795, - "step": 4190 - }, - { - "epoch": 0.37, - "learning_rate": 9.334233018443546e-06, - "loss": 0.0729, - "step": 4200 - }, - { - "epoch": 0.38, - "learning_rate": 9.332433648223123e-06, - "loss": 0.1994, - "step": 4210 - }, - { - "epoch": 0.38, - "learning_rate": 9.3306342780027e-06, - "loss": 0.1256, - "step": 4220 - }, - { - "epoch": 0.38, - "learning_rate": 9.328834907782276e-06, - "loss": 0.0416, - "step": 4230 - }, - { - "epoch": 0.38, - "learning_rate": 9.327035537561853e-06, - "loss": 0.0455, - "step": 4240 - }, - { - "epoch": 0.38, - "learning_rate": 9.325236167341432e-06, - "loss": 0.1276, - "step": 4250 - }, - { - "epoch": 0.38, - "learning_rate": 9.323436797121009e-06, - "loss": 0.1538, - "step": 4260 - }, - { - "epoch": 0.38, - "learning_rate": 9.321637426900586e-06, - "loss": 0.072, - "step": 4270 - }, - { - "epoch": 0.38, - "learning_rate": 9.319838056680162e-06, - "loss": 0.1352, - "step": 4280 - }, - { - "epoch": 0.38, - "learning_rate": 9.31803868645974e-06, - "loss": 0.0613, - "step": 4290 - }, - { - "epoch": 0.38, - "learning_rate": 9.316239316239318e-06, - "loss": 0.1611, - "step": 4300 - }, - { - "epoch": 0.38, - "learning_rate": 9.314439946018893e-06, - "loss": 0.0851, - "step": 4310 - }, - { - "epoch": 0.39, - "learning_rate": 9.312640575798472e-06, - "loss": 0.1095, - "step": 4320 - }, - { - "epoch": 0.39, - "learning_rate": 9.310841205578048e-06, - "loss": 0.1378, - "step": 4330 - }, - { - "epoch": 0.39, - "learning_rate": 9.309041835357625e-06, - "loss": 0.0981, - "step": 4340 - }, - { - "epoch": 0.39, - "learning_rate": 9.307242465137202e-06, - "loss": 0.0396, - "step": 4350 - }, - { - "epoch": 0.39, - "learning_rate": 9.305443094916779e-06, - "loss": 0.1012, - "step": 4360 - }, - { - "epoch": 0.39, - "learning_rate": 9.303643724696358e-06, - "loss": 0.1195, - "step": 4370 - }, - { - "epoch": 0.39, - "learning_rate": 9.301844354475934e-06, - "loss": 0.0449, - "step": 4380 - }, - { - "epoch": 0.39, - "learning_rate": 9.300044984255511e-06, - "loss": 0.0778, - "step": 4390 - }, - { - "epoch": 0.39, - "learning_rate": 9.298245614035088e-06, - "loss": 0.0422, - "step": 4400 - }, - { - "epoch": 0.39, - "learning_rate": 9.296446243814665e-06, - "loss": 0.1016, - "step": 4410 - }, - { - "epoch": 0.39, - "learning_rate": 9.294646873594244e-06, - "loss": 0.1021, - "step": 4420 - }, - { - "epoch": 0.4, - "learning_rate": 9.29284750337382e-06, - "loss": 0.1569, - "step": 4430 - }, - { - "epoch": 0.4, - "learning_rate": 9.291048133153397e-06, - "loss": 0.1313, - "step": 4440 - }, - { - "epoch": 0.4, - "learning_rate": 9.289248762932974e-06, - "loss": 0.0682, - "step": 4450 - }, - { - "epoch": 0.4, - "learning_rate": 9.287449392712551e-06, - "loss": 0.1345, - "step": 4460 - }, - { - "epoch": 0.4, - "learning_rate": 9.285650022492128e-06, - "loss": 0.0751, - "step": 4470 - }, - { - "epoch": 0.4, - "learning_rate": 9.283850652271705e-06, - "loss": 0.1235, - "step": 4480 - }, - { - "epoch": 0.4, - "learning_rate": 9.282051282051283e-06, - "loss": 0.1123, - "step": 4490 - }, - { - "epoch": 0.4, - "learning_rate": 9.28025191183086e-06, - "loss": 0.0496, - "step": 4500 - }, - { - "epoch": 0.4, - "eval_accuracy": 0.9711984841307437, - "eval_f1": 0.9508832399827659, - "eval_loss": 0.08512861281633377, - "eval_precision": 0.9297525013164823, - "eval_recall": 0.9729968037032954, - "eval_runtime": 436.7164, - "eval_samples_per_second": 72.507, - "eval_steps_per_second": 4.534, - "step": 4500 - }, - { - "epoch": 0.4, - "learning_rate": 9.278452541610437e-06, - "loss": 0.1278, - "step": 4510 - }, - { - "epoch": 0.4, - "learning_rate": 9.276653171390014e-06, - "loss": 0.0659, - "step": 4520 - }, - { - "epoch": 0.4, - "learning_rate": 9.27485380116959e-06, - "loss": 0.1262, - "step": 4530 - }, - { - "epoch": 0.4, - "learning_rate": 9.27305443094917e-06, - "loss": 0.0633, - "step": 4540 - }, - { - "epoch": 0.41, - "learning_rate": 9.271255060728746e-06, - "loss": 0.1094, - "step": 4550 - }, - { - "epoch": 0.41, - "learning_rate": 9.269455690508323e-06, - "loss": 0.1054, - "step": 4560 - }, - { - "epoch": 0.41, - "learning_rate": 9.2676563202879e-06, - "loss": 0.0375, - "step": 4570 - }, - { - "epoch": 0.41, - "learning_rate": 9.265856950067477e-06, - "loss": 0.0434, - "step": 4580 - }, - { - "epoch": 0.41, - "learning_rate": 9.264057579847054e-06, - "loss": 0.0875, - "step": 4590 - }, - { - "epoch": 0.41, - "learning_rate": 9.26225820962663e-06, - "loss": 0.0963, - "step": 4600 - }, - { - "epoch": 0.41, - "learning_rate": 9.260458839406209e-06, - "loss": 0.1243, - "step": 4610 - }, - { - "epoch": 0.41, - "learning_rate": 9.258659469185786e-06, - "loss": 0.1053, - "step": 4620 - }, - { - "epoch": 0.41, - "learning_rate": 9.256860098965363e-06, - "loss": 0.0566, - "step": 4630 - }, - { - "epoch": 0.41, - "learning_rate": 9.25506072874494e-06, - "loss": 0.0947, - "step": 4640 - }, - { - "epoch": 0.41, - "learning_rate": 9.253261358524516e-06, - "loss": 0.1326, - "step": 4650 - }, - { - "epoch": 0.42, - "learning_rate": 9.251461988304095e-06, - "loss": 0.1622, - "step": 4660 - }, - { - "epoch": 0.42, - "learning_rate": 9.249662618083672e-06, - "loss": 0.0438, - "step": 4670 - }, - { - "epoch": 0.42, - "learning_rate": 9.247863247863249e-06, - "loss": 0.0346, - "step": 4680 - }, - { - "epoch": 0.42, - "learning_rate": 9.246063877642826e-06, - "loss": 0.0647, - "step": 4690 - }, - { - "epoch": 0.42, - "learning_rate": 9.244264507422402e-06, - "loss": 0.2105, - "step": 4700 - }, - { - "epoch": 0.42, - "learning_rate": 9.242465137201981e-06, - "loss": 0.0517, - "step": 4710 - }, - { - "epoch": 0.42, - "learning_rate": 9.240665766981556e-06, - "loss": 0.0369, - "step": 4720 - }, - { - "epoch": 0.42, - "learning_rate": 9.238866396761135e-06, - "loss": 0.0792, - "step": 4730 - }, - { - "epoch": 0.42, - "learning_rate": 9.237067026540712e-06, - "loss": 0.1478, - "step": 4740 - }, - { - "epoch": 0.42, - "learning_rate": 9.235267656320288e-06, - "loss": 0.0625, - "step": 4750 - }, - { - "epoch": 0.42, - "learning_rate": 9.233468286099865e-06, - "loss": 0.1502, - "step": 4760 - }, - { - "epoch": 0.43, - "learning_rate": 9.231668915879442e-06, - "loss": 0.1159, - "step": 4770 - }, - { - "epoch": 0.43, - "learning_rate": 9.22986954565902e-06, - "loss": 0.0383, - "step": 4780 - }, - { - "epoch": 0.43, - "learning_rate": 9.228070175438598e-06, - "loss": 0.0419, - "step": 4790 - }, - { - "epoch": 0.43, - "learning_rate": 9.226270805218174e-06, - "loss": 0.0887, - "step": 4800 - }, - { - "epoch": 0.43, - "learning_rate": 9.224471434997751e-06, - "loss": 0.0085, - "step": 4810 - }, - { - "epoch": 0.43, - "learning_rate": 9.222672064777328e-06, - "loss": 0.1119, - "step": 4820 - }, - { - "epoch": 0.43, - "learning_rate": 9.220872694556907e-06, - "loss": 0.0901, - "step": 4830 - }, - { - "epoch": 0.43, - "learning_rate": 9.219073324336482e-06, - "loss": 0.1066, - "step": 4840 - }, - { - "epoch": 0.43, - "learning_rate": 9.21727395411606e-06, - "loss": 0.096, - "step": 4850 - }, - { - "epoch": 0.43, - "learning_rate": 9.215474583895637e-06, - "loss": 0.1194, - "step": 4860 - }, - { - "epoch": 0.43, - "learning_rate": 9.213675213675214e-06, - "loss": 0.0887, - "step": 4870 - }, - { - "epoch": 0.44, - "learning_rate": 9.211875843454791e-06, - "loss": 0.0658, - "step": 4880 - }, - { - "epoch": 0.44, - "learning_rate": 9.210076473234368e-06, - "loss": 0.1369, - "step": 4890 - }, - { - "epoch": 0.44, - "learning_rate": 9.208277103013946e-06, - "loss": 0.1153, - "step": 4900 - }, - { - "epoch": 0.44, - "learning_rate": 9.206477732793523e-06, - "loss": 0.0603, - "step": 4910 - }, - { - "epoch": 0.44, - "learning_rate": 9.2046783625731e-06, - "loss": 0.1061, - "step": 4920 - }, - { - "epoch": 0.44, - "learning_rate": 9.202878992352677e-06, - "loss": 0.0689, - "step": 4930 - }, - { - "epoch": 0.44, - "learning_rate": 9.201079622132254e-06, - "loss": 0.1351, - "step": 4940 - }, - { - "epoch": 0.44, - "learning_rate": 9.199280251911832e-06, - "loss": 0.1209, - "step": 4950 - }, - { - "epoch": 0.44, - "learning_rate": 9.19748088169141e-06, - "loss": 0.0371, - "step": 4960 - }, - { - "epoch": 0.44, - "learning_rate": 9.195681511470986e-06, - "loss": 0.0833, - "step": 4970 - }, - { - "epoch": 0.44, - "learning_rate": 9.193882141250563e-06, - "loss": 0.1354, - "step": 4980 - }, - { - "epoch": 0.44, - "learning_rate": 9.19208277103014e-06, - "loss": 0.2174, - "step": 4990 - }, - { - "epoch": 0.45, - "learning_rate": 9.190283400809717e-06, - "loss": 0.1419, - "step": 5000 - }, - { - "epoch": 0.45, - "learning_rate": 9.188484030589294e-06, - "loss": 0.1198, - "step": 5010 - }, - { - "epoch": 0.45, - "learning_rate": 9.186684660368872e-06, - "loss": 0.1137, - "step": 5020 - }, - { - "epoch": 0.45, - "learning_rate": 9.184885290148449e-06, - "loss": 0.094, - "step": 5030 - }, - { - "epoch": 0.45, - "learning_rate": 9.183085919928026e-06, - "loss": 0.0805, - "step": 5040 - }, - { - "epoch": 0.45, - "learning_rate": 9.181286549707603e-06, - "loss": 0.0516, - "step": 5050 - }, - { - "epoch": 0.45, - "learning_rate": 9.17948717948718e-06, - "loss": 0.0696, - "step": 5060 - }, - { - "epoch": 0.45, - "learning_rate": 9.177687809266758e-06, - "loss": 0.0769, - "step": 5070 - }, - { - "epoch": 0.45, - "learning_rate": 9.175888439046335e-06, - "loss": 0.0959, - "step": 5080 - }, - { - "epoch": 0.45, - "learning_rate": 9.174089068825912e-06, - "loss": 0.0542, - "step": 5090 - }, - { - "epoch": 0.45, - "learning_rate": 9.172289698605489e-06, - "loss": 0.0712, - "step": 5100 - }, - { - "epoch": 0.46, - "learning_rate": 9.170490328385066e-06, - "loss": 0.0537, - "step": 5110 - }, - { - "epoch": 0.46, - "learning_rate": 9.168690958164642e-06, - "loss": 0.1163, - "step": 5120 - }, - { - "epoch": 0.46, - "learning_rate": 9.16689158794422e-06, - "loss": 0.0765, - "step": 5130 - }, - { - "epoch": 0.46, - "learning_rate": 9.165092217723798e-06, - "loss": 0.2003, - "step": 5140 - }, - { - "epoch": 0.46, - "learning_rate": 9.163292847503375e-06, - "loss": 0.0108, - "step": 5150 - }, - { - "epoch": 0.46, - "learning_rate": 9.161493477282952e-06, - "loss": 0.1257, - "step": 5160 - }, - { - "epoch": 0.46, - "learning_rate": 9.159694107062528e-06, - "loss": 0.1449, - "step": 5170 - }, - { - "epoch": 0.46, - "learning_rate": 9.157894736842105e-06, - "loss": 0.053, - "step": 5180 - }, - { - "epoch": 0.46, - "learning_rate": 9.156095366621684e-06, - "loss": 0.0475, - "step": 5190 - }, - { - "epoch": 0.46, - "learning_rate": 9.15429599640126e-06, - "loss": 0.029, - "step": 5200 - }, - { - "epoch": 0.46, - "learning_rate": 9.152496626180838e-06, - "loss": 0.0036, - "step": 5210 - }, - { - "epoch": 0.47, - "learning_rate": 9.150697255960414e-06, - "loss": 0.0076, - "step": 5220 - }, - { - "epoch": 0.47, - "learning_rate": 9.148897885739991e-06, - "loss": 0.0655, - "step": 5230 - }, - { - "epoch": 0.47, - "learning_rate": 9.14709851551957e-06, - "loss": 0.0534, - "step": 5240 - }, - { - "epoch": 0.47, - "learning_rate": 9.145299145299145e-06, - "loss": 0.0378, - "step": 5250 - }, - { - "epoch": 0.47, - "learning_rate": 9.143499775078724e-06, - "loss": 0.1112, - "step": 5260 - }, - { - "epoch": 0.47, - "learning_rate": 9.1417004048583e-06, - "loss": 0.1258, - "step": 5270 - }, - { - "epoch": 0.47, - "learning_rate": 9.139901034637877e-06, - "loss": 0.1018, - "step": 5280 - }, - { - "epoch": 0.47, - "learning_rate": 9.138101664417454e-06, - "loss": 0.1015, - "step": 5290 - }, - { - "epoch": 0.47, - "learning_rate": 9.136302294197031e-06, - "loss": 0.2089, - "step": 5300 - }, - { - "epoch": 0.47, - "learning_rate": 9.13450292397661e-06, - "loss": 0.0352, - "step": 5310 - }, - { - "epoch": 0.47, - "learning_rate": 9.132703553756186e-06, - "loss": 0.1233, - "step": 5320 - }, - { - "epoch": 0.48, - "learning_rate": 9.130904183535763e-06, - "loss": 0.1092, - "step": 5330 - }, - { - "epoch": 0.48, - "learning_rate": 9.12910481331534e-06, - "loss": 0.0103, - "step": 5340 - }, - { - "epoch": 0.48, - "learning_rate": 9.127305443094917e-06, - "loss": 0.0304, - "step": 5350 - }, - { - "epoch": 0.48, - "learning_rate": 9.125506072874496e-06, - "loss": 0.1365, - "step": 5360 - }, - { - "epoch": 0.48, - "learning_rate": 9.12370670265407e-06, - "loss": 0.1332, - "step": 5370 - }, - { - "epoch": 0.48, - "learning_rate": 9.12190733243365e-06, - "loss": 0.0738, - "step": 5380 - }, - { - "epoch": 0.48, - "learning_rate": 9.120107962213226e-06, - "loss": 0.1559, - "step": 5390 - }, - { - "epoch": 0.48, - "learning_rate": 9.118308591992803e-06, - "loss": 0.139, - "step": 5400 - }, - { - "epoch": 0.48, - "learning_rate": 9.11650922177238e-06, - "loss": 0.1414, - "step": 5410 - }, - { - "epoch": 0.48, - "learning_rate": 9.114709851551957e-06, - "loss": 0.1316, - "step": 5420 - }, - { - "epoch": 0.48, - "learning_rate": 9.112910481331535e-06, - "loss": 0.1166, - "step": 5430 - }, - { - "epoch": 0.49, - "learning_rate": 9.111111111111112e-06, - "loss": 0.0909, - "step": 5440 - }, - { - "epoch": 0.49, - "learning_rate": 9.109311740890689e-06, - "loss": 0.1103, - "step": 5450 - }, - { - "epoch": 0.49, - "learning_rate": 9.107512370670266e-06, - "loss": 0.1618, - "step": 5460 - }, - { - "epoch": 0.49, - "learning_rate": 9.105713000449843e-06, - "loss": 0.0721, - "step": 5470 - }, - { - "epoch": 0.49, - "learning_rate": 9.103913630229421e-06, - "loss": 0.1584, - "step": 5480 - }, - { - "epoch": 0.49, - "learning_rate": 9.102114260008998e-06, - "loss": 0.1035, - "step": 5490 - }, - { - "epoch": 0.49, - "learning_rate": 9.100314889788575e-06, - "loss": 0.028, - "step": 5500 - }, - { - "epoch": 0.49, - "learning_rate": 9.098515519568152e-06, - "loss": 0.1161, - "step": 5510 - }, - { - "epoch": 0.49, - "learning_rate": 9.096716149347729e-06, - "loss": 0.1424, - "step": 5520 - }, - { - "epoch": 0.49, - "learning_rate": 9.094916779127306e-06, - "loss": 0.1636, - "step": 5530 - }, - { - "epoch": 0.49, - "learning_rate": 9.093117408906882e-06, - "loss": 0.1167, - "step": 5540 - }, - { - "epoch": 0.49, - "learning_rate": 9.091318038686461e-06, - "loss": 0.0831, - "step": 5550 - }, - { - "epoch": 0.5, - "learning_rate": 9.089518668466038e-06, - "loss": 0.1418, - "step": 5560 - }, - { - "epoch": 0.5, - "learning_rate": 9.087719298245615e-06, - "loss": 0.1449, - "step": 5570 - }, - { - "epoch": 0.5, - "learning_rate": 9.085919928025192e-06, - "loss": 0.1729, - "step": 5580 - }, - { - "epoch": 0.5, - "learning_rate": 9.084120557804768e-06, - "loss": 0.125, - "step": 5590 - }, - { - "epoch": 0.5, - "learning_rate": 9.082321187584347e-06, - "loss": 0.0741, - "step": 5600 - }, - { - "epoch": 0.5, - "learning_rate": 9.080521817363924e-06, - "loss": 0.0811, - "step": 5610 - }, - { - "epoch": 0.5, - "learning_rate": 9.0787224471435e-06, - "loss": 0.1593, - "step": 5620 - }, - { - "epoch": 0.5, - "learning_rate": 9.076923076923078e-06, - "loss": 0.1943, - "step": 5630 - }, - { - "epoch": 0.5, - "learning_rate": 9.075123706702654e-06, - "loss": 0.0983, - "step": 5640 - }, - { - "epoch": 0.5, - "learning_rate": 9.073324336482231e-06, - "loss": 0.1401, - "step": 5650 - }, - { - "epoch": 0.5, - "learning_rate": 9.071524966261808e-06, - "loss": 0.1739, - "step": 5660 - }, - { - "epoch": 0.51, - "learning_rate": 9.069725596041387e-06, - "loss": 0.1541, - "step": 5670 - }, - { - "epoch": 0.51, - "learning_rate": 9.067926225820964e-06, - "loss": 0.1699, - "step": 5680 - }, - { - "epoch": 0.51, - "learning_rate": 9.06612685560054e-06, - "loss": 0.0351, - "step": 5690 - }, - { - "epoch": 0.51, - "learning_rate": 9.064327485380117e-06, - "loss": 0.0941, - "step": 5700 - }, - { - "epoch": 0.51, - "learning_rate": 9.062528115159694e-06, - "loss": 0.1932, - "step": 5710 - }, - { - "epoch": 0.51, - "learning_rate": 9.060728744939273e-06, - "loss": 0.0819, - "step": 5720 - }, - { - "epoch": 0.51, - "learning_rate": 9.05892937471885e-06, - "loss": 0.2269, - "step": 5730 - }, - { - "epoch": 0.51, - "learning_rate": 9.057130004498426e-06, - "loss": 0.0967, - "step": 5740 - }, - { - "epoch": 0.51, - "learning_rate": 9.055330634278003e-06, - "loss": 0.0594, - "step": 5750 - }, - { - "epoch": 0.51, - "learning_rate": 9.05353126405758e-06, - "loss": 0.2919, - "step": 5760 - }, - { - "epoch": 0.51, - "learning_rate": 9.051731893837159e-06, - "loss": 0.0618, - "step": 5770 - }, - { - "epoch": 0.52, - "learning_rate": 9.049932523616734e-06, - "loss": 0.1166, - "step": 5780 - }, - { - "epoch": 0.52, - "learning_rate": 9.048133153396312e-06, - "loss": 0.124, - "step": 5790 - }, - { - "epoch": 0.52, - "learning_rate": 9.04633378317589e-06, - "loss": 0.1845, - "step": 5800 - }, - { - "epoch": 0.52, - "learning_rate": 9.044534412955466e-06, - "loss": 0.1357, - "step": 5810 - }, - { - "epoch": 0.52, - "learning_rate": 9.042735042735043e-06, - "loss": 0.1428, - "step": 5820 - }, - { - "epoch": 0.52, - "learning_rate": 9.04093567251462e-06, - "loss": 0.1932, - "step": 5830 - }, - { - "epoch": 0.52, - "learning_rate": 9.039136302294198e-06, - "loss": 0.0585, - "step": 5840 - }, - { - "epoch": 0.52, - "learning_rate": 9.037336932073775e-06, - "loss": 0.0983, - "step": 5850 - }, - { - "epoch": 0.52, - "learning_rate": 9.035537561853352e-06, - "loss": 0.0764, - "step": 5860 - }, - { - "epoch": 0.52, - "learning_rate": 9.033738191632929e-06, - "loss": 0.1484, - "step": 5870 - }, - { - "epoch": 0.52, - "learning_rate": 9.031938821412506e-06, - "loss": 0.1209, - "step": 5880 - }, - { - "epoch": 0.53, - "learning_rate": 9.030139451192084e-06, - "loss": 0.0421, - "step": 5890 - }, - { - "epoch": 0.53, - "learning_rate": 9.02834008097166e-06, - "loss": 0.0788, - "step": 5900 - }, - { - "epoch": 0.53, - "learning_rate": 9.026540710751238e-06, - "loss": 0.0637, - "step": 5910 - }, - { - "epoch": 0.53, - "learning_rate": 9.024741340530815e-06, - "loss": 0.0962, - "step": 5920 - }, - { - "epoch": 0.53, - "learning_rate": 9.022941970310392e-06, - "loss": 0.0681, - "step": 5930 - }, - { - "epoch": 0.53, - "learning_rate": 9.021142600089969e-06, - "loss": 0.1625, - "step": 5940 - }, - { - "epoch": 0.53, - "learning_rate": 9.019343229869546e-06, - "loss": 0.1218, - "step": 5950 - }, - { - "epoch": 0.53, - "learning_rate": 9.017543859649124e-06, - "loss": 0.0072, - "step": 5960 - }, - { - "epoch": 0.53, - "learning_rate": 9.015744489428701e-06, - "loss": 0.1611, - "step": 5970 - }, - { - "epoch": 0.53, - "learning_rate": 9.013945119208278e-06, - "loss": 0.1108, - "step": 5980 - }, - { - "epoch": 0.53, - "learning_rate": 9.012145748987855e-06, - "loss": 0.1706, - "step": 5990 - }, - { - "epoch": 0.53, - "learning_rate": 9.010346378767432e-06, - "loss": 0.0967, - "step": 6000 - }, - { - "epoch": 0.53, - "eval_accuracy": 0.9735354492341702, - "eval_f1": 0.954082191780822, - "eval_loss": 0.09180905669927597, - "eval_precision": 0.9486760379208892, - "eval_recall": 0.9595503141188141, - "eval_runtime": 436.4287, - "eval_samples_per_second": 72.555, - "eval_steps_per_second": 4.537, - "step": 6000 - }, - { - "epoch": 0.54, - "learning_rate": 9.00854700854701e-06, - "loss": 0.0714, - "step": 6010 - }, - { - "epoch": 0.54, - "learning_rate": 9.006747638326587e-06, - "loss": 0.1627, - "step": 6020 - }, - { - "epoch": 0.54, - "learning_rate": 9.004948268106164e-06, - "loss": 0.0399, - "step": 6030 - }, - { - "epoch": 0.54, - "learning_rate": 9.00314889788574e-06, - "loss": 0.0568, - "step": 6040 - }, - { - "epoch": 0.54, - "learning_rate": 9.001349527665318e-06, - "loss": 0.1471, - "step": 6050 - }, - { - "epoch": 0.54, - "learning_rate": 8.999550157444894e-06, - "loss": 0.0979, - "step": 6060 - }, - { - "epoch": 0.54, - "learning_rate": 8.997750787224471e-06, - "loss": 0.0177, - "step": 6070 - }, - { - "epoch": 0.54, - "learning_rate": 8.99595141700405e-06, - "loss": 0.1574, - "step": 6080 - }, - { - "epoch": 0.54, - "learning_rate": 8.994152046783627e-06, - "loss": 0.1683, - "step": 6090 - }, - { - "epoch": 0.54, - "learning_rate": 8.992352676563204e-06, - "loss": 0.1035, - "step": 6100 - }, - { - "epoch": 0.54, - "learning_rate": 8.99055330634278e-06, - "loss": 0.1381, - "step": 6110 - }, - { - "epoch": 0.55, - "learning_rate": 8.988753936122357e-06, - "loss": 0.1026, - "step": 6120 - }, - { - "epoch": 0.55, - "learning_rate": 8.986954565901936e-06, - "loss": 0.0454, - "step": 6130 - }, - { - "epoch": 0.55, - "learning_rate": 8.985155195681513e-06, - "loss": 0.0653, - "step": 6140 - }, - { - "epoch": 0.55, - "learning_rate": 8.98335582546109e-06, - "loss": 0.0681, - "step": 6150 - }, - { - "epoch": 0.55, - "learning_rate": 8.981556455240666e-06, - "loss": 0.1286, - "step": 6160 - }, - { - "epoch": 0.55, - "learning_rate": 8.979757085020243e-06, - "loss": 0.0823, - "step": 6170 - }, - { - "epoch": 0.55, - "learning_rate": 8.97795771479982e-06, - "loss": 0.0489, - "step": 6180 - }, - { - "epoch": 0.55, - "learning_rate": 8.976158344579397e-06, - "loss": 0.0605, - "step": 6190 - }, - { - "epoch": 0.55, - "learning_rate": 8.974358974358976e-06, - "loss": 0.054, - "step": 6200 - }, - { - "epoch": 0.55, - "learning_rate": 8.972559604138552e-06, - "loss": 0.1616, - "step": 6210 - }, - { - "epoch": 0.55, - "learning_rate": 8.97076023391813e-06, - "loss": 0.0963, - "step": 6220 - }, - { - "epoch": 0.56, - "learning_rate": 8.968960863697706e-06, - "loss": 0.1245, - "step": 6230 - }, - { - "epoch": 0.56, - "learning_rate": 8.967161493477283e-06, - "loss": 0.092, - "step": 6240 - }, - { - "epoch": 0.56, - "learning_rate": 8.965362123256862e-06, - "loss": 0.0728, - "step": 6250 - }, - { - "epoch": 0.56, - "learning_rate": 8.963562753036438e-06, - "loss": 0.1138, - "step": 6260 - }, - { - "epoch": 0.56, - "learning_rate": 8.961763382816015e-06, - "loss": 0.1143, - "step": 6270 - }, - { - "epoch": 0.56, - "learning_rate": 8.959964012595592e-06, - "loss": 0.0954, - "step": 6280 - }, - { - "epoch": 0.56, - "learning_rate": 8.958164642375169e-06, - "loss": 0.0606, - "step": 6290 - }, - { - "epoch": 0.56, - "learning_rate": 8.956365272154746e-06, - "loss": 0.0667, - "step": 6300 - }, - { - "epoch": 0.56, - "learning_rate": 8.954565901934323e-06, - "loss": 0.0505, - "step": 6310 - }, - { - "epoch": 0.56, - "learning_rate": 8.952766531713901e-06, - "loss": 0.1699, - "step": 6320 - }, - { - "epoch": 0.56, - "learning_rate": 8.950967161493478e-06, - "loss": 0.0568, - "step": 6330 - }, - { - "epoch": 0.57, - "learning_rate": 8.949167791273055e-06, - "loss": 0.1413, - "step": 6340 - }, - { - "epoch": 0.57, - "learning_rate": 8.947368421052632e-06, - "loss": 0.0903, - "step": 6350 - }, - { - "epoch": 0.57, - "learning_rate": 8.945569050832209e-06, - "loss": 0.1166, - "step": 6360 - }, - { - "epoch": 0.57, - "learning_rate": 8.943769680611787e-06, - "loss": 0.0781, - "step": 6370 - }, - { - "epoch": 0.57, - "learning_rate": 8.941970310391364e-06, - "loss": 0.0385, - "step": 6380 - }, - { - "epoch": 0.57, - "learning_rate": 8.940170940170941e-06, - "loss": 0.0508, - "step": 6390 - }, - { - "epoch": 0.57, - "learning_rate": 8.938371569950518e-06, - "loss": 0.0893, - "step": 6400 - }, - { - "epoch": 0.57, - "learning_rate": 8.936572199730095e-06, - "loss": 0.1399, - "step": 6410 - }, - { - "epoch": 0.57, - "learning_rate": 8.934772829509673e-06, - "loss": 0.0252, - "step": 6420 - }, - { - "epoch": 0.57, - "learning_rate": 8.932973459289248e-06, - "loss": 0.0843, - "step": 6430 - }, - { - "epoch": 0.57, - "learning_rate": 8.931174089068827e-06, - "loss": 0.0468, - "step": 6440 - }, - { - "epoch": 0.58, - "learning_rate": 8.929374718848404e-06, - "loss": 0.0843, - "step": 6450 - }, - { - "epoch": 0.58, - "learning_rate": 8.92757534862798e-06, - "loss": 0.0478, - "step": 6460 - }, - { - "epoch": 0.58, - "learning_rate": 8.925775978407558e-06, - "loss": 0.1855, - "step": 6470 - }, - { - "epoch": 0.58, - "learning_rate": 8.923976608187134e-06, - "loss": 0.033, - "step": 6480 - }, - { - "epoch": 0.58, - "learning_rate": 8.922177237966713e-06, - "loss": 0.1028, - "step": 6490 - }, - { - "epoch": 0.58, - "learning_rate": 8.92037786774629e-06, - "loss": 0.1134, - "step": 6500 - }, - { - "epoch": 0.58, - "learning_rate": 8.918578497525867e-06, - "loss": 0.0714, - "step": 6510 - }, - { - "epoch": 0.58, - "learning_rate": 8.916779127305444e-06, - "loss": 0.1058, - "step": 6520 - }, - { - "epoch": 0.58, - "learning_rate": 8.91497975708502e-06, - "loss": 0.0672, - "step": 6530 - }, - { - "epoch": 0.58, - "learning_rate": 8.913180386864599e-06, - "loss": 0.1081, - "step": 6540 - }, - { - "epoch": 0.58, - "learning_rate": 8.911381016644174e-06, - "loss": 0.0558, - "step": 6550 - }, - { - "epoch": 0.58, - "learning_rate": 8.909581646423753e-06, - "loss": 0.1227, - "step": 6560 - }, - { - "epoch": 0.59, - "learning_rate": 8.90778227620333e-06, - "loss": 0.1211, - "step": 6570 - }, - { - "epoch": 0.59, - "learning_rate": 8.905982905982906e-06, - "loss": 0.0534, - "step": 6580 - }, - { - "epoch": 0.59, - "learning_rate": 8.904183535762483e-06, - "loss": 0.1179, - "step": 6590 - }, - { - "epoch": 0.59, - "learning_rate": 8.90238416554206e-06, - "loss": 0.1224, - "step": 6600 - }, - { - "epoch": 0.59, - "learning_rate": 8.900584795321639e-06, - "loss": 0.0878, - "step": 6610 - }, - { - "epoch": 0.59, - "learning_rate": 8.898785425101216e-06, - "loss": 0.12, - "step": 6620 - }, - { - "epoch": 0.59, - "learning_rate": 8.896986054880792e-06, - "loss": 0.1773, - "step": 6630 - }, - { - "epoch": 0.59, - "learning_rate": 8.89518668466037e-06, - "loss": 0.0991, - "step": 6640 - }, - { - "epoch": 0.59, - "learning_rate": 8.893387314439946e-06, - "loss": 0.1262, - "step": 6650 - }, - { - "epoch": 0.59, - "learning_rate": 8.891587944219525e-06, - "loss": 0.0043, - "step": 6660 - }, - { - "epoch": 0.59, - "learning_rate": 8.889788573999102e-06, - "loss": 0.0821, - "step": 6670 - }, - { - "epoch": 0.6, - "learning_rate": 8.887989203778678e-06, - "loss": 0.212, - "step": 6680 - }, - { - "epoch": 0.6, - "learning_rate": 8.886189833558255e-06, - "loss": 0.0983, - "step": 6690 - }, - { - "epoch": 0.6, - "learning_rate": 8.884390463337832e-06, - "loss": 0.1402, - "step": 6700 - }, - { - "epoch": 0.6, - "learning_rate": 8.882591093117409e-06, - "loss": 0.159, - "step": 6710 - }, - { - "epoch": 0.6, - "learning_rate": 8.880791722896986e-06, - "loss": 0.1258, - "step": 6720 - }, - { - "epoch": 0.6, - "learning_rate": 8.878992352676564e-06, - "loss": 0.0954, - "step": 6730 - }, - { - "epoch": 0.6, - "learning_rate": 8.877192982456141e-06, - "loss": 0.0412, - "step": 6740 - }, - { - "epoch": 0.6, - "learning_rate": 8.875393612235718e-06, - "loss": 0.078, - "step": 6750 - }, - { - "epoch": 0.6, - "learning_rate": 8.873594242015295e-06, - "loss": 0.1009, - "step": 6760 - }, - { - "epoch": 0.6, - "learning_rate": 8.871794871794872e-06, - "loss": 0.099, - "step": 6770 - }, - { - "epoch": 0.6, - "learning_rate": 8.86999550157445e-06, - "loss": 0.0313, - "step": 6780 - }, - { - "epoch": 0.61, - "learning_rate": 8.868196131354027e-06, - "loss": 0.0375, - "step": 6790 - }, - { - "epoch": 0.61, - "learning_rate": 8.866396761133604e-06, - "loss": 0.0885, - "step": 6800 - }, - { - "epoch": 0.61, - "learning_rate": 8.864597390913181e-06, - "loss": 0.0761, - "step": 6810 - }, - { - "epoch": 0.61, - "learning_rate": 8.862798020692758e-06, - "loss": 0.0892, - "step": 6820 - }, - { - "epoch": 0.61, - "learning_rate": 8.860998650472335e-06, - "loss": 0.1141, - "step": 6830 - }, - { - "epoch": 0.61, - "learning_rate": 8.859199280251912e-06, - "loss": 0.0784, - "step": 6840 - }, - { - "epoch": 0.61, - "learning_rate": 8.85739991003149e-06, - "loss": 0.1206, - "step": 6850 - }, - { - "epoch": 0.61, - "learning_rate": 8.855600539811067e-06, - "loss": 0.054, - "step": 6860 - }, - { - "epoch": 0.61, - "learning_rate": 8.853801169590644e-06, - "loss": 0.2208, - "step": 6870 - }, - { - "epoch": 0.61, - "learning_rate": 8.85200179937022e-06, - "loss": 0.0689, - "step": 6880 - }, - { - "epoch": 0.61, - "learning_rate": 8.850202429149798e-06, - "loss": 0.0898, - "step": 6890 - }, - { - "epoch": 0.62, - "learning_rate": 8.848403058929376e-06, - "loss": 0.1601, - "step": 6900 - }, - { - "epoch": 0.62, - "learning_rate": 8.846603688708953e-06, - "loss": 0.0574, - "step": 6910 - }, - { - "epoch": 0.62, - "learning_rate": 8.84480431848853e-06, - "loss": 0.0564, - "step": 6920 - }, - { - "epoch": 0.62, - "learning_rate": 8.843004948268107e-06, - "loss": 0.0072, - "step": 6930 - }, - { - "epoch": 0.62, - "learning_rate": 8.841205578047684e-06, - "loss": 0.0368, - "step": 6940 - }, - { - "epoch": 0.62, - "learning_rate": 8.839406207827262e-06, - "loss": 0.1152, - "step": 6950 - }, - { - "epoch": 0.62, - "learning_rate": 8.837606837606837e-06, - "loss": 0.0871, - "step": 6960 - }, - { - "epoch": 0.62, - "learning_rate": 8.835807467386416e-06, - "loss": 0.0936, - "step": 6970 - }, - { - "epoch": 0.62, - "learning_rate": 8.834008097165993e-06, - "loss": 0.2966, - "step": 6980 - }, - { - "epoch": 0.62, - "learning_rate": 8.83220872694557e-06, - "loss": 0.1432, - "step": 6990 - }, - { - "epoch": 0.62, - "learning_rate": 8.830409356725146e-06, - "loss": 0.135, - "step": 7000 - }, - { - "epoch": 0.63, - "learning_rate": 8.828609986504723e-06, - "loss": 0.0894, - "step": 7010 - }, - { - "epoch": 0.63, - "learning_rate": 8.826810616284302e-06, - "loss": 0.1052, - "step": 7020 - }, - { - "epoch": 0.63, - "learning_rate": 8.825011246063879e-06, - "loss": 0.1165, - "step": 7030 - }, - { - "epoch": 0.63, - "learning_rate": 8.823211875843456e-06, - "loss": 0.0955, - "step": 7040 - }, - { - "epoch": 0.63, - "learning_rate": 8.821412505623032e-06, - "loss": 0.0356, - "step": 7050 - }, - { - "epoch": 0.63, - "learning_rate": 8.81961313540261e-06, - "loss": 0.143, - "step": 7060 - }, - { - "epoch": 0.63, - "learning_rate": 8.817813765182188e-06, - "loss": 0.0743, - "step": 7070 - }, - { - "epoch": 0.63, - "learning_rate": 8.816014394961763e-06, - "loss": 0.0865, - "step": 7080 - }, - { - "epoch": 0.63, - "learning_rate": 8.814215024741342e-06, - "loss": 0.1262, - "step": 7090 - }, - { - "epoch": 0.63, - "learning_rate": 8.812415654520918e-06, - "loss": 0.0891, - "step": 7100 - }, - { - "epoch": 0.63, - "learning_rate": 8.810616284300495e-06, - "loss": 0.1062, - "step": 7110 - }, - { - "epoch": 0.63, - "learning_rate": 8.808816914080072e-06, - "loss": 0.0651, - "step": 7120 - }, - { - "epoch": 0.64, - "learning_rate": 8.807017543859649e-06, - "loss": 0.1016, - "step": 7130 - }, - { - "epoch": 0.64, - "learning_rate": 8.805218173639228e-06, - "loss": 0.1007, - "step": 7140 - }, - { - "epoch": 0.64, - "learning_rate": 8.803418803418804e-06, - "loss": 0.0866, - "step": 7150 - }, - { - "epoch": 0.64, - "learning_rate": 8.801619433198381e-06, - "loss": 0.0866, - "step": 7160 - }, - { - "epoch": 0.64, - "learning_rate": 8.799820062977958e-06, - "loss": 0.0524, - "step": 7170 - }, - { - "epoch": 0.64, - "learning_rate": 8.798020692757535e-06, - "loss": 0.0092, - "step": 7180 - }, - { - "epoch": 0.64, - "learning_rate": 8.796221322537114e-06, - "loss": 0.0843, - "step": 7190 - }, - { - "epoch": 0.64, - "learning_rate": 8.79442195231669e-06, - "loss": 0.0962, - "step": 7200 - }, - { - "epoch": 0.64, - "learning_rate": 8.792622582096267e-06, - "loss": 0.0755, - "step": 7210 - }, - { - "epoch": 0.64, - "learning_rate": 8.790823211875844e-06, - "loss": 0.1832, - "step": 7220 - }, - { - "epoch": 0.64, - "learning_rate": 8.789023841655421e-06, - "loss": 0.1657, - "step": 7230 - }, - { - "epoch": 0.65, - "learning_rate": 8.787224471434998e-06, - "loss": 0.0969, - "step": 7240 - }, - { - "epoch": 0.65, - "learning_rate": 8.785425101214575e-06, - "loss": 0.1005, - "step": 7250 - }, - { - "epoch": 0.65, - "learning_rate": 8.783625730994153e-06, - "loss": 0.0896, - "step": 7260 - }, - { - "epoch": 0.65, - "learning_rate": 8.78182636077373e-06, - "loss": 0.0941, - "step": 7270 - }, - { - "epoch": 0.65, - "learning_rate": 8.780026990553307e-06, - "loss": 0.1214, - "step": 7280 - }, - { - "epoch": 0.65, - "learning_rate": 8.778227620332884e-06, - "loss": 0.1319, - "step": 7290 - }, - { - "epoch": 0.65, - "learning_rate": 8.77642825011246e-06, - "loss": 0.0806, - "step": 7300 - }, - { - "epoch": 0.65, - "learning_rate": 8.77462887989204e-06, - "loss": 0.1304, - "step": 7310 - }, - { - "epoch": 0.65, - "learning_rate": 8.772829509671616e-06, - "loss": 0.0723, - "step": 7320 - }, - { - "epoch": 0.65, - "learning_rate": 8.771030139451193e-06, - "loss": 0.176, - "step": 7330 - }, - { - "epoch": 0.65, - "learning_rate": 8.76923076923077e-06, - "loss": 0.0992, - "step": 7340 - }, - { - "epoch": 0.66, - "learning_rate": 8.767431399010347e-06, - "loss": 0.128, - "step": 7350 - }, - { - "epoch": 0.66, - "learning_rate": 8.765632028789924e-06, - "loss": 0.0585, - "step": 7360 - }, - { - "epoch": 0.66, - "learning_rate": 8.7638326585695e-06, - "loss": 0.2104, - "step": 7370 - }, - { - "epoch": 0.66, - "learning_rate": 8.762033288349079e-06, - "loss": 0.0491, - "step": 7380 - }, - { - "epoch": 0.66, - "learning_rate": 8.760233918128656e-06, - "loss": 0.1178, - "step": 7390 - }, - { - "epoch": 0.66, - "learning_rate": 8.758434547908233e-06, - "loss": 0.181, - "step": 7400 - }, - { - "epoch": 0.66, - "learning_rate": 8.75663517768781e-06, - "loss": 0.1329, - "step": 7410 - }, - { - "epoch": 0.66, - "learning_rate": 8.754835807467386e-06, - "loss": 0.1067, - "step": 7420 - }, - { - "epoch": 0.66, - "learning_rate": 8.753036437246965e-06, - "loss": 0.1344, - "step": 7430 - }, - { - "epoch": 0.66, - "learning_rate": 8.751237067026542e-06, - "loss": 0.0359, - "step": 7440 - }, - { - "epoch": 0.66, - "learning_rate": 8.749437696806119e-06, - "loss": 0.0587, - "step": 7450 - }, - { - "epoch": 0.67, - "learning_rate": 8.747638326585696e-06, - "loss": 0.0763, - "step": 7460 - }, - { - "epoch": 0.67, - "learning_rate": 8.745838956365272e-06, - "loss": 0.1267, - "step": 7470 - }, - { - "epoch": 0.67, - "learning_rate": 8.744039586144851e-06, - "loss": 0.0407, - "step": 7480 - }, - { - "epoch": 0.67, - "learning_rate": 8.742240215924426e-06, - "loss": 0.0741, - "step": 7490 - }, - { - "epoch": 0.67, - "learning_rate": 8.740440845704005e-06, - "loss": 0.0914, - "step": 7500 - }, - { - "epoch": 0.67, - "eval_accuracy": 0.971293225959261, - "eval_f1": 0.95092057664273, - "eval_loss": 0.12410593777894974, - "eval_precision": 0.9320491109229466, - "eval_recall": 0.9705720268929792, - "eval_runtime": 436.6013, - "eval_samples_per_second": 72.526, - "eval_steps_per_second": 4.535, - "step": 7500 - }, - { - "epoch": 0.67, - "learning_rate": 8.738641475483582e-06, - "loss": 0.1633, - "step": 7510 - }, - { - "epoch": 0.67, - "learning_rate": 8.736842105263158e-06, - "loss": 0.1843, - "step": 7520 - }, - { - "epoch": 0.67, - "learning_rate": 8.735042735042735e-06, - "loss": 0.1008, - "step": 7530 - }, - { - "epoch": 0.67, - "learning_rate": 8.733243364822312e-06, - "loss": 0.0962, - "step": 7540 - }, - { - "epoch": 0.67, - "learning_rate": 8.73144399460189e-06, - "loss": 0.0271, - "step": 7550 - }, - { - "epoch": 0.67, - "learning_rate": 8.729644624381468e-06, - "loss": 0.1435, - "step": 7560 - }, - { - "epoch": 0.67, - "learning_rate": 8.727845254161044e-06, - "loss": 0.0794, - "step": 7570 - }, - { - "epoch": 0.68, - "learning_rate": 8.726045883940621e-06, - "loss": 0.1544, - "step": 7580 - }, - { - "epoch": 0.68, - "learning_rate": 8.724246513720198e-06, - "loss": 0.0104, - "step": 7590 - }, - { - "epoch": 0.68, - "learning_rate": 8.722447143499777e-06, - "loss": 0.0687, - "step": 7600 - }, - { - "epoch": 0.68, - "learning_rate": 8.720647773279352e-06, - "loss": 0.1329, - "step": 7610 - }, - { - "epoch": 0.68, - "learning_rate": 8.71884840305893e-06, - "loss": 0.0721, - "step": 7620 - }, - { - "epoch": 0.68, - "learning_rate": 8.717049032838507e-06, - "loss": 0.1454, - "step": 7630 - }, - { - "epoch": 0.68, - "learning_rate": 8.715249662618084e-06, - "loss": 0.0498, - "step": 7640 - }, - { - "epoch": 0.68, - "learning_rate": 8.713450292397661e-06, - "loss": 0.1081, - "step": 7650 - }, - { - "epoch": 0.68, - "learning_rate": 8.711650922177238e-06, - "loss": 0.072, - "step": 7660 - }, - { - "epoch": 0.68, - "learning_rate": 8.709851551956816e-06, - "loss": 0.0765, - "step": 7670 - }, - { - "epoch": 0.68, - "learning_rate": 8.708052181736393e-06, - "loss": 0.1375, - "step": 7680 - }, - { - "epoch": 0.69, - "learning_rate": 8.70625281151597e-06, - "loss": 0.1537, - "step": 7690 - }, - { - "epoch": 0.69, - "learning_rate": 8.704453441295547e-06, - "loss": 0.097, - "step": 7700 - }, - { - "epoch": 0.69, - "learning_rate": 8.702654071075124e-06, - "loss": 0.1346, - "step": 7710 - }, - { - "epoch": 0.69, - "learning_rate": 8.700854700854702e-06, - "loss": 0.1619, - "step": 7720 - }, - { - "epoch": 0.69, - "learning_rate": 8.69905533063428e-06, - "loss": 0.1027, - "step": 7730 - }, - { - "epoch": 0.69, - "learning_rate": 8.697255960413856e-06, - "loss": 0.1377, - "step": 7740 - }, - { - "epoch": 0.69, - "learning_rate": 8.695456590193433e-06, - "loss": 0.0791, - "step": 7750 - }, - { - "epoch": 0.69, - "learning_rate": 8.693657219973012e-06, - "loss": 0.0814, - "step": 7760 - }, - { - "epoch": 0.69, - "learning_rate": 8.691857849752587e-06, - "loss": 0.078, - "step": 7770 - }, - { - "epoch": 0.69, - "learning_rate": 8.690058479532164e-06, - "loss": 0.0902, - "step": 7780 - }, - { - "epoch": 0.69, - "learning_rate": 8.688259109311742e-06, - "loss": 0.144, - "step": 7790 - }, - { - "epoch": 0.7, - "learning_rate": 8.686459739091319e-06, - "loss": 0.1267, - "step": 7800 - }, - { - "epoch": 0.7, - "learning_rate": 8.684660368870896e-06, - "loss": 0.0564, - "step": 7810 - }, - { - "epoch": 0.7, - "learning_rate": 8.682860998650473e-06, - "loss": 0.0782, - "step": 7820 - }, - { - "epoch": 0.7, - "learning_rate": 8.68106162843005e-06, - "loss": 0.1353, - "step": 7830 - }, - { - "epoch": 0.7, - "learning_rate": 8.679262258209628e-06, - "loss": 0.0428, - "step": 7840 - }, - { - "epoch": 0.7, - "learning_rate": 8.677462887989205e-06, - "loss": 0.0864, - "step": 7850 - }, - { - "epoch": 0.7, - "learning_rate": 8.675663517768782e-06, - "loss": 0.0717, - "step": 7860 - }, - { - "epoch": 0.7, - "learning_rate": 8.673864147548359e-06, - "loss": 0.1192, - "step": 7870 - }, - { - "epoch": 0.7, - "learning_rate": 8.672064777327936e-06, - "loss": 0.0728, - "step": 7880 - }, - { - "epoch": 0.7, - "learning_rate": 8.670265407107512e-06, - "loss": 0.0615, - "step": 7890 - }, - { - "epoch": 0.7, - "learning_rate": 8.66846603688709e-06, - "loss": 0.0884, - "step": 7900 - }, - { - "epoch": 0.71, - "learning_rate": 8.666666666666668e-06, - "loss": 0.1488, - "step": 7910 - }, - { - "epoch": 0.71, - "learning_rate": 8.664867296446245e-06, - "loss": 0.1203, - "step": 7920 - }, - { - "epoch": 0.71, - "learning_rate": 8.663067926225822e-06, - "loss": 0.0469, - "step": 7930 - }, - { - "epoch": 0.71, - "learning_rate": 8.661268556005398e-06, - "loss": 0.0475, - "step": 7940 - }, - { - "epoch": 0.71, - "learning_rate": 8.659469185784975e-06, - "loss": 0.1008, - "step": 7950 - }, - { - "epoch": 0.71, - "learning_rate": 8.657669815564554e-06, - "loss": 0.0544, - "step": 7960 - }, - { - "epoch": 0.71, - "learning_rate": 8.65587044534413e-06, - "loss": 0.1543, - "step": 7970 - }, - { - "epoch": 0.71, - "learning_rate": 8.654071075123708e-06, - "loss": 0.1327, - "step": 7980 - }, - { - "epoch": 0.71, - "learning_rate": 8.652271704903284e-06, - "loss": 0.3129, - "step": 7990 - }, - { - "epoch": 0.71, - "learning_rate": 8.650472334682861e-06, - "loss": 0.0302, - "step": 8000 - }, - { - "epoch": 0.71, - "learning_rate": 8.64867296446244e-06, - "loss": 0.1717, - "step": 8010 - }, - { - "epoch": 0.72, - "learning_rate": 8.646873594242015e-06, - "loss": 0.0878, - "step": 8020 - }, - { - "epoch": 0.72, - "learning_rate": 8.645074224021594e-06, - "loss": 0.0869, - "step": 8030 - }, - { - "epoch": 0.72, - "learning_rate": 8.64327485380117e-06, - "loss": 0.0919, - "step": 8040 - }, - { - "epoch": 0.72, - "learning_rate": 8.641475483580747e-06, - "loss": 0.058, - "step": 8050 - }, - { - "epoch": 0.72, - "learning_rate": 8.639676113360324e-06, - "loss": 0.1208, - "step": 8060 - }, - { - "epoch": 0.72, - "learning_rate": 8.637876743139901e-06, - "loss": 0.0707, - "step": 8070 - }, - { - "epoch": 0.72, - "learning_rate": 8.63607737291948e-06, - "loss": 0.0532, - "step": 8080 - }, - { - "epoch": 0.72, - "learning_rate": 8.634278002699056e-06, - "loss": 0.0889, - "step": 8090 - }, - { - "epoch": 0.72, - "learning_rate": 8.632478632478633e-06, - "loss": 0.0517, - "step": 8100 - }, - { - "epoch": 0.72, - "learning_rate": 8.63067926225821e-06, - "loss": 0.1343, - "step": 8110 - }, - { - "epoch": 0.72, - "learning_rate": 8.628879892037787e-06, - "loss": 0.0304, - "step": 8120 - }, - { - "epoch": 0.72, - "learning_rate": 8.627080521817366e-06, - "loss": 0.1531, - "step": 8130 - }, - { - "epoch": 0.73, - "learning_rate": 8.62528115159694e-06, - "loss": 0.1384, - "step": 8140 - }, - { - "epoch": 0.73, - "learning_rate": 8.62348178137652e-06, - "loss": 0.0572, - "step": 8150 - }, - { - "epoch": 0.73, - "learning_rate": 8.621682411156096e-06, - "loss": 0.043, - "step": 8160 - }, - { - "epoch": 0.73, - "learning_rate": 8.619883040935673e-06, - "loss": 0.0598, - "step": 8170 - }, - { - "epoch": 0.73, - "learning_rate": 8.61808367071525e-06, - "loss": 0.0271, - "step": 8180 - }, - { - "epoch": 0.73, - "learning_rate": 8.616284300494827e-06, - "loss": 0.0278, - "step": 8190 - }, - { - "epoch": 0.73, - "learning_rate": 8.614484930274405e-06, - "loss": 0.0878, - "step": 8200 - }, - { - "epoch": 0.73, - "learning_rate": 8.612685560053982e-06, - "loss": 0.1291, - "step": 8210 - }, - { - "epoch": 0.73, - "learning_rate": 8.610886189833559e-06, - "loss": 0.091, - "step": 8220 - }, - { - "epoch": 0.73, - "learning_rate": 8.609086819613136e-06, - "loss": 0.053, - "step": 8230 - }, - { - "epoch": 0.73, - "learning_rate": 8.607287449392713e-06, - "loss": 0.0576, - "step": 8240 - }, - { - "epoch": 0.74, - "learning_rate": 8.605488079172291e-06, - "loss": 0.1051, - "step": 8250 - }, - { - "epoch": 0.74, - "learning_rate": 8.603688708951866e-06, - "loss": 0.0692, - "step": 8260 - }, - { - "epoch": 0.74, - "learning_rate": 8.601889338731445e-06, - "loss": 0.1337, - "step": 8270 - }, - { - "epoch": 0.74, - "learning_rate": 8.600089968511022e-06, - "loss": 0.0394, - "step": 8280 - }, - { - "epoch": 0.74, - "learning_rate": 8.598290598290599e-06, - "loss": 0.066, - "step": 8290 - }, - { - "epoch": 0.74, - "learning_rate": 8.596491228070176e-06, - "loss": 0.0414, - "step": 8300 - }, - { - "epoch": 0.74, - "learning_rate": 8.594691857849752e-06, - "loss": 0.1076, - "step": 8310 - }, - { - "epoch": 0.74, - "learning_rate": 8.592892487629331e-06, - "loss": 0.1802, - "step": 8320 - }, - { - "epoch": 0.74, - "learning_rate": 8.591093117408908e-06, - "loss": 0.0185, - "step": 8330 - }, - { - "epoch": 0.74, - "learning_rate": 8.589293747188485e-06, - "loss": 0.0604, - "step": 8340 - }, - { - "epoch": 0.74, - "learning_rate": 8.587494376968062e-06, - "loss": 0.1542, - "step": 8350 - }, - { - "epoch": 0.75, - "learning_rate": 8.585695006747638e-06, - "loss": 0.1218, - "step": 8360 - }, - { - "epoch": 0.75, - "learning_rate": 8.583895636527217e-06, - "loss": 0.1441, - "step": 8370 - }, - { - "epoch": 0.75, - "learning_rate": 8.582096266306794e-06, - "loss": 0.0921, - "step": 8380 - }, - { - "epoch": 0.75, - "learning_rate": 8.58029689608637e-06, - "loss": 0.0408, - "step": 8390 - }, - { - "epoch": 0.75, - "learning_rate": 8.578497525865948e-06, - "loss": 0.0247, - "step": 8400 - }, - { - "epoch": 0.75, - "learning_rate": 8.576698155645524e-06, - "loss": 0.0663, - "step": 8410 - }, - { - "epoch": 0.75, - "learning_rate": 8.574898785425101e-06, - "loss": 0.148, - "step": 8420 - }, - { - "epoch": 0.75, - "learning_rate": 8.573099415204678e-06, - "loss": 0.0787, - "step": 8430 - }, - { - "epoch": 0.75, - "learning_rate": 8.571300044984257e-06, - "loss": 0.0468, - "step": 8440 - }, - { - "epoch": 0.75, - "learning_rate": 8.569500674763834e-06, - "loss": 0.1468, - "step": 8450 - }, - { - "epoch": 0.75, - "learning_rate": 8.56770130454341e-06, - "loss": 0.0841, - "step": 8460 - }, - { - "epoch": 0.76, - "learning_rate": 8.565901934322987e-06, - "loss": 0.0974, - "step": 8470 - }, - { - "epoch": 0.76, - "learning_rate": 8.564102564102564e-06, - "loss": 0.0644, - "step": 8480 - }, - { - "epoch": 0.76, - "learning_rate": 8.562303193882143e-06, - "loss": 0.055, - "step": 8490 - }, - { - "epoch": 0.76, - "learning_rate": 8.56050382366172e-06, - "loss": 0.0476, - "step": 8500 - }, - { - "epoch": 0.76, - "learning_rate": 8.558704453441296e-06, - "loss": 0.2432, - "step": 8510 - }, - { - "epoch": 0.76, - "learning_rate": 8.556905083220873e-06, - "loss": 0.1153, - "step": 8520 - }, - { - "epoch": 0.76, - "learning_rate": 8.55510571300045e-06, - "loss": 0.0691, - "step": 8530 - }, - { - "epoch": 0.76, - "learning_rate": 8.553306342780027e-06, - "loss": 0.0634, - "step": 8540 - }, - { - "epoch": 0.76, - "learning_rate": 8.551506972559604e-06, - "loss": 0.1217, - "step": 8550 - }, - { - "epoch": 0.76, - "learning_rate": 8.549707602339182e-06, - "loss": 0.15, - "step": 8560 - }, - { - "epoch": 0.76, - "learning_rate": 8.54790823211876e-06, - "loss": 0.1146, - "step": 8570 - }, - { - "epoch": 0.77, - "learning_rate": 8.546108861898336e-06, - "loss": 0.0232, - "step": 8580 - }, - { - "epoch": 0.77, - "learning_rate": 8.544309491677913e-06, - "loss": 0.1652, - "step": 8590 - }, - { - "epoch": 0.77, - "learning_rate": 8.54251012145749e-06, - "loss": 0.1147, - "step": 8600 - }, - { - "epoch": 0.77, - "learning_rate": 8.540710751237068e-06, - "loss": 0.1156, - "step": 8610 - }, - { - "epoch": 0.77, - "learning_rate": 8.538911381016645e-06, - "loss": 0.0725, - "step": 8620 - }, - { - "epoch": 0.77, - "learning_rate": 8.537112010796222e-06, - "loss": 0.0058, - "step": 8630 - }, - { - "epoch": 0.77, - "learning_rate": 8.535312640575799e-06, - "loss": 0.1656, - "step": 8640 - }, - { - "epoch": 0.77, - "learning_rate": 8.533513270355376e-06, - "loss": 0.0576, - "step": 8650 - }, - { - "epoch": 0.77, - "learning_rate": 8.531713900134954e-06, - "loss": 0.1474, - "step": 8660 - }, - { - "epoch": 0.77, - "learning_rate": 8.52991452991453e-06, - "loss": 0.1511, - "step": 8670 - }, - { - "epoch": 0.77, - "learning_rate": 8.528115159694108e-06, - "loss": 0.1008, - "step": 8680 - }, - { - "epoch": 0.77, - "learning_rate": 8.526315789473685e-06, - "loss": 0.0811, - "step": 8690 - }, - { - "epoch": 0.78, - "learning_rate": 8.524516419253262e-06, - "loss": 0.0772, - "step": 8700 - }, - { - "epoch": 0.78, - "learning_rate": 8.522717049032839e-06, - "loss": 0.0955, - "step": 8710 - }, - { - "epoch": 0.78, - "learning_rate": 8.520917678812416e-06, - "loss": 0.0719, - "step": 8720 - }, - { - "epoch": 0.78, - "learning_rate": 8.519118308591994e-06, - "loss": 0.1383, - "step": 8730 - }, - { - "epoch": 0.78, - "learning_rate": 8.517318938371571e-06, - "loss": 0.0035, - "step": 8740 - }, - { - "epoch": 0.78, - "learning_rate": 8.515519568151148e-06, - "loss": 0.1452, - "step": 8750 - }, - { - "epoch": 0.78, - "learning_rate": 8.513720197930725e-06, - "loss": 0.1435, - "step": 8760 - }, - { - "epoch": 0.78, - "learning_rate": 8.511920827710302e-06, - "loss": 0.1142, - "step": 8770 - }, - { - "epoch": 0.78, - "learning_rate": 8.51012145748988e-06, - "loss": 0.1441, - "step": 8780 - }, - { - "epoch": 0.78, - "learning_rate": 8.508322087269455e-06, - "loss": 0.179, - "step": 8790 - }, - { - "epoch": 0.78, - "learning_rate": 8.506522717049034e-06, - "loss": 0.0934, - "step": 8800 - }, - { - "epoch": 0.79, - "learning_rate": 8.50472334682861e-06, - "loss": 0.0647, - "step": 8810 - }, - { - "epoch": 0.79, - "learning_rate": 8.502923976608188e-06, - "loss": 0.0749, - "step": 8820 - }, - { - "epoch": 0.79, - "learning_rate": 8.501124606387764e-06, - "loss": 0.1038, - "step": 8830 - }, - { - "epoch": 0.79, - "learning_rate": 8.499325236167341e-06, - "loss": 0.0616, - "step": 8840 - }, - { - "epoch": 0.79, - "learning_rate": 8.49752586594692e-06, - "loss": 0.1128, - "step": 8850 - }, - { - "epoch": 0.79, - "learning_rate": 8.495726495726497e-06, - "loss": 0.1401, - "step": 8860 - }, - { - "epoch": 0.79, - "learning_rate": 8.493927125506074e-06, - "loss": 0.2143, - "step": 8870 - }, - { - "epoch": 0.79, - "learning_rate": 8.49212775528565e-06, - "loss": 0.0586, - "step": 8880 - }, - { - "epoch": 0.79, - "learning_rate": 8.490328385065227e-06, - "loss": 0.0917, - "step": 8890 - }, - { - "epoch": 0.79, - "learning_rate": 8.488529014844806e-06, - "loss": 0.1244, - "step": 8900 - }, - { - "epoch": 0.79, - "learning_rate": 8.486729644624383e-06, - "loss": 0.0877, - "step": 8910 - }, - { - "epoch": 0.8, - "learning_rate": 8.48493027440396e-06, - "loss": 0.0792, - "step": 8920 - }, - { - "epoch": 0.8, - "learning_rate": 8.483130904183536e-06, - "loss": 0.1058, - "step": 8930 - }, - { - "epoch": 0.8, - "learning_rate": 8.481331533963115e-06, - "loss": 0.1133, - "step": 8940 - }, - { - "epoch": 0.8, - "learning_rate": 8.47953216374269e-06, - "loss": 0.0392, - "step": 8950 - }, - { - "epoch": 0.8, - "learning_rate": 8.477732793522267e-06, - "loss": 0.1745, - "step": 8960 - }, - { - "epoch": 0.8, - "learning_rate": 8.475933423301846e-06, - "loss": 0.1019, - "step": 8970 - }, - { - "epoch": 0.8, - "learning_rate": 8.474134053081422e-06, - "loss": 0.166, - "step": 8980 - }, - { - "epoch": 0.8, - "learning_rate": 8.472334682861e-06, - "loss": 0.1927, - "step": 8990 - }, - { - "epoch": 0.8, - "learning_rate": 8.470535312640576e-06, - "loss": 0.1251, - "step": 9000 - }, - { - "epoch": 0.8, - "eval_accuracy": 0.9701563240170535, - "eval_f1": 0.9492399419885051, - "eval_loss": 0.09671162813901901, - "eval_precision": 0.9258172673931265, - "eval_recall": 0.9738785407252287, - "eval_runtime": 437.2054, - "eval_samples_per_second": 72.426, - "eval_steps_per_second": 4.529, - "step": 9000 } ], "max_steps": 56075, "num_train_epochs": 5, - "total_flos": 7.6243123003392e+16, + "total_flos": 2.5414374334464e+16, "trial_name": null, "trial_params": null }