diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,5488 @@ +{ + "best_metric": 0.9738785407252287, + "best_model_checkpoint": "drive/MyDrive/albertina/checkpoint-9000", + "epoch": 0.8024966562639322, + "global_step": 9000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 2.0000000000000002e-07, + "loss": 0.7187, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 4.0000000000000003e-07, + "loss": 0.7199, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 6.000000000000001e-07, + "loss": 0.6146, + "step": 30 + }, + { + "epoch": 0.0, + "learning_rate": 8.000000000000001e-07, + "loss": 0.6195, + "step": 40 + }, + { + "epoch": 0.0, + "learning_rate": 1.0000000000000002e-06, + "loss": 0.6245, + "step": 50 + }, + { + "epoch": 0.01, + "learning_rate": 1.2000000000000002e-06, + "loss": 0.6241, + "step": 60 + }, + { + "epoch": 0.01, + "learning_rate": 1.4000000000000001e-06, + "loss": 0.5657, + "step": 70 + }, + { + "epoch": 0.01, + "learning_rate": 1.6000000000000001e-06, + "loss": 0.4948, + "step": 80 + }, + { + "epoch": 0.01, + "learning_rate": 1.8000000000000001e-06, + "loss": 0.5167, + "step": 90 + }, + { + "epoch": 0.01, + "learning_rate": 2.0000000000000003e-06, + "loss": 0.4677, + "step": 100 + }, + { + "epoch": 0.01, + "learning_rate": 2.2e-06, + "loss": 0.4529, + "step": 110 + }, + { + "epoch": 0.01, + "learning_rate": 2.4000000000000003e-06, + "loss": 0.4148, + "step": 120 + }, + { + "epoch": 0.01, + "learning_rate": 2.6e-06, + "loss": 0.3799, + "step": 130 + }, + { + "epoch": 0.01, + "learning_rate": 2.8000000000000003e-06, + "loss": 0.3091, + "step": 140 + }, + { + "epoch": 0.01, + "learning_rate": 3e-06, + "loss": 0.3131, + "step": 150 + }, + { + "epoch": 0.01, + "learning_rate": 3.2000000000000003e-06, + "loss": 0.2855, + "step": 160 + }, + { + "epoch": 0.02, + "learning_rate": 3.4000000000000005e-06, + "loss": 0.2601, + "step": 170 + }, + { + "epoch": 0.02, + "learning_rate": 3.6000000000000003e-06, + "loss": 0.193, + "step": 180 + }, + { + "epoch": 0.02, + "learning_rate": 3.8000000000000005e-06, + "loss": 0.2041, + "step": 190 + }, + { + "epoch": 0.02, + "learning_rate": 4.000000000000001e-06, + "loss": 0.326, + "step": 200 + }, + { + "epoch": 0.02, + "learning_rate": 4.2000000000000004e-06, + "loss": 0.1665, + "step": 210 + }, + { + "epoch": 0.02, + "learning_rate": 4.4e-06, + "loss": 0.2885, + "step": 220 + }, + { + "epoch": 0.02, + "learning_rate": 4.600000000000001e-06, + "loss": 0.2399, + "step": 230 + }, + { + "epoch": 0.02, + "learning_rate": 4.800000000000001e-06, + "loss": 0.2109, + "step": 240 + }, + { + "epoch": 0.02, + "learning_rate": 5e-06, + "loss": 0.2259, + "step": 250 + }, + { + "epoch": 0.02, + "learning_rate": 5.2e-06, + "loss": 0.2323, + "step": 260 + }, + { + "epoch": 0.02, + "learning_rate": 5.400000000000001e-06, + "loss": 0.1955, + "step": 270 + }, + { + "epoch": 0.02, + "learning_rate": 5.600000000000001e-06, + "loss": 0.1765, + "step": 280 + }, + { + "epoch": 0.03, + "learning_rate": 5.8e-06, + "loss": 0.1722, + "step": 290 + }, + { + "epoch": 0.03, + "learning_rate": 6e-06, + "loss": 0.1648, + "step": 300 + }, + { + "epoch": 0.03, + "learning_rate": 6.200000000000001e-06, + "loss": 0.1617, + "step": 310 + }, + { + "epoch": 0.03, + "learning_rate": 6.4000000000000006e-06, + "loss": 0.1687, + "step": 320 + }, + { + "epoch": 0.03, + "learning_rate": 6.600000000000001e-06, + "loss": 0.1727, + "step": 330 + }, + { + "epoch": 0.03, + "learning_rate": 6.800000000000001e-06, + "loss": 0.2064, + "step": 340 + }, + { + "epoch": 0.03, + "learning_rate": 7e-06, + "loss": 0.1663, + "step": 350 + }, + { + "epoch": 0.03, + "learning_rate": 7.2000000000000005e-06, + "loss": 0.1032, + "step": 360 + }, + { + "epoch": 0.03, + "learning_rate": 7.4e-06, + "loss": 0.1592, + "step": 370 + }, + { + "epoch": 0.03, + "learning_rate": 7.600000000000001e-06, + "loss": 0.2606, + "step": 380 + }, + { + "epoch": 0.03, + "learning_rate": 7.800000000000002e-06, + "loss": 0.0709, + "step": 390 + }, + { + "epoch": 0.04, + "learning_rate": 8.000000000000001e-06, + "loss": 0.117, + "step": 400 + }, + { + "epoch": 0.04, + "learning_rate": 8.2e-06, + "loss": 0.1836, + "step": 410 + }, + { + "epoch": 0.04, + "learning_rate": 8.400000000000001e-06, + "loss": 0.0998, + "step": 420 + }, + { + "epoch": 0.04, + "learning_rate": 8.6e-06, + "loss": 0.1288, + "step": 430 + }, + { + "epoch": 0.04, + "learning_rate": 8.8e-06, + "loss": 0.1877, + "step": 440 + }, + { + "epoch": 0.04, + "learning_rate": 9e-06, + "loss": 0.1695, + "step": 450 + }, + { + "epoch": 0.04, + "learning_rate": 9.200000000000002e-06, + "loss": 0.1736, + "step": 460 + }, + { + "epoch": 0.04, + "learning_rate": 9.4e-06, + "loss": 0.195, + "step": 470 + }, + { + "epoch": 0.04, + "learning_rate": 9.600000000000001e-06, + "loss": 0.1599, + "step": 480 + }, + { + "epoch": 0.04, + "learning_rate": 9.800000000000001e-06, + "loss": 0.0596, + "step": 490 + }, + { + "epoch": 0.04, + "learning_rate": 1e-05, + "loss": 0.0599, + "step": 500 + }, + { + "epoch": 0.05, + "learning_rate": 9.998200629779578e-06, + "loss": 0.2157, + "step": 510 + }, + { + "epoch": 0.05, + "learning_rate": 9.996401259559155e-06, + "loss": 0.1006, + "step": 520 + }, + { + "epoch": 0.05, + "learning_rate": 9.994601889338731e-06, + "loss": 0.2082, + "step": 530 + }, + { + "epoch": 0.05, + "learning_rate": 9.99280251911831e-06, + "loss": 0.2305, + "step": 540 + }, + { + "epoch": 0.05, + "learning_rate": 9.991003148897887e-06, + "loss": 0.1995, + "step": 550 + }, + { + "epoch": 0.05, + "learning_rate": 9.989203778677464e-06, + "loss": 0.1551, + "step": 560 + }, + { + "epoch": 0.05, + "learning_rate": 9.98740440845704e-06, + "loss": 0.0971, + "step": 570 + }, + { + "epoch": 0.05, + "learning_rate": 9.985605038236617e-06, + "loss": 0.2007, + "step": 580 + }, + { + "epoch": 0.05, + "learning_rate": 9.983805668016196e-06, + "loss": 0.1312, + "step": 590 + }, + { + "epoch": 0.05, + "learning_rate": 9.982006297795773e-06, + "loss": 0.1931, + "step": 600 + }, + { + "epoch": 0.05, + "learning_rate": 9.98020692757535e-06, + "loss": 0.223, + "step": 610 + }, + { + "epoch": 0.06, + "learning_rate": 9.978407557354927e-06, + "loss": 0.3252, + "step": 620 + }, + { + "epoch": 0.06, + "learning_rate": 9.976608187134503e-06, + "loss": 0.0981, + "step": 630 + }, + { + "epoch": 0.06, + "learning_rate": 9.97480881691408e-06, + "loss": 0.1815, + "step": 640 + }, + { + "epoch": 0.06, + "learning_rate": 9.973009446693657e-06, + "loss": 0.1782, + "step": 650 + }, + { + "epoch": 0.06, + "learning_rate": 9.971210076473236e-06, + "loss": 0.1804, + "step": 660 + }, + { + "epoch": 0.06, + "learning_rate": 9.969410706252813e-06, + "loss": 0.1271, + "step": 670 + }, + { + "epoch": 0.06, + "learning_rate": 9.96761133603239e-06, + "loss": 0.1316, + "step": 680 + }, + { + "epoch": 0.06, + "learning_rate": 9.965811965811966e-06, + "loss": 0.2092, + "step": 690 + }, + { + "epoch": 0.06, + "learning_rate": 9.964012595591543e-06, + "loss": 0.0782, + "step": 700 + }, + { + "epoch": 0.06, + "learning_rate": 9.962213225371122e-06, + "loss": 0.2433, + "step": 710 + }, + { + "epoch": 0.06, + "learning_rate": 9.960413855150699e-06, + "loss": 0.1883, + "step": 720 + }, + { + "epoch": 0.07, + "learning_rate": 9.958614484930275e-06, + "loss": 0.032, + "step": 730 + }, + { + "epoch": 0.07, + "learning_rate": 9.956815114709852e-06, + "loss": 0.2181, + "step": 740 + }, + { + "epoch": 0.07, + "learning_rate": 9.955015744489429e-06, + "loss": 0.1828, + "step": 750 + }, + { + "epoch": 0.07, + "learning_rate": 9.953216374269008e-06, + "loss": 0.1206, + "step": 760 + }, + { + "epoch": 0.07, + "learning_rate": 9.951417004048583e-06, + "loss": 0.1253, + "step": 770 + }, + { + "epoch": 0.07, + "learning_rate": 9.949617633828161e-06, + "loss": 0.1047, + "step": 780 + }, + { + "epoch": 0.07, + "learning_rate": 9.947818263607738e-06, + "loss": 0.1174, + "step": 790 + }, + { + "epoch": 0.07, + "learning_rate": 9.946018893387315e-06, + "loss": 0.3025, + "step": 800 + }, + { + "epoch": 0.07, + "learning_rate": 9.944219523166892e-06, + "loss": 0.0581, + "step": 810 + }, + { + "epoch": 0.07, + "learning_rate": 9.942420152946469e-06, + "loss": 0.2664, + "step": 820 + }, + { + "epoch": 0.07, + "learning_rate": 9.940620782726047e-06, + "loss": 0.1218, + "step": 830 + }, + { + "epoch": 0.07, + "learning_rate": 9.938821412505624e-06, + "loss": 0.1708, + "step": 840 + }, + { + "epoch": 0.08, + "learning_rate": 9.937022042285201e-06, + "loss": 0.1626, + "step": 850 + }, + { + "epoch": 0.08, + "learning_rate": 9.935222672064778e-06, + "loss": 0.0553, + "step": 860 + }, + { + "epoch": 0.08, + "learning_rate": 9.933423301844355e-06, + "loss": 0.1209, + "step": 870 + }, + { + "epoch": 0.08, + "learning_rate": 9.931623931623933e-06, + "loss": 0.11, + "step": 880 + }, + { + "epoch": 0.08, + "learning_rate": 9.929824561403509e-06, + "loss": 0.0945, + "step": 890 + }, + { + "epoch": 0.08, + "learning_rate": 9.928025191183087e-06, + "loss": 0.2105, + "step": 900 + }, + { + "epoch": 0.08, + "learning_rate": 9.926225820962664e-06, + "loss": 0.1548, + "step": 910 + }, + { + "epoch": 0.08, + "learning_rate": 9.92442645074224e-06, + "loss": 0.1819, + "step": 920 + }, + { + "epoch": 0.08, + "learning_rate": 9.922627080521818e-06, + "loss": 0.1461, + "step": 930 + }, + { + "epoch": 0.08, + "learning_rate": 9.920827710301395e-06, + "loss": 0.1917, + "step": 940 + }, + { + "epoch": 0.08, + "learning_rate": 9.919028340080973e-06, + "loss": 0.0796, + "step": 950 + }, + { + "epoch": 0.09, + "learning_rate": 9.91722896986055e-06, + "loss": 0.1768, + "step": 960 + }, + { + "epoch": 0.09, + "learning_rate": 9.915429599640127e-06, + "loss": 0.1726, + "step": 970 + }, + { + "epoch": 0.09, + "learning_rate": 9.913630229419704e-06, + "loss": 0.1244, + "step": 980 + }, + { + "epoch": 0.09, + "learning_rate": 9.91183085919928e-06, + "loss": 0.0765, + "step": 990 + }, + { + "epoch": 0.09, + "learning_rate": 9.910031488978859e-06, + "loss": 0.1842, + "step": 1000 + }, + { + "epoch": 0.09, + "learning_rate": 9.908232118758436e-06, + "loss": 0.1192, + "step": 1010 + }, + { + "epoch": 0.09, + "learning_rate": 9.906432748538013e-06, + "loss": 0.132, + "step": 1020 + }, + { + "epoch": 0.09, + "learning_rate": 9.90463337831759e-06, + "loss": 0.1347, + "step": 1030 + }, + { + "epoch": 0.09, + "learning_rate": 9.902834008097167e-06, + "loss": 0.0591, + "step": 1040 + }, + { + "epoch": 0.09, + "learning_rate": 9.901034637876743e-06, + "loss": 0.1588, + "step": 1050 + }, + { + "epoch": 0.09, + "learning_rate": 9.89923526765632e-06, + "loss": 0.1273, + "step": 1060 + }, + { + "epoch": 0.1, + "learning_rate": 9.897435897435899e-06, + "loss": 0.0527, + "step": 1070 + }, + { + "epoch": 0.1, + "learning_rate": 9.895636527215476e-06, + "loss": 0.1438, + "step": 1080 + }, + { + "epoch": 0.1, + "learning_rate": 9.893837156995053e-06, + "loss": 0.1085, + "step": 1090 + }, + { + "epoch": 0.1, + "learning_rate": 9.89203778677463e-06, + "loss": 0.0793, + "step": 1100 + }, + { + "epoch": 0.1, + "learning_rate": 9.890238416554206e-06, + "loss": 0.1617, + "step": 1110 + }, + { + "epoch": 0.1, + "learning_rate": 9.888439046333785e-06, + "loss": 0.0164, + "step": 1120 + }, + { + "epoch": 0.1, + "learning_rate": 9.886639676113362e-06, + "loss": 0.2053, + "step": 1130 + }, + { + "epoch": 0.1, + "learning_rate": 9.884840305892939e-06, + "loss": 0.1774, + "step": 1140 + }, + { + "epoch": 0.1, + "learning_rate": 9.883040935672515e-06, + "loss": 0.1195, + "step": 1150 + }, + { + "epoch": 0.1, + "learning_rate": 9.881241565452092e-06, + "loss": 0.0487, + "step": 1160 + }, + { + "epoch": 0.1, + "learning_rate": 9.879442195231669e-06, + "loss": 0.1629, + "step": 1170 + }, + { + "epoch": 0.11, + "learning_rate": 9.877642825011246e-06, + "loss": 0.0964, + "step": 1180 + }, + { + "epoch": 0.11, + "learning_rate": 9.875843454790825e-06, + "loss": 0.1443, + "step": 1190 + }, + { + "epoch": 0.11, + "learning_rate": 9.874044084570401e-06, + "loss": 0.1089, + "step": 1200 + }, + { + "epoch": 0.11, + "learning_rate": 9.872244714349978e-06, + "loss": 0.0233, + "step": 1210 + }, + { + "epoch": 0.11, + "learning_rate": 9.870445344129555e-06, + "loss": 0.0712, + "step": 1220 + }, + { + "epoch": 0.11, + "learning_rate": 9.868645973909132e-06, + "loss": 0.2537, + "step": 1230 + }, + { + "epoch": 0.11, + "learning_rate": 9.86684660368871e-06, + "loss": 0.2621, + "step": 1240 + }, + { + "epoch": 0.11, + "learning_rate": 9.865047233468287e-06, + "loss": 0.1302, + "step": 1250 + }, + { + "epoch": 0.11, + "learning_rate": 9.863247863247864e-06, + "loss": 0.136, + "step": 1260 + }, + { + "epoch": 0.11, + "learning_rate": 9.861448493027441e-06, + "loss": 0.146, + "step": 1270 + }, + { + "epoch": 0.11, + "learning_rate": 9.859649122807018e-06, + "loss": 0.0938, + "step": 1280 + }, + { + "epoch": 0.12, + "learning_rate": 9.857849752586597e-06, + "loss": 0.0718, + "step": 1290 + }, + { + "epoch": 0.12, + "learning_rate": 9.856050382366172e-06, + "loss": 0.1728, + "step": 1300 + }, + { + "epoch": 0.12, + "learning_rate": 9.85425101214575e-06, + "loss": 0.2543, + "step": 1310 + }, + { + "epoch": 0.12, + "learning_rate": 9.852451641925327e-06, + "loss": 0.1327, + "step": 1320 + }, + { + "epoch": 0.12, + "learning_rate": 9.850652271704904e-06, + "loss": 0.1058, + "step": 1330 + }, + { + "epoch": 0.12, + "learning_rate": 9.84885290148448e-06, + "loss": 0.13, + "step": 1340 + }, + { + "epoch": 0.12, + "learning_rate": 9.847053531264058e-06, + "loss": 0.1322, + "step": 1350 + }, + { + "epoch": 0.12, + "learning_rate": 9.845254161043636e-06, + "loss": 0.1909, + "step": 1360 + }, + { + "epoch": 0.12, + "learning_rate": 9.843454790823213e-06, + "loss": 0.078, + "step": 1370 + }, + { + "epoch": 0.12, + "learning_rate": 9.84165542060279e-06, + "loss": 0.1836, + "step": 1380 + }, + { + "epoch": 0.12, + "learning_rate": 9.839856050382367e-06, + "loss": 0.1491, + "step": 1390 + }, + { + "epoch": 0.12, + "learning_rate": 9.838056680161944e-06, + "loss": 0.139, + "step": 1400 + }, + { + "epoch": 0.13, + "learning_rate": 9.836257309941522e-06, + "loss": 0.0737, + "step": 1410 + }, + { + "epoch": 0.13, + "learning_rate": 9.834457939721097e-06, + "loss": 0.1696, + "step": 1420 + }, + { + "epoch": 0.13, + "learning_rate": 9.832658569500676e-06, + "loss": 0.2328, + "step": 1430 + }, + { + "epoch": 0.13, + "learning_rate": 9.830859199280253e-06, + "loss": 0.1342, + "step": 1440 + }, + { + "epoch": 0.13, + "learning_rate": 9.82905982905983e-06, + "loss": 0.1243, + "step": 1450 + }, + { + "epoch": 0.13, + "learning_rate": 9.827260458839407e-06, + "loss": 0.1969, + "step": 1460 + }, + { + "epoch": 0.13, + "learning_rate": 9.825461088618983e-06, + "loss": 0.2298, + "step": 1470 + }, + { + "epoch": 0.13, + "learning_rate": 9.823661718398562e-06, + "loss": 0.1167, + "step": 1480 + }, + { + "epoch": 0.13, + "learning_rate": 9.821862348178139e-06, + "loss": 0.123, + "step": 1490 + }, + { + "epoch": 0.13, + "learning_rate": 9.820062977957716e-06, + "loss": 0.1128, + "step": 1500 + }, + { + "epoch": 0.13, + "eval_accuracy": 0.9694931312174325, + "eval_f1": 0.94606967396159, + "eval_loss": 0.09655023366212845, + "eval_precision": 0.9585926009729607, + "eval_recall": 0.9338697233550094, + "eval_runtime": 436.9466, + "eval_samples_per_second": 72.469, + "eval_steps_per_second": 4.531, + "step": 1500 + }, + { + "epoch": 0.13, + "learning_rate": 9.818263607737293e-06, + "loss": 0.0397, + "step": 1510 + }, + { + "epoch": 0.14, + "learning_rate": 9.81646423751687e-06, + "loss": 0.1572, + "step": 1520 + }, + { + "epoch": 0.14, + "learning_rate": 9.814664867296448e-06, + "loss": 0.1214, + "step": 1530 + }, + { + "epoch": 0.14, + "learning_rate": 9.812865497076025e-06, + "loss": 0.135, + "step": 1540 + }, + { + "epoch": 0.14, + "learning_rate": 9.811066126855602e-06, + "loss": 0.3403, + "step": 1550 + }, + { + "epoch": 0.14, + "learning_rate": 9.809266756635179e-06, + "loss": 0.0645, + "step": 1560 + }, + { + "epoch": 0.14, + "learning_rate": 9.807467386414755e-06, + "loss": 0.1162, + "step": 1570 + }, + { + "epoch": 0.14, + "learning_rate": 9.805668016194332e-06, + "loss": 0.065, + "step": 1580 + }, + { + "epoch": 0.14, + "learning_rate": 9.803868645973909e-06, + "loss": 0.0923, + "step": 1590 + }, + { + "epoch": 0.14, + "learning_rate": 9.802069275753488e-06, + "loss": 0.2101, + "step": 1600 + }, + { + "epoch": 0.14, + "learning_rate": 9.800269905533065e-06, + "loss": 0.1123, + "step": 1610 + }, + { + "epoch": 0.14, + "learning_rate": 9.798470535312641e-06, + "loss": 0.2323, + "step": 1620 + }, + { + "epoch": 0.15, + "learning_rate": 9.796671165092218e-06, + "loss": 0.0653, + "step": 1630 + }, + { + "epoch": 0.15, + "learning_rate": 9.794871794871795e-06, + "loss": 0.1639, + "step": 1640 + }, + { + "epoch": 0.15, + "learning_rate": 9.793072424651374e-06, + "loss": 0.0505, + "step": 1650 + }, + { + "epoch": 0.15, + "learning_rate": 9.79127305443095e-06, + "loss": 0.1409, + "step": 1660 + }, + { + "epoch": 0.15, + "learning_rate": 9.789473684210527e-06, + "loss": 0.1419, + "step": 1670 + }, + { + "epoch": 0.15, + "learning_rate": 9.787674313990104e-06, + "loss": 0.1625, + "step": 1680 + }, + { + "epoch": 0.15, + "learning_rate": 9.785874943769681e-06, + "loss": 0.0771, + "step": 1690 + }, + { + "epoch": 0.15, + "learning_rate": 9.784075573549258e-06, + "loss": 0.0929, + "step": 1700 + }, + { + "epoch": 0.15, + "learning_rate": 9.782276203328835e-06, + "loss": 0.095, + "step": 1710 + }, + { + "epoch": 0.15, + "learning_rate": 9.780476833108413e-06, + "loss": 0.1673, + "step": 1720 + }, + { + "epoch": 0.15, + "learning_rate": 9.77867746288799e-06, + "loss": 0.1376, + "step": 1730 + }, + { + "epoch": 0.16, + "learning_rate": 9.776878092667567e-06, + "loss": 0.0783, + "step": 1740 + }, + { + "epoch": 0.16, + "learning_rate": 9.775078722447144e-06, + "loss": 0.1428, + "step": 1750 + }, + { + "epoch": 0.16, + "learning_rate": 9.77327935222672e-06, + "loss": 0.0576, + "step": 1760 + }, + { + "epoch": 0.16, + "learning_rate": 9.7714799820063e-06, + "loss": 0.0739, + "step": 1770 + }, + { + "epoch": 0.16, + "learning_rate": 9.769680611785876e-06, + "loss": 0.09, + "step": 1780 + }, + { + "epoch": 0.16, + "learning_rate": 9.767881241565453e-06, + "loss": 0.0987, + "step": 1790 + }, + { + "epoch": 0.16, + "learning_rate": 9.76608187134503e-06, + "loss": 0.1613, + "step": 1800 + }, + { + "epoch": 0.16, + "learning_rate": 9.764282501124607e-06, + "loss": 0.058, + "step": 1810 + }, + { + "epoch": 0.16, + "learning_rate": 9.762483130904185e-06, + "loss": 0.218, + "step": 1820 + }, + { + "epoch": 0.16, + "learning_rate": 9.76068376068376e-06, + "loss": 0.1083, + "step": 1830 + }, + { + "epoch": 0.16, + "learning_rate": 9.758884390463339e-06, + "loss": 0.1274, + "step": 1840 + }, + { + "epoch": 0.16, + "learning_rate": 9.757085020242916e-06, + "loss": 0.1016, + "step": 1850 + }, + { + "epoch": 0.17, + "learning_rate": 9.755285650022493e-06, + "loss": 0.1442, + "step": 1860 + }, + { + "epoch": 0.17, + "learning_rate": 9.75348627980207e-06, + "loss": 0.0771, + "step": 1870 + }, + { + "epoch": 0.17, + "learning_rate": 9.751686909581647e-06, + "loss": 0.0832, + "step": 1880 + }, + { + "epoch": 0.17, + "learning_rate": 9.749887539361225e-06, + "loss": 0.1264, + "step": 1890 + }, + { + "epoch": 0.17, + "learning_rate": 9.748088169140802e-06, + "loss": 0.1182, + "step": 1900 + }, + { + "epoch": 0.17, + "learning_rate": 9.746288798920379e-06, + "loss": 0.1582, + "step": 1910 + }, + { + "epoch": 0.17, + "learning_rate": 9.744489428699956e-06, + "loss": 0.1256, + "step": 1920 + }, + { + "epoch": 0.17, + "learning_rate": 9.742690058479533e-06, + "loss": 0.1081, + "step": 1930 + }, + { + "epoch": 0.17, + "learning_rate": 9.740890688259111e-06, + "loss": 0.0922, + "step": 1940 + }, + { + "epoch": 0.17, + "learning_rate": 9.739091318038686e-06, + "loss": 0.1155, + "step": 1950 + }, + { + "epoch": 0.17, + "learning_rate": 9.737291947818265e-06, + "loss": 0.0559, + "step": 1960 + }, + { + "epoch": 0.18, + "learning_rate": 9.735492577597842e-06, + "loss": 0.1008, + "step": 1970 + }, + { + "epoch": 0.18, + "learning_rate": 9.733693207377419e-06, + "loss": 0.1123, + "step": 1980 + }, + { + "epoch": 0.18, + "learning_rate": 9.731893837156995e-06, + "loss": 0.1095, + "step": 1990 + }, + { + "epoch": 0.18, + "learning_rate": 9.730094466936572e-06, + "loss": 0.1269, + "step": 2000 + }, + { + "epoch": 0.18, + "learning_rate": 9.72829509671615e-06, + "loss": 0.1702, + "step": 2010 + }, + { + "epoch": 0.18, + "learning_rate": 9.726495726495728e-06, + "loss": 0.1762, + "step": 2020 + }, + { + "epoch": 0.18, + "learning_rate": 9.724696356275305e-06, + "loss": 0.0528, + "step": 2030 + }, + { + "epoch": 0.18, + "learning_rate": 9.722896986054881e-06, + "loss": 0.0912, + "step": 2040 + }, + { + "epoch": 0.18, + "learning_rate": 9.721097615834458e-06, + "loss": 0.0528, + "step": 2050 + }, + { + "epoch": 0.18, + "learning_rate": 9.719298245614037e-06, + "loss": 0.1277, + "step": 2060 + }, + { + "epoch": 0.18, + "learning_rate": 9.717498875393614e-06, + "loss": 0.1685, + "step": 2070 + }, + { + "epoch": 0.19, + "learning_rate": 9.71569950517319e-06, + "loss": 0.0825, + "step": 2080 + }, + { + "epoch": 0.19, + "learning_rate": 9.713900134952767e-06, + "loss": 0.1238, + "step": 2090 + }, + { + "epoch": 0.19, + "learning_rate": 9.712100764732344e-06, + "loss": 0.0815, + "step": 2100 + }, + { + "epoch": 0.19, + "learning_rate": 9.710301394511921e-06, + "loss": 0.0942, + "step": 2110 + }, + { + "epoch": 0.19, + "learning_rate": 9.708502024291498e-06, + "loss": 0.0789, + "step": 2120 + }, + { + "epoch": 0.19, + "learning_rate": 9.706702654071076e-06, + "loss": 0.1439, + "step": 2130 + }, + { + "epoch": 0.19, + "learning_rate": 9.704903283850653e-06, + "loss": 0.1, + "step": 2140 + }, + { + "epoch": 0.19, + "learning_rate": 9.70310391363023e-06, + "loss": 0.0548, + "step": 2150 + }, + { + "epoch": 0.19, + "learning_rate": 9.701304543409807e-06, + "loss": 0.1491, + "step": 2160 + }, + { + "epoch": 0.19, + "learning_rate": 9.699505173189384e-06, + "loss": 0.063, + "step": 2170 + }, + { + "epoch": 0.19, + "learning_rate": 9.697705802968962e-06, + "loss": 0.2628, + "step": 2180 + }, + { + "epoch": 0.2, + "learning_rate": 9.69590643274854e-06, + "loss": 0.2376, + "step": 2190 + }, + { + "epoch": 0.2, + "learning_rate": 9.694107062528116e-06, + "loss": 0.1094, + "step": 2200 + }, + { + "epoch": 0.2, + "learning_rate": 9.692307692307693e-06, + "loss": 0.143, + "step": 2210 + }, + { + "epoch": 0.2, + "learning_rate": 9.69050832208727e-06, + "loss": 0.1503, + "step": 2220 + }, + { + "epoch": 0.2, + "learning_rate": 9.688708951866847e-06, + "loss": 0.1998, + "step": 2230 + }, + { + "epoch": 0.2, + "learning_rate": 9.686909581646424e-06, + "loss": 0.0649, + "step": 2240 + }, + { + "epoch": 0.2, + "learning_rate": 9.685110211426002e-06, + "loss": 0.024, + "step": 2250 + }, + { + "epoch": 0.2, + "learning_rate": 9.683310841205579e-06, + "loss": 0.119, + "step": 2260 + }, + { + "epoch": 0.2, + "learning_rate": 9.681511470985156e-06, + "loss": 0.228, + "step": 2270 + }, + { + "epoch": 0.2, + "learning_rate": 9.679712100764733e-06, + "loss": 0.1202, + "step": 2280 + }, + { + "epoch": 0.2, + "learning_rate": 9.67791273054431e-06, + "loss": 0.053, + "step": 2290 + }, + { + "epoch": 0.21, + "learning_rate": 9.676113360323888e-06, + "loss": 0.1156, + "step": 2300 + }, + { + "epoch": 0.21, + "learning_rate": 9.674313990103465e-06, + "loss": 0.1197, + "step": 2310 + }, + { + "epoch": 0.21, + "learning_rate": 9.672514619883042e-06, + "loss": 0.0872, + "step": 2320 + }, + { + "epoch": 0.21, + "learning_rate": 9.670715249662619e-06, + "loss": 0.1937, + "step": 2330 + }, + { + "epoch": 0.21, + "learning_rate": 9.668915879442196e-06, + "loss": 0.172, + "step": 2340 + }, + { + "epoch": 0.21, + "learning_rate": 9.667116509221774e-06, + "loss": 0.1065, + "step": 2350 + }, + { + "epoch": 0.21, + "learning_rate": 9.66531713900135e-06, + "loss": 0.12, + "step": 2360 + }, + { + "epoch": 0.21, + "learning_rate": 9.663517768780928e-06, + "loss": 0.094, + "step": 2370 + }, + { + "epoch": 0.21, + "learning_rate": 9.661718398560505e-06, + "loss": 0.0623, + "step": 2380 + }, + { + "epoch": 0.21, + "learning_rate": 9.659919028340082e-06, + "loss": 0.1098, + "step": 2390 + }, + { + "epoch": 0.21, + "learning_rate": 9.658119658119659e-06, + "loss": 0.1679, + "step": 2400 + }, + { + "epoch": 0.21, + "learning_rate": 9.656320287899235e-06, + "loss": 0.1222, + "step": 2410 + }, + { + "epoch": 0.22, + "learning_rate": 9.654520917678814e-06, + "loss": 0.0483, + "step": 2420 + }, + { + "epoch": 0.22, + "learning_rate": 9.65272154745839e-06, + "loss": 0.1289, + "step": 2430 + }, + { + "epoch": 0.22, + "learning_rate": 9.650922177237968e-06, + "loss": 0.0802, + "step": 2440 + }, + { + "epoch": 0.22, + "learning_rate": 9.649122807017545e-06, + "loss": 0.0691, + "step": 2450 + }, + { + "epoch": 0.22, + "learning_rate": 9.647323436797121e-06, + "loss": 0.2148, + "step": 2460 + }, + { + "epoch": 0.22, + "learning_rate": 9.6455240665767e-06, + "loss": 0.1005, + "step": 2470 + }, + { + "epoch": 0.22, + "learning_rate": 9.643724696356275e-06, + "loss": 0.1019, + "step": 2480 + }, + { + "epoch": 0.22, + "learning_rate": 9.641925326135854e-06, + "loss": 0.1362, + "step": 2490 + }, + { + "epoch": 0.22, + "learning_rate": 9.64012595591543e-06, + "loss": 0.0445, + "step": 2500 + }, + { + "epoch": 0.22, + "learning_rate": 9.638326585695007e-06, + "loss": 0.1021, + "step": 2510 + }, + { + "epoch": 0.22, + "learning_rate": 9.636527215474584e-06, + "loss": 0.2398, + "step": 2520 + }, + { + "epoch": 0.23, + "learning_rate": 9.634727845254161e-06, + "loss": 0.1228, + "step": 2530 + }, + { + "epoch": 0.23, + "learning_rate": 9.63292847503374e-06, + "loss": 0.1529, + "step": 2540 + }, + { + "epoch": 0.23, + "learning_rate": 9.631129104813316e-06, + "loss": 0.0813, + "step": 2550 + }, + { + "epoch": 0.23, + "learning_rate": 9.629329734592893e-06, + "loss": 0.0542, + "step": 2560 + }, + { + "epoch": 0.23, + "learning_rate": 9.62753036437247e-06, + "loss": 0.1951, + "step": 2570 + }, + { + "epoch": 0.23, + "learning_rate": 9.625730994152047e-06, + "loss": 0.0304, + "step": 2580 + }, + { + "epoch": 0.23, + "learning_rate": 9.623931623931626e-06, + "loss": 0.1102, + "step": 2590 + }, + { + "epoch": 0.23, + "learning_rate": 9.6221322537112e-06, + "loss": 0.0727, + "step": 2600 + }, + { + "epoch": 0.23, + "learning_rate": 9.62033288349078e-06, + "loss": 0.136, + "step": 2610 + }, + { + "epoch": 0.23, + "learning_rate": 9.618533513270356e-06, + "loss": 0.1733, + "step": 2620 + }, + { + "epoch": 0.23, + "learning_rate": 9.616734143049933e-06, + "loss": 0.043, + "step": 2630 + }, + { + "epoch": 0.24, + "learning_rate": 9.61493477282951e-06, + "loss": 0.1242, + "step": 2640 + }, + { + "epoch": 0.24, + "learning_rate": 9.613135402609087e-06, + "loss": 0.0807, + "step": 2650 + }, + { + "epoch": 0.24, + "learning_rate": 9.611336032388665e-06, + "loss": 0.084, + "step": 2660 + }, + { + "epoch": 0.24, + "learning_rate": 9.609536662168242e-06, + "loss": 0.0175, + "step": 2670 + }, + { + "epoch": 0.24, + "learning_rate": 9.607737291947819e-06, + "loss": 0.0686, + "step": 2680 + }, + { + "epoch": 0.24, + "learning_rate": 9.605937921727396e-06, + "loss": 0.0984, + "step": 2690 + }, + { + "epoch": 0.24, + "learning_rate": 9.604138551506973e-06, + "loss": 0.0717, + "step": 2700 + }, + { + "epoch": 0.24, + "learning_rate": 9.602339181286551e-06, + "loss": 0.0323, + "step": 2710 + }, + { + "epoch": 0.24, + "learning_rate": 9.600539811066128e-06, + "loss": 0.1174, + "step": 2720 + }, + { + "epoch": 0.24, + "learning_rate": 9.598740440845705e-06, + "loss": 0.2252, + "step": 2730 + }, + { + "epoch": 0.24, + "learning_rate": 9.596941070625282e-06, + "loss": 0.1463, + "step": 2740 + }, + { + "epoch": 0.25, + "learning_rate": 9.595141700404859e-06, + "loss": 0.1296, + "step": 2750 + }, + { + "epoch": 0.25, + "learning_rate": 9.593342330184436e-06, + "loss": 0.0498, + "step": 2760 + }, + { + "epoch": 0.25, + "learning_rate": 9.591542959964013e-06, + "loss": 0.0488, + "step": 2770 + }, + { + "epoch": 0.25, + "learning_rate": 9.589743589743591e-06, + "loss": 0.1701, + "step": 2780 + }, + { + "epoch": 0.25, + "learning_rate": 9.587944219523168e-06, + "loss": 0.0986, + "step": 2790 + }, + { + "epoch": 0.25, + "learning_rate": 9.586144849302745e-06, + "loss": 0.0605, + "step": 2800 + }, + { + "epoch": 0.25, + "learning_rate": 9.584345479082322e-06, + "loss": 0.0779, + "step": 2810 + }, + { + "epoch": 0.25, + "learning_rate": 9.582546108861898e-06, + "loss": 0.1724, + "step": 2820 + }, + { + "epoch": 0.25, + "learning_rate": 9.580746738641477e-06, + "loss": 0.0229, + "step": 2830 + }, + { + "epoch": 0.25, + "learning_rate": 9.578947368421054e-06, + "loss": 0.1895, + "step": 2840 + }, + { + "epoch": 0.25, + "learning_rate": 9.57714799820063e-06, + "loss": 0.1627, + "step": 2850 + }, + { + "epoch": 0.26, + "learning_rate": 9.575348627980208e-06, + "loss": 0.061, + "step": 2860 + }, + { + "epoch": 0.26, + "learning_rate": 9.573549257759784e-06, + "loss": 0.0394, + "step": 2870 + }, + { + "epoch": 0.26, + "learning_rate": 9.571749887539361e-06, + "loss": 0.0101, + "step": 2880 + }, + { + "epoch": 0.26, + "learning_rate": 9.569950517318938e-06, + "loss": 0.1223, + "step": 2890 + }, + { + "epoch": 0.26, + "learning_rate": 9.568151147098517e-06, + "loss": 0.0859, + "step": 2900 + }, + { + "epoch": 0.26, + "learning_rate": 9.566351776878094e-06, + "loss": 0.0245, + "step": 2910 + }, + { + "epoch": 0.26, + "learning_rate": 9.56455240665767e-06, + "loss": 0.077, + "step": 2920 + }, + { + "epoch": 0.26, + "learning_rate": 9.562753036437247e-06, + "loss": 0.1988, + "step": 2930 + }, + { + "epoch": 0.26, + "learning_rate": 9.560953666216824e-06, + "loss": 0.1803, + "step": 2940 + }, + { + "epoch": 0.26, + "learning_rate": 9.559154295996403e-06, + "loss": 0.0734, + "step": 2950 + }, + { + "epoch": 0.26, + "learning_rate": 9.55735492577598e-06, + "loss": 0.0309, + "step": 2960 + }, + { + "epoch": 0.26, + "learning_rate": 9.555555555555556e-06, + "loss": 0.0958, + "step": 2970 + }, + { + "epoch": 0.27, + "learning_rate": 9.553756185335133e-06, + "loss": 0.1951, + "step": 2980 + }, + { + "epoch": 0.27, + "learning_rate": 9.55195681511471e-06, + "loss": 0.0235, + "step": 2990 + }, + { + "epoch": 0.27, + "learning_rate": 9.550157444894289e-06, + "loss": 0.0854, + "step": 3000 + }, + { + "epoch": 0.27, + "eval_accuracy": 0.9742618032528028, + "eval_f1": 0.9549798375959787, + "eval_loss": 0.1135290339589119, + "eval_precision": 0.9572535991140643, + "eval_recall": 0.9527168521988317, + "eval_runtime": 437.2717, + "eval_samples_per_second": 72.415, + "eval_steps_per_second": 4.528, + "step": 3000 + }, + { + "epoch": 0.27, + "learning_rate": 9.548358074673864e-06, + "loss": 0.0418, + "step": 3010 + }, + { + "epoch": 0.27, + "learning_rate": 9.546558704453442e-06, + "loss": 0.0694, + "step": 3020 + }, + { + "epoch": 0.27, + "learning_rate": 9.54475933423302e-06, + "loss": 0.048, + "step": 3030 + }, + { + "epoch": 0.27, + "learning_rate": 9.542959964012596e-06, + "loss": 0.1657, + "step": 3040 + }, + { + "epoch": 0.27, + "learning_rate": 9.541160593792173e-06, + "loss": 0.0778, + "step": 3050 + }, + { + "epoch": 0.27, + "learning_rate": 9.53936122357175e-06, + "loss": 0.1125, + "step": 3060 + }, + { + "epoch": 0.27, + "learning_rate": 9.537561853351328e-06, + "loss": 0.0662, + "step": 3070 + }, + { + "epoch": 0.27, + "learning_rate": 9.535762483130905e-06, + "loss": 0.1191, + "step": 3080 + }, + { + "epoch": 0.28, + "learning_rate": 9.533963112910482e-06, + "loss": 0.171, + "step": 3090 + }, + { + "epoch": 0.28, + "learning_rate": 9.532163742690059e-06, + "loss": 0.0703, + "step": 3100 + }, + { + "epoch": 0.28, + "learning_rate": 9.530364372469636e-06, + "loss": 0.138, + "step": 3110 + }, + { + "epoch": 0.28, + "learning_rate": 9.528565002249214e-06, + "loss": 0.1945, + "step": 3120 + }, + { + "epoch": 0.28, + "learning_rate": 9.52676563202879e-06, + "loss": 0.1241, + "step": 3130 + }, + { + "epoch": 0.28, + "learning_rate": 9.524966261808368e-06, + "loss": 0.1185, + "step": 3140 + }, + { + "epoch": 0.28, + "learning_rate": 9.523166891587945e-06, + "loss": 0.0739, + "step": 3150 + }, + { + "epoch": 0.28, + "learning_rate": 9.521367521367522e-06, + "loss": 0.0361, + "step": 3160 + }, + { + "epoch": 0.28, + "learning_rate": 9.519568151147099e-06, + "loss": 0.1103, + "step": 3170 + }, + { + "epoch": 0.28, + "learning_rate": 9.517768780926676e-06, + "loss": 0.1141, + "step": 3180 + }, + { + "epoch": 0.28, + "learning_rate": 9.515969410706254e-06, + "loss": 0.0741, + "step": 3190 + }, + { + "epoch": 0.29, + "learning_rate": 9.514170040485831e-06, + "loss": 0.0637, + "step": 3200 + }, + { + "epoch": 0.29, + "learning_rate": 9.512370670265408e-06, + "loss": 0.2084, + "step": 3210 + }, + { + "epoch": 0.29, + "learning_rate": 9.510571300044985e-06, + "loss": 0.1303, + "step": 3220 + }, + { + "epoch": 0.29, + "learning_rate": 9.508771929824562e-06, + "loss": 0.1449, + "step": 3230 + }, + { + "epoch": 0.29, + "learning_rate": 9.50697255960414e-06, + "loss": 0.0977, + "step": 3240 + }, + { + "epoch": 0.29, + "learning_rate": 9.505173189383717e-06, + "loss": 0.0754, + "step": 3250 + }, + { + "epoch": 0.29, + "learning_rate": 9.503373819163294e-06, + "loss": 0.0237, + "step": 3260 + }, + { + "epoch": 0.29, + "learning_rate": 9.50157444894287e-06, + "loss": 0.1629, + "step": 3270 + }, + { + "epoch": 0.29, + "learning_rate": 9.499775078722448e-06, + "loss": 0.1183, + "step": 3280 + }, + { + "epoch": 0.29, + "learning_rate": 9.497975708502024e-06, + "loss": 0.0365, + "step": 3290 + }, + { + "epoch": 0.29, + "learning_rate": 9.496176338281601e-06, + "loss": 0.0068, + "step": 3300 + }, + { + "epoch": 0.3, + "learning_rate": 9.49437696806118e-06, + "loss": 0.2428, + "step": 3310 + }, + { + "epoch": 0.3, + "learning_rate": 9.492577597840757e-06, + "loss": 0.0858, + "step": 3320 + }, + { + "epoch": 0.3, + "learning_rate": 9.490778227620334e-06, + "loss": 0.0579, + "step": 3330 + }, + { + "epoch": 0.3, + "learning_rate": 9.48897885739991e-06, + "loss": 0.0911, + "step": 3340 + }, + { + "epoch": 0.3, + "learning_rate": 9.487179487179487e-06, + "loss": 0.0851, + "step": 3350 + }, + { + "epoch": 0.3, + "learning_rate": 9.485380116959066e-06, + "loss": 0.1322, + "step": 3360 + }, + { + "epoch": 0.3, + "learning_rate": 9.483580746738643e-06, + "loss": 0.0657, + "step": 3370 + }, + { + "epoch": 0.3, + "learning_rate": 9.48178137651822e-06, + "loss": 0.0299, + "step": 3380 + }, + { + "epoch": 0.3, + "learning_rate": 9.479982006297796e-06, + "loss": 0.2368, + "step": 3390 + }, + { + "epoch": 0.3, + "learning_rate": 9.478182636077373e-06, + "loss": 0.064, + "step": 3400 + }, + { + "epoch": 0.3, + "learning_rate": 9.47638326585695e-06, + "loss": 0.1288, + "step": 3410 + }, + { + "epoch": 0.3, + "learning_rate": 9.474583895636527e-06, + "loss": 0.0765, + "step": 3420 + }, + { + "epoch": 0.31, + "learning_rate": 9.472784525416106e-06, + "loss": 0.1403, + "step": 3430 + }, + { + "epoch": 0.31, + "learning_rate": 9.470985155195682e-06, + "loss": 0.0662, + "step": 3440 + }, + { + "epoch": 0.31, + "learning_rate": 9.46918578497526e-06, + "loss": 0.1429, + "step": 3450 + }, + { + "epoch": 0.31, + "learning_rate": 9.467386414754836e-06, + "loss": 0.1111, + "step": 3460 + }, + { + "epoch": 0.31, + "learning_rate": 9.465587044534413e-06, + "loss": 0.1807, + "step": 3470 + }, + { + "epoch": 0.31, + "learning_rate": 9.463787674313992e-06, + "loss": 0.1044, + "step": 3480 + }, + { + "epoch": 0.31, + "learning_rate": 9.461988304093568e-06, + "loss": 0.0654, + "step": 3490 + }, + { + "epoch": 0.31, + "learning_rate": 9.460188933873145e-06, + "loss": 0.0255, + "step": 3500 + }, + { + "epoch": 0.31, + "learning_rate": 9.458389563652722e-06, + "loss": 0.1917, + "step": 3510 + }, + { + "epoch": 0.31, + "learning_rate": 9.456590193432299e-06, + "loss": 0.02, + "step": 3520 + }, + { + "epoch": 0.31, + "learning_rate": 9.454790823211878e-06, + "loss": 0.0694, + "step": 3530 + }, + { + "epoch": 0.32, + "learning_rate": 9.452991452991453e-06, + "loss": 0.1769, + "step": 3540 + }, + { + "epoch": 0.32, + "learning_rate": 9.451192082771031e-06, + "loss": 0.0723, + "step": 3550 + }, + { + "epoch": 0.32, + "learning_rate": 9.449392712550608e-06, + "loss": 0.1041, + "step": 3560 + }, + { + "epoch": 0.32, + "learning_rate": 9.447593342330185e-06, + "loss": 0.0711, + "step": 3570 + }, + { + "epoch": 0.32, + "learning_rate": 9.445793972109762e-06, + "loss": 0.0932, + "step": 3580 + }, + { + "epoch": 0.32, + "learning_rate": 9.443994601889339e-06, + "loss": 0.1224, + "step": 3590 + }, + { + "epoch": 0.32, + "learning_rate": 9.442195231668917e-06, + "loss": 0.0452, + "step": 3600 + }, + { + "epoch": 0.32, + "learning_rate": 9.440395861448494e-06, + "loss": 0.0763, + "step": 3610 + }, + { + "epoch": 0.32, + "learning_rate": 9.438596491228071e-06, + "loss": 0.0422, + "step": 3620 + }, + { + "epoch": 0.32, + "learning_rate": 9.436797121007648e-06, + "loss": 0.0184, + "step": 3630 + }, + { + "epoch": 0.32, + "learning_rate": 9.434997750787225e-06, + "loss": 0.0059, + "step": 3640 + }, + { + "epoch": 0.33, + "learning_rate": 9.433198380566803e-06, + "loss": 0.1277, + "step": 3650 + }, + { + "epoch": 0.33, + "learning_rate": 9.431399010346378e-06, + "loss": 0.0845, + "step": 3660 + }, + { + "epoch": 0.33, + "learning_rate": 9.429599640125957e-06, + "loss": 0.1707, + "step": 3670 + }, + { + "epoch": 0.33, + "learning_rate": 9.427800269905534e-06, + "loss": 0.1214, + "step": 3680 + }, + { + "epoch": 0.33, + "learning_rate": 9.42600089968511e-06, + "loss": 0.0493, + "step": 3690 + }, + { + "epoch": 0.33, + "learning_rate": 9.424201529464688e-06, + "loss": 0.1305, + "step": 3700 + }, + { + "epoch": 0.33, + "learning_rate": 9.422402159244264e-06, + "loss": 0.0809, + "step": 3710 + }, + { + "epoch": 0.33, + "learning_rate": 9.420602789023843e-06, + "loss": 0.0707, + "step": 3720 + }, + { + "epoch": 0.33, + "learning_rate": 9.41880341880342e-06, + "loss": 0.1068, + "step": 3730 + }, + { + "epoch": 0.33, + "learning_rate": 9.417004048582997e-06, + "loss": 0.0152, + "step": 3740 + }, + { + "epoch": 0.33, + "learning_rate": 9.415204678362574e-06, + "loss": 0.0773, + "step": 3750 + }, + { + "epoch": 0.34, + "learning_rate": 9.41340530814215e-06, + "loss": 0.072, + "step": 3760 + }, + { + "epoch": 0.34, + "learning_rate": 9.411605937921729e-06, + "loss": 0.1304, + "step": 3770 + }, + { + "epoch": 0.34, + "learning_rate": 9.409806567701306e-06, + "loss": 0.0825, + "step": 3780 + }, + { + "epoch": 0.34, + "learning_rate": 9.408007197480883e-06, + "loss": 0.1158, + "step": 3790 + }, + { + "epoch": 0.34, + "learning_rate": 9.40620782726046e-06, + "loss": 0.2465, + "step": 3800 + }, + { + "epoch": 0.34, + "learning_rate": 9.404408457040036e-06, + "loss": 0.2057, + "step": 3810 + }, + { + "epoch": 0.34, + "learning_rate": 9.402609086819613e-06, + "loss": 0.1457, + "step": 3820 + }, + { + "epoch": 0.34, + "learning_rate": 9.40080971659919e-06, + "loss": 0.0986, + "step": 3830 + }, + { + "epoch": 0.34, + "learning_rate": 9.399010346378769e-06, + "loss": 0.1613, + "step": 3840 + }, + { + "epoch": 0.34, + "learning_rate": 9.397210976158346e-06, + "loss": 0.0951, + "step": 3850 + }, + { + "epoch": 0.34, + "learning_rate": 9.395411605937922e-06, + "loss": 0.094, + "step": 3860 + }, + { + "epoch": 0.35, + "learning_rate": 9.3936122357175e-06, + "loss": 0.0467, + "step": 3870 + }, + { + "epoch": 0.35, + "learning_rate": 9.391812865497076e-06, + "loss": 0.1046, + "step": 3880 + }, + { + "epoch": 0.35, + "learning_rate": 9.390013495276655e-06, + "loss": 0.0671, + "step": 3890 + }, + { + "epoch": 0.35, + "learning_rate": 9.388214125056232e-06, + "loss": 0.0632, + "step": 3900 + }, + { + "epoch": 0.35, + "learning_rate": 9.386414754835808e-06, + "loss": 0.1289, + "step": 3910 + }, + { + "epoch": 0.35, + "learning_rate": 9.384615384615385e-06, + "loss": 0.0962, + "step": 3920 + }, + { + "epoch": 0.35, + "learning_rate": 9.382816014394962e-06, + "loss": 0.1108, + "step": 3930 + }, + { + "epoch": 0.35, + "learning_rate": 9.381016644174539e-06, + "loss": 0.1097, + "step": 3940 + }, + { + "epoch": 0.35, + "learning_rate": 9.379217273954116e-06, + "loss": 0.0337, + "step": 3950 + }, + { + "epoch": 0.35, + "learning_rate": 9.377417903733694e-06, + "loss": 0.0564, + "step": 3960 + }, + { + "epoch": 0.35, + "learning_rate": 9.375618533513271e-06, + "loss": 0.086, + "step": 3970 + }, + { + "epoch": 0.35, + "learning_rate": 9.373819163292848e-06, + "loss": 0.0716, + "step": 3980 + }, + { + "epoch": 0.36, + "learning_rate": 9.372019793072425e-06, + "loss": 0.0274, + "step": 3990 + }, + { + "epoch": 0.36, + "learning_rate": 9.370220422852002e-06, + "loss": 0.0945, + "step": 4000 + }, + { + "epoch": 0.36, + "learning_rate": 9.36842105263158e-06, + "loss": 0.2289, + "step": 4010 + }, + { + "epoch": 0.36, + "learning_rate": 9.366621682411157e-06, + "loss": 0.1001, + "step": 4020 + }, + { + "epoch": 0.36, + "learning_rate": 9.364822312190734e-06, + "loss": 0.127, + "step": 4030 + }, + { + "epoch": 0.36, + "learning_rate": 9.363022941970311e-06, + "loss": 0.0575, + "step": 4040 + }, + { + "epoch": 0.36, + "learning_rate": 9.361223571749888e-06, + "loss": 0.14, + "step": 4050 + }, + { + "epoch": 0.36, + "learning_rate": 9.359424201529466e-06, + "loss": 0.1394, + "step": 4060 + }, + { + "epoch": 0.36, + "learning_rate": 9.357624831309042e-06, + "loss": 0.0982, + "step": 4070 + }, + { + "epoch": 0.36, + "learning_rate": 9.35582546108862e-06, + "loss": 0.137, + "step": 4080 + }, + { + "epoch": 0.36, + "learning_rate": 9.354026090868197e-06, + "loss": 0.0678, + "step": 4090 + }, + { + "epoch": 0.37, + "learning_rate": 9.352226720647774e-06, + "loss": 0.1493, + "step": 4100 + }, + { + "epoch": 0.37, + "learning_rate": 9.35042735042735e-06, + "loss": 0.0452, + "step": 4110 + }, + { + "epoch": 0.37, + "learning_rate": 9.348627980206928e-06, + "loss": 0.0618, + "step": 4120 + }, + { + "epoch": 0.37, + "learning_rate": 9.346828609986506e-06, + "loss": 0.0688, + "step": 4130 + }, + { + "epoch": 0.37, + "learning_rate": 9.345029239766083e-06, + "loss": 0.0446, + "step": 4140 + }, + { + "epoch": 0.37, + "learning_rate": 9.34322986954566e-06, + "loss": 0.1038, + "step": 4150 + }, + { + "epoch": 0.37, + "learning_rate": 9.341430499325237e-06, + "loss": 0.0517, + "step": 4160 + }, + { + "epoch": 0.37, + "learning_rate": 9.339631129104814e-06, + "loss": 0.1752, + "step": 4170 + }, + { + "epoch": 0.37, + "learning_rate": 9.337831758884392e-06, + "loss": 0.0554, + "step": 4180 + }, + { + "epoch": 0.37, + "learning_rate": 9.336032388663967e-06, + "loss": 0.0795, + "step": 4190 + }, + { + "epoch": 0.37, + "learning_rate": 9.334233018443546e-06, + "loss": 0.0729, + "step": 4200 + }, + { + "epoch": 0.38, + "learning_rate": 9.332433648223123e-06, + "loss": 0.1994, + "step": 4210 + }, + { + "epoch": 0.38, + "learning_rate": 9.3306342780027e-06, + "loss": 0.1256, + "step": 4220 + }, + { + "epoch": 0.38, + "learning_rate": 9.328834907782276e-06, + "loss": 0.0416, + "step": 4230 + }, + { + "epoch": 0.38, + "learning_rate": 9.327035537561853e-06, + "loss": 0.0455, + "step": 4240 + }, + { + "epoch": 0.38, + "learning_rate": 9.325236167341432e-06, + "loss": 0.1276, + "step": 4250 + }, + { + "epoch": 0.38, + "learning_rate": 9.323436797121009e-06, + "loss": 0.1538, + "step": 4260 + }, + { + "epoch": 0.38, + "learning_rate": 9.321637426900586e-06, + "loss": 0.072, + "step": 4270 + }, + { + "epoch": 0.38, + "learning_rate": 9.319838056680162e-06, + "loss": 0.1352, + "step": 4280 + }, + { + "epoch": 0.38, + "learning_rate": 9.31803868645974e-06, + "loss": 0.0613, + "step": 4290 + }, + { + "epoch": 0.38, + "learning_rate": 9.316239316239318e-06, + "loss": 0.1611, + "step": 4300 + }, + { + "epoch": 0.38, + "learning_rate": 9.314439946018893e-06, + "loss": 0.0851, + "step": 4310 + }, + { + "epoch": 0.39, + "learning_rate": 9.312640575798472e-06, + "loss": 0.1095, + "step": 4320 + }, + { + "epoch": 0.39, + "learning_rate": 9.310841205578048e-06, + "loss": 0.1378, + "step": 4330 + }, + { + "epoch": 0.39, + "learning_rate": 9.309041835357625e-06, + "loss": 0.0981, + "step": 4340 + }, + { + "epoch": 0.39, + "learning_rate": 9.307242465137202e-06, + "loss": 0.0396, + "step": 4350 + }, + { + "epoch": 0.39, + "learning_rate": 9.305443094916779e-06, + "loss": 0.1012, + "step": 4360 + }, + { + "epoch": 0.39, + "learning_rate": 9.303643724696358e-06, + "loss": 0.1195, + "step": 4370 + }, + { + "epoch": 0.39, + "learning_rate": 9.301844354475934e-06, + "loss": 0.0449, + "step": 4380 + }, + { + "epoch": 0.39, + "learning_rate": 9.300044984255511e-06, + "loss": 0.0778, + "step": 4390 + }, + { + "epoch": 0.39, + "learning_rate": 9.298245614035088e-06, + "loss": 0.0422, + "step": 4400 + }, + { + "epoch": 0.39, + "learning_rate": 9.296446243814665e-06, + "loss": 0.1016, + "step": 4410 + }, + { + "epoch": 0.39, + "learning_rate": 9.294646873594244e-06, + "loss": 0.1021, + "step": 4420 + }, + { + "epoch": 0.4, + "learning_rate": 9.29284750337382e-06, + "loss": 0.1569, + "step": 4430 + }, + { + "epoch": 0.4, + "learning_rate": 9.291048133153397e-06, + "loss": 0.1313, + "step": 4440 + }, + { + "epoch": 0.4, + "learning_rate": 9.289248762932974e-06, + "loss": 0.0682, + "step": 4450 + }, + { + "epoch": 0.4, + "learning_rate": 9.287449392712551e-06, + "loss": 0.1345, + "step": 4460 + }, + { + "epoch": 0.4, + "learning_rate": 9.285650022492128e-06, + "loss": 0.0751, + "step": 4470 + }, + { + "epoch": 0.4, + "learning_rate": 9.283850652271705e-06, + "loss": 0.1235, + "step": 4480 + }, + { + "epoch": 0.4, + "learning_rate": 9.282051282051283e-06, + "loss": 0.1123, + "step": 4490 + }, + { + "epoch": 0.4, + "learning_rate": 9.28025191183086e-06, + "loss": 0.0496, + "step": 4500 + }, + { + "epoch": 0.4, + "eval_accuracy": 0.9711984841307437, + "eval_f1": 0.9508832399827659, + "eval_loss": 0.08512861281633377, + "eval_precision": 0.9297525013164823, + "eval_recall": 0.9729968037032954, + "eval_runtime": 436.7164, + "eval_samples_per_second": 72.507, + "eval_steps_per_second": 4.534, + "step": 4500 + }, + { + "epoch": 0.4, + "learning_rate": 9.278452541610437e-06, + "loss": 0.1278, + "step": 4510 + }, + { + "epoch": 0.4, + "learning_rate": 9.276653171390014e-06, + "loss": 0.0659, + "step": 4520 + }, + { + "epoch": 0.4, + "learning_rate": 9.27485380116959e-06, + "loss": 0.1262, + "step": 4530 + }, + { + "epoch": 0.4, + "learning_rate": 9.27305443094917e-06, + "loss": 0.0633, + "step": 4540 + }, + { + "epoch": 0.41, + "learning_rate": 9.271255060728746e-06, + "loss": 0.1094, + "step": 4550 + }, + { + "epoch": 0.41, + "learning_rate": 9.269455690508323e-06, + "loss": 0.1054, + "step": 4560 + }, + { + "epoch": 0.41, + "learning_rate": 9.2676563202879e-06, + "loss": 0.0375, + "step": 4570 + }, + { + "epoch": 0.41, + "learning_rate": 9.265856950067477e-06, + "loss": 0.0434, + "step": 4580 + }, + { + "epoch": 0.41, + "learning_rate": 9.264057579847054e-06, + "loss": 0.0875, + "step": 4590 + }, + { + "epoch": 0.41, + "learning_rate": 9.26225820962663e-06, + "loss": 0.0963, + "step": 4600 + }, + { + "epoch": 0.41, + "learning_rate": 9.260458839406209e-06, + "loss": 0.1243, + "step": 4610 + }, + { + "epoch": 0.41, + "learning_rate": 9.258659469185786e-06, + "loss": 0.1053, + "step": 4620 + }, + { + "epoch": 0.41, + "learning_rate": 9.256860098965363e-06, + "loss": 0.0566, + "step": 4630 + }, + { + "epoch": 0.41, + "learning_rate": 9.25506072874494e-06, + "loss": 0.0947, + "step": 4640 + }, + { + "epoch": 0.41, + "learning_rate": 9.253261358524516e-06, + "loss": 0.1326, + "step": 4650 + }, + { + "epoch": 0.42, + "learning_rate": 9.251461988304095e-06, + "loss": 0.1622, + "step": 4660 + }, + { + "epoch": 0.42, + "learning_rate": 9.249662618083672e-06, + "loss": 0.0438, + "step": 4670 + }, + { + "epoch": 0.42, + "learning_rate": 9.247863247863249e-06, + "loss": 0.0346, + "step": 4680 + }, + { + "epoch": 0.42, + "learning_rate": 9.246063877642826e-06, + "loss": 0.0647, + "step": 4690 + }, + { + "epoch": 0.42, + "learning_rate": 9.244264507422402e-06, + "loss": 0.2105, + "step": 4700 + }, + { + "epoch": 0.42, + "learning_rate": 9.242465137201981e-06, + "loss": 0.0517, + "step": 4710 + }, + { + "epoch": 0.42, + "learning_rate": 9.240665766981556e-06, + "loss": 0.0369, + "step": 4720 + }, + { + "epoch": 0.42, + "learning_rate": 9.238866396761135e-06, + "loss": 0.0792, + "step": 4730 + }, + { + "epoch": 0.42, + "learning_rate": 9.237067026540712e-06, + "loss": 0.1478, + "step": 4740 + }, + { + "epoch": 0.42, + "learning_rate": 9.235267656320288e-06, + "loss": 0.0625, + "step": 4750 + }, + { + "epoch": 0.42, + "learning_rate": 9.233468286099865e-06, + "loss": 0.1502, + "step": 4760 + }, + { + "epoch": 0.43, + "learning_rate": 9.231668915879442e-06, + "loss": 0.1159, + "step": 4770 + }, + { + "epoch": 0.43, + "learning_rate": 9.22986954565902e-06, + "loss": 0.0383, + "step": 4780 + }, + { + "epoch": 0.43, + "learning_rate": 9.228070175438598e-06, + "loss": 0.0419, + "step": 4790 + }, + { + "epoch": 0.43, + "learning_rate": 9.226270805218174e-06, + "loss": 0.0887, + "step": 4800 + }, + { + "epoch": 0.43, + "learning_rate": 9.224471434997751e-06, + "loss": 0.0085, + "step": 4810 + }, + { + "epoch": 0.43, + "learning_rate": 9.222672064777328e-06, + "loss": 0.1119, + "step": 4820 + }, + { + "epoch": 0.43, + "learning_rate": 9.220872694556907e-06, + "loss": 0.0901, + "step": 4830 + }, + { + "epoch": 0.43, + "learning_rate": 9.219073324336482e-06, + "loss": 0.1066, + "step": 4840 + }, + { + "epoch": 0.43, + "learning_rate": 9.21727395411606e-06, + "loss": 0.096, + "step": 4850 + }, + { + "epoch": 0.43, + "learning_rate": 9.215474583895637e-06, + "loss": 0.1194, + "step": 4860 + }, + { + "epoch": 0.43, + "learning_rate": 9.213675213675214e-06, + "loss": 0.0887, + "step": 4870 + }, + { + "epoch": 0.44, + "learning_rate": 9.211875843454791e-06, + "loss": 0.0658, + "step": 4880 + }, + { + "epoch": 0.44, + "learning_rate": 9.210076473234368e-06, + "loss": 0.1369, + "step": 4890 + }, + { + "epoch": 0.44, + "learning_rate": 9.208277103013946e-06, + "loss": 0.1153, + "step": 4900 + }, + { + "epoch": 0.44, + "learning_rate": 9.206477732793523e-06, + "loss": 0.0603, + "step": 4910 + }, + { + "epoch": 0.44, + "learning_rate": 9.2046783625731e-06, + "loss": 0.1061, + "step": 4920 + }, + { + "epoch": 0.44, + "learning_rate": 9.202878992352677e-06, + "loss": 0.0689, + "step": 4930 + }, + { + "epoch": 0.44, + "learning_rate": 9.201079622132254e-06, + "loss": 0.1351, + "step": 4940 + }, + { + "epoch": 0.44, + "learning_rate": 9.199280251911832e-06, + "loss": 0.1209, + "step": 4950 + }, + { + "epoch": 0.44, + "learning_rate": 9.19748088169141e-06, + "loss": 0.0371, + "step": 4960 + }, + { + "epoch": 0.44, + "learning_rate": 9.195681511470986e-06, + "loss": 0.0833, + "step": 4970 + }, + { + "epoch": 0.44, + "learning_rate": 9.193882141250563e-06, + "loss": 0.1354, + "step": 4980 + }, + { + "epoch": 0.44, + "learning_rate": 9.19208277103014e-06, + "loss": 0.2174, + "step": 4990 + }, + { + "epoch": 0.45, + "learning_rate": 9.190283400809717e-06, + "loss": 0.1419, + "step": 5000 + }, + { + "epoch": 0.45, + "learning_rate": 9.188484030589294e-06, + "loss": 0.1198, + "step": 5010 + }, + { + "epoch": 0.45, + "learning_rate": 9.186684660368872e-06, + "loss": 0.1137, + "step": 5020 + }, + { + "epoch": 0.45, + "learning_rate": 9.184885290148449e-06, + "loss": 0.094, + "step": 5030 + }, + { + "epoch": 0.45, + "learning_rate": 9.183085919928026e-06, + "loss": 0.0805, + "step": 5040 + }, + { + "epoch": 0.45, + "learning_rate": 9.181286549707603e-06, + "loss": 0.0516, + "step": 5050 + }, + { + "epoch": 0.45, + "learning_rate": 9.17948717948718e-06, + "loss": 0.0696, + "step": 5060 + }, + { + "epoch": 0.45, + "learning_rate": 9.177687809266758e-06, + "loss": 0.0769, + "step": 5070 + }, + { + "epoch": 0.45, + "learning_rate": 9.175888439046335e-06, + "loss": 0.0959, + "step": 5080 + }, + { + "epoch": 0.45, + "learning_rate": 9.174089068825912e-06, + "loss": 0.0542, + "step": 5090 + }, + { + "epoch": 0.45, + "learning_rate": 9.172289698605489e-06, + "loss": 0.0712, + "step": 5100 + }, + { + "epoch": 0.46, + "learning_rate": 9.170490328385066e-06, + "loss": 0.0537, + "step": 5110 + }, + { + "epoch": 0.46, + "learning_rate": 9.168690958164642e-06, + "loss": 0.1163, + "step": 5120 + }, + { + "epoch": 0.46, + "learning_rate": 9.16689158794422e-06, + "loss": 0.0765, + "step": 5130 + }, + { + "epoch": 0.46, + "learning_rate": 9.165092217723798e-06, + "loss": 0.2003, + "step": 5140 + }, + { + "epoch": 0.46, + "learning_rate": 9.163292847503375e-06, + "loss": 0.0108, + "step": 5150 + }, + { + "epoch": 0.46, + "learning_rate": 9.161493477282952e-06, + "loss": 0.1257, + "step": 5160 + }, + { + "epoch": 0.46, + "learning_rate": 9.159694107062528e-06, + "loss": 0.1449, + "step": 5170 + }, + { + "epoch": 0.46, + "learning_rate": 9.157894736842105e-06, + "loss": 0.053, + "step": 5180 + }, + { + "epoch": 0.46, + "learning_rate": 9.156095366621684e-06, + "loss": 0.0475, + "step": 5190 + }, + { + "epoch": 0.46, + "learning_rate": 9.15429599640126e-06, + "loss": 0.029, + "step": 5200 + }, + { + "epoch": 0.46, + "learning_rate": 9.152496626180838e-06, + "loss": 0.0036, + "step": 5210 + }, + { + "epoch": 0.47, + "learning_rate": 9.150697255960414e-06, + "loss": 0.0076, + "step": 5220 + }, + { + "epoch": 0.47, + "learning_rate": 9.148897885739991e-06, + "loss": 0.0655, + "step": 5230 + }, + { + "epoch": 0.47, + "learning_rate": 9.14709851551957e-06, + "loss": 0.0534, + "step": 5240 + }, + { + "epoch": 0.47, + "learning_rate": 9.145299145299145e-06, + "loss": 0.0378, + "step": 5250 + }, + { + "epoch": 0.47, + "learning_rate": 9.143499775078724e-06, + "loss": 0.1112, + "step": 5260 + }, + { + "epoch": 0.47, + "learning_rate": 9.1417004048583e-06, + "loss": 0.1258, + "step": 5270 + }, + { + "epoch": 0.47, + "learning_rate": 9.139901034637877e-06, + "loss": 0.1018, + "step": 5280 + }, + { + "epoch": 0.47, + "learning_rate": 9.138101664417454e-06, + "loss": 0.1015, + "step": 5290 + }, + { + "epoch": 0.47, + "learning_rate": 9.136302294197031e-06, + "loss": 0.2089, + "step": 5300 + }, + { + "epoch": 0.47, + "learning_rate": 9.13450292397661e-06, + "loss": 0.0352, + "step": 5310 + }, + { + "epoch": 0.47, + "learning_rate": 9.132703553756186e-06, + "loss": 0.1233, + "step": 5320 + }, + { + "epoch": 0.48, + "learning_rate": 9.130904183535763e-06, + "loss": 0.1092, + "step": 5330 + }, + { + "epoch": 0.48, + "learning_rate": 9.12910481331534e-06, + "loss": 0.0103, + "step": 5340 + }, + { + "epoch": 0.48, + "learning_rate": 9.127305443094917e-06, + "loss": 0.0304, + "step": 5350 + }, + { + "epoch": 0.48, + "learning_rate": 9.125506072874496e-06, + "loss": 0.1365, + "step": 5360 + }, + { + "epoch": 0.48, + "learning_rate": 9.12370670265407e-06, + "loss": 0.1332, + "step": 5370 + }, + { + "epoch": 0.48, + "learning_rate": 9.12190733243365e-06, + "loss": 0.0738, + "step": 5380 + }, + { + "epoch": 0.48, + "learning_rate": 9.120107962213226e-06, + "loss": 0.1559, + "step": 5390 + }, + { + "epoch": 0.48, + "learning_rate": 9.118308591992803e-06, + "loss": 0.139, + "step": 5400 + }, + { + "epoch": 0.48, + "learning_rate": 9.11650922177238e-06, + "loss": 0.1414, + "step": 5410 + }, + { + "epoch": 0.48, + "learning_rate": 9.114709851551957e-06, + "loss": 0.1316, + "step": 5420 + }, + { + "epoch": 0.48, + "learning_rate": 9.112910481331535e-06, + "loss": 0.1166, + "step": 5430 + }, + { + "epoch": 0.49, + "learning_rate": 9.111111111111112e-06, + "loss": 0.0909, + "step": 5440 + }, + { + "epoch": 0.49, + "learning_rate": 9.109311740890689e-06, + "loss": 0.1103, + "step": 5450 + }, + { + "epoch": 0.49, + "learning_rate": 9.107512370670266e-06, + "loss": 0.1618, + "step": 5460 + }, + { + "epoch": 0.49, + "learning_rate": 9.105713000449843e-06, + "loss": 0.0721, + "step": 5470 + }, + { + "epoch": 0.49, + "learning_rate": 9.103913630229421e-06, + "loss": 0.1584, + "step": 5480 + }, + { + "epoch": 0.49, + "learning_rate": 9.102114260008998e-06, + "loss": 0.1035, + "step": 5490 + }, + { + "epoch": 0.49, + "learning_rate": 9.100314889788575e-06, + "loss": 0.028, + "step": 5500 + }, + { + "epoch": 0.49, + "learning_rate": 9.098515519568152e-06, + "loss": 0.1161, + "step": 5510 + }, + { + "epoch": 0.49, + "learning_rate": 9.096716149347729e-06, + "loss": 0.1424, + "step": 5520 + }, + { + "epoch": 0.49, + "learning_rate": 9.094916779127306e-06, + "loss": 0.1636, + "step": 5530 + }, + { + "epoch": 0.49, + "learning_rate": 9.093117408906882e-06, + "loss": 0.1167, + "step": 5540 + }, + { + "epoch": 0.49, + "learning_rate": 9.091318038686461e-06, + "loss": 0.0831, + "step": 5550 + }, + { + "epoch": 0.5, + "learning_rate": 9.089518668466038e-06, + "loss": 0.1418, + "step": 5560 + }, + { + "epoch": 0.5, + "learning_rate": 9.087719298245615e-06, + "loss": 0.1449, + "step": 5570 + }, + { + "epoch": 0.5, + "learning_rate": 9.085919928025192e-06, + "loss": 0.1729, + "step": 5580 + }, + { + "epoch": 0.5, + "learning_rate": 9.084120557804768e-06, + "loss": 0.125, + "step": 5590 + }, + { + "epoch": 0.5, + "learning_rate": 9.082321187584347e-06, + "loss": 0.0741, + "step": 5600 + }, + { + "epoch": 0.5, + "learning_rate": 9.080521817363924e-06, + "loss": 0.0811, + "step": 5610 + }, + { + "epoch": 0.5, + "learning_rate": 9.0787224471435e-06, + "loss": 0.1593, + "step": 5620 + }, + { + "epoch": 0.5, + "learning_rate": 9.076923076923078e-06, + "loss": 0.1943, + "step": 5630 + }, + { + "epoch": 0.5, + "learning_rate": 9.075123706702654e-06, + "loss": 0.0983, + "step": 5640 + }, + { + "epoch": 0.5, + "learning_rate": 9.073324336482231e-06, + "loss": 0.1401, + "step": 5650 + }, + { + "epoch": 0.5, + "learning_rate": 9.071524966261808e-06, + "loss": 0.1739, + "step": 5660 + }, + { + "epoch": 0.51, + "learning_rate": 9.069725596041387e-06, + "loss": 0.1541, + "step": 5670 + }, + { + "epoch": 0.51, + "learning_rate": 9.067926225820964e-06, + "loss": 0.1699, + "step": 5680 + }, + { + "epoch": 0.51, + "learning_rate": 9.06612685560054e-06, + "loss": 0.0351, + "step": 5690 + }, + { + "epoch": 0.51, + "learning_rate": 9.064327485380117e-06, + "loss": 0.0941, + "step": 5700 + }, + { + "epoch": 0.51, + "learning_rate": 9.062528115159694e-06, + "loss": 0.1932, + "step": 5710 + }, + { + "epoch": 0.51, + "learning_rate": 9.060728744939273e-06, + "loss": 0.0819, + "step": 5720 + }, + { + "epoch": 0.51, + "learning_rate": 9.05892937471885e-06, + "loss": 0.2269, + "step": 5730 + }, + { + "epoch": 0.51, + "learning_rate": 9.057130004498426e-06, + "loss": 0.0967, + "step": 5740 + }, + { + "epoch": 0.51, + "learning_rate": 9.055330634278003e-06, + "loss": 0.0594, + "step": 5750 + }, + { + "epoch": 0.51, + "learning_rate": 9.05353126405758e-06, + "loss": 0.2919, + "step": 5760 + }, + { + "epoch": 0.51, + "learning_rate": 9.051731893837159e-06, + "loss": 0.0618, + "step": 5770 + }, + { + "epoch": 0.52, + "learning_rate": 9.049932523616734e-06, + "loss": 0.1166, + "step": 5780 + }, + { + "epoch": 0.52, + "learning_rate": 9.048133153396312e-06, + "loss": 0.124, + "step": 5790 + }, + { + "epoch": 0.52, + "learning_rate": 9.04633378317589e-06, + "loss": 0.1845, + "step": 5800 + }, + { + "epoch": 0.52, + "learning_rate": 9.044534412955466e-06, + "loss": 0.1357, + "step": 5810 + }, + { + "epoch": 0.52, + "learning_rate": 9.042735042735043e-06, + "loss": 0.1428, + "step": 5820 + }, + { + "epoch": 0.52, + "learning_rate": 9.04093567251462e-06, + "loss": 0.1932, + "step": 5830 + }, + { + "epoch": 0.52, + "learning_rate": 9.039136302294198e-06, + "loss": 0.0585, + "step": 5840 + }, + { + "epoch": 0.52, + "learning_rate": 9.037336932073775e-06, + "loss": 0.0983, + "step": 5850 + }, + { + "epoch": 0.52, + "learning_rate": 9.035537561853352e-06, + "loss": 0.0764, + "step": 5860 + }, + { + "epoch": 0.52, + "learning_rate": 9.033738191632929e-06, + "loss": 0.1484, + "step": 5870 + }, + { + "epoch": 0.52, + "learning_rate": 9.031938821412506e-06, + "loss": 0.1209, + "step": 5880 + }, + { + "epoch": 0.53, + "learning_rate": 9.030139451192084e-06, + "loss": 0.0421, + "step": 5890 + }, + { + "epoch": 0.53, + "learning_rate": 9.02834008097166e-06, + "loss": 0.0788, + "step": 5900 + }, + { + "epoch": 0.53, + "learning_rate": 9.026540710751238e-06, + "loss": 0.0637, + "step": 5910 + }, + { + "epoch": 0.53, + "learning_rate": 9.024741340530815e-06, + "loss": 0.0962, + "step": 5920 + }, + { + "epoch": 0.53, + "learning_rate": 9.022941970310392e-06, + "loss": 0.0681, + "step": 5930 + }, + { + "epoch": 0.53, + "learning_rate": 9.021142600089969e-06, + "loss": 0.1625, + "step": 5940 + }, + { + "epoch": 0.53, + "learning_rate": 9.019343229869546e-06, + "loss": 0.1218, + "step": 5950 + }, + { + "epoch": 0.53, + "learning_rate": 9.017543859649124e-06, + "loss": 0.0072, + "step": 5960 + }, + { + "epoch": 0.53, + "learning_rate": 9.015744489428701e-06, + "loss": 0.1611, + "step": 5970 + }, + { + "epoch": 0.53, + "learning_rate": 9.013945119208278e-06, + "loss": 0.1108, + "step": 5980 + }, + { + "epoch": 0.53, + "learning_rate": 9.012145748987855e-06, + "loss": 0.1706, + "step": 5990 + }, + { + "epoch": 0.53, + "learning_rate": 9.010346378767432e-06, + "loss": 0.0967, + "step": 6000 + }, + { + "epoch": 0.53, + "eval_accuracy": 0.9735354492341702, + "eval_f1": 0.954082191780822, + "eval_loss": 0.09180905669927597, + "eval_precision": 0.9486760379208892, + "eval_recall": 0.9595503141188141, + "eval_runtime": 436.4287, + "eval_samples_per_second": 72.555, + "eval_steps_per_second": 4.537, + "step": 6000 + }, + { + "epoch": 0.54, + "learning_rate": 9.00854700854701e-06, + "loss": 0.0714, + "step": 6010 + }, + { + "epoch": 0.54, + "learning_rate": 9.006747638326587e-06, + "loss": 0.1627, + "step": 6020 + }, + { + "epoch": 0.54, + "learning_rate": 9.004948268106164e-06, + "loss": 0.0399, + "step": 6030 + }, + { + "epoch": 0.54, + "learning_rate": 9.00314889788574e-06, + "loss": 0.0568, + "step": 6040 + }, + { + "epoch": 0.54, + "learning_rate": 9.001349527665318e-06, + "loss": 0.1471, + "step": 6050 + }, + { + "epoch": 0.54, + "learning_rate": 8.999550157444894e-06, + "loss": 0.0979, + "step": 6060 + }, + { + "epoch": 0.54, + "learning_rate": 8.997750787224471e-06, + "loss": 0.0177, + "step": 6070 + }, + { + "epoch": 0.54, + "learning_rate": 8.99595141700405e-06, + "loss": 0.1574, + "step": 6080 + }, + { + "epoch": 0.54, + "learning_rate": 8.994152046783627e-06, + "loss": 0.1683, + "step": 6090 + }, + { + "epoch": 0.54, + "learning_rate": 8.992352676563204e-06, + "loss": 0.1035, + "step": 6100 + }, + { + "epoch": 0.54, + "learning_rate": 8.99055330634278e-06, + "loss": 0.1381, + "step": 6110 + }, + { + "epoch": 0.55, + "learning_rate": 8.988753936122357e-06, + "loss": 0.1026, + "step": 6120 + }, + { + "epoch": 0.55, + "learning_rate": 8.986954565901936e-06, + "loss": 0.0454, + "step": 6130 + }, + { + "epoch": 0.55, + "learning_rate": 8.985155195681513e-06, + "loss": 0.0653, + "step": 6140 + }, + { + "epoch": 0.55, + "learning_rate": 8.98335582546109e-06, + "loss": 0.0681, + "step": 6150 + }, + { + "epoch": 0.55, + "learning_rate": 8.981556455240666e-06, + "loss": 0.1286, + "step": 6160 + }, + { + "epoch": 0.55, + "learning_rate": 8.979757085020243e-06, + "loss": 0.0823, + "step": 6170 + }, + { + "epoch": 0.55, + "learning_rate": 8.97795771479982e-06, + "loss": 0.0489, + "step": 6180 + }, + { + "epoch": 0.55, + "learning_rate": 8.976158344579397e-06, + "loss": 0.0605, + "step": 6190 + }, + { + "epoch": 0.55, + "learning_rate": 8.974358974358976e-06, + "loss": 0.054, + "step": 6200 + }, + { + "epoch": 0.55, + "learning_rate": 8.972559604138552e-06, + "loss": 0.1616, + "step": 6210 + }, + { + "epoch": 0.55, + "learning_rate": 8.97076023391813e-06, + "loss": 0.0963, + "step": 6220 + }, + { + "epoch": 0.56, + "learning_rate": 8.968960863697706e-06, + "loss": 0.1245, + "step": 6230 + }, + { + "epoch": 0.56, + "learning_rate": 8.967161493477283e-06, + "loss": 0.092, + "step": 6240 + }, + { + "epoch": 0.56, + "learning_rate": 8.965362123256862e-06, + "loss": 0.0728, + "step": 6250 + }, + { + "epoch": 0.56, + "learning_rate": 8.963562753036438e-06, + "loss": 0.1138, + "step": 6260 + }, + { + "epoch": 0.56, + "learning_rate": 8.961763382816015e-06, + "loss": 0.1143, + "step": 6270 + }, + { + "epoch": 0.56, + "learning_rate": 8.959964012595592e-06, + "loss": 0.0954, + "step": 6280 + }, + { + "epoch": 0.56, + "learning_rate": 8.958164642375169e-06, + "loss": 0.0606, + "step": 6290 + }, + { + "epoch": 0.56, + "learning_rate": 8.956365272154746e-06, + "loss": 0.0667, + "step": 6300 + }, + { + "epoch": 0.56, + "learning_rate": 8.954565901934323e-06, + "loss": 0.0505, + "step": 6310 + }, + { + "epoch": 0.56, + "learning_rate": 8.952766531713901e-06, + "loss": 0.1699, + "step": 6320 + }, + { + "epoch": 0.56, + "learning_rate": 8.950967161493478e-06, + "loss": 0.0568, + "step": 6330 + }, + { + "epoch": 0.57, + "learning_rate": 8.949167791273055e-06, + "loss": 0.1413, + "step": 6340 + }, + { + "epoch": 0.57, + "learning_rate": 8.947368421052632e-06, + "loss": 0.0903, + "step": 6350 + }, + { + "epoch": 0.57, + "learning_rate": 8.945569050832209e-06, + "loss": 0.1166, + "step": 6360 + }, + { + "epoch": 0.57, + "learning_rate": 8.943769680611787e-06, + "loss": 0.0781, + "step": 6370 + }, + { + "epoch": 0.57, + "learning_rate": 8.941970310391364e-06, + "loss": 0.0385, + "step": 6380 + }, + { + "epoch": 0.57, + "learning_rate": 8.940170940170941e-06, + "loss": 0.0508, + "step": 6390 + }, + { + "epoch": 0.57, + "learning_rate": 8.938371569950518e-06, + "loss": 0.0893, + "step": 6400 + }, + { + "epoch": 0.57, + "learning_rate": 8.936572199730095e-06, + "loss": 0.1399, + "step": 6410 + }, + { + "epoch": 0.57, + "learning_rate": 8.934772829509673e-06, + "loss": 0.0252, + "step": 6420 + }, + { + "epoch": 0.57, + "learning_rate": 8.932973459289248e-06, + "loss": 0.0843, + "step": 6430 + }, + { + "epoch": 0.57, + "learning_rate": 8.931174089068827e-06, + "loss": 0.0468, + "step": 6440 + }, + { + "epoch": 0.58, + "learning_rate": 8.929374718848404e-06, + "loss": 0.0843, + "step": 6450 + }, + { + "epoch": 0.58, + "learning_rate": 8.92757534862798e-06, + "loss": 0.0478, + "step": 6460 + }, + { + "epoch": 0.58, + "learning_rate": 8.925775978407558e-06, + "loss": 0.1855, + "step": 6470 + }, + { + "epoch": 0.58, + "learning_rate": 8.923976608187134e-06, + "loss": 0.033, + "step": 6480 + }, + { + "epoch": 0.58, + "learning_rate": 8.922177237966713e-06, + "loss": 0.1028, + "step": 6490 + }, + { + "epoch": 0.58, + "learning_rate": 8.92037786774629e-06, + "loss": 0.1134, + "step": 6500 + }, + { + "epoch": 0.58, + "learning_rate": 8.918578497525867e-06, + "loss": 0.0714, + "step": 6510 + }, + { + "epoch": 0.58, + "learning_rate": 8.916779127305444e-06, + "loss": 0.1058, + "step": 6520 + }, + { + "epoch": 0.58, + "learning_rate": 8.91497975708502e-06, + "loss": 0.0672, + "step": 6530 + }, + { + "epoch": 0.58, + "learning_rate": 8.913180386864599e-06, + "loss": 0.1081, + "step": 6540 + }, + { + "epoch": 0.58, + "learning_rate": 8.911381016644174e-06, + "loss": 0.0558, + "step": 6550 + }, + { + "epoch": 0.58, + "learning_rate": 8.909581646423753e-06, + "loss": 0.1227, + "step": 6560 + }, + { + "epoch": 0.59, + "learning_rate": 8.90778227620333e-06, + "loss": 0.1211, + "step": 6570 + }, + { + "epoch": 0.59, + "learning_rate": 8.905982905982906e-06, + "loss": 0.0534, + "step": 6580 + }, + { + "epoch": 0.59, + "learning_rate": 8.904183535762483e-06, + "loss": 0.1179, + "step": 6590 + }, + { + "epoch": 0.59, + "learning_rate": 8.90238416554206e-06, + "loss": 0.1224, + "step": 6600 + }, + { + "epoch": 0.59, + "learning_rate": 8.900584795321639e-06, + "loss": 0.0878, + "step": 6610 + }, + { + "epoch": 0.59, + "learning_rate": 8.898785425101216e-06, + "loss": 0.12, + "step": 6620 + }, + { + "epoch": 0.59, + "learning_rate": 8.896986054880792e-06, + "loss": 0.1773, + "step": 6630 + }, + { + "epoch": 0.59, + "learning_rate": 8.89518668466037e-06, + "loss": 0.0991, + "step": 6640 + }, + { + "epoch": 0.59, + "learning_rate": 8.893387314439946e-06, + "loss": 0.1262, + "step": 6650 + }, + { + "epoch": 0.59, + "learning_rate": 8.891587944219525e-06, + "loss": 0.0043, + "step": 6660 + }, + { + "epoch": 0.59, + "learning_rate": 8.889788573999102e-06, + "loss": 0.0821, + "step": 6670 + }, + { + "epoch": 0.6, + "learning_rate": 8.887989203778678e-06, + "loss": 0.212, + "step": 6680 + }, + { + "epoch": 0.6, + "learning_rate": 8.886189833558255e-06, + "loss": 0.0983, + "step": 6690 + }, + { + "epoch": 0.6, + "learning_rate": 8.884390463337832e-06, + "loss": 0.1402, + "step": 6700 + }, + { + "epoch": 0.6, + "learning_rate": 8.882591093117409e-06, + "loss": 0.159, + "step": 6710 + }, + { + "epoch": 0.6, + "learning_rate": 8.880791722896986e-06, + "loss": 0.1258, + "step": 6720 + }, + { + "epoch": 0.6, + "learning_rate": 8.878992352676564e-06, + "loss": 0.0954, + "step": 6730 + }, + { + "epoch": 0.6, + "learning_rate": 8.877192982456141e-06, + "loss": 0.0412, + "step": 6740 + }, + { + "epoch": 0.6, + "learning_rate": 8.875393612235718e-06, + "loss": 0.078, + "step": 6750 + }, + { + "epoch": 0.6, + "learning_rate": 8.873594242015295e-06, + "loss": 0.1009, + "step": 6760 + }, + { + "epoch": 0.6, + "learning_rate": 8.871794871794872e-06, + "loss": 0.099, + "step": 6770 + }, + { + "epoch": 0.6, + "learning_rate": 8.86999550157445e-06, + "loss": 0.0313, + "step": 6780 + }, + { + "epoch": 0.61, + "learning_rate": 8.868196131354027e-06, + "loss": 0.0375, + "step": 6790 + }, + { + "epoch": 0.61, + "learning_rate": 8.866396761133604e-06, + "loss": 0.0885, + "step": 6800 + }, + { + "epoch": 0.61, + "learning_rate": 8.864597390913181e-06, + "loss": 0.0761, + "step": 6810 + }, + { + "epoch": 0.61, + "learning_rate": 8.862798020692758e-06, + "loss": 0.0892, + "step": 6820 + }, + { + "epoch": 0.61, + "learning_rate": 8.860998650472335e-06, + "loss": 0.1141, + "step": 6830 + }, + { + "epoch": 0.61, + "learning_rate": 8.859199280251912e-06, + "loss": 0.0784, + "step": 6840 + }, + { + "epoch": 0.61, + "learning_rate": 8.85739991003149e-06, + "loss": 0.1206, + "step": 6850 + }, + { + "epoch": 0.61, + "learning_rate": 8.855600539811067e-06, + "loss": 0.054, + "step": 6860 + }, + { + "epoch": 0.61, + "learning_rate": 8.853801169590644e-06, + "loss": 0.2208, + "step": 6870 + }, + { + "epoch": 0.61, + "learning_rate": 8.85200179937022e-06, + "loss": 0.0689, + "step": 6880 + }, + { + "epoch": 0.61, + "learning_rate": 8.850202429149798e-06, + "loss": 0.0898, + "step": 6890 + }, + { + "epoch": 0.62, + "learning_rate": 8.848403058929376e-06, + "loss": 0.1601, + "step": 6900 + }, + { + "epoch": 0.62, + "learning_rate": 8.846603688708953e-06, + "loss": 0.0574, + "step": 6910 + }, + { + "epoch": 0.62, + "learning_rate": 8.84480431848853e-06, + "loss": 0.0564, + "step": 6920 + }, + { + "epoch": 0.62, + "learning_rate": 8.843004948268107e-06, + "loss": 0.0072, + "step": 6930 + }, + { + "epoch": 0.62, + "learning_rate": 8.841205578047684e-06, + "loss": 0.0368, + "step": 6940 + }, + { + "epoch": 0.62, + "learning_rate": 8.839406207827262e-06, + "loss": 0.1152, + "step": 6950 + }, + { + "epoch": 0.62, + "learning_rate": 8.837606837606837e-06, + "loss": 0.0871, + "step": 6960 + }, + { + "epoch": 0.62, + "learning_rate": 8.835807467386416e-06, + "loss": 0.0936, + "step": 6970 + }, + { + "epoch": 0.62, + "learning_rate": 8.834008097165993e-06, + "loss": 0.2966, + "step": 6980 + }, + { + "epoch": 0.62, + "learning_rate": 8.83220872694557e-06, + "loss": 0.1432, + "step": 6990 + }, + { + "epoch": 0.62, + "learning_rate": 8.830409356725146e-06, + "loss": 0.135, + "step": 7000 + }, + { + "epoch": 0.63, + "learning_rate": 8.828609986504723e-06, + "loss": 0.0894, + "step": 7010 + }, + { + "epoch": 0.63, + "learning_rate": 8.826810616284302e-06, + "loss": 0.1052, + "step": 7020 + }, + { + "epoch": 0.63, + "learning_rate": 8.825011246063879e-06, + "loss": 0.1165, + "step": 7030 + }, + { + "epoch": 0.63, + "learning_rate": 8.823211875843456e-06, + "loss": 0.0955, + "step": 7040 + }, + { + "epoch": 0.63, + "learning_rate": 8.821412505623032e-06, + "loss": 0.0356, + "step": 7050 + }, + { + "epoch": 0.63, + "learning_rate": 8.81961313540261e-06, + "loss": 0.143, + "step": 7060 + }, + { + "epoch": 0.63, + "learning_rate": 8.817813765182188e-06, + "loss": 0.0743, + "step": 7070 + }, + { + "epoch": 0.63, + "learning_rate": 8.816014394961763e-06, + "loss": 0.0865, + "step": 7080 + }, + { + "epoch": 0.63, + "learning_rate": 8.814215024741342e-06, + "loss": 0.1262, + "step": 7090 + }, + { + "epoch": 0.63, + "learning_rate": 8.812415654520918e-06, + "loss": 0.0891, + "step": 7100 + }, + { + "epoch": 0.63, + "learning_rate": 8.810616284300495e-06, + "loss": 0.1062, + "step": 7110 + }, + { + "epoch": 0.63, + "learning_rate": 8.808816914080072e-06, + "loss": 0.0651, + "step": 7120 + }, + { + "epoch": 0.64, + "learning_rate": 8.807017543859649e-06, + "loss": 0.1016, + "step": 7130 + }, + { + "epoch": 0.64, + "learning_rate": 8.805218173639228e-06, + "loss": 0.1007, + "step": 7140 + }, + { + "epoch": 0.64, + "learning_rate": 8.803418803418804e-06, + "loss": 0.0866, + "step": 7150 + }, + { + "epoch": 0.64, + "learning_rate": 8.801619433198381e-06, + "loss": 0.0866, + "step": 7160 + }, + { + "epoch": 0.64, + "learning_rate": 8.799820062977958e-06, + "loss": 0.0524, + "step": 7170 + }, + { + "epoch": 0.64, + "learning_rate": 8.798020692757535e-06, + "loss": 0.0092, + "step": 7180 + }, + { + "epoch": 0.64, + "learning_rate": 8.796221322537114e-06, + "loss": 0.0843, + "step": 7190 + }, + { + "epoch": 0.64, + "learning_rate": 8.79442195231669e-06, + "loss": 0.0962, + "step": 7200 + }, + { + "epoch": 0.64, + "learning_rate": 8.792622582096267e-06, + "loss": 0.0755, + "step": 7210 + }, + { + "epoch": 0.64, + "learning_rate": 8.790823211875844e-06, + "loss": 0.1832, + "step": 7220 + }, + { + "epoch": 0.64, + "learning_rate": 8.789023841655421e-06, + "loss": 0.1657, + "step": 7230 + }, + { + "epoch": 0.65, + "learning_rate": 8.787224471434998e-06, + "loss": 0.0969, + "step": 7240 + }, + { + "epoch": 0.65, + "learning_rate": 8.785425101214575e-06, + "loss": 0.1005, + "step": 7250 + }, + { + "epoch": 0.65, + "learning_rate": 8.783625730994153e-06, + "loss": 0.0896, + "step": 7260 + }, + { + "epoch": 0.65, + "learning_rate": 8.78182636077373e-06, + "loss": 0.0941, + "step": 7270 + }, + { + "epoch": 0.65, + "learning_rate": 8.780026990553307e-06, + "loss": 0.1214, + "step": 7280 + }, + { + "epoch": 0.65, + "learning_rate": 8.778227620332884e-06, + "loss": 0.1319, + "step": 7290 + }, + { + "epoch": 0.65, + "learning_rate": 8.77642825011246e-06, + "loss": 0.0806, + "step": 7300 + }, + { + "epoch": 0.65, + "learning_rate": 8.77462887989204e-06, + "loss": 0.1304, + "step": 7310 + }, + { + "epoch": 0.65, + "learning_rate": 8.772829509671616e-06, + "loss": 0.0723, + "step": 7320 + }, + { + "epoch": 0.65, + "learning_rate": 8.771030139451193e-06, + "loss": 0.176, + "step": 7330 + }, + { + "epoch": 0.65, + "learning_rate": 8.76923076923077e-06, + "loss": 0.0992, + "step": 7340 + }, + { + "epoch": 0.66, + "learning_rate": 8.767431399010347e-06, + "loss": 0.128, + "step": 7350 + }, + { + "epoch": 0.66, + "learning_rate": 8.765632028789924e-06, + "loss": 0.0585, + "step": 7360 + }, + { + "epoch": 0.66, + "learning_rate": 8.7638326585695e-06, + "loss": 0.2104, + "step": 7370 + }, + { + "epoch": 0.66, + "learning_rate": 8.762033288349079e-06, + "loss": 0.0491, + "step": 7380 + }, + { + "epoch": 0.66, + "learning_rate": 8.760233918128656e-06, + "loss": 0.1178, + "step": 7390 + }, + { + "epoch": 0.66, + "learning_rate": 8.758434547908233e-06, + "loss": 0.181, + "step": 7400 + }, + { + "epoch": 0.66, + "learning_rate": 8.75663517768781e-06, + "loss": 0.1329, + "step": 7410 + }, + { + "epoch": 0.66, + "learning_rate": 8.754835807467386e-06, + "loss": 0.1067, + "step": 7420 + }, + { + "epoch": 0.66, + "learning_rate": 8.753036437246965e-06, + "loss": 0.1344, + "step": 7430 + }, + { + "epoch": 0.66, + "learning_rate": 8.751237067026542e-06, + "loss": 0.0359, + "step": 7440 + }, + { + "epoch": 0.66, + "learning_rate": 8.749437696806119e-06, + "loss": 0.0587, + "step": 7450 + }, + { + "epoch": 0.67, + "learning_rate": 8.747638326585696e-06, + "loss": 0.0763, + "step": 7460 + }, + { + "epoch": 0.67, + "learning_rate": 8.745838956365272e-06, + "loss": 0.1267, + "step": 7470 + }, + { + "epoch": 0.67, + "learning_rate": 8.744039586144851e-06, + "loss": 0.0407, + "step": 7480 + }, + { + "epoch": 0.67, + "learning_rate": 8.742240215924426e-06, + "loss": 0.0741, + "step": 7490 + }, + { + "epoch": 0.67, + "learning_rate": 8.740440845704005e-06, + "loss": 0.0914, + "step": 7500 + }, + { + "epoch": 0.67, + "eval_accuracy": 0.971293225959261, + "eval_f1": 0.95092057664273, + "eval_loss": 0.12410593777894974, + "eval_precision": 0.9320491109229466, + "eval_recall": 0.9705720268929792, + "eval_runtime": 436.6013, + "eval_samples_per_second": 72.526, + "eval_steps_per_second": 4.535, + "step": 7500 + }, + { + "epoch": 0.67, + "learning_rate": 8.738641475483582e-06, + "loss": 0.1633, + "step": 7510 + }, + { + "epoch": 0.67, + "learning_rate": 8.736842105263158e-06, + "loss": 0.1843, + "step": 7520 + }, + { + "epoch": 0.67, + "learning_rate": 8.735042735042735e-06, + "loss": 0.1008, + "step": 7530 + }, + { + "epoch": 0.67, + "learning_rate": 8.733243364822312e-06, + "loss": 0.0962, + "step": 7540 + }, + { + "epoch": 0.67, + "learning_rate": 8.73144399460189e-06, + "loss": 0.0271, + "step": 7550 + }, + { + "epoch": 0.67, + "learning_rate": 8.729644624381468e-06, + "loss": 0.1435, + "step": 7560 + }, + { + "epoch": 0.67, + "learning_rate": 8.727845254161044e-06, + "loss": 0.0794, + "step": 7570 + }, + { + "epoch": 0.68, + "learning_rate": 8.726045883940621e-06, + "loss": 0.1544, + "step": 7580 + }, + { + "epoch": 0.68, + "learning_rate": 8.724246513720198e-06, + "loss": 0.0104, + "step": 7590 + }, + { + "epoch": 0.68, + "learning_rate": 8.722447143499777e-06, + "loss": 0.0687, + "step": 7600 + }, + { + "epoch": 0.68, + "learning_rate": 8.720647773279352e-06, + "loss": 0.1329, + "step": 7610 + }, + { + "epoch": 0.68, + "learning_rate": 8.71884840305893e-06, + "loss": 0.0721, + "step": 7620 + }, + { + "epoch": 0.68, + "learning_rate": 8.717049032838507e-06, + "loss": 0.1454, + "step": 7630 + }, + { + "epoch": 0.68, + "learning_rate": 8.715249662618084e-06, + "loss": 0.0498, + "step": 7640 + }, + { + "epoch": 0.68, + "learning_rate": 8.713450292397661e-06, + "loss": 0.1081, + "step": 7650 + }, + { + "epoch": 0.68, + "learning_rate": 8.711650922177238e-06, + "loss": 0.072, + "step": 7660 + }, + { + "epoch": 0.68, + "learning_rate": 8.709851551956816e-06, + "loss": 0.0765, + "step": 7670 + }, + { + "epoch": 0.68, + "learning_rate": 8.708052181736393e-06, + "loss": 0.1375, + "step": 7680 + }, + { + "epoch": 0.69, + "learning_rate": 8.70625281151597e-06, + "loss": 0.1537, + "step": 7690 + }, + { + "epoch": 0.69, + "learning_rate": 8.704453441295547e-06, + "loss": 0.097, + "step": 7700 + }, + { + "epoch": 0.69, + "learning_rate": 8.702654071075124e-06, + "loss": 0.1346, + "step": 7710 + }, + { + "epoch": 0.69, + "learning_rate": 8.700854700854702e-06, + "loss": 0.1619, + "step": 7720 + }, + { + "epoch": 0.69, + "learning_rate": 8.69905533063428e-06, + "loss": 0.1027, + "step": 7730 + }, + { + "epoch": 0.69, + "learning_rate": 8.697255960413856e-06, + "loss": 0.1377, + "step": 7740 + }, + { + "epoch": 0.69, + "learning_rate": 8.695456590193433e-06, + "loss": 0.0791, + "step": 7750 + }, + { + "epoch": 0.69, + "learning_rate": 8.693657219973012e-06, + "loss": 0.0814, + "step": 7760 + }, + { + "epoch": 0.69, + "learning_rate": 8.691857849752587e-06, + "loss": 0.078, + "step": 7770 + }, + { + "epoch": 0.69, + "learning_rate": 8.690058479532164e-06, + "loss": 0.0902, + "step": 7780 + }, + { + "epoch": 0.69, + "learning_rate": 8.688259109311742e-06, + "loss": 0.144, + "step": 7790 + }, + { + "epoch": 0.7, + "learning_rate": 8.686459739091319e-06, + "loss": 0.1267, + "step": 7800 + }, + { + "epoch": 0.7, + "learning_rate": 8.684660368870896e-06, + "loss": 0.0564, + "step": 7810 + }, + { + "epoch": 0.7, + "learning_rate": 8.682860998650473e-06, + "loss": 0.0782, + "step": 7820 + }, + { + "epoch": 0.7, + "learning_rate": 8.68106162843005e-06, + "loss": 0.1353, + "step": 7830 + }, + { + "epoch": 0.7, + "learning_rate": 8.679262258209628e-06, + "loss": 0.0428, + "step": 7840 + }, + { + "epoch": 0.7, + "learning_rate": 8.677462887989205e-06, + "loss": 0.0864, + "step": 7850 + }, + { + "epoch": 0.7, + "learning_rate": 8.675663517768782e-06, + "loss": 0.0717, + "step": 7860 + }, + { + "epoch": 0.7, + "learning_rate": 8.673864147548359e-06, + "loss": 0.1192, + "step": 7870 + }, + { + "epoch": 0.7, + "learning_rate": 8.672064777327936e-06, + "loss": 0.0728, + "step": 7880 + }, + { + "epoch": 0.7, + "learning_rate": 8.670265407107512e-06, + "loss": 0.0615, + "step": 7890 + }, + { + "epoch": 0.7, + "learning_rate": 8.66846603688709e-06, + "loss": 0.0884, + "step": 7900 + }, + { + "epoch": 0.71, + "learning_rate": 8.666666666666668e-06, + "loss": 0.1488, + "step": 7910 + }, + { + "epoch": 0.71, + "learning_rate": 8.664867296446245e-06, + "loss": 0.1203, + "step": 7920 + }, + { + "epoch": 0.71, + "learning_rate": 8.663067926225822e-06, + "loss": 0.0469, + "step": 7930 + }, + { + "epoch": 0.71, + "learning_rate": 8.661268556005398e-06, + "loss": 0.0475, + "step": 7940 + }, + { + "epoch": 0.71, + "learning_rate": 8.659469185784975e-06, + "loss": 0.1008, + "step": 7950 + }, + { + "epoch": 0.71, + "learning_rate": 8.657669815564554e-06, + "loss": 0.0544, + "step": 7960 + }, + { + "epoch": 0.71, + "learning_rate": 8.65587044534413e-06, + "loss": 0.1543, + "step": 7970 + }, + { + "epoch": 0.71, + "learning_rate": 8.654071075123708e-06, + "loss": 0.1327, + "step": 7980 + }, + { + "epoch": 0.71, + "learning_rate": 8.652271704903284e-06, + "loss": 0.3129, + "step": 7990 + }, + { + "epoch": 0.71, + "learning_rate": 8.650472334682861e-06, + "loss": 0.0302, + "step": 8000 + }, + { + "epoch": 0.71, + "learning_rate": 8.64867296446244e-06, + "loss": 0.1717, + "step": 8010 + }, + { + "epoch": 0.72, + "learning_rate": 8.646873594242015e-06, + "loss": 0.0878, + "step": 8020 + }, + { + "epoch": 0.72, + "learning_rate": 8.645074224021594e-06, + "loss": 0.0869, + "step": 8030 + }, + { + "epoch": 0.72, + "learning_rate": 8.64327485380117e-06, + "loss": 0.0919, + "step": 8040 + }, + { + "epoch": 0.72, + "learning_rate": 8.641475483580747e-06, + "loss": 0.058, + "step": 8050 + }, + { + "epoch": 0.72, + "learning_rate": 8.639676113360324e-06, + "loss": 0.1208, + "step": 8060 + }, + { + "epoch": 0.72, + "learning_rate": 8.637876743139901e-06, + "loss": 0.0707, + "step": 8070 + }, + { + "epoch": 0.72, + "learning_rate": 8.63607737291948e-06, + "loss": 0.0532, + "step": 8080 + }, + { + "epoch": 0.72, + "learning_rate": 8.634278002699056e-06, + "loss": 0.0889, + "step": 8090 + }, + { + "epoch": 0.72, + "learning_rate": 8.632478632478633e-06, + "loss": 0.0517, + "step": 8100 + }, + { + "epoch": 0.72, + "learning_rate": 8.63067926225821e-06, + "loss": 0.1343, + "step": 8110 + }, + { + "epoch": 0.72, + "learning_rate": 8.628879892037787e-06, + "loss": 0.0304, + "step": 8120 + }, + { + "epoch": 0.72, + "learning_rate": 8.627080521817366e-06, + "loss": 0.1531, + "step": 8130 + }, + { + "epoch": 0.73, + "learning_rate": 8.62528115159694e-06, + "loss": 0.1384, + "step": 8140 + }, + { + "epoch": 0.73, + "learning_rate": 8.62348178137652e-06, + "loss": 0.0572, + "step": 8150 + }, + { + "epoch": 0.73, + "learning_rate": 8.621682411156096e-06, + "loss": 0.043, + "step": 8160 + }, + { + "epoch": 0.73, + "learning_rate": 8.619883040935673e-06, + "loss": 0.0598, + "step": 8170 + }, + { + "epoch": 0.73, + "learning_rate": 8.61808367071525e-06, + "loss": 0.0271, + "step": 8180 + }, + { + "epoch": 0.73, + "learning_rate": 8.616284300494827e-06, + "loss": 0.0278, + "step": 8190 + }, + { + "epoch": 0.73, + "learning_rate": 8.614484930274405e-06, + "loss": 0.0878, + "step": 8200 + }, + { + "epoch": 0.73, + "learning_rate": 8.612685560053982e-06, + "loss": 0.1291, + "step": 8210 + }, + { + "epoch": 0.73, + "learning_rate": 8.610886189833559e-06, + "loss": 0.091, + "step": 8220 + }, + { + "epoch": 0.73, + "learning_rate": 8.609086819613136e-06, + "loss": 0.053, + "step": 8230 + }, + { + "epoch": 0.73, + "learning_rate": 8.607287449392713e-06, + "loss": 0.0576, + "step": 8240 + }, + { + "epoch": 0.74, + "learning_rate": 8.605488079172291e-06, + "loss": 0.1051, + "step": 8250 + }, + { + "epoch": 0.74, + "learning_rate": 8.603688708951866e-06, + "loss": 0.0692, + "step": 8260 + }, + { + "epoch": 0.74, + "learning_rate": 8.601889338731445e-06, + "loss": 0.1337, + "step": 8270 + }, + { + "epoch": 0.74, + "learning_rate": 8.600089968511022e-06, + "loss": 0.0394, + "step": 8280 + }, + { + "epoch": 0.74, + "learning_rate": 8.598290598290599e-06, + "loss": 0.066, + "step": 8290 + }, + { + "epoch": 0.74, + "learning_rate": 8.596491228070176e-06, + "loss": 0.0414, + "step": 8300 + }, + { + "epoch": 0.74, + "learning_rate": 8.594691857849752e-06, + "loss": 0.1076, + "step": 8310 + }, + { + "epoch": 0.74, + "learning_rate": 8.592892487629331e-06, + "loss": 0.1802, + "step": 8320 + }, + { + "epoch": 0.74, + "learning_rate": 8.591093117408908e-06, + "loss": 0.0185, + "step": 8330 + }, + { + "epoch": 0.74, + "learning_rate": 8.589293747188485e-06, + "loss": 0.0604, + "step": 8340 + }, + { + "epoch": 0.74, + "learning_rate": 8.587494376968062e-06, + "loss": 0.1542, + "step": 8350 + }, + { + "epoch": 0.75, + "learning_rate": 8.585695006747638e-06, + "loss": 0.1218, + "step": 8360 + }, + { + "epoch": 0.75, + "learning_rate": 8.583895636527217e-06, + "loss": 0.1441, + "step": 8370 + }, + { + "epoch": 0.75, + "learning_rate": 8.582096266306794e-06, + "loss": 0.0921, + "step": 8380 + }, + { + "epoch": 0.75, + "learning_rate": 8.58029689608637e-06, + "loss": 0.0408, + "step": 8390 + }, + { + "epoch": 0.75, + "learning_rate": 8.578497525865948e-06, + "loss": 0.0247, + "step": 8400 + }, + { + "epoch": 0.75, + "learning_rate": 8.576698155645524e-06, + "loss": 0.0663, + "step": 8410 + }, + { + "epoch": 0.75, + "learning_rate": 8.574898785425101e-06, + "loss": 0.148, + "step": 8420 + }, + { + "epoch": 0.75, + "learning_rate": 8.573099415204678e-06, + "loss": 0.0787, + "step": 8430 + }, + { + "epoch": 0.75, + "learning_rate": 8.571300044984257e-06, + "loss": 0.0468, + "step": 8440 + }, + { + "epoch": 0.75, + "learning_rate": 8.569500674763834e-06, + "loss": 0.1468, + "step": 8450 + }, + { + "epoch": 0.75, + "learning_rate": 8.56770130454341e-06, + "loss": 0.0841, + "step": 8460 + }, + { + "epoch": 0.76, + "learning_rate": 8.565901934322987e-06, + "loss": 0.0974, + "step": 8470 + }, + { + "epoch": 0.76, + "learning_rate": 8.564102564102564e-06, + "loss": 0.0644, + "step": 8480 + }, + { + "epoch": 0.76, + "learning_rate": 8.562303193882143e-06, + "loss": 0.055, + "step": 8490 + }, + { + "epoch": 0.76, + "learning_rate": 8.56050382366172e-06, + "loss": 0.0476, + "step": 8500 + }, + { + "epoch": 0.76, + "learning_rate": 8.558704453441296e-06, + "loss": 0.2432, + "step": 8510 + }, + { + "epoch": 0.76, + "learning_rate": 8.556905083220873e-06, + "loss": 0.1153, + "step": 8520 + }, + { + "epoch": 0.76, + "learning_rate": 8.55510571300045e-06, + "loss": 0.0691, + "step": 8530 + }, + { + "epoch": 0.76, + "learning_rate": 8.553306342780027e-06, + "loss": 0.0634, + "step": 8540 + }, + { + "epoch": 0.76, + "learning_rate": 8.551506972559604e-06, + "loss": 0.1217, + "step": 8550 + }, + { + "epoch": 0.76, + "learning_rate": 8.549707602339182e-06, + "loss": 0.15, + "step": 8560 + }, + { + "epoch": 0.76, + "learning_rate": 8.54790823211876e-06, + "loss": 0.1146, + "step": 8570 + }, + { + "epoch": 0.77, + "learning_rate": 8.546108861898336e-06, + "loss": 0.0232, + "step": 8580 + }, + { + "epoch": 0.77, + "learning_rate": 8.544309491677913e-06, + "loss": 0.1652, + "step": 8590 + }, + { + "epoch": 0.77, + "learning_rate": 8.54251012145749e-06, + "loss": 0.1147, + "step": 8600 + }, + { + "epoch": 0.77, + "learning_rate": 8.540710751237068e-06, + "loss": 0.1156, + "step": 8610 + }, + { + "epoch": 0.77, + "learning_rate": 8.538911381016645e-06, + "loss": 0.0725, + "step": 8620 + }, + { + "epoch": 0.77, + "learning_rate": 8.537112010796222e-06, + "loss": 0.0058, + "step": 8630 + }, + { + "epoch": 0.77, + "learning_rate": 8.535312640575799e-06, + "loss": 0.1656, + "step": 8640 + }, + { + "epoch": 0.77, + "learning_rate": 8.533513270355376e-06, + "loss": 0.0576, + "step": 8650 + }, + { + "epoch": 0.77, + "learning_rate": 8.531713900134954e-06, + "loss": 0.1474, + "step": 8660 + }, + { + "epoch": 0.77, + "learning_rate": 8.52991452991453e-06, + "loss": 0.1511, + "step": 8670 + }, + { + "epoch": 0.77, + "learning_rate": 8.528115159694108e-06, + "loss": 0.1008, + "step": 8680 + }, + { + "epoch": 0.77, + "learning_rate": 8.526315789473685e-06, + "loss": 0.0811, + "step": 8690 + }, + { + "epoch": 0.78, + "learning_rate": 8.524516419253262e-06, + "loss": 0.0772, + "step": 8700 + }, + { + "epoch": 0.78, + "learning_rate": 8.522717049032839e-06, + "loss": 0.0955, + "step": 8710 + }, + { + "epoch": 0.78, + "learning_rate": 8.520917678812416e-06, + "loss": 0.0719, + "step": 8720 + }, + { + "epoch": 0.78, + "learning_rate": 8.519118308591994e-06, + "loss": 0.1383, + "step": 8730 + }, + { + "epoch": 0.78, + "learning_rate": 8.517318938371571e-06, + "loss": 0.0035, + "step": 8740 + }, + { + "epoch": 0.78, + "learning_rate": 8.515519568151148e-06, + "loss": 0.1452, + "step": 8750 + }, + { + "epoch": 0.78, + "learning_rate": 8.513720197930725e-06, + "loss": 0.1435, + "step": 8760 + }, + { + "epoch": 0.78, + "learning_rate": 8.511920827710302e-06, + "loss": 0.1142, + "step": 8770 + }, + { + "epoch": 0.78, + "learning_rate": 8.51012145748988e-06, + "loss": 0.1441, + "step": 8780 + }, + { + "epoch": 0.78, + "learning_rate": 8.508322087269455e-06, + "loss": 0.179, + "step": 8790 + }, + { + "epoch": 0.78, + "learning_rate": 8.506522717049034e-06, + "loss": 0.0934, + "step": 8800 + }, + { + "epoch": 0.79, + "learning_rate": 8.50472334682861e-06, + "loss": 0.0647, + "step": 8810 + }, + { + "epoch": 0.79, + "learning_rate": 8.502923976608188e-06, + "loss": 0.0749, + "step": 8820 + }, + { + "epoch": 0.79, + "learning_rate": 8.501124606387764e-06, + "loss": 0.1038, + "step": 8830 + }, + { + "epoch": 0.79, + "learning_rate": 8.499325236167341e-06, + "loss": 0.0616, + "step": 8840 + }, + { + "epoch": 0.79, + "learning_rate": 8.49752586594692e-06, + "loss": 0.1128, + "step": 8850 + }, + { + "epoch": 0.79, + "learning_rate": 8.495726495726497e-06, + "loss": 0.1401, + "step": 8860 + }, + { + "epoch": 0.79, + "learning_rate": 8.493927125506074e-06, + "loss": 0.2143, + "step": 8870 + }, + { + "epoch": 0.79, + "learning_rate": 8.49212775528565e-06, + "loss": 0.0586, + "step": 8880 + }, + { + "epoch": 0.79, + "learning_rate": 8.490328385065227e-06, + "loss": 0.0917, + "step": 8890 + }, + { + "epoch": 0.79, + "learning_rate": 8.488529014844806e-06, + "loss": 0.1244, + "step": 8900 + }, + { + "epoch": 0.79, + "learning_rate": 8.486729644624383e-06, + "loss": 0.0877, + "step": 8910 + }, + { + "epoch": 0.8, + "learning_rate": 8.48493027440396e-06, + "loss": 0.0792, + "step": 8920 + }, + { + "epoch": 0.8, + "learning_rate": 8.483130904183536e-06, + "loss": 0.1058, + "step": 8930 + }, + { + "epoch": 0.8, + "learning_rate": 8.481331533963115e-06, + "loss": 0.1133, + "step": 8940 + }, + { + "epoch": 0.8, + "learning_rate": 8.47953216374269e-06, + "loss": 0.0392, + "step": 8950 + }, + { + "epoch": 0.8, + "learning_rate": 8.477732793522267e-06, + "loss": 0.1745, + "step": 8960 + }, + { + "epoch": 0.8, + "learning_rate": 8.475933423301846e-06, + "loss": 0.1019, + "step": 8970 + }, + { + "epoch": 0.8, + "learning_rate": 8.474134053081422e-06, + "loss": 0.166, + "step": 8980 + }, + { + "epoch": 0.8, + "learning_rate": 8.472334682861e-06, + "loss": 0.1927, + "step": 8990 + }, + { + "epoch": 0.8, + "learning_rate": 8.470535312640576e-06, + "loss": 0.1251, + "step": 9000 + }, + { + "epoch": 0.8, + "eval_accuracy": 0.9701563240170535, + "eval_f1": 0.9492399419885051, + "eval_loss": 0.09671162813901901, + "eval_precision": 0.9258172673931265, + "eval_recall": 0.9738785407252287, + "eval_runtime": 437.2054, + "eval_samples_per_second": 72.426, + "eval_steps_per_second": 4.529, + "step": 9000 + } + ], + "max_steps": 56075, + "num_train_epochs": 5, + "total_flos": 7.6243123003392e+16, + "trial_name": null, + "trial_params": null +}