{ "best_metric": 0.9616444395459054, "best_model_checkpoint": "drive/MyDrive/albertina/checkpoint-3000", "epoch": 0.26749888542131073, "global_step": 3000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 2.0000000000000002e-07, "loss": 0.1336, "step": 10 }, { "epoch": 0.0, "learning_rate": 4.0000000000000003e-07, "loss": 0.1124, "step": 20 }, { "epoch": 0.0, "learning_rate": 6.000000000000001e-07, "loss": 0.1444, "step": 30 }, { "epoch": 0.0, "learning_rate": 8.000000000000001e-07, "loss": 0.0761, "step": 40 }, { "epoch": 0.0, "learning_rate": 1.0000000000000002e-06, "loss": 0.1128, "step": 50 }, { "epoch": 0.01, "learning_rate": 1.2000000000000002e-06, "loss": 0.0611, "step": 60 }, { "epoch": 0.01, "learning_rate": 1.4000000000000001e-06, "loss": 0.1892, "step": 70 }, { "epoch": 0.01, "learning_rate": 1.6000000000000001e-06, "loss": 0.0343, "step": 80 }, { "epoch": 0.01, "learning_rate": 1.8000000000000001e-06, "loss": 0.0828, "step": 90 }, { "epoch": 0.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.1669, "step": 100 }, { "epoch": 0.01, "learning_rate": 2.2e-06, "loss": 0.092, "step": 110 }, { "epoch": 0.01, "learning_rate": 2.4000000000000003e-06, "loss": 0.0867, "step": 120 }, { "epoch": 0.01, "learning_rate": 2.6e-06, "loss": 0.0552, "step": 130 }, { "epoch": 0.01, "learning_rate": 2.8000000000000003e-06, "loss": 0.0261, "step": 140 }, { "epoch": 0.01, "learning_rate": 3e-06, "loss": 0.0743, "step": 150 }, { "epoch": 0.01, "learning_rate": 3.2000000000000003e-06, "loss": 0.0904, "step": 160 }, { "epoch": 0.02, "learning_rate": 3.4000000000000005e-06, "loss": 0.0438, "step": 170 }, { "epoch": 0.02, "learning_rate": 3.6000000000000003e-06, "loss": 0.0075, "step": 180 }, { "epoch": 0.02, "learning_rate": 3.8000000000000005e-06, "loss": 0.0208, "step": 190 }, { "epoch": 0.02, "learning_rate": 4.000000000000001e-06, "loss": 0.0486, "step": 200 }, { "epoch": 0.02, "learning_rate": 4.2000000000000004e-06, "loss": 0.2147, "step": 210 }, { "epoch": 0.02, "learning_rate": 4.4e-06, "loss": 0.0606, "step": 220 }, { "epoch": 0.02, "learning_rate": 4.600000000000001e-06, "loss": 0.1771, "step": 230 }, { "epoch": 0.02, "learning_rate": 4.800000000000001e-06, "loss": 0.1196, "step": 240 }, { "epoch": 0.02, "learning_rate": 5e-06, "loss": 0.1299, "step": 250 }, { "epoch": 0.02, "learning_rate": 5.2e-06, "loss": 0.0107, "step": 260 }, { "epoch": 0.02, "learning_rate": 5.400000000000001e-06, "loss": 0.1268, "step": 270 }, { "epoch": 0.02, "learning_rate": 5.600000000000001e-06, "loss": 0.1448, "step": 280 }, { "epoch": 0.03, "learning_rate": 5.8e-06, "loss": 0.0086, "step": 290 }, { "epoch": 0.03, "learning_rate": 6e-06, "loss": 0.0789, "step": 300 }, { "epoch": 0.03, "learning_rate": 6.200000000000001e-06, "loss": 0.0931, "step": 310 }, { "epoch": 0.03, "learning_rate": 6.4000000000000006e-06, "loss": 0.0277, "step": 320 }, { "epoch": 0.03, "learning_rate": 6.600000000000001e-06, "loss": 0.0705, "step": 330 }, { "epoch": 0.03, "learning_rate": 6.800000000000001e-06, "loss": 0.0323, "step": 340 }, { "epoch": 0.03, "learning_rate": 7e-06, "loss": 0.1415, "step": 350 }, { "epoch": 0.03, "learning_rate": 7.2000000000000005e-06, "loss": 0.0234, "step": 360 }, { "epoch": 0.03, "learning_rate": 7.4e-06, "loss": 0.0493, "step": 370 }, { "epoch": 0.03, "learning_rate": 7.600000000000001e-06, "loss": 0.0803, "step": 380 }, { "epoch": 0.03, "learning_rate": 7.800000000000002e-06, "loss": 0.0166, "step": 390 }, { "epoch": 0.04, "learning_rate": 8.000000000000001e-06, "loss": 0.0832, "step": 400 }, { "epoch": 0.04, "learning_rate": 8.2e-06, "loss": 0.0722, "step": 410 }, { "epoch": 0.04, "learning_rate": 8.400000000000001e-06, "loss": 0.0077, "step": 420 }, { "epoch": 0.04, "learning_rate": 8.6e-06, "loss": 0.082, "step": 430 }, { "epoch": 0.04, "learning_rate": 8.8e-06, "loss": 0.0458, "step": 440 }, { "epoch": 0.04, "learning_rate": 9e-06, "loss": 0.0319, "step": 450 }, { "epoch": 0.04, "learning_rate": 9.200000000000002e-06, "loss": 0.096, "step": 460 }, { "epoch": 0.04, "learning_rate": 9.4e-06, "loss": 0.0713, "step": 470 }, { "epoch": 0.04, "learning_rate": 9.600000000000001e-06, "loss": 0.1074, "step": 480 }, { "epoch": 0.04, "learning_rate": 9.800000000000001e-06, "loss": 0.0695, "step": 490 }, { "epoch": 0.04, "learning_rate": 1e-05, "loss": 0.0408, "step": 500 }, { "epoch": 0.05, "learning_rate": 9.998200629779578e-06, "loss": 0.0412, "step": 510 }, { "epoch": 0.05, "learning_rate": 9.996401259559155e-06, "loss": 0.092, "step": 520 }, { "epoch": 0.05, "learning_rate": 9.994601889338731e-06, "loss": 0.0777, "step": 530 }, { "epoch": 0.05, "learning_rate": 9.99280251911831e-06, "loss": 0.0442, "step": 540 }, { "epoch": 0.05, "learning_rate": 9.991003148897887e-06, "loss": 0.1633, "step": 550 }, { "epoch": 0.05, "learning_rate": 9.989203778677464e-06, "loss": 0.0985, "step": 560 }, { "epoch": 0.05, "learning_rate": 9.98740440845704e-06, "loss": 0.0819, "step": 570 }, { "epoch": 0.05, "learning_rate": 9.985605038236617e-06, "loss": 0.1122, "step": 580 }, { "epoch": 0.05, "learning_rate": 9.983805668016196e-06, "loss": 0.0936, "step": 590 }, { "epoch": 0.05, "learning_rate": 9.982006297795773e-06, "loss": 0.0693, "step": 600 }, { "epoch": 0.05, "learning_rate": 9.98020692757535e-06, "loss": 0.0854, "step": 610 }, { "epoch": 0.06, "learning_rate": 9.978407557354927e-06, "loss": 0.137, "step": 620 }, { "epoch": 0.06, "learning_rate": 9.976608187134503e-06, "loss": 0.0019, "step": 630 }, { "epoch": 0.06, "learning_rate": 9.97480881691408e-06, "loss": 0.1362, "step": 640 }, { "epoch": 0.06, "learning_rate": 9.973009446693657e-06, "loss": 0.0923, "step": 650 }, { "epoch": 0.06, "learning_rate": 9.971210076473236e-06, "loss": 0.0557, "step": 660 }, { "epoch": 0.06, "learning_rate": 9.969410706252813e-06, "loss": 0.0505, "step": 670 }, { "epoch": 0.06, "learning_rate": 9.96761133603239e-06, "loss": 0.0414, "step": 680 }, { "epoch": 0.06, "learning_rate": 9.965811965811966e-06, "loss": 0.074, "step": 690 }, { "epoch": 0.06, "learning_rate": 9.964012595591543e-06, "loss": 0.1012, "step": 700 }, { "epoch": 0.06, "learning_rate": 9.962213225371122e-06, "loss": 0.0792, "step": 710 }, { "epoch": 0.06, "learning_rate": 9.960413855150699e-06, "loss": 0.0928, "step": 720 }, { "epoch": 0.07, "learning_rate": 9.958614484930275e-06, "loss": 0.0312, "step": 730 }, { "epoch": 0.07, "learning_rate": 9.956815114709852e-06, "loss": 0.0529, "step": 740 }, { "epoch": 0.07, "learning_rate": 9.955015744489429e-06, "loss": 0.0793, "step": 750 }, { "epoch": 0.07, "learning_rate": 9.953216374269008e-06, "loss": 0.0597, "step": 760 }, { "epoch": 0.07, "learning_rate": 9.951417004048583e-06, "loss": 0.1541, "step": 770 }, { "epoch": 0.07, "learning_rate": 9.949617633828161e-06, "loss": 0.087, "step": 780 }, { "epoch": 0.07, "learning_rate": 9.947818263607738e-06, "loss": 0.034, "step": 790 }, { "epoch": 0.07, "learning_rate": 9.946018893387315e-06, "loss": 0.0948, "step": 800 }, { "epoch": 0.07, "learning_rate": 9.944219523166892e-06, "loss": 0.0078, "step": 810 }, { "epoch": 0.07, "learning_rate": 9.942420152946469e-06, "loss": 0.0797, "step": 820 }, { "epoch": 0.07, "learning_rate": 9.940620782726047e-06, "loss": 0.0859, "step": 830 }, { "epoch": 0.07, "learning_rate": 9.938821412505624e-06, "loss": 0.1256, "step": 840 }, { "epoch": 0.08, "learning_rate": 9.937022042285201e-06, "loss": 0.0775, "step": 850 }, { "epoch": 0.08, "learning_rate": 9.935222672064778e-06, "loss": 0.0539, "step": 860 }, { "epoch": 0.08, "learning_rate": 9.933423301844355e-06, "loss": 0.0795, "step": 870 }, { "epoch": 0.08, "learning_rate": 9.931623931623933e-06, "loss": 0.0099, "step": 880 }, { "epoch": 0.08, "learning_rate": 9.929824561403509e-06, "loss": 0.0253, "step": 890 }, { "epoch": 0.08, "learning_rate": 9.928025191183087e-06, "loss": 0.0442, "step": 900 }, { "epoch": 0.08, "learning_rate": 9.926225820962664e-06, "loss": 0.1785, "step": 910 }, { "epoch": 0.08, "learning_rate": 9.92442645074224e-06, "loss": 0.1241, "step": 920 }, { "epoch": 0.08, "learning_rate": 9.922627080521818e-06, "loss": 0.1332, "step": 930 }, { "epoch": 0.08, "learning_rate": 9.920827710301395e-06, "loss": 0.0407, "step": 940 }, { "epoch": 0.08, "learning_rate": 9.919028340080973e-06, "loss": 0.0546, "step": 950 }, { "epoch": 0.09, "learning_rate": 9.91722896986055e-06, "loss": 0.1157, "step": 960 }, { "epoch": 0.09, "learning_rate": 9.915429599640127e-06, "loss": 0.0697, "step": 970 }, { "epoch": 0.09, "learning_rate": 9.913630229419704e-06, "loss": 0.1253, "step": 980 }, { "epoch": 0.09, "learning_rate": 9.91183085919928e-06, "loss": 0.0205, "step": 990 }, { "epoch": 0.09, "learning_rate": 9.910031488978859e-06, "loss": 0.1289, "step": 1000 }, { "epoch": 0.09, "learning_rate": 9.908232118758436e-06, "loss": 0.0956, "step": 1010 }, { "epoch": 0.09, "learning_rate": 9.906432748538013e-06, "loss": 0.0972, "step": 1020 }, { "epoch": 0.09, "learning_rate": 9.90463337831759e-06, "loss": 0.0199, "step": 1030 }, { "epoch": 0.09, "learning_rate": 9.902834008097167e-06, "loss": 0.0263, "step": 1040 }, { "epoch": 0.09, "learning_rate": 9.901034637876743e-06, "loss": 0.0388, "step": 1050 }, { "epoch": 0.09, "learning_rate": 9.89923526765632e-06, "loss": 0.119, "step": 1060 }, { "epoch": 0.1, "learning_rate": 9.897435897435899e-06, "loss": 0.032, "step": 1070 }, { "epoch": 0.1, "learning_rate": 9.895636527215476e-06, "loss": 0.0877, "step": 1080 }, { "epoch": 0.1, "learning_rate": 9.893837156995053e-06, "loss": 0.0679, "step": 1090 }, { "epoch": 0.1, "learning_rate": 9.89203778677463e-06, "loss": 0.0273, "step": 1100 }, { "epoch": 0.1, "learning_rate": 9.890238416554206e-06, "loss": 0.048, "step": 1110 }, { "epoch": 0.1, "learning_rate": 9.888439046333785e-06, "loss": 0.0054, "step": 1120 }, { "epoch": 0.1, "learning_rate": 9.886639676113362e-06, "loss": 0.0812, "step": 1130 }, { "epoch": 0.1, "learning_rate": 9.884840305892939e-06, "loss": 0.0903, "step": 1140 }, { "epoch": 0.1, "learning_rate": 9.883040935672515e-06, "loss": 0.0092, "step": 1150 }, { "epoch": 0.1, "learning_rate": 9.881241565452092e-06, "loss": 0.0013, "step": 1160 }, { "epoch": 0.1, "learning_rate": 9.879442195231669e-06, "loss": 0.165, "step": 1170 }, { "epoch": 0.11, "learning_rate": 9.877642825011246e-06, "loss": 0.0509, "step": 1180 }, { "epoch": 0.11, "learning_rate": 9.875843454790825e-06, "loss": 0.0653, "step": 1190 }, { "epoch": 0.11, "learning_rate": 9.874044084570401e-06, "loss": 0.0253, "step": 1200 }, { "epoch": 0.11, "learning_rate": 9.872244714349978e-06, "loss": 0.0375, "step": 1210 }, { "epoch": 0.11, "learning_rate": 9.870445344129555e-06, "loss": 0.0689, "step": 1220 }, { "epoch": 0.11, "learning_rate": 9.868645973909132e-06, "loss": 0.0674, "step": 1230 }, { "epoch": 0.11, "learning_rate": 9.86684660368871e-06, "loss": 0.1405, "step": 1240 }, { "epoch": 0.11, "learning_rate": 9.865047233468287e-06, "loss": 0.0604, "step": 1250 }, { "epoch": 0.11, "learning_rate": 9.863247863247864e-06, "loss": 0.0388, "step": 1260 }, { "epoch": 0.11, "learning_rate": 9.861448493027441e-06, "loss": 0.0627, "step": 1270 }, { "epoch": 0.11, "learning_rate": 9.859649122807018e-06, "loss": 0.0909, "step": 1280 }, { "epoch": 0.12, "learning_rate": 9.857849752586597e-06, "loss": 0.0419, "step": 1290 }, { "epoch": 0.12, "learning_rate": 9.856050382366172e-06, "loss": 0.0019, "step": 1300 }, { "epoch": 0.12, "learning_rate": 9.85425101214575e-06, "loss": 0.1776, "step": 1310 }, { "epoch": 0.12, "learning_rate": 9.852451641925327e-06, "loss": 0.003, "step": 1320 }, { "epoch": 0.12, "learning_rate": 9.850652271704904e-06, "loss": 0.0764, "step": 1330 }, { "epoch": 0.12, "learning_rate": 9.84885290148448e-06, "loss": 0.0753, "step": 1340 }, { "epoch": 0.12, "learning_rate": 9.847053531264058e-06, "loss": 0.0831, "step": 1350 }, { "epoch": 0.12, "learning_rate": 9.845254161043636e-06, "loss": 0.1177, "step": 1360 }, { "epoch": 0.12, "learning_rate": 9.843454790823213e-06, "loss": 0.1527, "step": 1370 }, { "epoch": 0.12, "learning_rate": 9.84165542060279e-06, "loss": 0.1304, "step": 1380 }, { "epoch": 0.12, "learning_rate": 9.839856050382367e-06, "loss": 0.0945, "step": 1390 }, { "epoch": 0.12, "learning_rate": 9.838056680161944e-06, "loss": 0.0516, "step": 1400 }, { "epoch": 0.13, "learning_rate": 9.836257309941522e-06, "loss": 0.0216, "step": 1410 }, { "epoch": 0.13, "learning_rate": 9.834457939721097e-06, "loss": 0.1259, "step": 1420 }, { "epoch": 0.13, "learning_rate": 9.832658569500676e-06, "loss": 0.0558, "step": 1430 }, { "epoch": 0.13, "learning_rate": 9.830859199280253e-06, "loss": 0.0711, "step": 1440 }, { "epoch": 0.13, "learning_rate": 9.82905982905983e-06, "loss": 0.1041, "step": 1450 }, { "epoch": 0.13, "learning_rate": 9.827260458839407e-06, "loss": 0.1173, "step": 1460 }, { "epoch": 0.13, "learning_rate": 9.825461088618983e-06, "loss": 0.1194, "step": 1470 }, { "epoch": 0.13, "learning_rate": 9.823661718398562e-06, "loss": 0.1103, "step": 1480 }, { "epoch": 0.13, "learning_rate": 9.821862348178139e-06, "loss": 0.0481, "step": 1490 }, { "epoch": 0.13, "learning_rate": 9.820062977957716e-06, "loss": 0.0413, "step": 1500 }, { "epoch": 0.13, "eval_accuracy": 0.9769145744512869, "eval_f1": 0.9595305320267951, "eval_loss": 0.07930105179548264, "eval_precision": 0.9639599555061179, "eval_recall": 0.955141629009148, "eval_runtime": 436.6474, "eval_samples_per_second": 72.518, "eval_steps_per_second": 4.535, "step": 1500 }, { "epoch": 0.13, "learning_rate": 9.818263607737293e-06, "loss": 0.0825, "step": 1510 }, { "epoch": 0.14, "learning_rate": 9.81646423751687e-06, "loss": 0.0769, "step": 1520 }, { "epoch": 0.14, "learning_rate": 9.814664867296448e-06, "loss": 0.0901, "step": 1530 }, { "epoch": 0.14, "learning_rate": 9.812865497076025e-06, "loss": 0.0237, "step": 1540 }, { "epoch": 0.14, "learning_rate": 9.811066126855602e-06, "loss": 0.1067, "step": 1550 }, { "epoch": 0.14, "learning_rate": 9.809266756635179e-06, "loss": 0.0536, "step": 1560 }, { "epoch": 0.14, "learning_rate": 9.807467386414755e-06, "loss": 0.0372, "step": 1570 }, { "epoch": 0.14, "learning_rate": 9.805668016194332e-06, "loss": 0.075, "step": 1580 }, { "epoch": 0.14, "learning_rate": 9.803868645973909e-06, "loss": 0.0022, "step": 1590 }, { "epoch": 0.14, "learning_rate": 9.802069275753488e-06, "loss": 0.1077, "step": 1600 }, { "epoch": 0.14, "learning_rate": 9.800269905533065e-06, "loss": 0.019, "step": 1610 }, { "epoch": 0.14, "learning_rate": 9.798470535312641e-06, "loss": 0.0456, "step": 1620 }, { "epoch": 0.15, "learning_rate": 9.796671165092218e-06, "loss": 0.0074, "step": 1630 }, { "epoch": 0.15, "learning_rate": 9.794871794871795e-06, "loss": 0.0944, "step": 1640 }, { "epoch": 0.15, "learning_rate": 9.793072424651374e-06, "loss": 0.0055, "step": 1650 }, { "epoch": 0.15, "learning_rate": 9.79127305443095e-06, "loss": 0.0979, "step": 1660 }, { "epoch": 0.15, "learning_rate": 9.789473684210527e-06, "loss": 0.2072, "step": 1670 }, { "epoch": 0.15, "learning_rate": 9.787674313990104e-06, "loss": 0.1238, "step": 1680 }, { "epoch": 0.15, "learning_rate": 9.785874943769681e-06, "loss": 0.0366, "step": 1690 }, { "epoch": 0.15, "learning_rate": 9.784075573549258e-06, "loss": 0.0754, "step": 1700 }, { "epoch": 0.15, "learning_rate": 9.782276203328835e-06, "loss": 0.042, "step": 1710 }, { "epoch": 0.15, "learning_rate": 9.780476833108413e-06, "loss": 0.0614, "step": 1720 }, { "epoch": 0.15, "learning_rate": 9.77867746288799e-06, "loss": 0.0572, "step": 1730 }, { "epoch": 0.16, "learning_rate": 9.776878092667567e-06, "loss": 0.0323, "step": 1740 }, { "epoch": 0.16, "learning_rate": 9.775078722447144e-06, "loss": 0.058, "step": 1750 }, { "epoch": 0.16, "learning_rate": 9.77327935222672e-06, "loss": 0.0715, "step": 1760 }, { "epoch": 0.16, "learning_rate": 9.7714799820063e-06, "loss": 0.0783, "step": 1770 }, { "epoch": 0.16, "learning_rate": 9.769680611785876e-06, "loss": 0.0982, "step": 1780 }, { "epoch": 0.16, "learning_rate": 9.767881241565453e-06, "loss": 0.0858, "step": 1790 }, { "epoch": 0.16, "learning_rate": 9.76608187134503e-06, "loss": 0.0535, "step": 1800 }, { "epoch": 0.16, "learning_rate": 9.764282501124607e-06, "loss": 0.0447, "step": 1810 }, { "epoch": 0.16, "learning_rate": 9.762483130904185e-06, "loss": 0.0819, "step": 1820 }, { "epoch": 0.16, "learning_rate": 9.76068376068376e-06, "loss": 0.0565, "step": 1830 }, { "epoch": 0.16, "learning_rate": 9.758884390463339e-06, "loss": 0.0829, "step": 1840 }, { "epoch": 0.16, "learning_rate": 9.757085020242916e-06, "loss": 0.1505, "step": 1850 }, { "epoch": 0.17, "learning_rate": 9.755285650022493e-06, "loss": 0.1586, "step": 1860 }, { "epoch": 0.17, "learning_rate": 9.75348627980207e-06, "loss": 0.0098, "step": 1870 }, { "epoch": 0.17, "learning_rate": 9.751686909581647e-06, "loss": 0.1513, "step": 1880 }, { "epoch": 0.17, "learning_rate": 9.749887539361225e-06, "loss": 0.136, "step": 1890 }, { "epoch": 0.17, "learning_rate": 9.748088169140802e-06, "loss": 0.1072, "step": 1900 }, { "epoch": 0.17, "learning_rate": 9.746288798920379e-06, "loss": 0.0829, "step": 1910 }, { "epoch": 0.17, "learning_rate": 9.744489428699956e-06, "loss": 0.0647, "step": 1920 }, { "epoch": 0.17, "learning_rate": 9.742690058479533e-06, "loss": 0.035, "step": 1930 }, { "epoch": 0.17, "learning_rate": 9.740890688259111e-06, "loss": 0.077, "step": 1940 }, { "epoch": 0.17, "learning_rate": 9.739091318038686e-06, "loss": 0.0325, "step": 1950 }, { "epoch": 0.17, "learning_rate": 9.737291947818265e-06, "loss": 0.0155, "step": 1960 }, { "epoch": 0.18, "learning_rate": 9.735492577597842e-06, "loss": 0.041, "step": 1970 }, { "epoch": 0.18, "learning_rate": 9.733693207377419e-06, "loss": 0.1311, "step": 1980 }, { "epoch": 0.18, "learning_rate": 9.731893837156995e-06, "loss": 0.1448, "step": 1990 }, { "epoch": 0.18, "learning_rate": 9.730094466936572e-06, "loss": 0.1902, "step": 2000 }, { "epoch": 0.18, "learning_rate": 9.72829509671615e-06, "loss": 0.1521, "step": 2010 }, { "epoch": 0.18, "learning_rate": 9.726495726495728e-06, "loss": 0.2189, "step": 2020 }, { "epoch": 0.18, "learning_rate": 9.724696356275305e-06, "loss": 0.0697, "step": 2030 }, { "epoch": 0.18, "learning_rate": 9.722896986054881e-06, "loss": 0.0175, "step": 2040 }, { "epoch": 0.18, "learning_rate": 9.721097615834458e-06, "loss": 0.0511, "step": 2050 }, { "epoch": 0.18, "learning_rate": 9.719298245614037e-06, "loss": 0.1222, "step": 2060 }, { "epoch": 0.18, "learning_rate": 9.717498875393614e-06, "loss": 0.0637, "step": 2070 }, { "epoch": 0.19, "learning_rate": 9.71569950517319e-06, "loss": 0.0311, "step": 2080 }, { "epoch": 0.19, "learning_rate": 9.713900134952767e-06, "loss": 0.0493, "step": 2090 }, { "epoch": 0.19, "learning_rate": 9.712100764732344e-06, "loss": 0.0671, "step": 2100 }, { "epoch": 0.19, "learning_rate": 9.710301394511921e-06, "loss": 0.0948, "step": 2110 }, { "epoch": 0.19, "learning_rate": 9.708502024291498e-06, "loss": 0.0475, "step": 2120 }, { "epoch": 0.19, "learning_rate": 9.706702654071076e-06, "loss": 0.042, "step": 2130 }, { "epoch": 0.19, "learning_rate": 9.704903283850653e-06, "loss": 0.0755, "step": 2140 }, { "epoch": 0.19, "learning_rate": 9.70310391363023e-06, "loss": 0.0164, "step": 2150 }, { "epoch": 0.19, "learning_rate": 9.701304543409807e-06, "loss": 0.0568, "step": 2160 }, { "epoch": 0.19, "learning_rate": 9.699505173189384e-06, "loss": 0.0074, "step": 2170 }, { "epoch": 0.19, "learning_rate": 9.697705802968962e-06, "loss": 0.2288, "step": 2180 }, { "epoch": 0.2, "learning_rate": 9.69590643274854e-06, "loss": 0.1608, "step": 2190 }, { "epoch": 0.2, "learning_rate": 9.694107062528116e-06, "loss": 0.0774, "step": 2200 }, { "epoch": 0.2, "learning_rate": 9.692307692307693e-06, "loss": 0.1041, "step": 2210 }, { "epoch": 0.2, "learning_rate": 9.69050832208727e-06, "loss": 0.0561, "step": 2220 }, { "epoch": 0.2, "learning_rate": 9.688708951866847e-06, "loss": 0.1579, "step": 2230 }, { "epoch": 0.2, "learning_rate": 9.686909581646424e-06, "loss": 0.0396, "step": 2240 }, { "epoch": 0.2, "learning_rate": 9.685110211426002e-06, "loss": 0.0969, "step": 2250 }, { "epoch": 0.2, "learning_rate": 9.683310841205579e-06, "loss": 0.0862, "step": 2260 }, { "epoch": 0.2, "learning_rate": 9.681511470985156e-06, "loss": 0.1806, "step": 2270 }, { "epoch": 0.2, "learning_rate": 9.679712100764733e-06, "loss": 0.0316, "step": 2280 }, { "epoch": 0.2, "learning_rate": 9.67791273054431e-06, "loss": 0.059, "step": 2290 }, { "epoch": 0.21, "learning_rate": 9.676113360323888e-06, "loss": 0.0042, "step": 2300 }, { "epoch": 0.21, "learning_rate": 9.674313990103465e-06, "loss": 0.0331, "step": 2310 }, { "epoch": 0.21, "learning_rate": 9.672514619883042e-06, "loss": 0.0941, "step": 2320 }, { "epoch": 0.21, "learning_rate": 9.670715249662619e-06, "loss": 0.0765, "step": 2330 }, { "epoch": 0.21, "learning_rate": 9.668915879442196e-06, "loss": 0.1153, "step": 2340 }, { "epoch": 0.21, "learning_rate": 9.667116509221774e-06, "loss": 0.208, "step": 2350 }, { "epoch": 0.21, "learning_rate": 9.66531713900135e-06, "loss": 0.1071, "step": 2360 }, { "epoch": 0.21, "learning_rate": 9.663517768780928e-06, "loss": 0.0316, "step": 2370 }, { "epoch": 0.21, "learning_rate": 9.661718398560505e-06, "loss": 0.0204, "step": 2380 }, { "epoch": 0.21, "learning_rate": 9.659919028340082e-06, "loss": 0.0503, "step": 2390 }, { "epoch": 0.21, "learning_rate": 9.658119658119659e-06, "loss": 0.1018, "step": 2400 }, { "epoch": 0.21, "learning_rate": 9.656320287899235e-06, "loss": 0.0993, "step": 2410 }, { "epoch": 0.22, "learning_rate": 9.654520917678814e-06, "loss": 0.0304, "step": 2420 }, { "epoch": 0.22, "learning_rate": 9.65272154745839e-06, "loss": 0.0567, "step": 2430 }, { "epoch": 0.22, "learning_rate": 9.650922177237968e-06, "loss": 0.0308, "step": 2440 }, { "epoch": 0.22, "learning_rate": 9.649122807017545e-06, "loss": 0.0647, "step": 2450 }, { "epoch": 0.22, "learning_rate": 9.647323436797121e-06, "loss": 0.1115, "step": 2460 }, { "epoch": 0.22, "learning_rate": 9.6455240665767e-06, "loss": 0.0392, "step": 2470 }, { "epoch": 0.22, "learning_rate": 9.643724696356275e-06, "loss": 0.1018, "step": 2480 }, { "epoch": 0.22, "learning_rate": 9.641925326135854e-06, "loss": 0.0271, "step": 2490 }, { "epoch": 0.22, "learning_rate": 9.64012595591543e-06, "loss": 0.1548, "step": 2500 }, { "epoch": 0.22, "learning_rate": 9.638326585695007e-06, "loss": 0.0135, "step": 2510 }, { "epoch": 0.22, "learning_rate": 9.636527215474584e-06, "loss": 0.0794, "step": 2520 }, { "epoch": 0.23, "learning_rate": 9.634727845254161e-06, "loss": 0.0612, "step": 2530 }, { "epoch": 0.23, "learning_rate": 9.63292847503374e-06, "loss": 0.0375, "step": 2540 }, { "epoch": 0.23, "learning_rate": 9.631129104813316e-06, "loss": 0.0702, "step": 2550 }, { "epoch": 0.23, "learning_rate": 9.629329734592893e-06, "loss": 0.0353, "step": 2560 }, { "epoch": 0.23, "learning_rate": 9.62753036437247e-06, "loss": 0.0771, "step": 2570 }, { "epoch": 0.23, "learning_rate": 9.625730994152047e-06, "loss": 0.0078, "step": 2580 }, { "epoch": 0.23, "learning_rate": 9.623931623931626e-06, "loss": 0.0934, "step": 2590 }, { "epoch": 0.23, "learning_rate": 9.6221322537112e-06, "loss": 0.0922, "step": 2600 }, { "epoch": 0.23, "learning_rate": 9.62033288349078e-06, "loss": 0.0676, "step": 2610 }, { "epoch": 0.23, "learning_rate": 9.618533513270356e-06, "loss": 0.1065, "step": 2620 }, { "epoch": 0.23, "learning_rate": 9.616734143049933e-06, "loss": 0.0321, "step": 2630 }, { "epoch": 0.24, "learning_rate": 9.61493477282951e-06, "loss": 0.0563, "step": 2640 }, { "epoch": 0.24, "learning_rate": 9.613135402609087e-06, "loss": 0.1313, "step": 2650 }, { "epoch": 0.24, "learning_rate": 9.611336032388665e-06, "loss": 0.0676, "step": 2660 }, { "epoch": 0.24, "learning_rate": 9.609536662168242e-06, "loss": 0.0306, "step": 2670 }, { "epoch": 0.24, "learning_rate": 9.607737291947819e-06, "loss": 0.0298, "step": 2680 }, { "epoch": 0.24, "learning_rate": 9.605937921727396e-06, "loss": 0.0494, "step": 2690 }, { "epoch": 0.24, "learning_rate": 9.604138551506973e-06, "loss": 0.024, "step": 2700 }, { "epoch": 0.24, "learning_rate": 9.602339181286551e-06, "loss": 0.0462, "step": 2710 }, { "epoch": 0.24, "learning_rate": 9.600539811066128e-06, "loss": 0.0931, "step": 2720 }, { "epoch": 0.24, "learning_rate": 9.598740440845705e-06, "loss": 0.1424, "step": 2730 }, { "epoch": 0.24, "learning_rate": 9.596941070625282e-06, "loss": 0.0485, "step": 2740 }, { "epoch": 0.25, "learning_rate": 9.595141700404859e-06, "loss": 0.1279, "step": 2750 }, { "epoch": 0.25, "learning_rate": 9.593342330184436e-06, "loss": 0.0226, "step": 2760 }, { "epoch": 0.25, "learning_rate": 9.591542959964013e-06, "loss": 0.0528, "step": 2770 }, { "epoch": 0.25, "learning_rate": 9.589743589743591e-06, "loss": 0.0527, "step": 2780 }, { "epoch": 0.25, "learning_rate": 9.587944219523168e-06, "loss": 0.0817, "step": 2790 }, { "epoch": 0.25, "learning_rate": 9.586144849302745e-06, "loss": 0.0079, "step": 2800 }, { "epoch": 0.25, "learning_rate": 9.584345479082322e-06, "loss": 0.1174, "step": 2810 }, { "epoch": 0.25, "learning_rate": 9.582546108861898e-06, "loss": 0.0392, "step": 2820 }, { "epoch": 0.25, "learning_rate": 9.580746738641477e-06, "loss": 0.0268, "step": 2830 }, { "epoch": 0.25, "learning_rate": 9.578947368421054e-06, "loss": 0.0946, "step": 2840 }, { "epoch": 0.25, "learning_rate": 9.57714799820063e-06, "loss": 0.0524, "step": 2850 }, { "epoch": 0.26, "learning_rate": 9.575348627980208e-06, "loss": 0.0382, "step": 2860 }, { "epoch": 0.26, "learning_rate": 9.573549257759784e-06, "loss": 0.0014, "step": 2870 }, { "epoch": 0.26, "learning_rate": 9.571749887539361e-06, "loss": 0.0695, "step": 2880 }, { "epoch": 0.26, "learning_rate": 9.569950517318938e-06, "loss": 0.0376, "step": 2890 }, { "epoch": 0.26, "learning_rate": 9.568151147098517e-06, "loss": 0.04, "step": 2900 }, { "epoch": 0.26, "learning_rate": 9.566351776878094e-06, "loss": 0.0388, "step": 2910 }, { "epoch": 0.26, "learning_rate": 9.56455240665767e-06, "loss": 0.0649, "step": 2920 }, { "epoch": 0.26, "learning_rate": 9.562753036437247e-06, "loss": 0.1104, "step": 2930 }, { "epoch": 0.26, "learning_rate": 9.560953666216824e-06, "loss": 0.1071, "step": 2940 }, { "epoch": 0.26, "learning_rate": 9.559154295996403e-06, "loss": 0.1165, "step": 2950 }, { "epoch": 0.26, "learning_rate": 9.55735492577598e-06, "loss": 0.0218, "step": 2960 }, { "epoch": 0.26, "learning_rate": 9.555555555555556e-06, "loss": 0.0353, "step": 2970 }, { "epoch": 0.27, "learning_rate": 9.553756185335133e-06, "loss": 0.0902, "step": 2980 }, { "epoch": 0.27, "learning_rate": 9.55195681511471e-06, "loss": 0.0277, "step": 2990 }, { "epoch": 0.27, "learning_rate": 9.550157444894289e-06, "loss": 0.0377, "step": 3000 }, { "epoch": 0.27, "eval_accuracy": 0.9766303489657351, "eval_f1": 0.9593183067619572, "eval_loss": 0.09248499572277069, "eval_precision": 0.9570034002413075, "eval_recall": 0.9616444395459054, "eval_runtime": 436.5505, "eval_samples_per_second": 72.535, "eval_steps_per_second": 4.536, "step": 3000 } ], "max_steps": 56075, "num_train_epochs": 5, "total_flos": 2.5414374334464e+16, "trial_name": null, "trial_params": null }