albertina_gun / trainer_state.json
belisards's picture
Upload 11 files
5d00911
raw
history blame
37 kB
{
"best_metric": 0.9616444395459054,
"best_model_checkpoint": "drive/MyDrive/albertina/checkpoint-3000",
"epoch": 0.26749888542131073,
"global_step": 3000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 2.0000000000000002e-07,
"loss": 0.1336,
"step": 10
},
{
"epoch": 0.0,
"learning_rate": 4.0000000000000003e-07,
"loss": 0.1124,
"step": 20
},
{
"epoch": 0.0,
"learning_rate": 6.000000000000001e-07,
"loss": 0.1444,
"step": 30
},
{
"epoch": 0.0,
"learning_rate": 8.000000000000001e-07,
"loss": 0.0761,
"step": 40
},
{
"epoch": 0.0,
"learning_rate": 1.0000000000000002e-06,
"loss": 0.1128,
"step": 50
},
{
"epoch": 0.01,
"learning_rate": 1.2000000000000002e-06,
"loss": 0.0611,
"step": 60
},
{
"epoch": 0.01,
"learning_rate": 1.4000000000000001e-06,
"loss": 0.1892,
"step": 70
},
{
"epoch": 0.01,
"learning_rate": 1.6000000000000001e-06,
"loss": 0.0343,
"step": 80
},
{
"epoch": 0.01,
"learning_rate": 1.8000000000000001e-06,
"loss": 0.0828,
"step": 90
},
{
"epoch": 0.01,
"learning_rate": 2.0000000000000003e-06,
"loss": 0.1669,
"step": 100
},
{
"epoch": 0.01,
"learning_rate": 2.2e-06,
"loss": 0.092,
"step": 110
},
{
"epoch": 0.01,
"learning_rate": 2.4000000000000003e-06,
"loss": 0.0867,
"step": 120
},
{
"epoch": 0.01,
"learning_rate": 2.6e-06,
"loss": 0.0552,
"step": 130
},
{
"epoch": 0.01,
"learning_rate": 2.8000000000000003e-06,
"loss": 0.0261,
"step": 140
},
{
"epoch": 0.01,
"learning_rate": 3e-06,
"loss": 0.0743,
"step": 150
},
{
"epoch": 0.01,
"learning_rate": 3.2000000000000003e-06,
"loss": 0.0904,
"step": 160
},
{
"epoch": 0.02,
"learning_rate": 3.4000000000000005e-06,
"loss": 0.0438,
"step": 170
},
{
"epoch": 0.02,
"learning_rate": 3.6000000000000003e-06,
"loss": 0.0075,
"step": 180
},
{
"epoch": 0.02,
"learning_rate": 3.8000000000000005e-06,
"loss": 0.0208,
"step": 190
},
{
"epoch": 0.02,
"learning_rate": 4.000000000000001e-06,
"loss": 0.0486,
"step": 200
},
{
"epoch": 0.02,
"learning_rate": 4.2000000000000004e-06,
"loss": 0.2147,
"step": 210
},
{
"epoch": 0.02,
"learning_rate": 4.4e-06,
"loss": 0.0606,
"step": 220
},
{
"epoch": 0.02,
"learning_rate": 4.600000000000001e-06,
"loss": 0.1771,
"step": 230
},
{
"epoch": 0.02,
"learning_rate": 4.800000000000001e-06,
"loss": 0.1196,
"step": 240
},
{
"epoch": 0.02,
"learning_rate": 5e-06,
"loss": 0.1299,
"step": 250
},
{
"epoch": 0.02,
"learning_rate": 5.2e-06,
"loss": 0.0107,
"step": 260
},
{
"epoch": 0.02,
"learning_rate": 5.400000000000001e-06,
"loss": 0.1268,
"step": 270
},
{
"epoch": 0.02,
"learning_rate": 5.600000000000001e-06,
"loss": 0.1448,
"step": 280
},
{
"epoch": 0.03,
"learning_rate": 5.8e-06,
"loss": 0.0086,
"step": 290
},
{
"epoch": 0.03,
"learning_rate": 6e-06,
"loss": 0.0789,
"step": 300
},
{
"epoch": 0.03,
"learning_rate": 6.200000000000001e-06,
"loss": 0.0931,
"step": 310
},
{
"epoch": 0.03,
"learning_rate": 6.4000000000000006e-06,
"loss": 0.0277,
"step": 320
},
{
"epoch": 0.03,
"learning_rate": 6.600000000000001e-06,
"loss": 0.0705,
"step": 330
},
{
"epoch": 0.03,
"learning_rate": 6.800000000000001e-06,
"loss": 0.0323,
"step": 340
},
{
"epoch": 0.03,
"learning_rate": 7e-06,
"loss": 0.1415,
"step": 350
},
{
"epoch": 0.03,
"learning_rate": 7.2000000000000005e-06,
"loss": 0.0234,
"step": 360
},
{
"epoch": 0.03,
"learning_rate": 7.4e-06,
"loss": 0.0493,
"step": 370
},
{
"epoch": 0.03,
"learning_rate": 7.600000000000001e-06,
"loss": 0.0803,
"step": 380
},
{
"epoch": 0.03,
"learning_rate": 7.800000000000002e-06,
"loss": 0.0166,
"step": 390
},
{
"epoch": 0.04,
"learning_rate": 8.000000000000001e-06,
"loss": 0.0832,
"step": 400
},
{
"epoch": 0.04,
"learning_rate": 8.2e-06,
"loss": 0.0722,
"step": 410
},
{
"epoch": 0.04,
"learning_rate": 8.400000000000001e-06,
"loss": 0.0077,
"step": 420
},
{
"epoch": 0.04,
"learning_rate": 8.6e-06,
"loss": 0.082,
"step": 430
},
{
"epoch": 0.04,
"learning_rate": 8.8e-06,
"loss": 0.0458,
"step": 440
},
{
"epoch": 0.04,
"learning_rate": 9e-06,
"loss": 0.0319,
"step": 450
},
{
"epoch": 0.04,
"learning_rate": 9.200000000000002e-06,
"loss": 0.096,
"step": 460
},
{
"epoch": 0.04,
"learning_rate": 9.4e-06,
"loss": 0.0713,
"step": 470
},
{
"epoch": 0.04,
"learning_rate": 9.600000000000001e-06,
"loss": 0.1074,
"step": 480
},
{
"epoch": 0.04,
"learning_rate": 9.800000000000001e-06,
"loss": 0.0695,
"step": 490
},
{
"epoch": 0.04,
"learning_rate": 1e-05,
"loss": 0.0408,
"step": 500
},
{
"epoch": 0.05,
"learning_rate": 9.998200629779578e-06,
"loss": 0.0412,
"step": 510
},
{
"epoch": 0.05,
"learning_rate": 9.996401259559155e-06,
"loss": 0.092,
"step": 520
},
{
"epoch": 0.05,
"learning_rate": 9.994601889338731e-06,
"loss": 0.0777,
"step": 530
},
{
"epoch": 0.05,
"learning_rate": 9.99280251911831e-06,
"loss": 0.0442,
"step": 540
},
{
"epoch": 0.05,
"learning_rate": 9.991003148897887e-06,
"loss": 0.1633,
"step": 550
},
{
"epoch": 0.05,
"learning_rate": 9.989203778677464e-06,
"loss": 0.0985,
"step": 560
},
{
"epoch": 0.05,
"learning_rate": 9.98740440845704e-06,
"loss": 0.0819,
"step": 570
},
{
"epoch": 0.05,
"learning_rate": 9.985605038236617e-06,
"loss": 0.1122,
"step": 580
},
{
"epoch": 0.05,
"learning_rate": 9.983805668016196e-06,
"loss": 0.0936,
"step": 590
},
{
"epoch": 0.05,
"learning_rate": 9.982006297795773e-06,
"loss": 0.0693,
"step": 600
},
{
"epoch": 0.05,
"learning_rate": 9.98020692757535e-06,
"loss": 0.0854,
"step": 610
},
{
"epoch": 0.06,
"learning_rate": 9.978407557354927e-06,
"loss": 0.137,
"step": 620
},
{
"epoch": 0.06,
"learning_rate": 9.976608187134503e-06,
"loss": 0.0019,
"step": 630
},
{
"epoch": 0.06,
"learning_rate": 9.97480881691408e-06,
"loss": 0.1362,
"step": 640
},
{
"epoch": 0.06,
"learning_rate": 9.973009446693657e-06,
"loss": 0.0923,
"step": 650
},
{
"epoch": 0.06,
"learning_rate": 9.971210076473236e-06,
"loss": 0.0557,
"step": 660
},
{
"epoch": 0.06,
"learning_rate": 9.969410706252813e-06,
"loss": 0.0505,
"step": 670
},
{
"epoch": 0.06,
"learning_rate": 9.96761133603239e-06,
"loss": 0.0414,
"step": 680
},
{
"epoch": 0.06,
"learning_rate": 9.965811965811966e-06,
"loss": 0.074,
"step": 690
},
{
"epoch": 0.06,
"learning_rate": 9.964012595591543e-06,
"loss": 0.1012,
"step": 700
},
{
"epoch": 0.06,
"learning_rate": 9.962213225371122e-06,
"loss": 0.0792,
"step": 710
},
{
"epoch": 0.06,
"learning_rate": 9.960413855150699e-06,
"loss": 0.0928,
"step": 720
},
{
"epoch": 0.07,
"learning_rate": 9.958614484930275e-06,
"loss": 0.0312,
"step": 730
},
{
"epoch": 0.07,
"learning_rate": 9.956815114709852e-06,
"loss": 0.0529,
"step": 740
},
{
"epoch": 0.07,
"learning_rate": 9.955015744489429e-06,
"loss": 0.0793,
"step": 750
},
{
"epoch": 0.07,
"learning_rate": 9.953216374269008e-06,
"loss": 0.0597,
"step": 760
},
{
"epoch": 0.07,
"learning_rate": 9.951417004048583e-06,
"loss": 0.1541,
"step": 770
},
{
"epoch": 0.07,
"learning_rate": 9.949617633828161e-06,
"loss": 0.087,
"step": 780
},
{
"epoch": 0.07,
"learning_rate": 9.947818263607738e-06,
"loss": 0.034,
"step": 790
},
{
"epoch": 0.07,
"learning_rate": 9.946018893387315e-06,
"loss": 0.0948,
"step": 800
},
{
"epoch": 0.07,
"learning_rate": 9.944219523166892e-06,
"loss": 0.0078,
"step": 810
},
{
"epoch": 0.07,
"learning_rate": 9.942420152946469e-06,
"loss": 0.0797,
"step": 820
},
{
"epoch": 0.07,
"learning_rate": 9.940620782726047e-06,
"loss": 0.0859,
"step": 830
},
{
"epoch": 0.07,
"learning_rate": 9.938821412505624e-06,
"loss": 0.1256,
"step": 840
},
{
"epoch": 0.08,
"learning_rate": 9.937022042285201e-06,
"loss": 0.0775,
"step": 850
},
{
"epoch": 0.08,
"learning_rate": 9.935222672064778e-06,
"loss": 0.0539,
"step": 860
},
{
"epoch": 0.08,
"learning_rate": 9.933423301844355e-06,
"loss": 0.0795,
"step": 870
},
{
"epoch": 0.08,
"learning_rate": 9.931623931623933e-06,
"loss": 0.0099,
"step": 880
},
{
"epoch": 0.08,
"learning_rate": 9.929824561403509e-06,
"loss": 0.0253,
"step": 890
},
{
"epoch": 0.08,
"learning_rate": 9.928025191183087e-06,
"loss": 0.0442,
"step": 900
},
{
"epoch": 0.08,
"learning_rate": 9.926225820962664e-06,
"loss": 0.1785,
"step": 910
},
{
"epoch": 0.08,
"learning_rate": 9.92442645074224e-06,
"loss": 0.1241,
"step": 920
},
{
"epoch": 0.08,
"learning_rate": 9.922627080521818e-06,
"loss": 0.1332,
"step": 930
},
{
"epoch": 0.08,
"learning_rate": 9.920827710301395e-06,
"loss": 0.0407,
"step": 940
},
{
"epoch": 0.08,
"learning_rate": 9.919028340080973e-06,
"loss": 0.0546,
"step": 950
},
{
"epoch": 0.09,
"learning_rate": 9.91722896986055e-06,
"loss": 0.1157,
"step": 960
},
{
"epoch": 0.09,
"learning_rate": 9.915429599640127e-06,
"loss": 0.0697,
"step": 970
},
{
"epoch": 0.09,
"learning_rate": 9.913630229419704e-06,
"loss": 0.1253,
"step": 980
},
{
"epoch": 0.09,
"learning_rate": 9.91183085919928e-06,
"loss": 0.0205,
"step": 990
},
{
"epoch": 0.09,
"learning_rate": 9.910031488978859e-06,
"loss": 0.1289,
"step": 1000
},
{
"epoch": 0.09,
"learning_rate": 9.908232118758436e-06,
"loss": 0.0956,
"step": 1010
},
{
"epoch": 0.09,
"learning_rate": 9.906432748538013e-06,
"loss": 0.0972,
"step": 1020
},
{
"epoch": 0.09,
"learning_rate": 9.90463337831759e-06,
"loss": 0.0199,
"step": 1030
},
{
"epoch": 0.09,
"learning_rate": 9.902834008097167e-06,
"loss": 0.0263,
"step": 1040
},
{
"epoch": 0.09,
"learning_rate": 9.901034637876743e-06,
"loss": 0.0388,
"step": 1050
},
{
"epoch": 0.09,
"learning_rate": 9.89923526765632e-06,
"loss": 0.119,
"step": 1060
},
{
"epoch": 0.1,
"learning_rate": 9.897435897435899e-06,
"loss": 0.032,
"step": 1070
},
{
"epoch": 0.1,
"learning_rate": 9.895636527215476e-06,
"loss": 0.0877,
"step": 1080
},
{
"epoch": 0.1,
"learning_rate": 9.893837156995053e-06,
"loss": 0.0679,
"step": 1090
},
{
"epoch": 0.1,
"learning_rate": 9.89203778677463e-06,
"loss": 0.0273,
"step": 1100
},
{
"epoch": 0.1,
"learning_rate": 9.890238416554206e-06,
"loss": 0.048,
"step": 1110
},
{
"epoch": 0.1,
"learning_rate": 9.888439046333785e-06,
"loss": 0.0054,
"step": 1120
},
{
"epoch": 0.1,
"learning_rate": 9.886639676113362e-06,
"loss": 0.0812,
"step": 1130
},
{
"epoch": 0.1,
"learning_rate": 9.884840305892939e-06,
"loss": 0.0903,
"step": 1140
},
{
"epoch": 0.1,
"learning_rate": 9.883040935672515e-06,
"loss": 0.0092,
"step": 1150
},
{
"epoch": 0.1,
"learning_rate": 9.881241565452092e-06,
"loss": 0.0013,
"step": 1160
},
{
"epoch": 0.1,
"learning_rate": 9.879442195231669e-06,
"loss": 0.165,
"step": 1170
},
{
"epoch": 0.11,
"learning_rate": 9.877642825011246e-06,
"loss": 0.0509,
"step": 1180
},
{
"epoch": 0.11,
"learning_rate": 9.875843454790825e-06,
"loss": 0.0653,
"step": 1190
},
{
"epoch": 0.11,
"learning_rate": 9.874044084570401e-06,
"loss": 0.0253,
"step": 1200
},
{
"epoch": 0.11,
"learning_rate": 9.872244714349978e-06,
"loss": 0.0375,
"step": 1210
},
{
"epoch": 0.11,
"learning_rate": 9.870445344129555e-06,
"loss": 0.0689,
"step": 1220
},
{
"epoch": 0.11,
"learning_rate": 9.868645973909132e-06,
"loss": 0.0674,
"step": 1230
},
{
"epoch": 0.11,
"learning_rate": 9.86684660368871e-06,
"loss": 0.1405,
"step": 1240
},
{
"epoch": 0.11,
"learning_rate": 9.865047233468287e-06,
"loss": 0.0604,
"step": 1250
},
{
"epoch": 0.11,
"learning_rate": 9.863247863247864e-06,
"loss": 0.0388,
"step": 1260
},
{
"epoch": 0.11,
"learning_rate": 9.861448493027441e-06,
"loss": 0.0627,
"step": 1270
},
{
"epoch": 0.11,
"learning_rate": 9.859649122807018e-06,
"loss": 0.0909,
"step": 1280
},
{
"epoch": 0.12,
"learning_rate": 9.857849752586597e-06,
"loss": 0.0419,
"step": 1290
},
{
"epoch": 0.12,
"learning_rate": 9.856050382366172e-06,
"loss": 0.0019,
"step": 1300
},
{
"epoch": 0.12,
"learning_rate": 9.85425101214575e-06,
"loss": 0.1776,
"step": 1310
},
{
"epoch": 0.12,
"learning_rate": 9.852451641925327e-06,
"loss": 0.003,
"step": 1320
},
{
"epoch": 0.12,
"learning_rate": 9.850652271704904e-06,
"loss": 0.0764,
"step": 1330
},
{
"epoch": 0.12,
"learning_rate": 9.84885290148448e-06,
"loss": 0.0753,
"step": 1340
},
{
"epoch": 0.12,
"learning_rate": 9.847053531264058e-06,
"loss": 0.0831,
"step": 1350
},
{
"epoch": 0.12,
"learning_rate": 9.845254161043636e-06,
"loss": 0.1177,
"step": 1360
},
{
"epoch": 0.12,
"learning_rate": 9.843454790823213e-06,
"loss": 0.1527,
"step": 1370
},
{
"epoch": 0.12,
"learning_rate": 9.84165542060279e-06,
"loss": 0.1304,
"step": 1380
},
{
"epoch": 0.12,
"learning_rate": 9.839856050382367e-06,
"loss": 0.0945,
"step": 1390
},
{
"epoch": 0.12,
"learning_rate": 9.838056680161944e-06,
"loss": 0.0516,
"step": 1400
},
{
"epoch": 0.13,
"learning_rate": 9.836257309941522e-06,
"loss": 0.0216,
"step": 1410
},
{
"epoch": 0.13,
"learning_rate": 9.834457939721097e-06,
"loss": 0.1259,
"step": 1420
},
{
"epoch": 0.13,
"learning_rate": 9.832658569500676e-06,
"loss": 0.0558,
"step": 1430
},
{
"epoch": 0.13,
"learning_rate": 9.830859199280253e-06,
"loss": 0.0711,
"step": 1440
},
{
"epoch": 0.13,
"learning_rate": 9.82905982905983e-06,
"loss": 0.1041,
"step": 1450
},
{
"epoch": 0.13,
"learning_rate": 9.827260458839407e-06,
"loss": 0.1173,
"step": 1460
},
{
"epoch": 0.13,
"learning_rate": 9.825461088618983e-06,
"loss": 0.1194,
"step": 1470
},
{
"epoch": 0.13,
"learning_rate": 9.823661718398562e-06,
"loss": 0.1103,
"step": 1480
},
{
"epoch": 0.13,
"learning_rate": 9.821862348178139e-06,
"loss": 0.0481,
"step": 1490
},
{
"epoch": 0.13,
"learning_rate": 9.820062977957716e-06,
"loss": 0.0413,
"step": 1500
},
{
"epoch": 0.13,
"eval_accuracy": 0.9769145744512869,
"eval_f1": 0.9595305320267951,
"eval_loss": 0.07930105179548264,
"eval_precision": 0.9639599555061179,
"eval_recall": 0.955141629009148,
"eval_runtime": 436.6474,
"eval_samples_per_second": 72.518,
"eval_steps_per_second": 4.535,
"step": 1500
},
{
"epoch": 0.13,
"learning_rate": 9.818263607737293e-06,
"loss": 0.0825,
"step": 1510
},
{
"epoch": 0.14,
"learning_rate": 9.81646423751687e-06,
"loss": 0.0769,
"step": 1520
},
{
"epoch": 0.14,
"learning_rate": 9.814664867296448e-06,
"loss": 0.0901,
"step": 1530
},
{
"epoch": 0.14,
"learning_rate": 9.812865497076025e-06,
"loss": 0.0237,
"step": 1540
},
{
"epoch": 0.14,
"learning_rate": 9.811066126855602e-06,
"loss": 0.1067,
"step": 1550
},
{
"epoch": 0.14,
"learning_rate": 9.809266756635179e-06,
"loss": 0.0536,
"step": 1560
},
{
"epoch": 0.14,
"learning_rate": 9.807467386414755e-06,
"loss": 0.0372,
"step": 1570
},
{
"epoch": 0.14,
"learning_rate": 9.805668016194332e-06,
"loss": 0.075,
"step": 1580
},
{
"epoch": 0.14,
"learning_rate": 9.803868645973909e-06,
"loss": 0.0022,
"step": 1590
},
{
"epoch": 0.14,
"learning_rate": 9.802069275753488e-06,
"loss": 0.1077,
"step": 1600
},
{
"epoch": 0.14,
"learning_rate": 9.800269905533065e-06,
"loss": 0.019,
"step": 1610
},
{
"epoch": 0.14,
"learning_rate": 9.798470535312641e-06,
"loss": 0.0456,
"step": 1620
},
{
"epoch": 0.15,
"learning_rate": 9.796671165092218e-06,
"loss": 0.0074,
"step": 1630
},
{
"epoch": 0.15,
"learning_rate": 9.794871794871795e-06,
"loss": 0.0944,
"step": 1640
},
{
"epoch": 0.15,
"learning_rate": 9.793072424651374e-06,
"loss": 0.0055,
"step": 1650
},
{
"epoch": 0.15,
"learning_rate": 9.79127305443095e-06,
"loss": 0.0979,
"step": 1660
},
{
"epoch": 0.15,
"learning_rate": 9.789473684210527e-06,
"loss": 0.2072,
"step": 1670
},
{
"epoch": 0.15,
"learning_rate": 9.787674313990104e-06,
"loss": 0.1238,
"step": 1680
},
{
"epoch": 0.15,
"learning_rate": 9.785874943769681e-06,
"loss": 0.0366,
"step": 1690
},
{
"epoch": 0.15,
"learning_rate": 9.784075573549258e-06,
"loss": 0.0754,
"step": 1700
},
{
"epoch": 0.15,
"learning_rate": 9.782276203328835e-06,
"loss": 0.042,
"step": 1710
},
{
"epoch": 0.15,
"learning_rate": 9.780476833108413e-06,
"loss": 0.0614,
"step": 1720
},
{
"epoch": 0.15,
"learning_rate": 9.77867746288799e-06,
"loss": 0.0572,
"step": 1730
},
{
"epoch": 0.16,
"learning_rate": 9.776878092667567e-06,
"loss": 0.0323,
"step": 1740
},
{
"epoch": 0.16,
"learning_rate": 9.775078722447144e-06,
"loss": 0.058,
"step": 1750
},
{
"epoch": 0.16,
"learning_rate": 9.77327935222672e-06,
"loss": 0.0715,
"step": 1760
},
{
"epoch": 0.16,
"learning_rate": 9.7714799820063e-06,
"loss": 0.0783,
"step": 1770
},
{
"epoch": 0.16,
"learning_rate": 9.769680611785876e-06,
"loss": 0.0982,
"step": 1780
},
{
"epoch": 0.16,
"learning_rate": 9.767881241565453e-06,
"loss": 0.0858,
"step": 1790
},
{
"epoch": 0.16,
"learning_rate": 9.76608187134503e-06,
"loss": 0.0535,
"step": 1800
},
{
"epoch": 0.16,
"learning_rate": 9.764282501124607e-06,
"loss": 0.0447,
"step": 1810
},
{
"epoch": 0.16,
"learning_rate": 9.762483130904185e-06,
"loss": 0.0819,
"step": 1820
},
{
"epoch": 0.16,
"learning_rate": 9.76068376068376e-06,
"loss": 0.0565,
"step": 1830
},
{
"epoch": 0.16,
"learning_rate": 9.758884390463339e-06,
"loss": 0.0829,
"step": 1840
},
{
"epoch": 0.16,
"learning_rate": 9.757085020242916e-06,
"loss": 0.1505,
"step": 1850
},
{
"epoch": 0.17,
"learning_rate": 9.755285650022493e-06,
"loss": 0.1586,
"step": 1860
},
{
"epoch": 0.17,
"learning_rate": 9.75348627980207e-06,
"loss": 0.0098,
"step": 1870
},
{
"epoch": 0.17,
"learning_rate": 9.751686909581647e-06,
"loss": 0.1513,
"step": 1880
},
{
"epoch": 0.17,
"learning_rate": 9.749887539361225e-06,
"loss": 0.136,
"step": 1890
},
{
"epoch": 0.17,
"learning_rate": 9.748088169140802e-06,
"loss": 0.1072,
"step": 1900
},
{
"epoch": 0.17,
"learning_rate": 9.746288798920379e-06,
"loss": 0.0829,
"step": 1910
},
{
"epoch": 0.17,
"learning_rate": 9.744489428699956e-06,
"loss": 0.0647,
"step": 1920
},
{
"epoch": 0.17,
"learning_rate": 9.742690058479533e-06,
"loss": 0.035,
"step": 1930
},
{
"epoch": 0.17,
"learning_rate": 9.740890688259111e-06,
"loss": 0.077,
"step": 1940
},
{
"epoch": 0.17,
"learning_rate": 9.739091318038686e-06,
"loss": 0.0325,
"step": 1950
},
{
"epoch": 0.17,
"learning_rate": 9.737291947818265e-06,
"loss": 0.0155,
"step": 1960
},
{
"epoch": 0.18,
"learning_rate": 9.735492577597842e-06,
"loss": 0.041,
"step": 1970
},
{
"epoch": 0.18,
"learning_rate": 9.733693207377419e-06,
"loss": 0.1311,
"step": 1980
},
{
"epoch": 0.18,
"learning_rate": 9.731893837156995e-06,
"loss": 0.1448,
"step": 1990
},
{
"epoch": 0.18,
"learning_rate": 9.730094466936572e-06,
"loss": 0.1902,
"step": 2000
},
{
"epoch": 0.18,
"learning_rate": 9.72829509671615e-06,
"loss": 0.1521,
"step": 2010
},
{
"epoch": 0.18,
"learning_rate": 9.726495726495728e-06,
"loss": 0.2189,
"step": 2020
},
{
"epoch": 0.18,
"learning_rate": 9.724696356275305e-06,
"loss": 0.0697,
"step": 2030
},
{
"epoch": 0.18,
"learning_rate": 9.722896986054881e-06,
"loss": 0.0175,
"step": 2040
},
{
"epoch": 0.18,
"learning_rate": 9.721097615834458e-06,
"loss": 0.0511,
"step": 2050
},
{
"epoch": 0.18,
"learning_rate": 9.719298245614037e-06,
"loss": 0.1222,
"step": 2060
},
{
"epoch": 0.18,
"learning_rate": 9.717498875393614e-06,
"loss": 0.0637,
"step": 2070
},
{
"epoch": 0.19,
"learning_rate": 9.71569950517319e-06,
"loss": 0.0311,
"step": 2080
},
{
"epoch": 0.19,
"learning_rate": 9.713900134952767e-06,
"loss": 0.0493,
"step": 2090
},
{
"epoch": 0.19,
"learning_rate": 9.712100764732344e-06,
"loss": 0.0671,
"step": 2100
},
{
"epoch": 0.19,
"learning_rate": 9.710301394511921e-06,
"loss": 0.0948,
"step": 2110
},
{
"epoch": 0.19,
"learning_rate": 9.708502024291498e-06,
"loss": 0.0475,
"step": 2120
},
{
"epoch": 0.19,
"learning_rate": 9.706702654071076e-06,
"loss": 0.042,
"step": 2130
},
{
"epoch": 0.19,
"learning_rate": 9.704903283850653e-06,
"loss": 0.0755,
"step": 2140
},
{
"epoch": 0.19,
"learning_rate": 9.70310391363023e-06,
"loss": 0.0164,
"step": 2150
},
{
"epoch": 0.19,
"learning_rate": 9.701304543409807e-06,
"loss": 0.0568,
"step": 2160
},
{
"epoch": 0.19,
"learning_rate": 9.699505173189384e-06,
"loss": 0.0074,
"step": 2170
},
{
"epoch": 0.19,
"learning_rate": 9.697705802968962e-06,
"loss": 0.2288,
"step": 2180
},
{
"epoch": 0.2,
"learning_rate": 9.69590643274854e-06,
"loss": 0.1608,
"step": 2190
},
{
"epoch": 0.2,
"learning_rate": 9.694107062528116e-06,
"loss": 0.0774,
"step": 2200
},
{
"epoch": 0.2,
"learning_rate": 9.692307692307693e-06,
"loss": 0.1041,
"step": 2210
},
{
"epoch": 0.2,
"learning_rate": 9.69050832208727e-06,
"loss": 0.0561,
"step": 2220
},
{
"epoch": 0.2,
"learning_rate": 9.688708951866847e-06,
"loss": 0.1579,
"step": 2230
},
{
"epoch": 0.2,
"learning_rate": 9.686909581646424e-06,
"loss": 0.0396,
"step": 2240
},
{
"epoch": 0.2,
"learning_rate": 9.685110211426002e-06,
"loss": 0.0969,
"step": 2250
},
{
"epoch": 0.2,
"learning_rate": 9.683310841205579e-06,
"loss": 0.0862,
"step": 2260
},
{
"epoch": 0.2,
"learning_rate": 9.681511470985156e-06,
"loss": 0.1806,
"step": 2270
},
{
"epoch": 0.2,
"learning_rate": 9.679712100764733e-06,
"loss": 0.0316,
"step": 2280
},
{
"epoch": 0.2,
"learning_rate": 9.67791273054431e-06,
"loss": 0.059,
"step": 2290
},
{
"epoch": 0.21,
"learning_rate": 9.676113360323888e-06,
"loss": 0.0042,
"step": 2300
},
{
"epoch": 0.21,
"learning_rate": 9.674313990103465e-06,
"loss": 0.0331,
"step": 2310
},
{
"epoch": 0.21,
"learning_rate": 9.672514619883042e-06,
"loss": 0.0941,
"step": 2320
},
{
"epoch": 0.21,
"learning_rate": 9.670715249662619e-06,
"loss": 0.0765,
"step": 2330
},
{
"epoch": 0.21,
"learning_rate": 9.668915879442196e-06,
"loss": 0.1153,
"step": 2340
},
{
"epoch": 0.21,
"learning_rate": 9.667116509221774e-06,
"loss": 0.208,
"step": 2350
},
{
"epoch": 0.21,
"learning_rate": 9.66531713900135e-06,
"loss": 0.1071,
"step": 2360
},
{
"epoch": 0.21,
"learning_rate": 9.663517768780928e-06,
"loss": 0.0316,
"step": 2370
},
{
"epoch": 0.21,
"learning_rate": 9.661718398560505e-06,
"loss": 0.0204,
"step": 2380
},
{
"epoch": 0.21,
"learning_rate": 9.659919028340082e-06,
"loss": 0.0503,
"step": 2390
},
{
"epoch": 0.21,
"learning_rate": 9.658119658119659e-06,
"loss": 0.1018,
"step": 2400
},
{
"epoch": 0.21,
"learning_rate": 9.656320287899235e-06,
"loss": 0.0993,
"step": 2410
},
{
"epoch": 0.22,
"learning_rate": 9.654520917678814e-06,
"loss": 0.0304,
"step": 2420
},
{
"epoch": 0.22,
"learning_rate": 9.65272154745839e-06,
"loss": 0.0567,
"step": 2430
},
{
"epoch": 0.22,
"learning_rate": 9.650922177237968e-06,
"loss": 0.0308,
"step": 2440
},
{
"epoch": 0.22,
"learning_rate": 9.649122807017545e-06,
"loss": 0.0647,
"step": 2450
},
{
"epoch": 0.22,
"learning_rate": 9.647323436797121e-06,
"loss": 0.1115,
"step": 2460
},
{
"epoch": 0.22,
"learning_rate": 9.6455240665767e-06,
"loss": 0.0392,
"step": 2470
},
{
"epoch": 0.22,
"learning_rate": 9.643724696356275e-06,
"loss": 0.1018,
"step": 2480
},
{
"epoch": 0.22,
"learning_rate": 9.641925326135854e-06,
"loss": 0.0271,
"step": 2490
},
{
"epoch": 0.22,
"learning_rate": 9.64012595591543e-06,
"loss": 0.1548,
"step": 2500
},
{
"epoch": 0.22,
"learning_rate": 9.638326585695007e-06,
"loss": 0.0135,
"step": 2510
},
{
"epoch": 0.22,
"learning_rate": 9.636527215474584e-06,
"loss": 0.0794,
"step": 2520
},
{
"epoch": 0.23,
"learning_rate": 9.634727845254161e-06,
"loss": 0.0612,
"step": 2530
},
{
"epoch": 0.23,
"learning_rate": 9.63292847503374e-06,
"loss": 0.0375,
"step": 2540
},
{
"epoch": 0.23,
"learning_rate": 9.631129104813316e-06,
"loss": 0.0702,
"step": 2550
},
{
"epoch": 0.23,
"learning_rate": 9.629329734592893e-06,
"loss": 0.0353,
"step": 2560
},
{
"epoch": 0.23,
"learning_rate": 9.62753036437247e-06,
"loss": 0.0771,
"step": 2570
},
{
"epoch": 0.23,
"learning_rate": 9.625730994152047e-06,
"loss": 0.0078,
"step": 2580
},
{
"epoch": 0.23,
"learning_rate": 9.623931623931626e-06,
"loss": 0.0934,
"step": 2590
},
{
"epoch": 0.23,
"learning_rate": 9.6221322537112e-06,
"loss": 0.0922,
"step": 2600
},
{
"epoch": 0.23,
"learning_rate": 9.62033288349078e-06,
"loss": 0.0676,
"step": 2610
},
{
"epoch": 0.23,
"learning_rate": 9.618533513270356e-06,
"loss": 0.1065,
"step": 2620
},
{
"epoch": 0.23,
"learning_rate": 9.616734143049933e-06,
"loss": 0.0321,
"step": 2630
},
{
"epoch": 0.24,
"learning_rate": 9.61493477282951e-06,
"loss": 0.0563,
"step": 2640
},
{
"epoch": 0.24,
"learning_rate": 9.613135402609087e-06,
"loss": 0.1313,
"step": 2650
},
{
"epoch": 0.24,
"learning_rate": 9.611336032388665e-06,
"loss": 0.0676,
"step": 2660
},
{
"epoch": 0.24,
"learning_rate": 9.609536662168242e-06,
"loss": 0.0306,
"step": 2670
},
{
"epoch": 0.24,
"learning_rate": 9.607737291947819e-06,
"loss": 0.0298,
"step": 2680
},
{
"epoch": 0.24,
"learning_rate": 9.605937921727396e-06,
"loss": 0.0494,
"step": 2690
},
{
"epoch": 0.24,
"learning_rate": 9.604138551506973e-06,
"loss": 0.024,
"step": 2700
},
{
"epoch": 0.24,
"learning_rate": 9.602339181286551e-06,
"loss": 0.0462,
"step": 2710
},
{
"epoch": 0.24,
"learning_rate": 9.600539811066128e-06,
"loss": 0.0931,
"step": 2720
},
{
"epoch": 0.24,
"learning_rate": 9.598740440845705e-06,
"loss": 0.1424,
"step": 2730
},
{
"epoch": 0.24,
"learning_rate": 9.596941070625282e-06,
"loss": 0.0485,
"step": 2740
},
{
"epoch": 0.25,
"learning_rate": 9.595141700404859e-06,
"loss": 0.1279,
"step": 2750
},
{
"epoch": 0.25,
"learning_rate": 9.593342330184436e-06,
"loss": 0.0226,
"step": 2760
},
{
"epoch": 0.25,
"learning_rate": 9.591542959964013e-06,
"loss": 0.0528,
"step": 2770
},
{
"epoch": 0.25,
"learning_rate": 9.589743589743591e-06,
"loss": 0.0527,
"step": 2780
},
{
"epoch": 0.25,
"learning_rate": 9.587944219523168e-06,
"loss": 0.0817,
"step": 2790
},
{
"epoch": 0.25,
"learning_rate": 9.586144849302745e-06,
"loss": 0.0079,
"step": 2800
},
{
"epoch": 0.25,
"learning_rate": 9.584345479082322e-06,
"loss": 0.1174,
"step": 2810
},
{
"epoch": 0.25,
"learning_rate": 9.582546108861898e-06,
"loss": 0.0392,
"step": 2820
},
{
"epoch": 0.25,
"learning_rate": 9.580746738641477e-06,
"loss": 0.0268,
"step": 2830
},
{
"epoch": 0.25,
"learning_rate": 9.578947368421054e-06,
"loss": 0.0946,
"step": 2840
},
{
"epoch": 0.25,
"learning_rate": 9.57714799820063e-06,
"loss": 0.0524,
"step": 2850
},
{
"epoch": 0.26,
"learning_rate": 9.575348627980208e-06,
"loss": 0.0382,
"step": 2860
},
{
"epoch": 0.26,
"learning_rate": 9.573549257759784e-06,
"loss": 0.0014,
"step": 2870
},
{
"epoch": 0.26,
"learning_rate": 9.571749887539361e-06,
"loss": 0.0695,
"step": 2880
},
{
"epoch": 0.26,
"learning_rate": 9.569950517318938e-06,
"loss": 0.0376,
"step": 2890
},
{
"epoch": 0.26,
"learning_rate": 9.568151147098517e-06,
"loss": 0.04,
"step": 2900
},
{
"epoch": 0.26,
"learning_rate": 9.566351776878094e-06,
"loss": 0.0388,
"step": 2910
},
{
"epoch": 0.26,
"learning_rate": 9.56455240665767e-06,
"loss": 0.0649,
"step": 2920
},
{
"epoch": 0.26,
"learning_rate": 9.562753036437247e-06,
"loss": 0.1104,
"step": 2930
},
{
"epoch": 0.26,
"learning_rate": 9.560953666216824e-06,
"loss": 0.1071,
"step": 2940
},
{
"epoch": 0.26,
"learning_rate": 9.559154295996403e-06,
"loss": 0.1165,
"step": 2950
},
{
"epoch": 0.26,
"learning_rate": 9.55735492577598e-06,
"loss": 0.0218,
"step": 2960
},
{
"epoch": 0.26,
"learning_rate": 9.555555555555556e-06,
"loss": 0.0353,
"step": 2970
},
{
"epoch": 0.27,
"learning_rate": 9.553756185335133e-06,
"loss": 0.0902,
"step": 2980
},
{
"epoch": 0.27,
"learning_rate": 9.55195681511471e-06,
"loss": 0.0277,
"step": 2990
},
{
"epoch": 0.27,
"learning_rate": 9.550157444894289e-06,
"loss": 0.0377,
"step": 3000
},
{
"epoch": 0.27,
"eval_accuracy": 0.9766303489657351,
"eval_f1": 0.9593183067619572,
"eval_loss": 0.09248499572277069,
"eval_precision": 0.9570034002413075,
"eval_recall": 0.9616444395459054,
"eval_runtime": 436.5505,
"eval_samples_per_second": 72.535,
"eval_steps_per_second": 4.536,
"step": 3000
}
],
"max_steps": 56075,
"num_train_epochs": 5,
"total_flos": 2.5414374334464e+16,
"trial_name": null,
"trial_params": null
}