phobert-classification-travel / trainer_state.json
HieuAnh's picture
Upload 12 files
be5d9fe
raw
history blame
36.9 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 9.992622895848703,
"eval_steps": 500,
"global_step": 149000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.03,
"learning_rate": 1.9932935416806386e-05,
"loss": 0.7069,
"step": 500
},
{
"epoch": 0.07,
"learning_rate": 1.986587083361277e-05,
"loss": 0.4778,
"step": 1000
},
{
"epoch": 0.1,
"learning_rate": 1.9798806250419156e-05,
"loss": 0.4614,
"step": 1500
},
{
"epoch": 0.13,
"learning_rate": 1.973174166722554e-05,
"loss": 0.395,
"step": 2000
},
{
"epoch": 0.17,
"learning_rate": 1.9664677084031925e-05,
"loss": 0.3459,
"step": 2500
},
{
"epoch": 0.2,
"learning_rate": 1.9597612500838306e-05,
"loss": 0.3942,
"step": 3000
},
{
"epoch": 0.23,
"learning_rate": 1.9530547917644694e-05,
"loss": 0.5045,
"step": 3500
},
{
"epoch": 0.27,
"learning_rate": 1.946348333445108e-05,
"loss": 0.5222,
"step": 4000
},
{
"epoch": 0.3,
"learning_rate": 1.9396418751257464e-05,
"loss": 0.6322,
"step": 4500
},
{
"epoch": 0.34,
"learning_rate": 1.9329354168063848e-05,
"loss": 0.4139,
"step": 5000
},
{
"epoch": 0.37,
"learning_rate": 1.926228958487023e-05,
"loss": 0.3958,
"step": 5500
},
{
"epoch": 0.4,
"learning_rate": 1.9195225001676618e-05,
"loss": 0.3241,
"step": 6000
},
{
"epoch": 0.44,
"learning_rate": 1.9128160418483002e-05,
"loss": 0.3645,
"step": 6500
},
{
"epoch": 0.47,
"learning_rate": 1.9061095835289383e-05,
"loss": 0.4296,
"step": 7000
},
{
"epoch": 0.5,
"learning_rate": 1.899403125209577e-05,
"loss": 0.4467,
"step": 7500
},
{
"epoch": 0.54,
"learning_rate": 1.8926966668902153e-05,
"loss": 0.499,
"step": 8000
},
{
"epoch": 0.57,
"learning_rate": 1.8859902085708537e-05,
"loss": 0.3124,
"step": 8500
},
{
"epoch": 0.6,
"learning_rate": 1.8792837502514922e-05,
"loss": 0.4169,
"step": 9000
},
{
"epoch": 0.64,
"learning_rate": 1.8725772919321307e-05,
"loss": 0.4071,
"step": 9500
},
{
"epoch": 0.67,
"learning_rate": 1.8658708336127695e-05,
"loss": 0.2717,
"step": 10000
},
{
"epoch": 0.7,
"learning_rate": 1.8591643752934076e-05,
"loss": 0.3581,
"step": 10500
},
{
"epoch": 0.74,
"learning_rate": 1.852457916974046e-05,
"loss": 0.3181,
"step": 11000
},
{
"epoch": 0.77,
"learning_rate": 1.8457514586546845e-05,
"loss": 0.3488,
"step": 11500
},
{
"epoch": 0.8,
"learning_rate": 1.839045000335323e-05,
"loss": 0.3342,
"step": 12000
},
{
"epoch": 0.84,
"learning_rate": 1.8323385420159615e-05,
"loss": 0.4063,
"step": 12500
},
{
"epoch": 0.87,
"learning_rate": 1.8256320836966e-05,
"loss": 0.3071,
"step": 13000
},
{
"epoch": 0.91,
"learning_rate": 1.8189256253772384e-05,
"loss": 0.2998,
"step": 13500
},
{
"epoch": 0.94,
"learning_rate": 1.812219167057877e-05,
"loss": 0.2989,
"step": 14000
},
{
"epoch": 0.97,
"learning_rate": 1.8055127087385153e-05,
"loss": 0.2705,
"step": 14500
},
{
"epoch": 1.01,
"learning_rate": 1.7988062504191538e-05,
"loss": 0.3,
"step": 15000
},
{
"epoch": 1.04,
"learning_rate": 1.7920997920997923e-05,
"loss": 0.3253,
"step": 15500
},
{
"epoch": 1.07,
"learning_rate": 1.7853933337804307e-05,
"loss": 0.2612,
"step": 16000
},
{
"epoch": 1.11,
"learning_rate": 1.7786868754610692e-05,
"loss": 0.3027,
"step": 16500
},
{
"epoch": 1.14,
"learning_rate": 1.7719804171417077e-05,
"loss": 0.3124,
"step": 17000
},
{
"epoch": 1.17,
"learning_rate": 1.765273958822346e-05,
"loss": 0.2997,
"step": 17500
},
{
"epoch": 1.21,
"learning_rate": 1.7585675005029846e-05,
"loss": 0.3177,
"step": 18000
},
{
"epoch": 1.24,
"learning_rate": 1.751861042183623e-05,
"loss": 0.3636,
"step": 18500
},
{
"epoch": 1.27,
"learning_rate": 1.7451545838642615e-05,
"loss": 0.3025,
"step": 19000
},
{
"epoch": 1.31,
"learning_rate": 1.7384481255449e-05,
"loss": 0.4098,
"step": 19500
},
{
"epoch": 1.34,
"learning_rate": 1.731741667225538e-05,
"loss": 0.3093,
"step": 20000
},
{
"epoch": 1.37,
"learning_rate": 1.725035208906177e-05,
"loss": 0.3261,
"step": 20500
},
{
"epoch": 1.41,
"learning_rate": 1.718328750586815e-05,
"loss": 0.2338,
"step": 21000
},
{
"epoch": 1.44,
"learning_rate": 1.711622292267454e-05,
"loss": 0.2204,
"step": 21500
},
{
"epoch": 1.48,
"learning_rate": 1.7049158339480923e-05,
"loss": 0.2963,
"step": 22000
},
{
"epoch": 1.51,
"learning_rate": 1.6982093756287304e-05,
"loss": 0.1368,
"step": 22500
},
{
"epoch": 1.54,
"learning_rate": 1.6915029173093693e-05,
"loss": 0.307,
"step": 23000
},
{
"epoch": 1.58,
"learning_rate": 1.6847964589900074e-05,
"loss": 0.2184,
"step": 23500
},
{
"epoch": 1.61,
"learning_rate": 1.678090000670646e-05,
"loss": 0.2967,
"step": 24000
},
{
"epoch": 1.64,
"learning_rate": 1.6713835423512847e-05,
"loss": 0.2879,
"step": 24500
},
{
"epoch": 1.68,
"learning_rate": 1.6646770840319228e-05,
"loss": 0.2899,
"step": 25000
},
{
"epoch": 1.71,
"learning_rate": 1.6579706257125612e-05,
"loss": 0.2182,
"step": 25500
},
{
"epoch": 1.74,
"learning_rate": 1.6512641673931997e-05,
"loss": 0.1737,
"step": 26000
},
{
"epoch": 1.78,
"learning_rate": 1.6445577090738382e-05,
"loss": 0.2756,
"step": 26500
},
{
"epoch": 1.81,
"learning_rate": 1.6378512507544766e-05,
"loss": 0.284,
"step": 27000
},
{
"epoch": 1.84,
"learning_rate": 1.631144792435115e-05,
"loss": 0.2672,
"step": 27500
},
{
"epoch": 1.88,
"learning_rate": 1.6244383341157536e-05,
"loss": 0.2016,
"step": 28000
},
{
"epoch": 1.91,
"learning_rate": 1.617731875796392e-05,
"loss": 0.2257,
"step": 28500
},
{
"epoch": 1.94,
"learning_rate": 1.6110254174770305e-05,
"loss": 0.3434,
"step": 29000
},
{
"epoch": 1.98,
"learning_rate": 1.604318959157669e-05,
"loss": 0.1699,
"step": 29500
},
{
"epoch": 2.01,
"learning_rate": 1.5976125008383074e-05,
"loss": 0.2511,
"step": 30000
},
{
"epoch": 2.05,
"learning_rate": 1.590906042518946e-05,
"loss": 0.2026,
"step": 30500
},
{
"epoch": 2.08,
"learning_rate": 1.5841995841995844e-05,
"loss": 0.2522,
"step": 31000
},
{
"epoch": 2.11,
"learning_rate": 1.577493125880223e-05,
"loss": 0.2572,
"step": 31500
},
{
"epoch": 2.15,
"learning_rate": 1.5707866675608613e-05,
"loss": 0.2237,
"step": 32000
},
{
"epoch": 2.18,
"learning_rate": 1.5640802092414998e-05,
"loss": 0.195,
"step": 32500
},
{
"epoch": 2.21,
"learning_rate": 1.557373750922138e-05,
"loss": 0.218,
"step": 33000
},
{
"epoch": 2.25,
"learning_rate": 1.5506672926027767e-05,
"loss": 0.1463,
"step": 33500
},
{
"epoch": 2.28,
"learning_rate": 1.543960834283415e-05,
"loss": 0.1723,
"step": 34000
},
{
"epoch": 2.31,
"learning_rate": 1.5372543759640536e-05,
"loss": 0.1874,
"step": 34500
},
{
"epoch": 2.35,
"learning_rate": 1.530547917644692e-05,
"loss": 0.2871,
"step": 35000
},
{
"epoch": 2.38,
"learning_rate": 1.5238414593253304e-05,
"loss": 0.3132,
"step": 35500
},
{
"epoch": 2.41,
"learning_rate": 1.5171350010059689e-05,
"loss": 0.2591,
"step": 36000
},
{
"epoch": 2.45,
"learning_rate": 1.5104285426866075e-05,
"loss": 0.1916,
"step": 36500
},
{
"epoch": 2.48,
"learning_rate": 1.5037220843672458e-05,
"loss": 0.2998,
"step": 37000
},
{
"epoch": 2.51,
"learning_rate": 1.4970156260478843e-05,
"loss": 0.1397,
"step": 37500
},
{
"epoch": 2.55,
"learning_rate": 1.4903091677285225e-05,
"loss": 0.2588,
"step": 38000
},
{
"epoch": 2.58,
"learning_rate": 1.4836027094091612e-05,
"loss": 0.2526,
"step": 38500
},
{
"epoch": 2.62,
"learning_rate": 1.4768962510897995e-05,
"loss": 0.1548,
"step": 39000
},
{
"epoch": 2.65,
"learning_rate": 1.4701897927704381e-05,
"loss": 0.2322,
"step": 39500
},
{
"epoch": 2.68,
"learning_rate": 1.4634833344510766e-05,
"loss": 0.1839,
"step": 40000
},
{
"epoch": 2.72,
"learning_rate": 1.4567768761317149e-05,
"loss": 0.1062,
"step": 40500
},
{
"epoch": 2.75,
"learning_rate": 1.4500704178123535e-05,
"loss": 0.1758,
"step": 41000
},
{
"epoch": 2.78,
"learning_rate": 1.4433639594929918e-05,
"loss": 0.2261,
"step": 41500
},
{
"epoch": 2.82,
"learning_rate": 1.4366575011736303e-05,
"loss": 0.1591,
"step": 42000
},
{
"epoch": 2.85,
"learning_rate": 1.4299510428542687e-05,
"loss": 0.2023,
"step": 42500
},
{
"epoch": 2.88,
"learning_rate": 1.4232445845349072e-05,
"loss": 0.2378,
"step": 43000
},
{
"epoch": 2.92,
"learning_rate": 1.4165381262155458e-05,
"loss": 0.1869,
"step": 43500
},
{
"epoch": 2.95,
"learning_rate": 1.4098316678961841e-05,
"loss": 0.2229,
"step": 44000
},
{
"epoch": 2.98,
"learning_rate": 1.4031252095768226e-05,
"loss": 0.1752,
"step": 44500
},
{
"epoch": 3.02,
"learning_rate": 1.3964187512574609e-05,
"loss": 0.2121,
"step": 45000
},
{
"epoch": 3.05,
"learning_rate": 1.3897122929380995e-05,
"loss": 0.1928,
"step": 45500
},
{
"epoch": 3.08,
"learning_rate": 1.383005834618738e-05,
"loss": 0.0827,
"step": 46000
},
{
"epoch": 3.12,
"learning_rate": 1.3762993762993763e-05,
"loss": 0.2259,
"step": 46500
},
{
"epoch": 3.15,
"learning_rate": 1.369592917980015e-05,
"loss": 0.1447,
"step": 47000
},
{
"epoch": 3.19,
"learning_rate": 1.3628864596606532e-05,
"loss": 0.1481,
"step": 47500
},
{
"epoch": 3.22,
"learning_rate": 1.3561800013412919e-05,
"loss": 0.2343,
"step": 48000
},
{
"epoch": 3.25,
"learning_rate": 1.3494735430219302e-05,
"loss": 0.1454,
"step": 48500
},
{
"epoch": 3.29,
"learning_rate": 1.3427670847025686e-05,
"loss": 0.1152,
"step": 49000
},
{
"epoch": 3.32,
"learning_rate": 1.3360606263832073e-05,
"loss": 0.2361,
"step": 49500
},
{
"epoch": 3.35,
"learning_rate": 1.3293541680638456e-05,
"loss": 0.091,
"step": 50000
},
{
"epoch": 3.39,
"learning_rate": 1.322647709744484e-05,
"loss": 0.1912,
"step": 50500
},
{
"epoch": 3.42,
"learning_rate": 1.3159412514251225e-05,
"loss": 0.1125,
"step": 51000
},
{
"epoch": 3.45,
"learning_rate": 1.309234793105761e-05,
"loss": 0.1295,
"step": 51500
},
{
"epoch": 3.49,
"learning_rate": 1.3025283347863994e-05,
"loss": 0.106,
"step": 52000
},
{
"epoch": 3.52,
"learning_rate": 1.2958218764670379e-05,
"loss": 0.0838,
"step": 52500
},
{
"epoch": 3.55,
"learning_rate": 1.2891154181476764e-05,
"loss": 0.1616,
"step": 53000
},
{
"epoch": 3.59,
"learning_rate": 1.2824089598283147e-05,
"loss": 0.1084,
"step": 53500
},
{
"epoch": 3.62,
"learning_rate": 1.2757025015089533e-05,
"loss": 0.2535,
"step": 54000
},
{
"epoch": 3.66,
"learning_rate": 1.2689960431895916e-05,
"loss": 0.1363,
"step": 54500
},
{
"epoch": 3.69,
"learning_rate": 1.26228958487023e-05,
"loss": 0.2317,
"step": 55000
},
{
"epoch": 3.72,
"learning_rate": 1.2555831265508687e-05,
"loss": 0.2266,
"step": 55500
},
{
"epoch": 3.76,
"learning_rate": 1.248876668231507e-05,
"loss": 0.1864,
"step": 56000
},
{
"epoch": 3.79,
"learning_rate": 1.2421702099121456e-05,
"loss": 0.0946,
"step": 56500
},
{
"epoch": 3.82,
"learning_rate": 1.2354637515927839e-05,
"loss": 0.1919,
"step": 57000
},
{
"epoch": 3.86,
"learning_rate": 1.2287572932734224e-05,
"loss": 0.1106,
"step": 57500
},
{
"epoch": 3.89,
"learning_rate": 1.222050834954061e-05,
"loss": 0.1769,
"step": 58000
},
{
"epoch": 3.92,
"learning_rate": 1.2153443766346993e-05,
"loss": 0.089,
"step": 58500
},
{
"epoch": 3.96,
"learning_rate": 1.2086379183153378e-05,
"loss": 0.1087,
"step": 59000
},
{
"epoch": 3.99,
"learning_rate": 1.2019314599959762e-05,
"loss": 0.1879,
"step": 59500
},
{
"epoch": 4.02,
"learning_rate": 1.1952250016766147e-05,
"loss": 0.14,
"step": 60000
},
{
"epoch": 4.06,
"learning_rate": 1.188518543357253e-05,
"loss": 0.1331,
"step": 60500
},
{
"epoch": 4.09,
"learning_rate": 1.1818120850378916e-05,
"loss": 0.0841,
"step": 61000
},
{
"epoch": 4.12,
"learning_rate": 1.1751056267185301e-05,
"loss": 0.134,
"step": 61500
},
{
"epoch": 4.16,
"learning_rate": 1.1683991683991684e-05,
"loss": 0.1823,
"step": 62000
},
{
"epoch": 4.19,
"learning_rate": 1.161692710079807e-05,
"loss": 0.1005,
"step": 62500
},
{
"epoch": 4.23,
"learning_rate": 1.1549862517604453e-05,
"loss": 0.1003,
"step": 63000
},
{
"epoch": 4.26,
"learning_rate": 1.1482797934410838e-05,
"loss": 0.1729,
"step": 63500
},
{
"epoch": 4.29,
"learning_rate": 1.1415733351217224e-05,
"loss": 0.155,
"step": 64000
},
{
"epoch": 4.33,
"learning_rate": 1.1348668768023607e-05,
"loss": 0.1039,
"step": 64500
},
{
"epoch": 4.36,
"learning_rate": 1.1281604184829994e-05,
"loss": 0.1536,
"step": 65000
},
{
"epoch": 4.39,
"learning_rate": 1.1214539601636377e-05,
"loss": 0.1576,
"step": 65500
},
{
"epoch": 4.43,
"learning_rate": 1.1147475018442761e-05,
"loss": 0.1298,
"step": 66000
},
{
"epoch": 4.46,
"learning_rate": 1.1080410435249144e-05,
"loss": 0.1465,
"step": 66500
},
{
"epoch": 4.49,
"learning_rate": 1.101334585205553e-05,
"loss": 0.1322,
"step": 67000
},
{
"epoch": 4.53,
"learning_rate": 1.0946281268861915e-05,
"loss": 0.1322,
"step": 67500
},
{
"epoch": 4.56,
"learning_rate": 1.0879216685668298e-05,
"loss": 0.1597,
"step": 68000
},
{
"epoch": 4.59,
"learning_rate": 1.0812152102474685e-05,
"loss": 0.1188,
"step": 68500
},
{
"epoch": 4.63,
"learning_rate": 1.0745087519281068e-05,
"loss": 0.1614,
"step": 69000
},
{
"epoch": 4.66,
"learning_rate": 1.0678022936087454e-05,
"loss": 0.1174,
"step": 69500
},
{
"epoch": 4.69,
"learning_rate": 1.0610958352893839e-05,
"loss": 0.1336,
"step": 70000
},
{
"epoch": 4.73,
"learning_rate": 1.0543893769700222e-05,
"loss": 0.0941,
"step": 70500
},
{
"epoch": 4.76,
"learning_rate": 1.0476829186506608e-05,
"loss": 0.0724,
"step": 71000
},
{
"epoch": 4.8,
"learning_rate": 1.040976460331299e-05,
"loss": 0.1419,
"step": 71500
},
{
"epoch": 4.83,
"learning_rate": 1.0342700020119375e-05,
"loss": 0.1308,
"step": 72000
},
{
"epoch": 4.86,
"learning_rate": 1.027563543692576e-05,
"loss": 0.1128,
"step": 72500
},
{
"epoch": 4.9,
"learning_rate": 1.0208570853732145e-05,
"loss": 0.0805,
"step": 73000
},
{
"epoch": 4.93,
"learning_rate": 1.0141506270538531e-05,
"loss": 0.0361,
"step": 73500
},
{
"epoch": 4.96,
"learning_rate": 1.0074441687344914e-05,
"loss": 0.1846,
"step": 74000
},
{
"epoch": 5.0,
"learning_rate": 1.0007377104151299e-05,
"loss": 0.0949,
"step": 74500
},
{
"epoch": 5.03,
"learning_rate": 9.940312520957683e-06,
"loss": 0.1941,
"step": 75000
},
{
"epoch": 5.06,
"learning_rate": 9.873247937764068e-06,
"loss": 0.154,
"step": 75500
},
{
"epoch": 5.1,
"learning_rate": 9.806183354570453e-06,
"loss": 0.1002,
"step": 76000
},
{
"epoch": 5.13,
"learning_rate": 9.739118771376836e-06,
"loss": 0.0869,
"step": 76500
},
{
"epoch": 5.16,
"learning_rate": 9.67205418818322e-06,
"loss": 0.0587,
"step": 77000
},
{
"epoch": 5.2,
"learning_rate": 9.604989604989607e-06,
"loss": 0.1265,
"step": 77500
},
{
"epoch": 5.23,
"learning_rate": 9.537925021795991e-06,
"loss": 0.1247,
"step": 78000
},
{
"epoch": 5.26,
"learning_rate": 9.470860438602374e-06,
"loss": 0.0767,
"step": 78500
},
{
"epoch": 5.3,
"learning_rate": 9.403795855408759e-06,
"loss": 0.0519,
"step": 79000
},
{
"epoch": 5.33,
"learning_rate": 9.336731272215144e-06,
"loss": 0.0949,
"step": 79500
},
{
"epoch": 5.37,
"learning_rate": 9.269666689021528e-06,
"loss": 0.085,
"step": 80000
},
{
"epoch": 5.4,
"learning_rate": 9.202602105827913e-06,
"loss": 0.1271,
"step": 80500
},
{
"epoch": 5.43,
"learning_rate": 9.135537522634298e-06,
"loss": 0.2085,
"step": 81000
},
{
"epoch": 5.47,
"learning_rate": 9.068472939440682e-06,
"loss": 0.0912,
"step": 81500
},
{
"epoch": 5.5,
"learning_rate": 9.001408356247067e-06,
"loss": 0.1413,
"step": 82000
},
{
"epoch": 5.53,
"learning_rate": 8.934343773053452e-06,
"loss": 0.1015,
"step": 82500
},
{
"epoch": 5.57,
"learning_rate": 8.867279189859835e-06,
"loss": 0.1277,
"step": 83000
},
{
"epoch": 5.6,
"learning_rate": 8.80021460666622e-06,
"loss": 0.0239,
"step": 83500
},
{
"epoch": 5.63,
"learning_rate": 8.733150023472606e-06,
"loss": 0.1947,
"step": 84000
},
{
"epoch": 5.67,
"learning_rate": 8.66608544027899e-06,
"loss": 0.0589,
"step": 84500
},
{
"epoch": 5.7,
"learning_rate": 8.599020857085373e-06,
"loss": 0.112,
"step": 85000
},
{
"epoch": 5.73,
"learning_rate": 8.531956273891758e-06,
"loss": 0.0843,
"step": 85500
},
{
"epoch": 5.77,
"learning_rate": 8.464891690698143e-06,
"loss": 0.151,
"step": 86000
},
{
"epoch": 5.8,
"learning_rate": 8.397827107504527e-06,
"loss": 0.1011,
"step": 86500
},
{
"epoch": 5.83,
"learning_rate": 8.330762524310912e-06,
"loss": 0.045,
"step": 87000
},
{
"epoch": 5.87,
"learning_rate": 8.263697941117297e-06,
"loss": 0.0533,
"step": 87500
},
{
"epoch": 5.9,
"learning_rate": 8.196633357923681e-06,
"loss": 0.0512,
"step": 88000
},
{
"epoch": 5.94,
"learning_rate": 8.129568774730066e-06,
"loss": 0.2159,
"step": 88500
},
{
"epoch": 5.97,
"learning_rate": 8.06250419153645e-06,
"loss": 0.1077,
"step": 89000
},
{
"epoch": 6.0,
"learning_rate": 7.995439608342833e-06,
"loss": 0.11,
"step": 89500
},
{
"epoch": 6.04,
"learning_rate": 7.92837502514922e-06,
"loss": 0.1365,
"step": 90000
},
{
"epoch": 6.07,
"learning_rate": 7.861310441955604e-06,
"loss": 0.0742,
"step": 90500
},
{
"epoch": 6.1,
"learning_rate": 7.794245858761989e-06,
"loss": 0.0716,
"step": 91000
},
{
"epoch": 6.14,
"learning_rate": 7.727181275568372e-06,
"loss": 0.1481,
"step": 91500
},
{
"epoch": 6.17,
"learning_rate": 7.660116692374757e-06,
"loss": 0.008,
"step": 92000
},
{
"epoch": 6.2,
"learning_rate": 7.593052109181141e-06,
"loss": 0.0615,
"step": 92500
},
{
"epoch": 6.24,
"learning_rate": 7.525987525987527e-06,
"loss": 0.106,
"step": 93000
},
{
"epoch": 6.27,
"learning_rate": 7.4589229427939116e-06,
"loss": 0.0534,
"step": 93500
},
{
"epoch": 6.3,
"learning_rate": 7.391858359600295e-06,
"loss": 0.1034,
"step": 94000
},
{
"epoch": 6.34,
"learning_rate": 7.32479377640668e-06,
"loss": 0.107,
"step": 94500
},
{
"epoch": 6.37,
"learning_rate": 7.257729193213065e-06,
"loss": 0.0235,
"step": 95000
},
{
"epoch": 6.4,
"learning_rate": 7.1906646100194485e-06,
"loss": 0.0844,
"step": 95500
},
{
"epoch": 6.44,
"learning_rate": 7.123600026825834e-06,
"loss": 0.0495,
"step": 96000
},
{
"epoch": 6.47,
"learning_rate": 7.056535443632219e-06,
"loss": 0.0572,
"step": 96500
},
{
"epoch": 6.51,
"learning_rate": 6.989470860438603e-06,
"loss": 0.024,
"step": 97000
},
{
"epoch": 6.54,
"learning_rate": 6.922406277244987e-06,
"loss": 0.1605,
"step": 97500
},
{
"epoch": 6.57,
"learning_rate": 6.855341694051372e-06,
"loss": 0.1724,
"step": 98000
},
{
"epoch": 6.61,
"learning_rate": 6.7882771108577564e-06,
"loss": 0.0505,
"step": 98500
},
{
"epoch": 6.64,
"learning_rate": 6.721212527664142e-06,
"loss": 0.0549,
"step": 99000
},
{
"epoch": 6.67,
"learning_rate": 6.654147944470526e-06,
"loss": 0.1133,
"step": 99500
},
{
"epoch": 6.71,
"learning_rate": 6.58708336127691e-06,
"loss": 0.0441,
"step": 100000
},
{
"epoch": 6.74,
"learning_rate": 6.520018778083294e-06,
"loss": 0.0775,
"step": 100500
},
{
"epoch": 6.77,
"learning_rate": 6.452954194889679e-06,
"loss": 0.0319,
"step": 101000
},
{
"epoch": 6.81,
"learning_rate": 6.3858896116960635e-06,
"loss": 0.1535,
"step": 101500
},
{
"epoch": 6.84,
"learning_rate": 6.318825028502449e-06,
"loss": 0.0782,
"step": 102000
},
{
"epoch": 6.87,
"learning_rate": 6.251760445308833e-06,
"loss": 0.1007,
"step": 102500
},
{
"epoch": 6.91,
"learning_rate": 6.1846958621152175e-06,
"loss": 0.0677,
"step": 103000
},
{
"epoch": 6.94,
"learning_rate": 6.117631278921602e-06,
"loss": 0.0279,
"step": 103500
},
{
"epoch": 6.97,
"learning_rate": 6.050566695727986e-06,
"loss": 0.0685,
"step": 104000
},
{
"epoch": 7.01,
"learning_rate": 5.983502112534371e-06,
"loss": 0.0705,
"step": 104500
},
{
"epoch": 7.04,
"learning_rate": 5.916437529340756e-06,
"loss": 0.0993,
"step": 105000
},
{
"epoch": 7.08,
"learning_rate": 5.849372946147141e-06,
"loss": 0.0558,
"step": 105500
},
{
"epoch": 7.11,
"learning_rate": 5.782308362953525e-06,
"loss": 0.0672,
"step": 106000
},
{
"epoch": 7.14,
"learning_rate": 5.715243779759909e-06,
"loss": 0.0796,
"step": 106500
},
{
"epoch": 7.18,
"learning_rate": 5.648179196566294e-06,
"loss": 0.038,
"step": 107000
},
{
"epoch": 7.21,
"learning_rate": 5.581114613372678e-06,
"loss": 0.0612,
"step": 107500
},
{
"epoch": 7.24,
"learning_rate": 5.514050030179063e-06,
"loss": 0.0537,
"step": 108000
},
{
"epoch": 7.28,
"learning_rate": 5.446985446985448e-06,
"loss": 0.0291,
"step": 108500
},
{
"epoch": 7.31,
"learning_rate": 5.379920863791832e-06,
"loss": 0.0772,
"step": 109000
},
{
"epoch": 7.34,
"learning_rate": 5.312856280598216e-06,
"loss": 0.0997,
"step": 109500
},
{
"epoch": 7.38,
"learning_rate": 5.245791697404601e-06,
"loss": 0.0505,
"step": 110000
},
{
"epoch": 7.41,
"learning_rate": 5.178727114210985e-06,
"loss": 0.0636,
"step": 110500
},
{
"epoch": 7.44,
"learning_rate": 5.11166253101737e-06,
"loss": 0.0314,
"step": 111000
},
{
"epoch": 7.48,
"learning_rate": 5.044597947823755e-06,
"loss": 0.1147,
"step": 111500
},
{
"epoch": 7.51,
"learning_rate": 4.97753336463014e-06,
"loss": 0.0815,
"step": 112000
},
{
"epoch": 7.54,
"learning_rate": 4.9104687814365235e-06,
"loss": 0.016,
"step": 112500
},
{
"epoch": 7.58,
"learning_rate": 4.843404198242908e-06,
"loss": 0.0509,
"step": 113000
},
{
"epoch": 7.61,
"learning_rate": 4.776339615049293e-06,
"loss": 0.0365,
"step": 113500
},
{
"epoch": 7.65,
"learning_rate": 4.7092750318556775e-06,
"loss": 0.0374,
"step": 114000
},
{
"epoch": 7.68,
"learning_rate": 4.642210448662062e-06,
"loss": 0.1001,
"step": 114500
},
{
"epoch": 7.71,
"learning_rate": 4.575145865468447e-06,
"loss": 0.021,
"step": 115000
},
{
"epoch": 7.75,
"learning_rate": 4.508081282274831e-06,
"loss": 0.0171,
"step": 115500
},
{
"epoch": 7.78,
"learning_rate": 4.441016699081215e-06,
"loss": 0.0759,
"step": 116000
},
{
"epoch": 7.81,
"learning_rate": 4.3739521158876e-06,
"loss": 0.0804,
"step": 116500
},
{
"epoch": 7.85,
"learning_rate": 4.3068875326939846e-06,
"loss": 0.0418,
"step": 117000
},
{
"epoch": 7.88,
"learning_rate": 4.239822949500369e-06,
"loss": 0.1122,
"step": 117500
},
{
"epoch": 7.91,
"learning_rate": 4.172758366306754e-06,
"loss": 0.0737,
"step": 118000
},
{
"epoch": 7.95,
"learning_rate": 4.1056937831131385e-06,
"loss": 0.0526,
"step": 118500
},
{
"epoch": 7.98,
"learning_rate": 4.038629199919522e-06,
"loss": 0.0483,
"step": 119000
},
{
"epoch": 8.01,
"learning_rate": 3.971564616725908e-06,
"loss": 0.037,
"step": 119500
},
{
"epoch": 8.05,
"learning_rate": 3.904500033532292e-06,
"loss": 0.0687,
"step": 120000
},
{
"epoch": 8.08,
"learning_rate": 3.837435450338676e-06,
"loss": 0.023,
"step": 120500
},
{
"epoch": 8.11,
"learning_rate": 3.770370867145061e-06,
"loss": 0.1,
"step": 121000
},
{
"epoch": 8.15,
"learning_rate": 3.7033062839514456e-06,
"loss": 0.0659,
"step": 121500
},
{
"epoch": 8.18,
"learning_rate": 3.63624170075783e-06,
"loss": 0.0989,
"step": 122000
},
{
"epoch": 8.22,
"learning_rate": 3.569177117564215e-06,
"loss": 0.0603,
"step": 122500
},
{
"epoch": 8.25,
"learning_rate": 3.502112534370599e-06,
"loss": 0.0321,
"step": 123000
},
{
"epoch": 8.28,
"learning_rate": 3.4350479511769834e-06,
"loss": 0.0709,
"step": 123500
},
{
"epoch": 8.32,
"learning_rate": 3.3679833679833685e-06,
"loss": 0.0389,
"step": 124000
},
{
"epoch": 8.35,
"learning_rate": 3.3009187847897527e-06,
"loss": 0.036,
"step": 124500
},
{
"epoch": 8.38,
"learning_rate": 3.233854201596137e-06,
"loss": 0.04,
"step": 125000
},
{
"epoch": 8.42,
"learning_rate": 3.166789618402522e-06,
"loss": 0.0265,
"step": 125500
},
{
"epoch": 8.45,
"learning_rate": 3.0997250352089063e-06,
"loss": 0.0434,
"step": 126000
},
{
"epoch": 8.48,
"learning_rate": 3.032660452015291e-06,
"loss": 0.0001,
"step": 126500
},
{
"epoch": 8.52,
"learning_rate": 2.9655958688216756e-06,
"loss": 0.0224,
"step": 127000
},
{
"epoch": 8.55,
"learning_rate": 2.8985312856280603e-06,
"loss": 0.0199,
"step": 127500
},
{
"epoch": 8.58,
"learning_rate": 2.8314667024344445e-06,
"loss": 0.0482,
"step": 128000
},
{
"epoch": 8.62,
"learning_rate": 2.764402119240829e-06,
"loss": 0.0329,
"step": 128500
},
{
"epoch": 8.65,
"learning_rate": 2.697337536047214e-06,
"loss": 0.0515,
"step": 129000
},
{
"epoch": 8.68,
"learning_rate": 2.630272952853598e-06,
"loss": 0.0473,
"step": 129500
},
{
"epoch": 8.72,
"learning_rate": 2.563208369659983e-06,
"loss": 0.1201,
"step": 130000
},
{
"epoch": 8.75,
"learning_rate": 2.4961437864663674e-06,
"loss": 0.0295,
"step": 130500
},
{
"epoch": 8.79,
"learning_rate": 2.429079203272752e-06,
"loss": 0.0001,
"step": 131000
},
{
"epoch": 8.82,
"learning_rate": 2.3620146200791363e-06,
"loss": 0.0537,
"step": 131500
},
{
"epoch": 8.85,
"learning_rate": 2.294950036885521e-06,
"loss": 0.0821,
"step": 132000
},
{
"epoch": 8.89,
"learning_rate": 2.2278854536919056e-06,
"loss": 0.0665,
"step": 132500
},
{
"epoch": 8.92,
"learning_rate": 2.16082087049829e-06,
"loss": 0.0391,
"step": 133000
},
{
"epoch": 8.95,
"learning_rate": 2.0937562873046745e-06,
"loss": 0.0315,
"step": 133500
},
{
"epoch": 8.99,
"learning_rate": 2.026691704111059e-06,
"loss": 0.0431,
"step": 134000
},
{
"epoch": 9.02,
"learning_rate": 1.959627120917444e-06,
"loss": 0.0328,
"step": 134500
},
{
"epoch": 9.05,
"learning_rate": 1.8925625377238282e-06,
"loss": 0.0126,
"step": 135000
},
{
"epoch": 9.09,
"learning_rate": 1.8254979545302127e-06,
"loss": 0.0086,
"step": 135500
},
{
"epoch": 9.12,
"learning_rate": 1.7584333713365973e-06,
"loss": 0.0242,
"step": 136000
},
{
"epoch": 9.15,
"learning_rate": 1.6913687881429818e-06,
"loss": 0.0983,
"step": 136500
},
{
"epoch": 9.19,
"learning_rate": 1.6243042049493662e-06,
"loss": 0.0471,
"step": 137000
},
{
"epoch": 9.22,
"learning_rate": 1.557239621755751e-06,
"loss": 0.0347,
"step": 137500
},
{
"epoch": 9.25,
"learning_rate": 1.4901750385621356e-06,
"loss": 0.0387,
"step": 138000
},
{
"epoch": 9.29,
"learning_rate": 1.4231104553685198e-06,
"loss": 0.0398,
"step": 138500
},
{
"epoch": 9.32,
"learning_rate": 1.3560458721749044e-06,
"loss": 0.062,
"step": 139000
},
{
"epoch": 9.36,
"learning_rate": 1.288981288981289e-06,
"loss": 0.0003,
"step": 139500
},
{
"epoch": 9.39,
"learning_rate": 1.2219167057876738e-06,
"loss": 0.0466,
"step": 140000
},
{
"epoch": 9.42,
"learning_rate": 1.1548521225940582e-06,
"loss": 0.0385,
"step": 140500
},
{
"epoch": 9.46,
"learning_rate": 1.0877875394004427e-06,
"loss": 0.0581,
"step": 141000
},
{
"epoch": 9.49,
"learning_rate": 1.0207229562068273e-06,
"loss": 0.016,
"step": 141500
},
{
"epoch": 9.52,
"learning_rate": 9.536583730132118e-07,
"loss": 0.0202,
"step": 142000
},
{
"epoch": 9.56,
"learning_rate": 8.865937898195963e-07,
"loss": 0.0409,
"step": 142500
},
{
"epoch": 9.59,
"learning_rate": 8.19529206625981e-07,
"loss": 0.0327,
"step": 143000
},
{
"epoch": 9.62,
"learning_rate": 7.524646234323654e-07,
"loss": 0.0101,
"step": 143500
},
{
"epoch": 9.66,
"learning_rate": 6.8540004023875e-07,
"loss": 0.0539,
"step": 144000
},
{
"epoch": 9.69,
"learning_rate": 6.183354570451345e-07,
"loss": 0.0001,
"step": 144500
},
{
"epoch": 9.72,
"learning_rate": 5.512708738515191e-07,
"loss": 0.0521,
"step": 145000
},
{
"epoch": 9.76,
"learning_rate": 4.842062906579036e-07,
"loss": 0.0702,
"step": 145500
},
{
"epoch": 9.79,
"learning_rate": 4.1714170746428813e-07,
"loss": 0.0559,
"step": 146000
},
{
"epoch": 9.82,
"learning_rate": 3.500771242706727e-07,
"loss": 0.0394,
"step": 146500
},
{
"epoch": 9.86,
"learning_rate": 2.8301254107705723e-07,
"loss": 0.0164,
"step": 147000
},
{
"epoch": 9.89,
"learning_rate": 2.1594795788344176e-07,
"loss": 0.0194,
"step": 147500
},
{
"epoch": 9.93,
"learning_rate": 1.488833746898263e-07,
"loss": 0.0652,
"step": 148000
},
{
"epoch": 9.96,
"learning_rate": 8.181879149621086e-08,
"loss": 0.077,
"step": 148500
},
{
"epoch": 9.99,
"learning_rate": 1.4754208302595401e-08,
"loss": 0.0659,
"step": 149000
}
],
"logging_steps": 500,
"max_steps": 149110,
"num_train_epochs": 10,
"save_steps": 500,
"total_flos": 9801062808576000.0,
"trial_name": null,
"trial_params": null
}