mistral_allFpML0 / checkpoint-14040 /trainer_state.json
YL95's picture
Upload folder using huggingface_hub
42ed185
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 18.846153846153847,
"eval_steps": 500,
"global_step": 14040,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.2,
"learning_rate": 3.2763532763532763e-06,
"loss": 0.819,
"step": 46
},
{
"epoch": 0.39,
"learning_rate": 6.5527065527065525e-06,
"loss": 0.8698,
"step": 92
},
{
"epoch": 0.59,
"learning_rate": 9.82905982905983e-06,
"loss": 0.7667,
"step": 138
},
{
"epoch": 0.79,
"learning_rate": 1.3105413105413105e-05,
"loss": 0.6645,
"step": 184
},
{
"epoch": 0.98,
"learning_rate": 1.6381766381766382e-05,
"loss": 0.5342,
"step": 230
},
{
"epoch": 1.18,
"learning_rate": 1.965811965811966e-05,
"loss": 0.5067,
"step": 276
},
{
"epoch": 1.38,
"learning_rate": 2.2934472934472936e-05,
"loss": 0.434,
"step": 322
},
{
"epoch": 1.57,
"learning_rate": 2.621082621082621e-05,
"loss": 0.4473,
"step": 368
},
{
"epoch": 1.77,
"learning_rate": 2.948717948717949e-05,
"loss": 0.3901,
"step": 414
},
{
"epoch": 1.97,
"learning_rate": 3.2763532763532764e-05,
"loss": 0.3859,
"step": 460
},
{
"epoch": 2.16,
"learning_rate": 3.603988603988604e-05,
"loss": 0.3526,
"step": 506
},
{
"epoch": 2.36,
"learning_rate": 3.931623931623932e-05,
"loss": 0.3339,
"step": 552
},
{
"epoch": 2.56,
"learning_rate": 4.259259259259259e-05,
"loss": 0.343,
"step": 598
},
{
"epoch": 2.75,
"learning_rate": 4.586894586894587e-05,
"loss": 0.2998,
"step": 644
},
{
"epoch": 2.95,
"learning_rate": 4.9145299145299147e-05,
"loss": 0.2778,
"step": 690
},
{
"epoch": 3.15,
"learning_rate": 5.242165242165242e-05,
"loss": 0.3029,
"step": 736
},
{
"epoch": 1.06,
"learning_rate": 5.5698005698005694e-05,
"loss": 0.2557,
"step": 782
},
{
"epoch": 1.25,
"learning_rate": 5.897435897435898e-05,
"loss": 0.2605,
"step": 828
},
{
"epoch": 1.45,
"learning_rate": 6.225071225071225e-05,
"loss": 0.2566,
"step": 874
},
{
"epoch": 1.65,
"learning_rate": 6.552706552706553e-05,
"loss": 0.2409,
"step": 920
},
{
"epoch": 1.84,
"learning_rate": 6.880341880341881e-05,
"loss": 0.2518,
"step": 966
},
{
"epoch": 2.04,
"learning_rate": 7.207977207977208e-05,
"loss": 0.279,
"step": 1012
},
{
"epoch": 2.24,
"learning_rate": 7.535612535612536e-05,
"loss": 0.2421,
"step": 1058
},
{
"epoch": 2.43,
"learning_rate": 7.863247863247864e-05,
"loss": 0.2237,
"step": 1104
},
{
"epoch": 2.63,
"learning_rate": 8.190883190883192e-05,
"loss": 0.2057,
"step": 1150
},
{
"epoch": 2.82,
"learning_rate": 8.518518518518518e-05,
"loss": 0.2496,
"step": 1196
},
{
"epoch": 3.02,
"learning_rate": 8.846153846153847e-05,
"loss": 0.2397,
"step": 1242
},
{
"epoch": 3.22,
"learning_rate": 9.173789173789175e-05,
"loss": 0.2322,
"step": 1288
},
{
"epoch": 3.41,
"learning_rate": 9.501424501424501e-05,
"loss": 0.2046,
"step": 1334
},
{
"epoch": 3.61,
"learning_rate": 9.829059829059829e-05,
"loss": 0.2308,
"step": 1380
},
{
"epoch": 3.81,
"learning_rate": 9.982589427033872e-05,
"loss": 0.2052,
"step": 1426
},
{
"epoch": 4.0,
"learning_rate": 9.946185501741058e-05,
"loss": 0.1759,
"step": 1472
},
{
"epoch": 4.2,
"learning_rate": 9.909781576448244e-05,
"loss": 0.2116,
"step": 1518
},
{
"epoch": 2.11,
"learning_rate": 9.87337765115543e-05,
"loss": 0.1603,
"step": 1564
},
{
"epoch": 2.31,
"learning_rate": 9.836973725862614e-05,
"loss": 0.1892,
"step": 1610
},
{
"epoch": 2.5,
"learning_rate": 9.800569800569801e-05,
"loss": 0.1656,
"step": 1656
},
{
"epoch": 2.7,
"learning_rate": 9.764165875276987e-05,
"loss": 0.181,
"step": 1702
},
{
"epoch": 2.9,
"learning_rate": 9.727761949984172e-05,
"loss": 0.1727,
"step": 1748
},
{
"epoch": 3.09,
"learning_rate": 9.691358024691359e-05,
"loss": 0.1555,
"step": 1794
},
{
"epoch": 3.29,
"learning_rate": 9.654954099398545e-05,
"loss": 0.1152,
"step": 1840
},
{
"epoch": 3.49,
"learning_rate": 9.61855017410573e-05,
"loss": 0.1404,
"step": 1886
},
{
"epoch": 3.68,
"learning_rate": 9.582146248812915e-05,
"loss": 0.1635,
"step": 1932
},
{
"epoch": 3.88,
"learning_rate": 9.545742323520101e-05,
"loss": 0.1429,
"step": 1978
},
{
"epoch": 4.08,
"learning_rate": 9.509338398227288e-05,
"loss": 0.1369,
"step": 2024
},
{
"epoch": 4.27,
"learning_rate": 9.472934472934474e-05,
"loss": 0.1482,
"step": 2070
},
{
"epoch": 4.47,
"learning_rate": 9.436530547641659e-05,
"loss": 0.1388,
"step": 2116
},
{
"epoch": 4.67,
"learning_rate": 9.400126622348845e-05,
"loss": 0.1602,
"step": 2162
},
{
"epoch": 4.86,
"learning_rate": 9.36372269705603e-05,
"loss": 0.1764,
"step": 2208
},
{
"epoch": 5.06,
"learning_rate": 9.327318771763217e-05,
"loss": 0.16,
"step": 2254
},
{
"epoch": 5.26,
"learning_rate": 9.290914846470402e-05,
"loss": 0.133,
"step": 2300
},
{
"epoch": 3.17,
"learning_rate": 9.254510921177588e-05,
"loss": 0.1226,
"step": 2346
},
{
"epoch": 3.36,
"learning_rate": 9.218106995884775e-05,
"loss": 0.1062,
"step": 2392
},
{
"epoch": 3.56,
"learning_rate": 9.181703070591961e-05,
"loss": 0.1184,
"step": 2438
},
{
"epoch": 3.76,
"learning_rate": 9.145299145299146e-05,
"loss": 0.107,
"step": 2484
},
{
"epoch": 3.95,
"learning_rate": 9.108895220006331e-05,
"loss": 0.1113,
"step": 2530
},
{
"epoch": 4.15,
"learning_rate": 9.072491294713517e-05,
"loss": 0.13,
"step": 2576
},
{
"epoch": 4.35,
"learning_rate": 9.036087369420702e-05,
"loss": 0.1066,
"step": 2622
},
{
"epoch": 4.54,
"learning_rate": 8.999683444127889e-05,
"loss": 0.1131,
"step": 2668
},
{
"epoch": 4.74,
"learning_rate": 8.963279518835075e-05,
"loss": 0.1117,
"step": 2714
},
{
"epoch": 4.94,
"learning_rate": 8.926875593542262e-05,
"loss": 0.1143,
"step": 2760
},
{
"epoch": 5.13,
"learning_rate": 8.890471668249447e-05,
"loss": 0.1041,
"step": 2806
},
{
"epoch": 5.33,
"learning_rate": 8.854067742956632e-05,
"loss": 0.1041,
"step": 2852
},
{
"epoch": 5.53,
"learning_rate": 8.817663817663818e-05,
"loss": 0.1205,
"step": 2898
},
{
"epoch": 5.72,
"learning_rate": 8.781259892371004e-05,
"loss": 0.1214,
"step": 2944
},
{
"epoch": 5.92,
"learning_rate": 8.74485596707819e-05,
"loss": 0.117,
"step": 2990
},
{
"epoch": 6.12,
"learning_rate": 8.708452041785376e-05,
"loss": 0.1115,
"step": 3036
},
{
"epoch": 4.03,
"learning_rate": 8.672048116492562e-05,
"loss": 0.106,
"step": 3082
},
{
"epoch": 4.22,
"learning_rate": 8.635644191199747e-05,
"loss": 0.0876,
"step": 3128
},
{
"epoch": 4.42,
"learning_rate": 8.599240265906932e-05,
"loss": 0.0827,
"step": 3174
},
{
"epoch": 4.62,
"learning_rate": 8.562836340614119e-05,
"loss": 0.0842,
"step": 3220
},
{
"epoch": 4.81,
"learning_rate": 8.526432415321305e-05,
"loss": 0.0933,
"step": 3266
},
{
"epoch": 5.01,
"learning_rate": 8.490028490028491e-05,
"loss": 0.0908,
"step": 3312
},
{
"epoch": 5.21,
"learning_rate": 8.453624564735676e-05,
"loss": 0.089,
"step": 3358
},
{
"epoch": 5.4,
"learning_rate": 8.417220639442863e-05,
"loss": 0.084,
"step": 3404
},
{
"epoch": 5.6,
"learning_rate": 8.380816714150048e-05,
"loss": 0.0745,
"step": 3450
},
{
"epoch": 5.79,
"learning_rate": 8.344412788857233e-05,
"loss": 0.0786,
"step": 3496
},
{
"epoch": 5.99,
"learning_rate": 8.308008863564419e-05,
"loss": 0.077,
"step": 3542
},
{
"epoch": 6.19,
"learning_rate": 8.271604938271605e-05,
"loss": 0.0882,
"step": 3588
},
{
"epoch": 6.38,
"learning_rate": 8.235201012978792e-05,
"loss": 0.0902,
"step": 3634
},
{
"epoch": 6.58,
"learning_rate": 8.198797087685977e-05,
"loss": 0.0879,
"step": 3680
},
{
"epoch": 6.78,
"learning_rate": 8.162393162393163e-05,
"loss": 0.0881,
"step": 3726
},
{
"epoch": 6.97,
"learning_rate": 8.125989237100348e-05,
"loss": 0.0955,
"step": 3772
},
{
"epoch": 7.17,
"learning_rate": 8.089585311807535e-05,
"loss": 0.0917,
"step": 3818
},
{
"epoch": 5.08,
"learning_rate": 8.05318138651472e-05,
"loss": 0.0851,
"step": 3864
},
{
"epoch": 5.28,
"learning_rate": 8.016777461221906e-05,
"loss": 0.0638,
"step": 3910
},
{
"epoch": 5.47,
"learning_rate": 7.980373535929092e-05,
"loss": 0.0795,
"step": 3956
},
{
"epoch": 5.67,
"learning_rate": 7.943969610636279e-05,
"loss": 0.0671,
"step": 4002
},
{
"epoch": 5.87,
"learning_rate": 7.907565685343464e-05,
"loss": 0.0779,
"step": 4048
},
{
"epoch": 6.06,
"learning_rate": 7.871161760050649e-05,
"loss": 0.0722,
"step": 4094
},
{
"epoch": 6.26,
"learning_rate": 7.834757834757835e-05,
"loss": 0.0594,
"step": 4140
},
{
"epoch": 6.46,
"learning_rate": 7.79835390946502e-05,
"loss": 0.0774,
"step": 4186
},
{
"epoch": 6.65,
"learning_rate": 7.761949984172207e-05,
"loss": 0.0727,
"step": 4232
},
{
"epoch": 6.85,
"learning_rate": 7.725546058879393e-05,
"loss": 0.0621,
"step": 4278
},
{
"epoch": 7.05,
"learning_rate": 7.68914213358658e-05,
"loss": 0.0619,
"step": 4324
},
{
"epoch": 7.24,
"learning_rate": 7.652738208293764e-05,
"loss": 0.0633,
"step": 4370
},
{
"epoch": 7.44,
"learning_rate": 7.61633428300095e-05,
"loss": 0.0585,
"step": 4416
},
{
"epoch": 7.64,
"learning_rate": 7.579930357708136e-05,
"loss": 0.0743,
"step": 4462
},
{
"epoch": 7.83,
"learning_rate": 7.543526432415322e-05,
"loss": 0.0751,
"step": 4508
},
{
"epoch": 8.03,
"learning_rate": 7.507122507122507e-05,
"loss": 0.0597,
"step": 4554
},
{
"epoch": 8.23,
"learning_rate": 7.470718581829694e-05,
"loss": 0.0692,
"step": 4600
},
{
"epoch": 6.14,
"learning_rate": 7.43431465653688e-05,
"loss": 0.0595,
"step": 4646
},
{
"epoch": 6.33,
"learning_rate": 7.397910731244065e-05,
"loss": 0.0505,
"step": 4692
},
{
"epoch": 6.53,
"learning_rate": 7.36150680595125e-05,
"loss": 0.0516,
"step": 4738
},
{
"epoch": 6.73,
"learning_rate": 7.325102880658436e-05,
"loss": 0.0577,
"step": 4784
},
{
"epoch": 6.92,
"learning_rate": 7.288698955365623e-05,
"loss": 0.0546,
"step": 4830
},
{
"epoch": 7.12,
"learning_rate": 7.252295030072809e-05,
"loss": 0.0472,
"step": 4876
},
{
"epoch": 7.32,
"learning_rate": 7.215891104779994e-05,
"loss": 0.0565,
"step": 4922
},
{
"epoch": 7.51,
"learning_rate": 7.17948717948718e-05,
"loss": 0.0545,
"step": 4968
},
{
"epoch": 7.71,
"learning_rate": 7.143083254194365e-05,
"loss": 0.0399,
"step": 5014
},
{
"epoch": 7.91,
"learning_rate": 7.10667932890155e-05,
"loss": 0.0536,
"step": 5060
},
{
"epoch": 8.1,
"learning_rate": 7.070275403608737e-05,
"loss": 0.0567,
"step": 5106
},
{
"epoch": 8.3,
"learning_rate": 7.033871478315923e-05,
"loss": 0.0608,
"step": 5152
},
{
"epoch": 8.5,
"learning_rate": 6.99746755302311e-05,
"loss": 0.0517,
"step": 5198
},
{
"epoch": 8.69,
"learning_rate": 6.961063627730295e-05,
"loss": 0.0514,
"step": 5244
},
{
"epoch": 8.89,
"learning_rate": 6.924659702437481e-05,
"loss": 0.0525,
"step": 5290
},
{
"epoch": 9.09,
"learning_rate": 6.888255777144666e-05,
"loss": 0.0554,
"step": 5336
},
{
"epoch": 9.28,
"learning_rate": 6.851851851851852e-05,
"loss": 0.0696,
"step": 5382
},
{
"epoch": 7.19,
"learning_rate": 6.815447926559037e-05,
"loss": 0.0358,
"step": 5428
},
{
"epoch": 7.39,
"learning_rate": 6.779044001266224e-05,
"loss": 0.0413,
"step": 5474
},
{
"epoch": 7.59,
"learning_rate": 6.74264007597341e-05,
"loss": 0.0358,
"step": 5520
},
{
"epoch": 7.78,
"learning_rate": 6.706236150680597e-05,
"loss": 0.0483,
"step": 5566
},
{
"epoch": 7.98,
"learning_rate": 6.669832225387782e-05,
"loss": 0.039,
"step": 5612
},
{
"epoch": 8.18,
"learning_rate": 6.633428300094967e-05,
"loss": 0.0381,
"step": 5658
},
{
"epoch": 8.37,
"learning_rate": 6.597024374802153e-05,
"loss": 0.0502,
"step": 5704
},
{
"epoch": 8.57,
"learning_rate": 6.560620449509338e-05,
"loss": 0.0421,
"step": 5750
},
{
"epoch": 8.76,
"learning_rate": 6.524216524216524e-05,
"loss": 0.0447,
"step": 5796
},
{
"epoch": 8.96,
"learning_rate": 6.487812598923711e-05,
"loss": 0.0406,
"step": 5842
},
{
"epoch": 9.16,
"learning_rate": 6.451408673630897e-05,
"loss": 0.0446,
"step": 5888
},
{
"epoch": 9.35,
"learning_rate": 6.415004748338082e-05,
"loss": 0.0483,
"step": 5934
},
{
"epoch": 9.55,
"learning_rate": 6.378600823045267e-05,
"loss": 0.0432,
"step": 5980
},
{
"epoch": 9.75,
"learning_rate": 6.342196897752454e-05,
"loss": 0.0483,
"step": 6026
},
{
"epoch": 9.94,
"learning_rate": 6.30579297245964e-05,
"loss": 0.0493,
"step": 6072
},
{
"epoch": 10.14,
"learning_rate": 6.269389047166825e-05,
"loss": 0.0448,
"step": 6118
},
{
"epoch": 8.05,
"learning_rate": 6.232985121874011e-05,
"loss": 0.037,
"step": 6164
},
{
"epoch": 8.25,
"learning_rate": 6.196581196581198e-05,
"loss": 0.0319,
"step": 6210
},
{
"epoch": 8.44,
"learning_rate": 6.160177271288383e-05,
"loss": 0.0341,
"step": 6256
},
{
"epoch": 8.64,
"learning_rate": 6.123773345995568e-05,
"loss": 0.0322,
"step": 6302
},
{
"epoch": 8.84,
"learning_rate": 6.087369420702754e-05,
"loss": 0.0375,
"step": 6348
},
{
"epoch": 9.03,
"learning_rate": 6.0509654954099404e-05,
"loss": 0.0291,
"step": 6394
},
{
"epoch": 9.23,
"learning_rate": 6.014561570117127e-05,
"loss": 0.0322,
"step": 6440
},
{
"epoch": 9.43,
"learning_rate": 5.978157644824312e-05,
"loss": 0.0315,
"step": 6486
},
{
"epoch": 9.62,
"learning_rate": 5.9417537195314975e-05,
"loss": 0.0341,
"step": 6532
},
{
"epoch": 9.82,
"learning_rate": 5.905349794238684e-05,
"loss": 0.0314,
"step": 6578
},
{
"epoch": 10.02,
"learning_rate": 5.868945868945869e-05,
"loss": 0.0338,
"step": 6624
},
{
"epoch": 10.21,
"learning_rate": 5.8325419436530546e-05,
"loss": 0.0311,
"step": 6670
},
{
"epoch": 10.41,
"learning_rate": 5.796138018360241e-05,
"loss": 0.0335,
"step": 6716
},
{
"epoch": 10.61,
"learning_rate": 5.7597340930674274e-05,
"loss": 0.0409,
"step": 6762
},
{
"epoch": 10.8,
"learning_rate": 5.7233301677746124e-05,
"loss": 0.0332,
"step": 6808
},
{
"epoch": 11.0,
"learning_rate": 5.686926242481798e-05,
"loss": 0.0456,
"step": 6854
},
{
"epoch": 11.2,
"learning_rate": 5.6505223171889845e-05,
"loss": 0.0311,
"step": 6900
},
{
"epoch": 9.11,
"learning_rate": 5.61411839189617e-05,
"loss": 0.0331,
"step": 6946
},
{
"epoch": 9.3,
"learning_rate": 5.577714466603355e-05,
"loss": 0.0234,
"step": 6992
},
{
"epoch": 9.5,
"learning_rate": 5.5413105413105416e-05,
"loss": 0.0214,
"step": 7038
},
{
"epoch": 9.7,
"learning_rate": 5.504906616017728e-05,
"loss": 0.0266,
"step": 7084
},
{
"epoch": 9.89,
"learning_rate": 5.4685026907249136e-05,
"loss": 0.0279,
"step": 7130
},
{
"epoch": 10.09,
"learning_rate": 5.4320987654320986e-05,
"loss": 0.0256,
"step": 7176
},
{
"epoch": 10.29,
"learning_rate": 5.395694840139285e-05,
"loss": 0.0256,
"step": 7222
},
{
"epoch": 10.48,
"learning_rate": 5.359290914846471e-05,
"loss": 0.0277,
"step": 7268
},
{
"epoch": 10.68,
"learning_rate": 5.322886989553656e-05,
"loss": 0.0237,
"step": 7314
},
{
"epoch": 10.88,
"learning_rate": 5.286483064260842e-05,
"loss": 0.0286,
"step": 7360
},
{
"epoch": 11.07,
"learning_rate": 5.2500791389680285e-05,
"loss": 0.0285,
"step": 7406
},
{
"epoch": 11.27,
"learning_rate": 5.213675213675214e-05,
"loss": 0.0325,
"step": 7452
},
{
"epoch": 11.47,
"learning_rate": 5.177271288382399e-05,
"loss": 0.0345,
"step": 7498
},
{
"epoch": 11.66,
"learning_rate": 5.1408673630895856e-05,
"loss": 0.0254,
"step": 7544
},
{
"epoch": 11.86,
"learning_rate": 5.104463437796771e-05,
"loss": 0.0277,
"step": 7590
},
{
"epoch": 12.06,
"learning_rate": 5.0680595125039577e-05,
"loss": 0.0242,
"step": 7636
},
{
"epoch": 12.25,
"learning_rate": 5.031655587211143e-05,
"loss": 0.0277,
"step": 7682
},
{
"epoch": 10.16,
"learning_rate": 4.995251661918329e-05,
"loss": 0.0188,
"step": 7728
},
{
"epoch": 10.36,
"learning_rate": 4.958847736625515e-05,
"loss": 0.0218,
"step": 7774
},
{
"epoch": 10.56,
"learning_rate": 4.9224438113327004e-05,
"loss": 0.0244,
"step": 7820
},
{
"epoch": 10.75,
"learning_rate": 4.886039886039887e-05,
"loss": 0.0204,
"step": 7866
},
{
"epoch": 10.95,
"learning_rate": 4.849635960747072e-05,
"loss": 0.0224,
"step": 7912
},
{
"epoch": 11.15,
"learning_rate": 4.813232035454258e-05,
"loss": 0.0205,
"step": 7958
},
{
"epoch": 11.34,
"learning_rate": 4.776828110161444e-05,
"loss": 0.027,
"step": 8004
},
{
"epoch": 11.54,
"learning_rate": 4.7404241848686296e-05,
"loss": 0.0183,
"step": 8050
},
{
"epoch": 11.74,
"learning_rate": 4.704020259575815e-05,
"loss": 0.0199,
"step": 8096
},
{
"epoch": 11.93,
"learning_rate": 4.667616334283001e-05,
"loss": 0.0227,
"step": 8142
},
{
"epoch": 12.13,
"learning_rate": 4.6312124089901874e-05,
"loss": 0.0232,
"step": 8188
},
{
"epoch": 12.32,
"learning_rate": 4.5948084836973724e-05,
"loss": 0.0221,
"step": 8234
},
{
"epoch": 12.52,
"learning_rate": 4.558404558404559e-05,
"loss": 0.0207,
"step": 8280
},
{
"epoch": 12.72,
"learning_rate": 4.5220006331117445e-05,
"loss": 0.0213,
"step": 8326
},
{
"epoch": 12.91,
"learning_rate": 4.48559670781893e-05,
"loss": 0.0201,
"step": 8372
},
{
"epoch": 13.11,
"learning_rate": 4.449192782526116e-05,
"loss": 0.0196,
"step": 8418
},
{
"epoch": 11.02,
"learning_rate": 4.412788857233302e-05,
"loss": 0.0197,
"step": 8464
},
{
"epoch": 11.22,
"learning_rate": 4.376384931940488e-05,
"loss": 0.0154,
"step": 8510
},
{
"epoch": 11.41,
"learning_rate": 4.3399810066476736e-05,
"loss": 0.0177,
"step": 8556
},
{
"epoch": 11.61,
"learning_rate": 4.303577081354859e-05,
"loss": 0.0158,
"step": 8602
},
{
"epoch": 11.81,
"learning_rate": 4.267173156062046e-05,
"loss": 0.0158,
"step": 8648
},
{
"epoch": 12.0,
"learning_rate": 4.230769230769231e-05,
"loss": 0.0163,
"step": 8694
},
{
"epoch": 12.2,
"learning_rate": 4.194365305476417e-05,
"loss": 0.0184,
"step": 8740
},
{
"epoch": 12.4,
"learning_rate": 4.157961380183603e-05,
"loss": 0.016,
"step": 8786
},
{
"epoch": 12.59,
"learning_rate": 4.1215574548907885e-05,
"loss": 0.0154,
"step": 8832
},
{
"epoch": 12.79,
"learning_rate": 4.085153529597974e-05,
"loss": 0.0168,
"step": 8878
},
{
"epoch": 12.99,
"learning_rate": 4.04874960430516e-05,
"loss": 0.0165,
"step": 8924
},
{
"epoch": 13.18,
"learning_rate": 4.012345679012346e-05,
"loss": 0.0143,
"step": 8970
},
{
"epoch": 13.38,
"learning_rate": 3.975941753719531e-05,
"loss": 0.0188,
"step": 9016
},
{
"epoch": 13.58,
"learning_rate": 3.9395378284267176e-05,
"loss": 0.0159,
"step": 9062
},
{
"epoch": 13.77,
"learning_rate": 3.903133903133903e-05,
"loss": 0.0192,
"step": 9108
},
{
"epoch": 13.97,
"learning_rate": 3.866729977841089e-05,
"loss": 0.018,
"step": 9154
},
{
"epoch": 14.17,
"learning_rate": 3.830326052548275e-05,
"loss": 0.0194,
"step": 9200
},
{
"epoch": 12.08,
"learning_rate": 3.793922127255461e-05,
"loss": 0.0139,
"step": 9246
},
{
"epoch": 12.27,
"learning_rate": 3.757518201962647e-05,
"loss": 0.0122,
"step": 9292
},
{
"epoch": 12.47,
"learning_rate": 3.7211142766698325e-05,
"loss": 0.015,
"step": 9338
},
{
"epoch": 12.67,
"learning_rate": 3.684710351377018e-05,
"loss": 0.0112,
"step": 9384
},
{
"epoch": 12.86,
"learning_rate": 3.6483064260842046e-05,
"loss": 0.0134,
"step": 9430
},
{
"epoch": 13.06,
"learning_rate": 3.6119025007913896e-05,
"loss": 0.0139,
"step": 9476
},
{
"epoch": 13.26,
"learning_rate": 3.575498575498576e-05,
"loss": 0.0129,
"step": 9522
},
{
"epoch": 13.45,
"learning_rate": 3.539094650205762e-05,
"loss": 0.0147,
"step": 9568
},
{
"epoch": 13.65,
"learning_rate": 3.5026907249129474e-05,
"loss": 0.0133,
"step": 9614
},
{
"epoch": 13.85,
"learning_rate": 3.466286799620133e-05,
"loss": 0.0135,
"step": 9660
},
{
"epoch": 14.04,
"learning_rate": 3.429882874327319e-05,
"loss": 0.015,
"step": 9706
},
{
"epoch": 14.24,
"learning_rate": 3.393478949034505e-05,
"loss": 0.0128,
"step": 9752
},
{
"epoch": 14.44,
"learning_rate": 3.35707502374169e-05,
"loss": 0.0135,
"step": 9798
},
{
"epoch": 14.63,
"learning_rate": 3.3206710984488765e-05,
"loss": 0.0137,
"step": 9844
},
{
"epoch": 14.83,
"learning_rate": 3.284267173156062e-05,
"loss": 0.013,
"step": 9890
},
{
"epoch": 15.03,
"learning_rate": 3.247863247863248e-05,
"loss": 0.0157,
"step": 9936
},
{
"epoch": 15.22,
"learning_rate": 3.2114593225704336e-05,
"loss": 0.0139,
"step": 9982
},
{
"epoch": 13.13,
"learning_rate": 3.17505539727762e-05,
"loss": 0.0103,
"step": 10028
},
{
"epoch": 13.33,
"learning_rate": 3.138651471984806e-05,
"loss": 0.0095,
"step": 10074
},
{
"epoch": 13.53,
"learning_rate": 3.1022475466919914e-05,
"loss": 0.0114,
"step": 10120
},
{
"epoch": 13.72,
"learning_rate": 3.065843621399177e-05,
"loss": 0.0114,
"step": 10166
},
{
"epoch": 13.92,
"learning_rate": 3.029439696106363e-05,
"loss": 0.0097,
"step": 10212
},
{
"epoch": 14.12,
"learning_rate": 2.9930357708135488e-05,
"loss": 0.0094,
"step": 10258
},
{
"epoch": 14.31,
"learning_rate": 2.956631845520735e-05,
"loss": 0.0109,
"step": 10304
},
{
"epoch": 14.51,
"learning_rate": 2.9202279202279202e-05,
"loss": 0.0105,
"step": 10350
},
{
"epoch": 14.71,
"learning_rate": 2.883823994935106e-05,
"loss": 0.0103,
"step": 10396
},
{
"epoch": 14.9,
"learning_rate": 2.847420069642292e-05,
"loss": 0.0111,
"step": 10442
},
{
"epoch": 15.1,
"learning_rate": 2.8110161443494776e-05,
"loss": 0.012,
"step": 10488
},
{
"epoch": 15.29,
"learning_rate": 2.7746122190566637e-05,
"loss": 0.0104,
"step": 10534
},
{
"epoch": 15.49,
"learning_rate": 2.7382082937638494e-05,
"loss": 0.0105,
"step": 10580
},
{
"epoch": 15.69,
"learning_rate": 2.7018043684710354e-05,
"loss": 0.0096,
"step": 10626
},
{
"epoch": 15.88,
"learning_rate": 2.6654004431782208e-05,
"loss": 0.0126,
"step": 10672
},
{
"epoch": 16.08,
"learning_rate": 2.628996517885407e-05,
"loss": 0.012,
"step": 10718
},
{
"epoch": 16.28,
"learning_rate": 2.5925925925925925e-05,
"loss": 0.0091,
"step": 10764
},
{
"epoch": 14.19,
"learning_rate": 2.5561886672997785e-05,
"loss": 0.0086,
"step": 10810
},
{
"epoch": 14.38,
"learning_rate": 2.5197847420069642e-05,
"loss": 0.0095,
"step": 10856
},
{
"epoch": 14.58,
"learning_rate": 2.48338081671415e-05,
"loss": 0.0074,
"step": 10902
},
{
"epoch": 14.78,
"learning_rate": 2.446976891421336e-05,
"loss": 0.0085,
"step": 10948
},
{
"epoch": 14.97,
"learning_rate": 2.4105729661285217e-05,
"loss": 0.0092,
"step": 10994
},
{
"epoch": 15.17,
"learning_rate": 2.3741690408357077e-05,
"loss": 0.0078,
"step": 11040
},
{
"epoch": 15.37,
"learning_rate": 2.3377651155428934e-05,
"loss": 0.0089,
"step": 11086
},
{
"epoch": 15.56,
"learning_rate": 2.301361190250079e-05,
"loss": 0.0083,
"step": 11132
},
{
"epoch": 15.76,
"learning_rate": 2.264957264957265e-05,
"loss": 0.0079,
"step": 11178
},
{
"epoch": 15.96,
"learning_rate": 2.2285533396644508e-05,
"loss": 0.0091,
"step": 11224
},
{
"epoch": 16.15,
"learning_rate": 2.192149414371637e-05,
"loss": 0.0082,
"step": 11270
},
{
"epoch": 16.35,
"learning_rate": 2.1557454890788225e-05,
"loss": 0.0084,
"step": 11316
},
{
"epoch": 16.55,
"learning_rate": 2.1193415637860082e-05,
"loss": 0.0087,
"step": 11362
},
{
"epoch": 16.74,
"learning_rate": 2.0829376384931943e-05,
"loss": 0.0088,
"step": 11408
},
{
"epoch": 16.94,
"learning_rate": 2.04653371320038e-05,
"loss": 0.0098,
"step": 11454
},
{
"epoch": 17.14,
"learning_rate": 2.010129787907566e-05,
"loss": 0.0087,
"step": 11500
},
{
"epoch": 15.05,
"learning_rate": 1.9737258626147517e-05,
"loss": 0.0087,
"step": 11546
},
{
"epoch": 15.24,
"learning_rate": 1.9373219373219374e-05,
"loss": 0.0066,
"step": 11592
},
{
"epoch": 15.44,
"learning_rate": 1.900918012029123e-05,
"loss": 0.0071,
"step": 11638
},
{
"epoch": 15.64,
"learning_rate": 1.8645140867363088e-05,
"loss": 0.0079,
"step": 11684
},
{
"epoch": 15.83,
"learning_rate": 1.828110161443495e-05,
"loss": 0.0074,
"step": 11730
},
{
"epoch": 16.03,
"learning_rate": 1.7917062361506805e-05,
"loss": 0.0069,
"step": 11776
},
{
"epoch": 16.23,
"learning_rate": 1.7553023108578666e-05,
"loss": 0.0063,
"step": 11822
},
{
"epoch": 16.42,
"learning_rate": 1.7188983855650523e-05,
"loss": 0.0075,
"step": 11868
},
{
"epoch": 16.62,
"learning_rate": 1.682494460272238e-05,
"loss": 0.0074,
"step": 11914
},
{
"epoch": 16.82,
"learning_rate": 1.646090534979424e-05,
"loss": 0.0067,
"step": 11960
},
{
"epoch": 17.01,
"learning_rate": 1.6096866096866097e-05,
"loss": 0.0064,
"step": 12006
},
{
"epoch": 17.21,
"learning_rate": 1.5732826843937957e-05,
"loss": 0.0068,
"step": 12052
},
{
"epoch": 17.41,
"learning_rate": 1.5368787591009814e-05,
"loss": 0.0076,
"step": 12098
},
{
"epoch": 17.6,
"learning_rate": 1.5004748338081673e-05,
"loss": 0.0069,
"step": 12144
},
{
"epoch": 17.8,
"learning_rate": 1.4640709085153532e-05,
"loss": 0.0075,
"step": 12190
},
{
"epoch": 18.0,
"learning_rate": 1.427666983222539e-05,
"loss": 0.0072,
"step": 12236
},
{
"epoch": 18.19,
"learning_rate": 1.3912630579297247e-05,
"loss": 0.0066,
"step": 12282
},
{
"epoch": 16.1,
"learning_rate": 1.3548591326369106e-05,
"loss": 0.0069,
"step": 12328
},
{
"epoch": 16.3,
"learning_rate": 1.3184552073440961e-05,
"loss": 0.0052,
"step": 12374
},
{
"epoch": 16.5,
"learning_rate": 1.282051282051282e-05,
"loss": 0.0064,
"step": 12420
},
{
"epoch": 16.69,
"learning_rate": 1.245647356758468e-05,
"loss": 0.0063,
"step": 12466
},
{
"epoch": 16.89,
"learning_rate": 1.2092434314656539e-05,
"loss": 0.006,
"step": 12512
},
{
"epoch": 17.09,
"learning_rate": 1.1728395061728396e-05,
"loss": 0.0057,
"step": 12558
},
{
"epoch": 17.28,
"learning_rate": 1.1364355808800253e-05,
"loss": 0.0061,
"step": 12604
},
{
"epoch": 17.48,
"learning_rate": 1.1000316555872111e-05,
"loss": 0.0055,
"step": 12650
},
{
"epoch": 17.68,
"learning_rate": 1.063627730294397e-05,
"loss": 0.0053,
"step": 12696
},
{
"epoch": 17.87,
"learning_rate": 1.0272238050015829e-05,
"loss": 0.0051,
"step": 12742
},
{
"epoch": 18.07,
"learning_rate": 9.908198797087687e-06,
"loss": 0.0064,
"step": 12788
},
{
"epoch": 18.26,
"learning_rate": 9.544159544159544e-06,
"loss": 0.0056,
"step": 12834
},
{
"epoch": 18.46,
"learning_rate": 9.180120291231403e-06,
"loss": 0.0062,
"step": 12880
},
{
"epoch": 18.66,
"learning_rate": 8.816081038303262e-06,
"loss": 0.0065,
"step": 12926
},
{
"epoch": 18.85,
"learning_rate": 8.452041785375119e-06,
"loss": 0.0065,
"step": 12972
},
{
"epoch": 19.05,
"learning_rate": 8.088002532446977e-06,
"loss": 0.0064,
"step": 13018
},
{
"epoch": 19.25,
"learning_rate": 7.723963279518836e-06,
"loss": 0.0062,
"step": 13064
},
{
"epoch": 17.16,
"learning_rate": 7.359924026590694e-06,
"loss": 0.0055,
"step": 13110
},
{
"epoch": 17.35,
"learning_rate": 6.995884773662552e-06,
"loss": 0.0046,
"step": 13156
},
{
"epoch": 17.55,
"learning_rate": 6.63184552073441e-06,
"loss": 0.0049,
"step": 13202
},
{
"epoch": 17.75,
"learning_rate": 6.267806267806268e-06,
"loss": 0.0044,
"step": 13248
},
{
"epoch": 17.94,
"learning_rate": 5.903767014878126e-06,
"loss": 0.0057,
"step": 13294
},
{
"epoch": 18.14,
"learning_rate": 5.539727761949985e-06,
"loss": 0.0047,
"step": 13340
},
{
"epoch": 18.34,
"learning_rate": 5.175688509021842e-06,
"loss": 0.0048,
"step": 13386
},
{
"epoch": 18.53,
"learning_rate": 4.8116492560937e-06,
"loss": 0.0051,
"step": 13432
},
{
"epoch": 18.73,
"learning_rate": 4.447610003165559e-06,
"loss": 0.0056,
"step": 13478
},
{
"epoch": 18.93,
"learning_rate": 4.083570750237417e-06,
"loss": 0.0051,
"step": 13524
},
{
"epoch": 19.12,
"learning_rate": 3.7195314973092754e-06,
"loss": 0.0052,
"step": 13570
},
{
"epoch": 19.32,
"learning_rate": 3.3554922443811336e-06,
"loss": 0.0068,
"step": 13616
},
{
"epoch": 19.52,
"learning_rate": 2.991452991452992e-06,
"loss": 0.0064,
"step": 13662
},
{
"epoch": 19.71,
"learning_rate": 2.6274137385248497e-06,
"loss": 0.0055,
"step": 13708
},
{
"epoch": 19.91,
"learning_rate": 2.263374485596708e-06,
"loss": 0.0056,
"step": 13754
},
{
"epoch": 20.11,
"learning_rate": 1.899335232668566e-06,
"loss": 0.0052,
"step": 13800
},
{
"epoch": 18.02,
"learning_rate": 1.5352959797404244e-06,
"loss": 0.0051,
"step": 13846
},
{
"epoch": 18.21,
"learning_rate": 1.1712567268122824e-06,
"loss": 0.0052,
"step": 13892
},
{
"epoch": 18.41,
"learning_rate": 8.072174738841407e-07,
"loss": 0.0053,
"step": 13938
},
{
"epoch": 18.61,
"learning_rate": 4.4317822095599874e-07,
"loss": 0.0043,
"step": 13984
},
{
"epoch": 18.8,
"learning_rate": 7.913896802785692e-08,
"loss": 0.0046,
"step": 14030
}
],
"logging_steps": 46,
"max_steps": 14040,
"num_train_epochs": 60,
"save_steps": 500,
"total_flos": 6.139659927158784e+17,
"trial_name": null,
"trial_params": null
}