slava-kpss / trainer_state.json
AlekseyKorshuk's picture
huggingartists
f82b15e
{
"best_metric": 1.6512105464935303,
"best_model_checkpoint": "output/slava-kpss/checkpoint-1922",
"epoch": 2.0,
"global_step": 1922,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 0.0001371902778945302,
"loss": 2.663,
"step": 5
},
{
"epoch": 0.01,
"learning_rate": 0.00013716111433378645,
"loss": 2.4309,
"step": 10
},
{
"epoch": 0.02,
"learning_rate": 0.00013711251758398495,
"loss": 2.2912,
"step": 15
},
{
"epoch": 0.02,
"learning_rate": 0.0001370445014195492,
"loss": 2.3384,
"step": 20
},
{
"epoch": 0.03,
"learning_rate": 0.00013695708511920587,
"loss": 2.2697,
"step": 25
},
{
"epoch": 0.03,
"learning_rate": 0.0001368502934605203,
"loss": 2.2379,
"step": 30
},
{
"epoch": 0.04,
"learning_rate": 0.00013672415671287354,
"loss": 2.1656,
"step": 35
},
{
"epoch": 0.04,
"learning_rate": 0.00013657871062888258,
"loss": 2.1628,
"step": 40
},
{
"epoch": 0.05,
"learning_rate": 0.00013641399643426666,
"loss": 2.1289,
"step": 45
},
{
"epoch": 0.05,
"learning_rate": 0.000136230060816162,
"loss": 2.09,
"step": 50
},
{
"epoch": 0.06,
"learning_rate": 0.00013602695590988865,
"loss": 2.0049,
"step": 55
},
{
"epoch": 0.06,
"learning_rate": 0.0001358047392841732,
"loss": 2.2023,
"step": 60
},
{
"epoch": 0.07,
"learning_rate": 0.00013556347392483116,
"loss": 2.1472,
"step": 65
},
{
"epoch": 0.08,
"learning_rate": 0.00013530322821691406,
"loss": 2.0605,
"step": 70
},
{
"epoch": 0.08,
"learning_rate": 0.00013502407592532636,
"loss": 2.1713,
"step": 75
},
{
"epoch": 0.09,
"learning_rate": 0.00013472609617391705,
"loss": 2.0815,
"step": 80
},
{
"epoch": 0.09,
"learning_rate": 0.0001344093734230526,
"loss": 1.998,
"step": 85
},
{
"epoch": 0.1,
"learning_rate": 0.00013407399744567734,
"loss": 1.9623,
"step": 90
},
{
"epoch": 0.1,
"learning_rate": 0.00013372006330186772,
"loss": 2.0139,
"step": 95
},
{
"epoch": 0.11,
"learning_rate": 0.00013334767131188837,
"loss": 2.0258,
"step": 100
},
{
"epoch": 0.11,
"learning_rate": 0.00013295692702775685,
"loss": 1.9965,
"step": 105
},
{
"epoch": 0.12,
"learning_rate": 0.00013254794120332568,
"loss": 2.0578,
"step": 110
},
{
"epoch": 0.12,
"learning_rate": 0.00013212082976288994,
"loss": 1.9713,
"step": 115
},
{
"epoch": 0.13,
"learning_rate": 0.00013167571376832926,
"loss": 2.0398,
"step": 120
},
{
"epoch": 0.13,
"learning_rate": 0.00013121271938479367,
"loss": 1.9906,
"step": 125
},
{
"epoch": 0.14,
"learning_rate": 0.00013073197784494285,
"loss": 1.9652,
"step": 130
},
{
"epoch": 0.14,
"learning_rate": 0.0001302336254117493,
"loss": 1.9658,
"step": 135
},
{
"epoch": 0.15,
"learning_rate": 0.00012971780333987523,
"loss": 2.0634,
"step": 140
},
{
"epoch": 0.16,
"learning_rate": 0.00012918465783563518,
"loss": 2.0384,
"step": 145
},
{
"epoch": 0.16,
"learning_rate": 0.00012863434001555456,
"loss": 1.9783,
"step": 150
},
{
"epoch": 0.17,
"learning_rate": 0.00012806700586353683,
"loss": 2.0149,
"step": 155
},
{
"epoch": 0.17,
"learning_rate": 0.00012748281618665092,
"loss": 1.9433,
"step": 160
},
{
"epoch": 0.18,
"learning_rate": 0.00012688193656955137,
"loss": 2.0019,
"step": 165
},
{
"epoch": 0.18,
"learning_rate": 0.0001262645373275447,
"loss": 2.036,
"step": 170
},
{
"epoch": 0.19,
"learning_rate": 0.0001256307934583145,
"loss": 1.9862,
"step": 175
},
{
"epoch": 0.19,
"learning_rate": 0.00012498088459231957,
"loss": 1.9842,
"step": 180
},
{
"epoch": 0.2,
"learning_rate": 0.00012431499494187896,
"loss": 2.0212,
"step": 185
},
{
"epoch": 0.2,
"learning_rate": 0.000123633313248958,
"loss": 1.8205,
"step": 190
},
{
"epoch": 0.21,
"learning_rate": 0.00012293603273167084,
"loss": 2.0015,
"step": 195
},
{
"epoch": 0.21,
"learning_rate": 0.00012222335102951405,
"loss": 1.8653,
"step": 200
},
{
"epoch": 0.22,
"learning_rate": 0.00012149547014734692,
"loss": 2.0522,
"step": 205
},
{
"epoch": 0.23,
"learning_rate": 0.00012075259639813464,
"loss": 1.8885,
"step": 210
},
{
"epoch": 0.23,
"learning_rate": 0.00011999494034447026,
"loss": 1.8864,
"step": 215
},
{
"epoch": 0.24,
"learning_rate": 0.00011922271673889206,
"loss": 1.9415,
"step": 220
},
{
"epoch": 0.24,
"learning_rate": 0.00011843614446301341,
"loss": 1.9542,
"step": 225
},
{
"epoch": 0.25,
"learning_rate": 0.00011763544646548234,
"loss": 1.8528,
"step": 230
},
{
"epoch": 0.25,
"learning_rate": 0.00011682084969878809,
"loss": 1.8431,
"step": 235
},
{
"epoch": 0.26,
"learning_rate": 0.00011599258505493302,
"loss": 1.9018,
"step": 240
},
{
"epoch": 0.26,
"learning_rate": 0.0001151508872999878,
"loss": 1.8971,
"step": 245
},
{
"epoch": 0.27,
"learning_rate": 0.00011429599500754859,
"loss": 1.9842,
"step": 250
},
{
"epoch": 0.27,
"learning_rate": 0.00011342815049111488,
"loss": 1.8397,
"step": 255
},
{
"epoch": 0.28,
"learning_rate": 0.00011254759973540735,
"loss": 1.854,
"step": 260
},
{
"epoch": 0.28,
"learning_rate": 0.0001116545923266452,
"loss": 1.9264,
"step": 265
},
{
"epoch": 0.29,
"learning_rate": 0.00011074938138180258,
"loss": 1.833,
"step": 270
},
{
"epoch": 0.29,
"learning_rate": 0.00010983222347686431,
"loss": 1.8783,
"step": 275
},
{
"epoch": 0.3,
"learning_rate": 0.00010890337857410102,
"loss": 1.8777,
"step": 280
},
{
"epoch": 0.31,
"learning_rate": 0.00010796310994838476,
"loss": 1.8851,
"step": 285
},
{
"epoch": 0.31,
"learning_rate": 0.00010701168411256533,
"loss": 1.8735,
"step": 290
},
{
"epoch": 0.32,
"learning_rate": 0.0001060493707419291,
"loss": 1.9696,
"step": 295
},
{
"epoch": 0.32,
"learning_rate": 0.00010507644259776136,
"loss": 1.8878,
"step": 300
},
{
"epoch": 0.33,
"learning_rate": 0.00010409317545003389,
"loss": 1.8279,
"step": 305
},
{
"epoch": 0.33,
"learning_rate": 0.0001030998479992398,
"loss": 1.9672,
"step": 310
},
{
"epoch": 0.34,
"learning_rate": 0.00010209674179739785,
"loss": 1.889,
"step": 315
},
{
"epoch": 0.34,
"learning_rate": 0.00010108414116824834,
"loss": 1.8617,
"step": 320
},
{
"epoch": 0.35,
"learning_rate": 0.00010006233312666341,
"loss": 1.9077,
"step": 325
},
{
"epoch": 0.35,
"learning_rate": 9.90316072972947e-05,
"loss": 1.8912,
"step": 330
},
{
"epoch": 0.36,
"learning_rate": 9.79922558324811e-05,
"loss": 1.7622,
"step": 335
},
{
"epoch": 0.36,
"learning_rate": 9.694457332944009e-05,
"loss": 1.8778,
"step": 340
},
{
"epoch": 0.37,
"learning_rate": 9.588885674676624e-05,
"loss": 1.8761,
"step": 345
},
{
"epoch": 0.38,
"learning_rate": 9.482540532026027e-05,
"loss": 1.8225,
"step": 350
},
{
"epoch": 0.38,
"learning_rate": 9.37545204781125e-05,
"loss": 1.7817,
"step": 355
},
{
"epoch": 0.39,
"learning_rate": 9.26765057554653e-05,
"loss": 1.7633,
"step": 360
},
{
"epoch": 0.39,
"learning_rate": 9.159166670837789e-05,
"loss": 1.8406,
"step": 365
},
{
"epoch": 0.4,
"learning_rate": 9.05003108272186e-05,
"loss": 1.9374,
"step": 370
},
{
"epoch": 0.4,
"learning_rate": 8.940274744950875e-05,
"loss": 1.8444,
"step": 375
},
{
"epoch": 0.41,
"learning_rate": 8.829928767224302e-05,
"loss": 1.8098,
"step": 380
},
{
"epoch": 0.41,
"learning_rate": 8.71902442637111e-05,
"loss": 1.822,
"step": 385
},
{
"epoch": 0.42,
"learning_rate": 8.607593157484563e-05,
"loss": 1.87,
"step": 390
},
{
"epoch": 0.42,
"learning_rate": 8.495666545012144e-05,
"loss": 1.8821,
"step": 395
},
{
"epoch": 0.43,
"learning_rate": 8.383276313803162e-05,
"loss": 1.8619,
"step": 400
},
{
"epoch": 0.43,
"learning_rate": 8.270454320116558e-05,
"loss": 1.896,
"step": 405
},
{
"epoch": 0.44,
"learning_rate": 8.157232542591454e-05,
"loss": 1.776,
"step": 410
},
{
"epoch": 0.44,
"learning_rate": 8.043643073183026e-05,
"loss": 1.8886,
"step": 415
},
{
"epoch": 0.45,
"learning_rate": 7.92971810806626e-05,
"loss": 1.8725,
"step": 420
},
{
"epoch": 0.46,
"learning_rate": 7.815489938510145e-05,
"loss": 1.8305,
"step": 425
},
{
"epoch": 0.46,
"learning_rate": 7.700990941724947e-05,
"loss": 1.8383,
"step": 430
},
{
"epoch": 0.47,
"learning_rate": 7.586253571685095e-05,
"loss": 1.872,
"step": 435
},
{
"epoch": 0.47,
"learning_rate": 7.471310349930326e-05,
"loss": 1.8617,
"step": 440
},
{
"epoch": 0.48,
"learning_rate": 7.356193856347655e-05,
"loss": 1.8118,
"step": 445
},
{
"epoch": 0.48,
"learning_rate": 7.24093671993686e-05,
"loss": 1.8363,
"step": 450
},
{
"epoch": 0.49,
"learning_rate": 7.125571609561963e-05,
"loss": 1.7498,
"step": 455
},
{
"epoch": 0.49,
"learning_rate": 7.010131224691501e-05,
"loss": 1.8902,
"step": 460
},
{
"epoch": 0.5,
"learning_rate": 6.894648286130055e-05,
"loss": 1.9075,
"step": 465
},
{
"epoch": 0.5,
"learning_rate": 6.779155526743765e-05,
"loss": 1.8081,
"step": 470
},
{
"epoch": 0.51,
"learning_rate": 6.66368568218242e-05,
"loss": 1.8239,
"step": 475
},
{
"epoch": 0.51,
"learning_rate": 6.548271481600758e-05,
"loss": 1.8584,
"step": 480
},
{
"epoch": 0.52,
"learning_rate": 6.432945638381598e-05,
"loss": 1.9037,
"step": 485
},
{
"epoch": 0.53,
"learning_rate": 6.317740840863456e-05,
"loss": 1.7551,
"step": 490
},
{
"epoch": 0.53,
"learning_rate": 6.202689743075261e-05,
"loss": 1.8329,
"step": 495
},
{
"epoch": 0.54,
"learning_rate": 6.0878249554807756e-05,
"loss": 1.7776,
"step": 500
},
{
"epoch": 0.54,
"learning_rate": 5.9731790357353845e-05,
"loss": 1.8987,
"step": 505
},
{
"epoch": 0.55,
"learning_rate": 5.8587844794578496e-05,
"loss": 1.6097,
"step": 510
},
{
"epoch": 0.55,
"learning_rate": 5.744673711019635e-05,
"loss": 1.8081,
"step": 515
},
{
"epoch": 0.56,
"learning_rate": 5.630879074354446e-05,
"loss": 1.8023,
"step": 520
},
{
"epoch": 0.56,
"learning_rate": 5.517432823790546e-05,
"loss": 1.709,
"step": 525
},
{
"epoch": 0.57,
"learning_rate": 5.404367114908498e-05,
"loss": 1.7632,
"step": 530
},
{
"epoch": 0.57,
"learning_rate": 5.291713995426862e-05,
"loss": 1.796,
"step": 535
},
{
"epoch": 0.58,
"learning_rate": 5.179505396118502e-05,
"loss": 1.7405,
"step": 540
},
{
"epoch": 0.58,
"learning_rate": 5.067773121760007e-05,
"loss": 1.8264,
"step": 545
},
{
"epoch": 0.59,
"learning_rate": 4.9565488421168504e-05,
"loss": 1.8083,
"step": 550
},
{
"epoch": 0.59,
"learning_rate": 4.8458640829668e-05,
"loss": 1.7488,
"step": 555
},
{
"epoch": 0.6,
"learning_rate": 4.735750217164156e-05,
"loss": 1.8385,
"step": 560
},
{
"epoch": 0.61,
"learning_rate": 4.6262384557473104e-05,
"loss": 1.8445,
"step": 565
},
{
"epoch": 0.61,
"learning_rate": 4.517359839092207e-05,
"loss": 1.7808,
"step": 570
},
{
"epoch": 0.62,
"learning_rate": 4.409145228114133e-05,
"loss": 1.7932,
"step": 575
},
{
"epoch": 0.62,
"learning_rate": 4.3016252955204197e-05,
"loss": 1.7931,
"step": 580
},
{
"epoch": 0.63,
"learning_rate": 4.1948305171164515e-05,
"loss": 1.7508,
"step": 585
},
{
"epoch": 0.63,
"learning_rate": 4.08879116316751e-05,
"loss": 1.7877,
"step": 590
},
{
"epoch": 0.64,
"learning_rate": 3.98353728981888e-05,
"loss": 1.8233,
"step": 595
},
{
"epoch": 0.64,
"learning_rate": 3.879098730576618e-05,
"loss": 1.8181,
"step": 600
},
{
"epoch": 0.65,
"learning_rate": 3.7755050878514536e-05,
"loss": 1.6906,
"step": 605
},
{
"epoch": 0.65,
"learning_rate": 3.672785724568185e-05,
"loss": 1.7465,
"step": 610
},
{
"epoch": 0.66,
"learning_rate": 3.570969755842952e-05,
"loss": 1.7276,
"step": 615
},
{
"epoch": 0.66,
"learning_rate": 3.4700860407307565e-05,
"loss": 1.7561,
"step": 620
},
{
"epoch": 0.67,
"learning_rate": 3.3701631740455454e-05,
"loss": 1.8063,
"step": 625
},
{
"epoch": 0.68,
"learning_rate": 3.271229478255218e-05,
"loss": 1.6319,
"step": 630
},
{
"epoch": 0.68,
"learning_rate": 3.173312995453793e-05,
"loss": 1.7234,
"step": 635
},
{
"epoch": 0.69,
"learning_rate": 3.07644147941308e-05,
"loss": 1.811,
"step": 640
},
{
"epoch": 0.69,
"learning_rate": 2.9806423877160492e-05,
"loss": 1.8344,
"step": 645
},
{
"epoch": 0.7,
"learning_rate": 2.8859428739741754e-05,
"loss": 1.7602,
"step": 650
},
{
"epoch": 0.7,
"learning_rate": 2.7923697801309092e-05,
"loss": 1.8365,
"step": 655
},
{
"epoch": 0.71,
"learning_rate": 2.699949628853528e-05,
"loss": 1.7506,
"step": 660
},
{
"epoch": 0.71,
"learning_rate": 2.60870861601545e-05,
"loss": 1.7098,
"step": 665
},
{
"epoch": 0.72,
"learning_rate": 2.518672603271192e-05,
"loss": 1.8215,
"step": 670
},
{
"epoch": 0.72,
"learning_rate": 2.429867110726057e-05,
"loss": 1.8035,
"step": 675
},
{
"epoch": 0.73,
"learning_rate": 2.3423173097026407e-05,
"loss": 1.7455,
"step": 680
},
{
"epoch": 0.73,
"learning_rate": 2.25604801560617e-05,
"loss": 1.8008,
"step": 685
},
{
"epoch": 0.74,
"learning_rate": 2.1710836808907555e-05,
"loss": 1.7584,
"step": 690
},
{
"epoch": 0.74,
"learning_rate": 2.0874483881285084e-05,
"loss": 1.7836,
"step": 695
},
{
"epoch": 0.75,
"learning_rate": 2.0051658431834844e-05,
"loss": 1.7353,
"step": 700
},
{
"epoch": 0.76,
"learning_rate": 1.924259368492425e-05,
"loss": 1.7518,
"step": 705
},
{
"epoch": 0.76,
"learning_rate": 1.844751896454173e-05,
"loss": 1.7968,
"step": 710
},
{
"epoch": 0.77,
"learning_rate": 1.766665962929623e-05,
"loss": 1.751,
"step": 715
},
{
"epoch": 0.77,
"learning_rate": 1.6900237008540944e-05,
"loss": 1.7056,
"step": 720
},
{
"epoch": 0.78,
"learning_rate": 1.6148468339638933e-05,
"loss": 1.7457,
"step": 725
},
{
"epoch": 0.78,
"learning_rate": 1.5411566706388707e-05,
"loss": 1.7712,
"step": 730
},
{
"epoch": 0.79,
"learning_rate": 1.4689740978626948e-05,
"loss": 1.7609,
"step": 735
},
{
"epoch": 0.79,
"learning_rate": 1.3983195753025887e-05,
"loss": 1.7358,
"step": 740
},
{
"epoch": 0.8,
"learning_rate": 1.3292131295101604e-05,
"loss": 1.8011,
"step": 745
},
{
"epoch": 0.8,
"learning_rate": 1.2616743482450217e-05,
"loss": 1.7338,
"step": 750
},
{
"epoch": 0.81,
"learning_rate": 1.1957223749227626e-05,
"loss": 1.7185,
"step": 755
},
{
"epoch": 0.81,
"learning_rate": 1.1313759031888791e-05,
"loss": 1.7987,
"step": 760
},
{
"epoch": 0.82,
"learning_rate": 1.0686531716201893e-05,
"loss": 1.7855,
"step": 765
},
{
"epoch": 0.83,
"learning_rate": 1.0075719585552289e-05,
"loss": 1.755,
"step": 770
},
{
"epoch": 0.83,
"learning_rate": 9.481495770550924e-06,
"loss": 1.7749,
"step": 775
},
{
"epoch": 0.84,
"learning_rate": 8.90402869996171e-06,
"loss": 1.7306,
"step": 780
},
{
"epoch": 0.84,
"learning_rate": 8.343482052961487e-06,
"loss": 1.7385,
"step": 785
},
{
"epoch": 0.85,
"learning_rate": 7.800014712746244e-06,
"loss": 1.8382,
"step": 790
},
{
"epoch": 0.85,
"learning_rate": 7.273780721496786e-06,
"loss": 1.7857,
"step": 795
},
{
"epoch": 0.86,
"learning_rate": 6.7649292367164704e-06,
"loss": 1.7224,
"step": 800
},
{
"epoch": 0.86,
"learning_rate": 6.2736044889534784e-06,
"loss": 1.7514,
"step": 805
},
{
"epoch": 0.87,
"learning_rate": 5.799945740919712e-06,
"loss": 1.7887,
"step": 810
},
{
"epoch": 0.87,
"learning_rate": 5.344087248017646e-06,
"loss": 1.7605,
"step": 815
},
{
"epoch": 0.88,
"learning_rate": 4.906158220286551e-06,
"loss": 1.7712,
"step": 820
},
{
"epoch": 0.88,
"learning_rate": 4.486282785778806e-06,
"loss": 1.7334,
"step": 825
},
{
"epoch": 0.89,
"learning_rate": 4.084579955376559e-06,
"loss": 1.749,
"step": 830
},
{
"epoch": 0.89,
"learning_rate": 3.7011635890589766e-06,
"loss": 1.7343,
"step": 835
},
{
"epoch": 0.9,
"learning_rate": 3.3361423636293224e-06,
"loss": 1.7663,
"step": 840
},
{
"epoch": 0.91,
"learning_rate": 2.989619741911281e-06,
"loss": 1.759,
"step": 845
},
{
"epoch": 0.91,
"learning_rate": 2.6616939434230985e-06,
"loss": 1.7025,
"step": 850
},
{
"epoch": 0.92,
"learning_rate": 2.352457916537921e-06,
"loss": 1.7564,
"step": 855
},
{
"epoch": 0.92,
"learning_rate": 2.0619993121382247e-06,
"loss": 1.8452,
"step": 860
},
{
"epoch": 0.93,
"learning_rate": 1.7904004587717305e-06,
"loss": 1.7741,
"step": 865
},
{
"epoch": 0.93,
"learning_rate": 1.5377383393159132e-06,
"loss": 1.7832,
"step": 870
},
{
"epoch": 0.94,
"learning_rate": 1.3040845691577635e-06,
"loss": 1.8237,
"step": 875
},
{
"epoch": 0.94,
"learning_rate": 1.0895053758948607e-06,
"loss": 1.7458,
"step": 880
},
{
"epoch": 0.95,
"learning_rate": 8.940615805635918e-07,
"loss": 1.7009,
"step": 885
},
{
"epoch": 0.95,
"learning_rate": 7.178085803998752e-07,
"loss": 1.725,
"step": 890
},
{
"epoch": 0.96,
"learning_rate": 5.607963331371593e-07,
"loss": 1.7469,
"step": 895
},
{
"epoch": 0.96,
"learning_rate": 4.2306934284621745e-07,
"loss": 1.6719,
"step": 900
},
{
"epoch": 0.97,
"learning_rate": 3.04666647320803e-07,
"loss": 1.8424,
"step": 905
},
{
"epoch": 0.98,
"learning_rate": 2.0562180701263117e-07,
"loss": 1.7483,
"step": 910
},
{
"epoch": 0.98,
"learning_rate": 1.2596289551889364e-07,
"loss": 1.7528,
"step": 915
},
{
"epoch": 0.99,
"learning_rate": 6.571249162498684e-08,
"loss": 1.8179,
"step": 920
},
{
"epoch": 0.99,
"learning_rate": 2.4887672904708548e-08,
"loss": 1.7763,
"step": 925
},
{
"epoch": 1.0,
"learning_rate": 3.5000108797349717e-09,
"loss": 1.7489,
"step": 930
},
{
"epoch": 1.0,
"eval_loss": 1.7264798879623413,
"eval_runtime": 80.022,
"eval_samples_per_second": 20.494,
"eval_steps_per_second": 2.562,
"step": 933
},
{
"epoch": 0.97,
"learning_rate": 2.476465669200449e-07,
"loss": 1.7548,
"step": 935
},
{
"epoch": 0.98,
"learning_rate": 1.615902325712679e-07,
"loss": 1.7767,
"step": 940
},
{
"epoch": 0.98,
"learning_rate": 9.38184037085626e-08,
"loss": 1.7322,
"step": 945
},
{
"epoch": 0.99,
"learning_rate": 4.434918674879014e-08,
"loss": 1.7019,
"step": 950
},
{
"epoch": 0.99,
"learning_rate": 1.319579824933328e-08,
"loss": 1.6765,
"step": 955
},
{
"epoch": 1.0,
"learning_rate": 3.665613770225118e-10,
"loss": 1.742,
"step": 960
},
{
"epoch": 1.0,
"eval_loss": 1.6864277124404907,
"eval_runtime": 68.8946,
"eval_samples_per_second": 20.466,
"eval_steps_per_second": 2.569,
"step": 961
},
{
"epoch": 1.0,
"learning_rate": 5.8649036845078714e-09,
"loss": 1.574,
"step": 965
},
{
"epoch": 1.01,
"learning_rate": 2.9689356194480253e-08,
"loss": 1.7872,
"step": 970
},
{
"epoch": 1.01,
"learning_rate": 7.183355379217629e-08,
"loss": 1.688,
"step": 975
},
{
"epoch": 1.02,
"learning_rate": 1.3228623692592306e-07,
"loss": 1.7384,
"step": 980
},
{
"epoch": 1.02,
"learning_rate": 2.1103125461527332e-07,
"loss": 1.672,
"step": 985
},
{
"epoch": 1.03,
"learning_rate": 3.0804756876609914e-07,
"loss": 1.7357,
"step": 990
},
{
"epoch": 1.04,
"learning_rate": 4.233092597912044e-07,
"loss": 1.7344,
"step": 995
},
{
"epoch": 1.04,
"learning_rate": 5.567855335352604e-07,
"loss": 1.7574,
"step": 1000
},
{
"epoch": 1.05,
"learning_rate": 7.084407295019475e-07,
"loss": 1.7031,
"step": 1005
},
{
"epoch": 1.05,
"learning_rate": 8.782343303812844e-07,
"loss": 1.6877,
"step": 1010
},
{
"epoch": 1.06,
"learning_rate": 1.0661209728745555e-06,
"loss": 1.6856,
"step": 1015
},
{
"epoch": 1.06,
"learning_rate": 1.272050459813843e-06,
"loss": 1.7801,
"step": 1020
},
{
"epoch": 1.07,
"learning_rate": 1.495967773573164e-06,
"loss": 1.7647,
"step": 1025
},
{
"epoch": 1.07,
"learning_rate": 1.7378130907672579e-06,
"loss": 1.7437,
"step": 1030
},
{
"epoch": 1.08,
"learning_rate": 1.997521798234569e-06,
"loss": 1.7193,
"step": 1035
},
{
"epoch": 1.08,
"learning_rate": 2.275024510299646e-06,
"loss": 1.8438,
"step": 1040
},
{
"epoch": 1.09,
"learning_rate": 2.570247087310856e-06,
"loss": 1.7861,
"step": 1045
},
{
"epoch": 1.09,
"learning_rate": 2.883110655447913e-06,
"loss": 1.7835,
"step": 1050
},
{
"epoch": 1.1,
"learning_rate": 3.213531627794597e-06,
"loss": 1.6692,
"step": 1055
},
{
"epoch": 1.1,
"learning_rate": 3.561421726670198e-06,
"loss": 1.673,
"step": 1060
},
{
"epoch": 1.11,
"learning_rate": 3.926688007214648e-06,
"loss": 1.7161,
"step": 1065
},
{
"epoch": 1.11,
"learning_rate": 4.3092328822202e-06,
"loss": 1.725,
"step": 1070
},
{
"epoch": 1.12,
"learning_rate": 4.708954148203643e-06,
"loss": 1.7126,
"step": 1075
},
{
"epoch": 1.12,
"learning_rate": 5.1257450127116035e-06,
"loss": 1.7486,
"step": 1080
},
{
"epoch": 1.13,
"learning_rate": 5.559494122852188e-06,
"loss": 1.7327,
"step": 1085
},
{
"epoch": 1.13,
"learning_rate": 6.0100855950445935e-06,
"loss": 1.7928,
"step": 1090
},
{
"epoch": 1.14,
"learning_rate": 6.477399045979628e-06,
"loss": 1.7449,
"step": 1095
},
{
"epoch": 1.14,
"learning_rate": 6.961309624782102e-06,
"loss": 1.7278,
"step": 1100
},
{
"epoch": 1.15,
"learning_rate": 7.461688046366857e-06,
"loss": 1.7724,
"step": 1105
},
{
"epoch": 1.16,
"learning_rate": 7.97840062597962e-06,
"loss": 1.7395,
"step": 1110
},
{
"epoch": 1.16,
"learning_rate": 8.511309314913076e-06,
"loss": 1.7612,
"step": 1115
},
{
"epoch": 1.17,
"learning_rate": 9.060271737389124e-06,
"loss": 1.7803,
"step": 1120
},
{
"epoch": 1.17,
"learning_rate": 9.625141228596819e-06,
"loss": 1.698,
"step": 1125
},
{
"epoch": 1.18,
"learning_rate": 1.0205766873876643e-05,
"loss": 1.7627,
"step": 1130
},
{
"epoch": 1.18,
"learning_rate": 1.08019935490398e-05,
"loss": 1.615,
"step": 1135
},
{
"epoch": 1.19,
"learning_rate": 1.1413661961812419e-05,
"loss": 1.7236,
"step": 1140
},
{
"epoch": 1.19,
"learning_rate": 1.2040608694393166e-05,
"loss": 1.732,
"step": 1145
},
{
"epoch": 1.2,
"learning_rate": 1.2682666247113272e-05,
"loss": 1.7578,
"step": 1150
},
{
"epoch": 1.2,
"learning_rate": 1.333966308318674e-05,
"loss": 1.7636,
"step": 1155
},
{
"epoch": 1.21,
"learning_rate": 1.4011423674539631e-05,
"loss": 1.6698,
"step": 1160
},
{
"epoch": 1.21,
"learning_rate": 1.4697768548705208e-05,
"loss": 1.8001,
"step": 1165
},
{
"epoch": 1.22,
"learning_rate": 1.5398514336773276e-05,
"loss": 1.7019,
"step": 1170
},
{
"epoch": 1.22,
"learning_rate": 1.611347382238021e-05,
"loss": 1.7685,
"step": 1175
},
{
"epoch": 1.23,
"learning_rate": 1.684245599172719e-05,
"loss": 1.8088,
"step": 1180
},
{
"epoch": 1.23,
"learning_rate": 1.7585266084612767e-05,
"loss": 1.7428,
"step": 1185
},
{
"epoch": 1.24,
"learning_rate": 1.8341705646466328e-05,
"loss": 1.712,
"step": 1190
},
{
"epoch": 1.24,
"learning_rate": 1.9111572581368946e-05,
"loss": 1.7453,
"step": 1195
},
{
"epoch": 1.25,
"learning_rate": 1.9894661206046444e-05,
"loss": 1.6769,
"step": 1200
},
{
"epoch": 1.25,
"learning_rate": 2.069076230482155e-05,
"loss": 1.6822,
"step": 1205
},
{
"epoch": 1.26,
"learning_rate": 2.1499663185509284e-05,
"loss": 1.7965,
"step": 1210
},
{
"epoch": 1.26,
"learning_rate": 2.232114773624152e-05,
"loss": 1.6978,
"step": 1215
},
{
"epoch": 1.27,
"learning_rate": 2.3154996483204744e-05,
"loss": 1.7852,
"step": 1220
},
{
"epoch": 1.27,
"learning_rate": 2.400098664927671e-05,
"loss": 1.6758,
"step": 1225
},
{
"epoch": 1.28,
"learning_rate": 2.4858892213544702e-05,
"loss": 1.742,
"step": 1230
},
{
"epoch": 1.29,
"learning_rate": 2.5728483971691396e-05,
"loss": 1.7897,
"step": 1235
},
{
"epoch": 1.29,
"learning_rate": 2.660952959723034e-05,
"loss": 1.6749,
"step": 1240
},
{
"epoch": 1.3,
"learning_rate": 2.750179370357635e-05,
"loss": 1.7127,
"step": 1245
},
{
"epoch": 1.3,
"learning_rate": 2.8405037906932926e-05,
"loss": 1.7259,
"step": 1250
},
{
"epoch": 1.31,
"learning_rate": 2.9319020889980675e-05,
"loss": 1.6448,
"step": 1255
},
{
"epoch": 1.31,
"learning_rate": 3.0243498466349233e-05,
"loss": 1.6744,
"step": 1260
},
{
"epoch": 1.32,
"learning_rate": 3.117822364585623e-05,
"loss": 1.6893,
"step": 1265
},
{
"epoch": 1.32,
"learning_rate": 3.212294670049457e-05,
"loss": 1.7822,
"step": 1270
},
{
"epoch": 1.33,
"learning_rate": 3.3077415231151914e-05,
"loss": 1.7231,
"step": 1275
},
{
"epoch": 1.33,
"learning_rate": 3.4041374235043516e-05,
"loss": 1.7438,
"step": 1280
},
{
"epoch": 1.34,
"learning_rate": 3.501456617384015e-05,
"loss": 1.776,
"step": 1285
},
{
"epoch": 1.34,
"learning_rate": 3.599673104247454e-05,
"loss": 1.7459,
"step": 1290
},
{
"epoch": 1.35,
"learning_rate": 3.6987606438605696e-05,
"loss": 1.7127,
"step": 1295
},
{
"epoch": 1.35,
"learning_rate": 3.798692763272465e-05,
"loss": 1.6762,
"step": 1300
},
{
"epoch": 1.36,
"learning_rate": 3.899442763888126e-05,
"loss": 1.7619,
"step": 1305
},
{
"epoch": 1.36,
"learning_rate": 4.000983728601432e-05,
"loss": 1.7135,
"step": 1310
},
{
"epoch": 1.37,
"learning_rate": 4.103288528986497e-05,
"loss": 1.7328,
"step": 1315
},
{
"epoch": 1.37,
"learning_rate": 4.20632983254554e-05,
"loss": 1.7118,
"step": 1320
},
{
"epoch": 1.38,
"learning_rate": 4.3100801100111876e-05,
"loss": 1.801,
"step": 1325
},
{
"epoch": 1.38,
"learning_rate": 4.4145116427014376e-05,
"loss": 1.7959,
"step": 1330
},
{
"epoch": 1.39,
"learning_rate": 4.519596529925159e-05,
"loss": 1.7436,
"step": 1335
},
{
"epoch": 1.39,
"learning_rate": 4.625306696436258e-05,
"loss": 1.7913,
"step": 1340
},
{
"epoch": 1.4,
"learning_rate": 4.7316138999344416e-05,
"loss": 1.7002,
"step": 1345
},
{
"epoch": 1.4,
"learning_rate": 4.838489738610674e-05,
"loss": 1.6758,
"step": 1350
},
{
"epoch": 1.41,
"learning_rate": 4.945905658735171e-05,
"loss": 1.7027,
"step": 1355
},
{
"epoch": 1.42,
"learning_rate": 5.0538329622860605e-05,
"loss": 1.7378,
"step": 1360
},
{
"epoch": 1.42,
"learning_rate": 5.1622428146165326e-05,
"loss": 1.6332,
"step": 1365
},
{
"epoch": 1.43,
"learning_rate": 5.271106252158521e-05,
"loss": 1.7335,
"step": 1370
},
{
"epoch": 1.43,
"learning_rate": 5.3803941901608256e-05,
"loss": 1.7126,
"step": 1375
},
{
"epoch": 1.44,
"learning_rate": 5.4900774304595864e-05,
"loss": 1.7287,
"step": 1380
},
{
"epoch": 1.44,
"learning_rate": 5.600126669279115e-05,
"loss": 1.6935,
"step": 1385
},
{
"epoch": 1.45,
"learning_rate": 5.710512505060865e-05,
"loss": 1.7214,
"step": 1390
},
{
"epoch": 1.45,
"learning_rate": 5.8212054463186014e-05,
"loss": 1.7355,
"step": 1395
},
{
"epoch": 1.46,
"learning_rate": 5.9321759195175425e-05,
"loss": 1.7537,
"step": 1400
},
{
"epoch": 1.46,
"learning_rate": 6.043394276975451e-05,
"loss": 1.776,
"step": 1405
},
{
"epoch": 1.47,
"learning_rate": 6.154830804783502e-05,
"loss": 1.783,
"step": 1410
},
{
"epoch": 1.47,
"learning_rate": 6.266455730744911e-05,
"loss": 1.7703,
"step": 1415
},
{
"epoch": 1.48,
"learning_rate": 6.378239232329044e-05,
"loss": 1.7259,
"step": 1420
},
{
"epoch": 1.48,
"learning_rate": 6.490151444639059e-05,
"loss": 1.6852,
"step": 1425
},
{
"epoch": 1.49,
"learning_rate": 6.602162468390815e-05,
"loss": 1.675,
"step": 1430
},
{
"epoch": 1.49,
"learning_rate": 6.714242377900986e-05,
"loss": 1.7367,
"step": 1435
},
{
"epoch": 1.5,
"learning_rate": 6.826361229082211e-05,
"loss": 1.8171,
"step": 1440
},
{
"epoch": 1.5,
"learning_rate": 6.938489067443205e-05,
"loss": 1.7388,
"step": 1445
},
{
"epoch": 1.51,
"learning_rate": 7.050595936091584e-05,
"loss": 1.7459,
"step": 1450
},
{
"epoch": 1.51,
"learning_rate": 7.162651883737402e-05,
"loss": 1.808,
"step": 1455
},
{
"epoch": 1.52,
"learning_rate": 7.274626972695132e-05,
"loss": 1.7933,
"step": 1460
},
{
"epoch": 1.52,
"learning_rate": 7.386491286882046e-05,
"loss": 1.7711,
"step": 1465
},
{
"epoch": 1.53,
"learning_rate": 7.498214939810823e-05,
"loss": 1.7745,
"step": 1470
},
{
"epoch": 1.53,
"learning_rate": 7.609768082574206e-05,
"loss": 1.6494,
"step": 1475
},
{
"epoch": 1.54,
"learning_rate": 7.721120911819707e-05,
"loss": 1.7815,
"step": 1480
},
{
"epoch": 1.55,
"learning_rate": 7.83224367771204e-05,
"loss": 1.7829,
"step": 1485
},
{
"epoch": 1.55,
"learning_rate": 7.943106691881334e-05,
"loss": 1.7363,
"step": 1490
},
{
"epoch": 1.56,
"learning_rate": 8.053680335354889e-05,
"loss": 1.7149,
"step": 1495
},
{
"epoch": 1.56,
"learning_rate": 8.163935066470398e-05,
"loss": 1.7134,
"step": 1500
},
{
"epoch": 1.57,
"learning_rate": 8.273841428768484e-05,
"loss": 1.7653,
"step": 1505
},
{
"epoch": 1.57,
"learning_rate": 8.383370058862563e-05,
"loss": 1.7677,
"step": 1510
},
{
"epoch": 1.58,
"learning_rate": 8.492491694283713e-05,
"loss": 1.6578,
"step": 1515
},
{
"epoch": 1.58,
"learning_rate": 8.601177181298704e-05,
"loss": 1.7271,
"step": 1520
},
{
"epoch": 1.59,
"learning_rate": 8.709397482698893e-05,
"loss": 1.6661,
"step": 1525
},
{
"epoch": 1.59,
"learning_rate": 8.817123685558034e-05,
"loss": 1.7347,
"step": 1530
},
{
"epoch": 1.6,
"learning_rate": 8.92432700895683e-05,
"loss": 1.7298,
"step": 1535
},
{
"epoch": 1.6,
"learning_rate": 9.03097881167231e-05,
"loss": 1.7343,
"step": 1540
},
{
"epoch": 1.61,
"learning_rate": 9.137050599829776e-05,
"loss": 1.7651,
"step": 1545
},
{
"epoch": 1.61,
"learning_rate": 9.242514034515462e-05,
"loss": 1.7716,
"step": 1550
},
{
"epoch": 1.62,
"learning_rate": 9.34734093934778e-05,
"loss": 1.8064,
"step": 1555
},
{
"epoch": 1.62,
"learning_rate": 9.451503308005074e-05,
"loss": 1.8239,
"step": 1560
},
{
"epoch": 1.63,
"learning_rate": 9.554973311708053e-05,
"loss": 1.7248,
"step": 1565
},
{
"epoch": 1.63,
"learning_rate": 9.657723306654681e-05,
"loss": 1.6457,
"step": 1570
},
{
"epoch": 1.64,
"learning_rate": 9.759725841405743e-05,
"loss": 1.8182,
"step": 1575
},
{
"epoch": 1.64,
"learning_rate": 9.86095366421894e-05,
"loss": 1.6211,
"step": 1580
},
{
"epoch": 1.65,
"learning_rate": 9.961379730329683e-05,
"loss": 1.6631,
"step": 1585
},
{
"epoch": 1.65,
"learning_rate": 0.00010060977209176536,
"loss": 1.7168,
"step": 1590
},
{
"epoch": 1.66,
"learning_rate": 0.0001015971949156952,
"loss": 1.752,
"step": 1595
},
{
"epoch": 1.66,
"learning_rate": 0.00010257580196799162,
"loss": 1.7518,
"step": 1600
},
{
"epoch": 1.67,
"learning_rate": 0.00010354533179684596,
"loss": 1.7591,
"step": 1605
},
{
"epoch": 1.68,
"learning_rate": 0.00010450552537558691,
"loss": 1.7579,
"step": 1610
},
{
"epoch": 1.68,
"learning_rate": 0.00010545612617188394,
"loss": 1.7364,
"step": 1615
},
{
"epoch": 1.69,
"learning_rate": 0.00010639688021628421,
"loss": 1.7379,
"step": 1620
},
{
"epoch": 1.69,
"learning_rate": 0.00010732753617006524,
"loss": 1.7443,
"step": 1625
},
{
"epoch": 1.7,
"learning_rate": 0.00010824784539238402,
"loss": 1.6989,
"step": 1630
},
{
"epoch": 1.7,
"learning_rate": 0.00010915756200670606,
"loss": 1.6362,
"step": 1635
},
{
"epoch": 1.71,
"learning_rate": 0.00011005644296649529,
"loss": 1.7417,
"step": 1640
},
{
"epoch": 1.71,
"learning_rate": 0.00011094424812014832,
"loss": 1.635,
"step": 1645
},
{
"epoch": 1.72,
"learning_rate": 0.00011182074027515503,
"loss": 1.6471,
"step": 1650
},
{
"epoch": 1.72,
"learning_rate": 0.0001126856852614686,
"loss": 1.6803,
"step": 1655
},
{
"epoch": 1.73,
"learning_rate": 0.00011353885199406824,
"loss": 1.6863,
"step": 1660
},
{
"epoch": 1.73,
"learning_rate": 0.00011438001253469733,
"loss": 1.7498,
"step": 1665
},
{
"epoch": 1.74,
"learning_rate": 0.00011520894215276136,
"loss": 1.7037,
"step": 1670
},
{
"epoch": 1.74,
"learning_rate": 0.00011602541938536831,
"loss": 1.6918,
"step": 1675
},
{
"epoch": 1.75,
"learning_rate": 0.00011682922609649652,
"loss": 1.7,
"step": 1680
},
{
"epoch": 1.75,
"learning_rate": 0.0001176201475352733,
"loss": 1.7662,
"step": 1685
},
{
"epoch": 1.76,
"learning_rate": 0.00011839797239334955,
"loss": 1.8041,
"step": 1690
},
{
"epoch": 1.76,
"learning_rate": 0.00011916249286135422,
"loss": 1.7271,
"step": 1695
},
{
"epoch": 1.77,
"learning_rate": 0.00011991350468441439,
"loss": 1.7033,
"step": 1700
},
{
"epoch": 1.77,
"learning_rate": 0.00012065080721672542,
"loss": 1.6855,
"step": 1705
},
{
"epoch": 1.78,
"learning_rate": 0.00012137420347515691,
"loss": 1.6554,
"step": 1710
},
{
"epoch": 1.78,
"learning_rate": 0.00012208350019188007,
"loss": 1.7365,
"step": 1715
},
{
"epoch": 1.79,
"learning_rate": 0.00012277850786600282,
"loss": 1.7106,
"step": 1720
},
{
"epoch": 1.8,
"learning_rate": 0.00012345904081419794,
"loss": 1.735,
"step": 1725
},
{
"epoch": 1.8,
"learning_rate": 0.00012412491722031186,
"loss": 1.6991,
"step": 1730
},
{
"epoch": 1.81,
"learning_rate": 0.00012477595918393978,
"loss": 1.6981,
"step": 1735
},
{
"epoch": 1.81,
"learning_rate": 0.0001254119927679549,
"loss": 1.7559,
"step": 1740
},
{
"epoch": 1.82,
"learning_rate": 0.00012603284804497882,
"loss": 1.7479,
"step": 1745
},
{
"epoch": 1.82,
"learning_rate": 0.00012663835914278047,
"loss": 1.7712,
"step": 1750
},
{
"epoch": 1.83,
"learning_rate": 0.0001272283642885918,
"loss": 1.7758,
"step": 1755
},
{
"epoch": 1.83,
"learning_rate": 0.00012780270585232808,
"loss": 1.7353,
"step": 1760
},
{
"epoch": 1.84,
"learning_rate": 0.00012836123038870173,
"loss": 1.7297,
"step": 1765
},
{
"epoch": 1.84,
"learning_rate": 0.00012890378867821763,
"loss": 1.8349,
"step": 1770
},
{
"epoch": 1.85,
"learning_rate": 0.0001294302357670399,
"loss": 1.8035,
"step": 1775
},
{
"epoch": 1.85,
"learning_rate": 0.00012994043100571866,
"loss": 1.7481,
"step": 1780
},
{
"epoch": 1.86,
"learning_rate": 0.000130434238086767,
"loss": 1.6094,
"step": 1785
},
{
"epoch": 1.86,
"learning_rate": 0.00013091152508107798,
"loss": 1.7314,
"step": 1790
},
{
"epoch": 1.87,
"learning_rate": 0.00013137216447317167,
"loss": 1.8012,
"step": 1795
},
{
"epoch": 1.87,
"learning_rate": 0.0001318160331952632,
"loss": 1.7435,
"step": 1800
},
{
"epoch": 1.88,
"learning_rate": 0.0001322430126601424,
"loss": 1.7132,
"step": 1805
},
{
"epoch": 1.88,
"learning_rate": 0.00013265298879285635,
"loss": 1.6928,
"step": 1810
},
{
"epoch": 1.89,
"learning_rate": 0.00013304585206118667,
"loss": 1.6412,
"step": 1815
},
{
"epoch": 1.89,
"learning_rate": 0.00013342149750491278,
"loss": 1.7068,
"step": 1820
},
{
"epoch": 1.9,
"learning_rate": 0.0001337798247638538,
"loss": 1.7385,
"step": 1825
},
{
"epoch": 1.9,
"learning_rate": 0.0001341207381046819,
"loss": 1.612,
"step": 1830
},
{
"epoch": 1.91,
"learning_rate": 0.00013444414644649843,
"loss": 1.6917,
"step": 1835
},
{
"epoch": 1.91,
"learning_rate": 0.00013474996338516847,
"loss": 1.7257,
"step": 1840
},
{
"epoch": 1.92,
"learning_rate": 0.00013503810721640465,
"loss": 1.7391,
"step": 1845
},
{
"epoch": 1.93,
"learning_rate": 0.00013530850095759623,
"loss": 1.7311,
"step": 1850
},
{
"epoch": 1.93,
"learning_rate": 0.00013556107236837607,
"loss": 1.671,
"step": 1855
},
{
"epoch": 1.94,
"learning_rate": 0.00013579575396992113,
"loss": 1.716,
"step": 1860
},
{
"epoch": 1.94,
"learning_rate": 0.00013601248306298038,
"loss": 1.7233,
"step": 1865
},
{
"epoch": 1.95,
"learning_rate": 0.00013621120174462615,
"loss": 1.6546,
"step": 1870
},
{
"epoch": 1.95,
"learning_rate": 0.00013639185692372385,
"loss": 1.7717,
"step": 1875
},
{
"epoch": 1.96,
"learning_rate": 0.00013655440033511618,
"loss": 1.7035,
"step": 1880
},
{
"epoch": 1.96,
"learning_rate": 0.00013669878855251797,
"loss": 1.7345,
"step": 1885
},
{
"epoch": 1.97,
"learning_rate": 0.00013682498300011836,
"loss": 1.7678,
"step": 1890
},
{
"epoch": 1.97,
"learning_rate": 0.00013693294996288687,
"loss": 1.6938,
"step": 1895
},
{
"epoch": 1.98,
"learning_rate": 0.00013702266059558108,
"loss": 1.7334,
"step": 1900
},
{
"epoch": 1.98,
"learning_rate": 0.00013709409093045299,
"loss": 1.6722,
"step": 1905
},
{
"epoch": 1.99,
"learning_rate": 0.00013714722188365257,
"loss": 1.7148,
"step": 1910
},
{
"epoch": 1.99,
"learning_rate": 0.00013718203926032623,
"loss": 1.6982,
"step": 1915
},
{
"epoch": 2.0,
"learning_rate": 0.00013719853375840932,
"loss": 1.7054,
"step": 1920
},
{
"epoch": 2.0,
"eval_loss": 1.6512105464935303,
"eval_runtime": 68.7702,
"eval_samples_per_second": 20.503,
"eval_steps_per_second": 2.574,
"step": 1922
}
],
"max_steps": 1922,
"num_train_epochs": 2,
"total_flos": 2008029265920000.0,
"trial_name": null,
"trial_params": null
}